fix(agents): preserve Codex model capacity guidance

Vincent Koc
2026-04-23 14:27:48 -07:00
parent f0300253c1
commit f718ba6601
5 changed files with 130 additions and 18 deletions

View File

@@ -13,7 +13,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
-- Agents/OpenAI: surface selected-model capacity failures with a model-switch hint instead of the generic empty-response error. Thanks @vincentkoc.
+- Agents/OpenAI: surface selected-model capacity failures from PI, Codex, and auto-reply harness paths with a model-switch hint instead of the generic empty-response error. Thanks @vincentkoc.
 - Providers/OpenAI: stop advertising the removed `gpt-5.3-codex-spark` Codex model through fallback catalogs, and suppress stale rows with a GPT-5.5 recovery hint.
 - Plugins/QR: replace legacy `qrcode-terminal` QR rendering with bounded `qrcode-tui` helpers for plugin login/setup flows. (#65969) Thanks @vincentkoc.
 - Voice-call/realtime: wait for OpenAI session configuration before greeting or forwarding buffered audio, and reject non-allowlisted Twilio callers before stream setup. (#43501) Thanks @forrestblount.
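
The first fix above routes raw provider errors through a new shared helper, `formatRateLimitOrOverloadedErrorCopy`, which the hunks below export and consume. A minimal caller sketch in TypeScript, assuming the behavior shown in this commit's test mock (the import path and the generic fallback string are illustrative, not from the shipped code):

import { formatRateLimitOrOverloadedErrorCopy } from "../../agents/pi-embedded-helpers.js"; // path as mocked in the tests below

// Sketch: prefer the specific capacity / rate-limit / overloaded copy,
// and only fall back to a generic message when no pattern matches.
function userFacingRunError(raw: string): string {
  return (
    formatRateLimitOrOverloadedErrorCopy(raw) ??
    "⚠️ Agent run failed with an empty response." // hypothetical generic copy
  );
}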

View File

@@ -13,6 +13,7 @@ export {
   BILLING_ERROR_USER_MESSAGE,
   classifyProviderRuntimeFailureKind,
   formatBillingErrorMessage,
+  formatRateLimitOrOverloadedErrorCopy,
   classifyFailoverReason,
   classifyFailoverReasonFromHttpStatus,
   formatRawAssistantErrorForUi,

View File

@@ -53,6 +53,7 @@ import type { FailoverReason } from "./types.js";
 export {
   BILLING_ERROR_USER_MESSAGE,
   formatBillingErrorMessage,
+  formatRateLimitOrOverloadedErrorCopy,
   getApiErrorPayloadFingerprint,
   isRawApiErrorPayload,
   sanitizeUserFacingText,

View File

@@ -49,10 +49,23 @@ vi.mock("../../agents/bootstrap-budget.js", () => ({
vi.mock("../../agents/pi-embedded-helpers.js", () => ({
BILLING_ERROR_USER_MESSAGE: "billing",
formatRateLimitOrOverloadedErrorCopy: (message: string) => {
if (/model\s+(?:is\s+)?at capacity/i.test(message)) {
return "⚠️ Selected model is at capacity. Try a different model, or wait and retry.";
}
if (/rate.limit|too many requests|429/i.test(message)) {
return "⚠️ API rate limit reached. Please try again later.";
}
if (/overloaded/i.test(message)) {
return "The AI service is temporarily overloaded. Please try again in a moment.";
}
return undefined;
},
isCompactionFailureError: () => false,
isContextOverflowError: () => false,
isBillingErrorMessage: () => false,
isLikelyContextOverflowError: () => false,
isOverloadedErrorMessage: (message: string) => /overloaded|capacity/i.test(message),
isRateLimitErrorMessage: () => false,
isTransientHttpError: () => false,
sanitizeUserFacingText: (text?: string) => text ?? "",
@@ -410,6 +423,95 @@ describe("runAgentTurnWithFallback", () => {
     expect(onToolResult.mock.calls[0]?.[0]?.text).toBeUndefined();
   });
+  it("surfaces model capacity errors from no-text mid-turn failures", async () => {
+    state.runEmbeddedPiAgentMock.mockResolvedValueOnce({
+      payloads: [{ text: "thinking", isReasoning: true }],
+      meta: {
+        error: {
+          kind: "server_overloaded",
+          message: "Selected model is at capacity. Please try a different model.",
+        },
+      },
+    });
+    const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
+    const result = await runAgentTurnWithFallback({
+      commandBody: "hello",
+      followupRun: createFollowupRun(),
+      sessionCtx: {
+        Provider: "whatsapp",
+        MessageSid: "msg",
+      } as unknown as TemplateContext,
+      opts: {},
+      typingSignals: createMockTypingSignaler(),
+      blockReplyPipeline: null,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      applyReplyToMode: (payload) => payload,
+      shouldEmitToolResult: () => true,
+      shouldEmitToolOutput: () => false,
+      pendingToolTasks: new Set(),
+      resetSessionAfterCompactionFailure: async () => false,
+      resetSessionAfterRoleOrderingConflict: async () => false,
+      isHeartbeat: false,
+      sessionKey: "main",
+      getActiveSessionEntry: () => undefined,
+      resolvedVerboseLevel: "off",
+    });
+    expect(result.kind).toBe("success");
+    if (result.kind === "success") {
+      expect(result.runResult.payloads).toEqual([
+        {
+          text: "⚠️ Selected model is at capacity. Try a different model, or wait and retry.",
+          isError: true,
+        },
+      ]);
+    }
+  });
+  it("surfaces model capacity errors from pre-reply CLI failures", async () => {
+    state.runWithModelFallbackMock.mockRejectedValueOnce(
+      new Error("Selected model is at capacity. Please try a different model."),
+    );
+    const runAgentTurnWithFallback = await getRunAgentTurnWithFallback();
+    const followupRun = createFollowupRun();
+    followupRun.run.provider = "openai-codex";
+    followupRun.run.model = "gpt-5.5";
+    const result = await runAgentTurnWithFallback({
+      commandBody: "hello",
+      followupRun,
+      sessionCtx: {
+        Provider: "whatsapp",
+        MessageSid: "msg",
+      } as unknown as TemplateContext,
+      opts: {},
+      typingSignals: createMockTypingSignaler(),
+      blockReplyPipeline: null,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      applyReplyToMode: (payload) => payload,
+      shouldEmitToolResult: () => true,
+      shouldEmitToolOutput: () => false,
+      pendingToolTasks: new Set(),
+      resetSessionAfterCompactionFailure: async () => false,
+      resetSessionAfterRoleOrderingConflict: async () => false,
+      isHeartbeat: false,
+      sessionKey: "main",
+      getActiveSessionEntry: () => undefined,
+      resolvedVerboseLevel: "off",
+    });
+    expect(result).toEqual({
+      kind: "final",
+      payload: {
+        text: "⚠️ Selected model is at capacity. Try a different model, or wait and retry.",
+      },
+    });
+  });
   it("strips a glued leading NO_REPLY token from streamed tool results", async () => {
     const onToolResult = vi.fn();
     state.runEmbeddedPiAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => {

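Note on the mock above: the capacity pattern is matched before the broader rate-limit and overloaded patterns, so a capacity message earns the model-switch hint even though it also satisfies the mocked `isOverloadedErrorMessage` (`/overloaded|capacity/i`). Illustrative calls against that mock (a sketch of expected returns, not the shipped helper):

formatRateLimitOrOverloadedErrorCopy("Selected model is at capacity.");
// → "⚠️ Selected model is at capacity. Try a different model, or wait and retry."
formatRateLimitOrOverloadedErrorCopy("HTTP 429: too many requests");
// → "⚠️ API rate limit reached. Please try again later."
formatRateLimitOrOverloadedErrorCopy("upstream service overloaded");
// → "The AI service is temporarily overloaded. Please try again in a moment."
formatRateLimitOrOverloadedErrorCopy("socket hang up");
// → undefined, so callers keep their existing fallback text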
View File

@@ -16,6 +16,7 @@ import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model
 import { isCliProvider } from "../../agents/model-selection.js";
 import {
   BILLING_ERROR_USER_MESSAGE,
+  formatRateLimitOrOverloadedErrorCopy,
   isCompactionFailureError,
   isContextOverflowError,
   isBillingErrorMessage,
@@ -1516,24 +1517,34 @@ export async function runAgentTurnWithFallback(params: {
       // underlying error. FallbackSummaryError messages embed per-attempt
       // reason labels like `(rate_limit)`, so string-matching the summary text
       // would misclassify mixed-cause exhaustion as a pure transient cooldown.
-      const isRateLimit = isFallbackSummaryError(err)
+      const isFallbackSummary = isFallbackSummaryError(err);
+      const isPureTransientSummary = isFallbackSummary
         ? isPureTransientRateLimitSummary(err)
+        : false;
+      const isRateLimit = isFallbackSummary
+        ? isPureTransientSummary
         : isRateLimitErrorMessage(message);
+      const rateLimitOrOverloadedCopy =
+        !isFallbackSummary || isPureTransientSummary
+          ? formatRateLimitOrOverloadedErrorCopy(message)
+          : undefined;
       const safeMessage = isTransientHttp
         ? sanitizeUserFacingText(message, { errorContext: true })
         : message;
       const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
       const fallbackText = isBilling
         ? BILLING_ERROR_USER_MESSAGE
-        : isRateLimit
+        : isRateLimit && !isOverloadedErrorMessage(message)
           ? buildRateLimitCooldownMessage(err)
-          : isContextOverflow
-            ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
-            : isRoleOrderingError
-              ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
-              : shouldSurfaceToControlUi
-                ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
-                : buildExternalRunFailureText(message);
+          : rateLimitOrOverloadedCopy
+            ? rateLimitOrOverloadedCopy
+            : isContextOverflow
+              ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
+              : isRoleOrderingError
+                ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
+                : shouldSurfaceToControlUi
+                  ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
+                  : buildExternalRunFailureText(message);
       params.replyOperation?.fail("run_failed", err);
       return {
@@ -1590,16 +1601,13 @@ export async function runAgentTurnWithFallback(params: {
         (p) => p.isError && hasNonEmptyString(p.text) && !p.text.startsWith("⚠️"),
       )?.text ?? "";
       const errorCandidate = metaErrorMsg || rawErrorPayloadText;
-      if (
-        errorCandidate &&
-        (isRateLimitErrorMessage(errorCandidate) || isOverloadedErrorMessage(errorCandidate))
-      ) {
-        const isOverloaded = isOverloadedErrorMessage(errorCandidate);
+      const formattedErrorCandidate = errorCandidate
+        ? formatRateLimitOrOverloadedErrorCopy(errorCandidate)
+        : undefined;
+      if (formattedErrorCandidate) {
         runResult.payloads = [
           {
-            text: isOverloaded
-              ? "⚠️ The AI service is temporarily overloaded. Please try again in a moment."
-              : "⚠️ API rate limit reached — the model couldn't generate a response. Please try again in a moment.",
+            text: formattedErrorCandidate,
             isError: true,
           },
         ];
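
For readers tracing the reworked `fallbackText` chain in the first hunk of this file, the same precedence restated as an if/else sketch (a readability aid under the names used in the diff, not the shipped code):

// Same decision order as the nested ternary above.
let fallbackText: string;
if (isBilling) {
  fallbackText = BILLING_ERROR_USER_MESSAGE;
} else if (isRateLimit && !isOverloadedErrorMessage(message)) {
  // Pure transient rate-limit exhaustion gets the cooldown message.
  fallbackText = buildRateLimitCooldownMessage(err);
} else if (rateLimitOrOverloadedCopy) {
  // Capacity / rate-limit / overloaded copy, including the model-switch hint.
  fallbackText = rateLimitOrOverloadedCopy;
} else if (isContextOverflow) {
  fallbackText = "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model.";
} else if (isRoleOrderingError) {
  fallbackText = "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session.";
} else if (shouldSurfaceToControlUi) {
  fallbackText = `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`;
} else {
  fallbackText = buildExternalRunFailureText(message);
}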