diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0462723781c..0d09bb8e39b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
-- Agents/OpenAI: surface selected-model capacity failures with a model-switch hint instead of the generic empty-response error. Thanks @vincentkoc.
+- Agents/OpenAI: surface selected-model capacity failures from PI, Codex, and auto-reply harness paths with a model-switch hint instead of the generic empty-response error. Thanks @vincentkoc.
 - Providers/OpenAI: stop advertising the removed `gpt-5.3-codex-spark` Codex model through fallback catalogs, and suppress stale rows with a GPT-5.5 recovery hint.
 - Plugins/QR: replace legacy `qrcode-terminal` QR rendering with bounded `qrcode-tui` helpers for plugin login/setup flows. (#65969) Thanks @vincentkoc.
 - Voice-call/realtime: wait for OpenAI session configuration before greeting or forwarding buffered audio, and reject non-allowlisted Twilio callers before stream setup. (#43501) Thanks @forrestblount.
diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts
index 9b8d184379e..ce47ef8dcd0 100644
--- a/src/agents/pi-embedded-helpers.ts
+++ b/src/agents/pi-embedded-helpers.ts
@@ -13,6 +13,7 @@ export {
   BILLING_ERROR_USER_MESSAGE,
   classifyProviderRuntimeFailureKind,
   formatBillingErrorMessage,
+  formatRateLimitOrOverloadedErrorCopy,
   classifyFailoverReason,
   classifyFailoverReasonFromHttpStatus,
   formatRawAssistantErrorForUi,
diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts
index 2e8ae9f2536..2243c44cf92 100644
--- a/src/agents/pi-embedded-helpers/errors.ts
+++ b/src/agents/pi-embedded-helpers/errors.ts
@@ -53,6 +53,7 @@ import type { FailoverReason } from "./types.js";
 export {
   BILLING_ERROR_USER_MESSAGE,
   formatBillingErrorMessage,
+  formatRateLimitOrOverloadedErrorCopy,
   getApiErrorPayloadFingerprint,
   isRawApiErrorPayload,
   sanitizeUserFacingText,
diff --git a/src/auto-reply/reply/agent-runner-execution.test.ts b/src/auto-reply/reply/agent-runner-execution.test.ts
index 661b19f891e..240ffa17e51 100644
--- a/src/auto-reply/reply/agent-runner-execution.test.ts
+++ b/src/auto-reply/reply/agent-runner-execution.test.ts
@@ -49,10 +49,23 @@ vi.mock("../../agents/bootstrap-budget.js", () => ({
 
 vi.mock("../../agents/pi-embedded-helpers.js", () => ({
   BILLING_ERROR_USER_MESSAGE: "billing",
+  formatRateLimitOrOverloadedErrorCopy: (message: string) => {
+    if (/model\s+(?:is\s+)?at capacity/i.test(message)) {
+      return "⚠️ Selected model is at capacity. Try a different model, or wait and retry.";
+    }
+    if (/rate.limit|too many requests|429/i.test(message)) {
+      return "⚠️ API rate limit reached. Please try again later.";
+    }
+    if (/overloaded/i.test(message)) {
+      return "⚠️ The AI service is temporarily overloaded. Please try again in a moment.";
+    }
+    return undefined;
+  },
   isCompactionFailureError: () => false,
   isContextOverflowError: () => false,
   isBillingErrorMessage: () => false,
   isLikelyContextOverflowError: () => false,
+  isOverloadedErrorMessage: (message: string) => /overloaded|capacity/i.test(message),
   isRateLimitErrorMessage: () => false,
   isTransientHttpError: () => false,
   sanitizeUserFacingText: (text?: string) => text ?? "",
"", @@ -410,6 +423,95 @@ describe("runAgentTurnWithFallback", () => { expect(onToolResult.mock.calls[0]?.[0]?.text).toBeUndefined(); }); + it("surfaces model capacity errors from no-text mid-turn failures", async () => { + state.runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "thinking", isReasoning: true }], + meta: { + error: { + kind: "server_overloaded", + message: "Selected model is at capacity. Please try a different model.", + }, + }, + }); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const result = await runAgentTurnWithFallback({ + commandBody: "hello", + followupRun: createFollowupRun(), + sessionCtx: { + Provider: "whatsapp", + MessageSid: "msg", + } as unknown as TemplateContext, + opts: {}, + typingSignals: createMockTypingSignaler(), + blockReplyPipeline: null, + blockStreamingEnabled: false, + resolvedBlockStreamingBreak: "message_end", + applyReplyToMode: (payload) => payload, + shouldEmitToolResult: () => true, + shouldEmitToolOutput: () => false, + pendingToolTasks: new Set(), + resetSessionAfterCompactionFailure: async () => false, + resetSessionAfterRoleOrderingConflict: async () => false, + isHeartbeat: false, + sessionKey: "main", + getActiveSessionEntry: () => undefined, + resolvedVerboseLevel: "off", + }); + + expect(result.kind).toBe("success"); + if (result.kind === "success") { + expect(result.runResult.payloads).toEqual([ + { + text: "⚠️ Selected model is at capacity. Try a different model, or wait and retry.", + isError: true, + }, + ]); + } + }); + + it("surfaces model capacity errors from pre-reply CLI failures", async () => { + state.runWithModelFallbackMock.mockRejectedValueOnce( + new Error("Selected model is at capacity. Please try a different model."), + ); + + const runAgentTurnWithFallback = await getRunAgentTurnWithFallback(); + const followupRun = createFollowupRun(); + followupRun.run.provider = "openai-codex"; + followupRun.run.model = "gpt-5.5"; + + const result = await runAgentTurnWithFallback({ + commandBody: "hello", + followupRun, + sessionCtx: { + Provider: "whatsapp", + MessageSid: "msg", + } as unknown as TemplateContext, + opts: {}, + typingSignals: createMockTypingSignaler(), + blockReplyPipeline: null, + blockStreamingEnabled: false, + resolvedBlockStreamingBreak: "message_end", + applyReplyToMode: (payload) => payload, + shouldEmitToolResult: () => true, + shouldEmitToolOutput: () => false, + pendingToolTasks: new Set(), + resetSessionAfterCompactionFailure: async () => false, + resetSessionAfterRoleOrderingConflict: async () => false, + isHeartbeat: false, + sessionKey: "main", + getActiveSessionEntry: () => undefined, + resolvedVerboseLevel: "off", + }); + + expect(result).toEqual({ + kind: "final", + payload: { + text: "⚠️ Selected model is at capacity. 
+      },
+    });
+  });
+
   it("strips a glued leading NO_REPLY token from streamed tool results", async () => {
     const onToolResult = vi.fn();
     state.runEmbeddedPiAgentMock.mockImplementationOnce(async (params: EmbeddedAgentParams) => {
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index c9c2c0ba230..3fb2023dd8e 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -16,6 +16,7 @@ import { runWithModelFallback, isFallbackSummaryError } from "../../agents/model
 import { isCliProvider } from "../../agents/model-selection.js";
 import {
   BILLING_ERROR_USER_MESSAGE,
+  formatRateLimitOrOverloadedErrorCopy,
   isCompactionFailureError,
   isContextOverflowError,
   isBillingErrorMessage,
@@ -1516,24 +1517,34 @@ export async function runAgentTurnWithFallback(params: {
     // underlying error. FallbackSummaryError messages embed per-attempt
     // reason labels like `(rate_limit)`, so string-matching the summary text
     // would misclassify mixed-cause exhaustion as a pure transient cooldown.
-    const isRateLimit = isFallbackSummaryError(err)
+    const isFallbackSummary = isFallbackSummaryError(err);
+    const isPureTransientSummary = isFallbackSummary
       ? isPureTransientRateLimitSummary(err)
+      : false;
+    const isRateLimit = isFallbackSummary
+      ? isPureTransientSummary
       : isRateLimitErrorMessage(message);
+    const rateLimitOrOverloadedCopy =
+      !isFallbackSummary || isPureTransientSummary
+        ? formatRateLimitOrOverloadedErrorCopy(message)
+        : undefined;
     const safeMessage = isTransientHttp
       ? sanitizeUserFacingText(message, { errorContext: true })
       : message;
     const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
     const fallbackText = isBilling
       ? BILLING_ERROR_USER_MESSAGE
-      : isRateLimit
+      : isRateLimit && !isOverloadedErrorMessage(message)
        ? buildRateLimitCooldownMessage(err)
-        : isContextOverflow
-          ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
-          : isRoleOrderingError
-            ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
-            : shouldSurfaceToControlUi
-              ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
-              : buildExternalRunFailureText(message);
+        : rateLimitOrOverloadedCopy
+          ? rateLimitOrOverloadedCopy
+          : isContextOverflow
+            ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
+            : isRoleOrderingError
+              ? "⚠️ Message ordering conflict - please try again. If this persists, use /new to start a fresh session."
+              : shouldSurfaceToControlUi
+                ? `⚠️ Agent failed before reply: ${trimmedMessage}.\nLogs: openclaw logs --follow`
+                : buildExternalRunFailureText(message);
 
     params.replyOperation?.fail("run_failed", err);
     return {
@@ -1590,16 +1601,13 @@ export async function runAgentTurnWithFallback(params: {
           (p) => p.isError && hasNonEmptyString(p.text) && !p.text.startsWith("⚠️"),
         )?.text ?? "";
       const errorCandidate = metaErrorMsg || rawErrorPayloadText;
-      if (
-        errorCandidate &&
-        (isRateLimitErrorMessage(errorCandidate) || isOverloadedErrorMessage(errorCandidate))
-      ) {
-        const isOverloaded = isOverloadedErrorMessage(errorCandidate);
+      const formattedErrorCandidate = errorCandidate
+        ? formatRateLimitOrOverloadedErrorCopy(errorCandidate)
+        : undefined;
+      if (formattedErrorCandidate) {
         runResult.payloads = [
           {
-            text: isOverloaded
-              ? "⚠️ The AI service is temporarily overloaded. Please try again in a moment."
-              : "⚠️ API rate limit reached — the model couldn't generate a response. Please try again in a moment.",
+            text: formattedErrorCandidate,
             isError: true,
           },
         ];
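
Note on `formatRateLimitOrOverloadedErrorCopy`: this diff re-exports the helper from `src/agents/pi-embedded-helpers/errors.ts` and mocks it in the test file, but the implementation itself is outside these hunks. A minimal sketch of the implied contract, assuming the capacity copy asserted in the tests and the rate-limit/overload strings that the old hardcoded branches in `agent-runner-execution.ts` produced; the regexes mirror the test mock and are illustrative, not the project's real classifiers:

```ts
// Hypothetical sketch: the real helper lives in src/agents/pi-embedded-helpers/errors.ts
// and is not shown in this diff. Contract implied by the mock and call sites: map a raw
// provider error message to user-facing copy, or return undefined so callers fall
// through to their other error-classification branches.
export function formatRateLimitOrOverloadedErrorCopy(message: string): string | undefined {
  // Capacity failures get the model-switch hint this PR adds (asserted in the tests).
  if (/model\s+(?:is\s+)?at capacity/i.test(message)) {
    return "⚠️ Selected model is at capacity. Try a different model, or wait and retry.";
  }
  // Classic rate limiting: HTTP 429 / "too many requests".
  if (/rate.limit|too many requests|429/i.test(message)) {
    return "⚠️ API rate limit reached — the model couldn't generate a response. Please try again in a moment.";
  }
  // Provider-side overload, e.g. the "server_overloaded" meta error kind.
  if (/overloaded/i.test(message)) {
    return "⚠️ The AI service is temporarily overloaded. Please try again in a moment.";
  }
  return undefined;
}
```

Returning `undefined` for unrecognized messages is what lets the `@@ -1590` hunk collapse the old `isRateLimitErrorMessage(...) || isOverloadedErrorMessage(...)` guard into a single truthiness check on `formattedErrorCandidate`, and lets the pre-reply failure path keep its billing and context-overflow branches ahead of the generic `buildExternalRunFailureText` fallback.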