diff --git a/CHANGELOG.md b/CHANGELOG.md index 74ca3309a25..669ca418efa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -110,6 +110,7 @@ Docs: https://docs.openclaw.ai - Gateway/device auth: reuse cached device-token scopes only for cached-token reconnects, while keeping explicit `deviceToken` scope requests and empty-cache fallbacks intact so reconnects preserve `operator.read` without breaking explicit auth flows. (#46032) Thanks @caicongyang. - Google Gemini CLI auth: improve OAuth credential discovery across Windows nvm and Homebrew libexec installs, and align Code Assist metadata so Gemini login stops failing on packaged CLI layouts. (#40729) Thanks @hughcube. - Mattermost/config schema: accept `groups.*.requireMention` again so existing Mattermost configs no longer fail strict validation after upgrade. (#58271) Thanks @MoerAI. +- Agents/failover: scope Anthropic `An unknown error occurred` failover matching by provider so generic internal unknown-error text no longer triggers retryable timeout fallback. (#59325) Thanks @aaron-he-zhu. - Providers/OpenRouter failover: classify `403 "Key limit exceeded"` spending-limit responses as billing so model fallback continues instead of stopping on generic auth. (#59892) Thanks @rockcent. - Device pairing/security: keep non-operator device scope checks bound to the requested role prefix so bootstrap verification cannot redeem `operator.*` scopes through `node` auth. (#57258) Thanks @jlapenna. - Gateway/device pairing: require non-admin paired-device sessions to manage only their own device for token rotate/revoke and paired-device removal, blocking cross-device token theft inside pairing-scoped sessions. (#50627) Thanks @coygeek. diff --git a/src/agents/cli-runner.ts b/src/agents/cli-runner.ts index 25fed7fc20a..9ae4d3909d9 100644 --- a/src/agents/cli-runner.ts +++ b/src/agents/cli-runner.ts @@ -73,8 +73,8 @@ export async function runCliAgent(params: RunCliAgentParams): Promise { ).toBe("overloaded"); }); + it("classifies Anthropic bare 'unknown error' as timeout for failover (#49706)", () => { + expect( + resolveFailoverReasonFromError({ + provider: "anthropic", + message: "An unknown error occurred", + }), + ).toBe("timeout"); + }); + + it("does not classify generic internal unknown-error text as failover timeout", () => { + expect( + resolveFailoverReasonFromError({ + message: "LLM request failed with an unknown error.", + }), + ).toBeNull(); + expect( + resolveFailoverReasonFromError({ + message: "An unknown error occurred", + }), + ).toBeNull(); + expect( + resolveFailoverReasonFromError({ + provider: "openrouter", + message: "An unknown error occurred", + }), + ).toBeNull(); + expect( + resolveFailoverReasonFromError({ + message: "Provider returned error", + }), + ).toBeNull(); + }); it("treats 400 insufficient_quota payloads as billing instead of format", () => { expect( resolveFailoverReasonFromError({ diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index 12814e2d9f3..ffd3208e27d 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -132,6 +132,22 @@ function getErrorCode(err: unknown): string | undefined { return findErrorProperty(err, readDirectErrorCode); } +function readDirectProvider(err: unknown): string | undefined { + if (!err || typeof err !== "object") { + return undefined; + } + const provider = (err as { provider?: unknown }).provider; + if (typeof provider !== "string") { + return undefined; + } + const trimmed = provider.trim(); + return trimmed || undefined; +} + +function getProvider(err: unknown): string | undefined { + return findErrorProperty(err, readDirectProvider); +} + function readDirectErrorMessage(err: unknown): string | undefined { if (err instanceof Error) { return err.message || undefined; @@ -207,6 +223,7 @@ function normalizeErrorSignal(err: unknown): FailoverSignal { status: getStatusCode(err), code: getErrorCode(err), message: message || undefined, + provider: getProvider(err), }; } diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 6fae38b259d..47678b95a39 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -638,6 +638,21 @@ describe("classifyFailoverReason", () => { ), ).toBeNull(); }); + it("classifies Anthropic bare 'unknown error' as timeout for failover", () => { + expect(classifyFailoverReason("An unknown error occurred", { provider: "anthropic" })).toBe( + "timeout", + ); + }); + + it("does not classify generic internal unknown-error text as timeout", () => { + expect(classifyFailoverReason("An unknown error occurred")).toBeNull(); + expect( + classifyFailoverReason("An unknown error occurred", { provider: "openrouter" }), + ).toBeNull(); + expect(classifyFailoverReason("Provider returned error")).toBeNull(); + expect(classifyFailoverReason("Unknown error")).toBeNull(); + expect(classifyFailoverReason("LLM request failed with an unknown error.")).toBeNull(); + }); }); describe("isFailoverErrorMessage", () => { diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 1350d2499ca..82738d7c61b 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -371,6 +371,7 @@ export type FailoverSignal = { status?: number; code?: string; message?: string; + provider?: string; }; export type FailoverClassification = @@ -629,7 +630,19 @@ function classifyFailoverReasonFromCode(raw: string | undefined): FailoverReason } } -function classifyFailoverClassificationFromMessage(raw: string): FailoverClassification | null { +function isAnthropicProvider(provider?: string): boolean { + const normalized = provider?.trim().toLowerCase(); + return Boolean(normalized && normalized.includes("anthropic")); +} + +function isAnthropicGenericUnknownError(raw: string, provider?: string): boolean { + return isAnthropicProvider(provider) && raw.toLowerCase().includes("an unknown error occurred"); +} + +function classifyFailoverClassificationFromMessage( + raw: string, + provider?: string, +): FailoverClassification | null { if (isImageDimensionErrorMessage(raw)) { return null; } @@ -677,6 +690,9 @@ function classifyFailoverClassificationFromMessage(raw: string): FailoverClassif if (isAuthErrorMessage(raw)) { return toReasonClassification("auth"); } + if (isAnthropicGenericUnknownError(raw, provider)) { + return toReasonClassification("timeout"); + } if (isServerErrorMessage(raw)) { return toReasonClassification("timeout"); } @@ -703,7 +719,7 @@ export function classifyFailoverSignal(signal: FailoverSignal): FailoverClassifi ? signal.status : extractLeadingHttpStatus(signal.message?.trim() ?? "")?.code; const messageClassification = signal.message - ? classifyFailoverClassificationFromMessage(signal.message) + ? classifyFailoverClassificationFromMessage(signal.message, signal.provider) : null; const statusClassification = classifyFailoverClassificationFromHttpStatus( inferredStatus, @@ -1207,24 +1223,28 @@ function isCliSessionExpiredErrorMessage(raw: string): boolean { ); } -export function classifyFailoverReason(raw: string): FailoverReason | null { +export function classifyFailoverReason( + raw: string, + opts?: { provider?: string }, +): FailoverReason | null { const trimmed = raw.trim(); const leadingStatus = extractLeadingHttpStatus(trimmed); return failoverReasonFromClassification( classifyFailoverSignal({ status: leadingStatus?.code, message: raw, + provider: opts?.provider, }), ); } -export function isFailoverErrorMessage(raw: string): boolean { - return classifyFailoverReason(raw) !== null; +export function isFailoverErrorMessage(raw: string, opts?: { provider?: string }): boolean { + return classifyFailoverReason(raw, opts) !== null; } export function isFailoverAssistantError(msg: AssistantMessage | undefined): boolean { if (!msg || msg.stopReason !== "error") { return false; } - return isFailoverErrorMessage(msg.errorMessage ?? ""); + return isFailoverErrorMessage(msg.errorMessage ?? "", { provider: msg.provider }); } diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 698f7e862c8..47a6ed319fd 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -1048,7 +1048,7 @@ export async function runEmbeddedPiAgent( }; } const promptFailoverReason = - promptErrorDetails.reason ?? classifyFailoverReason(errorText); + promptErrorDetails.reason ?? classifyFailoverReason(errorText, { provider }); const promptProfileFailureReason = resolveAuthProfileFailureReason(promptFailoverReason); await maybeMarkAuthProfileFailure({ @@ -1161,7 +1161,12 @@ export async function runEmbeddedPiAgent( const rateLimitFailure = isRateLimitAssistantError(lastAssistant); const billingFailure = isBillingAssistantError(lastAssistant); const failoverFailure = isFailoverAssistantError(lastAssistant); - const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? ""); + const assistantFailoverReason = classifyFailoverReason( + lastAssistant?.errorMessage ?? "", + { + provider: lastAssistant?.provider, + }, + ); const assistantProfileFailureReason = resolveAuthProfileFailureReason(assistantFailoverReason); const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError; diff --git a/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts b/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts index 658b2215165..3b573c24fc6 100644 --- a/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts +++ b/src/agents/pi-embedded-subscribe.handlers.lifecycle.ts @@ -47,7 +47,9 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) { model: lastAssistant.model, }); const rawError = lastAssistant.errorMessage?.trim(); - const failoverReason = classifyFailoverReason(rawError ?? ""); + const failoverReason = classifyFailoverReason(rawError ?? "", { + provider: lastAssistant.provider, + }); const errorText = (friendlyError || lastAssistant.errorMessage || "LLM request failed.").trim(); const observedError = buildApiErrorObservationFields(rawError); const safeErrorText =