From 4b5c2f9aa3633e9626be443baf4e02dc73d85e93 Mon Sep 17 00:00:00 2001 From: willamhou Date: Sat, 25 Apr 2026 23:56:21 +0800 Subject: [PATCH] fix(agents/failover): classify bare pi-ai stream wrapper as timeout regardless of provider (#71620) --- CHANGELOG.md | 1 + src/agents/failover-error.test.ts | 35 +++++++++++------- ...dded-helpers.isbillingerrormessage.test.ts | 37 ++++++++++++++++--- src/agents/pi-embedded-helpers/errors.ts | 14 ++++--- 4 files changed, 62 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9f6ac49740..b7f6e4a2ee3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -399,6 +399,7 @@ Docs: https://docs.openclaw.ai - Agents/sessions: stop session write-lock timeouts from entering model failover, so local lock contention surfaces directly instead of cascading across providers. (#68700) Thanks @MonkeyLeeT. - Auto-reply: run inbound reply delivery through `message_sending` hooks so plugins can transform or cancel generated replies before they are sent. (#70118) Thanks @jzakirov. - CI/release-checks: pass workflow inputs and matrix values through step environment variables instead of embedding them directly into `run:` shell commands, reducing template-injection surface in the cross-OS release-check workflow. (#66884) Thanks @alexlomt. +- Agents/failover: classify the bare `An unknown error occurred` stream-wrapper message that pi-ai providers throw when streams end with `stopReason: "aborted" | "error"` as a transient timeout regardless of provider, so configured fallback chains rotate for non-Anthropic providers (Google, OpenRouter, Bedrock, etc.) instead of surfacing the literal string to users. Fixes #71620. Thanks @mattcproctor. ## 2026.4.23 diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts index 62655860660..8b3b57a875f 100644 --- a/src/agents/failover-error.test.ts +++ b/src/agents/failover-error.test.ts @@ -441,13 +441,33 @@ describe("failover-error", () => { ).toBeNull(); }); - it("classifies provider-scoped generic upstream errors for failover", () => { + it("classifies bare pi-ai stream wrapper as timeout regardless of provider (#71620)", () => { + expect( + resolveFailoverReasonFromError({ + message: "An unknown error occurred", + }), + ).toBe("timeout"); expect( resolveFailoverReasonFromError({ provider: "anthropic", message: "An unknown error occurred", }), ).toBe("timeout"); + expect( + resolveFailoverReasonFromError({ + provider: "google", + message: "An unknown error occurred", + }), + ).toBe("timeout"); + expect( + resolveFailoverReasonFromError({ + provider: "openrouter", + message: "An unknown error occurred", + }), + ).toBe("timeout"); + }); + + it("classifies openrouter-scoped upstream errors for failover", () => { expect( resolveFailoverReasonFromError({ provider: "openrouter", @@ -456,18 +476,7 @@ describe("failover-error", () => { ).toBe("timeout"); }); - it("does not classify provider-scoped upstream errors without the matching provider", () => { - expect( - resolveFailoverReasonFromError({ - message: "An unknown error occurred", - }), - ).toBeNull(); - expect( - resolveFailoverReasonFromError({ - provider: "openrouter", - message: "An unknown error occurred", - }), - ).toBeNull(); + it("does not classify openrouter-scoped upstream errors without the matching provider", () => { expect( resolveFailoverReasonFromError({ message: "Provider returned error", diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 705461d5c3d..44d76dc5bfe 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -739,10 +739,39 @@ describe("classifyFailoverReason", () => { expect(isFailoverErrorMessage(message)).toBe(true); }); - it("classifies provider-scoped generic upstream messages", () => { + it("classifies bare pi-ai stream wrapper as timeout regardless of provider (#71620)", () => { + // pi-ai providers throw `Error("An unknown error occurred")` provider-agnostically + // when streams end with stopReason "aborted" | "error" with no specific info. + for (const sample of [ + "An unknown error occurred", + "an unknown error occurred", + "AN UNKNOWN ERROR OCCURRED", + "An unknown error occurred.", + " An unknown error occurred ", + ]) { + expect(classifyFailoverReason(sample)).toBe("timeout"); + expect(isFailoverErrorMessage(sample)).toBe(true); + } expect(classifyFailoverReason("An unknown error occurred", { provider: "anthropic" })).toBe( "timeout", ); + expect(classifyFailoverReason("An unknown error occurred", { provider: "google" })).toBe( + "timeout", + ); + expect(classifyFailoverReason("An unknown error occurred", { provider: "openrouter" })).toBe( + "timeout", + ); + }); + + it("does not match wrapped or unrelated unknown-error phrases as bare wrapper", () => { + // Wrapped messages must not slip into failover-as-timeout via the bare match. + expect(classifyFailoverReason("LLM request failed with an unknown error.")).toBeNull(); + expect( + classifyFailoverReason("user reported that an unknown error occurred during sync"), + ).toBeNull(); + }); + + it("classifies openrouter-scoped upstream messages", () => { expect(classifyFailoverReason("Provider returned error", { provider: "openrouter" })).toBe( "timeout", ); @@ -751,11 +780,7 @@ describe("classifyFailoverReason", () => { ); }); - it("does not classify provider-scoped generic upstream messages without provider context", () => { - expect(classifyFailoverReason("An unknown error occurred")).toBeNull(); - expect( - classifyFailoverReason("An unknown error occurred", { provider: "openrouter" }), - ).toBeNull(); + it("does not classify openrouter-scoped upstream messages without provider context", () => { expect(classifyFailoverReason("Provider returned error")).toBeNull(); expect(classifyFailoverReason("Provider returned error", { provider: "anthropic" })).toBeNull(); expect(classifyFailoverReason("Key limit exceeded")).toBeNull(); diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index ea9a9b25eb0..df440252230 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -753,11 +753,13 @@ function isProvider(provider: string | undefined, match: string): boolean { return Boolean(normalized && normalized.includes(match)); } -function isAnthropicGenericUnknownError(raw: string, provider?: string): boolean { - return ( - isProvider(provider, "anthropic") && - (normalizeOptionalLowercaseString(raw)?.includes("an unknown error occurred") ?? false) - ); +// pi-ai providers throw `Error("An unknown error occurred")` provider-agnostically +// (anthropic, google, vertex, openai-completions, mistral, bedrock, etc.) when a +// stream ends with stopReason === "aborted" | "error" without specific info. Treat +// it as a transient transport failure so the configured fallback chain rotates +// instead of returning the bare string to the user (#71620). +function isGenericUnknownStreamError(raw: string): boolean { + return /^\s*an unknown error occurred\.?\s*$/i.test(raw); } function isOpenRouterProviderReturnedError(raw: string, provider?: string): boolean { @@ -833,7 +835,7 @@ function classifyFailoverClassificationFromMessage( if (isAuthErrorMessage(raw)) { return toReasonClassification("auth"); } - if (isAnthropicGenericUnknownError(raw, provider)) { + if (isGenericUnknownStreamError(raw)) { return toReasonClassification("timeout"); } if (isOpenRouterProviderReturnedError(raw, provider)) {