From cb8d36c6cdb09a4fb90b8414c0131dc4bb338ccd Mon Sep 17 00:00:00 2001 From: KingMo Date: Mon, 2 Mar 2026 20:30:12 +0800 Subject: [PATCH] fix(agents): recognize connection errors as retryable timeout failures When a model endpoint becomes unreachable (e.g., local proxy down, relay server offline), the failover system fails to switch to the next candidate model. Errors like "Connection error." are not classified as retryable, causing the session to hang on a broken endpoint instead of falling back to healthy alternatives. Connection/network errors are not recognized by the current failover classifier: - Text patterns like "Connection error.", "fetch failed", "network error" - Error codes like ECONNREFUSED, ENOTFOUND, EAI_AGAIN (in message text) While `failover-error.ts` handles these as error codes (err.code), it misses them when they appear as plain text in error messages. Extend timeout error patterns to include connection/network failures: **In `errors.ts` (ERROR_PATTERNS.timeout):** - Text: "connection error", "network error", "fetch failed", etc. - Regex: /\beconn(?:refused|reset|aborted)\b/i, /\benotfound\b/i, /\beai_again\b/i **In `failover-error.ts` (TIMEOUT_HINT_RE):** - Same patterns for non-assistant error paths Added test cases covering: - "Connection error." - "fetch failed" - "network error: ECONNREFUSED" - "ENOTFOUND" / "EAI_AGAIN" in message text - **Compatibility:** High - only expands retryable error detection - **Behavior:** Connection failures now trigger automatic fallback - **Risk:** Low - changes are additive and well-tested --- src/agents/failover-error.test.ts | 16 ++++++++++++++++ src/agents/failover-error.ts | 2 +- ...mbedded-helpers.isbillingerrormessage.test.ts | 8 ++++++++ src/agents/pi-embedded-helpers/errors.ts | 8 ++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts index 33ffe2d2d57..fa8a4e553a6 100644 --- a/src/agents/failover-error.test.ts +++ b/src/agents/failover-error.test.ts @@ -48,6 +48,22 @@ describe("failover-error", () => { expect(resolveFailoverReasonFromError({ message: "reason: error" })).toBe("timeout"); }); + it("infers timeout from connection/network error messages", () => { + expect(resolveFailoverReasonFromError({ message: "Connection error." })).toBe("timeout"); + expect(resolveFailoverReasonFromError({ message: "fetch failed" })).toBe("timeout"); + expect(resolveFailoverReasonFromError({ message: "Network error: ECONNREFUSED" })).toBe( + "timeout", + ); + expect( + resolveFailoverReasonFromError({ + message: "dial tcp: lookup api.example.com: no such host (ENOTFOUND)", + }), + ).toBe("timeout"); + expect(resolveFailoverReasonFromError({ message: "temporary dns failure EAI_AGAIN" })).toBe( + "timeout", + ); + }); + it("treats AbortError reason=abort as timeout", () => { const err = Object.assign(new Error("aborted"), { name: "AbortError", diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index 63e5c26c7a3..47660664c8c 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -6,7 +6,7 @@ import { } from "./pi-embedded-helpers.js"; const TIMEOUT_HINT_RE = - /timeout|timed out|deadline exceeded|context deadline exceeded|stop reason:\s*(?:abort|error)|reason:\s*(?:abort|error)|unhandled stop reason:\s*(?:abort|error)/i; + /timeout|timed out|deadline exceeded|context deadline exceeded|connection error|network error|network request failed|fetch failed|socket hang up|econnrefused|econnreset|econnaborted|enotfound|eai_again|stop reason:\s*(?:abort|error)|reason:\s*(?:abort|error)|unhandled stop reason:\s*(?:abort|error)/i; const ABORT_TIMEOUT_RE = /request was aborted|request aborted/i; export class FailoverError extends Error { diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 11b29abad3a..4dfd4344449 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -415,6 +415,7 @@ describe("isFailoverErrorMessage", () => { "429 rate limit exceeded", "Your credit balance is too low", "request timed out", + "Connection error.", "invalid request format", ]; for (const sample of samples) { @@ -494,6 +495,13 @@ describe("classifyFailoverReason", () => { expect(classifyFailoverReason("credit balance too low")).toBe("billing"); expect(classifyFailoverReason("deadline exceeded")).toBe("timeout"); expect(classifyFailoverReason("request ended without sending any chunks")).toBe("timeout"); + expect(classifyFailoverReason("Connection error.")).toBe("timeout"); + expect(classifyFailoverReason("fetch failed")).toBe("timeout"); + expect(classifyFailoverReason("network error: ECONNREFUSED")).toBe("timeout"); + expect(classifyFailoverReason("dial tcp: lookup api.example.com: no such host (ENOTFOUND)")).toBe( + "timeout", + ); + expect(classifyFailoverReason("temporary dns failure EAI_AGAIN")).toBe("timeout"); expect( classifyFailoverReason( "521 Web server is downCloudflare", diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 754bd03ba9c..cfd1460fc61 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -640,6 +640,14 @@ const ERROR_PATTERNS = { "timed out", "deadline exceeded", "context deadline exceeded", + "connection error", + "network error", + "network request failed", + "fetch failed", + "socket hang up", + /\beconn(?:refused|reset|aborted)\b/i, + /\benotfound\b/i, + /\beai_again\b/i, /without sending (?:any )?chunks?/i, /\bstop reason:\s*(?:abort|error)\b/i, /\breason:\s*(?:abort|error)\b/i,