From f014e255dfb000620b02177cb374f34e717e9291 Mon Sep 17 00:00:00 2001 From: Altay Date: Thu, 5 Mar 2026 23:50:36 +0300 Subject: [PATCH] refactor(agents): share failover HTTP status classification (#36615) * fix(agents): classify transient failover statuses consistently * fix(agents): preserve legacy failover status mapping --- src/agents/failover-error.test.ts | 8 +++-- src/agents/failover-error.ts | 32 ++++---------------- src/agents/pi-embedded-helpers.ts | 1 + src/agents/pi-embedded-helpers/errors.ts | 37 ++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 29 deletions(-) diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts index fa8a4e553a6..772b4707b0c 100644 --- a/src/agents/failover-error.test.ts +++ b/src/agents/failover-error.test.ts @@ -14,11 +14,15 @@ describe("failover-error", () => { expect(resolveFailoverReasonFromError({ status: 403 })).toBe("auth"); expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout"); expect(resolveFailoverReasonFromError({ status: 400 })).toBe("format"); - // Transient server errors (502/503/504) should trigger failover as timeout. + // Keep the status-only path behavior-preserving and conservative. + expect(resolveFailoverReasonFromError({ status: 500 })).toBeNull(); expect(resolveFailoverReasonFromError({ status: 502 })).toBe("timeout"); expect(resolveFailoverReasonFromError({ status: 503 })).toBe("timeout"); expect(resolveFailoverReasonFromError({ status: 504 })).toBe("timeout"); - // Anthropic 529 (overloaded) should trigger failover as rate_limit. + expect(resolveFailoverReasonFromError({ status: 521 })).toBeNull(); + expect(resolveFailoverReasonFromError({ status: 522 })).toBeNull(); + expect(resolveFailoverReasonFromError({ status: 523 })).toBeNull(); + expect(resolveFailoverReasonFromError({ status: 524 })).toBeNull(); expect(resolveFailoverReasonFromError({ status: 529 })).toBe("rate_limit"); }); diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index 3bdc8650c81..5c16d3508fd 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -1,7 +1,7 @@ import { readErrorName } from "../infra/errors.js"; import { classifyFailoverReason, - isAuthPermanentErrorMessage, + classifyFailoverReasonFromHttpStatus, isTimeoutErrorMessage, type FailoverReason, } from "./pi-embedded-helpers.js"; @@ -152,30 +152,10 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n } const status = getStatusCode(err); - if (status === 402) { - return "billing"; - } - if (status === 429) { - return "rate_limit"; - } - if (status === 401 || status === 403) { - const msg = getErrorMessage(err); - if (msg && isAuthPermanentErrorMessage(msg)) { - return "auth_permanent"; - } - return "auth"; - } - if (status === 408) { - return "timeout"; - } - if (status === 502 || status === 503 || status === 504) { - return "timeout"; - } - if (status === 529) { - return "rate_limit"; - } - if (status === 400) { - return "format"; + const message = getErrorMessage(err); + const statusReason = classifyFailoverReasonFromHttpStatus(status, message); + if (statusReason) { + return statusReason; } const code = (getErrorCode(err) ?? "").toUpperCase(); @@ -197,8 +177,6 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n if (isTimeoutError(err)) { return "timeout"; } - - const message = getErrorMessage(err); if (!message) { return null; } diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index 34a54a2405e..53f21814492 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -13,6 +13,7 @@ export { BILLING_ERROR_USER_MESSAGE, formatBillingErrorMessage, classifyFailoverReason, + classifyFailoverReasonFromHttpStatus, formatRawAssistantErrorForUi, formatAssistantErrorText, getApiErrorPayloadFingerprint, diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 630071df451..58ad24f953a 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -251,6 +251,43 @@ export function isTransientHttpError(raw: string): boolean { return TRANSIENT_HTTP_ERROR_CODES.has(status.code); } +export function classifyFailoverReasonFromHttpStatus( + status: number | undefined, + message?: string, +): FailoverReason | null { + if (typeof status !== "number" || !Number.isFinite(status)) { + return null; + } + + if (status === 402) { + return "billing"; + } + if (status === 429) { + return "rate_limit"; + } + if (status === 401 || status === 403) { + if (message && isAuthPermanentErrorMessage(message)) { + return "auth_permanent"; + } + return "auth"; + } + if (status === 408) { + return "timeout"; + } + // Keep the status-only path conservative and behavior-preserving. + // Message-path HTTP heuristics are broader and should not leak in here. + if (status === 502 || status === 503 || status === 504) { + return "timeout"; + } + if (status === 529) { + return "rate_limit"; + } + if (status === 400) { + return "format"; + } + return null; +} + function stripFinalTagsFromText(text: string): string { if (!text) { return text;