refactor(agents): share failover HTTP status classification (#36615)

* fix(agents): classify transient failover statuses consistently

* fix(agents): preserve legacy failover status mapping
This commit is contained in:
Altay
2026-03-05 23:50:36 +03:00
committed by GitHub
parent 8ac7ce73b3
commit f014e255df
4 changed files with 49 additions and 29 deletions

View File

@@ -14,11 +14,15 @@ describe("failover-error", () => {
expect(resolveFailoverReasonFromError({ status: 403 })).toBe("auth");
expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout");
expect(resolveFailoverReasonFromError({ status: 400 })).toBe("format");
// Transient server errors (502/503/504) should trigger failover as timeout.
// Keep the status-only path behavior-preserving and conservative.
expect(resolveFailoverReasonFromError({ status: 500 })).toBeNull();
expect(resolveFailoverReasonFromError({ status: 502 })).toBe("timeout");
expect(resolveFailoverReasonFromError({ status: 503 })).toBe("timeout");
expect(resolveFailoverReasonFromError({ status: 504 })).toBe("timeout");
// Anthropic 529 (overloaded) should trigger failover as rate_limit.
expect(resolveFailoverReasonFromError({ status: 521 })).toBeNull();
expect(resolveFailoverReasonFromError({ status: 522 })).toBeNull();
expect(resolveFailoverReasonFromError({ status: 523 })).toBeNull();
expect(resolveFailoverReasonFromError({ status: 524 })).toBeNull();
expect(resolveFailoverReasonFromError({ status: 529 })).toBe("rate_limit");
});

View File

@@ -1,7 +1,7 @@
import { readErrorName } from "../infra/errors.js";
import {
classifyFailoverReason,
isAuthPermanentErrorMessage,
classifyFailoverReasonFromHttpStatus,
isTimeoutErrorMessage,
type FailoverReason,
} from "./pi-embedded-helpers.js";
@@ -152,30 +152,10 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n
}
const status = getStatusCode(err);
if (status === 402) {
return "billing";
}
if (status === 429) {
return "rate_limit";
}
if (status === 401 || status === 403) {
const msg = getErrorMessage(err);
if (msg && isAuthPermanentErrorMessage(msg)) {
return "auth_permanent";
}
return "auth";
}
if (status === 408) {
return "timeout";
}
if (status === 502 || status === 503 || status === 504) {
return "timeout";
}
if (status === 529) {
return "rate_limit";
}
if (status === 400) {
return "format";
const message = getErrorMessage(err);
const statusReason = classifyFailoverReasonFromHttpStatus(status, message);
if (statusReason) {
return statusReason;
}
const code = (getErrorCode(err) ?? "").toUpperCase();
@@ -197,8 +177,6 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n
if (isTimeoutError(err)) {
return "timeout";
}
const message = getErrorMessage(err);
if (!message) {
return null;
}

View File

@@ -13,6 +13,7 @@ export {
BILLING_ERROR_USER_MESSAGE,
formatBillingErrorMessage,
classifyFailoverReason,
classifyFailoverReasonFromHttpStatus,
formatRawAssistantErrorForUi,
formatAssistantErrorText,
getApiErrorPayloadFingerprint,

View File

@@ -251,6 +251,43 @@ export function isTransientHttpError(raw: string): boolean {
return TRANSIENT_HTTP_ERROR_CODES.has(status.code);
}
export function classifyFailoverReasonFromHttpStatus(
status: number | undefined,
message?: string,
): FailoverReason | null {
if (typeof status !== "number" || !Number.isFinite(status)) {
return null;
}
if (status === 402) {
return "billing";
}
if (status === 429) {
return "rate_limit";
}
if (status === 401 || status === 403) {
if (message && isAuthPermanentErrorMessage(message)) {
return "auth_permanent";
}
return "auth";
}
if (status === 408) {
return "timeout";
}
// Keep the status-only path conservative and behavior-preserving.
// Message-path HTTP heuristics are broader and should not leak in here.
if (status === 502 || status === 503 || status === 504) {
return "timeout";
}
if (status === 529) {
return "rate_limit";
}
if (status === 400) {
return "format";
}
return null;
}
function stripFinalTagsFromText(text: string): string {
if (!text) {
return text;