fix(failover): scope openrouter-specific matchers (#60909)

This commit is contained in:
Altay
2026-04-04 18:24:03 +03:00
committed by GitHub
parent fba6e194bd
commit ae460eff84
7 changed files with 88 additions and 25 deletions

View File

@@ -279,7 +279,7 @@ export async function executePreparedCliRun(
});
}
const err = stderr || stdout || "CLI failed.";
const reason = classifyFailoverReason(err) ?? "unknown";
const reason = classifyFailoverReason(err, { provider: params.provider }) ?? "unknown";
const status = resolveFailoverStatus(reason);
throw new FailoverError(err, {
reason,

View File

@@ -196,21 +196,22 @@ describe("failover-error", () => {
).toBe("overloaded");
});
it("classifies Anthropic bare 'unknown error' as timeout for failover (#49706)", () => {
it("classifies provider-scoped generic upstream errors for failover", () => {
expect(
resolveFailoverReasonFromError({
provider: "anthropic",
message: "An unknown error occurred",
}),
).toBe("timeout");
});
it("does not classify generic internal unknown-error text as failover timeout", () => {
expect(
resolveFailoverReasonFromError({
message: "LLM request failed with an unknown error.",
provider: "openrouter",
message: "Provider returned error",
}),
).toBeNull();
).toBe("timeout");
});
it("does not classify provider-scoped upstream errors without the matching provider", () => {
expect(
resolveFailoverReasonFromError({
message: "An unknown error occurred",
@@ -227,7 +228,14 @@ describe("failover-error", () => {
message: "Provider returned error",
}),
).toBeNull();
expect(
resolveFailoverReasonFromError({
provider: "anthropic",
message: "Provider returned error",
}),
).toBeNull();
});
it("treats 400 insufficient_quota payloads as billing instead of format", () => {
expect(
resolveFailoverReasonFromError({
@@ -548,11 +556,16 @@ describe("failover-error", () => {
// GitHub: openclaw/openclaw#53849 — OpenRouter returns 403 with "Key limit exceeded"
// when the monthly key spending limit is reached. This must trigger billing failover
// (model fallback), not generic auth.
expect(resolveFailoverReasonFromError({ status: 403, message: "Key limit exceeded" })).toBe(
"billing",
);
expect(
resolveFailoverReasonFromError({
provider: "openrouter",
status: 403,
message: "Key limit exceeded",
}),
).toBe("billing");
expect(
resolveFailoverReasonFromError({
provider: "openrouter",
status: 403,
message: "403 Key limit exceeded (monthly limit)",
}),
@@ -562,12 +575,23 @@ describe("failover-error", () => {
it("401 billing-style message returns billing instead of generic auth", () => {
expect(
resolveFailoverReasonFromError({
provider: "openrouter",
status: 401,
message: "401 Key limit exceeded (monthly limit)",
}),
).toBe("billing");
});
it("does not treat OpenRouter key-limit text as billing without provider context", () => {
expect(resolveFailoverReasonFromError({ message: "Key limit exceeded" })).toBeNull();
expect(
resolveFailoverReasonFromError({
status: 403,
message: "403 Key limit exceeded (monthly limit)",
}),
).toBe("auth");
});
it("resolveFailoverStatus maps auth_permanent to 403", () => {
expect(resolveFailoverStatus("auth_permanent")).toBe(403);
});

View File

@@ -570,10 +570,18 @@ describe("classifyFailoverReasonFromHttpStatus", () => {
).toBeNull();
});
it("lets billing-classified HTTP 401 responses bypass generic auth", () => {
it("lets OpenRouter billing-classified HTTP 401 responses bypass generic auth", () => {
expect(
classifyFailoverReasonFromHttpStatus(401, "401 Key limit exceeded (monthly limit)", {
provider: "openrouter",
}),
).toBe("billing");
});
it("keeps generic HTTP 401 key-limit text on the auth path without provider context", () => {
expect(
classifyFailoverReasonFromHttpStatus(401, "401 Key limit exceeded (monthly limit)"),
).toBe("billing");
).toBe("auth");
});
it("treats HTTP 499 as transient for structured errors", () => {
@@ -638,20 +646,27 @@ describe("classifyFailoverReason", () => {
),
).toBeNull();
});
it("classifies Anthropic bare 'unknown error' as timeout for failover", () => {
it("classifies provider-scoped generic upstream messages", () => {
expect(classifyFailoverReason("An unknown error occurred", { provider: "anthropic" })).toBe(
"timeout",
);
expect(classifyFailoverReason("Provider returned error", { provider: "openrouter" })).toBe(
"timeout",
);
expect(classifyFailoverReason("Key limit exceeded", { provider: "openrouter" })).toBe(
"billing",
);
});
it("does not classify generic internal unknown-error text as timeout", () => {
it("does not classify provider-scoped generic upstream messages without provider context", () => {
expect(classifyFailoverReason("An unknown error occurred")).toBeNull();
expect(
classifyFailoverReason("An unknown error occurred", { provider: "openrouter" }),
).toBeNull();
expect(classifyFailoverReason("Provider returned error")).toBeNull();
expect(classifyFailoverReason("Unknown error")).toBeNull();
expect(classifyFailoverReason("LLM request failed with an unknown error.")).toBeNull();
expect(classifyFailoverReason("Provider returned error", { provider: "anthropic" })).toBeNull();
expect(classifyFailoverReason("Key limit exceeded")).toBeNull();
});
});

View File

@@ -524,8 +524,11 @@ export function isTransientHttpError(raw: string): boolean {
export function classifyFailoverReasonFromHttpStatus(
status: number | undefined,
message?: string,
opts?: { provider?: string },
): FailoverReason | null {
const messageClassification = message ? classifyFailoverClassificationFromMessage(message) : null;
const messageClassification = message
? classifyFailoverClassificationFromMessage(message, opts?.provider)
: null;
return failoverReasonFromClassification(
classifyFailoverClassificationFromHttpStatus(status, message, messageClassification),
);
@@ -630,13 +633,27 @@ function classifyFailoverReasonFromCode(raw: string | undefined): FailoverReason
}
}
function isAnthropicProvider(provider?: string): boolean {
function isProvider(provider: string | undefined, match: string): boolean {
const normalized = provider?.trim().toLowerCase();
return Boolean(normalized && normalized.includes("anthropic"));
return Boolean(normalized && normalized.includes(match));
}
function isAnthropicGenericUnknownError(raw: string, provider?: string): boolean {
return isAnthropicProvider(provider) && raw.toLowerCase().includes("an unknown error occurred");
return (
isProvider(provider, "anthropic") && raw.toLowerCase().includes("an unknown error occurred")
);
}
function isOpenRouterProviderReturnedError(raw: string, provider?: string): boolean {
return (
isProvider(provider, "openrouter") && raw.toLowerCase().includes("provider returned error")
);
}
function isOpenRouterKeyLimitExceededError(raw: string, provider?: string): boolean {
return (
isProvider(provider, "openrouter") && /\bkey\s+limit\s*(?:exceeded|reached|hit)\b/i.test(raw)
);
}
function classifyFailoverClassificationFromMessage(
@@ -662,6 +679,9 @@ function classifyFailoverClassificationFromMessage(
if (reasonFrom402Text) {
return toReasonClassification(reasonFrom402Text);
}
if (isOpenRouterKeyLimitExceededError(raw, provider)) {
return toReasonClassification("billing");
}
if (isPeriodicUsageLimitErrorMessage(raw)) {
return toReasonClassification(isBillingErrorMessage(raw) ? "billing" : "rate_limit");
}
@@ -693,6 +713,9 @@ function classifyFailoverClassificationFromMessage(
if (isAnthropicGenericUnknownError(raw, provider)) {
return toReasonClassification("timeout");
}
if (isOpenRouterProviderReturnedError(raw, provider)) {
return toReasonClassification("timeout");
}
if (isServerErrorMessage(raw)) {
return toReasonClassification("timeout");
}

View File

@@ -119,7 +119,6 @@ const ERROR_PATTERNS = {
"insufficient balance",
"insufficient usd or diem balance",
/requires?\s+more\s+credits/i,
/\bkey\s+limit\s*(?:exceeded|reached|hit)\b/i,
],
authPermanent: HIGH_CONFIDENCE_AUTH_PERMANENT_PATTERNS,
auth: [...AMBIGUOUS_AUTH_ERROR_PATTERNS, ...COMMON_AUTH_ERROR_PATTERNS],

View File

@@ -1057,7 +1057,7 @@ export async function runEmbeddedPiAgent(
modelId,
});
const promptFailoverFailure =
promptFailoverReason !== null || isFailoverErrorMessage(errorText);
promptFailoverReason !== null || isFailoverErrorMessage(errorText, { provider });
// Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
const failedPromptProfileId = lastProfileId;
const logPromptFailoverDecision = createFailoverDecisionLogger({

View File

@@ -273,7 +273,9 @@ export function createEmbeddedRunAuthController(params: {
}) ?? "unknown"
);
}
const classified = classifyFailoverReason(failoverParams.message);
const classified = classifyFailoverReason(failoverParams.message, {
provider: params.getProvider(),
});
return classified ?? "auth";
};
@@ -475,10 +477,10 @@ export function createEmbeddedRunAuthController(params: {
if (!params.getRuntimeAuthState() || retried) {
return false;
}
if (!isFailoverErrorMessage(errorText)) {
if (!isFailoverErrorMessage(errorText, { provider: params.getProvider() })) {
return false;
}
if (classifyFailoverReason(errorText) !== "auth") {
if (classifyFailoverReason(errorText, { provider: params.getProvider() }) !== "auth") {
return false;
}
try {