mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-11 02:00:43 +00:00
* fix(agents,failover): propagate sessionId/lane/provider attribution through FailoverError Adds optional `sessionId` and `lane` fields to `FailoverError` and threads them — together with the existing `provider`, `model`, `profileId` — through `describeFailoverError` and `coerceToFailoverError` context, so structured error log ingestion can attribute exhausted-fallback wrapper errors back to the originating request instead of dropping the per-profile metadata when the final wrapper is built. Fixes #42713. * fix: preserve failover error attribution --------- Co-authored-by: Altay <altay@uinaf.dev>
1012 lines
35 KiB
TypeScript
1012 lines
35 KiB
TypeScript
import { describe, expect, it } from "vitest";
|
||
import {
|
||
coerceToFailoverError,
|
||
describeFailoverError,
|
||
FailoverError,
|
||
isTimeoutError,
|
||
resolveFailoverReasonFromError,
|
||
resolveFailoverStatus,
|
||
} from "./failover-error.js";
|
||
import { classifyFailoverSignal } from "./pi-embedded-helpers/errors.js";
|
||
import { SessionWriteLockTimeoutError } from "./session-write-lock-error.js";
|
||
|
||
// Provider error fixtures shared by the failover classification tests in this file.
// Each string mirrors a documented or issue-reported provider error payload.

// OpenAI 429 example shape: https://help.openai.com/en/articles/5955604-how-can-i-solve-429-too-many-requests-errors
const OPENAI_RATE_LIMIT_MESSAGE =
  "Rate limit reached for gpt-4.1-mini in organization org_test on requests per min. Limit: 3.000000 / min. Current: 3.000000 / min.";
// Anthropic overloaded_error example shape: https://docs.anthropic.com/en/api/errors
const ANTHROPIC_OVERLOADED_PAYLOAD =
  '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"},"request_id":"req_test"}';
// Gemini RESOURCE_EXHAUSTED troubleshooting example: https://ai.google.dev/gemini-api/docs/troubleshooting
const GEMINI_RESOURCE_EXHAUSTED_MESSAGE =
  "RESOURCE_EXHAUSTED: Resource has been exhausted (e.g. check quota).";
// OpenRouter 402 billing example: https://openrouter.ai/docs/api-reference/errors
const OPENROUTER_CREDITS_MESSAGE = "Payment Required: insufficient credits";
// OpenRouter 404 payload delivered as a JSON string (note the numeric `code` inside `error`).
const OPENROUTER_MODEL_NOT_FOUND_PAYLOAD =
  '{"error":{"message":"Healer Alpha was a stealth model revealed on March 18th as an early testing version of MiMo-V2-Omni. Find it here: https://openrouter.ai/xiaomi/mimo-v2-omni","code":404},"user_id":"user_33GTyP8uDSYYbaeBO48AGHXyuMC"}';
// Monthly spending-cap message; the tests in this file pair it with HTTP 402.
const TOGETHER_MONTHLY_SPEND_CAP_MESSAGE =
  "The account associated with this API key has reached its maximum allowed monthly spending limit.";
// Issue-backed Anthropic/OpenAI-compatible insufficient_quota payload under HTTP 400:
// https://github.com/openclaw/openclaw/issues/23440
const INSUFFICIENT_QUOTA_PAYLOAD =
  '{"type":"error","error":{"type":"insufficient_quota","message":"Your account has insufficient quota balance to run this request."}}';
// Issue-backed ZhipuAI/GLM quota-exhausted log from #33785:
// https://github.com/openclaw/openclaw/issues/33785
const ZHIPUAI_WEEKLY_MONTHLY_LIMIT_EXHAUSTED_MESSAGE =
  "LLM error 1310: Weekly/Monthly Limit Exhausted. Your limit will reset at 2026-03-06 22:19:54 (request_id: 20260303141547610b7f574d1b44cb)";
// AWS Bedrock 429 ThrottlingException / 503 ServiceUnavailable:
// https://docs.aws.amazon.com/bedrock/latest/userguide/troubleshooting-api-error-codes.html
const BEDROCK_THROTTLING_EXCEPTION_MESSAGE =
  "ThrottlingException: Your request was denied due to exceeding the account quotas for Amazon Bedrock.";
const BEDROCK_SERVICE_UNAVAILABLE_MESSAGE =
  "ServiceUnavailable: The service is temporarily unable to handle the request.";
// Groq error codes examples: https://console.groq.com/docs/errors
const GROQ_TOO_MANY_REQUESTS_MESSAGE =
  "429 Too Many Requests: Too many requests were sent in a given timeframe.";
const GROQ_SERVICE_UNAVAILABLE_MESSAGE =
  "503 Service Unavailable: The server is temporarily unable to handle the request due to overloading or maintenance.";
|
||
|
||
describe("failover-error", () => {
|
||
// Pins the reason returned for HTTP status codes, alone and combined with a message,
// a nested `cause`, or a nested `error` object.
it("infers failover reason from HTTP status", () => {
  expect(resolveFailoverReasonFromError({ status: 402 })).toBe("billing");
  // Anthropic Claude Max plan surfaces rate limits as HTTP 402 (#30484)
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message: "HTTP 402: request reached organization usage limit, try again later",
    }),
  ).toBe("rate_limit");
  // Explicit billing messages on 402 stay classified as billing
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message: "insufficient credits — please top up your account",
    }),
  ).toBe("billing");
  // Ambiguous "quota exceeded" + billing signal → billing wins
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message: "HTTP 402: You have exceeded your current quota. Please add more credits.",
    }),
  ).toBe("billing");
  // `statusCode` given as a string is accepted as well.
  expect(resolveFailoverReasonFromError({ statusCode: "429" })).toBe("rate_limit");
  expect(resolveFailoverReasonFromError({ status: 403 })).toBe("auth");
  expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ status: 410 })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ status: 499 })).toBe("timeout");
  // 400/422 with no body returns null — avoids triggering a compaction loop
  // when the provider returns an empty or wrapper-only 400/422 (e.g.
  // transient proxy issue).
  expect(resolveFailoverReasonFromError({ status: 400 })).toBeNull();
  expect(resolveFailoverReasonFromError({ status: 422 })).toBeNull();
  expect(
    resolveFailoverReasonFromError({
      status: 400,
      message: "400 status code (no body)",
    }),
  ).toBeNull();
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "HTTP 422: No body",
    }),
  ).toBeNull();
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "HTTP 422: No response body",
    }),
  ).toBeNull();
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "Error: HTTP 422: No response body",
    }),
  ).toBeNull();
  expect(resolveFailoverReasonFromError({ message: "400 status code (no body)" })).toBeNull();
  expect(resolveFailoverReasonFromError({ message: "HTTP 422: No body" })).toBeNull();
  expect(resolveFailoverReasonFromError({ message: "HTTP 422: No response body" })).toBeNull();
  // The same no-body wrappers reached through `cause` / `error` nesting stay null.
  expect(
    resolveFailoverReasonFromError({
      message: "outer wrapper",
      cause: {
        status: 422,
        message: "HTTP 422: No response body",
      },
    }),
  ).toBeNull();
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "check open ai req parameter error",
      cause: {
        status: 422,
        message: "HTTP 422: No response body",
      },
    }),
  ).toBeNull();
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "check open ai req parameter error",
      cause: new Error("No response body"),
    }),
  ).toBeNull();
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "Unprocessable Entity",
      error: {
        message: "HTTP 422: No response body",
      },
    }),
  ).toBeNull();
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "Unprocessable Entity",
      cause: {
        message: "Unprocessable Entity",
        error: {
          message: "HTTP 422: No response body",
        },
      },
    }),
  ).toBeNull();
  // A real validation message in `error` is classified as format, even next to a no-body cause.
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      error: {
        message: "missing required property",
      },
      cause: {},
    }),
  ).toBe("format");
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      error: {
        message: "missing required property",
      },
      cause: {
        message: "HTTP 422: No response body",
      },
    }),
  ).toBe("format");
  // Transient server errors (500/502/503/504) should trigger failover as timeout.
  expect(resolveFailoverReasonFromError({ status: 500 })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ status: 502 })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ status: 503 })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ status: 504 })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ status: 521 })).toBeNull();
  expect(resolveFailoverReasonFromError({ status: 522 })).toBeNull();
  expect(resolveFailoverReasonFromError({ status: 523 })).toBeNull();
  expect(resolveFailoverReasonFromError({ status: 524 })).toBeNull();
  expect(resolveFailoverReasonFromError({ status: 529 })).toBe("overloaded");
});
|
||
|
||
// Two objects whose `cause` fields point at each other must resolve to null
// (the call has to return rather than follow the cycle forever).
it("stops on cyclic cause chains", () => {
  const first: { cause?: unknown } = {};
  const second: { cause?: unknown } = { cause: first };
  first.cause = second;

  expect(resolveFailoverReasonFromError(first)).toBeNull();
});
|
||
|
||
// 410 with session/conversation wording → session_expired; a body-less 410 → timeout.
it("treats session-specific HTTP 410s differently from generic 410s", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 410,
      message: "session not found",
    }),
  ).toBe("session_expired");
  expect(
    resolveFailoverReasonFromError({
      message: "HTTP 410: No body",
    }),
  ).toBe("timeout");
  expect(
    resolveFailoverReasonFromError({
      message: "HTTP 410: conversation expired",
    }),
  ).toBe("session_expired");
});
|
||
|
||
// Auth and billing wording in the message takes precedence over the 410 status.
it("preserves explicit auth and billing signals on HTTP 410", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 410,
      message: "invalid_api_key",
    }),
  ).toBe("auth");
  expect(
    resolveFailoverReasonFromError({
      status: 410,
      message: "authentication failed",
    }),
  ).toBe("auth");
  expect(
    resolveFailoverReasonFromError({
      status: 410,
      message: "insufficient credits",
    }),
  ).toBe("billing");
});
|
||
|
||
// Runs the documented provider fixtures (OpenAI, Anthropic, Gemini, OpenRouter,
// Bedrock, Groq) through the resolver together with their HTTP status.
it("classifies documented provider error shapes at the error boundary", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 429,
      message: OPENAI_RATE_LIMIT_MESSAGE,
    }),
  ).toBe("rate_limit");
  expect(
    resolveFailoverReasonFromError({
      status: 529,
      message: ANTHROPIC_OVERLOADED_PAYLOAD,
    }),
  ).toBe("overloaded");
  // Overloaded payload wins even when the status (499) alone would map to timeout.
  expect(
    resolveFailoverReasonFromError({
      status: 499,
      message: ANTHROPIC_OVERLOADED_PAYLOAD,
    }),
  ).toBe("overloaded");
  expect(
    resolveFailoverReasonFromError({
      status: 429,
      message: GEMINI_RESOURCE_EXHAUSTED_MESSAGE,
    }),
  ).toBe("rate_limit");
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message: OPENROUTER_CREDITS_MESSAGE,
    }),
  ).toBe("billing");
  expect(
    resolveFailoverReasonFromError({
      status: 429,
      message: BEDROCK_THROTTLING_EXCEPTION_MESSAGE,
    }),
  ).toBe("rate_limit");
  expect(
    resolveFailoverReasonFromError({
      status: 503,
      message: BEDROCK_SERVICE_UNAVAILABLE_MESSAGE,
    }),
  ).toBe("timeout");
  expect(
    resolveFailoverReasonFromError({
      status: 429,
      message: GROQ_TOO_MANY_REQUESTS_MESSAGE,
    }),
  ).toBe("rate_limit");
  expect(
    resolveFailoverReasonFromError({
      status: 503,
      message: GROQ_SERVICE_UNAVAILABLE_MESSAGE,
    }),
  ).toBe("overloaded");
});
|
||
|
||
// Covers both the explicit `status: 404` form and the status embedded in the message text.
it("classifies OpenRouter no-endpoints 404s as model_not_found", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 404,
      message: "No endpoints found for deepseek/deepseek-r1:free.",
    }),
  ).toBe("model_not_found");
  expect(
    resolveFailoverReasonFromError({
      message: "404 No endpoints found for deepseek/deepseek-r1:free.",
    }),
  ).toBe("model_not_found");
});
|
||
|
||
// The message is a JSON string whose nested `error.code` is 404; no top-level status is given.
it("classifies JSON-wrapped OpenRouter stealth-model 404s as model_not_found", () => {
  expect(
    resolveFailoverReasonFromError({
      message: OPENROUTER_MODEL_NOT_FOUND_PAYLOAD,
    }),
  ).toBe("model_not_found");
});
|
||
|
||
// Message-only input naming a model that does not exist.
it("classifies generic model-does-not-exist messages as model_not_found", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "The model gpt-foo does not exist.",
    }),
  ).toBe("model_not_found");
});
|
||
|
||
// "does not exist" wording about a deployment (not a model) must stay unclassified.
it("does not classify generic access errors as model_not_found", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "The deployment does not exist or you do not have access.",
    }),
  ).toBeNull();
});
|
||
|
||
// Deprecation wording about an endpoint (not a model) must stay unclassified.
it("does not classify generic deprecation transition messages as model_not_found", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "The endpoint has been deprecated. Transition to v2 API for continued access.",
    }),
  ).toBeNull();
});
|
||
|
||
// Deprecation wording that names a model, prefixed with 404, maps to model_not_found.
it("classifies model-scoped deprecation transition messages as model_not_found", () => {
  expect(
    resolveFailoverReasonFromError({
      message:
        "404 The free model has been deprecated. Transition to qwen/qwen3.6-plus for continued paid access.",
    }),
  ).toBe("model_not_found");
});
|
||
|
||
// A 503 with an unrelated message stays timeout; only an explicit overloaded payload upgrades it.
it("keeps status-only 503s conservative unless the payload is clearly overloaded", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 503,
      message: "Internal database error",
    }),
  ).toBe("timeout");
  expect(
    resolveFailoverReasonFromError({
      status: 503,
      message: '{"error":{"message":"The model is overloaded. Please try later"}}',
    }),
  ).toBe("overloaded");
});
|
||
|
||
// A SessionWriteLockTimeoutError, bare or as the `cause` of an AbortError wrapper,
// must yield no failover reason and must not count as a timeout error.
it("does not classify session lock wait errors as model timeout failover", () => {
  const sessionLockError = new SessionWriteLockTimeoutError({
    timeoutMs: 10_000,
    owner: "pid=37121",
    lockPath: "/tmp/openclaw/session.jsonl.lock",
  });
  expect(resolveFailoverReasonFromError(sessionLockError)).toBeNull();
  expect(isTimeoutError(sessionLockError)).toBe(false);

  // Wrapper whose own message reads like a timeout.
  const wrappedLockError = Object.assign(new Error("operation timed out"), {
    name: "AbortError",
    cause: sessionLockError,
  });
  expect(resolveFailoverReasonFromError(wrappedLockError)).toBeNull();
  expect(isTimeoutError(wrappedLockError)).toBe(false);

  // Wrapper whose own message reads like an abort.
  const abortWrappedLockError = Object.assign(new Error("request was aborted"), {
    name: "AbortError",
    cause: sessionLockError,
  });
  expect(resolveFailoverReasonFromError(abortWrappedLockError)).toBeNull();
  expect(isTimeoutError(abortWrappedLockError)).toBe(false);
});
|
||
|
||
// Explicit status 429 plus code RESOURCE_EXHAUSTED on the wrapper wins over a session-lock cause.
it("keeps explicit provider failover metadata authoritative over nested session lock text", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 429,
      code: "RESOURCE_EXHAUSTED",
      message: "upstream quota pressure",
      cause: new SessionWriteLockTimeoutError({
        timeoutMs: 10_000,
        owner: "pid=37121",
        lockPath: "/tmp/openclaw/session.jsonl.lock",
      }),
    }),
  ).toBe("rate_limit");
});
|
||
|
||
// Here the 429 appears only in the message text; it still wins over a session-lock cause.
it("keeps inferred HTTP failover metadata authoritative over nested session lock text", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "HTTP 429: upstream quota pressure",
      cause: new SessionWriteLockTimeoutError({
        timeoutMs: 10_000,
        owner: "pid=37121",
        lockPath: "/tmp/openclaw/session.jsonl.lock",
      }),
    }),
  ).toBe("rate_limit");
});
|
||
|
||
// An AbortError carrying code ABORT_ERR does not override the session-lock cause: result is null.
it("does not treat generic abort codes as explicit failover metadata over nested session lock text", () => {
  expect(
    resolveFailoverReasonFromError({
      name: "AbortError",
      code: "ABORT_ERR",
      message: "The operation was aborted",
      cause: new SessionWriteLockTimeoutError({
        timeoutMs: 10_000,
        owner: "pid=37121",
        lockPath: "/tmp/openclaw/session.jsonl.lock",
      }),
    }),
  ).toBeNull();
});
|
||
|
||
// The session-lock error sits in `reason` while `cause` carries timeout-like text;
// the result must still be null.
it("does not let cause-based failover classification bypass wrapper session lock suppression", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "wrapper",
      reason: new SessionWriteLockTimeoutError({
        timeoutMs: 10_000,
        owner: "pid=37121",
        lockPath: "/tmp/openclaw/session.jsonl.lock",
      }),
      cause: new Error("operation timed out"),
    }),
  ).toBeNull();
});
|
||
|
||
// "An unknown error occurred" maps to timeout with no provider and with three different providers.
it("classifies bare pi-ai stream wrapper as timeout regardless of provider (#71620)", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "An unknown error occurred",
    }),
  ).toBe("timeout");
  expect(
    resolveFailoverReasonFromError({
      provider: "anthropic",
      message: "An unknown error occurred",
    }),
  ).toBe("timeout");
  expect(
    resolveFailoverReasonFromError({
      provider: "google",
      message: "An unknown error occurred",
    }),
  ).toBe("timeout");
  expect(
    resolveFailoverReasonFromError({
      provider: "openrouter",
      message: "An unknown error occurred",
    }),
  ).toBe("timeout");
});
|
||
|
||
// "Provider returned error" is classified as timeout when provider is "openrouter".
it("classifies openrouter-scoped upstream errors for failover", () => {
  expect(
    resolveFailoverReasonFromError({
      provider: "openrouter",
      message: "Provider returned error",
    }),
  ).toBe("timeout");
});
|
||
|
||
// The same text with no provider, or with a different provider, stays unclassified.
it("does not classify openrouter-scoped upstream errors without the matching provider", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "Provider returned error",
    }),
  ).toBeNull();
  expect(
    resolveFailoverReasonFromError({
      provider: "anthropic",
      message: "Provider returned error",
    }),
  ).toBeNull();
});
|
||
|
||
// HTTP 400 carrying an insufficient_quota JSON payload resolves to billing.
it("treats 400 insufficient_quota payloads as billing instead of format", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 400,
      message: INSUFFICIENT_QUOTA_PAYLOAD,
    }),
  ).toBe("billing");
});
|
||
|
||
// HTTP 400 with a ThrottlingException message resolves to rate_limit.
it("lets structured HTTP 400 payloads reuse provider-specific message classification", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 400,
      message: "ThrottlingException: Too many concurrent requests",
    }),
  ).toBe("rate_limit");
});
|
||
|
||
// A token-limit message under HTTP 400 yields no failover reason from this resolver.
it("does not misclassify structured HTTP 400 context overflow payloads as format", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 400,
      message: "INVALID_ARGUMENT: input exceeds the maximum number of tokens",
    }),
  ).toBeNull();
});
|
||
|
||
// classifyFailoverSignal reports token-limit messages as { kind: "context_overflow" },
// with or without an accompanying status.
it("keeps context overflow first-class in the shared signal classifier", () => {
  expect(
    classifyFailoverSignal({
      status: 400,
      message: "INVALID_ARGUMENT: input exceeds the maximum number of tokens",
    }),
  ).toEqual({ kind: "context_overflow" });
  expect(
    classifyFailoverSignal({
      message: "prompt is too long: 150000 tokens > 128000 maximum",
    }),
  ).toEqual({ kind: "context_overflow" });
});
|
||
|
||
// Invalid-model wording resolves to model_not_found: message only, with status 400,
// and with status 422.
it("treats invalid-model HTTP 400 payloads as model_not_found instead of format", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "openrouter/__invalid_test_model__ is not a valid model ID",
    }),
  ).toBe("model_not_found");
  expect(
    resolveFailoverReasonFromError({
      status: 400,
      message: "HTTP 400: openrouter/__invalid_test_model__ is not a valid model ID",
    }),
  ).toBe("model_not_found");
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "invalid model: openrouter/__invalid_test_model__",
    }),
  ).toBe("model_not_found");
});
|
||
|
||
// A 422 accompanied by a non-empty message resolves to format.
it("treats HTTP 422 as format error", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "check open ai req parameter error",
    }),
  ).toBe("format");
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "Unprocessable Entity",
    }),
  ).toBe("format");
});
|
||
|
||
// Billing wording in the message takes precedence over the 422 status.
it("treats 422 with billing message as billing instead of format", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 422,
      message: "insufficient credits",
    }),
  ).toBe("billing");
});
|
||
|
||
// Both the "requires" and "require" spellings are expected to resolve to billing.
it("classifies OpenRouter 'requires more credits' text as billing", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "This model requires more credits to use",
    }),
  ).toBe("billing");
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message: "This model require more credits",
    }),
  ).toBe("billing");
});
|
||
|
||
// Message-only weekly/monthly limit texts (no HTTP status) resolve to rate_limit.
it("treats zhipuai weekly/monthly limit exhausted as rate_limit", () => {
  expect(
    resolveFailoverReasonFromError({
      message: ZHIPUAI_WEEKLY_MONTHLY_LIMIT_EXHAUSTED_MESSAGE,
    }),
  ).toBe("rate_limit");
  expect(
    resolveFailoverReasonFromError({
      message: "LLM error: monthly limit reached",
    }),
  ).toBe("rate_limit");
});
|
||
|
||
// Chinese-language network and server error messages resolve to timeout.
it("treats Chinese provider network/server errors as timeout for failover", () => {
  // ZhipuAI/GLM error code 1234: "网络错误" — real production error
  expect(
    resolveFailoverReasonFromError({
      message:
        "LLM error 1234: 网络错误,错误id:202603281427587491f4467f1c4712,请联系客服。 (request_id: 202603281427587491f4467f1c4712)",
    }),
  ).toBe("timeout");
  // JSON payload variant
  expect(
    resolveFailoverReasonFromError({
      message:
        '{"error":{"code":"1234","message":"网络错误,错误id:abc123,请联系客服。"},"request_id":"abc123"}',
    }),
  ).toBe("timeout");
  // Generic Chinese server errors
  expect(resolveFailoverReasonFromError({ message: "系统错误,请稍后重试" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ message: "服务器内部错误" })).toBe("timeout");
});
|
||
|
||
// Chinese-language permission and authentication failure messages resolve to auth.
it("treats Chinese provider auth errors as auth for failover", () => {
  // ZhipuAI/GLM 403: "您无权访问glm-5.1" — real production error
  expect(resolveFailoverReasonFromError({ message: "403 您无权访问glm-5.1。" })).toBe("auth");
  expect(resolveFailoverReasonFromError({ message: "认证失败" })).toBe("auth");
  expect(resolveFailoverReasonFromError({ message: "鉴权失败,请检查API Key" })).toBe("auth");
});
|
||
|
||
// The Anthropic overloaded payload alone (no status) resolves to overloaded.
it("treats overloaded provider payloads as overloaded", () => {
  expect(
    resolveFailoverReasonFromError({
      message: ANTHROPIC_OVERLOADED_PAYLOAD,
    }),
  ).toBe("overloaded");
});
|
||
|
||
// With a "402 Payment Required" prefix, the weekly/monthly limit wording resolves to billing.
it("keeps raw-text 402 weekly/monthly limit errors in billing", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "402 Payment Required: Weekly/Monthly Limit Exhausted",
    }),
  ).toBe("billing");
});
|
||
|
||
// 402 spend-limit and subscription-quota wording resolves to rate_limit, while
// explicit billing wording under the same status stays billing.
it("keeps temporary 402 spend limits retryable without downgrading explicit billing", () => {
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message: "Monthly spend limit reached. Please visit your billing settings.",
    }),
  ).toBe("rate_limit");
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message: "Workspace spend limit reached. Contact your admin.",
    }),
  ).toBe("rate_limit");
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message:
        "You have reached your subscription quota limit. Please wait for automatic quota refresh in the rolling time window, upgrade to a higher plan, or use a Pay-As-You-Go API Key for unlimited access. Learn more: https://zenmux.ai/docs/guide/subscription.html",
    }),
  ).toBe("rate_limit");
  // The billing signal sits after 520 filler characters and must still be detected.
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message: `${"x".repeat(520)} insufficient credits. Monthly spend limit reached.`,
    }),
  ).toBe("billing");
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message: TOGETHER_MONTHLY_SPEND_CAP_MESSAGE,
    }),
  ).toBe("billing");
});
|
||
|
||
// The same spend-limit text must resolve identically whether the 402 is a message
// prefix or a separate `status` field.
it("keeps raw 402 wrappers aligned with status-split temporary spend limits", () => {
  const message = "Monthly spend limit reached. Please visit your billing settings.";
  expect(
    resolveFailoverReasonFromError({
      message: `402 Payment Required: ${message}`,
    }),
  ).toBe("rate_limit");
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message,
    }),
  ).toBe("rate_limit");
});
|
||
|
||
// Explicit rate-limit wording resolves to rate_limit in both the prefixed-message
// form and the status-field form.
it("keeps explicit 402 rate-limit wrappers aligned with status-split payloads", () => {
  const message = "rate limit exceeded";
  expect(
    resolveFailoverReasonFromError({
      message: `HTTP 402 Payment Required: ${message}`,
    }),
  ).toBe("rate_limit");
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message,
    }),
  ).toBe("rate_limit");
});
|
||
|
||
// Plan-upgrade wording resolves to billing in both the prefixed-message form and
// the status-field form.
it("keeps plan-upgrade 402 wrappers aligned with status-split billing payloads", () => {
  const message = "Your usage limit has been reached. Please upgrade your plan.";
  expect(
    resolveFailoverReasonFromError({
      message: `HTTP 402 Payment Required: ${message}`,
    }),
  ).toBe("billing");
  expect(
    resolveFailoverReasonFromError({
      status: 402,
      message,
    }),
  ).toBe("billing");
});
|
||
|
||
// A message-only "invalid request format" error resolves to format.
it("infers format errors from error messages", () => {
  expect(
    resolveFailoverReasonFromError({
      message: "invalid request format: messages.1.content.1.tool_use.id",
    }),
  ).toBe("format");
});
|
||
|
||
// Each of these `code` values, with no message or status, resolves to timeout.
it("infers timeout from common node error codes", () => {
  expect(resolveFailoverReasonFromError({ code: "ETIMEDOUT" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ code: "ECONNREFUSED" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ code: "ECONNRESET" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ code: "EAI_AGAIN" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ code: "EHOSTUNREACH" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ code: "EHOSTDOWN" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ code: "ENETRESET" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ code: "ENETUNREACH" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ code: "EPIPE" })).toBe("timeout");
});
|
||
|
||
// Symbolic `code` strings alone are enough to resolve rate_limit / overloaded.
it("infers rate-limit and overload from symbolic error codes", () => {
  expect(resolveFailoverReasonFromError({ code: "RESOURCE_EXHAUSTED" })).toBe("rate_limit");
  expect(resolveFailoverReasonFromError({ code: "THROTTLING_EXCEPTION" })).toBe("rate_limit");
  expect(resolveFailoverReasonFromError({ code: "OVERLOADED_ERROR" })).toBe("overloaded");
});
|
||
|
||
// "stop reason" / "reason" messages ending in abort, error or network_error resolve to timeout.
it("infers timeout from abort/error stop-reason messages", () => {
  expect(resolveFailoverReasonFromError({ message: "Unhandled stop reason: abort" })).toBe(
    "timeout",
  );
  expect(resolveFailoverReasonFromError({ message: "Unhandled stop reason: error" })).toBe(
    "timeout",
  );
  expect(resolveFailoverReasonFromError({ message: "stop reason: abort" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ message: "stop reason: error" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ message: "reason: abort" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ message: "reason: error" })).toBe("timeout");
  expect(
    resolveFailoverReasonFromError({ message: "Unhandled stop reason: network_error" }),
  ).toBe("timeout");
});
|
||
|
||
// Connection, fetch and DNS failure messages resolve to timeout.
it("infers timeout from connection/network error messages", () => {
  // Despite the test title, this first case pins model_cooldown text to rate_limit.
  expect(
    resolveFailoverReasonFromError({
      message: "model_cooldown: All credentials for model gpt-5 are cooling down",
    }),
  ).toBe("rate_limit");
  expect(resolveFailoverReasonFromError({ message: "Connection error." })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ message: "fetch failed" })).toBe("timeout");
  expect(resolveFailoverReasonFromError({ message: "Network error: ECONNREFUSED" })).toBe(
    "timeout",
  );
  expect(
    resolveFailoverReasonFromError({
      message: "dial tcp: lookup api.example.com: no such host (ENOTFOUND)",
    }),
  ).toBe("timeout");
  expect(resolveFailoverReasonFromError({ message: "temporary dns failure EAI_AGAIN" })).toBe(
    "timeout",
  );
});
|
||
|
||
// An Error renamed to AbortError with `reason: "reason: abort"` counts as a timeout error.
it("treats AbortError reason=abort as timeout", () => {
  const err = Object.assign(new Error("aborted"), {
    name: "AbortError",
    reason: "reason: abort",
  });
  expect(isTimeoutError(err)).toBe(true);
});
|
||
|
||
// The provider error sits two levels down (cause.error) under an AbortError wrapper;
// both the resolver and the coercion must surface rate_limit, and the coerced status is 429.
it("classifies abort-wrapped RESOURCE_EXHAUSTED as rate_limit", () => {
  const err = Object.assign(new Error("request aborted"), {
    name: "AbortError",
    cause: {
      error: {
        code: 429,
        message: GEMINI_RESOURCE_EXHAUSTED_MESSAGE,
        status: "RESOURCE_EXHAUSTED",
      },
    },
  });

  expect(resolveFailoverReasonFromError(err)).toBe("rate_limit");
  expect(coerceToFailoverError(err)?.reason).toBe("rate_limit");
  expect(coerceToFailoverError(err)?.status).toBe(429);
});
|
||
|
||
// The parent message is token-limit text, but the cause's RESOURCE_EXHAUSTED code
// determines the reason.
it("lets wrapped causes override parent context-overflow classifications", () => {
  const err = new Error("INVALID_ARGUMENT: input exceeds the maximum number of tokens", {
    cause: { code: "RESOURCE_EXHAUSTED" },
  });

  expect(resolveFailoverReasonFromError(err)).toBe("rate_limit");
  expect(coerceToFailoverError(err)?.reason).toBe("rate_limit");
});
|
||
|
||
// A plain string input is coerced; the provider/model context is carried onto the
// result, and the billing reason comes with status 402.
it("coerces failover-worthy errors into FailoverError with metadata", () => {
  const err = coerceToFailoverError("credit balance too low", {
    provider: "anthropic",
    model: "claude-opus-4-6",
  });
  expect(err?.name).toBe("FailoverError");
  expect(err?.reason).toBe("billing");
  expect(err?.status).toBe(402);
  expect(err?.provider).toBe("anthropic");
  expect(err?.model).toBe("claude-opus-4-6");
});
|
||
|
||
// describeFailoverError must expose both the friendly message and the untouched
// `rawError` text supplied at construction.
it("preserves raw provider error text for diagnostic logs", () => {
  const err = new FailoverError("LLM request failed: provider rejected the request schema.", {
    reason: "format",
    provider: "openai",
    model: "gpt-5.4",
    status: 400,
    rawError:
      "400 The following tools cannot be used with reasoning.effort 'minimal': web_search.",
  });

  expect(describeFailoverError(err)).toMatchObject({
    message: "LLM request failed: provider rejected the request schema.",
    rawError:
      "400 The following tools cannot be used with reasoning.effort 'minimal': web_search.",
    reason: "format",
    status: 400,
  });
});
|
||
|
||
// Coercing the JSON payload string yields reason model_not_found with status 404.
it("coerces JSON-wrapped OpenRouter stealth-model 404s into FailoverError", () => {
  const err = coerceToFailoverError(OPENROUTER_MODEL_NOT_FOUND_PAYLOAD, {
    provider: "openrouter",
    model: "openrouter/healer-alpha",
  });

  expect(err?.reason).toBe("model_not_found");
  expect(err?.status).toBe(404);
});
|
||
|
||
// resolveFailoverStatus("overloaded") is pinned to 503.
it("maps overloaded to a 503 fallback status", () => {
  expect(resolveFailoverStatus("overloaded")).toBe(503);
});
|
||
|
||
// A message-only format error is coerced with reason format and status 400.
it("coerces format errors with a 400 status", () => {
  const err = coerceToFailoverError("invalid request format", {
    provider: "google",
    model: "cloud-code-assist",
  });
  expect(err?.reason).toBe("format");
  expect(err?.status).toBe(400);
});
|
||
|
||
// Generic "Unauthorized" / "Forbidden" messages under 401/403 resolve to auth.
it("401/403 with generic message still returns auth (backward compat)", () => {
  expect(resolveFailoverReasonFromError({ status: 401, message: "Unauthorized" })).toBe("auth");
  expect(resolveFailoverReasonFromError({ status: 403, message: "Forbidden" })).toBe("auth");
});
|
||
|
||
// 401 with "invalid_api_key" resolves to auth (not auth_permanent).
it("401 with ambiguous auth message returns auth", () => {
  expect(resolveFailoverReasonFromError({ status: 401, message: "invalid_api_key" })).toBe(
    "auth",
  );
});
|
||
|
||
// 403 with "api key revoked" resolves to auth_permanent.
it("403 with revoked key message returns auth_permanent", () => {
  expect(resolveFailoverReasonFromError({ status: 403, message: "api key revoked" })).toBe(
    "auth_permanent",
  );
});
|
||
|
||
// With provider "openrouter", a 403 "Key limit exceeded" message resolves to billing.
it("403 OpenRouter 'Key limit exceeded' returns billing (model fallback trigger)", () => {
  // GitHub: openclaw/openclaw#53849 — OpenRouter returns 403 with "Key limit exceeded"
  // when the monthly key spending limit is reached. This must trigger billing failover
  // (model fallback), not generic auth.
  expect(
    resolveFailoverReasonFromError({
      provider: "openrouter",
      status: 403,
      message: "Key limit exceeded",
    }),
  ).toBe("billing");
  expect(
    resolveFailoverReasonFromError({
      provider: "openrouter",
      status: 403,
      message: "403 Key limit exceeded (monthly limit)",
    }),
  ).toBe("billing");
});
|
||
|
||
it("401 billing-style message returns billing instead of generic auth", () => {
  // Same key-limit wording as the 403 case, but delivered with a 401 status.
  const keyLimitFailure = {
    provider: "openrouter",
    status: 401,
    message: "401 Key limit exceeded (monthly limit)",
  };

  const reason = resolveFailoverReasonFromError(keyLimitFailure);
  expect(reason).toBe("billing");
});
|
||
|
||
it("does not treat OpenRouter key-limit text as billing without provider context", () => {
  // Neither input below names a provider.
  const messageOnlyReason = resolveFailoverReasonFromError({ message: "Key limit exceeded" });
  expect(messageOnlyReason).toBeNull();

  // With a 403 status but still no provider, the result is "auth" rather than "billing".
  const statusOnlyReason = resolveFailoverReasonFromError({
    status: 403,
    message: "403 Key limit exceeded (monthly limit)",
  });
  expect(statusOnlyReason).toBe("auth");
});
|
||
|
||
it("resolveFailoverStatus maps auth_permanent to 403", () => {
  // Pins the status returned for the "auth_permanent" reason.
  const mappedStatus = resolveFailoverStatus("auth_permanent");
  expect(mappedStatus).toBe(403);
});
|
||
|
||
it("coerces ambiguous auth error into the short auth lane", () => {
  const ambiguousFailure = { status: 401, message: "invalid_api_key" };
  const coerced = coerceToFailoverError(ambiguousFailure, {
    provider: "anthropic",
    model: "claude-opus-4-6",
  });

  // The reason is "auth", and the provider from the context is carried onto the error.
  const reason = coerced?.reason;
  const provider = coerced?.provider;
  expect(reason).toBe("auth");
  expect(provider).toBe("anthropic");
});
|
||
|
||
it("403 bare permission_error returns auth", () => {
  // "permission_error" with no further detail is expected to resolve to plain "auth".
  const bareFailure = { status: 403, message: "permission_error" };
  const reason = resolveFailoverReasonFromError(bareFailure);
  expect(reason).toBe("auth");
});
|
||
|
||
it("permission_error with organization denial stays auth_permanent", () => {
  // permission_error text combined with an organization-level denial sentence.
  const rawMessage =
    "HTTP 403 permission_error: OAuth authentication is currently not allowed for this organization.";
  const coerced = coerceToFailoverError(rawMessage, {
    provider: "anthropic",
    model: "claude-opus-4-6",
  });

  const reason = coerced?.reason;
  expect(reason).toBe("auth_permanent");
});
|
||
|
||
it("'not allowed for this organization' classifies as auth_permanent", () => {
  // Same denial sentence as the permission_error case, without the HTTP/status prefix.
  const rawMessage = "OAuth authentication is currently not allowed for this organization";
  const coerced = coerceToFailoverError(rawMessage, {
    provider: "anthropic",
    model: "claude-opus-4-6",
  });

  const reason = coerced?.reason;
  expect(reason).toBe("auth_permanent");
});
|
||
|
||
it("describes non-Error values consistently", () => {
  // A bare number is described by its string form and gets no failover reason.
  const { message, reason } = describeFailoverError(123);
  expect(message).toBe("123");
  expect(reason).toBeUndefined();
});
|
||
|
||
it("propagates sessionId/lane/provider attribution through FailoverError (#42713)", () => {
  const exhausted = new FailoverError("all fallbacks exhausted", {
    reason: "rate_limit",
    provider: "anthropic",
    model: "claude-opus-4-6",
    profileId: "profile-2",
    sessionId: "session:browser-abcd",
    lane: "answer",
    status: 429,
  });

  // The attribution fields must be readable directly on the error instance...
  const { sessionId, lane } = exhausted;
  expect(sessionId).toBe("session:browser-abcd");
  expect(lane).toBe("answer");

  // ...and must also be present in the structured description.
  const expectedDescription = {
    provider: "anthropic",
    model: "claude-opus-4-6",
    profileId: "profile-2",
    sessionId: "session:browser-abcd",
    lane: "answer",
    reason: "rate_limit",
    status: 429,
  };
  expect(describeFailoverError(exhausted)).toMatchObject(expectedDescription);
});
|
||
|
||
it("coerceToFailoverError carries sessionId/lane from context (#42713)", () => {
  // Attribution comes only from the context object; the error itself is a bare string.
  const coerced = coerceToFailoverError("rate limit exceeded", {
    provider: "openai",
    model: "gpt-5",
    profileId: "p1",
    sessionId: "session:browser-1234",
    lane: "draft",
  });

  const sessionId = coerced?.sessionId;
  const lane = coerced?.lane;
  const provider = coerced?.provider;
  expect(sessionId).toBe("session:browser-1234");
  expect(lane).toBe("draft");
  expect(provider).toBe("openai");
});
|
||
});
|