fix(agents/failover): classify bare pi-ai stream wrapper as timeout regardless of provider (#71620)

This commit is contained in:
willamhou
2026-04-25 23:56:21 +08:00
committed by Peter Steinberger
parent dcdf97685b
commit 4b5c2f9aa3
4 changed files with 62 additions and 25 deletions

View File

@@ -441,13 +441,33 @@ describe("failover-error", () => {
).toBeNull();
});
it("classifies provider-scoped generic upstream errors for failover", () => {
it("classifies bare pi-ai stream wrapper as timeout regardless of provider (#71620)", () => {
  // The bare wrapper text must resolve to "timeout" both when no provider is
  // attached to the error and when any of several providers is attached.
  const message = "An unknown error occurred";
  const bareWrapperErrors = [
    { message },
    { provider: "anthropic", message },
    { provider: "google", message },
    { provider: "openrouter", message },
  ];
  for (const candidate of bareWrapperErrors) {
    expect(resolveFailoverReasonFromError(candidate)).toBe("timeout");
  }
});
it("classifies openrouter-scoped upstream errors for failover", () => {
expect(
resolveFailoverReasonFromError({
provider: "openrouter",
@@ -456,18 +476,7 @@ describe("failover-error", () => {
).toBe("timeout");
});
it("does not classify provider-scoped upstream errors without the matching provider", () => {
expect(
resolveFailoverReasonFromError({
message: "An unknown error occurred",
}),
).toBeNull();
expect(
resolveFailoverReasonFromError({
provider: "openrouter",
message: "An unknown error occurred",
}),
).toBeNull();
it("does not classify openrouter-scoped upstream errors without the matching provider", () => {
expect(
resolveFailoverReasonFromError({
message: "Provider returned error",

View File

@@ -739,10 +739,39 @@ describe("classifyFailoverReason", () => {
expect(isFailoverErrorMessage(message)).toBe(true);
});
it("classifies provider-scoped generic upstream messages", () => {
it("classifies bare pi-ai stream wrapper as timeout regardless of provider (#71620)", () => {
  // pi-ai providers throw `Error("An unknown error occurred")` provider-agnostically
  // when streams end with stopReason "aborted" | "error" with no specific info.
  // Casing, a trailing period and surrounding whitespace must not affect the match.
  const wrapperVariants = [
    "An unknown error occurred",
    "an unknown error occurred",
    "AN UNKNOWN ERROR OCCURRED",
    "An unknown error occurred.",
    " An unknown error occurred ",
  ];
  wrapperVariants.forEach((variant) => {
    expect(classifyFailoverReason(variant)).toBe("timeout");
    expect(isFailoverErrorMessage(variant)).toBe(true);
  });

  // Supplying provider context must not change the classification.
  for (const provider of ["anthropic", "google", "openrouter"]) {
    expect(classifyFailoverReason("An unknown error occurred", { provider })).toBe("timeout");
  }
});
it("does not match wrapped or unrelated unknown-error phrases as bare wrapper", () => {
  // Messages that merely contain (or resemble) the wrapper phrase must not be
  // classified through the bare-wrapper match.
  const nonBareMessages = [
    "LLM request failed with an unknown error.",
    "user reported that an unknown error occurred during sync",
  ];
  for (const wrapped of nonBareMessages) {
    expect(classifyFailoverReason(wrapped)).toBeNull();
  }
});
it("classifies openrouter-scoped upstream messages", () => {
expect(classifyFailoverReason("Provider returned error", { provider: "openrouter" })).toBe(
"timeout",
);
@@ -751,11 +780,7 @@ describe("classifyFailoverReason", () => {
);
});
it("does not classify provider-scoped generic upstream messages without provider context", () => {
expect(classifyFailoverReason("An unknown error occurred")).toBeNull();
expect(
classifyFailoverReason("An unknown error occurred", { provider: "openrouter" }),
).toBeNull();
it("does not classify openrouter-scoped upstream messages without provider context", () => {
expect(classifyFailoverReason("Provider returned error")).toBeNull();
expect(classifyFailoverReason("Provider returned error", { provider: "anthropic" })).toBeNull();
expect(classifyFailoverReason("Key limit exceeded")).toBeNull();

View File

@@ -753,11 +753,13 @@ function isProvider(provider: string | undefined, match: string): boolean {
return Boolean(normalized && normalized.includes(match));
}
function isAnthropicGenericUnknownError(raw: string, provider?: string): boolean {
return (
isProvider(provider, "anthropic") &&
(normalizeOptionalLowercaseString(raw)?.includes("an unknown error occurred") ?? false)
);
// pi-ai providers (anthropic, google, vertex, openai-completions, mistral, bedrock,
// etc.) all throw the same provider-agnostic `Error("An unknown error occurred")`
// when a stream ends with stopReason === "aborted" | "error" and no further detail.
// A match is classified as a transient "timeout" so the configured fallback chain
// rotates instead of the bare string being returned to the user (#71620).
//
// Only the bare wrapper matches: the comparison is case-insensitive, tolerates one
// optional trailing period and ignores surrounding whitespace. The ^...$ anchors
// reject longer messages that merely contain the phrase.
function isGenericUnknownStreamError(raw: string): boolean {
  const bareWrapperPattern = /^\s*an unknown error occurred\.?\s*$/i;
  return bareWrapperPattern.test(raw);
}
function isOpenRouterProviderReturnedError(raw: string, provider?: string): boolean {
@@ -833,7 +835,7 @@ function classifyFailoverClassificationFromMessage(
if (isAuthErrorMessage(raw)) {
return toReasonClassification("auth");
}
if (isAnthropicGenericUnknownError(raw, provider)) {
if (isGenericUnknownStreamError(raw)) {
return toReasonClassification("timeout");
}
if (isOpenRouterProviderReturnedError(raw, provider)) {