diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d7f222fe10..85ad205ff0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,7 @@ Docs: https://docs.openclaw.ai - Signal/config validation: add `channels.signal.groups` schema support so per-group `requireMention`, `tools`, and `toolsBySender` overrides no longer get rejected during config validation. (#27199) Thanks @unisone. - Config/discovery: accept `discovery.wideArea.domain` in strict config validation so unicast DNS-SD gateway configs no longer fail with an unrecognized-key error. (#35615) Thanks @ingyukoh. - Telegram/media errors: redact Telegram file URLs before building media fetch errors so failed inbound downloads do not leak bot tokens into logs. Thanks @space08. +- Agents/failover: normalize abort-wrapped `429 RESOURCE_EXHAUSTED` provider failures before abort short-circuiting so wrapped Google/Vertex rate limits continue across configured fallback models, including the embedded runner prompt-error path. (#39820) Thanks @lupuletic. ## 2026.3.12 diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts index 1ddd1d9ceef..38e3530f011 100644 --- a/src/agents/failover-error.test.ts +++ b/src/agents/failover-error.test.ts @@ -364,6 +364,23 @@ describe("failover-error", () => { expect(isTimeoutError(err)).toBe(true); }); + it("classifies abort-wrapped RESOURCE_EXHAUSTED as rate_limit", () => { + const err = Object.assign(new Error("request aborted"), { + name: "AbortError", + cause: { + error: { + code: 429, + message: GEMINI_RESOURCE_EXHAUSTED_MESSAGE, + status: "RESOURCE_EXHAUSTED", + }, + }, + }); + + expect(resolveFailoverReasonFromError(err)).toBe("rate_limit"); + expect(coerceToFailoverError(err)?.reason).toBe("rate_limit"); + expect(coerceToFailoverError(err)?.status).toBe(429); + }); + it("coerces failover-worthy errors into FailoverError with metadata", () => { const err = coerceToFailoverError("credit balance too low", { provider: "anthropic", diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index 205f12ee18b..dd482310a2b 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -68,20 +68,36 @@ export function resolveFailoverStatus(reason: FailoverReason): number | undefine } } -function getStatusCode(err: unknown): number | undefined { +function findErrorProperty( + err: unknown, + reader: (candidate: unknown) => T | undefined, + seen: Set = new Set(), +): T | undefined { + const direct = reader(err); + if (direct !== undefined) { + return direct; + } + if (!err || typeof err !== "object") { + return undefined; + } + if (seen.has(err)) { + return undefined; + } + seen.add(err); + const candidate = err as { error?: unknown; cause?: unknown }; + return ( + findErrorProperty(candidate.error, reader, seen) ?? + findErrorProperty(candidate.cause, reader, seen) + ); +} + +function readDirectStatusCode(err: unknown): number | undefined { if (!err || typeof err !== "object") { return undefined; } - // Dig into nested `err.error` shapes (e.g. Google Vertex abort wrappers) - const nestedError = - "error" in err && err.error && typeof err.error === "object" - ? (err.error as { status?: unknown; code?: unknown }) - : undefined; const candidate = (err as { status?: unknown; statusCode?: unknown }).status ?? - (err as { statusCode?: unknown }).statusCode ?? - nestedError?.code ?? - nestedError?.status; + (err as { statusCode?: unknown }).statusCode; if (typeof candidate === "number") { return candidate; } @@ -91,53 +107,55 @@ function getStatusCode(err: unknown): number | undefined { return undefined; } -function getErrorCode(err: unknown): string | undefined { +function getStatusCode(err: unknown): number | undefined { + return findErrorProperty(err, readDirectStatusCode); +} + +function readDirectErrorCode(err: unknown): string | undefined { if (!err || typeof err !== "object") { return undefined; } - const nestedError = - "error" in err && err.error && typeof err.error === "object" - ? (err.error as { code?: unknown; status?: unknown }) - : undefined; - const candidate = (err as { code?: unknown }).code ?? nestedError?.status ?? nestedError?.code; - if (typeof candidate !== "string") { + const directCode = (err as { code?: unknown }).code; + if (typeof directCode === "string") { + const trimmed = directCode.trim(); + return trimmed ? trimmed : undefined; + } + const status = (err as { status?: unknown }).status; + if (typeof status !== "string" || /^\d+$/.test(status)) { return undefined; } - const trimmed = candidate.trim(); + const trimmed = status.trim(); return trimmed ? trimmed : undefined; } -function getErrorMessage(err: unknown): string { +function getErrorCode(err: unknown): string | undefined { + return findErrorProperty(err, readDirectErrorCode); +} + +function readDirectErrorMessage(err: unknown): string | undefined { if (err instanceof Error) { - return err.message; + return err.message || undefined; } if (typeof err === "string") { - return err; + return err || undefined; } if (typeof err === "number" || typeof err === "boolean" || typeof err === "bigint") { return String(err); } if (typeof err === "symbol") { - return err.description ?? ""; + return err.description ?? undefined; } if (err && typeof err === "object") { const message = (err as { message?: unknown }).message; if (typeof message === "string") { - return message; - } - // Extract message from nested `err.error.message` (e.g. Google Vertex wrappers) - const nestedMessage = - "error" in err && - err.error && - typeof err.error === "object" && - typeof (err.error as { message?: unknown }).message === "string" - ? ((err.error as { message: string }).message ?? "") - : ""; - if (nestedMessage) { - return nestedMessage; + return message || undefined; } } - return ""; + return undefined; +} + +function getErrorMessage(err: unknown): string { + return findErrorProperty(err, readDirectErrorMessage) ?? ""; } function getErrorCause(err: unknown): unknown { diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts index 7b7435b1bcc..a351730521f 100644 --- a/src/agents/model-fallback.probe.test.ts +++ b/src/agents/model-fallback.probe.test.ts @@ -2,8 +2,8 @@ import os from "node:os"; import path from "node:path"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; -import { registerLogTransport, resetLogger, setLoggerOverride } from "../logging/logger.js"; import type { AuthProfileStore } from "./auth-profiles.js"; +import { registerLogTransport, resetLogger, setLoggerOverride } from "../logging/logger.js"; import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js"; // Mock auth-profiles module — must be before importing model-fallback @@ -395,14 +395,6 @@ describe("runWithModelFallback – probe logic", () => { // All three candidates must be attempted — the abort must not short-circuit expect(run).toHaveBeenCalledTimes(3); - // Verify the primary error is classified as rate_limit, not timeout — the - // cause chain (RESOURCE_EXHAUSTED) must override the parent AbortError message. - try { - await runWithModelFallback({ cfg, provider: "google", model: "gemini-3-flash-preview", run }); - } catch (err) { - expect(String(err)).toContain("(rate_limit)"); - expect(String(err)).not.toMatch(/gemini.*\(timeout\)/); - } expect(run).toHaveBeenNthCalledWith(1, "google", "gemini-3-flash-preview", { allowTransientCooldownProbe: true, }); diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts index 3e3d4a83461..dfc2bc0c961 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts @@ -209,9 +209,20 @@ vi.mock("../defaults.js", () => ({ DEFAULT_PROVIDER: "anthropic", })); +export const mockedCoerceToFailoverError = vi.fn(); +export const mockedDescribeFailoverError = vi.fn((err: unknown) => ({ + message: err instanceof Error ? err.message : String(err), + reason: undefined, + status: undefined, + code: undefined, +})); +export const mockedResolveFailoverStatus = vi.fn(); + vi.mock("../failover-error.js", () => ({ FailoverError: class extends Error {}, - resolveFailoverStatus: vi.fn(), + coerceToFailoverError: mockedCoerceToFailoverError, + describeFailoverError: mockedDescribeFailoverError, + resolveFailoverStatus: mockedResolveFailoverStatus, })); vi.mock("./lanes.js", () => ({ diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts index b9f7707c0b6..8458e840e70 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts @@ -9,7 +9,12 @@ import { mockOverflowRetrySuccess, queueOverflowAttemptWithOversizedToolOutput, } from "./run.overflow-compaction.fixture.js"; -import { mockedGlobalHookRunner } from "./run.overflow-compaction.mocks.shared.js"; +import { + mockedCoerceToFailoverError, + mockedDescribeFailoverError, + mockedGlobalHookRunner, + mockedResolveFailoverStatus, +} from "./run.overflow-compaction.mocks.shared.js"; import { mockedContextEngine, mockedCompactDirect, @@ -25,6 +30,9 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { vi.clearAllMocks(); mockedRunEmbeddedAttempt.mockReset(); mockedCompactDirect.mockReset(); + mockedCoerceToFailoverError.mockReset(); + mockedDescribeFailoverError.mockReset(); + mockedResolveFailoverStatus.mockReset(); mockedSessionLikelyHasOversizedToolResults.mockReset(); mockedTruncateOversizedToolResultsInSession.mockReset(); mockedGlobalHookRunner.runBeforeAgentStart.mockReset(); @@ -36,6 +44,13 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { compacted: false, reason: "nothing to compact", }); + mockedCoerceToFailoverError.mockReturnValue(null); + mockedDescribeFailoverError.mockImplementation((err: unknown) => ({ + message: err instanceof Error ? err.message : String(err), + reason: undefined, + status: undefined, + code: undefined, + })); mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false); mockedTruncateOversizedToolResultsInSession.mockResolvedValue({ truncated: false, @@ -255,4 +270,57 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { expect(result.meta.error?.kind).toBe("retry_limit"); expect(result.payloads?.[0]?.isError).toBe(true); }); + + it("normalizes abort-wrapped prompt errors before handing off to model fallback", async () => { + const promptError = Object.assign(new Error("request aborted"), { + name: "AbortError", + cause: { + error: { + code: 429, + message: "Resource has been exhausted (e.g. check quota).", + status: "RESOURCE_EXHAUSTED", + }, + }, + }); + const normalized = Object.assign(new Error("Resource has been exhausted (e.g. check quota)."), { + name: "FailoverError", + reason: "rate_limit", + status: 429, + }); + + mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError })); + mockedCoerceToFailoverError.mockReturnValueOnce(normalized); + mockedDescribeFailoverError.mockImplementation((err: unknown) => ({ + message: err instanceof Error ? err.message : String(err), + reason: err === normalized ? "rate_limit" : undefined, + status: err === normalized ? 429 : undefined, + code: undefined, + })); + mockedResolveFailoverStatus.mockReturnValueOnce(429); + + await expect( + runEmbeddedPiAgent({ + ...overflowBaseRunParams, + cfg: { + agents: { + defaults: { + model: { + fallbacks: ["openai/gpt-5.2"], + }, + }, + }, + }, + }), + ).rejects.toBe(normalized); + + expect(mockedCoerceToFailoverError).toHaveBeenCalledWith( + promptError, + expect.objectContaining({ + provider: "anthropic", + model: "test-model", + profileId: "test-profile", + }), + ); + expect(mockedResolveFailoverStatus).toHaveBeenCalledWith("rate_limit"); + }); });