fix: harden wrapped rate-limit failover (openclaw#39820) thanks @lupuletic

This commit is contained in:
Darshil
2026-03-13 00:16:12 -07:00
committed by Darshil
parent c1c74f9952
commit e403ed6546
6 changed files with 152 additions and 45 deletions

View File

@@ -74,6 +74,7 @@ Docs: https://docs.openclaw.ai
- Signal/config validation: add `channels.signal.groups` schema support so per-group `requireMention`, `tools`, and `toolsBySender` overrides no longer get rejected during config validation. (#27199) Thanks @unisone.
- Config/discovery: accept `discovery.wideArea.domain` in strict config validation so unicast DNS-SD gateway configs no longer fail with an unrecognized-key error. (#35615) Thanks @ingyukoh.
- Telegram/media errors: redact Telegram file URLs before building media fetch errors so failed inbound downloads do not leak bot tokens into logs. Thanks @space08.
- Agents/failover: normalize abort-wrapped `429 RESOURCE_EXHAUSTED` provider failures before abort short-circuiting so wrapped Google/Vertex rate limits continue across configured fallback models, including the embedded runner prompt-error path. (#39820) Thanks @lupuletic.
## 2026.3.12

View File

@@ -364,6 +364,23 @@ describe("failover-error", () => {
expect(isTimeoutError(err)).toBe(true);
});
it("classifies abort-wrapped RESOURCE_EXHAUSTED as rate_limit", () => {
const err = Object.assign(new Error("request aborted"), {
name: "AbortError",
cause: {
error: {
code: 429,
message: GEMINI_RESOURCE_EXHAUSTED_MESSAGE,
status: "RESOURCE_EXHAUSTED",
},
},
});
expect(resolveFailoverReasonFromError(err)).toBe("rate_limit");
expect(coerceToFailoverError(err)?.reason).toBe("rate_limit");
expect(coerceToFailoverError(err)?.status).toBe(429);
});
it("coerces failover-worthy errors into FailoverError with metadata", () => {
const err = coerceToFailoverError("credit balance too low", {
provider: "anthropic",

View File

@@ -68,20 +68,36 @@ export function resolveFailoverStatus(reason: FailoverReason): number | undefine
}
}
function getStatusCode(err: unknown): number | undefined {
function findErrorProperty<T>(
err: unknown,
reader: (candidate: unknown) => T | undefined,
seen: Set<object> = new Set(),
): T | undefined {
const direct = reader(err);
if (direct !== undefined) {
return direct;
}
if (!err || typeof err !== "object") {
return undefined;
}
if (seen.has(err)) {
return undefined;
}
seen.add(err);
const candidate = err as { error?: unknown; cause?: unknown };
return (
findErrorProperty(candidate.error, reader, seen) ??
findErrorProperty(candidate.cause, reader, seen)
);
}
function readDirectStatusCode(err: unknown): number | undefined {
if (!err || typeof err !== "object") {
return undefined;
}
// Dig into nested `err.error` shapes (e.g. Google Vertex abort wrappers)
const nestedError =
"error" in err && err.error && typeof err.error === "object"
? (err.error as { status?: unknown; code?: unknown })
: undefined;
const candidate =
(err as { status?: unknown; statusCode?: unknown }).status ??
(err as { statusCode?: unknown }).statusCode ??
nestedError?.code ??
nestedError?.status;
(err as { statusCode?: unknown }).statusCode;
if (typeof candidate === "number") {
return candidate;
}
@@ -91,53 +107,55 @@ function getStatusCode(err: unknown): number | undefined {
return undefined;
}
function getErrorCode(err: unknown): string | undefined {
function getStatusCode(err: unknown): number | undefined {
return findErrorProperty(err, readDirectStatusCode);
}
function readDirectErrorCode(err: unknown): string | undefined {
if (!err || typeof err !== "object") {
return undefined;
}
const nestedError =
"error" in err && err.error && typeof err.error === "object"
? (err.error as { code?: unknown; status?: unknown })
: undefined;
const candidate = (err as { code?: unknown }).code ?? nestedError?.status ?? nestedError?.code;
if (typeof candidate !== "string") {
const directCode = (err as { code?: unknown }).code;
if (typeof directCode === "string") {
const trimmed = directCode.trim();
return trimmed ? trimmed : undefined;
}
const status = (err as { status?: unknown }).status;
if (typeof status !== "string" || /^\d+$/.test(status)) {
return undefined;
}
const trimmed = candidate.trim();
const trimmed = status.trim();
return trimmed ? trimmed : undefined;
}
function getErrorMessage(err: unknown): string {
function getErrorCode(err: unknown): string | undefined {
return findErrorProperty(err, readDirectErrorCode);
}
function readDirectErrorMessage(err: unknown): string | undefined {
if (err instanceof Error) {
return err.message;
return err.message || undefined;
}
if (typeof err === "string") {
return err;
return err || undefined;
}
if (typeof err === "number" || typeof err === "boolean" || typeof err === "bigint") {
return String(err);
}
if (typeof err === "symbol") {
return err.description ?? "";
return err.description ?? undefined;
}
if (err && typeof err === "object") {
const message = (err as { message?: unknown }).message;
if (typeof message === "string") {
return message;
}
// Extract message from nested `err.error.message` (e.g. Google Vertex wrappers)
const nestedMessage =
"error" in err &&
err.error &&
typeof err.error === "object" &&
typeof (err.error as { message?: unknown }).message === "string"
? ((err.error as { message: string }).message ?? "")
: "";
if (nestedMessage) {
return nestedMessage;
return message || undefined;
}
}
return "";
return undefined;
}
function getErrorMessage(err: unknown): string {
return findErrorProperty(err, readDirectErrorMessage) ?? "";
}
function getErrorCause(err: unknown): unknown {

View File

@@ -2,8 +2,8 @@ import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { registerLogTransport, resetLogger, setLoggerOverride } from "../logging/logger.js";
import type { AuthProfileStore } from "./auth-profiles.js";
import { registerLogTransport, resetLogger, setLoggerOverride } from "../logging/logger.js";
import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js";
// Mock auth-profiles module — must be before importing model-fallback
@@ -395,14 +395,6 @@ describe("runWithModelFallback probe logic", () => {
// All three candidates must be attempted — the abort must not short-circuit
expect(run).toHaveBeenCalledTimes(3);
// Verify the primary error is classified as rate_limit, not timeout — the
// cause chain (RESOURCE_EXHAUSTED) must override the parent AbortError message.
try {
await runWithModelFallback({ cfg, provider: "google", model: "gemini-3-flash-preview", run });
} catch (err) {
expect(String(err)).toContain("(rate_limit)");
expect(String(err)).not.toMatch(/gemini.*\(timeout\)/);
}
expect(run).toHaveBeenNthCalledWith(1, "google", "gemini-3-flash-preview", {
allowTransientCooldownProbe: true,
});

View File

@@ -209,9 +209,20 @@ vi.mock("../defaults.js", () => ({
DEFAULT_PROVIDER: "anthropic",
}));
export const mockedCoerceToFailoverError = vi.fn();
export const mockedDescribeFailoverError = vi.fn((err: unknown) => ({
message: err instanceof Error ? err.message : String(err),
reason: undefined,
status: undefined,
code: undefined,
}));
export const mockedResolveFailoverStatus = vi.fn();
vi.mock("../failover-error.js", () => ({
FailoverError: class extends Error {},
resolveFailoverStatus: vi.fn(),
coerceToFailoverError: mockedCoerceToFailoverError,
describeFailoverError: mockedDescribeFailoverError,
resolveFailoverStatus: mockedResolveFailoverStatus,
}));
vi.mock("./lanes.js", () => ({

View File

@@ -9,7 +9,12 @@ import {
mockOverflowRetrySuccess,
queueOverflowAttemptWithOversizedToolOutput,
} from "./run.overflow-compaction.fixture.js";
import { mockedGlobalHookRunner } from "./run.overflow-compaction.mocks.shared.js";
import {
mockedCoerceToFailoverError,
mockedDescribeFailoverError,
mockedGlobalHookRunner,
mockedResolveFailoverStatus,
} from "./run.overflow-compaction.mocks.shared.js";
import {
mockedContextEngine,
mockedCompactDirect,
@@ -25,6 +30,9 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
vi.clearAllMocks();
mockedRunEmbeddedAttempt.mockReset();
mockedCompactDirect.mockReset();
mockedCoerceToFailoverError.mockReset();
mockedDescribeFailoverError.mockReset();
mockedResolveFailoverStatus.mockReset();
mockedSessionLikelyHasOversizedToolResults.mockReset();
mockedTruncateOversizedToolResultsInSession.mockReset();
mockedGlobalHookRunner.runBeforeAgentStart.mockReset();
@@ -36,6 +44,13 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
compacted: false,
reason: "nothing to compact",
});
mockedCoerceToFailoverError.mockReturnValue(null);
mockedDescribeFailoverError.mockImplementation((err: unknown) => ({
message: err instanceof Error ? err.message : String(err),
reason: undefined,
status: undefined,
code: undefined,
}));
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false);
mockedTruncateOversizedToolResultsInSession.mockResolvedValue({
truncated: false,
@@ -255,4 +270,57 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
expect(result.meta.error?.kind).toBe("retry_limit");
expect(result.payloads?.[0]?.isError).toBe(true);
});
it("normalizes abort-wrapped prompt errors before handing off to model fallback", async () => {
const promptError = Object.assign(new Error("request aborted"), {
name: "AbortError",
cause: {
error: {
code: 429,
message: "Resource has been exhausted (e.g. check quota).",
status: "RESOURCE_EXHAUSTED",
},
},
});
const normalized = Object.assign(new Error("Resource has been exhausted (e.g. check quota)."), {
name: "FailoverError",
reason: "rate_limit",
status: 429,
});
mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError }));
mockedCoerceToFailoverError.mockReturnValueOnce(normalized);
mockedDescribeFailoverError.mockImplementation((err: unknown) => ({
message: err instanceof Error ? err.message : String(err),
reason: err === normalized ? "rate_limit" : undefined,
status: err === normalized ? 429 : undefined,
code: undefined,
}));
mockedResolveFailoverStatus.mockReturnValueOnce(429);
await expect(
runEmbeddedPiAgent({
...overflowBaseRunParams,
cfg: {
agents: {
defaults: {
model: {
fallbacks: ["openai/gpt-5.2"],
},
},
},
},
}),
).rejects.toBe(normalized);
expect(mockedCoerceToFailoverError).toHaveBeenCalledWith(
promptError,
expect.objectContaining({
provider: "anthropic",
model: "test-model",
profileId: "test-profile",
}),
);
expect(mockedResolveFailoverStatus).toHaveBeenCalledWith("rate_limit");
});
});