From 92648f9ba9d1ba1ee441b805cf6cb17ed9b68358 Mon Sep 17 00:00:00 2001 From: Peter Lee Date: Sun, 8 Mar 2026 01:27:01 -0600 Subject: [PATCH] fix(agents): broaden 402 temporary-limit detection and allow billing cooldown probe (#38533) Merged via squash. Prepared head SHA: 282b9186c6f48fcdbf0c81c49f739e5e9ed2df23 Co-authored-by: xialonglee <22994703+xialonglee@users.noreply.github.com> Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com> Reviewed-by: @altaywtf --- CHANGELOG.md | 1 + src/agents/failover-error.test.ts | 74 +++++++++++ src/agents/model-fallback.probe.test.ts | 62 +++++++++ src/agents/model-fallback.ts | 28 ++++- ...dded-helpers.isbillingerrormessage.test.ts | 82 ++++++++++++ src/agents/pi-embedded-helpers/errors.ts | 118 +++++++++++++++--- src/agents/pi-embedded-runner/run.ts | 4 +- 7 files changed, 343 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00eefe4e277..2902617108f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -361,6 +361,7 @@ Docs: https://docs.openclaw.ai - Discord/config schema parity: add `channels.discord.agentComponents` to the strict Zod config schema so valid `agentComponents.enabled` settings (root and account-scoped) no longer fail with unrecognized-key validation errors. Landed from contributor PR #39378 by @gambletan. Thanks @gambletan and @thewilloftheshadow. - ACPX/MCP session bootstrap: inject configured MCP servers into ACP `session/new` and `session/load` for acpx-backed sessions, restoring Canva and other external MCP tools. Landed from contributor PR #39337. Thanks @goodspeed-apps. - Control UI/Telegram sender labels: preserve inbound sender labels in sanitized chat history so dashboard user-message groups split correctly and show real group-member names instead of `You`. (#39414) Thanks @obviyus. 
+- Agents/failover 402 recovery: keep temporary spend-limit `402` payloads retryable, preserve explicit insufficient-credit billing detection even in long provider payloads, and allow throttled billing-cooldown probes of the primary model when fallback candidates are configured, so a recovered provider is retried instead of staying locked out (single-provider setups still wait for the cooldown to expire). (#38533) Thanks @xialonglee. ## 2026.3.2 diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts index f581dd0ede2..a99cfb5c4b2 100644 --- a/src/agents/failover-error.test.ts +++ b/src/agents/failover-error.test.ts @@ -18,6 +18,8 @@ const GEMINI_RESOURCE_EXHAUSTED_MESSAGE = "RESOURCE_EXHAUSTED: Resource has been exhausted (e.g. check quota)."; // OpenRouter 402 billing example: https://openrouter.ai/docs/api-reference/errors const OPENROUTER_CREDITS_MESSAGE = "Payment Required: insufficient credits"; +const TOGETHER_MONTHLY_SPEND_CAP_MESSAGE = + "The account associated with this API key has reached its maximum allowed monthly spending limit."; // Issue-backed Anthropic/OpenAI-compatible insufficient_quota payload under HTTP 400: // https://github.com/openclaw/openclaw/issues/23440 const INSUFFICIENT_QUOTA_PAYLOAD = @@ -182,6 +184,78 @@ describe("failover-error", () => { ).toBe("billing"); }); + it("keeps temporary 402 spend limits retryable without downgrading explicit billing", () => { + expect( + resolveFailoverReasonFromError({ + status: 402, + message: "Monthly spend limit reached. Please visit your billing settings.", + }), + ).toBe("rate_limit"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message: "Workspace spend limit reached. Contact your admin.", + }), + ).toBe("rate_limit"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message: `${"x".repeat(520)} insufficient credits. 
Monthly spend limit reached.`, + }), + ).toBe("billing"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message: TOGETHER_MONTHLY_SPEND_CAP_MESSAGE, + }), + ).toBe("billing"); + }); + + it("keeps raw 402 wrappers aligned with status-split temporary spend limits", () => { + const message = "Monthly spend limit reached. Please visit your billing settings."; + expect( + resolveFailoverReasonFromError({ + message: `402 Payment Required: ${message}`, + }), + ).toBe("rate_limit"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message, + }), + ).toBe("rate_limit"); + }); + + it("keeps explicit 402 rate-limit wrappers aligned with status-split payloads", () => { + const message = "rate limit exceeded"; + expect( + resolveFailoverReasonFromError({ + message: `HTTP 402 Payment Required: ${message}`, + }), + ).toBe("rate_limit"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message, + }), + ).toBe("rate_limit"); + }); + + it("keeps plan-upgrade 402 wrappers aligned with status-split billing payloads", () => { + const message = "Your usage limit has been reached. 
Please upgrade your plan."; + expect( + resolveFailoverReasonFromError({ + message: `HTTP 402 Payment Required: ${message}`, + }), + ).toBe("billing"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message, + }), + ).toBe("billing"); + }); + it("infers format errors from error messages", () => { expect( resolveFailoverReasonFromError({ diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts index bcb66628d66..01bcb2dc3a8 100644 --- a/src/agents/model-fallback.probe.test.ts +++ b/src/agents/model-fallback.probe.test.ts @@ -345,4 +345,66 @@ describe("runWithModelFallback – probe logic", () => { allowTransientCooldownProbe: true, }); }); + + it("skips billing-cooldowned primary when no fallback candidates exist", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "openai/gpt-4.1-mini", + fallbacks: [], + }, + }, + }, + } as Partial); + + // Billing cooldown far from expiry — would normally be skipped + const expiresIn30Min = NOW + 30 * 60 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min); + mockedResolveProfilesUnavailableReason.mockReturnValue("billing"); + + await expect( + runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + fallbacksOverride: [], + run: vi.fn().mockResolvedValue("billing-recovered"), + }), + ).rejects.toThrow("All models failed"); + }); + + it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => { + const cfg = makeCfg(); + // Cooldown expires in 1 minute — within 2-min probe margin + const expiresIn1Min = NOW + 60 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min); + mockedResolveProfilesUnavailableReason.mockReturnValue("billing"); + + const run = vi.fn().mockResolvedValue("billing-probe-ok"); + + const result = await runPrimaryCandidate(cfg, run); + + expect(result.result).toBe("billing-probe-ok"); + expect(run).toHaveBeenCalledTimes(1); + 
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", { + allowTransientCooldownProbe: true, + }); + }); + + it("skips billing-cooldowned primary with fallbacks when far from cooldown expiry", async () => { + const cfg = makeCfg(); + const expiresIn30Min = NOW + 30 * 60 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min); + mockedResolveProfilesUnavailableReason.mockReturnValue("billing"); + + const run = vi.fn().mockResolvedValue("ok"); + + const result = await runPrimaryCandidate(cfg, run); + + expect(result.result).toBe("ok"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5"); + expect(result.attempts[0]?.reason).toBe("billing"); + }); }); diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 0094ef731fc..ad2b5759233 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -419,11 +419,23 @@ function resolveCooldownDecision(params: { profileIds: params.profileIds, now: params.now, }) ?? "rate_limit"; - const isPersistentIssue = - inferredReason === "auth" || - inferredReason === "auth_permanent" || - inferredReason === "billing"; - if (isPersistentIssue) { + const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent"; + if (isPersistentAuthIssue) { + return { + type: "skip", + reason: inferredReason, + error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`, + }; + } + + // Billing is semi-persistent: the user may fix their balance, or a transient + // 402 might have been misclassified. Probe the primary only when fallbacks + // exist; otherwise repeated single-provider probes just churn the disabled + // auth state without opening any recovery path. 
+ if (inferredReason === "billing") { + if (params.isPrimary && params.hasFallbackCandidates && shouldProbe) { + return { type: "attempt", reason: inferredReason, markProbe: true }; + } return { type: "skip", reason: inferredReason, @@ -518,7 +530,11 @@ export async function runWithModelFallback(params: { if (decision.markProbe) { lastProbeAttempt.set(probeThrottleKey, now); } - if (decision.reason === "rate_limit" || decision.reason === "overloaded") { + if ( + decision.reason === "rate_limit" || + decision.reason === "overloaded" || + decision.reason === "billing" + ) { runOptions = { allowTransientCooldownProbe: true }; } } diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 4919bc607c0..8649f46f871 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import { classifyFailoverReason, + classifyFailoverReasonFromHttpStatus, isAuthErrorMessage, isAuthPermanentErrorMessage, isBillingErrorMessage, @@ -505,6 +506,87 @@ describe("image dimension errors", () => { }); }); +describe("classifyFailoverReasonFromHttpStatus – 402 temporary limits", () => { + it("reclassifies periodic usage limits as rate_limit", () => { + const samples = [ + "Monthly spend limit reached.", + "Weekly usage limit exhausted.", + "Daily limit reached, resets tomorrow.", + ]; + for (const sample of samples) { + expect(classifyFailoverReasonFromHttpStatus(402, sample)).toBe("rate_limit"); + } + }); + + it("reclassifies org/workspace spend limits as rate_limit", () => { + const samples = [ + "Organization spending limit exceeded.", + "Workspace spend limit reached.", + "Organization limit exceeded for this billing period.", + ]; + for (const sample of samples) { + expect(classifyFailoverReasonFromHttpStatus(402, sample)).toBe("rate_limit"); + } + }); + + 
it("keeps 402 as billing when explicit billing signals are present", () => { + expect( + classifyFailoverReasonFromHttpStatus( + 402, + "Your credit balance is too low. Monthly limit exceeded.", + ), + ).toBe("billing"); + expect( + classifyFailoverReasonFromHttpStatus( + 402, + "Insufficient credits. Organization limit reached.", + ), + ).toBe("billing"); + expect( + classifyFailoverReasonFromHttpStatus( + 402, + "The account associated with this API key has reached its maximum allowed monthly spending limit.", + ), + ).toBe("billing"); + }); + + it("keeps long 402 payloads with explicit billing text as billing", () => { + const longBillingPayload = `${"x".repeat(520)} insufficient credits. Monthly spend limit reached.`; + expect(classifyFailoverReasonFromHttpStatus(402, longBillingPayload)).toBe("billing"); + }); + + it("keeps 402 as billing without message or with generic message", () => { + expect(classifyFailoverReasonFromHttpStatus(402, undefined)).toBe("billing"); + expect(classifyFailoverReasonFromHttpStatus(402, "")).toBe("billing"); + expect(classifyFailoverReasonFromHttpStatus(402, "Payment required")).toBe("billing"); + }); + + it("matches raw 402 wrappers and status-split payloads for the same message", () => { + const transientMessage = "Monthly spend limit reached. 
Please visit your billing settings."; + expect(classifyFailoverReason(`402 Payment Required: ${transientMessage}`)).toBe("rate_limit"); + expect(classifyFailoverReasonFromHttpStatus(402, transientMessage)).toBe("rate_limit"); + + const billingMessage = + "The account associated with this API key has reached its maximum allowed monthly spending limit."; + expect(classifyFailoverReason(`402 Payment Required: ${billingMessage}`)).toBe("billing"); + expect(classifyFailoverReasonFromHttpStatus(402, billingMessage)).toBe("billing"); + }); + + it("keeps explicit 402 rate-limit messages in the rate_limit lane", () => { + const transientMessage = "rate limit exceeded"; + expect(classifyFailoverReason(`HTTP 402 Payment Required: ${transientMessage}`)).toBe( + "rate_limit", + ); + expect(classifyFailoverReasonFromHttpStatus(402, transientMessage)).toBe("rate_limit"); + }); + + it("keeps plan-upgrade 402 limit messages in billing", () => { + const billingMessage = "Your usage limit has been reached. 
Please upgrade your plan."; + expect(classifyFailoverReason(`HTTP 402 Payment Required: ${billingMessage}`)).toBe("billing"); + expect(classifyFailoverReasonFromHttpStatus(402, billingMessage)).toBe("billing"); + }); +}); + describe("classifyFailoverReason", () => { it("classifies documented provider error messages", () => { expect(classifyFailoverReason(OPENAI_RATE_LIMIT_MESSAGE)).toBe("rate_limit"); diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 5e4fc4c541e..cd4701c9db9 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -208,6 +208,100 @@ const HTTP_ERROR_HINTS = [ "permission", ]; +type PaymentRequiredFailoverReason = Extract; + +const BILLING_402_HINTS = [ + "insufficient credits", + "insufficient quota", + "credit balance", + "insufficient balance", + "plans & billing", + "add more credits", + "top up", +] as const; +const BILLING_402_PLAN_HINTS = [ + "upgrade your plan", + "upgrade plan", + "current plan", + "subscription", +] as const; + +const PERIODIC_402_HINTS = ["daily", "weekly", "monthly"] as const; +const RETRYABLE_402_RETRY_HINTS = ["try again", "retry", "temporary", "cooldown"] as const; +const RETRYABLE_402_LIMIT_HINTS = ["usage limit", "rate limit", "organization usage"] as const; +const RETRYABLE_402_SCOPED_HINTS = ["organization", "workspace"] as const; +const RETRYABLE_402_SCOPED_RESULT_HINTS = [ + "billing period", + "exceeded", + "reached", + "exhausted", +] as const; +const RAW_402_MARKER_RE = + /["']?(?:status|code)["']?\s*[:=]\s*402\b|\bhttp\s*402\b|\berror(?:\s+code)?\s*[:=]?\s*402\b|\b(?:got|returned|received)\s+(?:a\s+)?402\b|^\s*402\s+payment required\b/i; +const LEADING_402_WRAPPER_RE = + /^(?:error[:\s-]+)?(?:(?:http\s*)?402(?:\s+payment required)?|payment required)(?:[:\s-]+|$)/i; + +function includesAnyHint(text: string, hints: readonly string[]): boolean { + return hints.some((hint) => text.includes(hint)); +} + +function 
hasExplicit402BillingSignal(text: string): boolean { + return ( + includesAnyHint(text, BILLING_402_HINTS) || + (includesAnyHint(text, BILLING_402_PLAN_HINTS) && text.includes("limit")) || + text.includes("billing hard limit") || + text.includes("hard limit reached") || + (text.includes("maximum allowed") && text.includes("limit")) + ); +} + +function hasRetryable402TransientSignal(text: string): boolean { + const hasPeriodicHint = includesAnyHint(text, PERIODIC_402_HINTS); + const hasSpendLimit = text.includes("spend limit") || text.includes("spending limit"); + const hasScopedHint = includesAnyHint(text, RETRYABLE_402_SCOPED_HINTS); + return ( + (includesAnyHint(text, RETRYABLE_402_RETRY_HINTS) && + includesAnyHint(text, RETRYABLE_402_LIMIT_HINTS)) || + (hasPeriodicHint && (text.includes("usage limit") || hasSpendLimit)) || + (hasPeriodicHint && text.includes("limit") && text.includes("reset")) || + (hasScopedHint && + text.includes("limit") && + (hasSpendLimit || includesAnyHint(text, RETRYABLE_402_SCOPED_RESULT_HINTS))) + ); +} + +function normalize402Message(raw: string): string { + return raw.trim().toLowerCase().replace(LEADING_402_WRAPPER_RE, "").trim(); +} + +function classify402Message(message: string): PaymentRequiredFailoverReason { + const normalized = normalize402Message(message); + if (!normalized) { + return "billing"; + } + + if (hasExplicit402BillingSignal(normalized)) { + return "billing"; + } + + if (isRateLimitErrorMessage(normalized)) { + return "rate_limit"; + } + + if (hasRetryable402TransientSignal(normalized)) { + return "rate_limit"; + } + + return "billing"; +} + +function classifyFailoverReasonFrom402Text(raw: string): PaymentRequiredFailoverReason | null { + if (!RAW_402_MARKER_RE.test(raw)) { + return null; + } + return classify402Message(raw); +} + function extractLeadingHttpStatus(raw: string): { code: number; rest: string } | null { const match = raw.match(HTTP_STATUS_CODE_PREFIX_RE); if (!match) { @@ -261,25 +355,7 @@ export 
function classifyFailoverReasonFromHttpStatus( } if (status === 402) { - // Some providers (e.g. Anthropic Claude Max plan) surface temporary - // usage/rate-limit failures as HTTP 402. Use a narrow matcher for - // temporary limits to avoid misclassifying billing failures (#30484). - if (message) { - const lower = message.toLowerCase(); - // Temporary usage limit signals: retry language + usage/limit terminology - const hasTemporarySignal = - (lower.includes("try again") || - lower.includes("retry") || - lower.includes("temporary") || - lower.includes("cooldown")) && - (lower.includes("usage limit") || - lower.includes("rate limit") || - lower.includes("organization usage")); - if (hasTemporarySignal) { - return "rate_limit"; - } - } - return "billing"; + return message ? classify402Message(message) : "billing"; } if (status === 429) { return "rate_limit"; @@ -858,6 +934,10 @@ export function classifyFailoverReason(raw: string): FailoverReason | null { if (isModelNotFoundErrorMessage(raw)) { return "model_not_found"; } + const reasonFrom402Text = classifyFailoverReasonFrom402Text(raw); + if (reasonFrom402Text) { + return reasonFrom402Text; + } if (isPeriodicUsageLimitErrorMessage(raw)) { return isBillingErrorMessage(raw) ? "billing" : "rate_limit"; } diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 80ef934d63e..c763fbd2a94 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -668,7 +668,9 @@ export async function runEmbeddedPiAgent( const allowTransientCooldownProbe = params.allowTransientCooldownProbe === true && allAutoProfilesInCooldown && - (unavailableReason === "rate_limit" || unavailableReason === "overloaded"); + (unavailableReason === "rate_limit" || + unavailableReason === "overloaded" || + unavailableReason === "billing"); let didTransientCooldownProbe = false; while (profileIndex < profileCandidates.length) {