fix(agents): broaden 402 temporary-limit detection and allow billing cooldown probe (#38533)

Merged via squash.

Prepared head SHA: 282b9186c6
Co-authored-by: xialonglee <22994703+xialonglee@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
Peter Lee
2026-03-08 01:27:01 -06:00
committed by GitHub
parent d15b6af77b
commit 92648f9ba9
7 changed files with 343 additions and 26 deletions

View File

@@ -361,6 +361,7 @@ Docs: https://docs.openclaw.ai
- Discord/config schema parity: add `channels.discord.agentComponents` to the strict Zod config schema so valid `agentComponents.enabled` settings (root and account-scoped) no longer fail with unrecognized-key validation errors. Landed from contributor PR #39378 by @gambletan. Thanks @gambletan and @thewilloftheshadow.
- ACPX/MCP session bootstrap: inject configured MCP servers into ACP `session/new` and `session/load` for acpx-backed sessions, restoring Canva and other external MCP tools. Landed from contributor PR #39337. Thanks @goodspeed-apps.
- Control UI/Telegram sender labels: preserve inbound sender labels in sanitized chat history so dashboard user-message groups split correctly and show real group-member names instead of `You`. (#39414) Thanks @obviyus.
- Agents/failover 402 recovery: keep temporary spend-limit `402` payloads retryable, preserve explicit insufficient-credit billing detection even in long provider payloads, and allow throttled billing-cooldown probes so single-provider setups can recover instead of staying locked out. (#38533) Thanks @xialonglee.
## 2026.3.2

View File

@@ -18,6 +18,8 @@ const GEMINI_RESOURCE_EXHAUSTED_MESSAGE =
"RESOURCE_EXHAUSTED: Resource has been exhausted (e.g. check quota).";
// OpenRouter 402 billing example: https://openrouter.ai/docs/api-reference/errors
const OPENROUTER_CREDITS_MESSAGE = "Payment Required: insufficient credits";
const TOGETHER_MONTHLY_SPEND_CAP_MESSAGE =
"The account associated with this API key has reached its maximum allowed monthly spending limit.";
// Issue-backed Anthropic/OpenAI-compatible insufficient_quota payload under HTTP 400:
// https://github.com/openclaw/openclaw/issues/23440
const INSUFFICIENT_QUOTA_PAYLOAD =
@@ -182,6 +184,78 @@ describe("failover-error", () => {
).toBe("billing");
});
// With an explicit status of 402, spend-limit wording resolves to "rate_limit",
// while insufficient-credit wording (even behind 520 characters of padding) and
// the Together monthly spend cap message resolve to "billing".
it("keeps temporary 402 spend limits retryable without downgrading explicit billing", () => {
  const reasonFor402 = (message: string) =>
    resolveFailoverReasonFromError({ status: 402, message });

  const monthlySpendLimit = reasonFor402(
    "Monthly spend limit reached. Please visit your billing settings.",
  );
  expect(monthlySpendLimit).toBe("rate_limit");

  const workspaceSpendLimit = reasonFor402("Workspace spend limit reached. Contact your admin.");
  expect(workspaceSpendLimit).toBe("rate_limit");

  // Long payload: the billing phrase only appears after the padding.
  const paddedBillingPayload = `${"x".repeat(520)} insufficient credits. Monthly spend limit reached.`;
  expect(reasonFor402(paddedBillingPayload)).toBe("billing");

  expect(reasonFor402(TOGETHER_MONTHLY_SPEND_CAP_MESSAGE)).toBe("billing");
});
// The same spend-limit text must resolve to "rate_limit" whether the 402 arrives
// embedded in the message text or as a separate status field.
it("keeps raw 402 wrappers aligned with status-split temporary spend limits", () => {
  const message = "Monthly spend limit reached. Please visit your billing settings.";

  const fromRawWrapper = resolveFailoverReasonFromError({
    message: `402 Payment Required: ${message}`,
  });
  expect(fromRawWrapper).toBe("rate_limit");

  const fromStatusSplit = resolveFailoverReasonFromError({ status: 402, message });
  expect(fromStatusSplit).toBe("rate_limit");
});
// Explicit rate-limit wording stays "rate_limit" for both the "HTTP 402 Payment
// Required:" wrapped form and the status-split form.
it("keeps explicit 402 rate-limit wrappers aligned with status-split payloads", () => {
  const message = "rate limit exceeded";

  const fromRawWrapper = resolveFailoverReasonFromError({
    message: `HTTP 402 Payment Required: ${message}`,
  });
  expect(fromRawWrapper).toBe("rate_limit");

  const fromStatusSplit = resolveFailoverReasonFromError({ status: 402, message });
  expect(fromStatusSplit).toBe("rate_limit");
});
// A usage-limit message that asks the user to upgrade their plan resolves to
// "billing" for both the wrapped form and the status-split form.
it("keeps plan-upgrade 402 wrappers aligned with status-split billing payloads", () => {
  const message = "Your usage limit has been reached. Please upgrade your plan.";

  const fromRawWrapper = resolveFailoverReasonFromError({
    message: `HTTP 402 Payment Required: ${message}`,
  });
  expect(fromRawWrapper).toBe("billing");

  const fromStatusSplit = resolveFailoverReasonFromError({ status: 402, message });
  expect(fromStatusSplit).toBe("billing");
});
it("infers format errors from error messages", () => {
expect(
resolveFailoverReasonFromError({

View File

@@ -345,4 +345,66 @@ describe("runWithModelFallback probe logic", () => {
allowTransientCooldownProbe: true,
});
});
// Single-provider setup (empty fallbacks) with the primary in a billing cooldown
// that is 30 minutes from expiry: the run must reject with "All models failed".
it("skips billing-cooldowned primary when no fallback candidates exist", async () => {
  const cfg = makeCfg({
    agents: {
      defaults: {
        model: {
          primary: "openai/gpt-4.1-mini",
          fallbacks: [],
        },
      },
    },
  } as Partial<OpenClawConfig>);

  // Billing cooldown far from expiry — would normally be skipped
  const thirtyMinutesMs = 30 * 60 * 1000;
  mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + thirtyMinutesMs);
  mockedResolveProfilesUnavailableReason.mockReturnValue("billing");

  const run = vi.fn().mockResolvedValue("billing-recovered");
  const attempt = runWithModelFallback({
    cfg,
    provider: "openai",
    model: "gpt-4.1-mini",
    fallbacksOverride: [],
    run,
  });

  await expect(attempt).rejects.toThrow("All models failed");
});
// Default config with a billing cooldown one minute from expiry: the primary is
// attempted exactly once, with the transient cooldown probe option set.
it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => {
  const cfg = makeCfg();

  // Cooldown expires in 1 minute — within 2-min probe margin
  const oneMinuteMs = 60 * 1000;
  mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + oneMinuteMs);
  mockedResolveProfilesUnavailableReason.mockReturnValue("billing");

  const run = vi.fn().mockResolvedValue("billing-probe-ok");
  const outcome = await runPrimaryCandidate(cfg, run);

  expect(outcome.result).toBe("billing-probe-ok");
  expect(run).toHaveBeenCalledTimes(1);
  expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
    allowTransientCooldownProbe: true,
  });
});
// Default config with a billing cooldown 30 minutes from expiry: the only run
// call goes to the anthropic fallback, and the first recorded attempt carries
// the "billing" reason.
it("skips billing-cooldowned primary with fallbacks when far from cooldown expiry", async () => {
  const cfg = makeCfg();

  const thirtyMinutesMs = 30 * 60 * 1000;
  mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + thirtyMinutesMs);
  mockedResolveProfilesUnavailableReason.mockReturnValue("billing");

  const run = vi.fn().mockResolvedValue("ok");
  const outcome = await runPrimaryCandidate(cfg, run);

  expect(outcome.result).toBe("ok");
  expect(run).toHaveBeenCalledTimes(1);
  expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");

  const firstAttempt = outcome.attempts[0];
  expect(firstAttempt?.reason).toBe("billing");
});
});

View File

@@ -419,11 +419,23 @@ function resolveCooldownDecision(params: {
profileIds: params.profileIds,
now: params.now,
}) ?? "rate_limit";
const isPersistentIssue =
inferredReason === "auth" ||
inferredReason === "auth_permanent" ||
inferredReason === "billing";
if (isPersistentIssue) {
const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
if (isPersistentAuthIssue) {
return {
type: "skip",
reason: inferredReason,
error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`,
};
}
// Billing is semi-persistent: the user may fix their balance, or a transient
// 402 might have been misclassified. Probe the primary only when fallbacks
// exist; otherwise repeated single-provider probes just churn the disabled
// auth state without opening any recovery path.
if (inferredReason === "billing") {
if (params.isPrimary && params.hasFallbackCandidates && shouldProbe) {
return { type: "attempt", reason: inferredReason, markProbe: true };
}
return {
type: "skip",
reason: inferredReason,
@@ -518,7 +530,11 @@ export async function runWithModelFallback<T>(params: {
if (decision.markProbe) {
lastProbeAttempt.set(probeThrottleKey, now);
}
if (decision.reason === "rate_limit" || decision.reason === "overloaded") {
if (
decision.reason === "rate_limit" ||
decision.reason === "overloaded" ||
decision.reason === "billing"
) {
runOptions = { allowTransientCooldownProbe: true };
}
}

View File

@@ -1,6 +1,7 @@
import { describe, expect, it } from "vitest";
import {
classifyFailoverReason,
classifyFailoverReasonFromHttpStatus,
isAuthErrorMessage,
isAuthPermanentErrorMessage,
isBillingErrorMessage,
@@ -505,6 +506,87 @@ describe("image dimension errors", () => {
});
});
// Pins how HTTP 402 payloads are split between "rate_limit" and "billing", and
// that text-embedded 402 wrappers classify the same as status-split payloads.
describe("classifyFailoverReasonFromHttpStatus 402 temporary limits", () => {
  const MAXIMUM_ALLOWED_SPEND_CAP =
    "The account associated with this API key has reached its maximum allowed monthly spending limit.";

  // Asserts the status-split classification for one message.
  const expectStatus402 = (message: string | undefined, reason: string): void => {
    expect(classifyFailoverReasonFromHttpStatus(402, message)).toBe(reason);
  };

  // Asserts the wrapped-text form first, then the status-split form.
  const expectWrappedAndSplit = (wrapperPrefix: string, message: string, reason: string): void => {
    expect(classifyFailoverReason(`${wrapperPrefix}${message}`)).toBe(reason);
    expectStatus402(message, reason);
  };

  it("reclassifies periodic usage limits as rate_limit", () => {
    [
      "Monthly spend limit reached.",
      "Weekly usage limit exhausted.",
      "Daily limit reached, resets tomorrow.",
    ].forEach((sample) => expectStatus402(sample, "rate_limit"));
  });

  it("reclassifies org/workspace spend limits as rate_limit", () => {
    [
      "Organization spending limit exceeded.",
      "Workspace spend limit reached.",
      "Organization limit exceeded for this billing period.",
    ].forEach((sample) => expectStatus402(sample, "rate_limit"));
  });

  it("keeps 402 as billing when explicit billing signals are present", () => {
    expectStatus402("Your credit balance is too low. Monthly limit exceeded.", "billing");
    expectStatus402("Insufficient credits. Organization limit reached.", "billing");
    expectStatus402(MAXIMUM_ALLOWED_SPEND_CAP, "billing");
  });

  it("keeps long 402 payloads with explicit billing text as billing", () => {
    const padding = "x".repeat(520);
    expectStatus402(`${padding} insufficient credits. Monthly spend limit reached.`, "billing");
  });

  it("keeps 402 as billing without message or with generic message", () => {
    expectStatus402(undefined, "billing");
    expectStatus402("", "billing");
    expectStatus402("Payment required", "billing");
  });

  it("matches raw 402 wrappers and status-split payloads for the same message", () => {
    expectWrappedAndSplit(
      "402 Payment Required: ",
      "Monthly spend limit reached. Please visit your billing settings.",
      "rate_limit",
    );
    expectWrappedAndSplit("402 Payment Required: ", MAXIMUM_ALLOWED_SPEND_CAP, "billing");
  });

  it("keeps explicit 402 rate-limit messages in the rate_limit lane", () => {
    expectWrappedAndSplit("HTTP 402 Payment Required: ", "rate limit exceeded", "rate_limit");
  });

  it("keeps plan-upgrade 402 limit messages in billing", () => {
    expectWrappedAndSplit(
      "HTTP 402 Payment Required: ",
      "Your usage limit has been reached. Please upgrade your plan.",
      "billing",
    );
  });
});
describe("classifyFailoverReason", () => {
it("classifies documented provider error messages", () => {
expect(classifyFailoverReason(OPENAI_RATE_LIMIT_MESSAGE)).toBe("rate_limit");

View File

@@ -208,6 +208,100 @@ const HTTP_ERROR_HINTS = [
"permission",
];
// The two failover reasons an HTTP 402 payload can be classified as.
type PaymentRequiredFailoverReason = Extract<FailoverReason, "billing" | "rate_limit">;
// Substrings that mark a 402 payload as a billing failure on their own.
// All hint tables are lowercase because they are matched against text that
// normalize402Message has already lowercased.
const BILLING_402_HINTS = [
"insufficient credits",
"insufficient quota",
"credit balance",
"insufficient balance",
"plans & billing",
"add more credits",
"top up",
] as const;
// Plan/subscription wording; treated as a billing signal only when the text
// also contains "limit" (see hasExplicit402BillingSignal).
const BILLING_402_PLAN_HINTS = [
"upgrade your plan",
"upgrade plan",
"current plan",
"subscription",
] as const;
// Period words; retryable only together with usage/spend-limit wording, or with
// both "limit" and "reset" (see hasRetryable402TransientSignal).
const PERIODIC_402_HINTS = ["daily", "weekly", "monthly"] as const;
// Retry wording and limit wording; a retryable signal needs one hint from each list.
const RETRYABLE_402_RETRY_HINTS = ["try again", "retry", "temporary", "cooldown"] as const;
const RETRYABLE_402_LIMIT_HINTS = ["usage limit", "rate limit", "organization usage"] as const;
// Scope words; retryable only when the text also contains "limit" plus either
// spend-limit wording or one of the result hints below.
const RETRYABLE_402_SCOPED_HINTS = ["organization", "workspace"] as const;
const RETRYABLE_402_SCOPED_RESULT_HINTS = [
"billing period",
"exceeded",
"reached",
"exhausted",
] as const;
// Case-insensitive detector for a 402 mentioned inside free text: a status/code
// field set to 402, "http 402", "error 402" / "error code: 402",
// "got|returned|received [a] 402", or a leading "402 payment required".
const RAW_402_MARKER_RE =
/["']?(?:status|code)["']?\s*[:=]\s*402\b|\bhttp\s*402\b|\berror(?:\s+code)?\s*[:=]?\s*402\b|\b(?:got|returned|received)\s+(?:a\s+)?402\b|^\s*402\s+payment required\b/i;
// Matches a leading wrapper to strip before classification: an optional "error"
// prefix, then "402" / "http 402" (optionally followed by "payment required") or
// a bare "payment required", ending at separator characters or end of string.
const LEADING_402_WRAPPER_RE =
/^(?:error[:\s-]+)?(?:(?:http\s*)?402(?:\s+payment required)?|payment required)(?:[:\s-]+|$)/i;
/**
 * Reports whether `text` contains at least one entry of `hints` as a substring.
 * Matching is case-sensitive; an empty `hints` list yields `false`.
 */
function includesAnyHint(text: string, hints: readonly string[]): boolean {
  for (const candidate of hints) {
    if (text.includes(candidate)) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether `text` carries wording that marks a 402 as a billing failure.
 *
 * True when any of these hold:
 * - it contains a BILLING_402_HINTS entry;
 * - it contains a BILLING_402_PLAN_HINTS entry together with "limit";
 * - it contains "billing hard limit" or "hard limit reached";
 * - it contains "maximum allowed" together with "limit".
 *
 * Matching is case-sensitive substring search against lowercase hints, so
 * callers are expected to pass already-lowercased text.
 */
function hasExplicit402BillingSignal(text: string): boolean {
  if (includesAnyHint(text, BILLING_402_HINTS)) {
    return true;
  }
  const mentionsLimit = text.includes("limit");
  if (mentionsLimit && includesAnyHint(text, BILLING_402_PLAN_HINTS)) {
    return true;
  }
  if (text.includes("billing hard limit") || text.includes("hard limit reached")) {
    return true;
  }
  return mentionsLimit && text.includes("maximum allowed");
}
/**
 * Reports whether `text` reads like a temporary (retryable) limit rather than a
 * billing failure.
 *
 * True when any of these hold:
 * - retry wording (RETRYABLE_402_RETRY_HINTS) plus limit wording
 *   (RETRYABLE_402_LIMIT_HINTS);
 * - a period word (PERIODIC_402_HINTS) plus "usage limit" or a spend/spending limit;
 * - a period word plus both "limit" and "reset";
 * - a scope word (RETRYABLE_402_SCOPED_HINTS) plus "limit", plus either a
 *   spend/spending limit or a RETRYABLE_402_SCOPED_RESULT_HINTS entry.
 */
function hasRetryable402TransientSignal(text: string): boolean {
  const retryWithLimit =
    includesAnyHint(text, RETRYABLE_402_RETRY_HINTS) &&
    includesAnyHint(text, RETRYABLE_402_LIMIT_HINTS);
  if (retryWithLimit) {
    return true;
  }

  const mentionsLimit = text.includes("limit");
  const mentionsSpendLimit = text.includes("spend limit") || text.includes("spending limit");

  if (includesAnyHint(text, PERIODIC_402_HINTS)) {
    if (text.includes("usage limit") || mentionsSpendLimit) {
      return true;
    }
    if (mentionsLimit && text.includes("reset")) {
      return true;
    }
  }

  if (!mentionsLimit || !includesAnyHint(text, RETRYABLE_402_SCOPED_HINTS)) {
    return false;
  }
  return mentionsSpendLimit || includesAnyHint(text, RETRYABLE_402_SCOPED_RESULT_HINTS);
}
/**
 * Prepares a 402 message for hint matching: trims it, lowercases it, strips one
 * leading wrapper matched by LEADING_402_WRAPPER_RE, then trims again.
 */
function normalize402Message(raw: string): string {
  const lowered = raw.trim().toLowerCase();
  const withoutWrapper = lowered.replace(LEADING_402_WRAPPER_RE, "");
  return withoutWrapper.trim();
}
/**
 * Classifies the text of an HTTP 402 payload as "billing" or "rate_limit".
 *
 * Precedence after normalization:
 * 1. empty text is "billing";
 * 2. an explicit billing signal is "billing", even if limit wording is also present;
 * 3. a rate-limit message or a retryable transient signal is "rate_limit";
 * 4. anything else is "billing".
 */
function classify402Message(message: string): PaymentRequiredFailoverReason {
  const text = normalize402Message(message);
  if (text === "" || hasExplicit402BillingSignal(text)) {
    return "billing";
  }
  const looksRetryable = isRateLimitErrorMessage(text) || hasRetryable402TransientSignal(text);
  return looksRetryable ? "rate_limit" : "billing";
}
/**
 * Classifies free-form error text that mentions a 402.
 *
 * @returns `null` when `raw` has no 402 marker (RAW_402_MARKER_RE); otherwise the
 *   classify402Message result for the same text.
 */
function classifyFailoverReasonFrom402Text(raw: string): PaymentRequiredFailoverReason | null {
  return RAW_402_MARKER_RE.test(raw) ? classify402Message(raw) : null;
}
function extractLeadingHttpStatus(raw: string): { code: number; rest: string } | null {
const match = raw.match(HTTP_STATUS_CODE_PREFIX_RE);
if (!match) {
@@ -261,25 +355,7 @@ export function classifyFailoverReasonFromHttpStatus(
}
if (status === 402) {
// Some providers (e.g. Anthropic Claude Max plan) surface temporary
// usage/rate-limit failures as HTTP 402. Use a narrow matcher for
// temporary limits to avoid misclassifying billing failures (#30484).
if (message) {
const lower = message.toLowerCase();
// Temporary usage limit signals: retry language + usage/limit terminology
const hasTemporarySignal =
(lower.includes("try again") ||
lower.includes("retry") ||
lower.includes("temporary") ||
lower.includes("cooldown")) &&
(lower.includes("usage limit") ||
lower.includes("rate limit") ||
lower.includes("organization usage"));
if (hasTemporarySignal) {
return "rate_limit";
}
}
return "billing";
return message ? classify402Message(message) : "billing";
}
if (status === 429) {
return "rate_limit";
@@ -858,6 +934,10 @@ export function classifyFailoverReason(raw: string): FailoverReason | null {
if (isModelNotFoundErrorMessage(raw)) {
return "model_not_found";
}
const reasonFrom402Text = classifyFailoverReasonFrom402Text(raw);
if (reasonFrom402Text) {
return reasonFrom402Text;
}
if (isPeriodicUsageLimitErrorMessage(raw)) {
return isBillingErrorMessage(raw) ? "billing" : "rate_limit";
}

View File

@@ -668,7 +668,9 @@ export async function runEmbeddedPiAgent(
const allowTransientCooldownProbe =
params.allowTransientCooldownProbe === true &&
allAutoProfilesInCooldown &&
(unavailableReason === "rate_limit" || unavailableReason === "overloaded");
(unavailableReason === "rate_limit" ||
unavailableReason === "overloaded" ||
unavailableReason === "billing");
let didTransientCooldownProbe = false;
while (profileIndex < profileCandidates.length) {