From ea1143a2872acdfa7df76cf1fade4545c40baedd Mon Sep 17 00:00:00 2001
From: xialonglee
Date: Sat, 7 Mar 2026 11:27:56 +0800
Subject: [PATCH] fix(agents): broaden 402 temporary-limit detection and allow
billing cooldown probes
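- Broaden classifyFailoverReasonFromHttpStatus so that 402 responses describing
periodic (daily/weekly/monthly) usage limits or org/workspace spend limits are
classified as rate_limit instead of billing, unless the message also carries
an explicit billing signal (e.g. insufficient credits)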
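- Treat billing as semi-persistent in model-fallback: probe the
billing-cooldowned primary when no fallbacks exist (throttled to one attempt
per 30s) or, when fallbacks exist, only when the cooldown is near expiry
- Let billing cooldowns use allowTransientCooldownProbe in the embedded runner,
alongside rate_limit and overloaded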
- Add tests for new 402 classification and billing probe behavior
---
src/agents/failover-error.test.ts | 62 +++++++++++
src/agents/model-fallback.probe.test.ts | 101 ++++++++++++++++++
src/agents/model-fallback.ts | 35 ++++--
...dded-helpers.isbillingerrormessage.test.ts | 51 +++++++++
src/agents/pi-embedded-helpers/errors.ts | 26 ++++-
src/agents/pi-embedded-runner/run.ts | 4 +-
6 files changed, 267 insertions(+), 12 deletions(-)
diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts
index f581dd0ede2..a78e806759d 100644
--- a/src/agents/failover-error.test.ts
+++ b/src/agents/failover-error.test.ts
@@ -182,6 +182,68 @@ describe("failover-error", () => {
).toBe("billing");
});
+ it("treats 402 with periodic usage limit as rate_limit", () => {
+ expect(
+ resolveFailoverReasonFromError({
+ status: 402,
+ message: "Monthly spend limit reached. Please visit your billing settings.",
+ }),
+ ).toBe("rate_limit");
+ expect(
+ resolveFailoverReasonFromError({
+ status: 402,
+ message: "Weekly usage limit exhausted for this plan.",
+ }),
+ ).toBe("rate_limit");
+ expect(
+ resolveFailoverReasonFromError({
+ status: 402,
+ message: "Daily limit reached. Your limit will reset tomorrow.",
+ }),
+ ).toBe("rate_limit");
+ });
+
+ it("treats 402 with organization/workspace limit as rate_limit", () => {
+ expect(
+ resolveFailoverReasonFromError({
+ status: 402,
+ message: "Organization spending limit exceeded.",
+ }),
+ ).toBe("rate_limit");
+ expect(
+ resolveFailoverReasonFromError({
+ status: 402,
+ message: "Workspace spend limit reached. Contact your admin.",
+ }),
+ ).toBe("rate_limit");
+ expect(
+ resolveFailoverReasonFromError({
+ status: 402,
+ message: "Organization limit exceeded for this billing period.",
+ }),
+ ).toBe("rate_limit");
+ });
+
+ it("keeps 402 with explicit billing signals as billing even with limit language", () => {
+ expect(
+ resolveFailoverReasonFromError({
+ status: 402,
+ message: "Your credit balance is too low. Monthly limit exceeded.",
+ }),
+ ).toBe("billing");
+ expect(
+ resolveFailoverReasonFromError({
+ status: 402,
+ message: "Insufficient credits. Spend limit reached.",
+ }),
+ ).toBe("billing");
+ });
+
+ it("keeps 402 without message body as billing", () => {
+ expect(resolveFailoverReasonFromError({ status: 402 })).toBe("billing");
+ expect(resolveFailoverReasonFromError({ status: 402, message: undefined })).toBe("billing");
+ });
+
it("infers format errors from error messages", () => {
expect(
resolveFailoverReasonFromError({
diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts
index bcb66628d66..480b8c31d38 100644
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -345,4 +345,105 @@ describe("runWithModelFallback – probe logic", () => {
allowTransientCooldownProbe: true,
});
});
+
+ it("probes billing-cooldowned primary when no fallback candidates exist", async () => {
+ const cfg = makeCfg({
+ agents: {
+ defaults: {
+ model: {
+ primary: "openai/gpt-4.1-mini",
+ fallbacks: [],
+ },
+ },
+ },
+ } as Partial);
+
+ // Billing cooldown far from expiry — would normally be skipped
+ const expiresIn30Min = NOW + 30 * 60 * 1000;
+ mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
+ mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+ const run = vi.fn().mockResolvedValue("billing-recovered");
+
+ const result = await runWithModelFallback({
+ cfg,
+ provider: "openai",
+ model: "gpt-4.1-mini",
+ fallbacksOverride: [],
+ run,
+ });
+
+ expect(result.result).toBe("billing-recovered");
+ expect(run).toHaveBeenCalledTimes(1);
+ expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
+ allowTransientCooldownProbe: true,
+ });
+ });
+
+ it("throttles billing probe for single-candidate at 30s intervals", async () => {
+ const cfg = makeCfg({
+ agents: {
+ defaults: {
+ model: {
+ primary: "openai/gpt-4.1-mini",
+ fallbacks: [],
+ },
+ },
+ },
+ } as Partial);
+
+ mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 60 * 1000);
+ mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+ // Simulate a recent probe 10s ago
+ _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000);
+
+ const run = vi.fn().mockResolvedValue("unreachable");
+
+ await expect(
+ runWithModelFallback({
+ cfg,
+ provider: "openai",
+ model: "gpt-4.1-mini",
+ fallbacksOverride: [],
+ run,
+ }),
+ ).rejects.toThrow("All models failed");
+
+ expect(run).not.toHaveBeenCalled();
+ });
+
+ it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => {
+ const cfg = makeCfg();
+ // Cooldown expires in 1 minute — within 2-min probe margin
+ const expiresIn1Min = NOW + 60 * 1000;
+ mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min);
+ mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+ const run = vi.fn().mockResolvedValue("billing-probe-ok");
+
+ const result = await runPrimaryCandidate(cfg, run);
+
+ expect(result.result).toBe("billing-probe-ok");
+ expect(run).toHaveBeenCalledTimes(1);
+ expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
+ allowTransientCooldownProbe: true,
+ });
+ });
+
+ it("skips billing-cooldowned primary with fallbacks when far from cooldown expiry", async () => {
+ const cfg = makeCfg();
+ const expiresIn30Min = NOW + 30 * 60 * 1000;
+ mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
+ mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+ const run = vi.fn().mockResolvedValue("ok");
+
+ const result = await runPrimaryCandidate(cfg, run);
+
+ expect(result.result).toBe("ok");
+ expect(run).toHaveBeenCalledTimes(1);
+ expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
+ expect(result.attempts[0]?.reason).toBe("billing");
+ });
});
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 0094ef731fc..3b6eb691088 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -419,11 +419,30 @@ function resolveCooldownDecision(params: {
profileIds: params.profileIds,
now: params.now,
}) ?? "rate_limit";
- const isPersistentIssue =
- inferredReason === "auth" ||
- inferredReason === "auth_permanent" ||
- inferredReason === "billing";
- if (isPersistentIssue) {
+ const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
+ if (isPersistentAuthIssue) {
+ return {
+ type: "skip",
+ reason: inferredReason,
+ error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`,
+ };
+ }
+
+ // Billing is semi-persistent: the user may fix their balance, or a transient
+ // 402 might have been misclassified. Without fallback candidates, skipping is
+ // guaranteed failure so we attempt (throttled). With fallbacks, probe the
+ // primary when the standard probe schedule allows.
+ if (inferredReason === "billing") {
+ if (params.isPrimary) {
+ if (!params.hasFallbackCandidates) {
+ const lastProbe = lastProbeAttempt.get(params.probeThrottleKey) ?? 0;
+ if (params.now - lastProbe >= MIN_PROBE_INTERVAL_MS) {
+ return { type: "attempt", reason: inferredReason, markProbe: true };
+ }
+ } else if (shouldProbe) {
+ return { type: "attempt", reason: inferredReason, markProbe: true };
+ }
+ }
return {
type: "skip",
reason: inferredReason,
@@ -518,7 +537,11 @@ export async function runWithModelFallback(params: {
if (decision.markProbe) {
lastProbeAttempt.set(probeThrottleKey, now);
}
- if (decision.reason === "rate_limit" || decision.reason === "overloaded") {
+ if (
+ decision.reason === "rate_limit" ||
+ decision.reason === "overloaded" ||
+ decision.reason === "billing"
+ ) {
runOptions = { allowTransientCooldownProbe: true };
}
}
diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
index 4919bc607c0..1aaa92b5f54 100644
--- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
+++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
@@ -1,6 +1,7 @@
import { describe, expect, it } from "vitest";
import {
classifyFailoverReason,
+ classifyFailoverReasonFromHttpStatus,
isAuthErrorMessage,
isAuthPermanentErrorMessage,
isBillingErrorMessage,
@@ -505,6 +506,56 @@ describe("image dimension errors", () => {
});
});
+describe("classifyFailoverReasonFromHttpStatus – 402 temporary limits", () => {
+ it("reclassifies 402 with periodic usage limit as rate_limit", () => {
+ expect(classifyFailoverReasonFromHttpStatus(402, "Monthly spend limit reached.")).toBe(
+ "rate_limit",
+ );
+ expect(classifyFailoverReasonFromHttpStatus(402, "Weekly usage limit exhausted.")).toBe(
+ "rate_limit",
+ );
+ expect(classifyFailoverReasonFromHttpStatus(402, "Daily limit reached, resets tomorrow.")).toBe(
+ "rate_limit",
+ );
+ });
+
+ it("reclassifies 402 with organization/workspace limit as rate_limit", () => {
+ expect(classifyFailoverReasonFromHttpStatus(402, "Organization spending limit exceeded.")).toBe(
+ "rate_limit",
+ );
+ expect(classifyFailoverReasonFromHttpStatus(402, "Workspace spend limit reached.")).toBe(
+ "rate_limit",
+ );
+ expect(
+ classifyFailoverReasonFromHttpStatus(
+ 402,
+ "Organization limit exceeded for this billing period.",
+ ),
+ ).toBe("rate_limit");
+ });
+
+ it("keeps 402 as billing when explicit billing signals are present", () => {
+ expect(
+ classifyFailoverReasonFromHttpStatus(
+ 402,
+ "Your credit balance is too low. Monthly limit exceeded.",
+ ),
+ ).toBe("billing");
+ expect(
+ classifyFailoverReasonFromHttpStatus(
+ 402,
+ "Insufficient credits. Organization limit reached.",
+ ),
+ ).toBe("billing");
+ });
+
+ it("keeps 402 as billing without message or with generic message", () => {
+ expect(classifyFailoverReasonFromHttpStatus(402, undefined)).toBe("billing");
+ expect(classifyFailoverReasonFromHttpStatus(402, "")).toBe("billing");
+ expect(classifyFailoverReasonFromHttpStatus(402, "Payment required")).toBe("billing");
+ });
+});
+
describe("classifyFailoverReason", () => {
it("classifies documented provider error messages", () => {
expect(classifyFailoverReason(OPENAI_RATE_LIMIT_MESSAGE)).toBe("rate_limit");
diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts
index 5e4fc4c541e..d4d5b0a7be7 100644
--- a/src/agents/pi-embedded-helpers/errors.ts
+++ b/src/agents/pi-embedded-helpers/errors.ts
@@ -262,12 +262,12 @@ export function classifyFailoverReasonFromHttpStatus(
if (status === 402) {
// Some providers (e.g. Anthropic Claude Max plan) surface temporary
- // usage/rate-limit failures as HTTP 402. Use a narrow matcher for
- // temporary limits to avoid misclassifying billing failures (#30484).
+ // usage/rate-limit failures as HTTP 402. Detect temporary limits to
+ // avoid misclassifying them as persistent billing failures (#30484).
if (message) {
const lower = message.toLowerCase();
- // Temporary usage limit signals: retry language + usage/limit terminology
- const hasTemporarySignal =
+ // Explicit retry language + usage/limit terminology
+ const hasTemporaryRetrySignal =
(lower.includes("try again") ||
lower.includes("retry") ||
lower.includes("temporary") ||
@@ -275,7 +275,23 @@ export function classifyFailoverReasonFromHttpStatus(
(lower.includes("usage limit") ||
lower.includes("rate limit") ||
lower.includes("organization usage"));
- if (hasTemporarySignal) {
+ if (hasTemporaryRetrySignal) {
+ return "rate_limit";
+ }
+ // Periodic usage limits (daily/weekly/monthly) are inherently temporary
+ // and should not trigger persistent billing cooldown, unless the message
+ // also contains explicit billing signals (e.g. "insufficient credits").
+ if (isPeriodicUsageLimitErrorMessage(message) && !isBillingErrorMessage(message)) {
+ return "rate_limit";
+ }
+ // Spending/organization/workspace limits are typically resettable caps
+ // set by the organization admin, not permanent credit-balance failures.
+ const hasSpendOrOrgLimitSignal =
+ lower.includes("spend limit") ||
+ lower.includes("spending limit") ||
+ ((lower.includes("organization") || lower.includes("workspace")) &&
+ (lower.includes("limit") || lower.includes("exceeded")));
+ if (hasSpendOrOrgLimitSignal && !isBillingErrorMessage(message)) {
return "rate_limit";
}
}
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index 80ef934d63e..c763fbd2a94 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -668,7 +668,9 @@ export async function runEmbeddedPiAgent(
const allowTransientCooldownProbe =
params.allowTransientCooldownProbe === true &&
allAutoProfilesInCooldown &&
- (unavailableReason === "rate_limit" || unavailableReason === "overloaded");
+ (unavailableReason === "rate_limit" ||
+ unavailableReason === "overloaded" ||
+ unavailableReason === "billing");
let didTransientCooldownProbe = false;
while (profileIndex < profileCandidates.length) {