From ea1143a2872acdfa7df76cf1fade4545c40baedd Mon Sep 17 00:00:00 2001
From: xialonglee <li.xialong@xydigit.com>
Date: Sat, 7 Mar 2026 11:27:56 +0800
Subject: [PATCH] fix(agents): broaden 402 temporary-limit detection and allow
 billing cooldown probes

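- Broaden classifyFailoverReasonFromHttpStatus so that a 402 carrying a
  periodic (daily/weekly/monthly) usage limit or an org/workspace spend
  limit is classified as rate_limit instead of billing, unless the message
  also contains explicit billing signals (e.g. insufficient credits)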
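- Treat billing as semi-persistent in model-fallback: probe the primary
  when no fallbacks exist (throttled to one probe per 30s), or when
  fallbacks exist and the cooldown is near expiry; the embedded runner
  now honors allowTransientCooldownProbe for billing as well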
- Add tests for new 402 classification and billing probe behavior
---
 src/agents/failover-error.test.ts             |  62 +++++++++++
 src/agents/model-fallback.probe.test.ts       | 101 ++++++++++++++++++
 src/agents/model-fallback.ts                  |  35 ++++--
 ...dded-helpers.isbillingerrormessage.test.ts |  51 +++++++++
 src/agents/pi-embedded-helpers/errors.ts      |  26 ++++-
 src/agents/pi-embedded-runner/run.ts          |   4 +-
 6 files changed, 267 insertions(+), 12 deletions(-)

diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts
index f581dd0ede2..a78e806759d 100644
--- a/src/agents/failover-error.test.ts
+++ b/src/agents/failover-error.test.ts
@@ -182,6 +182,68 @@ describe("failover-error", () => {
     ).toBe("billing");
   });
 
+  it("treats 402 with periodic usage limit as rate_limit", () => {
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: "Monthly spend limit reached. Please visit your billing settings.",
+      }),
+    ).toBe("rate_limit");
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: "Weekly usage limit exhausted for this plan.",
+      }),
+    ).toBe("rate_limit");
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: "Daily limit reached. Your limit will reset tomorrow.",
+      }),
+    ).toBe("rate_limit");
+  });
+
+  it("treats 402 with organization/workspace limit as rate_limit", () => {
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: "Organization spending limit exceeded.",
+      }),
+    ).toBe("rate_limit");
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: "Workspace spend limit reached. Contact your admin.",
+      }),
+    ).toBe("rate_limit");
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: "Organization limit exceeded for this billing period.",
+      }),
+    ).toBe("rate_limit");
+  });
+
+  it("keeps 402 with explicit billing signals as billing even with limit language", () => {
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: "Your credit balance is too low. Monthly limit exceeded.",
+      }),
+    ).toBe("billing");
+    expect(
+      resolveFailoverReasonFromError({
+        status: 402,
+        message: "Insufficient credits. Spend limit reached.",
+      }),
+    ).toBe("billing");
+  });
+
+  it("keeps 402 without message body as billing", () => {
+    expect(resolveFailoverReasonFromError({ status: 402 })).toBe("billing");
+    expect(resolveFailoverReasonFromError({ status: 402, message: undefined })).toBe("billing");
+  });
+
   it("infers format errors from error messages", () => {
     expect(
       resolveFailoverReasonFromError({
diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts
index bcb66628d66..480b8c31d38 100644
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -345,4 +345,105 @@ describe("runWithModelFallback – probe logic", () => {
       allowTransientCooldownProbe: true,
     });
   });
+
+  it("probes billing-cooldowned primary when no fallback candidates exist", async () => {
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "openai/gpt-4.1-mini",
+            fallbacks: [],
+          },
+        },
+      },
+    } as Partial<OpenClawConfig>);
+
+    // Billing cooldown far from expiry — would normally be skipped
+    const expiresIn30Min = NOW + 30 * 60 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
+    mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+    const run = vi.fn().mockResolvedValue("billing-recovered");
+
+    const result = await runWithModelFallback({
+      cfg,
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      fallbacksOverride: [],
+      run,
+    });
+
+    expect(result.result).toBe("billing-recovered");
+    expect(run).toHaveBeenCalledTimes(1);
+    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
+      allowTransientCooldownProbe: true,
+    });
+  });
+
+  it("throttles billing probe for single-candidate at 30s intervals", async () => {
+    const cfg = makeCfg({
+      agents: {
+        defaults: {
+          model: {
+            primary: "openai/gpt-4.1-mini",
+            fallbacks: [],
+          },
+        },
+      },
+    } as Partial<OpenClawConfig>);
+
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 60 * 1000);
+    mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+    // Simulate a recent probe 10s ago
+    _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000);
+
+    const run = vi.fn().mockResolvedValue("unreachable");
+
+    await expect(
+      runWithModelFallback({
+        cfg,
+        provider: "openai",
+        model: "gpt-4.1-mini",
+        fallbacksOverride: [],
+        run,
+      }),
+    ).rejects.toThrow("All models failed");
+
+    expect(run).not.toHaveBeenCalled();
+  });
+
+  it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => {
+    const cfg = makeCfg();
+    // Cooldown expires in 1 minute — within 2-min probe margin
+    const expiresIn1Min = NOW + 60 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min);
+    mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+    const run = vi.fn().mockResolvedValue("billing-probe-ok");
+
+    const result = await runPrimaryCandidate(cfg, run);
+
+    expect(result.result).toBe("billing-probe-ok");
+    expect(run).toHaveBeenCalledTimes(1);
+    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
+      allowTransientCooldownProbe: true,
+    });
+  });
+
+  it("skips billing-cooldowned primary with fallbacks when far from cooldown expiry", async () => {
+    const cfg = makeCfg();
+    const expiresIn30Min = NOW + 30 * 60 * 1000;
+    mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
+    mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
+
+    const run = vi.fn().mockResolvedValue("ok");
+
+    const result = await runPrimaryCandidate(cfg, run);
+
+    expect(result.result).toBe("ok");
+    expect(run).toHaveBeenCalledTimes(1);
+    expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
+    expect(result.attempts[0]?.reason).toBe("billing");
+  });
 });
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 0094ef731fc..3b6eb691088 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -419,11 +419,30 @@ function resolveCooldownDecision(params: {
       profileIds: params.profileIds,
       now: params.now,
     }) ?? "rate_limit";
-  const isPersistentIssue =
-    inferredReason === "auth" ||
-    inferredReason === "auth_permanent" ||
-    inferredReason === "billing";
-  if (isPersistentIssue) {
+  const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
+  if (isPersistentAuthIssue) {
+    return {
+      type: "skip",
+      reason: inferredReason,
+      error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`,
+    };
+  }
+
+  // Billing is semi-persistent: the user may fix their balance, or a transient
+  // 402 might have been misclassified. Without fallback candidates, skipping is
+  // guaranteed failure so we attempt (throttled). With fallbacks, probe the
+  // primary when the standard probe schedule allows.
+  if (inferredReason === "billing") {
+    if (params.isPrimary) {
+      if (!params.hasFallbackCandidates) {
+        const lastProbe = lastProbeAttempt.get(params.probeThrottleKey) ?? 0;
+        if (params.now - lastProbe >= MIN_PROBE_INTERVAL_MS) {
+          return { type: "attempt", reason: inferredReason, markProbe: true };
+        }
+      } else if (shouldProbe) {
+        return { type: "attempt", reason: inferredReason, markProbe: true };
+      }
+    }
     return {
       type: "skip",
       reason: inferredReason,
@@ -518,7 +537,11 @@ export async function runWithModelFallback<T>(params: {
         if (decision.markProbe) {
           lastProbeAttempt.set(probeThrottleKey, now);
         }
-        if (decision.reason === "rate_limit" || decision.reason === "overloaded") {
+        if (
+          decision.reason === "rate_limit" ||
+          decision.reason === "overloaded" ||
+          decision.reason === "billing"
+        ) {
           runOptions = { allowTransientCooldownProbe: true };
         }
       }
diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
index 4919bc607c0..1aaa92b5f54 100644
--- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
+++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, it } from "vitest";
 import {
   classifyFailoverReason,
+  classifyFailoverReasonFromHttpStatus,
   isAuthErrorMessage,
   isAuthPermanentErrorMessage,
   isBillingErrorMessage,
@@ -505,6 +506,56 @@ describe("image dimension errors", () => {
   });
 });
 
+describe("classifyFailoverReasonFromHttpStatus – 402 temporary limits", () => {
+  it("reclassifies 402 with periodic usage limit as rate_limit", () => {
+    expect(classifyFailoverReasonFromHttpStatus(402, "Monthly spend limit reached.")).toBe(
+      "rate_limit",
+    );
+    expect(classifyFailoverReasonFromHttpStatus(402, "Weekly usage limit exhausted.")).toBe(
+      "rate_limit",
+    );
+    expect(classifyFailoverReasonFromHttpStatus(402, "Daily limit reached, resets tomorrow.")).toBe(
+      "rate_limit",
+    );
+  });
+
+  it("reclassifies 402 with organization/workspace limit as rate_limit", () => {
+    expect(classifyFailoverReasonFromHttpStatus(402, "Organization spending limit exceeded.")).toBe(
+      "rate_limit",
+    );
+    expect(classifyFailoverReasonFromHttpStatus(402, "Workspace spend limit reached.")).toBe(
+      "rate_limit",
+    );
+    expect(
+      classifyFailoverReasonFromHttpStatus(
+        402,
+        "Organization limit exceeded for this billing period.",
+      ),
+    ).toBe("rate_limit");
+  });
+
+  it("keeps 402 as billing when explicit billing signals are present", () => {
+    expect(
+      classifyFailoverReasonFromHttpStatus(
+        402,
+        "Your credit balance is too low. Monthly limit exceeded.",
+      ),
+    ).toBe("billing");
+    expect(
+      classifyFailoverReasonFromHttpStatus(
+        402,
+        "Insufficient credits. Organization limit reached.",
+      ),
+    ).toBe("billing");
+  });
+
+  it("keeps 402 as billing without message or with generic message", () => {
+    expect(classifyFailoverReasonFromHttpStatus(402, undefined)).toBe("billing");
+    expect(classifyFailoverReasonFromHttpStatus(402, "")).toBe("billing");
+    expect(classifyFailoverReasonFromHttpStatus(402, "Payment required")).toBe("billing");
+  });
+});
+
 describe("classifyFailoverReason", () => {
   it("classifies documented provider error messages", () => {
     expect(classifyFailoverReason(OPENAI_RATE_LIMIT_MESSAGE)).toBe("rate_limit");
diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts
index 5e4fc4c541e..d4d5b0a7be7 100644
--- a/src/agents/pi-embedded-helpers/errors.ts
+++ b/src/agents/pi-embedded-helpers/errors.ts
@@ -262,12 +262,12 @@ export function classifyFailoverReasonFromHttpStatus(
 
   if (status === 402) {
     // Some providers (e.g. Anthropic Claude Max plan) surface temporary
-    // usage/rate-limit failures as HTTP 402. Use a narrow matcher for
-    // temporary limits to avoid misclassifying billing failures (#30484).
+    // usage/rate-limit failures as HTTP 402. Detect temporary limits to
+    // avoid misclassifying them as persistent billing failures (#30484).
     if (message) {
       const lower = message.toLowerCase();
-      // Temporary usage limit signals: retry language + usage/limit terminology
-      const hasTemporarySignal =
+      // Explicit retry language + usage/limit terminology
+      const hasTemporaryRetrySignal =
         (lower.includes("try again") ||
           lower.includes("retry") ||
           lower.includes("temporary") ||
@@ -275,7 +275,23 @@ export function classifyFailoverReasonFromHttpStatus(
         (lower.includes("usage limit") ||
           lower.includes("rate limit") ||
           lower.includes("organization usage"));
-      if (hasTemporarySignal) {
+      if (hasTemporaryRetrySignal) {
+        return "rate_limit";
+      }
+      // Periodic usage limits (daily/weekly/monthly) are inherently temporary
+      // and should not trigger persistent billing cooldown, unless the message
+      // also contains explicit billing signals (e.g. "insufficient credits").
+      if (isPeriodicUsageLimitErrorMessage(message) && !isBillingErrorMessage(message)) {
+        return "rate_limit";
+      }
+      // Spending/organization/workspace limits are typically resettable caps
+      // set by the organization admin, not permanent credit-balance failures.
+      const hasSpendOrOrgLimitSignal =
+        lower.includes("spend limit") ||
+        lower.includes("spending limit") ||
+        ((lower.includes("organization") || lower.includes("workspace")) &&
+          (lower.includes("limit") || lower.includes("exceeded")));
+      if (hasSpendOrOrgLimitSignal && !isBillingErrorMessage(message)) {
         return "rate_limit";
       }
     }
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index 80ef934d63e..c763fbd2a94 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -668,7 +668,9 @@ export async function runEmbeddedPiAgent(
         const allowTransientCooldownProbe =
           params.allowTransientCooldownProbe === true &&
           allAutoProfilesInCooldown &&
-          (unavailableReason === "rate_limit" || unavailableReason === "overloaded");
+          (unavailableReason === "rate_limit" ||
+            unavailableReason === "overloaded" ||
+            unavailableReason === "billing");
         let didTransientCooldownProbe = false;
 
         while (profileIndex < profileCandidates.length) {