fix: stop defaulting to rate_limit when cooldown reason is unknown

When a profile enters cooldown but no failure counts are recorded (e.g. a transient network error or an unclassified server error), the inferred reason was hardcoded to "rate_limit". This caused misleading "API rate limit reached" warnings for users who hadn't actually hit any rate limit.

Change both default-reason sites to use "unknown" instead:

- the `resolveProfilesUnavailableReason` fallback in `usage.ts`
- the `resolveCooldownDecision` null-coalesce in `model-fallback.ts`

Treat "unknown" as a transient reason for probe and same-provider fallback purposes, matching the existing behavior for "rate_limit" and "overloaded" cooldowns, so recovery attempts still work.

Fixes #32828
2026-05-06 05:10:44 +00:00 · 2026-03-11 12:16:38 +05:30
parent 60aed95346
commit 8b8be59532
3 changed files with 17 additions and 7 deletions
--- a/src/agents/auth-profiles/usage.test.ts
+++ b/src/agents/auth-profiles/usage.test.ts
@@ -207,7 +207,7 @@ describe("resolveProfilesUnavailableReason", () => {
    ).toBe("overloaded");
  });

-  it("falls back to rate_limit when active cooldown has no reason history", () => {
+  it("falls back to unknown when active cooldown has no reason history", () => {
    const now = Date.now();
    const store = makeStore({
      "anthropic:default": {
@@ -221,7 +221,7 @@ describe("resolveProfilesUnavailableReason", () => {
        profileIds: ["anthropic:default"],
        now,
      }),
-    ).toBe("rate_limit");
+    ).toBe("unknown");
  });

  it("ignores expired windows and returns null when no profile is actively unavailable", () => {
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -110,7 +110,11 @@ export function resolveProfilesUnavailableReason(params: {
      recordedReason = true;
    }
    if (!recordedReason) {
-      addScore("rate_limit", 1);
+      // No failure counts recorded for this cooldown window. Previously this
+      // defaulted to "rate_limit", which caused false "rate limit reached"
+      // warnings when the actual reason was unknown (e.g. transient network
+      // blip or server error without a classified failure count).
+      addScore("unknown", 1);
    }
  }

--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -449,7 +449,7 @@ function resolveCooldownDecision(params: {
      store: params.authStore,
      profileIds: params.profileIds,
      now: params.now,
-    }) ?? "rate_limit";
+    }) ?? "unknown";
  const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
  if (isPersistentAuthIssue) {
    return {
@@ -483,7 +483,10 @@ function resolveCooldownDecision(params: {
  // limits, which are often model-scoped and can recover on a sibling model.
  const shouldAttemptDespiteCooldown =
    (params.isPrimary && (!params.requestedModel || shouldProbe)) ||
-    (!params.isPrimary && (inferredReason === "rate_limit" || inferredReason === "overloaded"));
+    (!params.isPrimary &&
+      (inferredReason === "rate_limit" ||
+        inferredReason === "overloaded" ||
+        inferredReason === "unknown"));
  if (!shouldAttemptDespiteCooldown) {
    return {
      type: "skip",
@@ -588,13 +591,16 @@ export async function runWithModelFallback<T>(params: {
        if (
          decision.reason === "rate_limit" ||
          decision.reason === "overloaded" ||
-          decision.reason === "billing"
+          decision.reason === "billing" ||
+          decision.reason === "unknown"
        ) {
          // Probe at most once per provider per fallback run when all profiles
          // are cooldowned. Re-probing every same-provider candidate can stall
          // cross-provider fallback on providers with long internal retries.
          const isTransientCooldownReason =
-            decision.reason === "rate_limit" || decision.reason === "overloaded";
+            decision.reason === "rate_limit" ||
+            decision.reason === "overloaded" ||
+            decision.reason === "unknown";
          if (isTransientCooldownReason && cooldownProbeUsedProviders.has(candidate.provider)) {
            const error = `Provider ${candidate.provider} is in cooldown (probe already attempted this run)`;
            attempts.push({