fix: use unknown instead of rate_limit as default cooldown reason (#42911)

Merged via squash.

Prepared head SHA: bebf6704d7
Co-authored-by: VibhorGautam <55019395+VibhorGautam@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
2026-05-06 04:40:43 +00:00 · 2026-03-12 00:04:14 +05:30
parent 60aed95346
commit 4473242b4f
7 changed files with 25 additions and 13 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -105,6 +105,7 @@ Docs: https://docs.openclaw.ai
 - Telegram/final preview cleanup follow-up: clear stale cleanup-retain state only for transient preview finals so archived-preview retains no longer leave a stale partial bubble beside a later fallback-sent final. (#41763) Thanks @obviyus.
 - Signal/config schema: accept `channels.signal.accountUuid` in strict config validation so loop-protection configs no longer fail with an unrecognized-key error. (#35578) Thanks @ingyukoh.
 - Telegram/config schema: accept `channels.telegram.actions.editMessage` and `createForumTopic` in strict config validation so existing Telegram action toggles no longer fail as unrecognized keys. (#35498) Thanks @ingyukoh.
+- Agents/cooldowns: default cooldown windows with no recorded failure history to `unknown` instead of `rate_limit`, avoiding false API rate-limit warnings while preserving cooldown recovery probes. (#42911) Thanks @VibhorGautam.

 ## 2026.3.8

--- a/src/agents/auth-profiles/usage.test.ts
+++ b/src/agents/auth-profiles/usage.test.ts
@@ -207,7 +207,7 @@ describe("resolveProfilesUnavailableReason", () => {
    ).toBe("overloaded");
  });

-  it("falls back to rate_limit when active cooldown has no reason history", () => {
+  it("falls back to unknown when active cooldown has no reason history", () => {
    const now = Date.now();
    const store = makeStore({
      "anthropic:default": {
@@ -221,7 +221,7 @@ describe("resolveProfilesUnavailableReason", () => {
        profileIds: ["anthropic:default"],
        now,
      }),
-    ).toBe("rate_limit");
+    ).toBe("unknown");
  });

  it("ignores expired windows and returns null when no profile is actively unavailable", () => {
--- a/src/agents/auth-profiles/usage.ts
+++ b/src/agents/auth-profiles/usage.ts
@@ -110,7 +110,11 @@ export function resolveProfilesUnavailableReason(params: {
      recordedReason = true;
    }
    if (!recordedReason) {
-      addScore("rate_limit", 1);
+      // No failure counts recorded for this cooldown window. Previously this
+      // defaulted to "rate_limit", which caused false "rate limit reached"
+      // warnings when the actual reason was unknown (e.g. transient network
+      // blip or server error without a classified failure count).
+      addScore("unknown", 1);
    }
  }

--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -555,7 +555,7 @@ describe("runWithModelFallback", () => {
      usageStat: {
        cooldownUntil: Date.now() + 5 * 60_000,
      },
-      expectedReason: "rate_limit",
+      expectedReason: "unknown",
    });
  });

--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -449,7 +449,7 @@ function resolveCooldownDecision(params: {
      store: params.authStore,
      profileIds: params.profileIds,
      now: params.now,
-    }) ?? "rate_limit";
+    }) ?? "unknown";
  const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
  if (isPersistentAuthIssue) {
    return {
@@ -483,7 +483,10 @@ function resolveCooldownDecision(params: {
  // limits, which are often model-scoped and can recover on a sibling model.
  const shouldAttemptDespiteCooldown =
    (params.isPrimary && (!params.requestedModel || shouldProbe)) ||
-    (!params.isPrimary && (inferredReason === "rate_limit" || inferredReason === "overloaded"));
+    (!params.isPrimary &&
+      (inferredReason === "rate_limit" ||
+        inferredReason === "overloaded" ||
+        inferredReason === "unknown"));
  if (!shouldAttemptDespiteCooldown) {
    return {
      type: "skip",
@@ -588,13 +591,16 @@ export async function runWithModelFallback<T>(params: {
        if (
          decision.reason === "rate_limit" ||
          decision.reason === "overloaded" ||
-          decision.reason === "billing"
+          decision.reason === "billing" ||
+          decision.reason === "unknown"
        ) {
          // Probe at most once per provider per fallback run when all profiles
          // are cooldowned. Re-probing every same-provider candidate can stall
          // cross-provider fallback on providers with long internal retries.
          const isTransientCooldownReason =
-            decision.reason === "rate_limit" || decision.reason === "overloaded";
+            decision.reason === "rate_limit" ||
+            decision.reason === "overloaded" ||
+            decision.reason === "unknown";
          if (isTransientCooldownReason && cooldownProbeUsedProviders.has(candidate.provider)) {
            const error = `Provider ${candidate.provider} is in cooldown (probe already attempted this run)`;
            attempts.push({
--- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
+++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
@@ -981,7 +981,7 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
        }),
      ).rejects.toMatchObject({
        name: "FailoverError",
-        reason: "rate_limit",
+        reason: "unknown",
        provider: "openai",
        model: "mock-1",
      });
@@ -1153,7 +1153,7 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
        }),
      ).rejects.toMatchObject({
        name: "FailoverError",
-        reason: "rate_limit",
+        reason: "unknown",
        provider: "openai",
        model: "mock-1",
      });
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -553,7 +553,7 @@ export async function runEmbeddedPiAgent(
            resolveProfilesUnavailableReason({
              store: authStore,
              profileIds,
-            }) ?? "rate_limit"
+            }) ?? "unknown"
          );
        }
        const classified = classifyFailoverReason(params.message);
@@ -669,14 +669,15 @@ export async function runEmbeddedPiAgent(
          ? (resolveProfilesUnavailableReason({
              store: authStore,
              profileIds: autoProfileCandidates,
-            }) ?? "rate_limit")
+            }) ?? "unknown")
          : null;
        const allowTransientCooldownProbe =
          params.allowTransientCooldownProbe === true &&
          allAutoProfilesInCooldown &&
          (unavailableReason === "rate_limit" ||
            unavailableReason === "overloaded" ||
-            unavailableReason === "billing");
+            unavailableReason === "billing" ||
+            unavailableReason === "unknown");
        let didTransientCooldownProbe = false;

        while (profileIndex < profileCandidates.length) {