fix: avoid false global rate-limit classification from generic cooldown text (#32972)

Merged via squash.

Prepared head SHA: 813c16f5af
Co-authored-by: stakeswky <64798754+stakeswky@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
2026-03-12 07:20:45 +00:00 · 2026-03-06 03:58:21 +08:00
parent 591264ef52
commit 8ac7ce73b3
4 changed files with 12 additions and 4 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -138,6 +138,8 @@ Docs: https://docs.openclaw.ai

 - Mattermost/plugin SDK import policy: replace remaining monolithic `openclaw/plugin-sdk` imports in Mattermost mention-gating paths/tests with scoped subpaths (`openclaw/plugin-sdk/compat` and `openclaw/plugin-sdk/mattermost`) so `pnpm check` passes `lint:plugins:no-monolithic-plugin-sdk-entry-imports` on baseline. (#36480) Thanks @Takhoffman.

+- Agents/failover cooldown classification: stop treating generic `cooling down` text as provider `rate_limit` so healthy models no longer show false global cooldown/rate-limit warnings while explicit `model_cooldown` markers still trigger failover. (#32972) Thanks @stakeswky.
+
 ## 2026.3.2

 ### Changes
--- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
+++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts
@@ -498,9 +498,7 @@ describe("classifyFailoverReason", () => {
    expect(
      classifyFailoverReason("model_cooldown: All credentials for model gpt-5 are cooling down"),
    ).toBe("rate_limit");
-    expect(classifyFailoverReason("all credentials for model x are cooling down")).toBe(
-      "rate_limit",
-    );
+    expect(classifyFailoverReason("all credentials for model x are cooling down")).toBeNull();
    expect(
      classifyFailoverReason(
        '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
--- a/src/agents/pi-embedded-helpers/failover-matches.ts
+++ b/src/agents/pi-embedded-helpers/failover-matches.ts
@@ -4,7 +4,6 @@ const ERROR_PATTERNS = {
  rateLimit: [
    /rate[_ ]limit|too many requests|429/,
    "model_cooldown",
-    "cooling down",
    "exceeded your current quota",
    "resource has been exhausted",
    "quota exceeded",
--- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
+++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
@@ -639,6 +639,15 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
  });

+  it("rotates for overloaded prompt failures across auto-pinned profiles", async () => {
+    const { usageStats } = await runAutoPinnedRotationCase({
+      errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
+      sessionKey: "agent:test:overloaded-rotation",
+      runId: "run:overloaded-rotation",
+    });
+    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
+  });
+
  it("rotates on timeout without cooling down the timed-out profile", async () => {
    const { usageStats } = await runAutoPinnedRotationCase({
      errorMessage: "request ended without sending any chunks",