diff --git a/CHANGELOG.md b/CHANGELOG.md index ab04d677757..1063cd2aea9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -138,6 +138,8 @@ Docs: https://docs.openclaw.ai - Mattermost/plugin SDK import policy: replace remaining monolithic `openclaw/plugin-sdk` imports in Mattermost mention-gating paths/tests with scoped subpaths (`openclaw/plugin-sdk/compat` and `openclaw/plugin-sdk/mattermost`) so `pnpm check` passes `lint:plugins:no-monolithic-plugin-sdk-entry-imports` on baseline. (#36480) Thanks @Takhoffman. +- Agents/failover cooldown classification: stop treating generic `cooling down` text as provider `rate_limit` so healthy models no longer show false global cooldown/rate-limit warnings while explicit `model_cooldown` markers still trigger failover. (#32972) thanks @stakeswky. + ## 2026.3.2 ### Changes diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 8d9c678035a..599440ca0b2 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -498,9 +498,7 @@ describe("classifyFailoverReason", () => { expect( classifyFailoverReason("model_cooldown: All credentials for model gpt-5 are cooling down"), ).toBe("rate_limit"); - expect(classifyFailoverReason("all credentials for model x are cooling down")).toBe( - "rate_limit", - ); + expect(classifyFailoverReason("all credentials for model x are cooling down")).toBeNull(); expect( classifyFailoverReason( '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}', diff --git a/src/agents/pi-embedded-helpers/failover-matches.ts b/src/agents/pi-embedded-helpers/failover-matches.ts index 451852282c6..ecf7be953d9 100644 --- a/src/agents/pi-embedded-helpers/failover-matches.ts +++ b/src/agents/pi-embedded-helpers/failover-matches.ts @@ -4,7 +4,6 @@ const ERROR_PATTERNS = { rateLimit: [ /rate[_ ]limit|too many requests|429/, "model_cooldown", - "cooling down", "exceeded your current quota", "resource has been exhausted", "quota exceeded", diff --git a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts index cf56036c3ea..cfefc20cc67 100644 --- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts +++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts @@ -639,6 +639,15 @@ describe("runEmbeddedPiAgent auth profile rotation", () => { expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number"); }); + it("rotates for overloaded prompt failures across auto-pinned profiles", async () => { + const { usageStats } = await runAutoPinnedRotationCase({ + errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}', + sessionKey: "agent:test:overloaded-rotation", + runId: "run:overloaded-rotation", + }); + expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number"); + }); + it("rotates on timeout without cooling down the timed-out profile", async () => { const { usageStats } = await runAutoPinnedRotationCase({ errorMessage: "request ended without sending any chunks",