fix: escalate to model fallback after rate-limit profile rotation cap (#58707)

* fix: escalate to model fallback after rate-limit profile rotation cap

Per-model rate limits (e.g. Anthropic Sonnet-only quotas) are not
relieved by rotating auth profiles — if all profiles share the same
model quota, cycling between them loops forever without falling back
to the next model in the configured fallbacks chain.

Apply the same rotation-cap pattern introduced for overloaded_error
(#58348) to rate_limit errors:

- Add `rateLimitedProfileRotations` to auth.cooldowns config (default: 1)
- After N profile rotations on a rate_limit error, throw FailoverError
  to trigger cross-provider model fallback
- Add `resolveRateLimitProfileRotationLimit` helper following the same
  pattern as `resolveOverloadProfileRotationLimit`

Fixes #58572

* fix: cap prompt-side rate-limit failover (#58707) (thanks @Forgely3D)

* fix: restore latest-main gates for #58707

---------

Co-authored-by: Ember (Forgely3D) <ember@forgely.co>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
Forgely3D
2026-04-01 02:54:10 -06:00
committed by GitHub
parent 8fce663861
commit 4fa11632b4
22 changed files with 357 additions and 45 deletions

View File

@@ -94,6 +94,7 @@ beforeEach(() => {
const OVERLOADED_ERROR_PAYLOAD =
'{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}';
const RATE_LIMIT_ERROR_MESSAGE = "rate limit exceeded";
function makeConfig(): OpenClawConfig {
const apiKeyField = ["api", "Key"].join("");
@@ -196,6 +197,27 @@ async function readUsageStats(agentDir: string) {
return JSON.parse(raw).usageStats as Record<string, Record<string, unknown> | undefined>;
}
async function writeMultiProfileAuthStore(agentDir: string) {
await fs.writeFile(
path.join(agentDir, "auth-profiles.json"),
JSON.stringify({
version: 1,
profiles: {
"openai:p1": { type: "api_key", provider: "openai", key: "sk-openai-1" },
"openai:p2": { type: "api_key", provider: "openai", key: "sk-openai-2" },
"openai:p3": { type: "api_key", provider: "openai", key: "sk-openai-3" },
"groq:p1": { type: "api_key", provider: "groq", key: "sk-groq" },
},
usageStats: {
"openai:p1": { lastUsed: 1 },
"openai:p2": { lastUsed: 2 },
"openai:p3": { lastUsed: 3 },
"groq:p1": { lastUsed: 4 },
},
}),
);
}
async function runEmbeddedFallback(params: {
agentDir: string;
workspaceDir: string;
@@ -236,6 +258,29 @@ function mockPrimaryOverloadedThenFallbackSuccess() {
mockPrimaryErrorThenFallbackSuccess(OVERLOADED_ERROR_PAYLOAD);
}
function mockPrimaryPromptErrorThenFallbackSuccess(errorMessage: string) {
runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
const attemptParams = params as { provider: string };
if (attemptParams.provider === "openai") {
return makeEmbeddedRunnerAttempt({
promptError: new Error(errorMessage),
});
}
if (attemptParams.provider === "groq") {
return makeEmbeddedRunnerAttempt({
assistantTexts: ["fallback ok"],
lastAssistant: buildEmbeddedRunnerAssistant({
provider: "groq",
model: "mock-2",
stopReason: "stop",
content: [{ type: "text", text: "fallback ok" }],
}),
});
}
throw new Error(`Unexpected provider ${attemptParams.provider}`);
});
}
function mockPrimaryErrorThenFallbackSuccess(errorMessage: string) {
runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
const attemptParams = params as { provider: string; modelId: string; authProfileId?: string };
@@ -572,22 +617,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
it("respects overloadedProfileRotations=0 and falls back immediately", async () => {
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
await fs.writeFile(
path.join(agentDir, "auth-profiles.json"),
JSON.stringify({
version: 1,
profiles: {
"openai:p1": { type: "api_key", provider: "openai", key: "sk-openai-1" },
"openai:p2": { type: "api_key", provider: "openai", key: "sk-openai-2" },
"groq:p1": { type: "api_key", provider: "groq", key: "sk-groq" },
},
usageStats: {
"openai:p1": { lastUsed: 1 },
"openai:p2": { lastUsed: 2 },
"groq:p1": { lastUsed: 3 },
},
}),
);
await writeMultiProfileAuthStore(agentDir);
runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
const attemptParams = params as { provider: string };
@@ -638,4 +668,117 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
expect(groqAttempts.length).toBe(1);
});
});
it("caps rate-limit profile rotations and escalates to cross-provider fallback (#58572)", async () => {
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
await writeMultiProfileAuthStore(agentDir);
mockPrimaryErrorThenFallbackSuccess(RATE_LIMIT_ERROR_MESSAGE);
const result = await runEmbeddedFallback({
agentDir,
workspaceDir,
sessionKey: "agent:test:rate-limit-multi-profile-cap",
runId: "run:rate-limit-multi-profile-cap",
});
expect(result.provider).toBe("groq");
expect(result.model).toBe("mock-2");
expect(result.result.payloads?.[0]?.text ?? "").toContain("fallback ok");
const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
(call) => (call[0] as { provider?: string })?.provider === "openai",
);
const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
(call) => (call[0] as { provider?: string })?.provider === "groq",
);
expect(openaiAttempts.length).toBe(2);
expect(groqAttempts.length).toBe(1);
});
});
it("respects rateLimitedProfileRotations=0 and falls back immediately", async () => {
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
await writeMultiProfileAuthStore(agentDir);
mockPrimaryErrorThenFallbackSuccess(RATE_LIMIT_ERROR_MESSAGE);
const result = await runEmbeddedFallback({
agentDir,
workspaceDir,
sessionKey: "agent:test:rate-limit-no-rotation",
runId: "run:rate-limit-no-rotation",
config: {
...makeConfig(),
auth: { cooldowns: { rateLimitedProfileRotations: 0 } },
},
});
expect(result.provider).toBe("groq");
const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
(call) => (call[0] as { provider?: string })?.provider === "openai",
);
const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
(call) => (call[0] as { provider?: string })?.provider === "groq",
);
expect(openaiAttempts.length).toBe(1);
expect(groqAttempts.length).toBe(1);
});
});
it("caps prompt-side rate-limit profile rotations before cross-provider fallback", async () => {
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
await writeMultiProfileAuthStore(agentDir);
mockPrimaryPromptErrorThenFallbackSuccess(RATE_LIMIT_ERROR_MESSAGE);
const result = await runEmbeddedFallback({
agentDir,
workspaceDir,
sessionKey: "agent:test:prompt-rate-limit-multi-profile-cap",
runId: "run:prompt-rate-limit-multi-profile-cap",
});
expect(result.provider).toBe("groq");
expect(result.model).toBe("mock-2");
const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
(call) => (call[0] as { provider?: string })?.provider === "openai",
);
const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
(call) => (call[0] as { provider?: string })?.provider === "groq",
);
expect(openaiAttempts.length).toBe(2);
expect(groqAttempts.length).toBe(1);
});
});
it("respects prompt-side rateLimitedProfileRotations=0 and falls back immediately", async () => {
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
await writeMultiProfileAuthStore(agentDir);
mockPrimaryPromptErrorThenFallbackSuccess(RATE_LIMIT_ERROR_MESSAGE);
const result = await runEmbeddedFallback({
agentDir,
workspaceDir,
sessionKey: "agent:test:prompt-rate-limit-no-rotation",
runId: "run:prompt-rate-limit-no-rotation",
config: {
...makeConfig(),
auth: { cooldowns: { rateLimitedProfileRotations: 0 } },
},
});
expect(result.provider).toBe("groq");
const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
(call) => (call[0] as { provider?: string })?.provider === "openai",
);
const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
(call) => (call[0] as { provider?: string })?.provider === "groq",
);
expect(openaiAttempts.length).toBe(1);
expect(groqAttempts.length).toBe(1);
});
});
});