fix: make overload failover configurable

This commit is contained in:
Peter Steinberger
2026-03-31 21:33:35 +01:00
parent 2a60e34f2a
commit 418fa12dfa
14 changed files with 255 additions and 81 deletions

View File

@@ -202,8 +202,9 @@ async function runEmbeddedFallback(params: {
sessionKey: string;
runId: string;
abortSignal?: AbortSignal;
config?: OpenClawConfig;
}) {
const cfg = makeConfig();
const cfg = params.config ?? makeConfig();
return await runWithModelFallback({
cfg,
provider: "openai",
@@ -321,8 +322,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
expect(typeof usageStats["groq:p1"]?.lastUsed).toBe("number");
expectOpenAiThenGroqAttemptOrder();
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
expect(computeBackoffMock).not.toHaveBeenCalled();
expect(sleepWithAbortMock).not.toHaveBeenCalled();
});
});
@@ -358,8 +359,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
expect(usageStats["groq:p1"]?.disabledUntil).toBeUndefined();
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
expect(computeBackoffMock).toHaveBeenCalledTimes(2);
expect(sleepWithAbortMock).toHaveBeenCalledTimes(2);
expect(computeBackoffMock).not.toHaveBeenCalled();
expect(sleepWithAbortMock).not.toHaveBeenCalled();
});
});
@@ -421,8 +422,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
const usageStats = await readUsageStats(agentDir);
expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
expect(usageStats["openai:p1"]?.failureCounts).toMatchObject({ overloaded: 2 });
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
expect(computeBackoffMock).not.toHaveBeenCalled();
expect(sleepWithAbortMock).not.toHaveBeenCalled();
});
});
@@ -466,6 +467,10 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
sessionKey: "agent:test:overloaded-backoff-abort",
runId: "run:overloaded-backoff-abort",
abortSignal: controller.signal,
config: {
...makeConfig(),
auth: { cooldowns: { overloadedBackoffMs: 321 } },
},
}),
).rejects.toMatchObject({
name: "AbortError",
@@ -483,7 +488,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
it("caps overloaded profile rotations and escalates to cross-provider fallback (#58348)", async () => {
// When a provider has multiple auth profiles and all return overloaded_error,
// the runner should not exhaust all profiles before falling back. It should
// cap profile rotations at MAX_OVERLOAD_PROFILE_ROTATIONS (1) and escalate
// cap profile rotations at overloadedProfileRotations=1 and escalate
// to cross-provider fallback immediately.
await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
// Write auth store with multiple profiles for openai
@@ -549,7 +554,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
expect(result.model).toBe("mock-2");
expect(result.result.payloads?.[0]?.text ?? "").toContain("fallback ok");
// With MAX_OVERLOAD_PROFILE_ROTATIONS=1, we expect:
// With overloadedProfileRotations=1, we expect:
// - 1 initial openai attempt (p1)
// - 1 rotation to p2 (capped)
// - escalation to groq (1 attempt)
@@ -564,4 +569,73 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
expect(groqAttempts.length).toBe(1);
});
});
// With the rotation cap configured to 0, an overloaded provider must hand off
// to the fallback provider after a single attempt, even though a second auth
// profile for the same provider is available.
it("respects overloadedProfileRotations=0 and falls back immediately", async () => {
  await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
    // Two openai profiles are stored so that a same-provider rotation would be
    // possible if the cap did not forbid it.
    const authStore = {
      version: 1,
      profiles: {
        "openai:p1": { type: "api_key", provider: "openai", key: "sk-openai-1" },
        "openai:p2": { type: "api_key", provider: "openai", key: "sk-openai-2" },
        "groq:p1": { type: "api_key", provider: "groq", key: "sk-groq" },
      },
      usageStats: {
        "openai:p1": { lastUsed: 1 },
        "openai:p2": { lastUsed: 2 },
        "groq:p1": { lastUsed: 3 },
      },
    };
    await fs.writeFile(path.join(agentDir, "auth-profiles.json"), JSON.stringify(authStore));

    // openai always answers with the overloaded payload; groq succeeds.
    runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
      const { provider } = params as { provider: string };
      switch (provider) {
        case "openai":
          return makeEmbeddedRunnerAttempt({
            assistantTexts: [],
            lastAssistant: buildEmbeddedRunnerAssistant({
              provider: "openai",
              model: "mock-1",
              stopReason: "error",
              errorMessage: OVERLOADED_ERROR_PAYLOAD,
            }),
          });
        case "groq":
          return makeEmbeddedRunnerAttempt({
            assistantTexts: ["fallback ok"],
            lastAssistant: buildEmbeddedRunnerAssistant({
              provider: "groq",
              model: "mock-2",
              stopReason: "stop",
              content: [{ type: "text", text: "fallback ok" }],
            }),
          });
        default:
          throw new Error(`Unexpected provider ${provider}`);
      }
    });

    // Counts recorded attempt calls whose first argument targets `providerId`.
    const countAttemptsFor = (providerId: string): number =>
      runEmbeddedAttemptMock.mock.calls.filter(
        (call) => (call[0] as { provider?: string })?.provider === providerId,
      ).length;

    const result = await runEmbeddedFallback({
      agentDir,
      workspaceDir,
      sessionKey: "agent:test:overloaded-no-rotation",
      runId: "run:overloaded-no-rotation",
      config: {
        ...makeConfig(),
        auth: { cooldowns: { overloadedProfileRotations: 0 } },
      },
    });

    expect(result.provider).toBe("groq");
    // Exactly one openai attempt: no rotation to openai:p2 took place.
    expect(countAttemptsFor("openai")).toBe(1);
    expect(countAttemptsFor("groq")).toBe(1);
  });
});
});

View File

@@ -58,22 +58,27 @@ const installRunEmbeddedMocks = () => {
vi.doMock("./pi-embedded-runner/run/attempt.js", () => ({
runEmbeddedAttempt: (params: unknown) => runEmbeddedAttemptMock(params),
}));
vi.doMock("../plugins/provider-runtime.js", () => ({
prepareProviderRuntimeAuth: async (params: {
provider: string;
context: { apiKey: string };
}) => {
if (params.provider !== "github-copilot") {
return undefined;
}
const token = await resolveCopilotApiTokenMock(params.context.apiKey);
return {
apiKey: token.token,
baseUrl: token.baseUrl,
expiresAt: token.expiresAt,
};
},
}));
// Partially mock the provider runtime: keep every real export, but replace
// runtime-auth preparation with a Copilot-only stub and make plugin capability
// resolution always return undefined.
vi.doMock("../plugins/provider-runtime.js", async (importOriginal) => {
  type ProviderRuntimeModule = typeof import("../plugins/provider-runtime.js");
  const realModule = await importOriginal<ProviderRuntimeModule>();

  // Only "github-copilot" gets runtime auth; it is derived from the mocked
  // token resolver. Every other provider yields undefined.
  const prepareProviderRuntimeAuth = async (params: {
    provider: string;
    context: { apiKey: string };
  }) => {
    if (params.provider === "github-copilot") {
      const token = await resolveCopilotApiTokenMock(params.context.apiKey);
      return {
        apiKey: token.token,
        baseUrl: token.baseUrl,
        expiresAt: token.expiresAt,
      };
    }
    return undefined;
  };

  return {
    ...realModule,
    prepareProviderRuntimeAuth,
    resolveProviderCapabilitiesWithPlugin: vi.fn(() => undefined),
  };
});
vi.doMock("../infra/backoff.js", () => ({
computeBackoff: (
policy: { initialMs: number; maxMs: number; factor: number; jitter: number },
@@ -188,8 +193,26 @@ const makeAttempt = (overrides: Partial<EmbeddedRunAttemptResult>): EmbeddedRunA
...overrides,
});
const makeConfig = (opts?: { fallbacks?: string[]; apiKey?: string }): OpenClawConfig =>
const makeConfig = (opts?: {
fallbacks?: string[];
apiKey?: string;
overloadedBackoffMs?: number;
overloadedProfileRotations?: number;
}): OpenClawConfig =>
({
auth:
opts?.overloadedBackoffMs != null || opts?.overloadedProfileRotations != null
? {
cooldowns: {
...(opts?.overloadedBackoffMs != null
? { overloadedBackoffMs: opts.overloadedBackoffMs }
: {}),
...(opts?.overloadedProfileRotations != null
? { overloadedProfileRotations: opts.overloadedProfileRotations }
: {}),
},
}
: undefined,
agents: {
defaults: {
model: {
@@ -379,6 +402,7 @@ async function runAutoPinnedOpenAiTurn(params: {
sessionKey: string;
runId: string;
authProfileId?: string;
config?: OpenClawConfig;
}) {
await runEmbeddedPiAgentInline({
sessionId: "session:test",
@@ -386,7 +410,7 @@ async function runAutoPinnedOpenAiTurn(params: {
sessionFile: path.join(params.workspaceDir, "session.jsonl"),
workspaceDir: params.workspaceDir,
agentDir: params.agentDir,
config: makeConfig(),
config: params.config ?? makeConfig(),
prompt: "hello",
provider: "openai",
model: "mock-1",
@@ -423,6 +447,7 @@ async function runAutoPinnedRotationCase(params: {
errorMessage: string;
sessionKey: string;
runId: string;
config?: OpenClawConfig;
}) {
runEmbeddedAttemptMock.mockReset();
return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
@@ -433,6 +458,7 @@ async function runAutoPinnedRotationCase(params: {
workspaceDir,
sessionKey: params.sessionKey,
runId: params.runId,
config: params.config,
});
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
@@ -445,6 +471,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
errorMessage: string;
sessionKey: string;
runId: string;
config?: OpenClawConfig;
}) {
runEmbeddedAttemptMock.mockReset();
return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
@@ -455,6 +482,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
workspaceDir,
sessionKey: params.sessionKey,
runId: params.runId,
config: params.config,
});
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
@@ -786,18 +814,8 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
});
expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
expect(computeBackoffMock).toHaveBeenCalledWith(
expect.objectContaining({
initialMs: 250,
maxMs: 1500,
factor: 2,
jitter: 0.2,
}),
1,
);
expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined);
expect(computeBackoffMock).not.toHaveBeenCalled();
expect(sleepWithAbortMock).not.toHaveBeenCalled();
});
it("logs structured failover decision metadata for overloaded assistant rotation", async () => {
@@ -863,16 +881,19 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
});
expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
expect(computeBackoffMock).toHaveBeenCalledTimes(1);
expect(computeBackoffMock).toHaveBeenCalledWith(
expect.objectContaining({
initialMs: 250,
maxMs: 1500,
factor: 2,
jitter: 0.2,
}),
1,
);
expect(computeBackoffMock).not.toHaveBeenCalled();
expect(sleepWithAbortMock).not.toHaveBeenCalled();
});
// When a backoff delay is configured, the runner sleeps for exactly that delay
// once before rotating to the next profile, without consulting computeBackoff.
it("uses configured overload backoff before rotating profiles", async () => {
  const configuredBackoffMs = 321;
  const overloadedPayload =
    '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}';

  const outcome = await runAutoPinnedRotationCase({
    errorMessage: overloadedPayload,
    sessionKey: "agent:test:overloaded-configured-backoff",
    runId: "run:overloaded-configured-backoff",
    config: makeConfig({ overloadedBackoffMs: configuredBackoffMs }),
  });

  // The second profile was used, i.e. the rotation happened.
  const rotatedProfileStats = outcome.usageStats["openai:p2"];
  expect(typeof rotatedProfileStats?.lastUsed).toBe("number");

  expect(computeBackoffMock).not.toHaveBeenCalled();
  expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
  expect(sleepWithAbortMock).toHaveBeenCalledWith(configuredBackoffMs, undefined);
});

View File

@@ -5,7 +5,7 @@ import {
ensureContextEnginesInitialized,
resolveContextEngine,
} from "../../context-engine/index.js";
import { computeBackoff, sleepWithAbort } from "../../infra/backoff.js";
import { sleepWithAbort } from "../../infra/backoff.js";
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
import { enqueueCommandInLane } from "../../process/command-queue.js";
import { sanitizeForLog } from "../../terminal/ansi.js";
@@ -76,10 +76,10 @@ import {
buildErrorAgentMeta,
buildUsageAgentMetaFields,
createCompactionDiagId,
MAX_OVERLOAD_PROFILE_ROTATIONS,
OVERLOAD_FAILOVER_BACKOFF_POLICY,
resolveActiveErrorContext,
resolveMaxRunRetryIterations,
resolveOverloadFailoverBackoffMs,
resolveOverloadProfileRotationLimit,
type RuntimeAuthState,
scrubAnthropicRefusalMagic,
} from "./run/helpers.js";
@@ -317,9 +317,10 @@ export async function runEmbeddedPiAgent(
let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
let autoCompactionCount = 0;
let runLoopIterations = 0;
let overloadFailoverAttempts = 0;
let overloadProfileRotations = 0;
let timeoutCompactionAttempts = 0;
const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config);
const maybeMarkAuthProfileFailure = async (failure: {
profileId?: string;
reason?: AuthProfileFailureReason | null;
@@ -352,16 +353,14 @@ export async function runEmbeddedPiAgent(
return failoverReason;
};
const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
if (reason !== "overloaded") {
if (reason !== "overloaded" || overloadFailoverBackoffMs <= 0) {
return;
}
overloadFailoverAttempts += 1;
const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, overloadFailoverAttempts);
log.warn(
`overload backoff before failover for ${provider}/${modelId}: attempt=${overloadFailoverAttempts} delayMs=${delayMs}`,
`overload backoff before failover for ${provider}/${modelId}: delayMs=${overloadFailoverBackoffMs}`,
);
try {
await sleepWithAbort(delayMs, params.abortSignal);
await sleepWithAbort(overloadFailoverBackoffMs, params.abortSignal);
} catch (err) {
if (params.abortSignal?.aborted) {
const abortErr = new Error("Operation aborted", { cause: err });
@@ -1199,15 +1198,15 @@ export async function runEmbeddedPiAgent(
}
}
// For overloaded errors, check the rotation cap *before* calling
// advanceAuthProfile() to avoid a wasted auth-profile setup cycle.
// advanceAuthProfile() runs applyApiKeyInfo() which initialises the
// next profile — costly work that is pointless when we already know
// we will escalate to cross-provider fallback.
// For overloaded errors, check the configured rotation cap *before*
// calling advanceAuthProfile() to avoid a wasted auth-profile setup
// cycle. advanceAuthProfile() runs applyApiKeyInfo() which
// initializes the next profile — costly work that is pointless when
// we already know we will escalate to cross-provider fallback.
// See: https://github.com/openclaw/openclaw/issues/58348
if (assistantFailoverReason === "overloaded") {
overloadProfileRotations += 1;
if (overloadProfileRotations > MAX_OVERLOAD_PROFILE_ROTATIONS && fallbackConfigured) {
if (overloadProfileRotations > overloadProfileRotationLimit && fallbackConfigured) {
const status = resolveFailoverStatus("overloaded");
log.warn(
`overload profile rotation cap reached for ${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} after ${overloadProfileRotations} rotations; escalating to model fallback`,

View File

@@ -1,4 +1,4 @@
import { type BackoffPolicy } from "../../../infra/backoff.js";
import type { OpenClawConfig } from "../../../config/config.js";
import { generateSecureToken } from "../../../infra/secure-random.js";
import { derivePromptTokens, normalizeUsage } from "../../usage.js";
import type { EmbeddedPiAgentMeta } from "../types.js";
@@ -25,22 +25,16 @@ export const RUNTIME_AUTH_REFRESH_MARGIN_MS = 5 * 60 * 1000;
export const RUNTIME_AUTH_REFRESH_RETRY_MS = 60 * 1000;
export const RUNTIME_AUTH_REFRESH_MIN_DELAY_MS = 5 * 1000;
// Keep overload pacing noticeable enough to avoid tight retry bursts, but short
// enough that fallback still feels responsive within a single turn.
export const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
initialMs: 250,
maxMs: 1_500,
factor: 2,
jitter: 0.2,
};
// Fallback delay (in milliseconds) used when the config does not set
// auth.cooldowns.overloadedBackoffMs. The runner skips the pre-failover sleep
// when the resolved delay is <= 0, so this default means "no backoff".
export const DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS = 0;
// Fallback cap used when the config does not set
// auth.cooldowns.overloadedProfileRotations: the number of same-provider
// auth-profile rotations allowed for overloaded errors before the runner
// escalates to cross-provider fallback (when a fallback is configured).
// See: https://github.com/openclaw/openclaw/issues/58348
export const DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS = 1;
// Maximum number of auth-profile rotations to attempt for overloaded errors
// before escalating to cross-provider fallback. Overloaded is a provider-level
// capacity issue — rotating auth profiles on the same provider is unlikely to
// help and wastes time with backoff delays. A cap of 1 allows one probe attempt
// (in case the overload was transient) before giving up on the provider.
// See: https://github.com/openclaw/openclaw/issues/58348
export const MAX_OVERLOAD_PROFILE_ROTATIONS = 1;
/**
 * Resolves the delay to wait before failing over after an overloaded error.
 *
 * Reads `auth.cooldowns.overloadedBackoffMs` from the config. A missing,
 * non-numeric, or non-finite (NaN / Infinity) value falls back to
 * `DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS`; negative values are clamped to 0 so
 * callers always receive a usable, non-negative sleep duration.
 *
 * @param cfg - Optional config; may be undefined.
 * @returns A finite, non-negative delay in milliseconds.
 */
export function resolveOverloadFailoverBackoffMs(cfg?: OpenClawConfig): number {
  const configured: unknown = cfg?.auth?.cooldowns?.overloadedBackoffMs;
  // NaN would slip past a `<= 0` guard and reach the sleep call, so reject it
  // (and any other non-finite / non-number value) up front.
  if (typeof configured !== "number" || !Number.isFinite(configured)) {
    return DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS;
  }
  return Math.max(0, configured);
}
/**
 * Resolves how many same-provider auth-profile rotations are allowed for
 * overloaded errors before escalating to cross-provider fallback.
 *
 * Reads `auth.cooldowns.overloadedProfileRotations` from the config. A
 * missing, non-numeric, or NaN value falls back to
 * `DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS`. Other values are normalized to a
 * non-negative integer (negative -> 0, fractional -> floored); for a
 * `rotations > limit` check against an integer counter starting at 1 this
 * normalization does not change the outcome.
 *
 * @param cfg - Optional config; may be undefined.
 * @returns A non-negative rotation limit (never NaN).
 */
export function resolveOverloadProfileRotationLimit(cfg?: OpenClawConfig): number {
  const configured: unknown = cfg?.auth?.cooldowns?.overloadedProfileRotations;
  // A NaN limit would make every `rotations > limit` comparison false and
  // silently disable the escalation, so treat it like "not configured".
  if (typeof configured !== "number" || Number.isNaN(configured)) {
    return DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS;
  }
  return Math.max(0, Math.floor(configured));
}
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";