fix: make overload failover configurable

2026-05-02 06:30:22 +00:00 · 2026-03-31 21:33:35 +01:00
parent 2a60e34f2a
commit 418fa12dfa
14 changed files with 255 additions and 81 deletions
--- a/src/agents/model-fallback.run-embedded.e2e.test.ts
+++ b/src/agents/model-fallback.run-embedded.e2e.test.ts
@@ -202,8 +202,9 @@ async function runEmbeddedFallback(params: {
  sessionKey: string;
  runId: string;
  abortSignal?: AbortSignal;
+  config?: OpenClawConfig;
 }) {
-  const cfg = makeConfig();
+  const cfg = params.config ?? makeConfig();
  return await runWithModelFallback({
    cfg,
    provider: "openai",
@@ -321,8 +322,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
      expect(typeof usageStats["groq:p1"]?.lastUsed).toBe("number");

      expectOpenAiThenGroqAttemptOrder();
-      expect(computeBackoffMock).toHaveBeenCalledTimes(1);
-      expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
+      expect(computeBackoffMock).not.toHaveBeenCalled();
+      expect(sleepWithAbortMock).not.toHaveBeenCalled();
    });
  });

@@ -358,8 +359,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
      expect(usageStats["groq:p1"]?.disabledUntil).toBeUndefined();

      expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
-      expect(computeBackoffMock).toHaveBeenCalledTimes(2);
-      expect(sleepWithAbortMock).toHaveBeenCalledTimes(2);
+      expect(computeBackoffMock).not.toHaveBeenCalled();
+      expect(sleepWithAbortMock).not.toHaveBeenCalled();
    });
  });

@@ -421,8 +422,8 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
      const usageStats = await readUsageStats(agentDir);
      expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
      expect(usageStats["openai:p1"]?.failureCounts).toMatchObject({ overloaded: 2 });
-      expect(computeBackoffMock).toHaveBeenCalledTimes(1);
-      expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
+      expect(computeBackoffMock).not.toHaveBeenCalled();
+      expect(sleepWithAbortMock).not.toHaveBeenCalled();
    });
  });

@@ -466,6 +467,10 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
          sessionKey: "agent:test:overloaded-backoff-abort",
          runId: "run:overloaded-backoff-abort",
          abortSignal: controller.signal,
+          config: {
+            ...makeConfig(),
+            auth: { cooldowns: { overloadedBackoffMs: 321 } },
+          },
        }),
      ).rejects.toMatchObject({
        name: "AbortError",
@@ -483,7 +488,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
  it("caps overloaded profile rotations and escalates to cross-provider fallback (#58348)", async () => {
    // When a provider has multiple auth profiles and all return overloaded_error,
    // the runner should not exhaust all profiles before falling back. It should
-    // cap profile rotations at MAX_OVERLOAD_PROFILE_ROTATIONS (1) and escalate
+    // cap profile rotations at overloadedProfileRotations=1 and escalate
    // to cross-provider fallback immediately.
    await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
      // Write auth store with multiple profiles for openai
@@ -549,7 +554,7 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
      expect(result.model).toBe("mock-2");
      expect(result.result.payloads?.[0]?.text ?? "").toContain("fallback ok");

-      // With MAX_OVERLOAD_PROFILE_ROTATIONS=1, we expect:
+      // With overloadedProfileRotations=1, we expect:
      // - 1 initial openai attempt (p1)
      // - 1 rotation to p2 (capped)
      // - escalation to groq (1 attempt)
@@ -564,4 +569,73 @@ describe("runWithModelFallback + runEmbeddedPiAgent overload policy", () => {
      expect(groqAttempts.length).toBe(1);
    });
  });
+
+  it("respects overloadedProfileRotations=0 and falls back immediately", async () => {
+    await withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
+      await fs.writeFile(
+        path.join(agentDir, "auth-profiles.json"),
+        JSON.stringify({
+          version: 1,
+          profiles: {
+            "openai:p1": { type: "api_key", provider: "openai", key: "sk-openai-1" },
+            "openai:p2": { type: "api_key", provider: "openai", key: "sk-openai-2" },
+            "groq:p1": { type: "api_key", provider: "groq", key: "sk-groq" },
+          },
+          usageStats: {
+            "openai:p1": { lastUsed: 1 },
+            "openai:p2": { lastUsed: 2 },
+            "groq:p1": { lastUsed: 3 },
+          },
+        }),
+      );
+
+      runEmbeddedAttemptMock.mockImplementation(async (params: unknown) => {
+        const attemptParams = params as { provider: string };
+        if (attemptParams.provider === "openai") {
+          return makeEmbeddedRunnerAttempt({
+            assistantTexts: [],
+            lastAssistant: buildEmbeddedRunnerAssistant({
+              provider: "openai",
+              model: "mock-1",
+              stopReason: "error",
+              errorMessage: OVERLOADED_ERROR_PAYLOAD,
+            }),
+          });
+        }
+        if (attemptParams.provider === "groq") {
+          return makeEmbeddedRunnerAttempt({
+            assistantTexts: ["fallback ok"],
+            lastAssistant: buildEmbeddedRunnerAssistant({
+              provider: "groq",
+              model: "mock-2",
+              stopReason: "stop",
+              content: [{ type: "text", text: "fallback ok" }],
+            }),
+          });
+        }
+        throw new Error(`Unexpected provider ${attemptParams.provider}`);
+      });
+
+      const result = await runEmbeddedFallback({
+        agentDir,
+        workspaceDir,
+        sessionKey: "agent:test:overloaded-no-rotation",
+        runId: "run:overloaded-no-rotation",
+        config: {
+          ...makeConfig(),
+          auth: { cooldowns: { overloadedProfileRotations: 0 } },
+        },
+      });
+
+      expect(result.provider).toBe("groq");
+      const openaiAttempts = runEmbeddedAttemptMock.mock.calls.filter(
+        (call) => (call[0] as { provider?: string })?.provider === "openai",
+      );
+      const groqAttempts = runEmbeddedAttemptMock.mock.calls.filter(
+        (call) => (call[0] as { provider?: string })?.provider === "groq",
+      );
+      expect(openaiAttempts.length).toBe(1);
+      expect(groqAttempts.length).toBe(1);
+    });
+  });
 });
--- a/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
+++ b/src/agents/pi-embedded-runner.run-embedded-pi-agent.auth-profile-rotation.e2e.test.ts
@@ -58,22 +58,27 @@ const installRunEmbeddedMocks = () => {
  vi.doMock("./pi-embedded-runner/run/attempt.js", () => ({
    runEmbeddedAttempt: (params: unknown) => runEmbeddedAttemptMock(params),
  }));
-  vi.doMock("../plugins/provider-runtime.js", () => ({
-    prepareProviderRuntimeAuth: async (params: {
-      provider: string;
-      context: { apiKey: string };
-    }) => {
-      if (params.provider !== "github-copilot") {
-        return undefined;
-      }
-      const token = await resolveCopilotApiTokenMock(params.context.apiKey);
-      return {
-        apiKey: token.token,
-        baseUrl: token.baseUrl,
-        expiresAt: token.expiresAt,
-      };
-    },
-  }));
+  vi.doMock("../plugins/provider-runtime.js", async (importOriginal) => {
+    const actual = await importOriginal<typeof import("../plugins/provider-runtime.js")>();
+    return {
+      ...actual,
+      prepareProviderRuntimeAuth: async (params: {
+        provider: string;
+        context: { apiKey: string };
+      }) => {
+        if (params.provider !== "github-copilot") {
+          return undefined;
+        }
+        const token = await resolveCopilotApiTokenMock(params.context.apiKey);
+        return {
+          apiKey: token.token,
+          baseUrl: token.baseUrl,
+          expiresAt: token.expiresAt,
+        };
+      },
+      resolveProviderCapabilitiesWithPlugin: vi.fn(() => undefined),
+    };
+  });
  vi.doMock("../infra/backoff.js", () => ({
    computeBackoff: (
      policy: { initialMs: number; maxMs: number; factor: number; jitter: number },
@@ -188,8 +193,26 @@ const makeAttempt = (overrides: Partial<EmbeddedRunAttemptResult>): EmbeddedRunA
  ...overrides,
 });

-const makeConfig = (opts?: { fallbacks?: string[]; apiKey?: string }): OpenClawConfig =>
+const makeConfig = (opts?: {
+  fallbacks?: string[];
+  apiKey?: string;
+  overloadedBackoffMs?: number;
+  overloadedProfileRotations?: number;
+}): OpenClawConfig =>
  ({
+    auth:
+      opts?.overloadedBackoffMs != null || opts?.overloadedProfileRotations != null
+        ? {
+            cooldowns: {
+              ...(opts?.overloadedBackoffMs != null
+                ? { overloadedBackoffMs: opts.overloadedBackoffMs }
+                : {}),
+              ...(opts?.overloadedProfileRotations != null
+                ? { overloadedProfileRotations: opts.overloadedProfileRotations }
+                : {}),
+            },
+          }
+        : undefined,
    agents: {
      defaults: {
        model: {
@@ -379,6 +402,7 @@ async function runAutoPinnedOpenAiTurn(params: {
  sessionKey: string;
  runId: string;
  authProfileId?: string;
+  config?: OpenClawConfig;
 }) {
  await runEmbeddedPiAgentInline({
    sessionId: "session:test",
@@ -386,7 +410,7 @@ async function runAutoPinnedOpenAiTurn(params: {
    sessionFile: path.join(params.workspaceDir, "session.jsonl"),
    workspaceDir: params.workspaceDir,
    agentDir: params.agentDir,
-    config: makeConfig(),
+    config: params.config ?? makeConfig(),
    prompt: "hello",
    provider: "openai",
    model: "mock-1",
@@ -423,6 +447,7 @@ async function runAutoPinnedRotationCase(params: {
  errorMessage: string;
  sessionKey: string;
  runId: string;
+  config?: OpenClawConfig;
 }) {
  runEmbeddedAttemptMock.mockReset();
  return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
@@ -433,6 +458,7 @@ async function runAutoPinnedRotationCase(params: {
      workspaceDir,
      sessionKey: params.sessionKey,
      runId: params.runId,
+      config: params.config,
    });

    expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
@@ -445,6 +471,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
  errorMessage: string;
  sessionKey: string;
  runId: string;
+  config?: OpenClawConfig;
 }) {
  runEmbeddedAttemptMock.mockReset();
  return withAgentWorkspace(async ({ agentDir, workspaceDir }) => {
@@ -455,6 +482,7 @@ async function runAutoPinnedPromptErrorRotationCase(params: {
      workspaceDir,
      sessionKey: params.sessionKey,
      runId: params.runId,
+      config: params.config,
    });

    expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);
@@ -786,18 +814,8 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
    });
    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
    expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
-    expect(computeBackoffMock).toHaveBeenCalledTimes(1);
-    expect(computeBackoffMock).toHaveBeenCalledWith(
-      expect.objectContaining({
-        initialMs: 250,
-        maxMs: 1500,
-        factor: 2,
-        jitter: 0.2,
-      }),
-      1,
-    );
-    expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
-    expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined);
+    expect(computeBackoffMock).not.toHaveBeenCalled();
+    expect(sleepWithAbortMock).not.toHaveBeenCalled();
  });

  it("logs structured failover decision metadata for overloaded assistant rotation", async () => {
@@ -863,16 +881,19 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
    });
    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
    expect(typeof usageStats["openai:p1"]?.cooldownUntil).toBe("number");
-    expect(computeBackoffMock).toHaveBeenCalledTimes(1);
-    expect(computeBackoffMock).toHaveBeenCalledWith(
-      expect.objectContaining({
-        initialMs: 250,
-        maxMs: 1500,
-        factor: 2,
-        jitter: 0.2,
-      }),
-      1,
-    );
+    expect(computeBackoffMock).not.toHaveBeenCalled();
+    expect(sleepWithAbortMock).not.toHaveBeenCalled();
+  });
+
+  it("uses configured overload backoff before rotating profiles", async () => {
+    const { usageStats } = await runAutoPinnedRotationCase({
+      errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
+      sessionKey: "agent:test:overloaded-configured-backoff",
+      runId: "run:overloaded-configured-backoff",
+      config: makeConfig({ overloadedBackoffMs: 321 }),
+    });
+    expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
+    expect(computeBackoffMock).not.toHaveBeenCalled();
    expect(sleepWithAbortMock).toHaveBeenCalledTimes(1);
    expect(sleepWithAbortMock).toHaveBeenCalledWith(321, undefined);
  });
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -5,7 +5,7 @@ import {
  ensureContextEnginesInitialized,
  resolveContextEngine,
 } from "../../context-engine/index.js";
-import { computeBackoff, sleepWithAbort } from "../../infra/backoff.js";
+import { sleepWithAbort } from "../../infra/backoff.js";
 import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
 import { enqueueCommandInLane } from "../../process/command-queue.js";
 import { sanitizeForLog } from "../../terminal/ansi.js";
@@ -76,10 +76,10 @@ import {
  buildErrorAgentMeta,
  buildUsageAgentMetaFields,
  createCompactionDiagId,
-  MAX_OVERLOAD_PROFILE_ROTATIONS,
-  OVERLOAD_FAILOVER_BACKOFF_POLICY,
  resolveActiveErrorContext,
  resolveMaxRunRetryIterations,
+  resolveOverloadFailoverBackoffMs,
+  resolveOverloadProfileRotationLimit,
  type RuntimeAuthState,
  scrubAnthropicRefusalMagic,
 } from "./run/helpers.js";
@@ -317,9 +317,10 @@ export async function runEmbeddedPiAgent(
      let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
      let autoCompactionCount = 0;
      let runLoopIterations = 0;
-      let overloadFailoverAttempts = 0;
      let overloadProfileRotations = 0;
      let timeoutCompactionAttempts = 0;
+      const overloadFailoverBackoffMs = resolveOverloadFailoverBackoffMs(params.config);
+      const overloadProfileRotationLimit = resolveOverloadProfileRotationLimit(params.config);
      const maybeMarkAuthProfileFailure = async (failure: {
        profileId?: string;
        reason?: AuthProfileFailureReason | null;
@@ -352,16 +353,14 @@ export async function runEmbeddedPiAgent(
        return failoverReason;
      };
      const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
-        if (reason !== "overloaded") {
+        if (reason !== "overloaded" || overloadFailoverBackoffMs <= 0) {
          return;
        }
-        overloadFailoverAttempts += 1;
-        const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, overloadFailoverAttempts);
        log.warn(
-          `overload backoff before failover for ${provider}/${modelId}: attempt=${overloadFailoverAttempts} delayMs=${delayMs}`,
+          `overload backoff before failover for ${provider}/${modelId}: delayMs=${overloadFailoverBackoffMs}`,
        );
        try {
-          await sleepWithAbort(delayMs, params.abortSignal);
+          await sleepWithAbort(overloadFailoverBackoffMs, params.abortSignal);
        } catch (err) {
          if (params.abortSignal?.aborted) {
            const abortErr = new Error("Operation aborted", { cause: err });
@@ -1199,15 +1198,15 @@ export async function runEmbeddedPiAgent(
              }
            }

-            // For overloaded errors, check the rotation cap *before* calling
-            // advanceAuthProfile() to avoid a wasted auth-profile setup cycle.
-            // advanceAuthProfile() runs applyApiKeyInfo() which initialises the
-            // next profile — costly work that is pointless when we already know
-            // we will escalate to cross-provider fallback.
+            // For overloaded errors, check the configured rotation cap *before*
+            // calling advanceAuthProfile() to avoid a wasted auth-profile setup
+            // cycle. advanceAuthProfile() runs applyApiKeyInfo() which
+            // initializes the next profile — costly work that is pointless when
+            // we already know we will escalate to cross-provider fallback.
            // See: https://github.com/openclaw/openclaw/issues/58348
            if (assistantFailoverReason === "overloaded") {
              overloadProfileRotations += 1;
-              if (overloadProfileRotations > MAX_OVERLOAD_PROFILE_ROTATIONS && fallbackConfigured) {
+              if (overloadProfileRotations > overloadProfileRotationLimit && fallbackConfigured) {
                const status = resolveFailoverStatus("overloaded");
                log.warn(
                  `overload profile rotation cap reached for ${sanitizeForLog(provider)}/${sanitizeForLog(modelId)} after ${overloadProfileRotations} rotations; escalating to model fallback`,
--- a/src/agents/pi-embedded-runner/run/helpers.ts
+++ b/src/agents/pi-embedded-runner/run/helpers.ts
@@ -1,4 +1,4 @@
-import { type BackoffPolicy } from "../../../infra/backoff.js";
+import type { OpenClawConfig } from "../../../config/config.js";
 import { generateSecureToken } from "../../../infra/secure-random.js";
 import { derivePromptTokens, normalizeUsage } from "../../usage.js";
 import type { EmbeddedPiAgentMeta } from "../types.js";
@@ -25,22 +25,16 @@ export const RUNTIME_AUTH_REFRESH_MARGIN_MS = 5 * 60 * 1000;
 export const RUNTIME_AUTH_REFRESH_RETRY_MS = 60 * 1000;
 export const RUNTIME_AUTH_REFRESH_MIN_DELAY_MS = 5 * 1000;

-// Keep overload pacing noticeable enough to avoid tight retry bursts, but short
-// enough that fallback still feels responsive within a single turn.
-export const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
-  initialMs: 250,
-  maxMs: 1_500,
-  factor: 2,
-  jitter: 0.2,
-};
+export const DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS = 0;
+export const DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS = 1;

-// Maximum number of auth-profile rotations to attempt for overloaded errors
-// before escalating to cross-provider fallback. Overloaded is a provider-level
-// capacity issue — rotating auth profiles on the same provider is unlikely to
-// help and wastes time with backoff delays. A cap of 1 allows one probe attempt
-// (in case the overload was transient) before giving up on the provider.
-// See: https://github.com/openclaw/openclaw/issues/58348
-export const MAX_OVERLOAD_PROFILE_ROTATIONS = 1;
+export function resolveOverloadFailoverBackoffMs(cfg?: OpenClawConfig): number {
+  return cfg?.auth?.cooldowns?.overloadedBackoffMs ?? DEFAULT_OVERLOAD_FAILOVER_BACKOFF_MS;
+}
+
+export function resolveOverloadProfileRotationLimit(cfg?: OpenClawConfig): number {
+  return cfg?.auth?.cooldowns?.overloadedProfileRotations ?? DEFAULT_MAX_OVERLOAD_PROFILE_ROTATIONS;
+}

 const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
 const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";