test: streamline model fallback probe coverage

This commit is contained in:
Peter Steinberger
2026-05-06 01:12:11 +01:00
parent 1daba5240b
commit d111605453
3 changed files with 208 additions and 218 deletions

View File

@@ -57,6 +57,7 @@ let mockedResolveAuthProfileOrder: ReturnType<
typeof vi.mocked<AuthProfilesOrderModule["resolveAuthProfileOrder"]>
>;
let runWithModelFallback: ModelFallbackModule["runWithModelFallback"];
let modelFallbackTesting: ModelFallbackModule["__testing"];
let _probeThrottleInternals: ModelFallbackModule["_probeThrottleInternals"];
let resetLogger: LoggerModule["resetLogger"];
let setLoggerOverride: LoggerModule["setLoggerOverride"];
@@ -82,6 +83,7 @@ async function loadModelFallbackProbeModules() {
);
mockedResolveAuthProfileOrder = vi.mocked(authProfilesOrderModule.resolveAuthProfileOrder);
runWithModelFallback = modelFallbackModule.runWithModelFallback;
modelFallbackTesting = modelFallbackModule.__testing;
_probeThrottleInternals = modelFallbackModule._probeThrottleInternals;
resetLogger = loggerModule.resetLogger;
setLoggerOverride = loggerModule.setLoggerOverride;
@@ -187,6 +189,34 @@ describe("runWithModelFallback probe logic", () => {
run,
});
// Test helper: drives the extracted cooldown-decision logic directly for a
// fixed openai/gpt-4.1-mini candidate, bypassing runWithModelFallback.
// Primes the auth-runtime mocks with the given unavailable-reason and soonest
// cooldown expiry, then returns the raw decision object so tests can assert
// on { type, reason, markProbe } shapes.
function resolveOpenAiCooldownDecision(params: {
// Cooldown cause reported by resolveProfilesUnavailableReason.
reason: "rate_limit" | "overloaded" | "timeout" | "auth" | "billing";
// Soonest cooldown expiry timestamp (ms); null = no cooldown recorded.
soonest: number | null;
isPrimary?: boolean;
hasFallbackCandidates?: boolean;
requestedModel?: boolean;
// Key used for probe throttling; tests vary it to hit/miss throttle state.
throttleKey?: string;
}) {
mockedGetSoonestCooldownExpiry.mockReturnValue(params.soonest);
mockedResolveProfilesUnavailableReason.mockReturnValue(params.reason);
return modelFallbackTesting.resolveCooldownDecision({
candidate: { provider: "openai", model: "gpt-4.1-mini" },
// Defaults model the common case: primary candidate, requested model,
// with fallbacks available; individual tests override as needed.
isPrimary: params.isPrimary ?? true,
requestedModel: params.requestedModel ?? true,
hasFallbackCandidates: params.hasFallbackCandidates ?? true,
now: NOW,
probeThrottleKey: params.throttleKey ?? "openai",
// Cast needed: only the two mocked methods are supplied, not the full
// auth-runtime surface expected by resolveCooldownDecision.
authRuntime: {
getSoonestCooldownExpiry: mockedGetSoonestCooldownExpiry,
resolveProfilesUnavailableReason: mockedResolveProfilesUnavailableReason,
} as unknown as Parameters<
typeof modelFallbackTesting.resolveCooldownDecision
>[0]["authRuntime"],
// Minimal store/profile fixtures; decision logic only needs their presence.
authStore: { version: 1, profiles: {} },
profileIds: ["openai-profile-1"],
});
}
async function expectPrimarySkippedAfterLongCooldown(reason: "billing" | "rate_limit") {
const cfg = makeCfg();
const expiresIn30Min = NOW + 30 * 60 * 1000;
@@ -202,6 +232,7 @@ describe("runWithModelFallback probe logic", () => {
beforeEach(() => {
realDateNow = Date.now;
Date.now = vi.fn(() => NOW);
setLoggerOverride({ level: "silent", consoleLevel: "silent" });
// Clear throttle state between tests
_probeThrottleInternals.lastProbeAttempt.clear();
@@ -261,16 +292,35 @@ describe("runWithModelFallback probe logic", () => {
await expectPrimarySkippedAfterLongCooldown("billing");
});
it("probes primary model when within 2-min margin of cooldown expiry", async () => {
const cfg = makeCfg();
// Cooldown expires in 1 minute — within 2-min probe margin
const expiresIn1Min = NOW + 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min);
it("decides when cooldowned primary probes are allowed", () => {
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + 60 * 1000,
}),
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW - 5 * 60 * 1000,
}),
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + 30 * 1000,
throttleKey: "recent-openai",
}),
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
const run = vi.fn().mockResolvedValue("probed-ok");
const result = await runPrimaryCandidate(cfg, run);
expectPrimaryProbeSuccess(result, run, "probed-ok");
_probeThrottleInternals.lastProbeAttempt.set("recent-openai", NOW - 10_000);
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + 30 * 1000,
throttleKey: "recent-openai",
}),
).toMatchObject({ type: "skip", reason: "rate_limit" });
});
it("logs primary metadata on probe success and failure fallback decisions", async () => {
@@ -399,18 +449,6 @@ describe("runWithModelFallback probe logic", () => {
);
});
it("probes primary model when cooldown already expired", async () => {
const cfg = makeCfg();
// Cooldown expired 5 min ago
const expiredAlready = NOW - 5 * 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiredAlready);
const run = vi.fn().mockResolvedValue("recovered");
const result = await runPrimaryCandidate(cfg, run);
expectPrimaryProbeSuccess(result, run, "recovered");
});
it.each([
{
label: "rate-limit",
@@ -503,37 +541,6 @@ describe("runWithModelFallback probe logic", () => {
expect(run).toHaveBeenNthCalledWith(3, "deepseek", "deepseek-chat");
});
it("throttles probe when called within 30s interval", async () => {
const cfg = makeCfg();
// Cooldown just about to expire (within probe margin)
const almostExpired = NOW + 30 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
// Simulate a recent probe 10s ago
_probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000);
const run = vi.fn().mockResolvedValue("ok");
const result = await runPrimaryCandidate(cfg, run);
// Should be throttled → skip primary, use fallback
expectFallbackUsed(result, run);
});
it("allows probe when 30s have passed since last probe", async () => {
const cfg = makeCfg();
const almostExpired = NOW + 30 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
// Last probe was 31s ago — should NOT be throttled
_probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 31_000);
const run = vi.fn().mockResolvedValue("probed-ok");
const result = await runPrimaryCandidate(cfg, run);
expectPrimaryProbeSuccess(result, run, "probed-ok");
});
it("prunes stale probe throttle entries before checking eligibility", () => {
_probeThrottleInternals.lastProbeAttempt.set(
"stale",
@@ -564,21 +571,21 @@ describe("runWithModelFallback probe logic", () => {
expect(_probeThrottleInternals.lastProbeAttempt.has("key-0")).toBe(true);
});
it("handles missing or non-finite soonest safely (treats as probe-worthy)", async () => {
const cfg = makeCfg();
it("handles missing or non-finite soonest safely (treats as probe-worthy)", () => {
for (const [label, soonest] of [
["infinity", Infinity],
["nan", Number.NaN],
["null", null],
] as const) {
_probeThrottleInternals.lastProbeAttempt.clear();
mockedGetSoonestCooldownExpiry.mockReturnValue(soonest);
const run = vi.fn().mockResolvedValue(`ok-${label}`);
const result = await runPrimaryCandidate(cfg, run);
expectPrimaryProbeSuccess(result, run, `ok-${label}`);
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest,
}),
label,
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
}
});
@@ -612,84 +619,49 @@ describe("runWithModelFallback probe logic", () => {
expect(run).not.toHaveBeenCalled();
});
it("scopes probe throttling by agentDir to avoid cross-agent suppression", async () => {
const cfg = makeCfg();
const almostExpired = NOW + 30 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
it("scopes probe throttling by agentDir to avoid cross-agent suppression", () => {
const agentAKey = _probeThrottleInternals.resolveProbeThrottleKey("openai", "/tmp/agent-a");
const agentBKey = _probeThrottleInternals.resolveProbeThrottleKey("openai", "/tmp/agent-b");
_probeThrottleInternals.lastProbeAttempt.set(agentAKey, NOW - 10_000);
const run = vi.fn().mockResolvedValue("probed-ok");
await runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
agentDir: "/tmp/agent-a",
run,
});
await runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
agentDir: "/tmp/agent-b",
run,
});
expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini", {
allowTransientCooldownProbe: true,
});
expect(run).toHaveBeenNthCalledWith(2, "openai", "gpt-4.1-mini", {
allowTransientCooldownProbe: true,
});
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + 30 * 1000,
throttleKey: agentAKey,
}),
).toMatchObject({ type: "skip", reason: "rate_limit" });
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + 30 * 1000,
throttleKey: agentBKey,
}),
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
});
it("probes billing-cooldowned primary when no fallback candidates exist", async () => {
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "openai/gpt-4.1-mini",
fallbacks: [],
},
},
},
} as Partial<OpenClawConfig>);
it("decides when billing cooldowns should probe", () => {
// Single-provider setups need periodic probes even when the billing
// cooldown is far from expiry, otherwise topping up credits never recovers
// without a restart.
const expiresIn30Min = NOW + 30 * 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
const run = vi.fn().mockResolvedValue("billing-recovered");
const result = await runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
fallbacksOverride: [],
run,
});
expect(result.result).toBe("billing-recovered");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
allowTransientCooldownProbe: true,
});
});
it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => {
const cfg = makeCfg();
// Cooldown expires in 1 minute — within 2-min probe margin
const expiresIn1Min = NOW + 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min);
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
const run = vi.fn().mockResolvedValue("billing-probe-ok");
const result = await runPrimaryCandidate(cfg, run);
expectPrimaryProbeSuccess(result, run, "billing-probe-ok");
expect(
resolveOpenAiCooldownDecision({
reason: "billing",
soonest: NOW + 30 * 60 * 1000,
hasFallbackCandidates: false,
}),
).toEqual({ type: "attempt", reason: "billing", markProbe: true });
expect(
resolveOpenAiCooldownDecision({
reason: "billing",
soonest: NOW + 60 * 1000,
}),
).toEqual({ type: "attempt", reason: "billing", markProbe: true });
expect(
resolveOpenAiCooldownDecision({
reason: "billing",
soonest: NOW + 30 * 60 * 1000,
}),
).toMatchObject({ type: "skip", reason: "billing" });
});
});

View File

@@ -1,6 +1,6 @@
import crypto from "node:crypto";
import path from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { resetLogger, setLoggerOverride } from "../logging/logger.js";
import { createWarnLogCapture } from "../logging/test-helpers/warn-log-capture.js";
@@ -150,6 +150,15 @@ function resetModelFallbackTestState(): void {
afterEach(resetModelFallbackTestState);
beforeEach(() => {
setLoggerOverride({ level: "silent", consoleLevel: "silent" });
});
afterEach(() => {
setLoggerOverride(null);
resetLogger();
});
async function runModelFallbackCase(name: string, run: () => Promise<void>): Promise<void> {
try {
await run();
@@ -352,7 +361,7 @@ describe("runWithModelFallback", () => {
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
});
it("resolves primary model aliases before running", async () => {
it("resolves primary model aliases before running", () => {
const cases = [
{
name: "keeps openai gpt-5.4 on provider",
@@ -408,17 +417,15 @@ describe("runWithModelFallback", () => {
}>;
for (const testCase of cases) {
await runModelFallbackCase(testCase.name, async () => {
const candidates = __testing.resolveFallbackCandidates({
cfg: testCase.cfg,
provider: testCase.provider,
model: testCase.model,
});
const candidates = __testing.resolveFallbackCandidates({
cfg: testCase.cfg,
provider: testCase.provider,
model: testCase.model,
});
expect(candidates[0]).toEqual({
provider: testCase.expected[0],
model: testCase.expected[1],
});
expect(candidates[0], testCase.name).toEqual({
provider: testCase.expected[0],
model: testCase.expected[1],
});
}
});
@@ -1049,7 +1056,14 @@ describe("runWithModelFallback", () => {
});
it("falls back on model-not-found error shapes", async () => {
const cases = [
const cases: Array<{
name: string;
provider: string;
model: string;
error: Error;
expectedFallback: [string, string];
expectedReason?: string;
}> = [
{
name: "unknown anthropic override",
provider: "anthropic",
@@ -1064,14 +1078,7 @@ describe("runWithModelFallback", () => {
error: new Error("Model not found: openai/gpt-6"),
expectedFallback: ["anthropic", "claude-haiku-3-5"],
},
] satisfies Array<{
name: string;
provider: string;
model: string;
error: Error;
expectedFallback: [string, string];
expectedReason?: string;
}>;
];
for (const testCase of cases) {
await runModelFallbackCase(testCase.name, async () => {
@@ -1517,7 +1524,7 @@ describe("runWithModelFallback", () => {
// Tests for Bug A fix: Model fallback with session overrides
describe("fallback behavior with session model overrides", () => {
it("keeps fallback ordering correct across session overrides", async () => {
it("keeps fallback ordering correct across session overrides", () => {
const cases = [
{
name: "same provider versioned session model",
@@ -1604,17 +1611,15 @@ describe("runWithModelFallback", () => {
}>;
for (const testCase of cases) {
await runModelFallbackCase(testCase.name, async () => {
const candidates = __testing.resolveFallbackCandidates({
cfg: testCase.cfg,
provider: testCase.provider,
model: testCase.model,
});
expect(candidates.slice(0, testCase.calls.length)).toEqual(
testCase.calls.map(([provider, model]) => ({ provider, model })),
);
const candidates = __testing.resolveFallbackCandidates({
cfg: testCase.cfg,
provider: testCase.provider,
model: testCase.model,
});
expect(candidates.slice(0, testCase.calls.length), testCase.name).toEqual(
testCase.calls.map(([provider, model]) => ({ provider, model })),
);
}
});
});
@@ -1649,37 +1654,33 @@ describe("runWithModelFallback", () => {
}
it("attempts same-provider fallbacks during transient cooldowns", async () => {
for (const reason of ["rate_limit", "overloaded", "timeout"] as const) {
await runModelFallbackCase(reason, async () => {
const { dir } = await makeAuthStoreWithCooldown("anthropic", reason);
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-6",
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
},
},
const { dir } = await makeAuthStoreWithCooldown("anthropic", "timeout");
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-6",
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
},
});
},
},
});
const run = vi.fn().mockResolvedValueOnce("sonnet success");
const run = vi.fn().mockResolvedValueOnce("sonnet success");
const result = await runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-6",
run,
agentDir: dir,
});
const result = await runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-6",
run,
agentDir: dir,
});
expect(result.result).toBe("sonnet success");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
allowTransientCooldownProbe: true,
});
});
}
expect(result.result).toBe("sonnet success");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
allowTransientCooldownProbe: true,
});
});
it("keeps alias-resolved primary models subject to transient cooldowns", async () => {
@@ -1715,36 +1716,32 @@ describe("runWithModelFallback", () => {
});
});
it("skips same-provider models on persistent cooldowns", async () => {
for (const reason of ["auth", "billing"] as const) {
await runModelFallbackCase(reason, async () => {
const { dir } = await makeAuthStoreWithCooldown("anthropic", reason);
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-6",
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
},
},
it("skips same-provider models on persistent auth cooldowns", async () => {
const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-6",
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
},
});
},
},
});
const run = vi.fn().mockResolvedValueOnce("groq success");
const run = vi.fn().mockResolvedValueOnce("groq success");
const result = await runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-6",
run,
agentDir: dir,
});
const result = await runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-6",
run,
agentDir: dir,
});
expect(result.result).toBe("groq success");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
});
}
expect(result.result).toBe("groq success");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
});
it("tries cross-provider fallbacks when same provider has rate limit", async () => {

View File

@@ -358,6 +358,22 @@ function recordFailedCandidateAttempt(params: {
});
}
// Fast-path variant of recordFailedCandidateAttempt: appends the failed
// candidate to `attempts` without building a fallback-step payload. Used when
// no onFallbackStep callback is registered and decision logging is disabled
// (see observeFailedCandidate), so the extra work can be skipped.
function appendFailedCandidateAttempt(params: {
attempts: FallbackAttempt[];
candidate: ModelCandidate;
error: unknown;
}): void {
const described = describeFailoverError(params.error);
params.attempts.push({
provider: params.candidate.provider,
model: params.candidate.model,
// Prefer the raw provider error text when available; fall back to the
// normalized message.
error: described.rawError ?? described.message,
// Unclassified failures are recorded as "unknown" rather than omitted.
reason: described.reason ?? "unknown",
status: described.status,
code: described.code,
});
}
function findLiveSessionModelSwitchRedirectIndex(params: {
error: LiveSessionModelSwitchError;
candidates: ModelCandidate[];
@@ -512,6 +528,7 @@ function resolveImageFallbackDefaultProvider(cfg: OpenClawConfig | undefined): s
export const __testing = {
resolveFallbackCandidates,
resolveImageFallbackCandidates,
resolveCooldownDecision,
} as const;
function resolveFallbackCandidates(params: {
@@ -832,6 +849,10 @@ export async function runWithModelFallback<T>(params: {
const observeFailedCandidate = async (
failedAttempt: Parameters<typeof recordFailedCandidateAttempt>[0],
) => {
if (!params.onFallbackStep && !isModelFallbackDecisionLogEnabled()) {
appendFailedCandidateAttempt(failedAttempt);
return;
}
const fallbackStep = recordFailedCandidateAttempt(failedAttempt);
if (fallbackStep) {
await params.onFallbackStep?.(fallbackStep);