test: streamline model fallback probe coverage

This commit is contained in:
Peter Steinberger
2026-05-06 01:12:11 +01:00
parent 1daba5240b
commit d111605453
3 changed files with 208 additions and 218 deletions

View File

@@ -57,6 +57,7 @@ let mockedResolveAuthProfileOrder: ReturnType<
typeof vi.mocked<AuthProfilesOrderModule["resolveAuthProfileOrder"]>
>;
let runWithModelFallback: ModelFallbackModule["runWithModelFallback"];
let modelFallbackTesting: ModelFallbackModule["__testing"];
let _probeThrottleInternals: ModelFallbackModule["_probeThrottleInternals"];
let resetLogger: LoggerModule["resetLogger"];
let setLoggerOverride: LoggerModule["setLoggerOverride"];
@@ -82,6 +83,7 @@ async function loadModelFallbackProbeModules() {
);
mockedResolveAuthProfileOrder = vi.mocked(authProfilesOrderModule.resolveAuthProfileOrder);
runWithModelFallback = modelFallbackModule.runWithModelFallback;
modelFallbackTesting = modelFallbackModule.__testing;
_probeThrottleInternals = modelFallbackModule._probeThrottleInternals;
resetLogger = loggerModule.resetLogger;
setLoggerOverride = loggerModule.setLoggerOverride;
@@ -187,6 +189,34 @@ describe("runWithModelFallback probe logic", () => {
run,
});
// Test helper: drives the extracted cooldown-decision logic directly for a
// fixed openai/gpt-4.1-mini candidate, bypassing runWithModelFallback.
// Primes the auth-runtime mocks with the given unavailable-reason and soonest
// cooldown expiry, then returns the raw decision object so tests can assert
// on { type, reason, markProbe } shapes.
function resolveOpenAiCooldownDecision(params: {
// Cooldown cause reported by resolveProfilesUnavailableReason.
reason: "rate_limit" | "overloaded" | "timeout" | "auth" | "billing";
// Soonest cooldown expiry timestamp (ms); null = no cooldown recorded.
soonest: number | null;
isPrimary?: boolean;
hasFallbackCandidates?: boolean;
requestedModel?: boolean;
// Key used for probe throttling; tests vary it to hit/miss throttle state.
throttleKey?: string;
}) {
mockedGetSoonestCooldownExpiry.mockReturnValue(params.soonest);
mockedResolveProfilesUnavailableReason.mockReturnValue(params.reason);
return modelFallbackTesting.resolveCooldownDecision({
candidate: { provider: "openai", model: "gpt-4.1-mini" },
// Defaults model the common case: primary candidate, requested model,
// with fallbacks available; individual tests override as needed.
isPrimary: params.isPrimary ?? true,
requestedModel: params.requestedModel ?? true,
hasFallbackCandidates: params.hasFallbackCandidates ?? true,
now: NOW,
probeThrottleKey: params.throttleKey ?? "openai",
// Cast needed: only the two mocked methods are supplied, not the full
// auth-runtime surface expected by resolveCooldownDecision.
authRuntime: {
getSoonestCooldownExpiry: mockedGetSoonestCooldownExpiry,
resolveProfilesUnavailableReason: mockedResolveProfilesUnavailableReason,
} as unknown as Parameters<
typeof modelFallbackTesting.resolveCooldownDecision
>[0]["authRuntime"],
// Minimal store/profile fixtures; decision logic only needs their presence.
authStore: { version: 1, profiles: {} },
profileIds: ["openai-profile-1"],
});
}
async function expectPrimarySkippedAfterLongCooldown(reason: "billing" | "rate_limit") {
const cfg = makeCfg();
const expiresIn30Min = NOW + 30 * 60 * 1000;
@@ -202,6 +232,7 @@ describe("runWithModelFallback probe logic", () => {
beforeEach(() => {
realDateNow = Date.now;
Date.now = vi.fn(() => NOW);
setLoggerOverride({ level: "silent", consoleLevel: "silent" });
// Clear throttle state between tests
_probeThrottleInternals.lastProbeAttempt.clear();
@@ -261,16 +292,35 @@ describe("runWithModelFallback probe logic", () => {
await expectPrimarySkippedAfterLongCooldown("billing");
});
it("probes primary model when within 2-min margin of cooldown expiry", async () => {
const cfg = makeCfg();
// Cooldown expires in 1 minute — within 2-min probe margin
const expiresIn1Min = NOW + 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min);
it("decides when cooldowned primary probes are allowed", () => {
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + 60 * 1000,
}),
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW - 5 * 60 * 1000,
}),
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + 30 * 1000,
throttleKey: "recent-openai",
}),
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
const run = vi.fn().mockResolvedValue("probed-ok");
const result = await runPrimaryCandidate(cfg, run);
expectPrimaryProbeSuccess(result, run, "probed-ok");
_probeThrottleInternals.lastProbeAttempt.set("recent-openai", NOW - 10_000);
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + 30 * 1000,
throttleKey: "recent-openai",
}),
).toMatchObject({ type: "skip", reason: "rate_limit" });
});
it("logs primary metadata on probe success and failure fallback decisions", async () => {
@@ -399,18 +449,6 @@ describe("runWithModelFallback probe logic", () => {
);
});
it("probes primary model when cooldown already expired", async () => {
const cfg = makeCfg();
// Cooldown expired 5 min ago
const expiredAlready = NOW - 5 * 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiredAlready);
const run = vi.fn().mockResolvedValue("recovered");
const result = await runPrimaryCandidate(cfg, run);
expectPrimaryProbeSuccess(result, run, "recovered");
});
it.each([
{
label: "rate-limit",
@@ -503,37 +541,6 @@ describe("runWithModelFallback probe logic", () => {
expect(run).toHaveBeenNthCalledWith(3, "deepseek", "deepseek-chat");
});
it("throttles probe when called within 30s interval", async () => {
const cfg = makeCfg();
// Cooldown just about to expire (within probe margin)
const almostExpired = NOW + 30 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
// Simulate a recent probe 10s ago
_probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000);
const run = vi.fn().mockResolvedValue("ok");
const result = await runPrimaryCandidate(cfg, run);
// Should be throttled → skip primary, use fallback
expectFallbackUsed(result, run);
});
it("allows probe when 30s have passed since last probe", async () => {
const cfg = makeCfg();
const almostExpired = NOW + 30 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
// Last probe was 31s ago — should NOT be throttled
_probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 31_000);
const run = vi.fn().mockResolvedValue("probed-ok");
const result = await runPrimaryCandidate(cfg, run);
expectPrimaryProbeSuccess(result, run, "probed-ok");
});
it("prunes stale probe throttle entries before checking eligibility", () => {
_probeThrottleInternals.lastProbeAttempt.set(
"stale",
@@ -564,21 +571,21 @@ describe("runWithModelFallback probe logic", () => {
expect(_probeThrottleInternals.lastProbeAttempt.has("key-0")).toBe(true);
});
it("handles missing or non-finite soonest safely (treats as probe-worthy)", async () => {
const cfg = makeCfg();
it("handles missing or non-finite soonest safely (treats as probe-worthy)", () => {
for (const [label, soonest] of [
["infinity", Infinity],
["nan", Number.NaN],
["null", null],
] as const) {
_probeThrottleInternals.lastProbeAttempt.clear();
mockedGetSoonestCooldownExpiry.mockReturnValue(soonest);
const run = vi.fn().mockResolvedValue(`ok-${label}`);
const result = await runPrimaryCandidate(cfg, run);
expectPrimaryProbeSuccess(result, run, `ok-${label}`);
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest,
}),
label,
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
}
});
@@ -612,84 +619,49 @@ describe("runWithModelFallback probe logic", () => {
expect(run).not.toHaveBeenCalled();
});
it("scopes probe throttling by agentDir to avoid cross-agent suppression", async () => {
const cfg = makeCfg();
const almostExpired = NOW + 30 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
it("scopes probe throttling by agentDir to avoid cross-agent suppression", () => {
const agentAKey = _probeThrottleInternals.resolveProbeThrottleKey("openai", "/tmp/agent-a");
const agentBKey = _probeThrottleInternals.resolveProbeThrottleKey("openai", "/tmp/agent-b");
_probeThrottleInternals.lastProbeAttempt.set(agentAKey, NOW - 10_000);
const run = vi.fn().mockResolvedValue("probed-ok");
await runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
agentDir: "/tmp/agent-a",
run,
});
await runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
agentDir: "/tmp/agent-b",
run,
});
expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini", {
allowTransientCooldownProbe: true,
});
expect(run).toHaveBeenNthCalledWith(2, "openai", "gpt-4.1-mini", {
allowTransientCooldownProbe: true,
});
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + 30 * 1000,
throttleKey: agentAKey,
}),
).toMatchObject({ type: "skip", reason: "rate_limit" });
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + 30 * 1000,
throttleKey: agentBKey,
}),
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
});
it("probes billing-cooldowned primary when no fallback candidates exist", async () => {
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "openai/gpt-4.1-mini",
fallbacks: [],
},
},
},
} as Partial<OpenClawConfig>);
it("decides when billing cooldowns should probe", () => {
// Single-provider setups need periodic probes even when the billing
// cooldown is far from expiry, otherwise topping up credits never recovers
// without a restart.
const expiresIn30Min = NOW + 30 * 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
const run = vi.fn().mockResolvedValue("billing-recovered");
const result = await runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
fallbacksOverride: [],
run,
});
expect(result.result).toBe("billing-recovered");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
allowTransientCooldownProbe: true,
});
});
it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => {
const cfg = makeCfg();
// Cooldown expires in 1 minute — within 2-min probe margin
const expiresIn1Min = NOW + 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min);
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
const run = vi.fn().mockResolvedValue("billing-probe-ok");
const result = await runPrimaryCandidate(cfg, run);
expectPrimaryProbeSuccess(result, run, "billing-probe-ok");
expect(
resolveOpenAiCooldownDecision({
reason: "billing",
soonest: NOW + 30 * 60 * 1000,
hasFallbackCandidates: false,
}),
).toEqual({ type: "attempt", reason: "billing", markProbe: true });
expect(
resolveOpenAiCooldownDecision({
reason: "billing",
soonest: NOW + 60 * 1000,
}),
).toEqual({ type: "attempt", reason: "billing", markProbe: true });
expect(
resolveOpenAiCooldownDecision({
reason: "billing",
soonest: NOW + 30 * 60 * 1000,
}),
).toMatchObject({ type: "skip", reason: "billing" });
});
});

View File

@@ -1,6 +1,6 @@
import crypto from "node:crypto";
import path from "node:path";
import { afterEach, describe, expect, it, vi } from "vitest";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { resetLogger, setLoggerOverride } from "../logging/logger.js";
import { createWarnLogCapture } from "../logging/test-helpers/warn-log-capture.js";
@@ -150,6 +150,15 @@ function resetModelFallbackTestState(): void {
afterEach(resetModelFallbackTestState);
beforeEach(() => {
setLoggerOverride({ level: "silent", consoleLevel: "silent" });
});
afterEach(() => {
setLoggerOverride(null);
resetLogger();
});
async function runModelFallbackCase(name: string, run: () => Promise<void>): Promise<void> {
try {
await run();
@@ -352,7 +361,7 @@ describe("runWithModelFallback", () => {
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
});
it("resolves primary model aliases before running", async () => {
it("resolves primary model aliases before running", () => {
const cases = [
{
name: "keeps openai gpt-5.4 on provider",
@@ -408,17 +417,15 @@ describe("runWithModelFallback", () => {
}>;
for (const testCase of cases) {
await runModelFallbackCase(testCase.name, async () => {
const candidates = __testing.resolveFallbackCandidates({
cfg: testCase.cfg,
provider: testCase.provider,
model: testCase.model,
});
const candidates = __testing.resolveFallbackCandidates({
cfg: testCase.cfg,
provider: testCase.provider,
model: testCase.model,
});
expect(candidates[0]).toEqual({
provider: testCase.expected[0],
model: testCase.expected[1],
});
expect(candidates[0], testCase.name).toEqual({
provider: testCase.expected[0],
model: testCase.expected[1],
});
}
});
@@ -1049,7 +1056,14 @@ describe("runWithModelFallback", () => {
});
it("falls back on model-not-found error shapes", async () => {
const cases = [
const cases: Array<{
name: string;
provider: string;
model: string;
error: Error;
expectedFallback: [string, string];
expectedReason?: string;
}> = [
{
name: "unknown anthropic override",
provider: "anthropic",
@@ -1064,14 +1078,7 @@ describe("runWithModelFallback", () => {
error: new Error("Model not found: openai/gpt-6"),
expectedFallback: ["anthropic", "claude-haiku-3-5"],
},
] satisfies Array<{
name: string;
provider: string;
model: string;
error: Error;
expectedFallback: [string, string];
expectedReason?: string;
}>;
];
for (const testCase of cases) {
await runModelFallbackCase(testCase.name, async () => {
@@ -1517,7 +1524,7 @@ describe("runWithModelFallback", () => {
// Tests for Bug A fix: Model fallback with session overrides
describe("fallback behavior with session model overrides", () => {
it("keeps fallback ordering correct across session overrides", async () => {
it("keeps fallback ordering correct across session overrides", () => {
const cases = [
{
name: "same provider versioned session model",
@@ -1604,17 +1611,15 @@ describe("runWithModelFallback", () => {
}>;
for (const testCase of cases) {
await runModelFallbackCase(testCase.name, async () => {
const candidates = __testing.resolveFallbackCandidates({
cfg: testCase.cfg,
provider: testCase.provider,
model: testCase.model,
});
expect(candidates.slice(0, testCase.calls.length)).toEqual(
testCase.calls.map(([provider, model]) => ({ provider, model })),
);
const candidates = __testing.resolveFallbackCandidates({
cfg: testCase.cfg,
provider: testCase.provider,
model: testCase.model,
});
expect(candidates.slice(0, testCase.calls.length), testCase.name).toEqual(
testCase.calls.map(([provider, model]) => ({ provider, model })),
);
}
});
});
@@ -1649,37 +1654,33 @@ describe("runWithModelFallback", () => {
}
it("attempts same-provider fallbacks during transient cooldowns", async () => {
for (const reason of ["rate_limit", "overloaded", "timeout"] as const) {
await runModelFallbackCase(reason, async () => {
const { dir } = await makeAuthStoreWithCooldown("anthropic", reason);
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-6",
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
},
},
const { dir } = await makeAuthStoreWithCooldown("anthropic", "timeout");
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-6",
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
},
});
},
},
});
const run = vi.fn().mockResolvedValueOnce("sonnet success");
const run = vi.fn().mockResolvedValueOnce("sonnet success");
const result = await runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-6",
run,
agentDir: dir,
});
const result = await runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-6",
run,
agentDir: dir,
});
expect(result.result).toBe("sonnet success");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
allowTransientCooldownProbe: true,
});
});
}
expect(result.result).toBe("sonnet success");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
allowTransientCooldownProbe: true,
});
});
it("keeps alias-resolved primary models subject to transient cooldowns", async () => {
@@ -1715,36 +1716,32 @@ describe("runWithModelFallback", () => {
});
});
it("skips same-provider models on persistent cooldowns", async () => {
for (const reason of ["auth", "billing"] as const) {
await runModelFallbackCase(reason, async () => {
const { dir } = await makeAuthStoreWithCooldown("anthropic", reason);
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-6",
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
},
},
it("skips same-provider models on persistent auth cooldowns", async () => {
const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "anthropic/claude-opus-4-6",
fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
},
});
},
},
});
const run = vi.fn().mockResolvedValueOnce("groq success");
const run = vi.fn().mockResolvedValueOnce("groq success");
const result = await runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-6",
run,
agentDir: dir,
});
const result = await runWithModelFallback({
cfg,
provider: "anthropic",
model: "claude-opus-4-6",
run,
agentDir: dir,
});
expect(result.result).toBe("groq success");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
});
}
expect(result.result).toBe("groq success");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
});
it("tries cross-provider fallbacks when same provider has rate limit", async () => {

View File

@@ -358,6 +358,22 @@ function recordFailedCandidateAttempt(params: {
});
}
// Fast-path variant of recordFailedCandidateAttempt: appends the failed
// candidate to `attempts` without building a fallback-step payload. Used when
// no onFallbackStep callback is registered and decision logging is disabled
// (see observeFailedCandidate), so the extra work can be skipped.
function appendFailedCandidateAttempt(params: {
attempts: FallbackAttempt[];
candidate: ModelCandidate;
error: unknown;
}): void {
const described = describeFailoverError(params.error);
params.attempts.push({
provider: params.candidate.provider,
model: params.candidate.model,
// Prefer the raw provider error text when available; fall back to the
// normalized message.
error: described.rawError ?? described.message,
// Unclassified failures are recorded as "unknown" rather than omitted.
reason: described.reason ?? "unknown",
status: described.status,
code: described.code,
});
}
function findLiveSessionModelSwitchRedirectIndex(params: {
error: LiveSessionModelSwitchError;
candidates: ModelCandidate[];
@@ -512,6 +528,7 @@ function resolveImageFallbackDefaultProvider(cfg: OpenClawConfig | undefined): s
export const __testing = {
resolveFallbackCandidates,
resolveImageFallbackCandidates,
resolveCooldownDecision,
} as const;
function resolveFallbackCandidates(params: {
@@ -832,6 +849,10 @@ export async function runWithModelFallback<T>(params: {
const observeFailedCandidate = async (
failedAttempt: Parameters<typeof recordFailedCandidateAttempt>[0],
) => {
if (!params.onFallbackStep && !isModelFallbackDecisionLogEnabled()) {
appendFailedCandidateAttempt(failedAttempt);
return;
}
const fallbackStep = recordFailedCandidateAttempt(failedAttempt);
if (fallbackStep) {
await params.onFallbackStep?.(fallbackStep);