From cb42efb6e660b39ad7ab94e23a454105bfcbc7a3 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 6 May 2026 00:52:26 +0100 Subject: [PATCH] test: trim slow agent fallback coverage --- src/agents/model-fallback.test.ts | 907 +++++++----------- src/agents/model-fallback.ts | 5 + src/agents/model-selection.test.ts | 32 +- .../outcome-fallback-runtime-contract.test.ts | 116 +-- src/agents/pi-tools.policy.test.ts | 11 +- src/agents/tools/message-tool.test.ts | 83 ++ src/config/doc-baseline.integration.test.ts | 10 +- 7 files changed, 522 insertions(+), 642 deletions(-) diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index a14200b0b64..f71441212a7 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -10,6 +10,7 @@ import { FailoverError } from "./failover-error.js"; import { LiveSessionModelSwitchError } from "./live-model-switch-error.js"; import { FallbackSummaryError, + __testing, runWithImageModelFallback, runWithModelFallback, } from "./model-fallback.js"; @@ -27,7 +28,7 @@ vi.mock("../plugins/provider-runtime.js", () => ({ })); const authSourceCheckMock = vi.hoisted(() => ({ - hasAnyAuthProfileStoreSource: vi.fn(() => true), + hasAnyAuthProfileStoreSource: vi.fn(() => false), })); vi.mock("./auth-profiles/source-check.js", () => authSourceCheckMock); @@ -137,17 +138,27 @@ const authRuntimeMock = vi.hoisted(() => { vi.mock("./model-fallback-auth.runtime.js", () => authRuntimeMock.runtime); const makeCfg = makeModelFallbackCfg; -const OPENROUTER_MODEL_NOT_FOUND_PAYLOAD = - '{"error":{"message":"Healer Alpha was a stealth model revealed on March 18th as an early testing version of MiMo-V2-Omni. Find it here: https://openrouter.ai/xiaomi/mimo-v2-omni","code":404},"user_id":"user_33GTyP8uDSYYbaeBO48AGHXyuMC"}'; let authTempRoot = ""; let authTempCounter = 0; -afterEach(() => { +function resetModelFallbackTestState(): void { authRuntimeMock.clear(); authRuntimeMock.runtime.ensureAuthProfileStore.mockClear(); authRuntimeMock.runtime.loadAuthProfileStoreForRuntime.mockClear(); - authSourceCheckMock.hasAnyAuthProfileStoreSource.mockReset().mockReturnValue(true); -}); + authSourceCheckMock.hasAnyAuthProfileStoreSource.mockReset().mockReturnValue(false); +} + +afterEach(resetModelFallbackTestState); + +async function runModelFallbackCase(name: string, run: () => Promise): Promise { + try { + await run(); + } catch (err) { + throw new Error(`case failed: ${name}`, { cause: err }); + } finally { + resetModelFallbackTestState(); + } +} function makeFallbacksOnlyCfg(): OpenClawConfig { return { @@ -206,6 +217,7 @@ async function runWithStoredAuth(params: { } function setAuthRuntimeStore(agentDir: string | undefined, store: AuthProfileStore): void { + authSourceCheckMock.hasAnyAuthProfileStoreSource.mockReturnValue(true); authRuntimeMock.setStore(agentDir, store); } @@ -314,12 +326,6 @@ async function expectSkippedUnavailableProvider(params: { expect(result.attempts[0]?.reason).toBe(params.expectedReason); } -// OpenAI 429 example shape: https://help.openai.com/en/articles/5955604-how-can-i-solve-429-too-many-requests-errors -const OPENAI_RATE_LIMIT_MESSAGE = - "Rate limit reached for gpt-4.1-mini in organization org_test on requests per min. Limit: 3.000000 / min. Current: 3.000000 / min."; -// Anthropic overloaded_error example shape: https://docs.anthropic.com/en/api/errors -const ANTHROPIC_OVERLOADED_PAYLOAD = - '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"},"request_id":"req_test"}'; // Issue-backed Anthropic/OpenAI-compatible insufficient_quota payload under HTTP 400: // https://github.com/openclaw/openclaw/issues/23440 const INSUFFICIENT_QUOTA_PAYLOAD = @@ -346,76 +352,75 @@ describe("runWithModelFallback", () => { expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini"); }); - it("keeps openai gpt-5.3 codex on the openai provider before running", async () => { - const cfg = makeCfg(); - const run = vi.fn().mockResolvedValueOnce("ok"); - - const result = await runWithModelFallback({ - cfg, - provider: "openai", - model: "gpt-5.4", - run, - }); - - expect(result.result).toBe("ok"); - expect(run).toHaveBeenCalledTimes(1); - expect(run).toHaveBeenCalledWith("openai", "gpt-5.4"); - }); - - it("resolves a persisted bare primary alias before running", async () => { - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "anthropic/claude-sonnet-4-6", - fallbacks: [], - }, - models: { - "anthropic/claude-sonnet-4-6": { alias: "sonnet" }, - }, - }, + it("resolves primary model aliases before running", async () => { + const cases = [ + { + name: "keeps openai gpt-5.4 on provider", + cfg: makeCfg(), + provider: "openai", + model: "gpt-5.4", + expected: ["openai", "gpt-5.4"], }, - }); - const run = vi.fn().mockResolvedValueOnce("ok"); - - const result = await runWithModelFallback({ - cfg, - provider: "anthropic", - model: "sonnet", - run, - }); - - expect(result.result).toBe("ok"); - expect(run).toHaveBeenCalledTimes(1); - expect(run).toHaveBeenCalledWith("anthropic", "claude-sonnet-4-6"); - }); - - it("resolves a slash-form primary alias before provider/model parsing", async () => { - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "openai/xiaomi/mimo-v2-pro-mit", - fallbacks: [], + { + name: "resolves bare alias", + cfg: makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-sonnet-4-6", + fallbacks: [], + }, + models: { + "anthropic/claude-sonnet-4-6": { alias: "sonnet" }, + }, + }, }, - models: { - "openai/xiaomi/mimo-v2-pro-mit": { alias: "xiaomi/mimo-v2-pro-mit" }, - }, - }, + }), + provider: "anthropic", + model: "sonnet", + expected: ["anthropic", "claude-sonnet-4-6"], }, - }); - const run = vi.fn().mockResolvedValueOnce("ok"); + { + name: "resolves slash-form alias before provider parsing", + cfg: makeCfg({ + agents: { + defaults: { + model: { + primary: "openai/xiaomi/mimo-v2-pro-mit", + fallbacks: [], + }, + models: { + "openai/xiaomi/mimo-v2-pro-mit": { alias: "xiaomi/mimo-v2-pro-mit" }, + }, + }, + }, + }), + provider: "xiaomi", + model: "mimo-v2-pro-mit", + expected: ["openai", "xiaomi/mimo-v2-pro-mit"], + }, + ] satisfies Array<{ + name: string; + cfg: OpenClawConfig; + provider: string; + model: string; + expected: [string, string]; + }>; - const result = await runWithModelFallback({ - cfg, - provider: "xiaomi", - model: "mimo-v2-pro-mit", - run, - }); + for (const testCase of cases) { + await runModelFallbackCase(testCase.name, async () => { + const candidates = __testing.resolveFallbackCandidates({ + cfg: testCase.cfg, + provider: testCase.provider, + model: testCase.model, + }); - expect(result.result).toBe("ok"); - expect(run).toHaveBeenCalledTimes(1); - expect(run).toHaveBeenCalledWith("openai", "xiaomi/mimo-v2-pro-mit"); + expect(candidates[0]).toEqual({ + provider: testCase.expected[0], + model: testCase.expected[1], + }); + }); + } }); it("falls back on unrecognized errors when candidates remain", async () => { @@ -887,7 +892,7 @@ describe("runWithModelFallback", () => { ]); }); - it("falls back on auth errors", async () => { + it("falls back to the configured haiku candidate for retryable provider failures", async () => { await expectFallsBackToHaiku({ provider: "openai", model: "gpt-4.1-mini", @@ -1000,44 +1005,6 @@ describe("runWithModelFallback", () => { ]); }); - it("falls back on transient HTTP 5xx errors", async () => { - await expectFallsBackToHaiku({ - provider: "openai", - model: "gpt-4.1-mini", - firstError: new Error( - "521 Web server is downCloudflare", - ), - }); - }); - - it("falls back on 402 payment required", async () => { - await expectFallsBackToHaiku({ - provider: "openai", - model: "gpt-4.1-mini", - firstError: Object.assign(new Error("payment required"), { status: 402 }), - }); - }); - - it("falls back on billing errors", async () => { - await expectFallsBackToHaiku({ - provider: "openai", - model: "gpt-4.1-mini", - firstError: new Error( - "LLM request rejected: Your credit balance is too low to access the Anthropic API. Please go to Plans & Billing to upgrade or purchase credits.", - ), - }); - }); - - it("falls back on bare leading 402 quota-refresh errors", async () => { - await expectFallsBackToHaiku({ - provider: "openai", - model: "gpt-4.1-mini", - firstError: new Error( - "402 You have reached your subscription quota limit. Please wait for automatic quota refresh in the rolling time window, upgrade to a higher plan, or use a Pay-As-You-Go API Key for unlimited access.", - ), - }); - }); - it("records 400 insufficient_quota payloads as billing during fallback", async () => { const cfg = makeCfg(); const run = vi @@ -1081,94 +1048,52 @@ describe("runWithModelFallback", () => { ]); }); - it("falls back on unknown model errors", async () => { - const cfg = makeCfg(); - const run = vi - .fn() - .mockRejectedValueOnce(new Error("Unknown model: anthropic/claude-opus-4-6")) - .mockResolvedValueOnce("ok"); + it("falls back on model-not-found error shapes", async () => { + const cases = [ + { + name: "unknown anthropic override", + provider: "anthropic", + model: "claude-opus-4-6", + error: new Error("Unknown model: anthropic/claude-opus-4-6"), + expectedFallback: ["openai", "gpt-4.1-mini"], + }, + { + name: "openai model not found", + provider: "openai", + model: "gpt-6", + error: new Error("Model not found: openai/gpt-6"), + expectedFallback: ["anthropic", "claude-haiku-3-5"], + }, + ] satisfies Array<{ + name: string; + provider: string; + model: string; + error: Error; + expectedFallback: [string, string]; + expectedReason?: string; + }>; - const result = await runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-6", - run, - }); + for (const testCase of cases) { + await runModelFallbackCase(testCase.name, async () => { + const cfg = makeCfg(); + const run = vi.fn().mockRejectedValueOnce(testCase.error).mockResolvedValueOnce("ok"); - // Override model failed with model_not_found → falls back to configured primary. - // (Same candidate-resolution path as other override-model failures.) - expect(result.result).toBe("ok"); - expect(run).toHaveBeenCalledTimes(2); - expect(run.mock.calls[1]?.[0]).toBe("openai"); - expect(run.mock.calls[1]?.[1]).toBe("gpt-4.1-mini"); - }); + const result = await runWithModelFallback({ + cfg, + provider: testCase.provider, + model: testCase.model, + run, + }); - it("falls back on model not found errors", async () => { - const cfg = makeCfg(); - const run = vi - .fn() - .mockRejectedValueOnce(new Error("Model not found: openai/gpt-6")) - .mockResolvedValueOnce("ok"); - - const result = await runWithModelFallback({ - cfg, - provider: "openai", - model: "gpt-6", - run, - }); - - // Override model failed with model_not_found → tries fallbacks first (same provider). - expect(result.result).toBe("ok"); - expect(run).toHaveBeenCalledTimes(2); - expect(run.mock.calls[1]?.[0]).toBe("anthropic"); - expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5"); - }); - - it("falls back on JSON-wrapped OpenRouter stealth-model 404s", async () => { - const cfg = makeCfg(); - const run = vi - .fn() - .mockRejectedValueOnce(new Error(OPENROUTER_MODEL_NOT_FOUND_PAYLOAD)) - .mockResolvedValueOnce("ok"); - - const result = await runWithModelFallback({ - cfg, - provider: "openrouter", - model: "openrouter/healer-alpha", - run, - }); - - expect(result.result).toBe("ok"); - expect(run).toHaveBeenCalledTimes(2); - expect(run.mock.calls[1]?.[0]).toBe("openai"); - expect(run.mock.calls[1]?.[1]).toBe("gpt-4.1-mini"); - }); - - it("records invalid-model HTTP 400 responses as model_not_found during fallback", async () => { - const cfg = makeCfg(); - const run = vi - .fn() - .mockRejectedValueOnce( - Object.assign( - new Error("HTTP 400: openrouter/__invalid_test_model__ is not a valid model ID"), - { status: 400 }, - ), - ) - .mockResolvedValueOnce("ok"); - - const result = await runWithModelFallback({ - cfg, - provider: "openrouter", - model: "__invalid_test_model__", - run, - }); - - expect(result.result).toBe("ok"); - expect(run).toHaveBeenCalledTimes(2); - expect(result.attempts).toHaveLength(1); - expect(result.attempts[0]?.reason).toBe("model_not_found"); - expect(run.mock.calls[1]?.[0]).toBe("openai"); - expect(run.mock.calls[1]?.[1]).toBe("gpt-4.1-mini"); + expect(result.result).toBe("ok"); + expect(run).toHaveBeenCalledTimes(2); + expect(run.mock.calls[1]).toEqual(testCase.expectedFallback); + if (testCase.expectedReason) { + expect(result.attempts).toHaveLength(1); + expect(result.attempts[0]?.reason).toBe(testCase.expectedReason); + } + }); + } }); it("warns when falling back due to model_not_found", async () => { @@ -1468,27 +1393,14 @@ describe("runWithModelFallback", () => { it("uses fallbacksOverride instead of agents.defaults.model.fallbacks", async () => { const cfg = makeFallbacksOnlyCfg(); - const calls: Array<{ provider: string; model: string }> = []; - - const res = await runWithModelFallback({ + const candidates = __testing.resolveFallbackCandidates({ cfg, provider: "anthropic", model: "claude-opus-4-5", fallbacksOverride: ["openai/gpt-4.1"], - run: async (provider, model) => { - calls.push({ provider, model }); - if (provider === "anthropic") { - throw Object.assign(new Error("nope"), { status: 401 }); - } - if (provider === "openai" && model === "gpt-4.1") { - return "ok"; - } - throw new Error(`unexpected candidate: ${provider}/${model}`); - }, }); - expect(res.result).toBe("ok"); - expect(calls).toEqual([ + expect(candidates).toEqual([ { provider: "anthropic", model: "claude-opus-4-5" }, { provider: "openai", model: "gpt-4.1" }, ]); @@ -1497,22 +1409,14 @@ describe("runWithModelFallback", () => { it("treats an empty fallbacksOverride as disabling global fallbacks", async () => { const cfg = makeFallbacksOnlyCfg(); - const calls: Array<{ provider: string; model: string }> = []; + const candidates = __testing.resolveFallbackCandidates({ + cfg, + provider: "anthropic", + model: "claude-opus-4-5", + fallbacksOverride: [], + }); - await expect( - runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-5", - fallbacksOverride: [], - run: async (provider, model) => { - calls.push({ provider, model }); - throw new Error("primary failed"); - }, - }), - ).rejects.toThrow("primary failed"); - - expect(calls).toEqual([{ provider: "anthropic", model: "claude-opus-4-5" }]); + expect(candidates).toEqual([{ provider: "anthropic", model: "claude-opus-4-5" }]); }); it("keeps explicit fallbacks reachable when models allowlist is present", async () => { @@ -1529,22 +1433,16 @@ describe("runWithModelFallback", () => { }, }, }); - const run = vi - .fn() - .mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 })) - .mockResolvedValueOnce("ok"); - - const result = await runWithModelFallback({ + const candidates = __testing.resolveFallbackCandidates({ cfg, provider: "anthropic", model: "claude-sonnet-4", - run, }); - expect(result.result).toBe("ok"); - expect(run.mock.calls).toEqual([ - ["anthropic", "claude-sonnet-4"], - ["openai", "gpt-4o"], + expect(candidates).toEqual([ + { provider: "anthropic", model: "claude-sonnet-4" }, + { provider: "openai", model: "gpt-4o" }, + { provider: "ollama", model: "llama-3" }, ]); }); @@ -1560,67 +1458,13 @@ describe("runWithModelFallback", () => { }, }); - const calls: Array<{ provider: string; model: string }> = []; - - const result = await runWithModelFallback({ + const candidates = __testing.resolveFallbackCandidates({ cfg, provider: undefined as unknown as string, model: undefined as unknown as string, - run: async (provider, model) => { - calls.push({ provider, model }); - return "ok"; - }, }); - expect(result.result).toBe("ok"); - expect(calls).toEqual([{ provider: "openai", model: "gpt-4.1-mini" }]); - }); - - it("falls back on representative transient/provider error shapes", async () => { - // The full classification matrix lives in failover-error tests; keep this as integration - // coverage that classified errors actually advance the model fallback chain. - const cases: Array<{ name: string; firstError: Error }> = [ - { - name: "missing API key", - firstError: new Error("No API key found for profile openai."), - }, - { - name: "lowercase credential", - firstError: new Error("no api key found for profile openai"), - }, - { - name: "documented OpenAI 429 rate limit", - firstError: Object.assign(new Error(OPENAI_RATE_LIMIT_MESSAGE), { status: 429 }), - }, - { - name: "documented overloaded_error payload", - firstError: new Error(ANTHROPIC_OVERLOADED_PAYLOAD), - }, - { - name: "provider request aborted", - firstError: Object.assign(new Error("Request was aborted"), { name: "AbortError" }), - }, - { - name: "bare undici terminated transport failure", - firstError: new Error("terminated"), - }, - { - name: "bare Codex transport failure", - firstError: new Error("Request failed"), - }, - ]; - - for (const { name, firstError } of cases) { - try { - await expectFallsBackToHaiku({ - provider: "openai", - model: "gpt-4.1-mini", - firstError, - }); - } catch (error) { - throw new Error(`fallback case failed: ${name}`, { cause: error }); - } - } + expect(candidates).toEqual([{ provider: "openai", model: "gpt-4.1-mini" }]); }); it("does not fall back on user aborts", async () => { @@ -1673,123 +1517,105 @@ describe("runWithModelFallback", () => { // Tests for Bug A fix: Model fallback with session overrides describe("fallback behavior with session model overrides", () => { - it("allows fallbacks when session model differs from config within same provider", async () => { - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "anthropic/claude-opus-4-6", - fallbacks: ["anthropic/claude-sonnet-4-5", "google/gemini-2.5-flash"], + it("keeps fallback ordering correct across session overrides", async () => { + const cases = [ + { + name: "same provider versioned session model", + cfg: makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["anthropic/claude-sonnet-4-5", "google/gemini-2.5-flash"], + }, + }, }, - }, + }), + provider: "anthropic", + model: "claude-sonnet-4-20250514", + calls: [ + ["anthropic", "claude-sonnet-4-20250514"], + ["anthropic", "claude-sonnet-4-5"], + ], }, - }); - - const run = vi - .fn() - .mockRejectedValueOnce(new Error("Rate limit exceeded")) // Session model fails - .mockResolvedValueOnce("fallback success"); // First fallback succeeds - - const result = await runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-sonnet-4-20250514", // Different from config primary - run, - }); - - expect(result.result).toBe("fallback success"); - expect(run).toHaveBeenCalledTimes(2); - expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-20250514"); - expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-sonnet-4-5"); // Fallback tried - }); - - it("allows fallbacks with model version differences within same provider", async () => { - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "anthropic/claude-opus-4-6", - fallbacks: ["groq/llama-3.3-70b-versatile"], + { + name: "same provider model version difference", + cfg: makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["groq/llama-3.3-70b-versatile"], + }, + }, }, - }, + }), + provider: "anthropic", + model: "claude-opus-4-5", + calls: [ + ["anthropic", "claude-opus-4-5"], + ["groq", "llama-3.3-70b-versatile"], + ], }, - }); - - const run = vi - .fn() - .mockRejectedValueOnce(new Error("Weekly quota exceeded")) - .mockResolvedValueOnce("groq success"); - - const result = await runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-5", // Version difference from config - run, - }); - - expect(result.result).toBe("groq success"); - expect(run).toHaveBeenCalledTimes(2); - expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); - }); - - it("still skips fallbacks when using different provider than config", async () => { - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "anthropic/claude-opus-4-6", - fallbacks: [], // Empty fallbacks to match working pattern + { + name: "different provider skips configured fallbacks", + cfg: makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: [], + }, + }, }, - }, + }), + provider: "openai", + model: "gpt-4.1-mini", + calls: [ + ["openai", "gpt-4.1-mini"], + ["anthropic", "claude-opus-4-6"], + ], }, - }); - - const run = vi - .fn() - .mockRejectedValueOnce(new Error('No credentials found for profile "openai:default".')) - .mockResolvedValueOnce("config primary worked"); - - const result = await runWithModelFallback({ - cfg, - provider: "openai", // Different provider - model: "gpt-4.1-mini", - run, - }); - - // Cross-provider requests should skip configured fallbacks but still try configured primary - expect(result.result).toBe("config primary worked"); - expect(run).toHaveBeenCalledTimes(2); - expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini"); // Original request - expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-opus-4-6"); // Config primary as final fallback - }); - - it("uses fallbacks when session model exactly matches config primary", async () => { - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "anthropic/claude-opus-4-6", - fallbacks: ["groq/llama-3.3-70b-versatile"], + { + name: "exact primary uses fallbacks", + cfg: makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["groq/llama-3.3-70b-versatile"], + }, + }, }, - }, + }), + provider: "anthropic", + model: "claude-opus-4-6", + calls: [ + ["anthropic", "claude-opus-4-6"], + ["groq", "llama-3.3-70b-versatile"], + ], }, - }); + ] satisfies Array<{ + name: string; + cfg: OpenClawConfig; + provider: string; + model: string; + calls: Array<[string, string]>; + }>; - const run = vi - .fn() - .mockRejectedValueOnce(new Error("Quota exceeded")) - .mockResolvedValueOnce("fallback worked"); + for (const testCase of cases) { + await runModelFallbackCase(testCase.name, async () => { + const candidates = __testing.resolveFallbackCandidates({ + cfg: testCase.cfg, + provider: testCase.provider, + model: testCase.model, + }); - const result = await runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-6", // Exact match - run, - }); - - expect(result.result).toBe("fallback worked"); - expect(run).toHaveBeenCalledTimes(2); - expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); + expect(candidates.slice(0, testCase.calls.length)).toEqual( + testCase.calls.map(([provider, model]) => ({ provider, model })), + ); + }); + } }); }); @@ -1822,34 +1648,38 @@ describe("runWithModelFallback", () => { return { dir: tmpDir }; } - it("attempts same-provider fallbacks during rate limit cooldown", async () => { - const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit"); - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "anthropic/claude-opus-4-6", - fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], + it("attempts same-provider fallbacks during transient cooldowns", async () => { + for (const reason of ["rate_limit", "overloaded", "timeout"] as const) { + await runModelFallbackCase(reason, async () => { + const { dir } = await makeAuthStoreWithCooldown("anthropic", reason); + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], + }, + }, }, - }, - }, - }); + }); - const run = vi.fn().mockResolvedValueOnce("sonnet success"); + const run = vi.fn().mockResolvedValueOnce("sonnet success"); - const result = await runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-6", - run, - agentDir: dir, - }); + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", + run, + agentDir: dir, + }); - expect(result.result).toBe("sonnet success"); - expect(run).toHaveBeenCalledTimes(1); - expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", { - allowTransientCooldownProbe: true, - }); + expect(result.result).toBe("sonnet success"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", { + allowTransientCooldownProbe: true, + }); + }); + } }); it("keeps alias-resolved primary models subject to transient cooldowns", async () => { @@ -1885,120 +1715,36 @@ describe("runWithModelFallback", () => { }); }); - it("attempts same-provider fallbacks during overloaded cooldown", async () => { - const { dir } = await makeAuthStoreWithCooldown("anthropic", "overloaded"); - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "anthropic/claude-opus-4-6", - fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], + it("skips same-provider models on persistent cooldowns", async () => { + for (const reason of ["auth", "billing"] as const) { + await runModelFallbackCase(reason, async () => { + const { dir } = await makeAuthStoreWithCooldown("anthropic", reason); + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "anthropic/claude-opus-4-6", + fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], + }, + }, }, - }, - }, - }); + }); - const run = vi.fn().mockResolvedValueOnce("sonnet success"); + const run = vi.fn().mockResolvedValueOnce("groq success"); - const result = await runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-6", - run, - agentDir: dir, - }); + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", + run, + agentDir: dir, + }); - expect(result.result).toBe("sonnet success"); - expect(run).toHaveBeenCalledTimes(1); - expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", { - allowTransientCooldownProbe: true, - }); - }); - - it("attempts same-provider fallbacks during timeout cooldown", async () => { - const { dir } = await makeAuthStoreWithCooldown("anthropic", "timeout"); - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "anthropic/claude-opus-4-6", - fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], - }, - }, - }, - }); - - const run = vi.fn().mockResolvedValueOnce("sonnet success"); - - const result = await runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-6", - run, - agentDir: dir, - }); - - expect(result.result).toBe("sonnet success"); - expect(run).toHaveBeenCalledTimes(1); - expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", { - allowTransientCooldownProbe: true, - }); - }); - - it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => { - const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth"); - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "anthropic/claude-opus-4-6", - fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], - }, - }, - }, - }); - - const run = vi.fn().mockResolvedValueOnce("groq success"); - - const result = await runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-6", - run, - agentDir: dir, - }); - - expect(result.result).toBe("groq success"); - expect(run).toHaveBeenCalledTimes(1); - expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile"); - }); - - it("skips same-provider models on billing cooldown but still tries no-profile fallback providers", async () => { - const { dir } = await makeAuthStoreWithCooldown("anthropic", "billing"); - const cfg = makeCfg({ - agents: { - defaults: { - model: { - primary: "anthropic/claude-opus-4-6", - fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"], - }, - }, - }, - }); - - const run = vi.fn().mockResolvedValueOnce("groq success"); - - const result = await runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-6", - run, - agentDir: dir, - }); - - expect(result.result).toBe("groq success"); - expect(run).toHaveBeenCalledTimes(1); - expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile"); + expect(result.result).toBe("groq success"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile"); + }); + } }); it("tries cross-provider fallbacks when same provider has rate limit", async () => { @@ -2131,49 +1877,58 @@ describe("runWithModelFallback", () => { }); describe("runWithImageModelFallback", () => { - it("inherits the configured image-model provider for bare override ids", async () => { - const cfg = makeCfg({ - agents: { - defaults: { - imageModel: { - primary: "openai-codex/gpt-5.4", - fallbacks: ["openai-codex/gpt-5.4-mini"], + it("resolves image-model override providers", async () => { + const cases = [ + { + name: "bare override inherits configured provider", + cfg: makeCfg({ + agents: { + defaults: { + imageModel: { + primary: "openai-codex/gpt-5.4", + fallbacks: ["openai-codex/gpt-5.4-mini"], + }, + }, }, - }, + }), + modelOverride: "gpt-5.4-mini", + expected: [["openai-codex", "gpt-5.4-mini"]], }, - }); - const run = vi.fn().mockResolvedValueOnce("ok"); - - const result = await runWithImageModelFallback({ - cfg, - modelOverride: "gpt-5.4-mini", - run, - }); - - expect(result.result).toBe("ok"); - expect(run.mock.calls).toEqual([["openai-codex", "gpt-5.4-mini"]]); - }); - - it("keeps a fully qualified override provider over the configured default", async () => { - const cfg = makeCfg({ - agents: { - defaults: { - imageModel: { - primary: "openai-codex/gpt-5.4", + { + name: "qualified override keeps provider", + cfg: makeCfg({ + agents: { + defaults: { + imageModel: { + primary: "openai-codex/gpt-5.4", + }, + }, }, - }, + }), + modelOverride: "google/gemini-3-pro-image", + expected: [["google", "gemini-3-pro-image"]], }, - }); - const run = vi.fn().mockResolvedValueOnce("ok"); + ] satisfies Array<{ + name: string; + cfg: OpenClawConfig; + modelOverride: string; + expected: Array<[string, string]>; + }>; - const result = await runWithImageModelFallback({ - cfg, - modelOverride: "google/gemini-3-pro-image", - run, - }); + for (const testCase of cases) { + await runModelFallbackCase(testCase.name, async () => { + const run = vi.fn().mockResolvedValueOnce("ok"); - expect(result.result).toBe("ok"); - expect(run.mock.calls).toEqual([["google", "gemini-3-pro-image"]]); + const result = await runWithImageModelFallback({ + cfg: testCase.cfg, + modelOverride: testCase.modelOverride, + run, + }); + + expect(result.result).toBe("ok"); + expect(run.mock.calls).toEqual(testCase.expected); + }); + } }); it("keeps explicit image fallbacks reachable when models allowlist is present", async () => { diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 0ff9ba8b31c..d8f9800ed7c 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -509,6 +509,11 @@ function resolveImageFallbackDefaultProvider(cfg: OpenClawConfig | undefined): s return DEFAULT_PROVIDER; } +export const __testing = { + resolveFallbackCandidates, + resolveImageFallbackCandidates, +} as const; + function resolveFallbackCandidates(params: { cfg: OpenClawConfig | undefined; provider: string; diff --git a/src/agents/model-selection.test.ts b/src/agents/model-selection.test.ts index 4822201832b..e4c7be8629e 100644 --- a/src/agents/model-selection.test.ts +++ b/src/agents/model-selection.test.ts @@ -25,6 +25,10 @@ import { resolveModelRefFromString, } from "./model-selection.js"; +vi.mock("./provider-model-normalization.runtime.js", () => ({ + normalizeProviderModelIdWithRuntime: () => undefined, +})); + const EXPLICIT_ALLOWLIST_CONFIG = { agents: { defaults: { @@ -195,11 +199,14 @@ describe("model-selection", () => { expected: { provider: string; model: string }, ) => { for (const raw of variants) { - expect(parseModelRef(raw, defaultProvider), raw).toEqual(expected); + expect( + parseModelRef(raw, defaultProvider, { allowPluginNormalization: false }), + raw, + ).toEqual(expected); } }; - it.each([ + const parseModelRefCases = [ { name: "parses explicit provider/model refs", variants: ["anthropic/claude-3-5-sonnet"], @@ -335,19 +342,30 @@ describe("model-selection", () => { defaultProvider: "google-vertex", expected: { provider: "google-vertex", model: "gemini-3.1-flash-lite-preview" }, }, - ])("$name", ({ variants, defaultProvider, expected }) => { - expectParsedModelVariants(variants, defaultProvider, expected); + ]; + + it("parses and normalizes provider/model refs", () => { + for (const { variants, defaultProvider, expected } of parseModelRefCases) { + expectParsedModelVariants(variants, defaultProvider, expected); + } }); it("round-trips normalized refs through modelKey", () => { - const parsed = parseModelRef(" opus-4.6 ", "anthropic"); + const parsed = parseModelRef(" opus-4.6 ", "anthropic", { + allowPluginNormalization: false, + }); expect(parsed).toEqual({ provider: "anthropic", model: "claude-opus-4-6" }); expect(modelKey(parsed?.provider ?? "", parsed?.model ?? "")).toBe( "anthropic/claude-opus-4-6", ); }); - it.each(["", " ", "/", "anthropic/", "/model"])("returns null for invalid ref %j", (raw) => { - expect(parseModelRef(raw, "anthropic")).toBeNull(); + it("returns null for invalid refs", () => { + for (const raw of ["", " ", "/", "anthropic/", "/model"]) { + expect( + parseModelRef(raw, "anthropic", { allowPluginNormalization: false }), + raw, + ).toBeNull(); + } }); }); diff --git a/src/agents/outcome-fallback-runtime-contract.test.ts b/src/agents/outcome-fallback-runtime-contract.test.ts index d5fe9ecdaa2..6e4e497a341 100644 --- a/src/agents/outcome-fallback-runtime-contract.test.ts +++ b/src/agents/outcome-fallback-runtime-contract.test.ts @@ -13,11 +13,13 @@ vi.mock("./auth-profiles/source-check.js", () => ({ })); describe("Outcome/fallback runtime contract - Pi fallback classifier", () => { - it.each([ + const fallbackClassificationCases = [ ["empty", "empty_result"], ["reasoning-only", "reasoning_only_result"], ["planning-only", "planning_only_result"], - ] as const)( + ] as const; + + it.each(fallbackClassificationCases)( "maps harness classification %s to a format fallback code", (classification, code) => { expect( @@ -38,54 +40,47 @@ describe("Outcome/fallback runtime contract - Pi fallback classifier", () => { }, ); - it.each([ - ["empty", "empty_result"], - ["reasoning-only", "reasoning_only_result"], - ["planning-only", "planning_only_result"], - ] as const)( - "advances to the configured fallback after a classified GPT-5 %s terminal result", - async (classification, code) => { - const primary = createContractRunResult({ - meta: { - durationMs: 1, - agentHarnessResultClassification: classification, - }, - }); - const fallback = createContractRunResult({ - payloads: [{ text: "fallback ok" }], - meta: { durationMs: 1, finalAssistantVisibleText: "fallback ok" }, - }); - const run = vi.fn().mockResolvedValueOnce(primary).mockResolvedValueOnce(fallback); + it("advances to the configured fallback after a classified GPT-5 terminal result", async () => { + const primary = createContractRunResult({ + meta: { + durationMs: 1, + agentHarnessResultClassification: "empty", + }, + }); + const fallback = createContractRunResult({ + payloads: [{ text: "fallback ok" }], + meta: { durationMs: 1, finalAssistantVisibleText: "fallback ok" }, + }); + const run = vi.fn().mockResolvedValueOnce(primary).mockResolvedValueOnce(fallback); - const result = await runWithModelFallback({ - cfg: createContractFallbackConfig() as unknown as OpenClawConfig, - provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider, - model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel, - run, - classifyResult: ({ provider, model, result }) => - classifyEmbeddedPiRunResultForModelFallback({ - provider, - model, - result, - }), - }); + const result = await runWithModelFallback({ + cfg: createContractFallbackConfig() as unknown as OpenClawConfig, + provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider, + model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel, + run, + classifyResult: ({ provider, model, result }) => + classifyEmbeddedPiRunResultForModelFallback({ + provider, + model, + result, + }), + }); - expect(result.result).toBe(fallback); - expect(run).toHaveBeenCalledTimes(2); - expect(run.mock.calls[1]).toEqual([ - OUTCOME_FALLBACK_RUNTIME_CONTRACT.fallbackProvider, - OUTCOME_FALLBACK_RUNTIME_CONTRACT.fallbackModel, - ]); - expect(result.attempts[0]).toMatchObject({ - provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider, - model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel, - reason: "format", - code, - }); - }, - ); + expect(result.result).toBe(fallback); + expect(run).toHaveBeenCalledTimes(2); + expect(run.mock.calls[1]).toEqual([ + OUTCOME_FALLBACK_RUNTIME_CONTRACT.fallbackProvider, + OUTCOME_FALLBACK_RUNTIME_CONTRACT.fallbackModel, + ]); + expect(result.attempts[0]).toMatchObject({ + provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider, + model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel, + reason: "format", + code: "empty_result", + }); + }); - it.each([ + const nonFallbackCases = [ { name: "intentional NO_REPLY", result: createContractRunResult({ @@ -153,17 +148,24 @@ describe("Outcome/fallback runtime contract - Pi fallback classifier", () => { }), hasBlockReplyPipelineOutput: true, }, - ])("does not fallback for $name", async (contractCase) => { - expect( - classifyEmbeddedPiRunResultForModelFallback({ - provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider, - model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel, - result: contractCase.result, - hasDirectlySentBlockReply: contractCase.hasDirectlySentBlockReply, - hasBlockReplyPipelineOutput: contractCase.hasBlockReplyPipelineOutput, - }), - ).toBeNull(); + ]; + it("does not classify terminal results with visible output or side effects as fallbacks", () => { + for (const contractCase of nonFallbackCases) { + expect( + classifyEmbeddedPiRunResultForModelFallback({ + provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider, + model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel, + result: contractCase.result, + hasDirectlySentBlockReply: contractCase.hasDirectlySentBlockReply, + hasBlockReplyPipelineOutput: contractCase.hasBlockReplyPipelineOutput, + }), + ).toBeNull(); + } + }); + + it("keeps running on the primary when terminal output is not classified as fallback", async () => { + const contractCase = nonFallbackCases[0]; const run = vi.fn().mockResolvedValue(contractCase.result); const result = await runWithModelFallback({ cfg: createContractFallbackConfig() as unknown as OpenClawConfig, diff --git a/src/agents/pi-tools.policy.test.ts b/src/agents/pi-tools.policy.test.ts index 0ec3a496c93..8658b3004e3 100644 --- a/src/agents/pi-tools.policy.test.ts +++ b/src/agents/pi-tools.policy.test.ts @@ -1,7 +1,7 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; import { filterToolsByPolicy, @@ -15,6 +15,15 @@ import { import { createStubTool } from "./test-helpers/pi-tool-stubs.js"; import { providerAliasCases } from "./test-helpers/provider-alias-cases.js"; +vi.mock("../channels/plugins/session-conversation.js", () => ({ + resolveSessionConversation: ({ rawId }: { rawId: string }) => ({ + id: rawId, + threadId: undefined, + baseConversationId: rawId, + parentConversationCandidates: [], + }), +})); + describe("pi-tools.policy", () => { it("treats * in allow as allow-all", () => { const tools = [createStubTool("read"), createStubTool("exec")]; diff --git a/src/agents/tools/message-tool.test.ts b/src/agents/tools/message-tool.test.ts index 2a1d6bd662e..ad3a94a9446 100644 --- a/src/agents/tools/message-tool.test.ts +++ b/src/agents/tools/message-tool.test.ts @@ -103,6 +103,19 @@ const mocks = vi.hoisted(() => ({ ), })); +const openClawToolsFactoryMocks = vi.hoisted(() => { + const tool = (name: string) => ({ + name, + displaySummary: `${name} test stub`, + description: `${name} test stub`, + parameters: { type: "object", properties: {} }, + execute: vi.fn(async () => ({ type: "json", data: { ok: true } })), + }); + return { + tool, + }; +}); + vi.mock("../../infra/outbound/message-action-runner.js", async () => { const actual = await vi.importActual< typeof import("../../infra/outbound/message-action-runner.js") @@ -130,6 +143,76 @@ vi.mock("../../cli/command-secret-targets.js", () => ({ getScopedChannelsCommandSecretTargets: mocks.getScopedChannelsCommandSecretTargets, })); +vi.mock("./agents-list-tool.js", () => ({ + createAgentsListTool: () => openClawToolsFactoryMocks.tool("agents"), +})); +vi.mock("./canvas-tool.js", () => ({ + createCanvasTool: () => openClawToolsFactoryMocks.tool("canvas"), +})); +vi.mock("./cron-tool.js", () => ({ + createCronTool: () => openClawToolsFactoryMocks.tool("cron"), +})); +vi.mock("./gateway-tool.js", () => ({ + createGatewayTool: () => openClawToolsFactoryMocks.tool("gateway"), +})); +vi.mock("./heartbeat-response-tool.js", () => ({ + createHeartbeatResponseTool: () => openClawToolsFactoryMocks.tool("heartbeat_response"), +})); +vi.mock("./image-generate-tool.js", () => ({ + createImageGenerateTool: () => null, +})); +vi.mock("./image-tool.js", () => ({ + createImageTool: () => null, +})); +vi.mock("./manifest-capability-availability.js", () => ({ + hasSnapshotCapabilityAvailability: () => false, + hasSnapshotProviderEnvAvailability: () => false, + loadCapabilityMetadataSnapshot: () => ({ index: {}, plugins: [] }), +})); +vi.mock("./music-generate-tool.js", () => ({ + createMusicGenerateTool: () => null, +})); +vi.mock("./nodes-tool.js", () => ({ + createNodesTool: () => openClawToolsFactoryMocks.tool("nodes"), +})); +vi.mock("./pdf-tool.js", () => ({ + createPdfTool: () => null, +})); +vi.mock("./session-status-tool.js", () => ({ + createSessionStatusTool: () => openClawToolsFactoryMocks.tool("session_status"), +})); +vi.mock("./sessions-history-tool.js", () => ({ + createSessionsHistoryTool: () => openClawToolsFactoryMocks.tool("sessions_history"), +})); +vi.mock("./sessions-list-tool.js", () => ({ + createSessionsListTool: () => openClawToolsFactoryMocks.tool("sessions_list"), +})); +vi.mock("./sessions-send-tool.js", () => ({ + createSessionsSendTool: () => openClawToolsFactoryMocks.tool("sessions_send"), +})); +vi.mock("./sessions-spawn-tool.js", () => ({ + createSessionsSpawnTool: () => openClawToolsFactoryMocks.tool("sessions_spawn"), +})); +vi.mock("./sessions-yield-tool.js", () => ({ + createSessionsYieldTool: () => openClawToolsFactoryMocks.tool("sessions_yield"), +})); +vi.mock("./subagents-tool.js", () => ({ + createSubagentsTool: () => openClawToolsFactoryMocks.tool("subagents"), +})); +vi.mock("./tts-tool.js", () => ({ + createTtsTool: () => openClawToolsFactoryMocks.tool("tts"), +})); +vi.mock("./update-plan-tool.js", () => ({ + createUpdatePlanTool: () => openClawToolsFactoryMocks.tool("update_plan"), +})); +vi.mock("./video-generate-tool.js", () => ({ + createVideoGenerateTool: () => null, +})); +vi.mock("./web-tools.js", () => ({ + createWebFetchTool: () => openClawToolsFactoryMocks.tool("web_fetch"), + createWebSearchTool: () => openClawToolsFactoryMocks.tool("web_search"), +})); + function mockSendResult(overrides: { channel?: string; to?: string } = {}) { mocks.runMessageAction.mockClear(); mocks.runMessageAction.mockResolvedValue({ diff --git a/src/config/doc-baseline.integration.test.ts b/src/config/doc-baseline.integration.test.ts index f945f363d1e..e410b5b08df 100644 --- a/src/config/doc-baseline.integration.test.ts +++ b/src/config/doc-baseline.integration.test.ts @@ -1,6 +1,6 @@ import fs from "node:fs/promises"; import path from "node:path"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import { withTempDir } from "../test-helpers/temp-dir.js"; import { type ConfigDocBaselineEntry, @@ -9,6 +9,14 @@ import { writeConfigDocBaselineArtifacts, } from "./doc-baseline.js"; +vi.mock("./doc-baseline.runtime.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + collectBundledChannelConfigs: () => undefined, + }; +}); + describe("config doc baseline integration", () => { let sharedRenderedPromise: Promise< Awaited>