From cb42efb6e660b39ad7ab94e23a454105bfcbc7a3 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Wed, 6 May 2026 00:52:26 +0100
Subject: [PATCH] test: trim slow agent fallback coverage

---
 src/agents/model-fallback.test.ts             | 907 +++++++-----------
 src/agents/model-fallback.ts                  |   5 +
 src/agents/model-selection.test.ts            |  32 +-
 .../outcome-fallback-runtime-contract.test.ts | 116 +--
 src/agents/pi-tools.policy.test.ts            |  11 +-
 src/agents/tools/message-tool.test.ts         |  83 ++
 src/config/doc-baseline.integration.test.ts   |  10 +-
 7 files changed, 522 insertions(+), 642 deletions(-)
diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts
index a14200b0b64..f71441212a7 100644
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -10,6 +10,7 @@ import { FailoverError } from "./failover-error.js";
 import { LiveSessionModelSwitchError } from "./live-model-switch-error.js";
 import {
   FallbackSummaryError,
+  __testing,
   runWithImageModelFallback,
   runWithModelFallback,
 } from "./model-fallback.js";
@@ -27,7 +28,7 @@ vi.mock("../plugins/provider-runtime.js", () => ({
 }));
 
 const authSourceCheckMock = vi.hoisted(() => ({
-  hasAnyAuthProfileStoreSource: vi.fn(() => true),
+  hasAnyAuthProfileStoreSource: vi.fn(() => false),
 }));
 
 vi.mock("./auth-profiles/source-check.js", () => authSourceCheckMock);
@@ -137,17 +138,27 @@ const authRuntimeMock = vi.hoisted(() => {
 vi.mock("./model-fallback-auth.runtime.js", () => authRuntimeMock.runtime);
 
 const makeCfg = makeModelFallbackCfg;
-const OPENROUTER_MODEL_NOT_FOUND_PAYLOAD =
-  '{"error":{"message":"Healer Alpha was a stealth model revealed on March 18th as an early testing version of MiMo-V2-Omni. Find it here: https://openrouter.ai/xiaomi/mimo-v2-omni","code":404},"user_id":"user_33GTyP8uDSYYbaeBO48AGHXyuMC"}';
 let authTempRoot = "";
 let authTempCounter = 0;
 
-afterEach(() => {
+function resetModelFallbackTestState(): void {
   authRuntimeMock.clear();
   authRuntimeMock.runtime.ensureAuthProfileStore.mockClear();
   authRuntimeMock.runtime.loadAuthProfileStoreForRuntime.mockClear();
-  authSourceCheckMock.hasAnyAuthProfileStoreSource.mockReset().mockReturnValue(true);
-});
+  authSourceCheckMock.hasAnyAuthProfileStoreSource.mockReset().mockReturnValue(false);
+}
+
+afterEach(resetModelFallbackTestState);
+
+async function runModelFallbackCase(name: string, run: () => Promise<void>): Promise<void> {
+  try {
+    await run();
+  } catch (err) {
+    throw new Error(`case failed: ${name}`, { cause: err });
+  } finally {
+    resetModelFallbackTestState();
+  }
+}
 
 function makeFallbacksOnlyCfg(): OpenClawConfig {
   return {
@@ -206,6 +217,7 @@ async function runWithStoredAuth(params: {
 }
 
 function setAuthRuntimeStore(agentDir: string | undefined, store: AuthProfileStore): void {
+  authSourceCheckMock.hasAnyAuthProfileStoreSource.mockReturnValue(true);
   authRuntimeMock.setStore(agentDir, store);
 }
 
@@ -314,12 +326,6 @@ async function expectSkippedUnavailableProvider(params: {
   expect(result.attempts[0]?.reason).toBe(params.expectedReason);
 }
 
-// OpenAI 429 example shape: https://help.openai.com/en/articles/5955604-how-can-i-solve-429-too-many-requests-errors
-const OPENAI_RATE_LIMIT_MESSAGE =
-  "Rate limit reached for gpt-4.1-mini in organization org_test on requests per min. Limit: 3.000000 / min. Current: 3.000000 / min.";
-// Anthropic overloaded_error example shape: https://docs.anthropic.com/en/api/errors
-const ANTHROPIC_OVERLOADED_PAYLOAD =
-  '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"},"request_id":"req_test"}';
 // Issue-backed Anthropic/OpenAI-compatible insufficient_quota payload under HTTP 400:
 // https://github.com/openclaw/openclaw/issues/23440
 const INSUFFICIENT_QUOTA_PAYLOAD =
@@ -346,76 +352,75 @@ describe("runWithModelFallback", () => {
     expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
   });
 
-  it("keeps openai gpt-5.3 codex on the openai provider before running", async () => {
-    const cfg = makeCfg();
-    const run = vi.fn().mockResolvedValueOnce("ok");
-
-    const result = await runWithModelFallback({
-      cfg,
-      provider: "openai",
-      model: "gpt-5.4",
-      run,
-    });
-
-    expect(result.result).toBe("ok");
-    expect(run).toHaveBeenCalledTimes(1);
-    expect(run).toHaveBeenCalledWith("openai", "gpt-5.4");
-  });
-
-  it("resolves a persisted bare primary alias before running", async () => {
-    const cfg = makeCfg({
-      agents: {
-        defaults: {
-          model: {
-            primary: "anthropic/claude-sonnet-4-6",
-            fallbacks: [],
-          },
-          models: {
-            "anthropic/claude-sonnet-4-6": { alias: "sonnet" },
-          },
-        },
+  it("resolves primary model aliases before running", async () => {
+    const cases = [
+      {
+        name: "keeps openai gpt-5.4 on provider",
+        cfg: makeCfg(),
+        provider: "openai",
+        model: "gpt-5.4",
+        expected: ["openai", "gpt-5.4"],
       },
-    });
-    const run = vi.fn().mockResolvedValueOnce("ok");
-
-    const result = await runWithModelFallback({
-      cfg,
-      provider: "anthropic",
-      model: "sonnet",
-      run,
-    });
-
-    expect(result.result).toBe("ok");
-    expect(run).toHaveBeenCalledTimes(1);
-    expect(run).toHaveBeenCalledWith("anthropic", "claude-sonnet-4-6");
-  });
-
-  it("resolves a slash-form primary alias before provider/model parsing", async () => {
-    const cfg = makeCfg({
-      agents: {
-        defaults: {
-          model: {
-            primary: "openai/xiaomi/mimo-v2-pro-mit",
-            fallbacks: [],
+      {
+        name: "resolves bare alias",
+        cfg: makeCfg({
+          agents: {
+            defaults: {
+              model: {
+                primary: "anthropic/claude-sonnet-4-6",
+                fallbacks: [],
+              },
+              models: {
+                "anthropic/claude-sonnet-4-6": { alias: "sonnet" },
+              },
+            },
           },
-          models: {
-            "openai/xiaomi/mimo-v2-pro-mit": { alias: "xiaomi/mimo-v2-pro-mit" },
-          },
-        },
+        }),
+        provider: "anthropic",
+        model: "sonnet",
+        expected: ["anthropic", "claude-sonnet-4-6"],
       },
-    });
-    const run = vi.fn().mockResolvedValueOnce("ok");
+      {
+        name: "resolves slash-form alias before provider parsing",
+        cfg: makeCfg({
+          agents: {
+            defaults: {
+              model: {
+                primary: "openai/xiaomi/mimo-v2-pro-mit",
+                fallbacks: [],
+              },
+              models: {
+                "openai/xiaomi/mimo-v2-pro-mit": { alias: "xiaomi/mimo-v2-pro-mit" },
+              },
+            },
+          },
+        }),
+        provider: "xiaomi",
+        model: "mimo-v2-pro-mit",
+        expected: ["openai", "xiaomi/mimo-v2-pro-mit"],
+      },
+    ] satisfies Array<{
+      name: string;
+      cfg: OpenClawConfig;
+      provider: string;
+      model: string;
+      expected: [string, string];
+    }>;
 
-    const result = await runWithModelFallback({
-      cfg,
-      provider: "xiaomi",
-      model: "mimo-v2-pro-mit",
-      run,
-    });
+    for (const testCase of cases) {
+      await runModelFallbackCase(testCase.name, async () => {
+        const candidates = __testing.resolveFallbackCandidates({
+          cfg: testCase.cfg,
+          provider: testCase.provider,
+          model: testCase.model,
+        });
 
-    expect(result.result).toBe("ok");
-    expect(run).toHaveBeenCalledTimes(1);
-    expect(run).toHaveBeenCalledWith("openai", "xiaomi/mimo-v2-pro-mit");
+        expect(candidates[0]).toEqual({
+          provider: testCase.expected[0],
+          model: testCase.expected[1],
+        });
+      });
+    }
   });
 
   it("falls back on unrecognized errors when candidates remain", async () => {
@@ -887,7 +892,7 @@ describe("runWithModelFallback", () => {
     ]);
   });
 
-  it("falls back on auth errors", async () => {
+  it("falls back to the configured haiku candidate for retryable provider failures", async () => {
     await expectFallsBackToHaiku({
       provider: "openai",
       model: "gpt-4.1-mini",
@@ -1000,44 +1005,6 @@ describe("runWithModelFallback", () => {
     ]);
   });
 
-  it("falls back on transient HTTP 5xx errors", async () => {
-    await expectFallsBackToHaiku({
-      provider: "openai",
-      model: "gpt-4.1-mini",
-      firstError: new Error(
-        "521 <!DOCTYPE html><html><head><title>Web server is down</title></head><body>Cloudflare</body></html>",
-      ),
-    });
-  });
-
-  it("falls back on 402 payment required", async () => {
-    await expectFallsBackToHaiku({
-      provider: "openai",
-      model: "gpt-4.1-mini",
-      firstError: Object.assign(new Error("payment required"), { status: 402 }),
-    });
-  });
-
-  it("falls back on billing errors", async () => {
-    await expectFallsBackToHaiku({
-      provider: "openai",
-      model: "gpt-4.1-mini",
-      firstError: new Error(
-        "LLM request rejected: Your credit balance is too low to access the Anthropic API. Please go to Plans & Billing to upgrade or purchase credits.",
-      ),
-    });
-  });
-
-  it("falls back on bare leading 402 quota-refresh errors", async () => {
-    await expectFallsBackToHaiku({
-      provider: "openai",
-      model: "gpt-4.1-mini",
-      firstError: new Error(
-        "402 You have reached your subscription quota limit. Please wait for automatic quota refresh in the rolling time window, upgrade to a higher plan, or use a Pay-As-You-Go API Key for unlimited access.",
-      ),
-    });
-  });
-
   it("records 400 insufficient_quota payloads as billing during fallback", async () => {
     const cfg = makeCfg();
     const run = vi
@@ -1081,94 +1048,52 @@ describe("runWithModelFallback", () => {
     ]);
   });
 
-  it("falls back on unknown model errors", async () => {
-    const cfg = makeCfg();
-    const run = vi
-      .fn()
-      .mockRejectedValueOnce(new Error("Unknown model: anthropic/claude-opus-4-6"))
-      .mockResolvedValueOnce("ok");
+  it("falls back on model-not-found error shapes", async () => {
+    const cases = [
+      {
+        name: "unknown anthropic override",
+        provider: "anthropic",
+        model: "claude-opus-4-6",
+        error: new Error("Unknown model: anthropic/claude-opus-4-6"),
+        expectedFallback: ["openai", "gpt-4.1-mini"],
+      },
+      {
+        name: "openai model not found",
+        provider: "openai",
+        model: "gpt-6",
+        error: new Error("Model not found: openai/gpt-6"),
+        expectedFallback: ["anthropic", "claude-haiku-3-5"],
+      },
+    ] satisfies Array<{
+      name: string;
+      provider: string;
+      model: string;
+      error: Error;
+      expectedFallback: [string, string];
+      expectedReason?: string;
+    }>;
 
-    const result = await runWithModelFallback({
-      cfg,
-      provider: "anthropic",
-      model: "claude-opus-4-6",
-      run,
-    });
+    for (const testCase of cases) {
+      await runModelFallbackCase(testCase.name, async () => {
+        const cfg = makeCfg();
+        const run = vi.fn().mockRejectedValueOnce(testCase.error).mockResolvedValueOnce("ok");
 
-    // Override model failed with model_not_found → falls back to configured primary.
-    // (Same candidate-resolution path as other override-model failures.)
-    expect(result.result).toBe("ok");
-    expect(run).toHaveBeenCalledTimes(2);
-    expect(run.mock.calls[1]?.[0]).toBe("openai");
-    expect(run.mock.calls[1]?.[1]).toBe("gpt-4.1-mini");
-  });
+        const result = await runWithModelFallback({
+          cfg,
+          provider: testCase.provider,
+          model: testCase.model,
+          run,
+        });
 
-  it("falls back on model not found errors", async () => {
-    const cfg = makeCfg();
-    const run = vi
-      .fn()
-      .mockRejectedValueOnce(new Error("Model not found: openai/gpt-6"))
-      .mockResolvedValueOnce("ok");
-
-    const result = await runWithModelFallback({
-      cfg,
-      provider: "openai",
-      model: "gpt-6",
-      run,
-    });
-
-    // Override model failed with model_not_found → tries fallbacks first (same provider).
-    expect(result.result).toBe("ok");
-    expect(run).toHaveBeenCalledTimes(2);
-    expect(run.mock.calls[1]?.[0]).toBe("anthropic");
-    expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
-  });
-
-  it("falls back on JSON-wrapped OpenRouter stealth-model 404s", async () => {
-    const cfg = makeCfg();
-    const run = vi
-      .fn()
-      .mockRejectedValueOnce(new Error(OPENROUTER_MODEL_NOT_FOUND_PAYLOAD))
-      .mockResolvedValueOnce("ok");
-
-    const result = await runWithModelFallback({
-      cfg,
-      provider: "openrouter",
-      model: "openrouter/healer-alpha",
-      run,
-    });
-
-    expect(result.result).toBe("ok");
-    expect(run).toHaveBeenCalledTimes(2);
-    expect(run.mock.calls[1]?.[0]).toBe("openai");
-    expect(run.mock.calls[1]?.[1]).toBe("gpt-4.1-mini");
-  });
-
-  it("records invalid-model HTTP 400 responses as model_not_found during fallback", async () => {
-    const cfg = makeCfg();
-    const run = vi
-      .fn()
-      .mockRejectedValueOnce(
-        Object.assign(
-          new Error("HTTP 400: openrouter/__invalid_test_model__ is not a valid model ID"),
-          { status: 400 },
-        ),
-      )
-      .mockResolvedValueOnce("ok");
-
-    const result = await runWithModelFallback({
-      cfg,
-      provider: "openrouter",
-      model: "__invalid_test_model__",
-      run,
-    });
-
-    expect(result.result).toBe("ok");
-    expect(run).toHaveBeenCalledTimes(2);
-    expect(result.attempts).toHaveLength(1);
-    expect(result.attempts[0]?.reason).toBe("model_not_found");
-    expect(run.mock.calls[1]?.[0]).toBe("openai");
-    expect(run.mock.calls[1]?.[1]).toBe("gpt-4.1-mini");
+        expect(result.result).toBe("ok");
+        expect(run).toHaveBeenCalledTimes(2);
+        expect(run.mock.calls[1]).toEqual(testCase.expectedFallback);
+        if (testCase.expectedReason) {
+          expect(result.attempts).toHaveLength(1);
+          expect(result.attempts[0]?.reason).toBe(testCase.expectedReason);
+        }
+      });
+    }
   });
 
   it("warns when falling back due to model_not_found", async () => {
@@ -1468,27 +1393,14 @@ describe("runWithModelFallback", () => {
   it("uses fallbacksOverride instead of agents.defaults.model.fallbacks", async () => {
     const cfg = makeFallbacksOnlyCfg();
 
-    const calls: Array<{ provider: string; model: string }> = [];
-
-    const res = await runWithModelFallback({
+    const candidates = __testing.resolveFallbackCandidates({
       cfg,
       provider: "anthropic",
       model: "claude-opus-4-5",
       fallbacksOverride: ["openai/gpt-4.1"],
-      run: async (provider, model) => {
-        calls.push({ provider, model });
-        if (provider === "anthropic") {
-          throw Object.assign(new Error("nope"), { status: 401 });
-        }
-        if (provider === "openai" && model === "gpt-4.1") {
-          return "ok";
-        }
-        throw new Error(`unexpected candidate: ${provider}/${model}`);
-      },
     });
 
-    expect(res.result).toBe("ok");
-    expect(calls).toEqual([
+    expect(candidates).toEqual([
       { provider: "anthropic", model: "claude-opus-4-5" },
       { provider: "openai", model: "gpt-4.1" },
     ]);
@@ -1497,22 +1409,14 @@ describe("runWithModelFallback", () => {
   it("treats an empty fallbacksOverride as disabling global fallbacks", async () => {
     const cfg = makeFallbacksOnlyCfg();
 
-    const calls: Array<{ provider: string; model: string }> = [];
+    const candidates = __testing.resolveFallbackCandidates({
+      cfg,
+      provider: "anthropic",
+      model: "claude-opus-4-5",
+      fallbacksOverride: [],
+    });
 
-    await expect(
-      runWithModelFallback({
-        cfg,
-        provider: "anthropic",
-        model: "claude-opus-4-5",
-        fallbacksOverride: [],
-        run: async (provider, model) => {
-          calls.push({ provider, model });
-          throw new Error("primary failed");
-        },
-      }),
-    ).rejects.toThrow("primary failed");
-
-    expect(calls).toEqual([{ provider: "anthropic", model: "claude-opus-4-5" }]);
+    expect(candidates).toEqual([{ provider: "anthropic", model: "claude-opus-4-5" }]);
   });
 
   it("keeps explicit fallbacks reachable when models allowlist is present", async () => {
@@ -1529,22 +1433,16 @@ describe("runWithModelFallback", () => {
         },
       },
     });
-    const run = vi
-      .fn()
-      .mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
-      .mockResolvedValueOnce("ok");
-
-    const result = await runWithModelFallback({
+    const candidates = __testing.resolveFallbackCandidates({
       cfg,
       provider: "anthropic",
       model: "claude-sonnet-4",
-      run,
     });
 
-    expect(result.result).toBe("ok");
-    expect(run.mock.calls).toEqual([
-      ["anthropic", "claude-sonnet-4"],
-      ["openai", "gpt-4o"],
+    expect(candidates).toEqual([
+      { provider: "anthropic", model: "claude-sonnet-4" },
+      { provider: "openai", model: "gpt-4o" },
+      { provider: "ollama", model: "llama-3" },
     ]);
   });
 
@@ -1560,67 +1458,13 @@ describe("runWithModelFallback", () => {
       },
     });
 
-    const calls: Array<{ provider: string; model: string }> = [];
-
-    const result = await runWithModelFallback({
+    const candidates = __testing.resolveFallbackCandidates({
       cfg,
       provider: undefined as unknown as string,
       model: undefined as unknown as string,
-      run: async (provider, model) => {
-        calls.push({ provider, model });
-        return "ok";
-      },
     });
 
-    expect(result.result).toBe("ok");
-    expect(calls).toEqual([{ provider: "openai", model: "gpt-4.1-mini" }]);
-  });
-
-  it("falls back on representative transient/provider error shapes", async () => {
-    // The full classification matrix lives in failover-error tests; keep this as integration
-    // coverage that classified errors actually advance the model fallback chain.
-    const cases: Array<{ name: string; firstError: Error }> = [
-      {
-        name: "missing API key",
-        firstError: new Error("No API key found for profile openai."),
-      },
-      {
-        name: "lowercase credential",
-        firstError: new Error("no api key found for profile openai"),
-      },
-      {
-        name: "documented OpenAI 429 rate limit",
-        firstError: Object.assign(new Error(OPENAI_RATE_LIMIT_MESSAGE), { status: 429 }),
-      },
-      {
-        name: "documented overloaded_error payload",
-        firstError: new Error(ANTHROPIC_OVERLOADED_PAYLOAD),
-      },
-      {
-        name: "provider request aborted",
-        firstError: Object.assign(new Error("Request was aborted"), { name: "AbortError" }),
-      },
-      {
-        name: "bare undici terminated transport failure",
-        firstError: new Error("terminated"),
-      },
-      {
-        name: "bare Codex transport failure",
-        firstError: new Error("Request failed"),
-      },
-    ];
-
-    for (const { name, firstError } of cases) {
-      try {
-        await expectFallsBackToHaiku({
-          provider: "openai",
-          model: "gpt-4.1-mini",
-          firstError,
-        });
-      } catch (error) {
-        throw new Error(`fallback case failed: ${name}`, { cause: error });
-      }
-    }
+    expect(candidates).toEqual([{ provider: "openai", model: "gpt-4.1-mini" }]);
   });
 
   it("does not fall back on user aborts", async () => {
@@ -1673,123 +1517,105 @@ describe("runWithModelFallback", () => {
 
   // Tests for Bug A fix: Model fallback with session overrides
   describe("fallback behavior with session model overrides", () => {
-    it("allows fallbacks when session model differs from config within same provider", async () => {
-      const cfg = makeCfg({
-        agents: {
-          defaults: {
-            model: {
-              primary: "anthropic/claude-opus-4-6",
-              fallbacks: ["anthropic/claude-sonnet-4-5", "google/gemini-2.5-flash"],
+    it("keeps fallback ordering correct across session overrides", async () => {
+      const cases = [
+        {
+          name: "same provider versioned session model",
+          cfg: makeCfg({
+            agents: {
+              defaults: {
+                model: {
+                  primary: "anthropic/claude-opus-4-6",
+                  fallbacks: ["anthropic/claude-sonnet-4-5", "google/gemini-2.5-flash"],
+                },
+              },
             },
-          },
+          }),
+          provider: "anthropic",
+          model: "claude-sonnet-4-20250514",
+          calls: [
+            ["anthropic", "claude-sonnet-4-20250514"],
+            ["anthropic", "claude-sonnet-4-5"],
+          ],
         },
-      });
-
-      const run = vi
-        .fn()
-        .mockRejectedValueOnce(new Error("Rate limit exceeded")) // Session model fails
-        .mockResolvedValueOnce("fallback success"); // First fallback succeeds
-
-      const result = await runWithModelFallback({
-        cfg,
-        provider: "anthropic",
-        model: "claude-sonnet-4-20250514", // Different from config primary
-        run,
-      });
-
-      expect(result.result).toBe("fallback success");
-      expect(run).toHaveBeenCalledTimes(2);
-      expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-20250514");
-      expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-sonnet-4-5"); // Fallback tried
-    });
-
-    it("allows fallbacks with model version differences within same provider", async () => {
-      const cfg = makeCfg({
-        agents: {
-          defaults: {
-            model: {
-              primary: "anthropic/claude-opus-4-6",
-              fallbacks: ["groq/llama-3.3-70b-versatile"],
+        {
+          name: "same provider model version difference",
+          cfg: makeCfg({
+            agents: {
+              defaults: {
+                model: {
+                  primary: "anthropic/claude-opus-4-6",
+                  fallbacks: ["groq/llama-3.3-70b-versatile"],
+                },
+              },
             },
-          },
+          }),
+          provider: "anthropic",
+          model: "claude-opus-4-5",
+          calls: [
+            ["anthropic", "claude-opus-4-5"],
+            ["groq", "llama-3.3-70b-versatile"],
+          ],
         },
-      });
-
-      const run = vi
-        .fn()
-        .mockRejectedValueOnce(new Error("Weekly quota exceeded"))
-        .mockResolvedValueOnce("groq success");
-
-      const result = await runWithModelFallback({
-        cfg,
-        provider: "anthropic",
-        model: "claude-opus-4-5", // Version difference from config
-        run,
-      });
-
-      expect(result.result).toBe("groq success");
-      expect(run).toHaveBeenCalledTimes(2);
-      expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
-    });
-
-    it("still skips fallbacks when using different provider than config", async () => {
-      const cfg = makeCfg({
-        agents: {
-          defaults: {
-            model: {
-              primary: "anthropic/claude-opus-4-6",
-              fallbacks: [], // Empty fallbacks to match working pattern
+        {
+          name: "different provider skips configured fallbacks",
+          cfg: makeCfg({
+            agents: {
+              defaults: {
+                model: {
+                  primary: "anthropic/claude-opus-4-6",
+                  fallbacks: [],
+                },
+              },
             },
-          },
+          }),
+          provider: "openai",
+          model: "gpt-4.1-mini",
+          calls: [
+            ["openai", "gpt-4.1-mini"],
+            ["anthropic", "claude-opus-4-6"],
+          ],
         },
-      });
-
-      const run = vi
-        .fn()
-        .mockRejectedValueOnce(new Error('No credentials found for profile "openai:default".'))
-        .mockResolvedValueOnce("config primary worked");
-
-      const result = await runWithModelFallback({
-        cfg,
-        provider: "openai", // Different provider
-        model: "gpt-4.1-mini",
-        run,
-      });
-
-      // Cross-provider requests should skip configured fallbacks but still try configured primary
-      expect(result.result).toBe("config primary worked");
-      expect(run).toHaveBeenCalledTimes(2);
-      expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini"); // Original request
-      expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-opus-4-6"); // Config primary as final fallback
-    });
-
-    it("uses fallbacks when session model exactly matches config primary", async () => {
-      const cfg = makeCfg({
-        agents: {
-          defaults: {
-            model: {
-              primary: "anthropic/claude-opus-4-6",
-              fallbacks: ["groq/llama-3.3-70b-versatile"],
+        {
+          name: "exact primary uses fallbacks",
+          cfg: makeCfg({
+            agents: {
+              defaults: {
+                model: {
+                  primary: "anthropic/claude-opus-4-6",
+                  fallbacks: ["groq/llama-3.3-70b-versatile"],
+                },
+              },
             },
-          },
+          }),
+          provider: "anthropic",
+          model: "claude-opus-4-6",
+          calls: [
+            ["anthropic", "claude-opus-4-6"],
+            ["groq", "llama-3.3-70b-versatile"],
+          ],
         },
-      });
+      ] satisfies Array<{
+        name: string;
+        cfg: OpenClawConfig;
+        provider: string;
+        model: string;
+        calls: Array<[string, string]>;
+      }>;
 
-      const run = vi
-        .fn()
-        .mockRejectedValueOnce(new Error("Quota exceeded"))
-        .mockResolvedValueOnce("fallback worked");
+      for (const testCase of cases) {
+        await runModelFallbackCase(testCase.name, async () => {
+          const candidates = __testing.resolveFallbackCandidates({
+            cfg: testCase.cfg,
+            provider: testCase.provider,
+            model: testCase.model,
+          });
 
-      const result = await runWithModelFallback({
-        cfg,
-        provider: "anthropic",
-        model: "claude-opus-4-6", // Exact match
-        run,
-      });
-
-      expect(result.result).toBe("fallback worked");
-      expect(run).toHaveBeenCalledTimes(2);
-      expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile");
+          expect(candidates.slice(0, testCase.calls.length)).toEqual(
+            testCase.calls.map(([provider, model]) => ({ provider, model })),
+          );
+        });
+      }
     });
   });
 
@@ -1822,34 +1648,38 @@ describe("runWithModelFallback", () => {
       return { dir: tmpDir };
     }
 
-    it("attempts same-provider fallbacks during rate limit cooldown", async () => {
-      const { dir } = await makeAuthStoreWithCooldown("anthropic", "rate_limit");
-      const cfg = makeCfg({
-        agents: {
-          defaults: {
-            model: {
-              primary: "anthropic/claude-opus-4-6",
-              fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
+    it("attempts same-provider fallbacks during transient cooldowns", async () => {
+      for (const reason of ["rate_limit", "overloaded", "timeout"] as const) {
+        await runModelFallbackCase(reason, async () => {
+          const { dir } = await makeAuthStoreWithCooldown("anthropic", reason);
+          const cfg = makeCfg({
+            agents: {
+              defaults: {
+                model: {
+                  primary: "anthropic/claude-opus-4-6",
+                  fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
+                },
+              },
             },
-          },
-        },
-      });
+          });
 
-      const run = vi.fn().mockResolvedValueOnce("sonnet success");
+          const run = vi.fn().mockResolvedValueOnce("sonnet success");
 
-      const result = await runWithModelFallback({
-        cfg,
-        provider: "anthropic",
-        model: "claude-opus-4-6",
-        run,
-        agentDir: dir,
-      });
+          const result = await runWithModelFallback({
+            cfg,
+            provider: "anthropic",
+            model: "claude-opus-4-6",
+            run,
+            agentDir: dir,
+          });
 
-      expect(result.result).toBe("sonnet success");
-      expect(run).toHaveBeenCalledTimes(1);
-      expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
-        allowTransientCooldownProbe: true,
-      });
+          expect(result.result).toBe("sonnet success");
+          expect(run).toHaveBeenCalledTimes(1);
+          expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
+            allowTransientCooldownProbe: true,
+          });
+        });
+      }
     });
 
     it("keeps alias-resolved primary models subject to transient cooldowns", async () => {
@@ -1885,120 +1715,36 @@ describe("runWithModelFallback", () => {
       });
     });
 
-    it("attempts same-provider fallbacks during overloaded cooldown", async () => {
-      const { dir } = await makeAuthStoreWithCooldown("anthropic", "overloaded");
-      const cfg = makeCfg({
-        agents: {
-          defaults: {
-            model: {
-              primary: "anthropic/claude-opus-4-6",
-              fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
+    it("skips same-provider models on persistent cooldowns", async () => {
+      for (const reason of ["auth", "billing"] as const) {
+        await runModelFallbackCase(reason, async () => {
+          const { dir } = await makeAuthStoreWithCooldown("anthropic", reason);
+          const cfg = makeCfg({
+            agents: {
+              defaults: {
+                model: {
+                  primary: "anthropic/claude-opus-4-6",
+                  fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
+                },
+              },
             },
-          },
-        },
-      });
+          });
 
-      const run = vi.fn().mockResolvedValueOnce("sonnet success");
+          const run = vi.fn().mockResolvedValueOnce("groq success");
 
-      const result = await runWithModelFallback({
-        cfg,
-        provider: "anthropic",
-        model: "claude-opus-4-6",
-        run,
-        agentDir: dir,
-      });
+          const result = await runWithModelFallback({
+            cfg,
+            provider: "anthropic",
+            model: "claude-opus-4-6",
+            run,
+            agentDir: dir,
+          });
 
-      expect(result.result).toBe("sonnet success");
-      expect(run).toHaveBeenCalledTimes(1);
-      expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
-        allowTransientCooldownProbe: true,
-      });
-    });
-
-    it("attempts same-provider fallbacks during timeout cooldown", async () => {
-      const { dir } = await makeAuthStoreWithCooldown("anthropic", "timeout");
-      const cfg = makeCfg({
-        agents: {
-          defaults: {
-            model: {
-              primary: "anthropic/claude-opus-4-6",
-              fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
-            },
-          },
-        },
-      });
-
-      const run = vi.fn().mockResolvedValueOnce("sonnet success");
-
-      const result = await runWithModelFallback({
-        cfg,
-        provider: "anthropic",
-        model: "claude-opus-4-6",
-        run,
-        agentDir: dir,
-      });
-
-      expect(result.result).toBe("sonnet success");
-      expect(run).toHaveBeenCalledTimes(1);
-      expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
-        allowTransientCooldownProbe: true,
-      });
-    });
-
-    it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => {
-      const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
-      const cfg = makeCfg({
-        agents: {
-          defaults: {
-            model: {
-              primary: "anthropic/claude-opus-4-6",
-              fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
-            },
-          },
-        },
-      });
-
-      const run = vi.fn().mockResolvedValueOnce("groq success");
-
-      const result = await runWithModelFallback({
-        cfg,
-        provider: "anthropic",
-        model: "claude-opus-4-6",
-        run,
-        agentDir: dir,
-      });
-
-      expect(result.result).toBe("groq success");
-      expect(run).toHaveBeenCalledTimes(1);
-      expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
-    });
-
-    it("skips same-provider models on billing cooldown but still tries no-profile fallback providers", async () => {
-      const { dir } = await makeAuthStoreWithCooldown("anthropic", "billing");
-      const cfg = makeCfg({
-        agents: {
-          defaults: {
-            model: {
-              primary: "anthropic/claude-opus-4-6",
-              fallbacks: ["anthropic/claude-sonnet-4-5", "groq/llama-3.3-70b-versatile"],
-            },
-          },
-        },
-      });
-
-      const run = vi.fn().mockResolvedValueOnce("groq success");
-
-      const result = await runWithModelFallback({
-        cfg,
-        provider: "anthropic",
-        model: "claude-opus-4-6",
-        run,
-        agentDir: dir,
-      });
-
-      expect(result.result).toBe("groq success");
-      expect(run).toHaveBeenCalledTimes(1);
-      expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
+          expect(result.result).toBe("groq success");
+          expect(run).toHaveBeenCalledTimes(1);
+          expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
+        });
+      }
     });
 
     it("tries cross-provider fallbacks when same provider has rate limit", async () => {
@@ -2131,49 +1877,58 @@ describe("runWithModelFallback", () => {
 });
 
 describe("runWithImageModelFallback", () => {
-  it("inherits the configured image-model provider for bare override ids", async () => {
-    const cfg = makeCfg({
-      agents: {
-        defaults: {
-          imageModel: {
-            primary: "openai-codex/gpt-5.4",
-            fallbacks: ["openai-codex/gpt-5.4-mini"],
+  it("resolves image-model override providers", async () => {
+    const cases = [
+      {
+        name: "bare override inherits configured provider",
+        cfg: makeCfg({
+          agents: {
+            defaults: {
+              imageModel: {
+                primary: "openai-codex/gpt-5.4",
+                fallbacks: ["openai-codex/gpt-5.4-mini"],
+              },
+            },
           },
-        },
+        }),
+        modelOverride: "gpt-5.4-mini",
+        expected: [["openai-codex", "gpt-5.4-mini"]],
       },
-    });
-    const run = vi.fn().mockResolvedValueOnce("ok");
-
-    const result = await runWithImageModelFallback({
-      cfg,
-      modelOverride: "gpt-5.4-mini",
-      run,
-    });
-
-    expect(result.result).toBe("ok");
-    expect(run.mock.calls).toEqual([["openai-codex", "gpt-5.4-mini"]]);
-  });
-
-  it("keeps a fully qualified override provider over the configured default", async () => {
-    const cfg = makeCfg({
-      agents: {
-        defaults: {
-          imageModel: {
-            primary: "openai-codex/gpt-5.4",
+      {
+        name: "qualified override keeps provider",
+        cfg: makeCfg({
+          agents: {
+            defaults: {
+              imageModel: {
+                primary: "openai-codex/gpt-5.4",
+              },
+            },
           },
-        },
+        }),
+        modelOverride: "google/gemini-3-pro-image",
+        expected: [["google", "gemini-3-pro-image"]],
       },
-    });
-    const run = vi.fn().mockResolvedValueOnce("ok");
+    ] satisfies Array<{
+      name: string;
+      cfg: OpenClawConfig;
+      modelOverride: string;
+      expected: Array<[string, string]>;
+    }>;
 
-    const result = await runWithImageModelFallback({
-      cfg,
-      modelOverride: "google/gemini-3-pro-image",
-      run,
-    });
+    for (const testCase of cases) {
+      await runModelFallbackCase(testCase.name, async () => {
+        const run = vi.fn().mockResolvedValueOnce("ok");
 
-    expect(result.result).toBe("ok");
-    expect(run.mock.calls).toEqual([["google", "gemini-3-pro-image"]]);
+        const result = await runWithImageModelFallback({
+          cfg: testCase.cfg,
+          modelOverride: testCase.modelOverride,
+          run,
+        });
+
+        expect(result.result).toBe("ok");
+        expect(run.mock.calls).toEqual(testCase.expected);
+      });
+    }
   });
 
   it("keeps explicit image fallbacks reachable when models allowlist is present", async () => {
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 0ff9ba8b31c..d8f9800ed7c 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -509,6 +509,11 @@ function resolveImageFallbackDefaultProvider(cfg: OpenClawConfig | undefined): s
   return DEFAULT_PROVIDER;
 }
 
+export const __testing = {
+  resolveFallbackCandidates,
+  resolveImageFallbackCandidates,
+} as const;
+
 function resolveFallbackCandidates(params: {
   cfg: OpenClawConfig | undefined;
   provider: string;
diff --git a/src/agents/model-selection.test.ts b/src/agents/model-selection.test.ts
index 4822201832b..e4c7be8629e 100644
--- a/src/agents/model-selection.test.ts
+++ b/src/agents/model-selection.test.ts
@@ -25,6 +25,10 @@ import {
   resolveModelRefFromString,
 } from "./model-selection.js";
 
+vi.mock("./provider-model-normalization.runtime.js", () => ({
+  normalizeProviderModelIdWithRuntime: () => undefined,
+}));
+
 const EXPLICIT_ALLOWLIST_CONFIG = {
   agents: {
     defaults: {
@@ -195,11 +199,14 @@ describe("model-selection", () => {
       expected: { provider: string; model: string },
     ) => {
       for (const raw of variants) {
-        expect(parseModelRef(raw, defaultProvider), raw).toEqual(expected);
+        expect(
+          parseModelRef(raw, defaultProvider, { allowPluginNormalization: false }),
+          raw,
+        ).toEqual(expected);
       }
     };
 
-    it.each([
+    const parseModelRefCases = [
       {
         name: "parses explicit provider/model refs",
         variants: ["anthropic/claude-3-5-sonnet"],
@@ -335,19 +342,30 @@ describe("model-selection", () => {
         defaultProvider: "google-vertex",
         expected: { provider: "google-vertex", model: "gemini-3.1-flash-lite-preview" },
       },
-    ])("$name", ({ variants, defaultProvider, expected }) => {
-      expectParsedModelVariants(variants, defaultProvider, expected);
+    ];
+
+    it("parses and normalizes provider/model refs", () => {
+      for (const { variants, defaultProvider, expected } of parseModelRefCases) {
+        expectParsedModelVariants(variants, defaultProvider, expected);
+      }
     });
 
     it("round-trips normalized refs through modelKey", () => {
-      const parsed = parseModelRef(" opus-4.6 ", "anthropic");
+      const parsed = parseModelRef(" opus-4.6 ", "anthropic", {
+        allowPluginNormalization: false,
+      });
       expect(parsed).toEqual({ provider: "anthropic", model: "claude-opus-4-6" });
       expect(modelKey(parsed?.provider ?? "", parsed?.model ?? "")).toBe(
         "anthropic/claude-opus-4-6",
       );
     });
-    it.each(["", "  ", "/", "anthropic/", "/model"])("returns null for invalid ref %j", (raw) => {
-      expect(parseModelRef(raw, "anthropic")).toBeNull();
+    it("returns null for invalid refs", () => {
+      for (const raw of ["", "  ", "/", "anthropic/", "/model"]) {
+        expect(
+          parseModelRef(raw, "anthropic", { allowPluginNormalization: false }),
+          raw,
+        ).toBeNull();
+      }
     });
   });
 
diff --git a/src/agents/outcome-fallback-runtime-contract.test.ts b/src/agents/outcome-fallback-runtime-contract.test.ts
index d5fe9ecdaa2..6e4e497a341 100644
--- a/src/agents/outcome-fallback-runtime-contract.test.ts
+++ b/src/agents/outcome-fallback-runtime-contract.test.ts
@@ -13,11 +13,13 @@ vi.mock("./auth-profiles/source-check.js", () => ({
 }));
 
 describe("Outcome/fallback runtime contract - Pi fallback classifier", () => {
-  it.each([
+  const fallbackClassificationCases = [
     ["empty", "empty_result"],
     ["reasoning-only", "reasoning_only_result"],
     ["planning-only", "planning_only_result"],
-  ] as const)(
+  ] as const;
+
+  it.each(fallbackClassificationCases)(
     "maps harness classification %s to a format fallback code",
     (classification, code) => {
       expect(
@@ -38,54 +40,47 @@ describe("Outcome/fallback runtime contract - Pi fallback classifier", () => {
     },
   );
 
-  it.each([
-    ["empty", "empty_result"],
-    ["reasoning-only", "reasoning_only_result"],
-    ["planning-only", "planning_only_result"],
-  ] as const)(
-    "advances to the configured fallback after a classified GPT-5 %s terminal result",
-    async (classification, code) => {
-      const primary = createContractRunResult({
-        meta: {
-          durationMs: 1,
-          agentHarnessResultClassification: classification,
-        },
-      });
-      const fallback = createContractRunResult({
-        payloads: [{ text: "fallback ok" }],
-        meta: { durationMs: 1, finalAssistantVisibleText: "fallback ok" },
-      });
-      const run = vi.fn().mockResolvedValueOnce(primary).mockResolvedValueOnce(fallback);
+  it("advances to the configured fallback after a classified GPT-5 terminal result", async () => {
+    const primary = createContractRunResult({
+      meta: {
+        durationMs: 1,
+        agentHarnessResultClassification: "empty",
+      },
+    });
+    const fallback = createContractRunResult({
+      payloads: [{ text: "fallback ok" }],
+      meta: { durationMs: 1, finalAssistantVisibleText: "fallback ok" },
+    });
+    const run = vi.fn().mockResolvedValueOnce(primary).mockResolvedValueOnce(fallback);
 
-      const result = await runWithModelFallback({
-        cfg: createContractFallbackConfig() as unknown as OpenClawConfig,
-        provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
-        model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
-        run,
-        classifyResult: ({ provider, model, result }) =>
-          classifyEmbeddedPiRunResultForModelFallback({
-            provider,
-            model,
-            result,
-          }),
-      });
+    const result = await runWithModelFallback({
+      cfg: createContractFallbackConfig() as unknown as OpenClawConfig,
+      provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
+      model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
+      run,
+      classifyResult: ({ provider, model, result }) =>
+        classifyEmbeddedPiRunResultForModelFallback({
+          provider,
+          model,
+          result,
+        }),
+    });
 
-      expect(result.result).toBe(fallback);
-      expect(run).toHaveBeenCalledTimes(2);
-      expect(run.mock.calls[1]).toEqual([
-        OUTCOME_FALLBACK_RUNTIME_CONTRACT.fallbackProvider,
-        OUTCOME_FALLBACK_RUNTIME_CONTRACT.fallbackModel,
-      ]);
-      expect(result.attempts[0]).toMatchObject({
-        provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
-        model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
-        reason: "format",
-        code,
-      });
-    },
-  );
+    expect(result.result).toBe(fallback);
+    expect(run).toHaveBeenCalledTimes(2);
+    expect(run.mock.calls[1]).toEqual([
+      OUTCOME_FALLBACK_RUNTIME_CONTRACT.fallbackProvider,
+      OUTCOME_FALLBACK_RUNTIME_CONTRACT.fallbackModel,
+    ]);
+    expect(result.attempts[0]).toMatchObject({
+      provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
+      model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
+      reason: "format",
+      code: "empty_result",
+    });
+  });
 
-  it.each([
+  const nonFallbackCases = [
     {
       name: "intentional NO_REPLY",
       result: createContractRunResult({
@@ -153,17 +148,24 @@ describe("Outcome/fallback runtime contract - Pi fallback classifier", () => {
       }),
       hasBlockReplyPipelineOutput: true,
     },
-  ])("does not fallback for $name", async (contractCase) => {
-    expect(
-      classifyEmbeddedPiRunResultForModelFallback({
-        provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
-        model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
-        result: contractCase.result,
-        hasDirectlySentBlockReply: contractCase.hasDirectlySentBlockReply,
-        hasBlockReplyPipelineOutput: contractCase.hasBlockReplyPipelineOutput,
-      }),
-    ).toBeNull();
+  ];
 
+  it("does not classify terminal results with visible output or side effects as fallbacks", () => {
+    for (const contractCase of nonFallbackCases) {
+      expect(
+        classifyEmbeddedPiRunResultForModelFallback({
+          provider: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryProvider,
+          model: OUTCOME_FALLBACK_RUNTIME_CONTRACT.primaryModel,
+          result: contractCase.result,
+          hasDirectlySentBlockReply: contractCase.hasDirectlySentBlockReply,
+          hasBlockReplyPipelineOutput: contractCase.hasBlockReplyPipelineOutput,
+        }),
+      ).toBeNull();
+    }
+  });
+
+  it("keeps running on the primary when terminal output is not classified as fallback", async () => {
+    const contractCase = nonFallbackCases[0];
     const run = vi.fn().mockResolvedValue(contractCase.result);
     const result = await runWithModelFallback({
       cfg: createContractFallbackConfig() as unknown as OpenClawConfig,
diff --git a/src/agents/pi-tools.policy.test.ts b/src/agents/pi-tools.policy.test.ts
index 0ec3a496c93..8658b3004e3 100644
--- a/src/agents/pi-tools.policy.test.ts
+++ b/src/agents/pi-tools.policy.test.ts
@@ -1,7 +1,7 @@
 import fs from "node:fs";
 import os from "node:os";
 import path from "node:path";
-import { describe, expect, it } from "vitest";
+import { describe, expect, it, vi } from "vitest";
 import type { OpenClawConfig } from "../config/config.js";
 import {
   filterToolsByPolicy,
@@ -15,6 +15,15 @@ import {
 import { createStubTool } from "./test-helpers/pi-tool-stubs.js";
 import { providerAliasCases } from "./test-helpers/provider-alias-cases.js";
 
+vi.mock("../channels/plugins/session-conversation.js", () => ({
+  resolveSessionConversation: ({ rawId }: { rawId: string }) => ({
+    id: rawId,
+    threadId: undefined,
+    baseConversationId: rawId,
+    parentConversationCandidates: [],
+  }),
+}));
+
 describe("pi-tools.policy", () => {
   it("treats * in allow as allow-all", () => {
     const tools = [createStubTool("read"), createStubTool("exec")];
diff --git a/src/agents/tools/message-tool.test.ts b/src/agents/tools/message-tool.test.ts
index 2a1d6bd662e..ad3a94a9446 100644
--- a/src/agents/tools/message-tool.test.ts
+++ b/src/agents/tools/message-tool.test.ts
@@ -103,6 +103,19 @@ const mocks = vi.hoisted(() => ({
   ),
 }));
 
+const openClawToolsFactoryMocks = vi.hoisted(() => {
+  const tool = (name: string) => ({
+    name,
+    displaySummary: `${name} test stub`,
+    description: `${name} test stub`,
+    parameters: { type: "object", properties: {} },
+    execute: vi.fn(async () => ({ type: "json", data: { ok: true } })),
+  });
+  return {
+    tool,
+  };
+});
+
 vi.mock("../../infra/outbound/message-action-runner.js", async () => {
   const actual = await vi.importActual<
     typeof import("../../infra/outbound/message-action-runner.js")
@@ -130,6 +143,76 @@ vi.mock("../../cli/command-secret-targets.js", () => ({
   getScopedChannelsCommandSecretTargets: mocks.getScopedChannelsCommandSecretTargets,
 }));
 
+vi.mock("./agents-list-tool.js", () => ({
+  createAgentsListTool: () => openClawToolsFactoryMocks.tool("agents"),
+}));
+vi.mock("./canvas-tool.js", () => ({
+  createCanvasTool: () => openClawToolsFactoryMocks.tool("canvas"),
+}));
+vi.mock("./cron-tool.js", () => ({
+  createCronTool: () => openClawToolsFactoryMocks.tool("cron"),
+}));
+vi.mock("./gateway-tool.js", () => ({
+  createGatewayTool: () => openClawToolsFactoryMocks.tool("gateway"),
+}));
+vi.mock("./heartbeat-response-tool.js", () => ({
+  createHeartbeatResponseTool: () => openClawToolsFactoryMocks.tool("heartbeat_response"),
+}));
+vi.mock("./image-generate-tool.js", () => ({
+  createImageGenerateTool: () => null,
+}));
+vi.mock("./image-tool.js", () => ({
+  createImageTool: () => null,
+}));
+vi.mock("./manifest-capability-availability.js", () => ({
+  hasSnapshotCapabilityAvailability: () => false,
+  hasSnapshotProviderEnvAvailability: () => false,
+  loadCapabilityMetadataSnapshot: () => ({ index: {}, plugins: [] }),
+}));
+vi.mock("./music-generate-tool.js", () => ({
+  createMusicGenerateTool: () => null,
+}));
+vi.mock("./nodes-tool.js", () => ({
+  createNodesTool: () => openClawToolsFactoryMocks.tool("nodes"),
+}));
+vi.mock("./pdf-tool.js", () => ({
+  createPdfTool: () => null,
+}));
+vi.mock("./session-status-tool.js", () => ({
+  createSessionStatusTool: () => openClawToolsFactoryMocks.tool("session_status"),
+}));
+vi.mock("./sessions-history-tool.js", () => ({
+  createSessionsHistoryTool: () => openClawToolsFactoryMocks.tool("sessions_history"),
+}));
+vi.mock("./sessions-list-tool.js", () => ({
+  createSessionsListTool: () => openClawToolsFactoryMocks.tool("sessions_list"),
+}));
+vi.mock("./sessions-send-tool.js", () => ({
+  createSessionsSendTool: () => openClawToolsFactoryMocks.tool("sessions_send"),
+}));
+vi.mock("./sessions-spawn-tool.js", () => ({
+  createSessionsSpawnTool: () => openClawToolsFactoryMocks.tool("sessions_spawn"),
+}));
+vi.mock("./sessions-yield-tool.js", () => ({
+  createSessionsYieldTool: () => openClawToolsFactoryMocks.tool("sessions_yield"),
+}));
+vi.mock("./subagents-tool.js", () => ({
+  createSubagentsTool: () => openClawToolsFactoryMocks.tool("subagents"),
+}));
+vi.mock("./tts-tool.js", () => ({
+  createTtsTool: () => openClawToolsFactoryMocks.tool("tts"),
+}));
+vi.mock("./update-plan-tool.js", () => ({
+  createUpdatePlanTool: () => openClawToolsFactoryMocks.tool("update_plan"),
+}));
+vi.mock("./video-generate-tool.js", () => ({
+  createVideoGenerateTool: () => null,
+}));
+vi.mock("./web-tools.js", () => ({
+  createWebFetchTool: () => openClawToolsFactoryMocks.tool("web_fetch"),
+  createWebSearchTool: () => openClawToolsFactoryMocks.tool("web_search"),
+}));
+
 function mockSendResult(overrides: { channel?: string; to?: string } = {}) {
   mocks.runMessageAction.mockClear();
   mocks.runMessageAction.mockResolvedValue({
diff --git a/src/config/doc-baseline.integration.test.ts b/src/config/doc-baseline.integration.test.ts
index f945f363d1e..e410b5b08df 100644
--- a/src/config/doc-baseline.integration.test.ts
+++ b/src/config/doc-baseline.integration.test.ts
@@ -1,6 +1,6 @@
 import fs from "node:fs/promises";
 import path from "node:path";
-import { describe, expect, it } from "vitest";
+import { describe, expect, it, vi } from "vitest";
 import { withTempDir } from "../test-helpers/temp-dir.js";
 import {
   type ConfigDocBaselineEntry,
@@ -9,6 +9,14 @@ import {
   writeConfigDocBaselineArtifacts,
 } from "./doc-baseline.js";
 
+vi.mock("./doc-baseline.runtime.js", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("./doc-baseline.runtime.js")>();
+  return {
+    ...actual,
+    collectBundledChannelConfigs: () => undefined,
+  };
+});
+
 describe("config doc baseline integration", () => {
   let sharedRenderedPromise: Promise<
     Awaited<ReturnType<typeof renderConfigDocBaselineArtifacts>>