From 48ae976333035563418f15a9fd3a76aa9bdeaeec Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Fri, 27 Mar 2026 00:16:11 +0000
Subject: [PATCH] refactor: split cli runner pipeline

---
 extensions/anthropic/cli-backend.ts       |  83 +--
 extensions/anthropic/cli-shared.ts        |  84 +++
 src/agents/cli-output.test.ts             |  75 ++
 src/agents/cli-output.ts                  | 215 ++++++
 src/agents/cli-runner.helpers.test.ts     |  73 +-
 src/agents/cli-runner.reliability.test.ts | 177 +++++
 src/agents/cli-runner.session.test.ts     |  49 ++
 src/agents/cli-runner.spawn.test.ts       | 433 +++++++++++
 src/agents/cli-runner.test-support.ts     | 243 +++++++
 src/agents/cli-runner.test.ts             | 849 ----------------------
 src/agents/cli-runner.ts                  | 478 +-----------
 src/agents/cli-runner/execute.ts          | 270 +++++++
 src/agents/cli-runner/helpers.ts          | 162 -----
 src/agents/cli-runner/log.ts              |   5 +
 src/agents/cli-runner/prepare.ts          | 167 +++++
 src/agents/cli-runner/types.ts            |  58 ++
 src/agents/cli-session.ts                 |   7 +-
 17 files changed, 1817 insertions(+), 1611 deletions(-)
 create mode 100644 extensions/anthropic/cli-shared.ts
 create mode 100644 src/agents/cli-output.test.ts
 create mode 100644 src/agents/cli-output.ts
 create mode 100644 src/agents/cli-runner.reliability.test.ts
 create mode 100644 src/agents/cli-runner.session.test.ts
 create mode 100644 src/agents/cli-runner.spawn.test.ts
 create mode 100644 src/agents/cli-runner.test-support.ts
 delete mode 100644 src/agents/cli-runner.test.ts
 create mode 100644 src/agents/cli-runner/execute.ts
 create mode 100644 src/agents/cli-runner/log.ts
 create mode 100644 src/agents/cli-runner/prepare.ts
 create mode 100644 src/agents/cli-runner/types.ts

diff --git a/extensions/anthropic/cli-backend.ts b/extensions/anthropic/cli-backend.ts
index 75c3793c34a..c2bc0458418 100644
--- a/extensions/anthropic/cli-backend.ts
+++ b/extensions/anthropic/cli-backend.ts
@@ -3,78 +3,17 @@ import {
   CLI_FRESH_WATCHDOG_DEFAULTS,
   CLI_RESUME_WATCHDOG_DEFAULTS,
 } from "openclaw/plugin-sdk/cli-backend";
-
-const CLAUDE_MODEL_ALIASES: Record<string, string> = {
-  opus: "opus",
-  "opus-4.6": "opus",
-  "opus-4.5": "opus",
-  "opus-4": "opus",
-  "claude-opus-4-6": "opus",
-  "claude-opus-4-5": "opus",
-  "claude-opus-4": "opus",
-  sonnet: "sonnet",
-  "sonnet-4.6": "sonnet",
-  "sonnet-4.5": "sonnet",
-  "sonnet-4.1": "sonnet",
-  "sonnet-4.0": "sonnet",
-  "claude-sonnet-4-6": "sonnet",
-  "claude-sonnet-4-5": "sonnet",
-  "claude-sonnet-4-1": "sonnet",
-  "claude-sonnet-4-0": "sonnet",
-  haiku: "haiku",
-  "haiku-3.5": "haiku",
-  "claude-haiku-3-5": "haiku",
-};
-
-const CLAUDE_LEGACY_SKIP_PERMISSIONS_ARG = "--dangerously-skip-permissions";
-const CLAUDE_PERMISSION_MODE_ARG = "--permission-mode";
-const CLAUDE_BYPASS_PERMISSIONS_MODE = "bypassPermissions";
-
-function normalizeClaudePermissionArgs(args?: string[]): string[] | undefined {
-  if (!args) {
-    return args;
-  }
-  const normalized: string[] = [];
-  let sawLegacySkip = false;
-  let hasPermissionMode = false;
-  for (let i = 0; i < args.length; i += 1) {
-    const arg = args[i];
-    if (arg === CLAUDE_LEGACY_SKIP_PERMISSIONS_ARG) {
-      sawLegacySkip = true;
-      continue;
-    }
-    if (arg === CLAUDE_PERMISSION_MODE_ARG) {
-      hasPermissionMode = true;
-      normalized.push(arg);
-      const maybeValue = args[i + 1];
-      if (typeof maybeValue === "string") {
-        normalized.push(maybeValue);
-        i += 1;
-      }
-      continue;
-    }
-    if (arg.startsWith(`${CLAUDE_PERMISSION_MODE_ARG}=`)) {
-      hasPermissionMode = true;
-    }
-    normalized.push(arg);
-  }
-  if (sawLegacySkip && !hasPermissionMode) {
-    normalized.push(CLAUDE_PERMISSION_MODE_ARG, CLAUDE_BYPASS_PERMISSIONS_MODE);
-  }
-  return normalized;
-}
-
-function normalizeClaudeBackendConfig(config: CliBackendConfig): CliBackendConfig {
-  return {
-    ...config,
-    args: normalizeClaudePermissionArgs(config.args),
-    resumeArgs: normalizeClaudePermissionArgs(config.resumeArgs),
-  };
-}
+import {
+  CLAUDE_CLI_BACKEND_ID,
+  CLAUDE_CLI_CLEAR_ENV,
+  CLAUDE_CLI_MODEL_ALIASES,
+  CLAUDE_CLI_SESSION_ID_FIELDS,
+  normalizeClaudeBackendConfig,
+} from "./cli-shared.js";
 
 export function buildAnthropicCliBackend(): CliBackendPlugin {
   return {
-    id: "claude-cli",
+    id: CLAUDE_CLI_BACKEND_ID,
     bundleMcp: true,
     config: {
       command: "claude",
@@ -99,14 +38,14 @@ export function buildAnthropicCliBackend(): CliBackendPlugin {
       output: "jsonl",
       input: "arg",
       modelArg: "--model",
-      modelAliases: CLAUDE_MODEL_ALIASES,
+      modelAliases: CLAUDE_CLI_MODEL_ALIASES,
       sessionArg: "--session-id",
       sessionMode: "always",
-      sessionIdFields: ["session_id", "sessionId", "conversation_id", "conversationId"],
+      sessionIdFields: [...CLAUDE_CLI_SESSION_ID_FIELDS],
       systemPromptArg: "--append-system-prompt",
       systemPromptMode: "append",
       systemPromptWhen: "first",
-      clearEnv: ["ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY_OLD"],
+      clearEnv: [...CLAUDE_CLI_CLEAR_ENV],
       reliability: {
         watchdog: {
           fresh: { ...CLI_FRESH_WATCHDOG_DEFAULTS },
diff --git a/extensions/anthropic/cli-shared.ts b/extensions/anthropic/cli-shared.ts
new file mode 100644
index 00000000000..3050f6c781a
--- /dev/null
+++ b/extensions/anthropic/cli-shared.ts
@@ -0,0 +1,84 @@
+import type { CliBackendConfig } from "openclaw/plugin-sdk/cli-backend";
+
+export const CLAUDE_CLI_BACKEND_ID = "claude-cli";
+
+export const CLAUDE_CLI_MODEL_ALIASES: Record<string, string> = {
+  opus: "opus",
+  "opus-4.6": "opus",
+  "opus-4.5": "opus",
+  "opus-4": "opus",
+  "claude-opus-4-6": "opus",
+  "claude-opus-4-5": "opus",
+  "claude-opus-4": "opus",
+  sonnet: "sonnet",
+  "sonnet-4.6": "sonnet",
+  "sonnet-4.5": "sonnet",
+  "sonnet-4.1": "sonnet",
+  "sonnet-4.0": "sonnet",
+  "claude-sonnet-4-6": "sonnet",
+  "claude-sonnet-4-5": "sonnet",
+  "claude-sonnet-4-1": "sonnet",
+  "claude-sonnet-4-0": "sonnet",
+  haiku: "haiku",
+  "haiku-3.5": "haiku",
+  "claude-haiku-3-5": "haiku",
+};
+
+export const CLAUDE_CLI_SESSION_ID_FIELDS = [
+  "session_id",
+  "sessionId",
+  "conversation_id",
+  "conversationId",
+] as const;
+
+export const CLAUDE_CLI_CLEAR_ENV = ["ANTHROPIC_API_KEY", "ANTHROPIC_API_KEY_OLD"] as const;
+
+const CLAUDE_LEGACY_SKIP_PERMISSIONS_ARG = "--dangerously-skip-permissions";
+const CLAUDE_PERMISSION_MODE_ARG = "--permission-mode";
+const CLAUDE_BYPASS_PERMISSIONS_MODE = "bypassPermissions";
+
+export function isClaudeCliProvider(providerId: string): boolean {
+  return CLAUDE_CLI_BACKEND_ID === providerId.trim().toLowerCase(); // ignores case/padding
+}
+
+/**
+ * Swaps the legacy `--dangerously-skip-permissions` flag for
+ * `--permission-mode bypassPermissions`; with a mode already present it is just dropped.
+ */
+export function normalizeClaudePermissionArgs(args?: string[]): string[] | undefined {
+  if (!args) {
+    return args;
+  }
+  const kept: string[] = [];
+  let legacySkipSeen = false;
+  let explicitMode = false;
+  for (let index = 0; index < args.length; index += 1) {
+    const current = args[index];
+    if (current === CLAUDE_LEGACY_SKIP_PERMISSIONS_ARG) {
+      legacySkipSeen = true;
+      continue;
+    }
+    kept.push(current);
+    const takesValue = current === CLAUDE_PERMISSION_MODE_ARG;
+    if (takesValue || current.startsWith(`${CLAUDE_PERMISSION_MODE_ARG}=`)) {
+      explicitMode = true;
+    }
+    const next = args[index + 1];
+    if (takesValue && typeof next === "string") {
+      kept.push(next); // copy the mode value verbatim and step over it
+      index += 1;
+    }
+  }
+  if (legacySkipSeen && !explicitMode) {
+    kept.push(CLAUDE_PERMISSION_MODE_ARG, CLAUDE_BYPASS_PERMISSIONS_MODE);
+  }
+  return kept;
+}
+
+/** Returns a copy of `config` whose `args` and `resumeArgs` are permission-normalized. */
+export function normalizeClaudeBackendConfig(config: CliBackendConfig): CliBackendConfig {
+  const args = normalizeClaudePermissionArgs(config.args);
+  const resumeArgs = normalizeClaudePermissionArgs(config.resumeArgs);
+  // Listed after the spread so the normalized arrays replace the originals.
+  return { ...config, args, resumeArgs };
+}
diff --git a/src/agents/cli-output.test.ts b/src/agents/cli-output.test.ts
new file mode 100644
index 00000000000..8c203058e9d
--- /dev/null
+++ b/src/agents/cli-output.test.ts
@@ -0,0 +1,75 @@
+import { describe, expect, it } from "vitest";
+import { parseCliJsonl } from "./cli-output.js";
+
+describe("parseCliJsonl", () => {
+  it("parses Claude stream-json result events", () => {
+    const result = parseCliJsonl(
+      [
+        JSON.stringify({ type: "init", session_id: "session-123" }),
+        JSON.stringify({
+          type: "result",
+          session_id: "session-123",
+          result: "Claude says hello",
+          usage: {
+            input_tokens: 12,
+            output_tokens: 3,
+            cache_read_input_tokens: 4,
+          },
+        }),
+      ].join("\n"),
+      {
+        command: "claude",
+        output: "jsonl",
+        sessionIdFields: ["session_id"],
+      },
+      "claude-cli",
+    );
+
+    expect(result).toEqual({
+      text: "Claude says hello",
+      sessionId: "session-123",
+      usage: {
+        input: 12,
+        output: 3,
+        cacheRead: 4,
+        cacheWrite: undefined,
+        total: undefined,
+      },
+    });
+  });
+
+  it("preserves Claude session metadata even when the final result text is empty", () => {
+    const result = parseCliJsonl(
+      [
+        JSON.stringify({ type: "init", session_id: "session-456" }),
+        JSON.stringify({
+          type: "result",
+          session_id: "session-456",
+          result: "   ",
+          usage: {
+            input_tokens: 18,
+            output_tokens: 0,
+          },
+        }),
+      ].join("\n"),
+      {
+        command: "claude",
+        output: "jsonl",
+        sessionIdFields: ["session_id"],
+      },
+      "claude-cli",
+    );
+
+    expect(result).toEqual({
+      text: "",
+      sessionId: "session-456",
+      usage: {
+        input: 18,
+        output: undefined,
+        cacheRead: undefined,
+        cacheWrite: undefined,
+        total: undefined,
+      },
+    });
+  });
+});
diff --git a/src/agents/cli-output.ts b/src/agents/cli-output.ts
new file mode 100644
index 00000000000..c02c2670ab2
--- /dev/null
+++ b/src/agents/cli-output.ts
@@ -0,0 +1,215 @@
+import { isClaudeCliProvider } from "../../extensions/anthropic/cli-shared.js";
+import type { CliBackendConfig } from "../config/types.js";
+import { isRecord } from "../utils.js";
+
+type CliUsage = {
+  input?: number;
+  output?: number;
+  cacheRead?: number;
+  cacheWrite?: number;
+  total?: number;
+};
+
+export type CliOutput = {
+  text: string;
+  sessionId?: string;
+  usage?: CliUsage;
+};
+
+/** Maps raw token counters to `CliUsage`; returns undefined when none are positive. */
+function toCliUsage(raw: Record<string, unknown>): CliUsage | undefined {
+  // The first listed key holding a positive number wins; zero and non-numbers are skipped.
+  const firstPositive = (...keys: string[]): number | undefined =>
+    keys.map((key) => raw[key]).find((v): v is number => typeof v === "number" && v > 0);
+  const usage: CliUsage = {
+    input: firstPositive("input_tokens", "inputTokens"),
+    output: firstPositive("output_tokens", "outputTokens"),
+    cacheRead: firstPositive("cache_read_input_tokens", "cached_input_tokens", "cacheRead"),
+    cacheWrite: firstPositive("cache_write_input_tokens", "cacheWrite"),
+    total: firstPositive("total_tokens", "total"),
+  };
+  return Object.values(usage).some(Boolean) ? usage : undefined;
+}
+
+/**
+ * Recursively pulls display text out of a CLI payload: strings are returned
+ * as-is, arrays are concatenated, and records yield `text`, then `content`
+ * (string or array), then a nested `message`. Everything else produces "".
+ */
+function collectCliText(value: unknown): string {
+  if (typeof value === "string") {
+    return value;
+  }
+  if (Array.isArray(value)) {
+    return value.map((entry) => collectCliText(entry)).join("");
+  }
+  if (!value || !isRecord(value)) {
+    return "";
+  }
+  const { text, content, message } = value;
+  if (typeof text === "string") {
+    return text;
+  }
+  if (typeof content === "string") {
+    return content;
+  }
+  if (Array.isArray(content)) {
+    return content.map((entry) => collectCliText(entry)).join("");
+  }
+  return isRecord(message) ? collectCliText(message) : "";
+}
+
+/** Returns the first non-blank string (trimmed) among the backend's session id fields. */
+function pickCliSessionId(
+  parsed: Record<string, unknown>,
+  backend: CliBackendConfig,
+): string | undefined {
+  // Fall back to the common snake_case/camelCase id keys when none are configured.
+  const candidates = backend.sessionIdFields ?? [
+    "session_id",
+    "sessionId",
+    "conversation_id",
+    "conversationId",
+  ];
+  // Non-string and whitespace-only values are skipped; field order decides priority.
+  const match = candidates
+    .map((field) => parsed[field])
+    .find((value): value is string => typeof value === "string" && value.trim() !== "");
+  return match?.trim();
+}
+
+/**
+ * Parses one JSON document from a CLI; null if blank, invalid JSON, or not a record.
+ */
+export function parseCliJson(raw: string, backend: CliBackendConfig): CliOutput | null {
+  const source = raw.trim();
+  if (source === "") {
+    return null;
+  }
+  let doc: unknown;
+  try {
+    doc = JSON.parse(source);
+  } catch {
+    return null;
+  }
+  if (!isRecord(doc)) {
+    return null;
+  }
+  // Prefer the most specific text carrier; the whole document is the last resort.
+  const carriers = [doc.message, doc.content, doc.result, doc];
+  const text = carriers.map((carrier) => collectCliText(carrier)).find(Boolean) ?? "";
+  const usage = isRecord(doc.usage) ? toCliUsage(doc.usage) : undefined;
+  return { text: text.trim(), sessionId: pickCliSessionId(doc, backend), usage };
+}
+
+/**
+ * Recognizes the terminal Claude CLI stream event: `type: "result"` carrying a
+ * string `result`. Returns null for every other event and for providers other
+ * than the Claude CLI.
+ */
+function parseClaudeCliJsonlResult(params: {
+  providerId: string;
+  parsed: Record<string, unknown>;
+  sessionId?: string;
+  usage?: CliUsage;
+}): CliOutput | null {
+  const { providerId, parsed, sessionId, usage } = params;
+  if (!isClaudeCliProvider(providerId)) {
+    return null;
+  }
+  if (parsed.type !== "result" || typeof parsed.result !== "string") {
+    return null;
+  }
+  // Claude may finish with an empty result after tool-only work. The trimmed
+  // text is then "", while the resolved session handle and usage are still
+  // returned instead of being dropped.
+  const text = parsed.result.trim();
+  return { text, sessionId, usage };
+}
+
+/**
+ * Parses newline-delimited JSON emitted by a CLI backend. Lines that are blank,
+ * not valid JSON, or not records are skipped. The first session id found (a
+ * configured id field, else `thread_id`) and the latest non-empty usage are
+ * kept. A Claude `result` event ends parsing immediately; otherwise the
+ * `item.text` of message-like items is joined with newlines. Returns null
+ * when no text was collected.
+ */
+export function parseCliJsonl(
+  raw: string,
+  backend: CliBackendConfig,
+  providerId: string,
+): CliOutput | null {
+  let sessionId: string | undefined;
+  let usage: CliUsage | undefined;
+  const collected: string[] = [];
+  for (const rawLine of raw.split(/\r?\n/g)) {
+    const line = rawLine.trim();
+    if (!line) {
+      continue;
+    }
+    let event: unknown;
+    try {
+      event = JSON.parse(line);
+    } catch {
+      continue;
+    }
+    if (!isRecord(event)) {
+      continue;
+    }
+    if (!sessionId) {
+      const threadId = typeof event.thread_id === "string" ? event.thread_id.trim() : undefined;
+      sessionId = pickCliSessionId(event, backend) ?? threadId;
+    }
+    if (isRecord(event.usage)) {
+      usage = toCliUsage(event.usage) ?? usage;
+    }
+    // A Claude result event is terminal and wins over any collected item text.
+    const claudeResult = parseClaudeCliJsonlResult({
+      providerId,
+      parsed: event,
+      sessionId,
+      usage,
+    });
+    if (claudeResult) {
+      return claudeResult;
+    }
+    const item = event.item;
+    if (!isRecord(item) || typeof item.text !== "string") {
+      continue;
+    }
+    const itemType = typeof item.type === "string" ? item.type.toLowerCase() : "";
+    if (itemType === "" || itemType.includes("message")) {
+      collected.push(item.text);
+    }
+  }
+  const text = collected.join("\n").trim();
+  return text ? { text, sessionId, usage } : null;
+}
+
+/**
+ * Turns raw CLI stdout into a `CliOutput` according to `outputMode` ("text"
+ * when omitted). Text mode, and structured output that cannot be parsed, yield
+ * the trimmed raw text paired with `fallbackSessionId`. Structured output that
+ * parses is returned as produced by the JSON/JSONL parser.
+ */
+export function parseCliOutput(params: {
+  raw: string;
+  backend: CliBackendConfig;
+  providerId: string;
+  outputMode?: "json" | "jsonl" | "text";
+  fallbackSessionId?: string;
+}): CliOutput {
+  const { raw, backend, providerId, fallbackSessionId } = params;
+  // Built lazily so the fallback object is only created when it is returned.
+  const asPlainText = (): CliOutput => ({ text: raw.trim(), sessionId: fallbackSessionId });
+  switch (params.outputMode ?? "text") {
+    case "text":
+      return asPlainText();
+    case "jsonl":
+      return parseCliJsonl(raw, backend, providerId) ?? asPlainText();
+    default:
+      // "json", the only remaining mode, is parsed as a single document.
+      return parseCliJson(raw, backend) ?? asPlainText();
+  }
+}
diff --git a/src/agents/cli-runner.helpers.test.ts b/src/agents/cli-runner.helpers.test.ts
index 600d3e3d2bc..95659c178bf 100644
--- a/src/agents/cli-runner.helpers.test.ts
+++ b/src/agents/cli-runner.helpers.test.ts
@@ -1,7 +1,7 @@
 import type { ImageContent } from "@mariozechner/pi-ai";
 import { beforeEach, describe, expect, it, vi } from "vitest";
 import { MAX_IMAGE_BYTES } from "../media/constants.js";
-import { buildCliArgs, loadPromptRefImages, parseCliJsonl } from "./cli-runner/helpers.js";
+import { buildCliArgs, loadPromptRefImages } from "./cli-runner/helpers.js";
 import * as promptImageUtils from "./pi-embedded-runner/run/images.js";
 import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
 import * as toolImages from "./tool-images.js";
@@ -118,74 +118,3 @@ describe("buildCliArgs", () => {
     ).toEqual(["exec", "resume", "thread-123", "--model", "gpt-5.4"]);
   });
 });
-
-describe("parseCliJsonl", () => {
-  it("parses Claude stream-json result events", () => {
-    const result = parseCliJsonl(
-      [
-        JSON.stringify({ type: "init", session_id: "session-123" }),
-        JSON.stringify({
-          type: "result",
-          session_id: "session-123",
-          result: "Claude says hello",
-          usage: {
-            input_tokens: 12,
-            output_tokens: 3,
-            cache_read_input_tokens: 4,
-          },
-        }),
-      ].join("\n"),
-      {
-        command: "claude",
-        output: "jsonl",
-        sessionIdFields: ["session_id"],
-      },
-    );
-
-    expect(result).toEqual({
-      text: "Claude says hello",
-      sessionId: "session-123",
-      usage: {
-        input: 12,
-        output: 3,
-        cacheRead: 4,
-        cacheWrite: undefined,
-        total: undefined,
-      },
-    });
-  });
-
-  it("preserves Claude session metadata even when the final result text is empty", () => {
-    const result = parseCliJsonl(
-      [
-        JSON.stringify({ type: "init", session_id: "session-456" }),
-        JSON.stringify({
-          type: "result",
-          session_id: "session-456",
-          result: "   ",
-          usage: {
-            input_tokens: 18,
-            output_tokens: 0,
-          },
-        }),
-      ].join("\n"),
-      {
-        command: "claude",
-        output: "jsonl",
-        sessionIdFields: ["session_id"],
-      },
-    );
-
-    expect(result).toEqual({
-      text: "",
-      sessionId: "session-456",
-      usage: {
-        input: 18,
-        output: undefined,
-        cacheRead: undefined,
-        cacheWrite: undefined,
-        total: undefined,
-      },
-    });
-  });
-});
diff --git a/src/agents/cli-runner.reliability.test.ts b/src/agents/cli-runner.reliability.test.ts
new file mode 100644
index 00000000000..12fb530bd44
--- /dev/null
+++ b/src/agents/cli-runner.reliability.test.ts
@@ -0,0 +1,177 @@
+import { describe, expect, it } from "vitest";
+import {
+  createManagedRun,
+  enqueueSystemEventMock,
+  requestHeartbeatNowMock,
+  setupCliRunnerTestModule,
+  supervisorSpawnMock,
+} from "./cli-runner.test-support.js";
+import { resolveCliNoOutputTimeoutMs } from "./cli-runner/helpers.js";
+
+describe("runCliAgent reliability", () => {
+  it("fails with timeout when no-output watchdog trips", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "no-output-timeout",
+        exitCode: null,
+        exitSignal: "SIGKILL",
+        durationMs: 200,
+        stdout: "",
+        stderr: "",
+        timedOut: true,
+        noOutputTimedOut: true,
+      }),
+    );
+
+    await expect(
+      runCliAgent({
+        sessionId: "s1",
+        sessionFile: "/tmp/session.jsonl",
+        workspaceDir: "/tmp",
+        prompt: "hi",
+        provider: "codex-cli",
+        model: "gpt-5.2-codex",
+        timeoutMs: 1_000,
+        runId: "run-2",
+        cliSessionId: "thread-123",
+      }),
+    ).rejects.toThrow("produced no output");
+  });
+
+  it("enqueues a system event and heartbeat wake on no-output watchdog timeout for session runs", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "no-output-timeout",
+        exitCode: null,
+        exitSignal: "SIGKILL",
+        durationMs: 200,
+        stdout: "",
+        stderr: "",
+        timedOut: true,
+        noOutputTimedOut: true,
+      }),
+    );
+
+    await expect(
+      runCliAgent({
+        sessionId: "s1",
+        sessionKey: "agent:main:main",
+        sessionFile: "/tmp/session.jsonl",
+        workspaceDir: "/tmp",
+        prompt: "hi",
+        provider: "codex-cli",
+        model: "gpt-5.2-codex",
+        timeoutMs: 1_000,
+        runId: "run-2b",
+        cliSessionId: "thread-123",
+      }),
+    ).rejects.toThrow("produced no output");
+
+    expect(enqueueSystemEventMock).toHaveBeenCalledTimes(1);
+    const [notice, opts] = enqueueSystemEventMock.mock.calls[0] ?? [];
+    expect(String(notice)).toContain("produced no output");
+    expect(String(notice)).toContain("interactive input or an approval prompt");
+    expect(opts).toMatchObject({ sessionKey: "agent:main:main" });
+    expect(requestHeartbeatNowMock).toHaveBeenCalledWith({
+      reason: "cli:watchdog:stall",
+      sessionKey: "agent:main:main",
+    });
+  });
+
+  it("fails with timeout when overall timeout trips", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "overall-timeout",
+        exitCode: null,
+        exitSignal: "SIGKILL",
+        durationMs: 200,
+        stdout: "",
+        stderr: "",
+        timedOut: true,
+        noOutputTimedOut: false,
+      }),
+    );
+
+    await expect(
+      runCliAgent({
+        sessionId: "s1",
+        sessionFile: "/tmp/session.jsonl",
+        workspaceDir: "/tmp",
+        prompt: "hi",
+        provider: "codex-cli",
+        model: "gpt-5.2-codex",
+        timeoutMs: 1_000,
+        runId: "run-3",
+        cliSessionId: "thread-123",
+      }),
+    ).rejects.toThrow("exceeded timeout");
+  });
+
+  it("rethrows the retry failure when session-expired recovery retry also fails", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "exit",
+        exitCode: 1,
+        exitSignal: null,
+        durationMs: 150,
+        stdout: "",
+        stderr: "session expired",
+        timedOut: false,
+        noOutputTimedOut: false,
+      }),
+    );
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "exit",
+        exitCode: 1,
+        exitSignal: null,
+        durationMs: 150,
+        stdout: "",
+        stderr: "rate limit exceeded",
+        timedOut: false,
+        noOutputTimedOut: false,
+      }),
+    );
+
+    await expect(
+      runCliAgent({
+        sessionId: "s1",
+        sessionKey: "agent:main:subagent:retry",
+        sessionFile: "/tmp/session.jsonl",
+        workspaceDir: "/tmp",
+        prompt: "hi",
+        provider: "codex-cli",
+        model: "gpt-5.2-codex",
+        timeoutMs: 1_000,
+        runId: "run-retry-failure",
+        cliSessionId: "thread-123",
+      }),
+    ).rejects.toThrow("rate limit exceeded");
+
+    expect(supervisorSpawnMock).toHaveBeenCalledTimes(2);
+  });
+});
+
+describe("resolveCliNoOutputTimeoutMs", () => {
+  it("uses backend-configured resume watchdog override", () => {
+    const timeoutMs = resolveCliNoOutputTimeoutMs({
+      backend: {
+        command: "codex",
+        reliability: {
+          watchdog: {
+            resume: {
+              noOutputTimeoutMs: 42_000,
+            },
+          },
+        },
+      },
+      timeoutMs: 120_000,
+      useResume: true,
+    });
+    expect(timeoutMs).toBe(42_000);
+  });
+});
diff --git a/src/agents/cli-runner.session.test.ts b/src/agents/cli-runner.session.test.ts
new file mode 100644
index 00000000000..ea812243594
--- /dev/null
+++ b/src/agents/cli-runner.session.test.ts
@@ -0,0 +1,49 @@
+import { describe, expect, it } from "vitest";
+import {
+  mockSuccessfulCliRun,
+  runExistingCodexCliAgent,
+  setupCliRunnerTestModule,
+  supervisorSpawnMock,
+} from "./cli-runner.test-support.js";
+
+describe("runCliAgent session behavior", () => {
+  it("keeps resuming the CLI across model changes and passes the new model flag", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    mockSuccessfulCliRun();
+
+    await runExistingCodexCliAgent({
+      runCliAgent,
+      runId: "run-model-switch",
+      cliSessionBindingAuthProfileId: "openai:default",
+      authProfileId: "openai:default",
+    });
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
+    expect(input.argv).toEqual([
+      "codex",
+      "exec",
+      "resume",
+      "thread-123",
+      "--json",
+      "--model",
+      "gpt-5.4",
+      "hi",
+    ]);
+  });
+
+  it("starts a fresh CLI session when the auth profile changes", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    mockSuccessfulCliRun();
+
+    await runExistingCodexCliAgent({
+      runCliAgent,
+      runId: "run-auth-change",
+      cliSessionBindingAuthProfileId: "openai:work",
+      authProfileId: "openai:personal",
+    });
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[]; scopeKey?: string };
+    expect(input.argv).toEqual(["codex", "exec", "--json", "--model", "gpt-5.4", "hi"]);
+    expect(input.scopeKey).toBeUndefined();
+  });
+});
diff --git a/src/agents/cli-runner.spawn.test.ts b/src/agents/cli-runner.spawn.test.ts
new file mode 100644
index 00000000000..c80ec374e09
--- /dev/null
+++ b/src/agents/cli-runner.spawn.test.ts
@@ -0,0 +1,433 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import { describe, expect, it, vi } from "vitest";
+import type { OpenClawConfig } from "../config/config.js";
+import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
+import {
+  createManagedRun,
+  mockSuccessfulCliRun,
+  runCliAgentWithBackendConfig,
+  setupCliRunnerTestModule,
+  SMALL_PNG_BASE64,
+  stubBootstrapContext,
+  supervisorSpawnMock,
+} from "./cli-runner.test-support.js";
+
+describe("runCliAgent spawn path", () => {
+  it("does not inject hardcoded 'Tools are disabled' text into CLI arguments", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    // Queue the canned successful exit (code 0, stdout "ok") via the shared test-support helper
+    // instead of repeating the createManagedRun literal here.
+    mockSuccessfulCliRun();
+
+    await runCliAgent({
+      sessionId: "s1",
+      sessionFile: "/tmp/session.jsonl",
+      workspaceDir: "/tmp",
+      prompt: "Run: node script.mjs",
+      provider: "claude-cli",
+      model: "sonnet",
+      timeoutMs: 1_000,
+      runId: "run-no-tools-disabled",
+      extraSystemPrompt: "You are a helpful assistant.",
+    });
+
+    // Fail with a clear assertion if nothing was spawned; otherwise the cast below would
+    // hide an undefined input and the test would die on a TypeError instead.
+    expect(supervisorSpawnMock).toHaveBeenCalledTimes(1);
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
+    // Use claude-cli because it defines systemPromptArg ("--append-system-prompt"),
+    // so the system prompt is serialized into argv. The codex-cli backend lacks
+    // systemPromptArg, meaning the prompt is dropped before reaching argv —
+    // making the assertion vacuous. See: openclaw/openclaw#44135
+    const allArgs = (input.argv ?? []).join("\n");
+    expect(allArgs).not.toContain("Tools are disabled in this session");
+    // Verify the user-supplied system prompt IS present (proves the arg path works)
+    expect(allArgs).toContain("You are a helpful assistant.");
+  });
+
+  it("injects a strict empty MCP config for bundle-MCP-enabled Claude CLI runs", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "exit",
+        exitCode: 0,
+        exitSignal: null,
+        durationMs: 50,
+        stdout: JSON.stringify({
+          session_id: "session-123",
+          message: "ok",
+        }),
+        stderr: "",
+        timedOut: false,
+        noOutputTimedOut: false,
+      }),
+    );
+
+    await runCliAgent({
+      sessionId: "s1",
+      sessionFile: "/tmp/session.jsonl",
+      workspaceDir: "/tmp",
+      config: {
+        agents: {
+          defaults: {
+            cliBackends: {
+              "claude-cli": {
+                command: "node",
+                args: ["/tmp/fake-claude.mjs"],
+                clearEnv: [],
+              },
+            },
+          },
+        },
+      } satisfies OpenClawConfig,
+      prompt: "hi",
+      provider: "claude-cli",
+      model: "claude-sonnet-4-6",
+      timeoutMs: 1_000,
+      runId: "run-bundle-mcp-empty",
+    });
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
+    expect(input.argv?.[0]).toBe("node");
+    expect(input.argv).toContain("/tmp/fake-claude.mjs");
+    expect(input.argv).toContain("--strict-mcp-config");
+    const configFlagIndex = input.argv?.indexOf("--mcp-config") ?? -1;
+    expect(configFlagIndex).toBeGreaterThanOrEqual(0);
+    expect(input.argv?.[configFlagIndex + 1]).toMatch(/^\/.+\/mcp\.json$/);
+  });
+
+  it("runs CLI through supervisor and returns payload", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "exit",
+        exitCode: 0,
+        exitSignal: null,
+        durationMs: 50,
+        stdout: "ok",
+        stderr: "",
+        timedOut: false,
+        noOutputTimedOut: false,
+      }),
+    );
+
+    const result = await runCliAgent({
+      sessionId: "s1",
+      sessionFile: "/tmp/session.jsonl",
+      workspaceDir: "/tmp",
+      prompt: "hi",
+      provider: "codex-cli",
+      model: "gpt-5.2-codex",
+      timeoutMs: 1_000,
+      runId: "run-1",
+      cliSessionId: "thread-123",
+    });
+
+    expect(result.payloads?.[0]?.text).toBe("ok");
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as {
+      argv?: string[];
+      mode?: string;
+      timeoutMs?: number;
+      noOutputTimeoutMs?: number;
+      replaceExistingScope?: boolean;
+      scopeKey?: string;
+    };
+    expect(input.mode).toBe("child");
+    expect(input.argv?.[0]).toBe("codex");
+    expect(input.timeoutMs).toBe(1_000);
+    expect(input.noOutputTimeoutMs).toBeGreaterThanOrEqual(1_000);
+    expect(input.replaceExistingScope).toBe(true);
+    expect(input.scopeKey).toContain("thread-123");
+  });
+
+  it("sanitizes dangerous backend env overrides before spawn", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    mockSuccessfulCliRun();
+    await runCliAgentWithBackendConfig({
+      runCliAgent,
+      backend: {
+        command: "codex",
+        env: {
+          NODE_OPTIONS: "--require ./malicious.js",
+          LD_PRELOAD: "/tmp/pwn.so",
+          PATH: "/tmp/evil",
+          HOME: "/tmp/evil-home",
+          SAFE_KEY: "ok",
+        },
+      },
+      runId: "run-env-sanitized",
+    });
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as {
+      env?: Record<string, string | undefined>;
+    };
+    expect(input.env?.SAFE_KEY).toBe("ok");
+    expect(input.env?.PATH).toBe(process.env.PATH);
+    expect(input.env?.HOME).toBe(process.env.HOME);
+    expect(input.env?.NODE_OPTIONS).toBeUndefined();
+    expect(input.env?.LD_PRELOAD).toBeUndefined();
+  });
+
+  it("applies clearEnv after sanitizing backend env overrides", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    // Stub rather than assign process.env directly: the vi.unstubAllEnvs() hook registered in
+    // cli-runner.test-support then restores the variable, so it cannot leak into later tests.
+    vi.stubEnv("SAFE_CLEAR", "from-base");
+    mockSuccessfulCliRun();
+    await runCliAgentWithBackendConfig({
+      runCliAgent,
+      backend: {
+        command: "codex",
+        env: { SAFE_KEEP: "keep-me" },
+        clearEnv: ["SAFE_CLEAR"],
+      },
+      runId: "run-clear-env",
+    });
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as {
+      env?: Record<string, string | undefined>;
+    };
+    expect(input.env?.SAFE_KEEP).toBe("keep-me");
+    expect(input.env?.SAFE_CLEAR).toBeUndefined();
+  });
+
+  it("prepends bootstrap warnings to the CLI prompt body", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "exit",
+        exitCode: 0,
+        exitSignal: null,
+        durationMs: 50,
+        stdout: "ok",
+        stderr: "",
+        timedOut: false,
+        noOutputTimedOut: false,
+      }),
+    );
+    stubBootstrapContext({
+      bootstrapFiles: [
+        {
+          name: "AGENTS.md",
+          path: "/tmp/AGENTS.md",
+          content: "A".repeat(200),
+          missing: false,
+        },
+      ],
+      contextFiles: [{ path: "AGENTS.md", content: "A".repeat(20) }],
+    });
+
+    await runCliAgent({
+      sessionId: "s1",
+      sessionFile: "/tmp/session.jsonl",
+      workspaceDir: "/tmp",
+      config: {
+        agents: {
+          defaults: {
+            bootstrapMaxChars: 50,
+            bootstrapTotalMaxChars: 50,
+          },
+        },
+      } satisfies OpenClawConfig,
+      prompt: "hi",
+      provider: "codex-cli",
+      model: "gpt-5.2-codex",
+      timeoutMs: 1_000,
+      runId: "run-warning",
+      cliSessionId: "thread-123",
+    });
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as {
+      argv?: string[];
+      input?: string;
+    };
+    const promptCarrier = [input.input ?? "", ...(input.argv ?? [])].join("\n");
+
+    expect(promptCarrier).toContain("[Bootstrap truncation warning]");
+    expect(promptCarrier).toContain("- AGENTS.md: 200 raw -> 20 injected");
+    expect(promptCarrier).toContain("hi");
+  });
+
+  it("hydrates prompt media refs into CLI image args", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "exit",
+        exitCode: 0,
+        exitSignal: null,
+        durationMs: 50,
+        stdout: "ok",
+        stderr: "",
+        timedOut: false,
+        noOutputTimedOut: false,
+      }),
+    );
+
+    const tempDir = await fs.mkdtemp(
+      path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-"),
+    );
+    const sourceImage = path.join(tempDir, "bb-image.png");
+    await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
+
+    try {
+      await runCliAgent({
+        sessionId: "s1",
+        sessionFile: "/tmp/session.jsonl",
+        workspaceDir: tempDir,
+        prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
+        provider: "codex-cli",
+        model: "gpt-5.2-codex",
+        timeoutMs: 1_000,
+        runId: "run-prompt-image",
+      });
+    } finally {
+      await fs.rm(tempDir, { recursive: true, force: true });
+    }
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
+    const argv = input.argv ?? [];
+    const imageArgIndex = argv.indexOf("--image");
+    expect(imageArgIndex).toBeGreaterThanOrEqual(0);
+    expect(argv[imageArgIndex + 1]).toContain("openclaw-cli-images-");
+    expect(argv[imageArgIndex + 1]).not.toBe(sourceImage);
+  });
+
+  it("appends hydrated prompt media refs to generic backend prompts", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "exit",
+        exitCode: 0,
+        exitSignal: null,
+        durationMs: 50,
+        stdout: "ok",
+        stderr: "",
+        timedOut: false,
+        noOutputTimedOut: false,
+      }),
+    );
+
+    const tempDir = await fs.mkdtemp(
+      path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-generic-"),
+    );
+    const sourceImage = path.join(tempDir, "claude-image.png");
+    await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
+
+    try {
+      await runCliAgent({
+        sessionId: "s1",
+        sessionFile: "/tmp/session.jsonl",
+        workspaceDir: tempDir,
+        prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
+        provider: "claude-cli",
+        model: "claude-opus-4-1",
+        timeoutMs: 1_000,
+        runId: "run-prompt-image-generic",
+      });
+    } finally {
+      await fs.rm(tempDir, { recursive: true, force: true });
+    }
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[]; input?: string };
+    const argv = input.argv ?? [];
+    expect(argv).not.toContain("--image");
+    const promptCarrier = [input.input ?? "", ...argv].join("\n");
+    const appendedPath = argv.find((value) => value.includes("openclaw-cli-images-"));
+    expect(appendedPath).toBeDefined();
+    expect(appendedPath).not.toBe(sourceImage);
+    expect(promptCarrier).toContain(appendedPath ?? "");
+  });
+
+  it("prefers explicit images over prompt refs", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "exit",
+        exitCode: 0,
+        exitSignal: null,
+        durationMs: 50,
+        stdout: "ok",
+        stderr: "",
+        timedOut: false,
+        noOutputTimedOut: false,
+      }),
+    );
+
+    const tempDir = await fs.mkdtemp(
+      path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-explicit-images-"),
+    );
+    const sourceImage = path.join(tempDir, "ignored-prompt-image.png");
+    await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
+
+    try {
+      await runCliAgent({
+        sessionId: "s1",
+        sessionFile: "/tmp/session.jsonl",
+        workspaceDir: tempDir,
+        prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
+        images: [{ type: "image", data: SMALL_PNG_BASE64, mimeType: "image/png" }],
+        provider: "codex-cli",
+        model: "gpt-5.2-codex",
+        timeoutMs: 1_000,
+        runId: "run-explicit-image-precedence",
+      });
+    } finally {
+      await fs.rm(tempDir, { recursive: true, force: true });
+    }
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
+    const argv = input.argv ?? [];
+    expect(argv.filter((arg) => arg === "--image")).toHaveLength(1);
+  });
+
+  it("falls back to per-agent workspace when workspaceDir is missing", async () => {
+    const runCliAgent = await setupCliRunnerTestModule();
+    // Create the scratch dir under the preferred OpenClaw tmp root, like the other tests in this
+    // suite, instead of assuming a POSIX-style /tmp fallback exists on the host.
+    const tempDir = await fs.mkdtemp(
+      path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-runner-"),
+    );
+    const fallbackWorkspace = path.join(tempDir, "workspace-main");
+    await fs.mkdir(fallbackWorkspace, { recursive: true });
+    const cfg = {
+      agents: { defaults: { workspace: fallbackWorkspace } },
+    } satisfies OpenClawConfig;
+
+    supervisorSpawnMock.mockResolvedValueOnce(
+      createManagedRun({
+        reason: "exit",
+        exitCode: 0,
+        exitSignal: null,
+        durationMs: 25,
+        stdout: "ok",
+        stderr: "",
+        timedOut: false,
+        noOutputTimedOut: false,
+      }),
+    );
+
+    try {
+      await runCliAgent({
+        sessionId: "s1",
+        sessionKey: "agent:main:subagent:missing-workspace",
+        sessionFile: "/tmp/session.jsonl",
+        // Cast through unknown to simulate a caller passing no workspaceDir despite its string type.
+        workspaceDir: undefined as unknown as string,
+        config: cfg,
+        prompt: "hi",
+        provider: "codex-cli",
+        model: "gpt-5.2-codex",
+        timeoutMs: 1_000,
+        runId: "run-4",
+      });
+    } finally {
+      await fs.rm(tempDir, { recursive: true, force: true });
+    }
+
+    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { cwd?: string };
+    // The spawn must run in the workspace configured under agents.defaults instead.
+    expect(input.cwd).toBe(path.resolve(fallbackWorkspace));
+  });
+});
diff --git a/src/agents/cli-runner.test-support.ts b/src/agents/cli-runner.test-support.ts
new file mode 100644
index 00000000000..1f85aa69417
--- /dev/null
+++ b/src/agents/cli-runner.test-support.ts
@@ -0,0 +1,243 @@
+import fs from "node:fs/promises";
+import { beforeEach, vi } from "vitest";
+import { buildAnthropicCliBackend } from "../../extensions/anthropic/cli-backend.js";
+import { buildGoogleGeminiCliBackend } from "../../extensions/google/cli-backend.js";
+import { buildOpenAICodexCliBackend } from "../../extensions/openai/cli-backend.js";
+import type { OpenClawConfig } from "../config/config.js";
+import { createEmptyPluginRegistry } from "../plugins/registry.js";
+import { setActivePluginRegistry } from "../plugins/runtime.js";
+import type { EmbeddedContextFile } from "./pi-embedded-helpers.js";
+import type { WorkspaceBootstrapFile } from "./workspace.js";
+
+export const supervisorSpawnMock = vi.fn();
+export const enqueueSystemEventMock = vi.fn();
+export const requestHeartbeatNowMock = vi.fn();
+export const SMALL_PNG_BASE64 =
+  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
+
+const hoisted = vi.hoisted(() => {
+  type BootstrapContext = {
+    bootstrapFiles: WorkspaceBootstrapFile[];
+    contextFiles: EmbeddedContextFile[];
+  };
+
+  return {
+    resolveBootstrapContextForRunMock: vi.fn<() => Promise<BootstrapContext>>(async () => ({
+      bootstrapFiles: [],
+      contextFiles: [],
+    })),
+  };
+});
+
+vi.mock("../process/supervisor/index.js", () => ({
+  getProcessSupervisor: () => ({
+    spawn: (...args: unknown[]) => supervisorSpawnMock(...args),
+    cancel: vi.fn(),
+    cancelScope: vi.fn(),
+    reconcileOrphans: vi.fn(),
+    getRecord: vi.fn(),
+  }),
+}));
+
+vi.mock("../infra/system-events.js", () => ({
+  enqueueSystemEvent: (...args: unknown[]) => enqueueSystemEventMock(...args),
+}));
+
+vi.mock("../infra/heartbeat-wake.js", () => ({
+  requestHeartbeatNow: (...args: unknown[]) => requestHeartbeatNowMock(...args),
+}));
+
+vi.mock("./bootstrap-files.js", () => ({
+  makeBootstrapWarn: () => () => {},
+  resolveBootstrapContextForRun: hoisted.resolveBootstrapContextForRunMock,
+}));
+
+type MockRunExit = {
+  reason:
+    | "manual-cancel"
+    | "overall-timeout"
+    | "no-output-timeout"
+    | "spawn-error"
+    | "signal"
+    | "exit";
+  exitCode: number | null;
+  exitSignal: NodeJS.Signals | number | null;
+  durationMs: number;
+  stdout: string;
+  stderr: string;
+  timedOut: boolean;
+  noOutputTimedOut: boolean;
+};
+
+type TestCliBackendConfig = {
+  command: string;
+  env?: Record<string, string>;
+  clearEnv?: string[];
+};
+
+export function createManagedRun(exit: MockRunExit, pid = 1234) {
+  return {
+    runId: "run-supervisor",
+    pid,
+    startedAtMs: Date.now(),
+    stdin: undefined,
+    wait: vi.fn().mockResolvedValue(exit),
+    cancel: vi.fn(),
+  };
+}
+
+export function mockSuccessfulCliRun() {
+  supervisorSpawnMock.mockResolvedValueOnce(
+    createManagedRun({
+      reason: "exit",
+      exitCode: 0,
+      exitSignal: null,
+      durationMs: 50,
+      stdout: "ok",
+      stderr: "",
+      timedOut: false,
+      noOutputTimedOut: false,
+    }),
+  );
+}
+
+export const EXISTING_CODEX_CONFIG = {
+  agents: {
+    defaults: {
+      cliBackends: {
+        "codex-cli": {
+          command: "codex",
+          args: ["exec", "--json"],
+          resumeArgs: ["exec", "resume", "{sessionId}", "--json"],
+          output: "text",
+          modelArg: "--model",
+          sessionMode: "existing",
+        },
+      },
+    },
+  },
+} satisfies OpenClawConfig;
+
+/**
+ * Prepares isolated state for one CLI runner test and loads a fresh runner module.
+ *
+ * Activates a plugin registry holding the Anthropic, OpenAI Codex and Google Gemini CLI
+ * backends, resets the shared mocks, re-registers the module mocks after
+ * `vi.resetModules()`, and dynamically imports `./cli-runner.js`.
+ *
+ * @returns The `runCliAgent` export of the freshly imported module.
+ */
+export async function setupCliRunnerTestModule() {
+  const registry = createEmptyPluginRegistry();
+  registry.cliBackends = [
+    { pluginId: "anthropic", backend: buildAnthropicCliBackend(), source: "test" },
+    { pluginId: "openai", backend: buildOpenAICodexCliBackend(), source: "test" },
+    { pluginId: "google", backend: buildGoogleGeminiCliBackend(), source: "test" },
+  ];
+  setActivePluginRegistry(registry);
+  // mockReset (unlike mockClear) also drops queued mockResolvedValueOnce results, so a result
+  // an earlier test queued but never consumed cannot be handed to this test's spawn call.
+  supervisorSpawnMock.mockReset();
+  enqueueSystemEventMock.mockClear();
+  requestHeartbeatNowMock.mockClear();
+  hoisted.resolveBootstrapContextForRunMock.mockReset().mockResolvedValue({
+    bootstrapFiles: [],
+    contextFiles: [],
+  });
+
+  vi.resetModules();
+  // vi.doMock is not hoisted, so these factories apply to the dynamic import below.
+  vi.doMock("../process/supervisor/index.js", () => ({
+    getProcessSupervisor: () => ({
+      spawn: (...args: unknown[]) => supervisorSpawnMock(...args),
+      cancel: vi.fn(),
+      cancelScope: vi.fn(),
+      reconcileOrphans: vi.fn(),
+      getRecord: vi.fn(),
+    }),
+  }));
+  vi.doMock("../infra/system-events.js", () => ({
+    enqueueSystemEvent: (...args: unknown[]) => enqueueSystemEventMock(...args),
+  }));
+  vi.doMock("../infra/heartbeat-wake.js", () => ({
+    requestHeartbeatNow: (...args: unknown[]) => requestHeartbeatNowMock(...args),
+  }));
+  vi.doMock("./bootstrap-files.js", () => ({
+    makeBootstrapWarn: () => () => {},
+    resolveBootstrapContextForRun: hoisted.resolveBootstrapContextForRunMock,
+  }));
+  return (await import("./cli-runner.js")).runCliAgent;
+}
+
+export function stubBootstrapContext(params: {
+  bootstrapFiles: WorkspaceBootstrapFile[];
+  contextFiles: EmbeddedContextFile[];
+}) {
+  hoisted.resolveBootstrapContextForRunMock.mockResolvedValueOnce(params);
+}
+
+export async function runCliAgentWithBackendConfig(params: {
+  runCliAgent: typeof import("./cli-runner.js").runCliAgent;
+  backend: TestCliBackendConfig;
+  runId: string;
+}) {
+  await params.runCliAgent({
+    sessionId: "s1",
+    sessionFile: "/tmp/session.jsonl",
+    workspaceDir: "/tmp",
+    config: {
+      agents: {
+        defaults: {
+          cliBackends: {
+            "codex-cli": params.backend,
+          },
+        },
+      },
+    } satisfies OpenClawConfig,
+    prompt: "hi",
+    provider: "codex-cli",
+    model: "gpt-5.2-codex",
+    timeoutMs: 1_000,
+    runId: params.runId,
+    cliSessionId: "thread-123",
+  });
+}
+
+export async function runExistingCodexCliAgent(params: {
+  runCliAgent: typeof import("./cli-runner.js").runCliAgent;
+  runId: string;
+  cliSessionBindingAuthProfileId: string;
+  authProfileId: string;
+}) {
+  await params.runCliAgent({
+    sessionId: "s1",
+    sessionFile: "/tmp/session.jsonl",
+    workspaceDir: "/tmp",
+    config: EXISTING_CODEX_CONFIG,
+    prompt: "hi",
+    provider: "codex-cli",
+    model: "gpt-5.4",
+    timeoutMs: 1_000,
+    runId: params.runId,
+    cliSessionBinding: {
+      sessionId: "thread-123",
+      authProfileId: params.cliSessionBindingAuthProfileId,
+    },
+    authProfileId: params.authProfileId,
+  });
+}
+
+export async function withTempImageFile(
+  prefix: string,
+): Promise<{ tempDir: string; sourceImage: string }> {
+  // Nothing here removes the directory afterwards; cleanup of `tempDir` is left to the caller.
+  const [osModule, pathModule] = await Promise.all([import("node:os"), import("node:path")]);
+  const tempDir = await fs.mkdtemp(pathModule.join(osModule.tmpdir(), prefix));
+  const sourceImage = pathModule.join(tempDir, "image.png");
+  await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
+  return { tempDir, sourceImage };
+}
+
+beforeEach(() => {
+  vi.unstubAllEnvs();
+});
diff --git a/src/agents/cli-runner.test.ts b/src/agents/cli-runner.test.ts
deleted file mode 100644
index cd1ab119952..00000000000
--- a/src/agents/cli-runner.test.ts
+++ /dev/null
@@ -1,849 +0,0 @@
-import fs from "node:fs/promises";
-import os from "node:os";
-import path from "node:path";
-import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
-import { buildAnthropicCliBackend } from "../../extensions/anthropic/cli-backend.js";
-import { buildGoogleGeminiCliBackend } from "../../extensions/google/cli-backend.js";
-import { buildOpenAICodexCliBackend } from "../../extensions/openai/cli-backend.js";
-import type { OpenClawConfig } from "../config/config.js";
-import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
-import { createEmptyPluginRegistry } from "../plugins/registry.js";
-import { setActivePluginRegistry } from "../plugins/runtime.js";
-import { resolveCliNoOutputTimeoutMs } from "./cli-runner/helpers.js";
-import type { EmbeddedContextFile } from "./pi-embedded-helpers.js";
-import type { WorkspaceBootstrapFile } from "./workspace.js";
-
-const supervisorSpawnMock = vi.fn();
-const enqueueSystemEventMock = vi.fn();
-const requestHeartbeatNowMock = vi.fn();
-const SMALL_PNG_BASE64 =
-  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
-const hoisted = vi.hoisted(() => {
-  type BootstrapContext = {
-    bootstrapFiles: WorkspaceBootstrapFile[];
-    contextFiles: EmbeddedContextFile[];
-  };
-
-  return {
-    resolveBootstrapContextForRunMock: vi.fn<() => Promise<BootstrapContext>>(async () => ({
-      bootstrapFiles: [],
-      contextFiles: [],
-    })),
-  };
-});
-
-vi.mock("../process/supervisor/index.js", () => ({
-  getProcessSupervisor: () => ({
-    spawn: (...args: unknown[]) => supervisorSpawnMock(...args),
-    cancel: vi.fn(),
-    cancelScope: vi.fn(),
-    reconcileOrphans: vi.fn(),
-    getRecord: vi.fn(),
-  }),
-}));
-
-vi.mock("../infra/system-events.js", () => ({
-  enqueueSystemEvent: (...args: unknown[]) => enqueueSystemEventMock(...args),
-}));
-
-vi.mock("../infra/heartbeat-wake.js", () => ({
-  requestHeartbeatNow: (...args: unknown[]) => requestHeartbeatNowMock(...args),
-}));
-
-vi.mock("./bootstrap-files.js", () => ({
-  makeBootstrapWarn: () => () => {},
-  resolveBootstrapContextForRun: hoisted.resolveBootstrapContextForRunMock,
-}));
-
-let runCliAgent: typeof import("./cli-runner.js").runCliAgent;
-
-async function loadFreshCliRunnerModuleForTest() {
-  vi.resetModules();
-  vi.doMock("../process/supervisor/index.js", () => ({
-    getProcessSupervisor: () => ({
-      spawn: (...args: unknown[]) => supervisorSpawnMock(...args),
-      cancel: vi.fn(),
-      cancelScope: vi.fn(),
-      reconcileOrphans: vi.fn(),
-      getRecord: vi.fn(),
-    }),
-  }));
-  vi.doMock("../infra/system-events.js", () => ({
-    enqueueSystemEvent: (...args: unknown[]) => enqueueSystemEventMock(...args),
-  }));
-  vi.doMock("../infra/heartbeat-wake.js", () => ({
-    requestHeartbeatNow: (...args: unknown[]) => requestHeartbeatNowMock(...args),
-  }));
-  vi.doMock("./bootstrap-files.js", () => ({
-    makeBootstrapWarn: () => () => {},
-    resolveBootstrapContextForRun: hoisted.resolveBootstrapContextForRunMock,
-  }));
-  ({ runCliAgent } = await import("./cli-runner.js"));
-}
-
-type MockRunExit = {
-  reason:
-    | "manual-cancel"
-    | "overall-timeout"
-    | "no-output-timeout"
-    | "spawn-error"
-    | "signal"
-    | "exit";
-  exitCode: number | null;
-  exitSignal: NodeJS.Signals | number | null;
-  durationMs: number;
-  stdout: string;
-  stderr: string;
-  timedOut: boolean;
-  noOutputTimedOut: boolean;
-};
-
-type TestCliBackendConfig = {
-  command: string;
-  env?: Record<string, string>;
-  clearEnv?: string[];
-};
-
-function createManagedRun(exit: MockRunExit, pid = 1234) {
-  return {
-    runId: "run-supervisor",
-    pid,
-    startedAtMs: Date.now(),
-    stdin: undefined,
-    wait: vi.fn().mockResolvedValue(exit),
-    cancel: vi.fn(),
-  };
-}
-
-function mockSuccessfulCliRun() {
-  supervisorSpawnMock.mockResolvedValueOnce(
-    createManagedRun({
-      reason: "exit",
-      exitCode: 0,
-      exitSignal: null,
-      durationMs: 50,
-      stdout: "ok",
-      stderr: "",
-      timedOut: false,
-      noOutputTimedOut: false,
-    }),
-  );
-}
-
-async function runCliAgentWithBackendConfig(params: {
-  backend: TestCliBackendConfig;
-  runId: string;
-}) {
-  await runCliAgent({
-    sessionId: "s1",
-    sessionFile: "/tmp/session.jsonl",
-    workspaceDir: "/tmp",
-    config: {
-      agents: {
-        defaults: {
-          cliBackends: {
-            "codex-cli": params.backend,
-          },
-        },
-      },
-    } satisfies OpenClawConfig,
-    prompt: "hi",
-    provider: "codex-cli",
-    model: "gpt-5.2-codex",
-    timeoutMs: 1_000,
-    runId: params.runId,
-    cliSessionId: "thread-123",
-  });
-}
-
-const EXISTING_CODEX_CONFIG = {
-  agents: {
-    defaults: {
-      cliBackends: {
-        "codex-cli": {
-          command: "codex",
-          args: ["exec", "--json"],
-          resumeArgs: ["exec", "resume", "{sessionId}", "--json"],
-          output: "text",
-          modelArg: "--model",
-          sessionMode: "existing",
-        },
-      },
-    },
-  },
-} satisfies OpenClawConfig;
-
-async function runExistingCodexCliAgent(params: {
-  runId: string;
-  cliSessionBindingAuthProfileId: string;
-  authProfileId: string;
-}) {
-  await runCliAgent({
-    sessionId: "s1",
-    sessionFile: "/tmp/session.jsonl",
-    workspaceDir: "/tmp",
-    config: EXISTING_CODEX_CONFIG,
-    prompt: "hi",
-    provider: "codex-cli",
-    model: "gpt-5.4",
-    timeoutMs: 1_000,
-    runId: params.runId,
-    cliSessionBinding: {
-      sessionId: "thread-123",
-      authProfileId: params.cliSessionBindingAuthProfileId,
-    },
-    authProfileId: params.authProfileId,
-  });
-}
-
-describe("runCliAgent with process supervisor", () => {
-  afterEach(() => {
-    vi.unstubAllEnvs();
-  });
-
-  beforeEach(async () => {
-    const registry = createEmptyPluginRegistry();
-    registry.cliBackends = [
-      {
-        pluginId: "anthropic",
-        backend: buildAnthropicCliBackend(),
-        source: "test",
-      },
-      {
-        pluginId: "openai",
-        backend: buildOpenAICodexCliBackend(),
-        source: "test",
-      },
-      {
-        pluginId: "google",
-        backend: buildGoogleGeminiCliBackend(),
-        source: "test",
-      },
-    ];
-    setActivePluginRegistry(registry);
-    supervisorSpawnMock.mockClear();
-    enqueueSystemEventMock.mockClear();
-    requestHeartbeatNowMock.mockClear();
-    hoisted.resolveBootstrapContextForRunMock.mockReset().mockResolvedValue({
-      bootstrapFiles: [],
-      contextFiles: [],
-    });
-    await loadFreshCliRunnerModuleForTest();
-  });
-
-  it("does not inject hardcoded 'Tools are disabled' text into CLI arguments", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "exit",
-        exitCode: 0,
-        exitSignal: null,
-        durationMs: 50,
-        stdout: "ok",
-        stderr: "",
-        timedOut: false,
-        noOutputTimedOut: false,
-      }),
-    );
-
-    await runCliAgent({
-      sessionId: "s1",
-      sessionFile: "/tmp/session.jsonl",
-      workspaceDir: "/tmp",
-      prompt: "Run: node script.mjs",
-      provider: "claude-cli",
-      model: "sonnet",
-      timeoutMs: 1_000,
-      runId: "run-no-tools-disabled",
-      extraSystemPrompt: "You are a helpful assistant.",
-    });
-
-    expect(supervisorSpawnMock).toHaveBeenCalledTimes(1);
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as {
-      argv?: string[];
-      input?: string;
-    };
-    // Use claude-cli because it defines systemPromptArg ("--append-system-prompt"),
-    // so the system prompt is serialized into argv. The codex-cli backend lacks
-    // systemPromptArg, meaning the prompt is dropped before reaching argv —
-    // making the assertion vacuous. See: openclaw/openclaw#44135
-    const allArgs = (input.argv ?? []).join("\n");
-    expect(allArgs).not.toContain("Tools are disabled in this session");
-    // Verify the user-supplied system prompt IS present (proves the arg path works)
-    expect(allArgs).toContain("You are a helpful assistant.");
-  });
-
-  it("injects a strict empty MCP config for bundle-MCP-enabled Claude CLI runs", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "exit",
-        exitCode: 0,
-        exitSignal: null,
-        durationMs: 50,
-        stdout: JSON.stringify({
-          session_id: "session-123",
-          message: "ok",
-        }),
-        stderr: "",
-        timedOut: false,
-        noOutputTimedOut: false,
-      }),
-    );
-
-    await runCliAgent({
-      sessionId: "s1",
-      sessionFile: "/tmp/session.jsonl",
-      workspaceDir: "/tmp",
-      config: {
-        agents: {
-          defaults: {
-            cliBackends: {
-              "claude-cli": {
-                command: "node",
-                args: ["/tmp/fake-claude.mjs"],
-                clearEnv: [],
-              },
-            },
-          },
-        },
-      } satisfies OpenClawConfig,
-      prompt: "hi",
-      provider: "claude-cli",
-      model: "claude-sonnet-4-6",
-      timeoutMs: 1_000,
-      runId: "run-bundle-mcp-empty",
-    });
-
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
-    expect(input.argv?.[0]).toBe("node");
-    expect(input.argv).toContain("/tmp/fake-claude.mjs");
-    expect(input.argv).toContain("--strict-mcp-config");
-    const configFlagIndex = input.argv?.indexOf("--mcp-config") ?? -1;
-    expect(configFlagIndex).toBeGreaterThanOrEqual(0);
-    expect(input.argv?.[configFlagIndex + 1]).toMatch(/^\/.+\/mcp\.json$/);
-  });
-
-  it("runs CLI through supervisor and returns payload", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "exit",
-        exitCode: 0,
-        exitSignal: null,
-        durationMs: 50,
-        stdout: "ok",
-        stderr: "",
-        timedOut: false,
-        noOutputTimedOut: false,
-      }),
-    );
-
-    const result = await runCliAgent({
-      sessionId: "s1",
-      sessionFile: "/tmp/session.jsonl",
-      workspaceDir: "/tmp",
-      prompt: "hi",
-      provider: "codex-cli",
-      model: "gpt-5.2-codex",
-      timeoutMs: 1_000,
-      runId: "run-1",
-      cliSessionId: "thread-123",
-    });
-
-    expect(result.payloads?.[0]?.text).toBe("ok");
-    expect(supervisorSpawnMock).toHaveBeenCalledTimes(1);
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as {
-      argv?: string[];
-      mode?: string;
-      timeoutMs?: number;
-      noOutputTimeoutMs?: number;
-      replaceExistingScope?: boolean;
-      scopeKey?: string;
-    };
-    expect(input.mode).toBe("child");
-    expect(input.argv?.[0]).toBe("codex");
-    expect(input.timeoutMs).toBe(1_000);
-    expect(input.noOutputTimeoutMs).toBeGreaterThanOrEqual(1_000);
-    expect(input.replaceExistingScope).toBe(true);
-    expect(input.scopeKey).toContain("thread-123");
-  });
-
-  it("keeps resuming the CLI across model changes and passes the new model flag", async () => {
-    mockSuccessfulCliRun();
-
-    await runExistingCodexCliAgent({
-      runId: "run-model-switch",
-      cliSessionBindingAuthProfileId: "openai:default",
-      authProfileId: "openai:default",
-    });
-
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
-    expect(input.argv).toEqual([
-      "codex",
-      "exec",
-      "resume",
-      "thread-123",
-      "--json",
-      "--model",
-      "gpt-5.4",
-      "hi",
-    ]);
-  });
-
-  it("starts a fresh CLI session when the auth profile changes", async () => {
-    mockSuccessfulCliRun();
-
-    await runExistingCodexCliAgent({
-      runId: "run-auth-change",
-      cliSessionBindingAuthProfileId: "openai:work",
-      authProfileId: "openai:personal",
-    });
-
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[]; scopeKey?: string };
-    expect(input.argv).toEqual(["codex", "exec", "--json", "--model", "gpt-5.4", "hi"]);
-    expect(input.scopeKey).toBeUndefined();
-  });
-
-  it("sanitizes dangerous backend env overrides before spawn", async () => {
-    vi.stubEnv("PATH", "/usr/bin:/bin");
-    vi.stubEnv("HOME", "/tmp/trusted-home");
-
-    mockSuccessfulCliRun();
-    await runCliAgentWithBackendConfig({
-      backend: {
-        command: "codex",
-        env: {
-          NODE_OPTIONS: "--require ./malicious.js",
-          LD_PRELOAD: "/tmp/pwn.so",
-          PATH: "/tmp/evil",
-          HOME: "/tmp/evil-home",
-          SAFE_KEY: "ok",
-        },
-      },
-      runId: "run-env-sanitized",
-    });
-
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as {
-      env?: Record<string, string | undefined>;
-    };
-    expect(input.env?.SAFE_KEY).toBe("ok");
-    expect(input.env?.PATH).toBe("/usr/bin:/bin");
-    expect(input.env?.HOME).toBe("/tmp/trusted-home");
-    expect(input.env?.NODE_OPTIONS).toBeUndefined();
-    expect(input.env?.LD_PRELOAD).toBeUndefined();
-  });
-
-  it("applies clearEnv after sanitizing backend env overrides", async () => {
-    vi.stubEnv("PATH", "/usr/bin:/bin");
-    vi.stubEnv("SAFE_CLEAR", "from-base");
-
-    mockSuccessfulCliRun();
-    await runCliAgentWithBackendConfig({
-      backend: {
-        command: "codex",
-        env: {
-          SAFE_KEEP: "keep-me",
-        },
-        clearEnv: ["SAFE_CLEAR"],
-      },
-      runId: "run-clear-env",
-    });
-
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as {
-      env?: Record<string, string | undefined>;
-    };
-    expect(input.env?.SAFE_KEEP).toBe("keep-me");
-    expect(input.env?.SAFE_CLEAR).toBeUndefined();
-  });
-
-  it("prepends bootstrap warnings to the CLI prompt body", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "exit",
-        exitCode: 0,
-        exitSignal: null,
-        durationMs: 50,
-        stdout: "ok",
-        stderr: "",
-        timedOut: false,
-        noOutputTimedOut: false,
-      }),
-    );
-    hoisted.resolveBootstrapContextForRunMock.mockResolvedValueOnce({
-      bootstrapFiles: [
-        {
-          name: "AGENTS.md",
-          path: "/tmp/AGENTS.md",
-          content: "A".repeat(200),
-          missing: false,
-        },
-      ],
-      contextFiles: [{ path: "AGENTS.md", content: "A".repeat(20) }],
-    });
-
-    await runCliAgent({
-      sessionId: "s1",
-      sessionFile: "/tmp/session.jsonl",
-      workspaceDir: "/tmp",
-      config: {
-        agents: {
-          defaults: {
-            bootstrapMaxChars: 50,
-            bootstrapTotalMaxChars: 50,
-          },
-        },
-      } satisfies OpenClawConfig,
-      prompt: "hi",
-      provider: "codex-cli",
-      model: "gpt-5.2-codex",
-      timeoutMs: 1_000,
-      runId: "run-warning",
-      cliSessionId: "thread-123",
-    });
-
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as {
-      argv?: string[];
-      input?: string;
-    };
-    const promptCarrier = [input.input ?? "", ...(input.argv ?? [])].join("\n");
-
-    expect(promptCarrier).toContain("[Bootstrap truncation warning]");
-    expect(promptCarrier).toContain("- AGENTS.md: 200 raw -> 20 injected");
-    expect(promptCarrier).toContain("hi");
-  });
-
-  it("hydrates prompt media refs into CLI image args", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "exit",
-        exitCode: 0,
-        exitSignal: null,
-        durationMs: 50,
-        stdout: "ok",
-        stderr: "",
-        timedOut: false,
-        noOutputTimedOut: false,
-      }),
-    );
-
-    const tempDir = await fs.mkdtemp(
-      path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-"),
-    );
-    const sourceImage = path.join(tempDir, "bb-image.png");
-    await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
-
-    try {
-      await runCliAgent({
-        sessionId: "s1",
-        sessionFile: "/tmp/session.jsonl",
-        workspaceDir: tempDir,
-        prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
-        provider: "codex-cli",
-        model: "gpt-5.2-codex",
-        timeoutMs: 1_000,
-        runId: "run-prompt-image",
-      });
-    } finally {
-      await fs.rm(tempDir, { recursive: true, force: true });
-    }
-
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
-    const argv = input.argv ?? [];
-    const imageArgIndex = argv.indexOf("--image");
-    expect(imageArgIndex).toBeGreaterThanOrEqual(0);
-    expect(argv[imageArgIndex + 1]).toContain("openclaw-cli-images-");
-    expect(argv[imageArgIndex + 1]).not.toBe(sourceImage);
-  });
-
-  it("appends hydrated prompt media refs to generic backend prompts", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "exit",
-        exitCode: 0,
-        exitSignal: null,
-        durationMs: 50,
-        stdout: "ok",
-        stderr: "",
-        timedOut: false,
-        noOutputTimedOut: false,
-      }),
-    );
-
-    const tempDir = await fs.mkdtemp(
-      path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-prompt-image-generic-"),
-    );
-    const sourceImage = path.join(tempDir, "claude-image.png");
-    await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
-
-    try {
-      await runCliAgent({
-        sessionId: "s1",
-        sessionFile: "/tmp/session.jsonl",
-        workspaceDir: tempDir,
-        prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
-        provider: "claude-cli",
-        model: "claude-opus-4-1",
-        timeoutMs: 1_000,
-        runId: "run-prompt-image-generic",
-      });
-    } finally {
-      await fs.rm(tempDir, { recursive: true, force: true });
-    }
-
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[]; input?: string };
-    const argv = input.argv ?? [];
-    expect(argv).not.toContain("--image");
-    const promptCarrier = [input.input ?? "", ...argv].join("\n");
-    const appendedPath = argv.find((value) => value.includes("openclaw-cli-images-"));
-    expect(appendedPath).toBeDefined();
-    expect(appendedPath).not.toBe(sourceImage);
-    expect(promptCarrier).toContain(appendedPath ?? "");
-  });
-
-  it("prefers explicit images over prompt refs", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "exit",
-        exitCode: 0,
-        exitSignal: null,
-        durationMs: 50,
-        stdout: "ok",
-        stderr: "",
-        timedOut: false,
-        noOutputTimedOut: false,
-      }),
-    );
-
-    const tempDir = await fs.mkdtemp(
-      path.join(resolvePreferredOpenClawTmpDir(), "openclaw-cli-explicit-images-"),
-    );
-    const sourceImage = path.join(tempDir, "ignored-prompt-image.png");
-    await fs.writeFile(sourceImage, Buffer.from(SMALL_PNG_BASE64, "base64"));
-
-    try {
-      await runCliAgent({
-        sessionId: "s1",
-        sessionFile: "/tmp/session.jsonl",
-        workspaceDir: tempDir,
-        prompt: `[media attached: ${sourceImage} (image/png)]\n\n<media:image>`,
-        images: [{ type: "image", data: SMALL_PNG_BASE64, mimeType: "image/png" }],
-        provider: "codex-cli",
-        model: "gpt-5.2-codex",
-        timeoutMs: 1_000,
-        runId: "run-explicit-image-precedence",
-      });
-    } finally {
-      await fs.rm(tempDir, { recursive: true, force: true });
-    }
-
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[] };
-    const argv = input.argv ?? [];
-    expect(argv.filter((arg) => arg === "--image")).toHaveLength(1);
-  });
-
-  it("fails with timeout when no-output watchdog trips", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "no-output-timeout",
-        exitCode: null,
-        exitSignal: "SIGKILL",
-        durationMs: 200,
-        stdout: "",
-        stderr: "",
-        timedOut: true,
-        noOutputTimedOut: true,
-      }),
-    );
-
-    await expect(
-      runCliAgent({
-        sessionId: "s1",
-        sessionFile: "/tmp/session.jsonl",
-        workspaceDir: "/tmp",
-        prompt: "hi",
-        provider: "codex-cli",
-        model: "gpt-5.2-codex",
-        timeoutMs: 1_000,
-        runId: "run-2",
-        cliSessionId: "thread-123",
-      }),
-    ).rejects.toThrow("produced no output");
-  });
-
-  it("enqueues a system event and heartbeat wake on no-output watchdog timeout for session runs", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "no-output-timeout",
-        exitCode: null,
-        exitSignal: "SIGKILL",
-        durationMs: 200,
-        stdout: "",
-        stderr: "",
-        timedOut: true,
-        noOutputTimedOut: true,
-      }),
-    );
-
-    await expect(
-      runCliAgent({
-        sessionId: "s1",
-        sessionKey: "agent:main:main",
-        sessionFile: "/tmp/session.jsonl",
-        workspaceDir: "/tmp",
-        prompt: "hi",
-        provider: "codex-cli",
-        model: "gpt-5.2-codex",
-        timeoutMs: 1_000,
-        runId: "run-2b",
-        cliSessionId: "thread-123",
-      }),
-    ).rejects.toThrow("produced no output");
-
-    expect(enqueueSystemEventMock).toHaveBeenCalledTimes(1);
-    const [notice, opts] = enqueueSystemEventMock.mock.calls[0] ?? [];
-    expect(String(notice)).toContain("produced no output");
-    expect(String(notice)).toContain("interactive input or an approval prompt");
-    expect(opts).toMatchObject({ sessionKey: "agent:main:main" });
-    expect(requestHeartbeatNowMock).toHaveBeenCalledWith({
-      reason: "cli:watchdog:stall",
-      sessionKey: "agent:main:main",
-    });
-  });
-
-  it("fails with timeout when overall timeout trips", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "overall-timeout",
-        exitCode: null,
-        exitSignal: "SIGKILL",
-        durationMs: 200,
-        stdout: "",
-        stderr: "",
-        timedOut: true,
-        noOutputTimedOut: false,
-      }),
-    );
-
-    await expect(
-      runCliAgent({
-        sessionId: "s1",
-        sessionFile: "/tmp/session.jsonl",
-        workspaceDir: "/tmp",
-        prompt: "hi",
-        provider: "codex-cli",
-        model: "gpt-5.2-codex",
-        timeoutMs: 1_000,
-        runId: "run-3",
-        cliSessionId: "thread-123",
-      }),
-    ).rejects.toThrow("exceeded timeout");
-  });
-
-  it("rethrows the retry failure when session-expired recovery retry also fails", async () => {
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "exit",
-        exitCode: 1,
-        exitSignal: null,
-        durationMs: 150,
-        stdout: "",
-        stderr: "session expired",
-        timedOut: false,
-        noOutputTimedOut: false,
-      }),
-    );
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "exit",
-        exitCode: 1,
-        exitSignal: null,
-        durationMs: 150,
-        stdout: "",
-        stderr: "rate limit exceeded",
-        timedOut: false,
-        noOutputTimedOut: false,
-      }),
-    );
-
-    await expect(
-      runCliAgent({
-        sessionId: "s1",
-        sessionKey: "agent:main:subagent:retry",
-        sessionFile: "/tmp/session.jsonl",
-        workspaceDir: "/tmp",
-        prompt: "hi",
-        provider: "codex-cli",
-        model: "gpt-5.2-codex",
-        timeoutMs: 1_000,
-        runId: "run-retry-failure",
-        cliSessionId: "thread-123",
-      }),
-    ).rejects.toThrow("rate limit exceeded");
-
-    expect(supervisorSpawnMock).toHaveBeenCalledTimes(2);
-  });
-
-  it("falls back to per-agent workspace when workspaceDir is missing", async () => {
-    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-cli-runner-"));
-    const fallbackWorkspace = path.join(tempDir, "workspace-main");
-    await fs.mkdir(fallbackWorkspace, { recursive: true });
-    const cfg = {
-      agents: {
-        defaults: {
-          workspace: fallbackWorkspace,
-        },
-      },
-    } satisfies OpenClawConfig;
-
-    supervisorSpawnMock.mockResolvedValueOnce(
-      createManagedRun({
-        reason: "exit",
-        exitCode: 0,
-        exitSignal: null,
-        durationMs: 25,
-        stdout: "ok",
-        stderr: "",
-        timedOut: false,
-        noOutputTimedOut: false,
-      }),
-    );
-
-    try {
-      await runCliAgent({
-        sessionId: "s1",
-        sessionKey: "agent:main:subagent:missing-workspace",
-        sessionFile: "/tmp/session.jsonl",
-        workspaceDir: undefined as unknown as string,
-        config: cfg,
-        prompt: "hi",
-        provider: "codex-cli",
-        model: "gpt-5.2-codex",
-        timeoutMs: 1_000,
-        runId: "run-4",
-      });
-    } finally {
-      await fs.rm(tempDir, { recursive: true, force: true });
-    }
-
-    const input = supervisorSpawnMock.mock.calls[0]?.[0] as { cwd?: string };
-    expect(input.cwd).toBe(path.resolve(fallbackWorkspace));
-  });
-});
-
-describe("resolveCliNoOutputTimeoutMs", () => {
-  it("uses backend-configured resume watchdog override", () => {
-    const timeoutMs = resolveCliNoOutputTimeoutMs({
-      backend: {
-        command: "codex",
-        reliability: {
-          watchdog: {
-            resume: {
-              noOutputTimeoutMs: 42_000,
-            },
-          },
-        },
-      },
-      timeoutMs: 120_000,
-      useResume: true,
-    });
-    expect(timeoutMs).toBe(42_000);
-  });
-});
diff --git a/src/agents/cli-runner.ts b/src/agents/cli-runner.ts
index 55a2d2ad803..25fed7fc20a 100644
--- a/src/agents/cli-runner.ts
+++ b/src/agents/cli-runner.ts
@@ -1,448 +1,18 @@
 import type { ImageContent } from "@mariozechner/pi-ai";
-import { resolveHeartbeatPrompt } from "../auto-reply/heartbeat.js";
 import type { ThinkLevel } from "../auto-reply/thinking.js";
 import type { OpenClawConfig } from "../config/config.js";
-import type { CliSessionBinding } from "../config/sessions.js";
-import { shouldLogVerbose } from "../globals.js";
-import { isTruthyEnvValue } from "../infra/env.js";
-import { requestHeartbeatNow } from "../infra/heartbeat-wake.js";
-import { sanitizeHostExecEnv } from "../infra/host-env-security.js";
-import { enqueueSystemEvent } from "../infra/system-events.js";
-import { createSubsystemLogger } from "../logging/subsystem.js";
-import { getProcessSupervisor } from "../process/supervisor/index.js";
-import { scopedHeartbeatWakeOptions } from "../routing/session-key.js";
-import { resolveSessionAgentIds } from "./agent-scope.js";
-import {
-  analyzeBootstrapBudget,
-  buildBootstrapInjectionStats,
-  buildBootstrapPromptWarning,
-  buildBootstrapTruncationReportMeta,
-  prependBootstrapPromptWarning,
-} from "./bootstrap-budget.js";
-import { makeBootstrapWarn, resolveBootstrapContextForRun } from "./bootstrap-files.js";
-import { resolveCliBackendConfig } from "./cli-backends.js";
-import { prepareCliBundleMcpConfig } from "./cli-runner/bundle-mcp.js";
-import {
-  appendImagePathsToPrompt,
-  buildCliSupervisorScopeKey,
-  buildCliArgs,
-  buildSystemPrompt,
-  enqueueCliRun,
-  loadPromptRefImages,
-  normalizeCliModel,
-  parseCliJson,
-  parseCliJsonl,
-  resolveCliNoOutputTimeoutMs,
-  resolvePromptInput,
-  resolveSessionIdToSend,
-  resolveSystemPromptUsage,
-  writeCliImages,
-} from "./cli-runner/helpers.js";
-import { hashCliSessionText, resolveCliSessionReuse } from "./cli-session.js";
-import { resolveOpenClawDocsPath } from "./docs-path.js";
+import { executePreparedCliRun } from "./cli-runner/execute.js";
+import { prepareCliRunContext } from "./cli-runner/prepare.js";
+import type { RunCliAgentParams } from "./cli-runner/types.js";
 import { FailoverError, resolveFailoverStatus } from "./failover-error.js";
-import {
-  classifyFailoverReason,
-  isFailoverErrorMessage,
-  resolveBootstrapMaxChars,
-  resolveBootstrapPromptTruncationWarningMode,
-  resolveBootstrapTotalMaxChars,
-} from "./pi-embedded-helpers.js";
+import { classifyFailoverReason, isFailoverErrorMessage } from "./pi-embedded-helpers.js";
 import type { EmbeddedPiRunResult } from "./pi-embedded-runner.js";
-import { buildSystemPromptReport } from "./system-prompt-report.js";
-import { redactRunIdentifier, resolveRunWorkspaceDir } from "./workspace-run.js";
 
-const log = createSubsystemLogger("agent/cli-backend");
-const CLI_BACKEND_LOG_OUTPUT_ENV = "OPENCLAW_CLI_BACKEND_LOG_OUTPUT";
-const LEGACY_CLAUDE_CLI_LOG_OUTPUT_ENV = "OPENCLAW_CLAUDE_CLI_LOG_OUTPUT";
-
-export async function runCliAgent(params: {
-  sessionId: string;
-  sessionKey?: string;
-  agentId?: string;
-  sessionFile: string;
-  workspaceDir: string;
-  config?: OpenClawConfig;
-  prompt: string;
-  provider: string;
-  model?: string;
-  thinkLevel?: ThinkLevel;
-  timeoutMs: number;
-  runId: string;
-  extraSystemPrompt?: string;
-  streamParams?: import("./command/types.js").AgentStreamParams;
-  ownerNumbers?: string[];
-  cliSessionId?: string;
-  cliSessionBinding?: CliSessionBinding;
-  authProfileId?: string;
-  bootstrapPromptWarningSignaturesSeen?: string[];
-  /** Backward-compat fallback when only the previous signature is available. */
-  bootstrapPromptWarningSignature?: string;
-  images?: ImageContent[];
-}): Promise<EmbeddedPiRunResult> {
-  const started = Date.now();
-  const workspaceResolution = resolveRunWorkspaceDir({
-    workspaceDir: params.workspaceDir,
-    sessionKey: params.sessionKey,
-    agentId: params.agentId,
-    config: params.config,
-  });
-  const resolvedWorkspace = workspaceResolution.workspaceDir;
-  const redactedSessionId = redactRunIdentifier(params.sessionId);
-  const redactedSessionKey = redactRunIdentifier(params.sessionKey);
-  const redactedWorkspace = redactRunIdentifier(resolvedWorkspace);
-  if (workspaceResolution.usedFallback) {
-    log.warn(
-      `[workspace-fallback] caller=runCliAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
-    );
-  }
-  const workspaceDir = resolvedWorkspace;
-
-  const backendResolved = resolveCliBackendConfig(params.provider, params.config);
-  if (!backendResolved) {
-    throw new Error(`Unknown CLI backend: ${params.provider}`);
-  }
-  const preparedBackend = await prepareCliBundleMcpConfig({
-    enabled: backendResolved.bundleMcp,
-    backend: backendResolved.config,
-    workspaceDir,
-    config: params.config,
-    warn: (message) => log.warn(message),
-  });
-  const backend = preparedBackend.backend;
-  const extraSystemPromptHash = hashCliSessionText(params.extraSystemPrompt);
-  const reusableCliSession = resolveCliSessionReuse({
-    binding:
-      params.cliSessionBinding ??
-      (params.cliSessionId ? { sessionId: params.cliSessionId } : undefined),
-    authProfileId: params.authProfileId,
-    extraSystemPromptHash,
-    mcpConfigHash: preparedBackend.mcpConfigHash,
-  });
-  if (reusableCliSession.invalidatedReason) {
-    log.info(
-      `cli session reset: provider=${params.provider} reason=${reusableCliSession.invalidatedReason}`,
-    );
-  }
-  const modelId = (params.model ?? "default").trim() || "default";
-  const normalizedModel = normalizeCliModel(modelId, backend);
-  const modelDisplay = `${params.provider}/${modelId}`;
-
-  const extraSystemPrompt = params.extraSystemPrompt?.trim() ?? "";
-
-  const sessionLabel = params.sessionKey ?? params.sessionId;
-  const { bootstrapFiles, contextFiles } = await resolveBootstrapContextForRun({
-    workspaceDir,
-    config: params.config,
-    sessionKey: params.sessionKey,
-    sessionId: params.sessionId,
-    warn: makeBootstrapWarn({ sessionLabel, warn: (message) => log.warn(message) }),
-  });
-  const bootstrapMaxChars = resolveBootstrapMaxChars(params.config);
-  const bootstrapTotalMaxChars = resolveBootstrapTotalMaxChars(params.config);
-  const bootstrapAnalysis = analyzeBootstrapBudget({
-    files: buildBootstrapInjectionStats({
-      bootstrapFiles,
-      injectedFiles: contextFiles,
-    }),
-    bootstrapMaxChars,
-    bootstrapTotalMaxChars,
-  });
-  const bootstrapPromptWarningMode = resolveBootstrapPromptTruncationWarningMode(params.config);
-  const bootstrapPromptWarning = buildBootstrapPromptWarning({
-    analysis: bootstrapAnalysis,
-    mode: bootstrapPromptWarningMode,
-    seenSignatures: params.bootstrapPromptWarningSignaturesSeen,
-    previousSignature: params.bootstrapPromptWarningSignature,
-  });
-  const { defaultAgentId, sessionAgentId } = resolveSessionAgentIds({
-    sessionKey: params.sessionKey,
-    config: params.config,
-    agentId: params.agentId,
-  });
-  const heartbeatPrompt =
-    sessionAgentId === defaultAgentId
-      ? resolveHeartbeatPrompt(params.config?.agents?.defaults?.heartbeat?.prompt)
-      : undefined;
-  const docsPath = await resolveOpenClawDocsPath({
-    workspaceDir,
-    argv1: process.argv[1],
-    cwd: process.cwd(),
-    moduleUrl: import.meta.url,
-  });
-  const systemPrompt = buildSystemPrompt({
-    workspaceDir,
-    config: params.config,
-    defaultThinkLevel: params.thinkLevel,
-    extraSystemPrompt,
-    ownerNumbers: params.ownerNumbers,
-    heartbeatPrompt,
-    docsPath: docsPath ?? undefined,
-    tools: [],
-    contextFiles,
-    modelDisplay,
-    agentId: sessionAgentId,
-  });
-  const systemPromptReport = buildSystemPromptReport({
-    source: "run",
-    generatedAt: Date.now(),
-    sessionId: params.sessionId,
-    sessionKey: params.sessionKey,
-    provider: params.provider,
-    model: modelId,
-    workspaceDir,
-    bootstrapMaxChars,
-    bootstrapTotalMaxChars,
-    bootstrapTruncation: buildBootstrapTruncationReportMeta({
-      analysis: bootstrapAnalysis,
-      warningMode: bootstrapPromptWarningMode,
-      warning: bootstrapPromptWarning,
-    }),
-    sandbox: { mode: "off", sandboxed: false },
-    systemPrompt,
-    bootstrapFiles,
-    injectedFiles: contextFiles,
-    skillsPrompt: "",
-    tools: [],
-  });
-
-  // Helper function to execute CLI with given session ID
-  const executeCliWithSession = async (
-    cliSessionIdToUse?: string,
-  ): Promise<{
-    text: string;
-    sessionId?: string;
-    usage?: {
-      input?: number;
-      output?: number;
-      cacheRead?: number;
-      cacheWrite?: number;
-      total?: number;
-    };
-  }> => {
-    const { sessionId: resolvedSessionId, isNew } = resolveSessionIdToSend({
-      backend,
-      cliSessionId: cliSessionIdToUse,
-    });
-    const useResume = Boolean(
-      cliSessionIdToUse && resolvedSessionId && backend.resumeArgs && backend.resumeArgs.length > 0,
-    );
-    const systemPromptArg = resolveSystemPromptUsage({
-      backend,
-      isNewSession: isNew,
-      systemPrompt,
-    });
-
-    let imagePaths: string[] | undefined;
-    let cleanupImages: (() => Promise<void>) | undefined;
-    let prompt = prependBootstrapPromptWarning(params.prompt, bootstrapPromptWarning.lines, {
-      preserveExactPrompt: heartbeatPrompt,
-    });
-    const resolvedImages =
-      params.images && params.images.length > 0
-        ? params.images
-        : await loadPromptRefImages({ prompt, workspaceDir });
-    if (resolvedImages.length > 0) {
-      const imagePayload = await writeCliImages(resolvedImages);
-      imagePaths = imagePayload.paths;
-      cleanupImages = imagePayload.cleanup;
-      if (!backend.imageArg) {
-        prompt = appendImagePathsToPrompt(prompt, imagePaths);
-      }
-    }
-
-    const { argsPrompt, stdin } = resolvePromptInput({
-      backend,
-      prompt,
-    });
-    const stdinPayload = stdin ?? "";
-    const baseArgs = useResume ? (backend.resumeArgs ?? backend.args ?? []) : (backend.args ?? []);
-    const resolvedArgs = useResume
-      ? baseArgs.map((entry) => entry.replaceAll("{sessionId}", resolvedSessionId ?? ""))
-      : baseArgs;
-    const args = buildCliArgs({
-      backend,
-      baseArgs: resolvedArgs,
-      modelId: normalizedModel,
-      sessionId: resolvedSessionId,
-      systemPrompt: systemPromptArg,
-      imagePaths,
-      promptArg: argsPrompt,
-      useResume,
-    });
-
-    const serialize = backend.serialize ?? true;
-    const queueKey = serialize ? backendResolved.id : `${backendResolved.id}:${params.runId}`;
-
-    try {
-      const output = await enqueueCliRun(queueKey, async () => {
-        log.info(
-          `cli exec: provider=${params.provider} model=${normalizedModel} promptChars=${params.prompt.length}`,
-        );
-        const logOutputText =
-          isTruthyEnvValue(process.env[CLI_BACKEND_LOG_OUTPUT_ENV]) ||
-          isTruthyEnvValue(process.env[LEGACY_CLAUDE_CLI_LOG_OUTPUT_ENV]);
-        if (logOutputText) {
-          const logArgs: string[] = [];
-          for (let i = 0; i < args.length; i += 1) {
-            const arg = args[i] ?? "";
-            if (arg === backend.systemPromptArg) {
-              const systemPromptValue = args[i + 1] ?? "";
-              logArgs.push(arg, `<systemPrompt:${systemPromptValue.length} chars>`);
-              i += 1;
-              continue;
-            }
-            if (arg === backend.sessionArg) {
-              logArgs.push(arg, args[i + 1] ?? "");
-              i += 1;
-              continue;
-            }
-            if (arg === backend.modelArg) {
-              logArgs.push(arg, args[i + 1] ?? "");
-              i += 1;
-              continue;
-            }
-            if (arg === backend.imageArg) {
-              logArgs.push(arg, "<image>");
-              i += 1;
-              continue;
-            }
-            logArgs.push(arg);
-          }
-          if (argsPrompt) {
-            const promptIndex = logArgs.indexOf(argsPrompt);
-            if (promptIndex >= 0) {
-              logArgs[promptIndex] = `<prompt:${argsPrompt.length} chars>`;
-            }
-          }
-          log.info(`cli argv: ${backend.command} ${logArgs.join(" ")}`);
-        }
-
-        const env = (() => {
-          const next = sanitizeHostExecEnv({
-            baseEnv: process.env,
-            overrides: backend.env,
-            blockPathOverrides: true,
-          });
-          for (const key of backend.clearEnv ?? []) {
-            delete next[key];
-          }
-          return next;
-        })();
-        const noOutputTimeoutMs = resolveCliNoOutputTimeoutMs({
-          backend,
-          timeoutMs: params.timeoutMs,
-          useResume,
-        });
-        const supervisor = getProcessSupervisor();
-        const scopeKey = buildCliSupervisorScopeKey({
-          backend,
-          backendId: backendResolved.id,
-          cliSessionId: useResume ? resolvedSessionId : undefined,
-        });
-
-        const managedRun = await supervisor.spawn({
-          sessionId: params.sessionId,
-          backendId: backendResolved.id,
-          scopeKey,
-          replaceExistingScope: Boolean(useResume && scopeKey),
-          mode: "child",
-          argv: [backend.command, ...args],
-          timeoutMs: params.timeoutMs,
-          noOutputTimeoutMs,
-          cwd: workspaceDir,
-          env,
-          input: stdinPayload,
-        });
-        const result = await managedRun.wait();
-
-        const stdout = result.stdout.trim();
-        const stderr = result.stderr.trim();
-        if (logOutputText) {
-          if (stdout) {
-            log.info(`cli stdout:\n${stdout}`);
-          }
-          if (stderr) {
-            log.info(`cli stderr:\n${stderr}`);
-          }
-        }
-        if (shouldLogVerbose()) {
-          if (stdout) {
-            log.debug(`cli stdout:\n${stdout}`);
-          }
-          if (stderr) {
-            log.debug(`cli stderr:\n${stderr}`);
-          }
-        }
-
-        if (result.exitCode !== 0 || result.reason !== "exit") {
-          if (result.reason === "no-output-timeout" || result.noOutputTimedOut) {
-            const timeoutReason = `CLI produced no output for ${Math.round(noOutputTimeoutMs / 1000)}s and was terminated.`;
-            log.warn(
-              `cli watchdog timeout: provider=${params.provider} model=${modelId} session=${resolvedSessionId ?? params.sessionId} noOutputTimeoutMs=${noOutputTimeoutMs} pid=${managedRun.pid ?? "unknown"}`,
-            );
-            if (params.sessionKey) {
-              const stallNotice = [
-                `CLI agent (${params.provider}) produced no output for ${Math.round(noOutputTimeoutMs / 1000)}s and was terminated.`,
-                "It may have been waiting for interactive input or an approval prompt.",
-                "For Claude Code, prefer --permission-mode bypassPermissions --print.",
-              ].join(" ");
-              enqueueSystemEvent(stallNotice, { sessionKey: params.sessionKey });
-              requestHeartbeatNow(
-                scopedHeartbeatWakeOptions(params.sessionKey, { reason: "cli:watchdog:stall" }),
-              );
-            }
-            throw new FailoverError(timeoutReason, {
-              reason: "timeout",
-              provider: params.provider,
-              model: modelId,
-              status: resolveFailoverStatus("timeout"),
-            });
-          }
-          if (result.reason === "overall-timeout") {
-            const timeoutReason = `CLI exceeded timeout (${Math.round(params.timeoutMs / 1000)}s) and was terminated.`;
-            throw new FailoverError(timeoutReason, {
-              reason: "timeout",
-              provider: params.provider,
-              model: modelId,
-              status: resolveFailoverStatus("timeout"),
-            });
-          }
-          const err = stderr || stdout || "CLI failed.";
-          const reason = classifyFailoverReason(err) ?? "unknown";
-          const status = resolveFailoverStatus(reason);
-          throw new FailoverError(err, {
-            reason,
-            provider: params.provider,
-            model: modelId,
-            status,
-          });
-        }
-
-        const outputMode = useResume ? (backend.resumeOutput ?? backend.output) : backend.output;
-
-        if (outputMode === "text") {
-          return { text: stdout, sessionId: resolvedSessionId };
-        }
-        if (outputMode === "jsonl") {
-          const parsed = parseCliJsonl(stdout, backend);
-          return parsed ?? { text: stdout };
-        }
-
-        const parsed = parseCliJson(stdout, backend);
-        return parsed ?? { text: stdout };
-      });
-
-      return output;
-    } finally {
-      if (cleanupImages) {
-        await cleanupImages();
-      }
-    }
-  };
+export async function runCliAgent(params: RunCliAgentParams): Promise<EmbeddedPiRunResult> {
+  const context = await prepareCliRunContext(params);
 
   const buildCliRunResult = (resultParams: {
-    output: Awaited<ReturnType<typeof executeCliWithSession>>;
+    output: Awaited<ReturnType<typeof executePreparedCliRun>>;
     effectiveCliSessionId?: string;
   }): EmbeddedPiRunResult => {
     const text = resultParams.output.text?.trim();
@@ -451,21 +21,23 @@ export async function runCliAgent(params: {
     return {
       payloads,
       meta: {
-        durationMs: Date.now() - started,
-        systemPromptReport,
+        durationMs: Date.now() - context.started,
+        systemPromptReport: context.systemPromptReport,
         agentMeta: {
           sessionId: resultParams.effectiveCliSessionId ?? params.sessionId ?? "",
           provider: params.provider,
-          model: modelId,
+          model: context.modelId,
           usage: resultParams.output.usage,
           ...(resultParams.effectiveCliSessionId
             ? {
                 cliSessionBinding: {
                   sessionId: resultParams.effectiveCliSessionId,
                   ...(params.authProfileId ? { authProfileId: params.authProfileId } : {}),
-                  ...(extraSystemPromptHash ? { extraSystemPromptHash } : {}),
-                  ...(preparedBackend.mcpConfigHash
-                    ? { mcpConfigHash: preparedBackend.mcpConfigHash }
+                  ...(context.extraSystemPromptHash
+                    ? { extraSystemPromptHash: context.extraSystemPromptHash }
+                    : {}),
+                  ...(context.preparedBackend.mcpConfigHash
+                    ? { mcpConfigHash: context.preparedBackend.mcpConfigHash }
                     : {}),
                 },
               }
@@ -478,23 +50,23 @@ export async function runCliAgent(params: {
   // Try with the provided CLI session ID first
   try {
     try {
-      const output = await executeCliWithSession(reusableCliSession.sessionId);
-      const effectiveCliSessionId = output.sessionId ?? reusableCliSession.sessionId;
+      const output = await executePreparedCliRun(context, context.reusableCliSession.sessionId);
+      const effectiveCliSessionId = output.sessionId ?? context.reusableCliSession.sessionId;
       return buildCliRunResult({ output, effectiveCliSessionId });
     } catch (err) {
       if (err instanceof FailoverError) {
         // Check if this is a session expired error and we have a session to clear
-        if (err.reason === "session_expired" && reusableCliSession.sessionId && params.sessionKey) {
-          log.warn(
-            `CLI session expired, clearing session ID and retrying: provider=${params.provider} session=${redactRunIdentifier(reusableCliSession.sessionId)}`,
-          );
-
+        if (
+          err.reason === "session_expired" &&
+          context.reusableCliSession.sessionId &&
+          params.sessionKey
+        ) {
           // Clear the expired session ID from the session entry
           // This requires access to the session store, which we don't have here
           // We'll need to modify the caller to handle this case
 
           // For now, retry without the session ID to create a new session
-          const output = await executeCliWithSession(undefined);
+          const output = await executePreparedCliRun(context, undefined);
           const effectiveCliSessionId = output.sessionId;
           return buildCliRunResult({ output, effectiveCliSessionId });
         }
@@ -507,14 +79,14 @@ export async function runCliAgent(params: {
         throw new FailoverError(message, {
           reason,
           provider: params.provider,
-          model: modelId,
+          model: context.modelId,
           status,
         });
       }
       throw err;
     }
   } finally {
-    await preparedBackend.cleanup?.();
+    await context.preparedBackend.cleanup?.();
   }
 }
 
diff --git a/src/agents/cli-runner/execute.ts b/src/agents/cli-runner/execute.ts
new file mode 100644
index 00000000000..fb92657157f
--- /dev/null
+++ b/src/agents/cli-runner/execute.ts
@@ -0,0 +1,295 @@
+import { shouldLogVerbose } from "../../globals.js";
+import { isTruthyEnvValue } from "../../infra/env.js";
+import { requestHeartbeatNow } from "../../infra/heartbeat-wake.js";
+import { sanitizeHostExecEnv } from "../../infra/host-env-security.js";
+import { enqueueSystemEvent } from "../../infra/system-events.js";
+import { getProcessSupervisor } from "../../process/supervisor/index.js";
+import { scopedHeartbeatWakeOptions } from "../../routing/session-key.js";
+import { prependBootstrapPromptWarning } from "../bootstrap-budget.js";
+import { parseCliOutput, type CliOutput } from "../cli-output.js";
+import { FailoverError, resolveFailoverStatus } from "../failover-error.js";
+import { classifyFailoverReason } from "../pi-embedded-helpers.js";
+import {
+  appendImagePathsToPrompt,
+  buildCliSupervisorScopeKey,
+  buildCliArgs,
+  enqueueCliRun,
+  loadPromptRefImages,
+  resolveCliNoOutputTimeoutMs,
+  resolvePromptInput,
+  resolveSessionIdToSend,
+  resolveSystemPromptUsage,
+  writeCliImages,
+} from "./helpers.js";
+import {
+  cliBackendLog,
+  CLI_BACKEND_LOG_OUTPUT_ENV,
+  LEGACY_CLAUDE_CLI_LOG_OUTPUT_ENV,
+} from "./log.js";
+import type { PreparedCliRunContext } from "./types.js";
+
+/**
+ * Builds a log-friendly copy of the CLI argv.
+ *
+ * The value after the system-prompt flag is replaced by a length placeholder,
+ * the value after the image flag by `<image>`, and the first argument equal to
+ * `argsPrompt` by a prompt-length placeholder. Session and model flag values
+ * are copied through unchanged.
+ */
+function buildCliLogArgs(params: {
+  args: string[];
+  systemPromptArg?: string;
+  sessionArg?: string;
+  modelArg?: string;
+  imageArg?: string;
+  argsPrompt?: string;
+}): string[] {
+  const logArgs: string[] = [];
+  for (let i = 0; i < params.args.length; i += 1) {
+    const arg = params.args[i] ?? "";
+    if (arg === params.systemPromptArg) {
+      const systemPromptValue = params.args[i + 1] ?? "";
+      logArgs.push(arg, `<systemPrompt:${systemPromptValue.length} chars>`);
+      i += 1;
+      continue;
+    }
+    if (arg === params.sessionArg) {
+      logArgs.push(arg, params.args[i + 1] ?? "");
+      i += 1;
+      continue;
+    }
+    if (arg === params.modelArg) {
+      logArgs.push(arg, params.args[i + 1] ?? "");
+      i += 1;
+      continue;
+    }
+    if (arg === params.imageArg) {
+      logArgs.push(arg, "<image>");
+      i += 1;
+      continue;
+    }
+    logArgs.push(arg);
+  }
+  if (params.argsPrompt) {
+    const promptIndex = logArgs.indexOf(params.argsPrompt);
+    if (promptIndex >= 0) {
+      logArgs[promptIndex] = `<prompt:${params.argsPrompt.length} chars>`;
+    }
+  }
+  return logArgs;
+}
+
+/**
+ * Executes one CLI backend run for an already-prepared context.
+ *
+ * Resolves the session id and resume mode, writes any prompt images to disk,
+ * builds the argv, and spawns the backend through the process supervisor from
+ * inside `enqueueCliRun`. The cleanup returned by `writeCliImages` runs in
+ * `finally`, whether the run succeeds or throws.
+ *
+ * @param context - Run context produced by `prepareCliRunContext`.
+ * @param cliSessionIdToUse - CLI session id to continue, if any.
+ * @returns The CLI output as parsed by `parseCliOutput`.
+ * @throws FailoverError on watchdog/overall timeout or an unsuccessful exit.
+ */
+export async function executePreparedCliRun(
+  context: PreparedCliRunContext,
+  cliSessionIdToUse?: string,
+): Promise<CliOutput> {
+  const params = context.params;
+  const backend = context.preparedBackend.backend;
+  const { sessionId: resolvedSessionId, isNew } = resolveSessionIdToSend({
+    backend,
+    cliSessionId: cliSessionIdToUse,
+  });
+  const useResume = Boolean(
+    cliSessionIdToUse && resolvedSessionId && backend.resumeArgs && backend.resumeArgs.length > 0,
+  );
+  const systemPromptArg = resolveSystemPromptUsage({
+    backend,
+    isNewSession: isNew,
+    systemPrompt: context.systemPrompt,
+  });
+
+  let imagePaths: string[] | undefined;
+  let cleanupImages: (() => Promise<void>) | undefined;
+  let prompt = prependBootstrapPromptWarning(params.prompt, context.bootstrapPromptWarningLines, {
+    preserveExactPrompt: context.heartbeatPrompt,
+  });
+  // The try starts before any image is written so that a failure while
+  // building the prompt or argv still reaches the image cleanup in `finally`.
+  try {
+    const resolvedImages =
+      params.images && params.images.length > 0
+        ? params.images
+        : await loadPromptRefImages({ prompt, workspaceDir: context.workspaceDir });
+    if (resolvedImages.length > 0) {
+      const imagePayload = await writeCliImages(resolvedImages);
+      imagePaths = imagePayload.paths;
+      cleanupImages = imagePayload.cleanup;
+      if (!backend.imageArg) {
+        prompt = appendImagePathsToPrompt(prompt, imagePaths);
+      }
+    }
+
+    const { argsPrompt, stdin } = resolvePromptInput({
+      backend,
+      prompt,
+    });
+    const stdinPayload = stdin ?? "";
+    const baseArgs = useResume ? (backend.resumeArgs ?? backend.args ?? []) : (backend.args ?? []);
+    const resolvedArgs = useResume
+      ? baseArgs.map((entry) => entry.replaceAll("{sessionId}", resolvedSessionId ?? ""))
+      : baseArgs;
+    const args = buildCliArgs({
+      backend,
+      baseArgs: resolvedArgs,
+      modelId: context.normalizedModel,
+      sessionId: resolvedSessionId,
+      systemPrompt: systemPromptArg,
+      imagePaths,
+      promptArg: argsPrompt,
+      useResume,
+    });
+
+    // Serialized backends use the backend id as queue key; otherwise the run id is appended.
+    const serialize = backend.serialize ?? true;
+    const queueKey = serialize
+      ? context.backendResolved.id
+      : `${context.backendResolved.id}:${params.runId}`;
+
+    return await enqueueCliRun(queueKey, async () => {
+      cliBackendLog.info(
+        `cli exec: provider=${params.provider} model=${context.normalizedModel} promptChars=${params.prompt.length}`,
+      );
+      const logOutputText =
+        isTruthyEnvValue(process.env[CLI_BACKEND_LOG_OUTPUT_ENV]) ||
+        isTruthyEnvValue(process.env[LEGACY_CLAUDE_CLI_LOG_OUTPUT_ENV]);
+      if (logOutputText) {
+        const logArgs = buildCliLogArgs({
+          args,
+          systemPromptArg: backend.systemPromptArg,
+          sessionArg: backend.sessionArg,
+          modelArg: backend.modelArg,
+          imageArg: backend.imageArg,
+          argsPrompt,
+        });
+        cliBackendLog.info(`cli argv: ${backend.command} ${logArgs.join(" ")}`);
+      }
+
+      const env = (() => {
+        const next = sanitizeHostExecEnv({
+          baseEnv: process.env,
+          overrides: backend.env,
+          blockPathOverrides: true,
+        });
+        for (const key of backend.clearEnv ?? []) {
+          delete next[key];
+        }
+        return next;
+      })();
+      const noOutputTimeoutMs = resolveCliNoOutputTimeoutMs({
+        backend,
+        timeoutMs: params.timeoutMs,
+        useResume,
+      });
+      const supervisor = getProcessSupervisor();
+      const scopeKey = buildCliSupervisorScopeKey({
+        backend,
+        backendId: context.backendResolved.id,
+        cliSessionId: useResume ? resolvedSessionId : undefined,
+      });
+
+      const managedRun = await supervisor.spawn({
+        sessionId: params.sessionId,
+        backendId: context.backendResolved.id,
+        scopeKey,
+        replaceExistingScope: Boolean(useResume && scopeKey),
+        mode: "child",
+        argv: [backend.command, ...args],
+        timeoutMs: params.timeoutMs,
+        noOutputTimeoutMs,
+        cwd: context.workspaceDir,
+        env,
+        input: stdinPayload,
+      });
+      const result = await managedRun.wait();
+
+      const stdout = result.stdout.trim();
+      const stderr = result.stderr.trim();
+      if (logOutputText) {
+        if (stdout) {
+          cliBackendLog.info(`cli stdout:\n${stdout}`);
+        }
+        if (stderr) {
+          cliBackendLog.info(`cli stderr:\n${stderr}`);
+        }
+      }
+      if (shouldLogVerbose()) {
+        if (stdout) {
+          cliBackendLog.debug(`cli stdout:\n${stdout}`);
+        }
+        if (stderr) {
+          cliBackendLog.debug(`cli stderr:\n${stderr}`);
+        }
+      }
+
+      // Anything other than a clean exit is converted into a FailoverError below.
+      if (result.exitCode !== 0 || result.reason !== "exit") {
+        if (result.reason === "no-output-timeout" || result.noOutputTimedOut) {
+          const timeoutReason = `CLI produced no output for ${Math.round(noOutputTimeoutMs / 1000)}s and was terminated.`;
+          cliBackendLog.warn(
+            `cli watchdog timeout: provider=${params.provider} model=${context.modelId} session=${resolvedSessionId ?? params.sessionId} noOutputTimeoutMs=${noOutputTimeoutMs} pid=${managedRun.pid ?? "unknown"}`,
+          );
+          if (params.sessionKey) {
+            const stallNotice = [
+              `CLI agent (${params.provider}) produced no output for ${Math.round(noOutputTimeoutMs / 1000)}s and was terminated.`,
+              "It may have been waiting for interactive input or an approval prompt.",
+              "For Claude Code, prefer --permission-mode bypassPermissions --print.",
+            ].join(" ");
+            enqueueSystemEvent(stallNotice, { sessionKey: params.sessionKey });
+            requestHeartbeatNow(
+              scopedHeartbeatWakeOptions(params.sessionKey, { reason: "cli:watchdog:stall" }),
+            );
+          }
+          throw new FailoverError(timeoutReason, {
+            reason: "timeout",
+            provider: params.provider,
+            model: context.modelId,
+            status: resolveFailoverStatus("timeout"),
+          });
+        }
+        if (result.reason === "overall-timeout") {
+          const timeoutReason = `CLI exceeded timeout (${Math.round(params.timeoutMs / 1000)}s) and was terminated.`;
+          throw new FailoverError(timeoutReason, {
+            reason: "timeout",
+            provider: params.provider,
+            model: context.modelId,
+            status: resolveFailoverStatus("timeout"),
+          });
+        }
+        const err = stderr || stdout || "CLI failed.";
+        const reason = classifyFailoverReason(err) ?? "unknown";
+        const status = resolveFailoverStatus(reason);
+        throw new FailoverError(err, {
+          reason,
+          provider: params.provider,
+          model: context.modelId,
+          status,
+        });
+      }
+
+      return parseCliOutput({
+        raw: stdout,
+        backend,
+        providerId: context.backendResolved.id,
+        outputMode: useResume ? (backend.resumeOutput ?? backend.output) : backend.output,
+        fallbackSessionId: resolvedSessionId,
+      });
+    });
+  } finally {
+    if (cleanupImages) {
+      await cleanupImages();
+    }
+  }
+}
diff --git a/src/agents/cli-runner/helpers.ts b/src/agents/cli-runner/helpers.ts
index 13c5e497094..296fd9fb1ad 100644
--- a/src/agents/cli-runner/helpers.ts
+++ b/src/agents/cli-runner/helpers.ts
@@ -10,7 +10,6 @@ import type { OpenClawConfig } from "../../config/config.js";
 import type { CliBackendConfig } from "../../config/types.js";
 import { MAX_IMAGE_BYTES } from "../../media/constants.js";
 import { buildTtsSystemPromptHint } from "../../tts/tts.js";
-import { isRecord } from "../../utils.js";
 import { buildModelAliasLines } from "../model-alias-lines.js";
 import { resolveDefaultModelForAgent } from "../model-selection.js";
 import { resolveOwnerDisplaySetting } from "../owner-display.js";
@@ -28,20 +27,6 @@ export function enqueueCliRun<T>(key: string, task: () => Promise<T>): Promise<T
   return CLI_RUN_QUEUE.enqueue(key, task);
 }
 
-type CliUsage = {
-  input?: number;
-  output?: number;
-  cacheRead?: number;
-  cacheWrite?: number;
-  total?: number;
-};
-
-export type CliOutput = {
-  text: string;
-  sessionId?: string;
-  usage?: CliUsage;
-};
-
 export function buildSystemPrompt(params: {
   workspaceDir: string;
   config?: OpenClawConfig;
@@ -117,153 +102,6 @@ export function normalizeCliModel(modelId: string, backend: CliBackendConfig): s
   return trimmed;
 }
 
-function toUsage(raw: Record<string, unknown>): CliUsage | undefined {
-  const pick = (key: string) =>
-    typeof raw[key] === "number" && raw[key] > 0 ? raw[key] : undefined;
-  const input = pick("input_tokens") ?? pick("inputTokens");
-  const output = pick("output_tokens") ?? pick("outputTokens");
-  const cacheRead =
-    pick("cache_read_input_tokens") ?? pick("cached_input_tokens") ?? pick("cacheRead");
-  const cacheWrite = pick("cache_write_input_tokens") ?? pick("cacheWrite");
-  const total = pick("total_tokens") ?? pick("total");
-  if (!input && !output && !cacheRead && !cacheWrite && !total) {
-    return undefined;
-  }
-  return { input, output, cacheRead, cacheWrite, total };
-}
-
-function collectText(value: unknown): string {
-  if (!value) {
-    return "";
-  }
-  if (typeof value === "string") {
-    return value;
-  }
-  if (Array.isArray(value)) {
-    return value.map((entry) => collectText(entry)).join("");
-  }
-  if (!isRecord(value)) {
-    return "";
-  }
-  if (typeof value.text === "string") {
-    return value.text;
-  }
-  if (typeof value.content === "string") {
-    return value.content;
-  }
-  if (Array.isArray(value.content)) {
-    return value.content.map((entry) => collectText(entry)).join("");
-  }
-  if (isRecord(value.message)) {
-    return collectText(value.message);
-  }
-  return "";
-}
-
-function pickSessionId(
-  parsed: Record<string, unknown>,
-  backend: CliBackendConfig,
-): string | undefined {
-  const fields = backend.sessionIdFields ?? [
-    "session_id",
-    "sessionId",
-    "conversation_id",
-    "conversationId",
-  ];
-  for (const field of fields) {
-    const value = parsed[field];
-    if (typeof value === "string" && value.trim()) {
-      return value.trim();
-    }
-  }
-  return undefined;
-}
-
-export function parseCliJson(raw: string, backend: CliBackendConfig): CliOutput | null {
-  const trimmed = raw.trim();
-  if (!trimmed) {
-    return null;
-  }
-  let parsed: unknown;
-  try {
-    parsed = JSON.parse(trimmed);
-  } catch {
-    return null;
-  }
-  if (!isRecord(parsed)) {
-    return null;
-  }
-  const sessionId = pickSessionId(parsed, backend);
-  const usage = isRecord(parsed.usage) ? toUsage(parsed.usage) : undefined;
-  const text =
-    collectText(parsed.message) ||
-    collectText(parsed.content) ||
-    collectText(parsed.result) ||
-    collectText(parsed);
-  return { text: text.trim(), sessionId, usage };
-}
-
-export function parseCliJsonl(raw: string, backend: CliBackendConfig): CliOutput | null {
-  const lines = raw
-    .split(/\r?\n/g)
-    .map((line) => line.trim())
-    .filter(Boolean);
-  if (lines.length === 0) {
-    return null;
-  }
-  let sessionId: string | undefined;
-  let usage: CliUsage | undefined;
-  const texts: string[] = [];
-  for (const line of lines) {
-    let parsed: unknown;
-    try {
-      parsed = JSON.parse(line);
-    } catch {
-      continue;
-    }
-    if (!isRecord(parsed)) {
-      continue;
-    }
-    if (!sessionId) {
-      sessionId = pickSessionId(parsed, backend);
-    }
-    if (!sessionId && typeof parsed.thread_id === "string") {
-      sessionId = parsed.thread_id.trim();
-    }
-    if (isRecord(parsed.usage)) {
-      usage = toUsage(parsed.usage) ?? usage;
-    }
-
-    // Claude stream-json: {"type":"result","result":"...","session_id":"...","usage":{...}}
-    if (
-      typeof parsed.type === "string" &&
-      parsed.type === "result" &&
-      typeof parsed.result === "string"
-    ) {
-      const resultText = parsed.result.trim();
-      if (resultText) {
-        return { text: resultText, sessionId, usage };
-      }
-      // Claude may finish with an empty result after tool-only work. Keep the
-      // resolved session handle and usage instead of dropping them.
-      return { text: "", sessionId, usage };
-    }
-
-    const item = isRecord(parsed.item) ? parsed.item : null;
-    if (item && typeof item.text === "string") {
-      const type = typeof item.type === "string" ? item.type.toLowerCase() : "";
-      if (!type || type.includes("message")) {
-        texts.push(item.text);
-      }
-    }
-  }
-  const text = texts.join("\n").trim();
-  if (!text) {
-    return null;
-  }
-  return { text, sessionId, usage };
-}
-
 export function resolveSystemPromptUsage(params: {
   backend: CliBackendConfig;
   isNewSession: boolean;
diff --git a/src/agents/cli-runner/log.ts b/src/agents/cli-runner/log.ts
new file mode 100644
index 00000000000..8143e680d3d
--- /dev/null
+++ b/src/agents/cli-runner/log.ts
@@ -0,0 +1,5 @@
+import { createSubsystemLogger } from "../../logging/subsystem.js";
+
+export const cliBackendLog = createSubsystemLogger("agent/cli-backend"); // logger used by the cli-runner modules
+export const CLI_BACKEND_LOG_OUTPUT_ENV = "OPENCLAW_CLI_BACKEND_LOG_OUTPUT"; // truthy value turns on logging of CLI argv, stdout and stderr
+export const LEGACY_CLAUDE_CLI_LOG_OUTPUT_ENV = "OPENCLAW_CLAUDE_CLI_LOG_OUTPUT"; // checked alongside CLI_BACKEND_LOG_OUTPUT_ENV with the same effect
diff --git a/src/agents/cli-runner/prepare.ts b/src/agents/cli-runner/prepare.ts
new file mode 100644
index 00000000000..f6194b57c95
--- /dev/null
+++ b/src/agents/cli-runner/prepare.ts
@@ -0,0 +1,194 @@
+import { resolveHeartbeatPrompt } from "../../auto-reply/heartbeat.js";
+import { resolveSessionAgentIds } from "../agent-scope.js";
+import {
+  buildBootstrapInjectionStats,
+  buildBootstrapPromptWarning,
+  buildBootstrapTruncationReportMeta,
+  analyzeBootstrapBudget,
+} from "../bootstrap-budget.js";
+import { makeBootstrapWarn, resolveBootstrapContextForRun } from "../bootstrap-files.js";
+import { resolveCliBackendConfig } from "../cli-backends.js";
+import { hashCliSessionText, resolveCliSessionReuse } from "../cli-session.js";
+import { resolveOpenClawDocsPath } from "../docs-path.js";
+import {
+  resolveBootstrapMaxChars,
+  resolveBootstrapPromptTruncationWarningMode,
+  resolveBootstrapTotalMaxChars,
+} from "../pi-embedded-helpers.js";
+import { buildSystemPromptReport } from "../system-prompt-report.js";
+import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
+import { prepareCliBundleMcpConfig } from "./bundle-mcp.js";
+import { buildSystemPrompt, normalizeCliModel } from "./helpers.js";
+import { cliBackendLog } from "./log.js";
+import type { PreparedCliRunContext, RunCliAgentParams } from "./types.js";
+
+/**
+ * Resolves everything a CLI agent run needs before the backend is spawned.
+ *
+ * Resolves the workspace and backend config, calls `prepareCliBundleMcpConfig`,
+ * decides whether an existing CLI session can be reused, and builds the system
+ * prompt together with its report.
+ *
+ * If a step after `prepareCliBundleMcpConfig` fails, the prepared backend's
+ * `cleanup` is invoked before the error is rethrown: the caller can only clean
+ * up a context that was actually returned to it.
+ *
+ * @param params - Caller-supplied run parameters.
+ * @returns The prepared context consumed by `executePreparedCliRun`.
+ * @throws Error when `params.provider` does not resolve to a CLI backend.
+ */
+export async function prepareCliRunContext(
+  params: RunCliAgentParams,
+): Promise<PreparedCliRunContext> {
+  const started = Date.now();
+  const workspaceResolution = resolveRunWorkspaceDir({
+    workspaceDir: params.workspaceDir,
+    sessionKey: params.sessionKey,
+    agentId: params.agentId,
+    config: params.config,
+  });
+  const resolvedWorkspace = workspaceResolution.workspaceDir;
+  const redactedSessionId = redactRunIdentifier(params.sessionId);
+  const redactedSessionKey = redactRunIdentifier(params.sessionKey);
+  const redactedWorkspace = redactRunIdentifier(resolvedWorkspace);
+  if (workspaceResolution.usedFallback) {
+    cliBackendLog.warn(
+      `[workspace-fallback] caller=runCliAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
+    );
+  }
+  const workspaceDir = resolvedWorkspace;
+
+  const backendResolved = resolveCliBackendConfig(params.provider, params.config);
+  if (!backendResolved) {
+    throw new Error(`Unknown CLI backend: ${params.provider}`);
+  }
+  const preparedBackend = await prepareCliBundleMcpConfig({
+    enabled: backendResolved.bundleMcp,
+    backend: backendResolved.config,
+    workspaceDir,
+    config: params.config,
+    warn: (message) => cliBackendLog.warn(message),
+  });
+  // From here on the prepared backend may carry a cleanup callback; run it if
+  // the rest of the preparation fails, since no context reaches the caller.
+  try {
+    const extraSystemPrompt = params.extraSystemPrompt?.trim() ?? "";
+    const extraSystemPromptHash = hashCliSessionText(extraSystemPrompt);
+    const reusableCliSession = resolveCliSessionReuse({
+      binding:
+        params.cliSessionBinding ??
+        (params.cliSessionId ? { sessionId: params.cliSessionId } : undefined),
+      authProfileId: params.authProfileId,
+      extraSystemPromptHash,
+      mcpConfigHash: preparedBackend.mcpConfigHash,
+    });
+    if (reusableCliSession.invalidatedReason) {
+      cliBackendLog.info(
+        `cli session reset: provider=${params.provider} reason=${reusableCliSession.invalidatedReason}`,
+      );
+    }
+    const modelId = (params.model ?? "default").trim() || "default";
+    const normalizedModel = normalizeCliModel(modelId, preparedBackend.backend);
+    const modelDisplay = `${params.provider}/${modelId}`;
+
+    const sessionLabel = params.sessionKey ?? params.sessionId;
+    const { bootstrapFiles, contextFiles } = await resolveBootstrapContextForRun({
+      workspaceDir,
+      config: params.config,
+      sessionKey: params.sessionKey,
+      sessionId: params.sessionId,
+      warn: makeBootstrapWarn({ sessionLabel, warn: (message) => cliBackendLog.warn(message) }),
+    });
+    const bootstrapMaxChars = resolveBootstrapMaxChars(params.config);
+    const bootstrapTotalMaxChars = resolveBootstrapTotalMaxChars(params.config);
+    const bootstrapAnalysis = analyzeBootstrapBudget({
+      files: buildBootstrapInjectionStats({
+        bootstrapFiles,
+        injectedFiles: contextFiles,
+      }),
+      bootstrapMaxChars,
+      bootstrapTotalMaxChars,
+    });
+    const bootstrapPromptWarningMode = resolveBootstrapPromptTruncationWarningMode(params.config);
+    const bootstrapPromptWarning = buildBootstrapPromptWarning({
+      analysis: bootstrapAnalysis,
+      mode: bootstrapPromptWarningMode,
+      seenSignatures: params.bootstrapPromptWarningSignaturesSeen,
+      previousSignature: params.bootstrapPromptWarningSignature,
+    });
+    const { defaultAgentId, sessionAgentId } = resolveSessionAgentIds({
+      sessionKey: params.sessionKey,
+      config: params.config,
+      agentId: params.agentId,
+    });
+    const heartbeatPrompt =
+      sessionAgentId === defaultAgentId
+        ? resolveHeartbeatPrompt(params.config?.agents?.defaults?.heartbeat?.prompt)
+        : undefined;
+    const docsPath = await resolveOpenClawDocsPath({
+      workspaceDir,
+      argv1: process.argv[1],
+      cwd: process.cwd(),
+      moduleUrl: import.meta.url,
+    });
+    const systemPrompt = buildSystemPrompt({
+      workspaceDir,
+      config: params.config,
+      defaultThinkLevel: params.thinkLevel,
+      extraSystemPrompt,
+      ownerNumbers: params.ownerNumbers,
+      heartbeatPrompt,
+      docsPath: docsPath ?? undefined,
+      tools: [],
+      contextFiles,
+      modelDisplay,
+      agentId: sessionAgentId,
+    });
+    const systemPromptReport = buildSystemPromptReport({
+      source: "run",
+      generatedAt: Date.now(),
+      sessionId: params.sessionId,
+      sessionKey: params.sessionKey,
+      provider: params.provider,
+      model: modelId,
+      workspaceDir,
+      bootstrapMaxChars,
+      bootstrapTotalMaxChars,
+      bootstrapTruncation: buildBootstrapTruncationReportMeta({
+        analysis: bootstrapAnalysis,
+        warningMode: bootstrapPromptWarningMode,
+        warning: bootstrapPromptWarning,
+      }),
+      sandbox: { mode: "off", sandboxed: false },
+      systemPrompt,
+      bootstrapFiles,
+      injectedFiles: contextFiles,
+      skillsPrompt: "",
+      tools: [],
+    });
+
+    return {
+      params,
+      started,
+      workspaceDir,
+      backendResolved,
+      preparedBackend,
+      reusableCliSession,
+      modelId,
+      normalizedModel,
+      systemPrompt,
+      systemPromptReport,
+      bootstrapPromptWarningLines: bootstrapPromptWarning.lines,
+      heartbeatPrompt,
+      extraSystemPromptHash,
+    };
+  } catch (err) {
+    try {
+      await preparedBackend.cleanup?.();
+    } catch (cleanupErr) {
+      // Keep the original failure as the thrown error.
+      cliBackendLog.warn(`cli backend cleanup failed: ${String(cleanupErr)}`);
+    }
+    throw err;
+  }
+}
diff --git a/src/agents/cli-runner/types.ts b/src/agents/cli-runner/types.ts
new file mode 100644
index 00000000000..7b4f30f207d
--- /dev/null
+++ b/src/agents/cli-runner/types.ts
@@ -0,0 +1,58 @@
+import type { ImageContent } from "@mariozechner/pi-ai";
+import type { ThinkLevel } from "../../auto-reply/thinking.js";
+import type { OpenClawConfig } from "../../config/config.js";
+import type { CliSessionBinding } from "../../config/sessions.js";
+import type { SessionSystemPromptReport } from "../../config/sessions/types.js";
+import type { CliBackendConfig } from "../../config/types.js";
+import type { ResolvedCliBackend } from "../cli-backends.js";
+
+export type RunCliAgentParams = {
+  sessionId: string; // also the fallback for agentMeta.sessionId when no CLI session id is known
+  sessionKey?: string; // when set, a watchdog stall enqueues a system event and a heartbeat wake
+  agentId?: string;
+  sessionFile: string;
+  workspaceDir: string; // requested workspace; the run uses the result of resolveRunWorkspaceDir
+  config?: OpenClawConfig;
+  prompt: string;
+  provider: string; // must resolve via resolveCliBackendConfig, otherwise preparation throws
+  model?: string; // trimmed; "default" is used when missing or blank
+  thinkLevel?: ThinkLevel;
+  timeoutMs: number; // overall timeout handed to the process supervisor
+  runId: string; // part of the run-queue key when the backend is not serialized
+  extraSystemPrompt?: string; // trimmed, then hashed for the CLI session reuse check
+  streamParams?: import("../command/types.js").AgentStreamParams;
+  ownerNumbers?: string[];
+  cliSessionId?: string; // only used to build a binding when cliSessionBinding is absent
+  cliSessionBinding?: CliSessionBinding; // takes precedence over cliSessionId
+  authProfileId?: string;
+  bootstrapPromptWarningSignaturesSeen?: string[];
+  bootstrapPromptWarningSignature?: string;
+  images?: ImageContent[]; // when non-empty, used instead of images loaded from prompt refs
+};
+
+export type CliPreparedBackend = {
+  backend: CliBackendConfig; // backend config used to build argv and parse output
+  cleanup?: () => Promise<void>; // awaited by runCliAgent in its `finally`
+  mcpConfigHash?: string; // fed into the session reuse check and stored in the session binding
+};
+
+export type CliReusableSession = {
+  sessionId?: string; // CLI session id that runCliAgent passes to the first execution attempt
+  invalidatedReason?: "auth-profile" | "system-prompt" | "mcp"; // logged as "cli session reset" when set
+};
+
+export type PreparedCliRunContext = {
+  params: RunCliAgentParams; // the caller's original parameters, passed through untouched
+  started: number; // Date.now() taken at the start of prepareCliRunContext; basis for durationMs
+  workspaceDir: string; // resolved by resolveRunWorkspaceDir; used as the spawn cwd
+  backendResolved: ResolvedCliBackend; // result of resolveCliBackendConfig; its id keys the run queue
+  preparedBackend: CliPreparedBackend;
+  reusableCliSession: CliReusableSession;
+  modelId: string; // trimmed params.model, or "default"
+  normalizedModel: string; // normalizeCliModel(modelId, backend); the model value given to buildCliArgs
+  systemPrompt: string;
+  systemPromptReport: SessionSystemPromptReport; // returned in the run result meta
+  bootstrapPromptWarningLines: string[]; // prepended to the prompt by executePreparedCliRun
+  heartbeatPrompt?: string; // only resolved when the session agent is the default agent
+  extraSystemPromptHash?: string; // hashCliSessionText of the trimmed extraSystemPrompt
+};
diff --git a/src/agents/cli-session.ts b/src/agents/cli-session.ts
index 48ad5ff789d..0cc187afec7 100644
--- a/src/agents/cli-session.ts
+++ b/src/agents/cli-session.ts
@@ -1,4 +1,5 @@
 import crypto from "node:crypto";
+import { CLAUDE_CLI_BACKEND_ID } from "../../extensions/anthropic/cli-shared.js";
 import type { CliSessionBinding, SessionEntry } from "../config/sessions.js";
 import { normalizeProviderId } from "./model-selection.js";
 
@@ -37,7 +38,7 @@ export function getCliSessionBinding(
   if (fromMap?.trim()) {
     return { sessionId: fromMap.trim() };
   }
-  if (normalized === "claude-cli") {
+  if (normalized === CLAUDE_CLI_BACKEND_ID) {
     const legacy = entry.claudeCliSessionId?.trim();
     if (legacy) {
       return { sessionId: legacy };
@@ -83,7 +84,7 @@ export function setCliSessionBinding(
     },
   };
   entry.cliSessionIds = { ...entry.cliSessionIds, [normalized]: trimmed };
-  if (normalized === "claude-cli") {
+  if (normalized === CLAUDE_CLI_BACKEND_ID) {
     entry.claudeCliSessionId = trimmed;
   }
 }
@@ -100,7 +101,7 @@ export function clearCliSession(entry: SessionEntry, provider: string): void {
     delete next[normalized];
     entry.cliSessionIds = Object.keys(next).length > 0 ? next : undefined;
   }
-  if (normalized === "claude-cli") {
+  if (normalized === CLAUDE_CLI_BACKEND_ID) {
     delete entry.claudeCliSessionId;
   }
 }