test: cover docker MCP cleanup for subagents

This commit is contained in:
Peter Steinberger
2026-04-23 02:45:59 +01:00
parent ccf2e77e8d
commit 1489febee9
11 changed files with 448 additions and 79 deletions

View File

@@ -59,6 +59,7 @@ Docs: https://docs.openclaw.ai
- Agents/OpenAI websocket: route native OpenAI websocket metadata and session-header decisions through the shared endpoint classifier so local mocks and custom `models.providers.openai.baseUrl` endpoints stay out of the native OpenAI path consistently across embedded-runner and websocket transport code. Thanks @vincentkoc.
- Cron/MCP: retire bundled MCP runtimes through one shared cleanup path for isolated cron run ends, persistent cron session rollover, and direct cron `deleteAfterRun` fallback cleanup. Fixes #69145, #68623, and #68827.
- MCP/gateway: tear down stdio MCP process trees on transport close and dispose bundled MCP runtimes during session delete/reset, preventing orphaned wrapper/server processes from accumulating. Fixes #68809 and #69465.
- Agents/MCP: retire bundled MCP runtimes after completed one-shot subagent cleanup and nested `sessions_send` steps, while keeping persistent subagent sessions warm.
- Config: render validation warnings with real line breaks instead of a literal `\n` sequence in CLI/audit output. Fixes #70140.
- Cron/doctor: repair malformed persisted cron job IDs through `openclaw doctor`, including legacy `jobId`, non-string `id`, and missing `id` rows, so `cron list` no longer needs display-layer coercion for corrupt store data. Fixes #70128.
- Discord: normalize prefixed channel targets only at the thread-binding API boundary, so `sessions_spawn({ runtime: "acp", thread: true })` can create child threads from Discord channels without breaking current-channel ACP bindings. (#68034) Thanks @Zetarcos.

View File

@@ -910,6 +910,7 @@ The live-model Docker runners also bind-mount only the needed CLI auth homes (or
- Gateway networking (two containers, WS auth + health): `pnpm test:docker:gateway-network` (script: `scripts/e2e/gateway-network-docker.sh`)
- MCP channel bridge (seeded Gateway + stdio bridge + raw Claude notification-frame smoke): `pnpm test:docker:mcp-channels` (script: `scripts/e2e/mcp-channels-docker.sh`)
- Pi bundle MCP tools (real stdio MCP server + embedded Pi profile allow/deny smoke): `pnpm test:docker:pi-bundle-mcp-tools` (script: `scripts/e2e/pi-bundle-mcp-tools-docker.sh`)
- Cron/subagent MCP cleanup (real Gateway + stdio MCP child teardown after isolated cron and one-shot subagent runs): `pnpm test:docker:cron-mcp-cleanup` (script: `scripts/e2e/cron-mcp-cleanup-docker.sh`)
- Plugins (install smoke + `/plugin` alias + Claude-bundle restart semantics): `pnpm test:docker:plugins` (script: `scripts/e2e/plugins-docker.sh`)
- Bundled plugin runtime deps: `pnpm test:docker:bundled-channel-deps` builds a small Docker runner image by default, builds and packs OpenClaw once on the host, then mounts that tarball into each Linux install scenario. Reuse the image with `OPENCLAW_SKIP_DOCKER_BUILD=1`, skip the host rebuild after a fresh local build with `OPENCLAW_BUNDLED_CHANNEL_HOST_BUILD=0`, or point at an existing tarball with `OPENCLAW_BUNDLED_CHANNEL_PACKAGE_TGZ=/path/to/openclaw-*.tgz`.
- Narrow bundled plugin runtime deps while iterating by disabling unrelated scenarios, for example:
@@ -951,6 +952,10 @@ model key. It builds the repo Docker image, starts a real stdio MCP probe server
inside the container, materializes that server through the embedded Pi bundle
MCP runtime, executes the tool, then verifies `coding` and `messaging` keep
`bundle-mcp` tools while `minimal` and `tools.deny: ["bundle-mcp"]` filter them.
`test:docker:cron-mcp-cleanup` is deterministic and does not need a live model
key. It starts a seeded Gateway with a real stdio MCP probe server, runs an
isolated cron turn and a `/subagents spawn` one-shot child turn, then verifies
the MCP child process exits after each run.
Manual ACP plain-language thread smoke (not CI):

View File

@@ -1,15 +1,17 @@
import { execFile } from "node:child_process";
import { randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { setTimeout as delay } from "node:timers/promises";
import { promisify } from "node:util";
import { assert, connectGateway, waitFor } from "./mcp-channels-harness.ts";
import { assert, connectGateway, type GatewayRpcClient, waitFor } from "./mcp-channels-harness.ts";
const execFileAsync = promisify(execFile);
type CronJob = { id?: string };
type CronRunResult = { ok?: boolean; enqueued?: boolean; runId?: string };
type AgentRunResult = { runId?: string; status?: string };
async function readProbePid(pidPath: string): Promise<number | undefined> {
try {
@@ -21,6 +23,25 @@ async function readProbePid(pidPath: string): Promise<number | undefined> {
}
}
/**
 * Read the append-only probe pid log and return each unique positive pid,
 * preserving first-seen order. Returns [] when the file is missing or
 * unreadable (the probe has not started yet, or the log was reset).
 */
async function readProbePids(pidsPath: string): Promise<number[]> {
  let contents: string;
  try {
    contents = await fs.readFile(pidsPath, "utf-8");
  } catch {
    // No log file yet — treat as "no pids recorded".
    return [];
  }
  const unique = new Set<number>();
  for (const entry of contents.split(/\r?\n/)) {
    const parsed = Number.parseInt(entry.trim(), 10);
    // Skip blank lines, non-numeric garbage, non-positive values, and
    // duplicate appends from probe restarts.
    if (Number.isInteger(parsed) && parsed > 0 && !unique.has(parsed)) {
      unique.add(parsed);
    }
  }
  // Set preserves insertion order, so first-seen ordering is retained.
  return [...unique];
}
async function describeProbePid(pid: number): Promise<string | undefined> {
try {
const { stdout } = await execFileAsync("ps", ["-p", String(pid), "-o", "args="]);
@@ -43,9 +64,14 @@ async function waitForProbePid(pidPath: string): Promise<number | undefined> {
return undefined;
}
async function waitForProbeExit(pid: number): Promise<void> {
async function waitForProbeExit(params: {
pid: number;
label: string;
timeoutMs?: number;
}): Promise<void> {
const { pid, label, timeoutMs = 30_000 } = params;
const startedAt = Date.now();
while (Date.now() - startedAt < 30_000) {
while (Date.now() - startedAt < timeoutMs) {
const args = await describeProbePid(pid);
if (!args || !args.includes("openclaw-cron-mcp-cleanup-probe")) {
return;
@@ -53,7 +79,157 @@ async function waitForProbeExit(pid: number): Promise<void> {
await delay(100);
}
const args = await describeProbePid(pid);
throw new Error(`cron MCP probe process still alive after run: pid=${pid} args=${args}`);
throw new Error(`${label} MCP probe process still alive after run: pid=${pid} args=${args}`);
}
/**
 * Poll the probe pid log until at least one recorded probe process has exited
 * (its pid no longer resolves via `ps`, or its command line no longer matches
 * the probe entrypoint — which also covers pid recycling).
 *
 * @returns the first pid observed to have exited.
 * @throws Error on timeout, with per-pid `ps` details when pids were observed,
 *   or a distinct "never recorded a pid" diagnostic when the log stayed empty
 *   (i.e. the probe never started — a different failure than a leaked child).
 */
async function waitForAnyProbeExit(params: {
  pidsPath: string;
  label: string;
  timeoutMs: number;
}): Promise<number> {
  const startedAt = Date.now();
  // Track the last observed pid list so the timeout error can describe it.
  let observed: number[] = [];
  while (Date.now() - startedAt < params.timeoutMs) {
    // Re-read every iteration: the pid log is append-only and may grow while we poll.
    observed = await readProbePids(params.pidsPath);
    for (const pid of observed) {
      const args = await describeProbePid(pid);
      // Missing args => process gone; different args => pid was recycled.
      if (!args || !args.includes("openclaw-cron-mcp-cleanup-probe")) {
        return pid;
      }
    }
    await delay(100);
  }
  if (observed.length === 0) {
    // No pid was ever logged: the probe never launched, so "still alive"
    // would be a misleading diagnosis here.
    throw new Error(
      `${params.label} MCP probe never recorded a pid at ${params.pidsPath} within ${params.timeoutMs}ms`,
    );
  }
  const descriptions = await Promise.all(
    observed.map(async (pid) => ({ pid, args: await describeProbePid(pid) })),
  );
  throw new Error(
    `${params.label} MCP probe processes still alive after run: ${JSON.stringify(descriptions)}`,
  );
}
/**
 * Remove any probe artifacts (pid file, append-only pid log, exit marker)
 * left behind by a previous scenario, so the next run observes fresh state.
 */
async function resetProbeFiles(params: {
  pidPath: string;
  pidsPath: string;
  exitPath: string;
}): Promise<void> {
  // `force: true` makes each removal a no-op when the file is already absent.
  for (const target of [params.pidPath, params.pidsPath, params.exitPath]) {
    await fs.rm(target, { force: true });
  }
}
// Exercise the isolated-cron MCP cleanup path end to end:
// add a cron job, force-run it, confirm the stdio MCP probe child starts
// during the run, then confirm it exits once the run finishes.
// NOTE(review): this sequencing (add -> run -> started event -> pid ->
// finished event -> exit) is deliberate; do not reorder the awaits.
async function runCronCleanupScenario(params: {
gateway: GatewayRpcClient;
pidPath: string;
}): Promise<{ jobId: string; runId?: string; pid: number; status?: unknown }> {
const { gateway, pidPath } = params;
// Register an isolated-session cron job; the 60s cadence is irrelevant
// because we force-run it immediately below.
const job = await gateway.request<CronJob>("cron.add", {
name: "cron mcp cleanup docker e2e",
enabled: true,
schedule: { kind: "every", everyMs: 60_000 },
sessionTarget: "isolated",
wakeMode: "next-heartbeat",
payload: {
kind: "agentTurn",
message: "Use available context and then stop.",
timeoutSeconds: 12,
lightContext: true,
},
delivery: { mode: "none" },
});
assert(job.id, `cron.add did not return an id: ${JSON.stringify(job)}`);
// Force an immediate run instead of waiting for the schedule.
const run = await gateway.request<CronRunResult>("cron.run", {
id: job.id,
mode: "force",
});
assert(
run.ok === true && run.enqueued === true,
`cron.run was not enqueued: ${JSON.stringify(run)}`,
);
// Wait for the gateway's "started" cron event for this specific job.
const started = await waitFor(
"cron started event",
() =>
gateway.events.find(
(entry) =>
entry.event === "cron" &&
entry.payload.jobId === job.id &&
entry.payload.action === "started",
)?.payload,
60_000,
);
assert(started, "missing cron started event");
// The run should have materialized the bundled MCP server: its pid file
// appears once the probe child starts.
const pid = await waitForProbePid(pidPath);
assert(
pid,
`cron MCP probe did not start; missing pid file at ${pidPath}; events=${JSON.stringify(
gateway.events.slice(-10),
)}`,
);
// Sanity-check via `ps` that the pid actually belongs to the probe server
// (guards against stale pid files / pid recycling).
const initialArgs = await describeProbePid(pid);
assert(
initialArgs?.includes("openclaw-cron-mcp-cleanup-probe"),
`cron MCP probe pid did not look like the test server: pid=${pid} args=${initialArgs}`,
);
// Wait for the matching "finished" event before asserting teardown.
const finished = await waitFor(
"cron finished event",
() =>
gateway.events.find(
(entry) =>
entry.event === "cron" &&
entry.payload.jobId === job.id &&
entry.payload.action === "finished",
)?.payload,
90_000,
);
assert(finished, "missing cron finished event");
// Core assertion of the scenario: run-end cleanup kills the probe child.
await waitForProbeExit({ pid, label: "cron" });
return {
jobId: job.id,
runId: run.runId,
pid,
status: finished.status,
};
}
/**
 * Exercise the one-shot subagent MCP cleanup path: reset the probe artifacts,
 * submit a subagent-lane `agent` run with `cleanupBundleMcpOnRunEnd`, then
 * wait for one of the probe children recorded in the pid log to exit.
 */
async function runSubagentCleanupScenario(params: {
  gateway: GatewayRpcClient;
  pidPath: string;
  pidsPath: string;
  exitPath: string;
}): Promise<{ runId: string; exitedPid: number; pids: number[] }> {
  // Clear leftovers from the cron scenario so pid observations start fresh.
  await resetProbeFiles({
    pidPath: params.pidPath,
    pidsPath: params.pidsPath,
    exitPath: params.exitPath,
  });
  // Spawn a one-shot child turn on a unique subagent session key.
  const accepted = await params.gateway.request<AgentRunResult>("agent", {
    message: "Use available context and then stop.",
    sessionKey: `agent:main:subagent:docker-${randomUUID()}`,
    agentId: "main",
    lane: "subagent",
    cleanupBundleMcpOnRunEnd: true,
    idempotencyKey: randomUUID(),
    deliver: false,
    timeout: 20,
    bestEffortDeliver: true,
  });
  assert(
    accepted.status === "accepted" && accepted.runId,
    `agent did not accept subagent cleanup run: ${JSON.stringify(accepted)}`,
  );
  // Run-end cleanup should retire the bundled MCP runtime and kill the probe.
  const exitedPid = await waitForAnyProbeExit({
    pidsPath: params.pidsPath,
    label: "subagent",
    timeoutMs: 90_000,
  });
  const pids = await readProbePids(params.pidsPath);
  return { runId: accepted.runId, exitedPid, pids };
}
async function main() {
@@ -61,83 +237,20 @@ async function main() {
const gatewayToken = process.env.GW_TOKEN?.trim();
const stateDir = process.env.OPENCLAW_STATE_DIR?.trim() || path.join(os.homedir(), ".openclaw");
const pidPath = path.join(stateDir, "cron-mcp-cleanup", "probe.pid");
const pidsPath = path.join(stateDir, "cron-mcp-cleanup", "probe.pids");
const exitPath = path.join(stateDir, "cron-mcp-cleanup", "probe.exit");
assert(gatewayUrl, "missing GW_URL");
assert(gatewayToken, "missing GW_TOKEN");
const gateway = await connectGateway({ url: gatewayUrl, token: gatewayToken });
try {
const job = await gateway.request<CronJob>("cron.add", {
name: "cron mcp cleanup docker e2e",
enabled: true,
schedule: { kind: "every", everyMs: 60_000 },
sessionTarget: "isolated",
wakeMode: "next-heartbeat",
payload: {
kind: "agentTurn",
message: "Use available context and then stop.",
timeoutSeconds: 12,
lightContext: true,
},
delivery: { mode: "none" },
});
assert(job.id, `cron.add did not return an id: ${JSON.stringify(job)}`);
const run = await gateway.request<CronRunResult>("cron.run", {
id: job.id,
mode: "force",
});
assert(
run.ok === true && run.enqueued === true,
`cron.run was not enqueued: ${JSON.stringify(run)}`,
);
const started = await waitFor(
"cron started event",
() =>
gateway.events.find(
(entry) =>
entry.event === "cron" &&
entry.payload.jobId === job.id &&
entry.payload.action === "started",
)?.payload,
60_000,
);
assert(started, "missing cron started event");
const pid = await waitForProbePid(pidPath);
assert(
pid,
`cron MCP probe did not start; missing pid file at ${pidPath}; events=${JSON.stringify(
gateway.events.slice(-10),
)}`,
);
const initialArgs = await describeProbePid(pid);
assert(
initialArgs?.includes("openclaw-cron-mcp-cleanup-probe"),
`cron MCP probe pid did not look like the test server: pid=${pid} args=${initialArgs}`,
);
const finished = await waitFor(
"cron finished event",
() =>
gateway.events.find(
(entry) =>
entry.event === "cron" &&
entry.payload.jobId === job.id &&
entry.payload.action === "finished",
)?.payload,
90_000,
);
assert(finished, "missing cron finished event");
await waitForProbeExit(pid);
const cron = await runCronCleanupScenario({ gateway, pidPath });
const subagent = await runSubagentCleanupScenario({ gateway, pidPath, pidsPath, exitPath });
process.stdout.write(
JSON.stringify({
ok: true,
jobId: job.id,
runId: run.runId,
pid,
status: finished.status,
cron,
subagent,
}) + "\n",
);
} finally {

View File

@@ -18,7 +18,7 @@ trap cleanup EXIT
echo "Building Docker image..."
run_logged cron-mcp-cleanup-build docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
echo "Running in-container cron MCP cleanup smoke..."
echo "Running in-container cron/subagent MCP cleanup smoke..."
set +e
docker run --rm \
--name "$CONTAINER_NAME" \
@@ -81,7 +81,7 @@ status=${PIPESTATUS[0]}
set -e
if [ "$status" -ne 0 ]; then
echo "Docker cron MCP cleanup smoke failed"
echo "Docker cron/subagent MCP cleanup smoke failed"
cat "$CLIENT_LOG"
exit "$status"
fi

View File

@@ -27,7 +27,12 @@ const DOCKER_OPENAI_MODEL: ModelDefinitionConfig = {
maxTokens: 128_000,
};
async function writeProbeServer(params: { serverPath: string; pidPath: string; exitPath: string }) {
async function writeProbeServer(params: {
serverPath: string;
pidPath: string;
pidsPath: string;
exitPath: string;
}) {
const sdkMcpServerPath = require.resolve("@modelcontextprotocol/sdk/server/mcp.js");
const sdkStdioServerPath = require.resolve("@modelcontextprotocol/sdk/server/stdio.js");
await fs.writeFile(
@@ -41,6 +46,7 @@ import { StdioServerTransport } from ${JSON.stringify(sdkStdioServerPath)};
process.title = "openclaw-cron-mcp-cleanup-probe";
await fsp.mkdir(${JSON.stringify(path.dirname(params.pidPath))}, { recursive: true });
await fsp.writeFile(${JSON.stringify(params.pidPath)}, String(process.pid), "utf8");
await fsp.appendFile(${JSON.stringify(params.pidsPath)}, String(process.pid) + "\\n", "utf8");
process.once("exit", () => {
try {
fs.writeFileSync(${JSON.stringify(params.exitPath)}, "exited", "utf8");
@@ -72,13 +78,15 @@ async function main() {
const probeDir = path.join(stateDir, "cron-mcp-cleanup");
const serverPath = path.join(probeDir, "probe-server.mjs");
const pidPath = path.join(probeDir, "probe.pid");
const pidsPath = path.join(probeDir, "probe.pids");
const exitPath = path.join(probeDir, "probe.exit");
await fs.mkdir(probeDir, { recursive: true });
await fs.mkdir(path.dirname(configPath), { recursive: true });
await fs.rm(pidPath, { force: true });
await fs.rm(pidsPath, { force: true });
await fs.rm(exitPath, { force: true });
await writeProbeServer({ serverPath, pidPath, exitPath });
await writeProbeServer({ serverPath, pidPath, pidsPath, exitPath });
const seededConfig = applyProviderConfigWithDefaultModelPreset(
{
@@ -91,6 +99,20 @@ async function main() {
cron: {
enabled: false,
},
agents: {
defaults: {
subagents: {
runTimeoutSeconds: 8,
},
},
},
tools: {
subagents: {
tools: {
alsoAllow: ["bundle-mcp"],
},
},
},
mcp: {
servers: {
cronCleanupProbe: {
@@ -126,6 +148,7 @@ async function main() {
configPath,
serverPath,
pidPath,
pidsPath,
exitPath,
}) + "\n",
);

View File

@@ -9,10 +9,15 @@ import {
resetSessionsSpawnConfigOverride,
resetSessionsSpawnHookRunnerOverride,
setSessionsSpawnHookRunnerOverride,
setSessionsSpawnAnnounceFlowOverride,
setupSessionsSpawnGatewayMock,
setSessionsSpawnConfigOverride,
waitForSessionsSpawnEvent,
} from "./openclaw-tools.subagents.sessions-spawn.test-harness.js";
import {
__testing as bundleMcpRuntimeTesting,
getOrCreateSessionMcpRuntime,
} from "./pi-bundle-mcp-tools.js";
import {
getLatestSubagentRunByChildSessionKey,
resetSubagentRegistryForTests,
@@ -151,7 +156,8 @@ async function waitForRunCleanup(childSessionKey: string) {
}
describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
beforeEach(() => {
beforeEach(async () => {
await bundleMcpRuntimeTesting.resetSessionMcpRuntimeManager();
resetSessionsSpawnAnnounceFlowOverride();
resetSessionsSpawnHookRunnerOverride();
resetSessionsSpawnConfigOverride();
@@ -182,11 +188,12 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
callGatewayMock.mockClear();
});
afterEach(() => {
afterEach(async () => {
resetSessionsSpawnAnnounceFlowOverride();
resetSessionsSpawnHookRunnerOverride();
resetSessionsSpawnConfigOverride();
resetSubagentRegistryForTests({ persist: false });
await bundleMcpRuntimeTesting.resetSessionMcpRuntimeManager();
});
afterAll(() => {
@@ -262,6 +269,56 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
});
// Verifies that a run-mode spawn retires the per-session bundle MCP runtime
// once cleanup completes. The announce flow is gated with a manually resolved
// promise so the runtime can be registered *while the run is still in flight*.
it("sessions_spawn retires bundle MCP runtime when run-mode cleanup completes", async () => {
let resumeAnnounceFlow: ((value: boolean) => void) | undefined;
let announceFlowStarted: (() => void) | undefined;
// Resolved when the (stubbed) announce flow begins — tells us the run started.
const announceFlowStartedPromise = new Promise<void>((resolve) => {
announceFlowStarted = resolve;
});
// Held open until the test explicitly resumes it, keeping the run "active".
const announceFlowGate = new Promise<boolean>((resolve) => {
resumeAnnounceFlow = resolve;
});
setSessionsSpawnAnnounceFlowOverride(async () => {
announceFlowStarted?.();
return await announceFlowGate;
});
const ctx = setupSessionsSpawnGatewayMock({
includeChatHistory: true,
agentWaitResult: { status: "ok", startedAt: 3000, endedAt: 4000 },
});
const tool = await getSessionsSpawnTool({
agentSessionKey: "main",
agentChannel: "whatsapp",
});
await executeSpawnAndExpectAccepted({
tool,
callId: "call-mcp-retire",
cleanup: "keep",
});
// Block until the spawned run reaches the announce flow.
await announceFlowStartedPromise;
const child = ctx.getChild();
if (!child.sessionKey) {
throw new Error("missing child sessionKey");
}
// Register a bundle MCP runtime under the child's session key mid-run,
// so there is something for run-end cleanup to retire.
await getOrCreateSessionMcpRuntime({
sessionId: "session:subagent:mcp-retire",
sessionKey: child.sessionKey,
workspaceDir: "/tmp/openclaw-subagent-mcp-retire",
cfg: { mcp: { servers: {} } } as Parameters<typeof getOrCreateSessionMcpRuntime>[0]["cfg"],
});
expect(bundleMcpRuntimeTesting.getCachedSessionIds()).toContain("session:subagent:mcp-retire");
// Release the gate so the run can finish and cleanup can trigger.
resumeAnnounceFlow?.(true);
await waitForRunCleanup(child.sessionKey);
// Cleanup should evict the cached runtime for this session id.
await waitForSessionsSpawnEvent(
"bundle MCP runtime retirement",
() => !bundleMcpRuntimeTesting.getCachedSessionIds().includes("session:subagent:mcp-retire"),
);
});
it("sessions_spawn runs cleanup via lifecycle events", async () => {
let deletedKey: string | undefined;
const ctx = setupSessionsSpawnGatewayMock({

View File

@@ -5,6 +5,7 @@ import {
getOrCreateSessionMcpRuntime,
materializeBundleMcpToolsForRun,
retireSessionMcpRuntime,
retireSessionMcpRuntimeForSessionKey,
} from "./pi-bundle-mcp-tools.js";
import type { SessionMcpRuntime } from "./pi-bundle-mcp-types.js";
@@ -319,4 +320,25 @@ describe("session MCP runtime", () => {
await expect(retireSessionMcpRuntime({ sessionId: " ", reason: "test" })).resolves.toBe(false);
});
// Covers retireSessionMcpRuntimeForSessionKey: lookup is keyed by sessionKey
// (not sessionId), the key is trimmed before matching, and the call reports
// whether anything was actually retired.
it("retires global session runtimes by session key", async () => {
await getOrCreateSessionMcpRuntime({
sessionId: "session-retire-key",
sessionKey: "agent:test:session-retire-key",
workspaceDir: "/workspace",
});
expect(__testing.getCachedSessionIds()).toContain("session-retire-key");
// Padded key on purpose: the helper is expected to trim before matching.
await expect(
retireSessionMcpRuntimeForSessionKey({
sessionKey: " agent:test:session-retire-key ",
reason: "test",
}),
).resolves.toBe(true);
expect(__testing.getCachedSessionIds()).not.toContain("session-retire-key");
// Unknown key: nothing to retire, so the call resolves false.
await expect(
retireSessionMcpRuntimeForSessionKey({ sessionKey: "agent:test:missing", reason: "test" }),
).resolves.toBe(false);
});
});

View File

@@ -111,6 +111,7 @@ const installRunEmbeddedMocks = () => {
}));
vi.doMock("./pi-bundle-mcp-tools.js", () => ({
disposeSessionMcpRuntime: (sessionId: string) => disposeSessionMcpRuntimeMock(sessionId),
retireSessionMcpRuntimeForSessionKey: () => Promise.resolve(false),
retireSessionMcpRuntime: ({ sessionId }: { sessionId?: string | null }) =>
sessionId ? disposeSessionMcpRuntimeMock(sessionId) : Promise.resolve(false),
}));

View File

@@ -27,6 +27,10 @@ const browserLifecycleCleanupMocks = vi.hoisted(() => ({
cleanupBrowserSessionsForLifecycleEnd: vi.fn(async () => {}),
}));
const bundleMcpRuntimeMocks = vi.hoisted(() => ({
retireSessionMcpRuntimeForSessionKey: vi.fn(async () => true),
}));
vi.mock("../tasks/detached-task-runtime.js", () => ({
completeTaskRunByRunId: taskExecutorMocks.completeTaskRunByRunId,
failTaskRunByRunId: taskExecutorMocks.failTaskRunByRunId,
@@ -42,6 +46,10 @@ vi.mock("../browser-lifecycle-cleanup.js", () => ({
browserLifecycleCleanupMocks.cleanupBrowserSessionsForLifecycleEnd,
}));
vi.mock("./pi-bundle-mcp-tools.js", () => ({
retireSessionMcpRuntimeForSessionKey: bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey,
}));
vi.mock("../runtime.js", () => ({
defaultRuntime: {
log: runtimeMocks.log,
@@ -120,6 +128,8 @@ describe("subagent registry lifecycle hardening", () => {
beforeEach(() => {
vi.clearAllMocks();
browserLifecycleCleanupMocks.cleanupBrowserSessionsForLifecycleEnd.mockClear();
bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey.mockClear();
bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey.mockResolvedValue(true);
});
it("does not reject completion when task finalization throws", async () => {
@@ -233,6 +243,54 @@ describe("subagent registry lifecycle hardening", () => {
);
});
// Run-mode (one-shot) subagents: completing the run with cleanup enabled must
// retire the bundle MCP runtime for the child's session key.
it("retires bundle MCP runtimes when run-mode cleanup completes", async () => {
const entry = createRunEntry({
endedAt: 4_000,
expectsCompletionMessage: false,
spawnMode: "run",
});
const controller = createLifecycleController({ entry });
await expect(
controller.completeSubagentRun({
runId: entry.runId,
endedAt: 4_000,
outcome: { status: "ok" },
reason: SUBAGENT_ENDED_REASON_COMPLETE,
triggerCleanup: true,
}),
).resolves.toBeUndefined();
// Retirement is keyed by the child session and tagged with the cleanup
// reason; onError must be a callback so retirement failures don't reject
// the completion path.
expect(bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey).toHaveBeenCalledWith({
sessionKey: entry.childSessionKey,
reason: "subagent-run-cleanup",
onError: expect.any(Function),
});
});
// Session-mode (persistent) subagents: identical completion flow, but the
// bundle MCP runtime must stay warm — no retirement call at all.
it("keeps bundle MCP runtimes warm for persistent session-mode cleanup", async () => {
const entry = createRunEntry({
endedAt: 4_000,
expectsCompletionMessage: false,
spawnMode: "session",
});
const controller = createLifecycleController({ entry });
await expect(
controller.completeSubagentRun({
runId: entry.runId,
endedAt: 4_000,
outcome: { status: "ok" },
reason: SUBAGENT_ENDED_REASON_COMPLETE,
triggerCleanup: true,
}),
).resolves.toBeUndefined();
// Mirror case of the run-mode test above: persistent sessions skip retirement.
expect(bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey).not.toHaveBeenCalled();
});
it("enriches registered-run outcomes with persisted timing before cleanup", async () => {
const persist = vi.fn();
const runSubagentAnnounceFlow = vi.fn(async () => true);

View File

@@ -161,6 +161,15 @@ describe("spawnSubagentDirect seam flow", () => {
operations.indexOf("gateway:sessions.patch"),
);
expect(operations.indexOf("gateway:agent")).toBeGreaterThan(operations.indexOf("store:update"));
expect(hoisted.callGatewayMock).toHaveBeenCalledWith(
expect.objectContaining({
method: "agent",
params: expect.objectContaining({
sessionKey: childSessionKey,
cleanupBundleMcpOnRunEnd: true,
}),
}),
);
});
it("omits requesterOrigin threadId when no requester thread is provided", async () => {

View File

@@ -0,0 +1,80 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import type { CallGatewayOptions } from "../../gateway/call.js";
import { runAgentStep, __testing } from "./agent-step.js";
const runWaitMocks = vi.hoisted(() => ({
waitForAgentRunAndReadUpdatedAssistantReply: vi.fn(),
}));
const bundleMcpRuntimeMocks = vi.hoisted(() => ({
retireSessionMcpRuntimeForSessionKey: vi.fn(async () => true),
}));
vi.mock("../run-wait.js", () => ({
waitForAgentRunAndReadUpdatedAssistantReply:
runWaitMocks.waitForAgentRunAndReadUpdatedAssistantReply,
}));
vi.mock("../pi-bundle-mcp-tools.js", () => ({
retireSessionMcpRuntimeForSessionKey: bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey,
}));
// Nested `sessions_send` agent steps: the bundle MCP runtime is retired only
// after the nested run actually completes, never while it is still pending.
describe("runAgentStep", () => {
afterEach(() => {
// Restore the real dependency seam and reset mock call history between tests.
__testing.setDepsForTest();
vi.clearAllMocks();
});
it("retires bundle MCP runtime after successful nested agent steps", async () => {
const gatewayCalls: CallGatewayOptions[] = [];
// Capture the gateway call instead of hitting a real gateway.
__testing.setDepsForTest({
callGateway: async <T = unknown>(opts: CallGatewayOptions): Promise<T> => {
gatewayCalls.push(opts);
return { runId: "run-nested" } as T;
},
});
// Simulate the nested run finishing with a reply.
runWaitMocks.waitForAgentRunAndReadUpdatedAssistantReply.mockResolvedValue({
status: "ok",
replyText: "done",
});
await expect(
runAgentStep({
sessionKey: "agent:main:subagent:child",
message: "hello",
extraSystemPrompt: "reply briefly",
timeoutMs: 10_000,
}),
).resolves.toBe("done");
// The nested step runs on its own lane, without delivery.
expect(gatewayCalls[0]?.params).toMatchObject({
sessionKey: "agent:main:subagent:child",
deliver: false,
lane: "nested:agent:main:subagent:child",
});
// Completion triggers retirement, tagged with the nested-step reason.
expect(bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey).toHaveBeenCalledWith({
sessionKey: "agent:main:subagent:child",
reason: "nested-agent-step-complete",
});
});
it("does not retire bundle MCP runtime while nested agent steps are still pending", async () => {
__testing.setDepsForTest({
callGateway: async <T = unknown>(): Promise<T> => ({ runId: "run-pending" }) as T,
});
// A timed-out wait means the nested run may still be in flight.
runWaitMocks.waitForAgentRunAndReadUpdatedAssistantReply.mockResolvedValue({
status: "timeout",
});
await expect(
runAgentStep({
sessionKey: "agent:main:subagent:child",
message: "hello",
extraSystemPrompt: "reply briefly",
timeoutMs: 10_000,
}),
).resolves.toBeUndefined();
// Pending run => runtime must be left warm.
expect(bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey).not.toHaveBeenCalled();
});
});