diff --git a/CHANGELOG.md b/CHANGELOG.md index a07f3f00c37..73e7a9d457d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,7 @@ Docs: https://docs.openclaw.ai - Agents/OpenAI websocket: route native OpenAI websocket metadata and session-header decisions through the shared endpoint classifier so local mocks and custom `models.providers.openai.baseUrl` endpoints stay out of the native OpenAI path consistently across embedded-runner and websocket transport code. Thanks @vincentkoc. - Cron/MCP: retire bundled MCP runtimes through one shared cleanup path for isolated cron run ends, persistent cron session rollover, and direct cron `deleteAfterRun` fallback cleanup. Fixes #69145, #68623, and #68827. - MCP/gateway: tear down stdio MCP process trees on transport close and dispose bundled MCP runtimes during session delete/reset, preventing orphaned wrapper/server processes from accumulating. Fixes #68809 and #69465. +- Agents/MCP: retire bundled MCP runtimes after completed one-shot subagent cleanup and nested `sessions_send` steps, while keeping persistent subagent sessions warm. - Config: render validation warnings with real line breaks instead of a literal `\n` sequence in CLI/audit output. Fixes #70140. - Cron/doctor: repair malformed persisted cron job IDs through `openclaw doctor`, including legacy `jobId`, non-string `id`, and missing `id` rows, so `cron list` no longer needs display-layer coercion for corrupt store data. Fixes #70128. - Discord: normalize prefixed channel targets only at the thread-binding API boundary, so `sessions_spawn({ runtime: "acp", thread: true })` can create child threads from Discord channels without breaking current-channel ACP bindings. (#68034) Thanks @Zetarcos. diff --git a/docs/help/testing.md b/docs/help/testing.md index 85c760cca62..6a1d19ca1b7 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -910,6 +910,7 @@ The live-model Docker runners also bind-mount only the needed CLI auth homes (or - Gateway networking (two containers, WS auth + health): `pnpm test:docker:gateway-network` (script: `scripts/e2e/gateway-network-docker.sh`) - MCP channel bridge (seeded Gateway + stdio bridge + raw Claude notification-frame smoke): `pnpm test:docker:mcp-channels` (script: `scripts/e2e/mcp-channels-docker.sh`) - Pi bundle MCP tools (real stdio MCP server + embedded Pi profile allow/deny smoke): `pnpm test:docker:pi-bundle-mcp-tools` (script: `scripts/e2e/pi-bundle-mcp-tools-docker.sh`) +- Cron/subagent MCP cleanup (real Gateway + stdio MCP child teardown after isolated cron and one-shot subagent runs): `pnpm test:docker:cron-mcp-cleanup` (script: `scripts/e2e/cron-mcp-cleanup-docker.sh`) - Plugins (install smoke + `/plugin` alias + Claude-bundle restart semantics): `pnpm test:docker:plugins` (script: `scripts/e2e/plugins-docker.sh`) - Bundled plugin runtime deps: `pnpm test:docker:bundled-channel-deps` builds a small Docker runner image by default, builds and packs OpenClaw once on the host, then mounts that tarball into each Linux install scenario. Reuse the image with `OPENCLAW_SKIP_DOCKER_BUILD=1`, skip the host rebuild after a fresh local build with `OPENCLAW_BUNDLED_CHANNEL_HOST_BUILD=0`, or point at an existing tarball with `OPENCLAW_BUNDLED_CHANNEL_PACKAGE_TGZ=/path/to/openclaw-*.tgz`. - Narrow bundled plugin runtime deps while iterating by disabling unrelated scenarios, for example: @@ -951,6 +952,10 @@ model key. It builds the repo Docker image, starts a real stdio MCP probe server inside the container, materializes that server through the embedded Pi bundle MCP runtime, executes the tool, then verifies `coding` and `messaging` keep `bundle-mcp` tools while `minimal` and `tools.deny: ["bundle-mcp"]` filter them. +`test:docker:cron-mcp-cleanup` is deterministic and does not need a live model +key. It starts a seeded Gateway with a real stdio MCP probe server, runs an +isolated cron turn and a `/subagents spawn` one-shot child turn, then verifies +the MCP child process exits after each run. Manual ACP plain-language thread smoke (not CI): diff --git a/scripts/e2e/cron-mcp-cleanup-docker-client.ts b/scripts/e2e/cron-mcp-cleanup-docker-client.ts index 8c062d16972..f4ed3e49fb2 100644 --- a/scripts/e2e/cron-mcp-cleanup-docker-client.ts +++ b/scripts/e2e/cron-mcp-cleanup-docker-client.ts @@ -1,15 +1,17 @@ import { execFile } from "node:child_process"; +import { randomUUID } from "node:crypto"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { setTimeout as delay } from "node:timers/promises"; import { promisify } from "node:util"; -import { assert, connectGateway, waitFor } from "./mcp-channels-harness.ts"; +import { assert, connectGateway, type GatewayRpcClient, waitFor } from "./mcp-channels-harness.ts"; const execFileAsync = promisify(execFile); type CronJob = { id?: string }; type CronRunResult = { ok?: boolean; enqueued?: boolean; runId?: string }; +type AgentRunResult = { runId?: string; status?: string }; async function readProbePid(pidPath: string): Promise { try { @@ -21,6 +23,25 @@ async function readProbePid(pidPath: string): Promise { } } +async function readProbePids(pidsPath: string): Promise { + try { + const raw = await fs.readFile(pidsPath, "utf-8"); + const pids: number[] = []; + const seen = new Set(); + for (const line of raw.split(/\r?\n/)) { + const pid = Number.parseInt(line.trim(), 10); + if (!Number.isInteger(pid) || pid <= 0 || seen.has(pid)) { + continue; + } + seen.add(pid); + pids.push(pid); + } + return pids; + } catch { + return []; + } +} + async function describeProbePid(pid: number): Promise { try { const { stdout } = await execFileAsync("ps", ["-p", String(pid), "-o", "args="]); @@ -43,9 +64,14 @@ async function waitForProbePid(pidPath: string): Promise { return undefined; } -async function waitForProbeExit(pid: number): Promise { +async function waitForProbeExit(params: { + pid: number; + label: string; + timeoutMs?: number; +}): Promise { + const { pid, label, timeoutMs = 30_000 } = params; const startedAt = Date.now(); - while (Date.now() - startedAt < 30_000) { + while (Date.now() - startedAt < timeoutMs) { const args = await describeProbePid(pid); if (!args || !args.includes("openclaw-cron-mcp-cleanup-probe")) { return; @@ -53,7 +79,157 @@ async function waitForProbeExit(pid: number): Promise { await delay(100); } const args = await describeProbePid(pid); - throw new Error(`cron MCP probe process still alive after run: pid=${pid} args=${args}`); + throw new Error(`${label} MCP probe process still alive after run: pid=${pid} args=${args}`); +} + +async function waitForAnyProbeExit(params: { + pidsPath: string; + label: string; + timeoutMs: number; +}): Promise { + const startedAt = Date.now(); + let observed: number[] = []; + while (Date.now() - startedAt < params.timeoutMs) { + observed = await readProbePids(params.pidsPath); + for (const pid of observed) { + const args = await describeProbePid(pid); + if (!args || !args.includes("openclaw-cron-mcp-cleanup-probe")) { + return pid; + } + } + await delay(100); + } + const descriptions = await Promise.all( + observed.map(async (pid) => ({ pid, args: await describeProbePid(pid) })), + ); + throw new Error( + `${params.label} MCP probe processes still alive after run: ${JSON.stringify(descriptions)}`, + ); +} + +async function resetProbeFiles(params: { + pidPath: string; + pidsPath: string; + exitPath: string; +}): Promise { + await fs.rm(params.pidPath, { force: true }); + await fs.rm(params.pidsPath, { force: true }); + await fs.rm(params.exitPath, { force: true }); +} + +async function runCronCleanupScenario(params: { + gateway: GatewayRpcClient; + pidPath: string; +}): Promise<{ jobId: string; runId?: string; pid: number; status?: unknown }> { + const { gateway, pidPath } = params; + const job = await gateway.request("cron.add", { + name: "cron mcp cleanup docker e2e", + enabled: true, + schedule: { kind: "every", everyMs: 60_000 }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { + kind: "agentTurn", + message: "Use available context and then stop.", + timeoutSeconds: 12, + lightContext: true, + }, + delivery: { mode: "none" }, + }); + assert(job.id, `cron.add did not return an id: ${JSON.stringify(job)}`); + + const run = await gateway.request("cron.run", { + id: job.id, + mode: "force", + }); + assert( + run.ok === true && run.enqueued === true, + `cron.run was not enqueued: ${JSON.stringify(run)}`, + ); + + const started = await waitFor( + "cron started event", + () => + gateway.events.find( + (entry) => + entry.event === "cron" && + entry.payload.jobId === job.id && + entry.payload.action === "started", + )?.payload, + 60_000, + ); + assert(started, "missing cron started event"); + + const pid = await waitForProbePid(pidPath); + assert( + pid, + `cron MCP probe did not start; missing pid file at ${pidPath}; events=${JSON.stringify( + gateway.events.slice(-10), + )}`, + ); + const initialArgs = await describeProbePid(pid); + assert( + initialArgs?.includes("openclaw-cron-mcp-cleanup-probe"), + `cron MCP probe pid did not look like the test server: pid=${pid} args=${initialArgs}`, + ); + + const finished = await waitFor( + "cron finished event", + () => + gateway.events.find( + (entry) => + entry.event === "cron" && + entry.payload.jobId === job.id && + entry.payload.action === "finished", + )?.payload, + 90_000, + ); + assert(finished, "missing cron finished event"); + + await waitForProbeExit({ pid, label: "cron" }); + return { + jobId: job.id, + runId: run.runId, + pid, + status: finished.status, + }; +} + +async function runSubagentCleanupScenario(params: { + gateway: GatewayRpcClient; + pidPath: string; + pidsPath: string; + exitPath: string; +}): Promise<{ runId: string; exitedPid: number; pids: number[] }> { + const { gateway, pidPath, pidsPath, exitPath } = params; + await resetProbeFiles({ pidPath, pidsPath, exitPath }); + + const run = await gateway.request("agent", { + message: "Use available context and then stop.", + sessionKey: `agent:main:subagent:docker-${randomUUID()}`, + agentId: "main", + lane: "subagent", + cleanupBundleMcpOnRunEnd: true, + idempotencyKey: randomUUID(), + deliver: false, + timeout: 20, + bestEffortDeliver: true, + }); + assert( + run.status === "accepted" && run.runId, + `agent did not accept subagent cleanup run: ${JSON.stringify(run)}`, + ); + + const exitedPid = await waitForAnyProbeExit({ + pidsPath, + label: "subagent", + timeoutMs: 90_000, + }); + return { + runId: run.runId, + exitedPid, + pids: await readProbePids(pidsPath), + }; } async function main() { @@ -61,83 +237,20 @@ async function main() { const gatewayToken = process.env.GW_TOKEN?.trim(); const stateDir = process.env.OPENCLAW_STATE_DIR?.trim() || path.join(os.homedir(), ".openclaw"); const pidPath = path.join(stateDir, "cron-mcp-cleanup", "probe.pid"); + const pidsPath = path.join(stateDir, "cron-mcp-cleanup", "probe.pids"); + const exitPath = path.join(stateDir, "cron-mcp-cleanup", "probe.exit"); assert(gatewayUrl, "missing GW_URL"); assert(gatewayToken, "missing GW_TOKEN"); const gateway = await connectGateway({ url: gatewayUrl, token: gatewayToken }); try { - const job = await gateway.request("cron.add", { - name: "cron mcp cleanup docker e2e", - enabled: true, - schedule: { kind: "every", everyMs: 60_000 }, - sessionTarget: "isolated", - wakeMode: "next-heartbeat", - payload: { - kind: "agentTurn", - message: "Use available context and then stop.", - timeoutSeconds: 12, - lightContext: true, - }, - delivery: { mode: "none" }, - }); - assert(job.id, `cron.add did not return an id: ${JSON.stringify(job)}`); - - const run = await gateway.request("cron.run", { - id: job.id, - mode: "force", - }); - assert( - run.ok === true && run.enqueued === true, - `cron.run was not enqueued: ${JSON.stringify(run)}`, - ); - - const started = await waitFor( - "cron started event", - () => - gateway.events.find( - (entry) => - entry.event === "cron" && - entry.payload.jobId === job.id && - entry.payload.action === "started", - )?.payload, - 60_000, - ); - assert(started, "missing cron started event"); - - const pid = await waitForProbePid(pidPath); - assert( - pid, - `cron MCP probe did not start; missing pid file at ${pidPath}; events=${JSON.stringify( - gateway.events.slice(-10), - )}`, - ); - const initialArgs = await describeProbePid(pid); - assert( - initialArgs?.includes("openclaw-cron-mcp-cleanup-probe"), - `cron MCP probe pid did not look like the test server: pid=${pid} args=${initialArgs}`, - ); - - const finished = await waitFor( - "cron finished event", - () => - gateway.events.find( - (entry) => - entry.event === "cron" && - entry.payload.jobId === job.id && - entry.payload.action === "finished", - )?.payload, - 90_000, - ); - assert(finished, "missing cron finished event"); - - await waitForProbeExit(pid); + const cron = await runCronCleanupScenario({ gateway, pidPath }); + const subagent = await runSubagentCleanupScenario({ gateway, pidPath, pidsPath, exitPath }); process.stdout.write( JSON.stringify({ ok: true, - jobId: job.id, - runId: run.runId, - pid, - status: finished.status, + cron, + subagent, }) + "\n", ); } finally { diff --git a/scripts/e2e/cron-mcp-cleanup-docker.sh b/scripts/e2e/cron-mcp-cleanup-docker.sh index b5e3f99dfde..d3924471751 100644 --- a/scripts/e2e/cron-mcp-cleanup-docker.sh +++ b/scripts/e2e/cron-mcp-cleanup-docker.sh @@ -18,7 +18,7 @@ trap cleanup EXIT echo "Building Docker image..." run_logged cron-mcp-cleanup-build docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR" -echo "Running in-container cron MCP cleanup smoke..." +echo "Running in-container cron/subagent MCP cleanup smoke..." set +e docker run --rm \ --name "$CONTAINER_NAME" \ @@ -81,7 +81,7 @@ status=${PIPESTATUS[0]} set -e if [ "$status" -ne 0 ]; then - echo "Docker cron MCP cleanup smoke failed" + echo "Docker cron/subagent MCP cleanup smoke failed" cat "$CLIENT_LOG" exit "$status" fi diff --git a/scripts/e2e/cron-mcp-cleanup-seed.ts b/scripts/e2e/cron-mcp-cleanup-seed.ts index e6c4383cc2e..50bbaee20a9 100644 --- a/scripts/e2e/cron-mcp-cleanup-seed.ts +++ b/scripts/e2e/cron-mcp-cleanup-seed.ts @@ -27,7 +27,12 @@ const DOCKER_OPENAI_MODEL: ModelDefinitionConfig = { maxTokens: 128_000, }; -async function writeProbeServer(params: { serverPath: string; pidPath: string; exitPath: string }) { +async function writeProbeServer(params: { + serverPath: string; + pidPath: string; + pidsPath: string; + exitPath: string; +}) { const sdkMcpServerPath = require.resolve("@modelcontextprotocol/sdk/server/mcp.js"); const sdkStdioServerPath = require.resolve("@modelcontextprotocol/sdk/server/stdio.js"); await fs.writeFile( @@ -41,6 +46,7 @@ import { StdioServerTransport } from ${JSON.stringify(sdkStdioServerPath)}; process.title = "openclaw-cron-mcp-cleanup-probe"; await fsp.mkdir(${JSON.stringify(path.dirname(params.pidPath))}, { recursive: true }); await fsp.writeFile(${JSON.stringify(params.pidPath)}, String(process.pid), "utf8"); +await fsp.appendFile(${JSON.stringify(params.pidsPath)}, String(process.pid) + "\\n", "utf8"); process.once("exit", () => { try { fs.writeFileSync(${JSON.stringify(params.exitPath)}, "exited", "utf8"); @@ -72,13 +78,15 @@ async function main() { const probeDir = path.join(stateDir, "cron-mcp-cleanup"); const serverPath = path.join(probeDir, "probe-server.mjs"); const pidPath = path.join(probeDir, "probe.pid"); + const pidsPath = path.join(probeDir, "probe.pids"); const exitPath = path.join(probeDir, "probe.exit"); await fs.mkdir(probeDir, { recursive: true }); await fs.mkdir(path.dirname(configPath), { recursive: true }); await fs.rm(pidPath, { force: true }); + await fs.rm(pidsPath, { force: true }); await fs.rm(exitPath, { force: true }); - await writeProbeServer({ serverPath, pidPath, exitPath }); + await writeProbeServer({ serverPath, pidPath, pidsPath, exitPath }); const seededConfig = applyProviderConfigWithDefaultModelPreset( { @@ -91,6 +99,20 @@ async function main() { cron: { enabled: false, }, + agents: { + defaults: { + subagents: { + runTimeoutSeconds: 8, + }, + }, + }, + tools: { + subagents: { + tools: { + alsoAllow: ["bundle-mcp"], + }, + }, + }, mcp: { servers: { cronCleanupProbe: { @@ -126,6 +148,7 @@ async function main() { configPath, serverPath, pidPath, + pidsPath, exitPath, }) + "\n", ); diff --git a/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts b/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts index 948f69fc957..9da67a2d69b 100644 --- a/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts +++ b/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts @@ -9,10 +9,15 @@ import { resetSessionsSpawnConfigOverride, resetSessionsSpawnHookRunnerOverride, setSessionsSpawnHookRunnerOverride, + setSessionsSpawnAnnounceFlowOverride, setupSessionsSpawnGatewayMock, setSessionsSpawnConfigOverride, waitForSessionsSpawnEvent, } from "./openclaw-tools.subagents.sessions-spawn.test-harness.js"; +import { + __testing as bundleMcpRuntimeTesting, + getOrCreateSessionMcpRuntime, +} from "./pi-bundle-mcp-tools.js"; import { getLatestSubagentRunByChildSessionKey, resetSubagentRegistryForTests, @@ -151,7 +156,8 @@ async function waitForRunCleanup(childSessionKey: string) { } describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => { - beforeEach(() => { + beforeEach(async () => { + await bundleMcpRuntimeTesting.resetSessionMcpRuntimeManager(); resetSessionsSpawnAnnounceFlowOverride(); resetSessionsSpawnHookRunnerOverride(); resetSessionsSpawnConfigOverride(); @@ -182,11 +188,12 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => { callGatewayMock.mockClear(); }); - afterEach(() => { + afterEach(async () => { resetSessionsSpawnAnnounceFlowOverride(); resetSessionsSpawnHookRunnerOverride(); resetSessionsSpawnConfigOverride(); resetSubagentRegistryForTests({ persist: false }); + await bundleMcpRuntimeTesting.resetSessionMcpRuntimeManager(); }); afterAll(() => { @@ -262,6 +269,56 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => { expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true); }); + it("sessions_spawn retires bundle MCP runtime when run-mode cleanup completes", async () => { + let resumeAnnounceFlow: ((value: boolean) => void) | undefined; + let announceFlowStarted: (() => void) | undefined; + const announceFlowStartedPromise = new Promise((resolve) => { + announceFlowStarted = resolve; + }); + const announceFlowGate = new Promise((resolve) => { + resumeAnnounceFlow = resolve; + }); + setSessionsSpawnAnnounceFlowOverride(async () => { + announceFlowStarted?.(); + return await announceFlowGate; + }); + const ctx = setupSessionsSpawnGatewayMock({ + includeChatHistory: true, + agentWaitResult: { status: "ok", startedAt: 3000, endedAt: 4000 }, + }); + + const tool = await getSessionsSpawnTool({ + agentSessionKey: "main", + agentChannel: "whatsapp", + }); + + await executeSpawnAndExpectAccepted({ + tool, + callId: "call-mcp-retire", + cleanup: "keep", + }); + + await announceFlowStartedPromise; + const child = ctx.getChild(); + if (!child.sessionKey) { + throw new Error("missing child sessionKey"); + } + await getOrCreateSessionMcpRuntime({ + sessionId: "session:subagent:mcp-retire", + sessionKey: child.sessionKey, + workspaceDir: "/tmp/openclaw-subagent-mcp-retire", + cfg: { mcp: { servers: {} } } as Parameters[0]["cfg"], + }); + expect(bundleMcpRuntimeTesting.getCachedSessionIds()).toContain("session:subagent:mcp-retire"); + + resumeAnnounceFlow?.(true); + await waitForRunCleanup(child.sessionKey); + await waitForSessionsSpawnEvent( + "bundle MCP runtime retirement", + () => !bundleMcpRuntimeTesting.getCachedSessionIds().includes("session:subagent:mcp-retire"), + ); + }); + it("sessions_spawn runs cleanup via lifecycle events", async () => { let deletedKey: string | undefined; const ctx = setupSessionsSpawnGatewayMock({ diff --git a/src/agents/pi-bundle-mcp-runtime.test.ts b/src/agents/pi-bundle-mcp-runtime.test.ts index 173259e73f7..456236ac78d 100644 --- a/src/agents/pi-bundle-mcp-runtime.test.ts +++ b/src/agents/pi-bundle-mcp-runtime.test.ts @@ -5,6 +5,7 @@ import { getOrCreateSessionMcpRuntime, materializeBundleMcpToolsForRun, retireSessionMcpRuntime, + retireSessionMcpRuntimeForSessionKey, } from "./pi-bundle-mcp-tools.js"; import type { SessionMcpRuntime } from "./pi-bundle-mcp-types.js"; @@ -319,4 +320,25 @@ describe("session MCP runtime", () => { await expect(retireSessionMcpRuntime({ sessionId: " ", reason: "test" })).resolves.toBe(false); }); + + it("retires global session runtimes by session key", async () => { + await getOrCreateSessionMcpRuntime({ + sessionId: "session-retire-key", + sessionKey: "agent:test:session-retire-key", + workspaceDir: "/workspace", + }); + expect(__testing.getCachedSessionIds()).toContain("session-retire-key"); + + await expect( + retireSessionMcpRuntimeForSessionKey({ + sessionKey: " agent:test:session-retire-key ", + reason: "test", + }), + ).resolves.toBe(true); + expect(__testing.getCachedSessionIds()).not.toContain("session-retire-key"); + + await expect( + retireSessionMcpRuntimeForSessionKey({ sessionKey: "agent:test:missing", reason: "test" }), + ).resolves.toBe(false); + }); }); diff --git a/src/agents/pi-embedded-runner.e2e.test.ts b/src/agents/pi-embedded-runner.e2e.test.ts index 8a22f19c428..3274dde08a0 100644 --- a/src/agents/pi-embedded-runner.e2e.test.ts +++ b/src/agents/pi-embedded-runner.e2e.test.ts @@ -111,6 +111,7 @@ const installRunEmbeddedMocks = () => { })); vi.doMock("./pi-bundle-mcp-tools.js", () => ({ disposeSessionMcpRuntime: (sessionId: string) => disposeSessionMcpRuntimeMock(sessionId), + retireSessionMcpRuntimeForSessionKey: () => Promise.resolve(false), retireSessionMcpRuntime: ({ sessionId }: { sessionId?: string | null }) => sessionId ? disposeSessionMcpRuntimeMock(sessionId) : Promise.resolve(false), })); diff --git a/src/agents/subagent-registry-lifecycle.test.ts b/src/agents/subagent-registry-lifecycle.test.ts index 33ad78e7aa7..07951c68dd0 100644 --- a/src/agents/subagent-registry-lifecycle.test.ts +++ b/src/agents/subagent-registry-lifecycle.test.ts @@ -27,6 +27,10 @@ const browserLifecycleCleanupMocks = vi.hoisted(() => ({ cleanupBrowserSessionsForLifecycleEnd: vi.fn(async () => {}), })); +const bundleMcpRuntimeMocks = vi.hoisted(() => ({ + retireSessionMcpRuntimeForSessionKey: vi.fn(async () => true), +})); + vi.mock("../tasks/detached-task-runtime.js", () => ({ completeTaskRunByRunId: taskExecutorMocks.completeTaskRunByRunId, failTaskRunByRunId: taskExecutorMocks.failTaskRunByRunId, @@ -42,6 +46,10 @@ vi.mock("../browser-lifecycle-cleanup.js", () => ({ browserLifecycleCleanupMocks.cleanupBrowserSessionsForLifecycleEnd, })); +vi.mock("./pi-bundle-mcp-tools.js", () => ({ + retireSessionMcpRuntimeForSessionKey: bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey, +})); + vi.mock("../runtime.js", () => ({ defaultRuntime: { log: runtimeMocks.log, @@ -120,6 +128,8 @@ describe("subagent registry lifecycle hardening", () => { beforeEach(() => { vi.clearAllMocks(); browserLifecycleCleanupMocks.cleanupBrowserSessionsForLifecycleEnd.mockClear(); + bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey.mockClear(); + bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey.mockResolvedValue(true); }); it("does not reject completion when task finalization throws", async () => { @@ -233,6 +243,54 @@ describe("subagent registry lifecycle hardening", () => { ); }); + it("retires bundle MCP runtimes when run-mode cleanup completes", async () => { + const entry = createRunEntry({ + endedAt: 4_000, + expectsCompletionMessage: false, + spawnMode: "run", + }); + + const controller = createLifecycleController({ entry }); + + await expect( + controller.completeSubagentRun({ + runId: entry.runId, + endedAt: 4_000, + outcome: { status: "ok" }, + reason: SUBAGENT_ENDED_REASON_COMPLETE, + triggerCleanup: true, + }), + ).resolves.toBeUndefined(); + + expect(bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey).toHaveBeenCalledWith({ + sessionKey: entry.childSessionKey, + reason: "subagent-run-cleanup", + onError: expect.any(Function), + }); + }); + + it("keeps bundle MCP runtimes warm for persistent session-mode cleanup", async () => { + const entry = createRunEntry({ + endedAt: 4_000, + expectsCompletionMessage: false, + spawnMode: "session", + }); + + const controller = createLifecycleController({ entry }); + + await expect( + controller.completeSubagentRun({ + runId: entry.runId, + endedAt: 4_000, + outcome: { status: "ok" }, + reason: SUBAGENT_ENDED_REASON_COMPLETE, + triggerCleanup: true, + }), + ).resolves.toBeUndefined(); + + expect(bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey).not.toHaveBeenCalled(); + }); + it("enriches registered-run outcomes with persisted timing before cleanup", async () => { const persist = vi.fn(); const runSubagentAnnounceFlow = vi.fn(async () => true); diff --git a/src/agents/subagent-spawn.test.ts b/src/agents/subagent-spawn.test.ts index d64ca11b7c5..97f1d92ae90 100644 --- a/src/agents/subagent-spawn.test.ts +++ b/src/agents/subagent-spawn.test.ts @@ -161,6 +161,15 @@ describe("spawnSubagentDirect seam flow", () => { operations.indexOf("gateway:sessions.patch"), ); expect(operations.indexOf("gateway:agent")).toBeGreaterThan(operations.indexOf("store:update")); + expect(hoisted.callGatewayMock).toHaveBeenCalledWith( + expect.objectContaining({ + method: "agent", + params: expect.objectContaining({ + sessionKey: childSessionKey, + cleanupBundleMcpOnRunEnd: true, + }), + }), + ); }); it("omits requesterOrigin threadId when no requester thread is provided", async () => { diff --git a/src/agents/tools/agent-step.test.ts b/src/agents/tools/agent-step.test.ts new file mode 100644 index 00000000000..ceb2124273b --- /dev/null +++ b/src/agents/tools/agent-step.test.ts @@ -0,0 +1,80 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import type { CallGatewayOptions } from "../../gateway/call.js"; +import { runAgentStep, __testing } from "./agent-step.js"; + +const runWaitMocks = vi.hoisted(() => ({ + waitForAgentRunAndReadUpdatedAssistantReply: vi.fn(), +})); + +const bundleMcpRuntimeMocks = vi.hoisted(() => ({ + retireSessionMcpRuntimeForSessionKey: vi.fn(async () => true), +})); + +vi.mock("../run-wait.js", () => ({ + waitForAgentRunAndReadUpdatedAssistantReply: + runWaitMocks.waitForAgentRunAndReadUpdatedAssistantReply, +})); + +vi.mock("../pi-bundle-mcp-tools.js", () => ({ + retireSessionMcpRuntimeForSessionKey: bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey, +})); + +describe("runAgentStep", () => { + afterEach(() => { + __testing.setDepsForTest(); + vi.clearAllMocks(); + }); + + it("retires bundle MCP runtime after successful nested agent steps", async () => { + const gatewayCalls: CallGatewayOptions[] = []; + __testing.setDepsForTest({ + callGateway: async (opts: CallGatewayOptions): Promise => { + gatewayCalls.push(opts); + return { runId: "run-nested" } as T; + }, + }); + runWaitMocks.waitForAgentRunAndReadUpdatedAssistantReply.mockResolvedValue({ + status: "ok", + replyText: "done", + }); + + await expect( + runAgentStep({ + sessionKey: "agent:main:subagent:child", + message: "hello", + extraSystemPrompt: "reply briefly", + timeoutMs: 10_000, + }), + ).resolves.toBe("done"); + + expect(gatewayCalls[0]?.params).toMatchObject({ + sessionKey: "agent:main:subagent:child", + deliver: false, + lane: "nested:agent:main:subagent:child", + }); + expect(bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey).toHaveBeenCalledWith({ + sessionKey: "agent:main:subagent:child", + reason: "nested-agent-step-complete", + }); + }); + + it("does not retire bundle MCP runtime while nested agent steps are still pending", async () => { + __testing.setDepsForTest({ + callGateway: async (): Promise => ({ runId: "run-pending" }) as T, + }); + runWaitMocks.waitForAgentRunAndReadUpdatedAssistantReply.mockResolvedValue({ + status: "timeout", + }); + + await expect( + runAgentStep({ + sessionKey: "agent:main:subagent:child", + message: "hello", + extraSystemPrompt: "reply briefly", + timeoutMs: 10_000, + }), + ).resolves.toBeUndefined(); + + expect(bundleMcpRuntimeMocks.retireSessionMcpRuntimeForSessionKey).not.toHaveBeenCalled(); + }); +});