test: cover docker MCP cleanup for subagents

This commit is contained in:
Peter Steinberger
2026-04-23 02:45:59 +01:00
parent ccf2e77e8d
commit 1489febee9
11 changed files with 448 additions and 79 deletions

View File

@@ -1,15 +1,17 @@
import { execFile } from "node:child_process";
import { randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { setTimeout as delay } from "node:timers/promises";
import { promisify } from "node:util";
import { assert, connectGateway, waitFor } from "./mcp-channels-harness.ts";
import { assert, connectGateway, type GatewayRpcClient, waitFor } from "./mcp-channels-harness.ts";
const execFileAsync = promisify(execFile);
type CronJob = { id?: string };
type CronRunResult = { ok?: boolean; enqueued?: boolean; runId?: string };
type AgentRunResult = { runId?: string; status?: string };
async function readProbePid(pidPath: string): Promise<number | undefined> {
try {
@@ -21,6 +23,25 @@ async function readProbePid(pidPath: string): Promise<number | undefined> {
}
}
async function readProbePids(pidsPath: string): Promise<number[]> {
try {
const raw = await fs.readFile(pidsPath, "utf-8");
const pids: number[] = [];
const seen = new Set<number>();
for (const line of raw.split(/\r?\n/)) {
const pid = Number.parseInt(line.trim(), 10);
if (!Number.isInteger(pid) || pid <= 0 || seen.has(pid)) {
continue;
}
seen.add(pid);
pids.push(pid);
}
return pids;
} catch {
return [];
}
}
async function describeProbePid(pid: number): Promise<string | undefined> {
try {
const { stdout } = await execFileAsync("ps", ["-p", String(pid), "-o", "args="]);
@@ -43,9 +64,14 @@ async function waitForProbePid(pidPath: string): Promise<number | undefined> {
return undefined;
}
async function waitForProbeExit(pid: number): Promise<void> {
async function waitForProbeExit(params: {
pid: number;
label: string;
timeoutMs?: number;
}): Promise<void> {
const { pid, label, timeoutMs = 30_000 } = params;
const startedAt = Date.now();
while (Date.now() - startedAt < 30_000) {
while (Date.now() - startedAt < timeoutMs) {
const args = await describeProbePid(pid);
if (!args || !args.includes("openclaw-cron-mcp-cleanup-probe")) {
return;
@@ -53,7 +79,157 @@ async function waitForProbeExit(pid: number): Promise<void> {
await delay(100);
}
const args = await describeProbePid(pid);
throw new Error(`cron MCP probe process still alive after run: pid=${pid} args=${args}`);
throw new Error(`${label} MCP probe process still alive after run: pid=${pid} args=${args}`);
}
async function waitForAnyProbeExit(params: {
pidsPath: string;
label: string;
timeoutMs: number;
}): Promise<number> {
const startedAt = Date.now();
let observed: number[] = [];
while (Date.now() - startedAt < params.timeoutMs) {
observed = await readProbePids(params.pidsPath);
for (const pid of observed) {
const args = await describeProbePid(pid);
if (!args || !args.includes("openclaw-cron-mcp-cleanup-probe")) {
return pid;
}
}
await delay(100);
}
const descriptions = await Promise.all(
observed.map(async (pid) => ({ pid, args: await describeProbePid(pid) })),
);
throw new Error(
`${params.label} MCP probe processes still alive after run: ${JSON.stringify(descriptions)}`,
);
}
async function resetProbeFiles(params: {
pidPath: string;
pidsPath: string;
exitPath: string;
}): Promise<void> {
await fs.rm(params.pidPath, { force: true });
await fs.rm(params.pidsPath, { force: true });
await fs.rm(params.exitPath, { force: true });
}
async function runCronCleanupScenario(params: {
gateway: GatewayRpcClient;
pidPath: string;
}): Promise<{ jobId: string; runId?: string; pid: number; status?: unknown }> {
const { gateway, pidPath } = params;
const job = await gateway.request<CronJob>("cron.add", {
name: "cron mcp cleanup docker e2e",
enabled: true,
schedule: { kind: "every", everyMs: 60_000 },
sessionTarget: "isolated",
wakeMode: "next-heartbeat",
payload: {
kind: "agentTurn",
message: "Use available context and then stop.",
timeoutSeconds: 12,
lightContext: true,
},
delivery: { mode: "none" },
});
assert(job.id, `cron.add did not return an id: ${JSON.stringify(job)}`);
const run = await gateway.request<CronRunResult>("cron.run", {
id: job.id,
mode: "force",
});
assert(
run.ok === true && run.enqueued === true,
`cron.run was not enqueued: ${JSON.stringify(run)}`,
);
const started = await waitFor(
"cron started event",
() =>
gateway.events.find(
(entry) =>
entry.event === "cron" &&
entry.payload.jobId === job.id &&
entry.payload.action === "started",
)?.payload,
60_000,
);
assert(started, "missing cron started event");
const pid = await waitForProbePid(pidPath);
assert(
pid,
`cron MCP probe did not start; missing pid file at ${pidPath}; events=${JSON.stringify(
gateway.events.slice(-10),
)}`,
);
const initialArgs = await describeProbePid(pid);
assert(
initialArgs?.includes("openclaw-cron-mcp-cleanup-probe"),
`cron MCP probe pid did not look like the test server: pid=${pid} args=${initialArgs}`,
);
const finished = await waitFor(
"cron finished event",
() =>
gateway.events.find(
(entry) =>
entry.event === "cron" &&
entry.payload.jobId === job.id &&
entry.payload.action === "finished",
)?.payload,
90_000,
);
assert(finished, "missing cron finished event");
await waitForProbeExit({ pid, label: "cron" });
return {
jobId: job.id,
runId: run.runId,
pid,
status: finished.status,
};
}
async function runSubagentCleanupScenario(params: {
gateway: GatewayRpcClient;
pidPath: string;
pidsPath: string;
exitPath: string;
}): Promise<{ runId: string; exitedPid: number; pids: number[] }> {
const { gateway, pidPath, pidsPath, exitPath } = params;
await resetProbeFiles({ pidPath, pidsPath, exitPath });
const run = await gateway.request<AgentRunResult>("agent", {
message: "Use available context and then stop.",
sessionKey: `agent:main:subagent:docker-${randomUUID()}`,
agentId: "main",
lane: "subagent",
cleanupBundleMcpOnRunEnd: true,
idempotencyKey: randomUUID(),
deliver: false,
timeout: 20,
bestEffortDeliver: true,
});
assert(
run.status === "accepted" && run.runId,
`agent did not accept subagent cleanup run: ${JSON.stringify(run)}`,
);
const exitedPid = await waitForAnyProbeExit({
pidsPath,
label: "subagent",
timeoutMs: 90_000,
});
return {
runId: run.runId,
exitedPid,
pids: await readProbePids(pidsPath),
};
}
async function main() {
@@ -61,83 +237,20 @@ async function main() {
const gatewayToken = process.env.GW_TOKEN?.trim();
const stateDir = process.env.OPENCLAW_STATE_DIR?.trim() || path.join(os.homedir(), ".openclaw");
const pidPath = path.join(stateDir, "cron-mcp-cleanup", "probe.pid");
const pidsPath = path.join(stateDir, "cron-mcp-cleanup", "probe.pids");
const exitPath = path.join(stateDir, "cron-mcp-cleanup", "probe.exit");
assert(gatewayUrl, "missing GW_URL");
assert(gatewayToken, "missing GW_TOKEN");
const gateway = await connectGateway({ url: gatewayUrl, token: gatewayToken });
try {
const job = await gateway.request<CronJob>("cron.add", {
name: "cron mcp cleanup docker e2e",
enabled: true,
schedule: { kind: "every", everyMs: 60_000 },
sessionTarget: "isolated",
wakeMode: "next-heartbeat",
payload: {
kind: "agentTurn",
message: "Use available context and then stop.",
timeoutSeconds: 12,
lightContext: true,
},
delivery: { mode: "none" },
});
assert(job.id, `cron.add did not return an id: ${JSON.stringify(job)}`);
const run = await gateway.request<CronRunResult>("cron.run", {
id: job.id,
mode: "force",
});
assert(
run.ok === true && run.enqueued === true,
`cron.run was not enqueued: ${JSON.stringify(run)}`,
);
const started = await waitFor(
"cron started event",
() =>
gateway.events.find(
(entry) =>
entry.event === "cron" &&
entry.payload.jobId === job.id &&
entry.payload.action === "started",
)?.payload,
60_000,
);
assert(started, "missing cron started event");
const pid = await waitForProbePid(pidPath);
assert(
pid,
`cron MCP probe did not start; missing pid file at ${pidPath}; events=${JSON.stringify(
gateway.events.slice(-10),
)}`,
);
const initialArgs = await describeProbePid(pid);
assert(
initialArgs?.includes("openclaw-cron-mcp-cleanup-probe"),
`cron MCP probe pid did not look like the test server: pid=${pid} args=${initialArgs}`,
);
const finished = await waitFor(
"cron finished event",
() =>
gateway.events.find(
(entry) =>
entry.event === "cron" &&
entry.payload.jobId === job.id &&
entry.payload.action === "finished",
)?.payload,
90_000,
);
assert(finished, "missing cron finished event");
await waitForProbeExit(pid);
const cron = await runCronCleanupScenario({ gateway, pidPath });
const subagent = await runSubagentCleanupScenario({ gateway, pidPath, pidsPath, exitPath });
process.stdout.write(
JSON.stringify({
ok: true,
jobId: job.id,
runId: run.runId,
pid,
status: finished.status,
cron,
subagent,
}) + "\n",
);
} finally {

View File

@@ -18,7 +18,7 @@ trap cleanup EXIT
echo "Building Docker image..."
run_logged cron-mcp-cleanup-build docker build -t "$IMAGE_NAME" -f "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR"
echo "Running in-container cron MCP cleanup smoke..."
echo "Running in-container cron/subagent MCP cleanup smoke..."
set +e
docker run --rm \
--name "$CONTAINER_NAME" \
@@ -81,7 +81,7 @@ status=${PIPESTATUS[0]}
set -e
if [ "$status" -ne 0 ]; then
echo "Docker cron MCP cleanup smoke failed"
echo "Docker cron/subagent MCP cleanup smoke failed"
cat "$CLIENT_LOG"
exit "$status"
fi

View File

@@ -27,7 +27,12 @@ const DOCKER_OPENAI_MODEL: ModelDefinitionConfig = {
maxTokens: 128_000,
};
async function writeProbeServer(params: { serverPath: string; pidPath: string; exitPath: string }) {
async function writeProbeServer(params: {
serverPath: string;
pidPath: string;
pidsPath: string;
exitPath: string;
}) {
const sdkMcpServerPath = require.resolve("@modelcontextprotocol/sdk/server/mcp.js");
const sdkStdioServerPath = require.resolve("@modelcontextprotocol/sdk/server/stdio.js");
await fs.writeFile(
@@ -41,6 +46,7 @@ import { StdioServerTransport } from ${JSON.stringify(sdkStdioServerPath)};
process.title = "openclaw-cron-mcp-cleanup-probe";
await fsp.mkdir(${JSON.stringify(path.dirname(params.pidPath))}, { recursive: true });
await fsp.writeFile(${JSON.stringify(params.pidPath)}, String(process.pid), "utf8");
await fsp.appendFile(${JSON.stringify(params.pidsPath)}, String(process.pid) + "\\n", "utf8");
process.once("exit", () => {
try {
fs.writeFileSync(${JSON.stringify(params.exitPath)}, "exited", "utf8");
@@ -72,13 +78,15 @@ async function main() {
const probeDir = path.join(stateDir, "cron-mcp-cleanup");
const serverPath = path.join(probeDir, "probe-server.mjs");
const pidPath = path.join(probeDir, "probe.pid");
const pidsPath = path.join(probeDir, "probe.pids");
const exitPath = path.join(probeDir, "probe.exit");
await fs.mkdir(probeDir, { recursive: true });
await fs.mkdir(path.dirname(configPath), { recursive: true });
await fs.rm(pidPath, { force: true });
await fs.rm(pidsPath, { force: true });
await fs.rm(exitPath, { force: true });
await writeProbeServer({ serverPath, pidPath, exitPath });
await writeProbeServer({ serverPath, pidPath, pidsPath, exitPath });
const seededConfig = applyProviderConfigWithDefaultModelPreset(
{
@@ -91,6 +99,20 @@ async function main() {
cron: {
enabled: false,
},
agents: {
defaults: {
subagents: {
runTimeoutSeconds: 8,
},
},
},
tools: {
subagents: {
tools: {
alsoAllow: ["bundle-mcp"],
},
},
},
mcp: {
servers: {
cronCleanupProbe: {
@@ -126,6 +148,7 @@ async function main() {
configPath,
serverPath,
pidPath,
pidsPath,
exitPath,
}) + "\n",
);