diff --git a/CHANGELOG.md b/CHANGELOG.md index bba162b1211..13ec95e3405 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,7 @@ Docs: https://docs.openclaw.ai - Release/CI/E2E: fail the kitchen-sink RPC walk when command RSS sampling captures no process samples. - Release/CI/E2E: force-stop memory/fd repro gateway children that survive listener cleanup. - Release/CI/E2E: remove fallback ClawHub skill-install home directories when proof runs fail. +- Release/CI/E2E: let plugin lifecycle measurement wrappers exit promptly after external shutdown while preserving descendant cleanup. - Installers: fail the PowerShell installer when interactive onboarding exits non-zero. - Scripts/UI: stop descendant processes from wrapped non-interactive commands when `run-with-env` receives shutdown signals. - Release/CI/E2E: write multi-node update Docker artifacts to unique per-run directories by default so parallel runs cannot overwrite evidence. diff --git a/scripts/e2e/lib/plugin-lifecycle-matrix/measure.mjs b/scripts/e2e/lib/plugin-lifecycle-matrix/measure.mjs index cf5b961a90d..4be88fdff9a 100644 --- a/scripts/e2e/lib/plugin-lifecycle-matrix/measure.mjs +++ b/scripts/e2e/lib/plugin-lifecycle-matrix/measure.mjs @@ -125,7 +125,10 @@ let maxCpuTicks = 0; let timedOut = false; let finished = false; let parentSignalInFlight = false; +let forwardedParentSignal = null; let killTimer; +let parentSignalTimer; +let parentSignalPollTimer; const updateMetrics = () => { if (!child.pid) { return; @@ -164,6 +167,21 @@ function terminateChildGroup(signal) { } catch {} } +function childGroupExists() { + if (!child.pid) { + return false; + } + try { + process.kill(-child.pid, 0); + return true; + } catch (error) { + if (error && error.code === "ESRCH") { + return false; + } + return true; + } +} + function clearRuntimeTimers() { clearInterval(interval); if (timeoutTimer) { @@ -172,9 +190,16 @@ function clearRuntimeTimers() { if (killTimer) { clearTimeout(killTimer); } + if (parentSignalTimer) { + clearTimeout(parentSignalTimer); + } + if (parentSignalPollTimer) { + clearInterval(parentSignalPollTimer); + } } function rethrowParentSignal(signal) { + clearRuntimeTimers(); process.removeAllListeners(signal); process.kill(process.pid, signal); process.exit(128); @@ -192,12 +217,18 @@ function handleParentSignal(signal) { return; } finished = true; + forwardedParentSignal = signal; clearRuntimeTimers(); terminateChildGroup(signal); - setTimeout(() => { + parentSignalTimer = setTimeout(() => { terminateChildGroup("SIGKILL"); rethrowParentSignal(signal); }, timeoutKillGraceMs); + parentSignalPollTimer = setInterval(() => { + if (!childGroupExists()) { + rethrowParentSignal(signal); + } + }, Math.min(50, timeoutKillGraceMs)); } for (const signal of ["SIGHUP", "SIGINT", "SIGTERM"]) { @@ -248,6 +279,12 @@ child.on("error", (error) => { }); child.on("exit", (code, signal) => { + if (parentSignalInFlight && forwardedParentSignal) { + if (!childGroupExists()) { + rethrowParentSignal(forwardedParentSignal); + } + return; + } if (timedOut && killTimer) { return; } diff --git a/test/scripts/plugin-lifecycle-measure.test.ts b/test/scripts/plugin-lifecycle-measure.test.ts index 0aae4a1dc64..6dde1b0ca7e 100644 --- a/test/scripts/plugin-lifecycle-measure.test.ts +++ b/test/scripts/plugin-lifecycle-measure.test.ts @@ -1,5 +1,10 @@ -import { spawnSync } from "node:child_process"; -import { mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { spawn, spawnSync, type ChildProcess } from "node:child_process"; +import { + existsSync, + mkdtempSync, + readFileSync, + rmSync, +} from "node:fs"; import { tmpdir } from "node:os"; import path from "node:path"; import { afterEach, describe, expect, it } from "vitest"; @@ -38,6 +43,35 @@ function waitForPidExit(pid: number, timeoutMs: number): boolean { return !pidExists(pid); } +function waitForPath(filePath: string, timeoutMs: number): boolean { + const waitBuffer = new SharedArrayBuffer(4); + const waitView = new Int32Array(waitBuffer); + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + if (existsSync(filePath)) { + return true; + } + Atomics.wait(waitView, 0, 0, 25); + } + return existsSync(filePath); +} + +function waitForChildClose( + child: ChildProcess, + timeoutMs: number, +): Promise<{ code: number | null; signal: NodeJS.Signals | null }> { + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + child.kill("SIGKILL"); + reject(new Error("timed out waiting for measured wrapper to exit")); + }, timeoutMs); + child.once("close", (code, signal) => { + clearTimeout(timer); + resolve({ code, signal }); + }); + }); +} + afterEach(() => { for (const dir of tempDirs.splice(0)) { rmSync(dir, { recursive: true, force: true }); @@ -213,4 +247,88 @@ describe("plugin lifecycle resource sampler", () => { } } }); + + it.runIf(process.platform === "linux")( + "exits promptly when externally terminated phases stop during grace", + async () => { + const dir = makeTempDir(); + const summary = path.join(dir, "summary.tsv"); + const readyFile = path.join(dir, "ready.pid"); + const result = spawn( + "node", + [ + scriptPath, + summary, + "external-fast-stop", + "--", + "node", + "--input-type=module", + "--eval", + [ + "import { writeFileSync } from 'node:fs';", + "writeFileSync(process.env.READY_FILE, String(process.pid));", + "process.on('SIGTERM', () => process.exit(0));", + "setInterval(() => {}, 1000);", + ].join("\n"), + ], + { + cwd: process.cwd(), + env: { + ...process.env, + OPENCLAW_PLUGIN_LIFECYCLE_PHASE_TIMEOUT_MS: "5000", + OPENCLAW_PLUGIN_LIFECYCLE_TIMEOUT_KILL_GRACE_MS: "1500", + READY_FILE: readyFile, + }, + stdio: "ignore", + }, + ); + + expect(waitForPath(readyFile, 1000)).toBe(true); + const started = Date.now(); + result.kill("SIGTERM"); + const close = await waitForChildClose(result, 5000); + + expect(Date.now() - started).toBeLessThan(1000); + expect(close.signal).toBe("SIGTERM"); + }, + ); + + it.runIf(process.platform === "linux")( + "exits promptly when shell descendants drain during termination grace", + async () => { + const dir = makeTempDir(); + const summary = path.join(dir, "summary.tsv"); + const readyFile = path.join(dir, "ready.pid"); + const result = spawn( + "node", + [ + scriptPath, + summary, + "external-descendant-drain", + "--", + "bash", + "-lc", + 'trap "exit 0" TERM; bash -c \'trap "sleep 0.15; exit 0" TERM; printf "%s\\n" "$$" >"$READY_FILE"; while :; do sleep 1; done\' & wait', + ], + { + cwd: process.cwd(), + env: { + ...process.env, + OPENCLAW_PLUGIN_LIFECYCLE_PHASE_TIMEOUT_MS: "5000", + OPENCLAW_PLUGIN_LIFECYCLE_TIMEOUT_KILL_GRACE_MS: "1500", + READY_FILE: readyFile, + }, + stdio: "ignore", + }, + ); + + expect(waitForPath(readyFile, 1000)).toBe(true); + const started = Date.now(); + result.kill("SIGTERM"); + const close = await waitForChildClose(result, 5000); + + expect(Date.now() - started).toBeLessThan(1000); + expect(close.signal).toBe("SIGTERM"); + }, + ); });