fix(e2e): rethrow lifecycle shutdown promptly

This commit is contained in:
Vincent Koc
2026-06-03 16:34:04 +02:00
parent 8f6f2617ec
commit 38f1db6d67
3 changed files with 159 additions and 3 deletions

View File

@@ -58,6 +58,7 @@ Docs: https://docs.openclaw.ai
- Release/CI/E2E: fail the kitchen-sink RPC walk when command RSS sampling captures no process samples.
- Release/CI/E2E: force-stop memory/fd repro gateway children that survive listener cleanup.
- Release/CI/E2E: remove fallback ClawHub skill-install home directories when proof runs fail.
- Release/CI/E2E: let plugin lifecycle measurement wrappers exit promptly after external shutdown while preserving descendant cleanup.
- Installers: fail the PowerShell installer when interactive onboarding exits non-zero.
- Scripts/UI: stop descendant processes from wrapped non-interactive commands when `run-with-env` receives shutdown signals.
- Release/CI/E2E: write multi-node update Docker artifacts to unique per-run directories by default so parallel runs cannot overwrite evidence.

View File

@@ -125,7 +125,10 @@ let maxCpuTicks = 0;
// Checked together with killTimer by the child "exit" handler, which returns early while both are set.
let timedOut = false;
// Set to true by handleParentSignal before it forwards the signal to the child group.
let finished = false;
// Checked by the child "exit" handler together with forwardedParentSignal; where it is set is not visible in this hunk.
let parentSignalInFlight = false;
// The signal handleParentSignal forwarded to the child group; re-raised on this process by rethrowParentSignal.
let forwardedParentSignal = null;
// Timeout handle cleared by clearRuntimeTimers; where it is armed is not visible in this hunk.
let killTimer;
// Grace-period timeout armed by handleParentSignal: SIGKILLs the child group, then rethrows the parent signal.
let parentSignalTimer;
// Interval armed by handleParentSignal: rethrows the parent signal as soon as childGroupExists() reports false.
let parentSignalPollTimer;
const updateMetrics = () => {
if (!child.pid) {
return;
@@ -164,6 +167,21 @@ function terminateChildGroup(signal) {
} catch {}
}
/**
 * Reports whether the child's process group still appears to be alive.
 *
 * Returns false when the child has no pid, or when probing the group fails
 * with ESRCH. Any other probe failure is treated as "still exists".
 */
function childGroupExists() {
  const pid = child.pid;
  if (!pid) {
    return false;
  }
  try {
    // Signal 0 delivers nothing and only tests for existence; the negated pid
    // addresses the process group rather than the single process.
    process.kill(-pid, 0);
  } catch (error) {
    // Only ESRCH counts as "gone"; every other failure keeps the group "alive".
    return !(error && error.code === "ESRCH");
  }
  return true;
}
function clearRuntimeTimers() {
clearInterval(interval);
if (timeoutTimer) {
@@ -172,9 +190,16 @@ function clearRuntimeTimers() {
if (killTimer) {
clearTimeout(killTimer);
}
if (parentSignalTimer) {
clearTimeout(parentSignalTimer);
}
if (parentSignalPollTimer) {
clearInterval(parentSignalPollTimer);
}
}
function rethrowParentSignal(signal) {
clearRuntimeTimers();
process.removeAllListeners(signal);
process.kill(process.pid, signal);
process.exit(128);
@@ -192,12 +217,18 @@ function handleParentSignal(signal) {
return;
}
finished = true;
forwardedParentSignal = signal;
clearRuntimeTimers();
terminateChildGroup(signal);
setTimeout(() => {
parentSignalTimer = setTimeout(() => {
terminateChildGroup("SIGKILL");
rethrowParentSignal(signal);
}, timeoutKillGraceMs);
parentSignalPollTimer = setInterval(() => {
if (!childGroupExists()) {
rethrowParentSignal(signal);
}
}, Math.min(50, timeoutKillGraceMs));
}
for (const signal of ["SIGHUP", "SIGINT", "SIGTERM"]) {
@@ -248,6 +279,12 @@ child.on("error", (error) => {
});
child.on("exit", (code, signal) => {
if (parentSignalInFlight && forwardedParentSignal) {
if (!childGroupExists()) {
rethrowParentSignal(forwardedParentSignal);
}
return;
}
if (timedOut && killTimer) {
return;
}

View File

@@ -1,5 +1,10 @@
import { spawnSync } from "node:child_process";
import { mkdtempSync, readFileSync, rmSync } from "node:fs";
import { spawn, spawnSync, type ChildProcess } from "node:child_process";
import {
existsSync,
mkdtempSync,
readFileSync,
rmSync,
} from "node:fs";
import { tmpdir } from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
@@ -38,6 +43,35 @@ function waitForPidExit(pid: number, timeoutMs: number): boolean {
return !pidExists(pid);
}
/**
 * Synchronously polls the filesystem until `filePath` exists or the timeout
 * elapses.
 *
 * @param filePath - Path whose existence is awaited.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds.
 * @returns `true` if the path exists by the time polling stops, else `false`.
 */
function waitForPath(filePath: string, timeoutMs: number): boolean {
  // Atomics.wait on a private cell is used as a blocking ~25 ms sleep between
  // existence checks.
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));
  const stopAt = Date.now() + timeoutMs;
  for (;;) {
    if (Date.now() >= stopAt) {
      // One last check so a path that appeared during the final sleep counts.
      return existsSync(filePath);
    }
    if (existsSync(filePath)) {
      return true;
    }
    Atomics.wait(sleepCell, 0, 0, 25);
  }
}
/**
 * Waits for a child process to emit "close", with a hard deadline.
 *
 * @param child - The process to wait on.
 * @param timeoutMs - Deadline in milliseconds; when it passes, the child is
 *   sent SIGKILL and the promise rejects.
 * @returns The exit code and signal reported by the "close" event.
 */
function waitForChildClose(
  child: ChildProcess,
  timeoutMs: number,
): Promise<{ code: number | null; signal: NodeJS.Signals | null }> {
  return new Promise((resolve, reject) => {
    const onDeadline = (): void => {
      child.kill("SIGKILL");
      reject(new Error("timed out waiting for measured wrapper to exit"));
    };
    const watchdog = setTimeout(onDeadline, timeoutMs);
    child.once("close", (exitCode, exitSignal) => {
      // The child closed in time, so the watchdog must not fire later.
      clearTimeout(watchdog);
      resolve({ code: exitCode, signal: exitSignal });
    });
  });
}
afterEach(() => {
for (const dir of tempDirs.splice(0)) {
rmSync(dir, { recursive: true, force: true });
@@ -213,4 +247,88 @@ describe("plugin lifecycle resource sampler", () => {
}
}
});
// Linux-only: the wrapper script is sent SIGTERM while its measured command is
// running, and must exit quickly instead of waiting out the kill grace period.
it.runIf(process.platform === "linux")(
"exits promptly when externally terminated phases stop during grace",
async () => {
const dir = makeTempDir();
const summary = path.join(dir, "summary.tsv");
const readyFile = path.join(dir, "ready.pid");
// Run the wrapper script around an inline Node program (everything after "--").
const result = spawn(
"node",
[
scriptPath,
summary,
"external-fast-stop",
"--",
"node",
"--input-type=module",
"--eval",
// Measured command: writes its pid to READY_FILE, exits 0 on SIGTERM, and
// otherwise stays alive via an idle interval.
[
"import { writeFileSync } from 'node:fs';",
"writeFileSync(process.env.READY_FILE, String(process.pid));",
"process.on('SIGTERM', () => process.exit(0));",
"setInterval(() => {}, 1000);",
].join("\n"),
],
{
cwd: process.cwd(),
env: {
...process.env,
// Kill grace (1500 ms) is deliberately longer than the 1000 ms bound
// asserted below, so waiting out the full grace period would fail the test.
OPENCLAW_PLUGIN_LIFECYCLE_PHASE_TIMEOUT_MS: "5000",
OPENCLAW_PLUGIN_LIFECYCLE_TIMEOUT_KILL_GRACE_MS: "1500",
READY_FILE: readyFile,
},
stdio: "ignore",
},
);
// Wait until the measured command has announced itself before signalling.
// NOTE(review): if this assertion fails, the spawned wrapper is not terminated
// here — confirm whether cleanup happens elsewhere.
expect(waitForPath(readyFile, 1000)).toBe(true);
const started = Date.now();
result.kill("SIGTERM");
const close = await waitForChildClose(result, 5000);
// The wrapper must close within 1 s of the signal and report SIGTERM as its
// terminating signal.
expect(Date.now() - started).toBeLessThan(1000);
expect(close.signal).toBe("SIGTERM");
},
);
// Linux-only: same scenario as an external SIGTERM on the wrapper, but the
// measured command is a shell whose background descendant exits slightly later.
it.runIf(process.platform === "linux")(
"exits promptly when shell descendants drain during termination grace",
async () => {
const dir = makeTempDir();
const summary = path.join(dir, "summary.tsv");
const readyFile = path.join(dir, "ready.pid");
// Run the wrapper script around a bash command (everything after "--").
const result = spawn(
"node",
[
scriptPath,
summary,
"external-descendant-drain",
"--",
"bash",
"-lc",
// Outer shell: traps TERM with "exit 0", starts an inner bash in the
// background, then waits. Inner shell: writes its own pid to READY_FILE,
// loops on "sleep 1", and on TERM sleeps 0.15 s before exiting 0.
'trap "exit 0" TERM; bash -c \'trap "sleep 0.15; exit 0" TERM; printf "%s\\n" "$$" >"$READY_FILE"; while :; do sleep 1; done\' & wait',
],
{
cwd: process.cwd(),
env: {
...process.env,
// Kill grace (1500 ms) is deliberately longer than the 1000 ms bound
// asserted below, so waiting out the full grace period would fail the test.
OPENCLAW_PLUGIN_LIFECYCLE_PHASE_TIMEOUT_MS: "5000",
OPENCLAW_PLUGIN_LIFECYCLE_TIMEOUT_KILL_GRACE_MS: "1500",
READY_FILE: readyFile,
},
stdio: "ignore",
},
);
// Wait until the inner shell has announced itself before signalling.
// NOTE(review): if this assertion fails, the spawned wrapper is not terminated
// here — confirm whether cleanup happens elsewhere.
expect(waitForPath(readyFile, 1000)).toBe(true);
const started = Date.now();
result.kill("SIGTERM");
const close = await waitForChildClose(result, 5000);
// The wrapper must close within 1 s of the signal and report SIGTERM as its
// terminating signal.
expect(Date.now() - started).toBeLessThan(1000);
expect(close.signal).toBe("SIGTERM");
},
);
});