mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-12 06:22:57 +00:00
fix(e2e): rethrow lifecycle shutdown promptly
This commit is contained in:
@@ -58,6 +58,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Release/CI/E2E: fail the kitchen-sink RPC walk when command RSS sampling captures no process samples.
|
||||
- Release/CI/E2E: force-stop memory/fd repro gateway children that survive listener cleanup.
|
||||
- Release/CI/E2E: remove fallback ClawHub skill-install home directories when proof runs fail.
|
||||
- Release/CI/E2E: let plugin lifecycle measurement wrappers exit promptly after external shutdown while preserving descendant cleanup.
|
||||
- Installers: fail the PowerShell installer when interactive onboarding exits non-zero.
|
||||
- Scripts/UI: stop descendant processes from wrapped non-interactive commands when `run-with-env` receives shutdown signals.
|
||||
- Release/CI/E2E: write multi-node update Docker artifacts to unique per-run directories by default so parallel runs cannot overwrite evidence.
|
||||
|
||||
@@ -125,7 +125,10 @@ let maxCpuTicks = 0;
|
||||
let timedOut = false;
|
||||
let finished = false;
|
||||
let parentSignalInFlight = false;
|
||||
let forwardedParentSignal = null;
|
||||
let killTimer;
|
||||
let parentSignalTimer;
|
||||
let parentSignalPollTimer;
|
||||
const updateMetrics = () => {
|
||||
if (!child.pid) {
|
||||
return;
|
||||
@@ -164,6 +167,21 @@ function terminateChildGroup(signal) {
|
||||
} catch {}
|
||||
}
|
||||
|
||||
/**
 * Reports whether the spawned child's process group still appears to exist.
 *
 * Probes with signal 0 against the negated child pid, which asks the OS to
 * run its existence/permission checks without delivering a signal.
 * NOTE(review): the negative pid is assumed to address the child's process
 * group (POSIX kill semantics) — confirm the child is spawned as a group leader.
 *
 * @returns {boolean} `false` when the child has no pid or the probe fails
 *   with `ESRCH`; `true` when the probe succeeds or fails for any other reason.
 */
function childGroupExists() {
  const groupLeaderPid = child.pid;
  if (!groupLeaderPid) {
    return false;
  }

  try {
    process.kill(-groupLeaderPid, 0);
  } catch (probeError) {
    // Only ESRCH proves nothing is left to signal; every other failure is
    // treated conservatively as "the group is still there".
    const groupIsGone = Boolean(probeError) && probeError.code === "ESRCH";
    return !groupIsGone;
  }

  return true;
}
|
||||
|
||||
function clearRuntimeTimers() {
|
||||
clearInterval(interval);
|
||||
if (timeoutTimer) {
|
||||
@@ -172,9 +190,16 @@ function clearRuntimeTimers() {
|
||||
if (killTimer) {
|
||||
clearTimeout(killTimer);
|
||||
}
|
||||
if (parentSignalTimer) {
|
||||
clearTimeout(parentSignalTimer);
|
||||
}
|
||||
if (parentSignalPollTimer) {
|
||||
clearInterval(parentSignalPollTimer);
|
||||
}
|
||||
}
|
||||
|
||||
function rethrowParentSignal(signal) {
|
||||
clearRuntimeTimers();
|
||||
process.removeAllListeners(signal);
|
||||
process.kill(process.pid, signal);
|
||||
process.exit(128);
|
||||
@@ -192,12 +217,18 @@ function handleParentSignal(signal) {
|
||||
return;
|
||||
}
|
||||
finished = true;
|
||||
forwardedParentSignal = signal;
|
||||
clearRuntimeTimers();
|
||||
terminateChildGroup(signal);
|
||||
setTimeout(() => {
|
||||
parentSignalTimer = setTimeout(() => {
|
||||
terminateChildGroup("SIGKILL");
|
||||
rethrowParentSignal(signal);
|
||||
}, timeoutKillGraceMs);
|
||||
parentSignalPollTimer = setInterval(() => {
|
||||
if (!childGroupExists()) {
|
||||
rethrowParentSignal(signal);
|
||||
}
|
||||
}, Math.min(50, timeoutKillGraceMs));
|
||||
}
|
||||
|
||||
for (const signal of ["SIGHUP", "SIGINT", "SIGTERM"]) {
|
||||
@@ -248,6 +279,12 @@ child.on("error", (error) => {
|
||||
});
|
||||
|
||||
child.on("exit", (code, signal) => {
|
||||
if (parentSignalInFlight && forwardedParentSignal) {
|
||||
if (!childGroupExists()) {
|
||||
rethrowParentSignal(forwardedParentSignal);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (timedOut && killTimer) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { mkdtempSync, readFileSync, rmSync } from "node:fs";
|
||||
import { spawn, spawnSync, type ChildProcess } from "node:child_process";
|
||||
import {
|
||||
existsSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
@@ -38,6 +43,35 @@ function waitForPidExit(pid: number, timeoutMs: number): boolean {
|
||||
return !pidExists(pid);
|
||||
}
|
||||
|
||||
/**
 * Synchronously polls until `filePath` exists or `timeoutMs` has elapsed.
 *
 * The calling thread is blocked between polls by `Atomics.wait` on a private
 * buffer that nothing notifies, so each wait simply times out. Every sleep is
 * clamped to the time remaining, so the helper does not overshoot the deadline
 * by a full poll interval.
 *
 * @param filePath - Path probed with `existsSync`.
 * @param timeoutMs - Maximum time to keep polling, in milliseconds. A
 *   non-positive or NaN value results in a single existence check.
 * @returns `true` as soon as the path is observed to exist (including one last
 *   check once the deadline has passed), otherwise `false`.
 */
function waitForPath(filePath: string, timeoutMs: number): boolean {
  const pollIntervalMs = 25;
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));
  const deadline = Date.now() + timeoutMs;

  for (;;) {
    if (existsSync(filePath)) {
      return true;
    }
    const remainingMs = deadline - Date.now();
    // Written as a negated `>` so a NaN timeout ends the loop instead of
    // reaching Atomics.wait, where a NaN timeout means "wait forever".
    if (!(remainingMs > 0)) {
      return false;
    }
    Atomics.wait(sleepCell, 0, 0, Math.min(pollIntervalMs, remainingMs));
  }
}
|
||||
|
||||
/**
 * Waits for a child process to emit `close` and reports how it ended.
 *
 * The promise settles on whichever happens first:
 * - `close`: resolves with the exit code and terminating signal;
 * - `error`: rejects with the emitted error, so a failed spawn surfaces as a
 *   rejection instead of an unhandled `error` event;
 * - timeout: sends `SIGKILL` to the child and rejects with a timeout error.
 *
 * Both listeners and the timer are released as soon as the promise settles.
 *
 * @param child - Process to observe.
 * @param timeoutMs - How long to wait for `close` before force-killing.
 * @returns The `code` and `signal` reported by the `close` event.
 * @throws Rejects with the child's `error`, or with
 *   "timed out waiting for measured wrapper to exit" on timeout.
 */
function waitForChildClose(
  child: ChildProcess,
  timeoutMs: number,
): Promise<{ code: number | null; signal: NodeJS.Signals | null }> {
  return new Promise((resolve, reject) => {
    const onClose = (code: number | null, signal: NodeJS.Signals | null): void => {
      cleanup();
      resolve({ code, signal });
    };
    const onError = (error: Error): void => {
      cleanup();
      reject(error);
    };
    const timer = setTimeout(() => {
      cleanup();
      child.kill("SIGKILL");
      reject(new Error("timed out waiting for measured wrapper to exit"));
    }, timeoutMs);

    // Only ever invoked from the callbacks above, i.e. after `timer` is set.
    function cleanup(): void {
      clearTimeout(timer);
      child.off("close", onClose);
      child.off("error", onError);
    }

    child.once("close", onClose);
    child.once("error", onError);
  });
}
|
||||
|
||||
afterEach(() => {
|
||||
for (const dir of tempDirs.splice(0)) {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
@@ -213,4 +247,88 @@ describe("plugin lifecycle resource sampler", () => {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Linux-only: the wrapper is sent SIGTERM while its measured command is
// running, and the command exits as soon as it receives SIGTERM itself. The
// wrapper must then close within 1000 ms and be reported as terminated by
// SIGTERM. NOTE(review): the 1500 ms value passed below is presumably the
// wrapper's kill grace period, in which case passing implies it did not wait
// out the full grace — confirm against the wrapper script.
it.runIf(process.platform === "linux")(
  "exits promptly when externally terminated phases stop during grace",
  async () => {
    const workDir = makeTempDir();
    const summaryPath = path.join(workDir, "summary.tsv");
    const readyFile = path.join(workDir, "ready.pid");

    // Measured command: publish its pid to READY_FILE, exit cleanly on
    // SIGTERM, and otherwise stay alive on an idle interval.
    const phaseSource = [
      "import { writeFileSync } from 'node:fs';",
      "writeFileSync(process.env.READY_FILE, String(process.pid));",
      "process.on('SIGTERM', () => process.exit(0));",
      "setInterval(() => {}, 1000);",
    ].join("\n");

    const wrapperArgs = [
      scriptPath,
      summaryPath,
      "external-fast-stop",
      "--",
      "node",
      "--input-type=module",
      "--eval",
      phaseSource,
    ];
    const wrapperEnv = {
      ...process.env,
      OPENCLAW_PLUGIN_LIFECYCLE_PHASE_TIMEOUT_MS: "5000",
      OPENCLAW_PLUGIN_LIFECYCLE_TIMEOUT_KILL_GRACE_MS: "1500",
      READY_FILE: readyFile,
    };
    const wrapper = spawn("node", wrapperArgs, {
      cwd: process.cwd(),
      env: wrapperEnv,
      stdio: "ignore",
    });

    // The ready file appears once the measured command is up and running.
    expect(waitForPath(readyFile, 1000)).toBe(true);

    const signalledAt = Date.now();
    wrapper.kill("SIGTERM");
    const outcome = await waitForChildClose(wrapper, 5000);
    const elapsedMs = Date.now() - signalledAt;

    expect(elapsedMs).toBeLessThan(1000);
    expect(outcome.signal).toBe("SIGTERM");
  },
);
|
||||
|
||||
// Linux-only: same external SIGTERM scenario, but the measured command is a
// login bash that backgrounds a nested bash. The outer shell exits at once on
// TERM, while the nested shell (whose pid is written to READY_FILE) sleeps
// 0.15 s in its TERM trap before exiting. The wrapper must still close within
// 1000 ms and be reported as terminated by SIGTERM.
it.runIf(process.platform === "linux")(
  "exits promptly when shell descendants drain during termination grace",
  async () => {
    const workDir = makeTempDir();
    const summaryPath = path.join(workDir, "summary.tsv");
    const readyFile = path.join(workDir, "ready.pid");

    // Outer trap: exit 0 on TERM. Inner `bash -c`: delayed exit on TERM,
    // publish `$$` to READY_FILE, then loop on one-second sleeps.
    const shellCommand =
      'trap "exit 0" TERM; bash -c \'trap "sleep 0.15; exit 0" TERM; printf "%s\\n" "$$" >"$READY_FILE"; while :; do sleep 1; done\' & wait';

    const wrapperArgs = [
      scriptPath,
      summaryPath,
      "external-descendant-drain",
      "--",
      "bash",
      "-lc",
      shellCommand,
    ];
    const wrapperEnv = {
      ...process.env,
      OPENCLAW_PLUGIN_LIFECYCLE_PHASE_TIMEOUT_MS: "5000",
      OPENCLAW_PLUGIN_LIFECYCLE_TIMEOUT_KILL_GRACE_MS: "1500",
      READY_FILE: readyFile,
    };
    const wrapper = spawn("node", wrapperArgs, {
      cwd: process.cwd(),
      env: wrapperEnv,
      stdio: "ignore",
    });

    // The ready file appears once the nested shell has started.
    expect(waitForPath(readyFile, 1000)).toBe(true);

    const signalledAt = Date.now();
    wrapper.kill("SIGTERM");
    const outcome = await waitForChildClose(wrapper, 5000);
    const elapsedMs = Date.now() - signalledAt;

    expect(elapsedMs).toBeLessThan(1000);
    expect(outcome.signal).toBe("SIGTERM");
  },
);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user