fix: stop test-parallel from waiting forever on child close

This commit is contained in:
Peter Steinberger
2026-03-27 12:20:51 +00:00
parent f625a0b106
commit ac2c2ac954
2 changed files with 135 additions and 46 deletions

View File

@@ -280,6 +280,7 @@ export async function executePlan(plan, options = {}) {
// Heap snapshots are enabled only off Windows and only when the configured
// interval is at least the minimum interval.
const heapSnapshotEnabled =
process.platform !== "win32" && heapSnapshotIntervalMs >= heapSnapshotMinIntervalMs;
// Signal name taken from the environment (trimmed); an unset or blank value
// falls back to "SIGUSR2".
const heapSnapshotSignal = env.OPENCLAW_TEST_HEAPSNAPSHOT_SIGNAL?.trim() || "SIGUSR2";
// Delay, in milliseconds, that the child "exit" handler waits for "close"
// before finalizing on its own. Clamped to at least 100.
// NOTE(review): 2000 is presumably parseEnvNumber's default when the variable
// is unset — confirm against parseEnvNumber.
const closeGraceMs = Math.max(100, parseEnvNumber("OPENCLAW_TEST_CLOSE_GRACE_MS", 2000));
const heapSnapshotBaseDir = heapSnapshotEnabled
? path.resolve(
env.OPENCLAW_TEST_HEAPSNAPSHOT_DIR?.trim() ||
@@ -382,11 +383,14 @@ export async function executePlan(plan, options = {}) {
// Per-run mutable state shared by the handlers and helpers defined below.
// NOTE(review): presumably holds a partially received output line — confirm at its use site.
let pendingLine = "";
// Interval handles; stopped with clearInterval() by clearChildTimers().
let memoryPollTimer = null;
let heapSnapshotTimer = null;
// Timeout handle armed by the child "exit" handler; when it fires the run is
// finalized without waiting for "close". Cleared by clearChildTimers().
let closeFallbackTimer = null;
const memoryFileRecords = [];
let initialTreeSample = null;
let latestTreeSample = null;
let peakTreeSample = null;
let heapSnapshotSequence = 0;
// { code, signal } as reported by the child's "exit" event; null until then.
let childExitState = null;
// Set to true by finalizeRun() so that finalization happens at most once.
let settled = false;
const updatePeakTreeSample = (sample, reason) => {
if (!sample) {
return;
@@ -505,6 +509,72 @@ export async function executePlan(plan, options = {}) {
} top=${topGrowthFiles.length > 0 ? topGrowthFiles.join(", ") : "none"}`,
);
};
/**
 * Stops every timer associated with the current child run and resets the
 * corresponding handles to null. Handles that are already falsy are left
 * untouched, so calling this more than once is harmless.
 */
const clearChildTimers = () => {
  const pollHandle = memoryPollTimer;
  const snapshotHandle = heapSnapshotTimer;
  const fallbackHandle = closeFallbackTimer;

  // The two periodic samplers are intervals.
  if (pollHandle) {
    memoryPollTimer = null;
    clearInterval(pollHandle);
  }
  if (snapshotHandle) {
    heapSnapshotTimer = null;
    clearInterval(snapshotHandle);
  }

  // The close fallback is a one-shot timeout.
  if (fallbackHandle) {
    closeFallbackTimer = null;
    clearTimeout(fallbackHandle);
  }
};
/**
 * Finishes the current child run at most once: stops the run's timers, removes
 * the child from `children`, resolves the effective exit code, writes failure
 * artifacts when that code is non-zero, closes the lane log and finally calls
 * `resolve` with the resolved code.
 *
 * @param {number|null} code - Exit code reported for the child.
 * @param {string|null} signal - Terminating signal reported for the child, if any.
 * @param {string} [source="close"] - What triggered finalization; any value
 *   other than "close" is recorded in the lane log.
 */
const finalizeRun = (code, signal, source = "close") => {
  // Guard against a second finalization attempt for the same run.
  if (settled) {
    return;
  }
  settled = true;
  clearChildTimers();
  children.delete(child);

  const exitStatus = resolveTestRunExitCode({ code, signal, output, fatalSeen, childError });
  const durationMs = Date.now() - startedAt;
  logMemoryTraceSummary();

  // Persists a JSON description of the failure and points the console at it.
  const reportFailure = () => {
    const tail = formatCapturedOutputTail(output);
    const metaPath = artifacts.writeTempJsonArtifact(`${artifactStem}-failure`, {
      entry: unit.id,
      command: [pnpmInvocation.command, ...spawnArgs],
      elapsedMs: durationMs,
      error: childError ? String(childError) : null,
      exitCode: exitStatus,
      fatalSeen,
      logPath: laneLogPath,
      outputTail: tail,
      signal: signal ?? null,
    });
    if (tail) {
      console.error(`[test-parallel] failure tail ${unit.id}\n${tail}`);
    }
    console.error(
      `[test-parallel] failure artifacts ${unit.id} log=${laneLogPath} meta=${metaPath}`,
    );
  };
  if (exitStatus !== 0) {
    reportFailure();
  }

  // Leave a marker in the lane log when "close" was not what ended the run.
  if (source !== "close") {
    laneLogStream.write(
      `\n[test-parallel] finalize source=${source} after child exit without close\n`,
    );
  }

  const codeLabel = String(exitStatus);
  const signalLabel = signal ?? "none";
  laneLogStream.write(
    `\n[test-parallel] done ${unit.id} code=${codeLabel} signal=${signalLabel} elapsed=${formatElapsedMs(durationMs)}\n`,
  );
  laneLogStream.end();
  console.log(
    `[test-parallel] done ${unit.id} code=${codeLabel} elapsed=${formatElapsedMs(durationMs)}`,
  );
  resolve(exitStatus);
};
try {
const childEnv = {
...env,
@@ -565,53 +635,20 @@ export async function executePlan(plan, options = {}) {
laneLogStream.write(`\n[test-parallel] child error: ${String(err)}\n`);
console.error(`[test-parallel] child error: ${String(err)}`);
});
// "exit" handler: remember the child's exit status and, unless the run is
// already settled or a fallback is already pending, arm a one-shot timer that
// finalizes the run if "close" has not done so within closeGraceMs.
child.on("exit", (exitCode, exitSignal) => {
  childExitState = { code: exitCode, signal: exitSignal };

  const needsFallback = !settled && !closeFallbackTimer;
  if (needsFallback) {
    // Tear down the child's output streams, then finalize without "close".
    const finalizeWithoutClose = () => {
      child.stdout?.destroy();
      child.stderr?.destroy();
      finalizeRun(exitCode, exitSignal, "exit-timeout");
    };
    closeFallbackTimer = setTimeout(finalizeWithoutClose, closeGraceMs);
    // Do not let the pending fallback alone keep the event loop alive.
    closeFallbackTimer.unref?.();
  }
});
// "close" handler: delegate all finalization to finalizeRun().
//
// finalizeRun() is guarded by `settled`, clears every timer (including the
// exit fallback timer), writes the failure artifacts and the "done" log lines,
// ends the lane log stream and resolves the run. Doing any of that inline here
// as well would repeat the artifacts and log lines, write to the lane log
// after it has been ended, and call resolve() a second time — and it would
// also run after the exit-timeout fallback had already finalized the run.
child.on("close", (code, signal) => {
  // Prefer the status recorded by the "exit" handler; fall back to the values
  // delivered with "close" when no exit status was recorded.
  finalizeRun(childExitState?.code ?? code, childExitState?.signal ?? signal, "close");
});
});