From ac2c2ac95471b1e32ca14d950cedc8e0475f68bc Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 27 Mar 2026 12:20:51 +0000 Subject: [PATCH] fix: stop test-parallel from waiting forever on child close --- scripts/test-planner/executor.mjs | 129 +++++++++++------- .../test-planner.executor-fallback.test.ts | 52 +++++++ 2 files changed, 135 insertions(+), 46 deletions(-) create mode 100644 test/scripts/test-planner.executor-fallback.test.ts diff --git a/scripts/test-planner/executor.mjs b/scripts/test-planner/executor.mjs index 60fe6a622e3..65630c4ad07 100644 --- a/scripts/test-planner/executor.mjs +++ b/scripts/test-planner/executor.mjs @@ -280,6 +280,7 @@ export async function executePlan(plan, options = {}) { const heapSnapshotEnabled = process.platform !== "win32" && heapSnapshotIntervalMs >= heapSnapshotMinIntervalMs; const heapSnapshotSignal = env.OPENCLAW_TEST_HEAPSNAPSHOT_SIGNAL?.trim() || "SIGUSR2"; + const closeGraceMs = Math.max(100, parseEnvNumber("OPENCLAW_TEST_CLOSE_GRACE_MS", 2000)); const heapSnapshotBaseDir = heapSnapshotEnabled ? path.resolve( env.OPENCLAW_TEST_HEAPSNAPSHOT_DIR?.trim() || @@ -382,11 +383,14 @@ export async function executePlan(plan, options = {}) { let pendingLine = ""; let memoryPollTimer = null; let heapSnapshotTimer = null; + let closeFallbackTimer = null; const memoryFileRecords = []; let initialTreeSample = null; let latestTreeSample = null; let peakTreeSample = null; let heapSnapshotSequence = 0; + let childExitState = null; + let settled = false; const updatePeakTreeSample = (sample, reason) => { if (!sample) { return; @@ -505,6 +509,72 @@ export async function executePlan(plan, options = {}) { } top=${topGrowthFiles.length > 0 ? topGrowthFiles.join(", ") : "none"}`, ); }; + const clearChildTimers = () => { + if (memoryPollTimer) { + clearInterval(memoryPollTimer); + memoryPollTimer = null; + } + if (heapSnapshotTimer) { + clearInterval(heapSnapshotTimer); + heapSnapshotTimer = null; + } + if (closeFallbackTimer) { + clearTimeout(closeFallbackTimer); + closeFallbackTimer = null; + } + }; + const finalizeRun = (code, signal, source = "close") => { + if (settled) { + return; + } + settled = true; + clearChildTimers(); + children.delete(child); + const resolvedCode = resolveTestRunExitCode({ + code, + signal, + output, + fatalSeen, + childError, + }); + const elapsedMs = Date.now() - startedAt; + logMemoryTraceSummary(); + if (resolvedCode !== 0) { + const failureTail = formatCapturedOutputTail(output); + const failureArtifactPath = artifacts.writeTempJsonArtifact(`${artifactStem}-failure`, { + entry: unit.id, + command: [pnpmInvocation.command, ...spawnArgs], + elapsedMs, + error: childError ? String(childError) : null, + exitCode: resolvedCode, + fatalSeen, + logPath: laneLogPath, + outputTail: failureTail, + signal: signal ?? null, + }); + if (failureTail) { + console.error(`[test-parallel] failure tail ${unit.id}\n${failureTail}`); + } + console.error( + `[test-parallel] failure artifacts ${unit.id} log=${laneLogPath} meta=${failureArtifactPath}`, + ); + } + if (source !== "close") { + laneLogStream.write( + `\n[test-parallel] finalize source=${source} after child exit without close\n`, + ); + } + laneLogStream.write( + `\n[test-parallel] done ${unit.id} code=${String(resolvedCode)} signal=${ + signal ?? "none" + } elapsed=${formatElapsedMs(elapsedMs)}\n`, + ); + laneLogStream.end(); + console.log( + `[test-parallel] done ${unit.id} code=${String(resolvedCode)} elapsed=${formatElapsedMs(elapsedMs)}`, + ); + resolve(resolvedCode); + }; try { const childEnv = { ...env, @@ -565,53 +635,20 @@ export async function executePlan(plan, options = {}) { laneLogStream.write(`\n[test-parallel] child error: ${String(err)}\n`); console.error(`[test-parallel] child error: ${String(err)}`); }); + child.on("exit", (code, signal) => { + childExitState = { code, signal }; + if (settled || closeFallbackTimer) { + return; + } + closeFallbackTimer = setTimeout(() => { + child.stdout?.destroy(); + child.stderr?.destroy(); + finalizeRun(code, signal, "exit-timeout"); + }, closeGraceMs); + closeFallbackTimer.unref?.(); + }); child.on("close", (code, signal) => { - if (memoryPollTimer) { - clearInterval(memoryPollTimer); - } - if (heapSnapshotTimer) { - clearInterval(heapSnapshotTimer); - } - children.delete(child); - const resolvedCode = resolveTestRunExitCode({ - code, - signal, - output, - fatalSeen, - childError, - }); - const elapsedMs = Date.now() - startedAt; - logMemoryTraceSummary(); - if (resolvedCode !== 0) { - const failureTail = formatCapturedOutputTail(output); - const failureArtifactPath = artifacts.writeTempJsonArtifact(`${artifactStem}-failure`, { - entry: unit.id, - command: [pnpmInvocation.command, ...spawnArgs], - elapsedMs, - error: childError ? String(childError) : null, - exitCode: resolvedCode, - fatalSeen, - logPath: laneLogPath, - outputTail: failureTail, - signal: signal ?? null, - }); - if (failureTail) { - console.error(`[test-parallel] failure tail ${unit.id}\n${failureTail}`); - } - console.error( - `[test-parallel] failure artifacts ${unit.id} log=${laneLogPath} meta=${failureArtifactPath}`, - ); - } - laneLogStream.write( - `\n[test-parallel] done ${unit.id} code=${String(resolvedCode)} signal=${ - signal ?? "none" - } elapsed=${formatElapsedMs(elapsedMs)}\n`, - ); - laneLogStream.end(); - console.log( - `[test-parallel] done ${unit.id} code=${String(resolvedCode)} elapsed=${formatElapsedMs(elapsedMs)}`, - ); - resolve(resolvedCode); + finalizeRun(childExitState?.code ?? code, childExitState?.signal ?? signal, "close"); }); }); diff --git a/test/scripts/test-planner.executor-fallback.test.ts b/test/scripts/test-planner.executor-fallback.test.ts new file mode 100644 index 00000000000..1f5a7335971 --- /dev/null +++ b/test/scripts/test-planner.executor-fallback.test.ts @@ -0,0 +1,52 @@ +import { EventEmitter } from "node:events"; +import { PassThrough } from "node:stream"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { importFreshModule } from "../helpers/import-fresh.js"; + +afterEach(() => { + vi.restoreAllMocks(); + vi.resetModules(); +}); + +describe("test planner executor", () => { + it("falls back to child exit when close never arrives", async () => { + const stdout = new PassThrough(); + const stderr = new PassThrough(); + const fakeChild = Object.assign(new EventEmitter(), { + stdout, + stderr, + pid: 12345, + kill: vi.fn(), + }); + const spawnMock = vi.fn(() => { + setTimeout(() => { + fakeChild.emit("exit", 0, null); + }, 0); + return fakeChild; + }); + vi.doMock("node:child_process", () => ({ + spawn: spawnMock, + })); + + const { executePlan, createExecutionArtifacts } = await importFreshModule< + typeof import("../../scripts/test-planner/executor.mjs") + >(import.meta.url, "../../scripts/test-planner/executor.mjs?scope=exit-fallback"); + const artifacts = createExecutionArtifacts({ OPENCLAW_TEST_CLOSE_GRACE_MS: "10" }); + const executePromise = executePlan( + { + passthroughMetadataOnly: true, + passthroughOptionArgs: [], + runtimeCapabilities: { isWindowsCi: false, isCI: false, isWindows: false }, + }, + { + env: { OPENCLAW_TEST_CLOSE_GRACE_MS: "10" }, + artifacts, + }, + ); + + await expect(executePromise).resolves.toBe(0); + expect(spawnMock).toHaveBeenCalledTimes(1); + + artifacts.cleanupTempArtifacts(); + }); +});