diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.test.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.test.ts index 38261818890..648a8ec944b 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.test.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.test.ts @@ -1,5 +1,6 @@ -import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; +import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; import path from "node:path"; +import { setTimeout as sleep } from "node:timers/promises"; import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path"; import { describe, expect, it } from "vitest"; import { @@ -10,6 +11,15 @@ import { startMatrixQaOpenClawCli, } from "./scenario-runtime-cli.js"; +function isProcessRunning(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + describe("Matrix QA CLI runtime", () => { it("redacts secret CLI arguments in diagnostic command text", () => { expect( @@ -176,4 +186,131 @@ describe("Matrix QA CLI runtime", () => { await rm(root, { force: true, recursive: true }); } }); + + it("kills CLI commands that ignore graceful timeout termination", async () => { + const root = await mkdtemp( + path.join(resolvePreferredOpenClawTmpDir(), "matrix-qa-cli-timeout-kill-"), + ); + const pidPath = path.join(root, "cli.pid"); + let childPid: number | undefined; + try { + await mkdir(path.join(root, "dist")); + await writeFile( + path.join(root, "dist", "index.mjs"), + [ + "import { writeFileSync } from 'node:fs';", + `writeFileSync(${JSON.stringify(pidPath)}, String(process.pid));`, + "process.stdout.write('waiting despite graceful shutdown\\n');", + "process.on('SIGTERM', () => { process.stdout.write('ignored sigterm\\n'); });", + "setInterval(() => {}, 1000);", + ].join("\n"), + ); + + await expect( + runMatrixQaOpenClawCli({ + args: ["matrix", "verify", "self"], + cwd: root, + env: process.env, + timeoutMs: 500, + }), + ).rejects.toThrow(/timed out after 500ms/u); + + childPid = Number(await readFile(pidPath, "utf8")); + expect(isProcessRunning(childPid)).toBe(false); + } finally { + if (childPid && isProcessRunning(childPid)) { + process.kill(childPid, "SIGKILL"); + } + await rm(root, { force: true, recursive: true }); + } + }); + + it("preserves timeout diagnostics when wait attaches after timeout", async () => { + const root = await mkdtemp( + path.join(resolvePreferredOpenClawTmpDir(), "matrix-qa-cli-late-wait-timeout-"), + ); + const pidPath = path.join(root, "cli.pid"); + let childPid: number | undefined; + try { + await mkdir(path.join(root, "dist")); + await writeFile( + path.join(root, "dist", "index.mjs"), + [ + "import { writeFileSync } from 'node:fs';", + `writeFileSync(${JSON.stringify(pidPath)}, String(process.pid));`, + "process.stdout.write('late wait timeout marker\\n');", + "process.on('SIGTERM', () => {});", + "setInterval(() => {}, 1000);", + ].join("\n"), + ); + + const session = startMatrixQaOpenClawCli({ + args: ["matrix", "verify", "self"], + cwd: root, + env: process.env, + timeoutMs: 500, + }); + await sleep(850); + + await expect(session.wait()).rejects.toThrow(/timed out after 500ms/u); + await expect(session.wait()).rejects.toThrow(/late wait timeout marker/u); + + childPid = Number(await readFile(pidPath, "utf8")); + expect(isProcessRunning(childPid)).toBe(false); + } finally { + if (childPid && isProcessRunning(childPid)) { + process.kill(childPid, "SIGKILL"); + } + await rm(root, { force: true, recursive: true }); + } + }); + + it("settles and kills descendants that keep timed-out CLI stdio open", async () => { + const root = await mkdtemp( + path.join(resolvePreferredOpenClawTmpDir(), "matrix-qa-cli-timeout-tree-"), + ); + const childPidPath = path.join(root, "child.pid"); + const grandchildPidPath = path.join(root, "grandchild.pid"); + let childPid: number | undefined; + let grandchildPid: number | undefined; + try { + await mkdir(path.join(root, "dist")); + await writeFile( + path.join(root, "dist", "index.mjs"), + [ + "import { spawn } from 'node:child_process';", + "import { writeFileSync } from 'node:fs';", + `writeFileSync(${JSON.stringify(childPidPath)}, String(process.pid));`, + "const grandchild = spawn(process.execPath, ['-e', 'setInterval(() => {}, 1000);'], { stdio: ['ignore', 'inherit', 'inherit'] });", + `writeFileSync(${JSON.stringify(grandchildPidPath)}, String(grandchild.pid));`, + "process.stdout.write('spawned persistent descendant\\n');", + "process.on('SIGTERM', () => {});", + "setInterval(() => {}, 1000);", + ].join("\n"), + ); + + await expect( + runMatrixQaOpenClawCli({ + args: ["matrix", "verify", "self"], + cwd: root, + env: process.env, + timeoutMs: 500, + }), + ).rejects.toThrow(/timed out after 500ms/u); + + childPid = Number(await readFile(childPidPath, "utf8")); + grandchildPid = Number(await readFile(grandchildPidPath, "utf8")); + expect(isProcessRunning(childPid)).toBe(false); + if (process.platform !== "win32") { + expect(isProcessRunning(grandchildPid)).toBe(false); + } + } finally { + for (const pid of [grandchildPid, childPid]) { + if (pid && isProcessRunning(pid)) { + process.kill(pid, "SIGKILL"); + } + } + await rm(root, { force: true, recursive: true }); + } + }); }); diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.ts index 1e47a19b04c..56066fb1fb1 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-cli.ts @@ -30,6 +30,8 @@ export type MatrixQaCliSession = { }; const MATRIX_QA_CLI_SECRET_ARG_FLAGS = new Set(["--access-token", "--password", "--recovery-key"]); +const MATRIX_QA_CLI_TIMEOUT_KILL_GRACE_MS = 250; +const MATRIX_QA_CLI_TIMEOUT_FORCE_SETTLE_MS = 100; function isMatrixQaCliSecretPositionalArg(args: string[], index: number): boolean { return args[0] === "matrix" && args[1] === "verify" && args[2] === "device" && index === 3; @@ -91,6 +93,31 @@ function formatMatrixQaCliExitError(result: MatrixQaCliRunResult) { .join("\n"); } +function formatMatrixQaCliTimeoutError(result: MatrixQaCliRunResult, timeoutMs: number) { + return [ + `${formatMatrixQaCliCommand(result.args)} timed out after ${timeoutMs}ms`, + result.stderr.trim() ? `stderr:\n${redactMatrixQaCliOutput(result.stderr.trim())}` : null, + result.stdout.trim() ? `stdout:\n${redactMatrixQaCliOutput(result.stdout.trim())}` : null, + ] + .filter(Boolean) + .join("\n"); +} + +function killMatrixQaCliChild( + child: ReturnType, + signal: NodeJS.Signals, +): void { + if (process.platform !== "win32" && child.pid) { + try { + process.kill(-child.pid, signal); + return; + } catch { + // Fall back to the direct child if process-group signaling is unavailable. + } + } + child.kill(signal); +} + export function startMatrixQaOpenClawCli(params: { allowNonZero?: boolean; args: string[]; @@ -104,8 +131,11 @@ export function startMatrixQaOpenClawCli(params: { const stdout: Buffer[] = []; const stderr: Buffer[] = []; let closed = false; + let closeError: Error | undefined; let closeResult: MatrixQaCliRunResult | undefined; let timedOut = false; + let forceKillTimeout: NodeJS.Timeout | undefined; + let forceSettleTimeout: NodeJS.Timeout | undefined; let settleWait: | { reject: (error: Error) => void; @@ -115,6 +145,7 @@ export function startMatrixQaOpenClawCli(params: { const child = startOpenClawCliProcess(process.execPath, [distEntryPath, ...params.args], { cwd, + detached: process.platform !== "win32", env: params.env, stdio: ["pipe", "pipe", "pipe"], }); @@ -127,6 +158,7 @@ export function startMatrixQaOpenClawCli(params: { return; } closed = true; + closeError = error; closeResult = result; if (!settleWait) { return; @@ -140,30 +172,20 @@ export function startMatrixQaOpenClawCli(params: { const timeout = setTimeout(() => { timedOut = true; - child.kill("SIGTERM"); - setTimeout(() => { - const result = buildMatrixQaCliResult({ - args: params.args, - exitCode: 1, - output: readOutput(), - }); - finish( - result, - new Error( - [ - `${formatMatrixQaCliCommand(params.args)} timed out after ${params.timeoutMs}ms`, - result.stderr.trim() - ? `stderr:\n${redactMatrixQaCliOutput(result.stderr.trim())}` - : null, - result.stdout.trim() - ? `stdout:\n${redactMatrixQaCliOutput(result.stdout.trim())}` - : null, - ] - .filter(Boolean) - .join("\n"), - ), - ); - }, 25); + killMatrixQaCliChild(child, "SIGTERM"); + forceKillTimeout = setTimeout(() => { + if (!closed) { + killMatrixQaCliChild(child, "SIGKILL"); + forceSettleTimeout = setTimeout(() => { + const result = buildMatrixQaCliResult({ + args: params.args, + exitCode: 1, + output: readOutput(), + }); + finish(result, new Error(formatMatrixQaCliTimeoutError(result, params.timeoutMs))); + }, MATRIX_QA_CLI_TIMEOUT_FORCE_SETTLE_MS); + } + }, MATRIX_QA_CLI_TIMEOUT_KILL_GRACE_MS); }, params.timeoutMs); child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk))); @@ -173,6 +195,12 @@ export function startMatrixQaOpenClawCli(params: { } child.on("error", (error) => { clearTimeout(timeout); + if (forceKillTimeout) { + clearTimeout(forceKillTimeout); + } + if (forceSettleTimeout) { + clearTimeout(forceSettleTimeout); + } finish( buildMatrixQaCliResult({ args: params.args, @@ -184,14 +212,21 @@ export function startMatrixQaOpenClawCli(params: { }); child.on("close", (exitCode) => { clearTimeout(timeout); - if (timedOut) { - return; + if (forceKillTimeout) { + clearTimeout(forceKillTimeout); + } + if (forceSettleTimeout) { + clearTimeout(forceSettleTimeout); } const result = buildMatrixQaCliResult({ args: params.args, exitCode: exitCode ?? 1, output: readOutput(), }); + if (timedOut) { + finish(result, new Error(formatMatrixQaCliTimeoutError(result, params.timeoutMs))); + return; + } if (result.exitCode !== 0 && params.allowNonZero !== true) { finish(result, new Error(formatMatrixQaCliExitError(result))); return; @@ -210,7 +245,9 @@ export function startMatrixQaOpenClawCli(params: { wait: async () => await new Promise((resolve, reject) => { if (closed && closeResult) { - if (closeResult.exitCode === 0 || params.allowNonZero === true) { + if (closeError) { + reject(closeError); + } else if (closeResult.exitCode === 0 || params.allowNonZero === true) { resolve(closeResult); } else { reject(new Error(formatMatrixQaCliExitError(closeResult))); @@ -248,7 +285,7 @@ export function startMatrixQaOpenClawCli(params: { }, kill: () => { if (!closed) { - child.kill("SIGTERM"); + killMatrixQaCliChild(child, "SIGTERM"); } }, };