From d988851fe016e43cbcec408c91ddd090ab2f05cd Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 17 Jun 2026 21:41:46 +0200 Subject: [PATCH] fix(qa-lab): wait for model catalog process groups --- .../qa-lab/src/model-catalog.runtime.test.ts | 83 +++++++++++++++++++ .../qa-lab/src/model-catalog.runtime.ts | 50 ++++++++++- 2 files changed, 129 insertions(+), 4 deletions(-) diff --git a/extensions/qa-lab/src/model-catalog.runtime.test.ts b/extensions/qa-lab/src/model-catalog.runtime.test.ts index d786dd1b0d3..88ad51a405f 100644 --- a/extensions/qa-lab/src/model-catalog.runtime.test.ts +++ b/extensions/qa-lab/src/model-catalog.runtime.test.ts @@ -1,10 +1,51 @@ // Qa Lab tests cover model catalog plugin behavior. +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; import { describe, expect, it } from "vitest"; import { + loadQaRunnerModelOptions, parseQaRunnerModelOptionsOutput, selectQaRunnerModelOptions, } from "./model-catalog.runtime.js"; +async function waitForFile(filePath: string, timeoutMs: number): Promise { + const deadlineAt = Date.now() + timeoutMs; + while (Date.now() < deadlineAt) { + try { + await fs.access(filePath); + return; + } catch { + await new Promise((resolvePoll) => { + setTimeout(resolvePoll, 25); + }); + } + } + throw new Error(`timed out waiting for ${filePath}`); +} + +function isProcessAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + +async function waitForDead(pid: number, timeoutMs: number): Promise { + const deadlineAt = Date.now() + timeoutMs; + while (Date.now() < deadlineAt) { + if (!isProcessAlive(pid)) { + return; + } + await new Promise((resolvePoll) => { + setTimeout(resolvePoll, 25); + }); + } + throw new Error(`timed out waiting for pid ${pid} to exit`); +} + describe("qa runner model catalog", () => { it("filters to available rows and prefers gpt-5.5 first", () => { expect( @@ -58,4 +99,46 @@ describe("qa runner model catalog", () => { ).map((entry) => entry.key), ).toEqual(["openai/gpt-5.5"]); }); + + it.runIf(process.platform !== "win32")( + "kills aborted catalog process groups when the catalog child exits first", + async () => { + const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-qa-model-catalog-")); + const pidPath = path.join(repoRoot, "descendant.pid"); + let descendantPid: number | undefined; + const controller = new AbortController(); + const childScript = "process.on('SIGTERM', () => {}); setInterval(() => {}, 1000);"; + const catalogScript = [ + "const { spawn } = require('node:child_process');", + "const fs = require('node:fs');", + `const child = spawn(process.execPath, ['-e', ${JSON.stringify(childScript)}], { stdio: 'ignore' });`, + `fs.writeFileSync(${JSON.stringify(pidPath)}, String(child.pid));`, + "process.on('SIGTERM', () => process.exit(0));", + "setInterval(() => {}, 1000);", + ].join("\n"); + + try { + await fs.mkdir(path.join(repoRoot, "dist"), { recursive: true }); + await fs.writeFile(path.join(repoRoot, "dist", "index.js"), catalogScript, "utf8"); + const runPromise = loadQaRunnerModelOptions({ + repoRoot, + signal: controller.signal, + }); + + await waitForFile(pidPath, 2_000); + descendantPid = Number.parseInt(await fs.readFile(pidPath, "utf8"), 10); + expect(Number.isInteger(descendantPid)).toBe(true); + expect(isProcessAlive(descendantPid)).toBe(true); + controller.abort(); + + await expect(runPromise).rejects.toThrow("qa model catalog aborted"); + await waitForDead(descendantPid, 2_000); + } finally { + if (descendantPid !== undefined && isProcessAlive(descendantPid)) { + process.kill(descendantPid, "SIGKILL"); + } + await fs.rm(repoRoot, { force: true, recursive: true }); + } + }, + ); }); diff --git a/extensions/qa-lab/src/model-catalog.runtime.ts b/extensions/qa-lab/src/model-catalog.runtime.ts index 5dd0516f67b..8bd99a8ee98 100644 --- a/extensions/qa-lab/src/model-catalog.runtime.ts +++ b/extensions/qa-lab/src/model-catalog.runtime.ts @@ -107,6 +107,8 @@ export function parseQaRunnerModelOptionsOutput(stdout: string): QaRunnerModelOp } const CATALOG_ABORT_ERROR_MESSAGE = "qa model catalog aborted"; +const CATALOG_ABORT_KILL_GRACE_MS = 1_000; +const CATALOG_ABORT_POLL_MS = 50; function createCatalogAbortError() { return new Error(CATALOG_ABORT_ERROR_MESSAGE); @@ -141,6 +143,31 @@ function killProcessTree(pid: number | undefined, signal: NodeJS.Signals) { } } +function processTreeIsAlive(pid: number | undefined) { + if (pid === undefined || process.platform === "win32") { + return false; + } + try { + process.kill(-pid, 0); + return true; + } catch (error) { + return error instanceof Error && "code" in error && error.code === "EPERM"; + } +} + +async function waitForProcessTreeExit(pid: number | undefined, timeoutMs: number) { + const deadlineAt = Date.now() + timeoutMs; + while (Date.now() < deadlineAt) { + if (!processTreeIsAlive(pid)) { + return true; + } + await new Promise((resolvePoll) => { + setTimeout(resolvePoll, CATALOG_ABORT_POLL_MS); + }); + } + return !processTreeIsAlive(pid); +} + export async function loadQaRunnerModelOptions(params: { repoRoot: string; signal?: AbortSignal }) { const tempRoot = await fs.mkdtemp( path.join(resolvePreferredOpenClawTmpDir(), "openclaw-qa-model-catalog-"), @@ -194,16 +221,27 @@ export async function loadQaRunnerModelOptions(params: { repoRoot: string; signa detached: process.platform !== "win32", stdio: ["ignore", "pipe", "pipe"], }); - const cleanup = () => { + const cleanupAbortListener = () => { params.signal?.removeEventListener("abort", abortCatalogLoad); + }; + const cleanup = () => { + cleanupAbortListener(); if (forceKillTimer) { clearTimeout(forceKillTimer); + forceKillTimer = undefined; + } + }; + const finishAbortedCatalogLoad = async () => { + cleanup(); + if (processTreeIsAlive(child.pid)) { + killProcessTree(child.pid, "SIGKILL"); + await waitForProcessTreeExit(child.pid, CATALOG_ABORT_KILL_GRACE_MS); } }; const abortCatalogLoad = () => { aborted = true; killProcessTree(child.pid, "SIGTERM"); - forceKillTimer = setTimeout(() => { + forceKillTimer ??= setTimeout(() => { killProcessTree(child.pid, "SIGKILL"); }, 1_000); forceKillTimer.unref(); @@ -220,11 +258,15 @@ export async function loadQaRunnerModelOptions(params: { repoRoot: string; signa reject(aborted ? createCatalogAbortError() : error); }); child.once("exit", (code) => { - cleanup(); + cleanupAbortListener(); if (aborted) { - reject(createCatalogAbortError()); + void finishAbortedCatalogLoad().then( + () => reject(createCatalogAbortError()), + () => reject(createCatalogAbortError()), + ); return; } + cleanup(); if (code === 0) { if (stdout.exceeded) { reject(