fix(qa-lab): wait for model catalog process groups

This commit is contained in:
Vincent Koc
2026-06-17 21:41:46 +02:00
parent 4005e03919
commit d988851fe0
2 changed files with 129 additions and 4 deletions

View File

@@ -1,10 +1,51 @@
// Qa Lab tests cover model catalog plugin behavior.
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import {
loadQaRunnerModelOptions,
parseQaRunnerModelOptionsOutput,
selectQaRunnerModelOptions,
} from "./model-catalog.runtime.js";
async function waitForFile(filePath: string, timeoutMs: number): Promise<void> {
const deadlineAt = Date.now() + timeoutMs;
while (Date.now() < deadlineAt) {
try {
await fs.access(filePath);
return;
} catch {
await new Promise((resolvePoll) => {
setTimeout(resolvePoll, 25);
});
}
}
throw new Error(`timed out waiting for ${filePath}`);
}
function isProcessAlive(pid: number): boolean {
try {
process.kill(pid, 0);
return true;
} catch {
return false;
}
}
async function waitForDead(pid: number, timeoutMs: number): Promise<void> {
const deadlineAt = Date.now() + timeoutMs;
while (Date.now() < deadlineAt) {
if (!isProcessAlive(pid)) {
return;
}
await new Promise((resolvePoll) => {
setTimeout(resolvePoll, 25);
});
}
throw new Error(`timed out waiting for pid ${pid} to exit`);
}
describe("qa runner model catalog", () => {
it("filters to available rows and prefers gpt-5.5 first", () => {
expect(
@@ -58,4 +99,46 @@ describe("qa runner model catalog", () => {
).map((entry) => entry.key),
).toEqual(["openai/gpt-5.5"]);
});
it.runIf(process.platform !== "win32")(
"kills aborted catalog process groups when the catalog child exits first",
async () => {
const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-qa-model-catalog-"));
const pidPath = path.join(repoRoot, "descendant.pid");
let descendantPid: number | undefined;
const controller = new AbortController();
const childScript = "process.on('SIGTERM', () => {}); setInterval(() => {}, 1000);";
const catalogScript = [
"const { spawn } = require('node:child_process');",
"const fs = require('node:fs');",
`const child = spawn(process.execPath, ['-e', ${JSON.stringify(childScript)}], { stdio: 'ignore' });`,
`fs.writeFileSync(${JSON.stringify(pidPath)}, String(child.pid));`,
"process.on('SIGTERM', () => process.exit(0));",
"setInterval(() => {}, 1000);",
].join("\n");
try {
await fs.mkdir(path.join(repoRoot, "dist"), { recursive: true });
await fs.writeFile(path.join(repoRoot, "dist", "index.js"), catalogScript, "utf8");
const runPromise = loadQaRunnerModelOptions({
repoRoot,
signal: controller.signal,
});
await waitForFile(pidPath, 2_000);
descendantPid = Number.parseInt(await fs.readFile(pidPath, "utf8"), 10);
expect(Number.isInteger(descendantPid)).toBe(true);
expect(isProcessAlive(descendantPid)).toBe(true);
controller.abort();
await expect(runPromise).rejects.toThrow("qa model catalog aborted");
await waitForDead(descendantPid, 2_000);
} finally {
if (descendantPid !== undefined && isProcessAlive(descendantPid)) {
process.kill(descendantPid, "SIGKILL");
}
await fs.rm(repoRoot, { force: true, recursive: true });
}
},
);
});

View File

@@ -107,6 +107,8 @@ export function parseQaRunnerModelOptionsOutput(stdout: string): QaRunnerModelOp
}
const CATALOG_ABORT_ERROR_MESSAGE = "qa model catalog aborted";
const CATALOG_ABORT_KILL_GRACE_MS = 1_000;
const CATALOG_ABORT_POLL_MS = 50;
function createCatalogAbortError() {
return new Error(CATALOG_ABORT_ERROR_MESSAGE);
@@ -141,6 +143,31 @@ function killProcessTree(pid: number | undefined, signal: NodeJS.Signals) {
}
}
function processTreeIsAlive(pid: number | undefined) {
if (pid === undefined || process.platform === "win32") {
return false;
}
try {
process.kill(-pid, 0);
return true;
} catch (error) {
return error instanceof Error && "code" in error && error.code === "EPERM";
}
}
async function waitForProcessTreeExit(pid: number | undefined, timeoutMs: number) {
const deadlineAt = Date.now() + timeoutMs;
while (Date.now() < deadlineAt) {
if (!processTreeIsAlive(pid)) {
return true;
}
await new Promise((resolvePoll) => {
setTimeout(resolvePoll, CATALOG_ABORT_POLL_MS);
});
}
return !processTreeIsAlive(pid);
}
export async function loadQaRunnerModelOptions(params: { repoRoot: string; signal?: AbortSignal }) {
const tempRoot = await fs.mkdtemp(
path.join(resolvePreferredOpenClawTmpDir(), "openclaw-qa-model-catalog-"),
@@ -194,16 +221,27 @@ export async function loadQaRunnerModelOptions(params: { repoRoot: string; signa
detached: process.platform !== "win32",
stdio: ["ignore", "pipe", "pipe"],
});
const cleanup = () => {
const cleanupAbortListener = () => {
params.signal?.removeEventListener("abort", abortCatalogLoad);
};
const cleanup = () => {
cleanupAbortListener();
if (forceKillTimer) {
clearTimeout(forceKillTimer);
forceKillTimer = undefined;
}
};
const finishAbortedCatalogLoad = async () => {
cleanup();
if (processTreeIsAlive(child.pid)) {
killProcessTree(child.pid, "SIGKILL");
await waitForProcessTreeExit(child.pid, CATALOG_ABORT_KILL_GRACE_MS);
}
};
const abortCatalogLoad = () => {
aborted = true;
killProcessTree(child.pid, "SIGTERM");
forceKillTimer = setTimeout(() => {
forceKillTimer ??= setTimeout(() => {
killProcessTree(child.pid, "SIGKILL");
}, 1_000);
forceKillTimer.unref();
@@ -220,11 +258,15 @@ export async function loadQaRunnerModelOptions(params: { repoRoot: string; signa
reject(aborted ? createCatalogAbortError() : error);
});
child.once("exit", (code) => {
cleanup();
cleanupAbortListener();
if (aborted) {
reject(createCatalogAbortError());
void finishAbortedCatalogLoad().then(
() => reject(createCatalogAbortError()),
() => reject(createCatalogAbortError()),
);
return;
}
cleanup();
if (code === 0) {
if (stdout.exceeded) {
reject(