test(qa): add gateway CPU scenario pack

This commit is contained in:
Vincent Koc
2026-04-28 13:19:40 -07:00
parent 5e8d3130c6
commit 4509420dd4
13 changed files with 544 additions and 5 deletions

View File

@@ -21,6 +21,8 @@ type ProbeResult = {
};
type GatewaySample = {
cpuCoreRatio: number | null;
cpuMs: number | null;
exitCode: number | null;
firstOutputMs: number | null;
healthz: ProbeResult;
@@ -46,6 +48,8 @@ type CaseResult = {
samples: GatewaySample[];
summary: {
firstOutputMs: SummaryStats | null;
cpuCoreRatio: SummaryStats | null;
cpuMs: SummaryStats | null;
healthzMs: SummaryStats | null;
maxRssMb: SummaryStats | null;
readyLogMs: SummaryStats | null;
@@ -269,6 +273,16 @@ function summarizeCase(benchCase: GatewayBenchCase, samples: GatewaySample[]): C
.map((sample) => sample.firstOutputMs)
.filter((value): value is number => typeof value === "number"),
),
cpuCoreRatio: summarizeNumbers(
samples
.map((sample) => sample.cpuCoreRatio)
.filter((value): value is number => typeof value === "number"),
),
cpuMs: summarizeNumbers(
samples
.map((sample) => sample.cpuMs)
.filter((value): value is number => typeof value === "number"),
),
healthzMs: summarizeNumbers(
samples
.map((sample) => sample.healthz.ms)
@@ -308,6 +322,13 @@ function formatMb(value: number | null): string {
return `${value.toFixed(1)}MB`;
}
/**
 * Renders a CPU core ratio for log output.
 *
 * @param value - Ratio to render; `null` (or `undefined`) means no measurement.
 * @returns The ratio with exactly three decimal places, or `"n/a"` when absent.
 */
function formatRatio(value: number | null): string {
  return value == null ? "n/a" : value.toFixed(3);
}
function formatStats(stats: SummaryStats | null): string {
if (!stats) {
return "n/a";
@@ -322,6 +343,13 @@ function formatMemoryStats(stats: SummaryStats | null): string {
return `p50=${formatMb(stats.p50)} avg=${formatMb(stats.avg)} min=${formatMb(stats.min)} max=${formatMb(stats.max)}`;
}
/**
 * Renders the p50/avg/min/max of a ratio summary on a single line.
 *
 * @param stats - Summary to render, or a falsy value when no samples were summarized.
 * @returns `"p50=… avg=… min=… max=…"` with each value formatted by `formatRatio`,
 *   or `"n/a"` when `stats` is absent.
 */
function formatRatioStats(stats: SummaryStats | null): string {
  if (!stats) {
    return "n/a";
  }
  const p50 = formatRatio(stats.p50);
  const avg = formatRatio(stats.avg);
  const min = formatRatio(stats.min);
  const max = formatRatio(stats.max);
  return ["p50=" + p50, "avg=" + avg, "min=" + min, "max=" + max].join(" ");
}
async function getFreePort(): Promise<number> {
return new Promise((resolve, reject) => {
const server = createServer();
@@ -547,6 +575,71 @@ function readProcessRssMb(pid: number | undefined): number | null {
return Number.isFinite(rssKb) && rssKb > 0 ? rssKb / 1024 : null;
}
/**
 * Converts the cumulative CPU time column printed by `ps` into milliseconds.
 *
 * Accepted layouts (surrounding whitespace is ignored):
 * - `MM:SS` / `MM:SS.ff`        (minutes may exceed 59)
 * - `HH:MM:SS` / `HH:MM:SS.ff`
 * - `DD-HH:MM:SS`               (day prefix used once a process passes 24h of CPU)
 *
 * Every segment must be a plain non-negative decimal number; empty segments
 * (for example `"1::2"`) are rejected rather than being read as zero.
 *
 * @param raw - Raw text of the time column.
 * @returns CPU time rounded to whole milliseconds, or `null` when the text
 *   does not match one of the layouts above.
 */
function parsePsCpuTimeMs(raw: string): number | null {
  const text = raw.trim();

  // Split off an optional "DD-" prefix before looking at the clock part.
  const dashIndex = text.indexOf("-");
  const dayText = dashIndex === -1 ? null : text.slice(0, dashIndex);
  const clockText = dashIndex === -1 ? text : text.slice(dashIndex + 1);

  const segments = clockText.split(":");
  if (segments.length !== 2 && segments.length !== 3) {
    return null;
  }
  // A day prefix is only meaningful in front of a full HH:MM:SS clock.
  if (dayText !== null && (segments.length !== 3 || !/^\d+$/u.test(dayText))) {
    return null;
  }

  let clockSeconds = 0;
  for (const segment of segments) {
    if (!/^\d+(?:\.\d+)?$/u.test(segment)) {
      return null;
    }
    // Each further segment is one base-60 position to the right.
    clockSeconds = clockSeconds * 60 + Number(segment);
  }

  const days = dayText === null ? 0 : Number(dayText);
  const totalMs = (days * 24 * 60 * 60 + clockSeconds) * 1000;
  return Number.isFinite(totalMs) ? Math.round(totalMs) : null;
}
/**
 * Sums the cumulative CPU time of a process and every descendant currently
 * listed by `ps`.
 *
 * Runs `ps -eo pid=,ppid=,time=` once, indexes the rows by pid and by parent
 * pid, then walks the tree starting at `rootPid`.
 *
 * @param rootPid - Pid of the tree root; a falsy value yields `null`.
 * @returns Total CPU milliseconds for the tree, or `null` on Windows, when
 *   `ps` exits non-zero, or when `rootPid` is not present in the listing.
 */
function readProcessTreeCpuMs(rootPid: number | undefined): number | null {
  if (process.platform === "win32" || !rootPid) {
    return null;
  }

  const ps = spawnSync("ps", ["-eo", "pid=,ppid=,time="], {
    encoding: "utf8",
    stdio: ["ignore", "pipe", "ignore"],
  });
  if (ps.status !== 0) {
    return null;
  }

  const cpuMsByPid = new Map<number, number>();
  const childPidsByParent = new Map<number, number[]>();
  const rowPattern = /^(\d+)\s+(\d+)\s+(\S+)$/u;
  for (const rawRow of ps.stdout.split("\n")) {
    const columns = rowPattern.exec(rawRow.trim());
    if (columns === null) {
      continue;
    }
    const pid = Number(columns[1]);
    const parentPid = Number(columns[2]);
    const rowCpuMs = parsePsCpuTimeMs(columns[3]);
    if (rowCpuMs === null || !Number.isInteger(pid) || !Number.isInteger(parentPid)) {
      continue;
    }
    cpuMsByPid.set(pid, rowCpuMs);
    const siblings = childPidsByParent.get(parentPid);
    if (siblings) {
      siblings.push(pid);
    } else {
      childPidsByParent.set(parentPid, [pid]);
    }
  }

  if (!cpuMsByPid.has(rootPid)) {
    return null;
  }

  // Depth-first walk; `visited` guards against a pid being reached twice.
  const visited = new Set<number>();
  const pending = [rootPid];
  let totalCpuMs = 0;
  for (let pid = pending.pop(); pid !== undefined; pid = pending.pop()) {
    if (!pid || visited.has(pid)) {
      continue;
    }
    visited.add(pid);
    totalCpuMs += cpuMsByPid.get(pid) ?? 0;
    pending.push(...(childPidsByParent.get(pid) ?? []));
  }
  return totalCpuMs;
}
async function runGatewaySample(options: {
benchCase: GatewayBenchCase;
entry: string;
@@ -583,6 +676,7 @@ async function runGatewaySample(options: {
],
{ cwd: process.cwd(), detached: process.platform !== "win32", env },
);
const cpuStartMs = readProcessTreeCpuMs(child.pid);
const sampleRss = () => {
const rssMb = readProcessRssMb(child.pid);
if (rssMb != null) {
@@ -636,6 +730,10 @@ async function runGatewaySample(options: {
startAt,
}),
]);
const readyAt = performance.now();
const cpuEndMs = readProcessTreeCpuMs(child.pid);
const cpuMs = cpuStartMs == null || cpuEndMs == null ? null : Math.max(0, cpuEndMs - cpuStartMs);
const cpuCoreRatio = cpuMs == null ? null : cpuMs / Math.max(1, readyAt - startAt);
const exit = await stopChild(child);
clearInterval(rssTimer);
sampleRss();
@@ -643,6 +741,8 @@ async function runGatewaySample(options: {
rmSync(root, { force: true, maxRetries: 3, recursive: true, retryDelay: 100 });
return {
cpuCoreRatio,
cpuMs,
exitCode: exit.exitCode,
firstOutputMs,
healthz,
@@ -673,11 +773,11 @@ async function runCase(options: {
if (index >= options.warmup) {
samples.push(sample);
console.log(
`[gateway-startup-bench] ${options.benchCase.id} run ${samples.length}/${options.runs}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} readyLog=${formatMs(sample.readyLogMs)} rss=${formatMb(sample.maxRssMb)}`,
`[gateway-startup-bench] ${options.benchCase.id} run ${samples.length}/${options.runs}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} readyLog=${formatMs(sample.readyLogMs)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)}`,
);
} else {
console.log(
`[gateway-startup-bench] ${options.benchCase.id} warmup ${index + 1}/${options.warmup}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} rss=${formatMb(sample.maxRssMb)}`,
`[gateway-startup-bench] ${options.benchCase.id} warmup ${index + 1}/${options.warmup}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)}`,
);
}
}
@@ -687,6 +787,8 @@ async function runCase(options: {
function printResult(result: CaseResult): void {
console.log(`\n${result.name} (${result.id})`);
console.log(` first output: ${formatStats(result.summary.firstOutputMs)}`);
console.log(` CPU: ${formatStats(result.summary.cpuMs)}`);
console.log(` CPU core: ${formatRatioStats(result.summary.cpuCoreRatio)}`);
console.log(` /healthz: ${formatStats(result.summary.healthzMs)}`);
console.log(` ready log: ${formatStats(result.summary.readyLogMs)}`);
console.log(` /readyz: ${formatStats(result.summary.readyzMs)}`);

View File

@@ -0,0 +1,280 @@
#!/usr/bin/env node
import { spawnSync } from "node:child_process";
import fs from "node:fs";
import path from "node:path";
import process from "node:process";
// Startup bench case ids used when no --startup-case flag is given.
const DEFAULT_STARTUP_CASES = ["default", "oneInternalHook", "allInternalHooks"];
// QA Lab scenario ids used when no --qa-scenario flag is given.
const DEFAULT_QA_SCENARIOS = [
"channel-chat-baseline",
"memory-failure-fallback",
"gateway-restart-inflight-run",
];
// Default --cpu-core-warn: CPU core ratio at or above which a run can be reported as hot.
const DEFAULT_CPU_CORE_WARN = 0.9;
// Default --hot-wall-warn-ms: wall time (ms) a run must also reach before it is reported as hot.
const DEFAULT_HOT_WALL_WARN_MS = 30_000;
/**
 * Parses the command-line flags of the CPU scenario runner.
 *
 * Value flags consume the following argv entry; `--startup-case` and
 * `--qa-scenario` may repeat and fall back to the built-in defaults when never
 * given. `--help` prints usage and exits the process with code 0.
 *
 * @param {string[]} argv Arguments after the script path.
 * @returns {object} Resolved options (outputDir, startupCases, qaScenarios,
 *   runs, warmup, skipStartup, skipQa, cpuCoreWarn, hotWallWarnMs).
 * @throws {Error} On an unknown flag, a flag missing its value, or a value
 *   rejected by the numeric validators.
 */
function parseArgs(argv) {
  const runStamp = new Date().toISOString().replace(/[:.]/g, "-");
  const options = {
    outputDir: path.join(process.cwd(), ".artifacts", "gateway-cpu-scenarios", runStamp),
    startupCases: [],
    qaScenarios: [],
    runs: 1,
    warmup: 0,
    skipStartup: false,
    skipQa: false,
    cpuCoreWarn: DEFAULT_CPU_CORE_WARN,
    hotWallWarnMs: DEFAULT_HOT_WALL_WARN_MS,
  };

  let cursor = 0;
  // Consumes the entry after the current flag; the cursor only advances on success.
  const takeValue = () => {
    const next = argv[cursor + 1];
    if (!next) {
      throw new Error(`Missing value for ${argv[cursor]}`);
    }
    cursor += 1;
    return next;
  };

  const handlers = new Map([
    [
      "--output-dir",
      () => {
        options.outputDir = path.resolve(takeValue());
      },
    ],
    [
      "--startup-case",
      () => {
        options.startupCases.push(takeValue());
      },
    ],
    [
      "--qa-scenario",
      () => {
        options.qaScenarios.push(takeValue());
      },
    ],
    [
      "--runs",
      () => {
        options.runs = parsePositiveInt(takeValue(), "--runs");
      },
    ],
    [
      "--warmup",
      () => {
        options.warmup = parseNonNegativeInt(takeValue(), "--warmup");
      },
    ],
    [
      "--cpu-core-warn",
      () => {
        options.cpuCoreWarn = parsePositiveNumber(takeValue(), "--cpu-core-warn");
      },
    ],
    [
      "--hot-wall-warn-ms",
      () => {
        options.hotWallWarnMs = parsePositiveInt(takeValue(), "--hot-wall-warn-ms");
      },
    ],
    [
      "--skip-startup",
      () => {
        options.skipStartup = true;
      },
    ],
    [
      "--skip-qa",
      () => {
        options.skipQa = true;
      },
    ],
    [
      "--help",
      () => {
        printHelp();
        process.exit(0);
      },
    ],
  ]);

  for (; cursor < argv.length; cursor += 1) {
    const flag = argv[cursor];
    const handle = handlers.get(flag);
    if (!handle) {
      throw new Error(`Unknown argument: ${flag}`);
    }
    handle();
  }

  // Repeatable flags that were never supplied fall back to copies of the defaults.
  if (options.startupCases.length === 0) {
    options.startupCases = [...DEFAULT_STARTUP_CASES];
  }
  if (options.qaScenarios.length === 0) {
    options.qaScenarios = [...DEFAULT_QA_SCENARIOS];
  }
  return options;
}
/**
 * Parses a flag value that must be an integer of at least 1.
 *
 * @param {string} raw Raw flag value.
 * @param {string} label Flag name used in the error message.
 * @returns {number} The parsed integer.
 * @throws {Error} When the value is not an integer or is below 1.
 */
function parsePositiveInt(raw, label) {
  const parsed = Number(raw);
  if (Number.isInteger(parsed) && parsed >= 1) {
    return parsed;
  }
  throw new Error(`${label} must be a positive integer`);
}
/**
 * Parses a flag value that must be an integer of at least 0.
 *
 * @param {string} raw Raw flag value.
 * @param {string} label Flag name used in the error message.
 * @returns {number} The parsed integer.
 * @throws {Error} When the value is not an integer or is negative.
 */
function parseNonNegativeInt(raw, label) {
  const parsed = Number(raw);
  if (Number.isInteger(parsed) && parsed >= 0) {
    return parsed;
  }
  throw new Error(`${label} must be a non-negative integer`);
}
/**
 * Parses a flag value that must be a finite number strictly greater than 0.
 *
 * @param {string} raw Raw flag value.
 * @param {string} label Flag name used in the error message.
 * @returns {number} The parsed number.
 * @throws {Error} When the value is not finite or is not above 0.
 */
function parsePositiveNumber(raw, label) {
  const parsed = Number(raw);
  if (Number.isFinite(parsed) && parsed > 0) {
    return parsed;
  }
  throw new Error(`${label} must be a positive number`);
}
/**
 * Prints the usage text for this script to stdout.
 *
 * The text lists every flag accepted by parseArgs together with its default;
 * keep the two in sync when adding or changing a flag.
 */
function printHelp() {
console.log(`Usage: pnpm test:gateway:cpu-scenarios [options]
Runs a small gateway CPU scenario suite against built dist artifacts.
Options:
--output-dir <path> Artifact directory
--startup-case <id> Startup bench case, repeatable
--qa-scenario <id> QA Lab scenario, repeatable
--runs <count> Startup bench runs per case (default: 1)
--warmup <count> Startup bench warmup runs per case (default: 0)
--cpu-core-warn <ratio> Hot CPU observation threshold (default: 0.9)
--hot-wall-warn-ms <ms> Minimum wall time for hot CPU observations (default: 30000)
--skip-startup Skip startup bench
--skip-qa Skip QA Lab scenario smoke
`);
}
function readJsonIfExists(filePath) {
if (!fs.existsSync(filePath)) {
return null;
}
return JSON.parse(fs.readFileSync(filePath, "utf8"));
}
/**
 * Runs one suite step synchronously with inherited stdio and reports its outcome.
 *
 * @param {string} name Human-readable step name used in log lines and the result.
 * @param {string} command Executable to spawn.
 * @param {string[]} args Arguments passed to the executable.
 * @returns {{ name: string, status: number, signal: string | null }} The
 *   step's exit status (0 only when the child ran and exited cleanly) and the
 *   terminating signal, if any.
 */
function runStep(name, command, args) {
  console.error(`[gateway-cpu] start ${name}`);
  const result = spawnSync(command, args, {
    cwd: process.cwd(),
    env: process.env,
    stdio: "inherit",
  });
  if (result.error) {
    // The child never ran (or timed out); surface why instead of hiding it.
    console.error(`[gateway-cpu] could not run ${name}: ${result.error.message}`);
  }
  // `status` is null when the child was killed by a signal AND when it could
  // not be spawned at all (e.g. ENOENT). Neither case is a pass.
  const status = result.status ?? 1;
  console.error(`[gateway-cpu] ${status === 0 ? "pass" : "fail"} ${name}`);
  return { name, status, signal: result.signal ?? null };
}
/**
 * Picks the pnpm executable name for the current platform.
 *
 * @returns {string} `"pnpm.cmd"` on Windows, `"pnpm"` everywhere else.
 */
function pnpmCommand() {
  if (process.platform === "win32") {
    return "pnpm.cmd";
  }
  return "pnpm";
}
/**
 * Converts an absolute path into a path relative to the current working
 * directory, which this script treats as the repo root.
 *
 * @param {string} absolutePath Path to convert.
 * @returns {string} The relative path, strictly below the working directory.
 * @throws {Error} When the path is the working directory itself, lies outside
 *   it, or cannot be expressed relatively (e.g. another drive on Windows).
 */
function toRepoRelativePath(absolutePath) {
  const relativePath = path.relative(process.cwd(), absolutePath);
  // Only a leading ".." *segment* escapes the root; a name that merely begins
  // with two dots (e.g. "..cache") is still inside it.
  const escapesRoot = relativePath === ".." || relativePath.startsWith(`..${path.sep}`);
  if (!relativePath || escapesRoot || path.isAbsolute(relativePath)) {
    throw new Error(`Output path must stay inside the repo root: ${absolutePath}`);
  }
  return relativePath;
}
/**
 * Flags runs that were both CPU-hot and long-running.
 *
 * A startup case is reported when its maximum CPU core ratio reaches
 * `cpuCoreWarn` and its maximum wall time (the /readyz timing, falling back to
 * /healthz) reaches `hotWallWarnMs`. The QA suite is reported under the same
 * two thresholds using `metrics.gatewayCpuCoreRatio` and `metrics.wallMs`.
 *
 * @param {object} params
 * @param {object | null} params.startup Parsed startup bench output (`results[]`), if any.
 * @param {object | null} params.qa Parsed QA suite summary (`metrics`), if any.
 * @param {number} params.cpuCoreWarn CPU core ratio threshold.
 * @param {number} params.hotWallWarnMs Wall-time threshold in milliseconds.
 * @returns {object[]} Observations of kind `startup-cpu-hot` and/or `qa-cpu-hot`.
 */
function collectObservations(params) {
  const observations = [];
  const isHot = (cpuCoreRatio, wallMs) =>
    typeof cpuCoreRatio === "number" &&
    typeof wallMs === "number" &&
    cpuCoreRatio >= params.cpuCoreWarn &&
    wallMs >= params.hotWallWarnMs;

  for (const result of params.startup?.results ?? []) {
    const summary = result.summary;
    const cpuCoreMax = summary?.cpuCoreRatio?.max;
    // The startup bench names these summary fields `readyzMs` / `healthzMs`.
    // The un-suffixed names are kept as a fallback for older artifacts.
    const wallMax =
      summary?.readyzMs?.max ??
      summary?.healthzMs?.max ??
      summary?.readyz?.max ??
      summary?.healthz?.max;
    if (isHot(cpuCoreMax, wallMax)) {
      observations.push({
        kind: "startup-cpu-hot",
        id: result.id,
        cpuCoreRatioMax: cpuCoreMax,
        wallMsMax: wallMax,
      });
    }
  }

  const qaCpuCoreRatio = params.qa?.metrics?.gatewayCpuCoreRatio;
  const qaWallMs = params.qa?.metrics?.wallMs;
  if (isHot(qaCpuCoreRatio, qaWallMs)) {
    observations.push({
      kind: "qa-cpu-hot",
      id: "qa-suite",
      cpuCoreRatio: qaCpuCoreRatio,
      wallMs: qaWallMs,
    });
  }
  return observations;
}
/**
 * Runs the selected steps, gathers their artifacts, and writes the run summary.
 *
 * Steps execute one after another: the startup bench (unless skipped), then
 * the QA suite (unless skipped). Whatever JSON artifacts exist afterwards are
 * read back to derive hot-CPU observations. The summary is written to
 * `summary.json` in the output directory and echoed to stdout. The process
 * exit code is set to 1 when any step reported a non-zero status.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const options = parseArgs(process.argv.slice(2));
  const { outputDir } = options;
  fs.mkdirSync(outputDir, { recursive: true });

  const startupOutput = path.join(outputDir, "gateway-startup-bench.json");
  const qaOutputDir = path.join(outputDir, "qa-suite");
  const qaSummaryPath = path.join(qaOutputDir, "qa-suite-summary.json");
  // Resolved up front, so an out-of-repo output directory fails before any step runs.
  const qaOutputArg = toRepoRelativePath(qaOutputDir);

  const steps = [];

  if (!options.skipStartup) {
    const caseFlags = options.startupCases.flatMap((id) => ["--case", id]);
    const benchArgs = [
      "--import",
      "tsx",
      "scripts/bench-gateway-startup.ts",
      "--runs",
      String(options.runs),
      "--warmup",
      String(options.warmup),
      "--output",
      startupOutput,
      ...caseFlags,
    ];
    steps.push(runStep("startup bench", process.execPath, benchArgs));
  }

  if (!options.skipQa) {
    const scenarioFlags = options.qaScenarios.flatMap((id) => ["--scenario", id]);
    const qaArgs = [
      "openclaw",
      "qa",
      "suite",
      "--provider-mode",
      "mock-openai",
      "--concurrency",
      "1",
      "--output-dir",
      qaOutputArg,
      ...scenarioFlags,
    ];
    steps.push(runStep("qa suite", pnpmCommand(), qaArgs));
  }

  const startup = readJsonIfExists(startupOutput);
  const qa = readJsonIfExists(qaSummaryPath);
  const { cpuCoreWarn, hotWallWarnMs } = options;
  const observations = collectObservations({ startup, qa, cpuCoreWarn, hotWallWarnMs });

  const summary = {
    generatedAt: new Date().toISOString(),
    outputDir,
    startupOutput: fs.existsSync(startupOutput) ? startupOutput : null,
    qaSummary: fs.existsSync(qaSummaryPath) ? qaSummaryPath : null,
    options: {
      startupCases: options.startupCases,
      qaScenarios: options.qaScenarios,
      runs: options.runs,
      warmup: options.warmup,
      cpuCoreWarn,
      hotWallWarnMs,
    },
    steps,
    observations,
  };

  const serialized = JSON.stringify(summary, null, 2);
  fs.writeFileSync(path.join(outputDir, "summary.json"), `${serialized}\n`);
  console.log(serialized);

  const anyStepFailed = steps.some(({ status }) => status !== 0);
  if (anyStepFailed) {
    process.exitCode = 1;
  }
}
// Script entry point. Any error escaping main() is printed (with its stack
// when available) and turned into a failing exit code; process.exitCode is
// set rather than calling process.exit().
main().catch((error) => {
console.error(error instanceof Error ? error.stack : String(error));
process.exitCode = 1;
});