fix(test): harden startup benchmark harness

This commit is contained in:
Vincent Koc
2026-05-28 06:40:30 +02:00
parent 4f26cc9090
commit d10d30c5fa
4 changed files with 139 additions and 8 deletions

View File

@@ -10,6 +10,8 @@ type CommandCase = {
name: string;
args: string[];
presets: readonly string[];
expectedExitCodes?: readonly number[];
expectedNonzeroOutputIncludes?: readonly string[];
firstOutputBudgetMs?: number;
exitBudgetMs?: number;
};
@@ -46,6 +48,8 @@ type SuiteResult = {
id: string;
name: string;
args: string[];
expectedExitCodes?: number[];
expectedNonzeroOutputIncludes?: string[];
contract: {
firstOutputBudgetMs: number | null;
exitBudgetMs: number | null;
@@ -307,8 +311,22 @@ const COMMAND_CASES: readonly CommandCase[] = [
firstOutputBudgetMs: 2_500,
exitBudgetMs: 6_000,
},
{ id: "health", name: "health", args: ["health"], presets: ["startup", "real"] },
{ id: "healthJson", name: "health --json", args: ["health", "--json"], presets: ["startup"] },
{
id: "health",
name: "health",
args: ["health"],
presets: ["startup", "real"],
expectedExitCodes: [0, 1],
expectedNonzeroOutputIncludes: ["Gateway target:"],
},
{
id: "healthJson",
name: "health --json",
args: ["health", "--json"],
presets: ["startup"],
expectedExitCodes: [0, 1],
expectedNonzeroOutputIncludes: ['"ok"', '"gateway_transport_error"'],
},
{
id: "statusJson",
name: "status --json",
@@ -364,12 +382,16 @@ const COMMAND_CASES: readonly CommandCase[] = [
name: "gateway health --json",
args: ["gateway", "health", "--json"],
presets: ["real"],
expectedExitCodes: [0, 1],
expectedNonzeroOutputIncludes: ['"ok"', '"gateway_transport_error"'],
},
{
id: "configGetGatewayPort",
name: "config get gateway.port",
args: ["config", "get", "gateway.port"],
presets: ["real"],
expectedExitCodes: [0, 1],
expectedNonzeroOutputIncludes: ["Config path not found: gateway.port"],
},
] as const;
@@ -759,10 +781,23 @@ export function collectFailedSamples(result: SuiteResult): string[] {
}
for (const [sampleIndex, sample] of commandCase.samples.entries()) {
const label = `${result.entry} ${commandCase.id} sample ${sampleIndex + 1}`;
const expectedExitCodes = new Set(commandCase.expectedExitCodes ?? [0]);
if (sample.signal !== null) {
failures.push(`${label}: exited via signal ${sample.signal}`);
} else if (sample.exitCode !== 0) {
} else if (!expectedExitCodes.has(sample.exitCode ?? -1)) {
failures.push(`${label}: exited with code ${String(sample.exitCode)}`);
} else if (sample.exitCode !== 0) {
const output = `${sample.stdoutTail ?? ""}\n${sample.stderrTail ?? ""}`;
const missing = (commandCase.expectedNonzeroOutputIncludes ?? []).filter(
(snippet) => !output.includes(snippet),
);
if (missing.length > 0) {
failures.push(
`${label}: exited with expected code ${String(
sample.exitCode,
)} but output did not match expected clean-state markers (${missing.join(", ")})`,
);
}
}
}
}
@@ -790,6 +825,12 @@ async function buildSuiteResult(params: {
id: commandCase.id,
name: commandCase.name,
args: commandCase.args,
...(commandCase.expectedExitCodes && commandCase.expectedExitCodes.some((code) => code !== 0)
? { expectedExitCodes: [...commandCase.expectedExitCodes] }
: {}),
...(commandCase.expectedNonzeroOutputIncludes
? { expectedNonzeroOutputIncludes: [...commandCase.expectedNonzeroOutputIncludes] }
: {}),
contract:
commandCase.firstOutputBudgetMs != null || commandCase.exitBudgetMs != null
? {
@@ -819,11 +860,7 @@ function parseOptions(): CliOptions {
entrySecondary: parseFlagValue("--entry-secondary"),
runs: parsePositiveInt(parseFlagValue("--runs"), DEFAULT_RUNS, "--runs"),
warmup: parseNonNegativeInt(parseFlagValue("--warmup"), DEFAULT_WARMUP, "--warmup"),
timeoutMs: parsePositiveInt(
parseFlagValue("--timeout-ms"),
DEFAULT_TIMEOUT_MS,
"--timeout-ms",
),
timeoutMs: parsePositiveInt(parseFlagValue("--timeout-ms"), DEFAULT_TIMEOUT_MS, "--timeout-ms"),
json: hasFlag("--json"),
output: parseFlagValue("--output"),
cpuProfDir: parseFlagValue("--cpu-prof-dir"),

View File

@@ -86,6 +86,14 @@ function resolveCurrentReportPath() {
if (opts.report) {
return opts.report;
}
const build = spawnSync(process.execPath, ["scripts/ensure-cli-startup-build.mjs"], {
cwd: process.cwd(),
stdio: "inherit",
env: process.env,
});
if (build.status !== 0) {
process.exit(build.status ?? 1);
}
const reportPath = `.artifacts/cli-startup-bench.current.json`;
fs.mkdirSync(".artifacts", { recursive: true });
const args = [

View File

@@ -65,6 +65,78 @@ describe("bench-cli-startup", () => {
]);
});
// Accept path: a sample that exits with code 1 produces no failure entry when
// the case declares 1 in expectedExitCodes and the captured output contains
// every snippet listed in expectedNonzeroOutputIncludes.
it("allows declared nonzero exit codes for clean-state probes", () => {
const sample = {
ms: 10,
firstOutputMs: 5,
maxRssMb: 50,
exitCode: 1,
signal: null,
// The second line of stderr carries the "Gateway target:" marker required below.
stderrTail: "Health check failed: gateway closed\n Gateway target: ws://127.0.0.1:18789",
};
expect(
testing.collectFailedSamples({
entry: "openclaw.mjs",
cases: [
{
id: "health",
name: "health",
args: ["health"],
// Exit code 1 is tolerated, but only together with the marker snippet.
expectedExitCodes: [0, 1],
expectedNonzeroOutputIncludes: ["Gateway target:"],
contract: null,
samples: [sample],
summary: {
sampleCount: 1,
durationMs: { avg: 10, p50: 10, p95: 10, min: 10, max: 10 },
firstOutputMs: { avg: 5, p50: 5, p95: 5, min: 5, max: 5 },
maxRssMb: { avg: 50, p50: 50, p95: 50, min: 50, max: 50 },
exitSummary: "code:1x1",
},
},
],
}),
// An empty list means no sample was reported as failed.
).toEqual([]);
});
// Reject path: exit code 1 is declared in expectedExitCodes, but the captured
// output lacks the required marker, so the sample must still be reported as a
// failure whose message names the missing snippet.
it("rejects allowed nonzero exits without their expected clean-state output", () => {
const sample = {
ms: 10,
firstOutputMs: 5,
maxRssMb: 50,
exitCode: 1,
signal: null,
// Crash-style output that does not contain "Gateway target:".
stderrTail: "TypeError: crashed before output",
};
expect(
testing.collectFailedSamples({
entry: "openclaw.mjs",
cases: [
{
id: "health",
name: "health",
args: ["health"],
expectedExitCodes: [0, 1],
expectedNonzeroOutputIncludes: ["Gateway target:"],
contract: null,
samples: [sample],
summary: {
sampleCount: 1,
durationMs: { avg: 10, p50: 10, p95: 10, min: 10, max: 10 },
firstOutputMs: { avg: 5, p50: 5, p95: 5, min: 5, max: 5 },
maxRssMb: { avg: 50, p50: 50, p95: 50, min: 50, max: 50 },
exitSummary: "code:1x1",
},
},
],
}),
).toEqual([
// Label format is "<entry> <case id> sample <1-based index>"; the missing
// snippets are listed in the trailing parentheses.
"openclaw.mjs health sample 1: exited with expected code 1 but output did not match expected clean-state markers (Gateway target:)",
]);
});
it("rejects invalid measured run counts", () => {
expect(() => testing.parsePositiveInt("0", 5, "--runs")).toThrow(
"--runs must be an integer >= 1",

View File

@@ -19,6 +19,20 @@ describe("CLI startup benchmark script spawners", () => {
}
});
// Static text check on the budget script: it reads the script source instead of
// executing it, then asserts that the build step is present and that its path
// appears earlier in the file than the benchmark script path.
it("builds the source CLI before generating a startup budget report", () => {
const source = fs.readFileSync(
path.resolve(process.cwd(), "scripts/test-cli-startup-bench-budget.mjs"),
"utf8",
);
// The exact call prefix must appear verbatim in the script text.
expect(source).toContain(
'spawnSync(process.execPath, ["scripts/ensure-cli-startup-build.mjs"]',
);
// Compares first-occurrence offsets. indexOf returns -1 when the benchmark path
// is absent, which also makes this assertion fail.
expect(source.indexOf("scripts/ensure-cli-startup-build.mjs")).toBeLessThan(
source.indexOf("scripts/bench-cli-startup.ts"),
);
});
it("does not require unrelated fixture cases for a narrowed preset", () => {
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-bench-budget-test-"));
try {