test(perf): compare saved CLI startup benchmarks (#94812)

Summary:
- Adds saved CLI startup benchmark report comparison flags to `scripts/bench-cli-startup.ts`, plus JSON output coverage and changed-target routing expectations for the new test-helper importer.
- PR surface: Tests +77, Other +109. Total +186 across 4 files.
- Reproducibility: not applicable, as this is a feature/tooling PR. The prior PR defects were source-proven in review comments and the current head addresses them; I did not run local tests because this review was read-only.

Automerge notes:
- Ran the ClawSweeper repair loop before final review.
- Included post-review commit in the final squash: test(perf): compare saved CLI startup benchmarks

Validation:
- ClawSweeper review passed for head 1afa110f1b.
- Required merge gates passed before the squash merge.

Prepared head SHA: 1afa110f1b
Review: https://github.com/openclaw/openclaw/pull/94812#issuecomment-4748785428

Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
Co-authored-by: Felix Isaac Lim <38658663+FelixIsaac@users.noreply.github.com>
This commit is contained in:
clawsweeper[bot]
2026-06-19 09:37:47 +00:00
committed by GitHub
parent 5b3d652c05
commit 2e0dfda462
4 changed files with 201 additions and 15 deletions

View File

@@ -1,6 +1,6 @@
// Bench Cli Startup script supports OpenClaw repository automation.
import { spawn } from "node:child_process";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import os from "node:os";
import path from "node:path";
import { pathToFileURL } from "node:url";
@@ -61,8 +61,36 @@ type SuiteResult = {
}>;
};
/** Shape that a saved benchmark report JSON file is read as by `readBenchmarkReport`. */
type BenchmarkReport = {
// Suite that compare mode uses for both the baseline and the candidate report.
primary: SuiteResult;
// Optional second suite; may be omitted or explicitly null.
secondary?: SuiteResult | null;
};
/** Per-case difference between the averages of two suites, as produced by `buildCaseDeltas`. */
type CaseDelta = {
// Case identifier used to match the same case across both suites.
id: string;
// Display name of the case (used as the row label by `printDelta`).
name: string;
// Change in average duration, in milliseconds.
durationAvgDeltaMs: number;
// Duration change as a percentage of the baseline average (0 when the baseline average is not positive).
durationAvgDeltaPct: number;
// Change in average max RSS, in MB; null when either suite has no RSS summary for the case.
maxRssAvgDeltaMb: number | null;
// RSS change as a percentage of the baseline average; null when the delta is null or the baseline average is not positive.
maxRssAvgDeltaPct: number | null;
};
/** Machine-readable comparison payload emitted by compare mode (`--json` / `--output`). */
type BenchmarkComparison = {
// Path of the baseline report file that was read.
baseline: string;
// Path of the candidate report file that was read.
candidate: string;
// One entry per case matched between the two reports.
deltas: CaseDelta[];
};
/** Return value of `readBenchmarkComparison`: both loaded suites plus the serializable comparison. */
type BenchmarkComparisonResult = {
// `primary` suite of the baseline report.
baseline: SuiteResult;
// `primary` suite of the candidate report.
candidate: SuiteResult;
// Paths and per-case deltas derived from the two suites above.
comparison: BenchmarkComparison;
};
type CliOptions = {
cases: CommandCase[];
compareBaseline?: string;
compareCandidate?: string;
entryPrimary: string;
entrySecondary?: string;
runs: number;
@@ -797,8 +825,26 @@ function printSuite(result: SuiteResult): void {
}
/**
 * Prints a human-readable list of per-case average deltas between two suites.
 *
 * Each row shows the duration delta (secondary minus primary) with its
 * percentage; RSS figures are appended only when both the absolute and the
 * percentage RSS deltas are available.
 *
 * @param primary - Suite used as the baseline.
 * @param secondary - Suite whose averages are compared against `primary`.
 */
function printDelta(primary: SuiteResult, secondary: SuiteResult): void {
  // Case matching and delta math are done by buildCaseDeltas; this function
  // only formats, so no local id lookup table is needed here.
  const deltas = buildCaseDeltas(primary, secondary);
  console.log("Delta (secondary - primary, avg)");
  for (const delta of deltas) {
    const durationDelta = delta.durationAvgDeltaMs;
    const durationPct = delta.durationAvgDeltaPct;
    // Negative numbers already carry their own sign; only positives need "+".
    const durationSign = durationDelta > 0 ? "+" : "";
    let line = `${delta.name.padEnd(24)} ${durationSign}${formatMs(durationDelta)} (${durationSign}${durationPct.toFixed(1)}%)`;
    if (delta.maxRssAvgDeltaMb != null && delta.maxRssAvgDeltaPct != null) {
      const rssDelta = delta.maxRssAvgDeltaMb;
      const rssPct = delta.maxRssAvgDeltaPct;
      const rssSign = rssDelta > 0 ? "+" : "";
      line += ` rss ${rssSign}${formatMb(rssDelta)} (${rssSign}${rssPct.toFixed(1)}%)`;
    }
    console.log(line);
  }
}
function buildCaseDeltas(primary: SuiteResult, secondary: SuiteResult): CaseDelta[] {
const primaryById = new Map(primary.cases.map((commandCase) => [commandCase.id, commandCase]));
const deltas: CaseDelta[] = [];
for (const commandCase of secondary.cases) {
const baseline = primaryById.get(commandCase.id);
if (!baseline) {
@@ -809,17 +855,24 @@ function printDelta(primary: SuiteResult, secondary: SuiteResult): void {
baseline.summary.durationMs.avg > 0
? (durationDelta / baseline.summary.durationMs.avg) * 100
: 0;
const durationSign = durationDelta > 0 ? "+" : "";
let line = `${commandCase.name.padEnd(24)} ${durationSign}${formatMs(durationDelta)} (${durationSign}${durationPct.toFixed(1)}%)`;
if (baseline.summary.maxRssMb && commandCase.summary.maxRssMb) {
const rssDelta = commandCase.summary.maxRssMb.avg - baseline.summary.maxRssMb.avg;
const rssPct =
baseline.summary.maxRssMb.avg > 0 ? (rssDelta / baseline.summary.maxRssMb.avg) * 100 : 0;
const rssSign = rssDelta > 0 ? "+" : "";
line += ` rss ${rssSign}${formatMb(rssDelta)} (${rssSign}${rssPct.toFixed(1)}%)`;
}
console.log(line);
const rssDelta =
baseline.summary.maxRssMb && commandCase.summary.maxRssMb
? commandCase.summary.maxRssMb.avg - baseline.summary.maxRssMb.avg
: null;
const rssPct =
rssDelta != null && baseline.summary.maxRssMb && baseline.summary.maxRssMb.avg > 0
? (rssDelta / baseline.summary.maxRssMb.avg) * 100
: null;
deltas.push({
id: commandCase.id,
name: commandCase.name,
durationAvgDeltaMs: durationDelta,
durationAvgDeltaPct: durationPct,
maxRssAvgDeltaMb: rssDelta,
maxRssAvgDeltaPct: rssPct,
});
}
return deltas;
}
export function collectFailedSamples(result: SuiteResult): string[] {
@@ -910,6 +963,8 @@ function parseOptions(): CliOptions {
});
return {
cases,
compareBaseline: parseFlagValue("--compare-baseline"),
compareCandidate: parseFlagValue("--compare-candidate"),
entryPrimary: parseFlagValue("--entry-primary") ?? parseFlagValue("--entry") ?? DEFAULT_ENTRY,
entrySecondary: parseFlagValue("--entry-secondary"),
runs: parsePositiveInt(parseFlagValue("--runs"), DEFAULT_RUNS, "--runs"),
@@ -938,6 +993,8 @@ Options:
--warmup <n> Warmup runs per case (default: ${DEFAULT_WARMUP})
--timeout-ms <ms> Per-run timeout (default: ${DEFAULT_TIMEOUT_MS})
--output <path> Write machine-readable JSON to a file
--compare-baseline <path> Read a saved JSON report as the baseline
--compare-candidate <path> Read a saved JSON report as the candidate and print deltas
--cpu-prof-dir <dir> Write V8 CPU profiles for each run
--heap-prof-dir <dir> Write V8 heap profiles for each run
--json Emit machine-readable JSON
@@ -948,6 +1005,39 @@ Case ids:
`);
}
/**
 * Loads a saved benchmark report from disk.
 *
 * @param filePath - Path to a JSON report file.
 * @returns The parsed report.
 * @throws Whatever `readFileSync` / `JSON.parse` throw for unreadable or
 *   syntactically invalid files, and an `Error` naming the file when the
 *   parsed value has no `primary` suite with a `cases` array.
 */
function readBenchmarkReport(filePath: string): BenchmarkReport {
  const parsed: unknown = JSON.parse(readFileSync(filePath, "utf8"));
  const primary =
    typeof parsed === "object" && parsed !== null
      ? (parsed as { primary?: unknown }).primary
      : undefined;
  // Compare mode dereferences `primary.cases` right away; fail here with the
  // file path instead of an opaque TypeError further down.
  if (
    typeof primary !== "object" ||
    primary === null ||
    !Array.isArray((primary as { cases?: unknown }).cases)
  ) {
    throw new Error(`Invalid benchmark report (missing primary.cases): ${filePath}`);
  }
  return parsed as BenchmarkReport;
}
/**
 * Writes `value` to `filePath` as pretty-printed JSON followed by a newline,
 * creating any missing parent directories first.
 *
 * @param filePath - Destination file path.
 * @param value - Value to serialize with two-space indentation.
 */
function writeJsonOutput(filePath: string, value: unknown): void {
  const parentDir = path.dirname(filePath);
  mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  writeFileSync(filePath, `${serialized}\n`, "utf8");
}
/**
 * Loads two saved reports and compares their `primary` suites.
 *
 * @param baselinePath - Path to the baseline report JSON.
 * @param candidatePath - Path to the candidate report JSON.
 * @returns Both `primary` suites plus a comparison holding the two input
 *   paths and the per-case deltas from `buildCaseDeltas`.
 */
function readBenchmarkComparison(
  baselinePath: string,
  candidatePath: string,
): BenchmarkComparisonResult {
  // Read both files before touching their contents.
  const baselineReport = readBenchmarkReport(baselinePath);
  const candidateReport = readBenchmarkReport(candidatePath);
  const baselineSuite = baselineReport.primary;
  const candidateSuite = candidateReport.primary;
  const comparison: BenchmarkComparison = {
    baseline: baselinePath,
    candidate: candidatePath,
    deltas: buildCaseDeltas(baselineSuite, candidateSuite),
  };
  return { baseline: baselineSuite, candidate: candidateSuite, comparison };
}
/**
 * Wrapper around `readBenchmarkComparison` whose declared return type only
 * promises an opaque `comparison` field.
 *
 * @param baselinePath - Path to the baseline report JSON.
 * @param candidatePath - Path to the candidate report JSON.
 * @returns The object produced by `readBenchmarkComparison`.
 */
function readBenchmarkComparisonForTesting(
  baselinePath: string,
  candidatePath: string,
): { comparison: unknown } {
  const result = readBenchmarkComparison(baselinePath, candidatePath);
  return result;
}
async function main(): Promise<void> {
if (hasFlag("--help")) {
printUsage();
@@ -955,6 +1045,24 @@ async function main(): Promise<void> {
}
const options = parseOptions();
if (options.compareBaseline || options.compareCandidate) {
if (!options.compareBaseline || !options.compareCandidate) {
throw new Error("--compare-baseline and --compare-candidate must be provided together");
}
const { baseline, candidate, comparison } = readBenchmarkComparison(
options.compareBaseline,
options.compareCandidate,
);
if (options.output) {
writeJsonOutput(options.output, comparison);
}
if (options.json) {
console.log(JSON.stringify(comparison, null, 2));
return;
}
printDelta(baseline, candidate);
return;
}
const tmpDir = mkdtempSync(path.join(os.tmpdir(), "openclaw-cli-bench-"));
const rssHookPath = buildRssHook(tmpDir);
try {
@@ -987,8 +1095,7 @@ async function main(): Promise<void> {
];
if (options.output) {
mkdirSync(path.dirname(options.output), { recursive: true });
writeFileSync(options.output, `${JSON.stringify(report, null, 2)}\n`, "utf8");
writeJsonOutput(options.output, report);
}
if (options.json) {
@@ -1040,6 +1147,8 @@ export const testing = {
parseGatewayPortEnv,
parseNonNegativeInt,
parsePositiveInt,
readBenchmarkComparison: readBenchmarkComparisonForTesting,
writeJsonOutput,
};
if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {

View File

@@ -879,6 +879,7 @@ describe("test-projects args", () => {
"src/scripts/sync-plugin-versions.test.ts",
"test/helpers/temp-dir.test.ts",
"test/scripts/android-pin-version.test.ts",
"test/scripts/bench-cli-startup.test.ts",
"test/scripts/ios-configure-signing.test.ts",
"test/scripts/ios-pin-version.test.ts",
"test/scripts/ios-team-id.test.ts",
@@ -886,6 +887,7 @@ describe("test-projects args", () => {
"test/scripts/kitchen-sink-rpc-walk.test.ts",
"test/scripts/openai-chat-tools-client.test.ts",
"test/scripts/report-test-temp-creations.test.ts",
"test/scripts/test-projects.test.ts",
"test/test-env.test.ts",
"test/vitest-scoped-config.test.ts",
],

View File

@@ -1,6 +1,9 @@
// Bench Cli Startup tests cover bench cli startup script behavior.
import { existsSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { describe, expect, it } from "vitest";
import { testing } from "../../scripts/bench-cli-startup.ts";
import { createTempDirTracker } from "../helpers/temp-dir.js";
function withEnv<T>(env: Record<string, string | undefined>, callback: () => T): T {
const previous = new Map<string, string | undefined>();
@@ -26,6 +29,72 @@ function withEnv<T>(env: Record<string, string | undefined>, callback: () => T):
}
describe("bench-cli-startup", () => {
it("writes compare-mode JSON output and creates parent directories", () => {
  const tracker = createTempDirTracker();
  const workDir = tracker.make("openclaw-cli-startup-compare-output-");

  // The fixture has a single sample, so every statistic equals the average.
  const flatStats = (value: number) => ({
    avg: value,
    p50: value,
    p95: value,
    min: value,
    max: value,
  });
  // Minimal saved report containing one "--version" case.
  const buildReport = (durationAvg: number, maxRssAvg: number) => ({
    primary: {
      entry: "openclaw.mjs",
      cases: [
        {
          id: "version",
          name: "--version",
          args: ["--version"],
          contract: null,
          samples: [],
          summary: {
            sampleCount: 1,
            durationMs: flatStats(durationAvg),
            firstOutputMs: null,
            maxRssMb: flatStats(maxRssAvg),
            exitSummary: "code:0x1",
          },
        },
      ],
    },
  });

  try {
    const baselinePath = join(workDir, "baseline.json");
    const candidatePath = join(workDir, "candidate.json");
    // The "nested" directory does not exist yet; writeJsonOutput must create it.
    const outputPath = join(workDir, "nested", "comparison.json");

    writeFileSync(baselinePath, JSON.stringify(buildReport(100, 50)), "utf8");
    writeFileSync(candidatePath, JSON.stringify(buildReport(125, 60)), "utf8");

    const result = testing.readBenchmarkComparison(baselinePath, candidatePath);
    testing.writeJsonOutput(outputPath, result.comparison);

    expect(existsSync(outputPath)).toBe(true);
    const written = JSON.parse(readFileSync(outputPath, "utf8"));
    expect(written).toEqual({
      baseline: baselinePath,
      candidate: candidatePath,
      deltas: [
        {
          id: "version",
          name: "--version",
          durationAvgDeltaMs: 25,
          durationAvgDeltaPct: 25,
          maxRssAvgDeltaMb: 10,
          maxRssAvgDeltaPct: 20,
        },
      ],
    });
  } finally {
    tracker.cleanup();
  }
});
it("fails reports with no measured samples", () => {
expect(
testing.collectFailedSamples({

View File

@@ -1918,6 +1918,8 @@ describe("scripts/test-projects changed-target routing", () => {
"test/helpers/temp-dir.ts": "export const tempDir = 'x';\n",
"test/helpers/temp-dir.test.ts":
"import { tempDir } from './temp-dir.js';\nvoid tempDir;\n",
"test/scripts/bench-cli-startup.test.ts":
"import { tempDir } from '../helpers/temp-dir.js';\nvoid tempDir;\n",
"src/foo.test.ts":
"import { tempDir } from '../test/helpers/temp-dir.js';\nvoid tempDir;\n",
},
@@ -1926,7 +1928,11 @@ describe("scripts/test-projects changed-target routing", () => {
},
);
expect(targets).toEqual(["test/helpers/temp-dir.test.ts", "src/foo.test.ts"]);
expect(targets).toEqual([
"test/helpers/temp-dir.test.ts",
"src/foo.test.ts",
"test/scripts/bench-cli-startup.test.ts",
]);
});
it("keeps the broad changed run available for shared test helpers", () => {