mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-24 05:29:29 +00:00
test(perf): compare saved CLI startup benchmarks (#94812)
Summary: - Adds saved CLI startup benchmark report comparison flags to `scripts/bench-cli-startup.ts`, plus JSON output coverage and changed-target routing expectations for the new test-helper importer. - PR surface: Tests +77, Other +109. Total +186 across 4 files. - Reproducibility: not applicable, as this is a feature/tooling PR. The prior PR defects were source-proven in review comments and the current head addresses them; I did not run local tests because this review was read-only. Automerge notes: - Ran the ClawSweeper repair loop before final review. - Included post-review commit in the final squash: test(perf): compare saved CLI startup benchmarks Validation: - ClawSweeper review passed for head 1afa110f1b. - Required merge gates passed before the squash merge. Prepared head SHA: 1afa110f1b Review: https://github.com/openclaw/openclaw/pull/94812#issuecomment-4748785428 Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: Felix Isaac Lim <38658663+FelixIsaac@users.noreply.github.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
// Bench Cli Startup script supports OpenClaw repository automation.
|
||||
import { spawn } from "node:child_process";
|
||||
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { pathToFileURL } from "node:url";
|
||||
@@ -61,8 +61,36 @@ type SuiteResult = {
|
||||
}>;
|
||||
};
|
||||
|
||||
/** Shape expected of a saved benchmark report file once its JSON has been parsed. */
type BenchmarkReport = {
|
||||
// Primary suite result; this is the suite that compare mode reads from each report.
primary: SuiteResult;
|
||||
// Secondary suite result; may be absent or null.
secondary?: SuiteResult | null;
|
||||
};
|
||||
|
||||
/** Difference in averaged metrics for one benchmark case, measured against the baseline suite. */
type CaseDelta = {
|
||||
// Case id, taken from the compared (non-baseline) suite's case.
id: string;
|
||||
// Case display name, taken from the compared (non-baseline) suite's case.
name: string;
|
||||
// Average duration delta in milliseconds (reported as "secondary - primary").
durationAvgDeltaMs: number;
|
||||
// Duration delta as a percentage of the baseline average; 0 when the baseline average is not positive.
durationAvgDeltaPct: number;
|
||||
// Average max-RSS delta in MB (compared minus baseline); null when either side has no maxRssMb summary.
maxRssAvgDeltaMb: number | null;
|
||||
// RSS delta as a percentage of the baseline average; null when the delta is null or the baseline average is not positive.
maxRssAvgDeltaPct: number | null;
|
||||
};
|
||||
|
||||
/** Serializable comparison of two saved reports; this is what compare mode writes and prints as JSON. */
type BenchmarkComparison = {
|
||||
// Path of the report file used as the baseline.
baseline: string;
|
||||
// Path of the report file used as the candidate.
candidate: string;
|
||||
// Per-case deltas between the two reports' primary suites.
deltas: CaseDelta[];
|
||||
};
|
||||
|
||||
/** Result of loading two saved reports: both primary suites plus the computed comparison. */
type BenchmarkComparisonResult = {
|
||||
// Primary suite of the baseline report.
baseline: SuiteResult;
|
||||
// Primary suite of the candidate report.
candidate: SuiteResult;
|
||||
// File paths and per-case deltas for the pair of reports.
comparison: BenchmarkComparison;
|
||||
};
|
||||
|
||||
type CliOptions = {
|
||||
cases: CommandCase[];
|
||||
compareBaseline?: string;
|
||||
compareCandidate?: string;
|
||||
entryPrimary: string;
|
||||
entrySecondary?: string;
|
||||
runs: number;
|
||||
@@ -797,8 +825,26 @@ function printSuite(result: SuiteResult): void {
|
||||
}
|
||||
|
||||
/**
 * Prints the per-case average deltas between two suite results to stdout.
 *
 * Writes a heading line, then one line per delta produced by `buildCaseDeltas`.
 * Every line shows the duration delta and its percentage; the RSS delta and
 * percentage are appended only when both values are present for that case.
 *
 * @param primary - Suite result used as the baseline.
 * @param secondary - Suite result compared against the baseline.
 */
function printDelta(primary: SuiteResult, secondary: SuiteResult): void {
  // Deltas are computed before anything is printed, so a failure here emits no heading.
  const deltas = buildCaseDeltas(primary, secondary);
  console.log("Delta (secondary - primary, avg)");
  for (const delta of deltas) {
    const durationDelta = delta.durationAvgDeltaMs;
    const durationPct = delta.durationAvgDeltaPct;
    // Only positive values get an explicit sign; the same sign prefixes the percentage.
    const durationSign = durationDelta > 0 ? "+" : "";
    let line = `${delta.name.padEnd(24)} ${durationSign}${formatMs(durationDelta)} (${durationSign}${durationPct.toFixed(1)}%)`;
    if (delta.maxRssAvgDeltaMb != null && delta.maxRssAvgDeltaPct != null) {
      const rssDelta = delta.maxRssAvgDeltaMb;
      const rssPct = delta.maxRssAvgDeltaPct;
      const rssSign = rssDelta > 0 ? "+" : "";
      line += ` rss ${rssSign}${formatMb(rssDelta)} (${rssSign}${rssPct.toFixed(1)}%)`;
    }
    console.log(line);
  }
}
|
||||
|
||||
function buildCaseDeltas(primary: SuiteResult, secondary: SuiteResult): CaseDelta[] {
|
||||
const primaryById = new Map(primary.cases.map((commandCase) => [commandCase.id, commandCase]));
|
||||
const deltas: CaseDelta[] = [];
|
||||
for (const commandCase of secondary.cases) {
|
||||
const baseline = primaryById.get(commandCase.id);
|
||||
if (!baseline) {
|
||||
@@ -809,17 +855,24 @@ function printDelta(primary: SuiteResult, secondary: SuiteResult): void {
|
||||
baseline.summary.durationMs.avg > 0
|
||||
? (durationDelta / baseline.summary.durationMs.avg) * 100
|
||||
: 0;
|
||||
const durationSign = durationDelta > 0 ? "+" : "";
|
||||
let line = `${commandCase.name.padEnd(24)} ${durationSign}${formatMs(durationDelta)} (${durationSign}${durationPct.toFixed(1)}%)`;
|
||||
if (baseline.summary.maxRssMb && commandCase.summary.maxRssMb) {
|
||||
const rssDelta = commandCase.summary.maxRssMb.avg - baseline.summary.maxRssMb.avg;
|
||||
const rssPct =
|
||||
baseline.summary.maxRssMb.avg > 0 ? (rssDelta / baseline.summary.maxRssMb.avg) * 100 : 0;
|
||||
const rssSign = rssDelta > 0 ? "+" : "";
|
||||
line += ` rss ${rssSign}${formatMb(rssDelta)} (${rssSign}${rssPct.toFixed(1)}%)`;
|
||||
}
|
||||
console.log(line);
|
||||
const rssDelta =
|
||||
baseline.summary.maxRssMb && commandCase.summary.maxRssMb
|
||||
? commandCase.summary.maxRssMb.avg - baseline.summary.maxRssMb.avg
|
||||
: null;
|
||||
const rssPct =
|
||||
rssDelta != null && baseline.summary.maxRssMb && baseline.summary.maxRssMb.avg > 0
|
||||
? (rssDelta / baseline.summary.maxRssMb.avg) * 100
|
||||
: null;
|
||||
deltas.push({
|
||||
id: commandCase.id,
|
||||
name: commandCase.name,
|
||||
durationAvgDeltaMs: durationDelta,
|
||||
durationAvgDeltaPct: durationPct,
|
||||
maxRssAvgDeltaMb: rssDelta,
|
||||
maxRssAvgDeltaPct: rssPct,
|
||||
});
|
||||
}
|
||||
return deltas;
|
||||
}
|
||||
|
||||
export function collectFailedSamples(result: SuiteResult): string[] {
|
||||
@@ -910,6 +963,8 @@ function parseOptions(): CliOptions {
|
||||
});
|
||||
return {
|
||||
cases,
|
||||
compareBaseline: parseFlagValue("--compare-baseline"),
|
||||
compareCandidate: parseFlagValue("--compare-candidate"),
|
||||
entryPrimary: parseFlagValue("--entry-primary") ?? parseFlagValue("--entry") ?? DEFAULT_ENTRY,
|
||||
entrySecondary: parseFlagValue("--entry-secondary"),
|
||||
runs: parsePositiveInt(parseFlagValue("--runs"), DEFAULT_RUNS, "--runs"),
|
||||
@@ -938,6 +993,8 @@ Options:
|
||||
--warmup <n> Warmup runs per case (default: ${DEFAULT_WARMUP})
|
||||
--timeout-ms <ms> Per-run timeout (default: ${DEFAULT_TIMEOUT_MS})
|
||||
--output <path> Write machine-readable JSON to a file
|
||||
--compare-baseline <path> Read a saved JSON report as the baseline
|
||||
--compare-candidate <path> Read a saved JSON report as the candidate and print deltas
|
||||
--cpu-prof-dir <dir> Write V8 CPU profiles for each run
|
||||
--heap-prof-dir <dir> Write V8 heap profiles for each run
|
||||
--json Emit machine-readable JSON
|
||||
@@ -948,6 +1005,39 @@ Case ids:
|
||||
`);
|
||||
}
|
||||
|
||||
/**
 * Loads a saved benchmark report from disk.
 *
 * The file is read as UTF-8 and parsed as JSON. The parsed value is then checked
 * for the part of the report shape that comparison relies on, a `primary` suite
 * holding a `cases` array, so that a wrong or truncated file fails here with the
 * offending path instead of surfacing later as an opaque property-access error.
 *
 * @param filePath - Path of the JSON report to read.
 * @returns The parsed report.
 * @throws SyntaxError when the file content is not valid JSON.
 * @throws TypeError when the parsed value has no `primary.cases` array.
 */
function readBenchmarkReport(filePath: string): BenchmarkReport {
  // File-system errors propagate unchanged; they already carry the path.
  const raw = readFileSync(filePath, "utf8");
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(`Invalid JSON in benchmark report ${filePath}: ${reason}`);
  }
  const primary =
    typeof parsed === "object" && parsed !== null
      ? (parsed as { primary?: unknown }).primary
      : undefined;
  const cases =
    typeof primary === "object" && primary !== null
      ? (primary as { cases?: unknown }).cases
      : undefined;
  if (!Array.isArray(cases)) {
    throw new TypeError(
      `Benchmark report ${filePath} must contain a "primary" suite with a "cases" array`,
    );
  }
  return parsed as BenchmarkReport;
}
|
||||
|
||||
/**
 * Serializes a value as pretty-printed JSON and writes it to a file.
 *
 * Any missing parent directories of the target are created first. The output is
 * indented with two spaces and terminated by a single newline.
 *
 * @param filePath - Destination path of the JSON file.
 * @param value - Value to serialize.
 */
function writeJsonOutput(filePath: string, value: unknown): void {
  const parentDir = path.dirname(filePath);
  mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  writeFileSync(filePath, serialized + "\n", "utf8");
}
|
||||
|
||||
/**
 * Loads two saved benchmark reports and compares their primary suites.
 *
 * Both files are read before either report is inspected. Only the `primary`
 * suite of each report takes part in the comparison.
 *
 * @param baselinePath - Path of the report treated as the baseline.
 * @param candidatePath - Path of the report treated as the candidate.
 * @returns Both primary suites and a comparison holding the two paths and the per-case deltas.
 */
function readBenchmarkComparison(
  baselinePath: string,
  candidatePath: string,
): BenchmarkComparisonResult {
  const baselineReport = readBenchmarkReport(baselinePath);
  const candidateReport = readBenchmarkReport(candidatePath);
  const baselineSuite = baselineReport.primary;
  const candidateSuite = candidateReport.primary;
  const comparison: BenchmarkComparison = {
    baseline: baselinePath,
    candidate: candidatePath,
    deltas: buildCaseDeltas(baselineSuite, candidateSuite),
  };
  return { baseline: baselineSuite, candidate: candidateSuite, comparison };
}
|
||||
|
||||
/**
 * Test-facing wrapper around `readBenchmarkComparison`.
 *
 * Delegates unchanged, but narrows the declared return type so callers only see
 * an opaque `comparison` value.
 *
 * @param baselinePath - Path of the baseline report.
 * @param candidatePath - Path of the candidate report.
 * @returns The comparison result, typed as `{ comparison: unknown }`.
 */
function readBenchmarkComparisonForTesting(
  baselinePath: string,
  candidatePath: string,
): { comparison: unknown } {
  const result: { comparison: unknown } = readBenchmarkComparison(baselinePath, candidatePath);
  return result;
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
if (hasFlag("--help")) {
|
||||
printUsage();
|
||||
@@ -955,6 +1045,24 @@ async function main(): Promise<void> {
|
||||
}
|
||||
|
||||
const options = parseOptions();
|
||||
if (options.compareBaseline || options.compareCandidate) {
|
||||
if (!options.compareBaseline || !options.compareCandidate) {
|
||||
throw new Error("--compare-baseline and --compare-candidate must be provided together");
|
||||
}
|
||||
const { baseline, candidate, comparison } = readBenchmarkComparison(
|
||||
options.compareBaseline,
|
||||
options.compareCandidate,
|
||||
);
|
||||
if (options.output) {
|
||||
writeJsonOutput(options.output, comparison);
|
||||
}
|
||||
if (options.json) {
|
||||
console.log(JSON.stringify(comparison, null, 2));
|
||||
return;
|
||||
}
|
||||
printDelta(baseline, candidate);
|
||||
return;
|
||||
}
|
||||
const tmpDir = mkdtempSync(path.join(os.tmpdir(), "openclaw-cli-bench-"));
|
||||
const rssHookPath = buildRssHook(tmpDir);
|
||||
try {
|
||||
@@ -987,8 +1095,7 @@ async function main(): Promise<void> {
|
||||
];
|
||||
|
||||
if (options.output) {
|
||||
mkdirSync(path.dirname(options.output), { recursive: true });
|
||||
writeFileSync(options.output, `${JSON.stringify(report, null, 2)}\n`, "utf8");
|
||||
writeJsonOutput(options.output, report);
|
||||
}
|
||||
|
||||
if (options.json) {
|
||||
@@ -1040,6 +1147,8 @@ export const testing = {
|
||||
parseGatewayPortEnv,
|
||||
parseNonNegativeInt,
|
||||
parsePositiveInt,
|
||||
readBenchmarkComparison: readBenchmarkComparisonForTesting,
|
||||
writeJsonOutput,
|
||||
};
|
||||
|
||||
if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {
|
||||
|
||||
@@ -879,6 +879,7 @@ describe("test-projects args", () => {
|
||||
"src/scripts/sync-plugin-versions.test.ts",
|
||||
"test/helpers/temp-dir.test.ts",
|
||||
"test/scripts/android-pin-version.test.ts",
|
||||
"test/scripts/bench-cli-startup.test.ts",
|
||||
"test/scripts/ios-configure-signing.test.ts",
|
||||
"test/scripts/ios-pin-version.test.ts",
|
||||
"test/scripts/ios-team-id.test.ts",
|
||||
@@ -886,6 +887,7 @@ describe("test-projects args", () => {
|
||||
"test/scripts/kitchen-sink-rpc-walk.test.ts",
|
||||
"test/scripts/openai-chat-tools-client.test.ts",
|
||||
"test/scripts/report-test-temp-creations.test.ts",
|
||||
"test/scripts/test-projects.test.ts",
|
||||
"test/test-env.test.ts",
|
||||
"test/vitest-scoped-config.test.ts",
|
||||
],
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
// Bench Cli Startup tests cover bench cli startup script behavior.
|
||||
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { testing } from "../../scripts/bench-cli-startup.ts";
|
||||
import { createTempDirTracker } from "../helpers/temp-dir.js";
|
||||
|
||||
function withEnv<T>(env: Record<string, string | undefined>, callback: () => T): T {
|
||||
const previous = new Map<string, string | undefined>();
|
||||
@@ -26,6 +29,72 @@ function withEnv<T>(env: Record<string, string | undefined>, callback: () => T):
|
||||
}
|
||||
|
||||
describe("bench-cli-startup", () => {
|
||||
it("writes compare-mode JSON output and creates parent directories", () => {
|
||||
const tempDirs = createTempDirTracker();
|
||||
const tmpDir = tempDirs.make("openclaw-cli-startup-compare-output-");
|
||||
try {
|
||||
const baselinePath = join(tmpDir, "baseline.json");
|
||||
const candidatePath = join(tmpDir, "candidate.json");
|
||||
const outputPath = join(tmpDir, "nested", "comparison.json");
|
||||
const makeReport = (durationAvg: number, maxRssAvg: number) => ({
|
||||
primary: {
|
||||
entry: "openclaw.mjs",
|
||||
cases: [
|
||||
{
|
||||
id: "version",
|
||||
name: "--version",
|
||||
args: ["--version"],
|
||||
contract: null,
|
||||
samples: [],
|
||||
summary: {
|
||||
sampleCount: 1,
|
||||
durationMs: {
|
||||
avg: durationAvg,
|
||||
p50: durationAvg,
|
||||
p95: durationAvg,
|
||||
min: durationAvg,
|
||||
max: durationAvg,
|
||||
},
|
||||
firstOutputMs: null,
|
||||
maxRssMb: {
|
||||
avg: maxRssAvg,
|
||||
p50: maxRssAvg,
|
||||
p95: maxRssAvg,
|
||||
min: maxRssAvg,
|
||||
max: maxRssAvg,
|
||||
},
|
||||
exitSummary: "code:0x1",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
writeFileSync(baselinePath, JSON.stringify(makeReport(100, 50)), "utf8");
|
||||
writeFileSync(candidatePath, JSON.stringify(makeReport(125, 60)), "utf8");
|
||||
|
||||
const { comparison } = testing.readBenchmarkComparison(baselinePath, candidatePath);
|
||||
testing.writeJsonOutput(outputPath, comparison);
|
||||
expect(existsSync(outputPath)).toBe(true);
|
||||
expect(JSON.parse(readFileSync(outputPath, "utf8"))).toEqual({
|
||||
baseline: baselinePath,
|
||||
candidate: candidatePath,
|
||||
deltas: [
|
||||
{
|
||||
id: "version",
|
||||
name: "--version",
|
||||
durationAvgDeltaMs: 25,
|
||||
durationAvgDeltaPct: 25,
|
||||
maxRssAvgDeltaMb: 10,
|
||||
maxRssAvgDeltaPct: 20,
|
||||
},
|
||||
],
|
||||
});
|
||||
} finally {
|
||||
tempDirs.cleanup();
|
||||
}
|
||||
});
|
||||
|
||||
it("fails reports with no measured samples", () => {
|
||||
expect(
|
||||
testing.collectFailedSamples({
|
||||
|
||||
@@ -1918,6 +1918,8 @@ describe("scripts/test-projects changed-target routing", () => {
|
||||
"test/helpers/temp-dir.ts": "export const tempDir = 'x';\n",
|
||||
"test/helpers/temp-dir.test.ts":
|
||||
"import { tempDir } from './temp-dir.js';\nvoid tempDir;\n",
|
||||
"test/scripts/bench-cli-startup.test.ts":
|
||||
"import { tempDir } from '../helpers/temp-dir.js';\nvoid tempDir;\n",
|
||||
"src/foo.test.ts":
|
||||
"import { tempDir } from '../test/helpers/temp-dir.js';\nvoid tempDir;\n",
|
||||
},
|
||||
@@ -1926,7 +1928,11 @@ describe("scripts/test-projects changed-target routing", () => {
|
||||
},
|
||||
);
|
||||
|
||||
expect(targets).toEqual(["test/helpers/temp-dir.test.ts", "src/foo.test.ts"]);
|
||||
expect(targets).toEqual([
|
||||
"test/helpers/temp-dir.test.ts",
|
||||
"src/foo.test.ts",
|
||||
"test/scripts/bench-cli-startup.test.ts",
|
||||
]);
|
||||
});
|
||||
|
||||
it("keeps the broad changed run available for shared test helpers", () => {
|
||||
|
||||
Reference in New Issue
Block a user