diff --git a/scripts/bench-cli-startup.ts b/scripts/bench-cli-startup.ts index 11fd4781d73..7b050cb7cb0 100644 --- a/scripts/bench-cli-startup.ts +++ b/scripts/bench-cli-startup.ts @@ -1,6 +1,6 @@ // Bench Cli Startup script supports OpenClaw repository automation. import { spawn } from "node:child_process"; -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; import os from "node:os"; import path from "node:path"; import { pathToFileURL } from "node:url"; @@ -61,8 +61,36 @@ type SuiteResult = { }>; }; +type BenchmarkReport = { + primary: SuiteResult; + secondary?: SuiteResult | null; +}; + +type CaseDelta = { + id: string; + name: string; + durationAvgDeltaMs: number; + durationAvgDeltaPct: number; + maxRssAvgDeltaMb: number | null; + maxRssAvgDeltaPct: number | null; +}; + +type BenchmarkComparison = { + baseline: string; + candidate: string; + deltas: CaseDelta[]; +}; + +type BenchmarkComparisonResult = { + baseline: SuiteResult; + candidate: SuiteResult; + comparison: BenchmarkComparison; +}; + type CliOptions = { cases: CommandCase[]; + compareBaseline?: string; + compareCandidate?: string; entryPrimary: string; entrySecondary?: string; runs: number; @@ -797,8 +825,26 @@ function printSuite(result: SuiteResult): void { } function printDelta(primary: SuiteResult, secondary: SuiteResult): void { - const primaryById = new Map(primary.cases.map((commandCase) => [commandCase.id, commandCase])); + const deltas = buildCaseDeltas(primary, secondary); console.log("Delta (secondary - primary, avg)"); + for (const delta of deltas) { + const durationDelta = delta.durationAvgDeltaMs; + const durationPct = delta.durationAvgDeltaPct; + const durationSign = durationDelta > 0 ? "+" : ""; + let line = `${delta.name.padEnd(24)} ${durationSign}${formatMs(durationDelta)} (${durationSign}${durationPct.toFixed(1)}%)`; + if (delta.maxRssAvgDeltaMb != null && delta.maxRssAvgDeltaPct != null) { + const rssDelta = delta.maxRssAvgDeltaMb; + const rssPct = delta.maxRssAvgDeltaPct; + const rssSign = rssDelta > 0 ? "+" : ""; + line += ` rss ${rssSign}${formatMb(rssDelta)} (${rssSign}${rssPct.toFixed(1)}%)`; + } + console.log(line); + } +} + +function buildCaseDeltas(primary: SuiteResult, secondary: SuiteResult): CaseDelta[] { + const primaryById = new Map(primary.cases.map((commandCase) => [commandCase.id, commandCase])); + const deltas: CaseDelta[] = []; for (const commandCase of secondary.cases) { const baseline = primaryById.get(commandCase.id); if (!baseline) { @@ -809,17 +855,24 @@ function printDelta(primary: SuiteResult, secondary: SuiteResult): void { baseline.summary.durationMs.avg > 0 ? (durationDelta / baseline.summary.durationMs.avg) * 100 : 0; - const durationSign = durationDelta > 0 ? "+" : ""; - let line = `${commandCase.name.padEnd(24)} ${durationSign}${formatMs(durationDelta)} (${durationSign}${durationPct.toFixed(1)}%)`; - if (baseline.summary.maxRssMb && commandCase.summary.maxRssMb) { - const rssDelta = commandCase.summary.maxRssMb.avg - baseline.summary.maxRssMb.avg; - const rssPct = - baseline.summary.maxRssMb.avg > 0 ? (rssDelta / baseline.summary.maxRssMb.avg) * 100 : 0; - const rssSign = rssDelta > 0 ? "+" : ""; - line += ` rss ${rssSign}${formatMb(rssDelta)} (${rssSign}${rssPct.toFixed(1)}%)`; - } - console.log(line); + const rssDelta = + baseline.summary.maxRssMb && commandCase.summary.maxRssMb + ? commandCase.summary.maxRssMb.avg - baseline.summary.maxRssMb.avg + : null; + const rssPct = + rssDelta != null && baseline.summary.maxRssMb && baseline.summary.maxRssMb.avg > 0 + ? (rssDelta / baseline.summary.maxRssMb.avg) * 100 + : null; + deltas.push({ + id: commandCase.id, + name: commandCase.name, + durationAvgDeltaMs: durationDelta, + durationAvgDeltaPct: durationPct, + maxRssAvgDeltaMb: rssDelta, + maxRssAvgDeltaPct: rssPct, + }); } + return deltas; } export function collectFailedSamples(result: SuiteResult): string[] { @@ -910,6 +963,8 @@ function parseOptions(): CliOptions { }); return { cases, + compareBaseline: parseFlagValue("--compare-baseline"), + compareCandidate: parseFlagValue("--compare-candidate"), entryPrimary: parseFlagValue("--entry-primary") ?? parseFlagValue("--entry") ?? DEFAULT_ENTRY, entrySecondary: parseFlagValue("--entry-secondary"), runs: parsePositiveInt(parseFlagValue("--runs"), DEFAULT_RUNS, "--runs"), @@ -938,6 +993,8 @@ Options: --warmup Warmup runs per case (default: ${DEFAULT_WARMUP}) --timeout-ms Per-run timeout (default: ${DEFAULT_TIMEOUT_MS}) --output Write machine-readable JSON to a file + --compare-baseline Read a saved JSON report as the baseline + --compare-candidate Read a saved JSON report as the candidate and print deltas --cpu-prof-dir Write V8 CPU profiles for each run --heap-prof-dir Write V8 heap profiles for each run --json Emit machine-readable JSON @@ -948,6 +1005,39 @@ Case ids: `); } +function readBenchmarkReport(filePath: string): BenchmarkReport { + return JSON.parse(readFileSync(filePath, "utf8")) as BenchmarkReport; +} + +function writeJsonOutput(filePath: string, value: unknown): void { + mkdirSync(path.dirname(filePath), { recursive: true }); + writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, "utf8"); +} + +function readBenchmarkComparison( + baselinePath: string, + candidatePath: string, +): BenchmarkComparisonResult { + const baseline = readBenchmarkReport(baselinePath); + const candidate = readBenchmarkReport(candidatePath); + return { + baseline: baseline.primary, + candidate: candidate.primary, + comparison: { + baseline: baselinePath, + candidate: candidatePath, + deltas: buildCaseDeltas(baseline.primary, candidate.primary), + }, + }; +} + +function readBenchmarkComparisonForTesting( + baselinePath: string, + candidatePath: string, +): { comparison: unknown } { + return readBenchmarkComparison(baselinePath, candidatePath); +} + async function main(): Promise { if (hasFlag("--help")) { printUsage(); @@ -955,6 +1045,24 @@ async function main(): Promise { } const options = parseOptions(); + if (options.compareBaseline || options.compareCandidate) { + if (!options.compareBaseline || !options.compareCandidate) { + throw new Error("--compare-baseline and --compare-candidate must be provided together"); + } + const { baseline, candidate, comparison } = readBenchmarkComparison( + options.compareBaseline, + options.compareCandidate, + ); + if (options.output) { + writeJsonOutput(options.output, comparison); + } + if (options.json) { + console.log(JSON.stringify(comparison, null, 2)); + return; + } + printDelta(baseline, candidate); + return; + } const tmpDir = mkdtempSync(path.join(os.tmpdir(), "openclaw-cli-bench-")); const rssHookPath = buildRssHook(tmpDir); try { @@ -987,8 +1095,7 @@ async function main(): Promise { ]; if (options.output) { - mkdirSync(path.dirname(options.output), { recursive: true }); - writeFileSync(options.output, `${JSON.stringify(report, null, 2)}\n`, "utf8"); + writeJsonOutput(options.output, report); } if (options.json) { @@ -1040,6 +1147,8 @@ export const testing = { parseGatewayPortEnv, parseNonNegativeInt, parsePositiveInt, + readBenchmarkComparison: readBenchmarkComparisonForTesting, + writeJsonOutput, }; if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) { diff --git a/src/scripts/test-projects.test.ts b/src/scripts/test-projects.test.ts index 87e4b3a80ec..4673ca79e43 100644 --- a/src/scripts/test-projects.test.ts +++ b/src/scripts/test-projects.test.ts @@ -879,6 +879,7 @@ describe("test-projects args", () => { "src/scripts/sync-plugin-versions.test.ts", "test/helpers/temp-dir.test.ts", "test/scripts/android-pin-version.test.ts", + "test/scripts/bench-cli-startup.test.ts", "test/scripts/ios-configure-signing.test.ts", "test/scripts/ios-pin-version.test.ts", "test/scripts/ios-team-id.test.ts", @@ -886,6 +887,7 @@ describe("test-projects args", () => { "test/scripts/kitchen-sink-rpc-walk.test.ts", "test/scripts/openai-chat-tools-client.test.ts", "test/scripts/report-test-temp-creations.test.ts", + "test/scripts/test-projects.test.ts", "test/test-env.test.ts", "test/vitest-scoped-config.test.ts", ], diff --git a/test/scripts/bench-cli-startup.test.ts b/test/scripts/bench-cli-startup.test.ts index d073a98e10d..1e63198ed28 100644 --- a/test/scripts/bench-cli-startup.test.ts +++ b/test/scripts/bench-cli-startup.test.ts @@ -1,6 +1,9 @@ // Bench Cli Startup tests cover bench cli startup script behavior. +import { existsSync, readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; import { describe, expect, it } from "vitest"; import { testing } from "../../scripts/bench-cli-startup.ts"; +import { createTempDirTracker } from "../helpers/temp-dir.js"; function withEnv(env: Record, callback: () => T): T { const previous = new Map(); @@ -26,6 +29,72 @@ function withEnv(env: Record, callback: () => T): } describe("bench-cli-startup", () => { + it("writes compare-mode JSON output and creates parent directories", () => { + const tempDirs = createTempDirTracker(); + const tmpDir = tempDirs.make("openclaw-cli-startup-compare-output-"); + try { + const baselinePath = join(tmpDir, "baseline.json"); + const candidatePath = join(tmpDir, "candidate.json"); + const outputPath = join(tmpDir, "nested", "comparison.json"); + const makeReport = (durationAvg: number, maxRssAvg: number) => ({ + primary: { + entry: "openclaw.mjs", + cases: [ + { + id: "version", + name: "--version", + args: ["--version"], + contract: null, + samples: [], + summary: { + sampleCount: 1, + durationMs: { + avg: durationAvg, + p50: durationAvg, + p95: durationAvg, + min: durationAvg, + max: durationAvg, + }, + firstOutputMs: null, + maxRssMb: { + avg: maxRssAvg, + p50: maxRssAvg, + p95: maxRssAvg, + min: maxRssAvg, + max: maxRssAvg, + }, + exitSummary: "code:0x1", + }, + }, + ], + }, + }); + + writeFileSync(baselinePath, JSON.stringify(makeReport(100, 50)), "utf8"); + writeFileSync(candidatePath, JSON.stringify(makeReport(125, 60)), "utf8"); + + const { comparison } = testing.readBenchmarkComparison(baselinePath, candidatePath); + testing.writeJsonOutput(outputPath, comparison); + expect(existsSync(outputPath)).toBe(true); + expect(JSON.parse(readFileSync(outputPath, "utf8"))).toEqual({ + baseline: baselinePath, + candidate: candidatePath, + deltas: [ + { + id: "version", + name: "--version", + durationAvgDeltaMs: 25, + durationAvgDeltaPct: 25, + maxRssAvgDeltaMb: 10, + maxRssAvgDeltaPct: 20, + }, + ], + }); + } finally { + tempDirs.cleanup(); + } + }); + it("fails reports with no measured samples", () => { expect( testing.collectFailedSamples({ diff --git a/test/scripts/test-projects.test.ts b/test/scripts/test-projects.test.ts index 3b8e77e26de..eeb49d8b7f3 100644 --- a/test/scripts/test-projects.test.ts +++ b/test/scripts/test-projects.test.ts @@ -1918,6 +1918,8 @@ describe("scripts/test-projects changed-target routing", () => { "test/helpers/temp-dir.ts": "export const tempDir = 'x';\n", "test/helpers/temp-dir.test.ts": "import { tempDir } from './temp-dir.js';\nvoid tempDir;\n", + "test/scripts/bench-cli-startup.test.ts": + "import { tempDir } from '../helpers/temp-dir.js';\nvoid tempDir;\n", "src/foo.test.ts": "import { tempDir } from '../test/helpers/temp-dir.js';\nvoid tempDir;\n", }, @@ -1926,7 +1928,11 @@ describe("scripts/test-projects changed-target routing", () => { }, ); - expect(targets).toEqual(["test/helpers/temp-dir.test.ts", "src/foo.test.ts"]); + expect(targets).toEqual([ + "test/helpers/temp-dir.test.ts", + "src/foo.test.ts", + "test/scripts/bench-cli-startup.test.ts", + ]); }); it("keeps the broad changed run available for shared test helpers", () => {