test(perf): compare saved CLI startup benchmarks (#94812)

Summary: - Adds saved CLI startup benchmark report comparison flags to `scripts/bench-cli-startup.ts`, plus JSON output coverage and changed-target routing expectations for the new test-helper importer. - PR surface: Tests +77, Other +109. Total +186 across 4 files. - Reproducibility: not applicable, as this is a feature/tooling PR. The prior PR defects were source-proven in review comments and the current head addresses them; I did not run local tests because this review was read-only. Automerge notes: - Ran the ClawSweeper repair loop before final review. - Included post-review commit in the final squash: test(perf): compare saved CLI startup benchmarks Validation: - ClawSweeper review passed for head 1afa110f1b. - Required merge gates passed before the squash merge. Prepared head SHA: 1afa110f1b Review: https://github.com/openclaw/openclaw/pull/94812#issuecomment-4748785428 Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Co-authored-by: Felix Isaac Lim <38658663+FelixIsaac@users.noreply.github.com>
2026-06-24 05:29:29 +00:00 · 2026-06-19 09:37:47 +00:00
parent 5b3d652c05
commit 2e0dfda462
4 changed files with 201 additions and 15 deletions
--- a/scripts/bench-cli-startup.ts
+++ b/scripts/bench-cli-startup.ts
@@ -1,6 +1,6 @@
 // Bench Cli Startup script supports OpenClaw repository automation.
 import { spawn } from "node:child_process";
-import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
 import os from "node:os";
 import path from "node:path";
 import { pathToFileURL } from "node:url";
@@ -61,8 +61,36 @@ type SuiteResult = {
  }>;
 };

+type BenchmarkReport = {
+  primary: SuiteResult;
+  secondary?: SuiteResult | null;
+};
+
+type CaseDelta = {
+  id: string;
+  name: string;
+  durationAvgDeltaMs: number;
+  durationAvgDeltaPct: number;
+  maxRssAvgDeltaMb: number | null;
+  maxRssAvgDeltaPct: number | null;
+};
+
+type BenchmarkComparison = {
+  baseline: string;
+  candidate: string;
+  deltas: CaseDelta[];
+};
+
+type BenchmarkComparisonResult = {
+  baseline: SuiteResult;
+  candidate: SuiteResult;
+  comparison: BenchmarkComparison;
+};
+
 type CliOptions = {
  cases: CommandCase[];
+  compareBaseline?: string;
+  compareCandidate?: string;
  entryPrimary: string;
  entrySecondary?: string;
  runs: number;
@@ -797,8 +825,26 @@ function printSuite(result: SuiteResult): void {
 }

 function printDelta(primary: SuiteResult, secondary: SuiteResult): void {
-  const primaryById = new Map(primary.cases.map((commandCase) => [commandCase.id, commandCase]));
+  const deltas = buildCaseDeltas(primary, secondary);
  console.log("Delta (secondary - primary, avg)");
+  for (const delta of deltas) {
+    const durationDelta = delta.durationAvgDeltaMs;
+    const durationPct = delta.durationAvgDeltaPct;
+    const durationSign = durationDelta > 0 ? "+" : "";
+    let line = `${delta.name.padEnd(24)} ${durationSign}${formatMs(durationDelta)} (${durationSign}${durationPct.toFixed(1)}%)`;
+    if (delta.maxRssAvgDeltaMb != null && delta.maxRssAvgDeltaPct != null) {
+      const rssDelta = delta.maxRssAvgDeltaMb;
+      const rssPct = delta.maxRssAvgDeltaPct;
+      const rssSign = rssDelta > 0 ? "+" : "";
+      line += ` rss ${rssSign}${formatMb(rssDelta)} (${rssSign}${rssPct.toFixed(1)}%)`;
+    }
+    console.log(line);
+  }
+}
+
+function buildCaseDeltas(primary: SuiteResult, secondary: SuiteResult): CaseDelta[] {
+  const primaryById = new Map(primary.cases.map((commandCase) => [commandCase.id, commandCase]));
+  const deltas: CaseDelta[] = [];
  for (const commandCase of secondary.cases) {
    const baseline = primaryById.get(commandCase.id);
    if (!baseline) {
@@ -809,17 +855,24 @@ function printDelta(primary: SuiteResult, secondary: SuiteResult): void {
      baseline.summary.durationMs.avg > 0
        ? (durationDelta / baseline.summary.durationMs.avg) * 100
        : 0;
-    const durationSign = durationDelta > 0 ? "+" : "";
-    let line = `${commandCase.name.padEnd(24)} ${durationSign}${formatMs(durationDelta)} (${durationSign}${durationPct.toFixed(1)}%)`;
-    if (baseline.summary.maxRssMb && commandCase.summary.maxRssMb) {
-      const rssDelta = commandCase.summary.maxRssMb.avg - baseline.summary.maxRssMb.avg;
-      const rssPct =
-        baseline.summary.maxRssMb.avg > 0 ? (rssDelta / baseline.summary.maxRssMb.avg) * 100 : 0;
-      const rssSign = rssDelta > 0 ? "+" : "";
-      line += ` rss ${rssSign}${formatMb(rssDelta)} (${rssSign}${rssPct.toFixed(1)}%)`;
-    }
-    console.log(line);
+    const rssDelta =
+      baseline.summary.maxRssMb && commandCase.summary.maxRssMb
+        ? commandCase.summary.maxRssMb.avg - baseline.summary.maxRssMb.avg
+        : null;
+    const rssPct =
+      rssDelta != null && baseline.summary.maxRssMb && baseline.summary.maxRssMb.avg > 0
+        ? (rssDelta / baseline.summary.maxRssMb.avg) * 100
+        : null;
+    deltas.push({
+      id: commandCase.id,
+      name: commandCase.name,
+      durationAvgDeltaMs: durationDelta,
+      durationAvgDeltaPct: durationPct,
+      maxRssAvgDeltaMb: rssDelta,
+      maxRssAvgDeltaPct: rssPct,
+    });
  }
+  return deltas;
 }

 export function collectFailedSamples(result: SuiteResult): string[] {
@@ -910,6 +963,8 @@ function parseOptions(): CliOptions {
  });
  return {
    cases,
+    compareBaseline: parseFlagValue("--compare-baseline"),
+    compareCandidate: parseFlagValue("--compare-candidate"),
    entryPrimary: parseFlagValue("--entry-primary") ?? parseFlagValue("--entry") ?? DEFAULT_ENTRY,
    entrySecondary: parseFlagValue("--entry-secondary"),
    runs: parsePositiveInt(parseFlagValue("--runs"), DEFAULT_RUNS, "--runs"),
@@ -938,6 +993,8 @@ Options:
  --warmup <n>                 Warmup runs per case (default: ${DEFAULT_WARMUP})
  --timeout-ms <ms>            Per-run timeout (default: ${DEFAULT_TIMEOUT_MS})
  --output <path>              Write machine-readable JSON to a file
+  --compare-baseline <path>    Read a saved JSON report as the baseline
+  --compare-candidate <path>   Read a saved JSON report as the candidate and print deltas
  --cpu-prof-dir <dir>         Write V8 CPU profiles for each run
  --heap-prof-dir <dir>        Write V8 heap profiles for each run
  --json                       Emit machine-readable JSON
@@ -948,6 +1005,39 @@ Case ids:
 `);
 }

+function readBenchmarkReport(filePath: string): BenchmarkReport {
+  return JSON.parse(readFileSync(filePath, "utf8")) as BenchmarkReport;
+}
+
+function writeJsonOutput(filePath: string, value: unknown): void {
+  mkdirSync(path.dirname(filePath), { recursive: true });
+  writeFileSync(filePath, `${JSON.stringify(value, null, 2)}\n`, "utf8");
+}
+
+function readBenchmarkComparison(
+  baselinePath: string,
+  candidatePath: string,
+): BenchmarkComparisonResult {
+  const baseline = readBenchmarkReport(baselinePath);
+  const candidate = readBenchmarkReport(candidatePath);
+  return {
+    baseline: baseline.primary,
+    candidate: candidate.primary,
+    comparison: {
+      baseline: baselinePath,
+      candidate: candidatePath,
+      deltas: buildCaseDeltas(baseline.primary, candidate.primary),
+    },
+  };
+}
+
+function readBenchmarkComparisonForTesting(
+  baselinePath: string,
+  candidatePath: string,
+): { comparison: unknown } {
+  return readBenchmarkComparison(baselinePath, candidatePath);
+}
+
 async function main(): Promise<void> {
  if (hasFlag("--help")) {
    printUsage();
@@ -955,6 +1045,24 @@ async function main(): Promise<void> {
  }

  const options = parseOptions();
+  if (options.compareBaseline || options.compareCandidate) {
+    if (!options.compareBaseline || !options.compareCandidate) {
+      throw new Error("--compare-baseline and --compare-candidate must be provided together");
+    }
+    const { baseline, candidate, comparison } = readBenchmarkComparison(
+      options.compareBaseline,
+      options.compareCandidate,
+    );
+    if (options.output) {
+      writeJsonOutput(options.output, comparison);
+    }
+    if (options.json) {
+      console.log(JSON.stringify(comparison, null, 2));
+      return;
+    }
+    printDelta(baseline, candidate);
+    return;
+  }
  const tmpDir = mkdtempSync(path.join(os.tmpdir(), "openclaw-cli-bench-"));
  const rssHookPath = buildRssHook(tmpDir);
  try {
@@ -987,8 +1095,7 @@ async function main(): Promise<void> {
    ];

    if (options.output) {
-      mkdirSync(path.dirname(options.output), { recursive: true });
-      writeFileSync(options.output, `${JSON.stringify(report, null, 2)}\n`, "utf8");
+      writeJsonOutput(options.output, report);
    }

    if (options.json) {
@@ -1040,6 +1147,8 @@ export const testing = {
  parseGatewayPortEnv,
  parseNonNegativeInt,
  parsePositiveInt,
+  readBenchmarkComparison: readBenchmarkComparisonForTesting,
+  writeJsonOutput,
 };

 if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {
--- a/src/scripts/test-projects.test.ts
+++ b/src/scripts/test-projects.test.ts
@@ -879,6 +879,7 @@ describe("test-projects args", () => {
          "src/scripts/sync-plugin-versions.test.ts",
          "test/helpers/temp-dir.test.ts",
          "test/scripts/android-pin-version.test.ts",
+          "test/scripts/bench-cli-startup.test.ts",
          "test/scripts/ios-configure-signing.test.ts",
          "test/scripts/ios-pin-version.test.ts",
          "test/scripts/ios-team-id.test.ts",
@@ -886,6 +887,7 @@ describe("test-projects args", () => {
          "test/scripts/kitchen-sink-rpc-walk.test.ts",
          "test/scripts/openai-chat-tools-client.test.ts",
          "test/scripts/report-test-temp-creations.test.ts",
+          "test/scripts/test-projects.test.ts",
          "test/test-env.test.ts",
          "test/vitest-scoped-config.test.ts",
        ],
--- a/test/scripts/bench-cli-startup.test.ts
+++ b/test/scripts/bench-cli-startup.test.ts
@@ -1,6 +1,9 @@
 // Bench Cli Startup tests cover bench cli startup script behavior.
+import { existsSync, readFileSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
 import { describe, expect, it } from "vitest";
 import { testing } from "../../scripts/bench-cli-startup.ts";
+import { createTempDirTracker } from "../helpers/temp-dir.js";

 function withEnv<T>(env: Record<string, string | undefined>, callback: () => T): T {
  const previous = new Map<string, string | undefined>();
@@ -26,6 +29,72 @@ function withEnv<T>(env: Record<string, string | undefined>, callback: () => T):
 }

 describe("bench-cli-startup", () => {
+  it("writes compare-mode JSON output and creates parent directories", () => {
+    const tempDirs = createTempDirTracker();
+    const tmpDir = tempDirs.make("openclaw-cli-startup-compare-output-");
+    try {
+      const baselinePath = join(tmpDir, "baseline.json");
+      const candidatePath = join(tmpDir, "candidate.json");
+      const outputPath = join(tmpDir, "nested", "comparison.json");
+      const makeReport = (durationAvg: number, maxRssAvg: number) => ({
+        primary: {
+          entry: "openclaw.mjs",
+          cases: [
+            {
+              id: "version",
+              name: "--version",
+              args: ["--version"],
+              contract: null,
+              samples: [],
+              summary: {
+                sampleCount: 1,
+                durationMs: {
+                  avg: durationAvg,
+                  p50: durationAvg,
+                  p95: durationAvg,
+                  min: durationAvg,
+                  max: durationAvg,
+                },
+                firstOutputMs: null,
+                maxRssMb: {
+                  avg: maxRssAvg,
+                  p50: maxRssAvg,
+                  p95: maxRssAvg,
+                  min: maxRssAvg,
+                  max: maxRssAvg,
+                },
+                exitSummary: "code:0x1",
+              },
+            },
+          ],
+        },
+      });
+
+      writeFileSync(baselinePath, JSON.stringify(makeReport(100, 50)), "utf8");
+      writeFileSync(candidatePath, JSON.stringify(makeReport(125, 60)), "utf8");
+
+      const { comparison } = testing.readBenchmarkComparison(baselinePath, candidatePath);
+      testing.writeJsonOutput(outputPath, comparison);
+      expect(existsSync(outputPath)).toBe(true);
+      expect(JSON.parse(readFileSync(outputPath, "utf8"))).toEqual({
+        baseline: baselinePath,
+        candidate: candidatePath,
+        deltas: [
+          {
+            id: "version",
+            name: "--version",
+            durationAvgDeltaMs: 25,
+            durationAvgDeltaPct: 25,
+            maxRssAvgDeltaMb: 10,
+            maxRssAvgDeltaPct: 20,
+          },
+        ],
+      });
+    } finally {
+      tempDirs.cleanup();
+    }
+  });
+
  it("fails reports with no measured samples", () => {
    expect(
      testing.collectFailedSamples({
--- a/test/scripts/test-projects.test.ts
+++ b/test/scripts/test-projects.test.ts
@@ -1918,6 +1918,8 @@ describe("scripts/test-projects changed-target routing", () => {
        "test/helpers/temp-dir.ts": "export const tempDir = 'x';\n",
        "test/helpers/temp-dir.test.ts":
          "import { tempDir } from './temp-dir.js';\nvoid tempDir;\n",
+        "test/scripts/bench-cli-startup.test.ts":
+          "import { tempDir } from '../helpers/temp-dir.js';\nvoid tempDir;\n",
        "src/foo.test.ts":
          "import { tempDir } from '../test/helpers/temp-dir.js';\nvoid tempDir;\n",
      },
@@ -1926,7 +1928,11 @@ describe("scripts/test-projects changed-target routing", () => {
      },
    );

-    expect(targets).toEqual(["test/helpers/temp-dir.test.ts", "src/foo.test.ts"]);
+    expect(targets).toEqual([
+      "test/helpers/temp-dir.test.ts",
+      "src/foo.test.ts",
+      "test/scripts/bench-cli-startup.test.ts",
+    ]);
  });

  it("keeps the broad changed run available for shared test helpers", () => {