From 25ca5cc8dfcd51f494b46089a1cfcf5e50bcced4 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sat, 2 May 2026 17:45:55 +0100
Subject: [PATCH] ci: add source performance probes

---
 .github/workflows/openclaw-performance.yml    | 113 ++++++++
 docs/ci.md                                    |   4 +-
 docs/help/testing.md                          |   4 +-
 package.json                                  |   1 +
 scripts/bench-cli-startup.ts                  |  14 +-
 .../openclaw-performance-source-summary.mjs   | 252 ++++++++++++++++++
 6 files changed, 385 insertions(+), 3 deletions(-)
 create mode 100644 scripts/openclaw-performance-source-summary.mjs

diff --git a/.github/workflows/openclaw-performance.yml b/.github/workflows/openclaw-performance.yml
index 8c92a73af23..1388790d216 100644
--- a/.github/workflows/openclaw-performance.yml
+++ b/.github/workflows/openclaw-performance.yml
@@ -89,6 +89,7 @@ jobs:
       REPORT_DIR: ${{ github.workspace }}/.artifacts/kova/reports/${{ matrix.lane }}
       BUNDLE_DIR: ${{ github.workspace }}/.artifacts/kova/bundles/${{ matrix.lane }}
       SUMMARY_DIR: ${{ github.workspace }}/.artifacts/kova/summaries
+      SOURCE_PERF_DIR: ${{ github.workspace }}/.artifacts/openclaw-performance/source/${{ matrix.lane }}
       LANE_ID: ${{ matrix.lane }}
       PROFILE: ${{ inputs.profile || 'diagnostic' }}
       REQUESTED_REPEAT: ${{ inputs.repeat || '3' }}
@@ -297,6 +298,105 @@ jobs:
             exit "$status"
           fi
 
+      - name: Run OpenClaw source performance probes
+        if: ${{ steps.lane.outputs.run == 'true' && matrix.lane == 'mock-provider' }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          source_runs="$REQUESTED_REPEAT"
+          if ! [[ "$source_runs" =~ ^[0-9]+$ ]] || [[ "$source_runs" -lt 1 ]]; then
+            source_runs=3 # non-numeric or zero repeat: fall back to three runs
+          fi
+
+          mkdir -p "$SOURCE_PERF_DIR/mock-hello"
+          pnpm build
+
+          pnpm test:gateway:cpu-scenarios \
+            --output-dir "$SOURCE_PERF_DIR/gateway-cpu" \
+            --runs "$source_runs" \
+            --warmup 1 \
+            --skip-qa \
+            --startup-case default \
+            --startup-case skipChannels \
+            --startup-case oneInternalHook \
+            --startup-case allInternalHooks \
+            --startup-case fiftyPlugins \
+            --startup-case fiftyStartupLazyPlugins
+
+          for run_index in $(seq 1 "$source_runs"); do
+            run_dir="$SOURCE_PERF_DIR/mock-hello/run-$(printf '%03d' "$run_index")"
+            pnpm openclaw qa suite \
+              --provider-mode mock-openai \
+              --concurrency 1 \
+              --output-dir "$(realpath --relative-to="$GITHUB_WORKSPACE" "$run_dir")" \
+              --scenario channel-chat-baseline
+          done
+
+          gateway_home="$(mktemp -d)"
+          gateway_port="$(node -e "const net=require('node:net'); const s=net.createServer(); s.listen(0,'127.0.0.1',()=>{ console.log(s.address().port); s.close(); });")"
+          gateway_state="$gateway_home/.openclaw"
+          gateway_config="$gateway_state/openclaw.json"
+          gateway_log="$SOURCE_PERF_DIR/cli-gateway.log"
+          gateway_pid=""
+          mkdir -p "$gateway_state"
+          cat > "$gateway_config" <<EOF
+          {
+            "browser": { "enabled": false },
+            "gateway": {
+              "mode": "local",
+              "port": ${gateway_port},
+              "bind": "loopback",
+              "auth": { "mode": "none" },
+              "controlUi": { "enabled": false },
+              "tailscale": { "mode": "off" }
+            },
+            "plugins": {
+              "enabled": true,
+              "entries": { "browser": { "enabled": false } }
+            }
+          }
+          EOF
+          cleanup_gateway() {
+            if [[ -n "${gateway_pid:-}" ]] && kill -0 "$gateway_pid" 2>/dev/null; then
+              kill "$gateway_pid" 2>/dev/null || true
+              wait "$gateway_pid" 2>/dev/null || true
+            fi
+            rm -rf "$gateway_home"
+          }
+          trap cleanup_gateway EXIT # also stops the gateway when a later command fails
+          OPENCLAW_HOME="$gateway_home" OPENCLAW_STATE_DIR="$gateway_state" OPENCLAW_CONFIG_PATH="$gateway_config" OPENCLAW_GATEWAY_PORT="$gateway_port" OPENCLAW_SKIP_CHANNELS=1 \
+            node dist/entry.js gateway run --bind loopback --port "$gateway_port" --auth none --allow-unconfigured --force \
+            >"$gateway_log" 2>&1 &
+          gateway_pid="$!"
+
+          for _ in $(seq 1 120); do # up to 120 attempts, one second apart; failures are expected
+            if curl -fs "http://127.0.0.1:${gateway_port}/healthz" >/dev/null; then
+              break
+            fi
+            if ! kill -0 "$gateway_pid" 2>/dev/null; then
+              cat "$gateway_log" >&2
+              exit 1
+            fi
+            sleep 1
+          done
+          curl -fsS "http://127.0.0.1:${gateway_port}/healthz" >/dev/null || { cat "$gateway_log" >&2; exit 1; }
+
+          OPENCLAW_HOME="$gateway_home" OPENCLAW_STATE_DIR="$gateway_state" OPENCLAW_CONFIG_PATH="$gateway_config" OPENCLAW_GATEWAY_PORT="$gateway_port" \
+            node --import tsx scripts/bench-cli-startup.ts \
+            --case gatewayHealthJson \
+            --case configGetGatewayPort \
+            --runs "$source_runs" \
+            --warmup 1 \
+            --output "$SOURCE_PERF_DIR/cli-startup.json"
+          cleanup_gateway
+          trap - EXIT
+
+          pnpm perf:source:summary \
+            --source-dir "$SOURCE_PERF_DIR" \
+            --output "$SOURCE_PERF_DIR/index.md"
+
+          cat "$SOURCE_PERF_DIR/index.md" >> "$GITHUB_STEP_SUMMARY"
+
+
       - name: Upload Kova artifacts
         if: ${{ always() && steps.lane.outputs.run == 'true' }}
         uses: actions/upload-artifact@v5
@@ -306,6 +406,7 @@ jobs:
             .artifacts/kova/reports/${{ matrix.lane }}
             .artifacts/kova/bundles/${{ matrix.lane }}
             .artifacts/kova/summaries/${{ matrix.lane }}.md
+            .artifacts/openclaw-performance/source/${{ matrix.lane }}
           if-no-files-found: ignore
           retention-days: ${{ matrix.deep_profile == 'true' && 14 || 30 }}
 
@@ -348,6 +449,18 @@ jobs:
             mkdir -p "$dest/bundles"
             cp -R "$BUNDLE_DIR"/. "$dest/bundles/"
           fi
+          if [[ -d "$SOURCE_PERF_DIR" ]]; then
+            mkdir -p "$dest/source"
+            cp -R "$SOURCE_PERF_DIR"/. "$dest/source/"
+            if [[ -f "$SOURCE_PERF_DIR/index.md" ]]; then
+              cat >> "$dest/index.md" <<'EOF'
+
+          ## Source probes
+
+          Additional gateway boot, memory, plugin pressure, mock hello-loop, and CLI startup numbers are in [source/index.md](source/index.md).
+          EOF
+            fi
+          fi
           cat > "${reports_root}/openclaw-performance/${ref_slug}/latest-${LANE_ID}.json" <<EOF
           {
             "repository": "${GITHUB_REPOSITORY}",
diff --git a/docs/ci.md b/docs/ci.md
index 99cf7a639eb..02683e60b04 100644
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -143,7 +143,9 @@ The workflow installs OCM from a pinned release and Kova from the pinned `kova_r
 - `mock-deep-profile`: CPU/heap/trace profiling for startup, gateway, and agent-turn hotspots.
 - `live-gpt54`: a real OpenAI `openai/gpt-5.4` agent turn, skipped when `OPENAI_API_KEY` is unavailable.
 
-Every lane uploads GitHub artifacts. When `CLAWGRIT_REPORTS_TOKEN` is configured, the workflow also commits `report.json`, `report.md`, bundles, and `index.md` into `openclaw/clawgrit-reports` under `openclaw-performance/<ref>/<run-id>-<attempt>/<lane>/`. The current branch pointer is written as `openclaw-performance/<ref>/latest-<lane>.json`.
+The mock-provider lane also runs OpenClaw-native source probes after the Kova pass: gateway boot timing and memory across default, hook, and 50-plugin startup cases; repeated mock-OpenAI `channel-chat-baseline` hello loops; and CLI startup commands against the booted gateway. The source probe Markdown summary lives at `source/index.md` in the report bundle, with raw JSON beside it.
+
+Every lane uploads GitHub artifacts. When `CLAWGRIT_REPORTS_TOKEN` is configured, the workflow also commits `report.json`, `report.md`, bundles, `index.md`, and source-probe artifacts into `openclaw/clawgrit-reports` under `openclaw-performance/<ref>/<run-id>-<attempt>/<lane>/`. The current branch pointer is written as `openclaw-performance/<ref>/latest-<lane>.json`.
 
 ## Full Release Validation
 
diff --git a/docs/help/testing.md b/docs/help/testing.md
index 45efdc22e50..07eda5b4656 100644
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -50,7 +50,9 @@ When debugging real providers/models (requires real creds):
   `live_gpt54=true` for a real `openai/gpt-5.4` agent turn or
   `deep_profile=true` for Kova CPU/heap/trace artifacts. Daily scheduled runs
   publish mock-provider, deep-profile, and GPT 5.4 lane artifacts to
-  `openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured.
+  `openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured. The
+  mock-provider report also includes source-level gateway boot, memory,
+  plugin-pressure, repeated fake-model hello-loop, and CLI startup numbers.
 - Docker live model sweep: `pnpm test:docker:live-models`
   - Each selected model now runs a text turn plus a small file-read-style probe.
     Models whose metadata advertises `image` input also run a tiny image turn.
diff --git a/package.json b/package.json
index 06b81d7bcc0..f01b7b8741a 100644
--- a/package.json
+++ b/package.json
@@ -1429,6 +1429,7 @@
     "openclaw": "node scripts/run-node.mjs",
     "openclaw:rpc": "node scripts/run-node.mjs agent --mode rpc --json",
     "perf:kova:summary": "node scripts/kova-ci-summary.mjs",
+    "perf:source:summary": "node scripts/openclaw-performance-source-summary.mjs",
     "plugin-sdk:api:check": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --check",
     "plugin-sdk:api:gen": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --write",
     "plugin-sdk:check-exports": "node scripts/sync-plugin-sdk-exports.mjs --check",
diff --git a/scripts/bench-cli-startup.ts b/scripts/bench-cli-startup.ts
index c60562549cd..e5976648a35 100644
--- a/scripts/bench-cli-startup.ts
+++ b/scripts/bench-cli-startup.ts
@@ -15,6 +15,8 @@ type Sample = {
   maxRssMb: number | null;
   exitCode: number | null;
   signal: string | null;
+  stdoutTail?: string;
+  stderrTail?: string;
 };
 
 type SummaryStats = {
@@ -328,7 +330,7 @@ function runCase(params: {
         ...process.env,
         OPENCLAW_HIDE_BANNER: "1",
       },
-      stdio: ["ignore", "ignore", "pipe"],
+      stdio: ["ignore", "pipe", "pipe"],
       encoding: "utf8",
       timeout: params.timeoutMs,
       maxBuffer: 32 * 1024 * 1024,
@@ -342,11 +344,21 @@ function runCase(params: {
       maxRssMb: parseMaxRssMb(proc.stderr ?? ""),
       exitCode: proc.status,
       signal: proc.signal,
+      ...(proc.status === 0
+        ? {}
+        : {
+            stdoutTail: tailLines(proc.stdout ?? "", 20),
+            stderrTail: tailLines(proc.stderr ?? "", 20),
+          }),
     });
   }
   return samples;
 }
 
+function tailLines(value: string, maxLines: number): string {
+  return maxLines > 0 ? value.split(/\r?\n/).filter(Boolean).slice(-maxLines).join("\n") : "";
+}
+
 function printSuite(result: SuiteResult): void {
   console.log(`Entry: ${result.entry}`);
   for (const commandCase of result.cases) {
diff --git a/scripts/openclaw-performance-source-summary.mjs b/scripts/openclaw-performance-source-summary.mjs
new file mode 100644
index 00000000000..36aa178963a
--- /dev/null
+++ b/scripts/openclaw-performance-source-summary.mjs
@@ -0,0 +1,252 @@
+#!/usr/bin/env node
+
+import fs from "node:fs";
+import path from "node:path";
+import process from "node:process";
+
+function parseArgs(argv) {
+  const options = { sourceDir: null, output: null };
+  for (let index = 0; index < argv.length; index += 1) {
+    const arg = argv[index];
+    const readValue = () => {
+      const value = argv[index + 1];
+      if (!value) {
+        throw new Error(`Missing value for ${arg}`);
+      }
+      index += 1;
+      return value;
+    };
+    switch (arg) {
+      case "--source-dir":
+        options.sourceDir = path.resolve(readValue());
+        break;
+      case "--output":
+        options.output = path.resolve(readValue());
+        break;
+      case "--help":
+        printHelp();
+        process.exit(0);
+        break;
+      default:
+        throw new Error(`Unknown argument: ${arg}`);
+    }
+  }
+  if (!options.sourceDir) {
+    throw new Error("--source-dir is required");
+  }
+  return options;
+}
+
+function printHelp() {
+  console.log(`Usage: node scripts/openclaw-performance-source-summary.mjs --source-dir <dir> [--output <summary.md>]
+
+Summarizes OpenClaw-native performance probe artifacts for CI reports.`);
+}
+
+function readJsonIfExists(filePath) {
+  if (!fs.existsSync(filePath)) {
+    return null;
+  }
+  return JSON.parse(fs.readFileSync(filePath, "utf8"));
+}
+
+function formatMs(value) {
+  return typeof value === "number" && Number.isFinite(value) ? `${value.toFixed(1)}ms` : "n/a";
+}
+
+function formatMb(value) {
+  return typeof value === "number" && Number.isFinite(value) ? `${value.toFixed(1)}MB` : "n/a";
+}
+
+function formatBytesAsMb(value) {
+  return typeof value === "number" && Number.isFinite(value)
+    ? formatMb(value / 1024 / 1024)
+    : "n/a";
+}
+
+function formatRatio(value) {
+  return typeof value === "number" && Number.isFinite(value) ? value.toFixed(3) : "n/a";
+}
+
+function metric(stats, key = "p50") {
+  return stats && typeof stats[key] === "number" ? stats[key] : null;
+}
+
+function escapeCell(value) {
+  return String(value).replaceAll("|", "\\|");
+}
+
+function table(headers, rows) {
+  if (rows.length === 0) {
+    return ["No data.", ""];
+  }
+  return [
+    `| ${headers.join(" | ")} |`,
+    `| ${headers.map(() => "---").join(" | ")} |`,
+    ...rows.map((row) => `| ${row.map((cell) => escapeCell(cell)).join(" | ")} |`),
+    "",
+  ];
+}
+
+function loadMockHelloSummaries(sourceDir) {
+  const root = path.join(sourceDir, "mock-hello");
+  if (!fs.existsSync(root)) {
+    return [];
+  }
+  return fs
+    .readdirSync(root, { withFileTypes: true })
+    .filter((entry) => entry.isDirectory())
+    .map((entry) => ({
+      id: entry.name,
+      summary: readJsonIfExists(path.join(root, entry.name, "qa-suite-summary.json")),
+    }))
+    .filter((entry) => entry.summary != null)
+    .toSorted((a, b) => a.id.localeCompare(b.id));
+}
+
+function buildStartupRows(startup) {
+  return (startup?.results ?? []).map((result) => [
+    result.id ?? "unknown",
+    result.name ?? result.id ?? "unknown",
+    formatMs(metric(result.summary?.readyzMs)),
+    formatMs(metric(result.summary?.readyzMs, "p95")),
+    formatMs(metric(result.summary?.healthzMs)),
+    formatMs(metric(result.summary?.readyLogMs)),
+    formatMs(metric(result.summary?.firstOutputMs)),
+    formatMb(metric(result.summary?.maxRssMb, "p95")),
+    formatRatio(metric(result.summary?.cpuCoreRatio, "p95")),
+  ]);
+}
+
+function buildTraceRows(startup) {
+  const rows = [];
+  for (const result of startup?.results ?? []) {
+    const traceEntries = Object.entries(result.summary?.startupTrace ?? {})
+      .filter(([, stats]) => typeof stats?.p50 === "number")
+      .toSorted((a, b) => (b[1].p50 ?? 0) - (a[1].p50 ?? 0))
+      .slice(0, 5);
+    for (const [name, stats] of traceEntries) {
+      rows.push([result.id ?? "unknown", name, formatMs(stats.p50), formatMs(stats.p95)]);
+    }
+  }
+  return rows;
+}
+
+function buildMockHelloRows(summaries) {
+  return summaries.map(({ id, summary }) => {
+    const status =
+      typeof summary?.counts?.failed === "number" && summary.counts.failed > 0 ? "fail" : "pass";
+    const counts = summary?.counts
+      ? `${summary.counts.passed ?? 0}/${summary.counts.total ?? 0}`
+      : "n/a";
+    return [
+      id,
+      status,
+      counts,
+      formatMs(summary?.metrics?.wallMs),
+      formatRatio(summary?.metrics?.gatewayCpuCoreRatio),
+      formatBytesAsMb(summary?.metrics?.gatewayProcessRssStartBytes),
+      formatBytesAsMb(summary?.metrics?.gatewayProcessRssEndBytes),
+      formatBytesAsMb(summary?.metrics?.gatewayProcessRssDeltaBytes),
+      summary?.run?.primaryModel ?? "n/a",
+    ];
+  });
+}
+
+function buildCliRows(cli) {
+  return (cli?.primary?.cases ?? []).map((commandCase) => [
+    commandCase.id ?? "unknown",
+    commandCase.name ?? commandCase.id ?? "unknown",
+    formatMs(commandCase.summary?.durationMs?.p50),
+    formatMs(commandCase.summary?.durationMs?.p95),
+    formatMb(commandCase.summary?.maxRssMb?.p95),
+    commandCase.summary?.exitSummary ?? "n/a",
+  ]);
+}
+
+function buildObservationRows(summary) {
+  return (summary?.observations ?? []).map((observation) => [
+    observation.kind ?? "unknown",
+    observation.id ?? "unknown",
+    formatRatio(observation.cpuCoreRatio ?? observation.cpuCoreRatioMax),
+    formatMs(observation.wallMs ?? observation.wallMsMax),
+  ]);
+}
+
+function buildMarkdown(sourceDir) {
+  const gatewaySummary = readJsonIfExists(path.join(sourceDir, "gateway-cpu", "summary.json"));
+  const startup = readJsonIfExists(
+    path.join(sourceDir, "gateway-cpu", "gateway-startup-bench.json"),
+  );
+  const cli = readJsonIfExists(path.join(sourceDir, "cli-startup.json"));
+  const mockHelloSummaries = loadMockHelloSummaries(sourceDir);
+
+  const lines = [
+    "# OpenClaw Source Performance",
+    "",
+    `Generated: ${new Date().toISOString()}`,
+    "",
+    "## Gateway Boot",
+    "",
+    ...table(
+      [
+        "case",
+        "name",
+        "readyz p50",
+        "readyz p95",
+        "healthz p50",
+        "ready log p50",
+        "first output p50",
+        "RSS p95",
+        "CPU core p95",
+      ],
+      buildStartupRows(startup),
+    ),
+    "## Startup Hotspots",
+    "",
+    ...table(["case", "phase", "p50", "p95"], buildTraceRows(startup)),
+    "## Fake Model Hello Loops",
+    "",
+    ...table(
+      [
+        "run",
+        "status",
+        "pass",
+        "wall",
+        "gateway CPU core",
+        "RSS start",
+        "RSS end",
+        "RSS delta",
+        "model",
+      ],
+      buildMockHelloRows(mockHelloSummaries),
+    ),
+    "## CLI Against Booted Gateway",
+    "",
+    ...table(
+      ["case", "command", "duration p50", "duration p95", "RSS p95", "exits"],
+      buildCliRows(cli),
+    ),
+    "## Observations",
+    "",
+    ...table(["kind", "id", "CPU core", "wall"], buildObservationRows(gatewaySummary)),
+  ];
+
+  return `${lines.join("\n")}\n`;
+}
+
+async function main() {
+  const options = parseArgs(process.argv.slice(2));
+  const markdown = buildMarkdown(options.sourceDir);
+  if (options.output) {
+    fs.mkdirSync(path.dirname(options.output), { recursive: true });
+    fs.writeFileSync(options.output, markdown, "utf8");
+  } else {
+    process.stdout.write(markdown);
+  }
+}
+
+main().catch((error) => {
+  console.error(error instanceof Error ? error.stack : String(error));
+  process.exitCode = 1;
+});