From 25ca5cc8dfcd51f494b46089a1cfcf5e50bcced4 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 2 May 2026 17:45:55 +0100 Subject: [PATCH] ci: add source performance probes --- .github/workflows/openclaw-performance.yml | 113 ++++++++ docs/ci.md | 4 +- docs/help/testing.md | 4 +- package.json | 1 + scripts/bench-cli-startup.ts | 14 +- .../openclaw-performance-source-summary.mjs | 252 ++++++++++++++++++ 6 files changed, 385 insertions(+), 3 deletions(-) create mode 100644 scripts/openclaw-performance-source-summary.mjs diff --git a/.github/workflows/openclaw-performance.yml b/.github/workflows/openclaw-performance.yml index 8c92a73af23..1388790d216 100644 --- a/.github/workflows/openclaw-performance.yml +++ b/.github/workflows/openclaw-performance.yml @@ -89,6 +89,7 @@ jobs: REPORT_DIR: ${{ github.workspace }}/.artifacts/kova/reports/${{ matrix.lane }} BUNDLE_DIR: ${{ github.workspace }}/.artifacts/kova/bundles/${{ matrix.lane }} SUMMARY_DIR: ${{ github.workspace }}/.artifacts/kova/summaries + SOURCE_PERF_DIR: ${{ github.workspace }}/.artifacts/openclaw-performance/source/${{ matrix.lane }} LANE_ID: ${{ matrix.lane }} PROFILE: ${{ inputs.profile || 'diagnostic' }} REQUESTED_REPEAT: ${{ inputs.repeat || '3' }} @@ -297,6 +298,105 @@ jobs: exit "$status" fi + - name: Run OpenClaw source performance probes + if: ${{ steps.lane.outputs.run == 'true' && matrix.lane == 'mock-provider' }} + shell: bash + run: | + set -euo pipefail + source_runs="$REQUESTED_REPEAT" + if ! 
[[ "$source_runs" =~ ^[0-9]+$ ]] || [[ "$source_runs" -lt 1 ]]; then + source_runs=3 + fi + + mkdir -p "$SOURCE_PERF_DIR/mock-hello" + pnpm build + + pnpm test:gateway:cpu-scenarios \ + --output-dir "$SOURCE_PERF_DIR/gateway-cpu" \ + --runs "$source_runs" \ + --warmup 1 \ + --skip-qa \ + --startup-case default \ + --startup-case skipChannels \ + --startup-case oneInternalHook \ + --startup-case allInternalHooks \ + --startup-case fiftyPlugins \ + --startup-case fiftyStartupLazyPlugins + + for run_index in $(seq 1 "$source_runs"); do + run_dir="$SOURCE_PERF_DIR/mock-hello/run-$(printf '%03d' "$run_index")" + pnpm openclaw qa suite \ + --provider-mode mock-openai \ + --concurrency 1 \ + --output-dir "$(realpath --relative-to="$GITHUB_WORKSPACE" "$run_dir")" \ + --scenario channel-chat-baseline + done + + gateway_home="$(mktemp -d)" + gateway_port="$(node -e "const net=require('node:net'); const s=net.createServer(); s.listen(0,'127.0.0.1',()=>{ console.log(s.address().port); s.close(); });")" + gateway_state="$gateway_home/.openclaw" + gateway_config="$gateway_state/openclaw.json" + gateway_log="$SOURCE_PERF_DIR/cli-gateway.log" + gateway_pid="" + mkdir -p "$gateway_state" + cat > "$gateway_config" </dev/null; then + kill "$gateway_pid" 2>/dev/null || true + wait "$gateway_pid" 2>/dev/null || true + fi + rm -rf "$gateway_home" + } + trap cleanup_gateway EXIT + OPENCLAW_HOME="$gateway_home" OPENCLAW_STATE_DIR="$gateway_state" OPENCLAW_CONFIG_PATH="$gateway_config" OPENCLAW_GATEWAY_PORT="$gateway_port" OPENCLAW_SKIP_CHANNELS=1 \ + node dist/entry.js gateway run --bind loopback --port "$gateway_port" --auth none --allow-unconfigured --force \ + >"$gateway_log" 2>&1 & + gateway_pid="$!" + + for _ in $(seq 1 120); do + if curl -fsS "http://127.0.0.1:${gateway_port}/healthz" >/dev/null; then + break + fi + if ! 
kill -0 "$gateway_pid" 2>/dev/null; then + cat "$gateway_log" >&2 + exit 1 + fi + sleep 1 + done + curl -fsS "http://127.0.0.1:${gateway_port}/healthz" >/dev/null + + OPENCLAW_HOME="$gateway_home" OPENCLAW_STATE_DIR="$gateway_state" OPENCLAW_CONFIG_PATH="$gateway_config" OPENCLAW_GATEWAY_PORT="$gateway_port" \ + node --import tsx scripts/bench-cli-startup.ts \ + --case gatewayHealthJson \ + --case configGetGatewayPort \ + --runs "$source_runs" \ + --warmup 1 \ + --output "$SOURCE_PERF_DIR/cli-startup.json" + cleanup_gateway + trap - EXIT + + pnpm perf:source:summary \ + --source-dir "$SOURCE_PERF_DIR" \ + --output "$SOURCE_PERF_DIR/index.md" + + cat "$SOURCE_PERF_DIR/index.md" >> "$GITHUB_STEP_SUMMARY" + - name: Upload Kova artifacts if: ${{ always() && steps.lane.outputs.run == 'true' }} uses: actions/upload-artifact@v5 @@ -306,6 +406,7 @@ jobs: .artifacts/kova/reports/${{ matrix.lane }} .artifacts/kova/bundles/${{ matrix.lane }} .artifacts/kova/summaries/${{ matrix.lane }}.md + .artifacts/openclaw-performance/source/${{ matrix.lane }} if-no-files-found: ignore retention-days: ${{ matrix.deep_profile == 'true' && 14 || 30 }} @@ -348,6 +449,18 @@ jobs: mkdir -p "$dest/bundles" cp -R "$BUNDLE_DIR"/. "$dest/bundles/" fi + if [[ -d "$SOURCE_PERF_DIR" ]]; then + mkdir -p "$dest/source" + cp -R "$SOURCE_PERF_DIR"/. "$dest/source/" + if [[ -f "$SOURCE_PERF_DIR/index.md" ]]; then + cat >> "$dest/index.md" <<'EOF' + + ## Source probes + + Additional gateway boot, memory, plugin pressure, mock hello-loop, and CLI startup numbers are in [source/index.md](source/index.md). + EOF + fi + fi cat > "${reports_root}/openclaw-performance/${ref_slug}/latest-${LANE_ID}.json" </-//`. The current branch pointer is written as `openclaw-performance//latest-.json`. 
+The mock-provider lane also runs OpenClaw-native source probes after the Kova pass: gateway boot timing and memory across default, hook, and 50-plugin startup cases; repeated mock-OpenAI `channel-chat-baseline` hello loops; and CLI startup commands against the booted gateway. The source probe Markdown summary lives at `source/index.md` in the report bundle, with raw JSON beside it. + +Every lane uploads GitHub artifacts. When `CLAWGRIT_REPORTS_TOKEN` is configured, the workflow also commits `report.json`, `report.md`, bundles, `index.md`, and source-probe artifacts into `openclaw/clawgrit-reports` under `openclaw-performance//-//`. The current branch pointer is written as `openclaw-performance/<ref-slug>/latest-<lane>.json`. ## Full Release Validation diff --git a/docs/help/testing.md b/docs/help/testing.md index 45efdc22e50..07eda5b4656 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -50,7 +50,9 @@ When debugging real providers/models (requires real creds): `live_gpt54=true` for a real `openai/gpt-5.4` agent turn or `deep_profile=true` for Kova CPU/heap/trace artifacts. Daily scheduled runs publish mock-provider, deep-profile, and GPT 5.4 lane artifacts to - `openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured. + `openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured. The + mock-provider report also includes source-level gateway boot, memory, + plugin-pressure, repeated fake-model hello-loop, and CLI startup numbers. - Docker live model sweep: `pnpm test:docker:live-models` - Each selected model now runs a text turn plus a small file-read-style probe. Models whose metadata advertises `image` input also run a tiny image turn. 
diff --git a/package.json b/package.json index 06b81d7bcc0..f01b7b8741a 100644 --- a/package.json +++ b/package.json @@ -1429,6 +1429,7 @@ "openclaw": "node scripts/run-node.mjs", "openclaw:rpc": "node scripts/run-node.mjs agent --mode rpc --json", "perf:kova:summary": "node scripts/kova-ci-summary.mjs", + "perf:source:summary": "node scripts/openclaw-performance-source-summary.mjs", "plugin-sdk:api:check": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --check", "plugin-sdk:api:gen": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --write", "plugin-sdk:check-exports": "node scripts/sync-plugin-sdk-exports.mjs --check", diff --git a/scripts/bench-cli-startup.ts b/scripts/bench-cli-startup.ts index c60562549cd..e5976648a35 100644 --- a/scripts/bench-cli-startup.ts +++ b/scripts/bench-cli-startup.ts @@ -15,6 +15,8 @@ type Sample = { maxRssMb: number | null; exitCode: number | null; signal: string | null; + stdoutTail?: string; + stderrTail?: string; }; type SummaryStats = { @@ -328,7 +330,7 @@ function runCase(params: { ...process.env, OPENCLAW_HIDE_BANNER: "1", }, - stdio: ["ignore", "ignore", "pipe"], + stdio: ["ignore", "pipe", "pipe"], encoding: "utf8", timeout: params.timeoutMs, maxBuffer: 32 * 1024 * 1024, @@ -342,11 +344,21 @@ function runCase(params: { maxRssMb: parseMaxRssMb(proc.stderr ?? ""), exitCode: proc.status, signal: proc.signal, + ...(proc.status === 0 + ? {} + : { + stdoutTail: tailLines(proc.stdout ?? "", 20), + stderrTail: tailLines(proc.stderr ?? 
"", 20), + }), }); } return samples; } +function tailLines(value: string, maxLines: number): string { + return value.split(/\r?\n/).filter(Boolean).slice(-maxLines).join("\n"); +} + function printSuite(result: SuiteResult): void { console.log(`Entry: ${result.entry}`); for (const commandCase of result.cases) { diff --git a/scripts/openclaw-performance-source-summary.mjs b/scripts/openclaw-performance-source-summary.mjs new file mode 100644 index 00000000000..36aa178963a --- /dev/null +++ b/scripts/openclaw-performance-source-summary.mjs @@ -0,0 +1,252 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; +import process from "node:process"; + +function parseArgs(argv) { + const options = { sourceDir: null, output: null }; + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index]; + const readValue = () => { + const value = argv[index + 1]; + if (!value) { + throw new Error(`Missing value for ${arg}`); + } + index += 1; + return value; + }; + switch (arg) { + case "--source-dir": + options.sourceDir = path.resolve(readValue()); + break; + case "--output": + options.output = path.resolve(readValue()); + break; + case "--help": + printHelp(); + process.exit(0); + break; + default: + throw new Error(`Unknown argument: ${arg}`); + } + } + if (!options.sourceDir) { + throw new Error("--source-dir is required"); + } + return options; +} + +function printHelp() { + console.log(`Usage: node scripts/openclaw-performance-source-summary.mjs --source-dir [--output ] + +Summarizes OpenClaw-native performance probe artifacts for CI reports.`); +} + +function readJsonIfExists(filePath) { + if (!fs.existsSync(filePath)) { + return null; + } + return JSON.parse(fs.readFileSync(filePath, "utf8")); +} + +function formatMs(value) { + return typeof value === "number" && Number.isFinite(value) ? `${value.toFixed(1)}ms` : "n/a"; +} + +function formatMb(value) { + return typeof value === "number" && Number.isFinite(value) ? 
`${value.toFixed(1)}MB` : "n/a"; +} + +function formatBytesAsMb(value) { + return typeof value === "number" && Number.isFinite(value) + ? formatMb(value / 1024 / 1024) + : "n/a"; +} + +function formatRatio(value) { + return typeof value === "number" && Number.isFinite(value) ? value.toFixed(3) : "n/a"; +} + +function metric(stats, key = "p50") { + return stats && typeof stats[key] === "number" ? stats[key] : null; +} + +function escapeCell(value) { + return String(value).replaceAll("|", "\\|"); +} + +function table(headers, rows) { + if (rows.length === 0) { + return ["No data.", ""]; + } + return [ + `| ${headers.join(" | ")} |`, + `| ${headers.map(() => "---").join(" | ")} |`, + ...rows.map((row) => `| ${row.map((cell) => escapeCell(cell)).join(" | ")} |`), + "", + ]; +} + +function loadMockHelloSummaries(sourceDir) { + const root = path.join(sourceDir, "mock-hello"); + if (!fs.existsSync(root)) { + return []; + } + return fs + .readdirSync(root, { withFileTypes: true }) + .filter((entry) => entry.isDirectory()) + .map((entry) => ({ + id: entry.name, + summary: readJsonIfExists(path.join(root, entry.name, "qa-suite-summary.json")), + })) + .filter((entry) => entry.summary != null) + .toSorted((a, b) => a.id.localeCompare(b.id)); +} + +function buildStartupRows(startup) { + return (startup?.results ?? []).map((result) => [ + result.id ?? "unknown", + result.name ?? result.id ?? "unknown", + formatMs(metric(result.summary?.readyzMs)), + formatMs(metric(result.summary?.readyzMs, "p95")), + formatMs(metric(result.summary?.healthzMs)), + formatMs(metric(result.summary?.readyLogMs)), + formatMs(metric(result.summary?.firstOutputMs)), + formatMb(metric(result.summary?.maxRssMb, "p95")), + formatRatio(metric(result.summary?.cpuCoreRatio, "p95")), + ]); +} + +function buildTraceRows(startup) { + const rows = []; + for (const result of startup?.results ?? []) { + const traceEntries = Object.entries(result.summary?.startupTrace ?? 
{}) + .filter(([, stats]) => typeof stats?.p50 === "number") + .toSorted((a, b) => (b[1].p50 ?? 0) - (a[1].p50 ?? 0)) + .slice(0, 5); + for (const [name, stats] of traceEntries) { + rows.push([result.id ?? "unknown", name, formatMs(stats.p50), formatMs(stats.p95)]); + } + } + return rows; +} + +function buildMockHelloRows(summaries) { + return summaries.map(({ id, summary }) => { + const status = + typeof summary?.counts?.failed === "number" && summary.counts.failed > 0 ? "fail" : "pass"; + const counts = summary?.counts + ? `${summary.counts.passed ?? 0}/${summary.counts.total ?? 0}` + : "n/a"; + return [ + id, + status, + counts, + formatMs(summary?.metrics?.wallMs), + formatRatio(summary?.metrics?.gatewayCpuCoreRatio), + formatBytesAsMb(summary?.metrics?.gatewayProcessRssStartBytes), + formatBytesAsMb(summary?.metrics?.gatewayProcessRssEndBytes), + formatBytesAsMb(summary?.metrics?.gatewayProcessRssDeltaBytes), + summary?.run?.primaryModel ?? "n/a", + ]; + }); +} + +function buildCliRows(cli) { + return (cli?.primary?.cases ?? []).map((commandCase) => [ + commandCase.id ?? "unknown", + commandCase.name ?? commandCase.id ?? "unknown", + formatMs(commandCase.summary?.durationMs?.p50), + formatMs(commandCase.summary?.durationMs?.p95), + formatMb(commandCase.summary?.maxRssMb?.p95), + commandCase.summary?.exitSummary ?? "n/a", + ]); +} + +function buildObservationRows(summary) { + return (summary?.observations ?? []).map((observation) => [ + observation.kind ?? "unknown", + observation.id ?? "unknown", + formatRatio(observation.cpuCoreRatio ?? observation.cpuCoreRatioMax), + formatMs(observation.wallMs ?? 
observation.wallMsMax), + ]); +} + +function buildMarkdown(sourceDir) { + const gatewaySummary = readJsonIfExists(path.join(sourceDir, "gateway-cpu", "summary.json")); + const startup = readJsonIfExists( + path.join(sourceDir, "gateway-cpu", "gateway-startup-bench.json"), + ); + const cli = readJsonIfExists(path.join(sourceDir, "cli-startup.json")); + const mockHelloSummaries = loadMockHelloSummaries(sourceDir); + + const lines = [ + "# OpenClaw Source Performance", + "", + `Generated: ${new Date().toISOString()}`, + "", + "## Gateway Boot", + "", + ...table( + [ + "case", + "name", + "readyz p50", + "readyz p95", + "healthz p50", + "ready log p50", + "first output p50", + "RSS p95", + "CPU core p95", + ], + buildStartupRows(startup), + ), + "## Startup Hotspots", + "", + ...table(["case", "phase", "p50", "p95"], buildTraceRows(startup)), + "## Fake Model Hello Loops", + "", + ...table( + [ + "run", + "status", + "pass", + "wall", + "gateway CPU core", + "RSS start", + "RSS end", + "RSS delta", + "model", + ], + buildMockHelloRows(mockHelloSummaries), + ), + "## CLI Against Booted Gateway", + "", + ...table( + ["case", "command", "duration p50", "duration p95", "RSS p95", "exits"], + buildCliRows(cli), + ), + "## Observations", + "", + ...table(["kind", "id", "CPU core", "wall"], buildObservationRows(gatewaySummary)), + ]; + + return `${lines.join("\n")}\n`; +} + +async function main() { + const options = parseArgs(process.argv.slice(2)); + const markdown = buildMarkdown(options.sourceDir); + if (options.output) { + fs.mkdirSync(path.dirname(options.output), { recursive: true }); + fs.writeFileSync(options.output, markdown, "utf8"); + } else { + process.stdout.write(markdown); + } +} + +main().catch((error) => { + console.error(error instanceof Error ? error.stack : String(error)); + process.exitCode = 1; +});