mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:30:42 +00:00
ci: add OpenClaw performance reports
This commit is contained in:
335
.github/workflows/openclaw-performance.yml
vendored
Normal file
335
.github/workflows/openclaw-performance.yml
vendored
Normal file
@@ -0,0 +1,335 @@
|
||||
name: OpenClaw Performance
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "11 5 * * *"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
profile:
|
||||
description: Kova profile to run
|
||||
required: false
|
||||
default: diagnostic
|
||||
type: choice
|
||||
options:
|
||||
- smoke
|
||||
- diagnostic
|
||||
- soak
|
||||
- release
|
||||
repeat:
|
||||
description: Repeat count for non-profiled Kova runs
|
||||
required: false
|
||||
default: "3"
|
||||
type: string
|
||||
deep_profile:
|
||||
description: Run the deep-profile lane with CPU/heap/trace artifacts
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
live_gpt54:
|
||||
description: Run the live OpenAI GPT 5.4 agent-turn lane
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
fail_on_regression:
|
||||
description: Fail the workflow when Kova exits non-zero
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
kova_ref:
|
||||
description: Kova Git ref to install
|
||||
required: false
|
||||
default: 51947110f5cacb6ab2c0947594ea9628031c9fcf
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.event_name == 'workflow_dispatch' && format('{0}-{1}', github.workflow, github.run_id) || format('{0}-{1}', github.workflow, github.ref) }}
|
||||
cancel-in-progress: false
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
|
||||
OCM_VERSION: v0.2.15
|
||||
PERFORMANCE_MODEL_ID: gpt-5.4
|
||||
CLAWGRIT_REPORTS_TOKEN_PRESENT: ${{ secrets.CLAWGRIT_REPORTS_TOKEN != '' && 'true' || 'false' }}
|
||||
|
||||
jobs:
|
||||
kova:
|
||||
name: ${{ matrix.title }}
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404
|
||||
timeout-minutes: 240
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- lane: mock-provider
|
||||
title: Kova mock provider performance
|
||||
auth: mock
|
||||
repeat: input
|
||||
deep_profile: "false"
|
||||
live: "false"
|
||||
include_filters: "scenario:fresh-install scenario:gateway-performance scenario:bundled-plugin-startup scenario:bundled-runtime-deps scenario:agent-cold-warm-message"
|
||||
- lane: mock-deep-profile
|
||||
title: Kova mock provider deep profile
|
||||
auth: mock
|
||||
repeat: "1"
|
||||
deep_profile: "true"
|
||||
live: "false"
|
||||
include_filters: "scenario:fresh-install scenario:gateway-performance scenario:agent-cold-warm-message"
|
||||
- lane: live-gpt54
|
||||
title: Kova live OpenAI GPT 5.4 agent turn
|
||||
auth: live
|
||||
repeat: "1"
|
||||
deep_profile: "false"
|
||||
live: "true"
|
||||
include_filters: "scenario:agent-cold-warm-message"
|
||||
env:
|
||||
KOVA_REF: ${{ inputs.kova_ref || '51947110f5cacb6ab2c0947594ea9628031c9fcf' }}
|
||||
KOVA_HOME: ${{ github.workspace }}/.artifacts/kova/home/${{ matrix.lane }}
|
||||
REPORT_DIR: ${{ github.workspace }}/.artifacts/kova/reports/${{ matrix.lane }}
|
||||
BUNDLE_DIR: ${{ github.workspace }}/.artifacts/kova/bundles/${{ matrix.lane }}
|
||||
SUMMARY_DIR: ${{ github.workspace }}/.artifacts/kova/summaries
|
||||
LANE_ID: ${{ matrix.lane }}
|
||||
PROFILE: ${{ inputs.profile || 'diagnostic' }}
|
||||
REQUESTED_REPEAT: ${{ inputs.repeat || '3' }}
|
||||
FAIL_ON_REGRESSION: ${{ inputs.fail_on_regression || 'false' }}
|
||||
INCLUDE_FILTERS: ${{ matrix.include_filters }}
|
||||
AUTH_MODE: ${{ matrix.auth }}
|
||||
MATRIX_REPEAT: ${{ matrix.repeat }}
|
||||
MATRIX_DEEP_PROFILE: ${{ matrix.deep_profile }}
|
||||
MATRIX_LIVE: ${{ matrix.live }}
|
||||
steps:
|
||||
- name: Decide lane
|
||||
id: lane
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
run_lane=true
|
||||
reason=""
|
||||
if [[ "$LANE_ID" == "mock-deep-profile" && "${{ github.event_name }}" != "schedule" && "${{ inputs.deep_profile || 'false' }}" != "true" ]]; then
|
||||
run_lane=false
|
||||
reason="deep_profile input is false"
|
||||
fi
|
||||
if [[ "$LANE_ID" == "live-gpt54" && "${{ github.event_name }}" != "schedule" && "${{ inputs.live_gpt54 || 'false' }}" != "true" ]]; then
|
||||
run_lane=false
|
||||
reason="live_gpt54 input is false"
|
||||
fi
|
||||
echo "run=$run_lane" >> "$GITHUB_OUTPUT"
|
||||
if [[ "$run_lane" != "true" ]]; then
|
||||
echo "Skipping ${LANE_ID}: ${reason}" >> "$GITHUB_STEP_SUMMARY"
|
||||
fi
|
||||
|
||||
- name: Checkout OpenClaw
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 1
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Node environment
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
uses: ./.github/actions/setup-node-env
|
||||
with:
|
||||
install-bun: "false"
|
||||
|
||||
- name: Install OCM and Kova
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
KOVA_SRC="${RUNNER_TEMP}/kova-src"
|
||||
echo "KOVA_SRC=$KOVA_SRC" >> "$GITHUB_ENV"
|
||||
mkdir -p "$HOME/.local/bin" "$(dirname "$KOVA_SRC")"
|
||||
curl -fsSL https://raw.githubusercontent.com/shakkernerd/ocm/main/install.sh \
|
||||
| bash -s -- --version "$OCM_VERSION" --prefix "$HOME/.local" --force
|
||||
git clone --filter=blob:none https://github.com/shakkernerd/Kova.git "$KOVA_SRC"
|
||||
git -C "$KOVA_SRC" checkout "$KOVA_REF"
|
||||
cat > "$HOME/.local/bin/kova" <<EOF
|
||||
#!/usr/bin/env bash
|
||||
export KOVA_HOME="${KOVA_HOME}"
|
||||
exec node "${KOVA_SRC}/bin/kova.mjs" "\$@"
|
||||
EOF
|
||||
chmod 0755 "$HOME/.local/bin/kova"
|
||||
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Pin Kova OpenAI model to GPT 5.4
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
node - <<'NODE'
|
||||
const fs = require("node:fs");
|
||||
const path = require("node:path");
|
||||
const root = process.env.KOVA_SRC;
|
||||
const files = [
|
||||
"support/configure-openclaw-mock-auth.mjs",
|
||||
"support/configure-openclaw-live-auth.mjs",
|
||||
"support/mock-openai-server.mjs",
|
||||
"states/mock-openai-provider.json"
|
||||
];
|
||||
for (const rel of files) {
|
||||
const file = path.join(root, rel);
|
||||
const before = fs.readFileSync(file, "utf8");
|
||||
const after = before.replaceAll("gpt-5.5", process.env.PERFORMANCE_MODEL_ID);
|
||||
fs.writeFileSync(file, after, "utf8");
|
||||
}
|
||||
NODE
|
||||
|
||||
- name: Kova self-check
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
run: kova self-check --json
|
||||
|
||||
- name: Configure live OpenAI auth
|
||||
if: ${{ steps.lane.outputs.run == 'true' && matrix.live == 'true' }}
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [[ -z "${OPENAI_API_KEY:-}" ]]; then
|
||||
echo "OPENAI_API_KEY is not configured; live GPT 5.4 lane will be skipped." >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 0
|
||||
fi
|
||||
kova setup --ci --json
|
||||
kova setup --non-interactive --auth env-only --provider openai --env-var OPENAI_API_KEY --json
|
||||
|
||||
- name: Run Kova
|
||||
id: kova
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
mkdir -p "$REPORT_DIR" "$BUNDLE_DIR" "$SUMMARY_DIR"
|
||||
|
||||
if [[ "$MATRIX_LIVE" == "true" && -z "${OPENAI_API_KEY:-}" ]]; then
|
||||
echo "skipped=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
repeat="$REQUESTED_REPEAT"
|
||||
if [[ "$MATRIX_REPEAT" != "input" ]]; then
|
||||
repeat="$MATRIX_REPEAT"
|
||||
fi
|
||||
|
||||
args=(
|
||||
matrix run
|
||||
--profile "$PROFILE"
|
||||
--target "local-build:${GITHUB_WORKSPACE}"
|
||||
--auth "$AUTH_MODE"
|
||||
--parallel 1
|
||||
--repeat "$repeat"
|
||||
--report-dir "$REPORT_DIR"
|
||||
--execute
|
||||
--json
|
||||
)
|
||||
|
||||
for filter in $INCLUDE_FILTERS; do
|
||||
args+=(--include "$filter")
|
||||
done
|
||||
|
||||
if [[ "$MATRIX_DEEP_PROFILE" == "true" ]]; then
|
||||
args+=(--deep-profile)
|
||||
fi
|
||||
if [[ "$FAIL_ON_REGRESSION" == "true" ]]; then
|
||||
args+=(--gate)
|
||||
fi
|
||||
|
||||
log_path="$REPORT_DIR/${LANE_ID}.log"
|
||||
set +e
|
||||
kova "${args[@]}" 2>&1 | tee "$log_path"
|
||||
status=${PIPESTATUS[0]}
|
||||
set -e
|
||||
|
||||
report_json="$(find "$REPORT_DIR" -maxdepth 1 -type f -name '*.json' -print | sort | tail -n 1)"
|
||||
if [[ -z "$report_json" ]]; then
|
||||
echo "Kova did not write a JSON report." >&2
|
||||
exit 1
|
||||
fi
|
||||
report_md="${report_json%.json}.md"
|
||||
echo "status=$status" >> "$GITHUB_OUTPUT"
|
||||
echo "report_json=$report_json" >> "$GITHUB_OUTPUT"
|
||||
echo "report_md=$report_md" >> "$GITHUB_OUTPUT"
|
||||
|
||||
kova report bundle "$report_json" --output-dir "$BUNDLE_DIR" --json | tee "$BUNDLE_DIR/bundle.json"
|
||||
|
||||
ref_slug="$(printf '%s' "${GITHUB_REF_NAME}" | tr -c 'A-Za-z0-9._-' '-')"
|
||||
run_slug="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
|
||||
report_url=""
|
||||
if [[ "$CLAWGRIT_REPORTS_TOKEN_PRESENT" == "true" ]]; then
|
||||
report_url="https://github.com/openclaw/clawgrit-reports/tree/main/openclaw-performance/${ref_slug}/${run_slug}/${LANE_ID}"
|
||||
fi
|
||||
summary_path="$SUMMARY_DIR/${LANE_ID}.md"
|
||||
node scripts/kova-ci-summary.mjs --report "$report_json" --output "$summary_path" --lane "$LANE_ID" --report-url "$report_url"
|
||||
cat "$summary_path" >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
if [[ "$FAIL_ON_REGRESSION" == "true" && "$status" != "0" ]]; then
|
||||
exit "$status"
|
||||
fi
|
||||
|
||||
- name: Upload Kova artifacts
|
||||
if: ${{ always() && steps.lane.outputs.run == 'true' }}
|
||||
uses: actions/upload-artifact@v5
|
||||
with:
|
||||
name: openclaw-performance-${{ matrix.lane }}-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
path: |
|
||||
.artifacts/kova/reports/${{ matrix.lane }}
|
||||
.artifacts/kova/bundles/${{ matrix.lane }}
|
||||
.artifacts/kova/summaries/${{ matrix.lane }}.md
|
||||
if-no-files-found: ignore
|
||||
retention-days: ${{ matrix.deep_profile == 'true' && 14 || 30 }}
|
||||
|
||||
- name: Checkout clawgrit reports
|
||||
if: ${{ steps.kova.outputs.report_json != '' && env.CLAWGRIT_REPORTS_TOKEN_PRESENT == 'true' }}
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
repository: openclaw/clawgrit-reports
|
||||
path: .artifacts/clawgrit-reports
|
||||
token: ${{ secrets.CLAWGRIT_REPORTS_TOKEN }}
|
||||
persist-credentials: true
|
||||
|
||||
- name: Publish to clawgrit reports
|
||||
if: ${{ steps.kova.outputs.report_json != '' && env.CLAWGRIT_REPORTS_TOKEN_PRESENT == 'true' }}
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
reports_root=".artifacts/clawgrit-reports"
|
||||
ref_slug="$(printf '%s' "${GITHUB_REF_NAME}" | tr -c 'A-Za-z0-9._-' '-')"
|
||||
run_slug="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
|
||||
dest="${reports_root}/openclaw-performance/${ref_slug}/${run_slug}/${LANE_ID}"
|
||||
mkdir -p "$dest"
|
||||
cp "${{ steps.kova.outputs.report_json }}" "$dest/report.json"
|
||||
if [[ -f "${{ steps.kova.outputs.report_md }}" ]]; then
|
||||
cp "${{ steps.kova.outputs.report_md }}" "$dest/report.md"
|
||||
fi
|
||||
cp "$SUMMARY_DIR/${LANE_ID}.md" "$dest/index.md"
|
||||
if [[ -d "$BUNDLE_DIR" ]]; then
|
||||
mkdir -p "$dest/bundles"
|
||||
cp -R "$BUNDLE_DIR"/. "$dest/bundles/"
|
||||
fi
|
||||
cat > "${reports_root}/openclaw-performance/${ref_slug}/latest-${LANE_ID}.json" <<EOF
|
||||
{
|
||||
"repository": "${GITHUB_REPOSITORY}",
|
||||
"ref": "${GITHUB_REF_NAME}",
|
||||
"sha": "${GITHUB_SHA}",
|
||||
"workflow": "${GITHUB_WORKFLOW}",
|
||||
"run_id": "${GITHUB_RUN_ID}",
|
||||
"run_attempt": "${GITHUB_RUN_ATTEMPT}",
|
||||
"lane": "${LANE_ID}",
|
||||
"path": "openclaw-performance/${ref_slug}/${run_slug}/${LANE_ID}"
|
||||
}
|
||||
EOF
|
||||
|
||||
git -C "$reports_root" config user.name "openclaw-performance[bot]"
|
||||
git -C "$reports_root" config user.email "openclaw-performance[bot]@users.noreply.github.com"
|
||||
git -C "$reports_root" add openclaw-performance
|
||||
if git -C "$reports_root" diff --cached --quiet; then
|
||||
echo "No clawgrit report changes to publish."
|
||||
exit 0
|
||||
fi
|
||||
git -C "$reports_root" commit -m "perf: add OpenClaw ${LANE_ID} report ${GITHUB_SHA::12}"
|
||||
git -C "$reports_root" push
|
||||
65
docs/ci.md
65
docs/ci.md
@@ -12,29 +12,30 @@ OpenClaw CI runs on every push to `main` and every pull request. The `preflight`
|
||||
|
||||
## Pipeline overview
|
||||
|
||||
| Job | Purpose | When it runs |
|
||||
| -------------------------------- | -------------------------------------------------------------------------------------------- | ---------------------------------- |
|
||||
| `preflight` | Detect docs-only changes, changed scopes, changed extensions, and build the CI manifest | Always on non-draft pushes and PRs |
|
||||
| `security-scm-fast` | Private key detection and workflow audit via `zizmor` | Always on non-draft pushes and PRs |
|
||||
| `security-dependency-audit` | Dependency-free production lockfile audit against npm advisories | Always on non-draft pushes and PRs |
|
||||
| `security-fast` | Required aggregate for the fast security jobs | Always on non-draft pushes and PRs |
|
||||
| `check-dependencies` | Production Knip dependency-only pass plus the unused-file allowlist guard | Node-relevant changes |
|
||||
| `build-artifacts` | Build `dist/`, Control UI, built-artifact checks, and reusable downstream artifacts | Node-relevant changes |
|
||||
| `checks-fast-core` | Fast Linux correctness lanes such as bundled/plugin-contract/protocol checks | Node-relevant changes |
|
||||
| `checks-fast-contracts-channels` | Sharded channel contract checks with a stable aggregate check result | Node-relevant changes |
|
||||
| `checks-node-core-test` | Core Node test shards, excluding channel, bundled, contract, and extension lanes | Node-relevant changes |
|
||||
| `check` | Sharded main local gate equivalent: prod types, lint, guards, test types, and strict smoke | Node-relevant changes |
|
||||
| `check-additional` | Architecture, boundary, extension-surface guards, package-boundary, and gateway-watch shards | Node-relevant changes |
|
||||
| `build-smoke` | Built-CLI smoke tests and startup-memory smoke | Node-relevant changes |
|
||||
| `checks` | Verifier for built-artifact channel tests | Node-relevant changes |
|
||||
| `checks-node-compat-node22` | Node 22 compatibility build and smoke lane | Manual CI dispatch for releases |
|
||||
| `check-docs` | Docs formatting, lint, and broken-link checks | Docs changed |
|
||||
| `skills-python` | Ruff + pytest for Python-backed skills | Python-skill-relevant changes |
|
||||
| `checks-windows` | Windows-specific process/path tests plus shared runtime import specifier regressions | Windows-relevant changes |
|
||||
| `macos-node` | macOS TypeScript test lane using the shared built artifacts | macOS-relevant changes |
|
||||
| `macos-swift` | Swift lint, build, and tests for the macOS app | macOS-relevant changes |
|
||||
| `android` | Android unit tests for both flavors plus one debug APK build | Android-relevant changes |
|
||||
| `test-performance-agent` | Daily Codex slow-test optimization after trusted activity | Main CI success or manual dispatch |
|
||||
| Job | Purpose | When it runs |
|
||||
| -------------------------------- | --------------------------------------------------------------------------------------------------------- | ---------------------------------- |
|
||||
| `preflight` | Detect docs-only changes, changed scopes, changed extensions, and build the CI manifest | Always on non-draft pushes and PRs |
|
||||
| `security-scm-fast` | Private key detection and workflow audit via `zizmor` | Always on non-draft pushes and PRs |
|
||||
| `security-dependency-audit` | Dependency-free production lockfile audit against npm advisories | Always on non-draft pushes and PRs |
|
||||
| `security-fast` | Required aggregate for the fast security jobs | Always on non-draft pushes and PRs |
|
||||
| `check-dependencies` | Production Knip dependency-only pass plus the unused-file allowlist guard | Node-relevant changes |
|
||||
| `build-artifacts` | Build `dist/`, Control UI, built-artifact checks, and reusable downstream artifacts | Node-relevant changes |
|
||||
| `checks-fast-core` | Fast Linux correctness lanes such as bundled/plugin-contract/protocol checks | Node-relevant changes |
|
||||
| `checks-fast-contracts-channels` | Sharded channel contract checks with a stable aggregate check result | Node-relevant changes |
|
||||
| `checks-node-core-test` | Core Node test shards, excluding channel, bundled, contract, and extension lanes | Node-relevant changes |
|
||||
| `check` | Sharded main local gate equivalent: prod types, lint, guards, test types, and strict smoke | Node-relevant changes |
|
||||
| `check-additional` | Architecture, boundary, extension-surface guards, package-boundary, and gateway-watch shards | Node-relevant changes |
|
||||
| `build-smoke` | Built-CLI smoke tests and startup-memory smoke | Node-relevant changes |
|
||||
| `checks` | Verifier for built-artifact channel tests | Node-relevant changes |
|
||||
| `checks-node-compat-node22` | Node 22 compatibility build and smoke lane | Manual CI dispatch for releases |
|
||||
| `check-docs` | Docs formatting, lint, and broken-link checks | Docs changed |
|
||||
| `skills-python` | Ruff + pytest for Python-backed skills | Python-skill-relevant changes |
|
||||
| `checks-windows` | Windows-specific process/path tests plus shared runtime import specifier regressions | Windows-relevant changes |
|
||||
| `macos-node` | macOS TypeScript test lane using the shared built artifacts | macOS-relevant changes |
|
||||
| `macos-swift` | Swift lint, build, and tests for the macOS app | macOS-relevant changes |
|
||||
| `android` | Android unit tests for both flavors plus one debug APK build | Android-relevant changes |
|
||||
| `test-performance-agent` | Daily Codex slow-test optimization after trusted activity | Main CI success or manual dispatch |
|
||||
| `openclaw-performance` | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.4 live lanes | Scheduled and manual dispatch |
|
||||
|
||||
## Fail-fast order
|
||||
|
||||
@@ -124,8 +125,26 @@ node scripts/ci-run-timings.mjs --latest-main # ignore issue/comment noise and c
|
||||
node scripts/ci-run-timings.mjs --recent 10 # compare recent successful main CI runs
|
||||
pnpm test:perf:groups --full-suite --allow-failures --output .artifacts/test-perf/baseline-before.json
|
||||
pnpm test:perf:groups:compare .artifacts/test-perf/baseline-before.json .artifacts/test-perf/after-agent.json
|
||||
pnpm perf:kova:summary --report .artifacts/kova/reports/mock-provider/report.json --output .artifacts/kova/summary.md
|
||||
```
|
||||
|
||||
## OpenClaw Performance
|
||||
|
||||
`OpenClaw Performance` is the product/runtime performance workflow. It runs daily on `main` and can be dispatched manually:
|
||||
|
||||
```bash
|
||||
gh workflow run openclaw-performance.yml --ref main -f profile=diagnostic -f repeat=3
|
||||
gh workflow run openclaw-performance.yml --ref main -f profile=smoke -f repeat=1 -f deep_profile=true -f live_gpt54=true
|
||||
```
|
||||
|
||||
The workflow installs OCM from a pinned release and Kova from the pinned `kova_ref` input, then runs three lanes:
|
||||
|
||||
- `mock-provider`: Kova diagnostic scenarios against a local-build runtime with deterministic fake OpenAI-compatible auth.
|
||||
- `mock-deep-profile`: CPU/heap/trace profiling for startup, gateway, and agent-turn hotspots.
|
||||
- `live-gpt54`: a real OpenAI `openai/gpt-5.4` agent turn, skipped when `OPENAI_API_KEY` is unavailable.
|
||||
|
||||
Every lane uploads GitHub artifacts. When `CLAWGRIT_REPORTS_TOKEN` is configured, the workflow also commits `report.json`, `report.md`, bundles, and `index.md` into `openclaw/clawgrit-reports` under `openclaw-performance/<ref>/<run-id>-<attempt>/<lane>/`. The current branch pointer is written as `openclaw-performance/<ref>/latest-<lane>.json`.
|
||||
|
||||
## Full Release Validation
|
||||
|
||||
`Full Release Validation` is the manual umbrella workflow for "run everything before release." It accepts a branch, tag, or full commit SHA, dispatches the manual `CI` workflow with that target, dispatches `Plugin Prerelease` for release-only plugin/package/static/Docker proof, and dispatches `OpenClaw Release Checks` for install smoke, package acceptance, Docker release-path suites, live/E2E, OpenWebUI, QA Lab parity, Matrix, and Telegram lanes. With `rerun_group=all` and `release_profile=full`, it also runs `NPM Telegram Beta E2E` against the `release-package-under-test` artifact from release checks. After publishing, pass `npm_telegram_package_spec` to rerun the same Telegram package lane against the published npm package.
|
||||
|
||||
@@ -46,6 +46,11 @@ When debugging real providers/models (requires real creds):
|
||||
|
||||
- Live suite (models + gateway tool/image probes): `pnpm test:live`
|
||||
- Target one live file quietly: `pnpm test:live -- src/agents/models.profiles.live.test.ts`
|
||||
- Runtime performance reports: dispatch `OpenClaw Performance` with
|
||||
`live_gpt54=true` for a real `openai/gpt-5.4` agent turn or
|
||||
`deep_profile=true` for Kova CPU/heap/trace artifacts. Daily scheduled runs
|
||||
publish mock-provider, deep-profile, and GPT 5.4 lane artifacts to
|
||||
`openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured.
|
||||
- Docker live model sweep: `pnpm test:docker:live-models`
|
||||
- Each selected model now runs a text turn plus a small file-read-style probe.
|
||||
Models whose metadata advertises `image` input also run a tiny image turn.
|
||||
|
||||
@@ -1428,6 +1428,7 @@
|
||||
"moltbot:rpc": "node scripts/run-node.mjs agent --mode rpc --json",
|
||||
"openclaw": "node scripts/run-node.mjs",
|
||||
"openclaw:rpc": "node scripts/run-node.mjs agent --mode rpc --json",
|
||||
"perf:kova:summary": "node scripts/kova-ci-summary.mjs",
|
||||
"plugin-sdk:api:check": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --check",
|
||||
"plugin-sdk:api:gen": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --write",
|
||||
"plugin-sdk:check-exports": "node scripts/sync-plugin-sdk-exports.mjs --check",
|
||||
|
||||
216
scripts/kova-ci-summary.mjs
Normal file
216
scripts/kova-ci-summary.mjs
Normal file
@@ -0,0 +1,216 @@
|
||||
#!/usr/bin/env node
// Render a Kova performance report (JSON) into a Markdown summary for CI.
// Writes to --output when given, otherwise streams the Markdown to stdout.
import { readFile, writeFile } from "node:fs/promises";
import path from "node:path";

const cliArgs = parseArgs(process.argv.slice(2));
if (!cliArgs.report) {
  usage("missing --report");
}

// Metric ids surfaced in the "Key metrics" table, in display order.
const keyMetricIds = [
  "timeToHealthReadyMs",
  "timeToListeningMs",
  "healthP95Ms",
  "peakRssMb",
  "resourcePeakGatewayRssMb",
  "cpuPercentMax",
  "openclawEventLoopMaxMs",
  "agentTurnP95Ms",
  "coldAgentTurnMs",
  "warmAgentTurnMs",
  "agentPreProviderP95Ms",
  "agentProviderFinalP95Ms",
  "agentCleanupP95Ms",
  "runtimeDepsStagingMs",
];

const report = JSON.parse(await readFile(path.resolve(cliArgs.report), "utf8"));
const markdown = renderSummary(report, {
  lane: cliArgs.lane || "kova",
  reportUrl: cliArgs.reportUrl || "",
  artifactUrl: cliArgs.artifactUrl || "",
});

if (cliArgs.output) {
  await writeFile(path.resolve(cliArgs.output), markdown, "utf8");
} else {
  process.stdout.write(markdown);
}
|
||||
|
||||
/**
 * Build the Markdown summary for one Kova report.
 *
 * Sections: a header/metadata bullet list, an optional "Key metrics" table
 * (only metrics listed in the module-level `keyMetricIds`), an optional
 * "Threshold violations" table (capped at 20 rows), and an optional
 * "Records" table (capped at 30 rows).
 *
 * @param {object} report - Parsed Kova report JSON.
 * @param {{lane: string, reportUrl: string, artifactUrl: string}} options
 * @returns {string} Markdown text, always ending in exactly one newline.
 */
function renderSummary(report, options) {
  // Markdown table row from an array of already-escaped cells.
  const toRow = (cells) => `| ${cells.join(" | ")} |`;
  const lines = [];

  const statuses = report.summary?.statuses || {};
  const statusPairs = Object.entries(statuses).map(
    ([status, count]) => `${status}: ${value(count)}`,
  );
  const statusText = statusPairs.join(", ") || "unknown";

  lines.push(`# OpenClaw Performance Report`, "");
  lines.push(`- Lane: ${options.lane}`);
  lines.push(`- Run: ${value(report.runId)}`);
  lines.push(`- Generated: ${value(report.generatedAt)}`);
  lines.push(`- Target: ${value(report.target)}`);
  lines.push(`- Statuses: ${statusText}`);
  lines.push(`- Repeat: ${value(report.performance?.repeat)}`);
  if (options.reportUrl) {
    lines.push(`- Published report: ${options.reportUrl}`);
  }
  if (options.artifactUrl) {
    lines.push(`- GitHub artifact: ${options.artifactUrl}`);
  }
  lines.push("");

  const groups = Array.isArray(report.performance?.groups) ? report.performance.groups : [];
  if (groups.length > 0) {
    lines.push("## Key metrics", "");
    lines.push("| Scenario | State | Metric | Median | p95 | Max |");
    lines.push("| --- | --- | --- | ---: | ---: | ---: |");
    for (const group of groups) {
      for (const metricId of keyMetricIds) {
        const metric = group.metrics?.[metricId];
        // Skip metrics that were never sampled for this group.
        if (!metric || metric.count === 0) {
          continue;
        }
        lines.push(
          toRow([
            value(group.scenario),
            value(group.state),
            value(metric.title || metricId),
            formatMetric(metric.median, metric.unit),
            formatMetric(metric.p95, metric.unit),
            formatMetric(metric.max, metric.unit),
          ]),
        );
      }
    }
    lines.push("");
  }

  const violations = collectViolations(report.records);
  if (violations.length > 0) {
    lines.push("## Threshold violations", "");
    lines.push("| Scenario | State | Metric | Actual | Threshold |");
    lines.push("| --- | --- | --- | ---: | ---: |");
    for (const item of violations.slice(0, 20)) {
      lines.push(
        toRow([
          item.scenario,
          item.state,
          item.metric,
          formatMetric(item.actual, item.unit),
          formatMetric(item.threshold, item.unit),
        ]),
      );
    }
    if (violations.length > 20) {
      lines.push("");
      lines.push(`_Only first 20 of ${violations.length} violations shown._`);
    }
    lines.push("");
  }

  const records = Array.isArray(report.records) ? report.records : [];
  if (records.length > 0) {
    lines.push("## Records", "");
    lines.push("| Scenario | State | Status | Failure |");
    lines.push("| --- | --- | --- | --- |");
    for (const record of records.slice(0, 30)) {
      lines.push(
        toRow([
          value(record.scenario),
          value(record.state?.id ?? record.state),
          value(record.status),
          value(record.failureReason || record.error?.message || ""),
        ]),
      );
    }
    lines.push("");
  }

  return `${lines.join("\n").trimEnd()}\n`;
}
|
||||
|
||||
/**
 * Flatten per-record threshold violations into table-ready rows.
 *
 * @param {Array<object>|undefined} records - Kova report records; anything
 *   that is not an array yields an empty result.
 * @returns {Array<{scenario: string, state: string, metric: string,
 *   actual: *, threshold: *, unit: *}>}
 */
function collectViolations(records) {
  if (!Array.isArray(records)) {
    return [];
  }
  const rows = [];
  for (const record of records) {
    if (!Array.isArray(record.violations)) {
      continue;
    }
    for (const violation of record.violations) {
      rows.push({
        scenario: value(record.scenario),
        state: value(record.state?.id ?? record.state),
        metric: value(violation.metric || violation.id || violation.name),
        // Different Kova versions use different field names; take the
        // first one present.
        actual: violation.actual ?? violation.value,
        threshold: violation.threshold ?? violation.max ?? violation.expected,
        unit: violation.unit,
      });
    }
  }
  return rows;
}
|
||||
|
||||
/**
 * Format a metric value for a Markdown table cell.
 *
 * Nullish or NaN inputs render as an empty cell. Finite numbers are
 * locale-formatted (en-US, thousands separators) with 0 fraction digits
 * at >= 100 and 1 below; anything else is stringified as-is.
 *
 * @param {*} raw - Metric value from the report.
 * @param {string|undefined} unit - Optional unit suffix.
 * @returns {string}
 */
function formatMetric(raw, unit) {
  if (raw === null || raw === undefined || Number.isNaN(raw)) {
    return "";
  }
  const numeric = Number(raw);
  let rendered;
  if (Number.isFinite(numeric)) {
    const maximumFractionDigits = numeric >= 100 ? 0 : 1;
    rendered = numeric.toLocaleString("en-US", { maximumFractionDigits });
  } else {
    rendered = String(raw);
  }
  return unit ? `${rendered} ${unit}` : rendered;
}
|
||||
|
||||
/**
 * Escape an arbitrary value for use inside a Markdown table cell.
 *
 * Nullish input becomes "". Pipes are backslash-escaped and newlines
 * collapsed to spaces so a cell can never break the table layout.
 *
 * @param {*} input
 * @returns {string}
 */
function value(input) {
  if (input == null) {
    return "";
  }
  const text = String(input);
  return text.replaceAll("|", "\\|").replaceAll("\n", " ");
}
|
||||
|
||||
/**
 * Parse CLI flags of the form "--name value".
 *
 * Hyphens inside a flag name are stripped when building the lookup key,
 * so "--report-url" is read back as "reporturl". Unknown positional
 * arguments or flags without a value abort via usage().
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{report?: string, output?: string, lane?: string,
 *   reportUrl?: string, artifactUrl?: string}}
 */
function parseArgs(argv) {
  const parsed = {};
  let index = 0;
  while (index < argv.length) {
    const flag = argv[index];
    if (!flag.startsWith("--")) {
      usage(`unexpected argument: ${flag}`);
    }
    const key = flag.slice(2).replaceAll("-", "");
    const rawValue = argv[index + 1];
    if (!rawValue || rawValue.startsWith("--")) {
      usage(`${flag} requires a value`);
    }
    parsed[key] = rawValue;
    index += 2;
  }
  return {
    report: parsed.report,
    output: parsed.output,
    lane: parsed.lane,
    reportUrl: parsed.reporturl,
    artifactUrl: parsed.artifacturl,
  };
}
|
||||
|
||||
/**
 * Print an optional error plus the CLI usage line to stderr, then exit(2).
 *
 * Fix: the usage string previously omitted the supported --report-url and
 * --artifact-url flags, which parseArgs accepts and the CI workflow passes.
 *
 * @param {string} [message] - Error description shown before the usage line.
 * @returns {never} Terminates the process with exit code 2.
 */
function usage(message) {
  if (message) {
    console.error(`error: ${message}`);
  }
  console.error(
    "usage: node scripts/kova-ci-summary.mjs --report <report.json> [--output <summary.md>] [--lane <name>] [--report-url <url>] [--artifact-url <url>]",
  );
  process.exit(2);
}
|
||||
Reference in New Issue
Block a user