mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:30:42 +00:00
ci: add OpenClaw performance reports
This commit is contained in:
335
.github/workflows/openclaw-performance.yml
vendored
Normal file
335
.github/workflows/openclaw-performance.yml
vendored
Normal file
@@ -0,0 +1,335 @@
|
||||
name: OpenClaw Performance
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "11 5 * * *"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
profile:
|
||||
description: Kova profile to run
|
||||
required: false
|
||||
default: diagnostic
|
||||
type: choice
|
||||
options:
|
||||
- smoke
|
||||
- diagnostic
|
||||
- soak
|
||||
- release
|
||||
repeat:
|
||||
description: Repeat count for non-profiled Kova runs
|
||||
required: false
|
||||
default: "3"
|
||||
type: string
|
||||
deep_profile:
|
||||
description: Run the deep-profile lane with CPU/heap/trace artifacts
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
live_gpt54:
|
||||
description: Run the live OpenAI GPT 5.4 agent-turn lane
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
fail_on_regression:
|
||||
description: Fail the workflow when Kova exits non-zero
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
kova_ref:
|
||||
description: Kova Git ref to install
|
||||
required: false
|
||||
default: 51947110f5cacb6ab2c0947594ea9628031c9fcf
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.event_name == 'workflow_dispatch' && format('{0}-{1}', github.workflow, github.run_id) || format('{0}-{1}', github.workflow, github.ref) }}
|
||||
cancel-in-progress: false
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
|
||||
OCM_VERSION: v0.2.15
|
||||
PERFORMANCE_MODEL_ID: gpt-5.4
|
||||
CLAWGRIT_REPORTS_TOKEN_PRESENT: ${{ secrets.CLAWGRIT_REPORTS_TOKEN != '' && 'true' || 'false' }}
|
||||
|
||||
jobs:
|
||||
kova:
|
||||
name: ${{ matrix.title }}
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404
|
||||
timeout-minutes: 240
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- lane: mock-provider
|
||||
title: Kova mock provider performance
|
||||
auth: mock
|
||||
repeat: input
|
||||
deep_profile: "false"
|
||||
live: "false"
|
||||
include_filters: "scenario:fresh-install scenario:gateway-performance scenario:bundled-plugin-startup scenario:bundled-runtime-deps scenario:agent-cold-warm-message"
|
||||
- lane: mock-deep-profile
|
||||
title: Kova mock provider deep profile
|
||||
auth: mock
|
||||
repeat: "1"
|
||||
deep_profile: "true"
|
||||
live: "false"
|
||||
include_filters: "scenario:fresh-install scenario:gateway-performance scenario:agent-cold-warm-message"
|
||||
- lane: live-gpt54
|
||||
title: Kova live OpenAI GPT 5.4 agent turn
|
||||
auth: live
|
||||
repeat: "1"
|
||||
deep_profile: "false"
|
||||
live: "true"
|
||||
include_filters: "scenario:agent-cold-warm-message"
|
||||
env:
|
||||
KOVA_REF: ${{ inputs.kova_ref || '51947110f5cacb6ab2c0947594ea9628031c9fcf' }}
|
||||
KOVA_HOME: ${{ github.workspace }}/.artifacts/kova/home/${{ matrix.lane }}
|
||||
REPORT_DIR: ${{ github.workspace }}/.artifacts/kova/reports/${{ matrix.lane }}
|
||||
BUNDLE_DIR: ${{ github.workspace }}/.artifacts/kova/bundles/${{ matrix.lane }}
|
||||
SUMMARY_DIR: ${{ github.workspace }}/.artifacts/kova/summaries
|
||||
LANE_ID: ${{ matrix.lane }}
|
||||
PROFILE: ${{ inputs.profile || 'diagnostic' }}
|
||||
REQUESTED_REPEAT: ${{ inputs.repeat || '3' }}
|
||||
FAIL_ON_REGRESSION: ${{ inputs.fail_on_regression || 'false' }}
|
||||
INCLUDE_FILTERS: ${{ matrix.include_filters }}
|
||||
AUTH_MODE: ${{ matrix.auth }}
|
||||
MATRIX_REPEAT: ${{ matrix.repeat }}
|
||||
MATRIX_DEEP_PROFILE: ${{ matrix.deep_profile }}
|
||||
MATRIX_LIVE: ${{ matrix.live }}
|
||||
steps:
|
||||
- name: Decide lane
|
||||
id: lane
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
run_lane=true
|
||||
reason=""
|
||||
if [[ "$LANE_ID" == "mock-deep-profile" && "${{ github.event_name }}" != "schedule" && "${{ inputs.deep_profile || 'false' }}" != "true" ]]; then
|
||||
run_lane=false
|
||||
reason="deep_profile input is false"
|
||||
fi
|
||||
if [[ "$LANE_ID" == "live-gpt54" && "${{ github.event_name }}" != "schedule" && "${{ inputs.live_gpt54 || 'false' }}" != "true" ]]; then
|
||||
run_lane=false
|
||||
reason="live_gpt54 input is false"
|
||||
fi
|
||||
echo "run=$run_lane" >> "$GITHUB_OUTPUT"
|
||||
if [[ "$run_lane" != "true" ]]; then
|
||||
echo "Skipping ${LANE_ID}: ${reason}" >> "$GITHUB_STEP_SUMMARY"
|
||||
fi
|
||||
|
||||
- name: Checkout OpenClaw
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 1
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Node environment
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
uses: ./.github/actions/setup-node-env
|
||||
with:
|
||||
install-bun: "false"
|
||||
|
||||
- name: Install OCM and Kova
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
KOVA_SRC="${RUNNER_TEMP}/kova-src"
|
||||
echo "KOVA_SRC=$KOVA_SRC" >> "$GITHUB_ENV"
|
||||
mkdir -p "$HOME/.local/bin" "$(dirname "$KOVA_SRC")"
|
||||
curl -fsSL https://raw.githubusercontent.com/shakkernerd/ocm/main/install.sh \
|
||||
| bash -s -- --version "$OCM_VERSION" --prefix "$HOME/.local" --force
|
||||
git clone --filter=blob:none https://github.com/shakkernerd/Kova.git "$KOVA_SRC"
|
||||
git -C "$KOVA_SRC" checkout "$KOVA_REF"
|
||||
cat > "$HOME/.local/bin/kova" <<EOF
|
||||
#!/usr/bin/env bash
|
||||
export KOVA_HOME="${KOVA_HOME}"
|
||||
exec node "${KOVA_SRC}/bin/kova.mjs" "\$@"
|
||||
EOF
|
||||
chmod 0755 "$HOME/.local/bin/kova"
|
||||
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Pin Kova OpenAI model to GPT 5.4
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
node - <<'NODE'
|
||||
const fs = require("node:fs");
|
||||
const path = require("node:path");
|
||||
const root = process.env.KOVA_SRC;
|
||||
const files = [
|
||||
"support/configure-openclaw-mock-auth.mjs",
|
||||
"support/configure-openclaw-live-auth.mjs",
|
||||
"support/mock-openai-server.mjs",
|
||||
"states/mock-openai-provider.json"
|
||||
];
|
||||
for (const rel of files) {
|
||||
const file = path.join(root, rel);
|
||||
const before = fs.readFileSync(file, "utf8");
|
||||
const after = before.replaceAll("gpt-5.5", process.env.PERFORMANCE_MODEL_ID);
|
||||
fs.writeFileSync(file, after, "utf8");
|
||||
}
|
||||
NODE
|
||||
|
||||
- name: Kova self-check
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
run: kova self-check --json
|
||||
|
||||
- name: Configure live OpenAI auth
|
||||
if: ${{ steps.lane.outputs.run == 'true' && matrix.live == 'true' }}
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [[ -z "${OPENAI_API_KEY:-}" ]]; then
|
||||
echo "OPENAI_API_KEY is not configured; live GPT 5.4 lane will be skipped." >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 0
|
||||
fi
|
||||
kova setup --ci --json
|
||||
kova setup --non-interactive --auth env-only --provider openai --env-var OPENAI_API_KEY --json
|
||||
|
||||
- name: Run Kova
|
||||
id: kova
|
||||
if: steps.lane.outputs.run == 'true'
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
mkdir -p "$REPORT_DIR" "$BUNDLE_DIR" "$SUMMARY_DIR"
|
||||
|
||||
if [[ "$MATRIX_LIVE" == "true" && -z "${OPENAI_API_KEY:-}" ]]; then
|
||||
echo "skipped=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
repeat="$REQUESTED_REPEAT"
|
||||
if [[ "$MATRIX_REPEAT" != "input" ]]; then
|
||||
repeat="$MATRIX_REPEAT"
|
||||
fi
|
||||
|
||||
args=(
|
||||
matrix run
|
||||
--profile "$PROFILE"
|
||||
--target "local-build:${GITHUB_WORKSPACE}"
|
||||
--auth "$AUTH_MODE"
|
||||
--parallel 1
|
||||
--repeat "$repeat"
|
||||
--report-dir "$REPORT_DIR"
|
||||
--execute
|
||||
--json
|
||||
)
|
||||
|
||||
for filter in $INCLUDE_FILTERS; do
|
||||
args+=(--include "$filter")
|
||||
done
|
||||
|
||||
if [[ "$MATRIX_DEEP_PROFILE" == "true" ]]; then
|
||||
args+=(--deep-profile)
|
||||
fi
|
||||
if [[ "$FAIL_ON_REGRESSION" == "true" ]]; then
|
||||
args+=(--gate)
|
||||
fi
|
||||
|
||||
log_path="$REPORT_DIR/${LANE_ID}.log"
|
||||
set +e
|
||||
kova "${args[@]}" 2>&1 | tee "$log_path"
|
||||
status=${PIPESTATUS[0]}
|
||||
set -e
|
||||
|
||||
report_json="$(find "$REPORT_DIR" -maxdepth 1 -type f -name '*.json' -print | sort | tail -n 1)"
|
||||
if [[ -z "$report_json" ]]; then
|
||||
echo "Kova did not write a JSON report." >&2
|
||||
exit 1
|
||||
fi
|
||||
report_md="${report_json%.json}.md"
|
||||
echo "status=$status" >> "$GITHUB_OUTPUT"
|
||||
echo "report_json=$report_json" >> "$GITHUB_OUTPUT"
|
||||
echo "report_md=$report_md" >> "$GITHUB_OUTPUT"
|
||||
|
||||
kova report bundle "$report_json" --output-dir "$BUNDLE_DIR" --json | tee "$BUNDLE_DIR/bundle.json"
|
||||
|
||||
ref_slug="$(printf '%s' "${GITHUB_REF_NAME}" | tr -c 'A-Za-z0-9._-' '-')"
|
||||
run_slug="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
|
||||
report_url=""
|
||||
if [[ "$CLAWGRIT_REPORTS_TOKEN_PRESENT" == "true" ]]; then
|
||||
report_url="https://github.com/openclaw/clawgrit-reports/tree/main/openclaw-performance/${ref_slug}/${run_slug}/${LANE_ID}"
|
||||
fi
|
||||
summary_path="$SUMMARY_DIR/${LANE_ID}.md"
|
||||
node scripts/kova-ci-summary.mjs --report "$report_json" --output "$summary_path" --lane "$LANE_ID" --report-url "$report_url"
|
||||
cat "$summary_path" >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
if [[ "$FAIL_ON_REGRESSION" == "true" && "$status" != "0" ]]; then
|
||||
exit "$status"
|
||||
fi
|
||||
|
||||
- name: Upload Kova artifacts
|
||||
if: ${{ always() && steps.lane.outputs.run == 'true' }}
|
||||
uses: actions/upload-artifact@v5
|
||||
with:
|
||||
name: openclaw-performance-${{ matrix.lane }}-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
path: |
|
||||
.artifacts/kova/reports/${{ matrix.lane }}
|
||||
.artifacts/kova/bundles/${{ matrix.lane }}
|
||||
.artifacts/kova/summaries/${{ matrix.lane }}.md
|
||||
if-no-files-found: ignore
|
||||
retention-days: ${{ matrix.deep_profile == 'true' && 14 || 30 }}
|
||||
|
||||
- name: Checkout clawgrit reports
|
||||
if: ${{ steps.kova.outputs.report_json != '' && env.CLAWGRIT_REPORTS_TOKEN_PRESENT == 'true' }}
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
repository: openclaw/clawgrit-reports
|
||||
path: .artifacts/clawgrit-reports
|
||||
token: ${{ secrets.CLAWGRIT_REPORTS_TOKEN }}
|
||||
persist-credentials: true
|
||||
|
||||
- name: Publish to clawgrit reports
|
||||
if: ${{ steps.kova.outputs.report_json != '' && env.CLAWGRIT_REPORTS_TOKEN_PRESENT == 'true' }}
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
reports_root=".artifacts/clawgrit-reports"
|
||||
ref_slug="$(printf '%s' "${GITHUB_REF_NAME}" | tr -c 'A-Za-z0-9._-' '-')"
|
||||
run_slug="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
|
||||
dest="${reports_root}/openclaw-performance/${ref_slug}/${run_slug}/${LANE_ID}"
|
||||
mkdir -p "$dest"
|
||||
cp "${{ steps.kova.outputs.report_json }}" "$dest/report.json"
|
||||
if [[ -f "${{ steps.kova.outputs.report_md }}" ]]; then
|
||||
cp "${{ steps.kova.outputs.report_md }}" "$dest/report.md"
|
||||
fi
|
||||
cp "$SUMMARY_DIR/${LANE_ID}.md" "$dest/index.md"
|
||||
if [[ -d "$BUNDLE_DIR" ]]; then
|
||||
mkdir -p "$dest/bundles"
|
||||
cp -R "$BUNDLE_DIR"/. "$dest/bundles/"
|
||||
fi
|
||||
cat > "${reports_root}/openclaw-performance/${ref_slug}/latest-${LANE_ID}.json" <<EOF
|
||||
{
|
||||
"repository": "${GITHUB_REPOSITORY}",
|
||||
"ref": "${GITHUB_REF_NAME}",
|
||||
"sha": "${GITHUB_SHA}",
|
||||
"workflow": "${GITHUB_WORKFLOW}",
|
||||
"run_id": "${GITHUB_RUN_ID}",
|
||||
"run_attempt": "${GITHUB_RUN_ATTEMPT}",
|
||||
"lane": "${LANE_ID}",
|
||||
"path": "openclaw-performance/${ref_slug}/${run_slug}/${LANE_ID}"
|
||||
}
|
||||
EOF
|
||||
|
||||
git -C "$reports_root" config user.name "openclaw-performance[bot]"
|
||||
git -C "$reports_root" config user.email "openclaw-performance[bot]@users.noreply.github.com"
|
||||
git -C "$reports_root" add openclaw-performance
|
||||
if git -C "$reports_root" diff --cached --quiet; then
|
||||
echo "No clawgrit report changes to publish."
|
||||
exit 0
|
||||
fi
|
||||
git -C "$reports_root" commit -m "perf: add OpenClaw ${LANE_ID} report ${GITHUB_SHA::12}"
|
||||
git -C "$reports_root" push
|
||||
65
docs/ci.md
65
docs/ci.md
@@ -12,29 +12,30 @@ OpenClaw CI runs on every push to `main` and every pull request. The `preflight`
|
||||
|
||||
## Pipeline overview
|
||||
|
||||
| Job | Purpose | When it runs |
|
||||
| -------------------------------- | -------------------------------------------------------------------------------------------- | ---------------------------------- |
|
||||
| `preflight` | Detect docs-only changes, changed scopes, changed extensions, and build the CI manifest | Always on non-draft pushes and PRs |
|
||||
| `security-scm-fast` | Private key detection and workflow audit via `zizmor` | Always on non-draft pushes and PRs |
|
||||
| `security-dependency-audit` | Dependency-free production lockfile audit against npm advisories | Always on non-draft pushes and PRs |
|
||||
| `security-fast` | Required aggregate for the fast security jobs | Always on non-draft pushes and PRs |
|
||||
| `check-dependencies` | Production Knip dependency-only pass plus the unused-file allowlist guard | Node-relevant changes |
|
||||
| `build-artifacts` | Build `dist/`, Control UI, built-artifact checks, and reusable downstream artifacts | Node-relevant changes |
|
||||
| `checks-fast-core` | Fast Linux correctness lanes such as bundled/plugin-contract/protocol checks | Node-relevant changes |
|
||||
| `checks-fast-contracts-channels` | Sharded channel contract checks with a stable aggregate check result | Node-relevant changes |
|
||||
| `checks-node-core-test` | Core Node test shards, excluding channel, bundled, contract, and extension lanes | Node-relevant changes |
|
||||
| `check` | Sharded main local gate equivalent: prod types, lint, guards, test types, and strict smoke | Node-relevant changes |
|
||||
| `check-additional` | Architecture, boundary, extension-surface guards, package-boundary, and gateway-watch shards | Node-relevant changes |
|
||||
| `build-smoke` | Built-CLI smoke tests and startup-memory smoke | Node-relevant changes |
|
||||
| `checks` | Verifier for built-artifact channel tests | Node-relevant changes |
|
||||
| `checks-node-compat-node22` | Node 22 compatibility build and smoke lane | Manual CI dispatch for releases |
|
||||
| `check-docs` | Docs formatting, lint, and broken-link checks | Docs changed |
|
||||
| `skills-python` | Ruff + pytest for Python-backed skills | Python-skill-relevant changes |
|
||||
| `checks-windows` | Windows-specific process/path tests plus shared runtime import specifier regressions | Windows-relevant changes |
|
||||
| `macos-node` | macOS TypeScript test lane using the shared built artifacts | macOS-relevant changes |
|
||||
| `macos-swift` | Swift lint, build, and tests for the macOS app | macOS-relevant changes |
|
||||
| `android` | Android unit tests for both flavors plus one debug APK build | Android-relevant changes |
|
||||
| `test-performance-agent` | Daily Codex slow-test optimization after trusted activity | Main CI success or manual dispatch |
|
||||
| Job | Purpose | When it runs |
|
||||
| -------------------------------- | --------------------------------------------------------------------------------------------------------- | ---------------------------------- |
|
||||
| `preflight` | Detect docs-only changes, changed scopes, changed extensions, and build the CI manifest | Always on non-draft pushes and PRs |
|
||||
| `security-scm-fast` | Private key detection and workflow audit via `zizmor` | Always on non-draft pushes and PRs |
|
||||
| `security-dependency-audit` | Dependency-free production lockfile audit against npm advisories | Always on non-draft pushes and PRs |
|
||||
| `security-fast` | Required aggregate for the fast security jobs | Always on non-draft pushes and PRs |
|
||||
| `check-dependencies` | Production Knip dependency-only pass plus the unused-file allowlist guard | Node-relevant changes |
|
||||
| `build-artifacts` | Build `dist/`, Control UI, built-artifact checks, and reusable downstream artifacts | Node-relevant changes |
|
||||
| `checks-fast-core` | Fast Linux correctness lanes such as bundled/plugin-contract/protocol checks | Node-relevant changes |
|
||||
| `checks-fast-contracts-channels` | Sharded channel contract checks with a stable aggregate check result | Node-relevant changes |
|
||||
| `checks-node-core-test` | Core Node test shards, excluding channel, bundled, contract, and extension lanes | Node-relevant changes |
|
||||
| `check` | Sharded main local gate equivalent: prod types, lint, guards, test types, and strict smoke | Node-relevant changes |
|
||||
| `check-additional` | Architecture, boundary, extension-surface guards, package-boundary, and gateway-watch shards | Node-relevant changes |
|
||||
| `build-smoke` | Built-CLI smoke tests and startup-memory smoke | Node-relevant changes |
|
||||
| `checks` | Verifier for built-artifact channel tests | Node-relevant changes |
|
||||
| `checks-node-compat-node22` | Node 22 compatibility build and smoke lane | Manual CI dispatch for releases |
|
||||
| `check-docs` | Docs formatting, lint, and broken-link checks | Docs changed |
|
||||
| `skills-python` | Ruff + pytest for Python-backed skills | Python-skill-relevant changes |
|
||||
| `checks-windows` | Windows-specific process/path tests plus shared runtime import specifier regressions | Windows-relevant changes |
|
||||
| `macos-node` | macOS TypeScript test lane using the shared built artifacts | macOS-relevant changes |
|
||||
| `macos-swift` | Swift lint, build, and tests for the macOS app | macOS-relevant changes |
|
||||
| `android` | Android unit tests for both flavors plus one debug APK build | Android-relevant changes |
|
||||
| `test-performance-agent` | Daily Codex slow-test optimization after trusted activity | Main CI success or manual dispatch |
|
||||
| `openclaw-performance` | Daily/on-demand Kova runtime performance reports with mock-provider, deep-profile, and GPT 5.4 live lanes | Scheduled and manual dispatch |
|
||||
|
||||
## Fail-fast order
|
||||
|
||||
@@ -124,8 +125,26 @@ node scripts/ci-run-timings.mjs --latest-main # ignore issue/comment noise and c
|
||||
node scripts/ci-run-timings.mjs --recent 10 # compare recent successful main CI runs
|
||||
pnpm test:perf:groups --full-suite --allow-failures --output .artifacts/test-perf/baseline-before.json
|
||||
pnpm test:perf:groups:compare .artifacts/test-perf/baseline-before.json .artifacts/test-perf/after-agent.json
|
||||
pnpm perf:kova:summary --report .artifacts/kova/reports/mock-provider/report.json --output .artifacts/kova/summary.md
|
||||
```
|
||||
|
||||
## OpenClaw Performance
|
||||
|
||||
`OpenClaw Performance` is the product/runtime performance workflow. It runs daily on `main` and can be dispatched manually:
|
||||
|
||||
```bash
|
||||
gh workflow run openclaw-performance.yml --ref main -f profile=diagnostic -f repeat=3
|
||||
gh workflow run openclaw-performance.yml --ref main -f profile=smoke -f repeat=1 -f deep_profile=true -f live_gpt54=true
|
||||
```
|
||||
|
||||
The workflow installs OCM from a pinned release and Kova from the pinned `kova_ref` input, then runs three lanes:
|
||||
|
||||
- `mock-provider`: Kova diagnostic scenarios against a local-build runtime with deterministic fake OpenAI-compatible auth.
|
||||
- `mock-deep-profile`: CPU/heap/trace profiling for startup, gateway, and agent-turn hotspots.
|
||||
- `live-gpt54`: a real OpenAI `openai/gpt-5.4` agent turn, skipped when `OPENAI_API_KEY` is unavailable.
|
||||
|
||||
Every lane uploads GitHub artifacts. When `CLAWGRIT_REPORTS_TOKEN` is configured, the workflow also commits `report.json`, `report.md`, bundles, and `index.md` into `openclaw/clawgrit-reports` under `openclaw-performance/<ref>/<run-id>-<attempt>/<lane>/`. The current branch pointer is written as `openclaw-performance/<ref>/latest-<lane>.json`.
|
||||
|
||||
## Full Release Validation
|
||||
|
||||
`Full Release Validation` is the manual umbrella workflow for "run everything before release." It accepts a branch, tag, or full commit SHA, dispatches the manual `CI` workflow with that target, dispatches `Plugin Prerelease` for release-only plugin/package/static/Docker proof, and dispatches `OpenClaw Release Checks` for install smoke, package acceptance, Docker release-path suites, live/E2E, OpenWebUI, QA Lab parity, Matrix, and Telegram lanes. With `rerun_group=all` and `release_profile=full`, it also runs `NPM Telegram Beta E2E` against the `release-package-under-test` artifact from release checks. After publishing, pass `npm_telegram_package_spec` to rerun the same Telegram package lane against the published npm package.
|
||||
|
||||
@@ -46,6 +46,11 @@ When debugging real providers/models (requires real creds):
|
||||
|
||||
- Live suite (models + gateway tool/image probes): `pnpm test:live`
|
||||
- Target one live file quietly: `pnpm test:live -- src/agents/models.profiles.live.test.ts`
|
||||
- Runtime performance reports: dispatch `OpenClaw Performance` with
|
||||
`live_gpt54=true` for a real `openai/gpt-5.4` agent turn or
|
||||
`deep_profile=true` for Kova CPU/heap/trace artifacts. Daily scheduled runs
|
||||
publish mock-provider, deep-profile, and GPT 5.4 lane artifacts to
|
||||
`openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured.
|
||||
- Docker live model sweep: `pnpm test:docker:live-models`
|
||||
- Each selected model now runs a text turn plus a small file-read-style probe.
|
||||
Models whose metadata advertises `image` input also run a tiny image turn.
|
||||
|
||||
@@ -1428,6 +1428,7 @@
|
||||
"moltbot:rpc": "node scripts/run-node.mjs agent --mode rpc --json",
|
||||
"openclaw": "node scripts/run-node.mjs",
|
||||
"openclaw:rpc": "node scripts/run-node.mjs agent --mode rpc --json",
|
||||
"perf:kova:summary": "node scripts/kova-ci-summary.mjs",
|
||||
"plugin-sdk:api:check": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --check",
|
||||
"plugin-sdk:api:gen": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --write",
|
||||
"plugin-sdk:check-exports": "node scripts/sync-plugin-sdk-exports.mjs --check",
|
||||
|
||||
216
scripts/kova-ci-summary.mjs
Normal file
216
scripts/kova-ci-summary.mjs
Normal file
@@ -0,0 +1,216 @@
|
||||
#!/usr/bin/env node
// Render a Kova performance report (JSON) into a Markdown summary for CI.
// Writes to --output when given, otherwise streams the Markdown to stdout.
import { readFile, writeFile } from "node:fs/promises";
import path from "node:path";

const cliArgs = parseArgs(process.argv.slice(2));
if (!cliArgs.report) {
  usage("missing --report");
}

// Metric ids surfaced in the "Key metrics" table, in display order.
const keyMetricIds = [
  "timeToHealthReadyMs",
  "timeToListeningMs",
  "healthP95Ms",
  "peakRssMb",
  "resourcePeakGatewayRssMb",
  "cpuPercentMax",
  "openclawEventLoopMaxMs",
  "agentTurnP95Ms",
  "coldAgentTurnMs",
  "warmAgentTurnMs",
  "agentPreProviderP95Ms",
  "agentProviderFinalP95Ms",
  "agentCleanupP95Ms",
  "runtimeDepsStagingMs",
];

const report = JSON.parse(await readFile(path.resolve(cliArgs.report), "utf8"));
const markdown = renderSummary(report, {
  lane: cliArgs.lane || "kova",
  reportUrl: cliArgs.reportUrl || "",
  artifactUrl: cliArgs.artifactUrl || "",
});

if (cliArgs.output) {
  await writeFile(path.resolve(cliArgs.output), markdown, "utf8");
} else {
  process.stdout.write(markdown);
}
|
||||
|
||||
/**
 * Build the Markdown summary for one Kova report.
 *
 * Sections: a header/metadata bullet list, an optional "Key metrics" table
 * (only metrics listed in the module-level `keyMetricIds`), an optional
 * "Threshold violations" table (capped at 20 rows), and an optional
 * "Records" table (capped at 30 rows).
 *
 * @param {object} report - Parsed Kova report JSON.
 * @param {{lane: string, reportUrl: string, artifactUrl: string}} options
 * @returns {string} Markdown text, always ending in exactly one newline.
 */
function renderSummary(report, options) {
  // Markdown table row from an array of already-escaped cells.
  const toRow = (cells) => `| ${cells.join(" | ")} |`;
  const lines = [];

  const statuses = report.summary?.statuses || {};
  const statusPairs = Object.entries(statuses).map(
    ([status, count]) => `${status}: ${value(count)}`,
  );
  const statusText = statusPairs.join(", ") || "unknown";

  lines.push(`# OpenClaw Performance Report`, "");
  lines.push(`- Lane: ${options.lane}`);
  lines.push(`- Run: ${value(report.runId)}`);
  lines.push(`- Generated: ${value(report.generatedAt)}`);
  lines.push(`- Target: ${value(report.target)}`);
  lines.push(`- Statuses: ${statusText}`);
  lines.push(`- Repeat: ${value(report.performance?.repeat)}`);
  if (options.reportUrl) {
    lines.push(`- Published report: ${options.reportUrl}`);
  }
  if (options.artifactUrl) {
    lines.push(`- GitHub artifact: ${options.artifactUrl}`);
  }
  lines.push("");

  const groups = Array.isArray(report.performance?.groups) ? report.performance.groups : [];
  if (groups.length > 0) {
    lines.push("## Key metrics", "");
    lines.push("| Scenario | State | Metric | Median | p95 | Max |");
    lines.push("| --- | --- | --- | ---: | ---: | ---: |");
    for (const group of groups) {
      for (const metricId of keyMetricIds) {
        const metric = group.metrics?.[metricId];
        // Skip metrics that were never sampled for this group.
        if (!metric || metric.count === 0) {
          continue;
        }
        lines.push(
          toRow([
            value(group.scenario),
            value(group.state),
            value(metric.title || metricId),
            formatMetric(metric.median, metric.unit),
            formatMetric(metric.p95, metric.unit),
            formatMetric(metric.max, metric.unit),
          ]),
        );
      }
    }
    lines.push("");
  }

  const violations = collectViolations(report.records);
  if (violations.length > 0) {
    lines.push("## Threshold violations", "");
    lines.push("| Scenario | State | Metric | Actual | Threshold |");
    lines.push("| --- | --- | --- | ---: | ---: |");
    for (const item of violations.slice(0, 20)) {
      lines.push(
        toRow([
          item.scenario,
          item.state,
          item.metric,
          formatMetric(item.actual, item.unit),
          formatMetric(item.threshold, item.unit),
        ]),
      );
    }
    if (violations.length > 20) {
      lines.push("");
      lines.push(`_Only first 20 of ${violations.length} violations shown._`);
    }
    lines.push("");
  }

  const records = Array.isArray(report.records) ? report.records : [];
  if (records.length > 0) {
    lines.push("## Records", "");
    lines.push("| Scenario | State | Status | Failure |");
    lines.push("| --- | --- | --- | --- |");
    for (const record of records.slice(0, 30)) {
      lines.push(
        toRow([
          value(record.scenario),
          value(record.state?.id ?? record.state),
          value(record.status),
          value(record.failureReason || record.error?.message || ""),
        ]),
      );
    }
    lines.push("");
  }

  return `${lines.join("\n").trimEnd()}\n`;
}
|
||||
|
||||
/**
 * Flatten per-record threshold violations into table-ready rows.
 *
 * @param {Array<object>|undefined} records - Kova report records; anything
 *   that is not an array yields an empty result.
 * @returns {Array<{scenario: string, state: string, metric: string,
 *   actual: *, threshold: *, unit: *}>}
 */
function collectViolations(records) {
  if (!Array.isArray(records)) {
    return [];
  }
  const rows = [];
  for (const record of records) {
    if (!Array.isArray(record.violations)) {
      continue;
    }
    for (const violation of record.violations) {
      rows.push({
        scenario: value(record.scenario),
        state: value(record.state?.id ?? record.state),
        metric: value(violation.metric || violation.id || violation.name),
        // Different Kova versions use different field names; take the
        // first one present.
        actual: violation.actual ?? violation.value,
        threshold: violation.threshold ?? violation.max ?? violation.expected,
        unit: violation.unit,
      });
    }
  }
  return rows;
}
|
||||
|
||||
/**
 * Format a metric value for a Markdown table cell.
 *
 * Nullish or NaN inputs render as an empty cell. Finite numbers are
 * locale-formatted (en-US, thousands separators) with 0 fraction digits
 * at >= 100 and 1 below; anything else is stringified as-is.
 *
 * @param {*} raw - Metric value from the report.
 * @param {string|undefined} unit - Optional unit suffix.
 * @returns {string}
 */
function formatMetric(raw, unit) {
  if (raw === null || raw === undefined || Number.isNaN(raw)) {
    return "";
  }
  const numeric = Number(raw);
  let rendered;
  if (Number.isFinite(numeric)) {
    const maximumFractionDigits = numeric >= 100 ? 0 : 1;
    rendered = numeric.toLocaleString("en-US", { maximumFractionDigits });
  } else {
    rendered = String(raw);
  }
  return unit ? `${rendered} ${unit}` : rendered;
}
|
||||
|
||||
/**
 * Escape an arbitrary value for use inside a Markdown table cell.
 *
 * Nullish input becomes "". Pipes are backslash-escaped and newlines
 * collapsed to spaces so a cell can never break the table layout.
 *
 * @param {*} input
 * @returns {string}
 */
function value(input) {
  if (input == null) {
    return "";
  }
  const text = String(input);
  return text.replaceAll("|", "\\|").replaceAll("\n", " ");
}
|
||||
|
||||
/**
 * Parse CLI flags of the form "--name value".
 *
 * Hyphens inside a flag name are stripped when building the lookup key,
 * so "--report-url" is read back as "reporturl". Unknown positional
 * arguments or flags without a value abort via usage().
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{report?: string, output?: string, lane?: string,
 *   reportUrl?: string, artifactUrl?: string}}
 */
function parseArgs(argv) {
  const parsed = {};
  let index = 0;
  while (index < argv.length) {
    const flag = argv[index];
    if (!flag.startsWith("--")) {
      usage(`unexpected argument: ${flag}`);
    }
    const key = flag.slice(2).replaceAll("-", "");
    const rawValue = argv[index + 1];
    if (!rawValue || rawValue.startsWith("--")) {
      usage(`${flag} requires a value`);
    }
    parsed[key] = rawValue;
    index += 2;
  }
  return {
    report: parsed.report,
    output: parsed.output,
    lane: parsed.lane,
    reportUrl: parsed.reporturl,
    artifactUrl: parsed.artifacturl,
  };
}
|
||||
|
||||
/**
 * Print an optional error plus the CLI usage line to stderr, then exit(2).
 *
 * Fix: the usage string previously omitted the supported --report-url and
 * --artifact-url flags, which parseArgs accepts and the CI workflow passes.
 *
 * @param {string} [message] - Error description shown before the usage line.
 * @returns {never} Terminates the process with exit code 2.
 */
function usage(message) {
  if (message) {
    console.error(`error: ${message}`);
  }
  console.error(
    "usage: node scripts/kova-ci-summary.mjs --report <report.json> [--output <summary.md>] [--lane <name>] [--report-url <url>] [--artifact-url <url>]",
  );
  process.exit(2);
}
|
||||
Reference in New Issue
Block a user