diff --git a/.github/workflows/openclaw-performance.yml b/.github/workflows/openclaw-performance.yml new file mode 100644 index 00000000000..3faf2a42c66 --- /dev/null +++ b/.github/workflows/openclaw-performance.yml @@ -0,0 +1,335 @@ +name: OpenClaw Performance + +on: + schedule: + - cron: "11 5 * * *" + workflow_dispatch: + inputs: + profile: + description: Kova profile to run + required: false + default: diagnostic + type: choice + options: + - smoke + - diagnostic + - soak + - release + repeat: + description: Repeat count for non-profiled Kova runs + required: false + default: "3" + type: string + deep_profile: + description: Run the deep-profile lane with CPU/heap/trace artifacts + required: false + default: false + type: boolean + live_gpt54: + description: Run the live OpenAI GPT 5.4 agent-turn lane + required: false + default: false + type: boolean + fail_on_regression: + description: Fail the workflow when Kova exits non-zero + required: false + default: false + type: boolean + kova_ref: + description: Kova Git ref to install + required: false + default: 51947110f5cacb6ab2c0947594ea9628031c9fcf + type: string + +permissions: + contents: read + +concurrency: + group: ${{ github.event_name == 'workflow_dispatch' && format('{0}-{1}', github.workflow, github.run_id) || format('{0}-{1}', github.workflow, github.ref) }} + cancel-in-progress: false + +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" + OCM_VERSION: v0.2.15 + PERFORMANCE_MODEL_ID: gpt-5.4 + CLAWGRIT_REPORTS_TOKEN_PRESENT: ${{ secrets.CLAWGRIT_REPORTS_TOKEN != '' && 'true' || 'false' }} + +jobs: + kova: + name: ${{ matrix.title }} + runs-on: blacksmith-16vcpu-ubuntu-2404 + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + include: + - lane: mock-provider + title: Kova mock provider performance + auth: mock + repeat: input + deep_profile: "false" + live: "false" + include_filters: "scenario:fresh-install scenario:gateway-performance scenario:bundled-plugin-startup 
scenario:bundled-runtime-deps scenario:agent-cold-warm-message" + - lane: mock-deep-profile + title: Kova mock provider deep profile + auth: mock + repeat: "1" + deep_profile: "true" + live: "false" + include_filters: "scenario:fresh-install scenario:gateway-performance scenario:agent-cold-warm-message" + - lane: live-gpt54 + title: Kova live OpenAI GPT 5.4 agent turn + auth: live + repeat: "1" + deep_profile: "false" + live: "true" + include_filters: "scenario:agent-cold-warm-message" + env: + KOVA_REF: ${{ inputs.kova_ref || '51947110f5cacb6ab2c0947594ea9628031c9fcf' }} + KOVA_HOME: ${{ github.workspace }}/.artifacts/kova/home/${{ matrix.lane }} + REPORT_DIR: ${{ github.workspace }}/.artifacts/kova/reports/${{ matrix.lane }} + BUNDLE_DIR: ${{ github.workspace }}/.artifacts/kova/bundles/${{ matrix.lane }} + SUMMARY_DIR: ${{ github.workspace }}/.artifacts/kova/summaries + LANE_ID: ${{ matrix.lane }} + PROFILE: ${{ inputs.profile || 'diagnostic' }} + REQUESTED_REPEAT: ${{ inputs.repeat || '3' }} + FAIL_ON_REGRESSION: ${{ inputs.fail_on_regression || 'false' }} + INCLUDE_FILTERS: ${{ matrix.include_filters }} + AUTH_MODE: ${{ matrix.auth }} + MATRIX_REPEAT: ${{ matrix.repeat }} + MATRIX_DEEP_PROFILE: ${{ matrix.deep_profile }} + MATRIX_LIVE: ${{ matrix.live }} + steps: + - name: Decide lane + id: lane + shell: bash + run: | + set -euo pipefail + run_lane=true + reason="" + if [[ "$LANE_ID" == "mock-deep-profile" && "${{ github.event_name }}" != "schedule" && "${{ inputs.deep_profile || 'false' }}" != "true" ]]; then + run_lane=false + reason="deep_profile input is false" + fi + if [[ "$LANE_ID" == "live-gpt54" && "${{ github.event_name }}" != "schedule" && "${{ inputs.live_gpt54 || 'false' }}" != "true" ]]; then + run_lane=false + reason="live_gpt54 input is false" + fi + echo "run=$run_lane" >> "$GITHUB_OUTPUT" + if [[ "$run_lane" != "true" ]]; then + echo "Skipping ${LANE_ID}: ${reason}" >> "$GITHUB_STEP_SUMMARY" + fi + + - name: Checkout OpenClaw + if: 
steps.lane.outputs.run == 'true' + uses: actions/checkout@v6 + with: + fetch-depth: 1 + persist-credentials: false + + - name: Set up Node environment + if: steps.lane.outputs.run == 'true' + uses: ./.github/actions/setup-node-env + with: + install-bun: "false" + + - name: Install OCM and Kova + if: steps.lane.outputs.run == 'true' + shell: bash + run: | + set -euo pipefail + KOVA_SRC="${RUNNER_TEMP}/kova-src" + echo "KOVA_SRC=$KOVA_SRC" >> "$GITHUB_ENV" + mkdir -p "$HOME/.local/bin" "$(dirname "$KOVA_SRC")" + curl -fsSL https://raw.githubusercontent.com/shakkernerd/ocm/main/install.sh \ + | bash -s -- --version "$OCM_VERSION" --prefix "$HOME/.local" --force + git clone --filter=blob:none https://github.com/shakkernerd/Kova.git "$KOVA_SRC" + git -C "$KOVA_SRC" checkout "$KOVA_REF" + cat > "$HOME/.local/bin/kova" <> "$GITHUB_PATH" + + - name: Pin Kova OpenAI model to GPT 5.4 + if: steps.lane.outputs.run == 'true' + shell: bash + run: | + set -euo pipefail + node - <<'NODE' + const fs = require("node:fs"); + const path = require("node:path"); + const root = process.env.KOVA_SRC; + const files = [ + "support/configure-openclaw-mock-auth.mjs", + "support/configure-openclaw-live-auth.mjs", + "support/mock-openai-server.mjs", + "states/mock-openai-provider.json" + ]; + for (const rel of files) { + const file = path.join(root, rel); + const before = fs.readFileSync(file, "utf8"); + const after = before.replaceAll("gpt-5.5", process.env.PERFORMANCE_MODEL_ID); + fs.writeFileSync(file, after, "utf8"); + } + NODE + + - name: Kova self-check + if: steps.lane.outputs.run == 'true' + run: kova self-check --json + + - name: Configure live OpenAI auth + if: ${{ steps.lane.outputs.run == 'true' && matrix.live == 'true' }} + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} + shell: bash + run: | + set -euo pipefail + if [[ -z "${OPENAI_API_KEY:-}" ]]; then + echo "OPENAI_API_KEY is not configured; live GPT 5.4 lane will be 
skipped." >> "$GITHUB_STEP_SUMMARY" + exit 0 + fi + kova setup --ci --json + kova setup --non-interactive --auth env-only --provider openai --env-var OPENAI_API_KEY --json + + - name: Run Kova + id: kova + if: steps.lane.outputs.run == 'true' + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} + shell: bash + run: | + set -euo pipefail + mkdir -p "$REPORT_DIR" "$BUNDLE_DIR" "$SUMMARY_DIR" + + if [[ "$MATRIX_LIVE" == "true" && -z "${OPENAI_API_KEY:-}" ]]; then + echo "skipped=true" >> "$GITHUB_OUTPUT" + exit 0 + fi + + repeat="$REQUESTED_REPEAT" + if [[ "$MATRIX_REPEAT" != "input" ]]; then + repeat="$MATRIX_REPEAT" + fi + + args=( + matrix run + --profile "$PROFILE" + --target "local-build:${GITHUB_WORKSPACE}" + --auth "$AUTH_MODE" + --parallel 1 + --repeat "$repeat" + --report-dir "$REPORT_DIR" + --execute + --json + ) + + for filter in $INCLUDE_FILTERS; do + args+=(--include "$filter") + done + + if [[ "$MATRIX_DEEP_PROFILE" == "true" ]]; then + args+=(--deep-profile) + fi + if [[ "$FAIL_ON_REGRESSION" == "true" ]]; then + args+=(--gate) + fi + + log_path="$REPORT_DIR/${LANE_ID}.log" + set +e + kova "${args[@]}" 2>&1 | tee "$log_path" + status=${PIPESTATUS[0]} + set -e + + report_json="$(find "$REPORT_DIR" -maxdepth 1 -type f -name '*.json' -print | sort | tail -n 1)" + if [[ -z "$report_json" ]]; then + echo "Kova did not write a JSON report." 
>&2 + exit 1 + fi + report_md="${report_json%.json}.md" + echo "status=$status" >> "$GITHUB_OUTPUT" + echo "report_json=$report_json" >> "$GITHUB_OUTPUT" + echo "report_md=$report_md" >> "$GITHUB_OUTPUT" + + kova report bundle "$report_json" --output-dir "$BUNDLE_DIR" --json | tee "$BUNDLE_DIR/bundle.json" + + ref_slug="$(printf '%s' "${GITHUB_REF_NAME}" | tr -c 'A-Za-z0-9._-' '-')" + run_slug="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" + report_url="" + if [[ "$CLAWGRIT_REPORTS_TOKEN_PRESENT" == "true" ]]; then + report_url="https://github.com/openclaw/clawgrit-reports/tree/main/openclaw-performance/${ref_slug}/${run_slug}/${LANE_ID}" + fi + summary_path="$SUMMARY_DIR/${LANE_ID}.md" + node scripts/kova-ci-summary.mjs --report "$report_json" --output "$summary_path" --lane "$LANE_ID" --report-url "$report_url" + cat "$summary_path" >> "$GITHUB_STEP_SUMMARY" + + if [[ "$FAIL_ON_REGRESSION" == "true" && "$status" != "0" ]]; then + exit "$status" + fi + + - name: Upload Kova artifacts + if: ${{ always() && steps.lane.outputs.run == 'true' }} + uses: actions/upload-artifact@v5 + with: + name: openclaw-performance-${{ matrix.lane }}-${{ github.run_id }}-${{ github.run_attempt }} + path: | + .artifacts/kova/reports/${{ matrix.lane }} + .artifacts/kova/bundles/${{ matrix.lane }} + .artifacts/kova/summaries/${{ matrix.lane }}.md + if-no-files-found: ignore + retention-days: ${{ matrix.deep_profile == 'true' && 14 || 30 }} + + - name: Checkout clawgrit reports + if: ${{ steps.kova.outputs.report_json != '' && env.CLAWGRIT_REPORTS_TOKEN_PRESENT == 'true' }} + uses: actions/checkout@v6 + with: + repository: openclaw/clawgrit-reports + path: .artifacts/clawgrit-reports + token: ${{ secrets.CLAWGRIT_REPORTS_TOKEN }} + persist-credentials: true + + - name: Publish to clawgrit reports + if: ${{ steps.kova.outputs.report_json != '' && env.CLAWGRIT_REPORTS_TOKEN_PRESENT == 'true' }} + shell: bash + run: | + set -euo pipefail + reports_root=".artifacts/clawgrit-reports" + 
ref_slug="$(printf '%s' "${GITHUB_REF_NAME}" | tr -c 'A-Za-z0-9._-' '-')" + run_slug="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" + dest="${reports_root}/openclaw-performance/${ref_slug}/${run_slug}/${LANE_ID}" + mkdir -p "$dest" + cp "${{ steps.kova.outputs.report_json }}" "$dest/report.json" + if [[ -f "${{ steps.kova.outputs.report_md }}" ]]; then + cp "${{ steps.kova.outputs.report_md }}" "$dest/report.md" + fi + cp "$SUMMARY_DIR/${LANE_ID}.md" "$dest/index.md" + if [[ -d "$BUNDLE_DIR" ]]; then + mkdir -p "$dest/bundles" + cp -R "$BUNDLE_DIR"/. "$dest/bundles/" + fi + cat > "${reports_root}/openclaw-performance/${ref_slug}/latest-${LANE_ID}.json" </-//`. The current branch pointer is written as `openclaw-performance//latest-.json`. + ## Full Release Validation `Full Release Validation` is the manual umbrella workflow for "run everything before release." It accepts a branch, tag, or full commit SHA, dispatches the manual `CI` workflow with that target, dispatches `Plugin Prerelease` for release-only plugin/package/static/Docker proof, and dispatches `OpenClaw Release Checks` for install smoke, package acceptance, Docker release-path suites, live/E2E, OpenWebUI, QA Lab parity, Matrix, and Telegram lanes. With `rerun_group=all` and `release_profile=full`, it also runs `NPM Telegram Beta E2E` against the `release-package-under-test` artifact from release checks. After publishing, pass `npm_telegram_package_spec` to rerun the same Telegram package lane against the published npm package. 
diff --git a/docs/help/testing.md b/docs/help/testing.md index eb8d0642c88..2fe19bf8506 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -46,6 +46,11 @@ When debugging real providers/models (requires real creds): - Live suite (models + gateway tool/image probes): `pnpm test:live` - Target one live file quietly: `pnpm test:live -- src/agents/models.profiles.live.test.ts` +- Runtime performance reports: dispatch `OpenClaw Performance` with + `live_gpt54=true` for a real `openai/gpt-5.4` agent turn or + `deep_profile=true` for Kova CPU/heap/trace artifacts. Daily scheduled runs + publish mock-provider, deep-profile, and GPT 5.4 lane artifacts to + `openclaw/clawgrit-reports` when `CLAWGRIT_REPORTS_TOKEN` is configured. - Docker live model sweep: `pnpm test:docker:live-models` - Each selected model now runs a text turn plus a small file-read-style probe. Models whose metadata advertises `image` input also run a tiny image turn. diff --git a/package.json b/package.json index 695d4ec8ec0..7e4c1d18ddf 100644 --- a/package.json +++ b/package.json @@ -1428,6 +1428,7 @@ "moltbot:rpc": "node scripts/run-node.mjs agent --mode rpc --json", "openclaw": "node scripts/run-node.mjs", "openclaw:rpc": "node scripts/run-node.mjs agent --mode rpc --json", + "perf:kova:summary": "node scripts/kova-ci-summary.mjs", "plugin-sdk:api:check": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --check", "plugin-sdk:api:gen": "node --import tsx scripts/generate-plugin-sdk-api-baseline.ts --write", "plugin-sdk:check-exports": "node scripts/sync-plugin-sdk-exports.mjs --check", diff --git a/scripts/kova-ci-summary.mjs b/scripts/kova-ci-summary.mjs new file mode 100644 index 00000000000..317713128b2 --- /dev/null +++ b/scripts/kova-ci-summary.mjs @@ -0,0 +1,216 @@ +#!/usr/bin/env node +import { readFile, writeFile } from "node:fs/promises"; +import path from "node:path"; + +const args = parseArgs(process.argv.slice(2)); +if (!args.report) { + usage("missing --report"); 
+} + +const keyMetricIds = [ + "timeToHealthReadyMs", + "timeToListeningMs", + "healthP95Ms", + "peakRssMb", + "resourcePeakGatewayRssMb", + "cpuPercentMax", + "openclawEventLoopMaxMs", + "agentTurnP95Ms", + "coldAgentTurnMs", + "warmAgentTurnMs", + "agentPreProviderP95Ms", + "agentProviderFinalP95Ms", + "agentCleanupP95Ms", + "runtimeDepsStagingMs", +]; + +const reportPath = path.resolve(args.report); +const report = JSON.parse(await readFile(reportPath, "utf8")); +const markdown = renderSummary(report, { + lane: args.lane || "kova", + reportUrl: args.reportUrl || "", + artifactUrl: args.artifactUrl || "", +}); + +if (args.output) { + await writeFile(path.resolve(args.output), markdown, "utf8"); +} else { + process.stdout.write(markdown); +} + +function renderSummary(report, options) { + const lines = []; + const statuses = report.summary?.statuses || {}; + const statusText = + Object.entries(statuses) + .map(([status, count]) => `${status}: ${value(count)}`) + .join(", ") || "unknown"; + + lines.push(`# OpenClaw Performance Report`); + lines.push(""); + lines.push(`- Lane: ${options.lane}`); + lines.push(`- Run: ${value(report.runId)}`); + lines.push(`- Generated: ${value(report.generatedAt)}`); + lines.push(`- Target: ${value(report.target)}`); + lines.push(`- Statuses: ${statusText}`); + lines.push(`- Repeat: ${value(report.performance?.repeat)}`); + if (options.reportUrl) { + lines.push(`- Published report: ${options.reportUrl}`); + } + if (options.artifactUrl) { + lines.push(`- GitHub artifact: ${options.artifactUrl}`); + } + lines.push(""); + + const groups = Array.isArray(report.performance?.groups) ? 
report.performance.groups : []; + if (groups.length > 0) { + lines.push("## Key metrics"); + lines.push(""); + lines.push("| Scenario | State | Metric | Median | p95 | Max |"); + lines.push("| --- | --- | --- | ---: | ---: | ---: |"); + for (const group of groups) { + for (const metricId of keyMetricIds) { + const metric = group.metrics?.[metricId]; + if (!metric || metric.count === 0) { + continue; + } + lines.push( + [ + value(group.scenario), + value(group.state), + value(metric.title || metricId), + formatMetric(metric.median, metric.unit), + formatMetric(metric.p95, metric.unit), + formatMetric(metric.max, metric.unit), + ] + .join(" | ") + .replace(/^/, "| ") + .replace(/$/, " |"), + ); + } + } + lines.push(""); + } + + const violations = collectViolations(report.records); + if (violations.length > 0) { + lines.push("## Threshold violations"); + lines.push(""); + lines.push("| Scenario | State | Metric | Actual | Threshold |"); + lines.push("| --- | --- | --- | ---: | ---: |"); + for (const item of violations.slice(0, 20)) { + lines.push( + [ + item.scenario, + item.state, + item.metric, + formatMetric(item.actual, item.unit), + formatMetric(item.threshold, item.unit), + ] + .join(" | ") + .replace(/^/, "| ") + .replace(/$/, " |"), + ); + } + if (violations.length > 20) { + lines.push(""); + lines.push(`_Only first 20 of ${violations.length} violations shown._`); + } + lines.push(""); + } + + const records = Array.isArray(report.records) ? report.records : []; + if (records.length > 0) { + lines.push("## Records"); + lines.push(""); + lines.push("| Scenario | State | Status | Failure |"); + lines.push("| --- | --- | --- | --- |"); + for (const record of records.slice(0, 30)) { + lines.push( + [ + value(record.scenario), + value(record.state?.id ?? 
/**
 * Flattens the threshold violations attached to each Kova record into one
 * row per violation, ready to be rendered as a Markdown table.
 *
 * @param {unknown} records - `report.records`; anything that is not an array yields no rows.
 * @returns {Array<{scenario: string, state: string, metric: string, actual: unknown, threshold: unknown, unit: unknown}>}
 *   One entry per violation; text fields are already table-escaped via `value()`.
 */
function collectViolations(records) {
  if (!Array.isArray(records)) {
    return [];
  }
  return records.flatMap((record) => {
    // Tolerate null/partial record entries instead of throwing mid-report.
    if (!Array.isArray(record?.violations)) {
      return [];
    }
    return record.violations.map((violation) => ({
      scenario: value(record.scenario),
      state: value(record.state?.id ?? record.state),
      metric: value(violation?.metric || violation?.id || violation?.name),
      actual: violation?.actual ?? violation?.value,
      threshold: violation?.threshold ?? violation?.max ?? violation?.expected,
      unit: violation?.unit,
    }));
  });
}

/**
 * Renders a metric value (plus optional unit) for a Markdown table cell.
 *
 * Finite numbers are formatted with en-US grouping: no fraction digits once the
 * magnitude reaches 100, otherwise at most one. Missing values render as an
 * empty cell; non-numeric values are passed through as escaped text.
 *
 * @param {unknown} valueToFormat - Raw metric value from the report.
 * @param {unknown} [unit] - Unit suffix; omitted when falsy.
 * @returns {string} Cell text, or "" when there is nothing to show.
 */
function formatMetric(valueToFormat, unit) {
  if (valueToFormat === null || valueToFormat === undefined || Number.isNaN(valueToFormat)) {
    return "";
  }
  // Number("") is 0, which would fabricate a measurement out of a blank field.
  if (typeof valueToFormat === "string" && valueToFormat.trim() === "") {
    return "";
  }
  const numeric = Number(valueToFormat);
  const rendered = Number.isFinite(numeric)
    ? numeric.toLocaleString("en-US", {
        // Use the magnitude so negative values get the same precision as positive ones.
        maximumFractionDigits: Math.abs(numeric) >= 100 ? 0 : 1,
      })
    : value(valueToFormat);
  return unit ? `${rendered} ${value(unit)}` : rendered;
}

/**
 * Converts any input to single-line text that is safe inside a Markdown table cell.
 *
 * @param {unknown} input - Value to render; null/undefined become "".
 * @returns {string} Text with `|` escaped and every line break (LF, CR, CRLF) replaced by a space.
 */
function value(input) {
  if (input === null || input === undefined) {
    return "";
  }
  return String(input)
    .replaceAll("|", "\\|")
    .replace(/\r\n|\r|\n/g, " ");
}

/**
 * Parses `--flag value` pairs from the command line.
 *
 * Flag names are normalized by dropping dashes and lowercasing, so
 * `--report-url` and `--reportUrl` are equivalent. An explicitly empty value
 * (e.g. `--report-url ""`) is accepted: CI passes one when no published report
 * URL exists, and that must not abort the summary.
 *
 * Calls `usage()` (which exits the process) on a positional argument or on a
 * flag that has no value.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{report?: string, output?: string, lane?: string, reportUrl?: string, artifactUrl?: string}}
 */
function parseArgs(argv) {
  const parsed = {};
  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];
    if (!arg.startsWith("--")) {
      usage(`unexpected argument: ${arg}`);
    }
    const key = arg.slice(2).replaceAll("-", "").toLowerCase();
    const next = argv[index + 1];
    // Only a missing token or another flag counts as "no value"; "" is a valid value.
    if (next === undefined || next.startsWith("--")) {
      usage(`${arg} requires a value`);
    }
    parsed[key] = next;
    index += 1;
  }
  return {
    report: parsed.report,
    output: parsed.output,
    lane: parsed.lane,
    reportUrl: parsed.reporturl,
    artifactUrl: parsed.artifacturl,
  };
}

/**
 * Prints an optional error followed by the usage line to stderr, then exits
 * the process with status 2.
 *
 * @param {string} [message] - Error detail shown before the usage line.
 * @returns {never}
 */
function usage(message) {
  if (message) {
    console.error(`error: ${message}`);
  }
  console.error(
    "usage: node scripts/kova-ci-summary.mjs --report <report.json> [--output <summary.md>] [--lane <name>] [--report-url <url>] [--artifact-url <url>]",
  );
  process.exit(2);
}