ci: add test performance agent

Peter Steinberger
2026-04-23 20:40:42 +01:00
parent 6fc8913223
commit 360cb3dbf1
4 changed files with 330 additions and 0 deletions

.github/codex/prompts/test-performance-agent.md View File

@@ -0,0 +1,41 @@
# OpenClaw Test Performance Agent

You are maintaining OpenClaw test performance after a trusted main-branch CI run.

Goal: inspect the full-suite test performance report, then make small, coverage-preserving improvements to slow tests when the fix is clear. If the baseline report shows failing tests and the fix is obvious, fix those too.

Inputs:
- Baseline grouped report: `.artifacts/test-perf/baseline-before.json`
- Per-config Vitest JSON reports: `.artifacts/test-perf/baseline-before/vitest-json/`
- Per-config logs: `.artifacts/test-perf/baseline-before/logs/`

Hard limits:
- Preserve test coverage and behavioral intent.
- Do not delete, skip, weaken, or narrow test cases to make the suite faster.
- Do not add `test.skip`, `it.skip`, `describe.skip`, `test.only`, `it.only`, or `describe.only`.
- Do not update snapshots, generated baselines, inventories, ignore files, lockfiles, package metadata, CI workflows, or release metadata.
- Do not add dependencies.
- Do not create, delete, or rename files.
- Keep changes minimal and focused on the slow or failing tests you can justify from the report.
- Prefer no edit when a performance improvement is speculative.

Good fixes:
- Replace broad partial module mocks, especially `importOriginal()` mocks, with narrow injected dependencies or local runtime seams (see the sketch after this list).
- Avoid importing heavy barrels in hot tests when a narrow module or helper covers the same behavior.
- Move expensive setup from per-test hooks to shared setup only when state isolation remains correct.
- Reuse existing fixtures/builders instead of recreating expensive work per case.
- Mock expensive runtime boundaries directly: filesystem crawls, package registries, provider SDKs, network/process launch, browser/runtime scanners.
- Keep one integration smoke per boundary and test pure helpers directly, but only when the same behavior remains covered.
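
As a minimal sketch of the first item, assuming the module under test can accept its boundary as a parameter (every module name and identifier here is hypothetical; the pattern is what matters):

```ts
import { describe, expect, it, vi } from "vitest";
// Hypothetical module under test; it takes its registry boundary as a parameter.
import { resolvePlugins } from "./resolve-plugins";

// Before (slow): a broad partial mock still loads and transforms the real module.
// vi.mock("./registry", async (importOriginal) => {
//   const actual = await importOriginal<typeof import("./registry")>();
//   return { ...actual, fetchRegistry: vi.fn(async () => []) };
// });

// After (fast): the test injects a stub, so the heavy module is never imported.
describe("resolvePlugins", () => {
  it("returns an empty list for an empty registry", async () => {
    const fetchRegistry = vi.fn(async () => []); // narrow injected dependency
    await expect(resolvePlugins({ fetchRegistry })).resolves.toEqual([]);
  });
});
```

The same assertion still runs against real behavior; only the transform and import cost of the broad mock disappears.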

Required workflow:
1. Run `pnpm docs:list` if available, then read `docs/reference/test.md` and `docs/help/testing.md` sections about test performance.
2. Inspect `.artifacts/test-perf/baseline-before.json`; focus on the slowest files/configs or any failed configs (a triage sketch follows this list).
3. Pick at most a few low-risk files. Explain the coverage-preserving reason in comments only if the code would otherwise be unclear.
4. Run targeted tests for changed files where possible. Use `pnpm test <path>` and optionally `pnpm test:perf:imports <path>`.
5. Leave the worktree clean if no safe improvement exists.
When uncertain, make no edit and explain the uncertainty in the final message.
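
A possible triage sketch for step 2. The top-level `failed` flag and `totals.testCount` are the fields the workflow's own guard step reads; the `files` array and its `path`/`durationMs` fields are hypothetical stand-ins for however the grouped report lists per-file timings:

```ts
import { readFileSync } from "node:fs";

const report = JSON.parse(
  readFileSync(".artifacts/test-perf/baseline-before.json", "utf8"),
);

// `failed` and `totals.testCount` match what the workflow's guard step reads.
if (report.failed) console.warn("Baseline has failing configs; check logs first.");
console.log(`Total tests: ${report.totals?.testCount ?? "unknown"}`);

// Hypothetical per-file shape: sort and print the slowest candidates.
const files = (report.files ?? []) as Array<{ path: string; durationMs: number }>;
for (const f of files.sort((a, b) => b.durationMs - a.durationMs).slice(0, 10)) {
  console.log(`${String(f.durationMs).padStart(8)} ms  ${f.path}`);
}
```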

.github/workflows/test-performance-agent.yml View File

@@ -0,0 +1,274 @@
name: Test Performance Agent
on:
  workflow_run: # zizmor: ignore[dangerous-triggers] main-only test optimization after trusted CI; job gates repository, event, branch, actor, conclusion, current main SHA, and daily cadence before using write token
    workflows:
      - CI
    types:
      - completed
  workflow_dispatch:
permissions:
  actions: read
  contents: write
concurrency:
  group: test-performance-agent-main
  cancel-in-progress: false
env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
  TEST_PERF_BEFORE: .artifacts/test-perf/baseline-before.json
  TEST_PERF_AFTER: .artifacts/test-perf/after-agent.json
  TEST_PERF_COMPARE: .artifacts/test-perf/agent-compare.json
jobs:
  optimize-tests:
    if: >
      github.repository == 'openclaw/openclaw' &&
      (github.event_name == 'workflow_dispatch' ||
      (github.event.workflow_run.conclusion == 'success' &&
      github.event.workflow_run.event == 'push' &&
      github.event.workflow_run.head_branch == 'main' &&
      !endsWith(github.event.workflow_run.actor.login, '[bot]')))
    runs-on: blacksmith-32vcpu-ubuntu-2404
    timeout-minutes: 240
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          ref: main
          fetch-depth: 0
          persist-credentials: false
          submodules: false
      - name: Gate trusted main activity and daily cadence
        id: gate
        env:
          EVENT_NAME: ${{ github.event_name }}
          GH_TOKEN: ${{ github.token }}
          WORKFLOW_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
        run: |
          set -euo pipefail
          if [ "$EVENT_NAME" != "workflow_run" ]; then
            echo "run_agent=true" >> "$GITHUB_OUTPUT"
            echo "base_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          for attempt in 1 2 3 4 5; do
            if git fetch --no-tags origin main; then
              break
            fi
            if [ "$attempt" = "5" ]; then
              echo "Failed to fetch main after retries." >&2
              exit 1
            fi
            echo "Fetch attempt ${attempt} failed; retrying."
            sleep $((attempt * 2))
          done
          remote_main="$(git rev-parse origin/main)"
          if [ "$remote_main" != "$WORKFLOW_HEAD_SHA" ]; then
            echo "CI run is superseded by ${remote_main}; skipping test performance agent for ${WORKFLOW_HEAD_SHA}."
            echo "run_agent=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          day_start="$(date -u +%Y-%m-%dT00:00:00Z)"
          runs_json="$RUNNER_TEMP/test-performance-agent-runs.json"
gh api "repos/${GITHUB_REPOSITORY}/actions/workflows/test-performance-agent.yml/runs" \
-f branch=main \
-f event=workflow_run \
-f per_page=50 > "$runs_json"
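          # A run counts against today's budget if it is any other workflow_run-
          # triggered run created since UTC midnight that was not cancelled or skipped.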
prior_runs="$(
jq -r \
--argjson current_run_id "$GITHUB_RUN_ID" \
--arg day_start "$day_start" \
'.workflow_runs[]
| select(.database_id != $current_run_id)
| select(.created_at >= $day_start)
| select(.status != "cancelled")
| select((.conclusion // "") != "skipped")
| [.database_id, .status, (.conclusion // ""), .created_at, .head_sha]
| @tsv' "$runs_json"
)"
if [ -n "$prior_runs" ]; then
echo "Test performance agent already ran or is running today; skipping."
printf '%s\n' "$prior_runs"
echo "run_agent=false" >> "$GITHUB_OUTPUT"
exit 0
fi
echo "run_agent=true" >> "$GITHUB_OUTPUT"
echo "base_sha=${remote_main}" >> "$GITHUB_OUTPUT"
      - name: Setup Node environment
        if: steps.gate.outputs.run_agent == 'true'
        uses: ./.github/actions/setup-node-env
        with:
          install-bun: "false"
      - name: Ensure test performance agent key exists
        if: steps.gate.outputs.run_agent == 'true'
        env:
          OPENAI_API_KEY: ${{ secrets.OPENCLAW_TEST_PERF_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
        run: |
          set -euo pipefail
          if [ -z "${OPENAI_API_KEY:-}" ]; then
            echo "Missing OPENCLAW_TEST_PERF_AGENT_OPENAI_API_KEY or OPENAI_API_KEY secret." >&2
            exit 1
          fi
      - name: Build baseline full-suite performance report
        if: steps.gate.outputs.run_agent == 'true'
        run: pnpm test:perf:groups --full-suite --allow-failures --output "$TEST_PERF_BEFORE" --limit 20 --top-files 40
      - name: Run Codex test performance agent
        if: steps.gate.outputs.run_agent == 'true'
        uses: openai/codex-action@v1
        with:
          openai-api-key: ${{ secrets.OPENCLAW_TEST_PERF_AGENT_OPENAI_API_KEY || secrets.OPENAI_API_KEY }}
          prompt-file: .github/codex/prompts/test-performance-agent.md
          model: gpt-5.4
          effort: high
          sandbox: workspace-write
          safety-strategy: drop-sudo
          codex-args: '["--full-auto"]'
      - name: Enforce focused test performance patch
        if: steps.gate.outputs.run_agent == 'true'
        id: patch
        run: |
          set -euo pipefail
          untracked="$(git ls-files --others --exclude-standard)"
          if [ -n "$untracked" ]; then
            echo "Test performance agent created untracked files; forbidden:"
            printf '%s\n' "$untracked"
            exit 1
          fi
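          # --diff-filter=ADR keeps only added (A), deleted (D), and renamed (R)
          # entries; the agent may only modify existing tracked files in place.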
          added_deleted_or_renamed="$(git diff --name-status --diff-filter=ADR)"
          if [ -n "$added_deleted_or_renamed" ]; then
            echo "Test performance agent added, deleted, or renamed tracked files; forbidden:"
            printf '%s\n' "$added_deleted_or_renamed"
            exit 1
          fi
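          # Path allow-list: the case arms swallow permitted roots; anything else
          # falls through to the default arm and is reported as forbidden.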
bad_paths="$(
git diff --name-only | while IFS= read -r path; do
case "$path" in
apps/*|extensions/*|packages/*|scripts/*|src/*|Swabble/*|test/*|ui/*) ;;
*) printf '%s\n' "$path" ;;
esac
done
)"
if [ -n "$bad_paths" ]; then
echo "Test performance agent touched forbidden paths:"
printf '%s\n' "$bad_paths"
exit 1
fi
if git diff --quiet; then
echo "has_changes=false" >> "$GITHUB_OUTPUT"
else
echo "has_changes=true" >> "$GITHUB_OUTPUT"
fi
      - name: Restore Node 24 path
        if: steps.gate.outputs.run_agent == 'true' && steps.patch.outputs.has_changes == 'true'
        run: |
          set -euo pipefail
          export PATH="${NODE_BIN}:${PATH}"
          echo "${NODE_BIN}" >> "$GITHUB_PATH"
          node -v
          corepack enable
          pnpm -v
      - name: Run full-suite performance report after agent changes
        if: steps.gate.outputs.run_agent == 'true' && steps.patch.outputs.has_changes == 'true'
        run: pnpm test:perf:groups --full-suite --output "$TEST_PERF_AFTER" --limit 20 --top-files 40
      - name: Compare test performance reports
        if: steps.gate.outputs.run_agent == 'true' && steps.patch.outputs.has_changes == 'true'
        run: pnpm test:perf:groups:compare "$TEST_PERF_BEFORE" "$TEST_PERF_AFTER" --output "$TEST_PERF_COMPARE" --limit 20 --top-files 40
      - name: Enforce coverage-preserving test count
        if: steps.gate.outputs.run_agent == 'true' && steps.patch.outputs.has_changes == 'true'
        run: |
          set -euo pipefail
          node <<'NODE'
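          // Coverage guard: a baseline with failing configs is only a partial
          // report, so the total-count comparison is skipped in that case.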
          const fs = require("node:fs");
          const before = JSON.parse(fs.readFileSync(process.env.TEST_PERF_BEFORE, "utf8"));
          const after = JSON.parse(fs.readFileSync(process.env.TEST_PERF_AFTER, "utf8"));
          if (before.failed) {
            console.log("Baseline had failing configs; skipping total test-count comparison against partial report.");
            process.exit(0);
          }
          const beforeTests = before.totals?.testCount ?? 0;
          const afterTests = after.totals?.testCount ?? 0;
          if (afterTests < beforeTests) {
            console.error(`Test count decreased from ${beforeTests} to ${afterTests}; refusing coverage-reducing patch.`);
            process.exit(1);
          }
          console.log(`Test count preserved: ${beforeTests} -> ${afterTests}.`);
          NODE
      - name: Check changed lanes
        if: steps.gate.outputs.run_agent == 'true' && steps.patch.outputs.has_changes == 'true'
        run: pnpm check:changed
      - name: Commit test performance updates
        if: steps.gate.outputs.run_agent == 'true' && steps.patch.outputs.has_changes == 'true'
        env:
          BASE_SHA: ${{ steps.gate.outputs.base_sha }}
          GITHUB_TOKEN: ${{ github.token }}
          TARGET_BRANCH: main
        run: |
          set -euo pipefail
          if git diff --quiet; then
            echo "No test performance changes."
            exit 0
          fi
          git config user.name "openclaw-test-performance-agent[bot]"
          git config user.email "openclaw-test-performance-agent[bot]@users.noreply.github.com"
          git add apps extensions packages scripts src Swabble test ui
          git commit --no-verify -m "test: optimize slow tests"
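          # Fetch-then-push with retries; if main has advanced past the SHA this
          # run was gated on, drop the patch as stale instead of pushing onto
          # commits that CI has not vetted.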
          for attempt in 1 2 3 4 5; do
            if ! git fetch --no-tags origin "${TARGET_BRANCH}"; then
              echo "Fetch attempt ${attempt} failed; retrying."
              sleep $((attempt * 2))
              continue
            fi
            if git push "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" HEAD:"${TARGET_BRANCH}"; then
              exit 0
            fi
            remote_main="$(git rev-parse "origin/${TARGET_BRANCH}")"
            if [ "$remote_main" != "$BASE_SHA" ]; then
              echo "main advanced from ${BASE_SHA} to ${remote_main}; skipping stale test performance update."
              exit 0
            fi
            echo "Test performance update attempt ${attempt} failed; retrying."
            sleep $((attempt * 2))
          done
          echo "Failed to push test performance updates after retries." >&2
          exit 1
      - name: Upload test performance artifacts
        if: steps.gate.outputs.run_agent == 'true' && always()
        uses: actions/upload-artifact@v7
        with:
          name: test-performance-agent-${{ github.run_id }}
          path: .artifacts/test-perf/
          if-no-files-found: ignore
          retention-days: 14