From f1edd601bc28e6aba2a49b17db1bbee4d7fe30b6 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 28 Apr 2026 00:05:31 +0100 Subject: [PATCH] ci: split release qa parity lanes --- .agents/skills/openclaw-testing/SKILL.md | 6 ++ .github/workflows/openclaw-release-checks.yml | 99 +++++++++++++++---- docs/ci.md | 4 +- 3 files changed, 91 insertions(+), 18 deletions(-) diff --git a/.agents/skills/openclaw-testing/SKILL.md b/.agents/skills/openclaw-testing/SKILL.md index 7098320cbf4..5d178d3ece4 100644 --- a/.agents/skills/openclaw-testing/SKILL.md +++ b/.agents/skills/openclaw-testing/SKILL.md @@ -204,6 +204,12 @@ gh workflow run openclaw-release-checks.yml \ Release-check rerun groups are `all`, `install-smoke`, `cross-os`, `live-e2e`, `package`, `qa`, `qa-parity`, and `qa-live`. +The release QA parity box is internally split into candidate and baseline lane +jobs, followed by a report job that downloads both artifacts and runs +`pnpm openclaw qa parity-report`. For parity failures, inspect the failed lane +first; inspect the report job when both lane summaries exist but the comparison +fails. + ### QA Lab Matrix Profiles `pnpm openclaw qa matrix` defaults to `--profile all`. Do not assume the CLI diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml index b09006deadc..6374a99c568 100644 --- a/.github/workflows/openclaw-release-checks.yml +++ b/.github/workflows/openclaw-release-checks.yml @@ -302,14 +302,22 @@ jobs: OPENCLAW_QA_CONVEX_SITE_URL: ${{ secrets.OPENCLAW_QA_CONVEX_SITE_URL }} OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }} - qa_lab_parity_release_checks: - name: Run QA Lab parity gate + qa_lab_parity_lane_release_checks: + name: Run QA Lab parity lane (${{ matrix.lane }}) needs: [resolve_target] if: contains(fromJSON('["all","qa","qa-parity"]'), needs.resolve_target.outputs.rerun_group) runs-on: blacksmith-32vcpu-ubuntu-2404 timeout-minutes: 30 permissions: contents: read + strategy: + fail-fast: false + matrix: + include: + - lane: candidate + output_dir: gpt54 + - lane: baseline + output_dir: opus46 env: QA_PARITY_CONCURRENCY: "1" OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "180000" @@ -338,25 +346,80 @@ jobs: - name: Build private QA runtime run: pnpm build - - name: Run OpenAI candidate lane + - name: Run parity lane + env: + QA_PARITY_LANE: ${{ matrix.lane }} + QA_PARITY_OUTPUT_DIR: ${{ matrix.output_dir }} run: | - pnpm openclaw qa suite \ - --provider-mode mock-openai \ - --parity-pack agentic \ - --concurrency "${QA_PARITY_CONCURRENCY}" \ - --model "${OPENCLAW_CI_OPENAI_MODEL}" \ - --alt-model openai/gpt-5.4-alt \ - --output-dir .artifacts/qa-e2e/gpt54 + set -euo pipefail + + case "${QA_PARITY_LANE}" in + candidate) + model="${OPENCLAW_CI_OPENAI_MODEL}" + alt_model="openai/gpt-5.4-alt" + ;; + baseline) + model="anthropic/claude-opus-4-6" + alt_model="anthropic/claude-sonnet-4-6" + ;; + *) + echo "Unknown QA parity lane: ${QA_PARITY_LANE}" >&2 + exit 1 + ;; + esac - - name: Run Opus 4.6 lane - run: | pnpm openclaw qa suite \ --provider-mode mock-openai \ --parity-pack agentic \ --concurrency "${QA_PARITY_CONCURRENCY}" \ - --model anthropic/claude-opus-4-6 \ - --alt-model anthropic/claude-sonnet-4-6 \ - --output-dir .artifacts/qa-e2e/opus46 + --model "${model}" \ + --alt-model "${alt_model}" \ + --output-dir ".artifacts/qa-e2e/${QA_PARITY_OUTPUT_DIR}" + + - name: Upload parity lane artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-qa-parity-${{ matrix.lane }}-${{ needs.resolve_target.outputs.sha }} + path: .artifacts/qa-e2e/ + retention-days: 14 + if-no-files-found: warn + + qa_lab_parity_report_release_checks: + name: Run QA Lab parity report + needs: [resolve_target, qa_lab_parity_lane_release_checks] + if: contains(fromJSON('["all","qa","qa-parity"]'), needs.resolve_target.outputs.rerun_group) + runs-on: blacksmith-32vcpu-ubuntu-2404 + timeout-minutes: 20 + permissions: + contents: read + actions: read + env: + OPENCLAW_BUILD_PRIVATE_QA: "1" + OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1" + steps: + - name: Checkout selected ref + uses: actions/checkout@v6 + with: + ref: ${{ needs.resolve_target.outputs.ref }} + fetch-depth: 1 + + - name: Setup Node environment + uses: ./.github/actions/setup-node-env + with: + node-version: ${{ env.NODE_VERSION }} + pnpm-version: ${{ env.PNPM_VERSION }} + install-bun: "true" + + - name: Download parity lane artifacts + uses: actions/download-artifact@v4 + with: + pattern: release-qa-parity-*-${{ needs.resolve_target.outputs.sha }} + path: .artifacts/qa-e2e/ + merge-multiple: true + + - name: Build private QA runtime + run: pnpm build - name: Generate parity report run: | @@ -548,7 +611,8 @@ jobs: - cross_os_release_checks - live_and_e2e_release_checks - package_acceptance_release_checks - - qa_lab_parity_release_checks + - qa_lab_parity_lane_release_checks + - qa_lab_parity_report_release_checks - qa_live_matrix_release_checks - qa_live_telegram_release_checks if: always() @@ -566,7 +630,8 @@ jobs: "cross_os_release_checks=${{ needs.cross_os_release_checks.result }}" \ "live_and_e2e_release_checks=${{ needs.live_and_e2e_release_checks.result }}" \ "package_acceptance_release_checks=${{ needs.package_acceptance_release_checks.result }}" \ - "qa_lab_parity_release_checks=${{ needs.qa_lab_parity_release_checks.result }}" \ + "qa_lab_parity_lane_release_checks=${{ needs.qa_lab_parity_lane_release_checks.result }}" \ + "qa_lab_parity_report_release_checks=${{ needs.qa_lab_parity_report_release_checks.result }}" \ "qa_live_matrix_release_checks=${{ needs.qa_live_matrix_release_checks.result }}" \ "qa_live_telegram_release_checks=${{ needs.qa_live_telegram_release_checks.result }}" do diff --git a/docs/ci.md b/docs/ci.md index 4a197ebad8d..ad1948096d0 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -197,7 +197,9 @@ when the checked-out CLI supports it. The CLI default and manual workflow input remain `all`; manual `matrix_profile=all` dispatch always shards full Matrix coverage into `transport`, `media`, `e2ee-smoke`, `e2ee-deep`, and `e2ee-cli` jobs. `OpenClaw Release Checks` also -runs the release-critical QA Lab lanes before release approval. +runs the release-critical QA Lab lanes before release approval; its QA parity +gate runs the candidate and baseline packs as parallel lane jobs, then downloads +both artifacts into a small report job for the final parity comparison. The `Duplicate PRs After Merge` workflow is a manual maintainer workflow for post-land duplicate cleanup. It defaults to dry-run and only closes explicitly