From f1edd601bc28e6aba2a49b17db1bbee4d7fe30b6 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Tue, 28 Apr 2026 00:05:31 +0100
Subject: [PATCH] ci: split release qa parity lanes

---
 .agents/skills/openclaw-testing/SKILL.md      |  6 ++
 .github/workflows/openclaw-release-checks.yml | 99 +++++++++++++++----
 docs/ci.md                                    |  4 +-
 3 files changed, 91 insertions(+), 18 deletions(-)

diff --git a/.agents/skills/openclaw-testing/SKILL.md b/.agents/skills/openclaw-testing/SKILL.md
index 7098320cbf4..5d178d3ece4 100644
--- a/.agents/skills/openclaw-testing/SKILL.md
+++ b/.agents/skills/openclaw-testing/SKILL.md
@@ -204,6 +204,12 @@ gh workflow run openclaw-release-checks.yml \
 Release-check rerun groups are `all`, `install-smoke`, `cross-os`, `live-e2e`,
 `package`, `qa`, `qa-parity`, and `qa-live`.
 
+The release QA parity gate is internally split into candidate and baseline lane
+jobs, followed by a report job that downloads both artifacts and runs
+`pnpm openclaw qa parity-report`. For parity failures, inspect the failed lane
+first; inspect the report job when both lane summaries exist but the comparison
+fails.
+
 ### QA Lab Matrix Profiles
 
 `pnpm openclaw qa matrix` defaults to `--profile all`. Do not assume the CLI
diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml
index b09006deadc..6374a99c568 100644
--- a/.github/workflows/openclaw-release-checks.yml
+++ b/.github/workflows/openclaw-release-checks.yml
@@ -302,14 +302,22 @@ jobs:
       OPENCLAW_QA_CONVEX_SITE_URL: ${{ secrets.OPENCLAW_QA_CONVEX_SITE_URL }}
       OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }}
 
-  qa_lab_parity_release_checks:
-    name: Run QA Lab parity gate
+  qa_lab_parity_lane_release_checks:
+    name: Run QA Lab parity lane (${{ matrix.lane }})
     needs: [resolve_target]
     if: contains(fromJSON('["all","qa","qa-parity"]'), needs.resolve_target.outputs.rerun_group)
     runs-on: blacksmith-32vcpu-ubuntu-2404
     timeout-minutes: 30
     permissions:
       contents: read
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - lane: candidate
+            output_dir: gpt54
+          - lane: baseline
+            output_dir: opus46
     env:
       QA_PARITY_CONCURRENCY: "1"
       OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "180000"
@@ -338,25 +346,80 @@ jobs:
       - name: Build private QA runtime
         run: pnpm build
 
-      - name: Run OpenAI candidate lane
+      - name: Run parity lane
+        env:
+          QA_PARITY_LANE: ${{ matrix.lane }}
+          QA_PARITY_OUTPUT_DIR: ${{ matrix.output_dir }}
         run: |
-          pnpm openclaw qa suite \
-            --provider-mode mock-openai \
-            --parity-pack agentic \
-            --concurrency "${QA_PARITY_CONCURRENCY}" \
-            --model "${OPENCLAW_CI_OPENAI_MODEL}" \
-            --alt-model openai/gpt-5.4-alt \
-            --output-dir .artifacts/qa-e2e/gpt54
+          set -euo pipefail
+
+          case "${QA_PARITY_LANE}" in
+            candidate)
+              model="${OPENCLAW_CI_OPENAI_MODEL}"
+              alt_model="openai/gpt-5.4-alt"
+              ;;
+            baseline)
+              model="anthropic/claude-opus-4-6"
+              alt_model="anthropic/claude-sonnet-4-6"
+              ;;
+            *)
+              echo "Unknown QA parity lane: ${QA_PARITY_LANE}" >&2
+              exit 1
+              ;;
+          esac
 
-      - name: Run Opus 4.6 lane
-        run: |
           pnpm openclaw qa suite \
             --provider-mode mock-openai \
             --parity-pack agentic \
             --concurrency "${QA_PARITY_CONCURRENCY}" \
-            --model anthropic/claude-opus-4-6 \
-            --alt-model anthropic/claude-sonnet-4-6 \
-            --output-dir .artifacts/qa-e2e/opus46
+            --model "${model}" \
+            --alt-model "${alt_model}" \
+            --output-dir ".artifacts/qa-e2e/${QA_PARITY_OUTPUT_DIR}"
+
+      - name: Upload parity lane artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-qa-parity-${{ matrix.lane }}-${{ needs.resolve_target.outputs.sha }}
+          path: .artifacts/qa-e2e/
+          retention-days: 14
+          if-no-files-found: warn
+
+  qa_lab_parity_report_release_checks:
+    name: Run QA Lab parity report
+    needs: [resolve_target, qa_lab_parity_lane_release_checks]
+    if: contains(fromJSON('["all","qa","qa-parity"]'), needs.resolve_target.outputs.rerun_group)
+    runs-on: blacksmith-32vcpu-ubuntu-2404
+    timeout-minutes: 20
+    permissions:
+      contents: read
+      actions: read
+    env:
+      OPENCLAW_BUILD_PRIVATE_QA: "1"
+      OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1"
+    steps:
+      - name: Checkout selected ref
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ needs.resolve_target.outputs.ref }}
+          fetch-depth: 1
+
+      - name: Setup Node environment
+        uses: ./.github/actions/setup-node-env
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          pnpm-version: ${{ env.PNPM_VERSION }}
+          install-bun: "true"
+
+      - name: Download parity lane artifacts
+        uses: actions/download-artifact@v4
+        with:
+          pattern: release-qa-parity-*-${{ needs.resolve_target.outputs.sha }}
+          path: .artifacts/qa-e2e/
+          merge-multiple: true
+
+      - name: Build private QA runtime
+        run: pnpm build
 
       - name: Generate parity report
         run: |
@@ -548,7 +611,8 @@ jobs:
       - cross_os_release_checks
       - live_and_e2e_release_checks
       - package_acceptance_release_checks
-      - qa_lab_parity_release_checks
+      - qa_lab_parity_lane_release_checks
+      - qa_lab_parity_report_release_checks
       - qa_live_matrix_release_checks
       - qa_live_telegram_release_checks
     if: always()
@@ -566,7 +630,8 @@ jobs:
             "cross_os_release_checks=${{ needs.cross_os_release_checks.result }}" \
             "live_and_e2e_release_checks=${{ needs.live_and_e2e_release_checks.result }}" \
             "package_acceptance_release_checks=${{ needs.package_acceptance_release_checks.result }}" \
-            "qa_lab_parity_release_checks=${{ needs.qa_lab_parity_release_checks.result }}" \
+            "qa_lab_parity_lane_release_checks=${{ needs.qa_lab_parity_lane_release_checks.result }}" \
+            "qa_lab_parity_report_release_checks=${{ needs.qa_lab_parity_report_release_checks.result }}" \
             "qa_live_matrix_release_checks=${{ needs.qa_live_matrix_release_checks.result }}" \
             "qa_live_telegram_release_checks=${{ needs.qa_live_telegram_release_checks.result }}"
           do
diff --git a/docs/ci.md b/docs/ci.md
index 4a197ebad8d..ad1948096d0 100644
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -197,7 +197,9 @@ when the checked-out CLI supports it. The CLI default and manual workflow input
 remain `all`; manual `matrix_profile=all`
 dispatch always shards full Matrix coverage into `transport`, `media`,
 `e2ee-smoke`, `e2ee-deep`, and `e2ee-cli` jobs. `OpenClaw Release Checks` also
-runs the release-critical QA Lab lanes before release approval.
+runs the release-critical QA Lab lanes before release approval; its QA parity
+gate runs the candidate and baseline packs as parallel lane jobs, after which a
+small report job downloads both lane artifacts for the final parity comparison.
 
 The `Duplicate PRs After Merge` workflow is a manual maintainer workflow for
 post-land duplicate cleanup. It defaults to dry-run and only closes explicitly