Files
openclaw/.github/workflows/parity-gate.yml
2026-05-01 12:42:33 +01:00

110 lines
4.0 KiB
YAML

name: Parity gate
on:
schedule:
- cron: "17 3 * * *"
release:
types: [published]
workflow_dispatch:
permissions:
contents: read
concurrency:
group: parity-gate-${{ github.event.pull_request.number || github.sha }}
cancel-in-progress: true
jobs:
parity-gate:
name: Run the OpenAI / Opus 4.6 parity gate against the qa-lab mock
runs-on: blacksmith-32vcpu-ubuntu-2404
timeout-minutes: 30
env:
# Fence the gate off from any real provider credentials. The qa-lab
# mock server + auth staging (PR N) should be enough to produce a
# meaningful verdict without touching a real API. If any of these
# leak into the job env, fail hard instead of silently running
# against a live provider and burning real budget.
#
# The parity pack has 11 isolated scenario workers. It exercises a real
# gateway child plus mock model turns and subagents, so keep it serial in
# CI even on the larger runner. Concurrent isolated gateway workers make
# the short strict-agentic scenarios flaky, especially the approval-turn
# followthrough gate that expects a fast post-approval read within a 30s
# agent.wait timeout.
QA_PARITY_CONCURRENCY: "1"
OPENCLAW_CI_OPENAI_MODEL: ${{ vars.OPENCLAW_CI_OPENAI_MODEL || 'openai/gpt-5.5' }}
OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "180000"
OPENAI_API_KEY: ""
ANTHROPIC_API_KEY: ""
OPENCLAW_LIVE_OPENAI_KEY: ""
OPENCLAW_LIVE_ANTHROPIC_KEY: ""
OPENCLAW_LIVE_GEMINI_KEY: ""
OPENCLAW_LIVE_SETUP_TOKEN_VALUE: ""
# The parity suite is a private QA command. Build that exact runtime up
# front so CI never tests a public dist plus a later no-clean QA overlay.
OPENCLAW_BUILD_PRIVATE_QA: "1"
OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1"
steps:
- name: Checkout PR
uses: actions/checkout@v6
with:
persist-credentials: false
- name: Install pnpm
uses: pnpm/action-setup@b906affcce14559ad1aafd4ab0e942779e9f58b1
- name: Setup Node
uses: actions/setup-node@v6
with:
node-version: "22.18.0"
cache: "pnpm"
- name: Install dependencies
run: pnpm install --frozen-lockfile
- name: Build private QA runtime
run: pnpm build
# The approval-turn sentinel still runs inside the full parity pack below.
# Keep the exact mock read-plan contract in deterministic unit tests instead
# of paying for a separate full-runtime preflight that has been flaky in CI.
- name: Run OpenAI candidate lane
run: |
pnpm openclaw qa suite \
--provider-mode mock-openai \
--parity-pack agentic \
--concurrency "${QA_PARITY_CONCURRENCY}" \
--model "${OPENCLAW_CI_OPENAI_MODEL}" \
--alt-model openai/gpt-5.4-alt \
--output-dir .artifacts/qa-e2e/gpt54
- name: Run Opus 4.6 lane
run: |
pnpm openclaw qa suite \
--provider-mode mock-openai \
--parity-pack agentic \
--concurrency "${QA_PARITY_CONCURRENCY}" \
--model anthropic/claude-opus-4-6 \
--alt-model anthropic/claude-sonnet-4-6 \
--output-dir .artifacts/qa-e2e/opus46
- name: Generate parity report
run: |
pnpm openclaw qa parity-report \
--repo-root . \
--candidate-summary .artifacts/qa-e2e/gpt54/qa-suite-summary.json \
--baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json \
--candidate-label "${OPENCLAW_CI_OPENAI_MODEL}" \
--baseline-label anthropic/claude-opus-4-6 \
--output-dir .artifacts/qa-e2e/parity
- name: Upload parity artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: parity-gate-${{ github.event.pull_request.number || github.sha }}
path: .artifacts/qa-e2e/
retention-days: 14
if-no-files-found: warn