# openclaw/.github/workflows/parity-gate.yml

name: Parity gate

# Triggers: a daily cron run, every published release, and manual dispatch.
on:
  schedule:
    # Minute 17 of hour 03, every day (GitHub evaluates cron in UTC).
    - cron: "17 3 * * *"
  release:
    types:
      - published
  workflow_dispatch:

# Restrict the workflow token to read-only access to repository contents;
# no other permission scope is requested.
permissions:
  contents: read

# One active run per commit: a newer run in the same group cancels an older
# run that is still in progress. This workflow is only triggered by schedule,
# release and workflow_dispatch events, none of which carries a pull_request
# payload, so the group is keyed on the commit SHA directly.
concurrency:
  group: parity-gate-${{ github.sha }}
  cancel-in-progress: true

jobs:
  parity-gate:
    name: Run the OpenAI / Opus 4.6 parity gate against the qa-lab mock
    runs-on: blacksmith-32vcpu-ubuntu-2404
    timeout-minutes: 30
    env:
      # Credential fence: the gate is supposed to reach a meaningful verdict
      # from the qa-lab mock server and auth staging alone, without touching
      # a real provider API or burning real budget. Every provider credential
      # variable is therefore pinned to the empty string for all steps of
      # this job.
      # NOTE(review): blanking the variables is the only guard visible in
      # this workflow; confirm whether the QA CLI itself fails hard when a
      # live credential is present.
      OPENAI_API_KEY: ""
      ANTHROPIC_API_KEY: ""
      OPENCLAW_LIVE_OPENAI_KEY: ""
      OPENCLAW_LIVE_ANTHROPIC_KEY: ""
      OPENCLAW_LIVE_GEMINI_KEY: ""
      OPENCLAW_LIVE_SETUP_TOKEN_VALUE: ""
      # Run the parity pack serially, even on the larger runner. The pack has
      # 11 isolated scenario workers, each exercising a real gateway child
      # plus mock model turns and subagents. Running isolated gateway workers
      # concurrently makes the short strict-agentic scenarios flaky, most
      # notably the approval-turn followthrough gate, which expects a fast
      # post-approval read within a 30s agent.wait timeout.
      QA_PARITY_CONCURRENCY: "1"
      # Candidate model id: the OPENCLAW_CI_OPENAI_MODEL configuration
      # variable when it is set, otherwise openai/gpt-5.5.
      OPENCLAW_CI_OPENAI_MODEL: ${{ vars.OPENCLAW_CI_OPENAI_MODEL || 'openai/gpt-5.5' }}
      # Presumably the QA transport readiness timeout in milliseconds
      # (180 s); confirm against the QA CLI.
      OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "180000"
      # The parity suite is a private QA command. Build that exact runtime up
      # front so CI never tests a public dist with a later no-clean QA
      # overlay applied on top of it.
      OPENCLAW_BUILD_PRIVATE_QA: "1"
      OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1"
    steps:
      # This workflow has no pull_request trigger, so the checkout is of the
      # ref that belongs to the triggering schedule/release/dispatch event.
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          # Do not keep the workflow token in the local git configuration
          # after checkout; later steps only need the working tree.
          persist-credentials: false

      # Pinned to a commit SHA. No pnpm version input is passed here, so the
      # version is resolved by the action (presumably from the repository's
      # package.json; confirm).
      - name: Install pnpm
        uses: pnpm/action-setup@b906affcce14559ad1aafd4ab0e942779e9f58b1

      # Runs after the pnpm install step because `cache: pnpm` needs the pnpm
      # binary to be available.
      - name: Setup Node
        uses: actions/setup-node@v6
        with:
          cache: pnpm
          node-version: "22.18.0"

      # --frozen-lockfile makes the install fail instead of rewriting the
      # lockfile when it is out of date with the manifests.
      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      # The job-level env sets OPENCLAW_BUILD_PRIVATE_QA=1 for this build.
      - name: Build private QA runtime
        run: pnpm build

      # There is deliberately no separate full-runtime preflight step: it has
      # been flaky in CI. The approval-turn sentinel still runs as part of the
      # full parity pack below, and the exact mock read-plan contract is kept
      # in deterministic unit tests instead.
      #
      # NOTE(review): the output directory is named gpt54 although the model
      # comes from OPENCLAW_CI_OPENAI_MODEL; the parity-report step reads this
      # exact path.
      - name: Run OpenAI candidate lane
        run: >-
          pnpm openclaw qa suite
          --provider-mode mock-openai
          --parity-pack agentic
          --concurrency "${QA_PARITY_CONCURRENCY}"
          --model "${OPENCLAW_CI_OPENAI_MODEL}"
          --alt-model openai/gpt-5.4-alt
          --output-dir .artifacts/qa-e2e/gpt54

      # Baseline lane: same parity pack, provider mode and concurrency as the
      # candidate lane, run with the Anthropic model ids and written to its
      # own output directory.
      - name: Run Opus 4.6 lane
        run: >-
          pnpm openclaw qa suite
          --provider-mode mock-openai
          --parity-pack agentic
          --concurrency "${QA_PARITY_CONCURRENCY}"
          --model anthropic/claude-opus-4-6
          --alt-model anthropic/claude-sonnet-4-6
          --output-dir .artifacts/qa-e2e/opus46

      # Compares the two lane summaries written by the suite steps above:
      # gpt54 is the candidate, opus46 is the baseline. The candidate label is
      # the same model id that was passed to the candidate lane.
      - name: Generate parity report
        run: >-
          pnpm openclaw qa parity-report
          --repo-root .
          --candidate-summary .artifacts/qa-e2e/gpt54/qa-suite-summary.json
          --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json
          --candidate-label "${OPENCLAW_CI_OPENAI_MODEL}"
          --baseline-label anthropic/claude-opus-4-6
          --output-dir .artifacts/qa-e2e/parity

      # Upload whatever the lanes and the report produced, even when an
      # earlier step failed, so a failing gate can still be inspected.
      - name: Upload parity artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          # Keyed on the commit SHA: none of this workflow's triggers
          # (schedule, release, workflow_dispatch) provides a pull request
          # number to use instead.
          name: parity-gate-${{ github.sha }}
          path: .artifacts/qa-e2e/
          retention-days: 14
          # A run that failed before producing any output only logs a warning
          # here rather than failing this step as well.
          if-no-files-found: warn