# openclaw/.github/workflows/parity-gate.yml

# Workflow identity, triggers, token scope and run de-duplication for the
# parity gate.
name: Parity gate

on:
  pull_request:
    # ready_for_review is listed so that a PR leaving draft state gets a run.
    types:
      - opened
      - reopened
      - synchronize
      - ready_for_review
    # Only PRs that change one of these paths (or this workflow file itself)
    # trigger the gate.
    paths:
      - "extensions/qa-lab/**"
      - "extensions/qa-channel/**"
      - "extensions/openai/**"
      - "qa/scenarios/**"
      - "src/agents/**"
      - "src/context-engine/**"
      - "src/gateway/**"
      - "src/media/**"
      - ".github/workflows/parity-gate.yml"

# The workflow token only gets read access to repository contents.
permissions:
  contents: read

# Runs are grouped per pull request number (falling back to the commit SHA
# when no PR number is available); a newer run in the same group cancels the
# one still in progress.
concurrency:
  group: parity-gate-${{ github.event.pull_request.number || github.sha }}
  cancel-in-progress: true

jobs:
  parity-gate:
    name: Run the GPT-5.4 / Opus 4.6 parity gate against the qa-lab mock
    # Draft pull requests are skipped.
    if: github.event.pull_request.draft != true
    runs-on: blacksmith-32vcpu-ubuntu-2404
    timeout-minutes: 30
    env:
      # Provider credential fence.
      #
      # Every real-provider credential is set to the empty string for the
      # whole job. The qa-lab mock server plus auth staging (TODO: link the PR
      # that added it) should be enough to produce a meaningful verdict
      # without touching a real API, so no lane should ever need a live key.
      # The intent is that a leaked key makes the run fail hard instead of
      # silently running against a live provider and burning real budget.
      # NOTE(review): nothing in this workflow asserts that the values are
      # empty; confirm the CLI itself rejects a live-provider call made with
      # an empty key.
      OPENAI_API_KEY: ""
      ANTHROPIC_API_KEY: ""
      OPENCLAW_LIVE_OPENAI_KEY: ""
      OPENCLAW_LIVE_ANTHROPIC_KEY: ""
      OPENCLAW_LIVE_GEMINI_KEY: ""
      OPENCLAW_LIVE_SETUP_TOKEN_VALUE: ""

      # Scenario concurrency.
      #
      # The parity pack has 11 isolated scenario workers. It exercises a real
      # gateway child plus mock model turns and subagents, so it stays serial
      # in CI even on the larger runner. Concurrent isolated gateway workers
      # make the short strict-agentic scenarios flaky, especially the
      # approval-turn followthrough gate, which expects a fast post-approval
      # read within a 30s agent.wait timeout.
      QA_PARITY_CONCURRENCY: "1"

      # Private QA runtime.
      #
      # The parity suite is a private QA command. These flags are set for the
      # whole job so the exact private runtime is built up front and CI never
      # tests a public dist plus a later no-clean QA overlay.
      OPENCLAW_BUILD_PRIVATE_QA: "1"
      OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1"
    steps:
      # Check out the PR. The later steps run PR-controlled code (install
      # scripts, the build, the QA suite) and none of the steps in this job
      # pushes or fetches with authentication, so the workflow token is not
      # left behind in the git configuration.
      - name: Checkout PR
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      # pnpm has to exist before setup-node runs so that `cache: "pnpm"` can
      # locate the pnpm store. No version is pinned here.
      # NOTE(review): presumably resolved from package.json `packageManager`
      # - confirm.
      - name: Install pnpm
        uses: pnpm/action-setup@v4

      # Node version is pinned exactly and quoted so it stays a string.
      - name: Setup Node
        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"
          cache: "pnpm"

      # --frozen-lockfile makes the install fail instead of rewriting the
      # lockfile when it is out of date.
      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      # Runs with the job-level OPENCLAW_BUILD_PRIVATE_QA=1, so this single
      # build is the private QA runtime that the later steps use.
      - name: Build private QA runtime
        run: pnpm build

      # Candidate lane. Both lanes use the same provider mode
      # (mock-openai), the same parity pack (agentic) and the job-level
      # QA_PARITY_CONCURRENCY; they differ only in --model, --alt-model and
      # --output-dir. The folded scalar joins the lines below into a single
      # command line.
      - name: Run GPT-5.4 lane
        run: >-
          pnpm openclaw qa suite
          --provider-mode mock-openai
          --parity-pack agentic
          --concurrency "${QA_PARITY_CONCURRENCY}"
          --model openai/gpt-5.4
          --alt-model openai/gpt-5.4-alt
          --output-dir .artifacts/qa-e2e/gpt54

      # Baseline lane: same invocation with the Anthropic model pair, writing
      # to its own output directory.
      - name: Run Opus 4.6 lane
        run: >-
          pnpm openclaw qa suite
          --provider-mode mock-openai
          --parity-pack agentic
          --concurrency "${QA_PARITY_CONCURRENCY}"
          --model anthropic/claude-opus-4-6
          --alt-model anthropic/claude-sonnet-4-6
          --output-dir .artifacts/qa-e2e/opus46

      # Compares the two lane summaries: the GPT-5.4 output is passed as the
      # candidate and the Opus 4.6 output as the baseline. The folded scalar
      # joins the lines below into a single command line.
      - name: Generate parity report
        run: >-
          pnpm openclaw qa parity-report
          --repo-root .
          --candidate-summary .artifacts/qa-e2e/gpt54/qa-suite-summary.json
          --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json
          --candidate-label openai/gpt-5.4
          --baseline-label anthropic/claude-opus-4-6
          --output-dir .artifacts/qa-e2e/parity

      # always() keeps the upload running even when an earlier step failed,
      # so whatever the lanes wrote is still available. A missing directory
      # only produces a warning rather than failing the step.
      - name: Upload parity artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          path: .artifacts/qa-e2e/
          name: parity-gate-${{ github.event.pull_request.number || github.sha }}
          if-no-files-found: warn
          retention-days: 14