# Mirror of https://github.com/openclaw/openclaw.git
# Synced 2026-05-06 07:20:43 +00:00
# 111 lines, 3.8 KiB, YAML
# Display name of this workflow.
name: Parity gate

# Trigger: pull requests only, and only when the change touches one of the
# listed source trees or this workflow file itself. `ready_for_review` is
# included so that a PR leaving draft state starts a run.
on:
  pull_request:
    types: [opened, reopened, synchronize, ready_for_review]
    paths:
      - "extensions/qa-lab/**"
      - "extensions/qa-channel/**"
      - "extensions/openai/**"
      - "qa/scenarios/**"
      - "src/agents/**"
      - "src/context-engine/**"
      - "src/gateway/**"
      - "src/media/**"
      - ".github/workflows/parity-gate.yml"

# Least privilege: the workflow token is limited to reading repository
# contents; no other scope is requested.
permissions:
  contents: read

# One live run per pull request (keyed by PR number, falling back to the
# commit SHA when no PR number is available). A newer run in the same group
# cancels the one still in progress.
concurrency:
  group: parity-gate-${{ github.event.pull_request.number || github.sha }}
  cancel-in-progress: true

jobs:
  # Runs the agentic parity pack twice against the qa-lab mock provider (once
  # per model lane), then compares the two suite summaries in a parity report.
  parity-gate:
    name: Run the GPT-5.4 / Opus 4.6 parity gate against the qa-lab mock
    # Skip draft PRs; the `ready_for_review` trigger re-runs the gate later.
    if: ${{ github.event.pull_request.draft != true }}
    runs-on: blacksmith-32vcpu-ubuntu-2404
    timeout-minutes: 30
    env:
      # The parity pack has 11 isolated scenario workers. It exercises a real
      # gateway child plus mock model turns and subagents, so keep it serial in
      # CI even on the larger runner. Concurrent isolated gateway workers make
      # the short strict-agentic scenarios flaky, especially the approval-turn
      # followthrough gate that expects a fast post-approval read within a 30s
      # agent.wait timeout.
      QA_PARITY_CONCURRENCY: "1"

      # Fence the gate off from any real provider credentials. The qa-lab
      # mock server + auth staging should be enough to produce a meaningful
      # verdict without touching a real API, so every provider credential is
      # pinned to the empty string for this job rather than risking a run
      # against a live provider that burns real budget.
      # NOTE(review): the original note says the job should "fail hard" if one
      # of these leaks into the job env; no explicit check step exists in this
      # workflow, so confirm that the QA suite itself enforces that.
      OPENAI_API_KEY: ""
      ANTHROPIC_API_KEY: ""
      OPENCLAW_LIVE_OPENAI_KEY: ""
      OPENCLAW_LIVE_ANTHROPIC_KEY: ""
      OPENCLAW_LIVE_GEMINI_KEY: ""
      OPENCLAW_LIVE_SETUP_TOKEN_VALUE: ""

      # The parity suite is a private QA command. Build that exact runtime up
      # front so CI never tests a public dist plus a later no-clean QA overlay.
      OPENCLAW_BUILD_PRIVATE_QA: "1"
      OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1"
    steps:
      - name: Checkout PR
        uses: actions/checkout@v6
        with:
          # Later steps execute PR-controlled code (install scripts, build, QA
          # suite); do not leave the workflow token in the local git config.
          persist-credentials: false

      # No version is given here, so the pnpm version has to come from the
      # repository's own configuration.
      - name: Install pnpm
        uses: pnpm/action-setup@v4

      - name: Setup Node
        uses: actions/setup-node@v6
        with:
          node-version: "22.18.0"
          cache: "pnpm"

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      # OPENCLAW_BUILD_PRIVATE_QA from the job env is in effect for this build.
      - name: Build private QA runtime
        run: pnpm build

      # Candidate lane. Its output directory is read by the parity report.
      - name: Run GPT-5.4 lane
        run: |
          pnpm openclaw qa suite \
            --provider-mode mock-openai \
            --parity-pack agentic \
            --concurrency "${QA_PARITY_CONCURRENCY}" \
            --model openai/gpt-5.4 \
            --alt-model openai/gpt-5.4-alt \
            --output-dir .artifacts/qa-e2e/gpt54

      # Baseline lane. It uses the same mock provider mode as the candidate
      # lane; only the model identifiers and the output directory differ.
      - name: Run Opus 4.6 lane
        run: |
          pnpm openclaw qa suite \
            --provider-mode mock-openai \
            --parity-pack agentic \
            --concurrency "${QA_PARITY_CONCURRENCY}" \
            --model anthropic/claude-opus-4-6 \
            --alt-model anthropic/claude-sonnet-4-6 \
            --output-dir .artifacts/qa-e2e/opus46

      # Compares the two lane summaries written by the steps above.
      - name: Generate parity report
        run: |
          pnpm openclaw qa parity-report \
            --repo-root . \
            --candidate-summary .artifacts/qa-e2e/gpt54/qa-suite-summary.json \
            --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json \
            --candidate-label openai/gpt-5.4 \
            --baseline-label anthropic/claude-opus-4-6 \
            --output-dir .artifacts/qa-e2e/parity

      # Runs even when an earlier step failed so partial results stay
      # available; a missing directory only produces a warning.
      - name: Upload parity artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: parity-gate-${{ github.event.pull_request.number || github.sha }}
          path: .artifacts/qa-e2e/
          retention-days: 14
          if-no-files-found: warn