From 4c675216f1ce9ea2b7d757cb7f169a414aa3781b Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 22 Apr 2026 13:43:15 -0700 Subject: [PATCH] fix(qa): deflake parity approval preflight --- .github/workflows/parity-gate.yml | 10 +-- .../src/providers/mock-openai/server.test.ts | 66 +++++++++++++++++++ .../approval-turn-tool-followthrough.md | 4 -- 3 files changed, 69 insertions(+), 11 deletions(-) diff --git a/.github/workflows/parity-gate.yml b/.github/workflows/parity-gate.yml index b6f2f18d3b8..cf46c10b300 100644 --- a/.github/workflows/parity-gate.yml +++ b/.github/workflows/parity-gate.yml @@ -71,13 +71,9 @@ jobs: - name: Build private QA runtime run: pnpm build - - name: Run parity preflight - run: | - pnpm openclaw qa suite \ - --provider-mode mock-openai \ - --model openai/gpt-5.4 \ - --alt-model anthropic/claude-opus-4-6 \ - --preflight + # The approval-turn sentinel still runs inside the full parity pack below. + # Keep the exact mock read-plan contract in deterministic unit tests instead + # of paying for a separate full-runtime preflight that has been flaky in CI. - name: Run GPT-5.4 lane run: | pnpm openclaw qa suite \ diff --git a/extensions/qa-lab/src/providers/mock-openai/server.test.ts b/extensions/qa-lab/src/providers/mock-openai/server.test.ts index 719bbd29742..f95ef6cb5fe 100644 --- a/extensions/qa-lab/src/providers/mock-openai/server.test.ts +++ b/extensions/qa-lab/src/providers/mock-openai/server.test.ts @@ -120,6 +120,72 @@ describe("qa mock openai server", () => { expect(body).toContain('"name":"read"'); }); + it("turns a short approval into a kickoff-task read", async () => { + const server = await startMockServer(); + + const preActionResponse = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ + stream: false, + model: "gpt-5.4", + input: [ + makeUserInput( + "Before acting, tell me the single file you would start with in six words or fewer. Do not use tools yet.", + ), + ], + }), + }); + expect(preActionResponse.status).toBe(200); + expect(await preActionResponse.json()).toMatchObject({ + output: [ + { + type: "message", + content: [ + { + text: expect.stringContaining("Protocol note: acknowledged."), + }, + ], + }, + ], + }); + + const approvalResponse = await fetch(`${server.baseUrl}/v1/responses`, { + method: "POST", + headers: { + "content-type": "application/json", + }, + body: JSON.stringify({ + stream: true, + model: "gpt-5.4", + input: [ + makeUserInput( + "Before acting, tell me the single file you would start with in six words or fewer. Do not use tools yet.", + ), + makeUserInput( + "ok do it. read `QA_KICKOFF_TASK.md` now and reply with the QA mission in one short sentence.", + ), + ], + }), + }); + expect(approvalResponse.status).toBe(200); + const approvalBody = await approvalResponse.text(); + expect(approvalBody).toContain('"name":"read"'); + expect(approvalBody).toContain('"arguments":"{\\"path\\":\\"QA_KICKOFF_TASK.md\\"}"'); + + const debugResponse = await fetch(`${server.baseUrl}/debug/last-request`); + expect(debugResponse.status).toBe(200); + expect(await debugResponse.json()).toMatchObject({ + model: "gpt-5.4", + prompt: + "ok do it. read `QA_KICKOFF_TASK.md` now and reply with the QA mission in one short sentence.", + allInputText: expect.stringContaining("ok do it."), + plannedToolName: "read", + }); + }); + it("emits deterministic text deltas for generic streaming QA prompts", async () => { + const server = await startMockServer(); diff --git a/qa/scenarios/runtime/approval-turn-tool-followthrough.md b/qa/scenarios/runtime/approval-turn-tool-followthrough.md index bc086ca0674..3440024370d 100644 --- a/qa/scenarios/runtime/approval-turn-tool-followthrough.md +++ b/qa/scenarios/runtime/approval-turn-tool-followthrough.md @@ -83,9 +83,5 @@ steps: expr: "state.getSnapshot().messages.slice(beforeApprovalCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && expectedReplyAny.some((needle) => normalizeLowercaseStringOrEmpty(candidate.text).includes(needle))).at(-1)" - expr: liveTurnTimeoutMs(env, 20000) - expr: "env.providerMode === 'mock-openai' ? 100 : 250" - - assert: - expr: "!env.mock || ([...(await fetchJson(`${env.mock.baseUrl}/debug/requests`))].toReversed().find((request) => String(request.allInputText ?? '').includes('ok do it.') && !request.toolOutput)?.plannedToolName === 'read')" - message: - expr: "`expected read after approval, got ${String(([...(await fetchJson(`${env.mock.baseUrl}/debug/requests`))].toReversed().find((request) => String(request.allInputText ?? '').includes('ok do it.') && !request.toolOutput)?.plannedToolName ?? ''))}`" detailsExpr: outbound.text ```