fix(qa): deflake parity approval preflight

This commit is contained in:
Vincent Koc
2026-04-22 13:43:15 -07:00
parent db5895fd2a
commit 4c675216f1
3 changed files with 69 additions and 11 deletions

View File

@@ -71,13 +71,9 @@ jobs:
- name: Build private QA runtime
run: pnpm build
- name: Run parity preflight
run: |
pnpm openclaw qa suite \
--provider-mode mock-openai \
--model openai/gpt-5.4 \
--alt-model anthropic/claude-opus-4-6 \
--preflight
# The approval-turn sentinel still runs inside the full parity pack below.
# Keep the exact mock read-plan contract in deterministic unit tests instead
# of paying for a separate full-runtime preflight that has been flaky in CI.
- name: Run GPT-5.4 lane
run: |
pnpm openclaw qa suite \

View File

@@ -120,6 +120,72 @@ describe("qa mock openai server", () => {
expect(body).toContain('"name":"read"');
});
// Two-turn approval flow against the mock server:
//   1. a non-streaming "do not use tools yet" turn must come back as a plain message;
//   2. a streaming follow-up that appends the short approval must plan a `read`
//      of QA_KICKOFF_TASK.md, and the debug endpoint must report that plan.
it("turns a short approval into a kickoff-task read", async () => {
  const mock = await startMockServer();

  // Prompts are named once so the request payloads and the debug assertion
  // below cannot drift apart.
  const holdOffPrompt =
    "Before acting, tell me the single file you would start with in six words or fewer. Do not use tools yet.";
  const goAheadPrompt =
    "ok do it. read `QA_KICKOFF_TASK.md` now and reply with the QA mission in one short sentence.";

  const responsesEndpoint = `${mock.baseUrl}/v1/responses`;
  const jsonPost = {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
  };

  // Turn 1: only the "before acting" prompt, non-streaming.
  const holdOffReply = await fetch(responsesEndpoint, {
    ...jsonPost,
    body: JSON.stringify({
      stream: false,
      model: "gpt-5.4",
      input: [makeUserInput(holdOffPrompt)],
    }),
  });
  expect(holdOffReply.status).toBe(200);
  const holdOffJson = await holdOffReply.json();
  expect(holdOffJson).toMatchObject({
    output: [
      {
        type: "message",
        content: [
          {
            text: expect.stringContaining("Protocol note: acknowledged."),
          },
        ],
      },
    ],
  });

  // Turn 2: the same first prompt followed by the approval, streaming this time.
  const goAheadReply = await fetch(responsesEndpoint, {
    ...jsonPost,
    body: JSON.stringify({
      stream: true,
      model: "gpt-5.4",
      input: [makeUserInput(holdOffPrompt), makeUserInput(goAheadPrompt)],
    }),
  });
  expect(goAheadReply.status).toBe(200);
  const streamedText = await goAheadReply.text();
  // The raw response text must carry a `read` tool call with the kickoff path as its argument.
  expect(streamedText).toContain('"name":"read"');
  expect(streamedText).toContain('"arguments":"{\\"path\\":\\"QA_KICKOFF_TASK.md\\"}"');

  // The debug endpoint should describe the approval request that was just sent.
  const lastRequestReply = await fetch(`${mock.baseUrl}/debug/last-request`);
  expect(lastRequestReply.status).toBe(200);
  const lastRequest = await lastRequestReply.json();
  expect(lastRequest).toMatchObject({
    model: "gpt-5.4",
    prompt: goAheadPrompt,
    allInputText: expect.stringContaining("ok do it."),
    plannedToolName: "read",
  });
});
it("emits deterministic text deltas for generic streaming QA prompts", async () => {
const server = await startMockServer();

View File

@@ -83,9 +83,5 @@ steps:
expr: "state.getSnapshot().messages.slice(beforeApprovalCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && expectedReplyAny.some((needle) => normalizeLowercaseStringOrEmpty(candidate.text).includes(needle))).at(-1)"
- expr: liveTurnTimeoutMs(env, 20000)
- expr: "env.providerMode === 'mock-openai' ? 100 : 250"
- assert:
expr: "!env.mock || ([...(await fetchJson(`${env.mock.baseUrl}/debug/requests`))].toReversed().find((request) => String(request.allInputText ?? '').includes('ok do it.') && !request.toolOutput)?.plannedToolName === 'read')"
message:
expr: "`expected read after approval, got ${String(([...(await fetchJson(`${env.mock.baseUrl}/debug/requests`))].toReversed().find((request) => String(request.allInputText ?? '').includes('ok do it.') && !request.toolOutput)?.plannedToolName ?? ''))}`"
detailsExpr: outbound.text
```