diff --git a/extensions/qa-lab/src/agentic-parity-report.test.ts b/extensions/qa-lab/src/agentic-parity-report.test.ts index 21ee3d80f00..56e2b85b214 100644 --- a/extensions/qa-lab/src/agentic-parity-report.test.ts +++ b/extensions/qa-lab/src/agentic-parity-report.test.ts @@ -130,6 +130,27 @@ describe("qa agentic parity report", () => { ); }); + it("ignores neutral Failed and Blocked headings in passing protocol reports", () => { + const summary: QaParitySuiteSummary = { + scenarios: [ + { + name: "Source and docs discovery report", + status: "pass", + details: `Worked: +- Read the seeded QA material. +Failed: +- None observed. +Blocked: +- No live provider evidence in this lane. +Follow-up: +- Re-run with a real provider if needed.`, + }, + ], + }; + + expect(computeQaAgenticParityMetrics(summary).fakeSuccessCount).toBe(0); + }); + it("renders a readable markdown parity report", () => { const comparison = buildQaAgenticParityComparison({ candidateLabel: "openai/gpt-5.4", diff --git a/extensions/qa-lab/src/agentic-parity-report.ts b/extensions/qa-lab/src/agentic-parity-report.ts index e27dafb2d06..e8f6be293a1 100644 --- a/extensions/qa-lab/src/agentic-parity-report.ts +++ b/extensions/qa-lab/src/agentic-parity-report.ts @@ -68,9 +68,11 @@ const SUSPICIOUS_PASS_PATTERNS = [ /incomplete turn/i, /\btimed out\b/i, /\btimeout\b/i, - /\bblocked\b/i, /\berror\b/i, - /\bfailed\b/i, + /\bfailed to\b/i, + /\bcould not\b/i, + /\bunable to\b/i, + /did not continue/i, ] as const; function normalizeScenarioStatus(status: string | undefined): "pass" | "fail" | "skip" { diff --git a/extensions/qa-lab/src/mock-openai-server.test.ts b/extensions/qa-lab/src/mock-openai-server.test.ts index 4648ff6859e..bd7968165b2 100644 --- a/extensions/qa-lab/src/mock-openai-server.test.ts +++ b/extensions/qa-lab/src/mock-openai-server.test.ts @@ -228,7 +228,7 @@ describe("qa mock openai server", () => { }, { type: "function_call_output", - output: "Replay safety: unsafe after write.\n", + output: "Successfully wrote 41 bytes to compaction-retry-summary.txt.", }, ], }), diff --git a/extensions/qa-lab/src/mock-openai-server.ts b/extensions/qa-lab/src/mock-openai-server.ts index b013d9e420c..fa8e07c3b8c 100644 --- a/extensions/qa-lab/src/mock-openai-server.ts +++ b/extensions/qa-lab/src/mock-openai-server.ts @@ -453,7 +453,12 @@ function buildAssistantText(input: ResponsesInputItem[], body: Record