From dcd98bf1ef10d63b5dd4428e37762c211389ce37 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 22 May 2026 22:17:28 +0800 Subject: [PATCH] test(qa-lab): report scenario pack coverage --- CHANGELOG.md | 1 + extensions/qa-lab/src/coverage-report.test.ts | 12 ++++ extensions/qa-lab/src/coverage-report.ts | 62 ++++++++++++++++++- 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d6b9b11333..9491f97017c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai - QA-Lab: add curated mock JSONL replay fixtures and first-drift reporting for runtime-parity audits. (#80323, refs #80176) Thanks @100yenadmin. - QA-Lab: add a QA bus tool-trace visibility scenario for sanitized tool-call assertions. - QA-Lab: replace generic evidence framing in seeded scenario prompts with concrete observed QA behavior. +- QA-Lab: list named scenario packs in the coverage report so personal-agent privacy coverage stays visible in audits. - QA-Lab: list live transport lane membership in the coverage report so real transport checks stay separate from seeded qa-channel scenarios. - Release/package: run package integrity checks before package acceptance lanes so public install/update validation fails before private QA assets can leak into the package. - QA-Lab: include the optional 100-turn runtime parity soak in release-soak artifacts so long-run Codex/Pi transcript drift stays visible outside the default gate. (#80395) Thanks @100yenadmin. diff --git a/extensions/qa-lab/src/coverage-report.test.ts b/extensions/qa-lab/src/coverage-report.test.ts index 3ad7e9abea9..7480d6b26a1 100644 --- a/extensions/qa-lab/src/coverage-report.test.ts +++ b/extensions/qa-lab/src/coverage-report.test.ts @@ -18,6 +18,13 @@ describe("qa coverage report", () => { "telegram", "whatsapp", ]); + expect(inventory.scenarioPacks.map((pack) => pack.id)).toEqual(["personal-agent"]); + expect(inventory.scenarioPacks[0]?.missingScenarioIds).toStrictEqual([]); + expect(inventory.scenarioPacks[0]?.scenarioIds).toContain( + "personal-share-safe-diagnostics-artifact", + ); + expect(inventory.scenarioPacks[0]?.coverageIds).toContain("personal.redaction"); + expect(inventory.scenarioPacks[0]?.coverageIds).toContain("qa.artifact-safety"); expect(inventory.byTheme.memory.map((feature) => feature.id)).toContain("memory.recall"); expect(inventory.bySurface.memory.map((feature) => feature.id)).toContain("memory.recall"); }); @@ -33,6 +40,11 @@ describe("qa coverage report", () => { expect(report).toContain("memory.recall"); expect(report).toContain("primary: memory-recall (qa/scenarios/memory/memory-recall.md)"); expect(report).toContain("secondary: active-memory-preprompt-recall"); + expect(report).toContain("## Scenario Packs"); + expect(report).toContain( + "- personal-agent (Personal Agent Benchmark Pack): 10 scenarios; coverage:", + ); + expect(report).toContain("personal-share-safe-diagnostics-artifact"); expect(report).toContain("## Live Transport Lanes"); expect(report).toContain( "- telegram (telegram): canary: always-on, help-command: telegram-help-command, mention-gating: telegram-mention-gating; missing baseline: allowlist-block, top-level-reply-shape, restart-resume", diff --git a/extensions/qa-lab/src/coverage-report.ts b/extensions/qa-lab/src/coverage-report.ts index 5b6297a801d..ed5880f060a 100644 --- a/extensions/qa-lab/src/coverage-report.ts +++ b/extensions/qa-lab/src/coverage-report.ts @@ -2,7 +2,7 @@ import { buildLiveTransportCoverageLaneSummaries, type LiveTransportCoverageLaneSummary, } from "./live-transports/shared/live-transport-scenarios.js"; -import type { QaSeedScenarioWithSource } from "./scenario-catalog.js"; +import { QA_SCENARIO_PACKS, type QaSeedScenarioWithSource } from "./scenario-catalog.js"; type QaCoverageScenarioSummary = { id: string; @@ -24,6 +24,14 @@ type QaCoverageFeatureSummary = { scenarios: QaCoverageScenarioReference[]; }; +type QaCoverageScenarioPackSummary = { + id: string; + title: string; + scenarioIds: string[]; + coverageIds: string[]; + missingScenarioIds: string[]; +}; + type QaCoverageInventory = { scenarioCount: number; coverageIdCount: number; @@ -34,6 +42,7 @@ type QaCoverageInventory = { missingCoverage: QaCoverageScenarioSummary[]; byTheme: Record; bySurface: Record; + scenarioPacks: QaCoverageScenarioPackSummary[]; liveTransportLanes: LiveTransportCoverageLaneSummary[]; }; @@ -65,6 +74,36 @@ function sortFeatures(features: readonly QaCoverageFeatureSummary[]) { return features.toSorted((left, right) => left.id.localeCompare(right.id)); } +function buildScenarioPackSummaries( + scenarios: readonly QaSeedScenarioWithSource[], +): QaCoverageScenarioPackSummary[] { + const scenariosById = new Map(scenarios.map((scenario) => [scenario.id, scenario])); + return QA_SCENARIO_PACKS.map((pack) => { + const coverageIds = new Set(); + const missingScenarioIds: string[] = []; + for (const scenarioId of pack.scenarioIds) { + const scenario = scenariosById.get(scenarioId); + if (!scenario) { + missingScenarioIds.push(scenarioId); + continue; + } + for (const coverageId of [ + ...(scenario.coverage?.primary ?? []), + ...(scenario.coverage?.secondary ?? []), + ]) { + coverageIds.add(coverageId); + } + } + return { + id: pack.id, + title: pack.title, + scenarioIds: [...pack.scenarioIds], + coverageIds: [...coverageIds].toSorted(), + missingScenarioIds, + }; + }).toSorted((left, right) => left.id.localeCompare(right.id)); +} + export function buildQaCoverageInventory( scenarios: readonly QaSeedScenarioWithSource[], ): QaCoverageInventory { @@ -137,6 +176,7 @@ export function buildQaCoverageInventory( missingCoverage, byTheme, bySurface, + scenarioPacks: buildScenarioPackSummaries(scenarios), liveTransportLanes: buildLiveTransportCoverageLaneSummaries(), }; } @@ -172,6 +212,17 @@ function pushLiveTransportLines( } } +function pushScenarioPackLines(lines: string[], packs: readonly QaCoverageScenarioPackSummary[]) { + for (const pack of packs) { + const missing = + pack.missingScenarioIds.length > 0 ? pack.missingScenarioIds.join(", ") : "none"; + lines.push( + `- ${pack.id} (${pack.title}): ${pack.scenarioIds.length} scenarios; coverage: ${pack.coverageIds.join(", ")}; missing scenarios: ${missing}`, + ); + lines.push(` - scenarios: ${pack.scenarioIds.join(", ")}`); + } +} + export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory): string { const lines: string[] = [ "# QA Coverage Inventory", @@ -183,10 +234,15 @@ export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory): `- Overlapping coverage IDs: ${inventory.overlappingCoverage.length}`, `- Missing coverage metadata: ${inventory.missingCoverage.length}`, "", - "## By Theme", - "", ]; + if (inventory.scenarioPacks.length > 0) { + lines.push("## Scenario Packs", ""); + pushScenarioPackLines(lines, inventory.scenarioPacks); + lines.push(""); + } + + lines.push("## By Theme", ""); for (const theme of Object.keys(inventory.byTheme).toSorted()) { lines.push(`### ${theme}`, ""); pushFeatureLines(lines, inventory.byTheme[theme] ?? []);