From dcd98bf1ef10d63b5dd4428e37762c211389ce37 Mon Sep 17 00:00:00 2001
From: Vincent Koc <vincentkoc@ieee.org>
Date: Fri, 22 May 2026 22:17:28 +0800
Subject: [PATCH] test(qa-lab): report scenario pack coverage

---
 CHANGELOG.md                                  |  1 +
 extensions/qa-lab/src/coverage-report.test.ts | 12 ++++
 extensions/qa-lab/src/coverage-report.ts      | 62 ++++++++++++++++++-
 3 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4d6b9b11333..9491f97017c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai
 - QA-Lab: add curated mock JSONL replay fixtures and first-drift reporting for runtime-parity audits. (#80323, refs #80176) Thanks @100yenadmin.
 - QA-Lab: add a QA bus tool-trace visibility scenario for sanitized tool-call assertions.
 - QA-Lab: replace generic evidence framing in seeded scenario prompts with concrete observed QA behavior.
+- QA-Lab: list named scenario packs in the coverage report so personal-agent privacy coverage stays visible in audits.
 - QA-Lab: list live transport lane membership in the coverage report so real transport checks stay separate from seeded qa-channel scenarios.
 - Release/package: run package integrity checks before package acceptance lanes so public install/update validation fails before private QA assets can leak into the package.
 - QA-Lab: include the optional 100-turn runtime parity soak in release-soak artifacts so long-run Codex/Pi transcript drift stays visible outside the default gate. (#80395) Thanks @100yenadmin.
diff --git a/extensions/qa-lab/src/coverage-report.test.ts b/extensions/qa-lab/src/coverage-report.test.ts
index 3ad7e9abea9..7480d6b26a1 100644
--- a/extensions/qa-lab/src/coverage-report.test.ts
+++ b/extensions/qa-lab/src/coverage-report.test.ts
@@ -18,6 +18,13 @@ describe("qa coverage report", () => {
       "telegram",
       "whatsapp",
     ]);
+    expect(inventory.scenarioPacks.map((pack) => pack.id)).toEqual(["personal-agent"]);
+    expect(inventory.scenarioPacks[0]?.missingScenarioIds).toStrictEqual([]);
+    expect(inventory.scenarioPacks[0]?.scenarioIds).toContain(
+      "personal-share-safe-diagnostics-artifact",
+    );
+    expect(inventory.scenarioPacks[0]?.coverageIds).toContain("personal.redaction");
+    expect(inventory.scenarioPacks[0]?.coverageIds).toContain("qa.artifact-safety");
     expect(inventory.byTheme.memory.map((feature) => feature.id)).toContain("memory.recall");
     expect(inventory.bySurface.memory.map((feature) => feature.id)).toContain("memory.recall");
   });
@@ -33,6 +40,11 @@ describe("qa coverage report", () => {
     expect(report).toContain("memory.recall");
     expect(report).toContain("primary: memory-recall (qa/scenarios/memory/memory-recall.md)");
     expect(report).toContain("secondary: active-memory-preprompt-recall");
+    expect(report).toContain("## Scenario Packs");
+    expect(report).toContain(
+      "- personal-agent (Personal Agent Benchmark Pack): 10 scenarios; coverage:",
+    );
+    expect(report).toContain("personal-share-safe-diagnostics-artifact");
     expect(report).toContain("## Live Transport Lanes");
     expect(report).toContain(
       "- telegram (telegram): canary: always-on, help-command: telegram-help-command, mention-gating: telegram-mention-gating; missing baseline: allowlist-block, top-level-reply-shape, restart-resume",
diff --git a/extensions/qa-lab/src/coverage-report.ts b/extensions/qa-lab/src/coverage-report.ts
index 5b6297a801d..ed5880f060a 100644
--- a/extensions/qa-lab/src/coverage-report.ts
+++ b/extensions/qa-lab/src/coverage-report.ts
@@ -2,7 +2,7 @@ import {
   buildLiveTransportCoverageLaneSummaries,
   type LiveTransportCoverageLaneSummary,
 } from "./live-transports/shared/live-transport-scenarios.js";
-import type { QaSeedScenarioWithSource } from "./scenario-catalog.js";
+import { QA_SCENARIO_PACKS, type QaSeedScenarioWithSource } from "./scenario-catalog.js";
 
 type QaCoverageScenarioSummary = {
   id: string;
@@ -24,6 +24,14 @@ type QaCoverageFeatureSummary = {
   scenarios: QaCoverageScenarioReference[];
 };
 
+type QaCoverageScenarioPackSummary = {
+  id: string;
+  title: string;
+  scenarioIds: string[];
+  coverageIds: string[];
+  missingScenarioIds: string[];
+};
+
 type QaCoverageInventory = {
   scenarioCount: number;
   coverageIdCount: number;
@@ -34,6 +42,7 @@ type QaCoverageInventory = {
   missingCoverage: QaCoverageScenarioSummary[];
   byTheme: Record<string, QaCoverageFeatureSummary[]>;
   bySurface: Record<string, QaCoverageFeatureSummary[]>;
+  scenarioPacks: QaCoverageScenarioPackSummary[];
   liveTransportLanes: LiveTransportCoverageLaneSummary[];
 };
 
@@ -65,6 +74,36 @@ function sortFeatures(features: readonly QaCoverageFeatureSummary[]) {
   return features.toSorted((left, right) => left.id.localeCompare(right.id));
 }
 
+function buildScenarioPackSummaries(
+  scenarios: readonly QaSeedScenarioWithSource[],
+): QaCoverageScenarioPackSummary[] {
+  const scenariosById = new Map(scenarios.map((scenario) => [scenario.id, scenario]));
+  return QA_SCENARIO_PACKS.map((pack) => {
+    const coverageIds = new Set<string>();
+    const missingScenarioIds: string[] = [];
+    for (const scenarioId of pack.scenarioIds) {
+      const scenario = scenariosById.get(scenarioId);
+      if (!scenario) {
+        missingScenarioIds.push(scenarioId);
+        continue;
+      }
+      for (const coverageId of [
+        ...(scenario.coverage?.primary ?? []),
+        ...(scenario.coverage?.secondary ?? []),
+      ]) {
+        coverageIds.add(coverageId);
+      }
+    }
+    return {
+      id: pack.id,
+      title: pack.title,
+      scenarioIds: [...pack.scenarioIds],
+      coverageIds: [...coverageIds].toSorted(),
+      missingScenarioIds,
+    };
+  }).toSorted((left, right) => left.id.localeCompare(right.id));
+}
+
 export function buildQaCoverageInventory(
   scenarios: readonly QaSeedScenarioWithSource[],
 ): QaCoverageInventory {
@@ -137,6 +176,7 @@ export function buildQaCoverageInventory(
     missingCoverage,
     byTheme,
     bySurface,
+    scenarioPacks: buildScenarioPackSummaries(scenarios),
     liveTransportLanes: buildLiveTransportCoverageLaneSummaries(),
   };
 }
@@ -172,6 +212,17 @@ function pushLiveTransportLines(
   }
 }
 
+function pushScenarioPackLines(lines: string[], packs: readonly QaCoverageScenarioPackSummary[]) {
+  for (const pack of packs) {
+    const missing =
+      pack.missingScenarioIds.length > 0 ? pack.missingScenarioIds.join(", ") : "none";
+    lines.push(
+      `- ${pack.id} (${pack.title}): ${pack.scenarioIds.length} scenarios; coverage: ${pack.coverageIds.join(", ")}; missing scenarios: ${missing}`,
+    );
+    lines.push(`  - scenarios: ${pack.scenarioIds.join(", ")}`);
+  }
+}
+
 export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory): string {
   const lines: string[] = [
     "# QA Coverage Inventory",
@@ -183,10 +234,15 @@ export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory):
     `- Overlapping coverage IDs: ${inventory.overlappingCoverage.length}`,
     `- Missing coverage metadata: ${inventory.missingCoverage.length}`,
     "",
-    "## By Theme",
-    "",
   ];
 
+  if (inventory.scenarioPacks.length > 0) {
+    lines.push("## Scenario Packs", "");
+    pushScenarioPackLines(lines, inventory.scenarioPacks);
+    lines.push("");
+  }
+
+  lines.push("## By Theme", "");
   for (const theme of Object.keys(inventory.byTheme).toSorted()) {
     lines.push(`### ${theme}`, "");
     pushFeatureLines(lines, inventory.byTheme[theme] ?? []);