test(qa-lab): report scenario pack coverage

This commit is contained in:
Vincent Koc
2026-05-22 22:17:28 +08:00
parent d70dc4be19
commit dcd98bf1ef
3 changed files with 72 additions and 3 deletions

View File

@@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai
- QA-Lab: add curated mock JSONL replay fixtures and first-drift reporting for runtime-parity audits. (#80323, refs #80176) Thanks @100yenadmin.
- QA-Lab: add a QA bus tool-trace visibility scenario for sanitized tool-call assertions.
- QA-Lab: replace generic evidence framing in seeded scenario prompts with concrete observed QA behavior.
- QA-Lab: list named scenario packs in the coverage report so personal-agent privacy coverage stays visible in audits.
- QA-Lab: list live transport lane membership in the coverage report so real transport checks stay separate from seeded qa-channel scenarios.
- Release/package: run package integrity checks before package acceptance lanes so public install/update validation fails before private QA assets can leak into the package.
- QA-Lab: include the optional 100-turn runtime parity soak in release-soak artifacts so long-run Codex/Pi transcript drift stays visible outside the default gate. (#80395) Thanks @100yenadmin.

View File

@@ -18,6 +18,13 @@ describe("qa coverage report", () => {
"telegram",
"whatsapp",
]);
expect(inventory.scenarioPacks.map((pack) => pack.id)).toEqual(["personal-agent"]);
expect(inventory.scenarioPacks[0]?.missingScenarioIds).toStrictEqual([]);
expect(inventory.scenarioPacks[0]?.scenarioIds).toContain(
"personal-share-safe-diagnostics-artifact",
);
expect(inventory.scenarioPacks[0]?.coverageIds).toContain("personal.redaction");
expect(inventory.scenarioPacks[0]?.coverageIds).toContain("qa.artifact-safety");
expect(inventory.byTheme.memory.map((feature) => feature.id)).toContain("memory.recall");
expect(inventory.bySurface.memory.map((feature) => feature.id)).toContain("memory.recall");
});
@@ -33,6 +40,11 @@ describe("qa coverage report", () => {
expect(report).toContain("memory.recall");
expect(report).toContain("primary: memory-recall (qa/scenarios/memory/memory-recall.md)");
expect(report).toContain("secondary: active-memory-preprompt-recall");
expect(report).toContain("## Scenario Packs");
expect(report).toContain(
"- personal-agent (Personal Agent Benchmark Pack): 10 scenarios; coverage:",
);
expect(report).toContain("personal-share-safe-diagnostics-artifact");
expect(report).toContain("## Live Transport Lanes");
expect(report).toContain(
"- telegram (telegram): canary: always-on, help-command: telegram-help-command, mention-gating: telegram-mention-gating; missing baseline: allowlist-block, top-level-reply-shape, restart-resume",

View File

@@ -2,7 +2,7 @@ import {
buildLiveTransportCoverageLaneSummaries,
type LiveTransportCoverageLaneSummary,
} from "./live-transports/shared/live-transport-scenarios.js";
import type { QaSeedScenarioWithSource } from "./scenario-catalog.js";
import { QA_SCENARIO_PACKS, type QaSeedScenarioWithSource } from "./scenario-catalog.js";
type QaCoverageScenarioSummary = {
id: string;
@@ -24,6 +24,14 @@ type QaCoverageFeatureSummary = {
scenarios: QaCoverageScenarioReference[];
};
/** Coverage-report summary of one named scenario pack. */
type QaCoverageScenarioPackSummary = {
/** Pack identifier, copied from the pack definition. */
id: string;
/** Pack title, copied from the pack definition. */
title: string;
/** Every scenario ID the pack lists, whether or not it was found among the supplied scenarios. */
scenarioIds: string[];
/** Sorted, de-duplicated primary and secondary coverage IDs of the pack scenarios that were found. */
coverageIds: string[];
/** Scenario IDs the pack lists that were not found among the supplied scenarios. */
missingScenarioIds: string[];
};
type QaCoverageInventory = {
scenarioCount: number;
coverageIdCount: number;
@@ -34,6 +42,7 @@ type QaCoverageInventory = {
missingCoverage: QaCoverageScenarioSummary[];
byTheme: Record<string, QaCoverageFeatureSummary[]>;
bySurface: Record<string, QaCoverageFeatureSummary[]>;
scenarioPacks: QaCoverageScenarioPackSummary[];
liveTransportLanes: LiveTransportCoverageLaneSummary[];
};
@@ -65,6 +74,36 @@ function sortFeatures(features: readonly QaCoverageFeatureSummary[]) {
return features.toSorted((left, right) => left.id.localeCompare(right.id));
}
/**
 * Builds one summary per entry in `QA_SCENARIO_PACKS`, resolved against the
 * supplied scenarios.
 *
 * For each pack, the scenario IDs that cannot be found in `scenarios` are
 * reported as missing, and the primary and secondary coverage IDs of the
 * scenarios that can be found are merged into a sorted, de-duplicated list.
 *
 * @param scenarios - Scenarios to resolve pack members against.
 * @returns Pack summaries ordered by pack ID.
 */
function buildScenarioPackSummaries(
  scenarios: readonly QaSeedScenarioWithSource[],
): QaCoverageScenarioPackSummary[] {
  const byId = new Map(scenarios.map((entry) => [entry.id, entry]));

  const summaries = QA_SCENARIO_PACKS.map((pack) => {
    // Pack members that have no matching scenario, in pack order.
    const unresolved: string[] = pack.scenarioIds.filter((id) => !byId.has(id));

    const covered = new Set<string>();
    for (const id of pack.scenarioIds) {
      const coverage = byId.get(id)?.coverage;
      for (const coverageId of coverage?.primary ?? []) {
        covered.add(coverageId);
      }
      for (const coverageId of coverage?.secondary ?? []) {
        covered.add(coverageId);
      }
    }

    return {
      id: pack.id,
      title: pack.title,
      scenarioIds: [...pack.scenarioIds],
      coverageIds: Array.from(covered).toSorted(),
      missingScenarioIds: unresolved,
    };
  });

  return summaries.toSorted((a, b) => a.id.localeCompare(b.id));
}
export function buildQaCoverageInventory(
scenarios: readonly QaSeedScenarioWithSource[],
): QaCoverageInventory {
@@ -137,6 +176,7 @@ export function buildQaCoverageInventory(
missingCoverage,
byTheme,
bySurface,
scenarioPacks: buildScenarioPackSummaries(scenarios),
liveTransportLanes: buildLiveTransportCoverageLaneSummaries(),
};
}
@@ -172,6 +212,17 @@ function pushLiveTransportLines(
}
}
/**
 * Appends the Markdown bullet lines describing each scenario pack to `lines`.
 *
 * Every pack contributes two entries: a summary bullet (ID, title, scenario
 * count, coverage IDs, missing scenario IDs) followed by a nested bullet that
 * lists the pack's scenario IDs. Empty lists are rendered as "none" so the
 * report never contains a dangling `coverage: ;` fragment when none of a
 * pack's scenarios resolved to coverage metadata.
 *
 * @param lines - Report line buffer; mutated in place.
 * @param packs - Pack summaries to render, in the order given.
 */
function pushScenarioPackLines(lines: string[], packs: readonly QaCoverageScenarioPackSummary[]) {
  const joinOrNone = (ids: readonly string[]): string =>
    ids.length > 0 ? ids.join(", ") : "none";

  for (const pack of packs) {
    lines.push(
      `- ${pack.id} (${pack.title}): ${pack.scenarioIds.length} scenarios; coverage: ${joinOrNone(pack.coverageIds)}; missing scenarios: ${joinOrNone(pack.missingScenarioIds)}`,
    );
    lines.push(` - scenarios: ${joinOrNone(pack.scenarioIds)}`);
  }
}
export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory): string {
const lines: string[] = [
"# QA Coverage Inventory",
@@ -183,10 +234,15 @@ export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory):
`- Overlapping coverage IDs: ${inventory.overlappingCoverage.length}`,
`- Missing coverage metadata: ${inventory.missingCoverage.length}`,
"",
"## By Theme",
"",
];
if (inventory.scenarioPacks.length > 0) {
lines.push("## Scenario Packs", "");
pushScenarioPackLines(lines, inventory.scenarioPacks);
lines.push("");
}
lines.push("## By Theme", "");
for (const theme of Object.keys(inventory.byTheme).toSorted()) {
lines.push(`### ${theme}`, "");
pushFeatureLines(lines, inventory.byTheme[theme] ?? []);