mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-28 00:38:11 +00:00
test(qa-lab): report scenario pack coverage
This commit is contained in:
@@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai
|
||||
- QA-Lab: add curated mock JSONL replay fixtures and first-drift reporting for runtime-parity audits. (#80323, refs #80176) Thanks @100yenadmin.
|
||||
- QA-Lab: add a QA bus tool-trace visibility scenario for sanitized tool-call assertions.
|
||||
- QA-Lab: replace generic evidence framing in seeded scenario prompts with concrete observed QA behavior.
|
||||
- QA-Lab: list named scenario packs in the coverage report so personal-agent privacy coverage stays visible in audits.
|
||||
- QA-Lab: list live transport lane membership in the coverage report so real transport checks stay separate from seeded qa-channel scenarios.
|
||||
- Release/package: run package integrity checks before package acceptance lanes so public install/update validation fails before private QA assets can leak into the package.
|
||||
- QA-Lab: include the optional 100-turn runtime parity soak in release-soak artifacts so long-run Codex/Pi transcript drift stays visible outside the default gate. (#80395) Thanks @100yenadmin.
|
||||
|
||||
@@ -18,6 +18,13 @@ describe("qa coverage report", () => {
|
||||
"telegram",
|
||||
"whatsapp",
|
||||
]);
|
||||
expect(inventory.scenarioPacks.map((pack) => pack.id)).toEqual(["personal-agent"]);
|
||||
expect(inventory.scenarioPacks[0]?.missingScenarioIds).toStrictEqual([]);
|
||||
expect(inventory.scenarioPacks[0]?.scenarioIds).toContain(
|
||||
"personal-share-safe-diagnostics-artifact",
|
||||
);
|
||||
expect(inventory.scenarioPacks[0]?.coverageIds).toContain("personal.redaction");
|
||||
expect(inventory.scenarioPacks[0]?.coverageIds).toContain("qa.artifact-safety");
|
||||
expect(inventory.byTheme.memory.map((feature) => feature.id)).toContain("memory.recall");
|
||||
expect(inventory.bySurface.memory.map((feature) => feature.id)).toContain("memory.recall");
|
||||
});
|
||||
@@ -33,6 +40,11 @@ describe("qa coverage report", () => {
|
||||
expect(report).toContain("memory.recall");
|
||||
expect(report).toContain("primary: memory-recall (qa/scenarios/memory/memory-recall.md)");
|
||||
expect(report).toContain("secondary: active-memory-preprompt-recall");
|
||||
expect(report).toContain("## Scenario Packs");
|
||||
expect(report).toContain(
|
||||
"- personal-agent (Personal Agent Benchmark Pack): 10 scenarios; coverage:",
|
||||
);
|
||||
expect(report).toContain("personal-share-safe-diagnostics-artifact");
|
||||
expect(report).toContain("## Live Transport Lanes");
|
||||
expect(report).toContain(
|
||||
"- telegram (telegram): canary: always-on, help-command: telegram-help-command, mention-gating: telegram-mention-gating; missing baseline: allowlist-block, top-level-reply-shape, restart-resume",
|
||||
|
||||
@@ -2,7 +2,7 @@ import {
|
||||
buildLiveTransportCoverageLaneSummaries,
|
||||
type LiveTransportCoverageLaneSummary,
|
||||
} from "./live-transports/shared/live-transport-scenarios.js";
|
||||
import type { QaSeedScenarioWithSource } from "./scenario-catalog.js";
|
||||
import { QA_SCENARIO_PACKS, type QaSeedScenarioWithSource } from "./scenario-catalog.js";
|
||||
|
||||
type QaCoverageScenarioSummary = {
|
||||
id: string;
|
||||
@@ -24,6 +24,14 @@ type QaCoverageFeatureSummary = {
|
||||
scenarios: QaCoverageScenarioReference[];
|
||||
};
|
||||
|
||||
/** Coverage roll-up for one named scenario pack, as produced by buildScenarioPackSummaries. */
type QaCoverageScenarioPackSummary = {
|
||||
// Pack identifier, copied from the pack definition.
id: string;
|
||||
// Pack title, copied from the pack definition; rendered in parentheses after the id in the report.
title: string;
|
||||
// Every scenario id the pack lists, including ids that could not be resolved.
scenarioIds: string[];
|
||||
// Sorted, de-duplicated primary and secondary coverage ids of the pack's resolved scenarios.
coverageIds: string[];
|
||||
// Scenario ids listed by the pack that have no match in the supplied scenarios.
missingScenarioIds: string[];
|
||||
};
|
||||
|
||||
type QaCoverageInventory = {
|
||||
scenarioCount: number;
|
||||
coverageIdCount: number;
|
||||
@@ -34,6 +42,7 @@ type QaCoverageInventory = {
|
||||
missingCoverage: QaCoverageScenarioSummary[];
|
||||
byTheme: Record<string, QaCoverageFeatureSummary[]>;
|
||||
bySurface: Record<string, QaCoverageFeatureSummary[]>;
|
||||
scenarioPacks: QaCoverageScenarioPackSummary[];
|
||||
liveTransportLanes: LiveTransportCoverageLaneSummary[];
|
||||
};
|
||||
|
||||
@@ -65,6 +74,36 @@ function sortFeatures(features: readonly QaCoverageFeatureSummary[]) {
|
||||
return features.toSorted((left, right) => left.id.localeCompare(right.id));
|
||||
}
|
||||
|
||||
/**
 * Builds one summary per entry in `QA_SCENARIO_PACKS`, resolved against the
 * supplied scenarios.
 *
 * For each pack, every listed scenario id is looked up by id. Resolved
 * scenarios contribute their primary and secondary coverage ids (de-duplicated
 * and sorted); unresolved ids are collected in `missingScenarioIds` in pack
 * order. The resulting summaries are ordered by pack id.
 *
 * @param scenarios - Scenarios available for resolving pack membership.
 * @returns Pack summaries sorted by `id` using `localeCompare`.
 */
function buildScenarioPackSummaries(
  scenarios: readonly QaSeedScenarioWithSource[],
): QaCoverageScenarioPackSummary[] {
  // Index scenarios by id; when two scenarios share an id the later one wins.
  const lookup = new Map<QaSeedScenarioWithSource["id"], QaSeedScenarioWithSource>();
  for (const entry of scenarios) {
    lookup.set(entry.id, entry);
  }

  const summaries = QA_SCENARIO_PACKS.map((pack): QaCoverageScenarioPackSummary => {
    const unresolved: string[] = [];
    const covered = new Set<string>();

    for (const wantedId of pack.scenarioIds) {
      const match = lookup.get(wantedId);
      if (match) {
        for (const primaryId of match.coverage?.primary ?? []) {
          covered.add(primaryId);
        }
        for (const secondaryId of match.coverage?.secondary ?? []) {
          covered.add(secondaryId);
        }
      } else {
        unresolved.push(wantedId);
      }
    }

    return {
      id: pack.id,
      title: pack.title,
      scenarioIds: [...pack.scenarioIds],
      coverageIds: Array.from(covered).toSorted(),
      missingScenarioIds: unresolved,
    };
  });

  const byPackId = (
    left: QaCoverageScenarioPackSummary,
    right: QaCoverageScenarioPackSummary,
  ): number => left.id.localeCompare(right.id);
  return summaries.toSorted(byPackId);
}
|
||||
|
||||
export function buildQaCoverageInventory(
|
||||
scenarios: readonly QaSeedScenarioWithSource[],
|
||||
): QaCoverageInventory {
|
||||
@@ -137,6 +176,7 @@ export function buildQaCoverageInventory(
|
||||
missingCoverage,
|
||||
byTheme,
|
||||
bySurface,
|
||||
scenarioPacks: buildScenarioPackSummaries(scenarios),
|
||||
liveTransportLanes: buildLiveTransportCoverageLaneSummaries(),
|
||||
};
|
||||
}
|
||||
@@ -172,6 +212,17 @@ function pushLiveTransportLines(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Appends the Markdown bullet lines for each scenario pack to `lines`.
 *
 * Every pack contributes two entries: a summary bullet (id, title, scenario
 * count, coverage ids, missing scenario ids) followed by a bullet listing the
 * pack's scenario ids.
 *
 * @param lines - Output buffer; mutated in place.
 * @param packs - Pack summaries to render, emitted in the order given.
 */
function pushScenarioPackLines(lines: string[], packs: readonly QaCoverageScenarioPackSummary[]) {
  // Any empty id list renders as "none" so a field never shows up as a
  // dangling "coverage: ;" or a blank "scenarios:" bullet.
  const joinOrNone = (ids: readonly string[]): string =>
    ids.length > 0 ? ids.join(", ") : "none";

  for (const pack of packs) {
    lines.push(
      `- ${pack.id} (${pack.title}): ${pack.scenarioIds.length} scenarios; coverage: ${joinOrNone(pack.coverageIds)}; missing scenarios: ${joinOrNone(pack.missingScenarioIds)}`,
    );
    lines.push(` - scenarios: ${joinOrNone(pack.scenarioIds)}`);
  }
}
|
||||
|
||||
export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory): string {
|
||||
const lines: string[] = [
|
||||
"# QA Coverage Inventory",
|
||||
@@ -183,10 +234,15 @@ export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory):
|
||||
`- Overlapping coverage IDs: ${inventory.overlappingCoverage.length}`,
|
||||
`- Missing coverage metadata: ${inventory.missingCoverage.length}`,
|
||||
"",
|
||||
"## By Theme",
|
||||
"",
|
||||
];
|
||||
|
||||
if (inventory.scenarioPacks.length > 0) {
|
||||
lines.push("## Scenario Packs", "");
|
||||
pushScenarioPackLines(lines, inventory.scenarioPacks);
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
lines.push("## By Theme", "");
|
||||
for (const theme of Object.keys(inventory.byTheme).toSorted()) {
|
||||
lines.push(`### ${theme}`, "");
|
||||
pushFeatureLines(lines, inventory.byTheme[theme] ?? []);
|
||||
|
||||
Reference in New Issue
Block a user