From a1fe86a0ff0dbd32876c567fd723b024e7fd09e5 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 25 May 2026 10:22:43 +0100 Subject: [PATCH] feat(qa): add coverage scenario matching --- CHANGELOG.md | 1 + docs/concepts/qa-e2e-automation.md | 3 + docs/help/testing.md | 6 + extensions/qa-lab/src/cli.runtime.test.ts | 26 +++- extensions/qa-lab/src/cli.runtime.ts | 28 ++++- extensions/qa-lab/src/cli.test.ts | 21 ++++ extensions/qa-lab/src/cli.ts | 8 ++ extensions/qa-lab/src/coverage-report.ts | 138 ++++++++++++++++++++++ scripts/qa-coverage-report.ts | 8 ++ 9 files changed, 231 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8494033db6b..abb45889734 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai ### Changes +- QA-Lab: add `qa coverage --match <query>` so focused proof selection can discover matching scenarios from existing metadata before running live or remote lanes. - Control UI: add an ephemeral Activity tab for sanitized live tool activity summaries without persisting raw telemetry. Fixes #12831. Thanks @BunsDev. - Build: include `ui:build` in the `full` and `ciArtifacts` profiles of `scripts/build-all.mjs` so `pnpm build` always rebuilds `dist/control-ui` after `tsdown` cleans `dist`, removing the second-command requirement and the missing-asset failure mode for source/runtime installs and CI artifact uploads. (#85206) - Migrate: import supported Hermes, OpenCode, and Codex auth credentials into OpenClaw auth profiles when credential migration is selected, with explicit opt-out and non-interactive controls. (#85667) Thanks @fuller-stack-dev. 
diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md index eb8dfb848cb..f3982fc6f83 100644 --- a/docs/concepts/qa-e2e-automation.md +++ b/docs/concepts/qa-e2e-automation.md @@ -815,6 +815,9 @@ The report should answer: - What follow-up scenarios are worth adding For the inventory of available scenarios - useful when sizing follow-up work or wiring a new transport - run `pnpm openclaw qa coverage` (add `--json` for machine-readable output). +When choosing focused proof for a touched behavior or file path, run `pnpm openclaw qa coverage --match <query>`. +The match report searches scenario metadata, docs refs, code refs, coverage IDs, plugins, and provider requirements, then prints matching `qa suite --scenario ...` targets. +Treat it as a discovery aid, not a gate replacement; the selected scenario still needs the right provider mode, live transport, Multipass, Testbox, or release lane for the behavior under test. For character and style checks, run the same scenario across multiple live model refs and write a judged Markdown report: diff --git a/docs/help/testing.md b/docs/help/testing.md index d5b1bb78ec6..17c2abcb88d 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -154,6 +154,12 @@ inside every shard. `aimock` starts a local AIMock-backed provider server for experimental fixture and protocol-mock coverage without replacing the scenario-aware `mock-openai` lane. +- `pnpm openclaw qa coverage --match <query>` + - Searches scenario IDs, titles, surfaces, coverage IDs, docs refs, code refs, + plugins, and provider requirements, then prints matching suite targets. + - Use this before a QA Lab run when you know the touched behavior or file path + but not the smallest scenario. It is advisory only; still choose mock, + live, Multipass, Matrix, or transport proof from the behavior being changed. - `pnpm test:plugins:kitchen-sink-live` - Runs the live OpenAI Kitchen Sink plugin gauntlet through QA Lab. 
It installs the external Kitchen Sink package, verifies the plugin SDK surface diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts index aa219fdf318..7c78627955a 100644 --- a/extensions/qa-lab/src/cli.runtime.test.ts +++ b/extensions/qa-lab/src/cli.runtime.test.ts @@ -859,9 +859,7 @@ describe("qa cli runtime", () => { repoRoot: "/tmp/openclaw-repo", pack: "personal-admin", }), - ).rejects.toThrow( - '--pack must be one of personal-agent, observability, got "personal-admin"', - ); + ).rejects.toThrow('--pack must be one of personal-agent, observability, got "personal-admin"'); }); it("rejects unknown suite CLI auth modes", async () => { @@ -1087,6 +1085,28 @@ describe("qa cli runtime", () => { expectWriteContains(stdoutWrite, "memory.recall"); }); + it("prints a focused scenario match report from coverage metadata", async () => { + await runQaCoverageReportCommand({ + repoRoot: process.cwd(), + match: ["image roundtrip"], + }); + + expectWriteContains(stdoutWrite, "# QA Scenario Matches"); + expectWriteContains(stdoutWrite, "image-generation-roundtrip"); + expectWriteContains(stdoutWrite, "--scenario image-generation-roundtrip"); + expect(stdoutWrite.mock.calls.flat().join("")).not.toContain("memory-recall"); + }); + + it("rejects scenario match queries for tool coverage reports", async () => { + await expect( + runQaCoverageReportCommand({ + repoRoot: process.cwd(), + tools: true, + match: ["runtime"], + }), + ).rejects.toThrow("--match cannot be combined with --tools."); + }); + it("prints a markdown tool coverage report from runtime tool fixtures", async () => { await runQaCoverageReportCommand({ repoRoot: process.cwd(), tools: true }); diff --git a/extensions/qa-lab/src/cli.runtime.ts b/extensions/qa-lab/src/cli.runtime.ts index 60990add44e..f35f8c0692c 100644 --- a/extensions/qa-lab/src/cli.runtime.ts +++ b/extensions/qa-lab/src/cli.runtime.ts @@ -12,7 +12,12 @@ import { import { resolveQaParityPackScenarioIds } 
from "./agentic-parity.js"; import { runQaCharacterEval, type QaCharacterModelOptions } from "./character-eval.js"; import { resolveRepoRelativeOutputDir } from "./cli-paths.js"; -import { buildQaCoverageInventory, renderQaCoverageMarkdownReport } from "./coverage-report.js"; +import { + buildQaCoverageInventory, + findQaScenarioMatches, + renderQaCoverageMarkdownReport, + renderQaScenarioMatchesMarkdownReport, +} from "./coverage-report.js"; import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js"; import { runQaDockerUp } from "./docker-up.runtime.js"; import type { QaCliBackendAuthMode } from "./gateway-child.js"; @@ -786,6 +791,7 @@ export async function runQaCoverageReportCommand(opts: { json?: boolean; tools?: boolean; summary?: string; + match?: string[]; }) { const repoRoot = path.resolve(opts.repoRoot ?? process.cwd()); const outputPath = opts.output ? path.resolve(repoRoot, opts.output) : undefined; @@ -793,6 +799,9 @@ export async function runQaCoverageReportCommand(opts: { let body: string; let outputLabel = "QA coverage report"; if (opts.tools === true) { + if (opts.match && opts.match.length > 0) { + throw new Error("--match cannot be combined with --tools."); + } const summary = opts.summary?.trim() ? (JSON.parse( await fs.readFile(path.resolve(repoRoot, opts.summary), "utf8"), @@ -810,10 +819,19 @@ export async function runQaCoverageReportCommand(opts: { if (opts.summary?.trim()) { throw new Error("--summary requires --tools."); } - const inventory = buildQaCoverageInventory(scenarios); - body = opts.json - ? `${JSON.stringify(inventory, null, 2)}\n` - : renderQaCoverageMarkdownReport(inventory); + const query = opts.match?.join(" ").trim(); + if (query) { + const matches = findQaScenarioMatches(scenarios, query); + body = opts.json + ? 
`${JSON.stringify({ query, matches }, null, 2)}\n` + : renderQaScenarioMatchesMarkdownReport({ query, matches }); + outputLabel = "QA scenario match report"; + } else { + const inventory = buildQaCoverageInventory(scenarios); + body = opts.json + ? `${JSON.stringify(inventory, null, 2)}\n` + : renderQaCoverageMarkdownReport(inventory); + } } if (outputPath) { diff --git a/extensions/qa-lab/src/cli.test.ts b/extensions/qa-lab/src/cli.test.ts index dfe2d1056c1..e38395a26fa 100644 --- a/extensions/qa-lab/src/cli.test.ts +++ b/extensions/qa-lab/src/cli.test.ts @@ -466,6 +466,7 @@ describe("qa cli registration", () => { output: ".artifacts/qa-coverage.md", json: true, tools: false, + match: [], }); }); @@ -487,6 +488,26 @@ describe("qa cli registration", () => { tools: true, json: false, summary: ".artifacts/runtime-summary.json", + match: [], + }); + }); + + it("routes coverage match queries into the qa runtime command", async () => { + await program.parseAsync([ + "node", + "openclaw", + "qa", + "coverage", + "--match", + "image roundtrip", + "--match", + "native", + ]); + + expect(runQaCoverageReportCommand).toHaveBeenCalledWith({ + tools: false, + json: false, + match: ["image roundtrip", "native"], }); }); diff --git a/extensions/qa-lab/src/cli.ts b/extensions/qa-lab/src/cli.ts index 1cc2bb62c8a..bc156636bfd 100644 --- a/extensions/qa-lab/src/cli.ts +++ b/extensions/qa-lab/src/cli.ts @@ -78,6 +78,7 @@ async function runQaCoverageReport(opts: { json?: boolean; tools?: boolean; summary?: string; + match?: string[]; }) { const runtime = await loadQaLabCliRuntime(); await runtime.runQaCoverageReportCommand(opts); @@ -404,6 +405,12 @@ export function registerQaLabCli(program: Command) { .option("--json", "Print JSON instead of Markdown", false) .option("--tools", "Print runtime tool fixture coverage instead of scenario coverage", false) .option("--summary ", "Runtime qa-suite-summary.json to overlay on --tools coverage") + .option( + "--match ", + "Search scenario 
metadata and print matching qa suite targets (repeatable)", + collectString, + [], + ) .action( async (opts: { repoRoot?: string; @@ -411,6 +418,7 @@ export function registerQaLabCli(program: Command) { json?: boolean; tools?: boolean; summary?: string; + match?: string[]; }) => { await runQaCoverageReport(opts); }, diff --git a/extensions/qa-lab/src/coverage-report.ts b/extensions/qa-lab/src/coverage-report.ts index ed5880f060a..a72f6f6f73a 100644 --- a/extensions/qa-lab/src/coverage-report.ts +++ b/extensions/qa-lab/src/coverage-report.ts @@ -13,6 +13,16 @@ type QaCoverageScenarioSummary = { risk: string; }; +type QaScenarioSearchMatch = QaCoverageScenarioSummary & { + coverageIds: string[]; + docsRefs: string[]; + codeRefs: string[]; + runtimeParityTier?: string; + requiredProviderMode?: string; + requiredProvider?: string; + requiredModel?: string; +}; + type QaCoverageIntent = "primary" | "secondary"; type QaCoverageScenarioReference = QaCoverageScenarioSummary & { @@ -70,6 +80,85 @@ function summarizeScenario(scenario: QaSeedScenarioWithSource): QaCoverageScenar }; } +function normalizeSearchText(value: string) { + return value.toLowerCase(); +} + +function tokenizeScenarioSearchQuery(query: string) { + return query + .toLowerCase() + .split(/\s+/u) + .map((token) => token.trim()) + .filter(Boolean); +} + +function scenarioSearchText(scenario: QaSeedScenarioWithSource) { + const config = scenario.execution.config ?? {}; + return normalizeSearchText( + [ + scenario.id, + scenario.title, + scenario.sourcePath, + scenario.surface, + ...(scenario.surfaces ?? []), + scenario.category ?? "", + scenario.runtimeParityTier ?? "", + scenario.risk ?? "", + scenario.riskLevel ?? "", + scenario.objective, + ...scenario.successCriteria, + ...(scenario.capabilities ?? []), + ...(scenario.plugins ?? []), + ...(scenario.docsRefs ?? []), + ...(scenario.codeRefs ?? []), + ...(scenario.coverage?.primary ?? []), + ...(scenario.coverage?.secondary ?? 
[]), + ...Object.entries(config).flatMap(([key, value]) => [ + key, + typeof value === "string" ? value : "", + ]), + ].join("\n"), + ); +} + +function stringifyConfigValue(value: unknown) { + return typeof value === "string" && value.trim() ? value.trim() : undefined; +} + +function summarizeScenarioSearchMatch(scenario: QaSeedScenarioWithSource): QaScenarioSearchMatch { + const config = scenario.execution.config ?? {}; + return { + ...summarizeScenario(scenario), + coverageIds: [ + ...(scenario.coverage?.primary ?? []), + ...(scenario.coverage?.secondary ?? []), + ].toSorted((left, right) => left.localeCompare(right)), + docsRefs: [...(scenario.docsRefs ?? [])], + codeRefs: [...(scenario.codeRefs ?? [])], + runtimeParityTier: scenario.runtimeParityTier, + requiredProviderMode: stringifyConfigValue(config.requiredProviderMode), + requiredProvider: stringifyConfigValue(config.requiredProvider), + requiredModel: stringifyConfigValue(config.requiredModel), + }; +} + +export function findQaScenarioMatches( + scenarios: readonly QaSeedScenarioWithSource[], + query: string, +) { + const tokens = tokenizeScenarioSearchQuery(query); + if (tokens.length === 0) { + return []; + } + return scenarios + .filter((scenario) => { + const haystack = scenarioSearchText(scenario); + return tokens.every((token) => haystack.includes(token)); + }) + .map(summarizeScenarioSearchMatch) + .toSorted((left, right) => left.id.localeCompare(right.id)); +} + function sortFeatures(features: readonly QaCoverageFeatureSummary[]) { return features.toSorted((left, right) => left.id.localeCompare(right.id)); } @@ -280,3 +369,52 @@ export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory): return `${lines.join("\n").trimEnd()}\n`; } + +function formatOptionalScenarioMetadata(match: QaScenarioSearchMatch) { + const metadata = [ + match.runtimeParityTier ? `runtimeParityTier=${match.runtimeParityTier}` : "", + match.requiredProviderMode ? 
`providerMode=${match.requiredProviderMode}` : "", + match.requiredProvider ? `provider=${match.requiredProvider}` : "", + match.requiredModel ? `model=${match.requiredModel}` : "", + ].filter(Boolean); + return metadata.length > 0 ? metadata.join("; ") : "none"; +} + +export function renderQaScenarioMatchesMarkdownReport(params: { + query: string; + matches: readonly QaScenarioSearchMatch[]; +}) { + const scenarioArgs = params.matches.map((match) => `--scenario ${match.id}`).join(" "); + const lines = [ + "# QA Scenario Matches", + "", + `- Query: ${params.query}`, + `- Matches: ${params.matches.length}`, + ]; + + if (scenarioArgs) { + lines.push(`- Suite command: \`pnpm openclaw qa suite ${scenarioArgs}\``); + } + lines.push(""); + + if (params.matches.length === 0) { + lines.push("No QA scenarios matched the query.", ""); + return lines.join("\n"); + } + + for (const match of params.matches) { + lines.push(`- ${match.id}: ${match.title}`); + lines.push(` - source: ${match.sourcePath}`); + lines.push(` - surface: ${match.surfaces.join(", ")}`); + lines.push(` - coverage: ${match.coverageIds.join(", ") || "none"}`); + lines.push(` - live requirements: ${formatOptionalScenarioMetadata(match)}`); + if (match.codeRefs.length > 0) { + lines.push(` - code refs: ${match.codeRefs.join(", ")}`); + } + if (match.docsRefs.length > 0) { + lines.push(` - docs refs: ${match.docsRefs.join(", ")}`); + } + } + + return `${lines.join("\n").trimEnd()}\n`; +} diff --git a/scripts/qa-coverage-report.ts b/scripts/qa-coverage-report.ts index 6858c6edc6b..dd76e33a1e6 100644 --- a/scripts/qa-coverage-report.ts +++ b/scripts/qa-coverage-report.ts @@ -2,6 +2,7 @@ import { runQaCoverageReportCommand } from "../extensions/qa-lab/src/cli.runtime type Options = { json?: boolean; + match?: string[]; output?: string; repoRoot?: string; summary?: string; @@ -27,6 +28,7 @@ function parseArgs(args: string[]): Options { Options: --json Print machine-readable JSON + --match Search scenario metadata 
and print matching suite targets --output Write the report to a file --repo-root Repository root to target --summary Runtime qa-suite-summary.json to overlay on --tools coverage @@ -37,6 +39,11 @@ Options: case "--json": opts.json = true; break; + case "--match": + opts.match ??= []; + opts.match.push(takeValue(args, index, arg)); + index += 1; + break; case "--output": opts.output = takeValue(args, index, arg); index += 1; @@ -62,6 +69,7 @@ Options: const opts = parseArgs(process.argv.slice(2)); await runQaCoverageReportCommand({ ...(opts.json ? { json: true } : {}), + ...(opts.match ? { match: opts.match } : {}), ...(opts.output ? { output: opts.output } : {}), ...(opts.repoRoot ? { repoRoot: opts.repoRoot } : {}), ...(opts.summary ? { summary: opts.summary } : {}),