feat(qa): add coverage scenario matching

This commit is contained in:
Peter Steinberger
2026-05-25 10:22:43 +01:00
parent 4a45098a86
commit a1fe86a0ff
9 changed files with 231 additions and 8 deletions

View File

@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
### Changes
- QA-Lab: add `qa coverage --match <query>` so focused proof selection can discover matching scenarios from existing metadata before running live or remote lanes.
- Control UI: add an ephemeral Activity tab for sanitized live tool activity summaries without persisting raw telemetry. Fixes #12831. Thanks @BunsDev.
- Build: include `ui:build` in the `full` and `ciArtifacts` profiles of `scripts/build-all.mjs` so `pnpm build` always rebuilds `dist/control-ui` after `tsdown` cleans `dist`, removing the second-command requirement and the missing-asset failure mode for source/runtime installs and CI artifact uploads. (#85206)
- Migrate: import supported Hermes, OpenCode, and Codex auth credentials into OpenClaw auth profiles when credential migration is selected, with explicit opt-out and non-interactive controls. (#85667) Thanks @fuller-stack-dev.

View File

@@ -815,6 +815,9 @@ The report should answer:
- What follow-up scenarios are worth adding
For the inventory of available scenarios - useful when sizing follow-up work or wiring a new transport - run `pnpm openclaw qa coverage` (add `--json` for machine-readable output).
When choosing focused proof for a touched behavior or file path, run `pnpm openclaw qa coverage --match <query>`.
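The match report searches scenario metadata, docs refs, code refs, coverage IDs, plugins, and provider requirements, then prints matching `qa suite --scenario ...` targets. Query terms are split on whitespace and matched as case-insensitive substrings; a scenario is listed only when every term is found.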
Treat it as a discovery aid, not a gate replacement; the selected scenario still needs the right provider mode, live transport, Multipass, Testbox, or release lane for the behavior under test.
For character and style checks, run the same scenario across multiple live model
refs and write a judged Markdown report:

View File

@@ -154,6 +154,12 @@ inside every shard.
`aimock` starts a local AIMock-backed provider server for experimental
fixture and protocol-mock coverage without replacing the scenario-aware
`mock-openai` lane.
- `pnpm openclaw qa coverage --match <query>`
- Searches scenario IDs, titles, surfaces, coverage IDs, docs refs, code refs,
plugins, and provider requirements, then prints matching suite targets.
- Use this before a QA Lab run when you know the touched behavior or file path
but not the smallest scenario. It is advisory only; still choose mock,
live, Multipass, Matrix, or transport proof from the behavior being changed.
- `pnpm test:plugins:kitchen-sink-live`
- Runs the live OpenAI Kitchen Sink plugin gauntlet through QA Lab. It
installs the external Kitchen Sink package, verifies the plugin SDK surface

View File

@@ -859,9 +859,7 @@ describe("qa cli runtime", () => {
repoRoot: "/tmp/openclaw-repo",
pack: "personal-admin",
}),
).rejects.toThrow(
'--pack must be one of personal-agent, observability, got "personal-admin"',
);
).rejects.toThrow('--pack must be one of personal-agent, observability, got "personal-admin"');
});
it("rejects unknown suite CLI auth modes", async () => {
@@ -1087,6 +1085,28 @@ describe("qa cli runtime", () => {
expectWriteContains(stdoutWrite, "memory.recall");
});
// Runs the coverage command with a `match` query against the current working
// directory and checks that stdout carries the Markdown match report: the
// report heading, the matching scenario id, and a ready-to-use `--scenario` target.
it("prints a focused scenario match report from coverage metadata", async () => {
await runQaCoverageReportCommand({
repoRoot: process.cwd(),
match: ["image roundtrip"],
});
expectWriteContains(stdoutWrite, "# QA Scenario Matches");
expectWriteContains(stdoutWrite, "image-generation-roundtrip");
expectWriteContains(stdoutWrite, "--scenario image-generation-roundtrip");
// Negative check: a scenario unrelated to the query must not be listed.
// NOTE(review): the inventory test just above asserts the dotted string
// "memory.recall"; confirm "memory-recall" is a real scenario id, otherwise
// this assertion passes vacuously.
expect(stdoutWrite.mock.calls.flat().join("")).not.toContain("memory-recall");
});
// `--match` searches scenario metadata, so combining it with the `--tools`
// report is rejected with an explicit error.
it("rejects scenario match queries for tool coverage reports", async () => {
await expect(
runQaCoverageReportCommand({
repoRoot: process.cwd(),
tools: true,
match: ["runtime"],
}),
).rejects.toThrow("--match cannot be combined with --tools.");
});
it("prints a markdown tool coverage report from runtime tool fixtures", async () => {
await runQaCoverageReportCommand({ repoRoot: process.cwd(), tools: true });

View File

@@ -12,7 +12,12 @@ import {
import { resolveQaParityPackScenarioIds } from "./agentic-parity.js";
import { runQaCharacterEval, type QaCharacterModelOptions } from "./character-eval.js";
import { resolveRepoRelativeOutputDir } from "./cli-paths.js";
import { buildQaCoverageInventory, renderQaCoverageMarkdownReport } from "./coverage-report.js";
import {
buildQaCoverageInventory,
findQaScenarioMatches,
renderQaCoverageMarkdownReport,
renderQaScenarioMatchesMarkdownReport,
} from "./coverage-report.js";
import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js";
import { runQaDockerUp } from "./docker-up.runtime.js";
import type { QaCliBackendAuthMode } from "./gateway-child.js";
@@ -786,6 +791,7 @@ export async function runQaCoverageReportCommand(opts: {
json?: boolean;
tools?: boolean;
summary?: string;
match?: string[];
}) {
const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
const outputPath = opts.output ? path.resolve(repoRoot, opts.output) : undefined;
@@ -793,6 +799,9 @@ export async function runQaCoverageReportCommand(opts: {
let body: string;
let outputLabel = "QA coverage report";
if (opts.tools === true) {
if (opts.match && opts.match.length > 0) {
throw new Error("--match cannot be combined with --tools.");
}
const summary = opts.summary?.trim()
? (JSON.parse(
await fs.readFile(path.resolve(repoRoot, opts.summary), "utf8"),
@@ -810,10 +819,19 @@ export async function runQaCoverageReportCommand(opts: {
if (opts.summary?.trim()) {
throw new Error("--summary requires --tools.");
}
const inventory = buildQaCoverageInventory(scenarios);
body = opts.json
? `${JSON.stringify(inventory, null, 2)}\n`
: renderQaCoverageMarkdownReport(inventory);
const query = opts.match?.join(" ").trim();
if (query) {
const matches = findQaScenarioMatches(scenarios, query);
body = opts.json
? `${JSON.stringify({ query, matches }, null, 2)}\n`
: renderQaScenarioMatchesMarkdownReport({ query, matches });
outputLabel = "QA scenario match report";
} else {
const inventory = buildQaCoverageInventory(scenarios);
body = opts.json
? `${JSON.stringify(inventory, null, 2)}\n`
: renderQaCoverageMarkdownReport(inventory);
}
}
if (outputPath) {

View File

@@ -466,6 +466,7 @@ describe("qa cli registration", () => {
output: ".artifacts/qa-coverage.md",
json: true,
tools: false,
match: [],
});
});
@@ -487,6 +488,26 @@ describe("qa cli registration", () => {
tools: true,
json: false,
summary: ".artifacts/runtime-summary.json",
match: [],
});
});
// Passing `--match` more than once must accumulate every query, in order, into
// the `match` array handed to the runtime command; `tools` and `json` keep
// their `false` defaults.
it("routes coverage match queries into the qa runtime command", async () => {
await program.parseAsync([
"node",
"openclaw",
"qa",
"coverage",
"--match",
"image roundtrip",
"--match",
"native",
]);
expect(runQaCoverageReportCommand).toHaveBeenCalledWith({
tools: false,
json: false,
match: ["image roundtrip", "native"],
});
});

View File

@@ -78,6 +78,7 @@ async function runQaCoverageReport(opts: {
json?: boolean;
tools?: boolean;
summary?: string;
match?: string[];
}) {
const runtime = await loadQaLabCliRuntime();
await runtime.runQaCoverageReportCommand(opts);
@@ -404,6 +405,12 @@ export function registerQaLabCli(program: Command) {
.option("--json", "Print JSON instead of Markdown", false)
.option("--tools", "Print runtime tool fixture coverage instead of scenario coverage", false)
.option("--summary <path>", "Runtime qa-suite-summary.json to overlay on --tools coverage")
.option(
"--match <query>",
"Search scenario metadata and print matching qa suite targets (repeatable)",
collectString,
[],
)
.action(
async (opts: {
repoRoot?: string;
@@ -411,6 +418,7 @@ export function registerQaLabCli(program: Command) {
json?: boolean;
tools?: boolean;
summary?: string;
match?: string[];
}) => {
await runQaCoverageReport(opts);
},

View File

@@ -13,6 +13,16 @@ type QaCoverageScenarioSummary = {
risk: string;
};
/**
 * One scenario returned by a `--match` search: the regular scenario summary
 * plus the extra metadata printed in the match report.
 */
type QaScenarioSearchMatch = QaCoverageScenarioSummary & {
// Primary and secondary coverage IDs combined and sorted.
coverageIds: string[];
// Copies of the scenario's docs references.
docsRefs: string[];
// Copies of the scenario's code references.
codeRefs: string[];
// Taken from the scenario as-is; may be absent.
runtimeParityTier?: string;
// The three fields below come from the scenario's execution config and are
// only set when the config value is a non-blank string (stored trimmed).
requiredProviderMode?: string;
requiredProvider?: string;
requiredModel?: string;
};
type QaCoverageIntent = "primary" | "secondary";
type QaCoverageScenarioReference = QaCoverageScenarioSummary & {
@@ -70,6 +80,85 @@ function summarizeScenario(scenario: QaSeedScenarioWithSource): QaCoverageScenar
};
}
/** Lowercases text so scenario metadata can be compared case-insensitively. */
function normalizeSearchText(value: string) {
  const lowered = value.toLowerCase();
  return lowered;
}
/**
 * Breaks a free-form query into lowercase search tokens.
 *
 * The query is split on runs of whitespace; empty pieces (from leading or
 * trailing whitespace, or a blank query) are discarded.
 */
function tokenizeScenarioSearchQuery(query: string) {
  const tokens: string[] = [];
  for (const piece of query.toLowerCase().split(/\s+/u)) {
    const token = piece.trim();
    if (token) {
      tokens.push(token);
    }
  }
  return tokens;
}
/**
 * Builds the lowercase text that query tokens are matched against for one
 * scenario.
 *
 * The text is a newline-joined list of the scenario's identifying fields,
 * objective, success criteria, capabilities, plugins, docs/code refs, coverage
 * IDs, and every execution-config key together with its value when that value
 * is a string (other value types contribute an empty entry).
 */
function scenarioSearchText(scenario: QaSeedScenarioWithSource) {
  const config = scenario.execution.config ?? {};
  // Config contributes both key names and string values, so a query can hit
  // either the setting name or what it is set to.
  const configParts: string[] = [];
  for (const [key, value] of Object.entries(config)) {
    configParts.push(key, typeof value === "string" ? value : "");
  }
  const parts = [
    scenario.id,
    scenario.title,
    scenario.sourcePath,
    scenario.surface,
    ...(scenario.surfaces ?? []),
    scenario.category ?? "",
    scenario.runtimeParityTier ?? "",
    scenario.risk ?? "",
    scenario.riskLevel ?? "",
    scenario.objective,
    ...scenario.successCriteria,
    ...(scenario.capabilities ?? []),
    ...(scenario.plugins ?? []),
    ...(scenario.docsRefs ?? []),
    ...(scenario.codeRefs ?? []),
    ...(scenario.coverage?.primary ?? []),
    ...(scenario.coverage?.secondary ?? []),
    ...configParts,
  ];
  return normalizeSearchText(parts.join("\n"));
}
/**
 * Returns the trimmed text of a config value, or `undefined` when the value is
 * not a string or is blank after trimming.
 */
function stringifyConfigValue(value: unknown) {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
}
/**
 * Converts a scenario into the record shown in a match report: the regular
 * scenario summary extended with sorted coverage IDs, copies of the docs/code
 * refs, the runtime parity tier, and the provider requirements read from the
 * execution config.
 */
function summarizeScenarioSearchMatch(scenario: QaSeedScenarioWithSource): QaScenarioSearchMatch {
  const config = scenario.execution.config ?? {};
  const primaryIds = scenario.coverage?.primary ?? [];
  const secondaryIds = scenario.coverage?.secondary ?? [];
  const coverageIds = [...primaryIds, ...secondaryIds].toSorted((left, right) =>
    left.localeCompare(right),
  );
  const summary = summarizeScenario(scenario);
  return {
    ...summary,
    coverageIds,
    docsRefs: [...(scenario.docsRefs ?? [])],
    codeRefs: [...(scenario.codeRefs ?? [])],
    runtimeParityTier: scenario.runtimeParityTier,
    requiredProviderMode: stringifyConfigValue(config.requiredProviderMode),
    requiredProvider: stringifyConfigValue(config.requiredProvider),
    requiredModel: stringifyConfigValue(config.requiredModel),
  };
}
/**
 * Finds the scenarios whose searchable text contains every token of `query`.
 *
 * @param scenarios - Scenarios to search.
 * @param query - Free-form query; it is tokenized on whitespace and matched
 *   case-insensitively as substrings.
 * @returns Match summaries sorted by scenario id, or an empty array when the
 *   query has no tokens.
 */
export function findQaScenarioMatches(
  scenarios: readonly QaSeedScenarioWithSource[],
  query: string,
) {
  const tokens = tokenizeScenarioSearchQuery(query);
  const matches: QaScenarioSearchMatch[] = [];
  if (tokens.length === 0) {
    return matches;
  }
  for (const scenario of scenarios) {
    const haystack = scenarioSearchText(scenario);
    // A scenario qualifies only when no token is missing from its text.
    const hasMissingToken = tokens.some((token) => !haystack.includes(token));
    if (!hasMissingToken) {
      matches.push(summarizeScenarioSearchMatch(scenario));
    }
  }
  return matches.toSorted((left, right) => left.id.localeCompare(right.id));
}
/** Returns a new array of the features ordered by `id`; the input is not modified. */
function sortFeatures(features: readonly QaCoverageFeatureSummary[]) {
  const byId = (left: QaCoverageFeatureSummary, right: QaCoverageFeatureSummary) =>
    left.id.localeCompare(right.id);
  return features.toSorted(byId);
}
@@ -280,3 +369,52 @@ export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory):
return `${lines.join("\n").trimEnd()}\n`;
}
/**
 * Formats the optional runtime/provider metadata of a match as
 * `label=value` pairs joined by "; ". Unset or empty fields are left out;
 * returns "none" when no field is set.
 */
function formatOptionalScenarioMetadata(match: QaScenarioSearchMatch) {
  const labeled: Array<[string, string | undefined]> = [
    ["runtimeParityTier", match.runtimeParityTier],
    ["providerMode", match.requiredProviderMode],
    ["provider", match.requiredProvider],
    ["model", match.requiredModel],
  ];
  const parts = labeled.flatMap(([label, value]) => (value ? [`${label}=${value}`] : []));
  return parts.length === 0 ? "none" : parts.join("; ");
}
/**
 * Renders the Markdown report for a scenario match query.
 *
 * The report starts with the query and match count. When there are matches it
 * also shows a combined `pnpm openclaw qa suite --scenario ...` command and one
 * bullet per scenario with its source, surfaces, coverage IDs, live
 * requirements, and any code/docs refs. With no matches it prints a short
 * notice instead.
 *
 * @param params.query - The query text echoed in the report header.
 * @param params.matches - Matches to list, in the order given.
 * @returns The report text, ending with a single newline.
 */
export function renderQaScenarioMatchesMarkdownReport(params: {
  query: string;
  matches: readonly QaScenarioSearchMatch[];
}) {
  const { query, matches } = params;
  const scenarioArgs = matches.map((match) => `--scenario ${match.id}`).join(" ");
  const header = ["# QA Scenario Matches", "", `- Query: ${query}`, `- Matches: ${matches.length}`];
  if (scenarioArgs) {
    header.push(`- Suite command: \`pnpm openclaw qa suite ${scenarioArgs}\``);
  }
  header.push("");

  if (matches.length === 0) {
    return [...header, "No QA scenarios matched the query.", ""].join("\n");
  }

  const entries = matches.flatMap((match) => {
    const entry = [
      `- ${match.id}: ${match.title}`,
      `  - source: ${match.sourcePath}`,
      `  - surface: ${match.surfaces.join(", ")}`,
      `  - coverage: ${match.coverageIds.join(", ") || "none"}`,
      `  - live requirements: ${formatOptionalScenarioMetadata(match)}`,
    ];
    // Reference lists are optional; omit the bullet entirely when empty.
    if (match.codeRefs.length > 0) {
      entry.push(`  - code refs: ${match.codeRefs.join(", ")}`);
    }
    if (match.docsRefs.length > 0) {
      entry.push(`  - docs refs: ${match.docsRefs.join(", ")}`);
    }
    return entry;
  });
  const report = [...header, ...entries].join("\n");
  return `${report.trimEnd()}\n`;
}

View File

@@ -2,6 +2,7 @@ import { runQaCoverageReportCommand } from "../extensions/qa-lab/src/cli.runtime
type Options = {
json?: boolean;
match?: string[];
output?: string;
repoRoot?: string;
summary?: string;
@@ -27,6 +28,7 @@ function parseArgs(args: string[]): Options {
Options:
--json Print machine-readable JSON
--match <query> Search scenario metadata and print matching suite targets
--output <path> Write the report to a file
--repo-root <path> Repository root to target
--summary <path> Runtime qa-suite-summary.json to overlay on --tools coverage
@@ -37,6 +39,11 @@ Options:
case "--json":
opts.json = true;
break;
case "--match":
opts.match ??= [];
opts.match.push(takeValue(args, index, arg));
index += 1;
break;
case "--output":
opts.output = takeValue(args, index, arg);
index += 1;
@@ -62,6 +69,7 @@ Options:
const opts = parseArgs(process.argv.slice(2));
await runQaCoverageReportCommand({
...(opts.json ? { json: true } : {}),
...(opts.match ? { match: opts.match } : {}),
...(opts.output ? { output: opts.output } : {}),
...(opts.repoRoot ? { repoRoot: opts.repoRoot } : {}),
...(opts.summary ? { summary: opts.summary } : {}),