mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-27 22:42:31 +00:00
feat(qa): add coverage scenario matching
This commit is contained in:
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Changes
|
||||
|
||||
- QA-Lab: add `qa coverage --match <query>` so focused proof selection can discover matching scenarios from existing metadata before running live or remote lanes.
|
||||
- Control UI: add an ephemeral Activity tab for sanitized live tool activity summaries without persisting raw telemetry. Fixes #12831. Thanks @BunsDev.
|
||||
- Build: include `ui:build` in the `full` and `ciArtifacts` profiles of `scripts/build-all.mjs` so `pnpm build` always rebuilds `dist/control-ui` after `tsdown` cleans `dist`, removing the second-command requirement and the missing-asset failure mode for source/runtime installs and CI artifact uploads. (#85206)
|
||||
- Migrate: import supported Hermes, OpenCode, and Codex auth credentials into OpenClaw auth profiles when credential migration is selected, with explicit opt-out and non-interactive controls. (#85667) Thanks @fuller-stack-dev.
|
||||
|
||||
@@ -815,6 +815,9 @@ The report should answer:
|
||||
- What follow-up scenarios are worth adding
|
||||
|
||||
For the inventory of available scenarios - useful when sizing follow-up work or wiring a new transport - run `pnpm openclaw qa coverage` (add `--json` for machine-readable output).
|
||||
When choosing focused proof for a touched behavior or file path, run `pnpm openclaw qa coverage --match <query>`.
|
||||
The match report searches scenario metadata, docs refs, code refs, coverage IDs, plugins, and provider requirements, then prints matching `qa suite --scenario ...` targets.
|
||||
Treat it as a discovery aid, not a gate replacement; the selected scenario still needs the right provider mode, live transport, Multipass, Testbox, or release lane for the behavior under test.
|
||||
|
||||
For character and style checks, run the same scenario across multiple live model
|
||||
refs and write a judged Markdown report:
|
||||
|
||||
@@ -154,6 +154,12 @@ inside every shard.
|
||||
`aimock` starts a local AIMock-backed provider server for experimental
|
||||
fixture and protocol-mock coverage without replacing the scenario-aware
|
||||
`mock-openai` lane.
|
||||
- `pnpm openclaw qa coverage --match <query>`
|
||||
- Searches scenario IDs, titles, surfaces, coverage IDs, docs refs, code refs,
|
||||
plugins, and provider requirements, then prints matching suite targets.
|
||||
- Use this before a QA Lab run when you know the touched behavior or file path
|
||||
but not the smallest scenario. It is advisory only; still choose mock,
|
||||
live, Multipass, Matrix, or transport proof from the behavior being changed.
|
||||
- `pnpm test:plugins:kitchen-sink-live`
|
||||
- Runs the live OpenAI Kitchen Sink plugin gauntlet through QA Lab. It
|
||||
installs the external Kitchen Sink package, verifies the plugin SDK surface
|
||||
|
||||
@@ -859,9 +859,7 @@ describe("qa cli runtime", () => {
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
pack: "personal-admin",
|
||||
}),
|
||||
).rejects.toThrow(
|
||||
'--pack must be one of personal-agent, observability, got "personal-admin"',
|
||||
);
|
||||
).rejects.toThrow('--pack must be one of personal-agent, observability, got "personal-admin"');
|
||||
});
|
||||
|
||||
it("rejects unknown suite CLI auth modes", async () => {
|
||||
@@ -1087,6 +1085,28 @@ describe("qa cli runtime", () => {
|
||||
expectWriteContains(stdoutWrite, "memory.recall");
|
||||
});
|
||||
|
||||
it("prints a focused scenario match report from coverage metadata", async () => {
|
||||
await runQaCoverageReportCommand({
|
||||
repoRoot: process.cwd(),
|
||||
match: ["image roundtrip"],
|
||||
});
|
||||
|
||||
expectWriteContains(stdoutWrite, "# QA Scenario Matches");
|
||||
expectWriteContains(stdoutWrite, "image-generation-roundtrip");
|
||||
expectWriteContains(stdoutWrite, "--scenario image-generation-roundtrip");
|
||||
expect(stdoutWrite.mock.calls.flat().join("")).not.toContain("memory-recall");
|
||||
});
|
||||
|
||||
it("rejects scenario match queries for tool coverage reports", async () => {
|
||||
await expect(
|
||||
runQaCoverageReportCommand({
|
||||
repoRoot: process.cwd(),
|
||||
tools: true,
|
||||
match: ["runtime"],
|
||||
}),
|
||||
).rejects.toThrow("--match cannot be combined with --tools.");
|
||||
});
|
||||
|
||||
it("prints a markdown tool coverage report from runtime tool fixtures", async () => {
|
||||
await runQaCoverageReportCommand({ repoRoot: process.cwd(), tools: true });
|
||||
|
||||
|
||||
@@ -12,7 +12,12 @@ import {
|
||||
import { resolveQaParityPackScenarioIds } from "./agentic-parity.js";
|
||||
import { runQaCharacterEval, type QaCharacterModelOptions } from "./character-eval.js";
|
||||
import { resolveRepoRelativeOutputDir } from "./cli-paths.js";
|
||||
import { buildQaCoverageInventory, renderQaCoverageMarkdownReport } from "./coverage-report.js";
|
||||
import {
|
||||
buildQaCoverageInventory,
|
||||
findQaScenarioMatches,
|
||||
renderQaCoverageMarkdownReport,
|
||||
renderQaScenarioMatchesMarkdownReport,
|
||||
} from "./coverage-report.js";
|
||||
import { buildQaDockerHarnessImage, writeQaDockerHarnessFiles } from "./docker-harness.js";
|
||||
import { runQaDockerUp } from "./docker-up.runtime.js";
|
||||
import type { QaCliBackendAuthMode } from "./gateway-child.js";
|
||||
@@ -786,6 +791,7 @@ export async function runQaCoverageReportCommand(opts: {
|
||||
json?: boolean;
|
||||
tools?: boolean;
|
||||
summary?: string;
|
||||
match?: string[];
|
||||
}) {
|
||||
const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
|
||||
const outputPath = opts.output ? path.resolve(repoRoot, opts.output) : undefined;
|
||||
@@ -793,6 +799,9 @@ export async function runQaCoverageReportCommand(opts: {
|
||||
let body: string;
|
||||
let outputLabel = "QA coverage report";
|
||||
if (opts.tools === true) {
|
||||
if (opts.match && opts.match.length > 0) {
|
||||
throw new Error("--match cannot be combined with --tools.");
|
||||
}
|
||||
const summary = opts.summary?.trim()
|
||||
? (JSON.parse(
|
||||
await fs.readFile(path.resolve(repoRoot, opts.summary), "utf8"),
|
||||
@@ -810,10 +819,19 @@ export async function runQaCoverageReportCommand(opts: {
|
||||
if (opts.summary?.trim()) {
|
||||
throw new Error("--summary requires --tools.");
|
||||
}
|
||||
const inventory = buildQaCoverageInventory(scenarios);
|
||||
body = opts.json
|
||||
? `${JSON.stringify(inventory, null, 2)}\n`
|
||||
: renderQaCoverageMarkdownReport(inventory);
|
||||
const query = opts.match?.join(" ").trim();
|
||||
if (query) {
|
||||
const matches = findQaScenarioMatches(scenarios, query);
|
||||
body = opts.json
|
||||
? `${JSON.stringify({ query, matches }, null, 2)}\n`
|
||||
: renderQaScenarioMatchesMarkdownReport({ query, matches });
|
||||
outputLabel = "QA scenario match report";
|
||||
} else {
|
||||
const inventory = buildQaCoverageInventory(scenarios);
|
||||
body = opts.json
|
||||
? `${JSON.stringify(inventory, null, 2)}\n`
|
||||
: renderQaCoverageMarkdownReport(inventory);
|
||||
}
|
||||
}
|
||||
|
||||
if (outputPath) {
|
||||
|
||||
@@ -466,6 +466,7 @@ describe("qa cli registration", () => {
|
||||
output: ".artifacts/qa-coverage.md",
|
||||
json: true,
|
||||
tools: false,
|
||||
match: [],
|
||||
});
|
||||
});
|
||||
|
||||
@@ -487,6 +488,26 @@ describe("qa cli registration", () => {
|
||||
tools: true,
|
||||
json: false,
|
||||
summary: ".artifacts/runtime-summary.json",
|
||||
match: [],
|
||||
});
|
||||
});
|
||||
|
||||
it("routes coverage match queries into the qa runtime command", async () => {
|
||||
await program.parseAsync([
|
||||
"node",
|
||||
"openclaw",
|
||||
"qa",
|
||||
"coverage",
|
||||
"--match",
|
||||
"image roundtrip",
|
||||
"--match",
|
||||
"native",
|
||||
]);
|
||||
|
||||
expect(runQaCoverageReportCommand).toHaveBeenCalledWith({
|
||||
tools: false,
|
||||
json: false,
|
||||
match: ["image roundtrip", "native"],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -78,6 +78,7 @@ async function runQaCoverageReport(opts: {
|
||||
json?: boolean;
|
||||
tools?: boolean;
|
||||
summary?: string;
|
||||
match?: string[];
|
||||
}) {
|
||||
const runtime = await loadQaLabCliRuntime();
|
||||
await runtime.runQaCoverageReportCommand(opts);
|
||||
@@ -404,6 +405,12 @@ export function registerQaLabCli(program: Command) {
|
||||
.option("--json", "Print JSON instead of Markdown", false)
|
||||
.option("--tools", "Print runtime tool fixture coverage instead of scenario coverage", false)
|
||||
.option("--summary <path>", "Runtime qa-suite-summary.json to overlay on --tools coverage")
|
||||
.option(
|
||||
"--match <query>",
|
||||
"Search scenario metadata and print matching qa suite targets (repeatable)",
|
||||
collectString,
|
||||
[],
|
||||
)
|
||||
.action(
|
||||
async (opts: {
|
||||
repoRoot?: string;
|
||||
@@ -411,6 +418,7 @@ export function registerQaLabCli(program: Command) {
|
||||
json?: boolean;
|
||||
tools?: boolean;
|
||||
summary?: string;
|
||||
match?: string[];
|
||||
}) => {
|
||||
await runQaCoverageReport(opts);
|
||||
},
|
||||
|
||||
@@ -13,6 +13,16 @@ type QaCoverageScenarioSummary = {
|
||||
risk: string;
|
||||
};
|
||||
|
||||
/**
 * One scenario returned by a `--match` search: the regular coverage summary
 * plus the metadata that helps pick the right lane for running it.
 */
type QaScenarioSearchMatch = QaCoverageScenarioSummary & {
  /** Coverage IDs taken from the scenario's primary and secondary coverage lists. */
  coverageIds: string[];
  /** Documentation references copied from the scenario. */
  docsRefs: string[];
  /** Code references copied from the scenario. */
  codeRefs: string[];
  /** The scenario's `runtimeParityTier`, when it declares one. */
  runtimeParityTier?: string;
  /** Trimmed `requiredProviderMode` from the execution config, when it is a non-blank string. */
  requiredProviderMode?: string;
  /** Trimmed `requiredProvider` from the execution config, when it is a non-blank string. */
  requiredProvider?: string;
  /** Trimmed `requiredModel` from the execution config, when it is a non-blank string. */
  requiredModel?: string;
};
|
||||
|
||||
type QaCoverageIntent = "primary" | "secondary";
|
||||
|
||||
type QaCoverageScenarioReference = QaCoverageScenarioSummary & {
|
||||
@@ -70,6 +80,85 @@ function summarizeScenario(scenario: QaSeedScenarioWithSource): QaCoverageScenar
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Case-folds text for scenario search.
 *
 * Both the searched scenario text and the query tokens go through this
 * function so the two sides are always compared in the same form.
 *
 * @param value - Raw text to fold.
 * @returns The lower-cased text.
 */
function normalizeSearchText(value: string): string {
  return value.toLowerCase();
}

/**
 * Splits a free-form search query into case-folded tokens.
 *
 * @param query - Query text; may contain any mix of whitespace.
 * @returns The whitespace-delimited tokens, lower-cased, without empty entries.
 *   A blank query yields an empty array.
 */
function tokenizeScenarioSearchQuery(query: string): string[] {
  // Splitting on whitespace runs never leaves whitespace inside a token; the
  // only artifacts are empty strings produced by leading/trailing whitespace,
  // so dropping empties is sufficient and no per-token trim is needed.
  return normalizeSearchText(query)
    .split(/\s+/u)
    .filter((token) => token.length > 0);
}
|
||||
|
||||
/**
 * Builds the normalized text blob a scenario is searched against.
 *
 * The blob is the newline-joined concatenation of the scenario's identity
 * fields, classification fields, objective and success criteria, declared
 * capabilities/plugins/refs/coverage IDs, and its execution config. Each
 * config entry contributes its key; only string values contribute their value.
 *
 * @param scenario - Scenario to index.
 * @returns The joined text after `normalizeSearchText`.
 */
function scenarioSearchText(scenario: QaSeedScenarioWithSource) {
  // Flatten the execution config first: every key is searchable, but a value
  // is only searchable when it is a plain string (anything else becomes "").
  const configFields: string[] = [];
  for (const [key, value] of Object.entries(scenario.execution.config ?? {})) {
    configFields.push(key, typeof value === "string" ? value : "");
  }

  const identity = [
    scenario.id,
    scenario.title,
    scenario.sourcePath,
    scenario.surface,
    ...(scenario.surfaces ?? []),
  ];
  const classification = [
    scenario.category ?? "",
    scenario.runtimeParityTier ?? "",
    scenario.risk ?? "",
    scenario.riskLevel ?? "",
  ];
  const narrative = [scenario.objective, ...scenario.successCriteria];
  const references = [
    ...(scenario.capabilities ?? []),
    ...(scenario.plugins ?? []),
    ...(scenario.docsRefs ?? []),
    ...(scenario.codeRefs ?? []),
    ...(scenario.coverage?.primary ?? []),
    ...(scenario.coverage?.secondary ?? []),
  ];

  const joined = [
    ...identity,
    ...classification,
    ...narrative,
    ...references,
    ...configFields,
  ].join("\n");
  return normalizeSearchText(joined);
}
|
||||
|
||||
/**
 * Reads an optional string setting out of an untyped config value.
 *
 * @param value - Raw config value of unknown type.
 * @returns The trimmed string when `value` is a string with non-whitespace
 *   content; otherwise `undefined` (non-strings, empty and blank strings).
 */
function stringifyConfigValue(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  // Trim once and reuse the result for both the emptiness check and the return.
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
|
||||
|
||||
/**
 * Converts a scenario into the record shown in a scenario match report.
 *
 * Extends the `summarizeScenario` result with the scenario's coverage IDs,
 * docs/code references, runtime parity tier, and the provider requirements
 * read from its execution config.
 *
 * @param scenario - Scenario that matched the search.
 * @returns The match record; the array fields are fresh copies.
 */
function summarizeScenarioSearchMatch(scenario: QaSeedScenarioWithSource): QaScenarioSearchMatch {
  const config = scenario.execution.config ?? {};
  // An ID listed under both primary and secondary coverage would otherwise be
  // reported twice; collapse duplicates before sorting.
  const coverageIds = [
    ...new Set([...(scenario.coverage?.primary ?? []), ...(scenario.coverage?.secondary ?? [])]),
  ].toSorted((left, right) => left.localeCompare(right));

  return {
    ...summarizeScenario(scenario),
    coverageIds,
    docsRefs: [...(scenario.docsRefs ?? [])],
    codeRefs: [...(scenario.codeRefs ?? [])],
    runtimeParityTier: scenario.runtimeParityTier,
    requiredProviderMode: stringifyConfigValue(config.requiredProviderMode),
    requiredProvider: stringifyConfigValue(config.requiredProvider),
    requiredModel: stringifyConfigValue(config.requiredModel),
  };
}
|
||||
|
||||
/**
 * Finds the scenarios whose metadata matches a free-form query.
 *
 * The query is tokenized on whitespace; a scenario matches only when every
 * token occurs as a substring of its search text. A query with no tokens
 * matches nothing.
 *
 * @param scenarios - Scenarios to search.
 * @param query - Free-form query text.
 * @returns Match records ordered by scenario id.
 */
export function findQaScenarioMatches(
  scenarios: readonly QaSeedScenarioWithSource[],
  query: string,
) {
  const tokens = tokenizeScenarioSearchQuery(query);
  const found: QaScenarioSearchMatch[] = [];

  if (tokens.length > 0) {
    for (const scenario of scenarios) {
      const haystack = scenarioSearchText(scenario);
      const missesToken = tokens.some((token) => !haystack.includes(token));
      if (!missesToken) {
        found.push(summarizeScenarioSearchMatch(scenario));
      }
    }
  }

  // `found` is a local array, so sorting it in place is not observable to callers.
  return found.sort((left, right) => left.id.localeCompare(right.id));
}
|
||||
|
||||
/**
 * Returns a new array of the given features ordered by `id`.
 * The input array is left untouched.
 */
function sortFeatures(features: readonly QaCoverageFeatureSummary[]) {
  const ordered = [...features];
  ordered.sort((a, b) => a.id.localeCompare(b.id));
  return ordered;
}
|
||||
@@ -280,3 +369,52 @@ export function renderQaCoverageMarkdownReport(inventory: QaCoverageInventory):
|
||||
|
||||
return `${lines.join("\n").trimEnd()}\n`;
|
||||
}
|
||||
|
||||
/**
 * Renders a match's optional runtime requirements as `label=value` pairs
 * joined by "; ". Unset or empty values are left out; when nothing is set
 * the result is "none".
 */
function formatOptionalScenarioMetadata(match: QaScenarioSearchMatch) {
  const labelled: [string, string | undefined][] = [
    ["runtimeParityTier", match.runtimeParityTier],
    ["providerMode", match.requiredProviderMode],
    ["provider", match.requiredProvider],
    ["model", match.requiredModel],
  ];

  const parts: string[] = [];
  for (const [label, value] of labelled) {
    if (value) {
      parts.push(`${label}=${value}`);
    }
  }

  return parts.length === 0 ? "none" : parts.join("; ");
}
|
||||
|
||||
/**
 * Renders the Markdown report for a scenario match search.
 *
 * The report starts with the query and match count. When there are matches it
 * also prints a ready-to-run `qa suite` command targeting all of them,
 * followed by one bullet per scenario with its source, surfaces, coverage IDs,
 * live requirements, and any code/docs references. Without matches it prints
 * a short "nothing matched" notice instead.
 *
 * @param params.query - The query text that was searched.
 * @param params.matches - The matching scenarios, in display order.
 * @returns The Markdown text, terminated by a single newline.
 */
export function renderQaScenarioMatchesMarkdownReport(params: {
  query: string;
  matches: readonly QaScenarioSearchMatch[];
}) {
  const { query, matches } = params;
  const header = [
    "# QA Scenario Matches",
    "",
    `- Query: ${query}`,
    `- Matches: ${matches.length}`,
  ];

  if (matches.length === 0) {
    return [...header, "", "No QA scenarios matched the query.", ""].join("\n");
  }

  // With at least one match the joined flags are never empty, so the suite
  // command line is always part of the report on this path.
  const scenarioFlags = matches.map(({ id }) => `--scenario ${id}`).join(" ");
  const suiteLine = `- Suite command: \`pnpm openclaw qa suite ${scenarioFlags}\``;

  const entries = matches.flatMap((match) => {
    const entry = [
      `- ${match.id}: ${match.title}`,
      ` - source: ${match.sourcePath}`,
      ` - surface: ${match.surfaces.join(", ")}`,
      ` - coverage: ${match.coverageIds.join(", ") || "none"}`,
      ` - live requirements: ${formatOptionalScenarioMetadata(match)}`,
    ];
    if (match.codeRefs.length > 0) {
      entry.push(` - code refs: ${match.codeRefs.join(", ")}`);
    }
    if (match.docsRefs.length > 0) {
      entry.push(` - docs refs: ${match.docsRefs.join(", ")}`);
    }
    return entry;
  });

  const report = [...header, suiteLine, "", ...entries].join("\n");
  return `${report.trimEnd()}\n`;
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import { runQaCoverageReportCommand } from "../extensions/qa-lab/src/cli.runtime
|
||||
|
||||
type Options = {
|
||||
json?: boolean;
|
||||
match?: string[];
|
||||
output?: string;
|
||||
repoRoot?: string;
|
||||
summary?: string;
|
||||
@@ -27,6 +28,7 @@ function parseArgs(args: string[]): Options {
|
||||
|
||||
Options:
|
||||
--json Print machine-readable JSON
|
||||
--match <query> Search scenario metadata and print matching suite targets
|
||||
--output <path> Write the report to a file
|
||||
--repo-root <path> Repository root to target
|
||||
--summary <path> Runtime qa-suite-summary.json to overlay on --tools coverage
|
||||
@@ -37,6 +39,11 @@ Options:
|
||||
case "--json":
|
||||
opts.json = true;
|
||||
break;
|
||||
case "--match":
|
||||
opts.match ??= [];
|
||||
opts.match.push(takeValue(args, index, arg));
|
||||
index += 1;
|
||||
break;
|
||||
case "--output":
|
||||
opts.output = takeValue(args, index, arg);
|
||||
index += 1;
|
||||
@@ -62,6 +69,7 @@ Options:
|
||||
const opts = parseArgs(process.argv.slice(2));
|
||||
await runQaCoverageReportCommand({
|
||||
...(opts.json ? { json: true } : {}),
|
||||
...(opts.match ? { match: opts.match } : {}),
|
||||
...(opts.output ? { output: opts.output } : {}),
|
||||
...(opts.repoRoot ? { repoRoot: opts.repoRoot } : {}),
|
||||
...(opts.summary ? { summary: opts.summary } : {}),
|
||||
|
||||
Reference in New Issue
Block a user