feat(qa-lab): add scenario pack selector

This commit is contained in:
Vincent Koc
2026-05-17 08:56:28 +08:00
parent dcb4160909
commit da8afe359d
12 changed files with 126 additions and 18 deletions

View File

@@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai
- Gateway: add opt-in restart trace logs for restart signal, active-work drain, close, next-start, ready, and memory spans. (#82396) Thanks @samzong.
- Gateway/performance: split startup benchmark HTTP-listen timing from full gateway-ready timing and add post-bind plugin and sidecar diagnostics to restart-readiness traces. (#82603) Thanks @samzong.
- QA-Lab: add a deterministic local personal-agent scenario pack covering reminders, threaded replies, scoped memory recall, redaction, and safe tool followthrough. (#78219) Thanks @iFiras-Max1.
- QA-Lab: add `--pack personal-agent` for `openclaw qa suite` so maintainers can run the accepted personal-agent scenario pack by selector. (#82760) Thanks @iFiras-Max1.
- QA-Lab: add a private Codex-vs-Pi runtime parity axis with runtime-pair suite runs, parity reports, and release-check wiring. (#80238) Thanks @100yenadmin.
- Slack: add Slack assistant thread lifecycle support with assistant view manifest entries, suggested prompts, thread-scoped assistant sessions, and Slack-provided assistant context. Fixes #80787. Thanks @mobybot27.

View File

@@ -25,20 +25,20 @@ The first pack is intentionally narrow:
## Scenarios
The machine-readable pack metadata lives in
`extensions/qa-lab/src/scenario-packs.ts`. The initial pack does not add a CLI
pack selector, so run the scenarios explicitly:
`extensions/qa-lab/src/scenario-packs.ts`. Run the pack with
`--pack personal-agent`:
```bash
OPENCLAW_ENABLE_PRIVATE_QA_CLI=1 pnpm openclaw qa suite \
--provider-mode mock-openai \
--scenario personal-reminder-roundtrip \
--scenario personal-channel-thread-reply \
--scenario personal-memory-preference-recall \
--scenario personal-redaction-no-secret-leak \
--scenario personal-tool-safety-followthrough \
--pack personal-agent \
--concurrency 1
```
`--pack` is additive with repeated `--scenario` flags. Explicit scenarios run
first, then the pack scenarios run in `QA_PERSONAL_AGENT_SCENARIO_IDS` order. A
scenario selected both explicitly and by the pack runs once, at its explicit
position.
The pack is designed for `qa-channel` with `mock-openai` or another local QA
provider lane. It should not be pointed at live chat services or real personal
accounts.

View File

@@ -231,6 +231,9 @@ Host and Multipass suite runs execute multiple selected scenarios in parallel
with isolated gateway workers by default. `qa-channel` defaults to concurrency
4, capped by the selected scenario count. Use `--concurrency <count>` to tune
the worker count, or `--concurrency 1` for serial execution.
Use `--pack personal-agent` to run the personal assistant benchmark pack. The
pack selector is additive with repeated `--scenario` flags: explicit scenarios
run first, then pack scenarios run in pack order with duplicates removed.
The command exits non-zero when any scenario fails. Use `--allow-failures` when
you want artifacts without a failing exit code.
Live runs forward the supported QA auth inputs that are practical for the

View File

@@ -761,6 +761,35 @@ describe("qa cli runtime", () => {
});
});
// Passes both a pack id and one explicit scenario id to runQaSuiteCommand and
// checks what is forwarded to runQaSuiteFromRuntime: the explicit id comes
// first, followed by the five personal-* scenario ids.
it("expands the personal-agent pack onto the suite scenario list", async () => {
await runQaSuiteCommand({
repoRoot: "/tmp/openclaw-repo",
pack: "personal-agent",
scenarioIds: ["channel-chat-baseline"],
});
expectFields(mockFirstObjectArg(runQaSuiteFromRuntime), {
repoRoot: path.resolve("/tmp/openclaw-repo"),
scenarioIds: [
"channel-chat-baseline",
"personal-reminder-roundtrip",
"personal-channel-thread-reply",
"personal-memory-preference-recall",
"personal-redaction-no-secret-leak",
"personal-tool-safety-followthrough",
],
});
});
// An unrecognized pack id must make runQaSuiteCommand reject with a message
// that lists the accepted pack ids and echoes the rejected value.
it("rejects unknown suite packs", async () => {
await expect(
runQaSuiteCommand({
repoRoot: "/tmp/openclaw-repo",
pack: "personal-admin",
}),
).rejects.toThrow('--pack must be one of personal-agent, got "personal-admin"');
});
it("rejects unknown suite CLI auth modes", async () => {
await expect(
runQaSuiteCommand({

View File

@@ -43,6 +43,7 @@ import {
} from "./run-config.js";
import type { RuntimeId } from "./runtime-parity.js";
import { readQaScenarioPack } from "./scenario-catalog.js";
import { resolveQaScenarioPackScenarioIds } from "./scenario-packs.js";
import { runQaSuiteFromRuntime } from "./suite-launch.runtime.js";
import { readQaSuiteFailedScenarioCountFromSummary } from "./suite-summary.js";
@@ -496,6 +497,7 @@ export async function runQaSuiteCommand(opts: {
thinking?: string;
cliAuthMode?: string;
parityPack?: string;
pack?: string;
scenarioIds?: string[];
concurrency?: number;
allowFailures?: boolean;
@@ -510,9 +512,12 @@ export async function runQaSuiteCommand(opts: {
const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
const transportId = normalizeQaTransportId(opts.transportId);
const runner = (opts.runner ?? "host").trim().toLowerCase();
const scenarioIds = resolveQaParityPackScenarioIds({
parityPack: opts.parityPack,
scenarioIds: opts.scenarioIds,
const scenarioIds = resolveQaScenarioPackScenarioIds({
pack: opts.pack,
scenarioIds: resolveQaParityPackScenarioIds({
parityPack: opts.parityPack,
scenarioIds: opts.scenarioIds,
}),
});
const allowFailures = opts.allowFailures === true;
if (runner !== "host" && runner !== "multipass") {

View File

@@ -537,6 +537,13 @@ describe("qa cli registration", () => {
expect(options.allowFailures).toBe(true);
});
// Parses a `qa suite --pack personal-agent` command line and checks that the
// pack id is present, unchanged, on the options returned by
// requireQaSuiteOptions().
it("forwards --pack for suite runs", async () => {
await program.parseAsync(["node", "openclaw", "qa", "suite", "--pack", "personal-agent"]);
const options = requireQaSuiteOptions();
expect(options.pack).toBe("personal-agent");
});
it("routes credential add flags into the qa runtime command", async () => {
await program.parseAsync([
"node",

View File

@@ -41,6 +41,7 @@ async function runQaSuite(opts: {
enabledPluginIds?: string[];
cliAuthMode?: string;
parityPack?: string;
pack?: string;
scenarioIds?: string[];
concurrency?: number;
runner?: string;
@@ -253,6 +254,7 @@ export function registerQaLabCli(program: Command) {
"CLI backend auth mode for live Claude CLI runs: auto, api-key, or subscription",
)
.option("--parity-pack <name>", 'Preset scenario pack; currently only "agentic" is supported')
.option("--pack <id>", 'Scenario pack id; currently only "personal-agent" is supported')
.option("--scenario <id>", "Run only the named QA scenario (repeatable)", collectString, [])
.option(
"--enable-plugin <id>",
@@ -290,6 +292,7 @@ export function registerQaLabCli(program: Command) {
altModel?: string;
cliAuthMode?: string;
parityPack?: string;
pack?: string;
scenario?: string[];
enablePlugin?: string[];
concurrency?: number;
@@ -315,6 +318,7 @@ export function registerQaLabCli(program: Command) {
thinking: opts.thinking,
cliAuthMode: opts.cliAuthMode,
parityPack: opts.parityPack,
pack: opts.pack,
scenarioIds: opts.scenario,
enabledPluginIds: opts.enablePlugin,
concurrency: opts.concurrency,

View File

@@ -227,6 +227,7 @@ export type QaBootstrapScenarioCatalog = {
export {
QA_PERSONAL_AGENT_SCENARIO_IDS,
QA_SCENARIO_PACKS,
resolveQaScenarioPackScenarioIds,
type QaScenarioPackDefinition,
} from "./scenario-packs.js";

View File

@@ -1,5 +1,10 @@
import { describe, expect, it } from "vitest";
import { QA_SCENARIO_PACKS, readQaScenarioById } from "./scenario-catalog.js";
import {
QA_PERSONAL_AGENT_SCENARIO_IDS,
QA_SCENARIO_PACKS,
readQaScenarioById,
resolveQaScenarioPackScenarioIds,
} from "./scenario-catalog.js";
describe("qa scenario packs", () => {
it("points every pack scenario id at a loadable markdown scenario", () => {
@@ -41,6 +46,27 @@ describe("qa scenario packs", () => {
}
});
// With no explicit scenario ids, the resolver must return exactly the ids in
// QA_PERSONAL_AGENT_SCENARIO_IDS, in that order.
it("expands the personal-agent pack in pack order", () => {
expect(resolveQaScenarioPackScenarioIds({ pack: "personal-agent" })).toEqual([
...QA_PERSONAL_AGENT_SCENARIO_IDS,
]);
});
// The explicit list repeats "personal-reminder-roundtrip", which the pack is
// also expected to contribute; the result must contain it only once.
// NOTE(review): the expectation holds only while that id is the first entry of
// QA_PERSONAL_AGENT_SCENARIO_IDS, because the resolver keeps a duplicate at
// its explicit position — confirm if the pack order ever changes.
it("combines explicit scenarios with pack scenarios", () => {
expect(
resolveQaScenarioPackScenarioIds({
pack: "personal-agent",
scenarioIds: ["channel-chat-baseline", "personal-reminder-roundtrip"],
}),
).toEqual(["channel-chat-baseline", ...QA_PERSONAL_AGENT_SCENARIO_IDS]);
});
// An unrecognized pack id must throw synchronously, with a message that lists
// the accepted pack ids and echoes the rejected value.
it("rejects unknown scenario packs", () => {
expect(() => resolveQaScenarioPackScenarioIds({ pack: "personal-admin" })).toThrow(
'--pack must be one of personal-agent, got "personal-admin"',
);
});
it("keeps personal pack mock debug assertions scoped to each reviewed scenario", () => {
const redactionFlow = JSON.stringify(
readQaScenarioById("personal-redaction-no-secret-leak").execution.flow,

View File

@@ -22,3 +22,21 @@ export const QA_SCENARIO_PACKS = [
scenarioIds: QA_PERSONAL_AGENT_SCENARIO_IDS,
},
] as const satisfies readonly QaScenarioPackDefinition[];
/**
 * Merges explicitly requested scenario ids with the ids of an optional
 * scenario pack.
 *
 * The pack id is matched against `QA_SCENARIO_PACKS` after trimming and
 * lowercasing. Explicit ids come first in their given order, followed by the
 * pack's ids in the pack's own order; an id that occurs more than once is kept
 * only at its first position.
 *
 * @param params.pack - Optional pack id. When it is absent or blank, only the
 *   de-duplicated explicit ids are returned.
 * @param params.scenarioIds - Optional explicitly requested scenario ids.
 * @returns A new array of unique scenario ids.
 * @throws Error when a non-blank pack id matches no known pack; the message
 *   lists the known pack ids and echoes the value as it was passed in.
 */
export function resolveQaScenarioPackScenarioIds(params: {
  pack?: string;
  scenarioIds?: string[];
}): string[] {
  // A Set iterates in insertion order, so it both de-duplicates and keeps the
  // first position of every id.
  const orderedIds = new Set<string>(params.scenarioIds ?? []);
  const requestedPackId = params.pack?.trim().toLowerCase();
  if (requestedPackId === undefined || requestedPackId === "") {
    return Array.from(orderedIds);
  }

  for (const candidate of QA_SCENARIO_PACKS) {
    if (candidate.id !== requestedPackId) {
      continue;
    }
    for (const packScenarioId of candidate.scenarioIds) {
      orderedIds.add(packScenarioId);
    }
    return Array.from(orderedIds);
  }

  const knownPackIds = QA_SCENARIO_PACKS.map((candidate) => candidate.id).join(", ");
  throw new Error(`--pack must be one of ${knownPackIds}, got "${params.pack}"`);
}

View File

@@ -189,6 +189,23 @@ describe("qa suite planning helpers", () => {
).toEqual(["anthropic-only"]);
});
// Requests ids in a different order ("third", "first") than the scenarios
// array lists them and expects the selection to come back in request order,
// not in the order of the scenarios array.
it("keeps explicitly requested scenarios in request order", () => {
const scenarios = [
makeQaSuiteTestScenario("first"),
makeQaSuiteTestScenario("second"),
makeQaSuiteTestScenario("third"),
];
expect(
selectQaSuiteScenarios({
scenarios,
scenarioIds: ["third", "first"],
providerMode: "live-frontier",
primaryModel: "openai/gpt-5.5",
}).map((scenario) => scenario.id),
).toEqual(["third", "first"]);
});
it("collects unique scenario-declared bundled plugins in encounter order", () => {
const scenarios = [
makeQaSuiteTestScenario("generic", { plugins: ["active-memory", "memory-wiki"] }),

View File

@@ -66,20 +66,17 @@ function selectQaSuiteScenarios(params: {
}) {
const requestedScenarioIds =
params.scenarioIds && params.scenarioIds.length > 0 ? new Set(params.scenarioIds) : null;
const requestedScenarios = requestedScenarioIds
? params.scenarios.filter((scenario) => requestedScenarioIds.has(scenario.id))
: params.scenarios;
if (requestedScenarioIds) {
const foundScenarioIds = new Set(requestedScenarios.map((scenario) => scenario.id));
const scenarioById = new Map(params.scenarios.map((scenario) => [scenario.id, scenario]));
const missingScenarioIds = [...requestedScenarioIds].filter(
(scenarioId) => !foundScenarioIds.has(scenarioId),
(scenarioId) => !scenarioById.has(scenarioId),
);
if (missingScenarioIds.length > 0) {
throw new Error(`unknown QA scenario id(s): ${missingScenarioIds.join(", ")}`);
}
return requestedScenarios;
return [...requestedScenarioIds].map((scenarioId) => scenarioById.get(scenarioId)!);
}
return requestedScenarios.filter((scenario) =>
return params.scenarios.filter((scenario) =>
scenarioMatchesLiveLane({
scenario,
providerMode: params.providerMode,