mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 16:44:45 +00:00
feat(qa-lab): add scenario pack selector
This commit is contained in:
@@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway: add opt-in restart trace logs for restart signal, active-work drain, close, next-start, ready, and memory spans. (#82396) Thanks @samzong.
|
||||
- Gateway/performance: split startup benchmark HTTP-listen timing from full gateway-ready timing and add post-bind plugin and sidecar diagnostics to restart-readiness traces. (#82603) Thanks @samzong.
|
||||
- QA-Lab: add a deterministic local personal-agent scenario pack covering reminders, threaded replies, scoped memory recall, redaction, and safe tool followthrough. (#78219) Thanks @iFiras-Max1.
|
||||
- QA-Lab: add `--pack personal-agent` for `openclaw qa suite` so maintainers can run the accepted personal-agent scenario pack by selector. (#82760) Thanks @iFiras-Max1.
|
||||
- QA-Lab: add a private Codex-vs-Pi runtime parity axis with runtime-pair suite runs, parity reports, and release-check wiring. (#80238) Thanks @100yenadmin.
|
||||
- Slack: add Slack assistant thread lifecycle support with assistant view manifest entries, suggested prompts, thread-scoped assistant sessions, and Slack-provided assistant context. Fixes #80787. Thanks @mobybot27.
|
||||
|
||||
|
||||
@@ -25,20 +25,20 @@ The first pack is intentionally narrow:
|
||||
## Scenarios
|
||||
|
||||
The machine-readable pack metadata lives in
|
||||
`extensions/qa-lab/src/scenario-packs.ts`. The initial pack does not add a CLI
|
||||
pack selector, so run the scenarios explicitly:
|
||||
`extensions/qa-lab/src/scenario-packs.ts`. Run the pack with
|
||||
`--pack personal-agent`:
|
||||
|
||||
```bash
|
||||
OPENCLAW_ENABLE_PRIVATE_QA_CLI=1 pnpm openclaw qa suite \
|
||||
--provider-mode mock-openai \
|
||||
--scenario personal-reminder-roundtrip \
|
||||
--scenario personal-channel-thread-reply \
|
||||
--scenario personal-memory-preference-recall \
|
||||
--scenario personal-redaction-no-secret-leak \
|
||||
--scenario personal-tool-safety-followthrough \
|
||||
--pack personal-agent \
|
||||
--concurrency 1
|
||||
```
|
||||
|
||||
`--pack` is additive with repeated `--scenario` flags. Explicit scenarios run
|
||||
first, then the pack scenarios run in `QA_PERSONAL_AGENT_SCENARIO_IDS` order with
|
||||
duplicates removed.
|
||||
|
||||
The pack is designed for `qa-channel` with `mock-openai` or another local QA
|
||||
provider lane. It should not be pointed at live chat services or real personal
|
||||
accounts.
|
||||
|
||||
@@ -231,6 +231,9 @@ Host and Multipass suite runs execute multiple selected scenarios in parallel
|
||||
with isolated gateway workers by default. `qa-channel` defaults to concurrency
|
||||
4, capped by the selected scenario count. Use `--concurrency <count>` to tune
|
||||
the worker count, or `--concurrency 1` for serial execution.
|
||||
Use `--pack personal-agent` to run the personal assistant benchmark pack. The
|
||||
pack selector is additive with repeated `--scenario` flags: explicit scenarios
|
||||
run first, then pack scenarios run in pack order with duplicates removed.
|
||||
The command exits non-zero when any scenario fails. Use `--allow-failures` when
|
||||
you want artifacts without a failing exit code.
|
||||
Live runs forward the supported QA auth inputs that are practical for the
|
||||
|
||||
@@ -761,6 +761,35 @@ describe("qa cli runtime", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("expands the personal-agent pack onto the suite scenario list", async () => {
|
||||
await runQaSuiteCommand({
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
pack: "personal-agent",
|
||||
scenarioIds: ["channel-chat-baseline"],
|
||||
});
|
||||
|
||||
expectFields(mockFirstObjectArg(runQaSuiteFromRuntime), {
|
||||
repoRoot: path.resolve("/tmp/openclaw-repo"),
|
||||
scenarioIds: [
|
||||
"channel-chat-baseline",
|
||||
"personal-reminder-roundtrip",
|
||||
"personal-channel-thread-reply",
|
||||
"personal-memory-preference-recall",
|
||||
"personal-redaction-no-secret-leak",
|
||||
"personal-tool-safety-followthrough",
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects unknown suite packs", async () => {
|
||||
await expect(
|
||||
runQaSuiteCommand({
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
pack: "personal-admin",
|
||||
}),
|
||||
).rejects.toThrow('--pack must be one of personal-agent, got "personal-admin"');
|
||||
});
|
||||
|
||||
it("rejects unknown suite CLI auth modes", async () => {
|
||||
await expect(
|
||||
runQaSuiteCommand({
|
||||
|
||||
@@ -43,6 +43,7 @@ import {
|
||||
} from "./run-config.js";
|
||||
import type { RuntimeId } from "./runtime-parity.js";
|
||||
import { readQaScenarioPack } from "./scenario-catalog.js";
|
||||
import { resolveQaScenarioPackScenarioIds } from "./scenario-packs.js";
|
||||
import { runQaSuiteFromRuntime } from "./suite-launch.runtime.js";
|
||||
import { readQaSuiteFailedScenarioCountFromSummary } from "./suite-summary.js";
|
||||
|
||||
@@ -496,6 +497,7 @@ export async function runQaSuiteCommand(opts: {
|
||||
thinking?: string;
|
||||
cliAuthMode?: string;
|
||||
parityPack?: string;
|
||||
pack?: string;
|
||||
scenarioIds?: string[];
|
||||
concurrency?: number;
|
||||
allowFailures?: boolean;
|
||||
@@ -510,9 +512,12 @@ export async function runQaSuiteCommand(opts: {
|
||||
const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
|
||||
const transportId = normalizeQaTransportId(opts.transportId);
|
||||
const runner = (opts.runner ?? "host").trim().toLowerCase();
|
||||
const scenarioIds = resolveQaParityPackScenarioIds({
|
||||
parityPack: opts.parityPack,
|
||||
scenarioIds: opts.scenarioIds,
|
||||
const scenarioIds = resolveQaScenarioPackScenarioIds({
|
||||
pack: opts.pack,
|
||||
scenarioIds: resolveQaParityPackScenarioIds({
|
||||
parityPack: opts.parityPack,
|
||||
scenarioIds: opts.scenarioIds,
|
||||
}),
|
||||
});
|
||||
const allowFailures = opts.allowFailures === true;
|
||||
if (runner !== "host" && runner !== "multipass") {
|
||||
|
||||
@@ -537,6 +537,13 @@ describe("qa cli registration", () => {
|
||||
expect(options.allowFailures).toBe(true);
|
||||
});
|
||||
|
||||
it("forwards --pack for suite runs", async () => {
|
||||
await program.parseAsync(["node", "openclaw", "qa", "suite", "--pack", "personal-agent"]);
|
||||
|
||||
const options = requireQaSuiteOptions();
|
||||
expect(options.pack).toBe("personal-agent");
|
||||
});
|
||||
|
||||
it("routes credential add flags into the qa runtime command", async () => {
|
||||
await program.parseAsync([
|
||||
"node",
|
||||
|
||||
@@ -41,6 +41,7 @@ async function runQaSuite(opts: {
|
||||
enabledPluginIds?: string[];
|
||||
cliAuthMode?: string;
|
||||
parityPack?: string;
|
||||
pack?: string;
|
||||
scenarioIds?: string[];
|
||||
concurrency?: number;
|
||||
runner?: string;
|
||||
@@ -253,6 +254,7 @@ export function registerQaLabCli(program: Command) {
|
||||
"CLI backend auth mode for live Claude CLI runs: auto, api-key, or subscription",
|
||||
)
|
||||
.option("--parity-pack <name>", 'Preset scenario pack; currently only "agentic" is supported')
|
||||
.option("--pack <id>", 'Scenario pack id; currently only "personal-agent" is supported')
|
||||
.option("--scenario <id>", "Run only the named QA scenario (repeatable)", collectString, [])
|
||||
.option(
|
||||
"--enable-plugin <id>",
|
||||
@@ -290,6 +292,7 @@ export function registerQaLabCli(program: Command) {
|
||||
altModel?: string;
|
||||
cliAuthMode?: string;
|
||||
parityPack?: string;
|
||||
pack?: string;
|
||||
scenario?: string[];
|
||||
enablePlugin?: string[];
|
||||
concurrency?: number;
|
||||
@@ -315,6 +318,7 @@ export function registerQaLabCli(program: Command) {
|
||||
thinking: opts.thinking,
|
||||
cliAuthMode: opts.cliAuthMode,
|
||||
parityPack: opts.parityPack,
|
||||
pack: opts.pack,
|
||||
scenarioIds: opts.scenario,
|
||||
enabledPluginIds: opts.enablePlugin,
|
||||
concurrency: opts.concurrency,
|
||||
|
||||
@@ -227,6 +227,7 @@ export type QaBootstrapScenarioCatalog = {
|
||||
export {
|
||||
QA_PERSONAL_AGENT_SCENARIO_IDS,
|
||||
QA_SCENARIO_PACKS,
|
||||
resolveQaScenarioPackScenarioIds,
|
||||
type QaScenarioPackDefinition,
|
||||
} from "./scenario-packs.js";
|
||||
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { QA_SCENARIO_PACKS, readQaScenarioById } from "./scenario-catalog.js";
|
||||
import {
|
||||
QA_PERSONAL_AGENT_SCENARIO_IDS,
|
||||
QA_SCENARIO_PACKS,
|
||||
readQaScenarioById,
|
||||
resolveQaScenarioPackScenarioIds,
|
||||
} from "./scenario-catalog.js";
|
||||
|
||||
describe("qa scenario packs", () => {
|
||||
it("points every pack scenario id at a loadable markdown scenario", () => {
|
||||
@@ -41,6 +46,27 @@ describe("qa scenario packs", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("expands the personal-agent pack in pack order", () => {
|
||||
expect(resolveQaScenarioPackScenarioIds({ pack: "personal-agent" })).toEqual([
|
||||
...QA_PERSONAL_AGENT_SCENARIO_IDS,
|
||||
]);
|
||||
});
|
||||
|
||||
it("combines explicit scenarios with pack scenarios", () => {
|
||||
expect(
|
||||
resolveQaScenarioPackScenarioIds({
|
||||
pack: "personal-agent",
|
||||
scenarioIds: ["channel-chat-baseline", "personal-reminder-roundtrip"],
|
||||
}),
|
||||
).toEqual(["channel-chat-baseline", ...QA_PERSONAL_AGENT_SCENARIO_IDS]);
|
||||
});
|
||||
|
||||
it("rejects unknown scenario packs", () => {
|
||||
expect(() => resolveQaScenarioPackScenarioIds({ pack: "personal-admin" })).toThrow(
|
||||
'--pack must be one of personal-agent, got "personal-admin"',
|
||||
);
|
||||
});
|
||||
|
||||
it("keeps personal pack mock debug assertions scoped to each reviewed scenario", () => {
|
||||
const redactionFlow = JSON.stringify(
|
||||
readQaScenarioById("personal-redaction-no-secret-leak").execution.flow,
|
||||
|
||||
@@ -22,3 +22,21 @@ export const QA_SCENARIO_PACKS = [
|
||||
scenarioIds: QA_PERSONAL_AGENT_SCENARIO_IDS,
|
||||
},
|
||||
] as const satisfies readonly QaScenarioPackDefinition[];
|
||||
|
||||
/**
 * Resolve the scenario id list for a suite run that may select a scenario pack.
 *
 * Explicit scenario ids are de-duplicated (first occurrence wins) and always
 * come first. When a pack is selected, its scenario ids are appended in pack
 * order, skipping any id that was already requested explicitly.
 *
 * @param params.pack - Pack id from `--pack`. It is trimmed and lower-cased
 *   before lookup; an undefined, empty, or whitespace-only value means
 *   "no pack selected".
 * @param params.scenarioIds - Explicitly requested scenario ids, if any.
 * @returns The de-duplicated explicit ids, followed by the pack's ids when a
 *   pack is selected. Returns an empty array when neither input is provided.
 * @throws Error when the normalized pack id matches no entry in
 *   `QA_SCENARIO_PACKS`. The message lists the known pack ids and quotes the
 *   value exactly as the caller passed it.
 */
export function resolveQaScenarioPackScenarioIds(params: {
  pack?: string;
  scenarioIds?: string[];
}): string[] {
  const normalizedPack = params.pack?.trim().toLowerCase();
  // A Set keeps insertion order, so spreading it removes duplicates while
  // preserving the order in which the caller listed the scenarios.
  const explicitScenarioIds = [...new Set(params.scenarioIds ?? [])];
  if (!normalizedPack) {
    return explicitScenarioIds;
  }
  const pack = QA_SCENARIO_PACKS.find((candidate) => candidate.id === normalizedPack);
  if (!pack) {
    // Report the raw input rather than the normalized form so the user sees
    // exactly what they typed.
    throw new Error(
      `--pack must be one of ${QA_SCENARIO_PACKS.map((candidate) => candidate.id).join(", ")}, got "${params.pack}"`,
    );
  }
  // Explicit ids first, then pack ids in pack order; the Set drops pack ids
  // that were already requested explicitly.
  return [...new Set([...explicitScenarioIds, ...pack.scenarioIds])];
}
|
||||
|
||||
@@ -189,6 +189,23 @@ describe("qa suite planning helpers", () => {
|
||||
).toEqual(["anthropic-only"]);
|
||||
});
|
||||
|
||||
it("keeps explicitly requested scenarios in request order", () => {
|
||||
const scenarios = [
|
||||
makeQaSuiteTestScenario("first"),
|
||||
makeQaSuiteTestScenario("second"),
|
||||
makeQaSuiteTestScenario("third"),
|
||||
];
|
||||
|
||||
expect(
|
||||
selectQaSuiteScenarios({
|
||||
scenarios,
|
||||
scenarioIds: ["third", "first"],
|
||||
providerMode: "live-frontier",
|
||||
primaryModel: "openai/gpt-5.5",
|
||||
}).map((scenario) => scenario.id),
|
||||
).toEqual(["third", "first"]);
|
||||
});
|
||||
|
||||
it("collects unique scenario-declared bundled plugins in encounter order", () => {
|
||||
const scenarios = [
|
||||
makeQaSuiteTestScenario("generic", { plugins: ["active-memory", "memory-wiki"] }),
|
||||
|
||||
@@ -66,20 +66,17 @@ function selectQaSuiteScenarios(params: {
|
||||
}) {
|
||||
const requestedScenarioIds =
|
||||
params.scenarioIds && params.scenarioIds.length > 0 ? new Set(params.scenarioIds) : null;
|
||||
const requestedScenarios = requestedScenarioIds
|
||||
? params.scenarios.filter((scenario) => requestedScenarioIds.has(scenario.id))
|
||||
: params.scenarios;
|
||||
if (requestedScenarioIds) {
|
||||
const foundScenarioIds = new Set(requestedScenarios.map((scenario) => scenario.id));
|
||||
const scenarioById = new Map(params.scenarios.map((scenario) => [scenario.id, scenario]));
|
||||
const missingScenarioIds = [...requestedScenarioIds].filter(
|
||||
(scenarioId) => !foundScenarioIds.has(scenarioId),
|
||||
(scenarioId) => !scenarioById.has(scenarioId),
|
||||
);
|
||||
if (missingScenarioIds.length > 0) {
|
||||
throw new Error(`unknown QA scenario id(s): ${missingScenarioIds.join(", ")}`);
|
||||
}
|
||||
return requestedScenarios;
|
||||
return [...requestedScenarioIds].map((scenarioId) => scenarioById.get(scenarioId)!);
|
||||
}
|
||||
return requestedScenarios.filter((scenario) =>
|
||||
return params.scenarios.filter((scenario) =>
|
||||
scenarioMatchesLiveLane({
|
||||
scenario,
|
||||
providerMode: params.providerMode,
|
||||
|
||||
Reference in New Issue
Block a user