mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-24 23:51:48 +00:00
test: parallelize QA suite scenarios
This commit is contained in:
@@ -62,6 +62,9 @@ This boots a fresh Multipass guest, installs dependencies, builds OpenClaw
|
||||
inside the guest, runs `qa suite`, then copies the normal QA report and
|
||||
summary back into `.artifacts/qa-e2e/...` on the host.
|
||||
It reuses the same scenario-selection behavior as `qa suite` on the host.
|
||||
Host and Multipass suite runs execute multiple selected scenarios in parallel
|
||||
with isolated gateway workers by default. Use `--concurrency <count>` to tune
|
||||
the worker count, or `--concurrency 1` for serial execution.
|
||||
Live runs forward the supported QA auth inputs that are practical for the
|
||||
guest: env-based provider keys, the QA live provider config path, and
|
||||
`CODEX_HOME` when present. Keep `--output-dir` under the repo root so the guest
|
||||
|
||||
@@ -48,6 +48,9 @@ These commands sit beside the main test suites when you need QA-lab realism:
|
||||
|
||||
- `pnpm openclaw qa suite`
|
||||
- Runs repo-backed QA scenarios directly on the host.
|
||||
- Runs multiple selected scenarios in parallel by default with isolated
|
||||
gateway workers. Use `--concurrency <count>` to tune the worker count, or
|
||||
`--concurrency 1` for the older serial lane.
|
||||
- `pnpm openclaw qa suite --runner multipass`
|
||||
- Runs the same QA suite inside a disposable Multipass Linux VM.
|
||||
- Keeps the same scenario-selection behavior as `qa suite` on the host.
|
||||
|
||||
@@ -161,6 +161,22 @@ describe("qa cli runtime", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("passes host suite concurrency through", async () => {
|
||||
await runQaSuiteCommand({
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
scenarioIds: ["channel-chat-baseline", "thread-follow-up"],
|
||||
concurrency: 3,
|
||||
});
|
||||
|
||||
expect(runQaSuiteFromRuntime).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
repoRoot: path.resolve("/tmp/openclaw-repo"),
|
||||
scenarioIds: ["channel-chat-baseline", "thread-follow-up"],
|
||||
concurrency: 3,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("resolves character eval paths and passes model refs through", async () => {
|
||||
await runQaCharacterEvalCommand({
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
@@ -291,6 +307,7 @@ describe("qa cli runtime", () => {
|
||||
runner: "multipass",
|
||||
providerMode: "mock-openai",
|
||||
scenarioIds: ["channel-chat-baseline"],
|
||||
concurrency: 3,
|
||||
image: "lts",
|
||||
cpus: 2,
|
||||
memory: "4G",
|
||||
@@ -305,6 +322,7 @@ describe("qa cli runtime", () => {
|
||||
alternateModel: undefined,
|
||||
fastMode: undefined,
|
||||
scenarioIds: ["channel-chat-baseline"],
|
||||
concurrency: 3,
|
||||
image: "lts",
|
||||
cpus: 2,
|
||||
memory: "4G",
|
||||
|
||||
@@ -200,6 +200,7 @@ export async function runQaSuiteCommand(opts: {
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
scenarioIds?: string[];
|
||||
concurrency?: number;
|
||||
image?: string;
|
||||
cpus?: number;
|
||||
memory?: string;
|
||||
@@ -229,6 +230,9 @@ export async function runQaSuiteCommand(opts: {
|
||||
alternateModel: opts.alternateModel,
|
||||
fastMode: opts.fastMode,
|
||||
scenarioIds: opts.scenarioIds,
|
||||
...(opts.concurrency !== undefined
|
||||
? { concurrency: parseQaPositiveIntegerOption("--concurrency", opts.concurrency) }
|
||||
: {}),
|
||||
image: opts.image,
|
||||
cpus: parseQaPositiveIntegerOption("--cpus", opts.cpus),
|
||||
memory: opts.memory,
|
||||
@@ -249,6 +253,9 @@ export async function runQaSuiteCommand(opts: {
|
||||
alternateModel: opts.alternateModel,
|
||||
fastMode: opts.fastMode,
|
||||
scenarioIds: opts.scenarioIds,
|
||||
...(opts.concurrency !== undefined
|
||||
? { concurrency: parseQaPositiveIntegerOption("--concurrency", opts.concurrency) }
|
||||
: {}),
|
||||
});
|
||||
process.stdout.write(`QA suite watch: ${result.watchUrl}\n`);
|
||||
process.stdout.write(`QA suite report: ${result.reportPath}\n`);
|
||||
|
||||
@@ -23,6 +23,7 @@ async function runQaSuite(opts: {
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
scenarioIds?: string[];
|
||||
concurrency?: number;
|
||||
runner?: string;
|
||||
image?: string;
|
||||
cpus?: number;
|
||||
@@ -152,6 +153,9 @@ export function registerQaLabCli(program: Command) {
|
||||
.option("--model <ref>", "Primary provider/model ref")
|
||||
.option("--alt-model <ref>", "Alternate provider/model ref")
|
||||
.option("--scenario <id>", "Run only the named QA scenario (repeatable)", collectString, [])
|
||||
.option("--concurrency <count>", "Scenario worker concurrency", (value: string) =>
|
||||
Number(value),
|
||||
)
|
||||
.option("--fast", "Enable provider fast mode where supported", false)
|
||||
.option("--image <alias>", "Multipass image alias")
|
||||
.option("--cpus <count>", "Multipass vCPU count", (value: string) => Number(value))
|
||||
@@ -166,6 +170,7 @@ export function registerQaLabCli(program: Command) {
|
||||
model?: string;
|
||||
altModel?: string;
|
||||
scenario?: string[];
|
||||
concurrency?: number;
|
||||
fast?: boolean;
|
||||
image?: string;
|
||||
cpus?: number;
|
||||
@@ -181,6 +186,7 @@ export function registerQaLabCli(program: Command) {
|
||||
alternateModel: opts.altModel,
|
||||
fastMode: opts.fast,
|
||||
scenarioIds: opts.scenario,
|
||||
concurrency: opts.concurrency,
|
||||
image: opts.image,
|
||||
cpus: opts.cpus,
|
||||
memory: opts.memory,
|
||||
|
||||
@@ -334,6 +334,7 @@ export function createQaMultipassPlan(params: {
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
scenarioIds?: string[];
|
||||
concurrency?: number;
|
||||
image?: string;
|
||||
cpus?: number;
|
||||
memory?: string;
|
||||
@@ -365,6 +366,7 @@ export function createQaMultipassPlan(params: {
|
||||
...(params.primaryModel ? ["--model", params.primaryModel] : []),
|
||||
...(params.alternateModel ? ["--alt-model", params.alternateModel] : []),
|
||||
...(params.fastMode ? ["--fast"] : []),
|
||||
...(params.concurrency ? ["--concurrency", String(params.concurrency)] : []),
|
||||
],
|
||||
scenarioIds,
|
||||
);
|
||||
@@ -632,6 +634,7 @@ export async function runQaMultipass(params: {
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
scenarioIds?: string[];
|
||||
concurrency?: number;
|
||||
image?: string;
|
||||
cpus?: number;
|
||||
memory?: string;
|
||||
|
||||
@@ -3,6 +3,38 @@ import { createQaBusState } from "./bus-state.js";
|
||||
import { qaSuiteTesting } from "./suite.js";
|
||||
|
||||
describe("qa suite failure reply handling", () => {
|
||||
it("normalizes suite concurrency to a bounded integer", () => {
|
||||
const previous = process.env.OPENCLAW_QA_SUITE_CONCURRENCY;
|
||||
delete process.env.OPENCLAW_QA_SUITE_CONCURRENCY;
|
||||
try {
|
||||
expect(qaSuiteTesting.normalizeQaSuiteConcurrency(undefined, 10)).toBe(4);
|
||||
expect(qaSuiteTesting.normalizeQaSuiteConcurrency(2.8, 10)).toBe(2);
|
||||
expect(qaSuiteTesting.normalizeQaSuiteConcurrency(20, 3)).toBe(3);
|
||||
expect(qaSuiteTesting.normalizeQaSuiteConcurrency(0, 3)).toBe(1);
|
||||
} finally {
|
||||
if (previous === undefined) {
|
||||
delete process.env.OPENCLAW_QA_SUITE_CONCURRENCY;
|
||||
} else {
|
||||
process.env.OPENCLAW_QA_SUITE_CONCURRENCY = previous;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("maps suite work with bounded concurrency while preserving order", async () => {
|
||||
let active = 0;
|
||||
let maxActive = 0;
|
||||
const result = await qaSuiteTesting.mapQaSuiteWithConcurrency([1, 2, 3, 4], 2, async (item) => {
|
||||
active += 1;
|
||||
maxActive = Math.max(maxActive, active);
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
active -= 1;
|
||||
return item * 10;
|
||||
});
|
||||
|
||||
expect(maxActive).toBe(2);
|
||||
expect(result).toEqual([10, 20, 30, 40]);
|
||||
});
|
||||
|
||||
it("detects classified failure replies before a success-only outbound predicate matches", async () => {
|
||||
const state = createQaBusState();
|
||||
state.addOutboundMessage({
|
||||
|
||||
@@ -81,8 +81,16 @@ export type QaSuiteRunParams = {
|
||||
scenarioIds?: string[];
|
||||
lab?: QaLabServerHandle;
|
||||
startLab?: QaSuiteStartLabFn;
|
||||
concurrency?: number;
|
||||
};
|
||||
|
||||
/**
 * Default lab starter used when the caller does not supply one.
 *
 * `./lab-server.js` is loaded with a dynamic import at call time, and `params`
 * is forwarded unchanged to its `startQaLabServer` export.
 *
 * @param params - Optional lab server start parameters.
 * @returns The handle produced by `startQaLabServer`.
 */
async function startQaLabServerRuntime(
  params?: QaLabServerStartParams,
): Promise<QaLabServerHandle> {
  const labServerModule = await import("./lab-server.js");
  const handle = await labServerModule.startQaLabServer(params);
  return handle;
}
|
||||
|
||||
const _QA_IMAGE_UNDERSTANDING_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAYAAABccqhmAAAAAklEQVR4AewaftIAAAK4SURBVO3BAQEAMAwCIG//znsQgXfJBZjUALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsBpjVALMaYFYDzGqAWQ0wqwFmNcCsl9wFmNQAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwGmNUAsxpgVgPMaoBZDTCrAWY1wKwP4TIF+7ciPkoAAAAASUVORK5CYII=";
|
||||
const _QA_IMAGE_UNDERSTANDING_LARGE_PNG_BASE64 =
|
||||
@@ -127,10 +135,37 @@ type QaRawSessionStoreEntry = {
|
||||
updatedAt?: number;
|
||||
};
|
||||
|
||||
/** Length, in milliseconds, of the sliding window used to pace QA control-plane writes. */
const QA_CONTROL_PLANE_WRITE_WINDOW_MS = 60_000;
/** Maximum number of control-plane writes allowed inside one window. */
const QA_CONTROL_PLANE_WRITE_MAX_REQUESTS = 2;
/** Worker count used when neither the caller nor the environment provides one. */
const DEFAULT_QA_SUITE_CONCURRENCY = 4;

/** `Date.now()` timestamps of recent control-plane writes, oldest first. */
const qaControlPlaneWriteTimestamps: number[] = [];

/**
 * Resolves the number of scenario workers to use for a suite run.
 *
 * Precedence: an explicit finite `value`, then a numeric
 * `OPENCLAW_QA_SUITE_CONCURRENCY` environment variable, then
 * `DEFAULT_QA_SUITE_CONCURRENCY`. The chosen number is floored and clamped to
 * the range `[1, max(1, scenarioCount)]`.
 *
 * An unset, empty, or whitespace-only environment variable is treated as
 * "not configured". Without that check `Number("")` evaluates to `0`, which
 * would silently force serial execution instead of using the default.
 *
 * @param value - Caller-requested concurrency, if any.
 * @param scenarioCount - Number of selected scenarios; the upper bound.
 * @returns A positive integer worker count.
 */
function normalizeQaSuiteConcurrency(value: number | undefined, scenarioCount: number): number {
  const upperBound = Math.max(1, scenarioCount);
  const clamp = (candidate: number) => Math.max(1, Math.min(Math.floor(candidate), upperBound));

  if (typeof value === "number" && Number.isFinite(value)) {
    return clamp(value);
  }

  // Blank strings must not be coerced: Number("") and Number("  ") are both 0.
  const envText = process.env.OPENCLAW_QA_SUITE_CONCURRENCY?.trim();
  const envValue = envText ? Number(envText) : Number.NaN;
  return clamp(Number.isFinite(envValue) ? envValue : DEFAULT_QA_SUITE_CONCURRENCY);
}
|
||||
|
||||
/**
 * Maps `items` through an async `mapper` with at most `concurrency` calls in
 * flight, returning results in input order.
 *
 * A fixed pool of workers pulls the next unclaimed index from a shared cursor.
 * The effective worker count is `floor(concurrency)` clamped to
 * `[1, items.length]`; a NaN `concurrency` is treated as 1 rather than
 * producing zero workers and an array of unprocessed slots.
 *
 * If any mapper call rejects, the returned promise rejects with that error and
 * the remaining workers stop claiming new items. Calls already in flight are
 * not cancelled.
 *
 * @param items - Inputs to process.
 * @param concurrency - Requested maximum number of simultaneous mapper calls.
 * @param mapper - Async transform receiving the item and its index.
 * @returns Results positioned at the same index as their input.
 */
async function mapQaSuiteWithConcurrency<T, U>(
  items: readonly T[],
  concurrency: number,
  mapper: (item: T, index: number) => Promise<U>,
): Promise<U[]> {
  const results = Array.from<U>({ length: items.length });
  // Math.max/Math.min propagate NaN, which would yield a zero-length worker pool.
  const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency);
  const workerCount = Math.min(Math.max(1, requested), items.length);
  let nextIndex = 0;
  let failed = false;

  const runWorker = async () => {
    while (!failed && nextIndex < items.length) {
      // Claim the index synchronously so no two workers process the same item.
      const index = nextIndex;
      nextIndex += 1;
      try {
        results[index] = await mapper(items[index], index);
      } catch (error) {
        // Stop sibling workers from starting work nobody will observe.
        failed = true;
        throw error;
      }
    }
  };

  await Promise.all(Array.from({ length: workerCount }, runWorker));
  return results;
}
|
||||
|
||||
function splitModelRef(ref: string) {
|
||||
const slash = ref.indexOf("/");
|
||||
@@ -430,7 +465,7 @@ async function waitForConfigRestartSettle(
|
||||
}
|
||||
|
||||
function isGatewayRestartRace(error: unknown) {
|
||||
const text = formatErrorMessage(error);
|
||||
const text = formatGatewayPrimaryErrorText(error);
|
||||
return (
|
||||
text.includes("gateway closed (1012)") ||
|
||||
text.includes("gateway closed (1006") ||
|
||||
@@ -440,11 +475,17 @@ function isGatewayRestartRace(error: unknown) {
|
||||
}
|
||||
|
||||
/**
 * Reports whether `error` is a config hash conflict, i.e. its message
 * contains "config changed since last load".
 *
 * Only the primary error text returned by `formatGatewayPrimaryErrorText` is
 * inspected, so text after the "Gateway logs:" marker cannot trigger a match.
 * The earlier full-message check was left in front of this one and made it
 * unreachable; it is removed here.
 *
 * @param error - Any thrown value.
 * @returns `true` when the primary error text contains the conflict marker.
 */
function isConfigHashConflict(error: unknown) {
  return formatGatewayPrimaryErrorText(error).includes("config changed since last load");
}
|
||||
|
||||
/**
 * Formats `error` with `formatErrorMessage` and keeps only the text before the
 * first "\nGateway logs:" marker, trimmed of surrounding whitespace.
 *
 * When the marker is absent the whole formatted message is returned, trimmed.
 *
 * @param error - Any thrown value.
 * @returns The leading part of the formatted error message.
 */
function formatGatewayPrimaryErrorText(error: unknown) {
  const message = formatErrorMessage(error);
  const markerAt = message.indexOf("\nGateway logs:");
  if (markerAt < 0) {
    return message.trim();
  }
  return message.slice(0, markerAt).trim();
}
|
||||
|
||||
function getGatewayRetryAfterMs(error: unknown) {
|
||||
const text = formatErrorMessage(error);
|
||||
const text = formatGatewayPrimaryErrorText(error);
|
||||
const millisecondsMatch = /retryAfterMs["=: ]+(\d+)/i.exec(text);
|
||||
if (millisecondsMatch) {
|
||||
const parsed = Number(millisecondsMatch[1]);
|
||||
@@ -462,25 +503,6 @@ function getGatewayRetryAfterMs(error: unknown) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Waits until one more control-plane write fits in the sliding-window budget,
 * then records the write and returns.
 *
 * Each pass drops timestamps that are at least
 * `QA_CONTROL_PLANE_WRITE_WINDOW_MS` old. If fewer than
 * `QA_CONTROL_PLANE_WRITE_MAX_REQUESTS` remain, the current time is appended
 * and the function resolves. Otherwise it sleeps until the oldest entry
 * expires (plus 250 ms, and never less than 250 ms) and tries again.
 */
async function waitForQaControlPlaneWriteBudget() {
  for (;;) {
    const now = Date.now();

    // Evict entries that have aged out of the window.
    for (;;) {
      if (qaControlPlaneWriteTimestamps.length === 0) {
        break;
      }
      if (now - qaControlPlaneWriteTimestamps[0] < QA_CONTROL_PLANE_WRITE_WINDOW_MS) {
        break;
      }
      qaControlPlaneWriteTimestamps.shift();
    }

    const hasBudget = qaControlPlaneWriteTimestamps.length < QA_CONTROL_PLANE_WRITE_MAX_REQUESTS;
    if (hasBudget) {
      qaControlPlaneWriteTimestamps.push(now);
      return;
    }

    const oldestExpiresAt = qaControlPlaneWriteTimestamps[0] + QA_CONTROL_PLANE_WRITE_WINDOW_MS;
    const retryAfterMs = oldestExpiresAt - now + 250;
    await sleep(Math.max(250, retryAfterMs));
  }
}
|
||||
|
||||
async function readConfigSnapshot(env: QaSuiteEnvironment) {
|
||||
const snapshot = (await env.gateway.call(
|
||||
"config.get",
|
||||
@@ -509,7 +531,6 @@ async function runConfigMutation(params: {
|
||||
for (let attempt = 1; attempt <= 8; attempt += 1) {
|
||||
const snapshot = await readConfigSnapshot(params.env);
|
||||
try {
|
||||
await waitForQaControlPlaneWriteBudget();
|
||||
const result = await params.env.gateway.call(
|
||||
params.action,
|
||||
{
|
||||
@@ -1178,6 +1199,10 @@ function createScenarioFlowApi(
|
||||
/**
 * Bundle of otherwise module-private helpers, exported under one name so that
 * unit tests can call them directly.
 */
export const qaSuiteTesting = {
  createScenarioWaitForCondition,
  findFailureOutboundMessage,
  getGatewayRetryAfterMs,
  isConfigHashConflict,
  mapQaSuiteWithConcurrency,
  normalizeQaSuiteConcurrency,
  waitForOutboundMessage,
};
|
||||
|
||||
@@ -1196,7 +1221,71 @@ async function runScenarioDefinition(
|
||||
});
|
||||
}
|
||||
|
||||
export async function runQaSuite(params?: QaSuiteRunParams) {
|
||||
/**
 * Builds the three free-text notes attached to a QA suite report.
 *
 * 1. A provider note: the mock OpenAI wording, or the live-frontier wording
 *    naming both models and, when enabled, fast mode.
 * 2. An execution note: isolated workers with the given concurrency when it
 *    is greater than 1, otherwise serial execution.
 * 3. A fixed note about how cron scenarios are verified.
 *
 * @param params - Provider mode, model refs, fast-mode flag and concurrency.
 * @returns The notes in the order listed above.
 */
function createQaSuiteReportNotes(params: {
  providerMode: "mock-openai" | "live-frontier";
  primaryModel: string;
  alternateModel: string;
  fastMode: boolean;
  concurrency: number;
}) {
  const { providerMode, primaryModel, alternateModel, fastMode, concurrency } = params;

  let providerNote: string;
  if (providerMode === "mock-openai") {
    providerNote =
      "Runs against qa-channel + qa-lab bus + real gateway child + mock OpenAI provider.";
  } else {
    const fastSuffix = fastMode ? " with fast mode enabled" : "";
    providerNote = `Runs against qa-channel + qa-lab bus + real gateway child + live frontier models (${primaryModel}, ${alternateModel})${fastSuffix}.`;
  }

  let executionNote: string;
  if (concurrency > 1) {
    executionNote = `Scenarios run in isolated gateway workers with concurrency ${concurrency}.`;
  } else {
    executionNote = "Scenarios run serially in one gateway worker.";
  }

  return [
    providerNote,
    executionNote,
    "Cron uses a one-minute schedule assertion plus forced execution for fast verification.",
  ];
}
|
||||
|
||||
/**
 * Renders the suite's markdown report and writes it, together with a JSON
 * summary, into `params.outputDir`.
 *
 * Files written, in this order:
 * - `qa-suite-report.md`: output of `renderQaMarkdownReport`.
 * - `qa-suite-summary.json`: the scenario results plus total/passed/failed
 *   counts, pretty-printed with a trailing newline.
 *
 * The directory is not created here; callers are expected to have done so.
 *
 * @param params - Output location, run timestamps, scenario results, and the
 *   run settings that are forwarded to `createQaSuiteReportNotes`.
 * @returns The rendered markdown and the two file paths.
 */
async function writeQaSuiteArtifacts(params: {
  outputDir: string;
  startedAt: Date;
  finishedAt: Date;
  scenarios: QaSuiteScenarioResult[];
  providerMode: "mock-openai" | "live-frontier";
  primaryModel: string;
  alternateModel: string;
  fastMode: boolean;
  concurrency: number;
}) {
  const { outputDir, scenarios } = params;

  const reportScenarios = scenarios.map(({ name, status, details, steps }) => ({
    name,
    status,
    details,
    steps,
  })) satisfies QaReportScenario[];
  const report = renderQaMarkdownReport({
    title: "OpenClaw QA Scenario Suite",
    startedAt: params.startedAt,
    finishedAt: params.finishedAt,
    checks: [],
    scenarios: reportScenarios,
    notes: createQaSuiteReportNotes(params),
  });

  const reportPath = path.join(outputDir, "qa-suite-report.md");
  const summaryPath = path.join(outputDir, "qa-suite-summary.json");

  await fs.writeFile(reportPath, report, "utf8");

  const summary = {
    scenarios,
    counts: {
      total: scenarios.length,
      passed: scenarios.filter((entry) => entry.status === "pass").length,
      failed: scenarios.filter((entry) => entry.status === "fail").length,
    },
  };
  await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8");

  return { report, reportPath, summaryPath };
}
|
||||
|
||||
export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResult> {
|
||||
const startedAt = new Date();
|
||||
const repoRoot = path.resolve(params?.repoRoot ?? process.cwd());
|
||||
const providerMode = normalizeQaProviderMode(params?.providerMode ?? "mock-openai");
|
||||
@@ -1211,19 +1300,180 @@ export async function runQaSuite(params?: QaSuiteRunParams) {
|
||||
params?.outputDir ??
|
||||
path.join(repoRoot, ".artifacts", "qa-e2e", `suite-${Date.now().toString(36)}`);
|
||||
await fs.mkdir(outputDir, { recursive: true });
|
||||
const catalog = readQaBootstrapScenarioCatalog();
|
||||
const requestedScenarioIds =
|
||||
params?.scenarioIds && params.scenarioIds.length > 0 ? new Set(params.scenarioIds) : null;
|
||||
const selectedCatalogScenarios = requestedScenarioIds
|
||||
? catalog.scenarios.filter((scenario) => requestedScenarioIds.has(scenario.id))
|
||||
: catalog.scenarios;
|
||||
if (requestedScenarioIds) {
|
||||
const foundScenarioIds = new Set(selectedCatalogScenarios.map((scenario) => scenario.id));
|
||||
const missingScenarioIds = [...requestedScenarioIds].filter(
|
||||
(scenarioId) => !foundScenarioIds.has(scenarioId),
|
||||
);
|
||||
if (missingScenarioIds.length > 0) {
|
||||
throw new Error(`unknown QA scenario id(s): ${missingScenarioIds.join(", ")}`);
|
||||
}
|
||||
}
|
||||
const concurrency = normalizeQaSuiteConcurrency(
|
||||
params?.concurrency,
|
||||
selectedCatalogScenarios.length,
|
||||
);
|
||||
|
||||
if (concurrency > 1 && selectedCatalogScenarios.length > 1) {
|
||||
const ownsLab = !params?.lab;
|
||||
const startLab = params?.startLab ?? startQaLabServerRuntime;
|
||||
const lab =
|
||||
params?.lab ??
|
||||
(await startLab({
|
||||
repoRoot,
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
embeddedGateway: "disabled",
|
||||
}));
|
||||
const liveScenarioOutcomes: QaLabScenarioOutcome[] = selectedCatalogScenarios.map(
|
||||
(scenario) => ({
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
status: "pending",
|
||||
}),
|
||||
);
|
||||
const updateScenarioRun = () =>
|
||||
lab.setScenarioRun({
|
||||
kind: "suite",
|
||||
status: "running",
|
||||
startedAt: startedAt.toISOString(),
|
||||
scenarios: [...liveScenarioOutcomes],
|
||||
});
|
||||
|
||||
try {
|
||||
updateScenarioRun();
|
||||
const scenarios: QaSuiteScenarioResult[] = await mapQaSuiteWithConcurrency(
|
||||
selectedCatalogScenarios,
|
||||
concurrency,
|
||||
async (scenario, index): Promise<QaSuiteScenarioResult> => {
|
||||
liveScenarioOutcomes[index] = {
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
status: "running",
|
||||
startedAt: new Date().toISOString(),
|
||||
};
|
||||
updateScenarioRun();
|
||||
try {
|
||||
const scenarioOutputDir = path.join(outputDir, "scenarios", scenario.id);
|
||||
const result: QaSuiteResult = await runQaSuite({
|
||||
repoRoot,
|
||||
outputDir: scenarioOutputDir,
|
||||
providerMode,
|
||||
primaryModel,
|
||||
alternateModel,
|
||||
fastMode,
|
||||
thinkingDefault: params?.thinkingDefault,
|
||||
scenarioIds: [scenario.id],
|
||||
concurrency: 1,
|
||||
});
|
||||
const scenarioResult: QaSuiteScenarioResult =
|
||||
result.scenarios[0] ??
|
||||
({
|
||||
name: scenario.title,
|
||||
status: "fail",
|
||||
details: "isolated scenario run returned no scenario result",
|
||||
steps: [
|
||||
{
|
||||
name: "isolated scenario worker",
|
||||
status: "fail",
|
||||
details: "isolated scenario run returned no scenario result",
|
||||
},
|
||||
],
|
||||
} satisfies QaSuiteScenarioResult);
|
||||
liveScenarioOutcomes[index] = {
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
status: scenarioResult.status,
|
||||
details: scenarioResult.details,
|
||||
steps: scenarioResult.steps,
|
||||
startedAt: liveScenarioOutcomes[index]?.startedAt,
|
||||
finishedAt: new Date().toISOString(),
|
||||
};
|
||||
updateScenarioRun();
|
||||
return scenarioResult;
|
||||
} catch (error) {
|
||||
const details = formatErrorMessage(error);
|
||||
const scenarioResult = {
|
||||
name: scenario.title,
|
||||
status: "fail",
|
||||
details,
|
||||
steps: [
|
||||
{
|
||||
name: "isolated scenario worker",
|
||||
status: "fail",
|
||||
details,
|
||||
},
|
||||
],
|
||||
} satisfies QaSuiteScenarioResult;
|
||||
liveScenarioOutcomes[index] = {
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
status: "fail",
|
||||
details,
|
||||
steps: scenarioResult.steps,
|
||||
startedAt: liveScenarioOutcomes[index]?.startedAt,
|
||||
finishedAt: new Date().toISOString(),
|
||||
};
|
||||
updateScenarioRun();
|
||||
return scenarioResult;
|
||||
}
|
||||
},
|
||||
);
|
||||
const finishedAt = new Date();
|
||||
lab.setScenarioRun({
|
||||
kind: "suite",
|
||||
status: "completed",
|
||||
startedAt: startedAt.toISOString(),
|
||||
finishedAt: finishedAt.toISOString(),
|
||||
scenarios: [...liveScenarioOutcomes],
|
||||
});
|
||||
const { report, reportPath, summaryPath } = await writeQaSuiteArtifacts({
|
||||
outputDir,
|
||||
startedAt,
|
||||
finishedAt,
|
||||
scenarios,
|
||||
providerMode,
|
||||
primaryModel,
|
||||
alternateModel,
|
||||
fastMode,
|
||||
concurrency,
|
||||
});
|
||||
lab.setLatestReport({
|
||||
outputPath: reportPath,
|
||||
markdown: report,
|
||||
generatedAt: finishedAt.toISOString(),
|
||||
} satisfies QaLabLatestReport);
|
||||
return {
|
||||
outputDir,
|
||||
reportPath,
|
||||
summaryPath,
|
||||
report,
|
||||
scenarios,
|
||||
watchUrl: lab.baseUrl,
|
||||
} satisfies QaSuiteResult;
|
||||
} finally {
|
||||
if (ownsLab) {
|
||||
await lab.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ownsLab = !params?.lab;
|
||||
const startLab = params?.startLab ?? startQaLabServerRuntime;
|
||||
const lab =
|
||||
params?.lab ??
|
||||
(await params?.startLab?.({
|
||||
(await startLab({
|
||||
repoRoot,
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
embeddedGateway: "disabled",
|
||||
}));
|
||||
if (!lab) {
|
||||
throw new Error("QA suite requires lab or startLab runtime");
|
||||
}
|
||||
const mock =
|
||||
providerMode === "mock-openai"
|
||||
? await startQaMockOpenAiServer({
|
||||
@@ -1267,21 +1517,6 @@ export async function runQaSuite(params?: QaSuiteRunParams) {
|
||||
await waitForQaChannelReady(env, 120_000);
|
||||
});
|
||||
await sleep(1_000);
|
||||
const catalog = readQaBootstrapScenarioCatalog();
|
||||
const requestedScenarioIds =
|
||||
params?.scenarioIds && params.scenarioIds.length > 0 ? new Set(params.scenarioIds) : null;
|
||||
const selectedCatalogScenarios = requestedScenarioIds
|
||||
? catalog.scenarios.filter((scenario) => requestedScenarioIds.has(scenario.id))
|
||||
: catalog.scenarios;
|
||||
if (requestedScenarioIds) {
|
||||
const foundScenarioIds = new Set(selectedCatalogScenarios.map((scenario) => scenario.id));
|
||||
const missingScenarioIds = [...requestedScenarioIds].filter(
|
||||
(scenarioId) => !foundScenarioIds.has(scenarioId),
|
||||
);
|
||||
if (missingScenarioIds.length > 0) {
|
||||
throw new Error(`unknown QA scenario id(s): ${missingScenarioIds.join(", ")}`);
|
||||
}
|
||||
}
|
||||
const scenarios: QaSuiteScenarioResult[] = [];
|
||||
const liveScenarioOutcomes: QaLabScenarioOutcome[] = selectedCatalogScenarios.map(
|
||||
(scenario) => ({
|
||||
@@ -1339,43 +1574,17 @@ export async function runQaSuite(params?: QaSuiteRunParams) {
|
||||
finishedAt: finishedAt.toISOString(),
|
||||
scenarios: [...liveScenarioOutcomes],
|
||||
});
|
||||
const report = renderQaMarkdownReport({
|
||||
title: "OpenClaw QA Scenario Suite",
|
||||
const { report, reportPath, summaryPath } = await writeQaSuiteArtifacts({
|
||||
outputDir,
|
||||
startedAt,
|
||||
finishedAt,
|
||||
checks: [],
|
||||
scenarios: scenarios.map((scenario) => ({
|
||||
name: scenario.name,
|
||||
status: scenario.status,
|
||||
details: scenario.details,
|
||||
steps: scenario.steps,
|
||||
})) satisfies QaReportScenario[],
|
||||
notes: [
|
||||
providerMode === "mock-openai"
|
||||
? "Runs against qa-channel + qa-lab bus + real gateway child + mock OpenAI provider."
|
||||
: `Runs against qa-channel + qa-lab bus + real gateway child + live frontier models (${primaryModel}, ${alternateModel})${fastMode ? " with fast mode enabled" : ""}.`,
|
||||
"Cron uses a one-minute schedule assertion plus forced execution for fast verification.",
|
||||
],
|
||||
scenarios,
|
||||
providerMode,
|
||||
primaryModel,
|
||||
alternateModel,
|
||||
fastMode,
|
||||
concurrency,
|
||||
});
|
||||
const reportPath = path.join(outputDir, "qa-suite-report.md");
|
||||
const summaryPath = path.join(outputDir, "qa-suite-summary.json");
|
||||
await fs.writeFile(reportPath, report, "utf8");
|
||||
await fs.writeFile(
|
||||
summaryPath,
|
||||
`${JSON.stringify(
|
||||
{
|
||||
scenarios,
|
||||
counts: {
|
||||
total: scenarios.length,
|
||||
passed: scenarios.filter((scenario) => scenario.status === "pass").length,
|
||||
failed: scenarios.filter((scenario) => scenario.status === "fail").length,
|
||||
},
|
||||
},
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
"utf8",
|
||||
);
|
||||
const latestReport = {
|
||||
outputPath: reportPath,
|
||||
markdown: report,
|
||||
|
||||
Reference in New Issue
Block a user