mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:40:44 +00:00
qa-lab: make live lanes CI-ready for v1 E2E automation (#69122)
* qa-lab: harden CI defaults and failure semantics for live lanes
* qa-lab: add unit tests for suite progress logging defaults
* qa-lab: cover malformed multipass summary edge cases
* qa-lab: share suite summary failure counting helper
* qa-lab: test allow-failures parse wiring and sanitize progress ids
* fix: note qa CI live-lane defaults in changelog (#69122) (thanks @joshavant)
This commit is contained in:
@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Plugins/tasks: add a detached runtime registration contract so plugin executors can own detached task lifecycle and cancellation without reaching into core task internals. (#68915) Thanks @mbelinky.
|
||||
- Terminal/logging: optimize `sanitizeForLog()` by replacing the iterative control-character stripping loop with a single regex pass while preserving the existing ANSI-first sanitization behavior. (#67205) Thanks @bulutmuf.
|
||||
- QA/CI: make `openclaw qa suite` and `openclaw qa telegram` fail by default when scenarios fail, add `--allow-failures` for artifact-only runs, and tighten live-lane defaults for CI automation. (#69122) Thanks @joshavant.
|
||||
|
||||
### Fixes
|
||||
|
||||
|
||||
@@ -80,6 +80,8 @@ disposable server. It requires `OPENCLAW_QA_TELEGRAM_GROUP_ID`,
|
||||
private group. The SUT bot must have a Telegram username, and bot-to-bot
|
||||
observation works best when both bots have Bot-to-Bot Communication Mode
|
||||
enabled in `@BotFather`.
|
||||
The command exits non-zero when any scenario fails. Use `--allow-failures` when
|
||||
you want artifacts without a failing exit code.
|
||||
|
||||
Live transport lanes now share one smaller contract instead of each inventing
|
||||
their own scenario list shape:
|
||||
@@ -107,9 +109,11 @@ inside the guest, runs `qa suite`, then copies the normal QA report and
|
||||
summary back into `.artifacts/qa-e2e/...` on the host.
|
||||
It reuses the same scenario-selection behavior as `qa suite` on the host.
|
||||
Host and Multipass suite runs execute multiple selected scenarios in parallel
|
||||
with isolated gateway workers by default, up to 64 workers or the selected
|
||||
scenario count. Use `--concurrency <count>` to tune the worker count, or
|
||||
`--concurrency 1` for serial execution.
|
||||
with isolated gateway workers by default. `qa-channel` defaults to concurrency
|
||||
4, capped by the selected scenario count. Use `--concurrency <count>` to tune
|
||||
the worker count, or `--concurrency 1` for serial execution.
|
||||
The command exits non-zero when any scenario fails. Use `--allow-failures` when
|
||||
you want artifacts without a failing exit code.
|
||||
Live runs forward the supported QA auth inputs that are practical for the
|
||||
guest: env-based provider keys, the QA live provider config path, and
|
||||
`CODEX_HOME` when present. Keep `--output-dir` under the repo root so the guest
|
||||
|
||||
@@ -49,9 +49,11 @@ These commands sit beside the main test suites when you need QA-lab realism:
|
||||
- `pnpm openclaw qa suite`
|
||||
- Runs repo-backed QA scenarios directly on the host.
|
||||
- Runs multiple selected scenarios in parallel by default with isolated
|
||||
gateway workers, up to 64 workers or the selected scenario count. Use
|
||||
`--concurrency <count>` to tune the worker count, or `--concurrency 1` for
|
||||
the older serial lane.
|
||||
gateway workers. `qa-channel` defaults to concurrency 4 (bounded by the
|
||||
selected scenario count). Use `--concurrency <count>` to tune the worker
|
||||
count, or `--concurrency 1` for the older serial lane.
|
||||
- Exits non-zero when any scenario fails. Use `--allow-failures` when you
|
||||
want artifacts without a failing exit code.
|
||||
- Supports provider modes `live-frontier`, `mock-openai`, and `aimock`.
|
||||
`aimock` starts a local AIMock-backed provider server for experimental
|
||||
fixture and protocol-mock coverage without replacing the scenario-aware
|
||||
@@ -86,6 +88,8 @@ These commands sit beside the main test suites when you need QA-lab realism:
|
||||
- Runs the Telegram live QA lane against a real private group using the driver and SUT bot tokens from env.
|
||||
- Requires `OPENCLAW_QA_TELEGRAM_GROUP_ID`, `OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN`, and `OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN`. The group id must be the numeric Telegram chat id.
|
||||
- Supports `--credential-source convex` for shared pooled credentials. Use env mode by default, or set `OPENCLAW_QA_CREDENTIAL_SOURCE=convex` to opt into pooled leases.
|
||||
- Exits non-zero when any scenario fails. Use `--allow-failures` when you
|
||||
want artifacts without a failing exit code.
|
||||
- Requires two distinct bots in the same private group, with the SUT bot exposing a Telegram username.
|
||||
- For stable bot-to-bot observation, enable Bot-to-Bot Communication Mode in `@BotFather` for both bots and ensure the driver bot can observe group bot traffic.
|
||||
- Writes a Telegram QA report, summary, and observed-messages artifact under `.artifacts/qa-e2e/...`.
|
||||
@@ -118,7 +122,7 @@ Required env vars:
|
||||
- `OPENCLAW_QA_CONVEX_SECRET_CI` for `ci`
|
||||
- Credential role selection:
|
||||
- CLI: `--credential-role maintainer|ci`
|
||||
- Env default: `OPENCLAW_QA_CREDENTIAL_ROLE` (defaults to `maintainer`)
|
||||
- Env default: `OPENCLAW_QA_CREDENTIAL_ROLE` (defaults to `ci` in CI, `maintainer` otherwise)
|
||||
|
||||
Optional env vars:
|
||||
|
||||
|
||||
@@ -103,6 +103,7 @@ describe("qa cli runtime", () => {
|
||||
watchUrl: "http://127.0.0.1:43124",
|
||||
reportPath: "/tmp/report.md",
|
||||
summaryPath: "/tmp/summary.json",
|
||||
scenarios: [],
|
||||
});
|
||||
runQaCharacterEval.mockResolvedValue({
|
||||
reportPath: "/tmp/character-report.md",
|
||||
@@ -199,6 +200,7 @@ describe("qa cli runtime", () => {
|
||||
primaryModel: "openai/gpt-5.4",
|
||||
alternateModel: "openai/gpt-5.4",
|
||||
fastMode: true,
|
||||
allowFailures: undefined,
|
||||
scenarioIds: ["telegram-help-command"],
|
||||
sutAccountId: "sut-live",
|
||||
});
|
||||
@@ -223,10 +225,68 @@ describe("qa cli runtime", () => {
|
||||
expect.objectContaining({
|
||||
repoRoot: path.resolve("/tmp/openclaw-repo"),
|
||||
providerMode: "live-frontier",
|
||||
allowFailures: undefined,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("sets a failing exit code when telegram scenarios fail", async () => {
  // A live Telegram run whose only scenario reported a failure.
  const failedRun = {
    outputDir: "/tmp/telegram",
    reportPath: "/tmp/telegram/report.md",
    summaryPath: "/tmp/telegram/summary.json",
    observedMessagesPath: "/tmp/telegram/observed.json",
    scenarios: [
      {
        id: "telegram-help-command",
        title: "Telegram help command reply",
        status: "fail" as const,
        details: "missing expected text",
      },
    ],
  };

  // process.exitCode is process-wide state: snapshot it so this test cannot leak into others.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;
  runTelegramQaLive.mockResolvedValueOnce(failedRun);

  try {
    await runQaTelegramCommand({ repoRoot: "/tmp/openclaw-repo" });
    expect(process.exitCode).toBe(1);
  } finally {
    process.exitCode = savedExitCode;
  }
});
|
||||
|
||||
it("keeps telegram exit code clear when --allow-failures is set", async () => {
  // Same failing run as the default-mode test; only the allowFailures flag differs.
  const failedRun = {
    outputDir: "/tmp/telegram",
    reportPath: "/tmp/telegram/report.md",
    summaryPath: "/tmp/telegram/summary.json",
    observedMessagesPath: "/tmp/telegram/observed.json",
    scenarios: [
      {
        id: "telegram-help-command",
        title: "Telegram help command reply",
        status: "fail" as const,
        details: "missing expected text",
      },
    ],
  };

  // Snapshot the process-wide exit code so it can be restored afterwards.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;
  runTelegramQaLive.mockResolvedValueOnce(failedRun);

  try {
    await runQaTelegramCommand({ repoRoot: "/tmp/openclaw-repo", allowFailures: true });
    expect(process.exitCode).toBeUndefined();
  } finally {
    process.exitCode = savedExitCode;
  }
});
|
||||
|
||||
it("passes host suite concurrency through", async () => {
|
||||
await runQaSuiteCommand({
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
@@ -244,6 +304,59 @@ describe("qa cli runtime", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("sets a failing exit code when host suite scenarios fail", async () => {
  // Host suite result containing a single failed scenario.
  const failedSuite = {
    watchUrl: "http://127.0.0.1:43124",
    reportPath: "/tmp/report.md",
    summaryPath: "/tmp/summary.json",
    scenarios: [{ name: "channel chat baseline", status: "fail" as const, steps: [] }],
  };

  // Snapshot the process-wide exit code so it can be restored afterwards.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;
  runQaSuiteFromRuntime.mockResolvedValueOnce(failedSuite);

  try {
    await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo" });
    expect(process.exitCode).toBe(1);
  } finally {
    process.exitCode = savedExitCode;
  }
});
|
||||
|
||||
it("keeps host suite exit code clear when --allow-failures is set", async () => {
  // Host suite result containing a single failed scenario.
  const failedSuite = {
    watchUrl: "http://127.0.0.1:43124",
    reportPath: "/tmp/report.md",
    summaryPath: "/tmp/summary.json",
    scenarios: [{ name: "channel chat baseline", status: "fail" as const, steps: [] }],
  };

  // Snapshot the process-wide exit code so it can be restored afterwards.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;
  runQaSuiteFromRuntime.mockResolvedValueOnce(failedSuite);

  try {
    await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", allowFailures: true });
    expect(process.exitCode).toBeUndefined();
  } finally {
    process.exitCode = savedExitCode;
  }
});
|
||||
|
||||
it("passes host suite CLI auth mode through", async () => {
|
||||
await runQaSuiteCommand({
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
@@ -475,6 +588,7 @@ describe("qa cli runtime", () => {
|
||||
runner: "multipass",
|
||||
providerMode: "mock-openai",
|
||||
scenarioIds: ["channel-chat-baseline"],
|
||||
allowFailures: true,
|
||||
concurrency: 3,
|
||||
image: "lts",
|
||||
cpus: 2,
|
||||
@@ -490,6 +604,7 @@ describe("qa cli runtime", () => {
|
||||
primaryModel: undefined,
|
||||
alternateModel: undefined,
|
||||
fastMode: undefined,
|
||||
allowFailures: true,
|
||||
scenarioIds: ["channel-chat-baseline"],
|
||||
concurrency: 3,
|
||||
image: "lts",
|
||||
@@ -508,6 +623,7 @@ describe("qa cli runtime", () => {
|
||||
primaryModel: "openai/gpt-5.4",
|
||||
alternateModel: "openai/gpt-5.4",
|
||||
fastMode: true,
|
||||
allowFailures: true,
|
||||
scenarioIds: ["channel-chat-baseline"],
|
||||
});
|
||||
|
||||
@@ -519,11 +635,171 @@ describe("qa cli runtime", () => {
|
||||
primaryModel: "openai/gpt-5.4",
|
||||
alternateModel: "openai/gpt-5.4",
|
||||
fastMode: true,
|
||||
allowFailures: true,
|
||||
scenarioIds: ["channel-chat-baseline"],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("sets a failing exit code when multipass summary reports failed scenarios", async () => {
  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const inTempRoot = (fileName: string) => path.join(tempRoot, fileName);
  const summaryFile = inTempRoot("qa-suite-summary.json");

  // The summary on disk reports one failed scenario out of two.
  const summary = { counts: { total: 2, passed: 1, failed: 1 } };
  await fs.writeFile(summaryFile, JSON.stringify(summary), "utf8");

  runQaMultipass.mockResolvedValueOnce({
    outputDir: tempRoot,
    reportPath: inTempRoot("qa-suite-report.md"),
    summaryPath: summaryFile,
    hostLogPath: inTempRoot("multipass-host.log"),
    bootstrapLogPath: inTempRoot("multipass-guest-bootstrap.log"),
    guestScriptPath: inTempRoot("multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });

  // Snapshot the process-wide exit code so it can be restored afterwards.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;

  try {
    await runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass" });
    expect(process.exitCode).toBe(1);
  } finally {
    process.exitCode = savedExitCode;
    await fs.rm(tempRoot, { recursive: true, force: true });
  }
});
|
||||
|
||||
it("rejects malformed multipass summary JSON", async () => {
  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const inTempRoot = (fileName: string) => path.join(tempRoot, fileName);
  const summaryFile = inTempRoot("qa-suite-summary.json");

  // The summary file exists but does not contain valid JSON.
  await fs.writeFile(summaryFile, "{not-json", "utf8");

  runQaMultipass.mockResolvedValueOnce({
    outputDir: tempRoot,
    reportPath: inTempRoot("qa-suite-report.md"),
    summaryPath: summaryFile,
    hostLogPath: inTempRoot("multipass-host.log"),
    bootstrapLogPath: inTempRoot("multipass-guest-bootstrap.log"),
    guestScriptPath: inTempRoot("multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });

  try {
    const pendingRun = runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass" });
    await expect(pendingRun).rejects.toThrow("Could not parse QA summary JSON");
  } finally {
    await fs.rm(tempRoot, { recursive: true, force: true });
  }
});
|
||||
|
||||
it("rejects unreadable multipass summary JSON with read/parse wording", async () => {
  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const inTempRoot = (fileName: string) => path.join(tempRoot, fileName);
  // The summary file is never written here, so the reported path does not exist on disk.
  const summaryFile = inTempRoot("qa-suite-summary.json");

  runQaMultipass.mockResolvedValueOnce({
    outputDir: tempRoot,
    reportPath: inTempRoot("qa-suite-report.md"),
    summaryPath: summaryFile,
    hostLogPath: inTempRoot("multipass-host.log"),
    bootstrapLogPath: inTempRoot("multipass-guest-bootstrap.log"),
    guestScriptPath: inTempRoot("multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });

  try {
    const pendingRun = runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass" });
    await expect(pendingRun).rejects.toThrow("Could not read QA summary JSON");
  } finally {
    await fs.rm(tempRoot, { recursive: true, force: true });
  }
});
|
||||
|
||||
it("rejects partial multipass summary JSON without failure fields", async () => {
  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const inTempRoot = (fileName: string) => path.join(tempRoot, fileName);
  const summaryFile = inTempRoot("qa-suite-summary.json");

  // Valid JSON, but with neither counts.failed nor a scenarios array.
  const partialSummary = { counts: { total: 2, passed: 2 } };
  await fs.writeFile(summaryFile, JSON.stringify(partialSummary), "utf8");

  runQaMultipass.mockResolvedValueOnce({
    outputDir: tempRoot,
    reportPath: inTempRoot("qa-suite-report.md"),
    summaryPath: summaryFile,
    hostLogPath: inTempRoot("multipass-host.log"),
    bootstrapLogPath: inTempRoot("multipass-guest-bootstrap.log"),
    guestScriptPath: inTempRoot("multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });

  try {
    const pendingRun = runQaSuiteCommand({ repoRoot: "/tmp/openclaw-repo", runner: "multipass" });
    await expect(pendingRun).rejects.toThrow(
      "did not include counts.failed or scenarios[].status",
    );
  } finally {
    await fs.rm(tempRoot, { recursive: true, force: true });
  }
});
|
||||
|
||||
it("keeps multipass exit code clear when --allow-failures is set", async () => {
  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const inTempRoot = (fileName: string) => path.join(tempRoot, fileName);
  const summaryFile = inTempRoot("qa-suite-summary.json");

  // The summary on disk reports one failed scenario out of two.
  const summary = { counts: { total: 2, passed: 1, failed: 1 } };
  await fs.writeFile(summaryFile, JSON.stringify(summary), "utf8");

  runQaMultipass.mockResolvedValueOnce({
    outputDir: tempRoot,
    reportPath: inTempRoot("qa-suite-report.md"),
    summaryPath: summaryFile,
    hostLogPath: inTempRoot("multipass-host.log"),
    bootstrapLogPath: inTempRoot("multipass-guest-bootstrap.log"),
    guestScriptPath: inTempRoot("multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });

  // Snapshot the process-wide exit code so it can be restored afterwards.
  const savedExitCode = process.exitCode;
  process.exitCode = undefined;

  try {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      runner: "multipass",
      allowFailures: true,
    });
    expect(process.exitCode).toBeUndefined();
  } finally {
    process.exitCode = savedExitCode;
    await fs.rm(tempRoot, { recursive: true, force: true });
  }
});
|
||||
|
||||
it("passes provider-qualified mock parity suite selection through to the host runner", async () => {
|
||||
await runQaSuiteCommand({
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
|
||||
@@ -39,6 +39,7 @@ import {
|
||||
} from "./run-config.js";
|
||||
import { readQaScenarioPack } from "./scenario-catalog.js";
|
||||
import { runQaSuiteFromRuntime } from "./suite-launch.runtime.js";
|
||||
import { readQaSuiteFailedScenarioCountFromSummary } from "./suite-summary.js";
|
||||
|
||||
type InterruptibleServer = {
|
||||
baseUrl: string;
|
||||
@@ -121,6 +122,34 @@ function parseQaPositiveIntegerOption(label: string, value: number | undefined)
|
||||
return Math.floor(value);
|
||||
}
|
||||
|
||||
/**
 * Reads a QA suite summary file and returns the number of failed scenarios it reports.
 *
 * The count itself is derived by `readQaSuiteFailedScenarioCountFromSummary` from the
 * parsed JSON payload.
 *
 * @param summaryPath - Path of the summary JSON file to read.
 * @returns The failed-scenario count extracted from the summary.
 * @throws Error when the file cannot be read, when its content is not valid JSON, or
 *   when the helper cannot derive a count (it returned `null`). Read and parse errors
 *   keep the underlying error as `cause`.
 */
async function readQaFailedScenarioCountFromSummary(summaryPath: string) {
  // Step 1: load the raw text. Read failures get their own message, distinct from parse failures.
  let rawSummary: string;
  try {
    rawSummary = await fs.readFile(summaryPath, "utf8");
  } catch (readError) {
    throw new Error(
      `Could not read QA summary JSON at ${summaryPath}: ${formatErrorMessage(readError)}`,
      { cause: readError },
    );
  }

  // Step 2: decode the JSON. The result stays `unknown` until the helper inspects it.
  let parsedSummary: unknown;
  try {
    parsedSummary = JSON.parse(rawSummary) as unknown;
  } catch (parseError) {
    throw new Error(
      `Could not parse QA summary JSON at ${summaryPath}: ${formatErrorMessage(parseError)}`,
      { cause: parseError },
    );
  }

  // Step 3: a `null` count means the summary had no usable failure information.
  const failedCount = readQaSuiteFailedScenarioCountFromSummary(parsedSummary);
  if (failedCount === null) {
    throw new Error(
      `QA summary at ${summaryPath} did not include counts.failed or scenarios[].status.`,
    );
  }
  return failedCount;
}
|
||||
|
||||
function parseQaCliBackendAuthMode(value: string | undefined): QaCliBackendAuthMode | undefined {
|
||||
const normalized = value?.trim().toLowerCase();
|
||||
if (!normalized) {
|
||||
@@ -329,6 +358,7 @@ export async function runQaSuiteCommand(opts: {
|
||||
parityPack?: string;
|
||||
scenarioIds?: string[];
|
||||
concurrency?: number;
|
||||
allowFailures?: boolean;
|
||||
image?: string;
|
||||
cpus?: number;
|
||||
memory?: string;
|
||||
@@ -341,6 +371,7 @@ export async function runQaSuiteCommand(opts: {
|
||||
parityPack: opts.parityPack,
|
||||
scenarioIds: opts.scenarioIds,
|
||||
});
|
||||
const allowFailures = opts.allowFailures === true;
|
||||
if (runner !== "host" && runner !== "multipass") {
|
||||
throw new Error(`--runner must be one of host or multipass, got "${opts.runner}".`);
|
||||
}
|
||||
@@ -367,6 +398,7 @@ export async function runQaSuiteCommand(opts: {
|
||||
primaryModel: opts.primaryModel,
|
||||
alternateModel: opts.alternateModel,
|
||||
fastMode: opts.fastMode,
|
||||
allowFailures: true,
|
||||
scenarioIds,
|
||||
...(opts.concurrency !== undefined
|
||||
? { concurrency: parseQaPositiveIntegerOption("--concurrency", opts.concurrency) }
|
||||
@@ -381,6 +413,12 @@ export async function runQaSuiteCommand(opts: {
|
||||
process.stdout.write(`QA Multipass summary: ${result.summaryPath}\n`);
|
||||
process.stdout.write(`QA Multipass host log: ${result.hostLogPath}\n`);
|
||||
process.stdout.write(`QA Multipass bootstrap log: ${result.bootstrapLogPath}\n`);
|
||||
if (!allowFailures) {
|
||||
const failedScenarioCount = await readQaFailedScenarioCountFromSummary(result.summaryPath);
|
||||
if (failedScenarioCount > 0) {
|
||||
process.exitCode = 1;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
const result = await runQaSuiteFromRuntime({
|
||||
@@ -400,6 +438,10 @@ export async function runQaSuiteCommand(opts: {
|
||||
process.stdout.write(`QA suite watch: ${result.watchUrl}\n`);
|
||||
process.stdout.write(`QA suite report: ${result.reportPath}\n`);
|
||||
process.stdout.write(`QA suite summary: ${result.summaryPath}\n`);
|
||||
const failedScenarioCount = readQaSuiteFailedScenarioCountFromSummary(result);
|
||||
if (!allowFailures && failedScenarioCount !== null && failedScenarioCount > 0) {
|
||||
process.exitCode = 1;
|
||||
}
|
||||
}
|
||||
|
||||
export async function runQaParityReportCommand(opts: {
|
||||
|
||||
@@ -46,6 +46,7 @@ const {
|
||||
runQaCredentialsRemoveCommand,
|
||||
runQaCoverageReportCommand,
|
||||
runQaProviderServerCommand,
|
||||
runQaSuiteCommand,
|
||||
runQaTelegramCommand,
|
||||
} = vi.hoisted(() => ({
|
||||
runQaCredentialsAddCommand: vi.fn(),
|
||||
@@ -53,6 +54,7 @@ const {
|
||||
runQaCredentialsRemoveCommand: vi.fn(),
|
||||
runQaCoverageReportCommand: vi.fn(),
|
||||
runQaProviderServerCommand: vi.fn(),
|
||||
runQaSuiteCommand: vi.fn(),
|
||||
runQaTelegramCommand: vi.fn(),
|
||||
}));
|
||||
|
||||
@@ -76,6 +78,7 @@ vi.mock("./cli.runtime.js", () => ({
|
||||
runQaCredentialsRemoveCommand,
|
||||
runQaCoverageReportCommand,
|
||||
runQaProviderServerCommand,
|
||||
runQaSuiteCommand,
|
||||
}));
|
||||
|
||||
import { registerQaLabCli } from "./cli.js";
|
||||
@@ -90,6 +93,7 @@ describe("qa cli registration", () => {
|
||||
runQaCredentialsRemoveCommand.mockReset();
|
||||
runQaCoverageReportCommand.mockReset();
|
||||
runQaProviderServerCommand.mockReset();
|
||||
runQaSuiteCommand.mockReset();
|
||||
runQaTelegramCommand.mockReset();
|
||||
listQaRunnerCliContributions
|
||||
.mockReset()
|
||||
@@ -188,6 +192,7 @@ describe("qa cli registration", () => {
|
||||
primaryModel: undefined,
|
||||
alternateModel: undefined,
|
||||
fastMode: false,
|
||||
allowFailures: false,
|
||||
scenarioIds: [],
|
||||
sutAccountId: "sut",
|
||||
credentialSource: undefined,
|
||||
@@ -195,6 +200,26 @@ describe("qa cli registration", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("forwards --allow-failures for telegram runs", async () => {
  // Parse a telegram invocation that carries only the flag under test.
  const argv = ["node", "openclaw", "qa", "telegram", "--allow-failures"];
  await program.parseAsync(argv);

  const forwardedOptions = expect.objectContaining({ allowFailures: true });
  expect(runQaTelegramCommand).toHaveBeenCalledWith(forwardedOptions);
});
|
||||
|
||||
it("forwards --allow-failures for suite runs", async () => {
  // Parse a suite invocation that carries only the flag under test.
  const argv = ["node", "openclaw", "qa", "suite", "--allow-failures"];
  await program.parseAsync(argv);

  const forwardedOptions = expect.objectContaining({ allowFailures: true });
  expect(runQaSuiteCommand).toHaveBeenCalledWith(forwardedOptions);
});
|
||||
|
||||
it("routes credential add flags into the qa runtime command", async () => {
|
||||
await program.parseAsync([
|
||||
"node",
|
||||
|
||||
@@ -35,6 +35,7 @@ async function runQaSuite(opts: {
|
||||
primaryModel?: string;
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
allowFailures?: boolean;
|
||||
cliAuthMode?: string;
|
||||
parityPack?: string;
|
||||
scenarioIds?: string[];
|
||||
@@ -238,6 +239,11 @@ export function registerQaLabCli(program: Command) {
|
||||
.option("--concurrency <count>", "Scenario worker concurrency", (value: string) =>
|
||||
Number(value),
|
||||
)
|
||||
.option(
|
||||
"--allow-failures",
|
||||
"Write artifacts without setting a failing exit code when scenarios fail",
|
||||
false,
|
||||
)
|
||||
.option("--fast", "Enable provider fast mode where supported", false)
|
||||
.option("--image <alias>", "Multipass image alias")
|
||||
.option("--cpus <count>", "Multipass vCPU count", (value: string) => Number(value))
|
||||
@@ -256,6 +262,7 @@ export function registerQaLabCli(program: Command) {
|
||||
parityPack?: string;
|
||||
scenario?: string[];
|
||||
concurrency?: number;
|
||||
allowFailures?: boolean;
|
||||
fast?: boolean;
|
||||
image?: string;
|
||||
cpus?: number;
|
||||
@@ -275,6 +282,7 @@ export function registerQaLabCli(program: Command) {
|
||||
parityPack: opts.parityPack,
|
||||
scenarioIds: opts.scenario,
|
||||
concurrency: opts.concurrency,
|
||||
allowFailures: opts.allowFailures,
|
||||
image: opts.image,
|
||||
cpus: opts.cpus,
|
||||
memory: opts.memory,
|
||||
|
||||
@@ -80,6 +80,65 @@ describe("credential lease runtime", () => {
|
||||
expect(headers.authorization).toBe("Bearer maintainer-secret");
|
||||
});
|
||||
|
||||
it("defaults convex credential role to maintainer outside CI", async () => {
  // The broker answers the first request with a successful lease.
  const leaseResponse = jsonResponse({
    status: "ok",
    credentialId: "cred-maintainer-default",
    leaseToken: "lease-maintainer-default",
    payload: { groupId: "-100123", driverToken: "driver", sutToken: "sut" },
  });
  const fetchImpl = vi.fn<typeof fetch>().mockResolvedValueOnce(leaseResponse);

  // No CI variable and no explicit role: only the maintainer secret is configured.
  const env = {
    OPENCLAW_QA_CONVEX_SITE_URL: "https://qa-cred.example.convex.site",
    OPENCLAW_QA_CONVEX_SECRET_MAINTAINER: "maintainer-secret",
  };

  await acquireQaCredentialLease({
    kind: "telegram",
    source: "convex",
    env,
    fetchImpl,
    resolveEnvPayload: () => ({ groupId: "-1", driverToken: "unused", sutToken: "unused" }),
    parsePayload: (payload) =>
      payload as { groupId: string; driverToken: string; sutToken: string },
  });

  // The secret sent in the first request's bearer header shows which role was chosen.
  const requestInit = fetchImpl.mock.calls[0]?.[1];
  const sentHeaders = requestInit?.headers as Record<string, string>;
  expect(sentHeaders.authorization).toBe("Bearer maintainer-secret");
});
|
||||
|
||||
it("defaults convex credential role to ci when CI=true", async () => {
  // The broker answers the first request with a successful lease.
  const leaseResponse = jsonResponse({
    status: "ok",
    credentialId: "cred-ci-default",
    leaseToken: "lease-ci-default",
    payload: { groupId: "-100123", driverToken: "driver", sutToken: "sut" },
  });
  const fetchImpl = vi.fn<typeof fetch>().mockResolvedValueOnce(leaseResponse);

  // CI is set and no explicit role is given: only the ci secret is configured.
  const env = {
    CI: "true",
    OPENCLAW_QA_CONVEX_SITE_URL: "https://qa-cred.example.convex.site",
    OPENCLAW_QA_CONVEX_SECRET_CI: "ci-secret",
  };

  await acquireQaCredentialLease({
    kind: "telegram",
    source: "convex",
    env,
    fetchImpl,
    resolveEnvPayload: () => ({ groupId: "-1", driverToken: "unused", sutToken: "unused" }),
    parsePayload: (payload) =>
      payload as { groupId: string; driverToken: string; sutToken: string },
  });

  // The secret sent in the first request's bearer header shows which role was chosen.
  const requestInit = fetchImpl.mock.calls[0]?.[1];
  const sentHeaders = requestInit?.headers as Record<string, string>;
  expect(sentHeaders.authorization).toBe("Bearer ci-secret");
});
|
||||
|
||||
it("retries convex acquire while the pool is exhausted", async () => {
|
||||
const fetchImpl = vi
|
||||
.fn<typeof fetch>()
|
||||
|
||||
@@ -114,8 +114,12 @@ function normalizeQaCredentialSource(value: string | undefined): QaCredentialLea
|
||||
throw new Error(`Credential source must be one of env or convex, got "${value}".`);
|
||||
}
|
||||
|
||||
function normalizeQaCredentialRole(value: string | undefined): QaCredentialRole {
|
||||
const normalized = value?.trim().toLowerCase() || "maintainer";
|
||||
function normalizeQaCredentialRole(
|
||||
value: string | undefined,
|
||||
env: NodeJS.ProcessEnv = process.env,
|
||||
): QaCredentialRole {
|
||||
const defaultRole = isTruthyOptIn(env.CI) ? "ci" : "maintainer";
|
||||
const normalized = value?.trim().toLowerCase() || defaultRole;
|
||||
if (normalized === "maintainer" || normalized === "ci") {
|
||||
return normalized;
|
||||
}
|
||||
@@ -350,7 +354,7 @@ export async function acquireQaCredentialLease<TPayload>(
|
||||
};
|
||||
}
|
||||
|
||||
const role = normalizeQaCredentialRole(opts.role ?? env.OPENCLAW_QA_CREDENTIAL_ROLE);
|
||||
const role = normalizeQaCredentialRole(opts.role ?? env.OPENCLAW_QA_CREDENTIAL_ROLE, env);
|
||||
const config = resolveConvexCredentialBrokerConfig({
|
||||
env,
|
||||
role,
|
||||
|
||||
@@ -24,6 +24,7 @@ export function resolveLiveTransportQaRunOptions(
|
||||
primaryModel: opts.primaryModel,
|
||||
alternateModel: opts.alternateModel,
|
||||
fastMode: opts.fastMode,
|
||||
allowFailures: opts.allowFailures,
|
||||
scenarioIds: opts.scenarioIds,
|
||||
sutAccountId: opts.sutAccountId,
|
||||
credentialSource: opts.credentialSource?.trim(),
|
||||
|
||||
@@ -10,6 +10,7 @@ export type LiveTransportQaCommandOptions = {
|
||||
primaryModel?: string;
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
allowFailures?: boolean;
|
||||
scenarioIds?: string[];
|
||||
sutAccountId?: string;
|
||||
credentialSource?: string;
|
||||
@@ -24,6 +25,7 @@ type LiveTransportQaCommanderOptions = {
|
||||
altModel?: string;
|
||||
scenario?: string[];
|
||||
fast?: boolean;
|
||||
allowFailures?: boolean;
|
||||
sutAccount?: string;
|
||||
credentialSource?: string;
|
||||
credentialRole?: string;
|
||||
@@ -57,6 +59,7 @@ export function mapLiveTransportQaCommanderOptions(
|
||||
primaryModel: opts.model,
|
||||
alternateModel: opts.altModel,
|
||||
fastMode: opts.fast,
|
||||
allowFailures: opts.allowFailures,
|
||||
scenarioIds: opts.scenario,
|
||||
sutAccountId: opts.sutAccount,
|
||||
credentialSource: opts.credentialSource,
|
||||
@@ -84,6 +87,11 @@ export function registerLiveTransportQaCli(params: {
|
||||
.option("--alt-model <ref>", "Alternate provider/model ref")
|
||||
.option("--scenario <id>", params.scenarioHelp, collectString, [])
|
||||
.option("--fast", "Enable provider fast mode where supported", false)
|
||||
.option(
|
||||
"--allow-failures",
|
||||
"Write artifacts without setting a failing exit code when scenarios fail",
|
||||
false,
|
||||
)
|
||||
.option("--sut-account <id>", params.sutAccountHelp, "sut");
|
||||
|
||||
if (params.credentialOptions) {
|
||||
|
||||
@@ -6,10 +6,17 @@ import {
|
||||
import { runTelegramQaLive } from "./telegram-live.runtime.js";
|
||||
|
||||
/**
 * Runs the Telegram live QA lane and reports where its artifacts were written.
 *
 * The command options are resolved once via `resolveLiveTransportQaRunOptions`, and
 * the resolved value is used both to start the run and to decide the exit code.
 * After the artifact paths are printed, `process.exitCode` is set to `1` when at
 * least one scenario has status `"fail"`, unless `allowFailures` is set on the
 * resolved options.
 *
 * @param opts - Command options as provided by the CLI layer.
 */
export async function runQaTelegramCommand(opts: LiveTransportQaCommandOptions): Promise<void> {
  // Resolve once so the run and the exit-code decision see the same options,
  // and the live lane is started exactly one time.
  const runOptions = resolveLiveTransportQaRunOptions(opts);
  const result = await runTelegramQaLive(runOptions);

  printLiveTransportQaArtifacts("Telegram QA", {
    report: result.reportPath,
    summary: result.summaryPath,
    "observed messages": result.observedMessagesPath,
  });

  // Set process.exitCode instead of throwing: artifact paths are already printed
  // and the process can finish normally with a non-zero status.
  const hasFailedScenario = result.scenarios.some((scenario) => scenario.status === "fail");
  if (!runOptions.allowFailures && hasFailedScenario) {
    process.exitCode = 1;
  }
}
|
||||
|
||||
@@ -22,7 +22,8 @@ export const telegramQaCliRegistration: LiveTransportQaCliRegistration =
|
||||
commandName: "telegram",
|
||||
credentialOptions: {
|
||||
sourceDescription: "Credential source for Telegram QA: env or convex (default: env)",
|
||||
roleDescription: "Credential role for convex auth: maintainer or ci (default: maintainer)",
|
||||
roleDescription:
|
||||
"Credential role for convex auth: maintainer or ci (default: ci in CI, maintainer otherwise)",
|
||||
},
|
||||
description: "Run the manual Telegram live QA lane against a private bot-to-bot group harness",
|
||||
outputDirHelp: "Telegram QA artifact directory",
|
||||
|
||||
@@ -140,6 +140,17 @@ describe("qa multipass runtime", () => {
|
||||
expect(script).toContain("'--provider-mode' 'live-frontier'");
|
||||
});
|
||||
|
||||
it("forwards --allow-failures into the guest qa suite command when requested", () => {
  const repoRoot = process.cwd();
  // Artifact directory placed under the repo root's .artifacts/qa-e2e folder.
  const outputDir = path.join(repoRoot, ".artifacts", "qa-e2e", "multipass-allow-failures-test");

  const plan = createQaMultipassPlan({
    repoRoot,
    outputDir,
    allowFailures: true,
    scenarioIds: ["channel-chat-baseline"],
  });

  // The flag must appear somewhere in the generated guest command arguments.
  const containsFlag = expect.arrayContaining(["--allow-failures"]);
  expect(plan.qaCommand).toEqual(containsFlag);
});
|
||||
|
||||
it("redacts forwarded live secrets in the persisted artifact script", () => {
|
||||
vi.stubEnv("OPENAI_API_KEY", "test-openai-key");
|
||||
const plan = createQaMultipassPlan({
|
||||
|
||||
@@ -237,6 +237,7 @@ export function createQaMultipassPlan(params: {
|
||||
primaryModel?: string;
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
allowFailures?: boolean;
|
||||
scenarioIds?: string[];
|
||||
concurrency?: number;
|
||||
image?: string;
|
||||
@@ -275,6 +276,7 @@ export function createQaMultipassPlan(params: {
|
||||
...(params.primaryModel ? ["--model", params.primaryModel] : []),
|
||||
...(params.alternateModel ? ["--alt-model", params.alternateModel] : []),
|
||||
...(params.fastMode ? ["--fast"] : []),
|
||||
...(params.allowFailures ? ["--allow-failures"] : []),
|
||||
...(params.concurrency ? ["--concurrency", String(params.concurrency)] : []),
|
||||
],
|
||||
scenarioIds,
|
||||
@@ -544,6 +546,7 @@ export async function runQaMultipass(params: {
|
||||
primaryModel?: string;
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
allowFailures?: boolean;
|
||||
scenarioIds?: string[];
|
||||
concurrency?: number;
|
||||
image?: string;
|
||||
|
||||
36
extensions/qa-lab/src/suite-summary.test.ts
Normal file
36
extensions/qa-lab/src/suite-summary.test.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
countQaSuiteFailedScenarios,
|
||||
readQaSuiteFailedScenarioCountFromSummary,
|
||||
} from "./suite-summary.js";
|
||||
|
||||
describe("qa suite summary helpers", () => {
  it("counts failed scenarios from scenario statuses", () => {
    const failedCount = countQaSuiteFailedScenarios([
      { status: "pass" },
      { status: "fail" },
      { status: "fail" },
    ]);

    expect(failedCount).toBe(2);
  });

  it("prefers counts.failed when available", () => {
    // counts.failed (3.8) wins over the single failing scenario and is floored.
    const summary = {
      counts: { failed: 3.8 },
      scenarios: [{ status: "pass" }, { status: "fail" }],
    };

    expect(readQaSuiteFailedScenarioCountFromSummary(summary)).toBe(3);
  });

  it("falls back to scenario statuses when counts.failed is missing", () => {
    const summary = {
      counts: { total: 2 },
      scenarios: [{ status: "pass" }, { status: "fail" }],
    };

    expect(readQaSuiteFailedScenarioCountFromSummary(summary)).toBe(1);
  });

  it("returns null for unsupported summary shapes", () => {
    // Neither counts.failed nor a scenarios list is present.
    const withoutFailureSources = { counts: { total: 2 } };
    expect(readQaSuiteFailedScenarioCountFromSummary(withoutFailureSources)).toBeNull();

    // Non-object payloads are rejected outright.
    expect(readQaSuiteFailedScenarioCountFromSummary("not-json-object")).toBeNull();
  });
});
|
||||
64
extensions/qa-lab/src/suite-summary.ts
Normal file
64
extensions/qa-lab/src/suite-summary.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import type { QaProviderMode } from "./model-selection.js";
|
||||
|
||||
/** One scenario entry as it appears in a QA suite summary. */
export type QaSuiteSummaryScenario = {
  /** Scenario name. */
  name: string;
  /** Scenario outcome; only pass and fail are representable. */
  status: "pass" | "fail";
  /** Step records, kept opaque at this level. */
  steps: Array<unknown>;
  /** Optional free-form detail text. */
  details?: string;
};
|
||||
|
||||
/**
 * Shape of the QA suite summary payload: the per-scenario results, aggregate
 * counts, and the settings the run was executed with.
 */
export type QaSuiteSummaryJson = {
  /** Per-scenario results. */
  scenarios: Array<QaSuiteSummaryScenario>;
  /** Aggregate scenario counts. */
  counts: {
    total: number;
    passed: number;
    failed: number;
  };
  /** Metadata describing the run. */
  run: {
    /** Run start timestamp, as a string. */
    startedAt: string;
    /** Run finish timestamp, as a string. */
    finishedAt: string;
    providerMode: QaProviderMode;
    primaryModel: string;
    primaryProvider: string | null;
    primaryModelName: string | null;
    alternateModel: string;
    alternateProvider: string | null;
    alternateModelName: string | null;
    fastMode: boolean;
    concurrency: number;
    /** Scenario ids recorded for the run, or `null`. */
    scenarioIds: Array<string> | null;
  };
};
|
||||
|
||||
/** The only part of a summary scenario the failure-counting helpers read. */
type QaSuiteScenarioStatus = { status: QaSuiteSummaryScenario["status"] };
|
||||
|
||||
/**
 * Counts how many of the given scenarios have status `"fail"`.
 *
 * @param scenarios - Scenario entries; only `status` is inspected.
 * @returns The number of failed scenarios (`0` for an empty list).
 */
export function countQaSuiteFailedScenarios(
  scenarios: ReadonlyArray<QaSuiteScenarioStatus>,
): number {
  return scenarios.reduce(
    (failedSoFar, scenario) => failedSoFar + (scenario.status === "fail" ? 1 : 0),
    0,
  );
}
|
||||
|
||||
/**
 * Extracts the failed-scenario count from an untrusted, already-parsed QA
 * suite summary.
 *
 * Resolution order:
 * 1. `counts.failed`, when it is a finite number; the value is floored and
 *    clamped so the result is never negative.
 * 2. Otherwise, the number of `scenarios` entries whose `status` is `"fail"`.
 *
 * @param summary - Parsed summary payload of unknown shape.
 * @returns The failed-scenario count, or `null` when the payload is not an
 *   object, offers neither source, or its `scenarios` list contains an entry
 *   that is not an object.
 */
export function readQaSuiteFailedScenarioCountFromSummary(summary: unknown): number | null {
  if (!summary || typeof summary !== "object") {
    return null;
  }
  const payload = summary as {
    counts?: { failed?: unknown } | null;
    scenarios?: unknown;
  };

  const reportedFailed = payload.counts?.failed;
  if (typeof reportedFailed === "number" && Number.isFinite(reportedFailed)) {
    return Math.max(0, Math.floor(reportedFailed));
  }

  if (!Array.isArray(payload.scenarios)) {
    return null;
  }

  // Entries are still untrusted here, so they are validated and counted
  // inline rather than handed to countQaSuiteFailedScenarios, which assumes
  // well-typed scenario objects.
  const entries: unknown[] = payload.scenarios;
  let failed = 0;
  for (const entry of entries) {
    if (!entry || typeof entry !== "object") {
      // A null/primitive entry means the list is malformed; report the
      // summary as unsupported instead of throwing on `entry.status`.
      return null;
    }
    if ((entry as { status?: unknown }).status === "fail") {
      failed += 1;
    }
  }
  return failed;
}
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { runQaSuite } from "./suite.js";
|
||||
import { qaSuiteProgressTesting, runQaSuite } from "./suite.js";
|
||||
|
||||
describe("qa suite", () => {
|
||||
it("rejects unsupported transport ids before starting the lab", async () => {
|
||||
@@ -14,4 +14,58 @@ describe("qa suite", () => {
|
||||
|
||||
expect(startLab).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("parses progress env booleans", () => {
  const { parseQaSuiteBooleanEnv } = qaSuiteProgressTesting;
  // Each pair is [raw env value, expected parse result].
  const cases: Array<[string, boolean | undefined]> = [
    ["true", true],
    ["on", true],
    ["false", false],
    ["off", false],
    ["maybe", undefined],
  ];

  for (const [raw, expected] of cases) {
    expect(parseQaSuiteBooleanEnv(raw)).toBe(expected);
  }
});
|
||||
|
||||
it("defaults progress logging from CI when no override is set", () => {
  const { shouldLogQaSuiteProgress } = qaSuiteProgressTesting;

  const enabledInCi = shouldLogQaSuiteProgress({ CI: "true" });
  const enabledOutsideCi = shouldLogQaSuiteProgress({ CI: "false" });

  expect(enabledInCi).toBe(true);
  expect(enabledOutsideCi).toBe(false);
});
|
||||
|
||||
it("applies OPENCLAW_QA_SUITE_PROGRESS override and falls back on invalid values", () => {
  const { shouldLogQaSuiteProgress } = qaSuiteProgressTesting;
  // Each pair is [CI value, override value, expected decision].
  const cases: Array<[string, string, boolean]> = [
    ["false", "true", true],
    ["true", "false", false],
    ["false", "on", true],
    ["true", "off", false],
    // An unparseable override is ignored, so the CI value decides.
    ["true", "definitely", true],
  ];

  for (const [ci, override, expected] of cases) {
    expect(
      shouldLogQaSuiteProgress({
        CI: ci,
        OPENCLAW_QA_SUITE_PROGRESS: override,
      }),
    ).toBe(expected);
  }
});
|
||||
|
||||
it("sanitizes scenario ids for progress logs", () => {
  const { sanitizeQaSuiteProgressValue } = qaSuiteProgressTesting;
  // Each pair is [raw id, expected log-safe value].
  const cases: Array<[string, string]> = [
    ["scenario-id", "scenario-id"],
    ["scenario\nid\tvalue", "scenario id value"],
    ["\u0000\u0001", "<empty>"],
  ];

  for (const [raw, expected] of cases) {
    expect(sanitizeQaSuiteProgressValue(raw)).toBe(expected);
  }
});
|
||||
});
|
||||
|
||||
@@ -46,6 +46,7 @@ import {
|
||||
import { createQaSuiteScenarioFlowApi } from "./suite-runtime-flow.js";
|
||||
import { waitForGatewayHealthy, waitForTransportReady } from "./suite-runtime-gateway.js";
|
||||
import type { QaSuiteRuntimeEnv } from "./suite-runtime-types.js";
|
||||
import { countQaSuiteFailedScenarios, type QaSuiteSummaryJson } from "./suite-summary.js";
|
||||
import { closeQaWebSessions } from "./web-runtime.js";
|
||||
|
||||
type QaSuiteStep = {
|
||||
@@ -84,6 +85,49 @@ export type QaSuiteRunParams = {
|
||||
controlUiEnabled?: boolean;
|
||||
};
|
||||
|
||||
/**
 * Parses a boolean-like environment value.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param value - Raw environment value, possibly unset.
 * @returns `true` for `1`/`true`/`yes`/`on`, `false` for `0`/`false`/`no`/`off`,
 *   and `undefined` for unset, blank, or unrecognized values.
 */
function parseQaSuiteBooleanEnv(value: string | undefined): boolean | undefined {
  switch (value?.trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "on":
      return true;
    case "0":
    case "false":
    case "no":
    case "off":
      return false;
    default:
      // Covers unset, blank, and unrecognized values alike.
      return undefined;
  }
}
|
||||
|
||||
/**
 * Decides whether suite progress lines should be written.
 *
 * A parseable `OPENCLAW_QA_SUITE_PROGRESS` value always wins. When it is unset
 * or unparseable, progress is enabled only if `CI` parses as true.
 *
 * @param env - Environment to read; defaults to `process.env`.
 * @returns Whether progress logging is enabled.
 */
function shouldLogQaSuiteProgress(env: NodeJS.ProcessEnv = process.env) {
  const explicitChoice = parseQaSuiteBooleanEnv(env.OPENCLAW_QA_SUITE_PROGRESS);
  return explicitChoice ?? (parseQaSuiteBooleanEnv(env.CI) === true);
}
|
||||
|
||||
/**
 * Writes one `[qa-suite]`-prefixed progress line to stderr.
 *
 * @param enabled - When false, nothing is written.
 * @param message - Text placed after the prefix; a newline is appended.
 */
function writeQaSuiteProgress(enabled: boolean, message: string) {
  if (enabled) {
    process.stderr.write(`[qa-suite] ${message}\n`);
  }
}
|
||||
|
||||
/**
 * Makes a value safe to embed in a single progress line.
 *
 * Control characters (U+0000–U+001F and U+007F–U+009F) become spaces, runs of
 * whitespace collapse to one space, and the result is trimmed.
 *
 * @param value - Raw value, e.g. a scenario id.
 * @returns The sanitized text, or `"<empty>"` when nothing printable remains.
 */
function sanitizeQaSuiteProgressValue(value: string): string {
  // Array.from walks the string by code point, like a for...of loop would.
  const withoutControls = Array.from(value, (char) => {
    const code = char.codePointAt(0);
    if (code === undefined) {
      return "";
    }
    const isControl = code <= 0x1f || (code >= 0x7f && code <= 0x9f);
    return isControl ? " " : char;
  }).join("");

  const collapsed = withoutControls.replace(/\s+/gu, " ").trim();
  return collapsed.length > 0 ? collapsed : "<empty>";
}
|
||||
|
||||
function requireQaSuiteStartLab(startLab: QaSuiteStartLabFn | undefined): QaSuiteStartLabFn {
|
||||
if (startLab) {
|
||||
return startLab;
|
||||
@@ -223,28 +267,7 @@ export type QaSuiteSummaryJsonParams = {
|
||||
* import this type instead of re-declaring the shape, so changes to the
|
||||
* summary schema propagate through to every consumer at type-check time.
|
||||
*/
|
||||
// The summary shape is declared once in ./suite-summary.js and re-exported
// here under the same name; a second local declaration would clash with it.
export type { QaSuiteSummaryJson } from "./suite-summary.js";
|
||||
|
||||
/**
|
||||
* Pure-ish JSON builder for qa-suite-summary.json. Exported so the GPT-5.4
|
||||
@@ -268,7 +291,7 @@ export function buildQaSuiteSummaryJson(params: QaSuiteSummaryJsonParams): QaSui
|
||||
counts: {
|
||||
total: params.scenarios.length,
|
||||
passed: params.scenarios.filter((scenario) => scenario.status === "pass").length,
|
||||
failed: params.scenarios.filter((scenario) => scenario.status === "fail").length,
|
||||
failed: countQaSuiteFailedScenarios(params.scenarios),
|
||||
},
|
||||
run: {
|
||||
startedAt: params.startedAt.toISOString(),
|
||||
@@ -359,6 +382,11 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
selectedCatalogScenarios.length,
|
||||
defaultQaSuiteConcurrencyForTransport(transportId),
|
||||
);
|
||||
const progressEnabled = shouldLogQaSuiteProgress();
|
||||
writeQaSuiteProgress(
|
||||
progressEnabled,
|
||||
`run start: scenarios=${selectedCatalogScenarios.length} concurrency=${concurrency} transport=${transportId}`,
|
||||
);
|
||||
|
||||
if (concurrency > 1 && selectedCatalogScenarios.length > 1) {
|
||||
const ownsLab = !params?.lab;
|
||||
@@ -396,6 +424,11 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
selectedCatalogScenarios,
|
||||
concurrency,
|
||||
async (scenario, index): Promise<QaSuiteScenarioResult> => {
|
||||
const scenarioIdForLog = sanitizeQaSuiteProgressValue(scenario.id);
|
||||
writeQaSuiteProgress(
|
||||
progressEnabled,
|
||||
`scenario start (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
|
||||
);
|
||||
liveScenarioOutcomes[index] = {
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
@@ -447,6 +480,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
finishedAt: new Date().toISOString(),
|
||||
};
|
||||
updateScenarioRun();
|
||||
writeQaSuiteProgress(
|
||||
progressEnabled,
|
||||
`scenario ${scenarioResult.status} (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
|
||||
);
|
||||
return scenarioResult;
|
||||
} catch (error) {
|
||||
const details = formatErrorMessage(error);
|
||||
@@ -472,11 +509,16 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
finishedAt: new Date().toISOString(),
|
||||
};
|
||||
updateScenarioRun();
|
||||
writeQaSuiteProgress(
|
||||
progressEnabled,
|
||||
`scenario fail (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
|
||||
);
|
||||
return scenarioResult;
|
||||
}
|
||||
},
|
||||
);
|
||||
const finishedAt = new Date();
|
||||
const failedCount = scenarios.filter((scenario) => scenario.status === "fail").length;
|
||||
lab.setScenarioRun({
|
||||
kind: "suite",
|
||||
status: "completed",
|
||||
@@ -511,6 +553,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
markdown: report,
|
||||
generatedAt: finishedAt.toISOString(),
|
||||
} satisfies QaLabLatestReport);
|
||||
writeQaSuiteProgress(
|
||||
progressEnabled,
|
||||
`run complete: passed=${scenarios.length - failedCount} failed=${failedCount} total=${scenarios.length}`,
|
||||
);
|
||||
return {
|
||||
outputDir,
|
||||
reportPath,
|
||||
@@ -607,6 +653,11 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
});
|
||||
|
||||
for (const [index, scenario] of selectedCatalogScenarios.entries()) {
|
||||
const scenarioIdForLog = sanitizeQaSuiteProgressValue(scenario.id);
|
||||
writeQaSuiteProgress(
|
||||
progressEnabled,
|
||||
`scenario start (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
|
||||
);
|
||||
liveScenarioOutcomes[index] = {
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
@@ -622,6 +673,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
|
||||
const result = await runScenarioDefinition(env, scenario);
|
||||
scenarios.push(result);
|
||||
writeQaSuiteProgress(
|
||||
progressEnabled,
|
||||
`scenario ${result.status} (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
|
||||
);
|
||||
liveScenarioOutcomes[index] = {
|
||||
id: scenario.id,
|
||||
name: scenario.title,
|
||||
@@ -640,6 +695,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
}
|
||||
|
||||
const finishedAt = new Date();
|
||||
const failedCount = scenarios.filter((scenario) => scenario.status === "fail").length;
|
||||
if (scenarios.some((scenario) => scenario.status === "fail")) {
|
||||
preserveGatewayRuntimeDir = path.join(outputDir, "artifacts", "gateway-runtime");
|
||||
}
|
||||
@@ -674,6 +730,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
generatedAt: finishedAt.toISOString(),
|
||||
} satisfies QaLabLatestReport;
|
||||
lab.setLatestReport(latestReport);
|
||||
writeQaSuiteProgress(
|
||||
progressEnabled,
|
||||
`run complete: passed=${scenarios.length - failedCount} failed=${failedCount} total=${scenarios.length}`,
|
||||
);
|
||||
|
||||
return {
|
||||
outputDir,
|
||||
@@ -706,3 +766,9 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Bundles the otherwise module-private progress helpers under one export so
 * they can be exercised directly.
 */
export const qaSuiteProgressTesting = {
  parseQaSuiteBooleanEnv,
  sanitizeQaSuiteProgressValue,
  shouldLogQaSuiteProgress,
};
|
||||
|
||||
19
extensions/qa-matrix/src/run-config.test.ts
Normal file
19
extensions/qa-matrix/src/run-config.test.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { normalizeQaProviderMode } from "./run-config.js";
|
||||
|
||||
describe("matrix qa run config", () => {
  it("defaults to live-frontier when provider mode is omitted", () => {
    // Both a missing value and an empty string count as "omitted".
    for (const omitted of [undefined, ""]) {
      expect(normalizeQaProviderMode(omitted)).toBe("live-frontier");
    }
  });

  it("keeps legacy live-openai as an alias for live-frontier", () => {
    const normalized = normalizeQaProviderMode("live-openai");

    expect(normalized).toBe("live-frontier");
  });

  it("rejects unknown provider modes", () => {
    const normalizeUnknown = () => normalizeQaProviderMode("mystery-mode");

    expect(normalizeUnknown).toThrow("unknown QA provider mode: mystery-mode");
  });
});
|
||||
@@ -2,8 +2,15 @@ export type QaProviderMode = "mock-openai" | "live-frontier";
|
||||
export type QaProviderModeInput = QaProviderMode | "live-openai";
|
||||
|
||||
/**
 * Normalizes a raw provider-mode value into a supported `QaProviderMode`.
 *
 * - `undefined`, `null`, and `""` default to `"live-frontier"`.
 * - `"mock-openai"` and `"live-frontier"` map to themselves.
 * - `"live-openai"` is accepted as a legacy alias for `"live-frontier"`.
 *
 * @param input - Untrusted mode value.
 * @returns The normalized provider mode.
 * @throws Error for any other value; string inputs are echoed in the message.
 */
export function normalizeQaProviderMode(input: unknown): QaProviderMode {
  if (input === undefined || input === null || input === "") {
    return "live-frontier";
  }
  if (input === "mock-openai") {
    return "mock-openai";
  }
  // No unconditional fallback here: anything that is not an explicitly known
  // mode must reach the throw below instead of silently becoming live-frontier.
  if (input === "live-frontier" || input === "live-openai") {
    return "live-frontier";
  }
  // Only string inputs are echoed; other types get the bare message.
  const details = typeof input === "string" ? `: ${input}` : "";
  throw new Error(`unknown QA provider mode${details}`);
}
|
||||
|
||||
Reference in New Issue
Block a user