qa-lab: make live lanes CI-ready for v1 E2E automation (#69122)

* qa-lab: harden CI defaults and failure semantics for live lanes

* qa-lab: add unit tests for suite progress logging defaults

* qa-lab: cover malformed multipass summary edge cases

* qa-lab: share suite summary failure counting helper

* qa-lab: test allow-failures parse wiring and sanitize progress ids

* fix: note qa CI live-lane defaults in changelog (#69122) (thanks @joshavant)
This commit is contained in:
Josh Avant
2026-04-19 21:13:27 -05:00
committed by GitHub
parent 6159b17cdf
commit d5b326523f
21 changed files with 737 additions and 37 deletions

View File

@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
- Plugins/tasks: add a detached runtime registration contract so plugin executors can own detached task lifecycle and cancellation without reaching into core task internals. (#68915) Thanks @mbelinky.
- Terminal/logging: optimize `sanitizeForLog()` by replacing the iterative control-character stripping loop with a single regex pass while preserving the existing ANSI-first sanitization behavior. (#67205) Thanks @bulutmuf.
- QA/CI: make `openclaw qa suite` and `openclaw qa telegram` fail by default when scenarios fail, add `--allow-failures` for artifact-only runs, and tighten live-lane defaults for CI automation. (#69122) Thanks @joshavant.
### Fixes

View File

@@ -80,6 +80,8 @@ disposable server. It requires `OPENCLAW_QA_TELEGRAM_GROUP_ID`,
private group. The SUT bot must have a Telegram username, and bot-to-bot
observation works best when both bots have Bot-to-Bot Communication Mode
enabled in `@BotFather`.
The command exits non-zero when any scenario fails. Use `--allow-failures` when
you want artifacts without a failing exit code.
Live transport lanes now share one smaller contract instead of each inventing
its own scenario-list shape:
@@ -107,9 +109,11 @@ inside the guest, runs `qa suite`, then copies the normal QA report and
summary back into `.artifacts/qa-e2e/...` on the host.
It reuses the same scenario-selection behavior as `qa suite` on the host.
Host and Multipass suite runs execute multiple selected scenarios in parallel
with isolated gateway workers by default, up to 64 workers or the selected
scenario count. Use `--concurrency <count>` to tune the worker count, or
`--concurrency 1` for serial execution.
with isolated gateway workers by default. `qa-channel` defaults to concurrency
4, capped by the selected scenario count. Use `--concurrency <count>` to tune
the worker count, or `--concurrency 1` for serial execution.
The command exits non-zero when any scenario fails. Use `--allow-failures` when
you want artifacts without a failing exit code.
Live runs forward the supported QA auth inputs that are practical for the
guest: env-based provider keys, the QA live provider config path, and
`CODEX_HOME` when present. Keep `--output-dir` under the repo root so the guest

View File

@@ -49,9 +49,11 @@ These commands sit beside the main test suites when you need QA-lab realism:
- `pnpm openclaw qa suite`
- Runs repo-backed QA scenarios directly on the host.
- Runs multiple selected scenarios in parallel by default with isolated
gateway workers, up to 64 workers or the selected scenario count. Use
`--concurrency <count>` to tune the worker count, or `--concurrency 1` for
the older serial lane.
gateway workers. `qa-channel` defaults to concurrency 4 (bounded by the
selected scenario count). Use `--concurrency <count>` to tune the worker
count, or `--concurrency 1` for the older serial lane.
- Exits non-zero when any scenario fails. Use `--allow-failures` when you
want artifacts without a failing exit code.
- Supports provider modes `live-frontier`, `mock-openai`, and `aimock`.
`aimock` starts a local AIMock-backed provider server for experimental
fixture and protocol-mock coverage without replacing the scenario-aware
@@ -86,6 +88,8 @@ These commands sit beside the main test suites when you need QA-lab realism:
- Runs the Telegram live QA lane against a real private group using the driver and SUT bot tokens from env.
- Requires `OPENCLAW_QA_TELEGRAM_GROUP_ID`, `OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN`, and `OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN`. The group id must be the numeric Telegram chat id.
- Supports `--credential-source convex` for shared pooled credentials. Use env mode by default, or set `OPENCLAW_QA_CREDENTIAL_SOURCE=convex` to opt into pooled leases.
- Exits non-zero when any scenario fails. Use `--allow-failures` when you
want artifacts without a failing exit code.
- Requires two distinct bots in the same private group, with the SUT bot exposing a Telegram username.
- For stable bot-to-bot observation, enable Bot-to-Bot Communication Mode in `@BotFather` for both bots and ensure the driver bot can observe group bot traffic.
- Writes a Telegram QA report, summary, and observed-messages artifact under `.artifacts/qa-e2e/...`.
@@ -118,7 +122,7 @@ Required env vars:
- `OPENCLAW_QA_CONVEX_SECRET_CI` for `ci`
- Credential role selection:
- CLI: `--credential-role maintainer|ci`
- Env default: `OPENCLAW_QA_CREDENTIAL_ROLE` (defaults to `maintainer`)
- Env default: `OPENCLAW_QA_CREDENTIAL_ROLE` (defaults to `ci` in CI, `maintainer` otherwise)
Optional env vars:

View File

@@ -103,6 +103,7 @@ describe("qa cli runtime", () => {
watchUrl: "http://127.0.0.1:43124",
reportPath: "/tmp/report.md",
summaryPath: "/tmp/summary.json",
scenarios: [],
});
runQaCharacterEval.mockResolvedValue({
reportPath: "/tmp/character-report.md",
@@ -199,6 +200,7 @@ describe("qa cli runtime", () => {
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
fastMode: true,
allowFailures: undefined,
scenarioIds: ["telegram-help-command"],
sutAccountId: "sut-live",
});
@@ -223,10 +225,68 @@ describe("qa cli runtime", () => {
expect.objectContaining({
repoRoot: path.resolve("/tmp/openclaw-repo"),
providerMode: "live-frontier",
allowFailures: undefined,
}),
);
});
// Regression: a failed telegram scenario must surface as exit code 1 so CI
// lanes fail instead of silently publishing artifacts.
it("sets a failing exit code when telegram scenarios fail", async () => {
  // Save and clear any exit code left by earlier tests so the assertion
  // observes only this command's effect.
  const priorExitCode = process.exitCode;
  process.exitCode = undefined;
  runTelegramQaLive.mockResolvedValueOnce({
    outputDir: "/tmp/telegram",
    reportPath: "/tmp/telegram/report.md",
    summaryPath: "/tmp/telegram/summary.json",
    observedMessagesPath: "/tmp/telegram/observed.json",
    scenarios: [
      {
        id: "telegram-help-command",
        title: "Telegram help command reply",
        status: "fail",
        details: "missing expected text",
      },
    ],
  });
  try {
    await runQaTelegramCommand({
      repoRoot: "/tmp/openclaw-repo",
    });
    expect(process.exitCode).toBe(1);
  } finally {
    // Restore the prior exit code so this test does not leak state.
    process.exitCode = priorExitCode;
  }
});
// With --allow-failures, the same failing scenario payload must leave the
// exit code untouched (artifact-only run).
it("keeps telegram exit code clear when --allow-failures is set", async () => {
  const priorExitCode = process.exitCode;
  process.exitCode = undefined;
  runTelegramQaLive.mockResolvedValueOnce({
    outputDir: "/tmp/telegram",
    reportPath: "/tmp/telegram/report.md",
    summaryPath: "/tmp/telegram/summary.json",
    observedMessagesPath: "/tmp/telegram/observed.json",
    scenarios: [
      {
        id: "telegram-help-command",
        title: "Telegram help command reply",
        status: "fail",
        details: "missing expected text",
      },
    ],
  });
  try {
    await runQaTelegramCommand({
      repoRoot: "/tmp/openclaw-repo",
      allowFailures: true,
    });
    expect(process.exitCode).toBeUndefined();
  } finally {
    process.exitCode = priorExitCode;
  }
});
it("passes host suite concurrency through", async () => {
await runQaSuiteCommand({
repoRoot: "/tmp/openclaw-repo",
@@ -244,6 +304,59 @@ describe("qa cli runtime", () => {
);
});
// Regression: a failed host-suite scenario must surface as exit code 1 so
// CI lanes fail instead of silently publishing artifacts.
it("sets a failing exit code when host suite scenarios fail", async () => {
  // Save and clear any exit code left by earlier tests so the assertion
  // observes only this command's effect.
  const priorExitCode = process.exitCode;
  process.exitCode = undefined;
  runQaSuiteFromRuntime.mockResolvedValueOnce({
    watchUrl: "http://127.0.0.1:43124",
    reportPath: "/tmp/report.md",
    summaryPath: "/tmp/summary.json",
    scenarios: [
      {
        name: "channel chat baseline",
        status: "fail",
        steps: [],
      },
    ],
  });
  try {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
    });
    expect(process.exitCode).toBe(1);
  } finally {
    // Restore the prior exit code so this test does not leak state.
    process.exitCode = priorExitCode;
  }
});
// With --allow-failures, the same failing payload must leave the exit code
// untouched (artifact-only run).
it("keeps host suite exit code clear when --allow-failures is set", async () => {
  const priorExitCode = process.exitCode;
  process.exitCode = undefined;
  runQaSuiteFromRuntime.mockResolvedValueOnce({
    watchUrl: "http://127.0.0.1:43124",
    reportPath: "/tmp/report.md",
    summaryPath: "/tmp/summary.json",
    scenarios: [
      {
        name: "channel chat baseline",
        status: "fail",
        steps: [],
      },
    ],
  });
  try {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      allowFailures: true,
    });
    expect(process.exitCode).toBeUndefined();
  } finally {
    process.exitCode = priorExitCode;
  }
});
it("passes host suite CLI auth mode through", async () => {
await runQaSuiteCommand({
repoRoot: "/tmp/openclaw-repo",
@@ -475,6 +588,7 @@ describe("qa cli runtime", () => {
runner: "multipass",
providerMode: "mock-openai",
scenarioIds: ["channel-chat-baseline"],
allowFailures: true,
concurrency: 3,
image: "lts",
cpus: 2,
@@ -490,6 +604,7 @@ describe("qa cli runtime", () => {
primaryModel: undefined,
alternateModel: undefined,
fastMode: undefined,
allowFailures: true,
scenarioIds: ["channel-chat-baseline"],
concurrency: 3,
image: "lts",
@@ -508,6 +623,7 @@ describe("qa cli runtime", () => {
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
fastMode: true,
allowFailures: true,
scenarioIds: ["channel-chat-baseline"],
});
@@ -519,11 +635,171 @@ describe("qa cli runtime", () => {
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
fastMode: true,
allowFailures: true,
scenarioIds: ["channel-chat-baseline"],
}),
);
});
// Multipass runs report results via a summary JSON copied back from the
// guest; a failed count in that file must set exit code 1 on the host.
it("sets a failing exit code when multipass summary reports failed scenarios", async () => {
  const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
  await fs.writeFile(
    summaryPath,
    JSON.stringify({
      counts: {
        total: 2,
        passed: 1,
        failed: 1,
      },
    }),
    "utf8",
  );
  runQaMultipass.mockResolvedValueOnce({
    outputDir: repoRoot,
    reportPath: path.join(repoRoot, "qa-suite-report.md"),
    summaryPath,
    hostLogPath: path.join(repoRoot, "multipass-host.log"),
    bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
    guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });
  const priorExitCode = process.exitCode;
  process.exitCode = undefined;
  try {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      runner: "multipass",
    });
    expect(process.exitCode).toBe(1);
  } finally {
    // Restore exit code and remove the temp dir even on assertion failure.
    process.exitCode = priorExitCode;
    await fs.rm(repoRoot, { recursive: true, force: true });
  }
});
// Invalid JSON in the summary must surface as a parse error rather than
// being silently treated as a passing run.
it("rejects malformed multipass summary JSON", async () => {
  const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
  await fs.writeFile(summaryPath, "{not-json", "utf8");
  runQaMultipass.mockResolvedValueOnce({
    outputDir: repoRoot,
    reportPath: path.join(repoRoot, "qa-suite-report.md"),
    summaryPath,
    hostLogPath: path.join(repoRoot, "multipass-host.log"),
    bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
    guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });
  try {
    await expect(
      runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        runner: "multipass",
      }),
    ).rejects.toThrow("Could not parse QA summary JSON");
  } finally {
    await fs.rm(repoRoot, { recursive: true, force: true });
  }
});
// A missing summary file must produce the read-specific error message,
// distinct from the parse error above.
it("rejects unreadable multipass summary JSON with read/parse wording", async () => {
  const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
  // Note: the summary file is intentionally never written in this test.
  runQaMultipass.mockResolvedValueOnce({
    outputDir: repoRoot,
    reportPath: path.join(repoRoot, "qa-suite-report.md"),
    summaryPath,
    hostLogPath: path.join(repoRoot, "multipass-host.log"),
    bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
    guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });
  try {
    await expect(
      runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        runner: "multipass",
      }),
    ).rejects.toThrow("Could not read QA summary JSON");
  } finally {
    await fs.rm(repoRoot, { recursive: true, force: true });
  }
});
// Valid JSON that exposes neither counts.failed nor scenarios[].status is
// treated as malformed instead of being assumed to pass.
it("rejects partial multipass summary JSON without failure fields", async () => {
  const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
  await fs.writeFile(summaryPath, JSON.stringify({ counts: { total: 2, passed: 2 } }), "utf8");
  runQaMultipass.mockResolvedValueOnce({
    outputDir: repoRoot,
    reportPath: path.join(repoRoot, "qa-suite-report.md"),
    summaryPath,
    hostLogPath: path.join(repoRoot, "multipass-host.log"),
    bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
    guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });
  try {
    await expect(
      runQaSuiteCommand({
        repoRoot: "/tmp/openclaw-repo",
        runner: "multipass",
      }),
    ).rejects.toThrow("did not include counts.failed or scenarios[].status");
  } finally {
    await fs.rm(repoRoot, { recursive: true, force: true });
  }
});
// --allow-failures must suppress the failing exit code even when the guest
// summary reports failures, so artifact-only runs stay green.
it("keeps multipass exit code clear when --allow-failures is set", async () => {
  const repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "qa-multipass-summary-"));
  const summaryPath = path.join(repoRoot, "qa-suite-summary.json");
  await fs.writeFile(
    summaryPath,
    JSON.stringify({
      counts: {
        total: 2,
        passed: 1,
        failed: 1,
      },
    }),
    "utf8",
  );
  runQaMultipass.mockResolvedValueOnce({
    outputDir: repoRoot,
    reportPath: path.join(repoRoot, "qa-suite-report.md"),
    summaryPath,
    hostLogPath: path.join(repoRoot, "multipass-host.log"),
    bootstrapLogPath: path.join(repoRoot, "multipass-guest-bootstrap.log"),
    guestScriptPath: path.join(repoRoot, "multipass-guest-run.sh"),
    vmName: "openclaw-qa-test",
    scenarioIds: ["channel-chat-baseline"],
  });
  const priorExitCode = process.exitCode;
  process.exitCode = undefined;
  try {
    await runQaSuiteCommand({
      repoRoot: "/tmp/openclaw-repo",
      runner: "multipass",
      allowFailures: true,
    });
    expect(process.exitCode).toBeUndefined();
  } finally {
    process.exitCode = priorExitCode;
    await fs.rm(repoRoot, { recursive: true, force: true });
  }
});
it("passes provider-qualified mock parity suite selection through to the host runner", async () => {
await runQaSuiteCommand({
repoRoot: "/tmp/openclaw-repo",

View File

@@ -39,6 +39,7 @@ import {
} from "./run-config.js";
import { readQaScenarioPack } from "./scenario-catalog.js";
import { runQaSuiteFromRuntime } from "./suite-launch.runtime.js";
import { readQaSuiteFailedScenarioCountFromSummary } from "./suite-summary.js";
type InterruptibleServer = {
baseUrl: string;
@@ -121,6 +122,34 @@ function parseQaPositiveIntegerOption(label: string, value: number | undefined)
return Math.floor(value);
}
/**
 * Loads the QA suite summary JSON from disk and returns the number of failed
 * scenarios it reports.
 *
 * Read failures and JSON parse failures throw distinct, path-qualified
 * errors (with `cause` preserved) so callers can tell a missing/unreadable
 * artifact apart from a corrupt one. A summary that exposes neither
 * `counts.failed` nor `scenarios[].status` is rejected as malformed.
 */
async function readQaFailedScenarioCountFromSummary(summaryPath: string) {
  let rawSummary: string;
  try {
    rawSummary = await fs.readFile(summaryPath, "utf8");
  } catch (error) {
    throw new Error(
      `Could not read QA summary JSON at ${summaryPath}: ${formatErrorMessage(error)}`,
      { cause: error },
    );
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(rawSummary) as unknown;
  } catch (error) {
    throw new Error(
      `Could not parse QA summary JSON at ${summaryPath}: ${formatErrorMessage(error)}`,
      { cause: error },
    );
  }
  const failedCount = readQaSuiteFailedScenarioCountFromSummary(parsed);
  if (failedCount === null) {
    throw new Error(
      `QA summary at ${summaryPath} did not include counts.failed or scenarios[].status.`,
    );
  }
  return failedCount;
}
function parseQaCliBackendAuthMode(value: string | undefined): QaCliBackendAuthMode | undefined {
const normalized = value?.trim().toLowerCase();
if (!normalized) {
@@ -329,6 +358,7 @@ export async function runQaSuiteCommand(opts: {
parityPack?: string;
scenarioIds?: string[];
concurrency?: number;
allowFailures?: boolean;
image?: string;
cpus?: number;
memory?: string;
@@ -341,6 +371,7 @@ export async function runQaSuiteCommand(opts: {
parityPack: opts.parityPack,
scenarioIds: opts.scenarioIds,
});
const allowFailures = opts.allowFailures === true;
if (runner !== "host" && runner !== "multipass") {
throw new Error(`--runner must be one of host or multipass, got "${opts.runner}".`);
}
@@ -367,6 +398,7 @@ export async function runQaSuiteCommand(opts: {
primaryModel: opts.primaryModel,
alternateModel: opts.alternateModel,
fastMode: opts.fastMode,
allowFailures: true,
scenarioIds,
...(opts.concurrency !== undefined
? { concurrency: parseQaPositiveIntegerOption("--concurrency", opts.concurrency) }
@@ -381,6 +413,12 @@ export async function runQaSuiteCommand(opts: {
process.stdout.write(`QA Multipass summary: ${result.summaryPath}\n`);
process.stdout.write(`QA Multipass host log: ${result.hostLogPath}\n`);
process.stdout.write(`QA Multipass bootstrap log: ${result.bootstrapLogPath}\n`);
if (!allowFailures) {
const failedScenarioCount = await readQaFailedScenarioCountFromSummary(result.summaryPath);
if (failedScenarioCount > 0) {
process.exitCode = 1;
}
}
return;
}
const result = await runQaSuiteFromRuntime({
@@ -400,6 +438,10 @@ export async function runQaSuiteCommand(opts: {
process.stdout.write(`QA suite watch: ${result.watchUrl}\n`);
process.stdout.write(`QA suite report: ${result.reportPath}\n`);
process.stdout.write(`QA suite summary: ${result.summaryPath}\n`);
const failedScenarioCount = readQaSuiteFailedScenarioCountFromSummary(result);
if (!allowFailures && failedScenarioCount !== null && failedScenarioCount > 0) {
process.exitCode = 1;
}
}
export async function runQaParityReportCommand(opts: {

View File

@@ -46,6 +46,7 @@ const {
runQaCredentialsRemoveCommand,
runQaCoverageReportCommand,
runQaProviderServerCommand,
runQaSuiteCommand,
runQaTelegramCommand,
} = vi.hoisted(() => ({
runQaCredentialsAddCommand: vi.fn(),
@@ -53,6 +54,7 @@ const {
runQaCredentialsRemoveCommand: vi.fn(),
runQaCoverageReportCommand: vi.fn(),
runQaProviderServerCommand: vi.fn(),
runQaSuiteCommand: vi.fn(),
runQaTelegramCommand: vi.fn(),
}));
@@ -76,6 +78,7 @@ vi.mock("./cli.runtime.js", () => ({
runQaCredentialsRemoveCommand,
runQaCoverageReportCommand,
runQaProviderServerCommand,
runQaSuiteCommand,
}));
import { registerQaLabCli } from "./cli.js";
@@ -90,6 +93,7 @@ describe("qa cli registration", () => {
runQaCredentialsRemoveCommand.mockReset();
runQaCoverageReportCommand.mockReset();
runQaProviderServerCommand.mockReset();
runQaSuiteCommand.mockReset();
runQaTelegramCommand.mockReset();
listQaRunnerCliContributions
.mockReset()
@@ -188,6 +192,7 @@ describe("qa cli registration", () => {
primaryModel: undefined,
alternateModel: undefined,
fastMode: false,
allowFailures: false,
scenarioIds: [],
sutAccountId: "sut",
credentialSource: undefined,
@@ -195,6 +200,26 @@ describe("qa cli registration", () => {
});
});
// The commander wiring must translate the --allow-failures flag into the
// runtime command options for the telegram lane.
it("forwards --allow-failures for telegram runs", async () => {
  await program.parseAsync(["node", "openclaw", "qa", "telegram", "--allow-failures"]);
  expect(runQaTelegramCommand).toHaveBeenCalledWith(
    expect.objectContaining({
      allowFailures: true,
    }),
  );
});
// Same flag wiring check for the suite subcommand.
it("forwards --allow-failures for suite runs", async () => {
  await program.parseAsync(["node", "openclaw", "qa", "suite", "--allow-failures"]);
  expect(runQaSuiteCommand).toHaveBeenCalledWith(
    expect.objectContaining({
      allowFailures: true,
    }),
  );
});
it("routes credential add flags into the qa runtime command", async () => {
await program.parseAsync([
"node",

View File

@@ -35,6 +35,7 @@ async function runQaSuite(opts: {
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
allowFailures?: boolean;
cliAuthMode?: string;
parityPack?: string;
scenarioIds?: string[];
@@ -238,6 +239,11 @@ export function registerQaLabCli(program: Command) {
.option("--concurrency <count>", "Scenario worker concurrency", (value: string) =>
Number(value),
)
.option(
"--allow-failures",
"Write artifacts without setting a failing exit code when scenarios fail",
false,
)
.option("--fast", "Enable provider fast mode where supported", false)
.option("--image <alias>", "Multipass image alias")
.option("--cpus <count>", "Multipass vCPU count", (value: string) => Number(value))
@@ -256,6 +262,7 @@ export function registerQaLabCli(program: Command) {
parityPack?: string;
scenario?: string[];
concurrency?: number;
allowFailures?: boolean;
fast?: boolean;
image?: string;
cpus?: number;
@@ -275,6 +282,7 @@ export function registerQaLabCli(program: Command) {
parityPack: opts.parityPack,
scenarioIds: opts.scenario,
concurrency: opts.concurrency,
allowFailures: opts.allowFailures,
image: opts.image,
cpus: opts.cpus,
memory: opts.memory,

View File

@@ -80,6 +80,65 @@ describe("credential lease runtime", () => {
expect(headers.authorization).toBe("Bearer maintainer-secret");
});
// Without CI in the env, the convex credential role default stays
// "maintainer", so the maintainer secret backs the broker auth header.
it("defaults convex credential role to maintainer outside CI", async () => {
  const fetchImpl = vi.fn<typeof fetch>().mockResolvedValueOnce(
    jsonResponse({
      status: "ok",
      credentialId: "cred-maintainer-default",
      leaseToken: "lease-maintainer-default",
      payload: { groupId: "-100123", driverToken: "driver", sutToken: "sut" },
    }),
  );
  await acquireQaCredentialLease({
    kind: "telegram",
    source: "convex",
    env: {
      OPENCLAW_QA_CONVEX_SITE_URL: "https://qa-cred.example.convex.site",
      OPENCLAW_QA_CONVEX_SECRET_MAINTAINER: "maintainer-secret",
    },
    fetchImpl,
    resolveEnvPayload: () => ({ groupId: "-1", driverToken: "unused", sutToken: "unused" }),
    parsePayload: (payload) =>
      payload as { groupId: string; driverToken: string; sutToken: string },
  });
  // Inspect the first fetch call's headers to confirm which secret was used.
  const firstCall = fetchImpl.mock.calls[0];
  const firstInit = firstCall?.[1];
  const headers = firstInit?.headers as Record<string, string>;
  expect(headers.authorization).toBe("Bearer maintainer-secret");
});
// CI=true flips the default role to "ci", selecting the CI secret instead.
it("defaults convex credential role to ci when CI=true", async () => {
  const fetchImpl = vi.fn<typeof fetch>().mockResolvedValueOnce(
    jsonResponse({
      status: "ok",
      credentialId: "cred-ci-default",
      leaseToken: "lease-ci-default",
      payload: { groupId: "-100123", driverToken: "driver", sutToken: "sut" },
    }),
  );
  await acquireQaCredentialLease({
    kind: "telegram",
    source: "convex",
    env: {
      CI: "true",
      OPENCLAW_QA_CONVEX_SITE_URL: "https://qa-cred.example.convex.site",
      OPENCLAW_QA_CONVEX_SECRET_CI: "ci-secret",
    },
    fetchImpl,
    resolveEnvPayload: () => ({ groupId: "-1", driverToken: "unused", sutToken: "unused" }),
    parsePayload: (payload) =>
      payload as { groupId: string; driverToken: string; sutToken: string },
  });
  const firstCall = fetchImpl.mock.calls[0];
  const firstInit = firstCall?.[1];
  const headers = firstInit?.headers as Record<string, string>;
  expect(headers.authorization).toBe("Bearer ci-secret");
});
it("retries convex acquire while the pool is exhausted", async () => {
const fetchImpl = vi
.fn<typeof fetch>()

View File

@@ -114,8 +114,12 @@ function normalizeQaCredentialSource(value: string | undefined): QaCredentialLea
throw new Error(`Credential source must be one of env or convex, got "${value}".`);
}
function normalizeQaCredentialRole(value: string | undefined): QaCredentialRole {
const normalized = value?.trim().toLowerCase() || "maintainer";
function normalizeQaCredentialRole(
value: string | undefined,
env: NodeJS.ProcessEnv = process.env,
): QaCredentialRole {
const defaultRole = isTruthyOptIn(env.CI) ? "ci" : "maintainer";
const normalized = value?.trim().toLowerCase() || defaultRole;
if (normalized === "maintainer" || normalized === "ci") {
return normalized;
}
@@ -350,7 +354,7 @@ export async function acquireQaCredentialLease<TPayload>(
};
}
const role = normalizeQaCredentialRole(opts.role ?? env.OPENCLAW_QA_CREDENTIAL_ROLE);
const role = normalizeQaCredentialRole(opts.role ?? env.OPENCLAW_QA_CREDENTIAL_ROLE, env);
const config = resolveConvexCredentialBrokerConfig({
env,
role,

View File

@@ -24,6 +24,7 @@ export function resolveLiveTransportQaRunOptions(
primaryModel: opts.primaryModel,
alternateModel: opts.alternateModel,
fastMode: opts.fastMode,
allowFailures: opts.allowFailures,
scenarioIds: opts.scenarioIds,
sutAccountId: opts.sutAccountId,
credentialSource: opts.credentialSource?.trim(),

View File

@@ -10,6 +10,7 @@ export type LiveTransportQaCommandOptions = {
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
allowFailures?: boolean;
scenarioIds?: string[];
sutAccountId?: string;
credentialSource?: string;
@@ -24,6 +25,7 @@ type LiveTransportQaCommanderOptions = {
altModel?: string;
scenario?: string[];
fast?: boolean;
allowFailures?: boolean;
sutAccount?: string;
credentialSource?: string;
credentialRole?: string;
@@ -57,6 +59,7 @@ export function mapLiveTransportQaCommanderOptions(
primaryModel: opts.model,
alternateModel: opts.altModel,
fastMode: opts.fast,
allowFailures: opts.allowFailures,
scenarioIds: opts.scenario,
sutAccountId: opts.sutAccount,
credentialSource: opts.credentialSource,
@@ -84,6 +87,11 @@ export function registerLiveTransportQaCli(params: {
.option("--alt-model <ref>", "Alternate provider/model ref")
.option("--scenario <id>", params.scenarioHelp, collectString, [])
.option("--fast", "Enable provider fast mode where supported", false)
.option(
"--allow-failures",
"Write artifacts without setting a failing exit code when scenarios fail",
false,
)
.option("--sut-account <id>", params.sutAccountHelp, "sut");
if (params.credentialOptions) {

View File

@@ -6,10 +6,17 @@ import {
import { runTelegramQaLive } from "./telegram-live.runtime.js";
/**
 * Runs the Telegram live QA lane, prints the artifact locations, and marks
 * the process as failed when any scenario failed — unless the resolved
 * options carry allowFailures (the --allow-failures artifact-only mode).
 */
export async function runQaTelegramCommand(opts: LiveTransportQaCommandOptions) {
  const runOptions = resolveLiveTransportQaRunOptions(opts);
  const result = await runTelegramQaLive(runOptions);
  printLiveTransportQaArtifacts("Telegram QA", {
    report: result.reportPath,
    summary: result.summaryPath,
    "observed messages": result.observedMessagesPath,
  });
  const hasFailedScenario = result.scenarios.some(
    (scenario) => scenario.status === "fail",
  );
  if (hasFailedScenario && !runOptions.allowFailures) {
    // Surface scenario failures through the process exit code for CI.
    process.exitCode = 1;
  }
}

View File

@@ -22,7 +22,8 @@ export const telegramQaCliRegistration: LiveTransportQaCliRegistration =
commandName: "telegram",
credentialOptions: {
sourceDescription: "Credential source for Telegram QA: env or convex (default: env)",
roleDescription: "Credential role for convex auth: maintainer or ci (default: maintainer)",
roleDescription:
"Credential role for convex auth: maintainer or ci (default: ci in CI, maintainer otherwise)",
},
description: "Run the manual Telegram live QA lane against a private bot-to-bot group harness",
outputDirHelp: "Telegram QA artifact directory",

View File

@@ -140,6 +140,17 @@ describe("qa multipass runtime", () => {
expect(script).toContain("'--provider-mode' 'live-frontier'");
});
// The generated guest command must carry --allow-failures so the suite run
// inside the VM mirrors what was requested on the host.
it("forwards --allow-failures into the guest qa suite command when requested", () => {
  const plan = createQaMultipassPlan({
    repoRoot: process.cwd(),
    outputDir: path.join(process.cwd(), ".artifacts", "qa-e2e", "multipass-allow-failures-test"),
    allowFailures: true,
    scenarioIds: ["channel-chat-baseline"],
  });
  expect(plan.qaCommand).toEqual(expect.arrayContaining(["--allow-failures"]));
});
it("redacts forwarded live secrets in the persisted artifact script", () => {
vi.stubEnv("OPENAI_API_KEY", "test-openai-key");
const plan = createQaMultipassPlan({

View File

@@ -237,6 +237,7 @@ export function createQaMultipassPlan(params: {
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
allowFailures?: boolean;
scenarioIds?: string[];
concurrency?: number;
image?: string;
@@ -275,6 +276,7 @@ export function createQaMultipassPlan(params: {
...(params.primaryModel ? ["--model", params.primaryModel] : []),
...(params.alternateModel ? ["--alt-model", params.alternateModel] : []),
...(params.fastMode ? ["--fast"] : []),
...(params.allowFailures ? ["--allow-failures"] : []),
...(params.concurrency ? ["--concurrency", String(params.concurrency)] : []),
],
scenarioIds,
@@ -544,6 +546,7 @@ export async function runQaMultipass(params: {
primaryModel?: string;
alternateModel?: string;
fastMode?: boolean;
allowFailures?: boolean;
scenarioIds?: string[];
concurrency?: number;
image?: string;

View File

@@ -0,0 +1,36 @@
import { describe, expect, it } from "vitest";
import {
countQaSuiteFailedScenarios,
readQaSuiteFailedScenarioCountFromSummary,
} from "./suite-summary.js";
// Unit tests for the shared QA suite summary failure-counting helpers.
describe("qa suite summary helpers", () => {
  // Counting walks scenario statuses and tallies only "fail" entries.
  it("counts failed scenarios from scenario statuses", () => {
    expect(
      countQaSuiteFailedScenarios([{ status: "pass" }, { status: "fail" }, { status: "fail" }]),
    ).toBe(2);
  });
  // counts.failed wins over the scenarios array, and non-integer values are
  // floored.
  it("prefers counts.failed when available", () => {
    expect(
      readQaSuiteFailedScenarioCountFromSummary({
        counts: { failed: 3.8 },
        scenarios: [{ status: "pass" }, { status: "fail" }],
      }),
    ).toBe(3);
  });
  // Without counts.failed, the scenarios array is the fallback source.
  it("falls back to scenario statuses when counts.failed is missing", () => {
    expect(
      readQaSuiteFailedScenarioCountFromSummary({
        counts: { total: 2 },
        scenarios: [{ status: "pass" }, { status: "fail" }],
      }),
    ).toBe(1);
  });
  // Shapes with neither field (or non-object payloads) yield null so callers
  // can treat the summary as malformed.
  it("returns null for unsupported summary shapes", () => {
    expect(readQaSuiteFailedScenarioCountFromSummary({ counts: { total: 2 } })).toBeNull();
    expect(readQaSuiteFailedScenarioCountFromSummary("not-json-object")).toBeNull();
  });
});

View File

@@ -0,0 +1,64 @@
import type { QaProviderMode } from "./model-selection.js";
export type QaSuiteSummaryScenario = {
name: string;
status: "pass" | "fail";
steps: unknown[];
details?: string;
};
export type QaSuiteSummaryJson = {
scenarios: QaSuiteSummaryScenario[];
counts: {
total: number;
passed: number;
failed: number;
};
run: {
startedAt: string;
finishedAt: string;
providerMode: QaProviderMode;
primaryModel: string;
primaryProvider: string | null;
primaryModelName: string | null;
alternateModel: string;
alternateProvider: string | null;
alternateModelName: string | null;
fastMode: boolean;
concurrency: number;
scenarioIds: string[] | null;
};
};
type QaSuiteScenarioStatus = Pick<QaSuiteSummaryScenario, "status">;
/**
 * Counts how many scenarios in a QA suite summary report a failing status.
 */
export function countQaSuiteFailedScenarios(
  scenarios: ReadonlyArray<QaSuiteScenarioStatus>,
): number {
  return scenarios.reduce(
    (failed, scenario) => (scenario.status === "fail" ? failed + 1 : failed),
    0,
  );
}

/**
 * Extracts the failed-scenario count from an untrusted, already-parsed QA
 * summary payload.
 *
 * Prefers a finite numeric `counts.failed` (floored, clamped to >= 0) and
 * otherwise falls back to counting `scenarios[].status === "fail"`. Returns
 * null when the payload supports neither shape so callers can treat the
 * summary as malformed.
 */
export function readQaSuiteFailedScenarioCountFromSummary(summary: unknown): number | null {
  if (typeof summary !== "object" || summary === null) {
    return null;
  }
  const candidate = summary as {
    counts?: {
      failed?: unknown;
    };
    scenarios?: Array<QaSuiteScenarioStatus>;
  };
  const reportedFailed = candidate.counts?.failed;
  if (typeof reportedFailed === "number" && Number.isFinite(reportedFailed)) {
    return Math.max(0, Math.floor(reportedFailed));
  }
  return Array.isArray(candidate.scenarios)
    ? countQaSuiteFailedScenarios(candidate.scenarios)
    : null;
}

View File

@@ -1,5 +1,5 @@
import { describe, expect, it, vi } from "vitest";
import { runQaSuite } from "./suite.js";
import { qaSuiteProgressTesting, runQaSuite } from "./suite.js";
describe("qa suite", () => {
it("rejects unsupported transport ids before starting the lab", async () => {
@@ -14,4 +14,58 @@ describe("qa suite", () => {
expect(startLab).not.toHaveBeenCalled();
});
it("parses progress env booleans", () => {
expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("true")).toBe(true);
expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("on")).toBe(true);
expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("false")).toBe(false);
expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("off")).toBe(false);
expect(qaSuiteProgressTesting.parseQaSuiteBooleanEnv("maybe")).toBeUndefined();
});
it("defaults progress logging from CI when no override is set", () => {
expect(qaSuiteProgressTesting.shouldLogQaSuiteProgress({ CI: "true" })).toBe(true);
expect(qaSuiteProgressTesting.shouldLogQaSuiteProgress({ CI: "false" })).toBe(false);
});
it("applies OPENCLAW_QA_SUITE_PROGRESS override and falls back on invalid values", () => {
expect(
qaSuiteProgressTesting.shouldLogQaSuiteProgress({
CI: "false",
OPENCLAW_QA_SUITE_PROGRESS: "true",
}),
).toBe(true);
expect(
qaSuiteProgressTesting.shouldLogQaSuiteProgress({
CI: "true",
OPENCLAW_QA_SUITE_PROGRESS: "false",
}),
).toBe(false);
expect(
qaSuiteProgressTesting.shouldLogQaSuiteProgress({
CI: "false",
OPENCLAW_QA_SUITE_PROGRESS: "on",
}),
).toBe(true);
expect(
qaSuiteProgressTesting.shouldLogQaSuiteProgress({
CI: "true",
OPENCLAW_QA_SUITE_PROGRESS: "off",
}),
).toBe(false);
expect(
qaSuiteProgressTesting.shouldLogQaSuiteProgress({
CI: "true",
OPENCLAW_QA_SUITE_PROGRESS: "definitely",
}),
).toBe(true);
});
it("sanitizes scenario ids for progress logs", () => {
expect(qaSuiteProgressTesting.sanitizeQaSuiteProgressValue("scenario-id")).toBe("scenario-id");
expect(qaSuiteProgressTesting.sanitizeQaSuiteProgressValue("scenario\nid\tvalue")).toBe(
"scenario id value",
);
expect(qaSuiteProgressTesting.sanitizeQaSuiteProgressValue("\u0000\u0001")).toBe("<empty>");
});
});

View File

@@ -46,6 +46,7 @@ import {
import { createQaSuiteScenarioFlowApi } from "./suite-runtime-flow.js";
import { waitForGatewayHealthy, waitForTransportReady } from "./suite-runtime-gateway.js";
import type { QaSuiteRuntimeEnv } from "./suite-runtime-types.js";
import { countQaSuiteFailedScenarios, type QaSuiteSummaryJson } from "./suite-summary.js";
import { closeQaWebSessions } from "./web-runtime.js";
type QaSuiteStep = {
@@ -84,6 +85,49 @@ export type QaSuiteRunParams = {
controlUiEnabled?: boolean;
};
/**
 * Parses a boolean-ish environment variable.
 * Returns true for 1/true/yes/on, false for 0/false/no/off (case-insensitive,
 * trimmed), and undefined for anything else — including empty/missing values.
 */
function parseQaSuiteBooleanEnv(value: string | undefined): boolean | undefined {
  const normalized = value?.trim().toLowerCase();
  if (!normalized) {
    return undefined;
  }
  const truthy = ["1", "true", "yes", "on"];
  const falsy = ["0", "false", "no", "off"];
  if (truthy.includes(normalized)) {
    return true;
  }
  if (falsy.includes(normalized)) {
    return false;
  }
  return undefined;
}
/**
 * Decides whether qa-suite progress lines go to stderr.
 * An explicit OPENCLAW_QA_SUITE_PROGRESS value wins; otherwise progress is on
 * exactly when CI parses as true.
 */
function shouldLogQaSuiteProgress(env: NodeJS.ProcessEnv = process.env) {
  const explicit = parseQaSuiteBooleanEnv(env.OPENCLAW_QA_SUITE_PROGRESS);
  return explicit ?? (parseQaSuiteBooleanEnv(env.CI) === true);
}
/**
 * Emits a single "[qa-suite] ..." progress line to stderr when progress
 * logging is enabled; a no-op otherwise.
 */
function writeQaSuiteProgress(enabled: boolean, message: string) {
  if (enabled) {
    process.stderr.write(`[qa-suite] ${message}\n`);
  }
}
/**
 * Makes an arbitrary string safe for one-line progress logs: C0/C1 control
 * characters become spaces, runs of whitespace collapse to one space, and an
 * all-whitespace result is replaced with the "<empty>" placeholder.
 */
function sanitizeQaSuiteProgressValue(value: string): string {
  const collapsed = value
    // C0 (0x00-0x1f), DEL, and C1 (0x7f-0x9f) controls become spaces.
    .replace(/[\u0000-\u001f\u007f-\u009f]/gu, " ")
    .replace(/\s+/gu, " ")
    .trim();
  return collapsed.length > 0 ? collapsed : "<empty>";
}
function requireQaSuiteStartLab(startLab: QaSuiteStartLabFn | undefined): QaSuiteStartLabFn {
if (startLab) {
return startLab;
@@ -223,28 +267,7 @@ export type QaSuiteSummaryJsonParams = {
* import this type instead of re-declaring the shape, so changes to the
* summary schema propagate through to every consumer at type-check time.
*/
export type QaSuiteSummaryJson = {
scenarios: QaSuiteScenarioResult[];
counts: {
total: number;
passed: number;
failed: number;
};
run: {
startedAt: string;
finishedAt: string;
providerMode: QaProviderMode;
primaryModel: string;
primaryProvider: string | null;
primaryModelName: string | null;
alternateModel: string;
alternateProvider: string | null;
alternateModelName: string | null;
fastMode: boolean;
concurrency: number;
scenarioIds: string[] | null;
};
};
export type { QaSuiteSummaryJson } from "./suite-summary.js";
/**
* Pure-ish JSON builder for qa-suite-summary.json. Exported so the GPT-5.4
@@ -268,7 +291,7 @@ export function buildQaSuiteSummaryJson(params: QaSuiteSummaryJsonParams): QaSui
counts: {
total: params.scenarios.length,
passed: params.scenarios.filter((scenario) => scenario.status === "pass").length,
failed: params.scenarios.filter((scenario) => scenario.status === "fail").length,
failed: countQaSuiteFailedScenarios(params.scenarios),
},
run: {
startedAt: params.startedAt.toISOString(),
@@ -359,6 +382,11 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
selectedCatalogScenarios.length,
defaultQaSuiteConcurrencyForTransport(transportId),
);
const progressEnabled = shouldLogQaSuiteProgress();
writeQaSuiteProgress(
progressEnabled,
`run start: scenarios=${selectedCatalogScenarios.length} concurrency=${concurrency} transport=${transportId}`,
);
if (concurrency > 1 && selectedCatalogScenarios.length > 1) {
const ownsLab = !params?.lab;
@@ -396,6 +424,11 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
selectedCatalogScenarios,
concurrency,
async (scenario, index): Promise<QaSuiteScenarioResult> => {
const scenarioIdForLog = sanitizeQaSuiteProgressValue(scenario.id);
writeQaSuiteProgress(
progressEnabled,
`scenario start (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
);
liveScenarioOutcomes[index] = {
id: scenario.id,
name: scenario.title,
@@ -447,6 +480,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
finishedAt: new Date().toISOString(),
};
updateScenarioRun();
writeQaSuiteProgress(
progressEnabled,
`scenario ${scenarioResult.status} (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
);
return scenarioResult;
} catch (error) {
const details = formatErrorMessage(error);
@@ -472,11 +509,16 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
finishedAt: new Date().toISOString(),
};
updateScenarioRun();
writeQaSuiteProgress(
progressEnabled,
`scenario fail (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
);
return scenarioResult;
}
},
);
const finishedAt = new Date();
const failedCount = scenarios.filter((scenario) => scenario.status === "fail").length;
lab.setScenarioRun({
kind: "suite",
status: "completed",
@@ -511,6 +553,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
markdown: report,
generatedAt: finishedAt.toISOString(),
} satisfies QaLabLatestReport);
writeQaSuiteProgress(
progressEnabled,
`run complete: passed=${scenarios.length - failedCount} failed=${failedCount} total=${scenarios.length}`,
);
return {
outputDir,
reportPath,
@@ -607,6 +653,11 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
});
for (const [index, scenario] of selectedCatalogScenarios.entries()) {
const scenarioIdForLog = sanitizeQaSuiteProgressValue(scenario.id);
writeQaSuiteProgress(
progressEnabled,
`scenario start (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
);
liveScenarioOutcomes[index] = {
id: scenario.id,
name: scenario.title,
@@ -622,6 +673,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
const result = await runScenarioDefinition(env, scenario);
scenarios.push(result);
writeQaSuiteProgress(
progressEnabled,
`scenario ${result.status} (${index + 1}/${selectedCatalogScenarios.length}): ${scenarioIdForLog}`,
);
liveScenarioOutcomes[index] = {
id: scenario.id,
name: scenario.title,
@@ -640,6 +695,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
}
const finishedAt = new Date();
const failedCount = scenarios.filter((scenario) => scenario.status === "fail").length;
if (scenarios.some((scenario) => scenario.status === "fail")) {
preserveGatewayRuntimeDir = path.join(outputDir, "artifacts", "gateway-runtime");
}
@@ -674,6 +730,10 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
generatedAt: finishedAt.toISOString(),
} satisfies QaLabLatestReport;
lab.setLatestReport(latestReport);
writeQaSuiteProgress(
progressEnabled,
`run complete: passed=${scenarios.length - failedCount} failed=${failedCount} total=${scenarios.length}`,
);
return {
outputDir,
@@ -706,3 +766,9 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise<QaSuiteResu
}
}
}
// Internal test-only surface: exposes the progress-logging helpers to unit
// tests without widening the public qa-suite API.
export const qaSuiteProgressTesting = {
  parseQaSuiteBooleanEnv,
  sanitizeQaSuiteProgressValue,
  shouldLogQaSuiteProgress,
};

View File

@@ -0,0 +1,19 @@
import { describe, expect, it } from "vitest";
import { normalizeQaProviderMode } from "./run-config.js";
describe("matrix qa run config", () => {
it("defaults to live-frontier when provider mode is omitted", () => {
expect(normalizeQaProviderMode(undefined)).toBe("live-frontier");
expect(normalizeQaProviderMode("")).toBe("live-frontier");
});
it("keeps legacy live-openai as an alias for live-frontier", () => {
expect(normalizeQaProviderMode("live-openai")).toBe("live-frontier");
});
it("rejects unknown provider modes", () => {
expect(() => normalizeQaProviderMode("mystery-mode")).toThrow(
"unknown QA provider mode: mystery-mode",
);
});
});

View File

@@ -2,8 +2,15 @@ export type QaProviderMode = "mock-openai" | "live-frontier";
export type QaProviderModeInput = QaProviderMode | "live-openai";
/**
 * Normalizes user/CLI provider-mode input to a canonical QaProviderMode.
 *
 * - omitted input (undefined/null/"") defaults to "live-frontier";
 * - "mock-openai" passes through;
 * - legacy "live-openai" is kept as an alias for "live-frontier";
 * - anything else throws so typos fail fast instead of silently selecting a mode.
 */
export function normalizeQaProviderMode(input: unknown): QaProviderMode {
  if (input === undefined || input === null || input === "") {
    return "live-frontier";
  }
  if (input === "mock-openai") {
    return "mock-openai";
  }
  if (input === "live-frontier" || input === "live-openai") {
    return "live-frontier";
  }
  // Only echo the offending value for strings; avoids "[object Object]" noise.
  const details = typeof input === "string" ? `: ${input}` : "";
  throw new Error(`unknown QA provider mode${details}`);
}