feat(qa): add Mantis before-after CLI

This commit is contained in:
Peter Steinberger
2026-05-03 19:13:01 +01:00
parent 3147efbed4
commit d4af125b52
7 changed files with 625 additions and 2 deletions

View File

@@ -72,7 +72,7 @@ pnpm openclaw qa mantis discord-smoke \
--output-dir .artifacts/qa-e2e/mantis/discord-smoke
```
The later before and after runner should accept this shape:
The local before-and-after runner accepts this shape:
```bash
pnpm openclaw qa mantis run \
@@ -83,6 +83,12 @@ pnpm openclaw qa mantis run \
--output-dir .artifacts/qa-e2e/mantis/local-discord-status-reactions
```
The runner creates detached baseline and candidate worktrees under the output
directory, installs dependencies and builds each ref (unless `--skip-install` or
`--skip-build` is passed), runs the scenario with `--allow-failures`, then writes
`baseline/`, `candidate/`, `comparison.json`, and `mantis-report.md`. For the
first Discord scenario, a successful verification means baseline status is
`fail` and candidate status is `pass`.
The GitHub smoke workflow is `Mantis Discord Smoke`. The before and after GitHub
workflow for the first real scenario is `Mantis Discord Status Reactions`. It
accepts:

View File

@@ -47,7 +47,7 @@ script aliases; both forms are supported.
| `qa matrix` | Live transport lane against a disposable Tuwunel homeserver. See [Matrix QA](/concepts/qa-matrix). |
| `qa telegram` | Live transport lane against a real private Telegram group. |
| `qa discord` | Live transport lane against a real private Discord guild channel. |
| `qa mantis` | Planned before and after verification runner for live transport bugs. See [Mantis](/concepts/mantis). |
| `qa mantis` | Before-and-after verification runner for live transport bugs; the first supported scenario is Discord status reactions. See [Mantis](/concepts/mantis). |
## Operator flow

View File

@@ -48,6 +48,7 @@ const {
runQaProviderServerCommand,
runQaSuiteCommand,
runQaTelegramCommand,
runMantisBeforeAfterCommand,
runMantisDiscordSmokeCommand,
} = vi.hoisted(() => ({
runQaCredentialsAddCommand: vi.fn(),
@@ -57,6 +58,7 @@ const {
runQaProviderServerCommand: vi.fn(),
runQaSuiteCommand: vi.fn(),
runQaTelegramCommand: vi.fn(),
runMantisBeforeAfterCommand: vi.fn(),
runMantisDiscordSmokeCommand: vi.fn(),
}));
@@ -75,6 +77,7 @@ vi.mock("./live-transports/telegram/cli.runtime.js", () => ({
}));
vi.mock("./mantis/cli.runtime.js", () => ({
runMantisBeforeAfterCommand,
runMantisDiscordSmokeCommand,
}));
@@ -101,6 +104,7 @@ describe("qa cli registration", () => {
runQaProviderServerCommand.mockReset();
runQaSuiteCommand.mockReset();
runQaTelegramCommand.mockReset();
runMantisBeforeAfterCommand.mockReset();
runMantisDiscordSmokeCommand.mockReset();
listQaRunnerCliContributions
.mockReset()
@@ -161,6 +165,49 @@ describe("qa cli registration", () => {
});
});
// Verifies that `qa mantis run` forwards every parsed flag to the mocked runtime command.
// `--fast` and `--provider-mode` are not on the command line below, so the expected
// `fastMode` / `providerMode` values come from the option defaults.
it("routes mantis before/after flags into the mantis runtime command", async () => {
await program.parseAsync([
"node",
"openclaw",
"qa",
"mantis",
"run",
"--transport",
"discord",
"--scenario",
"discord-status-reactions-tool-only",
"--baseline",
"origin/main",
"--candidate",
"HEAD",
"--repo-root",
"/tmp/openclaw-repo",
"--output-dir",
".artifacts/qa-e2e/mantis/local-discord-status-reactions",
"--credential-source",
"convex",
"--credential-role",
"maintainer",
"--skip-install",
"--skip-build",
]);
// Commander's camelCased option names are remapped by the action (`fast` -> `fastMode`).
expect(runMantisBeforeAfterCommand).toHaveBeenCalledWith({
baseline: "origin/main",
candidate: "HEAD",
credentialRole: "maintainer",
credentialSource: "convex",
fastMode: true,
outputDir: ".artifacts/qa-e2e/mantis/local-discord-status-reactions",
providerMode: "live-frontier",
repoRoot: "/tmp/openclaw-repo",
scenario: "discord-status-reactions-tool-only",
skipBuild: true,
skipInstall: true,
transport: "discord",
});
});
it("routes coverage report flags into the qa runtime command", async () => {
await program.parseAsync([
"node",

View File

@@ -1,4 +1,5 @@
import { runMantisDiscordSmoke, type MantisDiscordSmokeOptions } from "./discord-smoke.runtime.js";
import { runMantisBeforeAfter, type MantisBeforeAfterOptions } from "./run.runtime.js";
export async function runMantisDiscordSmokeCommand(opts: MantisDiscordSmokeOptions) {
const result = await runMantisDiscordSmoke(opts);
@@ -8,3 +9,12 @@ export async function runMantisDiscordSmokeCommand(opts: MantisDiscordSmokeOptio
process.exitCode = 1;
}
}
/**
 * CLI-facing wrapper around `runMantisBeforeAfter`.
 *
 * Prints where the report and comparison artifacts were written and flags the
 * process as failed when the before/after comparison did not pass.
 */
export async function runMantisBeforeAfterCommand(opts: MantisBeforeAfterOptions) {
  const { comparisonPath, reportPath, status } = await runMantisBeforeAfter(opts);
  process.stdout.write(`Mantis before/after report: ${reportPath}\n`);
  process.stdout.write(`Mantis before/after comparison: ${comparisonPath}\n`);
  if (status !== "fail") {
    return;
  }
  // Only the exit code is set; the process is left to finish on its own.
  process.exitCode = 1;
}

View File

@@ -1,6 +1,7 @@
import type { Command } from "commander";
import { createLazyCliRuntimeLoader } from "../live-transports/shared/live-transport-cli.js";
import type { MantisDiscordSmokeOptions } from "./discord-smoke.runtime.js";
import type { MantisBeforeAfterOptions } from "./run.runtime.js";
type MantisCliRuntime = typeof import("./cli.runtime.js");
@@ -13,6 +14,11 @@ async function runDiscordSmoke(opts: MantisDiscordSmokeOptions) {
await runtime.runMantisDiscordSmokeCommand(opts);
}
/** Resolves the Mantis CLI runtime module and hands the before/after options to it. */
async function runBeforeAfter(opts: MantisBeforeAfterOptions) {
  const { runMantisBeforeAfterCommand } = await loadMantisCliRuntime();
  await runMantisBeforeAfterCommand(opts);
}
type MantisDiscordSmokeCommanderOptions = {
channelId?: string;
guildId?: string;
@@ -25,11 +31,58 @@ type MantisDiscordSmokeCommanderOptions = {
tokenEnv?: string;
};
/**
 * Option bag received by the `mantis run` action callback.
 * Keys are the camelCased flag names; the action forwards them one-to-one to
 * `runBeforeAfter`, except `fast`, which is passed on as `fastMode`.
 */
type MantisBeforeAfterCommanderOptions = {
baseline?: string;
candidate?: string;
credentialRole?: string;
credentialSource?: string;
// Value of the `--fast` flag; forwarded as `fastMode`.
fast?: boolean;
outputDir?: string;
providerMode?: string;
repoRoot?: string;
scenario?: string;
skipBuild?: boolean;
skipInstall?: boolean;
transport?: string;
};
export function registerMantisCli(qa: Command) {
const mantis = qa
.command("mantis")
.description("Run Mantis before/after and live-smoke verification flows");
// `qa mantis run`: registers the before/after runner and forwards the parsed
// flags to the runtime via `runBeforeAfter`.
mantis
  .command("run")
  .description("Run a Mantis before/after scenario against baseline and candidate refs")
  .requiredOption("--transport <transport>", "Transport to verify; currently only discord")
  .requiredOption("--scenario <id>", "Mantis scenario id to run")
  .requiredOption("--baseline <ref>", "Ref expected to reproduce the bug")
  .requiredOption("--candidate <ref>", "Ref expected to contain the fix")
  .option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
  .option("--output-dir <path>", "Mantis before/after artifact directory")
  .option("--provider-mode <mode>", "QA provider mode", "live-frontier")
  .option("--credential-source <source>", "QA credential source", "convex")
  .option("--credential-role <role>", "QA credential role", "ci")
  .option("--fast", "Enable fast provider mode where supported", true)
  // `--fast` defaults to true, so on its own it could never be switched off.
  // The negated form lets callers opt out while the default stays true.
  .option("--no-fast", "Disable fast provider mode")
  .option("--skip-install", "Skip pnpm install in baseline/candidate worktrees", false)
  .option("--skip-build", "Skip pnpm build in baseline/candidate worktrees", false)
  .action(async (opts: MantisBeforeAfterCommanderOptions) => {
    // Commander exposes the flag as `fast`; the runtime option is named `fastMode`.
    await runBeforeAfter({
      baseline: opts.baseline,
      candidate: opts.candidate,
      credentialRole: opts.credentialRole,
      credentialSource: opts.credentialSource,
      fastMode: opts.fast,
      outputDir: opts.outputDir,
      providerMode: opts.providerMode,
      repoRoot: opts.repoRoot,
      scenario: opts.scenario,
      skipBuild: opts.skipBuild,
      skipInstall: opts.skipInstall,
      transport: opts.transport,
    });
  });
mantis
.command("discord-smoke")
.description("Verify the Mantis Discord bot can see the guild/channel, post, and react")

View File

@@ -0,0 +1,98 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { runMantisBeforeAfter } from "./run.runtime.js";
// Exercises runMantisBeforeAfter with an injected command runner, so no real
// git or pnpm process is spawned; the fake runner produces the lane artifacts.
describe("mantis before/after runtime", () => {
let repoRoot: string;
// Each test gets a fresh temporary directory that stands in for the repository root.
beforeEach(async () => {
repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "mantis-before-after-"));
});
afterEach(async () => {
await fs.rm(repoRoot, { force: true, recursive: true });
});
it("runs baseline and candidate worktrees and writes stable comparison artifacts", async () => {
const commands: { args: readonly string[]; command: string; cwd?: string }[] = [];
// Fake runner: records every command, and for the `pnpm ... openclaw` invocation
// writes the summary JSON and a screenshot file that the runtime reads afterwards.
const runner = vi.fn(async (command: string, args: readonly string[]) => {
commands.push({ command, args });
// Anything other than the QA invocation (here: `git worktree add`) is only recorded.
if (command !== "pnpm" || !args.includes("openclaw")) {
return;
}
const repoRootArg = args[args.indexOf("--repo-root") + 1];
const outputDirArg = args[args.indexOf("--output-dir") + 1];
// The lane is recovered from the output-dir argument, which ends with the lane name.
const lane = outputDirArg.endsWith("baseline") ? "baseline" : "candidate";
// The output dir argument is relative, so it is resolved against the --repo-root argument.
const outputDir = path.join(repoRootArg, outputDirArg);
await fs.mkdir(outputDir, { recursive: true });
const screenshotPath = path.join(outputDir, `${lane}-timeline.png`);
await fs.writeFile(screenshotPath, `${lane} screenshot`);
// Baseline reports `fail`, candidate reports `pass`: the combination the runtime treats as a passing comparison.
await fs.writeFile(
path.join(outputDir, "discord-qa-summary.json"),
`${JSON.stringify(
{
scenarios: [
{
artifactPaths: { screenshot: screenshotPath },
details:
lane === "baseline"
? "reaction timeline missing thinking/done"
: "reaction timeline matched queued -> thinking -> done",
id: "discord-status-reactions-tool-only",
status: lane === "baseline" ? "fail" : "pass",
},
],
},
null,
2,
)}\n`,
);
});
const result = await runMantisBeforeAfter({
baseline: "bug-sha",
candidate: "fix-sha",
commandRunner: runner,
now: () => new Date("2026-05-03T12:00:00.000Z"),
outputDir: ".artifacts/qa-e2e/mantis/test-run",
repoRoot,
skipBuild: true,
skipInstall: true,
});
expect(result.status).toBe("pass");
// With install and build skipped, each lane issues exactly two commands:
// the worktree creation followed by the QA run. Only the first four args are compared.
expect(
commands.map((entry) => [
entry.command,
entry.args[0],
entry.args[1],
entry.args[2],
entry.args[3],
]),
).toEqual([
["git", "worktree", "add", "--detach", expect.stringContaining("baseline")],
["pnpm", "--dir", expect.stringContaining("baseline"), "openclaw", "qa"],
["git", "worktree", "add", "--detach", expect.stringContaining("candidate")],
["pnpm", "--dir", expect.stringContaining("candidate"), "openclaw", "qa"],
]);
const comparison = JSON.parse(await fs.readFile(result.comparisonPath, "utf8")) as {
baseline: { reproduced: boolean; status: string };
candidate: { fixed: boolean; status: string };
pass: boolean;
};
expect(comparison).toMatchObject({
baseline: { reproduced: true, status: "fail" },
candidate: { fixed: true, status: "pass" },
pass: true,
});
// The lane screenshot is expected under a stable `<lane>.png` name in the published lane directory.
await expect(
fs.readFile(path.join(result.outputDir, "baseline", "baseline.png"), "utf8"),
).resolves.toBe("baseline screenshot");
await expect(
fs.readFile(path.join(result.outputDir, "candidate", "candidate.png"), "utf8"),
).resolves.toBe("candidate screenshot");
});
});

View File

@@ -0,0 +1,409 @@
import { spawn, type SpawnOptions } from "node:child_process";
import fs from "node:fs/promises";
import path from "node:path";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import { ensureRepoBoundDirectory, resolveRepoRelativeOutputDir } from "../cli-paths.js";
/**
 * Options accepted by `runMantisBeforeAfter`. Every field is optional; string
 * fields are trimmed and blank values fall back to the module defaults.
 */
export type MantisBeforeAfterOptions = {
// Declared but not read by `runMantisBeforeAfter` in this file; the lane command always passes `--allow-failures`.
allowFailures?: boolean;
// Git ref for the baseline lane; falls back to DEFAULT_BASELINE_REF.
baseline?: string;
// Git ref for the candidate lane; falls back to DEFAULT_CANDIDATE_REF.
candidate?: string;
// Replaces the process-spawning runner (defaults to `defaultCommandRunner`).
commandRunner?: CommandRunner;
// Forwarded as `--credential-role`; falls back to DEFAULT_CREDENTIAL_ROLE.
credentialRole?: string;
// Forwarded as `--credential-source`; falls back to DEFAULT_CREDENTIAL_SOURCE.
credentialSource?: string;
// When true (the default) the lane command receives `--fast`.
fastMode?: boolean;
// Clock used for the run start time, which names the default output directory.
now?: () => Date;
// Artifact directory; when omitted a timestamped directory under `.artifacts/qa-e2e/mantis` is used.
outputDir?: string;
// Forwarded as `--provider-mode`; falls back to DEFAULT_PROVIDER_MODE.
providerMode?: string;
// Repository root; defaults to the current working directory.
repoRoot?: string;
// Scenario id; only DEFAULT_SCENARIO is accepted.
scenario?: string;
// Skip `pnpm build` in each worktree (default false).
skipBuild?: boolean;
// Skip `pnpm install --frozen-lockfile` in each worktree (default false).
skipInstall?: boolean;
// Transport name; only "discord" is accepted.
transport?: string;
};
/** Outcome of `runMantisBeforeAfter`: artifact locations plus the overall verdict. */
export type MantisBeforeAfterResult = {
// Path of `comparison.json` inside `outputDir`.
comparisonPath: string;
// Resolved artifact directory for this run.
outputDir: string;
// Path of `mantis-report.md` inside `outputDir`.
reportPath: string;
// "pass" only when the baseline lane reported `fail` and the candidate lane reported `pass`.
status: "pass" | "fail";
};
/**
 * Signature of the function used to execute external commands (git, pnpm).
 * The default implementation spawns a child process and rejects on a non-zero
 * exit; callers may inject a replacement through `commandRunner`.
 */
type CommandRunner = (
command: string,
args: readonly string[],
options: SpawnOptions,
) => Promise<void>;
/**
 * Shape assumed for the parsed `discord-qa-summary.json` of a lane.
 * The JSON is cast to this type without runtime validation, so every field is optional.
 */
type DiscordQaSummary = {
scenarios?: {
// `artifactPaths.screenshot` is the only key read in this file.
artifactPaths?: Record<string, string>;
details?: string;
// Matched against the requested scenario id.
id?: string;
// A missing status is treated as "fail" by `readLaneResult`.
status?: string;
// Not read in this file.
title?: string;
}[];
};
/** Result of one lane (baseline or candidate) after its artifacts were published. */
type LaneResult = {
// Published lane directory (`<outputDir>/<lane>`).
outputDir: string;
// `details` text of the selected scenario entry, when present.
scenarioDetails?: string;
// Screenshot path from the summary, or the copied `<lane>.png` once `runLane` has copied it.
screenshotPath?: string;
// Scenario status string taken from the summary ("fail" when absent).
status: string;
// Path of the `discord-qa-summary.json` that was read.
summaryPath: string;
};
/** Structure serialized to `comparison.json` and used to render the Markdown report. */
type Comparison = {
baseline: {
expected: "queued-only";
ref: string;
// True when the baseline lane status is "fail".
reproduced: boolean;
screenshotPath?: string;
status: string;
};
candidate: {
expected: "queued -> thinking -> done";
// True when the candidate lane status is "pass".
fixed: boolean;
ref: string;
screenshotPath?: string;
status: string;
};
// True only when the baseline reproduced the bug and the candidate fixed it.
pass: boolean;
scenario: string;
transport: "discord";
};
// Fallbacks applied by `runMantisBeforeAfter` when an option is missing or blank.
// NOTE(review): presumably a commit that still shows the status-reactions bug — confirm.
const DEFAULT_BASELINE_REF = "0bf06e953fdda290799fc9fb9244a8f67fdae593";
const DEFAULT_CANDIDATE_REF = "HEAD";
// Also the only scenario id accepted by the runner.
const DEFAULT_SCENARIO = "discord-status-reactions-tool-only";
const DEFAULT_TRANSPORT = "discord";
const DEFAULT_PROVIDER_MODE = "live-frontier";
// Passed to the lane command as both `--model` and `--alt-model`; not configurable through the options.
const DEFAULT_MODEL = "openai/gpt-5.4";
const DEFAULT_CREDENTIAL_SOURCE = "convex";
const DEFAULT_CREDENTIAL_ROLE = "ci";
/**
 * Normalizes an optional string: strips surrounding whitespace and collapses
 * missing, empty, or whitespace-only input to `undefined`.
 */
function trimToValue(value: string | undefined) {
  if (value === undefined) {
    return undefined;
  }
  const stripped = value.trim();
  return stripped === "" ? undefined : stripped;
}
/**
 * Resolves an optional string option to one of a fixed set of literals.
 *
 * @param value Raw option value; blank or missing values use `defaultValue`.
 * @param defaultValue Literal used when `value` is blank.
 * @param allowed Literals that are accepted.
 * @param label Option name used in the error message (for example `--transport`).
 * @returns The matching literal from `allowed`.
 * @throws Error when the resolved value is not in `allowed`.
 */
function normalizeRequiredLiteral<T extends string>(
  value: string | undefined,
  defaultValue: T,
  allowed: readonly T[],
  label: string,
): T {
  const requested: string = trimToValue(value) ?? defaultValue;
  // Returning the element found in `allowed` avoids asserting the raw string to T.
  const accepted = allowed.find((entry) => entry === requested);
  if (accepted !== undefined) {
    return accepted;
  }
  const choices = allowed.map((entry) => `'${entry}'`).join(" or ");
  throw new Error(`${label} must be ${choices}.`);
}
/**
 * Builds the fallback artifact directory for a run:
 * `<repoRoot>/.artifacts/qa-e2e/mantis/run-<timestamp>`.
 * The ISO timestamp has its colons and dots replaced with dashes.
 */
function defaultOutputDir(repoRoot: string, startedAt: Date) {
  const runDirName = `run-${startedAt.toISOString().replace(/[:.]/gu, "-")}`;
  return path.join(repoRoot, ".artifacts", "qa-e2e", "mantis", runDirName);
}
/**
 * Default `CommandRunner`: spawns the command and settles once the child closes.
 *
 * Resolves on exit code 0. Rejects with the spawn error, or with an Error that
 * names the command line and the exit code / terminating signal.
 * `stdio` falls back to "inherit" when the caller did not set it.
 */
function defaultCommandRunner(
  command: string,
  args: readonly string[],
  options: SpawnOptions,
): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const spawnOptions: SpawnOptions = { ...options, stdio: options.stdio ?? "inherit" };
    spawn(command, args, spawnOptions)
      .on("error", reject)
      .on("close", (code, signal) => {
        if (code !== 0) {
          const reason = signal ? `signal ${signal}` : `exit code ${code ?? "unknown"}`;
          reject(new Error(`${command} ${args.join(" ")} failed with ${reason}`));
          return;
        }
        resolve();
      });
  });
}
/**
 * Invokes the given runner for one command, using the supplied working
 * directory, the current process environment, and inherited stdio.
 */
async function runCommand(params: {
  args: readonly string[];
  command: string;
  cwd: string;
  runner: CommandRunner;
}) {
  const { args, command, cwd, runner } = params;
  await runner(command, args, { cwd, env: process.env, stdio: "inherit" });
}
/**
 * Replaces `targetDir` with a recursive copy of `sourceDir`: the target is
 * removed first, recreated empty, and then populated from the source.
 */
async function copyDirContents(sourceDir: string, targetDir: string) {
  const recursive = { recursive: true } as const;
  await fs.rm(targetDir, { ...recursive, force: true });
  await fs.mkdir(targetDir, recursive);
  await fs.cp(sourceDir, targetDir, recursive);
}
/**
 * Reads `discord-qa-summary.json` from the published lane directory and
 * extracts the result for the requested scenario.
 *
 * The entry whose `id` equals `scenario` is preferred; when none matches, the
 * first entry is used. A missing entry or status yields status "fail".
 * `laneOutputDir` is accepted but not read here.
 */
async function readLaneResult(params: {
  laneOutputDir: string;
  publishedLaneDir: string;
  scenario: string;
}) {
  const { publishedLaneDir, scenario } = params;
  const summaryPath = path.join(publishedLaneDir, "discord-qa-summary.json");
  const rawSummary = await fs.readFile(summaryPath, "utf8");
  const { scenarios } = JSON.parse(rawSummary) as DiscordQaSummary;
  const matching = scenarios?.find((entry) => entry.id === scenario);
  const selected = matching ?? scenarios?.[0];
  return {
    outputDir: publishedLaneDir,
    scenarioDetails: selected?.details,
    screenshotPath: selected?.artifactPaths?.screenshot,
    status: selected?.status ?? "fail",
    summaryPath,
  } satisfies LaneResult;
}
/**
 * Renders the Markdown before/after report.
 *
 * Contains a header (status, transport, scenario, output directory) followed by
 * a Baseline and a Candidate section. Screenshots are referenced as
 * `<lane>/<basename>`; a lane without a screenshot is reported as "missing",
 * and the Details line is omitted when the lane has no details.
 */
function renderReport(params: {
  baseline: LaneResult;
  candidate: LaneResult;
  comparison: Comparison;
  outputDir: string;
}) {
  const { baseline, candidate, comparison, outputDir } = params;
  const screenshotLine = (lane: "baseline" | "candidate", screenshotPath: string | undefined) =>
    screenshotPath
      ? `- Screenshot: \`${path.join(lane, path.basename(screenshotPath))}\``
      : "- Screenshot: missing";
  const detailLines = (details: string | undefined) => (details ? [`- Details: ${details}`] : []);

  const header = [
    "# Mantis Before/After",
    "",
    `Status: ${comparison.pass ? "pass" : "fail"}`,
    `Transport: ${comparison.transport}`,
    `Scenario: ${comparison.scenario}`,
    `Output: ${outputDir}`,
    "",
  ];
  const baselineSection = [
    "## Baseline",
    "",
    `- Ref: \`${comparison.baseline.ref}\``,
    `- Expected: ${comparison.baseline.expected}`,
    `- Status: \`${baseline.status}\``,
    `- Reproduced: \`${comparison.baseline.reproduced}\``,
    screenshotLine("baseline", baseline.screenshotPath),
    ...detailLines(baseline.scenarioDetails),
    "",
  ];
  const candidateSection = [
    "## Candidate",
    "",
    `- Ref: \`${comparison.candidate.ref}\``,
    `- Expected: ${comparison.candidate.expected}`,
    `- Status: \`${candidate.status}\``,
    `- Fixed: \`${comparison.candidate.fixed}\``,
    screenshotLine("candidate", candidate.screenshotPath),
    ...detailLines(candidate.scenarioDetails),
    "",
  ];
  return `${[...header, ...baselineSection, ...candidateSection].join("\n")}\n`;
}
/**
 * Copies the lane's screenshot to a stable `<lane>.png` inside the lane's
 * output directory.
 *
 * @returns The path of the copy, or `undefined` when the lane has no screenshot.
 */
async function copyScreenshot(params: { lane: "baseline" | "candidate"; result: LaneResult }) {
  const { lane, result } = params;
  const { outputDir, screenshotPath } = result;
  if (!screenshotPath) {
    return undefined;
  }
  // Relative screenshot paths are resolved against the lane's output directory.
  let source = screenshotPath;
  if (!path.isAbsolute(source)) {
    source = path.join(outputDir, source);
  }
  const target = path.join(outputDir, `${lane}.png`);
  await fs.copyFile(source, target);
  return target;
}
/**
 * Runs one lane (baseline or candidate) end to end.
 *
 * Steps, in order: create a detached worktree for `ref`, optionally install and
 * build inside it, run the Discord QA scenario with `--allow-failures`, copy the
 * lane's artifacts into `<outputDir>/<lane>`, read the scenario result from the
 * published summary, and copy the screenshot to `<lane>.png`.
 *
 * @returns The lane result, with `screenshotPath` pointing at the copied
 *   screenshot when one was copied.
 */
async function runLane(params: {
  lane: "baseline" | "candidate";
  outputDir: string;
  ref: string;
  repoRoot: string;
  runner: CommandRunner;
  scenario: string;
  worktreeRoot: string;
  opts: Required<
    Pick<
      MantisBeforeAfterOptions,
      | "credentialRole"
      | "credentialSource"
      | "fastMode"
      | "providerMode"
      | "skipBuild"
      | "skipInstall"
    >
  >;
}) {
  const { lane, opts, ref, repoRoot, runner, scenario } = params;
  const worktreeDir = path.join(params.worktreeRoot, lane);
  // Relative to the worktree: passed to the QA command together with --repo-root.
  const worktreeOutputDir = path.join(".artifacts", "qa-e2e", "mantis", "run", lane);

  // Every command runs from the main repo root; pnpm is pointed at the worktree via --dir.
  const exec = (command: string, args: readonly string[]) =>
    runCommand({ args, command, cwd: repoRoot, runner });
  const pnpmInWorktree = (...args: string[]) => exec("pnpm", ["--dir", worktreeDir, ...args]);

  await exec("git", ["worktree", "add", "--detach", worktreeDir, ref]);
  if (!opts.skipInstall) {
    await pnpmInWorktree("install", "--frozen-lockfile");
  }
  if (!opts.skipBuild) {
    await pnpmInWorktree("build");
  }

  const fastFlag = opts.fastMode ? ["--fast"] : [];
  await pnpmInWorktree(
    "openclaw",
    "qa",
    "discord",
    "--repo-root",
    worktreeDir,
    "--output-dir",
    worktreeOutputDir,
    "--provider-mode",
    opts.providerMode,
    "--model",
    DEFAULT_MODEL,
    "--alt-model",
    DEFAULT_MODEL,
    ...fastFlag,
    "--credential-source",
    opts.credentialSource,
    "--credential-role",
    opts.credentialRole,
    "--scenario",
    scenario,
    "--allow-failures",
  );

  // Publish the lane artifacts next to the comparison before reading them.
  const laneOutputDir = path.join(worktreeDir, worktreeOutputDir);
  const publishedLaneDir = path.join(params.outputDir, lane);
  await copyDirContents(laneOutputDir, publishedLaneDir);
  const result = await readLaneResult({ laneOutputDir, publishedLaneDir, scenario });
  const copiedScreenshot = await copyScreenshot({ lane, result });
  return {
    ...result,
    screenshotPath: copiedScreenshot ?? result.screenshotPath,
  } satisfies LaneResult;
}
/**
 * Runs one scenario against a baseline ref and a candidate ref and records
 * whether the bug reproduced on the former and is fixed on the latter.
 *
 * The baseline lane runs first, then the candidate lane. Afterwards
 * `comparison.json` and `mantis-report.md` are written to the output directory.
 * Worktrees are created under `<outputDir>/worktrees`; this function does not
 * remove them.
 *
 * @param opts Run options; blank or missing values fall back to the module defaults.
 * @returns Paths of the written artifacts and the overall status, which is
 *   "pass" only when the baseline status is "fail" and the candidate status is "pass".
 * @throws Error when the transport or scenario is not supported, or when any
 *   command or file step fails. On failure inside the lane/report phase an
 *   `error.txt` is written to the output directory on a best-effort basis and
 *   the original error is rethrown.
 */
export async function runMantisBeforeAfter(
  opts: MantisBeforeAfterOptions = {},
): Promise<MantisBeforeAfterResult> {
  const startedAt = (opts.now ?? (() => new Date()))();
  const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
  const outputDir = await ensureRepoBoundDirectory(
    repoRoot,
    resolveRepoRelativeOutputDir(repoRoot, opts.outputDir) ?? defaultOutputDir(repoRoot, startedAt),
    "Mantis before/after output directory",
    { mode: 0o755 },
  );
  const transport = normalizeRequiredLiteral(
    opts.transport,
    DEFAULT_TRANSPORT,
    ["discord"],
    "--transport",
  );
  const scenario = normalizeRequiredLiteral(
    opts.scenario,
    DEFAULT_SCENARIO,
    [DEFAULT_SCENARIO],
    "--scenario",
  );
  const baseline = trimToValue(opts.baseline) ?? DEFAULT_BASELINE_REF;
  const candidate = trimToValue(opts.candidate) ?? DEFAULT_CANDIDATE_REF;
  const runner = opts.commandRunner ?? defaultCommandRunner;
  const worktreeRoot = path.join(outputDir, "worktrees");
  const comparisonPath = path.join(outputDir, "comparison.json");
  const reportPath = path.join(outputDir, "mantis-report.md");
  await fs.mkdir(worktreeRoot, { recursive: true });
  try {
    const commonOpts = {
      credentialRole: trimToValue(opts.credentialRole) ?? DEFAULT_CREDENTIAL_ROLE,
      credentialSource: trimToValue(opts.credentialSource) ?? DEFAULT_CREDENTIAL_SOURCE,
      fastMode: opts.fastMode ?? true,
      providerMode: trimToValue(opts.providerMode) ?? DEFAULT_PROVIDER_MODE,
      skipBuild: opts.skipBuild ?? false,
      skipInstall: opts.skipInstall ?? false,
    };
    // Lanes run sequentially: baseline first, then candidate.
    const baselineResult = await runLane({
      lane: "baseline",
      outputDir,
      ref: baseline,
      repoRoot,
      runner,
      scenario,
      worktreeRoot,
      opts: commonOpts,
    });
    const candidateResult = await runLane({
      lane: "candidate",
      outputDir,
      ref: candidate,
      repoRoot,
      runner,
      scenario,
      worktreeRoot,
      opts: commonOpts,
    });
    // Verification passes only when the bug shows on the baseline AND is gone on the candidate.
    const comparison = {
      baseline: {
        expected: "queued-only",
        ref: baseline,
        reproduced: baselineResult.status === "fail",
        screenshotPath: baselineResult.screenshotPath,
        status: baselineResult.status,
      },
      candidate: {
        expected: "queued -> thinking -> done",
        fixed: candidateResult.status === "pass",
        ref: candidate,
        screenshotPath: candidateResult.screenshotPath,
        status: candidateResult.status,
      },
      pass: baselineResult.status === "fail" && candidateResult.status === "pass",
      scenario,
      transport,
    } satisfies Comparison;
    await fs.writeFile(comparisonPath, `${JSON.stringify(comparison, null, 2)}\n`, "utf8");
    await fs.writeFile(
      reportPath,
      renderReport({
        baseline: baselineResult,
        candidate: candidateResult,
        comparison,
        outputDir,
      }),
      "utf8",
    );
    return {
      comparisonPath,
      outputDir,
      reportPath,
      status: comparison.pass ? "pass" : "fail",
    };
  } catch (error) {
    // Recording the failure is best-effort: if producing or writing error.txt
    // fails as well, the original error must still be the one that propagates.
    try {
      await fs.writeFile(
        path.join(outputDir, "error.txt"),
        `${formatErrorMessage(error)}\n`,
        "utf8",
      );
    } catch {
      // Ignored on purpose so the root cause below is not masked.
    }
    throw error;
  }
}