From d4af125b52ef9248d2c4f94cb94c90262460c88d Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Sun, 3 May 2026 19:13:01 +0100
Subject: [PATCH] feat(qa): add Mantis before-after CLI

---
 docs/concepts/mantis.md                       |   8 +-
 docs/concepts/qa-e2e-automation.md            |   2 +-
 extensions/qa-lab/src/cli.test.ts             |  47 ++
 extensions/qa-lab/src/mantis/cli.runtime.ts   |  10 +
 extensions/qa-lab/src/mantis/cli.ts           |  53 +++
 .../qa-lab/src/mantis/run.runtime.test.ts     |  98 +++++
 extensions/qa-lab/src/mantis/run.runtime.ts   | 409 ++++++++++++++++++
 7 files changed, 625 insertions(+), 2 deletions(-)
 create mode 100644 extensions/qa-lab/src/mantis/run.runtime.test.ts
 create mode 100644 extensions/qa-lab/src/mantis/run.runtime.ts

diff --git a/docs/concepts/mantis.md b/docs/concepts/mantis.md
index d9090ae204b..04e9254af44 100644
--- a/docs/concepts/mantis.md
+++ b/docs/concepts/mantis.md
@@ -72,7 +72,7 @@ pnpm openclaw qa mantis discord-smoke \
   --output-dir .artifacts/qa-e2e/mantis/discord-smoke
 ```
 
-The later before and after runner should accept this shape:
+The local before and after runner accepts this shape:
 
 ```bash
 pnpm openclaw qa mantis run \
@@ -83,6 +83,12 @@ pnpm openclaw qa mantis run \
   --output-dir .artifacts/qa-e2e/mantis/local-discord-status-reactions
 ```
 
+The runner creates detached baseline and candidate worktrees under the output
+directory, installs dependencies, builds each ref, runs the scenario with
+`--allow-failures`, then writes `baseline/`, `candidate/`, `comparison.json`,
+and `mantis-report.md`. For the first Discord scenario, a successful verification
+means baseline status is `fail` and candidate status is `pass`.
+
 The GitHub smoke workflow is `Mantis Discord Smoke`. The before and after
 GitHub workflow for the first real scenario is `Mantis Discord Status Reactions`.
 It accepts:
diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md
index c6d5aeda9c5..f238f675f82 100644
--- a/docs/concepts/qa-e2e-automation.md
+++ b/docs/concepts/qa-e2e-automation.md
@@ -47,7 +47,7 @@ script aliases; both forms are supported.
 | `qa matrix` | Live transport lane against a disposable Tuwunel homeserver. See [Matrix QA](/concepts/qa-matrix). |
 | `qa telegram` | Live transport lane against a real private Telegram group. |
 | `qa discord` | Live transport lane against a real private Discord guild channel. |
-| `qa mantis` | Planned before and after verification runner for live transport bugs. See [Mantis](/concepts/mantis). |
+| `qa mantis` | Before and after verification runner for live transport bugs, with the first Discord status-reactions scenario. See [Mantis](/concepts/mantis). |
 
 ## Operator flow
 
diff --git a/extensions/qa-lab/src/cli.test.ts b/extensions/qa-lab/src/cli.test.ts
index 36062e0e3b0..11a917accda 100644
--- a/extensions/qa-lab/src/cli.test.ts
+++ b/extensions/qa-lab/src/cli.test.ts
@@ -48,6 +48,7 @@ const {
   runQaProviderServerCommand,
   runQaSuiteCommand,
   runQaTelegramCommand,
+  runMantisBeforeAfterCommand,
   runMantisDiscordSmokeCommand,
 } = vi.hoisted(() => ({
   runQaCredentialsAddCommand: vi.fn(),
@@ -57,6 +58,7 @@ const {
   runQaProviderServerCommand: vi.fn(),
   runQaSuiteCommand: vi.fn(),
   runQaTelegramCommand: vi.fn(),
+  runMantisBeforeAfterCommand: vi.fn(),
   runMantisDiscordSmokeCommand: vi.fn(),
 }));
 
@@ -75,6 +77,7 @@ vi.mock("./live-transports/telegram/cli.runtime.js", () => ({
 }));
 
 vi.mock("./mantis/cli.runtime.js", () => ({
+  runMantisBeforeAfterCommand,
   runMantisDiscordSmokeCommand,
 }));
 
@@ -101,6 +104,7 @@ describe("qa cli registration", () => {
     runQaProviderServerCommand.mockReset();
     runQaSuiteCommand.mockReset();
     runQaTelegramCommand.mockReset();
+    runMantisBeforeAfterCommand.mockReset();
     runMantisDiscordSmokeCommand.mockReset();
     listQaRunnerCliContributions
       .mockReset()
@@ -161,6 +165,49 @@ describe("qa cli registration", () => {
     });
   });
 
+  it("routes mantis before/after flags into the mantis runtime command", async () => {
+    await program.parseAsync([
+      "node",
+      "openclaw",
+      "qa",
+      "mantis",
+      "run",
+      "--transport",
+      "discord",
+      "--scenario",
+      "discord-status-reactions-tool-only",
+      "--baseline",
+      "origin/main",
+      "--candidate",
+      "HEAD",
+      "--repo-root",
+      "/tmp/openclaw-repo",
+      "--output-dir",
+      ".artifacts/qa-e2e/mantis/local-discord-status-reactions",
+      "--credential-source",
+      "convex",
+      "--credential-role",
+      "maintainer",
+      "--skip-install",
+      "--skip-build",
+    ]);
+
+    expect(runMantisBeforeAfterCommand).toHaveBeenCalledWith({
+      baseline: "origin/main",
+      candidate: "HEAD",
+      credentialRole: "maintainer",
+      credentialSource: "convex",
+      fastMode: true,
+      outputDir: ".artifacts/qa-e2e/mantis/local-discord-status-reactions",
+      providerMode: "live-frontier",
+      repoRoot: "/tmp/openclaw-repo",
+      scenario: "discord-status-reactions-tool-only",
+      skipBuild: true,
+      skipInstall: true,
+      transport: "discord",
+    });
+  });
+
   it("routes coverage report flags into the qa runtime command", async () => {
     await program.parseAsync([
       "node",
diff --git a/extensions/qa-lab/src/mantis/cli.runtime.ts b/extensions/qa-lab/src/mantis/cli.runtime.ts
index 0370d1b2c4f..384f61e93c3 100644
--- a/extensions/qa-lab/src/mantis/cli.runtime.ts
+++ b/extensions/qa-lab/src/mantis/cli.runtime.ts
@@ -1,4 +1,5 @@
 import { runMantisDiscordSmoke, type MantisDiscordSmokeOptions } from "./discord-smoke.runtime.js";
+import { runMantisBeforeAfter, type MantisBeforeAfterOptions } from "./run.runtime.js";
 
 export async function runMantisDiscordSmokeCommand(opts: MantisDiscordSmokeOptions) {
   const result = await runMantisDiscordSmoke(opts);
@@ -8,3 +9,12 @@ export async function runMantisDiscordSmokeCommand(opts: MantisDiscordSmokeOptio
     process.exitCode = 1;
   }
 }
+
+export async function runMantisBeforeAfterCommand(opts: MantisBeforeAfterOptions) {
+  const result = await runMantisBeforeAfter(opts);
+  process.stdout.write(`Mantis before/after report: ${result.reportPath}\n`);
+  process.stdout.write(`Mantis before/after comparison: ${result.comparisonPath}\n`);
+  if (result.status === "fail") {
+    process.exitCode = 1;
+  }
+}
diff --git a/extensions/qa-lab/src/mantis/cli.ts b/extensions/qa-lab/src/mantis/cli.ts
index 4a2b22bb8ae..18129a00d51 100644
--- a/extensions/qa-lab/src/mantis/cli.ts
+++ b/extensions/qa-lab/src/mantis/cli.ts
@@ -1,6 +1,7 @@
 import type { Command } from "commander";
 import { createLazyCliRuntimeLoader } from "../live-transports/shared/live-transport-cli.js";
 import type { MantisDiscordSmokeOptions } from "./discord-smoke.runtime.js";
+import type { MantisBeforeAfterOptions } from "./run.runtime.js";
 
 type MantisCliRuntime = typeof import("./cli.runtime.js");
 
@@ -13,6 +14,11 @@ async function runDiscordSmoke(opts: MantisDiscordSmokeOptions) {
   await runtime.runMantisDiscordSmokeCommand(opts);
 }
 
+async function runBeforeAfter(opts: MantisBeforeAfterOptions) {
+  const runtime = await loadMantisCliRuntime();
+  await runtime.runMantisBeforeAfterCommand(opts);
+}
+
 type MantisDiscordSmokeCommanderOptions = {
   channelId?: string;
   guildId?: string;
@@ -25,11 +31,58 @@ type MantisDiscordSmokeCommanderOptions = {
   tokenEnv?: string;
 };
 
+type MantisBeforeAfterCommanderOptions = {
+  baseline?: string;
+  candidate?: string;
+  credentialRole?: string;
+  credentialSource?: string;
+  fast?: boolean;
+  outputDir?: string;
+  providerMode?: string;
+  repoRoot?: string;
+  scenario?: string;
+  skipBuild?: boolean;
+  skipInstall?: boolean;
+  transport?: string;
+};
+
 export function registerMantisCli(qa: Command) {
   const mantis = qa
     .command("mantis")
     .description("Run Mantis before/after and live-smoke verification flows");
 
+  mantis
+    .command("run")
+    .description("Run a Mantis before/after scenario against baseline and candidate refs")
+    .requiredOption("--transport <transport>", "Transport to verify; currently only discord")
+    .requiredOption("--scenario <id>", "Mantis scenario id to run")
+    .requiredOption("--baseline <ref>", "Ref expected to reproduce the bug")
+    .requiredOption("--candidate <ref>", "Ref expected to contain the fix")
+    .option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
+    .option("--output-dir <path>", "Mantis before/after artifact directory")
+    .option("--provider-mode <mode>", "QA provider mode", "live-frontier")
+    .option("--credential-source <source>", "QA credential source", "convex")
+    .option("--credential-role <role>", "QA credential role", "ci")
+    .option("--fast", "Enable fast provider mode where supported", true)
+    .option("--skip-install", "Skip pnpm install in baseline/candidate worktrees", false)
+    .option("--skip-build", "Skip pnpm build in baseline/candidate worktrees", false)
+    .action(async (opts: MantisBeforeAfterCommanderOptions) => {
+      await runBeforeAfter({
+        baseline: opts.baseline,
+        candidate: opts.candidate,
+        credentialRole: opts.credentialRole,
+        credentialSource: opts.credentialSource,
+        fastMode: opts.fast,
+        outputDir: opts.outputDir,
+        providerMode: opts.providerMode,
+        repoRoot: opts.repoRoot,
+        scenario: opts.scenario,
+        skipBuild: opts.skipBuild,
+        skipInstall: opts.skipInstall,
+        transport: opts.transport,
+      });
+    });
+
   mantis
     .command("discord-smoke")
     .description("Verify the Mantis Discord bot can see the guild/channel, post, and react")
diff --git a/extensions/qa-lab/src/mantis/run.runtime.test.ts b/extensions/qa-lab/src/mantis/run.runtime.test.ts
new file mode 100644
index 00000000000..bd46e54aa8c
--- /dev/null
+++ b/extensions/qa-lab/src/mantis/run.runtime.test.ts
@@ -0,0 +1,98 @@
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { runMantisBeforeAfter } from "./run.runtime.js";
+
+describe("mantis before/after runtime", () => {
+  let repoRoot: string;
+
+  beforeEach(async () => {
+    repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "mantis-before-after-"));
+  });
+
+  afterEach(async () => {
+    await fs.rm(repoRoot, { force: true, recursive: true });
+  });
+
+  it("runs baseline and candidate worktrees and writes stable comparison artifacts", async () => {
+    const commands: { args: readonly string[]; command: string; cwd?: string }[] = [];
+    const runner = vi.fn(async (command: string, args: readonly string[]) => {
+      commands.push({ command, args });
+      if (command !== "pnpm" || !args.includes("openclaw")) {
+        return;
+      }
+      const repoRootArg = args[args.indexOf("--repo-root") + 1];
+      const outputDirArg = args[args.indexOf("--output-dir") + 1];
+      const lane = outputDirArg.endsWith("baseline") ? "baseline" : "candidate";
+      const outputDir = path.join(repoRootArg, outputDirArg);
+      await fs.mkdir(outputDir, { recursive: true });
+      const screenshotPath = path.join(outputDir, `${lane}-timeline.png`);
+      await fs.writeFile(screenshotPath, `${lane} screenshot`);
+      await fs.writeFile(
+        path.join(outputDir, "discord-qa-summary.json"),
+        `${JSON.stringify(
+          {
+            scenarios: [
+              {
+                artifactPaths: { screenshot: screenshotPath },
+                details:
+                  lane === "baseline"
+                    ? "reaction timeline missing thinking/done"
+                    : "reaction timeline matched queued -> thinking -> done",
+                id: "discord-status-reactions-tool-only",
+                status: lane === "baseline" ? "fail" : "pass",
+              },
+            ],
+          },
+          null,
+          2,
+        )}\n`,
+      );
+    });
+
+    const result = await runMantisBeforeAfter({
+      baseline: "bug-sha",
+      candidate: "fix-sha",
+      commandRunner: runner,
+      now: () => new Date("2026-05-03T12:00:00.000Z"),
+      outputDir: ".artifacts/qa-e2e/mantis/test-run",
+      repoRoot,
+      skipBuild: true,
+      skipInstall: true,
+    });
+
+    expect(result.status).toBe("pass");
+    expect(
+      commands.map((entry) => [
+        entry.command,
+        entry.args[0],
+        entry.args[1],
+        entry.args[2],
+        entry.args[3],
+      ]),
+    ).toEqual([
+      ["git", "worktree", "add", "--detach", expect.stringContaining("baseline")],
+      ["pnpm", "--dir", expect.stringContaining("baseline"), "openclaw", "qa"],
+      ["git", "worktree", "add", "--detach", expect.stringContaining("candidate")],
+      ["pnpm", "--dir", expect.stringContaining("candidate"), "openclaw", "qa"],
+    ]);
+
+    const comparison = JSON.parse(await fs.readFile(result.comparisonPath, "utf8")) as {
+      baseline: { reproduced: boolean; status: string };
+      candidate: { fixed: boolean; status: string };
+      pass: boolean;
+    };
+    expect(comparison).toMatchObject({
+      baseline: { reproduced: true, status: "fail" },
+      candidate: { fixed: true, status: "pass" },
+      pass: true,
+    });
+    await expect(
+      fs.readFile(path.join(result.outputDir, "baseline", "baseline.png"), "utf8"),
+    ).resolves.toBe("baseline screenshot");
+    await expect(
+      fs.readFile(path.join(result.outputDir, "candidate", "candidate.png"), "utf8"),
+    ).resolves.toBe("candidate screenshot");
+  });
+});
diff --git a/extensions/qa-lab/src/mantis/run.runtime.ts b/extensions/qa-lab/src/mantis/run.runtime.ts
new file mode 100644
index 00000000000..ade6d88cb8c
--- /dev/null
+++ b/extensions/qa-lab/src/mantis/run.runtime.ts
@@ -0,0 +1,409 @@
+import { spawn, type SpawnOptions } from "node:child_process";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
+import { ensureRepoBoundDirectory, resolveRepoRelativeOutputDir } from "../cli-paths.js";
+
+export type MantisBeforeAfterOptions = {
+  allowFailures?: boolean;
+  baseline?: string;
+  candidate?: string;
+  commandRunner?: CommandRunner;
+  credentialRole?: string;
+  credentialSource?: string;
+  fastMode?: boolean;
+  now?: () => Date;
+  outputDir?: string;
+  providerMode?: string;
+  repoRoot?: string;
+  scenario?: string;
+  skipBuild?: boolean;
+  skipInstall?: boolean;
+  transport?: string;
+};
+
+export type MantisBeforeAfterResult = {
+  comparisonPath: string;
+  outputDir: string;
+  reportPath: string;
+  status: "pass" | "fail";
+};
+
+type CommandRunner = (
+  command: string,
+  args: readonly string[],
+  options: SpawnOptions,
+) => Promise<void>;
+
+type DiscordQaSummary = {
+  scenarios?: {
+    artifactPaths?: Record<string, string>;
+    details?: string;
+    id?: string;
+    status?: string;
+    title?: string;
+  }[];
+};
+
+type LaneResult = {
+  outputDir: string;
+  scenarioDetails?: string;
+  screenshotPath?: string;
+  status: string;
+  summaryPath: string;
+};
+
+type Comparison = {
+  baseline: {
+    expected: "queued-only";
+    ref: string;
+    reproduced: boolean;
+    screenshotPath?: string;
+    status: string;
+  };
+  candidate: {
+    expected: "queued -> thinking -> done";
+    fixed: boolean;
+    ref: string;
+    screenshotPath?: string;
+    status: string;
+  };
+  pass: boolean;
+  scenario: string;
+  transport: "discord";
+};
+
+const DEFAULT_BASELINE_REF = "0bf06e953fdda290799fc9fb9244a8f67fdae593";
+const DEFAULT_CANDIDATE_REF = "HEAD";
+const DEFAULT_SCENARIO = "discord-status-reactions-tool-only";
+const DEFAULT_TRANSPORT = "discord";
+const DEFAULT_PROVIDER_MODE = "live-frontier";
+const DEFAULT_MODEL = "openai/gpt-5.4";
+const DEFAULT_CREDENTIAL_SOURCE = "convex";
+const DEFAULT_CREDENTIAL_ROLE = "ci";
+
+function trimToValue(value: string | undefined) {
+  const trimmed = value?.trim();
+  return trimmed && trimmed.length > 0 ? trimmed : undefined;
+}
+
+function normalizeRequiredLiteral<T extends string>(
+  value: string | undefined,
+  defaultValue: T,
+  allowed: readonly T[],
+  label: string,
+): T {
+  const normalized = (trimToValue(value) ?? defaultValue) as T;
+  if (!allowed.includes(normalized)) {
+    throw new Error(`${label} must be ${allowed.map((entry) => `'${entry}'`).join(" or ")}.`);
+  }
+  return normalized;
+}
+
+function defaultOutputDir(repoRoot: string, startedAt: Date) {
+  const stamp = startedAt.toISOString().replace(/[:.]/gu, "-");
+  return path.join(repoRoot, ".artifacts", "qa-e2e", "mantis", `run-${stamp}`);
+}
+
+function defaultCommandRunner(
+  command: string,
+  args: readonly string[],
+  options: SpawnOptions,
+): Promise<void> {
+  return new Promise((resolve, reject) => {
+    const child = spawn(command, args, {
+      ...options,
+      stdio: options.stdio ?? "inherit",
+    });
+    child.on("error", reject);
+    child.on("close", (code, signal) => {
+      if (code === 0) {
+        resolve();
+        return;
+      }
+      const detail = signal ? `signal ${signal}` : `exit code ${code ?? "unknown"}`;
+      reject(new Error(`${command} ${args.join(" ")} failed with ${detail}`));
+    });
+  });
+}
+
+async function runCommand(params: {
+  args: readonly string[];
+  command: string;
+  cwd: string;
+  runner: CommandRunner;
+}) {
+  await params.runner(params.command, params.args, {
+    cwd: params.cwd,
+    env: process.env,
+    stdio: "inherit",
+  });
+}
+
+async function copyDirContents(sourceDir: string, targetDir: string) {
+  await fs.rm(targetDir, { force: true, recursive: true });
+  await fs.mkdir(targetDir, { recursive: true });
+  await fs.cp(sourceDir, targetDir, { recursive: true });
+}
+
+async function readLaneResult(params: {
+  laneOutputDir: string;
+  publishedLaneDir: string;
+  scenario: string;
+}) {
+  const summaryPath = path.join(params.publishedLaneDir, "discord-qa-summary.json");
+  const summary = JSON.parse(await fs.readFile(summaryPath, "utf8")) as DiscordQaSummary;
+  const scenarioSummary =
+    summary.scenarios?.find((entry) => entry.id === params.scenario) ?? summary.scenarios?.[0];
+  const status = scenarioSummary?.status ?? "fail";
+  const screenshotPath = scenarioSummary?.artifactPaths?.screenshot;
+  return {
+    outputDir: params.publishedLaneDir,
+    scenarioDetails: scenarioSummary?.details,
+    screenshotPath,
+    status,
+    summaryPath,
+  } satisfies LaneResult;
+}
+
+function renderReport(params: {
+  baseline: LaneResult;
+  candidate: LaneResult;
+  comparison: Comparison;
+  outputDir: string;
+}) {
+  const lines = [
+    "# Mantis Before/After",
+    "",
+    `Status: ${params.comparison.pass ? "pass" : "fail"}`,
+    `Transport: ${params.comparison.transport}`,
+    `Scenario: ${params.comparison.scenario}`,
+    `Output: ${params.outputDir}`,
+    "",
+    "## Baseline",
+    "",
+    `- Ref: \`${params.comparison.baseline.ref}\``,
+    `- Expected: ${params.comparison.baseline.expected}`,
+    `- Status: \`${params.baseline.status}\``,
+    `- Reproduced: \`${params.comparison.baseline.reproduced}\``,
+    params.baseline.screenshotPath
+      ? `- Screenshot: \`${path.join("baseline", path.basename(params.baseline.screenshotPath))}\``
+      : "- Screenshot: missing",
+    params.baseline.scenarioDetails ? `- Details: ${params.baseline.scenarioDetails}` : undefined,
+    "",
+    "## Candidate",
+    "",
+    `- Ref: \`${params.comparison.candidate.ref}\``,
+    `- Expected: ${params.comparison.candidate.expected}`,
+    `- Status: \`${params.candidate.status}\``,
+    `- Fixed: \`${params.comparison.candidate.fixed}\``,
+    params.candidate.screenshotPath
+      ? `- Screenshot: \`${path.join("candidate", path.basename(params.candidate.screenshotPath))}\``
+      : "- Screenshot: missing",
+    params.candidate.scenarioDetails ? `- Details: ${params.candidate.scenarioDetails}` : undefined,
+    "",
+  ].filter((line) => line !== undefined);
+  return `${lines.join("\n")}\n`;
+}
+
+async function copyScreenshot(params: { lane: "baseline" | "candidate"; result: LaneResult }) {
+  if (!params.result.screenshotPath) {
+    return undefined;
+  }
+  const source = path.isAbsolute(params.result.screenshotPath)
+    ? params.result.screenshotPath
+    : path.join(params.result.outputDir, params.result.screenshotPath);
+  const target = path.join(params.result.outputDir, `${params.lane}.png`);
+  await fs.copyFile(source, target);
+  return target;
+}
+
+async function runLane(params: {
+  lane: "baseline" | "candidate";
+  outputDir: string;
+  ref: string;
+  repoRoot: string;
+  runner: CommandRunner;
+  scenario: string;
+  worktreeRoot: string;
+  opts: Required<
+    Pick<
+      MantisBeforeAfterOptions,
+      | "credentialRole"
+      | "credentialSource"
+      | "fastMode"
+      | "providerMode"
+      | "skipBuild"
+      | "skipInstall"
+    >
+  >;
+}) {
+  const worktreeDir = path.join(params.worktreeRoot, params.lane);
+  const worktreeOutputDir = path.join(".artifacts", "qa-e2e", "mantis", "run", params.lane);
+  await runCommand({
+    command: "git",
+    args: ["worktree", "add", "--detach", worktreeDir, params.ref],
+    cwd: params.repoRoot,
+    runner: params.runner,
+  });
+  if (!params.opts.skipInstall) {
+    await runCommand({
+      command: "pnpm",
+      args: ["--dir", worktreeDir, "install", "--frozen-lockfile"],
+      cwd: params.repoRoot,
+      runner: params.runner,
+    });
+  }
+  if (!params.opts.skipBuild) {
+    await runCommand({
+      command: "pnpm",
+      args: ["--dir", worktreeDir, "build"],
+      cwd: params.repoRoot,
+      runner: params.runner,
+    });
+  }
+  await runCommand({
+    command: "pnpm",
+    args: [
+      "--dir",
+      worktreeDir,
+      "openclaw",
+      "qa",
+      "discord",
+      "--repo-root",
+      worktreeDir,
+      "--output-dir",
+      worktreeOutputDir,
+      "--provider-mode",
+      params.opts.providerMode,
+      "--model",
+      DEFAULT_MODEL,
+      "--alt-model",
+      DEFAULT_MODEL,
+      ...(params.opts.fastMode ? ["--fast"] : []),
+      "--credential-source",
+      params.opts.credentialSource,
+      "--credential-role",
+      params.opts.credentialRole,
+      "--scenario",
+      params.scenario,
+      "--allow-failures",
+    ],
+    cwd: params.repoRoot,
+    runner: params.runner,
+  });
+  const publishedLaneDir = path.join(params.outputDir, params.lane);
+  await copyDirContents(path.join(worktreeDir, worktreeOutputDir), publishedLaneDir);
+  const result = await readLaneResult({
+    laneOutputDir: path.join(worktreeDir, worktreeOutputDir),
+    publishedLaneDir,
+    scenario: params.scenario,
+  });
+  const copiedScreenshot = await copyScreenshot({ lane: params.lane, result });
+  return {
+    ...result,
+    screenshotPath: copiedScreenshot ?? result.screenshotPath,
+  } satisfies LaneResult;
+}
+
+export async function runMantisBeforeAfter(
+  opts: MantisBeforeAfterOptions = {},
+): Promise<MantisBeforeAfterResult> {
+  const startedAt = (opts.now ?? (() => new Date()))();
+  const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
+  const outputDir = await ensureRepoBoundDirectory(
+    repoRoot,
+    resolveRepoRelativeOutputDir(repoRoot, opts.outputDir) ?? defaultOutputDir(repoRoot, startedAt),
+    "Mantis before/after output directory",
+    { mode: 0o755 },
+  );
+  const transport = normalizeRequiredLiteral(
+    opts.transport,
+    DEFAULT_TRANSPORT,
+    ["discord"],
+    "--transport",
+  );
+  const scenario = normalizeRequiredLiteral(
+    opts.scenario,
+    DEFAULT_SCENARIO,
+    [DEFAULT_SCENARIO],
+    "--scenario",
+  );
+  const baseline = trimToValue(opts.baseline) ?? DEFAULT_BASELINE_REF;
+  const candidate = trimToValue(opts.candidate) ?? DEFAULT_CANDIDATE_REF;
+  const runner = opts.commandRunner ?? defaultCommandRunner;
+  const worktreeRoot = path.join(outputDir, "worktrees");
+  const comparisonPath = path.join(outputDir, "comparison.json");
+  const reportPath = path.join(outputDir, "mantis-report.md");
+  await fs.mkdir(worktreeRoot, { recursive: true });
+
+  try {
+    const commonOpts = {
+      credentialRole: trimToValue(opts.credentialRole) ?? DEFAULT_CREDENTIAL_ROLE,
+      credentialSource: trimToValue(opts.credentialSource) ?? DEFAULT_CREDENTIAL_SOURCE,
+      fastMode: opts.fastMode ?? true,
+      providerMode: trimToValue(opts.providerMode) ?? DEFAULT_PROVIDER_MODE,
+      skipBuild: opts.skipBuild ?? false,
+      skipInstall: opts.skipInstall ?? false,
+    };
+    const baselineResult = await runLane({
+      lane: "baseline",
+      outputDir,
+      ref: baseline,
+      repoRoot,
+      runner,
+      scenario,
+      worktreeRoot,
+      opts: commonOpts,
+    });
+    const candidateResult = await runLane({
+      lane: "candidate",
+      outputDir,
+      ref: candidate,
+      repoRoot,
+      runner,
+      scenario,
+      worktreeRoot,
+      opts: commonOpts,
+    });
+    const comparison = {
+      baseline: {
+        expected: "queued-only",
+        ref: baseline,
+        reproduced: baselineResult.status === "fail",
+        screenshotPath: baselineResult.screenshotPath,
+        status: baselineResult.status,
+      },
+      candidate: {
+        expected: "queued -> thinking -> done",
+        fixed: candidateResult.status === "pass",
+        ref: candidate,
+        screenshotPath: candidateResult.screenshotPath,
+        status: candidateResult.status,
+      },
+      pass: baselineResult.status === "fail" && candidateResult.status === "pass",
+      scenario,
+      transport,
+    } satisfies Comparison;
+    await fs.writeFile(comparisonPath, `${JSON.stringify(comparison, null, 2)}\n`, "utf8");
+    await fs.writeFile(
+      reportPath,
+      renderReport({
+        baseline: baselineResult,
+        candidate: candidateResult,
+        comparison,
+        outputDir,
+      }),
+      "utf8",
+    );
+    return {
+      comparisonPath,
+      outputDir,
+      reportPath,
+      status: comparison.pass ? "pass" : "fail",
+    };
+  } catch (error) {
+    await fs.writeFile(path.join(outputDir, "error.txt"), `${formatErrorMessage(error)}\n`, "utf8");
+    throw error;
+  }
+}