From 57b2d297613a29010b14eca8fefbeced00e5db86 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 4 May 2026 01:30:08 +0100 Subject: [PATCH] feat(qa): add Mantis desktop browser smoke --- docs/concepts/mantis.md | 46 +- docs/concepts/qa-e2e-automation.md | 40 +- extensions/qa-lab/src/cli.test.ts | 71 +++ extensions/qa-lab/src/mantis/cli.runtime.ts | 16 + extensions/qa-lab/src/mantis/cli.ts | 51 ++ .../desktop-browser-smoke.runtime.test.ts | 141 +++++ .../mantis/desktop-browser-smoke.runtime.ts | 544 ++++++++++++++++++ 7 files changed, 877 insertions(+), 32 deletions(-) create mode 100644 extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts create mode 100644 extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts diff --git a/docs/concepts/mantis.md b/docs/concepts/mantis.md index 1a8b962bdb6..3ea4ff81cde 100644 --- a/docs/concepts/mantis.md +++ b/docs/concepts/mantis.md @@ -89,6 +89,27 @@ directory, installs dependencies, builds each ref, runs the scenario with and `mantis-report.md`. For the first Discord scenario, a successful verification means baseline status is `fail` and candidate status is `pass`. +The first VM/browser primitive is the desktop smoke: + +```bash +pnpm openclaw qa mantis desktop-browser-smoke \ + --output-dir .artifacts/qa-e2e/mantis/desktop-browser +``` + +It leases or reuses a Crabbox desktop machine, starts a visible browser inside the +VNC session, captures the desktop, pulls artifacts back to the local output +directory, and writes the reconnect command into the report. The command defaults +to the Hetzner provider because it is the first provider with working desktop/VNC +coverage in the Mantis lane. Override it with `--provider`, `--crabbox-bin`, or +`OPENCLAW_MANTIS_CRABBOX_PROVIDER` when running against another Crabbox fleet. + +Useful desktop smoke flags: + +- `--lease-id <id>` or `OPENCLAW_MANTIS_CRABBOX_LEASE_ID` reuses a warmed desktop. +- `--browser-url <url>` changes the page opened in the visible browser.
+- `--keep-lease` or `OPENCLAW_MANTIS_KEEP_VM=1` keeps a newly created passing lease open for VNC inspection. Failed runs keep the lease by default when one was created so an operator can reconnect. +- `--class`, `--idle-timeout`, and `--ttl` tune machine size and lease lifetime. + The GitHub smoke workflow is `Mantis Discord Smoke`. The before and after GitHub workflow for the first real scenario is `Mantis Discord Status Reactions`. It accepts: @@ -132,18 +153,19 @@ ClawSweeper review findings. 1. Acquire credentials. 2. Allocate or reuse a VM. -3. Prepare a clean checkout for the baseline ref. -4. Install dependencies and build only what the scenario needs. -5. Start a child OpenClaw Gateway with an isolated state directory. -6. Configure the live transport, provider, model, and browser profile. -7. Run the scenario and capture baseline evidence. -8. Stop the gateway and preserve logs. -9. Prepare the candidate ref in the same VM. -10. Run the same scenario and capture candidate evidence. -11. Compare the oracle results and visual evidence. -12. Write Markdown, JSON, logs, screenshots, and optional trace artifacts. -13. Upload GitHub Actions artifacts. -14. Post a concise PR or Discord status message. +3. Prepare the desktop/browser profile when the scenario needs UI evidence. +4. Prepare a clean checkout for the baseline ref. +5. Install dependencies and build only what the scenario needs. +6. Start a child OpenClaw Gateway with an isolated state directory. +7. Configure the live transport, provider, model, and browser profile. +8. Run the scenario and capture baseline evidence. +9. Stop the gateway and preserve logs. +10. Prepare the candidate ref in the same VM. +11. Run the same scenario and capture candidate evidence. +12. Compare the oracle results and visual evidence. +13. Write Markdown, JSON, logs, screenshots, and optional trace artifacts. +14. Upload GitHub Actions artifacts. +15. Post a concise PR or Discord status message. 
The scenario should be able to fail in two different ways: diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md index b89c449bbec..5da067faed6 100644 --- a/docs/concepts/qa-e2e-automation.md +++ b/docs/concepts/qa-e2e-automation.md @@ -29,26 +29,26 @@ Current pieces: Every QA flow runs under `pnpm openclaw qa `. Many have `pnpm qa:*` script aliases; both forms are supported. -| Command | Purpose | -| --------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `qa run` | Bundled QA self-check; writes a Markdown report. | -| `qa suite` | Run repo-backed scenarios against the QA gateway lane. Aliases: `pnpm openclaw qa suite --runner multipass` for a disposable Linux VM. | -| `qa coverage` | Print the markdown scenario-coverage inventory (`--json` for machine output). | -| `qa parity-report` | Compare two `qa-suite-summary.json` files and write the agentic parity report. | -| `qa character-eval` | Run the character QA scenario across multiple live models with a judged report. See [Reporting](#reporting). | -| `qa manual` | Run a one-off prompt against the selected provider/model lane. | -| `qa ui` | Start the QA debugger UI and local QA bus (alias: `pnpm qa:lab:ui`). | -| `qa docker-build-image` | Build the prebaked QA Docker image. | -| `qa docker-scaffold` | Write a docker-compose scaffold for the QA dashboard + gateway lane. | -| `qa up` | Build the QA site, start the Docker-backed stack, print the URL (alias: `pnpm qa:lab:up`; `:fast` variant adds `--use-prebuilt-image --bind-ui-dist --skip-ui-build`). | -| `qa aimock` | Start only the AIMock provider server. | -| `qa mock-openai` | Start only the scenario-aware `mock-openai` provider server. | -| `qa credentials doctor` / `add` / `list` / `remove` | Manage the shared Convex credential pool. 
| -| `qa matrix` | Live transport lane against a disposable Tuwunel homeserver. See [Matrix QA](/concepts/qa-matrix). | -| `qa telegram` | Live transport lane against a real private Telegram group. | -| `qa discord` | Live transport lane against a real private Discord guild channel. | -| `qa slack` | Live transport lane against a real private Slack channel. | -| `qa mantis` | Before and after verification runner for live transport bugs, with the first Discord status-reactions scenario. See [Mantis](/concepts/mantis). | +| Command | Purpose | +| --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `qa run` | Bundled QA self-check; writes a Markdown report. | +| `qa suite` | Run repo-backed scenarios against the QA gateway lane. Aliases: `pnpm openclaw qa suite --runner multipass` for a disposable Linux VM. | +| `qa coverage` | Print the markdown scenario-coverage inventory (`--json` for machine output). | +| `qa parity-report` | Compare two `qa-suite-summary.json` files and write the agentic parity report. | +| `qa character-eval` | Run the character QA scenario across multiple live models with a judged report. See [Reporting](#reporting). | +| `qa manual` | Run a one-off prompt against the selected provider/model lane. | +| `qa ui` | Start the QA debugger UI and local QA bus (alias: `pnpm qa:lab:ui`). | +| `qa docker-build-image` | Build the prebaked QA Docker image. | +| `qa docker-scaffold` | Write a docker-compose scaffold for the QA dashboard + gateway lane. | +| `qa up` | Build the QA site, start the Docker-backed stack, print the URL (alias: `pnpm qa:lab:up`; `:fast` variant adds `--use-prebuilt-image --bind-ui-dist --skip-ui-build`). | +| `qa aimock` | Start only the AIMock provider server. | +| `qa mock-openai` | Start only the scenario-aware `mock-openai` provider server. 
| +| `qa credentials doctor` / `add` / `list` / `remove` | Manage the shared Convex credential pool. | +| `qa matrix` | Live transport lane against a disposable Tuwunel homeserver. See [Matrix QA](/concepts/qa-matrix). | +| `qa telegram` | Live transport lane against a real private Telegram group. | +| `qa discord` | Live transport lane against a real private Discord guild channel. | +| `qa slack` | Live transport lane against a real private Slack channel. | +| `qa mantis` | Before and after verification runner for live transport bugs, with Discord status-reactions evidence and a Crabbox desktop/browser smoke. See [Mantis](/concepts/mantis). | ## Operator flow diff --git a/extensions/qa-lab/src/cli.test.ts b/extensions/qa-lab/src/cli.test.ts index 11a917accda..43c423c8216 100644 --- a/extensions/qa-lab/src/cli.test.ts +++ b/extensions/qa-lab/src/cli.test.ts @@ -49,6 +49,7 @@ const { runQaSuiteCommand, runQaTelegramCommand, runMantisBeforeAfterCommand, + runMantisDesktopBrowserSmokeCommand, runMantisDiscordSmokeCommand, } = vi.hoisted(() => ({ runQaCredentialsAddCommand: vi.fn(), @@ -59,6 +60,7 @@ const { runQaSuiteCommand: vi.fn(), runQaTelegramCommand: vi.fn(), runMantisBeforeAfterCommand: vi.fn(), + runMantisDesktopBrowserSmokeCommand: vi.fn(), runMantisDiscordSmokeCommand: vi.fn(), })); @@ -78,6 +80,7 @@ vi.mock("./live-transports/telegram/cli.runtime.js", () => ({ vi.mock("./mantis/cli.runtime.js", () => ({ runMantisBeforeAfterCommand, + runMantisDesktopBrowserSmokeCommand, runMantisDiscordSmokeCommand, })); @@ -105,6 +108,7 @@ describe("qa cli registration", () => { runQaSuiteCommand.mockReset(); runQaTelegramCommand.mockReset(); runMantisBeforeAfterCommand.mockReset(); + runMantisDesktopBrowserSmokeCommand.mockReset(); runMantisDiscordSmokeCommand.mockReset(); listQaRunnerCliContributions .mockReset() @@ -208,6 +212,73 @@ describe("qa cli registration", () => { }); }); + it("routes mantis desktop browser smoke flags into the mantis runtime command", async () 
=> { + await program.parseAsync([ + "node", + "openclaw", + "qa", + "mantis", + "desktop-browser-smoke", + "--repo-root", + "/tmp/openclaw-repo", + "--output-dir", + ".artifacts/qa-e2e/mantis/desktop-browser", + "--browser-url", + "https://openclaw.ai/docs", + "--crabbox-bin", + "/tmp/crabbox", + "--provider", + "hetzner", + "--class", + "beast", + "--lease-id", + "cbx_123abc", + "--idle-timeout", + "30m", + "--ttl", + "90m", + "--keep-lease", + ]); + + expect(runMantisDesktopBrowserSmokeCommand).toHaveBeenCalledWith({ + browserUrl: "https://openclaw.ai/docs", + crabboxBin: "/tmp/crabbox", + idleTimeout: "30m", + keepLease: true, + leaseId: "cbx_123abc", + machineClass: "beast", + outputDir: ".artifacts/qa-e2e/mantis/desktop-browser", + provider: "hetzner", + repoRoot: "/tmp/openclaw-repo", + ttl: "90m", + }); + }); + + it("does not shadow mantis desktop browser runtime env defaults", async () => { + await program.parseAsync([ + "node", + "openclaw", + "qa", + "mantis", + "desktop-browser-smoke", + "--repo-root", + "/tmp/openclaw-repo", + ]); + + expect(runMantisDesktopBrowserSmokeCommand).toHaveBeenCalledWith({ + browserUrl: undefined, + crabboxBin: undefined, + idleTimeout: undefined, + keepLease: undefined, + leaseId: undefined, + machineClass: undefined, + outputDir: undefined, + provider: undefined, + repoRoot: "/tmp/openclaw-repo", + ttl: undefined, + }); + }); + it("routes coverage report flags into the qa runtime command", async () => { await program.parseAsync([ "node", diff --git a/extensions/qa-lab/src/mantis/cli.runtime.ts b/extensions/qa-lab/src/mantis/cli.runtime.ts index 384f61e93c3..703aa429954 100644 --- a/extensions/qa-lab/src/mantis/cli.runtime.ts +++ b/extensions/qa-lab/src/mantis/cli.runtime.ts @@ -1,3 +1,7 @@ +import { + runMantisDesktopBrowserSmoke, + type MantisDesktopBrowserSmokeOptions, +} from "./desktop-browser-smoke.runtime.js"; import { runMantisDiscordSmoke, type MantisDiscordSmokeOptions } from "./discord-smoke.runtime.js"; import { 
runMantisBeforeAfter, type MantisBeforeAfterOptions } from "./run.runtime.js"; @@ -18,3 +22,15 @@ export async function runMantisBeforeAfterCommand(opts: MantisBeforeAfterOptions process.exitCode = 1; } } + +export async function runMantisDesktopBrowserSmokeCommand(opts: MantisDesktopBrowserSmokeOptions) { + const result = await runMantisDesktopBrowserSmoke(opts); + process.stdout.write(`Mantis desktop browser report: ${result.reportPath}\n`); + process.stdout.write(`Mantis desktop browser summary: ${result.summaryPath}\n`); + if (result.screenshotPath) { + process.stdout.write(`Mantis desktop browser screenshot: ${result.screenshotPath}\n`); + } + if (result.status === "fail") { + process.exitCode = 1; + } +} diff --git a/extensions/qa-lab/src/mantis/cli.ts b/extensions/qa-lab/src/mantis/cli.ts index 18129a00d51..28eee774c86 100644 --- a/extensions/qa-lab/src/mantis/cli.ts +++ b/extensions/qa-lab/src/mantis/cli.ts @@ -1,5 +1,6 @@ import type { Command } from "commander"; import { createLazyCliRuntimeLoader } from "../live-transports/shared/live-transport-cli.js"; +import type { MantisDesktopBrowserSmokeOptions } from "./desktop-browser-smoke.runtime.js"; import type { MantisDiscordSmokeOptions } from "./discord-smoke.runtime.js"; import type { MantisBeforeAfterOptions } from "./run.runtime.js"; @@ -19,6 +20,11 @@ async function runBeforeAfter(opts: MantisBeforeAfterOptions) { await runtime.runMantisBeforeAfterCommand(opts); } +async function runDesktopBrowserSmoke(opts: MantisDesktopBrowserSmokeOptions) { + const runtime = await loadMantisCliRuntime(); + await runtime.runMantisDesktopBrowserSmokeCommand(opts); +} + type MantisDiscordSmokeCommanderOptions = { channelId?: string; guildId?: string; @@ -46,6 +52,20 @@ type MantisBeforeAfterCommanderOptions = { transport?: string; }; +type MantisDesktopBrowserSmokeCommanderOptions = { + browserUrl?: string; + class?: string; + crabboxBin?: string; + idleTimeout?: string; + keepLease?: boolean; + leaseId?: string; + 
machineClass?: string; + outputDir?: string; + provider?: string; + repoRoot?: string; + ttl?: string; +}; + export function registerMantisCli(qa: Command) { const mantis = qa .command("mantis") @@ -108,4 +128,35 @@ export function registerMantisCli(qa: Command) { tokenEnv: opts.tokenEnv, }); }); + + mantis + .command("desktop-browser-smoke") + .description( + "Lease or reuse a Crabbox desktop, open a visible browser, and capture a VNC desktop screenshot", + ) /* Value-taking options declare a <placeholder>; Commander parses a bare flag as a boolean. */ + .option("--repo-root <path>", "Repository root to target when running from a neutral cwd") + .option("--output-dir <path>", "Mantis desktop browser artifact directory") + .option("--browser-url <url>", "URL to open in the visible browser") + .option("--crabbox-bin <path>", "Crabbox binary path") + .option("--provider <provider>", "Crabbox provider") + .option("--machine-class <class>", "Crabbox machine class") + .option("--class <class>", "Alias for --machine-class") + .option("--lease-id <id>", "Reuse an existing Crabbox lease") + .option("--idle-timeout <duration>", "Crabbox idle timeout") + .option("--ttl <duration>", "Crabbox maximum lease lifetime") + .option("--keep-lease", "Keep a lease created by this run after a passing smoke") + .action(async (opts: MantisDesktopBrowserSmokeCommanderOptions) => { + await runDesktopBrowserSmoke({ + browserUrl: opts.browserUrl, + crabboxBin: opts.crabboxBin, + idleTimeout: opts.idleTimeout, + keepLease: opts.keepLease, + leaseId: opts.leaseId, + machineClass: opts.machineClass ??
opts.class, + outputDir: opts.outputDir, + provider: opts.provider, + repoRoot: opts.repoRoot, + ttl: opts.ttl, + }); + }); } diff --git a/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts new file mode 100644 index 00000000000..2d44e9ceadf --- /dev/null +++ b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts @@ -0,0 +1,141 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { runMantisDesktopBrowserSmoke } from "./desktop-browser-smoke.runtime.js"; + +describe("mantis desktop browser smoke runtime", () => { + let repoRoot: string; + + beforeEach(async () => { + repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "mantis-desktop-browser-smoke-")); + }); + + afterEach(async () => { + await fs.rm(repoRoot, { force: true, recursive: true }); + }); + + it("leases a desktop box, runs a visible browser, copies artifacts, and stops on pass", async () => { + const commands: { args: readonly string[]; command: string }[] = []; + const runner = vi.fn(async (command: string, args: readonly string[]) => { + commands.push({ command, args }); + if (command === "/tmp/crabbox" && args[0] === "warmup") { + return { stdout: "ready lease cbx_abc123\n", stderr: "" }; + } + if (command === "/tmp/crabbox" && args[0] === "inspect") { + return { + stdout: `${JSON.stringify({ + host: "203.0.113.10", + id: "cbx_abc123", + provider: "hetzner", + slug: "brisk-mantis", + sshKey: "/tmp/key", + sshPort: "2222", + sshUser: "crabbox", + state: "active", + })}\n`, + stderr: "", + }; + } + if (command === "rsync") { + const outputDir = args.at(-1); + expect(outputDir).toBeTypeOf("string"); + await fs.mkdir(outputDir as string, { recursive: true }); + await fs.writeFile(path.join(outputDir as string, "desktop-browser-smoke.png"), "png"); + await 
fs.writeFile(path.join(outputDir as string, "remote-metadata.json"), "{}\n"); + await fs.writeFile(path.join(outputDir as string, "chrome.log"), "chrome\n"); + return { stdout: "", stderr: "" }; + } + return { stdout: "", stderr: "" }; + }); + + const result = await runMantisDesktopBrowserSmoke({ + browserUrl: "https://openclaw.ai/docs", + commandRunner: runner, + crabboxBin: "/tmp/crabbox", + now: () => new Date("2026-05-04T12:00:00.000Z"), + outputDir: ".artifacts/qa-e2e/mantis/desktop-browser-test", + repoRoot, + }); + + expect(result.status).toBe("pass"); + expect(commands.map((entry) => [entry.command, entry.args[0]])).toEqual([ + ["/tmp/crabbox", "warmup"], + ["/tmp/crabbox", "inspect"], + ["/tmp/crabbox", "run"], + ["rsync", "-az"], + ["/tmp/crabbox", "stop"], + ]); + const rsyncArgs = commands.find((entry) => entry.command === "rsync")?.args ?? []; + expect(rsyncArgs).not.toContain("--delete"); + expect(rsyncArgs).toEqual( + expect.arrayContaining([ + "crabbox@203.0.113.10:/tmp/openclaw-mantis-desktop-2026-05-04T12-00-00-000Z/desktop-browser-smoke.png", + "crabbox@203.0.113.10:/tmp/openclaw-mantis-desktop-2026-05-04T12-00-00-000Z/remote-metadata.json", + "crabbox@203.0.113.10:/tmp/openclaw-mantis-desktop-2026-05-04T12-00-00-000Z/chrome.log", + ]), + ); + const remoteScript = commands + .find((entry) => entry.command === "/tmp/crabbox" && entry.args[0] === "run") + ?.args.at(-1); + expect(remoteScript).toContain("${BROWSER:-}"); + expect(remoteScript).toContain("${CHROME_BIN:-}"); + expect(remoteScript).toContain("chromium-browser"); + expect(remoteScript).toContain('"browserBinary": "$browser_bin"'); + await expect(fs.readFile(result.screenshotPath ?? 
"", "utf8")).resolves.toBe("png"); + const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as { + browserUrl: string; + crabbox: { id: string; vncCommand: string }; + status: string; + }; + expect(summary).toMatchObject({ + browserUrl: "https://openclaw.ai/docs", + crabbox: { + id: "cbx_abc123", + vncCommand: "/tmp/crabbox vnc --provider hetzner --id cbx_abc123 --open", + }, + status: "pass", + }); + }); + + it("keeps an existing lease and writes failure reports when the remote run fails", async () => { + const commands: { args: readonly string[]; command: string }[] = []; + const runner = vi.fn(async (command: string, args: readonly string[]) => { + commands.push({ command, args }); + if (command === "/tmp/crabbox" && args[0] === "inspect") { + return { + stdout: `${JSON.stringify({ + host: "203.0.113.10", + id: "cbx_existing", + provider: "hetzner", + sshKey: "/tmp/key", + sshPort: "2222", + sshUser: "crabbox", + })}\n`, + stderr: "", + }; + } + if (command === "/tmp/crabbox" && args[0] === "run") { + throw new Error("remote chrome failed"); + } + return { stdout: "", stderr: "" }; + }); + + const result = await runMantisDesktopBrowserSmoke({ + commandRunner: runner, + crabboxBin: "/tmp/crabbox", + leaseId: "cbx_existing", + outputDir: ".artifacts/qa-e2e/mantis/desktop-browser-fail", + repoRoot, + }); + + expect(result.status).toBe("fail"); + expect(commands.map((entry) => [entry.command, entry.args[0]])).toEqual([ + ["/tmp/crabbox", "inspect"], + ["/tmp/crabbox", "run"], + ]); + await expect(fs.readFile(path.join(result.outputDir, "error.txt"), "utf8")).resolves.toContain( + "remote chrome failed", + ); + }); +}); diff --git a/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts new file mode 100644 index 00000000000..a5ee129bf91 --- /dev/null +++ b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts @@ -0,0 +1,544 @@ +import { spawn, type SpawnOptions } from 
"node:child_process"; +import fs from "node:fs/promises"; +import path from "node:path"; +import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; +import { ensureRepoBoundDirectory, resolveRepoRelativeOutputDir } from "../cli-paths.js"; + +export type MantisDesktopBrowserSmokeOptions = { + browserUrl?: string; + commandRunner?: CommandRunner; + crabboxBin?: string; + env?: NodeJS.ProcessEnv; + idleTimeout?: string; + keepLease?: boolean; + leaseId?: string; + machineClass?: string; + now?: () => Date; + outputDir?: string; + provider?: string; + repoRoot?: string; + ttl?: string; +}; + +export type MantisDesktopBrowserSmokeResult = { + outputDir: string; + reportPath: string; + screenshotPath?: string; + status: "pass" | "fail"; + summaryPath: string; +}; + +type CommandResult = { + stderr: string; + stdout: string; +}; + +type CommandRunner = ( + command: string, + args: readonly string[], + options: SpawnOptions, +) => Promise; + +type CrabboxInspect = { + host?: string; + id?: string; + provider?: string; + ready?: boolean; + slug?: string; + sshKey?: string; + sshPort?: string; + sshUser?: string; + state?: string; +}; + +type MantisDesktopBrowserSmokeSummary = { + artifacts: { + reportPath: string; + screenshotPath?: string; + summaryPath: string; + }; + browserUrl: string; + crabbox: { + bin: string; + createdLease: boolean; + id: string; + provider: string; + slug?: string; + state?: string; + vncCommand: string; + }; + error?: string; + finishedAt: string; + outputDir: string; + remoteOutputDir: string; + startedAt: string; + status: "pass" | "fail"; +}; + +const DEFAULT_BROWSER_URL = "https://openclaw.ai"; +const DEFAULT_PROVIDER = "hetzner"; +const DEFAULT_CLASS = "beast"; +const DEFAULT_IDLE_TIMEOUT = "60m"; +const DEFAULT_TTL = "120m"; +const CRABBOX_BIN_ENV = "OPENCLAW_MANTIS_CRABBOX_BIN"; +const CRABBOX_PROVIDER_ENV = "OPENCLAW_MANTIS_CRABBOX_PROVIDER"; +const CRABBOX_CLASS_ENV = "OPENCLAW_MANTIS_CRABBOX_CLASS"; +const 
CRABBOX_LEASE_ID_ENV = "OPENCLAW_MANTIS_CRABBOX_LEASE_ID"; +const CRABBOX_KEEP_ENV = "OPENCLAW_MANTIS_KEEP_VM"; +const CRABBOX_IDLE_TIMEOUT_ENV = "OPENCLAW_MANTIS_CRABBOX_IDLE_TIMEOUT"; +const CRABBOX_TTL_ENV = "OPENCLAW_MANTIS_CRABBOX_TTL"; + +function trimToValue(value: string | undefined) { + const trimmed = value?.trim(); + return trimmed && trimmed.length > 0 ? trimmed : undefined; +} + +function isTruthyOptIn(value: string | undefined) { + const normalized = value?.trim().toLowerCase(); + return normalized === "1" || normalized === "true" || normalized === "yes"; +} + +function defaultOutputDir(repoRoot: string, startedAt: Date) { + const stamp = startedAt.toISOString().replace(/[:.]/gu, "-"); + return path.join(repoRoot, ".artifacts", "qa-e2e", "mantis", `desktop-browser-${stamp}`); +} + +async function defaultCommandRunner( + command: string, + args: readonly string[], + options: SpawnOptions, +): Promise<CommandResult> { /* resolves with the collected stdout/stderr on exit code 0 */ + return new Promise((resolve, reject) => { + const child = spawn(command, args, { + ...options, + stdio: ["ignore", "pipe", "pipe"], + }); + let stdout = ""; + let stderr = ""; + child.stdout?.on("data", (chunk: Buffer) => { + const text = chunk.toString(); + stdout += text; + if (options.stdio === "inherit") { + process.stdout.write(text); + } + }); + child.stderr?.on("data", (chunk: Buffer) => { + const text = chunk.toString(); + stderr += text; + if (options.stdio === "inherit") { + process.stderr.write(text); + } + }); + child.on("error", reject); + child.on("close", (code, signal) => { + if (code === 0) { + resolve({ stdout, stderr }); + return; + } + const detail = signal ? `signal ${signal}` : `exit code ${code ??
"unknown"}`; + reject(new Error(`${command} ${args.join(" ")} failed with ${detail}`)); + }); + }); +} + +async function pathExists(filePath: string) { + try { + await fs.access(filePath); + return true; + } catch { + return false; + } +} + +async function resolveCrabboxBin(params: { + env: NodeJS.ProcessEnv; + explicit?: string; + repoRoot: string; +}) { + const configured = trimToValue(params.explicit) ?? trimToValue(params.env[CRABBOX_BIN_ENV]); + if (configured) { + return configured; + } + const sibling = path.resolve(params.repoRoot, "../crabbox/bin/crabbox"); + if (await pathExists(sibling)) { + return sibling; + } + return "crabbox"; +} + +function extractLeaseId(output: string) { + return output.match(/\bcbx_[a-f0-9]+\b/u)?.[0]; +} + +function shellQuote(value: string) { + return `'${value.replaceAll("'", "'\\''")}'`; +} + +function renderRemoteScript(params: { browserUrl: string; remoteOutputDir: string }) { + const shellUrl = shellQuote(params.browserUrl); + const shellUrlJson = shellQuote(JSON.stringify(params.browserUrl)); + const shellOutputDir = shellQuote(params.remoteOutputDir); + return `set -euo pipefail +out=${shellOutputDir} +url=${shellUrl} +url_json=${shellUrlJson} +rm -rf "$out" +mkdir -p "$out" +export DISPLAY="\${DISPLAY:-:99}" +if ! command -v scrot >/dev/null 2>&1; then + sudo apt-get update -y >"$out/apt.log" 2>&1 + sudo DEBIAN_FRONTEND=noninteractive apt-get install -y scrot >>"$out/apt.log" 2>&1 +fi +profile="$out/chrome-profile" +mkdir -p "$profile" +browser_bin="" +for candidate in "\${BROWSER:-}" "\${CHROME_BIN:-}" google-chrome chromium chromium-browser; do + if [ -n "$candidate" ] && command -v "$candidate" >/dev/null 2>&1; then + browser_bin="$(command -v "$candidate")" + break + fi +done +if [ -z "$browser_bin" ]; then + echo "No browser binary found. Checked BROWSER, CHROME_BIN, google-chrome, chromium, chromium-browser." 
>&2 + exit 127 +fi +"$browser_bin" \ + --user-data-dir="$profile" \ + --no-first-run \ + --no-default-browser-check \ + --disable-dev-shm-usage \ + --window-size=1280,900 \ + --window-position=0,0 \ + --class=mantis-desktop-browser-smoke \ + "$url" >"$out/chrome.log" 2>&1 & +chrome_pid=$! +cleanup() { + kill "$chrome_pid" >/dev/null 2>&1 || true +} +trap cleanup EXIT +sleep 8 +scrot "$out/desktop-browser-smoke.png" +cleanup +trap - EXIT +sleep 1 +rm -rf "$profile" || true +cat >"$out/remote-metadata.json" < line !== undefined); + return `${lines.join("\n")}\n`; +} + +async function runCommand(params: { + args: readonly string[]; + command: string; + cwd: string; + runner: CommandRunner; + stdio?: "inherit" | "pipe"; +}) { + return params.runner(params.command, params.args, { + cwd: params.cwd, + env: process.env, + stdio: params.stdio ?? "pipe", + }); +} + +async function warmupCrabbox(params: { + crabboxBin: string; + cwd: string; + idleTimeout: string; + machineClass: string; + provider: string; + runner: CommandRunner; + ttl: string; +}) { + const result = await runCommand({ + command: params.crabboxBin, + args: [ + "warmup", + "--provider", + params.provider, + "--desktop", + "--browser", + "--class", + params.machineClass, + "--idle-timeout", + params.idleTimeout, + "--ttl", + params.ttl, + ], + cwd: params.cwd, + runner: params.runner, + stdio: "inherit", + }); + const leaseId = extractLeaseId(`${result.stdout}\n${result.stderr}`); + if (!leaseId) { + throw new Error("Crabbox warmup did not print a cbx_ lease id."); + } + return leaseId; +} + +async function inspectCrabbox(params: { + crabboxBin: string; + cwd: string; + leaseId: string; + provider: string; + runner: CommandRunner; +}) { + const result = await runCommand({ + command: params.crabboxBin, + args: ["inspect", "--provider", params.provider, "--id", params.leaseId, "--json"], + cwd: params.cwd, + runner: params.runner, + }); + return JSON.parse(result.stdout) as CrabboxInspect; +} + +async function 
copyRemoteArtifacts(params: { + cwd: string; + inspect: CrabboxInspect; + outputDir: string; + remoteOutputDir: string; + runner: CommandRunner; +}) { + const { host, sshKey, sshPort, sshUser } = params.inspect; + if (!host || !sshKey || !sshUser) { + throw new Error("Crabbox inspect output is missing SSH copy details."); + } + await runCommand({ + command: "rsync", + args: [ + "-az", + "-e", + [ + "ssh", + "-i", + shellQuote(sshKey), + "-p", + sshPort ?? "22", + "-o", + "BatchMode=yes", + "-o", + "ConnectTimeout=15", + "-o", + "StrictHostKeyChecking=no", + "-o", + "UserKnownHostsFile=/dev/null", + ].join(" "), + `${sshUser}@${host}:${params.remoteOutputDir}/desktop-browser-smoke.png`, + `${sshUser}@${host}:${params.remoteOutputDir}/remote-metadata.json`, + `${sshUser}@${host}:${params.remoteOutputDir}/chrome.log`, + `${params.outputDir}/`, + ], + cwd: params.cwd, + runner: params.runner, + }); +} + +async function stopCrabbox(params: { + crabboxBin: string; + cwd: string; + leaseId: string; + provider: string; + runner: CommandRunner; +}) { + await runCommand({ + command: params.crabboxBin, + args: ["stop", "--provider", params.provider, params.leaseId], + cwd: params.cwd, + runner: params.runner, + stdio: "inherit", + }); +} + +export async function runMantisDesktopBrowserSmoke( + opts: MantisDesktopBrowserSmokeOptions = {}, +): Promise<MantisDesktopBrowserSmokeResult> { + const env = opts.env ?? process.env; + const startedAt = (opts.now ?? (() => new Date()))(); + const repoRoot = path.resolve(opts.repoRoot ?? process.cwd()); + const outputDir = await ensureRepoBoundDirectory( + repoRoot, + resolveRepoRelativeOutputDir(repoRoot, opts.outputDir) ??
defaultOutputDir(repoRoot, startedAt), + "Mantis desktop browser smoke output directory", + { mode: 0o755 }, + ); + const summaryPath = path.join(outputDir, "mantis-desktop-browser-smoke-summary.json"); + const reportPath = path.join(outputDir, "mantis-desktop-browser-smoke-report.md"); + const crabboxBin = await resolveCrabboxBin({ env, explicit: opts.crabboxBin, repoRoot }); + const provider = + trimToValue(opts.provider) ?? trimToValue(env[CRABBOX_PROVIDER_ENV]) ?? DEFAULT_PROVIDER; + const machineClass = + trimToValue(opts.machineClass) ?? trimToValue(env[CRABBOX_CLASS_ENV]) ?? DEFAULT_CLASS; + const idleTimeout = + trimToValue(opts.idleTimeout) ?? + trimToValue(env[CRABBOX_IDLE_TIMEOUT_ENV]) ?? + DEFAULT_IDLE_TIMEOUT; + const ttl = trimToValue(opts.ttl) ?? trimToValue(env[CRABBOX_TTL_ENV]) ?? DEFAULT_TTL; + const browserUrl = trimToValue(opts.browserUrl) ?? DEFAULT_BROWSER_URL; + const runner = opts.commandRunner ?? defaultCommandRunner; + const explicitLeaseId = trimToValue(opts.leaseId) ?? trimToValue(env[CRABBOX_LEASE_ID_ENV]); + const keepLease = opts.keepLease ?? isTruthyOptIn(env[CRABBOX_KEEP_ENV]); + const createdLease = explicitLeaseId === undefined; + const remoteOutputDir = `/tmp/openclaw-mantis-desktop-${startedAt + .toISOString() + .replace(/[^0-9A-Za-z]/gu, "-")}`; + let leaseId = explicitLeaseId; + let summary: MantisDesktopBrowserSmokeSummary | undefined; + + try { + leaseId = + leaseId ?? 
+ (await warmupCrabbox({ + crabboxBin, + cwd: repoRoot, + idleTimeout, + machineClass, + provider, + runner, + ttl, + })); + const inspected = await inspectCrabbox({ + crabboxBin, + cwd: repoRoot, + leaseId, + provider, + runner, + }); + await runCommand({ + command: crabboxBin, + args: [ + "run", + "--provider", + provider, + "--id", + leaseId, + "--desktop", + "--browser", + "--no-sync", + "--shell", + "--", + renderRemoteScript({ browserUrl, remoteOutputDir }), + ], + cwd: repoRoot, + runner, + stdio: "inherit", + }); + await copyRemoteArtifacts({ + cwd: repoRoot, + inspect: inspected, + outputDir, + remoteOutputDir, + runner, + }); + const screenshotPath = path.join(outputDir, "desktop-browser-smoke.png"); + if (!(await pathExists(screenshotPath))) { + throw new Error("Desktop browser screenshot was not copied back from Crabbox."); + } + summary = { + artifacts: { + reportPath, + screenshotPath, + summaryPath, + }, + browserUrl, + crabbox: { + bin: crabboxBin, + createdLease, + id: leaseId, + provider, + slug: inspected.slug, + state: inspected.state, + vncCommand: `${crabboxBin} vnc --provider ${provider} --id ${leaseId} --open`, + }, + finishedAt: new Date().toISOString(), + outputDir, + remoteOutputDir, + startedAt: startedAt.toISOString(), + status: "pass", + }; + return { + outputDir, + reportPath, + screenshotPath, + status: "pass", + summaryPath, + }; + } catch (error) { + summary = { + artifacts: { + reportPath, + summaryPath, + }, + browserUrl, + crabbox: { + bin: crabboxBin, + createdLease, + id: leaseId ?? "unallocated", + provider, + vncCommand: leaseId + ? 
`${crabboxBin} vnc --provider ${provider} --id ${leaseId} --open` + : "unallocated", + }, + error: formatErrorMessage(error), + finishedAt: new Date().toISOString(), + outputDir, + remoteOutputDir, + startedAt: startedAt.toISOString(), + status: "fail", + }; + await fs.writeFile(path.join(outputDir, "error.txt"), `${summary.error}\n`, "utf8"); + return { + outputDir, + reportPath, + status: "fail", + summaryPath, + }; + } finally { + if (summary) { + summary.finishedAt = new Date().toISOString(); + await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8"); + await fs.writeFile(reportPath, renderReport(summary), "utf8"); + } + if (summary?.status === "pass" && createdLease && leaseId && !keepLease) { + await stopCrabbox({ crabboxBin, cwd: repoRoot, leaseId, provider, runner }); + } + } +}