From 4856cbb017dac9cea4eac6f8f2eb87fd8e09fb28 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 4 May 2026 01:48:11 +0100 Subject: [PATCH] feat(qa): publish Mantis desktop screenshots --- .../mantis-discord-status-reactions.yml | 84 ++++++++++++++++++- docs/concepts/mantis.md | 5 +- extensions/qa-lab/src/cli.test.ts | 4 + extensions/qa-lab/src/mantis/cli.ts | 3 + .../desktop-browser-smoke.runtime.test.ts | 24 +++++- .../mantis/desktop-browser-smoke.runtime.ts | 50 ++++++++++- 6 files changed, 164 insertions(+), 6 deletions(-) diff --git a/.github/workflows/mantis-discord-status-reactions.yml b/.github/workflows/mantis-discord-status-reactions.yml index 28d65c14c94..7dcd48623d9 100644 --- a/.github/workflows/mantis-discord-status-reactions.yml +++ b/.github/workflows/mantis-discord-status-reactions.yml @@ -255,6 +255,24 @@ jobs: - name: Build Mantis harness run: pnpm build + - name: Install Crabbox CLI + env: + GH_TOKEN: ${{ github.token }} + shell: bash + run: | + set -euo pipefail + install_dir="${RUNNER_TEMP}/crabbox" + mkdir -p "$install_dir" "$HOME/.local/bin" + gh release download \ + --repo openclaw/crabbox \ + --pattern 'crabbox_*_linux_amd64.tar.gz' \ + --dir "$install_dir" \ + --clobber + tar -xzf "$install_dir"/crabbox_*_linux_amd64.tar.gz -C "$install_dir" + install -m 0755 "$install_dir/crabbox" "$HOME/.local/bin/crabbox" + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + "$HOME/.local/bin/crabbox" --version + - name: Prepare baseline and candidate worktrees shell: bash env: @@ -285,6 +303,10 @@ jobs: OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }} OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1" OPENCLAW_QA_DISCORD_CAPTURE_CONTENT: "1" + CRABBOX_COORDINATOR: ${{ secrets.CRABBOX_COORDINATOR }} + CRABBOX_COORDINATOR_TOKEN: ${{ secrets.CRABBOX_COORDINATOR_TOKEN }} + CRABBOX_ACCESS_CLIENT_ID: ${{ secrets.CRABBOX_ACCESS_CLIENT_ID }} + CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }} run: | set -euo pipefail @@ -299,6 +321,7 @@ jobs: require_var OPENAI_API_KEY require_var OPENCLAW_QA_CONVEX_SITE_URL require_var OPENCLAW_QA_CONVEX_SECRET_CI + require_var CRABBOX_COORDINATOR_TOKEN root=".artifacts/qa-e2e/mantis/discord-status-reactions" worktree_root=".artifacts/qa-e2e/mantis/discord-status-reactions-worktrees" @@ -328,6 +351,55 @@ jobs: run_lane baseline run_lane candidate + desktop_lease_id="" + warmup_output="$( + crabbox warmup \ + --provider hetzner \ + --desktop \ + --browser \ + --class standard \ + --idle-timeout 30m \ + --ttl 90m + )" + printf '%s\n' "$warmup_output" | tee "$root/crabbox-desktop-warmup.log" + desktop_lease_id="$(printf '%s\n' "$warmup_output" | grep -Eo 'cbx_[a-f0-9]+' | head -n 1 || true)" + if [[ ! "$desktop_lease_id" =~ ^cbx_[a-f0-9]+$ ]]; then + echo "Crabbox desktop warmup did not return a lease id." >&2 + exit 1 + fi + + cleanup_desktop_lease() { + if [[ -n "$desktop_lease_id" ]]; then + crabbox stop --provider hetzner "$desktop_lease_id" || true + fi + } + trap cleanup_desktop_lease EXIT + + capture_desktop_lane() { + local lane="$1" + local html_file="$root/$lane/discord-status-reactions-tool-only-timeline.html" + local desktop_dir="$root/$lane/desktop-browser" + if [[ ! -f "$html_file" ]]; then + echo "Missing desktop source HTML for ${lane}: ${html_file}" >&2 + exit 1 + fi + local args=( + openclaw qa mantis desktop-browser-smoke + --html-file "$html_file" + --output-dir "$desktop_dir" + --provider hetzner + --class standard + --idle-timeout 30m + --ttl 90m + --lease-id "$desktop_lease_id" + ) + pnpm "${args[@]}" + cp "$desktop_dir/desktop-browser-smoke.png" "$root/$lane/discord-status-reactions-tool-only-desktop.png" + } + + capture_desktop_lane baseline + capture_desktop_lane candidate + baseline_status="$(jq -r '.scenarios[0].status' "$root/baseline/discord-qa-summary.json")" candidate_status="$(jq -r '.scenarios[0].status' "$root/candidate/discord-qa-summary.json")" @@ -351,6 +423,8 @@ jobs: echo "- Candidate status: \`${candidate_status}\`" echo "- Baseline screenshot: \`baseline/discord-status-reactions-tool-only-timeline.png\`" echo "- Candidate screenshot: \`candidate/discord-status-reactions-tool-only-timeline.png\`" + echo "- Baseline desktop screenshot: \`baseline/discord-status-reactions-tool-only-desktop.png\`" + echo "- Candidate desktop screenshot: \`candidate/discord-status-reactions-tool-only-desktop.png\`" } > "$root/mantis-report.md" cat "$root/mantis-report.md" >> "$GITHUB_STEP_SUMMARY" @@ -409,7 +483,9 @@ jobs: for required in \ "$root/comparison.json" \ "$root/baseline/discord-status-reactions-tool-only-timeline.png" \ - "$root/candidate/discord-status-reactions-tool-only-timeline.png" + "$root/candidate/discord-status-reactions-tool-only-timeline.png" \ + "$root/baseline/discord-status-reactions-tool-only-desktop.png" \ + "$root/candidate/discord-status-reactions-tool-only-desktop.png" do if [[ ! -f "$required" ]]; then echo "Missing required QA evidence file: $required" >&2 @@ -435,6 +511,8 @@ jobs: mkdir -p "$artifacts_worktree/$artifact_root" cp "$root/baseline/discord-status-reactions-tool-only-timeline.png" "$artifacts_worktree/$artifact_root/baseline.png" cp "$root/candidate/discord-status-reactions-tool-only-timeline.png" "$artifacts_worktree/$artifact_root/candidate.png" + cp "$root/baseline/discord-status-reactions-tool-only-desktop.png" "$artifacts_worktree/$artifact_root/baseline-desktop.png" + cp "$root/candidate/discord-status-reactions-tool-only-desktop.png" "$artifacts_worktree/$artifact_root/candidate-desktop.png" cp "$root/comparison.json" "$artifacts_worktree/$artifact_root/comparison.json" cp "$root/mantis-report.md" "$artifacts_worktree/$artifact_root/mantis-report.md" @@ -470,6 +548,10 @@ jobs: | --- | --- | | Baseline Discord status reaction timeline | Candidate Discord status reaction timeline | + | Baseline desktop/VNC browser | Candidate desktop/VNC browser | + | --- | --- | + | Baseline Mantis desktop browser screenshot | Candidate Mantis desktop browser screenshot | + Raw QA files: https://github.com/${GITHUB_REPOSITORY}/tree/qa-artifacts/${artifact_root} EOF diff --git a/docs/concepts/mantis.md b/docs/concepts/mantis.md index 3ea4ff81cde..2f2381c09b5 100644 --- a/docs/concepts/mantis.md +++ b/docs/concepts/mantis.md @@ -107,6 +107,7 @@ Useful desktop smoke flags: - `--lease-id ` or `OPENCLAW_MANTIS_CRABBOX_LEASE_ID` reuses a warmed desktop. - `--browser-url ` changes the page opened in the visible browser. +- `--html-file ` renders a repo-local HTML artifact in the visible browser. Mantis uses this to capture the generated Discord status-reaction timeline through a real Crabbox desktop. - `--keep-lease` or `OPENCLAW_MANTIS_KEEP_VM=1` keeps a newly created passing lease open for VNC inspection. Failed runs keep the lease by default when one was created so an operator can reconnect. - `--class`, `--idle-timeout`, and `--ttl` tune machine size and lease lifetime. @@ -120,7 +121,9 @@ accepts: It checks out the workflow harness ref, builds separate baseline and candidate worktrees, runs `discord-status-reactions-tool-only` against each worktree, and uploads `baseline/`, `candidate/`, `comparison.json`, and `mantis-report.md` as -Actions artifacts. +Actions artifacts. It also renders each lane's timeline HTML in a Crabbox +desktop browser and publishes those VNC screenshots beside the deterministic +timeline PNGs in the PR comment. You can also trigger the status-reactions run directly from a PR comment: diff --git a/extensions/qa-lab/src/cli.test.ts b/extensions/qa-lab/src/cli.test.ts index 43c423c8216..380b99ef932 100644 --- a/extensions/qa-lab/src/cli.test.ts +++ b/extensions/qa-lab/src/cli.test.ts @@ -225,6 +225,8 @@ describe("qa cli registration", () => { ".artifacts/qa-e2e/mantis/desktop-browser", "--browser-url", "https://openclaw.ai/docs", + "--html-file", + "qa-artifacts/timeline.html", "--crabbox-bin", "/tmp/crabbox", "--provider", @@ -243,6 +245,7 @@ describe("qa cli registration", () => { expect(runMantisDesktopBrowserSmokeCommand).toHaveBeenCalledWith({ browserUrl: "https://openclaw.ai/docs", crabboxBin: "/tmp/crabbox", + htmlFile: "qa-artifacts/timeline.html", idleTimeout: "30m", keepLease: true, leaseId: "cbx_123abc", @@ -268,6 +271,7 @@ describe("qa cli registration", () => { expect(runMantisDesktopBrowserSmokeCommand).toHaveBeenCalledWith({ browserUrl: undefined, crabboxBin: undefined, + htmlFile: undefined, idleTimeout: undefined, keepLease: undefined, leaseId: undefined, diff --git a/extensions/qa-lab/src/mantis/cli.ts b/extensions/qa-lab/src/mantis/cli.ts index 28eee774c86..905bfa901c0 100644 --- a/extensions/qa-lab/src/mantis/cli.ts +++ b/extensions/qa-lab/src/mantis/cli.ts @@ -56,6 +56,7 @@ type MantisDesktopBrowserSmokeCommanderOptions = { browserUrl?: string; class?: string; crabboxBin?: string; + htmlFile?: string; idleTimeout?: string; keepLease?: boolean; leaseId?: string; @@ -137,6 +138,7 @@ export function registerMantisCli(qa: Command) { .option("--repo-root ", "Repository root to target when running from a neutral cwd") .option("--output-dir ", "Mantis desktop browser artifact directory") .option("--browser-url ", "URL to open in the visible browser") + .option("--html-file ", "Repo-local HTML file to render in the visible browser") .option("--crabbox-bin ", "Crabbox binary path") .option("--provider ", "Crabbox provider") .option("--machine-class ", "Crabbox machine class") @@ -149,6 +151,7 @@ export function registerMantisCli(qa: Command) { await runDesktopBrowserSmoke({ browserUrl: opts.browserUrl, crabboxBin: opts.crabboxBin, + htmlFile: opts.htmlFile, idleTimeout: opts.idleTimeout, keepLease: opts.keepLease, leaseId: opts.leaseId, diff --git a/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts index 2d44e9ceadf..4c4bf9fd253 100644 --- a/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts +++ b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts @@ -16,6 +16,8 @@ describe("mantis desktop browser smoke runtime", () => { }); it("leases a desktop box, runs a visible browser, copies artifacts, and stops on pass", async () => { + await fs.mkdir(path.join(repoRoot, "qa-artifacts"), { recursive: true }); + await fs.writeFile(path.join(repoRoot, "qa-artifacts", "timeline.html"), "

Mantis

"); const commands: { args: readonly string[]; command: string }[] = []; const runner = vi.fn(async (command: string, args: readonly string[]) => { commands.push({ command, args }); @@ -53,6 +55,7 @@ describe("mantis desktop browser smoke runtime", () => { browserUrl: "https://openclaw.ai/docs", commandRunner: runner, crabboxBin: "/tmp/crabbox", + htmlFile: "qa-artifacts/timeline.html", now: () => new Date("2026-05-04T12:00:00.000Z"), outputDir: ".artifacts/qa-e2e/mantis/desktop-browser-test", repoRoot, @@ -81,15 +84,19 @@ describe("mantis desktop browser smoke runtime", () => { expect(remoteScript).toContain("${BROWSER:-}"); expect(remoteScript).toContain("${CHROME_BIN:-}"); expect(remoteScript).toContain("chromium-browser"); + expect(remoteScript).toContain("base64 -d"); + expect(remoteScript).toContain('url="file://$out/input.html"'); expect(remoteScript).toContain('"browserBinary": "$browser_bin"'); await expect(fs.readFile(result.screenshotPath ?? "", "utf8")).resolves.toBe("png"); const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as { browserUrl: string; crabbox: { id: string; vncCommand: string }; + htmlFile?: string; status: string; }; + expect(summary.browserUrl).toMatch(/^file:\/\//u); expect(summary).toMatchObject({ - browserUrl: "https://openclaw.ai/docs", + htmlFile: path.join(repoRoot, "qa-artifacts", "timeline.html"), crabbox: { id: "cbx_abc123", vncCommand: "/tmp/crabbox vnc --provider hetzner --id cbx_abc123 --open", @@ -98,6 +105,21 @@ describe("mantis desktop browser smoke runtime", () => { }); }); + it("rejects html files outside the repository", async () => { + const runner = vi.fn(async () => ({ stdout: "", stderr: "" })); + + await expect( + runMantisDesktopBrowserSmoke({ + commandRunner: runner, + crabboxBin: "/tmp/crabbox", + htmlFile: "../outside.html", + outputDir: ".artifacts/qa-e2e/mantis/desktop-browser-outside", + repoRoot, + }), + ).rejects.toThrow("Mantis desktop HTML file must be inside the repository"); + expect(runner).not.toHaveBeenCalled(); + }); + it("keeps an existing lease and writes failure reports when the remote run fails", async () => { const commands: { args: readonly string[]; command: string }[] = []; const runner = vi.fn(async (command: string, args: readonly string[]) => { diff --git a/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts index a5ee129bf91..6c4da15ed38 100644 --- a/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts +++ b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts @@ -1,6 +1,7 @@ import { spawn, type SpawnOptions } from "node:child_process"; import fs from "node:fs/promises"; import path from "node:path"; +import { pathToFileURL } from "node:url"; import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; import { ensureRepoBoundDirectory, resolveRepoRelativeOutputDir } from "../cli-paths.js"; @@ -9,6 +10,7 @@ export type MantisDesktopBrowserSmokeOptions = { commandRunner?: CommandRunner; crabboxBin?: string; env?: NodeJS.ProcessEnv; + htmlFile?: string; idleTimeout?: string; keepLease?: boolean; leaseId?: string; @@ -58,6 +60,7 @@ type MantisDesktopBrowserSmokeSummary = { summaryPath: string; }; browserUrl: string; + htmlFile?: string; crabbox: { bin: string; createdLease: boolean; @@ -174,16 +177,43 @@ function shellQuote(value: string) { return `'${value.replaceAll("'", "'\\''")}'`; } -function renderRemoteScript(params: { browserUrl: string; remoteOutputDir: string }) { +function resolveRepoBoundFile(repoRoot: string, filePath: string, label: string) { + const resolved = path.resolve(repoRoot, filePath); + const relative = path.relative(repoRoot, resolved); + if (relative === "" || relative.startsWith("..") || path.isAbsolute(relative)) { + throw new Error(`${label} must be inside the repository: ${filePath}`); + } + return resolved; +} + +function renderRemoteScript(params: { + browserUrl: string; + htmlBase64?: string; + remoteOutputDir: string; +}) { const shellUrl = shellQuote(params.browserUrl); const shellUrlJson = shellQuote(JSON.stringify(params.browserUrl)); + const htmlBase64 = shellQuote(params.htmlBase64 ?? ""); const shellOutputDir = shellQuote(params.remoteOutputDir); + const inputModeJson = shellQuote(JSON.stringify(params.htmlBase64 ? "html-file" : "url")); + const openedUrlJson = shellQuote( + JSON.stringify( + params.htmlBase64 ? `file://${params.remoteOutputDir}/input.html` : params.browserUrl, + ), + ); return `set -euo pipefail out=${shellOutputDir} url=${shellUrl} url_json=${shellUrlJson} +html_b64=${htmlBase64} +input_mode_json=${inputModeJson} +opened_url_json=${openedUrlJson} rm -rf "$out" mkdir -p "$out" +if [ -n "$html_b64" ]; then + printf '%s' "$html_b64" | base64 -d >"$out/input.html" + url="file://$out/input.html" +fi export DISPLAY="\${DISPLAY:-:99}" if ! command -v scrot >/dev/null 2>&1; then sudo apt-get update -y >"$out/apt.log" 2>&1 @@ -228,6 +258,8 @@ cat >"$out/remote-metadata.json" <