diff --git a/.github/workflows/mantis-discord-status-reactions.yml b/.github/workflows/mantis-discord-status-reactions.yml
index 28d65c14c94..7dcd48623d9 100644
--- a/.github/workflows/mantis-discord-status-reactions.yml
+++ b/.github/workflows/mantis-discord-status-reactions.yml
@@ -255,6 +255,24 @@ jobs:
- name: Build Mantis harness
run: pnpm build
+ # Download the Crabbox CLI release archive with the GitHub CLI and install the
+ # binary so the remaining steps of this job can invoke `crabbox`.
+ - name: Install Crabbox CLI
+ env:
+ GH_TOKEN: ${{ github.token }}
+ shell: bash
+ run: |
+ set -euo pipefail
+ # Stage the archive under the runner temp dir; the binary itself goes to ~/.local/bin.
+ install_dir="${RUNNER_TEMP}/crabbox"
+ mkdir -p "$install_dir" "$HOME/.local/bin"
+ # No release tag is passed, so gh picks the latest release of openclaw/crabbox.
+ gh release download \
+ --repo openclaw/crabbox \
+ --pattern 'crabbox_*_linux_amd64.tar.gz' \
+ --dir "$install_dir" \
+ --clobber
+ tar -xzf "$install_dir"/crabbox_*_linux_amd64.tar.gz -C "$install_dir"
+ install -m 0755 "$install_dir/crabbox" "$HOME/.local/bin/crabbox"
+ echo "$HOME/.local/bin" >> "$GITHUB_PATH"
+ # GITHUB_PATH additions only apply to later steps, so call the binary by full path here.
+ "$HOME/.local/bin/crabbox" --version
+
- name: Prepare baseline and candidate worktrees
shell: bash
env:
@@ -285,6 +303,10 @@ jobs:
OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }}
OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1"
OPENCLAW_QA_DISCORD_CAPTURE_CONTENT: "1"
+ CRABBOX_COORDINATOR: ${{ secrets.CRABBOX_COORDINATOR }}
+ CRABBOX_COORDINATOR_TOKEN: ${{ secrets.CRABBOX_COORDINATOR_TOKEN }}
+ CRABBOX_ACCESS_CLIENT_ID: ${{ secrets.CRABBOX_ACCESS_CLIENT_ID }}
+ CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }}
run: |
set -euo pipefail
@@ -299,6 +321,7 @@ jobs:
require_var OPENAI_API_KEY
require_var OPENCLAW_QA_CONVEX_SITE_URL
require_var OPENCLAW_QA_CONVEX_SECRET_CI
+ require_var CRABBOX_COORDINATOR_TOKEN
root=".artifacts/qa-e2e/mantis/discord-status-reactions"
worktree_root=".artifacts/qa-e2e/mantis/discord-status-reactions-worktrees"
@@ -328,6 +351,55 @@ jobs:
run_lane baseline
run_lane candidate
+ # Warm up a single Crabbox desktop lease; the desktop capture calls further
+ # down pass its id via --lease-id instead of leasing their own machine.
+ desktop_lease_id=""
+ warmup_output="$(
+ crabbox warmup \
+ --provider hetzner \
+ --desktop \
+ --browser \
+ --class standard \
+ --idle-timeout 30m \
+ --ttl 90m
+ )"
+ # Echo the raw warmup output and keep a copy under the artifact root.
+ printf '%s\n' "$warmup_output" | tee "$root/crabbox-desktop-warmup.log"
+ # Take the first cbx_<hex> token as the lease id. `|| true` keeps a grep miss
+ # from aborting the script under pipefail, so the check below reports it instead.
+ desktop_lease_id="$(printf '%s\n' "$warmup_output" | grep -Eo 'cbx_[a-f0-9]+' | head -n 1 || true)"
+ if [[ ! "$desktop_lease_id" =~ ^cbx_[a-f0-9]+$ ]]; then
+ echo "Crabbox desktop warmup did not return a lease id." >&2
+ exit 1
+ fi
+
+ # Best-effort stop of the warmed desktop lease; a failing stop is ignored.
+ cleanup_desktop_lease() {
+ [[ -n "$desktop_lease_id" ]] || return 0
+ crabbox stop --provider hetzner "$desktop_lease_id" || true
+ }
+ trap cleanup_desktop_lease EXIT
+
+ # Render one lane's timeline HTML through the desktop browser smoke command on
+ # the warmed lease, then copy the screenshot next to the lane's other files.
+ # $1: lane directory name under $root (called with "baseline" and "candidate").
+ capture_desktop_lane() {
+ local lane="$1"
+ local timeline_html="$root/$lane/discord-status-reactions-tool-only-timeline.html"
+ local capture_dir="$root/$lane/desktop-browser"
+ # Fail fast when the lane did not produce the HTML this capture depends on.
+ [[ -f "$timeline_html" ]] || {
+ echo "Missing desktop source HTML for ${lane}: ${timeline_html}" >&2
+ exit 1
+ }
+ pnpm openclaw qa mantis desktop-browser-smoke \
+ --html-file "$timeline_html" \
+ --output-dir "$capture_dir" \
+ --provider hetzner \
+ --class standard \
+ --idle-timeout 30m \
+ --ttl 90m \
+ --lease-id "$desktop_lease_id"
+ cp "$capture_dir/desktop-browser-smoke.png" "$root/$lane/discord-status-reactions-tool-only-desktop.png"
+ }
+
+ capture_desktop_lane baseline
+ capture_desktop_lane candidate
+
baseline_status="$(jq -r '.scenarios[0].status' "$root/baseline/discord-qa-summary.json")"
candidate_status="$(jq -r '.scenarios[0].status' "$root/candidate/discord-qa-summary.json")"
@@ -351,6 +423,8 @@ jobs:
echo "- Candidate status: \`${candidate_status}\`"
echo "- Baseline screenshot: \`baseline/discord-status-reactions-tool-only-timeline.png\`"
echo "- Candidate screenshot: \`candidate/discord-status-reactions-tool-only-timeline.png\`"
+ echo "- Baseline desktop screenshot: \`baseline/discord-status-reactions-tool-only-desktop.png\`"
+ echo "- Candidate desktop screenshot: \`candidate/discord-status-reactions-tool-only-desktop.png\`"
} > "$root/mantis-report.md"
cat "$root/mantis-report.md" >> "$GITHUB_STEP_SUMMARY"
@@ -409,7 +483,9 @@ jobs:
for required in \
"$root/comparison.json" \
"$root/baseline/discord-status-reactions-tool-only-timeline.png" \
- "$root/candidate/discord-status-reactions-tool-only-timeline.png"
+ "$root/candidate/discord-status-reactions-tool-only-timeline.png" \
+ "$root/baseline/discord-status-reactions-tool-only-desktop.png" \
+ "$root/candidate/discord-status-reactions-tool-only-desktop.png"
do
if [[ ! -f "$required" ]]; then
echo "Missing required QA evidence file: $required" >&2
@@ -435,6 +511,8 @@ jobs:
mkdir -p "$artifacts_worktree/$artifact_root"
cp "$root/baseline/discord-status-reactions-tool-only-timeline.png" "$artifacts_worktree/$artifact_root/baseline.png"
cp "$root/candidate/discord-status-reactions-tool-only-timeline.png" "$artifacts_worktree/$artifact_root/candidate.png"
+ cp "$root/baseline/discord-status-reactions-tool-only-desktop.png" "$artifacts_worktree/$artifact_root/baseline-desktop.png"
+ cp "$root/candidate/discord-status-reactions-tool-only-desktop.png" "$artifacts_worktree/$artifact_root/candidate-desktop.png"
cp "$root/comparison.json" "$artifacts_worktree/$artifact_root/comparison.json"
cp "$root/mantis-report.md" "$artifacts_worktree/$artifact_root/mantis-report.md"
@@ -470,6 +548,10 @@ jobs:
| --- | --- |
|
|
|
+ | Baseline desktop/VNC browser | Candidate desktop/VNC browser |
+ | --- | --- |
+ |
|
|
+
Raw QA files: https://github.com/${GITHUB_REPOSITORY}/tree/qa-artifacts/${artifact_root}
EOF
diff --git a/docs/concepts/mantis.md b/docs/concepts/mantis.md
index 3ea4ff81cde..2f2381c09b5 100644
--- a/docs/concepts/mantis.md
+++ b/docs/concepts/mantis.md
@@ -107,6 +107,7 @@ Useful desktop smoke flags:
- `--lease-id <id>` or `OPENCLAW_MANTIS_CRABBOX_LEASE_ID` reuses a warmed desktop.
- `--browser-url <url>` changes the page opened in the visible browser.
+- `--html-file <path>` renders a repo-local HTML artifact in the visible browser. Mantis uses this to capture the generated Discord status-reaction timeline through a real Crabbox desktop.
- `--keep-lease` or `OPENCLAW_MANTIS_KEEP_VM=1` keeps a newly created passing lease open for VNC inspection. Failed runs keep the lease by default when one was created so an operator can reconnect.
- `--class`, `--idle-timeout`, and `--ttl` tune machine size and lease lifetime.
@@ -120,7 +121,9 @@ accepts:
It checks out the workflow harness ref, builds separate baseline and candidate
worktrees, runs `discord-status-reactions-tool-only` against each worktree, and
uploads `baseline/`, `candidate/`, `comparison.json`, and `mantis-report.md` as
-Actions artifacts.
+Actions artifacts. It also renders each lane's timeline HTML in a Crabbox
+desktop browser and publishes those VNC screenshots beside the deterministic
+timeline PNGs in the PR comment.
You can also trigger the status-reactions run directly from a PR comment:
diff --git a/extensions/qa-lab/src/cli.test.ts b/extensions/qa-lab/src/cli.test.ts
index 43c423c8216..380b99ef932 100644
--- a/extensions/qa-lab/src/cli.test.ts
+++ b/extensions/qa-lab/src/cli.test.ts
@@ -225,6 +225,8 @@ describe("qa cli registration", () => {
".artifacts/qa-e2e/mantis/desktop-browser",
"--browser-url",
"https://openclaw.ai/docs",
+ "--html-file",
+ "qa-artifacts/timeline.html",
"--crabbox-bin",
"/tmp/crabbox",
"--provider",
@@ -243,6 +245,7 @@ describe("qa cli registration", () => {
expect(runMantisDesktopBrowserSmokeCommand).toHaveBeenCalledWith({
browserUrl: "https://openclaw.ai/docs",
crabboxBin: "/tmp/crabbox",
+ htmlFile: "qa-artifacts/timeline.html",
idleTimeout: "30m",
keepLease: true,
leaseId: "cbx_123abc",
@@ -268,6 +271,7 @@ describe("qa cli registration", () => {
expect(runMantisDesktopBrowserSmokeCommand).toHaveBeenCalledWith({
browserUrl: undefined,
crabboxBin: undefined,
+ htmlFile: undefined,
idleTimeout: undefined,
keepLease: undefined,
leaseId: undefined,
diff --git a/extensions/qa-lab/src/mantis/cli.ts b/extensions/qa-lab/src/mantis/cli.ts
index 28eee774c86..905bfa901c0 100644
--- a/extensions/qa-lab/src/mantis/cli.ts
+++ b/extensions/qa-lab/src/mantis/cli.ts
@@ -56,6 +56,7 @@ type MantisDesktopBrowserSmokeCommanderOptions = {
browserUrl?: string;
class?: string;
crabboxBin?: string;
+ htmlFile?: string;
idleTimeout?: string;
keepLease?: boolean;
leaseId?: string;
@@ -137,6 +138,7 @@ export function registerMantisCli(qa: Command) {
.option("--repo-root ", "Repository root to target when running from a neutral cwd")
.option("--output-dir ", "Mantis desktop browser artifact directory")
.option("--browser-url ", "URL to open in the visible browser")
+ .option("--html-file ", "Repo-local HTML file to render in the visible browser")
.option("--crabbox-bin ", "Crabbox binary path")
.option("--provider ", "Crabbox provider")
.option("--machine-class ", "Crabbox machine class")
@@ -149,6 +151,7 @@ export function registerMantisCli(qa: Command) {
await runDesktopBrowserSmoke({
browserUrl: opts.browserUrl,
crabboxBin: opts.crabboxBin,
+ htmlFile: opts.htmlFile,
idleTimeout: opts.idleTimeout,
keepLease: opts.keepLease,
leaseId: opts.leaseId,
diff --git a/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts
index 2d44e9ceadf..4c4bf9fd253 100644
--- a/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts
+++ b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.test.ts
@@ -16,6 +16,8 @@ describe("mantis desktop browser smoke runtime", () => {
});
it("leases a desktop box, runs a visible browser, copies artifacts, and stops on pass", async () => {
+ await fs.mkdir(path.join(repoRoot, "qa-artifacts"), { recursive: true });
+ await fs.writeFile(path.join(repoRoot, "qa-artifacts", "timeline.html"), "Mantis
");
const commands: { args: readonly string[]; command: string }[] = [];
const runner = vi.fn(async (command: string, args: readonly string[]) => {
commands.push({ command, args });
@@ -53,6 +55,7 @@ describe("mantis desktop browser smoke runtime", () => {
browserUrl: "https://openclaw.ai/docs",
commandRunner: runner,
crabboxBin: "/tmp/crabbox",
+ htmlFile: "qa-artifacts/timeline.html",
now: () => new Date("2026-05-04T12:00:00.000Z"),
outputDir: ".artifacts/qa-e2e/mantis/desktop-browser-test",
repoRoot,
@@ -81,15 +84,19 @@ describe("mantis desktop browser smoke runtime", () => {
expect(remoteScript).toContain("${BROWSER:-}");
expect(remoteScript).toContain("${CHROME_BIN:-}");
expect(remoteScript).toContain("chromium-browser");
+ expect(remoteScript).toContain("base64 -d");
+ expect(remoteScript).toContain('url="file://$out/input.html"');
expect(remoteScript).toContain('"browserBinary": "$browser_bin"');
await expect(fs.readFile(result.screenshotPath ?? "", "utf8")).resolves.toBe("png");
const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
browserUrl: string;
crabbox: { id: string; vncCommand: string };
+ htmlFile?: string;
status: string;
};
+ expect(summary.browserUrl).toMatch(/^file:\/\//u);
expect(summary).toMatchObject({
- browserUrl: "https://openclaw.ai/docs",
+ htmlFile: path.join(repoRoot, "qa-artifacts", "timeline.html"),
crabbox: {
id: "cbx_abc123",
vncCommand: "/tmp/crabbox vnc --provider hetzner --id cbx_abc123 --open",
@@ -98,6 +105,21 @@ describe("mantis desktop browser smoke runtime", () => {
});
});
+ it("rejects html files outside the repository", async () => {
+ // Stub runner: lets the test assert that no command was run at all.
+ const commandRunner = vi.fn(async () => ({ stdout: "", stderr: "" }));
+ const smokeRun = runMantisDesktopBrowserSmoke({
+ commandRunner,
+ crabboxBin: "/tmp/crabbox",
+ htmlFile: "../outside.html",
+ outputDir: ".artifacts/qa-e2e/mantis/desktop-browser-outside",
+ repoRoot,
+ });
+
+ await expect(smokeRun).rejects.toThrow(
+ "Mantis desktop HTML file must be inside the repository",
+ );
+ expect(commandRunner).not.toHaveBeenCalled();
+ });
+
it("keeps an existing lease and writes failure reports when the remote run fails", async () => {
const commands: { args: readonly string[]; command: string }[] = [];
const runner = vi.fn(async (command: string, args: readonly string[]) => {
diff --git a/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts
index a5ee129bf91..6c4da15ed38 100644
--- a/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts
+++ b/extensions/qa-lab/src/mantis/desktop-browser-smoke.runtime.ts
@@ -1,6 +1,7 @@
import { spawn, type SpawnOptions } from "node:child_process";
import fs from "node:fs/promises";
import path from "node:path";
+import { pathToFileURL } from "node:url";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import { ensureRepoBoundDirectory, resolveRepoRelativeOutputDir } from "../cli-paths.js";
@@ -9,6 +10,7 @@ export type MantisDesktopBrowserSmokeOptions = {
commandRunner?: CommandRunner;
crabboxBin?: string;
env?: NodeJS.ProcessEnv;
+ htmlFile?: string;
idleTimeout?: string;
keepLease?: boolean;
leaseId?: string;
@@ -58,6 +60,7 @@ type MantisDesktopBrowserSmokeSummary = {
summaryPath: string;
};
browserUrl: string;
+ htmlFile?: string;
crabbox: {
bin: string;
createdLease: boolean;
@@ -174,16 +177,43 @@ function shellQuote(value: string) {
return `'${value.replaceAll("'", "'\\''")}'`;
}
-function renderRemoteScript(params: { browserUrl: string; remoteOutputDir: string }) {
+/**
+ * Resolves `filePath` against `repoRoot` and rejects any result that is not
+ * strictly inside the repository.
+ *
+ * @param repoRoot - Repository root that relative paths are resolved against.
+ * @param filePath - Caller-supplied path, relative or absolute.
+ * @param label - Human-readable name of the input, used as the error prefix.
+ * @returns The resolved absolute path.
+ * @throws Error when the path resolves to the repository root itself or to a
+ *   location outside of it.
+ */
+function resolveRepoBoundFile(repoRoot: string, filePath: string, label: string) {
+ const resolved = path.resolve(repoRoot, filePath);
+ const relative = path.relative(repoRoot, resolved);
+ // Treat ".." as an escape only when it is a whole leading segment, so in-repo
+ // names such as "..cache/page.html" are not mistaken for parent traversal.
+ const escapesRepo = relative === ".." || relative.startsWith(`..${path.sep}`);
+ // `relative` can itself be absolute when no relative route exists (e.g. another drive).
+ if (relative === "" || escapesRepo || path.isAbsolute(relative)) {
+ throw new Error(`${label} must be inside the repository: ${filePath}`);
+ }
+ return resolved;
+}
+
+function renderRemoteScript(params: {
+ browserUrl: string;
+ htmlBase64?: string;
+ remoteOutputDir: string;
+}) {
const shellUrl = shellQuote(params.browserUrl);
const shellUrlJson = shellQuote(JSON.stringify(params.browserUrl));
+ const htmlBase64 = shellQuote(params.htmlBase64 ?? "");
const shellOutputDir = shellQuote(params.remoteOutputDir);
+ const inputModeJson = shellQuote(JSON.stringify(params.htmlBase64 ? "html-file" : "url"));
+ const openedUrlJson = shellQuote(
+ JSON.stringify(
+ params.htmlBase64 ? `file://${params.remoteOutputDir}/input.html` : params.browserUrl,
+ ),
+ );
return `set -euo pipefail
out=${shellOutputDir}
url=${shellUrl}
url_json=${shellUrlJson}
+html_b64=${htmlBase64}
+input_mode_json=${inputModeJson}
+opened_url_json=${openedUrlJson}
rm -rf "$out"
mkdir -p "$out"
+if [ -n "$html_b64" ]; then
+ printf '%s' "$html_b64" | base64 -d >"$out/input.html"
+ url="file://$out/input.html"
+fi
export DISPLAY="\${DISPLAY:-:99}"
if ! command -v scrot >/dev/null 2>&1; then
sudo apt-get update -y >"$out/apt.log" 2>&1
@@ -228,6 +258,8 @@ cat >"$out/remote-metadata.json" <