mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:00:43 +00:00
feat(qa): publish Mantis desktop screenshots
This commit is contained in:
@@ -255,6 +255,24 @@ jobs:
|
||||
- name: Build Mantis harness
|
||||
run: pnpm build
|
||||
|
||||
- name: Install Crabbox CLI
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
install_dir="${RUNNER_TEMP}/crabbox"
|
||||
mkdir -p "$install_dir" "$HOME/.local/bin"
|
||||
gh release download \
|
||||
--repo openclaw/crabbox \
|
||||
--pattern 'crabbox_*_linux_amd64.tar.gz' \
|
||||
--dir "$install_dir" \
|
||||
--clobber
|
||||
tar -xzf "$install_dir"/crabbox_*_linux_amd64.tar.gz -C "$install_dir"
|
||||
install -m 0755 "$install_dir/crabbox" "$HOME/.local/bin/crabbox"
|
||||
echo "$HOME/.local/bin" >> "$GITHUB_PATH"
|
||||
"$HOME/.local/bin/crabbox" --version
|
||||
|
||||
- name: Prepare baseline and candidate worktrees
|
||||
shell: bash
|
||||
env:
|
||||
@@ -285,6 +303,10 @@ jobs:
|
||||
OPENCLAW_QA_CONVEX_SECRET_CI: ${{ secrets.OPENCLAW_QA_CONVEX_SECRET_CI }}
|
||||
OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1"
|
||||
OPENCLAW_QA_DISCORD_CAPTURE_CONTENT: "1"
|
||||
CRABBOX_COORDINATOR: ${{ secrets.CRABBOX_COORDINATOR }}
|
||||
CRABBOX_COORDINATOR_TOKEN: ${{ secrets.CRABBOX_COORDINATOR_TOKEN }}
|
||||
CRABBOX_ACCESS_CLIENT_ID: ${{ secrets.CRABBOX_ACCESS_CLIENT_ID }}
|
||||
CRABBOX_ACCESS_CLIENT_SECRET: ${{ secrets.CRABBOX_ACCESS_CLIENT_SECRET }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
@@ -299,6 +321,7 @@ jobs:
|
||||
require_var OPENAI_API_KEY
|
||||
require_var OPENCLAW_QA_CONVEX_SITE_URL
|
||||
require_var OPENCLAW_QA_CONVEX_SECRET_CI
|
||||
require_var CRABBOX_COORDINATOR_TOKEN
|
||||
|
||||
root=".artifacts/qa-e2e/mantis/discord-status-reactions"
|
||||
worktree_root=".artifacts/qa-e2e/mantis/discord-status-reactions-worktrees"
|
||||
@@ -328,6 +351,55 @@ jobs:
|
||||
run_lane baseline
|
||||
run_lane candidate
|
||||
|
||||
desktop_lease_id=""
|
||||
warmup_output="$(
|
||||
crabbox warmup \
|
||||
--provider hetzner \
|
||||
--desktop \
|
||||
--browser \
|
||||
--class standard \
|
||||
--idle-timeout 30m \
|
||||
--ttl 90m
|
||||
)"
|
||||
printf '%s\n' "$warmup_output" | tee "$root/crabbox-desktop-warmup.log"
|
||||
desktop_lease_id="$(printf '%s\n' "$warmup_output" | grep -Eo 'cbx_[a-f0-9]+' | head -n 1 || true)"
|
||||
if [[ ! "$desktop_lease_id" =~ ^cbx_[a-f0-9]+$ ]]; then
|
||||
echo "Crabbox desktop warmup did not return a lease id." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# EXIT-trap handler: stops the warmed Crabbox desktop lease, if one was recorded.
# The stop call is best-effort (`|| true`) so a failed teardown does not turn
# into a command failure under `set -e`.
cleanup_desktop_lease() {
  if [[ -z "$desktop_lease_id" ]]; then
    return 0
  fi
  crabbox stop --provider hetzner "$desktop_lease_id" || true
}
|
||||
trap cleanup_desktop_lease EXIT
|
||||
|
||||
# Renders one lane's timeline HTML through the desktop browser smoke command
# and copies the resulting screenshot next to the lane's other artifacts.
#   $1 - lane name (the caller passes "baseline" and "candidate")
# Reads $root and $desktop_lease_id from the enclosing script; exits the whole
# step with status 1 when the lane's timeline HTML is missing.
capture_desktop_lane() {
  local lane_name="$1"
  local lane_dir="$root/$lane_name"
  local timeline_html="$lane_dir/discord-status-reactions-tool-only-timeline.html"
  local smoke_dir="$lane_dir/desktop-browser"

  [[ -f "$timeline_html" ]] || {
    echo "Missing desktop source HTML for ${lane_name}: ${timeline_html}" >&2
    exit 1
  }

  # --lease-id points the smoke command at the already-warmed desktop lease.
  pnpm openclaw qa mantis desktop-browser-smoke \
    --html-file "$timeline_html" \
    --output-dir "$smoke_dir" \
    --provider hetzner \
    --class standard \
    --idle-timeout 30m \
    --ttl 90m \
    --lease-id "$desktop_lease_id"

  cp "$smoke_dir/desktop-browser-smoke.png" "$lane_dir/discord-status-reactions-tool-only-desktop.png"
}
|
||||
|
||||
capture_desktop_lane baseline
|
||||
capture_desktop_lane candidate
|
||||
|
||||
baseline_status="$(jq -r '.scenarios[0].status' "$root/baseline/discord-qa-summary.json")"
|
||||
candidate_status="$(jq -r '.scenarios[0].status' "$root/candidate/discord-qa-summary.json")"
|
||||
|
||||
@@ -351,6 +423,8 @@ jobs:
|
||||
echo "- Candidate status: \`${candidate_status}\`"
|
||||
echo "- Baseline screenshot: \`baseline/discord-status-reactions-tool-only-timeline.png\`"
|
||||
echo "- Candidate screenshot: \`candidate/discord-status-reactions-tool-only-timeline.png\`"
|
||||
echo "- Baseline desktop screenshot: \`baseline/discord-status-reactions-tool-only-desktop.png\`"
|
||||
echo "- Candidate desktop screenshot: \`candidate/discord-status-reactions-tool-only-desktop.png\`"
|
||||
} > "$root/mantis-report.md"
|
||||
|
||||
cat "$root/mantis-report.md" >> "$GITHUB_STEP_SUMMARY"
|
||||
@@ -409,7 +483,9 @@ jobs:
|
||||
for required in \
|
||||
"$root/comparison.json" \
|
||||
"$root/baseline/discord-status-reactions-tool-only-timeline.png" \
|
||||
"$root/candidate/discord-status-reactions-tool-only-timeline.png"
|
||||
"$root/candidate/discord-status-reactions-tool-only-timeline.png" \
|
||||
"$root/baseline/discord-status-reactions-tool-only-desktop.png" \
|
||||
"$root/candidate/discord-status-reactions-tool-only-desktop.png"
|
||||
do
|
||||
if [[ ! -f "$required" ]]; then
|
||||
echo "Missing required QA evidence file: $required" >&2
|
||||
@@ -435,6 +511,8 @@ jobs:
|
||||
mkdir -p "$artifacts_worktree/$artifact_root"
|
||||
cp "$root/baseline/discord-status-reactions-tool-only-timeline.png" "$artifacts_worktree/$artifact_root/baseline.png"
|
||||
cp "$root/candidate/discord-status-reactions-tool-only-timeline.png" "$artifacts_worktree/$artifact_root/candidate.png"
|
||||
cp "$root/baseline/discord-status-reactions-tool-only-desktop.png" "$artifacts_worktree/$artifact_root/baseline-desktop.png"
|
||||
cp "$root/candidate/discord-status-reactions-tool-only-desktop.png" "$artifacts_worktree/$artifact_root/candidate-desktop.png"
|
||||
cp "$root/comparison.json" "$artifacts_worktree/$artifact_root/comparison.json"
|
||||
cp "$root/mantis-report.md" "$artifacts_worktree/$artifact_root/mantis-report.md"
|
||||
|
||||
@@ -470,6 +548,10 @@ jobs:
|
||||
| --- | --- |
|
||||
| <img src="${raw_base}/baseline.png" width="420" alt="Baseline Discord status reaction timeline"> | <img src="${raw_base}/candidate.png" width="420" alt="Candidate Discord status reaction timeline"> |
|
||||
|
||||
| Baseline desktop/VNC browser | Candidate desktop/VNC browser |
|
||||
| --- | --- |
|
||||
| <img src="${raw_base}/baseline-desktop.png" width="420" alt="Baseline Mantis desktop browser screenshot"> | <img src="${raw_base}/candidate-desktop.png" width="420" alt="Candidate Mantis desktop browser screenshot"> |
|
||||
|
||||
Raw QA files: https://github.com/${GITHUB_REPOSITORY}/tree/qa-artifacts/${artifact_root}
|
||||
EOF
|
||||
|
||||
|
||||
@@ -107,6 +107,7 @@ Useful desktop smoke flags:
|
||||
|
||||
- `--lease-id <cbx_...>` or `OPENCLAW_MANTIS_CRABBOX_LEASE_ID` reuses a warmed desktop.
|
||||
- `--browser-url <url>` changes the page opened in the visible browser.
|
||||
- `--html-file <path>` renders a repo-local HTML artifact in the visible browser. Mantis uses this to capture the generated Discord status-reaction timeline through a real Crabbox desktop.
|
||||
- `--keep-lease` or `OPENCLAW_MANTIS_KEEP_VM=1` keeps a newly created passing lease open for VNC inspection. Failed runs keep the lease by default when one was created so an operator can reconnect.
|
||||
- `--class`, `--idle-timeout`, and `--ttl` tune machine size and lease lifetime.
|
||||
|
||||
@@ -120,7 +121,9 @@ accepts:
|
||||
It checks out the workflow harness ref, builds separate baseline and candidate
|
||||
worktrees, runs `discord-status-reactions-tool-only` against each worktree, and
|
||||
uploads `baseline/`, `candidate/`, `comparison.json`, and `mantis-report.md` as
|
||||
Actions artifacts.
|
||||
Actions artifacts. It also renders each lane's timeline HTML in a Crabbox
|
||||
desktop browser and publishes those VNC screenshots beside the deterministic
|
||||
timeline PNGs in the PR comment.
|
||||
|
||||
You can also trigger the status-reactions run directly from a PR comment:
|
||||
|
||||
|
||||
@@ -225,6 +225,8 @@ describe("qa cli registration", () => {
|
||||
".artifacts/qa-e2e/mantis/desktop-browser",
|
||||
"--browser-url",
|
||||
"https://openclaw.ai/docs",
|
||||
"--html-file",
|
||||
"qa-artifacts/timeline.html",
|
||||
"--crabbox-bin",
|
||||
"/tmp/crabbox",
|
||||
"--provider",
|
||||
@@ -243,6 +245,7 @@ describe("qa cli registration", () => {
|
||||
expect(runMantisDesktopBrowserSmokeCommand).toHaveBeenCalledWith({
|
||||
browserUrl: "https://openclaw.ai/docs",
|
||||
crabboxBin: "/tmp/crabbox",
|
||||
htmlFile: "qa-artifacts/timeline.html",
|
||||
idleTimeout: "30m",
|
||||
keepLease: true,
|
||||
leaseId: "cbx_123abc",
|
||||
@@ -268,6 +271,7 @@ describe("qa cli registration", () => {
|
||||
expect(runMantisDesktopBrowserSmokeCommand).toHaveBeenCalledWith({
|
||||
browserUrl: undefined,
|
||||
crabboxBin: undefined,
|
||||
htmlFile: undefined,
|
||||
idleTimeout: undefined,
|
||||
keepLease: undefined,
|
||||
leaseId: undefined,
|
||||
|
||||
@@ -56,6 +56,7 @@ type MantisDesktopBrowserSmokeCommanderOptions = {
|
||||
browserUrl?: string;
|
||||
class?: string;
|
||||
crabboxBin?: string;
|
||||
htmlFile?: string;
|
||||
idleTimeout?: string;
|
||||
keepLease?: boolean;
|
||||
leaseId?: string;
|
||||
@@ -137,6 +138,7 @@ export function registerMantisCli(qa: Command) {
|
||||
.option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
|
||||
.option("--output-dir <path>", "Mantis desktop browser artifact directory")
|
||||
.option("--browser-url <url>", "URL to open in the visible browser")
|
||||
.option("--html-file <path>", "Repo-local HTML file to render in the visible browser")
|
||||
.option("--crabbox-bin <path>", "Crabbox binary path")
|
||||
.option("--provider <provider>", "Crabbox provider")
|
||||
.option("--machine-class <class>", "Crabbox machine class")
|
||||
@@ -149,6 +151,7 @@ export function registerMantisCli(qa: Command) {
|
||||
await runDesktopBrowserSmoke({
|
||||
browserUrl: opts.browserUrl,
|
||||
crabboxBin: opts.crabboxBin,
|
||||
htmlFile: opts.htmlFile,
|
||||
idleTimeout: opts.idleTimeout,
|
||||
keepLease: opts.keepLease,
|
||||
leaseId: opts.leaseId,
|
||||
|
||||
@@ -16,6 +16,8 @@ describe("mantis desktop browser smoke runtime", () => {
|
||||
});
|
||||
|
||||
it("leases a desktop box, runs a visible browser, copies artifacts, and stops on pass", async () => {
|
||||
await fs.mkdir(path.join(repoRoot, "qa-artifacts"), { recursive: true });
|
||||
await fs.writeFile(path.join(repoRoot, "qa-artifacts", "timeline.html"), "<h1>Mantis</h1>");
|
||||
const commands: { args: readonly string[]; command: string }[] = [];
|
||||
const runner = vi.fn(async (command: string, args: readonly string[]) => {
|
||||
commands.push({ command, args });
|
||||
@@ -53,6 +55,7 @@ describe("mantis desktop browser smoke runtime", () => {
|
||||
browserUrl: "https://openclaw.ai/docs",
|
||||
commandRunner: runner,
|
||||
crabboxBin: "/tmp/crabbox",
|
||||
htmlFile: "qa-artifacts/timeline.html",
|
||||
now: () => new Date("2026-05-04T12:00:00.000Z"),
|
||||
outputDir: ".artifacts/qa-e2e/mantis/desktop-browser-test",
|
||||
repoRoot,
|
||||
@@ -81,15 +84,19 @@ describe("mantis desktop browser smoke runtime", () => {
|
||||
expect(remoteScript).toContain("${BROWSER:-}");
|
||||
expect(remoteScript).toContain("${CHROME_BIN:-}");
|
||||
expect(remoteScript).toContain("chromium-browser");
|
||||
expect(remoteScript).toContain("base64 -d");
|
||||
expect(remoteScript).toContain('url="file://$out/input.html"');
|
||||
expect(remoteScript).toContain('"browserBinary": "$browser_bin"');
|
||||
await expect(fs.readFile(result.screenshotPath ?? "", "utf8")).resolves.toBe("png");
|
||||
const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
|
||||
browserUrl: string;
|
||||
crabbox: { id: string; vncCommand: string };
|
||||
htmlFile?: string;
|
||||
status: string;
|
||||
};
|
||||
expect(summary.browserUrl).toMatch(/^file:\/\//u);
|
||||
expect(summary).toMatchObject({
|
||||
browserUrl: "https://openclaw.ai/docs",
|
||||
htmlFile: path.join(repoRoot, "qa-artifacts", "timeline.html"),
|
||||
crabbox: {
|
||||
id: "cbx_abc123",
|
||||
vncCommand: "/tmp/crabbox vnc --provider hetzner --id cbx_abc123 --open",
|
||||
@@ -98,6 +105,21 @@ describe("mantis desktop browser smoke runtime", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects html files outside the repository", async () => {
|
||||
const runner = vi.fn(async () => ({ stdout: "", stderr: "" }));
|
||||
|
||||
await expect(
|
||||
runMantisDesktopBrowserSmoke({
|
||||
commandRunner: runner,
|
||||
crabboxBin: "/tmp/crabbox",
|
||||
htmlFile: "../outside.html",
|
||||
outputDir: ".artifacts/qa-e2e/mantis/desktop-browser-outside",
|
||||
repoRoot,
|
||||
}),
|
||||
).rejects.toThrow("Mantis desktop HTML file must be inside the repository");
|
||||
expect(runner).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("keeps an existing lease and writes failure reports when the remote run fails", async () => {
|
||||
const commands: { args: readonly string[]; command: string }[] = [];
|
||||
const runner = vi.fn(async (command: string, args: readonly string[]) => {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { spawn, type SpawnOptions } from "node:child_process";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { pathToFileURL } from "node:url";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import { ensureRepoBoundDirectory, resolveRepoRelativeOutputDir } from "../cli-paths.js";
|
||||
|
||||
@@ -9,6 +10,7 @@ export type MantisDesktopBrowserSmokeOptions = {
|
||||
commandRunner?: CommandRunner;
|
||||
crabboxBin?: string;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
htmlFile?: string;
|
||||
idleTimeout?: string;
|
||||
keepLease?: boolean;
|
||||
leaseId?: string;
|
||||
@@ -58,6 +60,7 @@ type MantisDesktopBrowserSmokeSummary = {
|
||||
summaryPath: string;
|
||||
};
|
||||
browserUrl: string;
|
||||
htmlFile?: string;
|
||||
crabbox: {
|
||||
bin: string;
|
||||
createdLease: boolean;
|
||||
@@ -174,16 +177,43 @@ function shellQuote(value: string) {
|
||||
return `'${value.replaceAll("'", "'\\''")}'`;
|
||||
}
|
||||
|
||||
function renderRemoteScript(params: { browserUrl: string; remoteOutputDir: string }) {
|
||||
/**
 * Resolves `filePath` against `repoRoot` and verifies that the result lies
 * strictly inside the repository.
 *
 * The check is purely lexical (`path.resolve` / `path.relative`); symlinks are
 * not followed here.
 *
 * @param repoRoot - Repository root that relative paths are resolved against.
 * @param filePath - Caller-supplied path, relative or absolute.
 * @param label - Human-readable name used as the prefix of the error message.
 * @returns The resolved path of the file.
 * @throws Error when the path resolves to the repository root itself or to a
 *   location outside the repository.
 */
function resolveRepoBoundFile(repoRoot: string, filePath: string, label: string) {
  const resolved = path.resolve(repoRoot, filePath);
  const relative = path.relative(repoRoot, resolved);
  // Only a leading ".." *segment* means the path escapes the repository. A
  // plain prefix test would also reject in-repo names such as "..notes.html".
  const escapesRepo = relative === ".." || relative.startsWith(`..${path.sep}`);
  // path.relative yields an absolute path when no relative route exists
  // (for example a different drive on Windows), so that is rejected as well.
  if (relative === "" || escapesRepo || path.isAbsolute(relative)) {
    throw new Error(`${label} must be inside the repository: ${filePath}`);
  }
  return resolved;
}
|
||||
|
||||
function renderRemoteScript(params: {
|
||||
browserUrl: string;
|
||||
htmlBase64?: string;
|
||||
remoteOutputDir: string;
|
||||
}) {
|
||||
const shellUrl = shellQuote(params.browserUrl);
|
||||
const shellUrlJson = shellQuote(JSON.stringify(params.browserUrl));
|
||||
const htmlBase64 = shellQuote(params.htmlBase64 ?? "");
|
||||
const shellOutputDir = shellQuote(params.remoteOutputDir);
|
||||
const inputModeJson = shellQuote(JSON.stringify(params.htmlBase64 ? "html-file" : "url"));
|
||||
const openedUrlJson = shellQuote(
|
||||
JSON.stringify(
|
||||
params.htmlBase64 ? `file://${params.remoteOutputDir}/input.html` : params.browserUrl,
|
||||
),
|
||||
);
|
||||
return `set -euo pipefail
|
||||
out=${shellOutputDir}
|
||||
url=${shellUrl}
|
||||
url_json=${shellUrlJson}
|
||||
html_b64=${htmlBase64}
|
||||
input_mode_json=${inputModeJson}
|
||||
opened_url_json=${openedUrlJson}
|
||||
rm -rf "$out"
|
||||
mkdir -p "$out"
|
||||
if [ -n "$html_b64" ]; then
|
||||
printf '%s' "$html_b64" | base64 -d >"$out/input.html"
|
||||
url="file://$out/input.html"
|
||||
fi
|
||||
export DISPLAY="\${DISPLAY:-:99}"
|
||||
if ! command -v scrot >/dev/null 2>&1; then
|
||||
sudo apt-get update -y >"$out/apt.log" 2>&1
|
||||
@@ -228,6 +258,8 @@ cat >"$out/remote-metadata.json" <<MANTIS_REMOTE_METADATA
|
||||
"browserBinary": "$browser_bin",
|
||||
"display": "$DISPLAY",
|
||||
"chromePid": $chrome_pid,
|
||||
"inputMode": $input_mode_json,
|
||||
"openedUrl": $opened_url_json,
|
||||
"capturedAt": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
||||
}
|
||||
MANTIS_REMOTE_METADATA
|
||||
@@ -241,6 +273,7 @@ function renderReport(summary: MantisDesktopBrowserSmokeSummary) {
|
||||
"",
|
||||
`Status: ${summary.status}`,
|
||||
`Browser URL: ${summary.browserUrl}`,
|
||||
summary.htmlFile ? `HTML file: ${summary.htmlFile}` : undefined,
|
||||
`Output: ${summary.outputDir}`,
|
||||
`Started: ${summary.startedAt}`,
|
||||
`Finished: ${summary.finishedAt}`,
|
||||
@@ -412,7 +445,16 @@ export async function runMantisDesktopBrowserSmoke(
|
||||
trimToValue(env[CRABBOX_IDLE_TIMEOUT_ENV]) ??
|
||||
DEFAULT_IDLE_TIMEOUT;
|
||||
const ttl = trimToValue(opts.ttl) ?? trimToValue(env[CRABBOX_TTL_ENV]) ?? DEFAULT_TTL;
|
||||
const browserUrl = trimToValue(opts.browserUrl) ?? DEFAULT_BROWSER_URL;
|
||||
const htmlFileOption = trimToValue(opts.htmlFile);
|
||||
const htmlFile = htmlFileOption
|
||||
? resolveRepoBoundFile(repoRoot, htmlFileOption, "Mantis desktop HTML file")
|
||||
: undefined;
|
||||
const htmlBase64 = htmlFile
|
||||
? Buffer.from(await fs.readFile(htmlFile)).toString("base64")
|
||||
: undefined;
|
||||
const browserUrl = htmlFile
|
||||
? pathToFileURL(htmlFile).toString()
|
||||
: (trimToValue(opts.browserUrl) ?? DEFAULT_BROWSER_URL);
|
||||
const runner = opts.commandRunner ?? defaultCommandRunner;
|
||||
const explicitLeaseId = trimToValue(opts.leaseId) ?? trimToValue(env[CRABBOX_LEASE_ID_ENV]);
|
||||
const keepLease = opts.keepLease ?? isTruthyOptIn(env[CRABBOX_KEEP_ENV]);
|
||||
@@ -455,7 +497,7 @@ export async function runMantisDesktopBrowserSmoke(
|
||||
"--no-sync",
|
||||
"--shell",
|
||||
"--",
|
||||
renderRemoteScript({ browserUrl, remoteOutputDir }),
|
||||
renderRemoteScript({ browserUrl, htmlBase64, remoteOutputDir }),
|
||||
],
|
||||
cwd: repoRoot,
|
||||
runner,
|
||||
@@ -479,6 +521,7 @@ export async function runMantisDesktopBrowserSmoke(
|
||||
summaryPath,
|
||||
},
|
||||
browserUrl,
|
||||
htmlFile,
|
||||
crabbox: {
|
||||
bin: crabboxBin,
|
||||
createdLease,
|
||||
@@ -508,6 +551,7 @@ export async function runMantisDesktopBrowserSmoke(
|
||||
summaryPath,
|
||||
},
|
||||
browserUrl,
|
||||
htmlFile,
|
||||
crabbox: {
|
||||
bin: crabboxBin,
|
||||
createdLease,
|
||||
|
||||
Reference in New Issue
Block a user