mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 20:20:42 +00:00
feat: add Mantis visual task video QA
This commit is contained in:
@@ -8,6 +8,12 @@ import {
|
||||
runMantisSlackDesktopSmoke,
|
||||
type MantisSlackDesktopSmokeOptions,
|
||||
} from "./slack-desktop-smoke.runtime.js";
|
||||
import {
|
||||
runMantisVisualDriver,
|
||||
runMantisVisualTask,
|
||||
type MantisVisualDriverOptions,
|
||||
type MantisVisualTaskOptions,
|
||||
} from "./visual-task.runtime.js";
|
||||
|
||||
export async function runMantisDiscordSmokeCommand(opts: MantisDiscordSmokeOptions) {
|
||||
const result = await runMantisDiscordSmoke(opts);
|
||||
@@ -34,6 +40,9 @@ export async function runMantisDesktopBrowserSmokeCommand(opts: MantisDesktopBro
|
||||
if (result.screenshotPath) {
|
||||
process.stdout.write(`Mantis desktop browser screenshot: ${result.screenshotPath}\n`);
|
||||
}
|
||||
if (result.videoPath) {
|
||||
process.stdout.write(`Mantis desktop browser video: ${result.videoPath}\n`);
|
||||
}
|
||||
if (result.status === "fail") {
|
||||
process.exitCode = 1;
|
||||
}
|
||||
@@ -46,6 +55,33 @@ export async function runMantisSlackDesktopSmokeCommand(opts: MantisSlackDesktop
|
||||
if (result.screenshotPath) {
|
||||
process.stdout.write(`Mantis Slack desktop screenshot: ${result.screenshotPath}\n`);
|
||||
}
|
||||
if (result.videoPath) {
|
||||
process.stdout.write(`Mantis Slack desktop video: ${result.videoPath}\n`);
|
||||
}
|
||||
if (result.status === "fail") {
|
||||
process.exitCode = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * CLI entry point for the Mantis visual driver.
 *
 * Runs the driver, writes its status and screenshot path to stdout, and marks
 * the process as failed when the driver reports a `"fail"` status.
 *
 * @param opts - Driver options, forwarded unchanged to `runMantisVisualDriver`.
 */
export async function runMantisVisualDriverCommand(opts: MantisVisualDriverOptions) {
  const result = await runMantisVisualDriver(opts);
  process.stdout.write(`Mantis visual driver result: ${result.status}\n`);
  // NOTE(review): printed unconditionally, unlike the smoke commands in this
  // file which guard on `result.screenshotPath` — confirm the driver always sets it.
  process.stdout.write(`Mantis visual driver screenshot: ${result.screenshotPath}\n`);
  if (result.status === "fail") {
    // Set the exit code rather than exiting, so pending stdout writes can flush.
    process.exitCode = 1;
  }
}
|
||||
|
||||
export async function runMantisVisualTaskCommand(opts: MantisVisualTaskOptions) {
|
||||
const result = await runMantisVisualTask(opts);
|
||||
process.stdout.write(`Mantis visual task report: ${result.reportPath}\n`);
|
||||
process.stdout.write(`Mantis visual task summary: ${result.summaryPath}\n`);
|
||||
if (result.screenshotPath) {
|
||||
process.stdout.write(`Mantis visual task screenshot: ${result.screenshotPath}\n`);
|
||||
}
|
||||
if (result.videoPath) {
|
||||
process.stdout.write(`Mantis visual task video: ${result.videoPath}\n`);
|
||||
}
|
||||
if (result.status === "fail") {
|
||||
process.exitCode = 1;
|
||||
}
|
||||
|
||||
@@ -4,6 +4,11 @@ import type { MantisDesktopBrowserSmokeOptions } from "./desktop-browser-smoke.r
|
||||
import type { MantisDiscordSmokeOptions } from "./discord-smoke.runtime.js";
|
||||
import type { MantisBeforeAfterOptions } from "./run.runtime.js";
|
||||
import type { MantisSlackDesktopSmokeOptions } from "./slack-desktop-smoke.runtime.js";
|
||||
import type {
|
||||
MantisVisualDriverOptions,
|
||||
MantisVisualTaskOptions,
|
||||
MantisVisualTaskVisionMode,
|
||||
} from "./visual-task.runtime.js";
|
||||
|
||||
type MantisCliRuntime = typeof import("./cli.runtime.js");
|
||||
|
||||
@@ -31,6 +36,16 @@ async function runSlackDesktopSmoke(opts: MantisSlackDesktopSmokeOptions) {
|
||||
await runtime.runMantisSlackDesktopSmokeCommand(opts);
|
||||
}
|
||||
|
||||
/**
 * Obtains the Mantis CLI runtime through `loadMantisCliRuntime` and delegates
 * to its `runMantisVisualDriverCommand`.
 *
 * @param opts - Visual driver options, passed through unchanged.
 */
async function runVisualDriver(opts: MantisVisualDriverOptions) {
  // NOTE(review): presumably a lazy import of ./cli.runtime.js — confirm in loadMantisCliRuntime.
  const runtime = await loadMantisCliRuntime();
  await runtime.runMantisVisualDriverCommand(opts);
}
|
||||
|
||||
/**
 * Obtains the Mantis CLI runtime through `loadMantisCliRuntime` and delegates
 * to its `runMantisVisualTaskCommand`.
 *
 * @param opts - Visual task options, passed through unchanged.
 */
async function runVisualTask(opts: MantisVisualTaskOptions) {
  // NOTE(review): presumably a lazy import of ./cli.runtime.js — confirm in loadMantisCliRuntime.
  const runtime = await loadMantisCliRuntime();
  await runtime.runMantisVisualTaskCommand(opts);
}
|
||||
|
||||
type MantisDiscordSmokeCommanderOptions = {
|
||||
channelId?: string;
|
||||
guildId?: string;
|
||||
@@ -96,10 +111,57 @@ type MantisSlackDesktopSmokeCommanderOptions = {
|
||||
ttl?: string;
|
||||
};
|
||||
|
||||
/**
 * Raw option bag received by the `visual-task` command action.
 *
 * All values are optional. Numeric options (`settleMs`, `visionTimeoutMs`)
 * are typed as strings here and converted with `parseOptionalInteger` before
 * being handed to the runtime.
 */
type MantisVisualTaskCommanderOptions = {
  browserUrl?: string;
  /** Alias for `machineClass`; used only when `machineClass` is not set. */
  class?: string;
  crabboxBin?: string;
  duration?: string;
  expectText?: string;
  idleTimeout?: string;
  keepLease?: boolean;
  leaseId?: string;
  machineClass?: string;
  outputDir?: string;
  provider?: string;
  repoRoot?: string;
  /** Unparsed integer string (milliseconds). */
  settleMs?: string;
  ttl?: string;
  visionMode?: MantisVisualTaskVisionMode;
  visionModel?: string;
  visionPrompt?: string;
  /** Unparsed integer string (milliseconds). */
  visionTimeoutMs?: string;
};
|
||||
|
||||
/**
 * Raw option bag received by the `visual-driver` command action.
 *
 * All values are optional. Numeric options (`settleMs`, `visionTimeoutMs`)
 * are typed as strings here and converted with `parseOptionalInteger` before
 * being handed to the runtime.
 */
type MantisVisualDriverCommanderOptions = {
  browserUrl?: string;
  crabboxBin?: string;
  expectText?: string;
  leaseId?: string;
  outputDir?: string;
  provider?: string;
  repoRoot?: string;
  /** Unparsed integer string (milliseconds). */
  settleMs?: string;
  visionMode?: MantisVisualTaskVisionMode;
  visionModel?: string;
  visionPrompt?: string;
  /** Unparsed integer string (milliseconds). */
  visionTimeoutMs?: string;
};
|
||||
|
||||
/**
 * Appends `value` to the values collected so far and returns a new array.
 *
 * The `previous` array is never mutated.
 * NOTE(review): presumably used as the accumulator for a repeatable CLI
 * option — no caller is visible in this section.
 *
 * @param value - The newly supplied value.
 * @param previous - Values collected so far; defaults to an empty list.
 * @returns A fresh array containing `previous` followed by `value`.
 */
function collectString(value: string, previous: string[] = []): string[] {
  return [...previous, value];
}
|
||||
|
||||
/**
 * Parses an optional CLI value as a non-negative base-10 integer.
 *
 * The input must be the canonical decimal spelling of the number: leading
 * zeros, signs, whitespace, fractions and trailing units are all rejected
 * because the parsed value is rendered back to a string and compared with
 * the original input.
 *
 * @param value - Raw option value, or `undefined` when the option was omitted.
 * @param label - Option name used in the error message (e.g. `--settle-ms`).
 * @returns The parsed integer, or `undefined` when `value` is `undefined`.
 * @throws Error when `value` is not a canonical non-negative integer.
 */
function parseOptionalInteger(value: string | undefined, label: string): number | undefined {
  if (value === undefined) {
    return undefined;
  }
  const parsed = Number.parseInt(value, 10);
  // The round-trip comparison catches partial parses such as "5ms" or "1.5".
  if (!Number.isFinite(parsed) || String(parsed) !== value || parsed < 0) {
    throw new Error(`${label} must be a non-negative integer`);
  }
  return parsed;
}
|
||||
|
||||
export function registerMantisCli(qa: Command) {
|
||||
const mantis = qa
|
||||
.command("mantis")
|
||||
@@ -166,7 +228,7 @@ export function registerMantisCli(qa: Command) {
|
||||
mantis
|
||||
.command("desktop-browser-smoke")
|
||||
.description(
|
||||
"Lease or reuse a Crabbox desktop, open a visible browser, and capture a VNC desktop screenshot",
|
||||
"Lease or reuse a Crabbox desktop, open a visible browser, and capture VNC desktop screenshot/video artifacts",
|
||||
)
|
||||
.option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
|
||||
.option("--output-dir <path>", "Mantis desktop browser artifact directory")
|
||||
@@ -199,7 +261,7 @@ export function registerMantisCli(qa: Command) {
|
||||
mantis
|
||||
.command("slack-desktop-smoke")
|
||||
.description(
|
||||
"Lease or reuse a Crabbox VNC desktop, run Slack QA inside it, open Slack in the browser, and capture a screenshot",
|
||||
"Lease or reuse a Crabbox VNC desktop, run Slack QA inside it, open Slack in the browser, and capture screenshot/video artifacts",
|
||||
)
|
||||
.option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
|
||||
.option("--output-dir <path>", "Mantis Slack desktop artifact directory")
|
||||
@@ -249,4 +311,83 @@ export function registerMantisCli(qa: Command) {
|
||||
ttl: opts.ttl,
|
||||
});
|
||||
});
|
||||
|
||||
mantis
|
||||
.command("visual-task")
|
||||
.description(
|
||||
"Lease or reuse a Crabbox desktop, drive visible browser UI, record MP4, screenshot it, and optionally run image-understanding assertions",
|
||||
)
|
||||
.option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
|
||||
.option("--output-dir <path>", "Mantis visual-task artifact directory")
|
||||
.option("--crabbox-bin <path>", "Crabbox binary path")
|
||||
.option("--provider <provider>", "Crabbox provider")
|
||||
.option("--machine-class <class>", "Crabbox machine class")
|
||||
.option("--class <class>", "Alias for --machine-class")
|
||||
.option("--lease-id <id>", "Reuse an existing Crabbox lease")
|
||||
.option("--idle-timeout <duration>", "Crabbox idle timeout")
|
||||
.option("--ttl <duration>", "Crabbox maximum lease lifetime")
|
||||
.option("--keep-lease", "Keep a lease created by this run after a passing task")
|
||||
.option("--browser-url <url>", "URL to open in the visible browser")
|
||||
.option("--duration <duration>", "Desktop recording duration")
|
||||
.option("--settle-ms <ms>", "Milliseconds to wait after launch before screenshot")
|
||||
.option("--vision-mode <mode>", "Vision mode: image-describe or metadata")
|
||||
.option("--vision-prompt <text>", "Prompt for image understanding")
|
||||
.option("--vision-model <provider/model>", "Image-capable provider/model ref")
|
||||
.option("--vision-timeout-ms <ms>", "Image understanding timeout in milliseconds")
|
||||
.option("--expect-text <text>", "Case-insensitive text expected in the vision output")
|
||||
.action(async (opts: MantisVisualTaskCommanderOptions) => {
|
||||
await runVisualTask({
|
||||
browserUrl: opts.browserUrl,
|
||||
crabboxBin: opts.crabboxBin,
|
||||
duration: opts.duration,
|
||||
expectText: opts.expectText,
|
||||
idleTimeout: opts.idleTimeout,
|
||||
keepLease: opts.keepLease,
|
||||
leaseId: opts.leaseId,
|
||||
machineClass: opts.machineClass ?? opts.class,
|
||||
outputDir: opts.outputDir,
|
||||
provider: opts.provider,
|
||||
repoRoot: opts.repoRoot,
|
||||
settleMs: parseOptionalInteger(opts.settleMs, "--settle-ms"),
|
||||
ttl: opts.ttl,
|
||||
visionMode: opts.visionMode,
|
||||
visionModel: opts.visionModel,
|
||||
visionPrompt: opts.visionPrompt,
|
||||
visionTimeoutMs: parseOptionalInteger(opts.visionTimeoutMs, "--vision-timeout-ms"),
|
||||
});
|
||||
});
|
||||
|
||||
mantis
|
||||
.command("visual-driver")
|
||||
.description(
|
||||
"Driver half for Mantis visual-task; launched by Crabbox record --while, then opens browser, screenshots, and runs vision",
|
||||
)
|
||||
.option("--repo-root <path>", "Repository root to target when running from a neutral cwd")
|
||||
.option("--output-dir <path>", "Mantis visual-task artifact directory")
|
||||
.option("--crabbox-bin <path>", "Crabbox binary path")
|
||||
.option("--provider <provider>", "Crabbox provider")
|
||||
.option("--lease-id <id>", "Crabbox lease id")
|
||||
.option("--browser-url <url>", "URL to open in the visible browser")
|
||||
.option("--settle-ms <ms>", "Milliseconds to wait after launch before screenshot")
|
||||
.option("--vision-mode <mode>", "Vision mode: image-describe or metadata")
|
||||
.option("--vision-prompt <text>", "Prompt for image understanding")
|
||||
.option("--vision-model <provider/model>", "Image-capable provider/model ref")
|
||||
.option("--vision-timeout-ms <ms>", "Image understanding timeout in milliseconds")
|
||||
.option("--expect-text <text>", "Case-insensitive text expected in the vision output")
|
||||
.action(async (opts: MantisVisualDriverCommanderOptions) => {
|
||||
await runVisualDriver({
|
||||
browserUrl: opts.browserUrl,
|
||||
crabboxBin: opts.crabboxBin,
|
||||
expectText: opts.expectText,
|
||||
leaseId: opts.leaseId,
|
||||
outputDir: opts.outputDir,
|
||||
provider: opts.provider,
|
||||
repoRoot: opts.repoRoot,
|
||||
settleMs: parseOptionalInteger(opts.settleMs, "--settle-ms"),
|
||||
visionMode: opts.visionMode,
|
||||
visionModel: opts.visionModel,
|
||||
visionPrompt: opts.visionPrompt,
|
||||
visionTimeoutMs: parseOptionalInteger(opts.visionTimeoutMs, "--vision-timeout-ms"),
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -50,8 +50,10 @@ describe("mantis desktop browser smoke runtime", () => {
|
||||
expect(outputDir).toBeTypeOf("string");
|
||||
await fs.mkdir(outputDir as string, { recursive: true });
|
||||
await fs.writeFile(path.join(outputDir as string, "desktop-browser-smoke.png"), "png");
|
||||
await fs.writeFile(path.join(outputDir as string, "desktop-browser-smoke.mp4"), "mp4");
|
||||
await fs.writeFile(path.join(outputDir as string, "remote-metadata.json"), "{}\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "chrome.log"), "chrome\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "ffmpeg.log"), "ffmpeg\n");
|
||||
return { stdout: "", stderr: "" };
|
||||
}
|
||||
return { stdout: "", stderr: "" };
|
||||
@@ -80,11 +82,10 @@ describe("mantis desktop browser smoke runtime", () => {
|
||||
expect(commands.every((entry) => entry.env === runtimeEnv)).toBe(true);
|
||||
const rsyncArgs = commands.find((entry) => entry.command === "rsync")?.args ?? [];
|
||||
expect(rsyncArgs).not.toContain("--delete");
|
||||
expect(rsyncArgs).toEqual(expect.arrayContaining(["--exclude", "chrome-profile/**"]));
|
||||
expect(rsyncArgs).toEqual(
|
||||
expect.arrayContaining([
|
||||
"crabbox@203.0.113.10:/tmp/openclaw-mantis-desktop-2026-05-04T12-00-00-000Z/desktop-browser-smoke.png",
|
||||
"crabbox@203.0.113.10:/tmp/openclaw-mantis-desktop-2026-05-04T12-00-00-000Z/remote-metadata.json",
|
||||
"crabbox@203.0.113.10:/tmp/openclaw-mantis-desktop-2026-05-04T12-00-00-000Z/chrome.log",
|
||||
"crabbox@203.0.113.10:/tmp/openclaw-mantis-desktop-2026-05-04T12-00-00-000Z/",
|
||||
]),
|
||||
);
|
||||
const remoteScript = commands
|
||||
@@ -94,9 +95,13 @@ describe("mantis desktop browser smoke runtime", () => {
|
||||
expect(remoteScript).toContain("${CHROME_BIN:-}");
|
||||
expect(remoteScript).toContain("chromium-browser");
|
||||
expect(remoteScript).toContain("base64 -d");
|
||||
expect(remoteScript).toContain("ffmpeg");
|
||||
expect(remoteScript).toContain('sudo apt-get update -y >>"$out/apt.log" 2>&1 || true');
|
||||
expect(remoteScript).toContain("desktop-browser-smoke.mp4");
|
||||
expect(remoteScript).toContain('url="file://$out/input.html"');
|
||||
expect(remoteScript).toContain('"browserBinary": "$browser_bin"');
|
||||
await expect(fs.readFile(result.screenshotPath ?? "", "utf8")).resolves.toBe("png");
|
||||
await expect(fs.readFile(result.videoPath ?? "", "utf8")).resolves.toBe("mp4");
|
||||
const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
|
||||
browserUrl: string;
|
||||
crabbox: { id: string; vncCommand: string };
|
||||
|
||||
@@ -28,6 +28,7 @@ export type MantisDesktopBrowserSmokeResult = {
|
||||
screenshotPath?: string;
|
||||
status: "pass" | "fail";
|
||||
summaryPath: string;
|
||||
videoPath?: string;
|
||||
};
|
||||
|
||||
type CommandResult = {
|
||||
@@ -58,6 +59,7 @@ type MantisDesktopBrowserSmokeSummary = {
|
||||
reportPath: string;
|
||||
screenshotPath?: string;
|
||||
summaryPath: string;
|
||||
videoPath?: string;
|
||||
};
|
||||
browserUrl: string;
|
||||
htmlFile?: string;
|
||||
@@ -232,6 +234,24 @@ if [ -z "$browser_bin" ]; then
|
||||
echo "No browser binary found. Checked BROWSER, CHROME_BIN, google-chrome, chromium, chromium-browser." >&2
|
||||
exit 127
|
||||
fi
|
||||
video_pid=""
|
||||
if command -v ffmpeg >/dev/null 2>&1; then
|
||||
:
|
||||
else
|
||||
sudo apt-get update -y >>"$out/apt.log" 2>&1 || true
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg >>"$out/apt.log" 2>&1 || true
|
||||
fi
|
||||
if command -v ffmpeg >/dev/null 2>&1; then
|
||||
display_input="$DISPLAY"
|
||||
case "$display_input" in
|
||||
*.*) ;;
|
||||
*) display_input="$display_input.0" ;;
|
||||
esac
|
||||
ffmpeg -hide_banner -loglevel error -y -f x11grab -video_size 1280x900 -framerate 15 -i "$display_input" -t 10 -pix_fmt yuv420p "$out/desktop-browser-smoke.mp4" >"$out/ffmpeg.log" 2>&1 &
|
||||
video_pid=$!
|
||||
else
|
||||
echo "ffmpeg missing; video artifact skipped" >"$out/ffmpeg.log"
|
||||
fi
|
||||
"$browser_bin" \
|
||||
--user-data-dir="$profile" \
|
||||
--no-first-run \
|
||||
@@ -248,6 +268,9 @@ cleanup() {
|
||||
trap cleanup EXIT
|
||||
sleep 8
|
||||
scrot "$out/desktop-browser-smoke.png"
|
||||
if [ -n "$video_pid" ]; then
|
||||
wait "$video_pid" || true
|
||||
fi
|
||||
cleanup
|
||||
trap - EXIT
|
||||
sleep 1
|
||||
@@ -291,7 +314,11 @@ function renderReport(summary: MantisDesktopBrowserSmokeSummary) {
|
||||
summary.artifacts.screenshotPath
|
||||
? `- Screenshot: \`${path.basename(summary.artifacts.screenshotPath)}\``
|
||||
: "- Screenshot: missing",
|
||||
summary.artifacts.videoPath
|
||||
? `- Video: \`${path.basename(summary.artifacts.videoPath)}\``
|
||||
: "- Video: missing",
|
||||
"- Remote metadata: `remote-metadata.json`",
|
||||
"- FFmpeg log: `ffmpeg.log`",
|
||||
"- Chrome log: `chrome.log`",
|
||||
summary.error ? `- Error: ${summary.error}` : undefined,
|
||||
"",
|
||||
@@ -401,9 +428,9 @@ async function copyRemoteArtifacts(params: {
|
||||
"-o",
|
||||
"UserKnownHostsFile=/dev/null",
|
||||
].join(" "),
|
||||
`${sshUser}@${host}:${params.remoteOutputDir}/desktop-browser-smoke.png`,
|
||||
`${sshUser}@${host}:${params.remoteOutputDir}/remote-metadata.json`,
|
||||
`${sshUser}@${host}:${params.remoteOutputDir}/chrome.log`,
|
||||
"--exclude",
|
||||
"chrome-profile/**",
|
||||
`${sshUser}@${host}:${params.remoteOutputDir}/`,
|
||||
`${params.outputDir}/`,
|
||||
],
|
||||
cwd: params.cwd,
|
||||
@@ -524,14 +551,17 @@ export async function runMantisDesktopBrowserSmoke(
|
||||
runner,
|
||||
});
|
||||
const screenshotPath = path.join(outputDir, "desktop-browser-smoke.png");
|
||||
const videoPath = path.join(outputDir, "desktop-browser-smoke.mp4");
|
||||
if (!(await pathExists(screenshotPath))) {
|
||||
throw new Error("Desktop browser screenshot was not copied back from Crabbox.");
|
||||
}
|
||||
const copiedVideoPath = (await pathExists(videoPath)) ? videoPath : undefined;
|
||||
summary = {
|
||||
artifacts: {
|
||||
reportPath,
|
||||
screenshotPath,
|
||||
summaryPath,
|
||||
videoPath: copiedVideoPath,
|
||||
},
|
||||
browserUrl,
|
||||
htmlFile,
|
||||
@@ -556,6 +586,7 @@ export async function runMantisDesktopBrowserSmoke(
|
||||
screenshotPath,
|
||||
status: "pass",
|
||||
summaryPath,
|
||||
videoPath: copiedVideoPath,
|
||||
};
|
||||
} catch (error) {
|
||||
summary = {
|
||||
|
||||
@@ -28,14 +28,16 @@ describe("mantis before/after runtime", () => {
|
||||
const outputDir = path.join(repoRootArg, outputDirArg);
|
||||
await fs.mkdir(outputDir, { recursive: true });
|
||||
const screenshotPath = path.join(outputDir, `${lane}-timeline.png`);
|
||||
const videoPath = path.join(outputDir, `${lane}-timeline.mp4`);
|
||||
await fs.writeFile(screenshotPath, `${lane} screenshot`);
|
||||
await fs.writeFile(videoPath, `${lane} video`);
|
||||
await fs.writeFile(
|
||||
path.join(outputDir, "discord-qa-summary.json"),
|
||||
`${JSON.stringify(
|
||||
{
|
||||
scenarios: [
|
||||
{
|
||||
artifactPaths: { screenshot: screenshotPath },
|
||||
artifactPaths: { screenshot: screenshotPath, video: videoPath },
|
||||
details:
|
||||
lane === "baseline"
|
||||
? "reaction timeline missing thinking/done"
|
||||
@@ -94,5 +96,11 @@ describe("mantis before/after runtime", () => {
|
||||
await expect(
|
||||
fs.readFile(path.join(result.outputDir, "candidate", "candidate.png"), "utf8"),
|
||||
).resolves.toBe("candidate screenshot");
|
||||
await expect(
|
||||
fs.readFile(path.join(result.outputDir, "baseline", "baseline.mp4"), "utf8"),
|
||||
).resolves.toBe("baseline video");
|
||||
await expect(
|
||||
fs.readFile(path.join(result.outputDir, "candidate", "candidate.mp4"), "utf8"),
|
||||
).resolves.toBe("candidate video");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -51,6 +51,7 @@ type LaneResult = {
|
||||
screenshotPath?: string;
|
||||
status: string;
|
||||
summaryPath: string;
|
||||
videoPath?: string;
|
||||
};
|
||||
|
||||
type Comparison = {
|
||||
@@ -60,6 +61,7 @@ type Comparison = {
|
||||
reproduced: boolean;
|
||||
screenshotPath?: string;
|
||||
status: string;
|
||||
videoPath?: string;
|
||||
};
|
||||
candidate: {
|
||||
expected: "queued -> thinking -> done";
|
||||
@@ -67,6 +69,7 @@ type Comparison = {
|
||||
ref: string;
|
||||
screenshotPath?: string;
|
||||
status: string;
|
||||
videoPath?: string;
|
||||
};
|
||||
pass: boolean;
|
||||
scenario: string;
|
||||
@@ -157,12 +160,14 @@ async function readLaneResult(params: {
|
||||
summary.scenarios?.find((entry) => entry.id === params.scenario) ?? summary.scenarios?.[0];
|
||||
const status = scenarioSummary?.status ?? "fail";
|
||||
const screenshotPath = scenarioSummary?.artifactPaths?.screenshot;
|
||||
const videoPath = scenarioSummary?.artifactPaths?.video;
|
||||
return {
|
||||
outputDir: params.publishedLaneDir,
|
||||
scenarioDetails: scenarioSummary?.details,
|
||||
screenshotPath,
|
||||
status,
|
||||
summaryPath,
|
||||
videoPath,
|
||||
} satisfies LaneResult;
|
||||
}
|
||||
|
||||
@@ -189,6 +194,9 @@ function renderReport(params: {
|
||||
params.baseline.screenshotPath
|
||||
? `- Screenshot: \`${path.join("baseline", path.basename(params.baseline.screenshotPath))}\``
|
||||
: "- Screenshot: missing",
|
||||
params.baseline.videoPath
|
||||
? `- Video: \`${path.join("baseline", path.basename(params.baseline.videoPath))}\``
|
||||
: "- Video: missing",
|
||||
params.baseline.scenarioDetails ? `- Details: ${params.baseline.scenarioDetails}` : undefined,
|
||||
"",
|
||||
"## Candidate",
|
||||
@@ -200,6 +208,9 @@ function renderReport(params: {
|
||||
params.candidate.screenshotPath
|
||||
? `- Screenshot: \`${path.join("candidate", path.basename(params.candidate.screenshotPath))}\``
|
||||
: "- Screenshot: missing",
|
||||
params.candidate.videoPath
|
||||
? `- Video: \`${path.join("candidate", path.basename(params.candidate.videoPath))}\``
|
||||
: "- Video: missing",
|
||||
params.candidate.scenarioDetails ? `- Details: ${params.candidate.scenarioDetails}` : undefined,
|
||||
"",
|
||||
].filter((line) => line !== undefined);
|
||||
@@ -218,6 +229,18 @@ async function copyScreenshot(params: { lane: "baseline" | "candidate"; result:
|
||||
return target;
|
||||
}
|
||||
|
||||
/**
 * Copies a lane's recorded video to `<outputDir>/<lane>.mp4`.
 *
 * A relative `videoPath` is resolved against the lane's `outputDir`; an
 * absolute one is used as-is.
 *
 * @param params.lane - Lane name; also the base name of the copied file.
 * @param params.result - Lane result carrying `outputDir` and an optional `videoPath`.
 * @returns The path of the copied file, or `undefined` when the lane has no video.
 * @throws Whatever `fs.copyFile` rejects with, e.g. when the source file is missing.
 */
async function copyVideo(params: {
  lane: "baseline" | "candidate";
  result: LaneResult;
}): Promise<string | undefined> {
  if (!params.result.videoPath) {
    return undefined;
  }
  const source = path.isAbsolute(params.result.videoPath)
    ? params.result.videoPath
    : path.join(params.result.outputDir, params.result.videoPath);
  const target = path.join(params.result.outputDir, `${params.lane}.mp4`);
  await fs.copyFile(source, target);
  return target;
}
|
||||
|
||||
async function runLane(params: {
|
||||
lane: "baseline" | "candidate";
|
||||
outputDir: string;
|
||||
@@ -300,9 +323,11 @@ async function runLane(params: {
|
||||
scenario: params.scenario,
|
||||
});
|
||||
const copiedScreenshot = await copyScreenshot({ lane: params.lane, result });
|
||||
const copiedVideo = await copyVideo({ lane: params.lane, result });
|
||||
return {
|
||||
...result,
|
||||
screenshotPath: copiedScreenshot ?? result.screenshotPath,
|
||||
videoPath: copiedVideo ?? result.videoPath,
|
||||
} satisfies LaneResult;
|
||||
}
|
||||
|
||||
@@ -373,6 +398,7 @@ export async function runMantisBeforeAfter(
|
||||
reproduced: baselineResult.status === "fail",
|
||||
screenshotPath: baselineResult.screenshotPath,
|
||||
status: baselineResult.status,
|
||||
videoPath: baselineResult.videoPath,
|
||||
},
|
||||
candidate: {
|
||||
expected: "queued -> thinking -> done",
|
||||
@@ -380,6 +406,7 @@ export async function runMantisBeforeAfter(
|
||||
ref: candidate,
|
||||
screenshotPath: candidateResult.screenshotPath,
|
||||
status: candidateResult.status,
|
||||
videoPath: candidateResult.videoPath,
|
||||
},
|
||||
pass: baselineResult.status === "fail" && candidateResult.status === "pass",
|
||||
scenario,
|
||||
|
||||
@@ -54,8 +54,10 @@ describe("mantis Slack desktop smoke runtime", () => {
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-qa-report.md"), "# Slack\n");
|
||||
} else {
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png");
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.mp4"), "mp4");
|
||||
await fs.writeFile(path.join(outputDir as string, "remote-metadata.json"), "{}\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "chrome.log"), "chrome\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "ffmpeg.log"), "ffmpeg\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), "qa\n");
|
||||
}
|
||||
return { stdout: "", stderr: "" };
|
||||
@@ -97,6 +99,9 @@ describe("mantis Slack desktop smoke runtime", () => {
|
||||
expect(remoteScript).toContain("${CHROME_BIN:-}");
|
||||
expect(remoteScript).toContain("pnpm install --frozen-lockfile");
|
||||
expect(remoteScript).toContain("pnpm build");
|
||||
expect(remoteScript).toContain("ffmpeg");
|
||||
expect(remoteScript).toContain('sudo apt-get update -y >>"$out/apt.log" 2>&1 || true');
|
||||
expect(remoteScript).toContain("slack-desktop-smoke.mp4");
|
||||
expect(remoteScript).toContain("openclaw qa slack");
|
||||
expect(remoteScript).toContain("--scenario 'slack-canary'");
|
||||
expect(remoteScript).toContain("OPENCLAW_MANTIS_SLACK_BROWSER_PROFILE_DIR");
|
||||
@@ -106,11 +111,12 @@ describe("mantis Slack desktop smoke runtime", () => {
|
||||
expect(rsyncArgs).not.toContain("--delete");
|
||||
expect(rsyncArgs).toEqual(
|
||||
expect.arrayContaining([
|
||||
"crabbox@203.0.113.10:/tmp/openclaw-mantis-slack-desktop-2026-05-04T13-00-00-000Z/slack-desktop-smoke.png",
|
||||
"crabbox@203.0.113.10:/tmp/openclaw-mantis-slack-desktop-2026-05-04T13-00-00-000Z/",
|
||||
"crabbox@203.0.113.10:/tmp/openclaw-mantis-slack-desktop-2026-05-04T13-00-00-000Z/slack-qa/",
|
||||
]),
|
||||
);
|
||||
await expect(fs.readFile(result.screenshotPath ?? "", "utf8")).resolves.toBe("png");
|
||||
await expect(fs.readFile(result.videoPath ?? "", "utf8")).resolves.toBe("mp4");
|
||||
const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
|
||||
crabbox: { id: string; vncCommand: string };
|
||||
status: string;
|
||||
@@ -146,8 +152,10 @@ describe("mantis Slack desktop smoke runtime", () => {
|
||||
const outputDir = args.at(-1);
|
||||
await fs.mkdir(outputDir as string, { recursive: true });
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png");
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.mp4"), "mp4");
|
||||
await fs.writeFile(path.join(outputDir as string, "remote-metadata.json"), "{}\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "chrome.log"), "chrome\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "ffmpeg.log"), "ffmpeg\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), "qa\n");
|
||||
}
|
||||
return { stdout: "", stderr: "" };
|
||||
@@ -163,17 +171,19 @@ describe("mantis Slack desktop smoke runtime", () => {
|
||||
|
||||
expect(result.status).toBe("fail");
|
||||
expect(result.screenshotPath).toBe(path.join(result.outputDir, "slack-desktop-smoke.png"));
|
||||
expect(result.videoPath).toBe(path.join(result.outputDir, "slack-desktop-smoke.mp4"));
|
||||
await expect(
|
||||
fs.readFile(path.join(result.outputDir, "slack-desktop-smoke.png"), "utf8"),
|
||||
).resolves.toBe("png");
|
||||
const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
|
||||
artifacts: { screenshotPath?: string };
|
||||
artifacts: { screenshotPath?: string; videoPath?: string };
|
||||
error?: string;
|
||||
status: string;
|
||||
};
|
||||
expect(summary.status).toBe("fail");
|
||||
expect(summary.error).toContain("remote Slack QA failed");
|
||||
expect(summary.artifacts.screenshotPath).toContain("slack-desktop-smoke.png");
|
||||
expect(summary.artifacts.videoPath).toContain("slack-desktop-smoke.mp4");
|
||||
});
|
||||
|
||||
it("accepts Blacksmith Testbox lease ids from Crabbox warmup", async () => {
|
||||
@@ -204,8 +214,10 @@ describe("mantis Slack desktop smoke runtime", () => {
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-qa-report.md"), "# Slack\n");
|
||||
} else {
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.png"), "png");
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-desktop-smoke.mp4"), "mp4");
|
||||
await fs.writeFile(path.join(outputDir as string, "remote-metadata.json"), "{}\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "chrome.log"), "chrome\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "ffmpeg.log"), "ffmpeg\n");
|
||||
await fs.writeFile(path.join(outputDir as string, "slack-desktop-command.log"), "qa\n");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ export type MantisSlackDesktopSmokeResult = {
|
||||
screenshotPath?: string;
|
||||
status: "pass" | "fail";
|
||||
summaryPath: string;
|
||||
videoPath?: string;
|
||||
};
|
||||
|
||||
type CommandResult = {
|
||||
@@ -66,6 +67,7 @@ type MantisSlackDesktopSmokeSummary = {
|
||||
screenshotPath?: string;
|
||||
slackQaDir?: string;
|
||||
summaryPath: string;
|
||||
videoPath?: string;
|
||||
};
|
||||
crabbox: {
|
||||
bin: string;
|
||||
@@ -302,6 +304,24 @@ fi
|
||||
if [ -z "$slack_url" ]; then
|
||||
slack_url="https://app.slack.com/client"
|
||||
fi
|
||||
video_pid=""
|
||||
if command -v ffmpeg >/dev/null 2>&1; then
|
||||
:
|
||||
else
|
||||
sudo apt-get update -y >>"$out/apt.log" 2>&1 || true
|
||||
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg >>"$out/apt.log" 2>&1 || true
|
||||
fi
|
||||
if command -v ffmpeg >/dev/null 2>&1; then
|
||||
display_input="$DISPLAY"
|
||||
case "$display_input" in
|
||||
*.*) ;;
|
||||
*) display_input="$display_input.0" ;;
|
||||
esac
|
||||
ffmpeg -hide_banner -loglevel error -y -f x11grab -video_size 1440x1000 -framerate 15 -i "$display_input" -t 45 -pix_fmt yuv420p "$out/slack-desktop-smoke.mp4" >"$out/ffmpeg.log" 2>&1 &
|
||||
video_pid=$!
|
||||
else
|
||||
echo "ffmpeg missing; video artifact skipped" >"$out/ffmpeg.log"
|
||||
fi
|
||||
if [ "$setup_gateway" = "1" ]; then
|
||||
nohup "$browser_bin" \
|
||||
--user-data-dir="$profile" \
|
||||
@@ -376,6 +396,9 @@ MANTIS_SLACK_PATCH
|
||||
} >"$out/slack-desktop-command.log" 2>&1 || qa_status=$?
|
||||
sleep 5
|
||||
scrot "$out/slack-desktop-smoke.png" || true
|
||||
if [ -n "$video_pid" ]; then
|
||||
wait "$video_pid" || true
|
||||
fi
|
||||
if [ "$setup_gateway" != "1" ]; then
|
||||
kill "$chrome_pid" >/dev/null 2>&1 || true
|
||||
fi
|
||||
@@ -422,9 +445,13 @@ function renderReport(summary: MantisSlackDesktopSmokeSummary) {
|
||||
summary.artifacts.screenshotPath
|
||||
? `- Screenshot: \`${path.basename(summary.artifacts.screenshotPath)}\``
|
||||
: "- Screenshot: missing",
|
||||
summary.artifacts.videoPath
|
||||
? `- Video: \`${path.basename(summary.artifacts.videoPath)}\``
|
||||
: "- Video: missing",
|
||||
summary.artifacts.slackQaDir ? "- Slack QA artifacts: `slack-qa/`" : undefined,
|
||||
"- Remote metadata: `remote-metadata.json`",
|
||||
"- Remote command log: `slack-desktop-command.log`",
|
||||
"- FFmpeg log: `ffmpeg.log`",
|
||||
"- Chrome log: `chrome.log`",
|
||||
summary.error ? `- Error: ${summary.error}` : undefined,
|
||||
"",
|
||||
@@ -544,10 +571,7 @@ async function copyRemoteArtifacts(params: {
|
||||
"-az",
|
||||
"-e",
|
||||
sshArgs,
|
||||
`${sshUser}@${host}:${params.remoteOutputDir}/slack-desktop-smoke.png`,
|
||||
`${sshUser}@${host}:${params.remoteOutputDir}/remote-metadata.json`,
|
||||
`${sshUser}@${host}:${params.remoteOutputDir}/chrome.log`,
|
||||
`${sshUser}@${host}:${params.remoteOutputDir}/slack-desktop-command.log`,
|
||||
`${sshUser}@${host}:${params.remoteOutputDir}/`,
|
||||
`${params.outputDir}/`,
|
||||
],
|
||||
cwd: params.cwd,
|
||||
@@ -636,6 +660,7 @@ export async function runMantisSlackDesktopSmoke(
|
||||
let summary: MantisSlackDesktopSmokeSummary | undefined;
|
||||
let screenshotPath: string | undefined;
|
||||
let slackQaDir: string | undefined;
|
||||
let videoPath: string | undefined;
|
||||
|
||||
try {
|
||||
leaseId =
|
||||
@@ -702,6 +727,10 @@ export async function runMantisSlackDesktopSmoke(
|
||||
runner,
|
||||
});
|
||||
screenshotPath = path.join(outputDir, "slack-desktop-smoke.png");
|
||||
videoPath = path.join(outputDir, "slack-desktop-smoke.mp4");
|
||||
if (!(await pathExists(videoPath))) {
|
||||
videoPath = undefined;
|
||||
}
|
||||
slackQaDir = path.join(outputDir, "slack-qa");
|
||||
if (!(await pathExists(screenshotPath))) {
|
||||
throw new Error("Slack desktop screenshot was not copied back from Crabbox.");
|
||||
@@ -715,6 +744,7 @@ export async function runMantisSlackDesktopSmoke(
|
||||
screenshotPath,
|
||||
slackQaDir,
|
||||
summaryPath,
|
||||
videoPath,
|
||||
},
|
||||
crabbox: {
|
||||
bin: crabboxBin,
|
||||
@@ -738,6 +768,7 @@ export async function runMantisSlackDesktopSmoke(
|
||||
screenshotPath,
|
||||
status: "pass",
|
||||
summaryPath,
|
||||
videoPath,
|
||||
};
|
||||
} catch (error) {
|
||||
summary = {
|
||||
@@ -746,6 +777,7 @@ export async function runMantisSlackDesktopSmoke(
|
||||
screenshotPath,
|
||||
slackQaDir,
|
||||
summaryPath,
|
||||
videoPath,
|
||||
},
|
||||
crabbox: {
|
||||
bin: crabboxBin,
|
||||
@@ -771,6 +803,7 @@ export async function runMantisSlackDesktopSmoke(
|
||||
screenshotPath,
|
||||
status: "fail",
|
||||
summaryPath,
|
||||
videoPath,
|
||||
};
|
||||
} finally {
|
||||
if (summary) {
|
||||
|
||||
349
extensions/qa-lab/src/mantis/visual-task.runtime.test.ts
Normal file
349
extensions/qa-lab/src/mantis/visual-task.runtime.test.ts
Normal file
@@ -0,0 +1,349 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { runMantisVisualDriver, runMantisVisualTask } from "./visual-task.runtime.js";
|
||||
|
||||
describe("mantis visual task runtime", () => {
  type RecordedCommand = { args: readonly string[]; command: string };

  const crabboxBin = "/tmp/crabbox";
  let repoRoot: string;

  /** Empty success result for commands a test does not care about. */
  const noOutput = () => ({ stdout: "", stderr: "" });

  /** Reads the value that follows `flag` in an argument list. */
  const flagValue = (args: readonly string[], flag: string) => args[args.indexOf(flag) + 1];

  /** Canned `crabbox inspect --json` reply for lease `cbx_abc123`. */
  const inspectReply = () => ({
    stdout: `${JSON.stringify({
      id: "cbx_abc123",
      provider: "hetzner",
      slug: "brisk-mantis",
      state: "active",
    })}\n`,
    stderr: "",
  });

  /** Canned replies for the lease warmup/inspect commands; `undefined` for anything else. */
  function leaseReply(command: string, args: readonly string[]) {
    if (command !== crabboxBin) {
      return undefined;
    }
    if (args[0] === "warmup") {
      return { stdout: "ready lease cbx_abc123\n", stderr: "" };
    }
    if (args[0] === "inspect") {
      return inspectReply();
    }
    return undefined;
  }

  /** Writes the screenshot and the passing driver result file into `outputDir`. */
  async function writePassingDriverArtifacts(outputDir: string) {
    await fs.writeFile(path.join(outputDir, "visual-task.png"), "png");
    await fs.writeFile(
      path.join(outputDir, "mantis-visual-task-driver-result.json"),
      `${JSON.stringify({
        browserUrl: "https://example.net",
        finishedAt: "2026-05-04T12:00:05.000Z",
        matched: true,
        outputDir,
        screenshotPath: path.join(outputDir, "visual-task.png"),
        startedAt: "2026-05-04T12:00:01.000Z",
        status: "pass",
        vision: {
          mode: "metadata",
          timeoutMs: 120000,
        },
      })}\n`,
    );
  }

  /** Stands in for `crabbox screenshot` by writing a placeholder PNG at `--output`. */
  async function writeFakeScreenshot(args: readonly string[]) {
    const outputPath = flagValue(args, "--output");
    await fs.mkdir(path.dirname(outputPath), { recursive: true });
    await fs.writeFile(outputPath, "png");
  }

  beforeEach(async () => {
    repoRoot = await fs.mkdtemp(path.join(os.tmpdir(), "mantis-visual-task-"));
  });

  afterEach(async () => {
    await fs.rm(repoRoot, { force: true, recursive: true });
  });

  it("records a visible browser task and keeps screenshot/video artifacts", async () => {
    const commands: RecordedCommand[] = [];
    const runner = vi.fn(async (command: string, args: readonly string[]) => {
      commands.push({ command, args });
      const canned = leaseReply(command, args);
      if (canned) {
        return canned;
      }
      if (command === crabboxBin && args[0] === "record") {
        const videoPath = flagValue(args, "--output");
        const outputDir = flagValue(args, "--output-dir");
        await fs.mkdir(path.dirname(videoPath), { recursive: true });
        await fs.writeFile(videoPath, "mp4");
        await writePassingDriverArtifacts(outputDir);
      }
      return noOutput();
    });

    const result = await runMantisVisualTask({
      commandRunner: runner,
      crabboxBin,
      duration: "12s",
      env: { PATH: process.env.PATH },
      now: () => new Date("2026-05-04T12:00:00.000Z"),
      outputDir: ".artifacts/qa-e2e/mantis/visual-task-test",
      repoRoot,
      settleMs: 0,
      visionMode: "metadata",
    });

    expect(result.status).toBe("pass");
    expect(commands.map((entry) => [entry.command, entry.args[0]])).toEqual([
      ["/tmp/crabbox", "warmup"],
      ["/tmp/crabbox", "inspect"],
      ["/tmp/crabbox", "record"],
      ["/tmp/crabbox", "stop"],
    ]);
    const recordArgs = commands.find((entry) => entry.args[0] === "record")?.args ?? [];
    expect(recordArgs).toEqual(
      expect.arrayContaining([
        "--duration",
        "12s",
        "--output",
        path.join(repoRoot, ".artifacts/qa-e2e/mantis/visual-task-test/visual-task.mp4"),
        "--while",
        "--",
        "pnpm",
        "--dir",
        repoRoot,
        "openclaw",
        "qa",
        "mantis",
        "visual-driver",
      ]),
    );
    await expect(fs.readFile(result.screenshotPath ?? "", "utf8")).resolves.toBe("png");
    await expect(fs.readFile(result.videoPath ?? "", "utf8")).resolves.toBe("mp4");
    const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
      crabbox: { id: string; vncCommand: string };
      status: string;
      visionMode: string;
    };
    expect(summary).toMatchObject({
      crabbox: {
        id: "cbx_abc123",
        vncCommand: "/tmp/crabbox vnc --provider hetzner --id cbx_abc123 --open",
      },
      status: "pass",
      visionMode: "metadata",
    });
  });

  it("fails when recording breaks after the visual driver passes", async () => {
    const commands: RecordedCommand[] = [];
    const runner = vi.fn(async (command: string, args: readonly string[]) => {
      commands.push({ command, args });
      const canned = leaseReply(command, args);
      if (canned) {
        return canned;
      }
      if (command === crabboxBin && args[0] === "record") {
        // The driver finishes and leaves its artifacts, then the recorder itself fails.
        const outputDir = flagValue(args, "--output-dir");
        await fs.mkdir(outputDir, { recursive: true });
        await writePassingDriverArtifacts(outputDir);
        throw new Error("crabbox record failed after driver exit");
      }
      return noOutput();
    });

    const result = await runMantisVisualTask({
      commandRunner: runner,
      crabboxBin,
      env: { PATH: process.env.PATH },
      now: () => new Date("2026-05-04T12:00:00.000Z"),
      outputDir: ".artifacts/qa-e2e/mantis/visual-task-recording-fail",
      repoRoot,
      settleMs: 0,
      visionMode: "metadata",
    });

    expect(result).toMatchObject({
      status: "fail",
      videoPath: undefined,
    });
    expect(commands.map((entry) => [entry.command, entry.args[0]])).toEqual([
      ["/tmp/crabbox", "warmup"],
      ["/tmp/crabbox", "inspect"],
      ["/tmp/crabbox", "record"],
    ]);
    const summary = JSON.parse(await fs.readFile(result.summaryPath, "utf8")) as {
      error?: string;
      recording?: { error?: string; required: boolean };
      status: string;
    };
    expect(summary).toMatchObject({
      error: "crabbox record failed after driver exit",
      recording: {
        error: "crabbox record failed after driver exit",
        required: true,
      },
      status: "fail",
    });
  });

  it("drives a lease, screenshots it, and verifies image-describe text", async () => {
    const commands: RecordedCommand[] = [];
    const describeEnvelope = JSON.stringify({
      ok: true,
      outputs: [
        {
          kind: "image.description",
          text: JSON.stringify({
            evidence: 'The page heading reads "Example Domain".',
            reason: "The expected text is visible as the main heading.",
            visible: true,
          }),
        },
      ],
    });
    const runner = vi.fn(async (command: string, args: readonly string[]) => {
      commands.push({ command, args });
      if (command === crabboxBin && args[0] === "screenshot") {
        await writeFakeScreenshot(args);
      }
      if (command === "pnpm") {
        // The echoed command line carries a JSON-looking schema ahead of the real envelope.
        return {
          stdout: `\n> openclaw qa mantis visual-driver --vision-prompt '{"visible": boolean}'\n${describeEnvelope}\n`,
          stderr: "",
        };
      }
      return noOutput();
    });

    const result = await runMantisVisualDriver({
      browserUrl: "https://example.net",
      commandRunner: runner,
      crabboxBin,
      env: { PATH: process.env.PATH },
      expectText: "Example Domain",
      leaseId: "cbx_abc123",
      outputDir: ".artifacts/qa-e2e/mantis/visual-driver-test",
      repoRoot,
      settleMs: 0,
      visionMode: "image-describe",
      visionModel: "openai/gpt-5.4",
      visionPrompt: "Read the page title",
    });

    expect(result.status).toBe("pass");
    expect(commands.map((entry) => [entry.command, entry.args[0], entry.args[1]])).toEqual([
      ["/tmp/crabbox", "desktop", "launch"],
      ["/tmp/crabbox", "screenshot", "--provider"],
      ["pnpm", "--dir", repoRoot],
    ]);
    const launchArgs = commands.find((entry) => entry.args[0] === "desktop")?.args ?? [];
    expect(launchArgs).toEqual(
      expect.arrayContaining(["--", "sh", "-lc", expect.stringContaining("--no-first-run")]),
    );
    const visionArgs = commands.find((entry) => entry.command === "pnpm")?.args ?? [];
    expect(visionArgs).toEqual(
      expect.arrayContaining([
        "infer",
        "image",
        "describe",
        "--file",
        path.join(repoRoot, ".artifacts/qa-e2e/mantis/visual-driver-test/visual-task.png"),
        "--model",
        "openai/gpt-5.4",
      ]),
    );
    expect(visionArgs).toEqual(
      expect.arrayContaining(["--prompt", expect.stringContaining("return only valid JSON")]),
    );
    expect(result.vision.assertion).toMatchObject({
      evidence: 'The page heading reads "Example Domain".',
      matched: true,
      visible: true,
    });
  });

  it("fails image-describe text checks when the model gives negative evidence that quotes the target", async () => {
    const runner = vi.fn(async (command: string, args: readonly string[]) => {
      if (command === crabboxBin && args[0] === "screenshot") {
        await writeFakeScreenshot(args);
      }
      if (command === "pnpm") {
        return {
          stdout: `${JSON.stringify({
            ok: true,
            outputs: [
              {
                kind: "image.description",
                text: 'The screenshot does not contain "Example Domain".',
              },
            ],
          })}\n`,
          stderr: "",
        };
      }
      return noOutput();
    });

    const result = await runMantisVisualDriver({
      commandRunner: runner,
      crabboxBin,
      expectText: "Example Domain",
      leaseId: "cbx_abc123",
      outputDir: ".artifacts/qa-e2e/mantis/visual-driver-negative",
      repoRoot,
      settleMs: 0,
      visionMode: "image-describe",
    });

    expect(result).toMatchObject({
      matched: false,
      status: "fail",
      vision: {
        assertion: {
          matched: false,
          reason: "Image describe did not return a structured visual assertion.",
        },
      },
    });
  });

  it("fails metadata mode when text evidence is requested", async () => {
    const runner = vi.fn(async (command: string, args: readonly string[]) => {
      if (command === crabboxBin && args[0] === "screenshot") {
        await writeFakeScreenshot(args);
      }
      return noOutput();
    });

    const result = await runMantisVisualDriver({
      commandRunner: runner,
      crabboxBin,
      expectText: "Example Domain",
      leaseId: "cbx_abc123",
      outputDir: ".artifacts/qa-e2e/mantis/visual-driver-metadata",
      repoRoot,
      settleMs: 0,
      visionMode: "metadata",
    });

    expect(result).toMatchObject({
      matched: false,
      status: "fail",
      vision: {
        mode: "metadata",
      },
    });
  });
});
|
||||
926
extensions/qa-lab/src/mantis/visual-task.runtime.ts
Normal file
926
extensions/qa-lab/src/mantis/visual-task.runtime.ts
Normal file
@@ -0,0 +1,926 @@
|
||||
import { spawn, type SpawnOptions } from "node:child_process";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import { ensureRepoBoundDirectory, resolveRepoRelativeOutputDir } from "../cli-paths.js";
|
||||
|
||||
/**
 * Strategy used to check the captured screenshot. `"image-describe"` is the
 * mode `normalizeVisionMode` falls back to when none is given; `"metadata"` is
 * the only other accepted value.
 */
export type MantisVisualTaskVisionMode =
  | "image-describe"
  | "metadata";
|
||||
|
||||
/**
 * Caller-supplied settings for `runMantisVisualTask`. Every field is optional.
 */
export type MantisVisualTaskOptions = {
  /** Page to open in the remote browser. */
  browserUrl?: string;
  /** Command runner override, e.g. a stub that avoids spawning real processes. */
  commandRunner?: CommandRunner;
  /** Path or name of the Crabbox binary. */
  crabboxBin?: string;
  /** Recording duration string such as `"12s"`. */
  duration?: string;
  /** Environment handed to spawned commands. */
  env?: NodeJS.ProcessEnv;
  /** Text that must be visible in the screenshot for the run to match. */
  expectText?: string;
  /** Idle timeout for the lease — presumably forwarded to `crabbox warmup --idle-timeout`. */
  idleTimeout?: string;
  /** NOTE(review): presumably keeps the lease alive after the run; confirm against the task runner. */
  keepLease?: boolean;
  /** Existing lease to reuse. */
  leaseId?: string;
  /** Machine class — presumably forwarded to `crabbox warmup --class`. */
  machineClass?: string;
  /** Clock override; lets callers pin timestamps. */
  now?: () => Date;
  /** Artifact directory, absolute or relative to the repository root. */
  outputDir?: string;
  /** Crabbox provider name. */
  provider?: string;
  /** Repository root used to resolve relative paths. */
  repoRoot?: string;
  /** Milliseconds passed to the driver as `--settle-ms`. */
  settleMs?: number;
  /** Lease time-to-live — presumably forwarded to `crabbox warmup --ttl`. */
  ttl?: string;
  /** Screenshot verification strategy. */
  visionMode?: MantisVisualTaskVisionMode;
  /** Model identifier passed to the driver as `--vision-model`. */
  visionModel?: string;
  /** Prompt override for the vision step. */
  visionPrompt?: string;
  /** Timeout for the vision step in milliseconds. */
  visionTimeoutMs?: number;
};
|
||||
|
||||
/**
 * Caller-supplied settings for `runMantisVisualDriver`. Every field is
 * optional; the driver fills gaps from the environment and built-in defaults.
 */
export type MantisVisualDriverOptions = {
  /** Page to open; falls back to `DEFAULT_BROWSER_URL`. */
  browserUrl?: string;
  /** Command runner override; falls back to `defaultCommandRunner`. */
  commandRunner?: CommandRunner;
  /** Crabbox binary; resolved through `resolveCrabboxBin` when omitted. */
  crabboxBin?: string;
  /** Environment for lookups and spawned commands; falls back to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Text that must be visible in the screenshot; blank values are ignored. */
  expectText?: string;
  /** Lease to drive. The driver throws when neither this nor the lease env vars are set. */
  leaseId?: string;
  /** Artifact directory, absolute or relative to the repository root. */
  outputDir?: string;
  /** Crabbox provider; falls back to the provider env vars, then `DEFAULT_PROVIDER`. */
  provider?: string;
  /** Repository root; falls back to the current working directory. */
  repoRoot?: string;
  /** NOTE(review): presumably a wait in milliseconds before the screenshot; confirm in the driver body. */
  settleMs?: number;
  /** Screenshot verification strategy; normalized by `normalizeVisionMode`. */
  visionMode?: MantisVisualTaskVisionMode;
  /** Model identifier for the vision step. */
  visionModel?: string;
  /** Prompt override; `buildVisionPrompt` appends the assertion contract when text is expected. */
  visionPrompt?: string;
  /** Vision timeout in milliseconds; falls back to `DEFAULT_VISION_TIMEOUT_MS`. */
  visionTimeoutMs?: number;
};
|
||||
|
||||
/**
 * Outcome of a visual task run: overall status plus the paths of the artifacts
 * it produced. The screenshot and video paths are absent when no such file is
 * available.
 */
export type MantisVisualTaskResult = {
  /** Directory that holds the run's artifacts. */
  outputDir: string;
  /** Path of the rendered report. */
  reportPath: string;
  /** Path of the captured screenshot, if any. */
  screenshotPath?: string;
  /** Overall verdict of the run. */
  status: "pass" | "fail";
  /** Path of the JSON summary. */
  summaryPath: string;
  /** Path of the recorded video, if any. */
  videoPath?: string;
};
|
||||
|
||||
/** Text captured from a finished command. */
type CommandResult = {
  /** Everything the command wrote to standard error. */
  stderr: string;
  /** Everything the command wrote to standard output. */
  stdout: string;
};
|
||||
|
||||
/**
 * Function that executes `command` with `args` under the given spawn options
 * and resolves with the captured output. `defaultCommandRunner` is the real
 * implementation; callers can substitute their own.
 */
type CommandRunner = (
  command: string,
  args: readonly string[],
  options: SpawnOptions,
) => Promise<CommandResult>;
|
||||
|
||||
/**
 * Fields read from the JSON printed by `crabbox inspect --json`. All are
 * optional because the payload is not validated field by field.
 */
type CrabboxInspect = {
  /** Lease identifier. */
  id?: string;
  /** Provider that hosts the lease. */
  provider?: string;
  /** Human-readable lease name. */
  slug?: string;
  /** Lease state as reported by Crabbox. */
  state?: string;
};
|
||||
|
||||
/**
 * Result of one visual-driver pass: what was opened, where the screenshot
 * landed, and what the vision step concluded.
 */
type MantisVisualDriverResult = {
  /** Page that was opened in the remote browser. */
  browserUrl: string;
  /** Failure message, when the driver failed. */
  error?: string;
  /** Text the screenshot was expected to show, if any. */
  expectText?: string;
  /** Completion timestamp. */
  finishedAt: string;
  /** Whether the visual expectation was met. */
  matched?: boolean;
  /** Directory that holds the driver's artifacts. */
  outputDir: string;
  /** Path of the captured screenshot. */
  screenshotPath: string;
  /** Start timestamp. */
  startedAt: string;
  /** Verdict of the driver pass. */
  status: "pass" | "fail";
  /** Details of the vision step. */
  vision: {
    /** Structured verdict about the expected text, when one was evaluated. */
    assertion?: VisionAssertion;
    /** Verification strategy that was used. */
    mode: MantisVisualTaskVisionMode;
    /** Model identifier, when one was specified. */
    model?: string;
    /** Prompt sent to the vision step. */
    prompt?: string;
    /** Raw text returned by the vision step. */
    text?: string;
    /** Timeout applied to the vision step, in milliseconds. */
    timeoutMs: number;
  };
};
|
||||
|
||||
/**
 * Structured verdict on whether the expected text is visible, as produced by
 * `parseVisionAssertion` and `evaluateVisualExpectation`.
 */
type VisionAssertion = {
  /** Trimmed `evidence` string from the model reply, when present. */
  evidence?: string;
  /** Text the screenshot was expected to show. */
  expectedText: string;
  /** Final verdict after the reply has been cross-checked. */
  matched: boolean;
  /** Model-supplied reason, or a locally generated explanation of a failure. */
  reason?: string;
  /** The model's own `visible` flag; absent when the reply had no boolean flag. */
  visible?: boolean;
};
|
||||
|
||||
/**
 * Full record of a visual task run. `renderReport` turns it into the Markdown
 * report.
 */
type MantisVisualTaskSummary = {
  /** Paths of the files produced by the run. */
  artifacts: {
    /** Driver result JSON. */
    driverResultPath: string;
    /** Rendered report. */
    reportPath: string;
    /** Screenshot, if one exists. */
    screenshotPath?: string;
    /** Summary JSON. */
    summaryPath: string;
    /** Recorded video, if one exists. */
    videoPath?: string;
  };
  /** Page that was opened in the remote browser. */
  browserUrl: string;
  /** Details of the Crabbox lease used for the run. */
  crabbox: {
    /** Crabbox binary that was invoked. */
    bin: string;
    /** Whether this run created the lease. */
    createdLease: boolean;
    /** Lease identifier. */
    id: string;
    /** Provider that hosts the lease. */
    provider: string;
    /** Human-readable lease name. */
    slug?: string;
    /** Lease state; the report prints "unknown" when absent. */
    state?: string;
    /** Command line that opens a VNC session to the lease. */
    vncCommand: string;
  };
  /** Driver result, when one is available. */
  driver?: MantisVisualDriverResult;
  /** Failure message for the run as a whole. */
  error?: string;
  /** Completion timestamp. */
  finishedAt: string;
  /** Directory that holds the run's artifacts. */
  outputDir: string;
  /** Recording outcome. */
  recording: {
    /** Failure message from the recording step. */
    error?: string;
    /** NOTE(review): presumably whether a recording failure fails the run; confirm against the task runner. */
    required: boolean;
  };
  /** Start timestamp. */
  startedAt: string;
  /** Overall verdict of the run. */
  status: "pass" | "fail";
  /** Verification strategy that was used. */
  visionMode: MantisVisualTaskVisionMode;
};
|
||||
|
||||
// Built-in fallbacks, used when neither an option nor an environment variable supplies a value.

/** Page opened in the remote browser when no URL is configured. */
const DEFAULT_BROWSER_URL = "https://example.net";
/** Crabbox provider used when none is configured. */
const DEFAULT_PROVIDER = "hetzner";
/** Default machine class — presumably for `crabbox warmup --class`. */
const DEFAULT_CLASS = "beast";
/** Default recording duration. */
const DEFAULT_DURATION = "180s";
/** Default lease idle timeout. */
const DEFAULT_IDLE_TIMEOUT = "60m";
/** Default lease time-to-live. */
const DEFAULT_TTL = "120m";
/** Default settle delay in milliseconds. */
const DEFAULT_SETTLE_MS = 8000;
/** Default timeout for the vision step in milliseconds. */
const DEFAULT_VISION_TIMEOUT_MS = 120000;

// Names of the environment variables that override the values above.

/** Overrides the Crabbox binary location. */
const CRABBOX_BIN_ENV = "OPENCLAW_MANTIS_CRABBOX_BIN";
/** Overrides the Crabbox provider. */
const CRABBOX_PROVIDER_ENV = "OPENCLAW_MANTIS_CRABBOX_PROVIDER";
/** Overrides the machine class. */
const CRABBOX_CLASS_ENV = "OPENCLAW_MANTIS_CRABBOX_CLASS";
/** Supplies an existing lease id. */
const CRABBOX_LEASE_ID_ENV = "OPENCLAW_MANTIS_CRABBOX_LEASE_ID";
/** NOTE(review): presumably an opt-in flag that keeps the VM after the run; confirm against the task runner. */
const CRABBOX_KEEP_ENV = "OPENCLAW_MANTIS_KEEP_VM";
/** Overrides the lease idle timeout. */
const CRABBOX_IDLE_TIMEOUT_ENV = "OPENCLAW_MANTIS_CRABBOX_IDLE_TIMEOUT";
/** Overrides the lease time-to-live. */
const CRABBOX_TTL_ENV = "OPENCLAW_MANTIS_CRABBOX_TTL";
|
||||
|
||||
/**
 * Normalizes an optional string by stripping surrounding whitespace.
 *
 * @param value - Raw string, possibly missing.
 * @returns The trimmed string, or `undefined` when the input is missing or blank.
 */
function trimToValue(value: string | undefined) {
  if (value == null) {
    return undefined;
  }
  const stripped = value.trim();
  return stripped === "" ? undefined : stripped;
}
|
||||
|
||||
/**
 * Tells whether an opt-in flag value is switched on.
 *
 * @param value - Raw flag text, possibly missing.
 * @returns `true` only for `1`, `true` or `yes`, ignoring case and surrounding whitespace.
 */
function isTruthyOptIn(value: string | undefined) {
  switch (value?.trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Builds the timestamped artifact directory used when none is configured.
 *
 * @param repoRoot - Repository root the directory lives under.
 * @param startedAt - Run start time; its ISO string names the directory.
 * @returns `<repoRoot>/.artifacts/qa-e2e/mantis/visual-task-<stamp>`.
 */
function defaultOutputDir(repoRoot: string, startedAt: Date) {
  // Swap ":" and "." for "-" in the ISO timestamp used as the directory name.
  const runStamp = startedAt.toISOString().replace(/[:.]/gu, "-");
  const leaf = `visual-task-${runStamp}`;
  return path.join(repoRoot, ".artifacts", "qa-e2e", "mantis", leaf);
}
|
||||
|
||||
/**
 * Picks the artifact directory for a run.
 *
 * @param repoRoot - Repository root used for relative paths and the default.
 * @param outputDir - Configured directory; blank counts as not configured.
 * @param startedAt - Run start time, used only for the default directory name.
 * @returns An absolute configured path as-is; otherwise the repo-relative
 *   resolution, or the timestamped default when that yields nothing.
 */
function resolveMantisOutputDir(repoRoot: string, outputDir: string | undefined, startedAt: Date) {
  const requested = trimToValue(outputDir);
  if (requested && path.isAbsolute(requested)) {
    return requested;
  }
  const repoRelative = requested ? resolveRepoRelativeOutputDir(repoRoot, requested) : undefined;
  return repoRelative ?? defaultOutputDir(repoRoot, startedAt);
}
|
||||
|
||||
/**
 * Spawns a command and collects its output.
 *
 * Standard input is ignored and both output streams are always piped so they
 * can be captured. When `options.stdio` is `"inherit"` the captured text is
 * also mirrored to this process's own stdout/stderr as it arrives.
 *
 * @param command - Executable to run.
 * @param args - Arguments passed to the executable.
 * @param options - Spawn options; `stdio` only controls mirroring.
 * @returns The captured stdout and stderr once the command exits with code 0.
 * @throws Rejects with the spawn error, or with an `Error` naming the exit
 *   code or signal when the command does not exit with code 0.
 */
async function defaultCommandRunner(
  command: string,
  args: readonly string[],
  options: SpawnOptions,
): Promise<CommandResult> {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      ...options,
      stdio: ["ignore", "pipe", "pipe"],
    });
    const mirrorOutput = options.stdio === "inherit";
    let stdout = "";
    let stderr = "";
    // Decode at the stream level: converting each Buffer chunk on its own
    // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");
    child.stdout?.on("data", (text: string) => {
      stdout += text;
      if (mirrorOutput) {
        process.stdout.write(text);
      }
    });
    child.stderr?.on("data", (text: string) => {
      stderr += text;
      if (mirrorOutput) {
        process.stderr.write(text);
      }
    });
    child.on("error", reject);
    child.on("close", (code, signal) => {
      if (code === 0) {
        resolve({ stdout, stderr });
        return;
      }
      const detail = signal ? `signal ${signal}` : `exit code ${code ?? "unknown"}`;
      reject(new Error(`${command} ${args.join(" ")} failed with ${detail}`));
    });
  });
}
|
||||
|
||||
/**
 * Checks whether a path can be accessed.
 *
 * @param filePath - Path to probe.
 * @returns `true` when `fs.access` succeeds, `false` on any failure.
 */
async function pathExists(filePath: string) {
  return fs.access(filePath).then(
    () => true,
    () => false,
  );
}
|
||||
|
||||
/**
 * Checks whether a path is a regular file with at least one byte of content.
 *
 * @param filePath - Path to probe.
 * @returns `false` when the path cannot be stat'ed, is not a regular file, or is empty.
 */
async function nonEmptyFileExists(filePath: string) {
  const info = await fs.stat(filePath).catch(() => undefined);
  return info !== undefined && info.isFile() && info.size > 0;
}
|
||||
|
||||
/**
 * Decides which Crabbox binary to invoke.
 *
 * Order of precedence: the explicit option, the `OPENCLAW_MANTIS_CRABBOX_BIN`
 * environment variable, a `../crabbox/bin/crabbox` checkout next to the
 * repository, and finally the bare `crabbox` command name.
 *
 * @param params - Environment, optional explicit binary, and repository root.
 * @returns The binary path or command name to run.
 */
async function resolveCrabboxBin(params: {
  env: NodeJS.ProcessEnv;
  explicit?: string;
  repoRoot: string;
}) {
  const override = trimToValue(params.explicit) ?? trimToValue(params.env[CRABBOX_BIN_ENV]);
  if (override) {
    return override;
  }
  const siblingCheckout = path.resolve(params.repoRoot, "../crabbox/bin/crabbox");
  return (await pathExists(siblingCheckout)) ? siblingCheckout : "crabbox";
}
|
||||
|
||||
/**
 * Finds the first lease id in command output.
 *
 * @param output - Text to search.
 * @returns The first `cbx_<hex>` or `tbx_<id>` token, or `undefined` when none is present.
 */
function extractLeaseId(output: string) {
  const leasePattern = /\b(?:cbx_[a-f0-9]+|tbx_[A-Za-z0-9_-]+)\b/u;
  const found = leasePattern.exec(output);
  return found === null ? undefined : found[0];
}
|
||||
|
||||
/**
 * Validates a vision mode setting.
 *
 * @param value - Raw mode text; missing or blank selects the default.
 * @returns `"image-describe"` by default, or the recognized mode.
 * @throws Error when the trimmed value is not a supported mode.
 */
function normalizeVisionMode(value: string | undefined): MantisVisualTaskVisionMode {
  const requested = trimToValue(value) ?? "image-describe";
  switch (requested) {
    case "image-describe":
      return "image-describe";
    case "metadata":
      return "metadata";
    default:
      throw new Error(`Unsupported Mantis visual-task vision mode: ${requested}`);
  }
}
|
||||
|
||||
/**
 * Supplies the vision prompt used when the caller provides none.
 *
 * @param expectText - Text the screenshot should show, if any.
 * @returns A prompt asking about that exact text, or a generic request to describe the page.
 */
function defaultVisionPrompt(expectText: string | undefined) {
  return expectText
    ? `Inspect this UI screenshot and determine whether the exact text "${expectText}" is visibly present.`
    : "Inspect this UI screenshot and describe the visible page state in one concise sentence.";
}
|
||||
|
||||
/**
 * Produces the final prompt for the vision step.
 *
 * Starts from the caller's prompt, or the default one when it is blank. When
 * text is expected, the structured-JSON assertion contract is appended unless
 * the prompt already contains it.
 *
 * @param prompt - Caller-supplied prompt, possibly missing or blank.
 * @param expectText - Text the screenshot should show, if any.
 * @returns The prompt to send.
 */
function buildVisionPrompt(prompt: string | undefined, expectText: string | undefined) {
  const basePrompt = trimToValue(prompt) ?? defaultVisionPrompt(expectText);
  const alreadyHasContract = basePrompt.includes("Visual assertion contract:");
  if (!expectText || alreadyHasContract) {
    return basePrompt;
  }
  return `${basePrompt}\n\nVisual assertion contract: return only valid JSON: {"visible": boolean, "evidence": string, "reason": string}. Set visible=true only when the exact text "${expectText}" is actually visible in the screenshot; text quoted in the prompt or a negative statement is not evidence.`;
}
|
||||
|
||||
/**
 * Runs a command through the supplied runner.
 *
 * @param params - Command, arguments, working directory, environment, runner,
 *   and an optional `stdio` mode that defaults to `"pipe"`.
 * @returns Whatever the runner resolves with.
 */
async function runCommand(params: {
  args: readonly string[];
  command: string;
  cwd: string;
  env: NodeJS.ProcessEnv;
  runner: CommandRunner;
  stdio?: "inherit" | "pipe";
}) {
  const { args, command, cwd, env, runner } = params;
  const stdio = params.stdio ?? "pipe";
  return runner(command, args, { cwd, env, stdio });
}
|
||||
|
||||
/**
 * Runs `crabbox warmup` for a desktop lease with a browser and returns its id.
 *
 * @param params - Binary, working directory, environment, runner, and the
 *   provider / class / idle-timeout / ttl values for the new lease.
 * @returns The first lease id found in the command's stdout or stderr.
 * @throws Error when the output contains no lease id.
 */
async function warmupCrabbox(params: {
  crabboxBin: string;
  cwd: string;
  env: NodeJS.ProcessEnv;
  idleTimeout: string;
  machineClass: string;
  provider: string;
  runner: CommandRunner;
  ttl: string;
}) {
  const { crabboxBin, cwd, env, idleTimeout, machineClass, provider, runner, ttl } = params;
  const warmupArgs = [
    "warmup",
    "--provider",
    provider,
    "--desktop",
    "--browser",
    "--class",
    machineClass,
    "--idle-timeout",
    idleTimeout,
    "--ttl",
    ttl,
  ];
  const { stdout, stderr } = await runCommand({
    args: warmupArgs,
    command: crabboxBin,
    cwd,
    env,
    runner,
    stdio: "inherit",
  });
  // The id can be printed on either stream, so search both.
  const leaseId = extractLeaseId(`${stdout}\n${stderr}`);
  if (!leaseId) {
    throw new Error("Crabbox warmup did not print a lease id.");
  }
  return leaseId;
}
|
||||
|
||||
/**
 * Runs `crabbox inspect --json` for a lease and returns the parsed payload.
 *
 * @param params - Binary, working directory, environment, runner, provider and lease id.
 * @returns The parsed JSON object; individual fields are not validated.
 * @throws Error when the command fails, when stdout is not valid JSON, or
 *   when the JSON is not an object.
 */
async function inspectCrabbox(params: {
  crabboxBin: string;
  cwd: string;
  env: NodeJS.ProcessEnv;
  leaseId: string;
  provider: string;
  runner: CommandRunner;
}) {
  const result = await runCommand({
    command: params.crabboxBin,
    args: ["inspect", "--provider", params.provider, "--id", params.leaseId, "--json"],
    cwd: params.cwd,
    env: params.env,
    runner: params.runner,
  });
  let payload: unknown;
  try {
    payload = JSON.parse(result.stdout);
  } catch (error) {
    // Name the lease in the failure; a bare SyntaxError gives no clue which command produced it.
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(
      `Crabbox inspect for lease ${params.leaseId} did not print valid JSON: ${detail}`,
    );
  }
  if (payload === null || typeof payload !== "object") {
    throw new Error(`Crabbox inspect for lease ${params.leaseId} did not print a JSON object.`);
  }
  return payload as CrabboxInspect;
}
|
||||
|
||||
/**
 * Runs `crabbox stop` for a lease, mirroring the command's output.
 *
 * @param params - Binary, working directory, environment, runner, provider and lease id.
 * @throws Propagates any failure reported by the runner.
 */
async function stopCrabbox(params: {
  crabboxBin: string;
  cwd: string;
  env: NodeJS.ProcessEnv;
  leaseId: string;
  provider: string;
  runner: CommandRunner;
}) {
  const { crabboxBin, cwd, env, leaseId, provider, runner } = params;
  const stopArgs = ["stop", "--provider", provider, leaseId];
  await runCommand({
    args: stopArgs,
    command: crabboxBin,
    cwd,
    env,
    runner,
    stdio: "inherit",
  });
}
|
||||
|
||||
/**
 * Assembles the argument list for the
 * `--dir <repoRoot> openclaw qa mantis visual-driver` invocation.
 *
 * @param params - Values for each driver flag. `expectText` and `visionModel`
 *   are appended only when non-empty.
 * @returns The arguments in the order the driver command expects.
 */
function buildVisualDriverArgs(params: {
  browserUrl: string;
  crabboxBin: string;
  expectText?: string;
  leaseId: string;
  outputDir: string;
  provider: string;
  repoRoot: string;
  settleMs: number;
  visionMode: MantisVisualTaskVisionMode;
  visionModel?: string;
  visionPrompt: string;
  visionTimeoutMs: number;
}) {
  const commandPath = ["--dir", params.repoRoot, "openclaw", "qa", "mantis", "visual-driver"];
  const requiredFlags = [
    "--repo-root",
    params.repoRoot,
    "--output-dir",
    params.outputDir,
    "--crabbox-bin",
    params.crabboxBin,
    "--provider",
    params.provider,
    "--lease-id",
    params.leaseId,
    "--browser-url",
    params.browserUrl,
    "--settle-ms",
    String(params.settleMs),
    "--vision-mode",
    params.visionMode,
    "--vision-prompt",
    params.visionPrompt,
    "--vision-timeout-ms",
    String(params.visionTimeoutMs),
  ];
  const optionalFlags = [
    ...(params.expectText ? ["--expect-text", params.expectText] : []),
    ...(params.visionModel ? ["--vision-model", params.visionModel] : []),
  ];
  return [...commandPath, ...requiredFlags, ...optionalFlags];
}
|
||||
|
||||
/**
 * Extracts the description text from image-describe command output.
 *
 * The output may contain other text around the JSON envelope, so the envelope
 * is located with `parseJsonObjectFromText`. Entries in `outputs` that are not
 * objects, or whose `text` is missing or blank, are skipped.
 *
 * @param stdout - Raw standard output of the image-describe command.
 * @returns The first non-blank `text` value, untrimmed.
 * @throws Error when no envelope with an `outputs` array is found, or when no
 *   entry carries non-blank text.
 */
function parseImageDescribeText(stdout: string) {
  const envelope = parseJsonObjectFromText(
    stdout,
    (value): value is { outputs: unknown[] } =>
      typeof value === "object" &&
      value !== null &&
      Array.isArray((value as { outputs?: unknown }).outputs),
  );
  if (!envelope) {
    throw new Error("Image describe did not return a JSON envelope with outputs.");
  }
  for (const output of envelope.outputs) {
    // The payload is untrusted JSON: entries may be null or primitives.
    if (output === null || typeof output !== "object") {
      continue;
    }
    const text = (output as { text?: unknown }).text;
    if (typeof text === "string" && text.trim().length > 0) {
      return text;
    }
  }
  throw new Error("Image describe did not return output text.");
}
|
||||
|
||||
/**
 * Finds the first JSON object embedded in free-form text that satisfies a predicate.
 *
 * Every `{` is tried as a start, earliest first, and paired with every later
 * `}`, latest first, so the widest candidate for the earliest start wins.
 *
 * @param text - Text that may contain JSON surrounded by other output.
 * @param accepts - Type guard deciding whether a parsed value is the one wanted.
 * @returns The first accepted value, or `undefined` when nothing qualifies.
 */
function parseJsonObjectFromText<T>(text: string, accepts: (value: unknown) => value is T) {
  const starts: number[] = [];
  const ends: number[] = [];
  for (let index = 0; index < text.length; index += 1) {
    const char = text[index];
    if (char === "{") {
      starts.push(index);
    } else if (char === "}") {
      ends.push(index);
    }
  }
  for (const start of starts) {
    // Walk the closing braces from last to first without copying the array.
    for (let cursor = ends.length - 1; cursor >= 0; cursor -= 1) {
      const end = ends[cursor];
      // `ends` is ascending, so every remaining brace also precedes `start`.
      if (end === undefined || end < start) {
        break;
      }
      try {
        const parsed = JSON.parse(text.slice(start, end + 1)) as unknown;
        if (accepts(parsed)) {
          return parsed;
        }
      } catch {
        // Keep scanning: command wrappers can echo prompt schemas before the real JSON.
      }
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Turns the vision model's reply into a structured verdict about the expected text.
 *
 * The reply must contain a JSON object with a `visible` key. The verdict is a
 * match only when `visible` is `true`, `evidence` is non-empty, and the
 * expected text appears (case-insensitively) in the evidence or the reason.
 *
 * @param text - Raw text returned by the vision step.
 * @param expectText - Text the screenshot should show.
 * @returns The verdict; `matched` is `false` with an explanatory reason when
 *   the reply is unstructured or has no boolean `visible`.
 */
function parseVisionAssertion(text: string, expectText: string): VisionAssertion {
  const record = parseJsonObjectFromText(
    text,
    (value): value is Record<string, unknown> =>
      typeof value === "object" && value !== null && "visible" in value,
  );
  if (!record) {
    return {
      expectedText: expectText,
      matched: false,
      reason: "Image describe did not return a structured visual assertion.",
    };
  }

  const trimmedString = (field: unknown) => (typeof field === "string" ? field.trim() : undefined);
  const evidence = trimmedString(record.evidence);
  const reason = trimmedString(record.reason);
  const { visible } = record;
  if (typeof visible !== "boolean") {
    return {
      evidence,
      expectedText: expectText,
      matched: false,
      reason: reason ?? "Image describe visual assertion is missing boolean visible.",
    };
  }

  // The model must cite the expected text in its evidence or reason.
  const needle = expectText.toLowerCase();
  let citesExpectedText = false;
  for (const candidate of [evidence, reason]) {
    if (candidate && candidate.toLowerCase().includes(needle)) {
      citesExpectedText = true;
      break;
    }
  }

  const explanation = citesExpectedText
    ? reason
    : (reason ?? `Visual assertion did not cite the expected text "${expectText}".`);
  return {
    evidence,
    expectedText: expectText,
    matched: visible && Boolean(evidence) && citesExpectedText,
    reason: explanation,
    visible,
  };
}
|
||||
|
||||
/**
 * Decides whether the vision output satisfies the expected text.
 *
 * @param text - Raw text returned by the vision step, if any.
 * @param expectText - Text the screenshot should show, if any.
 * @returns `{ matched: true }` when nothing is expected; otherwise the
 *   structured assertion together with its `matched` verdict. Missing vision
 *   text yields a failed assertion.
 */
function evaluateVisualExpectation(text: string | undefined, expectText: string | undefined) {
  if (!expectText) {
    return { matched: true };
  }
  const assertion: VisionAssertion = text
    ? parseVisionAssertion(text, expectText)
    : {
        expectedText: expectText,
        matched: false,
        reason: "Image describe did not return text.",
      };
  return { assertion, matched: assertion.matched };
}
|
||||
|
||||
/**
 * Builds the one-line shell script that starts the browser on the remote desktop.
 *
 * The script picks the browser from `$BROWSER`, then `$CHROME_BIN`, then
 * `google-chrome`, creates a dedicated profile directory under `$TMPDIR`
 * (or `/tmp`), and execs the browser with `"$0"` as its final argument.
 * NOTE(review): presumably the caller passes the page URL as `$0` — confirm at the call site.
 *
 * @returns The script, with statements joined by `"; "`.
 */
function browserLaunchScript() {
  const chromeFlags = [
    '--user-data-dir="$profile"',
    "--no-first-run",
    "--no-default-browser-check",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--window-size=1280,900",
    "--window-position=0,0",
  ];
  const statements = [
    'browser="${BROWSER:-${CHROME_BIN:-google-chrome}}"',
    'profile="${TMPDIR:-/tmp}/openclaw-mantis-visual-chrome-profile"',
    'mkdir -p "$profile"',
    `exec "$browser" ${chromeFlags.join(" ")} "$0"`,
  ];
  return statements.join("; ");
}
|
||||
|
||||
/**
 * Renders the Markdown report for a visual task run.
 *
 * The report has a header followed by Crabbox, Artifacts and Vision sections.
 * Artifact paths are shown by base name only, and optional Vision lines appear
 * only when the corresponding value is present.
 *
 * @param summary - Full record of the run.
 * @returns The report text, ending with a blank line.
 */
function renderReport(summary: MantisVisualTaskSummary) {
  const { artifacts, crabbox, driver, recording } = summary;
  const assertion = driver?.vision.assertion;

  const artifactLine = (label: string, filePath: string | undefined) =>
    filePath ? `- ${label}: \`${path.basename(filePath)}\`` : `- ${label}: missing`;

  const header = [
    "# Mantis Visual Task",
    "",
    `Status: ${summary.status}`,
    `Browser URL: ${summary.browserUrl}`,
    `Vision mode: ${summary.visionMode}`,
    `Output: ${summary.outputDir}`,
    `Started: ${summary.startedAt}`,
    `Finished: ${summary.finishedAt}`,
    "",
  ];

  const crabboxSection = [
    "## Crabbox",
    "",
    `- Provider: ${crabbox.provider}`,
    `- Lease: ${crabbox.id}${crabbox.slug ? ` (${crabbox.slug})` : ""}`,
    `- Created by run: ${crabbox.createdLease}`,
    `- State: ${crabbox.state ?? "unknown"}`,
    `- VNC: \`${crabbox.vncCommand}\``,
    "",
  ];

  const artifactSection = [
    "## Artifacts",
    "",
    artifactLine("Screenshot", artifacts.screenshotPath),
    artifactLine("Video", artifacts.videoPath),
    `- Driver result: \`${path.basename(artifacts.driverResultPath)}\``,
    "",
  ];

  const visionSection = [
    "## Vision",
    "",
    driver?.vision.text ? driver.vision.text : "No vision text recorded.",
  ];
  if (driver?.expectText) {
    visionSection.push(`Expected text: ${driver.expectText}`);
  }
  if (assertion?.visible !== undefined) {
    visionSection.push(`Visible: ${assertion.visible}`);
  }
  if (assertion?.evidence) {
    visionSection.push(`Evidence: ${assertion.evidence}`);
  }
  if (assertion?.reason) {
    visionSection.push(`Reason: ${assertion.reason}`);
  }
  if (driver?.matched !== undefined) {
    visionSection.push(`Matched: ${driver.matched}`);
  }
  if (recording.error) {
    visionSection.push(`Recording error: ${recording.error}`);
  }
  if (summary.error) {
    visionSection.push(`Error: ${summary.error}`);
  }
  visionSection.push("");

  const lines = [...header, ...crabboxSection, ...artifactSection, ...visionSection];
  return `${lines.join("\n")}\n`;
}
|
||||
|
||||
/**
 * Drives a single visual check against an already-allocated Crabbox lease.
 *
 * Steps, in order:
 *  1. `crabbox desktop launch … --browser --url <browserUrl>` on the lease.
 *  2. Wait `settleMs` (default `DEFAULT_SETTLE_MS`).
 *  3. `crabbox screenshot … --output visual-task.png`.
 *  4. When the vision mode is `"image-describe"`, run
 *     `pnpm openclaw infer image describe` on the screenshot and parse its
 *     JSON output into a text description.
 *  5. Evaluate the description against `expectText`.
 *
 * The outcome is always persisted to `mantis-visual-task-driver-result.json`
 * inside the output directory before it is returned.
 *
 * @param opts Driver options; every field falls back to an env var or default.
 * @returns The pass/fail driver result. Failures in steps 1–5 are captured in
 *   the result (`status: "fail"`, `error` set) instead of being thrown.
 * @throws If the output directory or crabbox binary cannot be resolved, if no
 *   lease id is available, or if writing the result file fails.
 */
export async function runMantisVisualDriver(
  opts: MantisVisualDriverOptions = {},
): Promise<MantisVisualDriverResult> {
  const env = opts.env ?? process.env;
  const startedAt = new Date();
  const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
  const outputDir = await ensureRepoBoundDirectory(
    repoRoot,
    resolveMantisOutputDir(repoRoot, opts.outputDir, startedAt),
    "Mantis visual driver output directory",
    { mode: 0o755 },
  );
  const resultPath = path.join(outputDir, "mantis-visual-task-driver-result.json");
  const screenshotPath = path.join(outputDir, "visual-task.png");
  const crabboxBin = await resolveCrabboxBin({ env, explicit: opts.crabboxBin, repoRoot });

  // Explicit option wins, then the recorder-specific env var, then the generic one.
  const provider =
    trimToValue(opts.provider) ??
    trimToValue(env.CRABBOX_RECORD_PROVIDER) ??
    trimToValue(env[CRABBOX_PROVIDER_ENV]) ??
    DEFAULT_PROVIDER;
  const leaseId =
    trimToValue(opts.leaseId) ??
    trimToValue(env.CRABBOX_RECORD_LEASE_ID) ??
    trimToValue(env[CRABBOX_LEASE_ID_ENV]);
  if (!leaseId) {
    throw new Error("Mantis visual-driver needs --lease-id or CRABBOX_RECORD_LEASE_ID.");
  }

  const browserUrl = trimToValue(opts.browserUrl) ?? DEFAULT_BROWSER_URL;
  const visionMode = normalizeVisionMode(opts.visionMode);
  const expectText = trimToValue(opts.expectText);
  const visionPrompt = buildVisionPrompt(opts.visionPrompt, expectText);
  const visionTimeoutMs = opts.visionTimeoutMs ?? DEFAULT_VISION_TIMEOUT_MS;
  const visionModel = trimToValue(opts.visionModel);
  const runner = opts.commandRunner ?? defaultCommandRunner;

  // Every crabbox invocation shares the same cwd/env/runner and streams its output.
  const invokeCrabbox = (args: string[]) =>
    runCommand({ command: crabboxBin, args, cwd: repoRoot, env, runner, stdio: "inherit" });

  // Runs the image-describe command and returns the parsed description text.
  const describeScreenshot = async () => {
    const describeArgs = [
      "openclaw",
      "infer",
      "image",
      "describe",
      "--file",
      screenshotPath,
      "--prompt",
      visionPrompt,
      "--timeout-ms",
      String(visionTimeoutMs),
      "--json",
      ...(visionModel ? ["--model", visionModel] : []),
    ];
    const described = await runCommand({
      command: "pnpm",
      args: ["--dir", repoRoot, ...describeArgs],
      cwd: repoRoot,
      env,
      runner,
    });
    return parseImageDescribeText(described.stdout);
  };

  let outcome: MantisVisualDriverResult;
  try {
    await invokeCrabbox([
      "desktop",
      "launch",
      "--provider",
      provider,
      "--id",
      leaseId,
      "--browser",
      "--url",
      browserUrl,
      "--reclaim",
      "--",
      "sh",
      "-lc",
      browserLaunchScript(),
    ]);

    // Pause between launching the browser and taking the screenshot.
    const settleMs = opts.settleMs ?? DEFAULT_SETTLE_MS;
    await new Promise((done) => setTimeout(done, settleMs));

    await invokeCrabbox([
      "screenshot",
      "--provider",
      provider,
      "--id",
      leaseId,
      "--output",
      screenshotPath,
      "--reclaim",
    ]);

    // Any other vision mode skips the describe step and leaves the text undefined.
    const visionText = visionMode === "image-describe" ? await describeScreenshot() : undefined;
    const { assertion, matched } = evaluateVisualExpectation(visionText, expectText);

    outcome = {
      browserUrl,
      expectText,
      finishedAt: new Date().toISOString(),
      matched,
      outputDir,
      screenshotPath,
      startedAt: startedAt.toISOString(),
      status: matched ? "pass" : "fail",
      vision: {
        assertion,
        mode: visionMode,
        model: visionModel,
        prompt: visionPrompt,
        text: visionText,
        timeoutMs: visionTimeoutMs,
      },
    };
  } catch (failure) {
    // Any step failing is reported as a failed result so it is still persisted below.
    outcome = {
      browserUrl,
      error: formatErrorMessage(failure),
      expectText,
      finishedAt: new Date().toISOString(),
      matched: false,
      outputDir,
      screenshotPath,
      startedAt: startedAt.toISOString(),
      status: "fail",
      vision: {
        mode: visionMode,
        model: visionModel,
        prompt: visionPrompt,
        timeoutMs: visionTimeoutMs,
      },
    };
  }

  await fs.writeFile(resultPath, `${JSON.stringify(outcome, null, 2)}\n`, "utf8");
  return outcome;
}
|
||||
|
||||
/**
 * Runs the end-to-end Mantis visual task and records it on video.
 *
 * Flow:
 *  1. Resolve the output directory, crabbox binary and lease settings from
 *     `opts`, falling back to env vars and defaults.
 *  2. Reuse the explicit lease id, or warm up a new Crabbox lease.
 *  3. Inspect the lease, then run `crabbox record … --while -- pnpm <driver args>`
 *     so the visual driver runs while the desktop is being recorded.
 *  4. Read the driver result JSON, check which artifacts exist and derive the
 *     overall status (pass only if the driver passed and a non-empty video exists).
 *  5. Always write the summary JSON and markdown report when a summary was
 *     built, then stop the lease if this run created it, the run passed, and
 *     `keepLease` is not set.
 *
 * @param opts Task options; every field falls back to an env var or default.
 * @returns Paths of the produced artifacts plus the overall status. Failures
 *   after setup are reported as `status: "fail"` (with `error.txt` written)
 *   rather than thrown.
 * @throws If the output directory or crabbox binary cannot be resolved, or if
 *   writing the summary/report/error files or stopping the lease fails.
 */
export async function runMantisVisualTask(
  opts: MantisVisualTaskOptions = {},
): Promise<MantisVisualTaskResult> {
  const env = opts.env ?? process.env;
  const startedAt = (opts.now ?? (() => new Date()))();
  const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
  const outputDir = await ensureRepoBoundDirectory(
    repoRoot,
    resolveMantisOutputDir(repoRoot, opts.outputDir, startedAt),
    "Mantis visual task output directory",
    { mode: 0o755 },
  );
  const summaryPath = path.join(outputDir, "mantis-visual-task-summary.json");
  const reportPath = path.join(outputDir, "mantis-visual-task-report.md");
  const driverResultPath = path.join(outputDir, "mantis-visual-task-driver-result.json");
  const screenshotPath = path.join(outputDir, "visual-task.png");
  const videoPath = path.join(outputDir, "visual-task.mp4");
  const crabboxBin = await resolveCrabboxBin({ env, explicit: opts.crabboxBin, repoRoot });
  const provider =
    trimToValue(opts.provider) ?? trimToValue(env[CRABBOX_PROVIDER_ENV]) ?? DEFAULT_PROVIDER;
  const machineClass =
    trimToValue(opts.machineClass) ?? trimToValue(env[CRABBOX_CLASS_ENV]) ?? DEFAULT_CLASS;
  const idleTimeout =
    trimToValue(opts.idleTimeout) ??
    trimToValue(env[CRABBOX_IDLE_TIMEOUT_ENV]) ??
    DEFAULT_IDLE_TIMEOUT;
  const ttl = trimToValue(opts.ttl) ?? trimToValue(env[CRABBOX_TTL_ENV]) ?? DEFAULT_TTL;
  const explicitLeaseId = trimToValue(opts.leaseId) ?? trimToValue(env[CRABBOX_LEASE_ID_ENV]);
  const keepLease = opts.keepLease ?? isTruthyOptIn(env[CRABBOX_KEEP_ENV]);
  // Only leases this run allocates are eligible for automatic shutdown.
  const createdLease = explicitLeaseId === undefined;
  const browserUrl = trimToValue(opts.browserUrl) ?? DEFAULT_BROWSER_URL;
  const expectText = trimToValue(opts.expectText);
  const visionMode = normalizeVisionMode(opts.visionMode);
  const visionPrompt = buildVisionPrompt(opts.visionPrompt, expectText);
  const runner = opts.commandRunner ?? defaultCommandRunner;
  let leaseId = explicitLeaseId;
  let inspected: CrabboxInspect = {};
  let summary: MantisVisualTaskSummary | undefined;

  try {
    leaseId =
      leaseId ??
      (await warmupCrabbox({
        crabboxBin,
        cwd: repoRoot,
        env,
        idleTimeout,
        machineClass,
        provider,
        runner,
        ttl,
      }));
    inspected = await inspectCrabbox({
      crabboxBin,
      cwd: repoRoot,
      env,
      leaseId,
      provider,
      runner,
    });

    let recordingError: string | undefined;
    try {
      await runCommand({
        command: crabboxBin,
        args: [
          "record",
          "--provider",
          provider,
          "--id",
          leaseId,
          "--duration",
          trimToValue(opts.duration) ?? DEFAULT_DURATION,
          "--output",
          videoPath,
          "--while",
          "--",
          "pnpm",
          ...buildVisualDriverArgs({
            browserUrl,
            crabboxBin,
            expectText,
            leaseId,
            outputDir,
            provider,
            repoRoot,
            settleMs: opts.settleMs ?? DEFAULT_SETTLE_MS,
            visionMode,
            visionModel: trimToValue(opts.visionModel),
            visionPrompt,
            visionTimeoutMs: opts.visionTimeoutMs ?? DEFAULT_VISION_TIMEOUT_MS,
          }),
        ],
        cwd: repoRoot,
        env,
        runner,
        stdio: "inherit",
      });
    } catch (error) {
      // If the driver still produced a result file, keep going and report the
      // record failure alongside it; otherwise there is nothing to summarize.
      if (!(await pathExists(driverResultPath))) {
        throw error;
      }
      recordingError = formatErrorMessage(error);
    }

    const driver = JSON.parse(
      await fs.readFile(driverResultPath, "utf8"),
    ) as MantisVisualDriverResult;
    const copiedScreenshot = (await pathExists(screenshotPath)) ? screenshotPath : undefined;
    // A zero-byte mp4 does not count as a recording.
    const copiedVideo = (await nonEmptyFileExists(videoPath)) ? videoPath : undefined;
    const recordingFailure =
      recordingError ??
      (copiedVideo ? undefined : "Mantis visual task recording did not produce visual-task.mp4.");
    const status = driver.status === "pass" && !recordingFailure ? "pass" : "fail";

    summary = {
      artifacts: {
        driverResultPath,
        reportPath,
        screenshotPath: copiedScreenshot,
        summaryPath,
        videoPath: copiedVideo,
      },
      browserUrl,
      crabbox: {
        bin: crabboxBin,
        createdLease,
        id: leaseId,
        provider,
        slug: inspected.slug,
        state: inspected.state,
        vncCommand: `${crabboxBin} vnc --provider ${provider} --id ${leaseId} --open`,
      },
      driver,
      error: recordingFailure,
      finishedAt: new Date().toISOString(),
      outputDir,
      recording: {
        error: recordingFailure,
        required: true,
      },
      startedAt: startedAt.toISOString(),
      status,
      visionMode,
    };
    return {
      outputDir,
      reportPath,
      screenshotPath: copiedScreenshot,
      status,
      summaryPath,
      videoPath: copiedVideo,
    };
  } catch (error) {
    // Report whatever artifacts made it to disk, using the same existence rules
    // as the success path so the artifact list and `recording.error` agree.
    const capturedScreenshot = (await pathExists(screenshotPath)) ? screenshotPath : undefined;
    const capturedVideo = (await nonEmptyFileExists(videoPath)) ? videoPath : undefined;

    summary = {
      artifacts: {
        driverResultPath,
        reportPath,
        screenshotPath: capturedScreenshot,
        summaryPath,
        videoPath: capturedVideo,
      },
      browserUrl,
      crabbox: {
        bin: crabboxBin,
        createdLease,
        id: leaseId ?? "unallocated",
        provider,
        slug: inspected.slug,
        state: inspected.state,
        vncCommand: leaseId
          ? `${crabboxBin} vnc --provider ${provider} --id ${leaseId} --open`
          : "unallocated",
      },
      error: formatErrorMessage(error),
      finishedAt: new Date().toISOString(),
      outputDir,
      recording: {
        error: capturedVideo ? undefined : "visual-task.mp4 missing",
        required: true,
      },
      startedAt: startedAt.toISOString(),
      status: "fail",
      visionMode,
    };
    await fs.writeFile(path.join(outputDir, "error.txt"), `${summary.error}\n`, "utf8");
    return {
      outputDir,
      reportPath,
      screenshotPath: capturedScreenshot,
      status: "fail",
      summaryPath,
      videoPath: capturedVideo,
    };
  } finally {
    try {
      if (summary) {
        // Stamp the finish time as late as possible, right before persisting.
        summary.finishedAt = new Date().toISOString();
        await fs.writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8");
        await fs.writeFile(reportPath, renderReport(summary), "utf8");
      }
    } finally {
      // Nested so a failed summary/report write cannot leak a lease this run
      // created. Failed runs keep their lease; only passing runs are stopped.
      if (summary?.status === "pass" && createdLease && leaseId && !keepLease) {
        await stopCrabbox({ crabboxBin, cwd: repoRoot, env, leaseId, provider, runner });
      }
    }
  }
}
|
||||
Reference in New Issue
Block a user