// Mirror of https://github.com/openclaw/openclaw.git (synced 2026-05-11 11:10:43 +00:00); 446 lines, 15 KiB, TypeScript.
import fs from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
import { runMantisVisualDriver, runMantisVisualTask } from "./visual-task.runtime.js";
|
|
|
|
describe("mantis visual task runtime", () => {
|
|
// Scratch directory used as the repository root; created before each test and removed afterwards.
let repoRoot: string;

// Create a fresh, uniquely named directory under the OS temp dir for the upcoming test.
beforeEach(async () => {
  const tempPrefix = path.join(os.tmpdir(), "mantis-visual-task-");
  repoRoot = await fs.mkdtemp(tempPrefix);
});

// Delete the scratch directory recursively; `force` keeps cleanup from failing on a missing path.
afterEach(async () => {
  await fs.rm(repoRoot, { recursive: true, force: true });
});
|
|
|
|
// Happy path for runMantisVisualTask: a mocked crabbox CLI answers warmup, inspect, record and
// stop, and the test checks the command sequence, the record arguments, the artifacts on disk
// and the written summary file.
it("records a visible browser task and keeps screenshot/video artifacts", async () => {
  // Returns the argument that follows `flag`. Throws when the flag or its value is missing:
  // a bare `indexOf(flag) + 1` would resolve to argv[0] (the subcommand name) and the mock
  // would then create stray files relative to the current working directory.
  const flagValue = (argv: readonly string[], flag: string): string => {
    const flagIndex = argv.indexOf(flag);
    const value = flagIndex === -1 ? undefined : argv[flagIndex + 1];
    if (value === undefined) {
      throw new Error(`crabbox mock expected "${flag} <value>" in: ${argv.join(" ")}`);
    }
    return value;
  };

  const commands: { args: readonly string[]; command: string }[] = [];
  const runner = vi.fn(async (command: string, args: readonly string[]) => {
    commands.push({ command, args });
    const isCrabbox = command === "/tmp/crabbox";

    if (isCrabbox && args[0] === "warmup") {
      return { stdout: "ready lease cbx_abc123\n", stderr: "" };
    }

    if (isCrabbox && args[0] === "inspect") {
      const lease = {
        id: "cbx_abc123",
        provider: "hetzner",
        slug: "brisk-mantis",
        state: "active",
      };
      return { stdout: `${JSON.stringify(lease)}\n`, stderr: "" };
    }

    if (isCrabbox && args[0] === "record") {
      const outputPath = flagValue(args, "--output");
      const outputDir = flagValue(args, "--output-dir");
      const screenshotPath = path.join(outputDir, "visual-task.png");

      // Fake the recorder: write the video where the caller asked for it.
      await fs.mkdir(path.dirname(outputPath), { recursive: true });
      await fs.writeFile(outputPath, "mp4");

      // Create the driver output directory explicitly instead of relying on it being the
      // same directory as the video path.
      await fs.mkdir(outputDir, { recursive: true });
      await fs.writeFile(screenshotPath, "png");

      // Fake the visual driver: it reports a passing metadata-mode result.
      const driverResult = {
        browserUrl: "https://example.net",
        finishedAt: "2026-05-04T12:00:05.000Z",
        matched: true,
        outputDir,
        screenshotPath,
        startedAt: "2026-05-04T12:00:01.000Z",
        status: "pass",
        vision: {
          mode: "metadata",
          timeoutMs: 120000,
        },
      };
      await fs.writeFile(
        path.join(outputDir, "mantis-visual-task-driver-result.json"),
        `${JSON.stringify(driverResult)}\n`,
      );
    }

    return { stdout: "", stderr: "" };
  });

  const result = await runMantisVisualTask({
    commandRunner: runner,
    crabboxBin: "/tmp/crabbox",
    duration: "12s",
    env: { PATH: process.env.PATH },
    now: () => new Date("2026-05-04T12:00:00.000Z"),
    outputDir: ".artifacts/qa-e2e/mantis/visual-task-test",
    repoRoot,
    settleMs: 0,
    visionMode: "metadata",
  });

  expect(result.status).toBe("pass");

  // The lease is warmed up, inspected, recorded and finally stopped, in that order.
  const invoked = commands.map((entry) => [entry.command, entry.args[0]]);
  expect(invoked).toEqual([
    ["/tmp/crabbox", "warmup"],
    ["/tmp/crabbox", "inspect"],
    ["/tmp/crabbox", "record"],
    ["/tmp/crabbox", "stop"],
  ]);

  const recordArgs = commands.find((entry) => entry.args[0] === "record")?.args ?? [];
  const finalVideoPath = path.join(
    repoRoot,
    ".artifacts/qa-e2e/mantis/visual-task-test/visual-task.mp4",
  );
  const stagedVideoPath = recordArgs[recordArgs.indexOf("--output") + 1];
  const stagedName = path.basename(stagedVideoPath ?? "");

  expect(recordArgs).toEqual(
    expect.arrayContaining([
      "--duration",
      "12s",
      "--output",
      stagedVideoPath,
      "--while",
      "--",
      "pnpm",
      "--dir",
      repoRoot,
      "openclaw",
      "qa",
      "mantis",
      "visual-driver",
    ]),
  );

  // The recorder is pointed at a staged ".part" path, not the final video path, and the
  // staged file is expected to be gone once the task has finished.
  expect(stagedVideoPath).not.toBe(finalVideoPath);
  expect(stagedName).toContain(path.basename(finalVideoPath));
  expect(stagedName).toMatch(/\.part$/);
  await expect(fs.stat(stagedVideoPath ?? "")).rejects.toThrow();

  await expect(fs.readFile(result.screenshotPath ?? "", "utf8")).resolves.toBe("png");
  await expect(fs.readFile(result.videoPath ?? "", "utf8")).resolves.toBe("mp4");

  const summaryText = await fs.readFile(result.summaryPath, "utf8");
  const summary = JSON.parse(summaryText) as {
    crabbox: { id: string; vncCommand: string };
    status: string;
    visionMode: string;
  };
  expect(summary).toMatchObject({
    crabbox: {
      id: "cbx_abc123",
      vncCommand: "/tmp/crabbox vnc --provider hetzner --id cbx_abc123 --open",
    },
    status: "pass",
    visionMode: "metadata",
  });
});
|
|
|
|
// The mocked `record` command writes a passing driver result but no video, then throws.
// The test expects the task to fail, to report no video path, to issue no further crabbox
// command after `record`, and to persist the recording error in the summary file.
it("fails when recording breaks after the visual driver passes", async () => {
  // Returns the argument that follows `flag`. Throws when the flag or its value is missing:
  // a bare `indexOf(flag) + 1` would resolve to argv[0] (the subcommand name) and the mock
  // would then create stray files relative to the current working directory.
  const flagValue = (argv: readonly string[], flag: string): string => {
    const flagIndex = argv.indexOf(flag);
    const value = flagIndex === -1 ? undefined : argv[flagIndex + 1];
    if (value === undefined) {
      throw new Error(`crabbox mock expected "${flag} <value>" in: ${argv.join(" ")}`);
    }
    return value;
  };

  const commands: { args: readonly string[]; command: string }[] = [];
  const runner = vi.fn(async (command: string, args: readonly string[]) => {
    commands.push({ command, args });
    const isCrabbox = command === "/tmp/crabbox";

    if (isCrabbox && args[0] === "warmup") {
      return { stdout: "ready lease cbx_abc123\n", stderr: "" };
    }

    if (isCrabbox && args[0] === "inspect") {
      const lease = {
        id: "cbx_abc123",
        provider: "hetzner",
        slug: "brisk-mantis",
        state: "active",
      };
      return { stdout: `${JSON.stringify(lease)}\n`, stderr: "" };
    }

    if (isCrabbox && args[0] === "record") {
      const outputDir = flagValue(args, "--output-dir");
      const screenshotPath = path.join(outputDir, "visual-task.png");

      await fs.mkdir(outputDir, { recursive: true });
      await fs.writeFile(screenshotPath, "png");

      // The driver itself reports success; only the recorder fails afterwards.
      const driverResult = {
        browserUrl: "https://example.net",
        finishedAt: "2026-05-04T12:00:05.000Z",
        matched: true,
        outputDir,
        screenshotPath,
        startedAt: "2026-05-04T12:00:01.000Z",
        status: "pass",
        vision: {
          mode: "metadata",
          timeoutMs: 120000,
        },
      };
      await fs.writeFile(
        path.join(outputDir, "mantis-visual-task-driver-result.json"),
        `${JSON.stringify(driverResult)}\n`,
      );

      throw new Error("crabbox record failed after driver exit");
    }

    return { stdout: "", stderr: "" };
  });

  const result = await runMantisVisualTask({
    commandRunner: runner,
    crabboxBin: "/tmp/crabbox",
    env: { PATH: process.env.PATH },
    now: () => new Date("2026-05-04T12:00:00.000Z"),
    outputDir: ".artifacts/qa-e2e/mantis/visual-task-recording-fail",
    repoRoot,
    settleMs: 0,
    visionMode: "metadata",
  });

  expect(result).toMatchObject({
    status: "fail",
    videoPath: undefined,
  });

  // No `stop` entry is expected here: the sequence ends at the failing `record`.
  const invoked = commands.map((entry) => [entry.command, entry.args[0]]);
  expect(invoked).toEqual([
    ["/tmp/crabbox", "warmup"],
    ["/tmp/crabbox", "inspect"],
    ["/tmp/crabbox", "record"],
  ]);

  const summaryText = await fs.readFile(result.summaryPath, "utf8");
  const summary = JSON.parse(summaryText) as {
    error?: string;
    recording?: { error?: string; required: boolean };
    status: string;
  };
  expect(summary).toMatchObject({
    error: "crabbox record failed after driver exit",
    recording: {
      error: "crabbox record failed after driver exit",
      required: true,
    },
    status: "fail",
  });
});
|
|
|
|
// The mocked `record` command writes the video and a passing driver result, then throws.
// The test expects a failing status while the video still ends up at the final path, the
// staged file is gone, and the summary file carries both the video path and the error.
it("preserves the video artifact when recording fails after writing output", async () => {
  // Returns the argument that follows `flag`. Throws when the flag or its value is missing:
  // a bare `indexOf(flag) + 1` would resolve to argv[0] (the subcommand name) and the mock
  // would then create stray files relative to the current working directory.
  const flagValue = (argv: readonly string[], flag: string): string => {
    const flagIndex = argv.indexOf(flag);
    const value = flagIndex === -1 ? undefined : argv[flagIndex + 1];
    if (value === undefined) {
      throw new Error(`crabbox mock expected "${flag} <value>" in: ${argv.join(" ")}`);
    }
    return value;
  };

  const commands: { args: readonly string[]; command: string }[] = [];
  // Captured from the `record` invocation so the test can check the staged file afterwards.
  let stagedVideoPath = "";
  const runner = vi.fn(async (command: string, args: readonly string[]) => {
    commands.push({ command, args });
    const isCrabbox = command === "/tmp/crabbox";

    if (isCrabbox && args[0] === "warmup") {
      return { stdout: "ready lease cbx_abc123\n", stderr: "" };
    }

    if (isCrabbox && args[0] === "inspect") {
      const lease = {
        id: "cbx_abc123",
        provider: "hetzner",
        slug: "brisk-mantis",
        state: "active",
      };
      return { stdout: `${JSON.stringify(lease)}\n`, stderr: "" };
    }

    if (isCrabbox && args[0] === "record") {
      const outputPath = flagValue(args, "--output");
      const outputDir = flagValue(args, "--output-dir");
      const screenshotPath = path.join(outputDir, "visual-task.png");
      stagedVideoPath = outputPath;

      // The video is written before the failure.
      await fs.mkdir(path.dirname(outputPath), { recursive: true });
      await fs.writeFile(outputPath, "mp4");

      await fs.mkdir(outputDir, { recursive: true });
      await fs.writeFile(screenshotPath, "png");

      const driverResult = {
        browserUrl: "https://example.net",
        finishedAt: "2026-05-04T12:00:05.000Z",
        matched: true,
        outputDir,
        screenshotPath,
        startedAt: "2026-05-04T12:00:01.000Z",
        status: "pass",
        vision: {
          mode: "metadata",
          timeoutMs: 120000,
        },
      };
      await fs.writeFile(
        path.join(outputDir, "mantis-visual-task-driver-result.json"),
        `${JSON.stringify(driverResult)}\n`,
      );

      throw new Error("crabbox record failed after writing video");
    }

    return { stdout: "", stderr: "" };
  });

  const result = await runMantisVisualTask({
    commandRunner: runner,
    crabboxBin: "/tmp/crabbox",
    env: { PATH: process.env.PATH },
    now: () => new Date("2026-05-04T12:00:00.000Z"),
    outputDir: ".artifacts/qa-e2e/mantis/visual-task-recording-preserved",
    repoRoot,
    settleMs: 0,
    visionMode: "metadata",
  });

  const expectedVideoPath = path.join(
    repoRoot,
    ".artifacts/qa-e2e/mantis/visual-task-recording-preserved/visual-task.mp4",
  );

  expect(result.status).toBe("fail");
  expect(result.videoPath).toBe(expectedVideoPath);
  await expect(fs.readFile(result.videoPath ?? "", "utf8")).resolves.toBe("mp4");
  // The staged file must no longer exist once the task has returned.
  await expect(fs.stat(stagedVideoPath)).rejects.toThrow();

  const summaryText = await fs.readFile(result.summaryPath, "utf8");
  const summary = JSON.parse(summaryText) as {
    artifacts?: { videoPath?: string };
    error?: string;
    recording?: { error?: string; required: boolean };
    status: string;
  };
  expect(summary).toMatchObject({
    artifacts: {
      videoPath: result.videoPath,
    },
    error: "crabbox record failed after writing video",
    recording: {
      error: "crabbox record failed after writing video",
      required: true,
    },
    status: "fail",
  });
});
|
|
|
|
// Exercises runMantisVisualDriver in "image-describe" mode: the mocked crabbox CLI produces a
// screenshot and the mocked `pnpm` call returns a structured assertion preceded by an echoed
// command banner. The test checks the command sequence, the launch and vision arguments, and
// the resulting assertion.
it("drives a lease, screenshots it, and verifies image-describe text", async () => {
  // Returns the argument that follows `flag`. Throws when the flag or its value is missing:
  // a bare `indexOf(flag) + 1` would resolve to argv[0] (the subcommand name) and the mock
  // would then create a stray file relative to the current working directory.
  const flagValue = (argv: readonly string[], flag: string): string => {
    const flagIndex = argv.indexOf(flag);
    const value = flagIndex === -1 ? undefined : argv[flagIndex + 1];
    if (value === undefined) {
      throw new Error(`crabbox mock expected "${flag} <value>" in: ${argv.join(" ")}`);
    }
    return value;
  };

  const commands: { args: readonly string[]; command: string }[] = [];
  const runner = vi.fn(async (command: string, args: readonly string[]) => {
    commands.push({ command, args });

    if (command === "/tmp/crabbox" && args[0] === "screenshot") {
      const outputPath = flagValue(args, "--output");
      await fs.mkdir(path.dirname(outputPath), { recursive: true });
      await fs.writeFile(outputPath, "png");
    }

    if (command === "pnpm") {
      // The model's answer is itself JSON, nested as a string inside the CLI's JSON envelope.
      const assertionText = JSON.stringify({
        evidence: 'The page heading reads "Example Domain".',
        reason: "The expected text is visible as the main heading.",
        visible: true,
      });
      const envelope = JSON.stringify({
        ok: true,
        outputs: [
          {
            kind: "image.description",
            text: assertionText,
          },
        ],
      });
      // The banner line before the JSON contains braces of its own.
      return {
        stdout: `\n> openclaw qa mantis visual-driver --vision-prompt '{"visible": boolean}'\n${envelope}\n`,
        stderr: "",
      };
    }

    return { stdout: "", stderr: "" };
  });

  const result = await runMantisVisualDriver({
    browserUrl: "https://example.net",
    commandRunner: runner,
    crabboxBin: "/tmp/crabbox",
    env: { PATH: process.env.PATH },
    expectText: "Example Domain",
    leaseId: "cbx_abc123",
    outputDir: ".artifacts/qa-e2e/mantis/visual-driver-test",
    repoRoot,
    settleMs: 0,
    visionMode: "image-describe",
    visionModel: "openai/gpt-5.4",
    visionPrompt: "Read the page title",
  });

  expect(result.status).toBe("pass");

  const invoked = commands.map((entry) => [entry.command, entry.args[0], entry.args[1]]);
  expect(invoked).toEqual([
    ["/tmp/crabbox", "desktop", "launch"],
    ["/tmp/crabbox", "screenshot", "--provider"],
    ["pnpm", "--dir", repoRoot],
  ]);

  const launchArgs = commands.find((entry) => entry.args[0] === "desktop")?.args ?? [];
  expect(launchArgs).toEqual(
    expect.arrayContaining(["--", "sh", "-lc", expect.stringContaining("--no-first-run")]),
  );

  const visionArgs = commands.find((entry) => entry.command === "pnpm")?.args ?? [];
  const screenshotFile = path.join(
    repoRoot,
    ".artifacts/qa-e2e/mantis/visual-driver-test/visual-task.png",
  );
  expect(visionArgs).toEqual(
    expect.arrayContaining([
      "infer",
      "image",
      "describe",
      "--file",
      screenshotFile,
      "--model",
      "openai/gpt-5.4",
    ]),
  );
  expect(visionArgs).toEqual(
    expect.arrayContaining(["--prompt", expect.stringContaining("return only valid JSON")]),
  );

  expect(result.vision.assertion).toMatchObject({
    evidence: 'The page heading reads "Example Domain".',
    matched: true,
    visible: true,
  });
});
|
|
|
|
// The mocked vision call returns free-form prose that quotes the expected text while denying
// it is present. The test expects a failure with the "no structured visual assertion" reason.
it("fails image-describe text checks when the model gives negative evidence that quotes the target", async () => {
  // Returns the argument that follows `flag`. Throws when the flag or its value is missing:
  // a bare `indexOf(flag) + 1` would resolve to argv[0] (the subcommand name) and the mock
  // would then create a stray file relative to the current working directory.
  const flagValue = (argv: readonly string[], flag: string): string => {
    const flagIndex = argv.indexOf(flag);
    const value = flagIndex === -1 ? undefined : argv[flagIndex + 1];
    if (value === undefined) {
      throw new Error(`crabbox mock expected "${flag} <value>" in: ${argv.join(" ")}`);
    }
    return value;
  };

  const runner = vi.fn(async (command: string, args: readonly string[]) => {
    if (command === "/tmp/crabbox" && args[0] === "screenshot") {
      const outputPath = flagValue(args, "--output");
      await fs.mkdir(path.dirname(outputPath), { recursive: true });
      await fs.writeFile(outputPath, "png");
    }

    if (command === "pnpm") {
      // Plain prose, not JSON: it contains the target text but negates it.
      const envelope = JSON.stringify({
        ok: true,
        outputs: [
          {
            kind: "image.description",
            text: 'The screenshot does not contain "Example Domain".',
          },
        ],
      });
      return { stdout: `${envelope}\n`, stderr: "" };
    }

    return { stdout: "", stderr: "" };
  });

  const result = await runMantisVisualDriver({
    commandRunner: runner,
    crabboxBin: "/tmp/crabbox",
    expectText: "Example Domain",
    leaseId: "cbx_abc123",
    outputDir: ".artifacts/qa-e2e/mantis/visual-driver-negative",
    repoRoot,
    settleMs: 0,
    visionMode: "image-describe",
  });

  expect(result).toMatchObject({
    matched: false,
    status: "fail",
    vision: {
      assertion: {
        matched: false,
        reason: "Image describe did not return a structured visual assertion.",
      },
    },
  });
});
|
|
|
|
// Runs the driver in "metadata" mode while also passing `expectText`. The test expects the
// result to be an unmatched failure that still reports the metadata vision mode.
it("fails metadata mode when text evidence is requested", async () => {
  // Returns the argument that follows `flag`. Throws when the flag or its value is missing:
  // a bare `indexOf(flag) + 1` would resolve to argv[0] (the subcommand name) and the mock
  // would then create a stray file relative to the current working directory.
  const flagValue = (argv: readonly string[], flag: string): string => {
    const flagIndex = argv.indexOf(flag);
    const value = flagIndex === -1 ? undefined : argv[flagIndex + 1];
    if (value === undefined) {
      throw new Error(`crabbox mock expected "${flag} <value>" in: ${argv.join(" ")}`);
    }
    return value;
  };

  const runner = vi.fn(async (command: string, args: readonly string[]) => {
    // Only the screenshot command has a side effect; every command returns empty output.
    if (command === "/tmp/crabbox" && args[0] === "screenshot") {
      const outputPath = flagValue(args, "--output");
      await fs.mkdir(path.dirname(outputPath), { recursive: true });
      await fs.writeFile(outputPath, "png");
    }
    return { stdout: "", stderr: "" };
  });

  const result = await runMantisVisualDriver({
    commandRunner: runner,
    crabboxBin: "/tmp/crabbox",
    expectText: "Example Domain",
    leaseId: "cbx_abc123",
    outputDir: ".artifacts/qa-e2e/mantis/visual-driver-metadata",
    repoRoot,
    settleMs: 0,
    visionMode: "metadata",
  });

  expect(result).toMatchObject({
    matched: false,
    status: "fail",
    vision: {
      mode: "metadata",
    },
  });
});
|
|
});
|