Files
openclaw/src/agents/tools/browser-tool.ts

684 lines
22 KiB
TypeScript

import crypto from "node:crypto";
import {
browserAct,
browserArmDialog,
browserArmFileChooser,
browserNavigate,
browserPdfSave,
browserScreenshotAction,
} from "../../browser/client-actions.js";
import {
browserCloseTab,
browserFocusTab,
browserOpenTab,
browserProfiles,
browserStart,
browserStatus,
browserStop,
} from "../../browser/client.js";
import { resolveBrowserConfig, resolveProfile } from "../../browser/config.js";
import { DEFAULT_UPLOAD_DIR, resolveExistingPathsWithinRoot } from "../../browser/paths.js";
import { getBrowserProfileCapabilities } from "../../browser/profile-capabilities.js";
import { applyBrowserProxyPaths, persistBrowserProxyFiles } from "../../browser/proxy-files.js";
import {
trackSessionBrowserTab,
untrackSessionBrowserTab,
} from "../../browser/session-tab-registry.js";
import { loadConfig } from "../../config/config.js";
import {
executeActAction,
executeConsoleAction,
executeSnapshotAction,
executeTabsAction,
} from "./browser-tool.actions.js";
import { BrowserToolSchema } from "./browser-tool.schema.js";
import { type AnyAgentTool, imageResultFromFile, jsonResult, readStringParam } from "./common.js";
import { callGatewayTool } from "./gateway.js";
import {
listNodes,
resolveNodeIdFromList,
selectDefaultNodeFromList,
type NodeListNode,
} from "./nodes-utils.js";
/**
 * Extracts the optional `targetId` and `timeoutMs` inputs from raw tool params.
 *
 * @param params Raw tool-call arguments.
 * @returns `targetId` trimmed when the input is a string (otherwise `undefined`),
 *   and `timeoutMs` when the input is a finite number (otherwise `undefined`).
 */
function readOptionalTargetAndTimeout(params: Record<string, unknown>) {
  const { targetId: rawTargetId, timeoutMs: rawTimeoutMs } = params;

  let targetId: string | undefined;
  if (typeof rawTargetId === "string") {
    // A whitespace-only string intentionally stays as "" rather than undefined.
    targetId = rawTargetId.trim();
  }

  let timeoutMs: number | undefined;
  if (typeof rawTimeoutMs === "number" && Number.isFinite(rawTimeoutMs)) {
    timeoutMs = rawTimeoutMs;
  }

  return { targetId, timeoutMs };
}
/**
 * Reads the URL argument for actions that take one.
 *
 * `targetUrl` is preferred; when it yields no value the `url` param is read
 * instead, marked as required and labelled "targetUrl".
 */
function readTargetUrlParam(params: Record<string, unknown>) {
  const preferred = readStringParam(params, "targetUrl");
  if (preferred !== undefined && preferred !== null) {
    return preferred;
  }
  return readStringParam(params, "url", { required: true, label: "targetUrl" });
}
/**
 * Top-level parameter names that `readActRequestParam` copies into a synthesized
 * act request when the caller passes a flat `kind` plus fields instead of a
 * nested `request` object. Only keys listed here are forwarded.
 */
const LEGACY_BROWSER_ACT_REQUEST_KEYS = [
"targetId",
"ref",
"doubleClick",
"button",
"modifiers",
"text",
"submit",
"slowly",
"key",
"delayMs",
"startRef",
"endRef",
"values",
"fields",
"width",
"height",
"timeMs",
"textGone",
"selector",
"url",
"loadState",
"fn",
"timeoutMs",
] as const;
/**
 * Resolves the act request from tool params.
 *
 * A truthy object-valued `request` param is returned as-is. Otherwise a request
 * is assembled from the `kind` param plus whichever legacy keys are own
 * properties of `params`.
 *
 * @returns The act request, or `undefined` when neither `request` nor `kind` is given.
 */
function readActRequestParam(params: Record<string, unknown>) {
  const { request: explicitRequest } = params;
  if (explicitRequest && typeof explicitRequest === "object") {
    return explicitRequest as Parameters<typeof browserAct>[1];
  }

  const kind = readStringParam(params, "kind");
  if (!kind) {
    return undefined;
  }

  // Forward only the allow-listed legacy keys that the caller actually supplied.
  const legacyEntries = LEGACY_BROWSER_ACT_REQUEST_KEYS.filter((key) =>
    Object.hasOwn(params, key),
  ).map((key) => [key, params[key]] as const);

  const request: Record<string, unknown> = { kind, ...Object.fromEntries(legacyEntries) };
  return request as Parameters<typeof browserAct>[1];
}
/**
 * A file entry attached to a browser proxy response.
 * NOTE(review): `base64` presumably holds the base64-encoded file contents — confirm
 * against persistBrowserProxyFiles.
 */
type BrowserProxyFile = {
path: string;
base64: string;
mimeType?: string;
};
/** Decoded response of a `browser.proxy` node invocation: the result plus optional files. */
type BrowserProxyResult = {
result: unknown;
files?: BrowserProxyFile[];
};
/** Proxy timeout used by callBrowserProxy when the caller supplies no finite `timeoutMs`. */
const DEFAULT_BROWSER_PROXY_TIMEOUT_MS = 20_000;
/** Extra time added on top of the proxy timeout to form the gateway call timeout. */
const BROWSER_PROXY_GATEWAY_TIMEOUT_SLACK_MS = 5_000;
/** The node selected to serve browser requests, with an optional human-readable label. */
type BrowserNodeTarget = {
nodeId: string;
label?: string;
};
/**
 * Reports whether a node advertises browser support: either the "browser"
 * capability or the "browser.proxy" command. Non-array `caps`/`commands`
 * are treated as empty.
 */
function isBrowserNode(node: NodeListNode) {
  const { caps, commands } = node;
  if (Array.isArray(caps) && caps.includes("browser")) {
    return true;
  }
  return Array.isArray(commands) && commands.includes("browser.proxy");
}
/**
 * Decides whether a browser call should be routed through a node-hosted browser
 * proxy, and if so which node.
 *
 * @param params.requestedNode Node id/name the caller asked for, if any.
 * @param params.target Explicit browser location requested by the caller.
 * @param params.sandboxBridgeUrl Sandbox bridge URL; when non-blank and no node was
 *   explicitly requested, node routing is skipped.
 * @returns The selected node, or `null` when the call should not go through a node.
 * @throws Error when a node was explicitly requested but node proxying is off, no
 *   browser-capable node is connected, or no default node could be selected.
 */
async function resolveBrowserNodeTarget(params: {
  requestedNode?: string;
  target?: "sandbox" | "host" | "node";
  sandboxBridgeUrl?: string;
}): Promise<BrowserNodeTarget | null> {
  const { requestedNode, target, sandboxBridgeUrl } = params;
  const browserPolicy = loadConfig().gateway?.nodes?.browser;
  const mode = browserPolicy?.mode ?? "auto";
  // True when the caller asked for a node, via target="node" or a node name/id.
  const nodeExplicitlyRequested = target === "node" || Boolean(requestedNode);

  if (mode === "off") {
    if (nodeExplicitlyRequested) {
      throw new Error("Node browser proxy is disabled (gateway.nodes.browser.mode=off).");
    }
    return null;
  }

  // Without an explicit node request, a sandbox bridge or manual mode opts out of node routing.
  if (!nodeExplicitlyRequested && (sandboxBridgeUrl?.trim() || mode === "manual")) {
    return null;
  }
  // An explicit non-node target never routes to a node.
  if (target && target !== "node") {
    return null;
  }

  const allNodes = await listNodes({});
  const candidates = allNodes.filter((node) => node.connected && isBrowserNode(node));
  if (candidates.length === 0) {
    if (nodeExplicitlyRequested) {
      throw new Error("No connected browser-capable nodes.");
    }
    return null;
  }

  // A node named by the caller wins over the one pinned in config.
  const pinned = requestedNode?.trim() || browserPolicy?.node?.trim();
  if (pinned) {
    const nodeId = resolveNodeIdFromList(candidates, pinned, false);
    const match = candidates.find((entry) => entry.nodeId === nodeId);
    return { nodeId, label: match?.displayName ?? match?.remoteIp ?? nodeId };
  }

  const fallbackNode = selectDefaultNodeFromList(candidates, {
    preferLocalMac: false,
    fallback: "none",
  });
  const autoTarget: BrowserNodeTarget | null = fallbackNode
    ? {
        nodeId: fallbackNode.nodeId,
        label: fallbackNode.displayName ?? fallbackNode.remoteIp ?? fallbackNode.nodeId,
      }
    : null;

  if (target === "node") {
    if (!autoTarget) {
      throw new Error(
        `Multiple browser-capable nodes connected (${candidates.length}). Set gateway.nodes.browser.node or pass node=<id>.`,
      );
    }
    return autoTarget;
  }

  // Manual mode never auto-selects a node when target is not "node".
  return mode === "manual" ? null : autoTarget;
}
/**
 * Invokes the `browser.proxy` command on a node through the gateway and returns
 * the decoded proxy response.
 *
 * The proxy timeout is the caller's finite `timeoutMs` floored to an integer of at
 * least 1 ms, or the default otherwise; the gateway call gets that value plus a
 * fixed slack.
 *
 * The response is taken from `payload` when present, otherwise parsed from the
 * `payloadJSON` string.
 *
 * @throws Error when `payloadJSON` is not valid JSON, or when the decoded payload
 *   is not an object carrying a `result` property.
 */
async function callBrowserProxy(params: {
  nodeId: string;
  method: string;
  path: string;
  query?: Record<string, string | number | boolean | undefined>;
  body?: unknown;
  timeoutMs?: number;
  profile?: string;
}): Promise<BrowserProxyResult> {
  const proxyTimeoutMs =
    typeof params.timeoutMs === "number" && Number.isFinite(params.timeoutMs)
      ? Math.max(1, Math.floor(params.timeoutMs))
      : DEFAULT_BROWSER_PROXY_TIMEOUT_MS;
  const gatewayTimeoutMs = proxyTimeoutMs + BROWSER_PROXY_GATEWAY_TIMEOUT_SLACK_MS;

  // `payload` is typed as unknown: it is only accepted below after a runtime shape check.
  const payload = await callGatewayTool<{ payloadJSON?: string; payload?: unknown }>(
    "node.invoke",
    { timeoutMs: gatewayTimeoutMs },
    {
      nodeId: params.nodeId,
      command: "browser.proxy",
      params: {
        method: params.method,
        path: params.path,
        query: params.query,
        body: params.body,
        timeoutMs: proxyTimeoutMs,
        profile: params.profile,
      },
      idempotencyKey: crypto.randomUUID(),
    },
  );

  let parsed: unknown = payload?.payload;
  if (parsed === undefined || parsed === null) {
    const rawJson = payload?.payloadJSON;
    if (typeof rawJson === "string" && rawJson) {
      try {
        parsed = JSON.parse(rawJson);
      } catch (err) {
        // Surface malformed JSON as a proxy failure instead of a bare SyntaxError.
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`browser proxy failed: invalid JSON payload (${reason})`);
      }
    }
  }

  if (!parsed || typeof parsed !== "object" || !("result" in parsed)) {
    throw new Error("browser proxy failed");
  }
  // Shape verified above: a non-null object with a `result` property.
  return parsed as BrowserProxyResult;
}
/**
 * Hands the files from a proxy response to `persistBrowserProxyFiles` and
 * resolves with whatever it returns.
 */
async function persistProxyFiles(files: BrowserProxyFile[] | undefined) {
  const mapping = await persistBrowserProxyFiles(files);
  return mapping;
}
/**
 * Applies a path mapping to a proxy result by delegating to `applyBrowserProxyPaths`.
 * NOTE(review): the mapping is presumably remote path -> persisted local path — confirm
 * against persistBrowserProxyFiles.
 */
function applyProxyPaths(result: unknown, mapping: Map<string, string>): void {
  applyBrowserProxyPaths(result, mapping);
}
/**
 * Resolves the base URL for non-node browser calls.
 *
 * When no target is given, the sandbox is chosen if a non-blank bridge URL exists,
 * otherwise the host.
 *
 * @returns The sandbox bridge URL with one trailing "/" removed for the sandbox
 *   target, or `undefined` for the host target.
 * @throws Error when the sandbox target has no bridge URL, when host control is
 *   explicitly disallowed, or when browser control is disabled in config.
 */
function resolveBrowserBaseUrl(params: {
  target?: "sandbox" | "host";
  sandboxBridgeUrl?: string;
  allowHostControl?: boolean;
}): string | undefined {
  const cfg = loadConfig();
  const browserConfig = resolveBrowserConfig(cfg.browser, cfg);
  const sandboxUrl = (params.sandboxBridgeUrl ?? "").trim();
  const effectiveTarget = params.target ?? (sandboxUrl.length > 0 ? "sandbox" : "host");

  if (effectiveTarget !== "sandbox") {
    if (params.allowHostControl === false) {
      throw new Error("Host browser control is disabled by sandbox policy.");
    }
    if (!browserConfig.enabled) {
      throw new Error(
        "Browser control is disabled. Set browser.enabled=true in ~/.openclaw/openclaw.json.",
      );
    }
    return undefined;
  }

  if (sandboxUrl.length === 0) {
    throw new Error(
      'Sandbox browser is unavailable. Enable agents.defaults.sandbox.browser.enabled or use target="host" if allowed.',
    );
  }
  // Strip a single trailing slash only.
  return sandboxUrl.endsWith("/") ? sandboxUrl.slice(0, -1) : sandboxUrl;
}
/**
 * Reports whether the named browser profile should be served by the host browser.
 *
 * @returns `false` when no profile name is given or the profile cannot be resolved;
 *   otherwise whether the profile's capabilities report `requiresRelay` or
 *   `usesChromeMcp`.
 */
function shouldPreferHostForProfile(profileName: string | undefined) {
  if (profileName) {
    const cfg = loadConfig();
    const profile = resolveProfile(resolveBrowserConfig(cfg.browser, cfg), profileName);
    if (profile) {
      const { requiresRelay, usesChromeMcp } = getBrowserProfileCapabilities(profile);
      return requiresRelay || usesChromeMcp;
    }
  }
  return false;
}
/**
 * Builds the `browser` agent tool.
 *
 * Each call reads `action`, `profile`, `node` and `target` from the arguments,
 * decides whether to route through a node-hosted browser proxy or a local base
 * URL, and then dispatches on `action`.
 *
 * @param opts.sandboxBridgeUrl Sandbox bridge URL; when set, the advertised default target is "sandbox".
 * @param opts.allowHostControl When `false`, the description notes the host target is blocked
 *   and base-URL resolution rejects the host target.
 * @param opts.agentSessionKey Session key passed along when tracking/untracking opened tabs.
 * @returns The tool definition (label, name, description, parameters schema, execute handler).
 */
export function createBrowserTool(opts?: {
sandboxBridgeUrl?: string;
allowHostControl?: boolean;
agentSessionKey?: string;
}): AnyAgentTool {
const targetDefault = opts?.sandboxBridgeUrl ? "sandbox" : "host";
const hostHint =
opts?.allowHostControl === false ? "Host target blocked by policy." : "Host target allowed.";
return {
label: "Browser",
name: "browser",
description: [
"Control the browser via OpenClaw's browser control server (status/start/stop/profiles/tabs/open/snapshot/screenshot/actions).",
"Browser choice: omit profile by default for the isolated OpenClaw-managed browser (`openclaw`).",
'For the logged-in user browser on the local host, use profile="user". Chrome must be running with remote debugging enabled (chrome://inspect/#remote-debugging). The user must approve the browser attach prompt. Use only when existing logins/cookies matter and the user is present.',
'When a node-hosted browser proxy is available, the tool may auto-route to it. Pin a node with node=<id|name> or target="node".',
"When using refs from snapshot (e.g. e12), keep the same tab: prefer passing targetId from the snapshot response into subsequent actions (act/click/type/etc).",
'For stable, self-resolving refs across calls, use snapshot with refs="aria" (Playwright aria-ref ids). Default refs="role" are role+name-based.',
"Use snapshot+act for UI automation. Avoid act:wait by default; use only in exceptional cases when no reliable UI state exists.",
`target selects browser location (sandbox|host|node). Default: ${targetDefault}.`,
hostHint,
].join(" "),
parameters: BrowserToolSchema,
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
const action = readStringParam(params, "action", { required: true });
const profile = readStringParam(params, "profile");
const requestedNode = readStringParam(params, "node");
let target = readStringParam(params, "target") as "sandbox" | "host" | "node" | undefined;
// A node name only makes sense together with target="node" (or no target at all).
if (requestedNode && target && target !== "node") {
throw new Error('node is only supported with target="node".');
}
// User-browser profiles (existing-session, extension relay) are host-only.
const isUserBrowserProfile = shouldPreferHostForProfile(profile);
if (isUserBrowserProfile) {
if (requestedNode || target === "node") {
throw new Error(`profile="${profile}" only supports the local host browser.`);
}
if (target === "sandbox") {
throw new Error(
`profile="${profile}" cannot use the sandbox browser; use target="host" or omit target.`,
);
}
// Pin the host target so node auto-routing and the sandbox default are skipped.
if (!target && !requestedNode) {
target = "host";
}
}
// Non-null when this call should go through a node-hosted browser proxy.
const nodeTarget = await resolveBrowserNodeTarget({
requestedNode: requestedNode ?? undefined,
target,
sandboxBridgeUrl: opts?.sandboxBridgeUrl,
});
const resolvedTarget = target === "node" ? undefined : target;
// The base URL is only resolved when no node was selected.
const baseUrl = nodeTarget
? undefined
: resolveBrowserBaseUrl({
target: resolvedTarget,
sandboxBridgeUrl: opts?.sandboxBridgeUrl,
allowHostControl: opts?.allowHostControl,
});
// Node path: forward the request, persist returned files, apply the path mapping to
// the result, then return the result. Note the inner `opts` parameter shadows the
// outer `opts` of createBrowserTool within this arrow function.
const proxyRequest = nodeTarget
? async (opts: {
method: string;
path: string;
query?: Record<string, string | number | boolean | undefined>;
body?: unknown;
timeoutMs?: number;
profile?: string;
}) => {
const proxy = await callBrowserProxy({
nodeId: nodeTarget.nodeId,
method: opts.method,
path: opts.path,
query: opts.query,
body: opts.body,
timeoutMs: opts.timeoutMs,
profile: opts.profile,
});
const mapping = await persistProxyFiles(proxy.files);
applyProxyPaths(proxy.result, mapping);
return proxy.result;
}
: null;
// Every action has a proxied branch (when proxyRequest is set) and a direct branch.
switch (action) {
case "status":
if (proxyRequest) {
return jsonResult(
await proxyRequest({
method: "GET",
path: "/",
profile,
}),
);
}
return jsonResult(await browserStatus(baseUrl, { profile }));
case "start":
// After starting, respond with the current status.
if (proxyRequest) {
await proxyRequest({
method: "POST",
path: "/start",
profile,
});
return jsonResult(
await proxyRequest({
method: "GET",
path: "/",
profile,
}),
);
}
await browserStart(baseUrl, { profile });
return jsonResult(await browserStatus(baseUrl, { profile }));
case "stop":
// After stopping, respond with the current status.
if (proxyRequest) {
await proxyRequest({
method: "POST",
path: "/stop",
profile,
});
return jsonResult(
await proxyRequest({
method: "GET",
path: "/",
profile,
}),
);
}
await browserStop(baseUrl, { profile });
return jsonResult(await browserStatus(baseUrl, { profile }));
case "profiles":
if (proxyRequest) {
const result = await proxyRequest({
method: "GET",
path: "/profiles",
});
return jsonResult(result);
}
return jsonResult({ profiles: await browserProfiles(baseUrl) });
case "tabs":
return await executeTabsAction({ baseUrl, profile, proxyRequest });
case "open": {
const targetUrl = readTargetUrlParam(params);
if (proxyRequest) {
const result = await proxyRequest({
method: "POST",
path: "/tabs/open",
profile,
body: { url: targetUrl },
});
return jsonResult(result);
}
// Only tabs opened on the direct (non-proxy) path are tracked for the session.
const opened = await browserOpenTab(baseUrl, targetUrl, { profile });
trackSessionBrowserTab({
sessionKey: opts?.agentSessionKey,
targetId: opened.targetId,
baseUrl,
profile,
});
return jsonResult(opened);
}
case "focus": {
const targetId = readStringParam(params, "targetId", {
required: true,
});
if (proxyRequest) {
const result = await proxyRequest({
method: "POST",
path: "/tabs/focus",
profile,
body: { targetId },
});
return jsonResult(result);
}
await browserFocusTab(baseUrl, targetId, { profile });
return jsonResult({ ok: true });
}
case "close": {
// With a targetId the specific tab is closed; without one a `close` act is issued.
const targetId = readStringParam(params, "targetId");
if (proxyRequest) {
const result = targetId
? await proxyRequest({
method: "DELETE",
path: `/tabs/${encodeURIComponent(targetId)}`,
profile,
})
: await proxyRequest({
method: "POST",
path: "/act",
profile,
body: { kind: "close" },
});
return jsonResult(result);
}
if (targetId) {
await browserCloseTab(baseUrl, targetId, { profile });
untrackSessionBrowserTab({
sessionKey: opts?.agentSessionKey,
targetId,
baseUrl,
profile,
});
} else {
await browserAct(baseUrl, { kind: "close" }, { profile });
}
return jsonResult({ ok: true });
}
case "snapshot":
return await executeSnapshotAction({
input: params,
baseUrl,
profile,
proxyRequest,
});
case "screenshot": {
const targetId = readStringParam(params, "targetId");
const fullPage = Boolean(params.fullPage);
const ref = readStringParam(params, "ref");
const element = readStringParam(params, "element");
// Any type other than "jpeg" is treated as "png".
const type = params.type === "jpeg" ? "jpeg" : "png";
const result = proxyRequest
? ((await proxyRequest({
method: "POST",
path: "/screenshot",
profile,
body: {
targetId,
fullPage,
ref,
element,
type,
},
})) as Awaited<ReturnType<typeof browserScreenshotAction>>)
: await browserScreenshotAction(baseUrl, {
targetId,
fullPage,
ref,
element,
type,
profile,
});
return await imageResultFromFile({
label: "browser:screenshot",
path: result.path,
details: result,
});
}
case "navigate": {
const targetUrl = readTargetUrlParam(params);
const targetId = readStringParam(params, "targetId");
if (proxyRequest) {
const result = await proxyRequest({
method: "POST",
path: "/navigate",
profile,
body: {
url: targetUrl,
targetId,
},
});
return jsonResult(result);
}
return jsonResult(
await browserNavigate(baseUrl, {
url: targetUrl,
targetId,
profile,
}),
);
}
case "console":
return await executeConsoleAction({
input: params,
baseUrl,
profile,
proxyRequest,
});
case "pdf": {
const targetId = typeof params.targetId === "string" ? params.targetId.trim() : undefined;
const result = proxyRequest
? ((await proxyRequest({
method: "POST",
path: "/pdf",
profile,
body: { targetId },
})) as Awaited<ReturnType<typeof browserPdfSave>>)
: await browserPdfSave(baseUrl, { targetId, profile });
// The saved PDF path is reported as a FILE: text entry.
return {
content: [{ type: "text" as const, text: `FILE:${result.path}` }],
details: result,
};
}
case "upload": {
const paths = Array.isArray(params.paths) ? params.paths.map((p) => String(p)) : [];
if (paths.length === 0) {
throw new Error("paths required");
}
// Requested paths are resolved against the uploads directory; a failed resolution is thrown.
const uploadPathsResult = await resolveExistingPathsWithinRoot({
rootDir: DEFAULT_UPLOAD_DIR,
requestedPaths: paths,
scopeLabel: `uploads directory (${DEFAULT_UPLOAD_DIR})`,
});
if (!uploadPathsResult.ok) {
throw new Error(uploadPathsResult.error);
}
const normalizedPaths = uploadPathsResult.paths;
const ref = readStringParam(params, "ref");
const inputRef = readStringParam(params, "inputRef");
const element = readStringParam(params, "element");
const { targetId, timeoutMs } = readOptionalTargetAndTimeout(params);
if (proxyRequest) {
const result = await proxyRequest({
method: "POST",
path: "/hooks/file-chooser",
profile,
body: {
paths: normalizedPaths,
ref,
inputRef,
element,
targetId,
timeoutMs,
},
});
return jsonResult(result);
}
return jsonResult(
await browserArmFileChooser(baseUrl, {
paths: normalizedPaths,
ref,
inputRef,
element,
targetId,
timeoutMs,
profile,
}),
);
}
case "dialog": {
const accept = Boolean(params.accept);
const promptText = typeof params.promptText === "string" ? params.promptText : undefined;
const { targetId, timeoutMs } = readOptionalTargetAndTimeout(params);
if (proxyRequest) {
const result = await proxyRequest({
method: "POST",
path: "/hooks/dialog",
profile,
body: {
accept,
promptText,
targetId,
timeoutMs,
},
});
return jsonResult(result);
}
return jsonResult(
await browserArmDialog(baseUrl, {
accept,
promptText,
targetId,
timeoutMs,
profile,
}),
);
}
case "act": {
// Accepts either a nested `request` object or the legacy flat `kind` + fields form.
const request = readActRequestParam(params);
if (!request) {
throw new Error("request required");
}
return await executeActAction({
request,
baseUrl,
profile,
proxyRequest,
});
}
default:
throw new Error(`Unknown action: ${action}`);
}
},
};
}