refactor: switch browser ownership to bundled plugin

This commit is contained in:
Peter Steinberger
2026-03-26 22:18:41 +00:00
parent 197510f693
commit 8eeb7f0829
255 changed files with 16981 additions and 21074 deletions

View File

@@ -0,0 +1,51 @@
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { clearPluginLoaderCache } from "../plugins/loader.js";
import { clearPluginManifestRegistryCache } from "../plugins/manifest-registry.js";
import { resetPluginRuntimeStateForTest } from "../plugins/runtime.js";
import { createOpenClawTools } from "./openclaw-tools.js";
/**
 * Runs the three plugin reset helpers (loader cache, manifest registry cache,
 * runtime state) in sequence so a test starts from freshly resolved plugins.
 */
function resetPluginState() {
  const resetSteps = [
    clearPluginLoaderCache,
    clearPluginManifestRegistryCache,
    resetPluginRuntimeStateForTest,
  ];
  for (const resetStep of resetSteps) {
    resetStep();
  }
}
// Integration coverage for the browser tool: both cases call createOpenClawTools
// with a plugin config and inspect whether a tool named "browser" is returned.
describe("createOpenClawTools browser plugin integration", () => {
// Plugin state is reset before AND after each case so that a resolved plugin
// from one test cannot leak into the next (or into other suites).
beforeEach(() => {
resetPluginState();
});
afterEach(() => {
resetPluginState();
});
// Allow-listing "browser" is expected to be sufficient for the tool to appear.
it("loads the bundled browser plugin through normal plugin resolution", () => {
const tools = createOpenClawTools({
config: {
plugins: {
allow: ["browser"],
},
} as OpenClawConfig,
});
expect(tools.map((tool) => tool.name)).toContain("browser");
});
// Same allow-list, but the plugin entry is explicitly disabled: the tool must be absent.
it("omits the browser tool when the bundled browser plugin is disabled", () => {
const tools = createOpenClawTools({
config: {
plugins: {
allow: ["browser"],
entries: {
browser: {
enabled: false,
},
},
},
} as OpenClawConfig,
});
expect(tools.map((tool) => tool.name)).not.toContain("browser");
});
});

View File

@@ -9,15 +9,19 @@ const { resolvePluginToolsMock } = vi.hoisted(() => ({
// Replace the plugin tools module: resolvePluginTools is routed to the hoisted
// mock so tests can assert on its call arguments; the two metadata helpers are
// stubbed (getPluginToolMeta always yields undefined).
vi.mock("../plugins/tools.js", () => ({
resolvePluginTools: resolvePluginToolsMock,
copyPluginToolMeta: vi.fn(),
getPluginToolMeta: vi.fn(() => undefined),
}));
import { createOpenClawTools } from "./openclaw-tools.js";
import { createOpenClawCodingTools } from "./pi-tools.js";
let createOpenClawTools: typeof import("./openclaw-tools.js").createOpenClawTools;
let createOpenClawCodingTools: typeof import("./pi-tools.js").createOpenClawCodingTools;
describe("createOpenClawTools plugin context", () => {
beforeEach(() => {
beforeEach(async () => {
resolvePluginToolsMock.mockClear();
vi.resetModules();
({ createOpenClawTools } = await import("./openclaw-tools.js"));
({ createOpenClawCodingTools } = await import("./pi-tools.js"));
});
it("forwards trusted requester sender identity to plugin tool context", () => {
@@ -54,6 +58,25 @@ describe("createOpenClawTools plugin context", () => {
);
});
// The sandbox bridge URL and host-control flag passed to createOpenClawTools
// must reach resolvePluginTools under context.browser, renamed to
// sandboxBridgeUrl / allowHostControl.
it("forwards browser session wiring to plugin tool context", () => {
createOpenClawTools({
config: {} as never,
sandboxBrowserBridgeUrl: "http://127.0.0.1:9999",
allowHostBrowserControl: true,
});
expect(resolvePluginToolsMock).toHaveBeenCalledWith(
expect.objectContaining({
context: expect.objectContaining({
browser: {
sandboxBridgeUrl: "http://127.0.0.1:9999",
allowHostControl: true,
},
}),
}),
);
});
it("forwards gateway subagent binding for plugin tools", () => {
createOpenClawTools({
config: {} as never,

View File

@@ -8,7 +8,6 @@ import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
import type { SpawnedToolContext } from "./spawned-context.js";
import type { ToolFsPolicy } from "./tool-fs-policy.js";
import { createAgentsListTool } from "./tools/agents-list-tool.js";
import { createBrowserTool } from "./tools/browser-tool.js";
import { createCanvasTool } from "./tools/canvas-tool.js";
import type { AnyAgentTool } from "./tools/common.js";
import { createCronTool } from "./tools/cron-tool.js";
@@ -161,11 +160,6 @@ export function createOpenClawTools(
requesterSenderId: options?.requesterSenderId ?? undefined,
});
const tools: AnyAgentTool[] = [
createBrowserTool({
sandboxBridgeUrl: options?.sandboxBrowserBridgeUrl,
allowHostControl: options?.allowHostBrowserControl,
agentSessionKey: options?.agentSessionKey,
}),
createCanvasTool({ config: options?.config }),
createNodesTool({
agentSessionKey: options?.agentSessionKey,
@@ -255,6 +249,10 @@ export function createOpenClawTools(
}),
sessionKey: options?.agentSessionKey,
sessionId: options?.sessionId,
browser: {
sandboxBridgeUrl: options?.sandboxBrowserBridgeUrl,
allowHostControl: options?.allowHostBrowserControl,
},
messageChannel: options?.agentChannel,
agentAccountId: options?.agentAccountId,
requesterSenderId: options?.requesterSenderId ?? undefined,

View File

@@ -1,396 +1 @@
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
import { browserAct, browserConsoleMessages } from "../../browser/client-actions.js";
import { browserSnapshot, browserTabs } from "../../browser/client.js";
import { resolveBrowserConfig, resolveProfile } from "../../browser/config.js";
import { DEFAULT_AI_SNAPSHOT_MAX_CHARS } from "../../browser/constants.js";
import { getBrowserProfileCapabilities } from "../../browser/profile-capabilities.js";
import { loadConfig } from "../../config/config.js";
import { wrapExternalContent } from "../../security/external-content.js";
import { imageResultFromFile, jsonResult } from "./common.js";
// Mutable table of the external functions the action executors call.
// Executors read from this object (not from the imports directly) so that
// __testing.setDepsForTest can swap individual entries.
const browserToolActionDeps = {
browserAct,
browserConsoleMessages,
browserSnapshot,
browserTabs,
imageResultFromFile,
loadConfig,
};
/** Test-only hooks for the browser tool action executors. */
export const __testing = {
  /**
   * Rewrites every entry of `browserToolActionDeps`.
   *
   * Each key takes the override when one is supplied and otherwise falls back
   * to the real imported implementation, so passing `null` restores all
   * defaults.
   */
  setDepsForTest(
    overrides: Partial<{
      browserAct: typeof browserAct;
      browserConsoleMessages: typeof browserConsoleMessages;
      browserSnapshot: typeof browserSnapshot;
      browserTabs: typeof browserTabs;
      imageResultFromFile: typeof imageResultFromFile;
      loadConfig: typeof loadConfig;
    }> | null,
  ) {
    const requested = overrides ?? {};
    Object.assign(browserToolActionDeps, {
      browserAct: requested.browserAct ?? browserAct,
      browserConsoleMessages: requested.browserConsoleMessages ?? browserConsoleMessages,
      browserSnapshot: requested.browserSnapshot ?? browserSnapshot,
      browserTabs: requested.browserTabs ?? browserTabs,
      imageResultFromFile: requested.imageResultFromFile ?? imageResultFromFile,
      loadConfig: requested.loadConfig ?? loadConfig,
    });
  },
};
// Signature of the callback used to send one browser-control request through a
// proxy instead of calling the browser client directly. The executors in this
// file treat `null` in place of this callback as "no proxy; call directly".
type BrowserProxyRequest = (opts: {
method: string;
path: string;
query?: Record<string, string | number | boolean | undefined>;
body?: unknown;
timeoutMs?: number;
profile?: string;
}) => Promise<unknown>;
/**
 * Serializes a browser payload as pretty-printed JSON and passes it through
 * `wrapExternalContent` with source "browser".
 *
 * @param params.kind - Which browser output the payload represents; echoed in the details.
 * @param params.payload - Value to serialize.
 * @param params.includeWarning - Forwarded to `wrapExternalContent`; defaults to true.
 * @returns The wrapped text plus a details object flagging the content as untrusted.
 */
function wrapBrowserExternalJson(params: {
  kind: "snapshot" | "console" | "tabs";
  payload: unknown;
  includeWarning?: boolean;
}): { wrappedText: string; safeDetails: Record<string, unknown> } {
  const serialized = JSON.stringify(params.payload, null, 2);
  const wrappedText = wrapExternalContent(serialized, {
    source: "browser",
    includeWarning: params.includeWarning ?? true,
  });
  const externalContent = {
    untrusted: true,
    source: "browser",
    kind: params.kind,
    wrapped: true,
  };
  return { wrappedText, safeDetails: { ok: true, externalContent } };
}
/**
 * Builds the tool result for a tab listing: the tabs are wrapped as external
 * browser content (without the warning banner) and the tab count is added to
 * the details.
 */
function formatTabsToolResult(tabs: unknown[]): AgentToolResult<unknown> {
  const { wrappedText, safeDetails } = wrapBrowserExternalJson({
    kind: "tabs",
    payload: { tabs },
    includeWarning: false,
  });
  return {
    content: [{ type: "text", text: wrappedText }],
    details: { ...safeDetails, tabCount: tabs.length },
  };
}
/**
 * Builds the tool result for console output: the whole result is wrapped as
 * external browser content (without the warning banner); the details carry the
 * target id and message count when they have the expected types.
 */
function formatConsoleToolResult(result: {
  targetId?: string;
  messages?: unknown[];
}): AgentToolResult<unknown> {
  const { wrappedText, safeDetails } = wrapBrowserExternalJson({
    kind: "console",
    payload: result,
    includeWarning: false,
  });
  const targetId = typeof result.targetId === "string" ? result.targetId : undefined;
  const messageCount = Array.isArray(result.messages) ? result.messages.length : undefined;
  return {
    content: [{ type: "text" as const, text: wrappedText }],
    details: { ...safeDetails, targetId, messageCount },
  };
}
/**
 * Decides whether an act failure looks like a "tab not found" 404.
 *
 * Only applies when a profile is given: the literal "user" profile is checked
 * without consulting config; any other profile must resolve to a browser
 * profile whose capabilities report `usesChromeMcp`.
 */
function isChromeStaleTargetError(profile: string | undefined, err: unknown): boolean {
  if (!profile) {
    return false;
  }
  const looksLikeMissingTab = (): boolean => {
    const text = String(err);
    return text.includes("404:") && text.includes("tab not found");
  };
  if (profile === "user") {
    return looksLikeMissingTab();
  }
  const cfg = browserToolActionDeps.loadConfig();
  const browserProfile = resolveProfile(resolveBrowserConfig(cfg.browser, cfg), profile);
  const usesChromeMcp = browserProfile
    ? getBrowserProfileCapabilities(browserProfile).usesChromeMcp
    : false;
  return usesChromeMcp ? looksLikeMissingTab() : false;
}
/**
 * Returns a shallow copy of the act request without its `targetId`, or `null`
 * when the request has no non-blank string `targetId` to remove. The input
 * object is left untouched.
 */
function stripTargetIdFromActRequest(
  request: Parameters<typeof browserAct>[1],
): Parameters<typeof browserAct>[1] | null {
  const trimmedTargetId =
    typeof request.targetId === "string" ? request.targetId.trim() : undefined;
  if (!trimmedTargetId) {
    return null;
  }
  const { targetId: _discarded, ...withoutTargetId } = request;
  return withoutTargetId as Parameters<typeof browserAct>[1];
}
/**
 * Reports whether an act request may be retried without a targetId.
 *
 * The request's `kind` is used when it is a string, otherwise its `action`;
 * only "hover", "scrollIntoView" and "wait" qualify.
 */
function canRetryChromeActWithoutTargetId(request: Parameters<typeof browserAct>[1]): boolean {
  const { kind, action } = request as Partial<Record<"kind" | "action", unknown>>;
  let requestedKind = "";
  if (typeof kind === "string") {
    requestedKind = kind;
  } else if (typeof action === "string") {
    requestedKind = action;
  }
  return ["hover", "scrollIntoView", "wait"].includes(requestedKind);
}
/**
 * Lists browser tabs and formats them as a tool result.
 *
 * With a proxy callback the tabs come from `GET /tabs` (a missing `tabs` field
 * is treated as an empty list); otherwise the injected `browserTabs` client is
 * called with the base URL.
 */
export async function executeTabsAction(params: {
  baseUrl?: string;
  profile?: string;
  proxyRequest: BrowserProxyRequest | null;
}): Promise<AgentToolResult<unknown>> {
  const { baseUrl, profile, proxyRequest } = params;
  if (!proxyRequest) {
    const directTabs = await browserToolActionDeps.browserTabs(baseUrl, { profile });
    return formatTabsToolResult(directTabs);
  }
  const proxied = (await proxyRequest({
    method: "GET",
    path: "/tabs",
    profile,
  })) as { tabs?: unknown[] };
  return formatTabsToolResult(proxied.tabs ?? []);
}
/**
 * Takes a page snapshot and formats it as a tool result.
 *
 * Raw tool input is validated field by field (anything of the wrong type is
 * dropped to `undefined`), combined with `browser.snapshotDefaults` from
 * config, and sent either through the proxy (`GET /snapshot`) or the injected
 * `browserSnapshot` client. Snapshot text is always wrapped as untrusted
 * external content before being returned.
 *
 * @param params.input - Raw tool arguments.
 * @param params.baseUrl - Passed to the direct client when no proxy is used.
 * @param params.profile - Browser profile forwarded with the request.
 * @param params.proxyRequest - Proxy callback, or `null` to call the client directly.
 * @returns A text result (or an image result when labels were requested and the
 * snapshot carries an image path).
 */
export async function executeSnapshotAction(params: {
input: Record<string, unknown>;
baseUrl?: string;
profile?: string;
proxyRequest: BrowserProxyRequest | null;
}): Promise<AgentToolResult<unknown>> {
const { input, baseUrl, profile, proxyRequest } = params;
const snapshotDefaults = browserToolActionDeps.loadConfig().browser?.snapshotDefaults;
// Only the two known formats are accepted; anything else leaves the format unset.
const format: "ai" | "aria" | undefined =
input.snapshotFormat === "ai" || input.snapshotFormat === "aria"
? input.snapshotFormat
: undefined;
// An explicit mode wins; the configured default is applied only when the
// caller did not ask for the "aria" format.
const mode: "efficient" | undefined =
input.mode === "efficient"
? "efficient"
: format !== "aria" && snapshotDefaults?.mode === "efficient"
? "efficient"
: undefined;
const labels = typeof input.labels === "boolean" ? input.labels : undefined;
const refs: "aria" | "role" | undefined =
input.refs === "aria" || input.refs === "role" ? input.refs : undefined;
// Tracks whether the caller supplied the key at all, independent of validity:
// a present-but-invalid maxChars suppresses the default below.
const hasMaxChars = Object.hasOwn(input, "maxChars");
const targetId = typeof input.targetId === "string" ? input.targetId.trim() : undefined;
const limit =
typeof input.limit === "number" && Number.isFinite(input.limit) ? input.limit : undefined;
// Must be a positive finite number; fractional values are floored.
const maxChars =
typeof input.maxChars === "number" && Number.isFinite(input.maxChars) && input.maxChars > 0
? Math.floor(input.maxChars)
: undefined;
const interactive = typeof input.interactive === "boolean" ? input.interactive : undefined;
const compact = typeof input.compact === "boolean" ? input.compact : undefined;
const depth =
typeof input.depth === "number" && Number.isFinite(input.depth) ? input.depth : undefined;
const selector = typeof input.selector === "string" ? input.selector.trim() : undefined;
const frame = typeof input.frame === "string" ? input.frame.trim() : undefined;
// Effective character cap:
// - format "ai": caller's value if the key was supplied; otherwise no cap in
//   efficient mode and DEFAULT_AI_SNAPSHOT_MAX_CHARS in every other case.
// - any other format: caller's value if supplied, otherwise no cap.
const resolvedMaxChars =
format === "ai"
? hasMaxChars
? maxChars
: mode === "efficient"
? undefined
: DEFAULT_AI_SNAPSHOT_MAX_CHARS
: hasMaxChars
? maxChars
: undefined;
// format and maxChars are only included as keys when they have a value; the
// remaining fields are always present and may be undefined.
const snapshotQuery = {
...(format ? { format } : {}),
targetId,
limit,
...(typeof resolvedMaxChars === "number" ? { maxChars: resolvedMaxChars } : {}),
refs,
interactive,
compact,
depth,
selector,
frame,
labels,
mode,
};
const snapshot = proxyRequest
? ((await proxyRequest({
method: "GET",
path: "/snapshot",
profile,
query: snapshotQuery,
})) as Awaited<ReturnType<typeof browserSnapshot>>)
: await browserToolActionDeps.browserSnapshot(baseUrl, {
...snapshotQuery,
profile,
});
// Branch on the format reported by the snapshot itself, not the requested one.
if (snapshot.format === "ai") {
const extractedText = snapshot.snapshot ?? "";
const wrappedSnapshot = wrapExternalContent(extractedText, {
source: "browser",
includeWarning: true,
});
// Details carry metadata only; refs are reduced to a count.
const safeDetails = {
ok: true,
format: snapshot.format,
targetId: snapshot.targetId,
url: snapshot.url,
truncated: snapshot.truncated,
stats: snapshot.stats,
refs: snapshot.refs ? Object.keys(snapshot.refs).length : undefined,
labels: snapshot.labels,
labelsCount: snapshot.labelsCount,
labelsSkipped: snapshot.labelsSkipped,
imagePath: snapshot.imagePath,
imageType: snapshot.imageType,
externalContent: {
untrusted: true,
source: "browser",
kind: "snapshot",
format: "ai",
wrapped: true,
},
};
// When labels were requested and an image path came back, return the image
// with the wrapped snapshot text attached.
if (labels && snapshot.imagePath) {
return await browserToolActionDeps.imageResultFromFile({
label: "browser:snapshot",
path: snapshot.imagePath,
extraText: wrappedSnapshot,
details: safeDetails,
});
}
return {
content: [{ type: "text" as const, text: wrappedSnapshot }],
details: safeDetails,
};
}
// Every non-"ai" snapshot is reported as "aria": the whole snapshot object is
// serialized and wrapped, and the details expose the node count.
{
const wrapped = wrapBrowserExternalJson({
kind: "snapshot",
payload: snapshot,
});
return {
content: [{ type: "text" as const, text: wrapped.wrappedText }],
details: {
...wrapped.safeDetails,
format: "aria",
targetId: snapshot.targetId,
url: snapshot.url,
nodeCount: snapshot.nodes.length,
// Overrides the externalContent spread in from safeDetails to add the format.
externalContent: {
untrusted: true,
source: "browser",
kind: "snapshot",
format: "aria",
wrapped: true,
},
},
};
}
}
/**
 * Fetches console messages and formats them as a tool result.
 *
 * `level` and `targetId` are taken from the input only when they are strings
 * (trimmed). The request goes through the proxy (`GET /console`) when one is
 * supplied, otherwise through the injected `browserConsoleMessages` client.
 */
export async function executeConsoleAction(params: {
  input: Record<string, unknown>;
  baseUrl?: string;
  profile?: string;
  proxyRequest: BrowserProxyRequest | null;
}): Promise<AgentToolResult<unknown>> {
  const { input, baseUrl, profile, proxyRequest } = params;
  const trimmedString = (value: unknown): string | undefined =>
    typeof value === "string" ? value.trim() : undefined;
  const filter = {
    level: trimmedString(input.level),
    targetId: trimmedString(input.targetId),
  };
  if (!proxyRequest) {
    const direct = await browserToolActionDeps.browserConsoleMessages(baseUrl, {
      ...filter,
      profile,
    });
    return formatConsoleToolResult(direct);
  }
  const proxied = (await proxyRequest({
    method: "GET",
    path: "/console",
    profile,
    query: filter,
  })) as { ok?: boolean; targetId?: string; messages?: unknown[] };
  return formatConsoleToolResult(proxied);
}
/**
 * Executes a browser `act` request, through the proxy (`POST /act`) when a
 * proxy callback is supplied and through the injected `browserAct` client
 * otherwise.
 *
 * If the failure is classified as a stale Chrome target by
 * `isChromeStaleTargetError`, the current tabs are looked up on a best-effort
 * basis and then:
 * - the request is retried once without its `targetId` when it is a
 *   retry-safe kind and exactly one tab is attached;
 * - otherwise (or if the retry fails) an error with recovery guidance is
 *   thrown, carrying the original failure as `cause`.
 *
 * @param params.request - Act payload sent as the request body.
 * @param params.baseUrl - Passed to the direct client when no proxy is used.
 * @param params.profile - Browser profile forwarded with every request.
 * @param params.proxyRequest - Proxy callback, or `null` to call the client directly.
 * @returns The act result wrapped with `jsonResult`.
 * @throws The original error for failures not classified as stale-target;
 * an `Error` with guidance (and `cause`) for stale-target failures.
 */
export async function executeActAction(params: {
  request: Parameters<typeof browserAct>[1];
  baseUrl?: string;
  profile?: string;
  proxyRequest: BrowserProxyRequest | null;
}): Promise<AgentToolResult<unknown>> {
  const { request, baseUrl, profile, proxyRequest } = params;

  // Sends one act payload over whichever transport is active.
  const sendAct = async (body: Parameters<typeof browserAct>[1]): Promise<unknown> =>
    proxyRequest
      ? await proxyRequest({
          method: "POST",
          path: "/act",
          profile,
          body,
        })
      : await browserToolActionDeps.browserAct(baseUrl, body, {
          profile,
        });

  // Tab lookup used only for recovery. A failure here must never replace the
  // original act error, so BOTH transports degrade to an empty list (previously
  // only the direct client path was guarded).
  const listTabsBestEffort = async (): Promise<unknown[]> => {
    try {
      if (!proxyRequest) {
        return await browserToolActionDeps.browserTabs(baseUrl, { profile });
      }
      const response = (await proxyRequest({
        method: "GET",
        path: "/tabs",
        profile,
      })) as { tabs?: unknown[] } | null | undefined;
      return response?.tabs ?? [];
    } catch {
      return [];
    }
  };

  try {
    return jsonResult(await sendAct(request));
  } catch (err) {
    if (!isChromeStaleTargetError(profile, err)) {
      throw err;
    }
    const retryRequest = stripTargetIdFromActRequest(request);
    const tabs = await listTabsBestEffort();
    // Some user-browser targetIds can go stale between snapshots and actions.
    // Only retry safe read-only actions, and only when exactly one tab remains attached.
    if (retryRequest && canRetryChromeActWithoutTargetId(request) && tabs.length === 1) {
      try {
        return jsonResult(await sendAct(retryRequest));
      } catch {
        // Fall through to explicit stale-target guidance.
      }
    }
    if (!tabs.length) {
      throw new Error(
        `No browser tabs found for profile="${profile}". Make sure the configured Chromium-based browser (v144+) is running and has open tabs, then retry.`,
        { cause: err },
      );
    }
    throw new Error(
      `Chrome tab not found (stale targetId?). Run action=tabs profile="${profile}" and use one of the returned targetIds.`,
      { cause: err },
    );
  }
}
export * from "../../../extensions/browser/src/browser-tool.actions.js";

View File

@@ -1,138 +1 @@
import { Type } from "@sinclair/typebox";
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
// Literal value lists backing the enum fields of the browser tool schema.
// Each is declared `as const` so the string-enum helpers receive literal tuples.

// Accepted values for an act request's `kind`.
// NOTE(review): the act retry helper in browser-tool.actions also checks for
// "scrollIntoView", which is not listed here — confirm whether that is intended.
const BROWSER_ACT_KINDS = [
"click",
"type",
"press",
"hover",
"drag",
"select",
"fill",
"resize",
"wait",
"evaluate",
"close",
] as const;
// Accepted values for the tool's top-level `action`.
const BROWSER_TOOL_ACTIONS = [
"status",
"start",
"stop",
"profiles",
"tabs",
"open",
"focus",
"close",
"snapshot",
"screenshot",
"navigate",
"console",
"pdf",
"upload",
"dialog",
"act",
] as const;
// Where the browser runs.
const BROWSER_TARGETS = ["sandbox", "host", "node"] as const;
// Snapshot output formats.
const BROWSER_SNAPSHOT_FORMATS = ["aria", "ai"] as const;
// Snapshot modes (currently a single value).
const BROWSER_SNAPSHOT_MODES = ["efficient"] as const;
// Ref styles a snapshot can be asked to produce.
const BROWSER_SNAPSHOT_REFS = ["role", "aria"] as const;
// Image encodings accepted by the `type` field.
const BROWSER_IMAGE_TYPES = ["png", "jpeg"] as const;
// Schema for the nested `request` object of an `act` call.
// Only `kind` is required; every other field is optional and grouped below by
// the act kind it belongs to.
// NOTE: Using a flattened object schema instead of Type.Union([Type.Object(...), ...])
// because Claude API on Vertex AI rejects nested anyOf schemas as invalid JSON Schema.
// The discriminator (kind) determines which properties are relevant; runtime validates.
const BrowserActSchema = Type.Object({
kind: stringEnum(BROWSER_ACT_KINDS),
// Common fields
targetId: Type.Optional(Type.String()),
ref: Type.Optional(Type.String()),
// click
doubleClick: Type.Optional(Type.Boolean()),
button: Type.Optional(Type.String()),
modifiers: Type.Optional(Type.Array(Type.String())),
// type
text: Type.Optional(Type.String()),
submit: Type.Optional(Type.Boolean()),
slowly: Type.Optional(Type.Boolean()),
// press
key: Type.Optional(Type.String()),
delayMs: Type.Optional(Type.Number()),
// drag
startRef: Type.Optional(Type.String()),
endRef: Type.Optional(Type.String()),
// select
values: Type.Optional(Type.Array(Type.String())),
// fill - use permissive array of objects
fields: Type.Optional(Type.Array(Type.Object({}, { additionalProperties: true }))),
// resize
width: Type.Optional(Type.Number()),
height: Type.Optional(Type.Number()),
// wait
timeMs: Type.Optional(Type.Number()),
selector: Type.Optional(Type.String()),
url: Type.Optional(Type.String()),
loadState: Type.Optional(Type.String()),
textGone: Type.Optional(Type.String()),
timeoutMs: Type.Optional(Type.Number()),
// evaluate
fn: Type.Optional(Type.String()),
});
// Parameter schema for the browser tool. `action` is the only required field;
// which of the optional fields are meaningful depends on the action.
// IMPORTANT: OpenAI function tool schemas must have a top-level `type: "object"`.
// A root-level `Type.Union([...])` compiles to `{ anyOf: [...] }` (no `type`),
// which OpenAI rejects ("Invalid schema ... type: None"). Keep this schema an object.
export const BrowserToolSchema = Type.Object({
action: stringEnum(BROWSER_TOOL_ACTIONS),
target: optionalStringEnum(BROWSER_TARGETS),
node: Type.Optional(Type.String()),
profile: Type.Optional(Type.String()),
targetUrl: Type.Optional(Type.String()),
url: Type.Optional(Type.String()),
targetId: Type.Optional(Type.String()),
limit: Type.Optional(Type.Number()),
maxChars: Type.Optional(Type.Number()),
mode: optionalStringEnum(BROWSER_SNAPSHOT_MODES),
snapshotFormat: optionalStringEnum(BROWSER_SNAPSHOT_FORMATS),
refs: optionalStringEnum(BROWSER_SNAPSHOT_REFS),
interactive: Type.Optional(Type.Boolean()),
compact: Type.Optional(Type.Boolean()),
depth: Type.Optional(Type.Number()),
selector: Type.Optional(Type.String()),
frame: Type.Optional(Type.String()),
labels: Type.Optional(Type.Boolean()),
fullPage: Type.Optional(Type.Boolean()),
ref: Type.Optional(Type.String()),
element: Type.Optional(Type.String()),
type: optionalStringEnum(BROWSER_IMAGE_TYPES),
level: Type.Optional(Type.String()),
paths: Type.Optional(Type.Array(Type.String())),
inputRef: Type.Optional(Type.String()),
timeoutMs: Type.Optional(Type.Number()),
accept: Type.Optional(Type.Boolean()),
promptText: Type.Optional(Type.String()),
// Legacy flattened act params (preferred: request={...})
// These mirror BrowserActSchema fields at the top level; targetId, ref,
// selector, url and timeoutMs are already declared above.
kind: Type.Optional(stringEnum(BROWSER_ACT_KINDS)),
doubleClick: Type.Optional(Type.Boolean()),
button: Type.Optional(Type.String()),
modifiers: Type.Optional(Type.Array(Type.String())),
text: Type.Optional(Type.String()),
submit: Type.Optional(Type.Boolean()),
slowly: Type.Optional(Type.Boolean()),
key: Type.Optional(Type.String()),
delayMs: Type.Optional(Type.Number()),
startRef: Type.Optional(Type.String()),
endRef: Type.Optional(Type.String()),
values: Type.Optional(Type.Array(Type.String())),
fields: Type.Optional(Type.Array(Type.Object({}, { additionalProperties: true }))),
width: Type.Optional(Type.Number()),
height: Type.Optional(Type.Number()),
timeMs: Type.Optional(Type.Number()),
textGone: Type.Optional(Type.String()),
loadState: Type.Optional(Type.String()),
fn: Type.Optional(Type.String()),
// Preferred way to pass act parameters.
request: Type.Optional(BrowserActSchema),
});
export * from "../../../extensions/browser/src/browser-tool.schema.js";

View File

@@ -1,755 +1 @@
import crypto from "node:crypto";
import {
browserAct,
browserArmDialog,
browserArmFileChooser,
browserNavigate,
browserPdfSave,
browserScreenshotAction,
} from "../../browser/client-actions.js";
import {
browserCloseTab,
browserFocusTab,
browserOpenTab,
browserProfiles,
browserStart,
browserStatus,
browserStop,
} from "../../browser/client.js";
import { resolveBrowserConfig, resolveProfile } from "../../browser/config.js";
import { DEFAULT_UPLOAD_DIR, resolveExistingPathsWithinRoot } from "../../browser/paths.js";
import { getBrowserProfileCapabilities } from "../../browser/profile-capabilities.js";
import { applyBrowserProxyPaths, persistBrowserProxyFiles } from "../../browser/proxy-files.js";
import {
trackSessionBrowserTab,
untrackSessionBrowserTab,
} from "../../browser/session-tab-registry.js";
import { loadConfig } from "../../config/config.js";
import {
executeActAction,
executeConsoleAction,
executeSnapshotAction,
executeTabsAction,
} from "./browser-tool.actions.js";
import { BrowserToolSchema } from "./browser-tool.schema.js";
import { type AnyAgentTool, imageResultFromFile, jsonResult, readStringParam } from "./common.js";
import { callGatewayTool } from "./gateway.js";
import {
listNodes,
resolveNodeIdFromList,
selectDefaultNodeFromList,
type NodeListNode,
} from "./nodes-utils.js";
// Mutable table of the external functions the browser tool calls. Code in this
// file reads from this object so that __testing.setDepsForTest can replace
// individual entries.
const browserToolDeps = {
browserAct,
browserArmDialog,
browserArmFileChooser,
browserCloseTab,
browserFocusTab,
browserNavigate,
browserOpenTab,
browserPdfSave,
browserProfiles,
browserScreenshotAction,
browserStart,
browserStatus,
browserStop,
imageResultFromFile,
loadConfig,
listNodes,
callGatewayTool,
trackSessionBrowserTab,
untrackSessionBrowserTab,
};
/** Test-only hooks for the browser tool. */
export const __testing = {
  /**
   * Rewrites every entry of `browserToolDeps`.
   *
   * Each key takes the override when one is supplied and otherwise falls back
   * to the real imported implementation, so passing `null` restores all
   * defaults.
   */
  setDepsForTest(
    overrides: Partial<{
      browserAct: typeof browserAct;
      browserArmDialog: typeof browserArmDialog;
      browserArmFileChooser: typeof browserArmFileChooser;
      browserCloseTab: typeof browserCloseTab;
      browserFocusTab: typeof browserFocusTab;
      browserNavigate: typeof browserNavigate;
      browserOpenTab: typeof browserOpenTab;
      browserPdfSave: typeof browserPdfSave;
      browserProfiles: typeof browserProfiles;
      browserScreenshotAction: typeof browserScreenshotAction;
      browserStart: typeof browserStart;
      browserStatus: typeof browserStatus;
      browserStop: typeof browserStop;
      imageResultFromFile: typeof imageResultFromFile;
      loadConfig: typeof loadConfig;
      listNodes: typeof listNodes;
      callGatewayTool: typeof callGatewayTool;
      trackSessionBrowserTab: typeof trackSessionBrowserTab;
      untrackSessionBrowserTab: typeof untrackSessionBrowserTab;
    }> | null,
  ) {
    const requested = overrides ?? {};
    Object.assign(browserToolDeps, {
      browserAct: requested.browserAct ?? browserAct,
      browserArmDialog: requested.browserArmDialog ?? browserArmDialog,
      browserArmFileChooser: requested.browserArmFileChooser ?? browserArmFileChooser,
      browserCloseTab: requested.browserCloseTab ?? browserCloseTab,
      browserFocusTab: requested.browserFocusTab ?? browserFocusTab,
      browserNavigate: requested.browserNavigate ?? browserNavigate,
      browserOpenTab: requested.browserOpenTab ?? browserOpenTab,
      browserPdfSave: requested.browserPdfSave ?? browserPdfSave,
      browserProfiles: requested.browserProfiles ?? browserProfiles,
      browserScreenshotAction: requested.browserScreenshotAction ?? browserScreenshotAction,
      browserStart: requested.browserStart ?? browserStart,
      browserStatus: requested.browserStatus ?? browserStatus,
      browserStop: requested.browserStop ?? browserStop,
      imageResultFromFile: requested.imageResultFromFile ?? imageResultFromFile,
      loadConfig: requested.loadConfig ?? loadConfig,
      listNodes: requested.listNodes ?? listNodes,
      callGatewayTool: requested.callGatewayTool ?? callGatewayTool,
      trackSessionBrowserTab: requested.trackSessionBrowserTab ?? trackSessionBrowserTab,
      untrackSessionBrowserTab: requested.untrackSessionBrowserTab ?? untrackSessionBrowserTab,
    });
  },
};
/**
 * Reads the optional `targetId` and `timeoutMs` tool arguments.
 *
 * `targetId` is returned trimmed when it is a string; `timeoutMs` is returned
 * when it is a finite number. Anything else yields `undefined` for that field.
 */
function readOptionalTargetAndTimeout(params: Record<string, unknown>) {
  const { targetId: rawTargetId, timeoutMs: rawTimeoutMs } = params;
  let targetId: string | undefined;
  if (typeof rawTargetId === "string") {
    targetId = rawTargetId.trim();
  }
  let timeoutMs: number | undefined;
  if (typeof rawTimeoutMs === "number" && Number.isFinite(rawTimeoutMs)) {
    timeoutMs = rawTimeoutMs;
  }
  return { targetId, timeoutMs };
}
/**
 * Reads the URL argument: `targetUrl` is preferred, and `url` is read as a
 * required fallback (labelled "targetUrl") when `targetUrl` yields nothing.
 */
function readTargetUrlParam(params: Record<string, unknown>) {
  const explicitTargetUrl = readStringParam(params, "targetUrl");
  if (explicitTargetUrl != null) {
    return explicitTargetUrl;
  }
  return readStringParam(params, "url", { required: true, label: "targetUrl" });
}
// Top-level tool argument names that are copied into the act request when the
// caller uses the legacy flattened form (a top-level `kind` instead of a
// `request` object). `kind` itself is handled separately and is not listed.
const LEGACY_BROWSER_ACT_REQUEST_KEYS = [
"targetId",
"ref",
"doubleClick",
"button",
"modifiers",
"text",
"submit",
"slowly",
"key",
"delayMs",
"startRef",
"endRef",
"values",
"fields",
"width",
"height",
"timeMs",
"textGone",
"selector",
"url",
"loadState",
"fn",
"timeoutMs",
] as const;
/**
 * Extracts the act request from the tool arguments.
 *
 * A truthy object under `request` is returned as-is. Otherwise a request is
 * assembled from the legacy flattened form: the top-level `kind` plus every
 * legacy key that is present as an own property. Returns `undefined` when
 * neither form is supplied.
 */
function readActRequestParam(params: Record<string, unknown>) {
  const nested = params.request;
  if (nested && typeof nested === "object") {
    return nested as Parameters<typeof browserAct>[1];
  }
  const kind = readStringParam(params, "kind");
  if (!kind) {
    return undefined;
  }
  const legacyEntries = LEGACY_BROWSER_ACT_REQUEST_KEYS.filter((key) =>
    Object.hasOwn(params, key),
  ).map((key) => [key, params[key]] as const);
  const assembled: Record<string, unknown> = { kind, ...Object.fromEntries(legacyEntries) };
  return assembled as Parameters<typeof browserAct>[1];
}
// A file returned alongside a proxy response, carried inline as base64.
type BrowserProxyFile = {
path: string;
base64: string;
mimeType?: string;
};
// Decoded proxy response: the browser-control result plus any attached files.
type BrowserProxyResult = {
result: unknown;
files?: BrowserProxyFile[];
};
// Proxy timeout used when the caller does not supply a valid one (20 s).
const DEFAULT_BROWSER_PROXY_TIMEOUT_MS = 20_000;
// Extra time added on top of the proxy timeout for the enclosing gateway call (5 s).
const BROWSER_PROXY_GATEWAY_TIMEOUT_SLACK_MS = 5_000;
// A node selected to serve browser requests, with a display label.
type BrowserNodeTarget = {
nodeId: string;
label?: string;
};
/**
 * A node counts as browser-capable when its `caps` array contains "browser"
 * or its `commands` array contains "browser.proxy". Non-array values are
 * treated as empty.
 */
function isBrowserNode(node: NodeListNode) {
  const listIncludes = (list: unknown, wanted: string): boolean =>
    Array.isArray(list) ? list.includes(wanted) : false;
  return listIncludes(node.caps, "browser") || listIncludes(node.commands, "browser.proxy");
}
/**
 * Decides whether a browser request should be routed to a remote node and, if
 * so, which one.
 *
 * The decision combines the `gateway.nodes.browser` policy from config
 * (`mode`, defaulting to "auto", and an optional pinned `node`) with what the
 * caller asked for.
 *
 * @param params.requestedNode - Node id/name explicitly requested by the caller.
 * @param params.target - Explicit browser location requested by the caller.
 * @param params.sandboxBridgeUrl - Sandbox bridge URL, if a sandbox browser is available.
 * @returns The selected node, or `null` when the request should not use a node.
 * @throws When a node was explicitly requested but the policy is "off", no
 * browser-capable node is connected, or no default node could be selected.
 */
async function resolveBrowserNodeTarget(params: {
requestedNode?: string;
target?: "sandbox" | "host" | "node";
sandboxBridgeUrl?: string;
}): Promise<BrowserNodeTarget | null> {
const cfg = browserToolDeps.loadConfig();
const policy = cfg.gateway?.nodes?.browser;
const mode = policy?.mode ?? "auto";
// Policy "off": explicit node requests are an error, everything else stays local.
if (mode === "off") {
if (params.target === "node" || params.requestedNode) {
throw new Error("Node browser proxy is disabled (gateway.nodes.browser.mode=off).");
}
return null;
}
// A sandbox browser is available and the caller did not ask for a node.
if (params.sandboxBridgeUrl?.trim() && params.target !== "node" && !params.requestedNode) {
return null;
}
// The caller explicitly chose sandbox or host.
if (params.target && params.target !== "node") {
return null;
}
// Policy "manual": nodes are used only on explicit request. Checked here so
// the node listing below is skipped when it is not needed.
if (mode === "manual" && params.target !== "node" && !params.requestedNode) {
return null;
}
const nodes = await browserToolDeps.listNodes({});
const browserNodes = nodes.filter((node) => node.connected && isBrowserNode(node));
if (browserNodes.length === 0) {
if (params.target === "node" || params.requestedNode) {
throw new Error("No connected browser-capable nodes.");
}
return null;
}
// An explicitly requested node takes precedence over the node pinned in config.
const requested = params.requestedNode?.trim() || policy?.node?.trim();
if (requested) {
const nodeId = resolveNodeIdFromList(browserNodes, requested, false);
const node = browserNodes.find((entry) => entry.nodeId === nodeId);
return { nodeId, label: node?.displayName ?? node?.remoteIp ?? nodeId };
}
// NOTE(review): presumably yields nothing when no single default can be
// chosen (fallback: "none") — the error below assumes that means several
// candidates; confirm against selectDefaultNodeFromList.
const selected = selectDefaultNodeFromList(browserNodes, {
preferLocalMac: false,
fallback: "none",
});
if (params.target === "node") {
if (selected) {
return {
nodeId: selected.nodeId,
label: selected.displayName ?? selected.remoteIp ?? selected.nodeId,
};
}
throw new Error(
`Multiple browser-capable nodes connected (${browserNodes.length}). Set gateway.nodes.browser.node or pass node=<id>.`,
);
}
// Manual mode never auto-selects a node (reachable when requestedNode was
// truthy but blank after trimming).
if (mode === "manual") {
return null;
}
if (selected) {
return {
nodeId: selected.nodeId,
label: selected.displayName ?? selected.remoteIp ?? selected.nodeId,
};
}
return null;
}
/**
 * Sends one browser-control request to a node via the gateway's `node.invoke`
 * tool (command "browser.proxy") and returns the decoded response.
 *
 * The proxy timeout is the caller's finite `timeoutMs` floored and clamped to
 * at least 1, or the default; the gateway call is given that timeout plus a
 * fixed slack. The response is taken from `payload` when present, otherwise
 * parsed from a non-empty `payloadJSON` string.
 *
 * @throws Error("browser proxy failed") when no object with a `result` key is obtained.
 */
async function callBrowserProxy(params: {
  nodeId: string;
  method: string;
  path: string;
  query?: Record<string, string | number | boolean | undefined>;
  body?: unknown;
  timeoutMs?: number;
  profile?: string;
}): Promise<BrowserProxyResult> {
  const { nodeId, method, path, query, body, timeoutMs, profile } = params;
  let proxyTimeoutMs = DEFAULT_BROWSER_PROXY_TIMEOUT_MS;
  if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) {
    proxyTimeoutMs = Math.max(1, Math.floor(timeoutMs));
  }
  const response = await browserToolDeps.callGatewayTool<{
    payloadJSON?: string;
    payload?: string;
  }>(
    "node.invoke",
    { timeoutMs: proxyTimeoutMs + BROWSER_PROXY_GATEWAY_TIMEOUT_SLACK_MS },
    {
      nodeId,
      command: "browser.proxy",
      params: { method, path, query, body, timeoutMs: proxyTimeoutMs, profile },
      idempotencyKey: crypto.randomUUID(),
    },
  );
  let parsed: string | BrowserProxyResult | null = response?.payload ?? null;
  if (parsed === null && typeof response?.payloadJSON === "string" && response.payloadJSON) {
    parsed = JSON.parse(response.payloadJSON) as BrowserProxyResult;
  }
  if (!parsed || typeof parsed !== "object" || !("result" in parsed)) {
    throw new Error("browser proxy failed");
  }
  return parsed;
}
/** Forwards proxy-returned files to `persistBrowserProxyFiles` and returns its result. */
async function persistProxyFiles(files: BrowserProxyFile[] | undefined) {
  const persisted = await persistBrowserProxyFiles(files);
  return persisted;
}

/** Forwards a proxy result and path mapping to `applyBrowserProxyPaths`. */
function applyProxyPaths(result: unknown, mapping: Map<string, string>): void {
  applyBrowserProxyPaths(result, mapping);
}
/**
 * Resolves the base URL for a non-node browser request.
 *
 * The target defaults to "sandbox" when a sandbox bridge URL is available and
 * to "host" otherwise.
 *
 * @param params.target - Explicit browser location, if the caller chose one.
 * @param params.sandboxBridgeUrl - Sandbox bridge URL; surrounding whitespace is ignored.
 * @param params.allowHostControl - `false` blocks the host target.
 * @returns The sandbox bridge URL without one trailing slash, or `undefined`
 * for the host target.
 * @throws When the sandbox target is chosen without a bridge URL, when host
 * control is disallowed, or when browser control is disabled in config.
 */
function resolveBrowserBaseUrl(params: {
  target?: "sandbox" | "host";
  sandboxBridgeUrl?: string;
  allowHostControl?: boolean;
}): string | undefined {
  // Read config through the injectable deps table, like the other helpers in
  // this file, so __testing.setDepsForTest({ loadConfig }) also applies here.
  const cfg = browserToolDeps.loadConfig();
  const resolved = resolveBrowserConfig(cfg.browser, cfg);
  const normalizedSandbox = params.sandboxBridgeUrl?.trim() ?? "";
  const target = params.target ?? (normalizedSandbox ? "sandbox" : "host");
  if (target === "sandbox") {
    if (!normalizedSandbox) {
      throw new Error(
        'Sandbox browser is unavailable. Enable agents.defaults.sandbox.browser.enabled or use target="host" if allowed.',
      );
    }
    return normalizedSandbox.replace(/\/$/, "");
  }
  // Host target: only an explicit `false` blocks it; undefined is allowed.
  if (params.allowHostControl === false) {
    throw new Error("Host browser control is disabled by sandbox policy.");
  }
  if (!resolved.enabled) {
    throw new Error(
      "Browser control is disabled. Set browser.enabled=true in ~/.openclaw/openclaw.json.",
    );
  }
  return undefined;
}
/**
 * Reports whether the named profile resolves to a browser profile whose
 * capabilities report `usesChromeMcp`. Returns `false` when no profile name is
 * given or the name does not resolve.
 */
function shouldPreferHostForProfile(profileName: string | undefined) {
  if (!profileName) {
    return false;
  }
  const cfg = browserToolDeps.loadConfig();
  const profile = resolveProfile(resolveBrowserConfig(cfg.browser, cfg), profileName);
  return profile ? getBrowserProfileCapabilities(profile).usesChromeMcp : false;
}
/**
 * Builds the `browser` agent tool.
 *
 * The returned tool reads `action`, `profile`, `node` and `target` from the
 * call arguments, decides whether the call is routed through a node-hosted
 * browser proxy or sent to a browser control server base URL, and then
 * dispatches on `action`.
 *
 * @param opts - Optional wiring for the tool.
 * @param opts.sandboxBridgeUrl - When set, the advertised default target is
 *   "sandbox" instead of "host"; the value is also forwarded to node-target
 *   and base-URL resolution.
 * @param opts.allowHostControl - When `false`, the description states that the
 *   host target is blocked by policy; the value is forwarded to base-URL
 *   resolution.
 * @param opts.agentSessionKey - Session key passed along when tabs opened or
 *   closed on the non-proxy path are tracked/untracked.
 * @returns The tool definition (label, name, description, schema, executor).
 * @throws Error from `execute` when `node` is combined with a non-node target,
 *   when a host-preferring profile is combined with a node or sandbox target,
 *   when `upload` has no usable paths, when `act` has no request, or when the
 *   action is not recognised.
 */
export function createBrowserTool(opts?: {
  sandboxBridgeUrl?: string;
  allowHostControl?: boolean;
  agentSessionKey?: string;
}): AnyAgentTool {
  const targetDefault = opts?.sandboxBridgeUrl ? "sandbox" : "host";
  const hostHint =
    opts?.allowHostControl === false ? "Host target blocked by policy." : "Host target allowed.";

  // Sentences are joined with single spaces to form the tool description.
  const descriptionParts = [
    "Control the browser via OpenClaw's browser control server (status/start/stop/profiles/tabs/open/snapshot/screenshot/actions).",
    "Browser choice: omit profile by default for the isolated OpenClaw-managed browser (`openclaw`).",
    'For the logged-in user browser on the local host, use profile="user". A supported Chromium-based browser (v144+) must be running. Use only when existing logins/cookies matter and the user is present.',
    'When a node-hosted browser proxy is available, the tool may auto-route to it. Pin a node with node=<id|name> or target="node".',
    "When using refs from snapshot (e.g. e12), keep the same tab: prefer passing targetId from the snapshot response into subsequent actions (act/click/type/etc).",
    'For stable, self-resolving refs across calls, use snapshot with refs="aria" (Playwright aria-ref ids). Default refs="role" are role+name-based.',
    "Use snapshot+act for UI automation. Avoid act:wait by default; use only in exceptional cases when no reliable UI state exists.",
    `target selects browser location (sandbox|host|node). Default: ${targetDefault}.`,
    hostHint,
  ];

  return {
    label: "Browser",
    name: "browser",
    description: descriptionParts.join(" "),
    parameters: BrowserToolSchema,
    execute: async (_toolCallId, args) => {
      // Shape of a single request forwarded through the node browser proxy.
      type NodeProxyRequest = {
        method: string;
        path: string;
        query?: Record<string, string | number | boolean | undefined>;
        body?: unknown;
        timeoutMs?: number;
        profile?: string;
      };

      const toolInput = args as Record<string, unknown>;
      const action = readStringParam(toolInput, "action", { required: true });
      const profile = readStringParam(toolInput, "profile");
      const nodeRef = readStringParam(toolInput, "node");
      let target = readStringParam(toolInput, "target") as "sandbox" | "host" | "node" | undefined;

      if (nodeRef && target && target !== "node") {
        throw new Error('node is only supported with target="node".');
      }

      // User-browser profiles (existing-session) are host-only.
      if (shouldPreferHostForProfile(profile)) {
        if (nodeRef || target === "node") {
          throw new Error(`profile="${profile}" only supports the local host browser.`);
        }
        if (target === "sandbox") {
          throw new Error(
            `profile="${profile}" cannot use the sandbox browser; use target="host" or omit target.`,
          );
        }
        // A node reference would already have thrown above, so only the
        // missing-target case is left to default.
        if (!target) {
          target = "host";
        }
      }

      const nodeRoute = await resolveBrowserNodeTarget({
        requestedNode: nodeRef ?? undefined,
        target,
        sandboxBridgeUrl: opts?.sandboxBridgeUrl,
      });
      const localTarget = target === "node" ? undefined : target;
      // The base URL is only resolved when the call is not routed via a node.
      const baseUrl = !nodeRoute
        ? resolveBrowserBaseUrl({
            target: localTarget,
            sandboxBridgeUrl: opts?.sandboxBridgeUrl,
            allowHostControl: opts?.allowHostControl,
          })
        : undefined;

      // Non-null only when routed via a node: forwards the request, persists
      // any returned files, and applies the resulting path mapping to the result.
      const viaNode = !nodeRoute
        ? null
        : async (req: NodeProxyRequest) => {
            const response = await callBrowserProxy({
              nodeId: nodeRoute.nodeId,
              method: req.method,
              path: req.path,
              query: req.query,
              body: req.body,
              timeoutMs: req.timeoutMs,
              profile: req.profile,
            });
            const pathMapping = await persistProxyFiles(response.files);
            applyProxyPaths(response.result, pathMapping);
            return response.result;
          };

      // Status lookup shared by the status/start/stop actions.
      const fetchStatus = async () =>
        viaNode
          ? await viaNode({ method: "GET", path: "/", profile })
          : await browserToolDeps.browserStatus(baseUrl, { profile });

      switch (action) {
        case "status":
          return jsonResult(await fetchStatus());

        case "start":
          if (viaNode) {
            await viaNode({ method: "POST", path: "/start", profile });
          } else {
            await browserToolDeps.browserStart(baseUrl, { profile });
          }
          return jsonResult(await fetchStatus());

        case "stop":
          if (viaNode) {
            await viaNode({ method: "POST", path: "/stop", profile });
          } else {
            await browserToolDeps.browserStop(baseUrl, { profile });
          }
          return jsonResult(await fetchStatus());

        case "profiles": {
          if (!viaNode) {
            return jsonResult({ profiles: await browserToolDeps.browserProfiles(baseUrl) });
          }
          return jsonResult(await viaNode({ method: "GET", path: "/profiles" }));
        }

        case "tabs":
          return await executeTabsAction({ baseUrl, profile, proxyRequest: viaNode });

        case "open": {
          const url = readTargetUrlParam(toolInput);
          if (!viaNode) {
            const openedTab = await browserToolDeps.browserOpenTab(baseUrl, url, { profile });
            // Record the new tab against the agent session on the non-proxy path.
            browserToolDeps.trackSessionBrowserTab({
              sessionKey: opts?.agentSessionKey,
              targetId: openedTab.targetId,
              baseUrl,
              profile,
            });
            return jsonResult(openedTab);
          }
          return jsonResult(
            await viaNode({
              method: "POST",
              path: "/tabs/open",
              profile,
              body: { url },
            }),
          );
        }

        case "focus": {
          const targetId = readStringParam(toolInput, "targetId", {
            required: true,
          });
          if (!viaNode) {
            await browserToolDeps.browserFocusTab(baseUrl, targetId, { profile });
            return jsonResult({ ok: true });
          }
          return jsonResult(
            await viaNode({
              method: "POST",
              path: "/tabs/focus",
              profile,
              body: { targetId },
            }),
          );
        }

        case "close": {
          const targetId = readStringParam(toolInput, "targetId");
          if (viaNode) {
            if (targetId) {
              return jsonResult(
                await viaNode({
                  method: "DELETE",
                  path: `/tabs/${encodeURIComponent(targetId)}`,
                  profile,
                }),
              );
            }
            // Without a targetId the close is sent as an act request.
            return jsonResult(
              await viaNode({
                method: "POST",
                path: "/act",
                profile,
                body: { kind: "close" },
              }),
            );
          }
          if (!targetId) {
            await browserToolDeps.browserAct(baseUrl, { kind: "close" }, { profile });
            return jsonResult({ ok: true });
          }
          await browserToolDeps.browserCloseTab(baseUrl, targetId, { profile });
          browserToolDeps.untrackSessionBrowserTab({
            sessionKey: opts?.agentSessionKey,
            targetId,
            baseUrl,
            profile,
          });
          return jsonResult({ ok: true });
        }

        case "snapshot":
          return await executeSnapshotAction({
            input: toolInput,
            baseUrl,
            profile,
            proxyRequest: viaNode,
          });

        case "screenshot": {
          const targetId = readStringParam(toolInput, "targetId");
          const fullPage = Boolean(toolInput.fullPage);
          const ref = readStringParam(toolInput, "ref");
          const element = readStringParam(toolInput, "element");
          // Anything other than "jpeg" yields "png".
          const type = toolInput.type === "jpeg" ? "jpeg" : "png";
          const shot = !viaNode
            ? await browserToolDeps.browserScreenshotAction(baseUrl, {
                targetId,
                fullPage,
                ref,
                element,
                type,
                profile,
              })
            : ((await viaNode({
                method: "POST",
                path: "/screenshot",
                profile,
                body: {
                  targetId,
                  fullPage,
                  ref,
                  element,
                  type,
                },
              })) as Awaited<ReturnType<typeof browserScreenshotAction>>);
          return await browserToolDeps.imageResultFromFile({
            label: "browser:screenshot",
            path: shot.path,
            details: shot,
          });
        }

        case "navigate": {
          const url = readTargetUrlParam(toolInput);
          const targetId = readStringParam(toolInput, "targetId");
          if (!viaNode) {
            return jsonResult(
              await browserToolDeps.browserNavigate(baseUrl, {
                url,
                targetId,
                profile,
              }),
            );
          }
          return jsonResult(
            await viaNode({
              method: "POST",
              path: "/navigate",
              profile,
              body: {
                url,
                targetId,
              },
            }),
          );
        }

        case "console":
          return await executeConsoleAction({
            input: toolInput,
            baseUrl,
            profile,
            proxyRequest: viaNode,
          });

        case "pdf": {
          const targetId =
            typeof toolInput.targetId === "string" ? toolInput.targetId.trim() : undefined;
          const saved = !viaNode
            ? await browserToolDeps.browserPdfSave(baseUrl, { targetId, profile })
            : ((await viaNode({
                method: "POST",
                path: "/pdf",
                profile,
                body: { targetId },
              })) as Awaited<ReturnType<typeof browserPdfSave>>);
          return {
            content: [{ type: "text" as const, text: `FILE:${saved.path}` }],
            details: saved,
          };
        }

        case "upload": {
          const requestedPaths = Array.isArray(toolInput.paths)
            ? toolInput.paths.map((entry) => String(entry))
            : [];
          if (requestedPaths.length === 0) {
            throw new Error("paths required");
          }
          // Requested paths are resolved against the uploads directory; a
          // failed resolution is surfaced as an error.
          const resolution = await resolveExistingPathsWithinRoot({
            rootDir: DEFAULT_UPLOAD_DIR,
            requestedPaths,
            scopeLabel: `uploads directory (${DEFAULT_UPLOAD_DIR})`,
          });
          if (!resolution.ok) {
            throw new Error(resolution.error);
          }
          const normalizedPaths = resolution.paths;
          const ref = readStringParam(toolInput, "ref");
          const inputRef = readStringParam(toolInput, "inputRef");
          const element = readStringParam(toolInput, "element");
          const { targetId, timeoutMs } = readOptionalTargetAndTimeout(toolInput);
          if (!viaNode) {
            return jsonResult(
              await browserToolDeps.browserArmFileChooser(baseUrl, {
                paths: normalizedPaths,
                ref,
                inputRef,
                element,
                targetId,
                timeoutMs,
                profile,
              }),
            );
          }
          return jsonResult(
            await viaNode({
              method: "POST",
              path: "/hooks/file-chooser",
              profile,
              body: {
                paths: normalizedPaths,
                ref,
                inputRef,
                element,
                targetId,
                timeoutMs,
              },
            }),
          );
        }

        case "dialog": {
          const accept = Boolean(toolInput.accept);
          const promptText =
            typeof toolInput.promptText === "string" ? toolInput.promptText : undefined;
          const { targetId, timeoutMs } = readOptionalTargetAndTimeout(toolInput);
          if (!viaNode) {
            return jsonResult(
              await browserToolDeps.browserArmDialog(baseUrl, {
                accept,
                promptText,
                targetId,
                timeoutMs,
                profile,
              }),
            );
          }
          return jsonResult(
            await viaNode({
              method: "POST",
              path: "/hooks/dialog",
              profile,
              body: {
                accept,
                promptText,
                targetId,
                timeoutMs,
              },
            }),
          );
        }

        case "act": {
          const request = readActRequestParam(toolInput);
          if (!request) {
            throw new Error("request required");
          }
          return await executeActAction({
            request,
            baseUrl,
            profile,
            proxyRequest: viaNode,
          });
        }

        default:
          throw new Error(`Unknown action: ${action}`);
      }
    },
  };
}
export * from "../../../extensions/browser/src/browser-tool.js";