Files
openclaw/src/browser/client-actions-core.ts
Vincent Koc f59b2b1db3 fix(browser): normalize batch act dispatch for selector and batch support (#45457)
* feat(browser): add batch actions, CSS selector support, and click delayMs

Adds three improvements to the browser act tool:

1. CSS selector support: All element-targeting actions (click, type,
   hover, drag, scrollIntoView, select) now accept an optional
   'selector' parameter alongside 'ref'. When selector is provided,
   Playwright's page.locator() is used directly, skipping the need
   for a snapshot to obtain refs. This reduces roundtrips for agents
   that already know the DOM structure.

2. Click delay (delayMs): The click action now accepts an optional
   'delayMs' parameter. When set, the element is hovered first, then
   after the specified delay, clicked. This enables human-like
   hover-before-click in a single tool call instead of three
   (hover + wait + click).

3. Batch actions: New 'batch' action kind that accepts an array of
   actions to execute sequentially in a single tool call. Supports
   'stopOnError' (default true) to control whether execution halts
   on first failure. Results are returned as an array. This eliminates
   the AI inference roundtrip between each action, dramatically
   reducing latency and token cost for multi-step flows.

Addresses: #44431, #38844

* fix(browser): address security review — batch evaluateEnabled guard, input validation, recursion limit

Fixes all 4 issues raised by Greptile review:

1. Security: batch actions now respect evaluateEnabled flag.
   executeSingleAction and batchViaPlaywright accept evaluateEnabled
   param. evaluate and wait-with-fn inside batches are rejected
   when evaluateEnabled=false, matching the direct route guards.

2. Security: batch input validation. Each action in body.actions
   is validated as a plain object with a known kind string before
   dispatch. Applies same normalization as direct action handlers.

3. Perf: SELECTOR_ALLOWED_KINDS moved to module scope as a
   ReadonlySet<string> constant (was re-created on every request).

4. Security: max batch nesting depth of 5. Nested batch actions
   track depth and throw if MAX_BATCH_DEPTH exceeded, preventing
   call stack exhaustion from crafted payloads.

* fix(browser): normalize batch act dispatch

* fix(browser): tighten existing-session act typing

* fix(browser): preserve batch type text

* fix(browser): complete batch action execution

* test(browser): cover batch route normalization

* test(browser): cover batch interaction dispatch

* fix(browser): bound batch route action inputs

* fix(browser): harden batch interaction limits

* test(browser): cover batch security guardrails

---------

Co-authored-by: Diwakar <diwakarrankawat@gmail.com>
2026-03-13 15:10:55 -07:00

280 lines
6.7 KiB
TypeScript

import type {
BrowserActionOk,
BrowserActionPathResult,
BrowserActionTabResult,
} from "./client-actions-types.js";
import { buildProfileQuery, withBaseUrl } from "./client-actions-url.js";
import { fetchBrowserJson } from "./client-fetch.js";
/**
 * A single form control entry carried by a `fill` action request.
 */
export type BrowserFormField = {
  /** Reference string naming the element to fill. */
  ref: string;
  /** Free-form string describing the kind of control. */
  type: string;
  /** Value to apply to the control; may be omitted. */
  value?: boolean | number | string;
};
/**
 * Request body accepted by the browser `act` endpoint.
 *
 * This is a discriminated union keyed on `kind`. Element-targeting variants
 * take either a `ref` or a CSS `selector`; both are optional at the type
 * level, so this type alone does not enforce that one of them is present.
 */
export type BrowserActRequest =
  /** Click an element. With `delayMs`, the element is hovered first and clicked after the delay. */
  | {
      kind: "click";
      ref?: string;
      selector?: string;
      targetId?: string;
      doubleClick?: boolean;
      button?: string;
      modifiers?: string[];
      delayMs?: number;
      timeoutMs?: number;
    }
  /** Type `text` into an element. */
  | {
      kind: "type";
      ref?: string;
      selector?: string;
      text: string;
      targetId?: string;
      submit?: boolean;
      slowly?: boolean;
      timeoutMs?: number;
    }
  /** Press a keyboard key. */
  | {
      kind: "press";
      key: string;
      targetId?: string;
      delayMs?: number;
    }
  /** Hover over an element. */
  | {
      kind: "hover";
      ref?: string;
      selector?: string;
      targetId?: string;
      timeoutMs?: number;
    }
  /** Scroll an element into view. */
  | {
      kind: "scrollIntoView";
      ref?: string;
      selector?: string;
      targetId?: string;
      timeoutMs?: number;
    }
  /** Drag from a start element to an end element, each given by ref or selector. */
  | {
      kind: "drag";
      startRef?: string;
      startSelector?: string;
      endRef?: string;
      endSelector?: string;
      targetId?: string;
      timeoutMs?: number;
    }
  /** Choose `values` on an element. */
  | {
      kind: "select";
      ref?: string;
      selector?: string;
      values: string[];
      targetId?: string;
      timeoutMs?: number;
    }
  /** Fill several form fields in one request. */
  | {
      kind: "fill";
      fields: BrowserFormField[];
      targetId?: string;
      timeoutMs?: number;
    }
  /** Resize to the given width and height. */
  | {
      kind: "resize";
      width: number;
      height: number;
      targetId?: string;
    }
  /** Wait for a duration and/or one of several optional conditions. */
  | {
      kind: "wait";
      timeMs?: number;
      text?: string;
      textGone?: string;
      selector?: string;
      url?: string;
      loadState?: "load" | "domcontentloaded" | "networkidle";
      fn?: string;
      targetId?: string;
      timeoutMs?: number;
    }
  /** Evaluate the function source in `fn`, optionally against a `ref`. */
  | {
      kind: "evaluate";
      fn: string;
      ref?: string;
      targetId?: string;
      timeoutMs?: number;
    }
  /** Close the target. */
  | {
      kind: "close";
      targetId?: string;
    }
  /**
   * Run several actions sequentially in one call. Batches may nest because
   * `actions` is itself a list of `BrowserActRequest`. `stopOnError`
   * controls whether execution halts on the first failure.
   */
  | {
      kind: "batch";
      actions: BrowserActRequest[];
      targetId?: string;
      stopOnError?: boolean;
    };
/**
 * Success payload returned for an `act` request.
 */
export type BrowserActResponse = {
  /** Literal `true`; only the success shape is modelled here. */
  ok: true;
  /** Target identifier reported in the response. */
  targetId: string;
  /** Optional URL reported in the response. */
  url?: string;
  /** Optional action result of unknown shape; narrow before use. */
  result?: unknown;
  /** Optional per-action outcomes (presumably populated for `batch` requests — confirm against the server). */
  results?: { ok: boolean; error?: string }[];
};
/**
 * Description of a download as reported by the download endpoints.
 */
export type BrowserDownloadPayload = {
  /** URL associated with the download. */
  url: string;
  /** File name suggested for the download. */
  suggestedFilename: string;
  /** Path associated with the download (presumably where it was saved — confirm against the server). */
  path: string;
};
/**
 * Success payload shared by the `/download` and `/wait/download` requests.
 */
type BrowserDownloadResult = {
  ok: true;
  targetId: string;
  download: BrowserDownloadPayload;
};
/**
 * POSTs a download request (`/download` or `/wait/download`) as JSON and
 * returns the parsed response.
 *
 * The client-side fetch deadline used to be a fixed 20000 ms even when the
 * caller forwarded a longer `timeoutMs` in `body`, so a long download wait
 * could be cut off by the client first. The deadline now follows the
 * forwarded budget: it is never below 20000 ms, gets a little headroom on
 * top of the requested value, and is clamped to the largest delay a 32-bit
 * timer accepts.
 *
 * NOTE(review): this assumes `fetchBrowserJson` treats `timeoutMs` as the
 * client-side request deadline — confirm against its implementation.
 *
 * @param baseUrl Base URL handed to `withBaseUrl`; may be `undefined`.
 * @param route Download route to call.
 * @param body JSON body forwarded unchanged; `body.timeoutMs` (when a positive
 *   finite number) also drives the fetch deadline.
 * @param profile Optional profile used to build the query string.
 * @returns The response produced by `fetchBrowserJson`.
 */
async function postDownloadRequest(
  baseUrl: string | undefined,
  route: "/wait/download" | "/download",
  body: Record<string, unknown>,
  profile?: string,
): Promise<BrowserDownloadResult> {
  // Previous fixed deadline, kept as the floor so short requests behave as before.
  const floorMs = 20000;
  // Headroom so the server can answer (or report its own timeout) first.
  const graceMs = 5000;
  // Delays above this overflow 32-bit timers, which would fire almost immediately.
  const ceilingMs = 2147483647;

  const requested = body.timeoutMs;
  const hasBudget = typeof requested === "number" && Number.isFinite(requested) && requested > 0;
  const fetchTimeoutMs = hasBudget
    ? Math.min(ceilingMs, Math.max(floorMs, requested + graceMs))
    : floorMs;

  const q = buildProfileQuery(profile);
  return await fetchBrowserJson<BrowserDownloadResult>(withBaseUrl(baseUrl, `${route}${q}`), {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
    timeoutMs: fetchTimeoutMs,
  });
}
/**
 * Sends a navigation request by POSTing `{ url, targetId }` as JSON to the
 * `/navigate` route.
 *
 * @param baseUrl Base URL handed to `withBaseUrl`; may be `undefined`.
 * @param opts Destination `url`, optional `targetId`, and optional `profile`
 *   (used only to build the query string).
 * @returns The response produced by `fetchBrowserJson`.
 */
export async function browserNavigate(
  baseUrl: string | undefined,
  opts: {
    url: string;
    targetId?: string;
    profile?: string;
  },
): Promise<BrowserActionTabResult> {
  const { profile, url, targetId } = opts;
  const endpoint = withBaseUrl(baseUrl, `/navigate${buildProfileQuery(profile)}`);
  const payload = JSON.stringify({ url, targetId });
  return await fetchBrowserJson<BrowserActionTabResult>(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    timeoutMs: 20000,
  });
}
/**
 * POSTs dialog-handling instructions as JSON to the `/hooks/dialog` route.
 *
 * The JSON body carries `accept`, `promptText`, `targetId` and `timeoutMs`
 * taken from `opts`; `profile` is used only to build the query string.
 *
 * @param baseUrl Base URL handed to `withBaseUrl`; may be `undefined`.
 * @param opts Dialog options forwarded to the server.
 * @returns The response produced by `fetchBrowserJson`.
 */
export async function browserArmDialog(
  baseUrl: string | undefined,
  opts: {
    accept: boolean;
    promptText?: string;
    targetId?: string;
    timeoutMs?: number;
    profile?: string;
  },
): Promise<BrowserActionOk> {
  const { profile, accept, promptText, targetId, timeoutMs } = opts;
  const endpoint = withBaseUrl(baseUrl, `/hooks/dialog${buildProfileQuery(profile)}`);
  const payload = JSON.stringify({ accept, promptText, targetId, timeoutMs });
  return await fetchBrowserJson<BrowserActionOk>(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    timeoutMs: 20000,
  });
}
/**
 * POSTs file-chooser instructions as JSON to the `/hooks/file-chooser` route.
 *
 * The JSON body carries `paths`, `ref`, `inputRef`, `element`, `targetId`
 * and `timeoutMs` taken from `opts`; `profile` is used only to build the
 * query string.
 *
 * @param baseUrl Base URL handed to `withBaseUrl`; may be `undefined`.
 * @param opts File paths plus optional element and target selection fields.
 * @returns The response produced by `fetchBrowserJson`.
 */
export async function browserArmFileChooser(
  baseUrl: string | undefined,
  opts: {
    paths: string[];
    ref?: string;
    inputRef?: string;
    element?: string;
    targetId?: string;
    timeoutMs?: number;
    profile?: string;
  },
): Promise<BrowserActionOk> {
  const { profile, paths, ref, inputRef, element, targetId, timeoutMs } = opts;
  const endpoint = withBaseUrl(baseUrl, `/hooks/file-chooser${buildProfileQuery(profile)}`);
  const payload = JSON.stringify({ paths, ref, inputRef, element, targetId, timeoutMs });
  return await fetchBrowserJson<BrowserActionOk>(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    timeoutMs: 20000,
  });
}
/**
 * Issues a `/wait/download` request through `postDownloadRequest`.
 *
 * @param baseUrl Base URL forwarded to `postDownloadRequest`; may be `undefined`.
 * @param opts Optional `path`, `targetId` and `timeoutMs` placed in the request
 *   body, plus an optional `profile` passed separately.
 * @returns The download result produced by `postDownloadRequest`.
 */
export async function browserWaitForDownload(
  baseUrl: string | undefined,
  opts: {
    path?: string;
    targetId?: string;
    timeoutMs?: number;
    profile?: string;
  },
): Promise<BrowserDownloadResult> {
  const { targetId, path, timeoutMs, profile } = opts;
  const payload: Record<string, unknown> = { targetId, path, timeoutMs };
  return await postDownloadRequest(baseUrl, "/wait/download", payload, profile);
}
/**
 * Issues a `/download` request through `postDownloadRequest`.
 *
 * @param baseUrl Base URL forwarded to `postDownloadRequest`; may be `undefined`.
 * @param opts Required `ref` and `path`, optional `targetId` and `timeoutMs`
 *   placed in the request body, plus an optional `profile` passed separately.
 * @returns The download result produced by `postDownloadRequest`.
 */
export async function browserDownload(
  baseUrl: string | undefined,
  opts: {
    ref: string;
    path: string;
    targetId?: string;
    timeoutMs?: number;
    profile?: string;
  },
): Promise<BrowserDownloadResult> {
  const { targetId, ref, path, timeoutMs, profile } = opts;
  const payload: Record<string, unknown> = { targetId, ref, path, timeoutMs };
  return await postDownloadRequest(baseUrl, "/download", payload, profile);
}
/**
 * Adds up the explicit time budgets (in ms) carried by an act request.
 *
 * Counts a positive finite `timeoutMs`, plus `timeMs` on `wait` requests.
 * For `batch` requests the budgets of the nested actions are summed, since
 * batched actions run one after another. Anything malformed contributes 0.
 * The input is inspected defensively as `unknown` so a malformed payload is
 * still forwarded for the server to reject rather than throwing here.
 */
function requestedActBudgetMs(req: unknown, depth = 0): number {
  // Bound recursion so cyclic or pathologically nested payloads cannot exhaust the stack.
  const maxDepth = 8;
  if (typeof req !== "object" || req === null || depth > maxDepth) {
    return 0;
  }
  const fields = req as Record<string, unknown>;
  const positive = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0;

  if (fields.kind === "batch") {
    const nested: unknown[] = Array.isArray(fields.actions) ? fields.actions : [];
    return nested.reduce<number>(
      (total, action) => total + requestedActBudgetMs(action, depth + 1),
      0,
    );
  }
  // A wait may both sleep (`timeMs`) and poll a condition (`timeoutMs`); treat them
  // as additive so the client deadline is an upper bound.
  const sleepMs = fields.kind === "wait" ? positive(fields.timeMs) : 0;
  return positive(fields.timeoutMs) + sleepMs;
}

/**
 * Picks the client-side fetch deadline for an act request: never below the
 * previous fixed 20000 ms, extended (with a little headroom) when the request
 * asks for more, and clamped to the largest delay a 32-bit timer accepts.
 */
function resolveActFetchTimeoutMs(req: unknown): number {
  const floorMs = 20000;
  const graceMs = 5000;
  const ceilingMs = 2147483647;
  const requestedMs = requestedActBudgetMs(req);
  if (requestedMs <= 0) {
    return floorMs;
  }
  return Math.min(ceilingMs, Math.max(floorMs, requestedMs + graceMs));
}

/**
 * POSTs an action request as JSON to the `/act` route.
 *
 * The client-side fetch deadline used to be a fixed 20000 ms, so a request
 * that carried a longer `timeoutMs`, a long `wait`, or a batch of slow
 * actions could be cut off by the client first. The deadline now follows the
 * budgets in the request; requests without explicit budgets keep the 20000 ms
 * deadline.
 *
 * NOTE(review): this assumes `fetchBrowserJson` treats `timeoutMs` as the
 * client-side request deadline — confirm against its implementation.
 *
 * @param baseUrl Base URL handed to `withBaseUrl`; may be `undefined`.
 * @param req Action request, serialized unchanged as the JSON body.
 * @param opts Optional `profile` used to build the query string.
 * @returns The response produced by `fetchBrowserJson`.
 */
export async function browserAct(
  baseUrl: string | undefined,
  req: BrowserActRequest,
  opts?: { profile?: string },
): Promise<BrowserActResponse> {
  const q = buildProfileQuery(opts?.profile);
  return await fetchBrowserJson<BrowserActResponse>(withBaseUrl(baseUrl, `/act${q}`), {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(req),
    timeoutMs: resolveActFetchTimeoutMs(req),
  });
}
/**
 * POSTs a screenshot request as JSON to the `/screenshot` route.
 *
 * The JSON body carries `targetId`, `fullPage`, `ref`, `element` and `type`
 * taken from `opts`; `profile` is used only to build the query string.
 *
 * @param baseUrl Base URL handed to `withBaseUrl`; may be `undefined`.
 * @param opts Optional capture settings, including the image `type`
 *   (`"png"` or `"jpeg"`).
 * @returns The response produced by `fetchBrowserJson`.
 */
export async function browserScreenshotAction(
  baseUrl: string | undefined,
  opts: {
    targetId?: string;
    fullPage?: boolean;
    ref?: string;
    element?: string;
    type?: "png" | "jpeg";
    profile?: string;
  },
): Promise<BrowserActionPathResult> {
  const { profile, targetId, fullPage, ref, element, type: imageType } = opts;
  const endpoint = withBaseUrl(baseUrl, `/screenshot${buildProfileQuery(profile)}`);
  const payload = JSON.stringify({ targetId, fullPage, ref, element, type: imageType });
  return await fetchBrowserJson<BrowserActionPathResult>(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
    timeoutMs: 20000,
  });
}