feat(browser): add CDP role snapshot fallback

This commit is contained in:
Peter Steinberger
2026-04-26 04:37:45 +01:00
parent 0ca9c4dcb0
commit ed1ac2fc44
13 changed files with 1026 additions and 37 deletions

View File

@@ -55,6 +55,7 @@ Detailed guidance: [Browser troubleshooting](/tools/browser#cdp-startup-failure-
```bash
openclaw browser status
openclaw browser doctor
openclaw browser doctor --deep
openclaw browser start
openclaw browser start --headless
openclaw browser stop
@@ -63,6 +64,8 @@ openclaw browser --browser-profile openclaw reset-profile
Notes:
- `doctor --deep` adds a live snapshot probe. It is useful when basic CDP
readiness is green but you want proof that the current tab can be inspected.
- For `attachOnly` and remote CDP profiles, `openclaw browser stop` closes the
active control session and clears temporary emulation overrides even when
OpenClaw did not launch the browser process itself.

View File

@@ -75,6 +75,10 @@ a clear 501 error.
What still works without Playwright:
- ARIA snapshots
- Role-style accessibility snapshots (`--interactive`, `--compact`,
`--depth`, `--efficient`) when a per-tab CDP WebSocket is available. This is
a fallback for inspection and ref discovery; Playwright remains the primary
action engine.
- Page screenshots for the managed `openclaw` browser when a per-tab CDP
WebSocket is available
- Page screenshots for `existing-session` / Chrome MCP profiles
@@ -84,7 +88,7 @@ What still needs Playwright:
- `navigate`
- `act`
- AI snapshots / role snapshots
- AI snapshots that depend on Playwright's native AI snapshot format
- CSS-selector element screenshots (`--element`)
- full browser PDF export
@@ -256,9 +260,12 @@ OpenClaw supports two “snapshot” styles:
- Output: the accessibility tree as structured nodes.
- Actions: `openclaw browser click ax12` works when the snapshot path can bind
the ref through Playwright and Chrome backend DOM ids.
- If Playwright is unavailable, ARIA snapshots can still be useful for
inspection, but refs may not be actionable. Re-snapshot with `--format ai`
or `--interactive` when you need action refs.
- If Playwright is unavailable, ARIA snapshots can still be useful for
inspection, but refs may not be actionable. Re-snapshot with `--format ai`
or `--interactive` when you need action refs.
- Docker proof for the raw-CDP fallback path: `pnpm test:docker:browser-cdp-snapshot`
starts Chromium with CDP, runs `browser doctor --deep`, and verifies role
snapshots include link URLs, cursor-promoted clickables, and iframe metadata.
Ref behavior:

View File

@@ -35,6 +35,7 @@ agent automation and verification.
```bash
openclaw browser --browser-profile openclaw doctor
openclaw browser --browser-profile openclaw doctor --deep
openclaw browser --browser-profile openclaw status
openclaw browser --browser-profile openclaw start
openclaw browser --browser-profile openclaw open https://example.com

View File

@@ -16,6 +16,7 @@ import {
type RawAXNode,
snapshotAria,
snapshotDom,
snapshotRoleViaCdp,
} from "./cdp.js";
/**
@@ -77,6 +78,16 @@ async function startMockWsServer(handle: CdpReplyHandler) {
params?: Record<string, unknown>;
};
handle(msg, socket);
if (
msg.method === "Page.enable" ||
msg.method === "Runtime.enable" ||
msg.method === "Network.enable" ||
msg.method === "DOM.enable" ||
msg.method === "Accessibility.enable" ||
msg.method === "Runtime.runIfWaitingForDebugger"
) {
socket.send(JSON.stringify({ id: msg.id, result: {} }));
}
});
});
return {
@@ -475,6 +486,204 @@ describe("cdp internal", () => {
});
});
describe("snapshotRoleViaCdp", () => {
// Drives snapshotRoleViaCdp against a mock CDP WebSocket that serves a four-node
// AX tree (root, button, link, unnamed generic) and checks ref numbering, the
// cursor-interactive promotion of the generic node, and link URL suffixes.
it("builds role refs, promotes cursor-interactive nodes, and appends link urls", async () => {
  const server = await startMockWsServer((msg, socket) => {
    if (msg.method === "Accessibility.enable" || msg.method === "Page.enable") {
      socket.send(JSON.stringify({ id: msg.id, result: {} }));
      return;
    }
    if (msg.method === "Accessibility.getFullAXTree") {
      socket.send(
        JSON.stringify({
          id: msg.id,
          result: {
            nodes: [
              {
                nodeId: "1",
                role: { value: "RootWebArea" },
                name: { value: "" },
                childIds: ["2", "3", "4"],
              },
              {
                nodeId: "2",
                role: { value: "button" },
                name: { value: "Save" },
                backendDOMNodeId: 22,
                childIds: [],
              },
              {
                nodeId: "3",
                role: { value: "link" },
                name: { value: "Docs" },
                backendDOMNodeId: 33,
                childIds: [],
              },
              {
                // Unnamed generic node; it only gets a ref because the cursor scan
                // below reports backend node 44 as clickable.
                nodeId: "4",
                role: { value: "generic" },
                name: { value: "" },
                backendDOMNodeId: 44,
                childIds: [],
              },
            ],
          },
        }),
      );
      return;
    }
    if (msg.method === "Runtime.evaluate") {
      const expression =
        typeof msg.params?.expression === "string" ? msg.params.expression : "";
      // The scan script is recognised by its querySelectorAll("*") call; any other
      // evaluate (e.g. the attribute cleanup) just gets a generic truthy result.
      if (expression.includes('querySelectorAll("*"')) {
        socket.send(
          JSON.stringify({
            id: msg.id,
            result: {
              result: {
                value: [
                  {
                    text: "Clickable Card",
                    tagName: "div",
                    hasCursorPointer: true,
                    hasOnClick: true,
                  },
                ],
              },
            },
          }),
        );
        return;
      }
      socket.send(JSON.stringify({ id: msg.id, result: { result: { value: true } } }));
      return;
    }
    if (msg.method === "DOM.getDocument") {
      socket.send(JSON.stringify({ id: msg.id, result: { root: { nodeId: 1 } } }));
      return;
    }
    if (msg.method === "DOM.querySelectorAll") {
      socket.send(JSON.stringify({ id: msg.id, result: { nodeIds: [44] } }));
      return;
    }
    if (msg.method === "DOM.describeNode") {
      // Attribute value "0" is the index into the scan result array above.
      socket.send(
        JSON.stringify({
          id: msg.id,
          result: { node: { backendNodeId: 44, attributes: ["data-openclaw-cdp-ci", "0"] } },
        }),
      );
      return;
    }
    if (msg.method === "DOM.resolveNode") {
      socket.send(JSON.stringify({ id: msg.id, result: { object: { objectId: "link1" } } }));
      return;
    }
    if (msg.method === "Runtime.callFunctionOn") {
      socket.send(
        JSON.stringify({
          id: msg.id,
          result: { result: { value: "https://docs.openclaw.ai/" } },
        }),
      );
    }
  });
  wss = server.wss;
  const snap = await snapshotRoleViaCdp({
    wsUrl: server.wsUrl,
    urls: true,
    options: { interactive: true },
  });
  expect(snap.snapshot).toContain('- button "Save" [ref=e1]');
  expect(snap.snapshot).toContain('- link "Docs" [ref=e2] [url=https://docs.openclaw.ai/]');
  expect(snap.snapshot).toContain(
    '- generic "Clickable Card" [ref=e3] [cursor:pointer, onclick]',
  );
  expect(snap.refs.e3?.backendDOMNodeId).toBe(44);
});
// Serves a parent AX tree containing a single Iframe node and, when the request
// carries a frameId, a child tree with one button. Verifies that the child lines
// are spliced under the iframe and that both refs carry the resolved frame id.
it("expands one level of iframe snapshots with frame metadata", async () => {
  const server = await startMockWsServer((msg, socket) => {
    if (
      msg.method === "Accessibility.enable" ||
      msg.method === "Page.enable" ||
      msg.method === "Runtime.evaluate"
    ) {
      // Runtime.evaluate answers with an empty array, i.e. no cursor-interactive
      // elements are reported in this scenario.
      socket.send(
        JSON.stringify({
          id: msg.id,
          result: msg.method === "Runtime.evaluate" ? { result: { value: [] } } : {},
        }),
      );
      return;
    }
    if (msg.method === "Accessibility.getFullAXTree") {
      // A frameId param marks the recursive (child frame) request.
      const frameId = msg.params?.frameId;
      socket.send(
        JSON.stringify({
          id: msg.id,
          result: {
            nodes: frameId
              ? [
                  {
                    nodeId: "c1",
                    role: { value: "RootWebArea" },
                    name: { value: "" },
                    childIds: ["c2"],
                  },
                  {
                    nodeId: "c2",
                    role: { value: "button" },
                    name: { value: "Inside" },
                    backendDOMNodeId: 55,
                    childIds: [],
                  },
                ]
              : [
                  {
                    nodeId: "1",
                    role: { value: "RootWebArea" },
                    name: { value: "" },
                    childIds: ["2"],
                  },
                  {
                    nodeId: "2",
                    role: { value: "Iframe" },
                    name: { value: "Child" },
                    backendDOMNodeId: 44,
                    childIds: [],
                  },
                ],
          },
        }),
      );
      return;
    }
    if (msg.method === "DOM.describeNode") {
      socket.send(
        JSON.stringify({
          id: msg.id,
          result: { node: { contentDocument: { frameId: "FRAME_1" } } },
        }),
      );
    }
  });
  wss = server.wss;
  const snap = await snapshotRoleViaCdp({
    wsUrl: server.wsUrl,
    options: { interactive: true },
  });
  expect(snap.snapshot).toContain('- Iframe "Child" [ref=e1]');
  expect(snap.snapshot).toContain(' - button "Inside" [ref=e2]');
  expect(snap.refs.e1?.frameId).toBe("FRAME_1");
  expect(snap.refs.e2?.frameId).toBe("FRAME_1");
});
});
describe("snapshotDom", () => {
it("returns the nodes array from the evaluated expression", async () => {
const server = await startMockWsServer((msg, socket) => {

View File

@@ -49,6 +49,19 @@ describe("cdp", () => {
params?: Record<string, unknown>;
};
onMessage(msg, socket);
if (msg.method === "Target.attachToTarget") {
socket.send(JSON.stringify({ id: msg.id, result: { sessionId: "S1" } }));
} else if (
msg.method === "Target.detachFromTarget" ||
msg.method === "Page.enable" ||
msg.method === "Runtime.enable" ||
msg.method === "Network.enable" ||
msg.method === "DOM.enable" ||
msg.method === "Accessibility.enable" ||
msg.method === "Runtime.runIfWaitingForDebugger"
) {
socket.send(JSON.stringify({ id: msg.id, result: {} }));
}
});
});
return wsPort;
@@ -87,7 +100,11 @@ describe("cdp", () => {
});
it("creates a target via the browser websocket", async () => {
const methods: string[] = [];
const wsPort = await startWsServerWithMessages((msg, socket) => {
if (msg.method) {
methods.push(msg.method);
}
if (msg.method !== "Target.createTarget") {
return;
}
@@ -109,6 +126,19 @@ describe("cdp", () => {
});
expect(created.targetId).toBe("TARGET_123");
expect(methods).toEqual(
expect.arrayContaining([
"Target.createTarget",
"Target.attachToTarget",
"Page.enable",
"Runtime.enable",
"Network.enable",
"DOM.enable",
"Accessibility.enable",
"Runtime.runIfWaitingForDebugger",
"Target.detachFromTarget",
]),
);
});
it("creates a target via direct WebSocket URL (skips /json/version)", async () => {
@@ -447,6 +477,18 @@ describe("cdp", () => {
};
if (msg.method === "Target.createTarget") {
socket.send(JSON.stringify({ id: msg.id, result: { targetId: "ROOT_FALLBACK" } }));
} else if (msg.method === "Target.attachToTarget") {
socket.send(JSON.stringify({ id: msg.id, result: { sessionId: "S1" } }));
} else if (
msg.method === "Target.detachFromTarget" ||
msg.method === "Page.enable" ||
msg.method === "Runtime.enable" ||
msg.method === "Network.enable" ||
msg.method === "DOM.enable" ||
msg.method === "Accessibility.enable" ||
msg.method === "Runtime.runIfWaitingForDebugger"
) {
socket.send(JSON.stringify({ id: msg.id, result: {} }));
}
});
});

View File

@@ -2,6 +2,7 @@ import type { SsrFPolicy } from "../infra/net/ssrf.js";
import {
appendCdpPath,
assertCdpEndpointAllowed,
type CdpSendFn,
fetchJson,
isDirectCdpWebSocketEndpoint,
isLoopbackHost,
@@ -10,6 +11,7 @@ import {
withCdpSocket,
} from "./cdp.helpers.js";
import { assertBrowserNavigationAllowed, withBrowserNavigationPolicy } from "./navigation-guard.js";
import { CONTENT_ROLES, INTERACTIVE_ROLES, STRUCTURAL_ROLES } from "./snapshot-roles.js";
export {
appendCdpPath,
@@ -254,9 +256,13 @@ export async function createTargetViaCdp(opts: {
if (!targetId) {
throw new Error("CDP Target.createTarget returned no targetId");
}
await prepareCdpTargetSession(send, targetId);
return { targetId };
},
{ handshakeTimeoutMs: opts.timeouts?.handshakeTimeoutMs },
{
commandTimeoutMs: opts.timeouts?.httpTimeoutMs ?? 5000,
handshakeTimeoutMs: opts.timeouts?.handshakeTimeoutMs,
},
);
} catch (err) {
lastError = err;
@@ -268,6 +274,33 @@ export async function createTargetViaCdp(opts: {
throw new Error("CDP Target.createTarget failed");
}
/**
 * Temporarily attaches to a target so its page-level CDP domains can be
 * enabled, then detaches again.
 *
 * The attach is best-effort: if `Target.attachToTarget` rejects, or its reply
 * carries no non-empty string `sessionId`, nothing else is done.
 *
 * @param send - CDP command sender bound to the browser-level socket.
 * @param targetId - Target to attach to (flattened session mode).
 */
async function prepareCdpTargetSession(send: CdpSendFn, targetId: string): Promise<void> {
  const attachRequest = send("Target.attachToTarget", { targetId, flatten: true });
  const attachReply = (await attachRequest.catch(() => null)) as {
    sessionId?: unknown;
  } | null;

  const candidate = attachReply?.sessionId;
  if (typeof candidate !== "string" || !candidate) {
    return;
  }

  const sessionId = candidate;
  try {
    await prepareCdpPageSession(send, sessionId);
  } finally {
    // Always release the temporary session; a failed detach is ignored.
    await send("Target.detachFromTarget", { sessionId }).catch(() => {});
  }
}
/**
 * Enables the Page, Runtime, Network, DOM and Accessibility domains on a CDP
 * session and then resumes the target if it is waiting for a debugger.
 *
 * The five `*.enable` commands are issued together; every command is
 * best-effort, so an individual rejection is swallowed.
 *
 * @param send - CDP command sender.
 * @param sessionId - Optional flattened session id forwarded with each command.
 */
async function prepareCdpPageSession(send: CdpSendFn, sessionId?: string): Promise<void> {
  const sendIgnoringFailure = (method: string) =>
    send(method, undefined, sessionId).catch(() => {});

  const enableCommands = [
    "Page.enable",
    "Runtime.enable",
    "Network.enable",
    "DOM.enable",
    "Accessibility.enable",
  ];
  await Promise.all(enableCommands.map((method) => sendIgnoringFailure(method)));

  // Only issued after all enable commands have settled.
  await sendIgnoringFailure("Runtime.runIfWaitingForDebugger");
}
export type CdpRemoteObject = {
type: string;
subtype?: string;
@@ -423,16 +456,505 @@ export function formatAriaSnapshot(nodes: RawAXNode[], limit: number): AriaSnaps
/**
 * Takes a flat ARIA snapshot of the page behind `opts.wsUrl` by reading the
 * full accessibility tree and formatting it with `formatAriaSnapshot`.
 *
 * `limit` is floored and clamped to the range 1–2000 (default 500).
 * `timeoutMs` is handed to `withCdpSocket` as `commandTimeoutMs` (default 5000).
 *
 * NOTE(review): this span appears to interleave the pre-change and post-change
 * bodies of the function (there are two `return await withCdpSocket(` openings);
 * confirm against the real file before relying on the text below.
 */
export async function snapshotAria(opts: {
  wsUrl: string;
  limit?: number;
  timeoutMs?: number;
}): Promise<{ nodes: AriaSnapshotNode[] }> {
  const limit = Math.max(1, Math.min(2000, Math.floor(opts.limit ?? 500)));
  return await withCdpSocket(opts.wsUrl, async (send) => {
  await send("Accessibility.enable").catch(() => {});
  const res = (await send("Accessibility.getFullAXTree")) as {
  nodes?: RawAXNode[];
  return await withCdpSocket(
    opts.wsUrl,
    async (send) => {
      // Enables the page-level domains (best-effort) before reading the tree.
      await prepareCdpPageSession(send);
      const res = (await send("Accessibility.getFullAXTree")) as {
        nodes?: RawAXNode[];
      };
      const nodes = Array.isArray(res?.nodes) ? res.nodes : [];
      return { nodes: formatAriaSnapshot(nodes, limit) };
    },
    { commandTimeoutMs: opts.timeoutMs ?? 5000 },
  );
}
/** One entry of a role snapshot's `refs` table, keyed by ref id (`e1`, `e2`, …). */
export type CdpRoleRef = {
  /** Lowercased accessibility role of the referenced node. */
  role: string;
  /** Accessible name; omitted when the node has no name. */
  name?: string;
  /** Occurrence index among refs sharing the same role and name; only stored when greater than 0. */
  nth?: number;
  /** Chrome backend DOM node id, when the AX node reported one. */
  backendDOMNodeId?: number;
  /** Frame the snapshot was taken in; for iframe nodes, the frame id resolved for that iframe. */
  frameId?: string;
};
/** Filters applied when rendering a CDP role snapshot. */
export type CdpRoleSnapshotOptions = {
  /** Render only interactive roles, iframes, and cursor-promoted nodes. */
  interactive?: boolean;
  /** Drop structural-role nodes that have neither a name nor a ref. */
  compact?: boolean;
  /** Exclude nodes whose tree depth is greater than this value. */
  maxDepth?: number;
};
/**
 * Description of an element that carries no interactive tag or ARIA role but
 * still looks interactive, as reported by the in-page scan script.
 */
type CursorInteractiveInfo = {
  /** Whitespace-collapsed text content, truncated to 100 characters. */
  text: string;
  /** Lowercased tag name of the element. */
  tagName: string;
  /** Element has an `onclick` attribute or handler. */
  hasOnClick?: boolean;
  /** Computed `cursor` style is `pointer`. */
  hasCursorPointer?: boolean;
  /** Element has a `tabindex` attribute other than `-1`. */
  hasTabIndex?: boolean;
  /** `contenteditable` is `""` or `"true"`. */
  isEditable?: boolean;
  /** Type of a hidden radio/checkbox input nested inside the element; empty when there is none. */
  hiddenInputType?: string;
};
/** Working node used while turning raw AX nodes into a rendered role snapshot. */
type RoleTreeNode = {
  /** The raw AX node this entry was built from. */
  raw: RawAXNode;
  /** AX role value, or `"unknown"` when the raw node has none. */
  role: string;
  /** Accessible name; may later be filled from cursor-scan text when empty. */
  name: string;
  /** AX value string. */
  value: string;
  /** Floored backend DOM node id when the raw node reports a positive number. */
  backendDOMNodeId?: number;
  /** Indexes (into the tree array) of this node's children. */
  children: number[];
  /** Index (into the tree array) of the parent node, if any. */
  parent?: number;
  /** Distance from a root node; roots have depth 0. */
  depth: number;
  /** Assigned ref id (`e1`, `e2`, …) when the node qualifies for one. */
  ref?: string;
  /** Occurrence index among nodes with the same role and name. */
  nth?: number;
  /** Resolved link URL, when URL resolution was requested. */
  url?: string;
  /** Cursor-scan information when the element was flagged as interactive. */
  cursorInfo?: CursorInteractiveInfo;
  /** Frame id resolved for iframe nodes. */
  frameId?: string;
};
/**
 * Converts the flat AX node list into an index-linked tree.
 *
 * Nodes without a `nodeId` are dropped. Children are wired up through
 * `childIds`; references to unknown ids are ignored. Every node that is not
 * some other node's child is a root, and depths are assigned from the roots.
 *
 * @returns The node array plus root indexes. If no root could be identified
 *   but nodes exist, index 0 is used as the only root.
 */
function buildRoleTree(nodes: RawAXNode[]): { tree: RoleTreeNode[]; roots: number[] } {
  const tree: RoleTreeNode[] = [];
  const indexByNodeId = new Map<string, number>();

  // Pass 1: materialise one tree entry per identifiable AX node.
  nodes.forEach((raw) => {
    const nodeId = raw.nodeId ?? "";
    if (!nodeId) {
      return;
    }
    const rawBackendId = raw.backendDOMNodeId;
    const backendDOMNodeId =
      typeof rawBackendId === "number" && rawBackendId > 0 ? Math.floor(rawBackendId) : undefined;
    indexByNodeId.set(nodeId, tree.length);
    tree.push({
      raw,
      role: axValue(raw.role) || "unknown",
      name: axValue(raw.name),
      value: axValue(raw.value),
      backendDOMNodeId,
      children: [],
      depth: 0,
    });
  });

  // Pass 2: link parents and children by index.
  const hasParent = new Set<number>();
  tree.forEach((parentNode, parentIndex) => {
    for (const childId of parentNode.raw.childIds ?? []) {
      const childIndex = indexByNodeId.get(childId);
      if (childIndex === undefined) {
        continue;
      }
      parentNode.children.push(childIndex);
      tree[childIndex].parent = parentIndex;
      hasParent.add(childIndex);
    }
  });

  const roots: number[] = [];
  for (let index = 0; index < tree.length; index += 1) {
    if (!hasParent.has(index)) {
      roots.push(index);
    }
  }

  // Pass 3: depth-first walk from the roots to assign depths.
  const pending: Array<{ index: number; depth: number }> = roots.map((index) => ({
    index,
    depth: 0,
  }));
  while (pending.length > 0) {
    const top = pending.pop();
    if (!top) {
      break;
    }
    const node = tree[top.index];
    node.depth = top.depth;
    // Push children last-to-first so the first child is popped first.
    for (let c = node.children.length - 1; c >= 0; c -= 1) {
      pending.push({ index: node.children[c], depth: top.depth + 1 });
    }
  }

  if (roots.length) {
    return { tree, roots };
  }
  return { tree, roots: tree.length ? [0] : [] };
}
/**
 * Decides whether a tree node gets its own line in the rendered snapshot.
 *
 * - Nodes deeper than `maxDepth` are never rendered.
 * - In interactive mode only interactive roles, iframes, and cursor-promoted
 *   nodes are rendered.
 * - In compact mode, structural roles with neither a name nor a ref are dropped.
 */
function shouldIncludeRoleNode(node: RoleTreeNode, options: CdpRoleSnapshotOptions): boolean {
  const { maxDepth, interactive, compact } = options;
  if (maxDepth !== undefined && node.depth > maxDepth) {
    return false;
  }

  const role = node.role.toLowerCase();
  if (interactive) {
    if (INTERACTIVE_ROLES.has(role)) {
      return true;
    }
    return role === "iframe" || Boolean(node.cursorInfo);
  }

  const isAnonymousStructure =
    Boolean(compact) && STRUCTURAL_ROLES.has(role) && !node.name && !node.ref;
  return !isAnonymousStructure;
}
/**
 * Formats the cursor-scan flags of a node as a bracketed, comma-separated
 * suffix such as ` [cursor:pointer, onclick]`.
 *
 * @returns An empty string when there is no info or no flag is set.
 */
function cursorSuffix(info?: CursorInteractiveInfo): string {
  if (!info) {
    return "";
  }

  const labels: string[] = [];
  if (info.hasCursorPointer) {
    labels.push("cursor:pointer");
  }
  if (info.hasOnClick) {
    labels.push("onclick");
  }
  if (info.hasTabIndex) {
    labels.push("tabindex");
  }
  if (info.isEditable) {
    labels.push("contenteditable");
  }
  if (info.hiddenInputType) {
    labels.push(`hidden-${info.hiddenInputType}`);
  }

  if (labels.length === 0) {
    return "";
  }
  return ` [${labels.join(", ")}]`;
}
/**
 * Appends the snapshot lines for the subtree rooted at `index` to `output`
 * in pre-order.
 *
 * A node that is filtered out by `shouldIncludeRoleNode` produces no line, but
 * its children are still visited.
 *
 * @param indentOffset - Added to each node's depth when computing its indent.
 */
function renderRoleTree(
  tree: RoleTreeNode[],
  index: number,
  output: string[],
  options: CdpRoleSnapshotOptions,
  indentOffset = 0,
): void {
  const node = tree[index];
  if (!node) {
    return;
  }

  if (shouldIncludeRoleNode(node, options)) {
    const escapeQuotes = (text: string): string => text.replaceAll('"', '\\"');
    const segments = [
      " ".repeat(Math.max(0, node.depth + indentOffset)),
      `- ${node.role}`,
      node.name ? ` "${escapeQuotes(node.name)}"` : "",
      node.ref ? ` [ref=${node.ref}]` : "",
      node.nth !== undefined && node.nth > 0 ? ` [nth=${node.nth}]` : "",
      node.value ? ` value="${escapeQuotes(node.value)}"` : "",
      node.url ? ` [url=${node.url}]` : "",
      cursorSuffix(node.cursorInfo),
    ];
    output.push(segments.join(""));
  }

  for (const childIndex of node.children) {
    renderRoleTree(tree, childIndex, output, options, indentOffset);
  }
}
/**
 * Finds elements that have no interactive tag or ARIA role but still look
 * interactive (pointer cursor, `onclick`, focusable `tabindex`, or
 * `contenteditable`), and maps them to their backend DOM node ids.
 *
 * How it works:
 * 1. A page script tags every matching visible element with a temporary
 *    `data-openclaw-cdp-ci="<index>"` attribute and returns one info record
 *    per tagged element.
 * 2. `DOM.querySelectorAll` + `DOM.describeNode` translate the tagged elements
 *    into backend node ids, using the attribute value as index into the records.
 * 3. The temporary attributes are removed again.
 *
 * Every CDP call is best-effort: a failed call yields an empty or partial map.
 * Once the scan has tagged elements, the cleanup in step 3 runs on every exit
 * path, so a failed `DOM.getDocument` no longer leaves the marker attributes
 * behind in the page.
 *
 * @param send - CDP command sender.
 * @param sessionId - Optional flattened session id forwarded with each command.
 * @returns Map from backend DOM node id to the scan record of that element.
 */
async function findCursorInteractiveElements(
  send: CdpSendFn,
  sessionId?: string,
): Promise<Map<number, CursorInteractiveInfo>> {
  const attr = "data-openclaw-cdp-ci";
  const evaluated = (await send(
    "Runtime.evaluate",
    {
      expression: `(() => {
const out = [];
const roles = new Set(["button","link","textbox","checkbox","radio","combobox","listbox","menuitem","menuitemcheckbox","menuitemradio","option","searchbox","slider","spinbutton","switch","tab","treeitem"]);
const tags = new Set(["a","button","input","select","textarea","details","summary"]);
document.querySelectorAll("[${attr}]").forEach((el) => el.removeAttribute("${attr}"));
for (const el of Array.from(document.body ? document.body.querySelectorAll("*") : [])) {
if (!(el instanceof HTMLElement) || el.closest("[hidden],[aria-hidden='true']")) continue;
const tagName = el.tagName.toLowerCase();
if (tags.has(tagName)) continue;
const role = String(el.getAttribute("role") || "").toLowerCase();
if (roles.has(role)) continue;
const style = getComputedStyle(el);
const hasCursorPointer = style.cursor === "pointer";
const hasOnClick = el.hasAttribute("onclick") || el.onclick !== null;
const tabIndex = el.getAttribute("tabindex");
const hasTabIndex = tabIndex !== null && tabIndex !== "-1";
const ce = el.getAttribute("contenteditable");
const isEditable = ce === "" || ce === "true";
if (!hasCursorPointer && !hasOnClick && !hasTabIndex && !isEditable) continue;
if (hasCursorPointer && !hasOnClick && !hasTabIndex && !isEditable) {
const parent = el.parentElement;
if (parent && getComputedStyle(parent).cursor === "pointer") continue;
}
const rect = el.getBoundingClientRect();
if (rect.width <= 0 || rect.height <= 0) continue;
let hiddenInputType = "";
const hiddenInput = el.querySelector("input[type='radio'],input[type='checkbox']");
if (hiddenInput instanceof HTMLInputElement) {
const hiddenStyle = getComputedStyle(hiddenInput);
if (hiddenInput.hidden || hiddenStyle.display === "none" || hiddenStyle.visibility === "hidden") {
hiddenInputType = hiddenInput.type;
}
}
el.setAttribute("${attr}", String(out.length));
out.push({
text: String(el.textContent || "").replace(/\\s+/g, " ").trim().slice(0, 100),
tagName,
hasCursorPointer,
hasOnClick,
hasTabIndex,
isEditable,
hiddenInputType,
});
}
return out;
})()`,
      returnByValue: true,
      awaitPromise: false,
    },
    sessionId,
  ).catch(() => null)) as { result?: { value?: unknown } } | null;
  const entries = Array.isArray(evaluated?.result?.value)
    ? (evaluated.result.value as CursorInteractiveInfo[])
    : [];
  if (!entries.length) {
    // Nothing was reported as tagged, so there is nothing to map or clean up.
    return new Map();
  }

  const out = new Map<number, CursorInteractiveInfo>();
  try {
    const doc = (await send("DOM.getDocument", { depth: 0 }, sessionId).catch(() => null)) as {
      root?: { nodeId?: number };
    } | null;
    const rootNodeId = doc?.root?.nodeId;
    if (typeof rootNodeId !== "number") {
      // The `finally` below still strips the marker attributes from the page.
      return out;
    }
    const queried = (await send(
      "DOM.querySelectorAll",
      { nodeId: rootNodeId, selector: `[${attr}]` },
      sessionId,
    ).catch(() => null)) as { nodeIds?: number[] } | null;
    await Promise.all(
      (queried?.nodeIds ?? []).map(async (nodeId) => {
        const described = (await send("DOM.describeNode", { nodeId }, sessionId).catch(
          () => null,
        )) as { node?: { backendNodeId?: number; attributes?: string[] } } | null;
        // `attributes` is a flat [name, value, name, value, …] list.
        const attrs = described?.node?.attributes ?? [];
        const attrIndex = attrs.indexOf(attr);
        const rawIndex = attrIndex >= 0 ? attrs[attrIndex + 1] : undefined;
        const index = typeof rawIndex === "string" ? Number(rawIndex) : Number.NaN;
        const backendNodeId = described?.node?.backendNodeId;
        if (typeof backendNodeId === "number" && Number.isInteger(index) && entries[index]) {
          out.set(backendNodeId, entries[index]);
        }
      }),
    );
  } finally {
    // Remove the temporary marker attributes regardless of how mapping went.
    await send(
      "Runtime.evaluate",
      {
        expression: `document.querySelectorAll("[${attr}]").forEach((el) => el.removeAttribute("${attr}"))`,
        returnByValue: true,
      },
      sessionId,
    ).catch(() => {});
  }
  return out;
}
/**
 * Looks up the `href` of every link ref that has a backend DOM node id.
 *
 * For each such ref the node is resolved to a remote object and a small
 * function reading `this.href` is called on it. Lookups run concurrently and
 * are best-effort: failed or empty results are simply left out.
 *
 * @returns Map from backend DOM node id to a non-empty href string.
 */
async function resolveLinkUrls(
  send: CdpSendFn,
  refs: Record<string, CdpRoleRef>,
  sessionId?: string,
): Promise<Map<number, string>> {
  const hrefByBackendId = new Map<number, string>();

  const lookupHref = async (backendNodeId: number): Promise<void> => {
    const resolved = (await send("DOM.resolveNode", { backendNodeId }, sessionId).catch(
      () => null,
    )) as { object?: { objectId?: string } } | null;
    const objectId = resolved?.object?.objectId;
    if (!objectId) {
      return;
    }
    const called = (await send(
      "Runtime.callFunctionOn",
      {
        objectId,
        functionDeclaration: "function() { return this.href || ''; }",
        returnByValue: true,
      },
      sessionId,
    ).catch(() => null)) as { result?: { value?: unknown } } | null;
    const value = called?.result?.value;
    if (typeof value === "string" && value) {
      hrefByBackendId.set(backendNodeId, value);
    }
  };

  const lookups: Promise<void>[] = [];
  for (const ref of Object.values(refs)) {
    if (ref.role === "link" && ref.backendDOMNodeId) {
      lookups.push(lookupHref(ref.backendDOMNodeId));
    }
  }
  await Promise.all(lookups);
  return hrefByBackendId;
}
/**
 * Resolves the frame id behind every iframe node in the tree.
 *
 * Each iframe node with a backend DOM node id is described via
 * `DOM.describeNode`; the content document's frame id is preferred, falling
 * back to the node's own `frameId`. Lookups run concurrently and failures are
 * ignored.
 *
 * @returns Map from the iframe's backend DOM node id to its frame id.
 */
async function resolveIframeFrameIds(
  send: CdpSendFn,
  tree: RoleTreeNode[],
  sessionId?: string,
): Promise<Map<number, string>> {
  const frameIdByBackendId = new Map<number, string>();

  const describeFrame = async (backendNodeId: number): Promise<void> => {
    const described = (await send(
      "DOM.describeNode",
      { backendNodeId, depth: 1 },
      sessionId,
    ).catch(() => null)) as {
      node?: { frameId?: string; contentDocument?: { frameId?: string } };
    } | null;
    const target = described?.node;
    const frameId = target?.contentDocument?.frameId ?? target?.frameId ?? "";
    if (frameId) {
      frameIdByBackendId.set(backendNodeId, frameId);
    }
  };

  const lookups: Promise<void>[] = [];
  for (const node of tree) {
    if (node.role.toLowerCase() !== "iframe" || !node.backendDOMNodeId) {
      continue;
    }
    lookups.push(describeFrame(node.backendDOMNodeId));
  }
  await Promise.all(lookups);
  return frameIdByBackendId;
}
/**
 * Builds the role snapshot for one frame: rendered lines, the ref table, and
 * ref statistics.
 *
 * Steps, in order:
 * 1. Fetch the full AX tree (scoped to `params.frameId` when given) and index it.
 * 2. Attach cursor-scan info to matching nodes; an unnamed node takes the scan
 *    text as its name.
 * 3. Assign refs (`e<n>`, numbered from the shared `nextRef` counter) to
 *    interactive roles, named content roles, iframes, and cursor-promoted nodes,
 *    tracking an occurrence index per `role:name` key.
 * 4. Remove the occurrence index again for keys that occur only once.
 * 5. Resolve iframe frame ids and, when `urls` is set, link URLs.
 * 6. Render the tree; when `recurseIframes` is set, snapshot each iframe once
 *    (without further recursion) and splice its lines under the iframe's line.
 *
 * NOTE(review): three lines inside the ref-assignment loop (marked below) look
 * like removed `snapshotAria` lines that leaked into this span — confirm against
 * the real file.
 */
async function buildCdpRoleSnapshot(params: {
  send: CdpSendFn;
  sessionId?: string;
  frameId?: string;
  options: CdpRoleSnapshotOptions;
  urls?: boolean;
  recurseIframes?: boolean;
  nextRef: { value: number };
}): Promise<{
  lines: string[];
  refs: Record<string, CdpRoleRef>;
  stats: { refs: number; interactive: number };
}> {
  const res = (await params.send(
    "Accessibility.getFullAXTree",
    params.frameId ? { frameId: params.frameId } : undefined,
    params.sessionId,
  )) as { nodes?: RawAXNode[] };
  const { tree, roots } = buildRoleTree(Array.isArray(res.nodes) ? res.nodes : []);
  const cursorElements = await findCursorInteractiveElements(params.send, params.sessionId);
  for (const node of tree) {
    if (node.backendDOMNodeId && cursorElements.has(node.backendDOMNodeId)) {
      const cursorInfo = cursorElements.get(node.backendDOMNodeId);
      node.cursorInfo = cursorInfo;
      // Give otherwise unnamed nodes the element's text as their name.
      if (!node.name && cursorInfo?.text) {
        node.name = cursorInfo.text;
      }
    }
  }
  // Occurrence counter and ref list per `role:name` key.
  const counts = new Map<string, number>();
  const refsByKey = new Map<string, string[]>();
  const refs: Record<string, CdpRoleRef> = {};
  for (const node of tree) {
    const role = node.role.toLowerCase();
    const shouldRef =
      INTERACTIVE_ROLES.has(role) ||
      (CONTENT_ROLES.has(role) && Boolean(node.name)) ||
      role === "iframe" ||
      Boolean(node.cursorInfo);
    if (!shouldRef) {
      continue;
    }
    const key = `${role}:${node.name}`;
    const nth = counts.get(key) ?? 0;
    counts.set(key, nth + 1);
    // The counter object is shared with recursive calls so ref ids stay unique.
    const ref = `e${params.nextRef.value}`;
    params.nextRef.value += 1;
    node.ref = ref;
    node.nth = nth;
    refsByKey.set(key, [...(refsByKey.get(key) ?? []), ref]);
    refs[ref] = {
      role,
      ...(node.name ? { name: node.name } : {}),
      ...(nth > 0 ? { nth } : {}),
      ...(node.backendDOMNodeId ? { backendDOMNodeId: node.backendDOMNodeId } : {}),
      ...(params.frameId ? { frameId: params.frameId } : {}),
    };
    // NOTE(review): the next three lines appear to be diff residue (see header).
    const nodes = Array.isArray(res?.nodes) ? res.nodes : [];
    return { nodes: formatAriaSnapshot(nodes, limit) };
    });
  }
  // Keys with a single ref do not need a disambiguating occurrence index.
  for (const refList of refsByKey.values()) {
    if (refList.length > 1) {
      continue;
    }
    const ref = refList[0];
    if (ref) {
      delete refs[ref]?.nth;
      const node = tree.find((entry) => entry.ref === ref);
      if (node) {
        delete node.nth;
      }
    }
  }
  const iframeFrameIds = await resolveIframeFrameIds(params.send, tree, params.sessionId);
  for (const node of tree) {
    if (node.backendDOMNodeId && iframeFrameIds.has(node.backendDOMNodeId)) {
      node.frameId = iframeFrameIds.get(node.backendDOMNodeId);
      // For iframe refs this overwrites any frame id set during ref assignment.
      if (node.ref && refs[node.ref]) {
        refs[node.ref].frameId = node.frameId;
      }
    }
  }
  if (params.urls) {
    const urls = await resolveLinkUrls(params.send, refs, params.sessionId);
    for (const node of tree) {
      if (node.backendDOMNodeId && urls.has(node.backendDOMNodeId)) {
        node.url = urls.get(node.backendDOMNodeId);
      }
    }
  }
  const lines: string[] = [];
  for (const root of roots) {
    renderRoleTree(tree, root, lines, params.options);
  }
  if (params.recurseIframes) {
    const iframeNodes = tree.filter((node) => node.ref && node.frameId);
    for (const iframe of iframeNodes) {
      // Locate the iframe's rendered line by its ref marker.
      const marker = `[ref=${iframe.ref}]`;
      const lineIndex = lines.findIndex((line) => line.includes(marker));
      if (lineIndex < 0 || !iframe.frameId) {
        continue;
      }
      // One level only: the child snapshot is built with recursion disabled,
      // and a failing child snapshot is skipped.
      const child = await buildCdpRoleSnapshot({
        ...params,
        frameId: iframe.frameId,
        recurseIframes: false,
      }).catch(() => null);
      if (!child?.lines.length) {
        continue;
      }
      Object.assign(refs, child.refs);
      lines.splice(lineIndex + 1, 0, ...child.lines.map((line) => ` ${line}`));
    }
  }
  // Stats are computed after merging, so they include child-frame refs.
  const refValues = Object.values(refs);
  return {
    lines,
    refs,
    stats: {
      refs: refValues.length,
      interactive: refValues.filter((ref) => INTERACTIVE_ROLES.has(ref.role)).length,
    },
  };
}
/**
 * Produces a role-style accessibility snapshot over a raw per-tab CDP
 * WebSocket, without Playwright.
 *
 * The page-level CDP domains are enabled first (best-effort), then the
 * snapshot is built with one level of iframe expansion and ref ids starting at
 * `e1`. When nothing is rendered, the snapshot text is a placeholder:
 * `(no interactive elements)` in interactive mode, otherwise `(empty page)`.
 *
 * @param opts.wsUrl - WebSocket URL of the tab.
 * @param opts.options - Rendering filters; defaults to none.
 * @param opts.urls - Whether to resolve and append link URLs.
 * @param opts.timeoutMs - Per-command timeout passed to the socket (default 5000).
 */
export async function snapshotRoleViaCdp(opts: {
  wsUrl: string;
  options?: CdpRoleSnapshotOptions;
  urls?: boolean;
  timeoutMs?: number;
}): Promise<{
  snapshot: string;
  refs: Record<string, CdpRoleRef>;
  stats: { lines: number; chars: number; refs: number; interactive: number };
}> {
  const roleOptions = opts.options ?? {};
  const commandTimeoutMs = opts.timeoutMs ?? 5000;

  return await withCdpSocket(
    opts.wsUrl,
    async (send) => {
      await prepareCdpPageSession(send);
      const { lines, refs, stats } = await buildCdpRoleSnapshot({
        send,
        options: roleOptions,
        urls: opts.urls,
        recurseIframes: true,
        nextRef: { value: 1 },
      });

      let snapshot = lines.join("\n").trim();
      if (!snapshot) {
        snapshot = opts.options?.interactive ? "(no interactive elements)" : "(empty page)";
      }

      return {
        snapshot,
        refs,
        stats: {
          lines: snapshot.split("\n").length,
          chars: snapshot.length,
          refs: stats.refs,
          interactive: stats.interactive,
        },
      };
    },
    { commandTimeoutMs },
  );
}
export async function snapshotDom(opts: {

View File

@@ -226,7 +226,7 @@ describe("browser client", () => {
}),
} as unknown as Response;
}
if (url.endsWith("/doctor")) {
if (url.includes("/doctor")) {
return {
ok: true,
json: async () => ({
@@ -270,6 +270,12 @@ describe("browser client", () => {
ok: true,
profile: "openclaw",
});
await expect(
browserDoctor("http://127.0.0.1:18791", { profile: "openclaw", deep: true }),
).resolves.toMatchObject({
ok: true,
profile: "openclaw",
});
await expect(browserTabs("http://127.0.0.1:18791")).resolves.toHaveLength(1);
await expect(
@@ -310,6 +316,7 @@ describe("browser client", () => {
expect(calls.some((c) => c.url.endsWith("/tabs"))).toBe(true);
expect(calls.some((c) => c.url.endsWith("/doctor"))).toBe(true);
expect(calls.some((c) => c.url.endsWith("/doctor?profile=openclaw&deep=true"))).toBe(true);
const open = calls.find((c) => c.url.endsWith("/tabs/open"));
expect(open?.init?.method).toBe("POST");

View File

@@ -82,11 +82,18 @@ export async function browserStatus(
/**
 * Fetches the browser doctor report from the control server's `/doctor` route.
 *
 * `opts.profile` and `opts.deep` are forwarded as `profile` / `deep=true` query
 * parameters; a deep request uses a 10000 ms timeout instead of 3000 ms.
 *
 * NOTE(review): this span appears to interleave pre-change and post-change
 * lines (duplicate `opts` parameter, two `const q`, two `timeoutMs` entries);
 * confirm against the real file.
 */
export async function browserDoctor(
  baseUrl?: string,
  opts?: { profile?: string },
  opts?: { profile?: string; deep?: boolean },
): Promise<BrowserDoctorReport> {
  const q = buildProfileQuery(opts?.profile);
  const params = new URLSearchParams();
  if (opts?.profile) {
    params.set("profile", opts.profile);
  }
  if (opts?.deep) {
    params.set("deep", "true");
  }
  // Only emit a query string when at least one parameter was set.
  const q = params.size ? `?${params.toString()}` : "";
  return await fetchBrowserJson<BrowserDoctorReport>(withBaseUrl(baseUrl, `/doctor${q}`), {
    timeoutMs: 3000,
    timeoutMs: opts?.deep ? 10000 : 3000,
  });
}

View File

@@ -1,7 +1,7 @@
import path from "node:path";
import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js";
import { resolveBrowserNavigationProxyMode } from "../browser-proxy-mode.js";
import { captureScreenshot, snapshotAria } from "../cdp.js";
import { captureScreenshot, snapshotAria, snapshotRoleViaCdp } from "../cdp.js";
import {
evaluateChromeMcpScript,
navigateChromeMcpPage,
@@ -627,10 +627,6 @@ export function registerBrowserAgentSnapshotRoutes(
});
}
if (plan.format === "ai") {
const pw = await requirePwAi(res, "ai snapshot");
if (!pw) {
return;
}
const roleSnapshotArgs = {
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
@@ -646,18 +642,54 @@ export function registerBrowserAgentSnapshotRoutes(
},
};
const cdpRoleSnapshot = async () => {
if (!tab.wsUrl) {
return null;
}
if (plan.selectorValue || plan.frameSelectorValue) {
return null;
}
return await snapshotRoleViaCdp({
wsUrl: tab.wsUrl,
urls: plan.urls,
options: {
interactive: plan.interactive ?? undefined,
compact: plan.compact ?? undefined,
maxDepth: plan.depth ?? undefined,
},
});
};
const pw = await getPwAiModule();
const snap = plan.wantsRoleSnapshot
? await pw.snapshotRoleViaPlaywright(roleSnapshotArgs)
: await pw.snapshotAiViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
ssrfPolicy: ctx.state().resolved.ssrfPolicy,
urls: plan.urls,
...(typeof plan.resolvedMaxChars === "number"
? { maxChars: plan.resolvedMaxChars }
: {}),
});
? pw
? await pw.snapshotRoleViaPlaywright(roleSnapshotArgs).catch(async (err) => {
const fallback = await cdpRoleSnapshot();
if (fallback) {
return fallback;
}
throw err;
})
: await cdpRoleSnapshot()
: pw
? await pw.snapshotAiViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
ssrfPolicy: ctx.state().resolved.ssrfPolicy,
urls: plan.urls,
...(typeof plan.resolvedMaxChars === "number"
? { maxChars: plan.resolvedMaxChars }
: {}),
})
: await cdpRoleSnapshot();
if (!snap) {
await requirePwAi(res, "ai snapshot");
return;
}
if (plan.labels) {
if (!pw) {
return jsonError(res, 501, "Snapshot labels require Playwright.");
}
const labeled = await pw.screenshotWithLabelsViaPlaywright({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,

View File

@@ -1,3 +1,4 @@
import { snapshotAria } from "../cdp.js";
import { getChromeMcpPid } from "../chrome-mcp.js";
import { resolveBrowserExecutableForPlatform } from "../chrome.executables.js";
import { resolveManagedBrowserHeadlessMode } from "../config.js";
@@ -129,6 +130,62 @@ async function buildBrowserStatus(req: BrowserRequest, ctx: BrowserRouteContext)
};
}
/**
 * Runs the "live snapshot" doctor check: proves that the current tab of the
 * requested profile can actually be inspected.
 *
 * - Profile resolution error → `fail` with that error as summary.
 * - Chrome MCP profiles → take a Chrome MCP snapshot; success is a `pass`.
 * - Otherwise, without a per-tab CDP WebSocket → `warn`.
 * - Otherwise take a 25-node ARIA snapshot over CDP; `pass` when it returns
 *   nodes, `warn` when it returns none.
 * - Any thrown error → `fail` with the stringified error and a fix hint.
 */
async function runBrowserLiveProbe(req: BrowserRequest, ctx: BrowserRouteContext) {
  const identity = { id: "live-snapshot", label: "Live snapshot" };

  const profileCtx = getProfileContext(req, ctx);
  if ("error" in profileCtx) {
    return { ...identity, status: "fail" as const, summary: profileCtx.error };
  }

  const capabilities = getBrowserProfileCapabilities(profileCtx.profile);
  try {
    const tab = await profileCtx.ensureTabAvailable();
    const tabLabel = tab.suggestedTargetId ?? tab.targetId;

    if (capabilities.usesChromeMcp) {
      const { takeChromeMcpSnapshot } = await import("../chrome-mcp.js");
      await takeChromeMcpSnapshot({
        profileName: profileCtx.profile.name,
        profile: profileCtx.profile,
        targetId: tab.targetId,
      });
      return {
        ...identity,
        status: "pass" as const,
        summary: `Chrome MCP snapshot succeeded on ${tabLabel}`,
      };
    }

    if (!tab.wsUrl) {
      return {
        ...identity,
        status: "warn" as const,
        summary: "No per-tab CDP WebSocket available for the lightweight live snapshot probe",
      };
    }

    const snap = await snapshotAria({ wsUrl: tab.wsUrl, limit: 25 });
    const nodeCount = snap.nodes.length;
    if (nodeCount > 0) {
      return {
        ...identity,
        status: "pass" as const,
        summary: `CDP accessibility snapshot returned ${nodeCount} nodes on ${tabLabel}`,
      };
    }
    return {
      ...identity,
      status: "warn" as const,
      summary: `CDP accessibility snapshot returned no nodes on ${tabLabel}`,
    };
  } catch (err) {
    return {
      ...identity,
      status: "fail" as const,
      summary: String(err),
      fixHint: "Run openclaw browser start, then retry with openclaw browser doctor --deep.",
    };
  }
}
/**
 * Reports whether `key` is present as an own property of the request query.
 * A missing (null/undefined) query is treated as having no keys.
 *
 * @param query - The request's query object, possibly absent.
 * @param key - The query parameter name to look for.
 * @returns `true` only when `key` is an own property of `query`.
 */
function hasQueryKey(query: BrowserRequest["query"], key: string): boolean {
  if (query == null) {
    return false;
  }
  // Go through Object.prototype so a query parameter named "hasOwnProperty" cannot shadow the check.
  return Object.prototype.hasOwnProperty.call(query, key);
}
@@ -201,7 +258,12 @@ export function registerBrowserBasicRoutes(app: BrowserRouteRegistrar, ctx: Brow
asyncBrowserRoute(async (req, res) => {
try {
const status = await buildBrowserStatus(req, ctx);
res.json(buildBrowserDoctorReport({ status }));
const report = buildBrowserDoctorReport({ status });
if (toBoolean(req.query.deep) === true || toBoolean(req.query.live) === true) {
report.checks.push(await runBrowserLiveProbe(req, ctx));
report.ok = report.checks.every((check) => check.status !== "fail");
}
res.json(report);
} catch (err) {
const mapped = toBrowserErrorResponse(err);
if (mapped) {

View File

@@ -291,6 +291,47 @@ describe("browser control server", () => {
dangerouslyAllowPrivateNetwork: true,
},
});
pwMocks.snapshotRoleViaPlaywright.mockRejectedValueOnce(new Error("playwright stale page"));
const fallback = (await realFetch(`${base}/snapshot?format=ai&interactive=true`).then((r) =>
r.json(),
)) as { ok: boolean; format?: string; snapshot?: string };
expect(fallback.ok).toBe(true);
expect(fallback.format).toBe("ai");
expect(fallback.snapshot).toContain("Fallback");
expect(cdpMocks.snapshotRoleViaCdp).toHaveBeenCalledWith({
wsUrl: "ws://127.0.0.1/devtools/page/abcd1234",
urls: undefined,
options: {
interactive: true,
compact: undefined,
maxDepth: undefined,
},
});
});
// Contract test: requesting /doctor with deep=true must append a "live-snapshot"
// check to the report and drive it through the CDP accessibility snapshot path.
it("agent contract: doctor deep runs a live snapshot probe", async () => {
const base = await startServerAndBase();
const realFetch = getBrowserTestFetch();
const report = (await realFetch(`${base}/doctor?deep=true`).then((r) => r.json())) as {
ok: boolean;
checks?: Array<{ id?: string; status?: string; summary?: string }>;
};
// The overall report stays healthy when the probe does not fail.
expect(report.ok).toBe(true);
// Other doctor checks may be present; only require that the live probe is among them and passed.
expect(report.checks).toEqual(
expect.arrayContaining([
expect.objectContaining({
id: "live-snapshot",
status: "pass",
}),
]),
);
// The probe is expected to call snapshotAria with the tab's WebSocket URL and a 25-node limit.
expect(cdpMocks.snapshotAria).toHaveBeenCalledWith({
wsUrl: "ws://127.0.0.1/devtools/page/abcd1234",
limit: 25,
});
});
it("agent contract: navigation + common act commands", async () => {

View File

@@ -90,10 +90,23 @@ const cdpMocks = vi.hoisted(() => ({
snapshotAria: vi.fn(async () => ({
nodes: [{ ref: "1", role: "link", name: "x", depth: 0 }],
})),
snapshotRoleViaCdp: vi.fn(async () => ({
snapshot: '- button "Fallback" [ref=e1]',
refs: { e1: { role: "button", name: "Fallback" } },
stats: { lines: 1, chars: 29, refs: 1, interactive: 1 },
})),
}));
export function getCdpMocks(): { createTargetViaCdp: MockFn; snapshotAria: MockFn } {
return cdpMocks as unknown as { createTargetViaCdp: MockFn; snapshotAria: MockFn };
export function getCdpMocks(): {
createTargetViaCdp: MockFn;
snapshotAria: MockFn;
snapshotRoleViaCdp: MockFn;
} {
return cdpMocks as unknown as {
createTargetViaCdp: MockFn;
snapshotAria: MockFn;
snapshotRoleViaCdp: MockFn;
};
}
type ExecuteActMockAction = { kind: string } & Record<string, unknown>;
@@ -175,6 +188,11 @@ const pwMocks = vi.hoisted(() => ({
selectOptionViaPlaywright: vi.fn(async (_opts?: unknown) => {}),
setInputFilesViaPlaywright: vi.fn(async () => {}),
snapshotAiViaPlaywright: vi.fn(async () => ({ snapshot: "ok" })),
snapshotRoleViaPlaywright: vi.fn(async () => ({
snapshot: '- button "Role" [ref=e1]',
refs: { e1: { role: "button", name: "Role" } },
stats: { lines: 1, chars: 24, refs: 1, interactive: 1 },
})),
storeAriaSnapshotRefsViaPlaywright: vi.fn(async () => {}),
traceStopViaPlaywright: vi.fn(async () => {}),
takeScreenshotViaPlaywright: vi.fn(async () => ({
@@ -445,6 +463,7 @@ vi.mock("./cdp.js", () => ({
createTargetViaCdp: cdpMocks.createTargetViaCdp,
normalizeCdpWsUrl: vi.fn((wsUrl: string) => wsUrl),
snapshotAria: cdpMocks.snapshotAria,
snapshotRoleViaCdp: cdpMocks.snapshotRoleViaCdp,
getHeadersWithAuth: vi.fn(() => ({})),
appendCdpPath: vi.fn((cdpUrl: string, cdpPath: string) => {
const base = cdpUrl.replace(/\/$/, "");

View File

@@ -135,7 +135,7 @@ function formatDoctorLine(check: BrowserDoctorCheck): string {
return `${check.ok ? "OK" : "FAIL"} ${check.name}${check.detail ? `: ${check.detail}` : ""}`;
}
async function runBrowserDoctor(parent: BrowserParentOpts, profile?: string) {
async function runBrowserDoctor(parent: BrowserParentOpts, profile?: string, deep?: boolean) {
const checks: BrowserDoctorCheck[] = [];
let status: BrowserStatus | null = null;
@@ -218,6 +218,42 @@ async function runBrowserDoctor(parent: BrowserParentOpts, profile?: string) {
}
}
if (deep && status.running) {
try {
const result = await callBrowserRequest<
| { ok: true; format: "aria"; nodes?: unknown[] }
| { ok: true; format: "ai"; snapshot?: string }
>(
parent,
{
method: "GET",
path: "/snapshot",
query: resolveProfileQuery(profile, { format: "aria", limit: 25 }),
},
{ timeoutMs: 10_000 },
);
const count =
result.format === "aria"
? Array.isArray(result.nodes)
? result.nodes.length
: 0
: typeof result.snapshot === "string"
? result.snapshot.split("\n").length
: 0;
checks.push({
name: "live-snapshot",
ok: count > 0,
detail: count > 0 ? `${count} nodes/lines` : "snapshot returned no content",
});
} catch (err) {
checks.push({
name: "live-snapshot",
ok: false,
detail: String(err),
});
}
}
return { ok: checks.every((check) => check.ok), checks, status };
}
@@ -296,11 +332,12 @@ export function registerBrowserManageCommands(
browser
.command("doctor")
.description("Check browser plugin readiness")
.action(async (_opts, cmd) => {
.option("--deep", "Run a live snapshot probe")
.action(async (opts: { deep?: boolean }, cmd) => {
const parent = parentOpts(cmd);
const profile = parent?.browserProfile;
await runBrowserCommand(async () => {
const result = await runBrowserDoctor(parent, profile);
const result = await runBrowserDoctor(parent, profile, opts.deep === true);
if (printJsonResult(parent, result)) {
return;
}