diff --git a/docs/cli/browser.md b/docs/cli/browser.md index baf81622f59..06a1aaf4ad7 100644 --- a/docs/cli/browser.md +++ b/docs/cli/browser.md @@ -55,6 +55,7 @@ Detailed guidance: [Browser troubleshooting](/tools/browser#cdp-startup-failure- ```bash openclaw browser status openclaw browser doctor +openclaw browser doctor --deep openclaw browser start openclaw browser start --headless openclaw browser stop @@ -63,6 +64,8 @@ openclaw browser --browser-profile openclaw reset-profile Notes: +- `doctor --deep` adds a live snapshot probe. It is useful when basic CDP + readiness is green but you want proof that the current tab can be inspected. - For `attachOnly` and remote CDP profiles, `openclaw browser stop` closes the active control session and clears temporary emulation overrides even when OpenClaw did not launch the browser process itself. diff --git a/docs/tools/browser-control.md b/docs/tools/browser-control.md index 990e25ae1e9..4df81217457 100644 --- a/docs/tools/browser-control.md +++ b/docs/tools/browser-control.md @@ -75,6 +75,10 @@ a clear 501 error. What still works without Playwright: - ARIA snapshots +- Role-style accessibility snapshots (`--interactive`, `--compact`, + `--depth`, `--efficient`) when a per-tab CDP WebSocket is available. This is + a fallback for inspection and ref discovery; Playwright remains the primary + action engine. - Page screenshots for the managed `openclaw` browser when a per-tab CDP WebSocket is available - Page screenshots for `existing-session` / Chrome MCP profiles @@ -84,7 +88,7 @@ What still needs Playwright: - `navigate` - `act` -- AI snapshots / role snapshots +- AI snapshots that depend on Playwright's native AI snapshot format - CSS-selector element screenshots (`--element`) - full browser PDF export @@ -256,9 +260,12 @@ OpenClaw supports two “snapshot” styles: - Output: the accessibility tree as structured nodes. - Actions: `openclaw browser click ax12` works when the snapshot path can bind the ref through Playwright and Chrome backend DOM ids. - - If Playwright is unavailable, ARIA snapshots can still be useful for - inspection, but refs may not be actionable. Re-snapshot with `--format ai` - or `--interactive` when you need action refs. +- If Playwright is unavailable, ARIA snapshots can still be useful for + inspection, but refs may not be actionable. Re-snapshot with `--format ai` + or `--interactive` when you need action refs. +- Docker proof for the raw-CDP fallback path: `pnpm test:docker:browser-cdp-snapshot` + starts Chromium with CDP, runs `browser doctor --deep`, and verifies role + snapshots include link URLs, cursor-promoted clickables, and iframe metadata. Ref behavior: diff --git a/docs/tools/browser.md b/docs/tools/browser.md index a399e928650..def392a9852 100644 --- a/docs/tools/browser.md +++ b/docs/tools/browser.md @@ -35,6 +35,7 @@ agent automation and verification. ```bash openclaw browser --browser-profile openclaw doctor +openclaw browser --browser-profile openclaw doctor --deep openclaw browser --browser-profile openclaw status openclaw browser --browser-profile openclaw start openclaw browser --browser-profile openclaw open https://example.com diff --git a/extensions/browser/src/browser/cdp.internal.test.ts b/extensions/browser/src/browser/cdp.internal.test.ts index 159d3f9ea00..ac4fcd65757 100644 --- a/extensions/browser/src/browser/cdp.internal.test.ts +++ b/extensions/browser/src/browser/cdp.internal.test.ts @@ -16,6 +16,7 @@ import { type RawAXNode, snapshotAria, snapshotDom, + snapshotRoleViaCdp, } from "./cdp.js"; /** @@ -77,6 +78,16 @@ async function startMockWsServer(handle: CdpReplyHandler) { params?: Record; }; handle(msg, socket); + if ( + msg.method === "Page.enable" || + msg.method === "Runtime.enable" || + msg.method === "Network.enable" || + msg.method === "DOM.enable" || + msg.method === "Accessibility.enable" || + msg.method === "Runtime.runIfWaitingForDebugger" + ) { + socket.send(JSON.stringify({ id: msg.id, result: {} })); + } }); }); return { @@ -475,6 +486,204 @@ describe("cdp internal", () => { }); }); + describe("snapshotRoleViaCdp", () => { + it("builds role refs, promotes cursor-interactive nodes, and appends link urls", async () => { + const server = await startMockWsServer((msg, socket) => { + if (msg.method === "Accessibility.enable" || msg.method === "Page.enable") { + socket.send(JSON.stringify({ id: msg.id, result: {} })); + return; + } + if (msg.method === "Accessibility.getFullAXTree") { + socket.send( + JSON.stringify({ + id: msg.id, + result: { + nodes: [ + { + nodeId: "1", + role: { value: "RootWebArea" }, + name: { value: "" }, + childIds: ["2", "3", "4"], + }, + { + nodeId: "2", + role: { value: "button" }, + name: { value: "Save" }, + backendDOMNodeId: 22, + childIds: [], + }, + { + nodeId: "3", + role: { value: "link" }, + name: { value: "Docs" }, + backendDOMNodeId: 33, + childIds: [], + }, + { + nodeId: "4", + role: { value: "generic" }, + name: { value: "" }, + backendDOMNodeId: 44, + childIds: [], + }, + ], + }, + }), + ); + return; + } + if (msg.method === "Runtime.evaluate") { + const expression = + typeof msg.params?.expression === "string" ? msg.params.expression : ""; + if (expression.includes('querySelectorAll("*"')) { + socket.send( + JSON.stringify({ + id: msg.id, + result: { + result: { + value: [ + { + text: "Clickable Card", + tagName: "div", + hasCursorPointer: true, + hasOnClick: true, + }, + ], + }, + }, + }), + ); + return; + } + socket.send(JSON.stringify({ id: msg.id, result: { result: { value: true } } })); + return; + } + if (msg.method === "DOM.getDocument") { + socket.send(JSON.stringify({ id: msg.id, result: { root: { nodeId: 1 } } })); + return; + } + if (msg.method === "DOM.querySelectorAll") { + socket.send(JSON.stringify({ id: msg.id, result: { nodeIds: [44] } })); + return; + } + if (msg.method === "DOM.describeNode") { + socket.send( + JSON.stringify({ + id: msg.id, + result: { node: { backendNodeId: 44, attributes: ["data-openclaw-cdp-ci", "0"] } }, + }), + ); + return; + } + if (msg.method === "DOM.resolveNode") { + socket.send(JSON.stringify({ id: msg.id, result: { object: { objectId: "link1" } } })); + return; + } + if (msg.method === "Runtime.callFunctionOn") { + socket.send( + JSON.stringify({ + id: msg.id, + result: { result: { value: "https://docs.openclaw.ai/" } }, + }), + ); + } + }); + wss = server.wss; + + const snap = await snapshotRoleViaCdp({ + wsUrl: server.wsUrl, + urls: true, + options: { interactive: true }, + }); + + expect(snap.snapshot).toContain('- button "Save" [ref=e1]'); + expect(snap.snapshot).toContain('- link "Docs" [ref=e2] [url=https://docs.openclaw.ai/]'); + expect(snap.snapshot).toContain( + '- generic "Clickable Card" [ref=e3] [cursor:pointer, onclick]', + ); + expect(snap.refs.e3?.backendDOMNodeId).toBe(44); + }); + + it("expands one level of iframe snapshots with frame metadata", async () => { + const server = await startMockWsServer((msg, socket) => { + if ( + msg.method === "Accessibility.enable" || + msg.method === "Page.enable" || + msg.method === "Runtime.evaluate" + ) { + socket.send( + JSON.stringify({ + id: msg.id, + result: msg.method === "Runtime.evaluate" ? { result: { value: [] } } : {}, + }), + ); + return; + } + if (msg.method === "Accessibility.getFullAXTree") { + const frameId = msg.params?.frameId; + socket.send( + JSON.stringify({ + id: msg.id, + result: { + nodes: frameId + ? [ + { + nodeId: "c1", + role: { value: "RootWebArea" }, + name: { value: "" }, + childIds: ["c2"], + }, + { + nodeId: "c2", + role: { value: "button" }, + name: { value: "Inside" }, + backendDOMNodeId: 55, + childIds: [], + }, + ] + : [ + { + nodeId: "1", + role: { value: "RootWebArea" }, + name: { value: "" }, + childIds: ["2"], + }, + { + nodeId: "2", + role: { value: "Iframe" }, + name: { value: "Child" }, + backendDOMNodeId: 44, + childIds: [], + }, + ], + }, + }), + ); + return; + } + if (msg.method === "DOM.describeNode") { + socket.send( + JSON.stringify({ + id: msg.id, + result: { node: { contentDocument: { frameId: "FRAME_1" } } }, + }), + ); + } + }); + wss = server.wss; + + const snap = await snapshotRoleViaCdp({ + wsUrl: server.wsUrl, + options: { interactive: true }, + }); + + expect(snap.snapshot).toContain('- Iframe "Child" [ref=e1]'); + expect(snap.snapshot).toContain(' - button "Inside" [ref=e2]'); + expect(snap.refs.e1?.frameId).toBe("FRAME_1"); + expect(snap.refs.e2?.frameId).toBe("FRAME_1"); + }); + }); + describe("snapshotDom", () => { it("returns the nodes array from the evaluated expression", async () => { const server = await startMockWsServer((msg, socket) => { diff --git a/extensions/browser/src/browser/cdp.test.ts b/extensions/browser/src/browser/cdp.test.ts index d99dad6dd89..a68efdc27a6 100644 --- a/extensions/browser/src/browser/cdp.test.ts +++ b/extensions/browser/src/browser/cdp.test.ts @@ -49,6 +49,19 @@ describe("cdp", () => { params?: Record; }; onMessage(msg, socket); + if (msg.method === "Target.attachToTarget") { + socket.send(JSON.stringify({ id: msg.id, result: { sessionId: "S1" } })); + } else if ( + msg.method === "Target.detachFromTarget" || + msg.method === "Page.enable" || + msg.method === "Runtime.enable" || + msg.method === "Network.enable" || + msg.method === "DOM.enable" || + msg.method === "Accessibility.enable" || + msg.method === "Runtime.runIfWaitingForDebugger" + ) { + socket.send(JSON.stringify({ id: msg.id, result: {} })); + } }); }); return wsPort; @@ -87,7 +100,11 @@ describe("cdp", () => { }); it("creates a target via the browser websocket", async () => { + const methods: string[] = []; const wsPort = await startWsServerWithMessages((msg, socket) => { + if (msg.method) { + methods.push(msg.method); + } if (msg.method !== "Target.createTarget") { return; } @@ -109,6 +126,19 @@ describe("cdp", () => { }); expect(created.targetId).toBe("TARGET_123"); + expect(methods).toEqual( + expect.arrayContaining([ + "Target.createTarget", + "Target.attachToTarget", + "Page.enable", + "Runtime.enable", + "Network.enable", + "DOM.enable", + "Accessibility.enable", + "Runtime.runIfWaitingForDebugger", + "Target.detachFromTarget", + ]), + ); }); it("creates a target via direct WebSocket URL (skips /json/version)", async () => { @@ -447,6 +477,18 @@ describe("cdp", () => { }; if (msg.method === "Target.createTarget") { socket.send(JSON.stringify({ id: msg.id, result: { targetId: "ROOT_FALLBACK" } })); + } else if (msg.method === "Target.attachToTarget") { + socket.send(JSON.stringify({ id: msg.id, result: { sessionId: "S1" } })); + } else if ( + msg.method === "Target.detachFromTarget" || + msg.method === "Page.enable" || + msg.method === "Runtime.enable" || + msg.method === "Network.enable" || + msg.method === "DOM.enable" || + msg.method === "Accessibility.enable" || + msg.method === "Runtime.runIfWaitingForDebugger" + ) { + socket.send(JSON.stringify({ id: msg.id, result: {} })); } }); }); diff --git a/extensions/browser/src/browser/cdp.ts b/extensions/browser/src/browser/cdp.ts index 750fdd7a190..941b9591242 100644 --- a/extensions/browser/src/browser/cdp.ts +++ b/extensions/browser/src/browser/cdp.ts @@ -2,6 +2,7 @@ import type { SsrFPolicy } from "../infra/net/ssrf.js"; import { appendCdpPath, assertCdpEndpointAllowed, + type CdpSendFn, fetchJson, isDirectCdpWebSocketEndpoint, isLoopbackHost, @@ -10,6 +11,7 @@ import { withCdpSocket, } from "./cdp.helpers.js"; import { assertBrowserNavigationAllowed, withBrowserNavigationPolicy } from "./navigation-guard.js"; +import { CONTENT_ROLES, INTERACTIVE_ROLES, STRUCTURAL_ROLES } from "./snapshot-roles.js"; export { appendCdpPath, @@ -254,9 +256,13 @@ export async function createTargetViaCdp(opts: { if (!targetId) { throw new Error("CDP Target.createTarget returned no targetId"); } + await prepareCdpTargetSession(send, targetId); return { targetId }; }, - { handshakeTimeoutMs: opts.timeouts?.handshakeTimeoutMs }, + { + commandTimeoutMs: opts.timeouts?.httpTimeoutMs ?? 5000, + handshakeTimeoutMs: opts.timeouts?.handshakeTimeoutMs, + }, ); } catch (err) { lastError = err; @@ -268,6 +274,33 @@ export async function createTargetViaCdp(opts: { throw new Error("CDP Target.createTarget failed"); } +async function prepareCdpTargetSession(send: CdpSendFn, targetId: string): Promise { + const attached = (await send("Target.attachToTarget", { + targetId, + flatten: true, + }).catch(() => null)) as { sessionId?: unknown } | null; + const sessionId = typeof attached?.sessionId === "string" ? attached.sessionId : undefined; + if (!sessionId) { + return; + } + try { + await prepareCdpPageSession(send, sessionId); + } finally { + await send("Target.detachFromTarget", { sessionId }).catch(() => {}); + } +} + +async function prepareCdpPageSession(send: CdpSendFn, sessionId?: string): Promise { + await Promise.all([ + send("Page.enable", undefined, sessionId).catch(() => {}), + send("Runtime.enable", undefined, sessionId).catch(() => {}), + send("Network.enable", undefined, sessionId).catch(() => {}), + send("DOM.enable", undefined, sessionId).catch(() => {}), + send("Accessibility.enable", undefined, sessionId).catch(() => {}), + ]); + await send("Runtime.runIfWaitingForDebugger", undefined, sessionId).catch(() => {}); +} + export type CdpRemoteObject = { type: string; subtype?: string; @@ -423,16 +456,505 @@ export function formatAriaSnapshot(nodes: RawAXNode[], limit: number): AriaSnaps export async function snapshotAria(opts: { wsUrl: string; limit?: number; + timeoutMs?: number; }): Promise<{ nodes: AriaSnapshotNode[] }> { const limit = Math.max(1, Math.min(2000, Math.floor(opts.limit ?? 500))); - return await withCdpSocket(opts.wsUrl, async (send) => { - await send("Accessibility.enable").catch(() => {}); - const res = (await send("Accessibility.getFullAXTree")) as { - nodes?: RawAXNode[]; + return await withCdpSocket( + opts.wsUrl, + async (send) => { + await prepareCdpPageSession(send); + const res = (await send("Accessibility.getFullAXTree")) as { + nodes?: RawAXNode[]; + }; + const nodes = Array.isArray(res?.nodes) ? res.nodes : []; + return { nodes: formatAriaSnapshot(nodes, limit) }; + }, + { commandTimeoutMs: opts.timeoutMs ?? 5000 }, + ); +} + +export type CdpRoleRef = { + role: string; + name?: string; + nth?: number; + backendDOMNodeId?: number; + frameId?: string; +}; + +export type CdpRoleSnapshotOptions = { + interactive?: boolean; + compact?: boolean; + maxDepth?: number; +}; + +type CursorInteractiveInfo = { + text: string; + tagName: string; + hasOnClick?: boolean; + hasCursorPointer?: boolean; + hasTabIndex?: boolean; + isEditable?: boolean; + hiddenInputType?: string; +}; + +type RoleTreeNode = { + raw: RawAXNode; + role: string; + name: string; + value: string; + backendDOMNodeId?: number; + children: number[]; + parent?: number; + depth: number; + ref?: string; + nth?: number; + url?: string; + cursorInfo?: CursorInteractiveInfo; + frameId?: string; +}; + +function buildRoleTree(nodes: RawAXNode[]): { tree: RoleTreeNode[]; roots: number[] } { + const byId = new Map(); + const tree: RoleTreeNode[] = []; + for (const raw of nodes) { + const nodeId = raw.nodeId ?? ""; + if (!nodeId) { + continue; + } + byId.set(nodeId, tree.length); + tree.push({ + raw, + role: axValue(raw.role) || "unknown", + name: axValue(raw.name), + value: axValue(raw.value), + backendDOMNodeId: + typeof raw.backendDOMNodeId === "number" && raw.backendDOMNodeId > 0 + ? Math.floor(raw.backendDOMNodeId) + : undefined, + children: [], + depth: 0, + }); + } + + const childIndexes = new Set(); + for (let index = 0; index < tree.length; index += 1) { + for (const childId of tree[index]?.raw.childIds ?? []) { + const childIndex = byId.get(childId); + if (childIndex === undefined) { + continue; + } + tree[index]?.children.push(childIndex); + tree[childIndex].parent = index; + childIndexes.add(childIndex); + } + } + + const roots = tree.map((_node, index) => index).filter((index) => !childIndexes.has(index)); + const stack = roots.map((index) => ({ index, depth: 0 })); + while (stack.length) { + const current = stack.pop(); + if (!current) { + break; + } + tree[current.index].depth = current.depth; + for (const child of (tree[current.index]?.children ?? []).toReversed()) { + stack.push({ index: child, depth: current.depth + 1 }); + } + } + return { tree, roots: roots.length ? roots : tree.length ? [0] : [] }; +} + +function shouldIncludeRoleNode(node: RoleTreeNode, options: CdpRoleSnapshotOptions): boolean { + const role = node.role.toLowerCase(); + if (options.maxDepth !== undefined && node.depth > options.maxDepth) { + return false; + } + if (options.interactive) { + return INTERACTIVE_ROLES.has(role) || role === "iframe" || Boolean(node.cursorInfo); + } + if (options.compact && STRUCTURAL_ROLES.has(role) && !node.name && !node.ref) { + return false; + } + return true; +} + +function cursorSuffix(info?: CursorInteractiveInfo): string { + if (!info) { + return ""; + } + const parts = [ + info.hasCursorPointer ? "cursor:pointer" : undefined, + info.hasOnClick ? "onclick" : undefined, + info.hasTabIndex ? "tabindex" : undefined, + info.isEditable ? "contenteditable" : undefined, + info.hiddenInputType ? `hidden-${info.hiddenInputType}` : undefined, + ].filter(Boolean); + return parts.length ? ` [${parts.join(", ")}]` : ""; +} + +function renderRoleTree( + tree: RoleTreeNode[], + index: number, + output: string[], + options: CdpRoleSnapshotOptions, + indentOffset = 0, +): void { + const node = tree[index]; + if (!node) { + return; + } + if (shouldIncludeRoleNode(node, options)) { + const indent = " ".repeat(Math.max(0, node.depth + indentOffset)); + const name = node.name ? ` "${node.name.replaceAll('"', '\\"')}"` : ""; + const ref = node.ref ? ` [ref=${node.ref}]` : ""; + const nth = node.nth !== undefined && node.nth > 0 ? ` [nth=${node.nth}]` : ""; + const value = node.value ? ` value="${node.value.replaceAll('"', '\\"')}"` : ""; + const url = node.url ? ` [url=${node.url}]` : ""; + output.push( + `${indent}- ${node.role}${name}${ref}${nth}${value}${url}${cursorSuffix(node.cursorInfo)}`, + ); + } + for (const child of node.children) { + renderRoleTree(tree, child, output, options, indentOffset); + } +} + +async function findCursorInteractiveElements( + send: CdpSendFn, + sessionId?: string, +): Promise> { + const attr = "data-openclaw-cdp-ci"; + const evaluated = (await send( + "Runtime.evaluate", + { + expression: `(() => { + const out = []; + const roles = new Set(["button","link","textbox","checkbox","radio","combobox","listbox","menuitem","menuitemcheckbox","menuitemradio","option","searchbox","slider","spinbutton","switch","tab","treeitem"]); + const tags = new Set(["a","button","input","select","textarea","details","summary"]); + document.querySelectorAll("[${attr}]").forEach((el) => el.removeAttribute("${attr}")); + for (const el of Array.from(document.body ? document.body.querySelectorAll("*") : [])) { + if (!(el instanceof HTMLElement) || el.closest("[hidden],[aria-hidden='true']")) continue; + const tagName = el.tagName.toLowerCase(); + if (tags.has(tagName)) continue; + const role = String(el.getAttribute("role") || "").toLowerCase(); + if (roles.has(role)) continue; + const style = getComputedStyle(el); + const hasCursorPointer = style.cursor === "pointer"; + const hasOnClick = el.hasAttribute("onclick") || el.onclick !== null; + const tabIndex = el.getAttribute("tabindex"); + const hasTabIndex = tabIndex !== null && tabIndex !== "-1"; + const ce = el.getAttribute("contenteditable"); + const isEditable = ce === "" || ce === "true"; + if (!hasCursorPointer && !hasOnClick && !hasTabIndex && !isEditable) continue; + if (hasCursorPointer && !hasOnClick && !hasTabIndex && !isEditable) { + const parent = el.parentElement; + if (parent && getComputedStyle(parent).cursor === "pointer") continue; + } + const rect = el.getBoundingClientRect(); + if (rect.width <= 0 || rect.height <= 0) continue; + let hiddenInputType = ""; + const hiddenInput = el.querySelector("input[type='radio'],input[type='checkbox']"); + if (hiddenInput instanceof HTMLInputElement) { + const hiddenStyle = getComputedStyle(hiddenInput); + if (hiddenInput.hidden || hiddenStyle.display === "none" || hiddenStyle.visibility === "hidden") { + hiddenInputType = hiddenInput.type; + } + } + el.setAttribute("${attr}", String(out.length)); + out.push({ + text: String(el.textContent || "").replace(/\\s+/g, " ").trim().slice(0, 100), + tagName, + hasCursorPointer, + hasOnClick, + hasTabIndex, + isEditable, + hiddenInputType, + }); + } + return out; + })()`, + returnByValue: true, + awaitPromise: false, + }, + sessionId, + ).catch(() => null)) as { result?: { value?: unknown } } | null; + const entries = Array.isArray(evaluated?.result?.value) + ? (evaluated.result.value as CursorInteractiveInfo[]) + : []; + if (!entries.length) { + return new Map(); + } + + const doc = (await send("DOM.getDocument", { depth: 0 }, sessionId).catch(() => null)) as { + root?: { nodeId?: number }; + } | null; + const rootNodeId = doc?.root?.nodeId; + if (typeof rootNodeId !== "number") { + return new Map(); + } + const queried = (await send( + "DOM.querySelectorAll", + { nodeId: rootNodeId, selector: `[${attr}]` }, + sessionId, + ).catch(() => null)) as { nodeIds?: number[] } | null; + const out = new Map(); + await Promise.all( + (queried?.nodeIds ?? []).map(async (nodeId) => { + const described = (await send("DOM.describeNode", { nodeId }, sessionId).catch( + () => null, + )) as { node?: { backendNodeId?: number; attributes?: string[] } } | null; + const attrs = described?.node?.attributes ?? []; + const attrIndex = attrs.indexOf(attr); + const rawIndex = attrIndex >= 0 ? attrs[attrIndex + 1] : undefined; + const index = typeof rawIndex === "string" ? Number(rawIndex) : Number.NaN; + const backendNodeId = described?.node?.backendNodeId; + if (typeof backendNodeId === "number" && Number.isInteger(index) && entries[index]) { + out.set(backendNodeId, entries[index]); + } + }), + ); + await send( + "Runtime.evaluate", + { + expression: `document.querySelectorAll("[${attr}]").forEach((el) => el.removeAttribute("${attr}"))`, + returnByValue: true, + }, + sessionId, + ).catch(() => {}); + return out; +} + +async function resolveLinkUrls( + send: CdpSendFn, + refs: Record, + sessionId?: string, +): Promise> { + const out = new Map(); + await Promise.all( + Object.values(refs).map(async (ref) => { + if (ref.role !== "link" || !ref.backendDOMNodeId) { + return; + } + const resolved = (await send( + "DOM.resolveNode", + { backendNodeId: ref.backendDOMNodeId }, + sessionId, + ).catch(() => null)) as { object?: { objectId?: string } } | null; + const objectId = resolved?.object?.objectId; + if (!objectId) { + return; + } + const hrefResult = (await send( + "Runtime.callFunctionOn", + { + objectId, + functionDeclaration: "function() { return this.href || ''; }", + returnByValue: true, + }, + sessionId, + ).catch(() => null)) as { result?: { value?: unknown } } | null; + const href = typeof hrefResult?.result?.value === "string" ? hrefResult.result.value : ""; + if (href) { + out.set(ref.backendDOMNodeId, href); + } + }), + ); + return out; +} + +async function resolveIframeFrameIds( + send: CdpSendFn, + tree: RoleTreeNode[], + sessionId?: string, +): Promise> { + const out = new Map(); + await Promise.all( + tree.map(async (node) => { + if (node.role.toLowerCase() !== "iframe" || !node.backendDOMNodeId) { + return; + } + const described = (await send( + "DOM.describeNode", + { backendNodeId: node.backendDOMNodeId, depth: 1 }, + sessionId, + ).catch(() => null)) as { + node?: { frameId?: string; contentDocument?: { frameId?: string } }; + } | null; + const frameId = described?.node?.contentDocument?.frameId ?? described?.node?.frameId ?? ""; + if (frameId) { + out.set(node.backendDOMNodeId, frameId); + } + }), + ); + return out; +} + +async function buildCdpRoleSnapshot(params: { + send: CdpSendFn; + sessionId?: string; + frameId?: string; + options: CdpRoleSnapshotOptions; + urls?: boolean; + recurseIframes?: boolean; + nextRef: { value: number }; +}): Promise<{ + lines: string[]; + refs: Record; + stats: { refs: number; interactive: number }; +}> { + const res = (await params.send( + "Accessibility.getFullAXTree", + params.frameId ? { frameId: params.frameId } : undefined, + params.sessionId, + )) as { nodes?: RawAXNode[] }; + const { tree, roots } = buildRoleTree(Array.isArray(res.nodes) ? res.nodes : []); + const cursorElements = await findCursorInteractiveElements(params.send, params.sessionId); + for (const node of tree) { + if (node.backendDOMNodeId && cursorElements.has(node.backendDOMNodeId)) { + const cursorInfo = cursorElements.get(node.backendDOMNodeId); + node.cursorInfo = cursorInfo; + if (!node.name && cursorInfo?.text) { + node.name = cursorInfo.text; + } + } + } + + const counts = new Map(); + const refsByKey = new Map(); + const refs: Record = {}; + for (const node of tree) { + const role = node.role.toLowerCase(); + const shouldRef = + INTERACTIVE_ROLES.has(role) || + (CONTENT_ROLES.has(role) && Boolean(node.name)) || + role === "iframe" || + Boolean(node.cursorInfo); + if (!shouldRef) { + continue; + } + const key = `${role}:${node.name}`; + const nth = counts.get(key) ?? 0; + counts.set(key, nth + 1); + const ref = `e${params.nextRef.value}`; + params.nextRef.value += 1; + node.ref = ref; + node.nth = nth; + refsByKey.set(key, [...(refsByKey.get(key) ?? []), ref]); + refs[ref] = { + role, + ...(node.name ? { name: node.name } : {}), + ...(nth > 0 ? { nth } : {}), + ...(node.backendDOMNodeId ? { backendDOMNodeId: node.backendDOMNodeId } : {}), + ...(params.frameId ? { frameId: params.frameId } : {}), }; - const nodes = Array.isArray(res?.nodes) ? res.nodes : []; - return { nodes: formatAriaSnapshot(nodes, limit) }; - }); + } + for (const refList of refsByKey.values()) { + if (refList.length > 1) { + continue; + } + const ref = refList[0]; + if (ref) { + delete refs[ref]?.nth; + const node = tree.find((entry) => entry.ref === ref); + if (node) { + delete node.nth; + } + } + } + + const iframeFrameIds = await resolveIframeFrameIds(params.send, tree, params.sessionId); + for (const node of tree) { + if (node.backendDOMNodeId && iframeFrameIds.has(node.backendDOMNodeId)) { + node.frameId = iframeFrameIds.get(node.backendDOMNodeId); + if (node.ref && refs[node.ref]) { + refs[node.ref].frameId = node.frameId; + } + } + } + + if (params.urls) { + const urls = await resolveLinkUrls(params.send, refs, params.sessionId); + for (const node of tree) { + if (node.backendDOMNodeId && urls.has(node.backendDOMNodeId)) { + node.url = urls.get(node.backendDOMNodeId); + } + } + } + + const lines: string[] = []; + for (const root of roots) { + renderRoleTree(tree, root, lines, params.options); + } + + if (params.recurseIframes) { + const iframeNodes = tree.filter((node) => node.ref && node.frameId); + for (const iframe of iframeNodes) { + const marker = `[ref=${iframe.ref}]`; + const lineIndex = lines.findIndex((line) => line.includes(marker)); + if (lineIndex < 0 || !iframe.frameId) { + continue; + } + const child = await buildCdpRoleSnapshot({ + ...params, + frameId: iframe.frameId, + recurseIframes: false, + }).catch(() => null); + if (!child?.lines.length) { + continue; + } + Object.assign(refs, child.refs); + lines.splice(lineIndex + 1, 0, ...child.lines.map((line) => ` ${line}`)); + } + } + + const refValues = Object.values(refs); + return { + lines, + refs, + stats: { + refs: refValues.length, + interactive: refValues.filter((ref) => INTERACTIVE_ROLES.has(ref.role)).length, + }, + }; +} + +export async function snapshotRoleViaCdp(opts: { + wsUrl: string; + options?: CdpRoleSnapshotOptions; + urls?: boolean; + timeoutMs?: number; +}): Promise<{ + snapshot: string; + refs: Record; + stats: { lines: number; chars: number; refs: number; interactive: number }; +}> { + return await withCdpSocket( + opts.wsUrl, + async (send) => { + await prepareCdpPageSession(send); + const built = await buildCdpRoleSnapshot({ + send, + options: opts.options ?? {}, + urls: opts.urls, + recurseIframes: true, + nextRef: { value: 1 }, + }); + const snapshot = + built.lines.join("\n").trim() || + (opts.options?.interactive ? "(no interactive elements)" : "(empty page)"); + return { + snapshot, + refs: built.refs, + stats: { + lines: snapshot.split("\n").length, + chars: snapshot.length, + refs: built.stats.refs, + interactive: built.stats.interactive, + }, + }; + }, + { commandTimeoutMs: opts.timeoutMs ?? 5000 }, + ); } export async function snapshotDom(opts: { diff --git a/extensions/browser/src/browser/client.test.ts b/extensions/browser/src/browser/client.test.ts index fddd91be099..1d2fa5fd018 100644 --- a/extensions/browser/src/browser/client.test.ts +++ b/extensions/browser/src/browser/client.test.ts @@ -226,7 +226,7 @@ describe("browser client", () => { }), } as unknown as Response; } - if (url.endsWith("/doctor")) { + if (url.includes("/doctor")) { return { ok: true, json: async () => ({ @@ -270,6 +270,12 @@ describe("browser client", () => { ok: true, profile: "openclaw", }); + await expect( + browserDoctor("http://127.0.0.1:18791", { profile: "openclaw", deep: true }), + ).resolves.toMatchObject({ + ok: true, + profile: "openclaw", + }); await expect(browserTabs("http://127.0.0.1:18791")).resolves.toHaveLength(1); await expect( @@ -310,6 +316,7 @@ describe("browser client", () => { expect(calls.some((c) => c.url.endsWith("/tabs"))).toBe(true); expect(calls.some((c) => c.url.endsWith("/doctor"))).toBe(true); + expect(calls.some((c) => c.url.endsWith("/doctor?profile=openclaw&deep=true"))).toBe(true); const open = calls.find((c) => c.url.endsWith("/tabs/open")); expect(open?.init?.method).toBe("POST"); diff --git a/extensions/browser/src/browser/client.ts b/extensions/browser/src/browser/client.ts index 12859be7792..277b5ff90e2 100644 --- a/extensions/browser/src/browser/client.ts +++ b/extensions/browser/src/browser/client.ts @@ -82,11 +82,18 @@ export async function browserStatus( export async function browserDoctor( baseUrl?: string, - opts?: { profile?: string }, + opts?: { profile?: string; deep?: boolean }, ): Promise { - const q = buildProfileQuery(opts?.profile); + const params = new URLSearchParams(); + if (opts?.profile) { + params.set("profile", opts.profile); + } + if (opts?.deep) { + params.set("deep", "true"); + } + const q = params.size ? `?${params.toString()}` : ""; return await fetchBrowserJson(withBaseUrl(baseUrl, `/doctor${q}`), { - timeoutMs: 3000, + timeoutMs: opts?.deep ? 10000 : 3000, }); } diff --git a/extensions/browser/src/browser/routes/agent.snapshot.ts b/extensions/browser/src/browser/routes/agent.snapshot.ts index 424fb6c1a51..36d6d3039ee 100644 --- a/extensions/browser/src/browser/routes/agent.snapshot.ts +++ b/extensions/browser/src/browser/routes/agent.snapshot.ts @@ -1,7 +1,7 @@ import path from "node:path"; import { ensureMediaDir, saveMediaBuffer } from "../../media/store.js"; import { resolveBrowserNavigationProxyMode } from "../browser-proxy-mode.js"; -import { captureScreenshot, snapshotAria } from "../cdp.js"; +import { captureScreenshot, snapshotAria, snapshotRoleViaCdp } from "../cdp.js"; import { evaluateChromeMcpScript, navigateChromeMcpPage, @@ -627,10 +627,6 @@ export function registerBrowserAgentSnapshotRoutes( }); } if (plan.format === "ai") { - const pw = await requirePwAi(res, "ai snapshot"); - if (!pw) { - return; - } const roleSnapshotArgs = { cdpUrl: profileCtx.profile.cdpUrl, targetId: tab.targetId, @@ -646,18 +642,54 @@ export function registerBrowserAgentSnapshotRoutes( }, }; + const cdpRoleSnapshot = async () => { + if (!tab.wsUrl) { + return null; + } + if (plan.selectorValue || plan.frameSelectorValue) { + return null; + } + return await snapshotRoleViaCdp({ + wsUrl: tab.wsUrl, + urls: plan.urls, + options: { + interactive: plan.interactive ?? undefined, + compact: plan.compact ?? undefined, + maxDepth: plan.depth ?? undefined, + }, + }); + }; + + const pw = await getPwAiModule(); const snap = plan.wantsRoleSnapshot - ? await pw.snapshotRoleViaPlaywright(roleSnapshotArgs) - : await pw.snapshotAiViaPlaywright({ - cdpUrl: profileCtx.profile.cdpUrl, - targetId: tab.targetId, - ssrfPolicy: ctx.state().resolved.ssrfPolicy, - urls: plan.urls, - ...(typeof plan.resolvedMaxChars === "number" - ? { maxChars: plan.resolvedMaxChars } - : {}), - }); + ? pw + ? await pw.snapshotRoleViaPlaywright(roleSnapshotArgs).catch(async (err) => { + const fallback = await cdpRoleSnapshot(); + if (fallback) { + return fallback; + } + throw err; + }) + : await cdpRoleSnapshot() + : pw + ? await pw.snapshotAiViaPlaywright({ + cdpUrl: profileCtx.profile.cdpUrl, + targetId: tab.targetId, + ssrfPolicy: ctx.state().resolved.ssrfPolicy, + urls: plan.urls, + ...(typeof plan.resolvedMaxChars === "number" + ? { maxChars: plan.resolvedMaxChars } + : {}), + }) + : await cdpRoleSnapshot(); + if (!snap) { + await requirePwAi(res, "ai snapshot"); + return; + } if (plan.labels) { + if (!pw) { + return jsonError(res, 501, "Snapshot labels require Playwright."); + } const labeled = await pw.screenshotWithLabelsViaPlaywright({ cdpUrl: profileCtx.profile.cdpUrl, targetId: tab.targetId, diff --git a/extensions/browser/src/browser/routes/basic.ts b/extensions/browser/src/browser/routes/basic.ts index 8b852cfa0a6..b084b150982 100644 --- a/extensions/browser/src/browser/routes/basic.ts +++ b/extensions/browser/src/browser/routes/basic.ts @@ -1,3 +1,4 @@ +import { snapshotAria } from "../cdp.js"; import { getChromeMcpPid } from "../chrome-mcp.js"; import { resolveBrowserExecutableForPlatform } from "../chrome.executables.js"; import { resolveManagedBrowserHeadlessMode } from "../config.js"; @@ -129,6 +130,62 @@ async function buildBrowserStatus(req: BrowserRequest, ctx: BrowserRouteContext) }; } +async function runBrowserLiveProbe(req: BrowserRequest, ctx: BrowserRouteContext) { + const profileCtx = getProfileContext(req, ctx); + if ("error" in profileCtx) { + return { + id: "live-snapshot", + label: "Live snapshot", + status: "fail" as const, + summary: profileCtx.error, + }; + } + const capabilities = getBrowserProfileCapabilities(profileCtx.profile); + try { + const tab = await profileCtx.ensureTabAvailable(); + if (capabilities.usesChromeMcp) { + const { takeChromeMcpSnapshot } = await import("../chrome-mcp.js"); + await takeChromeMcpSnapshot({ + profileName: profileCtx.profile.name, + profile: profileCtx.profile, + targetId: tab.targetId, + }); + return { + id: "live-snapshot", + label: "Live snapshot", + status: "pass" as const, + summary: `Chrome MCP snapshot succeeded on ${tab.suggestedTargetId ?? tab.targetId}`, + }; + } + if (!tab.wsUrl) { + return { + id: "live-snapshot", + label: "Live snapshot", + status: "warn" as const, + summary: "No per-tab CDP WebSocket available for the lightweight live snapshot probe", + }; + } + const snap = await snapshotAria({ wsUrl: tab.wsUrl, limit: 25 }); + return { + id: "live-snapshot", + label: "Live snapshot", + status: snap.nodes.length > 0 ? ("pass" as const) : ("warn" as const), + summary: + snap.nodes.length > 0 + ? `CDP accessibility snapshot returned ${snap.nodes.length} nodes on ${tab.suggestedTargetId ?? tab.targetId}` + : `CDP accessibility snapshot returned no nodes on ${tab.suggestedTargetId ?? tab.targetId}`, + }; + } catch (err) { + return { + id: "live-snapshot", + label: "Live snapshot", + status: "fail" as const, + summary: String(err), + fixHint: "Run openclaw browser start, then retry with openclaw browser doctor --deep.", + }; + } +} + function hasQueryKey(query: BrowserRequest["query"], key: string): boolean { return Object.prototype.hasOwnProperty.call(query ?? {}, key); } @@ -201,7 +258,12 @@ export function registerBrowserBasicRoutes(app: BrowserRouteRegistrar, ctx: Brow asyncBrowserRoute(async (req, res) => { try { const status = await buildBrowserStatus(req, ctx); - res.json(buildBrowserDoctorReport({ status })); + const report = buildBrowserDoctorReport({ status }); + if (toBoolean(req.query.deep) === true || toBoolean(req.query.live) === true) { + report.checks.push(await runBrowserLiveProbe(req, ctx)); + report.ok = report.checks.every((check) => check.status !== "fail"); + } + res.json(report); } catch (err) { const mapped = toBrowserErrorResponse(err); if (mapped) { diff --git a/extensions/browser/src/browser/server.agent-contract-core.test.ts b/extensions/browser/src/browser/server.agent-contract-core.test.ts index 638aff778b0..bfe0eb7aac4 100644 --- a/extensions/browser/src/browser/server.agent-contract-core.test.ts +++ b/extensions/browser/src/browser/server.agent-contract-core.test.ts @@ -291,6 +291,47 @@ describe("browser control server", () => { dangerouslyAllowPrivateNetwork: true, }, }); + + pwMocks.snapshotRoleViaPlaywright.mockRejectedValueOnce(new Error("playwright stale page")); + const fallback = (await realFetch(`${base}/snapshot?format=ai&interactive=true`).then((r) => + r.json(), + )) as { ok: boolean; format?: string; snapshot?: string }; + expect(fallback.ok).toBe(true); + expect(fallback.format).toBe("ai"); + expect(fallback.snapshot).toContain("Fallback"); + expect(cdpMocks.snapshotRoleViaCdp).toHaveBeenCalledWith({ + wsUrl: "ws://127.0.0.1/devtools/page/abcd1234", + urls: undefined, + options: { + interactive: true, + compact: undefined, + maxDepth: undefined, + }, + }); + }); + + it("agent contract: doctor deep runs a live snapshot probe", async () => { + const base = await startServerAndBase(); + const realFetch = getBrowserTestFetch(); + + const report = (await realFetch(`${base}/doctor?deep=true`).then((r) => r.json())) as { + ok: boolean; + checks?: Array<{ id?: string; status?: string; summary?: string }>; + }; + + expect(report.ok).toBe(true); + expect(report.checks).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + id: "live-snapshot", + status: "pass", + }), + ]), + ); + expect(cdpMocks.snapshotAria).toHaveBeenCalledWith({ + wsUrl: "ws://127.0.0.1/devtools/page/abcd1234", + limit: 25, + }); }); it("agent contract: navigation + common act commands", async () => { diff --git a/extensions/browser/src/browser/server.control-server.test-harness.ts b/extensions/browser/src/browser/server.control-server.test-harness.ts index 4a699ad52b7..121c1b32a45 100644 --- a/extensions/browser/src/browser/server.control-server.test-harness.ts +++ b/extensions/browser/src/browser/server.control-server.test-harness.ts @@ -90,10 +90,23 @@ const cdpMocks = vi.hoisted(() => ({ snapshotAria: vi.fn(async () => ({ nodes: [{ ref: "1", role: "link", name: "x", depth: 0 }], })), + snapshotRoleViaCdp: vi.fn(async () => ({ + snapshot: '- button "Fallback" [ref=e1]', + refs: { e1: { role: "button", name: "Fallback" } }, + stats: { lines: 1, chars: 29, refs: 1, interactive: 1 }, + })), })); -export function getCdpMocks(): { createTargetViaCdp: MockFn; snapshotAria: MockFn } { - return cdpMocks as unknown as { createTargetViaCdp: MockFn; snapshotAria: MockFn }; +export function getCdpMocks(): { + createTargetViaCdp: MockFn; + snapshotAria: MockFn; + snapshotRoleViaCdp: MockFn; +} { + return cdpMocks as unknown as { + createTargetViaCdp: MockFn; + snapshotAria: MockFn; + snapshotRoleViaCdp: MockFn; + }; } type ExecuteActMockAction = { kind: string } & Record; @@ -175,6 +188,11 @@ const pwMocks = vi.hoisted(() => ({ selectOptionViaPlaywright: vi.fn(async (_opts?: unknown) => {}), setInputFilesViaPlaywright: vi.fn(async () => {}), snapshotAiViaPlaywright: vi.fn(async () => ({ snapshot: "ok" })), + snapshotRoleViaPlaywright: vi.fn(async () => ({ + snapshot: '- button "Role" [ref=e1]', + refs: { e1: { role: "button", name: "Role" } }, + stats: { lines: 1, chars: 24, refs: 1, interactive: 1 }, + })), storeAriaSnapshotRefsViaPlaywright: vi.fn(async () => {}), traceStopViaPlaywright: vi.fn(async () => {}), takeScreenshotViaPlaywright: vi.fn(async () => ({ @@ -445,6 +463,7 @@ vi.mock("./cdp.js", () => ({ createTargetViaCdp: cdpMocks.createTargetViaCdp, normalizeCdpWsUrl: vi.fn((wsUrl: string) => wsUrl), snapshotAria: cdpMocks.snapshotAria, + snapshotRoleViaCdp: cdpMocks.snapshotRoleViaCdp, getHeadersWithAuth: vi.fn(() => ({})), appendCdpPath: vi.fn((cdpUrl: string, cdpPath: string) => { const base = cdpUrl.replace(/\/$/, ""); diff --git a/extensions/browser/src/cli/browser-cli-manage.ts b/extensions/browser/src/cli/browser-cli-manage.ts index 67ae940bba0..3284a658830 100644 --- a/extensions/browser/src/cli/browser-cli-manage.ts +++ b/extensions/browser/src/cli/browser-cli-manage.ts @@ -135,7 +135,7 @@ function formatDoctorLine(check: BrowserDoctorCheck): string { return `${check.ok ? "OK" : "FAIL"} ${check.name}${check.detail ? `: ${check.detail}` : ""}`; } -async function runBrowserDoctor(parent: BrowserParentOpts, profile?: string) { +async function runBrowserDoctor(parent: BrowserParentOpts, profile?: string, deep?: boolean) { const checks: BrowserDoctorCheck[] = []; let status: BrowserStatus | null = null; @@ -218,6 +218,42 @@ async function runBrowserDoctor(parent: BrowserParentOpts, profile?: string) { } } + if (deep && status.running) { + try { + const result = await callBrowserRequest< + | { ok: true; format: "aria"; nodes?: unknown[] } + | { ok: true; format: "ai"; snapshot?: string } + >( + parent, + { + method: "GET", + path: "/snapshot", + query: resolveProfileQuery(profile, { format: "aria", limit: 25 }), + }, + { timeoutMs: 10_000 }, + ); + const count = + result.format === "aria" + ? Array.isArray(result.nodes) + ? result.nodes.length + : 0 + : typeof result.snapshot === "string" + ? result.snapshot.split("\n").length + : 0; + checks.push({ + name: "live-snapshot", + ok: count > 0, + detail: count > 0 ? `${count} nodes/lines` : "snapshot returned no content", + }); + } catch (err) { + checks.push({ + name: "live-snapshot", + ok: false, + detail: String(err), + }); + } + } + return { ok: checks.every((check) => check.ok), checks, status }; } @@ -296,11 +332,12 @@ export function registerBrowserManageCommands( browser .command("doctor") .description("Check browser plugin readiness") - .action(async (_opts, cmd) => { + .option("--deep", "Run a live snapshot probe") + .action(async (opts: { deep?: boolean }, cmd) => { const parent = parentOpts(cmd); const profile = parent?.browserProfile; await runBrowserCommand(async () => { - const result = await runBrowserDoctor(parent, profile); + const result = await runBrowserDoctor(parent, profile, opts.deep === true); if (printJsonResult(parent, result)) { return; }