fix(browser): resolve aria snapshot refs via DOM markers

Co-authored-by: MrKipler <mrkipler@kiphausen.com>
This commit is contained in:
Peter Steinberger
2026-04-25 09:42:11 +01:00
parent 207f0341e0
commit e10f20032a
12 changed files with 434 additions and 15 deletions

View File

@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Browser/Playwright: ignore benign already-handled route races during guarded navigation so browser-page tasks no longer fail when Playwright tears down a route mid-flight. (#68708) Thanks @Steady-ai.
- Browser/aria snapshots: bind `format=aria` `axN` refs to live DOM nodes through backend DOM ids when Playwright is available, so follow-up browser actions can use those refs without timing out. (#62434) Thanks @MrKipler.
- Telegram: prevent duplicate in-process long pollers for the same bot token and add clearer `getUpdates` conflict diagnostics for external duplicate pollers. Fixes #56230.
- Browser/Linux: detect Chromium-based installs under `/opt/google`, `/opt/brave.com`, `/usr/lib/chromium`, and `/usr/lib/chromium-browser` before asking users to set `browser.executablePath`. (#48563) Thanks @lupuletic.
- Sessions/browser: close tracked browser tabs when idle, daily, `/new`, or `/reset` session rollover archives the previous transcript, preventing tabs from leaking past the old session. Thanks @jakozloski.

View File

@@ -53,6 +53,7 @@ export {
snapshotAiViaPlaywright,
snapshotAriaViaPlaywright,
snapshotRoleViaPlaywright,
storeAriaSnapshotRefsViaPlaywright,
screenshotWithLabelsViaPlaywright,
storageClearViaPlaywright,
storageGetViaPlaywright,

View File

@@ -1,5 +1,9 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { withPageScopedCdpClient } from "./pw-session.page-cdp.js";
import {
BROWSER_REF_MARKER_ATTRIBUTE,
markBackendDomRefsOnPage,
withPageScopedCdpClient,
} from "./pw-session.page-cdp.js";
describe("pw-session page-scoped CDP client", () => {
beforeEach(() => {
@@ -32,4 +36,107 @@ describe("pw-session page-scoped CDP client", () => {
expect(sessionSend).toHaveBeenCalledWith("Emulation.setLocaleOverride", { locale: "en-US" });
expect(sessionDetach).toHaveBeenCalledTimes(1);
});
it("marks backend DOM refs on the page", async () => {
  // Fake CDP session: only the backend-id lookup answers with a payload.
  const send = vi.fn(async (method: string, params?: Record<string, unknown>) => {
    if (method !== "DOM.pushNodesByBackendIdsToFrontend") {
      return {};
    }
    expect(params).toEqual({ backendNodeIds: [42, 84] });
    return { nodeIds: [101, 202] };
  });
  const detach = vi.fn(async () => {});
  const newCDPSession = vi.fn(async () => ({ send, detach }));
  const evaluateAll = vi.fn(async () => {});
  const locator = vi.fn(() => ({ evaluateAll }));
  const page = { context: () => ({ newCDPSession }), locator };

  const marked = await markBackendDomRefsOnPage({
    page: page as never,
    refs: [
      { ref: "ax1", backendDOMNodeId: 42 },
      { ref: "ax2", backendDOMNodeId: 84 },
    ],
  });

  // Stale markers are cleared exactly once before anything is written.
  expect(locator).toHaveBeenCalledWith(`[${BROWSER_REF_MARKER_ATTRIBUTE}]`);
  expect(evaluateAll).toHaveBeenCalledTimes(1);

  // CDP traffic: enable, resolve backend ids, then one attribute write per ref.
  expect(send).toHaveBeenNthCalledWith(1, "DOM.enable", undefined);
  expect(send).toHaveBeenNthCalledWith(2, "DOM.pushNodesByBackendIdsToFrontend", {
    backendNodeIds: [42, 84],
  });
  const expectedWrites = [
    { call: 3, nodeId: 101, value: "ax1" },
    { call: 4, nodeId: 202, value: "ax2" },
  ];
  for (const { call, nodeId, value } of expectedWrites) {
    expect(send).toHaveBeenNthCalledWith(call, "DOM.setAttributeValue", {
      nodeId,
      name: BROWSER_REF_MARKER_ATTRIBUTE,
      value,
    });
  }

  expect(marked).toEqual(new Set(["ax1", "ax2"]));
  expect(detach).toHaveBeenCalledTimes(1);
});
it("clears stale markers even when no backend refs are valid", async () => {
  const evaluateAll = vi.fn(async () => {});
  const locator = vi.fn(() => ({ evaluateAll }));
  const newCDPSession = vi.fn();
  const page = { context: () => ({ newCDPSession }), locator };

  // The only ref has a non-ax name and a zero backend id, so nothing is eligible.
  const marked = await markBackendDomRefsOnPage({
    page: page as never,
    refs: [{ ref: "e1", backendDOMNodeId: 0 }],
  });

  // Cleanup still runs, but no CDP session is ever opened.
  expect(locator).toHaveBeenCalledWith(`[${BROWSER_REF_MARKER_ATTRIBUTE}]`);
  expect(evaluateAll).toHaveBeenCalledTimes(1);
  expect(newCDPSession).not.toHaveBeenCalled();
  expect(marked).toEqual(new Set());
});
it("keeps unmarked refs out of the marked set when marker writes fail", async () => {
  // Backend ids resolve, but every attribute write is rejected.
  const send = vi.fn(async (method: string) => {
    switch (method) {
      case "DOM.pushNodesByBackendIdsToFrontend":
        return { nodeIds: [101, 202] };
      case "DOM.setAttributeValue":
        throw new Error("detached");
      default:
        return {};
    }
  });
  const detach = vi.fn(async () => {});
  const newCDPSession = vi.fn(async () => ({ send, detach }));
  const page = {
    context: () => ({ newCDPSession }),
    locator: vi.fn(() => ({ evaluateAll: vi.fn(async () => {}) })),
  };

  const marked = await markBackendDomRefsOnPage({
    page: page as never,
    refs: [
      { ref: "ax1", backendDOMNodeId: 42 },
      { ref: "ax2", backendDOMNodeId: 84 },
    ],
  });

  // No ref is reported as marked, and the session is still released.
  expect(marked).toEqual(new Set());
  expect(detach).toHaveBeenCalledTimes(1);
});
});

View File

@@ -1,6 +1,9 @@
import type { CDPSession, Page } from "playwright-core";
type PageCdpSend = (method: string, params?: Record<string, unknown>) => Promise<unknown>;
type MarkBackendDomRef = { ref: string; backendDOMNodeId: number };
export const BROWSER_REF_MARKER_ATTRIBUTE = "data-openclaw-browser-ref";
async function withPlaywrightPageCdpSession<T>(
page: Page,
@@ -31,3 +34,75 @@ export async function withPageScopedCdpClient<T>(opts: {
);
});
}
/**
 * Tags live DOM nodes with `data-openclaw-browser-ref="<axN>"` so refs from a
 * `format=aria` snapshot can later be resolved with a plain attribute selector.
 *
 * Steps:
 *  1. Remove marker attributes left on the page by an earlier call (best effort).
 *  2. Keep only `axN` refs that carry a positive, finite backend DOM node id.
 *  3. Over a page-scoped CDP session, resolve backend ids to frontend node ids
 *     and write the marker attribute on every node that resolved.
 *
 * A DOM node can hold only one marker value. When several refs point at the
 * same backend node, only the first one is written and reported; the others
 * stay unmarked so callers use their fallback metadata instead of a selector
 * that would match nothing.
 *
 * Individual CDP commands are best effort: a failed lookup or attribute write
 * simply leaves the affected ref out of the result.
 *
 * @param opts.page Playwright page whose DOM receives the markers.
 * @param opts.refs Snapshot refs paired with their CDP backend DOM node ids.
 * @returns The refs whose marker attribute was written successfully.
 */
export async function markBackendDomRefsOnPage(opts: {
  page: Page;
  refs: MarkBackendDomRef[];
}): Promise<Set<string>> {
  // Always clear old markers first, even if nothing new will be written, so
  // refs from a previous snapshot cannot resolve to stale elements.
  await opts.page
    .locator(`[${BROWSER_REF_MARKER_ATTRIBUTE}]`)
    .evaluateAll((elements, attr) => {
      for (const element of elements) {
        if (element instanceof Element) {
          element.removeAttribute(attr);
        }
      }
    }, BROWSER_REF_MARKER_ATTRIBUTE)
    .catch(() => {});

  const marked = new Set<string>();

  // backend node id -> the single ref allowed to claim that node (first wins).
  const refByBackendId = new Map<number, string>();
  for (const entry of opts.refs) {
    if (!/^ax\d+$/.test(entry.ref) || !Number.isFinite(entry.backendDOMNodeId)) {
      continue;
    }
    const backendNodeId = Math.floor(entry.backendDOMNodeId);
    if (backendNodeId > 0 && !refByBackendId.has(backendNodeId)) {
      refByBackendId.set(backendNodeId, entry.ref);
    }
  }
  if (refByBackendId.size === 0) {
    return marked;
  }

  return await withPlaywrightPageCdpSession(opts.page, async (session) => {
    // Playwright types `send` per protocol method; widen it for string method names.
    const send = async (method: string, params?: Record<string, unknown>) =>
      await (
        session.send as unknown as (
          method: string,
          params?: Record<string, unknown>,
        ) => Promise<unknown>
      )(method, params);

    await send("DOM.enable").catch(() => {});

    // NOTE(review): some Chromium builds may reject the lookup below with
    // "Document needs to be requested first" unless DOM.getDocument ran on this
    // session — confirm against the supported browser versions.
    const targets = [...refByBackendId];
    const backendNodeIds = targets.map(([backendNodeId]) => backendNodeId);
    const pushed = (await send("DOM.pushNodesByBackendIdsToFrontend", {
      backendNodeIds,
    }).catch(() => ({}))) as { nodeIds?: number[] } | null | undefined;
    // The response is positional: nodeIds[i] belongs to backendNodeIds[i].
    const nodeIds = Array.isArray(pushed?.nodeIds) ? pushed.nodeIds : [];

    for (let index = 0; index < targets.length; index += 1) {
      const target = targets[index];
      const nodeId = nodeIds[index];
      // Skip nodes that no longer exist (missing or non-positive frontend id).
      if (!target || typeof nodeId !== "number" || !(nodeId > 0)) {
        continue;
      }
      const ref = target[1];
      try {
        await send("DOM.setAttributeValue", {
          nodeId,
          name: BROWSER_REF_MARKER_ATTRIBUTE,
          value: ref,
        });
        marked.add(ref);
      } catch {
        // Best-effort marker write. Unmarked refs fall back to role metadata.
      }
    }
    return marked;
  });
}

View File

@@ -6,6 +6,7 @@ import {
rememberRoleRefsForTarget,
restoreRoleRefsForTarget,
} from "./pw-session.js";
import { BROWSER_REF_MARKER_ATTRIBUTE } from "./pw-session.page-cdp.js";
function fakePage(): {
page: Page;
@@ -27,6 +28,7 @@ function fakePage(): {
const getByRole = vi.fn(() => ({ nth: vi.fn(() => ({ ok: true })) }));
const frameLocator = vi.fn(() => ({
getByRole: vi.fn(() => ({ nth: vi.fn(() => ({ ok: true })) })),
locator: vi.fn(() => ({ nth: vi.fn(() => ({ ok: true })) })),
}));
const locator = vi.fn(() => ({ nth: vi.fn(() => ({ ok: true })) }));
@@ -72,10 +74,30 @@ describe("pw-session refLocator", () => {
expect(mocks.locator).toHaveBeenCalledWith("aria-ref=e1");
});
it("rejects axN refs from format=aria snapshots instead of timing out", () => {
it("uses backend-marked DOM locators for ax refs", () => {
  const { page, mocks } = fakePage();
  // A stored ref flagged with domMarker must resolve through the marker attribute.
  ensurePageState(page).roleRefs = {
    ax12: { role: "button", name: "OK", domMarker: true },
  };

  refLocator(page, "ax12");

  const expectedSelector = `[${BROWSER_REF_MARKER_ATTRIBUTE}="ax12"]`;
  expect(mocks.locator).toHaveBeenCalledWith(expectedSelector);
});
it("falls back to role heuristics for ax refs without backend markers", () => {
  const { page, mocks } = fakePage();
  // No domMarker flag: the ref only carries role and name metadata.
  ensurePageState(page).roleRefs = { ax12: { role: "button", name: "OK" } };

  refLocator(page, "ax12");

  const expectedOptions = { name: "OK", exact: true };
  expect(mocks.getByRole).toHaveBeenCalledWith("button", expectedOptions);
});
it("rejects unknown ax refs instead of timing out on aria-ref locators", () => {
const { page, mocks } = fakePage();
expect(() => refLocator(page, "ax12")).toThrow(/format=aria snapshot/);
expect(() => refLocator(page, "ax12")).toThrow(/Unknown ref/);
expect(mocks.locator).not.toHaveBeenCalled();
});
});

View File

@@ -31,7 +31,7 @@ import {
InvalidBrowserNavigationUrlError,
withBrowserNavigationPolicy,
} from "./navigation-guard.js";
import { withPageScopedCdpClient } from "./pw-session.page-cdp.js";
import { BROWSER_REF_MARKER_ATTRIBUTE, withPageScopedCdpClient } from "./pw-session.page-cdp.js";
export type BrowserConsoleMessage = {
type: string;
@@ -84,7 +84,7 @@ type PageState = {
* Mode "role" refs are generated from ariaSnapshot and resolved via getByRole.
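* Mode "aria" refs are Playwright aria-ref ids and resolved via `aria-ref=...`.
* `axN` refs from format=aria snapshots are also stored under mode "role"; entries
* flagged `domMarker` resolve via the DOM marker attribute, the rest via getByRole.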
*/
roleRefs?: Record<string, { role: string; name?: string; nth?: number }>;
roleRefs?: Record<string, { role: string; name?: string; nth?: number; domMarker?: boolean }>;
roleRefsMode?: "role" | "aria";
roleRefsFrameSelector?: string;
};
@@ -935,10 +935,29 @@ export function refLocator(page: Page, ref: string) {
}
if (AX_REF_PATTERN.test(normalized)) {
throw new Error(
`Ref "${normalized}" comes from a format=aria snapshot and cannot be used with act. ` +
`Re-snapshot with format=ai and use the eN refs from that snapshot.`,
);
const state = pageStates.get(page);
const info = state?.roleRefs?.[normalized];
if (!info) {
throw new Error(
`Unknown ref "${normalized}". Run a new snapshot and use a ref from that snapshot.`,
);
}
const scope = state.roleRefsFrameSelector
? page.frameLocator(state.roleRefsFrameSelector)
: page;
if (info.domMarker) {
return scope.locator(`[${BROWSER_REF_MARKER_ATTRIBUTE}="${normalized}"]`);
}
const locAny = scope as unknown as {
getByRole: (
role: never,
opts?: { name?: string; exact?: boolean },
) => ReturnType<Page["getByRole"]>;
};
const locator = info.name
? locAny.getByRole(info.role as never, { name: info.name, exact: true })
: locAny.getByRole(info.role as never);
return info.nth !== undefined ? locator.nth(info.nth) : locator;
}
return page.locator(`aria-ref=${normalized}`);

View File

@@ -27,6 +27,7 @@ const sessionMocks = vi.hoisted(() => ({
}));
const pageCdpMocks = vi.hoisted(() => ({
markBackendDomRefsOnPage: vi.fn(async () => new Set<string>()),
withPageScopedCdpClient: vi.fn(
async ({ fn }: { fn: (send: () => Promise<unknown>) => unknown }) =>
await fn(async () => ({ nodes: [] })),

View File

@@ -0,0 +1,101 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Shared mock functions. Individual tests set their return values and inspect
// their calls; `vi.clearAllMocks()` in `beforeEach` resets the recorded calls.
const getPageForTargetId = vi.fn();
const ensurePageState = vi.fn();
const storeRoleRefsForTarget = vi.fn();
const withPageScopedCdpClient = vi.fn();
const markBackendDomRefsOnPage = vi.fn();
const formatAriaSnapshot = vi.fn();
// Replace ./pw-session.js with stubs. The named mocks above are the ones the
// tests assert on; the remaining exports are anonymous placeholders.
vi.mock("./pw-session.js", () => ({
assertPageNavigationCompletedSafely: vi.fn(),
ensurePageState,
forceDisconnectPlaywrightForTarget: vi.fn(),
getPageForTargetId,
gotoPageWithNavigationGuard: vi.fn(),
storeRoleRefsForTarget,
}));
// Replace the page-scoped CDP helpers so tests control the raw AX nodes and
// which refs are reported as marked.
vi.mock("./pw-session.page-cdp.js", () => ({
markBackendDomRefsOnPage,
withPageScopedCdpClient,
}));
// Replace the snapshot formatter so tests decide the formatted node list.
vi.mock("./cdp.js", () => ({
formatAriaSnapshot,
}));
describe("pw-tools-core aria snapshot storage", () => {
  // Connection details shared by both scenarios.
  const cdpUrl = "http://127.0.0.1:9222";
  const targetId = "tab-1";

  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("reuses the resolved page when storing aria refs", async () => {
    const page = { id: "page-1" };
    const formattedNodes = [{ ref: "ax1", role: "button", name: "OK", backendDOMNodeId: 42 }];
    getPageForTargetId.mockResolvedValue(page);
    withPageScopedCdpClient.mockResolvedValue({ nodes: [{ backendDOMNodeId: 42 }] });
    formatAriaSnapshot.mockReturnValue(formattedNodes);
    markBackendDomRefsOnPage.mockResolvedValue(new Set(["ax1"]));

    const { snapshotAriaViaPlaywright } = await import("./pw-tools-core.snapshot.js");
    const result = await snapshotAriaViaPlaywright({ cdpUrl, targetId, limit: 5 });

    expect(result).toEqual({ nodes: formattedNodes });
    // The page is looked up once and then handed to every later step.
    expect(getPageForTargetId).toHaveBeenCalledTimes(1);
    expect(ensurePageState).toHaveBeenCalledWith(page);
    expect(withPageScopedCdpClient).toHaveBeenCalledWith({
      cdpUrl,
      page,
      targetId,
      fn: expect.any(Function),
    });
    expect(markBackendDomRefsOnPage).toHaveBeenCalledWith({
      page,
      refs: [{ ref: "ax1", backendDOMNodeId: 42 }],
    });
    expect(storeRoleRefsForTarget).toHaveBeenCalledWith({
      page,
      cdpUrl,
      targetId,
      refs: {
        ax1: { role: "button", name: "OK", domMarker: true },
      },
      mode: "role",
    });
  });

  it("stores role fallback metadata when backend markers are unavailable", async () => {
    const page = { id: "page-1" };
    const { storeAriaSnapshotRefsViaPlaywright } = await import("./pw-tools-core.snapshot.js");
    getPageForTargetId.mockResolvedValue(page);
    // No ref gets a DOM marker, so only role/name/nth metadata is stored.
    markBackendDomRefsOnPage.mockResolvedValue(new Set());

    const duplicateButtons = [
      { ref: "ax1", role: "Button", name: "OK", backendDOMNodeId: 42, depth: 0 },
      { ref: "ax2", role: "Button", name: "OK", backendDOMNodeId: 84, depth: 0 },
    ];
    await storeAriaSnapshotRefsViaPlaywright({ cdpUrl, targetId, nodes: duplicateButtons });

    expect(storeRoleRefsForTarget).toHaveBeenCalledWith({
      page,
      cdpUrl,
      targetId,
      refs: {
        ax1: { role: "button", name: "OK" },
        ax2: { role: "button", name: "OK", nth: 1 },
      },
      mode: "role",
    });
  });
});

View File

@@ -1,4 +1,7 @@
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
import {
normalizeLowercaseStringOrEmpty,
normalizeOptionalString,
} from "openclaw/plugin-sdk/text-runtime";
import type { Page } from "playwright-core";
import type { SsrFPolicy } from "../infra/net/ssrf.js";
import { type AriaSnapshotNode, formatAriaSnapshot, type RawAXNode } from "./cdp.js";
@@ -22,7 +25,7 @@ import {
gotoPageWithNavigationGuard,
storeRoleRefsForTarget,
} from "./pw-session.js";
import { withPageScopedCdpClient } from "./pw-session.page-cdp.js";
import { markBackendDomRefsOnPage, withPageScopedCdpClient } from "./pw-session.page-cdp.js";
type SnapshotUrlEntry = {
text: string;
@@ -64,6 +67,73 @@ function appendSnapshotUrls(snapshot: string, urls: SnapshotUrlEntry[]): string
return `${snapshot}\n\nLinks:\n${lines.join("\n")}`;
}
/**
 * Builds the per-ref metadata stored for a `format=aria` snapshot.
 *
 * Each node contributes its lowercased role (or "unknown"), its trimmed name
 * when non-empty, an `nth` index among nodes sharing the same role and name,
 * and `domMarker: true` when its ref is in `markedRefs`.
 *
 * NOTE(review): the first node of a duplicated role/name group is stored
 * without `nth`, so its role-based locator is not narrowed to one element.
 * Confirm whether it should carry `nth: 0`.
 *
 * @param nodes Formatted snapshot nodes, in snapshot order.
 * @param markedRefs Refs whose DOM marker attribute was written on the page.
 * @returns Metadata keyed by ref.
 */
function buildStoredAriaRefs(
  nodes: AriaSnapshotNode[],
  markedRefs: Set<string>,
): Record<string, { role: string; name?: string; nth?: number; domMarker?: boolean }> {
  const refs: Record<string, { role: string; name?: string; nth?: number; domMarker?: boolean }> =
    {};
  // role:name key -> refs seen so far. The array length is the running
  // occurrence count, and appending in place avoids re-copying it per node.
  const refsByKey = new Map<string, string[]>();
  for (const node of nodes) {
    const role = normalizeLowercaseStringOrEmpty(node.role) || "unknown";
    const name = node.name.trim() || undefined;
    const key = `${role}:${name ?? ""}`;
    let refsForKey = refsByKey.get(key);
    if (!refsForKey) {
      refsForKey = [];
      refsByKey.set(key, refsForKey);
    }
    const nth = refsForKey.length;
    refsForKey.push(node.ref);
    refs[node.ref] = {
      role,
      ...(name ? { name } : {}),
      ...(nth > 0 ? { nth } : {}),
      ...(markedRefs.has(node.ref) ? { domMarker: true } : {}),
    };
  }
  // A role/name key with a single ref needs no index. With unique refs this
  // pass finds nothing to remove, because `nth` is only stored when > 0.
  for (const refsForKey of refsByKey.values()) {
    if (refsForKey.length > 1) {
      continue;
    }
    const ref = refsForKey[0];
    if (ref) {
      delete refs[ref]?.nth;
    }
  }
  return refs;
}
/**
 * Records the refs of a `format=aria` snapshot for the target page.
 *
 * Uses `opts.page` when supplied, otherwise looks the page up from
 * `cdpUrl`/`targetId`. Nodes that carry a numeric `backendDOMNodeId` are passed
 * to the DOM marker step; the resulting set decides which stored refs get the
 * `domMarker` flag. All refs are then stored under mode "role".
 *
 * @param opts.cdpUrl CDP endpoint of the browser.
 * @param opts.targetId Optional target id of the tab.
 * @param opts.nodes Formatted aria snapshot nodes.
 * @param opts.page Already-resolved page, to avoid a second lookup.
 */
export async function storeAriaSnapshotRefsViaPlaywright(opts: {
  cdpUrl: string;
  targetId?: string;
  nodes: AriaSnapshotNode[];
  page?: Page;
}): Promise<void> {
  const { cdpUrl, targetId, nodes } = opts;
  const page = opts.page ?? (await getPageForTargetId({ cdpUrl, targetId }));
  ensurePageState(page);

  // Only nodes with a numeric backend DOM id can be tagged on the page.
  const backendRefs: { ref: string; backendDOMNodeId: number }[] = [];
  for (const node of nodes) {
    if (typeof node.backendDOMNodeId === "number") {
      backendRefs.push({ ref: node.ref, backendDOMNodeId: node.backendDOMNodeId });
    }
  }
  const markedRefs = await markBackendDomRefsOnPage({ page, refs: backendRefs });

  storeRoleRefsForTarget({
    page,
    cdpUrl,
    targetId,
    refs: buildStoredAriaRefs(nodes, markedRefs),
    mode: "role",
  });
}
export async function snapshotAriaViaPlaywright(opts: {
cdpUrl: string;
targetId?: string;
@@ -99,7 +169,14 @@ export async function snapshotAriaViaPlaywright(opts: {
nodes?: RawAXNode[];
};
const nodes = Array.isArray(res?.nodes) ? res.nodes : [];
return { nodes: formatAriaSnapshot(nodes, limit) };
const formatted = formatAriaSnapshot(nodes, limit);
await storeAriaSnapshotRefsViaPlaywright({
cdpUrl: opts.cdpUrl,
targetId: opts.targetId,
nodes: formatted,
page,
});
return { nodes: formatted };
}
export async function snapshotAiViaPlaywright(opts: {

View File

@@ -505,7 +505,8 @@ export function registerBrowserAgentSnapshotRoutes(
return;
}
const targetId = typeof req.query.targetId === "string" ? req.query.targetId.trim() : "";
const hasPlaywright = Boolean(await getPwAiModule());
const pwModule = await getPwAiModule();
const hasPlaywright = Boolean(pwModule);
const plan = resolveSnapshotPlan({
profile: profileCtx.profile,
query: req.query,
@@ -691,10 +692,11 @@ export function registerBrowserAgentSnapshotRoutes(
});
}
const snap = shouldUsePlaywrightForAriaSnapshot({
const usePlaywrightAriaSnapshot = shouldUsePlaywrightForAriaSnapshot({
profile: profileCtx.profile,
wsUrl: tab.wsUrl,
})
});
const snap = usePlaywrightAriaSnapshot
? (() => {
// Extension relay doesn't expose per-page WS URLs; run AX snapshot via Playwright CDP session.
// Also covers cases where wsUrl is missing/unusable.
@@ -716,6 +718,13 @@ export function registerBrowserAgentSnapshotRoutes(
if (!resolved) {
return;
}
if (!usePlaywrightAriaSnapshot) {
await pwModule?.storeAriaSnapshotRefsViaPlaywright?.({
cdpUrl: profileCtx.profile.cdpUrl,
targetId: tab.targetId,
nodes: resolved.nodes,
});
}
return res.json({
ok: true,
format: plan.format,

View File

@@ -219,6 +219,11 @@ describe("browser control server", () => {
wsUrl: "ws://127.0.0.1/devtools/page/abcd1234",
limit: 1,
});
expect(pwMocks.storeAriaSnapshotRefsViaPlaywright).toHaveBeenCalledWith({
cdpUrl: state.cdpBaseUrl,
targetId: "abcd1234",
nodes: [{ ref: "1", role: "link", name: "x", depth: 0 }],
});
const snapAi = (await realFetch(`${base}/snapshot?format=ai`).then((r) => r.json())) as {
ok: boolean;

View File

@@ -175,6 +175,7 @@ const pwMocks = vi.hoisted(() => ({
selectOptionViaPlaywright: vi.fn(async (_opts?: unknown) => {}),
setInputFilesViaPlaywright: vi.fn(async () => {}),
snapshotAiViaPlaywright: vi.fn(async () => ({ snapshot: "ok" })),
storeAriaSnapshotRefsViaPlaywright: vi.fn(async () => {}),
traceStopViaPlaywright: vi.fn(async () => {}),
takeScreenshotViaPlaywright: vi.fn(async () => ({
buffer: Buffer.from("png"),