fix(browser): use current aria snapshot refs

This commit is contained in:
Peter Steinberger
2026-04-25 00:03:37 +01:00
parent 272a72b716
commit 50e484b22e
5 changed files with 123 additions and 45 deletions

View File

@@ -155,6 +155,20 @@ function canRetryChromeActWithoutTargetId(request: Parameters<typeof browserAct>
return kind === "hover" || kind === "scrollIntoView" || kind === "wait";
}
/**
 * Reports whether `err` looks like the "refs=aria is unsupported" failure,
 * i.e. its text mentions both `refs=aria` and `_snapshotForAI`
 * (case-insensitive).
 *
 * The text is gathered from `String(err)` and, when `err` is an object with a
 * string `message` property, from that message as well. Plain-object errors
 * (for example a deserialized `{ message: "..." }` payload) stringify to
 * `"[object Object]"`, so relying on `String(err)` alone would miss them.
 *
 * @param err - Any thrown value; it is never assumed to be an `Error`.
 * @returns `true` when both markers are present in the collected text.
 */
function isAriaRefsUnsupportedError(err: unknown): boolean {
  const parts: string[] = [];
  try {
    parts.push(String(err));
  } catch {
    // String() throws for values without a usable toString (e.g. null-prototype
    // objects). This predicate runs inside a catch handler, so it must not
    // replace the original error with a TypeError of its own.
  }
  if (typeof err === "object" && err !== null && "message" in err) {
    const message = (err as { message?: unknown }).message;
    if (typeof message === "string") {
      parts.push(message);
    }
  }
  const msg = parts.join("\n").toLowerCase();
  return msg.includes("refs=aria") && msg.includes("_snapshotforai");
}
/**
 * Produces a shallow copy of `snapshotQuery` whose `refs` field is forced to
 * `"role"`. The input object is left untouched.
 *
 * @param snapshotQuery - Query object that may carry a `refs` preference.
 * @returns A new object with every own enumerable property of the input and
 *   `refs` set to `"role"`.
 */
function withRoleRefsFallback<T extends { refs?: "aria" | "role" }>(
  snapshotQuery: T,
): T & { refs: "role" } {
  const roleRefsOverride: { refs: "role" } = { refs: "role" };
  return { ...snapshotQuery, ...roleRefsOverride };
}
export async function executeTabsAction(params: {
baseUrl?: string;
profile?: string;
@@ -233,17 +247,29 @@ export async function executeSnapshotAction(params: {
labels,
mode,
};
const snapshot = proxyRequest
? ((await proxyRequest({
method: "GET",
path: "/snapshot",
profile,
query: snapshotQuery,
})) as Awaited<ReturnType<typeof browserSnapshot>>)
: await browserToolActionDeps.browserSnapshot(baseUrl, {
...snapshotQuery,
profile,
});
let refsFallback: "role" | undefined;
const readSnapshot = async (query: typeof snapshotQuery) =>
proxyRequest
? ((await proxyRequest({
method: "GET",
path: "/snapshot",
profile,
query,
})) as Awaited<ReturnType<typeof browserSnapshot>>)
: await browserToolActionDeps.browserSnapshot(baseUrl, {
...query,
profile,
});
let snapshot: Awaited<ReturnType<typeof browserSnapshot>>;
try {
snapshot = await readSnapshot(snapshotQuery);
} catch (err) {
if (refs !== "aria" || !isAriaRefsUnsupportedError(err)) {
throw err;
}
refsFallback = "role";
snapshot = await readSnapshot(withRoleRefsFallback(snapshotQuery));
}
if (snapshot.format === "ai") {
const extractedText = snapshot.snapshot ?? "";
const wrappedSnapshot = wrapExternalContent(extractedText, {
@@ -263,6 +289,7 @@ export async function executeSnapshotAction(params: {
labelsSkipped: snapshot.labelsSkipped,
imagePath: snapshot.imagePath,
imageType: snapshot.imageType,
refsFallback,
externalContent: {
untrusted: true,
source: "browser",

View File

@@ -105,10 +105,12 @@ vi.mock("../../../src/agents/tools/nodes-utils.js", async () => {
});
const gatewayMocks = vi.hoisted(() => ({
callGatewayTool: vi.fn(async () => ({
ok: true,
payload: { result: { ok: true, running: true } },
})),
callGatewayTool: vi.fn(
async (): Promise<Record<string, unknown>> => ({
ok: true,
payload: { result: { ok: true, running: true } },
}),
),
}));
vi.mock("../../../src/agents/tools/gateway.js", () => gatewayMocks);
@@ -507,6 +509,62 @@ describe("browser tool snapshot maxChars", () => {
expect(browserClientMocks.browserStatus).not.toHaveBeenCalled();
});
// Regression test for the aria -> role refs fallback on a node-proxied snapshot:
// the first gateway call fails with the "refs=aria requires Playwright
// _snapshotForAI support" error, and the tool is expected to issue a second
// snapshot request with refs=role and report `refsFallback: "role"` in details.
it("falls back to role refs when a node snapshot cannot provide aria refs", async () => {
mockSingleBrowserProxyNode();
// Queue two gateway outcomes in order: a rejection carrying the aria-refs
// error text, then a successful AI-format snapshot payload.
gatewayMocks.callGatewayTool
.mockRejectedValueOnce(
new Error("INVALID_REQUEST: Error: refs=aria requires Playwright _snapshotForAI support."),
)
.mockResolvedValueOnce({
ok: true,
payload: {
result: {
ok: true,
format: "ai",
targetId: "tab-1",
url: "https://meet.google.com/abc-defg-hij",
snapshot: 'button "Admit"',
refs: { e1: { role: "button", name: "Admit" } },
},
},
});
const tool = createBrowserTool();
// Explicitly request aria refs so the fallback path is eligible.
const result = await tool.execute?.("call-1", {
action: "snapshot",
target: "node",
node: "Browser Node",
targetId: "tab-1",
refs: "aria",
depth: 4,
maxChars: 12_000,
});
// The tool result must surface that the role-refs fallback was used.
expect(result?.details).toMatchObject({ refsFallback: "role" });
// First attempt: /snapshot with the originally requested refs=aria.
expect(gatewayMocks.callGatewayTool).toHaveBeenNthCalledWith(
1,
"node.invoke",
{ timeoutMs: 25000 },
expect.objectContaining({
params: expect.objectContaining({
path: "/snapshot",
query: expect.objectContaining({ refs: "aria" }),
}),
}),
);
// Retry: same /snapshot path, but with refs downgraded to "role".
expect(gatewayMocks.callGatewayTool).toHaveBeenNthCalledWith(
2,
"node.invoke",
{ timeoutMs: 25000 },
expect.objectContaining({
params: expect.objectContaining({
path: "/snapshot",
query: expect.objectContaining({ refs: "role" }),
}),
}),
);
});
it("gives node.invoke extra slack beyond the default proxy timeout", async () => {
mockSingleBrowserProxyNode();
gatewayMocks.callGatewayTool.mockResolvedValueOnce({

View File

@@ -11,7 +11,7 @@ type FakeSession = {
detach: ReturnType<typeof vi.fn>;
};
function createPage(opts: { targetId: string; snapshotFull?: string; hasSnapshotForAI?: boolean }) {
function createPage(opts: { targetId: string; snapshotFull?: string; hasAriaSnapshot?: boolean }) {
const session: FakeSession = {
send: vi.fn().mockResolvedValue({
targetInfo: { targetId: opts.targetId },
@@ -33,10 +33,10 @@ function createPage(opts: { targetId: string; snapshotFull?: string; hasSnapshot
locator,
on: vi.fn(),
url: vi.fn(() => `https://example.test/${opts.targetId}`),
...(opts.hasSnapshotForAI === false
...(opts.hasAriaSnapshot === false
? {}
: {
_snapshotForAI: vi.fn().mockResolvedValue({ full: opts.snapshotFull ?? "SNAP" }),
ariaSnapshot: vi.fn().mockResolvedValue(opts.snapshotFull ?? "SNAP"),
}),
};
@@ -151,17 +151,21 @@ describe("pw-ai", () => {
expect(p1.click).toHaveBeenCalledTimes(1);
});
it("fails with a clear error when _snapshotForAI is missing", async () => {
const p1 = createPage({ targetId: "T1", hasSnapshotForAI: false });
it("uses Playwright's public AI aria snapshot API", async () => {
const p1 = createPage({ targetId: "T1", snapshotFull: "ONE" });
const browser = createBrowser([p1.page]);
(chromiumMock.connectOverCDP as unknown as ReturnType<typeof vi.fn>).mockResolvedValue(browser);
await expect(
snapshotAiViaPlaywright({
cdpUrl: "http://127.0.0.1:18792",
targetId: "T1",
}),
).rejects.toThrow(/_snapshotForAI/i);
await snapshotAiViaPlaywright({
cdpUrl: "http://127.0.0.1:18792",
targetId: "T1",
timeoutMs: 1234,
});
expect("ariaSnapshot" in p1.page ? p1.page.ariaSnapshot : undefined).toHaveBeenCalledWith({
mode: "ai",
timeout: 1234,
});
});
it("reuses the CDP connection for repeated calls", async () => {

View File

@@ -116,9 +116,9 @@ describe("pw-tools-core browser SSRF guards", () => {
});
it("re-checks current page URL before snapshotting AI content", async () => {
const snapshotForAI = vi.fn(async () => ({ full: 'button "Save"' }));
const ariaSnapshot = vi.fn(async () => 'button "Save"');
pageState.page = {
_snapshotForAI: snapshotForAI,
ariaSnapshot,
url: vi.fn(() => "https://example.com"),
};
@@ -137,7 +137,7 @@ describe("pw-tools-core browser SSRF guards", () => {
});
expect(
sessionMocks.assertPageNavigationCompletedSafely.mock.invocationCallOrder[0],
).toBeLessThan(snapshotForAI.mock.invocationCallOrder[0]);
).toBeLessThan(ariaSnapshot.mock.invocationCallOrder[0]);
});
it("re-checks current page URL before role snapshots", async () => {

View File

@@ -16,7 +16,6 @@ import {
getPageForTargetId,
gotoPageWithNavigationGuard,
storeRoleRefsForTarget,
type WithSnapshotForAI,
} from "./pw-session.js";
import { withPageScopedCdpClient } from "./pw-session.page-cdp.js";
@@ -80,16 +79,10 @@ export async function snapshotAiViaPlaywright(opts: {
});
}
const maybe = page as unknown as WithSnapshotForAI;
if (!maybe._snapshotForAI) {
throw new Error("Playwright _snapshotForAI is not available. Upgrade playwright-core.");
}
const result = await maybe._snapshotForAI({
let snapshot = await page.ariaSnapshot({
mode: "ai",
timeout: Math.max(500, Math.min(60_000, Math.floor(opts.timeoutMs ?? 5000))),
track: "response",
});
let snapshot = result?.full ?? "";
const maxChars = opts.maxChars;
const limit =
typeof maxChars === "number" && Number.isFinite(maxChars) && maxChars > 0
@@ -144,15 +137,11 @@ export async function snapshotRoleViaPlaywright(opts: {
if (normalizeOptionalString(opts.selector) || normalizeOptionalString(opts.frameSelector)) {
throw new Error("refs=aria does not support selector/frame snapshots yet.");
}
const maybe = page as unknown as WithSnapshotForAI;
if (!maybe._snapshotForAI) {
throw new Error("refs=aria requires Playwright _snapshotForAI support.");
}
const result = await maybe._snapshotForAI({
const snapshot = await page.ariaSnapshot({
mode: "ai",
timeout: 5000,
track: "response",
});
const built = buildRoleSnapshotFromAiSnapshot(result?.full ?? "", opts.options);
const built = buildRoleSnapshotFromAiSnapshot(snapshot, opts.options);
storeRoleRefsForTarget({
page,
cdpUrl: opts.cdpUrl,