diff --git a/CHANGELOG.md b/CHANGELOG.md index f1d4e0f88fe..9ee0658a5a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,6 +69,7 @@ Docs: https://docs.openclaw.ai - Context engine: keep safeguard compaction checks active after context-engine windowing and for `ownsCompaction` engines, so large transcripts can compact before prompt submission instead of waiting for provider overflow. Fixes #71325. Thanks @steipete. - Plugins/runtime deps: isolate the internal npm cache used for bundled plugin runtime-dependency repair and let package updates refresh/verify already-current installs, so failed update or sudo doctor runs can be repaired by rerunning `openclaw update`. Thanks @steipete. - Agents/delete: keep `--json` output machine-readable and retain workspaces that overlap another agent's workspace instead of moving shared state to Trash. Fixes #70889 and #70890. (#70897) Thanks @kaseonedge. +- Browser/screenshot: honor `timeoutMs` through host and node screenshot requests, bound raw CDP screenshot commands, and avoid beyond-viewport CDP capture for ordinary viewport screenshots, so Windows Chrome captures no longer hang past the requested deadline. Fixes #68330. Thanks @Woodylai24. - Plugins/runtime deps: stage bundled plugin runtime dependencies for packaged/global installs in an external runtime root and retain already staged deps across repairs, avoiding package-tree update races and npm pruning after upgrades. Thanks @steipete. - Plugins/runtime deps: log bundled plugin runtime-dependency staging before synchronous npm installs start and include elapsed timing afterward, so first boot after upgrades no longer looks hung while dependencies are being repaired. Thanks @steipete. - Agents/failover: forward embedded run abort signals into provider-owned model streams, cap implicit LLM idle watchdogs below long run timeouts, and mark 429 responses without usable retry timing as non-retryable so GitHub Copilot rate limits fail over or surface promptly instead of hanging until run timeout. Fixes #71120. Thanks @steipete. diff --git a/extensions/browser/src/browser-tool.test.ts b/extensions/browser/src/browser-tool.test.ts index ea351e42053..83c07f12bdf 100644 --- a/extensions/browser/src/browser-tool.test.ts +++ b/extensions/browser/src/browser-tool.test.ts @@ -568,6 +568,86 @@ describe("browser tool snapshot maxChars", () => { expect(browserClientMocks.browserDoctor).not.toHaveBeenCalled(); }); + it("passes screenshot timeoutMs to the host browser client", async () => { + const tool = createBrowserTool(); + await tool.execute?.("call-1", { + action: "screenshot", + target: "host", + targetId: "tab-1", + timeoutMs: 12_345, + }); + + expect(browserActionsMocks.browserScreenshotAction).toHaveBeenCalledWith( + undefined, + expect.objectContaining({ + targetId: "tab-1", + timeoutMs: 12_345, + }), + ); + }); + + it("passes screenshot timeoutMs through the node browser proxy", async () => { + mockSingleBrowserProxyNode(); + gatewayMocks.callGatewayTool.mockResolvedValueOnce({ + ok: true, + payload: { + result: { ok: true, path: "/tmp/test.png" }, + }, + }); + const tool = createBrowserTool(); + await tool.execute?.("call-1", { + action: "screenshot", + target: "node", + targetId: "tab-1", + timeoutMs: 12_345, + }); + + expect(gatewayMocks.callGatewayTool).toHaveBeenCalledWith( + "node.invoke", + { timeoutMs: 17_345 }, + expect.objectContaining({ + params: expect.objectContaining({ + method: "POST", + path: "/screenshot", + timeoutMs: 12_345, + body: expect.objectContaining({ + targetId: "tab-1", + timeoutMs: 12_345, + }), + }), + }), + ); + }); + + it("uses the screenshot default timeout for node browser proxy requests", async () => { + mockSingleBrowserProxyNode(); + gatewayMocks.callGatewayTool.mockResolvedValueOnce({ + ok: true, + payload: { + result: { ok: true, path: "/tmp/test.png" }, + }, + }); + const tool = createBrowserTool(); + await tool.execute?.("call-1", { + action: "screenshot", + target: "node", + targetId: "tab-1", + }); + + expect(gatewayMocks.callGatewayTool).toHaveBeenCalledWith( + "node.invoke", + { timeoutMs: 25_000 }, + expect.objectContaining({ + params: expect.objectContaining({ + timeoutMs: 20_000, + body: expect.objectContaining({ + timeoutMs: 20_000, + }), + }), + }), + ); + }); + it("falls back to role refs when a node snapshot cannot provide aria refs", async () => { mockSingleBrowserProxyNode(); gatewayMocks.callGatewayTool diff --git a/extensions/browser/src/browser-tool.ts b/extensions/browser/src/browser-tool.ts index e971b6615f6..8a94bd2d591 100644 --- a/extensions/browser/src/browser-tool.ts +++ b/extensions/browser/src/browser-tool.ts @@ -43,6 +43,7 @@ import { trackSessionBrowserTab, untrackSessionBrowserTab, } from "./browser-tool.runtime.js"; +import { DEFAULT_BROWSER_SCREENSHOT_TIMEOUT_MS } from "./browser/constants.js"; const browserToolDeps = { browserAct, @@ -621,11 +622,17 @@ export function createBrowserTool(opts?: { const element = readStringParam(params, "element"); const labels = typeof params.labels === "boolean" ? params.labels : undefined; const type = params.type === "jpeg" ? "jpeg" : "png"; + const timeoutMs = + typeof params.timeoutMs === "number" && Number.isFinite(params.timeoutMs) + ? Math.max(1, Math.floor(params.timeoutMs)) + : undefined; + const effectiveTimeoutMs = timeoutMs ?? DEFAULT_BROWSER_SCREENSHOT_TIMEOUT_MS; const result = proxyRequest ? ((await proxyRequest({ method: "POST", path: "/screenshot", profile, + timeoutMs: effectiveTimeoutMs, body: { targetId, fullPage, @@ -633,6 +640,7 @@ export function createBrowserTool(opts?: { element, type, labels, + timeoutMs: effectiveTimeoutMs, }, })) as Awaited>) : await browserToolDeps.browserScreenshotAction(baseUrl, { @@ -642,6 +650,7 @@ export function createBrowserTool(opts?: { element, type, labels, + timeoutMs: effectiveTimeoutMs, profile, }); return await browserToolDeps.imageResultFromFile({ diff --git a/extensions/browser/src/browser/cdp.helpers.internal.test.ts b/extensions/browser/src/browser/cdp.helpers.internal.test.ts index b41cb0c5d40..f1e716152aa 100644 --- a/extensions/browser/src/browser/cdp.helpers.internal.test.ts +++ b/extensions/browser/src/browser/cdp.helpers.internal.test.ts @@ -246,6 +246,31 @@ describe("cdp.helpers internal", () => { }), ).rejects.toThrow(/CDP socket closed/); }); + + it("rejects and closes the socket when a CDP command exceeds its timeout", async () => { + const server = await startWsServer(); + wss = server.wss; + let closed = false; + server.wss.on("connection", (socket) => { + socket.on("message", () => { + // Intentionally leave the command unanswered. + }); + socket.on("close", () => { + closed = true; + }); + }); + + await expect( + withCdpSocket( + server.url, + async (send) => { + await send("Page.captureScreenshot"); + }, + { commandTimeoutMs: 5 }, + ), + ).rejects.toThrow(/CDP command Page\.captureScreenshot timed out after 5ms/); + await vi.waitFor(() => expect(closed).toBe(true)); + }); }); describe("withCdpSocket", () => { diff --git a/extensions/browser/src/browser/cdp.helpers.ts b/extensions/browser/src/browser/cdp.helpers.ts index 82c8767f6a2..e22a18f6e16 100644 --- a/extensions/browser/src/browser/cdp.helpers.ts +++ b/extensions/browser/src/browser/cdp.helpers.ts @@ -142,6 +142,7 @@ type CdpResponse = { type Pending = { resolve: (value: unknown) => void; reject: (err: Error) => void; + timer?: ReturnType; }; export type CdpSendFn = ( @@ -221,9 +222,19 @@ type CdpFetchResult = { release: () => Promise; }; -function createCdpSender(ws: WebSocket) { +function createCdpSender(ws: WebSocket, opts?: { commandTimeoutMs?: number }) { let nextId = 1; const pending = new Map(); + const commandTimeoutMs = + typeof opts?.commandTimeoutMs === "number" && Number.isFinite(opts.commandTimeoutMs) + ? Math.max(1, Math.floor(opts.commandTimeoutMs)) + : undefined; + + const clearPendingTimer = (p: Pending) => { + if (p.timer !== undefined) { + clearTimeout(p.timer); + } + }; const send: CdpSendFn = ( method: string, @@ -232,14 +243,31 @@ function createCdpSender(ws: WebSocket) { ) => { const id = nextId++; const msg = { id, method, params, sessionId }; - ws.send(JSON.stringify(msg)); return new Promise((resolve, reject) => { - pending.set(id, { resolve, reject }); + if (ws.readyState !== WebSocket.OPEN) { + reject(new Error("CDP socket closed")); + return; + } + const entry: Pending = { resolve, reject }; + if (commandTimeoutMs !== undefined) { + entry.timer = setTimeout(() => { + closeWithError(new Error(`CDP command ${method} timed out after ${commandTimeoutMs}ms`)); + }, commandTimeoutMs); + } + pending.set(id, entry); + try { + ws.send(JSON.stringify(msg)); + } catch (err) { + pending.delete(id); + clearPendingTimer(entry); + reject(err instanceof Error ? err : new Error(String(err))); + } }); }; const closeWithError = (err: Error) => { for (const [, p] of pending) { + clearPendingTimer(p); p.reject(err); } pending.clear(); @@ -270,6 +298,7 @@ function createCdpSender(ws: WebSocket) { return; } pending.delete(parsed.id); + clearPendingTimer(p); if (parsed.error?.message) { p.reject(new Error(parsed.error.message)); return; @@ -383,10 +412,14 @@ export function openCdpWebSocket( export async function withCdpSocket( wsUrl: string, fn: (send: CdpSendFn) => Promise, - opts?: { headers?: Record; handshakeTimeoutMs?: number }, + opts?: { + headers?: Record; + handshakeTimeoutMs?: number; + commandTimeoutMs?: number; + }, ): Promise { const ws = openCdpWebSocket(wsUrl, opts); - const { send, closeWithError } = createCdpSender(ws); + const { send, closeWithError } = createCdpSender(ws, opts); const openPromise = new Promise((resolve, reject) => { ws.once("open", () => resolve()); diff --git a/extensions/browser/src/browser/cdp.internal.test.ts b/extensions/browser/src/browser/cdp.internal.test.ts index a562148abb5..159d3f9ea00 100644 --- a/extensions/browser/src/browser/cdp.internal.test.ts +++ b/extensions/browser/src/browser/cdp.internal.test.ts @@ -122,7 +122,8 @@ describe("cdp internal", () => { return; } if (msg.method === "Page.captureScreenshot") { - expect(msg.params).toMatchObject({ format: "png", captureBeyondViewport: true }); + expect(msg.params).toMatchObject({ format: "png" }); + expect(msg.params).not.toHaveProperty("captureBeyondViewport"); socket.send( JSON.stringify({ id: msg.id, diff --git a/extensions/browser/src/browser/cdp.screenshot-params.test.ts b/extensions/browser/src/browser/cdp.screenshot-params.test.ts index 36411a2b01f..49ad122fd03 100644 --- a/extensions/browser/src/browser/cdp.screenshot-params.test.ts +++ b/extensions/browser/src/browser/cdp.screenshot-params.test.ts @@ -1,4 +1,5 @@ import { beforeEach, describe, expect, it, vi } from "vitest"; +import { withCdpSocket } from "./cdp.helpers.js"; import { captureScreenshot } from "./cdp.js"; import type { ResolvedBrowserProfile } from "./config.js"; import { shouldUsePlaywrightForScreenshot } from "./profile-capabilities.js"; @@ -18,44 +19,50 @@ const mockState = vi.hoisted(() => ({ })); vi.mock("./cdp.helpers.js", () => ({ - withCdpSocket: vi.fn(async (_wsUrl: string, fn: (send: unknown) => Promise) => { - const send = (method: string, params?: Record) => { - sentMessages.push({ method, params }); - if (method === "Page.captureScreenshot") { - return Promise.resolve({ data: "AAAA" }); - } - if (method === "Page.getLayoutMetrics") { - return Promise.resolve({ - cssContentSize: { width: 1200, height: 3000 }, - contentSize: { width: 1200, height: 3000 }, - }); - } - if (method === "Emulation.clearDeviceMetricsOverride") { - mockState.emulationCleared = true; - return Promise.resolve({}); - } - if (method === "Emulation.setDeviceMetricsOverride") { - mockState.emulationCleared = false; - return Promise.resolve({}); - } - if (method === "Runtime.evaluate") { - if (mockState.emulationCleared && mockState.emulatedTab) { + withCdpSocket: vi.fn( + async ( + _wsUrl: string, + fn: (send: unknown) => Promise, + _opts?: { commandTimeoutMs?: number }, + ) => { + const send = (method: string, params?: Record) => { + sentMessages.push({ method, params }); + if (method === "Page.captureScreenshot") { + return Promise.resolve({ data: "AAAA" }); + } + if (method === "Page.getLayoutMetrics") { + return Promise.resolve({ + cssContentSize: { width: 1200, height: 3000 }, + contentSize: { width: 1200, height: 3000 }, + }); + } + if (method === "Emulation.clearDeviceMetricsOverride") { + mockState.emulationCleared = true; + return Promise.resolve({}); + } + if (method === "Emulation.setDeviceMetricsOverride") { + mockState.emulationCleared = false; + return Promise.resolve({}); + } + if (method === "Runtime.evaluate") { + if (mockState.emulationCleared && mockState.emulatedTab) { + return Promise.resolve({ + result: { + value: mockState.naturalViewport, + }, + }); + } return Promise.resolve({ result: { - value: mockState.naturalViewport, + value: mockState.viewport, }, }); } - return Promise.resolve({ - result: { - value: mockState.viewport, - }, - }); - } - return Promise.resolve({}); - }; - return fn(send); - }), + return Promise.resolve({}); + }; + return fn(send); + }, + ), appendCdpPath: vi.fn(), fetchJson: vi.fn(), isLoopbackHost: vi.fn(), @@ -88,16 +95,16 @@ beforeEach(() => { }); describe("CDP screenshot params", () => { - it("viewport screenshot omits fromSurface without clip or emulation override", async () => { + it("viewport screenshot omits fromSurface and captureBeyondViewport", async () => { await captureScreenshot({ wsUrl: "ws://localhost:9222/devtools/page/X", format: "png" }); const call = sentMessages.find((m) => m.method === "Page.captureScreenshot"); expect(call).toBeDefined(); expect(call!.params).toMatchObject({ format: "png", - captureBeyondViewport: true, }); expect(call!.params).not.toHaveProperty("fromSurface"); + expect(call!.params).not.toHaveProperty("captureBeyondViewport"); expect(call!.params).not.toHaveProperty("clip"); const emulationCalls = sentMessages.filter( @@ -106,6 +113,20 @@ describe("CDP screenshot params", () => { expect(emulationCalls).toHaveLength(0); }); + it("uses the requested timeout as the raw CDP command timeout", async () => { + await captureScreenshot({ + wsUrl: "ws://localhost:9222/devtools/page/X", + format: "png", + timeoutMs: 12_345, + }); + + expect(withCdpSocket).toHaveBeenCalledWith( + "ws://localhost:9222/devtools/page/X", + expect.any(Function), + { commandTimeoutMs: 12_345 }, + ); + }); + it("fullPage on emulated tab: clears, detects drift, re-applies saved emulation", async () => { mockState.emulatedTab = true; @@ -133,6 +154,8 @@ describe("CDP screenshot params", () => { // Clear is called first in the finally block const clearCall = sentMessages.find((m) => m.method === "Emulation.clearDeviceMetricsOverride"); expect(clearCall).toBeDefined(); + const captureCall = sentMessages.find((m) => m.method === "Page.captureScreenshot"); + expect(captureCall?.params).toMatchObject({ captureBeyondViewport: true }); // Viewport drifted after clear → re-apply saved dimensions expect(secondSetCall.params).toMatchObject({ diff --git a/extensions/browser/src/browser/cdp.ts b/extensions/browser/src/browser/cdp.ts index 230e4a6c313..aef31361bfe 100644 --- a/extensions/browser/src/browser/cdp.ts +++ b/extensions/browser/src/browser/cdp.ts @@ -58,11 +58,13 @@ export function normalizeCdpWsUrl(wsUrl: string, cdpUrl: string): string { export async function captureScreenshotPng(opts: { wsUrl: string; fullPage?: boolean; + timeoutMs?: number; }): Promise { return await captureScreenshot({ wsUrl: opts.wsUrl, fullPage: opts.fullPage, format: "png", + timeoutMs: opts.timeoutMs, }); } @@ -71,107 +73,111 @@ export async function captureScreenshot(opts: { fullPage?: boolean; format?: "png" | "jpeg"; quality?: number; // jpeg only (0..100) + timeoutMs?: number; }): Promise { - return await withCdpSocket(opts.wsUrl, async (send) => { - await send("Page.enable"); + return await withCdpSocket( + opts.wsUrl, + async (send) => { + await send("Page.enable"); - // For full-page captures, temporarily expand the viewport to the content - // size so the entire page is within the viewport bounds. We save the - // current viewport state and restore it after capture so pre-existing - // device emulation (mobile width, DPR, touch) is not lost. - let savedVp: { w: number; h: number; dpr: number; sw: number; sh: number } | undefined; - if (opts.fullPage) { - const metrics = (await send("Page.getLayoutMetrics")) as { - cssContentSize?: { width?: number; height?: number }; - contentSize?: { width?: number; height?: number }; - }; - const size = metrics?.cssContentSize ?? metrics?.contentSize; - const contentWidth = size?.width ?? 0; - const contentHeight = size?.height ?? 0; - if (contentWidth > 0 && contentHeight > 0) { - const vpResult = (await send("Runtime.evaluate", { - expression: - "({ w: window.innerWidth, h: window.innerHeight, dpr: window.devicePixelRatio, sw: screen.width, sh: screen.height })", - returnByValue: true, - })) as { - result?: { - value?: { w?: number; h?: number; dpr?: number; sw?: number; sh?: number }; - }; + // For full-page captures, temporarily expand the viewport to the content + // size so the entire page is within the viewport bounds. We save the + // current viewport state and restore it after capture so pre-existing + // device emulation (mobile width, DPR, touch) is not lost. + let savedVp: { w: number; h: number; dpr: number; sw: number; sh: number } | undefined; + if (opts.fullPage) { + const metrics = (await send("Page.getLayoutMetrics")) as { + cssContentSize?: { width?: number; height?: number }; + contentSize?: { width?: number; height?: number }; }; - const v = vpResult?.result?.value; - const currentW = v?.w ?? 0; - const currentH = v?.h ?? 0; - savedVp = { - w: currentW, - h: currentH, - dpr: v?.dpr ?? 1, - sw: v?.sw ?? currentW, - sh: v?.sh ?? currentH, - }; - // mobile: false is the safe default — CDP provides no way to query - // the active mobile flag, and inferring from navigator.maxTouchPoints - // would false-positive on touch-enabled desktops. - await send("Emulation.setDeviceMetricsOverride", { - width: Math.ceil(Math.max(currentW, contentWidth)), - height: Math.ceil(Math.max(currentH, contentHeight)), - deviceScaleFactor: savedVp.dpr, - mobile: false, - screenWidth: savedVp.sw, - screenHeight: savedVp.sh, - }); - } - } - - const format = opts.format ?? "png"; - const quality = - format === "jpeg" ? Math.max(0, Math.min(100, Math.round(opts.quality ?? 85))) : undefined; - - try { - // Chromium bug 40760789 (cross-origin textures missing with - // fromSurface: true + captureBeyondViewport: true) was fixed around - // Chrome 130. Chrome 146+ managed/headful browsers now reject - // fromSurface: false, so we omit it and keep captureBeyondViewport: true. - const result = (await send("Page.captureScreenshot", { - format, - ...(quality !== undefined ? { quality } : {}), - captureBeyondViewport: true, - })) as { data?: string }; - - const base64 = result?.data; - if (!base64) { - throw new Error("Screenshot failed: missing data"); - } - return Buffer.from(base64, "base64"); - } finally { - if (savedVp) { - // Clear the temporary viewport expansion first. If the tab had - // prior device emulation the clear will change the viewport back to - // the browser's natural dimensions — detect that and re-apply the - // saved emulation so the tab's original state is preserved. - await send("Emulation.clearDeviceMetricsOverride").catch(() => {}); - try { - const postResult = (await send("Runtime.evaluate", { + const size = metrics?.cssContentSize ?? metrics?.contentSize; + const contentWidth = size?.width ?? 0; + const contentHeight = size?.height ?? 0; + if (contentWidth > 0 && contentHeight > 0) { + const vpResult = (await send("Runtime.evaluate", { expression: - "({ w: window.innerWidth, h: window.innerHeight, dpr: window.devicePixelRatio })", + "({ w: window.innerWidth, h: window.innerHeight, dpr: window.devicePixelRatio, sw: screen.width, sh: screen.height })", returnByValue: true, - })) as { result?: { value?: { w?: number; h?: number; dpr?: number } } }; - const p = postResult?.result?.value; - if (p?.w !== savedVp.w || p?.h !== savedVp.h || p?.dpr !== savedVp.dpr) { - await send("Emulation.setDeviceMetricsOverride", { - width: savedVp.w, - height: savedVp.h, - deviceScaleFactor: savedVp.dpr, - mobile: false, - screenWidth: savedVp.sw, - screenHeight: savedVp.sh, - }); - } - } catch { - // Best-effort restoration; ignore failures in the cleanup path. + })) as { + result?: { + value?: { w?: number; h?: number; dpr?: number; sw?: number; sh?: number }; + }; + }; + const v = vpResult?.result?.value; + const currentW = v?.w ?? 0; + const currentH = v?.h ?? 0; + savedVp = { + w: currentW, + h: currentH, + dpr: v?.dpr ?? 1, + sw: v?.sw ?? currentW, + sh: v?.sh ?? currentH, + }; + // mobile: false is the safe default — CDP provides no way to query + // the active mobile flag, and inferring from navigator.maxTouchPoints + // would false-positive on touch-enabled desktops. + await send("Emulation.setDeviceMetricsOverride", { + width: Math.ceil(Math.max(currentW, contentWidth)), + height: Math.ceil(Math.max(currentH, contentHeight)), + deviceScaleFactor: savedVp.dpr, + mobile: false, + screenWidth: savedVp.sw, + screenHeight: savedVp.sh, + }); } } - } - }); + + const format = opts.format ?? "png"; + const quality = + format === "jpeg" ? Math.max(0, Math.min(100, Math.round(opts.quality ?? 85))) : undefined; + + try { + // Chrome 146+ managed/headful browsers reject fromSurface: false. + // For ordinary viewport captures, keep CDP's captureBeyondViewport + // default (false), matching Playwright's Chromium path. + const result = (await send("Page.captureScreenshot", { + format, + ...(quality !== undefined ? { quality } : {}), + ...(opts.fullPage ? { captureBeyondViewport: true } : {}), + })) as { data?: string }; + + const base64 = result?.data; + if (!base64) { + throw new Error("Screenshot failed: missing data"); + } + return Buffer.from(base64, "base64"); + } finally { + if (savedVp) { + // Clear the temporary viewport expansion first. If the tab had + // prior device emulation the clear will change the viewport back to + // the browser's natural dimensions — detect that and re-apply the + // saved emulation so the tab's original state is preserved. + await send("Emulation.clearDeviceMetricsOverride").catch(() => {}); + try { + const postResult = (await send("Runtime.evaluate", { + expression: + "({ w: window.innerWidth, h: window.innerHeight, dpr: window.devicePixelRatio })", + returnByValue: true, + })) as { result?: { value?: { w?: number; h?: number; dpr?: number } } }; + const p = postResult?.result?.value; + if (p?.w !== savedVp.w || p?.h !== savedVp.h || p?.dpr !== savedVp.dpr) { + await send("Emulation.setDeviceMetricsOverride", { + width: savedVp.w, + height: savedVp.h, + deviceScaleFactor: savedVp.dpr, + mobile: false, + screenWidth: savedVp.sw, + screenHeight: savedVp.sh, + }); + } + } catch { + // Best-effort restoration; ignore failures in the cleanup path. + } + } + } + }, + { commandTimeoutMs: opts.timeoutMs }, + ); } export async function createTargetViaCdp(opts: { diff --git a/extensions/browser/src/browser/chrome-mcp.ts b/extensions/browser/src/browser/chrome-mcp.ts index edaef0fcd98..18e28a40b99 100644 --- a/extensions/browser/src/browser/chrome-mcp.ts +++ b/extensions/browser/src/browser/chrome-mcp.ts @@ -547,15 +547,22 @@ export async function takeChromeMcpScreenshot(params: { uid?: string; fullPage?: boolean; format?: "png" | "jpeg"; + timeoutMs?: number; }): Promise { return await withTempFile(async (filePath) => { - await callTool(params.profileName, params.userDataDir, "take_screenshot", { - pageId: parsePageId(params.targetId), - filePath, - format: params.format ?? "png", - ...(params.uid ? { uid: params.uid } : {}), - ...(params.fullPage ? { fullPage: true } : {}), - }); + await callTool( + params.profileName, + params.userDataDir, + "take_screenshot", + { + pageId: parsePageId(params.targetId), + filePath, + format: params.format ?? "png", + ...(params.uid ? { uid: params.uid } : {}), + ...(params.fullPage ? { fullPage: true } : {}), + }, + { timeoutMs: params.timeoutMs }, + ); return await fs.readFile(filePath); }); } diff --git a/extensions/browser/src/browser/client-actions-core.ts b/extensions/browser/src/browser/client-actions-core.ts index c899182a356..8a116e56c47 100644 --- a/extensions/browser/src/browser/client-actions-core.ts +++ b/extensions/browser/src/browser/client-actions-core.ts @@ -6,6 +6,7 @@ import type { import { buildProfileQuery, withBaseUrl } from "./client-actions-url.js"; import type { BrowserActRequest, BrowserFormField } from "./client-actions.types.js"; import { fetchBrowserJson } from "./client-fetch.js"; +import { DEFAULT_BROWSER_SCREENSHOT_TIMEOUT_MS } from "./constants.js"; export type { BrowserActRequest, BrowserFormField } from "./client-actions.types.js"; @@ -176,10 +177,16 @@ export async function browserScreenshotAction( element?: string; type?: "png" | "jpeg"; labels?: boolean; + timeoutMs?: number; profile?: string; }, ): Promise { const q = buildProfileQuery(opts.profile); + const timeoutMs = + typeof opts.timeoutMs === "number" && Number.isFinite(opts.timeoutMs) + ? Math.max(1, Math.floor(opts.timeoutMs)) + : undefined; + const effectiveTimeoutMs = timeoutMs ?? DEFAULT_BROWSER_SCREENSHOT_TIMEOUT_MS; return await fetchBrowserJson(withBaseUrl(baseUrl, `/screenshot${q}`), { method: "POST", headers: { "Content-Type": "application/json" }, @@ -190,7 +197,8 @@ export async function browserScreenshotAction( element: opts.element, type: opts.type, labels: opts.labels, + timeoutMs: effectiveTimeoutMs, }), - timeoutMs: 20000, + timeoutMs: effectiveTimeoutMs, }); } diff --git a/extensions/browser/src/browser/client.test.ts b/extensions/browser/src/browser/client.test.ts index 329cb33b5b1..649eb4fa078 100644 --- a/extensions/browser/src/browser/client.test.ts +++ b/extensions/browser/src/browser/client.test.ts @@ -123,11 +123,11 @@ describe("browser client", () => { }); it("uses the expected endpoints + methods for common calls", async () => { - const calls: Array<{ url: string; init?: RequestInit }> = []; + const calls: Array<{ url: string; init?: RequestInit & { timeoutMs?: number } }> = []; vi.stubGlobal( "fetch", - vi.fn(async (url: string, init?: RequestInit) => { + vi.fn(async (url: string, init?: RequestInit & { timeoutMs?: number }) => { calls.push({ url, init }); if (url.endsWith("/tabs") && (!init || init.method === undefined)) { return { @@ -302,7 +302,10 @@ describe("browser client", () => { path: "/tmp/a.pdf", }); await expect( - browserScreenshotAction("http://127.0.0.1:18791", { fullPage: true }), + browserScreenshotAction("http://127.0.0.1:18791", { fullPage: true, timeoutMs: 12_345 }), + ).resolves.toMatchObject({ ok: true, path: "/tmp/a.png" }); + await expect( + browserScreenshotAction("http://127.0.0.1:18791", { targetId: "t-default" }), ).resolves.toMatchObject({ ok: true, path: "/tmp/a.png" }); expect(calls.some((c) => c.url.endsWith("/tabs"))).toBe(true); @@ -310,7 +313,25 @@ describe("browser client", () => { const open = calls.find((c) => c.url.endsWith("/tabs/open")); expect(open?.init?.method).toBe("POST"); - const screenshot = calls.find((c) => c.url.endsWith("/screenshot")); + const screenshotCalls = calls.filter((c) => c.url.endsWith("/screenshot")); + const screenshot = screenshotCalls[0]; expect(screenshot?.init?.method).toBe("POST"); + expect(screenshot?.init?.timeoutMs).toBe(12_345); + expect( + JSON.parse(typeof screenshot?.init?.body === "string" ? screenshot.init.body : "{}"), + ).toMatchObject({ + fullPage: true, + timeoutMs: 12_345, + }); + const defaultScreenshot = screenshotCalls[1]; + expect(defaultScreenshot?.init?.timeoutMs).toBe(20_000); + expect( + JSON.parse( + typeof defaultScreenshot?.init?.body === "string" ? defaultScreenshot.init.body : "{}", + ), + ).toMatchObject({ + targetId: "t-default", + timeoutMs: 20_000, + }); }); }); diff --git a/extensions/browser/src/browser/constants.ts b/extensions/browser/src/browser/constants.ts index 942a05b8206..f1c58a130f7 100644 --- a/extensions/browser/src/browser/constants.ts +++ b/extensions/browser/src/browser/constants.ts @@ -3,6 +3,7 @@ export const DEFAULT_BROWSER_EVALUATE_ENABLED = true; export const DEFAULT_OPENCLAW_BROWSER_COLOR = "#FF4500"; export const DEFAULT_OPENCLAW_BROWSER_PROFILE_NAME = "openclaw"; export const DEFAULT_BROWSER_DEFAULT_PROFILE_NAME = "openclaw"; +export const DEFAULT_BROWSER_SCREENSHOT_TIMEOUT_MS = 20_000; export const DEFAULT_AI_SNAPSHOT_MAX_CHARS = 40_000; export const DEFAULT_AI_SNAPSHOT_EFFICIENT_MAX_CHARS = 8_000; export const DEFAULT_AI_SNAPSHOT_EFFICIENT_DEPTH = 6; diff --git a/extensions/browser/src/browser/pw-tools-core.interactions.ts b/extensions/browser/src/browser/pw-tools-core.interactions.ts index 0dc295a5ea6..ba2af27ad0e 100644 --- a/extensions/browser/src/browser/pw-tools-core.interactions.ts +++ b/extensions/browser/src/browser/pw-tools-core.interactions.ts @@ -1005,6 +1005,7 @@ export async function takeScreenshotViaPlaywright(opts: { element?: string; fullPage?: boolean; type?: "png" | "jpeg"; + timeoutMs?: number; }): Promise<{ buffer: Buffer }> { const page = await getPageForTargetId(opts); ensurePageState(page); @@ -1015,7 +1016,7 @@ export async function takeScreenshotViaPlaywright(opts: { throw new Error("fullPage is not supported for element screenshots"); } const locator = refLocator(page, opts.ref); - const buffer = await locator.screenshot({ type }); + const buffer = await locator.screenshot({ type, timeout: opts.timeoutMs }); return { buffer }; } if (opts.element) { @@ -1023,12 +1024,13 @@ export async function takeScreenshotViaPlaywright(opts: { throw new Error("fullPage is not supported for element screenshots"); } const locator = page.locator(opts.element).first(); - const buffer = await locator.screenshot({ type }); + const buffer = await locator.screenshot({ type, timeout: opts.timeoutMs }); return { buffer }; } const buffer = await page.screenshot({ type, fullPage: Boolean(opts.fullPage), + timeout: opts.timeoutMs, }); return { buffer }; } @@ -1039,6 +1041,7 @@ export async function screenshotWithLabelsViaPlaywright(opts: { refs: Record; maxLabels?: number; type?: "png" | "jpeg"; + timeoutMs?: number; }): Promise<{ buffer: Buffer; labels: number; skipped: number }> { const page = await getPageForTargetId(opts); ensurePageState(page); @@ -1148,7 +1151,7 @@ export async function screenshotWithLabelsViaPlaywright(opts: { }, boxes); } - const buffer = await page.screenshot({ type }); + const buffer = await page.screenshot({ type, timeout: opts.timeoutMs }); return { buffer, labels: boxes.length, skipped }; } finally { await page diff --git a/extensions/browser/src/browser/pw-tools-core.screenshots-element-selector.test.ts b/extensions/browser/src/browser/pw-tools-core.screenshots-element-selector.test.ts index 5da747108a4..1d9ec61963d 100644 --- a/extensions/browser/src/browser/pw-tools-core.screenshots-element-selector.test.ts +++ b/extensions/browser/src/browser/pw-tools-core.screenshots-element-selector.test.ts @@ -45,12 +45,13 @@ describe("pw-tools-core", () => { targetId: "T1", element: "#main", type: "png", + timeoutMs: 1234, }); expect(res.buffer.toString()).toBe("E"); expect(sessionMocks.getPageForTargetId).toHaveBeenCalled(); expect(page.locator as ReturnType).toHaveBeenCalledWith("#main"); - expect(elementScreenshot).toHaveBeenCalledWith({ type: "png" }); + expect(elementScreenshot).toHaveBeenCalledWith({ type: "png", timeout: 1234 }); }); it("screenshots a ref locator", async () => { const refScreenshot = vi.fn(async () => Buffer.from("R")); @@ -66,11 +67,12 @@ describe("pw-tools-core", () => { targetId: "T1", ref: "76", type: "jpeg", + timeoutMs: 2345, }); expect(res.buffer.toString()).toBe("R"); expect(sessionMocks.refLocator).toHaveBeenCalledWith(page, "76"); - expect(refScreenshot).toHaveBeenCalledWith({ type: "jpeg" }); + expect(refScreenshot).toHaveBeenCalledWith({ type: "jpeg", timeout: 2345 }); }); it("rejects fullPage for element or ref screenshots", async () => { setPwToolsCoreCurrentRefLocator({ screenshot: vi.fn(async () => Buffer.from("R")) }); diff --git a/extensions/browser/src/browser/routes/agent.existing-session.test.ts b/extensions/browser/src/browser/routes/agent.existing-session.test.ts index 3aec2196244..118ab6857fd 100644 --- a/extensions/browser/src/browser/routes/agent.existing-session.test.ts +++ b/extensions/browser/src/browser/routes/agent.existing-session.test.ts @@ -150,7 +150,7 @@ describe("existing-session browser routes", () => { { params: {}, query: {}, - body: { ref: "btn-1", type: "jpeg" }, + body: { ref: "btn-1", type: "jpeg", timeoutMs: 4321 }, }, response.res, ); @@ -167,6 +167,7 @@ describe("existing-session browser routes", () => { uid: "btn-1", fullPage: false, format: "jpeg", + timeoutMs: 4321, }); expect(navigationGuardMocks.assertBrowserNavigationResultAllowed).not.toHaveBeenCalled(); }); diff --git a/extensions/browser/src/browser/routes/agent.snapshot.ts b/extensions/browser/src/browser/routes/agent.snapshot.ts index 5fa2ae00437..3db2030b4f4 100644 --- a/extensions/browser/src/browser/routes/agent.snapshot.ts +++ b/extensions/browser/src/browser/routes/agent.snapshot.ts @@ -11,6 +11,7 @@ import { buildAiSnapshotFromChromeMcpSnapshot, flattenChromeMcpSnapshotToAriaNodes, } from "../chrome-mcp.snapshot.js"; +import { DEFAULT_BROWSER_SCREENSHOT_TIMEOUT_MS } from "../constants.js"; import { assertBrowserNavigationAllowed, assertBrowserNavigationResultAllowed, @@ -40,7 +41,7 @@ import { } from "./agent.snapshot.plan.js"; import { EXISTING_SESSION_LIMITS } from "./existing-session-limits.js"; import type { BrowserResponse, BrowserRouteRegistrar } from "./types.js"; -import { asyncBrowserRoute, jsonError, toBoolean, toStringOrEmpty } from "./utils.js"; +import { asyncBrowserRoute, jsonError, toBoolean, toNumber, toStringOrEmpty } from "./utils.js"; const CHROME_MCP_OVERLAY_ATTR = "data-openclaw-mcp-overlay"; @@ -328,6 +329,11 @@ export function registerBrowserAgentSnapshotRoutes( const element = toStringOrEmpty(body.element) || undefined; const labels = toBoolean(body.labels) ?? false; const type = body.type === "jpeg" ? "jpeg" : "png"; + const timeoutMsRaw = toNumber(body.timeoutMs); + const timeoutMs = + timeoutMsRaw !== undefined + ? Math.max(1, Math.floor(timeoutMsRaw)) + : DEFAULT_BROWSER_SCREENSHOT_TIMEOUT_MS; if (fullPage && (ref || element)) { return jsonError(res, 400, "fullPage is not supported for element screenshots"); @@ -370,6 +376,7 @@ export function registerBrowserAgentSnapshotRoutes( targetId: tab.targetId, fullPage, format: type, + timeoutMs, }); await saveNormalizedScreenshotResponse({ res, @@ -397,6 +404,7 @@ export function registerBrowserAgentSnapshotRoutes( uid: ref, fullPage, format: type, + timeoutMs, }); await saveNormalizedScreenshotResponse({ res, @@ -433,6 +441,7 @@ export function registerBrowserAgentSnapshotRoutes( targetId: tab.targetId, refs: snap.refs, type, + timeoutMs, }); await saveNormalizedScreenshotResponse({ res, @@ -453,6 +462,7 @@ export function registerBrowserAgentSnapshotRoutes( element, fullPage, type, + timeoutMs, }); buffer = snap.buffer; } else { @@ -461,6 +471,7 @@ export function registerBrowserAgentSnapshotRoutes( fullPage, format: type, quality: type === "jpeg" ? 85 : undefined, + timeoutMs, }); } diff --git a/extensions/browser/src/browser/server.agent-contract-form-layout-act-commands.test.ts b/extensions/browser/src/browser/server.agent-contract-form-layout-act-commands.test.ts index fd8dc2c9626..531f132107a 100644 --- a/extensions/browser/src/browser/server.agent-contract-form-layout-act-commands.test.ts +++ b/extensions/browser/src/browser/server.agent-contract-form-layout-act-commands.test.ts @@ -374,9 +374,17 @@ describe("browser control server", () => { const shot = await postJson<{ ok: boolean; path?: string }>(`${base}/screenshot`, { element: "body", type: "jpeg", + timeoutMs: 3333, }); expect(shot.ok).toBe(true); expect(typeof shot.path).toBe("string"); + expect(pwMocks.takeScreenshotViaPlaywright).toHaveBeenCalledWith( + expect.objectContaining({ + element: "body", + type: "jpeg", + timeoutMs: 3333, + }), + ); }); it("blocks file chooser traversal / absolute paths outside uploads dir", async () => {