diff --git a/CHANGELOG.md b/CHANGELOG.md index 072241c17e5..a4a55a74156 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Google Meet: grant Meet media permissions through the Playwright browser context when CDP grants do not affect the attached Chrome page, and report in-call microphone/speaker permission problems instead of marking realtime speech ready. - Control UI/WebChat: collapse duplicate in-flight internal text sends onto the active Gateway run so rapid repeat submits do not start fresh `agent:main:main` dispatches. Fixes #75737. Thanks @dsdsddd1 and @BunsDev. - Channels/streaming: expose `streaming.progress.label`, `labels`, `maxLines`, and `toolProgress` in bundled channel config metadata so progress draft settings appear in config, docs, and control surfaces. Thanks @vincentkoc. - Channels/streaming: normalize whitespace and case for `streaming.progress.label: "auto"` so progress draft labels keep using the built-in label pool instead of rendering a literal `auto` title. Thanks @vincentkoc. diff --git a/extensions/browser/src/browser/routes/permissions.test.ts b/extensions/browser/src/browser/routes/permissions.test.ts index 4e52ca2ec06..c4681a68879 100644 --- a/extensions/browser/src/browser/routes/permissions.test.ts +++ b/extensions/browser/src/browser/routes/permissions.test.ts @@ -19,6 +19,16 @@ const cdpMocks = vi.hoisted(() => ({ ), })); +const pwMocks = vi.hoisted(() => ({ + getPwAiModule: vi.fn(async () => null), + grantPermissions: vi.fn(async () => {}), + getPageForTargetId: vi.fn(async () => ({ + context: () => ({ + grantPermissions: pwMocks.grantPermissions, + }), + })), +})); + vi.mock("../chrome.js", () => ({ getChromeWebSocketUrl: cdpMocks.getChromeWebSocketUrl, })); @@ -27,7 +37,7 @@ vi.mock("../cdp.helpers.js", () => ({ withCdpSocket: cdpMocks.withCdpSocket, })); -const { registerBrowserPermissionRoutes } = await import("./permissions.js"); +const { registerBrowserPermissionRoutes, __testing } = await import("./permissions.js"); function createProfileContext() { return { @@ -77,6 +87,42 @@ describe("browser permission routes", () => { cdpMocks.getChromeWebSocketUrl.mockClear(); cdpMocks.send.mockReset().mockResolvedValue({}); cdpMocks.withCdpSocket.mockClear(); + __testing.setDepsForTest(null); + pwMocks.getPwAiModule.mockReset().mockResolvedValue(null); + pwMocks.getPageForTargetId.mockClear(); + pwMocks.grantPermissions.mockClear(); + }); + + it("uses Playwright context permissions for attached pages when available", async () => { + pwMocks.getPwAiModule.mockResolvedValue({ + getPageForTargetId: pwMocks.getPageForTargetId, + } as never); + __testing.setDepsForTest({ getPwAiModule: pwMocks.getPwAiModule as never }); + + const { response } = await callGrant({ + origin: "https://meet.google.com/abc-defg-hij", + permissions: ["audioCapture", "videoCapture"], + optionalPermissions: ["speakerSelection"], + targetId: "meet-tab", + }); + + expect(response.statusCode).toBe(200); + expect(response.body).toMatchObject({ + ok: true, + origin: "https://meet.google.com", + grantedPermissions: ["audioCapture", "videoCapture"], + unsupportedPermissions: ["speakerSelection"], + grantMethod: "playwright", + }); + expect(pwMocks.getPageForTargetId).toHaveBeenCalledWith({ + cdpUrl: "http://127.0.0.1:18800", + targetId: "meet-tab", + ssrfPolicy: { allowPrivateNetwork: false }, + }); + expect(pwMocks.grantPermissions).toHaveBeenCalledWith(["microphone", "camera"], { + origin: "https://meet.google.com", + }); + expect(cdpMocks.send).not.toHaveBeenCalled(); }); it("grants required and optional Chrome permissions for an origin", async () => { diff --git a/extensions/browser/src/browser/routes/permissions.ts b/extensions/browser/src/browser/routes/permissions.ts index 71d3e4ea2e8..52b5f88d73a 100644 --- a/extensions/browser/src/browser/routes/permissions.ts +++ b/extensions/browser/src/browser/routes/permissions.ts @@ -1,6 +1,9 @@ +import type { SsrFPolicy } from "../../infra/net/ssrf.js"; import { withCdpSocket } from "../cdp.helpers.js"; import { getChromeWebSocketUrl } from "../chrome.js"; +import { getPwAiModule } from "../pw-ai-module.js"; import type { BrowserRouteContext } from "../server-context.js"; +import type { ProfileContext } from "../server-context.js"; import type { BrowserRouteRegistrar } from "./types.js"; import { asyncBrowserRoute, @@ -10,11 +13,22 @@ import { toStringOrEmpty, } from "./utils.js"; +const permissionRouteDeps = { + getPwAiModule, +}; + +export const __testing = { + setDepsForTest(deps: { getPwAiModule?: typeof getPwAiModule } | null) { + permissionRouteDeps.getPwAiModule = deps?.getPwAiModule ?? getPwAiModule; + }, +}; + type GrantPermissionsBody = { origin?: unknown; permissions?: unknown; optionalPermissions?: unknown; timeoutMs?: unknown; + targetId?: unknown; }; function readOrigin(raw: unknown): string | null { @@ -47,15 +61,45 @@ function readPermissions(raw: unknown): string[] | null { } async function grantPermissions(params: { + profileCtx: ProfileContext; + targetId?: string; wsUrl: string; origin: string; requiredPermissions: string[]; optionalPermissions: string[]; timeoutMs: number; + ssrfPolicy?: SsrFPolicy; }) { const allPermissions = [ ...new Set([...params.requiredPermissions, ...params.optionalPermissions]), ]; + const playwrightRequiredPermissions = params.requiredPermissions.map(toPlaywrightPermission); + const canUsePlaywright = + playwrightRequiredPermissions.every((value): value is string => Boolean(value)) && + params.requiredPermissions.length > 0; + if (canUsePlaywright) { + const pw = await permissionRouteDeps.getPwAiModule({ mode: "soft" }); + if (pw) { + try { + const page = await pw.getPageForTargetId({ + cdpUrl: params.profileCtx.profile.cdpUrl, + targetId: params.targetId, + ssrfPolicy: params.ssrfPolicy, + }); + await page.context().grantPermissions(playwrightRequiredPermissions, { + origin: params.origin, + }); + return { + grantedPermissions: params.requiredPermissions, + unsupportedPermissions: params.optionalPermissions, + grantMethod: "playwright", + }; + } catch { + // Fall back to the raw CDP browser command below. Some routes call this + // before a page exists, while attached browser profiles need Playwright. + } + } + } let unsupportedPermissions: string[] = []; await withCdpSocket( params.wsUrl, @@ -82,9 +126,21 @@ async function grantPermissions(params: { return { grantedPermissions: allPermissions.filter((value) => !unsupportedPermissions.includes(value)), unsupportedPermissions, + grantMethod: "cdp", }; } +function toPlaywrightPermission(permission: string): string | undefined { + switch (permission) { + case "audioCapture": + return "microphone"; + case "videoCapture": + return "camera"; + default: + return undefined; + } +} + export function registerBrowserPermissionRoutes( app: BrowserRouteRegistrar, ctx: BrowserRouteContext, @@ -107,6 +163,7 @@ export function registerBrowserPermissionRoutes( return jsonError(res, 400, "permissions must be a non-empty string array"); } const optionalPermissions = readPermissions(body.optionalPermissions ?? []) ?? []; + const targetId = toStringOrEmpty(body.targetId) || undefined; const timeoutMs = Math.max(1_000, toNumber(body.timeoutMs) ?? 5_000); try { @@ -120,11 +177,14 @@ export function registerBrowserPermissionRoutes( return jsonError(res, 409, "browser CDP WebSocket unavailable"); } const granted = await grantPermissions({ + profileCtx, + targetId, wsUrl, origin, requiredPermissions, optionalPermissions, timeoutMs, + ssrfPolicy: ctx.state().resolved.ssrfPolicy, }); return res.json({ ok: true, origin, ...granted }); } catch (error) { diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index b6e5992b301..9a41bbc652a 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -2118,6 +2118,64 @@ describe("google-meet plugin", () => { expect(captionButton.click).toHaveBeenCalledTimes(1); }); + it("reports in-call Meet audio permission problems from button labels", () => { + const makeButton = (label: string) => ({ + disabled: false, + innerText: "", + textContent: "", + click: vi.fn(), + getAttribute: vi.fn((name: string) => (name === "aria-label" ? label : null)), + }); + const document = { + body: { innerText: "", textContent: "" }, + title: "Meet", + querySelector: vi.fn(() => null), + querySelectorAll: vi.fn((selector: string) => { + if (selector === "button") { + return [ + makeButton("Leave call"), + makeButton("Microphone problem. Show more info"), + makeButton("Microphone: Permission needed"), + makeButton("Speaker: Permission needed"), + ]; + } + if (selector === "input") { + return []; + } + return []; + }), + }; + const context = createContext({ + JSON, + document, + location: { + href: "https://meet.google.com/abc-defg-hij", + hostname: "meet.google.com", + }, + window: {}, + }); + const inspect = new Script( + `(${chromeTransportTesting.meetStatusScriptForTest({ + allowMicrophone: true, + autoJoin: false, + captureCaptions: false, + guestName: "OpenClaw Agent", + })})`, + ).runInContext(context) as () => string; + + const result = JSON.parse(inspect()) as { + inCall?: boolean; + manualActionRequired?: boolean; + manualActionReason?: string; + manualActionMessage?: string; + }; + + expect(result.inCall).toBe(true); + expect(result.manualActionRequired).toBe(true); + expect(result.manualActionReason).toBe("meet-permission-required"); + expect(result.manualActionMessage).toContain("Allow microphone/camera/speaker permissions"); + }); + it("joins Chrome on a paired node without local Chrome or BlackHole", async () => { const { methods, nodesList, nodesInvoke } = setup( { diff --git a/extensions/google-meet/src/transports/chrome.ts b/extensions/google-meet/src/transports/chrome.ts index 710bc264fb0..870999a2bb2 100644 --- a/extensions/google-meet/src/transports/chrome.ts +++ b/extensions/google-meet/src/transports/chrome.ts @@ -333,16 +333,19 @@ function meetStatusScript(params: { const allowMicrophone = ${JSON.stringify(params.allowMicrophone)}; const captureCaptions = ${JSON.stringify(params.captureCaptions)}; const buttons = [...document.querySelectorAll('button')]; + const buttonLabel = (button) => + [ + button.getAttribute("aria-label"), + button.getAttribute("data-tooltip"), + text(button), + ] + .filter(Boolean) + .join(" "); + const buttonLabels = buttons.map(buttonLabel).filter(Boolean); const notes = []; const findButton = (pattern) => buttons.find((button) => { - const label = [ - button.getAttribute("aria-label"), - button.getAttribute("data-tooltip"), - text(button), - ] - .filter(Boolean) - .join(" "); + const label = buttonLabel(button); return pattern.test(label) && !button.disabled; }); const input = [...document.querySelectorAll('input')].find((el) => @@ -355,9 +358,10 @@ function meetStatusScript(params: { input.dispatchEvent(new Event('change', { bubbles: true })); } const pageText = text(document.body).toLowerCase(); + const permissionText = [pageText, ...buttonLabels].join("\\n"); const host = location.hostname.toLowerCase(); const pageUrl = location.href; - const permissionNeeded = /permission needed|allow.*(microphone|camera)|blocked.*(microphone|camera)|permission.*(microphone|camera|speaker)/i.test(pageText); + const permissionNeeded = /permission needed|microphone problem|speaker problem|allow.*(microphone|camera)|blocked.*(microphone|camera)|permission.*(microphone|camera|speaker)/i.test(permissionText); const mic = buttons.find((button) => /turn off microphone|turn on microphone|microphone/i.test(button.getAttribute('aria-label') || text(button))); if (!allowMicrophone && mic && /turn off microphone/i.test(mic.getAttribute('aria-label') || text(mic))) { mic.click();