fix: repair Google Meet media permission grants

This commit is contained in:
Peter Steinberger
2026-05-03 22:40:06 +01:00
parent 3e80805d11
commit 5fa7d3b1a4
5 changed files with 178 additions and 9 deletions

View File

@@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Google Meet: grant Meet media permissions through the Playwright browser context when CDP grants do not affect the attached Chrome page, and report in-call microphone/speaker permission problems instead of marking realtime speech ready.
- Control UI/WebChat: collapse duplicate in-flight internal text sends onto the active Gateway run so rapid repeat submits do not start fresh `agent:main:main` dispatches. Fixes #75737. Thanks @dsdsddd1 and @BunsDev.
- Channels/streaming: expose `streaming.progress.label`, `labels`, `maxLines`, and `toolProgress` in bundled channel config metadata so progress draft settings appear in config, docs, and control surfaces. Thanks @vincentkoc.
- Channels/streaming: normalize whitespace and case for `streaming.progress.label: "auto"` so progress draft labels keep using the built-in label pool instead of rendering a literal `auto` title. Thanks @vincentkoc.

View File

@@ -19,6 +19,16 @@ const cdpMocks = vi.hoisted(() => ({
),
}));
// Playwright-side test doubles, created through vi.hoisted like cdpMocks.
const pwMocks = vi.hoisted(() => ({
// Resolves to null by default; tests that exercise the Playwright path
// override it to resolve to a module stub.
getPwAiModule: vi.fn(async () => null),
// Shared spy so tests can assert on the arguments given to the context grant.
grantPermissions: vi.fn(async () => {}),
// Resolves to a page-like stub whose context() exposes the shared spy above.
// pwMocks is only read when the stubbed context() is invoked, i.e. after this
// initializer has finished, so the self-reference is safe.
getPageForTargetId: vi.fn(async () => ({
context: () => ({
grantPermissions: pwMocks.grantPermissions,
}),
})),
}));
// Swap the ../chrome.js export for the hoisted CDP stub so the tests can
// inspect and reset it via cdpMocks.
vi.mock("../chrome.js", () => ({
getChromeWebSocketUrl: cdpMocks.getChromeWebSocketUrl,
}));
@@ -27,7 +37,7 @@ vi.mock("../cdp.helpers.js", () => ({
withCdpSocket: cdpMocks.withCdpSocket,
}));
const { registerBrowserPermissionRoutes } = await import("./permissions.js");
const { registerBrowserPermissionRoutes, __testing } = await import("./permissions.js");
function createProfileContext() {
return {
@@ -77,6 +87,42 @@ describe("browser permission routes", () => {
cdpMocks.getChromeWebSocketUrl.mockClear();
cdpMocks.send.mockReset().mockResolvedValue({});
cdpMocks.withCdpSocket.mockClear();
__testing.setDepsForTest(null);
pwMocks.getPwAiModule.mockReset().mockResolvedValue(null);
pwMocks.getPageForTargetId.mockClear();
pwMocks.grantPermissions.mockClear();
});
// When the Playwright module resolves, the grant is expected to go through the
// page's browser context and the raw CDP socket must not be used at all.
it("uses Playwright context permissions for attached pages when available", async () => {
  const meetOrigin = "https://meet.google.com";
  const tabId = "meet-tab";

  // Install a module stub that only knows how to look up a page by target id.
  const pwModuleStub = { getPageForTargetId: pwMocks.getPageForTargetId };
  pwMocks.getPwAiModule.mockResolvedValue(pwModuleStub as never);
  __testing.setDepsForTest({ getPwAiModule: pwMocks.getPwAiModule as never });

  const grantCall = await callGrant({
    origin: "https://meet.google.com/abc-defg-hij",
    permissions: ["audioCapture", "videoCapture"],
    optionalPermissions: ["speakerSelection"],
    targetId: tabId,
  });
  const reply = grantCall.response;

  // Response shape: required permissions granted, optional ones reported back
  // as unsupported, and the method flagged as "playwright".
  expect(reply.statusCode).toBe(200);
  expect(reply.body).toMatchObject({
    ok: true,
    origin: meetOrigin,
    grantedPermissions: ["audioCapture", "videoCapture"],
    unsupportedPermissions: ["speakerSelection"],
    grantMethod: "playwright",
  });

  // The page lookup receives the profile's CDP URL, the tab id and SSRF policy.
  expect(pwMocks.getPageForTargetId).toHaveBeenCalledWith({
    cdpUrl: "http://127.0.0.1:18800",
    targetId: tabId,
    ssrfPolicy: { allowPrivateNetwork: false },
  });

  // Permission names are translated before reaching the browser context.
  expect(pwMocks.grantPermissions).toHaveBeenCalledWith(["microphone", "camera"], {
    origin: meetOrigin,
  });
  expect(cdpMocks.send).not.toHaveBeenCalled();
});
it("grants required and optional Chrome permissions for an origin", async () => {

View File

@@ -1,6 +1,9 @@
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
import { withCdpSocket } from "../cdp.helpers.js";
import { getChromeWebSocketUrl } from "../chrome.js";
import { getPwAiModule } from "../pw-ai-module.js";
import type { BrowserRouteContext } from "../server-context.js";
import type { ProfileContext } from "../server-context.js";
import type { BrowserRouteRegistrar } from "./types.js";
import {
asyncBrowserRoute,
@@ -10,11 +13,22 @@ import {
toStringOrEmpty,
} from "./utils.js";
// Mutable indirection over the Playwright module loader. grantPermissions reads
// the loader from here, and __testing.setDepsForTest reassigns it in tests.
const permissionRouteDeps = {
getPwAiModule,
};
/** Test-only hooks for this module. */
export const __testing = {
  /**
   * Replaces the Playwright module loader used when granting permissions.
   * Passing `null`, or an object without `getPwAiModule`, restores the real
   * loader.
   */
  setDepsForTest(deps: { getPwAiModule?: typeof getPwAiModule } | null) {
    const replacement = deps == null ? undefined : deps.getPwAiModule;
    permissionRouteDeps.getPwAiModule = replacement ?? getPwAiModule;
  },
};
// Untrusted body of a permission-grant request. Every field is `unknown` and is
// validated or coerced by the route handler before use.
type GrantPermissionsBody = {
// Origin to grant for; the Playwright test expects a full URL to be reduced to its origin.
origin?: unknown;
// Required permissions; the route rejects anything but a non-empty string array.
permissions?: unknown;
// Best-effort permissions; treated as an empty list when missing or invalid.
optionalPermissions?: unknown;
// Timeout in milliseconds; the route defaults it to 5000 and enforces a 1000 minimum.
timeoutMs?: unknown;
// Optional target id of the page to grant through Playwright; empty values are ignored.
targetId?: unknown;
};
function readOrigin(raw: unknown): string | null {
@@ -47,15 +61,45 @@ function readPermissions(raw: unknown): string[] | null {
}
async function grantPermissions(params: {
profileCtx: ProfileContext;
targetId?: string;
wsUrl: string;
origin: string;
requiredPermissions: string[];
optionalPermissions: string[];
timeoutMs: number;
ssrfPolicy?: SsrFPolicy;
}) {
const allPermissions = [
...new Set([...params.requiredPermissions, ...params.optionalPermissions]),
];
const playwrightRequiredPermissions = params.requiredPermissions.map(toPlaywrightPermission);
const canUsePlaywright =
playwrightRequiredPermissions.every((value): value is string => Boolean(value)) &&
params.requiredPermissions.length > 0;
if (canUsePlaywright) {
const pw = await permissionRouteDeps.getPwAiModule({ mode: "soft" });
if (pw) {
try {
const page = await pw.getPageForTargetId({
cdpUrl: params.profileCtx.profile.cdpUrl,
targetId: params.targetId,
ssrfPolicy: params.ssrfPolicy,
});
await page.context().grantPermissions(playwrightRequiredPermissions, {
origin: params.origin,
});
return {
grantedPermissions: params.requiredPermissions,
unsupportedPermissions: params.optionalPermissions,
grantMethod: "playwright",
};
} catch {
// Fall back to the raw CDP browser command below. Some routes call this
// before a page exists, while attached browser profiles need Playwright.
}
}
}
let unsupportedPermissions: string[] = [];
await withCdpSocket(
params.wsUrl,
@@ -82,9 +126,21 @@ async function grantPermissions(params: {
return {
grantedPermissions: allPermissions.filter((value) => !unsupportedPermissions.includes(value)),
unsupportedPermissions,
grantMethod: "cdp",
};
}
/**
 * Translates a permission name accepted by this route into the name passed to
 * the Playwright browser context's `grantPermissions`.
 *
 * @param permission Route-level permission name, e.g. `"audioCapture"`.
 * @returns `"microphone"` for `"audioCapture"`, `"camera"` for
 *   `"videoCapture"`, and `undefined` for every other name. Matching is exact
 *   and case-sensitive.
 */
function toPlaywrightPermission(permission: string): string | undefined {
  if (permission === "audioCapture") {
    return "microphone";
  }
  if (permission === "videoCapture") {
    return "camera";
  }
  // No Playwright equivalent is known; callers treat this as "cannot use Playwright".
  return undefined;
}
export function registerBrowserPermissionRoutes(
app: BrowserRouteRegistrar,
ctx: BrowserRouteContext,
@@ -107,6 +163,7 @@ export function registerBrowserPermissionRoutes(
return jsonError(res, 400, "permissions must be a non-empty string array");
}
const optionalPermissions = readPermissions(body.optionalPermissions ?? []) ?? [];
const targetId = toStringOrEmpty(body.targetId) || undefined;
const timeoutMs = Math.max(1_000, toNumber(body.timeoutMs) ?? 5_000);
try {
@@ -120,11 +177,14 @@ export function registerBrowserPermissionRoutes(
return jsonError(res, 409, "browser CDP WebSocket unavailable");
}
const granted = await grantPermissions({
profileCtx,
targetId,
wsUrl,
origin,
requiredPermissions,
optionalPermissions,
timeoutMs,
ssrfPolicy: ctx.state().resolved.ssrfPolicy,
});
return res.json({ ok: true, origin, ...granted });
} catch (error) {

View File

@@ -2118,6 +2118,64 @@ describe("google-meet plugin", () => {
expect(captionButton.click).toHaveBeenCalledTimes(1);
});
// Runs the generated Meet status script against a minimal fake DOM in which the
// permission problem is visible only through button aria-labels (the page body
// text is empty), and checks that it is reported as a manual action.
it("reports in-call Meet audio permission problems from button labels", () => {
  type InspectResult = {
    inCall?: boolean;
    manualActionRequired?: boolean;
    manualActionReason?: string;
    manualActionMessage?: string;
  };

  // Builds a button stub that exposes its label exclusively via aria-label.
  function fakeButton(ariaLabel: string) {
    return {
      disabled: false,
      innerText: "",
      textContent: "",
      click: vi.fn(),
      getAttribute: vi.fn((attr: string) => (attr === "aria-label" ? ariaLabel : null)),
    };
  }

  const fakeDocument = {
    body: { innerText: "", textContent: "" },
    title: "Meet",
    querySelector: vi.fn(() => null),
    // Fresh button stubs on every "button" query; every other selector is empty.
    querySelectorAll: vi.fn((selector: string) =>
      selector === "button"
        ? [
            fakeButton("Leave call"),
            fakeButton("Microphone problem. Show more info"),
            fakeButton("Microphone: Permission needed"),
            fakeButton("Speaker: Permission needed"),
          ]
        : [],
    ),
  };

  const sandbox = createContext({
    JSON,
    document: fakeDocument,
    location: {
      href: "https://meet.google.com/abc-defg-hij",
      hostname: "meet.google.com",
    },
    window: {},
  });

  const scriptSource = chromeTransportTesting.meetStatusScriptForTest({
    allowMicrophone: true,
    autoJoin: false,
    captureCaptions: false,
    guestName: "OpenClaw Agent",
  });
  const runInspect = new Script(`(${scriptSource})`).runInContext(sandbox) as () => string;
  const status = JSON.parse(runInspect()) as InspectResult;

  expect(status.inCall).toBe(true);
  expect(status.manualActionRequired).toBe(true);
  expect(status.manualActionReason).toBe("meet-permission-required");
  expect(status.manualActionMessage).toContain("Allow microphone/camera/speaker permissions");
});
it("joins Chrome on a paired node without local Chrome or BlackHole", async () => {
const { methods, nodesList, nodesInvoke } = setup(
{

View File

@@ -333,16 +333,19 @@ function meetStatusScript(params: {
const allowMicrophone = ${JSON.stringify(params.allowMicrophone)};
const captureCaptions = ${JSON.stringify(params.captureCaptions)};
const buttons = [...document.querySelectorAll('button')];
const buttonLabel = (button) =>
[
button.getAttribute("aria-label"),
button.getAttribute("data-tooltip"),
text(button),
]
.filter(Boolean)
.join(" ");
const buttonLabels = buttons.map(buttonLabel).filter(Boolean);
const notes = [];
const findButton = (pattern) =>
buttons.find((button) => {
const label = [
button.getAttribute("aria-label"),
button.getAttribute("data-tooltip"),
text(button),
]
.filter(Boolean)
.join(" ");
const label = buttonLabel(button);
return pattern.test(label) && !button.disabled;
});
const input = [...document.querySelectorAll('input')].find((el) =>
@@ -355,9 +358,10 @@ function meetStatusScript(params: {
input.dispatchEvent(new Event('change', { bubbles: true }));
}
const pageText = text(document.body).toLowerCase();
const permissionText = [pageText, ...buttonLabels].join("\\n");
const host = location.hostname.toLowerCase();
const pageUrl = location.href;
const permissionNeeded = /permission needed|allow.*(microphone|camera)|blocked.*(microphone|camera)|permission.*(microphone|camera|speaker)/i.test(pageText);
const permissionNeeded = /permission needed|microphone problem|speaker problem|allow.*(microphone|camera)|blocked.*(microphone|camera)|permission.*(microphone|camera|speaker)/i.test(permissionText);
const mic = buttons.find((button) => /turn off microphone|turn on microphone|microphone/i.test(button.getAttribute('aria-label') || text(button)));
if (!allowMicrophone && mic && /turn off microphone/i.test(mic.getAttribute('aria-label') || text(mic))) {
mic.click();