diff --git a/docs/plugins/google-meet.md b/docs/plugins/google-meet.md index 8b48a3578a4..3a8b1720d6e 100644 --- a/docs/plugins/google-meet.md +++ b/docs/plugins/google-meet.md @@ -214,6 +214,12 @@ phrase, and prints session health: openclaw googlemeet test-speech https://meet.google.com/abc-defg-hij ``` +If the browser profile is not signed in, Meet is waiting for host admission, or +Chrome needs microphone/camera permission, the join/test-speech result reports +`manualActionRequired: true` with `manualActionReason` and +`manualActionMessage`. Agents should stop retrying the join, report that message +to the operator, and retry only after the manual browser action is complete. + If `chromeNode.node` is omitted, OpenClaw auto-selects only when exactly one connected node advertises both `googlemeet.chrome` and browser control. If several capable nodes are connected, set `chromeNode.node` to the node id, @@ -460,6 +466,9 @@ report it. Use `action: "leave"` to mark a session ended. - `inCall`: Chrome appears to be inside the Meet call - `micMuted`: best-effort Meet microphone state +- `manualActionRequired` / `manualActionReason` / `manualActionMessage`: the + browser profile needs manual login, Meet host admission, permissions, or + browser-control repair before speech can work - `providerConnected` / `realtimeReady`: realtime voice bridge state - `lastInputAt` / `lastOutputAt`: last audio seen from or sent to the bridge diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index 7ed136e1ca9..1bd2257fdca 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -82,6 +82,7 @@ function setup( options: { nodesListResult?: NodeListResult; nodesInvokeResult?: unknown; + browserActResult?: Record<string, unknown>; nodesInvokeHandler?: (params: { nodeId: string; command: string; @@ -134,12 +135,14 @@ function setup( result: { ok: true, targetId: proxy.body?.targetId ?? 
"tab-1", - result: JSON.stringify({ - inCall: true, - micMuted: false, - title: "Meet call", - url: "https://meet.google.com/abc-defg-hij", - }), + result: JSON.stringify( + options.browserActResult ?? { + inCall: true, + micMuted: false, + title: "Meet call", + url: "https://meet.google.com/abc-defg-hij", + }, + ), }, }, }; @@ -706,6 +709,61 @@ describe("google-meet plugin", () => { expect(result.details).toMatchObject({ createdSession: true }); }); + it("reports manual action when the browser profile needs Google login", async () => { + const { tools } = setup( + { + defaultTransport: "chrome-node", + }, + { + browserActResult: { + inCall: false, + manualActionRequired: true, + manualActionReason: "google-login-required", + manualActionMessage: + "Sign in to Google in the OpenClaw browser profile, then retry the Meet join.", + title: "Sign in - Google Accounts", + url: "https://accounts.google.com/signin", + }, + nodesInvokeResult: { + payload: { + launched: true, + }, + }, + }, + ); + const tool = tools[0] as { + execute: ( + id: string, + params: unknown, + ) => Promise<{ + details: { + manualActionRequired?: boolean; + manualActionReason?: string; + session?: { chrome?: { health?: { manualActionRequired?: boolean } } }; + }; + }>; + }; + + const result = await tool.execute("id", { + action: "test_speech", + url: "https://meet.google.com/abc-defg-hij", + message: "Say exactly: hello.", + }); + + expect(result.details).toMatchObject({ + manualActionRequired: true, + manualActionReason: "google-login-required", + session: { + chrome: { + health: { + manualActionRequired: true, + manualActionReason: "google-login-required", + }, + }, + }, + }); + }); + it("explains when chrome-node has no capable paired node", async () => { const { tools } = setup( { diff --git a/extensions/google-meet/src/runtime.ts b/extensions/google-meet/src/runtime.ts index 53b60909cb2..6306ef9c4ce 100644 --- a/extensions/google-meet/src/runtime.ts +++ b/extensions/google-meet/src/runtime.ts 
@@ -274,6 +274,9 @@ export class GoogleMeetRuntime { async testSpeech(request: GoogleMeetJoinRequest): Promise<{ createdSession: boolean; inCall?: boolean; + manualActionRequired?: boolean; + manualActionReason?: GoogleMeetChromeHealth["manualActionReason"]; + manualActionMessage?: string; spoken: boolean; session: GoogleMeetSession; }> { @@ -283,9 +286,13 @@ export class GoogleMeetRuntime { result.session.id, request.message ?? "Say exactly: Google Meet speech test complete.", ).spoken; + const health = result.session.chrome?.health; return { createdSession: !before.has(result.session.id), - inCall: result.session.chrome?.health?.inCall, + inCall: health?.inCall, + manualActionRequired: health?.manualActionRequired, + manualActionReason: health?.manualActionReason, + manualActionMessage: health?.manualActionMessage, spoken, session: result.session, }; diff --git a/extensions/google-meet/src/transports/chrome.ts b/extensions/google-meet/src/transports/chrome.ts index a2e4feb5850..ed8ee9b21d5 100644 --- a/extensions/google-meet/src/transports/chrome.ts +++ b/extensions/google-meet/src/transports/chrome.ts @@ -292,12 +292,18 @@ function parseMeetBrowserStatus(result: unknown): GoogleMeetChromeHealth | undef const parsed = JSON.parse(raw) as { inCall?: boolean; micMuted?: boolean; + manualActionRequired?: boolean; + manualActionReason?: GoogleMeetChromeHealth["manualActionReason"]; + manualActionMessage?: string; url?: string; title?: string; }; return { inCall: parsed.inCall, micMuted: parsed.micMuted, + manualActionRequired: parsed.manualActionRequired, + manualActionReason: parsed.manualActionReason, + manualActionMessage: parsed.manualActionMessage, browserUrl: parsed.url, browserTitle: parsed.title, status: "browser-control", @@ -317,17 +323,36 @@ function meetStatusScript(params: { guestName: string; autoJoin: boolean }) { input.dispatchEvent(new Event('change', { bubbles: true })); } const buttons = [...document.querySelectorAll('button')]; + const pageText = 
text(document.body).toLowerCase(); + const host = location.hostname.toLowerCase(); + const pageUrl = location.href; const join = ${JSON.stringify(params.autoJoin)} ? buttons.find((button) => /join now|ask to join/i.test(text(button)) && !button.disabled) : null; if (join) join.click(); const mic = buttons.find((button) => /turn off microphone|turn on microphone|microphone/i.test(button.getAttribute('aria-label') || text(button))); + const inCall = buttons.some((button) => /leave call/i.test(button.getAttribute('aria-label') || text(button))); + let manualActionReason; + let manualActionMessage; + if (!inCall && (host === "accounts.google.com" || /use your google account|to continue to google meet|choose an account|sign in to (join|continue)/i.test(pageText))) { + manualActionReason = "google-login-required"; + manualActionMessage = "Sign in to Google in the OpenClaw browser profile, then retry the Meet join."; + } else if (!inCall && /asking to be let in|you.?ll join when someone lets you in|waiting to be let in|ask to join/i.test(pageText)) { + manualActionReason = "meet-admission-required"; + manualActionMessage = "Admit the OpenClaw browser participant in Google Meet, then retry speech."; + } else if (!inCall && /allow.*(microphone|camera)|blocked.*(microphone|camera)|permission.*(microphone|camera)/i.test(pageText)) { + manualActionReason = "meet-permission-required"; + manualActionMessage = "Allow microphone/camera permissions for Meet in the OpenClaw browser profile, then retry."; + } return JSON.stringify({ clickedJoin: Boolean(join), - inCall: buttons.some((button) => /leave call/i.test(button.getAttribute('aria-label') || text(button))), + inCall, micMuted: mic ? 
/turn on microphone/i.test(mic.getAttribute('aria-label') || text(mic)) : undefined, + manualActionRequired: Boolean(manualActionReason), + manualActionReason, + manualActionMessage, title: document.title, - url: location.href + url: pageUrl }); }`; } @@ -424,6 +449,10 @@ async function openMeetWithBrowserProxy(params: { browser = { ...browser, inCall: false, + manualActionRequired: true, + manualActionReason: "browser-control-unavailable", + manualActionMessage: + "Open the OpenClaw browser profile, finish Google Meet login, admission, or permission prompts, then retry.", notes: [ `Browser control could not inspect or auto-join Meet: ${ error instanceof Error ? error.message : String(error) diff --git a/extensions/google-meet/src/transports/types.ts b/extensions/google-meet/src/transports/types.ts index 7e6f60a4cae..3bf6f1a4754 100644 --- a/extensions/google-meet/src/transports/types.ts +++ b/extensions/google-meet/src/transports/types.ts @@ -12,9 +12,18 @@ export type GoogleMeetJoinRequest = { dtmfSequence?: string; }; +export type GoogleMeetManualActionReason = + | "google-login-required" + | "meet-admission-required" + | "meet-permission-required" + | "browser-control-unavailable"; + export type GoogleMeetChromeHealth = { inCall?: boolean; micMuted?: boolean; + manualActionRequired?: boolean; + manualActionReason?: GoogleMeetManualActionReason; + manualActionMessage?: string; providerConnected?: boolean; realtimeReady?: boolean; lastInputAt?: string;