From 2b45a112cb654d5c9e94113974b2f863ca621cb8 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 24 Apr 2026 16:17:57 +0100 Subject: [PATCH] feat: harden Google Meet realtime join --- docs/plugins/google-meet.md | 48 +++- extensions/google-meet/index.test.ts | 98 ++++++- extensions/google-meet/index.ts | 65 ++++- extensions/google-meet/openclaw.plugin.json | 33 +++ extensions/google-meet/src/cli.ts | 25 ++ extensions/google-meet/src/config.ts | 19 ++ extensions/google-meet/src/node-host.ts | 251 +++++++++++++++++- extensions/google-meet/src/realtime-node.ts | 31 ++- extensions/google-meet/src/realtime.ts | 26 +- extensions/google-meet/src/runtime.ts | 64 +++++ extensions/google-meet/src/setup.ts | 31 +++ .../google-meet/src/transports/chrome.ts | 12 +- .../google-meet/src/transports/types.ts | 18 ++ 13 files changed, 695 insertions(+), 26 deletions(-) diff --git a/docs/plugins/google-meet.md b/docs/plugins/google-meet.md index d653966a5bd..1c32f209c19 100644 --- a/docs/plugins/google-meet.md +++ b/docs/plugins/google-meet.md @@ -187,6 +187,11 @@ Route Meet through that node on the Gateway host: enabled: true, config: { defaultTransport: "chrome-node", + chrome: { + guestName: "OpenClaw Agent", + autoJoin: true, + reuseExistingTab: true, + }, chromeNode: { node: "parallels-macos", }, @@ -205,6 +210,13 @@ openclaw googlemeet join https://meet.google.com/abc-defg-hij or ask the agent to use the `google_meet` tool with `transport: "chrome-node"`. +For a one-command smoke test that creates or reuses a session, speaks a known +phrase, and prints session health: + +```bash +openclaw googlemeet test-speech https://meet.google.com/abc-defg-hij +``` + If `chromeNode.node` is omitted, OpenClaw auto-selects only when exactly one connected node advertises `googlemeet.chrome`. If several capable nodes are connected, set `chromeNode.node` to the node id, display name, or remote IP. 
@@ -217,8 +229,12 @@ Common failure checks: `gateway.nodes.allowCommands: ["googlemeet.chrome"]`. - `BlackHole 2ch audio device not found on the node`: install `blackhole-2ch` in the VM and reboot the VM. -- Chrome opens but cannot join: sign in to Chrome inside the VM and confirm that - profile can join the Meet URL manually. +- Chrome opens but cannot join: sign in to Chrome inside the VM, or keep + `chrome.guestName` set for guest join. Guest auto-join uses Chrome Apple + Events; if it reports an automation warning, enable Chrome > View > Developer + > Allow JavaScript from Apple Events, then retry. +- Duplicate Meet tabs: leave `chrome.reuseExistingTab: true` enabled. OpenClaw + activates an existing tab for the same Meet URL before opening a new one. - No audio: in Meet, route microphone/speaker through the virtual audio device path used by OpenClaw; use separate virtual devices or Loopback-style routing for clean duplex audio. @@ -353,6 +369,13 @@ Defaults: - `defaultMode: "realtime"` - `chromeNode.node`: optional node id/name/IP for `chrome-node` - `chrome.audioBackend: "blackhole-2ch"` +- `chrome.guestName: "OpenClaw Agent"`: name used on the signed-out Meet guest + screen +- `chrome.autoJoin: true`: best-effort guest-name fill and Join Now click +- `chrome.reuseExistingTab: true`: activate an existing Meet tab instead of + opening duplicates +- `chrome.waitForInCallMs: 20000`: wait for the Meet tab to report in-call + before the realtime intro is triggered - `chrome.audioInputCommand`: SoX `rec` command writing 8 kHz G.711 mu-law audio to stdout - `chrome.audioOutputCommand`: SoX `play` command reading 8 kHz G.711 mu-law @@ -373,6 +396,8 @@ Optional overrides: }, chrome: { browserProfile: "Default", + guestName: "OpenClaw Agent", + waitForInCallMs: 30000, }, chromeNode: { node: "parallels-macos", @@ -426,7 +451,16 @@ Gateway host, so model credentials stay there. Use `action: "status"` to list active sessions or inspect a session ID. 
Use `action: "speak"` with `sessionId` and `message` to make the realtime agent -speak immediately. Use `action: "leave"` to mark a session ended. +speak immediately. Use `action: "test_speech"` to create or reuse the session, +trigger a known phrase, and return `inCall` health when the Chrome host can +report it. Use `action: "leave"` to mark a session ended. + +`status` includes Chrome health when available: + +- `inCall`: Chrome appears to be inside the Meet call +- `micMuted`: best-effort Meet microphone state +- `providerConnected` / `realtimeReady`: realtime voice bridge state +- `lastInputAt` / `lastOutputAt`: last audio seen from or sent to the bridge ```json { @@ -465,6 +499,14 @@ To force a spoken readiness check after Chrome has fully joined the call: openclaw googlemeet speak meet_... "Say exactly: I'm here and listening." ``` +For the full join-and-speak smoke test: + +```bash +openclaw googlemeet test-speech https://meet.google.com/abc-defg-hij \ + --transport chrome-node \ + --message "Say exactly: I'm here and listening." 
+``` + ## Notes Google Meet's official media API is receive-oriented, so speaking into a Meet diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index d7fa71be44e..7e67dc6168e 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -171,6 +171,10 @@ describe("google-meet plugin", () => { chrome: { audioBackend: "blackhole-2ch", launch: true, + guestName: "OpenClaw Agent", + reuseExistingTab: true, + autoJoin: true, + waitForInCallMs: 20000, audioInputCommand: [ "rec", "-q", @@ -285,7 +289,16 @@ describe("google-meet plugin", () => { properties: { action: { type: "string", - enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave", "speak"], + enum: [ + "join", + "status", + "setup_status", + "resolve_space", + "preflight", + "leave", + "speak", + "test_speech", + ], }, transport: { type: "string", enum: ["chrome", "chrome-node", "twilio"] }, mode: { type: "string", enum: ["realtime", "transcribe"] }, @@ -554,6 +567,10 @@ describe("google-meet plugin", () => { action: "start", url: "https://meet.google.com/abc-defg-hij", mode: "transcribe", + guestName: "OpenClaw Agent", + reuseExistingTab: true, + autoJoin: true, + waitForInCallMs: 20000, }), }), ); @@ -568,6 +585,81 @@ describe("google-meet plugin", () => { }); }); + it("reuses an active Meet session for the same URL and transport", async () => { + const { methods, nodesInvoke } = setup( + { + defaultTransport: "chrome-node", + defaultMode: "transcribe", + }, + { + nodesInvokeResult: { + payload: { + launched: true, + browser: { inCall: true, micMuted: false }, + }, + }, + }, + ); + const handler = methods.get("googlemeet.join") as + | ((ctx: { + params: Record; + respond: ReturnType; + }) => Promise) + | undefined; + const first = vi.fn(); + const second = vi.fn(); + + await handler?.({ + params: { url: "https://meet.google.com/abc-defg-hij" }, + respond: first, + }); + await handler?.({ + params: { url: 
"https://meet.google.com/abc-defg-hij" }, + respond: second, + }); + + expect(nodesInvoke).toHaveBeenCalledTimes(1); + expect(second.mock.calls[0]?.[1]).toMatchObject({ + session: { + chrome: { health: { inCall: true, micMuted: false } }, + notes: expect.arrayContaining(["Reused existing active Meet session."]), + }, + }); + }); + + it("exposes a test-speech action that joins the requested meeting", async () => { + const { tools, nodesInvoke } = setup( + { + defaultTransport: "chrome-node", + }, + { + nodesInvokeResult: { + payload: { + launched: true, + browser: { inCall: true }, + }, + }, + }, + ); + const tool = tools[0] as { + execute: (id: string, params: unknown) => Promise<{ details: { createdSession?: boolean } }>; + }; + + const result = await tool.execute("id", { + action: "test_speech", + url: "https://meet.google.com/abc-defg-hij", + message: "Say exactly: hello.", + }); + + expect(nodesInvoke).toHaveBeenCalledWith( + expect.objectContaining({ + command: "googlemeet.chrome", + params: expect.objectContaining({ action: "start" }), + }), + ); + expect(result.details).toMatchObject({ createdSession: true }); + }); + it("explains when chrome-node has no capable paired node", async () => { const { tools } = setup( { @@ -781,7 +873,7 @@ describe("google-meet plugin", () => { expect(sendAudio).toHaveBeenCalledWith(Buffer.from([1, 2, 3])); expect(outputStdinWrites).toEqual([Buffer.from([4, 5])]); expect(bridge.acknowledgeMark).toHaveBeenCalled(); - expect(bridge.triggerGreeting).toHaveBeenCalledWith("Say exactly: I'm here and listening."); + expect(bridge.triggerGreeting).not.toHaveBeenCalled(); handle.speak("Say exactly: hello from the meeting."); expect(bridge.triggerGreeting).toHaveBeenLastCalledWith("Say exactly: hello from the meeting."); expect(callbacks).toMatchObject({ @@ -922,7 +1014,7 @@ describe("google-meet plugin", () => { text: "Use the launch update.", }); }); - expect(bridge.triggerGreeting).toHaveBeenCalledWith("Say exactly: I'm here and 
listening."); + expect(bridge.triggerGreeting).not.toHaveBeenCalled(); handle.speak("Say exactly: hello from the node."); expect(bridge.triggerGreeting).toHaveBeenLastCalledWith("Say exactly: hello from the node."); expect(callbacks).toMatchObject({ diff --git a/extensions/google-meet/index.ts b/extensions/google-meet/index.ts index d4e3ed8558c..aca256d56ef 100644 --- a/extensions/google-meet/index.ts +++ b/extensions/google-meet/index.ts @@ -43,6 +43,23 @@ const googleMeetConfigSchema = { }, "chrome.launch": { label: "Launch Chrome" }, "chrome.browserProfile": { label: "Chrome Profile", advanced: true }, + "chrome.guestName": { + label: "Guest Name", + help: "Used when Chrome lands on the signed-out Meet guest-name screen.", + }, + "chrome.reuseExistingTab": { + label: "Reuse Existing Meet Tab", + help: "Avoids opening duplicate tabs for the same Meet URL.", + }, + "chrome.autoJoin": { + label: "Auto Join Guest Screen", + help: "Best-effort guest-name fill and Join Now click when Chrome allows JavaScript from Apple Events.", + }, + "chrome.waitForInCallMs": { + label: "Wait For In-Call (ms)", + help: "Waits for Chrome to report that the Meet tab is in-call before the realtime intro speaks.", + advanced: true, + }, "chrome.audioInputCommand": { label: "Audio Input Command", help: "Command that writes 8 kHz G.711 mu-law meeting audio to stdout.", @@ -115,7 +132,16 @@ const googleMeetConfigSchema = { const GoogleMeetToolSchema = Type.Object({ action: Type.String({ - enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave", "speak"], + enum: [ + "join", + "status", + "setup_status", + "resolve_space", + "preflight", + "leave", + "speak", + "test_speech", + ], description: "Google Meet action to run", }), url: Type.Optional(Type.String({ description: "Explicit https://meet.google.com/... 
URL" })), @@ -221,6 +247,7 @@ export default definePluginEntry({ dialInNumber: normalizeOptionalString(params?.dialInNumber), pin: normalizeOptionalString(params?.pin), dtmfSequence: normalizeOptionalString(params?.dtmfSequence), + message: normalizeOptionalString(params?.message), }); respond(true, result); } catch (err) { @@ -287,6 +314,27 @@ export default definePluginEntry({ }, ); + api.registerGatewayMethod( + "googlemeet.testSpeech", + async ({ params, respond }: GatewayRequestHandlerOptions) => { + try { + const rt = await ensureRuntime(); + const result = await rt.testSpeech({ + url: resolveMeetingInput(config, params?.url), + transport: normalizeTransport(params?.transport), + mode: normalizeMode(params?.mode), + dialInNumber: normalizeOptionalString(params?.dialInNumber), + pin: normalizeOptionalString(params?.pin), + dtmfSequence: normalizeOptionalString(params?.dtmfSequence), + message: normalizeOptionalString(params?.message), + }); + respond(true, result); + } catch (err) { + sendError(respond, err); + } + }, + ); + api.registerTool({ name: "google_meet", label: "Google Meet", @@ -306,6 +354,21 @@ export default definePluginEntry({ dialInNumber: normalizeOptionalString(raw.dialInNumber), pin: normalizeOptionalString(raw.pin), dtmfSequence: normalizeOptionalString(raw.dtmfSequence), + message: normalizeOptionalString(raw.message), + }), + ); + } + case "test_speech": { + const rt = await ensureRuntime(); + return json( + await rt.testSpeech({ + url: resolveMeetingInput(config, raw.url), + transport: normalizeTransport(raw.transport), + mode: normalizeMode(raw.mode), + dialInNumber: normalizeOptionalString(raw.dialInNumber), + pin: normalizeOptionalString(raw.pin), + dtmfSequence: normalizeOptionalString(raw.dtmfSequence), + message: normalizeOptionalString(raw.message), }), ); } diff --git a/extensions/google-meet/openclaw.plugin.json b/extensions/google-meet/openclaw.plugin.json index d7e5c3d7734..b5f0ef48997 100644 --- 
a/extensions/google-meet/openclaw.plugin.json +++ b/extensions/google-meet/openclaw.plugin.json @@ -37,6 +37,23 @@ "label": "Chrome Profile", "advanced": true }, + "chrome.guestName": { + "label": "Guest Name", + "help": "Used when Chrome lands on the signed-out Meet guest-name screen." + }, + "chrome.reuseExistingTab": { + "label": "Reuse Existing Meet Tab", + "help": "Avoids opening duplicate tabs for the same Meet URL." + }, + "chrome.autoJoin": { + "label": "Auto Join Guest Screen", + "help": "Best-effort guest-name fill and Join Now click when Chrome allows JavaScript from Apple Events." + }, + "chrome.waitForInCallMs": { + "label": "Wait For In-Call (ms)", + "help": "Waits for Chrome to report that the Meet tab is in-call before the realtime intro speaks.", + "advanced": true + }, "chrome.audioInputCommand": { "label": "Audio Input Command", "help": "Command that writes 8 kHz G.711 mu-law meeting audio to stdout.", @@ -190,10 +207,26 @@ "browserProfile": { "type": "string" }, + "guestName": { + "type": "string", + "default": "OpenClaw Agent" + }, + "reuseExistingTab": { + "type": "boolean", + "default": true + }, + "autoJoin": { + "type": "boolean", + "default": true + }, "joinTimeoutMs": { "type": "number", "default": 30000 }, + "waitForInCallMs": { + "type": "number", + "default": 20000 + }, "audioInputCommand": { "type": "array", "default": [ diff --git a/extensions/google-meet/src/cli.ts b/extensions/google-meet/src/cli.ts index 7925772c335..8001330b07e 100644 --- a/extensions/google-meet/src/cli.ts +++ b/extensions/google-meet/src/cli.ts @@ -16,6 +16,7 @@ import type { GoogleMeetRuntime } from "./runtime.js"; type JoinOptions = { transport?: GoogleMeetTransport; mode?: GoogleMeetMode; + message?: string; dialInNumber?: string; pin?: string; dtmfSequence?: string; @@ -177,6 +178,7 @@ export function registerGoogleMeetCli(params: { .argument("[url]", "Explicit https://meet.google.com/... 
URL") .option("--transport ", "Transport: chrome, chrome-node, or twilio") .option("--mode ", "Mode: realtime or transcribe") + .option("--message ", "Realtime speech to trigger after join") .option("--dial-in-number ", "Meet dial-in number for Twilio transport") .option("--pin ", "Meet phone PIN; # is appended if omitted") .option("--dtmf-sequence ", "Explicit Twilio DTMF sequence") @@ -186,6 +188,7 @@ export function registerGoogleMeetCli(params: { url: resolveMeetingInput(params.config, url), transport: options.transport, mode: options.mode, + message: options.message, dialInNumber: options.dialInNumber, pin: options.pin, dtmfSequence: options.dtmfSequence, @@ -193,6 +196,28 @@ export function registerGoogleMeetCli(params: { writeStdoutJson(result.session); }); + root + .command("test-speech") + .argument("[url]", "Explicit https://meet.google.com/... URL") + .option("--transport ", "Transport: chrome, chrome-node, or twilio") + .option("--mode ", "Mode: realtime or transcribe") + .option( + "--message ", + "Realtime speech to trigger", + "Say exactly: Google Meet speech test complete.", + ) + .action(async (url: string | undefined, options: JoinOptions) => { + const rt = await params.ensureRuntime(); + writeStdoutJson( + await rt.testSpeech({ + url: resolveMeetingInput(params.config, url), + transport: options.transport, + mode: options.mode, + message: options.message, + }), + ); + }); + root .command("resolve-space") .description("Resolve a Meet URL, meeting code, or spaces/{id} to its canonical space") diff --git a/extensions/google-meet/src/config.ts b/extensions/google-meet/src/config.ts index 53a36a73767..10918c64eeb 100644 --- a/extensions/google-meet/src/config.ts +++ b/extensions/google-meet/src/config.ts @@ -22,7 +22,11 @@ export type GoogleMeetConfig = { audioBackend: "blackhole-2ch"; launch: boolean; browserProfile?: string; + guestName: string; + reuseExistingTab: boolean; + autoJoin: boolean; joinTimeoutMs: number; + waitForInCallMs: number; 
audioInputCommand?: string[]; audioOutputCommand?: string[]; audioBridgeCommand?: string[]; @@ -113,7 +117,11 @@ export const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = { chrome: { audioBackend: "blackhole-2ch", launch: true, + guestName: "OpenClaw Agent", + reuseExistingTab: true, + autoJoin: true, joinTimeoutMs: 30_000, + waitForInCallMs: 20_000, audioInputCommand: [...DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND], audioOutputCommand: [...DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND], }, @@ -300,10 +308,21 @@ export function resolveGoogleMeetConfigWithEnv( audioBackend: "blackhole-2ch", launch: resolveBoolean(chrome.launch, DEFAULT_GOOGLE_MEET_CONFIG.chrome.launch), browserProfile: normalizeOptionalString(chrome.browserProfile), + guestName: + normalizeOptionalString(chrome.guestName) ?? DEFAULT_GOOGLE_MEET_CONFIG.chrome.guestName, + reuseExistingTab: resolveBoolean( + chrome.reuseExistingTab, + DEFAULT_GOOGLE_MEET_CONFIG.chrome.reuseExistingTab, + ), + autoJoin: resolveBoolean(chrome.autoJoin, DEFAULT_GOOGLE_MEET_CONFIG.chrome.autoJoin), joinTimeoutMs: resolveNumber( chrome.joinTimeoutMs, DEFAULT_GOOGLE_MEET_CONFIG.chrome.joinTimeoutMs, ), + waitForInCallMs: resolveNumber( + chrome.waitForInCallMs, + DEFAULT_GOOGLE_MEET_CONFIG.chrome.waitForInCallMs, + ), audioInputCommand: resolveStringArray(chrome.audioInputCommand) ?? 
[ ...DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND, ], diff --git a/extensions/google-meet/src/node-host.ts b/extensions/google-meet/src/node-host.ts index 7dd5c636b98..265bd71d64c 100644 --- a/extensions/google-meet/src/node-host.ts +++ b/extensions/google-meet/src/node-host.ts @@ -18,6 +18,20 @@ type NodeBridgeSession = { chunks: Buffer[]; waiters: Array<() => void>; closed: boolean; + createdAt: string; + lastInputAt?: string; + lastOutputAt?: string; + lastInputBytes: number; + lastOutputBytes: number; +}; + +type BrowserStatus = { + inCall?: boolean; + micMuted?: boolean; + browserUrl?: string; + browserTitle?: string; + status?: string; + notes?: string[]; }; const sessions = new Map(); @@ -46,6 +60,10 @@ function readNumber(value: unknown, fallback: number): number { return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback; } +function readBoolean(value: unknown, fallback: boolean): boolean { + return typeof value === "boolean" ? value : fallback; +} + function runCommandWithTimeout(argv: string[], timeoutMs: number) { const [command, ...args] = argv; if (!command) { @@ -62,6 +80,163 @@ function runCommandWithTimeout(argv: string[], timeoutMs: number) { }; } +function runAppleScript(script: string, timeoutMs: number) { + return runCommandWithTimeout(["/usr/bin/osascript", "-e", script], timeoutMs); +} + +function normalizeAppleScriptString(value: string): string { + return JSON.stringify(value); +} + +function activeMeetTabStatus(timeoutMs: number): BrowserStatus { + const script = ` +tell application "Google Chrome" + repeat with w in windows + repeat with t in tabs of w + set tabUrl to URL of t + if tabUrl starts with "https://meet.google.com/" then + set active tab index of w to index of t + set index of w to 1 + set tabTitle to title of t + return tabUrl & linefeed & tabTitle + end if + end repeat + end repeat +end tell`; + const result = runAppleScript(script, timeoutMs); + if (result.code !== 0) { + return { + inCall: false, 
+ status: "browser-unavailable", + notes: [result.stderr || result.stdout || "Google Chrome tab status unavailable"], + }; + } + const [browserUrl = "", browserTitle = ""] = result.stdout.split(/\r?\n/u); + return { + inCall: Boolean(browserUrl.trim()) && !/Meet$/u.test(browserTitle.trim()), + browserUrl: browserUrl.trim() || undefined, + browserTitle: browserTitle.trim() || undefined, + status: "ok", + }; +} + +function activateExistingMeetTab(url: string, timeoutMs: number): boolean { + const script = ` +set targetUrl to ${normalizeAppleScriptString(url)} +tell application "Google Chrome" + repeat with w in windows + repeat with t in tabs of w + if URL of t is targetUrl then + set active tab index of w to index of t + set index of w to 1 + activate + return "found" + end if + end repeat + end repeat +end tell +return "missing"`; + const result = runAppleScript(script, timeoutMs); + return result.code === 0 && result.stdout.trim() === "found"; +} + +function executeMeetTabScript(url: string, javascript: string, timeoutMs: number) { + const script = ` +set targetUrl to ${normalizeAppleScriptString(url)} +set source to ${normalizeAppleScriptString(javascript)} +tell application "Google Chrome" + repeat with w in windows + repeat with t in tabs of w + if URL of t starts with targetUrl then + set active tab index of w to index of t + set index of w to 1 + return execute t javascript source + end if + end repeat + end repeat +end tell +return ""`; + return runAppleScript(script, timeoutMs); +} + +function tryAutoJoinMeet(params: { + url: string; + guestName: string; + timeoutMs: number; +}): BrowserStatus { + const js = ` +(() => { + const text = (node) => (node?.innerText || node?.textContent || "").trim(); + const input = [...document.querySelectorAll('input')].find((el) => + /your name/i.test(el.getAttribute('aria-label') || el.placeholder || '') + ); + if (input && !input.value) { + input.focus(); + input.value = ${JSON.stringify(params.guestName)}; + 
input.dispatchEvent(new Event('input', { bubbles: true })); + input.dispatchEvent(new Event('change', { bubbles: true })); + } + const buttons = [...document.querySelectorAll('button')]; + const join = buttons.find((button) => /join now|ask to join/i.test(text(button)) && !button.disabled); + if (join) join.click(); + const mic = buttons.find((button) => /turn off microphone|turn on microphone|microphone/i.test(button.getAttribute('aria-label') || text(button))); + return JSON.stringify({ + clickedJoin: Boolean(join), + inCall: buttons.some((button) => /leave call/i.test(button.getAttribute('aria-label') || text(button))), + micMuted: mic ? /turn on microphone/i.test(mic.getAttribute('aria-label') || text(mic)) : undefined, + title: document.title, + url: location.href + }); +})();`; + const result = executeMeetTabScript(params.url, js, Math.min(params.timeoutMs, 5_000)); + if (result.code !== 0) { + return { + ...activeMeetTabStatus(Math.min(params.timeoutMs, 2_000)), + notes: [ + "Chrome JavaScript automation is unavailable; enable Chrome > View > Developer > Allow JavaScript from Apple Events for guest auto-join.", + result.stderr || result.stdout || "unknown Apple Events failure", + ], + }; + } + try { + const parsed = JSON.parse(result.stdout.trim()) as { + inCall?: boolean; + micMuted?: boolean; + url?: string; + title?: string; + }; + return { + inCall: parsed.inCall, + micMuted: parsed.micMuted, + browserUrl: parsed.url, + browserTitle: parsed.title, + status: "ok", + }; + } catch { + return activeMeetTabStatus(Math.min(params.timeoutMs, 2_000)); + } +} + +async function waitForInCall(params: { + url: string; + guestName: string; + autoJoin: boolean; + timeoutMs: number; +}): Promise { + const deadline = Date.now() + Math.max(0, params.timeoutMs); + let status: BrowserStatus = activeMeetTabStatus(2_000); + while (Date.now() <= deadline) { + status = params.autoJoin + ? 
tryAutoJoinMeet({ url: params.url, guestName: params.guestName, timeoutMs: 5_000 }) + : activeMeetTabStatus(2_000); + if (status.inCall === true) { + return status; + } + await sleep(750); + } + return status; +} + function assertBlackHoleAvailable(timeoutMs: number) { if (process.platform !== "darwin") { throw new Error("Chrome Meet transport with blackhole-2ch audio is currently macOS-only"); @@ -112,6 +287,9 @@ function startCommandPair(params: { chunks: [], waiters: [], closed: false, + createdAt: new Date().toISOString(), + lastInputBytes: 0, + lastOutputBytes: 0, }; const outputProcess = spawn(output.command, output.args, { stdio: ["pipe", "ignore", "pipe"], @@ -122,7 +300,10 @@ function startCommandPair(params: { session.input = inputProcess; session.output = outputProcess; inputProcess.stdout?.on("data", (chunk) => { - session.chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); + const audio = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk); + session.lastInputAt = new Date().toISOString(); + session.lastInputBytes += audio.byteLength; + session.chunks.push(audio); if (session.chunks.length > 200) { session.chunks.splice(0, session.chunks.length - 200); } @@ -172,7 +353,10 @@ function pushAudio(params: Record) { if (!session || session.closed) { throw new Error(`bridge is not open: ${bridgeId}`); } - session.output?.stdin?.write(Buffer.from(base64, "base64")); + const audio = Buffer.from(base64, "base64"); + session.lastOutputAt = new Date().toISOString(); + session.lastOutputBytes += audio.byteLength; + session.output?.stdin?.write(audio); return { bridgeId, ok: true }; } @@ -224,22 +408,58 @@ function startChrome(params: Record) { if (browserProfile) { argv.push("--args", `--profile-directory=${browserProfile}`); } - argv.push(url); - const result = runCommandWithTimeout(argv, timeoutMs); - if (result.code !== 0) { - if (bridgeId) { - const session = sessions.get(bridgeId); - if (session) { - stopSession(session); + const reused = 
readBoolean(params.reuseExistingTab, true) + ? activateExistingMeetTab(url, Math.min(timeoutMs, 5_000)) + : false; + if (!reused) { + argv.push(url); + const result = runCommandWithTimeout(argv, timeoutMs); + if (result.code !== 0) { + if (bridgeId) { + const session = sessions.get(bridgeId); + if (session) { + stopSession(session); + } } + throw new Error( + `failed to launch Chrome for Meet: ${result.stderr || result.stdout || result.code}`, + ); } - throw new Error( - `failed to launch Chrome for Meet: ${result.stderr || result.stdout || result.code}`, - ); } } - return { launched: params.launch !== false, bridgeId, audioBridge }; + const waitForInCallMs = readNumber(params.waitForInCallMs, 20_000); + return Promise.resolve( + params.launch !== false && waitForInCallMs > 0 + ? waitForInCall({ + url, + guestName: readString(params.guestName) ?? "OpenClaw Agent", + autoJoin: readBoolean(params.autoJoin, true), + timeoutMs: waitForInCallMs, + }) + : activeMeetTabStatus(2_000), + ).then((browser) => ({ launched: params.launch !== false, bridgeId, audioBridge, browser })); +} + +function bridgeStatus(params: Record) { + const bridgeId = readString(params.bridgeId); + const session = bridgeId ? sessions.get(bridgeId) : undefined; + return { + browser: activeMeetTabStatus(2_000), + bridge: session + ? { + bridgeId, + closed: session.closed, + createdAt: session.createdAt, + lastInputAt: session.lastInputAt, + lastOutputAt: session.lastOutputAt, + lastInputBytes: session.lastInputBytes, + lastOutputBytes: session.lastOutputBytes, + } + : bridgeId + ? 
{ bridgeId, closed: true } + : undefined, + }; } function stopChrome(params: Record) { @@ -267,7 +487,10 @@ export async function handleGoogleMeetNodeHostCommand(paramsJSON?: string | null result = { ok: true }; break; case "start": - result = startChrome(params); + result = await startChrome(params); + break; + case "status": + result = bridgeStatus(params); break; case "pullAudio": result = await pullAudio(params); diff --git a/extensions/google-meet/src/realtime-node.ts b/extensions/google-meet/src/realtime-node.ts index 1a9aa6ffb69..4dbd7e61bbd 100644 --- a/extensions/google-meet/src/realtime-node.ts +++ b/extensions/google-meet/src/realtime-node.ts @@ -13,6 +13,7 @@ import { } from "./agent-consult.js"; import type { GoogleMeetConfig } from "./config.js"; import { resolveGoogleMeetRealtimeProvider } from "./realtime.js"; +import type { GoogleMeetChromeHealth } from "./transports/types.js"; export type ChromeNodeRealtimeAudioBridgeHandle = { type: "node-command-pair"; @@ -20,6 +21,7 @@ export type ChromeNodeRealtimeAudioBridgeHandle = { nodeId: string; bridgeId: string; speak: (instructions?: string) => void; + getHealth: () => GoogleMeetChromeHealth; stop: () => Promise; }; @@ -45,6 +47,11 @@ export async function startNodeRealtimeAudioBridge(params: { }): Promise { let stopped = false; let bridge: RealtimeVoiceBridgeSession | null = null; + let realtimeReady = false; + let lastInputAt: string | undefined; + let lastOutputAt: string | undefined; + let lastInputBytes = 0; + let lastOutputBytes = 0; const resolved = resolveGoogleMeetRealtimeProvider({ config: params.config, fullConfig: params.fullConfig, @@ -83,12 +90,14 @@ export async function startNodeRealtimeAudioBridge(params: { providerConfig: resolved.providerConfig, instructions: params.config.realtime.instructions, initialGreetingInstructions: params.config.realtime.introMessage, - triggerGreetingOnReady: Boolean(params.config.realtime.introMessage), + triggerGreetingOnReady: false, markStrategy: 
"ack-immediately", tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy), audioSink: { isOpen: () => !stopped, sendAudio: (muLaw) => { + lastOutputAt = new Date().toISOString(); + lastOutputBytes += muLaw.byteLength; void params.runtime.nodes .invoke({ nodeId: params.nodeId, @@ -149,10 +158,14 @@ export async function startNodeRealtimeAudioBridge(params: { void stop(); }, onClose: (reason) => { + realtimeReady = false; if (reason === "error") { void stop(); } }, + onReady: () => { + realtimeReady = true; + }, }); await bridge.connect(); @@ -169,10 +182,13 @@ export async function startNodeRealtimeAudioBridge(params: { params: { action: "pullAudio", bridgeId: params.bridgeId, timeoutMs: 250 }, timeoutMs: 2_000, }); - const result = asRecord(raw); + const result = asRecord(asRecord(raw).payload ?? raw); const base64 = readString(result.base64); if (base64) { - bridge?.sendAudio(Buffer.from(base64, "base64")); + const audio = Buffer.from(base64, "base64"); + lastInputAt = new Date().toISOString(); + lastInputBytes += audio.byteLength; + bridge?.sendAudio(audio); } if (result.closed === true) { await stop(); @@ -194,6 +210,15 @@ export async function startNodeRealtimeAudioBridge(params: { speak: (instructions) => { bridge?.triggerGreeting(instructions); }, + getHealth: () => ({ + providerConnected: bridge?.bridge.isConnected() ?? 
false, + realtimeReady, + lastInputAt, + lastOutputAt, + lastInputBytes, + lastOutputBytes, + bridgeClosed: stopped, + }), stop, }; } diff --git a/extensions/google-meet/src/realtime.ts b/extensions/google-meet/src/realtime.ts index 6c9fc988dc3..f37fad1230f 100644 --- a/extensions/google-meet/src/realtime.ts +++ b/extensions/google-meet/src/realtime.ts @@ -16,6 +16,7 @@ import { resolveGoogleMeetRealtimeTools, } from "./agent-consult.js"; import type { GoogleMeetConfig } from "./config.js"; +import type { GoogleMeetChromeHealth } from "./transports/types.js"; type BridgeProcess = { pid?: number; @@ -42,6 +43,7 @@ export type ChromeRealtimeAudioBridgeHandle = { inputCommand: string[]; outputCommand: string[]; speak: (instructions?: string) => void; + getHealth: () => GoogleMeetChromeHealth; stop: () => Promise; }; @@ -97,6 +99,11 @@ export async function startCommandRealtimeAudioBridge(params: { }); let stopped = false; let bridge: RealtimeVoiceBridgeSession | null = null; + let realtimeReady = false; + let lastInputAt: string | undefined; + let lastOutputAt: string | undefined; + let lastInputBytes = 0; + let lastOutputBytes = 0; const stop = async () => { if (stopped) { @@ -150,12 +157,14 @@ export async function startCommandRealtimeAudioBridge(params: { providerConfig: resolved.providerConfig, instructions: params.config.realtime.instructions, initialGreetingInstructions: params.config.realtime.introMessage, - triggerGreetingOnReady: Boolean(params.config.realtime.introMessage), + triggerGreetingOnReady: false, markStrategy: "ack-immediately", tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy), audioSink: { isOpen: () => !stopped, sendAudio: (muLaw) => { + lastOutputAt = new Date().toISOString(); + lastOutputBytes += muLaw.byteLength; outputProcess.stdin?.write(muLaw); }, }, @@ -195,15 +204,21 @@ export async function startCommandRealtimeAudioBridge(params: { }, onError: fail("realtime voice bridge"), onClose: (reason) => { + realtimeReady = 
false; if (reason === "error") { void stop(); } }, + onReady: () => { + realtimeReady = true; + }, }); inputProcess.stdout?.on("data", (chunk) => { const audio = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk); if (!stopped && audio.byteLength > 0) { + lastInputAt = new Date().toISOString(); + lastInputBytes += audio.byteLength; bridge?.sendAudio(Buffer.from(audio)); } }); @@ -216,6 +231,15 @@ export async function startCommandRealtimeAudioBridge(params: { speak: (instructions) => { bridge?.triggerGreeting(instructions); }, + getHealth: () => ({ + providerConnected: bridge?.bridge.isConnected() ?? false, + realtimeReady, + lastInputAt, + lastOutputAt, + lastInputBytes, + lastOutputBytes, + bridgeClosed: stopped, + }), stop, }; } diff --git a/extensions/google-meet/src/runtime.ts b/extensions/google-meet/src/runtime.ts index 44183046ad3..53b60909cb2 100644 --- a/extensions/google-meet/src/runtime.ts +++ b/extensions/google-meet/src/runtime.ts @@ -8,6 +8,7 @@ import { getGoogleMeetSetupStatus } from "./setup.js"; import { launchChromeMeet, launchChromeMeetOnNode } from "./transports/chrome.js"; import { buildMeetDtmfSequence, normalizeDialInNumber } from "./transports/twilio.js"; import type { + GoogleMeetChromeHealth, GoogleMeetJoinRequest, GoogleMeetJoinResult, GoogleMeetSession, @@ -50,6 +51,7 @@ export class GoogleMeetRuntime { readonly #sessions = new Map(); readonly #sessionStops = new Map Promise>(); readonly #sessionSpeakers = new Map void>(); + readonly #sessionHealth = new Map GoogleMeetChromeHealth>(); constructor( private readonly params: { @@ -61,6 +63,7 @@ export class GoogleMeetRuntime { ) {} list(): GoogleMeetSession[] { + this.#refreshHealth(); return [...this.#sessions.values()].toSorted((a, b) => a.createdAt.localeCompare(b.createdAt)); } @@ -69,6 +72,7 @@ export class GoogleMeetRuntime { session?: GoogleMeetSession; sessions?: GoogleMeetSession[]; } { + this.#refreshHealth(sessionId); if (!sessionId) { return { found: true, sessions: 
this.list() }; } @@ -84,6 +88,24 @@ export class GoogleMeetRuntime { const url = normalizeMeetUrl(request.url); const transport = resolveTransport(request.transport, this.params.config); const mode = resolveMode(request.mode, this.params.config); + const reusable = this.list().find( + (session) => + session.state === "active" && + session.url === url && + session.transport === transport && + session.mode === mode, + ); + if (reusable) { + reusable.notes = [ + ...reusable.notes.filter((note) => note !== "Reused existing active Meet session."), + "Reused existing active Meet session.", + ]; + reusable.updatedAt = nowIso(); + if (request.message || this.params.config.realtime.introMessage) { + this.speak(reusable.id, request.message); + } + return { session: reusable }; + } const createdAt = nowIso(); const session: GoogleMeetSession = { @@ -146,6 +168,7 @@ export class GoogleMeetRuntime { : undefined, } : undefined, + health: "browser" in result ? result.browser : undefined, }; if ( result.audioBridge?.type === "command-pair" || @@ -153,6 +176,7 @@ export class GoogleMeetRuntime { ) { this.#sessionStops.set(session.id, result.audioBridge.stop); this.#sessionSpeakers.set(session.id, result.audioBridge.speak); + this.#sessionHealth.set(session.id, result.audioBridge.getHealth); } session.notes.push( result.audioBridge @@ -206,6 +230,9 @@ export class GoogleMeetRuntime { } this.#sessions.set(session.id, session); + if (mode === "realtime" && this.params.config.realtime.introMessage) { + this.speak(session.id, request.message); + } return { session }; } @@ -218,6 +245,7 @@ export class GoogleMeetRuntime { if (stop) { this.#sessionStops.delete(sessionId); this.#sessionSpeakers.delete(sessionId); + this.#sessionHealth.delete(sessionId); await stop(); } session.state = "ended"; @@ -239,6 +267,42 @@ export class GoogleMeetRuntime { } speak(instructions || this.params.config.realtime.introMessage); session.updatedAt = nowIso(); + this.#refreshHealth(sessionId); return { 
found: true, spoken: true, session }; } + + async testSpeech(request: GoogleMeetJoinRequest): Promise<{ + createdSession: boolean; + inCall?: boolean; + spoken: boolean; + session: GoogleMeetSession; + }> { + const before = new Set(this.list().map((session) => session.id)); + const result = await this.join(request); + const spoken = this.speak( + result.session.id, + request.message ?? "Say exactly: Google Meet speech test complete.", + ).spoken; + return { + createdSession: !before.has(result.session.id), + inCall: result.session.chrome?.health?.inCall, + spoken, + session: result.session, + }; + } + + #refreshHealth(sessionId?: string) { + const ids = sessionId ? [sessionId] : [...this.#sessionHealth.keys()]; + for (const id of ids) { + const session = this.#sessions.get(id); + const getHealth = this.#sessionHealth.get(id); + if (!session?.chrome || !getHealth) { + continue; + } + session.chrome.health = { + ...session.chrome.health, + ...getHealth(), + }; + } + } } diff --git a/extensions/google-meet/src/setup.ts b/extensions/google-meet/src/setup.ts index 2816f8fd6a7..e0f0fb2811b 100644 --- a/extensions/google-meet/src/setup.ts +++ b/extensions/google-meet/src/setup.ts @@ -79,6 +79,37 @@ export function getGoogleMeetSetupStatus(config: GoogleMeetConfig): { : "Chrome realtime audio bridge not configured", }); + checks.push({ + id: "guest-join-defaults", + ok: Boolean( + config.chrome.guestName && config.chrome.autoJoin && config.chrome.reuseExistingTab, + ), + message: + config.chrome.guestName && config.chrome.autoJoin && config.chrome.reuseExistingTab + ? "Guest auto-join and tab reuse defaults are enabled" + : "Set chrome.guestName, chrome.autoJoin, and chrome.reuseExistingTab for unattended guest joins", + }); + + checks.push({ + id: "chrome-node-target", + ok: config.defaultTransport !== "chrome-node" || Boolean(config.chromeNode.node), + message: + config.defaultTransport === "chrome-node" && !config.chromeNode.node + ? 
"chrome-node default should pin chromeNode.node when multiple nodes may be connected" + : config.chromeNode.node + ? `Chrome node pinned to ${config.chromeNode.node}` + : "Chrome node not pinned; automatic selection works when exactly one capable node is connected", + }); + + checks.push({ + id: "intro-after-in-call", + ok: config.chrome.waitForInCallMs > 0, + message: + config.chrome.waitForInCallMs > 0 + ? `Realtime intro waits up to ${config.chrome.waitForInCallMs}ms for the Meet tab to be in-call` + : "Set chrome.waitForInCallMs to delay realtime intro until the Meet tab is in-call", + }); + return { ok: checks.every((check) => check.ok), checks, diff --git a/extensions/google-meet/src/transports/chrome.ts b/extensions/google-meet/src/transports/chrome.ts index 588db7706ed..ac2ba0a1f3d 100644 --- a/extensions/google-meet/src/transports/chrome.ts +++ b/extensions/google-meet/src/transports/chrome.ts @@ -10,6 +10,7 @@ import { startCommandRealtimeAudioBridge, type ChromeRealtimeAudioBridgeHandle, } from "../realtime.js"; +import type { GoogleMeetChromeHealth } from "./types.js"; export const GOOGLE_MEET_SYSTEM_PROFILER_COMMAND = "/usr/sbin/system_profiler"; @@ -200,6 +201,7 @@ function parseNodeStartResult(raw: unknown): { launched?: boolean; bridgeId?: string; audioBridge?: { type?: string }; + browser?: GoogleMeetChromeHealth; } { const value = raw && typeof raw === "object" && "payload" in raw @@ -212,6 +214,7 @@ function parseNodeStartResult(raw: unknown): { launched?: boolean; bridgeId?: string; audioBridge?: { type?: string }; + browser?: GoogleMeetChromeHealth; }; } @@ -229,6 +232,7 @@ export async function launchChromeMeetOnNode(params: { audioBridge?: | { type: "external-command" } | ({ type: "node-command-pair" } & ChromeNodeRealtimeAudioBridgeHandle); + browser?: GoogleMeetChromeHealth; }> { const nodeId = await resolveChromeNode({ runtime: params.runtime, @@ -248,6 +252,10 @@ export async function launchChromeMeetOnNode(params: { audioOutputCommand: 
params.config.chrome.audioOutputCommand, audioBridgeCommand: params.config.chrome.audioBridgeCommand, audioBridgeHealthCommand: params.config.chrome.audioBridgeHealthCommand, + guestName: params.config.chrome.guestName, + reuseExistingTab: params.config.chrome.reuseExistingTab, + autoJoin: params.config.chrome.autoJoin, + waitForInCallMs: params.config.chrome.waitForInCallMs, }, timeoutMs: params.config.chrome.joinTimeoutMs + 5_000, }); @@ -269,6 +277,7 @@ export async function launchChromeMeetOnNode(params: { nodeId, launched: result.launched === true, audioBridge: bridge, + browser: result.browser, }; } if (result.audioBridge?.type === "external-command") { @@ -276,7 +285,8 @@ export async function launchChromeMeetOnNode(params: { nodeId, launched: result.launched === true, audioBridge: { type: "external-command" }, + browser: result.browser, }; } - return { nodeId, launched: result.launched === true }; + return { nodeId, launched: result.launched === true, browser: result.browser }; } diff --git a/extensions/google-meet/src/transports/types.ts b/extensions/google-meet/src/transports/types.ts index 943670a899f..7e6f60a4cae 100644 --- a/extensions/google-meet/src/transports/types.ts +++ b/extensions/google-meet/src/transports/types.ts @@ -6,11 +6,28 @@ export type GoogleMeetJoinRequest = { url: string; transport?: GoogleMeetTransport; mode?: GoogleMeetMode; + message?: string; dialInNumber?: string; pin?: string; dtmfSequence?: string; }; +export type GoogleMeetChromeHealth = { + inCall?: boolean; + micMuted?: boolean; + providerConnected?: boolean; + realtimeReady?: boolean; + lastInputAt?: string; + lastOutputAt?: string; + lastInputBytes?: number; + lastOutputBytes?: number; + browserUrl?: string; + browserTitle?: string; + bridgeClosed?: boolean; + status?: string; + notes?: string[]; +}; + export type GoogleMeetSession = { id: string; url: string; @@ -35,6 +52,7 @@ export type GoogleMeetSession = { type: "command-pair" | "node-command-pair" | 
"external-command"; provider?: string; }; + health?: GoogleMeetChromeHealth; }; twilio?: { dialInNumber: string;