feat: harden Google Meet realtime join

This commit is contained in:
Peter Steinberger
2026-04-24 16:17:57 +01:00
parent 2c701ab296
commit 2b45a112cb
13 changed files with 695 additions and 26 deletions

View File

@@ -187,6 +187,11 @@ Route Meet through that node on the Gateway host:
enabled: true,
config: {
defaultTransport: "chrome-node",
chrome: {
guestName: "OpenClaw Agent",
autoJoin: true,
reuseExistingTab: true,
},
chromeNode: {
node: "parallels-macos",
},
@@ -205,6 +210,13 @@ openclaw googlemeet join https://meet.google.com/abc-defg-hij
or ask the agent to use the `google_meet` tool with `transport: "chrome-node"`.
For a one-command smoke test that creates or reuses a session, speaks a known
phrase, and prints session health:
```bash
openclaw googlemeet test-speech https://meet.google.com/abc-defg-hij
```
If `chromeNode.node` is omitted, OpenClaw auto-selects only when exactly one
connected node advertises `googlemeet.chrome`. If several capable nodes are
connected, set `chromeNode.node` to the node id, display name, or remote IP.
@@ -217,8 +229,12 @@ Common failure checks:
`gateway.nodes.allowCommands: ["googlemeet.chrome"]`.
- `BlackHole 2ch audio device not found on the node`: install `blackhole-2ch`
in the VM and reboot the VM.
- Chrome opens but cannot join: sign in to Chrome inside the VM and confirm that
profile can join the Meet URL manually.
- Chrome opens but cannot join: sign in to Chrome inside the VM, or keep
`chrome.guestName` set for guest join. Guest auto-join uses Chrome Apple
Events; if it reports an automation warning, enable Chrome > View > Developer
> Allow JavaScript from Apple Events, then retry.
- Duplicate Meet tabs: leave `chrome.reuseExistingTab: true` enabled. OpenClaw
activates an existing tab for the same Meet URL before opening a new one.
- No audio: in Meet, route microphone/speaker through the virtual audio device
path used by OpenClaw; use separate virtual devices or Loopback-style routing
for clean duplex audio.
@@ -353,6 +369,13 @@ Defaults:
- `defaultMode: "realtime"`
- `chromeNode.node`: optional node id/name/IP for `chrome-node`
- `chrome.audioBackend: "blackhole-2ch"`
- `chrome.guestName: "OpenClaw Agent"`: name used on the signed-out Meet guest
screen
- `chrome.autoJoin: true`: best-effort guest-name fill and Join Now click
- `chrome.reuseExistingTab: true`: activate an existing Meet tab instead of
opening duplicates
- `chrome.waitForInCallMs: 20000`: wait for the Meet tab to report in-call
before the realtime intro is triggered
- `chrome.audioInputCommand`: SoX `rec` command writing 8 kHz G.711 mu-law
audio to stdout
- `chrome.audioOutputCommand`: SoX `play` command reading 8 kHz G.711 mu-law
@@ -373,6 +396,8 @@ Optional overrides:
},
chrome: {
browserProfile: "Default",
guestName: "OpenClaw Agent",
waitForInCallMs: 30000,
},
chromeNode: {
node: "parallels-macos",
@@ -426,7 +451,16 @@ Gateway host, so model credentials stay there.
Use `action: "status"` to list active sessions or inspect a session ID. Use
`action: "speak"` with `sessionId` and `message` to make the realtime agent
speak immediately. Use `action: "leave"` to mark a session ended.
speak immediately. Use `action: "test_speech"` to create or reuse the session,
trigger a known phrase, and return `inCall` health when the Chrome host can
report it. Use `action: "leave"` to mark a session ended.
`status` includes Chrome health when available:
- `inCall`: Chrome appears to be inside the Meet call
- `micMuted`: best-effort Meet microphone state
- `providerConnected` / `realtimeReady`: realtime voice bridge state
- `lastInputAt` / `lastOutputAt`: last audio seen from or sent to the bridge
```json
{
@@ -465,6 +499,14 @@ To force a spoken readiness check after Chrome has fully joined the call:
openclaw googlemeet speak meet_... "Say exactly: I'm here and listening."
```
For the full join-and-speak smoke:
```bash
openclaw googlemeet test-speech https://meet.google.com/abc-defg-hij \
--transport chrome-node \
--message "Say exactly: I'm here and listening."
```
## Notes
Google Meet's official media API is receive-oriented, so speaking into a Meet

View File

@@ -171,6 +171,10 @@ describe("google-meet plugin", () => {
chrome: {
audioBackend: "blackhole-2ch",
launch: true,
guestName: "OpenClaw Agent",
reuseExistingTab: true,
autoJoin: true,
waitForInCallMs: 20000,
audioInputCommand: [
"rec",
"-q",
@@ -285,7 +289,16 @@ describe("google-meet plugin", () => {
properties: {
action: {
type: "string",
enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave", "speak"],
enum: [
"join",
"status",
"setup_status",
"resolve_space",
"preflight",
"leave",
"speak",
"test_speech",
],
},
transport: { type: "string", enum: ["chrome", "chrome-node", "twilio"] },
mode: { type: "string", enum: ["realtime", "transcribe"] },
@@ -554,6 +567,10 @@ describe("google-meet plugin", () => {
action: "start",
url: "https://meet.google.com/abc-defg-hij",
mode: "transcribe",
guestName: "OpenClaw Agent",
reuseExistingTab: true,
autoJoin: true,
waitForInCallMs: 20000,
}),
}),
);
@@ -568,6 +585,81 @@ describe("google-meet plugin", () => {
});
});
it("reuses an active Meet session for the same URL and transport", async () => {
const { methods, nodesInvoke } = setup(
{
defaultTransport: "chrome-node",
defaultMode: "transcribe",
},
{
nodesInvokeResult: {
payload: {
launched: true,
browser: { inCall: true, micMuted: false },
},
},
},
);
const handler = methods.get("googlemeet.join") as
| ((ctx: {
params: Record<string, unknown>;
respond: ReturnType<typeof vi.fn>;
}) => Promise<void>)
| undefined;
const first = vi.fn();
const second = vi.fn();
await handler?.({
params: { url: "https://meet.google.com/abc-defg-hij" },
respond: first,
});
await handler?.({
params: { url: "https://meet.google.com/abc-defg-hij" },
respond: second,
});
expect(nodesInvoke).toHaveBeenCalledTimes(1);
expect(second.mock.calls[0]?.[1]).toMatchObject({
session: {
chrome: { health: { inCall: true, micMuted: false } },
notes: expect.arrayContaining(["Reused existing active Meet session."]),
},
});
});
it("exposes a test-speech action that joins the requested meeting", async () => {
const { tools, nodesInvoke } = setup(
{
defaultTransport: "chrome-node",
},
{
nodesInvokeResult: {
payload: {
launched: true,
browser: { inCall: true },
},
},
},
);
const tool = tools[0] as {
execute: (id: string, params: unknown) => Promise<{ details: { createdSession?: boolean } }>;
};
const result = await tool.execute("id", {
action: "test_speech",
url: "https://meet.google.com/abc-defg-hij",
message: "Say exactly: hello.",
});
expect(nodesInvoke).toHaveBeenCalledWith(
expect.objectContaining({
command: "googlemeet.chrome",
params: expect.objectContaining({ action: "start" }),
}),
);
expect(result.details).toMatchObject({ createdSession: true });
});
it("explains when chrome-node has no capable paired node", async () => {
const { tools } = setup(
{
@@ -781,7 +873,7 @@ describe("google-meet plugin", () => {
expect(sendAudio).toHaveBeenCalledWith(Buffer.from([1, 2, 3]));
expect(outputStdinWrites).toEqual([Buffer.from([4, 5])]);
expect(bridge.acknowledgeMark).toHaveBeenCalled();
expect(bridge.triggerGreeting).toHaveBeenCalledWith("Say exactly: I'm here and listening.");
expect(bridge.triggerGreeting).not.toHaveBeenCalled();
handle.speak("Say exactly: hello from the meeting.");
expect(bridge.triggerGreeting).toHaveBeenLastCalledWith("Say exactly: hello from the meeting.");
expect(callbacks).toMatchObject({
@@ -922,7 +1014,7 @@ describe("google-meet plugin", () => {
text: "Use the launch update.",
});
});
expect(bridge.triggerGreeting).toHaveBeenCalledWith("Say exactly: I'm here and listening.");
expect(bridge.triggerGreeting).not.toHaveBeenCalled();
handle.speak("Say exactly: hello from the node.");
expect(bridge.triggerGreeting).toHaveBeenLastCalledWith("Say exactly: hello from the node.");
expect(callbacks).toMatchObject({

View File

@@ -43,6 +43,23 @@ const googleMeetConfigSchema = {
},
"chrome.launch": { label: "Launch Chrome" },
"chrome.browserProfile": { label: "Chrome Profile", advanced: true },
"chrome.guestName": {
label: "Guest Name",
help: "Used when Chrome lands on the signed-out Meet guest-name screen.",
},
"chrome.reuseExistingTab": {
label: "Reuse Existing Meet Tab",
help: "Avoids opening duplicate tabs for the same Meet URL.",
},
"chrome.autoJoin": {
label: "Auto Join Guest Screen",
help: "Best-effort guest-name fill and Join Now click when Chrome allows JavaScript from Apple Events.",
},
"chrome.waitForInCallMs": {
label: "Wait For In-Call (ms)",
help: "Waits for Chrome to report that the Meet tab is in-call before the realtime intro speaks.",
advanced: true,
},
"chrome.audioInputCommand": {
label: "Audio Input Command",
help: "Command that writes 8 kHz G.711 mu-law meeting audio to stdout.",
@@ -115,7 +132,16 @@ const googleMeetConfigSchema = {
const GoogleMeetToolSchema = Type.Object({
action: Type.String({
enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave", "speak"],
enum: [
"join",
"status",
"setup_status",
"resolve_space",
"preflight",
"leave",
"speak",
"test_speech",
],
description: "Google Meet action to run",
}),
url: Type.Optional(Type.String({ description: "Explicit https://meet.google.com/... URL" })),
@@ -221,6 +247,7 @@ export default definePluginEntry({
dialInNumber: normalizeOptionalString(params?.dialInNumber),
pin: normalizeOptionalString(params?.pin),
dtmfSequence: normalizeOptionalString(params?.dtmfSequence),
message: normalizeOptionalString(params?.message),
});
respond(true, result);
} catch (err) {
@@ -287,6 +314,27 @@ export default definePluginEntry({
},
);
api.registerGatewayMethod(
"googlemeet.testSpeech",
async ({ params, respond }: GatewayRequestHandlerOptions) => {
try {
const rt = await ensureRuntime();
const result = await rt.testSpeech({
url: resolveMeetingInput(config, params?.url),
transport: normalizeTransport(params?.transport),
mode: normalizeMode(params?.mode),
dialInNumber: normalizeOptionalString(params?.dialInNumber),
pin: normalizeOptionalString(params?.pin),
dtmfSequence: normalizeOptionalString(params?.dtmfSequence),
message: normalizeOptionalString(params?.message),
});
respond(true, result);
} catch (err) {
sendError(respond, err);
}
},
);
api.registerTool({
name: "google_meet",
label: "Google Meet",
@@ -306,6 +354,21 @@ export default definePluginEntry({
dialInNumber: normalizeOptionalString(raw.dialInNumber),
pin: normalizeOptionalString(raw.pin),
dtmfSequence: normalizeOptionalString(raw.dtmfSequence),
message: normalizeOptionalString(raw.message),
}),
);
}
case "test_speech": {
const rt = await ensureRuntime();
return json(
await rt.testSpeech({
url: resolveMeetingInput(config, raw.url),
transport: normalizeTransport(raw.transport),
mode: normalizeMode(raw.mode),
dialInNumber: normalizeOptionalString(raw.dialInNumber),
pin: normalizeOptionalString(raw.pin),
dtmfSequence: normalizeOptionalString(raw.dtmfSequence),
message: normalizeOptionalString(raw.message),
}),
);
}

View File

@@ -37,6 +37,23 @@
"label": "Chrome Profile",
"advanced": true
},
"chrome.guestName": {
"label": "Guest Name",
"help": "Used when Chrome lands on the signed-out Meet guest-name screen."
},
"chrome.reuseExistingTab": {
"label": "Reuse Existing Meet Tab",
"help": "Avoids opening duplicate tabs for the same Meet URL."
},
"chrome.autoJoin": {
"label": "Auto Join Guest Screen",
"help": "Best-effort guest-name fill and Join Now click when Chrome allows JavaScript from Apple Events."
},
"chrome.waitForInCallMs": {
"label": "Wait For In-Call (ms)",
"help": "Waits for Chrome to report that the Meet tab is in-call before the realtime intro speaks.",
"advanced": true
},
"chrome.audioInputCommand": {
"label": "Audio Input Command",
"help": "Command that writes 8 kHz G.711 mu-law meeting audio to stdout.",
@@ -190,10 +207,26 @@
"browserProfile": {
"type": "string"
},
"guestName": {
"type": "string",
"default": "OpenClaw Agent"
},
"reuseExistingTab": {
"type": "boolean",
"default": true
},
"autoJoin": {
"type": "boolean",
"default": true
},
"joinTimeoutMs": {
"type": "number",
"default": 30000
},
"waitForInCallMs": {
"type": "number",
"default": 20000
},
"audioInputCommand": {
"type": "array",
"default": [

View File

@@ -16,6 +16,7 @@ import type { GoogleMeetRuntime } from "./runtime.js";
type JoinOptions = {
transport?: GoogleMeetTransport;
mode?: GoogleMeetMode;
message?: string;
dialInNumber?: string;
pin?: string;
dtmfSequence?: string;
@@ -177,6 +178,7 @@ export function registerGoogleMeetCli(params: {
.argument("[url]", "Explicit https://meet.google.com/... URL")
.option("--transport <transport>", "Transport: chrome, chrome-node, or twilio")
.option("--mode <mode>", "Mode: realtime or transcribe")
.option("--message <text>", "Realtime speech to trigger after join")
.option("--dial-in-number <phone>", "Meet dial-in number for Twilio transport")
.option("--pin <pin>", "Meet phone PIN; # is appended if omitted")
.option("--dtmf-sequence <sequence>", "Explicit Twilio DTMF sequence")
@@ -186,6 +188,7 @@ export function registerGoogleMeetCli(params: {
url: resolveMeetingInput(params.config, url),
transport: options.transport,
mode: options.mode,
message: options.message,
dialInNumber: options.dialInNumber,
pin: options.pin,
dtmfSequence: options.dtmfSequence,
@@ -193,6 +196,28 @@ export function registerGoogleMeetCli(params: {
writeStdoutJson(result.session);
});
root
.command("test-speech")
.argument("[url]", "Explicit https://meet.google.com/... URL")
.option("--transport <transport>", "Transport: chrome, chrome-node, or twilio")
.option("--mode <mode>", "Mode: realtime or transcribe")
.option(
"--message <text>",
"Realtime speech to trigger",
"Say exactly: Google Meet speech test complete.",
)
.action(async (url: string | undefined, options: JoinOptions) => {
const rt = await params.ensureRuntime();
writeStdoutJson(
await rt.testSpeech({
url: resolveMeetingInput(params.config, url),
transport: options.transport,
mode: options.mode,
message: options.message,
}),
);
});
root
.command("resolve-space")
.description("Resolve a Meet URL, meeting code, or spaces/{id} to its canonical space")

View File

@@ -22,7 +22,11 @@ export type GoogleMeetConfig = {
audioBackend: "blackhole-2ch";
launch: boolean;
browserProfile?: string;
guestName: string;
reuseExistingTab: boolean;
autoJoin: boolean;
joinTimeoutMs: number;
waitForInCallMs: number;
audioInputCommand?: string[];
audioOutputCommand?: string[];
audioBridgeCommand?: string[];
@@ -113,7 +117,11 @@ export const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
chrome: {
audioBackend: "blackhole-2ch",
launch: true,
guestName: "OpenClaw Agent",
reuseExistingTab: true,
autoJoin: true,
joinTimeoutMs: 30_000,
waitForInCallMs: 20_000,
audioInputCommand: [...DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND],
audioOutputCommand: [...DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND],
},
@@ -300,10 +308,21 @@ export function resolveGoogleMeetConfigWithEnv(
audioBackend: "blackhole-2ch",
launch: resolveBoolean(chrome.launch, DEFAULT_GOOGLE_MEET_CONFIG.chrome.launch),
browserProfile: normalizeOptionalString(chrome.browserProfile),
guestName:
normalizeOptionalString(chrome.guestName) ?? DEFAULT_GOOGLE_MEET_CONFIG.chrome.guestName,
reuseExistingTab: resolveBoolean(
chrome.reuseExistingTab,
DEFAULT_GOOGLE_MEET_CONFIG.chrome.reuseExistingTab,
),
autoJoin: resolveBoolean(chrome.autoJoin, DEFAULT_GOOGLE_MEET_CONFIG.chrome.autoJoin),
joinTimeoutMs: resolveNumber(
chrome.joinTimeoutMs,
DEFAULT_GOOGLE_MEET_CONFIG.chrome.joinTimeoutMs,
),
waitForInCallMs: resolveNumber(
chrome.waitForInCallMs,
DEFAULT_GOOGLE_MEET_CONFIG.chrome.waitForInCallMs,
),
audioInputCommand: resolveStringArray(chrome.audioInputCommand) ?? [
...DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND,
],

View File

@@ -18,6 +18,20 @@ type NodeBridgeSession = {
chunks: Buffer[];
waiters: Array<() => void>;
closed: boolean;
createdAt: string;
lastInputAt?: string;
lastOutputAt?: string;
lastInputBytes: number;
lastOutputBytes: number;
};
type BrowserStatus = {
inCall?: boolean;
micMuted?: boolean;
browserUrl?: string;
browserTitle?: string;
status?: string;
notes?: string[];
};
const sessions = new Map<string, NodeBridgeSession>();
@@ -46,6 +60,10 @@ function readNumber(value: unknown, fallback: number): number {
return typeof value === "number" && Number.isFinite(value) && value > 0 ? value : fallback;
}
// Coerce an untyped param into a boolean, falling back when it is not one.
function readBoolean(value: unknown, fallback: boolean): boolean {
// Only a genuine boolean is accepted; strings/numbers are NOT coerced.
if (typeof value === "boolean") {
return value;
}
return fallback;
}
function runCommandWithTimeout(argv: string[], timeoutMs: number) {
const [command, ...args] = argv;
if (!command) {
@@ -62,6 +80,163 @@ function runCommandWithTimeout(argv: string[], timeoutMs: number) {
};
}
// Run an inline AppleScript snippet through /usr/bin/osascript with the
// shared command timeout; returns the { code, stdout, stderr } shape of
// runCommandWithTimeout.
function runAppleScript(script: string, timeoutMs: number) {
return runCommandWithTimeout(["/usr/bin/osascript", "-e", script], timeoutMs);
}
// Quote a string for embedding into AppleScript source. JSON escaping matches
// AppleScript for quotes and backslashes.
// NOTE(review): control characters become \uXXXX escapes, which AppleScript
// does not decode — confirm inputs (Meet URLs, guest names) never contain them.
function normalizeAppleScriptString(value: string): string {
return JSON.stringify(value);
}
// Locate the first open meet.google.com tab in Chrome via Apple Events, focus
// it, and derive a best-effort in-call status from its URL and title.
function activeMeetTabStatus(timeoutMs: number): BrowserStatus {
const script = `
tell application "Google Chrome"
repeat with w in windows
repeat with t in tabs of w
set tabUrl to URL of t
if tabUrl starts with "https://meet.google.com/" then
set active tab index of w to index of t
set index of w to 1
set tabTitle to title of t
return tabUrl & linefeed & tabTitle
end if
end repeat
end repeat
end tell`;
const result = runAppleScript(script, timeoutMs);
if (result.code !== 0) {
// Chrome closed or Apple Events denied: report "unavailable" rather than throw.
return {
inCall: false,
status: "browser-unavailable",
notes: [result.stderr || result.stdout || "Google Chrome tab status unavailable"],
};
}
// osascript returns "url<newline>title"; split tolerates CRLF just in case.
const [browserUrl = "", browserTitle = ""] = result.stdout.split(/\r?\n/u);
return {
// Heuristic: a title ending in "Meet" is taken as the pre-join/lobby screen
// — presumably the title changes once in-call; TODO confirm across Meet UI revisions.
inCall: Boolean(browserUrl.trim()) && !/Meet$/u.test(browserTitle.trim()),
browserUrl: browserUrl.trim() || undefined,
browserTitle: browserTitle.trim() || undefined,
status: "ok",
};
}
// Focus an already-open Chrome tab whose URL exactly equals `url` (used to
// avoid duplicate Meet tabs). Returns true only when such a tab was found.
function activateExistingMeetTab(url: string, timeoutMs: number): boolean {
const script = `
set targetUrl to ${normalizeAppleScriptString(url)}
tell application "Google Chrome"
repeat with w in windows
repeat with t in tabs of w
if URL of t is targetUrl then
set active tab index of w to index of t
set index of w to 1
activate
return "found"
end if
end repeat
end repeat
end tell
return "missing"`;
const result = runAppleScript(script, timeoutMs);
// Any osascript failure (Chrome not running, automation blocked) counts as
// "no tab reused" so the caller falls back to launching a fresh one.
return result.code === 0 && result.stdout.trim() === "found";
}
// Run arbitrary JavaScript inside the first Chrome tab whose URL starts with
// `url`, focusing it first. Requires Chrome's "Allow JavaScript from Apple
// Events" setting; otherwise osascript exits non-zero.
function executeMeetTabScript(url: string, javascript: string, timeoutMs: number) {
const script = `
set targetUrl to ${normalizeAppleScriptString(url)}
set source to ${normalizeAppleScriptString(javascript)}
tell application "Google Chrome"
repeat with w in windows
repeat with t in tabs of w
if URL of t starts with targetUrl then
set active tab index of w to index of t
set index of w to 1
return execute t javascript source
end if
end repeat
end repeat
end tell
return ""`;
// Returns the runCommandWithTimeout result; stdout carries the JS return value.
return runAppleScript(script, timeoutMs);
}
// Best-effort guest auto-join: inject JS into the Meet tab to fill the
// signed-out guest-name field and click "Join now"/"Ask to join", then report
// in-call and mic state parsed from the page's buttons.
function tryAutoJoinMeet(params: {
url: string;
guestName: string;
timeoutMs: number;
}): BrowserStatus {
const js = `
(() => {
const text = (node) => (node?.innerText || node?.textContent || "").trim();
const input = [...document.querySelectorAll('input')].find((el) =>
/your name/i.test(el.getAttribute('aria-label') || el.placeholder || '')
);
if (input && !input.value) {
input.focus();
input.value = ${JSON.stringify(params.guestName)};
input.dispatchEvent(new Event('input', { bubbles: true }));
input.dispatchEvent(new Event('change', { bubbles: true }));
}
const buttons = [...document.querySelectorAll('button')];
const join = buttons.find((button) => /join now|ask to join/i.test(text(button)) && !button.disabled);
if (join) join.click();
const mic = buttons.find((button) => /turn off microphone|turn on microphone|microphone/i.test(button.getAttribute('aria-label') || text(button)));
return JSON.stringify({
clickedJoin: Boolean(join),
inCall: buttons.some((button) => /leave call/i.test(button.getAttribute('aria-label') || text(button))),
micMuted: mic ? /turn on microphone/i.test(mic.getAttribute('aria-label') || text(mic)) : undefined,
title: document.title,
url: location.href
});
})();`;
// JS injection is capped at 5s regardless of the caller's timeout.
const result = executeMeetTabScript(params.url, js, Math.min(params.timeoutMs, 5_000));
if (result.code !== 0) {
// Apple Events JS is disabled or failed: degrade to the tab-title heuristic
// and tell the operator which Chrome setting unlocks auto-join.
return {
...activeMeetTabStatus(Math.min(params.timeoutMs, 2_000)),
notes: [
"Chrome JavaScript automation is unavailable; enable Chrome > View > Developer > Allow JavaScript from Apple Events for guest auto-join.",
result.stderr || result.stdout || "unknown Apple Events failure",
],
};
}
try {
const parsed = JSON.parse(result.stdout.trim()) as {
inCall?: boolean;
micMuted?: boolean;
url?: string;
title?: string;
};
return {
inCall: parsed.inCall,
micMuted: parsed.micMuted,
browserUrl: parsed.url,
browserTitle: parsed.title,
status: "ok",
};
} catch {
// Unparseable JS output (e.g. page navigated mid-run): fall back to heuristic.
return activeMeetTabStatus(Math.min(params.timeoutMs, 2_000));
}
}
// Poll the Meet tab (every 750 ms, up to timeoutMs) until it reports in-call,
// optionally attempting the guest auto-join flow on each tick. Returns the
// last observed status even on timeout so callers can surface it as health.
async function waitForInCall(params: {
url: string;
guestName: string;
autoJoin: boolean;
timeoutMs: number;
}): Promise<BrowserStatus> {
const deadline = Date.now() + Math.max(0, params.timeoutMs);
// Seed with a cheap status probe so a zero timeout still returns something.
let status: BrowserStatus = activeMeetTabStatus(2_000);
while (Date.now() <= deadline) {
status = params.autoJoin
? tryAutoJoinMeet({ url: params.url, guestName: params.guestName, timeoutMs: 5_000 })
: activeMeetTabStatus(2_000);
if (status.inCall === true) {
return status;
}
await sleep(750);
}
return status;
}
function assertBlackHoleAvailable(timeoutMs: number) {
if (process.platform !== "darwin") {
throw new Error("Chrome Meet transport with blackhole-2ch audio is currently macOS-only");
@@ -112,6 +287,9 @@ function startCommandPair(params: {
chunks: [],
waiters: [],
closed: false,
createdAt: new Date().toISOString(),
lastInputBytes: 0,
lastOutputBytes: 0,
};
const outputProcess = spawn(output.command, output.args, {
stdio: ["pipe", "ignore", "pipe"],
@@ -122,7 +300,10 @@ function startCommandPair(params: {
session.input = inputProcess;
session.output = outputProcess;
inputProcess.stdout?.on("data", (chunk) => {
session.chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
const audio = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
session.lastInputAt = new Date().toISOString();
session.lastInputBytes += audio.byteLength;
session.chunks.push(audio);
if (session.chunks.length > 200) {
session.chunks.splice(0, session.chunks.length - 200);
}
@@ -172,7 +353,10 @@ function pushAudio(params: Record<string, unknown>) {
if (!session || session.closed) {
throw new Error(`bridge is not open: ${bridgeId}`);
}
session.output?.stdin?.write(Buffer.from(base64, "base64"));
const audio = Buffer.from(base64, "base64");
session.lastOutputAt = new Date().toISOString();
session.lastOutputBytes += audio.byteLength;
session.output?.stdin?.write(audio);
return { bridgeId, ok: true };
}
@@ -224,22 +408,58 @@ function startChrome(params: Record<string, unknown>) {
if (browserProfile) {
argv.push("--args", `--profile-directory=${browserProfile}`);
}
argv.push(url);
const result = runCommandWithTimeout(argv, timeoutMs);
if (result.code !== 0) {
if (bridgeId) {
const session = sessions.get(bridgeId);
if (session) {
stopSession(session);
const reused = readBoolean(params.reuseExistingTab, true)
? activateExistingMeetTab(url, Math.min(timeoutMs, 5_000))
: false;
if (!reused) {
argv.push(url);
const result = runCommandWithTimeout(argv, timeoutMs);
if (result.code !== 0) {
if (bridgeId) {
const session = sessions.get(bridgeId);
if (session) {
stopSession(session);
}
}
throw new Error(
`failed to launch Chrome for Meet: ${result.stderr || result.stdout || result.code}`,
);
}
throw new Error(
`failed to launch Chrome for Meet: ${result.stderr || result.stdout || result.code}`,
);
}
}
return { launched: params.launch !== false, bridgeId, audioBridge };
const waitForInCallMs = readNumber(params.waitForInCallMs, 20_000);
return Promise.resolve(
params.launch !== false && waitForInCallMs > 0
? waitForInCall({
url,
guestName: readString(params.guestName) ?? "OpenClaw Agent",
autoJoin: readBoolean(params.autoJoin, true),
timeoutMs: waitForInCallMs,
})
: activeMeetTabStatus(2_000),
).then((browser) => ({ launched: params.launch !== false, bridgeId, audioBridge, browser }));
}
// Report combined health: the Chrome tab heuristic plus, when a bridgeId is
// supplied, the audio-bridge session's lifecycle and last I/O counters.
function bridgeStatus(params: Record<string, unknown>) {
const bridgeId = readString(params.bridgeId);
const session = bridgeId ? sessions.get(bridgeId) : undefined;
return {
browser: activeMeetTabStatus(2_000),
bridge: session
? {
bridgeId,
closed: session.closed,
createdAt: session.createdAt,
lastInputAt: session.lastInputAt,
lastOutputAt: session.lastOutputAt,
lastInputBytes: session.lastInputBytes,
lastOutputBytes: session.lastOutputBytes,
}
: bridgeId
// Unknown id: presumably an already-stopped bridge, so report it closed.
? { bridgeId, closed: true }
: undefined,
};
}
function stopChrome(params: Record<string, unknown>) {
@@ -267,7 +487,10 @@ export async function handleGoogleMeetNodeHostCommand(paramsJSON?: string | null
result = { ok: true };
break;
case "start":
result = startChrome(params);
result = await startChrome(params);
break;
case "status":
result = bridgeStatus(params);
break;
case "pullAudio":
result = await pullAudio(params);

View File

@@ -13,6 +13,7 @@ import {
} from "./agent-consult.js";
import type { GoogleMeetConfig } from "./config.js";
import { resolveGoogleMeetRealtimeProvider } from "./realtime.js";
import type { GoogleMeetChromeHealth } from "./transports/types.js";
export type ChromeNodeRealtimeAudioBridgeHandle = {
type: "node-command-pair";
@@ -20,6 +21,7 @@ export type ChromeNodeRealtimeAudioBridgeHandle = {
nodeId: string;
bridgeId: string;
speak: (instructions?: string) => void;
getHealth: () => GoogleMeetChromeHealth;
stop: () => Promise<void>;
};
@@ -45,6 +47,11 @@ export async function startNodeRealtimeAudioBridge(params: {
}): Promise<ChromeNodeRealtimeAudioBridgeHandle> {
let stopped = false;
let bridge: RealtimeVoiceBridgeSession | null = null;
let realtimeReady = false;
let lastInputAt: string | undefined;
let lastOutputAt: string | undefined;
let lastInputBytes = 0;
let lastOutputBytes = 0;
const resolved = resolveGoogleMeetRealtimeProvider({
config: params.config,
fullConfig: params.fullConfig,
@@ -83,12 +90,14 @@ export async function startNodeRealtimeAudioBridge(params: {
providerConfig: resolved.providerConfig,
instructions: params.config.realtime.instructions,
initialGreetingInstructions: params.config.realtime.introMessage,
triggerGreetingOnReady: Boolean(params.config.realtime.introMessage),
triggerGreetingOnReady: false,
markStrategy: "ack-immediately",
tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy),
audioSink: {
isOpen: () => !stopped,
sendAudio: (muLaw) => {
lastOutputAt = new Date().toISOString();
lastOutputBytes += muLaw.byteLength;
void params.runtime.nodes
.invoke({
nodeId: params.nodeId,
@@ -149,10 +158,14 @@ export async function startNodeRealtimeAudioBridge(params: {
void stop();
},
onClose: (reason) => {
realtimeReady = false;
if (reason === "error") {
void stop();
}
},
onReady: () => {
realtimeReady = true;
},
});
await bridge.connect();
@@ -169,10 +182,13 @@ export async function startNodeRealtimeAudioBridge(params: {
params: { action: "pullAudio", bridgeId: params.bridgeId, timeoutMs: 250 },
timeoutMs: 2_000,
});
const result = asRecord(raw);
const result = asRecord(asRecord(raw).payload ?? raw);
const base64 = readString(result.base64);
if (base64) {
bridge?.sendAudio(Buffer.from(base64, "base64"));
const audio = Buffer.from(base64, "base64");
lastInputAt = new Date().toISOString();
lastInputBytes += audio.byteLength;
bridge?.sendAudio(audio);
}
if (result.closed === true) {
await stop();
@@ -194,6 +210,15 @@ export async function startNodeRealtimeAudioBridge(params: {
speak: (instructions) => {
bridge?.triggerGreeting(instructions);
},
getHealth: () => ({
providerConnected: bridge?.bridge.isConnected() ?? false,
realtimeReady,
lastInputAt,
lastOutputAt,
lastInputBytes,
lastOutputBytes,
bridgeClosed: stopped,
}),
stop,
};
}

View File

@@ -16,6 +16,7 @@ import {
resolveGoogleMeetRealtimeTools,
} from "./agent-consult.js";
import type { GoogleMeetConfig } from "./config.js";
import type { GoogleMeetChromeHealth } from "./transports/types.js";
type BridgeProcess = {
pid?: number;
@@ -42,6 +43,7 @@ export type ChromeRealtimeAudioBridgeHandle = {
inputCommand: string[];
outputCommand: string[];
speak: (instructions?: string) => void;
getHealth: () => GoogleMeetChromeHealth;
stop: () => Promise<void>;
};
@@ -97,6 +99,11 @@ export async function startCommandRealtimeAudioBridge(params: {
});
let stopped = false;
let bridge: RealtimeVoiceBridgeSession | null = null;
let realtimeReady = false;
let lastInputAt: string | undefined;
let lastOutputAt: string | undefined;
let lastInputBytes = 0;
let lastOutputBytes = 0;
const stop = async () => {
if (stopped) {
@@ -150,12 +157,14 @@ export async function startCommandRealtimeAudioBridge(params: {
providerConfig: resolved.providerConfig,
instructions: params.config.realtime.instructions,
initialGreetingInstructions: params.config.realtime.introMessage,
triggerGreetingOnReady: Boolean(params.config.realtime.introMessage),
triggerGreetingOnReady: false,
markStrategy: "ack-immediately",
tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy),
audioSink: {
isOpen: () => !stopped,
sendAudio: (muLaw) => {
lastOutputAt = new Date().toISOString();
lastOutputBytes += muLaw.byteLength;
outputProcess.stdin?.write(muLaw);
},
},
@@ -195,15 +204,21 @@ export async function startCommandRealtimeAudioBridge(params: {
},
onError: fail("realtime voice bridge"),
onClose: (reason) => {
realtimeReady = false;
if (reason === "error") {
void stop();
}
},
onReady: () => {
realtimeReady = true;
},
});
inputProcess.stdout?.on("data", (chunk) => {
const audio = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
if (!stopped && audio.byteLength > 0) {
lastInputAt = new Date().toISOString();
lastInputBytes += audio.byteLength;
bridge?.sendAudio(Buffer.from(audio));
}
});
@@ -216,6 +231,15 @@ export async function startCommandRealtimeAudioBridge(params: {
speak: (instructions) => {
bridge?.triggerGreeting(instructions);
},
getHealth: () => ({
providerConnected: bridge?.bridge.isConnected() ?? false,
realtimeReady,
lastInputAt,
lastOutputAt,
lastInputBytes,
lastOutputBytes,
bridgeClosed: stopped,
}),
stop,
};
}

View File

@@ -8,6 +8,7 @@ import { getGoogleMeetSetupStatus } from "./setup.js";
import { launchChromeMeet, launchChromeMeetOnNode } from "./transports/chrome.js";
import { buildMeetDtmfSequence, normalizeDialInNumber } from "./transports/twilio.js";
import type {
GoogleMeetChromeHealth,
GoogleMeetJoinRequest,
GoogleMeetJoinResult,
GoogleMeetSession,
@@ -50,6 +51,7 @@ export class GoogleMeetRuntime {
readonly #sessions = new Map<string, GoogleMeetSession>();
readonly #sessionStops = new Map<string, () => Promise<void>>();
readonly #sessionSpeakers = new Map<string, (instructions?: string) => void>();
readonly #sessionHealth = new Map<string, () => GoogleMeetChromeHealth>();
constructor(
private readonly params: {
@@ -61,6 +63,7 @@ export class GoogleMeetRuntime {
) {}
list(): GoogleMeetSession[] {
this.#refreshHealth();
return [...this.#sessions.values()].toSorted((a, b) => a.createdAt.localeCompare(b.createdAt));
}
@@ -69,6 +72,7 @@ export class GoogleMeetRuntime {
session?: GoogleMeetSession;
sessions?: GoogleMeetSession[];
} {
this.#refreshHealth(sessionId);
if (!sessionId) {
return { found: true, sessions: this.list() };
}
@@ -84,6 +88,24 @@ export class GoogleMeetRuntime {
const url = normalizeMeetUrl(request.url);
const transport = resolveTransport(request.transport, this.params.config);
const mode = resolveMode(request.mode, this.params.config);
const reusable = this.list().find(
(session) =>
session.state === "active" &&
session.url === url &&
session.transport === transport &&
session.mode === mode,
);
if (reusable) {
reusable.notes = [
...reusable.notes.filter((note) => note !== "Reused existing active Meet session."),
"Reused existing active Meet session.",
];
reusable.updatedAt = nowIso();
if (request.message || this.params.config.realtime.introMessage) {
this.speak(reusable.id, request.message);
}
return { session: reusable };
}
const createdAt = nowIso();
const session: GoogleMeetSession = {
@@ -146,6 +168,7 @@ export class GoogleMeetRuntime {
: undefined,
}
: undefined,
health: "browser" in result ? result.browser : undefined,
};
if (
result.audioBridge?.type === "command-pair" ||
@@ -153,6 +176,7 @@ export class GoogleMeetRuntime {
) {
this.#sessionStops.set(session.id, result.audioBridge.stop);
this.#sessionSpeakers.set(session.id, result.audioBridge.speak);
this.#sessionHealth.set(session.id, result.audioBridge.getHealth);
}
session.notes.push(
result.audioBridge
@@ -206,6 +230,9 @@ export class GoogleMeetRuntime {
}
this.#sessions.set(session.id, session);
if (mode === "realtime" && this.params.config.realtime.introMessage) {
this.speak(session.id, request.message);
}
return { session };
}
@@ -218,6 +245,7 @@ export class GoogleMeetRuntime {
if (stop) {
this.#sessionStops.delete(sessionId);
this.#sessionSpeakers.delete(sessionId);
this.#sessionHealth.delete(sessionId);
await stop();
}
session.state = "ended";
@@ -239,6 +267,42 @@ export class GoogleMeetRuntime {
}
speak(instructions || this.params.config.realtime.introMessage);
session.updatedAt = nowIso();
this.#refreshHealth(sessionId);
return { found: true, spoken: true, session };
}
/**
 * One-shot smoke test: joins (or reuses) a Meet session for the request,
 * speaks a fixed verification phrase (unless the request carries its own
 * message), and reports whether a new session was created plus the tab's
 * in-call health flag.
 */
async testSpeech(request: GoogleMeetJoinRequest): Promise<{
  createdSession: boolean;
  inCall?: boolean;
  spoken: boolean;
  session: GoogleMeetSession;
}> {
  // Snapshot ids up front so we can tell whether join() minted a new session.
  const existingIds = new Set(this.list().map((entry) => entry.id));
  const joined = await this.join(request);
  const phrase = request.message ?? "Say exactly: Google Meet speech test complete.";
  const { spoken } = this.speak(joined.session.id, phrase);
  return {
    createdSession: !existingIds.has(joined.session.id),
    inCall: joined.session.chrome?.health?.inCall,
    spoken,
    session: joined.session,
  };
}
/**
 * Merges the latest bridge health snapshot into each tracked chrome session.
 * With a sessionId, refreshes only that session; otherwise refreshes every
 * session that registered a health getter.
 */
#refreshHealth(sessionId?: string) {
  // Truthiness intentionally matches the original: an empty id refreshes all.
  const targets = sessionId ? [sessionId] : [...this.#sessionHealth.keys()];
  for (const target of targets) {
    const tracked = this.#sessions.get(target);
    const readHealth = this.#sessionHealth.get(target);
    // Skip sessions without a chrome transport or without a registered getter.
    if (tracked?.chrome && readHealth) {
      tracked.chrome.health = { ...tracked.chrome.health, ...readHealth() };
    }
  }
}
}

View File

@@ -79,6 +79,37 @@ export function getGoogleMeetSetupStatus(config: GoogleMeetConfig): {
: "Chrome realtime audio bridge not configured",
});
checks.push({
id: "guest-join-defaults",
ok: Boolean(
config.chrome.guestName && config.chrome.autoJoin && config.chrome.reuseExistingTab,
),
message:
config.chrome.guestName && config.chrome.autoJoin && config.chrome.reuseExistingTab
? "Guest auto-join and tab reuse defaults are enabled"
: "Set chrome.guestName, chrome.autoJoin, and chrome.reuseExistingTab for unattended guest joins",
});
checks.push({
id: "chrome-node-target",
ok: config.defaultTransport !== "chrome-node" || Boolean(config.chromeNode.node),
message:
config.defaultTransport === "chrome-node" && !config.chromeNode.node
? "chrome-node default should pin chromeNode.node when multiple nodes may be connected"
: config.chromeNode.node
? `Chrome node pinned to ${config.chromeNode.node}`
: "Chrome node not pinned; automatic selection works when exactly one capable node is connected",
});
checks.push({
id: "intro-after-in-call",
ok: config.chrome.waitForInCallMs > 0,
message:
config.chrome.waitForInCallMs > 0
? `Realtime intro waits up to ${config.chrome.waitForInCallMs}ms for the Meet tab to be in-call`
: "Set chrome.waitForInCallMs to delay realtime intro until the Meet tab is in-call",
});
return {
ok: checks.every((check) => check.ok),
checks,

View File

@@ -10,6 +10,7 @@ import {
startCommandRealtimeAudioBridge,
type ChromeRealtimeAudioBridgeHandle,
} from "../realtime.js";
import type { GoogleMeetChromeHealth } from "./types.js";
export const GOOGLE_MEET_SYSTEM_PROFILER_COMMAND = "/usr/sbin/system_profiler";
@@ -200,6 +201,7 @@ function parseNodeStartResult(raw: unknown): {
launched?: boolean;
bridgeId?: string;
audioBridge?: { type?: string };
browser?: GoogleMeetChromeHealth;
} {
const value =
raw && typeof raw === "object" && "payload" in raw
@@ -212,6 +214,7 @@ function parseNodeStartResult(raw: unknown): {
launched?: boolean;
bridgeId?: string;
audioBridge?: { type?: string };
browser?: GoogleMeetChromeHealth;
};
}
@@ -229,6 +232,7 @@ export async function launchChromeMeetOnNode(params: {
audioBridge?:
| { type: "external-command" }
| ({ type: "node-command-pair" } & ChromeNodeRealtimeAudioBridgeHandle);
browser?: GoogleMeetChromeHealth;
}> {
const nodeId = await resolveChromeNode({
runtime: params.runtime,
@@ -248,6 +252,10 @@ export async function launchChromeMeetOnNode(params: {
audioOutputCommand: params.config.chrome.audioOutputCommand,
audioBridgeCommand: params.config.chrome.audioBridgeCommand,
audioBridgeHealthCommand: params.config.chrome.audioBridgeHealthCommand,
guestName: params.config.chrome.guestName,
reuseExistingTab: params.config.chrome.reuseExistingTab,
autoJoin: params.config.chrome.autoJoin,
waitForInCallMs: params.config.chrome.waitForInCallMs,
},
timeoutMs: params.config.chrome.joinTimeoutMs + 5_000,
});
@@ -269,6 +277,7 @@ export async function launchChromeMeetOnNode(params: {
nodeId,
launched: result.launched === true,
audioBridge: bridge,
browser: result.browser,
};
}
if (result.audioBridge?.type === "external-command") {
@@ -276,7 +285,8 @@ export async function launchChromeMeetOnNode(params: {
nodeId,
launched: result.launched === true,
audioBridge: { type: "external-command" },
browser: result.browser,
};
}
return { nodeId, launched: result.launched === true };
return { nodeId, launched: result.launched === true, browser: result.browser };
}

View File

@@ -6,11 +6,28 @@ export type GoogleMeetJoinRequest = {
url: string;
transport?: GoogleMeetTransport;
mode?: GoogleMeetMode;
message?: string;
dialInNumber?: string;
pin?: string;
dtmfSequence?: string;
};
/**
 * Health snapshot for a Chrome-transport Meet session. All fields are
 * optional because snapshots may come from partial sources (node payloads,
 * local audio bridges) and are shallow-merged into any prior snapshot.
 */
export type GoogleMeetChromeHealth = {
  /** Whether the Meet tab reports being in-call — NOTE(review): assumed from usage; confirm against the tab probe. */
  inCall?: boolean;
  /** Presumably the Meet microphone mute state — verify against the producer. */
  micMuted?: boolean;
  /** True when the realtime provider bridge reports a live connection. */
  providerConnected?: boolean;
  /** True when the realtime pipeline is ready. */
  realtimeReady?: boolean;
  /** Timestamp of the most recent audio input activity — format assumed ISO; confirm. */
  lastInputAt?: string;
  /** Timestamp of the most recent audio output activity — format assumed ISO; confirm. */
  lastOutputAt?: string;
  /** Cumulative bytes of audio input observed by the bridge. */
  lastInputBytes?: number;
  /** Cumulative bytes of audio output observed by the bridge. */
  lastOutputBytes?: number;
  /** URL of the browser tab hosting the Meet, when reported. */
  browserUrl?: string;
  /** Title of the browser tab hosting the Meet, when reported. */
  browserTitle?: string;
  /** True once the audio bridge has been stopped. */
  bridgeClosed?: boolean;
  /** Free-form status string from the health producer. */
  status?: string;
  /** Additional diagnostic notes from the health producer. */
  notes?: string[];
};
export type GoogleMeetSession = {
id: string;
url: string;
@@ -35,6 +52,7 @@ export type GoogleMeetSession = {
type: "command-pair" | "node-command-pair" | "external-command";
provider?: string;
};
health?: GoogleMeetChromeHealth;
};
twilio?: {
dialInNumber: string;