diff --git a/docs/plugins/google-meet.md b/docs/plugins/google-meet.md index 2211084b780..96a9a9b8e4d 100644 --- a/docs/plugins/google-meet.md +++ b/docs/plugins/google-meet.md @@ -355,6 +355,8 @@ Defaults: - `realtime.toolPolicy: "safe-read-only"` - `realtime.instructions`: brief spoken replies, with `openclaw_agent_consult` for deeper answers +- `realtime.introMessage`: short spoken readiness check when the realtime bridge + connects; set it to `""` to join silently Optional overrides: @@ -371,6 +373,7 @@ Optional overrides: }, realtime: { toolPolicy: "owner", + introMessage: "Say exactly: I'm here.", }, } ``` @@ -409,7 +412,16 @@ VM. In both cases the realtime model and `openclaw_agent_consult` run on the Gateway host, so model credentials stay there. Use `action: "status"` to list active sessions or inspect a session ID. Use -`action: "leave"` to mark a session ended. +`action: "speak"` with `sessionId` and `message` to make the realtime agent +speak immediately. Use `action: "leave"` to mark a session ended. + +```json +{ + "action": "speak", + "sessionId": "meet_...", + "message": "Say exactly: I'm here and listening." +} +``` ## Realtime agent consult @@ -434,6 +446,12 @@ voice session. The voice model can then speak that answer back into the meeting. The consult session key is scoped per Meet session, so follow-up consult calls can reuse prior consult context during the same meeting. +To force a spoken readiness check after Chrome has fully joined the call: + +```bash +openclaw googlemeet speak meet_... "Say exactly: I'm here and listening." +``` + ## Notes Google Meet's official media API is receive-oriented, so speaking into a Meet @@ -453,9 +471,9 @@ For clean duplex audio, route Meet output and Meet microphone through separate virtual devices or a Loopback-style virtual device graph. A single shared BlackHole device can echo other participants back into the call. -`googlemeet leave` stops the command-pair realtime audio bridge for Chrome -sessions. 
For Twilio sessions delegated through the Voice Call plugin, it also -hangs up the underlying voice call. +`googlemeet speak` triggers the active realtime audio bridge for a Chrome +session. `googlemeet leave` stops that bridge. For Twilio sessions delegated +through the Voice Call plugin, `leave` also hangs up the underlying voice call. ## Related diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index 3d976b1e800..d7fa71be44e 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -205,6 +205,7 @@ describe("google-meet plugin", () => { voiceCall: { enabled: true, requestTimeoutMs: 30000, dtmfDelayMs: 2500 }, realtime: { provider: "openai", + introMessage: "Say exactly: I'm here and listening.", toolPolicy: "safe-read-only", }, oauth: {}, @@ -284,7 +285,7 @@ describe("google-meet plugin", () => { properties: { action: { type: "string", - enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave"], + enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave", "speak"], }, transport: { type: "string", enum: ["chrome", "chrome-node", "twilio"] }, mode: { type: "string", enum: ["realtime", "transcribe"] }, @@ -520,11 +521,16 @@ describe("google-meet plugin", () => { }); it("joins Chrome on a paired node without local Chrome or BlackHole", async () => { - const { methods, nodesList, nodesInvoke } = setup({ - defaultTransport: "chrome-node", - defaultMode: "transcribe", - chromeNode: { node: "parallels-macos" }, - }); + const { methods, nodesList, nodesInvoke } = setup( + { + defaultTransport: "chrome-node", + defaultMode: "transcribe", + chromeNode: { node: "parallels-macos" }, + }, + { + nodesInvokeResult: { payload: { launched: true } }, + }, + ); const handler = methods.get("googlemeet.join") as | ((ctx: { params: Record; @@ -669,6 +675,7 @@ describe("google-meet plugin", () => { name: string; args: unknown; }) => void; + onReady?: () => void; 
tools?: unknown[]; } | undefined; @@ -680,6 +687,7 @@ describe("google-meet plugin", () => { submitToolResult: vi.fn(), acknowledgeMark: vi.fn(), close: vi.fn(), + triggerGreeting: vi.fn(), isConnected: vi.fn(() => true), }; const provider: RealtimeVoiceProviderPlugin = { @@ -756,6 +764,7 @@ describe("google-meet plugin", () => { inputStdout.write(Buffer.from([1, 2, 3])); callbacks?.onAudio(Buffer.from([4, 5])); callbacks?.onMark?.("mark-1"); + callbacks?.onReady?.(); callbacks?.onToolCall?.({ itemId: "item-1", callId: "tool-call-1", @@ -772,6 +781,9 @@ describe("google-meet plugin", () => { expect(sendAudio).toHaveBeenCalledWith(Buffer.from([1, 2, 3])); expect(outputStdinWrites).toEqual([Buffer.from([4, 5])]); expect(bridge.acknowledgeMark).toHaveBeenCalled(); + expect(bridge.triggerGreeting).toHaveBeenCalledWith("Say exactly: I'm here and listening."); + handle.speak("Say exactly: hello from the meeting."); + expect(bridge.triggerGreeting).toHaveBeenLastCalledWith("Say exactly: hello from the meeting."); expect(callbacks).toMatchObject({ tools: [ expect.objectContaining({ @@ -808,6 +820,7 @@ describe("google-meet plugin", () => { name: string; args: unknown; }) => void; + onReady?: () => void; tools?: unknown[]; } | undefined; @@ -819,6 +832,7 @@ describe("google-meet plugin", () => { submitToolResult: vi.fn(), acknowledgeMark: vi.fn(), close: vi.fn(), + triggerGreeting: vi.fn(), isConnected: vi.fn(() => true), }; const provider: RealtimeVoiceProviderPlugin = { @@ -879,6 +893,7 @@ describe("google-meet plugin", () => { }); callbacks?.onAudio(Buffer.from([1, 2, 3])); + callbacks?.onReady?.(); callbacks?.onToolCall?.({ itemId: "item-1", callId: "tool-call-1", @@ -907,6 +922,9 @@ describe("google-meet plugin", () => { text: "Use the launch update.", }); }); + expect(bridge.triggerGreeting).toHaveBeenCalledWith("Say exactly: I'm here and listening."); + handle.speak("Say exactly: hello from the node."); + expect(bridge.triggerGreeting).toHaveBeenLastCalledWith("Say 
exactly: hello from the node."); expect(callbacks).toMatchObject({ tools: [ expect.objectContaining({ diff --git a/extensions/google-meet/index.ts b/extensions/google-meet/index.ts index de3d5574951..d4e3ed8558c 100644 --- a/extensions/google-meet/index.ts +++ b/extensions/google-meet/index.ts @@ -88,6 +88,10 @@ const googleMeetConfigSchema = { }, "realtime.model": { label: "Realtime Model", advanced: true }, "realtime.instructions": { label: "Realtime Instructions", advanced: true }, + "realtime.introMessage": { + label: "Realtime Intro Message", + help: "Spoken once when the realtime bridge is ready. Set to an empty string to join silently.", + }, "realtime.toolPolicy": { label: "Realtime Tool Policy", help: "Safe read-only tools are available by default; owner requests can unlock broader tools.", @@ -111,7 +115,7 @@ const googleMeetConfigSchema = { const GoogleMeetToolSchema = Type.Object({ action: Type.String({ - enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave"], + enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave", "speak"], description: "Google Meet action to run", }), url: Type.Optional(Type.String({ description: "Explicit https://meet.google.com/... 
URL" })), @@ -123,6 +127,7 @@ const GoogleMeetToolSchema = Type.Object({ pin: Type.Optional(Type.String({ description: "Meet phone PIN for Twilio" })), dtmfSequence: Type.Optional(Type.String({ description: "Explicit DTMF sequence for Twilio" })), sessionId: Type.Optional(Type.String({ description: "Meet session ID" })), + message: Type.Optional(Type.String({ description: "Realtime instructions to speak now" })), meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })), accessToken: Type.Optional(Type.String({ description: "Access token override" })), refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })), @@ -265,6 +270,23 @@ export default definePluginEntry({ }, ); + api.registerGatewayMethod( + "googlemeet.speak", + async ({ params, respond }: GatewayRequestHandlerOptions) => { + try { + const sessionId = normalizeOptionalString(params?.sessionId); + if (!sessionId) { + respond(false, { error: "sessionId required" }); + return; + } + const rt = await ensureRuntime(); + respond(true, rt.speak(sessionId, normalizeOptionalString(params?.message))); + } catch (err) { + sendError(respond, err); + } + }, + ); + api.registerTool({ name: "google_meet", label: "Google Meet", @@ -318,6 +340,14 @@ export default definePluginEntry({ } return json(await rt.leave(sessionId)); } + case "speak": { + const rt = await ensureRuntime(); + const sessionId = normalizeOptionalString(raw.sessionId); + if (!sessionId) { + throw new Error("sessionId required"); + } + return json(rt.speak(sessionId, normalizeOptionalString(raw.message))); + } default: throw new Error("unknown google_meet action"); } diff --git a/extensions/google-meet/openclaw.plugin.json b/extensions/google-meet/openclaw.plugin.json index b1aff76df7b..d7e5c3d7734 100644 --- a/extensions/google-meet/openclaw.plugin.json +++ b/extensions/google-meet/openclaw.plugin.json @@ -108,6 +108,10 @@ "label": "Realtime Instructions", "advanced": true }, + 
"realtime.introMessage": { + "label": "Realtime Intro Message", + "help": "Spoken once when the realtime bridge is ready. Set to an empty string to join silently." + }, "realtime.toolPolicy": { "label": "Realtime Tool Policy", "help": "Safe read-only tools are available by default; owner requests can unlock broader tools.", @@ -312,6 +316,10 @@ "type": "string", "default": "You are joining a private Google Meet as an OpenClaw agent. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call openclaw_agent_consult before answering." }, + "introMessage": { + "type": "string", + "default": "Say exactly: I'm here and listening." + }, "toolPolicy": { "type": "string", "enum": ["safe-read-only", "owner", "none"], diff --git a/extensions/google-meet/src/cli.ts b/extensions/google-meet/src/cli.ts index 2df39f2cc7a..7925772c335 100644 --- a/extensions/google-meet/src/cli.ts +++ b/extensions/google-meet/src/cli.ts @@ -304,4 +304,20 @@ export function registerGoogleMeetCli(params: { } writeStdoutLine("left %s", sessionId); }); + + root + .command("speak") + .argument("", "Meet session ID") + .argument("[message]", "Realtime instructions to speak now") + .action(async (sessionId: string, message?: string) => { + const rt = await params.ensureRuntime(); + const result = rt.speak(sessionId, message); + if (!result.found) { + throw new Error("session not found"); + } + if (!result.spoken) { + throw new Error("session has no active realtime audio bridge"); + } + writeStdoutLine("speaking on %s", sessionId); + }); } diff --git a/extensions/google-meet/src/config.ts b/extensions/google-meet/src/config.ts index ec6312c6cb1..53a36a73767 100644 --- a/extensions/google-meet/src/config.ts +++ b/extensions/google-meet/src/config.ts @@ -48,6 +48,7 @@ export type GoogleMeetConfig = { provider?: string; model?: string; instructions?: string; + introMessage?: string; toolPolicy: GoogleMeetToolPolicy; providers: Record>; }; @@ -99,6 
+100,7 @@ export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [ ] as const; export const DEFAULT_GOOGLE_MEET_REALTIME_INSTRUCTIONS = `You are joining a private Google Meet as an OpenClaw agent. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`; +export const DEFAULT_GOOGLE_MEET_REALTIME_INTRO_MESSAGE = "Say exactly: I'm here and listening."; export const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = { enabled: true, @@ -125,6 +127,7 @@ export const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = { realtime: { provider: "openai", instructions: DEFAULT_GOOGLE_MEET_REALTIME_INSTRUCTIONS, + introMessage: DEFAULT_GOOGLE_MEET_REALTIME_INTRO_MESSAGE, toolPolicy: "safe-read-only", providers: {}, }, @@ -339,6 +342,9 @@ export function resolveGoogleMeetConfigWithEnv( instructions: normalizeOptionalString(realtime.instructions) ?? DEFAULT_GOOGLE_MEET_CONFIG.realtime.instructions, + introMessage: + normalizeOptionalString(realtime.introMessage) ?? 
+ DEFAULT_GOOGLE_MEET_CONFIG.realtime.introMessage, toolPolicy: resolveToolPolicy( realtime.toolPolicy, DEFAULT_GOOGLE_MEET_CONFIG.realtime.toolPolicy, diff --git a/extensions/google-meet/src/realtime-node.ts b/extensions/google-meet/src/realtime-node.ts index 2b197fda37c..1a9aa6ffb69 100644 --- a/extensions/google-meet/src/realtime-node.ts +++ b/extensions/google-meet/src/realtime-node.ts @@ -19,6 +19,7 @@ export type ChromeNodeRealtimeAudioBridgeHandle = { providerId: string; nodeId: string; bridgeId: string; + speak: (instructions?: string) => void; stop: () => Promise; }; @@ -81,6 +82,8 @@ export async function startNodeRealtimeAudioBridge(params: { provider: resolved.provider, providerConfig: resolved.providerConfig, instructions: params.config.realtime.instructions, + initialGreetingInstructions: params.config.realtime.introMessage, + triggerGreetingOnReady: Boolean(params.config.realtime.introMessage), markStrategy: "ack-immediately", tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy), audioSink: { @@ -188,6 +191,9 @@ export async function startNodeRealtimeAudioBridge(params: { providerId: resolved.provider.id, nodeId: params.nodeId, bridgeId: params.bridgeId, + speak: (instructions) => { + bridge?.triggerGreeting(instructions); + }, stop, }; } diff --git a/extensions/google-meet/src/realtime.ts b/extensions/google-meet/src/realtime.ts index 944a25d005f..6c9fc988dc3 100644 --- a/extensions/google-meet/src/realtime.ts +++ b/extensions/google-meet/src/realtime.ts @@ -41,6 +41,7 @@ export type ChromeRealtimeAudioBridgeHandle = { providerId: string; inputCommand: string[]; outputCommand: string[]; + speak: (instructions?: string) => void; stop: () => Promise; }; @@ -148,6 +149,8 @@ export async function startCommandRealtimeAudioBridge(params: { provider: resolved.provider, providerConfig: resolved.providerConfig, instructions: params.config.realtime.instructions, + initialGreetingInstructions: params.config.realtime.introMessage, + 
triggerGreetingOnReady: Boolean(params.config.realtime.introMessage), markStrategy: "ack-immediately", tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy), audioSink: { @@ -210,6 +213,9 @@ export async function startCommandRealtimeAudioBridge(params: { providerId: resolved.provider.id, inputCommand: params.inputCommand, outputCommand: params.outputCommand, + speak: (instructions) => { + bridge?.triggerGreeting(instructions); + }, stop, }; } diff --git a/extensions/google-meet/src/runtime.ts b/extensions/google-meet/src/runtime.ts index e3bec919844..44183046ad3 100644 --- a/extensions/google-meet/src/runtime.ts +++ b/extensions/google-meet/src/runtime.ts @@ -49,6 +49,7 @@ function resolveMode(input: GoogleMeetMode | undefined, config: GoogleMeetConfig export class GoogleMeetRuntime { readonly #sessions = new Map<string, GoogleMeetSession>(); readonly #sessionStops = new Map<string, () => Promise<void>>(); + readonly #sessionSpeakers = new Map<string, (instructions?: string) => void>(); constructor( private readonly params: { @@ -151,6 +152,7 @@ export class GoogleMeetRuntime { result.audioBridge?.type === "node-command-pair" ) { this.#sessionStops.set(session.id, result.audioBridge.stop); + this.#sessionSpeakers.set(session.id, result.audioBridge.speak); } session.notes.push( result.audioBridge @@ -215,10 +217,28 @@ export class GoogleMeetRuntime { const stop = this.#sessionStops.get(sessionId); if (stop) { this.#sessionStops.delete(sessionId); + this.#sessionSpeakers.delete(sessionId); await stop(); } session.state = "ended"; session.updatedAt = nowIso(); return { found: true, session }; } + + speak( + sessionId: string, + instructions?: string, + ): { found: boolean; spoken: boolean; session?: GoogleMeetSession } { + const session = this.#sessions.get(sessionId); + if (!session) { + return { found: false, spoken: false }; + } + const speak = this.#sessionSpeakers.get(sessionId); + if (!speak || session.state !== "active") { + return { found: true, spoken: false, session }; + } + speak(instructions || 
this.params.config.realtime.introMessage); + session.updatedAt = nowIso(); + return { found: true, spoken: true, session }; + } } diff --git a/extensions/google-meet/src/transports/chrome.ts b/extensions/google-meet/src/transports/chrome.ts index f3fe1fef0eb..588db7706ed 100644 --- a/extensions/google-meet/src/transports/chrome.ts +++ b/extensions/google-meet/src/transports/chrome.ts @@ -201,10 +201,14 @@ function parseNodeStartResult(raw: unknown): { bridgeId?: string; audioBridge?: { type?: string }; } { - if (!raw || typeof raw !== "object") { + const value = + raw && typeof raw === "object" && "payload" in raw + ? (raw as { payload?: unknown }).payload + : raw; + if (!value || typeof value !== "object") { throw new Error("Google Meet node returned an invalid start result."); } - return raw as { + return value as { launched?: boolean; bridgeId?: string; audioBridge?: { type?: string };