diff --git a/CHANGELOG.md b/CHANGELOG.md index 19ef6fbd2ab..cbf581f38b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,12 @@ Docs: https://docs.openclaw.ai - Providers/OpenAI: add image generation and reference-image editing through Codex OAuth, so `openai/gpt-image-2` works without an `OPENAI_API_KEY`. Fixes #70703. - Providers/OpenRouter: add image generation and reference-image editing through `image_generate`, so OpenRouter image models work with `OPENROUTER_API_KEY`. Fixes #55066 via #67668. Thanks @notamicrodose. - Image generation: let agents request provider-supported quality and output format hints, and pass OpenAI-specific background, moderation, compression, and user hints through the `image_generate` tool. (#70503) Thanks @ottodeng. +- Plugins/Google Meet: let realtime Meet sessions consult the full OpenClaw agent for deeper answers while staying in the live voice loop. ### Fixes - Gateway/WebChat: preserve image attachments for text-only primary models by offloading them as media refs instead of dropping them, so configured image tools can still inspect the original file. Fixes #68513, #44276, #51656, #70212. +- Plugins/Google Meet: hang up delegated Twilio calls on leave, clean up Chrome realtime audio bridges when launch fails, and use a flat provider-safe tool schema. - Media understanding: honor explicit image-model configuration before native-vision skips, including `agents.defaults.imageModel`, `tools.media.image.models`, and provider image defaults such as MiniMax VL when the active chat model is text-only. Fixes #47614, #63722, #69171. - Codex/media understanding: support `codex/*` image models through bounded Codex app-server image turns, while keeping `openai-codex/*` on the OpenAI Codex OAuth route and validating app-server responses against generated protocol contracts. Fixes #70201. 
- Providers/OpenAI Codex: synthesize the `openai-codex/gpt-5.5` OAuth model row when Codex catalog discovery omits it, so cron and subagent runs do not fail with `Unknown model` while the account is authenticated. diff --git a/docs/plugins/google-meet.md b/docs/plugins/google-meet.md index 37a5aed32dc..a1966b25d41 100644 --- a/docs/plugins/google-meet.md +++ b/docs/plugins/google-meet.md @@ -14,6 +14,8 @@ The plugin is explicit by design: - It only joins an explicit `https://meet.google.com/...` URL. - `realtime` voice is the default mode. +- Realtime voice can call back into the full OpenClaw agent when deeper + reasoning or tools are needed. - Auth starts as personal Google OAuth or an already signed-in Chrome profile. - There is no automatic consent announcement. - The default Chrome audio backend is `BlackHole 2ch`. @@ -212,6 +214,12 @@ call still needs a participant path. This plugin keeps that boundary visible: Chrome handles browser participation and local audio routing; Twilio handles phone dial-in participation. +Realtime mode gives the voice model one tool, `openclaw_agent_consult`, unless +`realtime.toolPolicy` is `none`. The tool asks the normal OpenClaw agent for a +concise spoken answer, using recent meeting transcript as context. With +`safe-read-only`, the consult run is limited to read/search/memory tools. With +`owner`, it inherits the normal agent tool policy. 
+ Chrome realtime mode needs either: - `chrome.audioInputCommand` plus `chrome.audioOutputCommand`: OpenClaw owns the diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index a1b2587ca36..66a4545cd62 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -22,6 +22,7 @@ import { buildMeetDtmfSequence, normalizeDialInNumber } from "./src/transports/t const voiceCallMocks = vi.hoisted(() => ({ joinMeetViaVoiceCallGateway: vi.fn(async () => ({ callId: "call-1", dtmfSent: true })), + endMeetVoiceCallGatewayCall: vi.fn(async () => {}), })); const fetchGuardMocks = vi.hoisted(() => ({ @@ -45,6 +46,7 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({ vi.mock("./src/voice-call-gateway.js", () => ({ joinMeetViaVoiceCallGateway: voiceCallMocks.joinMeetViaVoiceCallGateway, + endMeetVoiceCallGatewayCall: voiceCallMocks.endMeetVoiceCallGatewayCall, })); const noopLogger = { @@ -168,6 +170,24 @@ describe("google-meet plugin", () => { }); }); + it("uses a provider-safe flat tool parameter schema", () => { + const { tools } = setup(); + const tool = tools[0] as { parameters: unknown }; + + expect(JSON.stringify(tool.parameters)).not.toContain("anyOf"); + expect(tool.parameters).toMatchObject({ + type: "object", + properties: { + action: { + type: "string", + enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave"], + }, + transport: { type: "string", enum: ["chrome", "twilio"] }, + mode: { type: "string", enum: ["realtime", "transcribe"] }, + }, + }); + }); + it("normalizes Meet URLs, codes, and space names for the Meet API", () => { expect(normalizeGoogleMeetSpaceName("spaces/abc-defg-hij")).toBe("spaces/abc-defg-hij"); expect(normalizeGoogleMeetSpaceName("abc-defg-hij")).toBe("spaces/abc-defg-hij"); @@ -323,6 +343,26 @@ describe("google-meet plugin", () => { }); }); + it("hangs up delegated Twilio calls on leave", async () => { + const { tools } = setup({ defaultTransport: 
"twilio" }); + const tool = tools[0] as { + execute: (id: string, params: unknown) => Promise<{ details: { session: { id: string } } }>; + }; + const joined = await tool.execute("id", { + action: "join", + url: "https://meet.google.com/abc-defg-hij", + dialInNumber: "+15551234567", + pin: "123456", + }); + + await tool.execute("id", { action: "leave", sessionId: joined.details.session.id }); + + expect(voiceCallMocks.endMeetVoiceCallGatewayCall).toHaveBeenCalledWith({ + config: expect.objectContaining({ defaultTransport: "twilio" }), + callId: "call-1", + }); + }); + it("reports setup status through the tool", async () => { const { tools } = setup({ chrome: { @@ -415,6 +455,13 @@ describe("google-meet plugin", () => { | { onAudio: (audio: Buffer) => void; onMark?: (markName: string) => void; + onToolCall?: (event: { + itemId: string; + callId: string; + name: string; + args: unknown; + }) => void; + tools?: unknown[]; } | undefined; const sendAudio = vi.fn(); @@ -464,12 +511,33 @@ describe("google-meet plugin", () => { const inputProcess = makeProcess({ stdout: inputStdout, stdin: null }); const outputProcess = makeProcess({ stdin: outputStdin, stdout: null }); const spawnMock = vi.fn().mockReturnValueOnce(outputProcess).mockReturnValueOnce(inputProcess); + const sessionStore: Record = {}; + const runtime = { + agent: { + resolveAgentDir: vi.fn(() => "/tmp/agent"), + resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"), + ensureAgentWorkspace: vi.fn(async () => {}), + session: { + resolveStorePath: vi.fn(() => "/tmp/sessions.json"), + loadSessionStore: vi.fn(() => sessionStore), + saveSessionStore: vi.fn(async () => {}), + resolveSessionFilePath: vi.fn(() => "/tmp/session.json"), + }, + runEmbeddedPiAgent: vi.fn(async () => ({ + payloads: [{ text: "Use the Portugal launch data." 
}], + meta: {}, + })), + resolveAgentTimeoutMs: vi.fn(() => 1000), + }, + }; const handle = await startCommandRealtimeAudioBridge({ config: resolveGoogleMeetConfig({ realtime: { provider: "openai", model: "gpt-realtime" }, }), fullConfig: {} as never, + runtime: runtime as never, + meetingSessionId: "meet-1", inputCommand: ["capture-meet"], outputCommand: ["play-meet"], logger: noopLogger, @@ -480,6 +548,12 @@ describe("google-meet plugin", () => { inputStdout.write(Buffer.from([1, 2, 3])); callbacks?.onAudio(Buffer.from([4, 5])); callbacks?.onMark?.("mark-1"); + callbacks?.onToolCall?.({ + itemId: "item-1", + callId: "tool-call-1", + name: "openclaw_agent_consult", + args: { question: "What should I say about launch timing?" }, + }); expect(spawnMock).toHaveBeenNthCalledWith(1, "play-meet", [], { stdio: ["pipe", "ignore", "pipe"], @@ -490,6 +564,25 @@ describe("google-meet plugin", () => { expect(sendAudio).toHaveBeenCalledWith(Buffer.from([1, 2, 3])); expect(outputStdinWrites).toEqual([Buffer.from([4, 5])]); expect(bridge.acknowledgeMark).toHaveBeenCalled(); + expect(callbacks).toMatchObject({ + tools: [ + expect.objectContaining({ + name: "openclaw_agent_consult", + }), + ], + }); + await vi.waitFor(() => { + expect(bridge.submitToolResult).toHaveBeenCalledWith("tool-call-1", { + text: "Use the Portugal launch data.", + }); + }); + expect(runtime.agent.runEmbeddedPiAgent).toHaveBeenCalledWith( + expect.objectContaining({ + messageProvider: "google-meet", + thinkLevel: "high", + toolsAllow: ["read", "web_search", "web_fetch", "x_search", "memory_search", "memory_get"], + }), + ); await handle.stop(); expect(bridge.close).toHaveBeenCalled(); diff --git a/extensions/google-meet/index.ts b/extensions/google-meet/index.ts index cdefd416238..ab3940090cd 100644 --- a/extensions/google-meet/index.ts +++ b/extensions/google-meet/index.ts @@ -103,46 +103,27 @@ const googleMeetConfigSchema = { }, }; -const GoogleMeetToolSchema = Type.Union([ - Type.Object({ - action: 
Type.Literal("join"), - url: Type.Optional(Type.String({ description: "Explicit https://meet.google.com/... URL" })), - transport: Type.Optional(Type.Union([Type.Literal("chrome"), Type.Literal("twilio")])), - mode: Type.Optional(Type.Union([Type.Literal("realtime"), Type.Literal("transcribe")])), - dialInNumber: Type.Optional(Type.String({ description: "Meet dial-in number for Twilio" })), - pin: Type.Optional(Type.String({ description: "Meet phone PIN for Twilio" })), - dtmfSequence: Type.Optional(Type.String({ description: "Explicit DTMF sequence for Twilio" })), +const GoogleMeetToolSchema = Type.Object({ + action: Type.String({ + enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave"], + description: "Google Meet action to run", }), - Type.Object({ - action: Type.Literal("status"), - sessionId: Type.Optional(Type.String({ description: "Meet session ID" })), - }), - Type.Object({ - action: Type.Literal("setup_status"), - }), - Type.Object({ - action: Type.Literal("resolve_space"), - meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })), - accessToken: Type.Optional(Type.String({ description: "Access token override" })), - refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })), - clientId: Type.Optional(Type.String({ description: "OAuth client id override" })), - clientSecret: Type.Optional(Type.String({ description: "OAuth client secret override" })), - expiresAt: Type.Optional(Type.Number({ description: "Cached access token expiry ms" })), - }), - Type.Object({ - action: Type.Literal("preflight"), - meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })), - accessToken: Type.Optional(Type.String({ description: "Access token override" })), - refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })), - clientId: Type.Optional(Type.String({ description: "OAuth client id override" })), - clientSecret: 
Type.Optional(Type.String({ description: "OAuth client secret override" })), - expiresAt: Type.Optional(Type.Number({ description: "Cached access token expiry ms" })), - }), - Type.Object({ - action: Type.Literal("leave"), - sessionId: Type.String({ description: "Meet session ID" }), - }), -]); + url: Type.Optional(Type.String({ description: "Explicit https://meet.google.com/... URL" })), + transport: Type.Optional( + Type.String({ enum: ["chrome", "twilio"], description: "Join transport" }), + ), + mode: Type.Optional(Type.String({ enum: ["realtime", "transcribe"], description: "Join mode" })), + dialInNumber: Type.Optional(Type.String({ description: "Meet dial-in number for Twilio" })), + pin: Type.Optional(Type.String({ description: "Meet phone PIN for Twilio" })), + dtmfSequence: Type.Optional(Type.String({ description: "Explicit DTMF sequence for Twilio" })), + sessionId: Type.Optional(Type.String({ description: "Meet session ID" })), + meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })), + accessToken: Type.Optional(Type.String({ description: "Access token override" })), + refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })), + clientId: Type.Optional(Type.String({ description: "OAuth client id override" })), + clientSecret: Type.Optional(Type.String({ description: "OAuth client secret override" })), + expiresAt: Type.Optional(Type.Number({ description: "Cached access token expiry ms" })), +}); function asParamRecord(params: unknown): Record { return params && typeof params === "object" && !Array.isArray(params) diff --git a/extensions/google-meet/src/agent-consult.ts b/extensions/google-meet/src/agent-consult.ts new file mode 100644 index 00000000000..1e606768372 --- /dev/null +++ b/extensions/google-meet/src/agent-consult.ts @@ -0,0 +1,163 @@ +import { randomUUID } from "node:crypto"; +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import type { 
PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime"; +import type { RealtimeVoiceTool } from "openclaw/plugin-sdk/realtime-voice"; +import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime"; +import type { GoogleMeetConfig, GoogleMeetToolPolicy } from "./config.js"; + +type AgentPayload = { + text?: string; + isError?: boolean; + isReasoning?: boolean; +}; + +export const GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME = "openclaw_agent_consult"; + +export const GOOGLE_MEET_AGENT_CONSULT_TOOL: RealtimeVoiceTool = { + type: "function", + name: GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME, + description: + "Ask the full OpenClaw agent for deeper reasoning, current information, or tool-backed help before speaking in the meeting.", + parameters: { + type: "object", + properties: { + question: { + type: "string", + description: "The concrete question or task the meeting participant asked.", + }, + context: { + type: "string", + description: "Optional relevant meeting context or transcript summary.", + }, + responseStyle: { + type: "string", + description: "Optional style hint for the spoken answer.", + }, + }, + required: ["question"], + }, +}; + +export function resolveGoogleMeetRealtimeTools(policy: GoogleMeetToolPolicy): RealtimeVoiceTool[] { + return policy === "none" ? [] : [GOOGLE_MEET_AGENT_CONSULT_TOOL]; +} + +function normalizeToolArgString(args: unknown, key: string): string | undefined { + if (!args || typeof args !== "object" || Array.isArray(args)) { + return undefined; + } + return normalizeOptionalString((args as Record)[key]); +} + +function collectVisibleText(payloads: AgentPayload[]): string | null { + const chunks: string[] = []; + for (const payload of payloads) { + if (payload.isError || payload.isReasoning) { + continue; + } + const text = normalizeOptionalString(payload.text); + if (text) { + chunks.push(text); + } + } + return chunks.length > 0 ? 
chunks.join("\n\n").trim() : null; +} + +function resolveToolsAllow(policy: GoogleMeetToolPolicy): string[] | undefined { + if (policy === "owner") { + return undefined; + } + if (policy === "safe-read-only") { + return ["read", "web_search", "web_fetch", "x_search", "memory_search", "memory_get"]; + } + return []; +} + +function buildPrompt(params: { + args: unknown; + transcript: Array<{ role: "user" | "assistant"; text: string }>; +}): string { + const question = normalizeToolArgString(params.args, "question"); + if (!question) { + throw new Error("question required"); + } + const context = normalizeToolArgString(params.args, "context"); + const responseStyle = normalizeToolArgString(params.args, "responseStyle"); + const transcript = params.transcript + .slice(-12) + .map((entry) => `${entry.role === "assistant" ? "Agent" : "Participant"}: ${entry.text}`) + .join("\n"); + return [ + "You are helping an OpenClaw realtime voice agent during a private Google Meet.", + "Answer the participant's question with the strongest useful reasoning and available tools.", + "Return only the concise answer the realtime voice agent should speak next.", + "Do not include markdown, citations unless needed, tool logs, or private reasoning.", + responseStyle ? `Spoken style: ${responseStyle}` : undefined, + transcript ? `Recent meeting transcript:\n${transcript}` : undefined, + context ? 
`Additional context:\n${context}` : undefined, + `Question:\n${question}`, + ] + .filter(Boolean) + .join("\n\n"); +} + +export async function consultOpenClawAgentForGoogleMeet(params: { + config: GoogleMeetConfig; + fullConfig: OpenClawConfig; + runtime: PluginRuntime; + logger: RuntimeLogger; + meetingSessionId: string; + args: unknown; + transcript: Array<{ role: "user" | "assistant"; text: string }>; +}): Promise<{ text: string }> { + const agentId = "main"; + const sessionKey = `google-meet:${params.meetingSessionId}`; + const cfg = params.fullConfig; + const agentDir = params.runtime.agent.resolveAgentDir(cfg, agentId); + const workspaceDir = params.runtime.agent.resolveAgentWorkspaceDir(cfg, agentId); + await params.runtime.agent.ensureAgentWorkspace({ dir: workspaceDir }); + + const storePath = params.runtime.agent.session.resolveStorePath(cfg.session?.store, { agentId }); + const sessionStore = params.runtime.agent.session.loadSessionStore(storePath); + const now = Date.now(); + const existing = sessionStore[sessionKey] as + | { sessionId?: string; updatedAt?: number } + | undefined; + const sessionId = normalizeOptionalString(existing?.sessionId) ?? 
randomUUID(); + sessionStore[sessionKey] = { ...existing, sessionId, updatedAt: now }; + await params.runtime.agent.session.saveSessionStore(storePath, sessionStore); + + const sessionFile = params.runtime.agent.session.resolveSessionFilePath( + sessionId, + sessionStore[sessionKey], + { agentId }, + ); + const result = await params.runtime.agent.runEmbeddedPiAgent({ + sessionId, + sessionKey, + messageProvider: "google-meet", + sessionFile, + workspaceDir, + config: cfg, + prompt: buildPrompt({ args: params.args, transcript: params.transcript }), + thinkLevel: "high", + verboseLevel: "off", + reasoningLevel: "off", + toolResultFormat: "plain", + toolsAllow: resolveToolsAllow(params.config.realtime.toolPolicy), + timeoutMs: params.runtime.agent.resolveAgentTimeoutMs({ cfg }), + runId: `google-meet:${params.meetingSessionId}:${Date.now()}`, + lane: "google-meet", + extraSystemPrompt: + "You are a behind-the-scenes consultant for a live meeting voice agent. Be accurate, brief, and speakable.", + agentDir, + }); + + const text = collectVisibleText((result.payloads ?? []) as AgentPayload[]); + if (!text) { + const reason = result.meta?.aborted ? "agent run aborted" : "agent returned no speakable text"; + params.logger.warn(`[google-meet] agent consult produced no answer: ${reason}`); + return { text: "I need a moment to verify that before answering." 
}; + } + return { text }; +} diff --git a/extensions/google-meet/src/realtime.ts b/extensions/google-meet/src/realtime.ts index 81415a98303..944a25d005f 100644 --- a/extensions/google-meet/src/realtime.ts +++ b/extensions/google-meet/src/realtime.ts @@ -2,7 +2,7 @@ import { spawn } from "node:child_process"; import type { Writable } from "node:stream"; import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; -import type { RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime"; +import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime"; import { createRealtimeVoiceBridgeSession, resolveConfiguredRealtimeVoiceProvider, @@ -10,6 +10,11 @@ import { type RealtimeVoiceProviderConfig, type RealtimeVoiceProviderPlugin, } from "openclaw/plugin-sdk/realtime-voice"; +import { + consultOpenClawAgentForGoogleMeet, + GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME, + resolveGoogleMeetRealtimeTools, +} from "./agent-consult.js"; import type { GoogleMeetConfig } from "./config.js"; type BridgeProcess = { @@ -70,6 +75,8 @@ export function resolveGoogleMeetRealtimeProvider(params: { export async function startCommandRealtimeAudioBridge(params: { config: GoogleMeetConfig; fullConfig: OpenClawConfig; + runtime: PluginRuntime; + meetingSessionId: string; inputCommand: string[]; outputCommand: string[]; logger: RuntimeLogger; @@ -136,11 +143,13 @@ export async function startCommandRealtimeAudioBridge(params: { fullConfig: params.fullConfig, providers: params.providers, }); + const transcript: Array<{ role: "user" | "assistant"; text: string }> = []; bridge = createRealtimeVoiceBridgeSession({ provider: resolved.provider, providerConfig: resolved.providerConfig, instructions: params.config.realtime.instructions, markStrategy: "ack-immediately", + tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy), audioSink: { isOpen: () => !stopped, sendAudio: (muLaw) => { 
@@ -149,9 +158,38 @@ export async function startCommandRealtimeAudioBridge(params: { }, onTranscript: (role, text, isFinal) => { if (isFinal) { + transcript.push({ role, text }); + if (transcript.length > 40) { + transcript.splice(0, transcript.length - 40); + } params.logger.debug?.(`[google-meet] ${role}: ${text}`); } }, + onToolCall: (event, session) => { + if (event.name !== GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME) { + session.submitToolResult(event.callId || event.itemId, { + error: `Tool "${event.name}" not available`, + }); + return; + } + void consultOpenClawAgentForGoogleMeet({ + config: params.config, + fullConfig: params.fullConfig, + runtime: params.runtime, + logger: params.logger, + meetingSessionId: params.meetingSessionId, + args: event.args, + transcript, + }) + .then((result) => { + session.submitToolResult(event.callId || event.itemId, result); + }) + .catch((error: Error) => { + session.submitToolResult(event.callId || event.itemId, { + error: formatErrorMessage(error), + }); + }); + }, onError: fail("realtime voice bridge"), onClose: (reason) => { if (reason === "error") { diff --git a/extensions/google-meet/src/runtime.ts b/extensions/google-meet/src/runtime.ts index 9e64d00dc00..64ef8041b96 100644 --- a/extensions/google-meet/src/runtime.ts +++ b/extensions/google-meet/src/runtime.ts @@ -12,7 +12,7 @@ import type { GoogleMeetJoinResult, GoogleMeetSession, } from "./transports/types.js"; -import { joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js"; +import { endMeetVoiceCallGatewayCall, joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js"; function nowIso(): string { return new Date().toISOString(); @@ -110,6 +110,7 @@ export class GoogleMeetRuntime { runtime: this.params.runtime, config: this.params.config, fullConfig: this.params.fullConfig, + meetingSessionId: session.id, mode, url, logger: this.params.logger, @@ -161,6 +162,14 @@ export class GoogleMeetRuntime { voiceCallId: voiceCallResult?.callId, dtmfSent: 
voiceCallResult?.dtmfSent, }; + if (voiceCallResult?.callId) { + this.#sessionStops.set(session.id, async () => { + await endMeetVoiceCallGatewayCall({ + config: this.params.config, + callId: voiceCallResult.callId, + }); + }); + } session.notes.push( this.params.config.voiceCall.enabled ? "Twilio transport delegated the call to the voice-call plugin and sent configured DTMF." diff --git a/extensions/google-meet/src/transports/chrome.ts b/extensions/google-meet/src/transports/chrome.ts index 15d34d301ab..d2cdb36d303 100644 --- a/extensions/google-meet/src/transports/chrome.ts +++ b/extensions/google-meet/src/transports/chrome.ts @@ -46,6 +46,7 @@ export async function launchChromeMeet(params: { runtime: PluginRuntime; config: GoogleMeetConfig; fullConfig: OpenClawConfig; + meetingSessionId: string; mode: "realtime" | "transcribe"; url: string; logger: RuntimeLogger; @@ -99,6 +100,8 @@ export async function launchChromeMeet(params: { ...(await startCommandRealtimeAudioBridge({ config: params.config, fullConfig: params.fullConfig, + runtime: params.runtime, + meetingSessionId: params.meetingSessionId, inputCommand: params.config.chrome.audioInputCommand, outputCommand: params.config.chrome.audioOutputCommand, logger: params.logger, @@ -116,13 +119,30 @@ export async function launchChromeMeet(params: { } argv.push(params.url); - const result = await params.runtime.system.runCommandWithTimeout(argv, { - timeoutMs: params.config.chrome.joinTimeoutMs, - }); - if (result.code !== 0) { + let commandPairBridgeStopped = false; + const stopCommandPairBridge = async () => { + if (commandPairBridgeStopped) { + return; + } + commandPairBridgeStopped = true; + if (audioBridge?.type === "command-pair") { + await audioBridge.stop(); + } + }; + + try { + const result = await params.runtime.system.runCommandWithTimeout(argv, { + timeoutMs: params.config.chrome.joinTimeoutMs, + }); + if (result.code === 0) { + return { launched: true, audioBridge }; + } + await 
stopCommandPairBridge(); throw new Error( `failed to launch Chrome for Meet: ${result.stderr || result.stdout || result.code}`, ); + } catch (error) { + await stopCommandPairBridge(); + throw error; + } - return { launched: true, audioBridge }; } diff --git a/extensions/google-meet/src/voice-call-gateway.ts b/extensions/google-meet/src/voice-call-gateway.ts index b712ccd87d1..f65a3e376ed 100644 --- a/extensions/google-meet/src/voice-call-gateway.ts +++ b/extensions/google-meet/src/voice-call-gateway.ts @@ -82,3 +82,23 @@ export async function joinMeetViaVoiceCallGateway(params: { await client?.stopAndWait({ timeoutMs: 1_000 }); } } + +export async function endMeetVoiceCallGatewayCall(params: { + config: GoogleMeetConfig; + callId: string; +}): Promise<void> { + let client: VoiceCallGatewayClient | undefined; + + try { + client = await createConnectedGatewayClient(params.config); + await client.request( + "voicecall.end", + { + callId: params.callId, + }, + { timeoutMs: params.config.voiceCall.requestTimeoutMs }, + ); + } finally { + await client?.stopAndWait({ timeoutMs: 1_000 }); + } +}