diff --git a/CHANGELOG.md b/CHANGELOG.md index a790c92cab3..2adc155439b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Google Meet/Voice Call: defer Twilio dial-in intro speech until after Meet DTMF entry and route delayed speech through the active realtime Voice Call bridge. Thanks @donkeykong91 and @PfanP. - Google Meet/Voice Call: make Twilio setup preflight honor explicit `--transport twilio` and fail local/private Voice Call webhook URLs before joins. Thanks @donkeykong91 and @PfanP. - Voice Call/Twilio: retry transient 21220 live-call TwiML updates and catch answered-path initial-greeting failures, so a fast answered callback no longer crashes the Gateway or drops the Twilio greeting/listen transition. (#74606) Thanks @Sivan22. - Voice Call/Twilio: register accepted media streams immediately but wait for realtime transcription readiness before speaking the initial greeting, so reconnect grace handling stays live while OpenAI STT startup is no longer starved by TTS. Fixes #75197. (#75257) Thanks @donkeykong91 and @PfanP. diff --git a/docs/plugins/google-meet.md b/docs/plugins/google-meet.md index 0ffe53dea17..aea755b023d 100644 --- a/docs/plugins/google-meet.md +++ b/docs/plugins/google-meet.md @@ -1411,6 +1411,9 @@ participant: the PIN. - Increase the leading pauses in `--dtmf-sequence` if Meet answers slowly, for example `wwww123456#`. +- If the participant joins but you miss the first spoken line, increase + `plugins.entries.google-meet.config.voiceCall.postDtmfSpeechDelayMs` so the + intro is spoken after Meet finishes admitting the phone participant. If webhooks do not arrive, debug the Voice Call plugin first: the provider must reach `plugins.entries.voice-call.config.publicUrl` or the configured tunnel. diff --git a/docs/plugins/voice-call.md b/docs/plugins/voice-call.md index 2adbab620d9..be59df2f39f 100644 --- a/docs/plugins/voice-call.md +++ b/docs/plugins/voice-call.md @@ -766,6 +766,11 @@ If Voice Call is green but the Meet participant never joins, check the Meet dial-in number, PIN, and `--dtmf-sequence`. The phone call can be healthy while the meeting rejects or ignores an incorrect DTMF sequence. +Google Meet starts Voice Call silently, sends DTMF, then asks Voice Call to +speak the intro after `voiceCall.postDtmfSpeechDelayMs`. Increase that delay in +the Google Meet plugin config if the first line is spoken before Meet admits the +phone participant. + ### Realtime call has no speech Confirm only one audio mode is enabled. `realtime.enabled` and diff --git a/extensions/google-meet/index.create.test.ts b/extensions/google-meet/index.create.test.ts index 29ec2468af2..92e1c787858 100644 --- a/extensions/google-meet/index.create.test.ts +++ b/extensions/google-meet/index.create.test.ts @@ -12,8 +12,13 @@ import { import { CREATE_MEET_FROM_BROWSER_SCRIPT } from "./src/transports/chrome-create.js"; const voiceCallMocks = vi.hoisted(() => ({ - joinMeetViaVoiceCallGateway: vi.fn(async () => ({ callId: "call-1", dtmfSent: true })), + joinMeetViaVoiceCallGateway: vi.fn(async () => ({ + callId: "call-1", + dtmfSent: true, + introSent: true, + })), endMeetVoiceCallGatewayCall: vi.fn(async () => {}), + speakMeetViaVoiceCallGateway: vi.fn(async () => {}), })); const fetchGuardMocks = vi.hoisted(() => ({ @@ -38,6 +43,7 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({ vi.mock("./src/voice-call-gateway.js", () => ({ joinMeetViaVoiceCallGateway: voiceCallMocks.joinMeetViaVoiceCallGateway, endMeetVoiceCallGatewayCall: voiceCallMocks.endMeetVoiceCallGatewayCall, + speakMeetViaVoiceCallGateway: voiceCallMocks.speakMeetViaVoiceCallGateway, })); function setup( diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index 29493f41879..81f7b7732c6 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -35,8 +35,13 @@ import { buildMeetDtmfSequence, normalizeDialInNumber } from "./src/transports/t import type { GoogleMeetSession } from "./src/transports/types.js"; const voiceCallMocks = vi.hoisted(() => ({ - joinMeetViaVoiceCallGateway: vi.fn(async () => ({ callId: "call-1", dtmfSent: true })), + joinMeetViaVoiceCallGateway: vi.fn(async () => ({ + callId: "call-1", + dtmfSent: true, + introSent: true, + })), endMeetVoiceCallGatewayCall: vi.fn(async () => {}), + speakMeetViaVoiceCallGateway: vi.fn(async () => {}), })); const fetchGuardMocks = vi.hoisted(() => ({ @@ -61,6 +66,7 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({ vi.mock("./src/voice-call-gateway.js", () => ({ joinMeetViaVoiceCallGateway: voiceCallMocks.joinMeetViaVoiceCallGateway, endMeetVoiceCallGatewayCall: voiceCallMocks.endMeetVoiceCallGatewayCall, + speakMeetViaVoiceCallGateway: voiceCallMocks.speakMeetViaVoiceCallGateway, })); function setup( @@ -348,7 +354,12 @@ describe("google-meet plugin", () => { "BlackHole 2ch", ], }, - voiceCall: { enabled: true, requestTimeoutMs: 30000, dtmfDelayMs: 2500 }, + voiceCall: { + enabled: true, + requestTimeoutMs: 30000, + dtmfDelayMs: 2500, + postDtmfSpeechDelayMs: 5000, + }, realtime: { provider: "openai", introMessage: "Say exactly: I'm here and listening.", @@ -955,12 +966,14 @@ describe("google-meet plugin", () => { dtmfSequence: "123456#", voiceCallId: "call-1", dtmfSent: true, + introSent: true, }, }); expect(voiceCallMocks.joinMeetViaVoiceCallGateway).toHaveBeenCalledWith({ config: expect.objectContaining({ defaultTransport: "twilio" }), dialInNumber: "+15551234567", dtmfSequence: "123456#", + message: "Say exactly: I'm here and listening.", }); }); @@ -984,6 +997,32 @@ describe("google-meet plugin", () => { }); }); + it("delegates Twilio session speech through voice-call", async () => { + const { tools } = setup({ defaultTransport: "twilio" }); + const tool = tools[0] as { + execute: (id: string, params: unknown) => Promise<{ details: { session: { id: string } } }>; + }; + const joined = await tool.execute("id", { + action: "join", + url: "https://meet.google.com/abc-defg-hij", + dialInNumber: "+15551234567", + pin: "123456", + }); + + const spoken = await tool.execute("id", { + action: "speak", + sessionId: joined.details.session.id, + message: "Say exactly: hello after joining.", + }); + + expect(spoken.details).toMatchObject({ spoken: true }); + expect(voiceCallMocks.speakMeetViaVoiceCallGateway).toHaveBeenCalledWith({ + config: expect.objectContaining({ defaultTransport: "twilio" }), + callId: "call-1", + message: "Say exactly: hello after joining.", + }); + }); + it("reports setup status through the tool", async () => { const originalPlatform = process.platform; Object.defineProperty(process, "platform", { value: "darwin" }); diff --git a/extensions/google-meet/index.ts b/extensions/google-meet/index.ts index b42e1d6c81e..5f58c2f82b9 100644 --- a/extensions/google-meet/index.ts +++ b/extensions/google-meet/index.ts @@ -119,6 +119,10 @@ const googleMeetConfigSchema = { advanced: true, }, "voiceCall.dtmfDelayMs": { label: "DTMF Delay (ms)", advanced: true }, + "voiceCall.postDtmfSpeechDelayMs": { + label: "Post-DTMF Speech Delay (ms)", + advanced: true, + }, "voiceCall.introMessage": { label: "Voice Call Intro Message", advanced: true }, "realtime.provider": { label: "Realtime Provider", diff --git a/extensions/google-meet/src/config.ts b/extensions/google-meet/src/config.ts index 57b28191b47..f75bdf084d7 100644 --- a/extensions/google-meet/src/config.ts +++ b/extensions/google-meet/src/config.ts @@ -52,6 +52,7 @@ export type GoogleMeetConfig = { token?: string; requestTimeoutMs: number; dtmfDelayMs: number; + postDtmfSpeechDelayMs: number; introMessage?: string; }; realtime: { @@ -181,6 +182,7 @@ export const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = { enabled: true, requestTimeoutMs: 30_000, dtmfDelayMs: 2_500, + postDtmfSpeechDelayMs: 5_000, }, realtime: { provider: "openai", @@ -432,6 +434,10 @@ export function resolveGoogleMeetConfigWithEnv( voiceCall.dtmfDelayMs, DEFAULT_GOOGLE_MEET_CONFIG.voiceCall.dtmfDelayMs, ), + postDtmfSpeechDelayMs: resolveNumber( + voiceCall.postDtmfSpeechDelayMs, + DEFAULT_GOOGLE_MEET_CONFIG.voiceCall.postDtmfSpeechDelayMs, + ), introMessage: normalizeOptionalString(voiceCall.introMessage), }, realtime: { diff --git a/extensions/google-meet/src/runtime.ts b/extensions/google-meet/src/runtime.ts index 548554d78cf..ee8c8e7c6d4 100644 --- a/extensions/google-meet/src/runtime.ts +++ b/extensions/google-meet/src/runtime.ts @@ -21,7 +21,11 @@ import type { GoogleMeetJoinResult, GoogleMeetSession, } from "./transports/types.js"; -import { endMeetVoiceCallGatewayCall, joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js"; +import { + endMeetVoiceCallGatewayCall, + joinMeetViaVoiceCallGateway, + speakMeetViaVoiceCallGateway, +} from "./voice-call-gateway.js"; function nowIso(): string { return new Date().toISOString(); @@ -301,6 +305,7 @@ export class GoogleMeetRuntime { return { session: reusable, spoken }; } const createdAt = nowIso(); + let delegatedTwilioSpoken = false; const session: GoogleMeetSession = { id: `meet_${randomUUID()}`, @@ -398,14 +403,22 @@ export class GoogleMeetRuntime { config: this.params.config, dialInNumber, dtmfSequence, + message: + mode === "realtime" + ? (request.message ?? + this.params.config.voiceCall.introMessage ?? + this.params.config.realtime.introMessage) + : undefined, }) : undefined; + delegatedTwilioSpoken = Boolean(voiceCallResult?.introSent); session.twilio = { dialInNumber, pinProvided: Boolean(request.pin ?? this.params.config.twilio.defaultPin), dtmfSequence, voiceCallId: voiceCallResult?.callId, dtmfSent: voiceCallResult?.dtmfSent, + introSent: voiceCallResult?.introSent, }; if (voiceCallResult?.callId) { this.#sessionStops.set(session.id, async () => { @@ -428,9 +441,11 @@ export class GoogleMeetRuntime { this.#sessions.set(session.id, session); const spoken = - mode === "realtime" && speechInstructions - ? (await this.speak(session.id, speechInstructions)).spoken - : false; + transport === "twilio" + ? delegatedTwilioSpoken + : mode === "realtime" && speechInstructions + ? (await this.speak(session.id, speechInstructions)).spoken + : false; return { session, spoken }; } @@ -459,6 +474,20 @@ export class GoogleMeetRuntime { if (!session) { return { found: false, spoken: false }; } + if (session.transport === "twilio" && session.twilio?.voiceCallId) { + await speakMeetViaVoiceCallGateway({ + config: this.params.config, + callId: session.twilio.voiceCallId, + message: + instructions || + this.params.config.voiceCall.introMessage || + this.params.config.realtime.introMessage || + "", + }); + session.twilio.introSent = true; + session.updatedAt = nowIso(); + return { found: true, spoken: true, session }; + } await this.#refreshBrowserHealthForChromeSession(session); const speak = this.#sessionSpeakers.get(sessionId); if (!speak || session.state !== "active") { diff --git a/extensions/google-meet/src/transports/types.ts b/extensions/google-meet/src/transports/types.ts index d2a08aa3706..9c523bf68bf 100644 --- a/extensions/google-meet/src/transports/types.ts +++ b/extensions/google-meet/src/transports/types.ts @@ -86,6 +86,7 @@ export type GoogleMeetSession = { dtmfSequence?: string; voiceCallId?: string; dtmfSent?: boolean; + introSent?: boolean; }; notes: string[]; }; diff --git a/extensions/google-meet/src/voice-call-gateway.test.ts b/extensions/google-meet/src/voice-call-gateway.test.ts index 3c8baa22a35..6aadaf4e500 100644 --- a/extensions/google-meet/src/voice-call-gateway.test.ts +++ b/extensions/google-meet/src/voice-call-gateway.test.ts @@ -27,25 +27,49 @@ describe("Google Meet voice-call gateway", () => { gatewayMocks.startGatewayClientWhenEventLoopReady.mockClear(); }); - it("starts Twilio Meet calls in conversation mode with the realtime intro by default", async () => { + it("starts Twilio Meet calls silently, sends DTMF, then speaks the realtime intro", async () => { const config = resolveGoogleMeetConfig({ - voiceCall: { gatewayUrl: "ws://127.0.0.1:18789" }, + voiceCall: { + gatewayUrl: "ws://127.0.0.1:18789", + dtmfDelayMs: 1, + postDtmfSpeechDelayMs: 1, + }, realtime: { introMessage: "Say exactly: I'm here and listening." }, }); await joinMeetViaVoiceCallGateway({ config, dialInNumber: "+15551234567", + dtmfSequence: "123456#", + message: "Say exactly: I'm here and listening.", }); - expect(gatewayMocks.request).toHaveBeenCalledWith( + expect(gatewayMocks.request).toHaveBeenNthCalledWith( + 1, "voicecall.start", { to: "+15551234567", - message: "Say exactly: I'm here and listening.", mode: "conversation", }, { timeoutMs: 30_000 }, ); + expect(gatewayMocks.request).toHaveBeenNthCalledWith( + 2, + "voicecall.dtmf", + { + callId: "call-1", + digits: "123456#", + }, + { timeoutMs: 30_000 }, + ); + expect(gatewayMocks.request).toHaveBeenNthCalledWith( + 3, + "voicecall.speak", + { + callId: "call-1", + message: "Say exactly: I'm here and listening.", + }, + { timeoutMs: 30_000 }, + ); }); }); diff --git a/extensions/google-meet/src/voice-call-gateway.ts b/extensions/google-meet/src/voice-call-gateway.ts index 694f284f1da..84224d663b6 100644 --- a/extensions/google-meet/src/voice-call-gateway.ts +++ b/extensions/google-meet/src/voice-call-gateway.ts @@ -13,9 +13,15 @@ type VoiceCallStartResult = { error?: string; }; +type VoiceCallSpeakResult = { + success?: boolean; + error?: string; +}; + export type VoiceCallMeetJoinResult = { callId: string; dtmfSent: boolean; + introSent: boolean; }; async function createConnectedGatewayClient( @@ -67,6 +73,7 @@ export async function joinMeetViaVoiceCallGateway(params: { config: GoogleMeetConfig; dialInNumber: string; dtmfSequence?: string; + message?: string; }): Promise { let client: VoiceCallGatewayClient | undefined; @@ -76,7 +83,6 @@ export async function joinMeetViaVoiceCallGateway(params: { "voicecall.start", { to: params.dialInNumber, - message: params.config.voiceCall.introMessage ?? params.config.realtime.introMessage, mode: "conversation", }, { timeoutMs: params.config.voiceCall.requestTimeoutMs }, @@ -95,7 +101,25 @@ export async function joinMeetViaVoiceCallGateway(params: { { timeoutMs: params.config.voiceCall.requestTimeoutMs }, ); } - return { callId: start.callId, dtmfSent: Boolean(params.dtmfSequence) }; + if (params.message) { + await sleep(params.config.voiceCall.postDtmfSpeechDelayMs); + const spoken = (await client.request( + "voicecall.speak", + { + callId: start.callId, + message: params.message, + }, + { timeoutMs: params.config.voiceCall.requestTimeoutMs }, + )) as VoiceCallSpeakResult; + if (spoken.success === false) { + throw new Error(spoken.error || "voicecall.speak failed"); + } + } + return { + callId: start.callId, + dtmfSent: Boolean(params.dtmfSequence), + introSent: Boolean(params.message), + }; } finally { await client?.stopAndWait({ timeoutMs: 1_000 }); } @@ -120,3 +144,28 @@ export async function endMeetVoiceCallGatewayCall(params: { await client?.stopAndWait({ timeoutMs: 1_000 }); } } + +export async function speakMeetViaVoiceCallGateway(params: { + config: GoogleMeetConfig; + callId: string; + message: string; +}): Promise { + let client: VoiceCallGatewayClient | undefined; + + try { + client = await createConnectedGatewayClient(params.config); + const spoken = (await client.request( + "voicecall.speak", + { + callId: params.callId, + message: params.message, + }, + { timeoutMs: params.config.voiceCall.requestTimeoutMs }, + )) as VoiceCallSpeakResult; + if (spoken.success === false) { + throw new Error(spoken.error || "voicecall.speak failed"); + } + } finally { + await client?.stopAndWait({ timeoutMs: 1_000 }); + } +} diff --git a/extensions/voice-call/index.ts b/extensions/voice-call/index.ts index 3ce9df1a05d..d60d8576f00 100644 --- a/extensions/voice-call/index.ts +++ b/extensions/voice-call/index.ts @@ -369,12 +369,27 @@ export default definePluginEntry({ "voicecall.speak", async ({ params, respond }: GatewayRequestHandlerOptions) => { try { - await respondToCallMessageAction({ - requestParams: params, - respond, - action: (request) => request.rt.manager.speak(request.callId, request.message), - failure: "speak failed", - }); + const request = await resolveCallMessageRequest(params); + if ("error" in request) { + respond(false, { error: request.error }); + return; + } + if (request.rt.config.realtime.enabled) { + const realtimeResult = request.rt.webhookServer.speakRealtime( + request.callId, + request.message, + ); + if (realtimeResult.success) { + respond(true, { success: true }); + return; + } + } + const result = await request.rt.manager.speak(request.callId, request.message); + if (!result.success) { + respond(false, { error: result.error || "speak failed" }); + return; + } + respond(true, { success: true }); } catch (err) { sendError(respond, err); } diff --git a/extensions/voice-call/src/webhook.ts b/extensions/voice-call/src/webhook.ts index 34e58905bf6..f7e40987bb8 100644 --- a/extensions/voice-call/src/webhook.ts +++ b/extensions/voice-call/src/webhook.ts @@ -195,6 +195,13 @@ export class VoiceCallWebhookServer { return this.realtimeHandler; } + speakRealtime(callId: string, instructions: string): { success: boolean; error?: string } { + if (!this.realtimeHandler) { + return { success: false, error: "Realtime voice handler is not configured" }; + } + return this.realtimeHandler.speak(callId, instructions); + } + setRealtimeHandler(handler: RealtimeCallHandler): void { this.realtimeHandler = handler; } diff --git a/extensions/voice-call/src/webhook/realtime-handler.test.ts b/extensions/voice-call/src/webhook/realtime-handler.test.ts index fb3c176ea18..539a4bbe2c7 100644 --- a/extensions/voice-call/src/webhook/realtime-handler.test.ts +++ b/extensions/voice-call/src/webhook/realtime-handler.test.ts @@ -214,6 +214,121 @@ describe("RealtimeCallHandler path routing", () => { } }); + it("does not emit an outbound realtime greeting without an initial message", async () => { + let callbacks: + | { + onReady?: () => void; + } + | undefined; + const triggerGreeting = vi.fn(); + const createBridge = vi.fn( + (request: Parameters[0]) => { + callbacks = request; + return makeBridge({ triggerGreeting }); + }, + ); + const getCallByProviderCallId = vi.fn( + (): CallRecord => ({ + callId: "call-1", + providerCallId: "CA-silent", + provider: "twilio", + direction: "outbound", + state: "ringing", + from: "+15550001234", + to: "+15550009999", + startedAt: Date.now(), + transcript: [], + processedEventIds: [], + metadata: {}, + }), + ); + const handler = makeHandler(undefined, { + manager: { + getCallByProviderCallId, + }, + realtimeProvider: makeRealtimeProvider(createBridge), + }); + const server = await startRealtimeServer(handler); + + try { + const ws = await connectWs(server.url); + try { + ws.send( + JSON.stringify({ + event: "start", + start: { streamSid: "MZ-silent", callSid: "CA-silent" }, + }), + ); + await vi.waitFor(() => { + expect(createBridge).toHaveBeenCalled(); + }); + + callbacks?.onReady?.(); + + expect(triggerGreeting).not.toHaveBeenCalled(); + } finally { + if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) { + ws.close(); + } + } + } finally { + await server.close(); + } + }); + + it("speaks through the active outbound realtime bridge by call id", async () => { + const triggerGreeting = vi.fn(); + const createBridge = vi.fn(() => makeBridge({ triggerGreeting })); + const getCallByProviderCallId = vi.fn( + (): CallRecord => ({ + callId: "call-1", + providerCallId: "CA-speak", + provider: "twilio", + direction: "outbound", + state: "ringing", + from: "+15550001234", + to: "+15550009999", + startedAt: Date.now(), + transcript: [], + processedEventIds: [], + metadata: {}, + }), + ); + const handler = makeHandler(undefined, { + manager: { + getCallByProviderCallId, + }, + realtimeProvider: makeRealtimeProvider(createBridge), + }); + const server = await startRealtimeServer(handler); + + try { + const ws = await connectWs(server.url); + try { + ws.send( + JSON.stringify({ + event: "start", + start: { streamSid: "MZ-speak", callSid: "CA-speak" }, + }), + ); + await vi.waitFor(() => { + expect(createBridge).toHaveBeenCalled(); + }); + + expect(handler.speak("call-1", "Say exactly: hello from Meet.")).toEqual({ + success: true, + }); + expect(triggerGreeting).toHaveBeenCalledWith("Say exactly: hello from Meet."); + } finally { + if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) { + ws.close(); + } + } + } finally { + await server.close(); + } + }); + it("submits continuing responses only for realtime agent consult calls", async () => { let callbacks: | { diff --git a/extensions/voice-call/src/webhook/realtime-handler.ts b/extensions/voice-call/src/webhook/realtime-handler.ts index b77ca8d0d5d..ea8d30e1ff0 100644 --- a/extensions/voice-call/src/webhook/realtime-handler.ts +++ b/extensions/voice-call/src/webhook/realtime-handler.ts @@ -41,7 +41,7 @@ function buildGreetingInstructions( ): string | undefined { const trimmedGreeting = greeting?.trim(); if (!trimmedGreeting) { - return baseInstructions; + return undefined; } const intro = "Start the call by greeting the caller naturally. Include this greeting in your first spoken reply:"; @@ -64,9 +64,15 @@ type CallRegistration = { type ActiveRealtimeVoiceBridge = RealtimeVoiceBridgeSession; +type RealtimeSpeakResult = { + success: boolean; + error?: string; +}; + export class RealtimeCallHandler { private readonly toolHandlers = new Map(); private readonly pendingStreamTokens = new Map(); + private readonly activeBridgesByCallId = new Map(); private publicOrigin: string | null = null; private publicPathPrefix = ""; @@ -199,6 +205,19 @@ export class RealtimeCallHandler { this.toolHandlers.set(name, fn); } + speak(callId: string, instructions: string): RealtimeSpeakResult { + const bridge = this.activeBridgesByCallId.get(callId); + if (!bridge) { + return { success: false, error: "No active realtime bridge for call" }; + } + try { + bridge.triggerGreeting(instructions); + return { success: true }; + } catch (error) { + return { success: false, error: formatErrorMessage(error) }; + } + } + private issueStreamToken(meta: Omit = {}): string { const token = randomUUID(); this.pendingStreamTokens.set(token, { expiry: Date.now() + STREAM_TOKEN_TTL_MS, ...meta }); @@ -254,7 +273,7 @@ export class RealtimeCallHandler { instructions: this.config.instructions, tools: this.config.tools, initialGreetingInstructions, - triggerGreetingOnReady: true, + triggerGreetingOnReady: Boolean(initialGreetingInstructions), audioSink: { isOpen: () => ws.readyState === WebSocket.OPEN, sendAudio: (muLaw) => { @@ -312,6 +331,8 @@ export class RealtimeCallHandler { console.error("[voice-call] realtime voice error:", error.message); }, onClose: (reason) => { + this.activeBridgesByCallId.delete(callId); + this.activeBridgesByCallId.delete(callSid); if (reason !== "error") { return; } @@ -330,6 +351,14 @@ export class RealtimeCallHandler { }); }, }); + this.activeBridgesByCallId.set(callId, bridge); + this.activeBridgesByCallId.set(callSid, bridge); + const closeBridge = bridge.close.bind(bridge); + bridge.close = () => { + this.activeBridgesByCallId.delete(callId); + this.activeBridgesByCallId.delete(callSid); + closeBridge(); + }; bridge.connect().catch((error: Error) => { console.error("[voice-call] Failed to connect realtime bridge:", error);