From a1b49c4b20027c42abbfeeeafbc34fa4379d3ef0 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 6 May 2026 09:16:40 +0100 Subject: [PATCH] fix: stabilize google meet twilio joins --- CHANGELOG.md | 1 + docs/plugins/google-meet.md | 12 +-- docs/plugins/voice-call.md | 11 ++- extensions/google-meet/index.test.ts | 64 +++++++++++- extensions/google-meet/index.ts | 8 +- extensions/google-meet/src/config.ts | 2 +- extensions/google-meet/src/runtime.ts | 99 +++++++++++++++---- .../google-meet/src/transports/twilio.ts | 11 +++ .../src/voice-call-gateway.test.ts | 40 +++++++- .../google-meet/src/voice-call-gateway.ts | 42 +++++++- 10 files changed, 249 insertions(+), 41 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 807f6983b66..feee97ab764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -109,6 +109,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Google Meet/Voice Call: wait longer before playing PIN-derived Twilio DTMF for Meet dial-in prompts and retire stale delegated phone sessions instead of reusing completed calls. - Onboard/channels: recover externalized channel plugins from stale `channels.<id>` config by falling back to `ensureChannelSetupPluginInstalled` via the trusted catalog when the plugin is missing on disk, so leftover `appId`/token entries no longer dead-end onboard with "<id> plugin not available." (#78328) Thanks @sliverp. - Codex/app-server: forward the OpenClaw workspace bootstrap block through Codex `developerInstructions` instead of `config.instructions`, so persona/style guidance reaches the behavior-shaping app-server lane. Fixes #77363. Thanks @lonexreb. - Dependencies: override transitive `ip-address` to `10.2.0` so the runtime lockfile no longer includes the vulnerable `10.1.0` build flagged by Dependabot alert 109. Thanks @vincentkoc. 
diff --git a/docs/plugins/google-meet.md b/docs/plugins/google-meet.md index 162e26e14b7..6023a09dccd 100644 --- a/docs/plugins/google-meet.md +++ b/docs/plugins/google-meet.md @@ -1668,16 +1668,16 @@ participant: - Run `openclaw voicecall tail` and check that Twilio webhooks are arriving at the Gateway. - Run `openclaw logs --follow` and look for the Twilio Meet sequence: Google - Meet delegates the join, Voice Call starts the phone leg, Google Meet waits - `voiceCall.dtmfDelayMs`, sends DTMF with `voicecall.dtmf`, waits - `voiceCall.postDtmfSpeechDelayMs`, then requests intro speech with - `voicecall.speak`. + Meet delegates the join, Voice Call stores and serves pre-connect DTMF TwiML, + Voice Call serves realtime TwiML for the Twilio call, then Google Meet requests + intro speech with `voicecall.speak`. - Re-run `openclaw googlemeet setup --transport twilio`; a green setup check is required but does not prove the meeting PIN sequence is correct. - Confirm the dial-in number belongs to the same Meet invitation and region as the PIN. -- Increase `voiceCall.dtmfDelayMs` if Meet answers slowly or the call transcript - still shows the prompt asking for a PIN after DTMF was sent. +- Increase `voiceCall.dtmfDelayMs` from the 12-second default if Meet answers + slowly or the call transcript still shows the prompt asking for a PIN after + pre-connect DTMF was sent. - If the participant joins but you do not hear the greeting, check `openclaw logs --follow` for the post-DTMF `voicecall.speak` request and either media-stream TTS playback or the Twilio `<Say>` fallback. If the call diff --git a/docs/plugins/voice-call.md b/docs/plugins/voice-call.md index 00ea1c74e8a..5d65eeaf69d 100644 --- a/docs/plugins/voice-call.md +++ b/docs/plugins/voice-call.md @@ -902,10 +902,11 @@ If Voice Call is green but the Meet participant never joins, check the Meet dial-in number, PIN, and `--dtmf-sequence`. 
The phone call can be healthy while the meeting rejects or ignores an incorrect DTMF sequence. -Google Meet passes the Meet DTMF sequence and intro text to `voicecall.start`. -For Twilio calls, Voice Call serves the DTMF TwiML first, redirects back to the -webhook, then opens the realtime media stream so the saved intro is generated -after the phone participant has joined the meeting. +Google Meet starts the Twilio phone leg through `voicecall.start` with a +pre-connect DTMF sequence. PIN-derived sequences include the Google Meet plugin's +`voiceCall.dtmfDelayMs` as leading Twilio wait digits. The default is 12 seconds +because Meet dial-in prompts can arrive late. Voice Call then redirects back to +realtime handling before the intro greeting is requested. Use `openclaw logs --follow` for the live phase trace. A healthy Twilio Meet join logs this order: @@ -914,7 +915,7 @@ join logs this order: - Voice Call stores pre-connect DTMF TwiML. - Twilio initial TwiML is consumed and served before realtime handling. - Voice Call serves realtime TwiML for the Twilio call. -- The realtime bridge starts with the initial greeting queued. +- Google Meet requests intro speech with `voicecall.speak` after the post-DTMF delay. 
`openclaw voicecall tail` still shows persisted call records; it is useful for call state and transcripts, but not every webhook/realtime transition appears diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index a80728c738b..db35bf2c858 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -43,7 +43,11 @@ import { setupGoogleMeetPlugin, } from "./src/test-support/plugin-harness.js"; import { __testing as chromeTransportTesting } from "./src/transports/chrome.js"; -import { buildMeetDtmfSequence, normalizeDialInNumber } from "./src/transports/twilio.js"; +import { + buildMeetDtmfSequence, + normalizeDialInNumber, + prefixDtmfWait, +} from "./src/transports/twilio.js"; import type { GoogleMeetSession } from "./src/transports/types.js"; const voiceCallMocks = vi.hoisted(() => ({ @@ -53,6 +57,13 @@ const voiceCallMocks = vi.hoisted(() => ({ introSent: true, })), endMeetVoiceCallGatewayCall: vi.fn(async () => {}), + getMeetVoiceCallGatewayCall: vi.fn( + async (): Promise<{ found: boolean; call?: { callId: string } }> => ({ + found: true, + call: { callId: "call-1" }, + }), + ), + isVoiceCallMissingError: vi.fn((error: unknown) => String(error).includes("Call not found")), speakMeetViaVoiceCallGateway: vi.fn(async () => {}), })); @@ -82,6 +93,8 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", async (importOriginal) => { vi.mock("./src/voice-call-gateway.js", () => ({ joinMeetViaVoiceCallGateway: voiceCallMocks.joinMeetViaVoiceCallGateway, endMeetVoiceCallGatewayCall: voiceCallMocks.endMeetVoiceCallGatewayCall, + getMeetVoiceCallGatewayCall: voiceCallMocks.getMeetVoiceCallGatewayCall, + isVoiceCallMissingError: voiceCallMocks.isVoiceCallMissingError, speakMeetViaVoiceCallGateway: voiceCallMocks.speakMeetViaVoiceCallGateway, })); @@ -313,6 +326,20 @@ type TestBridgeProcess = { describe("google-meet plugin", () => { beforeEach(() => { vi.clearAllMocks(); + 
voiceCallMocks.joinMeetViaVoiceCallGateway.mockResolvedValue({ + callId: "call-1", + dtmfSent: true, + introSent: true, + }); + voiceCallMocks.endMeetVoiceCallGatewayCall.mockResolvedValue(undefined); + voiceCallMocks.getMeetVoiceCallGatewayCall.mockResolvedValue({ + found: true, + call: { callId: "call-1" }, + }); + voiceCallMocks.isVoiceCallMissingError.mockImplementation((error: unknown) => + String(error).includes("Call not found"), + ); + voiceCallMocks.speakMeetViaVoiceCallGateway.mockResolvedValue(undefined); }); afterEach(() => { @@ -388,7 +415,7 @@ describe("google-meet plugin", () => { voiceCall: { enabled: true, requestTimeoutMs: 30000, - dtmfDelayMs: 2500, + dtmfDelayMs: 12000, postDtmfSpeechDelayMs: 5000, }, realtime: { @@ -1226,6 +1253,7 @@ describe("google-meet plugin", () => { expect(normalizeDialInNumber("+1 (555) 123-4567")).toBe("+15551234567"); expect(buildMeetDtmfSequence({ pin: "123 456" })).toBe("123456#"); expect(buildMeetDtmfSequence({ dtmfSequence: "ww123#" })).toBe("ww123#"); + expect(prefixDtmfWait("123456#", 12000)).toBe("wwwwwwwwwwwwwwwwwwwwwwww123456#"); }); it("joins a Twilio session through the tool without page parsing", async () => { @@ -1246,7 +1274,7 @@ describe("google-meet plugin", () => { twilio: { dialInNumber: "+15551234567", pinProvided: true, - dtmfSequence: "123456#", + dtmfSequence: "wwwwwwwwwwwwwwwwwwwwwwww123456#", voiceCallId: "call-1", dtmfSent: true, introSent: true, @@ -1256,7 +1284,7 @@ describe("google-meet plugin", () => { expect.objectContaining({ config: expect.objectContaining({ defaultTransport: "twilio" }), dialInNumber: "+15551234567", - dtmfSequence: "123456#", + dtmfSequence: "wwwwwwwwwwwwwwwwwwwwwwww123456#", logger: expect.objectContaining({ info: expect.any(Function) }), message: "Say exactly: I'm here and listening.", sessionKey: expect.stringMatching(/^voice:google-meet:meet_/), @@ -1325,6 +1353,34 @@ describe("google-meet plugin", () => { }); }); + it("does not reuse Twilio Meet sessions whose 
delegated call is no longer active", async () => { + voiceCallMocks.getMeetVoiceCallGatewayCall.mockResolvedValueOnce({ found: false }); + const { tools } = setup({ defaultTransport: "twilio" }); + const tool = tools[0] as { + execute: ( + id: string, + params: unknown, + ) => Promise<{ details: { session: { id: string; state: string; notes: string[] } } }>; + }; + const first = await tool.execute("id", { + action: "join", + url: "https://meet.google.com/abc-defg-hij", + dialInNumber: "+15551234567", + pin: "123456", + }); + const second = await tool.execute("id", { + action: "join", + url: "https://meet.google.com/abc-defg-hij", + dialInNumber: "+15551234567", + pin: "123456", + }); + + expect(first.details.session.state).toBe("ended"); + expect(first.details.session.notes).toContain("Voice Call is no longer active."); + expect(second.details.session.id).not.toBe(first.details.session.id); + expect(voiceCallMocks.joinMeetViaVoiceCallGateway).toHaveBeenCalledTimes(2); + }); + it("delegates Twilio session speech through voice-call", async () => { const { tools } = setup({ defaultTransport: "twilio" }); const tool = tools[0] as { diff --git a/extensions/google-meet/index.ts b/extensions/google-meet/index.ts index 98fcec84412..8c37ed6e437 100644 --- a/extensions/google-meet/index.ts +++ b/extensions/google-meet/index.ts @@ -145,13 +145,13 @@ const googleMeetConfigSchema = { advanced: true, }, "voiceCall.dtmfDelayMs": { - label: "Legacy DTMF Delay (ms)", - help: "Compatibility setting from the old post-connect DTMF flow. Twilio Meet joins now play DTMF before realtime connect.", + label: "DTMF Wait Before PIN (ms)", + help: "Leading Twilio wait time before playing a PIN-derived Meet DTMF sequence. Increase it if Meet asks for the PIN after DTMF was sent.", advanced: true, }, "voiceCall.postDtmfSpeechDelayMs": { - label: "Legacy Post-DTMF Speech Delay (ms)", - help: "Compatibility setting from the old delayed-speech flow. 
Twilio Meet joins now carry the intro as the initial Voice Call message.", + label: "Post-DTMF Speech Delay (ms)", + help: "Delay before requesting the realtime intro greeting after Voice Call starts the Twilio leg.", advanced: true, }, "voiceCall.introMessage": { label: "Voice Call Intro Message", advanced: true }, diff --git a/extensions/google-meet/src/config.ts b/extensions/google-meet/src/config.ts index 6fb80ee7093..ba9d723bfc4 100644 --- a/extensions/google-meet/src/config.ts +++ b/extensions/google-meet/src/config.ts @@ -216,7 +216,7 @@ const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = { voiceCall: { enabled: true, requestTimeoutMs: 30_000, - dtmfDelayMs: 2_500, + dtmfDelayMs: 12_000, postDtmfSpeechDelayMs: 5_000, }, realtime: { diff --git a/extensions/google-meet/src/runtime.ts b/extensions/google-meet/src/runtime.ts index 80de8432282..601636a9050 100644 --- a/extensions/google-meet/src/runtime.ts +++ b/extensions/google-meet/src/runtime.ts @@ -19,7 +19,11 @@ import { recoverCurrentMeetTab, recoverCurrentMeetTabOnNode, } from "./transports/chrome.js"; -import { buildMeetDtmfSequence, normalizeDialInNumber } from "./transports/twilio.js"; +import { + buildMeetDtmfSequence, + normalizeDialInNumber, + prefixDtmfWait, +} from "./transports/twilio.js"; import type { GoogleMeetChromeHealth, GoogleMeetJoinRequest, @@ -28,6 +32,8 @@ import type { } from "./transports/types.js"; import { endMeetVoiceCallGatewayCall, + getMeetVoiceCallGatewayCall, + isVoiceCallMissingError, joinMeetViaVoiceCallGateway, speakMeetViaVoiceCallGateway, } from "./voice-call-gateway.js"; @@ -133,6 +139,10 @@ function isManagedChromeBrowserSession(session: GoogleMeetSession): boolean { ); } +function noteSession(session: GoogleMeetSession, note: string): void { + session.notes = [...session.notes.filter((item) => item !== note), note]; +} + function evaluateSpeechReadiness(session: GoogleMeetSession): { ready: boolean; reason?: NonNullable; @@ -365,20 +375,23 @@ export class 
GoogleMeetRuntime { const url = normalizeMeetUrl(request.url); const transport = resolveTransport(request.transport, this.params.config); const mode = resolveMode(request.mode, this.params.config); - const reusable = this.list().find( + let reusable = this.list().find( (session) => session.state === "active" && isSameMeetUrlForReuse(session.url, url) && session.transport === transport && session.mode === mode, ); + if (reusable?.transport === "twilio") { + await this.#refreshTwilioVoiceCallStatus(reusable); + if (reusable.state !== "active") { + reusable = undefined; + } + } const speechInstructions = request.message ?? this.params.config.realtime.introMessage; if (reusable) { await this.#refreshBrowserHealthForChromeSession(reusable); - reusable.notes = [ - ...reusable.notes.filter((note) => note !== "Reused existing active Meet session."), - "Reused existing active Meet session.", - ]; + noteSession(reusable, "Reused existing active Meet session."); reusable.updatedAt = nowIso(); const spoken = isGoogleMeetTalkBackMode(mode) && speechInstructions @@ -472,10 +485,14 @@ export class GoogleMeetRuntime { "Twilio transport requires a Meet dial-in phone number. Google Meet URLs do not include dial-in details; pass dialInNumber with optional pin/dtmfSequence, configure twilio.defaultDialInNumber, or use chrome/chrome-node transport.", ); } - const dtmfSequence = buildMeetDtmfSequence({ + const rawDtmfSequence = buildMeetDtmfSequence({ pin: request.pin ?? this.params.config.twilio.defaultPin, dtmfSequence: request.dtmfSequence ?? this.params.config.twilio.defaultDtmfSequence, }); + const dtmfSequence = + request.dtmfSequence || this.params.config.twilio.defaultDtmfSequence + ? rawDtmfSequence + : prefixDtmfWait(rawDtmfSequence, this.params.config.voiceCall.dtmfDelayMs); const voiceCallResult = this.params.config.voiceCall.enabled ? 
await joinMeetViaVoiceCallGateway({ config: this.params.config, @@ -543,7 +560,12 @@ export class GoogleMeetRuntime { this.#sessionStops.delete(sessionId); this.#sessionSpeakers.delete(sessionId); this.#sessionHealth.delete(sessionId); - await stop(); + try { + await stop(); + } finally { + session.state = "ended"; + session.updatedAt = nowIso(); + } } session.state = "ended"; session.updatedAt = nowIso(); @@ -559,15 +581,23 @@ export class GoogleMeetRuntime { return { found: false, spoken: false }; } if (session.transport === "twilio" && session.twilio?.voiceCallId) { - await speakMeetViaVoiceCallGateway({ - config: this.params.config, - callId: session.twilio.voiceCallId, - message: - instructions || - this.params.config.voiceCall.introMessage || - this.params.config.realtime.introMessage || - "", - }); + try { + await speakMeetViaVoiceCallGateway({ + config: this.params.config, + callId: session.twilio.voiceCallId, + message: + instructions || + this.params.config.voiceCall.introMessage || + this.params.config.realtime.introMessage || + "", + }); + } catch (err) { + if (!isVoiceCallMissingError(err)) { + throw err; + } + this.#markTwilioSessionEnded(session, "Voice Call is no longer active."); + return { found: true, spoken: false, session }; + } session.twilio.introSent = true; session.updatedAt = nowIso(); return { found: true, spoken: true, session }; @@ -801,6 +831,41 @@ export class GoogleMeetRuntime { await this.#refreshBrowserHealthForChromeSession(session, { force: true, readOnly: true }); return; } + if (session.transport === "twilio") { + await this.#refreshTwilioVoiceCallStatus(session); + return; + } + this.#refreshSpeechReadiness(session); + } + + #markTwilioSessionEnded(session: GoogleMeetSession, reason: string) { + session.state = "ended"; + session.updatedAt = nowIso(); + this.#sessionStops.delete(session.id); + this.#sessionSpeakers.delete(session.id); + this.#sessionHealth.delete(session.id); + noteSession(session, reason); + } + + async 
#refreshTwilioVoiceCallStatus(session: GoogleMeetSession) { + const callId = session.twilio?.voiceCallId; + if (!callId || session.state !== "active") { + this.#refreshSpeechReadiness(session); + return; + } + try { + const status = await getMeetVoiceCallGatewayCall({ + config: this.params.config, + callId, + }); + if (status.found === false) { + this.#markTwilioSessionEnded(session, "Voice Call is no longer active."); + } + } catch (error) { + this.params.logger.debug?.( + `[google-meet] voice-call status refresh ignored: ${formatErrorMessage(error)}`, + ); + } this.#refreshSpeechReadiness(session); } diff --git a/extensions/google-meet/src/transports/twilio.ts b/extensions/google-meet/src/transports/twilio.ts index 52168fe3f6d..155fafb8054 100644 --- a/extensions/google-meet/src/transports/twilio.ts +++ b/extensions/google-meet/src/transports/twilio.ts @@ -44,3 +44,14 @@ export function buildMeetDtmfSequence(params: { } return compactPin.endsWith("#") ? compactPin : `${compactPin}#`; } + +export function prefixDtmfWait(sequence: string | undefined, delayMs: number): string | undefined { + if (!sequence || delayMs <= 0) { + return sequence; + } + const waitCount = Math.ceil(delayMs / 500); + if (waitCount <= 0) { + return sequence; + } + return `${"w".repeat(waitCount)}${sequence}`; +} diff --git a/extensions/google-meet/src/voice-call-gateway.test.ts b/extensions/google-meet/src/voice-call-gateway.test.ts index 981c48c7d96..7953babc5cc 100644 --- a/extensions/google-meet/src/voice-call-gateway.test.ts +++ b/extensions/google-meet/src/voice-call-gateway.test.ts @@ -1,6 +1,10 @@ import { describe, expect, it, vi, beforeEach } from "vitest"; import { resolveGoogleMeetConfig } from "./config.js"; -import { joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js"; +import { + endMeetVoiceCallGatewayCall, + getMeetVoiceCallGatewayCall, + joinMeetViaVoiceCallGateway, +} from "./voice-call-gateway.js"; const gatewayMocks = vi.hoisted(() => ({ request: vi.fn(), @@ 
-100,4 +104,38 @@ describe("Google Meet voice-call gateway", () => { expect.stringContaining("Skipped intro speech because realtime bridge was not ready"), ); }); + + it("treats missing delegated calls as already ended", async () => { + gatewayMocks.request.mockRejectedValueOnce(new Error("Call not found")); + const config = resolveGoogleMeetConfig({ + voiceCall: { gatewayUrl: "ws://127.0.0.1:18789" }, + }); + + await expect( + endMeetVoiceCallGatewayCall({ config, callId: "call-1" }), + ).resolves.toBeUndefined(); + + expect(gatewayMocks.request).toHaveBeenCalledWith( + "voicecall.end", + { callId: "call-1" }, + { timeoutMs: 30_000 }, + ); + }); + + it("reads delegated call status from the gateway", async () => { + gatewayMocks.request.mockResolvedValueOnce({ found: false }); + const config = resolveGoogleMeetConfig({ + voiceCall: { gatewayUrl: "ws://127.0.0.1:18789" }, + }); + + await expect(getMeetVoiceCallGatewayCall({ config, callId: "call-1" })).resolves.toEqual({ + found: false, + }); + + expect(gatewayMocks.request).toHaveBeenCalledWith( + "voicecall.status", + { callId: "call-1" }, + { timeoutMs: 30_000 }, + ); + }); }); diff --git a/extensions/google-meet/src/voice-call-gateway.ts b/extensions/google-meet/src/voice-call-gateway.ts index 2cc685f20fc..aa7f10acb3b 100644 --- a/extensions/google-meet/src/voice-call-gateway.ts +++ b/extensions/google-meet/src/voice-call-gateway.ts @@ -19,6 +19,11 @@ type VoiceCallSpeakResult = { error?: string; }; +type VoiceCallStatusResult = { + found?: boolean; + call?: unknown; +}; + type VoiceCallMeetJoinResult = { callId: string; dtmfSent: boolean; @@ -77,6 +82,11 @@ async function createConnectedGatewayClient( return client!; } +export function isVoiceCallMissingError(error: unknown): boolean { + const message = formatErrorMessage(error).toLowerCase(); + return message.includes("call not found") || message.includes("call is not active"); +} + export async function joinMeetViaVoiceCallGateway(params: { config: 
GoogleMeetConfig; dialInNumber: string; @@ -173,13 +183,39 @@ export async function endMeetVoiceCallGatewayCall(params: { try { client = await createConnectedGatewayClient(params.config); - await client.request( - "voicecall.end", + try { + await client.request( + "voicecall.end", + { + callId: params.callId, + }, + { timeoutMs: params.config.voiceCall.requestTimeoutMs }, + ); + } catch (err) { + if (!isVoiceCallMissingError(err)) { + throw err; + } + } + } finally { + await client?.stopAndWait({ timeoutMs: 1_000 }); + } +} + +export async function getMeetVoiceCallGatewayCall(params: { + config: GoogleMeetConfig; + callId: string; +}): Promise { + let client: VoiceCallGatewayClient | undefined; + + try { + client = await createConnectedGatewayClient(params.config); + return (await client.request( + "voicecall.status", { callId: params.callId, }, { timeoutMs: params.config.voiceCall.requestTimeoutMs }, - ); + )) as VoiceCallStatusResult; } finally { await client?.stopAndWait({ timeoutMs: 1_000 }); }