fix: stabilize google meet twilio joins

This commit is contained in:
Peter Steinberger
2026-05-06 09:16:40 +01:00
parent 2eaf8ad712
commit a1b49c4b20
10 changed files with 249 additions and 41 deletions

View File

@@ -109,6 +109,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Google Meet/Voice Call: wait longer before playing PIN-derived Twilio DTMF for Meet dial-in prompts and retire stale delegated phone sessions instead of reusing completed calls.
- Onboard/channels: recover externalized channel plugins from stale `channels.<id>` config by falling back to `ensureChannelSetupPluginInstalled` via the trusted catalog when the plugin is missing on disk, so leftover `appId`/token entries no longer dead-end onboard with "<channel> plugin not available." (#78328) Thanks @sliverp.
- Codex/app-server: forward the OpenClaw workspace bootstrap block through Codex `developerInstructions` instead of `config.instructions`, so persona/style guidance reaches the behavior-shaping app-server lane. Fixes #77363. Thanks @lonexreb.
- Dependencies: override transitive `ip-address` to `10.2.0` so the runtime lockfile no longer includes the vulnerable `10.1.0` build flagged by Dependabot alert 109. Thanks @vincentkoc.

View File

@@ -1668,16 +1668,16 @@ participant:
- Run `openclaw voicecall tail` and check that Twilio webhooks are arriving at
the Gateway.
- Run `openclaw logs --follow` and look for the Twilio Meet sequence: Google
Meet delegates the join, Voice Call starts the phone leg, Google Meet waits
`voiceCall.dtmfDelayMs`, sends DTMF with `voicecall.dtmf`, waits
`voiceCall.postDtmfSpeechDelayMs`, then requests intro speech with
`voicecall.speak`.
Meet delegates the join, Voice Call stores and serves pre-connect DTMF TwiML,
Voice Call serves realtime TwiML for the Twilio call, then Google Meet requests
intro speech with `voicecall.speak`.
- Re-run `openclaw googlemeet setup --transport twilio`; a green setup check is
required but does not prove the meeting PIN sequence is correct.
- Confirm the dial-in number belongs to the same Meet invitation and region as
the PIN.
- Increase `voiceCall.dtmfDelayMs` if Meet answers slowly or the call transcript
still shows the prompt asking for a PIN after DTMF was sent.
- Increase `voiceCall.dtmfDelayMs` from the 12-second default if Meet answers
slowly or the call transcript still shows the prompt asking for a PIN after
pre-connect DTMF was sent.
- If the participant joins but you do not hear the greeting, check
`openclaw logs --follow` for the post-DTMF `voicecall.speak` request and
either media-stream TTS playback or the Twilio `<Say>` fallback. If the call

View File

@@ -902,10 +902,11 @@ If Voice Call is green but the Meet participant never joins, check the Meet
dial-in number, PIN, and `--dtmf-sequence`. The phone call can be healthy while
the meeting rejects or ignores an incorrect DTMF sequence.
Google Meet passes the Meet DTMF sequence and intro text to `voicecall.start`.
For Twilio calls, Voice Call serves the DTMF TwiML first, redirects back to the
webhook, then opens the realtime media stream so the saved intro is generated
after the phone participant has joined the meeting.
Google Meet starts the Twilio phone leg through `voicecall.start` with a
pre-connect DTMF sequence. PIN-derived sequences include the Google Meet plugin's
`voiceCall.dtmfDelayMs` as leading Twilio wait digits. The default is 12 seconds
because Meet dial-in prompts can arrive late. Voice Call then redirects back to
realtime handling before the intro greeting is requested.
Use `openclaw logs --follow` for the live phase trace. A healthy Twilio Meet
join logs this order:
@@ -914,7 +915,7 @@ join logs this order:
- Voice Call stores pre-connect DTMF TwiML.
- Twilio initial TwiML is consumed and served before realtime handling.
- Voice Call serves realtime TwiML for the Twilio call.
- The realtime bridge starts with the initial greeting queued.
- Google Meet requests intro speech with `voicecall.speak` after the post-DTMF delay.
`openclaw voicecall tail` still shows persisted call records; it is useful for
call state and transcripts, but not every webhook/realtime transition appears

View File

@@ -43,7 +43,11 @@ import {
setupGoogleMeetPlugin,
} from "./src/test-support/plugin-harness.js";
import { __testing as chromeTransportTesting } from "./src/transports/chrome.js";
import { buildMeetDtmfSequence, normalizeDialInNumber } from "./src/transports/twilio.js";
import {
buildMeetDtmfSequence,
normalizeDialInNumber,
prefixDtmfWait,
} from "./src/transports/twilio.js";
import type { GoogleMeetSession } from "./src/transports/types.js";
const voiceCallMocks = vi.hoisted(() => ({
@@ -53,6 +57,13 @@ const voiceCallMocks = vi.hoisted(() => ({
introSent: true,
})),
endMeetVoiceCallGatewayCall: vi.fn(async () => {}),
getMeetVoiceCallGatewayCall: vi.fn(
async (): Promise<{ found: boolean; call?: { callId: string } }> => ({
found: true,
call: { callId: "call-1" },
}),
),
isVoiceCallMissingError: vi.fn((error: unknown) => String(error).includes("Call not found")),
speakMeetViaVoiceCallGateway: vi.fn(async () => {}),
}));
@@ -82,6 +93,8 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", async (importOriginal) => {
// Swap the voice-call gateway module for the hoisted `voiceCallMocks` spies so the
// tests can script and inspect join/end/status/speak calls and the
// "missing call" error classifier.
vi.mock("./src/voice-call-gateway.js", () => ({
joinMeetViaVoiceCallGateway: voiceCallMocks.joinMeetViaVoiceCallGateway,
endMeetVoiceCallGatewayCall: voiceCallMocks.endMeetVoiceCallGatewayCall,
getMeetVoiceCallGatewayCall: voiceCallMocks.getMeetVoiceCallGatewayCall,
isVoiceCallMissingError: voiceCallMocks.isVoiceCallMissingError,
speakMeetViaVoiceCallGateway: voiceCallMocks.speakMeetViaVoiceCallGateway,
}));
@@ -313,6 +326,20 @@ type TestBridgeProcess = {
describe("google-meet plugin", () => {
beforeEach(() => {
  // Clear recorded calls first, then re-apply the default happy-path behaviour
  // of every voice-call gateway mock.
  vi.clearAllMocks();

  const {
    joinMeetViaVoiceCallGateway: joinMock,
    endMeetVoiceCallGatewayCall: endMock,
    getMeetVoiceCallGatewayCall: statusMock,
    isVoiceCallMissingError: missingErrorMock,
    speakMeetViaVoiceCallGateway: speakMock,
  } = voiceCallMocks;

  joinMock.mockResolvedValue({ callId: "call-1", dtmfSent: true, introSent: true });
  endMock.mockResolvedValue(undefined);
  // By default the delegated call is reported as still present.
  statusMock.mockResolvedValue({ found: true, call: { callId: "call-1" } });
  missingErrorMock.mockImplementation((error: unknown) =>
    String(error).includes("Call not found"),
  );
  speakMock.mockResolvedValue(undefined);
});
afterEach(() => {
@@ -388,7 +415,7 @@ describe("google-meet plugin", () => {
voiceCall: {
enabled: true,
requestTimeoutMs: 30000,
dtmfDelayMs: 2500,
dtmfDelayMs: 12000,
postDtmfSpeechDelayMs: 5000,
},
realtime: {
@@ -1226,6 +1253,7 @@ describe("google-meet plugin", () => {
expect(normalizeDialInNumber("+1 (555) 123-4567")).toBe("+15551234567");
expect(buildMeetDtmfSequence({ pin: "123 456" })).toBe("123456#");
expect(buildMeetDtmfSequence({ dtmfSequence: "ww123#" })).toBe("ww123#");
expect(prefixDtmfWait("123456#", 12000)).toBe("wwwwwwwwwwwwwwwwwwwwwwww123456#");
});
it("joins a Twilio session through the tool without page parsing", async () => {
@@ -1246,7 +1274,7 @@ describe("google-meet plugin", () => {
twilio: {
dialInNumber: "+15551234567",
pinProvided: true,
dtmfSequence: "123456#",
dtmfSequence: "wwwwwwwwwwwwwwwwwwwwwwww123456#",
voiceCallId: "call-1",
dtmfSent: true,
introSent: true,
@@ -1256,7 +1284,7 @@ describe("google-meet plugin", () => {
expect.objectContaining({
config: expect.objectContaining({ defaultTransport: "twilio" }),
dialInNumber: "+15551234567",
dtmfSequence: "123456#",
dtmfSequence: "wwwwwwwwwwwwwwwwwwwwwwww123456#",
logger: expect.objectContaining({ info: expect.any(Function) }),
message: "Say exactly: I'm here and listening.",
sessionKey: expect.stringMatching(/^voice:google-meet:meet_/),
@@ -1325,6 +1353,34 @@ describe("google-meet plugin", () => {
});
});
it("does not reuse Twilio Meet sessions whose delegated call is no longer active", async () => {
  type JoinResult = {
    details: { session: { id: string; state: string; notes: string[] } };
  };
  // Same meeting, dial-in number and PIN for both joins, so the second join
  // is a candidate for session reuse.
  const joinParams = {
    action: "join",
    url: "https://meet.google.com/abc-defg-hij",
    dialInNumber: "+15551234567",
    pin: "123456",
  };

  // The next status lookup reports the delegated call as gone.
  voiceCallMocks.getMeetVoiceCallGatewayCall.mockResolvedValueOnce({ found: false });
  const { tools } = setup({ defaultTransport: "twilio" });
  const tool = tools[0] as {
    execute: (id: string, params: unknown) => Promise<JoinResult>;
  };

  const first = await tool.execute("id", { ...joinParams });
  const second = await tool.execute("id", { ...joinParams });

  // The first session is retired with an explanatory note...
  expect(first.details.session.state).toBe("ended");
  expect(first.details.session.notes).toContain("Voice Call is no longer active.");
  // ...and the second join places a fresh call instead of reusing it.
  expect(second.details.session.id).not.toBe(first.details.session.id);
  expect(voiceCallMocks.joinMeetViaVoiceCallGateway).toHaveBeenCalledTimes(2);
});
it("delegates Twilio session speech through voice-call", async () => {
const { tools } = setup({ defaultTransport: "twilio" });
const tool = tools[0] as {

View File

@@ -145,13 +145,13 @@ const googleMeetConfigSchema = {
advanced: true,
},
"voiceCall.dtmfDelayMs": {
label: "Legacy DTMF Delay (ms)",
help: "Compatibility setting from the old post-connect DTMF flow. Twilio Meet joins now play DTMF before realtime connect.",
label: "DTMF Wait Before PIN (ms)",
help: "Leading Twilio wait time before playing a PIN-derived Meet DTMF sequence. Increase it if Meet asks for the PIN after DTMF was sent.",
advanced: true,
},
"voiceCall.postDtmfSpeechDelayMs": {
label: "Legacy Post-DTMF Speech Delay (ms)",
help: "Compatibility setting from the old delayed-speech flow. Twilio Meet joins now carry the intro as the initial Voice Call message.",
label: "Post-DTMF Speech Delay (ms)",
help: "Delay before requesting the realtime intro greeting after Voice Call starts the Twilio leg.",
advanced: true,
},
"voiceCall.introMessage": { label: "Voice Call Intro Message", advanced: true },

View File

@@ -216,7 +216,7 @@ const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
voiceCall: {
enabled: true,
requestTimeoutMs: 30_000,
dtmfDelayMs: 2_500,
dtmfDelayMs: 12_000,
postDtmfSpeechDelayMs: 5_000,
},
realtime: {

View File

@@ -19,7 +19,11 @@ import {
recoverCurrentMeetTab,
recoverCurrentMeetTabOnNode,
} from "./transports/chrome.js";
import { buildMeetDtmfSequence, normalizeDialInNumber } from "./transports/twilio.js";
import {
buildMeetDtmfSequence,
normalizeDialInNumber,
prefixDtmfWait,
} from "./transports/twilio.js";
import type {
GoogleMeetChromeHealth,
GoogleMeetJoinRequest,
@@ -28,6 +32,8 @@ import type {
} from "./transports/types.js";
import {
endMeetVoiceCallGatewayCall,
getMeetVoiceCallGatewayCall,
isVoiceCallMissingError,
joinMeetViaVoiceCallGateway,
speakMeetViaVoiceCallGateway,
} from "./voice-call-gateway.js";
@@ -133,6 +139,10 @@ function isManagedChromeBrowserSession(session: GoogleMeetSession): boolean {
);
}
/**
 * Records `note` as the most recent entry in `session.notes`.
 *
 * Any earlier occurrences of the same note are removed, so the note appears
 * exactly once and always last. The notes array is replaced with a new array
 * rather than mutated in place.
 */
function noteSession(session: GoogleMeetSession, note: string): void {
  const remaining = session.notes.filter((existing) => existing !== note);
  remaining.push(note);
  session.notes = remaining;
}
function evaluateSpeechReadiness(session: GoogleMeetSession): {
ready: boolean;
reason?: NonNullable<GoogleMeetChromeHealth["speechBlockedReason"]>;
@@ -365,20 +375,23 @@ export class GoogleMeetRuntime {
const url = normalizeMeetUrl(request.url);
const transport = resolveTransport(request.transport, this.params.config);
const mode = resolveMode(request.mode, this.params.config);
const reusable = this.list().find(
let reusable = this.list().find(
(session) =>
session.state === "active" &&
isSameMeetUrlForReuse(session.url, url) &&
session.transport === transport &&
session.mode === mode,
);
if (reusable?.transport === "twilio") {
await this.#refreshTwilioVoiceCallStatus(reusable);
if (reusable.state !== "active") {
reusable = undefined;
}
}
const speechInstructions = request.message ?? this.params.config.realtime.introMessage;
if (reusable) {
await this.#refreshBrowserHealthForChromeSession(reusable);
reusable.notes = [
...reusable.notes.filter((note) => note !== "Reused existing active Meet session."),
"Reused existing active Meet session.",
];
noteSession(reusable, "Reused existing active Meet session.");
reusable.updatedAt = nowIso();
const spoken =
isGoogleMeetTalkBackMode(mode) && speechInstructions
@@ -472,10 +485,14 @@ export class GoogleMeetRuntime {
"Twilio transport requires a Meet dial-in phone number. Google Meet URLs do not include dial-in details; pass dialInNumber with optional pin/dtmfSequence, configure twilio.defaultDialInNumber, or use chrome/chrome-node transport.",
);
}
const dtmfSequence = buildMeetDtmfSequence({
const rawDtmfSequence = buildMeetDtmfSequence({
pin: request.pin ?? this.params.config.twilio.defaultPin,
dtmfSequence: request.dtmfSequence ?? this.params.config.twilio.defaultDtmfSequence,
});
const dtmfSequence =
request.dtmfSequence || this.params.config.twilio.defaultDtmfSequence
? rawDtmfSequence
: prefixDtmfWait(rawDtmfSequence, this.params.config.voiceCall.dtmfDelayMs);
const voiceCallResult = this.params.config.voiceCall.enabled
? await joinMeetViaVoiceCallGateway({
config: this.params.config,
@@ -543,7 +560,12 @@ export class GoogleMeetRuntime {
this.#sessionStops.delete(sessionId);
this.#sessionSpeakers.delete(sessionId);
this.#sessionHealth.delete(sessionId);
await stop();
try {
await stop();
} finally {
session.state = "ended";
session.updatedAt = nowIso();
}
}
session.state = "ended";
session.updatedAt = nowIso();
@@ -559,15 +581,23 @@ export class GoogleMeetRuntime {
return { found: false, spoken: false };
}
if (session.transport === "twilio" && session.twilio?.voiceCallId) {
await speakMeetViaVoiceCallGateway({
config: this.params.config,
callId: session.twilio.voiceCallId,
message:
instructions ||
this.params.config.voiceCall.introMessage ||
this.params.config.realtime.introMessage ||
"",
});
try {
await speakMeetViaVoiceCallGateway({
config: this.params.config,
callId: session.twilio.voiceCallId,
message:
instructions ||
this.params.config.voiceCall.introMessage ||
this.params.config.realtime.introMessage ||
"",
});
} catch (err) {
if (!isVoiceCallMissingError(err)) {
throw err;
}
this.#markTwilioSessionEnded(session, "Voice Call is no longer active.");
return { found: true, spoken: false, session };
}
session.twilio.introSent = true;
session.updatedAt = nowIso();
return { found: true, spoken: true, session };
@@ -801,6 +831,41 @@ export class GoogleMeetRuntime {
await this.#refreshBrowserHealthForChromeSession(session, { force: true, readOnly: true });
return;
}
if (session.transport === "twilio") {
await this.#refreshTwilioVoiceCallStatus(session);
return;
}
this.#refreshSpeechReadiness(session);
}
/**
 * Retires a session locally: removes its entries from the per-session
 * stop/speaker/health registries, marks it as ended with a fresh timestamp,
 * and records `reason` in the session notes.
 */
#markTwilioSessionEnded(session: GoogleMeetSession, reason: string) {
  const { id: sessionId } = session;

  this.#sessionStops.delete(sessionId);
  this.#sessionSpeakers.delete(sessionId);
  this.#sessionHealth.delete(sessionId);

  session.updatedAt = nowIso();
  session.state = "ended";
  noteSession(session, reason);
}
/**
 * Checks a session's delegated call against the Voice Call gateway and ends
 * the session when the gateway explicitly reports the call as not found.
 *
 * Sessions without a voice call id, or that are no longer active, skip the
 * lookup. Lookup failures are logged at debug level and otherwise ignored, so
 * the session keeps its current state. Speech readiness is refreshed in every
 * case.
 */
async #refreshTwilioVoiceCallStatus(session: GoogleMeetSession) {
  const delegatedCallId = session.twilio?.voiceCallId;
  if (delegatedCallId && session.state === "active") {
    try {
      const lookup = await getMeetVoiceCallGatewayCall({
        config: this.params.config,
        callId: delegatedCallId,
      });
      // Only an explicit `found: false` retires the session; a reply without
      // the flag leaves it untouched.
      if (lookup.found === false) {
        this.#markTwilioSessionEnded(session, "Voice Call is no longer active.");
      }
    } catch (cause) {
      this.params.logger.debug?.(
        `[google-meet] voice-call status refresh ignored: ${formatErrorMessage(cause)}`,
      );
    }
  }
  this.#refreshSpeechReadiness(session);
}

View File

@@ -44,3 +44,14 @@ export function buildMeetDtmfSequence(params: {
}
return compactPin.endsWith("#") ? compactPin : `${compactPin}#`;
}
/**
 * Prepends Twilio wait digits to a DTMF sequence so the real digits start
 * playing roughly `delayMs` after playback begins.
 *
 * One `w` is emitted per started 500 ms of delay (the delay is rounded up),
 * e.g. 12000 ms becomes 24 leading `w` characters.
 *
 * @param sequence DTMF digits to delay; `undefined` or an empty string is
 *   returned unchanged.
 * @param delayMs Requested delay in milliseconds. Zero, negative, `NaN`, and
 *   infinite values add no wait digits.
 * @returns The sequence with leading wait digits, or the input unchanged when
 *   there is nothing to prefix.
 */
export function prefixDtmfWait(sequence: string | undefined, delayMs: number): string | undefined {
  // A non-finite delay must not reach String.prototype.repeat: Infinity makes
  // it throw a RangeError, which would abort the whole join on a bad config value.
  if (!sequence || !Number.isFinite(delayMs) || delayMs <= 0) {
    return sequence;
  }
  // delayMs is finite and positive here, so the count is always at least 1.
  const waitCount = Math.ceil(delayMs / 500);
  return `${"w".repeat(waitCount)}${sequence}`;
}

View File

@@ -1,6 +1,10 @@
import { describe, expect, it, vi, beforeEach } from "vitest";
import { resolveGoogleMeetConfig } from "./config.js";
import { joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js";
import {
endMeetVoiceCallGatewayCall,
getMeetVoiceCallGatewayCall,
joinMeetViaVoiceCallGateway,
} from "./voice-call-gateway.js";
const gatewayMocks = vi.hoisted(() => ({
request: vi.fn(),
@@ -100,4 +104,38 @@ describe("Google Meet voice-call gateway", () => {
expect.stringContaining("Skipped intro speech because realtime bridge was not ready"),
);
});
it("treats missing delegated calls as already ended", async () => {
  const config = resolveGoogleMeetConfig({
    voiceCall: { gatewayUrl: "ws://127.0.0.1:18789" },
  });
  // The gateway rejects the end request because the call is already gone.
  gatewayMocks.request.mockRejectedValueOnce(new Error("Call not found"));

  const ended = endMeetVoiceCallGatewayCall({ config, callId: "call-1" });

  // The rejection is absorbed: ending a missing call resolves normally.
  await expect(ended).resolves.toBeUndefined();
  expect(gatewayMocks.request).toHaveBeenCalledWith(
    "voicecall.end",
    { callId: "call-1" },
    { timeoutMs: 30_000 },
  );
});
it("reads delegated call status from the gateway", async () => {
  const config = resolveGoogleMeetConfig({
    voiceCall: { gatewayUrl: "ws://127.0.0.1:18789" },
  });
  gatewayMocks.request.mockResolvedValueOnce({ found: false });

  const status = getMeetVoiceCallGatewayCall({ config, callId: "call-1" });

  // The gateway reply is handed back to the caller as-is.
  await expect(status).resolves.toEqual({
    found: false,
  });
  expect(gatewayMocks.request).toHaveBeenCalledWith(
    "voicecall.status",
    { callId: "call-1" },
    { timeoutMs: 30_000 },
  );
});
});

View File

@@ -19,6 +19,11 @@ type VoiceCallSpeakResult = {
error?: string;
};
/** Reply shape this module expects from the `voicecall.status` gateway request. */
type VoiceCallStatusResult = {
// Whether the gateway knows the call; optional, so it may be absent from a reply.
found?: boolean;
// Call record as sent by the gateway; its shape is not modelled in this module.
call?: unknown;
};
type VoiceCallMeetJoinResult = {
callId: string;
dtmfSent: boolean;
@@ -77,6 +82,11 @@ async function createConnectedGatewayClient(
return client!;
}
/**
 * Reports whether `error` indicates that the targeted call is missing or no
 * longer active.
 *
 * The check is a case-insensitive substring match on the formatted error
 * message for "call not found" or "call is not active".
 */
export function isVoiceCallMissingError(error: unknown): boolean {
  const normalized = formatErrorMessage(error).toLowerCase();
  const missingCallMarkers = ["call not found", "call is not active"];
  return missingCallMarkers.some((marker) => normalized.includes(marker));
}
export async function joinMeetViaVoiceCallGateway(params: {
config: GoogleMeetConfig;
dialInNumber: string;
@@ -173,13 +183,39 @@ export async function endMeetVoiceCallGatewayCall(params: {
try {
client = await createConnectedGatewayClient(params.config);
await client.request(
"voicecall.end",
try {
await client.request(
"voicecall.end",
{
callId: params.callId,
},
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
);
} catch (err) {
if (!isVoiceCallMissingError(err)) {
throw err;
}
}
} finally {
await client?.stopAndWait({ timeoutMs: 1_000 });
}
}
export async function getMeetVoiceCallGatewayCall(params: {
config: GoogleMeetConfig;
callId: string;
}): Promise<VoiceCallStatusResult> {
let client: VoiceCallGatewayClient | undefined;
try {
client = await createConnectedGatewayClient(params.config);
return (await client.request(
"voicecall.status",
{
callId: params.callId,
},
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
);
)) as VoiceCallStatusResult;
} finally {
await client?.stopAndWait({ timeoutMs: 1_000 });
}