fix: stabilize google meet twilio joins

This commit is contained in:
Peter Steinberger
2026-05-06 09:16:40 +01:00
parent 2eaf8ad712
commit a1b49c4b20
10 changed files with 249 additions and 41 deletions

View File

@@ -43,7 +43,11 @@ import {
setupGoogleMeetPlugin,
} from "./src/test-support/plugin-harness.js";
import { __testing as chromeTransportTesting } from "./src/transports/chrome.js";
import { buildMeetDtmfSequence, normalizeDialInNumber } from "./src/transports/twilio.js";
import {
buildMeetDtmfSequence,
normalizeDialInNumber,
prefixDtmfWait,
} from "./src/transports/twilio.js";
import type { GoogleMeetSession } from "./src/transports/types.js";
const voiceCallMocks = vi.hoisted(() => ({
@@ -53,6 +57,13 @@ const voiceCallMocks = vi.hoisted(() => ({
introSent: true,
})),
endMeetVoiceCallGatewayCall: vi.fn(async () => {}),
getMeetVoiceCallGatewayCall: vi.fn(
async (): Promise<{ found: boolean; call?: { callId: string } }> => ({
found: true,
call: { callId: "call-1" },
}),
),
isVoiceCallMissingError: vi.fn((error: unknown) => String(error).includes("Call not found")),
speakMeetViaVoiceCallGateway: vi.fn(async () => {}),
}));
@@ -82,6 +93,8 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", async (importOriginal) => {
vi.mock("./src/voice-call-gateway.js", () => ({
joinMeetViaVoiceCallGateway: voiceCallMocks.joinMeetViaVoiceCallGateway,
endMeetVoiceCallGatewayCall: voiceCallMocks.endMeetVoiceCallGatewayCall,
getMeetVoiceCallGatewayCall: voiceCallMocks.getMeetVoiceCallGatewayCall,
isVoiceCallMissingError: voiceCallMocks.isVoiceCallMissingError,
speakMeetViaVoiceCallGateway: voiceCallMocks.speakMeetViaVoiceCallGateway,
}));
@@ -313,6 +326,20 @@ type TestBridgeProcess = {
describe("google-meet plugin", () => {
beforeEach(() => {
vi.clearAllMocks();
voiceCallMocks.joinMeetViaVoiceCallGateway.mockResolvedValue({
callId: "call-1",
dtmfSent: true,
introSent: true,
});
voiceCallMocks.endMeetVoiceCallGatewayCall.mockResolvedValue(undefined);
voiceCallMocks.getMeetVoiceCallGatewayCall.mockResolvedValue({
found: true,
call: { callId: "call-1" },
});
voiceCallMocks.isVoiceCallMissingError.mockImplementation((error: unknown) =>
String(error).includes("Call not found"),
);
voiceCallMocks.speakMeetViaVoiceCallGateway.mockResolvedValue(undefined);
});
afterEach(() => {
@@ -388,7 +415,7 @@ describe("google-meet plugin", () => {
voiceCall: {
enabled: true,
requestTimeoutMs: 30000,
dtmfDelayMs: 2500,
dtmfDelayMs: 12000,
postDtmfSpeechDelayMs: 5000,
},
realtime: {
@@ -1226,6 +1253,7 @@ describe("google-meet plugin", () => {
expect(normalizeDialInNumber("+1 (555) 123-4567")).toBe("+15551234567");
expect(buildMeetDtmfSequence({ pin: "123 456" })).toBe("123456#");
expect(buildMeetDtmfSequence({ dtmfSequence: "ww123#" })).toBe("ww123#");
expect(prefixDtmfWait("123456#", 12000)).toBe("wwwwwwwwwwwwwwwwwwwwwwww123456#");
});
it("joins a Twilio session through the tool without page parsing", async () => {
@@ -1246,7 +1274,7 @@ describe("google-meet plugin", () => {
twilio: {
dialInNumber: "+15551234567",
pinProvided: true,
dtmfSequence: "123456#",
dtmfSequence: "wwwwwwwwwwwwwwwwwwwwwwww123456#",
voiceCallId: "call-1",
dtmfSent: true,
introSent: true,
@@ -1256,7 +1284,7 @@ describe("google-meet plugin", () => {
expect.objectContaining({
config: expect.objectContaining({ defaultTransport: "twilio" }),
dialInNumber: "+15551234567",
dtmfSequence: "123456#",
dtmfSequence: "wwwwwwwwwwwwwwwwwwwwwwww123456#",
logger: expect.objectContaining({ info: expect.any(Function) }),
message: "Say exactly: I'm here and listening.",
sessionKey: expect.stringMatching(/^voice:google-meet:meet_/),
@@ -1325,6 +1353,34 @@ describe("google-meet plugin", () => {
});
});
// Regression test: a Twilio session whose delegated Voice Call has disappeared
// must not be handed back by a later join for the same meeting.
it("does not reuse Twilio Meet sessions whose delegated call is no longer active", async () => {
// The next gateway status lookup reports the delegated call as missing.
// NOTE(review): presumably consumed by the second join's reuse check — confirm.
voiceCallMocks.getMeetVoiceCallGatewayCall.mockResolvedValueOnce({ found: false });
const { tools } = setup({ defaultTransport: "twilio" });
// Narrow the tool to just the result fields this test inspects.
const tool = tools[0] as {
execute: (
id: string,
params: unknown,
) => Promise<{ details: { session: { id: string; state: string; notes: string[] } } }>;
};
// Two joins with identical URL, dial-in number and PIN.
const first = await tool.execute("id", {
action: "join",
url: "https://meet.google.com/abc-defg-hij",
dialInNumber: "+15551234567",
pin: "123456",
});
const second = await tool.execute("id", {
action: "join",
url: "https://meet.google.com/abc-defg-hij",
dialInNumber: "+15551234567",
pin: "123456",
});
// After the second join, the first session is expected to be ended and annotated.
expect(first.details.session.state).toBe("ended");
expect(first.details.session.notes).toContain("Voice Call is no longer active.");
// The second join must produce a different session and place a second call.
expect(second.details.session.id).not.toBe(first.details.session.id);
expect(voiceCallMocks.joinMeetViaVoiceCallGateway).toHaveBeenCalledTimes(2);
});
it("delegates Twilio session speech through voice-call", async () => {
const { tools } = setup({ defaultTransport: "twilio" });
const tool = tools[0] as {

View File

@@ -145,13 +145,13 @@ const googleMeetConfigSchema = {
advanced: true,
},
"voiceCall.dtmfDelayMs": {
label: "Legacy DTMF Delay (ms)",
help: "Compatibility setting from the old post-connect DTMF flow. Twilio Meet joins now play DTMF before realtime connect.",
label: "DTMF Wait Before PIN (ms)",
help: "Leading Twilio wait time before playing a PIN-derived Meet DTMF sequence. Increase it if Meet asks for the PIN after DTMF was sent.",
advanced: true,
},
"voiceCall.postDtmfSpeechDelayMs": {
label: "Legacy Post-DTMF Speech Delay (ms)",
help: "Compatibility setting from the old delayed-speech flow. Twilio Meet joins now carry the intro as the initial Voice Call message.",
label: "Post-DTMF Speech Delay (ms)",
help: "Delay before requesting the realtime intro greeting after Voice Call starts the Twilio leg.",
advanced: true,
},
"voiceCall.introMessage": { label: "Voice Call Intro Message", advanced: true },

View File

@@ -216,7 +216,7 @@ const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
voiceCall: {
enabled: true,
requestTimeoutMs: 30_000,
dtmfDelayMs: 2_500,
dtmfDelayMs: 12_000,
postDtmfSpeechDelayMs: 5_000,
},
realtime: {

View File

@@ -19,7 +19,11 @@ import {
recoverCurrentMeetTab,
recoverCurrentMeetTabOnNode,
} from "./transports/chrome.js";
import { buildMeetDtmfSequence, normalizeDialInNumber } from "./transports/twilio.js";
import {
buildMeetDtmfSequence,
normalizeDialInNumber,
prefixDtmfWait,
} from "./transports/twilio.js";
import type {
GoogleMeetChromeHealth,
GoogleMeetJoinRequest,
@@ -28,6 +32,8 @@ import type {
} from "./transports/types.js";
import {
endMeetVoiceCallGatewayCall,
getMeetVoiceCallGatewayCall,
isVoiceCallMissingError,
joinMeetViaVoiceCallGateway,
speakMeetViaVoiceCallGateway,
} from "./voice-call-gateway.js";
@@ -133,6 +139,10 @@ function isManagedChromeBrowserSession(session: GoogleMeetSession): boolean {
);
}
/**
 * Records `note` on the session so that it appears exactly once, as the last entry.
 *
 * Any earlier occurrences of the same text are removed first; `session.notes`
 * is replaced with a new array rather than mutated in place.
 */
function noteSession(session: GoogleMeetSession, note: string): void {
  const withoutNote = session.notes.filter((existing) => existing !== note);
  withoutNote.push(note);
  session.notes = withoutNote;
}
function evaluateSpeechReadiness(session: GoogleMeetSession): {
ready: boolean;
reason?: NonNullable<GoogleMeetChromeHealth["speechBlockedReason"]>;
@@ -365,20 +375,23 @@ export class GoogleMeetRuntime {
const url = normalizeMeetUrl(request.url);
const transport = resolveTransport(request.transport, this.params.config);
const mode = resolveMode(request.mode, this.params.config);
const reusable = this.list().find(
let reusable = this.list().find(
(session) =>
session.state === "active" &&
isSameMeetUrlForReuse(session.url, url) &&
session.transport === transport &&
session.mode === mode,
);
if (reusable?.transport === "twilio") {
await this.#refreshTwilioVoiceCallStatus(reusable);
if (reusable.state !== "active") {
reusable = undefined;
}
}
const speechInstructions = request.message ?? this.params.config.realtime.introMessage;
if (reusable) {
await this.#refreshBrowserHealthForChromeSession(reusable);
reusable.notes = [
...reusable.notes.filter((note) => note !== "Reused existing active Meet session."),
"Reused existing active Meet session.",
];
noteSession(reusable, "Reused existing active Meet session.");
reusable.updatedAt = nowIso();
const spoken =
isGoogleMeetTalkBackMode(mode) && speechInstructions
@@ -472,10 +485,14 @@ export class GoogleMeetRuntime {
"Twilio transport requires a Meet dial-in phone number. Google Meet URLs do not include dial-in details; pass dialInNumber with optional pin/dtmfSequence, configure twilio.defaultDialInNumber, or use chrome/chrome-node transport.",
);
}
const dtmfSequence = buildMeetDtmfSequence({
const rawDtmfSequence = buildMeetDtmfSequence({
pin: request.pin ?? this.params.config.twilio.defaultPin,
dtmfSequence: request.dtmfSequence ?? this.params.config.twilio.defaultDtmfSequence,
});
const dtmfSequence =
request.dtmfSequence || this.params.config.twilio.defaultDtmfSequence
? rawDtmfSequence
: prefixDtmfWait(rawDtmfSequence, this.params.config.voiceCall.dtmfDelayMs);
const voiceCallResult = this.params.config.voiceCall.enabled
? await joinMeetViaVoiceCallGateway({
config: this.params.config,
@@ -543,7 +560,12 @@ export class GoogleMeetRuntime {
this.#sessionStops.delete(sessionId);
this.#sessionSpeakers.delete(sessionId);
this.#sessionHealth.delete(sessionId);
await stop();
try {
await stop();
} finally {
session.state = "ended";
session.updatedAt = nowIso();
}
}
session.state = "ended";
session.updatedAt = nowIso();
@@ -559,15 +581,23 @@ export class GoogleMeetRuntime {
return { found: false, spoken: false };
}
if (session.transport === "twilio" && session.twilio?.voiceCallId) {
await speakMeetViaVoiceCallGateway({
config: this.params.config,
callId: session.twilio.voiceCallId,
message:
instructions ||
this.params.config.voiceCall.introMessage ||
this.params.config.realtime.introMessage ||
"",
});
try {
await speakMeetViaVoiceCallGateway({
config: this.params.config,
callId: session.twilio.voiceCallId,
message:
instructions ||
this.params.config.voiceCall.introMessage ||
this.params.config.realtime.introMessage ||
"",
});
} catch (err) {
if (!isVoiceCallMissingError(err)) {
throw err;
}
this.#markTwilioSessionEnded(session, "Voice Call is no longer active.");
return { found: true, spoken: false, session };
}
session.twilio.introSent = true;
session.updatedAt = nowIso();
return { found: true, spoken: true, session };
@@ -801,6 +831,41 @@ export class GoogleMeetRuntime {
await this.#refreshBrowserHealthForChromeSession(session, { force: true, readOnly: true });
return;
}
if (session.transport === "twilio") {
await this.#refreshTwilioVoiceCallStatus(session);
return;
}
this.#refreshSpeechReadiness(session);
}
/**
 * Marks a session as ended, forgets its per-session runtime entries and
 * records `reason` in the session notes.
 */
#markTwilioSessionEnded(session: GoogleMeetSession, reason: string) {
  const sessionId = session.id;
  // Drop the stop handler, speaker and health entries tracked for this session.
  this.#sessionStops.delete(sessionId);
  this.#sessionSpeakers.delete(sessionId);
  this.#sessionHealth.delete(sessionId);
  session.state = "ended";
  session.updatedAt = nowIso();
  noteSession(session, reason);
}
/**
 * Checks the delegated Voice Call behind a Twilio session and ends the session
 * when the gateway reports that the call cannot be found.
 *
 * The gateway is only queried for sessions that are still "active" and have a
 * voice call id. Lookup failures are logged at debug level and otherwise
 * ignored. Speech readiness is refreshed on every path.
 */
async #refreshTwilioVoiceCallStatus(session: GoogleMeetSession) {
  const voiceCallId = session.twilio?.voiceCallId;
  if (voiceCallId && session.state === "active") {
    try {
      const { found } = await getMeetVoiceCallGatewayCall({
        config: this.params.config,
        callId: voiceCallId,
      });
      // Only an explicit `false` ends the session; a missing flag leaves it alone.
      if (found === false) {
        this.#markTwilioSessionEnded(session, "Voice Call is no longer active.");
      }
    } catch (error) {
      this.params.logger.debug?.(
        `[google-meet] voice-call status refresh ignored: ${formatErrorMessage(error)}`,
      );
    }
  }
  this.#refreshSpeechReadiness(session);
}

View File

@@ -44,3 +44,14 @@ export function buildMeetDtmfSequence(params: {
}
return compactPin.endsWith("#") ? compactPin : `${compactPin}#`;
}
/**
 * Prepends wait characters to a DTMF sequence so the digits are played after a delay.
 *
 * One `w` is emitted per 500 ms of `delayMs`, rounded up (Twilio's `sendDigits`
 * documents `w` as a half-second pause).
 *
 * @param sequence - DTMF digits to delay; returned unchanged when empty or undefined.
 * @param delayMs - Desired leading wait in milliseconds.
 * @returns The sequence with the wait prefix, or the sequence unchanged when it
 *   is empty/undefined or `delayMs` is zero, negative, NaN or infinite.
 */
export function prefixDtmfWait(sequence: string | undefined, delayMs: number): string | undefined {
  // A non-finite delay cannot be expressed as a wait prefix: `"w".repeat(Infinity)`
  // throws a RangeError, so it is treated like "no delay" instead.
  if (!sequence || !Number.isFinite(delayMs) || delayMs <= 0) {
    return sequence;
  }
  // delayMs is finite and positive here, so at least one wait character is emitted.
  const waitCount = Math.ceil(delayMs / 500);
  return `${"w".repeat(waitCount)}${sequence}`;
}

View File

@@ -1,6 +1,10 @@
import { describe, expect, it, vi, beforeEach } from "vitest";
import { resolveGoogleMeetConfig } from "./config.js";
import { joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js";
import {
endMeetVoiceCallGatewayCall,
getMeetVoiceCallGatewayCall,
joinMeetViaVoiceCallGateway,
} from "./voice-call-gateway.js";
const gatewayMocks = vi.hoisted(() => ({
request: vi.fn(),
@@ -100,4 +104,38 @@ describe("Google Meet voice-call gateway", () => {
expect.stringContaining("Skipped intro speech because realtime bridge was not ready"),
);
});
// Ending a call the gateway no longer knows about should succeed quietly.
it("treats missing delegated calls as already ended", async () => {
// The gateway request rejects with the "Call not found" error.
gatewayMocks.request.mockRejectedValueOnce(new Error("Call not found"));
const config = resolveGoogleMeetConfig({
voiceCall: { gatewayUrl: "ws://127.0.0.1:18789" },
});
// The rejection must be swallowed: the helper resolves instead of throwing.
await expect(
endMeetVoiceCallGatewayCall({ config, callId: "call-1" }),
).resolves.toBeUndefined();
// The end request is still issued with the call id and a 30 s timeout.
expect(gatewayMocks.request).toHaveBeenCalledWith(
"voicecall.end",
{ callId: "call-1" },
{ timeoutMs: 30_000 },
);
});
// The status helper should pass the gateway's answer straight through.
it("reads delegated call status from the gateway", async () => {
// The gateway answers that the call is unknown.
gatewayMocks.request.mockResolvedValueOnce({ found: false });
const config = resolveGoogleMeetConfig({
voiceCall: { gatewayUrl: "ws://127.0.0.1:18789" },
});
// The helper's result equals the gateway payload.
await expect(getMeetVoiceCallGatewayCall({ config, callId: "call-1" })).resolves.toEqual({
found: false,
});
// The lookup goes out as a "voicecall.status" request with a 30 s timeout.
expect(gatewayMocks.request).toHaveBeenCalledWith(
"voicecall.status",
{ callId: "call-1" },
{ timeoutMs: 30_000 },
);
});
});

View File

@@ -19,6 +19,11 @@ type VoiceCallSpeakResult = {
error?: string;
};
/** Shape of the gateway reply to a "voicecall.status" request. */
type VoiceCallStatusResult = {
// Callers in this plugin treat an explicit `false` as "the call no longer exists".
found?: boolean;
// Opaque call record; left untyped because nothing visible here inspects it.
call?: unknown;
};
type VoiceCallMeetJoinResult = {
callId: string;
dtmfSent: boolean;
@@ -77,6 +82,11 @@ async function createConnectedGatewayClient(
return client!;
}
/**
 * Reports whether an error indicates that the referenced voice call is unknown
 * or no longer active.
 *
 * The check is a case-insensitive substring match on the formatted error message.
 */
export function isVoiceCallMissingError(error: unknown): boolean {
  const normalized = formatErrorMessage(error).toLowerCase();
  const missingCallMarkers = ["call not found", "call is not active"];
  return missingCallMarkers.some((marker) => normalized.includes(marker));
}
export async function joinMeetViaVoiceCallGateway(params: {
config: GoogleMeetConfig;
dialInNumber: string;
@@ -173,13 +183,39 @@ export async function endMeetVoiceCallGatewayCall(params: {
try {
client = await createConnectedGatewayClient(params.config);
await client.request(
"voicecall.end",
try {
await client.request(
"voicecall.end",
{
callId: params.callId,
},
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
);
} catch (err) {
if (!isVoiceCallMissingError(err)) {
throw err;
}
}
} finally {
await client?.stopAndWait({ timeoutMs: 1_000 });
}
}
export async function getMeetVoiceCallGatewayCall(params: {
config: GoogleMeetConfig;
callId: string;
}): Promise<VoiceCallStatusResult> {
let client: VoiceCallGatewayClient | undefined;
try {
client = await createConnectedGatewayClient(params.config);
return (await client.request(
"voicecall.status",
{
callId: params.callId,
},
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
);
)) as VoiceCallStatusResult;
} finally {
await client?.stopAndWait({ timeoutMs: 1_000 });
}