fix: sequence meet dtmf before realtime bridge

This commit is contained in:
Peter Steinberger
2026-05-01 07:04:53 +01:00
parent 42d73fd955
commit ae07d57f9d
15 changed files with 151 additions and 61 deletions

View File

@@ -118,9 +118,14 @@ const googleMeetConfigSchema = {
label: "Voice Call Request Timeout (ms)",
advanced: true,
},
"voiceCall.dtmfDelayMs": { label: "DTMF Delay (ms)", advanced: true },
"voiceCall.dtmfDelayMs": {
label: "Legacy DTMF Delay (ms)",
help: "Compatibility setting from the old post-connect DTMF flow. Twilio Meet joins now play DTMF before realtime connect.",
advanced: true,
},
"voiceCall.postDtmfSpeechDelayMs": {
label: "Post-DTMF Speech Delay (ms)",
label: "Legacy Post-DTMF Speech Delay (ms)",
help: "Compatibility setting from the old delayed-speech flow. Twilio Meet joins now carry the intro as the initial Voice Call message.",
advanced: true,
},
"voiceCall.introMessage": { label: "Voice Call Intro Message", advanced: true },

View File

@@ -112,7 +112,8 @@
"advanced": true
},
"voiceCall.dtmfDelayMs": {
"label": "DTMF Delay (ms)",
"label": "Legacy DTMF Delay (ms)",
"help": "Compatibility setting from the old post-connect DTMF flow. Twilio Meet joins now play DTMF before realtime connect.",
"advanced": true
},
"voiceCall.introMessage": {

View File

@@ -27,12 +27,11 @@ describe("Google Meet voice-call gateway", () => {
gatewayMocks.startGatewayClientWhenEventLoopReady.mockClear();
});
it("starts Twilio Meet calls silently, sends DTMF, then speaks the realtime intro", async () => {
it("starts Twilio Meet calls with pre-connect DTMF and intro metadata", async () => {
const config = resolveGoogleMeetConfig({
voiceCall: {
gatewayUrl: "ws://127.0.0.1:18789",
dtmfDelayMs: 1,
postDtmfSpeechDelayMs: 1,
},
realtime: { introMessage: "Say exactly: I'm here and listening." },
});
@@ -50,26 +49,11 @@ describe("Google Meet voice-call gateway", () => {
{
to: "+15551234567",
mode: "conversation",
},
{ timeoutMs: 30_000 },
);
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
2,
"voicecall.dtmf",
{
callId: "call-1",
digits: "123456#",
},
{ timeoutMs: 30_000 },
);
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
3,
"voicecall.speak",
{
callId: "call-1",
message: "Say exactly: I'm here and listening.",
dtmfSequence: "123456#",
},
{ timeoutMs: 30_000 },
);
expect(gatewayMocks.request).toHaveBeenCalledTimes(1);
});
});

View File

@@ -1,4 +1,3 @@
import { setTimeout as sleep } from "node:timers/promises";
import {
GatewayClient,
startGatewayClientWhenEventLoopReady,
@@ -84,37 +83,14 @@ export async function joinMeetViaVoiceCallGateway(params: {
{
to: params.dialInNumber,
mode: "conversation",
...(params.message ? { message: params.message } : {}),
...(params.dtmfSequence ? { dtmfSequence: params.dtmfSequence } : {}),
},
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
)) as VoiceCallStartResult;
if (!start.callId) {
throw new Error(start.error || "voicecall.start did not return callId");
}
if (params.dtmfSequence) {
await sleep(params.config.voiceCall.dtmfDelayMs);
await client.request(
"voicecall.dtmf",
{
callId: start.callId,
digits: params.dtmfSequence,
},
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
);
}
if (params.message) {
await sleep(params.config.voiceCall.postDtmfSpeechDelayMs);
const spoken = (await client.request(
"voicecall.speak",
{
callId: start.callId,
message: params.message,
},
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
)) as VoiceCallSpeakResult;
if (spoken.success === false) {
throw new Error(spoken.error || "voicecall.speak failed");
}
}
return {
callId: start.callId,
dtmfSent: Boolean(params.dtmfSequence),

View File

@@ -325,10 +325,16 @@ describe("voice-call plugin", () => {
| undefined;
const respond = vi.fn();
await handler?.({
params: { message: "Hi", mode: "conversation", to: "+15550001234" },
params: {
dtmfSequence: "ww123456#",
message: "Hi",
mode: "conversation",
to: "+15550001234",
},
respond,
});
expect(runtimeStub.manager.initiateCall).toHaveBeenCalledWith("+15550001234", undefined, {
dtmfSequence: "ww123456#",
message: "Hi",
mode: "conversation",
});

View File

@@ -121,6 +121,7 @@ const VoiceCallToolSchema = Type.Union([
to: Type.Optional(Type.String({ description: "Call target" })),
message: Type.String({ description: "Intro message" }),
mode: Type.Optional(Type.Union([Type.Literal("notify"), Type.Literal("conversation")])),
dtmfSequence: Type.Optional(Type.String({ description: "DTMF digits to play before connect" })),
}),
Type.Object({
action: Type.Literal("continue_call"),
@@ -150,6 +151,7 @@ const VoiceCallToolSchema = Type.Union([
to: Type.Optional(Type.String({ description: "Call target" })),
sid: Type.Optional(Type.String({ description: "Call SID" })),
message: Type.Optional(Type.String({ description: "Optional intro message" })),
dtmfSequence: Type.Optional(Type.String({ description: "DTMF digits to play before connect" })),
}),
]);
@@ -275,10 +277,12 @@ export default definePluginEntry({
to: string;
message?: string;
mode?: "notify" | "conversation";
dtmfSequence?: string;
}) => {
const result = await params.rt.manager.initiateCall(params.to, undefined, {
message: params.message,
mode: params.mode,
dtmfSequence: params.dtmfSequence,
});
if (!result.success) {
params.respond(false, { error: result.error || "initiate failed" });
@@ -470,6 +474,7 @@ export default definePluginEntry({
try {
const to = normalizeOptionalString(params?.to) ?? "";
const message = normalizeOptionalString(params?.message) ?? "";
const dtmfSequence = normalizeOptionalString(params?.dtmfSequence);
if (!to) {
respond(false, { error: "to required" });
return;
@@ -483,6 +488,7 @@ export default definePluginEntry({
to,
message: message || undefined,
mode,
dtmfSequence,
});
} catch (err) {
sendError(respond, err);
@@ -518,6 +524,7 @@ export default definePluginEntry({
}
const result = await rt.manager.initiateCall(to, undefined, {
message,
dtmfSequence: normalizeOptionalString(rawParams.dtmfSequence),
mode:
rawParams.mode === "notify" || rawParams.mode === "conversation"
? rawParams.mode
@@ -602,6 +609,7 @@ export default definePluginEntry({
throw new Error("to required for call");
}
const result = await rt.manager.initiateCall(to, undefined, {
dtmfSequence: normalizeOptionalString(rawParams.dtmfSequence),
message: normalizeOptionalString(rawParams.message),
});
if (!result.success) {

View File

@@ -3,6 +3,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
const {
addTranscriptEntryMock,
clearMaxDurationTimerMock,
generateDtmfRedirectTwimlMock,
generateNotifyTwimlMock,
getCallByProviderCallIdMock,
mapVoiceToPollyMock,
@@ -12,6 +13,7 @@ const {
} = vi.hoisted(() => ({
addTranscriptEntryMock: vi.fn(),
clearMaxDurationTimerMock: vi.fn(),
generateDtmfRedirectTwimlMock: vi.fn(),
generateNotifyTwimlMock: vi.fn(),
getCallByProviderCallIdMock: vi.fn(),
mapVoiceToPollyMock: vi.fn(),
@@ -45,6 +47,7 @@ vi.mock("../voice-mapping.js", () => ({
}));
vi.mock("./twiml.js", () => ({
generateDtmfRedirectTwiml: generateDtmfRedirectTwimlMock,
generateNotifyTwiml: generateNotifyTwimlMock,
}));
@@ -69,6 +72,7 @@ describe("voice-call outbound helpers", () => {
beforeEach(() => {
vi.clearAllMocks();
mapVoiceToPollyMock.mockReturnValue("Polly.Joanna");
generateDtmfRedirectTwimlMock.mockReturnValue("<DtmfRedirect />");
generateNotifyTwimlMock.mockReturnValue("<Response />");
});
@@ -169,6 +173,51 @@ describe("voice-call outbound helpers", () => {
expect(persistCallRecordMock).toHaveBeenCalledTimes(2);
});
// Verifies that a conversation-mode call carrying a dtmfSequence hands the provider
// pre-connect TwiML (not inline TwiML) and keeps the intro message in call metadata.
it("initiates conversation calls with pre-connect DTMF TwiML", async () => {
// Provider stub: records its arguments and resolves with a fixed provider call id.
const initiateProviderCall = vi.fn(async () => ({ providerCallId: "provider-1" }));
// Hand-built context holding only the fields this test supplies; it is cast with
// `as never` below to bypass the full context type.
const ctx = {
activeCalls: new Map(),
providerCallIdMap: new Map(),
provider: { name: "twilio", initiateCall: initiateProviderCall },
config: {
maxConcurrentCalls: 3,
outbound: { defaultMode: "conversation" },
fromNumber: "+14155550100",
},
storePath: "/tmp/voice-call.json",
webhookUrl: "https://example.com/webhook",
};
const result = await initiateCall(ctx as never, "+14155550123", "session-1", {
mode: "conversation",
message: "hello meet",
dtmfSequence: "ww123456#",
});
// The call id is generated by initiateCall, so only its type is asserted here.
expect(result).toEqual({
callId: expect.any(String),
success: true,
});
const callId = result.callId;
// The TwiML generator must receive the raw digit sequence and the context's webhook URL.
expect(generateDtmfRedirectTwimlMock).toHaveBeenCalledWith(
"ww123456#",
"https://example.com/webhook",
);
// "<DtmfRedirect />" is the value the suite's beforeEach stubs for the mocked generator;
// it must be forwarded as preConnectTwiml while inlineTwiml stays undefined.
expect(initiateProviderCall).toHaveBeenCalledWith({
callId,
from: "+14155550100",
to: "+14155550123",
webhookUrl: "https://example.com/webhook",
inlineTwiml: undefined,
preConnectTwiml: "<DtmfRedirect />",
});
// The intro message and mode are expected on the stored call record's metadata.
expect(ctx.activeCalls.get(callId)?.metadata).toMatchObject({
initialMessage: "hello meet",
mode: "conversation",
});
});
it("fails initiateCall cleanly when provider initiation throws", async () => {
const ctx = {
activeCalls: new Map(),

View File

@@ -16,7 +16,7 @@ import { getCallByProviderCallId } from "./lookup.js";
import { addTranscriptEntry, transitionState } from "./state.js";
import { persistCallRecord } from "./store.js";
import { clearTranscriptWaiter, waitForFinalTranscript } from "./timers.js";
import { generateNotifyTwiml } from "./twiml.js";
import { generateDtmfRedirectTwiml, generateNotifyTwiml } from "./twiml.js";
type InitiateContext = Pick<
CallManagerContext,
@@ -118,6 +118,13 @@ export async function initiateCall(
typeof options === "string" ? { message: options } : (options ?? {});
const initialMessage = opts.message;
const mode = opts.mode ?? ctx.config.outbound.defaultMode;
const dtmfSequence = opts.dtmfSequence;
if (dtmfSequence) {
const validationError = validateDtmfDigits(dtmfSequence);
if (validationError) {
return { callId: "", success: false, error: validationError };
}
}
if (!ctx.provider) {
return { callId: "", success: false, error: "Provider not initialized" };
@@ -164,10 +171,13 @@ export async function initiateCall(
try {
// For notify mode with a message, use inline TwiML with <Say>.
let inlineTwiml: string | undefined;
let preConnectTwiml: string | undefined;
if (mode === "notify" && initialMessage) {
const pollyVoice = mapVoiceToPolly(resolvePreferredTtsVoice(ctx.config));
inlineTwiml = generateNotifyTwiml(initialMessage, pollyVoice);
console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
} else if (dtmfSequence) {
preConnectTwiml = generateDtmfRedirectTwiml(dtmfSequence, ctx.webhookUrl);
}
const result = await ctx.provider.initiateCall({
@@ -176,6 +186,7 @@ export async function initiateCall(
to,
webhookUrl: ctx.webhookUrl,
inlineTwiml,
preConnectTwiml,
});
callRecord.providerCallId = result.providerCallId;

View File

@@ -7,3 +7,11 @@ export function generateNotifyTwiml(message: string, voice: string): string {
<Hangup/>
</Response>`;
}
/**
 * Builds a TwiML document that plays a DTMF digit sequence and then redirects
 * the call to the given webhook with a POST request.
 *
 * Both arguments are passed through `escapeXml` before being embedded.
 *
 * @param digits - Digit sequence placed in the `<Play digits="...">` attribute.
 * @param webhookUrl - URL placed in the body of the `<Redirect>` element.
 * @returns The TwiML document as a string.
 */
export function generateDtmfRedirectTwiml(digits: string, webhookUrl: string): string {
  const playDigits = escapeXml(digits);
  const redirectTarget = escapeXml(webhookUrl);
  return `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Play digits="${playDigits}" />
<Redirect method="POST">${redirectTarget}</Redirect>
</Response>`;
}

View File

@@ -99,6 +99,41 @@ describe("TwilioProvider", () => {
expectStreamingTwiml(requireResponseBody(result.providerResponseBody));
});
// Verifies that TwiML passed as preConnectTwiml is returned for the first webhook
// request of the call only; an identical second request gets the streaming response.
it("serves pre-connect TwiML once before outbound streaming starts", async () => {
const provider = createProvider();
// Replace the provider's apiRequest member with a stub so no real API call is made;
// the cast is needed to reach it from the test.
(
provider as unknown as {
apiRequest: TwilioApiRequest;
}
).apiRequest = vi.fn<TwilioApiRequest>(async () => ({
sid: "CA999",
status: "queued",
}));
const preConnectTwiml = '<Response><Play digits="ww123456#" /></Response>';
await provider.initiateCall({
callId: "call-1",
from: "+15550000001",
to: "+15550000002",
webhookUrl: "https://example.ngrok.app/voice/twilio",
preConnectTwiml,
});
// First webhook for the call: the response body must be exactly the pre-connect TwiML.
const first = provider.parseWebhookEvent(
createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA999", {
callId: "call-1",
}),
);
expect(requireResponseBody(first.providerResponseBody)).toBe(preConnectTwiml);
// Same webhook again: the pre-connect TwiML must not be served a second time.
// NOTE(review): expectStreamingTwiml is defined elsewhere in this test file — confirm
// exactly what it asserts about the streaming response.
const second = provider.parseWebhookEvent(
createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA999", {
callId: "call-1",
}),
);
expectStreamingTwiml(requireResponseBody(second.providerResponseBody));
});
it("returns empty TwiML for status callbacks", () => {
const provider = createProvider();
const ctx = createContext("CallStatus=ringing&Direction=outbound-api", {

View File

@@ -516,8 +516,8 @@ export class TwilioProvider implements VoiceCallProvider {
/**
* Initiate an outbound call via Twilio API.
* If inlineTwiml is provided, uses that directly (for notify mode).
* Otherwise, uses webhook URL for dynamic TwiML.
* If inlineTwiml or preConnectTwiml is provided, the first webhook request
* receives that TwiML before normal dynamic TwiML resumes.
*/
async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
const url = new URL(input.webhookUrl);
@@ -533,6 +533,8 @@ export class TwilioProvider implements VoiceCallProvider {
if (input.inlineTwiml) {
this.twimlStorage.set(input.callId, input.inlineTwiml);
this.notifyCalls.add(input.callId);
} else if (input.preConnectTwiml) {
this.twimlStorage.set(input.callId, input.preConnectTwiml);
}
// Build request params - always use URL-based TwiML.

View File

@@ -214,6 +214,8 @@ export type InitiateCallInput = {
clientState?: Record<string, string>;
/** Inline TwiML served in response to the call's first webhook request (used for notify mode). */
inlineTwiml?: string;
/** TwiML to serve once before normal webhook-driven call handling resumes. */
preConnectTwiml?: string;
};
export type InitiateCallResult = {