mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 12:00:44 +00:00
fix: sequence meet dtmf before realtime bridge
This commit is contained in:
@@ -118,9 +118,14 @@ const googleMeetConfigSchema = {
|
||||
label: "Voice Call Request Timeout (ms)",
|
||||
advanced: true,
|
||||
},
|
||||
"voiceCall.dtmfDelayMs": { label: "DTMF Delay (ms)", advanced: true },
|
||||
"voiceCall.dtmfDelayMs": {
|
||||
label: "Legacy DTMF Delay (ms)",
|
||||
help: "Compatibility setting from the old post-connect DTMF flow. Twilio Meet joins now play DTMF before realtime connect.",
|
||||
advanced: true,
|
||||
},
|
||||
"voiceCall.postDtmfSpeechDelayMs": {
|
||||
label: "Post-DTMF Speech Delay (ms)",
|
||||
label: "Legacy Post-DTMF Speech Delay (ms)",
|
||||
help: "Compatibility setting from the old delayed-speech flow. Twilio Meet joins now carry the intro as the initial Voice Call message.",
|
||||
advanced: true,
|
||||
},
|
||||
"voiceCall.introMessage": { label: "Voice Call Intro Message", advanced: true },
|
||||
|
||||
@@ -112,7 +112,8 @@
|
||||
"advanced": true
|
||||
},
|
||||
"voiceCall.dtmfDelayMs": {
|
||||
"label": "DTMF Delay (ms)",
|
||||
"label": "Legacy DTMF Delay (ms)",
|
||||
"help": "Compatibility setting from the old post-connect DTMF flow. Twilio Meet joins now play DTMF before realtime connect.",
|
||||
"advanced": true
|
||||
},
|
||||
"voiceCall.introMessage": {
|
||||
|
||||
@@ -27,12 +27,11 @@ describe("Google Meet voice-call gateway", () => {
|
||||
gatewayMocks.startGatewayClientWhenEventLoopReady.mockClear();
|
||||
});
|
||||
|
||||
it("starts Twilio Meet calls silently, sends DTMF, then speaks the realtime intro", async () => {
|
||||
it("starts Twilio Meet calls with pre-connect DTMF and intro metadata", async () => {
|
||||
const config = resolveGoogleMeetConfig({
|
||||
voiceCall: {
|
||||
gatewayUrl: "ws://127.0.0.1:18789",
|
||||
dtmfDelayMs: 1,
|
||||
postDtmfSpeechDelayMs: 1,
|
||||
},
|
||||
realtime: { introMessage: "Say exactly: I'm here and listening." },
|
||||
});
|
||||
@@ -50,26 +49,11 @@ describe("Google Meet voice-call gateway", () => {
|
||||
{
|
||||
to: "+15551234567",
|
||||
mode: "conversation",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
"voicecall.dtmf",
|
||||
{
|
||||
callId: "call-1",
|
||||
digits: "123456#",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
|
||||
3,
|
||||
"voicecall.speak",
|
||||
{
|
||||
callId: "call-1",
|
||||
message: "Say exactly: I'm here and listening.",
|
||||
dtmfSequence: "123456#",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import { setTimeout as sleep } from "node:timers/promises";
|
||||
import {
|
||||
GatewayClient,
|
||||
startGatewayClientWhenEventLoopReady,
|
||||
@@ -84,37 +83,14 @@ export async function joinMeetViaVoiceCallGateway(params: {
|
||||
{
|
||||
to: params.dialInNumber,
|
||||
mode: "conversation",
|
||||
...(params.message ? { message: params.message } : {}),
|
||||
...(params.dtmfSequence ? { dtmfSequence: params.dtmfSequence } : {}),
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
)) as VoiceCallStartResult;
|
||||
if (!start.callId) {
|
||||
throw new Error(start.error || "voicecall.start did not return callId");
|
||||
}
|
||||
if (params.dtmfSequence) {
|
||||
await sleep(params.config.voiceCall.dtmfDelayMs);
|
||||
await client.request(
|
||||
"voicecall.dtmf",
|
||||
{
|
||||
callId: start.callId,
|
||||
digits: params.dtmfSequence,
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
);
|
||||
}
|
||||
if (params.message) {
|
||||
await sleep(params.config.voiceCall.postDtmfSpeechDelayMs);
|
||||
const spoken = (await client.request(
|
||||
"voicecall.speak",
|
||||
{
|
||||
callId: start.callId,
|
||||
message: params.message,
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
)) as VoiceCallSpeakResult;
|
||||
if (spoken.success === false) {
|
||||
throw new Error(spoken.error || "voicecall.speak failed");
|
||||
}
|
||||
}
|
||||
return {
|
||||
callId: start.callId,
|
||||
dtmfSent: Boolean(params.dtmfSequence),
|
||||
|
||||
@@ -325,10 +325,16 @@ describe("voice-call plugin", () => {
|
||||
| undefined;
|
||||
const respond = vi.fn();
|
||||
await handler?.({
|
||||
params: { message: "Hi", mode: "conversation", to: "+15550001234" },
|
||||
params: {
|
||||
dtmfSequence: "ww123456#",
|
||||
message: "Hi",
|
||||
mode: "conversation",
|
||||
to: "+15550001234",
|
||||
},
|
||||
respond,
|
||||
});
|
||||
expect(runtimeStub.manager.initiateCall).toHaveBeenCalledWith("+15550001234", undefined, {
|
||||
dtmfSequence: "ww123456#",
|
||||
message: "Hi",
|
||||
mode: "conversation",
|
||||
});
|
||||
|
||||
@@ -121,6 +121,7 @@ const VoiceCallToolSchema = Type.Union([
|
||||
to: Type.Optional(Type.String({ description: "Call target" })),
|
||||
message: Type.String({ description: "Intro message" }),
|
||||
mode: Type.Optional(Type.Union([Type.Literal("notify"), Type.Literal("conversation")])),
|
||||
dtmfSequence: Type.Optional(Type.String({ description: "DTMF digits to play before connect" })),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("continue_call"),
|
||||
@@ -150,6 +151,7 @@ const VoiceCallToolSchema = Type.Union([
|
||||
to: Type.Optional(Type.String({ description: "Call target" })),
|
||||
sid: Type.Optional(Type.String({ description: "Call SID" })),
|
||||
message: Type.Optional(Type.String({ description: "Optional intro message" })),
|
||||
dtmfSequence: Type.Optional(Type.String({ description: "DTMF digits to play before connect" })),
|
||||
}),
|
||||
]);
|
||||
|
||||
@@ -275,10 +277,12 @@ export default definePluginEntry({
|
||||
to: string;
|
||||
message?: string;
|
||||
mode?: "notify" | "conversation";
|
||||
dtmfSequence?: string;
|
||||
}) => {
|
||||
const result = await params.rt.manager.initiateCall(params.to, undefined, {
|
||||
message: params.message,
|
||||
mode: params.mode,
|
||||
dtmfSequence: params.dtmfSequence,
|
||||
});
|
||||
if (!result.success) {
|
||||
params.respond(false, { error: result.error || "initiate failed" });
|
||||
@@ -470,6 +474,7 @@ export default definePluginEntry({
|
||||
try {
|
||||
const to = normalizeOptionalString(params?.to) ?? "";
|
||||
const message = normalizeOptionalString(params?.message) ?? "";
|
||||
const dtmfSequence = normalizeOptionalString(params?.dtmfSequence);
|
||||
if (!to) {
|
||||
respond(false, { error: "to required" });
|
||||
return;
|
||||
@@ -483,6 +488,7 @@ export default definePluginEntry({
|
||||
to,
|
||||
message: message || undefined,
|
||||
mode,
|
||||
dtmfSequence,
|
||||
});
|
||||
} catch (err) {
|
||||
sendError(respond, err);
|
||||
@@ -518,6 +524,7 @@ export default definePluginEntry({
|
||||
}
|
||||
const result = await rt.manager.initiateCall(to, undefined, {
|
||||
message,
|
||||
dtmfSequence: normalizeOptionalString(rawParams.dtmfSequence),
|
||||
mode:
|
||||
rawParams.mode === "notify" || rawParams.mode === "conversation"
|
||||
? rawParams.mode
|
||||
@@ -602,6 +609,7 @@ export default definePluginEntry({
|
||||
throw new Error("to required for call");
|
||||
}
|
||||
const result = await rt.manager.initiateCall(to, undefined, {
|
||||
dtmfSequence: normalizeOptionalString(rawParams.dtmfSequence),
|
||||
message: normalizeOptionalString(rawParams.message),
|
||||
});
|
||||
if (!result.success) {
|
||||
|
||||
@@ -3,6 +3,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
const {
|
||||
addTranscriptEntryMock,
|
||||
clearMaxDurationTimerMock,
|
||||
generateDtmfRedirectTwimlMock,
|
||||
generateNotifyTwimlMock,
|
||||
getCallByProviderCallIdMock,
|
||||
mapVoiceToPollyMock,
|
||||
@@ -12,6 +13,7 @@ const {
|
||||
} = vi.hoisted(() => ({
|
||||
addTranscriptEntryMock: vi.fn(),
|
||||
clearMaxDurationTimerMock: vi.fn(),
|
||||
generateDtmfRedirectTwimlMock: vi.fn(),
|
||||
generateNotifyTwimlMock: vi.fn(),
|
||||
getCallByProviderCallIdMock: vi.fn(),
|
||||
mapVoiceToPollyMock: vi.fn(),
|
||||
@@ -45,6 +47,7 @@ vi.mock("../voice-mapping.js", () => ({
|
||||
}));
|
||||
|
||||
vi.mock("./twiml.js", () => ({
|
||||
generateDtmfRedirectTwiml: generateDtmfRedirectTwimlMock,
|
||||
generateNotifyTwiml: generateNotifyTwimlMock,
|
||||
}));
|
||||
|
||||
@@ -69,6 +72,7 @@ describe("voice-call outbound helpers", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mapVoiceToPollyMock.mockReturnValue("Polly.Joanna");
|
||||
generateDtmfRedirectTwimlMock.mockReturnValue("<DtmfRedirect />");
|
||||
generateNotifyTwimlMock.mockReturnValue("<Response />");
|
||||
});
|
||||
|
||||
@@ -169,6 +173,51 @@ describe("voice-call outbound helpers", () => {
|
||||
expect(persistCallRecordMock).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it("initiates conversation calls with pre-connect DTMF TwiML", async () => {
  // Shared literals, named once so the expectations below read against them.
  const webhookUrl = "https://example.com/webhook";
  const fromNumber = "+14155550100";
  const toNumber = "+14155550123";
  const dtmfSequence = "ww123456#";

  const initiateProviderCall = vi.fn(async () => ({ providerCallId: "provider-1" }));
  const ctx = {
    activeCalls: new Map(),
    providerCallIdMap: new Map(),
    provider: { name: "twilio", initiateCall: initiateProviderCall },
    config: {
      maxConcurrentCalls: 3,
      outbound: { defaultMode: "conversation" },
      fromNumber,
    },
    storePath: "/tmp/voice-call.json",
    webhookUrl,
  };

  const result = await initiateCall(ctx as never, toNumber, "session-1", {
    mode: "conversation",
    message: "hello meet",
    dtmfSequence,
  });

  // The call must be reported as started and carry a generated id.
  expect(result).toEqual({
    callId: expect.any(String),
    success: true,
  });
  const callId = result.callId;

  // The DTMF sequence is turned into TwiML (mocked to "<DtmfRedirect />")...
  expect(generateDtmfRedirectTwimlMock).toHaveBeenCalledWith(dtmfSequence, webhookUrl);
  // ...and handed to the provider as pre-connect TwiML, not inline TwiML.
  expect(initiateProviderCall).toHaveBeenCalledWith({
    callId,
    from: fromNumber,
    to: toNumber,
    webhookUrl,
    inlineTwiml: undefined,
    preConnectTwiml: "<DtmfRedirect />",
  });
  // The intro message stays on the call record metadata.
  expect(ctx.activeCalls.get(callId)?.metadata).toMatchObject({
    initialMessage: "hello meet",
    mode: "conversation",
  });
});
|
||||
|
||||
it("fails initiateCall cleanly when provider initiation throws", async () => {
|
||||
const ctx = {
|
||||
activeCalls: new Map(),
|
||||
|
||||
@@ -16,7 +16,7 @@ import { getCallByProviderCallId } from "./lookup.js";
|
||||
import { addTranscriptEntry, transitionState } from "./state.js";
|
||||
import { persistCallRecord } from "./store.js";
|
||||
import { clearTranscriptWaiter, waitForFinalTranscript } from "./timers.js";
|
||||
import { generateNotifyTwiml } from "./twiml.js";
|
||||
import { generateDtmfRedirectTwiml, generateNotifyTwiml } from "./twiml.js";
|
||||
|
||||
type InitiateContext = Pick<
|
||||
CallManagerContext,
|
||||
@@ -118,6 +118,13 @@ export async function initiateCall(
|
||||
typeof options === "string" ? { message: options } : (options ?? {});
|
||||
const initialMessage = opts.message;
|
||||
const mode = opts.mode ?? ctx.config.outbound.defaultMode;
|
||||
const dtmfSequence = opts.dtmfSequence;
|
||||
if (dtmfSequence) {
|
||||
const validationError = validateDtmfDigits(dtmfSequence);
|
||||
if (validationError) {
|
||||
return { callId: "", success: false, error: validationError };
|
||||
}
|
||||
}
|
||||
|
||||
if (!ctx.provider) {
|
||||
return { callId: "", success: false, error: "Provider not initialized" };
|
||||
@@ -164,10 +171,13 @@ export async function initiateCall(
|
||||
try {
|
||||
// For notify mode with a message, use inline TwiML with <Say>.
|
||||
let inlineTwiml: string | undefined;
|
||||
let preConnectTwiml: string | undefined;
|
||||
if (mode === "notify" && initialMessage) {
|
||||
const pollyVoice = mapVoiceToPolly(resolvePreferredTtsVoice(ctx.config));
|
||||
inlineTwiml = generateNotifyTwiml(initialMessage, pollyVoice);
|
||||
console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
|
||||
} else if (dtmfSequence) {
|
||||
preConnectTwiml = generateDtmfRedirectTwiml(dtmfSequence, ctx.webhookUrl);
|
||||
}
|
||||
|
||||
const result = await ctx.provider.initiateCall({
|
||||
@@ -176,6 +186,7 @@ export async function initiateCall(
|
||||
to,
|
||||
webhookUrl: ctx.webhookUrl,
|
||||
inlineTwiml,
|
||||
preConnectTwiml,
|
||||
});
|
||||
|
||||
callRecord.providerCallId = result.providerCallId;
|
||||
|
||||
@@ -7,3 +7,11 @@ export function generateNotifyTwiml(message: string, voice: string): string {
|
||||
<Hangup/>
|
||||
</Response>`;
|
||||
}
|
||||
|
||||
/**
 * Build a TwiML document that plays a DTMF sequence and then redirects
 * (via POST) to the given webhook URL.
 *
 * @param digits - DTMF sequence placed in the `<Play digits="…">` attribute; XML-escaped here.
 * @param webhookUrl - URL placed in the `<Redirect>` element body; XML-escaped here.
 * @returns The TwiML document as a string.
 */
export function generateDtmfRedirectTwiml(digits: string, webhookUrl: string): string {
  const playDigits = escapeXml(digits);
  const redirectTarget = escapeXml(webhookUrl);
  const documentLines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    "<Response>",
    `<Play digits="${playDigits}" />`,
    `<Redirect method="POST">${redirectTarget}</Redirect>`,
    "</Response>",
  ];
  return documentLines.join("\n");
}
|
||||
|
||||
@@ -99,6 +99,41 @@ describe("TwilioProvider", () => {
|
||||
expectStreamingTwiml(requireResponseBody(result.providerResponseBody));
|
||||
});
|
||||
|
||||
it("serves pre-connect TwiML once before outbound streaming starts", async () => {
  const provider = createProvider();
  // Replace the provider's apiRequest with a stub that reports a queued call.
  const apiRequestStub = vi.fn<TwilioApiRequest>(async () => ({
    sid: "CA999",
    status: "queued",
  }));
  (provider as unknown as { apiRequest: TwilioApiRequest }).apiRequest = apiRequestStub;

  const preConnectTwiml = '<Response><Play digits="ww123456#" /></Response>';
  await provider.initiateCall({
    callId: "call-1",
    from: "+15550000001",
    to: "+15550000002",
    webhookUrl: "https://example.ngrok.app/voice/twilio",
    preConnectTwiml,
  });

  // Parses the same "initiated" webhook for call-1, with a fresh context each time.
  const parseInitiatedWebhook = () =>
    provider.parseWebhookEvent(
      createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA999", {
        callId: "call-1",
      }),
    );

  // First webhook: the stored pre-connect TwiML is returned verbatim.
  const first = parseInitiatedWebhook();
  expect(requireResponseBody(first.providerResponseBody)).toBe(preConnectTwiml);

  // Second webhook: the response is the regular streaming TwiML.
  const second = parseInitiatedWebhook();
  expectStreamingTwiml(requireResponseBody(second.providerResponseBody));
});
|
||||
|
||||
it("returns empty TwiML for status callbacks", () => {
|
||||
const provider = createProvider();
|
||||
const ctx = createContext("CallStatus=ringing&Direction=outbound-api", {
|
||||
|
||||
@@ -516,8 +516,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
|
||||
/**
|
||||
* Initiate an outbound call via Twilio API.
|
||||
* If inlineTwiml is provided, uses that directly (for notify mode).
|
||||
* Otherwise, uses webhook URL for dynamic TwiML.
|
||||
* If inlineTwiml or preConnectTwiml is provided, the first webhook request
|
||||
* receives that TwiML before normal dynamic TwiML resumes.
|
||||
*/
|
||||
async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
|
||||
const url = new URL(input.webhookUrl);
|
||||
@@ -533,6 +533,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
if (input.inlineTwiml) {
|
||||
this.twimlStorage.set(input.callId, input.inlineTwiml);
|
||||
this.notifyCalls.add(input.callId);
|
||||
} else if (input.preConnectTwiml) {
|
||||
this.twimlStorage.set(input.callId, input.preConnectTwiml);
|
||||
}
|
||||
|
||||
// Build request params - always use URL-based TwiML.
|
||||
|
||||
@@ -214,6 +214,8 @@ export type InitiateCallInput = {
|
||||
clientState?: Record<string, string>;
|
||||
/** Inline TwiML to execute instead of dynamic call handling (used for notify mode; the Twilio provider serves it on the first webhook request). */
|
||||
inlineTwiml?: string;
|
||||
/** TwiML to serve once before normal webhook-driven call handling resumes. */
|
||||
preConnectTwiml?: string;
|
||||
};
|
||||
|
||||
export type InitiateCallResult = {
|
||||
|
||||
Reference in New Issue
Block a user