mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:30:42 +00:00
fix: sequence meet dtmf before realtime bridge
This commit is contained in:
@@ -13,7 +13,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Google Meet/Voice Call: defer Twilio dial-in intro speech until after Meet DTMF entry and route delayed speech through the active realtime Voice Call bridge. Thanks @donkeykong91 and @PfanP.
|
||||
- Google Meet/Voice Call: play Twilio Meet DTMF before opening the realtime media stream and carry the intro as the initial Voice Call message, so the greeting is generated after Meet admits the phone participant instead of racing a live-call TwiML update. Thanks @donkeykong91 and @PfanP.
|
||||
- Google Meet/Voice Call: make Twilio setup preflight honor explicit `--transport twilio` and fail local/private Voice Call webhook URLs before joins. Thanks @donkeykong91 and @PfanP.
|
||||
- Voice Call/Twilio: retry transient 21220 live-call TwiML updates and catch answered-path initial-greeting failures, so a fast answered callback no longer crashes the Gateway or drops the Twilio greeting/listen transition. (#74606) Thanks @Sivan22.
|
||||
- Voice Call/Twilio: register accepted media streams immediately but wait for realtime transcription readiness before speaking the initial greeting, so reconnect grace handling stays live while OpenAI STT startup is no longer starved by TTS. Fixes #75197. (#75257) Thanks @donkeykong91 and @PfanP.
|
||||
|
||||
@@ -981,7 +981,9 @@ Twilio-only config:
|
||||
```
|
||||
|
||||
`voiceCall.enabled` defaults to `true`; with Twilio transport it delegates the
|
||||
actual PSTN call and DTMF to the Voice Call plugin. If `voice-call` is not
|
||||
actual PSTN call, DTMF, and intro greeting to the Voice Call plugin. Voice Call
|
||||
plays the DTMF sequence before opening the realtime media stream, then uses the
|
||||
saved intro text as the initial realtime greeting. If `voice-call` is not
|
||||
enabled, Google Meet can still validate and record the dial plan, but it cannot
|
||||
place the Twilio call.
|
||||
|
||||
@@ -1411,9 +1413,10 @@ participant:
|
||||
the PIN.
|
||||
- Increase the leading pauses in `--dtmf-sequence` if Meet answers slowly, for
|
||||
example `wwww123456#`.
|
||||
- If the participant joins but you miss the first spoken line, increase
|
||||
`plugins.entries.google-meet.config.voiceCall.postDtmfSpeechDelayMs` so the
|
||||
intro is spoken after Meet finishes admitting the phone participant.
|
||||
- If the participant joins but you do not hear the greeting, check
|
||||
`openclaw voicecall tail` for a Twilio stream start followed by realtime
|
||||
provider readiness. The greeting is now generated from the initial
|
||||
`voicecall.start` message after the stream connects.
|
||||
|
||||
If webhooks do not arrive, debug the Voice Call plugin first: the provider must
|
||||
reach `plugins.entries.voice-call.config.publicUrl` or the configured tunnel.
|
||||
|
||||
@@ -766,10 +766,10 @@ If Voice Call is green but the Meet participant never joins, check the Meet
|
||||
dial-in number, PIN, and `--dtmf-sequence`. The phone call can be healthy while
|
||||
the meeting rejects or ignores an incorrect DTMF sequence.
|
||||
|
||||
Google Meet starts Voice Call silently, sends DTMF, then asks Voice Call to
|
||||
speak the intro after `voiceCall.postDtmfSpeechDelayMs`. Increase that delay in
|
||||
the Google Meet plugin config if the first line is spoken before Meet admits the
|
||||
phone participant.
|
||||
Google Meet passes the Meet DTMF sequence and intro text to `voicecall.start`.
|
||||
For Twilio calls, Voice Call serves the DTMF TwiML first, redirects back to the
|
||||
webhook, then opens the realtime media stream so the saved intro is generated
|
||||
after the phone participant has joined the meeting.
|
||||
|
||||
### Realtime call has no speech
|
||||
|
||||
|
||||
@@ -118,9 +118,14 @@ const googleMeetConfigSchema = {
|
||||
label: "Voice Call Request Timeout (ms)",
|
||||
advanced: true,
|
||||
},
|
||||
"voiceCall.dtmfDelayMs": { label: "DTMF Delay (ms)", advanced: true },
|
||||
"voiceCall.dtmfDelayMs": {
|
||||
label: "Legacy DTMF Delay (ms)",
|
||||
help: "Compatibility setting from the old post-connect DTMF flow. Twilio Meet joins now play DTMF before realtime connect.",
|
||||
advanced: true,
|
||||
},
|
||||
"voiceCall.postDtmfSpeechDelayMs": {
|
||||
label: "Post-DTMF Speech Delay (ms)",
|
||||
label: "Legacy Post-DTMF Speech Delay (ms)",
|
||||
help: "Compatibility setting from the old delayed-speech flow. Twilio Meet joins now carry the intro as the initial Voice Call message.",
|
||||
advanced: true,
|
||||
},
|
||||
"voiceCall.introMessage": { label: "Voice Call Intro Message", advanced: true },
|
||||
|
||||
@@ -112,7 +112,8 @@
|
||||
"advanced": true
|
||||
},
|
||||
"voiceCall.dtmfDelayMs": {
|
||||
"label": "DTMF Delay (ms)",
|
||||
"label": "Legacy DTMF Delay (ms)",
|
||||
"help": "Compatibility setting from the old post-connect DTMF flow. Twilio Meet joins now play DTMF before realtime connect.",
|
||||
"advanced": true
|
||||
},
|
||||
"voiceCall.introMessage": {
|
||||
|
||||
@@ -27,12 +27,11 @@ describe("Google Meet voice-call gateway", () => {
|
||||
gatewayMocks.startGatewayClientWhenEventLoopReady.mockClear();
|
||||
});
|
||||
|
||||
it("starts Twilio Meet calls silently, sends DTMF, then speaks the realtime intro", async () => {
|
||||
it("starts Twilio Meet calls with pre-connect DTMF and intro metadata", async () => {
|
||||
const config = resolveGoogleMeetConfig({
|
||||
voiceCall: {
|
||||
gatewayUrl: "ws://127.0.0.1:18789",
|
||||
dtmfDelayMs: 1,
|
||||
postDtmfSpeechDelayMs: 1,
|
||||
},
|
||||
realtime: { introMessage: "Say exactly: I'm here and listening." },
|
||||
});
|
||||
@@ -50,26 +49,11 @@ describe("Google Meet voice-call gateway", () => {
|
||||
{
|
||||
to: "+15551234567",
|
||||
mode: "conversation",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
"voicecall.dtmf",
|
||||
{
|
||||
callId: "call-1",
|
||||
digits: "123456#",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
|
||||
3,
|
||||
"voicecall.speak",
|
||||
{
|
||||
callId: "call-1",
|
||||
message: "Say exactly: I'm here and listening.",
|
||||
dtmfSequence: "123456#",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import { setTimeout as sleep } from "node:timers/promises";
|
||||
import {
|
||||
GatewayClient,
|
||||
startGatewayClientWhenEventLoopReady,
|
||||
@@ -84,37 +83,14 @@ export async function joinMeetViaVoiceCallGateway(params: {
|
||||
{
|
||||
to: params.dialInNumber,
|
||||
mode: "conversation",
|
||||
...(params.message ? { message: params.message } : {}),
|
||||
...(params.dtmfSequence ? { dtmfSequence: params.dtmfSequence } : {}),
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
)) as VoiceCallStartResult;
|
||||
if (!start.callId) {
|
||||
throw new Error(start.error || "voicecall.start did not return callId");
|
||||
}
|
||||
if (params.dtmfSequence) {
|
||||
await sleep(params.config.voiceCall.dtmfDelayMs);
|
||||
await client.request(
|
||||
"voicecall.dtmf",
|
||||
{
|
||||
callId: start.callId,
|
||||
digits: params.dtmfSequence,
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
);
|
||||
}
|
||||
if (params.message) {
|
||||
await sleep(params.config.voiceCall.postDtmfSpeechDelayMs);
|
||||
const spoken = (await client.request(
|
||||
"voicecall.speak",
|
||||
{
|
||||
callId: start.callId,
|
||||
message: params.message,
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
)) as VoiceCallSpeakResult;
|
||||
if (spoken.success === false) {
|
||||
throw new Error(spoken.error || "voicecall.speak failed");
|
||||
}
|
||||
}
|
||||
return {
|
||||
callId: start.callId,
|
||||
dtmfSent: Boolean(params.dtmfSequence),
|
||||
|
||||
@@ -325,10 +325,16 @@ describe("voice-call plugin", () => {
|
||||
| undefined;
|
||||
const respond = vi.fn();
|
||||
await handler?.({
|
||||
params: { message: "Hi", mode: "conversation", to: "+15550001234" },
|
||||
params: {
|
||||
dtmfSequence: "ww123456#",
|
||||
message: "Hi",
|
||||
mode: "conversation",
|
||||
to: "+15550001234",
|
||||
},
|
||||
respond,
|
||||
});
|
||||
expect(runtimeStub.manager.initiateCall).toHaveBeenCalledWith("+15550001234", undefined, {
|
||||
dtmfSequence: "ww123456#",
|
||||
message: "Hi",
|
||||
mode: "conversation",
|
||||
});
|
||||
|
||||
@@ -121,6 +121,7 @@ const VoiceCallToolSchema = Type.Union([
|
||||
to: Type.Optional(Type.String({ description: "Call target" })),
|
||||
message: Type.String({ description: "Intro message" }),
|
||||
mode: Type.Optional(Type.Union([Type.Literal("notify"), Type.Literal("conversation")])),
|
||||
dtmfSequence: Type.Optional(Type.String({ description: "DTMF digits to play before connect" })),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("continue_call"),
|
||||
@@ -150,6 +151,7 @@ const VoiceCallToolSchema = Type.Union([
|
||||
to: Type.Optional(Type.String({ description: "Call target" })),
|
||||
sid: Type.Optional(Type.String({ description: "Call SID" })),
|
||||
message: Type.Optional(Type.String({ description: "Optional intro message" })),
|
||||
dtmfSequence: Type.Optional(Type.String({ description: "DTMF digits to play before connect" })),
|
||||
}),
|
||||
]);
|
||||
|
||||
@@ -275,10 +277,12 @@ export default definePluginEntry({
|
||||
to: string;
|
||||
message?: string;
|
||||
mode?: "notify" | "conversation";
|
||||
dtmfSequence?: string;
|
||||
}) => {
|
||||
const result = await params.rt.manager.initiateCall(params.to, undefined, {
|
||||
message: params.message,
|
||||
mode: params.mode,
|
||||
dtmfSequence: params.dtmfSequence,
|
||||
});
|
||||
if (!result.success) {
|
||||
params.respond(false, { error: result.error || "initiate failed" });
|
||||
@@ -470,6 +474,7 @@ export default definePluginEntry({
|
||||
try {
|
||||
const to = normalizeOptionalString(params?.to) ?? "";
|
||||
const message = normalizeOptionalString(params?.message) ?? "";
|
||||
const dtmfSequence = normalizeOptionalString(params?.dtmfSequence);
|
||||
if (!to) {
|
||||
respond(false, { error: "to required" });
|
||||
return;
|
||||
@@ -483,6 +488,7 @@ export default definePluginEntry({
|
||||
to,
|
||||
message: message || undefined,
|
||||
mode,
|
||||
dtmfSequence,
|
||||
});
|
||||
} catch (err) {
|
||||
sendError(respond, err);
|
||||
@@ -518,6 +524,7 @@ export default definePluginEntry({
|
||||
}
|
||||
const result = await rt.manager.initiateCall(to, undefined, {
|
||||
message,
|
||||
dtmfSequence: normalizeOptionalString(rawParams.dtmfSequence),
|
||||
mode:
|
||||
rawParams.mode === "notify" || rawParams.mode === "conversation"
|
||||
? rawParams.mode
|
||||
@@ -602,6 +609,7 @@ export default definePluginEntry({
|
||||
throw new Error("to required for call");
|
||||
}
|
||||
const result = await rt.manager.initiateCall(to, undefined, {
|
||||
dtmfSequence: normalizeOptionalString(rawParams.dtmfSequence),
|
||||
message: normalizeOptionalString(rawParams.message),
|
||||
});
|
||||
if (!result.success) {
|
||||
|
||||
@@ -3,6 +3,7 @@ import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
const {
|
||||
addTranscriptEntryMock,
|
||||
clearMaxDurationTimerMock,
|
||||
generateDtmfRedirectTwimlMock,
|
||||
generateNotifyTwimlMock,
|
||||
getCallByProviderCallIdMock,
|
||||
mapVoiceToPollyMock,
|
||||
@@ -12,6 +13,7 @@ const {
|
||||
} = vi.hoisted(() => ({
|
||||
addTranscriptEntryMock: vi.fn(),
|
||||
clearMaxDurationTimerMock: vi.fn(),
|
||||
generateDtmfRedirectTwimlMock: vi.fn(),
|
||||
generateNotifyTwimlMock: vi.fn(),
|
||||
getCallByProviderCallIdMock: vi.fn(),
|
||||
mapVoiceToPollyMock: vi.fn(),
|
||||
@@ -45,6 +47,7 @@ vi.mock("../voice-mapping.js", () => ({
|
||||
}));
|
||||
|
||||
vi.mock("./twiml.js", () => ({
|
||||
generateDtmfRedirectTwiml: generateDtmfRedirectTwimlMock,
|
||||
generateNotifyTwiml: generateNotifyTwimlMock,
|
||||
}));
|
||||
|
||||
@@ -69,6 +72,7 @@ describe("voice-call outbound helpers", () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mapVoiceToPollyMock.mockReturnValue("Polly.Joanna");
|
||||
generateDtmfRedirectTwimlMock.mockReturnValue("<DtmfRedirect />");
|
||||
generateNotifyTwimlMock.mockReturnValue("<Response />");
|
||||
});
|
||||
|
||||
@@ -169,6 +173,51 @@ describe("voice-call outbound helpers", () => {
|
||||
expect(persistCallRecordMock).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
// Checks that a conversation-mode initiateCall carrying a dtmfSequence builds
// DTMF redirect TwiML from the sequence and the webhook URL, hands it to the
// provider as preConnectTwiml, and keeps the intro text on the call record.
it("initiates conversation calls with pre-connect DTMF TwiML", async () => {
|
||||
const initiateProviderCall = vi.fn(async () => ({ providerCallId: "provider-1" }));
|
||||
const ctx = {
|
||||
activeCalls: new Map(),
|
||||
providerCallIdMap: new Map(),
|
||||
provider: { name: "twilio", initiateCall: initiateProviderCall },
|
||||
config: {
|
||||
maxConcurrentCalls: 3,
|
||||
outbound: { defaultMode: "conversation" },
|
||||
fromNumber: "+14155550100",
|
||||
},
|
||||
storePath: "/tmp/voice-call.json",
|
||||
webhookUrl: "https://example.com/webhook",
|
||||
};
|
||||
|
||||
const result = await initiateCall(ctx as never, "+14155550123", "session-1", {
|
||||
mode: "conversation",
|
||||
message: "hello meet",
|
||||
dtmfSequence: "ww123456#",
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
callId: expect.any(String),
|
||||
success: true,
|
||||
});
|
||||
const callId = result.callId;
|
||||
|
||||
// The TwiML helper receives the raw DTMF sequence plus the webhook URL from ctx.
|
expect(generateDtmfRedirectTwimlMock).toHaveBeenCalledWith(
|
||||
"ww123456#",
|
||||
"https://example.com/webhook",
|
||||
);
// The helper's mocked return value is forwarded as preConnectTwiml, while
// inlineTwiml is expected to stay undefined for this conversation-mode call.
||||
expect(initiateProviderCall).toHaveBeenCalledWith({
|
||||
callId,
|
||||
from: "+14155550100",
|
||||
to: "+14155550123",
|
||||
webhookUrl: "https://example.com/webhook",
|
||||
inlineTwiml: undefined,
|
||||
preConnectTwiml: "<DtmfRedirect />",
|
||||
});
|
||||
// The intro message is expected on the stored call's metadata as initialMessage.
expect(ctx.activeCalls.get(callId)?.metadata).toMatchObject({
|
||||
initialMessage: "hello meet",
|
||||
mode: "conversation",
|
||||
});
|
||||
});
|
||||
|
||||
it("fails initiateCall cleanly when provider initiation throws", async () => {
|
||||
const ctx = {
|
||||
activeCalls: new Map(),
|
||||
|
||||
@@ -16,7 +16,7 @@ import { getCallByProviderCallId } from "./lookup.js";
|
||||
import { addTranscriptEntry, transitionState } from "./state.js";
|
||||
import { persistCallRecord } from "./store.js";
|
||||
import { clearTranscriptWaiter, waitForFinalTranscript } from "./timers.js";
|
||||
import { generateNotifyTwiml } from "./twiml.js";
|
||||
import { generateDtmfRedirectTwiml, generateNotifyTwiml } from "./twiml.js";
|
||||
|
||||
type InitiateContext = Pick<
|
||||
CallManagerContext,
|
||||
@@ -118,6 +118,13 @@ export async function initiateCall(
|
||||
typeof options === "string" ? { message: options } : (options ?? {});
|
||||
const initialMessage = opts.message;
|
||||
const mode = opts.mode ?? ctx.config.outbound.defaultMode;
|
||||
const dtmfSequence = opts.dtmfSequence;
|
||||
if (dtmfSequence) {
|
||||
const validationError = validateDtmfDigits(dtmfSequence);
|
||||
if (validationError) {
|
||||
return { callId: "", success: false, error: validationError };
|
||||
}
|
||||
}
|
||||
|
||||
if (!ctx.provider) {
|
||||
return { callId: "", success: false, error: "Provider not initialized" };
|
||||
@@ -164,10 +171,13 @@ export async function initiateCall(
|
||||
try {
|
||||
// For notify mode with a message, use inline TwiML with <Say>.
|
||||
let inlineTwiml: string | undefined;
|
||||
let preConnectTwiml: string | undefined;
|
||||
if (mode === "notify" && initialMessage) {
|
||||
const pollyVoice = mapVoiceToPolly(resolvePreferredTtsVoice(ctx.config));
|
||||
inlineTwiml = generateNotifyTwiml(initialMessage, pollyVoice);
|
||||
console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
|
||||
} else if (dtmfSequence) {
|
||||
preConnectTwiml = generateDtmfRedirectTwiml(dtmfSequence, ctx.webhookUrl);
|
||||
}
|
||||
|
||||
const result = await ctx.provider.initiateCall({
|
||||
@@ -176,6 +186,7 @@ export async function initiateCall(
|
||||
to,
|
||||
webhookUrl: ctx.webhookUrl,
|
||||
inlineTwiml,
|
||||
preConnectTwiml,
|
||||
});
|
||||
|
||||
callRecord.providerCallId = result.providerCallId;
|
||||
|
||||
@@ -7,3 +7,11 @@ export function generateNotifyTwiml(message: string, voice: string): string {
|
||||
<Hangup/>
|
||||
</Response>`;
|
||||
}
|
||||
|
||||
/**
 * Builds a TwiML document that plays a DTMF digit sequence and then redirects
 * the call back to the webhook with a POST request.
 *
 * Both interpolated values are passed through `escapeXml` before being placed
 * in the markup.
 *
 * @param digits - DTMF sequence placed in the `<Play digits="...">` attribute
 *   (for example `ww123456#`).
 * @param webhookUrl - URL placed inside the `<Redirect method="POST">` element.
 * @returns The TwiML response as an XML string.
 */
export function generateDtmfRedirectTwiml(digits: string, webhookUrl: string): string {
|
||||
return `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Response>
|
||||
<Play digits="${escapeXml(digits)}" />
|
||||
<Redirect method="POST">${escapeXml(webhookUrl)}</Redirect>
|
||||
</Response>`;
|
||||
}
|
||||
|
||||
@@ -99,6 +99,41 @@ describe("TwilioProvider", () => {
|
||||
expectStreamingTwiml(requireResponseBody(result.providerResponseBody));
|
||||
});
|
||||
|
||||
// Checks that TwiML passed as preConnectTwiml is returned for the first
// webhook request of the call only; an identical second request is expected
// to receive the regular streaming TwiML instead.
it("serves pre-connect TwiML once before outbound streaming starts", async () => {
|
||||
const provider = createProvider();
|
||||
// Swap in a stubbed apiRequest (reached via a cast) that resolves with a
// canned queued-call payload.
(
|
||||
provider as unknown as {
|
||||
apiRequest: TwilioApiRequest;
|
||||
}
|
||||
).apiRequest = vi.fn<TwilioApiRequest>(async () => ({
|
||||
sid: "CA999",
|
||||
status: "queued",
|
||||
}));
|
||||
const preConnectTwiml = '<Response><Play digits="ww123456#" /></Response>';
|
||||
|
||||
await provider.initiateCall({
|
||||
callId: "call-1",
|
||||
from: "+15550000001",
|
||||
to: "+15550000002",
|
||||
webhookUrl: "https://example.ngrok.app/voice/twilio",
|
||||
preConnectTwiml,
|
||||
});
|
||||
|
||||
// First webhook for call-1: the response body must equal the pre-connect TwiML.
const first = provider.parseWebhookEvent(
|
||||
createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA999", {
|
||||
callId: "call-1",
|
||||
}),
|
||||
);
|
||||
expect(requireResponseBody(first.providerResponseBody)).toBe(preConnectTwiml);
|
||||
|
||||
// Same webhook again: the response must now be streaming TwiML.
const second = provider.parseWebhookEvent(
|
||||
createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA999", {
|
||||
callId: "call-1",
|
||||
}),
|
||||
);
|
||||
expectStreamingTwiml(requireResponseBody(second.providerResponseBody));
|
||||
});
|
||||
|
||||
it("returns empty TwiML for status callbacks", () => {
|
||||
const provider = createProvider();
|
||||
const ctx = createContext("CallStatus=ringing&Direction=outbound-api", {
|
||||
|
||||
@@ -516,8 +516,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
|
||||
/**
|
||||
* Initiate an outbound call via Twilio API.
|
||||
* If inlineTwiml is provided, uses that directly (for notify mode).
|
||||
* Otherwise, uses webhook URL for dynamic TwiML.
|
||||
* If inlineTwiml or preConnectTwiml is provided, the first webhook request
|
||||
* receives that TwiML before normal dynamic TwiML resumes.
|
||||
*/
|
||||
async initiateCall(input: InitiateCallInput): Promise<InitiateCallResult> {
|
||||
const url = new URL(input.webhookUrl);
|
||||
@@ -533,6 +533,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
||||
if (input.inlineTwiml) {
|
||||
this.twimlStorage.set(input.callId, input.inlineTwiml);
|
||||
this.notifyCalls.add(input.callId);
|
||||
} else if (input.preConnectTwiml) {
|
||||
this.twimlStorage.set(input.callId, input.preConnectTwiml);
|
||||
}
|
||||
|
||||
// Build request params - always use URL-based TwiML.
|
||||
|
||||
@@ -214,6 +214,8 @@ export type InitiateCallInput = {
|
||||
clientState?: Record<string, string>;
|
||||
/** Inline TwiML to execute (skips webhook, used for notify mode) */
|
||||
inlineTwiml?: string;
|
||||
/** TwiML to serve once before normal webhook-driven call handling resumes. */
|
||||
preConnectTwiml?: string;
|
||||
};
|
||||
|
||||
export type InitiateCallResult = {
|
||||
|
||||
Reference in New Issue
Block a user