fix: honor meet preconnect twiml

This commit is contained in:
Peter Steinberger
2026-05-01 07:17:00 +01:00
parent d23c8a8eba
commit ec1b96cdfa
8 changed files with 151 additions and 16 deletions

View File

@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Voice Call/Twilio: honor stored pre-connect TwiML before realtime webhook shortcuts and reject DTMF sequences outside conversation mode, so Meet PIN entry cannot be skipped or silently dropped. Thanks @donkeykong91 and @PfanP.
- Google Meet/Voice Call: play Twilio Meet DTMF before opening the realtime media stream and carry the intro as the initial Voice Call message, so the greeting is generated after Meet admits the phone participant instead of racing a live-call TwiML update. Thanks @donkeykong91 and @PfanP.
- Google Meet/Voice Call: make Twilio setup preflight honor explicit `--transport twilio` and fail local/private Voice Call webhook URLs before joins. Thanks @donkeykong91 and @PfanP.
- Voice Call/Twilio: retry transient 21220 live-call TwiML updates and catch answered-path initial-greeting failures, so a fast answered callback no longer crashes the Gateway or drops the Twilio greeting/listen transition. (#74606) Thanks @Sivan22.

View File

@@ -624,27 +624,31 @@ for turn latency and listen-wait times.
Tool name: `voice_call`.
| Action | Args |
| --------------- | ------------------------- |
| `initiate_call` | `message`, `to?`, `mode?` |
| `continue_call` | `callId`, `message` |
| `speak_to_user` | `callId`, `message` |
| `send_dtmf` | `callId`, `digits` |
| `end_call` | `callId` |
| `get_status` | `callId` |
| Action | Args |
| --------------- | ------------------------------------------ |
| `initiate_call` | `message`, `to?`, `mode?`, `dtmfSequence?` |
| `continue_call` | `callId`, `message` |
| `speak_to_user` | `callId`, `message` |
| `send_dtmf` | `callId`, `digits` |
| `end_call` | `callId` |
| `get_status` | `callId` |
This repo ships a matching skill doc at `skills/voice-call/SKILL.md`.
## Gateway RPC
| Method | Args |
| -------------------- | ------------------------- |
| `voicecall.initiate` | `to?`, `message`, `mode?` |
| `voicecall.continue` | `callId`, `message` |
| `voicecall.speak` | `callId`, `message` |
| `voicecall.dtmf` | `callId`, `digits` |
| `voicecall.end` | `callId` |
| `voicecall.status` | `callId` |
| Method | Args |
| -------------------- | ------------------------------------------ |
| `voicecall.initiate` | `to?`, `message`, `mode?`, `dtmfSequence?` |
| `voicecall.continue` | `callId`, `message` |
| `voicecall.speak` | `callId`, `message` |
| `voicecall.dtmf` | `callId`, `digits` |
| `voicecall.end` | `callId` |
| `voicecall.status` | `callId` |
`dtmfSequence` is only valid with `mode: "conversation"`; otherwise the call is
rejected with `dtmfSequence requires conversation mode`. Notify-mode calls
should use `voicecall.dtmf` after the call exists if they need post-connect
digits.
## Troubleshooting

View File

@@ -218,6 +218,36 @@ describe("voice-call outbound helpers", () => {
});
});
it("rejects DTMF sequences outside conversation mode", async () => {
  // Provider spy: the request must be refused before the provider is reached.
  const providerInitiate = vi.fn(async () => ({ providerCallId: "provider-1" }));
  const activeCalls = new Map();
  const ctx = {
    activeCalls,
    providerCallIdMap: new Map(),
    provider: { name: "twilio", initiateCall: providerInitiate },
    config: {
      maxConcurrentCalls: 3,
      // No explicit mode is passed below, so the notify default applies.
      outbound: { defaultMode: "notify" },
      fromNumber: "+14155550100",
    },
    storePath: "/tmp/voice-call.json",
    webhookUrl: "https://example.com/webhook",
  };

  const outcome = initiateCall(ctx as never, "+14155550123", "session-1", {
    message: "hello",
    dtmfSequence: "123456#",
  });

  // The failure is reported through the result object, not a rejection.
  await expect(outcome).resolves.toEqual({
    callId: "",
    success: false,
    error: "dtmfSequence requires conversation mode",
  });

  // Nothing was dialed and no call record was registered.
  expect(providerInitiate).not.toHaveBeenCalled();
  expect(activeCalls.size).toBe(0);
});
it("fails initiateCall cleanly when provider initiation throws", async () => {
const ctx = {
activeCalls: new Map(),

View File

@@ -124,6 +124,13 @@ export async function initiateCall(
if (validationError) {
return { callId: "", success: false, error: validationError };
}
if (mode !== "conversation") {
return {
callId: "",
success: false,
error: "dtmfSequence requires conversation mode",
};
}
}
if (!ctx.provider) {

View File

@@ -43,6 +43,12 @@ export interface VoiceCallProvider {
*/
parseWebhookEvent(ctx: WebhookContext, options?: WebhookParseOptions): ProviderWebhookParseResult;
/**
* Consume one-time TwiML that must be served before shortcut handlers such as
* realtime media streams take over the webhook response.
*/
consumeInitialTwiML?: (ctx: WebhookContext) => string | null;
/**
* Initiate an outbound call.
* @returns Provider call ID and status

View File

@@ -443,6 +443,19 @@ export class TwilioProvider implements VoiceCallProvider {
}
}
/**
 * Take the TwiML stored for the call named in the webhook query, removing it
 * from storage so it is handed out only once.
 *
 * @param ctx - Incoming webhook request context.
 * @returns The stored TwiML, or `null` when the request carries no call ID in
 *   its query, is a status callback, or has no (non-empty) TwiML stored.
 */
consumeInitialTwiML(ctx: WebhookContext): string | null {
  const { callIdFromQuery, isStatusCallback } = readTwimlRequestView(ctx);
  if (!callIdFromQuery || isStatusCallback) {
    return null;
  }

  const pendingTwiml = this.twimlStorage.get(callIdFromQuery);
  if (pendingTwiml) {
    // Drop the entry before returning so a later request cannot replay it.
    this.deleteStoredTwiml(callIdFromQuery);
    return pendingTwiml;
  }
  return null;
}
/**
* Get the WebSocket URL for media streaming.
* Derives from the public URL origin + stream path.

View File

@@ -679,6 +679,71 @@ describe("VoiceCallWebhookServer replay handling", () => {
},
);
// Verifies that TwiML returned by the provider's consumeInitialTwiML hook is
// sent as the webhook response, and that neither the realtime handler nor the
// normal event parsing path runs for that request.
it("serves initial provider TwiML before the realtime shortcut", async () => {
const parseWebhookEvent = vi.fn(() => ({ events: [], statusCode: 200 }));
// Stub for the one-time TwiML: plays the DTMF digits, then redirects.
const consumeInitialTwiML = vi.fn(
() =>
'<Response><Play digits="ww123456#" /><Redirect method="POST">https://example.test</Redirect></Response>',
);
// Realtime stream TwiML that would be served if the shortcut ran; the test
// asserts below that this is never requested.
const buildTwiMLPayload = vi.fn(() => ({
statusCode: 200,
headers: { "Content-Type": "text/xml" },
body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
}));
// Twilio-named provider whose signature check always succeeds, with the
// spies above wired in.
const twilioProvider: VoiceCallProvider = {
...provider,
name: "twilio",
verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:rt-stored" }),
parseWebhookEvent,
consumeInitialTwiML,
};
const { manager, processEvent } = createManager([]);
// Realtime is enabled so the realtime TwiML path is configured for this server.
const config = createConfig({
provider: "twilio",
inboundPolicy: "disabled",
realtime: {
enabled: true,
streamPath: "/voice/stream/realtime",
instructions: "Be helpful.",
toolPolicy: "safe-read-only",
tools: [],
providers: {},
},
});
const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
// Minimal realtime handler double; only buildTwiMLPayload is observed.
server.setRealtimeHandler({
buildTwiMLPayload,
getStreamPathPattern: () => "/voice/stream/realtime",
handleWebSocketUpgrade: () => {},
registerToolHandler: () => {},
setPublicUrl: () => {},
} as unknown as RealtimeCallHandler);
try {
const baseUrl = await server.start();
const requestUrl = requireBoundRequestUrl(server, baseUrl);
// NOTE(review): the stubbed hook ignores the request, so this callId only
// mirrors the shape of a real outbound webhook URL — confirm if it matters.
requestUrl.searchParams.set("callId", "call-1");
// Form-encoded POST describing an in-progress outbound call.
const response = await fetch(requestUrl.toString(), {
method: "POST",
headers: {
"content-type": "application/x-www-form-urlencoded",
"x-twilio-signature": "sig",
},
body: "CallSid=CA123&Direction=outbound-api&CallStatus=in-progress&From=%2B15550001111&To=%2B15550002222",
});
expect(response.status).toBe(200);
const body = await response.text();
// The response body is the stored TwiML, not the realtime stream TwiML.
expect(body).toContain('<Play digits="ww123456#"');
expect(consumeInitialTwiML).toHaveBeenCalledTimes(1);
// Serving the stored TwiML bypasses the realtime handler and event processing.
expect(buildTwiMLPayload).not.toHaveBeenCalled();
expect(parseWebhookEvent).not.toHaveBeenCalled();
expect(processEvent).not.toHaveBeenCalled();
} finally {
// Always release the listening socket, even when an assertion fails.
await server.stop();
}
});
it("rejects non-allowlisted inbound realtime calls before creating a stream token", async () => {
const buildTwiMLPayload = vi.fn(() => ({
statusCode: 200,

View File

@@ -672,6 +672,15 @@ export class VoiceCallWebhookServer {
return { statusCode: 401, body: "Unauthorized" };
}
const initialTwiML = this.provider.consumeInitialTwiML?.(ctx);
if (initialTwiML !== undefined && initialTwiML !== null) {
return {
statusCode: 200,
headers: { "Content-Type": "application/xml" },
body: initialTwiML,
};
}
const realtimeParams = this.getRealtimeTwimlParams(ctx);
if (realtimeParams) {
const direction = realtimeParams.get("Direction");