mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:50:43 +00:00
fix(voice-call): keep outbound realtime streams attached (#71266)
Fixes outbound Twilio realtime conversations so the TwiML fetch returns the realtime <Connect><Stream> path for outbound directions and the answered-call path does not overwrite it with legacy <Say> TwiML.

Local proof:
- pnpm test extensions/voice-call/src/manager.notify.test.ts extensions/voice-call/src/webhook.test.ts
- pnpm check:changed
- pnpm check
- pnpm build
- local VoiceCallWebhookServer + CallManager smoke for Direction=outbound-api

Closes #68713.
This commit is contained in:
committed by
GitHub
parent
5b8bd6371c
commit
8a9d02dd82
@@ -548,6 +548,7 @@ For outbound `conversation` calls, first-message handling is tied to live playba
|
||||
- Barge-in queue clear and auto-response are suppressed only while the initial greeting is actively speaking.
|
||||
- If initial playback fails, the call returns to `listening` and the initial message remains queued for retry.
|
||||
- Initial playback for Twilio streaming starts on stream connect without extra delay.
|
||||
- Realtime voice conversations use the realtime stream's own opening turn. Voice Call does not post a legacy `<Say>` TwiML update for that initial message, so outbound `<Connect><Stream>` sessions stay attached.
|
||||
|
||||
### Twilio stream disconnect grace
|
||||
|
||||
|
||||
@@ -177,6 +177,38 @@ describe("CallManager notify and mapping", () => {
|
||||
expectFirstPlayTtsText(provider, "Twilio non-stream");
|
||||
});
|
||||
|
||||
it("lets realtime conversations own the initial greeting instead of posting legacy TwiML", async () => {
|
||||
const { manager, provider } = await createManagerHarness(
|
||||
{ realtime: { enabled: true, provider: "openai" } },
|
||||
new FakeProvider("twilio"),
|
||||
);
|
||||
|
||||
const callId = await initiateCallWithMessage(
|
||||
manager,
|
||||
"+15550000010",
|
||||
"Tell Nana dinner is at 6pm.",
|
||||
"conversation",
|
||||
);
|
||||
await answerCall(manager, callId, "evt-conversation-twilio-realtime");
|
||||
|
||||
expect(provider.playTtsCalls).toHaveLength(0);
|
||||
expect(requireCall(manager, callId).metadata).toEqual(
|
||||
expect.objectContaining({ initialMessage: "Tell Nana dinner is at 6pm." }),
|
||||
);
|
||||
});
|
||||
|
||||
it("still speaks initial message in notify mode when realtime is enabled", async () => {
|
||||
const { manager, provider } = await createManagerHarness(
|
||||
{ realtime: { enabled: true, provider: "openai" } },
|
||||
new FakeProvider("twilio"),
|
||||
);
|
||||
|
||||
const callId = await initiateCallWithMessage(manager, "+15550000011", "Notify text", "notify");
|
||||
await answerCall(manager, callId, "evt-notify-twilio-realtime");
|
||||
|
||||
expectFirstPlayTtsText(provider, "Notify text");
|
||||
});
|
||||
|
||||
it("waits for stream connect in conversation mode when Twilio streaming is enabled", async () => {
|
||||
const { manager, provider } = await createManagerHarness(
|
||||
{ streaming: { enabled: true } },
|
||||
|
||||
@@ -307,6 +307,9 @@ export class CallManager {
|
||||
// is actually available; otherwise speak immediately on answered.
|
||||
const mode = (call.metadata?.mode as string | undefined) ?? "conversation";
|
||||
if (mode === "conversation") {
|
||||
if (this.config.realtime.enabled) {
|
||||
return;
|
||||
}
|
||||
const shouldWaitForStreamConnect =
|
||||
this.shouldDeferConversationInitialMessageUntilStreamConnect();
|
||||
if (shouldWaitForStreamConnect) {
|
||||
|
||||
@@ -606,6 +606,61 @@ describe("VoiceCallWebhookServer replay handling", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it.each(["outbound-api", "outbound-dial"] as const)(
|
||||
"returns realtime TwiML for %s twilio TwiML fetches",
|
||||
async (direction) => {
|
||||
const parseWebhookEvent = vi.fn(() => ({ events: [], statusCode: 200 }));
|
||||
const buildTwiMLPayload = vi.fn(() => ({
|
||||
statusCode: 200,
|
||||
headers: { "Content-Type": "text/xml" },
|
||||
body: '<Response><Connect><Stream url="wss://example.test/voice/stream/realtime/token" /></Connect></Response>',
|
||||
}));
|
||||
const twilioProvider: VoiceCallProvider = {
|
||||
...provider,
|
||||
name: "twilio",
|
||||
verifyWebhook: () => ({ ok: true, verifiedRequestKey: "twilio:req:rt-outbound" }),
|
||||
parseWebhookEvent,
|
||||
};
|
||||
const { manager, processEvent } = createManager([]);
|
||||
const config = createConfig({
|
||||
provider: "twilio",
|
||||
inboundPolicy: "disabled",
|
||||
realtime: {
|
||||
enabled: true,
|
||||
streamPath: "/voice/stream/realtime",
|
||||
tools: [],
|
||||
providers: {},
|
||||
},
|
||||
});
|
||||
const server = new VoiceCallWebhookServer(config, manager, twilioProvider);
|
||||
server.setRealtimeHandler({
|
||||
buildTwiMLPayload,
|
||||
getStreamPathPattern: () => "/voice/stream/realtime",
|
||||
handleWebSocketUpgrade: () => {},
|
||||
registerToolHandler: () => {},
|
||||
setPublicUrl: () => {},
|
||||
} as unknown as RealtimeCallHandler);
|
||||
|
||||
try {
|
||||
const baseUrl = await server.start();
|
||||
const response = await postWebhookFormWithHeaders(
|
||||
server,
|
||||
baseUrl,
|
||||
`CallSid=CA123&Direction=${direction}&CallStatus=in-progress&From=%2B15550001111&To=%2B15550002222`,
|
||||
{ "x-twilio-signature": "sig" },
|
||||
);
|
||||
|
||||
expect(response.status).toBe(200);
|
||||
expect(await response.text()).toContain("<Connect><Stream");
|
||||
expect(buildTwiMLPayload).toHaveBeenCalledTimes(1);
|
||||
expect(parseWebhookEvent).not.toHaveBeenCalled();
|
||||
expect(processEvent).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
await server.stop();
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
it("rejects non-allowlisted inbound realtime calls before creating a stream token", async () => {
|
||||
const buildTwiMLPayload = vi.fn(() => ({
|
||||
statusCode: 200,
|
||||
|
||||
@@ -643,7 +643,9 @@ export class VoiceCallWebhookServer {
|
||||
|
||||
const realtimeParams = this.getRealtimeTwimlParams(ctx);
|
||||
if (realtimeParams) {
|
||||
if (!this.shouldAcceptRealtimeInboundRequest(realtimeParams)) {
|
||||
const direction = realtimeParams.get("Direction");
|
||||
const isInboundRealtimeRequest = !direction || direction === "inbound";
|
||||
if (isInboundRealtimeRequest && !this.shouldAcceptRealtimeInboundRequest(realtimeParams)) {
|
||||
console.log("[voice-call] Realtime inbound call rejected before stream setup");
|
||||
return buildRealtimeRejectedTwiML();
|
||||
}
|
||||
@@ -718,8 +720,9 @@ export class VoiceCallWebhookServer {
|
||||
|
||||
const params = new URLSearchParams(ctx.rawBody);
|
||||
const direction = params.get("Direction");
|
||||
const isInbound = !direction || direction === "inbound";
|
||||
if (!isInbound) {
|
||||
const isSupportedDirection =
|
||||
!direction || direction === "inbound" || direction.startsWith("outbound");
|
||||
if (!isSupportedDirection) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user