From 7c19c31144e2949b854c929cb5ec57790714e75e Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 23 Apr 2026 21:18:55 +0100 Subject: [PATCH] feat: support DTMF for voice-call --- docs/cli/voicecall.md | 3 +- docs/plugins/voice-call.md | 5 ++- extensions/voice-call/index.test.ts | 32 ++++++++++++++ extensions/voice-call/index.ts | 40 +++++++++++++++++ extensions/voice-call/openclaw.plugin.json | 1 + extensions/voice-call/src/cli.ts | 14 ++++++ extensions/voice-call/src/config.test.ts | 16 +++++++ extensions/voice-call/src/config.ts | 7 ++- extensions/voice-call/src/manager.ts | 8 ++++ .../voice-call/src/manager/outbound.test.ts | 43 ++++++++++++++++++- extensions/voice-call/src/manager/outbound.ts | 35 +++++++++++++++ extensions/voice-call/src/providers/base.ts | 6 +++ extensions/voice-call/src/providers/mock.ts | 5 +++ .../voice-call/src/providers/twilio.test.ts | 23 ++++++++++ extensions/voice-call/src/providers/twilio.ts | 18 ++++++++ extensions/voice-call/src/types.ts | 8 ++++ 16 files changed, 260 insertions(+), 4 deletions(-) diff --git a/docs/cli/voicecall.md b/docs/cli/voicecall.md index 30da85b148e..8d075e2a072 100644 --- a/docs/cli/voicecall.md +++ b/docs/cli/voicecall.md @@ -2,7 +2,7 @@ summary: "CLI reference for `openclaw voicecall` (voice-call plugin command surface)" read_when: - You use the voice-call plugin and want the CLI entry points - - You want quick examples for `voicecall call|continue|status|tail|expose` + - You want quick examples for `voicecall call|continue|dtmf|status|tail|expose` title: "Voicecall" --- @@ -20,6 +20,7 @@ Primary doc: openclaw voicecall status --call-id openclaw voicecall call --to "+15555550123" --message "Hello" --mode notify openclaw voicecall continue --call-id --message "Any questions?" +openclaw voicecall dtmf --call-id --digits "ww123456#" openclaw voicecall end --call-id ``` diff --git a/docs/plugins/voice-call.md b/docs/plugins/voice-call.md index b887483c472..70fd8fd2518 100644 --- a/docs/plugins/voice-call.md +++ b/docs/plugins/voice-call.md @@ -63,7 +63,7 @@ Set config under `plugins.entries.voice-call.config`: enabled: true, config: { provider: "twilio", // or "telnyx" | "plivo" | "mock" - fromNumber: "+15550001234", + fromNumber: "+15550001234", // or TWILIO_FROM_NUMBER for Twilio toNumber: "+15550005678", twilio: { @@ -468,6 +468,7 @@ openclaw voicecall call --to "+15555550123" --message "Hello from OpenClaw" openclaw voicecall start --to "+15555550123" # alias for call openclaw voicecall continue --call-id --message "Any questions?" openclaw voicecall speak --call-id --message "One moment" +openclaw voicecall dtmf --call-id --digits "ww123456#" openclaw voicecall end --call-id openclaw voicecall status --call-id openclaw voicecall tail @@ -489,6 +490,7 @@ Actions: - `initiate_call` (message, to?, mode?) - `continue_call` (callId, message) - `speak_to_user` (callId, message) +- `send_dtmf` (callId, digits) - `end_call` (callId) - `get_status` (callId) @@ -499,6 +501,7 @@ This repo ships a matching skill doc at `skills/voice-call/SKILL.md`. - `voicecall.initiate` (`to?`, `message`, `mode?`) - `voicecall.continue` (`callId`, `message`) - `voicecall.speak` (`callId`, `message`) +- `voicecall.dtmf` (`callId`, `digits`) - `voicecall.end` (`callId`) - `voicecall.status` (`callId`) diff --git a/extensions/voice-call/index.test.ts b/extensions/voice-call/index.test.ts index c40df6e59e9..090a5eb0fea 100644 --- a/extensions/voice-call/index.test.ts +++ b/extensions/voice-call/index.test.ts @@ -12,6 +12,7 @@ let runtimeStub: { initiateCall: ReturnType; continueCall: ReturnType; speak: ReturnType; + sendDtmf: ReturnType; endCall: ReturnType; getCall: ReturnType; getCallByProviderCallId: ReturnType; @@ -123,6 +124,7 @@ describe("voice-call plugin", () => { transcript: "hello", })), speak: vi.fn(async () => ({ success: true })), + sendDtmf: vi.fn(async () => ({ success: true })), endCall: vi.fn(async () => ({ success: true })), getCall: vi.fn((id: string) => (id === "call-1" ? { callId: "call-1" } : undefined)), getCallByProviderCallId: vi.fn(() => undefined), @@ -164,6 +166,22 @@ describe("voice-call plugin", () => { expect(payload.found).toBe(true); }); + it("sends DTMF via voicecall.dtmf", async () => { + const { methods } = setup({ provider: "mock" }); + const handler = methods.get("voicecall.dtmf") as + | ((ctx: { + params: Record; + respond: ReturnType; + }) => Promise) + | undefined; + const respond = vi.fn(); + + await handler?.({ params: { callId: "call-1", digits: "ww123#" }, respond }); + + expect(runtimeStub.manager.sendDtmf).toHaveBeenCalledWith("call-1", "ww123#"); + expect(respond.mock.calls[0]).toEqual([true, { success: true }]); + }); + it("normalizes legacy config through runtime creation and warns to run doctor", async () => { const { methods } = setup({ enabled: true, @@ -219,6 +237,20 @@ describe("voice-call plugin", () => { expect(result.details.found).toBe(true); }); + it("tool send_dtmf returns json payload", async () => { + const { tools } = setup({ provider: "mock" }); + const tool = tools[0] as { + execute: (id: string, params: unknown) => Promise; + }; + const result = (await tool.execute("id", { + action: "send_dtmf", + callId: "call-1", + digits: "ww123#", + })) as { details: { success?: boolean } }; + expect(runtimeStub.manager.sendDtmf).toHaveBeenCalledWith("call-1", "ww123#"); + expect(result.details.success).toBe(true); + }); + it("legacy tool status without sid returns error payload", async () => { const { tools } = setup({ provider: "mock" }); const tool = tools[0] as { diff --git a/extensions/voice-call/index.ts b/extensions/voice-call/index.ts index 478b133b0f0..7ed35ed7b63 100644 --- a/extensions/voice-call/index.ts +++ b/extensions/voice-call/index.ts @@ -122,6 +122,11 @@ const VoiceCallToolSchema = Type.Union([ callId: Type.String({ description: "Call ID" }), message: Type.String({ description: "Message to speak" }), }), + Type.Object({ + action: Type.Literal("send_dtmf"), + callId: Type.String({ description: "Call ID" }), + digits: Type.String({ description: "DTMF digits to send" }), + }), Type.Object({ action: Type.Literal("end_call"), callId: Type.String({ description: "Call ID" }), @@ -323,6 +328,29 @@ export default definePluginEntry({ }, ); + api.registerGatewayMethod( + "voicecall.dtmf", + async ({ params, respond }: GatewayRequestHandlerOptions) => { + try { + const callId = normalizeOptionalString(params?.callId) ?? ""; + const digits = normalizeOptionalString(params?.digits) ?? ""; + if (!callId || !digits) { + respond(false, { error: "callId and digits required" }); + return; + } + const rt = await ensureRuntime(); + const result = await rt.manager.sendDtmf(callId, digits); + if (!result.success) { + respond(false, { error: result.error || "dtmf failed" }); + return; + } + respond(true, { success: true }); + } catch (err) { + sendError(respond, err); + } + }, + ); + api.registerGatewayMethod( "voicecall.end", async ({ params, respond }: GatewayRequestHandlerOptions) => { @@ -453,6 +481,18 @@ export default definePluginEntry({ } return json({ success: true }); } + case "send_dtmf": { + const callId = normalizeOptionalString(rawParams.callId) ?? ""; + const digits = normalizeOptionalString(rawParams.digits) ?? ""; + if (!callId || !digits) { + throw new Error("callId and digits required"); + } + const result = await rt.manager.sendDtmf(callId, digits); + if (!result.success) { + throw new Error(result.error || "dtmf failed"); + } + return json({ success: true }); + } case "end_call": { const callId = normalizeOptionalString(rawParams.callId) ?? ""; if (!callId) { diff --git a/extensions/voice-call/openclaw.plugin.json b/extensions/voice-call/openclaw.plugin.json index a0445fc8fcc..4a65212dcf4 100644 --- a/extensions/voice-call/openclaw.plugin.json +++ b/extensions/voice-call/openclaw.plugin.json @@ -7,6 +7,7 @@ "TELNYX_PUBLIC_KEY", "TWILIO_ACCOUNT_SID", "TWILIO_AUTH_TOKEN", + "TWILIO_FROM_NUMBER", "PLIVO_AUTH_ID", "PLIVO_AUTH_TOKEN", "NGROK_AUTHTOKEN", diff --git a/extensions/voice-call/src/cli.ts b/extensions/voice-call/src/cli.ts index b496a232477..79f48e1cffe 100644 --- a/extensions/voice-call/src/cli.ts +++ b/extensions/voice-call/src/cli.ts @@ -198,6 +198,20 @@ export function registerVoiceCallCli(params: { writeStdoutJson(result); }); + root + .command("dtmf") + .description("Send DTMF digits to an active call") + .requiredOption("--call-id ", "Call ID") + .requiredOption("--digits ", "DTMF digits") + .action(async (options: { callId: string; digits: string }) => { + const rt = await ensureRuntime(); + const result = await rt.manager.sendDtmf(options.callId, options.digits); + if (!result.success) { + throw new Error(result.error || "dtmf failed"); + } + writeStdoutJson(result); + }); + root .command("end") .description("Hang up an active call") diff --git a/extensions/voice-call/src/config.test.ts b/extensions/voice-call/src/config.test.ts index 641928c6abd..ab5aa7fe30f 100644 --- a/extensions/voice-call/src/config.test.ts +++ b/extensions/voice-call/src/config.test.ts @@ -25,6 +25,7 @@ describe("validateProviderConfig", () => { const clearProviderEnv = () => { delete process.env.TWILIO_ACCOUNT_SID; delete process.env.TWILIO_AUTH_TOKEN; + delete process.env.TWILIO_FROM_NUMBER; delete process.env.TELNYX_API_KEY; delete process.env.TELNYX_CONNECTION_ID; delete process.env.TELNYX_PUBLIC_KEY; @@ -63,6 +64,7 @@ describe("validateProviderConfig", () => { if (provider === "twilio") { process.env.TWILIO_ACCOUNT_SID = "AC123"; process.env.TWILIO_AUTH_TOKEN = "secret"; + process.env.TWILIO_FROM_NUMBER = "+15550001234"; } else if (provider === "telnyx") { process.env.TELNYX_API_KEY = "KEY123"; process.env.TELNYX_CONNECTION_ID = "CONN456"; @@ -90,6 +92,20 @@ describe("validateProviderConfig", () => { expect(result.errors).toEqual([]); }); + it("resolves the Twilio from number from environment", () => { + process.env.TWILIO_ACCOUNT_SID = "AC123"; + process.env.TWILIO_AUTH_TOKEN = "secret"; + process.env.TWILIO_FROM_NUMBER = "+15550001234"; + + const config = resolveVoiceCallConfig({ + ...createBaseConfig("twilio"), + fromNumber: undefined, + }); + + expect(config.fromNumber).toBe("+15550001234"); + expect(validateProviderConfig(config)).toMatchObject({ valid: true, errors: [] }); + }); + it("fails validation when required twilio credentials are missing", () => { process.env.TWILIO_AUTH_TOKEN = "secret"; const missingSid = validateProviderConfig(resolveVoiceCallConfig(createBaseConfig("twilio"))); diff --git a/extensions/voice-call/src/config.ts b/extensions/voice-call/src/config.ts index a1758901d1a..c86cbf64961 100644 --- a/extensions/voice-call/src/config.ts +++ b/extensions/voice-call/src/config.ts @@ -502,6 +502,7 @@ export function resolveVoiceCallConfig(config: VoiceCallConfigInput): VoiceCallC // Twilio if (resolved.provider === "twilio") { + resolved.fromNumber = resolved.fromNumber ?? process.env.TWILIO_FROM_NUMBER; resolved.twilio = resolved.twilio ?? {}; resolved.twilio.accountSid = resolved.twilio.accountSid ?? process.env.TWILIO_ACCOUNT_SID; resolved.twilio.authToken = resolved.twilio.authToken ?? process.env.TWILIO_AUTH_TOKEN; @@ -556,7 +557,11 @@ export function validateProviderConfig(config: VoiceCallConfig): { } if (!config.fromNumber && config.provider !== "mock") { - errors.push("plugins.entries.voice-call.config.fromNumber is required"); + errors.push( + config.provider === "twilio" + ? "plugins.entries.voice-call.config.fromNumber is required (or set TWILIO_FROM_NUMBER env)" + : "plugins.entries.voice-call.config.fromNumber is required", + ); } if (config.provider === "telnyx") { diff --git a/extensions/voice-call/src/manager.ts b/extensions/voice-call/src/manager.ts index e9d30dd898a..4db45779c75 100644 --- a/extensions/voice-call/src/manager.ts +++ b/extensions/voice-call/src/manager.ts @@ -10,6 +10,7 @@ import { continueCall as continueCallWithContext, endCall as endCallWithContext, initiateCall as initiateCallWithContext, + sendDtmf as sendDtmfWithContext, speak as speakWithContext, speakInitialMessage as speakInitialMessageWithContext, } from "./manager/outbound.js"; @@ -221,6 +222,13 @@ export class CallManager { return speakWithContext(this.getContext(), callId, text); } + /** + * Send DTMF digits to an active call. + */ + async sendDtmf(callId: CallId, digits: string): Promise<{ success: boolean; error?: string }> { + return sendDtmfWithContext(this.getContext(), callId, digits); + } + /** * Speak the initial message for a call (called when media stream connects). */ diff --git a/extensions/voice-call/src/manager/outbound.test.ts b/extensions/voice-call/src/manager/outbound.test.ts index a2ad2da972c..e5f807d00b4 100644 --- a/extensions/voice-call/src/manager/outbound.test.ts +++ b/extensions/voice-call/src/manager/outbound.test.ts @@ -48,7 +48,7 @@ vi.mock("./twiml.js", () => ({ generateNotifyTwiml: generateNotifyTwimlMock, })); -import { endCall, initiateCall, speak } from "./outbound.js"; +import { endCall, initiateCall, sendDtmf, speak } from "./outbound.js"; function createActiveCallContext(params: { hangupCall?: ReturnType } = {}) { const call = { callId: "call-1", providerCallId: "provider-1", state: "active" }; @@ -226,6 +226,47 @@ describe("voice-call outbound helpers", () => { expect(transitionStateMock).toHaveBeenLastCalledWith(call, "listening"); }); + it("sends DTMF through connected provider calls", async () => { + const call = { callId: "call-1", providerCallId: "provider-1", state: "active" }; + const sendDtmfProvider = vi.fn(async () => {}); + const ctx = { + activeCalls: new Map([["call-1", call]]), + providerCallIdMap: new Map(), + provider: { name: "twilio", sendDtmf: sendDtmfProvider }, + config: {}, + storePath: "/tmp/voice-call.json", + }; + + await expect(sendDtmf(ctx as never, "call-1", "ww123#")).resolves.toEqual({ + success: true, + }); + expect(sendDtmfProvider).toHaveBeenCalledWith({ + callId: "call-1", + providerCallId: "provider-1", + digits: "ww123#", + }); + }); + + it("rejects invalid or unsupported outbound DTMF", async () => { + const call = { callId: "call-1", providerCallId: "provider-1", state: "active" }; + const ctx = { + activeCalls: new Map([["call-1", call]]), + providerCallIdMap: new Map(), + provider: { name: "telnyx" }, + config: {}, + storePath: "/tmp/voice-call.json", + }; + + await expect(sendDtmf(ctx as never, "call-1", "abc")).resolves.toEqual({ + success: false, + error: "digits may only contain digits, *, #, comma, w, p", + }); + await expect(sendDtmf(ctx as never, "call-1", "123#")).resolves.toEqual({ + success: false, + error: "telnyx does not support outbound DTMF", + }); + }); + it("ends connected calls, clears timers, and rejects pending transcripts", async () => { const { call, ctx, hangupCall } = createActiveCallContext(); diff --git a/extensions/voice-call/src/manager/outbound.ts b/extensions/voice-call/src/manager/outbound.ts index b008c929a68..e0b82d73935 100644 --- a/extensions/voice-call/src/manager/outbound.ts +++ b/extensions/voice-call/src/manager/outbound.ts @@ -102,6 +102,12 @@ function requireConnectedCall(ctx: ConnectedCallContext, callId: CallId): Connec }; } +function validateDtmfDigits(digits: string): string | null { + return /^[0-9*#wWpP,]+$/.test(digits) + ? null + : "digits may only contain digits, *, #, comma, w, p"; +} + export async function initiateCall( ctx: InitiateContext, to: string, @@ -227,6 +233,35 @@ export async function speak( } } +export async function sendDtmf( + ctx: SpeakContext, + callId: CallId, + digits: string, +): Promise<{ success: boolean; error?: string }> { + const validationError = validateDtmfDigits(digits); + if (validationError) { + return { success: false, error: validationError }; + } + const connected = requireConnectedCall(ctx, callId); + if (!connected.ok) { + return { success: false, error: connected.error }; + } + if (!connected.provider.sendDtmf) { + return { success: false, error: `${connected.provider.name} does not support outbound DTMF` }; + } + + try { + await connected.provider.sendDtmf({ + callId, + providerCallId: connected.providerCallId, + digits, + }); + return { success: true }; + } catch (err) { + return { success: false, error: formatErrorMessage(err) }; + } +} + export async function speakInitialMessage( ctx: ConversationContext, providerCallId: string, diff --git a/extensions/voice-call/src/providers/base.ts b/extensions/voice-call/src/providers/base.ts index 37f2bdd50e0..bfba45e63e7 100644 --- a/extensions/voice-call/src/providers/base.ts +++ b/extensions/voice-call/src/providers/base.ts @@ -6,6 +6,7 @@ import type { InitiateCallResult, PlayTtsInput, ProviderName, + SendDtmfInput, WebhookParseOptions, ProviderWebhookParseResult, StartListeningInput, @@ -58,6 +59,11 @@ export interface VoiceCallProvider { */ playTts(input: PlayTtsInput): Promise; + /** + * Send DTMF digits to an active call. + */ + sendDtmf?: (input: SendDtmfInput) => Promise; + /** * Start listening for user speech (activate STT). */ diff --git a/extensions/voice-call/src/providers/mock.ts b/extensions/voice-call/src/providers/mock.ts index 133f9e3d862..0063b8c51e9 100644 --- a/extensions/voice-call/src/providers/mock.ts +++ b/extensions/voice-call/src/providers/mock.ts @@ -11,6 +11,7 @@ import type { PlayTtsInput, WebhookParseOptions, ProviderWebhookParseResult, + SendDtmfInput, StartListeningInput, StopListeningInput, WebhookContext, @@ -162,6 +163,10 @@ export class MockProvider implements VoiceCallProvider { // No-op for mock } + async sendDtmf(_input: SendDtmfInput): Promise { + // No-op for mock + } + async startListening(_input: StartListeningInput): Promise { // No-op for mock } diff --git a/extensions/voice-call/src/providers/twilio.test.ts b/extensions/voice-call/src/providers/twilio.test.ts index 7a79f1d7bd9..836701422f7 100644 --- a/extensions/voice-call/src/providers/twilio.test.ts +++ b/extensions/voice-call/src/providers/twilio.test.ts @@ -280,6 +280,29 @@ describe("TwilioProvider", () => { expect(params.Twiml).toContain(" { + const { provider, apiRequest } = configureTelephonyTwiMlFallback({ + providerCallId: "CA-dtmf", + }); + + await expect( + provider.sendDtmf({ + callId: "call-dtmf", + providerCallId: "CA-dtmf", + digits: "ww123#", + }), + ).resolves.toBeUndefined(); + + expect(apiRequest).toHaveBeenCalledTimes(1); + const call = apiRequest.mock.calls[0]; + const endpoint = call[0]; + const params = call[1] as { Twiml?: string }; + expect(endpoint).toBe("/Calls/CA-dtmf.json"); + expect(params.Twiml).toContain(' { const provider = createProvider(); provider.registerCallStream("CA-reconnect", "MZ-new"); diff --git a/extensions/voice-call/src/providers/twilio.ts b/extensions/voice-call/src/providers/twilio.ts index 44520c63d0c..e977b94c00f 100644 --- a/extensions/voice-call/src/providers/twilio.ts +++ b/extensions/voice-call/src/providers/twilio.ts @@ -15,6 +15,7 @@ import type { NormalizedEvent, PlayTtsInput, ProviderWebhookParseResult, + SendDtmfInput, StartListeningInput, StopListeningInput, WebhookContext, @@ -594,6 +595,23 @@ export class TwilioProvider implements VoiceCallProvider { }); } + async sendDtmf(input: SendDtmfInput): Promise { + const webhookUrl = this.callWebhookUrls.get(input.providerCallId); + if (!webhookUrl) { + throw new Error("Missing webhook URL for this call (provider state not initialized)"); + } + + const twiml = ` + + + ${escapeXml(webhookUrl)} +`; + + await this.apiRequest(`/Calls/${input.providerCallId}.json`, { + Twiml: twiml, + }); + } + /** * Play TTS via core TTS and Twilio Media Streams. * Generates audio with core TTS, converts to mu-law, and streams via WebSocket. diff --git a/extensions/voice-call/src/types.ts b/extensions/voice-call/src/types.ts index 955fa424623..3963e9d3188 100644 --- a/extensions/voice-call/src/types.ts +++ b/extensions/voice-call/src/types.ts @@ -235,6 +235,12 @@ export type PlayTtsInput = { locale?: string; }; +export type SendDtmfInput = { + callId: CallId; + providerCallId: ProviderCallId; + digits: string; +}; + export type StartListeningInput = { callId: CallId; providerCallId: ProviderCallId; @@ -274,6 +280,8 @@ export type OutboundCallOptions = { message?: string; /** Call mode (overrides config default) */ mode?: CallMode; + /** DTMF digits to send after the call is connected */ + dtmfSequence?: string; }; // -----------------------------------------------------------------------------