From 464e57360262b7e0f9a705431bedd402fe8c356b Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 1 May 2026 06:35:36 +0100 Subject: [PATCH] fix(voice-call): delegate cli calls to gateway --- CHANGELOG.md | 1 + docs/cli/voicecall.md | 5 + docs/plugins/voice-call.md | 5 + extensions/voice-call/index.test.ts | 80 +++++++++++ extensions/voice-call/index.ts | 11 +- extensions/voice-call/src/cli.ts | 200 +++++++++++++++++++++++++--- 6 files changed, 281 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f20d16a1142..9506b640a4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Voice Call/Twilio: register accepted media streams immediately but wait for realtime transcription readiness before speaking the initial greeting, so reconnect grace handling stays live while OpenAI STT startup is no longer starved by TTS. Fixes #75197. (#75257) Thanks @donkeykong91 and @PfanP. +- Voice Call CLI: delegate operational `voicecall` commands to the running Gateway runtime and skip webhook startup during CLI-only plugin loading, preventing webhook port conflicts and `setup --json` hangs. Fixes #72345. Thanks @serrurco and @DougButdorf. - Agents/pi-embedded-runner: extract the `abortable` provider-call wrapper from `runEmbeddedAttempt` to module scope so its promise handlers no longer close over the run lexical context, releasing transcripts, tool buffers, and subscription callbacks when a provider call hangs past abort. (#74182) Thanks @cjboy007. - Docker: restore `python3` in the gateway runtime image after the slim-runtime switch. Fixes #75041. - CLI/Voice Call: scope `voicecall` command activation to the Voice Call plugin so setup and smoke checks no longer broad-load unrelated plugin runtimes or hang after printing JSON. Thanks @vincentkoc. 
diff --git a/docs/cli/voicecall.md b/docs/cli/voicecall.md index 681e0f88c35..0e07a47602e 100644 --- a/docs/cli/voicecall.md +++ b/docs/cli/voicecall.md @@ -10,6 +10,11 @@ title: "Voicecall" `voicecall` is a plugin-provided command. It only appears if the voice-call plugin is installed and enabled. +When the Gateway is running, operational commands (`call`, `start`, +`continue`, `speak`, `dtmf`, `end`, and `status`) are sent to that Gateway's +voice-call runtime. If no Gateway is reachable, they fall back to a standalone +CLI runtime. + Primary doc: - Voice-call plugin: [Voice Call](/plugins/voice-call) diff --git a/docs/plugins/voice-call.md b/docs/plugins/voice-call.md index 49ce6d04fb6..b0af9fdc592 100644 --- a/docs/plugins/voice-call.md +++ b/docs/plugins/voice-call.md @@ -610,6 +610,11 @@ openclaw voicecall latency # summarize turn latency from lo openclaw voicecall expose --mode funnel ``` +When the Gateway is already running, operational `voicecall` commands delegate +to the Gateway-owned voice-call runtime so the CLI does not bind a second +webhook server. If no Gateway is reachable, the commands fall back to a +standalone CLI runtime. + `latency` reads `calls.jsonl` from the default voice-call storage path. Use `--file <path>` to point at a different log and `--last <n>` to limit analysis to the last N records (default 200). 
Output includes p50/p90/p99 diff --git a/extensions/voice-call/index.test.ts b/extensions/voice-call/index.test.ts index 0cf6ea0b5b8..3b4c3859e21 100644 --- a/extensions/voice-call/index.test.ts +++ b/extensions/voice-call/index.test.ts @@ -15,6 +15,7 @@ vi.mock("./runtime-entry.js", () => ({ import plugin from "./index.js"; import { createVoiceCallRuntime } from "./runtime-entry.js"; +import { __testing as voiceCallCliTesting } from "./src/cli.js"; const noopLogger = { info: vi.fn(), @@ -23,6 +24,8 @@ const noopLogger = { debug: vi.fn(), }; +const callGatewayFromCliMock = vi.fn(); + type Registered = { methods: Map; tools: unknown[]; @@ -144,11 +147,15 @@ describe("voice-call plugin", () => { noopLogger.error.mockClear(); noopLogger.debug.mockClear(); runtimeStub = createRuntimeStub(); + callGatewayFromCliMock.mockReset(); + callGatewayFromCliMock.mockRejectedValue(new Error("connect ECONNREFUSED 127.0.0.1:18789")); + voiceCallCliTesting.setCallGatewayFromCliForTests(callGatewayFromCliMock); vi.mocked(createVoiceCallRuntime).mockReset(); vi.mocked(createVoiceCallRuntime).mockImplementation(async () => runtimeStub); }); afterEach(() => { + voiceCallCliTesting.setCallGatewayFromCliForTests(); vi.restoreAllMocks(); vi.unstubAllEnvs(); delete (globalThis as Record)[Symbol.for("openclaw.voice-call.runtime")]; @@ -205,6 +212,29 @@ describe("voice-call plugin", () => { expect(respond).toHaveBeenCalledWith(true, { callId: "call-1", initiated: true }); }); + it("does not start the webhook runtime for CLI-only plugin loading", async () => { + vi.stubEnv("OPENCLAW_CLI", "1"); + const { service } = setup({ provider: "mock" }); + + await service?.start(createServiceContext()); + + expect(createVoiceCallRuntime).not.toHaveBeenCalled(); + }); + + it("still starts the webhook runtime for gateway CLI processes", async () => { + const previousArgv = process.argv; + vi.stubEnv("OPENCLAW_CLI", "1"); + process.argv = ["node", "openclaw", "gateway", "run"]; + const { service } = 
setup({ provider: "mock" }); + + try { + await service?.start(createServiceContext()); + expect(createVoiceCallRuntime).toHaveBeenCalledTimes(1); + } finally { + process.argv = previousArgv; + } + }); + it("creates a fresh shared runtime after service stop", async () => { const first = setup({ provider: "mock" }); await first.service?.start(createServiceContext()); @@ -462,6 +492,29 @@ describe("voice-call plugin", () => { } }); + it("CLI start delegates to the running gateway runtime", async () => { + callGatewayFromCliMock.mockResolvedValueOnce({ callId: "gateway-call", initiated: true }); + const program = new Command(); + const stdout = captureStdout(); + await registerVoiceCallCli(program); + + try { + await program.parseAsync(["voicecall", "start", "--to", "+1", "--message", "Hello"], { + from: "user", + }); + expect(callGatewayFromCliMock).toHaveBeenCalledWith( + "voicecall.start", + { json: true, timeout: "5000" }, + { to: "+1", message: "Hello", mode: "conversation" }, + { progress: false }, + ); + expect(createVoiceCallRuntime).not.toHaveBeenCalled(); + expect(stdout.output()).toContain('"callId": "gateway-call"'); + } finally { + stdout.restore(); + } + }); + it("CLI setup prints human-readable checks by default", async () => { const program = new Command(); const stdout = captureStdout(); @@ -527,6 +580,33 @@ describe("voice-call plugin", () => { } }); + it("CLI status lists active calls through the running gateway runtime", async () => { + callGatewayFromCliMock.mockResolvedValueOnce({ + found: true, + calls: [{ callId: "gateway-call" }], + }); + const program = new Command(); + const stdout = captureStdout(); + await registerVoiceCallCli(program); + + try { + await program.parseAsync(["voicecall", "status", "--json"], { from: "user" }); + const parsed = JSON.parse(stdout.output()) as { + calls?: Array<{ callId?: string }>; + }; + expect(callGatewayFromCliMock).toHaveBeenCalledWith( + "voicecall.status", + { json: true, timeout: "5000" }, + undefined, 
+ { progress: false }, + ); + expect(createVoiceCallRuntime).not.toHaveBeenCalled(); + expect(parsed.calls).toEqual([expect.objectContaining({ callId: "gateway-call" })]); + } finally { + stdout.restore(); + } + }); + it("CLI smoke dry-runs a live call unless --yes is passed", async () => { const program = new Command(); const stdout = captureStdout(); diff --git a/extensions/voice-call/index.ts b/extensions/voice-call/index.ts index 66df71e9865..3ce9df1a05d 100644 --- a/extensions/voice-call/index.ts +++ b/extensions/voice-call/index.ts @@ -159,6 +159,10 @@ function asParamRecord(params: unknown): Record { : {}; } +function isCliOnlyProcess(): boolean { + return process.env.OPENCLAW_CLI === "1" && !process.argv.slice(2).includes("gateway"); +} + const VOICE_CALL_RUNTIME_KEY = Symbol.for("openclaw.voice-call.runtime"); const VOICE_CALL_RUNTIME_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimePromise"); const VOICE_CALL_RUNTIME_STOP_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimeStopPromise"); @@ -428,11 +432,11 @@ export default definePluginEntry({ try { const raw = normalizeOptionalString(params?.callId) ?? normalizeOptionalString(params?.sid) ?? 
""; + const rt = await ensureRuntime(); if (!raw) { - respond(false, { error: "callId required" }); + respond(true, { found: true, calls: rt.manager.getActiveCalls() }); return; } - const rt = await ensureRuntime(); const call = rt.manager.getCall(raw) || rt.manager.getCallByProviderCallId(raw); if (!call) { respond(true, { found: false }); @@ -611,6 +615,9 @@ export default definePluginEntry({ api.registerService({ id: "voicecall", start: () => { + if (isCliOnlyProcess()) { + return; + } if (!config.enabled) { return; } diff --git a/extensions/voice-call/src/cli.ts b/extensions/voice-call/src/cli.ts index 253c79ea8f6..4ef5c8aa347 100644 --- a/extensions/voice-call/src/cli.ts +++ b/extensions/voice-call/src/cli.ts @@ -3,6 +3,8 @@ import os from "node:os"; import path from "node:path"; import { format } from "node:util"; import type { Command } from "commander"; +import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; +import { callGatewayFromCli } from "openclaw/plugin-sdk/gateway-runtime"; import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime"; import { sleep } from "../api.js"; import { validateProviderConfig, type VoiceCallConfig } from "./config.js"; @@ -31,6 +33,29 @@ type SetupStatus = { checks: SetupCheck[]; }; +type VoiceCallGatewayMethod = + | "voicecall.initiate" + | "voicecall.start" + | "voicecall.continue" + | "voicecall.speak" + | "voicecall.dtmf" + | "voicecall.end" + | "voicecall.status"; + +type VoiceCallGatewayCallResult = { ok: true; payload: unknown } | { ok: false; error: unknown }; + +const VOICE_CALL_GATEWAY_TIMEOUT_MS = "5000"; + +const voiceCallCliDeps = { + callGatewayFromCli, +}; + +export const __testing = { + setCallGatewayFromCliForTests(next?: typeof callGatewayFromCli): void { + voiceCallCliDeps.callGatewayFromCli = next ?? 
callGatewayFromCli; + }, +}; + function writeStdoutLine(...values: unknown[]): void { process.stdout.write(`${format(...values)}\n`); } @@ -39,6 +64,41 @@ function writeStdoutJson(value: unknown): void { process.stdout.write(`${JSON.stringify(value, null, 2)}\n`); } +function isRecord(value: unknown): value is Record { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + +function isGatewayUnavailableForLocalFallback(err: unknown): boolean { + const message = formatErrorMessage(err); + return ( + message.includes("ECONNREFUSED") || + message.includes("ECONNRESET") || + message.includes("EHOSTUNREACH") || + message.includes("ENOTFOUND") || + message.includes("gateway not connected") + ); +} + +async function callVoiceCallGateway( + method: VoiceCallGatewayMethod, + params?: Record, +): Promise { + try { + const payload = await voiceCallCliDeps.callGatewayFromCli( + method, + { json: true, timeout: VOICE_CALL_GATEWAY_TIMEOUT_MS }, + params, + { progress: false }, + ); + return { ok: true, payload }; + } catch (err) { + if (isGatewayUnavailableForLocalFallback(err)) { + return { ok: false, error: err }; + } + throw err; + } +} + function resolveMode(input: string): "off" | "serve" | "funnel" { const raw = normalizeOptionalLowercaseString(input) ?? 
""; if (raw === "serve" || raw === "off") { @@ -192,6 +252,48 @@ async function initiateCallAndPrintId(params: { writeStdoutJson({ callId: result.callId }); } +function writeGatewayCallId(payload: unknown): void { + if (isRecord(payload) && typeof payload.callId === "string") { + writeStdoutJson({ callId: payload.callId }); + return; + } + if (isRecord(payload) && typeof payload.error === "string") { + throw new Error(payload.error); + } + throw new Error("voicecall gateway response missing callId"); +} + +async function initiateCallViaGatewayOrRuntime(params: { + ensureRuntime: () => Promise; + method: "voicecall.initiate" | "voicecall.start"; + to?: string; + message?: string; + mode?: string; +}) { + const mode = resolveCallMode(params.mode); + const gateway = await callVoiceCallGateway(params.method, { + ...(params.to ? { to: params.to } : {}), + ...(params.message ? { message: params.message } : {}), + ...(mode ? { mode } : {}), + }); + if (gateway.ok) { + writeGatewayCallId(gateway.payload); + return; + } + + const rt = await params.ensureRuntime(); + const to = params.to ?? rt.config.toNumber; + if (!to) { + throw new Error("Missing --to and no toNumber configured"); + } + await initiateCallAndPrintId({ + runtime: rt, + to, + message: params.message, + mode: params.mode, + }); +} + export function registerVoiceCallCli(params: { program: Command; config: VoiceCallConfig; @@ -265,20 +367,35 @@ export function registerVoiceCallCli(params: { } return; } - const rt = await ensureRuntime(); - const result = await rt.manager.initiateCall(options.to, undefined, { - message: options.message, - mode: resolveCallMode(options.mode) ?? "notify", + const mode = resolveCallMode(options.mode) ?? "notify"; + const gateway = await callVoiceCallGateway("voicecall.start", { + to: options.to, + ...(options.message ? 
{ message: options.message } : {}), + mode, }); - if (!result.success) { - throw new Error(result.error || "smoke call failed"); + let callId: unknown; + if (gateway.ok) { + callId = isRecord(gateway.payload) ? gateway.payload.callId : undefined; + } else { + const rt = await ensureRuntime(); + const result = await rt.manager.initiateCall(options.to, undefined, { + message: options.message, + mode, + }); + if (!result.success) { + throw new Error(result.error || "smoke call failed"); + } + callId = result.callId; + } + if (typeof callId !== "string" || !callId) { + throw new Error("smoke call failed"); } if (options.json) { - writeStdoutJson({ ok: true, setup, liveCall: true, callId: result.callId }); + writeStdoutJson({ ok: true, setup, liveCall: true, callId }); return; } writeSetupStatus(setup); - writeStdoutLine("live-call: started %s", result.callId); + writeStdoutLine("live-call: started %s", callId); }, ); @@ -296,14 +413,10 @@ export function registerVoiceCallCli(params: { "conversation", ) .action(async (options: { message: string; to?: string; mode?: string }) => { - const rt = await ensureRuntime(); - const to = options.to ?? 
rt.config.toNumber; - if (!to) { - throw new Error("Missing --to and no toNumber configured"); - } - await initiateCallAndPrintId({ - runtime: rt, - to, + await initiateCallViaGatewayOrRuntime({ + ensureRuntime, + method: "voicecall.initiate", + to: options.to, message: options.message, mode: options.mode, }); @@ -320,9 +433,9 @@ export function registerVoiceCallCli(params: { "conversation", ) .action(async (options: { to: string; message?: string; mode?: string }) => { - const rt = await ensureRuntime(); - await initiateCallAndPrintId({ - runtime: rt, + await initiateCallViaGatewayOrRuntime({ + ensureRuntime, + method: "voicecall.start", to: options.to, message: options.message, mode: options.mode, @@ -335,6 +448,14 @@ export function registerVoiceCallCli(params: { .requiredOption("--call-id ", "Call ID") .requiredOption("--message ", "Message to speak") .action(async (options: { callId: string; message: string }) => { + const gateway = await callVoiceCallGateway("voicecall.continue", { + callId: options.callId, + message: options.message, + }); + if (gateway.ok) { + writeStdoutJson(gateway.payload); + return; + } const rt = await ensureRuntime(); const result = await rt.manager.continueCall(options.callId, options.message); if (!result.success) { @@ -349,6 +470,14 @@ export function registerVoiceCallCli(params: { .requiredOption("--call-id ", "Call ID") .requiredOption("--message ", "Message to speak") .action(async (options: { callId: string; message: string }) => { + const gateway = await callVoiceCallGateway("voicecall.speak", { + callId: options.callId, + message: options.message, + }); + if (gateway.ok) { + writeStdoutJson(gateway.payload); + return; + } const rt = await ensureRuntime(); const result = await rt.manager.speak(options.callId, options.message); if (!result.success) { @@ -363,6 +492,14 @@ export function registerVoiceCallCli(params: { .requiredOption("--call-id ", "Call ID") .requiredOption("--digits ", "DTMF digits") .action(async (options: { 
callId: string; digits: string }) => { + const gateway = await callVoiceCallGateway("voicecall.dtmf", { + callId: options.callId, + digits: options.digits, + }); + if (gateway.ok) { + writeStdoutJson(gateway.payload); + return; + } const rt = await ensureRuntime(); const result = await rt.manager.sendDtmf(options.callId, options.digits); if (!result.success) { @@ -376,6 +513,13 @@ export function registerVoiceCallCli(params: { .description("Hang up an active call") .requiredOption("--call-id ", "Call ID") .action(async (options: { callId: string }) => { + const gateway = await callVoiceCallGateway("voicecall.end", { + callId: options.callId, + }); + if (gateway.ok) { + writeStdoutJson(gateway.payload); + return; + } const rt = await ensureRuntime(); const result = await rt.manager.endCall(options.callId); if (!result.success) { @@ -390,6 +534,24 @@ export function registerVoiceCallCli(params: { .option("--call-id ", "Call ID") .option("--json", "Print machine-readable JSON") .action(async (options: { callId?: string; json?: boolean }) => { + const gateway = await callVoiceCallGateway( + "voicecall.status", + options.callId ? { callId: options.callId } : undefined, + ); + if (gateway.ok) { + if (options.callId && isRecord(gateway.payload)) { + if (gateway.payload.found === true && "call" in gateway.payload) { + writeStdoutJson(gateway.payload.call); + return; + } + if (gateway.payload.found === false) { + writeStdoutJson({ found: false }); + return; + } + } + writeStdoutJson(gateway.payload); + return; + } const rt = await ensureRuntime(); if (options.callId) { const call = rt.manager.getCall(options.callId);