fix(voice-call): delegate cli calls to gateway

This commit is contained in:
Peter Steinberger
2026-05-01 06:35:36 +01:00
parent e8f9c3e6de
commit 464e573602
6 changed files with 281 additions and 21 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Voice Call/Twilio: register accepted media streams immediately but wait for realtime transcription readiness before speaking the initial greeting, so reconnect grace handling stays live while OpenAI STT startup is no longer starved by TTS. Fixes #75197. (#75257) Thanks @donkeykong91 and @PfanP.
- Voice Call CLI: delegate operational `voicecall` commands to the running Gateway runtime and skip webhook startup during CLI-only plugin loading, preventing webhook port conflicts and `setup --json` hangs. Fixes #72345. Thanks @serrurco and @DougButdorf.
- Agents/pi-embedded-runner: extract the `abortable` provider-call wrapper from `runEmbeddedAttempt` to module scope so its promise handlers no longer close over the run lexical context, releasing transcripts, tool buffers, and subscription callbacks when a provider call hangs past abort. (#74182) Thanks @cjboy007.
- Docker: restore `python3` in the gateway runtime image after the slim-runtime switch. Fixes #75041.
- CLI/Voice Call: scope `voicecall` command activation to the Voice Call plugin so setup and smoke checks no longer broad-load unrelated plugin runtimes or hang after printing JSON. Thanks @vincentkoc.

View File

@@ -10,6 +10,11 @@ title: "Voicecall"
`voicecall` is a plugin-provided command. It only appears if the voice-call plugin is installed and enabled.
When the Gateway is running, operational commands (`call`, `start`,
`continue`, `speak`, `dtmf`, `end`, and `status`) are sent to that Gateway's
voice-call runtime. If no Gateway is reachable, they fall back to a standalone
CLI runtime.
Primary doc:
- Voice-call plugin: [Voice Call](/plugins/voice-call)

View File

@@ -610,6 +610,11 @@ openclaw voicecall latency # summarize turn latency from lo
openclaw voicecall expose --mode funnel
```
When the Gateway is already running, operational `voicecall` commands delegate
to the Gateway-owned voice-call runtime so the CLI does not bind a second
webhook server. If no Gateway is reachable, the commands fall back to a
standalone CLI runtime.
`latency` reads `calls.jsonl` from the default voice-call storage path.
Use `--file <path>` to point at a different log and `--last <n>` to limit
analysis to the last N records (default 200). Output includes p50/p90/p99

View File

@@ -15,6 +15,7 @@ vi.mock("./runtime-entry.js", () => ({
import plugin from "./index.js";
import { createVoiceCallRuntime } from "./runtime-entry.js";
import { __testing as voiceCallCliTesting } from "./src/cli.js";
const noopLogger = {
info: vi.fn(),
@@ -23,6 +24,8 @@ const noopLogger = {
debug: vi.fn(),
};
const callGatewayFromCliMock = vi.fn();
type Registered = {
methods: Map<string, unknown>;
tools: unknown[];
@@ -144,11 +147,15 @@ describe("voice-call plugin", () => {
noopLogger.error.mockClear();
noopLogger.debug.mockClear();
runtimeStub = createRuntimeStub();
callGatewayFromCliMock.mockReset();
callGatewayFromCliMock.mockRejectedValue(new Error("connect ECONNREFUSED 127.0.0.1:18789"));
voiceCallCliTesting.setCallGatewayFromCliForTests(callGatewayFromCliMock);
vi.mocked(createVoiceCallRuntime).mockReset();
vi.mocked(createVoiceCallRuntime).mockImplementation(async () => runtimeStub);
});
afterEach(() => {
voiceCallCliTesting.setCallGatewayFromCliForTests();
vi.restoreAllMocks();
vi.unstubAllEnvs();
delete (globalThis as Record<PropertyKey, unknown>)[Symbol.for("openclaw.voice-call.runtime")];
@@ -205,6 +212,29 @@ describe("voice-call plugin", () => {
expect(respond).toHaveBeenCalledWith(true, { callId: "call-1", initiated: true });
});
// CLI-only loading: with OPENCLAW_CLI=1 stubbed, starting the plugin service must not
// create the voice-call runtime.
// NOTE(review): relies on the test runner's own process.argv not containing a
// `gateway` argument, since that is what the CLI-only check inspects — confirm.
it("does not start the webhook runtime for CLI-only plugin loading", async () => {
vi.stubEnv("OPENCLAW_CLI", "1");
const { service } = setup({ provider: "mock" });
await service?.start(createServiceContext());
expect(createVoiceCallRuntime).not.toHaveBeenCalled();
});
// Simulates a gateway process launched through the CLI (OPENCLAW_CLI=1 plus a
// `gateway` argv entry) and checks that the service still creates the runtime once.
it("still starts the webhook runtime for gateway CLI processes", async () => {
  const previousArgv = process.argv;
  vi.stubEnv("OPENCLAW_CLI", "1");
  try {
    // Overwrite argv inside the try so the original value is restored even when
    // setup() or service.start() throws; otherwise the fake argv would leak into
    // later tests.
    process.argv = ["node", "openclaw", "gateway", "run"];
    const { service } = setup({ provider: "mock" });
    await service?.start(createServiceContext());
    expect(createVoiceCallRuntime).toHaveBeenCalledTimes(1);
  } finally {
    process.argv = previousArgv;
  }
});
it("creates a fresh shared runtime after service stop", async () => {
const first = setup({ provider: "mock" });
await first.service?.start(createServiceContext());
@@ -462,6 +492,29 @@ describe("voice-call plugin", () => {
}
});
// When the gateway call resolves, `voicecall start` must forward the request to the
// gateway, print the returned callId, and never create a local runtime.
it("CLI start delegates to the running gateway runtime", async () => {
// Override the default rejection for this one call so the gateway looks reachable.
callGatewayFromCliMock.mockResolvedValueOnce({ callId: "gateway-call", initiated: true });
const program = new Command();
const stdout = captureStdout();
await registerVoiceCallCli(program);
try {
await program.parseAsync(["voicecall", "start", "--to", "+1", "--message", "Hello"], {
from: "user",
});
// No --mode is passed on the command line, yet "conversation" is expected in the
// forwarded params.
expect(callGatewayFromCliMock).toHaveBeenCalledWith(
"voicecall.start",
{ json: true, timeout: "5000" },
{ to: "+1", message: "Hello", mode: "conversation" },
{ progress: false },
);
expect(createVoiceCallRuntime).not.toHaveBeenCalled();
expect(stdout.output()).toContain('"callId": "gateway-call"');
} finally {
// Always undo the stdout capture, even when an expectation fails.
stdout.restore();
}
});
it("CLI setup prints human-readable checks by default", async () => {
const program = new Command();
const stdout = captureStdout();
@@ -527,6 +580,33 @@ describe("voice-call plugin", () => {
}
});
// `voicecall status --json` without --call-id must ask the gateway for its call list
// and print the gateway payload instead of creating a local runtime.
it("CLI status lists active calls through the running gateway runtime", async () => {
callGatewayFromCliMock.mockResolvedValueOnce({
found: true,
calls: [{ callId: "gateway-call" }],
});
const program = new Command();
const stdout = captureStdout();
await registerVoiceCallCli(program);
try {
await program.parseAsync(["voicecall", "status", "--json"], { from: "user" });
// The captured stdout must be a single parseable JSON document.
const parsed = JSON.parse(stdout.output()) as {
calls?: Array<{ callId?: string }>;
};
// Without --call-id the params argument is expected to be undefined.
expect(callGatewayFromCliMock).toHaveBeenCalledWith(
"voicecall.status",
{ json: true, timeout: "5000" },
undefined,
{ progress: false },
);
expect(createVoiceCallRuntime).not.toHaveBeenCalled();
expect(parsed.calls).toEqual([expect.objectContaining({ callId: "gateway-call" })]);
} finally {
// Always undo the stdout capture, even when an expectation fails.
stdout.restore();
}
});
it("CLI smoke dry-runs a live call unless --yes is passed", async () => {
const program = new Command();
const stdout = captureStdout();

View File

@@ -159,6 +159,10 @@ function asParamRecord(params: unknown): Record<string, unknown> {
: {};
}
/**
 * Reports whether this process is a CLI invocation that is not running the gateway.
 *
 * @returns `true` when `OPENCLAW_CLI` is exactly `"1"` and none of the arguments
 *   after the first two `process.argv` entries equals `"gateway"`.
 */
function isCliOnlyProcess(): boolean {
  if (process.env.OPENCLAW_CLI !== "1") {
    return false;
  }
  // NOTE(review): this matches `gateway` at any argument position, including option
  // values (for example a message whose text is "gateway") — confirm that is intended.
  const cliArgs = process.argv.slice(2);
  return !cliArgs.includes("gateway");
}
// Registry symbols (Symbol.for) so every copy of this module resolves the same keys.
// NOTE(review): presumably used as globalThis slots for the shared runtime, its
// in-flight creation promise, and its in-flight stop promise — confirm against the
// code that reads and writes them.
const VOICE_CALL_RUNTIME_KEY = Symbol.for("openclaw.voice-call.runtime");
const VOICE_CALL_RUNTIME_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimePromise");
const VOICE_CALL_RUNTIME_STOP_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimeStopPromise");
@@ -428,11 +432,11 @@ export default definePluginEntry({
try {
const raw =
normalizeOptionalString(params?.callId) ?? normalizeOptionalString(params?.sid) ?? "";
const rt = await ensureRuntime();
if (!raw) {
respond(false, { error: "callId required" });
respond(true, { found: true, calls: rt.manager.getActiveCalls() });
return;
}
const rt = await ensureRuntime();
const call = rt.manager.getCall(raw) || rt.manager.getCallByProviderCallId(raw);
if (!call) {
respond(true, { found: false });
@@ -611,6 +615,9 @@ export default definePluginEntry({
api.registerService({
id: "voicecall",
start: () => {
if (isCliOnlyProcess()) {
return;
}
if (!config.enabled) {
return;
}

View File

@@ -3,6 +3,8 @@ import os from "node:os";
import path from "node:path";
import { format } from "node:util";
import type { Command } from "commander";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import { callGatewayFromCli } from "openclaw/plugin-sdk/gateway-runtime";
import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime";
import { sleep } from "../api.js";
import { validateProviderConfig, type VoiceCallConfig } from "./config.js";
@@ -31,6 +33,29 @@ type SetupStatus = {
checks: SetupCheck[];
};
// Gateway method names the voicecall CLI may delegate to.
type VoiceCallGatewayMethod =
| "voicecall.initiate"
| "voicecall.start"
| "voicecall.continue"
| "voicecall.speak"
| "voicecall.dtmf"
| "voicecall.end"
| "voicecall.status";
// Outcome of one delegation attempt: `ok: true` carries the gateway payload,
// `ok: false` carries the error that made the gateway count as unavailable.
type VoiceCallGatewayCallResult = { ok: true; payload: unknown } | { ok: false; error: unknown };
// Handed to callGatewayFromCli as its `timeout` option, which is why it is a string.
// Presumably milliseconds, going by the name — confirm against the gateway client.
const VOICE_CALL_GATEWAY_TIMEOUT_MS = "5000";
// Mutable holder for the gateway client so tests can substitute it.
const voiceCallCliDeps = {
callGatewayFromCli,
};
// Test-only hook: pass a replacement to override the gateway client, or call with no
// argument to restore the real callGatewayFromCli.
export const __testing = {
setCallGatewayFromCliForTests(next?: typeof callGatewayFromCli): void {
voiceCallCliDeps.callGatewayFromCli = next ?? callGatewayFromCli;
},
};
function writeStdoutLine(...values: unknown[]): void {
process.stdout.write(`${format(...values)}\n`);
}
@@ -39,6 +64,41 @@ function writeStdoutJson(value: unknown): void {
process.stdout.write(`${JSON.stringify(value, null, 2)}\n`);
}
/**
 * Type guard for plain object-like values.
 *
 * @param value - Any value, typically an untyped gateway payload.
 * @returns `true` for non-null objects that are not arrays; `false` for primitives,
 *   `null`, `undefined`, and arrays.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
/**
 * Decides whether a gateway call failure means "no gateway to talk to", in which case
 * the CLI may fall back to a local runtime.
 *
 * @param err - The value thrown by the gateway call.
 * @returns `true` when the formatted error message contains one of the known
 *   connection-failure markers.
 */
function isGatewayUnavailableForLocalFallback(err: unknown): boolean {
  const unavailableMarkers = [
    "ECONNREFUSED",
    "ECONNRESET",
    "EHOSTUNREACH",
    "ENOTFOUND",
    "gateway not connected",
  ];
  const text = formatErrorMessage(err);
  return unavailableMarkers.some((marker) => text.includes(marker));
}
/**
 * Attempts one voice-call gateway method through the injectable gateway client.
 *
 * @param method - Gateway method to invoke.
 * @param params - Optional method parameters, forwarded unchanged.
 * @returns `{ ok: true, payload }` on success, or `{ ok: false, error }` when the
 *   failure is classified as "gateway unavailable".
 * @throws Rethrows any other failure untouched.
 */
async function callVoiceCallGateway(
  method: VoiceCallGatewayMethod,
  params?: Record<string, unknown>,
): Promise<VoiceCallGatewayCallResult> {
  try {
    return {
      ok: true,
      payload: await voiceCallCliDeps.callGatewayFromCli(
        method,
        { json: true, timeout: VOICE_CALL_GATEWAY_TIMEOUT_MS },
        params,
        { progress: false },
      ),
    };
  } catch (cause) {
    // Only connection-level failures are turned into a result; everything else
    // propagates to the caller.
    if (!isGatewayUnavailableForLocalFallback(cause)) {
      throw cause;
    }
    return { ok: false, error: cause };
  }
}
function resolveMode(input: string): "off" | "serve" | "funnel" {
const raw = normalizeOptionalLowercaseString(input) ?? "";
if (raw === "serve" || raw === "off") {
@@ -192,6 +252,48 @@ async function initiateCallAndPrintId(params: {
writeStdoutJson({ callId: result.callId });
}
/**
 * Prints `{ callId }` as JSON from a gateway response.
 *
 * @param payload - Untyped gateway response.
 * @throws Error with the payload's `error` text when the payload has no string
 *   `callId` but has a string `error`; otherwise a generic "missing callId" error.
 */
function writeGatewayCallId(payload: unknown): void {
  if (!isRecord(payload)) {
    throw new Error("voicecall gateway response missing callId");
  }
  const { callId, error } = payload;
  if (typeof callId === "string") {
    writeStdoutJson({ callId });
    return;
  }
  if (typeof error === "string") {
    throw new Error(error);
  }
  throw new Error("voicecall gateway response missing callId");
}
/**
 * Starts a call through the gateway when it answers, otherwise through a locally
 * created runtime, and prints the resulting call id.
 *
 * @param params.ensureRuntime - Lazily provides the local runtime; only invoked on
 *   the fallback path.
 * @param params.method - Gateway method used for delegation.
 * @param params.to - Destination; on the fallback path the runtime's configured
 *   `toNumber` is used when this is absent.
 * @param params.message - Optional message for the call.
 * @param params.mode - Raw mode option; resolved before it is sent to the gateway and
 *   passed through unresolved on the fallback path.
 * @throws Error when falling back locally and no destination can be determined.
 */
async function initiateCallViaGatewayOrRuntime(params: {
  ensureRuntime: () => Promise<VoiceCallRuntime>;
  method: "voicecall.initiate" | "voicecall.start";
  to?: string;
  message?: string;
  mode?: string;
}) {
  const resolvedMode = resolveCallMode(params.mode);

  // Only include fields that carry a value, in the order to, message, mode.
  const gatewayParams: Record<string, unknown> = {};
  if (params.to) {
    gatewayParams.to = params.to;
  }
  if (params.message) {
    gatewayParams.message = params.message;
  }
  if (resolvedMode) {
    gatewayParams.mode = resolvedMode;
  }

  const delegated = await callVoiceCallGateway(params.method, gatewayParams);
  if (delegated.ok) {
    writeGatewayCallId(delegated.payload);
    return;
  }

  // Gateway unavailable: place the call from a local runtime instead.
  const runtime = await params.ensureRuntime();
  const destination = params.to ?? runtime.config.toNumber;
  if (!destination) {
    throw new Error("Missing --to and no toNumber configured");
  }
  await initiateCallAndPrintId({
    runtime,
    to: destination,
    message: params.message,
    mode: params.mode,
  });
}
export function registerVoiceCallCli(params: {
program: Command;
config: VoiceCallConfig;
@@ -265,20 +367,35 @@ export function registerVoiceCallCli(params: {
}
return;
}
const rt = await ensureRuntime();
const result = await rt.manager.initiateCall(options.to, undefined, {
message: options.message,
mode: resolveCallMode(options.mode) ?? "notify",
const mode = resolveCallMode(options.mode) ?? "notify";
const gateway = await callVoiceCallGateway("voicecall.start", {
to: options.to,
...(options.message ? { message: options.message } : {}),
mode,
});
if (!result.success) {
throw new Error(result.error || "smoke call failed");
let callId: unknown;
if (gateway.ok) {
callId = isRecord(gateway.payload) ? gateway.payload.callId : undefined;
} else {
const rt = await ensureRuntime();
const result = await rt.manager.initiateCall(options.to, undefined, {
message: options.message,
mode,
});
if (!result.success) {
throw new Error(result.error || "smoke call failed");
}
callId = result.callId;
}
if (typeof callId !== "string" || !callId) {
throw new Error("smoke call failed");
}
if (options.json) {
writeStdoutJson({ ok: true, setup, liveCall: true, callId: result.callId });
writeStdoutJson({ ok: true, setup, liveCall: true, callId });
return;
}
writeSetupStatus(setup);
writeStdoutLine("live-call: started %s", result.callId);
writeStdoutLine("live-call: started %s", callId);
},
);
@@ -296,14 +413,10 @@ export function registerVoiceCallCli(params: {
"conversation",
)
.action(async (options: { message: string; to?: string; mode?: string }) => {
const rt = await ensureRuntime();
const to = options.to ?? rt.config.toNumber;
if (!to) {
throw new Error("Missing --to and no toNumber configured");
}
await initiateCallAndPrintId({
runtime: rt,
to,
await initiateCallViaGatewayOrRuntime({
ensureRuntime,
method: "voicecall.initiate",
to: options.to,
message: options.message,
mode: options.mode,
});
@@ -320,9 +433,9 @@ export function registerVoiceCallCli(params: {
"conversation",
)
.action(async (options: { to: string; message?: string; mode?: string }) => {
const rt = await ensureRuntime();
await initiateCallAndPrintId({
runtime: rt,
await initiateCallViaGatewayOrRuntime({
ensureRuntime,
method: "voicecall.start",
to: options.to,
message: options.message,
mode: options.mode,
@@ -335,6 +448,14 @@ export function registerVoiceCallCli(params: {
.requiredOption("--call-id <id>", "Call ID")
.requiredOption("--message <text>", "Message to speak")
.action(async (options: { callId: string; message: string }) => {
const gateway = await callVoiceCallGateway("voicecall.continue", {
callId: options.callId,
message: options.message,
});
if (gateway.ok) {
writeStdoutJson(gateway.payload);
return;
}
const rt = await ensureRuntime();
const result = await rt.manager.continueCall(options.callId, options.message);
if (!result.success) {
@@ -349,6 +470,14 @@ export function registerVoiceCallCli(params: {
.requiredOption("--call-id <id>", "Call ID")
.requiredOption("--message <text>", "Message to speak")
.action(async (options: { callId: string; message: string }) => {
const gateway = await callVoiceCallGateway("voicecall.speak", {
callId: options.callId,
message: options.message,
});
if (gateway.ok) {
writeStdoutJson(gateway.payload);
return;
}
const rt = await ensureRuntime();
const result = await rt.manager.speak(options.callId, options.message);
if (!result.success) {
@@ -363,6 +492,14 @@ export function registerVoiceCallCli(params: {
.requiredOption("--call-id <id>", "Call ID")
.requiredOption("--digits <digits>", "DTMF digits")
.action(async (options: { callId: string; digits: string }) => {
const gateway = await callVoiceCallGateway("voicecall.dtmf", {
callId: options.callId,
digits: options.digits,
});
if (gateway.ok) {
writeStdoutJson(gateway.payload);
return;
}
const rt = await ensureRuntime();
const result = await rt.manager.sendDtmf(options.callId, options.digits);
if (!result.success) {
@@ -376,6 +513,13 @@ export function registerVoiceCallCli(params: {
.description("Hang up an active call")
.requiredOption("--call-id <id>", "Call ID")
.action(async (options: { callId: string }) => {
const gateway = await callVoiceCallGateway("voicecall.end", {
callId: options.callId,
});
if (gateway.ok) {
writeStdoutJson(gateway.payload);
return;
}
const rt = await ensureRuntime();
const result = await rt.manager.endCall(options.callId);
if (!result.success) {
@@ -390,6 +534,24 @@ export function registerVoiceCallCli(params: {
.option("--call-id <id>", "Call ID")
.option("--json", "Print machine-readable JSON")
.action(async (options: { callId?: string; json?: boolean }) => {
const gateway = await callVoiceCallGateway(
"voicecall.status",
options.callId ? { callId: options.callId } : undefined,
);
if (gateway.ok) {
if (options.callId && isRecord(gateway.payload)) {
if (gateway.payload.found === true && "call" in gateway.payload) {
writeStdoutJson(gateway.payload.call);
return;
}
if (gateway.payload.found === false) {
writeStdoutJson({ found: false });
return;
}
}
writeStdoutJson(gateway.payload);
return;
}
const rt = await ensureRuntime();
if (options.callId) {
const call = rt.manager.getCall(options.callId);