feat(voice-call): add setup smoke checks

This commit is contained in:
Peter Steinberger
2026-04-25 03:10:52 +01:00
parent f9f7d6ffb5
commit a3862ffdf1
5 changed files with 310 additions and 4 deletions

View File

@@ -56,6 +56,7 @@ Docs: https://docs.openclaw.ai
- Plugins/Google Meet: default Chrome realtime sessions to OpenAI plus SoX `rec`/`play` audio bridge commands, so the usual setup only needs the plugin enabled and `OPENAI_API_KEY`. Thanks @steipete.
- Plugins/Google Meet: add a `chrome-node` transport so a paired macOS node, such as a Parallels VM, can own Chrome, BlackHole, and SoX while the Gateway machine keeps the agent and model key. Thanks @steipete.
- Plugins/Voice Call: expose the shared `openclaw_agent_consult` realtime tool so live phone calls can ask the full OpenClaw agent for deeper/tool-backed answers. Thanks @steipete.
- Plugins/Voice Call: add `voicecall setup` and a dry-run-by-default `voicecall smoke` command so Twilio/provider readiness can be checked before placing a live test call. Thanks @steipete.
- Plugins/Bonjour: move LAN Gateway discovery advertising into a default-enabled bundled plugin with its own `@homebridge/ciao` dependency, so users can disable Bonjour without cutting wide-area discovery. Thanks @vincentkoc.
- Providers/Google: add a Gemini Live realtime voice provider for backend Voice Call and Google Meet audio bridges, with bidirectional audio and function-call support. Thanks @steipete.
- Plugins/Google Meet: let realtime Meet sessions consult the full OpenClaw agent for deeper answers while staying in the live voice loop. Thanks @steipete.
@@ -72,6 +73,7 @@ Docs: https://docs.openclaw.ai
- Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv.
- Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana.
- Agents/TTS: suppress successful spoken transcripts from verbose chat tool output when structured voice media is already queued, while preserving text output for non-builtin tool-name collisions. Fixes #71282. Thanks @neeravmakwana.
- Plugins/Google Meet: reuse existing Meet tabs and active sessions across harmless URL query differences, avoiding duplicate Chrome windows when agents retry a join. Thanks @steipete.
- Gateway/sessions: recover main-agent turns interrupted by a gateway restart from stale transcript-lock evidence, avoiding stuck `status: "running"` sessions without broad post-boot transcript scans. Fixes #70555. Thanks @bitloi.
- Codex approvals: keep command approval responses within Codex app-server `availableDecisions`, including deny/cancel fallbacks for prompts that do not offer `decline`. (#71338) Thanks @Lucenx9.
- Plugins/Google Meet: include live Chrome-node readiness in `googlemeet setup` and document the Parallels recovery checks, so stale node tokens or disconnected VM browsers are visible before an agent opens a meeting. Thanks @steipete.

View File

@@ -2,7 +2,7 @@
summary: "CLI reference for `openclaw voicecall` (voice-call plugin command surface)"
read_when:
- You use the voice-call plugin and want the CLI entry points
- You want quick examples for `voicecall call|continue|dtmf|status|tail|expose`
- You want quick examples for `voicecall setup|smoke|call|continue|dtmf|status|tail|expose`
title: "Voicecall"
---
@@ -17,6 +17,8 @@ Primary doc:
## Common commands
```bash
openclaw voicecall setup
openclaw voicecall smoke
openclaw voicecall status --call-id <id>
openclaw voicecall call --to "+15555550123" --message "Hello" --mode notify
openclaw voicecall continue --call-id <id> --message "Any questions?"
@@ -24,6 +26,21 @@ openclaw voicecall dtmf --call-id <id> --digits "ww123456#"
openclaw voicecall end --call-id <id>
```
`setup` prints human-readable readiness checks by default. Use `--json` for
scripts:
```bash
openclaw voicecall setup --json
```
`smoke` runs the same readiness checks and exits with a non-zero status if any
of them fail. It will not place a real phone call unless both `--to` and
`--yes` are present:
```bash
openclaw voicecall smoke --to "+15555550123" # dry run
openclaw voicecall smoke --to "+15555550123" --yes # live notify call
```
## Exposing webhooks (Tailscale)
```bash

View File

@@ -141,6 +141,31 @@ Set config under `plugins.entries.voice-call.config`:
}
```
Check setup before testing with a real provider:
```bash
openclaw voicecall setup
```
The default output is readable in chat logs and terminal sessions. It checks
whether the plugin is enabled, the provider and credentials are present, webhook
exposure is configured, and only one audio mode is active. Use
`openclaw voicecall setup --json` for scripts.
For a no-surprises smoke test, run:
```bash
openclaw voicecall smoke
openclaw voicecall smoke --to "+15555550123"
```
The second command is still a dry run. Add `--yes` to place a short outbound
notify call:
```bash
openclaw voicecall smoke --to "+15555550123" --yes
```
Notes:
- Twilio/Telnyx require a **publicly reachable** webhook URL.

View File

@@ -105,7 +105,10 @@ function setup(config: Record<string, unknown>): Registered {
return { methods, tools, service };
}
async function registerVoiceCallCli(program: Command) {
async function registerVoiceCallCli(
program: Command,
pluginConfig: Record<string, unknown> = { provider: "mock" },
) {
const { register } = plugin as unknown as {
register: RegisterVoiceCall;
};
@@ -116,7 +119,7 @@ async function registerVoiceCallCli(program: Command) {
version: "0",
source: "test",
config: {},
pluginConfig: { provider: "mock" },
pluginConfig,
runtime: { tts: { textToSpeechTelephony: vi.fn() } },
logger: noopLogger,
registerGatewayMethod: () => {},
@@ -366,4 +369,104 @@ describe("voice-call plugin", () => {
stdout.restore();
}
});
// `voicecall setup` without flags should print the plain-text report.
it("CLI setup prints human-readable checks by default", async () => {
  const cli = new Command();
  const captured = captureStdout();
  const twilioConfig = {
    provider: "twilio",
    fromNumber: "+15550001234",
    publicUrl: "https://voice.example.com/voice/webhook",
    twilio: {
      accountSid: "AC123",
      authToken: "token",
    },
  };
  await registerVoiceCallCli(cli, twilioConfig);
  try {
    const argv = ["voicecall", "setup"];
    await cli.parseAsync(argv, { from: "user" });
    const printed = captured.output();
    expect(printed).toContain("Voice Call setup: OK");
    expect(printed).toContain("OK provider: Provider configured: twilio");
  } finally {
    captured.restore();
  }
});
// `voicecall setup --json` should emit a parseable report; this config omits
// publicUrl, so the webhook-exposure check is expected to fail.
it("CLI setup preserves JSON output with --json", async () => {
  const cli = new Command();
  const captured = captureStdout();
  const twilioConfigWithoutWebhook = {
    provider: "twilio",
    fromNumber: "+15550001234",
    twilio: {
      accountSid: "AC123",
      authToken: "token",
    },
  };
  await registerVoiceCallCli(cli, twilioConfigWithoutWebhook);
  try {
    const argv = ["voicecall", "setup", "--json"];
    await cli.parseAsync(argv, { from: "user" });
    const report = JSON.parse(captured.output()) as {
      ok?: boolean;
      checks?: Array<{ id: string; ok: boolean }>;
    };
    expect(report.ok).toBe(false);
    const failedExposure = expect.objectContaining({ id: "webhook-exposure", ok: false });
    expect(report.checks).toContainEqual(failedExposure);
  } finally {
    captured.restore();
  }
});
// `voicecall smoke --to <n>` without `--yes` must only report a dry run.
it("CLI smoke dry-runs a live call unless --yes is passed", async () => {
  const cli = new Command();
  const captured = captureStdout();
  const twilioConfig = {
    provider: "twilio",
    fromNumber: "+15550001234",
    publicUrl: "https://voice.example.com/voice/webhook",
    twilio: {
      accountSid: "AC123",
      authToken: "token",
    },
  };
  await registerVoiceCallCli(cli, twilioConfig);
  try {
    const argv = ["voicecall", "smoke", "--to", "+15550009999"];
    await cli.parseAsync(argv, { from: "user" });
    expect(captured.output()).toContain("live-call: dry run for +15550009999");
    expect(runtimeStub.manager.initiateCall).not.toHaveBeenCalled();
  } finally {
    captured.restore();
  }
});
// `voicecall smoke --to <n> --yes` should start a notify call with the default message.
it("CLI smoke can place a live notify call with --yes", async () => {
  const cli = new Command();
  const captured = captureStdout();
  const twilioConfig = {
    provider: "twilio",
    fromNumber: "+15550001234",
    publicUrl: "https://voice.example.com/voice/webhook",
    twilio: {
      accountSid: "AC123",
      authToken: "token",
    },
  };
  await registerVoiceCallCli(cli, twilioConfig);
  try {
    const argv = ["voicecall", "smoke", "--to", "+15550009999", "--yes"];
    await cli.parseAsync(argv, { from: "user" });
    const expectedCallOptions = {
      message: "OpenClaw voice call smoke test.",
      mode: "notify",
    };
    expect(runtimeStub.manager.initiateCall).toHaveBeenCalledWith(
      "+15550009999",
      undefined,
      expectedCallOptions,
    );
    expect(captured.output()).toContain("live-call: started call-1");
  } finally {
    captured.restore();
  }
});
});

View File

@@ -5,7 +5,7 @@ import { format } from "node:util";
import type { Command } from "commander";
import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime";
import { sleep } from "../api.js";
import type { VoiceCallConfig } from "./config.js";
import { validateProviderConfig, type VoiceCallConfig } from "./config.js";
import type { VoiceCallRuntime } from "./runtime.js";
import { resolveUserPath } from "./utils.js";
import {
@@ -20,6 +20,17 @@ type Logger = {
error: (message: string) => void;
};
/** Result of a single readiness check produced by `buildSetupStatus`. */
type SetupCheck = {
/** Identifier for the check (e.g. "provider", "webhook-exposure"); printed by `writeSetupStatus`. */
id: string;
/** True when the check passed. */
ok: boolean;
/** Human-readable status: a confirmation when the check passes, otherwise what failed or what to set. */
message: string;
};
/** Aggregate readiness report: the individual checks plus an overall verdict. */
type SetupStatus = {
/** True only when every entry in `checks` passed. */
ok: boolean;
/** Individual check results, in the order they are printed. */
checks: SetupCheck[];
};
/**
 * Writes a single line to stdout.
 *
 * The arguments are handed to `util.format`, so the first value may be a
 * printf-style format string (`%s`, `%d`, ...). A trailing newline is appended.
 */
function writeStdoutLine(...values: unknown[]): void {
  const rendered = format(...values);
  process.stdout.write(rendered + "\n");
}
@@ -95,6 +106,76 @@ function resolveCallMode(mode?: string): "notify" | "conversation" | undefined {
return mode === "notify" || mode === "conversation" ? mode : undefined;
}
/**
 * Reports whether the config gives the telephony provider some way to reach
 * the webhook: an explicit `publicUrl`, a tunnel provider other than "none",
 * or a Tailscale mode other than "off".
 */
function hasPublicExposure(config: VoiceCallConfig): boolean {
  if (config.publicUrl) {
    return true;
  }
  const tunnelProvider = config.tunnel?.provider;
  if (tunnelProvider && tunnelProvider !== "none") {
    return true;
  }
  const tailscaleMode = config.tailscale?.mode;
  return Boolean(tailscaleMode && tailscaleMode !== "off");
}
/**
 * Builds the readiness report used by `voicecall setup` and `voicecall smoke`.
 *
 * Five checks are produced, in this order: `plugin-enabled`, `provider`,
 * `provider-config` (delegated to `validateProviderConfig`), `webhook-exposure`
 * (always satisfied for the mock provider) and `mode` (streaming and realtime
 * must not both be enabled). The report is `ok` only when every check passes.
 *
 * @param config Voice Call plugin configuration to inspect.
 * @returns The individual checks plus the overall verdict.
 */
function buildSetupStatus(config: VoiceCallConfig): SetupStatus {
  const validation = validateProviderConfig(config);
  const isMock = config.provider === "mock";
  const exposed = hasPublicExposure(config);
  const modeConflict = config.streaming.enabled && config.realtime.enabled;

  let exposureMessage: string;
  if (isMock) {
    exposureMessage = "Mock provider does not need a public webhook";
  } else if (!exposed) {
    exposureMessage =
      "Set publicUrl or configure tunnel/tailscale so the provider can reach webhooks";
  } else if (config.publicUrl) {
    exposureMessage = `Public webhook URL configured: ${config.publicUrl}`;
  } else {
    exposureMessage = "Webhook exposure configured through tunnel or Tailscale";
  }

  let modeMessage: string;
  if (modeConflict) {
    modeMessage = "streaming.enabled and realtime.enabled cannot both be true";
  } else if (config.realtime.enabled) {
    modeMessage = `Realtime voice enabled (${config.realtime.provider ?? "first registered provider"})`;
  } else if (config.streaming.enabled) {
    modeMessage = `Streaming transcription enabled (${config.streaming.provider ?? "first registered provider"})`;
  } else {
    modeMessage = "Notify/conversation calls use normal TTS/STT flow";
  }

  const checks: SetupCheck[] = [];
  checks.push({
    id: "plugin-enabled",
    ok: config.enabled,
    message: config.enabled
      ? "Voice Call plugin is enabled"
      : "Enable plugins.entries.voice-call.enabled",
  });
  checks.push({
    id: "provider",
    ok: Boolean(config.provider),
    message: config.provider
      ? `Provider configured: ${config.provider}`
      : "Set plugins.entries.voice-call.config.provider",
  });
  checks.push({
    id: "provider-config",
    ok: validation.valid,
    // On failure, surface every validation error on a single line.
    message: validation.valid
      ? "Provider credentials/config look complete"
      : validation.errors.join("; "),
  });
  checks.push({
    id: "webhook-exposure",
    ok: isMock || exposed,
    message: exposureMessage,
  });
  checks.push({
    id: "mode",
    ok: !modeConflict,
    message: modeMessage,
  });

  const allPassed = checks.every((entry) => entry.ok);
  return { ok: allPassed, checks };
}
/**
 * Prints the readiness report in human-readable form: a headline with the
 * overall verdict, then one `OK`/`FAIL` line per check.
 */
function writeSetupStatus(status: SetupStatus): void {
  const headline = status.ok ? "OK" : "needs attention";
  writeStdoutLine("Voice Call setup: %s", headline);
  status.checks.forEach(({ id, ok, message }) => {
    const verdict = ok ? "OK" : "FAIL";
    writeStdoutLine("%s %s: %s", verdict, id, message);
  });
}
async function initiateCallAndPrintId(params: {
runtime: VoiceCallRuntime;
to: string;
@@ -123,6 +204,84 @@ export function registerVoiceCallCli(params: {
.description("Voice call utilities")
.addHelpText("after", () => `\nDocs: https://docs.openclaw.ai/cli/voicecall\n`);
// `voicecall setup`: print the readiness report, as JSON when `--json` is given.
root
  .command("setup")
  .description("Show Voice Call provider and webhook setup status")
  .option("--json", "Print machine-readable JSON")
  .action(({ json }: { json?: boolean }) => {
    const report = buildSetupStatus(config);
    if (json) {
      writeStdoutJson(report);
    } else {
      writeSetupStatus(report);
    }
  });
// `voicecall smoke`: run the readiness checks and, only when both `--to` and
// `--yes` are given, place a real outbound call. Failed checks set exit code 1.
root
  .command("smoke")
  .description("Check Voice Call readiness and optionally place a short outbound test call")
  .option("-t, --to <phone>", "Phone number to call for a live smoke")
  .option(
    "--message <text>",
    "Message to speak during the smoke call",
    "OpenClaw voice call smoke test.",
  )
  .option("--mode <mode>", "Call mode: notify or conversation", "notify")
  .option("--yes", "Actually place the live outbound call")
  .option("--json", "Print machine-readable JSON")
  .action(
    async (options: {
      to?: string;
      message?: string;
      mode?: string;
      yes?: boolean;
      json?: boolean;
    }) => {
      const setup = buildSetupStatus(config);
      const to = options.to;

      // Emits either the JSON payload or the human-readable report followed
      // by an optional trailing "live-call" line.
      const report = (payload: Record<string, unknown>, trailer?: () => void): void => {
        if (options.json) {
          writeStdoutJson(payload);
          return;
        }
        writeSetupStatus(setup);
        trailer?.();
      };

      if (!setup.ok) {
        report({ ok: false, setup });
        process.exitCode = 1;
        return;
      }

      if (!to) {
        report({ ok: true, setup, liveCall: false }, () => {
          writeStdoutLine("live-call: skipped (pass --to and --yes to place one)");
        });
        return;
      }

      if (!options.yes) {
        report({ ok: true, setup, liveCall: false, wouldCall: to }, () => {
          writeStdoutLine("live-call: dry run for %s (add --yes to place it)", to);
        });
        return;
      }

      // Explicitly confirmed: start the runtime and place the call.
      const runtime = await ensureRuntime();
      const outcome = await runtime.manager.initiateCall(to, undefined, {
        message: options.message,
        mode: resolveCallMode(options.mode) ?? "notify",
      });
      if (!outcome.success) {
        throw new Error(outcome.error || "smoke call failed");
      }

      report({ ok: true, setup, liveCall: true, callId: outcome.callId }, () => {
        writeStdoutLine("live-call: started %s", outcome.callId);
      });
    },
  );
root
.command("call")
.description("Initiate an outbound voice call")