diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a31a2529ef..58e57abb9f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ Docs: https://docs.openclaw.ai - Plugins/Google Meet: default Chrome realtime sessions to OpenAI plus SoX `rec`/`play` audio bridge commands, so the usual setup only needs the plugin enabled and `OPENAI_API_KEY`. Thanks @steipete. - Plugins/Google Meet: add a `chrome-node` transport so a paired macOS node, such as a Parallels VM, can own Chrome, BlackHole, and SoX while the Gateway machine keeps the agent and model key. Thanks @steipete. - Plugins/Voice Call: expose the shared `openclaw_agent_consult` realtime tool so live phone calls can ask the full OpenClaw agent for deeper/tool-backed answers. Thanks @steipete. +- Plugins/Voice Call: add `voicecall setup` and a dry-run-by-default `voicecall smoke` command so Twilio/provider readiness can be checked before placing a live test call. Thanks @steipete. - Plugins/Bonjour: move LAN Gateway discovery advertising into a default-enabled bundled plugin with its own `@homebridge/ciao` dependency, so users can disable Bonjour without cutting wide-area discovery. Thanks @vincentkoc. - Providers/Google: add a Gemini Live realtime voice provider for backend Voice Call and Google Meet audio bridges, with bidirectional audio and function-call support. Thanks @steipete. - Plugins/Google Meet: let realtime Meet sessions consult the full OpenClaw agent for deeper answers while staying in the live voice loop. Thanks @steipete. @@ -72,6 +73,7 @@ Docs: https://docs.openclaw.ai - Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv. 
- Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana. - Agents/TTS: suppress successful spoken transcripts from verbose chat tool output when structured voice media is already queued, while preserving text output for non-builtin tool-name collisions. Fixes #71282. Thanks @neeravmakwana. +- Plugins/Google Meet: reuse existing Meet tabs and active sessions across harmless URL query differences, avoiding duplicate Chrome windows when agents retry a join. Thanks @steipete. - Gateway/sessions: recover main-agent turns interrupted by a gateway restart from stale transcript-lock evidence, avoiding stuck `status: "running"` sessions without broad post-boot transcript scans. Fixes #70555. Thanks @bitloi. - Codex approvals: keep command approval responses within Codex app-server `availableDecisions`, including deny/cancel fallbacks for prompts that do not offer `decline`. (#71338) Thanks @Lucenx9. - Plugins/Google Meet: include live Chrome-node readiness in `googlemeet setup` and document the Parallels recovery checks, so stale node tokens or disconnected VM browsers are visible before an agent opens a meeting. Thanks @steipete. 
diff --git a/docs/cli/voicecall.md b/docs/cli/voicecall.md index 17afcb5ef70..4ad165c6069 100644 --- a/docs/cli/voicecall.md +++ b/docs/cli/voicecall.md @@ -2,7 +2,7 @@ summary: "CLI reference for `openclaw voicecall` (voice-call plugin command surface)" read_when: - You use the voice-call plugin and want the CLI entry points - - You want quick examples for `voicecall call|continue|dtmf|status|tail|expose` + - You want quick examples for `voicecall setup|smoke|call|continue|dtmf|status|tail|expose` title: "Voicecall" --- @@ -17,6 +17,8 @@ Primary doc: ## Common commands ```bash +openclaw voicecall setup +openclaw voicecall smoke openclaw voicecall status --call-id <id> openclaw voicecall call --to "+15555550123" --message "Hello" --mode notify openclaw voicecall continue --call-id <id> --message "Any questions?" @@ -24,6 +26,21 @@ openclaw voicecall dtmf --call-id <id> --digits "ww123456#" openclaw voicecall end --call-id <id> ``` +`setup` prints human-readable readiness checks by default. Use `--json` for +scripts: + +```bash +openclaw voicecall setup --json +``` + +`smoke` runs the same readiness checks. It will not place a real phone call +unless both `--to` and `--yes` are present: + +```bash +openclaw voicecall smoke --to "+15555550123" # dry run +openclaw voicecall smoke --to "+15555550123" --yes # live notify call +``` + ## Exposing webhooks (Tailscale) ```bash diff --git a/docs/plugins/voice-call.md b/docs/plugins/voice-call.md index 92abfd5f37c..38f544fd92f 100644 --- a/docs/plugins/voice-call.md +++ b/docs/plugins/voice-call.md @@ -141,6 +141,31 @@ Set config under `plugins.entries.voice-call.config`: } ``` +Check setup before testing with a real provider: + +```bash +openclaw voicecall setup +``` + +The default output is readable in chat logs and terminal sessions. It checks +whether the plugin is enabled, the provider and credentials are present, webhook +exposure is configured, and only one audio mode is active. Use +`openclaw voicecall setup --json` for scripts. 
+ +For a no-surprises smoke test, run: + +```bash +openclaw voicecall smoke +openclaw voicecall smoke --to "+15555550123" +``` + +The second command is still a dry run. Add `--yes` to place a short outbound +notify call: + +```bash +openclaw voicecall smoke --to "+15555550123" --yes +``` + Notes: - Twilio/Telnyx require a **publicly reachable** webhook URL. diff --git a/extensions/voice-call/index.test.ts b/extensions/voice-call/index.test.ts index 64930ec991f..23bd45cb347 100644 --- a/extensions/voice-call/index.test.ts +++ b/extensions/voice-call/index.test.ts @@ -105,7 +105,10 @@ function setup(config: Record): Registered { return { methods, tools, service }; } -async function registerVoiceCallCli(program: Command) { +async function registerVoiceCallCli( + program: Command, + pluginConfig: Record = { provider: "mock" }, +) { const { register } = plugin as unknown as { register: RegisterVoiceCall; }; @@ -116,7 +119,7 @@ async function registerVoiceCallCli(program: Command) { version: "0", source: "test", config: {}, - pluginConfig: { provider: "mock" }, + pluginConfig, runtime: { tts: { textToSpeechTelephony: vi.fn() } }, logger: noopLogger, registerGatewayMethod: () => {}, @@ -366,4 +369,104 @@ describe("voice-call plugin", () => { stdout.restore(); } }); + + it("CLI setup prints human-readable checks by default", async () => { + const program = new Command(); + const stdout = captureStdout(); + await registerVoiceCallCli(program, { + provider: "twilio", + fromNumber: "+15550001234", + publicUrl: "https://voice.example.com/voice/webhook", + twilio: { + accountSid: "AC123", + authToken: "token", + }, + }); + + try { + await program.parseAsync(["voicecall", "setup"], { from: "user" }); + expect(stdout.output()).toContain("Voice Call setup: OK"); + expect(stdout.output()).toContain("OK provider: Provider configured: twilio"); + } finally { + stdout.restore(); + } + }); + + it("CLI setup preserves JSON output with --json", async () => { + const program = new 
Command(); + const stdout = captureStdout(); + await registerVoiceCallCli(program, { + provider: "twilio", + fromNumber: "+15550001234", + twilio: { + accountSid: "AC123", + authToken: "token", + }, + }); + + try { + await program.parseAsync(["voicecall", "setup", "--json"], { from: "user" }); + const parsed = JSON.parse(stdout.output()) as { + ok?: boolean; + checks?: Array<{ id: string; ok: boolean }>; + }; + expect(parsed.ok).toBe(false); + expect(parsed.checks).toContainEqual( + expect.objectContaining({ id: "webhook-exposure", ok: false }), + ); + } finally { + stdout.restore(); + } + }); + + it("CLI smoke dry-runs a live call unless --yes is passed", async () => { + const program = new Command(); + const stdout = captureStdout(); + await registerVoiceCallCli(program, { + provider: "twilio", + fromNumber: "+15550001234", + publicUrl: "https://voice.example.com/voice/webhook", + twilio: { + accountSid: "AC123", + authToken: "token", + }, + }); + + try { + await program.parseAsync(["voicecall", "smoke", "--to", "+15550009999"], { + from: "user", + }); + expect(stdout.output()).toContain("live-call: dry run for +15550009999"); + expect(runtimeStub.manager.initiateCall).not.toHaveBeenCalled(); + } finally { + stdout.restore(); + } + }); + + it("CLI smoke can place a live notify call with --yes", async () => { + const program = new Command(); + const stdout = captureStdout(); + await registerVoiceCallCli(program, { + provider: "twilio", + fromNumber: "+15550001234", + publicUrl: "https://voice.example.com/voice/webhook", + twilio: { + accountSid: "AC123", + authToken: "token", + }, + }); + + try { + await program.parseAsync(["voicecall", "smoke", "--to", "+15550009999", "--yes"], { + from: "user", + }); + expect(runtimeStub.manager.initiateCall).toHaveBeenCalledWith("+15550009999", undefined, { + message: "OpenClaw voice call smoke test.", + mode: "notify", + }); + expect(stdout.output()).toContain("live-call: started call-1"); + } finally { + stdout.restore(); + } 
+ }); }); diff --git a/extensions/voice-call/src/cli.ts b/extensions/voice-call/src/cli.ts index 79f48e1cffe..4d67af54f67 100644 --- a/extensions/voice-call/src/cli.ts +++ b/extensions/voice-call/src/cli.ts @@ -5,7 +5,7 @@ import { format } from "node:util"; import type { Command } from "commander"; import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/text-runtime"; import { sleep } from "../api.js"; -import type { VoiceCallConfig } from "./config.js"; +import { validateProviderConfig, type VoiceCallConfig } from "./config.js"; import type { VoiceCallRuntime } from "./runtime.js"; import { resolveUserPath } from "./utils.js"; import { @@ -20,6 +20,17 @@ type Logger = { error: (message: string) => void; }; +type SetupCheck = { + id: string; + ok: boolean; + message: string; +}; + +type SetupStatus = { + ok: boolean; + checks: SetupCheck[]; +}; + function writeStdoutLine(...values: unknown[]): void { process.stdout.write(`${format(...values)}\n`); } @@ -95,6 +106,76 @@ function resolveCallMode(mode?: string): "notify" | "conversation" | undefined { return mode === "notify" || mode === "conversation" ? mode : undefined; } +function hasPublicExposure(config: VoiceCallConfig): boolean { + return Boolean( + config.publicUrl || + (config.tunnel?.provider && config.tunnel.provider !== "none") || + (config.tailscale?.mode && config.tailscale.mode !== "off"), + ); +} + +function buildSetupStatus(config: VoiceCallConfig): SetupStatus { + const validation = validateProviderConfig(config); + const checks: SetupCheck[] = [ + { + id: "plugin-enabled", + ok: config.enabled, + message: config.enabled + ? "Voice Call plugin is enabled" + : "Enable plugins.entries.voice-call.enabled", + }, + { + id: "provider", + ok: Boolean(config.provider), + message: config.provider + ? `Provider configured: ${config.provider}` + : "Set plugins.entries.voice-call.config.provider", + }, + { + id: "provider-config", + ok: validation.valid, + message: validation.valid + ? 
"Provider credentials/config look complete" + : validation.errors.join("; "), + }, + { + id: "webhook-exposure", + ok: config.provider === "mock" || hasPublicExposure(config), + message: + config.provider === "mock" + ? "Mock provider does not need a public webhook" + : hasPublicExposure(config) + ? config.publicUrl + ? `Public webhook URL configured: ${config.publicUrl}` + : "Webhook exposure configured through tunnel or Tailscale" + : "Set publicUrl or configure tunnel/tailscale so the provider can reach webhooks", + }, + { + id: "mode", + ok: !(config.streaming.enabled && config.realtime.enabled), + message: + config.streaming.enabled && config.realtime.enabled + ? "streaming.enabled and realtime.enabled cannot both be true" + : config.realtime.enabled + ? `Realtime voice enabled (${config.realtime.provider ?? "first registered provider"})` + : config.streaming.enabled + ? `Streaming transcription enabled (${config.streaming.provider ?? "first registered provider"})` + : "Notify/conversation calls use normal TTS/STT flow", + }, + ]; + return { + ok: checks.every((check) => check.ok), + checks, + }; +} + +function writeSetupStatus(status: SetupStatus): void { + writeStdoutLine("Voice Call setup: %s", status.ok ? "OK" : "needs attention"); + for (const check of status.checks) { + writeStdoutLine("%s %s: %s", check.ok ? 
"OK" : "FAIL", check.id, check.message); + } +} + async function initiateCallAndPrintId(params: { runtime: VoiceCallRuntime; to: string; @@ -123,6 +204,84 @@ export function registerVoiceCallCli(params: { .description("Voice call utilities") .addHelpText("after", () => `\nDocs: https://docs.openclaw.ai/cli/voicecall\n`); + root + .command("setup") + .description("Show Voice Call provider and webhook setup status") + .option("--json", "Print machine-readable JSON") + .action((options: { json?: boolean }) => { + const status = buildSetupStatus(config); + if (options.json) { + writeStdoutJson(status); + return; + } + writeSetupStatus(status); + }); + + root + .command("smoke") + .description("Check Voice Call readiness and optionally place a short outbound test call") + .option("-t, --to ", "Phone number to call for a live smoke") + .option( + "--message ", + "Message to speak during the smoke call", + "OpenClaw voice call smoke test.", + ) + .option("--mode ", "Call mode: notify or conversation", "notify") + .option("--yes", "Actually place the live outbound call") + .option("--json", "Print machine-readable JSON") + .action( + async (options: { + to?: string; + message?: string; + mode?: string; + yes?: boolean; + json?: boolean; + }) => { + const setup = buildSetupStatus(config); + if (!setup.ok) { + if (options.json) { + writeStdoutJson({ ok: false, setup }); + } else { + writeSetupStatus(setup); + } + process.exitCode = 1; + return; + } + if (!options.to) { + if (options.json) { + writeStdoutJson({ ok: true, setup, liveCall: false }); + } else { + writeSetupStatus(setup); + writeStdoutLine("live-call: skipped (pass --to and --yes to place one)"); + } + return; + } + if (!options.yes) { + if (options.json) { + writeStdoutJson({ ok: true, setup, liveCall: false, wouldCall: options.to }); + } else { + writeSetupStatus(setup); + writeStdoutLine("live-call: dry run for %s (add --yes to place it)", options.to); + } + return; + } + const rt = await ensureRuntime(); + 
const result = await rt.manager.initiateCall(options.to, undefined, { + message: options.message, + mode: resolveCallMode(options.mode) ?? "notify", + }); + if (!result.success) { + throw new Error(result.error || "smoke call failed"); + } + if (options.json) { + writeStdoutJson({ ok: true, setup, liveCall: true, callId: result.callId }); + return; + } + writeSetupStatus(setup); + writeStdoutLine("live-call: started %s", result.callId); + }, + ); + root .command("call") .description("Initiate an outbound voice call")