diff --git a/CHANGELOG.md b/CHANGELOG.md index b98a5622eec..4a31a2529ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,7 @@ Docs: https://docs.openclaw.ai - Discord/subagents: preserve thread-bound completion delivery by keeping the requester-agent announce path primary and falling back to direct thread sends only when the announce produces no visible output. (#71064) Thanks @DolencLuka. - Browser/tool: give Chrome MCP existing-session manage calls a longer default timeout, pass explicit tool timeouts through tab management, and recover stale selected-page MCP sessions instead of forcing a manual reset. Thanks @steipete. - Browser/sandbox: clean up idle tracked tabs opened by primary-agent browser sessions, while preserving active tab reuse and lifecycle cleanup for subagents, cron, and ACP sessions. Fixes #71165. Thanks @dwbutler. +- Plugins/Voice Call: reuse the webhook runtime across in-process plugin contexts, avoiding `EADDRINUSE` when agent tools or CLI commands run while the Gateway already owns the voice webhook port. Fixes #58115. Thanks @sfbrian. - Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv. - Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana. - Agents/TTS: suppress successful spoken transcripts from verbose chat tool output when structured voice media is already queued, while preserving text output for non-builtin tool-name collisions. Fixes #71282. Thanks @neeravmakwana. diff --git a/extensions/voice-call/index.test.ts b/extensions/voice-call/index.test.ts index 090a5eb0fea..64930ec991f 100644 --- a/extensions/voice-call/index.test.ts +++ b/extensions/voice-call/index.test.ts @@ -5,20 +5,9 @@ import { Command } from "commander"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { createTestPluginApi } from "../../test/helpers/plugins/plugin-api.ts"; import type { OpenClawPluginApi } from "./api.js"; +import type { VoiceCallRuntime } from "./runtime-entry.js"; -let runtimeStub: { - config: { toNumber?: string }; - manager: { - initiateCall: ReturnType; - continueCall: ReturnType; - speak: ReturnType; - sendDtmf: ReturnType; - endCall: ReturnType; - getCall: ReturnType; - getCallByProviderCallId: ReturnType; - }; - stop: ReturnType; -}; +let runtimeStub: VoiceCallRuntime; vi.mock("./runtime-entry.js", () => ({ createVoiceCallRuntime: vi.fn(async () => runtimeStub), @@ -37,6 +26,7 @@ const noopLogger = { type Registered = { methods: Map; tools: unknown[]; + service?: Parameters[0]; }; type RegisterVoiceCall = (api: Record) => void; type RegisterCliContext = { @@ -57,9 +47,42 @@ function captureStdout() { restore: () => writeSpy.mockRestore(), }; } + +function createRuntimeStub(callId = "call-1"): VoiceCallRuntime { + return { + config: { toNumber: "+15550001234" } as VoiceCallRuntime["config"], + provider: {} as VoiceCallRuntime["provider"], + manager: { + initiateCall: vi.fn(async () => ({ callId, success: true })), + continueCall: vi.fn(async () => ({ + success: true, + transcript: "hello", + })), + speak: vi.fn(async () => ({ success: true })), + sendDtmf: vi.fn(async () => ({ success: true })), + endCall: vi.fn(async () => ({ success: true })), + getCall: vi.fn((id: string) => (id === callId ? { callId } : undefined)), + getCallByProviderCallId: vi.fn(() => undefined), + } as unknown as VoiceCallRuntime["manager"], + webhookServer: {} as VoiceCallRuntime["webhookServer"], + webhookUrl: "http://127.0.0.1:3334/voice/webhook", + publicUrl: null, + stop: vi.fn(async () => {}), + }; +} + +function createServiceContext(): Parameters["start"]>[0] { + return { + config: {}, + stateDir: os.tmpdir(), + logger: noopLogger, + } as Parameters["start"]>[0]; +} + function setup(config: Record): Registered { const methods = new Map(); const tools: unknown[] = []; + let service: Registered["service"]; const api = createTestPluginApi({ id: "voice-call", name: "Voice Call", @@ -73,11 +96,13 @@ function setup(config: Record): Registered { registerGatewayMethod: (method: string, handler: unknown) => methods.set(method, handler), registerTool: (tool: unknown) => tools.push(tool), registerCli: () => {}, - registerService: () => {}, + registerService: (registeredService) => { + service = registeredService; + }, resolvePath: (p: string) => p, }); plugin.register(api); - return { methods, tools }; + return { methods, tools, service }; } async function registerVoiceCallCli(program: Command) { @@ -114,26 +139,60 @@ describe("voice-call plugin", () => { noopLogger.warn.mockClear(); noopLogger.error.mockClear(); noopLogger.debug.mockClear(); - vi.mocked(createVoiceCallRuntime).mockClear(); - runtimeStub = { - config: { toNumber: "+15550001234" }, - manager: { - initiateCall: vi.fn(async () => ({ callId: "call-1", success: true })), - continueCall: vi.fn(async () => ({ - success: true, - transcript: "hello", - })), - speak: vi.fn(async () => ({ success: true })), - sendDtmf: vi.fn(async () => ({ success: true })), - endCall: vi.fn(async () => ({ success: true })), - getCall: vi.fn((id: string) => (id === "call-1" ? { callId: "call-1" } : undefined)), - getCallByProviderCallId: vi.fn(() => undefined), - }, - stop: vi.fn(async () => {}), - }; + runtimeStub = createRuntimeStub(); + vi.mocked(createVoiceCallRuntime).mockReset(); + vi.mocked(createVoiceCallRuntime).mockImplementation(async () => runtimeStub); }); - afterEach(() => vi.restoreAllMocks()); + afterEach(() => { + vi.restoreAllMocks(); + delete (globalThis as Record)[Symbol.for("openclaw.voice-call.runtime")]; + delete (globalThis as Record)[ + Symbol.for("openclaw.voice-call.runtimePromise") + ]; + delete (globalThis as Record)[ + Symbol.for("openclaw.voice-call.runtimeStopPromise") + ]; + }); + + it("reuses a started runtime across plugin registration contexts", async () => { + const first = setup({ provider: "mock" }); + const second = setup({ provider: "mock" }); + + await first.service?.start(createServiceContext()); + const handler = second.methods.get("voicecall.initiate") as + | ((ctx: { + params: Record; + respond: ReturnType; + }) => Promise) + | undefined; + const respond = vi.fn(); + await handler?.({ params: { message: "Hi" }, respond }); + + expect(createVoiceCallRuntime).toHaveBeenCalledTimes(1); + expect(runtimeStub.manager.initiateCall).toHaveBeenCalledTimes(1); + expect(respond).toHaveBeenCalledWith(true, { callId: "call-1", initiated: true }); + }); + + it("creates a fresh shared runtime after service stop", async () => { + const first = setup({ provider: "mock" }); + await first.service?.start(createServiceContext()); + await first.service?.stop?.(createServiceContext()); + + runtimeStub = createRuntimeStub("call-2"); + const second = setup({ provider: "mock" }); + const handler = second.methods.get("voicecall.initiate") as + | ((ctx: { + params: Record; + respond: ReturnType; + }) => Promise) + | undefined; + const respond = vi.fn(); + await handler?.({ params: { message: "Hi" }, respond }); + + expect(createVoiceCallRuntime).toHaveBeenCalledTimes(2); + expect(respond).toHaveBeenCalledWith(true, { callId: "call-2", initiated: true }); + }); it("initiates a call via voicecall.initiate", async () => { const { methods } = setup({ provider: "mock" }); diff --git a/extensions/voice-call/index.ts b/extensions/voice-call/index.ts index c9381f9c11b..f487b730541 100644 --- a/extensions/voice-call/index.ts +++ b/extensions/voice-call/index.ts @@ -154,6 +154,24 @@ function asParamRecord(params: unknown): Record { : {}; } +const VOICE_CALL_RUNTIME_KEY = Symbol.for("openclaw.voice-call.runtime"); +const VOICE_CALL_RUNTIME_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimePromise"); +const VOICE_CALL_RUNTIME_STOP_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimeStopPromise"); + +type VoiceCallRuntimeGlobalState = typeof globalThis & { + [VOICE_CALL_RUNTIME_KEY]?: VoiceCallRuntime | null; + [VOICE_CALL_RUNTIME_PROMISE_KEY]?: Promise | null; + [VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]?: Promise | null; +}; + +function getVoiceCallRuntimeGlobalState(): VoiceCallRuntimeGlobalState { + const state = globalThis as VoiceCallRuntimeGlobalState; + state[VOICE_CALL_RUNTIME_KEY] ??= null; + state[VOICE_CALL_RUNTIME_PROMISE_KEY] ??= null; + state[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] ??= null; + return state; +} + export default definePluginEntry({ id: "voice-call", name: "Voice Call", @@ -173,39 +191,60 @@ export default definePluginEntry({ } } - let runtimePromise: Promise | null = null; - let runtime: VoiceCallRuntime | null = null; + const runtimeState = getVoiceCallRuntimeGlobalState(); - const ensureRuntime = async () => { + const ensureRuntime = async (): Promise => { if (!config.enabled) { throw new Error("Voice call disabled in plugin config"); } if (!validation.valid) { throw new Error(validation.errors.join("; ")); } - if (runtime) { - return runtime; + + while (true) { + if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) { + await runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]; + continue; + } + + const runtime = runtimeState[VOICE_CALL_RUNTIME_KEY]; + if (runtime) { + return runtime; + } + + let runtimePromise = runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY]; + if (!runtimePromise) { + runtimePromise = createVoiceCallRuntime({ + config, + coreConfig: api.config as CoreConfig, + fullConfig: api.config, + agentRuntime: api.runtime.agent, + ttsRuntime: api.runtime.tts, + logger: api.logger, + }); + runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = runtimePromise; + } + + try { + const createdRuntime = await runtimePromise; + if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) { + continue; + } + if (runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] !== runtimePromise) { + continue; + } + runtimeState[VOICE_CALL_RUNTIME_KEY] = createdRuntime; + return createdRuntime; + } catch (err) { + if (runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] === runtimePromise) { + // Reset shared state so the next call can retry instead of caching + // a rejected promise across plugin contexts. See: #32387, #58115. + runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = null; + runtimeState[VOICE_CALL_RUNTIME_KEY] = null; + } + throw err; + } } - if (!runtimePromise) { - runtimePromise = createVoiceCallRuntime({ - config, - coreConfig: api.config as CoreConfig, - fullConfig: api.config, - agentRuntime: api.runtime.agent, - ttsRuntime: api.runtime.tts, - logger: api.logger, - }); - } - try { - runtime = await runtimePromise; - } catch (err) { - // Reset so the next call can retry instead of caching the - // rejected promise forever (which also leaves the port orphaned - // if the server started before the failure). See: #32387 - runtimePromise = null; - throw err; - } - return runtime; }; const sendError = (respond: (ok: boolean, payload?: unknown) => void, err: unknown) => { @@ -574,15 +613,28 @@ export default definePluginEntry({ } }, stop: async () => { - if (!runtimePromise) { + if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) { + await runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]; return; } - try { - const rt = await runtimePromise; + const runtime = runtimeState[VOICE_CALL_RUNTIME_KEY]; + const runtimePromise = runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY]; + if (!runtime && !runtimePromise) { + return; + } + runtimeState[VOICE_CALL_RUNTIME_KEY] = null; + runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = null; + const stopPromise = (async () => { + const rt = runtime ?? (await runtimePromise!); await rt.stop(); + })(); + runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = stopPromise; + try { + await stopPromise; } finally { - runtimePromise = null; - runtime = null; + if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] === stopPromise) { + runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = null; + } } }, });