fix(voice-call): share webhook runtime across contexts

This commit is contained in:
Peter Steinberger
2026-04-25 03:09:33 +01:00
parent 250d13de53
commit ba4cd90dbc
3 changed files with 175 additions and 63 deletions

View File

@@ -68,6 +68,7 @@ Docs: https://docs.openclaw.ai
- Discord/subagents: preserve thread-bound completion delivery by keeping the requester-agent announce path primary and falling back to direct thread sends only when the announce produces no visible output. (#71064) Thanks @DolencLuka.
- Browser/tool: give Chrome MCP existing-session manage calls a longer default timeout, pass explicit tool timeouts through tab management, and recover stale selected-page MCP sessions instead of forcing a manual reset. Thanks @steipete.
- Browser/sandbox: clean up idle tracked tabs opened by primary-agent browser sessions, while preserving active tab reuse and lifecycle cleanup for subagents, cron, and ACP sessions. Fixes #71165. Thanks @dwbutler.
- Plugins/Voice Call: reuse the webhook runtime across in-process plugin contexts, avoiding `EADDRINUSE` when agent tools or CLI commands run while the Gateway already owns the voice webhook port. Fixes #58115. Thanks @sfbrian.
- Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv.
- Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana.
- Agents/TTS: suppress successful spoken transcripts from verbose chat tool output when structured voice media is already queued, while preserving text output for non-builtin tool-name collisions. Fixes #71282. Thanks @neeravmakwana.

View File

@@ -5,20 +5,9 @@ import { Command } from "commander";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { createTestPluginApi } from "../../test/helpers/plugins/plugin-api.ts";
import type { OpenClawPluginApi } from "./api.js";
import type { VoiceCallRuntime } from "./runtime-entry.js";
let runtimeStub: {
config: { toNumber?: string };
manager: {
initiateCall: ReturnType<typeof vi.fn>;
continueCall: ReturnType<typeof vi.fn>;
speak: ReturnType<typeof vi.fn>;
sendDtmf: ReturnType<typeof vi.fn>;
endCall: ReturnType<typeof vi.fn>;
getCall: ReturnType<typeof vi.fn>;
getCallByProviderCallId: ReturnType<typeof vi.fn>;
};
stop: ReturnType<typeof vi.fn>;
};
let runtimeStub: VoiceCallRuntime;
vi.mock("./runtime-entry.js", () => ({
createVoiceCallRuntime: vi.fn(async () => runtimeStub),
@@ -37,6 +26,7 @@ const noopLogger = {
type Registered = {
methods: Map<string, unknown>;
tools: unknown[];
service?: Parameters<OpenClawPluginApi["registerService"]>[0];
};
type RegisterVoiceCall = (api: Record<string, unknown>) => void;
type RegisterCliContext = {
@@ -57,9 +47,42 @@ function captureStdout() {
restore: () => writeSpy.mockRestore(),
};
}
/**
 * Builds a fake `VoiceCallRuntime` for the plugin tests.
 *
 * Each manager method is a `vi.fn` mock: the async ones resolve with
 * `success: true`, `getCall` only recognizes `callId`, and
 * `getCallByProviderCallId` always returns `undefined`.
 *
 * @param callId - Id reported by `initiateCall` and recognized by `getCall`.
 * @returns A stub whose `provider` and `webhookServer` are empty placeholder objects.
 */
function createRuntimeStub(callId = "call-1"): VoiceCallRuntime {
  // Only the manager methods listed here are mocked; the double cast lets the
  // partial object stand in for the full manager type.
  const manager = {
    initiateCall: vi.fn(async () => ({ callId, success: true })),
    continueCall: vi.fn(async () => ({ success: true, transcript: "hello" })),
    speak: vi.fn(async () => ({ success: true })),
    sendDtmf: vi.fn(async () => ({ success: true })),
    endCall: vi.fn(async () => ({ success: true })),
    getCall: vi.fn((id: string) => {
      if (id !== callId) {
        return undefined;
      }
      return { callId };
    }),
    getCallByProviderCallId: vi.fn(() => undefined),
  } as unknown as VoiceCallRuntime["manager"];

  const stub: VoiceCallRuntime = {
    config: { toNumber: "+15550001234" } as VoiceCallRuntime["config"],
    provider: {} as VoiceCallRuntime["provider"],
    manager,
    webhookServer: {} as VoiceCallRuntime["webhookServer"],
    webhookUrl: "http://127.0.0.1:3334/voice/webhook",
    publicUrl: null,
    stop: vi.fn(async () => {}),
  };
  return stub;
}
/**
 * Creates the argument passed to a registered service's `start` (and `stop`) in tests.
 *
 * @returns A context with an empty config, the OS temp directory as `stateDir`,
 *   and the shared `noopLogger`.
 */
function createServiceContext(): Parameters<NonNullable<Registered["service"]>["start"]>[0] {
  type ServiceStartContext = Parameters<NonNullable<Registered["service"]>["start"]>[0];
  const context = {
    config: {},
    stateDir: os.tmpdir(),
    logger: noopLogger,
  };
  // Asserted rather than annotated: only the fields listed above are provided.
  return context as ServiceStartContext;
}
function setup(config: Record<string, unknown>): Registered {
const methods = new Map<string, unknown>();
const tools: unknown[] = [];
let service: Registered["service"];
const api = createTestPluginApi({
id: "voice-call",
name: "Voice Call",
@@ -73,11 +96,13 @@ function setup(config: Record<string, unknown>): Registered {
registerGatewayMethod: (method: string, handler: unknown) => methods.set(method, handler),
registerTool: (tool: unknown) => tools.push(tool),
registerCli: () => {},
registerService: () => {},
registerService: (registeredService) => {
service = registeredService;
},
resolvePath: (p: string) => p,
});
plugin.register(api);
return { methods, tools };
return { methods, tools, service };
}
async function registerVoiceCallCli(program: Command) {
@@ -114,26 +139,60 @@ describe("voice-call plugin", () => {
noopLogger.warn.mockClear();
noopLogger.error.mockClear();
noopLogger.debug.mockClear();
vi.mocked(createVoiceCallRuntime).mockClear();
runtimeStub = {
config: { toNumber: "+15550001234" },
manager: {
initiateCall: vi.fn(async () => ({ callId: "call-1", success: true })),
continueCall: vi.fn(async () => ({
success: true,
transcript: "hello",
})),
speak: vi.fn(async () => ({ success: true })),
sendDtmf: vi.fn(async () => ({ success: true })),
endCall: vi.fn(async () => ({ success: true })),
getCall: vi.fn((id: string) => (id === "call-1" ? { callId: "call-1" } : undefined)),
getCallByProviderCallId: vi.fn(() => undefined),
},
stop: vi.fn(async () => {}),
};
runtimeStub = createRuntimeStub();
vi.mocked(createVoiceCallRuntime).mockReset();
vi.mocked(createVoiceCallRuntime).mockImplementation(async () => runtimeStub);
});
afterEach(() => vi.restoreAllMocks());
// Restore mocks and drop the voice-call slots stored on `globalThis`, so state
// left behind by one test is not visible to the next.
afterEach(() => {
  vi.restoreAllMocks();
  const globals = globalThis as Record<PropertyKey, unknown>;
  const sharedSlotNames = [
    "openclaw.voice-call.runtime",
    "openclaw.voice-call.runtimePromise",
    "openclaw.voice-call.runtimeStopPromise",
  ];
  for (const slotName of sharedSlotNames) {
    delete globals[Symbol.for(slotName)];
  }
});
// Two separate registrations must end up sharing one runtime: the service is
// started through the first, the call is placed through the second, and the
// runtime factory must have been invoked only once in total.
it("reuses a started runtime across plugin registration contexts", async () => {
  type InitiateHandler = (ctx: {
    params: Record<string, unknown>;
    respond: ReturnType<typeof vi.fn>;
  }) => Promise<void>;

  const starter = setup({ provider: "mock" });
  const caller = setup({ provider: "mock" });
  await starter.service?.start(createServiceContext());

  const initiate = caller.methods.get("voicecall.initiate") as InitiateHandler | undefined;
  const respond = vi.fn();
  await initiate?.({ params: { message: "Hi" }, respond });

  expect(createVoiceCallRuntime).toHaveBeenCalledTimes(1);
  expect(runtimeStub.manager.initiateCall).toHaveBeenCalledTimes(1);
  expect(respond).toHaveBeenCalledWith(true, { callId: "call-1", initiated: true });
});
// After the service is stopped, the next request must build a new runtime
// instead of reusing the stopped one.
it("creates a fresh shared runtime after service stop", async () => {
  type InitiateHandler = (ctx: {
    params: Record<string, unknown>;
    respond: ReturnType<typeof vi.fn>;
  }) => Promise<void>;

  const stopped = setup({ provider: "mock" });
  await stopped.service?.start(createServiceContext());
  await stopped.service?.stop?.(createServiceContext());

  // Swap the stub so the response reveals which runtime handled the call.
  runtimeStub = createRuntimeStub("call-2");
  const restarted = setup({ provider: "mock" });
  const initiate = restarted.methods.get("voicecall.initiate") as InitiateHandler | undefined;
  const respond = vi.fn();
  await initiate?.({ params: { message: "Hi" }, respond });

  expect(createVoiceCallRuntime).toHaveBeenCalledTimes(2);
  expect(respond).toHaveBeenCalledWith(true, { callId: "call-2", initiated: true });
});
it("initiates a call via voicecall.initiate", async () => {
const { methods } = setup({ provider: "mock" });

View File

@@ -154,6 +154,24 @@ function asParamRecord(params: unknown): Record<string, unknown> {
: {};
}
// The keys come from the global symbol registry (`Symbol.for`), so any code in
// the same realm that asks for the same description gets the same symbol.

/** `globalThis` key holding the resolved runtime, or `null` when there is none. */
const VOICE_CALL_RUNTIME_KEY = Symbol.for("openclaw.voice-call.runtime");
/** `globalThis` key holding the in-flight runtime creation promise, or `null`. */
const VOICE_CALL_RUNTIME_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimePromise");
/** `globalThis` key holding the in-flight runtime stop promise, or `null`. */
const VOICE_CALL_RUNTIME_STOP_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimeStopPromise");

/** `globalThis` augmented with the three optional voice-call runtime slots. */
type VoiceCallRuntimeGlobalState = typeof globalThis & {
  [VOICE_CALL_RUNTIME_KEY]?: VoiceCallRuntime | null;
  [VOICE_CALL_RUNTIME_PROMISE_KEY]?: Promise<VoiceCallRuntime> | null;
  [VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]?: Promise<void> | null;
};

/**
 * Returns `globalThis` typed with the voice-call runtime slots.
 *
 * Any slot that is still `undefined` (or already `null`) is set to `null`;
 * slots that hold a value are left untouched.
 *
 * @returns The `globalThis` object itself, not a copy.
 */
function getVoiceCallRuntimeGlobalState(): VoiceCallRuntimeGlobalState {
  const shared = globalThis as VoiceCallRuntimeGlobalState;
  if (shared[VOICE_CALL_RUNTIME_KEY] == null) {
    shared[VOICE_CALL_RUNTIME_KEY] = null;
  }
  if (shared[VOICE_CALL_RUNTIME_PROMISE_KEY] == null) {
    shared[VOICE_CALL_RUNTIME_PROMISE_KEY] = null;
  }
  if (shared[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] == null) {
    shared[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = null;
  }
  return shared;
}
export default definePluginEntry({
id: "voice-call",
name: "Voice Call",
@@ -173,39 +191,60 @@ export default definePluginEntry({
}
}
let runtimePromise: Promise<VoiceCallRuntime> | null = null;
let runtime: VoiceCallRuntime | null = null;
const runtimeState = getVoiceCallRuntimeGlobalState();
const ensureRuntime = async () => {
const ensureRuntime = async (): Promise<VoiceCallRuntime> => {
if (!config.enabled) {
throw new Error("Voice call disabled in plugin config");
}
if (!validation.valid) {
throw new Error(validation.errors.join("; "));
}
if (runtime) {
return runtime;
while (true) {
if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
await runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY];
continue;
}
const runtime = runtimeState[VOICE_CALL_RUNTIME_KEY];
if (runtime) {
return runtime;
}
let runtimePromise = runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY];
if (!runtimePromise) {
runtimePromise = createVoiceCallRuntime({
config,
coreConfig: api.config as CoreConfig,
fullConfig: api.config,
agentRuntime: api.runtime.agent,
ttsRuntime: api.runtime.tts,
logger: api.logger,
});
runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = runtimePromise;
}
try {
const createdRuntime = await runtimePromise;
if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
continue;
}
if (runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] !== runtimePromise) {
continue;
}
runtimeState[VOICE_CALL_RUNTIME_KEY] = createdRuntime;
return createdRuntime;
} catch (err) {
if (runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] === runtimePromise) {
// Reset shared state so the next call can retry instead of caching
// a rejected promise across plugin contexts. See: #32387, #58115.
runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = null;
runtimeState[VOICE_CALL_RUNTIME_KEY] = null;
}
throw err;
}
}
if (!runtimePromise) {
runtimePromise = createVoiceCallRuntime({
config,
coreConfig: api.config as CoreConfig,
fullConfig: api.config,
agentRuntime: api.runtime.agent,
ttsRuntime: api.runtime.tts,
logger: api.logger,
});
}
try {
runtime = await runtimePromise;
} catch (err) {
// Reset so the next call can retry instead of caching the
// rejected promise forever (which also leaves the port orphaned
// if the server started before the failure). See: #32387
runtimePromise = null;
throw err;
}
return runtime;
};
const sendError = (respond: (ok: boolean, payload?: unknown) => void, err: unknown) => {
@@ -574,15 +613,28 @@ export default definePluginEntry({
}
},
stop: async () => {
if (!runtimePromise) {
if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
await runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY];
return;
}
try {
const rt = await runtimePromise;
const runtime = runtimeState[VOICE_CALL_RUNTIME_KEY];
const runtimePromise = runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY];
if (!runtime && !runtimePromise) {
return;
}
runtimeState[VOICE_CALL_RUNTIME_KEY] = null;
runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = null;
const stopPromise = (async () => {
const rt = runtime ?? (await runtimePromise!);
await rt.stop();
})();
runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = stopPromise;
try {
await stopPromise;
} finally {
runtimePromise = null;
runtime = null;
if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] === stopPromise) {
runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = null;
}
}
},
});