mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:10:43 +00:00
fix(voice-call): share webhook runtime across contexts
This commit is contained in:
@@ -68,6 +68,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Discord/subagents: preserve thread-bound completion delivery by keeping the requester-agent announce path primary and falling back to direct thread sends only when the announce produces no visible output. (#71064) Thanks @DolencLuka.
|
||||
- Browser/tool: give Chrome MCP existing-session manage calls a longer default timeout, pass explicit tool timeouts through tab management, and recover stale selected-page MCP sessions instead of forcing a manual reset. Thanks @steipete.
|
||||
- Browser/sandbox: clean up idle tracked tabs opened by primary-agent browser sessions, while preserving active tab reuse and lifecycle cleanup for subagents, cron, and ACP sessions. Fixes #71165. Thanks @dwbutler.
|
||||
- Plugins/Voice Call: reuse the webhook runtime across in-process plugin contexts, avoiding `EADDRINUSE` when agent tools or CLI commands run while the Gateway already owns the voice webhook port. Fixes #58115. Thanks @sfbrian.
|
||||
- Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv.
|
||||
- Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana.
|
||||
- Agents/TTS: suppress successful spoken transcripts from verbose chat tool output when structured voice media is already queued, while preserving text output for non-builtin tool-name collisions. Fixes #71282. Thanks @neeravmakwana.
|
||||
|
||||
@@ -5,20 +5,9 @@ import { Command } from "commander";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { createTestPluginApi } from "../../test/helpers/plugins/plugin-api.ts";
|
||||
import type { OpenClawPluginApi } from "./api.js";
|
||||
import type { VoiceCallRuntime } from "./runtime-entry.js";
|
||||
|
||||
let runtimeStub: {
|
||||
config: { toNumber?: string };
|
||||
manager: {
|
||||
initiateCall: ReturnType<typeof vi.fn>;
|
||||
continueCall: ReturnType<typeof vi.fn>;
|
||||
speak: ReturnType<typeof vi.fn>;
|
||||
sendDtmf: ReturnType<typeof vi.fn>;
|
||||
endCall: ReturnType<typeof vi.fn>;
|
||||
getCall: ReturnType<typeof vi.fn>;
|
||||
getCallByProviderCallId: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
stop: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
let runtimeStub: VoiceCallRuntime;
|
||||
|
||||
vi.mock("./runtime-entry.js", () => ({
|
||||
createVoiceCallRuntime: vi.fn(async () => runtimeStub),
|
||||
@@ -37,6 +26,7 @@ const noopLogger = {
|
||||
/** What a single `setup()` call captured from the plugin's registration. */
type Registered = {
|
||||
// Gateway method handlers, keyed by the method name they were registered under.
methods: Map<string, unknown>;
|
||||
// Tools handed to `registerTool`, in registration order.
tools: unknown[];
|
||||
// The service handed to `registerService`; stays undefined if none was registered.
service?: Parameters<OpenClawPluginApi["registerService"]>[0];
|
||||
};
|
||||
type RegisterVoiceCall = (api: Record<string, unknown>) => void;
|
||||
type RegisterCliContext = {
|
||||
@@ -57,9 +47,42 @@ function captureStdout() {
|
||||
restore: () => writeSpy.mockRestore(),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a stand-in `VoiceCallRuntime` for the plugin tests.
 *
 * Every manager method is a `vi.fn` that resolves successfully. `getCall` only
 * recognises `callId`, and `getCallByProviderCallId` never finds a call.
 *
 * @param callId - Call id reported by `initiateCall` and known to `getCall`.
 * @returns A runtime stub whose provider and webhook server are empty objects.
 */
function createRuntimeStub(callId = "call-1"): VoiceCallRuntime {
  // Shared implementation for the manager calls that only report success.
  const succeed = async () => ({ success: true });

  const manager = {
    initiateCall: vi.fn(async () => ({ callId, success: true })),
    continueCall: vi.fn(async () => ({ success: true, transcript: "hello" })),
    speak: vi.fn(succeed),
    sendDtmf: vi.fn(succeed),
    endCall: vi.fn(succeed),
    getCall: vi.fn((id: string) => {
      if (id !== callId) {
        return undefined;
      }
      return { callId };
    }),
    getCallByProviderCallId: vi.fn(() => undefined),
  } as unknown as VoiceCallRuntime["manager"];

  const stub: VoiceCallRuntime = {
    config: { toNumber: "+15550001234" } as VoiceCallRuntime["config"],
    provider: {} as VoiceCallRuntime["provider"],
    manager,
    webhookServer: {} as VoiceCallRuntime["webhookServer"],
    webhookUrl: "http://127.0.0.1:3334/voice/webhook",
    publicUrl: null,
    stop: vi.fn(async () => {}),
  };
  return stub;
}
|
||||
|
||||
/**
 * Builds the argument passed to the registered service's `start`/`stop` in tests:
 * an empty config, the OS temp directory as state dir, and the no-op logger.
 */
function createServiceContext(): Parameters<NonNullable<Registered["service"]>["start"]>[0] {
  type ServiceStartContext = Parameters<NonNullable<Registered["service"]>["start"]>[0];

  const stateDir = os.tmpdir();
  const context = { config: {}, stateDir, logger: noopLogger };
  return context as ServiceStartContext;
}
|
||||
|
||||
function setup(config: Record<string, unknown>): Registered {
|
||||
const methods = new Map<string, unknown>();
|
||||
const tools: unknown[] = [];
|
||||
let service: Registered["service"];
|
||||
const api = createTestPluginApi({
|
||||
id: "voice-call",
|
||||
name: "Voice Call",
|
||||
@@ -73,11 +96,13 @@ function setup(config: Record<string, unknown>): Registered {
|
||||
registerGatewayMethod: (method: string, handler: unknown) => methods.set(method, handler),
|
||||
registerTool: (tool: unknown) => tools.push(tool),
|
||||
registerCli: () => {},
|
||||
registerService: () => {},
|
||||
registerService: (registeredService) => {
|
||||
service = registeredService;
|
||||
},
|
||||
resolvePath: (p: string) => p,
|
||||
});
|
||||
plugin.register(api);
|
||||
return { methods, tools };
|
||||
return { methods, tools, service };
|
||||
}
|
||||
|
||||
async function registerVoiceCallCli(program: Command) {
|
||||
@@ -114,26 +139,60 @@ describe("voice-call plugin", () => {
|
||||
noopLogger.warn.mockClear();
|
||||
noopLogger.error.mockClear();
|
||||
noopLogger.debug.mockClear();
|
||||
vi.mocked(createVoiceCallRuntime).mockClear();
|
||||
runtimeStub = {
|
||||
config: { toNumber: "+15550001234" },
|
||||
manager: {
|
||||
initiateCall: vi.fn(async () => ({ callId: "call-1", success: true })),
|
||||
continueCall: vi.fn(async () => ({
|
||||
success: true,
|
||||
transcript: "hello",
|
||||
})),
|
||||
speak: vi.fn(async () => ({ success: true })),
|
||||
sendDtmf: vi.fn(async () => ({ success: true })),
|
||||
endCall: vi.fn(async () => ({ success: true })),
|
||||
getCall: vi.fn((id: string) => (id === "call-1" ? { callId: "call-1" } : undefined)),
|
||||
getCallByProviderCallId: vi.fn(() => undefined),
|
||||
},
|
||||
stop: vi.fn(async () => {}),
|
||||
};
|
||||
runtimeStub = createRuntimeStub();
|
||||
vi.mocked(createVoiceCallRuntime).mockReset();
|
||||
vi.mocked(createVoiceCallRuntime).mockImplementation(async () => runtimeStub);
|
||||
});
|
||||
|
||||
afterEach(() => vi.restoreAllMocks());
|
||||
// Restore mocks, then remove the three voice-call runtime slots from globalThis
// so the next test starts without them.
afterEach(() => {
  vi.restoreAllMocks();

  const sharedGlobals = globalThis as Record<PropertyKey, unknown>;
  const sharedRuntimeKeys = [
    "openclaw.voice-call.runtime",
    "openclaw.voice-call.runtimePromise",
    "openclaw.voice-call.runtimeStopPromise",
  ];
  for (const key of sharedRuntimeKeys) {
    delete sharedGlobals[Symbol.for(key)];
  }
});
|
||||
|
||||
it("reuses a started runtime across plugin registration contexts", async () => {
  type InitiateHandler = (ctx: {
    params: Record<string, unknown>;
    respond: ReturnType<typeof vi.fn>;
  }) => Promise<void>;

  // Register the plugin twice to get two independent registration contexts.
  const first = setup({ provider: "mock" });
  const second = setup({ provider: "mock" });

  // Start the service through the first context only.
  await first.service?.start(createServiceContext());

  // Invoke the gateway method through the second context.
  const respond = vi.fn();
  const handler = second.methods.get("voicecall.initiate") as InitiateHandler | undefined;
  await handler?.({ params: { message: "Hi" }, respond });

  // One runtime was created in total, and it served the second context's call.
  expect(createVoiceCallRuntime).toHaveBeenCalledTimes(1);
  expect(runtimeStub.manager.initiateCall).toHaveBeenCalledTimes(1);
  expect(respond).toHaveBeenCalledWith(true, { callId: "call-1", initiated: true });
});
|
||||
|
||||
it("creates a fresh shared runtime after service stop", async () => {
  type InitiateHandler = (ctx: {
    params: Record<string, unknown>;
    respond: ReturnType<typeof vi.fn>;
  }) => Promise<void>;

  // Start and then stop the service through the first registration.
  const first = setup({ provider: "mock" });
  await first.service?.start(createServiceContext());
  await first.service?.stop?.(createServiceContext());

  // Swap in a stub with a different call id so a newly created runtime is
  // distinguishable from the one that was stopped.
  runtimeStub = createRuntimeStub("call-2");
  const second = setup({ provider: "mock" });

  const respond = vi.fn();
  const handler = second.methods.get("voicecall.initiate") as InitiateHandler | undefined;
  await handler?.({ params: { message: "Hi" }, respond });

  // A second runtime was created, and the response carries its call id.
  expect(createVoiceCallRuntime).toHaveBeenCalledTimes(2);
  expect(respond).toHaveBeenCalledWith(true, { callId: "call-2", initiated: true });
});
|
||||
|
||||
it("initiates a call via voicecall.initiate", async () => {
|
||||
const { methods } = setup({ provider: "mock" });
|
||||
|
||||
@@ -154,6 +154,24 @@ function asParamRecord(params: unknown): Record<string, unknown> {
|
||||
: {};
|
||||
}
|
||||
|
||||
// Keys for the runtime slots stored on globalThis. `Symbol.for` reads the
// global symbol registry, so the same description always yields the same key.

// Slot holding the created runtime once it is available.
const VOICE_CALL_RUNTIME_KEY = Symbol.for("openclaw.voice-call.runtime");
|
||||
// Slot holding the promise of a runtime that is still being created.
const VOICE_CALL_RUNTIME_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimePromise");
|
||||
// Slot holding the promise of a stop that is still in progress.
const VOICE_CALL_RUNTIME_STOP_PROMISE_KEY = Symbol.for("openclaw.voice-call.runtimeStopPromise");
|
||||
|
||||
/** `globalThis` widened with the three optional, nullable runtime slots above. */
type VoiceCallRuntimeGlobalState = typeof globalThis & {
|
||||
[VOICE_CALL_RUNTIME_KEY]?: VoiceCallRuntime | null;
|
||||
[VOICE_CALL_RUNTIME_PROMISE_KEY]?: Promise<VoiceCallRuntime> | null;
|
||||
[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]?: Promise<void> | null;
|
||||
};
|
||||
|
||||
/**
 * Returns `globalThis` typed as the shared voice-call runtime state.
 *
 * Any of the three runtime slots that is still unset is initialised to `null`
 * first; slots that already hold a value are left untouched.
 */
function getVoiceCallRuntimeGlobalState(): VoiceCallRuntimeGlobalState {
  const shared = globalThis as VoiceCallRuntimeGlobalState;

  if (shared[VOICE_CALL_RUNTIME_KEY] === undefined) {
    shared[VOICE_CALL_RUNTIME_KEY] = null;
  }
  if (shared[VOICE_CALL_RUNTIME_PROMISE_KEY] === undefined) {
    shared[VOICE_CALL_RUNTIME_PROMISE_KEY] = null;
  }
  if (shared[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] === undefined) {
    shared[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = null;
  }

  return shared;
}
|
||||
|
||||
export default definePluginEntry({
|
||||
id: "voice-call",
|
||||
name: "Voice Call",
|
||||
@@ -173,39 +191,60 @@ export default definePluginEntry({
|
||||
}
|
||||
}
|
||||
|
||||
let runtimePromise: Promise<VoiceCallRuntime> | null = null;
|
||||
let runtime: VoiceCallRuntime | null = null;
|
||||
const runtimeState = getVoiceCallRuntimeGlobalState();
|
||||
|
||||
const ensureRuntime = async () => {
|
||||
/**
 * Resolves the voice-call runtime held in the shared `runtimeState` slots,
 * creating it with `createVoiceCallRuntime` when no runtime or in-flight
 * creation is recorded there.
 *
 * @returns The runtime stored under `VOICE_CALL_RUNTIME_KEY`.
 * @throws Error when the plugin config is disabled or failed validation;
 *   rethrows whatever the creation promise rejects with.
 *
 * NOTE(review): this span comes from a diff rendering in which removed and
 * added lines are not distinguishable. The statements that reference a bare
 * `runtime` / `runtimePromise` outside the `while` loop look like the
 * pre-change per-context implementation — confirm against the upstream file.
 */
const ensureRuntime = async (): Promise<VoiceCallRuntime> => {
|
||||
if (!config.enabled) {
|
||||
throw new Error("Voice call disabled in plugin config");
|
||||
}
|
||||
if (!validation.valid) {
|
||||
throw new Error(validation.errors.join("; "));
|
||||
}
|
||||
if (runtime) {
|
||||
return runtime;
|
||||
|
||||
// Each pass re-reads the shared slots; `continue` restarts the checks after
// an await during which the slots may have changed.
while (true) {
|
||||
// A stop is in progress: wait for it, then start the checks over.
if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
|
||||
await runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY];
|
||||
continue;
|
||||
}
|
||||
|
||||
// A runtime is already recorded in the shared slot: hand it out.
const runtime = runtimeState[VOICE_CALL_RUNTIME_KEY];
|
||||
if (runtime) {
|
||||
return runtime;
|
||||
}
|
||||
|
||||
// Reuse a recorded in-flight creation, or start one and record it before
// awaiting so later callers find it instead of starting their own.
let runtimePromise = runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY];
|
||||
if (!runtimePromise) {
|
||||
runtimePromise = createVoiceCallRuntime({
|
||||
config,
|
||||
coreConfig: api.config as CoreConfig,
|
||||
fullConfig: api.config,
|
||||
agentRuntime: api.runtime.agent,
|
||||
ttsRuntime: api.runtime.tts,
|
||||
logger: api.logger,
|
||||
});
|
||||
runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = runtimePromise;
|
||||
}
|
||||
|
||||
try {
|
||||
const createdRuntime = await runtimePromise;
|
||||
// A stop was recorded while this creation was awaited: do not record the
// result; loop back so the stop is awaited first.
if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
|
||||
continue;
|
||||
}
|
||||
// The shared promise slot no longer holds the promise awaited here, so
// this result is not recorded; re-evaluate from the top.
if (runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] !== runtimePromise) {
|
||||
continue;
|
||||
}
|
||||
runtimeState[VOICE_CALL_RUNTIME_KEY] = createdRuntime;
|
||||
return createdRuntime;
|
||||
} catch (err) {
|
||||
// Only clear the slots if they still refer to the promise that failed here.
if (runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] === runtimePromise) {
|
||||
// Reset shared state so the next call can retry instead of caching
|
||||
// a rejected promise across plugin contexts. See: #32387, #58115.
|
||||
runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = null;
|
||||
runtimeState[VOICE_CALL_RUNTIME_KEY] = null;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
if (!runtimePromise) {
|
||||
runtimePromise = createVoiceCallRuntime({
|
||||
config,
|
||||
coreConfig: api.config as CoreConfig,
|
||||
fullConfig: api.config,
|
||||
agentRuntime: api.runtime.agent,
|
||||
ttsRuntime: api.runtime.tts,
|
||||
logger: api.logger,
|
||||
});
|
||||
}
|
||||
try {
|
||||
runtime = await runtimePromise;
|
||||
} catch (err) {
|
||||
// Reset so the next call can retry instead of caching the
|
||||
// rejected promise forever (which also leaves the port orphaned
|
||||
// if the server started before the failure). See: #32387
|
||||
runtimePromise = null;
|
||||
throw err;
|
||||
}
|
||||
return runtime;
|
||||
};
|
||||
|
||||
const sendError = (respond: (ok: boolean, payload?: unknown) => void, err: unknown) => {
|
||||
@@ -574,15 +613,28 @@ export default definePluginEntry({
|
||||
}
|
||||
},
|
||||
// Service stop hook: stops the runtime recorded in the shared `runtimeState`
// slots, if any, and records the stop as a promise in the stop-promise slot
// while it runs.
// NOTE(review): this span comes from a diff rendering in which removed and
// added lines are not distinguishable; the statements using a bare outer
// `runtimePromise` / `runtime` look like the pre-change implementation —
// confirm against the upstream file.
stop: async () => {
|
||||
if (!runtimePromise) {
|
||||
// A stop is already recorded: wait for that one instead of starting another.
if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY]) {
|
||||
await runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY];
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const rt = await runtimePromise;
|
||||
const runtime = runtimeState[VOICE_CALL_RUNTIME_KEY];
|
||||
const runtimePromise = runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY];
|
||||
// Neither a runtime nor an in-flight creation is recorded: nothing to stop.
if (!runtime && !runtimePromise) {
|
||||
return;
|
||||
}
|
||||
// Clear both slots before awaiting anything; the values captured above are
// what gets stopped.
runtimeState[VOICE_CALL_RUNTIME_KEY] = null;
|
||||
runtimeState[VOICE_CALL_RUNTIME_PROMISE_KEY] = null;
|
||||
const stopPromise = (async () => {
|
||||
// Use the recorded runtime if there is one; otherwise wait for the
// in-flight creation to finish and stop its result.
const rt = runtime ?? (await runtimePromise!);
|
||||
await rt.stop();
|
||||
})();
|
||||
// Record the stop so other callers can find and await it.
runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = stopPromise;
|
||||
try {
|
||||
await stopPromise;
|
||||
} finally {
|
||||
runtimePromise = null;
|
||||
runtime = null;
|
||||
// Clear the stop slot only if it still holds this stop's promise.
if (runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] === stopPromise) {
|
||||
runtimeState[VOICE_CALL_RUNTIME_STOP_PROMISE_KEY] = null;
|
||||
}
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user