From 9a3dece879a78d9fe40608073493f976059cb43c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 03:31:31 +0100 Subject: [PATCH] fix(voice-call): allow dedicated response agent --- CHANGELOG.md | 1 + extensions/voice-call/README.md | 3 + extensions/voice-call/index.ts | 5 + extensions/voice-call/src/config.test.ts | 10 ++ extensions/voice-call/src/config.ts | 3 + .../voice-call/src/response-generator.test.ts | 121 +++++++++++++++++- .../voice-call/src/response-generator.ts | 2 +- 7 files changed, 137 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d7c181e978..77b0f0d9714 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -80,6 +80,7 @@ Docs: https://docs.openclaw.ai - Plugins/Voice Call: answer accepted Telnyx inbound Call Control legs on `call.initiated`, so webhooks that reach OpenClaw no longer leave the caller ringing until hangup. Fixes #58231 and #40131. Thanks @KonsultDigital. - Plugins/Voice Call: coalesce concurrent webhook server starts on the same runtime instance, avoiding a second `listen()` bind when overlapping startup paths race. Thanks @education-01. - Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv. +- Plugins/Voice Call: add `agentId` for voice response generation, so phone calls can use a dedicated agent workspace instead of always routing through `main`. Fixes #42155. Thanks @TheOpie. - Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana. - Agents/TTS: suppress successful spoken transcripts from verbose chat tool output when structured voice media is already queued, while preserving text output for non-builtin tool-name collisions. Fixes #71282. Thanks @neeravmakwana. - Plugins/Google Meet: reuse existing Meet tabs and active sessions across harmless URL query differences, avoiding duplicate Chrome windows when agents retry a join. Thanks @steipete. diff --git a/extensions/voice-call/README.md b/extensions/voice-call/README.md index ce7736a7432..0f25e83ea30 100644 --- a/extensions/voice-call/README.md +++ b/extensions/voice-call/README.md @@ -74,6 +74,9 @@ Put under `plugins.entries.voice-call.config`: defaultMode: "notify", // or "conversation" }, + // Optional response agent workspace. Defaults to "main". + agentId: "main", + streaming: { enabled: true, // optional; if omitted, Voice Call picks the first registered diff --git a/extensions/voice-call/index.ts b/extensions/voice-call/index.ts index f487b730541..b416fedbc82 100644 --- a/extensions/voice-call/index.ts +++ b/extensions/voice-call/index.ts @@ -100,6 +100,11 @@ const voiceCallConfigSchema = { advanced: true, }, store: { label: "Call Log Store Path", advanced: true }, + agentId: { + label: "Response Agent ID", + help: 'Agent workspace used for voice response generation. Defaults to "main".', + advanced: true, + }, responseModel: { label: "Response Model", help: "Optional override. Falls back to the runtime default model when unset.", diff --git a/extensions/voice-call/src/config.test.ts b/extensions/voice-call/src/config.test.ts index 0bac83934dc..60f8d9c4e46 100644 --- a/extensions/voice-call/src/config.test.ts +++ b/extensions/voice-call/src/config.test.ts @@ -314,4 +314,14 @@ describe("resolveVoiceCallConfig", () => { expect(resolved.responseModel).toBeUndefined(); }); + + it("preserves the configured voice response agent id", () => { + const resolved = resolveVoiceCallConfig({ + enabled: true, + provider: "mock", + agentId: "voice", + }); + + expect(resolved.agentId).toBe("voice"); + }); }); diff --git a/extensions/voice-call/src/config.ts b/extensions/voice-call/src/config.ts index 18ae073d384..565bd8e2112 100644 --- a/extensions/voice-call/src/config.ts +++ b/extensions/voice-call/src/config.ts @@ -379,6 +379,9 @@ export const VoiceCallConfigSchema = z /** Store path for call logs */ store: z.string().optional(), + /** Agent ID to use for voice response generation. Defaults to "main". */ + agentId: z.string().min(1).optional(), + /** Optional model override for generating voice responses. */ responseModel: z.string().optional(), diff --git a/extensions/voice-call/src/response-generator.test.ts b/extensions/voice-call/src/response-generator.test.ts index 07ad6eecba4..b686eeab91b 100644 --- a/extensions/voice-call/src/response-generator.test.ts +++ b/extensions/voice-call/src/response-generator.test.ts @@ -10,28 +10,55 @@ function createAgentRuntime(payloads: Array>) { payloads, meta: { durationMs: 12, aborted: false }, })); + const resolveAgentDir = vi.fn((_cfg: CoreConfig, agentId: string) => { + return `/tmp/openclaw/agents/${agentId}`; + }); + const resolveAgentWorkspaceDir = vi.fn((_cfg: CoreConfig, agentId: string) => { + return `/tmp/openclaw/workspace/${agentId}`; + }); + const resolveAgentIdentity = vi.fn((_cfg: CoreConfig, agentId: string) => ({ + name: `${agentId} tester`, + })); + const resolveStorePath = vi.fn((_store: string | undefined, params: { agentId?: string }) => { + return `/tmp/openclaw/${params.agentId ?? "main"}/sessions.json`; + }); + const resolveSessionFilePath = vi.fn( + (_sessionId: string, _entry: unknown, params: { agentId?: string }) => { + return `/tmp/openclaw/${params.agentId ?? "main"}/sessions/session.jsonl`; + }, + ); const runtime = { defaults: { provider: "together", model: "Qwen/Qwen2.5-7B-Instruct-Turbo", }, - resolveAgentDir: () => "/tmp/openclaw/agents/main", - resolveAgentWorkspaceDir: () => "/tmp/openclaw/workspace/main", - resolveAgentIdentity: () => ({ name: "tester" }), + resolveAgentDir, + resolveAgentWorkspaceDir, + resolveAgentIdentity, resolveThinkingDefault: () => "off", resolveAgentTimeoutMs: () => 30_000, ensureAgentWorkspace: async () => {}, runEmbeddedPiAgent, session: { - resolveStorePath: () => "/tmp/openclaw/sessions.json", + resolveStorePath, loadSessionStore: () => sessionStore, saveSessionStore, - resolveSessionFilePath: () => "/tmp/openclaw/sessions/session.jsonl", + resolveSessionFilePath, }, } as unknown as CoreAgentDeps; - return { runtime, runEmbeddedPiAgent, saveSessionStore, sessionStore }; + return { + runtime, + runEmbeddedPiAgent, + saveSessionStore, + sessionStore, + resolveAgentDir, + resolveAgentWorkspaceDir, + resolveAgentIdentity, + resolveStorePath, + resolveSessionFilePath, + }; } function requireEmbeddedAgentArgs(runEmbeddedPiAgent: ReturnType) { @@ -154,7 +181,7 @@ describe("generateVoiceResponse", () => { modelOverride: "gpt-4.1-nano", modelOverrideSource: "auto", }); - expect(saveSessionStore).toHaveBeenCalledWith("/tmp/openclaw/sessions.json", sessionStore); + expect(saveSessionStore).toHaveBeenCalledWith("/tmp/openclaw/main/sessions.json", sessionStore); expect(runEmbeddedPiAgent).toHaveBeenCalledWith( expect.objectContaining({ provider: "openai", @@ -163,4 +190,84 @@ describe("generateVoiceResponse", () => { }), ); }); + + it("uses the main agent workspace when voice config omits agentId", async () => { + const { + runtime, + runEmbeddedPiAgent, + resolveAgentDir, + resolveAgentWorkspaceDir, + resolveAgentIdentity, + resolveStorePath, + resolveSessionFilePath, + } = createAgentRuntime([{ text: '{"spoken":"Default agent."}' }]); + const coreConfig = {} as CoreConfig; + + await generateVoiceResponse({ + voiceConfig: VoiceCallConfigSchema.parse({ responseTimeoutMs: 5000 }), + coreConfig, + agentRuntime: runtime, + callId: "call-123", + from: "+15550001111", + transcript: [], + userMessage: "hello there", + }); + + expect(resolveStorePath).toHaveBeenCalledWith(undefined, { agentId: "main" }); + expect(resolveAgentDir).toHaveBeenCalledWith(coreConfig, "main"); + expect(resolveAgentWorkspaceDir).toHaveBeenCalledWith(coreConfig, "main"); + expect(resolveAgentIdentity).toHaveBeenCalledWith(coreConfig, "main"); + expect(resolveSessionFilePath).toHaveBeenCalledWith(expect.any(String), expect.any(Object), { + agentId: "main", + }); + expect(runEmbeddedPiAgent).toHaveBeenCalledWith( + expect.objectContaining({ + agentDir: "/tmp/openclaw/agents/main", + workspaceDir: "/tmp/openclaw/workspace/main", + sessionFile: "/tmp/openclaw/main/sessions/session.jsonl", + }), + ); + }); + + it("uses the configured voice response agent workspace", async () => { + const { + runtime, + runEmbeddedPiAgent, + resolveAgentDir, + resolveAgentWorkspaceDir, + resolveAgentIdentity, + resolveStorePath, + resolveSessionFilePath, + } = createAgentRuntime([{ text: '{"spoken":"Voice agent."}' }]); + const coreConfig = {} as CoreConfig; + + const result = await generateVoiceResponse({ + voiceConfig: VoiceCallConfigSchema.parse({ + agentId: "voice", + responseTimeoutMs: 5000, + }), + coreConfig, + agentRuntime: runtime, + callId: "call-123", + from: "+15550001111", + transcript: [], + userMessage: "hello there", + }); + + expect(result.text).toBe("Voice agent."); + expect(resolveStorePath).toHaveBeenCalledWith(undefined, { agentId: "voice" }); + expect(resolveAgentDir).toHaveBeenCalledWith(coreConfig, "voice"); + expect(resolveAgentWorkspaceDir).toHaveBeenCalledWith(coreConfig, "voice"); + expect(resolveAgentIdentity).toHaveBeenCalledWith(coreConfig, "voice"); + expect(resolveSessionFilePath).toHaveBeenCalledWith(expect.any(String), expect.any(Object), { + agentId: "voice", + }); + expect(runEmbeddedPiAgent).toHaveBeenCalledWith( + expect.objectContaining({ + agentDir: "/tmp/openclaw/agents/voice", + workspaceDir: "/tmp/openclaw/workspace/voice", + sessionFile: "/tmp/openclaw/voice/sessions/session.jsonl", + }), + ); + }); }); diff --git a/extensions/voice-call/src/response-generator.ts b/extensions/voice-call/src/response-generator.ts index f91d3184bd6..7dd39da8ef2 100644 --- a/extensions/voice-call/src/response-generator.ts +++ b/extensions/voice-call/src/response-generator.ts @@ -189,7 +189,7 @@ export async function generateVoiceResponse( // Build voice-specific session key based on phone number const normalizedPhone = from.replace(/\D/g, ""); const sessionKey = `voice:${normalizedPhone}`; - const agentId = "main"; + const agentId = voiceConfig.agentId ?? "main"; // Resolve paths const storePath = agentRuntime.session.resolveStorePath(cfg.session?.store, { agentId });