fix(voice-call): allow dedicated response agent

This commit is contained in:
Peter Steinberger
2026-04-25 03:31:31 +01:00
parent e442065970
commit 9a3dece879
7 changed files with 137 additions and 8 deletions

View File

@@ -80,6 +80,7 @@ Docs: https://docs.openclaw.ai
- Plugins/Voice Call: answer accepted Telnyx inbound Call Control legs on `call.initiated`, so webhooks that reach OpenClaw no longer leave the caller ringing until hangup. Fixes #58231 and #40131. Thanks @KonsultDigital.
- Plugins/Voice Call: coalesce concurrent webhook server starts on the same runtime instance, avoiding a second `listen()` bind when overlapping startup paths race. Thanks @education-01.
- Plugins/Voice Call: pin voice response sessions to `responseModel` before embedded agent runs, avoiding live-session model switch failures when the global default model differs. Fixes #60118. Thanks @xinbenlv.
- Plugins/Voice Call: add `agentId` for voice response generation, so phone calls can use a dedicated agent workspace instead of always routing through `main`. Fixes #42155. Thanks @TheOpie.
- Media tools: honor the configured web-fetch SSRF policy for media understanding, image/music/video generation references, and PDF inputs, so explicit RFC2544 opt-ins cover WebChat OSS uploads without weakening defaults. Fixes #71300. (#71321) Thanks @neeravmakwana.
- Agents/TTS: suppress successful spoken transcripts from verbose chat tool output when structured voice media is already queued, while preserving text output for non-builtin tool-name collisions. Fixes #71282. Thanks @neeravmakwana.
- Plugins/Google Meet: reuse existing Meet tabs and active sessions across harmless URL query differences, avoiding duplicate Chrome windows when agents retry a join. Thanks @steipete.

View File

@@ -74,6 +74,9 @@ Put under `plugins.entries.voice-call.config`:
defaultMode: "notify", // or "conversation"
},
// Optional ID of the agent whose workspace generates voice responses. Defaults to "main".
agentId: "main",
streaming: {
enabled: true,
// optional; if omitted, Voice Call picks the first registered

View File

@@ -100,6 +100,11 @@ const voiceCallConfigSchema = {
advanced: true,
},
store: { label: "Call Log Store Path", advanced: true },
agentId: {
label: "Response Agent ID",
help: 'Agent workspace used for voice response generation. Defaults to "main".',
advanced: true,
},
responseModel: {
label: "Response Model",
help: "Optional override. Falls back to the runtime default model when unset.",

View File

@@ -314,4 +314,14 @@ describe("resolveVoiceCallConfig", () => {
expect(resolved.responseModel).toBeUndefined();
});
// An explicitly configured `agentId` must come out of resolveVoiceCallConfig unchanged.
it("preserves the configured voice response agent id", () => {
  const { agentId } = resolveVoiceCallConfig({
    enabled: true,
    provider: "mock",
    agentId: "voice",
  });

  expect(agentId).toBe("voice");
});
});

View File

@@ -379,6 +379,9 @@ export const VoiceCallConfigSchema = z
/** Store path for call logs */
store: z.string().optional(),
/** Agent ID to use for voice response generation. Defaults to "main". */
agentId: z.string().min(1).optional(),
/** Optional model override for generating voice responses. */
responseModel: z.string().optional(),

View File

@@ -10,28 +10,55 @@ function createAgentRuntime(payloads: Array<Record<string, unknown>>) {
payloads,
meta: { durationMs: 12, aborted: false },
}));
// Each mocked resolver echoes the agent id it receives into the returned path
// or name, so assertions can tell which agent a call was resolved for.
const resolveAgentDir = vi.fn(
  (_config: CoreConfig, agentId: string) => `/tmp/openclaw/agents/${agentId}`,
);
const resolveAgentWorkspaceDir = vi.fn(
  (_config: CoreConfig, agentId: string) => `/tmp/openclaw/workspace/${agentId}`,
);
const resolveAgentIdentity = vi.fn((_config: CoreConfig, agentId: string) => {
  return { name: `${agentId} tester` };
});
// The session path mocks fall back to "main" when no agent id is supplied.
const resolveStorePath = vi.fn(
  (_store: string | undefined, opts: { agentId?: string }) =>
    `/tmp/openclaw/${opts.agentId ?? "main"}/sessions.json`,
);
const resolveSessionFilePath = vi.fn(
  (_sessionId: string, _entry: unknown, opts: { agentId?: string }) =>
    `/tmp/openclaw/${opts.agentId ?? "main"}/sessions/session.jsonl`,
);
const runtime = {
defaults: {
provider: "together",
model: "Qwen/Qwen2.5-7B-Instruct-Turbo",
},
resolveAgentDir: () => "/tmp/openclaw/agents/main",
resolveAgentWorkspaceDir: () => "/tmp/openclaw/workspace/main",
resolveAgentIdentity: () => ({ name: "tester" }),
resolveAgentDir,
resolveAgentWorkspaceDir,
resolveAgentIdentity,
resolveThinkingDefault: () => "off",
resolveAgentTimeoutMs: () => 30_000,
ensureAgentWorkspace: async () => {},
runEmbeddedPiAgent,
session: {
resolveStorePath: () => "/tmp/openclaw/sessions.json",
resolveStorePath,
loadSessionStore: () => sessionStore,
saveSessionStore,
resolveSessionFilePath: () => "/tmp/openclaw/sessions/session.jsonl",
resolveSessionFilePath,
},
} as unknown as CoreAgentDeps;
return { runtime, runEmbeddedPiAgent, saveSessionStore, sessionStore };
return {
runtime,
runEmbeddedPiAgent,
saveSessionStore,
sessionStore,
resolveAgentDir,
resolveAgentWorkspaceDir,
resolveAgentIdentity,
resolveStorePath,
resolveSessionFilePath,
};
}
function requireEmbeddedAgentArgs(runEmbeddedPiAgent: ReturnType<typeof vi.fn>) {
@@ -154,7 +181,7 @@ describe("generateVoiceResponse", () => {
modelOverride: "gpt-4.1-nano",
modelOverrideSource: "auto",
});
expect(saveSessionStore).toHaveBeenCalledWith("/tmp/openclaw/sessions.json", sessionStore);
expect(saveSessionStore).toHaveBeenCalledWith("/tmp/openclaw/main/sessions.json", sessionStore);
expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
expect.objectContaining({
provider: "openai",
@@ -163,4 +190,84 @@ describe("generateVoiceResponse", () => {
}),
);
});
// With no `agentId` in the voice config, every resolver call and the embedded
// agent run are expected to be keyed to the "main" agent.
it("uses the main agent workspace when voice config omits agentId", async () => {
  const mocks = createAgentRuntime([{ text: '{"spoken":"Default agent."}' }]);
  const coreConfig = {} as CoreConfig;
  const voiceConfig = VoiceCallConfigSchema.parse({ responseTimeoutMs: 5000 });

  await generateVoiceResponse({
    voiceConfig,
    coreConfig,
    agentRuntime: mocks.runtime,
    callId: "call-123",
    from: "+15550001111",
    transcript: [],
    userMessage: "hello there",
  });

  // Path and identity resolution all receive the fallback agent id.
  expect(mocks.resolveStorePath).toHaveBeenCalledWith(undefined, { agentId: "main" });
  expect(mocks.resolveAgentDir).toHaveBeenCalledWith(coreConfig, "main");
  expect(mocks.resolveAgentWorkspaceDir).toHaveBeenCalledWith(coreConfig, "main");
  expect(mocks.resolveAgentIdentity).toHaveBeenCalledWith(coreConfig, "main");
  expect(mocks.resolveSessionFilePath).toHaveBeenCalledWith(
    expect.any(String),
    expect.any(Object),
    { agentId: "main" },
  );

  // The resolved locations are what the embedded agent is launched with.
  const expectedRunArgs = expect.objectContaining({
    agentDir: "/tmp/openclaw/agents/main",
    workspaceDir: "/tmp/openclaw/workspace/main",
    sessionFile: "/tmp/openclaw/main/sessions/session.jsonl",
  });
  expect(mocks.runEmbeddedPiAgent).toHaveBeenCalledWith(expectedRunArgs);
});
// With `agentId: "voice"` in the voice config, every resolver call and the
// embedded agent run are expected to be keyed to that agent instead of "main".
it("uses the configured voice response agent workspace", async () => {
  const mocks = createAgentRuntime([{ text: '{"spoken":"Voice agent."}' }]);
  const coreConfig = {} as CoreConfig;
  const voiceConfig = VoiceCallConfigSchema.parse({
    agentId: "voice",
    responseTimeoutMs: 5000,
  });

  const result = await generateVoiceResponse({
    voiceConfig,
    coreConfig,
    agentRuntime: mocks.runtime,
    callId: "call-123",
    from: "+15550001111",
    transcript: [],
    userMessage: "hello there",
  });

  expect(result.text).toBe("Voice agent.");

  // Path and identity resolution all receive the configured agent id.
  expect(mocks.resolveStorePath).toHaveBeenCalledWith(undefined, { agentId: "voice" });
  expect(mocks.resolveAgentDir).toHaveBeenCalledWith(coreConfig, "voice");
  expect(mocks.resolveAgentWorkspaceDir).toHaveBeenCalledWith(coreConfig, "voice");
  expect(mocks.resolveAgentIdentity).toHaveBeenCalledWith(coreConfig, "voice");
  expect(mocks.resolveSessionFilePath).toHaveBeenCalledWith(
    expect.any(String),
    expect.any(Object),
    { agentId: "voice" },
  );

  // The resolved locations are what the embedded agent is launched with.
  const expectedRunArgs = expect.objectContaining({
    agentDir: "/tmp/openclaw/agents/voice",
    workspaceDir: "/tmp/openclaw/workspace/voice",
    sessionFile: "/tmp/openclaw/voice/sessions/session.jsonl",
  });
  expect(mocks.runEmbeddedPiAgent).toHaveBeenCalledWith(expectedRunArgs);
});
});

View File

@@ -189,7 +189,7 @@ export async function generateVoiceResponse(
// Build voice-specific session key based on phone number
const normalizedPhone = from.replace(/\D/g, "");
const sessionKey = `voice:${normalizedPhone}`;
const agentId = "main";
const agentId = voiceConfig.agentId ?? "main";
// Resolve paths
const storePath = agentRuntime.session.resolveStorePath(cfg.session?.store, { agentId });