From e4aab1419ab87404ffedb0fb1d61efe26ca5940d Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 2 May 2026 08:42:38 +0100 Subject: [PATCH] fix(voice-call): support per-call session scope --- CHANGELOG.md | 1 + docs/.generated/config-baseline.sha256 | 4 +- docs/plugins/voice-call.md | 11 ++- extensions/voice-call/README.md | 2 + extensions/voice-call/openclaw.plugin.json | 8 ++ extensions/voice-call/src/config.test.ts | 48 ++++++++++++ extensions/voice-call/src/config.ts | 23 ++++++ .../voice-call/src/manager/events.test.ts | 27 +++++++ extensions/voice-call/src/manager/events.ts | 6 ++ .../voice-call/src/manager/outbound.test.ts | 26 +++++++ extensions/voice-call/src/manager/outbound.ts | 9 ++- .../voice-call/src/response-generator.test.ts | 31 ++++++++ .../voice-call/src/response-generator.ts | 32 +++++--- extensions/voice-call/src/runtime.test.ts | 74 +++++++++++++++++++ extensions/voice-call/src/runtime.ts | 11 ++- extensions/voice-call/src/test-fixtures.ts | 1 + extensions/voice-call/src/webhook.ts | 1 + 17 files changed, 298 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b660fee438a..ffc1c433160 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ Docs: https://docs.openclaw.ai - Providers/OpenAI: resolve `keychain::` `OPENAI_API_KEY` refs before creating OpenAI Realtime browser sessions or voice bridges, with a bounded cached Keychain lookup. Fixes #72120. Thanks @ctbritt. - Discord/gateway: reconnect when the gateway socket closes while waiting for the shared IDENTIFY concurrency window, instead of silently skipping IDENTIFY and leaving the bot online but unresponsive. Fixes #74617. Thanks @zeeskdr-ai. +- Voice Call: add `sessionScope: "per-call"` for fresh per-call agent memory while preserving the default per-phone caller history. Fixes #45280. Thanks @pondcountry. - Telegram/startup: use the existing `getMe` request guard for the gateway bot probe instead of a fixed 2.5-second budget, and honor higher `timeoutSeconds` configs for slow Telegram API paths. Fixes #75783. Thanks @tankotan. - Telegram/models: make model picker confirmations say selections are session-scoped and do not change the agent's persistent default. Fixes #75965. Thanks @sd1114820. - Control UI/slash commands: keep fallback command metadata on a browser-safe registry path, so provider thinking runtime imports cannot blank the Web UI with `process is not defined`. Fixes #75987. Thanks @novkien. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 3851000d64f..a14957d7ca7 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -737056ad5544e24250ce91c000ae4a5fe0af751681a529f2e4710b383ef5d4e7 config-baseline.json +a7158716d9262edba32ef9a18ab04d9f48f83cb903444b6f87b991977b6be52f config-baseline.json 2d132b4c2e3b0e0f2524fc1cc889d3be658ad0e40c970b2d367bf27348883658 config-baseline.core.json f42329d45c095881bd226bdb192c235980658fd250606d0c0badc2b12f12f5d3 config-baseline.channel.json -726c2fb81319f05be6977cdf5c9598884feafc600e6c76d482be626f4983bc32 config-baseline.plugin.json +de03faf42db470fe419a3f93a5777161f830f0355912603c6795945e42f39735 config-baseline.plugin.json diff --git a/docs/plugins/voice-call.md b/docs/plugins/voice-call.md index 075aa00bd6e..27b5041de7c 100644 --- a/docs/plugins/voice-call.md +++ b/docs/plugins/voice-call.md @@ -109,6 +109,7 @@ Voice-call credentials accept SecretRefs. `plugins.entries.voice-call.config.twi provider: "twilio", // or "telnyx" | "plivo" | "mock" fromNumber: "+15550001234", // or TWILIO_FROM_NUMBER for Twilio toNumber: "+15550005678", + sessionScope: "per-phone", // per-phone | per-call twilio: { accountSid: "ACxxxxxxxx", @@ -192,6 +193,14 @@ Voice-call credentials accept SecretRefs. `plugins.entries.voice-call.config.twi +## Session scope + +By default, Voice Call uses `sessionScope: "per-phone"` so repeat calls from +the same caller keep conversation memory. Set `sessionScope: "per-call"` when +each carrier call should start with fresh context, for example reception, +booking, IVR, or Google Meet bridge flows where the same phone number may +represent different meetings. + ## Realtime voice conversations `realtime` selects a full-duplex realtime voice provider for live call @@ -212,7 +221,7 @@ Current runtime behaviour: - Voice Call exposes the shared `openclaw_agent_consult` realtime tool by default. The realtime model can call it when the caller asks for deeper reasoning, current information, or normal OpenClaw tools. - `realtime.fastContext.enabled` is default-off. When enabled, Voice Call first searches indexed memory/session context for the consult question and returns those snippets to the realtime model within `realtime.fastContext.timeoutMs` before falling back to the full consult agent only if `realtime.fastContext.fallbackToConsult` is true. - If `realtime.provider` points at an unregistered provider, or no realtime voice provider is registered at all, Voice Call logs a warning and skips realtime media instead of failing the whole plugin. -- Consult session keys reuse the existing voice session when available, then fall back to the caller/callee phone number so follow-up consult calls keep context during the call. +- Consult session keys reuse the stored call session when available, then fall back to the configured `sessionScope` (`per-phone` by default, or `per-call` for isolated calls). ### Tool policy diff --git a/extensions/voice-call/README.md b/extensions/voice-call/README.md index 5cd455db126..1832a0169c7 100644 --- a/extensions/voice-call/README.md +++ b/extensions/voice-call/README.md @@ -40,6 +40,7 @@ Put under `plugins.entries.voice-call.config`: provider: "twilio", // or "telnyx" | "plivo" | "mock" fromNumber: "+15550001234", toNumber: "+15550005678", + sessionScope: "per-phone", // or "per-call" twilio: { accountSid: "ACxxxxxxxx", @@ -104,6 +105,7 @@ Notes: - If older configs still use `provider: "log"`, `twilio.from`, or legacy `streaming.*` OpenAI keys, run `openclaw doctor --fix` to rewrite them. - advanced webhook, streaming, and tunnel notes: `https://docs.openclaw.ai/plugins/voice-call` - `responseModel` is optional. When unset, voice responses use the runtime default model. +- `sessionScope` defaults to `per-phone`, preserving caller memory across calls. Use `per-call` for reception, booking, IVR, and bridge flows where each carrier call should start fresh. ## Stale call reaper diff --git a/extensions/voice-call/openclaw.plugin.json b/extensions/voice-call/openclaw.plugin.json index ebe29b4577a..fa37e35bb28 100644 --- a/extensions/voice-call/openclaw.plugin.json +++ b/extensions/voice-call/openclaw.plugin.json @@ -189,6 +189,10 @@ "label": "Call Log Store Path", "advanced": true }, + "sessionScope": { + "label": "Session Scope", + "help": "Use per-phone to preserve caller memory across calls, or per-call to isolate every call into a fresh voice session." + }, "responseModel": { "label": "Response Model", "help": "Optional override. Falls back to the runtime default model when unset.", @@ -767,6 +771,10 @@ "store": { "type": "string" }, + "sessionScope": { + "type": "string", + "enum": ["per-phone", "per-call"] + }, "responseModel": { "type": "string" }, diff --git a/extensions/voice-call/src/config.test.ts b/extensions/voice-call/src/config.test.ts index 171eb7cd29d..6a699daef00 100644 --- a/extensions/voice-call/src/config.test.ts +++ b/extensions/voice-call/src/config.test.ts @@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, it } from "vitest"; import { VoiceCallConfigSchema, resolveTwilioAuthToken, + resolveVoiceCallSessionKey, validateProviderConfig, normalizeVoiceCallConfig, resolveVoiceCallConfig, @@ -256,6 +257,53 @@ describe("resolveVoiceCallConfig", () => { expect(config.staleCallReaperSeconds).toBe(120); }); + + it("keeps voice sessions scoped by phone by default", () => { + const config = resolveVoiceCallConfig({ enabled: true, provider: "mock" }); + + expect(config.sessionScope).toBe("per-phone"); + expect( + resolveVoiceCallSessionKey({ + config, + callId: "call-123", + phone: "+1 (555) 000-1111", + }), + ).toBe("voice:15550001111"); + }); + + it("can scope voice sessions to each call", () => { + const config = resolveVoiceCallConfig({ + enabled: true, + provider: "mock", + sessionScope: "per-call", + }); + + expect(config.sessionScope).toBe("per-call"); + expect( + resolveVoiceCallSessionKey({ + config, + callId: "call-123", + phone: "+1 (555) 000-1111", + }), + ).toBe("voice:call:call-123"); + }); + + it("preserves explicit voice session keys", () => { + const config = resolveVoiceCallConfig({ + enabled: true, + provider: "mock", + sessionScope: "per-call", + }); + + expect( + resolveVoiceCallSessionKey({ + config, + callId: "call-123", + phone: "+1 (555) 000-1111", + explicitSessionKey: "meet-room-1", + }), + ).toBe("meet-room-1"); + }); }); describe("normalizeVoiceCallConfig", () => { diff --git a/extensions/voice-call/src/config.ts b/extensions/voice-call/src/config.ts index ad3105d8b5d..32596dbff8e 100644 --- a/extensions/voice-call/src/config.ts +++ b/extensions/voice-call/src/config.ts @@ -173,6 +173,9 @@ export type WebhookSecurityConfig = z.infer; +const VoiceCallSessionScopeSchema = z.enum(["per-phone", "per-call"]); +export type VoiceCallSessionScope = z.infer; + const OutboundConfigSchema = z .object({ /** Default call mode for outbound calls */ @@ -393,6 +396,9 @@ export const VoiceCallConfigSchema = z /** Realtime voice-to-voice configuration */ realtime: VoiceCallRealtimeConfigSchema, + /** Session memory scope for voice conversations. */ + sessionScope: VoiceCallSessionScopeSchema.default("per-phone"), + /** Public webhook URL override (if set, bypasses tunnel auto-detection) */ publicUrl: z.string().url().optional(), @@ -549,6 +555,23 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal }; } +export function resolveVoiceCallSessionKey(params: { + config: Pick; + callId: string; + phone?: string; + explicitSessionKey?: string; +}): string { + const explicit = params.explicitSessionKey?.trim(); + if (explicit) { + return explicit; + } + if (params.config.sessionScope === "per-call") { + return `voice:call:${params.callId}`; + } + const normalizedPhone = params.phone?.replace(/\D/g, ""); + return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${params.callId}`; +} + /** * Resolves the configuration by merging environment variables into missing fields. * Returns a new configuration object with environment variables applied. diff --git a/extensions/voice-call/src/manager/events.test.ts b/extensions/voice-call/src/manager/events.test.ts index c5d436841c7..41462817d09 100644 --- a/extensions/voice-call/src/manager/events.test.ts +++ b/extensions/voice-call/src/manager/events.test.ts @@ -426,6 +426,33 @@ describe("processEvent (functional)", () => { expect(call.direction).toBe("inbound"); }); + it("assigns per-call session keys to inbound calls when configured", () => { + const ctx = createContext({ + config: VoiceCallConfigSchema.parse({ + enabled: true, + provider: "plivo", + fromNumber: "+15550000000", + inboundPolicy: "open", + sessionScope: "per-call", + }), + }); + const event: NormalizedEvent = { + id: "evt-inbound-session-scope", + type: "call.initiated", + callId: "CA-inbound-session-scope", + providerCallId: "CA-inbound-session-scope", + timestamp: Date.now(), + direction: "inbound", + from: "+15554444444", + to: "+15550000000", + }; + + processEvent(ctx, event); + + const call = requireFirstActiveCall(ctx); + expect(call.sessionKey).toBe(`voice:call:${call.callId}`); + }); + it("deduplicates by dedupeKey even when event IDs differ", () => { const now = Date.now(); const ctx = createContext(); diff --git a/extensions/voice-call/src/manager/events.ts b/extensions/voice-call/src/manager/events.ts index 464e8c2c6fe..f2eea043239 100644 --- a/extensions/voice-call/src/manager/events.ts +++ b/extensions/voice-call/src/manager/events.ts @@ -1,6 +1,7 @@ import crypto from "node:crypto"; import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js"; +import { resolveVoiceCallSessionKey } from "../config.js"; import type { CallRecord, NormalizedEvent } from "../types.js"; import type { CallManagerContext } from "./context.js"; import { finalizeCall } from "./lifecycle.js"; @@ -73,6 +74,11 @@ function createWebhookCall(params: { state: "ringing", from: params.from, to: params.to, + sessionKey: resolveVoiceCallSessionKey({ + config: params.ctx.config, + callId, + phone: params.direction === "outbound" ? params.to : params.from, + }), startedAt: Date.now(), transcript: [], processedEventIds: [], diff --git a/extensions/voice-call/src/manager/outbound.test.ts b/extensions/voice-call/src/manager/outbound.test.ts index dd417ed98e9..27077e0fb5d 100644 --- a/extensions/voice-call/src/manager/outbound.test.ts +++ b/extensions/voice-call/src/manager/outbound.test.ts @@ -170,9 +170,35 @@ describe("voice-call outbound helpers", () => { inlineTwiml: "", }); expect(ctx.providerCallIdMap.get("provider-1")).toBe(callId); + expect(ctx.activeCalls.get(callId)?.sessionKey).toBe("session-1"); expect(persistCallRecordMock).toHaveBeenCalledTimes(2); }); + it("assigns per-call session keys to outbound calls when configured", async () => { + const initiateProviderCall = vi.fn(async () => ({ providerCallId: "provider-1" })); + const ctx = { + activeCalls: new Map(), + providerCallIdMap: new Map(), + provider: { name: "twilio", initiateCall: initiateProviderCall }, + config: { + maxConcurrentCalls: 3, + outbound: { defaultMode: "conversation" }, + fromNumber: "+14155550100", + sessionScope: "per-call", + }, + storePath: "/tmp/voice-call.json", + webhookUrl: "https://example.com/webhook", + }; + + const result = await initiateCall(ctx as never, "+14155550123"); + + expect(result).toEqual({ + callId: expect.any(String), + success: true, + }); + expect(ctx.activeCalls.get(result.callId)?.sessionKey).toBe(`voice:call:${result.callId}`); + }); + it("initiates conversation calls with pre-connect DTMF TwiML", async () => { const initiateProviderCall = vi.fn(async () => ({ providerCallId: "provider-1" })); const ctx = { diff --git a/extensions/voice-call/src/manager/outbound.ts b/extensions/voice-call/src/manager/outbound.ts index 53382636f42..01a1218c8f0 100644 --- a/extensions/voice-call/src/manager/outbound.ts +++ b/extensions/voice-call/src/manager/outbound.ts @@ -1,6 +1,6 @@ import crypto from "node:crypto"; import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime"; -import type { CallMode } from "../config.js"; +import { resolveVoiceCallSessionKey, type CallMode } from "../config.js"; import { resolvePreferredTtsVoice } from "../tts-provider-voice.js"; import { type EndReason, @@ -162,7 +162,12 @@ export async function initiateCall( state: "initiated", from, to, - sessionKey, + sessionKey: resolveVoiceCallSessionKey({ + config: ctx.config, + callId, + phone: to, + explicitSessionKey: sessionKey, + }), startedAt: Date.now(), transcript: [], processedEventIds: [], diff --git a/extensions/voice-call/src/response-generator.test.ts b/extensions/voice-call/src/response-generator.test.ts index 2bd32bfa971..fa59e451df7 100644 --- a/extensions/voice-call/src/response-generator.test.ts +++ b/extensions/voice-call/src/response-generator.test.ts @@ -191,6 +191,37 @@ describe("generateVoiceResponse", () => { ); }); + it("uses the persisted per-call session key for classic responses", async () => { + const { runtime, runEmbeddedPiAgent, sessionStore } = createAgentRuntime([ + { text: '{"spoken":"Fresh call context."}' }, + ]); + const voiceConfig = VoiceCallConfigSchema.parse({ + sessionScope: "per-call", + responseTimeoutMs: 5000, + }); + + const result = await generateVoiceResponse({ + voiceConfig, + coreConfig: {} as CoreConfig, + agentRuntime: runtime, + callId: "call-123", + sessionKey: "voice:call:call-123", + from: "+15550001111", + transcript: [{ speaker: "user", text: "hello there" }], + userMessage: "hello there", + }); + + expect(result.text).toBe("Fresh call context."); + expect(sessionStore["voice:call:call-123"]).toBeDefined(); + expect(sessionStore["voice:15550001111"]).toBeUndefined(); + expect(runEmbeddedPiAgent).toHaveBeenCalledWith( + expect.objectContaining({ + sessionKey: "voice:call:call-123", + sandboxSessionKey: "agent:main:voice:call:call-123", + }), + ); + }); + it("uses the main agent workspace when voice config omits agentId", async () => { const { runtime, diff --git a/extensions/voice-call/src/response-generator.ts b/extensions/voice-call/src/response-generator.ts index 83c9aaa6de2..4f18c6586e2 100644 --- a/extensions/voice-call/src/response-generator.ts +++ b/extensions/voice-call/src/response-generator.ts @@ -7,7 +7,7 @@ import crypto from "node:crypto"; import { applyModelOverrideToSessionEntry } from "openclaw/plugin-sdk/model-session-runtime"; import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; import type { SessionEntry } from "../api.js"; -import type { VoiceCallConfig } from "./config.js"; +import { resolveVoiceCallSessionKey, type VoiceCallConfig } from "./config.js"; import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js"; import { resolveVoiceResponseModel } from "./response-model.js"; @@ -20,6 +20,8 @@ export type VoiceResponseParams = { agentRuntime: CoreAgentDeps; /** Call ID for session tracking */ callId: string; + /** Persisted call session key */ + sessionKey?: string; /** Caller's phone number */ from: string; /** Conversation transcript */ @@ -187,16 +189,28 @@ function resolveVoiceSandboxSessionKey(agentId: string, sessionKey: string): str export async function generateVoiceResponse( params: VoiceResponseParams, ): Promise { - const { voiceConfig, callId, from, transcript, userMessage, coreConfig, agentRuntime } = params; + const { + voiceConfig, + callId, + sessionKey, + from, + transcript, + userMessage, + coreConfig, + agentRuntime, + } = params; if (!coreConfig) { return { text: null, error: "Core config unavailable for voice response" }; } const cfg = coreConfig; - // Build voice-specific session key based on phone number - const normalizedPhone = from.replace(/\D/g, ""); - const sessionKey = `voice:${normalizedPhone}`; + const resolvedSessionKey = resolveVoiceCallSessionKey({ + config: voiceConfig, + callId, + phone: from, + explicitSessionKey: sessionKey, + }); const agentId = voiceConfig.agentId ?? "main"; // Resolve paths @@ -210,7 +224,7 @@ export async function generateVoiceResponse( // Load or create session entry const sessionStore = agentRuntime.session.loadSessionStore(storePath); const now = Date.now(); - let sessionEntry = sessionStore[sessionKey] as SessionEntry | undefined; + let sessionEntry = sessionStore[resolvedSessionKey] as SessionEntry | undefined; let sessionEntryUpdated = false; if (!sessionEntry) { @@ -218,7 +232,7 @@ export async function generateVoiceResponse( sessionId: crypto.randomUUID(), updatedAt: now, }; - sessionStore[sessionKey] = sessionEntry; + sessionStore[resolvedSessionKey] = sessionEntry; sessionEntryUpdated = true; } @@ -271,8 +285,8 @@ export async function generateVoiceResponse( try { const result = await agentRuntime.runEmbeddedPiAgent({ sessionId, - sessionKey, - sandboxSessionKey: resolveVoiceSandboxSessionKey(agentId, sessionKey), + sessionKey: resolvedSessionKey, + sandboxSessionKey: resolveVoiceSandboxSessionKey(agentId, resolvedSessionKey), agentId, messageProvider: "voice", sessionFile, diff --git a/extensions/voice-call/src/runtime.test.ts b/extensions/voice-call/src/runtime.test.ts index 74f1b167164..73a72fc0166 100644 --- a/extensions/voice-call/src/runtime.test.ts +++ b/extensions/voice-call/src/runtime.test.ts @@ -28,6 +28,22 @@ const mocks = vi.hoisted(() => ({ })); vi.mock("./config.js", () => ({ + resolveVoiceCallSessionKey: (params: { + config: Pick; + callId: string; + phone?: string; + explicitSessionKey?: string; + }) => { + const explicit = params.explicitSessionKey?.trim(); + if (explicit) { + return explicit; + } + if (params.config.sessionScope === "per-call") { + return `voice:call:${params.callId}`; + } + const normalizedPhone = params.phone?.replace(/\D/g, ""); + return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${params.callId}`; + }, resolveVoiceCallConfig: mocks.resolveVoiceCallConfig, resolveTwilioAuthToken: mocks.resolveTwilioAuthToken, validateProviderConfig: mocks.validateProviderConfig, @@ -382,6 +398,64 @@ describe("createVoiceCallRuntime lifecycle", () => { ); }); + it("uses persisted per-call session keys for realtime consults", async () => { + const config = createBaseConfig(); + config.inboundPolicy = "allowlist"; + config.realtime.enabled = true; + config.sessionScope = "per-call"; + const runEmbeddedPiAgent = vi.fn(async () => ({ + payloads: [{ text: "Per-call consult answer." }], + meta: {}, + })); + const sessionStore: Record = {}; + const agentRuntime = { + defaults: { provider: "openai", model: "gpt-5.4" }, + resolveAgentDir: vi.fn(() => "/tmp/agent"), + resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"), + resolveAgentIdentity: vi.fn(), + resolveThinkingDefault: vi.fn(() => "high"), + resolveAgentTimeoutMs: vi.fn(() => 30_000), + ensureAgentWorkspace: vi.fn(async () => {}), + session: { + resolveStorePath: vi.fn(() => "/tmp/sessions.json"), + loadSessionStore: vi.fn(() => sessionStore), + saveSessionStore: vi.fn(async () => {}), + resolveSessionFilePath: vi.fn(() => "/tmp/session.json"), + }, + runEmbeddedPiAgent, + }; + mocks.managerGetCall.mockReturnValue({ + callId: "call-1", + sessionKey: "voice:call:call-1", + direction: "inbound", + from: "+15550001234", + to: "+15550009999", + transcript: [], + }); + + await createVoiceCallRuntime({ + config, + coreConfig: {} as CoreConfig, + agentRuntime: agentRuntime as never, + }); + + const handler = mocks.realtimeHandlerRegisterToolHandler.mock.calls[0]?.[1] as + | (( + args: unknown, + callId: string, + context?: { partialUserTranscript?: string }, + ) => Promise) + | undefined; + await expect(handler?.({ question: "What should I say?" }, "call-1")).resolves.toEqual({ + text: "Per-call consult answer.", + }); + expect(runEmbeddedPiAgent).toHaveBeenCalledWith( + expect.objectContaining({ + sessionKey: "voice:call:call-1", + }), + ); + }); + it("answers realtime consults from fast memory context before starting the full agent", async () => { const config = createBaseConfig(); config.realtime.enabled = true; diff --git a/extensions/voice-call/src/runtime.ts b/extensions/voice-call/src/runtime.ts index 9e5fa7b7f60..b5cb725b173 100644 --- a/extensions/voice-call/src/runtime.ts +++ b/extensions/voice-call/src/runtime.ts @@ -10,6 +10,7 @@ import { } from "openclaw/plugin-sdk/realtime-voice"; import type { VoiceCallConfig } from "./config.js"; import { + resolveVoiceCallSessionKey, resolveTwilioAuthToken, resolveVoiceCallConfig, validateProviderConfig, @@ -103,6 +104,7 @@ function loadRealtimeHandler(): Promise { } function resolveVoiceCallConsultSessionKey(call: { + config: VoiceCallConfig; sessionKey?: string; from?: string; to?: string; @@ -113,8 +115,11 @@ function resolveVoiceCallConsultSessionKey(call: { return call.sessionKey; } const phone = call.direction === "outbound" ? call.to : call.from; - const normalizedPhone = phone?.replace(/\D/g, ""); - return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${call.callId}`; + return resolveVoiceCallSessionKey({ + config: call.config, + callId: call.callId, + phone, + }); } function mapVoiceCallConsultTranscript( @@ -335,7 +340,7 @@ export async function createVoiceCallRuntime(params: { return { error: `Call "${callId}" not found` }; } const agentId = config.agentId ?? "main"; - const sessionKey = resolveVoiceCallConsultSessionKey(call); + const sessionKey = resolveVoiceCallConsultSessionKey({ ...call, config }); const fastContext = await resolveRealtimeFastContextConsult({ cfg, agentId, diff --git a/extensions/voice-call/src/test-fixtures.ts b/extensions/voice-call/src/test-fixtures.ts index f382d8bd9b7..3fe58e1561d 100644 --- a/extensions/voice-call/src/test-fixtures.ts +++ b/extensions/voice-call/src/test-fixtures.ts @@ -18,6 +18,7 @@ export function createVoiceCallBaseConfig(params?: { transcriptTimeoutMs: 180000, ringTimeoutMs: 30000, maxConcurrentCalls: 1, + sessionScope: "per-phone", serve: { port: 3334, bind: "127.0.0.1", path: "/voice/webhook" }, tailscale: { mode: "off", path: "/voice/webhook" }, tunnel: { diff --git a/extensions/voice-call/src/webhook.ts b/extensions/voice-call/src/webhook.ts index c746fcccd5b..7ca9106b14e 100644 --- a/extensions/voice-call/src/webhook.ts +++ b/extensions/voice-call/src/webhook.ts @@ -879,6 +879,7 @@ export class VoiceCallWebhookServer { coreConfig: this.coreConfig, agentRuntime: this.agentRuntime, callId, + sessionKey: call.sessionKey, from: call.from, transcript: call.transcript, userMessage,