mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:10:44 +00:00
fix(voice-call): support per-call session scope
This commit is contained in:
@@ -30,6 +30,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Providers/OpenAI: resolve `keychain:<service>:<account>` `OPENAI_API_KEY` refs before creating OpenAI Realtime browser sessions or voice bridges, with a bounded cached Keychain lookup. Fixes #72120. Thanks @ctbritt.
|
||||
- Discord/gateway: reconnect when the gateway socket closes while waiting for the shared IDENTIFY concurrency window, instead of silently skipping IDENTIFY and leaving the bot online but unresponsive. Fixes #74617. Thanks @zeeskdr-ai.
|
||||
- Voice Call: add `sessionScope: "per-call"` for fresh per-call agent memory while preserving the default per-phone caller history. Fixes #45280. Thanks @pondcountry.
|
||||
- Telegram/startup: use the existing `getMe` request guard for the gateway bot probe instead of a fixed 2.5-second budget, and honor higher `timeoutSeconds` configs for slow Telegram API paths. Fixes #75783. Thanks @tankotan.
|
||||
- Telegram/models: make model picker confirmations say selections are session-scoped and do not change the agent's persistent default. Fixes #75965. Thanks @sd1114820.
|
||||
- Control UI/slash commands: keep fallback command metadata on a browser-safe registry path, so provider thinking runtime imports cannot blank the Web UI with `process is not defined`. Fixes #75987. Thanks @novkien.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
737056ad5544e24250ce91c000ae4a5fe0af751681a529f2e4710b383ef5d4e7 config-baseline.json
|
||||
a7158716d9262edba32ef9a18ab04d9f48f83cb903444b6f87b991977b6be52f config-baseline.json
|
||||
2d132b4c2e3b0e0f2524fc1cc889d3be658ad0e40c970b2d367bf27348883658 config-baseline.core.json
|
||||
f42329d45c095881bd226bdb192c235980658fd250606d0c0badc2b12f12f5d3 config-baseline.channel.json
|
||||
726c2fb81319f05be6977cdf5c9598884feafc600e6c76d482be626f4983bc32 config-baseline.plugin.json
|
||||
de03faf42db470fe419a3f93a5777161f830f0355912603c6795945e42f39735 config-baseline.plugin.json
|
||||
|
||||
@@ -109,6 +109,7 @@ Voice-call credentials accept SecretRefs. `plugins.entries.voice-call.config.twi
|
||||
provider: "twilio", // or "telnyx" | "plivo" | "mock"
|
||||
fromNumber: "+15550001234", // or TWILIO_FROM_NUMBER for Twilio
|
||||
toNumber: "+15550005678",
|
||||
sessionScope: "per-phone", // per-phone | per-call
|
||||
|
||||
twilio: {
|
||||
accountSid: "ACxxxxxxxx",
|
||||
@@ -192,6 +193,14 @@ Voice-call credentials accept SecretRefs. `plugins.entries.voice-call.config.twi
|
||||
</Accordion>
|
||||
</AccordionGroup>
|
||||
|
||||
## Session scope
|
||||
|
||||
By default, Voice Call uses `sessionScope: "per-phone"` so repeat calls from
the same caller keep conversation memory. Set `sessionScope: "per-call"` when
each carrier call should start with fresh context, for example reception,
booking, IVR, or Google Meet bridge flows where the same phone number may
represent different meetings.
|
||||
|
||||
## Realtime voice conversations
|
||||
|
||||
`realtime` selects a full-duplex realtime voice provider for live call
|
||||
@@ -212,7 +221,7 @@ Current runtime behaviour:
|
||||
- Voice Call exposes the shared `openclaw_agent_consult` realtime tool by default. The realtime model can call it when the caller asks for deeper reasoning, current information, or normal OpenClaw tools.
|
||||
- `realtime.fastContext.enabled` is default-off. When enabled, Voice Call first searches indexed memory/session context for the consult question and returns those snippets to the realtime model within `realtime.fastContext.timeoutMs` before falling back to the full consult agent only if `realtime.fastContext.fallbackToConsult` is true.
|
||||
- If `realtime.provider` points at an unregistered provider, or no realtime voice provider is registered at all, Voice Call logs a warning and skips realtime media instead of failing the whole plugin.
|
||||
- Consult session keys reuse the existing voice session when available, then fall back to the caller/callee phone number so follow-up consult calls keep context during the call.
|
||||
- Consult session keys reuse the stored call session when available, then fall back to the configured `sessionScope` (`per-phone` by default, or `per-call` for isolated calls).
|
||||
|
||||
### Tool policy
|
||||
|
||||
|
||||
@@ -40,6 +40,7 @@ Put under `plugins.entries.voice-call.config`:
|
||||
provider: "twilio", // or "telnyx" | "plivo" | "mock"
|
||||
fromNumber: "+15550001234",
|
||||
toNumber: "+15550005678",
|
||||
sessionScope: "per-phone", // or "per-call"
|
||||
|
||||
twilio: {
|
||||
accountSid: "ACxxxxxxxx",
|
||||
@@ -104,6 +105,7 @@ Notes:
|
||||
- If older configs still use `provider: "log"`, `twilio.from`, or legacy `streaming.*` OpenAI keys, run `openclaw doctor --fix` to rewrite them.
|
||||
- advanced webhook, streaming, and tunnel notes: `https://docs.openclaw.ai/plugins/voice-call`
|
||||
- `responseModel` is optional. When unset, voice responses use the runtime default model.
|
||||
- `sessionScope` defaults to `per-phone`, preserving caller memory across calls. Use `per-call` for reception, booking, IVR, and bridge flows where each carrier call should start fresh.
|
||||
|
||||
## Stale call reaper
|
||||
|
||||
|
||||
@@ -189,6 +189,10 @@
|
||||
"label": "Call Log Store Path",
|
||||
"advanced": true
|
||||
},
|
||||
"sessionScope": {
|
||||
"label": "Session Scope",
|
||||
"help": "Use per-phone to preserve caller memory across calls, or per-call to isolate every call into a fresh voice session."
|
||||
},
|
||||
"responseModel": {
|
||||
"label": "Response Model",
|
||||
"help": "Optional override. Falls back to the runtime default model when unset.",
|
||||
@@ -767,6 +771,10 @@
|
||||
"store": {
|
||||
"type": "string"
|
||||
},
|
||||
"sessionScope": {
|
||||
"type": "string",
|
||||
"enum": ["per-phone", "per-call"]
|
||||
},
|
||||
"responseModel": {
|
||||
"type": "string"
|
||||
},
|
||||
|
||||
@@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
VoiceCallConfigSchema,
|
||||
resolveTwilioAuthToken,
|
||||
resolveVoiceCallSessionKey,
|
||||
validateProviderConfig,
|
||||
normalizeVoiceCallConfig,
|
||||
resolveVoiceCallConfig,
|
||||
@@ -256,6 +257,53 @@ describe("resolveVoiceCallConfig", () => {
|
||||
|
||||
expect(config.staleCallReaperSeconds).toBe(120);
|
||||
});
|
||||
|
||||
it("keeps voice sessions scoped by phone by default", () => {
|
||||
const config = resolveVoiceCallConfig({ enabled: true, provider: "mock" });
|
||||
|
||||
expect(config.sessionScope).toBe("per-phone");
|
||||
expect(
|
||||
resolveVoiceCallSessionKey({
|
||||
config,
|
||||
callId: "call-123",
|
||||
phone: "+1 (555) 000-1111",
|
||||
}),
|
||||
).toBe("voice:15550001111");
|
||||
});
|
||||
|
||||
it("can scope voice sessions to each call", () => {
|
||||
const config = resolveVoiceCallConfig({
|
||||
enabled: true,
|
||||
provider: "mock",
|
||||
sessionScope: "per-call",
|
||||
});
|
||||
|
||||
expect(config.sessionScope).toBe("per-call");
|
||||
expect(
|
||||
resolveVoiceCallSessionKey({
|
||||
config,
|
||||
callId: "call-123",
|
||||
phone: "+1 (555) 000-1111",
|
||||
}),
|
||||
).toBe("voice:call:call-123");
|
||||
});
|
||||
|
||||
it("preserves explicit voice session keys", () => {
|
||||
const config = resolveVoiceCallConfig({
|
||||
enabled: true,
|
||||
provider: "mock",
|
||||
sessionScope: "per-call",
|
||||
});
|
||||
|
||||
expect(
|
||||
resolveVoiceCallSessionKey({
|
||||
config,
|
||||
callId: "call-123",
|
||||
phone: "+1 (555) 000-1111",
|
||||
explicitSessionKey: "meet-room-1",
|
||||
}),
|
||||
).toBe("meet-room-1");
|
||||
});
|
||||
});
|
||||
|
||||
describe("normalizeVoiceCallConfig", () => {
|
||||
|
||||
@@ -173,6 +173,9 @@ export type WebhookSecurityConfig = z.infer<typeof VoiceCallWebhookSecurityConfi
|
||||
const CallModeSchema = z.enum(["notify", "conversation"]);
|
||||
export type CallMode = z.infer<typeof CallModeSchema>;
|
||||
|
||||
const VoiceCallSessionScopeSchema = z.enum(["per-phone", "per-call"]);
|
||||
export type VoiceCallSessionScope = z.infer<typeof VoiceCallSessionScopeSchema>;
|
||||
|
||||
const OutboundConfigSchema = z
|
||||
.object({
|
||||
/** Default call mode for outbound calls */
|
||||
@@ -393,6 +396,9 @@ export const VoiceCallConfigSchema = z
|
||||
/** Realtime voice-to-voice configuration */
|
||||
realtime: VoiceCallRealtimeConfigSchema,
|
||||
|
||||
/** Session memory scope for voice conversations. */
|
||||
sessionScope: VoiceCallSessionScopeSchema.default("per-phone"),
|
||||
|
||||
/** Public webhook URL override (if set, bypasses tunnel auto-detection) */
|
||||
publicUrl: z.string().url().optional(),
|
||||
|
||||
@@ -549,6 +555,23 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Resolves the session key under which a voice call's conversation state is stored.
 *
 * Resolution order:
 * 1. A non-blank `explicitSessionKey` always wins and is returned trimmed.
 * 2. When `config.sessionScope` is `"per-call"`, the key is `voice:call:<callId>`.
 * 3. Otherwise the key is `voice:<digits>`, where `<digits>` is `phone` with every
 *    non-digit character removed. If `phone` is missing or contains no digits,
 *    the key falls back to `voice:<callId>`.
 *
 * @param params.config - Voice-call config; only `sessionScope` is read.
 * @param params.callId - Identifier of the call, used for per-call keys and as the fallback.
 * @param params.phone - Phone number used for the phone-scoped key (any formatting accepted).
 * @param params.explicitSessionKey - Caller-supplied key that overrides the configured scope.
 * @returns The resolved session key.
 */
export function resolveVoiceCallSessionKey(params: {
  config: Pick<VoiceCallConfig, "sessionScope">;
  callId: string;
  phone?: string;
  explicitSessionKey?: string;
}): string {
  // Whitespace-only explicit keys are treated as absent.
  const explicit = params.explicitSessionKey?.trim();
  if (explicit) {
    return explicit;
  }
  if (params.config.sessionScope === "per-call") {
    return `voice:call:${params.callId}`;
  }
  // Strip formatting ("+", spaces, parentheses, dashes) so the same number
  // always maps to the same key.
  const normalizedPhone = params.phone?.replace(/\D/g, "");
  return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${params.callId}`;
}
|
||||
|
||||
/**
|
||||
* Resolves the configuration by merging environment variables into missing fields.
|
||||
* Returns a new configuration object with environment variables applied.
|
||||
|
||||
@@ -426,6 +426,33 @@ describe("processEvent (functional)", () => {
|
||||
expect(call.direction).toBe("inbound");
|
||||
});
|
||||
|
||||
it("assigns per-call session keys to inbound calls when configured", () => {
|
||||
const ctx = createContext({
|
||||
config: VoiceCallConfigSchema.parse({
|
||||
enabled: true,
|
||||
provider: "plivo",
|
||||
fromNumber: "+15550000000",
|
||||
inboundPolicy: "open",
|
||||
sessionScope: "per-call",
|
||||
}),
|
||||
});
|
||||
const event: NormalizedEvent = {
|
||||
id: "evt-inbound-session-scope",
|
||||
type: "call.initiated",
|
||||
callId: "CA-inbound-session-scope",
|
||||
providerCallId: "CA-inbound-session-scope",
|
||||
timestamp: Date.now(),
|
||||
direction: "inbound",
|
||||
from: "+15554444444",
|
||||
to: "+15550000000",
|
||||
};
|
||||
|
||||
processEvent(ctx, event);
|
||||
|
||||
const call = requireFirstActiveCall(ctx);
|
||||
expect(call.sessionKey).toBe(`voice:call:${call.callId}`);
|
||||
});
|
||||
|
||||
it("deduplicates by dedupeKey even when event IDs differ", () => {
|
||||
const now = Date.now();
|
||||
const ctx = createContext();
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import crypto from "node:crypto";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js";
|
||||
import { resolveVoiceCallSessionKey } from "../config.js";
|
||||
import type { CallRecord, NormalizedEvent } from "../types.js";
|
||||
import type { CallManagerContext } from "./context.js";
|
||||
import { finalizeCall } from "./lifecycle.js";
|
||||
@@ -73,6 +74,11 @@ function createWebhookCall(params: {
|
||||
state: "ringing",
|
||||
from: params.from,
|
||||
to: params.to,
|
||||
sessionKey: resolveVoiceCallSessionKey({
|
||||
config: params.ctx.config,
|
||||
callId,
|
||||
phone: params.direction === "outbound" ? params.to : params.from,
|
||||
}),
|
||||
startedAt: Date.now(),
|
||||
transcript: [],
|
||||
processedEventIds: [],
|
||||
|
||||
@@ -170,9 +170,35 @@ describe("voice-call outbound helpers", () => {
|
||||
inlineTwiml: "<Response />",
|
||||
});
|
||||
expect(ctx.providerCallIdMap.get("provider-1")).toBe(callId);
|
||||
expect(ctx.activeCalls.get(callId)?.sessionKey).toBe("session-1");
|
||||
expect(persistCallRecordMock).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it("assigns per-call session keys to outbound calls when configured", async () => {
|
||||
const initiateProviderCall = vi.fn(async () => ({ providerCallId: "provider-1" }));
|
||||
const ctx = {
|
||||
activeCalls: new Map(),
|
||||
providerCallIdMap: new Map(),
|
||||
provider: { name: "twilio", initiateCall: initiateProviderCall },
|
||||
config: {
|
||||
maxConcurrentCalls: 3,
|
||||
outbound: { defaultMode: "conversation" },
|
||||
fromNumber: "+14155550100",
|
||||
sessionScope: "per-call",
|
||||
},
|
||||
storePath: "/tmp/voice-call.json",
|
||||
webhookUrl: "https://example.com/webhook",
|
||||
};
|
||||
|
||||
const result = await initiateCall(ctx as never, "+14155550123");
|
||||
|
||||
expect(result).toEqual({
|
||||
callId: expect.any(String),
|
||||
success: true,
|
||||
});
|
||||
expect(ctx.activeCalls.get(result.callId)?.sessionKey).toBe(`voice:call:${result.callId}`);
|
||||
});
|
||||
|
||||
it("initiates conversation calls with pre-connect DTMF TwiML", async () => {
|
||||
const initiateProviderCall = vi.fn(async () => ({ providerCallId: "provider-1" }));
|
||||
const ctx = {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import crypto from "node:crypto";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import type { CallMode } from "../config.js";
|
||||
import { resolveVoiceCallSessionKey, type CallMode } from "../config.js";
|
||||
import { resolvePreferredTtsVoice } from "../tts-provider-voice.js";
|
||||
import {
|
||||
type EndReason,
|
||||
@@ -162,7 +162,12 @@ export async function initiateCall(
|
||||
state: "initiated",
|
||||
from,
|
||||
to,
|
||||
sessionKey,
|
||||
sessionKey: resolveVoiceCallSessionKey({
|
||||
config: ctx.config,
|
||||
callId,
|
||||
phone: to,
|
||||
explicitSessionKey: sessionKey,
|
||||
}),
|
||||
startedAt: Date.now(),
|
||||
transcript: [],
|
||||
processedEventIds: [],
|
||||
|
||||
@@ -191,6 +191,37 @@ describe("generateVoiceResponse", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("uses the persisted per-call session key for classic responses", async () => {
|
||||
const { runtime, runEmbeddedPiAgent, sessionStore } = createAgentRuntime([
|
||||
{ text: '{"spoken":"Fresh call context."}' },
|
||||
]);
|
||||
const voiceConfig = VoiceCallConfigSchema.parse({
|
||||
sessionScope: "per-call",
|
||||
responseTimeoutMs: 5000,
|
||||
});
|
||||
|
||||
const result = await generateVoiceResponse({
|
||||
voiceConfig,
|
||||
coreConfig: {} as CoreConfig,
|
||||
agentRuntime: runtime,
|
||||
callId: "call-123",
|
||||
sessionKey: "voice:call:call-123",
|
||||
from: "+15550001111",
|
||||
transcript: [{ speaker: "user", text: "hello there" }],
|
||||
userMessage: "hello there",
|
||||
});
|
||||
|
||||
expect(result.text).toBe("Fresh call context.");
|
||||
expect(sessionStore["voice:call:call-123"]).toBeDefined();
|
||||
expect(sessionStore["voice:15550001111"]).toBeUndefined();
|
||||
expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
sessionKey: "voice:call:call-123",
|
||||
sandboxSessionKey: "agent:main:voice:call:call-123",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("uses the main agent workspace when voice config omits agentId", async () => {
|
||||
const {
|
||||
runtime,
|
||||
|
||||
@@ -7,7 +7,7 @@ import crypto from "node:crypto";
|
||||
import { applyModelOverrideToSessionEntry } from "openclaw/plugin-sdk/model-session-runtime";
|
||||
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
|
||||
import type { SessionEntry } from "../api.js";
|
||||
import type { VoiceCallConfig } from "./config.js";
|
||||
import { resolveVoiceCallSessionKey, type VoiceCallConfig } from "./config.js";
|
||||
import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
|
||||
import { resolveVoiceResponseModel } from "./response-model.js";
|
||||
|
||||
@@ -20,6 +20,8 @@ export type VoiceResponseParams = {
|
||||
agentRuntime: CoreAgentDeps;
|
||||
/** Call ID for session tracking */
|
||||
callId: string;
|
||||
/** Persisted call session key */
|
||||
sessionKey?: string;
|
||||
/** Caller's phone number */
|
||||
from: string;
|
||||
/** Conversation transcript */
|
||||
@@ -187,16 +189,28 @@ function resolveVoiceSandboxSessionKey(agentId: string, sessionKey: string): str
|
||||
export async function generateVoiceResponse(
|
||||
params: VoiceResponseParams,
|
||||
): Promise<VoiceResponseResult> {
|
||||
const { voiceConfig, callId, from, transcript, userMessage, coreConfig, agentRuntime } = params;
|
||||
const {
|
||||
voiceConfig,
|
||||
callId,
|
||||
sessionKey,
|
||||
from,
|
||||
transcript,
|
||||
userMessage,
|
||||
coreConfig,
|
||||
agentRuntime,
|
||||
} = params;
|
||||
|
||||
if (!coreConfig) {
|
||||
return { text: null, error: "Core config unavailable for voice response" };
|
||||
}
|
||||
const cfg = coreConfig;
|
||||
|
||||
// Build voice-specific session key based on phone number
|
||||
const normalizedPhone = from.replace(/\D/g, "");
|
||||
const sessionKey = `voice:${normalizedPhone}`;
|
||||
const resolvedSessionKey = resolveVoiceCallSessionKey({
|
||||
config: voiceConfig,
|
||||
callId,
|
||||
phone: from,
|
||||
explicitSessionKey: sessionKey,
|
||||
});
|
||||
const agentId = voiceConfig.agentId ?? "main";
|
||||
|
||||
// Resolve paths
|
||||
@@ -210,7 +224,7 @@ export async function generateVoiceResponse(
|
||||
// Load or create session entry
|
||||
const sessionStore = agentRuntime.session.loadSessionStore(storePath);
|
||||
const now = Date.now();
|
||||
let sessionEntry = sessionStore[sessionKey] as SessionEntry | undefined;
|
||||
let sessionEntry = sessionStore[resolvedSessionKey] as SessionEntry | undefined;
|
||||
let sessionEntryUpdated = false;
|
||||
|
||||
if (!sessionEntry) {
|
||||
@@ -218,7 +232,7 @@ export async function generateVoiceResponse(
|
||||
sessionId: crypto.randomUUID(),
|
||||
updatedAt: now,
|
||||
};
|
||||
sessionStore[sessionKey] = sessionEntry;
|
||||
sessionStore[resolvedSessionKey] = sessionEntry;
|
||||
sessionEntryUpdated = true;
|
||||
}
|
||||
|
||||
@@ -271,8 +285,8 @@ export async function generateVoiceResponse(
|
||||
try {
|
||||
const result = await agentRuntime.runEmbeddedPiAgent({
|
||||
sessionId,
|
||||
sessionKey,
|
||||
sandboxSessionKey: resolveVoiceSandboxSessionKey(agentId, sessionKey),
|
||||
sessionKey: resolvedSessionKey,
|
||||
sandboxSessionKey: resolveVoiceSandboxSessionKey(agentId, resolvedSessionKey),
|
||||
agentId,
|
||||
messageProvider: "voice",
|
||||
sessionFile,
|
||||
|
||||
@@ -28,6 +28,22 @@ const mocks = vi.hoisted(() => ({
|
||||
}));
|
||||
|
||||
vi.mock("./config.js", () => ({
|
||||
resolveVoiceCallSessionKey: (params: {
|
||||
config: Pick<VoiceCallConfig, "sessionScope">;
|
||||
callId: string;
|
||||
phone?: string;
|
||||
explicitSessionKey?: string;
|
||||
}) => {
|
||||
const explicit = params.explicitSessionKey?.trim();
|
||||
if (explicit) {
|
||||
return explicit;
|
||||
}
|
||||
if (params.config.sessionScope === "per-call") {
|
||||
return `voice:call:${params.callId}`;
|
||||
}
|
||||
const normalizedPhone = params.phone?.replace(/\D/g, "");
|
||||
return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${params.callId}`;
|
||||
},
|
||||
resolveVoiceCallConfig: mocks.resolveVoiceCallConfig,
|
||||
resolveTwilioAuthToken: mocks.resolveTwilioAuthToken,
|
||||
validateProviderConfig: mocks.validateProviderConfig,
|
||||
@@ -382,6 +398,64 @@ describe("createVoiceCallRuntime lifecycle", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("uses persisted per-call session keys for realtime consults", async () => {
|
||||
const config = createBaseConfig();
|
||||
config.inboundPolicy = "allowlist";
|
||||
config.realtime.enabled = true;
|
||||
config.sessionScope = "per-call";
|
||||
const runEmbeddedPiAgent = vi.fn(async () => ({
|
||||
payloads: [{ text: "Per-call consult answer." }],
|
||||
meta: {},
|
||||
}));
|
||||
const sessionStore: Record<string, unknown> = {};
|
||||
const agentRuntime = {
|
||||
defaults: { provider: "openai", model: "gpt-5.4" },
|
||||
resolveAgentDir: vi.fn(() => "/tmp/agent"),
|
||||
resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"),
|
||||
resolveAgentIdentity: vi.fn(),
|
||||
resolveThinkingDefault: vi.fn(() => "high"),
|
||||
resolveAgentTimeoutMs: vi.fn(() => 30_000),
|
||||
ensureAgentWorkspace: vi.fn(async () => {}),
|
||||
session: {
|
||||
resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
|
||||
loadSessionStore: vi.fn(() => sessionStore),
|
||||
saveSessionStore: vi.fn(async () => {}),
|
||||
resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
|
||||
},
|
||||
runEmbeddedPiAgent,
|
||||
};
|
||||
mocks.managerGetCall.mockReturnValue({
|
||||
callId: "call-1",
|
||||
sessionKey: "voice:call:call-1",
|
||||
direction: "inbound",
|
||||
from: "+15550001234",
|
||||
to: "+15550009999",
|
||||
transcript: [],
|
||||
});
|
||||
|
||||
await createVoiceCallRuntime({
|
||||
config,
|
||||
coreConfig: {} as CoreConfig,
|
||||
agentRuntime: agentRuntime as never,
|
||||
});
|
||||
|
||||
const handler = mocks.realtimeHandlerRegisterToolHandler.mock.calls[0]?.[1] as
|
||||
| ((
|
||||
args: unknown,
|
||||
callId: string,
|
||||
context?: { partialUserTranscript?: string },
|
||||
) => Promise<unknown>)
|
||||
| undefined;
|
||||
await expect(handler?.({ question: "What should I say?" }, "call-1")).resolves.toEqual({
|
||||
text: "Per-call consult answer.",
|
||||
});
|
||||
expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
sessionKey: "voice:call:call-1",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("answers realtime consults from fast memory context before starting the full agent", async () => {
|
||||
const config = createBaseConfig();
|
||||
config.realtime.enabled = true;
|
||||
|
||||
@@ -10,6 +10,7 @@ import {
|
||||
} from "openclaw/plugin-sdk/realtime-voice";
|
||||
import type { VoiceCallConfig } from "./config.js";
|
||||
import {
|
||||
resolveVoiceCallSessionKey,
|
||||
resolveTwilioAuthToken,
|
||||
resolveVoiceCallConfig,
|
||||
validateProviderConfig,
|
||||
@@ -103,6 +104,7 @@ function loadRealtimeHandler(): Promise<RealtimeHandlerModule> {
|
||||
}
|
||||
|
||||
function resolveVoiceCallConsultSessionKey(call: {
|
||||
config: VoiceCallConfig;
|
||||
sessionKey?: string;
|
||||
from?: string;
|
||||
to?: string;
|
||||
@@ -113,8 +115,11 @@ function resolveVoiceCallConsultSessionKey(call: {
|
||||
return call.sessionKey;
|
||||
}
|
||||
const phone = call.direction === "outbound" ? call.to : call.from;
|
||||
const normalizedPhone = phone?.replace(/\D/g, "");
|
||||
return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${call.callId}`;
|
||||
return resolveVoiceCallSessionKey({
|
||||
config: call.config,
|
||||
callId: call.callId,
|
||||
phone,
|
||||
});
|
||||
}
|
||||
|
||||
function mapVoiceCallConsultTranscript(
|
||||
@@ -335,7 +340,7 @@ export async function createVoiceCallRuntime(params: {
|
||||
return { error: `Call "${callId}" not found` };
|
||||
}
|
||||
const agentId = config.agentId ?? "main";
|
||||
const sessionKey = resolveVoiceCallConsultSessionKey(call);
|
||||
const sessionKey = resolveVoiceCallConsultSessionKey({ ...call, config });
|
||||
const fastContext = await resolveRealtimeFastContextConsult({
|
||||
cfg,
|
||||
agentId,
|
||||
|
||||
@@ -18,6 +18,7 @@ export function createVoiceCallBaseConfig(params?: {
|
||||
transcriptTimeoutMs: 180000,
|
||||
ringTimeoutMs: 30000,
|
||||
maxConcurrentCalls: 1,
|
||||
sessionScope: "per-phone",
|
||||
serve: { port: 3334, bind: "127.0.0.1", path: "/voice/webhook" },
|
||||
tailscale: { mode: "off", path: "/voice/webhook" },
|
||||
tunnel: {
|
||||
|
||||
@@ -879,6 +879,7 @@ export class VoiceCallWebhookServer {
|
||||
coreConfig: this.coreConfig,
|
||||
agentRuntime: this.agentRuntime,
|
||||
callId,
|
||||
sessionKey: call.sessionKey,
|
||||
from: call.from,
|
||||
transcript: call.transcript,
|
||||
userMessage,
|
||||
|
||||
Reference in New Issue
Block a user