mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 12:30:44 +00:00
fix: delay meet twilio intro speech
This commit is contained in:
@@ -12,8 +12,13 @@ import {
|
||||
import { CREATE_MEET_FROM_BROWSER_SCRIPT } from "./src/transports/chrome-create.js";
|
||||
|
||||
const voiceCallMocks = vi.hoisted(() => ({
|
||||
joinMeetViaVoiceCallGateway: vi.fn(async () => ({ callId: "call-1", dtmfSent: true })),
|
||||
joinMeetViaVoiceCallGateway: vi.fn(async () => ({
|
||||
callId: "call-1",
|
||||
dtmfSent: true,
|
||||
introSent: true,
|
||||
})),
|
||||
endMeetVoiceCallGatewayCall: vi.fn(async () => {}),
|
||||
speakMeetViaVoiceCallGateway: vi.fn(async () => {}),
|
||||
}));
|
||||
|
||||
const fetchGuardMocks = vi.hoisted(() => ({
|
||||
@@ -38,6 +43,7 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
|
||||
vi.mock("./src/voice-call-gateway.js", () => ({
|
||||
joinMeetViaVoiceCallGateway: voiceCallMocks.joinMeetViaVoiceCallGateway,
|
||||
endMeetVoiceCallGatewayCall: voiceCallMocks.endMeetVoiceCallGatewayCall,
|
||||
speakMeetViaVoiceCallGateway: voiceCallMocks.speakMeetViaVoiceCallGateway,
|
||||
}));
|
||||
|
||||
function setup(
|
||||
|
||||
@@ -35,8 +35,13 @@ import { buildMeetDtmfSequence, normalizeDialInNumber } from "./src/transports/t
|
||||
import type { GoogleMeetSession } from "./src/transports/types.js";
|
||||
|
||||
const voiceCallMocks = vi.hoisted(() => ({
|
||||
joinMeetViaVoiceCallGateway: vi.fn(async () => ({ callId: "call-1", dtmfSent: true })),
|
||||
joinMeetViaVoiceCallGateway: vi.fn(async () => ({
|
||||
callId: "call-1",
|
||||
dtmfSent: true,
|
||||
introSent: true,
|
||||
})),
|
||||
endMeetVoiceCallGatewayCall: vi.fn(async () => {}),
|
||||
speakMeetViaVoiceCallGateway: vi.fn(async () => {}),
|
||||
}));
|
||||
|
||||
const fetchGuardMocks = vi.hoisted(() => ({
|
||||
@@ -61,6 +66,7 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
|
||||
vi.mock("./src/voice-call-gateway.js", () => ({
|
||||
joinMeetViaVoiceCallGateway: voiceCallMocks.joinMeetViaVoiceCallGateway,
|
||||
endMeetVoiceCallGatewayCall: voiceCallMocks.endMeetVoiceCallGatewayCall,
|
||||
speakMeetViaVoiceCallGateway: voiceCallMocks.speakMeetViaVoiceCallGateway,
|
||||
}));
|
||||
|
||||
function setup(
|
||||
@@ -348,7 +354,12 @@ describe("google-meet plugin", () => {
|
||||
"BlackHole 2ch",
|
||||
],
|
||||
},
|
||||
voiceCall: { enabled: true, requestTimeoutMs: 30000, dtmfDelayMs: 2500 },
|
||||
voiceCall: {
|
||||
enabled: true,
|
||||
requestTimeoutMs: 30000,
|
||||
dtmfDelayMs: 2500,
|
||||
postDtmfSpeechDelayMs: 5000,
|
||||
},
|
||||
realtime: {
|
||||
provider: "openai",
|
||||
introMessage: "Say exactly: I'm here and listening.",
|
||||
@@ -955,12 +966,14 @@ describe("google-meet plugin", () => {
|
||||
dtmfSequence: "123456#",
|
||||
voiceCallId: "call-1",
|
||||
dtmfSent: true,
|
||||
introSent: true,
|
||||
},
|
||||
});
|
||||
expect(voiceCallMocks.joinMeetViaVoiceCallGateway).toHaveBeenCalledWith({
|
||||
config: expect.objectContaining({ defaultTransport: "twilio" }),
|
||||
dialInNumber: "+15551234567",
|
||||
dtmfSequence: "123456#",
|
||||
message: "Say exactly: I'm here and listening.",
|
||||
});
|
||||
});
|
||||
|
||||
@@ -984,6 +997,32 @@ describe("google-meet plugin", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Verifies that `speak` on a Twilio-transport session is routed to the
// voice-call gateway helper (using the call id returned by the join) instead of
// a local speaker.
it("delegates Twilio session speech through voice-call", async () => {
  const { tools } = setup({ defaultTransport: "twilio" });
  const tool = tools[0] as {
    execute: (id: string, params: unknown) => Promise<{ details: { session: { id: string } } }>;
  };

  // Join first so the session carries the mocked gateway call id ("call-1").
  const joined = await tool.execute("id", {
    action: "join",
    url: "https://meet.google.com/abc-defg-hij",
    dialInNumber: "+15551234567",
    pin: "123456",
  });

  const spoken = await tool.execute("id", {
    action: "speak",
    sessionId: joined.details.session.id,
    message: "Say exactly: hello after joining.",
  });

  expect(spoken.details).toMatchObject({ spoken: true });
  // The message must be forwarded unchanged together with the joined call id.
  expect(voiceCallMocks.speakMeetViaVoiceCallGateway).toHaveBeenCalledWith({
    config: expect.objectContaining({ defaultTransport: "twilio" }),
    callId: "call-1",
    message: "Say exactly: hello after joining.",
  });
});
|
||||
|
||||
it("reports setup status through the tool", async () => {
|
||||
const originalPlatform = process.platform;
|
||||
Object.defineProperty(process, "platform", { value: "darwin" });
|
||||
|
||||
@@ -119,6 +119,10 @@ const googleMeetConfigSchema = {
|
||||
advanced: true,
|
||||
},
|
||||
"voiceCall.dtmfDelayMs": { label: "DTMF Delay (ms)", advanced: true },
|
||||
"voiceCall.postDtmfSpeechDelayMs": {
|
||||
label: "Post-DTMF Speech Delay (ms)",
|
||||
advanced: true,
|
||||
},
|
||||
"voiceCall.introMessage": { label: "Voice Call Intro Message", advanced: true },
|
||||
"realtime.provider": {
|
||||
label: "Realtime Provider",
|
||||
|
||||
@@ -52,6 +52,7 @@ export type GoogleMeetConfig = {
|
||||
token?: string;
|
||||
requestTimeoutMs: number;
|
||||
dtmfDelayMs: number;
|
||||
postDtmfSpeechDelayMs: number;
|
||||
introMessage?: string;
|
||||
};
|
||||
realtime: {
|
||||
@@ -181,6 +182,7 @@ export const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
|
||||
enabled: true,
|
||||
requestTimeoutMs: 30_000,
|
||||
dtmfDelayMs: 2_500,
|
||||
postDtmfSpeechDelayMs: 5_000,
|
||||
},
|
||||
realtime: {
|
||||
provider: "openai",
|
||||
@@ -432,6 +434,10 @@ export function resolveGoogleMeetConfigWithEnv(
|
||||
voiceCall.dtmfDelayMs,
|
||||
DEFAULT_GOOGLE_MEET_CONFIG.voiceCall.dtmfDelayMs,
|
||||
),
|
||||
postDtmfSpeechDelayMs: resolveNumber(
|
||||
voiceCall.postDtmfSpeechDelayMs,
|
||||
DEFAULT_GOOGLE_MEET_CONFIG.voiceCall.postDtmfSpeechDelayMs,
|
||||
),
|
||||
introMessage: normalizeOptionalString(voiceCall.introMessage),
|
||||
},
|
||||
realtime: {
|
||||
|
||||
@@ -21,7 +21,11 @@ import type {
|
||||
GoogleMeetJoinResult,
|
||||
GoogleMeetSession,
|
||||
} from "./transports/types.js";
|
||||
import { endMeetVoiceCallGatewayCall, joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js";
|
||||
import {
|
||||
endMeetVoiceCallGatewayCall,
|
||||
joinMeetViaVoiceCallGateway,
|
||||
speakMeetViaVoiceCallGateway,
|
||||
} from "./voice-call-gateway.js";
|
||||
|
||||
function nowIso(): string {
|
||||
return new Date().toISOString();
|
||||
@@ -301,6 +305,7 @@ export class GoogleMeetRuntime {
|
||||
return { session: reusable, spoken };
|
||||
}
|
||||
const createdAt = nowIso();
|
||||
let delegatedTwilioSpoken = false;
|
||||
|
||||
const session: GoogleMeetSession = {
|
||||
id: `meet_${randomUUID()}`,
|
||||
@@ -398,14 +403,22 @@ export class GoogleMeetRuntime {
|
||||
config: this.params.config,
|
||||
dialInNumber,
|
||||
dtmfSequence,
|
||||
message:
|
||||
mode === "realtime"
|
||||
? (request.message ??
|
||||
this.params.config.voiceCall.introMessage ??
|
||||
this.params.config.realtime.introMessage)
|
||||
: undefined,
|
||||
})
|
||||
: undefined;
|
||||
delegatedTwilioSpoken = Boolean(voiceCallResult?.introSent);
|
||||
session.twilio = {
|
||||
dialInNumber,
|
||||
pinProvided: Boolean(request.pin ?? this.params.config.twilio.defaultPin),
|
||||
dtmfSequence,
|
||||
voiceCallId: voiceCallResult?.callId,
|
||||
dtmfSent: voiceCallResult?.dtmfSent,
|
||||
introSent: voiceCallResult?.introSent,
|
||||
};
|
||||
if (voiceCallResult?.callId) {
|
||||
this.#sessionStops.set(session.id, async () => {
|
||||
@@ -428,9 +441,11 @@ export class GoogleMeetRuntime {
|
||||
|
||||
this.#sessions.set(session.id, session);
|
||||
const spoken =
|
||||
mode === "realtime" && speechInstructions
|
||||
? (await this.speak(session.id, speechInstructions)).spoken
|
||||
: false;
|
||||
transport === "twilio"
|
||||
? delegatedTwilioSpoken
|
||||
: mode === "realtime" && speechInstructions
|
||||
? (await this.speak(session.id, speechInstructions)).spoken
|
||||
: false;
|
||||
return { session, spoken };
|
||||
}
|
||||
|
||||
@@ -459,6 +474,20 @@ export class GoogleMeetRuntime {
|
||||
if (!session) {
|
||||
return { found: false, spoken: false };
|
||||
}
|
||||
if (session.transport === "twilio" && session.twilio?.voiceCallId) {
|
||||
await speakMeetViaVoiceCallGateway({
|
||||
config: this.params.config,
|
||||
callId: session.twilio.voiceCallId,
|
||||
message:
|
||||
instructions ||
|
||||
this.params.config.voiceCall.introMessage ||
|
||||
this.params.config.realtime.introMessage ||
|
||||
"",
|
||||
});
|
||||
session.twilio.introSent = true;
|
||||
session.updatedAt = nowIso();
|
||||
return { found: true, spoken: true, session };
|
||||
}
|
||||
await this.#refreshBrowserHealthForChromeSession(session);
|
||||
const speak = this.#sessionSpeakers.get(sessionId);
|
||||
if (!speak || session.state !== "active") {
|
||||
|
||||
@@ -86,6 +86,7 @@ export type GoogleMeetSession = {
|
||||
dtmfSequence?: string;
|
||||
voiceCallId?: string;
|
||||
dtmfSent?: boolean;
|
||||
introSent?: boolean;
|
||||
};
|
||||
notes: string[];
|
||||
};
|
||||
|
||||
@@ -27,25 +27,49 @@ describe("Google Meet voice-call gateway", () => {
|
||||
gatewayMocks.startGatewayClientWhenEventLoopReady.mockClear();
|
||||
});
|
||||
|
||||
it("starts Twilio Meet calls in conversation mode with the realtime intro by default", async () => {
|
||||
it("starts Twilio Meet calls silently, sends DTMF, then speaks the realtime intro", async () => {
|
||||
const config = resolveGoogleMeetConfig({
|
||||
voiceCall: { gatewayUrl: "ws://127.0.0.1:18789" },
|
||||
voiceCall: {
|
||||
gatewayUrl: "ws://127.0.0.1:18789",
|
||||
dtmfDelayMs: 1,
|
||||
postDtmfSpeechDelayMs: 1,
|
||||
},
|
||||
realtime: { introMessage: "Say exactly: I'm here and listening." },
|
||||
});
|
||||
|
||||
await joinMeetViaVoiceCallGateway({
|
||||
config,
|
||||
dialInNumber: "+15551234567",
|
||||
dtmfSequence: "123456#",
|
||||
message: "Say exactly: I'm here and listening.",
|
||||
});
|
||||
|
||||
expect(gatewayMocks.request).toHaveBeenCalledWith(
|
||||
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
"voicecall.start",
|
||||
{
|
||||
to: "+15551234567",
|
||||
message: "Say exactly: I'm here and listening.",
|
||||
mode: "conversation",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
"voicecall.dtmf",
|
||||
{
|
||||
callId: "call-1",
|
||||
digits: "123456#",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
expect(gatewayMocks.request).toHaveBeenNthCalledWith(
|
||||
3,
|
||||
"voicecall.speak",
|
||||
{
|
||||
callId: "call-1",
|
||||
message: "Say exactly: I'm here and listening.",
|
||||
},
|
||||
{ timeoutMs: 30_000 },
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -13,9 +13,15 @@ type VoiceCallStartResult = {
|
||||
error?: string;
|
||||
};
|
||||
|
||||
/**
 * Response payload of a `voicecall.speak` gateway request.
 *
 * Both fields are optional: callers in this module treat only an explicit
 * `success === false` as a failure and fall back to a generic message when
 * `error` is absent or empty.
 */
type VoiceCallSpeakResult = {
  success?: boolean;
  error?: string;
};
|
||||
|
||||
/** Outcome of joining a Meet dial-in through the voice-call gateway. */
export type VoiceCallMeetJoinResult = {
  /** Call id reported by the gateway's `voicecall.start` response. */
  callId: string;
  /** True when a DTMF sequence was supplied for the join. */
  dtmfSent: boolean;
  /** True when an intro message was supplied for the join. */
  introSent: boolean;
};
|
||||
|
||||
async function createConnectedGatewayClient(
|
||||
@@ -67,6 +73,7 @@ export async function joinMeetViaVoiceCallGateway(params: {
|
||||
config: GoogleMeetConfig;
|
||||
dialInNumber: string;
|
||||
dtmfSequence?: string;
|
||||
message?: string;
|
||||
}): Promise<VoiceCallMeetJoinResult> {
|
||||
let client: VoiceCallGatewayClient | undefined;
|
||||
|
||||
@@ -76,7 +83,6 @@ export async function joinMeetViaVoiceCallGateway(params: {
|
||||
"voicecall.start",
|
||||
{
|
||||
to: params.dialInNumber,
|
||||
message: params.config.voiceCall.introMessage ?? params.config.realtime.introMessage,
|
||||
mode: "conversation",
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
@@ -95,7 +101,25 @@ export async function joinMeetViaVoiceCallGateway(params: {
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
);
|
||||
}
|
||||
return { callId: start.callId, dtmfSent: Boolean(params.dtmfSequence) };
|
||||
if (params.message) {
|
||||
await sleep(params.config.voiceCall.postDtmfSpeechDelayMs);
|
||||
const spoken = (await client.request(
|
||||
"voicecall.speak",
|
||||
{
|
||||
callId: start.callId,
|
||||
message: params.message,
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
)) as VoiceCallSpeakResult;
|
||||
if (spoken.success === false) {
|
||||
throw new Error(spoken.error || "voicecall.speak failed");
|
||||
}
|
||||
}
|
||||
return {
|
||||
callId: start.callId,
|
||||
dtmfSent: Boolean(params.dtmfSequence),
|
||||
introSent: Boolean(params.message),
|
||||
};
|
||||
} finally {
|
||||
await client?.stopAndWait({ timeoutMs: 1_000 });
|
||||
}
|
||||
@@ -120,3 +144,28 @@ export async function endMeetVoiceCallGatewayCall(params: {
|
||||
await client?.stopAndWait({ timeoutMs: 1_000 });
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Speaks `message` on an existing voice call by sending a `voicecall.speak`
 * request through a gateway client created for this single request.
 *
 * @param params.config - Plugin config; `voiceCall.requestTimeoutMs` bounds the request.
 * @param params.callId - Id of the gateway call to speak on.
 * @param params.message - Text/instructions forwarded unchanged to the gateway.
 * @throws Error when the gateway responds with `success === false` (using the
 *   gateway's `error` text, or "voicecall.speak failed" when none is given).
 *   Failures from connecting or from the request itself propagate unchanged.
 */
export async function speakMeetViaVoiceCallGateway(params: {
  config: GoogleMeetConfig;
  callId: string;
  message: string;
}): Promise<void> {
  let client: VoiceCallGatewayClient | undefined;

  try {
    client = await createConnectedGatewayClient(params.config);
    const spoken = (await client.request(
      "voicecall.speak",
      {
        callId: params.callId,
        message: params.message,
      },
      { timeoutMs: params.config.voiceCall.requestTimeoutMs },
    )) as VoiceCallSpeakResult;
    // Only an explicit `false` counts as failure; a missing flag is accepted.
    if (spoken.success === false) {
      throw new Error(spoken.error || "voicecall.speak failed");
    }
  } finally {
    // Runs on every path; `client` is still undefined if connecting failed.
    await client?.stopAndWait({ timeoutMs: 1_000 });
  }
}
|
||||
|
||||
@@ -369,12 +369,27 @@ export default definePluginEntry({
|
||||
"voicecall.speak",
|
||||
async ({ params, respond }: GatewayRequestHandlerOptions) => {
|
||||
try {
|
||||
await respondToCallMessageAction({
|
||||
requestParams: params,
|
||||
respond,
|
||||
action: (request) => request.rt.manager.speak(request.callId, request.message),
|
||||
failure: "speak failed",
|
||||
});
|
||||
const request = await resolveCallMessageRequest(params);
|
||||
if ("error" in request) {
|
||||
respond(false, { error: request.error });
|
||||
return;
|
||||
}
|
||||
if (request.rt.config.realtime.enabled) {
|
||||
const realtimeResult = request.rt.webhookServer.speakRealtime(
|
||||
request.callId,
|
||||
request.message,
|
||||
);
|
||||
if (realtimeResult.success) {
|
||||
respond(true, { success: true });
|
||||
return;
|
||||
}
|
||||
}
|
||||
const result = await request.rt.manager.speak(request.callId, request.message);
|
||||
if (!result.success) {
|
||||
respond(false, { error: result.error || "speak failed" });
|
||||
return;
|
||||
}
|
||||
respond(true, { success: true });
|
||||
} catch (err) {
|
||||
sendError(respond, err);
|
||||
}
|
||||
|
||||
@@ -195,6 +195,13 @@ export class VoiceCallWebhookServer {
|
||||
return this.realtimeHandler;
|
||||
}
|
||||
|
||||
/**
 * Forwards a speak request to the realtime call handler.
 *
 * @param callId - Call to speak on.
 * @param instructions - Speech instructions passed through to the handler.
 * @returns The handler's result, or a failure result (never a throw from this
 *   method itself) when no realtime handler has been set.
 */
speakRealtime(callId: string, instructions: string): { success: boolean; error?: string } {
  if (!this.realtimeHandler) {
    return { success: false, error: "Realtime voice handler is not configured" };
  }
  return this.realtimeHandler.speak(callId, instructions);
}
|
||||
|
||||
setRealtimeHandler(handler: RealtimeCallHandler): void {
|
||||
this.realtimeHandler = handler;
|
||||
}
|
||||
|
||||
@@ -214,6 +214,121 @@ describe("RealtimeCallHandler path routing", () => {
|
||||
}
|
||||
});
|
||||
|
||||
// An outbound call whose record has no initial message must stay silent: the
// bridge's greeting must not fire when the provider signals readiness.
it("does not emit an outbound realtime greeting without an initial message", async () => {
  // Captures the bridge request so the test can invoke its `onReady` callback.
  let callbacks:
    | {
        onReady?: () => void;
      }
    | undefined;
  const triggerGreeting = vi.fn();
  const createBridge = vi.fn(
    (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
      callbacks = request;
      return makeBridge({ triggerGreeting });
    },
  );
  // Call record with empty metadata, i.e. nothing to greet with.
  const getCallByProviderCallId = vi.fn(
    (): CallRecord => ({
      callId: "call-1",
      providerCallId: "CA-silent",
      provider: "twilio",
      direction: "outbound",
      state: "ringing",
      from: "+15550001234",
      to: "+15550009999",
      startedAt: Date.now(),
      transcript: [],
      processedEventIds: [],
      metadata: {},
    }),
  );
  const handler = makeHandler(undefined, {
    manager: {
      getCallByProviderCallId,
    },
    realtimeProvider: makeRealtimeProvider(createBridge),
  });
  const server = await startRealtimeServer(handler);

  try {
    const ws = await connectWs(server.url);
    try {
      ws.send(
        JSON.stringify({
          event: "start",
          start: { streamSid: "MZ-silent", callSid: "CA-silent" },
        }),
      );
      // The bridge is created asynchronously after the "start" event arrives.
      await vi.waitFor(() => {
        expect(createBridge).toHaveBeenCalled();
      });

      callbacks?.onReady?.();

      expect(triggerGreeting).not.toHaveBeenCalled();
    } finally {
      if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
        ws.close();
      }
    }
  } finally {
    await server.close();
  }
});
|
||||
|
||||
// Once a realtime bridge exists for a call, `handler.speak` addressed by the
// internal call id must reach that bridge's `triggerGreeting`.
it("speaks through the active outbound realtime bridge by call id", async () => {
  const triggerGreeting = vi.fn();
  const createBridge = vi.fn(() => makeBridge({ triggerGreeting }));
  const getCallByProviderCallId = vi.fn(
    (): CallRecord => ({
      callId: "call-1",
      providerCallId: "CA-speak",
      provider: "twilio",
      direction: "outbound",
      state: "ringing",
      from: "+15550001234",
      to: "+15550009999",
      startedAt: Date.now(),
      transcript: [],
      processedEventIds: [],
      metadata: {},
    }),
  );
  const handler = makeHandler(undefined, {
    manager: {
      getCallByProviderCallId,
    },
    realtimeProvider: makeRealtimeProvider(createBridge),
  });
  const server = await startRealtimeServer(handler);

  try {
    const ws = await connectWs(server.url);
    try {
      ws.send(
        JSON.stringify({
          event: "start",
          start: { streamSid: "MZ-speak", callSid: "CA-speak" },
        }),
      );
      // Wait until the "start" event has caused the bridge to be created.
      await vi.waitFor(() => {
        expect(createBridge).toHaveBeenCalled();
      });

      // Speak by the record's callId ("call-1"), not the provider call sid.
      expect(handler.speak("call-1", "Say exactly: hello from Meet.")).toEqual({
        success: true,
      });
      expect(triggerGreeting).toHaveBeenCalledWith("Say exactly: hello from Meet.");
    } finally {
      if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
        ws.close();
      }
    }
  } finally {
    await server.close();
  }
});
|
||||
|
||||
it("submits continuing responses only for realtime agent consult calls", async () => {
|
||||
let callbacks:
|
||||
| {
|
||||
|
||||
@@ -41,7 +41,7 @@ function buildGreetingInstructions(
|
||||
): string | undefined {
|
||||
const trimmedGreeting = greeting?.trim();
|
||||
if (!trimmedGreeting) {
|
||||
return baseInstructions;
|
||||
return undefined;
|
||||
}
|
||||
const intro =
|
||||
"Start the call by greeting the caller naturally. Include this greeting in your first spoken reply:";
|
||||
@@ -64,9 +64,15 @@ type CallRegistration = {
|
||||
|
||||
type ActiveRealtimeVoiceBridge = RealtimeVoiceBridgeSession;
|
||||
|
||||
/**
 * Result of asking an active realtime bridge to speak.
 *
 * `error` carries a human-readable reason and is only set on failure results.
 */
type RealtimeSpeakResult = {
  success: boolean;
  error?: string;
};
|
||||
|
||||
export class RealtimeCallHandler {
|
||||
private readonly toolHandlers = new Map<string, ToolHandlerFn>();
|
||||
private readonly pendingStreamTokens = new Map<string, PendingStreamToken>();
|
||||
private readonly activeBridgesByCallId = new Map<string, ActiveRealtimeVoiceBridge>();
|
||||
private publicOrigin: string | null = null;
|
||||
private publicPathPrefix = "";
|
||||
|
||||
@@ -199,6 +205,19 @@ export class RealtimeCallHandler {
|
||||
this.toolHandlers.set(name, fn);
|
||||
}
|
||||
|
||||
/**
 * Asks the realtime bridge registered for `callId` to speak.
 *
 * @param callId - Key used to look the bridge up in `activeBridgesByCallId`.
 * @param instructions - Passed unchanged to the bridge's `triggerGreeting`.
 * @returns `{ success: true }` when the bridge accepted the request; otherwise
 *   a failure result. This method does not throw: a missing bridge and any
 *   error raised by `triggerGreeting` are both reported through `error`.
 */
speak(callId: string, instructions: string): RealtimeSpeakResult {
  const bridge = this.activeBridgesByCallId.get(callId);
  if (!bridge) {
    return { success: false, error: "No active realtime bridge for call" };
  }
  try {
    bridge.triggerGreeting(instructions);
    return { success: true };
  } catch (error) {
    return { success: false, error: formatErrorMessage(error) };
  }
}
|
||||
|
||||
private issueStreamToken(meta: Omit<PendingStreamToken, "expiry"> = {}): string {
|
||||
const token = randomUUID();
|
||||
this.pendingStreamTokens.set(token, { expiry: Date.now() + STREAM_TOKEN_TTL_MS, ...meta });
|
||||
@@ -254,7 +273,7 @@ export class RealtimeCallHandler {
|
||||
instructions: this.config.instructions,
|
||||
tools: this.config.tools,
|
||||
initialGreetingInstructions,
|
||||
triggerGreetingOnReady: true,
|
||||
triggerGreetingOnReady: Boolean(initialGreetingInstructions),
|
||||
audioSink: {
|
||||
isOpen: () => ws.readyState === WebSocket.OPEN,
|
||||
sendAudio: (muLaw) => {
|
||||
@@ -312,6 +331,8 @@ export class RealtimeCallHandler {
|
||||
console.error("[voice-call] realtime voice error:", error.message);
|
||||
},
|
||||
onClose: (reason) => {
|
||||
this.activeBridgesByCallId.delete(callId);
|
||||
this.activeBridgesByCallId.delete(callSid);
|
||||
if (reason !== "error") {
|
||||
return;
|
||||
}
|
||||
@@ -330,6 +351,14 @@ export class RealtimeCallHandler {
|
||||
});
|
||||
},
|
||||
});
|
||||
this.activeBridgesByCallId.set(callId, bridge);
|
||||
this.activeBridgesByCallId.set(callSid, bridge);
|
||||
const closeBridge = bridge.close.bind(bridge);
|
||||
bridge.close = () => {
|
||||
this.activeBridgesByCallId.delete(callId);
|
||||
this.activeBridgesByCallId.delete(callSid);
|
||||
closeBridge();
|
||||
};
|
||||
|
||||
bridge.connect().catch((error: Error) => {
|
||||
console.error("[voice-call] Failed to connect realtime bridge:", error);
|
||||
|
||||
Reference in New Issue
Block a user