refactor: share realtime voice bridge sessions

This commit is contained in:
Peter Steinberger
2026-04-24 01:42:16 +01:00
parent 958afeb397
commit 57e139100b
5 changed files with 278 additions and 51 deletions

View File

@@ -2,10 +2,11 @@ import { randomUUID } from "node:crypto";
import http from "node:http";
import type { Duplex } from "node:stream";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import type {
RealtimeVoiceBridge,
RealtimeVoiceProviderConfig,
RealtimeVoiceProviderPlugin,
import {
createRealtimeVoiceBridgeSession,
type RealtimeVoiceBridgeSession,
type RealtimeVoiceProviderConfig,
type RealtimeVoiceProviderPlugin,
} from "openclaw/plugin-sdk/realtime-voice";
import WebSocket, { WebSocketServer } from "ws";
import type { VoiceCallRealtimeConfig } from "../config.js";
@@ -59,16 +60,7 @@ type CallRegistration = {
initialGreetingInstructions?: string;
};
type ActiveRealtimeVoiceBridge = Pick<
RealtimeVoiceBridge,
| "connect"
| "sendAudio"
| "setMediaTimestamp"
| "submitToolResult"
| "acknowledgeMark"
| "close"
| "triggerGreeting"
>;
type ActiveRealtimeVoiceBridge = RealtimeVoiceBridgeSession;
export class RealtimeCallHandler {
private readonly toolHandlers = new Map<string, ToolHandlerFn>();
@@ -254,34 +246,30 @@ export class RealtimeCallHandler {
this.endCallInManager(callSid, callId, reason);
};
const bridgeRef: { current?: ActiveRealtimeVoiceBridge } = {};
const bridge = this.realtimeProvider.createBridge({
const bridge = createRealtimeVoiceBridgeSession({
provider: this.realtimeProvider,
providerConfig: this.providerConfig,
instructions: this.config.instructions,
tools: this.config.tools,
onAudio: (muLaw) => {
if (ws.readyState !== WebSocket.OPEN) {
return;
}
ws.send(
JSON.stringify({
event: "media",
streamSid,
media: { payload: muLaw.toString("base64") },
}),
);
},
onClearAudio: () => {
if (ws.readyState !== WebSocket.OPEN) {
return;
}
ws.send(JSON.stringify({ event: "clear", streamSid }));
},
onMark: (markName) => {
if (ws.readyState !== WebSocket.OPEN) {
return;
}
ws.send(JSON.stringify({ event: "mark", streamSid, mark: { name: markName } }));
initialGreetingInstructions,
triggerGreetingOnReady: true,
audioSink: {
isOpen: () => ws.readyState === WebSocket.OPEN,
sendAudio: (muLaw) => {
ws.send(
JSON.stringify({
event: "media",
streamSid,
media: { payload: muLaw.toString("base64") },
}),
);
},
clearAudio: () => {
ws.send(JSON.stringify({ event: "clear", streamSid }));
},
sendMark: (markName) => {
ws.send(JSON.stringify({ event: "mark", streamSid, mark: { name: markName } }));
},
},
onTranscript: (role, text, isFinal) => {
if (!isFinal) {
@@ -309,22 +297,15 @@ export class RealtimeCallHandler {
text,
});
},
onToolCall: (toolEvent) => {
const activeBridge = bridgeRef.current;
if (!activeBridge) {
return;
}
onToolCall: (toolEvent, session) => {
void this.executeToolCall(
activeBridge,
session,
callId,
toolEvent.callId || toolEvent.itemId,
toolEvent.name,
toolEvent.args,
);
},
onReady: () => {
bridgeRef.current?.triggerGreeting?.(initialGreetingInstructions);
},
onError: (error) => {
console.error("[voice-call] realtime voice error:", error.message);
},
@@ -348,8 +329,6 @@ export class RealtimeCallHandler {
},
});
bridgeRef.current = bridge;
bridge.connect().catch((error: Error) => {
console.error("[voice-call] Failed to connect realtime bridge:", error);
bridge.close();