mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:30:42 +00:00
fix: stabilize Google Meet realtime audio
This commit is contained in:
@@ -34,6 +34,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Plugins/catalog: supplement lagging official external WeCom and Yuanbao npm manifests with channel config descriptors and declared tool contracts from the OpenClaw catalog, so trusted package sweeps no longer fail because external package metadata trails the host contract. Thanks @vincentkoc.
|
||||
- Plugins/install: let trusted official `@openclaw/*` catalog installs recover when npm `latest` points at a prerelease by falling back to the newest stable version, or by allowing prerelease-only launch packages with a warning instead of making beta/development plugin sweeps fail at install time. Thanks @vincentkoc.
|
||||
- Google Meet: grant Chrome media permissions against the actual Meet tab, start the local realtime audio bridge only after Meet joins, expose realtime transcripts in status/logs, and force explicit audio responses with current OpenAI realtime output-audio events so BlackHole capture does not keep the OpenClaw participant muted or silent.
|
||||
- Google Meet: use the local call-control microphone button instead of disabled remote participant mute buttons, and block realtime speech when the OpenClaw Meet microphone remains muted.
|
||||
- Google Meet: refresh realtime browser state during status and retry delayed speech after Meet finishes joining, so a just-opened in-call tab no longer leaves speech stuck behind stale `not-in-call` health.
|
||||
- Plugins/install: recover the install ledger from the managed npm root when `plugins/installs.json` is empty or partial, so reinstalling Discord and Codex no longer makes the other installed plugin disappear.
|
||||
|
||||
@@ -1865,6 +1865,206 @@ describe("google-meet plugin", () => {
|
||||
}
|
||||
});
|
||||
|
||||
// Joining over the local Chrome transport in realtime mode must issue a
// /permissions/grant browser request that carries the Meet tab's targetId.
it("grants local Chrome Meet media permissions against the opened tab", async () => {
  type JoinHandler = (ctx: {
    params: Record<string, unknown>;
    respond: ReturnType<typeof vi.fn>;
  }) => Promise<void>;

  const callGatewayFromCli = mockLocalMeetBrowserRequest({
    inCall: true,
    micMuted: false,
    title: "Meet call",
    url: "https://meet.google.com/abc-defg-hij",
  });
  const { methods } = setup({
    defaultMode: "realtime",
    defaultTransport: "chrome",
    chrome: { audioBridgeCommand: ["bridge", "start"] },
    realtime: { introMessage: "" },
  });
  const handler = methods.get("googlemeet.join") as JoinHandler | undefined;
  const respond = vi.fn();

  await handler?.({
    params: { url: "https://meet.google.com/abc-defg-hij" },
    respond,
  });

  // The first argument of the first respond() call is the success flag.
  const firstResponse = respond.mock.calls[0];
  expect(firstResponse?.[0]).toBe(true);

  const expectedGrantRequest = expect.objectContaining({
    method: "POST",
    path: "/permissions/grant",
    body: expect.objectContaining({
      origin: "https://meet.google.com",
      permissions: ["audioCapture", "videoCapture"],
      targetId: "local-meet-tab",
    }),
  });
  expect(callGatewayFromCli).toHaveBeenCalledWith(
    "browser.request",
    expect.any(Object),
    expectedGrantRequest,
    { progress: false },
  );
});
|
||||
|
||||
// Records browser requests and shell commands in one ordered log and checks
// that the bridge command only runs after the Meet tab has been inspected (/act).
it("starts the local realtime audio bridge after Meet is inspected", async () => {
  type JoinHandler = (ctx: {
    params: Record<string, unknown>;
    respond: ReturnType<typeof vi.fn>;
  }) => Promise<void>;

  const events: string[] = [];
  const callGatewayFromCli = vi.fn(
    async (
      _method: string,
      _opts: unknown,
      params?: unknown,
      _extra?: unknown,
    ): Promise<Record<string, unknown>> => {
      const request = params as {
        path?: string;
        body?: { fn?: string; targetId?: string; url?: string };
      };
      events.push(`browser:${request.path}`);
      switch (request.path) {
        case "/tabs":
          // No pre-existing tabs, so the join has to open a new one.
          return { tabs: [] };
        case "/tabs/open":
          return {
            targetId: "local-meet-tab",
            title: "Meet",
            url: request.body?.url ?? "https://meet.google.com/abc-defg-hij",
          };
        case "/tabs/focus":
        case "/permissions/grant":
          return { ok: true };
        case "/act":
          // Inspection reports an unmuted participant that is already in the call.
          return {
            result: JSON.stringify({
              inCall: true,
              micMuted: false,
              title: "Meet call",
              url: "https://meet.google.com/abc-defg-hij",
            }),
          };
        default:
          throw new Error(`unexpected browser request path ${request.path}`);
      }
    },
  );
  chromeTransportTesting.setDepsForTest({ callGatewayFromCli });

  const pluginConfig = {
    defaultMode: "realtime",
    defaultTransport: "chrome",
    chrome: { audioBridgeCommand: ["bridge", "start"] },
    realtime: { introMessage: "" },
  };
  const { methods } = setup(pluginConfig, {
    runCommandWithTimeoutHandler: async (argv) => {
      events.push(`command:${argv.join(" ")}`);
      // Only the system_profiler probe produces output (the BlackHole device listing).
      const stdout = argv[0] === "/usr/sbin/system_profiler" ? "BlackHole 2ch" : "";
      return { code: 0, stdout, stderr: "" };
    },
  });
  const handler = methods.get("googlemeet.join") as JoinHandler | undefined;
  const respond = vi.fn();

  await handler?.({
    params: { url: "https://meet.google.com/abc-defg-hij" },
    respond,
  });

  expect(respond.mock.calls[0]?.[0]).toBe(true);
  const inspectedAt = events.indexOf("browser:/act");
  expect(inspectedAt).toBeGreaterThan(-1);
  expect(events.indexOf("command:bridge start")).toBeGreaterThan(inspectedAt);
});
|
||||
|
||||
// When inspection reports the participant is still waiting in the lobby, the
// join still responds successfully but the bridge command must not be run.
it("does not start the local realtime audio bridge while Meet admission is pending", async () => {
  type JoinHandler = (ctx: {
    params: Record<string, unknown>;
    respond: ReturnType<typeof vi.fn>;
  }) => Promise<void>;

  const events: string[] = [];
  const callGatewayFromCli = vi.fn(
    async (
      _method: string,
      _opts: unknown,
      params?: unknown,
      _extra?: unknown,
    ): Promise<Record<string, unknown>> => {
      const request = params as { path?: string; body?: { targetId?: string; url?: string } };
      events.push(`browser:${request.path}`);
      switch (request.path) {
        case "/tabs":
          return { tabs: [] };
        case "/tabs/open":
          return {
            targetId: "local-meet-tab",
            title: "Meet",
            url: request.body?.url ?? "https://meet.google.com/abc-defg-hij",
          };
        case "/tabs/focus":
        case "/permissions/grant":
          return { ok: true };
        case "/act":
          // Lobby state: not in the call yet and a host has to admit the participant.
          return {
            result: JSON.stringify({
              inCall: false,
              lobbyWaiting: true,
              manualActionRequired: true,
              manualActionReason: "meet-admission-required",
              manualActionMessage: "Admit the OpenClaw browser participant in Google Meet.",
              title: "Meet",
              url: "https://meet.google.com/abc-defg-hij",
            }),
          };
        default:
          throw new Error(`unexpected browser request path ${request.path}`);
      }
    },
  );
  chromeTransportTesting.setDepsForTest({ callGatewayFromCli });

  const pluginConfig = {
    defaultMode: "realtime",
    defaultTransport: "chrome",
    chrome: {
      audioBridgeCommand: ["bridge", "start"],
      // Keep the in-call wait minimal so the test does not linger in the lobby.
      waitForInCallMs: 1,
    },
    realtime: { introMessage: "" },
  };
  const { methods } = setup(pluginConfig, {
    runCommandWithTimeoutHandler: async (argv) => {
      events.push(`command:${argv.join(" ")}`);
      const stdout = argv[0] === "/usr/sbin/system_profiler" ? "BlackHole 2ch" : "";
      return { code: 0, stdout, stderr: "" };
    },
  });
  const handler = methods.get("googlemeet.join") as JoinHandler | undefined;
  const respond = vi.fn();

  await handler?.({
    params: { url: "https://meet.google.com/abc-defg-hij" },
    respond,
  });

  expect(respond.mock.calls[0]?.[0]).toBe(true);
  expect(events).toContain("browser:/act");
  expect(events).not.toContain("command:bridge start");
});
|
||||
|
||||
it("refreshes observe-only caption health when status is requested", async () => {
|
||||
let openedTab = false;
|
||||
let actCount = 0;
|
||||
@@ -2790,7 +2990,8 @@ describe("google-meet plugin", () => {
|
||||
chrome: {
|
||||
health: {
|
||||
inCall: true,
|
||||
speechReady: true,
|
||||
speechReady: false,
|
||||
speechBlockedReason: "audio-bridge-unavailable",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -3239,21 +3440,7 @@ describe("google-meet plugin", () => {
|
||||
});
|
||||
|
||||
it("pipes Chrome command-pair audio through the realtime provider", async () => {
|
||||
let callbacks:
|
||||
| {
|
||||
onAudio: (audio: Buffer) => void;
|
||||
onClearAudio: () => void;
|
||||
onMark?: (markName: string) => void;
|
||||
onToolCall?: (event: {
|
||||
itemId: string;
|
||||
callId: string;
|
||||
name: string;
|
||||
args: unknown;
|
||||
}) => void;
|
||||
onReady?: () => void;
|
||||
tools?: unknown[];
|
||||
}
|
||||
| undefined;
|
||||
let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined;
|
||||
const sendAudio = vi.fn();
|
||||
const bridge = {
|
||||
supportsToolResultContinuation: true,
|
||||
@@ -3357,6 +3544,14 @@ describe("google-meet plugin", () => {
|
||||
callbacks?.onClearAudio();
|
||||
callbacks?.onAudio(Buffer.from([6, 7]));
|
||||
callbacks?.onReady?.();
|
||||
callbacks?.onTranscript?.("assistant", "How can I help you?", true);
|
||||
callbacks?.onTranscript?.("user", "Please summarize the launch.", true);
|
||||
callbacks?.onEvent?.({ direction: "client", type: "response.create" });
|
||||
callbacks?.onEvent?.({
|
||||
direction: "server",
|
||||
type: "response.done",
|
||||
detail: "status=completed",
|
||||
});
|
||||
callbacks?.onToolCall?.({
|
||||
itemId: "item-1",
|
||||
callId: "tool-call-1",
|
||||
@@ -3396,6 +3591,23 @@ describe("google-meet plugin", () => {
|
||||
audioOutputActive: true,
|
||||
lastInputBytes: 3,
|
||||
lastOutputBytes: 4,
|
||||
realtimeTranscriptLines: 2,
|
||||
lastRealtimeTranscriptRole: "user",
|
||||
lastRealtimeTranscriptText: "Please summarize the launch.",
|
||||
lastRealtimeEventType: "server:response.done",
|
||||
lastRealtimeEventDetail: "status=completed",
|
||||
recentRealtimeTranscript: [
|
||||
expect.objectContaining({ role: "assistant", text: "How can I help you?" }),
|
||||
expect.objectContaining({ role: "user", text: "Please summarize the launch." }),
|
||||
],
|
||||
recentRealtimeEvents: [
|
||||
expect.objectContaining({ direction: "client", type: "response.create" }),
|
||||
expect.objectContaining({
|
||||
direction: "server",
|
||||
type: "response.done",
|
||||
detail: "status=completed",
|
||||
}),
|
||||
],
|
||||
clearCount: 1,
|
||||
});
|
||||
expect(callbacks).toMatchObject({
|
||||
@@ -3545,20 +3757,7 @@ describe("google-meet plugin", () => {
|
||||
});
|
||||
|
||||
it("pipes paired-node command-pair audio through the realtime provider", async () => {
|
||||
let callbacks:
|
||||
| {
|
||||
onAudio: (audio: Buffer) => void;
|
||||
onClearAudio: () => void;
|
||||
onToolCall?: (event: {
|
||||
itemId: string;
|
||||
callId: string;
|
||||
name: string;
|
||||
args: unknown;
|
||||
}) => void;
|
||||
onReady?: () => void;
|
||||
tools?: unknown[];
|
||||
}
|
||||
| undefined;
|
||||
let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined;
|
||||
const sendAudio = vi.fn();
|
||||
const bridge = {
|
||||
supportsToolResultContinuation: true,
|
||||
@@ -3633,6 +3832,12 @@ describe("google-meet plugin", () => {
|
||||
callbacks?.onAudio(Buffer.from([1, 2, 3]));
|
||||
callbacks?.onClearAudio();
|
||||
callbacks?.onReady?.();
|
||||
callbacks?.onTranscript?.("assistant", "How can I help from the node?", true);
|
||||
callbacks?.onEvent?.({
|
||||
direction: "server",
|
||||
type: "response.done",
|
||||
detail: "status=completed",
|
||||
});
|
||||
callbacks?.onToolCall?.({
|
||||
itemId: "item-1",
|
||||
callId: "tool-call-1",
|
||||
@@ -3715,6 +3920,11 @@ describe("google-meet plugin", () => {
|
||||
audioOutputActive: true,
|
||||
lastInputBytes: 3,
|
||||
lastOutputBytes: 3,
|
||||
realtimeTranscriptLines: 1,
|
||||
lastRealtimeTranscriptRole: "assistant",
|
||||
lastRealtimeTranscriptText: "How can I help from the node?",
|
||||
lastRealtimeEventType: "server:response.done",
|
||||
lastRealtimeEventDetail: "status=completed",
|
||||
clearCount: 1,
|
||||
});
|
||||
|
||||
|
||||
@@ -388,6 +388,17 @@ function writeDoctorStatus(status: Awaited<ReturnType<GoogleMeetRuntime["status"
|
||||
const speaker = health.lastCaptionSpeaker ? `${health.lastCaptionSpeaker}: ` : "";
|
||||
writeStdoutLine("last caption text: %s%s", speaker, health.lastCaptionText);
|
||||
}
|
||||
writeStdoutLine("realtime transcript lines: %s", health?.realtimeTranscriptLines ?? 0);
|
||||
if (health?.lastRealtimeTranscriptText) {
|
||||
const role = health.lastRealtimeTranscriptRole
|
||||
? `${health.lastRealtimeTranscriptRole}: `
|
||||
: "";
|
||||
writeStdoutLine("last realtime transcript: %s%s", role, health.lastRealtimeTranscriptText);
|
||||
}
|
||||
if (health?.lastRealtimeEventType) {
|
||||
const detail = health.lastRealtimeEventDetail ? ` ${health.lastRealtimeEventDetail}` : "";
|
||||
writeStdoutLine("last realtime event: %s%s", health.lastRealtimeEventType, detail);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,8 +14,14 @@ import {
|
||||
} from "./agent-consult.js";
|
||||
import type { GoogleMeetConfig } from "./config.js";
|
||||
import {
|
||||
getGoogleMeetRealtimeTranscriptHealth,
|
||||
getGoogleMeetRealtimeEventHealth,
|
||||
recordGoogleMeetRealtimeTranscript,
|
||||
recordGoogleMeetRealtimeEvent,
|
||||
resolveGoogleMeetRealtimeAudioFormat,
|
||||
resolveGoogleMeetRealtimeProvider,
|
||||
type GoogleMeetRealtimeEventEntry,
|
||||
type GoogleMeetRealtimeTranscriptEntry,
|
||||
} from "./realtime.js";
|
||||
import type { GoogleMeetChromeHealth } from "./transports/types.js";
|
||||
|
||||
@@ -65,7 +71,8 @@ export async function startNodeRealtimeAudioBridge(params: {
|
||||
fullConfig: params.fullConfig,
|
||||
providers: params.providers,
|
||||
});
|
||||
const transcript: Array<{ role: "user" | "assistant"; text: string }> = [];
|
||||
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
|
||||
const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
|
||||
|
||||
const stop = async () => {
|
||||
if (stopped) {
|
||||
@@ -148,11 +155,15 @@ export async function startNodeRealtimeAudioBridge(params: {
|
||||
},
|
||||
onTranscript: (role, text, isFinal) => {
|
||||
if (isFinal) {
|
||||
transcript.push({ role, text });
|
||||
if (transcript.length > 40) {
|
||||
transcript.splice(0, transcript.length - 40);
|
||||
}
|
||||
params.logger.debug?.(`[google-meet] ${role}: ${text}`);
|
||||
recordGoogleMeetRealtimeTranscript(transcript, role, text);
|
||||
params.logger.info(`[google-meet] node realtime ${role}: ${text}`);
|
||||
}
|
||||
},
|
||||
onEvent: (event) => {
|
||||
recordGoogleMeetRealtimeEvent(realtimeEvents, event);
|
||||
if (event.type === "error" || event.type === "response.done") {
|
||||
const detail = event.detail ? ` ${event.detail}` : "";
|
||||
params.logger.info(`[google-meet] node realtime ${event.direction}:${event.type}${detail}`);
|
||||
}
|
||||
},
|
||||
onToolCall: (event, session) => {
|
||||
@@ -261,6 +272,8 @@ export async function startNodeRealtimeAudioBridge(params: {
|
||||
lastClearAt,
|
||||
lastInputBytes,
|
||||
lastOutputBytes,
|
||||
...getGoogleMeetRealtimeTranscriptHealth(transcript),
|
||||
...getGoogleMeetRealtimeEventHealth(realtimeEvents),
|
||||
consecutiveInputErrors,
|
||||
lastInputError,
|
||||
clearCount,
|
||||
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
|
||||
resolveConfiguredRealtimeVoiceProvider,
|
||||
type RealtimeVoiceBridgeSession,
|
||||
type RealtimeVoiceBridgeEvent,
|
||||
type RealtimeVoiceProviderConfig,
|
||||
type RealtimeVoiceProviderPlugin,
|
||||
} from "openclaw/plugin-sdk/realtime-voice";
|
||||
@@ -55,6 +56,77 @@ type ResolvedRealtimeProvider = {
|
||||
providerConfig: RealtimeVoiceProviderConfig;
|
||||
};
|
||||
|
||||
/** One finalized line of realtime conversation kept in a bridge's transcript buffer. */
export type GoogleMeetRealtimeTranscriptEntry = {
  /** Timestamp string for when the line was recorded. */
  at: string;
  /** Which side of the conversation produced the line. */
  role: "user" | "assistant";
  /** The transcribed text. */
  text: string;
};
|
||||
|
||||
/**
 * Appends a timestamped transcript line to `transcript` and trims the buffer
 * in place so that it keeps only the most recent `maxEntries` lines.
 *
 * @param transcript - Mutable buffer owned by the caller; modified in place.
 * @param role - Speaker of the line.
 * @param text - Transcribed text.
 * @param maxEntries - Retention limit; expected to be a non-negative integer.
 *   Defaults to 40.
 * @returns The entry that was appended (stamped with the current time).
 */
export function recordGoogleMeetRealtimeTranscript(
  transcript: GoogleMeetRealtimeTranscriptEntry[],
  role: "user" | "assistant",
  text: string,
  maxEntries = 40,
): GoogleMeetRealtimeTranscriptEntry {
  const entry: GoogleMeetRealtimeTranscriptEntry = {
    at: new Date().toISOString(),
    role,
    text,
  };
  transcript.push(entry);
  const overflow = transcript.length - maxEntries;
  if (overflow > 0) {
    // Drop the oldest lines from the front; the newest stay at the end.
    transcript.splice(0, overflow);
  }
  return entry;
}
|
||||
|
||||
/**
 * Summarizes a transcript buffer into the realtime-transcript fields of the
 * Chrome health report.
 *
 * The buffer is only read, so a readonly array is accepted.
 *
 * @param transcript - Recorded transcript lines, oldest first.
 * @returns The line count, the `at`/`role`/`text` of the newest line
 *   (all `undefined` when the buffer is empty), and a new array holding the
 *   last five lines.
 */
export function getGoogleMeetRealtimeTranscriptHealth(
  transcript: readonly GoogleMeetRealtimeTranscriptEntry[],
): Pick<
  GoogleMeetChromeHealth,
  | "realtimeTranscriptLines"
  | "lastRealtimeTranscriptAt"
  | "lastRealtimeTranscriptRole"
  | "lastRealtimeTranscriptText"
  | "recentRealtimeTranscript"
> {
  const newest = transcript.at(-1);
  return {
    realtimeTranscriptLines: transcript.length,
    lastRealtimeTranscriptAt: newest?.at,
    lastRealtimeTranscriptRole: newest?.role,
    lastRealtimeTranscriptText: newest?.text,
    // slice() returns a fresh array, so callers cannot resize the live buffer.
    recentRealtimeTranscript: transcript.slice(-5),
  };
}
|
||||
|
||||
/** A realtime bridge event together with the timestamp string stored when it was recorded. */
export type GoogleMeetRealtimeEventEntry = RealtimeVoiceBridgeEvent & { at: string };
|
||||
|
||||
/**
 * Appends a timestamped copy of `event` to `events` and trims the buffer in
 * place so that it keeps only the most recent `maxEntries` events.
 *
 * @param events - Mutable buffer owned by the caller; modified in place.
 * @param event - Bridge event to record; it is copied, not stored by reference.
 * @param maxEntries - Retention limit; expected to be a non-negative integer.
 *   Defaults to 40.
 */
export function recordGoogleMeetRealtimeEvent(
  events: GoogleMeetRealtimeEventEntry[],
  event: RealtimeVoiceBridgeEvent,
  maxEntries = 40,
): void {
  // `at` is applied after the spread so a stray `at` property carried by the
  // incoming object at runtime cannot overwrite the recording timestamp.
  events.push({ ...event, at: new Date().toISOString() });
  const overflow = events.length - maxEntries;
  if (overflow > 0) {
    events.splice(0, overflow);
  }
}
|
||||
|
||||
/**
 * Summarizes a realtime event buffer into the realtime-event fields of the
 * Chrome health report.
 *
 * The buffer is only read, so a readonly array is accepted.
 *
 * @param events - Recorded events, oldest first.
 * @returns The newest event's timestamp, its type formatted as
 *   `direction:type`, and its detail (all `undefined` when the buffer is
 *   empty), plus a new array holding the last ten events.
 */
export function getGoogleMeetRealtimeEventHealth(
  events: readonly GoogleMeetRealtimeEventEntry[],
): Pick<
  GoogleMeetChromeHealth,
  | "lastRealtimeEventAt"
  | "lastRealtimeEventType"
  | "lastRealtimeEventDetail"
  | "recentRealtimeEvents"
> {
  const newest = events.at(-1);
  const newestLabel = newest ? `${newest.direction}:${newest.type}` : undefined;
  return {
    lastRealtimeEventAt: newest?.at,
    lastRealtimeEventType: newestLabel,
    lastRealtimeEventDetail: newest?.detail,
    recentRealtimeEvents: events.slice(-10),
  };
}
|
||||
|
||||
function splitCommand(argv: string[]): { command: string; args: string[] } {
|
||||
const [command, ...args] = argv;
|
||||
if (!command) {
|
||||
@@ -312,7 +384,8 @@ export async function startCommandRealtimeAudioBridge(params: {
|
||||
fullConfig: params.fullConfig,
|
||||
providers: params.providers,
|
||||
});
|
||||
const transcript: Array<{ role: "user" | "assistant"; text: string }> = [];
|
||||
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
|
||||
const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
|
||||
bridge = createRealtimeVoiceBridgeSession({
|
||||
provider: resolved.provider,
|
||||
providerConfig: resolved.providerConfig,
|
||||
@@ -335,11 +408,15 @@ export async function startCommandRealtimeAudioBridge(params: {
|
||||
},
|
||||
onTranscript: (role, text, isFinal) => {
|
||||
if (isFinal) {
|
||||
transcript.push({ role, text });
|
||||
if (transcript.length > 40) {
|
||||
transcript.splice(0, transcript.length - 40);
|
||||
}
|
||||
params.logger.debug?.(`[google-meet] ${role}: ${text}`);
|
||||
recordGoogleMeetRealtimeTranscript(transcript, role, text);
|
||||
params.logger.info(`[google-meet] realtime ${role}: ${text}`);
|
||||
}
|
||||
},
|
||||
onEvent: (event) => {
|
||||
recordGoogleMeetRealtimeEvent(realtimeEvents, event);
|
||||
if (event.type === "error" || event.type === "response.done") {
|
||||
const detail = event.detail ? ` ${event.detail}` : "";
|
||||
params.logger.info(`[google-meet] realtime ${event.direction}:${event.type}${detail}`);
|
||||
}
|
||||
},
|
||||
onToolCall: (event, session) => {
|
||||
@@ -414,6 +491,8 @@ export async function startCommandRealtimeAudioBridge(params: {
|
||||
lastInputBytes,
|
||||
lastOutputBytes,
|
||||
suppressedInputBytes,
|
||||
...getGoogleMeetRealtimeTranscriptHealth(transcript),
|
||||
...getGoogleMeetRealtimeEventHealth(realtimeEvents),
|
||||
lastClearAt,
|
||||
clearCount,
|
||||
bridgeClosed: stopped,
|
||||
|
||||
@@ -27,6 +27,11 @@ import {
|
||||
speakMeetViaVoiceCallGateway,
|
||||
} from "./voice-call-gateway.js";
|
||||
|
||||
/**
 * Any defined `audioBridge` value that a Chrome launch can resolve with,
 * whether it came from `launchChromeMeet` or `launchChromeMeetOnNode`.
 */
type ChromeAudioBridgeResult = NonNullable<
  | Awaited<ReturnType<typeof launchChromeMeet>>["audioBridge"]
  | Awaited<ReturnType<typeof launchChromeMeetOnNode>>["audioBridge"]
>;
|
||||
|
||||
/** Returns the current time formatted by `Date.prototype.toISOString`. */
function nowIso(): string {
  const current = new Date();
  return current.toISOString();
}
|
||||
@@ -421,26 +426,9 @@ export class GoogleMeetRuntime {
|
||||
launched: result.launched,
|
||||
nodeId: "nodeId" in result ? result.nodeId : undefined,
|
||||
browserProfile: this.params.config.chrome.browserProfile,
|
||||
audioBridge: result.audioBridge
|
||||
? {
|
||||
type: result.audioBridge.type,
|
||||
provider:
|
||||
result.audioBridge.type === "command-pair" ||
|
||||
result.audioBridge.type === "node-command-pair"
|
||||
? result.audioBridge.providerId
|
||||
: undefined,
|
||||
}
|
||||
: undefined,
|
||||
health: "browser" in result ? result.browser : undefined,
|
||||
};
|
||||
if (
|
||||
result.audioBridge?.type === "command-pair" ||
|
||||
result.audioBridge?.type === "node-command-pair"
|
||||
) {
|
||||
this.#sessionStops.set(session.id, result.audioBridge.stop);
|
||||
this.#sessionSpeakers.set(session.id, result.audioBridge.speak);
|
||||
this.#sessionHealth.set(session.id, result.audioBridge.getHealth);
|
||||
}
|
||||
this.#attachChromeAudioBridge(session, result.audioBridge);
|
||||
session.notes.push(
|
||||
result.audioBridge
|
||||
? transport === "chrome-node"
|
||||
@@ -558,6 +546,7 @@ export class GoogleMeetRuntime {
|
||||
return { found: true, spoken: true, session };
|
||||
}
|
||||
await this.#refreshBrowserHealthForChromeSession(session);
|
||||
await this.#ensureChromeRealtimeBridge(session);
|
||||
const speak = this.#sessionSpeakers.get(sessionId);
|
||||
if (!speak || session.state !== "active") {
|
||||
return { found: true, spoken: false, session };
|
||||
@@ -579,7 +568,7 @@ export class GoogleMeetRuntime {
|
||||
|
||||
async #speakWhenReady(session: GoogleMeetSession, instructions: string): Promise<boolean> {
|
||||
let result = await this.speak(session.id, instructions);
|
||||
if (result.spoken || !session.chrome?.audioBridge || session.transport === "twilio") {
|
||||
if (result.spoken || session.transport === "twilio") {
|
||||
return result.spoken;
|
||||
}
|
||||
const waitMs = Math.min(
|
||||
@@ -825,6 +814,64 @@ export class GoogleMeetRuntime {
|
||||
this.#refreshSpeechReadiness(session);
|
||||
}
|
||||
|
||||
/**
 * Records a launched audio bridge on the session and, for the command-pair
 * bridge kinds, registers its stop/speak/getHealth handles under the session id.
 * Does nothing when the session has no chrome state or no bridge was returned.
 */
#attachChromeAudioBridge(
  session: GoogleMeetSession,
  audioBridge: ChromeAudioBridgeResult | undefined,
) {
  const chrome = session.chrome;
  if (!chrome || !audioBridge) {
    return;
  }

  if (audioBridge.type === "command-pair" || audioBridge.type === "node-command-pair") {
    // Only these bridge kinds expose a provider id and controllable handles.
    chrome.audioBridge = { type: audioBridge.type, provider: audioBridge.providerId };
    this.#sessionStops.set(session.id, audioBridge.stop);
    this.#sessionSpeakers.set(session.id, audioBridge.speak);
    this.#sessionHealth.set(session.id, audioBridge.getHealth);
    return;
  }

  chrome.audioBridge = { type: audioBridge.type, provider: undefined };
}
|
||||
|
||||
/**
 * Starts and attaches the realtime audio bridge for an active local-Chrome
 * realtime session that does not have one yet.
 *
 * Returns without doing anything unless the session's last known browser
 * health says it is in the call, the microphone is not muted, and no manual
 * action is required.
 */
async #ensureChromeRealtimeBridge(session: GoogleMeetSession) {
  if (session.mode !== "realtime" || session.transport !== "chrome") {
    return;
  }
  if (session.state !== "active") {
    return;
  }
  if (!session.chrome || session.chrome.audioBridge) {
    // Either there is nothing to attach to, or a bridge is already attached.
    return;
  }

  const health = session.chrome.health;
  const readyForAudio =
    health?.inCall === true &&
    health.micMuted !== true &&
    health.manualActionRequired !== true;
  if (!readyForAudio) {
    return;
  }

  // Same config with chrome.launch forced off for this call.
  // NOTE(review): presumably this makes launchChromeMeet start only the bridge
  // without opening another tab; confirm against launchChromeMeet.
  const bridgeOnlyConfig = {
    ...this.params.config,
    chrome: {
      ...this.params.config.chrome,
      launch: false,
    },
  };
  const result = await launchChromeMeet({
    runtime: this.params.runtime,
    config: bridgeOnlyConfig,
    fullConfig: this.params.fullConfig,
    meetingSessionId: session.id,
    mode: session.mode,
    url: session.url,
    logger: this.params.logger,
  });
  this.#attachChromeAudioBridge(session, result.audioBridge);
  session.updatedAt = nowIso();
}
|
||||
|
||||
#refreshSpeechReadiness(session: GoogleMeetSession) {
|
||||
const readiness = evaluateSpeechReadiness(session);
|
||||
if (readiness.ready) {
|
||||
|
||||
@@ -96,12 +96,10 @@ export async function launchChromeMeet(params: {
|
||||
| ({ type: "command-pair" } & ChromeRealtimeAudioBridgeHandle);
|
||||
browser?: GoogleMeetChromeHealth;
|
||||
}> {
|
||||
let audioBridge:
|
||||
| { type: "external-command" }
|
||||
| ({ type: "command-pair" } & ChromeRealtimeAudioBridgeHandle)
|
||||
| undefined;
|
||||
|
||||
if (params.mode === "realtime") {
|
||||
const checkRealtimeAudioPrerequisites = async () => {
|
||||
if (params.mode !== "realtime") {
|
||||
return;
|
||||
}
|
||||
await assertBlackHole2chAvailable({
|
||||
runtime: params.runtime,
|
||||
timeoutMs: Math.min(params.config.chrome.joinTimeoutMs, 10_000),
|
||||
@@ -118,7 +116,16 @@ export async function launchChromeMeet(params: {
|
||||
);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const startRealtimeAudioBridge = async (): Promise<
|
||||
| { type: "external-command" }
|
||||
| ({ type: "command-pair" } & ChromeRealtimeAudioBridgeHandle)
|
||||
| undefined
|
||||
> => {
|
||||
if (params.mode !== "realtime") {
|
||||
return undefined;
|
||||
}
|
||||
if (params.config.chrome.audioBridgeCommand) {
|
||||
const bridge = await params.runtime.system.runCommandWithTimeout(
|
||||
params.config.chrome.audioBridgeCommand,
|
||||
@@ -129,55 +136,46 @@ export async function launchChromeMeet(params: {
|
||||
`failed to start Chrome audio bridge: ${bridge.stderr || bridge.stdout || bridge.code}`,
|
||||
);
|
||||
}
|
||||
audioBridge = { type: "external-command" };
|
||||
} else {
|
||||
if (!params.config.chrome.audioInputCommand || !params.config.chrome.audioOutputCommand) {
|
||||
throw new Error(
|
||||
"Chrome realtime mode requires chrome.audioInputCommand and chrome.audioOutputCommand, or chrome.audioBridgeCommand for an external bridge.",
|
||||
);
|
||||
}
|
||||
audioBridge = {
|
||||
type: "command-pair",
|
||||
...(await startCommandRealtimeAudioBridge({
|
||||
config: params.config,
|
||||
fullConfig: params.fullConfig,
|
||||
runtime: params.runtime,
|
||||
meetingSessionId: params.meetingSessionId,
|
||||
inputCommand: params.config.chrome.audioInputCommand,
|
||||
outputCommand: params.config.chrome.audioOutputCommand,
|
||||
logger: params.logger,
|
||||
})),
|
||||
};
|
||||
return { type: "external-command" };
|
||||
}
|
||||
}
|
||||
|
||||
if (!params.config.chrome.launch) {
|
||||
return { launched: false, audioBridge };
|
||||
}
|
||||
|
||||
let commandPairBridgeStopped = false;
|
||||
const stopCommandPairBridge = async () => {
|
||||
if (commandPairBridgeStopped) {
|
||||
return;
|
||||
}
|
||||
commandPairBridgeStopped = true;
|
||||
if (audioBridge?.type === "command-pair") {
|
||||
await audioBridge.stop();
|
||||
if (!params.config.chrome.audioInputCommand || !params.config.chrome.audioOutputCommand) {
|
||||
throw new Error(
|
||||
"Chrome realtime mode requires chrome.audioInputCommand and chrome.audioOutputCommand, or chrome.audioBridgeCommand for an external bridge.",
|
||||
);
|
||||
}
|
||||
return {
|
||||
type: "command-pair",
|
||||
...(await startCommandRealtimeAudioBridge({
|
||||
config: params.config,
|
||||
fullConfig: params.fullConfig,
|
||||
runtime: params.runtime,
|
||||
meetingSessionId: params.meetingSessionId,
|
||||
inputCommand: params.config.chrome.audioInputCommand,
|
||||
outputCommand: params.config.chrome.audioOutputCommand,
|
||||
logger: params.logger,
|
||||
})),
|
||||
};
|
||||
};
|
||||
|
||||
try {
|
||||
const result = await openMeetWithBrowserRequest({
|
||||
callBrowser: callLocalBrowserRequest,
|
||||
config: params.config,
|
||||
mode: params.mode,
|
||||
url: params.url,
|
||||
});
|
||||
return { ...result, audioBridge };
|
||||
} catch (error) {
|
||||
await stopCommandPairBridge();
|
||||
throw error;
|
||||
await checkRealtimeAudioPrerequisites();
|
||||
|
||||
if (!params.config.chrome.launch) {
|
||||
return { launched: false, audioBridge: await startRealtimeAudioBridge() };
|
||||
}
|
||||
|
||||
const result = await openMeetWithBrowserRequest({
|
||||
callBrowser: callLocalBrowserRequest,
|
||||
config: params.config,
|
||||
mode: params.mode,
|
||||
url: params.url,
|
||||
});
|
||||
const shouldStartRealtimeBridge =
|
||||
params.mode === "realtime" &&
|
||||
result.browser?.inCall === true &&
|
||||
result.browser.micMuted !== true &&
|
||||
result.browser.manualActionRequired !== true;
|
||||
const audioBridge = shouldStartRealtimeBridge ? await startRealtimeAudioBridge() : undefined;
|
||||
return { ...result, audioBridge };
|
||||
}
|
||||
|
||||
function parseNodeStartResult(raw: unknown): {
|
||||
@@ -296,6 +294,7 @@ async function grantMeetMediaPermissions(params: {
|
||||
callBrowser: BrowserRequestCaller;
|
||||
timeoutMs: number;
|
||||
allowMicrophone: boolean;
|
||||
targetId: string;
|
||||
}): Promise<string[]> {
|
||||
if (!params.allowMicrophone) {
|
||||
return ["Observe-only mode skips Meet microphone/camera permission grants."];
|
||||
@@ -308,6 +307,7 @@ async function grantMeetMediaPermissions(params: {
|
||||
origin: "https://meet.google.com",
|
||||
permissions: ["audioCapture", "videoCapture"],
|
||||
optionalPermissions: ["speakerSelection"],
|
||||
targetId: params.targetId,
|
||||
timeoutMs: Math.min(params.timeoutMs, 5_000),
|
||||
},
|
||||
timeoutMs: Math.min(params.timeoutMs, 5_000),
|
||||
@@ -611,6 +611,7 @@ async function openMeetWithBrowserRequest(params: {
|
||||
const permissionNotes = await grantMeetMediaPermissions({
|
||||
allowMicrophone: params.mode === "realtime",
|
||||
callBrowser: params.callBrowser,
|
||||
targetId,
|
||||
timeoutMs,
|
||||
});
|
||||
const deadline = Date.now() + Math.max(0, params.config.chrome.waitForInCallMs);
|
||||
@@ -703,6 +704,7 @@ async function inspectRecoverableMeetTab(params: {
|
||||
: await grantMeetMediaPermissions({
|
||||
allowMicrophone,
|
||||
callBrowser: params.callBrowser,
|
||||
targetId: params.targetId,
|
||||
timeoutMs: params.timeoutMs,
|
||||
});
|
||||
const evaluated = await params.callBrowser({
|
||||
|
||||
@@ -43,6 +43,24 @@ export type GoogleMeetChromeHealth = {
|
||||
speaker?: string;
|
||||
text: string;
|
||||
}>;
|
||||
realtimeTranscriptLines?: number;
|
||||
lastRealtimeTranscriptAt?: string;
|
||||
lastRealtimeTranscriptRole?: "user" | "assistant";
|
||||
lastRealtimeTranscriptText?: string;
|
||||
recentRealtimeTranscript?: Array<{
|
||||
at: string;
|
||||
role: "user" | "assistant";
|
||||
text: string;
|
||||
}>;
|
||||
lastRealtimeEventAt?: string;
|
||||
lastRealtimeEventType?: string;
|
||||
lastRealtimeEventDetail?: string;
|
||||
recentRealtimeEvents?: Array<{
|
||||
at: string;
|
||||
direction: "client" | "server";
|
||||
type: string;
|
||||
detail?: string;
|
||||
}>;
|
||||
manualActionRequired?: boolean;
|
||||
manualActionReason?: GoogleMeetManualActionReason;
|
||||
manualActionMessage?: string;
|
||||
|
||||
@@ -457,4 +457,101 @@ describe("buildOpenAIRealtimeVoiceProvider", () => {
|
||||
audio_end_ms: 240,
|
||||
});
|
||||
});
|
||||
|
||||
// Checks that the bridge forwards assistant audio and a final transcript when the
// server delivers them as `response.output_audio.delta` and
// `response.output_audio_transcript.done` events.
it("forwards current realtime output audio events", async () => {
|
||||
const provider = buildOpenAIRealtimeVoiceProvider();
|
||||
const onAudio = vi.fn();
|
||||
const onTranscript = vi.fn();
|
||||
const bridge = provider.createBridge({
|
||||
providerConfig: { apiKey: "sk-test" }, // pragma: allowlist secret
|
||||
onAudio,
|
||||
|
||||
onClearAudio: vi.fn(),
|
||||
onTranscript,
|
||||
|
||||
});
|
||||
const connecting = bridge.connect();
|
||||
const socket = FakeWebSocket.instances[0];
|
||||
if (!socket) {
|
||||
throw new Error("expected bridge to create a websocket");
|
||||
}
|
||||
|
||||
// Mark the fake socket open, emit "open", then wait for connect() to settle.
socket.readyState = FakeWebSocket.OPEN;
|
||||
socket.emit("open");
|
||||
await connecting;
|
||||
// Deliver a session.updated message before any audio events are sent.
socket.emit("message", Buffer.from(JSON.stringify({ type: "session.updated" })));
|
||||
|
||||
const audio = Buffer.from("assistant audio");
|
||||
// Audio delta carries the buffer base64-encoded.
socket.emit(
|
||||
"message",
|
||||
Buffer.from(
|
||||
JSON.stringify({
|
||||
type: "response.output_audio.delta",
|
||||
item_id: "item_1",
|
||||
delta: audio.toString("base64"),
|
||||
}),
|
||||
),
|
||||
);
|
||||
// Final transcript event for the assistant's spoken output.
socket.emit(
|
||||
"message",
|
||||
Buffer.from(
|
||||
JSON.stringify({
|
||||
type: "response.output_audio_transcript.done",
|
||||
transcript: "hello from current realtime events",
|
||||
}),
|
||||
),
|
||||
);
|
||||
|
||||
// onAudio must receive the original (decoded) bytes, and onTranscript the
// assistant text flagged as final (third argument `true`).
expect(onAudio).toHaveBeenCalledWith(audio);
|
||||
expect(onTranscript).toHaveBeenCalledWith(
|
||||
"assistant",
|
||||
"hello from current realtime events",
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
// Checks that triggerGreeting(text) sends a user `conversation.item.create` carrying
// the text, followed by a `response.create` that requests audio and text output, and
// that both sends are reported through onEvent with direction "client".
it("creates an explicit user item and audio response for manual speech", async () => {
|
||||
const provider = buildOpenAIRealtimeVoiceProvider();
|
||||
const onEvent = vi.fn();
|
||||
const bridge = provider.createBridge({
|
||||
providerConfig: { apiKey: "sk-test" }, // pragma: allowlist secret
|
||||
onAudio: vi.fn(),
|
||||
onClearAudio: vi.fn(),
|
||||
onEvent,
|
||||
|
||||
});
|
||||
const connecting = bridge.connect();
|
||||
const socket = FakeWebSocket.instances[0];
|
||||
if (!socket) {
|
||||
throw new Error("expected bridge to create a websocket");
|
||||
}
|
||||
|
||||
// Mark the fake socket open, emit "open", then wait for connect() to settle.
socket.readyState = FakeWebSocket.OPEN;
|
||||
socket.emit("open");
|
||||
await connecting;
|
||||
// Deliver a session.updated message before triggering speech.
socket.emit("message", Buffer.from(JSON.stringify({ type: "session.updated" })));
|
||||
|
||||
// triggerGreeting is called through optional chaining, so it may be absent on the bridge type.
bridge.triggerGreeting?.("Say exactly: hello from explicit speech.");
|
||||
|
||||
// Only the last two sent payloads are compared; earlier sends are ignored.
expect(parseSent(socket).slice(-2)).toEqual([
|
||||
{
|
||||
type: "conversation.item.create",
|
||||
item: {
|
||||
type: "message",
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "input_text",
|
||||
text: "Say exactly: hello from explicit speech.",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
{
|
||||
type: "response.create",
|
||||
response: {
|
||||
output_modalities: ["audio", "text"],
|
||||
},
|
||||
},
|
||||
]);
|
||||
// Each outbound send must also be reported as a client-direction event.
expect(onEvent).toHaveBeenCalledWith({ direction: "client", type: "conversation.item.create" });
|
||||
expect(onEvent).toHaveBeenCalledWith({ direction: "client", type: "response.create" });
|
||||
});
|
||||
});
|
||||
|
||||
@@ -85,6 +85,7 @@ type RealtimeEvent = {
|
||||
response?: {
|
||||
id?: string;
|
||||
status?: string;
|
||||
status_details?: unknown;
|
||||
};
|
||||
error?: unknown;
|
||||
};
|
||||
@@ -265,19 +266,19 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
content: [{ type: "input_text", text }],
|
||||
},
|
||||
});
|
||||
this.sendEvent({ type: "response.create" });
|
||||
this.sendEvent({
|
||||
type: "response.create",
|
||||
response: {
|
||||
output_modalities: ["audio", "text"],
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Triggers a greeting response on the realtime session.
 *
 * Returns without sending anything when the bridge is not connected or has no socket.
 *
 * @param instructions Optional greeting text; falls back to `this.config.instructions`.
 */
triggerGreeting(instructions?: string): void {
|
||||
if (!this.isConnected() || !this.ws) {
|
||||
return;
|
||||
}
|
||||
// NOTE(review): this listing carries no +/- diff markers. The direct
// `response.create` send below and the `sendUserMessage(...)` call after it look
// like the removed and added sides of the same change — confirm which one is live.
this.sendEvent({
|
||||
type: "response.create",
|
||||
response: {
|
||||
instructions: instructions ?? this.config.instructions,
|
||||
},
|
||||
});
|
||||
// Falls back to the literal "Greet the meeting." when neither the argument nor
// the configured instructions are set.
this.sendUserMessage(instructions ?? this.config.instructions ?? "Greet the meeting.");
|
||||
}
|
||||
|
||||
submitToolResult(callId: string, result: unknown): void {
|
||||
@@ -545,6 +546,11 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
}
|
||||
|
||||
private handleEvent(event: RealtimeEvent): void {
|
||||
this.config.onEvent?.({
|
||||
direction: "server",
|
||||
type: event.type,
|
||||
detail: this.describeServerEvent(event),
|
||||
});
|
||||
switch (event.type) {
|
||||
case "session.created":
|
||||
return;
|
||||
@@ -564,7 +570,8 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
this.responseActive = true;
|
||||
return;
|
||||
|
||||
case "response.audio.delta": {
|
||||
case "response.audio.delta":
|
||||
case "response.output_audio.delta": {
|
||||
if (!event.delta) {
|
||||
return;
|
||||
}
|
||||
@@ -586,12 +593,14 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
return;
|
||||
|
||||
case "response.audio_transcript.delta":
|
||||
case "response.output_audio_transcript.delta":
|
||||
if (event.delta) {
|
||||
this.config.onTranscript?.("assistant", event.delta, false);
|
||||
}
|
||||
return;
|
||||
|
||||
case "response.audio_transcript.done":
|
||||
case "response.output_audio_transcript.done":
|
||||
if (event.transcript) {
|
||||
this.config.onTranscript?.("assistant", event.transcript, true);
|
||||
}
|
||||
@@ -698,6 +707,11 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
|
||||
private sendEvent(event: unknown): void {
|
||||
if (this.ws?.readyState === WebSocket.OPEN) {
|
||||
const type =
|
||||
event && typeof event === "object" && typeof (event as { type?: unknown }).type === "string"
|
||||
? (event as { type: string }).type
|
||||
: "unknown";
|
||||
this.config.onEvent?.({ direction: "client", type });
|
||||
const payload = JSON.stringify(event);
|
||||
captureWsEvent({
|
||||
url: this.resolveConnectionParams().url,
|
||||
@@ -713,6 +727,23 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
|
||||
this.ws.send(payload);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds an optional detail string for a server event.
 *
 * - `error`: returns whatever `readRealtimeErrorDetail(event.error)` yields.
 * - `response.done`: returns `status=<status>` and the JSON-serialized
 *   `status_details`, joined by a space; `undefined` when neither is present.
 * - any other event type: `undefined`.
 *
 * @param event The parsed server event.
 * @returns The detail text, or `undefined` when there is nothing to report.
 */
private describeServerEvent(event: RealtimeEvent): string | undefined {
|
||||
if (event.type === "error") {
|
||||
return readRealtimeErrorDetail(event.error);
|
||||
}
|
||||
if (event.type === "response.done") {
|
||||
const status = event.response?.status;
|
||||
// Serialize status_details only when the field is present.
const details =
|
||||
event.response?.status_details === undefined
|
||||
? undefined
|
||||
: JSON.stringify(event.response.status_details);
|
||||
return (
|
||||
// filter(Boolean) drops the missing parts; `|| undefined` turns an empty join into undefined.
[status ? `status=${status}` : undefined, details].filter(Boolean).join(" ") || undefined
|
||||
);
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
function readStringField(value: unknown, key: string): string | undefined {
|
||||
|
||||
@@ -4,6 +4,7 @@ export type {
|
||||
RealtimeVoiceBargeInOptions,
|
||||
RealtimeVoiceBridge,
|
||||
RealtimeVoiceBridgeCallbacks,
|
||||
RealtimeVoiceBridgeEvent,
|
||||
RealtimeVoiceBrowserSession,
|
||||
RealtimeVoiceBrowserSessionCreateRequest,
|
||||
RealtimeVoiceBridgeCreateRequest,
|
||||
|
||||
@@ -208,7 +208,7 @@ async function resolveTrustedOfficialPrereleaseResolution(params: {
|
||||
);
|
||||
const stableVersion = semverVersions
|
||||
.filter((value) => !isPrereleaseSemverVersion(value))
|
||||
.sort(compareStableSemver)
|
||||
.toSorted(compareStableSemver)
|
||||
.at(-1);
|
||||
if (!stableVersion) {
|
||||
if (semverVersions.length > 0 && semverVersions.every(isPrereleaseSemverVersion)) {
|
||||
|
||||
@@ -52,11 +52,18 @@ export type RealtimeVoiceToolResultOptions = {
|
||||
willContinue?: boolean;
|
||||
};
|
||||
|
||||
/** A single realtime event reported to the optional `onEvent` callback. */
export type RealtimeVoiceBridgeEvent = {
|
||||
/** Which side the event is attributed to: "client" or "server". */
direction: "client" | "server";
|
||||
/** The event's type name. */
type: string;
|
||||
/** Optional extra detail text for the event. */
detail?: string;
|
||||
};
|
||||
|
||||
export type RealtimeVoiceBridgeCallbacks = {
|
||||
onAudio: (audio: Buffer) => void;
|
||||
onClearAudio: () => void;
|
||||
onMark?: (markName: string) => void;
|
||||
onTranscript?: (role: RealtimeVoiceRole, text: string, isFinal: boolean) => void;
|
||||
onEvent?: (event: RealtimeVoiceBridgeEvent) => void;
|
||||
onToolCall?: (event: RealtimeVoiceToolCallEvent) => void;
|
||||
onReady?: () => void;
|
||||
onError?: (error: Error) => void;
|
||||
|
||||
@@ -4,6 +4,7 @@ import type {
|
||||
RealtimeVoiceAudioFormat,
|
||||
RealtimeVoiceBargeInOptions,
|
||||
RealtimeVoiceCloseReason,
|
||||
RealtimeVoiceBridgeEvent,
|
||||
RealtimeVoiceProviderConfig,
|
||||
RealtimeVoiceRole,
|
||||
RealtimeVoiceTool,
|
||||
@@ -44,6 +45,7 @@ export type RealtimeVoiceBridgeSessionParams = {
|
||||
triggerGreetingOnReady?: boolean;
|
||||
tools?: RealtimeVoiceTool[];
|
||||
onTranscript?: (role: RealtimeVoiceRole, text: string, isFinal: boolean) => void;
|
||||
onEvent?: (event: RealtimeVoiceBridgeEvent) => void;
|
||||
onToolCall?: (event: RealtimeVoiceToolCallEvent, session: RealtimeVoiceBridgeSession) => void;
|
||||
onReady?: (session: RealtimeVoiceBridgeSession) => void;
|
||||
onError?: (error: Error) => void;
|
||||
@@ -104,6 +106,7 @@ export function createRealtimeVoiceBridgeSession(
|
||||
}
|
||||
},
|
||||
onTranscript: params.onTranscript,
|
||||
onEvent: params.onEvent,
|
||||
onToolCall: (event) => {
|
||||
if (!bridge) {
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user