fix: log google meet realtime models

This commit is contained in:
Peter Steinberger
2026-05-04 06:03:32 +01:00
parent 1bf824f586
commit a6d67ccf29
12 changed files with 123 additions and 3 deletions

View File

@@ -250,6 +250,7 @@ export function buildDeepgramRealtimeTranscriptionProvider(): RealtimeTranscript
id: "deepgram",
label: "Deepgram Realtime Transcription",
aliases: ["deepgram-realtime", "nova-3-streaming"],
defaultModel: DEFAULT_DEEPGRAM_AUDIO_MODEL,
autoSelectOrder: 35,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>

View File

@@ -243,6 +243,7 @@ export function buildElevenLabsRealtimeTranscriptionProvider(): RealtimeTranscri
id: "elevenlabs",
label: "ElevenLabs Realtime Transcription",
aliases: ["elevenlabs-realtime", "scribe-v2-realtime"],
defaultModel: ELEVENLABS_REALTIME_DEFAULT_MODEL,
autoSelectOrder: 40,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>

View File

@@ -3806,6 +3806,7 @@ describe("google-meet plugin", () => {
const provider: RealtimeTranscriptionProviderPlugin = {
id: "openai",
label: "OpenAI",
defaultModel: "gpt-4o-transcribe",
autoSelectOrder: 1,
resolveConfig: ({ rawConfig }) => rawConfig,
isConfigured: () => true,
@@ -3882,6 +3883,9 @@ describe("google-meet plugin", () => {
spawn: spawnMock,
});
expect(noopLogger.info).toHaveBeenCalledWith(
"[google-meet] agent audio bridge starting: transcriptionProvider=openai transcriptionModel=gpt-4o-transcribe tts=telephony audioFormat=pcm16-24khz",
);
inputStdout.write(Buffer.from([1, 0, 2, 0, 3, 0, 4, 0]));
callbacks?.onTranscript?.("Please summarize the launch.");
await new Promise((resolve) => setTimeout(resolve, 1100));
@@ -3942,6 +3946,7 @@ describe("google-meet plugin", () => {
const provider: RealtimeVoiceProviderPlugin = {
id: "openai",
label: "OpenAI",
defaultModel: "gpt-realtime-1.5",
autoSelectOrder: 1,
resolveConfig: ({ rawConfig }) => rawConfig,
isConfigured: () => true,
@@ -4023,6 +4028,9 @@ describe("google-meet plugin", () => {
spawn: spawnMock,
});
expect(noopLogger.info).toHaveBeenCalledWith(
"[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz",
);
inputStdout.write(Buffer.from([1, 2, 3]));
callbacks?.onAudio(Buffer.from([4, 5]));
callbacks?.onMark?.("mark-1");
@@ -4154,6 +4162,7 @@ describe("google-meet plugin", () => {
const provider: RealtimeVoiceProviderPlugin = {
id: "openai",
label: "OpenAI",
defaultModel: "gpt-realtime-1.5",
autoSelectOrder: 1,
resolveConfig: ({ rawConfig }) => rawConfig,
isConfigured: () => true,
@@ -4492,6 +4501,9 @@ describe("google-meet plugin", () => {
providers: [provider],
});
expect(noopLogger.info).toHaveBeenCalledWith(
"[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz",
);
callbacks?.onAudio(Buffer.from([1, 2, 3]));
callbacks?.onClearAudio();
callbacks?.onReady?.();

View File

@@ -31,6 +31,8 @@ import {
isGoogleMeetLikelyAssistantEchoTranscript,
convertGoogleMeetBridgeAudioForStt,
convertGoogleMeetTtsAudioForBridge,
formatGoogleMeetAgentAudioModelLog,
formatGoogleMeetRealtimeVoiceModelLog,
type GoogleMeetRealtimeEventEntry,
type GoogleMeetRealtimeTranscriptEntry,
} from "./realtime.js";
@@ -96,6 +98,13 @@ export async function startNodeAgentAudioBridge(params: {
fullConfig: params.fullConfig,
providers: params.providers,
});
params.logger.info(
formatGoogleMeetAgentAudioModelLog({
provider: resolved.provider,
providerConfig: resolved.providerConfig,
audioFormat: params.config.chrome.audioFormat,
}),
);
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
let agentConsultActive = false;
let pendingAgentQuestion: string | undefined;
@@ -390,6 +399,15 @@ export async function startNodeRealtimeAudioBridge(params: {
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
const strategy = params.config.realtime.strategy;
params.logger.info(
formatGoogleMeetRealtimeVoiceModelLog({
strategy,
provider: resolved.provider,
providerConfig: resolved.providerConfig,
fallbackModel: params.config.realtime.model,
audioFormat: params.config.chrome.audioFormat,
}),
);
let agentConsultActive = false;
let pendingAgentQuestion: string | undefined;
let agentConsultDebounceTimer: ReturnType<typeof setTimeout> | undefined;

View File

@@ -409,6 +409,70 @@ export function buildGoogleMeetSpeakExactUserMessage(text: string): string {
].join("\n");
}
/**
 * Narrows an arbitrary value to a non-empty, trimmed string for log output.
 * Returns `undefined` for non-strings and whitespace-only strings.
 */
function readLogString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
/**
 * Sanitizes a value for inclusion in a single-line `key=value` log entry:
 * collapses whitespace runs to underscores, caps length at 180 characters,
 * and falls back to "unknown" for missing/empty values.
 */
function formatLogValue(value: string | undefined): string {
  if (value === undefined) {
    return "unknown";
  }
  const collapsed = value.replace(/\s+/g, "_");
  const truncated = collapsed.slice(0, 180);
  return truncated.length > 0 ? truncated : "unknown";
}
/**
 * Picks the model name to show in startup logs, checking sources in
 * priority order: explicit config `model`, config `modelId`, the caller's
 * fallback, then the provider's declared default. Emits the sentinel
 * "provider-default" when none yields a usable string.
 */
function resolveProviderModelForLog(params: {
  provider: { defaultModel?: string };
  providerConfig: RealtimeVoiceProviderConfig | RealtimeTranscriptionProviderConfig;
  fallbackModel?: string;
}): string {
  // Ordered from most specific (per-config) to least specific (provider default).
  const candidates: unknown[] = [
    params.providerConfig.model,
    params.providerConfig.modelId,
    params.fallbackModel,
    params.provider.defaultModel,
  ];
  for (const candidate of candidates) {
    const resolved = readLogString(candidate);
    if (resolved !== undefined) {
      return resolved;
    }
  }
  return "provider-default";
}
/**
 * Builds the single-line startup log for the realtime voice bridge, e.g.
 * `[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz`.
 * All values pass through {@link formatLogValue} so the line stays single-line
 * and bounded in length.
 */
export function formatGoogleMeetRealtimeVoiceModelLog(params: {
  strategy: string;
  provider: RealtimeVoiceProviderPlugin;
  providerConfig: RealtimeVoiceProviderConfig;
  fallbackModel?: string;
  audioFormat: GoogleMeetConfig["chrome"]["audioFormat"];
}): string {
  const model = resolveProviderModelForLog({
    provider: params.provider,
    providerConfig: params.providerConfig,
    fallbackModel: params.fallbackModel,
  });
  const segments: string[] = [
    `[google-meet] realtime voice bridge starting: strategy=${formatLogValue(params.strategy)}`,
    `provider=${formatLogValue(params.provider.id)}`,
    `model=${formatLogValue(model)}`,
    `audioFormat=${formatLogValue(params.audioFormat)}`,
  ];
  return segments.join(" ");
}
/**
 * Builds the single-line startup log for the agent audio bridge, e.g.
 * `[google-meet] agent audio bridge starting: transcriptionProvider=openai transcriptionModel=gpt-4o-transcribe tts=telephony audioFormat=pcm16-24khz`.
 * The `tts=telephony` segment is fixed; other values are sanitized via
 * {@link formatLogValue}.
 */
export function formatGoogleMeetAgentAudioModelLog(params: {
  provider: RealtimeTranscriptionProviderPlugin;
  providerConfig: RealtimeTranscriptionProviderConfig;
  audioFormat: GoogleMeetConfig["chrome"]["audioFormat"];
}): string {
  const model = resolveProviderModelForLog({
    provider: params.provider,
    providerConfig: params.providerConfig,
  });
  const segments: string[] = [
    `[google-meet] agent audio bridge starting: transcriptionProvider=${formatLogValue(
      params.provider.id,
    )}`,
    `transcriptionModel=${formatLogValue(model)}`,
    "tts=telephony",
    `audioFormat=${formatLogValue(params.audioFormat)}`,
  ];
  return segments.join(" ");
}
function normalizeGoogleMeetTtsPromptText(text: string | undefined): string | undefined {
const trimmed = text?.trim();
if (!trimmed) {
@@ -464,6 +528,13 @@ export async function startCommandAgentAudioBridge(params: {
fullConfig: params.fullConfig,
providers: params.providers,
});
params.logger.info(
formatGoogleMeetAgentAudioModelLog({
provider: resolved.provider,
providerConfig: resolved.providerConfig,
audioFormat: params.config.chrome.audioFormat,
}),
);
const terminateProcess = (proc: BridgeProcess, signal: NodeJS.Signals = "SIGTERM") => {
if (proc.killed && signal !== "SIGKILL") {
@@ -956,6 +1027,15 @@ export async function startCommandRealtimeAudioBridge(params: {
providers: params.providers,
});
const strategy = params.config.realtime.strategy;
params.logger.info(
formatGoogleMeetRealtimeVoiceModelLog({
strategy,
provider: resolved.provider,
providerConfig: resolved.providerConfig,
fallbackModel: params.config.realtime.model,
audioFormat: params.config.chrome.audioFormat,
}),
);
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
let agentConsultActive = false;

View File

@@ -830,6 +830,7 @@ export function buildGoogleRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
return {
id: "google",
label: "Google Live Voice",
defaultModel: GOOGLE_REALTIME_DEFAULT_MODEL,
autoSelectOrder: 20,
resolveConfig: ({ cfg, rawConfig }) => normalizeProviderConfig(rawConfig, cfg),
isConfigured: ({ providerConfig }) =>

View File

@@ -249,6 +249,7 @@ export function buildMistralRealtimeTranscriptionProvider(): RealtimeTranscripti
id: "mistral",
label: "Mistral Realtime Transcription",
aliases: ["mistral-realtime", "voxtral-realtime"],
defaultModel: MISTRAL_REALTIME_DEFAULT_MODEL,
autoSelectOrder: 45,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>

View File

@@ -44,6 +44,7 @@ const OPENAI_REALTIME_TRANSCRIPTION_URL = "wss://api.openai.com/v1/realtime?inte
const OPENAI_REALTIME_TRANSCRIPTION_CONNECT_TIMEOUT_MS = 10_000;
const OPENAI_REALTIME_TRANSCRIPTION_MAX_RECONNECT_ATTEMPTS = 5;
const OPENAI_REALTIME_TRANSCRIPTION_RECONNECT_DELAY_MS = 1000;
const OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL = "gpt-4o-transcribe";
function normalizeProviderConfig(
config: RealtimeTranscriptionProviderConfig,
@@ -174,6 +175,7 @@ export function buildOpenAIRealtimeTranscriptionProvider(): RealtimeTranscriptio
id: "openai",
label: "OpenAI Realtime Transcription",
aliases: ["openai-realtime"],
defaultModel: OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL,
autoSelectOrder: 10,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>
@@ -188,7 +190,7 @@ export function buildOpenAIRealtimeTranscriptionProvider(): RealtimeTranscriptio
...req,
apiKey,
language: config.language,
model: config.model ?? "gpt-4o-transcribe",
model: config.model ?? OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL,
prompt: config.prompt,
silenceDurationMs: config.silenceDurationMs ?? 800,
vadThreshold: config.vadThreshold ?? 0.5,

View File

@@ -871,6 +871,7 @@ export function buildOpenAIRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
return {
id: "openai",
label: "OpenAI Realtime Voice",
defaultModel: OPENAI_REALTIME_DEFAULT_MODEL,
autoSelectOrder: 10,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>