mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:10:43 +00:00
fix: log google meet realtime models
This commit is contained in:
@@ -250,6 +250,7 @@ export function buildDeepgramRealtimeTranscriptionProvider(): RealtimeTranscript
|
||||
id: "deepgram",
|
||||
label: "Deepgram Realtime Transcription",
|
||||
aliases: ["deepgram-realtime", "nova-3-streaming"],
|
||||
defaultModel: DEFAULT_DEEPGRAM_AUDIO_MODEL,
|
||||
autoSelectOrder: 35,
|
||||
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
|
||||
@@ -243,6 +243,7 @@ export function buildElevenLabsRealtimeTranscriptionProvider(): RealtimeTranscri
|
||||
id: "elevenlabs",
|
||||
label: "ElevenLabs Realtime Transcription",
|
||||
aliases: ["elevenlabs-realtime", "scribe-v2-realtime"],
|
||||
defaultModel: ELEVENLABS_REALTIME_DEFAULT_MODEL,
|
||||
autoSelectOrder: 40,
|
||||
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
|
||||
@@ -3806,6 +3806,7 @@ describe("google-meet plugin", () => {
|
||||
const provider: RealtimeTranscriptionProviderPlugin = {
|
||||
id: "openai",
|
||||
label: "OpenAI",
|
||||
defaultModel: "gpt-4o-transcribe",
|
||||
autoSelectOrder: 1,
|
||||
resolveConfig: ({ rawConfig }) => rawConfig,
|
||||
isConfigured: () => true,
|
||||
@@ -3882,6 +3883,9 @@ describe("google-meet plugin", () => {
|
||||
spawn: spawnMock,
|
||||
});
|
||||
|
||||
expect(noopLogger.info).toHaveBeenCalledWith(
|
||||
"[google-meet] agent audio bridge starting: transcriptionProvider=openai transcriptionModel=gpt-4o-transcribe tts=telephony audioFormat=pcm16-24khz",
|
||||
);
|
||||
inputStdout.write(Buffer.from([1, 0, 2, 0, 3, 0, 4, 0]));
|
||||
callbacks?.onTranscript?.("Please summarize the launch.");
|
||||
await new Promise((resolve) => setTimeout(resolve, 1100));
|
||||
@@ -3942,6 +3946,7 @@ describe("google-meet plugin", () => {
|
||||
const provider: RealtimeVoiceProviderPlugin = {
|
||||
id: "openai",
|
||||
label: "OpenAI",
|
||||
defaultModel: "gpt-realtime-1.5",
|
||||
autoSelectOrder: 1,
|
||||
resolveConfig: ({ rawConfig }) => rawConfig,
|
||||
isConfigured: () => true,
|
||||
@@ -4023,6 +4028,9 @@ describe("google-meet plugin", () => {
|
||||
spawn: spawnMock,
|
||||
});
|
||||
|
||||
expect(noopLogger.info).toHaveBeenCalledWith(
|
||||
"[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz",
|
||||
);
|
||||
inputStdout.write(Buffer.from([1, 2, 3]));
|
||||
callbacks?.onAudio(Buffer.from([4, 5]));
|
||||
callbacks?.onMark?.("mark-1");
|
||||
@@ -4154,6 +4162,7 @@ describe("google-meet plugin", () => {
|
||||
const provider: RealtimeVoiceProviderPlugin = {
|
||||
id: "openai",
|
||||
label: "OpenAI",
|
||||
defaultModel: "gpt-realtime-1.5",
|
||||
autoSelectOrder: 1,
|
||||
resolveConfig: ({ rawConfig }) => rawConfig,
|
||||
isConfigured: () => true,
|
||||
@@ -4492,6 +4501,9 @@ describe("google-meet plugin", () => {
|
||||
providers: [provider],
|
||||
});
|
||||
|
||||
expect(noopLogger.info).toHaveBeenCalledWith(
|
||||
"[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz",
|
||||
);
|
||||
callbacks?.onAudio(Buffer.from([1, 2, 3]));
|
||||
callbacks?.onClearAudio();
|
||||
callbacks?.onReady?.();
|
||||
|
||||
@@ -31,6 +31,8 @@ import {
|
||||
isGoogleMeetLikelyAssistantEchoTranscript,
|
||||
convertGoogleMeetBridgeAudioForStt,
|
||||
convertGoogleMeetTtsAudioForBridge,
|
||||
formatGoogleMeetAgentAudioModelLog,
|
||||
formatGoogleMeetRealtimeVoiceModelLog,
|
||||
type GoogleMeetRealtimeEventEntry,
|
||||
type GoogleMeetRealtimeTranscriptEntry,
|
||||
} from "./realtime.js";
|
||||
@@ -96,6 +98,13 @@ export async function startNodeAgentAudioBridge(params: {
|
||||
fullConfig: params.fullConfig,
|
||||
providers: params.providers,
|
||||
});
|
||||
params.logger.info(
|
||||
formatGoogleMeetAgentAudioModelLog({
|
||||
provider: resolved.provider,
|
||||
providerConfig: resolved.providerConfig,
|
||||
audioFormat: params.config.chrome.audioFormat,
|
||||
}),
|
||||
);
|
||||
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
|
||||
let agentConsultActive = false;
|
||||
let pendingAgentQuestion: string | undefined;
|
||||
@@ -390,6 +399,15 @@ export async function startNodeRealtimeAudioBridge(params: {
|
||||
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
|
||||
const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
|
||||
const strategy = params.config.realtime.strategy;
|
||||
params.logger.info(
|
||||
formatGoogleMeetRealtimeVoiceModelLog({
|
||||
strategy,
|
||||
provider: resolved.provider,
|
||||
providerConfig: resolved.providerConfig,
|
||||
fallbackModel: params.config.realtime.model,
|
||||
audioFormat: params.config.chrome.audioFormat,
|
||||
}),
|
||||
);
|
||||
let agentConsultActive = false;
|
||||
let pendingAgentQuestion: string | undefined;
|
||||
let agentConsultDebounceTimer: ReturnType<typeof setTimeout> | undefined;
|
||||
|
||||
@@ -409,6 +409,70 @@ export function buildGoogleMeetSpeakExactUserMessage(text: string): string {
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
/**
 * Extracts a loggable string from an arbitrary value.
 *
 * @param value - Candidate value of unknown type.
 * @returns The value with surrounding whitespace removed when it is a string
 *   containing at least one non-whitespace character; otherwise `undefined`.
 */
function readLogString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
|
||||
|
||||
/**
 * Renders a value as a single whitespace-free token for a `key=value` log line.
 *
 * Each run of whitespace is collapsed to one underscore and the result is capped
 * at 180 UTF-16 code units. When the cap would fall between the two halves of a
 * surrogate pair, the dangling high surrogate is dropped so the log line never
 * contains an unpaired surrogate.
 *
 * @param value - Text to render; may be `undefined`.
 * @returns The sanitized token, or `"unknown"` when the input is `undefined` or
 *   sanitizes to an empty string.
 */
function formatLogValue(value: string | undefined): string {
  const maxLength = 180;
  const collapsed = value?.replace(/\s+/g, "_") ?? "";
  let truncated = collapsed.slice(0, maxLength);
  if (collapsed.length > maxLength) {
    // A cut happened: make sure it did not leave half of a surrogate pair behind.
    const lastUnit = truncated.charCodeAt(maxLength - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      truncated = truncated.slice(0, maxLength - 1);
    }
  }
  return truncated || "unknown";
}
|
||||
|
||||
/**
 * Picks the model name to report in a bridge start-up log line.
 *
 * Candidates are tried in order and the first one that `readLogString` accepts
 * wins: `providerConfig.model`, `providerConfig.modelId`, `fallbackModel`,
 * then `provider.defaultModel`.
 *
 * @param params.provider - Provider whose optional `defaultModel` is the last candidate.
 * @param params.providerConfig - Provider configuration consulted first.
 * @param params.fallbackModel - Optional model tried after the provider configuration.
 * @returns The first usable candidate, or the literal `"provider-default"` when none is usable.
 */
function resolveProviderModelForLog(params: {
  provider: { defaultModel?: string };
  providerConfig: RealtimeVoiceProviderConfig | RealtimeTranscriptionProviderConfig;
  fallbackModel?: string;
}): string {
  const config = params.providerConfig;
  const fromConfig = readLogString(config.model) ?? readLogString(config.modelId);
  if (fromConfig !== undefined) {
    return fromConfig;
  }
  const fromDefaults =
    readLogString(params.fallbackModel) ?? readLogString(params.provider.defaultModel);
  return fromDefaults ?? "provider-default";
}
|
||||
|
||||
/**
 * Builds the start-up log line for the realtime voice bridge.
 *
 * The line has the form
 * `[google-meet] realtime voice bridge starting: strategy=… provider=… model=… audioFormat=…`,
 * with every value passed through `formatLogValue`.
 *
 * @param params.strategy - Realtime strategy name to report.
 * @param params.provider - Voice provider; its `id` is logged and it is handed to model resolution.
 * @param params.providerConfig - Provider configuration handed to model resolution.
 * @param params.fallbackModel - Optional model handed to model resolution as a fallback.
 * @param params.audioFormat - Configured Chrome audio format to report.
 * @returns The formatted single-line log message.
 */
export function formatGoogleMeetRealtimeVoiceModelLog(params: {
  strategy: string;
  provider: RealtimeVoiceProviderPlugin;
  providerConfig: RealtimeVoiceProviderConfig;
  fallbackModel?: string;
  audioFormat: GoogleMeetConfig["chrome"]["audioFormat"];
}): string {
  const strategy = formatLogValue(params.strategy);
  const providerId = formatLogValue(params.provider.id);
  const resolvedModel = resolveProviderModelForLog({
    provider: params.provider,
    providerConfig: params.providerConfig,
    fallbackModel: params.fallbackModel,
  });
  const model = formatLogValue(resolvedModel);
  const audioFormat = formatLogValue(params.audioFormat);
  return `[google-meet] realtime voice bridge starting: strategy=${strategy} provider=${providerId} model=${model} audioFormat=${audioFormat}`;
}
|
||||
|
||||
/**
 * Builds the start-up log line for the agent audio bridge.
 *
 * The line has the form
 * `[google-meet] agent audio bridge starting: transcriptionProvider=… transcriptionModel=… tts=telephony audioFormat=…`,
 * with the provider id, model and audio format passed through `formatLogValue`.
 * No fallback model is supplied to model resolution here.
 *
 * @param params.provider - Transcription provider; its `id` is logged and it is handed to model resolution.
 * @param params.providerConfig - Provider configuration handed to model resolution.
 * @param params.audioFormat - Configured Chrome audio format to report.
 * @returns The formatted single-line log message.
 */
export function formatGoogleMeetAgentAudioModelLog(params: {
  provider: RealtimeTranscriptionProviderPlugin;
  providerConfig: RealtimeTranscriptionProviderConfig;
  audioFormat: GoogleMeetConfig["chrome"]["audioFormat"];
}): string {
  const providerId = formatLogValue(params.provider.id);
  const resolvedModel = resolveProviderModelForLog({
    provider: params.provider,
    providerConfig: params.providerConfig,
  });
  const model = formatLogValue(resolvedModel);
  const audioFormat = formatLogValue(params.audioFormat);
  return `[google-meet] agent audio bridge starting: transcriptionProvider=${providerId} transcriptionModel=${model} tts=telephony audioFormat=${audioFormat}`;
}
|
||||
|
||||
function normalizeGoogleMeetTtsPromptText(text: string | undefined): string | undefined {
|
||||
const trimmed = text?.trim();
|
||||
if (!trimmed) {
|
||||
@@ -464,6 +528,13 @@ export async function startCommandAgentAudioBridge(params: {
|
||||
fullConfig: params.fullConfig,
|
||||
providers: params.providers,
|
||||
});
|
||||
params.logger.info(
|
||||
formatGoogleMeetAgentAudioModelLog({
|
||||
provider: resolved.provider,
|
||||
providerConfig: resolved.providerConfig,
|
||||
audioFormat: params.config.chrome.audioFormat,
|
||||
}),
|
||||
);
|
||||
|
||||
const terminateProcess = (proc: BridgeProcess, signal: NodeJS.Signals = "SIGTERM") => {
|
||||
if (proc.killed && signal !== "SIGKILL") {
|
||||
@@ -956,6 +1027,15 @@ export async function startCommandRealtimeAudioBridge(params: {
|
||||
providers: params.providers,
|
||||
});
|
||||
const strategy = params.config.realtime.strategy;
|
||||
params.logger.info(
|
||||
formatGoogleMeetRealtimeVoiceModelLog({
|
||||
strategy,
|
||||
provider: resolved.provider,
|
||||
providerConfig: resolved.providerConfig,
|
||||
fallbackModel: params.config.realtime.model,
|
||||
audioFormat: params.config.chrome.audioFormat,
|
||||
}),
|
||||
);
|
||||
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
|
||||
const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
|
||||
let agentConsultActive = false;
|
||||
|
||||
@@ -830,6 +830,7 @@ export function buildGoogleRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
|
||||
return {
|
||||
id: "google",
|
||||
label: "Google Live Voice",
|
||||
defaultModel: GOOGLE_REALTIME_DEFAULT_MODEL,
|
||||
autoSelectOrder: 20,
|
||||
resolveConfig: ({ cfg, rawConfig }) => normalizeProviderConfig(rawConfig, cfg),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
|
||||
@@ -249,6 +249,7 @@ export function buildMistralRealtimeTranscriptionProvider(): RealtimeTranscripti
|
||||
id: "mistral",
|
||||
label: "Mistral Realtime Transcription",
|
||||
aliases: ["mistral-realtime", "voxtral-realtime"],
|
||||
defaultModel: MISTRAL_REALTIME_DEFAULT_MODEL,
|
||||
autoSelectOrder: 45,
|
||||
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
|
||||
@@ -44,6 +44,7 @@ const OPENAI_REALTIME_TRANSCRIPTION_URL = "wss://api.openai.com/v1/realtime?inte
|
||||
const OPENAI_REALTIME_TRANSCRIPTION_CONNECT_TIMEOUT_MS = 10_000;
|
||||
const OPENAI_REALTIME_TRANSCRIPTION_MAX_RECONNECT_ATTEMPTS = 5;
|
||||
const OPENAI_REALTIME_TRANSCRIPTION_RECONNECT_DELAY_MS = 1000;
|
||||
const OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL = "gpt-4o-transcribe";
|
||||
|
||||
function normalizeProviderConfig(
|
||||
config: RealtimeTranscriptionProviderConfig,
|
||||
@@ -174,6 +175,7 @@ export function buildOpenAIRealtimeTranscriptionProvider(): RealtimeTranscriptio
|
||||
id: "openai",
|
||||
label: "OpenAI Realtime Transcription",
|
||||
aliases: ["openai-realtime"],
|
||||
defaultModel: OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL,
|
||||
autoSelectOrder: 10,
|
||||
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
@@ -188,7 +190,7 @@ export function buildOpenAIRealtimeTranscriptionProvider(): RealtimeTranscriptio
|
||||
...req,
|
||||
apiKey,
|
||||
language: config.language,
|
||||
model: config.model ?? "gpt-4o-transcribe",
|
||||
model: config.model ?? OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL,
|
||||
prompt: config.prompt,
|
||||
silenceDurationMs: config.silenceDurationMs ?? 800,
|
||||
vadThreshold: config.vadThreshold ?? 0.5,
|
||||
|
||||
@@ -871,6 +871,7 @@ export function buildOpenAIRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
|
||||
return {
|
||||
id: "openai",
|
||||
label: "OpenAI Realtime Voice",
|
||||
defaultModel: OPENAI_REALTIME_DEFAULT_MODEL,
|
||||
autoSelectOrder: 10,
|
||||
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
|
||||
Reference in New Issue
Block a user