diff --git a/CHANGELOG.md b/CHANGELOG.md index f032e5a2bce..954728f1dbe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Exec approvals: treat POSIX `exec` as a command carrier for inline eval, shell-wrapper, and eval/source detection, so approval explanations and command-risk checks do not miss payloads hidden behind `exec`. Thanks @vincentkoc. +- Google Meet: log the resolved audio provider model when starting Chrome and paired-node Meet talk-back bridges, so agent-mode joins show the STT model and bidi joins show the realtime voice model. - Diagnostics: handle missing session-tail files in cron recovery context without tripping extension test typecheck. Thanks @vincentkoc. - QA/Slack: update the Slack dispatch preview fallback test SDK mock for structured progress draft helpers, so the rich progress draft regression suite covers the new imports instead of failing before assertions run. Thanks @vincentkoc. - Plugins/loader: keep bundled plugin package `test-api.js` aliases behind private QA mode, so source transforms do not expose test-only public surfaces during normal plugin loading. Thanks @vincentkoc. diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index 2fcf47d7d31..3e3904a90db 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -2943ada651fd9a07c9e715a90ad4a76f725a1b60fa142dcfd504ba6d6c202ed4 plugin-sdk-api-baseline.json -ff31408a26bcad4c54dc0c897d0103ca3d7dc91b3394a3ab65e7dade0c3f6ff5 plugin-sdk-api-baseline.jsonl +c38441e2e18aa519c5dc22c2b593694444869673447740327c87f16f3d4a0f8d plugin-sdk-api-baseline.json +5711948923b5a4f89ac04a182266ee0fb57275369a3a8112433f3758a7d38c86 plugin-sdk-api-baseline.jsonl diff --git a/extensions/deepgram/realtime-transcription-provider.ts b/extensions/deepgram/realtime-transcription-provider.ts index 49cdc1a7b4e..97ce1e26cde 100644 --- a/extensions/deepgram/realtime-transcription-provider.ts +++ b/extensions/deepgram/realtime-transcription-provider.ts @@ -250,6 +250,7 @@ export function buildDeepgramRealtimeTranscriptionProvider(): RealtimeTranscript id: "deepgram", label: "Deepgram Realtime Transcription", aliases: ["deepgram-realtime", "nova-3-streaming"], + defaultModel: DEFAULT_DEEPGRAM_AUDIO_MODEL, autoSelectOrder: 35, resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig), isConfigured: ({ providerConfig }) => diff --git a/extensions/elevenlabs/realtime-transcription-provider.ts b/extensions/elevenlabs/realtime-transcription-provider.ts index 3215c0120c1..fdb885a36b9 100644 --- a/extensions/elevenlabs/realtime-transcription-provider.ts +++ b/extensions/elevenlabs/realtime-transcription-provider.ts @@ -243,6 +243,7 @@ export function buildElevenLabsRealtimeTranscriptionProvider(): RealtimeTranscri id: "elevenlabs", label: "ElevenLabs Realtime Transcription", aliases: ["elevenlabs-realtime", "scribe-v2-realtime"], + defaultModel: ELEVENLABS_REALTIME_DEFAULT_MODEL, autoSelectOrder: 40, resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig), isConfigured: ({ providerConfig }) => diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index de31f55d92e..6513af119da 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -3806,6 +3806,7 @@ describe("google-meet plugin", () => { const provider: RealtimeTranscriptionProviderPlugin = { id: "openai", label: "OpenAI", + defaultModel: "gpt-4o-transcribe", autoSelectOrder: 1, resolveConfig: ({ rawConfig }) => rawConfig, isConfigured: () => true, @@ -3882,6 +3883,9 @@ describe("google-meet plugin", () => { spawn: spawnMock, }); + expect(noopLogger.info).toHaveBeenCalledWith( + "[google-meet] agent audio bridge starting: transcriptionProvider=openai transcriptionModel=gpt-4o-transcribe tts=telephony audioFormat=pcm16-24khz", + ); inputStdout.write(Buffer.from([1, 0, 2, 0, 3, 0, 4, 0])); callbacks?.onTranscript?.("Please summarize the launch."); await new Promise((resolve) => setTimeout(resolve, 1100)); @@ -3942,6 +3946,7 @@ describe("google-meet plugin", () => { const provider: RealtimeVoiceProviderPlugin = { id: "openai", label: "OpenAI", + defaultModel: "gpt-realtime-1.5", autoSelectOrder: 1, resolveConfig: ({ rawConfig }) => rawConfig, isConfigured: () => true, @@ -4023,6 +4028,9 @@ describe("google-meet plugin", () => { spawn: spawnMock, }); + expect(noopLogger.info).toHaveBeenCalledWith( + "[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz", + ); inputStdout.write(Buffer.from([1, 2, 3])); callbacks?.onAudio(Buffer.from([4, 5])); callbacks?.onMark?.("mark-1"); @@ -4154,6 +4162,7 @@ describe("google-meet plugin", () => { const provider: RealtimeVoiceProviderPlugin = { id: "openai", label: "OpenAI", + defaultModel: "gpt-realtime-1.5", autoSelectOrder: 1, resolveConfig: ({ rawConfig }) => rawConfig, isConfigured: () => true, @@ -4492,6 +4501,9 @@ describe("google-meet plugin", () => { providers: [provider], }); + expect(noopLogger.info).toHaveBeenCalledWith( + "[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz", + ); callbacks?.onAudio(Buffer.from([1, 2, 3])); callbacks?.onClearAudio(); callbacks?.onReady?.(); diff --git a/extensions/google-meet/src/realtime-node.ts b/extensions/google-meet/src/realtime-node.ts index 96569cb4885..9ef1622ed98 100644 --- a/extensions/google-meet/src/realtime-node.ts +++ b/extensions/google-meet/src/realtime-node.ts @@ -31,6 +31,8 @@ import { isGoogleMeetLikelyAssistantEchoTranscript, convertGoogleMeetBridgeAudioForStt, convertGoogleMeetTtsAudioForBridge, + formatGoogleMeetAgentAudioModelLog, + formatGoogleMeetRealtimeVoiceModelLog, type GoogleMeetRealtimeEventEntry, type GoogleMeetRealtimeTranscriptEntry, } from "./realtime.js"; @@ -96,6 +98,13 @@ export async function startNodeAgentAudioBridge(params: { fullConfig: params.fullConfig, providers: params.providers, }); + params.logger.info( + formatGoogleMeetAgentAudioModelLog({ + provider: resolved.provider, + providerConfig: resolved.providerConfig, + audioFormat: params.config.chrome.audioFormat, + }), + ); const transcript: GoogleMeetRealtimeTranscriptEntry[] = []; let agentConsultActive = false; let pendingAgentQuestion: string | undefined; @@ -390,6 +399,15 @@ export async function startNodeRealtimeAudioBridge(params: { const transcript: GoogleMeetRealtimeTranscriptEntry[] = []; const realtimeEvents: GoogleMeetRealtimeEventEntry[] = []; const strategy = params.config.realtime.strategy; + params.logger.info( + formatGoogleMeetRealtimeVoiceModelLog({ + strategy, + provider: resolved.provider, + providerConfig: resolved.providerConfig, + fallbackModel: params.config.realtime.model, + audioFormat: params.config.chrome.audioFormat, + }), + ); let agentConsultActive = false; let pendingAgentQuestion: string | undefined; let agentConsultDebounceTimer: ReturnType | undefined; diff --git a/extensions/google-meet/src/realtime.ts b/extensions/google-meet/src/realtime.ts index 4e4191f7d56..94b50a21ce1 100644 --- a/extensions/google-meet/src/realtime.ts +++ b/extensions/google-meet/src/realtime.ts @@ -409,6 +409,70 @@ export function buildGoogleMeetSpeakExactUserMessage(text: string): string { ].join("\n"); } +function readLogString(value: unknown): string | undefined { + return typeof value === "string" && value.trim() ? value.trim() : undefined; +} + +function formatLogValue(value: string | undefined): string { + const normalized = value?.replace(/\s+/g, "_").slice(0, 180); + return normalized || "unknown"; +} + +function resolveProviderModelForLog(params: { + provider: { defaultModel?: string }; + providerConfig: RealtimeVoiceProviderConfig | RealtimeTranscriptionProviderConfig; + fallbackModel?: string; +}): string { + return ( + readLogString(params.providerConfig.model) ?? + readLogString(params.providerConfig.modelId) ?? + readLogString(params.fallbackModel) ?? + readLogString(params.provider.defaultModel) ?? + "provider-default" + ); +} + +export function formatGoogleMeetRealtimeVoiceModelLog(params: { + strategy: string; + provider: RealtimeVoiceProviderPlugin; + providerConfig: RealtimeVoiceProviderConfig; + fallbackModel?: string; + audioFormat: GoogleMeetConfig["chrome"]["audioFormat"]; +}): string { + return [ + `[google-meet] realtime voice bridge starting: strategy=${formatLogValue(params.strategy)}`, + `provider=${formatLogValue(params.provider.id)}`, + `model=${formatLogValue( + resolveProviderModelForLog({ + provider: params.provider, + providerConfig: params.providerConfig, + fallbackModel: params.fallbackModel, + }), + )}`, + `audioFormat=${formatLogValue(params.audioFormat)}`, + ].join(" "); +} + +export function formatGoogleMeetAgentAudioModelLog(params: { + provider: RealtimeTranscriptionProviderPlugin; + providerConfig: RealtimeTranscriptionProviderConfig; + audioFormat: GoogleMeetConfig["chrome"]["audioFormat"]; +}): string { + return [ + `[google-meet] agent audio bridge starting: transcriptionProvider=${formatLogValue( + params.provider.id, + )}`, + `transcriptionModel=${formatLogValue( + resolveProviderModelForLog({ + provider: params.provider, + providerConfig: params.providerConfig, + }), + )}`, + "tts=telephony", + `audioFormat=${formatLogValue(params.audioFormat)}`, + ].join(" "); +} + function normalizeGoogleMeetTtsPromptText(text: string | undefined): string | undefined { const trimmed = text?.trim(); if (!trimmed) { @@ -464,6 +528,13 @@ export async function startCommandAgentAudioBridge(params: { fullConfig: params.fullConfig, providers: params.providers, }); + params.logger.info( + formatGoogleMeetAgentAudioModelLog({ + provider: resolved.provider, + providerConfig: resolved.providerConfig, + audioFormat: params.config.chrome.audioFormat, + }), + ); const terminateProcess = (proc: BridgeProcess, signal: NodeJS.Signals = "SIGTERM") => { if (proc.killed && signal !== "SIGKILL") { @@ -956,6 +1027,15 @@ export async function startCommandRealtimeAudioBridge(params: { providers: params.providers, }); const strategy = params.config.realtime.strategy; + params.logger.info( + formatGoogleMeetRealtimeVoiceModelLog({ + strategy, + provider: resolved.provider, + providerConfig: resolved.providerConfig, + fallbackModel: params.config.realtime.model, + audioFormat: params.config.chrome.audioFormat, + }), + ); const transcript: GoogleMeetRealtimeTranscriptEntry[] = []; const realtimeEvents: GoogleMeetRealtimeEventEntry[] = []; let agentConsultActive = false; diff --git a/extensions/google/realtime-voice-provider.ts b/extensions/google/realtime-voice-provider.ts index 2d4a70be0d9..77bf205a203 100644 --- a/extensions/google/realtime-voice-provider.ts +++ b/extensions/google/realtime-voice-provider.ts @@ -830,6 +830,7 @@ export function buildGoogleRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin return { id: "google", label: "Google Live Voice", + defaultModel: GOOGLE_REALTIME_DEFAULT_MODEL, autoSelectOrder: 20, resolveConfig: ({ cfg, rawConfig }) => normalizeProviderConfig(rawConfig, cfg), isConfigured: ({ providerConfig }) => diff --git a/extensions/mistral/realtime-transcription-provider.ts b/extensions/mistral/realtime-transcription-provider.ts index c46efd4c984..51526f85a88 100644 --- a/extensions/mistral/realtime-transcription-provider.ts +++ b/extensions/mistral/realtime-transcription-provider.ts @@ -249,6 +249,7 @@ export function buildMistralRealtimeTranscriptionProvider(): RealtimeTranscripti id: "mistral", label: "Mistral Realtime Transcription", aliases: ["mistral-realtime", "voxtral-realtime"], + defaultModel: MISTRAL_REALTIME_DEFAULT_MODEL, autoSelectOrder: 45, resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig), isConfigured: ({ providerConfig }) => diff --git a/extensions/openai/realtime-transcription-provider.ts b/extensions/openai/realtime-transcription-provider.ts index a49169c850f..c1e8e9bf4b5 100644 --- a/extensions/openai/realtime-transcription-provider.ts +++ b/extensions/openai/realtime-transcription-provider.ts @@ -44,6 +44,7 @@ const OPENAI_REALTIME_TRANSCRIPTION_URL = "wss://api.openai.com/v1/realtime?inte const OPENAI_REALTIME_TRANSCRIPTION_CONNECT_TIMEOUT_MS = 10_000; const OPENAI_REALTIME_TRANSCRIPTION_MAX_RECONNECT_ATTEMPTS = 5; const OPENAI_REALTIME_TRANSCRIPTION_RECONNECT_DELAY_MS = 1000; +const OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL = "gpt-4o-transcribe"; function normalizeProviderConfig( config: RealtimeTranscriptionProviderConfig, @@ -174,6 +175,7 @@ export function buildOpenAIRealtimeTranscriptionProvider(): RealtimeTranscriptio id: "openai", label: "OpenAI Realtime Transcription", aliases: ["openai-realtime"], + defaultModel: OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL, autoSelectOrder: 10, resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig), isConfigured: ({ providerConfig }) => @@ -188,7 +190,7 @@ export function buildOpenAIRealtimeTranscriptionProvider(): RealtimeTranscriptio ...req, apiKey, language: config.language, - model: config.model ?? "gpt-4o-transcribe", + model: config.model ?? OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL, prompt: config.prompt, silenceDurationMs: config.silenceDurationMs ?? 800, vadThreshold: config.vadThreshold ?? 0.5, diff --git a/extensions/openai/realtime-voice-provider.ts b/extensions/openai/realtime-voice-provider.ts index c668242c308..e4f23275168 100644 --- a/extensions/openai/realtime-voice-provider.ts +++ b/extensions/openai/realtime-voice-provider.ts @@ -871,6 +871,7 @@ export function buildOpenAIRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin return { id: "openai", label: "OpenAI Realtime Voice", + defaultModel: OPENAI_REALTIME_DEFAULT_MODEL, autoSelectOrder: 10, resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig), isConfigured: ({ providerConfig }) => diff --git a/src/plugins/types.ts b/src/plugins/types.ts index 30c5b038c12..2c287edf71b 100644 --- a/src/plugins/types.ts +++ b/src/plugins/types.ts @@ -1813,6 +1813,7 @@ export type RealtimeTranscriptionProviderPlugin = { id: RealtimeTranscriptionProviderId; label: string; aliases?: string[]; + defaultModel?: string; autoSelectOrder?: number; resolveConfig?: ( ctx: RealtimeTranscriptionProviderResolveConfigContext, @@ -1830,6 +1831,7 @@ export type RealtimeVoiceProviderPlugin = { id: RealtimeVoiceProviderId; label: string; aliases?: string[]; + defaultModel?: string; autoSelectOrder?: number; resolveConfig?: (ctx: RealtimeVoiceProviderResolveConfigContext) => RealtimeVoiceProviderConfig; isConfigured: (ctx: RealtimeVoiceProviderConfiguredContext) => boolean;