fix: log google meet realtime models

This commit is contained in:
Peter Steinberger
2026-05-04 06:03:32 +01:00
parent 1bf824f586
commit a6d67ccf29
12 changed files with 123 additions and 3 deletions

View File

@@ -50,6 +50,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Exec approvals: treat POSIX `exec` as a command carrier for inline eval, shell-wrapper, and eval/source detection, so approval explanations and command-risk checks do not miss payloads hidden behind `exec`. Thanks @vincentkoc.
- Google Meet: log the resolved audio provider model when starting Chrome and paired-node Meet talk-back bridges, so agent-mode joins show the STT model and bidi joins show the realtime voice model.
- Diagnostics: handle missing session-tail files in cron recovery context without tripping extension test typecheck. Thanks @vincentkoc.
- QA/Slack: update the Slack dispatch preview fallback test SDK mock for structured progress draft helpers, so the rich progress draft regression suite covers the new imports instead of failing before assertions run. Thanks @vincentkoc.
- Plugins/loader: keep bundled plugin package `test-api.js` aliases behind private QA mode, so source transforms do not expose test-only public surfaces during normal plugin loading. Thanks @vincentkoc.

View File

@@ -1,2 +1,2 @@
2943ada651fd9a07c9e715a90ad4a76f725a1b60fa142dcfd504ba6d6c202ed4 plugin-sdk-api-baseline.json
ff31408a26bcad4c54dc0c897d0103ca3d7dc91b3394a3ab65e7dade0c3f6ff5 plugin-sdk-api-baseline.jsonl
c38441e2e18aa519c5dc22c2b593694444869673447740327c87f16f3d4a0f8d plugin-sdk-api-baseline.json
5711948923b5a4f89ac04a182266ee0fb57275369a3a8112433f3758a7d38c86 plugin-sdk-api-baseline.jsonl

View File

@@ -250,6 +250,7 @@ export function buildDeepgramRealtimeTranscriptionProvider(): RealtimeTranscript
id: "deepgram",
label: "Deepgram Realtime Transcription",
aliases: ["deepgram-realtime", "nova-3-streaming"],
defaultModel: DEFAULT_DEEPGRAM_AUDIO_MODEL,
autoSelectOrder: 35,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>

View File

@@ -243,6 +243,7 @@ export function buildElevenLabsRealtimeTranscriptionProvider(): RealtimeTranscri
id: "elevenlabs",
label: "ElevenLabs Realtime Transcription",
aliases: ["elevenlabs-realtime", "scribe-v2-realtime"],
defaultModel: ELEVENLABS_REALTIME_DEFAULT_MODEL,
autoSelectOrder: 40,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>

View File

@@ -3806,6 +3806,7 @@ describe("google-meet plugin", () => {
const provider: RealtimeTranscriptionProviderPlugin = {
id: "openai",
label: "OpenAI",
defaultModel: "gpt-4o-transcribe",
autoSelectOrder: 1,
resolveConfig: ({ rawConfig }) => rawConfig,
isConfigured: () => true,
@@ -3882,6 +3883,9 @@ describe("google-meet plugin", () => {
spawn: spawnMock,
});
expect(noopLogger.info).toHaveBeenCalledWith(
"[google-meet] agent audio bridge starting: transcriptionProvider=openai transcriptionModel=gpt-4o-transcribe tts=telephony audioFormat=pcm16-24khz",
);
inputStdout.write(Buffer.from([1, 0, 2, 0, 3, 0, 4, 0]));
callbacks?.onTranscript?.("Please summarize the launch.");
await new Promise((resolve) => setTimeout(resolve, 1100));
@@ -3942,6 +3946,7 @@ describe("google-meet plugin", () => {
const provider: RealtimeVoiceProviderPlugin = {
id: "openai",
label: "OpenAI",
defaultModel: "gpt-realtime-1.5",
autoSelectOrder: 1,
resolveConfig: ({ rawConfig }) => rawConfig,
isConfigured: () => true,
@@ -4023,6 +4028,9 @@ describe("google-meet plugin", () => {
spawn: spawnMock,
});
expect(noopLogger.info).toHaveBeenCalledWith(
"[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz",
);
inputStdout.write(Buffer.from([1, 2, 3]));
callbacks?.onAudio(Buffer.from([4, 5]));
callbacks?.onMark?.("mark-1");
@@ -4154,6 +4162,7 @@ describe("google-meet plugin", () => {
const provider: RealtimeVoiceProviderPlugin = {
id: "openai",
label: "OpenAI",
defaultModel: "gpt-realtime-1.5",
autoSelectOrder: 1,
resolveConfig: ({ rawConfig }) => rawConfig,
isConfigured: () => true,
@@ -4492,6 +4501,9 @@ describe("google-meet plugin", () => {
providers: [provider],
});
expect(noopLogger.info).toHaveBeenCalledWith(
"[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz",
);
callbacks?.onAudio(Buffer.from([1, 2, 3]));
callbacks?.onClearAudio();
callbacks?.onReady?.();

View File

@@ -31,6 +31,8 @@ import {
isGoogleMeetLikelyAssistantEchoTranscript,
convertGoogleMeetBridgeAudioForStt,
convertGoogleMeetTtsAudioForBridge,
formatGoogleMeetAgentAudioModelLog,
formatGoogleMeetRealtimeVoiceModelLog,
type GoogleMeetRealtimeEventEntry,
type GoogleMeetRealtimeTranscriptEntry,
} from "./realtime.js";
@@ -96,6 +98,13 @@ export async function startNodeAgentAudioBridge(params: {
fullConfig: params.fullConfig,
providers: params.providers,
});
params.logger.info(
formatGoogleMeetAgentAudioModelLog({
provider: resolved.provider,
providerConfig: resolved.providerConfig,
audioFormat: params.config.chrome.audioFormat,
}),
);
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
let agentConsultActive = false;
let pendingAgentQuestion: string | undefined;
@@ -390,6 +399,15 @@ export async function startNodeRealtimeAudioBridge(params: {
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
const strategy = params.config.realtime.strategy;
params.logger.info(
formatGoogleMeetRealtimeVoiceModelLog({
strategy,
provider: resolved.provider,
providerConfig: resolved.providerConfig,
fallbackModel: params.config.realtime.model,
audioFormat: params.config.chrome.audioFormat,
}),
);
let agentConsultActive = false;
let pendingAgentQuestion: string | undefined;
let agentConsultDebounceTimer: ReturnType<typeof setTimeout> | undefined;

View File

@@ -409,6 +409,70 @@ export function buildGoogleMeetSpeakExactUserMessage(text: string): string {
].join("\n");
}
/** Accept only non-empty strings for log output; trim and return, else undefined. */
function readLogString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
/**
 * Sanitize a value for a space-delimited log line: collapse each whitespace
 * run to a single underscore, cap at 180 chars, and fall back to "unknown"
 * when the value is missing or empty.
 */
function formatLogValue(value: string | undefined): string {
  if (value === undefined) {
    return "unknown";
  }
  const safe = value.replace(/\s+/g, "_").slice(0, 180);
  return safe.length > 0 ? safe : "unknown";
}
/**
 * Pick the model name to report in a bridge startup log line. Preference
 * order: explicit config `model`, then `modelId`, then the caller-supplied
 * fallback, then the provider's declared default. "provider-default" marks
 * that nothing usable was specified.
 */
function resolveProviderModelForLog(params: {
  provider: { defaultModel?: string };
  providerConfig: RealtimeVoiceProviderConfig | RealtimeTranscriptionProviderConfig;
  fallbackModel?: string;
}): string {
  const candidates: unknown[] = [
    params.providerConfig.model,
    params.providerConfig.modelId,
    params.fallbackModel,
    params.provider.defaultModel,
  ];
  for (const candidate of candidates) {
    const cleaned = readLogString(candidate);
    if (cleaned !== undefined) {
      return cleaned;
    }
  }
  return "provider-default";
}
/**
 * Build the startup log line for a realtime voice (bidi talk-back) bridge:
 * strategy, provider id, resolved voice model, and bridge audio format,
 * each sanitized via formatLogValue.
 */
export function formatGoogleMeetRealtimeVoiceModelLog(params: {
  strategy: string;
  provider: RealtimeVoiceProviderPlugin;
  providerConfig: RealtimeVoiceProviderConfig;
  fallbackModel?: string;
  audioFormat: GoogleMeetConfig["chrome"]["audioFormat"];
}): string {
  const model = resolveProviderModelForLog({
    provider: params.provider,
    providerConfig: params.providerConfig,
    fallbackModel: params.fallbackModel,
  });
  const fields = [
    `[google-meet] realtime voice bridge starting: strategy=${formatLogValue(params.strategy)}`,
    `provider=${formatLogValue(params.provider.id)}`,
    `model=${formatLogValue(model)}`,
    `audioFormat=${formatLogValue(params.audioFormat)}`,
  ];
  return fields.join(" ");
}
/**
 * Build the startup log line for an agent-mode audio bridge: transcription
 * provider id, resolved STT model, fixed telephony TTS marker, and bridge
 * audio format, each sanitized via formatLogValue.
 */
export function formatGoogleMeetAgentAudioModelLog(params: {
  provider: RealtimeTranscriptionProviderPlugin;
  providerConfig: RealtimeTranscriptionProviderConfig;
  audioFormat: GoogleMeetConfig["chrome"]["audioFormat"];
}): string {
  const sttModel = resolveProviderModelForLog({
    provider: params.provider,
    providerConfig: params.providerConfig,
  });
  const fields = [
    `[google-meet] agent audio bridge starting: transcriptionProvider=${formatLogValue(params.provider.id)}`,
    `transcriptionModel=${formatLogValue(sttModel)}`,
    "tts=telephony",
    `audioFormat=${formatLogValue(params.audioFormat)}`,
  ];
  return fields.join(" ");
}
function normalizeGoogleMeetTtsPromptText(text: string | undefined): string | undefined {
const trimmed = text?.trim();
if (!trimmed) {
@@ -464,6 +528,13 @@ export async function startCommandAgentAudioBridge(params: {
fullConfig: params.fullConfig,
providers: params.providers,
});
params.logger.info(
formatGoogleMeetAgentAudioModelLog({
provider: resolved.provider,
providerConfig: resolved.providerConfig,
audioFormat: params.config.chrome.audioFormat,
}),
);
const terminateProcess = (proc: BridgeProcess, signal: NodeJS.Signals = "SIGTERM") => {
if (proc.killed && signal !== "SIGKILL") {
@@ -956,6 +1027,15 @@ export async function startCommandRealtimeAudioBridge(params: {
providers: params.providers,
});
const strategy = params.config.realtime.strategy;
params.logger.info(
formatGoogleMeetRealtimeVoiceModelLog({
strategy,
provider: resolved.provider,
providerConfig: resolved.providerConfig,
fallbackModel: params.config.realtime.model,
audioFormat: params.config.chrome.audioFormat,
}),
);
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
let agentConsultActive = false;

View File

@@ -830,6 +830,7 @@ export function buildGoogleRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
return {
id: "google",
label: "Google Live Voice",
defaultModel: GOOGLE_REALTIME_DEFAULT_MODEL,
autoSelectOrder: 20,
resolveConfig: ({ cfg, rawConfig }) => normalizeProviderConfig(rawConfig, cfg),
isConfigured: ({ providerConfig }) =>

View File

@@ -249,6 +249,7 @@ export function buildMistralRealtimeTranscriptionProvider(): RealtimeTranscripti
id: "mistral",
label: "Mistral Realtime Transcription",
aliases: ["mistral-realtime", "voxtral-realtime"],
defaultModel: MISTRAL_REALTIME_DEFAULT_MODEL,
autoSelectOrder: 45,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>

View File

@@ -44,6 +44,7 @@ const OPENAI_REALTIME_TRANSCRIPTION_URL = "wss://api.openai.com/v1/realtime?inte
const OPENAI_REALTIME_TRANSCRIPTION_CONNECT_TIMEOUT_MS = 10_000;
const OPENAI_REALTIME_TRANSCRIPTION_MAX_RECONNECT_ATTEMPTS = 5;
const OPENAI_REALTIME_TRANSCRIPTION_RECONNECT_DELAY_MS = 1000;
const OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL = "gpt-4o-transcribe";
function normalizeProviderConfig(
config: RealtimeTranscriptionProviderConfig,
@@ -174,6 +175,7 @@ export function buildOpenAIRealtimeTranscriptionProvider(): RealtimeTranscriptio
id: "openai",
label: "OpenAI Realtime Transcription",
aliases: ["openai-realtime"],
defaultModel: OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL,
autoSelectOrder: 10,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>
@@ -188,7 +190,7 @@ export function buildOpenAIRealtimeTranscriptionProvider(): RealtimeTranscriptio
...req,
apiKey,
language: config.language,
model: config.model ?? "gpt-4o-transcribe",
model: config.model ?? OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL,
prompt: config.prompt,
silenceDurationMs: config.silenceDurationMs ?? 800,
vadThreshold: config.vadThreshold ?? 0.5,

View File

@@ -871,6 +871,7 @@ export function buildOpenAIRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
return {
id: "openai",
label: "OpenAI Realtime Voice",
defaultModel: OPENAI_REALTIME_DEFAULT_MODEL,
autoSelectOrder: 10,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>

View File

@@ -1813,6 +1813,7 @@ export type RealtimeTranscriptionProviderPlugin = {
id: RealtimeTranscriptionProviderId;
label: string;
aliases?: string[];
defaultModel?: string;
autoSelectOrder?: number;
resolveConfig?: (
ctx: RealtimeTranscriptionProviderResolveConfigContext,
@@ -1830,6 +1831,7 @@ export type RealtimeVoiceProviderPlugin = {
id: RealtimeVoiceProviderId;
label: string;
aliases?: string[];
defaultModel?: string;
autoSelectOrder?: number;
resolveConfig?: (ctx: RealtimeVoiceProviderResolveConfigContext) => RealtimeVoiceProviderConfig;
isConfigured: (ctx: RealtimeVoiceProviderConfiguredContext) => boolean;