mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:00:42 +00:00
fix: log google meet realtime models
This commit is contained in:
@@ -50,6 +50,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Fixes
|
||||
|
||||
- Exec approvals: treat POSIX `exec` as a command carrier for inline eval, shell-wrapper, and eval/source detection, so approval explanations and command-risk checks do not miss payloads hidden behind `exec`. Thanks @vincentkoc.
|
||||
- Google Meet: log the resolved audio provider model when starting Chrome and paired-node Meet talk-back bridges, so agent-mode joins show the STT model and bidi joins show the realtime voice model.
|
||||
- Diagnostics: handle missing session-tail files in cron recovery context without tripping extension test typecheck. Thanks @vincentkoc.
|
||||
- QA/Slack: update the Slack dispatch preview fallback test SDK mock for structured progress draft helpers, so the rich progress draft regression suite covers the new imports instead of failing before assertions run. Thanks @vincentkoc.
|
||||
- Plugins/loader: keep bundled plugin package `test-api.js` aliases behind private QA mode, so source transforms do not expose test-only public surfaces during normal plugin loading. Thanks @vincentkoc.
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
2943ada651fd9a07c9e715a90ad4a76f725a1b60fa142dcfd504ba6d6c202ed4 plugin-sdk-api-baseline.json
|
||||
ff31408a26bcad4c54dc0c897d0103ca3d7dc91b3394a3ab65e7dade0c3f6ff5 plugin-sdk-api-baseline.jsonl
|
||||
c38441e2e18aa519c5dc22c2b593694444869673447740327c87f16f3d4a0f8d plugin-sdk-api-baseline.json
|
||||
5711948923b5a4f89ac04a182266ee0fb57275369a3a8112433f3758a7d38c86 plugin-sdk-api-baseline.jsonl
|
||||
|
||||
@@ -250,6 +250,7 @@ export function buildDeepgramRealtimeTranscriptionProvider(): RealtimeTranscript
|
||||
id: "deepgram",
|
||||
label: "Deepgram Realtime Transcription",
|
||||
aliases: ["deepgram-realtime", "nova-3-streaming"],
|
||||
defaultModel: DEFAULT_DEEPGRAM_AUDIO_MODEL,
|
||||
autoSelectOrder: 35,
|
||||
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
|
||||
@@ -243,6 +243,7 @@ export function buildElevenLabsRealtimeTranscriptionProvider(): RealtimeTranscri
|
||||
id: "elevenlabs",
|
||||
label: "ElevenLabs Realtime Transcription",
|
||||
aliases: ["elevenlabs-realtime", "scribe-v2-realtime"],
|
||||
defaultModel: ELEVENLABS_REALTIME_DEFAULT_MODEL,
|
||||
autoSelectOrder: 40,
|
||||
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
|
||||
@@ -3806,6 +3806,7 @@ describe("google-meet plugin", () => {
|
||||
const provider: RealtimeTranscriptionProviderPlugin = {
|
||||
id: "openai",
|
||||
label: "OpenAI",
|
||||
defaultModel: "gpt-4o-transcribe",
|
||||
autoSelectOrder: 1,
|
||||
resolveConfig: ({ rawConfig }) => rawConfig,
|
||||
isConfigured: () => true,
|
||||
@@ -3882,6 +3883,9 @@ describe("google-meet plugin", () => {
|
||||
spawn: spawnMock,
|
||||
});
|
||||
|
||||
expect(noopLogger.info).toHaveBeenCalledWith(
|
||||
"[google-meet] agent audio bridge starting: transcriptionProvider=openai transcriptionModel=gpt-4o-transcribe tts=telephony audioFormat=pcm16-24khz",
|
||||
);
|
||||
inputStdout.write(Buffer.from([1, 0, 2, 0, 3, 0, 4, 0]));
|
||||
callbacks?.onTranscript?.("Please summarize the launch.");
|
||||
await new Promise((resolve) => setTimeout(resolve, 1100));
|
||||
@@ -3942,6 +3946,7 @@ describe("google-meet plugin", () => {
|
||||
const provider: RealtimeVoiceProviderPlugin = {
|
||||
id: "openai",
|
||||
label: "OpenAI",
|
||||
defaultModel: "gpt-realtime-1.5",
|
||||
autoSelectOrder: 1,
|
||||
resolveConfig: ({ rawConfig }) => rawConfig,
|
||||
isConfigured: () => true,
|
||||
@@ -4023,6 +4028,9 @@ describe("google-meet plugin", () => {
|
||||
spawn: spawnMock,
|
||||
});
|
||||
|
||||
expect(noopLogger.info).toHaveBeenCalledWith(
|
||||
"[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz",
|
||||
);
|
||||
inputStdout.write(Buffer.from([1, 2, 3]));
|
||||
callbacks?.onAudio(Buffer.from([4, 5]));
|
||||
callbacks?.onMark?.("mark-1");
|
||||
@@ -4154,6 +4162,7 @@ describe("google-meet plugin", () => {
|
||||
const provider: RealtimeVoiceProviderPlugin = {
|
||||
id: "openai",
|
||||
label: "OpenAI",
|
||||
defaultModel: "gpt-realtime-1.5",
|
||||
autoSelectOrder: 1,
|
||||
resolveConfig: ({ rawConfig }) => rawConfig,
|
||||
isConfigured: () => true,
|
||||
@@ -4492,6 +4501,9 @@ describe("google-meet plugin", () => {
|
||||
providers: [provider],
|
||||
});
|
||||
|
||||
expect(noopLogger.info).toHaveBeenCalledWith(
|
||||
"[google-meet] realtime voice bridge starting: strategy=bidi provider=openai model=gpt-realtime audioFormat=pcm16-24khz",
|
||||
);
|
||||
callbacks?.onAudio(Buffer.from([1, 2, 3]));
|
||||
callbacks?.onClearAudio();
|
||||
callbacks?.onReady?.();
|
||||
|
||||
@@ -31,6 +31,8 @@ import {
|
||||
isGoogleMeetLikelyAssistantEchoTranscript,
|
||||
convertGoogleMeetBridgeAudioForStt,
|
||||
convertGoogleMeetTtsAudioForBridge,
|
||||
formatGoogleMeetAgentAudioModelLog,
|
||||
formatGoogleMeetRealtimeVoiceModelLog,
|
||||
type GoogleMeetRealtimeEventEntry,
|
||||
type GoogleMeetRealtimeTranscriptEntry,
|
||||
} from "./realtime.js";
|
||||
@@ -96,6 +98,13 @@ export async function startNodeAgentAudioBridge(params: {
|
||||
fullConfig: params.fullConfig,
|
||||
providers: params.providers,
|
||||
});
|
||||
params.logger.info(
|
||||
formatGoogleMeetAgentAudioModelLog({
|
||||
provider: resolved.provider,
|
||||
providerConfig: resolved.providerConfig,
|
||||
audioFormat: params.config.chrome.audioFormat,
|
||||
}),
|
||||
);
|
||||
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
|
||||
let agentConsultActive = false;
|
||||
let pendingAgentQuestion: string | undefined;
|
||||
@@ -390,6 +399,15 @@ export async function startNodeRealtimeAudioBridge(params: {
|
||||
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
|
||||
const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
|
||||
const strategy = params.config.realtime.strategy;
|
||||
params.logger.info(
|
||||
formatGoogleMeetRealtimeVoiceModelLog({
|
||||
strategy,
|
||||
provider: resolved.provider,
|
||||
providerConfig: resolved.providerConfig,
|
||||
fallbackModel: params.config.realtime.model,
|
||||
audioFormat: params.config.chrome.audioFormat,
|
||||
}),
|
||||
);
|
||||
let agentConsultActive = false;
|
||||
let pendingAgentQuestion: string | undefined;
|
||||
let agentConsultDebounceTimer: ReturnType<typeof setTimeout> | undefined;
|
||||
|
||||
@@ -409,6 +409,70 @@ export function buildGoogleMeetSpeakExactUserMessage(text: string): string {
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
/**
 * Narrows an arbitrary value to a usable log string.
 *
 * @param value - Any value, typically a field read from a provider config.
 * @returns The trimmed string when `value` is a string with non-whitespace
 *   content; otherwise `undefined`.
 */
function readLogString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  // Blank strings are treated the same as a missing value.
  return trimmed.length > 0 ? trimmed : undefined;
}
|
||||
|
||||
/**
 * Renders a value as a single whitespace-free token for `key=value` log lines.
 *
 * Surrounding whitespace is trimmed, each remaining run of whitespace
 * (including newlines) becomes one underscore, and the result is capped at
 * 180 characters.
 *
 * @param value - The raw value to log; may be missing.
 * @returns The normalized token, or `"unknown"` when `value` is missing,
 *   empty, or contains only whitespace.
 */
function formatLogValue(value: string | undefined): string {
  // Trim before collapsing so a blank value falls through to "unknown"
  // instead of being logged as "_", and padded values do not gain
  // leading/trailing underscores.
  const normalized = value?.trim().replace(/\s+/g, "_").slice(0, 180);
  return normalized || "unknown";
}
|
||||
|
||||
/**
 * Picks the model name to report in a bridge startup log line.
 *
 * Candidates are tried in order and the first one that `readLogString`
 * accepts wins: `providerConfig.model`, `providerConfig.modelId`,
 * `fallbackModel`, then `provider.defaultModel`.
 *
 * @param params.provider - Provider whose optional `defaultModel` is the last candidate.
 * @param params.providerConfig - Resolved provider config; its `model` and
 *   `modelId` fields are consulted first.
 * @param params.fallbackModel - Optional caller-supplied model, tried after the config fields.
 * @returns The first usable model string, or the literal `"provider-default"`
 *   when no candidate is usable.
 */
function resolveProviderModelForLog(params: {
  provider: { defaultModel?: string };
  providerConfig: RealtimeVoiceProviderConfig | RealtimeTranscriptionProviderConfig;
  fallbackModel?: string;
}): string {
  const configuredModel = readLogString(params.providerConfig.model);
  if (configuredModel !== undefined) {
    return configuredModel;
  }

  const configuredModelId = readLogString(params.providerConfig.modelId);
  if (configuredModelId !== undefined) {
    return configuredModelId;
  }

  const callerFallback = readLogString(params.fallbackModel);
  if (callerFallback !== undefined) {
    return callerFallback;
  }

  const providerDefault = readLogString(params.provider.defaultModel);
  if (providerDefault !== undefined) {
    return providerDefault;
  }

  return "provider-default";
}
|
||||
|
||||
/**
 * Builds the one-line startup log message for a realtime voice bridge.
 *
 * The message has the form
 * `[google-meet] realtime voice bridge starting: strategy=… provider=… model=… audioFormat=…`,
 * with every value passed through `formatLogValue`.
 *
 * @param params.strategy - Realtime strategy name to report.
 * @param params.provider - Voice provider; its `id` is logged and it is
 *   forwarded to `resolveProviderModelForLog`.
 * @param params.providerConfig - Resolved provider config used to resolve the model.
 * @param params.fallbackModel - Optional model forwarded to `resolveProviderModelForLog`.
 * @param params.audioFormat - Audio format value to report.
 * @returns The formatted log line.
 */
export function formatGoogleMeetRealtimeVoiceModelLog(params: {
  strategy: string;
  provider: RealtimeVoiceProviderPlugin;
  providerConfig: RealtimeVoiceProviderConfig;
  fallbackModel?: string;
  audioFormat: GoogleMeetConfig["chrome"]["audioFormat"];
}): string {
  const resolvedModel = resolveProviderModelForLog({
    provider: params.provider,
    providerConfig: params.providerConfig,
    fallbackModel: params.fallbackModel,
  });

  const strategyToken = formatLogValue(params.strategy);
  const providerToken = formatLogValue(params.provider.id);
  const modelToken = formatLogValue(resolvedModel);
  const audioFormatToken = formatLogValue(params.audioFormat);

  return `[google-meet] realtime voice bridge starting: strategy=${strategyToken} provider=${providerToken} model=${modelToken} audioFormat=${audioFormatToken}`;
}
|
||||
|
||||
/**
 * Builds the one-line startup log message for an agent audio bridge.
 *
 * The message has the form
 * `[google-meet] agent audio bridge starting: transcriptionProvider=… transcriptionModel=… tts=telephony audioFormat=…`,
 * with the dynamic values passed through `formatLogValue`. The `tts` field is
 * a fixed literal.
 *
 * @param params.provider - Transcription provider; its `id` is logged and it
 *   is forwarded to `resolveProviderModelForLog`.
 * @param params.providerConfig - Resolved provider config used to resolve the model.
 * @param params.audioFormat - Audio format value to report.
 * @returns The formatted log line.
 */
export function formatGoogleMeetAgentAudioModelLog(params: {
  provider: RealtimeTranscriptionProviderPlugin;
  providerConfig: RealtimeTranscriptionProviderConfig;
  audioFormat: GoogleMeetConfig["chrome"]["audioFormat"];
}): string {
  // No fallback model is passed here: only the provider config and the
  // provider's own default are consulted.
  const resolvedModel = resolveProviderModelForLog({
    provider: params.provider,
    providerConfig: params.providerConfig,
  });

  const providerToken = formatLogValue(params.provider.id);
  const modelToken = formatLogValue(resolvedModel);
  const audioFormatToken = formatLogValue(params.audioFormat);

  return `[google-meet] agent audio bridge starting: transcriptionProvider=${providerToken} transcriptionModel=${modelToken} tts=telephony audioFormat=${audioFormatToken}`;
}
|
||||
|
||||
function normalizeGoogleMeetTtsPromptText(text: string | undefined): string | undefined {
|
||||
const trimmed = text?.trim();
|
||||
if (!trimmed) {
|
||||
@@ -464,6 +528,13 @@ export async function startCommandAgentAudioBridge(params: {
|
||||
fullConfig: params.fullConfig,
|
||||
providers: params.providers,
|
||||
});
|
||||
params.logger.info(
|
||||
formatGoogleMeetAgentAudioModelLog({
|
||||
provider: resolved.provider,
|
||||
providerConfig: resolved.providerConfig,
|
||||
audioFormat: params.config.chrome.audioFormat,
|
||||
}),
|
||||
);
|
||||
|
||||
const terminateProcess = (proc: BridgeProcess, signal: NodeJS.Signals = "SIGTERM") => {
|
||||
if (proc.killed && signal !== "SIGKILL") {
|
||||
@@ -956,6 +1027,15 @@ export async function startCommandRealtimeAudioBridge(params: {
|
||||
providers: params.providers,
|
||||
});
|
||||
const strategy = params.config.realtime.strategy;
|
||||
params.logger.info(
|
||||
formatGoogleMeetRealtimeVoiceModelLog({
|
||||
strategy,
|
||||
provider: resolved.provider,
|
||||
providerConfig: resolved.providerConfig,
|
||||
fallbackModel: params.config.realtime.model,
|
||||
audioFormat: params.config.chrome.audioFormat,
|
||||
}),
|
||||
);
|
||||
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
|
||||
const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
|
||||
let agentConsultActive = false;
|
||||
|
||||
@@ -830,6 +830,7 @@ export function buildGoogleRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
|
||||
return {
|
||||
id: "google",
|
||||
label: "Google Live Voice",
|
||||
defaultModel: GOOGLE_REALTIME_DEFAULT_MODEL,
|
||||
autoSelectOrder: 20,
|
||||
resolveConfig: ({ cfg, rawConfig }) => normalizeProviderConfig(rawConfig, cfg),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
|
||||
@@ -249,6 +249,7 @@ export function buildMistralRealtimeTranscriptionProvider(): RealtimeTranscripti
|
||||
id: "mistral",
|
||||
label: "Mistral Realtime Transcription",
|
||||
aliases: ["mistral-realtime", "voxtral-realtime"],
|
||||
defaultModel: MISTRAL_REALTIME_DEFAULT_MODEL,
|
||||
autoSelectOrder: 45,
|
||||
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
|
||||
@@ -44,6 +44,7 @@ const OPENAI_REALTIME_TRANSCRIPTION_URL = "wss://api.openai.com/v1/realtime?inte
|
||||
const OPENAI_REALTIME_TRANSCRIPTION_CONNECT_TIMEOUT_MS = 10_000;
|
||||
const OPENAI_REALTIME_TRANSCRIPTION_MAX_RECONNECT_ATTEMPTS = 5;
|
||||
const OPENAI_REALTIME_TRANSCRIPTION_RECONNECT_DELAY_MS = 1000;
|
||||
const OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL = "gpt-4o-transcribe";
|
||||
|
||||
function normalizeProviderConfig(
|
||||
config: RealtimeTranscriptionProviderConfig,
|
||||
@@ -174,6 +175,7 @@ export function buildOpenAIRealtimeTranscriptionProvider(): RealtimeTranscriptio
|
||||
id: "openai",
|
||||
label: "OpenAI Realtime Transcription",
|
||||
aliases: ["openai-realtime"],
|
||||
defaultModel: OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL,
|
||||
autoSelectOrder: 10,
|
||||
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
@@ -188,7 +190,7 @@ export function buildOpenAIRealtimeTranscriptionProvider(): RealtimeTranscriptio
|
||||
...req,
|
||||
apiKey,
|
||||
language: config.language,
|
||||
model: config.model ?? "gpt-4o-transcribe",
|
||||
model: config.model ?? OPENAI_REALTIME_TRANSCRIPTION_DEFAULT_MODEL,
|
||||
prompt: config.prompt,
|
||||
silenceDurationMs: config.silenceDurationMs ?? 800,
|
||||
vadThreshold: config.vadThreshold ?? 0.5,
|
||||
|
||||
@@ -871,6 +871,7 @@ export function buildOpenAIRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
|
||||
return {
|
||||
id: "openai",
|
||||
label: "OpenAI Realtime Voice",
|
||||
defaultModel: OPENAI_REALTIME_DEFAULT_MODEL,
|
||||
autoSelectOrder: 10,
|
||||
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
|
||||
isConfigured: ({ providerConfig }) =>
|
||||
|
||||
@@ -1813,6 +1813,7 @@ export type RealtimeTranscriptionProviderPlugin = {
|
||||
id: RealtimeTranscriptionProviderId;
|
||||
label: string;
|
||||
aliases?: string[];
|
||||
defaultModel?: string;
|
||||
autoSelectOrder?: number;
|
||||
resolveConfig?: (
|
||||
ctx: RealtimeTranscriptionProviderResolveConfigContext,
|
||||
@@ -1830,6 +1831,7 @@ export type RealtimeVoiceProviderPlugin = {
|
||||
id: RealtimeVoiceProviderId;
|
||||
label: string;
|
||||
aliases?: string[];
|
||||
defaultModel?: string;
|
||||
autoSelectOrder?: number;
|
||||
resolveConfig?: (ctx: RealtimeVoiceProviderResolveConfigContext) => RealtimeVoiceProviderConfig;
|
||||
isConfigured: (ctx: RealtimeVoiceProviderConfiguredContext) => boolean;
|
||||
|
||||
Reference in New Issue
Block a user