diff --git a/CHANGELOG.md b/CHANGELOG.md index 41770ed7d76..c748752d0f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ Docs: https://docs.openclaw.ai - Diagnostics: keep webhook/message OTEL attributes and Prometheus delivery labels low-cardinality and omit raw chat/message IDs from spans, so progress-draft and message-tool modes do not leak high-cardinality messaging identifiers. - Google Meet: stop advertising legacy `mode: "realtime"` to agents and config UIs, while keeping it as a hidden compatibility alias for `mode: "agent"`, so new joins use the STT -> OpenClaw agent -> TTS path instead of selecting the direct realtime voice fallback. - Google Meet: add `chrome.audioBufferBytes` for generated command-pair SoX audio commands and lower the default buffer from SoX's 8192 bytes to 4096 bytes to reduce Chrome talk-back latency. +- Google Meet: split realtime provider config into agent-mode transcription and bidi-mode voice providers, and migrate legacy Gemini Live bidi configs with `doctor --fix`, so Gemini Live can back direct bidi fallback without breaking the default OpenClaw agent talk-back path. - Telegram: render shared interactive reply buttons in reply delivery so plugin approval messages show inline keyboards. (#76238) Thanks @keshavbotagent. - Agents/cli-runner: drop a saved `claude-cli` resume sessionId at preparation time when its on-disk transcript no longer exists in `~/.claude/projects/`, so a stale binding from a half-installed `update.run` cannot trap follow-up runs (auto-reply / Telegram direct) in a `claude --resume` timeout loop; the run starts fresh and the new sessionId is written back through the existing post-run flow. (#77030; refs #77011) Thanks @openperf. - Release validation: install the cross-OS TypeScript harness through Windows-safe Node/npm shims so native Windows package checks reach the OpenClaw smoke suites instead of exiting before artifact capture. Thanks @vincentkoc. diff --git a/docs/plugins/google-meet.md b/docs/plugins/google-meet.md index 5a5a9e8df89..e03a944cae4 100644 --- a/docs/plugins/google-meet.md +++ b/docs/plugins/google-meet.md @@ -31,13 +31,13 @@ Google Meet participant support for OpenClaw — the plugin is explicit by desig Install the local audio dependencies and configure a realtime transcription provider plus regular OpenClaw TTS. OpenAI is the default transcription -provider; Google Gemini Live also works with `realtime.provider: "google"` for -`bidi` mode: +provider; Google Gemini Live also works as a separate `bidi` voice fallback with +`realtime.voiceProvider: "google"`: ```bash brew install blackhole-2ch sox export OPENAI_API_KEY=sk-... -# or +# only needed when realtime.voiceProvider is "google" for bidi mode export GEMINI_API_KEY=... ``` @@ -973,8 +973,9 @@ Workspace Developer Preview Program for Meet media APIs. The common Chrome agent path only needs the plugin enabled, BlackHole, SoX, a realtime transcription provider key, and a configured OpenClaw TTS provider. -OpenAI is the default transcription provider; set `realtime.provider: "google"` -to use Google Gemini Live for `bidi` mode: +OpenAI is the default transcription provider; set `realtime.voiceProvider` to +`"google"` and `realtime.model` to use Google Gemini Live for `bidi` mode +without changing the default agent-mode transcription provider: ```bash brew install blackhole-2ch sox @@ -1042,8 +1043,13 @@ Defaults: realtime voice provider answers participant speech directly and may call `openclaw_agent_consult` for deeper/tool-backed answers. - `mode: "transcribe"`: observe-only mode without the talk-back bridge. -- `realtime.provider: "openai"`: provider id used by `agent` mode for realtime - transcription and by `bidi` mode for realtime voice. +- `realtime.provider: "openai"`: compatibility fallback used when the scoped + provider fields below are unset. +- `realtime.transcriptionProvider: "openai"`: provider id used by `agent` mode + for realtime transcription. +- `realtime.voiceProvider`: provider id used by `bidi` mode for direct realtime + voice. Set this to `"google"` to use Gemini Live while keeping agent-mode + transcription on OpenAI. - `realtime.toolPolicy: "safe-read-only"` - `realtime.instructions`: brief spoken replies, with `openclaw_agent_consult` for deeper answers @@ -1089,13 +1095,15 @@ Optional overrides: }, defaultMode: "agent", realtime: { - provider: "google", + provider: "openai", + transcriptionProvider: "openai", + voiceProvider: "google", + model: "gemini-2.5-flash-native-audio-preview-12-2025", agentId: "jay", toolPolicy: "owner", introMessage: "Say exactly: I'm here.", providers: { google: { - model: "gemini-2.5-flash-native-audio-preview-12-2025", voice: "Kore", }, }, diff --git a/extensions/google-meet/doctor-contract-api.ts b/extensions/google-meet/doctor-contract-api.ts new file mode 100644 index 00000000000..db610ee157d --- /dev/null +++ b/extensions/google-meet/doctor-contract-api.ts @@ -0,0 +1 @@ +export { legacyConfigRules, normalizeCompatibilityConfig } from "./src/config-compat.js"; diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index eb9c3c0d05c..87fba57c8d8 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -29,6 +29,8 @@ import { convertGoogleMeetTtsAudioForBridge, extendGoogleMeetOutputEchoSuppression, isGoogleMeetLikelyAssistantEchoTranscript, + resolveGoogleMeetRealtimeProvider, + resolveGoogleMeetRealtimeTranscriptionProvider, startCommandAgentAudioBridge, startCommandRealtimeAudioBridge, } from "./src/realtime.js"; @@ -385,6 +387,7 @@ describe("google-meet plugin", () => { realtime: { strategy: "agent", provider: "openai", + transcriptionProvider: "openai", introMessage: "Say exactly: I'm here and listening.", toolPolicy: "safe-read-only", }, @@ -395,6 +398,87 @@ describe("google-meet plugin", () => { expect(resolveGoogleMeetConfig({}).realtime.instructions).toContain("openclaw_agent_consult"); }); + it("resolves separate realtime providers for agent transcription and bidi voice", () => { + expect( + resolveGoogleMeetConfig({ + realtime: { + provider: "openai", + transcriptionProvider: "openai", + voiceProvider: "google", + model: "gemini-2.5-flash-native-audio-preview-12-2025", + }, + }).realtime, + ).toMatchObject({ + provider: "openai", + transcriptionProvider: "openai", + voiceProvider: "google", + model: "gemini-2.5-flash-native-audio-preview-12-2025", + }); + }); + + it("uses voiceProvider for bidi and transcriptionProvider for agent mode resolution", () => { + const voiceProviders: RealtimeVoiceProviderPlugin[] = [ + { + id: "openai", + label: "OpenAI", + autoSelectOrder: 1, + isConfigured: () => true, + createBridge: () => { + throw new Error("unused"); + }, + }, + { + id: "google", + label: "Google", + autoSelectOrder: 2, + resolveConfig: ({ rawConfig }) => rawConfig, + isConfigured: () => true, + createBridge: () => { + throw new Error("unused"); + }, + }, + ]; + const transcriptionProviders: RealtimeTranscriptionProviderPlugin[] = [ + { + id: "openai", + label: "OpenAI", + autoSelectOrder: 1, + isConfigured: () => true, + createSession: () => { + throw new Error("unused"); + }, + }, + ]; + const config = resolveGoogleMeetConfig({ + realtime: { + provider: "openai", + transcriptionProvider: "openai", + voiceProvider: "google", + model: "gemini-2.5-flash-native-audio-preview-12-2025", + }, + }); + + expect( + resolveGoogleMeetRealtimeProvider({ + config, + fullConfig: {} as never, + providers: voiceProviders, + }), + ).toMatchObject({ + provider: { id: "google" }, + providerConfig: { model: "gemini-2.5-flash-native-audio-preview-12-2025" }, + }); + expect( + resolveGoogleMeetRealtimeTranscriptionProvider({ + config, + fullConfig: {} as never, + providers: transcriptionProviders, + }), + ).toMatchObject({ + provider: { id: "openai" }, + }); + }); + it("declares barge-in config metadata in the plugin entry and manifest", () => { const manifest = JSON.parse( readFileSync(new URL("./openclaw.plugin.json", import.meta.url), "utf8"), diff --git a/extensions/google-meet/index.ts b/extensions/google-meet/index.ts index 4b8a004cd88..6da6ee71b43 100644 --- a/extensions/google-meet/index.ts +++ b/extensions/google-meet/index.ts @@ -161,7 +161,15 @@ const googleMeetConfigSchema = { }, "realtime.provider": { label: "Speech Provider", - help: "Agent mode uses this for realtime transcription. Bidi mode uses it as the realtime voice provider.", + help: "Compatibility fallback for both realtime transcription and bidi voice. Prefer realtime.transcriptionProvider and realtime.voiceProvider for new configs.", + }, + "realtime.transcriptionProvider": { + label: "Realtime Transcription Provider", + help: "Agent mode uses this provider to transcribe meeting audio before regular OpenClaw TTS answers.", + }, + "realtime.voiceProvider": { + label: "Bidi Voice Provider", + help: "Bidi mode uses this realtime voice provider. Falls back to realtime.provider when unset.", }, "realtime.model": { label: "Bidi Realtime Model", diff --git a/extensions/google-meet/openclaw.plugin.json b/extensions/google-meet/openclaw.plugin.json index dc68357152d..b909b035a6d 100644 --- a/extensions/google-meet/openclaw.plugin.json +++ b/extensions/google-meet/openclaw.plugin.json @@ -154,7 +154,15 @@ }, "realtime.provider": { "label": "Speech Provider", - "help": "Agent mode uses this for realtime transcription. Bidi mode uses it as the realtime voice provider." + "help": "Compatibility fallback for both realtime transcription and bidi voice. Prefer realtime.transcriptionProvider and realtime.voiceProvider for new configs." + }, + "realtime.transcriptionProvider": { + "label": "Realtime Transcription Provider", + "help": "Agent mode uses this provider to transcribe meeting audio before regular OpenClaw TTS answers." + }, + "realtime.voiceProvider": { + "label": "Bidi Voice Provider", + "help": "Bidi mode uses this realtime voice provider. Falls back to realtime.provider when unset." }, "realtime.model": { "label": "Bidi Realtime Model", @@ -431,6 +439,13 @@ "type": "string", "default": "openai" }, + "transcriptionProvider": { + "type": "string", + "default": "openai" + }, + "voiceProvider": { + "type": "string" + }, "model": { "type": "string" }, @@ -501,5 +516,8 @@ } } } + }, + "configContracts": { + "compatibilityMigrationPaths": ["plugins.entries.google-meet.config.realtime.provider"] } } diff --git a/extensions/google-meet/src/config-compat.test.ts b/extensions/google-meet/src/config-compat.test.ts new file mode 100644 index 00000000000..05c3472a269 --- /dev/null +++ b/extensions/google-meet/src/config-compat.test.ts @@ -0,0 +1,98 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types"; +import { describe, expect, it } from "vitest"; +import { + legacyConfigRules, + migrateGoogleMeetLegacyRealtimeProvider, + normalizeCompatibilityConfig, +} from "./config-compat.js"; + +describe("google-meet config compatibility", () => { + it("detects legacy Google realtime provider config", () => { + expect( + legacyConfigRules[0]?.match({ + provider: "google", + model: "gemini-2.5-flash-native-audio-preview-12-2025", + }), + ).toBe(true); + }); + + it("migrates legacy Google bidi provider intent to scoped realtime providers", () => { + const config = { + plugins: { + entries: { + "google-meet": { + enabled: true, + config: { + defaultMode: "agent", + realtime: { + provider: "google", + model: "gemini-2.5-flash-native-audio-preview-12-2025", + providers: { + google: { + voice: "Kore", + }, + }, + }, + }, + }, + }, + }, + } as OpenClawConfig; + + const migration = migrateGoogleMeetLegacyRealtimeProvider(config); + + expect(migration?.changes).toEqual([ + 'Moved Google Meet legacy realtime.provider="google" intent to realtime.voiceProvider="google" and realtime.transcriptionProvider="openai".', + ]); + expect( + ( + migration?.config.plugins?.entries?.["google-meet"] as { + config?: { realtime?: Record }; + } + ).config?.realtime, + ).toEqual({ + provider: "openai", + transcriptionProvider: "openai", + voiceProvider: "google", + model: "gemini-2.5-flash-native-audio-preview-12-2025", + providers: { + google: { + voice: "Kore", + }, + }, + }); + }); + + it("leaves fully scoped provider configs alone", () => { + const config = { + plugins: { + entries: { + "google-meet": { + config: { + realtime: { + provider: "google", + transcriptionProvider: "custom-stt", + voiceProvider: "custom-voice", + }, + }, + }, + }, + }, + } as OpenClawConfig; + + const migration = normalizeCompatibilityConfig({ cfg: config }); + + expect(migration.changes).toEqual([]); + expect( + ( + migration.config.plugins?.entries?.["google-meet"] as { + config?: { realtime?: Record }; + } + ).config?.realtime, + ).toEqual({ + provider: "google", + transcriptionProvider: "custom-stt", + voiceProvider: "custom-voice", + }); + }); +}); diff --git a/extensions/google-meet/src/config-compat.ts b/extensions/google-meet/src/config-compat.ts new file mode 100644 index 00000000000..c971852e4e8 --- /dev/null +++ b/extensions/google-meet/src/config-compat.ts @@ -0,0 +1,84 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types"; + +type LegacyConfigRule = { + path: Array; + message: string; + match: (value: unknown) => boolean; +}; + +function asRecord(value: unknown): Record | null { + return value && typeof value === "object" && !Array.isArray(value) + ? (value as Record) + : null; +} + +function normalizeProviderId(value: unknown): string | undefined { + return typeof value === "string" && value.trim() ? value.trim().toLowerCase() : undefined; +} + +function hasOwn(record: Record, key: string): boolean { + return Object.prototype.hasOwnProperty.call(record, key); +} + +function hasLegacyGoogleRealtimeProvider(value: unknown): boolean { + const realtime = asRecord(value); + if (!realtime || normalizeProviderId(realtime.provider) !== "google") { + return false; + } + return !hasOwn(realtime, "voiceProvider") || !hasOwn(realtime, "transcriptionProvider"); +} + +export const legacyConfigRules: LegacyConfigRule[] = [ + { + path: ["plugins", "entries", "google-meet", "config", "realtime"], + message: + 'plugins.entries.google-meet.config.realtime.provider="google" is legacy for Gemini Live bidi mode; use realtime.voiceProvider="google" and realtime.transcriptionProvider="openai". Run "openclaw doctor --fix".', + match: hasLegacyGoogleRealtimeProvider, + }, +]; + +export function migrateGoogleMeetLegacyRealtimeProvider(config: OpenClawConfig): { + config: OpenClawConfig; + changes: string[]; +} | null { + const rawEntry = asRecord(config.plugins?.entries?.["google-meet"]); + const rawPluginConfig = asRecord(rawEntry?.config); + const rawRealtime = asRecord(rawPluginConfig?.realtime); + if (!rawRealtime || !hasLegacyGoogleRealtimeProvider(rawRealtime)) { + return null; + } + + const nextConfig = structuredClone(config); + const nextPlugins = asRecord(nextConfig.plugins) ?? {}; + nextConfig.plugins = nextPlugins; + const nextEntries = asRecord(nextPlugins.entries) ?? {}; + nextPlugins.entries = nextEntries; + const nextEntry = asRecord(nextEntries["google-meet"]) ?? {}; + nextEntries["google-meet"] = nextEntry; + const nextPluginConfig = asRecord(nextEntry.config) ?? {}; + nextEntry.config = nextPluginConfig; + const nextRealtime = asRecord(nextPluginConfig.realtime) ?? {}; + nextPluginConfig.realtime = nextRealtime; + + nextRealtime.provider = "openai"; + if (!hasOwn(nextRealtime, "transcriptionProvider")) { + nextRealtime.transcriptionProvider = "openai"; + } + if (!hasOwn(nextRealtime, "voiceProvider")) { + nextRealtime.voiceProvider = "google"; + } + + return { + config: nextConfig, + changes: [ + 'Moved Google Meet legacy realtime.provider="google" intent to realtime.voiceProvider="google" and realtime.transcriptionProvider="openai".', + ], + }; +} + +export function normalizeCompatibilityConfig({ cfg }: { cfg: OpenClawConfig }): { + config: OpenClawConfig; + changes: string[]; +} { + return migrateGoogleMeetLegacyRealtimeProvider(cfg) ?? { config: cfg, changes: [] }; +} diff --git a/extensions/google-meet/src/config.ts b/extensions/google-meet/src/config.ts index 5543b0c935b..327ed0d27fb 100644 --- a/extensions/google-meet/src/config.ts +++ b/extensions/google-meet/src/config.ts @@ -65,6 +65,8 @@ export type GoogleMeetConfig = { realtime: { strategy: GoogleMeetRealtimeStrategy; provider?: string; + transcriptionProvider?: string; + voiceProvider?: string; model?: string; instructions?: string; introMessage?: string; @@ -220,6 +222,7 @@ const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = { realtime: { strategy: "agent", provider: "openai", + transcriptionProvider: "openai", instructions: DEFAULT_GOOGLE_MEET_REALTIME_INSTRUCTIONS, introMessage: DEFAULT_GOOGLE_MEET_REALTIME_INTRO_MESSAGE, toolPolicy: "safe-read-only", @@ -536,6 +539,10 @@ export function resolveGoogleMeetConfigWithEnv( ), provider: normalizeOptionalString(realtime.provider) ?? DEFAULT_GOOGLE_MEET_CONFIG.realtime.provider, + transcriptionProvider: + normalizeOptionalString(realtime.transcriptionProvider) ?? + DEFAULT_GOOGLE_MEET_CONFIG.realtime.transcriptionProvider, + voiceProvider: normalizeOptionalString(realtime.voiceProvider), model: normalizeOptionalString(realtime.model) ?? DEFAULT_GOOGLE_MEET_CONFIG.realtime.model, instructions: normalizeOptionalString(realtime.instructions) ?? diff --git a/extensions/google-meet/src/realtime.ts b/extensions/google-meet/src/realtime.ts index c8d8ee92c4f..4e4191f7d56 100644 --- a/extensions/google-meet/src/realtime.ts +++ b/extensions/google-meet/src/realtime.ts @@ -357,8 +357,9 @@ export function resolveGoogleMeetRealtimeProvider(params: { fullConfig: OpenClawConfig; providers?: RealtimeVoiceProviderPlugin[]; }): ResolvedRealtimeProvider { + const providerId = params.config.realtime.voiceProvider ?? params.config.realtime.provider; return resolveConfiguredRealtimeVoiceProvider({ - configuredProviderId: params.config.realtime.provider, + configuredProviderId: providerId, providerConfigs: params.config.realtime.providers, cfg: params.fullConfig, providers: params.providers, @@ -376,19 +377,19 @@ export function resolveGoogleMeetRealtimeTranscriptionProvider(params: { if (providers.length === 0) { throw new Error("No configured realtime transcription provider registered"); } - const configuredProvider = params.config.realtime.provider + const providerId = + params.config.realtime.transcriptionProvider ?? params.config.realtime.provider; + const configuredProvider = providerId ? (params.providers?.find( - (entry) => - entry.id === params.config.realtime.provider || - entry.aliases?.includes(params.config.realtime.provider ?? ""), - ) ?? getRealtimeTranscriptionProvider(params.config.realtime.provider, params.fullConfig)) + (entry) => entry.id === providerId || entry.aliases?.includes(providerId), + ) ?? getRealtimeTranscriptionProvider(providerId, params.fullConfig)) : undefined; const provider = configuredProvider ?? providers[0]; if (!provider) { throw new Error("No configured realtime transcription provider registered"); } - const rawConfig = params.config.realtime.provider - ? (params.config.realtime.providers[params.config.realtime.provider] ?? + const rawConfig = providerId + ? (params.config.realtime.providers[providerId] ?? params.config.realtime.providers[provider.id] ?? {}) : (params.config.realtime.providers[provider.id] ?? {}); diff --git a/extensions/google-meet/src/runtime.ts b/extensions/google-meet/src/runtime.ts index a4588e55f15..023599f1562 100644 --- a/extensions/google-meet/src/runtime.ts +++ b/extensions/google-meet/src/runtime.ts @@ -402,9 +402,16 @@ export class GoogleMeetRuntime { realtime: { enabled: isGoogleMeetTalkBackMode(mode), strategy: mode === "bidi" ? "bidi" : "agent", - provider: mode === "bidi" ? this.params.config.realtime.provider : undefined, + provider: + mode === "bidi" + ? (this.params.config.realtime.voiceProvider ?? this.params.config.realtime.provider) + : undefined, model: mode === "bidi" ? this.params.config.realtime.model : undefined, - transcriptionProvider: mode === "agent" ? this.params.config.realtime.provider : undefined, + transcriptionProvider: + mode === "agent" + ? (this.params.config.realtime.transcriptionProvider ?? + this.params.config.realtime.provider) + : undefined, toolPolicy: this.params.config.realtime.toolPolicy, }, notes: [],