diff --git a/CHANGELOG.md b/CHANGELOG.md index ed3ed3cbe00..e87306a167c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,7 @@ Docs: https://docs.openclaw.ai - Diagnostics: keep webhook/message OTEL attributes and Prometheus delivery labels low-cardinality and omit raw chat/message IDs from spans, so progress-draft and message-tool modes do not leak high-cardinality messaging identifiers. - Google Meet: stop advertising legacy `mode: "realtime"` to agents and config UIs, while keeping it as a hidden compatibility alias for `mode: "agent"`, so new joins use the STT -> OpenClaw agent -> TTS path instead of selecting the direct realtime voice fallback. +- Google Meet: add `chrome.audioBufferBytes` for generated command-pair SoX audio commands and lower the default buffer from SoX's 8192 bytes to 4096 bytes to reduce Chrome talk-back latency. - Telegram: render shared interactive reply buttons in reply delivery so plugin approval messages show inline keyboards. (#76238) Thanks @keshavbotagent. - Agents/cli-runner: drop a saved `claude-cli` resume sessionId at preparation time when its on-disk transcript no longer exists in `~/.claude/projects/`, so a stale binding from a half-installed `update.run` cannot trap follow-up runs (auto-reply / Telegram direct) in a `claude --resume` timeout loop; the run starts fresh and the new sessionId is written back through the existing post-run flow. (#77030; refs #77011) Thanks @openperf. - Release validation: install the cross-OS TypeScript harness through Windows-safe Node/npm shims so native Windows package checks reach the OpenClaw smoke suites instead of exiting before artifact capture. Thanks @vincentkoc. diff --git a/docs/plugins/google-meet.md b/docs/plugins/google-meet.md index 6a28f474aa0..5a5a9e8df89 100644 --- a/docs/plugins/google-meet.md +++ b/docs/plugins/google-meet.md @@ -1016,6 +1016,10 @@ Defaults: - `chrome.audioFormat: "pcm16-24khz"`: command-pair audio format. Use `"g711-ulaw-8khz"` only for legacy/custom command pairs that still emit telephony audio. +- `chrome.audioBufferBytes: 4096`: SoX processing buffer for generated Chrome + command-pair audio commands. This is half of SoX's default 8192-byte buffer, + reducing default pipe latency while leaving room to raise it on busy hosts. + Values below SoX's minimum are clamped to 17 bytes. - `chrome.audioInputCommand`: SoX command reading from CoreAudio `BlackHole 2ch` and writing audio in `chrome.audioFormat` - `chrome.audioOutputCommand`: SoX command reading audio in `chrome.audioFormat` @@ -1622,7 +1626,8 @@ Chrome talk-back modes need `BlackHole 2ch` plus either: bridge and pipes audio in `chrome.audioFormat` between those commands and the selected provider. Agent mode uses realtime transcription plus regular TTS; bidi mode uses the realtime voice provider. The default Chrome path is 24 kHz - PCM16; 8 kHz G.711 mu-law remains available for legacy command pairs. + PCM16 with `chrome.audioBufferBytes: 4096`; 8 kHz G.711 mu-law remains + available for legacy command pairs. - `chrome.audioBridgeCommand`: an external bridge command owns the whole local audio path and must exit after starting or validating its daemon. This is only valid for `bidi` because `agent` mode needs direct command-pair access for TTS. diff --git a/extensions/google-meet/index.test.ts b/extensions/google-meet/index.test.ts index e07cf525644..eb9c3c0d05c 100644 --- a/extensions/google-meet/index.test.ts +++ b/extensions/google-meet/index.test.ts @@ -329,9 +329,12 @@ describe("google-meet plugin", () => { autoJoin: true, waitForInCallMs: 20000, audioFormat: "pcm16-24khz", + audioBufferBytes: 4096, audioInputCommand: [ "sox", "-q", + "--buffer", + "4096", "-t", "coreaudio", "BlackHole 2ch", @@ -351,6 +354,8 @@ describe("google-meet plugin", () => { audioOutputCommand: [ "sox", "-q", + "--buffer", + "4096", "-t", "raw", "-r", @@ -410,18 +415,21 @@ describe("google-meet plugin", () => { }; expect(entry.configSchema.uiHints).toMatchObject({ + "chrome.audioBufferBytes": expect.objectContaining({ advanced: true }), "chrome.bargeInInputCommand": expect.objectContaining({ advanced: true }), "chrome.bargeInRmsThreshold": expect.objectContaining({ advanced: true }), "chrome.bargeInPeakThreshold": expect.objectContaining({ advanced: true }), "chrome.bargeInCooldownMs": expect.objectContaining({ advanced: true }), }); expect(manifest.uiHints).toMatchObject({ + "chrome.audioBufferBytes": expect.objectContaining({ advanced: true }), "chrome.bargeInInputCommand": expect.objectContaining({ advanced: true }), "chrome.bargeInRmsThreshold": expect.objectContaining({ advanced: true }), "chrome.bargeInPeakThreshold": expect.objectContaining({ advanced: true }), "chrome.bargeInCooldownMs": expect.objectContaining({ advanced: true }), }); expect(manifest.configSchema?.properties?.chrome?.properties).toMatchObject({ + audioBufferBytes: expect.objectContaining({ type: "number", default: 4096 }), bargeInInputCommand: expect.objectContaining({ type: "array", items: { type: "string" }, @@ -467,6 +475,47 @@ describe("google-meet plugin", () => { }); }); + it("lets generated Chrome audio commands use a configured SoX buffer", () => { + const config = resolveGoogleMeetConfig({ chrome: { audioBufferBytes: 2048 } }); + + expect(config.chrome.audioBufferBytes).toBe(2048); + expect(config.chrome.audioInputCommand).toEqual([ + "sox", + "-q", + "--buffer", + "2048", + "-t", + "coreaudio", + "BlackHole 2ch", + "-t", + "raw", + "-r", + "24000", + "-c", + "1", + "-e", + "signed-integer", + "-b", + "16", + "-L", + "-", + ]); + expect(config.chrome.audioOutputCommand?.slice(0, 4)).toEqual([ + "sox", + "-q", + "--buffer", + "2048", + ]); + }); + + it("clamps configured Chrome audio buffers above SoX's minimum", () => { + const config = resolveGoogleMeetConfig({ chrome: { audioBufferBytes: 1 } }); + + expect(config.chrome.audioBufferBytes).toBe(17); + expect(config.chrome.audioInputCommand?.slice(0, 4)).toEqual(["sox", "-q", "--buffer", "17"]); + expect(config.chrome.audioOutputCommand?.slice(0, 4)).toEqual(["sox", "-q", "--buffer", "17"]); + }); + it("uses env fallbacks for OAuth, preview, and default meeting values", () => { expect( resolveGoogleMeetConfigWithEnv( diff --git a/extensions/google-meet/index.ts b/extensions/google-meet/index.ts index dc81c56e6bb..4b8a004cd88 100644 --- a/extensions/google-meet/index.ts +++ b/extensions/google-meet/index.ts @@ -82,6 +82,11 @@ const googleMeetConfigSchema = { help: "Command-pair audio format. PCM16 24 kHz is the default Chrome/Meet path; G.711 mu-law 8 kHz remains available for legacy command pairs.", advanced: true, }, + "chrome.audioBufferBytes": { + label: "Audio Buffer Bytes", + help: "SoX processing buffer for generated Chrome command-pair audio commands. Lower values reduce latency but may underrun on busy hosts.", + advanced: true, + }, "chrome.audioInputCommand": { label: "Audio Input Command", help: "Command that writes meeting audio to stdout in chrome.audioFormat.", diff --git a/extensions/google-meet/openclaw.plugin.json b/extensions/google-meet/openclaw.plugin.json index 5ee9b6e2378..dc68357152d 100644 --- a/extensions/google-meet/openclaw.plugin.json +++ b/extensions/google-meet/openclaw.plugin.json @@ -93,6 +93,11 @@ "help": "Command-pair audio format. PCM16 24 kHz is the default Chrome/Meet path; G.711 mu-law 8 kHz remains available for legacy command pairs.", "advanced": true }, + "chrome.audioBufferBytes": { + "label": "Audio Buffer Bytes", + "help": "SoX processing buffer for generated Chrome command-pair audio commands. Lower values reduce latency but may underrun on busy hosts.", + "advanced": true + }, "chrome.audioBridgeCommand": { "label": "Audio Bridge Command", "advanced": true @@ -272,11 +277,17 @@ "enum": ["pcm16-24khz", "g711-ulaw-8khz"], "default": "pcm16-24khz" }, + "audioBufferBytes": { + "type": "number", + "default": 4096 + }, "audioInputCommand": { "type": "array", "default": [ "sox", "-q", + "--buffer", + "4096", "-t", "coreaudio", "BlackHole 2ch", @@ -302,6 +313,8 @@ "default": [ "sox", "-q", + "--buffer", + "4096", "-t", "raw", "-r", diff --git a/extensions/google-meet/src/config.ts b/extensions/google-meet/src/config.ts index ae423e89052..5543b0c935b 100644 --- a/extensions/google-meet/src/config.ts +++ b/extensions/google-meet/src/config.ts @@ -28,6 +28,7 @@ export type GoogleMeetConfig = { chrome: { audioBackend: "blackhole-2ch"; audioFormat: GoogleMeetChromeAudioFormat; + audioBufferBytes: number; launch: boolean; browserProfile?: string; guestName: string; @@ -86,7 +87,15 @@ export type GoogleMeetConfig = { }; }; -export const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [ +const SOX_DEFAULT_BUFFER_BYTES = 8192; +const SOX_MIN_BUFFER_BYTES = 17; +export const DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES = SOX_DEFAULT_BUFFER_BYTES / 2; + +function withSoxBuffer(command: readonly string[], bufferBytes: number): string[] { + return [command[0] ?? "sox", "-q", "--buffer", String(bufferBytes), ...command.slice(2)]; +} + +const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE = [ "sox", "-q", "-t", @@ -106,7 +115,7 @@ export const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [ "-", ] as const; -export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [ +const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE = [ "sox", "-q", "-t", @@ -126,7 +135,7 @@ export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [ "BlackHole 2ch", ] as const; -const LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [ +const LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE = [ "rec", "-q", "-t", @@ -142,7 +151,7 @@ const LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [ "-", ] as const; -const LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [ +const LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE = [ "play", "-q", "-t", @@ -158,6 +167,16 @@ const LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [ "-", ] as const; +export const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND = withSoxBuffer( + DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE, + DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES, +); + +export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = withSoxBuffer( + DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE, + DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES, +); + const DEFAULT_GOOGLE_MEET_CHROME_AUDIO_FORMAT: GoogleMeetChromeAudioFormat = "pcm16-24khz"; const DEFAULT_GOOGLE_MEET_BARGE_IN_RMS_THRESHOLD = 650; const DEFAULT_GOOGLE_MEET_BARGE_IN_PEAK_THRESHOLD = 2500; @@ -177,6 +196,7 @@ const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = { chrome: { audioBackend: "blackhole-2ch", audioFormat: DEFAULT_GOOGLE_MEET_CHROME_AUDIO_FORMAT, + audioBufferBytes: DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES, launch: true, guestName: "OpenClaw Agent", reuseExistingTab: true, @@ -361,16 +381,36 @@ function resolveChromeAudioFormat(value: unknown): GoogleMeetChromeAudioFormat | } } -function defaultAudioInputCommand(format: GoogleMeetChromeAudioFormat): readonly string[] { - return format === "g711-ulaw-8khz" - ? LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND - : DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND; +function resolveAudioBufferBytes(value: unknown, fallback: number): number { + const number = resolveNumber(value, fallback); + if (!Number.isFinite(number) || number <= 0) { + return fallback; + } + return Math.max(SOX_MIN_BUFFER_BYTES, Math.trunc(number)); } -function defaultAudioOutputCommand(format: GoogleMeetChromeAudioFormat): readonly string[] { - return format === "g711-ulaw-8khz" - ? LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND - : DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND; +function defaultAudioInputCommand( + format: GoogleMeetChromeAudioFormat, + bufferBytes: number, +): string[] { + return withSoxBuffer( + format === "g711-ulaw-8khz" + ? LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE + : DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE, + bufferBytes, + ); +} + +function defaultAudioOutputCommand( + format: GoogleMeetChromeAudioFormat, + bufferBytes: number, +): string[] { + return withSoxBuffer( + format === "g711-ulaw-8khz" + ? LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE + : DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE, + bufferBytes, + ); } export function resolveGoogleMeetConfig(input: unknown): GoogleMeetConfig { @@ -392,6 +432,10 @@ export function resolveGoogleMeetConfigWithEnv( const audioFormat = resolveChromeAudioFormat(chrome.audioFormat) ?? (hasCustomAudioCommand ? "g711-ulaw-8khz" : DEFAULT_GOOGLE_MEET_CONFIG.chrome.audioFormat); + const audioBufferBytes = resolveAudioBufferBytes( + chrome.audioBufferBytes, + DEFAULT_GOOGLE_MEET_CONFIG.chrome.audioBufferBytes, + ); const chromeNode = asRecord(raw.chromeNode); const twilio = asRecord(raw.twilio); const voiceCall = asRecord(raw.voiceCall); @@ -421,6 +465,7 @@ export function resolveGoogleMeetConfigWithEnv( chrome: { audioBackend: "blackhole-2ch", audioFormat, + audioBufferBytes, launch: resolveBoolean(chrome.launch, DEFAULT_GOOGLE_MEET_CONFIG.chrome.launch), browserProfile: normalizeOptionalString(chrome.browserProfile), guestName: @@ -438,10 +483,10 @@ export function resolveGoogleMeetConfigWithEnv( chrome.waitForInCallMs, DEFAULT_GOOGLE_MEET_CONFIG.chrome.waitForInCallMs, ), - audioInputCommand: configuredAudioInputCommand ?? [...defaultAudioInputCommand(audioFormat)], - audioOutputCommand: configuredAudioOutputCommand ?? [ - ...defaultAudioOutputCommand(audioFormat), - ], + audioInputCommand: + configuredAudioInputCommand ?? defaultAudioInputCommand(audioFormat, audioBufferBytes), + audioOutputCommand: + configuredAudioOutputCommand ?? defaultAudioOutputCommand(audioFormat, audioBufferBytes), bargeInInputCommand: resolveStringArray(chrome.bargeInInputCommand), bargeInRmsThreshold: resolveNumber( chrome.bargeInRmsThreshold,