fix(google-meet): clamp audio buffer config

This commit is contained in:
Peter Steinberger
2026-05-04 03:38:26 +01:00
parent 571d75aab3
commit c956946b26
6 changed files with 135 additions and 17 deletions

View File

@@ -43,6 +43,7 @@ Docs: https://docs.openclaw.ai
- Diagnostics: keep webhook/message OTEL attributes and Prometheus delivery labels low-cardinality and omit raw chat/message IDs from spans, so progress-draft and message-tool modes do not leak high-cardinality messaging identifiers.
- Google Meet: stop advertising legacy `mode: "realtime"` to agents and config UIs, while keeping it as a hidden compatibility alias for `mode: "agent"`, so new joins use the STT -> OpenClaw agent -> TTS path instead of selecting the direct realtime voice fallback.
- Google Meet: add `chrome.audioBufferBytes` for generated command-pair SoX audio commands and lower the default buffer from SoX's 8192 bytes to 4096 bytes to reduce Chrome talk-back latency.
- Telegram: render shared interactive reply buttons in reply delivery so plugin approval messages show inline keyboards. (#76238) Thanks @keshavbotagent.
- Agents/cli-runner: drop a saved `claude-cli` resume sessionId at preparation time when its on-disk transcript no longer exists in `~/.claude/projects/`, so a stale binding from a half-installed `update.run` cannot trap follow-up runs (auto-reply / Telegram direct) in a `claude --resume` timeout loop; the run starts fresh and the new sessionId is written back through the existing post-run flow. (#77030; refs #77011) Thanks @openperf.
- Release validation: install the cross-OS TypeScript harness through Windows-safe Node/npm shims so native Windows package checks reach the OpenClaw smoke suites instead of exiting before artifact capture. Thanks @vincentkoc.

View File

@@ -1016,6 +1016,10 @@ Defaults:
- `chrome.audioFormat: "pcm16-24khz"`: command-pair audio format. Use
`"g711-ulaw-8khz"` only for legacy/custom command pairs that still emit
telephony audio.
- `chrome.audioBufferBytes: 4096`: SoX processing buffer for generated Chrome
command-pair audio commands. This is half of SoX's default 8192-byte buffer,
reducing default pipe latency while leaving room to raise it on busy hosts.
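Positive values below SoX's 17-byte minimum are clamped to 17 bytes, fractional
values are truncated, and zero or negative values fall back to the default.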
- `chrome.audioInputCommand`: SoX command reading from CoreAudio `BlackHole 2ch`
and writing audio in `chrome.audioFormat`
- `chrome.audioOutputCommand`: SoX command reading audio in `chrome.audioFormat`
@@ -1622,7 +1626,8 @@ Chrome talk-back modes need `BlackHole 2ch` plus either:
bridge and pipes audio in `chrome.audioFormat` between those commands and the
selected provider. Agent mode uses realtime transcription plus regular TTS;
bidi mode uses the realtime voice provider. The default Chrome path is 24 kHz
PCM16; 8 kHz G.711 mu-law remains available for legacy command pairs.
PCM16 with `chrome.audioBufferBytes: 4096`; 8 kHz G.711 mu-law remains
available for legacy command pairs.
- `chrome.audioBridgeCommand`: an external bridge command owns the whole local
audio path and must exit after starting or validating its daemon. This is only
valid for `bidi` because `agent` mode needs direct command-pair access for TTS.

View File

@@ -329,9 +329,12 @@ describe("google-meet plugin", () => {
autoJoin: true,
waitForInCallMs: 20000,
audioFormat: "pcm16-24khz",
audioBufferBytes: 4096,
audioInputCommand: [
"sox",
"-q",
"--buffer",
"4096",
"-t",
"coreaudio",
"BlackHole 2ch",
@@ -351,6 +354,8 @@ describe("google-meet plugin", () => {
audioOutputCommand: [
"sox",
"-q",
"--buffer",
"4096",
"-t",
"raw",
"-r",
@@ -410,18 +415,21 @@ describe("google-meet plugin", () => {
};
expect(entry.configSchema.uiHints).toMatchObject({
"chrome.audioBufferBytes": expect.objectContaining({ advanced: true }),
"chrome.bargeInInputCommand": expect.objectContaining({ advanced: true }),
"chrome.bargeInRmsThreshold": expect.objectContaining({ advanced: true }),
"chrome.bargeInPeakThreshold": expect.objectContaining({ advanced: true }),
"chrome.bargeInCooldownMs": expect.objectContaining({ advanced: true }),
});
expect(manifest.uiHints).toMatchObject({
"chrome.audioBufferBytes": expect.objectContaining({ advanced: true }),
"chrome.bargeInInputCommand": expect.objectContaining({ advanced: true }),
"chrome.bargeInRmsThreshold": expect.objectContaining({ advanced: true }),
"chrome.bargeInPeakThreshold": expect.objectContaining({ advanced: true }),
"chrome.bargeInCooldownMs": expect.objectContaining({ advanced: true }),
});
expect(manifest.configSchema?.properties?.chrome?.properties).toMatchObject({
audioBufferBytes: expect.objectContaining({ type: "number", default: 4096 }),
bargeInInputCommand: expect.objectContaining({
type: "array",
items: { type: "string" },
@@ -467,6 +475,47 @@ describe("google-meet plugin", () => {
});
});
// A configured chrome.audioBufferBytes is kept as-is on the resolved config and is
// rendered as `--buffer <bytes>` right after `-q` in both generated SoX commands.
it("lets generated Chrome audio commands use a configured SoX buffer", () => {
const config = resolveGoogleMeetConfig({ chrome: { audioBufferBytes: 2048 } });
expect(config.chrome.audioBufferBytes).toBe(2048);
expect(config.chrome.audioInputCommand).toEqual([
"sox",
"-q",
"--buffer",
"2048",
"-t",
"coreaudio",
"BlackHole 2ch",
"-t",
"raw",
"-r",
"24000",
"-c",
"1",
"-e",
"signed-integer",
"-b",
"16",
"-L",
"-",
]);
// Only the leading arguments are checked for the output command.
expect(config.chrome.audioOutputCommand?.slice(0, 4)).toEqual([
"sox",
"-q",
"--buffer",
"2048",
]);
});
// A positive value below the minimum (here 1) is raised to 17, and both the resolved
// config value and the generated SoX commands carry the clamped size.
it("clamps configured Chrome audio buffers above SoX's minimum", () => {
const config = resolveGoogleMeetConfig({ chrome: { audioBufferBytes: 1 } });
expect(config.chrome.audioBufferBytes).toBe(17);
expect(config.chrome.audioInputCommand?.slice(0, 4)).toEqual(["sox", "-q", "--buffer", "17"]);
expect(config.chrome.audioOutputCommand?.slice(0, 4)).toEqual(["sox", "-q", "--buffer", "17"]);
});
it("uses env fallbacks for OAuth, preview, and default meeting values", () => {
expect(
resolveGoogleMeetConfigWithEnv(

View File

@@ -82,6 +82,11 @@ const googleMeetConfigSchema = {
help: "Command-pair audio format. PCM16 24 kHz is the default Chrome/Meet path; G.711 mu-law 8 kHz remains available for legacy command pairs.",
advanced: true,
},
"chrome.audioBufferBytes": {
label: "Audio Buffer Bytes",
help: "SoX processing buffer for generated Chrome command-pair audio commands. Lower values reduce latency but may underrun on busy hosts.",
advanced: true,
},
"chrome.audioInputCommand": {
label: "Audio Input Command",
help: "Command that writes meeting audio to stdout in chrome.audioFormat.",

View File

@@ -93,6 +93,11 @@
"help": "Command-pair audio format. PCM16 24 kHz is the default Chrome/Meet path; G.711 mu-law 8 kHz remains available for legacy command pairs.",
"advanced": true
},
"chrome.audioBufferBytes": {
"label": "Audio Buffer Bytes",
"help": "SoX processing buffer for generated Chrome command-pair audio commands. Lower values reduce latency but may underrun on busy hosts.",
"advanced": true
},
"chrome.audioBridgeCommand": {
"label": "Audio Bridge Command",
"advanced": true
@@ -272,11 +277,17 @@
"enum": ["pcm16-24khz", "g711-ulaw-8khz"],
"default": "pcm16-24khz"
},
"audioBufferBytes": {
"type": "number",
"default": 4096
},
"audioInputCommand": {
"type": "array",
"default": [
"sox",
"-q",
"--buffer",
"4096",
"-t",
"coreaudio",
"BlackHole 2ch",
@@ -302,6 +313,8 @@
"default": [
"sox",
"-q",
"--buffer",
"4096",
"-t",
"raw",
"-r",

View File

@@ -28,6 +28,7 @@ export type GoogleMeetConfig = {
chrome: {
audioBackend: "blackhole-2ch";
audioFormat: GoogleMeetChromeAudioFormat;
audioBufferBytes: number;
launch: boolean;
browserProfile?: string;
guestName: string;
@@ -86,7 +87,15 @@ export type GoogleMeetConfig = {
};
};
export const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [
// SoX's own default processing buffer size, in bytes.
const SOX_DEFAULT_BUFFER_BYTES = 8192;
// Lower bound applied to configured buffer sizes; smaller positive values are raised to this.
const SOX_MIN_BUFFER_BYTES = 17;
/** Default for `chrome.audioBufferBytes`: half of SoX's default buffer (4096 bytes). */
export const DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES = SOX_DEFAULT_BUFFER_BYTES / 2;
/**
 * Builds a SoX-style argv that carries an explicit `--buffer <bytes>` option.
 *
 * The result always has the shape `[program, "-q", "--buffer", "<bytes>", ...rest]`.
 * A `-q` flag directly after the program name is treated as already present and is
 * not duplicated; any other second argument is preserved rather than dropped.
 *
 * @param command - Base argv (program name first). An empty array falls back to `"sox"`.
 * @param bufferBytes - Buffer size in bytes, rendered with `String()`.
 * @returns A new array; `command` is not mutated.
 */
function withSoxBuffer(command: readonly string[], bufferBytes: number): string[] {
  const [program = "sox", ...args] = command;
  // Strip only a real leading "-q"; blindly skipping index 1 would silently lose
  // a meaningful argument for base commands that do not start with "-q".
  const rest = args[0] === "-q" ? args.slice(1) : args;
  return [program, "-q", "--buffer", String(bufferBytes), ...rest];
}
const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE = [
"sox",
"-q",
"-t",
@@ -106,7 +115,7 @@ export const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [
"-",
] as const;
export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [
const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE = [
"sox",
"-q",
"-t",
@@ -126,7 +135,7 @@ export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [
"BlackHole 2ch",
] as const;
const LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [
const LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE = [
"rec",
"-q",
"-t",
@@ -142,7 +151,7 @@ const LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [
"-",
] as const;
const LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [
const LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE = [
"play",
"-q",
"-t",
@@ -158,6 +167,16 @@ const LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [
"-",
] as const;
/** Default audio input command: the base input command with the default buffer size applied. */
export const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND = withSoxBuffer(
DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE,
DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES,
);
/** Default audio output command: the base output command with the default buffer size applied. */
export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = withSoxBuffer(
DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE,
DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES,
);
const DEFAULT_GOOGLE_MEET_CHROME_AUDIO_FORMAT: GoogleMeetChromeAudioFormat = "pcm16-24khz";
const DEFAULT_GOOGLE_MEET_BARGE_IN_RMS_THRESHOLD = 650;
const DEFAULT_GOOGLE_MEET_BARGE_IN_PEAK_THRESHOLD = 2500;
@@ -177,6 +196,7 @@ const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
chrome: {
audioBackend: "blackhole-2ch",
audioFormat: DEFAULT_GOOGLE_MEET_CHROME_AUDIO_FORMAT,
audioBufferBytes: DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES,
launch: true,
guestName: "OpenClaw Agent",
reuseExistingTab: true,
@@ -361,16 +381,36 @@ function resolveChromeAudioFormat(value: unknown): GoogleMeetChromeAudioFormat |
}
}
function defaultAudioInputCommand(format: GoogleMeetChromeAudioFormat): readonly string[] {
return format === "g711-ulaw-8khz"
? LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND
: DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND;
/**
 * Normalizes a configured SoX buffer size.
 *
 * The raw value is first passed through `resolveNumber` with `fallback`. A result
 * that is not finite, or is zero or negative, yields `fallback` unchanged. Any
 * other result is truncated toward zero and raised to at least
 * `SOX_MIN_BUFFER_BYTES`.
 *
 * @param value - Untrusted config value.
 * @param fallback - Returned as-is when no usable positive number is available.
 * @returns The buffer size in bytes to hand to SoX.
 */
function resolveAudioBufferBytes(value: unknown, fallback: number): number {
  const requested = resolveNumber(value, fallback);
  const usable = Number.isFinite(requested) && requested > 0;
  return usable ? Math.max(SOX_MIN_BUFFER_BYTES, Math.trunc(requested)) : fallback;
}
function defaultAudioOutputCommand(format: GoogleMeetChromeAudioFormat): readonly string[] {
return format === "g711-ulaw-8khz"
? LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND
: DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND;
/**
 * Produces the generated audio input command for a Chrome audio format.
 *
 * `"g711-ulaw-8khz"` selects the legacy base command; every other format selects
 * the default base command. The chosen base is then given the requested buffer
 * size via `withSoxBuffer`.
 *
 * @param format - Command-pair audio format.
 * @param bufferBytes - Buffer size in bytes to embed in the command.
 * @returns A fresh argv array.
 */
function defaultAudioInputCommand(
  format: GoogleMeetChromeAudioFormat,
  bufferBytes: number,
): string[] {
  if (format === "g711-ulaw-8khz") {
    return withSoxBuffer(LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE, bufferBytes);
  }
  return withSoxBuffer(DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE, bufferBytes);
}
/**
 * Produces the generated audio output command for a Chrome audio format.
 *
 * `"g711-ulaw-8khz"` selects the legacy base command; every other format selects
 * the default base command. The chosen base is then given the requested buffer
 * size via `withSoxBuffer`.
 *
 * @param format - Command-pair audio format.
 * @param bufferBytes - Buffer size in bytes to embed in the command.
 * @returns A fresh argv array.
 */
function defaultAudioOutputCommand(
  format: GoogleMeetChromeAudioFormat,
  bufferBytes: number,
): string[] {
  if (format === "g711-ulaw-8khz") {
    return withSoxBuffer(LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE, bufferBytes);
  }
  return withSoxBuffer(DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE, bufferBytes);
}
export function resolveGoogleMeetConfig(input: unknown): GoogleMeetConfig {
@@ -392,6 +432,10 @@ export function resolveGoogleMeetConfigWithEnv(
const audioFormat =
resolveChromeAudioFormat(chrome.audioFormat) ??
(hasCustomAudioCommand ? "g711-ulaw-8khz" : DEFAULT_GOOGLE_MEET_CONFIG.chrome.audioFormat);
const audioBufferBytes = resolveAudioBufferBytes(
chrome.audioBufferBytes,
DEFAULT_GOOGLE_MEET_CONFIG.chrome.audioBufferBytes,
);
const chromeNode = asRecord(raw.chromeNode);
const twilio = asRecord(raw.twilio);
const voiceCall = asRecord(raw.voiceCall);
@@ -421,6 +465,7 @@ export function resolveGoogleMeetConfigWithEnv(
chrome: {
audioBackend: "blackhole-2ch",
audioFormat,
audioBufferBytes,
launch: resolveBoolean(chrome.launch, DEFAULT_GOOGLE_MEET_CONFIG.chrome.launch),
browserProfile: normalizeOptionalString(chrome.browserProfile),
guestName:
@@ -438,10 +483,10 @@ export function resolveGoogleMeetConfigWithEnv(
chrome.waitForInCallMs,
DEFAULT_GOOGLE_MEET_CONFIG.chrome.waitForInCallMs,
),
audioInputCommand: configuredAudioInputCommand ?? [...defaultAudioInputCommand(audioFormat)],
audioOutputCommand: configuredAudioOutputCommand ?? [
...defaultAudioOutputCommand(audioFormat),
],
audioInputCommand:
configuredAudioInputCommand ?? defaultAudioInputCommand(audioFormat, audioBufferBytes),
audioOutputCommand:
configuredAudioOutputCommand ?? defaultAudioOutputCommand(audioFormat, audioBufferBytes),
bargeInInputCommand: resolveStringArray(chrome.bargeInInputCommand),
bargeInRmsThreshold: resolveNumber(
chrome.bargeInRmsThreshold,