mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:20:43 +00:00
fix(google-meet): clamp audio buffer config
This commit is contained in:
@@ -43,6 +43,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Diagnostics: keep webhook/message OTEL attributes and Prometheus delivery labels low-cardinality and omit raw chat/message IDs from spans, so progress-draft and message-tool modes do not leak high-cardinality messaging identifiers.
|
||||
- Google Meet: stop advertising legacy `mode: "realtime"` to agents and config UIs, while keeping it as a hidden compatibility alias for `mode: "agent"`, so new joins use the STT -> OpenClaw agent -> TTS path instead of selecting the direct realtime voice fallback.
|
||||
- Google Meet: add `chrome.audioBufferBytes` for generated command-pair SoX audio commands and lower the default buffer from SoX's 8192 bytes to 4096 bytes to reduce Chrome talk-back latency.
|
||||
- Telegram: render shared interactive reply buttons in reply delivery so plugin approval messages show inline keyboards. (#76238) Thanks @keshavbotagent.
|
||||
- Agents/cli-runner: drop a saved `claude-cli` resume sessionId at preparation time when its on-disk transcript no longer exists in `~/.claude/projects/`, so a stale binding from a half-installed `update.run` cannot trap follow-up runs (auto-reply / Telegram direct) in a `claude --resume` timeout loop; the run starts fresh and the new sessionId is written back through the existing post-run flow. (#77030; refs #77011) Thanks @openperf.
|
||||
- Release validation: install the cross-OS TypeScript harness through Windows-safe Node/npm shims so native Windows package checks reach the OpenClaw smoke suites instead of exiting before artifact capture. Thanks @vincentkoc.
|
||||
|
||||
@@ -1016,6 +1016,10 @@ Defaults:
|
||||
- `chrome.audioFormat: "pcm16-24khz"`: command-pair audio format. Use
|
||||
`"g711-ulaw-8khz"` only for legacy/custom command pairs that still emit
|
||||
telephony audio.
|
||||
- `chrome.audioBufferBytes: 4096`: SoX processing buffer for generated Chrome
|
||||
command-pair audio commands. This is half of SoX's default 8192-byte buffer,
|
||||
reducing default pipe latency while leaving room to raise it on busy hosts.
|
||||
Positive values below SoX's 17-byte minimum are clamped to 17; zero or negative values fall back to the default.
|
||||
- `chrome.audioInputCommand`: SoX command reading from CoreAudio `BlackHole 2ch`
|
||||
and writing audio in `chrome.audioFormat`
|
||||
- `chrome.audioOutputCommand`: SoX command reading audio in `chrome.audioFormat`
|
||||
@@ -1622,7 +1626,8 @@ Chrome talk-back modes need `BlackHole 2ch` plus either:
|
||||
bridge and pipes audio in `chrome.audioFormat` between those commands and the
|
||||
selected provider. Agent mode uses realtime transcription plus regular TTS;
|
||||
bidi mode uses the realtime voice provider. The default Chrome path is 24 kHz
|
||||
PCM16; 8 kHz G.711 mu-law remains available for legacy command pairs.
|
||||
PCM16 with `chrome.audioBufferBytes: 4096`; 8 kHz G.711 mu-law remains
|
||||
available for legacy command pairs.
|
||||
- `chrome.audioBridgeCommand`: an external bridge command owns the whole local
|
||||
audio path and must exit after starting or validating its daemon. This is only
|
||||
valid for `bidi` because `agent` mode needs direct command-pair access for TTS.
|
||||
|
||||
@@ -329,9 +329,12 @@ describe("google-meet plugin", () => {
|
||||
autoJoin: true,
|
||||
waitForInCallMs: 20000,
|
||||
audioFormat: "pcm16-24khz",
|
||||
audioBufferBytes: 4096,
|
||||
audioInputCommand: [
|
||||
"sox",
|
||||
"-q",
|
||||
"--buffer",
|
||||
"4096",
|
||||
"-t",
|
||||
"coreaudio",
|
||||
"BlackHole 2ch",
|
||||
@@ -351,6 +354,8 @@ describe("google-meet plugin", () => {
|
||||
audioOutputCommand: [
|
||||
"sox",
|
||||
"-q",
|
||||
"--buffer",
|
||||
"4096",
|
||||
"-t",
|
||||
"raw",
|
||||
"-r",
|
||||
@@ -410,18 +415,21 @@ describe("google-meet plugin", () => {
|
||||
};
|
||||
|
||||
expect(entry.configSchema.uiHints).toMatchObject({
|
||||
"chrome.audioBufferBytes": expect.objectContaining({ advanced: true }),
|
||||
"chrome.bargeInInputCommand": expect.objectContaining({ advanced: true }),
|
||||
"chrome.bargeInRmsThreshold": expect.objectContaining({ advanced: true }),
|
||||
"chrome.bargeInPeakThreshold": expect.objectContaining({ advanced: true }),
|
||||
"chrome.bargeInCooldownMs": expect.objectContaining({ advanced: true }),
|
||||
});
|
||||
expect(manifest.uiHints).toMatchObject({
|
||||
"chrome.audioBufferBytes": expect.objectContaining({ advanced: true }),
|
||||
"chrome.bargeInInputCommand": expect.objectContaining({ advanced: true }),
|
||||
"chrome.bargeInRmsThreshold": expect.objectContaining({ advanced: true }),
|
||||
"chrome.bargeInPeakThreshold": expect.objectContaining({ advanced: true }),
|
||||
"chrome.bargeInCooldownMs": expect.objectContaining({ advanced: true }),
|
||||
});
|
||||
expect(manifest.configSchema?.properties?.chrome?.properties).toMatchObject({
|
||||
audioBufferBytes: expect.objectContaining({ type: "number", default: 4096 }),
|
||||
bargeInInputCommand: expect.objectContaining({
|
||||
type: "array",
|
||||
items: { type: "string" },
|
||||
@@ -467,6 +475,47 @@ describe("google-meet plugin", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("lets generated Chrome audio commands use a configured SoX buffer", () => {
|
||||
const config = resolveGoogleMeetConfig({ chrome: { audioBufferBytes: 2048 } });
|
||||
|
||||
expect(config.chrome.audioBufferBytes).toBe(2048);
|
||||
expect(config.chrome.audioInputCommand).toEqual([
|
||||
"sox",
|
||||
"-q",
|
||||
"--buffer",
|
||||
"2048",
|
||||
"-t",
|
||||
"coreaudio",
|
||||
"BlackHole 2ch",
|
||||
"-t",
|
||||
"raw",
|
||||
"-r",
|
||||
"24000",
|
||||
"-c",
|
||||
"1",
|
||||
"-e",
|
||||
"signed-integer",
|
||||
"-b",
|
||||
"16",
|
||||
"-L",
|
||||
"-",
|
||||
]);
|
||||
expect(config.chrome.audioOutputCommand?.slice(0, 4)).toEqual([
|
||||
"sox",
|
||||
"-q",
|
||||
"--buffer",
|
||||
"2048",
|
||||
]);
|
||||
});
|
||||
|
||||
it("clamps configured Chrome audio buffers above SoX's minimum", () => {
|
||||
const config = resolveGoogleMeetConfig({ chrome: { audioBufferBytes: 1 } });
|
||||
|
||||
expect(config.chrome.audioBufferBytes).toBe(17);
|
||||
expect(config.chrome.audioInputCommand?.slice(0, 4)).toEqual(["sox", "-q", "--buffer", "17"]);
|
||||
expect(config.chrome.audioOutputCommand?.slice(0, 4)).toEqual(["sox", "-q", "--buffer", "17"]);
|
||||
});
|
||||
|
||||
it("uses env fallbacks for OAuth, preview, and default meeting values", () => {
|
||||
expect(
|
||||
resolveGoogleMeetConfigWithEnv(
|
||||
|
||||
@@ -82,6 +82,11 @@ const googleMeetConfigSchema = {
|
||||
help: "Command-pair audio format. PCM16 24 kHz is the default Chrome/Meet path; G.711 mu-law 8 kHz remains available for legacy command pairs.",
|
||||
advanced: true,
|
||||
},
|
||||
"chrome.audioBufferBytes": {
|
||||
label: "Audio Buffer Bytes",
|
||||
help: "SoX processing buffer for generated Chrome command-pair audio commands. Lower values reduce latency but may underrun on busy hosts.",
|
||||
advanced: true,
|
||||
},
|
||||
"chrome.audioInputCommand": {
|
||||
label: "Audio Input Command",
|
||||
help: "Command that writes meeting audio to stdout in chrome.audioFormat.",
|
||||
|
||||
@@ -93,6 +93,11 @@
|
||||
"help": "Command-pair audio format. PCM16 24 kHz is the default Chrome/Meet path; G.711 mu-law 8 kHz remains available for legacy command pairs.",
|
||||
"advanced": true
|
||||
},
|
||||
"chrome.audioBufferBytes": {
|
||||
"label": "Audio Buffer Bytes",
|
||||
"help": "SoX processing buffer for generated Chrome command-pair audio commands. Lower values reduce latency but may underrun on busy hosts.",
|
||||
"advanced": true
|
||||
},
|
||||
"chrome.audioBridgeCommand": {
|
||||
"label": "Audio Bridge Command",
|
||||
"advanced": true
|
||||
@@ -272,11 +277,17 @@
|
||||
"enum": ["pcm16-24khz", "g711-ulaw-8khz"],
|
||||
"default": "pcm16-24khz"
|
||||
},
|
||||
"audioBufferBytes": {
|
||||
"type": "number",
|
||||
"default": 4096
|
||||
},
|
||||
"audioInputCommand": {
|
||||
"type": "array",
|
||||
"default": [
|
||||
"sox",
|
||||
"-q",
|
||||
"--buffer",
|
||||
"4096",
|
||||
"-t",
|
||||
"coreaudio",
|
||||
"BlackHole 2ch",
|
||||
@@ -302,6 +313,8 @@
|
||||
"default": [
|
||||
"sox",
|
||||
"-q",
|
||||
"--buffer",
|
||||
"4096",
|
||||
"-t",
|
||||
"raw",
|
||||
"-r",
|
||||
|
||||
@@ -28,6 +28,7 @@ export type GoogleMeetConfig = {
|
||||
chrome: {
|
||||
audioBackend: "blackhole-2ch";
|
||||
audioFormat: GoogleMeetChromeAudioFormat;
|
||||
audioBufferBytes: number;
|
||||
launch: boolean;
|
||||
browserProfile?: string;
|
||||
guestName: string;
|
||||
@@ -86,7 +87,15 @@ export type GoogleMeetConfig = {
|
||||
};
|
||||
};
|
||||
|
||||
export const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [
|
||||
const SOX_DEFAULT_BUFFER_BYTES = 8192;
|
||||
const SOX_MIN_BUFFER_BYTES = 17;
|
||||
export const DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES = SOX_DEFAULT_BUFFER_BYTES / 2;
|
||||
|
||||
/**
 * Returns a copy of a SoX-style argv with an explicit `--buffer <bytes>` option
 * inserted right after the program name and the quiet flag.
 *
 * The result always has the shape `[program, "-q", "--buffer", "<bytes>", ...rest]`.
 *
 * @param command - Base argv (program name first). An empty array yields a bare
 *   `sox -q --buffer <bytes>` invocation.
 * @param bufferBytes - Buffer size to pass to `--buffer`; it is stringified as-is,
 *   so callers are expected to have validated/clamped it already.
 * @returns A new mutable array; `command` is never modified.
 */
function withSoxBuffer(command: readonly string[], bufferBytes: number): string[] {
  const [program = "sox", ...args] = command;
  // The built-in base commands lead with "-q". Strip it only when it is really
  // there, so a command without it does not silently lose its first argument.
  const rest = args[0] === "-q" ? args.slice(1) : args;
  return [program, "-q", "--buffer", String(bufferBytes), ...rest];
}
|
||||
|
||||
const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE = [
|
||||
"sox",
|
||||
"-q",
|
||||
"-t",
|
||||
@@ -106,7 +115,7 @@ export const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [
|
||||
"-",
|
||||
] as const;
|
||||
|
||||
export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [
|
||||
const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE = [
|
||||
"sox",
|
||||
"-q",
|
||||
"-t",
|
||||
@@ -126,7 +135,7 @@ export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [
|
||||
"BlackHole 2ch",
|
||||
] as const;
|
||||
|
||||
const LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [
|
||||
const LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE = [
|
||||
"rec",
|
||||
"-q",
|
||||
"-t",
|
||||
@@ -142,7 +151,7 @@ const LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND = [
|
||||
"-",
|
||||
] as const;
|
||||
|
||||
const LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [
|
||||
const LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE = [
|
||||
"play",
|
||||
"-q",
|
||||
"-t",
|
||||
@@ -158,6 +167,16 @@ const LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [
|
||||
"-",
|
||||
] as const;
|
||||
|
||||
export const DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND = withSoxBuffer(
|
||||
DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE,
|
||||
DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES,
|
||||
);
|
||||
|
||||
export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = withSoxBuffer(
|
||||
DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE,
|
||||
DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES,
|
||||
);
|
||||
|
||||
const DEFAULT_GOOGLE_MEET_CHROME_AUDIO_FORMAT: GoogleMeetChromeAudioFormat = "pcm16-24khz";
|
||||
const DEFAULT_GOOGLE_MEET_BARGE_IN_RMS_THRESHOLD = 650;
|
||||
const DEFAULT_GOOGLE_MEET_BARGE_IN_PEAK_THRESHOLD = 2500;
|
||||
@@ -177,6 +196,7 @@ const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
|
||||
chrome: {
|
||||
audioBackend: "blackhole-2ch",
|
||||
audioFormat: DEFAULT_GOOGLE_MEET_CHROME_AUDIO_FORMAT,
|
||||
audioBufferBytes: DEFAULT_GOOGLE_MEET_AUDIO_BUFFER_BYTES,
|
||||
launch: true,
|
||||
guestName: "OpenClaw Agent",
|
||||
reuseExistingTab: true,
|
||||
@@ -361,16 +381,36 @@ function resolveChromeAudioFormat(value: unknown): GoogleMeetChromeAudioFormat |
|
||||
}
|
||||
}
|
||||
|
||||
function defaultAudioInputCommand(format: GoogleMeetChromeAudioFormat): readonly string[] {
|
||||
return format === "g711-ulaw-8khz"
|
||||
? LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND
|
||||
: DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND;
|
||||
/**
 * Normalizes a configured SoX buffer size into a whole byte count.
 *
 * The raw value is first passed through `resolveNumber` (defined elsewhere in
 * this module; presumably it yields `fallback` for non-numeric input — confirm
 * against its definition). A result that is not finite or not strictly positive
 * is rejected in favour of `fallback`. Anything else is truncated toward zero
 * and raised to `SOX_MIN_BUFFER_BYTES` when it would otherwise be smaller.
 *
 * @param value - Untrusted configuration value.
 * @param fallback - Returned unchanged when `value` is unusable.
 * @returns The buffer size, in bytes, to hand to SoX.
 */
function resolveAudioBufferBytes(value: unknown, fallback: number): number {
  const requested = resolveNumber(value, fallback);
  const usable = Number.isFinite(requested) && requested > 0;
  if (!usable) {
    return fallback;
  }
  const wholeBytes = Math.trunc(requested);
  // Small positive requests are raised to the floor rather than rejected.
  return wholeBytes < SOX_MIN_BUFFER_BYTES ? SOX_MIN_BUFFER_BYTES : wholeBytes;
}
|
||||
|
||||
function defaultAudioOutputCommand(format: GoogleMeetChromeAudioFormat): readonly string[] {
|
||||
return format === "g711-ulaw-8khz"
|
||||
? LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND
|
||||
: DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND;
|
||||
/**
 * Builds the generated audio input command for the given Chrome audio format.
 *
 * Picks the legacy base command for `"g711-ulaw-8khz"` and the default base
 * command for every other format, then applies the buffer size via
 * `withSoxBuffer`.
 *
 * @param format - Command-pair audio format.
 * @param bufferBytes - Buffer size forwarded to `withSoxBuffer`.
 * @returns A freshly allocated argv array.
 */
function defaultAudioInputCommand(
  format: GoogleMeetChromeAudioFormat,
  bufferBytes: number,
): string[] {
  const baseCommand =
    format === "g711-ulaw-8khz"
      ? LEGACY_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE
      : DEFAULT_GOOGLE_MEET_AUDIO_INPUT_COMMAND_BASE;
  return withSoxBuffer(baseCommand, bufferBytes);
}
|
||||
|
||||
/**
 * Builds the generated audio output command for the given Chrome audio format.
 *
 * Picks the legacy base command for `"g711-ulaw-8khz"` and the default base
 * command for every other format, then applies the buffer size via
 * `withSoxBuffer`.
 *
 * @param format - Command-pair audio format.
 * @param bufferBytes - Buffer size forwarded to `withSoxBuffer`.
 * @returns A freshly allocated argv array.
 */
function defaultAudioOutputCommand(
  format: GoogleMeetChromeAudioFormat,
  bufferBytes: number,
): string[] {
  const baseCommand =
    format === "g711-ulaw-8khz"
      ? LEGACY_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE
      : DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND_BASE;
  return withSoxBuffer(baseCommand, bufferBytes);
}
|
||||
|
||||
export function resolveGoogleMeetConfig(input: unknown): GoogleMeetConfig {
|
||||
@@ -392,6 +432,10 @@ export function resolveGoogleMeetConfigWithEnv(
|
||||
const audioFormat =
|
||||
resolveChromeAudioFormat(chrome.audioFormat) ??
|
||||
(hasCustomAudioCommand ? "g711-ulaw-8khz" : DEFAULT_GOOGLE_MEET_CONFIG.chrome.audioFormat);
|
||||
const audioBufferBytes = resolveAudioBufferBytes(
|
||||
chrome.audioBufferBytes,
|
||||
DEFAULT_GOOGLE_MEET_CONFIG.chrome.audioBufferBytes,
|
||||
);
|
||||
const chromeNode = asRecord(raw.chromeNode);
|
||||
const twilio = asRecord(raw.twilio);
|
||||
const voiceCall = asRecord(raw.voiceCall);
|
||||
@@ -421,6 +465,7 @@ export function resolveGoogleMeetConfigWithEnv(
|
||||
chrome: {
|
||||
audioBackend: "blackhole-2ch",
|
||||
audioFormat,
|
||||
audioBufferBytes,
|
||||
launch: resolveBoolean(chrome.launch, DEFAULT_GOOGLE_MEET_CONFIG.chrome.launch),
|
||||
browserProfile: normalizeOptionalString(chrome.browserProfile),
|
||||
guestName:
|
||||
@@ -438,10 +483,10 @@ export function resolveGoogleMeetConfigWithEnv(
|
||||
chrome.waitForInCallMs,
|
||||
DEFAULT_GOOGLE_MEET_CONFIG.chrome.waitForInCallMs,
|
||||
),
|
||||
audioInputCommand: configuredAudioInputCommand ?? [...defaultAudioInputCommand(audioFormat)],
|
||||
audioOutputCommand: configuredAudioOutputCommand ?? [
|
||||
...defaultAudioOutputCommand(audioFormat),
|
||||
],
|
||||
audioInputCommand:
|
||||
configuredAudioInputCommand ?? defaultAudioInputCommand(audioFormat, audioBufferBytes),
|
||||
audioOutputCommand:
|
||||
configuredAudioOutputCommand ?? defaultAudioOutputCommand(audioFormat, audioBufferBytes),
|
||||
bargeInInputCommand: resolveStringArray(chrome.bargeInInputCommand),
|
||||
bargeInRmsThreshold: resolveNumber(
|
||||
chrome.bargeInRmsThreshold,
|
||||
|
||||
Reference in New Issue
Block a user