diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b31304497c..bf262d5788b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -63,6 +63,7 @@ Docs: https://docs.openclaw.ai - Discord: report unresolved configured bot-token SecretRefs during startup instead of treating the account as unconfigured. (#82009) Thanks @giodl73-repo. - Discord: pass an explicit Ogg muxer to ffmpeg when transcoding voice-message audio through staged temp files, restoring TTS voice-message delivery. Fixes #82074. Thanks @hwlbb. - Discord/Feishu: allow Discord voice uploads through RFC2544 fake-IP proxy DNS and pass Feishu's voice ffmpeg transcode through an explicit Ogg muxer. (#82088) Thanks @hwlbb and @6peng888. +- Audio/STT: pass explicit WAV/Ogg muxers to ffmpeg for whisper-cli and WhatsApp staged temp outputs so `.part` filenames do not break transcription or voice-message delivery. Fixes #82094. - CLI/config: preserve numeric-looking object keys such as Discord guild IDs during `config patch` recursive merges. (#81999) Thanks @giodl73-repo. - Gateway/OpenAI-compatible HTTP: forward `response_format` from `/v1/chat/completions` requests through agent stream params to upstream Chat Completions and Responses transports, restoring structured-output support. Fixes #82003. (#82004) Thanks @Lellansin. - Control UI/WebChat: let sidebar markdown code-block Copy buttons use the same delegated clipboard handler as chat messages. (#58709) Thanks @tikitoki. diff --git a/extensions/whatsapp/src/auto-reply/deliver-reply.test.ts b/extensions/whatsapp/src/auto-reply/deliver-reply.test.ts index 9020bb56a0e..2017b9ac811 100644 --- a/extensions/whatsapp/src/auto-reply/deliver-reply.test.ts +++ b/extensions/whatsapp/src/auto-reply/deliver-reply.test.ts @@ -778,6 +778,7 @@ describe("deliverWebReply", () => { expect(ffmpegArgList).toContain("48000"); expect(ffmpegArgList).toContain("-b:a"); expect(ffmpegArgList).toContain("64k"); + expect(ffmpegArgList.slice(-3, -1)).toEqual(["-f", "ogg"]); const mediaPayload = requireRecord( mockCallArg(msg.sendMedia, 0, 0, "sendMedia"), "sendMedia payload", diff --git a/extensions/whatsapp/src/outbound-media-contract.ts b/extensions/whatsapp/src/outbound-media-contract.ts index a40bc576d08..adf1c84ca87 100644 --- a/extensions/whatsapp/src/outbound-media-contract.ts +++ b/extensions/whatsapp/src/outbound-media-contract.ts @@ -214,6 +214,8 @@ async function transcodeToWhatsAppVoiceOpus(params: { "libopus", "-b:a", WHATSAPP_VOICE_BITRATE, + "-f", + "ogg", outputPath, ]); }, diff --git a/extensions/whatsapp/src/send.test.ts b/extensions/whatsapp/src/send.test.ts index 2f98bffb298..1fe2c601079 100644 --- a/extensions/whatsapp/src/send.test.ts +++ b/extensions/whatsapp/src/send.test.ts @@ -315,6 +315,8 @@ describe("web outbound", () => { "libopus", "-b:a", "64k", + "-f", + "ogg", ]); const outputPath = ffmpegArgs?.at(-1); expect(outputPath).toContain("/fs-safe-output-"); diff --git a/src/media-understanding/apply.test.ts b/src/media-understanding/apply.test.ts index 84d2ed624a3..0667a4209c2 100644 --- a/src/media-understanding/apply.test.ts +++ b/src/media-understanding/apply.test.ts @@ -846,12 +846,21 @@ describe("applyMediaUnderstanding", () => { expect(ctx.Transcript).toBe("whisper cpp ogg ok"); const ffmpegArgs = getRunFfmpegArgs(); - expect(ffmpegArgs).toHaveLength(10); + expect(ffmpegArgs).toHaveLength(12); expect(ffmpegArgs.slice(0, 2)).toEqual(["-y", "-i"]); expect(String(ffmpegArgs[2]).endsWith("telegram-voice.ogg")).toBe(true); - expect(ffmpegArgs.slice(3, 9)).toEqual(["-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le"]); - expect(String(ffmpegArgs[9])).toContain("telegram-voice.wav"); - expect(String(ffmpegArgs[9]).endsWith(".part")).toBe(true); + expect(ffmpegArgs.slice(3, 11)).toEqual([ + "-ac", + "1", + "-ar", + "16000", + "-c:a", + "pcm_s16le", + "-f", + "wav", + ]); + expect(String(ffmpegArgs[11])).toContain("telegram-voice.wav"); + expect(String(ffmpegArgs[11]).endsWith(".part")).toBe(true); const [command, args, options] = getRunExecCall(); expect(command).toBe("whisper-cli"); diff --git a/src/media-understanding/runner.entries.ts b/src/media-understanding/runner.entries.ts index 9eec4b46ed8..0780e88efc7 100644 --- a/src/media-understanding/runner.entries.ts +++ b/src/media-understanding/runner.entries.ts @@ -269,6 +269,8 @@ async function resolveCliMediaPath(params: { "16000", "-c:a", "pcm_s16le", + "-f", + "wav", outputPath, ]); },