From f4372613d84d3c775861590a85207797a724b6a2 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Tue, 14 Apr 2026 11:00:28 +0100 Subject: [PATCH] fix(media): remap AAC uploads to M4A (#66446) * fix(media): remap AAC uploads to M4A * fix(media): remap AAC uploads to M4A --- CHANGELOG.md | 1 + .../openai-compatible-audio.test.ts | 22 +++++++++++++++++++ .../openai-compatible-audio.ts | 16 +++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31efbbfb2d2..4fed61e150a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ Docs: https://docs.openclaw.ai - Agents/context engine: compact engine-owned sessions from the first tool-loop delta and preserve ingest fallback when `afterTurn` is absent, so long-running tool loops can stay bounded without dropping engine state. (#63555) Thanks @Bikkies. - Discord/native commands: return the real status card for native `/status` interactions instead of falling through to the synthetic `✅ Done.` ack when the generic dispatcher produces no visible reply. (#54629) Thanks @tkozzer and @vincentkoc. - Hooks/Ollama: let LLM-backed session-memory slug generation honor an explicit `agents.defaults.timeoutSeconds` override instead of always aborting after 15 seconds, so slow local Ollama runs stop silently dropping back to generic filenames. (#66237) Thanks @dmak and @vincentkoc. +- Media/transcription: remap `.aac` filenames to `.m4a` for OpenAI-compatible audio uploads so AAC voice notes stop failing MIME-sensitive transcription endpoints. (#66446) Thanks @ben-z. ## 2026.4.14-beta.1 diff --git a/src/media-understanding/openai-compatible-audio.test.ts b/src/media-understanding/openai-compatible-audio.test.ts index a4c29c3c601..15e1322d679 100644 --- a/src/media-understanding/openai-compatible-audio.test.ts +++ b/src/media-understanding/openai-compatible-audio.test.ts @@ -48,4 +48,26 @@ describe("transcribeOpenAiCompatibleAudio", () => { expect(headers.get("version")).toBeNull(); expect(headers.get("user-agent")).toBeNull(); }); + + it("remaps AAC uploads to an M4A filename before submitting the form", async () => { + const { fetchFn, getRequest } = createRequestCaptureJsonFetch({ text: "ok" }); + + await transcribeOpenAiCompatibleAudio({ + buffer: Buffer.from("audio"), + fileName: "voice-note.aac", + mime: "audio/aac", + apiKey: "test-key", + timeoutMs: 1000, + fetchFn, + provider: "openai", + defaultBaseUrl: "https://api.openai.com/v1", + defaultModel: "gpt-4o-transcribe", + }); + + const form = getRequest().init?.body; + expect(form).toBeInstanceOf(FormData); + const file = (form as FormData).get("file"); + expect(file).toBeInstanceOf(File); + expect((file as File).name).toBe("voice-note.m4a"); + }); }); diff --git a/src/media-understanding/openai-compatible-audio.ts b/src/media-understanding/openai-compatible-audio.ts index e967faf4f94..284d004bc83 100644 --- a/src/media-understanding/openai-compatible-audio.ts +++ b/src/media-understanding/openai-compatible-audio.ts @@ -18,6 +18,20 @@ function resolveModel(model: string | undefined, fallback: string): string { return trimmed || fallback; } +function resolveUploadFileName(fileName?: string, mime?: string): string { + const trimmed = fileName?.trim(); + const baseName = trimmed ? path.basename(trimmed) : "audio"; + const lowerMime = mime?.trim().toLowerCase(); + + if (/\.aac$/i.test(baseName)) { + return `${baseName.slice(0, -4) || "audio"}.m4a`; + } + if (!path.extname(baseName) && lowerMime === "audio/aac") { + return `${baseName || "audio"}.m4a`; + } + return baseName; +} + export async function transcribeOpenAiCompatibleAudio( params: OpenAiCompatibleAudioParams, ): Promise { @@ -40,7 +54,7 @@ export async function transcribeOpenAiCompatibleAudio( const model = resolveModel(params.model, params.defaultModel); const form = new FormData(); - const fileName = params.fileName?.trim() || path.basename(params.fileName) || "audio"; + const fileName = resolveUploadFileName(params.fileName, params.mime); const bytes = new Uint8Array(params.buffer); const blob = new Blob([bytes], { type: params.mime ?? "application/octet-stream",