From 1906dc01bf062195814d346d677c659159f524f3 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 03:36:54 +0100 Subject: [PATCH] fix(elevenlabs): omit mp3 accept for pcm tts --- CHANGELOG.md | 1 + extensions/elevenlabs/tts.test.ts | 26 ++++++++++++++++++++++++++ extensions/elevenlabs/tts.ts | 11 ++++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00328d54d22..6f2c09f78f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,6 +69,7 @@ Docs: https://docs.openclaw.ai - Dashboard/Windows: open Control UI and OAuth URLs through the system URL handler without `cmd.exe` parsing or PATH-based `rundll32` lookup, and reject non-HTTP browser-open inputs. Fixes #71098. Thanks @Sanjays2402. - Providers/OpenAI: separate API-key and Codex sign-in onboarding groups, and avoid replaying stale OpenAI Responses reasoning blocks after a model route switch. +- Providers/ElevenLabs: omit the MP3-only `Accept` header for PCM telephony synthesis, so Voice Call requests for `pcm_22050` no longer receive MP3 audio. Fixes #67340. Thanks @marcchabot. - Skills: honor legacy `metadata.clawdbot` requirements and installer hints when `metadata.openclaw` is absent, so older skills no longer appear ready when required binaries are missing. Fixes #71323. Thanks @chen-zhang-cs-code. - Browser/config: expand `~` in `browser.executablePath` before Chromium launch, so home-relative custom browser paths no longer fail with `ENOENT`. Fixes #67264. Thanks @Quratulain-bilal. - Telegram/streaming: hide tool-progress status updates by default while keeping explicit `streaming.preview.toolProgress` opt-in support for edited preview messages. Fixes #71320. Thanks @neeravmakwana. 
diff --git a/extensions/elevenlabs/tts.test.ts b/extensions/elevenlabs/tts.test.ts index 65d3accd79c..11042cfd321 100644 --- a/extensions/elevenlabs/tts.test.ts +++ b/extensions/elevenlabs/tts.test.ts @@ -24,6 +24,11 @@ describe("elevenlabs tts diagnostics", () => { }; } + function getHeadersFromFirstFetchCall(fetchMock: ReturnType<typeof vi.fn>): Headers { + const init = fetchMock.mock.calls[0]?.[1] as RequestInit | undefined; + return new Headers(init?.headers); + } + async function expectDefaultTtsRequestToThrow(message: string | RegExp) { await expect(elevenLabsTTS(createDefaultTtsRequest())).rejects.toThrow(message); } @@ -80,4 +85,25 @@ describe("elevenlabs tts diagnostics", () => { expect(streamed.getReadCount()).toBeLessThan(200); }); + + it("keeps the MPEG Accept header for MP3 output", async () => { + const fetchMock = vi.fn(async () => new Response(Buffer.from("mp3"))); + globalThis.fetch = fetchMock as unknown as typeof fetch; + + await elevenLabsTTS(createDefaultTtsRequest()); + + expect(getHeadersFromFirstFetchCall(fetchMock).get("accept")).toBe("audio/mpeg"); + }); + + it("omits the MPEG Accept header for PCM telephony output", async () => { + const fetchMock = vi.fn(async () => new Response(Buffer.from("pcm"))); + globalThis.fetch = fetchMock as unknown as typeof fetch; + + await elevenLabsTTS({ + ...createDefaultTtsRequest(), + outputFormat: "pcm_22050", + }); + + expect(getHeadersFromFirstFetchCall(fetchMock).has("accept")).toBe(false); + }); }); diff --git a/extensions/elevenlabs/tts.ts b/extensions/elevenlabs/tts.ts index 42f1d12a3b0..4e81cc1ef93 100644 --- a/extensions/elevenlabs/tts.ts +++ b/extensions/elevenlabs/tts.ts @@ -24,6 +24,14 @@ function assertElevenLabsVoiceSettings(settings: { requireInRange(settings.speed, 0.5, 2, "speed"); } +function resolveElevenLabsAcceptHeader(outputFormat: string): string | undefined { + const normalized = outputFormat.trim().toLowerCase(); + if (!normalized || normalized.startsWith("mp3_")) { + return "audio/mpeg"; + } + 
return undefined; +} + export async function elevenLabsTTS(params: { text: string; apiKey: string; @@ -70,6 +78,7 @@ export async function elevenLabsTTS(params: { if (outputFormat) { url.searchParams.set("output_format", outputFormat); } + const acceptHeader = resolveElevenLabsAcceptHeader(outputFormat); const { response, release } = await fetchWithSsrFGuard({ url: url.toString(), @@ -78,7 +87,7 @@ export async function elevenLabsTTS(params: { headers: { "xi-api-key": apiKey, "Content-Type": "application/json", - Accept: "audio/mpeg", + ...(acceptHeader ? { Accept: acceptHeader } : {}), }, body: JSON.stringify({ text,