From 202dd7590d1cbbb2ecf762bab31a7707f6da7900 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 16 May 2026 11:03:36 +0800 Subject: [PATCH] fix(providers): harden audio response schemas --- CHANGELOG.md | 1 + extensions/deepgram/audio.test.ts | 62 +++++++++- extensions/deepgram/audio.ts | 44 +++++-- .../media-understanding-provider.test.ts | 32 +++++ .../media-understanding-provider.ts | 8 +- extensions/elevenlabs/tts.test.ts | 33 ++++++ extensions/elevenlabs/tts.ts | 9 +- src/agents/provider-http-errors.ts | 48 ++++++++ .../openai-compatible-audio.test.ts | 17 +++ .../openai-compatible-audio.ts | 9 +- src/media-understanding/shared.ts | 1 + src/plugin-sdk/provider-http.ts | 3 + .../openai-compatible-speech-provider.test.ts | 112 ++++++++++++++++-- src/tts/openai-compatible-speech-provider.ts | 9 +- 14 files changed, 354 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 365a314876c..8e3b143a126 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ Docs: https://docs.openclaw.ai - Providers: reject malformed successful Runway, BytePlus, and Ollama embedding responses with provider-owned errors instead of raw parser/type failures, silent bad vectors, or long bogus polling. - Providers/images: reject malformed successful OpenAI-compatible, OpenAI, Google, fal, and OpenRouter image responses with provider-owned errors instead of raw shape failures, silent invalid base64 skips, or empty image results. - Providers/videos: reject malformed successful xAI, OpenRouter, and fal video create, poll, and result responses with provider-owned errors instead of raw parser failures or long bogus polling. +- Providers/audio: reject malformed successful OpenAI-compatible, ElevenLabs, and Deepgram speech responses with provider-owned errors instead of raw parser failures, wrong-shaped transcripts, or JSON/text bodies treated as audio. - Trajectory export: skip and report malformed session/runtime JSONL rows in `manifest.json` instead of letting wrong-shaped session rows crash support bundle export. - Voice calls: persist rejected inbound-call replay keys so duplicate carrier webhook retries stay ignored after a Gateway restart. - Config/doctor: copy fallback-enabled channel `allowFrom` entries into explicit `groupAllowFrom` allowlists during `openclaw doctor --fix`, preserving current group access without adding runtime fallback-transition flags. diff --git a/extensions/deepgram/audio.test.ts b/extensions/deepgram/audio.test.ts index 92f424d0cee..60ab395f776 100644 --- a/extensions/deepgram/audio.test.ts +++ b/extensions/deepgram/audio.test.ts @@ -3,7 +3,7 @@ import { createRequestCaptureJsonFetch, installPinnedHostnameTestHooks, } from "openclaw/plugin-sdk/test-env"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import { transcribeDeepgramAudio } from "./audio.js"; installPinnedHostnameTestHooks(); @@ -83,4 +83,64 @@ describe("transcribeDeepgramAudio", () => { }), ).rejects.toThrow("Audio transcription response missing transcript"); }); + + it("wraps malformed successful transcription JSON with a stable provider error", async () => { + const fetchFn = vi.fn().mockResolvedValueOnce(new Response("{ nope")); + + await expect( + transcribeDeepgramAudio({ + buffer: Buffer.from("audio-bytes"), + fileName: "voice.wav", + apiKey: "test-key", + timeoutMs: 1234, + fetchFn, + }), + ).rejects.toThrow("Audio transcription failed: malformed JSON response"); + }); + + it("rejects non-object successful transcription JSON with a stable provider error", async () => { + const fetchFn = vi.fn().mockResolvedValueOnce(new Response(JSON.stringify([]))); + + await expect( + transcribeDeepgramAudio({ + buffer: Buffer.from("audio-bytes"), + fileName: "voice.wav", + apiKey: "test-key", + timeoutMs: 1234, + fetchFn, + }), + ).rejects.toThrow("Audio transcription failed: malformed JSON response"); + }); + + it("rejects wrong nested transcript shapes with a stable provider error", async () => { + const { fetchFn } = createRequestCaptureJsonFetch({ + results: { channels: { alternatives: [{ transcript: "hello" }] } }, + }); + + await expect( + transcribeDeepgramAudio({ + buffer: Buffer.from("audio-bytes"), + fileName: "voice.wav", + apiKey: "test-key", + timeoutMs: 1234, + fetchFn, + }), + ).rejects.toThrow("Audio transcription failed: malformed JSON response"); + }); + + it("rejects non-string transcript values with a stable provider error", async () => { + const { fetchFn } = createRequestCaptureJsonFetch({ + results: { channels: [{ alternatives: [{ transcript: 123 }] }] }, + }); + + await expect( + transcribeDeepgramAudio({ + buffer: Buffer.from("audio-bytes"), + fileName: "voice.wav", + apiKey: "test-key", + timeoutMs: 1234, + fetchFn, + }), + ).rejects.toThrow("Audio transcription failed: malformed JSON response"); + }); }); diff --git a/extensions/deepgram/audio.ts b/extensions/deepgram/audio.ts index 4c9a0a45516..bc797fb5e7f 100644 --- a/extensions/deepgram/audio.ts +++ b/extensions/deepgram/audio.ts @@ -5,6 +5,7 @@ import type { import { assertOkOrThrowHttpError, postTranscriptionRequest, + readProviderJsonObjectResponse, resolveProviderHttpRequestConfig, requireTranscriptionText, } from "openclaw/plugin-sdk/provider-http"; @@ -17,15 +18,36 @@ function resolveModel(model?: string): string { return trimmed || DEFAULT_DEEPGRAM_AUDIO_MODEL; } -type DeepgramTranscriptResponse = { - results?: { - channels?: Array<{ - alternatives?: Array<{ - transcript?: string; - }>; - }>; - }; -}; +function asRecord(value: unknown): Record | undefined { + return typeof value === "object" && value !== null && !Array.isArray(value) + ? (value as Record) + : undefined; +} + +function readDeepgramTranscript(payload: Record): string | undefined { + const results = asRecord(payload.results); + if (!results) { + return undefined; + } + if (!Array.isArray(results.channels)) { + throw new Error("Audio transcription failed: malformed JSON response"); + } + const channel = asRecord(results.channels[0]); + if (!channel) { + return undefined; + } + if (!Array.isArray(channel.alternatives)) { + throw new Error("Audio transcription failed: malformed JSON response"); + } + const alternative = asRecord(channel.alternatives[0]); + if (!alternative) { + return undefined; + } + if (alternative.transcript !== undefined && typeof alternative.transcript !== "string") { + throw new Error("Audio transcription failed: malformed JSON response"); + } + return alternative.transcript; +} export async function transcribeDeepgramAudio( params: AudioTranscriptionRequest, @@ -75,9 +97,9 @@ export async function transcribeDeepgramAudio( try { await assertOkOrThrowHttpError(res, "Audio transcription failed"); - const payload = (await res.json()) as DeepgramTranscriptResponse; + const payload = await readProviderJsonObjectResponse(res, "Audio transcription failed"); const transcript = requireTranscriptionText( - payload.results?.channels?.[0]?.alternatives?.[0]?.transcript, + readDeepgramTranscript(payload), "Audio transcription response missing transcript", ); return { text: transcript, model }; diff --git a/extensions/elevenlabs/media-understanding-provider.test.ts b/extensions/elevenlabs/media-understanding-provider.test.ts index b173bdc6b3c..24e449b5d1d 100644 --- a/extensions/elevenlabs/media-understanding-provider.test.ts +++ b/extensions/elevenlabs/media-understanding-provider.test.ts @@ -60,4 +60,36 @@ describe("elevenLabsMediaUnderstandingProvider", () => { expect(form.get("language_code")).toBe("en"); expect(form.get("file")).toBeInstanceOf(Blob); }); + + it("wraps malformed successful speech-to-text JSON with a stable provider error", async () => { + const fetchMock = vi.fn().mockResolvedValue(new Response("{ nope")); + + await expect( + transcribeElevenLabsAudio({ + buffer: Buffer.from("audio"), + fileName: "voice.mp3", + mime: "audio/mpeg", + apiKey: "eleven-key", + model: "scribe_v2", + timeoutMs: 1000, + fetchFn: fetchMock, + }), + ).rejects.toThrow("ElevenLabs audio transcription failed: malformed JSON response"); + }); + + it("rejects non-object successful speech-to-text JSON with a stable provider error", async () => { + const fetchMock = vi.fn().mockResolvedValue(new Response(JSON.stringify([]))); + + await expect( + transcribeElevenLabsAudio({ + buffer: Buffer.from("audio"), + fileName: "voice.mp3", + mime: "audio/mpeg", + apiKey: "eleven-key", + model: "scribe_v2", + timeoutMs: 1000, + fetchFn: fetchMock, + }), + ).rejects.toThrow("ElevenLabs audio transcription failed: malformed JSON response"); + }); }); diff --git a/extensions/elevenlabs/media-understanding-provider.ts b/extensions/elevenlabs/media-understanding-provider.ts index 2c415b41597..d16692646cf 100644 --- a/extensions/elevenlabs/media-understanding-provider.ts +++ b/extensions/elevenlabs/media-understanding-provider.ts @@ -7,6 +7,7 @@ import { assertOkOrThrowHttpError, buildAudioTranscriptionFormData, postTranscriptionRequest, + readProviderJsonObjectResponse, resolveProviderHttpRequestConfig, requireTranscriptionText, } from "openclaw/plugin-sdk/provider-http"; @@ -61,9 +62,12 @@ export async function transcribeElevenLabsAudio( try { await assertOkOrThrowHttpError(response, "ElevenLabs audio transcription failed"); - const payload = (await response.json()) as { text?: string }; + const payload = await readProviderJsonObjectResponse( + response, + "ElevenLabs audio transcription failed", + ); const text = requireTranscriptionText( - payload.text, + typeof payload.text === "string" ? payload.text : undefined, "ElevenLabs audio transcription response missing text", ); return { text, model }; diff --git a/extensions/elevenlabs/tts.test.ts b/extensions/elevenlabs/tts.test.ts index ba00ad27011..0c090693093 100644 --- a/extensions/elevenlabs/tts.test.ts +++ b/extensions/elevenlabs/tts.test.ts @@ -112,6 +112,25 @@ describe("elevenlabs tts diagnostics", () => { expect(getHeadersFromFirstFetchCall(fetchMock).get("accept")).toBe("audio/mpeg"); }); + it("rejects JSON success bodies as malformed audio", async () => { + const fetchMock = vi.fn( + async () => + new Response(JSON.stringify({ error: "not audio" }), { + headers: { "content-type": "application/json" }, + }), + ); + globalThis.fetch = fetchMock as unknown as typeof fetch; + + await expectDefaultTtsRequestToThrow("ElevenLabs API error: malformed audio response"); + }); + + it("rejects empty successful audio bodies as malformed audio", async () => { + const fetchMock = vi.fn(async () => new Response(new Uint8Array())); + globalThis.fetch = fetchMock as unknown as typeof fetch; + + await expectDefaultTtsRequestToThrow("ElevenLabs API error: malformed audio response"); + }); + it("omits the MPEG Accept header for PCM telephony output", async () => { const fetchMock = vi.fn(async () => new Response(Buffer.from("pcm"))); globalThis.fetch = fetchMock as unknown as typeof fetch; @@ -176,4 +195,18 @@ describe("elevenlabs tts diagnostics", () => { expect(result.audioStream).toBeInstanceOf(ReadableStream); await result.release(); }); + + it("rejects JSON success stream responses as malformed audio", async () => { + const fetchMock = vi.fn( + async () => + new Response(JSON.stringify({ error: "not audio" }), { + headers: { "content-type": "application/json" }, + }), + ); + globalThis.fetch = fetchMock as unknown as typeof fetch; + + await expect(elevenLabsTTSStream(createDefaultTtsRequest())).rejects.toThrow( + "ElevenLabs API error: malformed audio response", + ); + }); }); diff --git a/extensions/elevenlabs/tts.ts b/extensions/elevenlabs/tts.ts index ea20cea088f..01ab294b6af 100644 --- a/extensions/elevenlabs/tts.ts +++ b/extensions/elevenlabs/tts.ts @@ -1,4 +1,8 @@ -import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http"; +import { + assertOkOrThrowProviderError, + assertProviderBinaryResponseContent, + readProviderBinaryResponse, +} from "openclaw/plugin-sdk/provider-http"; import { normalizeApplyTextNormalization, normalizeLanguageCode, @@ -143,7 +147,7 @@ export async function elevenLabsTTS(params: ElevenLabsTtsRequestParams): Promise try { await assertOkOrThrowProviderError(response, "ElevenLabs API error"); - return Buffer.from(await response.arrayBuffer()); + return Buffer.from(await readProviderBinaryResponse(response, "ElevenLabs API error", "audio")); } finally { await release(); } @@ -177,6 +181,7 @@ export async function elevenLabsTTSStream(params: ElevenLabsTtsRequestParams): P let handedOff = false; try { await assertOkOrThrowProviderError(response, "ElevenLabs API error"); + assertProviderBinaryResponseContent(response, "ElevenLabs API error", "audio"); if (!response.body) { throw new Error("ElevenLabs API response missing audio stream"); } diff --git a/src/agents/provider-http-errors.ts b/src/agents/provider-http-errors.ts index 0c00ec8aa05..ef35b2eba7a 100644 --- a/src/agents/provider-http-errors.ts +++ b/src/agents/provider-http-errors.ts @@ -171,3 +171,51 @@ export async function readProviderJsonResponse(response: Response, label: str throw new Error(`${label}: malformed JSON response`, { cause }); } } + +export async function readProviderJsonObjectResponse( + response: Response, + label: string, +): Promise> { + const payload = await readProviderJsonResponse(response, label); + const object = asObject(payload); + if (!object) { + throw new Error(`${label}: malformed JSON response`); + } + return object; +} + +function normalizeContentType(response: Response): string | undefined { + const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase(); + return contentType || undefined; +} + +export function assertProviderBinaryResponseContent( + response: Response, + label: string, + kind = "binary", +): void { + const contentType = normalizeContentType(response); + if (!contentType) { + return; + } + if ( + contentType === "application/json" || + contentType.endsWith("+json") || + contentType.startsWith("text/") + ) { + throw new Error(`${label}: malformed ${kind} response`); + } +} + +export async function readProviderBinaryResponse( + response: Response, + label: string, + kind = "binary", +): Promise { + assertProviderBinaryResponseContent(response, label, kind); + const bytes = new Uint8Array(await response.arrayBuffer()); + if (bytes.byteLength === 0) { + throw new Error(`${label}: malformed ${kind} response`); + } + return bytes; +} diff --git a/src/media-understanding/openai-compatible-audio.test.ts b/src/media-understanding/openai-compatible-audio.test.ts index 4a3e9e2ff76..2029b1ec288 100644 --- a/src/media-understanding/openai-compatible-audio.test.ts +++ b/src/media-understanding/openai-compatible-audio.test.ts @@ -88,4 +88,21 @@ describe("transcribeOpenAiCompatibleAudio", () => { }), ).rejects.toThrow("Audio transcription failed: malformed JSON response"); }); + + it("rejects non-object successful transcription JSON with a stable provider error", async () => { + const fetchFn = vi.fn().mockResolvedValueOnce(new Response(JSON.stringify([]))); + + await expect( + transcribeOpenAiCompatibleAudio({ + buffer: Buffer.from("audio"), + fileName: "note.mp3", + apiKey: "test-key", + timeoutMs: 1000, + fetchFn, + provider: "openai", + defaultBaseUrl: "https://api.openai.com/v1", + defaultModel: "gpt-4o-transcribe", + }), + ).rejects.toThrow("Audio transcription failed: malformed JSON response"); + }); }); diff --git a/src/media-understanding/openai-compatible-audio.ts b/src/media-understanding/openai-compatible-audio.ts index 6faebda7700..4cb93005f55 100644 --- a/src/media-understanding/openai-compatible-audio.ts +++ b/src/media-understanding/openai-compatible-audio.ts @@ -2,7 +2,7 @@ import { assertOkOrThrowHttpError, buildAudioTranscriptionFormData, postTranscriptionRequest, - readProviderJsonResponse, + readProviderJsonObjectResponse, resolveProviderHttpRequestConfig, requireTranscriptionText, } from "./shared.js"; @@ -65,12 +65,9 @@ export async function transcribeOpenAiCompatibleAudio( try { await assertOkOrThrowHttpError(res, "Audio transcription failed"); - const payload = await readProviderJsonResponse<{ text?: string }>( - res, - "Audio transcription failed", - ); + const payload = await readProviderJsonObjectResponse(res, "Audio transcription failed"); const text = requireTranscriptionText( - payload.text, + typeof payload.text === "string" ? payload.text : undefined, "Audio transcription response missing text", ); return { text, model }; diff --git a/src/media-understanding/shared.ts b/src/media-understanding/shared.ts index a6e2b218771..ddab6e35752 100644 --- a/src/media-understanding/shared.ts +++ b/src/media-understanding/shared.ts @@ -6,6 +6,7 @@ import { } from "../agents/provider-http-errors.js"; export { assertOkOrThrowHttpError, + readProviderJsonObjectResponse, readProviderJsonResponse, } from "../agents/provider-http-errors.js"; import type { diff --git a/src/plugin-sdk/provider-http.ts b/src/plugin-sdk/provider-http.ts index 67abef98ea9..4ae5a6423fd 100644 --- a/src/plugin-sdk/provider-http.ts +++ b/src/plugin-sdk/provider-http.ts @@ -4,11 +4,14 @@ export { assertOkOrThrowHttpError, assertOkOrThrowProviderError, + assertProviderBinaryResponseContent, createProviderHttpError, extractProviderErrorDetail, extractProviderRequestId, formatProviderErrorPayload, formatProviderHttpErrorMessage, + readProviderBinaryResponse, + readProviderJsonObjectResponse, readProviderJsonResponse, readResponseTextLimited, truncateErrorDetail, diff --git a/src/tts/openai-compatible-speech-provider.test.ts b/src/tts/openai-compatible-speech-provider.test.ts index bc354ccc9f0..c76caf8700e 100644 --- a/src/tts/openai-compatible-speech-provider.test.ts +++ b/src/tts/openai-compatible-speech-provider.test.ts @@ -1,21 +1,37 @@ import { afterEach, describe, expect, it, vi } from "vitest"; import { createOpenAiCompatibleSpeechProvider } from "./openai-compatible-speech-provider.js"; -const { assertOkOrThrowHttpErrorMock, postJsonRequestMock, resolveProviderHttpRequestConfigMock } = - vi.hoisted(() => ({ - assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), - postJsonRequestMock: vi.fn(), - resolveProviderHttpRequestConfigMock: vi.fn((params: Record) => ({ - baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://example.test/v1", - allowPrivateNetwork: false, - headers: new Headers(params.defaultHeaders as HeadersInit | undefined), - dispatcherPolicy: undefined, - })), - })); +const { + assertOkOrThrowHttpErrorMock, + postJsonRequestMock, + readProviderBinaryResponseMock, + resolveProviderHttpRequestConfigMock, +} = vi.hoisted(() => ({ + assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), + postJsonRequestMock: vi.fn(), + readProviderBinaryResponseMock: vi.fn(async (response: Response, label: string) => { + const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase(); + if (contentType === "application/json" || contentType?.startsWith("text/")) { + throw new Error(`${label}: malformed audio response`); + } + const bytes = new Uint8Array(await response.arrayBuffer()); + if (bytes.byteLength === 0) { + throw new Error(`${label}: malformed audio response`); + } + return bytes; + }), + resolveProviderHttpRequestConfigMock: vi.fn((params: Record) => ({ + baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://example.test/v1", + allowPrivateNetwork: false, + headers: new Headers(params.defaultHeaders as HeadersInit | undefined), + dispatcherPolicy: undefined, + })), +})); vi.mock("openclaw/plugin-sdk/provider-http", () => ({ assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, postJsonRequest: postJsonRequestMock, + readProviderBinaryResponse: readProviderBinaryResponseMock, resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, })); @@ -35,6 +51,7 @@ describe("createOpenAiCompatibleSpeechProvider", () => { afterEach(() => { assertOkOrThrowHttpErrorMock.mockClear(); postJsonRequestMock.mockReset(); + readProviderBinaryResponseMock.mockClear(); resolveProviderHttpRequestConfigMock.mockClear(); vi.unstubAllEnvs(); }); @@ -159,4 +176,77 @@ describe("createOpenAiCompatibleSpeechProvider", () => { expect(result.voiceCompatible).toBe(true); expect(release).toHaveBeenCalledOnce(); }); + + it("rejects JSON success bodies from TTS responses as malformed audio", async () => { + const release = vi.fn(async () => {}); + postJsonRequestMock.mockResolvedValue({ + response: new Response(JSON.stringify({ error: "not audio" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release, + }); + vi.stubEnv("DEMO_API_KEY", "sk-env"); + + const provider = createOpenAiCompatibleSpeechProvider({ + id: "demo", + label: "Demo", + autoSelectOrder: 40, + models: ["demo-tts"], + voices: ["alloy"], + defaultModel: "demo-tts", + defaultVoice: "alloy", + defaultBaseUrl: "https://example.test/v1", + envKey: "DEMO_API_KEY", + responseFormats: ["mp3"], + defaultResponseFormat: "mp3", + voiceCompatibleResponseFormats: ["mp3"], + }); + + await expect( + provider.synthesize({ + text: "hello", + cfg: {} as never, + providerConfig: {}, + target: "voice-note", + timeoutMs: 1234, + }), + ).rejects.toThrow("Demo TTS API error: malformed audio response"); + expect(release).toHaveBeenCalledOnce(); + }); + + it("rejects empty successful TTS bodies as malformed audio", async () => { + const release = vi.fn(async () => {}); + postJsonRequestMock.mockResolvedValue({ + response: new Response(new Uint8Array(), { status: 200 }), + release, + }); + vi.stubEnv("DEMO_API_KEY", "sk-env"); + + const provider = createOpenAiCompatibleSpeechProvider({ + id: "demo", + label: "Demo", + autoSelectOrder: 40, + models: ["demo-tts"], + voices: ["alloy"], + defaultModel: "demo-tts", + defaultVoice: "alloy", + defaultBaseUrl: "https://example.test/v1", + envKey: "DEMO_API_KEY", + responseFormats: ["mp3"], + defaultResponseFormat: "mp3", + voiceCompatibleResponseFormats: ["mp3"], + }); + + await expect( + provider.synthesize({ + text: "hello", + cfg: {} as never, + providerConfig: {}, + target: "voice-note", + timeoutMs: 1234, + }), + ).rejects.toThrow("Demo TTS API error: malformed audio response"); + expect(release).toHaveBeenCalledOnce(); + }); }); diff --git a/src/tts/openai-compatible-speech-provider.ts b/src/tts/openai-compatible-speech-provider.ts index af2d5a707f0..e7d65546135 100644 --- a/src/tts/openai-compatible-speech-provider.ts +++ b/src/tts/openai-compatible-speech-provider.ts @@ -1,6 +1,7 @@ import { assertOkOrThrowHttpError, postJsonRequest, + readProviderBinaryResponse, resolveProviderHttpRequestConfig, } from "openclaw/plugin-sdk/provider-http"; import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input"; @@ -382,7 +383,13 @@ export function createOpenAiCompatibleSpeechProvider< options.apiErrorLabel ?? `${options.label} TTS API error`, ); return { - audioBuffer: Buffer.from(await response.arrayBuffer()), + audioBuffer: Buffer.from( + await readProviderBinaryResponse( + response, + options.apiErrorLabel ?? `${options.label} TTS API error`, + "audio", + ), + ), outputFormat: responseFormat, fileExtension: responseFormatToFileExtension(responseFormat), voiceCompatible: options.voiceCompatibleResponseFormats.includes(responseFormat),