mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 12:14:46 +00:00
fix(providers): harden audio response schemas
This commit is contained in:
@@ -41,6 +41,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Providers: reject malformed successful Runway, BytePlus, and Ollama embedding responses with provider-owned errors instead of raw parser/type failures, silent bad vectors, or long bogus polling.
|
||||
- Providers/images: reject malformed successful OpenAI-compatible, OpenAI, Google, fal, and OpenRouter image responses with provider-owned errors instead of raw shape failures, silent invalid base64 skips, or empty image results.
|
||||
- Providers/videos: reject malformed successful xAI, OpenRouter, and fal video create, poll, and result responses with provider-owned errors instead of raw parser failures or long bogus polling.
|
||||
- Providers/audio: reject malformed successful OpenAI-compatible, ElevenLabs, and Deepgram speech responses with provider-owned errors instead of raw parser failures, wrong-shaped transcripts, or JSON/text bodies treated as audio.
|
||||
- Trajectory export: skip and report malformed session/runtime JSONL rows in `manifest.json` instead of letting wrong-shaped session rows crash support bundle export.
|
||||
- Voice calls: persist rejected inbound-call replay keys so duplicate carrier webhook retries stay ignored after a Gateway restart.
|
||||
- Config/doctor: copy fallback-enabled channel `allowFrom` entries into explicit `groupAllowFrom` allowlists during `openclaw doctor --fix`, preserving current group access without adding runtime fallback-transition flags.
|
||||
|
||||
@@ -3,7 +3,7 @@ import {
|
||||
createRequestCaptureJsonFetch,
|
||||
installPinnedHostnameTestHooks,
|
||||
} from "openclaw/plugin-sdk/test-env";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { transcribeDeepgramAudio } from "./audio.js";
|
||||
|
||||
installPinnedHostnameTestHooks();
|
||||
@@ -83,4 +83,64 @@ describe("transcribeDeepgramAudio", () => {
|
||||
}),
|
||||
).rejects.toThrow("Audio transcription response missing transcript");
|
||||
});
|
||||
|
||||
it("wraps malformed successful transcription JSON with a stable provider error", async () => {
|
||||
const fetchFn = vi.fn<typeof fetch>().mockResolvedValueOnce(new Response("{ nope"));
|
||||
|
||||
await expect(
|
||||
transcribeDeepgramAudio({
|
||||
buffer: Buffer.from("audio-bytes"),
|
||||
fileName: "voice.wav",
|
||||
apiKey: "test-key",
|
||||
timeoutMs: 1234,
|
||||
fetchFn,
|
||||
}),
|
||||
).rejects.toThrow("Audio transcription failed: malformed JSON response");
|
||||
});
|
||||
|
||||
it("rejects non-object successful transcription JSON with a stable provider error", async () => {
|
||||
const fetchFn = vi.fn<typeof fetch>().mockResolvedValueOnce(new Response(JSON.stringify([])));
|
||||
|
||||
await expect(
|
||||
transcribeDeepgramAudio({
|
||||
buffer: Buffer.from("audio-bytes"),
|
||||
fileName: "voice.wav",
|
||||
apiKey: "test-key",
|
||||
timeoutMs: 1234,
|
||||
fetchFn,
|
||||
}),
|
||||
).rejects.toThrow("Audio transcription failed: malformed JSON response");
|
||||
});
|
||||
|
||||
it("rejects wrong nested transcript shapes with a stable provider error", async () => {
|
||||
const { fetchFn } = createRequestCaptureJsonFetch({
|
||||
results: { channels: { alternatives: [{ transcript: "hello" }] } },
|
||||
});
|
||||
|
||||
await expect(
|
||||
transcribeDeepgramAudio({
|
||||
buffer: Buffer.from("audio-bytes"),
|
||||
fileName: "voice.wav",
|
||||
apiKey: "test-key",
|
||||
timeoutMs: 1234,
|
||||
fetchFn,
|
||||
}),
|
||||
).rejects.toThrow("Audio transcription failed: malformed JSON response");
|
||||
});
|
||||
|
||||
it("rejects non-string transcript values with a stable provider error", async () => {
|
||||
const { fetchFn } = createRequestCaptureJsonFetch({
|
||||
results: { channels: [{ alternatives: [{ transcript: 123 }] }] },
|
||||
});
|
||||
|
||||
await expect(
|
||||
transcribeDeepgramAudio({
|
||||
buffer: Buffer.from("audio-bytes"),
|
||||
fileName: "voice.wav",
|
||||
apiKey: "test-key",
|
||||
timeoutMs: 1234,
|
||||
fetchFn,
|
||||
}),
|
||||
).rejects.toThrow("Audio transcription failed: malformed JSON response");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -5,6 +5,7 @@ import type {
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
postTranscriptionRequest,
|
||||
readProviderJsonObjectResponse,
|
||||
resolveProviderHttpRequestConfig,
|
||||
requireTranscriptionText,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
@@ -17,15 +18,36 @@ function resolveModel(model?: string): string {
|
||||
return trimmed || DEFAULT_DEEPGRAM_AUDIO_MODEL;
|
||||
}
|
||||
|
||||
type DeepgramTranscriptResponse = {
|
||||
results?: {
|
||||
channels?: Array<{
|
||||
alternatives?: Array<{
|
||||
transcript?: string;
|
||||
}>;
|
||||
}>;
|
||||
};
|
||||
};
|
||||
/**
 * Views an unknown value as a string-keyed record when it is a plain object.
 *
 * @param value - Any value, typically a node from parsed JSON.
 * @returns The same value typed as a record when it is a non-null object that
 *   is not an array; `undefined` for primitives, `null`, and arrays.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return undefined;
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Extracts `results.channels[0].alternatives[0].transcript` from a parsed
 * Deepgram transcription payload, validating the shape along the way.
 *
 * @param payload - The top-level JSON object of the transcription response.
 * @returns The transcript string, or `undefined` when `results`, the first
 *   channel, or the first alternative is not a plain object, or when the
 *   alternative carries no `transcript` field.
 * @throws Error with message "Audio transcription failed: malformed JSON response"
 *   when `channels` or `alternatives` is not an array, or when `transcript` is
 *   present but not a string.
 */
function readDeepgramTranscript(payload: Record<string, unknown>): string | undefined {
  const malformedMessage = "Audio transcription failed: malformed JSON response";

  // Reads `owner[key]`, insists it is an array, and returns its first entry as
  // a record (or `undefined` when that entry is missing or not a plain object).
  const firstEntry = (
    owner: Record<string, unknown>,
    key: string,
  ): Record<string, unknown> | undefined => {
    const list = owner[key];
    if (!Array.isArray(list)) {
      throw new Error(malformedMessage);
    }
    return asRecord(list[0]);
  };

  const resultsNode = asRecord(payload.results);
  if (resultsNode === undefined) {
    return undefined;
  }

  const firstChannel = firstEntry(resultsNode, "channels");
  if (firstChannel === undefined) {
    return undefined;
  }

  const firstAlternative = firstEntry(firstChannel, "alternatives");
  if (firstAlternative === undefined) {
    return undefined;
  }

  const { transcript } = firstAlternative;
  if (transcript === undefined) {
    return undefined;
  }
  if (typeof transcript === "string") {
    return transcript;
  }
  throw new Error(malformedMessage);
}
|
||||
|
||||
export async function transcribeDeepgramAudio(
|
||||
params: AudioTranscriptionRequest,
|
||||
@@ -75,9 +97,9 @@ export async function transcribeDeepgramAudio(
|
||||
try {
|
||||
await assertOkOrThrowHttpError(res, "Audio transcription failed");
|
||||
|
||||
const payload = (await res.json()) as DeepgramTranscriptResponse;
|
||||
const payload = await readProviderJsonObjectResponse(res, "Audio transcription failed");
|
||||
const transcript = requireTranscriptionText(
|
||||
payload.results?.channels?.[0]?.alternatives?.[0]?.transcript,
|
||||
readDeepgramTranscript(payload),
|
||||
"Audio transcription response missing transcript",
|
||||
);
|
||||
return { text: transcript, model };
|
||||
|
||||
@@ -60,4 +60,36 @@ describe("elevenLabsMediaUnderstandingProvider", () => {
|
||||
expect(form.get("language_code")).toBe("en");
|
||||
expect(form.get("file")).toBeInstanceOf(Blob);
|
||||
});
|
||||
|
||||
it("wraps malformed successful speech-to-text JSON with a stable provider error", async () => {
|
||||
const fetchMock = vi.fn<typeof fetch>().mockResolvedValue(new Response("{ nope"));
|
||||
|
||||
await expect(
|
||||
transcribeElevenLabsAudio({
|
||||
buffer: Buffer.from("audio"),
|
||||
fileName: "voice.mp3",
|
||||
mime: "audio/mpeg",
|
||||
apiKey: "eleven-key",
|
||||
model: "scribe_v2",
|
||||
timeoutMs: 1000,
|
||||
fetchFn: fetchMock,
|
||||
}),
|
||||
).rejects.toThrow("ElevenLabs audio transcription failed: malformed JSON response");
|
||||
});
|
||||
|
||||
it("rejects non-object successful speech-to-text JSON with a stable provider error", async () => {
|
||||
const fetchMock = vi.fn<typeof fetch>().mockResolvedValue(new Response(JSON.stringify([])));
|
||||
|
||||
await expect(
|
||||
transcribeElevenLabsAudio({
|
||||
buffer: Buffer.from("audio"),
|
||||
fileName: "voice.mp3",
|
||||
mime: "audio/mpeg",
|
||||
apiKey: "eleven-key",
|
||||
model: "scribe_v2",
|
||||
timeoutMs: 1000,
|
||||
fetchFn: fetchMock,
|
||||
}),
|
||||
).rejects.toThrow("ElevenLabs audio transcription failed: malformed JSON response");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
assertOkOrThrowHttpError,
|
||||
buildAudioTranscriptionFormData,
|
||||
postTranscriptionRequest,
|
||||
readProviderJsonObjectResponse,
|
||||
resolveProviderHttpRequestConfig,
|
||||
requireTranscriptionText,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
@@ -61,9 +62,12 @@ export async function transcribeElevenLabsAudio(
|
||||
|
||||
try {
|
||||
await assertOkOrThrowHttpError(response, "ElevenLabs audio transcription failed");
|
||||
const payload = (await response.json()) as { text?: string };
|
||||
const payload = await readProviderJsonObjectResponse(
|
||||
response,
|
||||
"ElevenLabs audio transcription failed",
|
||||
);
|
||||
const text = requireTranscriptionText(
|
||||
payload.text,
|
||||
typeof payload.text === "string" ? payload.text : undefined,
|
||||
"ElevenLabs audio transcription response missing text",
|
||||
);
|
||||
return { text, model };
|
||||
|
||||
@@ -112,6 +112,25 @@ describe("elevenlabs tts diagnostics", () => {
|
||||
expect(getHeadersFromFirstFetchCall(fetchMock).get("accept")).toBe("audio/mpeg");
|
||||
});
|
||||
|
||||
it("rejects JSON success bodies as malformed audio", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(JSON.stringify({ error: "not audio" }), {
|
||||
headers: { "content-type": "application/json" },
|
||||
}),
|
||||
);
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await expectDefaultTtsRequestToThrow("ElevenLabs API error: malformed audio response");
|
||||
});
|
||||
|
||||
it("rejects empty successful audio bodies as malformed audio", async () => {
|
||||
const fetchMock = vi.fn(async () => new Response(new Uint8Array()));
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await expectDefaultTtsRequestToThrow("ElevenLabs API error: malformed audio response");
|
||||
});
|
||||
|
||||
it("omits the MPEG Accept header for PCM telephony output", async () => {
|
||||
const fetchMock = vi.fn(async () => new Response(Buffer.from("pcm")));
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
@@ -176,4 +195,18 @@ describe("elevenlabs tts diagnostics", () => {
|
||||
expect(result.audioStream).toBeInstanceOf(ReadableStream);
|
||||
await result.release();
|
||||
});
|
||||
|
||||
it("rejects JSON success stream responses as malformed audio", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(JSON.stringify({ error: "not audio" }), {
|
||||
headers: { "content-type": "application/json" },
|
||||
}),
|
||||
);
|
||||
globalThis.fetch = fetchMock as unknown as typeof fetch;
|
||||
|
||||
await expect(elevenLabsTTSStream(createDefaultTtsRequest())).rejects.toThrow(
|
||||
"ElevenLabs API error: malformed audio response",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http";
|
||||
import {
|
||||
assertOkOrThrowProviderError,
|
||||
assertProviderBinaryResponseContent,
|
||||
readProviderBinaryResponse,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import {
|
||||
normalizeApplyTextNormalization,
|
||||
normalizeLanguageCode,
|
||||
@@ -143,7 +147,7 @@ export async function elevenLabsTTS(params: ElevenLabsTtsRequestParams): Promise
|
||||
try {
|
||||
await assertOkOrThrowProviderError(response, "ElevenLabs API error");
|
||||
|
||||
return Buffer.from(await response.arrayBuffer());
|
||||
return Buffer.from(await readProviderBinaryResponse(response, "ElevenLabs API error", "audio"));
|
||||
} finally {
|
||||
await release();
|
||||
}
|
||||
@@ -177,6 +181,7 @@ export async function elevenLabsTTSStream(params: ElevenLabsTtsRequestParams): P
|
||||
let handedOff = false;
|
||||
try {
|
||||
await assertOkOrThrowProviderError(response, "ElevenLabs API error");
|
||||
assertProviderBinaryResponseContent(response, "ElevenLabs API error", "audio");
|
||||
if (!response.body) {
|
||||
throw new Error("ElevenLabs API response missing audio stream");
|
||||
}
|
||||
|
||||
@@ -171,3 +171,51 @@ export async function readProviderJsonResponse<T>(response: Response, label: str
|
||||
throw new Error(`${label}: malformed JSON response`, { cause });
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reads a provider response body as JSON and requires the parsed value to be
 * accepted by `asObject`.
 *
 * @param response - The HTTP response whose body should be parsed.
 * @param label - Prefix for the error message.
 * @returns The parsed payload as a string-keyed record.
 * @throws Error `${label}: malformed JSON response` when `asObject` yields a
 *   falsy result for the parsed payload. Errors raised by
 *   `readProviderJsonResponse` propagate unchanged.
 */
export async function readProviderJsonObjectResponse(
  response: Response,
  label: string,
): Promise<Record<string, unknown>> {
  const parsed = asObject(await readProviderJsonResponse<unknown>(response, label));
  if (parsed) {
    return parsed;
  }
  throw new Error(`${label}: malformed JSON response`);
}
|
||||
|
||||
/**
 * Returns the media type of a response's `content-type` header: the text
 * before the first `;`, trimmed and lower-cased.
 *
 * @param response - The response whose headers are inspected.
 * @returns The normalized media type, or `undefined` when the header is absent
 *   or its media-type portion is empty.
 */
function normalizeContentType(response: Response): string | undefined {
  const header = response.headers.get("content-type");
  if (header == null) {
    return undefined;
  }
  const [mediaType] = header.split(";");
  const normalized = mediaType?.trim().toLowerCase();
  return normalized ? normalized : undefined;
}
|
||||
|
||||
/**
 * Rejects a response whose declared content type is JSON or text where binary
 * content is expected. Only headers are inspected; the body is not read.
 *
 * A response without a usable `content-type` header passes the check.
 *
 * @param response - The response to inspect.
 * @param label - Prefix for the error message.
 * @param kind - Noun used in the error message; defaults to `"binary"`.
 * @throws Error `${label}: malformed ${kind} response` when the media type is
 *   `application/json`, ends with `+json`, or starts with `text/`.
 */
export function assertProviderBinaryResponseContent(
  response: Response,
  label: string,
  kind = "binary",
): void {
  const mediaType = normalizeContentType(response);
  if (!mediaType) {
    return;
  }

  const declaresJson = mediaType === "application/json" || mediaType.endsWith("+json");
  const declaresText = mediaType.startsWith("text/");
  if (declaresJson || declaresText) {
    throw new Error(`${label}: malformed ${kind} response`);
  }
}
|
||||
|
||||
/**
 * Reads a provider response body as bytes after checking that its declared
 * content type is acceptable for binary content.
 *
 * @param response - The response whose body is consumed.
 * @param label - Prefix for the error message.
 * @param kind - Noun used in the error message; defaults to `"binary"`.
 * @returns The full response body as a `Uint8Array`.
 * @throws Error `${label}: malformed ${kind} response` when the body is empty,
 *   or when `assertProviderBinaryResponseContent` rejects the content type.
 */
export async function readProviderBinaryResponse(
  response: Response,
  label: string,
  kind = "binary",
): Promise<Uint8Array> {
  // Validate headers first so a rejected response never has its body read.
  assertProviderBinaryResponseContent(response, label, kind);

  const body = new Uint8Array(await response.arrayBuffer());
  if (body.byteLength > 0) {
    return body;
  }
  throw new Error(`${label}: malformed ${kind} response`);
}
|
||||
|
||||
@@ -88,4 +88,21 @@ describe("transcribeOpenAiCompatibleAudio", () => {
|
||||
}),
|
||||
).rejects.toThrow("Audio transcription failed: malformed JSON response");
|
||||
});
|
||||
|
||||
it("rejects non-object successful transcription JSON with a stable provider error", async () => {
|
||||
const fetchFn = vi.fn<typeof fetch>().mockResolvedValueOnce(new Response(JSON.stringify([])));
|
||||
|
||||
await expect(
|
||||
transcribeOpenAiCompatibleAudio({
|
||||
buffer: Buffer.from("audio"),
|
||||
fileName: "note.mp3",
|
||||
apiKey: "test-key",
|
||||
timeoutMs: 1000,
|
||||
fetchFn,
|
||||
provider: "openai",
|
||||
defaultBaseUrl: "https://api.openai.com/v1",
|
||||
defaultModel: "gpt-4o-transcribe",
|
||||
}),
|
||||
).rejects.toThrow("Audio transcription failed: malformed JSON response");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -2,7 +2,7 @@ import {
|
||||
assertOkOrThrowHttpError,
|
||||
buildAudioTranscriptionFormData,
|
||||
postTranscriptionRequest,
|
||||
readProviderJsonResponse,
|
||||
readProviderJsonObjectResponse,
|
||||
resolveProviderHttpRequestConfig,
|
||||
requireTranscriptionText,
|
||||
} from "./shared.js";
|
||||
@@ -65,12 +65,9 @@ export async function transcribeOpenAiCompatibleAudio(
|
||||
try {
|
||||
await assertOkOrThrowHttpError(res, "Audio transcription failed");
|
||||
|
||||
const payload = await readProviderJsonResponse<{ text?: string }>(
|
||||
res,
|
||||
"Audio transcription failed",
|
||||
);
|
||||
const payload = await readProviderJsonObjectResponse(res, "Audio transcription failed");
|
||||
const text = requireTranscriptionText(
|
||||
payload.text,
|
||||
typeof payload.text === "string" ? payload.text : undefined,
|
||||
"Audio transcription response missing text",
|
||||
);
|
||||
return { text, model };
|
||||
|
||||
@@ -6,6 +6,7 @@ import {
|
||||
} from "../agents/provider-http-errors.js";
|
||||
export {
|
||||
assertOkOrThrowHttpError,
|
||||
readProviderJsonObjectResponse,
|
||||
readProviderJsonResponse,
|
||||
} from "../agents/provider-http-errors.js";
|
||||
import type {
|
||||
|
||||
@@ -4,11 +4,14 @@
|
||||
export {
|
||||
assertOkOrThrowHttpError,
|
||||
assertOkOrThrowProviderError,
|
||||
assertProviderBinaryResponseContent,
|
||||
createProviderHttpError,
|
||||
extractProviderErrorDetail,
|
||||
extractProviderRequestId,
|
||||
formatProviderErrorPayload,
|
||||
formatProviderHttpErrorMessage,
|
||||
readProviderBinaryResponse,
|
||||
readProviderJsonObjectResponse,
|
||||
readProviderJsonResponse,
|
||||
readResponseTextLimited,
|
||||
truncateErrorDetail,
|
||||
|
||||
@@ -1,21 +1,37 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { createOpenAiCompatibleSpeechProvider } from "./openai-compatible-speech-provider.js";
|
||||
|
||||
const { assertOkOrThrowHttpErrorMock, postJsonRequestMock, resolveProviderHttpRequestConfigMock } =
|
||||
vi.hoisted(() => ({
|
||||
assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
|
||||
postJsonRequestMock: vi.fn(),
|
||||
resolveProviderHttpRequestConfigMock: vi.fn((params: Record<string, unknown>) => ({
|
||||
baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://example.test/v1",
|
||||
allowPrivateNetwork: false,
|
||||
headers: new Headers(params.defaultHeaders as HeadersInit | undefined),
|
||||
dispatcherPolicy: undefined,
|
||||
})),
|
||||
}));
|
||||
const {
|
||||
assertOkOrThrowHttpErrorMock,
|
||||
postJsonRequestMock,
|
||||
readProviderBinaryResponseMock,
|
||||
resolveProviderHttpRequestConfigMock,
|
||||
} = vi.hoisted(() => ({
|
||||
assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
|
||||
postJsonRequestMock: vi.fn(),
|
||||
readProviderBinaryResponseMock: vi.fn(async (response: Response, label: string) => {
|
||||
const contentType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase();
|
||||
if (contentType === "application/json" || contentType?.startsWith("text/")) {
|
||||
throw new Error(`${label}: malformed audio response`);
|
||||
}
|
||||
const bytes = new Uint8Array(await response.arrayBuffer());
|
||||
if (bytes.byteLength === 0) {
|
||||
throw new Error(`${label}: malformed audio response`);
|
||||
}
|
||||
return bytes;
|
||||
}),
|
||||
resolveProviderHttpRequestConfigMock: vi.fn((params: Record<string, unknown>) => ({
|
||||
baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://example.test/v1",
|
||||
allowPrivateNetwork: false,
|
||||
headers: new Headers(params.defaultHeaders as HeadersInit | undefined),
|
||||
dispatcherPolicy: undefined,
|
||||
})),
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/provider-http", () => ({
|
||||
assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
|
||||
postJsonRequest: postJsonRequestMock,
|
||||
readProviderBinaryResponse: readProviderBinaryResponseMock,
|
||||
resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
|
||||
}));
|
||||
|
||||
@@ -35,6 +51,7 @@ describe("createOpenAiCompatibleSpeechProvider", () => {
|
||||
afterEach(() => {
|
||||
assertOkOrThrowHttpErrorMock.mockClear();
|
||||
postJsonRequestMock.mockReset();
|
||||
readProviderBinaryResponseMock.mockClear();
|
||||
resolveProviderHttpRequestConfigMock.mockClear();
|
||||
vi.unstubAllEnvs();
|
||||
});
|
||||
@@ -159,4 +176,77 @@ describe("createOpenAiCompatibleSpeechProvider", () => {
|
||||
expect(result.voiceCompatible).toBe(true);
|
||||
expect(release).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("rejects JSON success bodies from TTS responses as malformed audio", async () => {
|
||||
const release = vi.fn(async () => {});
|
||||
postJsonRequestMock.mockResolvedValue({
|
||||
response: new Response(JSON.stringify({ error: "not audio" }), {
|
||||
status: 200,
|
||||
headers: { "content-type": "application/json" },
|
||||
}),
|
||||
release,
|
||||
});
|
||||
vi.stubEnv("DEMO_API_KEY", "sk-env");
|
||||
|
||||
const provider = createOpenAiCompatibleSpeechProvider({
|
||||
id: "demo",
|
||||
label: "Demo",
|
||||
autoSelectOrder: 40,
|
||||
models: ["demo-tts"],
|
||||
voices: ["alloy"],
|
||||
defaultModel: "demo-tts",
|
||||
defaultVoice: "alloy",
|
||||
defaultBaseUrl: "https://example.test/v1",
|
||||
envKey: "DEMO_API_KEY",
|
||||
responseFormats: ["mp3"],
|
||||
defaultResponseFormat: "mp3",
|
||||
voiceCompatibleResponseFormats: ["mp3"],
|
||||
});
|
||||
|
||||
await expect(
|
||||
provider.synthesize({
|
||||
text: "hello",
|
||||
cfg: {} as never,
|
||||
providerConfig: {},
|
||||
target: "voice-note",
|
||||
timeoutMs: 1234,
|
||||
}),
|
||||
).rejects.toThrow("Demo TTS API error: malformed audio response");
|
||||
expect(release).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("rejects empty successful TTS bodies as malformed audio", async () => {
|
||||
const release = vi.fn(async () => {});
|
||||
postJsonRequestMock.mockResolvedValue({
|
||||
response: new Response(new Uint8Array(), { status: 200 }),
|
||||
release,
|
||||
});
|
||||
vi.stubEnv("DEMO_API_KEY", "sk-env");
|
||||
|
||||
const provider = createOpenAiCompatibleSpeechProvider({
|
||||
id: "demo",
|
||||
label: "Demo",
|
||||
autoSelectOrder: 40,
|
||||
models: ["demo-tts"],
|
||||
voices: ["alloy"],
|
||||
defaultModel: "demo-tts",
|
||||
defaultVoice: "alloy",
|
||||
defaultBaseUrl: "https://example.test/v1",
|
||||
envKey: "DEMO_API_KEY",
|
||||
responseFormats: ["mp3"],
|
||||
defaultResponseFormat: "mp3",
|
||||
voiceCompatibleResponseFormats: ["mp3"],
|
||||
});
|
||||
|
||||
await expect(
|
||||
provider.synthesize({
|
||||
text: "hello",
|
||||
cfg: {} as never,
|
||||
providerConfig: {},
|
||||
target: "voice-note",
|
||||
timeoutMs: 1234,
|
||||
}),
|
||||
).rejects.toThrow("Demo TTS API error: malformed audio response");
|
||||
expect(release).toHaveBeenCalledOnce();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
postJsonRequest,
|
||||
readProviderBinaryResponse,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
|
||||
@@ -382,7 +383,13 @@ export function createOpenAiCompatibleSpeechProvider<
|
||||
options.apiErrorLabel ?? `${options.label} TTS API error`,
|
||||
);
|
||||
return {
|
||||
audioBuffer: Buffer.from(await response.arrayBuffer()),
|
||||
audioBuffer: Buffer.from(
|
||||
await readProviderBinaryResponse(
|
||||
response,
|
||||
options.apiErrorLabel ?? `${options.label} TTS API error`,
|
||||
"audio",
|
||||
),
|
||||
),
|
||||
outputFormat: responseFormat,
|
||||
fileExtension: responseFormatToFileExtension(responseFormat),
|
||||
voiceCompatible: options.voiceCompatibleResponseFormats.includes(responseFormat),
|
||||
|
||||
Reference in New Issue
Block a user