import { asObject, normalizeApplyTextNormalization, normalizeLanguageCode, normalizeSeed, readResponseTextLimited, requireInRange, trimToUndefined, truncateErrorDetail, } from "openclaw/plugin-sdk/speech"; import { isValidElevenLabsVoiceId, normalizeElevenLabsBaseUrl } from "./shared.js"; function formatElevenLabsErrorPayload(payload: unknown): string | undefined { const root = asObject(payload); if (!root) { return undefined; } const detailObject = asObject(root.detail); const message = trimToUndefined(root.message) ?? trimToUndefined(detailObject?.message) ?? trimToUndefined(detailObject?.detail) ?? trimToUndefined(root.error); const code = trimToUndefined(root.code) ?? trimToUndefined(detailObject?.code) ?? trimToUndefined(detailObject?.status); if (message && code) { return `${truncateErrorDetail(message)} [code=${code}]`; } if (message) { return truncateErrorDetail(message); } if (code) { return `[code=${code}]`; } return undefined; } async function extractElevenLabsErrorDetail(response: Response): Promise { const rawBody = trimToUndefined(await readResponseTextLimited(response)); if (!rawBody) { return undefined; } try { return formatElevenLabsErrorPayload(JSON.parse(rawBody)) ?? truncateErrorDetail(rawBody); } catch { return truncateErrorDetail(rawBody); } } function assertElevenLabsVoiceSettings(settings: { stability: number; similarityBoost: number; style: number; useSpeakerBoost: boolean; speed: number; }) { requireInRange(settings.stability, 0, 1, "stability"); requireInRange(settings.similarityBoost, 0, 1, "similarityBoost"); requireInRange(settings.style, 0, 1, "style"); requireInRange(settings.speed, 0.5, 2, "speed"); } export async function elevenLabsTTS(params: { text: string; apiKey: string; baseUrl: string; voiceId: string; modelId: string; outputFormat: string; seed?: number; applyTextNormalization?: "auto" | "on" | "off"; languageCode?: string; latencyTier?: number; voiceSettings: { stability: number; similarityBoost: number; style: number; useSpeakerBoost: boolean; speed: number; }; timeoutMs: number; }): Promise { const { text, apiKey, baseUrl, voiceId, modelId, outputFormat, seed, applyTextNormalization, languageCode, latencyTier, voiceSettings, timeoutMs, } = params; if (!isValidElevenLabsVoiceId(voiceId)) { throw new Error("Invalid voiceId format"); } assertElevenLabsVoiceSettings(voiceSettings); const normalizedLanguage = normalizeLanguageCode(languageCode); const normalizedNormalization = normalizeApplyTextNormalization(applyTextNormalization); const normalizedSeed = normalizeSeed(seed); const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), timeoutMs); try { const url = new URL(`${normalizeElevenLabsBaseUrl(baseUrl)}/v1/text-to-speech/${voiceId}`); if (outputFormat) { url.searchParams.set("output_format", outputFormat); } const response = await fetch(url.toString(), { method: "POST", headers: { "xi-api-key": apiKey, "Content-Type": "application/json", Accept: "audio/mpeg", }, body: JSON.stringify({ text, model_id: modelId, seed: normalizedSeed, apply_text_normalization: normalizedNormalization, language_code: normalizedLanguage, latency_optimization_level: latencyTier, voice_settings: { stability: voiceSettings.stability, similarity_boost: voiceSettings.similarityBoost, style: voiceSettings.style, use_speaker_boost: voiceSettings.useSpeakerBoost, speed: voiceSettings.speed, }, }), signal: controller.signal, }); if (!response.ok) { const detail = await extractElevenLabsErrorDetail(response); const requestId = trimToUndefined(response.headers.get("x-request-id")) ?? trimToUndefined(response.headers.get("request-id")); throw new Error( `ElevenLabs API error (${response.status})` + (detail ? `: ${detail}` : "") + (requestId ? ` [request_id=${requestId}]` : ""), ); } return Buffer.from(await response.arrayBuffer()); } finally { clearTimeout(timeout); } }