refactor(tts): remove legacy core speech builders

This commit is contained in:
Vincent Koc
2026-03-22 17:47:04 -07:00
parent a8b9763d66
commit 1d08ad4bac
3 changed files with 0 additions and 311 deletions

View File

@@ -1,127 +0,0 @@
import type { SpeechProviderPlugin } from "../../plugins/types.js";
import type { SpeechVoiceOption } from "../provider-types.js";
import { elevenLabsTTS } from "../tts-core.js";
// Model ids this provider advertises via its `models` field (see buildElevenLabsSpeechProvider).
const ELEVENLABS_TTS_MODELS = [
  "eleven_multilingual_v2",
  "eleven_turbo_v2_5",
  "eleven_monolingual_v1",
] as const;
/**
 * Normalize a user-supplied ElevenLabs base URL: trim whitespace, drop any
 * trailing slashes, and fall back to the public API host when nothing usable
 * remains (undefined, empty, or slash-only input).
 */
function normalizeElevenLabsBaseUrl(baseUrl: string | undefined): string {
  let candidate = (baseUrl ?? "").trim();
  while (candidate.endsWith("/")) {
    candidate = candidate.slice(0, -1);
  }
  return candidate.length > 0 ? candidate : "https://api.elevenlabs.io";
}
/**
 * Fetch the account's voice catalogue from the ElevenLabs `/v1/voices`
 * endpoint. Throws on a non-2xx response; returns an empty list when the
 * payload has no `voices` array. Entries without a non-empty id are dropped.
 */
export async function listElevenLabsVoices(params: {
  apiKey: string;
  baseUrl?: string;
}): Promise<SpeechVoiceOption[]> {
  const endpoint = `${normalizeElevenLabsBaseUrl(params.baseUrl)}/v1/voices`;
  const response = await fetch(endpoint, {
    headers: { "xi-api-key": params.apiKey },
  });
  if (!response.ok) {
    throw new Error(`ElevenLabs voices API error (${response.status})`);
  }
  const payload = (await response.json()) as {
    voices?: Array<{
      voice_id?: string;
      name?: string;
      category?: string;
      description?: string;
    }>;
  };
  if (!Array.isArray(payload.voices)) {
    return [];
  }
  const options: SpeechVoiceOption[] = [];
  for (const voice of payload.voices) {
    const id = voice.voice_id?.trim() ?? "";
    if (id.length === 0) {
      continue; // skip malformed entries with no usable id
    }
    options.push({
      id,
      name: voice.name?.trim() || undefined,
      category: voice.category?.trim() || undefined,
      description: voice.description?.trim() || undefined,
    });
  }
  return options;
}
/**
 * Resolve an ElevenLabs API key with the provider's precedence: an explicit
 * per-request key, then the configured key, then the ELEVENLABS_API_KEY /
 * XI_API_KEY environment variables.
 *
 * @throws Error when no key can be found from any source.
 */
function resolveElevenLabsApiKey(
  explicit: string | undefined,
  configured: string | undefined,
): string {
  const apiKey =
    explicit || configured || process.env.ELEVENLABS_API_KEY || process.env.XI_API_KEY;
  if (!apiKey) {
    throw new Error("ElevenLabs API key missing");
  }
  return apiKey;
}

/**
 * Build the ElevenLabs speech provider plugin.
 *
 * API-key resolution is centralized in resolveElevenLabsApiKey (previously
 * duplicated across listVoices/synthesize/synthesizeTelephony with slightly
 * inconsistent optional chaining).
 */
export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "elevenlabs",
    label: "ElevenLabs",
    models: ELEVENLABS_TTS_MODELS,
    listVoices: async (req) => {
      // Only listVoices accepts an explicit per-request key/base URL.
      const apiKey = resolveElevenLabsApiKey(req.apiKey, req.config?.elevenlabs?.apiKey);
      return listElevenLabsVoices({
        apiKey,
        baseUrl: req.baseUrl ?? req.config?.elevenlabs?.baseUrl,
      });
    },
    isConfigured: ({ config }) =>
      Boolean(config.elevenlabs.apiKey || process.env.ELEVENLABS_API_KEY || process.env.XI_API_KEY),
    synthesize: async (req) => {
      const apiKey = resolveElevenLabsApiKey(undefined, req.config.elevenlabs.apiKey);
      // Voice notes are delivered as Opus; everything else defaults to MP3.
      const outputFormat =
        req.overrides?.elevenlabs?.outputFormat ??
        (req.target === "voice-note" ? "opus_48000_64" : "mp3_44100_128");
      const audioBuffer = await elevenLabsTTS({
        text: req.text,
        apiKey,
        baseUrl: req.config.elevenlabs.baseUrl,
        voiceId: req.overrides?.elevenlabs?.voiceId ?? req.config.elevenlabs.voiceId,
        modelId: req.overrides?.elevenlabs?.modelId ?? req.config.elevenlabs.modelId,
        outputFormat,
        seed: req.overrides?.elevenlabs?.seed ?? req.config.elevenlabs.seed,
        applyTextNormalization:
          req.overrides?.elevenlabs?.applyTextNormalization ??
          req.config.elevenlabs.applyTextNormalization,
        languageCode: req.overrides?.elevenlabs?.languageCode ?? req.config.elevenlabs.languageCode,
        // Per-request voice settings override configured defaults field by field.
        voiceSettings: {
          ...req.config.elevenlabs.voiceSettings,
          ...req.overrides?.elevenlabs?.voiceSettings,
        },
        timeoutMs: req.config.timeoutMs,
      });
      return {
        audioBuffer,
        outputFormat,
        fileExtension: req.target === "voice-note" ? ".opus" : ".mp3",
        voiceCompatible: req.target === "voice-note",
      };
    },
    synthesizeTelephony: async (req) => {
      const apiKey = resolveElevenLabsApiKey(undefined, req.config.elevenlabs.apiKey);
      // Telephony output is fixed to raw 22.05 kHz PCM; request overrides are not consulted.
      const outputFormat = "pcm_22050";
      const sampleRate = 22_050;
      const audioBuffer = await elevenLabsTTS({
        text: req.text,
        apiKey,
        baseUrl: req.config.elevenlabs.baseUrl,
        voiceId: req.config.elevenlabs.voiceId,
        modelId: req.config.elevenlabs.modelId,
        outputFormat,
        seed: req.config.elevenlabs.seed,
        applyTextNormalization: req.config.elevenlabs.applyTextNormalization,
        languageCode: req.config.elevenlabs.languageCode,
        voiceSettings: req.config.elevenlabs.voiceSettings,
        timeoutMs: req.config.timeoutMs,
      });
      return { audioBuffer, outputFormat, sampleRate };
    },
  };
}

View File

@@ -1,127 +0,0 @@
import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
import path from "node:path";
import {
CHROMIUM_FULL_VERSION,
TRUSTED_CLIENT_TOKEN,
generateSecMsGecToken,
} from "node-edge-tts/dist/drm.js";
import { resolvePreferredOpenClawTmpDir } from "../../infra/tmp-openclaw-dir.js";
import { isVoiceCompatibleAudio } from "../../media/audio.js";
import type { SpeechProviderPlugin } from "../../plugins/types.js";
import type { SpeechVoiceOption } from "../provider-types.js";
import { edgeTTS, inferEdgeExtension } from "../tts-core.js";
// Default Edge TTS output format; also used as the retry fallback in buildMicrosoftSpeechProvider.
const DEFAULT_EDGE_OUTPUT_FORMAT = "audio-24khz-48kbitrate-mono-mp3";
// Shape of one entry in Microsoft's read-aloud voice-list response.
// All fields are optional because the payload is external and unvalidated.
type MicrosoftVoiceListEntry = {
  ShortName?: string;
  FriendlyName?: string;
  Locale?: string;
  Gender?: string;
  VoiceTag?: {
    ContentCategories?: string[];
    VoicePersonalities?: string[];
  };
};
/**
 * Build the headers Microsoft's voice-list endpoint expects: an Edge-extension
 * origin, a Chrome/Edge user-agent matching the bundled Chromium version, and
 * the Sec-MS-GEC DRM token pair from node-edge-tts.
 */
function buildMicrosoftVoiceHeaders(): Record<string, string> {
  const chromiumMajor = CHROMIUM_FULL_VERSION.split(".").at(0) || "0";
  const userAgent =
    `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ` +
    `(KHTML, like Gecko) Chrome/${chromiumMajor}.0.0.0 Safari/537.36 Edg/${chromiumMajor}.0.0.0`;
  const headers: Record<string, string> = {
    Authority: "speech.platform.bing.com",
    Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold",
    Accept: "*/*",
    "User-Agent": userAgent,
    "Sec-MS-GEC": generateSecMsGecToken(),
    "Sec-MS-GEC-Version": `1-${CHROMIUM_FULL_VERSION}`,
  };
  return headers;
}
/**
 * Join a voice's personality tags into a comma-separated description string,
 * or return undefined when the entry has no non-empty tags.
 */
function formatMicrosoftVoiceDescription(entry: MicrosoftVoiceListEntry): string | undefined {
  const tags = (entry.VoiceTag?.VoicePersonalities ?? []).filter(Boolean);
  if (tags.length === 0) {
    return undefined;
  }
  return tags.join(", ");
}
/**
 * Fetch the public voice catalogue from Microsoft's read-aloud service.
 * Throws on a non-2xx response; returns an empty list when the payload is not
 * an array. Entries lacking a non-empty ShortName are dropped.
 */
export async function listMicrosoftVoices(): Promise<SpeechVoiceOption[]> {
  const url =
    "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" +
    `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`;
  const response = await fetch(url, { headers: buildMicrosoftVoiceHeaders() });
  if (!response.ok) {
    throw new Error(`Microsoft voices API error (${response.status})`);
  }
  const entries = (await response.json()) as MicrosoftVoiceListEntry[];
  if (!Array.isArray(entries)) {
    return [];
  }
  const options: SpeechVoiceOption[] = [];
  for (const entry of entries) {
    const id = entry.ShortName?.trim() ?? "";
    if (id.length === 0) {
      continue; // the ShortName is the stable identifier; skip entries without one
    }
    options.push({
      id,
      name: entry.FriendlyName?.trim() || entry.ShortName?.trim() || undefined,
      category: entry.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0),
      description: formatMicrosoftVoiceDescription(entry),
      locale: entry.Locale?.trim() || undefined,
      gender: entry.Gender?.trim() || undefined,
      personalities: entry.VoiceTag?.VoicePersonalities?.filter(
        (value): value is string => value.trim().length > 0,
      ),
    });
  }
  return options;
}
/**
 * Build the Microsoft (Edge read-aloud) speech provider plugin.
 *
 * Synthesis stages output files in a private temp directory (the edgeTTS
 * helper writes to disk) and retries once with the default MP3 format when a
 * non-default output format fails. The temp directory is always removed.
 */
export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
  return {
    id: "microsoft",
    label: "Microsoft",
    aliases: ["edge"], // also reachable under the "edge" provider name
    listVoices: async () => await listMicrosoftVoices(),
    isConfigured: ({ config }) => config.edge.enabled,
    synthesize: async (req) => {
      // Create a 0700 staging dir so the synthesized file is not world-readable.
      const tempRoot = resolvePreferredOpenClawTmpDir();
      mkdirSync(tempRoot, { recursive: true, mode: 0o700 });
      const tempDir = mkdtempSync(path.join(tempRoot, "tts-microsoft-"));
      // Request override wins over the configured format; a fallback exists
      // only when the chosen format differs from the default.
      let outputFormat = req.overrides?.microsoft?.outputFormat ?? req.config.edge.outputFormat;
      const fallbackOutputFormat =
        outputFormat !== DEFAULT_EDGE_OUTPUT_FORMAT ? DEFAULT_EDGE_OUTPUT_FORMAT : undefined;
      try {
        // One synthesis attempt: write the file for `format`, read it back,
        // and report whether the result is voice-note compatible.
        const runEdge = async (format: string) => {
          const fileExtension = inferEdgeExtension(format);
          const outputPath = path.join(tempDir, `speech${fileExtension}`);
          await edgeTTS({
            text: req.text,
            outputPath,
            config: {
              ...req.config.edge,
              voice: req.overrides?.microsoft?.voice ?? req.config.edge.voice,
              outputFormat: format,
            },
            timeoutMs: req.config.timeoutMs,
          });
          const audioBuffer = readFileSync(outputPath);
          return {
            audioBuffer,
            outputFormat: format,
            fileExtension,
            voiceCompatible: isVoiceCompatibleAudio({ fileName: outputPath }),
          };
        };
        try {
          return await runEdge(outputFormat);
        } catch (err) {
          // Retry once with the default format; rethrow when no distinct fallback exists.
          if (!fallbackOutputFormat || fallbackOutputFormat === outputFormat) {
            throw err;
          }
          outputFormat = fallbackOutputFormat;
          return await runEdge(outputFormat);
        }
      } finally {
        // Always remove the staging directory, even on failure.
        rmSync(tempDir, { recursive: true, force: true });
      }
    },
  };
}

View File

@@ -1,57 +0,0 @@
import type { SpeechProviderPlugin } from "../../plugins/types.js";
import { OPENAI_TTS_MODELS, OPENAI_TTS_VOICES, openaiTTS } from "../tts-core.js";
/**
 * Resolve the OpenAI API key: the configured key wins, otherwise fall back to
 * the OPENAI_API_KEY environment variable.
 *
 * @throws Error when neither source provides a key.
 */
function resolveOpenAIApiKey(configured: string | undefined): string {
  const apiKey = configured || process.env.OPENAI_API_KEY;
  if (!apiKey) {
    throw new Error("OpenAI API key missing");
  }
  return apiKey;
}

/**
 * Build the OpenAI speech provider plugin.
 *
 * API-key resolution is centralized in resolveOpenAIApiKey (previously
 * duplicated in synthesize and synthesizeTelephony).
 */
export function buildOpenAISpeechProvider(): SpeechProviderPlugin {
  return {
    id: "openai",
    label: "OpenAI",
    models: OPENAI_TTS_MODELS,
    voices: OPENAI_TTS_VOICES,
    // The OpenAI voice list is a static constant, so expose it directly.
    listVoices: async () => OPENAI_TTS_VOICES.map((voice) => ({ id: voice, name: voice })),
    isConfigured: ({ config }) => Boolean(config.openai.apiKey || process.env.OPENAI_API_KEY),
    synthesize: async (req) => {
      const apiKey = resolveOpenAIApiKey(req.config.openai.apiKey);
      // Voice notes are delivered as Opus; everything else as MP3.
      const responseFormat = req.target === "voice-note" ? "opus" : "mp3";
      const audioBuffer = await openaiTTS({
        text: req.text,
        apiKey,
        baseUrl: req.config.openai.baseUrl,
        model: req.overrides?.openai?.model ?? req.config.openai.model,
        voice: req.overrides?.openai?.voice ?? req.config.openai.voice,
        speed: req.overrides?.openai?.speed ?? req.config.openai.speed,
        instructions: req.config.openai.instructions,
        responseFormat,
        timeoutMs: req.config.timeoutMs,
      });
      return {
        audioBuffer,
        outputFormat: responseFormat,
        fileExtension: responseFormat === "opus" ? ".opus" : ".mp3",
        voiceCompatible: req.target === "voice-note",
      };
    },
    synthesizeTelephony: async (req) => {
      const apiKey = resolveOpenAIApiKey(req.config.openai.apiKey);
      // Telephony output is fixed to raw 24 kHz PCM; request overrides are not consulted.
      const outputFormat = "pcm";
      const sampleRate = 24_000;
      const audioBuffer = await openaiTTS({
        text: req.text,
        apiKey,
        baseUrl: req.config.openai.baseUrl,
        model: req.config.openai.model,
        voice: req.config.openai.voice,
        speed: req.config.openai.speed,
        instructions: req.config.openai.instructions,
        responseFormat: outputFormat,
        timeoutMs: req.config.timeoutMs,
      });
      return { audioBuffer, outputFormat, sampleRate };
    },
  };
}