diff --git a/extensions/elevenlabs/index.ts b/extensions/elevenlabs/index.ts index b77c523d6a8..0a9c3cc6194 100644 --- a/extensions/elevenlabs/index.ts +++ b/extensions/elevenlabs/index.ts @@ -1,5 +1,5 @@ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; -import { buildElevenLabsSpeechProvider } from "openclaw/plugin-sdk/speech"; +import { buildElevenLabsSpeechProvider } from "./speech-provider.js"; export default definePluginEntry({ id: "elevenlabs", diff --git a/extensions/elevenlabs/speech-provider.ts b/extensions/elevenlabs/speech-provider.ts new file mode 100644 index 00000000000..9ebce9811fe --- /dev/null +++ b/extensions/elevenlabs/speech-provider.ts @@ -0,0 +1,126 @@ +import type { SpeechProviderPlugin } from "openclaw/plugin-sdk/core"; +import { elevenLabsTTS, type SpeechVoiceOption } from "openclaw/plugin-sdk/speech-core"; + +const ELEVENLABS_TTS_MODELS = [ + "eleven_multilingual_v2", + "eleven_turbo_v2_5", + "eleven_monolingual_v1", +] as const; + +function normalizeElevenLabsBaseUrl(baseUrl: string | undefined): string { + const trimmed = baseUrl?.trim(); + return trimmed?.replace(/\/+$/, "") || "https://api.elevenlabs.io"; +} + +export async function listElevenLabsVoices(params: { + apiKey: string; + baseUrl?: string; +}): Promise<SpeechVoiceOption[]> { + const res = await fetch(`${normalizeElevenLabsBaseUrl(params.baseUrl)}/v1/voices`, { + headers: { + "xi-api-key": params.apiKey, + }, + }); + if (!res.ok) { + throw new Error(`ElevenLabs voices API error (${res.status})`); + } + const json = (await res.json()) as { + voices?: Array<{ + voice_id?: string; + name?: string; + category?: string; + description?: string; + }>; + }; + return Array.isArray(json.voices) + ? json.voices + .map((voice) => ({ + id: voice.voice_id?.trim() ?? 
"", + name: voice.name?.trim() || undefined, + category: voice.category?.trim() || undefined, + description: voice.description?.trim() || undefined, + })) + .filter((voice) => voice.id.length > 0) + : []; +} + +export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin { + return { + id: "elevenlabs", + label: "ElevenLabs", + models: ELEVENLABS_TTS_MODELS, + listVoices: async (req) => { + const apiKey = + req.apiKey || + req.config?.elevenlabs.apiKey || + process.env.ELEVENLABS_API_KEY || + process.env.XI_API_KEY; + if (!apiKey) { + throw new Error("ElevenLabs API key missing"); + } + return listElevenLabsVoices({ + apiKey, + baseUrl: req.baseUrl ?? req.config?.elevenlabs.baseUrl, + }); + }, + isConfigured: ({ config }) => + Boolean(config.elevenlabs.apiKey || process.env.ELEVENLABS_API_KEY || process.env.XI_API_KEY), + synthesize: async (req) => { + const apiKey = + req.config.elevenlabs.apiKey || process.env.ELEVENLABS_API_KEY || process.env.XI_API_KEY; + if (!apiKey) { + throw new Error("ElevenLabs API key missing"); + } + const outputFormat = + req.overrides?.elevenlabs?.outputFormat ?? + (req.target === "voice-note" ? "opus_48000_64" : "mp3_44100_128"); + const audioBuffer = await elevenLabsTTS({ + text: req.text, + apiKey, + baseUrl: req.config.elevenlabs.baseUrl, + voiceId: req.overrides?.elevenlabs?.voiceId ?? req.config.elevenlabs.voiceId, + modelId: req.overrides?.elevenlabs?.modelId ?? req.config.elevenlabs.modelId, + outputFormat, + seed: req.overrides?.elevenlabs?.seed ?? req.config.elevenlabs.seed, + applyTextNormalization: + req.overrides?.elevenlabs?.applyTextNormalization ?? + req.config.elevenlabs.applyTextNormalization, + languageCode: req.overrides?.elevenlabs?.languageCode ?? 
req.config.elevenlabs.languageCode, + voiceSettings: { + ...req.config.elevenlabs.voiceSettings, + ...req.overrides?.elevenlabs?.voiceSettings, + }, + timeoutMs: req.config.timeoutMs, + }); + return { + audioBuffer, + outputFormat, + fileExtension: req.target === "voice-note" ? ".opus" : ".mp3", + voiceCompatible: req.target === "voice-note", + }; + }, + synthesizeTelephony: async (req) => { + const apiKey = + req.config.elevenlabs.apiKey || process.env.ELEVENLABS_API_KEY || process.env.XI_API_KEY; + if (!apiKey) { + throw new Error("ElevenLabs API key missing"); + } + const outputFormat = "pcm_22050"; + const sampleRate = 22_050; + const audioBuffer = await elevenLabsTTS({ + text: req.text, + apiKey, + baseUrl: req.config.elevenlabs.baseUrl, + voiceId: req.config.elevenlabs.voiceId, + modelId: req.config.elevenlabs.modelId, + outputFormat, + seed: req.config.elevenlabs.seed, + applyTextNormalization: req.config.elevenlabs.applyTextNormalization, + languageCode: req.config.elevenlabs.languageCode, + voiceSettings: req.config.elevenlabs.voiceSettings, + timeoutMs: req.config.timeoutMs, + }); + return { audioBuffer, outputFormat, sampleRate }; + }, + }; +} diff --git a/extensions/microsoft/index.ts b/extensions/microsoft/index.ts index 8a383faf277..04eff9d49a7 100644 --- a/extensions/microsoft/index.ts +++ b/extensions/microsoft/index.ts @@ -1,5 +1,5 @@ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; -import { buildMicrosoftSpeechProvider } from "openclaw/plugin-sdk/speech"; +import { buildMicrosoftSpeechProvider } from "./speech-provider.js"; export default definePluginEntry({ id: "microsoft", diff --git a/extensions/microsoft/package.json b/extensions/microsoft/package.json index 85503a5f920..17916f9f5b3 100644 --- a/extensions/microsoft/package.json +++ b/extensions/microsoft/package.json @@ -4,6 +4,9 @@ "private": true, "description": "OpenClaw Microsoft speech plugin", "type": "module", + "dependencies": { + "node-edge-tts": "^1.2.10" + }, 
"openclaw": { "extensions": [ "./index.ts" diff --git a/extensions/microsoft/speech-provider.test.ts b/extensions/microsoft/speech-provider.test.ts new file mode 100644 index 00000000000..1c8ea5ed182 --- /dev/null +++ b/extensions/microsoft/speech-provider.test.ts @@ -0,0 +1,43 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { listMicrosoftVoices } from "./speech-provider.js"; + +const fetchMock = vi.fn(); + +describe("listMicrosoftVoices", () => { + afterEach(() => { + fetchMock.mockReset(); + vi.unstubAllGlobals(); + }); + + it("maps Microsoft voices to the shared speech voice shape", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => [ + { + ShortName: "en-US-AvaMultilingualNeural", + FriendlyName: "Microsoft Ava", + Locale: "en-US", + Gender: "Female", + VoiceTag: { + ContentCategories: ["General"], + VoicePersonalities: ["Friendly", "Warm"], + }, + }, + ], + } as Response); + vi.stubGlobal("fetch", fetchMock); + + await expect(listMicrosoftVoices()).resolves.toEqual([ + { + id: "en-US-AvaMultilingualNeural", + name: "Microsoft Ava", + category: "General", + description: "Friendly, Warm", + locale: "en-US", + gender: "Female", + personalities: ["Friendly", "Warm"], + }, + ]); + }); +}); diff --git a/extensions/microsoft/speech-provider.ts b/extensions/microsoft/speech-provider.ts new file mode 100644 index 00000000000..b151c70562b --- /dev/null +++ b/extensions/microsoft/speech-provider.ts @@ -0,0 +1,130 @@ +import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; +import path from "node:path"; +import { + CHROMIUM_FULL_VERSION, + TRUSTED_CLIENT_TOKEN, + generateSecMsGecToken, +} from "node-edge-tts/dist/drm.js"; +import type { SpeechProviderPlugin } from "openclaw/plugin-sdk/core"; +import { + edgeTTS, + inferEdgeExtension, + isVoiceCompatibleAudio, + resolvePreferredOpenClawTmpDir, + type SpeechVoiceOption, +} from "openclaw/plugin-sdk/speech-core"; + +const DEFAULT_EDGE_OUTPUT_FORMAT = 
"audio-24khz-48kbitrate-mono-mp3"; + +type MicrosoftVoiceListEntry = { + ShortName?: string; + FriendlyName?: string; + Locale?: string; + Gender?: string; + VoiceTag?: { + ContentCategories?: string[]; + VoicePersonalities?: string[]; + }; +}; + +function buildMicrosoftVoiceHeaders(): Record { + const major = CHROMIUM_FULL_VERSION.split(".")[0] || "0"; + return { + Authority: "speech.platform.bing.com", + Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold", + Accept: "*/*", + "User-Agent": + `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ` + + `(KHTML, like Gecko) Chrome/${major}.0.0.0 Safari/537.36 Edg/${major}.0.0.0`, + "Sec-MS-GEC": generateSecMsGecToken(), + "Sec-MS-GEC-Version": `1-${CHROMIUM_FULL_VERSION}`, + }; +} + +function formatMicrosoftVoiceDescription(entry: MicrosoftVoiceListEntry): string | undefined { + const personalities = entry.VoiceTag?.VoicePersonalities?.filter(Boolean) ?? []; + return personalities.length > 0 ? personalities.join(", ") : undefined; +} + +export async function listMicrosoftVoices(): Promise { + const response = await fetch( + "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" + + `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`, + { + headers: buildMicrosoftVoiceHeaders(), + }, + ); + if (!response.ok) { + throw new Error(`Microsoft voices API error (${response.status})`); + } + const voices = (await response.json()) as MicrosoftVoiceListEntry[]; + return Array.isArray(voices) + ? voices + .map((voice) => ({ + id: voice.ShortName?.trim() ?? 
"", + name: voice.FriendlyName?.trim() || voice.ShortName?.trim() || undefined, + category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0), + description: formatMicrosoftVoiceDescription(voice), + locale: voice.Locale?.trim() || undefined, + gender: voice.Gender?.trim() || undefined, + personalities: voice.VoiceTag?.VoicePersonalities?.filter( + (value): value is string => value.trim().length > 0, + ), + })) + .filter((voice) => voice.id.length > 0) + : []; +} + +export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin { + return { + id: "microsoft", + label: "Microsoft", + aliases: ["edge"], + listVoices: async () => await listMicrosoftVoices(), + isConfigured: ({ config }) => config.edge.enabled, + synthesize: async (req) => { + const tempRoot = resolvePreferredOpenClawTmpDir(); + mkdirSync(tempRoot, { recursive: true, mode: 0o700 }); + const tempDir = mkdtempSync(path.join(tempRoot, "tts-microsoft-")); + let outputFormat = req.overrides?.microsoft?.outputFormat ?? req.config.edge.outputFormat; + const fallbackOutputFormat = + outputFormat !== DEFAULT_EDGE_OUTPUT_FORMAT ? DEFAULT_EDGE_OUTPUT_FORMAT : undefined; + + try { + const runEdge = async (format: string) => { + const fileExtension = inferEdgeExtension(format); + const outputPath = path.join(tempDir, `speech${fileExtension}`); + await edgeTTS({ + text: req.text, + outputPath, + config: { + ...req.config.edge, + voice: req.overrides?.microsoft?.voice ?? 
req.config.edge.voice, + outputFormat: format, + }, + timeoutMs: req.config.timeoutMs, + }); + const audioBuffer = readFileSync(outputPath); + return { + audioBuffer, + outputFormat: format, + fileExtension, + voiceCompatible: isVoiceCompatibleAudio({ fileName: outputPath }), + }; + }; + + try { + return await runEdge(outputFormat); + } catch (err) { + if (!fallbackOutputFormat || fallbackOutputFormat === outputFormat) { + throw err; + } + outputFormat = fallbackOutputFormat; + return await runEdge(outputFormat); + } + } finally { + rmSync(tempDir, { recursive: true, force: true }); + } + }, + }; +} diff --git a/extensions/openai/index.ts b/extensions/openai/index.ts index 7ba31100085..1ea449d6205 100644 --- a/extensions/openai/index.ts +++ b/extensions/openai/index.ts @@ -1,9 +1,9 @@ import { buildOpenAIImageGenerationProvider } from "openclaw/plugin-sdk/image-generation"; import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; -import { buildOpenAISpeechProvider } from "openclaw/plugin-sdk/speech"; import { openaiMediaUnderstandingProvider } from "./media-understanding-provider.js"; import { buildOpenAICodexProviderPlugin } from "./openai-codex-provider.js"; import { buildOpenAIProvider } from "./openai-provider.js"; +import { buildOpenAISpeechProvider } from "./speech-provider.js"; export default definePluginEntry({ id: "openai", diff --git a/extensions/openai/speech-provider.ts b/extensions/openai/speech-provider.ts new file mode 100644 index 00000000000..44d129ff838 --- /dev/null +++ b/extensions/openai/speech-provider.ts @@ -0,0 +1,57 @@ +import type { SpeechProviderPlugin } from "openclaw/plugin-sdk/core"; +import { OPENAI_TTS_MODELS, OPENAI_TTS_VOICES, openaiTTS } from "openclaw/plugin-sdk/speech-core"; + +export function buildOpenAISpeechProvider(): SpeechProviderPlugin { + return { + id: "openai", + label: "OpenAI", + models: OPENAI_TTS_MODELS, + voices: OPENAI_TTS_VOICES, + listVoices: async () => OPENAI_TTS_VOICES.map((voice) => ({ id: 
voice, name: voice })), + isConfigured: ({ config }) => Boolean(config.openai.apiKey || process.env.OPENAI_API_KEY), + synthesize: async (req) => { + const apiKey = req.config.openai.apiKey || process.env.OPENAI_API_KEY; + if (!apiKey) { + throw new Error("OpenAI API key missing"); + } + const responseFormat = req.target === "voice-note" ? "opus" : "mp3"; + const audioBuffer = await openaiTTS({ + text: req.text, + apiKey, + baseUrl: req.config.openai.baseUrl, + model: req.overrides?.openai?.model ?? req.config.openai.model, + voice: req.overrides?.openai?.voice ?? req.config.openai.voice, + speed: req.overrides?.openai?.speed ?? req.config.openai.speed, + instructions: req.config.openai.instructions, + responseFormat, + timeoutMs: req.config.timeoutMs, + }); + return { + audioBuffer, + outputFormat: responseFormat, + fileExtension: responseFormat === "opus" ? ".opus" : ".mp3", + voiceCompatible: req.target === "voice-note", + }; + }, + synthesizeTelephony: async (req) => { + const apiKey = req.config.openai.apiKey || process.env.OPENAI_API_KEY; + if (!apiKey) { + throw new Error("OpenAI API key missing"); + } + const outputFormat = "pcm"; + const sampleRate = 24_000; + const audioBuffer = await openaiTTS({ + text: req.text, + apiKey, + baseUrl: req.config.openai.baseUrl, + model: req.config.openai.model, + voice: req.config.openai.voice, + speed: req.config.openai.speed, + instructions: req.config.openai.instructions, + responseFormat: outputFormat, + timeoutMs: req.config.timeoutMs, + }); + return { audioBuffer, outputFormat, sampleRate }; + }, + }; +} diff --git a/package.json b/package.json index 9e21f82abe4..454bd7ed133 100644 --- a/package.json +++ b/package.json @@ -165,6 +165,10 @@ "types": "./dist/plugin-sdk/speech-runtime.d.ts", "default": "./dist/plugin-sdk/speech-runtime.js" }, + "./plugin-sdk/speech-core": { + "types": "./dist/plugin-sdk/speech-core.d.ts", + "default": "./dist/plugin-sdk/speech-core.js" + }, "./plugin-sdk/plugin-runtime": { "types": 
"./dist/plugin-sdk/plugin-runtime.d.ts", "default": "./dist/plugin-sdk/plugin-runtime.js" diff --git a/scripts/lib/plugin-sdk-entrypoints.json b/scripts/lib/plugin-sdk-entrypoints.json index 09ebb1a44fd..4ba14b4a973 100644 --- a/scripts/lib/plugin-sdk-entrypoints.json +++ b/scripts/lib/plugin-sdk-entrypoints.json @@ -31,6 +31,7 @@ "text-runtime", "agent-runtime", "speech-runtime", + "speech-core", "plugin-runtime", "security-runtime", "gateway-runtime", diff --git a/src/plugin-sdk/speech-core.ts b/src/plugin-sdk/speech-core.ts new file mode 100644 index 00000000000..e4af7a69486 --- /dev/null +++ b/src/plugin-sdk/speech-core.ts @@ -0,0 +1,17 @@ +// Shared speech-provider implementation helpers for bundled and third-party plugins. + +export type { SpeechProviderPlugin } from "../plugins/types.js"; +export type { SpeechVoiceOption } from "../tts/provider-types.js"; + +export { + edgeTTS, + elevenLabsTTS, + inferEdgeExtension, + OPENAI_TTS_MODELS, + OPENAI_TTS_VOICES, + openaiTTS, + parseTtsDirectives, +} from "../tts/tts-core.js"; + +export { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; +export { isVoiceCompatibleAudio } from "../media/audio.js"; diff --git a/src/plugin-sdk/speech.ts b/src/plugin-sdk/speech.ts index 3fb9758ffdc..5a205985c1b 100644 --- a/src/plugin-sdk/speech.ts +++ b/src/plugin-sdk/speech.ts @@ -1,7 +1,7 @@ // Public speech-provider builders for bundled or third-party plugins. 
-export { buildElevenLabsSpeechProvider } from "../tts/providers/elevenlabs.js"; -export { buildMicrosoftSpeechProvider } from "../tts/providers/microsoft.js"; -export { buildOpenAISpeechProvider } from "../tts/providers/openai.js"; +export { buildElevenLabsSpeechProvider } from "../../extensions/elevenlabs/speech-provider.js"; +export { buildMicrosoftSpeechProvider } from "../../extensions/microsoft/speech-provider.js"; +export { buildOpenAISpeechProvider } from "../../extensions/openai/speech-provider.js"; export { parseTtsDirectives } from "../tts/tts-core.js"; export type { SpeechVoiceOption } from "../tts/provider-types.js"; diff --git a/src/tts/provider-registry.test.ts b/src/tts/provider-registry.test.ts index f2abd7bcba4..dfb12b7bdd2 100644 --- a/src/tts/provider-registry.test.ts +++ b/src/tts/provider-registry.test.ts @@ -1,62 +1,84 @@ -import { afterEach, describe, expect, it, vi } from "vitest"; -import type { OpenClawConfig } from "../config/config.js"; -import { createEmptyPluginRegistry } from "../plugins/registry.js"; -import { resetPluginRuntimeStateForTest, setActivePluginRegistry } from "../plugins/runtime.js"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; -const { loadOpenClawPluginsMock } = vi.hoisted(() => ({ - loadOpenClawPluginsMock: vi.fn(() => createEmptyPluginRegistry()), -})); +import type { OpenClawConfig } from "../config/config.js"; +import { createEmptyPluginRegistry } from "../plugins/registry-empty.js"; +import { resetPluginRuntimeStateForTest, setActivePluginRegistry } from "../plugins/runtime.js"; +import type { SpeechProviderPlugin } from "../plugins/types.js"; +import { getSpeechProvider, listSpeechProviders, normalizeSpeechProviderId } from "./provider-registry.js"; + +const loadOpenClawPluginsMock = vi.fn(); vi.mock("../plugins/loader.js", () => ({ - loadOpenClawPlugins: loadOpenClawPluginsMock, + loadOpenClawPlugins: (...args: Parameters<typeof loadOpenClawPluginsMock>) => + loadOpenClawPluginsMock(...args), })); -import { 
getSpeechProvider, listSpeechProviders } from "./provider-registry.js"; +function createSpeechProvider(id: string, aliases?: string[]): SpeechProviderPlugin { + return { + id, + ...(aliases ? { aliases } : {}), + isConfigured: () => true, + synthesize: async () => ({ + audioBuffer: Buffer.from("audio"), + outputFormat: "mp3", + voiceCompatible: false, + fileExtension: ".mp3", + }), + }; +} describe("speech provider registry", () => { - afterEach(() => { + beforeEach(() => { + resetPluginRuntimeStateForTest(); loadOpenClawPluginsMock.mockReset(); loadOpenClawPluginsMock.mockReturnValue(createEmptyPluginRegistry()); + }); + + afterEach(() => { resetPluginRuntimeStateForTest(); }); - it("does not load plugins for builtin provider lookup", () => { - const provider = getSpeechProvider("openai", {} as OpenClawConfig); + it("uses active plugin speech providers without reloading plugins", () => { + setActivePluginRegistry({ + ...createEmptyPluginRegistry(), + speechProviders: [ + { + pluginId: "test-openai", + provider: createSpeechProvider("openai"), + }, + ], + }); - expect(provider?.id).toBe("openai"); - expect(loadOpenClawPluginsMock).not.toHaveBeenCalled(); - }); - - it("does not load plugins when listing without config", () => { const providers = listSpeechProviders(); - expect(providers.map((provider) => provider.id)).toEqual(["openai", "elevenlabs", "microsoft"]); + expect(providers.map((provider) => provider.id)).toEqual(["openai"]); expect(loadOpenClawPluginsMock).not.toHaveBeenCalled(); }); - it("uses active plugin speech providers without loading from disk", () => { - const registry = createEmptyPluginRegistry(); - registry.speechProviders.push({ - pluginId: "custom-speech", - pluginName: "Custom Speech", - source: "test", - provider: { - id: "custom-speech", - label: "Custom Speech", - isConfigured: () => true, - synthesize: async () => ({ - audioBuffer: Buffer.from("audio"), - outputFormat: "mp3", - fileExtension: ".mp3", - voiceCompatible: false, - }), - }, 
+ it("loads speech providers from plugins when config is provided", () => { + loadOpenClawPluginsMock.mockReturnValue({ + ...createEmptyPluginRegistry(), + speechProviders: [ + { + pluginId: "test-microsoft", + provider: createSpeechProvider("microsoft", ["edge"]), + }, + ], }); - setActivePluginRegistry(registry); - const provider = getSpeechProvider("custom-speech"); + const cfg = {} as OpenClawConfig; - expect(provider?.id).toBe("custom-speech"); - expect(loadOpenClawPluginsMock).not.toHaveBeenCalled(); + expect(listSpeechProviders(cfg).map((provider) => provider.id)).toEqual(["microsoft"]); + expect(getSpeechProvider("edge", cfg)?.id).toBe("microsoft"); + expect(loadOpenClawPluginsMock).toHaveBeenCalledWith({ config: cfg }); + }); + + it("returns no providers when neither plugins nor active registry provide speech support", () => { + expect(listSpeechProviders()).toEqual([]); + expect(getSpeechProvider("openai")).toBeUndefined(); + }); + + it("normalizes the legacy edge alias to microsoft", () => { + expect(normalizeSpeechProviderId("edge")).toBe("microsoft"); }); }); diff --git a/src/tts/provider-registry.ts b/src/tts/provider-registry.ts index 3dc7b4aa1bb..154f996fb13 100644 --- a/src/tts/provider-registry.ts +++ b/src/tts/provider-registry.ts @@ -3,15 +3,6 @@ import { loadOpenClawPlugins } from "../plugins/loader.js"; import { getActivePluginRegistry } from "../plugins/runtime.js"; import type { SpeechProviderPlugin } from "../plugins/types.js"; import type { SpeechProviderId } from "./provider-types.js"; -import { buildElevenLabsSpeechProvider } from "./providers/elevenlabs.js"; -import { buildMicrosoftSpeechProvider } from "./providers/microsoft.js"; -import { buildOpenAISpeechProvider } from "./providers/openai.js"; - -const BUILTIN_SPEECH_PROVIDER_BUILDERS = [ - buildOpenAISpeechProvider, - buildElevenLabsSpeechProvider, - buildMicrosoftSpeechProvider, -] as const satisfies readonly (() => SpeechProviderPlugin)[]; function trimToUndefined(value: string | 
undefined): string | undefined { const trimmed = value?.trim().toLowerCase(); @@ -66,9 +57,6 @@ function buildProviderMaps(cfg?: OpenClawConfig): { const aliases = new Map(); const maps = { canonical, aliases }; - for (const buildProvider of BUILTIN_SPEECH_PROVIDER_BUILDERS) { - registerSpeechProvider(maps, buildProvider()); - } for (const provider of resolveSpeechProviderPluginEntries(cfg)) { registerSpeechProvider(maps, provider); } @@ -88,10 +76,5 @@ export function getSpeechProvider( if (!normalized) { return undefined; } - - const local = buildProviderMaps().aliases.get(normalized); - if (local || !cfg) { - return local; - } return buildProviderMaps(cfg).aliases.get(normalized); } diff --git a/src/tts/providers/microsoft.test.ts b/src/tts/providers/microsoft.test.ts deleted file mode 100644 index 217f13d9633..00000000000 --- a/src/tts/providers/microsoft.test.ts +++ /dev/null @@ -1,66 +0,0 @@ -import { afterEach, describe, expect, it, vi } from "vitest"; -import { withFetchPreconnect } from "../../test-utils/fetch-mock.js"; -import { listMicrosoftVoices } from "./microsoft.js"; - -describe("listMicrosoftVoices", () => { - const originalFetch = globalThis.fetch; - - afterEach(() => { - globalThis.fetch = originalFetch; - vi.restoreAllMocks(); - }); - - it("maps Microsoft voice metadata into speech voice options", async () => { - globalThis.fetch = withFetchPreconnect( - vi.fn().mockResolvedValue( - new Response( - JSON.stringify([ - { - ShortName: "en-US-AvaNeural", - FriendlyName: "Microsoft Ava Online (Natural) - English (United States)", - Locale: "en-US", - Gender: "Female", - VoiceTag: { - ContentCategories: ["General"], - VoicePersonalities: ["Friendly", "Positive"], - }, - }, - ]), - { status: 200 }, - ), - ), - ); - - const voices = await listMicrosoftVoices(); - - expect(voices).toEqual([ - { - id: "en-US-AvaNeural", - name: "Microsoft Ava Online (Natural) - English (United States)", - category: "General", - description: "Friendly, Positive", - locale: 
"en-US", - gender: "Female", - personalities: ["Friendly", "Positive"], - }, - ]); - expect(globalThis.fetch).toHaveBeenCalledWith( - expect.stringContaining("/voices/list?trustedclienttoken="), - expect.objectContaining({ - headers: expect.objectContaining({ - Origin: "chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold", - "Sec-MS-GEC": expect.any(String), - "Sec-MS-GEC-Version": expect.stringContaining("1-"), - }), - }), - ); - }); - - it("throws on Microsoft voice list failures", async () => { - globalThis.fetch = withFetchPreconnect( - vi.fn().mockResolvedValue(new Response("nope", { status: 503 })), - ); - - await expect(listMicrosoftVoices()).rejects.toThrow("Microsoft voices API error (503)"); - }); -});