diff --git a/extensions/xiaomi/speech-provider.test.ts b/extensions/xiaomi/speech-provider.test.ts index 8d14845b4d4..6f7a45d43c3 100644 --- a/extensions/xiaomi/speech-provider.test.ts +++ b/extensions/xiaomi/speech-provider.test.ts @@ -82,6 +82,24 @@ describe("buildXiaomiSpeechProvider", () => { }); expect(config.voice).toBe("default_zh"); }); + + it("accepts generic model and speaker voice aliases", () => { + const config = provider.resolveConfig!({ + rawConfig: { + providers: { + xiaomi: { + modelId: "mimo-v2.5-tts-voicedesign", + speakerVoice: "Chloe", + }, + }, + }, + cfg: {} as never, + timeoutMs: 30000, + }); + + expect(config.model).toBe("mimo-v2.5-tts-voicedesign"); + expect(config.voice).toBe("Chloe"); + }); }); describe("parseDirectiveToken", () => { @@ -195,8 +213,8 @@ describe("buildXiaomiSpeechProvider", () => { cfg: {} as never, providerConfig: { apiKey: "sk-test", - model: "mimo-v2.5-tts-voicedesign", - voice: "Chloe", + modelId: "mimo-v2.5-tts-voicedesign", + speakerVoice: "Chloe", format: "wav", style: "Warm, bright, natural voice.", }, diff --git a/extensions/xiaomi/speech-provider.ts b/extensions/xiaomi/speech-provider.ts index f589a56557f..f9bf7833bb3 100644 --- a/extensions/xiaomi/speech-provider.ts +++ b/extensions/xiaomi/speech-provider.ts @@ -22,11 +22,7 @@ const XIAOMI_TTS_VOICE_DESIGN_MODEL = "mimo-v2.5-tts-voicedesign"; const DEFAULT_XIAOMI_TTS_VOICE_DESIGN_STYLE = "Warm, natural, and friendly voice with clear pronunciation and conversational pacing."; -const XIAOMI_TTS_MODELS = [ - "mimo-v2.5-tts", - "mimo-v2-tts", - XIAOMI_TTS_VOICE_DESIGN_MODEL, -] as const; +const XIAOMI_TTS_MODELS = ["mimo-v2.5-tts", "mimo-v2-tts", XIAOMI_TTS_VOICE_DESIGN_MODEL] as const; const XIAOMI_TTS_VOICES = [ "mimo_default", @@ -90,9 +86,12 @@ function normalizeXiaomiTtsProviderConfig( ), model: trimToUndefined(raw?.model) ?? + trimToUndefined(raw?.modelId) ?? trimToUndefined(process.env.XIAOMI_TTS_MODEL) ?? DEFAULT_XIAOMI_TTS_MODEL, voice: + trimToUndefined(raw?.speakerVoice) ?? + trimToUndefined(raw?.speakerVoiceId) ?? trimToUndefined(raw?.voice) ?? trimToUndefined(raw?.voiceId) ?? trimToUndefined(process.env.XIAOMI_TTS_VOICE) ?? @@ -114,8 +113,13 @@ function readXiaomiTtsProviderConfig(config: SpeechProviderConfig): XiaomiTtsPro path: "messages.tts.providers.xiaomi.apiKey", }) ?? normalized.apiKey, baseUrl: normalizeXiaomiTtsBaseUrl(trimToUndefined(config.baseUrl) ?? normalized.baseUrl), - model: trimToUndefined(config.model) ?? normalized.model, - voice: trimToUndefined(config.voice) ?? trimToUndefined(config.voiceId) ?? normalized.voice, + model: trimToUndefined(config.model) ?? trimToUndefined(config.modelId) ?? normalized.model, + voice: + trimToUndefined(config.speakerVoice) ?? + trimToUndefined(config.speakerVoiceId) ?? + trimToUndefined(config.voice) ?? + trimToUndefined(config.voiceId) ?? + normalized.voice, format: normalizeXiaomiTtsFormat(config.format) ?? normalized.format, style: trimToUndefined(config.style) ?? normalized.style, }; @@ -128,8 +132,12 @@ function readXiaomiTtsOverrides( return {}; } return { - model: trimToUndefined(overrides.model), - voice: trimToUndefined(overrides.voice) ?? trimToUndefined(overrides.voiceId), + model: trimToUndefined(overrides.model) ?? trimToUndefined(overrides.modelId), + voice: + trimToUndefined(overrides.speakerVoice) ?? + trimToUndefined(overrides.speakerVoiceId) ?? + trimToUndefined(overrides.voice) ?? + trimToUndefined(overrides.voiceId), format: normalizeXiaomiTtsFormat(overrides.format), style: trimToUndefined(overrides.style), }; @@ -197,11 +205,10 @@ function resolveXiaomiVoiceDesignStyle(style: string | undefined): string { return trimToUndefined(style) ?? DEFAULT_XIAOMI_TTS_VOICE_DESIGN_STYLE; } -function buildXiaomiTtsAudio(params: { - model: string; - voice: string; +function buildXiaomiTtsAudio(params: { model: string; voice: string; format: XiaomiTtsFormat }): { format: XiaomiTtsFormat; -}): { format: XiaomiTtsFormat; voice?: string } { + voice?: string; +} { if (isXiaomiVoiceDesignModel(params.model)) { return { format: params.format }; }