diff --git a/CHANGELOG.md b/CHANGELOG.md index 930b6853bb7..ff48909b87a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,6 +78,7 @@ Docs: https://docs.openclaw.ai - Providers/OpenAI-compatible: skip null or non-object streaming chunks from custom providers instead of failing the turn after partial output. Fixes #51112. - Providers/OpenAI-compatible: treat singular MLX-style `finish_reason: "tool_call"` as tool use instead of a provider error. Fixes #61499. - Providers/ElevenLabs: omit the MP3-only `Accept` header for PCM telephony synthesis, so Voice Call requests for `pcm_22050` no longer receive MP3 audio. Fixes #67340. Thanks @marcchabot. +- Providers/Microsoft TTS: honor legacy `messages.tts.providers.edge` voice settings after normalizing Edge TTS to the Microsoft provider. Fixes #64153. - Plugins/Voice Call: reap stale pre-answer calls by default, honor configured TTS timeouts for Twilio media-stream playback, and fail empty telephony audio instead of completing as silence. Fixes #42071; supersedes #60957. Thanks @Ryce and @sliekens. - Plugins/Voice Call: terminate expired restored call sessions with the provider and restart restored max-duration timers with only the remaining duration, preventing stale outbound retry loops after Gateway restarts. Fixes #48739. Thanks @mira-solari. - Plugins/Voice Call: start provider STT after Telnyx outbound conversation greetings and pass configured Telnyx voice IDs through to the speak action. Fixes #56091. Thanks @Roshan. diff --git a/extensions/microsoft/speech-provider.test.ts b/extensions/microsoft/speech-provider.test.ts index fbe52717da3..9e77593fc2b 100644 --- a/extensions/microsoft/speech-provider.test.ts +++ b/extensions/microsoft/speech-provider.test.ts @@ -184,6 +184,27 @@ describe("buildMicrosoftSpeechProvider", () => { vi.restoreAllMocks(); }); + it("accepts legacy providers.edge voice config", () => { + const provider = buildMicrosoftSpeechProvider(); + + const resolved = provider.resolveConfig?.({ + cfg: TEST_CFG, + rawConfig: { + provider: "edge", + providers: { + edge: { + voice: "en-US-AvaNeural", + }, + }, + }, + timeoutMs: 1000, + }); + + expect(resolved).toMatchObject({ + voice: "en-US-AvaNeural", + }); + }); + it("switches to a Chinese voice for CJK text when no explicit voice override is set", async () => { const provider = buildMicrosoftSpeechProvider(); const edgeSpy = vi.spyOn(ttsModule, "edgeTTS").mockImplementation(async ({ outputPath }) => { diff --git a/extensions/microsoft/speech-provider.ts b/extensions/microsoft/speech-provider.ts index 0a1381fdd1e..ce07bd4f7eb 100644 --- a/extensions/microsoft/speech-provider.ts +++ b/extensions/microsoft/speech-provider.ts @@ -59,8 +59,9 @@ function normalizeMicrosoftProviderConfig( const providers = asObject(rawConfig.providers); const rawEdge = asObject(rawConfig.edge); const rawMicrosoft = asObject(rawConfig.microsoft); - const rawProvider = asObject(providers?.microsoft); - const raw = { ...rawEdge, ...rawMicrosoft, ...rawProvider }; + const rawProviderEdge = asObject(providers?.edge); + const rawProviderMicrosoft = asObject(providers?.microsoft); + const raw = { ...rawEdge, ...rawProviderEdge, ...rawMicrosoft, ...rawProviderMicrosoft }; const outputFormat = trimToUndefined(raw.outputFormat); return { enabled: asBoolean(raw.enabled) ?? true,