fix(tts): honor legacy edge voice config

2026-05-06 06:20:43 +00:00 · 2026-04-25 04:02:12 +01:00
parent 5569d6d9d3
commit 92b17af817
3 changed files with 25 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -78,6 +78,7 @@ Docs: https://docs.openclaw.ai
 - Providers/OpenAI-compatible: skip null or non-object streaming chunks from custom providers instead of failing the turn after partial output. Fixes #51112.
 - Providers/OpenAI-compatible: treat singular MLX-style `finish_reason: "tool_call"` as tool use instead of a provider error. Fixes #61499.
 - Providers/ElevenLabs: omit the MP3-only `Accept` header for PCM telephony synthesis, so Voice Call requests for `pcm_22050` no longer receive MP3 audio. Fixes #67340. Thanks @marcchabot.
+- Providers/Microsoft TTS: honor legacy `messages.tts.providers.edge` voice settings after normalizing Edge TTS to the Microsoft provider. Fixes #64153.
 - Plugins/Voice Call: reap stale pre-answer calls by default, honor configured TTS timeouts for Twilio media-stream playback, and fail empty telephony audio instead of completing as silence. Fixes #42071; supersedes #60957. Thanks @Ryce and @sliekens.
 - Plugins/Voice Call: terminate expired restored call sessions with the provider and restart restored max-duration timers with only the remaining duration, preventing stale outbound retry loops after Gateway restarts. Fixes #48739. Thanks @mira-solari.
 - Plugins/Voice Call: start provider STT after Telnyx outbound conversation greetings and pass configured Telnyx voice IDs through to the speak action. Fixes #56091. Thanks @Roshan.
--- a/extensions/microsoft/speech-provider.test.ts
+++ b/extensions/microsoft/speech-provider.test.ts
@@ -184,6 +184,27 @@ describe("buildMicrosoftSpeechProvider", () => {
    vi.restoreAllMocks();
  });

+  it("accepts legacy providers.edge voice config", () => { // regression test: a voice set under providers.edge must survive config resolution
+    const provider = buildMicrosoftSpeechProvider();
+
+    const resolved = provider.resolveConfig?.({ // optional call: only invoked when the provider defines resolveConfig
+      cfg: TEST_CFG,
+      rawConfig: {
+        provider: "edge", // legacy provider id, per the test title
+        providers: {
+          edge: { // voice is supplied only under providers.edge; no providers.microsoft entry is present
+            voice: "en-US-AvaNeural",
+          },
+        },
+      },
+      timeoutMs: 1000,
+    });
+
+    expect(resolved).toMatchObject({ // partial match: only the resolved voice is asserted
+      voice: "en-US-AvaNeural",
+    });
+  });
+
  it("switches to a Chinese voice for CJK text when no explicit voice override is set", async () => {
    const provider = buildMicrosoftSpeechProvider();
    const edgeSpy = vi.spyOn(ttsModule, "edgeTTS").mockImplementation(async ({ outputPath }) => {
--- a/extensions/microsoft/speech-provider.ts
+++ b/extensions/microsoft/speech-provider.ts
@@ -59,8 +59,9 @@ function normalizeMicrosoftProviderConfig(
  const providers = asObject(rawConfig.providers);
  const rawEdge = asObject(rawConfig.edge);
  const rawMicrosoft = asObject(rawConfig.microsoft);
-  const rawProvider = asObject(providers?.microsoft);
-  const raw = { ...rawEdge, ...rawMicrosoft, ...rawProvider };
+  const rawProviderEdge = asObject(providers?.edge);
+  const rawProviderMicrosoft = asObject(providers?.microsoft);
+  const raw = { ...rawEdge, ...rawProviderEdge, ...rawMicrosoft, ...rawProviderMicrosoft };
  const outputFormat = trimToUndefined(raw.outputFormat);
  return {
    enabled: asBoolean(raw.enabled) ?? true,