fix(tts): surface voice status and harden providers

This commit is contained in:
Peter Steinberger
2026-04-26 03:51:01 +01:00
parent 1231f21679
commit 7a85c1a822
10 changed files with 551 additions and 16 deletions

View File

@@ -68,6 +68,7 @@ Docs: https://docs.openclaw.ai
- ACP: send subagent and async-task completion wakes to external ACP harnesses as
plain prompts instead of OpenClaw internal runtime-context envelopes, while
keeping those envelopes out of ACP transcripts.
- TTS/status: show configured TTS model, voice, and sanitized custom endpoint in `/status`, preserve OpenAI-compatible TTS instructions on custom endpoints, and retry empty Microsoft/Edge TTS output once. Addresses #46602, #47232, and #43936. Thanks @leekuangtao, @Huntterxx, and @rex993.
- Agents/Claude: treat zero-token empty `stop` turns as failed provider output,
retry once, repair replay, and allow configured model fallback instead of
preserving them as successful silent replies. Fixes #71880. Thanks @MagnaAI.

View File

@@ -846,6 +846,8 @@ Notes:
- success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
- failure: `Error: ...` plus `Attempts: ...`
- detailed diagnostics: `Attempt details: provider:outcome(reasonCode) latency`
- `/status` shows the active TTS mode plus configured provider, model, voice,
and sanitized custom endpoint metadata when TTS is enabled.
- OpenAI and ElevenLabs API failures now include parsed provider error detail and request id (when returned by the provider), which is surfaced in TTS errors/logs.
## Agent tool

View File

@@ -1,13 +1,20 @@
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import path from "node:path";
import { afterEach, beforeAll, describe, expect, it } from "vitest";
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
let edgeTTS: typeof import("./tts.js").edgeTTS;
function createEdgeTTSDeps(ttsPromise: (text: string, filePath: string) => Promise<void>) {
function createEdgeTTSDeps(
ttsPromise: (text: string, filePath: string) => Promise<void>,
onConstruct?: () => void,
) {
return {
EdgeTTS: class {
constructor() {
onConstruct?.();
}
ttsPromise(text: string, filePath: string) {
return ttsPromise(text, filePath);
}
@@ -36,11 +43,35 @@ describe("edgeTTS empty audio validation", () => {
}
});
it("throws when the output file is 0 bytes", async () => {
// Blank/whitespace-only text must be rejected up front, before any Edge TTS
// client is even constructed — no work should happen for an unusable request.
it("rejects blank text before constructing Edge TTS", async () => {
  tempDir = mkdtempSync(path.join(tmpdir(), "tts-test-"));
  const outputPath = path.join(tempDir, "voice.mp3");
  const onConstruct = vi.fn();
  const deps = createEdgeTTSDeps(async (_text: string, filePath: string) => {
    writeFileSync(filePath, Buffer.from([0xff]));
  }, onConstruct);
  const attempt = edgeTTS(
    { text: " \n\t ", outputPath, config: baseEdgeConfig, timeoutMs: 10000 },
    deps,
  );
  await expect(attempt).rejects.toThrow("Microsoft TTS text cannot be empty");
  // The EdgeTTS constructor must never have been invoked.
  expect(onConstruct).not.toHaveBeenCalled();
});
it("throws after one retry when the output file stays empty", async () => {
tempDir = mkdtempSync(path.join(tmpdir(), "tts-test-"));
const outputPath = path.join(tempDir, "voice.mp3");
const calls: string[] = [];
const deps = createEdgeTTSDeps(async (text: string, filePath: string) => {
calls.push(text);
writeFileSync(filePath, "");
});
@@ -54,7 +85,8 @@ describe("edgeTTS empty audio validation", () => {
},
deps,
),
).rejects.toThrow("Edge TTS produced empty audio file");
).rejects.toThrow("Edge TTS produced empty audio file after retry");
expect(calls).toEqual(["Hello", "Hello"]);
});
it("succeeds when the output file has content", async () => {
@@ -77,4 +109,78 @@ describe("edgeTTS empty audio validation", () => {
),
).resolves.toBeUndefined();
});
// First synthesis writes an empty file; the provider is invoked a second time
// and the call resolves once real audio bytes land on disk.
it("retries once when the first output file is empty", async () => {
  tempDir = mkdtempSync(path.join(tmpdir(), "tts-test-"));
  const outputPath = path.join(tempDir, "voice.mp3");
  const observedTexts: string[] = [];
  const deps = createEdgeTTSDeps(async (text: string, filePath: string) => {
    observedTexts.push(text);
    const payload =
      observedTexts.length === 1 ? "" : Buffer.from([0xff, 0xfb, 0x90, 0x00]);
    writeFileSync(filePath, payload);
  });
  await expect(
    edgeTTS({ text: "Hello", outputPath, config: baseEdgeConfig, timeoutMs: 10000 }, deps),
  ).resolves.toBeUndefined();
  // Exactly one retry, with the same text both times.
  expect(observedTexts).toEqual(["Hello", "Hello"]);
});
// A resolved promise with no file on disk counts as empty output and triggers
// the same single retry as a zero-byte file.
it("retries once when Edge TTS resolves without creating an output file", async () => {
  tempDir = mkdtempSync(path.join(tmpdir(), "tts-test-"));
  const outputPath = path.join(tempDir, "voice.mp3");
  const observedTexts: string[] = [];
  const deps = createEdgeTTSDeps(async (text: string, filePath: string) => {
    observedTexts.push(text);
    // First call never touches the filesystem; second call produces audio.
    if (observedTexts.length === 2) {
      writeFileSync(filePath, Buffer.from([0xff, 0xfb, 0x90, 0x00]));
    }
  });
  await expect(
    edgeTTS({ text: "Hello", outputPath, config: baseEdgeConfig, timeoutMs: 10000 }, deps),
  ).resolves.toBeUndefined();
  expect(observedTexts).toEqual(["Hello", "Hello"]);
});
// Hard provider failures (thrown errors) propagate immediately; the retry is
// reserved for silently-empty output only.
it("does not retry provider errors", async () => {
  tempDir = mkdtempSync(path.join(tmpdir(), "tts-test-"));
  const outputPath = path.join(tempDir, "voice.mp3");
  const observedTexts: string[] = [];
  const deps = createEdgeTTSDeps(async (text: string) => {
    observedTexts.push(text);
    throw new Error("upstream timeout");
  });
  await expect(
    edgeTTS({ text: "Hello", outputPath, config: baseEdgeConfig, timeoutMs: 10000 }, deps),
  ).rejects.toThrow("upstream timeout");
  // A single invocation: the thrown error short-circuits the retry loop.
  expect(observedTexts).toEqual(["Hello"]);
});
});

View File

@@ -24,6 +24,26 @@ async function loadDefaultEdgeTTSDeps(): Promise<EdgeTTSDeps> {
return { EdgeTTS };
}
/**
 * True when `error` is a Node.js filesystem error signalling a missing file
 * (ENOENT), e.g. from `statSync` on a path that was never created.
 */
function isMissingOutputFileError(error: unknown): boolean {
  if (error === null || typeof error !== "object") {
    return false;
  }
  return "code" in error && (error as { code?: unknown }).code === "ENOENT";
}
/**
 * Size in bytes of the file at `outputPath`. A missing file counts as zero
 * bytes so callers can treat "never written" and "written empty" uniformly;
 * any other filesystem error is re-thrown.
 */
function readOutputSize(outputPath: string): number {
  try {
    const { size } = statSync(outputPath);
    return size;
  } catch (error) {
    if (!isMissingOutputFileError(error)) {
      throw error;
    }
    return 0;
  }
}
export function inferEdgeExtension(outputFormat: string): string {
const normalized = normalizeLowercaseStringOrEmpty(outputFormat);
if (normalized.includes("webm")) {
@@ -61,6 +81,10 @@ export async function edgeTTS(
deps?: EdgeTTSDeps,
): Promise<void> {
const { text, outputPath, config, timeoutMs } = params;
if (text.trim().length === 0) {
throw new Error("Microsoft TTS text cannot be empty");
}
const resolvedDeps = deps ?? (await loadDefaultEdgeTTSDeps());
const tts = new resolvedDeps.EdgeTTS({
voice: config.voice,
@@ -73,10 +97,12 @@ export async function edgeTTS(
volume: config.volume,
timeout: config.timeoutMs ?? timeoutMs,
});
await tts.ttsPromise(text, outputPath);
const { size } = statSync(outputPath);
if (size === 0) {
throw new Error("Edge TTS produced empty audio file");
for (let attempt = 0; attempt < 2; attempt += 1) {
await tts.ttsPromise(text, outputPath);
if (readOutputSize(outputPath) > 0) {
return;
}
}
throw new Error("Edge TTS produced empty audio file after retry");
}

View File

@@ -91,9 +91,75 @@ describe("openai tts", () => {
expect(resolveOpenAITtsInstructions("tts-1-hd", "Speak warmly")).toBeUndefined();
expect(resolveOpenAITtsInstructions("gpt-4o-mini-tts", " ")).toBeUndefined();
});
// Custom OpenAI-compatible endpoints keep (trimmed) instructions regardless
// of model; the official endpoint still gates them on model support, and
// blank instructions are dropped everywhere.
it("preserves instructions for custom OpenAI-compatible TTS endpoints", () => {
  expect(
    resolveOpenAITtsInstructions("tts-1", " Speak warmly ", "https://tts.example.com/v1"),
  ).toBe("Speak warmly");
  expect(
    resolveOpenAITtsInstructions("tts-1", " Speak warmly ", "https://api.openai.com/v1/"),
  ).toBeUndefined();
  expect(
    resolveOpenAITtsInstructions("tts-1", " ", "https://tts.example.com/v1"),
  ).toBeUndefined();
});
});
describe("openaiTTS diagnostics", () => {
// End-to-end request check: a custom-endpoint call must serialize the trimmed
// instructions (plus model/voice) into the JSON body handed to fetch.
it("sends instructions to custom OpenAI-compatible endpoints", async () => {
  const fetchMock = vi.fn(
    async (_url: string | URL, _init?: RequestInit) =>
      new Response(Buffer.from("audio-bytes"), { status: 200 }),
  );
  globalThis.fetch = fetchMock as unknown as typeof fetch;
  await openaiTTS({
    text: "hello",
    apiKey: "test-key",
    baseUrl: "https://tts.example.com/v1",
    model: "tts-1",
    voice: "custom-voice",
    instructions: " Speak warmly ",
    responseFormat: "mp3",
    timeoutMs: 5_000,
  });
  // Inspect the JSON body of the first (only) fetch call.
  const [, init] = fetchMock.mock.calls[0] ?? [];
  if (typeof init?.body !== "string") {
    throw new Error("expected JSON request body");
  }
  const body = JSON.parse(init.body) as Record<string, unknown>;
  expect(body.instructions).toBe("Speak warmly");
  expect(body.model).toBe("tts-1");
  expect(body.voice).toBe("custom-voice");
});
// Official endpoint + a model without instruction support: the instructions
// field must be absent from the outgoing request body.
it("omits instructions for unsupported models on the official OpenAI endpoint", async () => {
  const fetchMock = vi.fn(
    async (_url: string | URL, _init?: RequestInit) =>
      new Response(Buffer.from("audio-bytes"), { status: 200 }),
  );
  globalThis.fetch = fetchMock as unknown as typeof fetch;
  await openaiTTS({
    text: "hello",
    apiKey: "test-key",
    baseUrl: "https://api.openai.com/v1/",
    model: "tts-1",
    voice: "alloy",
    instructions: "Speak warmly",
    responseFormat: "mp3",
    timeoutMs: 5_000,
  });
  const [, init] = fetchMock.mock.calls[0] ?? [];
  if (typeof init?.body !== "string") {
    throw new Error("expected JSON request body");
  }
  const body = JSON.parse(init.body) as Record<string, unknown>;
  expect(body.instructions).toBeUndefined();
});
it("includes parsed provider detail and request id for JSON API errors", async () => {
const fetchMock = vi.fn(
async () =>

View File

@@ -63,9 +63,16 @@ export function isValidOpenAIVoice(voice: string, baseUrl?: string): voice is Op
/**
 * Resolve the `instructions` field for an OpenAI TTS request.
 *
 * Blank instructions are always dropped. Custom OpenAI-compatible endpoints
 * receive the trimmed instructions as-is; the official endpoint only supports
 * them on `gpt-4o-mini-tts` models, so they are omitted for anything else.
 */
export function resolveOpenAITtsInstructions(
  model: string,
  instructions?: string,
  baseUrl?: string,
): string | undefined {
  const trimmed = instructions?.trim();
  if (!trimmed) {
    return undefined;
  }
  const customEndpoint = baseUrl !== undefined && isCustomOpenAIEndpoint(baseUrl);
  if (customEndpoint) {
    return trimmed;
  }
  return model.includes("gpt-4o-mini-tts") ? trimmed : undefined;
}
export async function openaiTTS(params: {
@@ -81,7 +88,7 @@ export async function openaiTTS(params: {
}): Promise<Buffer> {
const { text, apiKey, baseUrl, model, voice, speed, instructions, responseFormat, timeoutMs } =
params;
const effectiveInstructions = resolveOpenAITtsInstructions(model, instructions);
const effectiveInstructions = resolveOpenAITtsInstructions(model, instructions, baseUrl);
if (!isValidOpenAIModel(model, baseUrl)) {
throw new Error(`Invalid model: ${model}`);

View File

@@ -103,6 +103,39 @@ describe("buildStatusMessage", () => {
expect(normalized).toContain("Queue: collect");
});
// The /status voice line must surface provider metadata while stripping
// credentials, query string, and fragment from the configured endpoint.
it("shows sanitized TTS provider details in the voice status line", async () => {
  await withTempHome(async () => {
    const text = buildStatusMessage({
      config: {
        messages: {
          tts: {
            auto: "always",
            provider: "openai",
            providers: {
              openai: {
                displayName: "NeuTTS local",
                baseUrl: "http://user:secret@127.0.0.1:18801/v1?token=hidden#fragment",
                model: "neutts-nano",
                voice: "clara",
              },
            },
          },
        },
      } as unknown as OpenClawConfig,
      agent: {},
      now: 0,
    });
    const normalized = normalizeTestText(text);
    expect(normalized).toContain(
      "Voice: always · provider=openai · name=NeuTTS local · model=neutts-nano · voice=clara · endpoint=custom(http://127.0.0.1:18801/v1)",
    );
    // None of the secret-bearing URL components may leak into status output.
    expect(normalized).not.toContain("secret");
    expect(normalized).not.toContain("token=hidden");
    expect(normalized).not.toContain("fragment");
  });
});
it("shows the model runtime for CLI-backed providers", () => {
const text = buildStatusMessage({
config: {

View File

@@ -464,7 +464,25 @@ const formatVoiceModeLine = (
if (!snapshot) {
return null;
}
return `🔊 Voice: ${snapshot.autoMode} · provider=${snapshot.provider} · limit=${snapshot.maxLength} · summary=${snapshot.summarize ? "on" : "off"}`;
const parts = [`🔊 Voice: ${snapshot.autoMode}`, `provider=${snapshot.provider}`];
if (snapshot.displayName) {
parts.push(`name=${snapshot.displayName}`);
}
if (snapshot.model) {
parts.push(`model=${snapshot.model}`);
}
if (snapshot.voice) {
parts.push(`voice=${snapshot.voice}`);
}
if (snapshot.baseUrl) {
parts.push(
snapshot.customBaseUrl
? `endpoint=custom(${snapshot.baseUrl})`
: `endpoint=${snapshot.baseUrl}`,
);
}
parts.push(`limit=${snapshot.maxLength}`, `summary=${snapshot.summarize ? "on" : "off"}`);
return parts.join(" · ");
};
export function buildStatusMessage(args: StatusArgs): string {

View File

@@ -138,6 +138,162 @@ describe("resolveStatusTtsSnapshot", () => {
});
});
// Snapshot surfaces configured displayName/model/voice plus the sanitized
// custom endpoint (credentials/query/fragment stripped, customBaseUrl set).
it("reports configured OpenAI TTS model, voice, and sanitized custom endpoint", async () => {
  await withStatusTempHome(async () => {
    expect(
      resolveStatusTtsSnapshot({
        cfg: {
          messages: {
            tts: {
              auto: "always",
              provider: "openai",
              providers: {
                openai: {
                  displayName: "NeuTTS local",
                  baseUrl: "http://user:secret@127.0.0.1:18801/v1?token=hidden#fragment",
                  model: "neutts-nano",
                  voice: "clara",
                },
              },
            },
          },
        } as OpenClawConfig,
      }),
    ).toEqual({
      autoMode: "always",
      provider: "openai",
      displayName: "NeuTTS local",
      model: "neutts-nano",
      voice: "clara",
      baseUrl: "http://127.0.0.1:18801/v1",
      customBaseUrl: true,
      maxLength: 1500,
      summarize: true,
    });
  });
});
// The default OpenAI base URL carries no information, so the snapshot must
// leave baseUrl/customBaseUrl out entirely for that endpoint.
it("omits default OpenAI endpoint details from status", async () => {
  await withStatusTempHome(async () => {
    expect(
      resolveStatusTtsSnapshot({
        cfg: {
          messages: {
            tts: {
              auto: "always",
              provider: "openai",
              providers: {
                openai: {
                  baseUrl: "https://api.openai.com/v1/",
                  model: "gpt-4o-mini-tts",
                  voice: "coral",
                },
              },
            },
          },
        } as OpenClawConfig,
      }),
    ).toEqual({
      autoMode: "always",
      provider: "openai",
      model: "gpt-4o-mini-tts",
      voice: "coral",
      maxLength: 1500,
      summarize: true,
    });
  });
});
// Per-agent TTS overrides merge over the global provider config: the agent's
// auto mode and voice win while the global model is inherited.
it("reports merged per-agent provider metadata", async () => {
  await withStatusTempHome(async () => {
    expect(
      resolveStatusTtsSnapshot({
        cfg: {
          messages: {
            tts: {
              auto: "off",
              provider: "openai",
              providers: {
                openai: {
                  model: "gpt-4o-mini-tts",
                  voice: "coral",
                },
              },
            },
          },
          agents: {
            list: [
              {
                id: "reader",
                tts: {
                  auto: "always",
                  providers: {
                    openai: {
                      voice: "nova",
                    },
                  },
                },
              },
            ],
          },
        } as OpenClawConfig,
        agentId: "reader",
      }),
    ).toEqual({
      autoMode: "always",
      provider: "openai",
      model: "gpt-4o-mini-tts",
      voice: "nova",
      maxLength: 1500,
      summarize: true,
    });
  });
});
// A local prefs file overrides the configured provider; the snapshot then
// reports metadata from the overriding provider's record (the prefs' "edge"
// value is expected to normalize to "microsoft" here).
it("uses provider metadata for local provider prefs overrides", async () => {
  await withStatusTempHome(async (home) => {
    const prefsPath = path.join(home, ".openclaw", "settings", "tts.json");
    fs.mkdirSync(path.dirname(prefsPath), { recursive: true });
    fs.writeFileSync(
      prefsPath,
      JSON.stringify({
        tts: {
          auto: "always",
          provider: "edge",
        },
      }),
    );
    expect(
      resolveStatusTtsSnapshot({
        cfg: {
          messages: {
            tts: {
              provider: "openai",
              prefsPath,
              providers: {
                microsoft: {
                  voice: "en-US-AvaMultilingualNeural",
                },
                openai: {
                  model: "gpt-4o-mini-tts",
                  voice: "coral",
                },
              },
            },
          },
        } as OpenClawConfig,
      }),
    ).toEqual({
      autoMode: "always",
      provider: "microsoft",
      voice: "en-US-AvaMultilingualNeural",
      maxLength: 1500,
      summarize: true,
    });
  });
});
it("derives the default prefs path from OPENCLAW_CONFIG_PATH when set", async () => {
await withStatusTempHome(async (home) => {
const stateDir = path.join(home, ".openclaw-dev");

View File

@@ -12,6 +12,8 @@ import { resolveEffectiveTtsConfig } from "./tts-config.js";
const DEFAULT_TTS_MAX_LENGTH = 1500;
const DEFAULT_TTS_SUMMARIZE = true;
const DEFAULT_OPENAI_TTS_BASE_URL = "https://api.openai.com/v1";
const MAX_STATUS_DETAIL_LENGTH = 96;
type TtsUserPrefs = {
tts?: {
@@ -26,6 +28,11 @@ type TtsUserPrefs = {
// Snapshot of the effective TTS configuration surfaced by `/status`.
type TtsStatusSnapshot = {
  autoMode: TtsAutoMode;
  provider: TtsProvider;
  // Optional provider metadata; only present when configured.
  displayName?: string;
  model?: string;
  voice?: string;
  // Base URL with credentials/query/fragment stripped for display.
  baseUrl?: string;
  // True when baseUrl differs from the default OpenAI endpoint.
  customBaseUrl?: boolean;
  maxLength: number;
  summarize: boolean;
};
@@ -78,6 +85,116 @@ function resolveTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefin
return undefined;
}
/** Narrow `value` to a plain object record: non-null, object, not an array. */
function isObjectRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Coerce an arbitrary config value into a compact single-line status string.
 * Non-strings and blank strings yield undefined; whitespace runs collapse to
 * a single space, and overlong values are truncated with a trailing "...".
 */
function normalizeStatusDetail(
  value: unknown,
  maxLength = MAX_STATUS_DETAIL_LENGTH,
): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  // Collapsing first then trimming yields the same result as trim-then-collapse.
  const collapsed = value.replace(/\s+/g, " ").trim();
  if (collapsed.length === 0) {
    return undefined;
  }
  if (collapsed.length <= maxLength) {
    return collapsed;
  }
  return `${collapsed.slice(0, maxLength - 3)}...`;
}
/**
 * Render a configured base URL safely for status output: credentials, query
 * string, and fragment are stripped and trailing slashes removed. Returns
 * undefined for blank/non-string input and "[invalid-url]" for unparseable
 * values, so secrets embedded in a malformed URL never leak.
 */
function sanitizeBaseUrlForStatus(value: unknown): string | undefined {
  const candidate = normalizeStatusDetail(value, 180);
  if (!candidate) {
    return undefined;
  }
  let parsed: URL;
  try {
    parsed = new URL(candidate);
  } catch {
    return "[invalid-url]";
  }
  // Strip every component that could carry credentials or tokens.
  parsed.username = "";
  parsed.password = "";
  parsed.search = "";
  parsed.hash = "";
  return normalizeStatusDetail(parsed.toString().replace(/\/+$/, ""), 120);
}
/** True when `baseUrl` is set and is not the default OpenAI API endpoint. */
function isCustomOpenAiTtsBaseUrl(baseUrl: string | undefined): boolean {
  if (!baseUrl) {
    return false;
  }
  // Trailing slashes are insignificant when comparing endpoints.
  const normalized = baseUrl.replace(/\/+$/, "");
  return normalized !== DEFAULT_OPENAI_TTS_BASE_URL;
}
/**
 * Return the first normalized, non-empty string among `record[key]` for the
 * given keys, or undefined when the record is missing or no key yields one.
 */
function firstStatusDetail(
  record: Record<string, unknown> | undefined,
  keys: string[],
): string | undefined {
  if (!record) {
    return undefined;
  }
  // normalizeStatusDetail never returns "", so find-on-defined matches the
  // original first-truthy semantics.
  return keys
    .map((key) => normalizeStatusDetail(record[key]))
    .find((candidate) => candidate !== undefined);
}
/**
 * Locate the raw config record describing `provider`, merging legacy layouts.
 *
 * "microsoft" merges top-level `edge`/`microsoft` records with the
 * provider-scoped `providers.edge`/`providers.microsoft` records, later
 * entries winning. Other providers prefer `providers.<id>`, then a top-level
 * `<id>` record, then fall back to the whole raw TTS config object.
 */
function resolveProviderConfigRecord(
  raw: TtsConfig,
  provider: TtsProvider,
): Record<string, unknown> | undefined {
  const rawRecord: Record<string, unknown> = isObjectRecord(raw)
    ? (raw as Record<string, unknown>)
    : {};
  const providers: Record<string, unknown> = isObjectRecord(raw.providers) ? raw.providers : {};
  if (provider === "microsoft") {
    const merged: Record<string, unknown> = {};
    const layers = [rawRecord.edge, rawRecord.microsoft, providers.edge, providers.microsoft];
    for (const layer of layers) {
      if (isObjectRecord(layer)) {
        Object.assign(merged, layer);
      }
    }
    return merged;
  }
  const scoped = providers[provider];
  if (isObjectRecord(scoped)) {
    return scoped;
  }
  const direct = rawRecord[provider];
  return isObjectRecord(direct) ? direct : rawRecord;
}
/**
 * Collect optional display metadata (name/model/voice/endpoint) for the
 * resolved TTS provider, for the `/status` voice line. The base URL is
 * sanitized; for OpenAI it is only reported when it differs from the default
 * endpoint, and `customBaseUrl` marks that case.
 */
function resolveStatusProviderDetails(raw: TtsConfig, provider: TtsProvider) {
  // "auto" carries no concrete provider config to report.
  if (provider === "auto") {
    return {};
  }
  const record = resolveProviderConfigRecord(raw, provider);
  const details: Partial<TtsStatusSnapshot> = {};
  const displayName = firstStatusDetail(record, ["displayName"]);
  const model = firstStatusDetail(record, ["model", "modelId"]);
  const voice = firstStatusDetail(record, ["voice", "voiceId", "voiceName"]);
  if (displayName !== undefined) {
    details.displayName = displayName;
  }
  if (model !== undefined) {
    details.model = model;
  }
  if (voice !== undefined) {
    details.voice = voice;
  }
  const baseUrl = sanitizeBaseUrlForStatus(record?.baseUrl);
  const customBaseUrl = provider === "openai" && isCustomOpenAiTtsBaseUrl(baseUrl);
  const shouldReportBaseUrl = baseUrl !== undefined && (provider !== "openai" || customBaseUrl);
  if (shouldReportBaseUrl) {
    details.baseUrl = baseUrl;
    details.customBaseUrl = customBaseUrl;
  }
  return details;
}
export function resolveStatusTtsSnapshot(params: {
cfg: OpenClawConfig;
sessionAuto?: string;
@@ -95,12 +212,15 @@ export function resolveStatusTtsSnapshot(params: {
return null;
}
const provider =
normalizeConfiguredSpeechProviderId(prefs.tts?.provider) ??
normalizeConfiguredSpeechProviderId(raw.provider) ??
"auto";
return {
autoMode,
provider:
normalizeConfiguredSpeechProviderId(prefs.tts?.provider) ??
normalizeConfiguredSpeechProviderId(raw.provider) ??
"auto",
provider,
...resolveStatusProviderDetails(raw, provider),
maxLength: prefs.tts?.maxLength ?? DEFAULT_TTS_MAX_LENGTH,
summarize: prefs.tts?.summarize ?? DEFAULT_TTS_SUMMARIZE,
};