fix(tts): restore 3.28 schema compatibility and fallback observability (#57953)

* fix(tts): restore legacy config compatibility and fallback observability
* fix(tts): surface fallback attempts in status and telephony
* test(tts): cover /tts audio to /tts status fallback flow
* docs(tts): align migration and fallback observability guidance
* TTS: redact fallback logs and scope legacy plugin migration
* Infra: dedupe UV_EXTRA_INDEX_URL in host env policy
* Docs: scope doctor TTS migration to voice-call
* voice-call: restore strict known TTS provider validation
2026-05-03 15:30:21 +00:00 · 2026-03-30 22:05:03 -05:00
parent 697dddbeb6
commit c918ab4faf
19 changed files with 838 additions and 154 deletions
--- a/extensions/speech-core/src/tts.ts
+++ b/extensions/speech-core/src/tts.ts
@@ -18,9 +18,10 @@ import type {
  TtsModelOverrideConfig,
  TtsProvider,
 } from "openclaw/plugin-sdk/config-runtime";
+import { redactSensitiveText } from "openclaw/plugin-sdk/logging-core";
 import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
 import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
-import { logVerbose } from "openclaw/plugin-sdk/runtime-env";
+import { isVerbose, logVerbose } from "openclaw/plugin-sdk/runtime-env";
 import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/sandbox";
 import { CONFIG_DIR, resolveUserPath, stripMarkdown } from "openclaw/plugin-sdk/text-runtime";
 import {
@@ -79,6 +80,8 @@ export type TtsResult = {
  error?: string;
  latencyMs?: number;
  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
  outputFormat?: string;
  voiceCompatible?: boolean;
 };
@@ -89,6 +92,8 @@ export type TtsSynthesisResult = {
  error?: string;
  latencyMs?: number;
  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
  outputFormat?: string;
  voiceCompatible?: boolean;
  fileExtension?: string;
@@ -100,6 +105,8 @@ export type TtsTelephonyResult = {
  error?: string;
  latencyMs?: number;
  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
  outputFormat?: string;
  sampleRate?: number;
 };
@@ -110,6 +117,8 @@ type TtsStatusEntry = {
  textLength: number;
  summarized: boolean;
  provider?: string;
+  fallbackFrom?: string;
+  attemptedProviders?: string[];
  latencyMs?: number;
  error?: string;
 };
@@ -536,13 +545,22 @@ function formatTtsProviderError(provider: TtsProvider, err: unknown): string {
  if (error.name === "AbortError") {
    return `${provider}: request timed out`;
  }
-  return `${provider}: ${error.message}`;
+  return `${provider}: ${redactSensitiveText(error.message)}`;
 }

-function buildTtsFailureResult(errors: string[]): { success: false; error: string } {
+function sanitizeTtsErrorForLog(err: unknown): string { // Turns a caught value into redacted, escape-encoded text for verbose log lines.
+  const raw = err instanceof Error ? err.message : String(err); // Error instances contribute their message; any other thrown value is stringified.
+  return redactSensitiveText(raw).replace(/\r/g, "\\r").replace(/\n/g, "\\n").replace(/\t/g, "\\t"); // Redact first, then rewrite CR, LF and TAB as the literal two-character sequences \r, \n and \t.
+}
+
+function buildTtsFailureResult( // Builds the failure-shaped result returned when TTS conversion does not succeed.
+  errors: string[], // Error strings collected by the caller; joined with "; " into the final message.
+  attemptedProviders?: string[], // Optional list of providers, copied onto the result unchanged.
+): { success: false; error: string; attemptedProviders?: string[] } {
  return {
    success: false,
    error: `TTS conversion failed: ${errors.join("; ") || "no providers available"}`, // An empty joined string is falsy, so it falls back to "no providers available".
+    attemptedProviders,
  };
 }

@@ -621,7 +639,10 @@ export async function textToSpeech(params: {
 }): Promise<TtsResult> {
  const synthesis = await synthesizeSpeech(params);
  if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
-    return buildTtsFailureResult([synthesis.error ?? "TTS conversion failed"]);
+    return buildTtsFailureResult(
+      [synthesis.error ?? "TTS conversion failed"],
+      synthesis.attemptedProviders,
+    );
  }

  const tempRoot = resolvePreferredOpenClawTmpDir();
@@ -636,6 +657,8 @@ export async function textToSpeech(params: {
    audioPath,
    latencyMs: synthesis.latencyMs,
    provider: synthesis.provider,
+    fallbackFrom: synthesis.fallbackFrom,
+    attemptedProviders: synthesis.attemptedProviders,
    outputFormat: synthesis.outputFormat,
    voiceCompatible: synthesis.voiceCompatible,
  };
@@ -665,8 +688,14 @@ export async function synthesizeSpeech(params: {
  const target = channelId && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file";

  const errors: string[] = [];
+  const attemptedProviders: string[] = [];
+  const primaryProvider = providers[0];
+  logVerbose(
+    `TTS: starting with provider ${primaryProvider}, fallbacks: ${providers.slice(1).join(", ") || "none"}`,
+  );

  for (const provider of providers) {
+    attemptedProviders.push(provider);
    const providerStart = Date.now();
    try {
      const resolvedProvider = resolveReadySpeechProvider({
@@ -676,6 +705,7 @@ export async function synthesizeSpeech(params: {
        errors,
      });
      if (!resolvedProvider) {
+        logVerbose(`TTS: provider ${provider} skipped (${errors[errors.length - 1]})`);
        continue;
      }
      const synthesis = await resolvedProvider.synthesize({
@@ -691,16 +721,28 @@ export async function synthesizeSpeech(params: {
        audioBuffer: synthesis.audioBuffer,
        latencyMs: Date.now() - providerStart,
        provider,
+        fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
+        attemptedProviders,
        outputFormat: synthesis.outputFormat,
        voiceCompatible: synthesis.voiceCompatible,
        fileExtension: synthesis.fileExtension,
      };
    } catch (err) {
-      errors.push(formatTtsProviderError(provider, err));
+      const errorMsg = formatTtsProviderError(provider, err);
+      errors.push(errorMsg);
+      const rawError = sanitizeTtsErrorForLog(err);
+      if (provider === primaryProvider) {
+        const hasFallbacks = providers.length > 1;
+        logVerbose(
+          `TTS: primary provider ${provider} failed (${rawError})${hasFallbacks ? "; trying fallback providers." : "; no fallback providers configured."}`,
+        );
+      } else {
+        logVerbose(`TTS: ${provider} failed (${rawError}); trying next provider.`);
+      }
    }
  }

-  return buildTtsFailureResult(errors);
+  return buildTtsFailureResult(errors, attemptedProviders);
 }

 export async function textToSpeechTelephony(params: {
@@ -719,8 +761,11 @@ export async function textToSpeechTelephony(params: {

  const { config, providers } = setup;
  const errors: string[] = [];
+  const attemptedProviders: string[] = [];
+  const primaryProvider = providers[0];

  for (const provider of providers) {
+    attemptedProviders.push(provider);
    const providerStart = Date.now();
    try {
      const resolvedProvider = resolveReadySpeechProvider({
@@ -745,6 +790,8 @@ export async function textToSpeechTelephony(params: {
        audioBuffer: synthesis.audioBuffer,
        latencyMs: Date.now() - providerStart,
        provider,
+        fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
+        attemptedProviders,
        outputFormat: synthesis.outputFormat,
        sampleRate: synthesis.sampleRate,
      };
@@ -753,7 +800,7 @@ export async function textToSpeechTelephony(params: {
    }
  }

-  return buildTtsFailureResult(errors);
+  return buildTtsFailureResult(errors, attemptedProviders);
 }

 export async function listSpeechVoices(params: {
@@ -816,6 +863,16 @@ export async function maybeApplyTtsToPayload(params: {
    logVerbose(`TTS: ignored directive overrides (${directives.warnings.join("; ")})`);
  }

+  if (isVerbose()) {
+    const effectiveProvider = directives.overrides?.provider
+      ? (canonicalizeSpeechProviderId(directives.overrides.provider, params.cfg) ??
+        getTtsProvider(config, prefsPath))
+      : getTtsProvider(config, prefsPath);
+    logVerbose(
+      `TTS: auto mode enabled (${autoMode}), channel=${params.channel}, selected provider=${effectiveProvider}, config.provider=${config.provider}, config.providerSource=${config.providerSource}`,
+    );
+  }
+
  const cleanedText = directives.cleanedText;
  const trimmedCleaned = cleanedText.trim();
  const visibleText = trimmedCleaned.length > 0 ? trimmedCleaned : "";
@@ -910,6 +967,8 @@ export async function maybeApplyTtsToPayload(params: {
      textLength: text.length,
      summarized: wasSummarized,
      provider: result.provider,
+      fallbackFrom: result.fallbackFrom,
+      attemptedProviders: result.attemptedProviders,
      latencyMs: result.latencyMs,
    };

@@ -928,6 +987,7 @@ export async function maybeApplyTtsToPayload(params: {
    success: false,
    textLength: text.length,
    summarized: wasSummarized,
+    attemptedProviders: result.attemptedProviders,
    error: result.error,
  };

@@ -941,4 +1001,6 @@ export const _test = {
  resolveModelOverridePolicy,
  summarizeText,
  getResolvedSpeechProviderConfig,
+  formatTtsProviderError,
+  sanitizeTtsErrorForLog,
 };