fix(tts): restore 3.28 schema compatibility and fallback observability (#57953)

* fix(tts): restore legacy config compatibility and fallback observability

* fix(tts): surface fallback attempts in status and telephony

* test(tts): cover /tts audio to /tts status fallback flow

* docs(tts): align migration and fallback observability guidance

* TTS: redact fallback logs and scope legacy plugin migration

* Infra: dedupe UV_EXTRA_INDEX_URL in host env policy

* Docs: scope doctor TTS migration to voice-call

* voice-call: restore strict known TTS provider validation
This commit is contained in:
Josh Avant
2026-03-30 22:05:03 -05:00
committed by GitHub
parent 697dddbeb6
commit c918ab4faf
19 changed files with 838 additions and 154 deletions

View File

@@ -18,9 +18,10 @@ import type {
TtsModelOverrideConfig,
TtsProvider,
} from "openclaw/plugin-sdk/config-runtime";
import { redactSensitiveText } from "openclaw/plugin-sdk/logging-core";
import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
import { logVerbose } from "openclaw/plugin-sdk/runtime-env";
import { isVerbose, logVerbose } from "openclaw/plugin-sdk/runtime-env";
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/sandbox";
import { CONFIG_DIR, resolveUserPath, stripMarkdown } from "openclaw/plugin-sdk/text-runtime";
import {
@@ -79,6 +80,8 @@ export type TtsResult = {
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
outputFormat?: string;
voiceCompatible?: boolean;
};
@@ -89,6 +92,8 @@ export type TtsSynthesisResult = {
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
outputFormat?: string;
voiceCompatible?: boolean;
fileExtension?: string;
@@ -100,6 +105,8 @@ export type TtsTelephonyResult = {
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
outputFormat?: string;
sampleRate?: number;
};
@@ -110,6 +117,8 @@ type TtsStatusEntry = {
textLength: number;
summarized: boolean;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
latencyMs?: number;
error?: string;
};
@@ -536,13 +545,22 @@ function formatTtsProviderError(provider: TtsProvider, err: unknown): string {
if (error.name === "AbortError") {
return `${provider}: request timed out`;
}
return `${provider}: ${error.message}`;
return `${provider}: ${redactSensitiveText(error.message)}`;
}
function buildTtsFailureResult(errors: string[]): { success: false; error: string } {
/**
 * Produces a single-line, redacted description of a thrown value for verbose logs.
 *
 * The message (or the stringified value when `err` is not an `Error`) is passed
 * through `redactSensitiveText`, then carriage returns, newlines and tabs are
 * rewritten as the two-character sequences `\r`, `\n` and `\t`.
 *
 * @param err - The caught value; may be anything, not only an `Error`.
 * @returns The redacted message with CR, LF and TAB escaped.
 */
function sanitizeTtsErrorForLog(err: unknown): string {
  const message = err instanceof Error ? err.message : String(err);
  const redacted = redactSensitiveText(message);
  // One pass is equivalent to three chained replaces: the replacement text
  // never contains a character that the pattern matches.
  return redacted.replace(/[\r\n\t]/g, (ch) => {
    if (ch === "\r") {
      return "\\r";
    }
    return ch === "\n" ? "\\n" : "\\t";
  });
}
/**
 * Builds the failure result returned when TTS conversion does not succeed.
 *
 * @param errors - Per-provider error descriptions; joined with "; ". When the
 *   joined text is empty, "no providers available" is reported instead.
 * @param attemptedProviders - Providers that were tried, passed through as-is
 *   (same array reference, or `undefined` when omitted).
 * @returns An object with `success: false`, the combined `error` message, and
 *   the `attemptedProviders` value supplied by the caller.
 */
function buildTtsFailureResult(
  errors: string[],
  attemptedProviders?: string[],
): { success: false; error: string; attemptedProviders?: string[] } {
  const joinedErrors = errors.join("; ");
  // An empty joined string (no errors, or only empty entries) falls back to
  // the generic reason.
  const detail = joinedErrors.length > 0 ? joinedErrors : "no providers available";
  const error = `TTS conversion failed: ${detail}`;
  return { success: false, error, attemptedProviders };
}
@@ -621,7 +639,10 @@ export async function textToSpeech(params: {
}): Promise<TtsResult> {
const synthesis = await synthesizeSpeech(params);
if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
return buildTtsFailureResult([synthesis.error ?? "TTS conversion failed"]);
return buildTtsFailureResult(
[synthesis.error ?? "TTS conversion failed"],
synthesis.attemptedProviders,
);
}
const tempRoot = resolvePreferredOpenClawTmpDir();
@@ -636,6 +657,8 @@ export async function textToSpeech(params: {
audioPath,
latencyMs: synthesis.latencyMs,
provider: synthesis.provider,
fallbackFrom: synthesis.fallbackFrom,
attemptedProviders: synthesis.attemptedProviders,
outputFormat: synthesis.outputFormat,
voiceCompatible: synthesis.voiceCompatible,
};
@@ -665,8 +688,14 @@ export async function synthesizeSpeech(params: {
const target = channelId && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file";
const errors: string[] = [];
const attemptedProviders: string[] = [];
const primaryProvider = providers[0];
logVerbose(
`TTS: starting with provider ${primaryProvider}, fallbacks: ${providers.slice(1).join(", ") || "none"}`,
);
for (const provider of providers) {
attemptedProviders.push(provider);
const providerStart = Date.now();
try {
const resolvedProvider = resolveReadySpeechProvider({
@@ -676,6 +705,7 @@ export async function synthesizeSpeech(params: {
errors,
});
if (!resolvedProvider) {
logVerbose(`TTS: provider ${provider} skipped (${errors[errors.length - 1]})`);
continue;
}
const synthesis = await resolvedProvider.synthesize({
@@ -691,16 +721,28 @@ export async function synthesizeSpeech(params: {
audioBuffer: synthesis.audioBuffer,
latencyMs: Date.now() - providerStart,
provider,
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
attemptedProviders,
outputFormat: synthesis.outputFormat,
voiceCompatible: synthesis.voiceCompatible,
fileExtension: synthesis.fileExtension,
};
} catch (err) {
errors.push(formatTtsProviderError(provider, err));
const errorMsg = formatTtsProviderError(provider, err);
errors.push(errorMsg);
const rawError = sanitizeTtsErrorForLog(err);
if (provider === primaryProvider) {
const hasFallbacks = providers.length > 1;
logVerbose(
`TTS: primary provider ${provider} failed (${rawError})${hasFallbacks ? "; trying fallback providers." : "; no fallback providers configured."}`,
);
} else {
logVerbose(`TTS: ${provider} failed (${rawError}); trying next provider.`);
}
}
}
return buildTtsFailureResult(errors);
return buildTtsFailureResult(errors, attemptedProviders);
}
export async function textToSpeechTelephony(params: {
@@ -719,8 +761,11 @@ export async function textToSpeechTelephony(params: {
const { config, providers } = setup;
const errors: string[] = [];
const attemptedProviders: string[] = [];
const primaryProvider = providers[0];
for (const provider of providers) {
attemptedProviders.push(provider);
const providerStart = Date.now();
try {
const resolvedProvider = resolveReadySpeechProvider({
@@ -745,6 +790,8 @@ export async function textToSpeechTelephony(params: {
audioBuffer: synthesis.audioBuffer,
latencyMs: Date.now() - providerStart,
provider,
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
attemptedProviders,
outputFormat: synthesis.outputFormat,
sampleRate: synthesis.sampleRate,
};
@@ -753,7 +800,7 @@ export async function textToSpeechTelephony(params: {
}
}
return buildTtsFailureResult(errors);
return buildTtsFailureResult(errors, attemptedProviders);
}
export async function listSpeechVoices(params: {
@@ -816,6 +863,16 @@ export async function maybeApplyTtsToPayload(params: {
logVerbose(`TTS: ignored directive overrides (${directives.warnings.join("; ")})`);
}
if (isVerbose()) {
const effectiveProvider = directives.overrides?.provider
? (canonicalizeSpeechProviderId(directives.overrides.provider, params.cfg) ??
getTtsProvider(config, prefsPath))
: getTtsProvider(config, prefsPath);
logVerbose(
`TTS: auto mode enabled (${autoMode}), channel=${params.channel}, selected provider=${effectiveProvider}, config.provider=${config.provider}, config.providerSource=${config.providerSource}`,
);
}
const cleanedText = directives.cleanedText;
const trimmedCleaned = cleanedText.trim();
const visibleText = trimmedCleaned.length > 0 ? trimmedCleaned : "";
@@ -910,6 +967,8 @@ export async function maybeApplyTtsToPayload(params: {
textLength: text.length,
summarized: wasSummarized,
provider: result.provider,
fallbackFrom: result.fallbackFrom,
attemptedProviders: result.attemptedProviders,
latencyMs: result.latencyMs,
};
@@ -928,6 +987,7 @@ export async function maybeApplyTtsToPayload(params: {
success: false,
textLength: text.length,
summarized: wasSummarized,
attemptedProviders: result.attemptedProviders,
error: result.error,
};
@@ -941,4 +1001,6 @@ export const _test = {
resolveModelOverridePolicy,
summarizeText,
getResolvedSpeechProviderConfig,
formatTtsProviderError,
sanitizeTtsErrorForLog,
};