fix(tts): restore 3.28 schema compatibility and fallback observability (#57953)

* fix(tts): restore legacy config compatibility and fallback observability

* fix(tts): surface fallback attempts in status and telephony

* test(tts): cover /tts audio to /tts status fallback flow

* docs(tts): align migration and fallback observability guidance

* TTS: redact fallback logs and scope legacy plugin migration

* Infra: dedupe UV_EXTRA_INDEX_URL in host env policy

* Docs: scope doctor TTS migration to voice-call

* voice-call: restore strict known TTS provider validation
This commit is contained in:
Josh Avant
2026-03-30 22:05:03 -05:00
committed by GitHub
parent 697dddbeb6
commit c918ab4faf
19 changed files with 838 additions and 154 deletions

View File

@@ -89,7 +89,6 @@ enum HostEnvSecurityPolicy {
"UV_INDEX_URL",
"UV_EXTRA_INDEX_URL",
"UV_DEFAULT_INDEX",
"UV_EXTRA_INDEX_URL",
"LUA_PATH",
"LUA_CPATH",
"GEM_HOME",

View File

@@ -122,6 +122,10 @@ Current migrations:
- `routing.agents`/`routing.defaultAgentId` → `agents.list` + `agents.list[].default`
- `routing.agentToAgent` → `tools.agentToAgent`
- `routing.transcribeAudio` → `tools.media.audio.models`
- `messages.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `messages.tts.providers.<provider>`
- `channels.discord.voice.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `channels.discord.voice.tts.providers.<provider>`
- `channels.discord.accounts.<id>.voice.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `channels.discord.accounts.<id>.voice.tts.providers.<provider>`
- `plugins.entries.voice-call.config.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `plugins.entries.voice-call.config.tts.providers.<provider>`
- `bindings[].match.accountID` → `bindings[].match.accountId`
- For channels with named `accounts` but missing `accounts.default`, move account-scoped top-level single-account channel values into `channels.<channel>.accounts.default` when present
- `identity` → `agents.list[].identity`

View File

@@ -219,9 +219,11 @@ streaming speech on calls. You can override it under the plugin config with the
{
tts: {
provider: "elevenlabs",
elevenlabs: {
voiceId: "pMsXgVXv3BLzUgSXRplE",
modelId: "eleven_multilingual_v2",
providers: {
elevenlabs: {
voiceId: "pMsXgVXv3BLzUgSXRplE",
modelId: "eleven_multilingual_v2",
},
},
},
}
@@ -229,9 +231,11 @@ streaming speech on calls. You can override it under the plugin config with the
Notes:
- Legacy `tts.<provider>` keys inside plugin config (`openai`, `elevenlabs`, `microsoft`, `edge`) are auto-migrated to `tts.providers.<provider>` on load. Prefer the `providers` shape in committed config.
- **Microsoft speech is ignored for voice calls** (telephony audio needs PCM; the current Microsoft transport does not expose telephony PCM output).
- Core TTS is used when Twilio media streaming is enabled; otherwise calls fall back to provider native voices.
- If a Twilio media stream is already active, Voice Call does not fall back to TwiML `<Say>`. If telephony TTS is unavailable in that state, the playback request fails instead of mixing two playback paths.
- When telephony TTS falls back to a secondary provider, Voice Call logs a warning with the provider chain (`from`, `to`, `attempts`) for debugging.
### More examples
@@ -242,7 +246,9 @@ Use core TTS only (no override):
messages: {
tts: {
provider: "openai",
openai: { voice: "alloy" },
providers: {
openai: { voice: "alloy" },
},
},
},
}
@@ -258,10 +264,12 @@ Override to ElevenLabs just for calls (keep core default elsewhere):
config: {
tts: {
provider: "elevenlabs",
elevenlabs: {
apiKey: "elevenlabs_key",
voiceId: "pMsXgVXv3BLzUgSXRplE",
modelId: "eleven_multilingual_v2",
providers: {
elevenlabs: {
apiKey: "elevenlabs_key",
voiceId: "pMsXgVXv3BLzUgSXRplE",
modelId: "eleven_multilingual_v2",
},
},
},
},
@@ -280,9 +288,11 @@ Override only the OpenAI model for calls (deepmerge example):
"voice-call": {
config: {
tts: {
openai: {
model: "gpt-4o-mini-tts",
voice: "marin",
providers: {
openai: {
model: "gpt-4o-mini-tts",
voice: "marin",
},
},
},
},

View File

@@ -219,6 +219,7 @@ Then run:
- `modelOverrides`: allow the model to emit TTS directives (on by default).
- `allowProvider` defaults to `false` (provider switching is opt-in).
- `providers.<id>`: provider-owned settings keyed by speech provider id.
- Legacy direct provider blocks (`messages.tts.openai`, `messages.tts.elevenlabs`, `messages.tts.microsoft`, `messages.tts.edge`) are auto-migrated to `messages.tts.providers.<id>` on load.
- `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded.
- `timeoutMs`: request timeout (ms).
- `prefsPath`: override the local prefs JSON path (provider/limit/summary).
@@ -391,6 +392,9 @@ Notes:
- `off|always|inbound|tagged` are per-session toggles (`/tts on` is an alias for `/tts always`).
- `limit` and `summary` are stored in local prefs, not the main config.
- `/tts audio` generates a one-off audio reply (does not toggle TTS on).
- `/tts status` includes fallback visibility for the latest attempt:
- success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
- failure: `Error: ...` plus `Attempts: ...`
## Agent tool

View File

@@ -219,6 +219,7 @@ Then run:
- `modelOverrides`: allow the model to emit TTS directives (on by default).
- `allowProvider` defaults to `false` (provider switching is opt-in).
- `providers.<id>`: provider-owned settings keyed by speech provider id.
- Legacy direct provider blocks (`messages.tts.openai`, `messages.tts.elevenlabs`, `messages.tts.microsoft`, `messages.tts.edge`) are auto-migrated to `messages.tts.providers.<id>` on load.
- `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded.
- `timeoutMs`: request timeout (ms).
- `prefsPath`: override the local prefs JSON path (provider/limit/summary).
@@ -391,6 +392,9 @@ Notes:
- `off|always|inbound|tagged` are per-session toggles (`/tts on` is an alias for `/tts always`).
- `limit` and `summary` are stored in local prefs, not the main config.
- `/tts audio` generates a one-off audio reply (does not toggle TTS on).
- `/tts status` includes fallback visibility for the latest attempt:
- success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
- failure: `Error: ...` plus `Attempts: ...`
## Agent tool

View File

@@ -18,9 +18,10 @@ import type {
TtsModelOverrideConfig,
TtsProvider,
} from "openclaw/plugin-sdk/config-runtime";
import { redactSensitiveText } from "openclaw/plugin-sdk/logging-core";
import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
import { logVerbose } from "openclaw/plugin-sdk/runtime-env";
import { isVerbose, logVerbose } from "openclaw/plugin-sdk/runtime-env";
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/sandbox";
import { CONFIG_DIR, resolveUserPath, stripMarkdown } from "openclaw/plugin-sdk/text-runtime";
import {
@@ -79,6 +80,8 @@ export type TtsResult = {
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
outputFormat?: string;
voiceCompatible?: boolean;
};
@@ -89,6 +92,8 @@ export type TtsSynthesisResult = {
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
outputFormat?: string;
voiceCompatible?: boolean;
fileExtension?: string;
@@ -100,6 +105,8 @@ export type TtsTelephonyResult = {
error?: string;
latencyMs?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
outputFormat?: string;
sampleRate?: number;
};
@@ -110,6 +117,8 @@ type TtsStatusEntry = {
textLength: number;
summarized: boolean;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
latencyMs?: number;
error?: string;
};
@@ -536,13 +545,22 @@ function formatTtsProviderError(provider: TtsProvider, err: unknown): string {
if (error.name === "AbortError") {
return `${provider}: request timed out`;
}
return `${provider}: ${error.message}`;
return `${provider}: ${redactSensitiveText(error.message)}`;
}
function buildTtsFailureResult(errors: string[]): { success: false; error: string } {
/**
 * Renders an arbitrary thrown value as a single-line string for verbose logs.
 *
 * The message (`Error.message`, or `String(err)` for non-Error values) is
 * passed through `redactSensitiveText`; afterwards every CR, LF and TAB is
 * replaced by its literal two-character escape so one failure cannot span
 * several log lines.
 *
 * @param err - The value caught from a provider call.
 * @returns The redacted, control-character-escaped message.
 */
function sanitizeTtsErrorForLog(err: unknown): string {
  let message: string;
  if (err instanceof Error) {
    message = err.message;
  } else {
    message = String(err);
  }
  const redacted = redactSensitiveText(message);
  // The replacement strings contain no CR/LF/TAB themselves, so a single pass
  // yields the same output as chaining three separate replacements.
  const escapes: Record<string, string> = { "\r": "\\r", "\n": "\\n", "\t": "\\t" };
  return redacted.replace(/[\r\n\t]/g, (ch) => escapes[ch] ?? ch);
}
/**
 * Builds the uniform failure payload returned when no provider produced audio.
 *
 * @param errors - Per-provider error strings; joined with "; ". When the
 *   joined text is empty the message falls back to "no providers available".
 * @param attemptedProviders - Providers that were tried, passed through as-is
 *   (the key is always present on the result, possibly `undefined`).
 * @returns A `{ success: false }` result carrying the combined error text.
 */
function buildTtsFailureResult(
  errors: string[],
  attemptedProviders?: string[],
): { success: false; error: string; attemptedProviders?: string[] } {
  const joined = errors.join("; ");
  const detail = joined === "" ? "no providers available" : joined;
  return { success: false, error: "TTS conversion failed: " + detail, attemptedProviders };
}
@@ -621,7 +639,10 @@ export async function textToSpeech(params: {
}): Promise<TtsResult> {
const synthesis = await synthesizeSpeech(params);
if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
return buildTtsFailureResult([synthesis.error ?? "TTS conversion failed"]);
return buildTtsFailureResult(
[synthesis.error ?? "TTS conversion failed"],
synthesis.attemptedProviders,
);
}
const tempRoot = resolvePreferredOpenClawTmpDir();
@@ -636,6 +657,8 @@ export async function textToSpeech(params: {
audioPath,
latencyMs: synthesis.latencyMs,
provider: synthesis.provider,
fallbackFrom: synthesis.fallbackFrom,
attemptedProviders: synthesis.attemptedProviders,
outputFormat: synthesis.outputFormat,
voiceCompatible: synthesis.voiceCompatible,
};
@@ -665,8 +688,14 @@ export async function synthesizeSpeech(params: {
const target = channelId && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file";
const errors: string[] = [];
const attemptedProviders: string[] = [];
const primaryProvider = providers[0];
logVerbose(
`TTS: starting with provider ${primaryProvider}, fallbacks: ${providers.slice(1).join(", ") || "none"}`,
);
for (const provider of providers) {
attemptedProviders.push(provider);
const providerStart = Date.now();
try {
const resolvedProvider = resolveReadySpeechProvider({
@@ -676,6 +705,7 @@ export async function synthesizeSpeech(params: {
errors,
});
if (!resolvedProvider) {
logVerbose(`TTS: provider ${provider} skipped (${errors[errors.length - 1]})`);
continue;
}
const synthesis = await resolvedProvider.synthesize({
@@ -691,16 +721,28 @@ export async function synthesizeSpeech(params: {
audioBuffer: synthesis.audioBuffer,
latencyMs: Date.now() - providerStart,
provider,
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
attemptedProviders,
outputFormat: synthesis.outputFormat,
voiceCompatible: synthesis.voiceCompatible,
fileExtension: synthesis.fileExtension,
};
} catch (err) {
errors.push(formatTtsProviderError(provider, err));
const errorMsg = formatTtsProviderError(provider, err);
errors.push(errorMsg);
const rawError = sanitizeTtsErrorForLog(err);
if (provider === primaryProvider) {
const hasFallbacks = providers.length > 1;
logVerbose(
`TTS: primary provider ${provider} failed (${rawError})${hasFallbacks ? "; trying fallback providers." : "; no fallback providers configured."}`,
);
} else {
logVerbose(`TTS: ${provider} failed (${rawError}); trying next provider.`);
}
}
}
return buildTtsFailureResult(errors);
return buildTtsFailureResult(errors, attemptedProviders);
}
export async function textToSpeechTelephony(params: {
@@ -719,8 +761,11 @@ export async function textToSpeechTelephony(params: {
const { config, providers } = setup;
const errors: string[] = [];
const attemptedProviders: string[] = [];
const primaryProvider = providers[0];
for (const provider of providers) {
attemptedProviders.push(provider);
const providerStart = Date.now();
try {
const resolvedProvider = resolveReadySpeechProvider({
@@ -745,6 +790,8 @@ export async function textToSpeechTelephony(params: {
audioBuffer: synthesis.audioBuffer,
latencyMs: Date.now() - providerStart,
provider,
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
attemptedProviders,
outputFormat: synthesis.outputFormat,
sampleRate: synthesis.sampleRate,
};
@@ -753,7 +800,7 @@ export async function textToSpeechTelephony(params: {
}
}
return buildTtsFailureResult(errors);
return buildTtsFailureResult(errors, attemptedProviders);
}
export async function listSpeechVoices(params: {
@@ -816,6 +863,16 @@ export async function maybeApplyTtsToPayload(params: {
logVerbose(`TTS: ignored directive overrides (${directives.warnings.join("; ")})`);
}
if (isVerbose()) {
const effectiveProvider = directives.overrides?.provider
? (canonicalizeSpeechProviderId(directives.overrides.provider, params.cfg) ??
getTtsProvider(config, prefsPath))
: getTtsProvider(config, prefsPath);
logVerbose(
`TTS: auto mode enabled (${autoMode}), channel=${params.channel}, selected provider=${effectiveProvider}, config.provider=${config.provider}, config.providerSource=${config.providerSource}`,
);
}
const cleanedText = directives.cleanedText;
const trimmedCleaned = cleanedText.trim();
const visibleText = trimmedCleaned.length > 0 ? trimmedCleaned : "";
@@ -910,6 +967,8 @@ export async function maybeApplyTtsToPayload(params: {
textLength: text.length,
summarized: wasSummarized,
provider: result.provider,
fallbackFrom: result.fallbackFrom,
attemptedProviders: result.attemptedProviders,
latencyMs: result.latencyMs,
};
@@ -928,6 +987,7 @@ export async function maybeApplyTtsToPayload(params: {
success: false,
textLength: text.length,
summarized: wasSummarized,
attemptedProviders: result.attemptedProviders,
error: result.error,
};
@@ -941,4 +1001,6 @@ export const _test = {
resolveModelOverridePolicy,
summarizeText,
getResolvedSpeechProviderConfig,
formatTtsProviderError,
sanitizeTtsErrorForLog,
};

View File

@@ -84,21 +84,21 @@ const voiceCallConfigSchema = {
help: "Deep-merges with messages.tts (Microsoft is ignored for calls).",
advanced: true,
},
"tts.openai.model": { label: "OpenAI TTS Model", advanced: true },
"tts.openai.voice": { label: "OpenAI TTS Voice", advanced: true },
"tts.openai.apiKey": {
"tts.providers.openai.model": { label: "OpenAI TTS Model", advanced: true },
"tts.providers.openai.voice": { label: "OpenAI TTS Voice", advanced: true },
"tts.providers.openai.apiKey": {
label: "OpenAI API Key",
sensitive: true,
advanced: true,
},
"tts.elevenlabs.modelId": { label: "ElevenLabs Model ID", advanced: true },
"tts.elevenlabs.voiceId": { label: "ElevenLabs Voice ID", advanced: true },
"tts.elevenlabs.apiKey": {
"tts.providers.elevenlabs.modelId": { label: "ElevenLabs Model ID", advanced: true },
"tts.providers.elevenlabs.voiceId": { label: "ElevenLabs Voice ID", advanced: true },
"tts.providers.elevenlabs.apiKey": {
label: "ElevenLabs API Key",
sensitive: true,
advanced: true,
},
"tts.elevenlabs.baseUrl": { label: "ElevenLabs Base URL", advanced: true },
"tts.providers.elevenlabs.baseUrl": { label: "ElevenLabs Base URL", advanced: true },
publicUrl: { label: "Public Webhook URL", advanced: true },
skipSignatureVerification: {
label: "Skip Signature Verification",

View File

@@ -104,33 +104,33 @@
"help": "Deep-merges with messages.tts (Microsoft is ignored for calls).",
"advanced": true
},
"tts.openai.model": {
"tts.providers.openai.model": {
"label": "OpenAI TTS Model",
"advanced": true
},
"tts.openai.voice": {
"tts.providers.openai.voice": {
"label": "OpenAI TTS Voice",
"advanced": true
},
"tts.openai.apiKey": {
"tts.providers.openai.apiKey": {
"label": "OpenAI API Key",
"sensitive": true,
"advanced": true
},
"tts.elevenlabs.modelId": {
"tts.providers.elevenlabs.modelId": {
"label": "ElevenLabs Model ID",
"advanced": true
},
"tts.elevenlabs.voiceId": {
"tts.providers.elevenlabs.voiceId": {
"label": "ElevenLabs Voice ID",
"advanced": true
},
"tts.elevenlabs.apiKey": {
"tts.providers.elevenlabs.apiKey": {
"label": "ElevenLabs API Key",
"sensitive": true,
"advanced": true
},
"tts.elevenlabs.baseUrl": {
"tts.providers.elevenlabs.baseUrl": {
"label": "ElevenLabs Base URL",
"advanced": true
},
@@ -455,127 +455,179 @@
}
}
},
"elevenlabs": {
"providers": {
"type": "object",
"additionalProperties": false,
"properties": {
"apiKey": {
"type": "string"
},
"baseUrl": {
"type": "string"
},
"voiceId": {
"type": "string"
},
"modelId": {
"type": "string"
},
"seed": {
"type": "integer",
"minimum": 0,
"maximum": 4294967295
},
"applyTextNormalization": {
"type": "string",
"enum": ["auto", "on", "off"]
},
"languageCode": {
"type": "string"
},
"voiceSettings": {
"openai": {
"type": "object",
"additionalProperties": false,
"properties": {
"stability": {
"type": "number",
"minimum": 0,
"maximum": 1
"apiKey": {
"type": "string"
},
"similarityBoost": {
"type": "number",
"minimum": 0,
"maximum": 1
"baseUrl": {
"type": "string"
},
"style": {
"type": "number",
"minimum": 0,
"maximum": 1
"model": {
"type": "string"
},
"useSpeakerBoost": {
"type": "boolean"
"voice": {
"type": "string"
},
"speed": {
"type": "number",
"minimum": 0.5,
"maximum": 2
"minimum": 0.25,
"maximum": 4.0
},
"instructions": {
"type": "string"
}
}
},
"elevenlabs": {
"type": "object",
"additionalProperties": false,
"properties": {
"apiKey": {
"type": "string"
},
"baseUrl": {
"type": "string"
},
"voiceId": {
"type": "string"
},
"modelId": {
"type": "string"
},
"seed": {
"type": "integer",
"minimum": 0,
"maximum": 4294967295
},
"applyTextNormalization": {
"type": "string",
"enum": ["auto", "on", "off"]
},
"languageCode": {
"type": "string"
},
"voiceSettings": {
"type": "object",
"additionalProperties": false,
"properties": {
"stability": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"similarityBoost": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"style": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"useSpeakerBoost": {
"type": "boolean"
},
"speed": {
"type": "number",
"minimum": 0.5,
"maximum": 2
}
}
}
}
},
"microsoft": {
"type": "object",
"additionalProperties": false,
"properties": {
"enabled": {
"type": "boolean"
},
"voice": {
"type": "string"
},
"lang": {
"type": "string"
},
"outputFormat": {
"type": "string"
},
"pitch": {
"type": "string"
},
"rate": {
"type": "string"
},
"volume": {
"type": "string"
},
"saveSubtitles": {
"type": "boolean"
},
"proxy": {
"type": "string"
},
"timeoutMs": {
"type": "integer",
"minimum": 1000,
"maximum": 120000
}
}
},
"edge": {
"type": "object",
"additionalProperties": false,
"properties": {
"enabled": {
"type": "boolean"
},
"voice": {
"type": "string"
},
"lang": {
"type": "string"
},
"outputFormat": {
"type": "string"
},
"pitch": {
"type": "string"
},
"rate": {
"type": "string"
},
"volume": {
"type": "string"
},
"saveSubtitles": {
"type": "boolean"
},
"proxy": {
"type": "string"
},
"timeoutMs": {
"type": "integer",
"minimum": 1000,
"maximum": 120000
}
}
}
}
},
"openai": {
"type": "object",
"additionalProperties": false,
"properties": {
"apiKey": {
"type": "string"
},
"additionalProperties": {
"type": "object",
"properties": {
"apiKey": {
"type": "string"
}
},
"baseUrl": {
"type": "string"
},
"model": {
"type": "string"
},
"voice": {
"type": "string"
},
"speed": {
"type": "number",
"minimum": 0.25,
"maximum": 4.0
},
"instructions": {
"type": "string"
}
}
},
"edge": {
"type": "object",
"additionalProperties": false,
"properties": {
"enabled": {
"type": "boolean"
},
"voice": {
"type": "string"
},
"lang": {
"type": "string"
},
"outputFormat": {
"type": "string"
},
"pitch": {
"type": "string"
},
"rate": {
"type": "string"
},
"volume": {
"type": "string"
},
"saveSubtitles": {
"type": "boolean"
},
"proxy": {
"type": "string"
},
"timeoutMs": {
"type": "integer",
"minimum": 1000,
"maximum": 120000
}
"additionalProperties": true
}
},
"prefsPath": {

View File

@@ -221,6 +221,7 @@ export async function createVoiceCallRuntime(params: {
coreConfig,
ttsOverride: config.tts,
runtime: ttsRuntime,
logger: log,
});
twilioProvider.setTTSProvider(ttsProvider);
log.info("[voice-call] Telephony TTS provider configured");

View File

@@ -1,4 +1,4 @@
import { afterEach, describe, expect, it } from "vitest";
import { afterEach, describe, expect, it, vi } from "vitest";
import type { VoiceCallTtsConfig } from "./config.js";
import type { CoreConfig } from "./core-bridge.js";
import { createTelephonyTtsProvider } from "./telephony-tts.js";
@@ -93,4 +93,27 @@ describe("createTelephonyTtsProvider deepMerge hardening", () => {
expect(openai.polluted).toBeUndefined();
expect(openai.model).toBe("safe");
});
// Feeds the provider a stubbed runtime whose result reports provider "microsoft"
// with fallbackFrom "elevenlabs", then checks that the injected logger's `warn`
// receives the from/to/attempts message.
it("logs fallback metadata when telephony TTS uses a fallback provider", async () => {
const warn = vi.fn();
const provider = createTelephonyTtsProvider({
coreConfig: createCoreConfig(),
runtime: {
textToSpeechTelephony: async () => ({
success: true,
audioBuffer: Buffer.alloc(2),
sampleRate: 8000,
provider: "microsoft",
fallbackFrom: "elevenlabs",
attemptedProviders: ["elevenlabs", "microsoft"],
}),
},
logger: { warn },
});
await provider.synthesizeForTelephony("hello");
expect(warn).toHaveBeenCalledWith(
"[voice-call] Telephony TTS fallback used from=elevenlabs to=microsoft attempts=elevenlabs -> microsoft",
);
});
});

View File

@@ -13,6 +13,8 @@ export type TelephonyTtsRuntime = {
audioBuffer?: Buffer;
sampleRate?: number;
provider?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
error?: string;
}>;
};
@@ -25,8 +27,11 @@ export function createTelephonyTtsProvider(params: {
coreConfig: CoreConfig;
ttsOverride?: VoiceCallTtsConfig;
runtime: TelephonyTtsRuntime;
logger?: {
warn?: (message: string) => void;
};
}): TelephonyTtsProvider {
const { coreConfig, ttsOverride, runtime } = params;
const { coreConfig, ttsOverride, runtime, logger } = params;
const mergedConfig = applyTtsOverride(coreConfig, ttsOverride);
return {
@@ -40,6 +45,16 @@ export function createTelephonyTtsProvider(params: {
throw new Error(result.error ?? "TTS conversion failed");
}
if (result.fallbackFrom && result.provider && result.fallbackFrom !== result.provider) {
const attemptedChain =
result.attemptedProviders && result.attemptedProviders.length > 0
? result.attemptedProviders.join(" -> ")
: `${result.fallbackFrom} -> ${result.provider}`;
logger?.warn?.(
`[voice-call] Telephony TTS fallback used from=${result.fallbackFrom} to=${result.provider} attempts=${attemptedChain}`,
);
}
return convertPcmToMulaw8k(result.audioBuffer, result.sampleRate);
},
};

View File

@@ -0,0 +1,120 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Mock functions that replace the "../../tts/tts.js" module for this suite.
// They are created inside vi.hoisted so the object already exists when the
// vi.mock factory for that module (which returns `ttsMocks`) is evaluated.
const ttsMocks = vi.hoisted(() => ({
getResolvedSpeechProviderConfig: vi.fn(),
getLastTtsAttempt: vi.fn(),
getTtsMaxLength: vi.fn(),
getTtsProvider: vi.fn(),
isSummarizationEnabled: vi.fn(),
isTtsEnabled: vi.fn(),
isTtsProviderConfigured: vi.fn(),
resolveTtsConfig: vi.fn(),
resolveTtsPrefsPath: vi.fn(),
setLastTtsAttempt: vi.fn(),
setSummarizationEnabled: vi.fn(),
setTtsEnabled: vi.fn(),
setTtsMaxLength: vi.fn(),
setTtsProvider: vi.fn(),
textToSpeech: vi.fn(),
}));
vi.mock("../../globals.js", () => ({
logVerbose: vi.fn(),
}));
vi.mock("../../tts/provider-registry.js", () => ({
canonicalizeSpeechProviderId: vi.fn((provider: string) => provider),
getSpeechProvider: vi.fn(() => null),
listSpeechProviders: vi.fn(() => []),
}));
vi.mock("../../tts/tts.js", () => ttsMocks);
const { handleTtsCommands } = await import("./commands-tts.js");
/**
 * Creates the minimal handler input needed by these tests: an empty `cfg`
 * plus a `command` from an authorized "owner" sender on the "telegram" channel.
 *
 * Only the fields listed here are populated; the object is force-cast to the
 * handler's full parameter type.
 *
 * @param commandBodyNormalized - The normalized command text, e.g. "/tts status".
 */
function buildTtsParams(commandBodyNormalized: string): Parameters<typeof handleTtsCommands>[0] {
  const command = {
    commandBodyNormalized,
    isAuthorizedSender: true,
    senderId: "owner",
    channel: "telegram",
  };
  const partialParams = { cfg: {}, command };
  return partialParams as unknown as Parameters<typeof handleTtsCommands>[0];
}
// Covers how `/tts status` renders fallback metadata (fallbackFrom and
// attemptedProviders) stored for the last TTS attempt.
describe("handleTtsCommands status fallback reporting", () => {
// Baseline mock state: TTS enabled, "elevenlabs" configured as provider,
// and no previous attempt recorded.
beforeEach(() => {
ttsMocks.resolveTtsConfig.mockReturnValue({});
ttsMocks.resolveTtsPrefsPath.mockReturnValue("/tmp/tts-prefs.json");
ttsMocks.isTtsEnabled.mockReturnValue(true);
ttsMocks.getTtsProvider.mockReturnValue("elevenlabs");
ttsMocks.isTtsProviderConfigured.mockReturnValue(true);
ttsMocks.getTtsMaxLength.mockReturnValue(1500);
ttsMocks.isSummarizationEnabled.mockReturnValue(true);
ttsMocks.getLastTtsAttempt.mockReturnValue(undefined);
});
// Successful attempt served by a fallback: status text must contain both the
// "Fallback:" line and the "Attempts:" chain.
it("shows fallback provider details for successful attempts", async () => {
ttsMocks.getLastTtsAttempt.mockReturnValue({
timestamp: Date.now() - 1_000,
success: true,
textLength: 128,
summarized: false,
provider: "microsoft",
fallbackFrom: "elevenlabs",
attemptedProviders: ["elevenlabs", "microsoft"],
latencyMs: 420,
});
const result = await handleTtsCommands(buildTtsParams("/tts status"), true);
expect(result?.shouldContinue).toBe(false);
expect(result?.reply?.text).toContain("Fallback: elevenlabs -> microsoft");
expect(result?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
});
// Failed attempt: status text must contain the error and the attempted chain.
it("shows attempted provider chain for failed attempts", async () => {
ttsMocks.getLastTtsAttempt.mockReturnValue({
timestamp: Date.now() - 1_000,
success: false,
textLength: 128,
summarized: false,
error: "TTS conversion failed",
attemptedProviders: ["elevenlabs", "microsoft"],
latencyMs: 420,
});
const result = await handleTtsCommands(buildTtsParams("/tts status"), true);
expect(result?.shouldContinue).toBe(false);
expect(result?.reply?.text).toContain("Error: TTS conversion failed");
expect(result?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
});
// End-to-end through the mocks: `/tts audio` stores the attempt via
// setLastTtsAttempt, and a following `/tts status` reads it back via
// getLastTtsAttempt (both wired to the local `lastAttempt` variable).
it("persists fallback metadata from /tts audio and renders it in /tts status", async () => {
let lastAttempt: Record<string, unknown> | undefined;
ttsMocks.getLastTtsAttempt.mockImplementation(() => lastAttempt);
ttsMocks.setLastTtsAttempt.mockImplementation((next: Record<string, unknown>) => {
lastAttempt = next;
});
ttsMocks.textToSpeech.mockResolvedValue({
success: true,
audioPath: "/tmp/fallback.ogg",
provider: "microsoft",
fallbackFrom: "elevenlabs",
attemptedProviders: ["elevenlabs", "microsoft"],
latencyMs: 175,
voiceCompatible: true,
});
const audioResult = await handleTtsCommands(buildTtsParams("/tts audio hello world"), true);
expect(audioResult?.shouldContinue).toBe(false);
expect(audioResult?.reply?.mediaUrl).toBe("/tmp/fallback.ogg");
const statusResult = await handleTtsCommands(buildTtsParams("/tts status"), true);
expect(statusResult?.shouldContinue).toBe(false);
expect(statusResult?.reply?.text).toContain("Provider: microsoft");
expect(statusResult?.reply?.text).toContain("Fallback: elevenlabs -> microsoft");
expect(statusResult?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
});
});

View File

@@ -135,6 +135,8 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
textLength: args.length,
summarized: false,
provider: result.provider,
fallbackFrom: result.fallbackFrom,
attemptedProviders: result.attemptedProviders,
latencyMs: result.latencyMs,
});
const payload: ReplyPayload = {
@@ -150,6 +152,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
success: false,
textLength: args.length,
summarized: false,
attemptedProviders: result.attemptedProviders,
error: result.error,
latencyMs: Date.now() - start,
});
@@ -285,9 +288,18 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
lines.push(`Text: ${last.textLength} chars${last.summarized ? " (summarized)" : ""}`);
if (last.success) {
lines.push(`Provider: ${last.provider ?? "unknown"}`);
if (last.fallbackFrom && last.provider && last.fallbackFrom !== last.provider) {
lines.push(`Fallback: ${last.fallbackFrom} -> ${last.provider}`);
}
if (last.attemptedProviders && last.attemptedProviders.length > 1) {
lines.push(`Attempts: ${last.attemptedProviders.join(" -> ")}`);
}
lines.push(`Latency: ${last.latencyMs ?? 0}ms`);
} else if (last.error) {
lines.push(`Error: ${last.error}`);
if (last.attemptedProviders && last.attemptedProviders.length > 0) {
lines.push(`Attempts: ${last.attemptedProviders.join(" -> ")}`);
}
}
}
return { shouldContinue: false, reply: { text: lines.join("\n") } };

View File

@@ -486,6 +486,81 @@ describe("config strict validation", () => {
});
});
// Writes a config with the legacy `messages.tts.elevenlabs` block and expects
// the snapshot to be valid, to flag a legacy issue at "messages.tts", and to
// expose the settings under `messages.tts.providers.elevenlabs` only.
it("accepts legacy messages.tts provider keys via auto-migration and reports legacyIssues", async () => {
await withTempHome(async (home) => {
await writeOpenClawConfig(home, {
messages: {
tts: {
provider: "elevenlabs",
elevenlabs: {
apiKey: "test-key",
voiceId: "voice-1",
},
},
},
});
const snap = await readConfigFileSnapshot();
expect(snap.valid).toBe(true);
expect(snap.legacyIssues.some((issue) => issue.path === "messages.tts")).toBe(true);
expect(snap.sourceConfig.messages?.tts?.providers?.elevenlabs).toEqual({
apiKey: "test-key",
voiceId: "voice-1",
});
expect(
(snap.sourceConfig.messages?.tts as Record<string, unknown> | undefined)?.elevenlabs,
).toBeUndefined();
});
});
// Same check for the voice-call plugin entry: a legacy `tts.openai` block must
// load as valid, raise a legacy issue at "plugins.entries", and end up under
// `tts.providers.openai` with the old key gone.
it("accepts legacy plugins.entries.*.config.tts provider keys via auto-migration", async () => {
await withTempHome(async (home) => {
await writeOpenClawConfig(home, {
plugins: {
entries: {
"voice-call": {
config: {
tts: {
provider: "openai",
openai: {
model: "gpt-4o-mini-tts",
voice: "alloy",
},
},
},
},
},
},
});
const snap = await readConfigFileSnapshot();
expect(snap.valid).toBe(true);
expect(snap.legacyIssues.some((issue) => issue.path === "plugins.entries")).toBe(true);
// Narrow the loosely typed plugin entries just enough to reach config.tts.
const voiceCallTts = (
snap.sourceConfig.plugins?.entries as
| Record<
string,
{
config?: {
tts?: {
providers?: Record<string, unknown>;
openai?: unknown;
};
};
}
>
| undefined
)?.["voice-call"]?.config?.tts;
expect(voiceCallTts?.providers?.openai).toEqual({
model: "gpt-4o-mini-tts",
voice: "alloy",
});
expect(voiceCallTts?.openai).toBeUndefined();
});
});
it("does not mark resolved-only gateway.bind aliases as auto-migratable legacy", async () => {
await withTempHome(async (home) => {
await writeOpenClawConfig(home, {

View File

@@ -443,11 +443,13 @@ describe("config plugin validation", () => {
"voice-call-schema-fixture": {
config: {
tts: {
openai: {
baseUrl: "http://localhost:8880/v1",
voice: "alloy",
speed: 1.5,
instructions: "Speak in a cheerful tone",
providers: {
openai: {
baseUrl: "http://localhost:8880/v1",
voice: "alloy",
speed: 1.5,
instructions: "Speak in a cheerful tone",
},
},
},
},
@@ -458,6 +460,74 @@ describe("config plugin validation", () => {
expect(res.ok).toBe(true);
});
// `speed: 10` under tts.providers.openai must fail validation with an issue
// reported at the exact `...tts.providers.openai.speed` path.
it("rejects out-of-range voice-call OpenAI TTS speed values", async () => {
const res = validateInSuite({
agents: { list: [{ id: "pi" }] },
plugins: {
enabled: true,
load: { paths: [voiceCallSchemaPluginDir] },
entries: {
"voice-call-schema-fixture": {
config: {
tts: {
providers: {
openai: {
speed: 10,
},
},
},
},
},
},
},
});
expect(res.ok).toBe(false);
if (!res.ok) {
expect(
res.issues.some(
(issue) =>
issue.path ===
"plugins.entries.voice-call-schema-fixture.config.tts.providers.openai.speed",
),
).toBe(true);
}
});
// `stability: 5` under tts.providers.elevenlabs.voiceSettings must fail
// validation with an issue reported at the exact `...voiceSettings.stability` path.
it("rejects out-of-range voice-call ElevenLabs voice settings", async () => {
const res = validateInSuite({
agents: { list: [{ id: "pi" }] },
plugins: {
enabled: true,
load: { paths: [voiceCallSchemaPluginDir] },
entries: {
"voice-call-schema-fixture": {
config: {
tts: {
providers: {
elevenlabs: {
voiceSettings: {
stability: 5,
},
},
},
},
},
},
},
},
});
expect(res.ok).toBe(false);
if (!res.ok) {
expect(
res.issues.some(
(issue) =>
issue.path ===
"plugins.entries.voice-call-schema-fixture.config.tts.providers.elevenlabs.voiceSettings.stability",
),
).toBe(true);
}
});
it("accepts known plugin ids and valid channel/heartbeat enums", async () => {
const res = validateInSuite({
agents: {

View File

@@ -79,6 +79,130 @@ describe("legacy migrate mention routing", () => {
});
});
describe("legacy migrate tts provider shape", () => {
it("moves messages.tts.<provider> keys into messages.tts.providers", () => {
const res = migrateLegacyConfig({
messages: {
tts: {
provider: "elevenlabs",
elevenlabs: {
apiKey: "test-key",
voiceId: "voice-1",
},
},
},
});
expect(res.changes).toContain(
"Moved messages.tts.elevenlabs → messages.tts.providers.elevenlabs.",
);
expect(res.config?.messages?.tts).toEqual({
provider: "elevenlabs",
providers: {
elevenlabs: {
apiKey: "test-key",
voiceId: "voice-1",
},
},
});
});
// A per-account legacy `edge` block must land under `providers.microsoft`, and the
// original `edge` key must be gone from the account's TTS config afterwards.
it("moves channels.discord.accounts.<id>.voice.tts.edge into providers.microsoft", () => {
  type AccountsWithTts = Record<string, { voice?: { tts?: Record<string, unknown> } }>;

  const legacyConfig = {
    channels: {
      discord: {
        accounts: {
          main: {
            voice: {
              tts: {
                edge: { voice: "en-US-JennyNeural" },
              },
            },
          },
        },
      },
    },
  };

  const res = migrateLegacyConfig(legacyConfig);

  expect(res.changes).toContain(
    "Moved channels.discord.accounts.main.voice.tts.edge → channels.discord.accounts.main.voice.tts.providers.microsoft.",
  );
  // Accounts are viewed through a loose record type so the migrated TTS node can be inspected.
  const migratedAccounts = res.config?.channels?.discord?.accounts as AccountsWithTts | undefined;
  const mainTts = migratedAccounts?.main?.voice?.tts;
  expect(mainTts?.providers).toEqual({
    microsoft: { voice: "en-US-JennyNeural" },
  });
  expect(mainTts?.edge).toBeUndefined();
});
// The voice-call plugin entry's legacy provider block must be moved under
// config.tts.providers while the `provider` selector stays in place.
it("moves plugins.entries.voice-call.config.tts.<provider> keys into providers", () => {
  type EntriesWithTts = Record<string, { config?: { tts?: Record<string, unknown> } }>;

  const legacyConfig = {
    plugins: {
      entries: {
        "voice-call": {
          config: {
            tts: {
              provider: "openai",
              openai: { model: "gpt-4o-mini-tts", voice: "alloy" },
            },
          },
        },
      },
    },
  };
  const expectedTts = {
    provider: "openai",
    providers: {
      openai: { model: "gpt-4o-mini-tts", voice: "alloy" },
    },
  };

  const res = migrateLegacyConfig(legacyConfig);

  expect(res.changes).toContain(
    "Moved plugins.entries.voice-call.config.tts.openai → plugins.entries.voice-call.config.tts.providers.openai.",
  );
  // Plugin entries are viewed through a loose record type so the migrated TTS node can be read.
  const migratedEntries = res.config?.plugins?.entries as EntriesWithTts | undefined;
  const voiceCallTts = migratedEntries?.["voice-call"]?.config?.tts;
  expect(voiceCallTts).toEqual(expectedTts);
});
// A plugin entry other than voice-call that happens to use the same legacy-looking shape
// must be left alone: no recorded changes and no migrated config.
it("does not migrate legacy tts provider keys for unknown plugin ids", () => {
  const thirdPartyConfig = {
    plugins: {
      entries: {
        "third-party-plugin": {
          config: {
            tts: {
              provider: "openai",
              openai: { model: "custom-tts" },
            },
          },
        },
      },
    },
  };

  const res = migrateLegacyConfig(thirdPartyConfig);

  expect(res.changes).toEqual([]);
  expect(res.config).toBeNull();
});
});
describe("legacy migrate heartbeat config", () => {
it("moves top-level heartbeat into agents.defaults.heartbeat", () => {
const res = migrateLegacyConfig({

View File

@@ -33,6 +33,8 @@ const AGENT_HEARTBEAT_KEYS = new Set([
]);
// Heartbeat keys associated with the channel scope.
// NOTE(review): presumably the keys routed to channels.defaults.heartbeat; usage is outside this view — confirm.
const CHANNEL_HEARTBEAT_KEYS = new Set(["showOk", "showAlerts", "useIndicator"]);
// Provider-named keys that legacy configs placed directly on a `tts` object; their presence
// marks the TTS config as needing migration to `tts.providers.<provider>`.
const LEGACY_TTS_PROVIDER_KEYS = ["openai", "elevenlabs", "microsoft", "edge"] as const;
// Plugin ids whose `config.tts` is inspected for legacy provider keys; every other plugin
// entry is skipped.
const LEGACY_TTS_PLUGIN_IDS = new Set(["voice-call"]);
function isLegacyGatewayBindHostAlias(value: unknown): boolean {
if (typeof value !== "string") {
@@ -124,6 +126,44 @@ function mergeLegacyIntoDefaults(params: {
params.raw[params.rootKey] = root;
}
/**
 * Reports whether a TTS config node carries at least one legacy provider key
 * (any entry of `LEGACY_TTS_PROVIDER_KEYS`) as an own property.
 *
 * @param value - Candidate `tts` config value; anything `getRecord` does not accept yields `false`.
 * @returns `true` when one or more legacy provider keys are present directly on the node.
 */
function hasLegacyTtsProviderKeys(value: unknown): boolean {
  const ttsRecord = getRecord(value);
  if (ttsRecord) {
    for (const providerKey of LEGACY_TTS_PROVIDER_KEYS) {
      // Own-property check only: inherited keys must not trigger a migration.
      if (Object.prototype.hasOwnProperty.call(ttsRecord, providerKey)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Reports whether any entry of an accounts map has legacy provider keys in its
 * `voice.tts` config.
 *
 * @param value - Candidate accounts map keyed by account id.
 * @returns `true` as soon as one non-blocked account has legacy TTS provider keys.
 */
function hasLegacyDiscordAccountTtsProviderKeys(value: unknown): boolean {
  const accountsById = getRecord(value);
  if (!accountsById) {
    return false;
  }
  for (const [accountId, accountValue] of Object.entries(accountsById)) {
    // Keys rejected by isBlockedObjectKey are never inspected.
    if (isBlockedObjectKey(accountId)) {
      continue;
    }
    const voiceConfig = getRecord(getRecord(accountValue)?.voice);
    if (hasLegacyTtsProviderKeys(voiceConfig?.tts)) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether any plugin entry listed in `LEGACY_TTS_PLUGIN_IDS` has legacy provider
 * keys in its `config.tts`.
 *
 * @param value - Candidate plugin entries map keyed by plugin id.
 * @returns `true` as soon as one eligible plugin entry has legacy TTS provider keys.
 */
function hasLegacyPluginEntryTtsProviderKeys(value: unknown): boolean {
  const entriesById = getRecord(value);
  if (!entriesById) {
    return false;
  }
  for (const [pluginId, entryValue] of Object.entries(entriesById)) {
    // Only non-blocked ids that are explicitly listed are in scope; other plugins own their
    // config shape and are left untouched.
    const inScope = !isBlockedObjectKey(pluginId) && LEGACY_TTS_PLUGIN_IDS.has(pluginId);
    if (!inScope) {
      continue;
    }
    const pluginConfig = getRecord(getRecord(entryValue)?.config);
    if (hasLegacyTtsProviderKeys(pluginConfig?.tts)) {
      return true;
    }
  }
  return false;
}
function getOrCreateTtsProviders(tts: Record<string, unknown>): Record<string, unknown> {
const providers = getRecord(tts.providers) ?? {};
tts.providers = providers;
@@ -195,6 +235,33 @@ const HEARTBEAT_RULE: LegacyConfigRule = {
"top-level heartbeat is not a valid config path; use agents.defaults.heartbeat (cadence/target/model settings) or channels.defaults.heartbeat (showOk/showAlerts/useIndicator).",
};
/**
 * Legacy-shape detection rules for TTS provider config. Each rule pairs a config path with
 * a matcher for that path's value and the message to report when the matcher returns true.
 * Covered locations: global messages TTS, Discord voice TTS (channel-level and per-account),
 * and the voice-call plugin entry.
 */
const LEGACY_TTS_RULES: LegacyConfigRule[] = [
  {
    path: ["messages", "tts"],
    message:
      "messages.tts.<provider> keys (openai/elevenlabs/microsoft/edge) are legacy; use messages.tts.providers.<provider> (auto-migrated on load).",
    match: hasLegacyTtsProviderKeys,
  },
  {
    path: ["channels", "discord", "voice", "tts"],
    message:
      "channels.discord.voice.tts.<provider> keys (openai/elevenlabs/microsoft/edge) are legacy; use channels.discord.voice.tts.providers.<provider> (auto-migrated on load).",
    match: hasLegacyTtsProviderKeys,
  },
  {
    // Matched at the accounts map so every account id is scanned by a single rule.
    path: ["channels", "discord", "accounts"],
    message:
      "channels.discord.accounts.<id>.voice.tts.<provider> keys (openai/elevenlabs/microsoft/edge) are legacy; use channels.discord.accounts.<id>.voice.tts.providers.<provider> (auto-migrated on load).",
    match: hasLegacyDiscordAccountTtsProviderKeys,
  },
  {
    // Matched at the entries map; the matcher itself restricts which plugin ids count.
    path: ["plugins", "entries"],
    message:
      "plugins.entries.voice-call.config.tts.<provider> keys (openai/elevenlabs/microsoft/edge) are legacy; use plugins.entries.voice-call.config.tts.providers.<provider> (auto-migrated on load).",
    match: hasLegacyPluginEntryTtsProviderKeys,
  },
];
export const LEGACY_CONFIG_MIGRATIONS_RUNTIME: LegacyConfigMigrationSpec[] = [
defineLegacyConfigMigration({
// v2026.2.26 added a startup guard requiring gateway.controlUi.allowedOrigins (or the
@@ -307,6 +374,7 @@ export const LEGACY_CONFIG_MIGRATIONS_RUNTIME: LegacyConfigMigrationSpec[] = [
defineLegacyConfigMigration({
id: "tts.providers-generic-shape",
describe: "Move legacy bundled TTS config keys into messages.tts.providers",
legacyRules: LEGACY_TTS_RULES,
apply: (raw, changes) => {
const messages = getRecord(raw.messages);
migrateLegacyTtsConfig(getRecord(messages?.tts), "messages.tts", changes);
@@ -317,18 +385,35 @@ export const LEGACY_CONFIG_MIGRATIONS_RUNTIME: LegacyConfigMigrationSpec[] = [
migrateLegacyTtsConfig(getRecord(discordVoice?.tts), "channels.discord.voice.tts", changes);
const discordAccounts = getRecord(discord?.accounts);
if (!discordAccounts) {
if (discordAccounts) {
for (const [accountId, accountValue] of Object.entries(discordAccounts)) {
if (isBlockedObjectKey(accountId)) {
continue;
}
const account = getRecord(accountValue);
const voice = getRecord(account?.voice);
migrateLegacyTtsConfig(
getRecord(voice?.tts),
`channels.discord.accounts.${accountId}.voice.tts`,
changes,
);
}
}
const plugins = getRecord(raw.plugins);
const pluginEntries = getRecord(plugins?.entries);
if (!pluginEntries) {
return;
}
for (const [accountId, accountValue] of Object.entries(discordAccounts)) {
if (isBlockedObjectKey(accountId)) {
for (const [pluginId, entryValue] of Object.entries(pluginEntries)) {
if (isBlockedObjectKey(pluginId) || !LEGACY_TTS_PLUGIN_IDS.has(pluginId)) {
continue;
}
const account = getRecord(accountValue);
const voice = getRecord(account?.voice);
const entry = getRecord(entryValue);
const config = getRecord(entry?.config);
migrateLegacyTtsConfig(
getRecord(voice?.tts),
`channels.discord.accounts.${accountId}.voice.tts`,
getRecord(config?.tts),
`plugins.entries.${pluginId}.config.tts`,
changes,
);
}

View File

@@ -82,7 +82,6 @@
"UV_INDEX_URL",
"UV_EXTRA_INDEX_URL",
"UV_DEFAULT_INDEX",
"UV_EXTRA_INDEX_URL",
"LUA_PATH",
"LUA_CPATH",
"GEM_HOME",

View File

@@ -81,6 +81,8 @@ const {
resolveModelOverridePolicy,
summarizeText,
getResolvedSpeechProviderConfig,
formatTtsProviderError,
sanitizeTtsErrorForLog,
} = _test;
const mockAssistantMessage = (content: AssistantMessage["content"]): AssistantMessage => ({
@@ -655,6 +657,29 @@ describe("tts", () => {
});
});
// Provider error text can embed credentials; these cases pin that the formatted and
// log-sanitized forms never contain the raw token.
describe("provider error redaction", () => {
  it("redacts sensitive tokens in provider errors", () => {
    const providerError = new Error("Authorization: Bearer sk-super-secret-token-1234567890");

    const result = formatTtsProviderError("openai", providerError);

    // The provider prefix and the header name survive; only the token value is removed.
    expect(result).toContain("openai:");
    expect(result).toContain("Authorization: Bearer");
    expect(result).not.toContain("sk-super-secret-token-1234567890");
  });

  it("escapes control characters in verbose fallback error logs", () => {
    const multilineError = new Error(
      "failed\nAuthorization: Bearer sk-super-secret-token-1234567890\tboom",
    );

    const result = sanitizeTtsErrorForLog(multilineError);

    // The newline and tab must appear as literal backslash escapes in the sanitized output.
    expect(result).toContain("\\n");
    expect(result).toContain("\\t");
    expect(result).not.toContain("sk-super-secret-token-1234567890");
  });
});
describe("resolveTtsConfig openai.baseUrl", () => {
const baseCfg: OpenClawConfig = {
agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },