mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-29 18:12:52 +00:00
fix(tts): restore 3.28 schema compatibility and fallback observability (#57953)
* fix(tts): restore legacy config compatibility and fallback observability
* fix(tts): surface fallback attempts in status and telephony
* test(tts): cover /tts audio to /tts status fallback flow
* docs(tts): align migration and fallback observability guidance
* TTS: redact fallback logs and scope legacy plugin migration
* Infra: dedupe UV_EXTRA_INDEX_URL in host env policy
* Docs: scope doctor TTS migration to voice-call
* voice-call: restore strict known TTS provider validation
This commit is contained in:
@@ -89,7 +89,6 @@ enum HostEnvSecurityPolicy {
|
||||
"UV_INDEX_URL",
|
||||
"UV_EXTRA_INDEX_URL",
|
||||
"UV_DEFAULT_INDEX",
|
||||
"UV_EXTRA_INDEX_URL",
|
||||
"LUA_PATH",
|
||||
"LUA_CPATH",
|
||||
"GEM_HOME",
|
||||
|
||||
@@ -122,6 +122,10 @@ Current migrations:
|
||||
- `routing.agents`/`routing.defaultAgentId` → `agents.list` + `agents.list[].default`
|
||||
- `routing.agentToAgent` → `tools.agentToAgent`
|
||||
- `routing.transcribeAudio` → `tools.media.audio.models`
|
||||
- `messages.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `messages.tts.providers.<provider>`
|
||||
- `channels.discord.voice.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `channels.discord.voice.tts.providers.<provider>`
|
||||
- `channels.discord.accounts.<id>.voice.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `channels.discord.accounts.<id>.voice.tts.providers.<provider>`
|
||||
- `plugins.entries.voice-call.config.tts.<provider>` (`openai`/`elevenlabs`/`microsoft`/`edge`) → `plugins.entries.voice-call.config.tts.providers.<provider>`
|
||||
- `bindings[].match.accountID` → `bindings[].match.accountId`
|
||||
- For channels with named `accounts` but missing `accounts.default`, move account-scoped top-level single-account channel values into `channels.<channel>.accounts.default` when present
|
||||
- `identity` → `agents.list[].identity`
|
||||
|
||||
@@ -219,9 +219,11 @@ streaming speech on calls. You can override it under the plugin config with the
|
||||
{
|
||||
tts: {
|
||||
provider: "elevenlabs",
|
||||
elevenlabs: {
|
||||
voiceId: "pMsXgVXv3BLzUgSXRplE",
|
||||
modelId: "eleven_multilingual_v2",
|
||||
providers: {
|
||||
elevenlabs: {
|
||||
voiceId: "pMsXgVXv3BLzUgSXRplE",
|
||||
modelId: "eleven_multilingual_v2",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -229,9 +231,11 @@ streaming speech on calls. You can override it under the plugin config with the
|
||||
|
||||
Notes:
|
||||
|
||||
- Legacy `tts.<provider>` keys inside plugin config (`openai`, `elevenlabs`, `microsoft`, `edge`) are auto-migrated to `tts.providers.<provider>` on load. Prefer the `providers` shape in committed config.
|
||||
- **Microsoft speech is ignored for voice calls** (telephony audio needs PCM; the current Microsoft transport does not expose telephony PCM output).
|
||||
- Core TTS is used when Twilio media streaming is enabled; otherwise calls fall back to provider-native voices.
|
||||
- If a Twilio media stream is already active, Voice Call does not fall back to TwiML `<Say>`. If telephony TTS is unavailable in that state, the playback request fails instead of mixing two playback paths.
|
||||
- When telephony TTS falls back to a secondary provider, Voice Call logs a warning with the provider chain (`from`, `to`, `attempts`) for debugging.
|
||||
|
||||
### More examples
|
||||
|
||||
@@ -242,7 +246,9 @@ Use core TTS only (no override):
|
||||
messages: {
|
||||
tts: {
|
||||
provider: "openai",
|
||||
openai: { voice: "alloy" },
|
||||
providers: {
|
||||
openai: { voice: "alloy" },
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -258,10 +264,12 @@ Override to ElevenLabs just for calls (keep core default elsewhere):
|
||||
config: {
|
||||
tts: {
|
||||
provider: "elevenlabs",
|
||||
elevenlabs: {
|
||||
apiKey: "elevenlabs_key",
|
||||
voiceId: "pMsXgVXv3BLzUgSXRplE",
|
||||
modelId: "eleven_multilingual_v2",
|
||||
providers: {
|
||||
elevenlabs: {
|
||||
apiKey: "elevenlabs_key",
|
||||
voiceId: "pMsXgVXv3BLzUgSXRplE",
|
||||
modelId: "eleven_multilingual_v2",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -280,9 +288,11 @@ Override only the OpenAI model for calls (deep‑merge example):
|
||||
"voice-call": {
|
||||
config: {
|
||||
tts: {
|
||||
openai: {
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "marin",
|
||||
providers: {
|
||||
openai: {
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "marin",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -219,6 +219,7 @@ Then run:
|
||||
- `modelOverrides`: allow the model to emit TTS directives (on by default).
|
||||
- `allowProvider` defaults to `false` (provider switching is opt-in).
|
||||
- `providers.<id>`: provider-owned settings keyed by speech provider id.
|
||||
- Legacy direct provider blocks (`messages.tts.openai`, `messages.tts.elevenlabs`, `messages.tts.microsoft`, `messages.tts.edge`) are auto-migrated to `messages.tts.providers.<id>` on load.
|
||||
- `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded.
|
||||
- `timeoutMs`: request timeout (ms).
|
||||
- `prefsPath`: override the local prefs JSON path (provider/limit/summary).
|
||||
@@ -391,6 +392,9 @@ Notes:
|
||||
- `off|always|inbound|tagged` are per‑session toggles (`/tts on` is an alias for `/tts always`).
|
||||
- `limit` and `summary` are stored in local prefs, not the main config.
|
||||
- `/tts audio` generates a one-off audio reply (does not toggle TTS on).
|
||||
- `/tts status` includes fallback visibility for the latest attempt:
|
||||
  - success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
|
||||
  - failure: `Error: ...` plus `Attempts: ...`
|
||||
|
||||
## Agent tool
|
||||
|
||||
|
||||
@@ -219,6 +219,7 @@ Then run:
|
||||
- `modelOverrides`: allow the model to emit TTS directives (on by default).
|
||||
- `allowProvider` defaults to `false` (provider switching is opt-in).
|
||||
- `providers.<id>`: provider-owned settings keyed by speech provider id.
|
||||
- Legacy direct provider blocks (`messages.tts.openai`, `messages.tts.elevenlabs`, `messages.tts.microsoft`, `messages.tts.edge`) are auto-migrated to `messages.tts.providers.<id>` on load.
|
||||
- `maxTextLength`: hard cap for TTS input (chars). `/tts audio` fails if exceeded.
|
||||
- `timeoutMs`: request timeout (ms).
|
||||
- `prefsPath`: override the local prefs JSON path (provider/limit/summary).
|
||||
@@ -391,6 +392,9 @@ Notes:
|
||||
- `off|always|inbound|tagged` are per‑session toggles (`/tts on` is an alias for `/tts always`).
|
||||
- `limit` and `summary` are stored in local prefs, not the main config.
|
||||
- `/tts audio` generates a one-off audio reply (does not toggle TTS on).
|
||||
- `/tts status` includes fallback visibility for the latest attempt:
|
||||
  - success fallback: `Fallback: <primary> -> <used>` plus `Attempts: ...`
|
||||
  - failure: `Error: ...` plus `Attempts: ...`
|
||||
|
||||
## Agent tool
|
||||
|
||||
|
||||
@@ -18,9 +18,10 @@ import type {
|
||||
TtsModelOverrideConfig,
|
||||
TtsProvider,
|
||||
} from "openclaw/plugin-sdk/config-runtime";
|
||||
import { redactSensitiveText } from "openclaw/plugin-sdk/logging-core";
|
||||
import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload";
|
||||
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime";
|
||||
import { logVerbose } from "openclaw/plugin-sdk/runtime-env";
|
||||
import { isVerbose, logVerbose } from "openclaw/plugin-sdk/runtime-env";
|
||||
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/sandbox";
|
||||
import { CONFIG_DIR, resolveUserPath, stripMarkdown } from "openclaw/plugin-sdk/text-runtime";
|
||||
import {
|
||||
@@ -79,6 +80,8 @@ export type TtsResult = {
|
||||
error?: string;
|
||||
latencyMs?: number;
|
||||
provider?: string;
|
||||
fallbackFrom?: string;
|
||||
attemptedProviders?: string[];
|
||||
outputFormat?: string;
|
||||
voiceCompatible?: boolean;
|
||||
};
|
||||
@@ -89,6 +92,8 @@ export type TtsSynthesisResult = {
|
||||
error?: string;
|
||||
latencyMs?: number;
|
||||
provider?: string;
|
||||
fallbackFrom?: string;
|
||||
attemptedProviders?: string[];
|
||||
outputFormat?: string;
|
||||
voiceCompatible?: boolean;
|
||||
fileExtension?: string;
|
||||
@@ -100,6 +105,8 @@ export type TtsTelephonyResult = {
|
||||
error?: string;
|
||||
latencyMs?: number;
|
||||
provider?: string;
|
||||
fallbackFrom?: string;
|
||||
attemptedProviders?: string[];
|
||||
outputFormat?: string;
|
||||
sampleRate?: number;
|
||||
};
|
||||
@@ -110,6 +117,8 @@ type TtsStatusEntry = {
|
||||
textLength: number;
|
||||
summarized: boolean;
|
||||
provider?: string;
|
||||
fallbackFrom?: string;
|
||||
attemptedProviders?: string[];
|
||||
latencyMs?: number;
|
||||
error?: string;
|
||||
};
|
||||
@@ -536,13 +545,22 @@ function formatTtsProviderError(provider: TtsProvider, err: unknown): string {
|
||||
if (error.name === "AbortError") {
|
||||
return `${provider}: request timed out`;
|
||||
}
|
||||
return `${provider}: ${error.message}`;
|
||||
return `${provider}: ${redactSensitiveText(error.message)}`;
|
||||
}
|
||||
|
||||
function buildTtsFailureResult(errors: string[]): { success: false; error: string } {
|
||||
function sanitizeTtsErrorForLog(err: unknown): string {
|
||||
const raw = err instanceof Error ? err.message : String(err);
|
||||
return redactSensitiveText(raw).replace(/\r/g, "\\r").replace(/\n/g, "\\n").replace(/\t/g, "\\t");
|
||||
}
|
||||
|
||||
function buildTtsFailureResult(
|
||||
errors: string[],
|
||||
attemptedProviders?: string[],
|
||||
): { success: false; error: string; attemptedProviders?: string[] } {
|
||||
return {
|
||||
success: false,
|
||||
error: `TTS conversion failed: ${errors.join("; ") || "no providers available"}`,
|
||||
attemptedProviders,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -621,7 +639,10 @@ export async function textToSpeech(params: {
|
||||
}): Promise<TtsResult> {
|
||||
const synthesis = await synthesizeSpeech(params);
|
||||
if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) {
|
||||
return buildTtsFailureResult([synthesis.error ?? "TTS conversion failed"]);
|
||||
return buildTtsFailureResult(
|
||||
[synthesis.error ?? "TTS conversion failed"],
|
||||
synthesis.attemptedProviders,
|
||||
);
|
||||
}
|
||||
|
||||
const tempRoot = resolvePreferredOpenClawTmpDir();
|
||||
@@ -636,6 +657,8 @@ export async function textToSpeech(params: {
|
||||
audioPath,
|
||||
latencyMs: synthesis.latencyMs,
|
||||
provider: synthesis.provider,
|
||||
fallbackFrom: synthesis.fallbackFrom,
|
||||
attemptedProviders: synthesis.attemptedProviders,
|
||||
outputFormat: synthesis.outputFormat,
|
||||
voiceCompatible: synthesis.voiceCompatible,
|
||||
};
|
||||
@@ -665,8 +688,14 @@ export async function synthesizeSpeech(params: {
|
||||
const target = channelId && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file";
|
||||
|
||||
const errors: string[] = [];
|
||||
const attemptedProviders: string[] = [];
|
||||
const primaryProvider = providers[0];
|
||||
logVerbose(
|
||||
`TTS: starting with provider ${primaryProvider}, fallbacks: ${providers.slice(1).join(", ") || "none"}`,
|
||||
);
|
||||
|
||||
for (const provider of providers) {
|
||||
attemptedProviders.push(provider);
|
||||
const providerStart = Date.now();
|
||||
try {
|
||||
const resolvedProvider = resolveReadySpeechProvider({
|
||||
@@ -676,6 +705,7 @@ export async function synthesizeSpeech(params: {
|
||||
errors,
|
||||
});
|
||||
if (!resolvedProvider) {
|
||||
logVerbose(`TTS: provider ${provider} skipped (${errors[errors.length - 1]})`);
|
||||
continue;
|
||||
}
|
||||
const synthesis = await resolvedProvider.synthesize({
|
||||
@@ -691,16 +721,28 @@ export async function synthesizeSpeech(params: {
|
||||
audioBuffer: synthesis.audioBuffer,
|
||||
latencyMs: Date.now() - providerStart,
|
||||
provider,
|
||||
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
|
||||
attemptedProviders,
|
||||
outputFormat: synthesis.outputFormat,
|
||||
voiceCompatible: synthesis.voiceCompatible,
|
||||
fileExtension: synthesis.fileExtension,
|
||||
};
|
||||
} catch (err) {
|
||||
errors.push(formatTtsProviderError(provider, err));
|
||||
const errorMsg = formatTtsProviderError(provider, err);
|
||||
errors.push(errorMsg);
|
||||
const rawError = sanitizeTtsErrorForLog(err);
|
||||
if (provider === primaryProvider) {
|
||||
const hasFallbacks = providers.length > 1;
|
||||
logVerbose(
|
||||
`TTS: primary provider ${provider} failed (${rawError})${hasFallbacks ? "; trying fallback providers." : "; no fallback providers configured."}`,
|
||||
);
|
||||
} else {
|
||||
logVerbose(`TTS: ${provider} failed (${rawError}); trying next provider.`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return buildTtsFailureResult(errors);
|
||||
return buildTtsFailureResult(errors, attemptedProviders);
|
||||
}
|
||||
|
||||
export async function textToSpeechTelephony(params: {
|
||||
@@ -719,8 +761,11 @@ export async function textToSpeechTelephony(params: {
|
||||
|
||||
const { config, providers } = setup;
|
||||
const errors: string[] = [];
|
||||
const attemptedProviders: string[] = [];
|
||||
const primaryProvider = providers[0];
|
||||
|
||||
for (const provider of providers) {
|
||||
attemptedProviders.push(provider);
|
||||
const providerStart = Date.now();
|
||||
try {
|
||||
const resolvedProvider = resolveReadySpeechProvider({
|
||||
@@ -745,6 +790,8 @@ export async function textToSpeechTelephony(params: {
|
||||
audioBuffer: synthesis.audioBuffer,
|
||||
latencyMs: Date.now() - providerStart,
|
||||
provider,
|
||||
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
|
||||
attemptedProviders,
|
||||
outputFormat: synthesis.outputFormat,
|
||||
sampleRate: synthesis.sampleRate,
|
||||
};
|
||||
@@ -753,7 +800,7 @@ export async function textToSpeechTelephony(params: {
|
||||
}
|
||||
}
|
||||
|
||||
return buildTtsFailureResult(errors);
|
||||
return buildTtsFailureResult(errors, attemptedProviders);
|
||||
}
|
||||
|
||||
export async function listSpeechVoices(params: {
|
||||
@@ -816,6 +863,16 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
logVerbose(`TTS: ignored directive overrides (${directives.warnings.join("; ")})`);
|
||||
}
|
||||
|
||||
if (isVerbose()) {
|
||||
const effectiveProvider = directives.overrides?.provider
|
||||
? (canonicalizeSpeechProviderId(directives.overrides.provider, params.cfg) ??
|
||||
getTtsProvider(config, prefsPath))
|
||||
: getTtsProvider(config, prefsPath);
|
||||
logVerbose(
|
||||
`TTS: auto mode enabled (${autoMode}), channel=${params.channel}, selected provider=${effectiveProvider}, config.provider=${config.provider}, config.providerSource=${config.providerSource}`,
|
||||
);
|
||||
}
|
||||
|
||||
const cleanedText = directives.cleanedText;
|
||||
const trimmedCleaned = cleanedText.trim();
|
||||
const visibleText = trimmedCleaned.length > 0 ? trimmedCleaned : "";
|
||||
@@ -910,6 +967,8 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
textLength: text.length,
|
||||
summarized: wasSummarized,
|
||||
provider: result.provider,
|
||||
fallbackFrom: result.fallbackFrom,
|
||||
attemptedProviders: result.attemptedProviders,
|
||||
latencyMs: result.latencyMs,
|
||||
};
|
||||
|
||||
@@ -928,6 +987,7 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
success: false,
|
||||
textLength: text.length,
|
||||
summarized: wasSummarized,
|
||||
attemptedProviders: result.attemptedProviders,
|
||||
error: result.error,
|
||||
};
|
||||
|
||||
@@ -941,4 +1001,6 @@ export const _test = {
|
||||
resolveModelOverridePolicy,
|
||||
summarizeText,
|
||||
getResolvedSpeechProviderConfig,
|
||||
formatTtsProviderError,
|
||||
sanitizeTtsErrorForLog,
|
||||
};
|
||||
|
||||
@@ -84,21 +84,21 @@ const voiceCallConfigSchema = {
|
||||
help: "Deep-merges with messages.tts (Microsoft is ignored for calls).",
|
||||
advanced: true,
|
||||
},
|
||||
"tts.openai.model": { label: "OpenAI TTS Model", advanced: true },
|
||||
"tts.openai.voice": { label: "OpenAI TTS Voice", advanced: true },
|
||||
"tts.openai.apiKey": {
|
||||
"tts.providers.openai.model": { label: "OpenAI TTS Model", advanced: true },
|
||||
"tts.providers.openai.voice": { label: "OpenAI TTS Voice", advanced: true },
|
||||
"tts.providers.openai.apiKey": {
|
||||
label: "OpenAI API Key",
|
||||
sensitive: true,
|
||||
advanced: true,
|
||||
},
|
||||
"tts.elevenlabs.modelId": { label: "ElevenLabs Model ID", advanced: true },
|
||||
"tts.elevenlabs.voiceId": { label: "ElevenLabs Voice ID", advanced: true },
|
||||
"tts.elevenlabs.apiKey": {
|
||||
"tts.providers.elevenlabs.modelId": { label: "ElevenLabs Model ID", advanced: true },
|
||||
"tts.providers.elevenlabs.voiceId": { label: "ElevenLabs Voice ID", advanced: true },
|
||||
"tts.providers.elevenlabs.apiKey": {
|
||||
label: "ElevenLabs API Key",
|
||||
sensitive: true,
|
||||
advanced: true,
|
||||
},
|
||||
"tts.elevenlabs.baseUrl": { label: "ElevenLabs Base URL", advanced: true },
|
||||
"tts.providers.elevenlabs.baseUrl": { label: "ElevenLabs Base URL", advanced: true },
|
||||
publicUrl: { label: "Public Webhook URL", advanced: true },
|
||||
skipSignatureVerification: {
|
||||
label: "Skip Signature Verification",
|
||||
|
||||
@@ -104,33 +104,33 @@
|
||||
"help": "Deep-merges with messages.tts (Microsoft is ignored for calls).",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.openai.model": {
|
||||
"tts.providers.openai.model": {
|
||||
"label": "OpenAI TTS Model",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.openai.voice": {
|
||||
"tts.providers.openai.voice": {
|
||||
"label": "OpenAI TTS Voice",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.openai.apiKey": {
|
||||
"tts.providers.openai.apiKey": {
|
||||
"label": "OpenAI API Key",
|
||||
"sensitive": true,
|
||||
"advanced": true
|
||||
},
|
||||
"tts.elevenlabs.modelId": {
|
||||
"tts.providers.elevenlabs.modelId": {
|
||||
"label": "ElevenLabs Model ID",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.elevenlabs.voiceId": {
|
||||
"tts.providers.elevenlabs.voiceId": {
|
||||
"label": "ElevenLabs Voice ID",
|
||||
"advanced": true
|
||||
},
|
||||
"tts.elevenlabs.apiKey": {
|
||||
"tts.providers.elevenlabs.apiKey": {
|
||||
"label": "ElevenLabs API Key",
|
||||
"sensitive": true,
|
||||
"advanced": true
|
||||
},
|
||||
"tts.elevenlabs.baseUrl": {
|
||||
"tts.providers.elevenlabs.baseUrl": {
|
||||
"label": "ElevenLabs Base URL",
|
||||
"advanced": true
|
||||
},
|
||||
@@ -455,127 +455,179 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"elevenlabs": {
|
||||
"providers": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"voiceId": {
|
||||
"type": "string"
|
||||
},
|
||||
"modelId": {
|
||||
"type": "string"
|
||||
},
|
||||
"seed": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"maximum": 4294967295
|
||||
},
|
||||
"applyTextNormalization": {
|
||||
"type": "string",
|
||||
"enum": ["auto", "on", "off"]
|
||||
},
|
||||
"languageCode": {
|
||||
"type": "string"
|
||||
},
|
||||
"voiceSettings": {
|
||||
"openai": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"stability": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
},
|
||||
"similarityBoost": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"style": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"useSpeakerBoost": {
|
||||
"type": "boolean"
|
||||
"voice": {
|
||||
"type": "string"
|
||||
},
|
||||
"speed": {
|
||||
"type": "number",
|
||||
"minimum": 0.5,
|
||||
"maximum": 2
|
||||
"minimum": 0.25,
|
||||
"maximum": 4.0
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"elevenlabs": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"voiceId": {
|
||||
"type": "string"
|
||||
},
|
||||
"modelId": {
|
||||
"type": "string"
|
||||
},
|
||||
"seed": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"maximum": 4294967295
|
||||
},
|
||||
"applyTextNormalization": {
|
||||
"type": "string",
|
||||
"enum": ["auto", "on", "off"]
|
||||
},
|
||||
"languageCode": {
|
||||
"type": "string"
|
||||
},
|
||||
"voiceSettings": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"stability": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
},
|
||||
"similarityBoost": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
},
|
||||
"style": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
},
|
||||
"useSpeakerBoost": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"speed": {
|
||||
"type": "number",
|
||||
"minimum": 0.5,
|
||||
"maximum": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"microsoft": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"voice": {
|
||||
"type": "string"
|
||||
},
|
||||
"lang": {
|
||||
"type": "string"
|
||||
},
|
||||
"outputFormat": {
|
||||
"type": "string"
|
||||
},
|
||||
"pitch": {
|
||||
"type": "string"
|
||||
},
|
||||
"rate": {
|
||||
"type": "string"
|
||||
},
|
||||
"volume": {
|
||||
"type": "string"
|
||||
},
|
||||
"saveSubtitles": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"proxy": {
|
||||
"type": "string"
|
||||
},
|
||||
"timeoutMs": {
|
||||
"type": "integer",
|
||||
"minimum": 1000,
|
||||
"maximum": 120000
|
||||
}
|
||||
}
|
||||
},
|
||||
"edge": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"voice": {
|
||||
"type": "string"
|
||||
},
|
||||
"lang": {
|
||||
"type": "string"
|
||||
},
|
||||
"outputFormat": {
|
||||
"type": "string"
|
||||
},
|
||||
"pitch": {
|
||||
"type": "string"
|
||||
},
|
||||
"rate": {
|
||||
"type": "string"
|
||||
},
|
||||
"volume": {
|
||||
"type": "string"
|
||||
},
|
||||
"saveSubtitles": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"proxy": {
|
||||
"type": "string"
|
||||
},
|
||||
"timeoutMs": {
|
||||
"type": "integer",
|
||||
"minimum": 1000,
|
||||
"maximum": 120000
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"openai": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
},
|
||||
"additionalProperties": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"voice": {
|
||||
"type": "string"
|
||||
},
|
||||
"speed": {
|
||||
"type": "number",
|
||||
"minimum": 0.25,
|
||||
"maximum": 4.0
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"edge": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"voice": {
|
||||
"type": "string"
|
||||
},
|
||||
"lang": {
|
||||
"type": "string"
|
||||
},
|
||||
"outputFormat": {
|
||||
"type": "string"
|
||||
},
|
||||
"pitch": {
|
||||
"type": "string"
|
||||
},
|
||||
"rate": {
|
||||
"type": "string"
|
||||
},
|
||||
"volume": {
|
||||
"type": "string"
|
||||
},
|
||||
"saveSubtitles": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"proxy": {
|
||||
"type": "string"
|
||||
},
|
||||
"timeoutMs": {
|
||||
"type": "integer",
|
||||
"minimum": 1000,
|
||||
"maximum": 120000
|
||||
}
|
||||
"additionalProperties": true
|
||||
}
|
||||
},
|
||||
"prefsPath": {
|
||||
|
||||
@@ -221,6 +221,7 @@ export async function createVoiceCallRuntime(params: {
|
||||
coreConfig,
|
||||
ttsOverride: config.tts,
|
||||
runtime: ttsRuntime,
|
||||
logger: log,
|
||||
});
|
||||
twilioProvider.setTTSProvider(ttsProvider);
|
||||
log.info("[voice-call] Telephony TTS provider configured");
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import type { VoiceCallTtsConfig } from "./config.js";
|
||||
import type { CoreConfig } from "./core-bridge.js";
|
||||
import { createTelephonyTtsProvider } from "./telephony-tts.js";
|
||||
@@ -93,4 +93,27 @@ describe("createTelephonyTtsProvider deepMerge hardening", () => {
|
||||
expect(openai.polluted).toBeUndefined();
|
||||
expect(openai.model).toBe("safe");
|
||||
});
|
||||
|
||||
it("logs fallback metadata when telephony TTS uses a fallback provider", async () => {
|
||||
const warn = vi.fn();
|
||||
const provider = createTelephonyTtsProvider({
|
||||
coreConfig: createCoreConfig(),
|
||||
runtime: {
|
||||
textToSpeechTelephony: async () => ({
|
||||
success: true,
|
||||
audioBuffer: Buffer.alloc(2),
|
||||
sampleRate: 8000,
|
||||
provider: "microsoft",
|
||||
fallbackFrom: "elevenlabs",
|
||||
attemptedProviders: ["elevenlabs", "microsoft"],
|
||||
}),
|
||||
},
|
||||
logger: { warn },
|
||||
});
|
||||
|
||||
await provider.synthesizeForTelephony("hello");
|
||||
expect(warn).toHaveBeenCalledWith(
|
||||
"[voice-call] Telephony TTS fallback used from=elevenlabs to=microsoft attempts=elevenlabs -> microsoft",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -13,6 +13,8 @@ export type TelephonyTtsRuntime = {
|
||||
audioBuffer?: Buffer;
|
||||
sampleRate?: number;
|
||||
provider?: string;
|
||||
fallbackFrom?: string;
|
||||
attemptedProviders?: string[];
|
||||
error?: string;
|
||||
}>;
|
||||
};
|
||||
@@ -25,8 +27,11 @@ export function createTelephonyTtsProvider(params: {
|
||||
coreConfig: CoreConfig;
|
||||
ttsOverride?: VoiceCallTtsConfig;
|
||||
runtime: TelephonyTtsRuntime;
|
||||
logger?: {
|
||||
warn?: (message: string) => void;
|
||||
};
|
||||
}): TelephonyTtsProvider {
|
||||
const { coreConfig, ttsOverride, runtime } = params;
|
||||
const { coreConfig, ttsOverride, runtime, logger } = params;
|
||||
const mergedConfig = applyTtsOverride(coreConfig, ttsOverride);
|
||||
|
||||
return {
|
||||
@@ -40,6 +45,16 @@ export function createTelephonyTtsProvider(params: {
|
||||
throw new Error(result.error ?? "TTS conversion failed");
|
||||
}
|
||||
|
||||
if (result.fallbackFrom && result.provider && result.fallbackFrom !== result.provider) {
|
||||
const attemptedChain =
|
||||
result.attemptedProviders && result.attemptedProviders.length > 0
|
||||
? result.attemptedProviders.join(" -> ")
|
||||
: `${result.fallbackFrom} -> ${result.provider}`;
|
||||
logger?.warn?.(
|
||||
`[voice-call] Telephony TTS fallback used from=${result.fallbackFrom} to=${result.provider} attempts=${attemptedChain}`,
|
||||
);
|
||||
}
|
||||
|
||||
return convertPcmToMulaw8k(result.audioBuffer, result.sampleRate);
|
||||
},
|
||||
};
|
||||
|
||||
120
src/auto-reply/reply/commands-tts.test.ts
Normal file
120
src/auto-reply/reply/commands-tts.test.ts
Normal file
@@ -0,0 +1,120 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const ttsMocks = vi.hoisted(() => ({
|
||||
getResolvedSpeechProviderConfig: vi.fn(),
|
||||
getLastTtsAttempt: vi.fn(),
|
||||
getTtsMaxLength: vi.fn(),
|
||||
getTtsProvider: vi.fn(),
|
||||
isSummarizationEnabled: vi.fn(),
|
||||
isTtsEnabled: vi.fn(),
|
||||
isTtsProviderConfigured: vi.fn(),
|
||||
resolveTtsConfig: vi.fn(),
|
||||
resolveTtsPrefsPath: vi.fn(),
|
||||
setLastTtsAttempt: vi.fn(),
|
||||
setSummarizationEnabled: vi.fn(),
|
||||
setTtsEnabled: vi.fn(),
|
||||
setTtsMaxLength: vi.fn(),
|
||||
setTtsProvider: vi.fn(),
|
||||
textToSpeech: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock("../../globals.js", () => ({
|
||||
logVerbose: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock("../../tts/provider-registry.js", () => ({
|
||||
canonicalizeSpeechProviderId: vi.fn((provider: string) => provider),
|
||||
getSpeechProvider: vi.fn(() => null),
|
||||
listSpeechProviders: vi.fn(() => []),
|
||||
}));
|
||||
|
||||
vi.mock("../../tts/tts.js", () => ttsMocks);
|
||||
|
||||
const { handleTtsCommands } = await import("./commands-tts.js");
|
||||
|
||||
function buildTtsParams(commandBodyNormalized: string): Parameters<typeof handleTtsCommands>[0] {
|
||||
return {
|
||||
cfg: {},
|
||||
command: {
|
||||
commandBodyNormalized,
|
||||
isAuthorizedSender: true,
|
||||
senderId: "owner",
|
||||
channel: "telegram",
|
||||
},
|
||||
} as unknown as Parameters<typeof handleTtsCommands>[0];
|
||||
}
|
||||
|
||||
describe("handleTtsCommands status fallback reporting", () => {
|
||||
beforeEach(() => {
|
||||
ttsMocks.resolveTtsConfig.mockReturnValue({});
|
||||
ttsMocks.resolveTtsPrefsPath.mockReturnValue("/tmp/tts-prefs.json");
|
||||
ttsMocks.isTtsEnabled.mockReturnValue(true);
|
||||
ttsMocks.getTtsProvider.mockReturnValue("elevenlabs");
|
||||
ttsMocks.isTtsProviderConfigured.mockReturnValue(true);
|
||||
ttsMocks.getTtsMaxLength.mockReturnValue(1500);
|
||||
ttsMocks.isSummarizationEnabled.mockReturnValue(true);
|
||||
ttsMocks.getLastTtsAttempt.mockReturnValue(undefined);
|
||||
});
|
||||
|
||||
it("shows fallback provider details for successful attempts", async () => {
|
||||
ttsMocks.getLastTtsAttempt.mockReturnValue({
|
||||
timestamp: Date.now() - 1_000,
|
||||
success: true,
|
||||
textLength: 128,
|
||||
summarized: false,
|
||||
provider: "microsoft",
|
||||
fallbackFrom: "elevenlabs",
|
||||
attemptedProviders: ["elevenlabs", "microsoft"],
|
||||
latencyMs: 420,
|
||||
});
|
||||
|
||||
const result = await handleTtsCommands(buildTtsParams("/tts status"), true);
|
||||
expect(result?.shouldContinue).toBe(false);
|
||||
expect(result?.reply?.text).toContain("Fallback: elevenlabs -> microsoft");
|
||||
expect(result?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
|
||||
});
|
||||
|
||||
it("shows attempted provider chain for failed attempts", async () => {
|
||||
ttsMocks.getLastTtsAttempt.mockReturnValue({
|
||||
timestamp: Date.now() - 1_000,
|
||||
success: false,
|
||||
textLength: 128,
|
||||
summarized: false,
|
||||
error: "TTS conversion failed",
|
||||
attemptedProviders: ["elevenlabs", "microsoft"],
|
||||
latencyMs: 420,
|
||||
});
|
||||
|
||||
const result = await handleTtsCommands(buildTtsParams("/tts status"), true);
|
||||
expect(result?.shouldContinue).toBe(false);
|
||||
expect(result?.reply?.text).toContain("Error: TTS conversion failed");
|
||||
expect(result?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
|
||||
});
|
||||
|
||||
it("persists fallback metadata from /tts audio and renders it in /tts status", async () => {
|
||||
let lastAttempt: Record<string, unknown> | undefined;
|
||||
ttsMocks.getLastTtsAttempt.mockImplementation(() => lastAttempt);
|
||||
ttsMocks.setLastTtsAttempt.mockImplementation((next: Record<string, unknown>) => {
|
||||
lastAttempt = next;
|
||||
});
|
||||
ttsMocks.textToSpeech.mockResolvedValue({
|
||||
success: true,
|
||||
audioPath: "/tmp/fallback.ogg",
|
||||
provider: "microsoft",
|
||||
fallbackFrom: "elevenlabs",
|
||||
attemptedProviders: ["elevenlabs", "microsoft"],
|
||||
latencyMs: 175,
|
||||
voiceCompatible: true,
|
||||
});
|
||||
|
||||
const audioResult = await handleTtsCommands(buildTtsParams("/tts audio hello world"), true);
|
||||
expect(audioResult?.shouldContinue).toBe(false);
|
||||
expect(audioResult?.reply?.mediaUrl).toBe("/tmp/fallback.ogg");
|
||||
|
||||
const statusResult = await handleTtsCommands(buildTtsParams("/tts status"), true);
|
||||
expect(statusResult?.shouldContinue).toBe(false);
|
||||
expect(statusResult?.reply?.text).toContain("Provider: microsoft");
|
||||
expect(statusResult?.reply?.text).toContain("Fallback: elevenlabs -> microsoft");
|
||||
expect(statusResult?.reply?.text).toContain("Attempts: elevenlabs -> microsoft");
|
||||
});
|
||||
});
|
||||
@@ -135,6 +135,8 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
|
||||
textLength: args.length,
|
||||
summarized: false,
|
||||
provider: result.provider,
|
||||
fallbackFrom: result.fallbackFrom,
|
||||
attemptedProviders: result.attemptedProviders,
|
||||
latencyMs: result.latencyMs,
|
||||
});
|
||||
const payload: ReplyPayload = {
|
||||
@@ -150,6 +152,7 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
|
||||
success: false,
|
||||
textLength: args.length,
|
||||
summarized: false,
|
||||
attemptedProviders: result.attemptedProviders,
|
||||
error: result.error,
|
||||
latencyMs: Date.now() - start,
|
||||
});
|
||||
@@ -285,9 +288,18 @@ export const handleTtsCommands: CommandHandler = async (params, allowTextCommand
|
||||
lines.push(`Text: ${last.textLength} chars${last.summarized ? " (summarized)" : ""}`);
|
||||
if (last.success) {
|
||||
lines.push(`Provider: ${last.provider ?? "unknown"}`);
|
||||
if (last.fallbackFrom && last.provider && last.fallbackFrom !== last.provider) {
|
||||
lines.push(`Fallback: ${last.fallbackFrom} -> ${last.provider}`);
|
||||
}
|
||||
if (last.attemptedProviders && last.attemptedProviders.length > 1) {
|
||||
lines.push(`Attempts: ${last.attemptedProviders.join(" -> ")}`);
|
||||
}
|
||||
lines.push(`Latency: ${last.latencyMs ?? 0}ms`);
|
||||
} else if (last.error) {
|
||||
lines.push(`Error: ${last.error}`);
|
||||
if (last.attemptedProviders && last.attemptedProviders.length > 0) {
|
||||
lines.push(`Attempts: ${last.attemptedProviders.join(" -> ")}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
return { shouldContinue: false, reply: { text: lines.join("\n") } };
|
||||
|
||||
@@ -486,6 +486,81 @@ describe("config strict validation", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("accepts legacy messages.tts provider keys via auto-migration and reports legacyIssues", async () => {
|
||||
await withTempHome(async (home) => {
|
||||
await writeOpenClawConfig(home, {
|
||||
messages: {
|
||||
tts: {
|
||||
provider: "elevenlabs",
|
||||
elevenlabs: {
|
||||
apiKey: "test-key",
|
||||
voiceId: "voice-1",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const snap = await readConfigFileSnapshot();
|
||||
|
||||
expect(snap.valid).toBe(true);
|
||||
expect(snap.legacyIssues.some((issue) => issue.path === "messages.tts")).toBe(true);
|
||||
expect(snap.sourceConfig.messages?.tts?.providers?.elevenlabs).toEqual({
|
||||
apiKey: "test-key",
|
||||
voiceId: "voice-1",
|
||||
});
|
||||
expect(
|
||||
(snap.sourceConfig.messages?.tts as Record<string, unknown> | undefined)?.elevenlabs,
|
||||
).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
it("accepts legacy plugins.entries.*.config.tts provider keys via auto-migration", async () => {
|
||||
await withTempHome(async (home) => {
|
||||
await writeOpenClawConfig(home, {
|
||||
plugins: {
|
||||
entries: {
|
||||
"voice-call": {
|
||||
config: {
|
||||
tts: {
|
||||
provider: "openai",
|
||||
openai: {
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "alloy",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const snap = await readConfigFileSnapshot();
|
||||
|
||||
expect(snap.valid).toBe(true);
|
||||
expect(snap.legacyIssues.some((issue) => issue.path === "plugins.entries")).toBe(true);
|
||||
const voiceCallTts = (
|
||||
snap.sourceConfig.plugins?.entries as
|
||||
| Record<
|
||||
string,
|
||||
{
|
||||
config?: {
|
||||
tts?: {
|
||||
providers?: Record<string, unknown>;
|
||||
openai?: unknown;
|
||||
};
|
||||
};
|
||||
}
|
||||
>
|
||||
| undefined
|
||||
)?.["voice-call"]?.config?.tts;
|
||||
expect(voiceCallTts?.providers?.openai).toEqual({
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "alloy",
|
||||
});
|
||||
expect(voiceCallTts?.openai).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
it("does not mark resolved-only gateway.bind aliases as auto-migratable legacy", async () => {
|
||||
await withTempHome(async (home) => {
|
||||
await writeOpenClawConfig(home, {
|
||||
|
||||
@@ -443,11 +443,13 @@ describe("config plugin validation", () => {
|
||||
"voice-call-schema-fixture": {
|
||||
config: {
|
||||
tts: {
|
||||
openai: {
|
||||
baseUrl: "http://localhost:8880/v1",
|
||||
voice: "alloy",
|
||||
speed: 1.5,
|
||||
instructions: "Speak in a cheerful tone",
|
||||
providers: {
|
||||
openai: {
|
||||
baseUrl: "http://localhost:8880/v1",
|
||||
voice: "alloy",
|
||||
speed: 1.5,
|
||||
instructions: "Speak in a cheerful tone",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -458,6 +460,74 @@ describe("config plugin validation", () => {
|
||||
expect(res.ok).toBe(true);
|
||||
});
|
||||
|
||||
it("rejects out-of-range voice-call OpenAI TTS speed values", async () => {
|
||||
const res = validateInSuite({
|
||||
agents: { list: [{ id: "pi" }] },
|
||||
plugins: {
|
||||
enabled: true,
|
||||
load: { paths: [voiceCallSchemaPluginDir] },
|
||||
entries: {
|
||||
"voice-call-schema-fixture": {
|
||||
config: {
|
||||
tts: {
|
||||
providers: {
|
||||
openai: {
|
||||
speed: 10,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(res.ok).toBe(false);
|
||||
if (!res.ok) {
|
||||
expect(
|
||||
res.issues.some(
|
||||
(issue) =>
|
||||
issue.path ===
|
||||
"plugins.entries.voice-call-schema-fixture.config.tts.providers.openai.speed",
|
||||
),
|
||||
).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
it("rejects out-of-range voice-call ElevenLabs voice settings", async () => {
|
||||
const res = validateInSuite({
|
||||
agents: { list: [{ id: "pi" }] },
|
||||
plugins: {
|
||||
enabled: true,
|
||||
load: { paths: [voiceCallSchemaPluginDir] },
|
||||
entries: {
|
||||
"voice-call-schema-fixture": {
|
||||
config: {
|
||||
tts: {
|
||||
providers: {
|
||||
elevenlabs: {
|
||||
voiceSettings: {
|
||||
stability: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(res.ok).toBe(false);
|
||||
if (!res.ok) {
|
||||
expect(
|
||||
res.issues.some(
|
||||
(issue) =>
|
||||
issue.path ===
|
||||
"plugins.entries.voice-call-schema-fixture.config.tts.providers.elevenlabs.voiceSettings.stability",
|
||||
),
|
||||
).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
it("accepts known plugin ids and valid channel/heartbeat enums", async () => {
|
||||
const res = validateInSuite({
|
||||
agents: {
|
||||
|
||||
@@ -79,6 +79,130 @@ describe("legacy migrate mention routing", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("legacy migrate tts provider shape", () => {
|
||||
it("moves messages.tts.<provider> keys into messages.tts.providers", () => {
|
||||
const res = migrateLegacyConfig({
|
||||
messages: {
|
||||
tts: {
|
||||
provider: "elevenlabs",
|
||||
elevenlabs: {
|
||||
apiKey: "test-key",
|
||||
voiceId: "voice-1",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(res.changes).toContain(
|
||||
"Moved messages.tts.elevenlabs → messages.tts.providers.elevenlabs.",
|
||||
);
|
||||
expect(res.config?.messages?.tts).toEqual({
|
||||
provider: "elevenlabs",
|
||||
providers: {
|
||||
elevenlabs: {
|
||||
apiKey: "test-key",
|
||||
voiceId: "voice-1",
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it("moves channels.discord.accounts.<id>.voice.tts.edge into providers.microsoft", () => {
|
||||
const res = migrateLegacyConfig({
|
||||
channels: {
|
||||
discord: {
|
||||
accounts: {
|
||||
main: {
|
||||
voice: {
|
||||
tts: {
|
||||
edge: {
|
||||
voice: "en-US-JennyNeural",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(res.changes).toContain(
|
||||
"Moved channels.discord.accounts.main.voice.tts.edge → channels.discord.accounts.main.voice.tts.providers.microsoft.",
|
||||
);
|
||||
const mainTts = (
|
||||
res.config?.channels?.discord?.accounts as
|
||||
| Record<string, { voice?: { tts?: Record<string, unknown> } }>
|
||||
| undefined
|
||||
)?.main?.voice?.tts;
|
||||
expect(mainTts?.providers).toEqual({
|
||||
microsoft: {
|
||||
voice: "en-US-JennyNeural",
|
||||
},
|
||||
});
|
||||
expect(mainTts?.edge).toBeUndefined();
|
||||
});
|
||||
|
||||
it("moves plugins.entries.voice-call.config.tts.<provider> keys into providers", () => {
|
||||
const res = migrateLegacyConfig({
|
||||
plugins: {
|
||||
entries: {
|
||||
"voice-call": {
|
||||
config: {
|
||||
tts: {
|
||||
provider: "openai",
|
||||
openai: {
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "alloy",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(res.changes).toContain(
|
||||
"Moved plugins.entries.voice-call.config.tts.openai → plugins.entries.voice-call.config.tts.providers.openai.",
|
||||
);
|
||||
const voiceCallTts = (
|
||||
res.config?.plugins?.entries as
|
||||
| Record<string, { config?: { tts?: Record<string, unknown> } }>
|
||||
| undefined
|
||||
)?.["voice-call"]?.config?.tts;
|
||||
expect(voiceCallTts).toEqual({
|
||||
provider: "openai",
|
||||
providers: {
|
||||
openai: {
|
||||
model: "gpt-4o-mini-tts",
|
||||
voice: "alloy",
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it("does not migrate legacy tts provider keys for unknown plugin ids", () => {
|
||||
const res = migrateLegacyConfig({
|
||||
plugins: {
|
||||
entries: {
|
||||
"third-party-plugin": {
|
||||
config: {
|
||||
tts: {
|
||||
provider: "openai",
|
||||
openai: {
|
||||
model: "custom-tts",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(res.changes).toEqual([]);
|
||||
expect(res.config).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("legacy migrate heartbeat config", () => {
|
||||
it("moves top-level heartbeat into agents.defaults.heartbeat", () => {
|
||||
const res = migrateLegacyConfig({
|
||||
|
||||
@@ -33,6 +33,8 @@ const AGENT_HEARTBEAT_KEYS = new Set([
|
||||
]);
|
||||
|
||||
const CHANNEL_HEARTBEAT_KEYS = new Set(["showOk", "showAlerts", "useIndicator"]);
|
||||
const LEGACY_TTS_PROVIDER_KEYS = ["openai", "elevenlabs", "microsoft", "edge"] as const;
|
||||
const LEGACY_TTS_PLUGIN_IDS = new Set(["voice-call"]);
|
||||
|
||||
function isLegacyGatewayBindHostAlias(value: unknown): boolean {
|
||||
if (typeof value !== "string") {
|
||||
@@ -124,6 +126,44 @@ function mergeLegacyIntoDefaults(params: {
|
||||
params.raw[params.rootKey] = root;
|
||||
}
|
||||
|
||||
function hasLegacyTtsProviderKeys(value: unknown): boolean {
|
||||
const tts = getRecord(value);
|
||||
if (!tts) {
|
||||
return false;
|
||||
}
|
||||
return LEGACY_TTS_PROVIDER_KEYS.some((key) => Object.prototype.hasOwnProperty.call(tts, key));
|
||||
}
|
||||
|
||||
function hasLegacyDiscordAccountTtsProviderKeys(value: unknown): boolean {
|
||||
const accounts = getRecord(value);
|
||||
if (!accounts) {
|
||||
return false;
|
||||
}
|
||||
return Object.entries(accounts).some(([accountId, accountValue]) => {
|
||||
if (isBlockedObjectKey(accountId)) {
|
||||
return false;
|
||||
}
|
||||
const account = getRecord(accountValue);
|
||||
const voice = getRecord(account?.voice);
|
||||
return hasLegacyTtsProviderKeys(voice?.tts);
|
||||
});
|
||||
}
|
||||
|
||||
function hasLegacyPluginEntryTtsProviderKeys(value: unknown): boolean {
|
||||
const entries = getRecord(value);
|
||||
if (!entries) {
|
||||
return false;
|
||||
}
|
||||
return Object.entries(entries).some(([pluginId, entryValue]) => {
|
||||
if (isBlockedObjectKey(pluginId) || !LEGACY_TTS_PLUGIN_IDS.has(pluginId)) {
|
||||
return false;
|
||||
}
|
||||
const entry = getRecord(entryValue);
|
||||
const config = getRecord(entry?.config);
|
||||
return hasLegacyTtsProviderKeys(config?.tts);
|
||||
});
|
||||
}
|
||||
|
||||
function getOrCreateTtsProviders(tts: Record<string, unknown>): Record<string, unknown> {
|
||||
const providers = getRecord(tts.providers) ?? {};
|
||||
tts.providers = providers;
|
||||
@@ -195,6 +235,33 @@ const HEARTBEAT_RULE: LegacyConfigRule = {
|
||||
"top-level heartbeat is not a valid config path; use agents.defaults.heartbeat (cadence/target/model settings) or channels.defaults.heartbeat (showOk/showAlerts/useIndicator).",
|
||||
};
|
||||
|
||||
const LEGACY_TTS_RULES: LegacyConfigRule[] = [
|
||||
{
|
||||
path: ["messages", "tts"],
|
||||
message:
|
||||
"messages.tts.<provider> keys (openai/elevenlabs/microsoft/edge) are legacy; use messages.tts.providers.<provider> (auto-migrated on load).",
|
||||
match: (value) => hasLegacyTtsProviderKeys(value),
|
||||
},
|
||||
{
|
||||
path: ["channels", "discord", "voice", "tts"],
|
||||
message:
|
||||
"channels.discord.voice.tts.<provider> keys (openai/elevenlabs/microsoft/edge) are legacy; use channels.discord.voice.tts.providers.<provider> (auto-migrated on load).",
|
||||
match: (value) => hasLegacyTtsProviderKeys(value),
|
||||
},
|
||||
{
|
||||
path: ["channels", "discord", "accounts"],
|
||||
message:
|
||||
"channels.discord.accounts.<id>.voice.tts.<provider> keys (openai/elevenlabs/microsoft/edge) are legacy; use channels.discord.accounts.<id>.voice.tts.providers.<provider> (auto-migrated on load).",
|
||||
match: (value) => hasLegacyDiscordAccountTtsProviderKeys(value),
|
||||
},
|
||||
{
|
||||
path: ["plugins", "entries"],
|
||||
message:
|
||||
"plugins.entries.voice-call.config.tts.<provider> keys (openai/elevenlabs/microsoft/edge) are legacy; use plugins.entries.voice-call.config.tts.providers.<provider> (auto-migrated on load).",
|
||||
match: (value) => hasLegacyPluginEntryTtsProviderKeys(value),
|
||||
},
|
||||
];
|
||||
|
||||
export const LEGACY_CONFIG_MIGRATIONS_RUNTIME: LegacyConfigMigrationSpec[] = [
|
||||
defineLegacyConfigMigration({
|
||||
// v2026.2.26 added a startup guard requiring gateway.controlUi.allowedOrigins (or the
|
||||
@@ -307,6 +374,7 @@ export const LEGACY_CONFIG_MIGRATIONS_RUNTIME: LegacyConfigMigrationSpec[] = [
|
||||
defineLegacyConfigMigration({
|
||||
id: "tts.providers-generic-shape",
|
||||
describe: "Move legacy bundled TTS config keys into messages.tts.providers",
|
||||
legacyRules: LEGACY_TTS_RULES,
|
||||
apply: (raw, changes) => {
|
||||
const messages = getRecord(raw.messages);
|
||||
migrateLegacyTtsConfig(getRecord(messages?.tts), "messages.tts", changes);
|
||||
@@ -317,18 +385,35 @@ export const LEGACY_CONFIG_MIGRATIONS_RUNTIME: LegacyConfigMigrationSpec[] = [
|
||||
migrateLegacyTtsConfig(getRecord(discordVoice?.tts), "channels.discord.voice.tts", changes);
|
||||
|
||||
const discordAccounts = getRecord(discord?.accounts);
|
||||
if (!discordAccounts) {
|
||||
if (discordAccounts) {
|
||||
for (const [accountId, accountValue] of Object.entries(discordAccounts)) {
|
||||
if (isBlockedObjectKey(accountId)) {
|
||||
continue;
|
||||
}
|
||||
const account = getRecord(accountValue);
|
||||
const voice = getRecord(account?.voice);
|
||||
migrateLegacyTtsConfig(
|
||||
getRecord(voice?.tts),
|
||||
`channels.discord.accounts.${accountId}.voice.tts`,
|
||||
changes,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const plugins = getRecord(raw.plugins);
|
||||
const pluginEntries = getRecord(plugins?.entries);
|
||||
if (!pluginEntries) {
|
||||
return;
|
||||
}
|
||||
for (const [accountId, accountValue] of Object.entries(discordAccounts)) {
|
||||
if (isBlockedObjectKey(accountId)) {
|
||||
for (const [pluginId, entryValue] of Object.entries(pluginEntries)) {
|
||||
if (isBlockedObjectKey(pluginId) || !LEGACY_TTS_PLUGIN_IDS.has(pluginId)) {
|
||||
continue;
|
||||
}
|
||||
const account = getRecord(accountValue);
|
||||
const voice = getRecord(account?.voice);
|
||||
const entry = getRecord(entryValue);
|
||||
const config = getRecord(entry?.config);
|
||||
migrateLegacyTtsConfig(
|
||||
getRecord(voice?.tts),
|
||||
`channels.discord.accounts.${accountId}.voice.tts`,
|
||||
getRecord(config?.tts),
|
||||
`plugins.entries.${pluginId}.config.tts`,
|
||||
changes,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -82,7 +82,6 @@
|
||||
"UV_INDEX_URL",
|
||||
"UV_EXTRA_INDEX_URL",
|
||||
"UV_DEFAULT_INDEX",
|
||||
"UV_EXTRA_INDEX_URL",
|
||||
"LUA_PATH",
|
||||
"LUA_CPATH",
|
||||
"GEM_HOME",
|
||||
|
||||
@@ -81,6 +81,8 @@ const {
|
||||
resolveModelOverridePolicy,
|
||||
summarizeText,
|
||||
getResolvedSpeechProviderConfig,
|
||||
formatTtsProviderError,
|
||||
sanitizeTtsErrorForLog,
|
||||
} = _test;
|
||||
|
||||
const mockAssistantMessage = (content: AssistantMessage["content"]): AssistantMessage => ({
|
||||
@@ -655,6 +657,29 @@ describe("tts", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("provider error redaction", () => {
|
||||
it("redacts sensitive tokens in provider errors", () => {
|
||||
const result = formatTtsProviderError(
|
||||
"openai",
|
||||
new Error("Authorization: Bearer sk-super-secret-token-1234567890"),
|
||||
);
|
||||
|
||||
expect(result).toContain("openai:");
|
||||
expect(result).toContain("Authorization: Bearer");
|
||||
expect(result).not.toContain("sk-super-secret-token-1234567890");
|
||||
});
|
||||
|
||||
it("escapes control characters in verbose fallback error logs", () => {
|
||||
const result = sanitizeTtsErrorForLog(
|
||||
new Error("failed\nAuthorization: Bearer sk-super-secret-token-1234567890\tboom"),
|
||||
);
|
||||
|
||||
expect(result).toContain("\\n");
|
||||
expect(result).toContain("\\t");
|
||||
expect(result).not.toContain("sk-super-secret-token-1234567890");
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveTtsConfig – openai.baseUrl", () => {
|
||||
const baseCfg: OpenClawConfig = {
|
||||
agents: { defaults: { model: { primary: "openai/gpt-4o-mini" } } },
|
||||
|
||||
Reference in New Issue
Block a user