TTS: add provider personas

This commit is contained in:
Barron Roth
2026-04-23 07:26:32 -07:00
committed by Ayaan Zaidi
parent 80219ed1b3
commit 0594fa3c4d
39 changed files with 2021 additions and 136 deletions

View File

@@ -3,11 +3,13 @@ export {
getLastTtsAttempt,
getResolvedSpeechProviderConfig,
getTtsMaxLength,
getTtsPersona,
getTtsProvider,
isSummarizationEnabled,
isTtsEnabled,
isTtsProviderConfigured,
listSpeechVoices,
listTtsPersonas,
maybeApplyTtsToPayload,
resolveExplicitTtsOverrides,
resolveTtsAutoMode,
@@ -19,6 +21,7 @@ export {
setTtsAutoMode,
setTtsEnabled,
setTtsMaxLength,
setTtsPersona,
setTtsProvider,
synthesizeSpeech,
textToSpeech,

View File

@@ -1,7 +1,12 @@
import { rmSync } from "node:fs";
import path from "node:path";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import type { SpeechProviderPlugin, SpeechSynthesisRequest } from "openclaw/plugin-sdk/speech-core";
import type { ReplyPayload } from "openclaw/plugin-sdk/reply-payload";
import type {
SpeechProviderPlugin,
SpeechProviderPrepareSynthesisContext,
SpeechSynthesisRequest,
} from "openclaw/plugin-sdk/speech-core";
import { afterEach, describe, expect, it, vi } from "vitest";
type MockSpeechSynthesisResult = Awaited<ReturnType<SpeechProviderPlugin["synthesize"]>>;
@@ -16,6 +21,9 @@ const synthesizeMock = vi.hoisted(() =>
}),
),
);
const prepareSynthesisMock = vi.hoisted(() =>
vi.fn(async (_ctx: SpeechProviderPrepareSynthesisContext) => undefined),
);
const listSpeechProvidersMock = vi.hoisted(() => vi.fn());
const getSpeechProviderMock = vi.hoisted(() => vi.fn());
@@ -31,6 +39,7 @@ vi.mock("../api.js", async () => {
label: "Mock",
autoSelectOrder: 1,
isConfigured: () => true,
prepareSynthesis: prepareSynthesisMock,
synthesize: synthesizeMock,
};
listSpeechProvidersMock.mockImplementation(() => [mockProvider]);
@@ -49,10 +58,40 @@ vi.mock("../api.js", async () => {
};
});
const { _test, maybeApplyTtsToPayload, resolveTtsConfig } = await import("./tts.js");
const {
_test,
getTtsPersona,
getTtsProvider,
maybeApplyTtsToPayload,
resolveTtsConfig,
synthesizeSpeech,
textToSpeechTelephony,
} = await import("./tts.js");
const nativeVoiceNoteChannels = ["discord", "feishu", "matrix", "telegram", "whatsapp"] as const;
function createMockSpeechProvider(
id = "mock",
options: Partial<SpeechProviderPlugin> = {},
): SpeechProviderPlugin {
return {
id,
label: id,
autoSelectOrder: id === "mock" ? 1 : 2,
isConfigured: () => true,
prepareSynthesis: prepareSynthesisMock,
synthesize: synthesizeMock,
...options,
};
}
function installSpeechProviders(providers: SpeechProviderPlugin[]): void {
listSpeechProvidersMock.mockImplementation(() => providers);
getSpeechProviderMock.mockImplementation(
(providerId: string) => providers.find((provider) => provider.id === providerId) ?? null,
);
}
function createTtsConfig(prefsName: string): OpenClawConfig {
return {
messages: {
@@ -102,6 +141,8 @@ async function expectTtsPayloadResult(params: {
describe("speech-core native voice-note routing", () => {
afterEach(() => {
synthesizeMock.mockClear();
prepareSynthesisMock.mockClear();
installSpeechProviders([createMockSpeechProvider()]);
});
it("keeps native voice-note channel support centralized", () => {
@@ -153,6 +194,268 @@ describe("speech-core native voice-note routing", () => {
audioAsVoice: undefined,
});
});
it("selects persona preferred provider before config fallback", () => {
const cfg: OpenClawConfig = {
messages: {
tts: {
enabled: true,
provider: "other",
persona: "alfred",
personas: {
alfred: {
label: "Alfred",
provider: "mock",
providers: {
mock: {
voice: "Algieba",
},
},
},
},
},
},
};
const config = resolveTtsConfig(cfg);
const prefsPath = "/tmp/openclaw-speech-core-persona-provider.json";
expect(getTtsPersona(config, prefsPath)?.id).toBe("alfred");
expect(getTtsProvider(config, prefsPath)).toBe("mock");
});
it("merges active persona provider binding into synthesis config", async () => {
const cfg: OpenClawConfig = {
messages: {
tts: {
enabled: true,
provider: "mock",
prefsPath: "/tmp/openclaw-speech-core-persona-merge.json",
providers: {
mock: {
model: "base-model",
voice: "base-voice",
},
},
persona: "alfred",
personas: {
alfred: {
provider: "mock",
providers: {
mock: {
voice: "persona-voice",
style: "dry",
},
},
},
},
},
},
};
const payload: ReplyPayload = {
text: "This reply should use persona-specific provider configuration.",
};
let mediaDir: string | undefined;
try {
const result = await maybeApplyTtsToPayload({
payload,
cfg,
channel: "slack",
kind: "final",
});
expect(synthesizeMock).toHaveBeenCalledWith(
expect.objectContaining({
providerConfig: expect.objectContaining({
model: "base-model",
voice: "persona-voice",
style: "dry",
}),
}),
);
expect(result.mediaUrl).toMatch(/voice-\d+\.ogg$/);
mediaDir = result.mediaUrl ? path.dirname(result.mediaUrl) : undefined;
} finally {
if (mediaDir) {
rmSync(mediaDir, { recursive: true, force: true });
}
}
});
it("does not mark skipped unregistered providers as missing persona bindings", async () => {
const result = await synthesizeSpeech({
text: "Use fallback provider.",
cfg: {
messages: {
tts: {
enabled: true,
provider: "missing",
persona: "alfred",
personas: {
alfred: {
providers: {
missing: {
voice: "configured-but-unregistered",
},
},
},
},
},
},
},
});
expect(result.success).toBe(true);
expect(result.attempts?.[0]).toMatchObject({
provider: "missing",
outcome: "skipped",
reasonCode: "no_provider_registered",
persona: "alfred",
});
expect(result.attempts?.[0]).not.toHaveProperty("personaBinding");
});
it("does not mark skipped telephony providers as missing persona bindings", async () => {
const result = await textToSpeechTelephony({
text: "Use telephony provider.",
cfg: {
messages: {
tts: {
enabled: true,
provider: "mock",
persona: "alfred",
personas: {
alfred: {
providers: {
mock: {
voice: "persona-voice",
},
},
},
},
},
},
},
});
expect(result.success).toBe(false);
expect(result.attempts?.[0]).toMatchObject({
provider: "mock",
outcome: "skipped",
reasonCode: "unsupported_for_telephony",
persona: "alfred",
});
expect(result.attempts?.[0]).not.toHaveProperty("personaBinding");
});
it("uses provider defaults when fallback policy allows missing persona bindings", async () => {
await synthesizeSpeech({
text: "Use neutral provider defaults.",
cfg: {
messages: {
tts: {
enabled: true,
provider: "mock",
persona: "alfred",
personas: {
alfred: {
fallbackPolicy: "provider-defaults",
prompt: {
profile: "A precise butler.",
},
},
},
},
},
},
});
expect(prepareSynthesisMock).toHaveBeenCalledWith(
expect.objectContaining({
persona: undefined,
personaProviderConfig: undefined,
}),
);
});
it("preserves persona prompts by default when provider bindings are missing", async () => {
await synthesizeSpeech({
text: "Use persona prompt.",
cfg: {
messages: {
tts: {
enabled: true,
provider: "mock",
persona: "alfred",
personas: {
alfred: {
prompt: {
profile: "A precise butler.",
},
},
},
},
},
},
});
expect(prepareSynthesisMock).toHaveBeenCalledWith(
expect.objectContaining({
persona: expect.objectContaining({ id: "alfred" }),
personaProviderConfig: undefined,
}),
);
});
it("skips unbound providers under fail policy while allowing bound fallbacks", async () => {
installSpeechProviders([
createMockSpeechProvider("mock", { autoSelectOrder: 1 }),
createMockSpeechProvider("fallback", { autoSelectOrder: 2 }),
]);
const result = await synthesizeSpeech({
text: "Use the first persona-bound provider.",
cfg: {
messages: {
tts: {
enabled: true,
provider: "mock",
persona: "alfred",
personas: {
alfred: {
fallbackPolicy: "fail",
providers: {
fallback: {
voice: "fallback-voice",
},
},
},
},
},
},
},
});
expect(result.success).toBe(true);
expect(result.provider).toBe("fallback");
expect(result.fallbackFrom).toBe("mock");
expect(result.attempts?.[0]).toMatchObject({
provider: "mock",
outcome: "skipped",
reasonCode: "not_configured",
persona: "alfred",
personaBinding: "missing",
error: "mock: persona alfred has no provider binding",
});
expect(result.attempts?.[1]).toMatchObject({
provider: "fallback",
outcome: "success",
persona: "alfred",
personaBinding: "applied",
});
});
});
describe("speech-core per-agent TTS config", () => {

View File

@@ -12,6 +12,7 @@ import path from "node:path";
import { normalizeChannelId, type ChannelId } from "openclaw/plugin-sdk/channel-targets";
import type {
OpenClawConfig,
ResolvedTtsPersona,
TtsAutoMode,
TtsConfig,
TtsModelOverrideConfig,
@@ -40,6 +41,7 @@ import {
normalizeSpeechProviderId,
normalizeTtsAutoMode,
parseTtsDirectives,
resolveEffectiveTtsConfig,
type ResolvedTtsConfig,
type ResolvedTtsModelOverrides,
scheduleCleanup,
@@ -62,13 +64,13 @@ const DEFAULT_TIMEOUT_MS = 30_000;
const DEFAULT_TTS_MAX_LENGTH = 1500;
const DEFAULT_TTS_SUMMARIZE = true;
const DEFAULT_MAX_TEXT_LENGTH = 4096;
const BLOCKED_MERGE_KEYS = new Set(["__proto__", "prototype", "constructor"]);
type TtsUserPrefs = {
tts?: {
auto?: TtsAutoMode;
enabled?: boolean;
provider?: TtsProvider;
persona?: string | null;
maxLength?: number;
summarize?: boolean;
};
@@ -86,6 +88,8 @@ export type TtsProviderAttempt = {
provider: string;
outcome: "success" | "skipped" | "failed";
reasonCode: TtsAttemptReasonCode;
persona?: string;
personaBinding?: "applied" | "missing" | "none";
latencyMs?: number;
error?: string;
};
@@ -96,6 +100,7 @@ export type TtsResult = {
error?: string;
latencyMs?: number;
provider?: string;
persona?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
@@ -111,6 +116,7 @@ export type TtsSynthesisResult = {
error?: string;
latencyMs?: number;
provider?: string;
persona?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
@@ -126,6 +132,7 @@ export type TtsTelephonyResult = {
error?: string;
latencyMs?: number;
provider?: string;
persona?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
@@ -139,6 +146,7 @@ type TtsStatusEntry = {
textLength: number;
summarized: boolean;
provider?: string;
persona?: string;
fallbackFrom?: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
@@ -162,6 +170,10 @@ function normalizeConfiguredSpeechProviderId(
return normalized === "edge" ? "microsoft" : normalized;
}
function normalizeTtsPersonaId(personaId: string | null | undefined): string | undefined {
return normalizeOptionalLowercaseString(personaId ?? undefined);
}
function resolveTtsPrefsPathValue(prefsPath: string | undefined): string {
if (prefsPath?.trim()) {
return resolveUserPath(prefsPath.trim());
@@ -229,6 +241,87 @@ function asProviderConfigMap(value: unknown): Record<string, unknown> {
: {};
}
function hasOwnProperty(value: object, key: string): boolean {
return Object.prototype.hasOwnProperty.call(value, key);
}
function normalizeProviderConfigMap(
value: unknown,
): Record<string, SpeechProviderConfig> | undefined {
const rawMap = asProviderConfigMap(value);
if (Object.keys(rawMap).length === 0) {
return undefined;
}
const next: Record<string, SpeechProviderConfig> = {};
for (const [providerId, providerConfig] of Object.entries(rawMap)) {
const normalized = normalizeConfiguredSpeechProviderId(providerId) ?? providerId;
next[normalized] = asProviderConfig(providerConfig);
}
return next;
}
function collectTtsPersonas(raw: TtsConfig): Record<string, ResolvedTtsPersona> {
const rawPersonas = asProviderConfigMap(raw.personas);
const personas: Record<string, ResolvedTtsPersona> = {};
for (const [id, value] of Object.entries(rawPersonas)) {
const normalizedId = normalizeTtsPersonaId(id);
if (!normalizedId || typeof value !== "object" || value === null || Array.isArray(value)) {
continue;
}
const persona = value as Omit<ResolvedTtsPersona, "id">;
personas[normalizedId] = {
...persona,
id: normalizedId,
provider: normalizeConfiguredSpeechProviderId(persona.provider) ?? persona.provider,
providers: normalizeProviderConfigMap(persona.providers),
};
}
return personas;
}
function resolvePersonaProviderConfig(
persona: ResolvedTtsPersona | undefined,
providerId: string,
): SpeechProviderConfig | undefined {
if (!persona?.providers) {
return undefined;
}
const normalized = normalizeConfiguredSpeechProviderId(providerId) ?? providerId;
if (hasOwnProperty(persona.providers, normalized)) {
return persona.providers[normalized];
}
if (hasOwnProperty(persona.providers, providerId)) {
return persona.providers[providerId];
}
return undefined;
}
function mergeProviderConfigWithPersona(params: {
providerConfig: SpeechProviderConfig;
persona?: ResolvedTtsPersona;
providerId: string;
}): {
providerConfig: SpeechProviderConfig;
personaProviderConfig?: SpeechProviderConfig;
personaBinding: "applied" | "missing" | "none";
} {
if (!params.persona) {
return { providerConfig: params.providerConfig, personaBinding: "none" };
}
const personaProviderConfig = resolvePersonaProviderConfig(params.persona, params.providerId);
if (!personaProviderConfig) {
return { providerConfig: params.providerConfig, personaBinding: "missing" };
}
return {
providerConfig: {
...params.providerConfig,
...personaProviderConfig,
},
personaProviderConfig,
personaBinding: "applied",
};
}
function resolveRawProviderConfig(
raw: TtsConfig | undefined,
providerId: string,
@@ -241,48 +334,6 @@ function resolveRawProviderConfig(
return asProviderConfig(direct);
}
function isPlainObject(value: unknown): value is Record<string, unknown> {
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
}
function deepMergeDefined(base: unknown, override: unknown): unknown {
if (!isPlainObject(base) || !isPlainObject(override)) {
return override === undefined ? base : override;
}
const result: Record<string, unknown> = { ...base };
for (const [key, value] of Object.entries(override)) {
if (BLOCKED_MERGE_KEYS.has(key) || value === undefined) {
continue;
}
const existing = result[key];
result[key] = key in result ? deepMergeDefined(existing, value) : value;
}
return result;
}
function normalizeAgentConfigId(value: string | undefined | null): string {
return normalizeLowercaseStringOrEmpty(value);
}
function resolveAgentTtsOverride(
cfg: OpenClawConfig,
agentId: string | undefined,
): TtsConfig | undefined {
if (!agentId || !Array.isArray(cfg.agents?.list)) {
return undefined;
}
const normalized = normalizeAgentConfigId(agentId);
const agent = cfg.agents.list.find((entry) => normalizeAgentConfigId(entry.id) === normalized);
return agent?.tts;
}
function resolveEffectiveTtsRawConfig(cfg: OpenClawConfig, agentId?: string): TtsConfig {
const base = cfg.messages?.tts ?? {};
const override = resolveAgentTtsOverride(cfg, agentId);
return deepMergeDefined(base, override ?? {}) as TtsConfig;
}
function resolveLazyProviderConfig(
config: ResolvedTtsConfig,
providerId: string,
@@ -325,6 +376,8 @@ function collectDirectProviderConfigEntries(raw: TtsConfig): Record<string, Spee
"maxTextLength",
"mode",
"modelOverrides",
"persona",
"personas",
"prefsPath",
"provider",
"providers",
@@ -357,10 +410,11 @@ export function getResolvedSpeechProviderConfig(
}
export function resolveTtsConfig(cfg: OpenClawConfig, agentId?: string): ResolvedTtsConfig {
const raw: TtsConfig = resolveEffectiveTtsRawConfig(cfg, agentId);
const raw: TtsConfig = resolveEffectiveTtsConfig(cfg, agentId);
const providerSource = raw.provider ? "config" : "default";
const timeoutMs = raw.timeoutMs ?? DEFAULT_TIMEOUT_MS;
const auto = resolveConfiguredTtsAutoMode(raw);
const persona = normalizeTtsPersonaId(raw.persona);
return {
auto,
mode: raw.mode ?? "final",
@@ -368,6 +422,8 @@ export function resolveTtsConfig(cfg: OpenClawConfig, agentId?: string): Resolve
normalizeConfiguredSpeechProviderId(raw.provider) ??
(providerSource === "config" ? (normalizeOptionalLowercaseString(raw.provider) ?? "") : ""),
providerSource,
persona,
personas: collectTtsPersonas(raw),
summaryModel: normalizeOptionalString(raw.summaryModel),
modelOverrides: resolveModelOverridePolicy(raw.modelOverrides),
providerConfigs: collectDirectProviderConfigEntries(raw),
@@ -418,7 +474,7 @@ function resolveEffectiveTtsAutoState(params: {
autoMode: TtsAutoMode;
prefsPath: string;
} {
const raw: TtsConfig = resolveEffectiveTtsRawConfig(params.cfg, params.agentId);
const raw: TtsConfig = resolveEffectiveTtsConfig(params.cfg, params.agentId);
const prefsPath = resolveTtsPrefsPathValue(raw.prefsPath);
const sessionAuto = normalizeTtsAutoMode(params.sessionAuto);
if (sessionAuto) {
@@ -443,6 +499,7 @@ export function buildTtsSystemPromptHint(
return undefined;
}
const _config = resolveTtsConfig(cfg, agentId);
const persona = getTtsPersona(_config, prefsPath);
const maxLength = getTtsMaxLength(prefsPath);
const summarize = isSummarizationEnabled(prefsPath) ? "on" : "off";
const autoHint =
@@ -454,6 +511,9 @@ export function buildTtsSystemPromptHint(
return [
"Voice (TTS) is enabled.",
autoHint,
persona
? `Active TTS persona: ${persona.label ?? persona.id}${persona.description ? ` - ${persona.description}` : ""}.`
: undefined,
`Keep spoken text ≤${maxLength} chars to avoid auto-summary (summary ${summarize}).`,
"Use [[tts:...]] and optional [[tts:text]]...[[/tts:text]] to control voice/expressiveness.",
]
@@ -523,6 +583,13 @@ export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): Tt
if (prefsProvider) {
return prefsProvider;
}
const activePersona = resolveTtsPersonaFromPrefs(config, prefs);
const personaProvider =
canonicalizeSpeechProviderId(activePersona?.provider, config.sourceConfig) ??
normalizeConfiguredSpeechProviderId(activePersona?.provider);
if (personaProvider && getSpeechProvider(personaProvider, config.sourceConfig)) {
return personaProvider;
}
if (config.providerSource === "config") {
return normalizeConfiguredSpeechProviderId(config.provider) ?? config.provider;
}
@@ -542,6 +609,38 @@ export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): Tt
return config.provider;
}
function resolveTtsPersonaFromPrefs(
config: ResolvedTtsConfig,
prefs: TtsUserPrefs,
): ResolvedTtsPersona | undefined {
if (prefs.tts && hasOwnProperty(prefs.tts, "persona")) {
const prefsPersona = normalizeTtsPersonaId(prefs.tts.persona);
return prefsPersona ? config.personas[prefsPersona] : undefined;
}
const configPersona = normalizeTtsPersonaId(config.persona);
return configPersona ? config.personas[configPersona] : undefined;
}
export function getTtsPersona(
config: ResolvedTtsConfig,
prefsPath: string,
): ResolvedTtsPersona | undefined {
return resolveTtsPersonaFromPrefs(config, readPrefs(prefsPath));
}
export function listTtsPersonas(config: ResolvedTtsConfig): ResolvedTtsPersona[] {
return Object.values(config.personas).toSorted((left, right) => left.id.localeCompare(right.id));
}
export function setTtsPersona(prefsPath: string, persona: string | null | undefined): void {
updatePrefs(prefsPath, (prefs) => {
const next = { ...prefs.tts };
const normalized = normalizeTtsPersonaId(persona);
next.persona = normalized ?? null;
prefs.tts = next;
});
}
export function setTtsProvider(prefsPath: string, provider: TtsProvider): void {
updatePrefs(prefsPath, (prefs) => {
prefs.tts = { ...prefs.tts, provider: canonicalizeSpeechProviderId(provider) ?? provider };
@@ -714,17 +813,20 @@ function buildTtsFailureResult(
errors: string[],
attemptedProviders?: string[],
attempts?: TtsProviderAttempt[],
persona?: string,
): {
success: false;
error: string;
attemptedProviders?: string[];
attempts?: TtsProviderAttempt[];
persona?: string;
} {
return {
success: false,
error: `TTS conversion failed: ${errors.join("; ") || "no providers available"}`,
attemptedProviders,
attempts,
persona,
};
}
@@ -733,17 +835,22 @@ type TtsProviderReadyResolution =
kind: "ready";
provider: NonNullable<ReturnType<typeof getSpeechProvider>>;
providerConfig: SpeechProviderConfig;
personaProviderConfig?: SpeechProviderConfig;
synthesisPersona?: ResolvedTtsPersona;
personaBinding: "applied" | "missing" | "none";
}
| {
kind: "skip";
reasonCode: "no_provider_registered" | "not_configured" | "unsupported_for_telephony";
message: string;
personaBinding?: "missing";
};
function resolveReadySpeechProvider(params: {
provider: TtsProvider;
cfg: OpenClawConfig;
config: ResolvedTtsConfig;
persona?: ResolvedTtsPersona;
requireTelephony?: boolean;
}): TtsProviderReadyResolution {
const resolvedProvider = getSpeechProvider(params.provider, params.cfg);
@@ -759,10 +866,23 @@ function resolveReadySpeechProvider(params: {
resolvedProvider.id,
params.cfg,
);
const merged = mergeProviderConfigWithPersona({
providerConfig,
persona: params.persona,
providerId: resolvedProvider.id,
});
if (params.persona?.fallbackPolicy === "fail" && merged.personaBinding === "missing") {
return {
kind: "skip",
reasonCode: "not_configured",
message: `${params.provider}: persona ${params.persona.id} has no provider binding`,
personaBinding: "missing",
};
}
if (
!resolvedProvider.isConfigured({
cfg: params.cfg,
providerConfig,
providerConfig: merged.providerConfig,
timeoutMs: params.config.timeoutMs,
})
) {
@@ -782,7 +902,56 @@ function resolveReadySpeechProvider(params: {
return {
kind: "ready",
provider: resolvedProvider,
providerConfig,
providerConfig: merged.providerConfig,
personaProviderConfig: merged.personaProviderConfig,
synthesisPersona:
params.persona?.fallbackPolicy === "provider-defaults" && merged.personaBinding === "missing"
? undefined
: params.persona,
personaBinding: merged.personaBinding,
};
}
async function prepareSpeechSynthesis(params: {
provider: NonNullable<ReturnType<typeof getSpeechProvider>>;
text: string;
cfg: OpenClawConfig;
providerConfig: SpeechProviderConfig;
providerOverrides?: SpeechProviderOverrides;
persona?: ResolvedTtsPersona;
personaProviderConfig?: SpeechProviderConfig;
target: "audio-file" | "voice-note" | "telephony";
timeoutMs: number;
}): Promise<{
text: string;
providerConfig: SpeechProviderConfig;
providerOverrides?: SpeechProviderOverrides;
}> {
if (!params.provider.prepareSynthesis) {
return {
text: params.text,
providerConfig: params.providerConfig,
providerOverrides: params.providerOverrides,
};
}
const prepared = await params.provider.prepareSynthesis({
text: params.text,
cfg: params.cfg,
providerConfig: params.providerConfig,
providerOverrides: params.providerOverrides,
persona: params.persona,
personaProviderConfig: params.personaProviderConfig,
target: params.target,
timeoutMs: params.timeoutMs,
});
return {
text: prepared?.text ?? params.text,
providerConfig: prepared?.providerConfig
? { ...params.providerConfig, ...prepared.providerConfig }
: params.providerConfig,
providerOverrides: prepared?.providerOverrides
? { ...params.providerOverrides, ...prepared.providerOverrides }
: params.providerOverrides,
};
}
@@ -796,6 +965,7 @@ function resolveTtsRequestSetup(params: {
}):
| {
config: ResolvedTtsConfig;
persona?: ResolvedTtsPersona;
providers: TtsProvider[];
}
| {
@@ -814,6 +984,7 @@ function resolveTtsRequestSetup(params: {
canonicalizeSpeechProviderId(params.providerOverride, params.cfg) ?? userProvider;
return {
config,
persona: getTtsPersona(config, prefsPath),
providers: params.disableFallback ? [provider] : resolveTtsProviderOrder(provider, params.cfg),
};
}
@@ -833,6 +1004,7 @@ export async function textToSpeech(params: {
return {
success: false,
error: synthesis.error ?? "TTS conversion failed",
persona: synthesis.persona,
attemptedProviders: synthesis.attemptedProviders,
attempts: synthesis.attempts,
};
@@ -850,6 +1022,7 @@ export async function textToSpeech(params: {
audioPath,
latencyMs: synthesis.latencyMs,
provider: synthesis.provider,
persona: synthesis.persona,
fallbackFrom: synthesis.fallbackFrom,
attemptedProviders: synthesis.attemptedProviders,
attempts: synthesis.attempts,
@@ -886,7 +1059,7 @@ export async function synthesizeSpeech(params: {
return { success: false, error: setup.error };
}
const { config, providers } = setup;
const { config, persona, providers } = setup;
const timeoutMs = params.timeoutMs ?? config.timeoutMs;
const target = supportsNativeVoiceNoteTts(params.channel) ? "voice-note" : "audio-file";
@@ -906,6 +1079,7 @@ export async function synthesizeSpeech(params: {
provider,
cfg: params.cfg,
config,
persona,
});
if (resolvedProvider.kind === "skip") {
errors.push(resolvedProvider.message);
@@ -913,17 +1087,32 @@ export async function synthesizeSpeech(params: {
provider,
outcome: "skipped",
reasonCode: resolvedProvider.reasonCode,
persona: persona?.id,
...(resolvedProvider.personaBinding
? { personaBinding: resolvedProvider.personaBinding }
: {}),
error: resolvedProvider.message,
});
logVerbose(`TTS: provider ${provider} skipped (${resolvedProvider.message})`);
continue;
}
const synthesis = await resolvedProvider.provider.synthesize({
const prepared = await prepareSpeechSynthesis({
provider: resolvedProvider.provider,
text: params.text,
cfg: params.cfg,
providerConfig: resolvedProvider.providerConfig,
target,
providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.provider.id],
persona: resolvedProvider.synthesisPersona,
personaProviderConfig: resolvedProvider.personaProviderConfig,
target,
timeoutMs,
});
const synthesis = await resolvedProvider.provider.synthesize({
text: prepared.text,
cfg: params.cfg,
providerConfig: prepared.providerConfig,
target,
providerOverrides: prepared.providerOverrides,
timeoutMs,
});
const latencyMs = Date.now() - providerStart;
@@ -931,6 +1120,8 @@ export async function synthesizeSpeech(params: {
provider,
outcome: "success",
reasonCode: "success",
persona: persona?.id,
personaBinding: resolvedProvider.personaBinding,
latencyMs,
});
return {
@@ -938,6 +1129,7 @@ export async function synthesizeSpeech(params: {
audioBuffer: synthesis.audioBuffer,
latencyMs,
provider,
persona: persona?.id,
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
attemptedProviders,
attempts,
@@ -956,6 +1148,13 @@ export async function synthesizeSpeech(params: {
reasonCode:
err instanceof Error && err.name === "AbortError" ? "timeout" : "provider_error",
latencyMs,
persona: persona?.id,
personaBinding:
resolvePersonaProviderConfig(persona, provider) != null
? "applied"
: persona
? "missing"
: "none",
error: errorMsg,
});
const rawError = sanitizeTtsErrorForLog(err);
@@ -970,7 +1169,7 @@ export async function synthesizeSpeech(params: {
}
}
return buildTtsFailureResult(errors, attemptedProviders, attempts);
return buildTtsFailureResult(errors, attemptedProviders, attempts, persona?.id);
}
export async function textToSpeechTelephony(params: {
@@ -987,7 +1186,7 @@ export async function textToSpeechTelephony(params: {
return { success: false, error: setup.error };
}
const { config, providers } = setup;
const { config, persona, providers } = setup;
const errors: string[] = [];
const attemptedProviders: string[] = [];
const attempts: TtsProviderAttempt[] = [];
@@ -1004,6 +1203,7 @@ export async function textToSpeechTelephony(params: {
provider,
cfg: params.cfg,
config,
persona,
requireTelephony: true,
});
if (resolvedProvider.kind === "skip") {
@@ -1012,28 +1212,32 @@ export async function textToSpeechTelephony(params: {
provider,
outcome: "skipped",
reasonCode: resolvedProvider.reasonCode,
persona: persona?.id,
...(resolvedProvider.personaBinding
? { personaBinding: resolvedProvider.personaBinding }
: {}),
error: resolvedProvider.message,
});
logVerbose(`TTS telephony: provider ${provider} skipped (${resolvedProvider.message})`);
continue;
}
const synthesizeTelephony = resolvedProvider.provider.synthesizeTelephony;
if (!synthesizeTelephony) {
const message = `${provider}: unsupported for telephony`;
errors.push(message);
attempts.push({
provider,
outcome: "skipped",
reasonCode: "unsupported_for_telephony",
error: message,
});
logVerbose(`TTS telephony: provider ${provider} skipped (${message})`);
continue;
}
const synthesis = await synthesizeTelephony({
const synthesizeTelephony = resolvedProvider.provider.synthesizeTelephony as NonNullable<
typeof resolvedProvider.provider.synthesizeTelephony
>;
const prepared = await prepareSpeechSynthesis({
provider: resolvedProvider.provider,
text: params.text,
cfg: params.cfg,
providerConfig: resolvedProvider.providerConfig,
persona: resolvedProvider.synthesisPersona,
personaProviderConfig: resolvedProvider.personaProviderConfig,
target: "telephony",
timeoutMs: config.timeoutMs,
});
const synthesis = await synthesizeTelephony({
text: prepared.text,
cfg: params.cfg,
providerConfig: prepared.providerConfig,
timeoutMs: config.timeoutMs,
});
const latencyMs = Date.now() - providerStart;
@@ -1041,6 +1245,8 @@ export async function textToSpeechTelephony(params: {
provider,
outcome: "success",
reasonCode: "success",
persona: persona?.id,
personaBinding: resolvedProvider.personaBinding,
latencyMs,
});
@@ -1049,6 +1255,7 @@ export async function textToSpeechTelephony(params: {
audioBuffer: synthesis.audioBuffer,
latencyMs,
provider,
persona: persona?.id,
fallbackFrom: provider !== primaryProvider ? primaryProvider : undefined,
attemptedProviders,
attempts,
@@ -1065,6 +1272,13 @@ export async function textToSpeechTelephony(params: {
reasonCode:
err instanceof Error && err.name === "AbortError" ? "timeout" : "provider_error",
latencyMs,
persona: persona?.id,
personaBinding:
resolvePersonaProviderConfig(persona, provider) != null
? "applied"
: persona
? "missing"
: "none",
error: errorMsg,
});
const rawError = sanitizeTtsErrorForLog(err);
@@ -1079,7 +1293,7 @@ export async function textToSpeechTelephony(params: {
}
}
return buildTtsFailureResult(errors, attemptedProviders, attempts);
return buildTtsFailureResult(errors, attemptedProviders, attempts, persona?.id);
}
export async function listSpeechVoices(params: {
@@ -1250,6 +1464,7 @@ export async function maybeApplyTtsToPayload(params: {
textLength: text.length,
summarized: wasSummarized,
provider: result.provider,
persona: result.persona,
fallbackFrom: result.fallbackFrom,
attemptedProviders: result.attemptedProviders,
attempts: result.attempts,
@@ -1268,6 +1483,7 @@ export async function maybeApplyTtsToPayload(params: {
success: false,
textLength: text.length,
summarized: wasSummarized,
persona: result.persona,
attemptedProviders: result.attemptedProviders,
attempts: result.attempts,
error: result.error,