Files
openclaw/packages/speech-core/voice-models.ts
Vincent Koc 27b15a19e8 refactor(voice): catalog voice models through providers (#87794)
* refactor(providers): catalog voice models

* feat(tts): route speech through voice models

* refactor(tts): rename speaker selection fields

* refactor(tts): mark default speech models

* test(tts): type migrated speaker config assertions

* refactor(providers): avoid catalog merge map spread

* fix(tts): honor voice model fallbacks

* refactor(tts): move speech core into package

* chore(tts): register speech core knip workspace

* fix(tts): show migrated speaker voice in status

* fix(tts): satisfy speech core lint

* fix(tts): preserve explicit model aliases

* test(tts): narrow provider config assertion

* test(doctor): allow slow commitments repair check

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-05-29 04:46:45 +01:00

264 lines
7.9 KiB
TypeScript

/** Capability identifiers that a voice model can be tagged with. */
export type VoiceModelCapability = "tts" | "realtime_transcription" | "realtime_voice";
/** Capability flags for a model; a capability is present when its key is set to `true`. */
export type VoiceModelCapabilities = Partial<Record<VoiceModelCapability, true>>;
/**
 * Reference to one model of one provider. `parseVoiceModelRef` builds these
 * from `provider/model` strings, lowercasing the provider part.
 */
export type VoiceModelRef = {
/** Provider identifier. */
provider: string;
/** Model identifier within the provider. */
model: string;
/** Optional timeout (milliseconds, per the field name) copied from the config object. */
timeoutMs?: number;
};
/** Description of a voice provider and the models it offers. */
export type VoiceModelProvider = {
/** Canonical provider id. */
id: string;
/** Alternative ids; `providerMatchesId` accepts them case-insensitively alongside `id`. */
aliases?: readonly string[];
/** Optional label; copied onto synthesized catalog entries. */
label?: string;
/** Model treated as supported and flagged as `default` in synthesized catalog entries. */
defaultModel?: string | null;
/** Additional supported model ids. */
models?: readonly string[];
};
/** Catalog entry shape produced by `synthesizeVoiceModelCatalogEntries`. */
export type VoiceModelCatalogEntry = {
kind: "voice";
provider: string;
model: string;
source: "static";
capabilities: VoiceModelCapabilities;
label?: string;
/** Set to `true` only on the provider's default model. */
default?: boolean;
modes?: readonly string[];
};
/**
 * One provider to try, optionally pinned to a configured model. `voiceModel`
 * is absent when no supported configured model applies to the provider.
 */
export type VoiceProviderCandidate = {
provider: string;
voiceModel?: VoiceModelRef;
};
/**
 * Raw voice-model configuration: either a single `provider/model` string or an
 * object. Object fields are typed `unknown` because `resolveVoiceModelRefs`
 * validates them at runtime.
 */
type VoiceModelConfig =
| string
| {
/** Expected to be a `provider/model` string. */
primary?: unknown;
/** Expected to be an array of `provider/model` strings. */
fallbacks?: unknown;
/** Expected to be a positive finite number. */
timeoutMs?: unknown;
};
/**
 * Trims a string value.
 *
 * @param value - Arbitrary input of unknown shape.
 * @returns The trimmed string, or `undefined` when the input is not a string
 * or contains nothing but whitespace.
 */
function normalizeString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
/**
 * Trims and lowercases a string value.
 *
 * @param value - Arbitrary input of unknown shape.
 * @returns The trimmed, lowercased string, or `undefined` when
 * `normalizeString` rejects the input.
 */
function normalizeLowercaseString(value: unknown): string | undefined {
  const trimmed = normalizeString(value);
  return trimmed === undefined ? undefined : trimmed.toLowerCase();
}
/**
 * Coerces a configured timeout into a positive whole number.
 *
 * @param value - Raw config value of unknown shape.
 * @returns The value rounded down to an integer, or `undefined` when it is not
 * a finite number or rounds down to less than 1.
 */
function normalizeTimeoutMs(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  // Floor before the positivity check so fractional values in (0, 1) cannot
  // slip through as a timeout of 0.
  const wholeMs = Math.floor(value);
  return wholeMs > 0 ? wholeMs : undefined;
}
/**
 * Parses a `provider/model` string into a voice model reference.
 *
 * The string is split at the first `/`; everything after it (including any
 * further slashes) is the model. The provider part is lowercased.
 *
 * @param value - Arbitrary input; anything that is not a non-blank string is rejected.
 * @returns The parsed reference, or `undefined` when either side of the
 * separator is missing or blank.
 */
function parseVoiceModelRef(value: unknown): VoiceModelRef | undefined {
  const text = normalizeString(value);
  if (text === undefined) {
    return undefined;
  }
  const separatorAt = text.indexOf("/");
  // A separator at index 0 (or none at all) means there is no provider part.
  const hasProviderPart = separatorAt > 0;
  // A separator in the last position means there is no model part.
  const hasModelPart = separatorAt !== text.length - 1;
  if (!hasProviderPart || !hasModelPart) {
    return undefined;
  }
  const provider = normalizeLowercaseString(text.slice(0, separatorAt));
  const model = normalizeString(text.slice(separatorAt + 1));
  if (!provider || !model) {
    return undefined;
  }
  return { provider, model };
}
/**
 * Compares two provider ids, ignoring case and surrounding whitespace.
 *
 * @returns `true` only when both ids normalize to the same non-empty string;
 * a missing or blank id never matches anything.
 */
function sameProvider(left: string | undefined, right: string | undefined): boolean {
  const normalizedLeft = normalizeLowercaseString(left);
  if (!normalizedLeft) {
    return false;
  }
  return normalizedLeft === normalizeLowercaseString(right);
}
/**
 * Tells whether `providerId` names the given provider.
 *
 * @param provider - Provider whose `id` and `aliases` are checked.
 * @param providerId - Id to look for; compared via `sameProvider`.
 * @returns `true` when the id matches the provider's `id` or any alias.
 */
export function providerMatchesId(provider: VoiceModelProvider, providerId?: string): boolean {
  if (sameProvider(provider.id, providerId)) {
    return true;
  }
  for (const alias of provider.aliases ?? []) {
    if (sameProvider(alias, providerId)) {
      return true;
    }
  }
  return false;
}
/**
 * Finds the first provider whose id or alias matches `providerId`.
 *
 * @param params.providers - Providers to search, in priority order.
 * @param params.providerId - Id or alias to look up.
 * @returns The first matching provider, or `undefined` when none matches.
 */
export function findVoiceModelProvider<T extends VoiceModelProvider>(params: {
  providers: readonly T[];
  providerId?: string;
}): T | undefined {
  const { providers, providerId } = params;
  for (const provider of providers) {
    if (providerMatchesId(provider, providerId)) {
      return provider;
    }
  }
  return undefined;
}
/**
 * Tells whether a provider lists the given model, either as its default model
 * or in its `models` array. Comparison ignores surrounding whitespace but is
 * case-sensitive.
 *
 * @param provider - Provider to inspect; `undefined` never supports anything.
 * @param model - Requested model of unknown shape.
 * @returns `true` when the trimmed model equals a trimmed known model;
 * `false` for a missing provider or a blank / non-string model.
 */
export function voiceProviderSupportsModel(
  provider: VoiceModelProvider | undefined,
  model: unknown,
): boolean {
  if (!provider) {
    return false;
  }
  const requestedModel = normalizeString(model);
  if (requestedModel === undefined) {
    // Without this guard a blank model would compare equal to a missing or
    // blank defaultModel/models entry (undefined === undefined) and be
    // reported as supported.
    return false;
  }
  return [provider.defaultModel, ...(provider.models ?? [])].some(
    (candidate) => normalizeString(candidate) === requestedModel,
  );
}
/**
 * Extracts the ordered list of voice model references from raw configuration.
 *
 * Accepts either a single `provider/model` string or an object with `primary`,
 * `fallbacks` and `timeoutMs` fields. Entries that fail to parse are dropped.
 * A valid `timeoutMs` on the object form is attached to every returned
 * reference; the string form never carries a timeout.
 *
 * @param config - Raw configuration value of unknown shape.
 * @returns Parsed references, primary first, then fallbacks in order.
 */
export function resolveVoiceModelRefs(config: unknown): VoiceModelRef[] {
  const voiceModel = config as VoiceModelConfig | undefined;
  if (typeof voiceModel === "string") {
    const single = parseVoiceModelRef(voiceModel);
    return single ? [single] : [];
  }
  if (voiceModel === null || typeof voiceModel !== "object" || Array.isArray(voiceModel)) {
    return [];
  }
  const timeoutMs = normalizeTimeoutMs(voiceModel.timeoutMs);
  // Primary comes first; a non-array `fallbacks` value is ignored.
  const rawRefs: unknown[] = [
    voiceModel.primary,
    ...(Array.isArray(voiceModel.fallbacks) ? voiceModel.fallbacks : []),
  ];
  return rawRefs.flatMap((rawRef) => {
    const parsed = parseVoiceModelRef(rawRef);
    if (!parsed) {
      return [];
    }
    return [timeoutMs === undefined ? parsed : { ...parsed, timeoutMs }];
  });
}
/**
 * Resolves configured voice model references and keeps only those backed by a
 * known provider that lists the referenced model.
 *
 * @param params.config - Raw voice-model configuration.
 * @param params.providers - Known providers.
 * @param params.providerId - When set, only references belonging to this
 * provider (by id or alias) are kept.
 * @returns Supported references in configuration order, with `provider`
 * rewritten to the matching provider's canonical `id`.
 */
export function resolveSupportedVoiceModelRefs(params: {
  config: unknown;
  providers: readonly VoiceModelProvider[];
  providerId?: string;
}): VoiceModelRef[] {
  const { config, providers, providerId } = params;
  const supported: VoiceModelRef[] = [];
  for (const ref of resolveVoiceModelRefs(config)) {
    const owner = findVoiceModelProvider({ providers, providerId: ref.provider });
    if (!owner) {
      continue;
    }
    if (providerId && !providerMatchesId(owner, providerId)) {
      continue;
    }
    if (!voiceProviderSupportsModel(owner, ref.model)) {
      continue;
    }
    // Replace a possibly aliased provider name with the canonical id.
    supported.push({ ...ref, provider: owner.id });
  }
  return supported;
}
/**
 * Builds the ordered list of provider candidates to try.
 *
 * Order of the result:
 * 1. the primary provider, once per supported configured model for it, or a
 *    single bare entry when no configured model targets it;
 * 2. every other supported configured model, in configuration order;
 * 3. each remaining known provider not yet listed, without a model.
 *
 * @param params.primaryProvider - Preferred provider id or alias; used as-is
 * when it does not match any known provider.
 * @param params.providers - Known providers.
 * @param params.voiceModelConfig - Raw voice-model configuration.
 * @returns Candidates in the order described above.
 */
export function resolveVoiceProviderCandidates(params: {
  primaryProvider: string;
  providers: readonly VoiceModelProvider[];
  voiceModelConfig?: unknown;
}): VoiceProviderCandidate[] {
  const { primaryProvider, providers, voiceModelConfig } = params;
  const primaryId =
    findVoiceModelProvider({ providers, providerId: primaryProvider })?.id ?? primaryProvider;
  const supportedRefs = resolveSupportedVoiceModelRefs({
    config: voiceModelConfig,
    providers,
  });
  const toCandidate = (voiceModel: VoiceModelRef): VoiceProviderCandidate => ({
    provider: voiceModel.provider,
    voiceModel,
  });
  const primaryCandidates = supportedRefs
    .filter((ref) => ref.provider === primaryId)
    .map(toCandidate);
  const otherCandidates = supportedRefs
    .filter((ref) => ref.provider !== primaryId)
    .map(toCandidate);
  const ordered: VoiceProviderCandidate[] = [
    // The primary provider always leads, even without a configured model.
    ...(primaryCandidates.length > 0 ? primaryCandidates : [{ provider: primaryId }]),
    ...otherCandidates,
  ];
  const listedProviders = new Set(ordered.map((candidate) => candidate.provider));
  for (const { id } of providers) {
    if (listedProviders.has(id)) {
      continue;
    }
    ordered.push({ provider: id });
    listedProviders.add(id);
  }
  return ordered;
}
/**
 * Resolves the candidate for the primary provider only.
 *
 * @param params.primaryProvider - Preferred provider id or alias; used as-is
 * when it does not match any known provider.
 * @param params.providers - Known providers.
 * @param params.voiceModelConfig - Raw voice-model configuration.
 * @returns The primary provider paired with the first supported configured
 * model that belongs to it, or the provider alone when there is none.
 */
export function resolvePrimaryVoiceProviderCandidate(params: {
  primaryProvider: string;
  providers: readonly VoiceModelProvider[];
  voiceModelConfig?: unknown;
}): VoiceProviderCandidate {
  const registered = findVoiceModelProvider({
    providers: params.providers,
    providerId: params.primaryProvider,
  });
  const provider = registered?.id ?? params.primaryProvider;
  const [voiceModel] = resolveSupportedVoiceModelRefs({
    config: params.voiceModelConfig,
    providers: params.providers,
    providerId: provider,
  });
  if (!voiceModel) {
    return { provider };
  }
  return { provider, voiceModel };
}
/**
 * Looks up the configuration object for a provider.
 *
 * Keys are tried in this order: the trimmed `configuredProviderId`, the
 * provider's `id`, then each alias. For every key an exact own-property match
 * is attempted first, followed by a match that ignores case and surrounding
 * whitespace.
 *
 * @param params.providerConfigs - Configurations keyed by provider name.
 * @param params.provider - Provider whose configuration is wanted.
 * @param params.configuredProviderId - Optional name the user configured.
 * @returns The first matching configuration, or an empty object when nothing
 * matches or the matching entry is `undefined`.
 */
export function getVoiceProviderConfig<TConfig extends Record<string, unknown>>(params: {
  providerConfigs: Record<string, TConfig | undefined>;
  provider: VoiceModelProvider;
  configuredProviderId?: string;
}): TConfig {
  const { providerConfigs, provider, configuredProviderId } = params;
  const lookupKeys: string[] = [];
  for (const key of [
    normalizeString(configuredProviderId),
    provider.id,
    ...(provider.aliases ?? []),
  ]) {
    if (key) {
      lookupKeys.push(key);
    }
  }
  const configuredKeys = Object.keys(providerConfigs);
  const readConfig = (key: string): TConfig => providerConfigs[key] ?? ({} as TConfig);
  for (const lookupKey of lookupKeys) {
    if (Object.hasOwn(providerConfigs, lookupKey)) {
      return readConfig(lookupKey);
    }
    const wanted = normalizeLowercaseString(lookupKey);
    const looseMatch = configuredKeys.find((key) => normalizeLowercaseString(key) === wanted);
    if (looseMatch) {
      return readConfig(looseMatch);
    }
  }
  return {} as TConfig;
}
/**
 * Builds static catalog entries for every distinct model a provider declares.
 *
 * Models are taken from `defaultModel` followed by `models`, trimmed, and
 * de-duplicated while keeping first-seen order. Blank entries are skipped.
 *
 * @param params.provider - Provider whose models are catalogued.
 * @param params.capabilities - Capability flags attached to every entry (the
 * same object reference is shared by all entries).
 * @param params.modes - Optional modes attached to every entry.
 * @returns One entry per distinct model; the entry for the provider's default
 * model carries `default: true`.
 */
export function synthesizeVoiceModelCatalogEntries(params: {
  provider: VoiceModelProvider;
  capabilities: VoiceModelCapabilities;
  modes?: readonly string[];
}): VoiceModelCatalogEntry[] {
  const { provider, capabilities, modes } = params;
  // Compare against the trimmed default: model ids below are trimmed, so a
  // default with surrounding whitespace would otherwise never be flagged.
  const defaultModel = normalizeString(provider.defaultModel);
  const distinctModels = new Set<string>();
  for (const rawModel of [provider.defaultModel, ...(provider.models ?? [])]) {
    const model = normalizeString(rawModel);
    if (model) {
      distinctModels.add(model);
    }
  }
  return Array.from(distinctModels, (model) => {
    const entry: VoiceModelCatalogEntry = {
      kind: "voice",
      provider: provider.id,
      model,
      source: "static",
      capabilities,
    };
    if (provider.label) {
      entry.label = provider.label;
    }
    if (model === defaultModel) {
      entry.default = true;
    }
    if (modes) {
      entry.modes = modes;
    }
    return entry;
  });
}