fix(tts): merge allowlisted speech providers

This commit is contained in:
Peter Steinberger
2026-04-25 04:19:57 +01:00
parent cf07f01d0d
commit 73a6a2a6ab
4 changed files with 106 additions and 20 deletions

View File

@@ -84,6 +84,7 @@ Docs: https://docs.openclaw.ai
- Providers/OpenAI-compatible: skip null or non-object streaming chunks from custom providers instead of failing the turn after partial output. Fixes #51112.
- Providers/OpenAI-compatible: treat singular MLX-style `finish_reason: "tool_call"` as tool use instead of a provider error. Fixes #61499.
- Providers/ElevenLabs: omit the MP3-only `Accept` header for PCM telephony synthesis, so Voice Call requests for `pcm_22050` no longer receive MP3 audio. Fixes #67340. Thanks @marcchabot.
- Providers/Microsoft TTS: keep allowlisted bundled speech providers discoverable even when another speech plugin has already registered, so Edge/Microsoft TTS is available alongside OpenAI. Fixes #62117 and #66850.
- Providers/Microsoft TTS: honor legacy `messages.tts.providers.edge` voice settings after normalizing Edge TTS to the Microsoft provider. Fixes #64153.
- macOS Talk Mode: retry failed local ElevenLabs stream playback through gateway `talk.speak` before falling back to the system voice, so configured ElevenLabs voices still play when streaming playback fails. Fixes #65662.
- Plugins/Voice Call: reap stale pre-answer calls by default, honor configured TTS timeouts for Twilio media-stream playback, and fail empty telephony audio instead of completing as silence. Fixes #42071; supersedes #60957. Thanks @Ryce and @sliekens.

View File

@@ -1279,6 +1279,11 @@ Batches rapid text-only messages from the same sender into a single agent turn.
speed: 1.0,
},
},
microsoft: {
voice: "en-US-AvaMultilingualNeural",
lang: "en-US",
outputFormat: "audio-24khz-48kbitrate-mono-mp3",
},
openai: {
apiKey: "openai_api_key",
baseUrl: "https://api.openai.com/v1",
@@ -1295,6 +1300,7 @@ Batches rapid text-only messages from the same sender into a single agent turn.
- `summaryModel` overrides `agents.defaults.model.primary` for auto-summary.
- `modelOverrides` is enabled by default; `modelOverrides.allowProvider` defaults to `false` (opt-in).
- API keys fall back to `ELEVENLABS_API_KEY`/`XI_API_KEY` and `OPENAI_API_KEY`.
- Bundled speech providers are plugin-owned. If `plugins.allow` is set, include each TTS provider plugin you want to use, for example `microsoft` for Edge TTS. The legacy `edge` provider id is accepted as an alias for `microsoft`.
- `providers.openai.baseUrl` overrides the OpenAI TTS endpoint. Resolution order is config, then `OPENAI_TTS_BASE_URL`, then `https://api.openai.com/v1`.
- When `providers.openai.baseUrl` points to a non-OpenAI endpoint, OpenClaw treats it as an OpenAI-compatible TTS server and relaxes model/voice validation.

View File

@@ -203,7 +203,7 @@ describe("resolvePluginCapabilityProviders", () => {
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith();
});
it("keeps active capability providers even when cfg is passed", () => {
it("keeps active capability providers when cfg compat has no extra providers", () => {
const active = createEmptyPluginRegistry();
active.speechProviders.push({
pluginId: "microsoft",
@@ -233,11 +233,80 @@ describe("resolvePluginCapabilityProviders", () => {
expectResolvedCapabilityProviderIds(providers, ["microsoft"]);
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith();
expect(mocks.resolveRuntimePluginRegistry).not.toHaveBeenCalledWith({
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({
config: expect.anything(),
});
});
// Verifies that passing a cfg merges the providers of the registry resolved
// without arguments ("active") with those of the registry resolved from the
// config-derived load options ("loaded").
it("merges active and allowlisted bundled capability providers when cfg is passed", () => {
// Registry returned when resolveRuntimePluginRegistry is called with no arguments:
// it only contains the "openai" speech provider.
const active = createEmptyPluginRegistry();
active.speechProviders.push({
pluginId: "openai",
pluginName: "openai",
source: "test",
provider: {
id: "openai",
label: "openai",
isConfigured: () => true,
synthesize: async () => ({
audioBuffer: Buffer.from("x"),
outputFormat: "mp3",
voiceCompatible: false,
fileExtension: ".mp3",
}),
},
} as never);
// Registry returned when resolveRuntimePluginRegistry is called with any argument:
// it only contains the "microsoft" speech provider, which lists "edge" as an alias.
const loaded = createEmptyPluginRegistry();
loaded.speechProviders.push({
pluginId: "microsoft",
pluginName: "microsoft",
source: "test",
provider: {
id: "microsoft",
label: "microsoft",
aliases: ["edge"],
isConfigured: () => true,
synthesize: async () => ({
audioBuffer: Buffer.from("x"),
outputFormat: "mp3",
voiceCompatible: false,
fileExtension: ".mp3",
}),
},
} as never);
// The manifest registry reports "microsoft" as a bundled plugin that contributes
// the "microsoft" speech provider.
mocks.loadPluginManifestRegistry.mockReturnValue({
plugins: [
{
id: "microsoft",
origin: "bundled",
contracts: { speechProviders: ["microsoft"] },
},
] as never,
diagnostics: [],
});
// No-argument calls yield the active registry; calls with load options yield the loaded one.
mocks.resolveRuntimePluginRegistry.mockImplementation((params?: unknown) =>
params === undefined ? active : loaded,
);
// The config allowlists both plugins and selects TTS provider "edge".
const providers = resolvePluginCapabilityProviders({
key: "speechProviders",
cfg: {
plugins: { allow: ["openai", "microsoft"] },
messages: { tts: { provider: "edge" } },
} as OpenClawConfig,
});
// Both providers are expected, with the active registry's provider listed first.
expectResolvedCapabilityProviderIds(providers, ["openai", "microsoft"]);
// The registry must have been resolved once without arguments and once with a
// config whose plugins.allow still contains both allowlisted plugin ids.
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith();
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({
config: expect.objectContaining({
plugins: expect.objectContaining({
allow: ["openai", "microsoft"],
}),
}),
});
});
it.each([
["memoryEmbeddingProviders", "memoryEmbeddingProviders"],
["speechProviders", "speechProviders"],

View File

@@ -98,6 +98,30 @@ function findProviderById<K extends CapabilityProviderRegistryKey>(
return undefined;
}
/**
 * Combines the providers of two registry entry lists into one list.
 *
 * Providers that carry a truthy `id` are de-duplicated by that id: `left` is
 * scanned before `right`, and the first provider seen for an id is kept.
 * Providers without a truthy `id` cannot be de-duplicated, so all of them are
 * kept and appended after the identified providers, in encounter order.
 *
 * @param left - Registry entries whose providers take precedence on id clashes.
 * @param right - Registry entries that only contribute ids not present in `left`.
 * @returns Identified providers in first-seen order, followed by id-less ones.
 */
function mergeCapabilityProviders<K extends CapabilityProviderRegistryKey>(
  left: PluginRegistry[K],
  right: PluginRegistry[K],
): CapabilityProviderForKey<K>[] {
  const byId = new Map<string, CapabilityProviderForKey<K>>();
  const withoutId: CapabilityProviderForKey<K>[] = [];

  for (const source of [left, right]) {
    for (const entry of source) {
      const candidate = entry.provider as CapabilityProviderForKey<K> & { id?: string };
      if (candidate.id) {
        // First occurrence wins, so earlier sources shadow later ones.
        if (!byId.has(candidate.id)) {
          byId.set(candidate.id, candidate);
        }
      } else {
        withoutId.push(candidate);
      }
    }
  }

  return Array.from(byId.values()).concat(withoutId);
}
export function resolvePluginCapabilityProvider<K extends CapabilityProviderRegistryKey>(params: {
key: K;
providerId: string;
@@ -134,29 +158,15 @@ export function resolvePluginCapabilityProviders<K extends CapabilityProviderReg
}): CapabilityProviderForKey<K>[] {
const activeRegistry = resolveRuntimePluginRegistry();
const activeProviders = activeRegistry?.[params.key] ?? [];
if (activeProviders.length > 0 && params.key !== "memoryEmbeddingProviders") {
if (activeProviders.length > 0 && params.key !== "memoryEmbeddingProviders" && !params.cfg) {
return activeProviders.map((entry) => entry.provider) as CapabilityProviderForKey<K>[];
}
const compatConfig = resolveCapabilityProviderConfig({ key: params.key, cfg: params.cfg });
const loadOptions = compatConfig === undefined ? undefined : { config: compatConfig };
const registry = resolveRuntimePluginRegistry(loadOptions);
const loadedProviders = registry?.[params.key] ?? [];
if (params.key !== "memoryEmbeddingProviders") {
return (registry?.[params.key] ?? []).map(
(entry) => entry.provider,
) as CapabilityProviderForKey<K>[];
return mergeCapabilityProviders(activeProviders, loadedProviders);
}
const merged = new Map<string, CapabilityProviderForKey<K>>();
for (const entry of activeProviders) {
const provider = entry.provider as CapabilityProviderForKey<K> & { id?: string };
if (provider.id) {
merged.set(provider.id, provider);
}
}
for (const entry of registry?.[params.key] ?? []) {
const provider = entry.provider as CapabilityProviderForKey<K> & { id?: string };
if (provider.id && !merged.has(provider.id)) {
merged.set(provider.id, provider);
}
}
return [...merged.values()];
return mergeCapabilityProviders(activeProviders, loadedProviders);
}