diff --git a/CHANGELOG.md b/CHANGELOG.md index 747a58bc816..5f51c46e971 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -84,6 +84,7 @@ Docs: https://docs.openclaw.ai - Providers/OpenAI-compatible: skip null or non-object streaming chunks from custom providers instead of failing the turn after partial output. Fixes #51112. - Providers/OpenAI-compatible: treat singular MLX-style `finish_reason: "tool_call"` as tool use instead of a provider error. Fixes #61499. - Providers/ElevenLabs: omit the MP3-only `Accept` header for PCM telephony synthesis, so Voice Call requests for `pcm_22050` no longer receive MP3 audio. Fixes #67340. Thanks @marcchabot. +- Providers/Microsoft TTS: keep allowlisted bundled speech providers discoverable even when another speech plugin has already registered, so Edge/Microsoft TTS is available alongside OpenAI. Fixes #62117 and #66850. - Providers/Microsoft TTS: honor legacy `messages.tts.providers.edge` voice settings after normalizing Edge TTS to the Microsoft provider. Fixes #64153. - macOS Talk Mode: retry failed local ElevenLabs stream playback through gateway `talk.speak` before falling back to the system voice, so configured ElevenLabs voices still play when streaming playback fails. Fixes #65662. - Plugins/Voice Call: reap stale pre-answer calls by default, honor configured TTS timeouts for Twilio media-stream playback, and fail empty telephony audio instead of completing as silence. Fixes #42071; supersedes #60957. Thanks @Ryce and @sliekens. diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md index ad7fff44e7e..fe82d8dc5d5 100644 --- a/docs/gateway/config-agents.md +++ b/docs/gateway/config-agents.md @@ -1279,6 +1279,11 @@ Batches rapid text-only messages from the same sender into a single agent turn. speed: 1.0, }, }, + microsoft: { + voice: "en-US-AvaMultilingualNeural", + lang: "en-US", + outputFormat: "audio-24khz-48kbitrate-mono-mp3", + }, openai: { apiKey: "openai_api_key", baseUrl: "https://api.openai.com/v1", @@ -1295,6 +1300,7 @@ Batches rapid text-only messages from the same sender into a single agent turn. - `summaryModel` overrides `agents.defaults.model.primary` for auto-summary. - `modelOverrides` is enabled by default; `modelOverrides.allowProvider` defaults to `false` (opt-in). - API keys fall back to `ELEVENLABS_API_KEY`/`XI_API_KEY` and `OPENAI_API_KEY`. +- Bundled speech providers are plugin-owned. If `plugins.allow` is set, include each TTS provider plugin you want to use, for example `microsoft` for Edge TTS. The legacy `edge` provider id is accepted as an alias for `microsoft`. - `providers.openai.baseUrl` overrides the OpenAI TTS endpoint. Resolution order is config, then `OPENAI_TTS_BASE_URL`, then `https://api.openai.com/v1`. - When `providers.openai.baseUrl` points to a non-OpenAI endpoint, OpenClaw treats it as an OpenAI-compatible TTS server and relaxes model/voice validation. diff --git a/src/plugins/capability-provider-runtime.test.ts b/src/plugins/capability-provider-runtime.test.ts index 7cbd3018df8..e4bdc05169b 100644 --- a/src/plugins/capability-provider-runtime.test.ts +++ b/src/plugins/capability-provider-runtime.test.ts @@ -203,7 +203,7 @@ describe("resolvePluginCapabilityProviders", () => { expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith(); }); - it("keeps active capability providers even when cfg is passed", () => { + it("keeps active capability providers when cfg compat has no extra providers", () => { const active = createEmptyPluginRegistry(); active.speechProviders.push({ pluginId: "microsoft", @@ -233,11 +233,80 @@ describe("resolvePluginCapabilityProviders", () => { expectResolvedCapabilityProviderIds(providers, ["microsoft"]); expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith(); - expect(mocks.resolveRuntimePluginRegistry).not.toHaveBeenCalledWith({ + expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({ config: expect.anything(), }); }); + it("merges active and allowlisted bundled capability providers when cfg is passed", () => { + const active = createEmptyPluginRegistry(); + active.speechProviders.push({ + pluginId: "openai", + pluginName: "openai", + source: "test", + provider: { + id: "openai", + label: "openai", + isConfigured: () => true, + synthesize: async () => ({ + audioBuffer: Buffer.from("x"), + outputFormat: "mp3", + voiceCompatible: false, + fileExtension: ".mp3", + }), + }, + } as never); + const loaded = createEmptyPluginRegistry(); + loaded.speechProviders.push({ + pluginId: "microsoft", + pluginName: "microsoft", + source: "test", + provider: { + id: "microsoft", + label: "microsoft", + aliases: ["edge"], + isConfigured: () => true, + synthesize: async () => ({ + audioBuffer: Buffer.from("x"), + outputFormat: "mp3", + voiceCompatible: false, + fileExtension: ".mp3", + }), + }, + } as never); + mocks.loadPluginManifestRegistry.mockReturnValue({ + plugins: [ + { + id: "microsoft", + origin: "bundled", + contracts: { speechProviders: ["microsoft"] }, + }, + ] as never, + diagnostics: [], + }); + mocks.resolveRuntimePluginRegistry.mockImplementation((params?: unknown) => + params === undefined ? active : loaded, + ); + + const providers = resolvePluginCapabilityProviders({ + key: "speechProviders", + cfg: { + plugins: { allow: ["openai", "microsoft"] }, + messages: { tts: { provider: "edge" } }, + } as OpenClawConfig, + }); + + expectResolvedCapabilityProviderIds(providers, ["openai", "microsoft"]); + expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith(); + expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({ + config: expect.objectContaining({ + plugins: expect.objectContaining({ + allow: ["openai", "microsoft"], + }), + }), + }); + }); + it.each([ ["memoryEmbeddingProviders", "memoryEmbeddingProviders"], ["speechProviders", "speechProviders"], diff --git a/src/plugins/capability-provider-runtime.ts b/src/plugins/capability-provider-runtime.ts index d6362aa60e0..27fd2596d1a 100644 --- a/src/plugins/capability-provider-runtime.ts +++ b/src/plugins/capability-provider-runtime.ts @@ -98,6 +98,30 @@ function findProviderById( return undefined; } +function mergeCapabilityProviders( + left: PluginRegistry[K], + right: PluginRegistry[K], +): CapabilityProviderForKey[] { + const merged = new Map>(); + const unnamed: CapabilityProviderForKey[] = []; + const addEntries = (entries: PluginRegistry[K]) => { + for (const entry of entries) { + const provider = entry.provider as CapabilityProviderForKey & { id?: string }; + if (!provider.id) { + unnamed.push(provider); + continue; + } + if (!merged.has(provider.id)) { + merged.set(provider.id, provider); + } + } + }; + + addEntries(left); + addEntries(right); + return [...merged.values(), ...unnamed]; +} + export function resolvePluginCapabilityProvider(params: { key: K; providerId: string; @@ -134,29 +158,15 @@ export function resolvePluginCapabilityProviders[] { const activeRegistry = resolveRuntimePluginRegistry(); const activeProviders = activeRegistry?.[params.key] ?? []; - if (activeProviders.length > 0 && params.key !== "memoryEmbeddingProviders") { + if (activeProviders.length > 0 && params.key !== "memoryEmbeddingProviders" && !params.cfg) { return activeProviders.map((entry) => entry.provider) as CapabilityProviderForKey[]; } const compatConfig = resolveCapabilityProviderConfig({ key: params.key, cfg: params.cfg }); const loadOptions = compatConfig === undefined ? undefined : { config: compatConfig }; const registry = resolveRuntimePluginRegistry(loadOptions); + const loadedProviders = registry?.[params.key] ?? []; if (params.key !== "memoryEmbeddingProviders") { - return (registry?.[params.key] ?? []).map( - (entry) => entry.provider, - ) as CapabilityProviderForKey[]; + return mergeCapabilityProviders(activeProviders, loadedProviders); } - const merged = new Map>(); - for (const entry of activeProviders) { - const provider = entry.provider as CapabilityProviderForKey & { id?: string }; - if (provider.id) { - merged.set(provider.id, provider); - } - } - for (const entry of registry?.[params.key] ?? []) { - const provider = entry.provider as CapabilityProviderForKey & { id?: string }; - if (provider.id && !merged.has(provider.id)) { - merged.set(provider.id, provider); - } - } - return [...merged.values()]; + return mergeCapabilityProviders(activeProviders, loadedProviders); }