From 8142e67d633d668b2a7e2e5466f994fb9ef038ef Mon Sep 17 00:00:00 2001 From: Alex Knight Date: Sun, 3 May 2026 21:41:50 +1000 Subject: [PATCH] fix(plugins): start configured speech providers (#76540) * fix(plugins): start configured speech providers * fix(plugins): mirror tts provider selection --- CHANGELOG.md | 1 + src/plugins/channel-plugin-ids.test.ts | 82 ++++++++ src/plugins/gateway-startup-plugin-ids.ts | 75 +++++++ .../gateway-startup-speech-providers.ts | 189 ++++++++++++++++++ 4 files changed, 347 insertions(+) create mode 100644 src/plugins/gateway-startup-speech-providers.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index cbf5fac062a..bb5a76e164e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ Docs: https://docs.openclaw.ai - Config/doctor: cap `.clobbered.*` forensic snapshots per config path and serialize snapshot writes so repeated `doctor --fix` recovery loops cannot flood the config directory. Fixes #76454; carries forward #65649. Thanks @JUSTICEESSIELP, @rsnow, and @vincentkoc. - Feishu: suppress duplicate text when replies send native voice media while preserving captions for ordinary audio files and falling back to text plus attachment links when voice uploads fail. - Feishu: send the skipped reply text when `audioAsVoice` falls back to a generic file attachment after transcode failure, so voice-intent replies do not lose their caption. +- TTS/plugins: activate the configured speech provider plugin during Gateway startup, so Microsoft and Local CLI voice replies work immediately after selecting them instead of staying invisible in the startup plugin set. Fixes #76481. Thanks @amknight. - Feishu: keep packaged Feishu startup from bundling the Lark SDK's ESM `__dirname` path by loading the SDK as a plugin-local runtime dependency. Fixes #76291 and #76494. (#76392) Thanks @zqchris. 
- Plugins/npm: build package-local runtime dist files for publishable plugins and stop listing root-package-excluded plugin sidecars in the core package metadata, so npm plugin installs such as `@openclaw/diffs` and `@openclaw/discord` no longer publish source-only runtime payloads. Fixes #76426. Thanks @PrinceOfEgypt. - Channels/secrets: resolve SecretRef-backed channel credentials through external plugin secret contracts after the plugin split, covering runtime startup, target discovery, webhook auth, disabled-account enumeration, and late-bound web_search config. Fixes #76371. (#76449) Thanks @joshavant and @neeravmakwana. diff --git a/src/plugins/channel-plugin-ids.test.ts b/src/plugins/channel-plugin-ids.test.ts index 5c6ab6c8d0f..6ea59c958cd 100644 --- a/src/plugins/channel-plugin-ids.test.ts +++ b/src/plugins/channel-plugin-ids.test.ts @@ -131,6 +131,24 @@ function createManifestRegistryFixture(): PluginManifestRegistry { providers: ["demo-provider"], cliBackends: ["demo-cli"], }, + { + id: "microsoft", + channels: [], + origin: "bundled", + enabledByDefault: true, + providers: [], + cliBackends: [], + contracts: { speechProviders: ["microsoft"] }, + }, + { + id: "tts-local-cli", + channels: [], + origin: "bundled", + enabledByDefault: true, + providers: [], + cliBackends: [], + contracts: { speechProviders: ["tts-local-cli", "cli"] }, + }, { id: "anthropic", channels: [], @@ -575,6 +593,70 @@ describe("resolveGatewayStartupPluginIds", () => { }), ["demo-channel", "browser", "memory-core"], ], + [ + "includes configured bundled speech providers at startup", + { + channels: {}, + messages: { tts: { provider: "microsoft" } }, + } as OpenClawConfig, + ["browser", "microsoft", "memory-core"], + ], + [ + "includes bundled speech providers configured by provider block", + { + channels: {}, + messages: { tts: { providers: { "tts-local-cli": { command: "say" } } } }, + } as OpenClawConfig, + ["browser", "tts-local-cli", "memory-core"], + ], + [ + "maps legacy edge 
TTS selection to the Microsoft speech plugin", + { + channels: {}, + messages: { tts: { provider: "edge" } }, + } as OpenClawConfig, + ["browser", "microsoft", "memory-core"], + ], + [ + "includes active persona speech providers at startup", + { + channels: {}, + messages: { + tts: { + persona: "narrator", + personas: { + narrator: { + label: "Narrator", + provider: "microsoft", + }, + }, + }, + }, + } as OpenClawConfig, + ["browser", "microsoft", "memory-core"], + ], + [ + "honors disabled speech provider config blocks at startup", + { + channels: {}, + messages: { + tts: { + provider: "microsoft", + providers: { microsoft: { enabled: false } }, + }, + }, + } as OpenClawConfig, + ["browser", "memory-core"], + ], + [ + "honors explicit plugin disablement for configured speech providers", + { + channels: {}, + messages: { tts: { provider: "microsoft" } }, + plugins: { entries: { microsoft: { enabled: false } } }, + } as OpenClawConfig, + ["browser", "memory-core"], + ], [ "includes explicitly enabled non-channel sidecars in startup scope", createStartupConfig({ diff --git a/src/plugins/gateway-startup-plugin-ids.ts b/src/plugins/gateway-startup-plugin-ids.ts index 94e04cfaf9a..47942a69c9b 100644 --- a/src/plugins/gateway-startup-plugin-ids.ts +++ b/src/plugins/gateway-startup-plugin-ids.ts @@ -14,6 +14,10 @@ import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js"; import { hasExplicitChannelConfig } from "./channel-presence-policy.js"; import { collectPluginConfigContractMatches } from "./config-contracts.js"; import { resolveEffectivePluginActivationState } from "./config-state.js"; +import { + collectConfiguredSpeechProviderIds, + normalizeConfiguredSpeechProviderIdForStartup, +} from "./gateway-startup-speech-providers.js"; import type { InstalledPluginIndexRecord } from "./installed-plugin-index.js"; import type { PluginManifestRecord, PluginManifestRegistry } from "./manifest-registry.js"; import { @@ -161,6 +165,64 @@ function 
hasConfiguredActivationPath(params: {
   );
 }

+/**
+ * Reports whether `manifest` declares a speech provider that the configuration selects.
+ *
+ * Each id listed under `contracts.speechProviders` is passed through
+ * `normalizeConfiguredSpeechProviderIdForStartup` before the set lookup. An empty
+ * configured set, a missing manifest, or a manifest without speech providers yields
+ * `false`.
+ */
+function manifestOwnsConfiguredSpeechProvider(params: {
+  manifest: PluginManifestRecord | undefined;
+  configuredSpeechProviderIds: ReadonlySet<string>;
+}): boolean {
+  if (params.configuredSpeechProviderIds.size === 0) {
+    return false;
+  }
+  return (params.manifest?.contracts?.speechProviders ?? []).some((providerId) => {
+    const normalized = normalizeConfiguredSpeechProviderIdForStartup(providerId);
+    return normalized ? params.configuredSpeechProviderIds.has(normalized) : false;
+  });
+}
+
+/**
+ * Decides whether a plugin is started at Gateway startup because it owns a configured
+ * speech provider.
+ *
+ * Checks run in this order:
+ * 1. the manifest must own a configured speech provider;
+ * 2. the plugin id must not be on the `deny` list of `pluginsConfig` or of
+ *    `activationSource.plugins`;
+ * 3. neither of those configs may carry `entries[pluginId].enabled === false`;
+ * 4. a plugin whose origin is `"bundled"` is then accepted;
+ * 5. any other origin is accepted only when `resolveEffectivePluginActivationState`
+ *    reports it both `enabled` and `explicitlyEnabled`.
+ */
+function canStartConfiguredSpeechProviderPlugin(params: {
+  plugin: InstalledPluginIndexRecord;
+  manifest: PluginManifestRecord | undefined;
+  config: OpenClawConfig;
+  // NOTE(review): both `ReturnType` annotations below arrived without their type argument
+  // and it cannot be recovered from this patch alone; restore it from the repository.
+  pluginsConfig: ReturnType;
+  activationSource: {
+    plugins: ReturnType;
+    rootConfig?: OpenClawConfig;
+  };
+  configuredSpeechProviderIds: ReadonlySet<string>;
+}): boolean {
+  if (
+    !manifestOwnsConfiguredSpeechProvider({
+      manifest: params.manifest,
+      configuredSpeechProviderIds: params.configuredSpeechProviderIds,
+    })
+  ) {
+    return false;
+  }
+  if (
+    params.pluginsConfig.deny.includes(params.plugin.pluginId) ||
+    params.activationSource.plugins.deny.includes(params.plugin.pluginId)
+  ) {
+    return false;
+  }
+  if (
+    params.pluginsConfig.entries[params.plugin.pluginId]?.enabled === false ||
+    params.activationSource.plugins.entries[params.plugin.pluginId]?.enabled === false
+  ) {
+    return false;
+  }
+  if (params.plugin.origin === "bundled") {
+    return true;
+  }
+  const activationState = resolveEffectivePluginActivationState({
+    id: params.plugin.pluginId,
+    origin: params.plugin.origin,
+    config: params.pluginsConfig,
+    rootConfig: params.config,
+    enabledByDefault: params.plugin.enabledByDefault,
+    activationSource: params.activationSource,
+  });
+  return activationState.enabled && activationState.explicitlyEnabled;
+}
+
 function canStartConfiguredRootPlugin(params: {
   plugin: InstalledPluginIndexRecord;
   manifest: PluginManifestRecord | undefined;
@@ -341,6 +403,7 @@
export function resolveGatewayStartupPluginPlanFromRegistry(params: {
   );
   const startupDreamingPluginIds = resolveGatewayStartupDreamingPluginIds(params.config);
   const manifestLookup = createManifestRegistryLookup(params.manifestRegistry);
+  const configuredSpeechProviderIds = collectConfiguredSpeechProviderIds(activationSourceConfig);
   const memorySlotStartupPluginId = resolveMemorySlotStartupPluginId({
     activationSourceConfig,
     activationSourcePlugins,
@@ -390,6 +453,18 @@ export function resolveGatewayStartupPluginPlanFromRegistry(params: {
       ) {
         return true;
       }
+      if (
+        canStartConfiguredSpeechProviderPlugin({
+          plugin,
+          manifest,
+          config: params.config,
+          pluginsConfig,
+          activationSource,
+          configuredSpeechProviderIds,
+        })
+      ) {
+        return true;
+      }
       if (
         !shouldConsiderForGatewayStartup({
           plugin,
diff --git a/src/plugins/gateway-startup-speech-providers.ts b/src/plugins/gateway-startup-speech-providers.ts
new file mode 100644
index 00000000000..c42771a7288
--- /dev/null
+++ b/src/plugins/gateway-startup-speech-providers.ts
@@ -0,0 +1,244 @@
+import type { OpenClawConfig } from "../config/types.openclaw.js";
+import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
+
+/**
+ * Keys of a TTS config object that hold settings rather than provider blocks. Any other
+ * key whose value is an object is treated as an inline provider block.
+ */
+const TTS_PROVIDER_CONFIG_RESERVED_KEYS = new Set([
+  "auto",
+  "enabled",
+  "maxTextLength",
+  "mode",
+  "modelOverrides",
+  "persona",
+  "personas",
+  "prefsPath",
+  "provider",
+  "providers",
+  "summaryModel",
+  "timeoutMs",
+]);
+
+/** Narrows `value` to a non-null object that is not an array. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return Boolean(value && typeof value === "object" && !Array.isArray(value));
+}
+
+/**
+ * Treats a config value as enabled unless it is literally `false` or an object whose
+ * `enabled` property is literally `false`.
+ */
+function isConfigActivationValueEnabled(value: unknown): boolean {
+  if (value === false) {
+    return false;
+  }
+  if (isRecord(value) && value.enabled === false) {
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Normalizes a configured speech provider id for startup matching.
+ *
+ * @param value - Raw config value; anything other than a string is rejected.
+ * @returns The result of `normalizeOptionalLowercaseString` with `"edge"` rewritten to
+ *   `"microsoft"`, or `undefined` when no usable id remains.
+ */
+export function normalizeConfiguredSpeechProviderIdForStartup(value: unknown): string | undefined {
+  if (typeof value !== "string") {
+    return undefined;
+  }
+  const normalized = normalizeOptionalLowercaseString(value);
+  if (!normalized) {
+    return undefined;
+  }
+  return normalized === "edge" ? "microsoft" : normalized;
+}
+
+/**
+ * Looks up whether a config block in `ttsConfig` switches `providerId` on or off.
+ *
+ * `ttsConfig.providers` is consulted first; when several of its keys normalize to the
+ * same id, the last one wins. Otherwise the first matching non-reserved object key of
+ * `ttsConfig` itself decides.
+ *
+ * @param providerId - Provider id, already normalized.
+ * @returns The block's enabled state, or `undefined` when no block matches.
+ */
+function resolveProviderConfigActivation(
+  ttsConfig: Record<string, unknown>,
+  providerId: string,
+): boolean | undefined {
+  let fromProviders: boolean | undefined;
+  if (isRecord(ttsConfig.providers)) {
+    for (const [key, providerConfig] of Object.entries(ttsConfig.providers)) {
+      if (normalizeConfiguredSpeechProviderIdForStartup(key) === providerId) {
+        fromProviders = isConfigActivationValueEnabled(providerConfig);
+      }
+    }
+  }
+  if (fromProviders !== undefined) {
+    return fromProviders;
+  }
+
+  for (const [key, providerConfig] of Object.entries(ttsConfig)) {
+    if (TTS_PROVIDER_CONFIG_RESERVED_KEYS.has(key) || !isRecord(providerConfig)) {
+      continue;
+    }
+    if (normalizeConfiguredSpeechProviderIdForStartup(key) === providerId) {
+      return isConfigActivationValueEnabled(providerConfig);
+    }
+  }
+  return undefined;
+}
+
+/**
+ * Adds the normalized form of `providerId` to `target`, unless no usable id remains or a
+ * matching config block in `ttsConfig` disables it.
+ */
+function addProviderIfEnabled(
+  target: Set<string>,
+  ttsConfig: Record<string, unknown>,
+  providerId: unknown,
+): void {
+  const normalized = normalizeConfiguredSpeechProviderIdForStartup(providerId);
+  if (!normalized) {
+    return;
+  }
+  if (resolveProviderConfigActivation(ttsConfig, normalized) !== false) {
+    target.add(normalized);
+  }
+}
+
+/**
+ * Returns the first object entry of `ttsConfig.personas` whose key matches
+ * `ttsConfig.persona` once both are passed through `normalizeOptionalLowercaseString`,
+ * or `undefined` when there is no such entry.
+ */
+function findActivePersona(
+  ttsConfig: Record<string, unknown>,
+): Record<string, unknown> | undefined {
+  const personaId = normalizeOptionalLowercaseString(
+    typeof ttsConfig.persona === "string" ? ttsConfig.persona : undefined,
+  );
+  if (!personaId || !isRecord(ttsConfig.personas)) {
+    return undefined;
+  }
+  for (const [id, persona] of Object.entries(ttsConfig.personas)) {
+    if (normalizeOptionalLowercaseString(id) === personaId && isRecord(persona)) {
+      return persona;
+    }
+  }
+  return undefined;
+}
+
+/**
+ * Adds the provider named by the active persona, if there is one. A config block inside
+ * the persona takes precedence over a root-level block when deciding whether the
+ * provider is disabled.
+ */
+function addActivePersonaProvider(target: Set<string>, ttsConfig: Record<string, unknown>): void {
+  const persona = findActivePersona(ttsConfig);
+  if (!persona) {
+    return;
+  }
+  const provider = normalizeConfiguredSpeechProviderIdForStartup(persona.provider);
+  if (!provider) {
+    return;
+  }
+  const rootActivation = resolveProviderConfigActivation(ttsConfig, provider);
+  const personaActivation = resolveProviderConfigActivation(persona, provider);
+  if ((personaActivation ?? rootActivation) !== false) {
+    target.add(provider);
+  }
+}
+
+/**
+ * Collects every speech provider id referenced by one TTS config object: the selected
+ * `provider`, the active persona's provider, each enabled key of `providers`, and each
+ * enabled non-reserved object key of the config itself. A `value` that is not an object,
+ * or that is an array, is ignored.
+ */
+function addConfiguredTtsProviderIds(target: Set<string>, value: unknown): void {
+  if (!isRecord(value)) {
+    return;
+  }
+  addProviderIfEnabled(target, value, value.provider);
+  addActivePersonaProvider(target, value);
+
+  if (isRecord(value.providers)) {
+    for (const [providerId, providerConfig] of Object.entries(value.providers)) {
+      if (isConfigActivationValueEnabled(providerConfig)) {
+        addProviderIfEnabled(target, value, providerId);
+      }
+    }
+  }
+  for (const [key, providerConfig] of Object.entries(value)) {
+    if (TTS_PROVIDER_CONFIG_RESERVED_KEYS.has(key) || !isRecord(providerConfig)) {
+      continue;
+    }
+    if (isConfigActivationValueEnabled(providerConfig)) {
+      addProviderIfEnabled(target, value, key);
+    }
+  }
+}
+
+/**
+ * Collects the normalized ids of all speech providers referenced anywhere in `config`.
+ *
+ * TTS config is read from `messages.tts`, each `agents.list[].tts`, each channel's `tts`
+ * and `voice.tts` (and the same two keys on every entry of the channel's `accounts`),
+ * and each `plugins.entries[].config.tts`.
+ *
+ * @returns The set of normalized provider ids; empty when none is configured.
+ */
+export function collectConfiguredSpeechProviderIds(config: OpenClawConfig): ReadonlySet<string> {
+  const configured = new Set<string>();
+  addConfiguredTtsProviderIds(configured, config.messages?.tts);
+
+  const agents = config.agents;
+  if (isRecord(agents) && Array.isArray(agents.list)) {
+    for (const agent of agents.list) {
+      if (isRecord(agent)) {
+        addConfiguredTtsProviderIds(configured, agent.tts);
+      }
+    }
+  }
+
+  const channels = config.channels;
+  if (isRecord(channels)) {
+    for (const channelConfig of Object.values(channels)) {
+      if (!isRecord(channelConfig)) {
+        continue;
+      }
+      addConfiguredTtsProviderIds(configured, channelConfig.tts);
+      if (isRecord(channelConfig.voice)) {
+        addConfiguredTtsProviderIds(configured, channelConfig.voice.tts);
+      }
+      if (isRecord(channelConfig.accounts)) {
+        for (const accountConfig of Object.values(channelConfig.accounts)) {
+          if (!isRecord(accountConfig)) {
+            continue;
+          }
+          addConfiguredTtsProviderIds(configured, accountConfig.tts);
+          if (isRecord(accountConfig.voice)) {
+            addConfiguredTtsProviderIds(configured, accountConfig.voice.tts);
+          }
+        }
+      }
+    }
+  }
+
+  const pluginEntries = config.plugins?.entries;
+  if (isRecord(pluginEntries)) {
+    for (const entry of Object.values(pluginEntries)) {
+      if (isRecord(entry) && isRecord(entry.config)) {
+        addConfiguredTtsProviderIds(configured, entry.config.tts);
+      }
+    }
+  }
+
+  return configured;
+}