fix(tts): keep speech fallback discovery scoped

This commit is contained in:
Peter Steinberger
2026-04-25 05:37:44 +01:00
parent b8239be46b
commit c11730fd09
6 changed files with 386 additions and 129 deletions

View File

@@ -26,6 +26,7 @@ describe("image-generation provider registry allowlist fallback", () => {
expect(getImageGenerationProvider("openai", cfg as OpenClawConfig)).toBeUndefined();
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({
config: compatConfig,
activate: false,
});
});
});

View File

@@ -27,6 +27,7 @@ describe("media-understanding provider registry allowlist fallback", () => {
expect(getMediaUnderstandingProvider("openai", registry)).toBeUndefined();
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({
config: compatConfig,
activate: false,
});
});
});

View File

@@ -82,6 +82,7 @@ function expectBundledCompatLoadPath(params: {
});
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({
config: params.enablementCompat,
activate: false,
});
}
@@ -203,7 +204,36 @@ describe("resolvePluginCapabilityProviders", () => {
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith();
});
it("keeps active capability providers when cfg compat has no extra providers", () => {
it("uses active non-speech capability providers even when cfg is passed", () => {
  // Arrange: the mocked runtime registry exposes a single media-understanding
  // provider ("deepgram").
  const deepgramEntry = {
    pluginId: "deepgram",
    pluginName: "Deepgram",
    source: "test",
    provider: {
      id: "deepgram",
      capabilities: ["audio"],
    },
  };
  const activeRegistry = createEmptyPluginRegistry();
  activeRegistry.mediaUnderstandingProviders.push(deepgramEntry as never);
  mocks.resolveRuntimePluginRegistry.mockReturnValue(activeRegistry);

  // Act: resolve with a cfg that names the same provider under tools.media.models.
  const cfg = {
    tools: {
      media: {
        models: [{ provider: "deepgram" }],
      },
    },
  } as OpenClawConfig;
  const resolved = resolvePluginCapabilityProviders({
    key: "mediaUnderstandingProviders",
    cfg,
  });

  // Assert: the active provider is returned, the manifest registry is never
  // loaded, and the runtime registry was requested without any load options.
  expectResolvedCapabilityProviderIds(resolved, ["deepgram"]);
  expect(mocks.loadPluginManifestRegistry).not.toHaveBeenCalled();
  expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith();
});
it("keeps active speech providers when cfg requests an active provider alias", () => {
const active = createEmptyPluginRegistry();
active.speechProviders.push({
pluginId: "microsoft",
@@ -222,9 +252,7 @@ describe("resolvePluginCapabilityProviders", () => {
}),
},
} as never);
mocks.resolveRuntimePluginRegistry.mockImplementation((params?: unknown) =>
params === undefined ? active : createEmptyPluginRegistry(),
);
mocks.resolveRuntimePluginRegistry.mockReturnValue(active);
const providers = resolvePluginCapabilityProviders({
key: "speechProviders",
@@ -235,10 +263,8 @@ describe("resolvePluginCapabilityProviders", () => {
});
expectResolvedCapabilityProviderIds(providers, ["microsoft"]);
expect(mocks.loadPluginManifestRegistry).not.toHaveBeenCalled();
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith();
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({
config: expect.anything(),
});
});
it("keeps active capability providers when cfg has no explicit plugin config", () => {
@@ -349,9 +375,94 @@ describe("resolvePluginCapabilityProviders", () => {
allow: ["openai", "microsoft"],
}),
}),
activate: false,
});
});
it("does not merge unrelated bundled capability providers when cfg requests one provider", () => {
  // Builds a speech-provider registry entry whose plugin id, plugin name,
  // provider id and label all equal `id`. `aliases` is only present on the
  // provider when it is supplied.
  const speechEntry = (id: string, aliases?: string[]) => ({
    pluginId: id,
    pluginName: id,
    source: "test",
    provider: {
      id,
      label: id,
      ...(aliases ? { aliases } : {}),
      isConfigured: () => true,
      synthesize: async () => ({
        audioBuffer: Buffer.from("x"),
        outputFormat: "mp3",
        voiceCompatible: false,
        fileExtension: ".mp3",
      }),
    },
  });
  // Minimal manifest record for a bundled plugin contributing one speech provider.
  const bundledSpeechManifest = (id: string) => ({
    id,
    origin: "bundled",
    contracts: { speechProviders: [id] },
  });

  // Active registry: only "openai".
  const activeRegistry = createEmptyPluginRegistry();
  activeRegistry.speechProviders.push(speechEntry("openai") as never);

  // Registry handed back for any call that passes load options:
  // "microsoft" (alias "edge") and "elevenlabs".
  const loadedRegistry = createEmptyPluginRegistry();
  loadedRegistry.speechProviders.push(
    speechEntry("microsoft", ["edge"]) as never,
    speechEntry("elevenlabs") as never,
  );

  mocks.loadPluginManifestRegistry.mockReturnValue({
    plugins: [bundledSpeechManifest("microsoft"), bundledSpeechManifest("elevenlabs")] as never,
    diagnostics: [],
  });
  // No arguments -> active registry; any arguments -> loaded registry.
  mocks.resolveRuntimePluginRegistry.mockImplementation((params?: unknown) => {
    if (params === undefined) {
      return activeRegistry;
    }
    return loadedRegistry;
  });

  // cfg allows all three plugins but only requests TTS provider "edge".
  const cfg = {
    plugins: { allow: ["openai", "microsoft", "elevenlabs"] },
    messages: { tts: { provider: "edge" } },
  } as OpenClawConfig;
  const resolved = resolvePluginCapabilityProviders({
    key: "speechProviders",
    cfg,
  });

  // "elevenlabs" was loaded but must not appear in the result.
  expectResolvedCapabilityProviderIds(resolved, ["openai", "microsoft"]);
});
it.each([
["memoryEmbeddingProviders", "memoryEmbeddingProviders"],
["speechProviders", "speechProviders"],
@@ -384,6 +495,7 @@ describe("resolvePluginCapabilityProviders", () => {
expectNoResolvedCapabilityProviders(providers);
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({
config: expect.anything(),
activate: false,
});
});
@@ -424,7 +536,10 @@ describe("resolvePluginCapabilityProviders", () => {
config: undefined,
env: process.env,
});
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({ config: compatConfig });
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({
config: compatConfig,
activate: false,
});
});
it("loads only the bundled owner plugin for a targeted provider lookup", () => {
@@ -488,6 +603,7 @@ describe("resolvePluginCapabilityProviders", () => {
});
expect(mocks.resolveRuntimePluginRegistry).toHaveBeenCalledWith({
config: enablementCompat,
activate: false,
});
});
});

View File

@@ -123,6 +123,81 @@ function mergeCapabilityProviders<K extends CapabilityProviderRegistryKey>(
return [...merged.values(), ...unnamed];
}
/**
 * Adds every own enumerable key of `value` to `target`, trimmed and lower-cased.
 *
 * Nothing is added when `value` is not a non-null, non-array object, and keys
 * that are empty after trimming are skipped.
 *
 * @param target - Set that receives the normalized keys (mutated in place).
 * @param value - Candidate object whose keys should be collected.
 */
function addObjectKeys(target: Set<string>, value: unknown): void {
  if (typeof value === "object" && value !== null && !Array.isArray(value)) {
    const normalizedKeys = Object.keys(value).map((rawKey) => rawKey.trim().toLowerCase());
    for (const id of normalizedKeys) {
      if (id !== "") {
        target.add(id);
      }
    }
  }
}
/**
 * Adds `value` to `target` when it is a string that is non-empty after
 * trimming; the stored form is trimmed and lower-cased.
 *
 * @param target - Set that receives the normalized value (mutated in place).
 * @param value - Candidate value; anything that is not a string is ignored.
 */
function addStringValue(target: Set<string>, value: unknown): void {
  const id = typeof value === "string" ? value.trim().toLowerCase() : "";
  if (id !== "") {
    target.add(id);
  }
}
/**
 * Collects the provider ids that `cfg` mentions for speech, normalized by the
 * `addStringValue` / `addObjectKeys` helpers.
 *
 * Sources read, in order:
 * - `cfg.messages.tts.provider` (string value)
 * - keys of `cfg.messages.tts.providers`
 * - keys of `cfg.models.providers`
 *
 * @param cfg - Configuration to inspect; `undefined` yields an empty set.
 * @returns A fresh set of requested provider ids.
 */
function collectRequestedSpeechProviderIds(cfg: OpenClawConfig | undefined): Set<string> {
  const ids = new Set<string>();
  const rawTts: unknown = cfg?.messages?.tts;
  // Only read TTS settings when they are an actual object; otherwise there is
  // nothing to contribute from this section.
  if (typeof rawTts === "object" && rawTts !== null) {
    const ttsSettings = rawTts as Record<string, unknown>;
    addStringValue(ids, ttsSettings.provider);
    addObjectKeys(ids, ttsSettings.providers);
  }
  addObjectKeys(ids, cfg?.models?.providers);
  return ids;
}
/**
 * Deletes from `requested` every id and alias (lower-cased) exposed by the
 * given provider entries, leaving only the ids no entry covers.
 *
 * `entries` is typed as `unknown[]`, so each entry is narrowed at runtime:
 * entries that are not objects, or whose `provider` is missing or not an
 * object, are skipped instead of throwing a TypeError.
 *
 * @param requested - Set of requested provider ids (mutated in place).
 * @param entries - Registry entries, each expected to carry a `provider`
 *   with an optional string `id` and optional `aliases` array.
 */
function removeActiveProviderIds(requested: Set<string>, entries: readonly unknown[]): void {
  for (const entry of entries) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }
    const provider = (entry as { provider?: unknown }).provider;
    if (typeof provider !== "object" || provider === null) {
      continue;
    }
    const { id, aliases } = provider as { id?: unknown; aliases?: unknown };
    if (typeof id === "string") {
      requested.delete(id.toLowerCase());
    }
    if (Array.isArray(aliases)) {
      for (const alias of aliases) {
        // Non-string aliases are ignored rather than coerced.
        if (typeof alias === "string") {
          requested.delete(alias.toLowerCase());
        }
      }
    }
  }
}
/**
 * Narrows freshly loaded provider entries to those the configuration asked for.
 *
 * Returns an empty list unless `key` is `"speechProviders"` and `requested`
 * is non-empty. Otherwise it keeps each entry whose provider `id`, or any
 * string in its `aliases` array, is contained in `requested` after
 * lower-casing.
 *
 * @param params.key - Capability registry key being resolved.
 * @param params.requested - Provider ids to keep.
 * @param params.entries - Loaded registry entries to filter.
 * @returns The matching entries, in their original order.
 */
function filterLoadedProvidersForRequestedConfig<K extends CapabilityProviderRegistryKey>(params: {
  key: K;
  requested: Set<string>;
  entries: PluginRegistry[K];
}): PluginRegistry[K] {
  const nothingToMerge = params.key !== "speechProviders" || params.requested.size === 0;
  if (nothingToMerge) {
    return [] as unknown as PluginRegistry[K];
  }
  const isRequested = (candidate: unknown): boolean =>
    typeof candidate === "string" && params.requested.has(candidate.toLowerCase());
  return params.entries.filter((entry) => {
    const provider = entry.provider as { id?: unknown; aliases?: unknown };
    if (isRequested(provider.id)) {
      return true;
    }
    return (
      Array.isArray(provider.aliases) && provider.aliases.some((alias) => isRequested(alias))
    );
  }) as PluginRegistry[K];
}
export function resolvePluginCapabilityProvider<K extends CapabilityProviderRegistryKey>(params: {
key: K;
providerId: string;
@@ -148,7 +223,8 @@ export function resolvePluginCapabilityProvider<K extends CapabilityProviderRegi
cfg: params.cfg,
pluginIds,
});
const loadOptions = compatConfig === undefined ? undefined : { config: compatConfig };
const loadOptions =
compatConfig === undefined ? undefined : { config: compatConfig, activate: false };
const registry = resolveRuntimePluginRegistry(loadOptions);
return findProviderById(registry?.[params.key] ?? [], params.providerId);
}
@@ -162,16 +238,39 @@ export function resolvePluginCapabilityProviders<K extends CapabilityProviderReg
if (
activeProviders.length > 0 &&
params.key !== "memoryEmbeddingProviders" &&
params.key !== "speechProviders" &&
!hasExplicitPluginConfig(params.cfg?.plugins)
) {
return activeProviders.map((entry) => entry.provider) as CapabilityProviderForKey<K>[];
}
if (activeProviders.length > 0 && params.key === "speechProviders" && !params.cfg) {
return activeProviders.map((entry) => entry.provider) as CapabilityProviderForKey<K>[];
}
const missingRequestedSpeechProviders =
activeProviders.length > 0 && params.key === "speechProviders"
? collectRequestedSpeechProviderIds(params.cfg)
: undefined;
if (missingRequestedSpeechProviders) {
removeActiveProviderIds(missingRequestedSpeechProviders, activeProviders);
if (missingRequestedSpeechProviders.size === 0) {
return activeProviders.map((entry) => entry.provider) as CapabilityProviderForKey<K>[];
}
}
const compatConfig = resolveCapabilityProviderConfig({ key: params.key, cfg: params.cfg });
const loadOptions = compatConfig === undefined ? undefined : { config: compatConfig };
const loadOptions =
compatConfig === undefined ? undefined : { config: compatConfig, activate: false };
const registry = resolveRuntimePluginRegistry(loadOptions);
const loadedProviders = registry?.[params.key] ?? [];
if (params.key !== "memoryEmbeddingProviders") {
return mergeCapabilityProviders(activeProviders, loadedProviders);
const mergeLoadedProviders =
activeProviders.length > 0
? filterLoadedProvidersForRequestedConfig({
key: params.key,
requested: missingRequestedSpeechProviders ?? new Set(),
entries: loadedProviders,
})
: loadedProviders;
return mergeCapabilityProviders(activeProviders, mergeLoadedProviders);
}
return mergeCapabilityProviders(activeProviders, loadedProviders);
}

View File

@@ -123,6 +123,7 @@ describe("speech provider registry", () => {
},
},
},
activate: false,
});
});

View File

@@ -5,7 +5,7 @@ import { createEmptyPluginRegistry } from "../../../src/plugins/registry-empty.j
import { setActivePluginRegistry } from "../../../src/plugins/runtime.js";
import type { SpeechProviderPlugin } from "../../../src/plugins/types.js";
import { resolveWorkspacePackagePublicModuleUrl } from "../../../src/test-utils/bundled-plugin-public-surface.js";
import { withEnv } from "../../../src/test-utils/env.js";
import { withEnv, withEnvAsync } from "../../../src/test-utils/env.js";
import type { ResolvedTtsConfig } from "../../../src/tts/tts-types.js";
type TtsRuntimeModule = typeof import("../../../src/tts/tts.js");
@@ -36,6 +36,41 @@ let getResolvedSpeechProviderConfig: TtsRuntimeModule["_test"]["getResolvedSpeec
let formatTtsProviderError: TtsRuntimeModule["_test"]["formatTtsProviderError"];
let sanitizeTtsErrorForLog: TtsRuntimeModule["_test"]["sanitizeTtsErrorForLog"];
/** Environment variable names that `isolatedSpeechProviderEnv` resets to `undefined`. */
const SPEECH_PROVIDER_ENV_KEYS = [
  "ELEVENLABS_API_KEY",
  "GEMINI_API_KEY",
  "GOOGLE_API_KEY",
  "GRADIUM_API_KEY",
  "MINIMAX_API_KEY",
  "OPENAI_API_KEY",
  "VYDRA_API_KEY",
  "XAI_API_KEY",
  "XI_API_KEY",
] as const;

/**
 * Builds an env-override map in which every key listed in
 * `SPEECH_PROVIDER_ENV_KEYS` is `undefined`, then layers `overrides` on top.
 *
 * @param overrides - Values that take precedence over the cleared defaults.
 * @returns A new record; `overrides` is not mutated.
 */
function isolatedSpeechProviderEnv(
  overrides: Record<string, string | undefined> = {},
): Record<string, string | undefined> {
  const cleared: Record<string, string | undefined> = {};
  for (const envKey of SPEECH_PROVIDER_ENV_KEYS) {
    cleared[envKey] = undefined;
  }
  return { ...cleared, ...overrides };
}
/**
 * Runs `fn` through `withEnv` using the map produced by
 * `isolatedSpeechProviderEnv(overrides)` and returns whatever `withEnv` returns.
 *
 * @param overrides - Values passed on to `isolatedSpeechProviderEnv`.
 * @param fn - Synchronous callback to execute.
 */
function withIsolatedSpeechProviderEnv<T>(
  overrides: Record<string, string | undefined>,
  fn: () => T,
): T {
  const isolatedEnv = isolatedSpeechProviderEnv(overrides);
  return withEnv(isolatedEnv, fn);
}
/**
 * Async counterpart of `withIsolatedSpeechProviderEnv`: awaits `withEnvAsync`
 * with the map produced by `isolatedSpeechProviderEnv(overrides)`.
 *
 * @param overrides - Values passed on to `isolatedSpeechProviderEnv`.
 * @param fn - Async callback to execute.
 * @returns The value `withEnvAsync` resolves with.
 */
async function withIsolatedSpeechProviderEnvAsync<T>(
  overrides: Record<string, string | undefined>,
  fn: () => Promise<T>,
): Promise<T> {
  const isolatedEnv = isolatedSpeechProviderEnv(overrides);
  const outcome = await withEnvAsync(isolatedEnv, fn);
  return outcome;
}
vi.mock("@mariozechner/pi-ai", () => {
const getApiProvider = vi.fn(() => undefined);
return {
@@ -670,7 +705,7 @@ export function describeTtsConfigContract() {
expected: "microsoft",
},
] as const)("selects provider based on available API keys: $name", (testCase) => {
withEnv(testCase.env, () => {
withIsolatedSpeechProviderEnv(testCase.env, () => {
const config = {
auto: "off",
mode: "final",
@@ -693,7 +728,7 @@ export function describeTtsConfigContract() {
});
it("passes cfg into auto-selection so model-provider Google keys can configure TTS", () => {
withEnv(
withIsolatedSpeechProviderEnv(
{
OPENAI_API_KEY: undefined,
ELEVENLABS_API_KEY: undefined,
@@ -974,133 +1009,137 @@ export function describeTtsProviderRuntimeContract() {
describe("fallback readiness errors", () => {
it("continues synthesize fallback when primary readiness checks throw", async () => {
const throwingPrimary: SpeechProviderPlugin = {
id: "openai",
label: "OpenAI",
autoSelectOrder: 10,
resolveConfig: () => ({}),
isConfigured: () => {
throw new Error("Authorization: Bearer sk-readiness-throw-token-1234567890\nboom");
},
synthesize: async () => {
throw new Error("unexpected synthesize call");
},
};
const fallback: SpeechProviderPlugin = {
id: "microsoft",
label: "Microsoft",
autoSelectOrder: 20,
resolveConfig: () => ({}),
isConfigured: () => true,
synthesize: async () => ({
audioBuffer: createAudioBuffer(2),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: true,
}),
};
const registry = createEmptyPluginRegistry();
registry.speechProviders = [
{ pluginId: "openai", provider: throwingPrimary, source: "test" },
{ pluginId: "microsoft", provider: fallback, source: "test" },
];
setActivePluginRegistry(registry);
await withIsolatedSpeechProviderEnvAsync({}, async () => {
const throwingPrimary: SpeechProviderPlugin = {
id: "openai",
label: "OpenAI",
autoSelectOrder: 10,
resolveConfig: () => ({}),
isConfigured: () => {
throw new Error("Authorization: Bearer sk-readiness-throw-token-1234567890\nboom");
},
synthesize: async () => {
throw new Error("unexpected synthesize call");
},
};
const fallback: SpeechProviderPlugin = {
id: "microsoft",
label: "Microsoft",
autoSelectOrder: 20,
resolveConfig: () => ({}),
isConfigured: () => true,
synthesize: async () => ({
audioBuffer: createAudioBuffer(2),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: true,
}),
};
const registry = createEmptyPluginRegistry();
registry.speechProviders = [
{ pluginId: "openai", provider: throwingPrimary, source: "test" },
{ pluginId: "microsoft", provider: fallback, source: "test" },
];
setActivePluginRegistry(registry);
const result = await ttsRuntime.synthesizeSpeech({
text: "hello fallback",
cfg: {
messages: {
tts: {
provider: "openai",
const result = await ttsRuntime.synthesizeSpeech({
text: "hello fallback",
cfg: {
messages: {
tts: {
provider: "openai",
},
},
},
},
});
});
expect(result.success).toBe(true);
if (!result.success) {
throw new Error("expected fallback synthesis success");
}
expect(result.provider).toBe("microsoft");
expect(result.fallbackFrom).toBe("openai");
expect(result.attemptedProviders).toEqual(["openai", "microsoft"]);
expect(result.attempts?.[0]).toMatchObject({
provider: "openai",
outcome: "failed",
reasonCode: "provider_error",
});
expect(result.attempts?.[1]).toMatchObject({
provider: "microsoft",
outcome: "success",
reasonCode: "success",
expect(result.success).toBe(true);
if (!result.success) {
throw new Error("expected fallback synthesis success");
}
expect(result.provider).toBe("microsoft");
expect(result.fallbackFrom).toBe("openai");
expect(result.attemptedProviders).toEqual(["openai", "microsoft"]);
expect(result.attempts?.[0]).toMatchObject({
provider: "openai",
outcome: "failed",
reasonCode: "provider_error",
});
expect(result.attempts?.[1]).toMatchObject({
provider: "microsoft",
outcome: "success",
reasonCode: "success",
});
});
});
it("continues telephony fallback when primary readiness checks throw", async () => {
const throwingPrimary: SpeechProviderPlugin = {
id: "primary-throws",
label: "PrimaryThrows",
autoSelectOrder: 10,
resolveConfig: () => ({}),
isConfigured: () => {
throw new Error("Authorization: Bearer sk-telephony-throw-token-1234567890\tboom");
},
synthesize: async () => {
throw new Error("unexpected synthesize call");
},
};
const fallback: SpeechProviderPlugin = {
id: "microsoft",
label: "Microsoft",
autoSelectOrder: 20,
resolveConfig: () => ({}),
isConfigured: () => true,
synthesize: async () => ({
audioBuffer: createAudioBuffer(2),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: true,
}),
synthesizeTelephony: async () => ({
audioBuffer: createAudioBuffer(2),
outputFormat: "mp3",
sampleRate: 24000,
}),
};
const registry = createEmptyPluginRegistry();
registry.speechProviders = [
{ pluginId: "primary-throws", provider: throwingPrimary, source: "test" },
{ pluginId: "microsoft", provider: fallback, source: "test" },
];
setActivePluginRegistry(registry);
await withIsolatedSpeechProviderEnvAsync({}, async () => {
const throwingPrimary: SpeechProviderPlugin = {
id: "primary-throws",
label: "PrimaryThrows",
autoSelectOrder: 10,
resolveConfig: () => ({}),
isConfigured: () => {
throw new Error("Authorization: Bearer sk-telephony-throw-token-1234567890\tboom");
},
synthesize: async () => {
throw new Error("unexpected synthesize call");
},
};
const fallback: SpeechProviderPlugin = {
id: "microsoft",
label: "Microsoft",
autoSelectOrder: 20,
resolveConfig: () => ({}),
isConfigured: () => true,
synthesize: async () => ({
audioBuffer: createAudioBuffer(2),
outputFormat: "mp3",
fileExtension: ".mp3",
voiceCompatible: true,
}),
synthesizeTelephony: async () => ({
audioBuffer: createAudioBuffer(2),
outputFormat: "mp3",
sampleRate: 24000,
}),
};
const registry = createEmptyPluginRegistry();
registry.speechProviders = [
{ pluginId: "primary-throws", provider: throwingPrimary, source: "test" },
{ pluginId: "microsoft", provider: fallback, source: "test" },
];
setActivePluginRegistry(registry);
const result = await ttsRuntime.textToSpeechTelephony({
text: "hello telephony fallback",
cfg: {
messages: {
tts: {
provider: "primary-throws",
const result = await ttsRuntime.textToSpeechTelephony({
text: "hello telephony fallback",
cfg: {
messages: {
tts: {
provider: "primary-throws",
},
},
},
},
});
});
expect(result.success).toBe(true);
if (!result.success) {
throw new Error("expected telephony fallback success");
}
expect(result.provider).toBe("microsoft");
expect(result.fallbackFrom).toBe("primary-throws");
expect(result.attemptedProviders).toEqual(["primary-throws", "microsoft"]);
expect(result.attempts?.[0]).toMatchObject({
provider: "primary-throws",
outcome: "failed",
reasonCode: "provider_error",
});
expect(result.attempts?.[1]).toMatchObject({
provider: "microsoft",
outcome: "success",
reasonCode: "success",
expect(result.success).toBe(true);
if (!result.success) {
throw new Error("expected telephony fallback success");
}
expect(result.provider).toBe("microsoft");
expect(result.fallbackFrom).toBe("primary-throws");
expect(result.attemptedProviders).toEqual(["primary-throws", "microsoft"]);
expect(result.attempts?.[0]).toMatchObject({
provider: "primary-throws",
outcome: "failed",
reasonCode: "provider_error",
});
expect(result.attempts?.[1]).toMatchObject({
provider: "microsoft",
outcome: "success",
reasonCode: "success",
});
});
});