fix: register bundled TTS providers and route overrides correctly (#62846) (thanks @stainlu)

* fix(microsoft,elevenlabs): add enabledByDefault so speech providers register at runtime

* fix(tts): route generic directive tokens to the explicitly declared provider

Addresses the P2 Codex review on #62846 that flagged auto-enabling
ElevenLabs as a product regression for MiniMax users. Both providers
claim the generic `speed` token, and parseTtsDirectives walked
providers in autoSelectOrder with first-match-wins, so inputs like
`[[tts:provider=minimax speed=1.2]]` silently routed speed to
providerOverrides.elevenlabs once elevenlabs participated in every
parse pass.

The parser now pre-scans for `provider=` (honoring legacy last-wins
semantics) and routes generic tokens with the declared provider tried
first, falling back to autoSelectOrder when it doesn't handle the key.
Token order inside the directive no longer matters: `speed=1.2` before
or after `provider=minimax` both resolve to MiniMax.

Adds a regression test suite covering the exact ElevenLabs/MiniMax
speed collision plus fallback, mixed-token, last-wins, and
allowProvider-disabled cases. parseTtsDirectives had no prior test
coverage.

* fix(tts): prefer active provider for generic directives

* fix: register bundled TTS providers safely (#62846) (thanks @stainlu)

* fix: use exported TTS SDK seam (#62846) (thanks @stainlu)

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
stain lu
2026-04-16 17:56:38 +08:00
committed by GitHub
parent ecfaf64526
commit 6ea3cddf0d
7 changed files with 210 additions and 21 deletions

View File

@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
- Ollama/chat: strip the `ollama/` provider prefix from Ollama chat request model ids so configured refs like `ollama/qwen3:14b-q8_0` stop 404ing against the Ollama API. (#67457) Thanks @suboss87.
- QA/Matrix: split the private QA lab runtime into smaller tested modules, add Matrix media contract coverage for image understanding and generated-image delivery, and update the memory-dreaming QA sweep to assert the separate phase-report layout. (#67430) Thanks @gumadeiras.
- Agents/tools: resolve non-workspace host tilde paths against the OS home directory and keep edit recovery aligned with that same path target, so `~/...` host edit/write operations stop failing or reading back the wrong file when `OPENCLAW_HOME` differs. (#62804) Thanks @stainlu.
- Speech/TTS: auto-enable the bundled Microsoft and ElevenLabs speech providers, and route generic TTS directive tokens through the explicit or active provider first so overrides like `[[tts:speed=1.2]]` stop silently landing on the wrong provider. (#62846) Thanks @stainlu.
## 2026.4.15-beta.1

View File

@@ -5,9 +5,14 @@ import path from "node:path";
import type { Readable } from "node:stream";
import { ChannelType, type Client, ReadyListener } from "@buape/carbon";
import type { VoicePlugin } from "@buape/carbon/voice";
import { resolveAgentDir } from "openclaw/plugin-sdk/agent-runtime";
import { agentCommandFromIngress } from "openclaw/plugin-sdk/agent-runtime";
import { resolveTtsConfig, type ResolvedTtsConfig } from "openclaw/plugin-sdk/agent-runtime";
import {
agentCommandFromIngress,
getTtsProvider,
resolveAgentDir,
resolveTtsConfig,
resolveTtsPrefsPath,
type ResolvedTtsConfig,
} from "openclaw/plugin-sdk/agent-runtime";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import type { DiscordAccountConfig, TtsConfig } from "openclaw/plugin-sdk/config-runtime";
import { resolveAgentRoute } from "openclaw/plugin-sdk/routing";
@@ -809,6 +814,7 @@ export class DiscordVoiceManager {
const directive = parseTtsDirectives(replyText, ttsConfig.modelOverrides, {
cfg: ttsCfg,
providerConfigs: ttsConfig.providerConfigs,
preferredProviderId: getTtsProvider(ttsConfig, resolveTtsPrefsPath(ttsConfig)),
});
const rawSpeakText = directive.overrides.ttsText ?? directive.cleanedText.trim();
const speakText = sanitizeVoiceReplyTextForSpeech(rawSpeakText, speaker.label);

View File

@@ -1,5 +1,6 @@
{
"id": "elevenlabs",
"enabledByDefault": true,
"contracts": {
"speechProviders": ["elevenlabs"]
},

View File

@@ -1,5 +1,6 @@
{
"id": "microsoft",
"enabledByDefault": true,
"contracts": {
"speechProviders": ["microsoft"]
},

View File

@@ -1036,12 +1036,14 @@ export async function maybeApplyTtsToPayload(params: {
return params.payload;
}
const config = resolveTtsConfig(params.cfg);
const activeProvider = getTtsProvider(config, prefsPath);
const reply = resolveSendableOutboundReplyParts(params.payload);
const text = reply.text;
const directives = parseTtsDirectives(text, config.modelOverrides, {
cfg: params.cfg,
providerConfigs: config.providerConfigs,
preferredProviderId: activeProvider,
});
if (directives.warnings.length > 0) {
logVerbose(`TTS: ignored directive overrides (${directives.warnings.join("; ")})`);
@@ -1049,9 +1051,8 @@ export async function maybeApplyTtsToPayload(params: {
if (isVerbose()) {
const effectiveProvider = directives.overrides?.provider
? (canonicalizeSpeechProviderId(directives.overrides.provider, params.cfg) ??
getTtsProvider(config, prefsPath))
: getTtsProvider(config, prefsPath);
? (canonicalizeSpeechProviderId(directives.overrides.provider, params.cfg) ?? activeProvider)
: activeProvider;
logVerbose(
`TTS: auto mode enabled (${autoMode}), channel=${params.channel}, selected provider=${effectiveProvider}, config.provider=${config.provider}, config.providerSource=${config.providerSource}`,
);

147
src/tts/directives.test.ts Normal file
View File

@@ -0,0 +1,147 @@
import { describe, expect, it } from "vitest";
import type { SpeechProviderPlugin } from "../plugins/types.js";
import { parseTtsDirectives } from "./directives.js";
import type {
SpeechDirectiveTokenParseContext,
SpeechDirectiveTokenParseResult,
SpeechModelOverridePolicy,
} from "./provider-types.js";
/**
 * Builds a minimal speech provider stub for the directive-routing tests.
 *
 * @param id - Used as both the provider id and its label.
 * @param order - Stored as the stub's `autoSelectOrder`.
 * @param parse - Installed as the stub's `parseDirectiveToken` hook.
 * @returns A stub that always reports itself configured and whose `synthesize`
 *   resolves to an empty mp3 payload.
 */
function makeProvider(
  id: string,
  order: number,
  parse: (ctx: SpeechDirectiveTokenParseContext) => SpeechDirectiveTokenParseResult | undefined,
): SpeechProviderPlugin {
  // Synthesis output is a fixed placeholder: a zero-length buffer tagged as mp3.
  const synthesize = async () => {
    return {
      audioBuffer: Buffer.alloc(0),
      outputFormat: "mp3",
      fileExtension: ".mp3",
      voiceCompatible: false,
    };
  };
  const isConfigured = () => true;

  // The cast is kept from the original: the stub only fills in the fields listed here.
  return {
    id,
    label: id,
    autoSelectOrder: order,
    parseDirectiveToken: parse,
    isConfigured,
    synthesize,
  } as SpeechProviderPlugin;
}
// Stub provider "elevenlabs" (autoSelectOrder 10): claims the `speed` and `style`
// tokens, converting the raw value with Number(); every other key is left unhandled.
const elevenlabs = makeProvider("elevenlabs", 10, (ctx) => {
  switch (ctx.key) {
    case "speed":
      return { handled: true, overrides: { speed: Number(ctx.value) } };
    case "style":
      return { handled: true, overrides: { style: Number(ctx.value) } };
    default:
      return undefined;
  }
});
// Stub provider "minimax" (autoSelectOrder 20): claims only the `speed` token,
// converting the raw value with Number(); every other key is left unhandled.
const minimax = makeProvider("minimax", 20, (ctx) =>
  ctx.key === "speed" ? { handled: true, overrides: { speed: Number(ctx.value) } } : undefined,
);
// Policy fixture with `enabled` and every `allow*` flag set to true.
// The allowProvider-disabled test below spreads this object and overrides a single flag.
const fullPolicy: SpeechModelOverridePolicy = {
enabled: true,
allowText: true,
allowProvider: true,
allowVoice: true,
allowModelId: true,
allowVoiceSettings: true,
allowNormalization: true,
allowSeed: true,
};
describe("parseTtsDirectives provider-aware routing", () => {
  // Both stubs claim `speed`; elevenlabs is listed first and has the lower
  // autoSelectOrder (10 vs 20). Only elevenlabs claims `style`.
  const registered = [elevenlabs, minimax];

  /**
   * Parses `text` against the two stub providers and returns the collected overrides.
   * `preferredProviderId` is only placed on the options object when supplied.
   */
  function overridesFor(
    text: string,
    preferredProviderId?: string,
    policy: SpeechModelOverridePolicy = fullPolicy,
  ) {
    const options =
      preferredProviderId === undefined
        ? { providers: registered }
        : { providers: registered, preferredProviderId };
    return parseTtsDirectives(text, policy, options).overrides;
  }

  it("routes generic speed to the explicitly declared provider", () => {
    const { provider, providerOverrides } = overridesFor(
      "hello [[tts:provider=minimax speed=1.2]] world",
    );

    expect(provider).toBe("minimax");
    expect(providerOverrides?.minimax).toEqual({ speed: 1.2 });
    expect(providerOverrides?.elevenlabs).toBeUndefined();
  });

  it("routes correctly when provider appears after the generic token", () => {
    const { provider, providerOverrides } = overridesFor("[[tts:speed=1.2 provider=minimax]] hi");

    expect(provider).toBe("minimax");
    expect(providerOverrides?.minimax).toEqual({ speed: 1.2 });
    expect(providerOverrides?.elevenlabs).toBeUndefined();
  });

  it("routes to the preferred provider when no provider token is declared", () => {
    const { provider, providerOverrides } = overridesFor("[[tts:speed=1.5]]", "minimax");

    expect(provider).toBeUndefined();
    expect(providerOverrides?.minimax).toEqual({ speed: 1.5 });
    expect(providerOverrides?.elevenlabs).toBeUndefined();
  });

  it("falls back to autoSelectOrder when no provider hint is available", () => {
    const { provider, providerOverrides } = overridesFor("[[tts:speed=1.5]]");

    expect(provider).toBeUndefined();
    expect(providerOverrides?.elevenlabs).toEqual({ speed: 1.5 });
    expect(providerOverrides?.minimax).toBeUndefined();
  });

  it("falls through when the preferred provider does not handle the key", () => {
    // minimax is declared but does not claim `style`, so elevenlabs picks it up.
    const { provider, providerOverrides } = overridesFor("[[tts:provider=minimax style=0.4]]");

    expect(provider).toBe("minimax");
    expect(providerOverrides?.elevenlabs).toEqual({ style: 0.4 });
    expect(providerOverrides?.minimax).toBeUndefined();
  });

  it("routes mixed tokens independently in the same directive", () => {
    const { provider, providerOverrides } = overridesFor(
      "[[tts:provider=minimax style=0.4 speed=1.2]]",
    );

    expect(provider).toBe("minimax");
    expect(providerOverrides?.minimax).toEqual({ speed: 1.2 });
    expect(providerOverrides?.elevenlabs).toEqual({ style: 0.4 });
  });

  it("keeps last-wins provider semantics", () => {
    const { provider, providerOverrides } = overridesFor(
      "[[tts:provider=elevenlabs provider=minimax speed=1.1]]",
    );

    expect(provider).toBe("minimax");
    expect(providerOverrides?.minimax).toEqual({ speed: 1.1 });
    expect(providerOverrides?.elevenlabs).toBeUndefined();
  });

  it("ignores provider tokens when provider overrides are disabled", () => {
    const restricted: SpeechModelOverridePolicy = { ...fullPolicy, allowProvider: false };
    const { provider, providerOverrides } = overridesFor(
      "[[tts:provider=elevenlabs speed=1.2]]",
      "minimax",
      restricted,
    );

    expect(provider).toBeUndefined();
    expect(providerOverrides?.minimax).toEqual({ speed: 1.2 });
    expect(providerOverrides?.elevenlabs).toBeUndefined();
  });
});

View File

@@ -13,6 +13,7 @@ type ParseTtsDirectiveOptions = {
cfg?: OpenClawConfig;
providers?: readonly SpeechProviderPlugin[];
providerConfigs?: Record<string, SpeechProviderConfig>;
preferredProviderId?: string;
};
function buildProviderOrder(left: SpeechProviderPlugin, right: SpeechProviderPlugin): number {
@@ -38,6 +39,20 @@ function resolveDirectiveProviderConfig(
return options?.providerConfigs?.[provider.id];
}
/**
 * Returns a new array in which the provider whose `id` strictly equals
 * `providerId` comes first, followed by the remaining providers in their
 * original relative order.
 *
 * When `providerId` is falsy (undefined or empty) or matches no provider, the
 * result is a shallow copy of `providers` in the original order. The input
 * array is never mutated.
 *
 * @param providers - Candidate providers, already in fallback order.
 * @param providerId - Id of the provider to try first, if any.
 * @returns A fresh array with the matching provider promoted to index 0.
 */
function prioritizeProvider(
  providers: readonly SpeechProviderPlugin[],
  providerId: string | undefined,
): SpeechProviderPlugin[] {
  const inOriginalOrder = [...providers];
  if (!providerId) {
    return inOriginalOrder;
  }

  const head = inOriginalOrder.find((candidate) => candidate.id === providerId);
  if (head === undefined) {
    return inOriginalOrder;
  }

  // Drop every entry carrying the promoted id so it appears exactly once, at the front.
  const tail = inOriginalOrder.filter((candidate) => candidate.id !== providerId);
  return [head].concat(tail);
}
export function parseTtsDirectives(
text: string,
policy: SpeechModelOverridePolicy,
@@ -66,6 +81,37 @@ export function parseTtsDirectives(
cleanedText = cleanedText.replace(directiveRegex, (_match, body: string) => {
hasDirective = true;
const tokens = body.split(/\s+/).filter(Boolean);
let declaredProviderId: string | undefined;
if (policy.allowProvider) {
for (const token of tokens) {
const eqIndex = token.indexOf("=");
if (eqIndex === -1) {
continue;
}
const rawKey = token.slice(0, eqIndex).trim();
if (!rawKey || normalizeLowercaseStringOrEmpty(rawKey) !== "provider") {
continue;
}
const rawValue = token.slice(eqIndex + 1).trim();
if (!rawValue) {
continue;
}
const providerId = normalizeLowercaseStringOrEmpty(rawValue);
if (!providerId) {
warnings.push("invalid provider id");
continue;
}
declaredProviderId = providerId;
overrides.provider = providerId;
}
}
const orderedProviders = prioritizeProvider(
providers,
declaredProviderId ?? normalizeLowercaseStringOrEmpty(options?.preferredProviderId),
);
for (const token of tokens) {
const eqIndex = token.indexOf("=");
if (eqIndex === -1) {
@@ -78,19 +124,10 @@ export function parseTtsDirectives(
}
const key = normalizeLowercaseStringOrEmpty(rawKey);
if (key === "provider") {
if (policy.allowProvider) {
const providerId = normalizeLowercaseStringOrEmpty(rawValue);
if (providerId) {
overrides.provider = providerId;
} else {
warnings.push("invalid provider id");
}
}
continue;
}
let handled = false;
for (const provider of providers) {
for (const provider of orderedProviders) {
const parsed = provider.parseDirectiveToken?.({
key,
value: rawValue,
@@ -101,7 +138,6 @@ export function parseTtsDirectives(
if (!parsed?.handled) {
continue;
}
handled = true;
if (parsed.overrides) {
overrides.providerOverrides = {
...overrides.providerOverrides,
@@ -116,10 +152,6 @@ export function parseTtsDirectives(
}
break;
}
if (!handled) {
continue;
}
}
return "";
});