mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:10:44 +00:00
fix: register bundled TTS providers and route overrides correctly (#62846) (thanks @stainlu)
* fix(microsoft,elevenlabs): add enabledByDefault so speech providers register at runtime

* fix(tts): route generic directive tokens to the explicitly declared provider

  Addresses the P2 Codex review on #62846 that flagged auto-enabling ElevenLabs as a product regression for MiniMax users. Both providers claim the generic `speed` token, and parseTtsDirectives walked providers in autoSelectOrder with first-match-wins, so inputs like `[[tts:provider=minimax speed=1.2]]` silently routed speed to providerOverrides.elevenlabs once elevenlabs participated in every parse pass.

  The parser now pre-scans for `provider=` (honoring legacy last-wins semantics) and routes generic tokens with the declared provider tried first, falling back to autoSelectOrder when it doesn't handle the key. Token order inside the directive no longer matters: `speed=1.2` before or after `provider=minimax` both resolve to MiniMax.

  Adds a regression test suite covering the exact ElevenLabs/MiniMax speed collision plus fallback, mixed-token, last-wins, and allowProvider-disabled cases. parseTtsDirectives had no prior test coverage.

* fix(tts): prefer active provider for generic directives

* fix: register bundled TTS providers safely (#62846) (thanks @stainlu)

* fix: use exported TTS SDK seam (#62846) (thanks @stainlu)

---------

Co-authored-by: Ayaan Zaidi <hi@obviy.us>
This commit is contained in:
@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Ollama/chat: strip the `ollama/` provider prefix from Ollama chat request model ids so configured refs like `ollama/qwen3:14b-q8_0` stop 404ing against the Ollama API. (#67457) Thanks @suboss87.
|
||||
- QA/Matrix: split the private QA lab runtime into smaller tested modules, add Matrix media contract coverage for image understanding and generated-image delivery, and update the memory-dreaming QA sweep to assert the separate phase-report layout. (#67430) Thanks @gumadeiras.
|
||||
- Agents/tools: resolve non-workspace host tilde paths against the OS home directory and keep edit recovery aligned with that same path target, so `~/...` host edit/write operations stop failing or reading back the wrong file when `OPENCLAW_HOME` differs. (#62804) Thanks @stainlu.
|
||||
- Speech/TTS: auto-enable the bundled Microsoft and ElevenLabs speech providers, and route generic TTS directive tokens through the explicit or active provider first so overrides like `[[tts:speed=1.2]]` stop silently landing on the wrong provider. (#62846) Thanks @stainlu.
|
||||
|
||||
## 2026.4.15-beta.1
|
||||
|
||||
|
||||
@@ -5,9 +5,14 @@ import path from "node:path";
|
||||
import type { Readable } from "node:stream";
|
||||
import { ChannelType, type Client, ReadyListener } from "@buape/carbon";
|
||||
import type { VoicePlugin } from "@buape/carbon/voice";
|
||||
import { resolveAgentDir } from "openclaw/plugin-sdk/agent-runtime";
|
||||
import { agentCommandFromIngress } from "openclaw/plugin-sdk/agent-runtime";
|
||||
import { resolveTtsConfig, type ResolvedTtsConfig } from "openclaw/plugin-sdk/agent-runtime";
|
||||
import {
|
||||
agentCommandFromIngress,
|
||||
getTtsProvider,
|
||||
resolveAgentDir,
|
||||
resolveTtsConfig,
|
||||
resolveTtsPrefsPath,
|
||||
type ResolvedTtsConfig,
|
||||
} from "openclaw/plugin-sdk/agent-runtime";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
|
||||
import type { DiscordAccountConfig, TtsConfig } from "openclaw/plugin-sdk/config-runtime";
|
||||
import { resolveAgentRoute } from "openclaw/plugin-sdk/routing";
|
||||
@@ -809,6 +814,7 @@ export class DiscordVoiceManager {
|
||||
const directive = parseTtsDirectives(replyText, ttsConfig.modelOverrides, {
|
||||
cfg: ttsCfg,
|
||||
providerConfigs: ttsConfig.providerConfigs,
|
||||
preferredProviderId: getTtsProvider(ttsConfig, resolveTtsPrefsPath(ttsConfig)),
|
||||
});
|
||||
const rawSpeakText = directive.overrides.ttsText ?? directive.cleanedText.trim();
|
||||
const speakText = sanitizeVoiceReplyTextForSpeech(rawSpeakText, speaker.label);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
{
|
||||
"id": "elevenlabs",
|
||||
"enabledByDefault": true,
|
||||
"contracts": {
|
||||
"speechProviders": ["elevenlabs"]
|
||||
},
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
{
|
||||
"id": "microsoft",
|
||||
"enabledByDefault": true,
|
||||
"contracts": {
|
||||
"speechProviders": ["microsoft"]
|
||||
},
|
||||
|
||||
@@ -1036,12 +1036,14 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
return params.payload;
|
||||
}
|
||||
const config = resolveTtsConfig(params.cfg);
|
||||
const activeProvider = getTtsProvider(config, prefsPath);
|
||||
|
||||
const reply = resolveSendableOutboundReplyParts(params.payload);
|
||||
const text = reply.text;
|
||||
const directives = parseTtsDirectives(text, config.modelOverrides, {
|
||||
cfg: params.cfg,
|
||||
providerConfigs: config.providerConfigs,
|
||||
preferredProviderId: activeProvider,
|
||||
});
|
||||
if (directives.warnings.length > 0) {
|
||||
logVerbose(`TTS: ignored directive overrides (${directives.warnings.join("; ")})`);
|
||||
@@ -1049,9 +1051,8 @@ export async function maybeApplyTtsToPayload(params: {
|
||||
|
||||
if (isVerbose()) {
|
||||
const effectiveProvider = directives.overrides?.provider
|
||||
? (canonicalizeSpeechProviderId(directives.overrides.provider, params.cfg) ??
|
||||
getTtsProvider(config, prefsPath))
|
||||
: getTtsProvider(config, prefsPath);
|
||||
? (canonicalizeSpeechProviderId(directives.overrides.provider, params.cfg) ?? activeProvider)
|
||||
: activeProvider;
|
||||
logVerbose(
|
||||
`TTS: auto mode enabled (${autoMode}), channel=${params.channel}, selected provider=${effectiveProvider}, config.provider=${config.provider}, config.providerSource=${config.providerSource}`,
|
||||
);
|
||||
|
||||
147
src/tts/directives.test.ts
Normal file
147
src/tts/directives.test.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { SpeechProviderPlugin } from "../plugins/types.js";
|
||||
import { parseTtsDirectives } from "./directives.js";
|
||||
import type {
|
||||
SpeechDirectiveTokenParseContext,
|
||||
SpeechDirectiveTokenParseResult,
|
||||
SpeechModelOverridePolicy,
|
||||
} from "./provider-types.js";
|
||||
|
||||
/**
 * Builds a minimal speech-provider stub for the directive-routing tests.
 *
 * @param id - Used as both the provider `id` and its `label`.
 * @param order - Stored as the provider's `autoSelectOrder`.
 * @param parse - Installed as `parseDirectiveToken`; returning `undefined`
 *   signals that the stub does not handle the token.
 * @returns A stub whose `isConfigured` always reports `true` and whose
 *   `synthesize` resolves to an empty mp3 result.
 */
function makeProvider(
|
||||
id: string,
|
||||
order: number,
|
||||
parse: (ctx: SpeechDirectiveTokenParseContext) => SpeechDirectiveTokenParseResult | undefined,
|
||||
): SpeechProviderPlugin {
|
||||
return {
|
||||
id,
|
||||
label: id,
|
||||
autoSelectOrder: order,
|
||||
parseDirectiveToken: parse,
|
||||
isConfigured: () => true,
|
||||
synthesize: async () => ({
|
||||
audioBuffer: Buffer.alloc(0),
|
||||
outputFormat: "mp3",
|
||||
fileExtension: ".mp3",
|
||||
voiceCompatible: false,
|
||||
}),
|
||||
// NOTE(review): the cast presumably covers plugin fields the stub omits — confirm against SpeechProviderPlugin.
|
||||
} as SpeechProviderPlugin;
|
||||
}
|
||||
|
||||
// Stub provider "elevenlabs" (autoSelectOrder 10). It claims the `speed` and
// `style` tokens, converting the raw value with Number(); any other key
// returns undefined, i.e. not handled.
const elevenlabs = makeProvider("elevenlabs", 10, ({ key, value }) => {
|
||||
if (key === "speed") {
|
||||
return { handled: true, overrides: { speed: Number(value) } };
|
||||
}
|
||||
if (key === "style") {
|
||||
return { handled: true, overrides: { style: Number(value) } };
|
||||
}
|
||||
return undefined;
|
||||
});
|
||||
|
||||
// Stub provider "minimax" (autoSelectOrder 20). It claims only the `speed`
// token, so it collides with the elevenlabs stub on that key; every other key
// returns undefined, i.e. not handled.
const minimax = makeProvider("minimax", 20, ({ key, value }) => {
|
||||
if (key === "speed") {
|
||||
return { handled: true, overrides: { speed: Number(value) } };
|
||||
}
|
||||
return undefined;
|
||||
});
|
||||
|
||||
// Override policy with every flag switched on; individual tests spread this
// object and flip a single flag when they need a restricted policy.
const fullPolicy: SpeechModelOverridePolicy = {
|
||||
enabled: true,
|
||||
allowText: true,
|
||||
allowProvider: true,
|
||||
allowVoice: true,
|
||||
allowModelId: true,
|
||||
allowVoiceSettings: true,
|
||||
allowNormalization: true,
|
||||
allowSeed: true,
|
||||
};
|
||||
|
||||
// Regression suite for routing of generic directive tokens. Both stubs claim
// `speed`, only the elevenlabs stub claims `style`; each case checks which
// provider's entry in `overrides.providerOverrides` receives the parsed value.
describe("parseTtsDirectives provider-aware routing", () => {
|
||||
// `provider=` precedes the generic token: speed must land on the declared provider.
it("routes generic speed to the explicitly declared provider", () => {
|
||||
const result = parseTtsDirectives(
|
||||
"hello [[tts:provider=minimax speed=1.2]] world",
|
||||
fullPolicy,
|
||||
{
|
||||
providers: [elevenlabs, minimax],
|
||||
},
|
||||
);
|
||||
|
||||
expect(result.overrides.provider).toBe("minimax");
|
||||
expect(result.overrides.providerOverrides?.minimax).toEqual({ speed: 1.2 });
|
||||
expect(result.overrides.providerOverrides?.elevenlabs).toBeUndefined();
|
||||
});
|
||||
|
||||
// Same expectation with the token order reversed inside the directive.
it("routes correctly when provider appears after the generic token", () => {
|
||||
const result = parseTtsDirectives("[[tts:speed=1.2 provider=minimax]] hi", fullPolicy, {
|
||||
providers: [elevenlabs, minimax],
|
||||
});
|
||||
|
||||
expect(result.overrides.provider).toBe("minimax");
|
||||
expect(result.overrides.providerOverrides?.minimax).toEqual({ speed: 1.2 });
|
||||
expect(result.overrides.providerOverrides?.elevenlabs).toBeUndefined();
|
||||
});
|
||||
|
||||
// No `provider=` token: `preferredProviderId` steers routing but must not
// be reported as an explicit provider override.
it("routes to the preferred provider when no provider token is declared", () => {
|
||||
const result = parseTtsDirectives("[[tts:speed=1.5]]", fullPolicy, {
|
||||
providers: [elevenlabs, minimax],
|
||||
preferredProviderId: "minimax",
|
||||
});
|
||||
|
||||
expect(result.overrides.provider).toBeUndefined();
|
||||
expect(result.overrides.providerOverrides?.minimax).toEqual({ speed: 1.5 });
|
||||
expect(result.overrides.providerOverrides?.elevenlabs).toBeUndefined();
|
||||
});
|
||||
|
||||
// Neither hint is present: the elevenlabs stub is expected to win. It is
// both first in the list and has the lower autoSelectOrder.
it("falls back to autoSelectOrder when no provider hint is available", () => {
|
||||
const result = parseTtsDirectives("[[tts:speed=1.5]]", fullPolicy, {
|
||||
providers: [elevenlabs, minimax],
|
||||
});
|
||||
|
||||
expect(result.overrides.provider).toBeUndefined();
|
||||
expect(result.overrides.providerOverrides?.elevenlabs).toEqual({ speed: 1.5 });
|
||||
expect(result.overrides.providerOverrides?.minimax).toBeUndefined();
|
||||
});
|
||||
|
||||
// The minimax stub does not handle `style`, so the token goes to the
// elevenlabs stub even though minimax is the declared provider.
it("falls through when the preferred provider does not handle the key", () => {
|
||||
const result = parseTtsDirectives("[[tts:provider=minimax style=0.4]]", fullPolicy, {
|
||||
providers: [elevenlabs, minimax],
|
||||
});
|
||||
|
||||
expect(result.overrides.provider).toBe("minimax");
|
||||
expect(result.overrides.providerOverrides?.elevenlabs).toEqual({ style: 0.4 });
|
||||
expect(result.overrides.providerOverrides?.minimax).toBeUndefined();
|
||||
});
|
||||
|
||||
// Each token is routed on its own: speed to minimax, style to elevenlabs.
it("routes mixed tokens independently in the same directive", () => {
|
||||
const result = parseTtsDirectives("[[tts:provider=minimax style=0.4 speed=1.2]]", fullPolicy, {
|
||||
providers: [elevenlabs, minimax],
|
||||
});
|
||||
|
||||
expect(result.overrides.provider).toBe("minimax");
|
||||
expect(result.overrides.providerOverrides?.minimax).toEqual({ speed: 1.2 });
|
||||
expect(result.overrides.providerOverrides?.elevenlabs).toEqual({ style: 0.4 });
|
||||
});
|
||||
|
||||
// Two `provider=` tokens: the later one decides both the override and the routing.
it("keeps last-wins provider semantics", () => {
|
||||
const result = parseTtsDirectives(
|
||||
"[[tts:provider=elevenlabs provider=minimax speed=1.1]]",
|
||||
fullPolicy,
|
||||
{ providers: [elevenlabs, minimax] },
|
||||
);
|
||||
|
||||
expect(result.overrides.provider).toBe("minimax");
|
||||
expect(result.overrides.providerOverrides?.minimax).toEqual({ speed: 1.1 });
|
||||
expect(result.overrides.providerOverrides?.elevenlabs).toBeUndefined();
|
||||
});
|
||||
|
||||
// With allowProvider off the `provider=` token is ignored entirely, so
// routing falls back to `preferredProviderId`.
it("ignores provider tokens when provider overrides are disabled", () => {
|
||||
const policy: SpeechModelOverridePolicy = { ...fullPolicy, allowProvider: false };
|
||||
const result = parseTtsDirectives("[[tts:provider=elevenlabs speed=1.2]]", policy, {
|
||||
providers: [elevenlabs, minimax],
|
||||
preferredProviderId: "minimax",
|
||||
});
|
||||
|
||||
expect(result.overrides.provider).toBeUndefined();
|
||||
expect(result.overrides.providerOverrides?.minimax).toEqual({ speed: 1.2 });
|
||||
expect(result.overrides.providerOverrides?.elevenlabs).toBeUndefined();
|
||||
});
|
||||
});
|
||||
@@ -13,6 +13,7 @@ type ParseTtsDirectiveOptions = {
|
||||
cfg?: OpenClawConfig;
|
||||
providers?: readonly SpeechProviderPlugin[];
|
||||
providerConfigs?: Record<string, SpeechProviderConfig>;
|
||||
preferredProviderId?: string;
|
||||
};
|
||||
|
||||
function buildProviderOrder(left: SpeechProviderPlugin, right: SpeechProviderPlugin): number {
|
||||
@@ -38,6 +39,20 @@ function resolveDirectiveProviderConfig(
|
||||
return options?.providerConfigs?.[provider.id];
|
||||
}
|
||||
|
||||
/**
 * Returns a new provider list with the provider matching `providerId` moved
 * to the front; all other providers keep their relative order.
 *
 * The input array is never mutated. When `providerId` is empty/undefined or
 * no provider has that exact `id`, the result is a shallow copy in the
 * original order.
 *
 * @param providers - Candidate providers in their existing order.
 * @param providerId - Id to prioritize; compared with strict equality, so
 *   callers must pass it already normalized.
 * @returns A fresh array, safe for the caller to iterate or modify.
 */
function prioritizeProvider(
|
||||
providers: readonly SpeechProviderPlugin[],
|
||||
providerId: string | undefined,
|
||||
): SpeechProviderPlugin[] {
|
||||
if (!providerId) {
|
||||
return [...providers];
|
||||
}
|
||||
const preferredProvider = providers.find((provider) => provider.id === providerId);
|
||||
if (!preferredProvider) {
|
||||
return [...providers];
|
||||
}
|
||||
// Filtering by id (rather than by reference) also drops any later duplicates of the preferred id.
|
||||
return [preferredProvider, ...providers.filter((provider) => provider.id !== providerId)];
|
||||
}
|
||||
|
||||
export function parseTtsDirectives(
|
||||
text: string,
|
||||
policy: SpeechModelOverridePolicy,
|
||||
@@ -66,6 +81,37 @@ export function parseTtsDirectives(
|
||||
cleanedText = cleanedText.replace(directiveRegex, (_match, body: string) => {
|
||||
hasDirective = true;
|
||||
const tokens = body.split(/\s+/).filter(Boolean);
|
||||
|
||||
let declaredProviderId: string | undefined;
|
||||
if (policy.allowProvider) {
|
||||
for (const token of tokens) {
|
||||
const eqIndex = token.indexOf("=");
|
||||
if (eqIndex === -1) {
|
||||
continue;
|
||||
}
|
||||
const rawKey = token.slice(0, eqIndex).trim();
|
||||
if (!rawKey || normalizeLowercaseStringOrEmpty(rawKey) !== "provider") {
|
||||
continue;
|
||||
}
|
||||
const rawValue = token.slice(eqIndex + 1).trim();
|
||||
if (!rawValue) {
|
||||
continue;
|
||||
}
|
||||
const providerId = normalizeLowercaseStringOrEmpty(rawValue);
|
||||
if (!providerId) {
|
||||
warnings.push("invalid provider id");
|
||||
continue;
|
||||
}
|
||||
declaredProviderId = providerId;
|
||||
overrides.provider = providerId;
|
||||
}
|
||||
}
|
||||
|
||||
const orderedProviders = prioritizeProvider(
|
||||
providers,
|
||||
declaredProviderId ?? normalizeLowercaseStringOrEmpty(options?.preferredProviderId),
|
||||
);
|
||||
|
||||
for (const token of tokens) {
|
||||
const eqIndex = token.indexOf("=");
|
||||
if (eqIndex === -1) {
|
||||
@@ -78,19 +124,10 @@ export function parseTtsDirectives(
|
||||
}
|
||||
const key = normalizeLowercaseStringOrEmpty(rawKey);
|
||||
if (key === "provider") {
|
||||
if (policy.allowProvider) {
|
||||
const providerId = normalizeLowercaseStringOrEmpty(rawValue);
|
||||
if (providerId) {
|
||||
overrides.provider = providerId;
|
||||
} else {
|
||||
warnings.push("invalid provider id");
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let handled = false;
|
||||
for (const provider of providers) {
|
||||
for (const provider of orderedProviders) {
|
||||
const parsed = provider.parseDirectiveToken?.({
|
||||
key,
|
||||
value: rawValue,
|
||||
@@ -101,7 +138,6 @@ export function parseTtsDirectives(
|
||||
if (!parsed?.handled) {
|
||||
continue;
|
||||
}
|
||||
handled = true;
|
||||
if (parsed.overrides) {
|
||||
overrides.providerOverrides = {
|
||||
...overrides.providerOverrides,
|
||||
@@ -116,10 +152,6 @@ export function parseTtsDirectives(
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (!handled) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return "";
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user