From a91baa16de69ac3d7f3eea76ed950f3110f4bbe4 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 26 Apr 2026 04:14:48 +0100 Subject: [PATCH] fix(tts): honor explicit directive providers --- CHANGELOG.md | 3 ++ docs/tools/tts.md | 4 ++ src/tts/directives.test.ts | 97 ++++++++++++++++++++++++++++++++++++-- src/tts/directives.ts | 47 +++++++++++++++--- src/tts/provider-types.ts | 1 + 5 files changed, 142 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e812e51a22..690cf9ac4af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,9 @@ Docs: https://docs.openclaw.ai - TTS: strip model-emitted TTS directives from streamed block text before channel delivery, including directives split across adjacent blocks, while preserving the accumulated raw reply for final-mode synthesis. Fixes #38937. +- TTS: keep explicit `provider=...` directive keys scoped to that provider and + warn on unsupported keys instead of letting another speech provider consume + overlapping keys. Fixes #60131. - ACP: send subagent and async-task completion wakes to external ACP harnesses as plain prompts instead of OpenClaw internal runtime-context envelopes, while keeping those envelopes out of ACP transcripts. diff --git a/docs/tools/tts.md b/docs/tools/tts.md index 600ed89da15..e9e7c385816 100644 --- a/docs/tools/tts.md +++ b/docs/tools/tts.md @@ -685,6 +685,10 @@ channel sees them, even when a directive is split across adjacent blocks. Final mode still parses the accumulated raw reply for TTS synthesis. `provider=...` directives are ignored unless `modelOverrides.allowProvider: true`. +When a reply declares `provider=...`, the other keys in that directive are +parsed only by that provider. Unsupported keys are stripped from visible text +and reported as TTS directive warnings instead of being routed to another +provider. Example reply payload: diff --git a/src/tts/directives.test.ts b/src/tts/directives.test.ts index 79ba1836aec..1e2afc1977f 100644 --- a/src/tts/directives.test.ts +++ b/src/tts/directives.test.ts @@ -11,10 +11,12 @@ function makeProvider( id: string, order: number, parse: (ctx: SpeechDirectiveTokenParseContext) => SpeechDirectiveTokenParseResult | undefined, + options?: { aliases?: string[] }, ): SpeechProviderPlugin { return { id, label: id, + aliases: options?.aliases, autoSelectOrder: order, parseDirectiveToken: parse, isConfigured: () => true, @@ -123,24 +125,111 @@ describe("parseTtsDirectives provider-aware routing", () => { expect(result.overrides.providerOverrides?.minimax).toBeUndefined(); }); - it("falls through when the preferred provider does not handle the key", () => { + it("does not fall through when the explicit provider does not handle the key", () => { const result = parseTtsDirectives("[[tts:provider=minimax style=0.4]]", fullPolicy, { providers: [elevenlabs, minimax], }); expect(result.overrides.provider).toBe("minimax"); - expect(result.overrides.providerOverrides?.elevenlabs).toEqual({ style: 0.4 }); + expect(result.overrides.providerOverrides?.elevenlabs).toBeUndefined(); expect(result.overrides.providerOverrides?.minimax).toBeUndefined(); + expect(result.warnings).toContain('unsupported minimax directive key "style"'); }); - it("routes mixed tokens independently in the same directive", () => { + it("keeps explicit-provider tokens scoped to the selected provider", () => { const result = parseTtsDirectives("[[tts:provider=minimax style=0.4 speed=1.2]]", fullPolicy, { providers: [elevenlabs, minimax], }); expect(result.overrides.provider).toBe("minimax"); expect(result.overrides.providerOverrides?.minimax).toEqual({ speed: 1.2 }); - expect(result.overrides.providerOverrides?.elevenlabs).toEqual({ style: 0.4 }); + expect(result.overrides.providerOverrides?.elevenlabs).toBeUndefined(); + expect(result.warnings).toContain('unsupported minimax directive key "style"'); + }); + + it("does not route explicit provider tokens to another provider with overlapping keys", () => { + const openai = makeProvider("openai", 10, ({ key, value }) => { + if (key === "model") { + return { handled: true, overrides: { model: value } }; + } + return undefined; + }); + const elevenlabsModel = makeProvider("elevenlabs", 20, ({ key, value }) => { + if (key === "model") { + return { handled: true, overrides: { modelId: value } }; + } + return undefined; + }); + + const result = parseTtsDirectives("[[tts:provider=elevenlabs model=eleven_v3]]", fullPolicy, { + providers: [openai, elevenlabsModel], + }); + + expect(result.overrides.provider).toBe("elevenlabs"); + expect(result.overrides.providerOverrides?.elevenlabs).toEqual({ modelId: "eleven_v3" }); + expect(result.overrides.providerOverrides?.openai).toBeUndefined(); + expect(result.warnings).toEqual([]); + }); + + it("warns instead of routing prefixed tokens to another provider when provider is explicit", () => { + const result = parseTtsDirectives( + "[[tts:provider=elevenlabs openai_model=gpt-4o-mini-tts]]", + fullPolicy, + { providers: [elevenlabs, minimax] }, + ); + + expect(result.overrides.provider).toBe("elevenlabs"); + expect(result.overrides.providerOverrides).toBeUndefined(); + expect(result.warnings).toContain('unsupported elevenlabs directive key "openai_model"'); + }); + + it("passes the selected provider id to the chosen provider parser", () => { + let selectedProvider: string | undefined; + const selected = makeProvider("selected", 10, (ctx) => { + selectedProvider = ctx.selectedProvider; + return { handled: true, overrides: { voice: ctx.value } }; + }); + + const result = parseTtsDirectives("[[tts:provider=selected voice=test]]", fullPolicy, { + providers: [selected], + }); + + expect(result.overrides.providerOverrides?.selected).toEqual({ voice: "test" }); + expect(selectedProvider).toBe("selected"); + }); + + it("resolves explicit provider aliases without rewriting the requested provider value", () => { + const microsoft = makeProvider( + "microsoft", + 10, + ({ key, value }) => + key === "voice" ? { handled: true, overrides: { voice: value } } : undefined, + { aliases: ["edge"] }, + ); + + const result = parseTtsDirectives( + "[[tts:provider=edge voice=en-US-MichelleNeural]]", + fullPolicy, + { + providers: [microsoft], + }, + ); + + expect(result.overrides.provider).toBe("edge"); + expect(result.overrides.providerOverrides?.microsoft).toEqual({ + voice: "en-US-MichelleNeural", + }); + expect(result.warnings).toEqual([]); + }); + + it("warns once and drops non-provider tokens when the explicit provider is unknown", () => { + const result = parseTtsDirectives("[[tts:provider=missing speed=1.2 style=0.4]]", fullPolicy, { + providers: [elevenlabs, minimax], + }); + + expect(result.overrides.provider).toBe("missing"); + expect(result.overrides.providerOverrides).toBeUndefined(); + expect(result.warnings).toEqual(['unknown provider "missing"']); }); it("keeps last-wins provider semantics", () => { diff --git a/src/tts/directives.ts b/src/tts/directives.ts index aeb3df5da76..8cc00336900 100644 --- a/src/tts/directives.ts +++ b/src/tts/directives.ts @@ -64,6 +64,21 @@ function prioritizeProvider( return [preferredProvider, ...providers.filter((provider) => provider.id !== providerId)]; } +function resolveDirectiveProvider( + providers: readonly SpeechProviderPlugin[], + providerId: string, +): SpeechProviderPlugin | undefined { + const normalized = normalizeLowercaseStringOrEmpty(providerId); + if (!normalized) { + return undefined; + } + return providers.find( + (provider) => + provider.id === normalized || + provider.aliases?.some((alias) => normalizeLowercaseStringOrEmpty(alias) === normalized), + ); +} + function collectMarkdownCodeRanges(text: string): TextRange[] { const ranges: TextRange[] = []; const addMatches = (regex: RegExp) => { @@ -255,13 +270,26 @@ export function parseTtsDirectives( } } - let orderedProviders: SpeechProviderPlugin[] | undefined; - const getOrderedProviders = () => { - orderedProviders ??= prioritizeProvider( + let directiveProviders: SpeechProviderPlugin[] | undefined; + const getDirectiveProviders = () => { + if (directiveProviders) { + return directiveProviders; + } + if (declaredProviderId) { + const declaredProvider = resolveDirectiveProvider(getProviders(), declaredProviderId); + if (!declaredProvider) { + warnings.push(`unknown provider "${declaredProviderId}"`); + directiveProviders = []; + return directiveProviders; + } + directiveProviders = [declaredProvider]; + return directiveProviders; + } + directiveProviders = prioritizeProvider( getProviders(), - declaredProviderId ?? normalizeLowercaseStringOrEmpty(options?.preferredProviderId), + normalizeLowercaseStringOrEmpty(options?.preferredProviderId), ); - return orderedProviders; + return directiveProviders; }; for (const token of tokens) { @@ -279,11 +307,14 @@ export function parseTtsDirectives( continue; } - for (const provider of getOrderedProviders()) { + let handled = false; + const directiveProviders = getDirectiveProviders(); + for (const provider of directiveProviders) { const parsed = provider.parseDirectiveToken?.({ key, value: rawValue, policy, + selectedProvider: declaredProviderId ? provider.id : undefined, providerConfig: resolveDirectiveProviderConfig(provider, options), currentOverrides: overrides.providerOverrides?.[provider.id], }); @@ -302,8 +333,12 @@ export function parseTtsDirectives( if (parsed.warnings?.length) { warnings.push(...parsed.warnings); } + handled = true; break; } + if (!handled && declaredProviderId && directiveProviders.length > 0) { + warnings.push(`unsupported ${declaredProviderId} directive key "${key}"`); + } } return ""; }); diff --git a/src/tts/provider-types.ts b/src/tts/provider-types.ts index 39aa9eea8f4..48c010e64a3 100644 --- a/src/tts/provider-types.ts +++ b/src/tts/provider-types.ts @@ -96,6 +96,7 @@ export type SpeechDirectiveTokenParseContext = { key: string; value: string; policy: SpeechModelOverridePolicy; + selectedProvider?: SpeechProviderId; providerConfig?: SpeechProviderConfig; currentOverrides?: SpeechProviderOverrides; };