From b13545355d42701a1b2a5838b18101777c1bd808 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 04:13:09 +0100 Subject: [PATCH] fix(tts): parse bare tags and ignore code examples --- src/tts/directives.test.ts | 51 +++++++++++++++++++++++++ src/tts/directives.ts | 77 +++++++++++++++++++++++++++++++++++--- 2 files changed, 123 insertions(+), 5 deletions(-) diff --git a/src/tts/directives.test.ts b/src/tts/directives.test.ts index cee0ef6902a..f111f176397 100644 --- a/src/tts/directives.test.ts +++ b/src/tts/directives.test.ts @@ -166,4 +166,55 @@ describe("parseTtsDirectives provider-aware routing", () => { expect(result.overrides.providerOverrides?.minimax).toEqual({ speed: 1.2 }); expect(result.overrides.providerOverrides?.elevenlabs).toBeUndefined(); }); + + it("accepts bare tts tags as a tagged-mode trigger", () => { + const result = parseTtsDirectives("[[tts]] read this aloud", fullPolicy, { + providers: [elevenlabs, minimax], + }); + + expect(result.hasDirective).toBe(true); + expect(result.cleanedText).toBe(" read this aloud"); + expect(result.ttsText).toBeUndefined(); + }); + + it("accepts plain tts blocks as speak-and-show text", () => { + const result = parseTtsDirectives("[[tts]]hello world[[/tts]]", fullPolicy, { + providers: [elevenlabs, minimax], + }); + + expect(result.hasDirective).toBe(true); + expect(result.cleanedText).toBe("hello world"); + expect(result.ttsText).toBe("hello world"); + }); + + it("strips orphan closing tts tags", () => { + const result = parseTtsDirectives("spoken content[[/tts:text]]", fullPolicy, { + providers: [elevenlabs, minimax], + }); + + expect(result.hasDirective).toBe(true); + expect(result.cleanedText).toBe("spoken content"); + }); + + it("does not parse tts examples inside markdown code", () => { + const input = [ + "Use `[[tts:text]]` for hidden speech.", + "", + "```", + "[[tts:provider=elevenlabs voice=alloy]]", + "```", + "", + "Then continue normally.", + ].join("\n"); + const result = 
/** Half-open `[start, end)` span of character offsets within a string. */
type TextRange = {
  start: number;
  end: number;
};

/**
 * Collects the spans of `text` that look like Markdown code: backtick fences,
 * tilde fences, lines indented by four spaces or a tab, and inline backtick
 * spans.
 *
 * The spans may overlap (for example, a fence delimiter is also picked up by
 * the inline-code pattern); callers only need membership tests, so overlaps
 * are harmless.
 *
 * @param text - The text to scan.
 * @returns The matched ranges, ordered by ascending `start`.
 */
function collectMarkdownCodeRanges(text: string): TextRange[] {
  const ranges: TextRange[] = [];
  const addMatches = (regex: RegExp): void => {
    for (const match of text.matchAll(regex)) {
      if (match.index == null) {
        continue;
      }
      ranges.push({ start: match.index, end: match.index + match[0].length });
    }
  };

  addMatches(/```[\s\S]*?```/g);
  addMatches(/~~~[\s\S]*?~~~/g);
  addMatches(/^(?: {4}|\t).*(?:\n|$)/gm);
  addMatches(/`+[^`\n]*`+/g);

  // `ranges` is local, so sorting in place is safe and avoids depending on
  // the ES2023-only `toSorted`.
  return ranges.sort((left, right) => left.start - right.start);
}

/**
 * Reports whether `index` falls inside any of the half-open `ranges`.
 *
 * @param index - Character offset to test.
 * @param ranges - Ranges to test against; `end` is exclusive.
 */
function isInsideRange(index: number, ranges: readonly TextRange[]): boolean {
  return ranges.some((range) => index >= range.start && index < range.end);
}

/**
 * Runs `text.replace(regex, ...)` but leaves untouched any match that starts
 * or ends inside a Markdown code span, as found by
 * {@link collectMarkdownCodeRanges}.
 *
 * @param text - The text to rewrite.
 * @param regex - Pattern to search for; use the `g` flag to replace every match.
 * @param replace - Produces the replacement for a match found outside code.
 *   `captures` holds the numbered capture groups in order; a group that did
 *   not participate in the match is passed as `""`.
 * @returns The rewritten text.
 */
function replaceOutsideMarkdownCode(
  text: string,
  regex: RegExp,
  replace: (match: string, captures: readonly string[]) => string,
): string {
  const codeRanges = collectMarkdownCodeRanges(text);
  return text.replace(regex, (...args: unknown[]) => {
    const match = String(args[0]);

    // Replacer arguments are (match, ...captures, offset, input[, groups]).
    // Captures are only ever strings or undefined, so the first number after
    // the match is the offset. Counting from the end is unreliable because the
    // trailing `groups` object is present only for named capture groups.
    const offsetIndex = args.findIndex((arg, index) => index > 0 && typeof arg === "number");
    const offset = offsetIndex === -1 ? undefined : args[offsetIndex];

    if (typeof offset === "number") {
      // Test both ends: a block whose closing tag sits inside a code span is
      // not a real block, and rewriting it would eat the code example. A match
      // that merely contains a whole code span is still rewritten.
      const lastIndex = offset + Math.max(match.length, 1) - 1;
      if (isInsideRange(offset, codeRanges) || isInsideRange(lastIndex, codeRanges)) {
        return match;
      }
    }

    const captureEnd = offsetIndex === -1 ? Math.max(1, args.length - 2) : offsetIndex;
    // An unmatched optional group arrives as undefined; pass "" rather than
    // the literal text "undefined".
    const captures = args
      .slice(1, captureEnd)
      .map((capture) => (typeof capture === "string" ? capture : ""));
    return replace(match, captures);
  });
}
replaceOutsideMarkdownCode(cleanedText, bareTagRegex, () => { + hasDirective = true; + return ""; + }); + + const closingTagRegex = /\[\[\s*\/\s*tts(?:\s*:\s*[^\]]*)?\]\]/gi; + cleanedText = replaceOutsideMarkdownCode(cleanedText, closingTagRegex, () => { + hasDirective = true; + return ""; + }); + return { cleanedText, ttsText: overrides.ttsText,