fix(tts): parse bare tags and ignore code examples

This commit is contained in:
Peter Steinberger
2026-04-25 04:13:09 +01:00
parent a126a9013d
commit b13545355d
2 changed files with 123 additions and 5 deletions

View File

@@ -166,4 +166,55 @@ describe("parseTtsDirectives provider-aware routing", () => {
expect(result.overrides.providerOverrides?.minimax).toEqual({ speed: 1.2 });
expect(result.overrides.providerOverrides?.elevenlabs).toBeUndefined();
});
it("accepts bare tts tags as a tagged-mode trigger", () => {
const result = parseTtsDirectives("[[tts]] read this aloud", fullPolicy, {
providers: [elevenlabs, minimax],
});
expect(result.hasDirective).toBe(true);
expect(result.cleanedText).toBe(" read this aloud");
expect(result.ttsText).toBeUndefined();
});
it("accepts plain tts blocks as speak-and-show text", () => {
const result = parseTtsDirectives("[[tts]]hello world[[/tts]]", fullPolicy, {
providers: [elevenlabs, minimax],
});
expect(result.hasDirective).toBe(true);
expect(result.cleanedText).toBe("hello world");
expect(result.ttsText).toBe("hello world");
});
it("strips orphan closing tts tags", () => {
const result = parseTtsDirectives("spoken content[[/tts:text]]", fullPolicy, {
providers: [elevenlabs, minimax],
});
expect(result.hasDirective).toBe(true);
expect(result.cleanedText).toBe("spoken content");
});
it("does not parse tts examples inside markdown code", () => {
const input = [
"Use `[[tts:text]]` for hidden speech.",
"",
"```",
"[[tts:provider=elevenlabs voice=alloy]]",
"```",
"",
"Then continue normally.",
].join("\n");
const result = parseTtsDirectives(input, fullPolicy, {
providers: [elevenlabs, minimax],
});
expect(result).toEqual({
cleanedText: input,
overrides: {},
warnings: [],
hasDirective: false,
});
});
});

View File

@@ -16,6 +16,11 @@ type ParseTtsDirectiveOptions = {
preferredProviderId?: string;
};
type TextRange = {
start: number;
end: number;
};
function buildProviderOrder(left: SpeechProviderPlugin, right: SpeechProviderPlugin): number {
const leftOrder = left.autoSelectOrder ?? Number.MAX_SAFE_INTEGER;
const rightOrder = right.autoSelectOrder ?? Number.MAX_SAFE_INTEGER;
@@ -53,6 +58,46 @@ function prioritizeProvider(
return [preferredProvider, ...providers.filter((provider) => provider.id !== providerId)];
}
function collectMarkdownCodeRanges(text: string): TextRange[] {
const ranges: TextRange[] = [];
const addMatches = (regex: RegExp) => {
for (const match of text.matchAll(regex)) {
if (match.index == null) {
continue;
}
ranges.push({ start: match.index, end: match.index + match[0].length });
}
};
addMatches(/```[\s\S]*?```/g);
addMatches(/~~~[\s\S]*?~~~/g);
addMatches(/^(?: {4}|\t).*(?:\n|$)/gm);
addMatches(/`+[^`\n]*`+/g);
return ranges.toSorted((left, right) => left.start - right.start);
}
function isInsideRange(index: number, ranges: readonly TextRange[]): boolean {
return ranges.some((range) => index >= range.start && index < range.end);
}
function replaceOutsideMarkdownCode(
text: string,
regex: RegExp,
replace: (match: string, captures: readonly string[]) => string,
): string {
const codeRanges = collectMarkdownCodeRanges(text);
return text.replace(regex, (...args: unknown[]) => {
const match = String(args[0]);
const offset = args.at(-2);
if (typeof offset === "number" && isInsideRange(offset, codeRanges)) {
return match;
}
const captures = args.slice(1, -2).map((capture) => String(capture));
return replace(match, captures);
});
}
export function parseTtsDirectives(
text: string,
policy: SpeechModelOverridePolicy,
@@ -62,7 +107,7 @@ export function parseTtsDirectives(
return { cleanedText: text, overrides: {}, warnings: [], hasDirective: false };
}
if (!/\[\[tts:/iu.test(text)) {
if (!/\[\[\s*\/?\s*tts(?:\s*:|\s*\]\])/iu.test(text)) {
return { cleanedText: text, overrides: {}, warnings: [], hasDirective: false };
}
@@ -76,8 +121,8 @@ export function parseTtsDirectives(
let cleanedText = text;
let hasDirective = false;
const blockRegex = /\[\[tts:text\]\]([\s\S]*?)\[\[\/tts:text\]\]/gi;
cleanedText = cleanedText.replace(blockRegex, (_match, inner: string) => {
const blockRegex = /\[\[\s*tts\s*:\s*text\s*\]\]([\s\S]*?)\[\[\s*\/\s*tts\s*:\s*text\s*\]\]/gi;
cleanedText = replaceOutsideMarkdownCode(cleanedText, blockRegex, (_match, [inner = ""]) => {
hasDirective = true;
if (policy.allowText && overrides.ttsText == null) {
overrides.ttsText = inner.trim();
@@ -85,8 +130,18 @@ export function parseTtsDirectives(
return "";
});
const directiveRegex = /\[\[tts:([^\]]+)\]\]/gi;
cleanedText = cleanedText.replace(directiveRegex, (_match, body: string) => {
const plainBlockRegex = /\[\[\s*tts\s*\]\]([\s\S]*?)\[\[\s*\/\s*tts\s*\]\]/gi;
cleanedText = replaceOutsideMarkdownCode(cleanedText, plainBlockRegex, (_match, [inner = ""]) => {
hasDirective = true;
const visible = inner.trim();
if (policy.allowText && overrides.ttsText == null) {
overrides.ttsText = visible;
}
return visible;
});
const directiveRegex = /\[\[\s*tts\s*:\s*([^\]]+)\]\]/gi;
cleanedText = replaceOutsideMarkdownCode(cleanedText, directiveRegex, (_match, [body = ""]) => {
hasDirective = true;
const tokens = body.split(/\s+/).filter(Boolean);
@@ -168,6 +223,18 @@ export function parseTtsDirectives(
return "";
});
const bareTagRegex = /\[\[\s*tts\s*\]\]/gi;
cleanedText = replaceOutsideMarkdownCode(cleanedText, bareTagRegex, () => {
hasDirective = true;
return "";
});
const closingTagRegex = /\[\[\s*\/\s*tts(?:\s*:\s*[^\]]*)?\]\]/gi;
cleanedText = replaceOutsideMarkdownCode(cleanedText, closingTagRegex, () => {
hasDirective = true;
return "";
});
return {
cleanedText,
ttsText: overrides.ttsText,