From 0020a327b9842dc73862869cfbe6a43e402f14b5 Mon Sep 17 00:00:00 2001
From: zqchris
Date: Mon, 20 Apr 2026 19:24:16 +0800
Subject: [PATCH] fix(agents): defuse unicode-whitespace MEDIA lines

---
 src/agents/tools/tts-tool.test.ts | 17 +++++++++++++++++
 src/agents/tools/tts-tool.ts      |  2 +-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/agents/tools/tts-tool.test.ts b/src/agents/tools/tts-tool.test.ts
index b057a7a2210..18786c29c57 100644
--- a/src/agents/tools/tts-tool.test.ts
+++ b/src/agents/tools/tts-tool.test.ts
@@ -81,6 +81,23 @@ describe("createTtsTool", () => {
     expect(rendered).toContain("[\u2060[audio_as_voice]]");
   });
 
+  it("defuses MEDIA lines with non-ASCII leading whitespace", async () => {
+    textToSpeechSpy.mockResolvedValue({
+      success: true,
+      audioPath: "/tmp/reply.opus",
+      provider: "test",
+      voiceCompatible: true,
+    });
+
+    const spoken = "line1\n\u00A0MEDIA:/tmp/secret.png";
+    const tool = createTtsTool();
+    const result = await tool.execute("call-1", { text: spoken });
+
+    const rendered = (result.content as Array<{ type: string; text: string }>)[0].text;
+    expect(rendered).toContain("\u00A0\u2060MEDIA:/tmp/secret.png");
+    expect(rendered).not.toMatch(/^\u00A0MEDIA:/m);
+  });
+
   it("defuses fenced-code delimiters embedded in the spoken text", async () => {
     textToSpeechSpy.mockResolvedValue({
       success: true,
diff --git a/src/agents/tools/tts-tool.ts b/src/agents/tools/tts-tool.ts
index 0787fb9f0b7..1effdb065f9 100644
--- a/src/agents/tools/tts-tool.ts
+++ b/src/agents/tools/tts-tool.ts
@@ -26,7 +26,7 @@ const TtsToolSchema = Type.Object({
  */
 function sanitizeTranscriptForToolContent(text: string): string {
   return text
-    .replace(/^([ \t]*)MEDIA:/gim, "$1\u2060MEDIA:")
+    .replace(/^([^\S\r\n]*)MEDIA:/gim, "$1\u2060MEDIA:")
     .replace(/\[\[/g, "[\u2060[")
     .replace(/^([ \t]*)(`{3,})/gm, (_match, indent: string, fence: string) => {
       const [first = "", ...rest] = fence;