fix: unify assistant visible text sanitizers (#61729)

2026-04-27 00:52:05 +00:00 · 2026-04-06 14:38:43 +01:00
parent 980439b9e6
commit 712479eea1
6 changed files with 258 additions and 235 deletions
--- a/src/shared/text/assistant-visible-text.test.ts
+++ b/src/shared/text/assistant-visible-text.test.ts
@@ -1,5 +1,8 @@
 import { describe, expect, it } from "vitest";
-import { stripAssistantInternalScaffolding } from "./assistant-visible-text.js";
+import {
+  sanitizeAssistantVisibleText,
+  stripAssistantInternalScaffolding,
+} from "./assistant-visible-text.js";
 import { stripModelSpecialTokens } from "./model-special-tokens.js";

 describe("stripAssistantInternalScaffolding", () => {
@@ -393,3 +396,29 @@ describe("stripAssistantInternalScaffolding", () => {
    });
  });
 });
+
+describe("sanitizeAssistantVisibleText", () => {
+  it("strips minimax, tool XML, downgraded tool markers, and think tags in one pass", () => {
+    const input = [
+      '<invoke name="read">payload</invoke></minimax:tool_call>',
+      '<tool_result>{"output":"hidden"}</tool_result>',
+      "[Tool Call: read (ID: toolu_1)]",
+      'Arguments: {"path":"/tmp/x"}',
+      "<think>secret</think>",
+      "Visible answer",
+    ].join("\n");
+
+    expect(sanitizeAssistantVisibleText(input)).toBe("Visible answer");
+  });
+
+  it("strips relevant-memories blocks on the canonical user-visible path", () => {
+    const input = [
+      "<relevant-memories>",
+      "internal note",
+      "</relevant-memories>",
+      "Visible answer",
+    ].join("\n");
+
+    expect(sanitizeAssistantVisibleText(input)).toBe("Visible answer");
+  });
+});
--- a/src/shared/text/assistant-visible-text.ts
+++ b/src/shared/text/assistant-visible-text.ts
@@ -249,6 +249,186 @@ export function stripToolCallXmlTags(text: string): string {
  return result;
 }

+/**
+ * Strip malformed Minimax tool invocations that leak into text content.
+ * Minimax sometimes embeds tool calls as XML in text blocks instead of
+ * proper structured tool calls.
+ */
+export function stripMinimaxToolCallXml(text: string): string {
+  if (!text || !/minimax:tool_call/i.test(text)) {
+    return text;
+  }
+
+  // Remove <invoke ...>...</invoke> blocks (non-greedy to handle multiple).
+  let cleaned = text.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, "");
+
+  // Remove stray minimax tool tags.
+  cleaned = cleaned.replace(/<\/?minimax:tool_call>/gi, "");
+
+  return cleaned;
+}
+
+/**
+ * Strip downgraded tool call text representations that leak into user-visible
+ * text content when replaying history across providers.
+ */
+export function stripDowngradedToolCallText(text: string): string {
+  if (!text) {
+    return text;
+  }
+  if (!/\[Tool (?:Call|Result)/i.test(text) && !/\[Historical context/i.test(text)) {
+    return text;
+  }
+
+  const consumeJsonish = (
+    input: string,
+    start: number,
+    options?: { allowLeadingNewlines?: boolean },
+  ): number | null => {
+    const { allowLeadingNewlines = false } = options ?? {};
+    let index = start;
+    while (index < input.length) {
+      const ch = input[index];
+      if (ch === " " || ch === "\t") {
+        index += 1;
+        continue;
+      }
+      if (allowLeadingNewlines && (ch === "\n" || ch === "\r")) {
+        index += 1;
+        continue;
+      }
+      break;
+    }
+    if (index >= input.length) {
+      return null;
+    }
+
+    const startChar = input[index];
+    if (startChar === "{" || startChar === "[") {
+      let depth = 0;
+      let inString = false;
+      let escape = false;
+      for (let idx = index; idx < input.length; idx += 1) {
+        const ch = input[idx];
+        if (inString) {
+          if (escape) {
+            escape = false;
+          } else if (ch === "\\") {
+            escape = true;
+          } else if (ch === '"') {
+            inString = false;
+          }
+          continue;
+        }
+        if (ch === '"') {
+          inString = true;
+          continue;
+        }
+        if (ch === "{" || ch === "[") {
+          depth += 1;
+        } else if (ch === "}" || ch === "]") {
+          depth -= 1;
+          if (depth === 0) {
+            return idx + 1;
+          }
+        }
+      }
+      return null;
+    }
+
+    if (startChar === '"') {
+      let escape = false;
+      for (let idx = index + 1; idx < input.length; idx += 1) {
+        const ch = input[idx];
+        if (escape) {
+          escape = false;
+          continue;
+        }
+        if (ch === "\\") {
+          escape = true;
+          continue;
+        }
+        if (ch === '"') {
+          return idx + 1;
+        }
+      }
+      return null;
+    }
+
+    let end = index;
+    while (end < input.length && input[end] !== "\n" && input[end] !== "\r") {
+      end += 1;
+    }
+    return end;
+  };
+
+  const stripToolCalls = (input: string): string => {
+    const toolCallRe = /\[Tool Call:[^\]]*\]/gi;
+    let result = "";
+    let cursor = 0;
+    for (const match of input.matchAll(toolCallRe)) {
+      const start = match.index ?? 0;
+      if (start < cursor) {
+        continue;
+      }
+      result += input.slice(cursor, start);
+      let index = start + match[0].length;
+      while (index < input.length && (input[index] === " " || input[index] === "\t")) {
+        index += 1;
+      }
+      if (input[index] === "\r") {
+        index += 1;
+        if (input[index] === "\n") {
+          index += 1;
+        }
+      } else if (input[index] === "\n") {
+        index += 1;
+      }
+      while (index < input.length && (input[index] === " " || input[index] === "\t")) {
+        index += 1;
+      }
+      if (input.slice(index, index + 9).toLowerCase() === "arguments") {
+        index += 9;
+        if (input[index] === ":") {
+          index += 1;
+        }
+        if (input[index] === " ") {
+          index += 1;
+        }
+        const end = consumeJsonish(input, index, { allowLeadingNewlines: true });
+        if (end !== null) {
+          index = end;
+        }
+      }
+      if (
+        (input[index] === "\n" || input[index] === "\r") &&
+        (result.endsWith("\n") || result.endsWith("\r") || result.length === 0)
+      ) {
+        if (input[index] === "\r") {
+          index += 1;
+        }
+        if (input[index] === "\n") {
+          index += 1;
+        }
+      }
+      cursor = index;
+    }
+    result += input.slice(cursor);
+    return result;
+  };
+
+  // Remove [Tool Call: name (ID: ...)] blocks and their Arguments.
+  let cleaned = stripToolCalls(text);
+
+  // Remove [Tool Result for ID ...] blocks and their content.
+  cleaned = cleaned.replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, "");
+
+  // Remove [Historical context: ...] markers (self-contained within brackets).
+  cleaned = cleaned.replace(/\[Historical context:[^\]]*\]\n?/gi, "");
+
+  return cleaned.trim();
+}
+
 function stripRelevantMemoriesTags(text: string): string {
  if (!text || !MEMORY_TAG_QUICK_RE.test(text)) {
    return text;
@@ -293,3 +473,32 @@ export function stripAssistantInternalScaffolding(text: string): string {
  const withoutSpecialTokens = stripModelSpecialTokens(withoutToolCalls);
  return withoutSpecialTokens.trimStart();
 }
+
+/**
+ * Canonical user-visible assistant text sanitizer for delivery and history
+ * extraction paths. Keeps prose, removes internal scaffolding.
+ */
+export function sanitizeAssistantVisibleText(text: string): string {
+  return sanitizeAssistantVisibleTextWithOptions(text, { trim: "both" });
+}
+
+export function sanitizeAssistantVisibleTextWithOptions(
+  text: string,
+  options?: { trim?: "none" | "both" },
+): string {
+  if (!text) {
+    return text;
+  }
+  const trimMode = options?.trim ?? "both";
+
+  const withoutMinimaxToolXml = stripMinimaxToolCallXml(text);
+  const withoutSpecialTokens = stripModelSpecialTokens(withoutMinimaxToolXml);
+  const withoutMemories = stripRelevantMemoriesTags(withoutSpecialTokens);
+  const withoutToolCallXml = stripToolCallXmlTags(withoutMemories);
+  const withoutDowngradedToolText = stripDowngradedToolCallText(withoutToolCallXml);
+  const sanitized = stripReasoningTagsFromText(withoutDowngradedToolText, {
+    mode: "strict",
+    trim: trimMode,
+  });
+  return trimMode === "both" ? sanitized.trim() : sanitized;
+}