fix: unify assistant visible text sanitizers (#61729)

2026-04-17 04:01:05 +00:00 · 2026-04-06 14:38:43 +01:00
parent 980439b9e6
commit 712479eea1
6 changed files with 258 additions and 235 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai
 - Agents/context overflow: combine oversized and aggregate tool-result recovery in one repair pass, and restore a total-context overflow backstop during tool loops so recoverable sessions retry instead of failing early. (#61651) Thanks @Takhoffman.
 - Gateway/containers: auto-bind to `0.0.0.0` during container startup for Docker and Podman compatibility, while keeping host-side status and doctor checks on the hardened loopback default when `gateway.bind` is unset. (#61818) Thanks @openperf.
 - TUI/status: route `/status` through the shared session-status command and move the old gateway-wide diagnostic summary to `/gateway-status` (`/gwstatus`). Thanks @vincentkoc.
+- Agents/history: use one shared assistant-visible sanitizer across embedded delivery and chat-history extraction so leaked `<tool_call>` and `<tool_result>` XML blocks stay hidden from user-facing replies. (#61729) Thanks @openperf.

 ## 2026.4.5

--- a/src/agents/pi-embedded-utils.ts
+++ b/src/agents/pi-embedded-utils.ts
@@ -6,230 +6,21 @@ import {
  parseAssistantTextSignature,
  type AssistantPhase,
 } from "../shared/chat-message-content.js";
-import { stripToolCallXmlTags } from "../shared/text/assistant-visible-text.js";
+import { sanitizeAssistantVisibleText } from "../shared/text/assistant-visible-text.js";
 import { stripReasoningTagsFromText } from "../shared/text/reasoning-tags.js";
 import { sanitizeUserFacingText } from "./pi-embedded-helpers.js";
 import { formatToolDetail, resolveToolDisplay } from "./tool-display.js";

+export {
+  stripDowngradedToolCallText,
+  stripMinimaxToolCallXml,
+} from "../shared/text/assistant-visible-text.js";
+export { stripModelSpecialTokens } from "../shared/text/model-special-tokens.js";
+
 export function isAssistantMessage(msg: AgentMessage | undefined): msg is AssistantMessage {
  return msg?.role === "assistant";
 }

-/**
- * Strip malformed Minimax tool invocations that leak into text content.
- * Minimax sometimes embeds tool calls as XML in text blocks instead of
- * proper structured tool calls. This removes:
- * - <invoke name="...">...</invoke> blocks
- * - </minimax:tool_call> closing tags
- */
-export function stripMinimaxToolCallXml(text: string): string {
-  if (!text) {
-    return text;
-  }
-  if (!/minimax:tool_call/i.test(text)) {
-    return text;
-  }
-
-  // Remove <invoke ...>...</invoke> blocks (non-greedy to handle multiple).
-  let cleaned = text.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, "");
-
-  // Remove stray minimax tool tags.
-  cleaned = cleaned.replace(/<\/?minimax:tool_call>/gi, "");
-
-  return cleaned;
-}
-
-/**
- * Strip model control tokens leaked into assistant text output.
- *
- * Models like GLM-5 and DeepSeek sometimes emit internal delimiter tokens
- * (e.g. `<|assistant|>`, `<|tool_call_result_begin|>`, `<｜begin▁of▁sentence｜>`)
- * in their responses. These use the universal `<|...|>` convention (ASCII or
- * full-width pipe variants) and should never reach end users.
- *
- * This is a provider bug — no upstream fix tracked yet.
- * Remove this function when upstream providers stop leaking tokens.
- * @see https://github.com/openclaw/openclaw/issues/40020
- */
-// Match both ASCII pipe <|...|> and full-width pipe <｜...｜> (U+FF5C) variants.
-const MODEL_SPECIAL_TOKEN_RE = /<[|｜][^|｜]*[|｜]>/g;
-
-export function stripModelSpecialTokens(text: string): string {
-  if (!text) {
-    return text;
-  }
-  if (!MODEL_SPECIAL_TOKEN_RE.test(text)) {
-    return text;
-  }
-  MODEL_SPECIAL_TOKEN_RE.lastIndex = 0;
-  return text.replace(MODEL_SPECIAL_TOKEN_RE, " ").replace(/  +/g, " ").trim();
-}
-
-/**
- * Strip downgraded tool call text representations that leak into text content.
- * When replaying history to Gemini, tool calls without `thought_signature` are
- * downgraded to text blocks like `[Tool Call: name (ID: ...)]`. These should
- * not be shown to users.
- */
-export function stripDowngradedToolCallText(text: string): string {
-  if (!text) {
-    return text;
-  }
-  if (!/\[Tool (?:Call|Result)/i.test(text) && !/\[Historical context/i.test(text)) {
-    return text;
-  }
-
-  const consumeJsonish = (
-    input: string,
-    start: number,
-    options?: { allowLeadingNewlines?: boolean },
-  ): number | null => {
-    const { allowLeadingNewlines = false } = options ?? {};
-    let index = start;
-    while (index < input.length) {
-      const ch = input[index];
-      if (ch === " " || ch === "\t") {
-        index += 1;
-        continue;
-      }
-      if (allowLeadingNewlines && (ch === "\n" || ch === "\r")) {
-        index += 1;
-        continue;
-      }
-      break;
-    }
-    if (index >= input.length) {
-      return null;
-    }
-
-    const startChar = input[index];
-    if (startChar === "{" || startChar === "[") {
-      let depth = 0;
-      let inString = false;
-      let escape = false;
-      for (let i = index; i < input.length; i += 1) {
-        const ch = input[i];
-        if (inString) {
-          if (escape) {
-            escape = false;
-          } else if (ch === "\\") {
-            escape = true;
-          } else if (ch === '"') {
-            inString = false;
-          }
-          continue;
-        }
-        if (ch === '"') {
-          inString = true;
-          continue;
-        }
-        if (ch === "{" || ch === "[") {
-          depth += 1;
-          continue;
-        }
-        if (ch === "}" || ch === "]") {
-          depth -= 1;
-          if (depth === 0) {
-            return i + 1;
-          }
-        }
-      }
-      return null;
-    }
-
-    if (startChar === '"') {
-      let escape = false;
-      for (let i = index + 1; i < input.length; i += 1) {
-        const ch = input[i];
-        if (escape) {
-          escape = false;
-          continue;
-        }
-        if (ch === "\\") {
-          escape = true;
-          continue;
-        }
-        if (ch === '"') {
-          return i + 1;
-        }
-      }
-      return null;
-    }
-
-    let end = index;
-    while (end < input.length && input[end] !== "\n" && input[end] !== "\r") {
-      end += 1;
-    }
-    return end;
-  };
-
-  const stripToolCalls = (input: string): string => {
-    const markerRe = /\[Tool Call:[^\]]*\]/gi;
-    let result = "";
-    let cursor = 0;
-    for (const match of input.matchAll(markerRe)) {
-      const start = match.index ?? 0;
-      if (start < cursor) {
-        continue;
-      }
-      result += input.slice(cursor, start);
-      let index = start + match[0].length;
-      while (index < input.length && (input[index] === " " || input[index] === "\t")) {
-        index += 1;
-      }
-      if (input[index] === "\r") {
-        index += 1;
-        if (input[index] === "\n") {
-          index += 1;
-        }
-      } else if (input[index] === "\n") {
-        index += 1;
-      }
-      while (index < input.length && (input[index] === " " || input[index] === "\t")) {
-        index += 1;
-      }
-      if (input.slice(index, index + 9).toLowerCase() === "arguments") {
-        index += 9;
-        if (input[index] === ":") {
-          index += 1;
-        }
-        if (input[index] === " ") {
-          index += 1;
-        }
-        const end = consumeJsonish(input, index, { allowLeadingNewlines: true });
-        if (end !== null) {
-          index = end;
-        }
-      }
-      if (
-        (input[index] === "\n" || input[index] === "\r") &&
-        (result.endsWith("\n") || result.endsWith("\r") || result.length === 0)
-      ) {
-        if (input[index] === "\r") {
-          index += 1;
-        }
-        if (input[index] === "\n") {
-          index += 1;
-        }
-      }
-      cursor = index;
-    }
-    result += input.slice(cursor);
-    return result;
-  };
-
-  // Remove [Tool Call: name (ID: ...)] blocks and their Arguments.
-  let cleaned = stripToolCalls(text);
-
-  // Remove [Tool Result for ID ...] blocks and their content.
-  cleaned = cleaned.replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, "");
-
-  // Remove [Historical context: ...] markers (self-contained within brackets).
-  cleaned = cleaned.replace(/\[Historical context:[^\]]*\]\n?/gi, "");
-
-  return cleaned.trim();
-}
-
 /**
 * Strip thinking tags and their content from text.
 * This is a safety net for cases where the model outputs <think> tags
@@ -240,11 +31,7 @@ export function stripThinkingTagsFromText(text: string): string {
 }

 function sanitizeAssistantText(text: string): string {
-  return stripThinkingTagsFromText(
-    stripToolCallXmlTags(
-      stripDowngradedToolCallText(stripModelSpecialTokens(stripMinimaxToolCallXml(text))),
-    ),
-  ).trim();
+  return sanitizeAssistantVisibleText(text);
 }

 function finalizeAssistantExtraction(msg: AssistantMessage, extracted: string): string {
--- a/src/agents/tools/chat-history-text.ts
+++ b/src/agents/tools/chat-history-text.ts
@@ -1,12 +1,7 @@
 import { extractTextFromChatContent } from "../../shared/chat-content.js";
+import { sanitizeAssistantVisibleTextWithOptions } from "../../shared/text/assistant-visible-text.js";
 import { sanitizeUserFacingText } from "../pi-embedded-helpers.js";
-import {
-  extractAssistantVisibleText,
-  stripDowngradedToolCallText,
-  stripMinimaxToolCallXml,
-  stripModelSpecialTokens,
-  stripThinkingTagsFromText,
-} from "../pi-embedded-utils.js";
+import { extractAssistantVisibleText } from "../pi-embedded-utils.js";

 export function stripToolMessages(messages: unknown[]): unknown[] {
  return messages.filter((msg) => {
@@ -23,12 +18,7 @@ export function stripToolMessages(messages: unknown[]): unknown[] {
 * This ensures user-facing text doesn't leak internal tool representations.
 */
 export function sanitizeTextContent(text: string): string {
-  if (!text) {
-    return text;
-  }
-  return stripThinkingTagsFromText(
-    stripDowngradedToolCallText(stripModelSpecialTokens(stripMinimaxToolCallXml(text))),
-  );
+  return sanitizeAssistantVisibleTextWithOptions(text, { trim: "none" });
 }

 export function hasAssistantPhaseMetadata(message: unknown): boolean {
--- a/src/agents/tools/sessions.test.ts
+++ b/src/agents/tools/sessions.test.ts
@@ -156,6 +156,13 @@ describe("sanitizeTextContent", () => {
    expect(result).not.toContain("Tool Call");
  });

+  it("strips tool_result XML via the shared assistant-visible sanitizer", () => {
+    const input = 'Prefix\n<tool_result>{"output":"hidden"}</tool_result>\nSuffix';
+    const result = sanitizeTextContent(input).trim();
+    expect(result).toBe("Prefix\n\nSuffix");
+    expect(result).not.toContain("tool_result");
+  });
+
  it("strips thinking tags", () => {
    const input = "Before <think>secret</think> after";
    const result = sanitizeTextContent(input).trim();
--- a/src/shared/text/assistant-visible-text.test.ts
+++ b/src/shared/text/assistant-visible-text.test.ts
@@ -1,5 +1,8 @@
 import { describe, expect, it } from "vitest";
-import { stripAssistantInternalScaffolding } from "./assistant-visible-text.js";
+import {
+  sanitizeAssistantVisibleText,
+  stripAssistantInternalScaffolding,
+} from "./assistant-visible-text.js";
 import { stripModelSpecialTokens } from "./model-special-tokens.js";

 describe("stripAssistantInternalScaffolding", () => {
@@ -393,3 +396,29 @@ describe("stripAssistantInternalScaffolding", () => {
    });
  });
 });
+
+describe("sanitizeAssistantVisibleText", () => {
+  it("strips minimax, tool XML, downgraded tool markers, and think tags in one pass", () => {
+    const input = [
+      '<invoke name="read">payload</invoke></minimax:tool_call>',
+      '<tool_result>{"output":"hidden"}</tool_result>',
+      "[Tool Call: read (ID: toolu_1)]",
+      'Arguments: {"path":"/tmp/x"}',
+      "<think>secret</think>",
+      "Visible answer",
+    ].join("\n");
+
+    expect(sanitizeAssistantVisibleText(input)).toBe("Visible answer");
+  });
+
+  it("strips relevant-memories blocks on the canonical user-visible path", () => {
+    const input = [
+      "<relevant-memories>",
+      "internal note",
+      "</relevant-memories>",
+      "Visible answer",
+    ].join("\n");
+
+    expect(sanitizeAssistantVisibleText(input)).toBe("Visible answer");
+  });
+});
--- a/src/shared/text/assistant-visible-text.ts
+++ b/src/shared/text/assistant-visible-text.ts
@@ -249,6 +249,186 @@ export function stripToolCallXmlTags(text: string): string {
  return result;
 }

+/**
+ * Strip malformed Minimax tool invocations that leak into text content.
+ * Minimax sometimes embeds tool calls as XML in text blocks instead of
+ * proper structured tool calls. Text without a `minimax:tool_call` marker is returned unchanged.
+ */
+export function stripMinimaxToolCallXml(text: string): string {
+  if (!text || !/minimax:tool_call/i.test(text)) {
+    return text;
+  }
+
+  // Remove <invoke ...>...</invoke> blocks (non-greedy to handle multiple).
+  let cleaned = text.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, "");
+
+  // Remove stray minimax tool tags.
+  cleaned = cleaned.replace(/<\/?minimax:tool_call>/gi, "");
+
+  return cleaned;
+}
+
+/**
+ * Strip downgraded tool call text representations that leak into user-visible
+ * text content when replaying history across providers.
+ */
+export function stripDowngradedToolCallText(text: string): string {
+  if (!text) {
+    return text;
+  }
+  if (!/\[Tool (?:Call|Result)/i.test(text) && !/\[Historical context/i.test(text)) {
+    return text;
+  }
+
+  const consumeJsonish = (
+    input: string,
+    start: number,
+    options?: { allowLeadingNewlines?: boolean },
+  ): number | null => {
+    const { allowLeadingNewlines = false } = options ?? {};
+    let index = start;
+    while (index < input.length) {
+      const ch = input[index];
+      if (ch === " " || ch === "\t") {
+        index += 1;
+        continue;
+      }
+      if (allowLeadingNewlines && (ch === "\n" || ch === "\r")) {
+        index += 1;
+        continue;
+      }
+      break;
+    }
+    if (index >= input.length) {
+      return null;
+    }
+
+    const startChar = input[index];
+    if (startChar === "{" || startChar === "[") {
+      let depth = 0;
+      let inString = false;
+      let escape = false;
+      for (let idx = index; idx < input.length; idx += 1) {
+        const ch = input[idx];
+        if (inString) {
+          if (escape) {
+            escape = false;
+          } else if (ch === "\\") {
+            escape = true;
+          } else if (ch === '"') {
+            inString = false;
+          }
+          continue;
+        }
+        if (ch === '"') {
+          inString = true;
+          continue;
+        }
+        if (ch === "{" || ch === "[") {
+          depth += 1;
+        } else if (ch === "}" || ch === "]") {
+          depth -= 1;
+          if (depth === 0) {
+            return idx + 1;
+          }
+        }
+      }
+      return null;
+    }
+
+    if (startChar === '"') {
+      let escape = false;
+      for (let idx = index + 1; idx < input.length; idx += 1) {
+        const ch = input[idx];
+        if (escape) {
+          escape = false;
+          continue;
+        }
+        if (ch === "\\") {
+          escape = true;
+          continue;
+        }
+        if (ch === '"') {
+          return idx + 1;
+        }
+      }
+      return null;
+    }
+
+    let end = index;
+    while (end < input.length && input[end] !== "\n" && input[end] !== "\r") {
+      end += 1;
+    }
+    return end;
+  };
+
+  const stripToolCalls = (input: string): string => {
+    const toolCallRe = /\[Tool Call:[^\]]*\]/gi;
+    let result = "";
+    let cursor = 0;
+    for (const match of input.matchAll(toolCallRe)) {
+      const start = match.index ?? 0;
+      if (start < cursor) {
+        continue;
+      }
+      result += input.slice(cursor, start);
+      let index = start + match[0].length;
+      while (index < input.length && (input[index] === " " || input[index] === "\t")) {
+        index += 1;
+      }
+      if (input[index] === "\r") {
+        index += 1;
+        if (input[index] === "\n") {
+          index += 1;
+        }
+      } else if (input[index] === "\n") {
+        index += 1;
+      }
+      while (index < input.length && (input[index] === " " || input[index] === "\t")) {
+        index += 1;
+      }
+      if (input.slice(index, index + 9).toLowerCase() === "arguments") {
+        index += 9;
+        if (input[index] === ":") {
+          index += 1;
+        }
+        if (input[index] === " ") {
+          index += 1;
+        }
+        const end = consumeJsonish(input, index, { allowLeadingNewlines: true });
+        if (end !== null) {
+          index = end;
+        }
+      }
+      if (
+        (input[index] === "\n" || input[index] === "\r") &&
+        (result.endsWith("\n") || result.endsWith("\r") || result.length === 0)
+      ) {
+        if (input[index] === "\r") {
+          index += 1;
+        }
+        if (input[index] === "\n") {
+          index += 1;
+        }
+      }
+      cursor = index;
+    }
+    result += input.slice(cursor);
+    return result;
+  };
+
+  // Remove [Tool Call: name (ID: ...)] blocks and their Arguments.
+  let cleaned = stripToolCalls(text);
+
+  // Remove [Tool Result for ID ...] blocks and their content.
+  cleaned = cleaned.replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, "");
+
+  // Remove [Historical context: ...] markers (self-contained within brackets).
+  cleaned = cleaned.replace(/\[Historical context:[^\]]*\]\n?/gi, "");
+
+  return cleaned.trim();
+}
+
 function stripRelevantMemoriesTags(text: string): string {
  if (!text || !MEMORY_TAG_QUICK_RE.test(text)) {
    return text;
@@ -293,3 +473,32 @@ export function stripAssistantInternalScaffolding(text: string): string {
  const withoutSpecialTokens = stripModelSpecialTokens(withoutToolCalls);
  return withoutSpecialTokens.trimStart();
 }
+
+/**
+ * Canonical user-visible assistant text sanitizer for delivery and history
+ * extraction paths. Keeps prose, removes internal scaffolding.
+ */
+export function sanitizeAssistantVisibleText(text: string): string {
+  return sanitizeAssistantVisibleTextWithOptions(text, { trim: "both" });
+}
+
+export function sanitizeAssistantVisibleTextWithOptions(
+  text: string,
+  options?: { trim?: "none" | "both" },
+): string {
+  if (!text) {
+    return text;
+  }
+  const trimMode = options?.trim ?? "both";
+
+  const withoutMinimaxToolXml = stripMinimaxToolCallXml(text);
+  const withoutSpecialTokens = stripModelSpecialTokens(withoutMinimaxToolXml);
+  const withoutMemories = stripRelevantMemoriesTags(withoutSpecialTokens);
+  const withoutToolCallXml = stripToolCallXmlTags(withoutMemories);
+  const withoutDowngradedToolText = stripDowngradedToolCallText(withoutToolCallXml);
+  const sanitized = stripReasoningTagsFromText(withoutDowngradedToolText, {
+    mode: "strict",
+    trim: trimMode,
+  });
+  return trimMode === "both" ? sanitized.trim() : sanitized;
+}