fix: unify assistant visible text sanitizers (#61729)

This commit is contained in:
Peter Steinberger
2026-04-06 14:38:43 +01:00
parent 980439b9e6
commit 712479eea1
6 changed files with 258 additions and 235 deletions

View File

@@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai
- Agents/context overflow: combine oversized and aggregate tool-result recovery in one repair pass, and restore a total-context overflow backstop during tool loops so recoverable sessions retry instead of failing early. (#61651) Thanks @Takhoffman.
- Gateway/containers: auto-bind to `0.0.0.0` during container startup for Docker and Podman compatibility, while keeping host-side status and doctor checks on the hardened loopback default when `gateway.bind` is unset. (#61818) Thanks @openperf.
- TUI/status: route `/status` through the shared session-status command and move the old gateway-wide diagnostic summary to `/gateway-status` (`/gwstatus`). Thanks @vincentkoc.
- Agents/history: use one shared assistant-visible sanitizer across embedded delivery and chat-history extraction so leaked `<tool_call>` and `<tool_result>` XML blocks stay hidden from user-facing replies. (#61729) Thanks @openperf.
## 2026.4.5

View File

@@ -6,230 +6,21 @@ import {
parseAssistantTextSignature,
type AssistantPhase,
} from "../shared/chat-message-content.js";
import { stripToolCallXmlTags } from "../shared/text/assistant-visible-text.js";
import { sanitizeAssistantVisibleText } from "../shared/text/assistant-visible-text.js";
import { stripReasoningTagsFromText } from "../shared/text/reasoning-tags.js";
import { sanitizeUserFacingText } from "./pi-embedded-helpers.js";
import { formatToolDetail, resolveToolDisplay } from "./tool-display.js";
export {
stripDowngradedToolCallText,
stripMinimaxToolCallXml,
} from "../shared/text/assistant-visible-text.js";
export { stripModelSpecialTokens } from "../shared/text/model-special-tokens.js";
/**
 * Type guard that narrows an agent message to an assistant message.
 *
 * @param msg - Message to inspect; may be `undefined`.
 * @returns `true` only when `msg` is present and its `role` is `"assistant"`.
 */
export function isAssistantMessage(msg: AgentMessage | undefined): msg is AssistantMessage {
  if (msg == null) {
    return false;
  }
  return msg.role === "assistant";
}
/**
 * Remove Minimax tool-invocation XML that leaked into a plain text block.
 *
 * Text is only touched when it mentions `minimax:tool_call` (case-insensitive).
 * In that case two things are deleted:
 * - every `<invoke ...>...</invoke>` block
 * - any `<minimax:tool_call>` / `</minimax:tool_call>` tag
 *
 * @param text - Assistant text that may contain leaked Minimax XML.
 * @returns The text without the XML, or `text` unchanged when it is empty or
 * carries no Minimax marker.
 */
export function stripMinimaxToolCallXml(text: string): string {
  const mentionsMinimax = Boolean(text) && /minimax:tool_call/i.test(text);
  if (!mentionsMinimax) {
    return text;
  }
  // Lazy match so several invoke blocks are removed one by one.
  return text
    .replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, "")
    .replace(/<\/?minimax:tool_call>/gi, "");
}
/**
 * Strip model control tokens leaked into assistant text output.
 *
 * Models like GLM-5 and DeepSeek sometimes emit internal delimiter tokens
 * (e.g. `<|assistant|>`, `<|tool_call_result_begin|>`, or the full-width
 * `<｜begin▁of▁sentence｜>`) in their responses. These use the `<|...|>`
 * convention, written with either the ASCII pipe `|` or the full-width pipe
 * U+FF5C, and should never reach end users.
 *
 * Each token is replaced by a single space, runs of spaces are collapsed, and
 * the result is trimmed. Text without any token is returned unchanged.
 *
 * This is a provider bug — no upstream fix tracked yet.
 * Remove this function when upstream providers stop leaking tokens.
 * @see https://github.com/openclaw/openclaw/issues/40020
 *
 * @param text - Assistant text that may contain leaked control tokens.
 * @returns The text with control tokens removed.
 */
// Match both the ASCII pipe and the full-width pipe (U+FF5C) delimiter variants.
// The full-width pipe is written as an escape so it survives re-encoding.
const MODEL_SPECIAL_TOKEN_RE = /<[|\uFF5C][^|\uFF5C]*[|\uFF5C]>/g;
export function stripModelSpecialTokens(text: string): string {
  if (!text) {
    return text;
  }
  // `replace` resets `lastIndex` on a global regex itself, so no shared regex
  // state leaks between calls (unlike a `test()` pre-check).
  const withoutTokens = text.replace(MODEL_SPECIAL_TOKEN_RE, " ");
  if (withoutTokens === text) {
    // Nothing matched: keep the caller's whitespace exactly as it was.
    return text;
  }
  return withoutTokens.replace(/ +/g, " ").trim();
}
/**
* Strip downgraded tool call text representations that leak into text content.
* When replaying history to Gemini, tool calls without `thought_signature` are
* downgraded to text blocks like `[Tool Call: name (ID: ...)]`. These should
* not be shown to users.
*
* Three marker shapes are removed:
* - `[Tool Call: ...]` together with an optional following `Arguments` payload
* - `[Tool Result for ID ...]` together with the text that follows it, up to the
*   next `[Tool ` marker or the end of the text
* - `[Historical context: ...]`
*
* @param text - Assistant text that may contain downgraded tool markers.
* @returns `text` unchanged when it is empty or contains none of the marker
* prefixes; otherwise the cleaned text with leading/trailing whitespace trimmed.
*/
export function stripDowngradedToolCallText(text: string): string {
if (!text) {
return text;
}
// Cheap pre-check: skip the scanner entirely when no marker prefix is present.
if (!/\[Tool (?:Call|Result)/i.test(text) && !/\[Historical context/i.test(text)) {
return text;
}
// Finds the end (exclusive index) of an argument payload starting at `start`.
// Accepts a bracket/brace-delimited value, a double-quoted string, or a bare
// value running to the end of the line. Returns null when only whitespace
// remains or when a bracketed/quoted value is never closed.
const consumeJsonish = (
input: string,
start: number,
options?: { allowLeadingNewlines?: boolean },
): number | null => {
const { allowLeadingNewlines = false } = options ?? {};
let index = start;
// Skip leading spaces/tabs (and newlines when the caller allows it).
while (index < input.length) {
const ch = input[index];
if (ch === " " || ch === "\t") {
index += 1;
continue;
}
if (allowLeadingNewlines && (ch === "\n" || ch === "\r")) {
index += 1;
continue;
}
break;
}
if (index >= input.length) {
return null;
}
const startChar = input[index];
if (startChar === "{" || startChar === "[") {
// Track nesting depth; brackets inside double-quoted strings are ignored.
let depth = 0;
let inString = false;
let escape = false;
for (let i = index; i < input.length; i += 1) {
const ch = input[i];
if (inString) {
if (escape) {
escape = false;
} else if (ch === "\\") {
escape = true;
} else if (ch === '"') {
inString = false;
}
continue;
}
if (ch === '"') {
inString = true;
continue;
}
if (ch === "{" || ch === "[") {
depth += 1;
continue;
}
if (ch === "}" || ch === "]") {
depth -= 1;
if (depth === 0) {
// Position just past the closer that balances the opening bracket.
return i + 1;
}
}
}
// Unbalanced payload: report failure so the caller leaves it in place.
return null;
}
if (startChar === '"') {
// Quoted string payload: scan to the first unescaped closing quote.
let escape = false;
for (let i = index + 1; i < input.length; i += 1) {
const ch = input[i];
if (escape) {
escape = false;
continue;
}
if (ch === "\\") {
escape = true;
continue;
}
if (ch === '"') {
return i + 1;
}
}
return null;
}
// Bare payload: consume up to (not including) the next line break.
let end = index;
while (end < input.length && input[end] !== "\n" && input[end] !== "\r") {
end += 1;
}
return end;
};
// Removes every `[Tool Call: ...]` marker plus its optional `Arguments` payload.
const stripToolCalls = (input: string): string => {
const markerRe = /\[Tool Call:[^\]]*\]/gi;
let result = "";
let cursor = 0;
for (const match of input.matchAll(markerRe)) {
const start = match.index ?? 0;
// Marker sits inside a payload that was already consumed; ignore it.
if (start < cursor) {
continue;
}
result += input.slice(cursor, start);
let index = start + match[0].length;
// Skip trailing blanks, at most one line break, then leading blanks of the next line.
while (index < input.length && (input[index] === " " || input[index] === "\t")) {
index += 1;
}
if (input[index] === "\r") {
index += 1;
if (input[index] === "\n") {
index += 1;
}
} else if (input[index] === "\n") {
index += 1;
}
while (index < input.length && (input[index] === " " || input[index] === "\t")) {
index += 1;
}
// Optional "Arguments" label (case-insensitive), optional ":" and one space.
if (input.slice(index, index + 9).toLowerCase() === "arguments") {
index += 9;
if (input[index] === ":") {
index += 1;
}
if (input[index] === " ") {
index += 1;
}
const end = consumeJsonish(input, index, { allowLeadingNewlines: true });
// When the payload cannot be delimited, only the label is dropped.
if (end !== null) {
index = end;
}
}
// Swallow one line break after the block only when the kept output is at a
// line start, so removing the block does not leave an empty line behind.
if (
(input[index] === "\n" || input[index] === "\r") &&
(result.endsWith("\n") || result.endsWith("\r") || result.length === 0)
) {
if (input[index] === "\r") {
index += 1;
}
if (input[index] === "\n") {
index += 1;
}
}
cursor = index;
}
result += input.slice(cursor);
return result;
};
// Remove [Tool Call: name (ID: ...)] blocks and their Arguments.
let cleaned = stripToolCalls(text);
// Remove [Tool Result for ID ...] blocks and their content.
cleaned = cleaned.replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, "");
// Remove [Historical context: ...] markers (self-contained within brackets).
cleaned = cleaned.replace(/\[Historical context:[^\]]*\]\n?/gi, "");
return cleaned.trim();
}
/**
* Strip thinking tags and their content from text.
* This is a safety net for cases where the model outputs <think> tags
@@ -240,11 +31,7 @@ export function stripThinkingTagsFromText(text: string): string {
}
// Runs the assistant-text strippers over `text` and returns the result.
// NOTE(review): this body contains two consecutive `return` statements, so the
// `sanitizeAssistantVisibleText(text)` call at the bottom is unreachable as
// written. It looks like the pre-change and post-change bodies of a diff shown
// together — confirm which single return is intended before relying on this.
function sanitizeAssistantText(text: string): string {
// First return: nested chain of individual strippers, trimmed at the end.
return stripThinkingTagsFromText(
stripToolCallXmlTags(
stripDowngradedToolCallText(stripModelSpecialTokens(stripMinimaxToolCallXml(text))),
),
).trim();
// Second return (unreachable here): delegate to the shared sanitizer.
return sanitizeAssistantVisibleText(text);
}
function finalizeAssistantExtraction(msg: AssistantMessage, extracted: string): string {

View File

@@ -1,12 +1,7 @@
import { extractTextFromChatContent } from "../../shared/chat-content.js";
import { sanitizeAssistantVisibleTextWithOptions } from "../../shared/text/assistant-visible-text.js";
import { sanitizeUserFacingText } from "../pi-embedded-helpers.js";
import {
extractAssistantVisibleText,
stripDowngradedToolCallText,
stripMinimaxToolCallXml,
stripModelSpecialTokens,
stripThinkingTagsFromText,
} from "../pi-embedded-utils.js";
import { extractAssistantVisibleText } from "../pi-embedded-utils.js";
export function stripToolMessages(messages: unknown[]): unknown[] {
return messages.filter((msg) => {
@@ -23,12 +18,7 @@ export function stripToolMessages(messages: unknown[]): unknown[] {
* This ensures user-facing text doesn't leak internal tool representations.
*/
export function sanitizeTextContent(text: string): string {
// Empty input is returned as-is.
if (!text) {
return text;
}
// NOTE(review): two consecutive `return` statements follow, so the
// `sanitizeAssistantVisibleTextWithOptions` call is unreachable as written.
// This looks like the old and new bodies of a diff shown together — confirm
// which single return is intended.
return stripThinkingTagsFromText(
stripDowngradedToolCallText(stripModelSpecialTokens(stripMinimaxToolCallXml(text))),
);
// Unreachable here: shared sanitizer invoked with `trim: "none"`.
return sanitizeAssistantVisibleTextWithOptions(text, { trim: "none" });
}
export function hasAssistantPhaseMetadata(message: unknown): boolean {

View File

@@ -156,6 +156,13 @@ describe("sanitizeTextContent", () => {
expect(result).not.toContain("Tool Call");
});
// A leaked <tool_result> block between two prose lines must disappear while
// the surrounding lines (and the line breaks around the block) are kept.
it("strips tool_result XML via the shared assistant-visible sanitizer", () => {
  const hiddenBlock = '<tool_result>{"output":"hidden"}</tool_result>';
  const sanitized = sanitizeTextContent(["Prefix", hiddenBlock, "Suffix"].join("\n")).trim();
  expect(sanitized).toBe("Prefix\n\nSuffix");
  expect(sanitized).not.toContain("tool_result");
});
it("strips thinking tags", () => {
const input = "Before <think>secret</think> after";
const result = sanitizeTextContent(input).trim();

View File

@@ -1,5 +1,8 @@
import { describe, expect, it } from "vitest";
import { stripAssistantInternalScaffolding } from "./assistant-visible-text.js";
import {
sanitizeAssistantVisibleText,
stripAssistantInternalScaffolding,
} from "./assistant-visible-text.js";
import { stripModelSpecialTokens } from "./model-special-tokens.js";
describe("stripAssistantInternalScaffolding", () => {
@@ -393,3 +396,29 @@ describe("stripAssistantInternalScaffolding", () => {
});
});
});
// Covers the canonical sanitizer end to end: every kind of internal
// scaffolding is removed and only the visible answer line survives.
describe("sanitizeAssistantVisibleText", () => {
  const visibleAnswer = "Visible answer";

  it("strips minimax, tool XML, downgraded tool markers, and think tags in one pass", () => {
    const scaffolding = [
      '<invoke name="read">payload</invoke></minimax:tool_call>',
      '<tool_result>{"output":"hidden"}</tool_result>',
      "[Tool Call: read (ID: toolu_1)]",
      'Arguments: {"path":"/tmp/x"}',
      "<think>secret</think>",
    ];
    const input = `${scaffolding.join("\n")}\n${visibleAnswer}`;

    expect(sanitizeAssistantVisibleText(input)).toBe("Visible answer");
  });

  it("strips relevant-memories blocks on the canonical user-visible path", () => {
    const memoryBlock = "<relevant-memories>\ninternal note\n</relevant-memories>";
    const input = `${memoryBlock}\n${visibleAnswer}`;

    expect(sanitizeAssistantVisibleText(input)).toBe("Visible answer");
  });
});

View File

@@ -249,6 +249,186 @@ export function stripToolCallXmlTags(text: string): string {
return result;
}
/**
 * Drop Minimax tool invocations that were emitted as XML inside a text block
 * instead of as structured tool calls.
 *
 * @param text - Assistant text to clean.
 * @returns `text` unchanged when it is empty or does not mention
 * `minimax:tool_call` (case-insensitive); otherwise the text with every
 * `<invoke ...>...</invoke>` block and every opening/closing
 * `minimax:tool_call` tag removed.
 */
export function stripMinimaxToolCallXml(text: string): string {
  if (text && /minimax:tool_call/i.test(text)) {
    // Lazy quantifier keeps separate invoke blocks from merging into one match.
    const withoutInvokeBlocks = text.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, "");
    // Whatever wrapper tags are left over are removed on their own.
    return withoutInvokeBlocks.replace(/<\/?minimax:tool_call>/gi, "");
  }
  return text;
}
/**
 * Remove downgraded tool-call transcript markers from user-visible text.
 *
 * These bracketed markers appear when tool history is replayed as plain text
 * across providers. Three shapes are handled:
 * - `[Tool Call: ...]`, plus an optional following `Arguments` payload
 * - `[Tool Result for ID ...]`, plus everything up to the next `[Tool ` marker
 *   or the end of the text
 * - `[Historical context: ...]`
 *
 * @param text - Assistant text that may contain the markers.
 * @returns `text` untouched when it is empty or contains none of the marker
 * prefixes; otherwise the cleaned text with surrounding whitespace trimmed.
 */
export function stripDowngradedToolCallText(text: string): string {
  if (!text) {
    return text;
  }
  const hasToolMarker = /\[Tool (?:Call|Result)/i.test(text);
  const hasHistoryMarker = /\[Historical context/i.test(text);
  if (!(hasToolMarker || hasHistoryMarker)) {
    return text;
  }

  const isBlank = (ch: string | undefined): boolean => ch === " " || ch === "\t";
  const isLineBreak = (ch: string | undefined): boolean => ch === "\n" || ch === "\r";

  /** Advance past spaces and tabs starting at `from`. */
  const skipBlanks = (source: string, from: number): number => {
    let pos = from;
    while (pos < source.length && isBlank(source[pos])) {
      pos += 1;
    }
    return pos;
  };

  /**
   * Locate the exclusive end of an argument payload: a bracketed value, a
   * double-quoted string, or a bare value up to the end of its line. Yields
   * `null` when nothing but whitespace remains or the value never closes.
   */
  const findJsonishEnd = (
    source: string,
    from: number,
    skipLeadingNewlines: boolean,
  ): number | null => {
    let pos = from;
    while (
      pos < source.length &&
      (isBlank(source[pos]) || (skipLeadingNewlines && isLineBreak(source[pos])))
    ) {
      pos += 1;
    }
    if (pos >= source.length) {
      return null;
    }

    const opener = source[pos];
    if (opener === "{" || opener === "[") {
      let nesting = 0;
      let quoted = false;
      let escaped = false;
      for (let at = pos; at < source.length; at += 1) {
        const ch = source[at];
        if (quoted) {
          // Inside a string only escapes and the closing quote matter.
          if (escaped) {
            escaped = false;
          } else if (ch === "\\") {
            escaped = true;
          } else if (ch === '"') {
            quoted = false;
          }
          continue;
        }
        switch (ch) {
          case '"':
            quoted = true;
            break;
          case "{":
          case "[":
            nesting += 1;
            break;
          case "}":
          case "]":
            nesting -= 1;
            if (nesting === 0) {
              return at + 1;
            }
            break;
          default:
            break;
        }
      }
      return null;
    }

    if (opener === '"') {
      let at = pos + 1;
      while (at < source.length) {
        const ch = source[at];
        if (ch === "\\") {
          // Skip the backslash together with the character it escapes.
          at += 2;
          continue;
        }
        if (ch === '"') {
          return at + 1;
        }
        at += 1;
      }
      return null;
    }

    // Bare value: stop right before the first line break, or at end of input.
    const lineBreakOffset = source.slice(pos).search(/[\r\n]/);
    return lineBreakOffset === -1 ? source.length : pos + lineBreakOffset;
  };

  /** Cut every `[Tool Call: ...]` marker and its optional Arguments payload. */
  const removeToolCallBlocks = (source: string): string => {
    let output = "";
    let resumeAt = 0;
    for (const hit of source.matchAll(/\[Tool Call:[^\]]*\]/gi)) {
      const hitStart = hit.index ?? 0;
      if (hitStart < resumeAt) {
        // This marker was part of a payload that has already been removed.
        continue;
      }
      output += source.slice(resumeAt, hitStart);

      // Blanks after the marker, at most one line break, then blanks again.
      let pos = skipBlanks(source, hitStart + hit[0].length);
      if (source[pos] === "\r") {
        pos += source[pos + 1] === "\n" ? 2 : 1;
      } else if (source[pos] === "\n") {
        pos += 1;
      }
      pos = skipBlanks(source, pos);

      if (source.slice(pos, pos + 9).toLowerCase() === "arguments") {
        pos += 9;
        if (source[pos] === ":") {
          pos += 1;
        }
        if (source[pos] === " ") {
          pos += 1;
        }
        // An undelimited payload stays in the text; only the label is dropped.
        pos = findJsonishEnd(source, pos, true) ?? pos;
      }

      // Eat one trailing line break only if the kept output sits at a line start.
      const outputAtLineStart =
        output.length === 0 || output.endsWith("\n") || output.endsWith("\r");
      if (outputAtLineStart && isLineBreak(source[pos])) {
        if (source[pos] === "\r") {
          pos += 1;
        }
        if (source[pos] === "\n") {
          pos += 1;
        }
      }
      resumeAt = pos;
    }
    return output + source.slice(resumeAt);
  };

  return removeToolCallBlocks(text)
    // [Tool Result for ID ...] blocks and the content that follows them.
    .replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, "")
    // [Historical context: ...] markers are self-contained within brackets.
    .replace(/\[Historical context:[^\]]*\]\n?/gi, "")
    .trim();
}
function stripRelevantMemoriesTags(text: string): string {
if (!text || !MEMORY_TAG_QUICK_RE.test(text)) {
return text;
@@ -293,3 +473,32 @@ export function stripAssistantInternalScaffolding(text: string): string {
const withoutSpecialTokens = stripModelSpecialTokens(withoutToolCalls);
return withoutSpecialTokens.trimStart();
}
/**
* Canonical user-visible assistant text sanitizer for delivery and history
* extraction paths. Keeps prose, removes internal scaffolding.
*
* Delegates to `sanitizeAssistantVisibleTextWithOptions` with `trim: "both"`.
*
* @param text - Raw assistant text.
* @returns The sanitized text as produced by the delegate with `trim: "both"`.
*/
export function sanitizeAssistantVisibleText(text: string): string {
return sanitizeAssistantVisibleTextWithOptions(text, { trim: "both" });
}
/**
 * Run the full chain of assistant-text strippers with a configurable final trim.
 *
 * Order of the chain: Minimax tool-call XML, model special tokens,
 * relevant-memories tags, tool-call XML tags, downgraded tool-call text, and
 * finally reasoning tags (strict mode).
 *
 * @param text - Raw assistant text; falsy input is returned unchanged.
 * @param options - `trim` selects the whitespace handling: `"both"` (the
 * default) trims the final result, `"none"` skips the final trim. The same
 * value is forwarded to `stripReasoningTagsFromText`.
 * @returns The sanitized text.
 *
 * NOTE(review): `stripDowngradedToolCallText` trims its own result whenever it
 * finds a marker, so `trim: "none"` does not guarantee that outer whitespace
 * survives for such input.
 */
export function sanitizeAssistantVisibleTextWithOptions(
  text: string,
  options?: { trim?: "none" | "both" },
): string {
  if (!text) {
    return text;
  }

  const trimMode = options?.trim ?? "both";

  // Each stripper takes the previous stripper's output; order matters.
  const strippers = [
    stripMinimaxToolCallXml,
    stripModelSpecialTokens,
    stripRelevantMemoriesTags,
    stripToolCallXmlTags,
    stripDowngradedToolCallText,
  ];
  const withoutScaffolding = strippers.reduce((current, strip) => strip(current), text);

  const sanitized = stripReasoningTagsFromText(withoutScaffolding, {
    mode: "strict",
    trim: trimMode,
  });
  if (trimMode === "both") {
    return sanitized.trim();
  }
  return sanitized;
}