mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-27 00:52:05 +00:00
fix: unify assistant visible text sanitizers (#61729)
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { stripAssistantInternalScaffolding } from "./assistant-visible-text.js";
|
||||
import {
|
||||
sanitizeAssistantVisibleText,
|
||||
stripAssistantInternalScaffolding,
|
||||
} from "./assistant-visible-text.js";
|
||||
import { stripModelSpecialTokens } from "./model-special-tokens.js";
|
||||
|
||||
describe("stripAssistantInternalScaffolding", () => {
|
||||
@@ -393,3 +396,29 @@ describe("stripAssistantInternalScaffolding", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("sanitizeAssistantVisibleText", () => {
|
||||
it("strips minimax, tool XML, downgraded tool markers, and think tags in one pass", () => {
|
||||
const input = [
|
||||
'<invoke name="read">payload</invoke></minimax:tool_call>',
|
||||
'<tool_result>{"output":"hidden"}</tool_result>',
|
||||
"[Tool Call: read (ID: toolu_1)]",
|
||||
'Arguments: {"path":"/tmp/x"}',
|
||||
"<think>secret</think>",
|
||||
"Visible answer",
|
||||
].join("\n");
|
||||
|
||||
expect(sanitizeAssistantVisibleText(input)).toBe("Visible answer");
|
||||
});
|
||||
|
||||
it("strips relevant-memories blocks on the canonical user-visible path", () => {
|
||||
const input = [
|
||||
"<relevant-memories>",
|
||||
"internal note",
|
||||
"</relevant-memories>",
|
||||
"Visible answer",
|
||||
].join("\n");
|
||||
|
||||
expect(sanitizeAssistantVisibleText(input)).toBe("Visible answer");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -249,6 +249,186 @@ export function stripToolCallXmlTags(text: string): string {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip malformed Minimax tool invocations that leak into text content.
|
||||
* Minimax sometimes embeds tool calls as XML in text blocks instead of
|
||||
* proper structured tool calls.
|
||||
*/
|
||||
export function stripMinimaxToolCallXml(text: string): string {
|
||||
if (!text || !/minimax:tool_call/i.test(text)) {
|
||||
return text;
|
||||
}
|
||||
|
||||
// Remove <invoke ...>...</invoke> blocks (non-greedy to handle multiple).
|
||||
let cleaned = text.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, "");
|
||||
|
||||
// Remove stray minimax tool tags.
|
||||
cleaned = cleaned.replace(/<\/?minimax:tool_call>/gi, "");
|
||||
|
||||
return cleaned;
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip downgraded tool call text representations that leak into user-visible
|
||||
* text content when replaying history across providers.
|
||||
*/
|
||||
export function stripDowngradedToolCallText(text: string): string {
|
||||
if (!text) {
|
||||
return text;
|
||||
}
|
||||
if (!/\[Tool (?:Call|Result)/i.test(text) && !/\[Historical context/i.test(text)) {
|
||||
return text;
|
||||
}
|
||||
|
||||
const consumeJsonish = (
|
||||
input: string,
|
||||
start: number,
|
||||
options?: { allowLeadingNewlines?: boolean },
|
||||
): number | null => {
|
||||
const { allowLeadingNewlines = false } = options ?? {};
|
||||
let index = start;
|
||||
while (index < input.length) {
|
||||
const ch = input[index];
|
||||
if (ch === " " || ch === "\t") {
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (allowLeadingNewlines && (ch === "\n" || ch === "\r")) {
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (index >= input.length) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const startChar = input[index];
|
||||
if (startChar === "{" || startChar === "[") {
|
||||
let depth = 0;
|
||||
let inString = false;
|
||||
let escape = false;
|
||||
for (let idx = index; idx < input.length; idx += 1) {
|
||||
const ch = input[idx];
|
||||
if (inString) {
|
||||
if (escape) {
|
||||
escape = false;
|
||||
} else if (ch === "\\") {
|
||||
escape = true;
|
||||
} else if (ch === '"') {
|
||||
inString = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (ch === '"') {
|
||||
inString = true;
|
||||
continue;
|
||||
}
|
||||
if (ch === "{" || ch === "[") {
|
||||
depth += 1;
|
||||
} else if (ch === "}" || ch === "]") {
|
||||
depth -= 1;
|
||||
if (depth === 0) {
|
||||
return idx + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
if (startChar === '"') {
|
||||
let escape = false;
|
||||
for (let idx = index + 1; idx < input.length; idx += 1) {
|
||||
const ch = input[idx];
|
||||
if (escape) {
|
||||
escape = false;
|
||||
continue;
|
||||
}
|
||||
if (ch === "\\") {
|
||||
escape = true;
|
||||
continue;
|
||||
}
|
||||
if (ch === '"') {
|
||||
return idx + 1;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
let end = index;
|
||||
while (end < input.length && input[end] !== "\n" && input[end] !== "\r") {
|
||||
end += 1;
|
||||
}
|
||||
return end;
|
||||
};
|
||||
|
||||
const stripToolCalls = (input: string): string => {
|
||||
const toolCallRe = /\[Tool Call:[^\]]*\]/gi;
|
||||
let result = "";
|
||||
let cursor = 0;
|
||||
for (const match of input.matchAll(toolCallRe)) {
|
||||
const start = match.index ?? 0;
|
||||
if (start < cursor) {
|
||||
continue;
|
||||
}
|
||||
result += input.slice(cursor, start);
|
||||
let index = start + match[0].length;
|
||||
while (index < input.length && (input[index] === " " || input[index] === "\t")) {
|
||||
index += 1;
|
||||
}
|
||||
if (input[index] === "\r") {
|
||||
index += 1;
|
||||
if (input[index] === "\n") {
|
||||
index += 1;
|
||||
}
|
||||
} else if (input[index] === "\n") {
|
||||
index += 1;
|
||||
}
|
||||
while (index < input.length && (input[index] === " " || input[index] === "\t")) {
|
||||
index += 1;
|
||||
}
|
||||
if (input.slice(index, index + 9).toLowerCase() === "arguments") {
|
||||
index += 9;
|
||||
if (input[index] === ":") {
|
||||
index += 1;
|
||||
}
|
||||
if (input[index] === " ") {
|
||||
index += 1;
|
||||
}
|
||||
const end = consumeJsonish(input, index, { allowLeadingNewlines: true });
|
||||
if (end !== null) {
|
||||
index = end;
|
||||
}
|
||||
}
|
||||
if (
|
||||
(input[index] === "\n" || input[index] === "\r") &&
|
||||
(result.endsWith("\n") || result.endsWith("\r") || result.length === 0)
|
||||
) {
|
||||
if (input[index] === "\r") {
|
||||
index += 1;
|
||||
}
|
||||
if (input[index] === "\n") {
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
cursor = index;
|
||||
}
|
||||
result += input.slice(cursor);
|
||||
return result;
|
||||
};
|
||||
|
||||
// Remove [Tool Call: name (ID: ...)] blocks and their Arguments.
|
||||
let cleaned = stripToolCalls(text);
|
||||
|
||||
// Remove [Tool Result for ID ...] blocks and their content.
|
||||
cleaned = cleaned.replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, "");
|
||||
|
||||
// Remove [Historical context: ...] markers (self-contained within brackets).
|
||||
cleaned = cleaned.replace(/\[Historical context:[^\]]*\]\n?/gi, "");
|
||||
|
||||
return cleaned.trim();
|
||||
}
|
||||
|
||||
function stripRelevantMemoriesTags(text: string): string {
|
||||
if (!text || !MEMORY_TAG_QUICK_RE.test(text)) {
|
||||
return text;
|
||||
@@ -293,3 +473,32 @@ export function stripAssistantInternalScaffolding(text: string): string {
|
||||
const withoutSpecialTokens = stripModelSpecialTokens(withoutToolCalls);
|
||||
return withoutSpecialTokens.trimStart();
|
||||
}
|
||||
|
||||
/**
|
||||
* Canonical user-visible assistant text sanitizer for delivery and history
|
||||
* extraction paths. Keeps prose, removes internal scaffolding.
|
||||
*/
|
||||
export function sanitizeAssistantVisibleText(text: string): string {
|
||||
return sanitizeAssistantVisibleTextWithOptions(text, { trim: "both" });
|
||||
}
|
||||
|
||||
export function sanitizeAssistantVisibleTextWithOptions(
|
||||
text: string,
|
||||
options?: { trim?: "none" | "both" },
|
||||
): string {
|
||||
if (!text) {
|
||||
return text;
|
||||
}
|
||||
const trimMode = options?.trim ?? "both";
|
||||
|
||||
const withoutMinimaxToolXml = stripMinimaxToolCallXml(text);
|
||||
const withoutSpecialTokens = stripModelSpecialTokens(withoutMinimaxToolXml);
|
||||
const withoutMemories = stripRelevantMemoriesTags(withoutSpecialTokens);
|
||||
const withoutToolCallXml = stripToolCallXmlTags(withoutMemories);
|
||||
const withoutDowngradedToolText = stripDowngradedToolCallText(withoutToolCallXml);
|
||||
const sanitized = stripReasoningTagsFromText(withoutDowngradedToolText, {
|
||||
mode: "strict",
|
||||
trim: trimMode,
|
||||
});
|
||||
return trimMode === "both" ? sanitized.trim() : sanitized;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user