fix: strip leaked outbound tool-call scaffolding (#60619)

Co-authored-by: Frank Yang <frank.ekn@gmail.com>
This commit is contained in:
oliviareid-svg
2026-04-05 02:02:36 +08:00
committed by GitHub
parent 0cf9c6ec95
commit 7ff90c516a
4 changed files with 558 additions and 23 deletions

View File

@@ -1,5 +1,6 @@
import { describe, expect, it } from "vitest";
import { stripAssistantInternalScaffolding } from "./assistant-visible-text.js";
import { stripModelSpecialTokens } from "./model-special-tokens.js";
describe("stripAssistantInternalScaffolding", () => {
function expectVisibleText(input: string, expected: string) {
@@ -99,4 +100,271 @@ describe("stripAssistantInternalScaffolding", () => {
}
expectVisibleText(input, expected);
});
// Covers the stateful tool-call scaffolding pass. Only the tag span (and, for
// JSON-style payload blocks, everything up to the matching close tag) is
// removed; whitespace on either side of the removed span is left untouched,
// which is why several expectations below contain two consecutive spaces.
describe("tool-call XML stripping", () => {
  it("strips closed <tool_call> blocks", () => {
    expectVisibleText(
      'Let me check.\n\n<tool_call> {"name": "read", "arguments": {"file_path": "test.md"}} </tool_call> after',
      "Let me check.\n\n after",
    );
  });

  it("strips closed <function_calls> blocks", () => {
    // The space before the opening tag and the space after the closing tag
    // both survive, so the visible text has a double space.
    expectVisibleText(
      'Checking now. <function_calls>{"name": "exec", "args": {"cmd": "ls"}}</function_calls> Done.',
      "Checking now.  Done.",
    );
  });

  it("hides dangling <tool_call> content to end-of-string", () => {
    expectVisibleText(
      'Let me run.\n<tool_call>\n{"name": "find", "arguments": {}}\n',
      "Let me run.\n",
    );
  });

  it("does not close early on </tool_call> text inside JSON strings", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        '{"name":"x","arguments":{"html":"<div></tool_call><span>leak</span>"}}',
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("does not close early on </tool_call> text inside single-quoted payload strings", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        "{'html':'</tool_call> leak','tail':'still hidden'}",
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("does not close early on mismatched closing tool tags", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        '{"name":"read",',
        "</function_calls>",
        "still-hidden",
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("hides truncated <tool_call openings that never reach >", () => {
    expectVisibleText('prefix\n<tool_call\n{"name":"find","arguments":{}}', "prefix\n");
  });

  it("hides truncated <tool_call openings with attributes before JSON payload", () => {
    expectVisibleText('prefix\n<tool_call name="find"\n{"arguments":{}}', "prefix\n");
  });

  it("preserves lone <tool_call> mentions in normal prose", () => {
    expectVisibleText("Use <tool_call> to invoke tools.", "Use <tool_call> to invoke tools.");
  });

  it("strips self-closing <tool_call/> tags", () => {
    // Only the tag itself is removed; both neighbouring spaces remain.
    expectVisibleText("prefix <tool_call/> suffix", "prefix  suffix");
  });

  it("strips self-closing <function_calls .../> tags", () => {
    expectVisibleText('prefix <function_calls name="x"/> suffix', "prefix  suffix");
  });

  it("strips lone closing tool-call tags", () => {
    expectVisibleText("prefix </tool_call> suffix", "prefix  suffix");
    expectVisibleText("prefix </function_calls> suffix", "prefix  suffix");
  });

  it("preserves XML-style explanations after lone <tool_call> tags", () => {
    expectVisibleText("Use <tool_call><arg> literally.", "Use <tool_call><arg> literally.");
  });

  it("preserves literal XML-style paired tool_call examples in prose", () => {
    expectVisibleText(
      "prefix <tool_call><arg>secret</arg></tool_call> suffix",
      "prefix <tool_call><arg>secret</arg></tool_call> suffix",
    );
  });

  it("preserves machine-style XML payload examples in prose", () => {
    expectVisibleText(
      'prefix <function_calls><invoke name="find">secret</invoke></function_calls> suffix',
      'prefix <function_calls><invoke name="find">secret</invoke></function_calls> suffix',
    );
  });

  it("preserves non-tool tag names that share the tool_call prefix", () => {
    expectVisibleText(
      'prefix <tool_call-example>{"name":"read"}</tool_call-example> suffix',
      'prefix <tool_call-example>{"name":"read"}</tool_call-example> suffix',
    );
  });

  it("preserves truncated <tool_call mentions in prose", () => {
    expectVisibleText("Use <tool_call to invoke tools.", "Use <tool_call to invoke tools.");
  });

  it("preserves truncated <tool_call mentions with prose attributes", () => {
    expectVisibleText(
      'Use <tool_call name="find" to invoke tools.',
      'Use <tool_call name="find" to invoke tools.',
    );
  });

  it("still strips later JSON payloads after a truncated prose mention", () => {
    expectVisibleText(
      'Use <tool_call to invoke tools.\n<tool_call>{"name":"find"}</tool_call>',
      "Use <tool_call to invoke tools.\n",
    );
  });

  it("still strips later JSON payloads after a truncated closing-tag mention", () => {
    expectVisibleText(
      'Use </tool_call to explain tags.\n<tool_call>{"name":"find"}</tool_call>',
      "Use </tool_call to explain tags.\n",
    );
  });

  it("still closes a tool-call block when malformed payload opens a fenced code region", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        '{"name":"read",',
        "```xml",
        "<note>hi</note>",
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("preserves truncated XML payload openings in prose", () => {
    expectVisibleText(
      'prefix\n<function_calls\n<invoke name="find">',
      'prefix\n<function_calls\n<invoke name="find">',
    );
  });

  it("hides truncated <function_calls openings with attributes before array payload", () => {
    expectVisibleText('prefix\n<function_calls id="x"\n[{"name":"find"}]', "prefix\n");
  });

  it("preserves tool-call tags inside fenced code blocks", () => {
    const input = [
      "```xml",
      '<tool_call> {"name": "find"} </tool_call>',
      "```",
      "",
      "Visible text",
    ].join("\n");
    expectVisibleText(input, input);
  });

  it("preserves inline code references to tool_call tags", () => {
    expectVisibleText("Use `<tool_call>` to invoke tools.", "Use `<tool_call>` to invoke tools.");
  });
});
// Covers the `<|...|>` control-token pass. Each stripped token is replaced by
// a single space (not by the empty string), and only leading whitespace is
// trimmed by stripAssistantInternalScaffolding, so interior and trailing
// spaces show up in the expectations below.
describe("model special token stripping", () => {
  it("strips Kimi/GLM special tokens in isolation", () => {
    expectVisibleText("<|assistant|>Here is the answer<|end|>", "Here is the answer ");
  });

  it("strips full-width pipe DeepSeek tokens", () => {
    // U+FF5C (full-width vertical line) is written as an escape so the
    // delimiter cannot be silently lost by tooling that normalizes text.
    expectVisibleText("<\uFF5Cbegin▁of▁sentence\uFF5C>Hello world", "Hello world");
  });

  it("strips special tokens mixed with normal text", () => {
    // Each token becomes one space next to an already-present space.
    expectVisibleText(
      "Start <|tool_call_result_begin|>middle<|tool_call_result_end|> end",
      "Start  middle  end",
    );
  });

  it("preserves special-token-like syntax inside code blocks", () => {
    // NOTE(review): despite the name, this input has no code block; it checks
    // that ordinary angle-bracket markup without pipes is left alone.
    expectVisibleText("Use <div>hello</div> in HTML", "Use <div>hello</div> in HTML");
  });

  it("strips special tokens combined with reasoning tags", () => {
    const input = [
      "<thinking>",
      "internal reasoning",
      "</thinking>",
      "<|assistant|>Visible response",
    ].join("\n");
    expectVisibleText(input, "Visible response");
  });

  it("preserves indentation in code blocks", () => {
    const input = [
      "<|assistant|>Here is the code:",
      "",
      "```python",
      "def foo():",
      " if True:",
      " return 42",
      "```",
    ].join("\n");
    const expected = [
      "Here is the code:",
      "",
      "```python",
      "def foo():",
      " if True:",
      " return 42",
      "```",
    ].join("\n");
    expectVisibleText(input, expected);
  });

  it("preserves special tokens inside fenced code blocks", () => {
    const input = [
      "Here are the model tokens:",
      "",
      "```",
      "<|assistant|>Hello<|end|>",
      "```",
      "",
      "As you can see above.",
    ].join("\n");
    expectVisibleText(input, input);
  });

  it("preserves special tokens inside inline code spans", () => {
    expectVisibleText(
      "The token `<|assistant|>` marks the start.",
      "The token `<|assistant|>` marks the start.",
    );
  });

  it("preserves malformed tokens that end inside inline code spans", () => {
    expectVisibleText("Before <|token `code|>` after", "Before <|token `code|>` after");
  });

  it("preserves malformed tokens that end inside fenced code blocks", () => {
    const input = ["Before <|token", "```js", "const x = 1;|>", "```", "after"].join("\n");
    expectVisibleText(input, input);
  });

  it("resets special-token regex state between calls", () => {
    // stripModelSpecialTokens does not trim, so the replacement space is kept
    // verbatim: after the existing space in the first case, and as the
    // leading character in the second.
    expect(stripModelSpecialTokens("prefix <|assistant|>")).toBe("prefix  ");
    expect(stripModelSpecialTokens("<|assistant|>short")).toBe(" short");
  });
});
});

View File

@@ -1,9 +1,247 @@
import { findCodeRegions, isInsideCode } from "./code-regions.js";
import { stripModelSpecialTokens } from "./model-special-tokens.js";
import { stripReasoningTagsFromText } from "./reasoning-tags.js";
// Matches an opening or closing <relevant-memories> / <relevant_memories> tag,
// including any attribute text up to the closing ">". Global + case-insensitive.
const MEMORY_TAG_RE = /<\s*(\/?)\s*relevant[-_]memories\b[^<>]*>/gi;
// Non-global pre-check for the same tag family, used to skip the full pass
// when the text contains no memory tag at all.
const MEMORY_TAG_QUICK_RE = /<\s*\/?\s*relevant[-_]memories\b/i;
/**
 * Strip XML-style tool call tags that models sometimes emit as plain text.
 * This stateful pass hides content from an opening tag through the matching
 * closing tag, or to end-of-string if the stream was truncated mid-tag.
 *
 * TOOL_CALL_QUICK_RE is the non-global pre-check for that pass: it matches the
 * start of an opening or closing tool_call / tool_calls / function_call /
 * function_calls tag (case-insensitive).
 */
const TOOL_CALL_QUICK_RE = /<\s*\/?\s*(?:tool_call|function_calls?|tool_calls)\b/i;
// Lower-cased tag names that are treated as tool-call scaffolding.
const TOOL_CALL_TAG_NAMES = new Set(["tool_call", "function_call", "function_calls", "tool_calls"]);
// Anchored test applied to the text that follows a tag name (or a complete
// opening tag): zero or more name=value attributes, optional whitespace and a
// line break, then "[" or "{" — i.e. the tag is followed by a JSON-looking
// object/array payload rather than prose or nested XML.
const TOOL_CALL_JSON_PAYLOAD_START_RE =
/^(?:\s+[A-Za-z_:][-A-Za-z0-9_:.]*\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))*\s*(?:\r?\n\s*)?[[{]/;
/**
 * Report whether scanning `text` over the half-open range [start, end) finishes
 * while still inside a quoted string.
 *
 * Both single and double quotes open a string; only the same quote character
 * closes it. Inside a string, a backslash makes the following character
 * literal, so an escaped quote does not terminate the string.
 *
 * @param text - Text to scan.
 * @param start - Index of the first character to inspect.
 * @param end - Index one past the last character to inspect.
 * @returns `true` when a quote opened inside the range is still open at `end`.
 */
function endsInsideQuotedString(text: string, start: number, end: number): boolean {
  let openQuote: "'" | '"' | null = null;
  let skipNext = false;
  let pos = start;

  while (pos < end) {
    const ch = text[pos];
    pos += 1;

    if (openQuote === null) {
      // Outside any string: the only interesting characters are quote openers.
      if (ch === '"' || ch === "'") {
        openQuote = ch;
      }
    } else if (skipNext) {
      // Character following a backslash is consumed verbatim.
      skipNext = false;
    } else if (ch === "\\") {
      skipNext = true;
    } else if (ch === openQuote) {
      openQuote = null;
    }
  }

  return openQuote !== null;
}
/** Result of recognising a tool-call tag at a given position in the text. */
interface ParsedToolCallTag {
  /** Lower-cased tag name (one of the recognised tool-call tag names). */
  tagName: string;
  /** Index immediately after the tag name, where attribute text would begin. */
  contentStart: number;
  /**
   * Index one past the closing ">" of the tag, or the text length when the
   * tag is truncated.
   */
  end: number;
  /** `true` for a closing tag (`</name ...`). */
  isClose: boolean;
  /** `true` for a complete opening tag whose attribute text ends with "/". */
  isSelfClosing: boolean;
  /** `true` when no closing ">" was found for this tag. */
  isTruncated: boolean;
}
/**
 * Decide whether the character that follows a candidate tag name ends the
 * name: end of input (missing/empty), "/", ">", or whitespace.
 *
 * @param char - Character right after the tag name, or `undefined` at end of text.
 * @returns `true` when the tag name is properly terminated.
 */
function isToolCallBoundary(char: string | undefined): boolean {
  if (!char) {
    return true;
  }
  if (char === "/" || char === ">") {
    return true;
  }
  return /\s/.test(char);
}
/**
 * Locate the ">" that terminates a tag, scanning forward from `start`.
 *
 * Quoted attribute values (single or double quotes, with backslash escapes)
 * are skipped, so ">" or "<" inside them is ignored. Hitting an unquoted "<"
 * before any ">" means the tag never closed.
 *
 * @param text - Text being scanned.
 * @param start - Index at which to begin scanning (typically just past the tag name).
 * @returns Index of the terminating ">", or -1 when an unquoted "<" or the end
 *   of the text is reached first.
 */
function findTagCloseIndex(text: string, start: number): number {
  let activeQuote: "'" | '"' | null = null;
  let escapeNext = false;

  for (let pos = start; pos < text.length; pos += 1) {
    const ch = text[pos];

    if (activeQuote === null) {
      if (ch === '"' || ch === "'") {
        activeQuote = ch;
      } else if (ch === "<") {
        // A new tag starts before this one closed.
        return -1;
      } else if (ch === ">") {
        return pos;
      }
      continue;
    }

    // Inside a quoted attribute value.
    if (escapeNext) {
      escapeNext = false;
    } else if (ch === "\\") {
      escapeNext = true;
    } else if (ch === activeQuote) {
      activeQuote = null;
    }
  }

  return -1;
}
/**
 * Check whether the text beginning at `start` looks like the start of a
 * JSON-style tool-call payload, as defined by
 * TOOL_CALL_JSON_PAYLOAD_START_RE (optional attributes, then "[" or "{").
 *
 * @param text - Full text being scanned.
 * @param start - Index from which the payload check is anchored.
 * @returns `true` when the remainder of the text matches the payload pattern.
 */
function looksLikeToolCallPayloadStart(text: string, start: number): boolean {
  const remainder = text.slice(start);
  return TOOL_CALL_JSON_PAYLOAD_START_RE.test(remainder);
}
/**
 * Try to parse a tool-call tag that begins at `text[start]`.
 *
 * Accepts optional whitespace after "<" and after a leading "/". The tag name
 * is the following run of ASCII letters/underscores, compared
 * case-insensitively against TOOL_CALL_TAG_NAMES, and must be followed by a
 * boundary character (see isToolCallBoundary).
 *
 * @param text - Full text being scanned.
 * @param start - Index expected to hold "<".
 * @returns The parsed tag, or `null` when the position does not start a
 *   recognised tool-call tag. When the tag has no terminating ">", the result
 *   is marked truncated and its `end` is the text length.
 */
function parseToolCallTagAt(text: string, start: number): ParsedToolCallTag | null {
  if (text[start] !== "<") {
    return null;
  }

  // Advance past any run of whitespace beginning at `from`.
  const skipWhitespace = (from: number): number => {
    let pos = from;
    while (pos < text.length && /\s/.test(text[pos])) {
      pos += 1;
    }
    return pos;
  };

  let pos = skipWhitespace(start + 1);
  const isClose = text[pos] === "/";
  if (isClose) {
    pos = skipWhitespace(pos + 1);
  }

  const nameStart = pos;
  while (pos < text.length && /[A-Za-z_]/.test(text[pos])) {
    pos += 1;
  }
  const tagName = text.slice(nameStart, pos).toLowerCase();

  const isKnownTag = TOOL_CALL_TAG_NAMES.has(tagName) && isToolCallBoundary(text[pos]);
  if (!isKnownTag) {
    return null;
  }

  const closeIndex = findTagCloseIndex(text, pos);
  const isTruncated = closeIndex === -1;

  return {
    contentStart: pos,
    end: isTruncated ? text.length : closeIndex + 1,
    isClose,
    // Only a complete opening tag can be self-closing ("... />").
    isSelfClosing: !isTruncated && !isClose && /\/\s*$/.test(text.slice(pos, closeIndex)),
    tagName,
    isTruncated,
  };
}
/**
 * Remove leaked tool-call scaffolding from assistant text.
 *
 * The text is scanned once for recognised tool-call tags (see
 * parseToolCallTagAt). Outside a hidden block:
 * - tags inside code regions are left alone;
 * - an opening tag followed by a JSON-looking payload starts a hidden block
 *   that runs to the matching closing tag (or to end of text if the opening
 *   tag itself is truncated);
 * - self-closing tags are dropped;
 * - other opening tags are kept verbatim and counted, so that the same number
 *   of later closing tags with that name are kept too;
 * - closing tags with no kept opening tag are dropped; truncated closing tags
 *   are kept up to the end of the tag name.
 *
 * Inside a hidden block, only a closing tag with the block's own name that is
 * not inside a quoted payload string ends the block. If the block never
 * closes, everything after its opening tag stays hidden.
 *
 * @param text - Assistant text that may contain tool-call tags.
 * @returns The text with tool-call scaffolding removed.
 */
function stripToolCallXmlTags(text: string): string {
  if (!text || !TOOL_CALL_QUICK_RE.test(text)) {
    return text;
  }

  const codeRegions = findCodeRegions(text);
  let result = "";
  let lastIndex = 0;
  let inToolCallBlock = false;
  let toolCallContentStart = 0;
  let toolCallBlockTagName: string | null = null;
  // Number of kept (prose-style) opening tags per name still awaiting a close.
  const visibleTagBalance = new Map<string, number>();

  for (let idx = 0; idx < text.length; idx += 1) {
    if (text[idx] !== "<") {
      continue;
    }
    // Code regions are only respected outside a hidden block; a malformed
    // payload may itself open a fence that must not mask the closing tag.
    if (!inToolCallBlock && isInsideCode(idx, codeRegions)) {
      continue;
    }

    const tag = parseToolCallTagAt(text, idx);
    if (!tag) {
      continue;
    }

    if (inToolCallBlock) {
      const closesBlock =
        tag.isClose &&
        tag.tagName === toolCallBlockTagName &&
        !endsInsideQuotedString(text, toolCallContentStart, idx);

      if (closesBlock) {
        inToolCallBlock = false;
        toolCallBlockTagName = null;
        lastIndex = tag.end;
        idx = Math.max(idx, tag.end - 1);
      } else {
        // A truncated tag reports `end === text.length`. Jumping there would
        // skip the block's real closing tag and drop all visible text after
        // it, so resume scanning right after the tag name instead.
        const resumeAt = tag.isTruncated ? tag.contentStart : tag.end;
        idx = Math.max(idx, resumeAt - 1);
      }
      continue;
    }

    // Visible text between the previous cut point and this tag is kept.
    result += text.slice(lastIndex, idx);

    if (tag.isClose) {
      if (tag.isTruncated) {
        // Prose mention such as "</tool_call to ...": keep "</name" and go on.
        const preserveEnd = tag.contentStart;
        result += text.slice(idx, preserveEnd);
        lastIndex = preserveEnd;
        idx = Math.max(idx, preserveEnd - 1);
        continue;
      }
      const balance = visibleTagBalance.get(tag.tagName) ?? 0;
      if (balance > 0) {
        // Pairs with an opening tag that was kept as prose, so keep it too.
        result += text.slice(idx, tag.end);
        visibleTagBalance.set(tag.tagName, balance - 1);
      }
      lastIndex = tag.end;
      idx = Math.max(idx, tag.end - 1);
      continue;
    }

    if (tag.isSelfClosing) {
      lastIndex = tag.end;
      idx = Math.max(idx, tag.end - 1);
      continue;
    }

    const payloadProbeStart = tag.isTruncated ? tag.contentStart : tag.end;
    if (looksLikeToolCallPayloadStart(text, payloadProbeStart)) {
      inToolCallBlock = true;
      toolCallContentStart = tag.end;
      toolCallBlockTagName = tag.tagName;
      if (tag.isTruncated) {
        // Opening tag never reached ">": hide everything to end of text.
        lastIndex = text.length;
        break;
      }
      lastIndex = tag.end;
      idx = Math.max(idx, tag.end - 1);
      continue;
    }

    // Opening tag not followed by a JSON-style payload: treat as prose.
    const preserveEnd = tag.isTruncated ? tag.contentStart : tag.end;
    result += text.slice(idx, preserveEnd);
    if (!tag.isTruncated) {
      visibleTagBalance.set(tag.tagName, (visibleTagBalance.get(tag.tagName) ?? 0) + 1);
    }
    lastIndex = preserveEnd;
    idx = Math.max(idx, preserveEnd - 1);
  }

  if (!inToolCallBlock) {
    result += text.slice(lastIndex);
  }
  return result;
}
function stripRelevantMemoriesTags(text: string): string {
if (!text || !MEMORY_TAG_QUICK_RE.test(text)) {
return text;
@@ -43,5 +281,8 @@ function stripRelevantMemoriesTags(text: string): string {
/**
 * Remove internal scaffolding from assistant output before it is shown.
 *
 * Passes run in order: reasoning tags, relevant-memories tags, leaked
 * tool-call XML, then model special tokens. Leading whitespace left behind by
 * the removals is trimmed; trailing whitespace is not.
 *
 * @param text - Raw assistant text.
 * @returns The text with internal scaffolding removed.
 */
export function stripAssistantInternalScaffolding(text: string): string {
  const withoutReasoning = stripReasoningTagsFromText(text, { mode: "preserve", trim: "start" });
  // No early return after the memories pass: the tool-call and special-token
  // passes below must also run.
  const withoutMemories = stripRelevantMemoriesTags(withoutReasoning);
  const withoutToolCalls = stripToolCallXmlTags(withoutMemories);
  const withoutSpecialTokens = stripModelSpecialTokens(withoutToolCalls);
  return withoutSpecialTokens.trimStart();
}

View File

@@ -0,0 +1,47 @@
/**
* Strip model control tokens leaked into assistant text output.
*
* Models like GLM-5 and DeepSeek sometimes emit internal delimiter tokens
* (e.g. `<|assistant|>`, `<|tool_call_result_begin|>`, `<｜begin▁of▁sentence｜>`)
* in their responses. These use the universal `<|...|>` convention (ASCII or
* full-width pipe variants) and should never reach end users.
*
* Matches inside fenced code blocks or inline code spans are preserved so
* that documentation / examples that reference these tokens are not corrupted.
*
* This is a provider bug — no upstream fix tracked yet.
* Remove this function when upstream providers stop leaking tokens.
* @see https://github.com/openclaw/openclaw/issues/40020
*/
import { findCodeRegions, isInsideCode } from "./code-regions.js";
// Match both the ASCII pipe form `<|...|>` and the full-width pipe form that
// uses U+FF5C (FULLWIDTH VERTICAL LINE) as the delimiter. The full-width pipe
// is written as a \uFF5C escape so it cannot be lost to text normalization.
// The token body may not contain either pipe character.
const MODEL_SPECIAL_TOKEN_RE = /<[|\uFF5C][^|\uFF5C]*[|\uFF5C]>/g;
/**
 * Check whether the half-open span [start, end) intersects any code region.
 *
 * @param start - Index of the first character of the span.
 * @param end - Index one past the last character of the span.
 * @param codeRegions - Regions to test against, each as a [start, end) pair.
 * @returns `true` when at least one region shares a character with the span.
 */
function overlapsCodeRegion(
  start: number,
  end: number,
  codeRegions: { start: number; end: number }[],
): boolean {
  for (const region of codeRegions) {
    const beginsBeforeRegionEnds = start < region.end;
    const endsAfterRegionBegins = end > region.start;
    if (beginsBeforeRegionEnds && endsAfterRegionBegins) {
      return true;
    }
  }
  return false;
}
/**
 * Replace leaked model control tokens in `text` with a single space each.
 *
 * Tokens that start inside, or overlap, a code region reported by
 * findCodeRegions are left untouched. The shared global regex has its
 * `lastIndex` reset around the pre-check so repeated calls do not influence
 * one another.
 *
 * @param text - Assistant text that may contain special tokens.
 * @returns The text with non-code special tokens replaced by " "; the input
 *   is returned unchanged when it is empty or contains no token.
 */
export function stripModelSpecialTokens(text: string): string {
  if (!text) {
    return text;
  }

  // `test` on a global regex advances lastIndex; reset before and after.
  MODEL_SPECIAL_TOKEN_RE.lastIndex = 0;
  const hasToken = MODEL_SPECIAL_TOKEN_RE.test(text);
  MODEL_SPECIAL_TOKEN_RE.lastIndex = 0;
  if (!hasToken) {
    return text;
  }

  const codeRegions = findCodeRegions(text);
  return text.replace(MODEL_SPECIAL_TOKEN_RE, (token, offset) => {
    const tokenEnd = offset + token.length;
    const touchesCode =
      isInsideCode(offset, codeRegions) || overlapsCodeRegion(offset, tokenEnd, codeRegions);
    return touchesCode ? token : " ";
  });
}