mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-10 08:41:13 +00:00
fix: strip leaked outbound tool-call scaffolding (#60619)
Co-authored-by: Frank Yang <frank.ekn@gmail.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { stripAssistantInternalScaffolding } from "./assistant-visible-text.js";
|
||||
import { stripModelSpecialTokens } from "./model-special-tokens.js";
|
||||
|
||||
describe("stripAssistantInternalScaffolding", () => {
|
||||
function expectVisibleText(input: string, expected: string) {
|
||||
@@ -99,4 +100,271 @@ describe("stripAssistantInternalScaffolding", () => {
|
||||
}
|
||||
expectVisibleText(input, expected);
|
||||
});
|
||||
|
||||
describe("tool-call XML stripping", () => {
  // Stripping removes only the tag or block itself. Whitespace on either side
  // of the removed span is kept, so removing something that sat between two
  // spaces leaves two consecutive spaces in the visible text.

  it("strips closed <tool_call> blocks", () => {
    expectVisibleText(
      'Let me check.\n\n<tool_call> {"name": "read", "arguments": {"file_path": "test.md"}} </tool_call> after',
      "Let me check.\n\n after",
    );
  });

  it("strips closed <function_calls> blocks", () => {
    expectVisibleText(
      'Checking now. <function_calls>{"name": "exec", "args": {"cmd": "ls"}}</function_calls> Done.',
      "Checking now.  Done.",
    );
  });

  it("hides dangling <tool_call> content to end-of-string", () => {
    expectVisibleText(
      'Let me run.\n<tool_call>\n{"name": "find", "arguments": {}}\n',
      "Let me run.\n",
    );
  });

  it("does not close early on </tool_call> text inside JSON strings", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        '{"name":"x","arguments":{"html":"<div></tool_call><span>leak</span>"}}',
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("does not close early on </tool_call> text inside single-quoted payload strings", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        "{'html':'</tool_call> leak','tail':'still hidden'}",
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("does not close early on mismatched closing tool tags", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        '{"name":"read",',
        "</function_calls>",
        "still-hidden",
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("hides truncated <tool_call openings that never reach >", () => {
    expectVisibleText('prefix\n<tool_call\n{"name":"find","arguments":{}}', "prefix\n");
  });

  it("hides truncated <tool_call openings with attributes before JSON payload", () => {
    expectVisibleText('prefix\n<tool_call name="find"\n{"arguments":{}}', "prefix\n");
  });

  it("preserves lone <tool_call> mentions in normal prose", () => {
    expectVisibleText("Use <tool_call> to invoke tools.", "Use <tool_call> to invoke tools.");
  });

  it("strips self-closing <tool_call/> tags", () => {
    // The spaces on both sides of the removed tag survive.
    expectVisibleText("prefix <tool_call/> suffix", "prefix  suffix");
  });

  it("strips self-closing <function_calls .../> tags", () => {
    expectVisibleText('prefix <function_calls name="x"/> suffix', "prefix  suffix");
  });

  it("strips lone closing tool-call tags", () => {
    expectVisibleText("prefix </tool_call> suffix", "prefix  suffix");
    expectVisibleText("prefix </function_calls> suffix", "prefix  suffix");
  });

  it("preserves XML-style explanations after lone <tool_call> tags", () => {
    expectVisibleText("Use <tool_call><arg> literally.", "Use <tool_call><arg> literally.");
  });

  it("preserves literal XML-style paired tool_call examples in prose", () => {
    expectVisibleText(
      "prefix <tool_call><arg>secret</arg></tool_call> suffix",
      "prefix <tool_call><arg>secret</arg></tool_call> suffix",
    );
  });

  it("preserves machine-style XML payload examples in prose", () => {
    expectVisibleText(
      'prefix <function_calls><invoke name="find">secret</invoke></function_calls> suffix',
      'prefix <function_calls><invoke name="find">secret</invoke></function_calls> suffix',
    );
  });

  it("preserves non-tool tag names that share the tool_call prefix", () => {
    expectVisibleText(
      'prefix <tool_call-example>{"name":"read"}</tool_call-example> suffix',
      'prefix <tool_call-example>{"name":"read"}</tool_call-example> suffix',
    );
  });

  it("preserves truncated <tool_call mentions in prose", () => {
    expectVisibleText("Use <tool_call to invoke tools.", "Use <tool_call to invoke tools.");
  });

  it("preserves truncated <tool_call mentions with prose attributes", () => {
    expectVisibleText(
      'Use <tool_call name="find" to invoke tools.',
      'Use <tool_call name="find" to invoke tools.',
    );
  });

  it("still strips later JSON payloads after a truncated prose mention", () => {
    expectVisibleText(
      'Use <tool_call to invoke tools.\n<tool_call>{"name":"find"}</tool_call>',
      "Use <tool_call to invoke tools.\n",
    );
  });

  it("still strips later JSON payloads after a truncated closing-tag mention", () => {
    expectVisibleText(
      'Use </tool_call to explain tags.\n<tool_call>{"name":"find"}</tool_call>',
      "Use </tool_call to explain tags.\n",
    );
  });

  it("still closes a tool-call block when malformed payload opens a fenced code region", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        '{"name":"read",',
        "```xml",
        "<note>hi</note>",
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("preserves truncated XML payload openings in prose", () => {
    expectVisibleText(
      'prefix\n<function_calls\n<invoke name="find">',
      'prefix\n<function_calls\n<invoke name="find">',
    );
  });

  it("hides truncated <function_calls openings with attributes before array payload", () => {
    expectVisibleText('prefix\n<function_calls id="x"\n[{"name":"find"}]', "prefix\n");
  });

  it("preserves tool-call tags inside fenced code blocks", () => {
    const input = [
      "```xml",
      '<tool_call> {"name": "find"} </tool_call>',
      "```",
      "",
      "Visible text",
    ].join("\n");
    expectVisibleText(input, input);
  });

  it("preserves inline code references to tool_call tags", () => {
    expectVisibleText("Use `<tool_call>` to invoke tools.", "Use `<tool_call>` to invoke tools.");
  });
});
|
||||
|
||||
describe("model special token stripping", () => {
  // Each stripped token is replaced by a single space, so a token that already
  // had a space next to it leaves two consecutive spaces behind. Leading
  // whitespace is trimmed by stripAssistantInternalScaffolding but not by
  // stripModelSpecialTokens.

  it("strips Kimi/GLM special tokens in isolation", () => {
    expectVisibleText("<|assistant|>Here is the answer<|end|>", "Here is the answer ");
  });

  it("strips full-width pipe DeepSeek tokens", () => {
    // The input uses real full-width pipes (U+FF5C), written as escapes, so the
    // non-ASCII branch of the token pattern is actually exercised.
    expectVisibleText("<\uFF5Cbegin▁of▁sentence\uFF5C>Hello world", "Hello world");
  });

  it("strips special tokens mixed with normal text", () => {
    expectVisibleText(
      "Start <|tool_call_result_begin|>middle<|tool_call_result_end|> end",
      "Start  middle  end",
    );
  });

  it("preserves ordinary HTML tags that are not special tokens", () => {
    expectVisibleText("Use <div>hello</div> in HTML", "Use <div>hello</div> in HTML");
  });

  it("strips special tokens combined with reasoning tags", () => {
    const input = [
      "<thinking>",
      "internal reasoning",
      "</thinking>",
      "<|assistant|>Visible response",
    ].join("\n");
    expectVisibleText(input, "Visible response");
  });

  it("preserves indentation in code blocks", () => {
    // Real multi-level indentation, so a regression that collapses leading
    // whitespace inside the fence would be caught.
    const input = [
      "<|assistant|>Here is the code:",
      "",
      "```python",
      "def foo():",
      "    if True:",
      "        return 42",
      "```",
    ].join("\n");
    const expected = [
      "Here is the code:",
      "",
      "```python",
      "def foo():",
      "    if True:",
      "        return 42",
      "```",
    ].join("\n");
    expectVisibleText(input, expected);
  });

  it("preserves special tokens inside fenced code blocks", () => {
    const input = [
      "Here are the model tokens:",
      "",
      "```",
      "<|assistant|>Hello<|end|>",
      "```",
      "",
      "As you can see above.",
    ].join("\n");
    expectVisibleText(input, input);
  });

  it("preserves special tokens inside inline code spans", () => {
    expectVisibleText(
      "The token `<|assistant|>` marks the start.",
      "The token `<|assistant|>` marks the start.",
    );
  });

  it("preserves malformed tokens that end inside inline code spans", () => {
    expectVisibleText("Before <|token `code|>` after", "Before <|token `code|>` after");
  });

  it("preserves malformed tokens that end inside fenced code blocks", () => {
    const input = ["Before <|token", "```js", "const x = 1;|>", "```", "after"].join("\n");
    expectVisibleText(input, input);
  });

  it("resets special-token regex state between calls", () => {
    // First call ends with a match at the very end of the string; the second
    // call must still find a match at index 0.
    expect(stripModelSpecialTokens("prefix <|assistant|>")).toBe("prefix  ");
    expect(stripModelSpecialTokens("<|assistant|>short")).toBe(" short");
  });
});
|
||||
});
|
||||
|
||||
@@ -1,9 +1,247 @@
|
||||
import { findCodeRegions, isInsideCode } from "./code-regions.js";
|
||||
import { stripModelSpecialTokens } from "./model-special-tokens.js";
|
||||
import { stripReasoningTagsFromText } from "./reasoning-tags.js";
|
||||
|
||||
// Matches a complete opening or closing `relevant-memories` / `relevant_memories`
// tag (case-insensitive), capturing the optional leading slash.
const MEMORY_TAG_RE = /<\s*(\/?)\s*relevant[-_]memories\b[^<>]*>/gi;

// Stateless (non-global) probe for the start of such a tag, used to decide
// whether the memory-tag pass needs to run at all.
const MEMORY_TAG_QUICK_RE = /<\s*\/?\s*relevant[-_]memories\b/i;
|
||||
|
||||
// Tool-call scaffolding.
//
// Models sometimes emit XML-style tool call tags as plain text. The constants
// below drive the stateful pass that hides content from an opening tag through
// the matching closing tag, or to end-of-string if the stream was truncated
// mid-tag.

// Stateless probe: does the text contain anything that looks like the start of
// an opening or closing tool-call tag?
const TOOL_CALL_QUICK_RE = /<\s*\/?\s*(?:tool_call|function_calls?|tool_calls)\b/i;

// Lower-cased tag names that are treated as tool-call tags.
const TOOL_CALL_TAG_NAMES = new Set([
  "tool_call",
  "function_call",
  "function_calls",
  "tool_calls",
]);

// Anchored at the text that follows a tag name (or a complete opening tag):
// optional `name=value` attributes, optional whitespace / one line break, then
// the `[` or `{` that starts a JSON payload.
const TOOL_CALL_JSON_PAYLOAD_START_RE =
  /^(?:\s+[A-Za-z_:][-A-Za-z0-9_:.]*\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))*\s*(?:\r?\n\s*)?[[{]/;
|
||||
|
||||
/**
 * Report whether a scan of `text[start, end)` finishes inside an open quoted
 * string.
 *
 * Both `"` and `'` open a string; only the same character closes it. Inside a
 * string a backslash escapes the following character. Backslashes outside a
 * string have no special meaning.
 *
 * @param text - Text being scanned.
 * @param start - Index of the first character to examine (inclusive).
 * @param end - Index at which to stop (exclusive).
 * @returns `true` when a quote opened within the range is still open at `end`.
 */
function endsInsideQuotedString(text: string, start: number, end: number): boolean {
  let activeQuote: "'" | '"' | null = null;
  let skipNext = false;

  for (let pos = start; pos < end; pos += 1) {
    const ch = text[pos];

    if (activeQuote === null) {
      // Outside a string only a quote character matters.
      if (ch === "'" || ch === '"') {
        activeQuote = ch;
      }
    } else if (skipNext) {
      // This character was escaped by the preceding backslash.
      skipNext = false;
    } else if (ch === "\\") {
      skipNext = true;
    } else if (ch === activeQuote) {
      activeQuote = null;
    }
  }

  return activeQuote !== null;
}
|
||||
|
||||
/** Description of one tool-call tag located in a piece of text. */
interface ParsedToolCallTag {
  /** Tag name, lower-cased. */
  tagName: string;
  /** `true` for a closing tag (`</name ...`). */
  isClose: boolean;
  /** `true` for a complete, non-closing tag whose body ends with `/` before `>`. */
  isSelfClosing: boolean;
  /** `true` when no terminating `>` was found for the tag. */
  isTruncated: boolean;
  /** Index of the first character after the tag name. */
  contentStart: number;
  /** Index just past the terminating `>`, or the text length when truncated. */
  end: number;
}
|
||||
|
||||
/**
 * Decide whether `char` may legally follow a tool-call tag name.
 *
 * End of input (`undefined` or an empty string), any whitespace character, `/`
 * and `>` all count as a boundary; anything else means the name continues.
 */
function isToolCallBoundary(char: string | undefined): boolean {
  if (char === undefined || char === "") {
    return true;
  }
  return char === ">" || char === "/" || /\s/.test(char);
}
|
||||
|
||||
/**
 * Locate the `>` that terminates a tag, scanning forward from `start`.
 *
 * Quoted attribute values (single or double quotes, with backslash escapes) are
 * skipped, so a `>` or `<` inside them is ignored.
 *
 * @param text - Text containing the tag.
 * @param start - Index at which to begin scanning.
 * @returns The index of the terminating `>`, or `-1` when an unquoted `<` is
 *   met first or the text ends before any unquoted `>`.
 */
function findTagCloseIndex(text: string, start: number): number {
  let activeQuote: "'" | '"' | null = null;
  let skipNext = false;

  for (let pos = start; pos < text.length; pos += 1) {
    const ch = text[pos];

    if (activeQuote === null) {
      switch (ch) {
        case '"':
        case "'":
          activeQuote = ch;
          break;
        case "<":
          // Another tag starts before this one was closed.
          return -1;
        case ">":
          return pos;
        default:
          break;
      }
      continue;
    }

    // Inside a quoted value: honour escapes and look for the matching quote.
    if (skipNext) {
      skipNext = false;
    } else if (ch === "\\") {
      skipNext = true;
    } else if (ch === activeQuote) {
      activeQuote = null;
    }
  }

  return -1;
}
|
||||
|
||||
/**
 * Check whether the text beginning at `start` matches
 * `TOOL_CALL_JSON_PAYLOAD_START_RE`, i.e. optional attributes and whitespace
 * followed by the `[` or `{` of a JSON payload.
 */
function looksLikeToolCallPayloadStart(text: string, start: number): boolean {
  const remainder = text.slice(start);
  return TOOL_CALL_JSON_PAYLOAD_START_RE.exec(remainder) !== null;
}
|
||||
|
||||
/**
 * Try to read a tool-call tag whose `<` sits at `text[start]`.
 *
 * Whitespace is tolerated after `<` and after the `/` of a closing tag. The tag
 * name is the run of ASCII letters and underscores that follows; it is
 * lower-cased, must be listed in `TOOL_CALL_TAG_NAMES`, and must be followed by
 * a boundary character (see `isToolCallBoundary`).
 *
 * @param text - Text to parse.
 * @param start - Index of the candidate `<`.
 * @returns The parsed tag, or `null` when `text[start]` is not `<` or what
 *   follows is not a recognised tool-call tag. When no terminating `>` is found
 *   the tag is reported as truncated and `end` is the text length.
 */
function parseToolCallTagAt(text: string, start: number): ParsedToolCallTag | null {
  if (text[start] !== "<") {
    return null;
  }

  const skipWhitespace = (from: number): number => {
    let pos = from;
    while (pos < text.length && /\s/.test(text[pos])) {
      pos += 1;
    }
    return pos;
  };

  let cursor = skipWhitespace(start + 1);

  const isClose = text[cursor] === "/";
  if (isClose) {
    cursor = skipWhitespace(cursor + 1);
  }

  const nameStart = cursor;
  while (cursor < text.length && /[A-Za-z_]/.test(text[cursor])) {
    cursor += 1;
  }

  const tagName = text.slice(nameStart, cursor).toLowerCase();
  if (!TOOL_CALL_TAG_NAMES.has(tagName) || !isToolCallBoundary(text[cursor])) {
    return null;
  }

  // `cursor` now rests on the first character after the tag name.
  const closeIndex = findTagCloseIndex(text, cursor);
  const isTruncated = closeIndex === -1;

  return {
    contentStart: cursor,
    end: isTruncated ? text.length : closeIndex + 1,
    isClose,
    // Only a complete, non-closing tag can be self-closing (`.../>`).
    isSelfClosing: !isTruncated && !isClose && /\/\s*$/.test(text.slice(cursor, closeIndex)),
    tagName,
    isTruncated,
  };
}
|
||||
|
||||
/**
 * Remove XML-style tool-call scaffolding that leaked into plain text.
 *
 * The text is scanned once for tool-call tags. Outside a hidden block:
 * - tags located inside a code region are left alone;
 * - a closing tag is kept only while it balances an earlier opening tag that
 *   was kept as visible text; a truncated closing tag keeps its `</name` prefix;
 * - a self-closing tag is dropped;
 * - an opening tag followed by what looks like a JSON payload starts a hidden
 *   block; if that opening tag is itself truncated, everything to the end of
 *   the text is hidden;
 * - any other opening tag is kept as visible text.
 *
 * Inside a hidden block everything is dropped until a closing tag with the same
 * name appears outside a quoted string of the payload. Code regions are not
 * consulted while inside a hidden block. A block that is never closed hides the
 * rest of the text.
 *
 * @param text - Text to clean.
 * @returns The text with tool-call scaffolding removed; returned unchanged when
 *   it is empty or contains nothing resembling a tool-call tag.
 */
function stripToolCallXmlTags(text: string): string {
  if (!text || !TOOL_CALL_QUICK_RE.test(text)) {
    return text;
  }

  const codeRegions = findCodeRegions(text);
  // Per tag name: opening tags kept as visible text and not yet balanced.
  const openVisibleTags = new Map<string, number>();
  const kept: string[] = [];
  let copiedUpTo = 0;
  // Name of the tag whose block is currently hidden, or null when not hiding.
  let hiddenBlockTag: string | null = null;
  let hiddenContentStart = 0;

  for (let pos = 0; pos < text.length; ) {
    const tag =
      text[pos] === "<" && (hiddenBlockTag !== null || !isInsideCode(pos, codeRegions))
        ? parseToolCallTagAt(text, pos)
        : null;
    if (!tag) {
      pos += 1;
      continue;
    }

    let resumeAt: number;

    if (hiddenBlockTag !== null) {
      // Hidden block: only a matching close tag outside a quoted string ends it.
      if (
        tag.isClose &&
        tag.tagName === hiddenBlockTag &&
        !endsInsideQuotedString(text, hiddenContentStart, pos)
      ) {
        hiddenBlockTag = null;
      }
      resumeAt = tag.end;
    } else {
      kept.push(text.slice(copiedUpTo, pos));

      if (tag.isClose) {
        if (tag.isTruncated) {
          // Keep the `</name` prefix; the following text is scanned normally.
          kept.push(text.slice(pos, tag.contentStart));
          resumeAt = tag.contentStart;
        } else {
          const depth = openVisibleTags.get(tag.tagName) ?? 0;
          if (depth > 0) {
            kept.push(text.slice(pos, tag.end));
            openVisibleTags.set(tag.tagName, depth - 1);
          }
          resumeAt = tag.end;
        }
      } else if (tag.isSelfClosing) {
        resumeAt = tag.end;
      } else if (
        looksLikeToolCallPayloadStart(text, tag.isTruncated ? tag.contentStart : tag.end)
      ) {
        hiddenBlockTag = tag.tagName;
        hiddenContentStart = tag.end;
        if (tag.isTruncated) {
          // The opening tag never closed: hide everything that remains.
          copiedUpTo = text.length;
          break;
        }
        resumeAt = tag.end;
      } else {
        const visibleEnd = tag.isTruncated ? tag.contentStart : tag.end;
        kept.push(text.slice(pos, visibleEnd));
        if (!tag.isTruncated) {
          openVisibleTags.set(tag.tagName, (openVisibleTags.get(tag.tagName) ?? 0) + 1);
        }
        resumeAt = visibleEnd;
      }
    }

    copiedUpTo = resumeAt;
    pos = Math.max(pos + 1, resumeAt);
  }

  if (hiddenBlockTag === null) {
    kept.push(text.slice(copiedUpTo));
  }

  return kept.join("");
}
|
||||
|
||||
function stripRelevantMemoriesTags(text: string): string {
|
||||
if (!text || !MEMORY_TAG_QUICK_RE.test(text)) {
|
||||
return text;
|
||||
@@ -43,5 +281,8 @@ function stripRelevantMemoriesTags(text: string): string {
|
||||
|
||||
/**
 * Strip internal scaffolding from assistant text.
 *
 * The passes run in a fixed order, each on the output of the previous one:
 * reasoning tags, relevant-memories tags, leaked tool-call XML, and finally
 * model special tokens. Leading whitespace in the result is trimmed.
 *
 * @param text - Raw assistant text.
 * @returns The text with scaffolding removed and leading whitespace trimmed.
 */
export function stripAssistantInternalScaffolding(text: string): string {
  const withoutReasoning = stripReasoningTagsFromText(text, { mode: "preserve", trim: "start" });
  const withoutMemories = stripRelevantMemoriesTags(withoutReasoning);
  const withoutToolCalls = stripToolCallXmlTags(withoutMemories);
  const withoutSpecialTokens = stripModelSpecialTokens(withoutToolCalls);
  return withoutSpecialTokens.trimStart();
}
|
||||
|
||||
47
src/shared/text/model-special-tokens.ts
Normal file
47
src/shared/text/model-special-tokens.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
/**
|
||||
* Strip model control tokens leaked into assistant text output.
|
||||
*
|
||||
* Models like GLM-5 and DeepSeek sometimes emit internal delimiter tokens
|
||||
 * (e.g. `<|assistant|>`, `<|tool_call_result_begin|>`, `<｜begin▁of▁sentence｜>`)
|
||||
* in their responses. These use the universal `<|...|>` convention (ASCII or
|
||||
* full-width pipe variants) and should never reach end users.
|
||||
*
|
||||
* Matches inside fenced code blocks or inline code spans are preserved so
|
||||
* that documentation / examples that reference these tokens are not corrupted.
|
||||
*
|
||||
* This is a provider bug — no upstream fix tracked yet.
|
||||
* Remove this function when upstream providers stop leaking tokens.
|
||||
* @see https://github.com/openclaw/openclaw/issues/40020
|
||||
*/
|
||||
import { findCodeRegions, isInsideCode } from "./code-regions.js";
|
||||
|
||||
// Match both the ASCII pipe form `<|...|>` and the full-width pipe form
// `<｜...｜>` (U+FF5C). The full-width pipe is written as a `\uFF5C` escape so
// the pattern cannot be silently reduced to ASCII-only by editors or tooling
// that normalize look-alike characters.
const MODEL_SPECIAL_TOKEN_RE = /<[|\uFF5C][^|\uFF5C]*[|\uFF5C]>/g;
|
||||
|
||||
/**
 * Report whether the half-open span `[start, end)` intersects any of the given
 * code regions.
 *
 * @param start - Start index of the span (inclusive).
 * @param end - End index of the span (exclusive).
 * @param codeRegions - Regions to test against, each as `{ start, end }`.
 * @returns `true` as soon as one region shares at least one position with the span.
 */
function overlapsCodeRegion(
  start: number,
  end: number,
  codeRegions: { start: number; end: number }[],
): boolean {
  for (const region of codeRegions) {
    if (region.start < end && start < region.end) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Replace each `<|...|>`-style control token in `text` with a single space.
 *
 * A token is left untouched when its start is inside a code region or when any
 * part of it overlaps one.
 *
 * @param text - Text that may contain leaked control tokens.
 * @returns The cleaned text; empty input, or input without any token, is
 *   returned unchanged.
 */
export function stripModelSpecialTokens(text: string): string {
  if (!text) {
    return text;
  }

  // The pattern is global, so `test` moves `lastIndex`; rewind it before the
  // probe and again afterwards so no state leaks between calls.
  MODEL_SPECIAL_TOKEN_RE.lastIndex = 0;
  const hasToken = MODEL_SPECIAL_TOKEN_RE.test(text);
  MODEL_SPECIAL_TOKEN_RE.lastIndex = 0;
  if (!hasToken) {
    return text;
  }

  const codeRegions = findCodeRegions(text);
  const isProtected = (tokenStart: number, tokenEnd: number): boolean =>
    isInsideCode(tokenStart, codeRegions) ||
    overlapsCodeRegion(tokenStart, tokenEnd, codeRegions);

  return text.replace(MODEL_SPECIAL_TOKEN_RE, (token: string, offset: number) =>
    isProtected(offset, offset + token.length) ? token : " ",
  );
}
|
||||
Reference in New Issue
Block a user