fix: strip leaked outbound tool-call scaffolding (#60619)

Co-authored-by: Frank Yang <frank.ekn@gmail.com>
This commit is contained in:
oliviareid-svg
2026-04-05 02:02:36 +08:00
committed by GitHub
parent 0cf9c6ec95
commit 7ff90c516a
4 changed files with 558 additions and 23 deletions

View File

@@ -32,19 +32,12 @@ Docs: https://docs.openclaw.ai
- Prompt caching: keep prompt prefixes more reusable across transport fallback, deterministic MCP tool ordering, compaction, and embedded image history so follow-up turns hit cache more reliably. (#58036, #58037, #58038, #59054, #60603, #60691) Thanks @bcherny.
- Agents/cache: diagnostics: add prompt-cache break diagnostics, trace live cache scenarios through embedded runner paths, and show cache reuse explicitly in `openclaw status --verbose`. Thanks @vincentkoc.
- Agents/cache: stabilize cache-relevant system prompt fingerprints by normalizing equivalent structured prompt whitespace, line endings, hook-added system context, and runtime capability ordering so semantically unchanged prompts reuse KV/cache more reliably. Thanks @vincentkoc.
- Plugin SDK/config: export `OpenClawSchema` via `openclaw/plugin-sdk/config-schema` so external tooling can validate and introspect full `openclaw.json` config through a supported public subpath. (#60557) Thanks @feniix.
### Fixes
- Control UI/chat: add a per-session thinking-level picker in the chat header and mobile chat settings, and keep the browser bundle on UI-local thinking/session-key helpers so Safari no longer crashes on Node-only imports before rendering chat controls.
- Synology Chat/security: route webhook token comparison through the shared constant-time secret helper for consistency with other bundled plugins.
- Gateway/security: scope loopback browser-origin auth throttling by normalized origin so one localhost Control UI tab cannot lock out a different localhost browser origin after repeated auth failures.
- Node exec approvals: keep node-host `system.run` approvals bound to the prepared execution plan, so script-drift revalidation still runs after agent-side approval forwarding.
- Agents/pairing: merge completion announce delivery context with the requester session fallback so missing `to` still reaches the original channel, and include `operator.talk.secrets` in CLI default operator scopes for node-role device pairing approvals. (#56481) Thanks @maxpetrusenko.
- Models/MiniMax: honor `MINIMAX_API_HOST` for implicit bundled MiniMax provider catalogs so China-hosted API-key setups pick `api.minimaxi.com/anthropic` without manual provider config. (#34524) Thanks @caiqinghua.
- Usage/MiniMax: invert remaining-style `usage_percent` fields when MiniMax reports only remaining percentage data, so usage bars stop showing nearly-full remaining quota as nearly-exhausted usage. (#60254) Thanks @jwchmodx.
- Usage/MiniMax: prefer the chat-model `model_remains` entry and derive Coding Plan window labels from MiniMax interval timestamps so MiniMax usage snapshots stop picking zero-budget media rows and misreporting 4h windows as `5h`. (#52349) Thanks @IVY-AI-gif.
- Usage/MiniMax: let usage snapshots treat `minimax-portal` and MiniMax CN aliases as the same MiniMax quota surface, and prefer stored MiniMax OAuth before falling back to Coding Plan keys.
- MiniMax: advertise image input on bundled `MiniMax-M2.7` and `MiniMax-M2.7-highspeed` model definitions so image-capable flows can route through the M2.7 family correctly. (#54843) Thanks @MerlinMiao88888888.
- Agents/exec approvals: let `exec-approvals.json` agent security override stricter gateway tool defaults so approved subagents can use `security: "full"` without falling back to allowlist enforcement again. (#60310) Thanks @lml2468.
- Tasks/maintenance: mark stale cron runs and CLI tasks backed only by long-lived chat sessions as lost again so task cleanup does not keep dead work alive indefinitely. (#60310) Thanks @lml2468.
@@ -66,8 +59,6 @@ Docs: https://docs.openclaw.ai
- Matrix: recover more reliably when secret storage or recovery keys are missing by recreating secret storage during repair and backup reset, hold crypto snapshot locks during persistence, and surface explicit too-large attachment markers. (#59846, #59851, #60599, #60289)
- ACP/agents: inherit the target agent workspace for cross-agent ACP spawns and fall back safely when the inherited workspace no longer exists. (#58438) Thanks @zssggle-rgb.
- ACPX/Windows: preserve backslashes and absolute `.exe` paths in Claude CLI parsing, and fail fast on wrapper-script targets with guidance to use `cmd.exe /c`, `powershell.exe -File`, or `node <script>`. (#60689)
- Providers/OpenAI Codex: treat Codex CLI auth as the canonical source, stop persisting copied Codex OAuth secrets into `auth-profiles.json`, refresh expired Codex-managed tokens back into Codex storage, and keep OpenAI WebSocket fallback/cache paths stable across transport changes.
- Plugins/gateway: keep reserved admin RPC namespaces (`config.*`, `exec.approvals.*`, `wizard.*`, `update.*`) admin-only even for plugin-defined methods, and warn when a plugin tries to register a narrower scope.
- Gateway/Windows scheduled tasks: preserve Task Scheduler settings on reinstall, fail loudly when `/Run` does not start, and report fast failed restarts accurately instead of pretending they timed out after 60 seconds. (#59335) Thanks @tmimmanuel.
- Discord: keep REST, webhook, and monitor traffic on the configured proxy, preserve component-only media sends, honor `@everyone` and `@here` mention gates, keep ACK reactions on the active account, and split voice connect/playback timeouts so auto-join is more reliable. (#57465, #60361, #60345)
- WhatsApp: restore `channels.whatsapp.blockStreaming` and reset watchdog timeouts after reconnect so quiet chats stop falling into reconnect loops. (#60007, #60069)
@@ -82,7 +73,6 @@ Docs: https://docs.openclaw.ai
- Plugins/marketplace: block remote marketplace symlink escapes without breaking ordinary local marketplace install paths. (#60556) Thanks @eleqtrizit.
- Plugins/install: preserve unsafe override flags across linked plugin and hook-pack probes so local `--link` installs honor the documented override behavior. (#60624) Thanks @JerrettDavis.
- Config/All Settings: keep the raw config view intact when sensitive fields are blank instead of corrupting or dropping the rendered snapshot. (#28214) Thanks @solodmd.
- Google/cache: pass explicit `cachedContent` handles through direct Google transport params and agent extra params so prebuilt Gemini context caches can be targeted again. Thanks @vincentkoc.
- Security: preserve restrictive plugin-only tool allowlists, require owner access for `/allowlist add` and `/allowlist remove`, fail closed when `before_tool_call` hooks crash, block browser SSRF redirect bypasses earlier, and keep non-interactive auth-choice inference scoped to bundled and already-trusted plugins. (#58476, #59836, #59822, #58771, #59120)
- Exec approvals: reuse durable exact-command `allow-always` approvals in allowlist mode so identical reruns stop prompting, and tighten Windows interpreter/path approval handling so wrapper and malformed-path cases fail closed more consistently. (#59880, #59780, #58040, #59182)
- Agents/runtime: make default subagent allowlists, inherited skills/workspaces, and duplicate session-id resolution behave more predictably, and include value-shape hints in missing-parameter tool errors. (#59944, #59992, #59858, #55317)
@@ -93,10 +83,8 @@ Docs: https://docs.openclaw.ai
- Providers/GitHub Copilot: send IDE identity headers on runtime model requests and GitHub token exchange so IDE-authenticated Copilot runs stop failing with missing `Editor-Version`. (#60641) Thanks @VACInc and @vincentkoc.
- Model picker/providers: treat bundled BytePlus and Volcengine plan aliases as their native providers during setup, and expose their bundled standard/coding catalogs before auth so setup can suggest the right models. (#58819) Thanks @Luckymingxuan.
- Prompt caching: route Codex Responses and Anthropic Vertex through boundary-aware cache shaping, and report the actual outbound system prompt in cache traces so cache reuse and misses line up with what providers really receive. Thanks @vincentkoc.
- Google CLI: parse Gemini JSON `response` and `stats` usage fields so reply text and cache-aware token reporting work correctly in CLI-backed runs. (#60801) Thanks @vincentkoc.
- Agents/Kimi tool-call repair: preserve tool arguments that were already present on streamed tool calls when later malformed deltas fail reevaluation, while still dropping stale repair-only state before `toolcall_end`.
- MiniMax/pricing: keep bundled MiniMax highspeed pricing distinct in provider catalogs and preserve the lower M2.5 cache-read pricing when onboarding older MiniMax models. (#54214) Thanks @octo-patch.
- MiniMax/streaming: disable Anthropic-style thinking on MiniMax streaming requests so `reasoning_content` stops leaking into visible replies on M2.7 endpoints. (#55809) Thanks @moktamd.
- Agents/cache: preserve the full 3-turn prompt-cache image window across tool loops, keep colliding bundled MCP tool definitions deterministic, and reapply Anthropic Vertex cache shaping after payload hook replacements so KV/cache reuse stays stable. Thanks @vincentkoc.
- Device pairing: reject rotating device tokens into roles that were never approved during pairing, and keep reconnect role checks bounded to the paired device's approved role set. (#60462) Thanks @eleqtrizit.
- Mobile pairing/security: fail closed for internal `/pair` setup-code issuance, cleanup, and approval paths when gateway pairing scopes are missing, and keep approval-time requested-scope enforcement on the internal command path. (#55996) Thanks @coygeek.
@@ -119,10 +107,7 @@ Docs: https://docs.openclaw.ai
- Agents/subagents: honor allowlist validation, auth-profile handoff, and session override state when a subagent retries after `LiveSessionModelSwitchError`. (#58178) Thanks @openperf.
- Agents/exec: restore `host=node` routing for node-pinned and `host=auto` sessions, while still blocking sandboxed `auto` sessions from jumping to gateway. (#60788) Thanks @openperf.
- Agents/compaction: keep assistant tool calls and displaced tool results in the same compaction chunk so strict summarization providers stop rejecting orphaned tool pairs. (#58849) Thanks @openperf.
- Cron: suppress exact `NO_REPLY` sentinel direct-delivery payloads, keep silent direct replies from falling back into duplicate main-summary sends, and treat structured `deleteAfterRun` silent replies the same as text silent replies. (#45737) Thanks @openperf.
- Cron: keep exact silent-token detection case-insensitive again so mixed-case `NO_REPLY` outputs still stay silent in text and direct delivery paths. Thanks @obviyus.
- Core/approvals: share approval-not-found fallback classification through the narrow `plugin-sdk/error-runtime` seam so core `/approve` and Telegram stay aligned without widening `plugin-sdk/infra-runtime`. (#60932) Thanks @gumadeiras.
- Gateway/file handling: wrap extracted uploaded text and text-like attachment content as untrusted external content before forwarding it to models, reducing prompt-injection risk from user-supplied files. (#60277) Thanks @hxy91819.
- Outbound/sanitizer: strip leaked `<tool_call>`, `<function_calls>`, and model special tokens from shared user-visible assistant text, including truncated tool-call streams, so internal scaffolding no longer bleeds into replies across surfaces. (#60619) Thanks @oliviareid-svg.
## 2026.4.2
@@ -187,7 +172,6 @@ Docs: https://docs.openclaw.ai
- Browser/host inspection: keep static Chrome inspection helpers out of the activated browser runtime so `openclaw doctor browser` and related checks do not eagerly load the bundled browser plugin. (#59471) Thanks @vincentkoc.
- Browser/CDP: normalize trailing-dot localhost absolute-form hosts before loopback checks so remote CDP websocket URLs like `ws://localhost.:...` rewrite back to the configured remote host. (#59236) Thanks @mappel-nv.
- Browser/attach-only profiles: disconnect cached Playwright CDP sessions when stopping attach-only or remote CDP profiles, while still reporting never-started local managed profiles as not stopped. (#60097) Thanks @pedh.
- Browser/task cleanup: close tracked browser tabs and best-effort browser processes when cron-isolated agents and subagents finish, so background browser runs stop leaking orphaned sessions. (#60146) Thanks @BrianWang1990.
- Agents/output sanitization: strip namespaced `antml:thinking` blocks from user-visible text so Anthropic-style internal monologue tags do not leak into replies. (#59550) Thanks @obviyus.
- Kimi Coding/tools: normalize Anthropic tool payloads into the OpenAI-compatible function shape Kimi Coding expects so tool calls stop losing required arguments. (#59440) Thanks @obviyus.
- Image tool/paths: resolve relative local media paths against the agent `workspaceDir` instead of `process.cwd()` so inputs like `inbox/receipt.png` pass the local-path allowlist reliably. (#57222) Thanks Priyansh Gupta.
@@ -220,9 +204,6 @@ Docs: https://docs.openclaw.ai
- Browser/profiles: reject remote browser profile `cdpUrl` values that violate strict SSRF policy before saving config, with clearer validation errors for blocked endpoints. (#60477) Thanks @eleqtrizit.
- Browser/screenshots: stop sending `fromSurface: false` on CDP screenshots so managed Chrome 146+ browsers can capture images again. (#60682) Thanks @mvanhorn.
- Mattermost/slash commands: harden native slash-command callback token validation to use constant-time secret comparison, matching the existing interaction-token path.
- Agents/scheduling: route delayed follow-up requests toward cron only when cron is actually available, while keeping background `exec`/`process` guidance scoped to work that starts now. (#60811) Thanks @vincentkoc.
- Cron/security: reject unsafe custom `sessionTarget: "session:..."` IDs earlier during cron add, update, and execution so malformed custom session keys fail closed with clear errors.
- Feishu/cards: replace the legacy `wide_screen_mode` schema 1.x config with schema 2.0 `width_mode: "fill"` in interactive approval, launcher, markdown, and structured card builders so Feishu card sends stop failing with parse-card errors while preserving wide-card rendering. (#53395) Thanks @drvoss.
## 2026.4.1
@@ -326,7 +307,6 @@ Docs: https://docs.openclaw.ai
- Control UI/agents: auto-load agent workspace files on initial Files panel open, and populate overview model/workspace/fallbacks from effective runtime agent metadata so defaulted models no longer show as `Not set`. (#56637) Thanks @dxsx84.
- Control UI/slash commands: make `/steer` and `/redirect` work from the chat command palette with visible pending state for active-run `/steer`, correct redirected-run tracking, and a single canonical `/steer` entry in the command menu. (#54625) Thanks @fuller-stack-dev.
- Cron/announce: preserve all deliverable text payloads for announce mode instead of collapsing to the last chunk, so multi-line cron reports deliver in full to Telegram forum topics.
- Cron/model overrides: keep explicit cron `payload.model` runs on the shared override fallback policy so they still inherit configured fallback chains without silently dropping to the agent primary model. (#58294) Thanks @aaronagent.
- Cron/isolated sessions: carry the full live-session provider, model, and auth-profile selection across retry restarts so cron jobs with model overrides no longer fail or loop on mid-run model-switch requests. (#57972) Thanks @issaba1.
- Diffs/config: preserve schema-shaped plugin config parsing from `diffsPluginConfigSchema.safeParse()`, so direct callers keep `defaults` and `security` sections instead of receiving flattened tool defaults. (#57904) Thanks @gumadeiras.
- Diffs: fall back to plain text when `lang` hints are invalid during diff render and viewer hydration, so bad or stale language values no longer break the diff viewer. (#57902) Thanks @gumadeiras.
@@ -633,7 +613,6 @@ Docs: https://docs.openclaw.ai
- Telegram/forum topics: keep native `/new` and `/reset` routed to the active topic by preserving the topic target on forum-thread command context. (#35963)
- Status/port diagnostics: treat single-process dual-stack loopback gateway listeners as healthy in `openclaw status --all`, suppressing false "port already in use" conflict warnings. (#53398) Thanks @DanWebb1949.
- CLI/Docker: treat loopback private-host CLI gateway connects as local for silent pairing auto-approval, while keeping remote backend and public-host CLI connects behind pairing. (#55113) Thanks @sar618.
- Slack/socket mode: mark the underlying socket client as shutting down before provider stop paths call Bolt teardown so stale-socket restarts stop leaking orphaned ping reconnect loops. (#56646) Thanks @hsiaoa.
## 2026.3.24

View File

@@ -1,5 +1,6 @@
import { describe, expect, it } from "vitest";
import { stripAssistantInternalScaffolding } from "./assistant-visible-text.js";
import { stripModelSpecialTokens } from "./model-special-tokens.js";
describe("stripAssistantInternalScaffolding", () => {
function expectVisibleText(input: string, expected: string) {
@@ -99,4 +100,271 @@ describe("stripAssistantInternalScaffolding", () => {
}
expectVisibleText(input, expected);
});
// Covers leaked tool-call XML scaffolding (`<tool_call>`, `<function_calls>`):
// closed blocks, truncated streams, prose mentions that must survive, and code regions.
//
// Only the scaffolding itself is removed; whitespace on either side of a stripped
// tag is left untouched, so "a <tag/> b" becomes "a  b" (two spaces).
describe("tool-call XML stripping", () => {
  it("strips closed <tool_call> blocks", () => {
    expectVisibleText(
      'Let me check.\n\n<tool_call> {"name": "read", "arguments": {"file_path": "test.md"}} </tool_call> after',
      "Let me check.\n\n after",
    );
  });

  it("strips closed <function_calls> blocks", () => {
    // Both the space before the block and the space after it are preserved.
    expectVisibleText(
      'Checking now. <function_calls>{"name": "exec", "args": {"cmd": "ls"}}</function_calls> Done.',
      "Checking now.  Done.",
    );
  });

  it("hides dangling <tool_call> content to end-of-string", () => {
    expectVisibleText(
      'Let me run.\n<tool_call>\n{"name": "find", "arguments": {}}\n',
      "Let me run.\n",
    );
  });

  it("does not close early on </tool_call> text inside JSON strings", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        '{"name":"x","arguments":{"html":"<div></tool_call><span>leak</span>"}}',
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("does not close early on </tool_call> text inside single-quoted payload strings", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        "{'html':'</tool_call> leak','tail':'still hidden'}",
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("does not close early on mismatched closing tool tags", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        '{"name":"read",',
        "</function_calls>",
        "still-hidden",
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("hides truncated <tool_call openings that never reach >", () => {
    expectVisibleText('prefix\n<tool_call\n{"name":"find","arguments":{}}', "prefix\n");
  });

  it("hides truncated <tool_call openings with attributes before JSON payload", () => {
    expectVisibleText('prefix\n<tool_call name="find"\n{"arguments":{}}', "prefix\n");
  });

  it("preserves lone <tool_call> mentions in normal prose", () => {
    expectVisibleText("Use <tool_call> to invoke tools.", "Use <tool_call> to invoke tools.");
  });

  it("strips self-closing <tool_call/> tags", () => {
    expectVisibleText("prefix <tool_call/> suffix", "prefix  suffix");
  });

  it("strips self-closing <function_calls .../> tags", () => {
    expectVisibleText('prefix <function_calls name="x"/> suffix', "prefix  suffix");
  });

  it("strips lone closing tool-call tags", () => {
    expectVisibleText("prefix </tool_call> suffix", "prefix  suffix");
    expectVisibleText("prefix </function_calls> suffix", "prefix  suffix");
  });

  it("preserves XML-style explanations after lone <tool_call> tags", () => {
    expectVisibleText("Use <tool_call><arg> literally.", "Use <tool_call><arg> literally.");
  });

  it("preserves literal XML-style paired tool_call examples in prose", () => {
    expectVisibleText(
      "prefix <tool_call><arg>secret</arg></tool_call> suffix",
      "prefix <tool_call><arg>secret</arg></tool_call> suffix",
    );
  });

  it("preserves machine-style XML payload examples in prose", () => {
    expectVisibleText(
      'prefix <function_calls><invoke name="find">secret</invoke></function_calls> suffix',
      'prefix <function_calls><invoke name="find">secret</invoke></function_calls> suffix',
    );
  });

  it("preserves non-tool tag names that share the tool_call prefix", () => {
    expectVisibleText(
      'prefix <tool_call-example>{"name":"read"}</tool_call-example> suffix',
      'prefix <tool_call-example>{"name":"read"}</tool_call-example> suffix',
    );
  });

  it("preserves truncated <tool_call mentions in prose", () => {
    expectVisibleText("Use <tool_call to invoke tools.", "Use <tool_call to invoke tools.");
  });

  it("preserves truncated <tool_call mentions with prose attributes", () => {
    expectVisibleText(
      'Use <tool_call name="find" to invoke tools.',
      'Use <tool_call name="find" to invoke tools.',
    );
  });

  it("still strips later JSON payloads after a truncated prose mention", () => {
    expectVisibleText(
      'Use <tool_call to invoke tools.\n<tool_call>{"name":"find"}</tool_call>',
      "Use <tool_call to invoke tools.\n",
    );
  });

  it("still strips later JSON payloads after a truncated closing-tag mention", () => {
    expectVisibleText(
      'Use </tool_call to explain tags.\n<tool_call>{"name":"find"}</tool_call>',
      "Use </tool_call to explain tags.\n",
    );
  });

  it("still closes a tool-call block when malformed payload opens a fenced code region", () => {
    expectVisibleText(
      [
        "prefix",
        "<tool_call>",
        '{"name":"read",',
        "```xml",
        "<note>hi</note>",
        "</tool_call>",
        "suffix",
      ].join("\n"),
      "prefix\n\nsuffix",
    );
  });

  it("preserves truncated XML payload openings in prose", () => {
    expectVisibleText(
      'prefix\n<function_calls\n<invoke name="find">',
      'prefix\n<function_calls\n<invoke name="find">',
    );
  });

  it("hides truncated <function_calls openings with attributes before array payload", () => {
    expectVisibleText('prefix\n<function_calls id="x"\n[{"name":"find"}]', "prefix\n");
  });

  it("preserves tool-call tags inside fenced code blocks", () => {
    const input = [
      "```xml",
      '<tool_call> {"name": "find"} </tool_call>',
      "```",
      "",
      "Visible text",
    ].join("\n");
    expectVisibleText(input, input);
  });

  it("preserves inline code references to tool_call tags", () => {
    expectVisibleText("Use `<tool_call>` to invoke tools.", "Use `<tool_call>` to invoke tools.");
  });
});
// Covers leaked model delimiter tokens (`<|...|>` with ASCII or full-width pipes).
//
// Each stripped token is replaced by a single space, and the top-level sanitizer
// only trims leading whitespace, so interior and trailing spaces remain visible
// in the expected strings below.
describe("model special token stripping", () => {
  it("strips Kimi/GLM special tokens in isolation", () => {
    expectVisibleText("<|assistant|>Here is the answer<|end|>", "Here is the answer ");
  });

  it("strips full-width pipe DeepSeek tokens", () => {
    // \uFF5C is FULLWIDTH VERTICAL LINE; escaped so the fixture survives
    // tooling that drops or normalizes non-ASCII punctuation.
    expectVisibleText("<\uFF5Cbegin▁of▁sentence\uFF5C>Hello world", "Hello world");
  });

  it("strips special tokens mixed with normal text", () => {
    // Each token becomes one space next to the space that was already there.
    expectVisibleText(
      "Start <|tool_call_result_begin|>middle<|tool_call_result_end|> end",
      "Start  middle  end",
    );
  });

  it("preserves special-token-like syntax inside code blocks", () => {
    expectVisibleText("Use <div>hello</div> in HTML", "Use <div>hello</div> in HTML");
  });

  it("strips special tokens combined with reasoning tags", () => {
    const input = [
      "<thinking>",
      "internal reasoning",
      "</thinking>",
      "<|assistant|>Visible response",
    ].join("\n");
    expectVisibleText(input, "Visible response");
  });

  it("preserves indentation in code blocks", () => {
    const input = [
      "<|assistant|>Here is the code:",
      "",
      "```python",
      "def foo():",
      "    if True:",
      "        return 42",
      "```",
    ].join("\n");
    const expected = [
      "Here is the code:",
      "",
      "```python",
      "def foo():",
      "    if True:",
      "        return 42",
      "```",
    ].join("\n");
    expectVisibleText(input, expected);
  });

  it("preserves special tokens inside fenced code blocks", () => {
    const input = [
      "Here are the model tokens:",
      "",
      "```",
      "<|assistant|>Hello<|end|>",
      "```",
      "",
      "As you can see above.",
    ].join("\n");
    expectVisibleText(input, input);
  });

  it("preserves special tokens inside inline code spans", () => {
    expectVisibleText(
      "The token `<|assistant|>` marks the start.",
      "The token `<|assistant|>` marks the start.",
    );
  });

  it("preserves malformed tokens that end inside inline code spans", () => {
    expectVisibleText("Before <|token `code|>` after", "Before <|token `code|>` after");
  });

  it("preserves malformed tokens that end inside fenced code blocks", () => {
    const input = ["Before <|token", "```js", "const x = 1;|>", "```", "after"].join("\n");
    expectVisibleText(input, input);
  });

  it("resets special-token regex state between calls", () => {
    // The first call matches near the end of a longer string; if the shared global
    // regex kept its lastIndex, the second (shorter) input would be missed.
    // stripModelSpecialTokens does not trim, so the replacement space stays.
    expect(stripModelSpecialTokens("prefix <|assistant|>")).toBe("prefix  ");
    expect(stripModelSpecialTokens("<|assistant|>short")).toBe(" short");
  });
});
});

View File

@@ -1,9 +1,247 @@
import { findCodeRegions, isInsideCode } from "./code-regions.js";
import { stripModelSpecialTokens } from "./model-special-tokens.js";
import { stripReasoningTagsFromText } from "./reasoning-tags.js";
// Matches a complete opening or closing `relevant-memories` / `relevant_memories` tag
// (case-insensitive, global). Capture group 1 holds "/" for a closing tag and "" otherwise;
// anything after the name up to the next ">" is consumed as long as it contains no "<" or ">".
const MEMORY_TAG_RE = /<\s*(\/?)\s*relevant[-_]memories\b[^<>]*>/gi;
// Non-global probe for the start of such a tag; it does not require the closing ">".
const MEMORY_TAG_QUICK_RE = /<\s*\/?\s*relevant[-_]memories\b/i;
/**
 * Strip XML-style tool call tags that models sometimes emit as plain text.
 * This stateful pass hides content from an opening tag through the matching
 * closing tag, or to end-of-string if the stream was truncated mid-tag.
 *
 * The constants below support that pass.
 */
// Non-global probe: does the text contain something that starts like an opening or
// closing tool-call tag? `\b` also accepts names such as `tool_call-example`, so a
// positive match only means the full tag parser needs to run.
const TOOL_CALL_QUICK_RE = /<\s*\/?\s*(?:tool_call|function_calls?|tool_calls)\b/i;
// Exact (lower-cased) tag names that are treated as tool-call scaffolding.
const TOOL_CALL_TAG_NAMES = new Set(["tool_call", "function_call", "function_calls", "tool_calls"]);
// Anchored at the position right after a tag (or after a truncated tag's name):
// zero or more whitespace-led `name=value` attributes (double-quoted, single-quoted,
// or bare value), optional whitespace / one line break, then `[` or `{`, i.e. the
// start of a JSON-looking payload.
const TOOL_CALL_JSON_PAYLOAD_START_RE =
/^(?:\s+[A-Za-z_:][-A-Za-z0-9_:.]*\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+))*\s*(?:\r?\n\s*)?[[{]/;
/**
 * Report whether scanning `text[start, end)` stops while a quoted string is still open.
 *
 * A `"` or `'` seen outside a string opens one; the same character closes it.
 * Inside a string a backslash escapes the following character. Backslashes
 * outside of strings carry no special meaning.
 *
 * @param text  Text to scan.
 * @param start Inclusive start index.
 * @param end   Exclusive end index.
 * @returns `true` when the position `end` lies inside an unterminated quoted string.
 */
function endsInsideQuotedString(text: string, start: number, end: number): boolean {
  let openQuote: "'" | '"' | null = null;
  let skipNext = false;
  let pos = start;
  while (pos < end) {
    const ch = text[pos];
    pos += 1;
    if (openQuote === null) {
      // Outside any string: only a quote character changes state.
      if (ch === "'" || ch === '"') {
        openQuote = ch;
      }
    } else if (skipNext) {
      // Character following a backslash is consumed verbatim.
      skipNext = false;
    } else if (ch === "\\") {
      skipNext = true;
    } else if (ch === openQuote) {
      openQuote = null;
    }
  }
  return openQuote !== null;
}
// Result of parsing a single tool-call tag (opening, closing, or self-closing).
interface ParsedToolCallTag {
// Index immediately after the tag name (where attributes or payload would begin).
contentStart: number;
// Index just past the closing ">" of the tag, or the text length when the tag is truncated.
end: number;
// True when the tag started with "</".
isClose: boolean;
// True for a non-closing tag whose text before ">" ends with "/".
isSelfClosing: boolean;
// Lower-cased tag name.
tagName: string;
// True when no closing ">" was found for this tag.
isTruncated: boolean;
}
/**
 * Decide whether `char` may legally follow a tool-call tag name.
 *
 * @param char Character right after the tag name, or `undefined` at end of text.
 * @returns `true` for end-of-text, an empty string, whitespace, "/" or ">".
 */
function isToolCallBoundary(char: string | undefined): boolean {
  if (char === undefined || char === "") {
    return true;
  }
  if (char === "/" || char === ">") {
    return true;
  }
  return /\s/.test(char);
}
/**
 * Locate the ">" that terminates a tag, scanning forward from `start`.
 *
 * Characters inside single- or double-quoted attribute values are ignored
 * (a backslash inside a quoted value escapes the next character).
 *
 * @param text  Text containing the tag.
 * @param start Index to begin scanning at (typically right after the tag name).
 * @returns Index of the terminating ">", or -1 when an unquoted "<" is met first
 *          or the text ends before any unquoted ">".
 */
function findTagCloseIndex(text: string, start: number): number {
  let activeQuote: "'" | '"' | null = null;
  let pendingEscape = false;
  for (let pos = start; pos < text.length; pos += 1) {
    const ch = text[pos];
    if (activeQuote === null) {
      if (ch === '"' || ch === "'") {
        activeQuote = ch;
      } else if (ch === "<") {
        // A new tag begins before this one was closed: treat as unterminated.
        return -1;
      } else if (ch === ">") {
        return pos;
      }
    } else if (pendingEscape) {
      pendingEscape = false;
    } else if (ch === "\\") {
      pendingEscape = true;
    } else if (ch === activeQuote) {
      activeQuote = null;
    }
  }
  return -1;
}
/**
 * Check whether the text beginning at `start` looks like the start of a
 * tool-call payload, as defined by TOOL_CALL_JSON_PAYLOAD_START_RE.
 *
 * @param text  Full text being scanned.
 * @param start Index at which the candidate payload (or attribute list) begins.
 */
function looksLikeToolCallPayloadStart(text: string, start: number): boolean {
  const remainder = text.slice(start);
  return TOOL_CALL_JSON_PAYLOAD_START_RE.test(remainder);
}
/**
 * Try to parse a tool-call tag that begins at `text[start]`.
 *
 * Accepts optional whitespace after "<" and after a leading "/". The tag name
 * (letters and underscores only) must be one of TOOL_CALL_TAG_NAMES,
 * compared case-insensitively, and must be followed by a boundary character.
 *
 * @param text  Text being scanned.
 * @param start Index of the candidate "<".
 * @returns The parsed tag, or `null` when `start` is not a recognized tool-call tag.
 *          When the closing ">" cannot be found the tag is reported as truncated
 *          and `end` is set to the text length.
 */
function parseToolCallTagAt(text: string, start: number): ParsedToolCallTag | null {
  if (text[start] !== "<") {
    return null;
  }

  // Advance past any run of whitespace starting at `from`.
  const skipWhitespace = (from: number): number => {
    let pos = from;
    while (pos < text.length && /\s/.test(text[pos])) {
      pos += 1;
    }
    return pos;
  };

  let cursor = skipWhitespace(start + 1);
  const isClose = text[cursor] === "/";
  if (isClose) {
    cursor = skipWhitespace(cursor + 1);
  }

  const nameStart = cursor;
  while (cursor < text.length && /[A-Za-z_]/.test(text[cursor])) {
    cursor += 1;
  }
  const tagName = text.slice(nameStart, cursor).toLowerCase();
  if (!(TOOL_CALL_TAG_NAMES.has(tagName) && isToolCallBoundary(text[cursor]))) {
    return null;
  }

  const closeIndex = findTagCloseIndex(text, cursor);
  const isTruncated = closeIndex === -1;
  return {
    contentStart: cursor,
    end: isTruncated ? text.length : closeIndex + 1,
    isClose,
    // Only a complete, non-closing tag whose body ends in "/" counts as self-closing.
    isSelfClosing: !isTruncated && !isClose && /\/\s*$/.test(text.slice(cursor, closeIndex)),
    tagName,
    isTruncated,
  };
}
/**
 * Remove tool-call XML scaffolding from `text` while keeping prose mentions intact.
 *
 * Single left-to-right scan over every "<":
 * - outside a hidden block, tags that start inside a code region are left alone;
 * - an opening tag followed by a JSON-looking payload starts a hidden block that runs
 *   until the matching closing tag of the same name (not inside a quoted string),
 *   or to end-of-text if never closed;
 * - self-closing tags and unmatched closing tags are dropped;
 * - opening tags without a JSON-looking payload are kept as prose, and their later
 *   closing tags are kept too (tracked per tag name in `visibleTagBalance`).
 *
 * @param text Assistant text to sanitize.
 * @returns The text with hidden tool-call blocks removed.
 */
function stripToolCallXmlTags(text: string): string {
// Fast path: nothing that even resembles a tool-call tag.
if (!text || !TOOL_CALL_QUICK_RE.test(text)) {
return text;
}
const codeRegions = findCodeRegions(text);
let result = "";
// Start of the text span that has not yet been copied to `result` (or dropped).
let lastIndex = 0;
let inToolCallBlock = false;
// Where the hidden block's payload starts; used for the quoted-string check on close tags.
let toolCallContentStart = 0;
let toolCallBlockTagName: string | null = null;
// Count of preserved (visible) opening tags per name that still await a closing tag.
const visibleTagBalance = new Map<string, number>();
for (let idx = 0; idx < text.length; idx += 1) {
if (text[idx] !== "<") {
continue;
}
// Code regions are only respected before a block opens; once hidden, a payload that
// happens to open a code fence must not prevent the block from closing.
if (!inToolCallBlock && isInsideCode(idx, codeRegions)) {
continue;
}
const tag = parseToolCallTagAt(text, idx);
if (!tag) {
continue;
}
if (!inToolCallBlock) {
// Flush the visible text that precedes this tag.
result += text.slice(lastIndex, idx);
if (tag.isClose) {
if (tag.isTruncated) {
// Truncated closing tag: keep the "</name" prefix and resume scanning right after it.
const preserveEnd = tag.contentStart;
result += text.slice(idx, preserveEnd);
lastIndex = preserveEnd;
idx = Math.max(idx, preserveEnd - 1);
continue;
}
// Complete closing tag: keep it only if it pairs with a preserved opening tag.
const balance = visibleTagBalance.get(tag.tagName) ?? 0;
if (balance > 0) {
result += text.slice(idx, tag.end);
visibleTagBalance.set(tag.tagName, balance - 1);
}
lastIndex = tag.end;
idx = Math.max(idx, tag.end - 1);
continue;
}
if (tag.isSelfClosing) {
// Self-closing tool-call tags are dropped.
lastIndex = tag.end;
idx = Math.max(idx, tag.end - 1);
continue;
}
if (
!tag.isClose &&
looksLikeToolCallPayloadStart(text, tag.isTruncated ? tag.contentStart : tag.end)
) {
// Opening tag followed by a JSON-looking payload: start hiding.
inToolCallBlock = true;
toolCallContentStart = tag.end;
toolCallBlockTagName = tag.tagName;
if (tag.isTruncated) {
// The tag itself never closed, so everything to end-of-text is hidden.
lastIndex = text.length;
break;
}
} else {
// Opening tag used as prose: keep it (only the "<name" prefix when truncated).
const preserveEnd = tag.isTruncated ? tag.contentStart : tag.end;
result += text.slice(idx, preserveEnd);
if (!tag.isTruncated) {
visibleTagBalance.set(tag.tagName, (visibleTagBalance.get(tag.tagName) ?? 0) + 1);
}
lastIndex = preserveEnd;
idx = Math.max(idx, preserveEnd - 1);
continue;
}
} else if (
tag.isClose &&
tag.tagName === toolCallBlockTagName &&
!endsInsideQuotedString(text, toolCallContentStart, idx)
) {
// Matching closing tag that is not part of a quoted payload string ends the hidden block.
inToolCallBlock = false;
toolCallBlockTagName = null;
}
// Reached when a block was just opened, just closed, or a tag was seen inside a hidden
// block: skip past the tag without emitting it.
lastIndex = tag.end;
idx = Math.max(idx, tag.end - 1);
}
// An unterminated hidden block swallows the rest of the text.
if (!inToolCallBlock) {
result += text.slice(lastIndex);
}
return result;
}
function stripRelevantMemoriesTags(text: string): string {
if (!text || !MEMORY_TAG_QUICK_RE.test(text)) {
return text;
@@ -43,5 +281,8 @@ function stripRelevantMemoriesTags(text: string): string {
/**
 * Remove internal scaffolding from assistant text before it is shown to users.
 *
 * Passes run in this order: `stripReasoningTagsFromText` (preserve mode, leading trim),
 * `stripRelevantMemoriesTags`, `stripToolCallXmlTags`, then `stripModelSpecialTokens`.
 * Leading whitespace left behind by the removals is trimmed once at the end;
 * trailing whitespace is kept.
 *
 * @param text Raw assistant text.
 * @returns The user-visible text.
 */
export function stripAssistantInternalScaffolding(text: string): string {
  const withoutReasoning = stripReasoningTagsFromText(text, { mode: "preserve", trim: "start" });
  // No early return here: the tool-call and special-token passes below must always run.
  const withoutMemories = stripRelevantMemoriesTags(withoutReasoning);
  const withoutToolCalls = stripToolCallXmlTags(withoutMemories);
  const withoutSpecialTokens = stripModelSpecialTokens(withoutToolCalls);
  return withoutSpecialTokens.trimStart();
}

View File

@@ -0,0 +1,47 @@
/**
* Strip model control tokens leaked into assistant text output.
*
* Models like GLM-5 and DeepSeek sometimes emit internal delimiter tokens
* (e.g. `<|assistant|>`, `<|tool_call_result_begin|>`, `<｜begin▁of▁sentence｜>`)
* in their responses. These use the universal `<|...|>` convention (ASCII or
* full-width pipe variants) and should never reach end users.
*
* Matches inside fenced code blocks or inline code spans are preserved so
* that documentation / examples that reference these tokens are not corrupted.
*
* This is a provider bug — no upstream fix tracked yet.
* Remove this function when upstream providers stop leaking tokens.
* @see https://github.com/openclaw/openclaw/issues/40020
*/
import { findCodeRegions, isInsideCode } from "./code-regions.js";
// Match model delimiter tokens written with ASCII pipes (`<|...|>`) or with
// full-width pipes (U+FF5C FULLWIDTH VERTICAL LINE), as emitted by DeepSeek-style models.
// The full-width pipe is spelled as the escape \uFF5C so the pattern cannot be
// silently degraded by tooling that strips or normalizes non-ASCII characters.
// The regex is global (stateful): callers must reset `lastIndex` before `.test()`.
const MODEL_SPECIAL_TOKEN_RE = /<[|\uFF5C][^|\uFF5C]*[|\uFF5C]>/g;
/**
 * Tell whether the half-open span `[start, end)` intersects any code region.
 *
 * @param start       Inclusive start offset of the span.
 * @param end         Exclusive end offset of the span.
 * @param codeRegions Regions to test against, each with `start` / `end` offsets.
 * @returns `true` as soon as one region overlaps the span; `false` for an empty list.
 */
function overlapsCodeRegion(
  start: number,
  end: number,
  codeRegions: { start: number; end: number }[],
): boolean {
  for (const { start: regionStart, end: regionEnd } of codeRegions) {
    if (start < regionEnd && end > regionStart) {
      return true;
    }
  }
  return false;
}
/**
 * Replace leaked model special tokens in `text` with a single space each.
 *
 * Tokens that start inside, or overlap, a code region reported by
 * `findCodeRegions` are left untouched. No trimming is performed here.
 *
 * @param text Assistant text to sanitize.
 * @returns The sanitized text; the input is returned as-is when it is empty
 *          or contains no special token.
 */
export function stripModelSpecialTokens(text: string): string {
  if (!text) {
    return text;
  }
  // The shared regex is global and therefore stateful: rewind it around the probe
  // so neither the probe nor the replace below starts mid-string.
  MODEL_SPECIAL_TOKEN_RE.lastIndex = 0;
  const hasToken = MODEL_SPECIAL_TOKEN_RE.test(text);
  MODEL_SPECIAL_TOKEN_RE.lastIndex = 0;
  if (!hasToken) {
    return text;
  }
  const codeRegions = findCodeRegions(text);
  return text.replace(MODEL_SPECIAL_TOKEN_RE, (token: string, offset: number) => {
    const tokenEnd = offset + token.length;
    if (isInsideCode(offset, codeRegions)) {
      return token;
    }
    if (overlapsCodeRegion(offset, tokenEnd, codeRegions)) {
      return token;
    }
    return " ";
  });
}