From 0ecda680c8ca52896b0e1c8dcfc57c8b25f15795 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 2 May 2026 02:38:16 +0100 Subject: [PATCH] fix: strip legacy tool-call text from replies --- CHANGELOG.md | 1 + .../src/auto-reply/deliver-reply.test.ts | 22 ++++++++ ...ded-helpers.sanitizeuserfacingtext.test.ts | 10 ++++ .../sanitize-user-facing-text.ts | 6 ++- src/auto-reply/reply/reply-utils.test.ts | 13 +++++ .../text/assistant-visible-text.test.ts | 35 +++++++++++++ src/shared/text/assistant-visible-text.ts | 51 +++++++++++++++++++ src/shared/text/tool-call-shaped-text.test.ts | 8 +++ src/shared/text/tool-call-shaped-text.ts | 8 +++ 9 files changed, 152 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ff8fe951d5..306441a4c02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Replies: strip legacy `[TOOL_CALL]{tool => ..., args => ...}[/TOOL_CALL]` pseudo-call text from user-facing replies and flag it in tool-call diagnostics instead of showing raw tool syntax in channels. Fixes #63610. Thanks @canh0chua. - WhatsApp: close long-lived web sockets through Baileys `end(error)` before falling back to raw websocket close, so listener teardown runs Baileys cleanup instead of leaving zombie sockets. Fixes #52442. Thanks @essendigitalgroup-cyber. - Gateway/sessions: move hot transcript reads and mirror appends onto async bounded IO with serialized parent-linked writes, keeping large session histories from stalling Gateway requests and channel replies. Fixes #75656. Thanks @DerFlash. - macOS/Voice Wake: accept trigger-only phrases in the built-in Voice Wake test, matching the settings UI and runtime trigger-only path instead of requiring extra command text after the wake word. Fixes #64986. Thanks @zoiks65. diff --git a/extensions/whatsapp/src/auto-reply/deliver-reply.test.ts b/extensions/whatsapp/src/auto-reply/deliver-reply.test.ts index 315868a3f45..d921163535a 100644 --- a/extensions/whatsapp/src/auto-reply/deliver-reply.test.ts +++ b/extensions/whatsapp/src/auto-reply/deliver-reply.test.ts @@ -271,6 +271,28 @@ describe("deliverWebReply", () => { expect(vi.mocked(msg.reply).mock.calls[0]?.[0]).toBe("Before\n\nAfter\n"); }); + it("strips legacy uppercase TOOL_CALL text before WhatsApp text delivery", async () => { + const msg = makeMsg(); + + await deliverWebReply({ + replyResult: { + text: [ + "Before", + '[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}}[/TOOL_CALL]', + "After", + ].join("\n"), + }, + msg, + maxMediaBytes: 1024 * 1024, + textLimit: 4000, + replyLogger, + skipLog: true, + }); + + expect(msg.reply).toHaveBeenCalledTimes(1); + expect(vi.mocked(msg.reply).mock.calls[0]?.[0]).toBe("Before\n\nAfter"); + }); + it("keeps quote threading on every text chunk for a threaded reply", async () => { const msg = makeMsg(); cacheInboundMessageMeta("work", "15551234567@s.whatsapp.net", "reply-1", { diff --git a/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts b/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts index e8aa4629ad4..f877b9e8150 100644 --- a/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts +++ b/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts @@ -217,6 +217,16 @@ describe("sanitizeUserFacingText", () => { expect(sanitizeUserFacingText("A\n[tool calls omitted]\n[tool calls omitted]\nB")).toBe("A\nB"); }); + it("strips legacy uppercase TOOL_CALL blocks before user-facing delivery", () => { + const input = [ + "Before", + '[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}}[/TOOL_CALL]', + "After", + ].join("\n"); + + expect(sanitizeUserFacingText(input)).toBe("Before\n\nAfter"); + }); + it("keeps ordinary inline mentions of the replay placeholder", () => { expect(sanitizeUserFacingText("What does [tool calls omitted] mean?")).toBe( "What does [tool calls omitted] mean?", diff --git a/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts b/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts index 7c3553ec137..8afa434e561 100644 --- a/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts +++ b/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts @@ -12,6 +12,7 @@ import { normalizeLowercaseStringOrEmpty, normalizeOptionalLowercaseString, } from "../../shared/string-coerce.js"; +import { stripLegacyBracketToolCallBlocks } from "../../shared/text/assistant-visible-text.js"; import { formatExecDeniedUserMessage } from "../exec-approval-result.js"; import { stripInternalRuntimeContext } from "../internal-runtime-context.js"; import { stableStringify } from "../stable-stringify.js"; @@ -404,7 +405,8 @@ export function sanitizeUserFacingText(text: unknown, opts?: { errorContext?: bo // It is internal scaffolding, so drop standalone placeholder lines before delivery // while preserving ordinary inline mentions a user may be discussing. const withoutPlaceholder = stripToolCallsOmittedPlaceholderLines(stripped); - const trimmed = withoutPlaceholder.trim(); + const withoutToolCallBlocks = stripLegacyBracketToolCallBlocks(withoutPlaceholder); + const trimmed = withoutToolCallBlocks.trim(); if (!trimmed) { return ""; } @@ -467,6 +469,6 @@ export function sanitizeUserFacingText(text: unknown, opts?: { errorContext?: bo } } - const withoutLeadingEmptyLines = withoutPlaceholder.replace(/^(?:[ \t]*\r?\n)+/, ""); + const withoutLeadingEmptyLines = withoutToolCallBlocks.replace(/^(?:[ \t]*\r?\n)+/, ""); return collapseConsecutiveDuplicateBlocks(withoutLeadingEmptyLines); } diff --git a/src/auto-reply/reply/reply-utils.test.ts b/src/auto-reply/reply/reply-utils.test.ts index 7e7786eeb8c..0e386e7fba4 100644 --- a/src/auto-reply/reply/reply-utils.test.ts +++ b/src/auto-reply/reply/reply-utils.test.ts @@ -208,6 +208,19 @@ describe("normalizeReplyPayload", () => { expect(result!.mediaUrl).toBe("https://example.com/img.png"); }); + it("strips legacy uppercase TOOL_CALL blocks from normalized replies", () => { + const result = normalizeReplyPayload({ + text: [ + "Before", + '[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}}[/TOOL_CALL]', + "After", + ].join("\n"), + }); + + expect(result).not.toBeNull(); + expect(result!.text).toBe("Before\n\nAfter"); + }); + it("does not compile Slack directives unless interactive replies are enabled", () => { const result = normalizeReplyPayload({ text: "hello [[slack_buttons: Retry:retry, Ignore:ignore]]", diff --git a/src/shared/text/assistant-visible-text.test.ts b/src/shared/text/assistant-visible-text.test.ts index 7dfb1392cd2..4b63e9221b8 100644 --- a/src/shared/text/assistant-visible-text.test.ts +++ b/src/shared/text/assistant-visible-text.test.ts @@ -179,6 +179,41 @@ describe("stripAssistantInternalScaffolding", () => { ); }); + it("strips legacy uppercase TOOL_CALL blocks with hash-style payloads", () => { + expectVisibleText( + [ + "Before", + '[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}}[/TOOL_CALL]', + "After", + ].join("\n"), + "Before\n\nAfter", + ); + }); + + it("hides dangling legacy uppercase TOOL_CALL blocks to end-of-string", () => { + expectVisibleText( + 'Before\n[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}', + "Before\n", + ); + }); + + it("preserves literal legacy TOOL_CALL examples without tool args payloads", () => { + expectVisibleText( + "Use `[TOOL_CALL]` only when describing legacy logs.", + "Use `[TOOL_CALL]` only when describing legacy logs.", + ); + }); + + it("preserves legacy uppercase TOOL_CALL blocks inside fenced code", () => { + const input = [ + "```text", + '[TOOL_CALL]{tool => "web_search", args => {"query":"x"}}[/TOOL_CALL]', + "```", + "Visible", + ].join("\n"); + expectVisibleText(input, input); + }); + it("strips Qwen-style with nested XML", () => { expectVisibleText( "prefix\n/home/user\nsuffix", diff --git a/src/shared/text/assistant-visible-text.ts b/src/shared/text/assistant-visible-text.ts index 7d6822cb585..f6c3be0a8c4 100644 --- a/src/shared/text/assistant-visible-text.ts +++ b/src/shared/text/assistant-visible-text.ts @@ -10,6 +10,7 @@ import { const MEMORY_TAG_RE = /<\s*(\/?)\s*relevant[-_]memories\b[^<>]*>/gi; const MEMORY_TAG_QUICK_RE = /<\s*\/?\s*relevant[-_]memories\b/i; +const LEGACY_BRACKET_TOOL_CALL_QUICK_RE = /\[\s*\/?\s*TOOL_CALL\s*\]/i; /** * Strip XML-style tool call tags that models sometimes emit as plain text. @@ -353,6 +354,55 @@ export function stripMinimaxToolCallXml(text: string): string { return cleaned; } +function isLegacyBracketToolCallPayload(value: string): boolean { + return ( + /\btool\s*=>\s*["'][A-Za-z_][A-Za-z0-9_.:-]{0,119}["']/i.test(value) && + /\bargs\s*=>/i.test(value) + ); +} + +export function stripLegacyBracketToolCallBlocks(text: string): string { + if (!text || !LEGACY_BRACKET_TOOL_CALL_QUICK_RE.test(text)) { + return text; + } + + const codeRegions = findCodeRegions(text); + let result = ""; + let cursor = 0; + while (cursor < text.length) { + const openMatch = /\[\s*TOOL_CALL\s*\]/gi.exec(text.slice(cursor)); + if (!openMatch?.[0]) { + result += text.slice(cursor); + break; + } + const openStart = cursor + (openMatch.index ?? 0); + const payloadStart = openStart + openMatch[0].length; + if (isInsideCode(openStart, codeRegions)) { + result += text.slice(cursor, payloadStart); + cursor = payloadStart; + continue; + } + + const closeMatch = /\[\s*\/\s*TOOL_CALL\s*\]/gi.exec(text.slice(payloadStart)); + const closeStart = + closeMatch?.[0] && !isInsideCode(payloadStart + (closeMatch.index ?? 0), codeRegions) + ? payloadStart + (closeMatch.index ?? 0) + : -1; + const payloadEnd = closeStart >= 0 ? closeStart : text.length; + const payload = text.slice(payloadStart, payloadEnd); + if (!isLegacyBracketToolCallPayload(payload)) { + result += text.slice(cursor, payloadStart); + cursor = payloadStart; + continue; + } + + result += text.slice(cursor, openStart); + cursor = closeStart >= 0 ? closeStart + (closeMatch?.[0].length ?? 0) : text.length; + } + + return result; +} + /** * Strip downgraded tool call text representations that leak into user-visible * text content when replaying history across providers. @@ -621,6 +671,7 @@ function applyAssistantVisibleTextStagePipeline( cleaned = stripToolCallXmlTags(cleaned, { stripFunctionCallsXmlPayloads: options.stripFunctionCallsXmlPayloads, }); + cleaned = stripLegacyBracketToolCallBlocks(cleaned); cleaned = stripPlainTextToolCallBlocks(cleaned); if (!options.preserveDowngradedToolText) { cleaned = stripDowngradedToolCallText(cleaned); diff --git a/src/shared/text/tool-call-shaped-text.test.ts b/src/shared/text/tool-call-shaped-text.test.ts index c29db68bdb2..025603bfbb6 100644 --- a/src/shared/text/tool-call-shaped-text.test.ts +++ b/src/shared/text/tool-call-shaped-text.test.ts @@ -29,6 +29,14 @@ describe("detectToolCallShapedText", () => { }); }); + it("detects legacy uppercase TOOL_CALL assistant text", () => { + expect( + detectToolCallShapedText( + '[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}}[/TOOL_CALL]', + ), + ).toEqual({ kind: "bracketed_tool_call", toolName: "web_search" }); + }); + it("ignores normal JSON and prose mentions", () => { expect(detectToolCallShapedText('{"status":"ok","message":"done"}')).toBeNull(); expect(detectToolCallShapedText("Use tool_call tags only in examples.")).toBeNull(); diff --git a/src/shared/text/tool-call-shaped-text.ts b/src/shared/text/tool-call-shaped-text.ts index 91a4bca1f29..2700eab6e4f 100644 --- a/src/shared/text/tool-call-shaped-text.ts +++ b/src/shared/text/tool-call-shaped-text.ts @@ -199,6 +199,14 @@ function detectXmlToolCall(text: string): ToolCallShapedTextDetection | null { } function detectBracketedToolCall(text: string): ToolCallShapedTextDetection | null { + const legacyMatch = + /\[\s*TOOL_CALL\s*\]\s*{[\s\S]{0,8000}?\btool\s*=>\s*["']([A-Za-z_][A-Za-z0-9_.:-]{0,119})["'][\s\S]{0,8000}?\bargs\s*=>[\s\S]*?(?:\[\s*\/\s*TOOL_CALL\s*\]|$)/i.exec( + text, + ); + if (legacyMatch?.[1]) { + return { kind: "bracketed_tool_call", toolName: legacyMatch[1] }; + } + const match = /^\s*\[([A-Za-z_][A-Za-z0-9_.:-]{0,119})\]\s+[\s\S]*?\[END_TOOL_REQUEST\]\s*$/i.exec(text); if (!match?.[1]) {