fix: strip legacy tool-call text from replies

2026-05-06 07:00:43 +00:00 · 2026-05-02 02:38:16 +01:00
parent 9cbd07a9bf
commit 0ecda680c8
9 changed files with 152 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai

 ### Fixes

+- Replies: strip legacy `[TOOL_CALL]{tool => ..., args => ...}[/TOOL_CALL]` pseudo-call text from user-facing replies and flag it in tool-call diagnostics instead of showing raw tool syntax in channels. Fixes #63610. Thanks @canh0chua.
 - WhatsApp: close long-lived web sockets through Baileys `end(error)` before falling back to raw websocket close, so listener teardown runs Baileys cleanup instead of leaving zombie sockets. Fixes #52442. Thanks @essendigitalgroup-cyber.
 - Gateway/sessions: move hot transcript reads and mirror appends onto async bounded IO with serialized parent-linked writes, keeping large session histories from stalling Gateway requests and channel replies. Fixes #75656. Thanks @DerFlash.
 - macOS/Voice Wake: accept trigger-only phrases in the built-in Voice Wake test, matching the settings UI and runtime trigger-only path instead of requiring extra command text after the wake word. Fixes #64986. Thanks @zoiks65.
--- a/extensions/whatsapp/src/auto-reply/deliver-reply.test.ts
+++ b/extensions/whatsapp/src/auto-reply/deliver-reply.test.ts
@@ -271,6 +271,28 @@ describe("deliverWebReply", () => {
    expect(vi.mocked(msg.reply).mock.calls[0]?.[0]).toBe("Before\n\nAfter\n");
  });

+  it("strips legacy uppercase TOOL_CALL text before WhatsApp text delivery", async () => {
+    const msg = makeMsg();
+
+    await deliverWebReply({
+      replyResult: {
+        text: [
+          "Before",
+          '[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}}[/TOOL_CALL]',
+          "After",
+        ].join("\n"),
+      },
+      msg,
+      maxMediaBytes: 1024 * 1024,
+      textLimit: 4000,
+      replyLogger,
+      skipLog: true,
+    });
+
+    expect(msg.reply).toHaveBeenCalledTimes(1);
+    expect(vi.mocked(msg.reply).mock.calls[0]?.[0]).toBe("Before\n\nAfter");
+  });
+
  it("keeps quote threading on every text chunk for a threaded reply", async () => {
    const msg = makeMsg();
    cacheInboundMessageMeta("work", "15551234567@s.whatsapp.net", "reply-1", {
--- a/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts
+++ b/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts
@@ -217,6 +217,16 @@ describe("sanitizeUserFacingText", () => {
    expect(sanitizeUserFacingText("A\n[tool calls omitted]\n[tool calls omitted]\nB")).toBe("A\nB");
  });

+  it("strips legacy uppercase TOOL_CALL blocks before user-facing delivery", () => {
+    const input = [
+      "Before",
+      '[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}}[/TOOL_CALL]',
+      "After",
+    ].join("\n");
+
+    expect(sanitizeUserFacingText(input)).toBe("Before\n\nAfter");
+  });
+
  it("keeps ordinary inline mentions of the replay placeholder", () => {
    expect(sanitizeUserFacingText("What does [tool calls omitted] mean?")).toBe(
      "What does [tool calls omitted] mean?",
--- a/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts
+++ b/src/agents/pi-embedded-helpers/sanitize-user-facing-text.ts
@@ -12,6 +12,7 @@ import {
  normalizeLowercaseStringOrEmpty,
  normalizeOptionalLowercaseString,
 } from "../../shared/string-coerce.js";
+import { stripLegacyBracketToolCallBlocks } from "../../shared/text/assistant-visible-text.js";
 import { formatExecDeniedUserMessage } from "../exec-approval-result.js";
 import { stripInternalRuntimeContext } from "../internal-runtime-context.js";
 import { stableStringify } from "../stable-stringify.js";
@@ -404,7 +405,8 @@ export function sanitizeUserFacingText(text: unknown, opts?: { errorContext?: bo
  // It is internal scaffolding, so drop standalone placeholder lines before delivery
  // while preserving ordinary inline mentions a user may be discussing.
  const withoutPlaceholder = stripToolCallsOmittedPlaceholderLines(stripped);
-  const trimmed = withoutPlaceholder.trim();
+  const withoutToolCallBlocks = stripLegacyBracketToolCallBlocks(withoutPlaceholder);
+  const trimmed = withoutToolCallBlocks.trim();
  if (!trimmed) {
    return "";
  }
@@ -467,6 +469,6 @@ export function sanitizeUserFacingText(text: unknown, opts?: { errorContext?: bo
    }
  }

-  const withoutLeadingEmptyLines = withoutPlaceholder.replace(/^(?:[ \t]*\r?\n)+/, "");
+  const withoutLeadingEmptyLines = withoutToolCallBlocks.replace(/^(?:[ \t]*\r?\n)+/, "");
  return collapseConsecutiveDuplicateBlocks(withoutLeadingEmptyLines);
 }
--- a/src/auto-reply/reply/reply-utils.test.ts
+++ b/src/auto-reply/reply/reply-utils.test.ts
@@ -208,6 +208,19 @@ describe("normalizeReplyPayload", () => {
    expect(result!.mediaUrl).toBe("https://example.com/img.png");
  });

+  it("strips legacy uppercase TOOL_CALL blocks from normalized replies", () => {
+    const result = normalizeReplyPayload({
+      text: [
+        "Before",
+        '[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}}[/TOOL_CALL]',
+        "After",
+      ].join("\n"),
+    });
+
+    expect(result).not.toBeNull();
+    expect(result!.text).toBe("Before\n\nAfter");
+  });
+
  it("does not compile Slack directives unless interactive replies are enabled", () => {
    const result = normalizeReplyPayload({
      text: "hello [[slack_buttons: Retry:retry, Ignore:ignore]]",
--- a/src/shared/text/assistant-visible-text.test.ts
+++ b/src/shared/text/assistant-visible-text.test.ts
@@ -179,6 +179,41 @@ describe("stripAssistantInternalScaffolding", () => {
      );
    });

+    it("strips legacy uppercase TOOL_CALL blocks with hash-style payloads", () => {
+      expectVisibleText(
+        [
+          "Before",
+          '[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}}[/TOOL_CALL]',
+          "After",
+        ].join("\n"),
+        "Before\n\nAfter",
+      );
+    });
+
+    it("hides dangling legacy uppercase TOOL_CALL blocks to end-of-string", () => {
+      expectVisibleText(
+        'Before\n[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}',
+        "Before\n",
+      );
+    });
+
+    it("preserves literal legacy TOOL_CALL examples without tool args payloads", () => {
+      expectVisibleText(
+        "Use `[TOOL_CALL]` only when describing legacy logs.",
+        "Use `[TOOL_CALL]` only when describing legacy logs.",
+      );
+    });
+
+    it("preserves legacy uppercase TOOL_CALL blocks inside fenced code", () => {
+      const input = [
+        "```text",
+        '[TOOL_CALL]{tool => "web_search", args => {"query":"x"}}[/TOOL_CALL]',
+        "```",
+        "Visible",
+      ].join("\n");
+      expectVisibleText(input, input);
+    });
+
    it("strips Qwen-style <tool_call> with nested <function=...> XML", () => {
      expectVisibleText(
        "prefix\n<tool_call><function=read><parameter=path>/home/user</parameter></function></tool_call>\nsuffix",
--- a/src/shared/text/assistant-visible-text.ts
+++ b/src/shared/text/assistant-visible-text.ts
@@ -10,6 +10,7 @@ import {

 const MEMORY_TAG_RE = /<\s*(\/?)\s*relevant[-_]memories\b[^<>]*>/gi;
 const MEMORY_TAG_QUICK_RE = /<\s*\/?\s*relevant[-_]memories\b/i;
+const LEGACY_BRACKET_TOOL_CALL_QUICK_RE = /\[\s*\/?\s*TOOL_CALL\s*\]/i;

 /**
 * Strip XML-style tool call tags that models sometimes emit as plain text.
@@ -353,6 +354,55 @@ export function stripMinimaxToolCallXml(text: string): string {
  return cleaned;
 }

+function isLegacyBracketToolCallPayload(value: string): boolean { // True when value holds both a quoted `tool => "name"` entry and an `args =>` entry.
+  return ( // The two markers are tested independently (any order, case-insensitive); the tool name must start with a letter or underscore.
+    /\btool\s*=>\s*["'][A-Za-z_][A-Za-z0-9_.:-]{0,119}["']/i.test(value) &&
+    /\bargs\s*=>/i.test(value)
+  );
+}
+
+export function stripLegacyBracketToolCallBlocks(text: string): string { // Removes legacy [TOOL_CALL]...[/TOOL_CALL] pseudo-call blocks outside code regions; returns text unchanged when none qualify.
+  if (!text || !LEGACY_BRACKET_TOOL_CALL_QUICK_RE.test(text)) { // Fast path: empty input or no TOOL_CALL bracket tag anywhere.
+    return text;
+  }

+  const codeRegions = findCodeRegions(text); // Computed once on the full text; the offsets checked below are absolute.
+  let result = "";
+  let cursor = 0;
+  while (cursor < text.length) { // Scan left to right, appending the text that is kept to result.
+    const openMatch = /\[\s*TOOL_CALL\s*\]/gi.exec(text.slice(cursor));
+    if (!openMatch?.[0]) { // No further opening tag: keep the remainder verbatim.
+      result += text.slice(cursor);
+      break;
+    }
+    const openStart = cursor + (openMatch.index ?? 0); // Convert the slice-relative match index to an absolute offset.
+    const payloadStart = openStart + openMatch[0].length;
+    if (isInsideCode(openStart, codeRegions)) { // Opening tag is in a code region: keep it and resume scanning after the tag.
+      result += text.slice(cursor, payloadStart);
+      cursor = payloadStart;
+      continue;
+    }

+    const closeMatch = /\[\s*\/\s*TOOL_CALL\s*\]/gi.exec(text.slice(payloadStart));
+    const closeStart = // Absolute offset of the closing tag, or -1 when it is missing or lies inside a code region.
+      closeMatch?.[0] && !isInsideCode(payloadStart + (closeMatch.index ?? 0), codeRegions)
+        ? payloadStart + (closeMatch.index ?? 0)
+        : -1; // NOTE(review): only the first closing tag is examined; a later one outside code is not searched for.
+    const payloadEnd = closeStart >= 0 ? closeStart : text.length; // Without a usable closing tag the payload runs to end of text.
+    const payload = text.slice(payloadStart, payloadEnd);
+    if (!isLegacyBracketToolCallPayload(payload)) { // Not a tool/args payload: keep the opening tag literally and resume after it.
+      result += text.slice(cursor, payloadStart);
+      cursor = payloadStart;
+      continue;
+    }

+    result += text.slice(cursor, openStart); // Keep only the text before the block; the block itself is dropped.
+    cursor = closeStart >= 0 ? closeStart + (closeMatch?.[0].length ?? 0) : text.length; // Resume after the closing tag, or finish when the block is dangling.
+  }

+  return result;
+}
+
 /**
 * Strip downgraded tool call text representations that leak into user-visible
 * text content when replaying history across providers.
@@ -621,6 +671,7 @@ function applyAssistantVisibleTextStagePipeline(
    cleaned = stripToolCallXmlTags(cleaned, {
      stripFunctionCallsXmlPayloads: options.stripFunctionCallsXmlPayloads,
    });
+    cleaned = stripLegacyBracketToolCallBlocks(cleaned);
    cleaned = stripPlainTextToolCallBlocks(cleaned);
    if (!options.preserveDowngradedToolText) {
      cleaned = stripDowngradedToolCallText(cleaned);
--- a/src/shared/text/tool-call-shaped-text.test.ts
+++ b/src/shared/text/tool-call-shaped-text.test.ts
@@ -29,6 +29,14 @@ describe("detectToolCallShapedText", () => {
    });
  });

+  it("detects legacy uppercase TOOL_CALL assistant text", () => {
+    expect(
+      detectToolCallShapedText(
+        '[TOOL_CALL]{tool => "web_search", args => {"query":"NET stock price"}}[/TOOL_CALL]',
+      ),
+    ).toEqual({ kind: "bracketed_tool_call", toolName: "web_search" });
+  });
+
  it("ignores normal JSON and prose mentions", () => {
    expect(detectToolCallShapedText('{"status":"ok","message":"done"}')).toBeNull();
    expect(detectToolCallShapedText("Use tool_call tags only in examples.")).toBeNull();
--- a/src/shared/text/tool-call-shaped-text.ts
+++ b/src/shared/text/tool-call-shaped-text.ts
@@ -199,6 +199,14 @@ function detectXmlToolCall(text: string): ToolCallShapedTextDetection | null {
 }

 function detectBracketedToolCall(text: string): ToolCallShapedTextDetection | null {
+  const legacyMatch =
+    /\[\s*TOOL_CALL\s*\]\s*{[\s\S]{0,8000}?\btool\s*=>\s*["']([A-Za-z_][A-Za-z0-9_.:-]{0,119})["'][\s\S]{0,8000}?\bargs\s*=>[\s\S]*?(?:\[\s*\/\s*TOOL_CALL\s*\]|$)/i.exec(
+      text,
+    );
+  if (legacyMatch?.[1]) {
+    return { kind: "bracketed_tool_call", toolName: legacyMatch[1] };
+  }
+
  const match =
    /^\s*\[([A-Za-z_][A-Za-z0-9_.:-]{0,119})\]\s+[\s\S]*?\[END_TOOL_REQUEST\]\s*$/i.exec(text);
  if (!match?.[1]) {