From f9181835e8700f3e2c56ebe7cdec050276f75458 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Mon, 27 Apr 2026 09:14:44 +0100
Subject: [PATCH] fix(agents): warn on fake local tool calls

---
 CHANGELOG.md                                  |   1 +
 docs/gateway/local-models.md                  |   5 +
 ...bedded-subscribe.handlers.messages.test.ts |  56 ++++-
 ...pi-embedded-subscribe.handlers.messages.ts |   2 +
 ...mbedded-subscribe.tool-text-diagnostics.ts |  86 +++++++
 src/shared/text/tool-call-shaped-text.test.ts |  37 +++
 src/shared/text/tool-call-shaped-text.ts      | 232 ++++++++++++++++++
 7 files changed, 418 insertions(+), 1 deletion(-)
 create mode 100644 src/agents/pi-embedded-subscribe.tool-text-diagnostics.ts
 create mode 100644 src/shared/text/tool-call-shaped-text.test.ts
 create mode 100644 src/shared/text/tool-call-shaped-text.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 05b13a2d66a..98caeae33d0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
 - CLI/update: keep the automatic post-update completion refresh on the core-command tree so it no longer stages bundled plugin runtime deps before the Gateway restart path, avoiding `.24` update hangs and 1006 disconnect cascades. Fixes #72665. Thanks @sakalaboator and @He-Pin.
 - Agents/Bedrock: stop heartbeat runs from persisting blank user transcript turns and repair existing blank user text messages before replay, preventing AWS Bedrock `ContentBlock` blank-text validation failures. Fixes #72640 and #72622. Thanks @goldzulu.
 - Agents/LM Studio: promote standalone bracketed local-model tool requests into registered tool calls and hide unsupported bracket blocks from visible replies, so MemPalace MCP lookups do not print raw `[tool]` JSON scaffolding in chat. Fixes #66178. Thanks @detroit357.
+- Local models: warn when an assistant reply looks like a tool call but the provider emitted plain text instead of a structured tool invocation, making fake/non-executed tool calls visible in logs. Fixes #51332. Thanks @emilclaw.
 - LM Studio: trust configured LM Studio loopback, LAN, and tailnet endpoints for guarded model requests by default, preserving explicit private-network opt-outs. Refs #60994. Thanks @tnowakow.
 - Docker/setup: route Docker onboarding defaults for host-side LM Studio and Ollama through `host.docker.internal` and add the Linux host-gateway mapping to the bundled Compose file, so containerized gateways can reach local providers without using container loopback. Fixes #68684; supersedes #68702. Thanks @safrano9999 and @skolez.
 - Agents/LM Studio: strip prior-turn Gemma 4 reasoning from OpenAI-compatible replay while preserving active tool-call continuation reasoning. Fixes #68704. Thanks @chip-snomo and @Kailigithub.
diff --git a/docs/gateway/local-models.md b/docs/gateway/local-models.md
index 2967a39ed50..65c52e39985 100644
--- a/docs/gateway/local-models.md
+++ b/docs/gateway/local-models.md
@@ -169,6 +169,11 @@ Compatibility notes for stricter OpenAI-compatible backends:
   those into real tool calls only when the name exactly matches a registered
   tool for the turn; otherwise the block is treated as unsupported text and is
   hidden from user-visible replies.
+- If a model emits JSON, XML, or ReAct-style text that looks like a tool call
+  but the provider did not emit a structured invocation, OpenClaw leaves it as
+  text and logs a warning with the run id, provider/model, detected pattern, and
+  tool name when available. Treat that as provider/model tool-call
+  incompatibility, not a completed tool run.
 - Some smaller or stricter local backends are unstable with OpenClaw's full
   agent-runtime prompt shape, especially when tool schemas are included. If the
   backend works for tiny direct `/v1/chat/completions` calls but fails on normal
diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.test.ts b/src/agents/pi-embedded-subscribe.handlers.messages.test.ts
index 74de65e45c6..62e36cf6d44 100644
--- a/src/agents/pi-embedded-subscribe.handlers.messages.test.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.messages.test.ts
@@ -81,6 +81,8 @@ function createMessageEndContext(
     emitBlockReply?: ReturnType<typeof vi.fn>;
     finalizeAssistantTexts?: ReturnType<typeof vi.fn>;
     consumeReplyDirectives?: ReturnType<typeof vi.fn>;
+    warn?: ReturnType<typeof vi.fn>;
+    builtinToolNames?: ReadonlySet<string>;
     state?: Record<string, unknown>;
   } = {},
 ) {
@@ -118,7 +120,8 @@ function createMessageEndContext(
     noteLastAssistant: vi.fn(),
     recordAssistantUsage: vi.fn(),
     commitAssistantUsage: vi.fn(),
-    log: { debug: vi.fn(), warn: vi.fn() },
+    log: { debug: vi.fn(), warn: params.warn ?? vi.fn() },
+    builtinToolNames: params.builtinToolNames,
     stripBlockTags: (text: string) => text,
     finalizeAssistantTexts: params.finalizeAssistantTexts ?? vi.fn(),
     emitBlockReply: params.emitBlockReply ?? vi.fn(),
@@ -604,6 +607,57 @@ describe("handleMessageUpdate", () => {
 });
 
 describe("handleMessageEnd", () => {
+  it("warns when assistant text only pretends to call a registered tool", () => {
+    const warn = vi.fn();
+    const ctx = createMessageEndContext({
+      warn,
+      builtinToolNames: new Set(["read"]),
+    });
+
+    void handleMessageEnd(ctx, {
+      type: "message_end",
+      message: {
+        role: "assistant",
+        provider: "ollama",
+        model: "qwen-local",
+        content: [{ type: "text", text: '{"name":"read","arguments":{"path":"README.md"}}' }],
+        stopReason: "stop",
+      },
+    } as never);
+
+    expect(warn).toHaveBeenCalledWith(
+      "Assistant reply looks like a tool call, but no structured tool invocation was emitted; treating it as text.",
+      expect.objectContaining({
+        runId: "run-1",
+        sessionId: "session-1",
+        provider: "ollama",
+        model: "qwen-local",
+        pattern: "json_tool_call",
+        toolName: "read",
+        registeredTool: true,
+      }),
+    );
+  });
+
+  it("does not warn when the assistant emitted a structured tool call", () => {
+    const warn = vi.fn();
+    const ctx = createMessageEndContext({
+      warn,
+      builtinToolNames: new Set(["read"]),
+    });
+
+    void handleMessageEnd(ctx, {
+      type: "message_end",
+      message: {
+        role: "assistant",
+        content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
+        stopReason: "toolUse",
+      },
+    } as never);
+
+    expect(warn).not.toHaveBeenCalled();
+  });
+
   it("suppresses commentary-phase replies from user-visible output", () => {
     const onAgentEvent = vi.fn();
     const emitBlockReply = vi.fn();
diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.ts b/src/agents/pi-embedded-subscribe.handlers.messages.ts
index fc026b537e8..0a318715929 100644
--- a/src/agents/pi-embedded-subscribe.handlers.messages.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.messages.ts
@@ -27,6 +27,7 @@ import type {
 } from "./pi-embedded-subscribe.handlers.types.js";
 import { isPromiseLike } from "./pi-embedded-subscribe.promise.js";
 import { appendRawStream } from "./pi-embedded-subscribe.raw-stream.js";
+import { warnIfAssistantEmittedToolText } from "./pi-embedded-subscribe.tool-text-diagnostics.js";
 import {
   extractAssistantText,
   extractAssistantThinking,
@@ -674,6 +675,7 @@ export function handleMessageEnd(
     rawText,
     rawThinking: extractAssistantThinking(assistantMessage),
   });
+  warnIfAssistantEmittedToolText(ctx, assistantMessage);
 
   const text = resolveSilentReplyFallbackText({
     text: ctx.stripBlockTags(rawVisibleText, { thinking: false, final: false }),
diff --git a/src/agents/pi-embedded-subscribe.tool-text-diagnostics.ts b/src/agents/pi-embedded-subscribe.tool-text-diagnostics.ts
new file mode 100644
index 00000000000..5f64d828563
--- /dev/null
+++ b/src/agents/pi-embedded-subscribe.tool-text-diagnostics.ts
@@ -0,0 +1,86 @@
+import type { AssistantMessage } from "@mariozechner/pi-ai";
+import { extractTextFromChatContent } from "../shared/chat-content.js";
+import { normalizeOptionalString } from "../shared/string-coerce.js";
+import { detectToolCallShapedText } from "../shared/text/tool-call-shaped-text.js";
+import type { EmbeddedPiSubscribeContext } from "./pi-embedded-subscribe.handlers.types.js";
+import { normalizeToolName } from "./tool-policy.js";
+
+/** True when any content block is a structured tool invocation (type tag or tool_calls list). */
+function hasStructuredToolInvocation(message: AssistantMessage): boolean {
+  const invocationTypes = [
+    "toolCall",
+    "toolUse",
+    "tool_call",
+    "tool_use",
+    "functionCall",
+    "function_call",
+  ];
+  if (!Array.isArray(message.content)) {
+    return false;
+  }
+  return message.content.some((block) => {
+    if (!block || typeof block !== "object") {
+      return false;
+    }
+    const entry = block as unknown as Record<string, unknown>;
+    const blockType = typeof entry.type === "string" ? entry.type.trim() : "";
+    const carriesCallList = Array.isArray(entry.tool_calls) || Array.isArray(entry.toolCalls);
+    return invocationTypes.includes(blockType) || carriesCallList;
+  });
+}
+
+/** Message text via extractTextFromChatContent ("\n" join, trim normalizer); "" when absent. */
+function extractAssistantTextForToolDiagnostics(message: AssistantMessage): string {
+  const joined = extractTextFromChatContent(message.content, {
+    joinWith: "\n",
+    normalizeText: (piece) => piece.trim(),
+  });
+  return joined ?? "";
+}
+
+/**
+ * Compares `toolName` with each registered name after `normalizeToolName`.
+ * Yields undefined (rather than false) when either argument is missing.
+ */
+function isRegisteredToolName(
+  toolName: string | undefined,
+  registeredToolNames: ReadonlySet<string> | undefined,
+): boolean | undefined {
+  if (!(toolName && registeredToolNames)) {
+    return undefined;
+  }
+  const wanted = normalizeToolName(toolName);
+  const registered = Array.from(registeredToolNames);
+  return registered.some((candidate) => normalizeToolName(candidate) === wanted);
+}
+
+/** Warns (diagnostics only) when reply text looks like a tool call that was never structured. */
+export function warnIfAssistantEmittedToolText(
+  ctx: EmbeddedPiSubscribeContext,
+  assistantMessage: AssistantMessage,
+) {
+  const detection = hasStructuredToolInvocation(assistantMessage)
+    ? null
+    : detectToolCallShapedText(extractAssistantTextForToolDiagnostics(assistantMessage));
+  if (!detection) {
+    return;
+  }
+  const provider = normalizeOptionalString((assistantMessage as { provider?: unknown }).provider);
+  const model = normalizeOptionalString((assistantMessage as { model?: unknown }).model);
+  const registeredTool = isRegisteredToolName(detection.toolName, ctx.builtinToolNames);
+  // Logging must never throw: tolerate contexts that carry no session object.
+  const session = ctx.params.session as { id?: unknown } | undefined;
+  const sessionId = normalizeOptionalString(session?.id);
+  ctx.log.warn(
+    "Assistant reply looks like a tool call, but no structured tool invocation was emitted; treating it as text.",
+    {
+      runId: ctx.params.runId,
+      ...(sessionId ? { sessionId } : {}),
+      ...(provider ? { provider } : {}),
+      ...(model ? { model } : {}),
+      pattern: detection.kind,
+      ...(detection.toolName ? { toolName: detection.toolName } : {}),
+      ...(registeredTool !== undefined ? { registeredTool } : {}),
+    },
+  );
+}
diff --git a/src/shared/text/tool-call-shaped-text.test.ts b/src/shared/text/tool-call-shaped-text.test.ts
new file mode 100644
index 00000000000..c29db68bdb2
--- /dev/null
+++ b/src/shared/text/tool-call-shaped-text.test.ts
@@ -0,0 +1,37 @@
+import { describe, expect, it } from "vitest";
+import { detectToolCallShapedText } from "./tool-call-shaped-text.js";
+
+describe("detectToolCallShapedText", () => {
+  it("detects standalone OpenAI-style function-call JSON", () => {
+    expect(detectToolCallShapedText('{"name":"read","arguments":{"path":"README.md"}}')).toEqual({
+      kind: "json_tool_call",
+      toolName: "read",
+    });
+  });
+  // Fenced bodies are unwrapped before parsing; the name comes from the nested `function` record.
+  it("detects fenced tool_calls JSON", () => {
+    expect(
+      detectToolCallShapedText(
+        '```json\n{"tool_calls":[{"function":{"name":"web_search","arguments":{"query":"x"}}}]}\n```',
+      ),
+    ).toEqual({ kind: "json_tool_call", toolName: "web_search" });
+  });
+  // `<function=NAME>` supplies the XML tool name; the `Action:` line supplies the ReAct one.
+  it("detects XML and ReAct-style tool text", () => {
+    expect(
+      detectToolCallShapedText(
+        "<tool_call><function=read><parameter=path>README.md</parameter></function></tool_call>",
+      ),
+    ).toEqual({ kind: "xml_tool_call", toolName: "read" });
+    expect(detectToolCallShapedText('Action: exec\nAction Input: {"command":"pwd"}')).toEqual({
+      kind: "react_action",
+      toolName: "exec",
+    });
+  });
+  // Plain JSON fails the keyword prefilter; bare tool_call mentions pass it but match no detector.
+  it("ignores normal JSON and prose mentions", () => {
+    expect(detectToolCallShapedText('{"status":"ok","message":"done"}')).toBeNull();
+    expect(detectToolCallShapedText("Use tool_call tags only in examples.")).toBeNull();
+    expect(detectToolCallShapedText("Use <tool_call> to invoke tools.")).toBeNull();
+  });
+});
diff --git a/src/shared/text/tool-call-shaped-text.ts b/src/shared/text/tool-call-shaped-text.ts
new file mode 100644
index 00000000000..91a4bca1f29
--- /dev/null
+++ b/src/shared/text/tool-call-shaped-text.ts
@@ -0,0 +1,232 @@
+export type ToolCallShapedTextDetection = {
+  kind: "json_tool_call" | "xml_tool_call" | "bracketed_tool_call" | "react_action";
+  toolName?: string;
+};
+// Cheap keyword gate: text that does not match is never handed to the detectors.
+const TOOL_TEXT_PREFILTER_RE =
+  /(?:tool[_\s-]?calls?|function[_\s-]?call|["'](?:name|tool_name|function|arguments|args|input|parameters|tool_calls)["']|<\s*tool_call\b|Action\s*:|\[END_TOOL_REQUEST\])/i;
+const MAX_SCAN_CHARS = 20_000; // Only this many leading characters of the text are scanned.
+const MAX_JSON_CANDIDATES = 20; // Cap on balanced JSON spans collected from one text.
+const MAX_JSON_CANDIDATE_CHARS = 8_000; // Longer JSON spans are skipped rather than parsed.
+
+/** Returns `value` typed as a record when it is a non-array object; otherwise undefined. */
+function asRecord(value: unknown): Record<string, unknown> | undefined {
+  const isObject = typeof value === "object" && value !== null && !Array.isArray(value);
+  return isObject ? (value as Record<string, unknown>) : undefined;
+}
+
+/**
+ * Returns the trimmed text when `value` is a string with non-whitespace content.
+ */
+function readTrimmedString(value: unknown): string | undefined {
+  const text = typeof value === "string" ? value.trim() : "";
+  return text.length > 0 ? text : undefined;
+}
+
+/**
+ * Picks the tool name from the first of `name`, `tool_name`, `tool`,
+ * `function_name` that holds a non-blank string.
+ */
+function readToolName(record: Record<string, unknown>): string | undefined {
+  const values = [record.name, record.tool_name, record.tool, record.function_name];
+  return values.map((value) => readTrimmedString(value)).find((name) => name !== undefined);
+}
+
+function hasToolArgs(record: Record<string, unknown>): boolean { // key presence only
+  return ["arguments", "args", "input", "parameters"].some((key) => key in record);
+}
+
+function classifyJsonValue(value: unknown): ToolCallShapedTextDetection | null {
+  if (Array.isArray(value)) {
+    for (const item of value) {
+      const detection = classifyJsonValue(item);
+      if (detection) {
+        return detection;
+      }
+    }
+    return null;
+  }
+  // Past the array case (first classifying element wins), only objects can qualify.
+  const record = asRecord(value);
+  if (!record) {
+    return null;
+  }
+  // A tool_calls/toolCalls array counts even when none of its entries yields a tool name.
+  const toolCalls = record.tool_calls ?? record.toolCalls;
+  if (Array.isArray(toolCalls)) {
+    for (const toolCall of toolCalls) {
+      const detection = classifyJsonValue(toolCall);
+      if (detection) {
+        return detection;
+      }
+    }
+    return { kind: "json_tool_call" };
+  }
+  // Nested `function` record: needs both a name and an argument-carrying key.
+  const functionRecord = asRecord(record.function);
+  if (functionRecord) {
+    const toolName = readToolName(functionRecord);
+    if (toolName && hasToolArgs(functionRecord)) {
+      return { kind: "json_tool_call", toolName };
+    }
+  }
+  // Flat shape: name and argument-carrying key directly on the record.
+  const toolName = readToolName(record);
+  if (toolName && hasToolArgs(record)) {
+    return { kind: "json_tool_call", toolName };
+  }
+  // A name without arguments is accepted only next to an explicit tool/function-call `type`.
+  const type = readTrimmedString(record.type)?.toLowerCase();
+  if (
+    toolName &&
+    (type === "tool_call" ||
+      type === "toolcall" ||
+      type === "tooluse" ||
+      type === "tool_use" ||
+      type === "function_call" ||
+      type === "functioncall")
+  ) {
+    return { kind: "json_tool_call", toolName };
+  }
+  // Anything else is treated as ordinary JSON.
+  return null;
+}
+
+/**
+ * Returns the trimmed bodies of triple-backtick fences (any info string),
+ * dropping empty bodies and bodies longer than MAX_JSON_CANDIDATE_CHARS.
+ * Bodies are returned in the order the fences appear in `text`.
+ */
+function collectFencedJsonCandidates(text: string): string[] {
+  const fenceRe = /```(?:json|tool|tool_call|function_call)?[^\n\r]*[\r\n]([\s\S]*?)```/gi;
+  const bodies = Array.from(text.matchAll(fenceRe), (fence) => fence[1]?.trim() ?? "");
+  // An empty string stands in for a missing capture and is filtered out with blanks.
+  return bodies.filter((body) => body.length > 0 && body.length <= MAX_JSON_CANDIDATE_CHARS);
+}
+
+function findBalancedJsonEnd(text: string, start: number): number | null {
+  const opening = text[start];
+  const closing = opening === "{" ? "}" : opening === "[" ? "]" : "";
+  if (!closing) {
+    return null;
+  }
+  // Scan forward for the closer that balances text[start]; `stack` holds the closers still owed.
+  const stack = [closing];
+  let inString = false;
+  let escaped = false;
+  for (let index = start + 1; index < text.length; index += 1) {
+    if (index - start > MAX_JSON_CANDIDATE_CHARS) {
+      return null; // Span too long to be worth parsing.
+    }
+    const ch = text[index];
+    if (inString) {
+      if (escaped) {
+        escaped = false;
+      } else if (ch === "\\") {
+        escaped = true;
+      } else if (ch === '"') {
+        inString = false;
+      }
+      continue; // Brackets inside double-quoted strings are ignored.
+    }
+    if (ch === '"') {
+      inString = true;
+      continue;
+    }
+    if (ch === "{" || ch === "[") {
+      stack.push(ch === "{" ? "}" : "]");
+      continue;
+    }
+    if (ch === "}" || ch === "]") {
+      if (stack.at(-1) !== ch) {
+        return null; // Closer does not match the innermost opener.
+      }
+      stack.pop();
+      if (stack.length === 0) {
+        return index + 1; // Exclusive end offset of the balanced span.
+      }
+    }
+  }
+  return null; // Ran out of text before the span closed.
+}
+
+/** Collects up to MAX_JSON_CANDIDATES balanced `{…}`/`[…]` spans, scanning left to right. */
+function collectBalancedJsonCandidates(text: string): string[] {
+  const spans: string[] = [];
+  let cursor = 0;
+  while (cursor < text.length && spans.length < MAX_JSON_CANDIDATES) {
+    const opener = text[cursor];
+    const end = opener === "{" || opener === "[" ? findBalancedJsonEnd(text, cursor) : null;
+    if (end === null) {
+      cursor += 1;
+      continue;
+    }
+    const span = text.slice(cursor, end).trim();
+    if (span.length > 1) {
+      spans.push(span);
+    }
+    cursor = end; // Resume after the span, so its nested brackets are not rescanned.
+  }
+  return spans;
+}
+
+function detectJsonToolCall(text: string): ToolCallShapedTextDetection | null {
+  const raws = collectFencedJsonCandidates(text).concat(collectBalancedJsonCandidates(text));
+  for (const raw of raws) { // Fenced bodies first, then balanced spans; first hit wins.
+    try {
+      const found = classifyJsonValue(JSON.parse(raw));
+      if (found) {
+        return found;
+      }
+    } catch {
+      // Diagnostic-grade only: a candidate that fails to parse is left as plain text.
+    }
+  }
+  return null;
+}
+
+/** Detects <tool_call> markup that also has a <function=…> tag or a quoted "name" pair. */
+function detectXmlToolCall(text: string): ToolCallShapedTextDetection | null {
+  const hasTag = /<\s*tool_call\b/i.test(text);
+  const hasFunctionAttr = hasTag && /<\s*function=/i.test(text);
+  const namePair = hasTag ? /["']name["']\s*:\s*["']([^"']{1,120})["']/i.exec(text) : null;
+  if (!hasFunctionAttr && !namePair) {
+    return null;
+  }
+  const attrName = /<\s*function=([A-Za-z0-9_.:-]{1,120})\b/i.exec(text)?.[1];
+  const toolName = attrName ?? namePair?.[1]?.trim();
+  return toolName ? { kind: "xml_tool_call", toolName } : { kind: "xml_tool_call" };
+}
+
+/** Detects a standalone `[tool] … [END_TOOL_REQUEST]` block that spans the whole text. */
+function detectBracketedToolCall(text: string): ToolCallShapedTextDetection | null {
+  // A single `\s` before the lazy body matches the same strings as `\s+` but
+  // avoids quadratic backtracking on long whitespace runs with no end marker.
+  const toolName =
+    /^\s*\[([A-Za-z_][A-Za-z0-9_.:-]{0,119})\]\s[\s\S]*?\[END_TOOL_REQUEST\]\s*$/i.exec(text)?.[1];
+  return toolName ? { kind: "bracketed_tool_call", toolName } : null;
+}
+
+/**
+ * Detects a ReAct-style pair: an `Action: <name>` line followed, after at least
+ * one line break and only whitespace, by `Action Input:`. The name is `toolName`.
+ */
+function detectReactAction(text: string): ToolCallShapedTextDetection | null {
+  const actionRe =
+    /(?:^|\n)\s*Action\s*:\s*([A-Za-z_][A-Za-z0-9_.:-]{0,119})\s*(?:\r?\n)+\s*Action Input\s*:/i;
+  const toolName = actionRe.exec(text)?.[1];
+  return toolName ? { kind: "react_action", toolName } : null;
+}
+
+/**
+ * Classifies text that looks like a tool call. Only the first MAX_SCAN_CHARS characters are
+ * inspected; detectors run in the order bracketed, XML, JSON, ReAct and the first match wins.
+ */
+export function detectToolCallShapedText(text: string): ToolCallShapedTextDetection | null {
+  const sample = text.slice(0, MAX_SCAN_CHARS).trim();
+  const worthScanning = sample.length > 0 && TOOL_TEXT_PREFILTER_RE.test(sample);
+  if (!worthScanning) {
+    return null;
+  }
+  const markup = detectBracketedToolCall(sample) ?? detectXmlToolCall(sample);
+  return markup ?? detectJsonToolCall(sample) ?? detectReactAction(sample);
+}