From f9181835e8700f3e2c56ebe7cdec050276f75458 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 09:14:44 +0100 Subject: [PATCH] fix(agents): warn on fake local tool calls --- CHANGELOG.md | 1 + docs/gateway/local-models.md | 5 + ...bedded-subscribe.handlers.messages.test.ts | 56 ++++- ...pi-embedded-subscribe.handlers.messages.ts | 2 + ...mbedded-subscribe.tool-text-diagnostics.ts | 86 +++++++ src/shared/text/tool-call-shaped-text.test.ts | 37 +++ src/shared/text/tool-call-shaped-text.ts | 232 ++++++++++++++++++ 7 files changed, 418 insertions(+), 1 deletion(-) create mode 100644 src/agents/pi-embedded-subscribe.tool-text-diagnostics.ts create mode 100644 src/shared/text/tool-call-shaped-text.test.ts create mode 100644 src/shared/text/tool-call-shaped-text.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 05b13a2d66a..98caeae33d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai - CLI/update: keep the automatic post-update completion refresh on the core-command tree so it no longer stages bundled plugin runtime deps before the Gateway restart path, avoiding `.24` update hangs and 1006 disconnect cascades. Fixes #72665. Thanks @sakalaboator and @He-Pin. - Agents/Bedrock: stop heartbeat runs from persisting blank user transcript turns and repair existing blank user text messages before replay, preventing AWS Bedrock `ContentBlock` blank-text validation failures. Fixes #72640 and #72622. Thanks @goldzulu. - Agents/LM Studio: promote standalone bracketed local-model tool requests into registered tool calls and hide unsupported bracket blocks from visible replies, so MemPalace MCP lookups do not print raw `[tool]` JSON scaffolding in chat. Fixes #66178. Thanks @detroit357. +- Local models: warn when an assistant reply looks like a tool call but the provider emitted plain text instead of a structured tool invocation, making fake/non-executed tool calls visible in logs. Fixes #51332. Thanks @emilclaw. - LM Studio: trust configured LM Studio loopback, LAN, and tailnet endpoints for guarded model requests by default, preserving explicit private-network opt-outs. Refs #60994. Thanks @tnowakow. - Docker/setup: route Docker onboarding defaults for host-side LM Studio and Ollama through `host.docker.internal` and add the Linux host-gateway mapping to the bundled Compose file, so containerized gateways can reach local providers without using container loopback. Fixes #68684; supersedes #68702. Thanks @safrano9999 and @skolez. - Agents/LM Studio: strip prior-turn Gemma 4 reasoning from OpenAI-compatible replay while preserving active tool-call continuation reasoning. Fixes #68704. Thanks @chip-snomo and @Kailigithub. diff --git a/docs/gateway/local-models.md b/docs/gateway/local-models.md index 2967a39ed50..65c52e39985 100644 --- a/docs/gateway/local-models.md +++ b/docs/gateway/local-models.md @@ -169,6 +169,11 @@ Compatibility notes for stricter OpenAI-compatible backends: those into real tool calls only when the name exactly matches a registered tool for the turn; otherwise the block is treated as unsupported text and is hidden from user-visible replies. +- If a model emits JSON, XML, or ReAct-style text that looks like a tool call + but the provider did not emit a structured invocation, OpenClaw leaves it as + text and logs a warning with the run id, provider/model, detected pattern, and + tool name when available. Treat that as provider/model tool-call + incompatibility, not a completed tool run. - Some smaller or stricter local backends are unstable with OpenClaw's full agent-runtime prompt shape, especially when tool schemas are included. If the backend works for tiny direct `/v1/chat/completions` calls but fails on normal diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.test.ts b/src/agents/pi-embedded-subscribe.handlers.messages.test.ts index 74de65e45c6..62e36cf6d44 100644 --- a/src/agents/pi-embedded-subscribe.handlers.messages.test.ts +++ b/src/agents/pi-embedded-subscribe.handlers.messages.test.ts @@ -81,6 +81,8 @@ function createMessageEndContext( emitBlockReply?: ReturnType; finalizeAssistantTexts?: ReturnType; consumeReplyDirectives?: ReturnType; + warn?: ReturnType; + builtinToolNames?: ReadonlySet; state?: Record; } = {}, ) { @@ -118,7 +120,8 @@ function createMessageEndContext( noteLastAssistant: vi.fn(), recordAssistantUsage: vi.fn(), commitAssistantUsage: vi.fn(), - log: { debug: vi.fn(), warn: vi.fn() }, + log: { debug: vi.fn(), warn: params.warn ?? vi.fn() }, + builtinToolNames: params.builtinToolNames, stripBlockTags: (text: string) => text, finalizeAssistantTexts: params.finalizeAssistantTexts ?? vi.fn(), emitBlockReply: params.emitBlockReply ?? vi.fn(), @@ -604,6 +607,57 @@ describe("handleMessageUpdate", () => { }); describe("handleMessageEnd", () => { + it("warns when assistant text only pretends to call a registered tool", () => { + const warn = vi.fn(); + const ctx = createMessageEndContext({ + warn, + builtinToolNames: new Set(["read"]), + }); + + void handleMessageEnd(ctx, { + type: "message_end", + message: { + role: "assistant", + provider: "ollama", + model: "qwen-local", + content: [{ type: "text", text: '{"name":"read","arguments":{"path":"README.md"}}' }], + stopReason: "stop", + }, + } as never); + + expect(warn).toHaveBeenCalledWith( + "Assistant reply looks like a tool call, but no structured tool invocation was emitted; treating it as text.", + expect.objectContaining({ + runId: "run-1", + sessionId: "session-1", + provider: "ollama", + model: "qwen-local", + pattern: "json_tool_call", + toolName: "read", + registeredTool: true, + }), + ); + }); + + it("does not warn when the assistant emitted a structured tool call", () => { + const warn = vi.fn(); + const ctx = createMessageEndContext({ + warn, + builtinToolNames: new Set(["read"]), + }); + + void handleMessageEnd(ctx, { + type: "message_end", + message: { + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], + stopReason: "toolUse", + }, + } as never); + + expect(warn).not.toHaveBeenCalled(); + }); + it("suppresses commentary-phase replies from user-visible output", () => { const onAgentEvent = vi.fn(); const emitBlockReply = vi.fn(); diff --git a/src/agents/pi-embedded-subscribe.handlers.messages.ts b/src/agents/pi-embedded-subscribe.handlers.messages.ts index fc026b537e8..0a318715929 100644 --- a/src/agents/pi-embedded-subscribe.handlers.messages.ts +++ b/src/agents/pi-embedded-subscribe.handlers.messages.ts @@ -27,6 +27,7 @@ import type { } from "./pi-embedded-subscribe.handlers.types.js"; import { isPromiseLike } from "./pi-embedded-subscribe.promise.js"; import { appendRawStream } from "./pi-embedded-subscribe.raw-stream.js"; +import { warnIfAssistantEmittedToolText } from "./pi-embedded-subscribe.tool-text-diagnostics.js"; import { extractAssistantText, extractAssistantThinking, @@ -674,6 +675,7 @@ export function handleMessageEnd( rawText, rawThinking: extractAssistantThinking(assistantMessage), }); + warnIfAssistantEmittedToolText(ctx, assistantMessage); const text = resolveSilentReplyFallbackText({ text: ctx.stripBlockTags(rawVisibleText, { thinking: false, final: false }), diff --git a/src/agents/pi-embedded-subscribe.tool-text-diagnostics.ts b/src/agents/pi-embedded-subscribe.tool-text-diagnostics.ts new file mode 100644 index 00000000000..5f64d828563 --- /dev/null +++ b/src/agents/pi-embedded-subscribe.tool-text-diagnostics.ts @@ -0,0 +1,86 @@ +import type { AssistantMessage } from "@mariozechner/pi-ai"; +import { extractTextFromChatContent } from "../shared/chat-content.js"; +import { normalizeOptionalString } from "../shared/string-coerce.js"; +import { detectToolCallShapedText } from "../shared/text/tool-call-shaped-text.js"; +import type { EmbeddedPiSubscribeContext } from "./pi-embedded-subscribe.handlers.types.js"; +import { normalizeToolName } from "./tool-policy.js"; + +function hasStructuredToolInvocation(message: AssistantMessage): boolean { + if (!Array.isArray(message.content)) { + return false; + } + return message.content.some((block) => { + if (!block || typeof block !== "object") { + return false; + } + const record = block as unknown as Record; + const type = typeof record.type === "string" ? record.type.trim() : ""; + if ( + type === "toolCall" || + type === "toolUse" || + type === "tool_call" || + type === "tool_use" || + type === "functionCall" || + type === "function_call" + ) { + return true; + } + return Array.isArray(record.tool_calls) || Array.isArray(record.toolCalls); + }); +} + +function extractAssistantTextForToolDiagnostics(message: AssistantMessage): string { + return ( + extractTextFromChatContent(message.content, { + joinWith: "\n", + normalizeText: (text) => text.trim(), + }) ?? "" + ); +} + +function isRegisteredToolName( + toolName: string | undefined, + registeredToolNames: ReadonlySet | undefined, +): boolean | undefined { + if (!toolName || !registeredToolNames) { + return undefined; + } + const normalized = normalizeToolName(toolName); + for (const registeredToolName of registeredToolNames) { + if (normalizeToolName(registeredToolName) === normalized) { + return true; + } + } + return false; +} + +export function warnIfAssistantEmittedToolText( + ctx: EmbeddedPiSubscribeContext, + assistantMessage: AssistantMessage, +) { + if (hasStructuredToolInvocation(assistantMessage)) { + return; + } + const detection = detectToolCallShapedText( + extractAssistantTextForToolDiagnostics(assistantMessage), + ); + if (!detection) { + return; + } + const provider = normalizeOptionalString((assistantMessage as { provider?: unknown }).provider); + const model = normalizeOptionalString((assistantMessage as { model?: unknown }).model); + const registeredTool = isRegisteredToolName(detection.toolName, ctx.builtinToolNames); + const sessionId = normalizeOptionalString((ctx.params.session as { id?: unknown }).id); + ctx.log.warn( + "Assistant reply looks like a tool call, but no structured tool invocation was emitted; treating it as text.", + { + runId: ctx.params.runId, + ...(sessionId ? { sessionId } : {}), + ...(provider ? { provider } : {}), + ...(model ? { model } : {}), + pattern: detection.kind, + ...(detection.toolName ? { toolName: detection.toolName } : {}), + ...(registeredTool !== undefined ? { registeredTool } : {}), + }, + ); +} diff --git a/src/shared/text/tool-call-shaped-text.test.ts b/src/shared/text/tool-call-shaped-text.test.ts new file mode 100644 index 00000000000..c29db68bdb2 --- /dev/null +++ b/src/shared/text/tool-call-shaped-text.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it } from "vitest"; +import { detectToolCallShapedText } from "./tool-call-shaped-text.js"; + +describe("detectToolCallShapedText", () => { + it("detects standalone OpenAI-style function-call JSON", () => { + expect(detectToolCallShapedText('{"name":"read","arguments":{"path":"README.md"}}')).toEqual({ + kind: "json_tool_call", + toolName: "read", + }); + }); + + it("detects fenced tool_calls JSON", () => { + expect( + detectToolCallShapedText( + '```json\n{"tool_calls":[{"function":{"name":"web_search","arguments":{"query":"x"}}}]}\n```', + ), + ).toEqual({ kind: "json_tool_call", toolName: "web_search" }); + }); + + it("detects XML and ReAct-style tool text", () => { + expect( + detectToolCallShapedText( + "README.md", + ), + ).toEqual({ kind: "xml_tool_call", toolName: "read" }); + expect(detectToolCallShapedText('Action: exec\nAction Input: {"command":"pwd"}')).toEqual({ + kind: "react_action", + toolName: "exec", + }); + }); + + it("ignores normal JSON and prose mentions", () => { + expect(detectToolCallShapedText('{"status":"ok","message":"done"}')).toBeNull(); + expect(detectToolCallShapedText("Use tool_call tags only in examples.")).toBeNull(); + expect(detectToolCallShapedText("Use to invoke tools.")).toBeNull(); + }); +}); diff --git a/src/shared/text/tool-call-shaped-text.ts b/src/shared/text/tool-call-shaped-text.ts new file mode 100644 index 00000000000..91a4bca1f29 --- /dev/null +++ b/src/shared/text/tool-call-shaped-text.ts @@ -0,0 +1,232 @@ +export type ToolCallShapedTextDetection = { + kind: "json_tool_call" | "xml_tool_call" | "bracketed_tool_call" | "react_action"; + toolName?: string; +}; + +const TOOL_TEXT_PREFILTER_RE = + /(?:tool[_\s-]?calls?|function[_\s-]?call|["'](?:name|tool_name|function|arguments|args|input|parameters|tool_calls)["']|<\s*tool_call\b|Action\s*:|\[END_TOOL_REQUEST\])/i; +const MAX_SCAN_CHARS = 20_000; +const MAX_JSON_CANDIDATES = 20; +const MAX_JSON_CANDIDATE_CHARS = 8_000; + +function asRecord(value: unknown): Record | undefined { + return value && typeof value === "object" && !Array.isArray(value) + ? (value as Record) + : undefined; +} + +function readTrimmedString(value: unknown): string | undefined { + if (typeof value !== "string") { + return undefined; + } + const trimmed = value.trim(); + return trimmed ? trimmed : undefined; +} + +function readToolName(record: Record): string | undefined { + return ( + readTrimmedString(record.name) ?? + readTrimmedString(record.tool_name) ?? + readTrimmedString(record.tool) ?? + readTrimmedString(record.function_name) + ); +} + +function hasToolArgs(record: Record): boolean { + return "arguments" in record || "args" in record || "input" in record || "parameters" in record; +} + +function classifyJsonValue(value: unknown): ToolCallShapedTextDetection | null { + if (Array.isArray(value)) { + for (const item of value) { + const detection = classifyJsonValue(item); + if (detection) { + return detection; + } + } + return null; + } + + const record = asRecord(value); + if (!record) { + return null; + } + + const toolCalls = record.tool_calls ?? record.toolCalls; + if (Array.isArray(toolCalls)) { + for (const toolCall of toolCalls) { + const detection = classifyJsonValue(toolCall); + if (detection) { + return detection; + } + } + return { kind: "json_tool_call" }; + } + + const functionRecord = asRecord(record.function); + if (functionRecord) { + const toolName = readToolName(functionRecord); + if (toolName && hasToolArgs(functionRecord)) { + return { kind: "json_tool_call", toolName }; + } + } + + const toolName = readToolName(record); + if (toolName && hasToolArgs(record)) { + return { kind: "json_tool_call", toolName }; + } + + const type = readTrimmedString(record.type)?.toLowerCase(); + if ( + toolName && + (type === "tool_call" || + type === "toolcall" || + type === "tooluse" || + type === "tool_use" || + type === "function_call" || + type === "functioncall") + ) { + return { kind: "json_tool_call", toolName }; + } + + return null; +} + +function collectFencedJsonCandidates(text: string): string[] { + const candidates: string[] = []; + const fenceRe = /```(?:json|tool|tool_call|function_call)?[^\n\r]*[\r\n]([\s\S]*?)```/gi; + for (const match of text.matchAll(fenceRe)) { + const candidate = match[1]?.trim(); + if (candidate && candidate.length <= MAX_JSON_CANDIDATE_CHARS) { + candidates.push(candidate); + } + } + return candidates; +} + +function findBalancedJsonEnd(text: string, start: number): number | null { + const opening = text[start]; + const closing = opening === "{" ? "}" : opening === "[" ? "]" : ""; + if (!closing) { + return null; + } + + const stack = [closing]; + let inString = false; + let escaped = false; + for (let index = start + 1; index < text.length; index += 1) { + if (index - start > MAX_JSON_CANDIDATE_CHARS) { + return null; + } + const ch = text[index]; + if (inString) { + if (escaped) { + escaped = false; + } else if (ch === "\\") { + escaped = true; + } else if (ch === '"') { + inString = false; + } + continue; + } + if (ch === '"') { + inString = true; + continue; + } + if (ch === "{" || ch === "[") { + stack.push(ch === "{" ? "}" : "]"); + continue; + } + if (ch === "}" || ch === "]") { + if (stack.at(-1) !== ch) { + return null; + } + stack.pop(); + if (stack.length === 0) { + return index + 1; + } + } + } + return null; +} + +function collectBalancedJsonCandidates(text: string): string[] { + const candidates: string[] = []; + for (let index = 0; index < text.length && candidates.length < MAX_JSON_CANDIDATES; index += 1) { + const ch = text[index]; + if (ch !== "{" && ch !== "[") { + continue; + } + const end = findBalancedJsonEnd(text, index); + if (end === null) { + continue; + } + const candidate = text.slice(index, end).trim(); + if (candidate.length > 1) { + candidates.push(candidate); + } + index = end - 1; + } + return candidates; +} + +function detectJsonToolCall(text: string): ToolCallShapedTextDetection | null { + const candidates = [...collectFencedJsonCandidates(text), ...collectBalancedJsonCandidates(text)]; + for (const candidate of candidates) { + try { + const detection = classifyJsonValue(JSON.parse(candidate)); + if (detection) { + return detection; + } + } catch { + // Text only needs to be diagnostic-grade; malformed JSON stays text. + } + } + return null; +} + +function detectXmlToolCall(text: string): ToolCallShapedTextDetection | null { + if (!/<\s*tool_call\b/i.test(text)) { + return null; + } + if (!/<\s*function=/i.test(text) && !/["']name["']\s*:\s*["'][^"']{1,120}["']/i.test(text)) { + return null; + } + const toolName = + /<\s*function=([A-Za-z0-9_.:-]{1,120})\b/i.exec(text)?.[1] ?? + /["']name["']\s*:\s*["']([^"']{1,120})["']/i.exec(text)?.[1]?.trim(); + return { kind: "xml_tool_call", ...(toolName ? { toolName } : {}) }; +} + +function detectBracketedToolCall(text: string): ToolCallShapedTextDetection | null { + const match = + /^\s*\[([A-Za-z_][A-Za-z0-9_.:-]{0,119})\]\s+[\s\S]*?\[END_TOOL_REQUEST\]\s*$/i.exec(text); + if (!match?.[1]) { + return null; + } + return { kind: "bracketed_tool_call", toolName: match[1] }; +} + +function detectReactAction(text: string): ToolCallShapedTextDetection | null { + const match = + /(?:^|\n)\s*Action\s*:\s*([A-Za-z_][A-Za-z0-9_.:-]{0,119})\s*(?:\r?\n)+\s*Action Input\s*:/i.exec( + text, + ); + if (!match?.[1]) { + return null; + } + return { kind: "react_action", toolName: match[1] }; +} + +export function detectToolCallShapedText(text: string): ToolCallShapedTextDetection | null { + const trimmed = text.slice(0, MAX_SCAN_CHARS).trim(); + if (!trimmed || !TOOL_TEXT_PREFILTER_RE.test(trimmed)) { + return null; + } + return ( + detectBracketedToolCall(trimmed) ?? + detectXmlToolCall(trimmed) ?? + detectJsonToolCall(trimmed) ?? + detectReactAction(trimmed) + ); +}