From d8ee97c4668d565f05aeaefdf989fa8e8dfc97a0 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Thu, 12 Mar 2026 03:28:22 -0400 Subject: [PATCH] Agents: recover malformed Anthropic-compatible tool call args (#42835) * Agents: recover malformed anthropic tool call args * Agents: add malformed tool call regression test * Changelog: note Kimi tool call arg recovery * Agents: repair toolcall end message snapshots * Agents: narrow Kimi tool call arg repair --- CHANGELOG.md | 1 + .../pi-embedded-runner/run/attempt.test.ts | 132 +++++++++ src/agents/pi-embedded-runner/run/attempt.ts | 261 ++++++++++++++++++ 3 files changed, 394 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce7e683df38..39da7caaf37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai - iMessage/self-chat echo dedupe: drop reflected duplicate copies only when a matching `is_from_me` event was just seen for the same chat, text, and `created_at`, preventing self-chat loops without broad text-only suppression. Related to #32166. (#38440) Thanks @vincentkoc. - Mattermost/block streaming: fix duplicate message delivery (one threaded, one top-level) when block streaming is active by excluding `replyToId` from the block reply dedup key and adding an explicit `threading` dock to the Mattermost plugin. (#41362) Thanks @mathiasnagler and @vincentkoc. - BlueBubbles/self-chat echo dedupe: drop reflected duplicate webhook copies only when a matching `fromMe` event was just seen for the same chat, body, and timestamp, preventing self-chat loops without broad webhook suppression. Related to #32166. (#38442) Thanks @vincentkoc. +- Models/Kimi Coding: send `anthropic-messages` tools in native Anthropic format again so `kimi-coding` stops degrading tool calls into XML/plain-text pseudo invocations instead of real `tool_use` blocks. (#38669, #39907, #40552) Thanks @opriz. ## 2026.3.11 diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index 9821adc0e0b..33a4f9654df 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -13,6 +13,7 @@ import { shouldInjectOllamaCompatNumCtx, decodeHtmlEntitiesInObject, wrapOllamaCompatNumCtx, + wrapStreamFnRepairMalformedToolCallArguments, wrapStreamFnTrimToolCallNames, } from "./attempt.js"; @@ -430,6 +431,137 @@ describe("wrapStreamFnTrimToolCallNames", () => { }); }); +describe("wrapStreamFnRepairMalformedToolCallArguments", () => { + function createFakeStream(params: { events: unknown[]; resultMessage: unknown }): { + result: () => Promise; + [Symbol.asyncIterator]: () => AsyncIterator; + } { + return { + async result() { + return params.resultMessage; + }, + [Symbol.asyncIterator]() { + return (async function* () { + for (const event of params.events) { + yield event; + } + })(); + }, + }; + } + + async function invokeWrappedStream(baseFn: (...args: never[]) => unknown) { + const wrappedFn = wrapStreamFnRepairMalformedToolCallArguments(baseFn as never); + return await wrappedFn({} as never, {} as never, {} as never); + } + + it("repairs anthropic-compatible tool arguments when trailing junk follows valid JSON", async () => { + const partialToolCall = { type: "toolCall", name: "read", arguments: {} }; + const streamedToolCall = { type: "toolCall", name: "read", arguments: {} }; + const endMessageToolCall = { type: "toolCall", name: "read", arguments: {} }; + const finalToolCall = { type: "toolCall", name: "read", arguments: {} }; + const partialMessage = { role: "assistant", content: [partialToolCall] }; + const endMessage = { role: "assistant", content: [endMessageToolCall] }; + const finalMessage = { role: "assistant", content: [finalToolCall] }; + const baseFn = vi.fn(() => + createFakeStream({ + events: [ + { + type: "toolcall_delta", + contentIndex: 0, + delta: '{"path":"/tmp/report.txt"}', + partial: partialMessage, + }, + { + type: "toolcall_delta", + contentIndex: 0, + delta: "xx", + partial: partialMessage, + }, + { + type: "toolcall_end", + contentIndex: 0, + toolCall: streamedToolCall, + partial: partialMessage, + message: endMessage, + }, + ], + resultMessage: finalMessage, + }), + ); + + const stream = await invokeWrappedStream(baseFn); + for await (const _item of stream) { + // drain + } + const result = await stream.result(); + + expect(partialToolCall.arguments).toEqual({ path: "/tmp/report.txt" }); + expect(streamedToolCall.arguments).toEqual({ path: "/tmp/report.txt" }); + expect(endMessageToolCall.arguments).toEqual({ path: "/tmp/report.txt" }); + expect(finalToolCall.arguments).toEqual({ path: "/tmp/report.txt" }); + expect(result).toBe(finalMessage); + }); + + it("keeps incomplete partial JSON unchanged until a complete object exists", async () => { + const partialToolCall = { type: "toolCall", name: "read", arguments: {} }; + const partialMessage = { role: "assistant", content: [partialToolCall] }; + const baseFn = vi.fn(() => + createFakeStream({ + events: [ + { + type: "toolcall_delta", + contentIndex: 0, + delta: '{"path":"/tmp', + partial: partialMessage, + }, + ], + resultMessage: { role: "assistant", content: [partialToolCall] }, + }), + ); + + const stream = await invokeWrappedStream(baseFn); + for await (const _item of stream) { + // drain + } + + expect(partialToolCall.arguments).toEqual({}); + }); + + it("does not repair tool arguments when trailing junk exceeds the Kimi-specific allowance", async () => { + const partialToolCall = { type: "toolCall", name: "read", arguments: {} }; + const streamedToolCall = { type: "toolCall", name: "read", arguments: {} }; + const partialMessage = { role: "assistant", content: [partialToolCall] }; + const baseFn = vi.fn(() => + createFakeStream({ + events: [ + { + type: "toolcall_delta", + contentIndex: 0, + delta: '{"path":"/tmp/report.txt"}oops', + partial: partialMessage, + }, + { + type: "toolcall_end", + contentIndex: 0, + toolCall: streamedToolCall, + partial: partialMessage, + }, + ], + resultMessage: { role: "assistant", content: [partialToolCall] }, + }), + ); + + const stream = await invokeWrappedStream(baseFn); + for await (const _item of stream) { + // drain + } + + expect(partialToolCall.arguments).toEqual({}); + expect(streamedToolCall.arguments).toEqual({}); + }); +}); + describe("isOllamaCompatProvider", () => { it("detects native ollama provider id", () => { expect( diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 2f5f3d04d5f..790323b8232 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -436,6 +436,258 @@ export function wrapStreamFnTrimToolCallNames( }; } +function extractBalancedJsonPrefix(raw: string): string | null { + let start = 0; + while (start < raw.length && /\s/.test(raw[start] ?? "")) { + start += 1; + } + const startChar = raw[start]; + if (startChar !== "{" && startChar !== "[") { + return null; + } + + let depth = 0; + let inString = false; + let escaped = false; + for (let i = start; i < raw.length; i += 1) { + const char = raw[i]; + if (char === undefined) { + break; + } + if (inString) { + if (escaped) { + escaped = false; + } else if (char === "\\") { + escaped = true; + } else if (char === '"') { + inString = false; + } + continue; + } + if (char === '"') { + inString = true; + continue; + } + if (char === "{" || char === "[") { + depth += 1; + continue; + } + if (char === "}" || char === "]") { + depth -= 1; + if (depth === 0) { + return raw.slice(start, i + 1); + } + } + } + return null; +} + +const MAX_TOOLCALL_REPAIR_BUFFER_CHARS = 64_000; +const MAX_TOOLCALL_REPAIR_TRAILING_CHARS = 3; +const TOOLCALL_REPAIR_ALLOWED_TRAILING_RE = /^[^\s{}[\]":,\\]{1,3}$/; + +function shouldAttemptMalformedToolCallRepair(partialJson: string, delta: string): boolean { + if (/[}\]]/.test(delta)) { + return true; + } + const trimmedDelta = delta.trim(); + return ( + trimmedDelta.length > 0 && + trimmedDelta.length <= MAX_TOOLCALL_REPAIR_TRAILING_CHARS && + /[}\]]/.test(partialJson) + ); +} + +type ToolCallArgumentRepair = { + args: Record; + trailingSuffix: string; +}; + +function tryParseMalformedToolCallArguments(raw: string): ToolCallArgumentRepair | undefined { + if (!raw.trim()) { + return undefined; + } + try { + JSON.parse(raw); + return undefined; + } catch { + const jsonPrefix = extractBalancedJsonPrefix(raw); + if (!jsonPrefix) { + return undefined; + } + const suffix = raw.slice(raw.indexOf(jsonPrefix) + jsonPrefix.length).trim(); + if ( + suffix.length === 0 || + suffix.length > MAX_TOOLCALL_REPAIR_TRAILING_CHARS || + !TOOLCALL_REPAIR_ALLOWED_TRAILING_RE.test(suffix) + ) { + return undefined; + } + try { + const parsed = JSON.parse(jsonPrefix) as unknown; + return parsed && typeof parsed === "object" && !Array.isArray(parsed) + ? { args: parsed as Record, trailingSuffix: suffix } + : undefined; + } catch { + return undefined; + } + } +} + +function repairToolCallArgumentsInMessage( + message: unknown, + contentIndex: number, + repairedArgs: Record, +): void { + if (!message || typeof message !== "object") { + return; + } + const content = (message as { content?: unknown }).content; + if (!Array.isArray(content)) { + return; + } + const block = content[contentIndex]; + if (!block || typeof block !== "object") { + return; + } + const typedBlock = block as { type?: unknown; arguments?: unknown }; + if (!isToolCallBlockType(typedBlock.type)) { + return; + } + typedBlock.arguments = repairedArgs; +} + +function repairMalformedToolCallArgumentsInMessage( + message: unknown, + repairedArgsByIndex: Map>, +): void { + if (!message || typeof message !== "object") { + return; + } + const content = (message as { content?: unknown }).content; + if (!Array.isArray(content)) { + return; + } + for (const [index, repairedArgs] of repairedArgsByIndex.entries()) { + repairToolCallArgumentsInMessage(message, index, repairedArgs); + } +} + +function wrapStreamRepairMalformedToolCallArguments( + stream: ReturnType, +): ReturnType { + const partialJsonByIndex = new Map(); + const repairedArgsByIndex = new Map>(); + const disabledIndices = new Set(); + const loggedRepairIndices = new Set(); + const originalResult = stream.result.bind(stream); + stream.result = async () => { + const message = await originalResult(); + repairMalformedToolCallArgumentsInMessage(message, repairedArgsByIndex); + partialJsonByIndex.clear(); + repairedArgsByIndex.clear(); + disabledIndices.clear(); + loggedRepairIndices.clear(); + return message; + }; + + const originalAsyncIterator = stream[Symbol.asyncIterator].bind(stream); + (stream as { [Symbol.asyncIterator]: typeof originalAsyncIterator })[Symbol.asyncIterator] = + function () { + const iterator = originalAsyncIterator(); + return { + async next() { + const result = await iterator.next(); + if (!result.done && result.value && typeof result.value === "object") { + const event = result.value as { + type?: unknown; + contentIndex?: unknown; + delta?: unknown; + partial?: unknown; + message?: unknown; + toolCall?: unknown; + }; + if ( + typeof event.contentIndex === "number" && + Number.isInteger(event.contentIndex) && + event.type === "toolcall_delta" && + typeof event.delta === "string" + ) { + if (disabledIndices.has(event.contentIndex)) { + return result; + } + const nextPartialJson = + (partialJsonByIndex.get(event.contentIndex) ?? "") + event.delta; + if (nextPartialJson.length > MAX_TOOLCALL_REPAIR_BUFFER_CHARS) { + partialJsonByIndex.delete(event.contentIndex); + repairedArgsByIndex.delete(event.contentIndex); + disabledIndices.add(event.contentIndex); + return result; + } + partialJsonByIndex.set(event.contentIndex, nextPartialJson); + if (shouldAttemptMalformedToolCallRepair(nextPartialJson, event.delta)) { + const repair = tryParseMalformedToolCallArguments(nextPartialJson); + if (repair) { + repairedArgsByIndex.set(event.contentIndex, repair.args); + repairToolCallArgumentsInMessage(event.partial, event.contentIndex, repair.args); + repairToolCallArgumentsInMessage(event.message, event.contentIndex, repair.args); + if (!loggedRepairIndices.has(event.contentIndex)) { + loggedRepairIndices.add(event.contentIndex); + log.warn( + `repairing kimi-coding tool call arguments after ${repair.trailingSuffix.length} trailing chars`, + ); + } + } + } + } + if ( + typeof event.contentIndex === "number" && + Number.isInteger(event.contentIndex) && + event.type === "toolcall_end" + ) { + const repairedArgs = repairedArgsByIndex.get(event.contentIndex); + if (repairedArgs) { + if (event.toolCall && typeof event.toolCall === "object") { + (event.toolCall as { arguments?: unknown }).arguments = repairedArgs; + } + repairToolCallArgumentsInMessage(event.partial, event.contentIndex, repairedArgs); + repairToolCallArgumentsInMessage(event.message, event.contentIndex, repairedArgs); + } + partialJsonByIndex.delete(event.contentIndex); + disabledIndices.delete(event.contentIndex); + loggedRepairIndices.delete(event.contentIndex); + } + } + return result; + }, + async return(value?: unknown) { + return iterator.return?.(value) ?? { done: true as const, value: undefined }; + }, + async throw(error?: unknown) { + return iterator.throw?.(error) ?? { done: true as const, value: undefined }; + }, + }; + }; + + return stream; +} + +export function wrapStreamFnRepairMalformedToolCallArguments(baseFn: StreamFn): StreamFn { + return (model, context, options) => { + const maybeStream = baseFn(model, context, options); + if (maybeStream && typeof maybeStream === "object" && "then" in maybeStream) { + return Promise.resolve(maybeStream).then((stream) => + wrapStreamRepairMalformedToolCallArguments(stream), + ); + } + return wrapStreamRepairMalformedToolCallArguments(maybeStream); + }; +} + +function shouldRepairMalformedAnthropicToolCallArguments(provider?: string): boolean { + return normalizeProviderId(provider ?? "") === "kimi-coding"; +} + // --------------------------------------------------------------------------- // xAI / Grok: decode HTML entities in tool call arguments // --------------------------------------------------------------------------- @@ -1379,6 +1631,15 @@ export async function runEmbeddedAttempt( allowedToolNames, ); + if ( + params.model.api === "anthropic-messages" && + shouldRepairMalformedAnthropicToolCallArguments(params.provider) + ) { + activeSession.agent.streamFn = wrapStreamFnRepairMalformedToolCallArguments( + activeSession.agent.streamFn, + ); + } + if (isXaiProvider(params.provider, params.modelId)) { activeSession.agent.streamFn = wrapStreamFnDecodeXaiToolCallArguments( activeSession.agent.streamFn,