From b1e4b6b65e2cc1dac0de9ba1ed4e218c14efeef2 Mon Sep 17 00:00:00 2001 From: Yzx <53250620+849261680@users.noreply.github.com> Date: Sat, 6 Jun 2026 10:16:10 +0800 Subject: [PATCH] fix(agents): coerce non-text/image MCP tool-result blocks to text (fixes #90710) (#90728) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: - The PR converts wider MCP CallToolResult content blocks into text/image AgentToolResult blocks at the bundle-MCP materialization boundary and adds regression tests. - PR surface: Source +36, Tests +66. Total +102 across 2 files. - Reproducibility: yes. Source inspection shows current main lets MCP resource/audio blocks cross into a text/ ... a spawned stdio MCP server; I did not run a live hosted Anthropic API round trip in this read-only review. Automerge notes: - No ClawSweeper repair was needed after automerge opt-in. Validation: - ClawSweeper review passed for head f70dccf33ed1cd3e4daacdfb3c352b39e23283b1. - Required merge gates passed before the squash merge. Prepared head SHA: f70dccf33ed1cd3e4daacdfb3c352b39e23283b1 Review: https://github.com/openclaw/openclaw/pull/90728#issuecomment-4634126025 Co-authored-by: 宇宙熊Yzx <53250620+849261680@users.noreply.github.com> Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com> Approved-by: takhoffman Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com> --- src/agents/agent-bundle-mcp-materialize.ts | 42 +++++++++++- ...agent-bundle-mcp-tools.materialize.test.ts | 66 +++++++++++++++++++ 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/src/agents/agent-bundle-mcp-materialize.ts b/src/agents/agent-bundle-mcp-materialize.ts index 2860c911ba5..f1190260630 100644 --- a/src/agents/agent-bundle-mcp-materialize.ts +++ b/src/agents/agent-bundle-mcp-materialize.ts @@ -1,6 +1,6 @@ /** Materializes configured MCP catalog entries into agent tools and runtime helpers. */ import crypto from "node:crypto"; -import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; +import type { CallToolResult, ContentBlock } from "@modelcontextprotocol/sdk/types.js"; import { normalizeLowercaseStringOrEmpty } from "@openclaw/normalization-core/string-coerce"; import type { OpenClawConfig } from "../config/types.openclaw.js"; import { logWarn } from "../logger.js"; @@ -20,13 +20,49 @@ import { normalizeToolParameterSchema } from "./agent-tools-parameter-schema.js" import type { AgentToolResult } from "./runtime/index.js"; import type { AnyAgentTool } from "./tools/common.js"; +type ToolResultContentBlock = AgentToolResult["content"][number]; + +// AgentToolResult only carries text/image, but an MCP CallToolResult can also +// return resource_link, resource, and audio blocks (MCP SDK ContentBlock union). +// Coercing those into the text/image contract here keeps the boundary honest so +// downstream provider converters never build an image block with undefined +// data/media_type, which makes Anthropic 400 and poisons the whole session +// history (every later turn replays the bad block and 400s too). See #90710. +function mcpContentBlockToToolResult(block: ContentBlock): ToolResultContentBlock { + switch (block.type) { + case "text": + return { type: "text", text: block.text }; + case "image": + // Only emit an image when the base64 source is actually present. + if (block.data && block.mimeType) { + return { type: "image", data: block.data, mimeType: block.mimeType }; + } + return { type: "text", text: JSON.stringify(block) }; + case "audio": + return { type: "text", text: `[audio ${block.mimeType}]` }; + case "resource_link": { + const label = block.title ?? block.name; + return { type: "text", text: label ? `[${label}] ${block.uri}` : block.uri }; + } + case "resource": { + const resource = block.resource; + const text = "text" in resource ? resource.text : undefined; + return { type: "text", text: text ?? resource.uri }; + } + default: + // Forward-compat / untrusted-server guard: stringify any block type the + // installed MCP SDK union does not cover instead of dropping it. + return { type: "text", text: JSON.stringify(block) }; + } +} + function toAgentToolResult(params: { serverName: string; toolName: string; result: CallToolResult; }): AgentToolResult { - const content = Array.isArray(params.result.content) - ? (params.result.content as AgentToolResult["content"]) + const content: AgentToolResult["content"] = Array.isArray(params.result.content) + ? params.result.content.map(mcpContentBlockToToolResult) : []; const structuredContentBlock = params.result.structuredContent !== undefined diff --git a/src/agents/agent-bundle-mcp-tools.materialize.test.ts b/src/agents/agent-bundle-mcp-tools.materialize.test.ts index 8558dc7712b..47d85680f34 100644 --- a/src/agents/agent-bundle-mcp-tools.materialize.test.ts +++ b/src/agents/agent-bundle-mcp-tools.materialize.test.ts @@ -156,6 +156,72 @@ describe("createBundleMcpToolRuntime", () => { }); }); + it("coerces non-text/image MCP tool-result blocks to text (resource_link/resource/audio)", async () => { + // resource_link/resource/audio blocks have no base64 image source; if they + // leaked into the provider image branch Anthropic would 400 on an image with + // undefined data/media_type and poison the whole session history (#90710). + const runtime = await materializeBundleMcpToolsForRun({ + runtime: makeToolRuntime({ + result: { + content: [ + { type: "text", text: "intro" }, + { + type: "resource_link", + uri: "https://example.com/a.docx", + name: "a.docx", + title: "Quarterly report", + }, + { + type: "resource_link", + uri: "https://example.com/bare", + name: "", + }, + { + type: "resource", + resource: { uri: "memo://one", text: "memo body" }, + }, + { + type: "resource", + resource: { uri: "blob://two", blob: "AAAA", mimeType: "application/pdf" }, + }, + { type: "audio", data: "AAAA", mimeType: "audio/mpeg" }, + { type: "image", data: "iVBOR", mimeType: "image/png" }, + ], + isError: false, + } as CallToolResult, + }), + }); + + const result = await runtime.tools[0].execute("call-bundle-probe", {}, undefined, undefined); + + expect(result.content).toEqual([ + { type: "text", text: "intro" }, + { type: "text", text: "[Quarterly report] https://example.com/a.docx" }, + { type: "text", text: "https://example.com/bare" }, + { type: "text", text: "memo body" }, + { type: "text", text: "blob://two" }, + { type: "text", text: "[audio audio/mpeg]" }, + { type: "image", data: "iVBOR", mimeType: "image/png" }, + ]); + }); + + it("coerces a malformed image block (missing base64 source) to text", async () => { + // A real-world poison case: image block with undefined data/media_type. + const runtime = await materializeBundleMcpToolsForRun({ + runtime: makeToolRuntime({ + result: { + content: [{ type: "image" } as unknown as CallToolResult["content"][number]], + isError: false, + } as CallToolResult, + }), + }); + + const result = await runtime.tools[0].execute("call-bundle-probe", {}, undefined, undefined); + + expect(result.content).toHaveLength(1); + expect(result.content[0]).toEqual({ type: "text", text: JSON.stringify({ type: "image" }) }); + }); + it("disambiguates bundle MCP tools that collide with existing tool names", async () => { const runtime = await materializeBundleMcpToolsForRun({ runtime: makeToolRuntime(),