fix(agents): coerce non-text/image MCP tool-result blocks to text (fixes #90710) (#90728)

Summary:
- The PR converts wider MCP CallToolResult content blocks into text/image AgentToolResult blocks at the bundle-MCP materialization boundary and adds regression tests.
- PR surface: Source +36, Tests +66. Total +102 across 2 files.
- Reproducibility: yes. Source inspection shows current main lets MCP resource/audio blocks cross into a text/ ...  a spawned stdio MCP server; I did not run a live hosted Anthropic API round trip in this read-only review.

Automerge notes:
- No ClawSweeper repair was needed after automerge opt-in.

Validation:
- ClawSweeper review passed for head f70dccf33e.
- Required merge gates passed before the squash merge.

Prepared head SHA: f70dccf33e
Review: https://github.com/openclaw/openclaw/pull/90728#issuecomment-4634126025

Co-authored-by: 宇宙熊Yzx <53250620+849261680@users.noreply.github.com>
Co-authored-by: clawsweeper[bot] <274271284+clawsweeper[bot]@users.noreply.github.com>
Approved-by: takhoffman
Co-authored-by: takhoffman <781889+takhoffman@users.noreply.github.com>
This commit is contained in:
Yzx
2026-06-06 10:16:10 +08:00
committed by GitHub
parent 9e29375cec
commit b1e4b6b65e
2 changed files with 105 additions and 3 deletions

View File

@@ -1,6 +1,6 @@
/** Materializes configured MCP catalog entries into agent tools and runtime helpers. */
import crypto from "node:crypto";
import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
import type { CallToolResult, ContentBlock } from "@modelcontextprotocol/sdk/types.js";
import { normalizeLowercaseStringOrEmpty } from "@openclaw/normalization-core/string-coerce";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { logWarn } from "../logger.js";
@@ -20,13 +20,49 @@ import { normalizeToolParameterSchema } from "./agent-tools-parameter-schema.js"
import type { AgentToolResult } from "./runtime/index.js";
import type { AnyAgentTool } from "./tools/common.js";
type ToolResultContentBlock = AgentToolResult<unknown>["content"][number];
// AgentToolResult only carries text/image, but an MCP CallToolResult can also
// return resource_link, resource, and audio blocks (MCP SDK ContentBlock union).
// Coercing those into the text/image contract here keeps the boundary honest so
// downstream provider converters never build an image block with undefined
// data/media_type, which makes Anthropic 400 and poisons the whole session
// history (every later turn replays the bad block and 400s too). See #90710.
function mcpContentBlockToToolResult(block: ContentBlock): ToolResultContentBlock {
switch (block.type) {
case "text":
return { type: "text", text: block.text };
case "image":
// Only emit an image when the base64 source is actually present.
if (block.data && block.mimeType) {
return { type: "image", data: block.data, mimeType: block.mimeType };
}
return { type: "text", text: JSON.stringify(block) };
case "audio":
return { type: "text", text: `[audio ${block.mimeType}]` };
case "resource_link": {
const label = block.title ?? block.name;
return { type: "text", text: label ? `[${label}] ${block.uri}` : block.uri };
}
case "resource": {
const resource = block.resource;
const text = "text" in resource ? resource.text : undefined;
return { type: "text", text: text ?? resource.uri };
}
default:
// Forward-compat / untrusted-server guard: stringify any block type the
// installed MCP SDK union does not cover instead of dropping it.
return { type: "text", text: JSON.stringify(block) };
}
}
function toAgentToolResult(params: {
serverName: string;
toolName: string;
result: CallToolResult;
}): AgentToolResult<unknown> {
const content = Array.isArray(params.result.content)
? (params.result.content as AgentToolResult<unknown>["content"])
const content: AgentToolResult<unknown>["content"] = Array.isArray(params.result.content)
? params.result.content.map(mcpContentBlockToToolResult)
: [];
const structuredContentBlock =
params.result.structuredContent !== undefined

View File

@@ -156,6 +156,72 @@ describe("createBundleMcpToolRuntime", () => {
});
});
it("coerces non-text/image MCP tool-result blocks to text (resource_link/resource/audio)", async () => {
// resource_link/resource/audio blocks have no base64 image source; if they
// leaked into the provider image branch Anthropic would 400 on an image with
// undefined data/media_type and poison the whole session history (#90710).
const runtime = await materializeBundleMcpToolsForRun({
runtime: makeToolRuntime({
result: {
content: [
{ type: "text", text: "intro" },
{
type: "resource_link",
uri: "https://example.com/a.docx",
name: "a.docx",
title: "Quarterly report",
},
{
type: "resource_link",
uri: "https://example.com/bare",
name: "",
},
{
type: "resource",
resource: { uri: "memo://one", text: "memo body" },
},
{
type: "resource",
resource: { uri: "blob://two", blob: "AAAA", mimeType: "application/pdf" },
},
{ type: "audio", data: "AAAA", mimeType: "audio/mpeg" },
{ type: "image", data: "iVBOR", mimeType: "image/png" },
],
isError: false,
} as CallToolResult,
}),
});
const result = await runtime.tools[0].execute("call-bundle-probe", {}, undefined, undefined);
expect(result.content).toEqual([
{ type: "text", text: "intro" },
{ type: "text", text: "[Quarterly report] https://example.com/a.docx" },
{ type: "text", text: "https://example.com/bare" },
{ type: "text", text: "memo body" },
{ type: "text", text: "blob://two" },
{ type: "text", text: "[audio audio/mpeg]" },
{ type: "image", data: "iVBOR", mimeType: "image/png" },
]);
});
it("coerces a malformed image block (missing base64 source) to text", async () => {
// A real-world poison case: image block with undefined data/media_type.
const runtime = await materializeBundleMcpToolsForRun({
runtime: makeToolRuntime({
result: {
content: [{ type: "image" } as unknown as CallToolResult["content"][number]],
isError: false,
} as CallToolResult,
}),
});
const result = await runtime.tools[0].execute("call-bundle-probe", {}, undefined, undefined);
expect(result.content).toHaveLength(1);
expect(result.content[0]).toEqual({ type: "text", text: JSON.stringify({ type: "image" }) });
});
it("disambiguates bundle MCP tools that collide with existing tool names", async () => {
const runtime = await materializeBundleMcpToolsForRun({
runtime: makeToolRuntime(),