fix(mcp): include image source for screenshot results (#90902)

* fix(mcp): emit image content with base64 source

* fix(mcp): keep plugin tool images in SDK schema

* test(mcp): exercise image bridge end to end

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
mushuiyu886
2026-06-27 07:39:40 +08:00
committed by GitHub
parent 43dd34262e
commit a846b879ec
2 changed files with 113 additions and 1 deletions

View File

@@ -13,6 +13,39 @@ type CallPluginToolParams = {
arguments?: unknown;
};
/**
 * Type guard for plain "record-like" values.
 *
 * @param value - Any value to inspect.
 * @returns `true` when `value` is a non-null object that is not an array;
 *   `false` for primitives, `null`, `undefined`, functions, and arrays.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  // `typeof null` is "object" and arrays are objects too, so rule both out first.
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Normalizes a single tool-result content entry before it is returned to the
 * MCP caller.
 *
 * Rules, in order:
 * - non-record entries become a text block built from `coerceChatContentText`;
 * - record entries whose `type` is not `"image"` are passed through untouched;
 * - image entries that already carry string `data` and `mimeType` are passed
 *   through untouched;
 * - image entries with a `source` of `{ type: "base64", data, media_type }`
 *   are flattened to `{ type: "image", data, mimeType }`;
 * - any other image entry falls back to a text block.
 *
 * @param block - One element of a tool result's `content` array.
 * @returns The entry itself, a flattened image block, or a text fallback.
 */
function toMcpContentBlock(block: unknown): unknown {
  // Shared fallback: render whatever we were given as text.
  const asTextBlock = () => ({ type: "text", text: coerceChatContentText(block) });

  if (!isRecord(block)) {
    return asTextBlock();
  }
  if (block.type !== "image") {
    return block;
  }

  // Already in the flat data/mimeType shape: nothing to convert.
  const isFlatImage = typeof block.data === "string" && typeof block.mimeType === "string";
  if (isFlatImage) {
    return block;
  }

  // Source-shaped image: only a base64 source with string payload and media
  // type can be flattened.
  const { source } = block;
  if (!isRecord(source) || source.type !== "base64") {
    return asTextBlock();
  }
  const { data, media_type: mimeType } = source;
  if (typeof data !== "string" || typeof mimeType !== "string") {
    return asTextBlock();
  }
  return { type: "image", data, mimeType };
}
function resolveJsonSchemaForTool(tool: AnyAgentTool): Record<string, unknown> {
const params = tool.parameters;
if (params && typeof params === "object" && "type" in params) {
@@ -59,7 +92,7 @@ export function createPluginToolsMcpHandlers(tools: AnyAgentTool[]) {
: result;
return {
content: Array.isArray(rawContent)
? rawContent
? rawContent.map(toMcpContentBlock)
: [{ type: "text", text: coerceChatContentText(rawContent) }],
};
} catch (err) {

View File

@@ -1,4 +1,7 @@
// Plugin MCP serve tests cover serving plugin tools over MCP.
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
import { CallToolResultSchema } from "@modelcontextprotocol/sdk/types.js";
import { afterEach, describe, expect, it, vi } from "vitest";
import {
type HookContext,
@@ -180,6 +183,82 @@ describe("plugin tools MCP server", () => {
expect(result.content).toEqual([{ type: "text", text: "Stored." }]);
});
// Handler-level check: a tool that returns a source-shaped base64 image must
// come back from callTool as a flat data/mimeType image block, and the whole
// result must parse against the SDK's CallToolResultSchema.
it("serializes source-shaped image tool content with pinned MCP image blocks", async () => {
  const sourceShapedImage = {
    type: "image",
    source: {
      type: "base64",
      media_type: "image/png",
      data: "iVBORw0KGgo=",
    },
  };
  const execute = vi.fn().mockResolvedValue({
    content: [{ type: "text", text: "browser screenshot" }, sourceShapedImage],
  });
  const screenshotTool = {
    name: "browser_screenshot",
    description: "Capture a browser screenshot",
    parameters: { type: "object", properties: {} },
    execute,
  } as unknown as AnyAgentTool;

  const handlers = createPluginToolsMcpHandlers([screenshotTool]);
  const result = await handlers.callTool({
    name: "browser_screenshot",
    arguments: {},
  });

  // The text block is untouched; the image block is flattened.
  const expectedContent = [
    { type: "text", text: "browser screenshot" },
    { type: "image", data: "iVBORw0KGgo=", mimeType: "image/png" },
  ];
  expect(result.content).toEqual(expectedContent);
  expect(() => CallToolResultSchema.parse(result)).not.toThrow();
});
// End-to-end check: the same source-shaped image is served by a tools MCP
// server and fetched by an SDK Client over a linked in-memory transport pair.
it("delivers source-shaped images through a real MCP client", async () => {
  const sourceShapedImage = {
    type: "image",
    source: {
      type: "base64",
      media_type: "image/png",
      data: "iVBORw0KGgo=",
    },
  };
  const execute = vi.fn().mockResolvedValue({
    content: [{ type: "text", text: "browser screenshot" }, sourceShapedImage],
  });
  const screenshotTool = {
    name: "browser_screenshot",
    description: "Capture a browser screenshot",
    parameters: { type: "object", properties: {} },
    execute,
  } as unknown as AnyAgentTool;

  // NOTE(review): importActual presumably sidesteps a module mock set up
  // elsewhere in this test file — confirm against the file's vi.mock calls.
  const { createToolsMcpServer } =
    await vi.importActual<typeof import("./tools-stdio-server.js")>("./tools-stdio-server.js");
  const server = createToolsMcpServer({
    name: "plugin-tools-image-test",
    tools: [screenshotTool],
  });

  const [clientEnd, serverEnd] = InMemoryTransport.createLinkedPair();
  const client = new Client(
    { name: "plugin-tools-image-test-client", version: "0.0.0" },
    { capabilities: {} },
  );
  await Promise.all([server.connect(serverEnd), client.connect(clientEnd)]);

  try {
    const result = await client.callTool({ name: "browser_screenshot", arguments: {} });
    const expectedContent = [
      { type: "text", text: "browser screenshot" },
      { type: "image", data: "iVBORw0KGgo=", mimeType: "image/png" },
    ];
    expect(result.content).toEqual(expectedContent);
  } finally {
    // Close the client first, then the server, whether or not the assertion passed.
    await client.close();
    await server.close();
  }
});
it("serializes plugin tool results that do not use the MCP content envelope", async () => {
const execute = vi.fn().mockResolvedValue({
provider: "kitchen-sink-search",