Serve plugin-tool image results over MCP in the MCP image-block shape.

Plugin tools may return image content shaped as
  { type: "image", source: { type: "base64", media_type, data } }
whereas the blocks asserted by the tests below (and validated with
CallToolResultSchema) are shaped as
  { type: "image", data, mimeType }.

callTool now maps every element of an array result through
toMcpContentBlock, which:
  * turns non-object elements into text blocks via coerceChatContentText;
  * passes through non-image blocks, and image blocks that already carry
    string `data` and `mimeType`, unchanged;
  * rewrites base64 source-shaped image blocks to { type, data, mimeType };
  * falls back to a text block for any other image block.

Tests cover the handler output directly and a round trip through an
in-memory MCP client/server pair.

NOTE(review): this patch text had lost its line breaks, indentation and
generic type arguments. Restored here: `Record<string, unknown>` (isRecord,
resolveJsonSchemaForTool) and the `typeof import(...)` type argument on
vi.importActual. Indentation is reconstructed in 2-space Prettier layout;
confirm context-line whitespace against the target tree, or apply with
`git apply --ignore-whitespace`.

diff --git a/src/mcp/plugin-tools-handlers.ts b/src/mcp/plugin-tools-handlers.ts
index 879bf7d32d4..23ac442ed89 100644
--- a/src/mcp/plugin-tools-handlers.ts
+++ b/src/mcp/plugin-tools-handlers.ts
@@ -13,6 +13,39 @@ type CallPluginToolParams = {
   arguments?: unknown;
 };
 
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return value !== null && typeof value === "object" && !Array.isArray(value);
+}
+
+function toMcpContentBlock(block: unknown): unknown {
+  if (!isRecord(block)) {
+    return { type: "text", text: coerceChatContentText(block) };
+  }
+  if (block.type !== "image") {
+    return block;
+  }
+
+  if (typeof block.data === "string" && typeof block.mimeType === "string") {
+    return block;
+  }
+
+  const source = block.source;
+  if (
+    isRecord(source) &&
+    source.type === "base64" &&
+    typeof source.data === "string" &&
+    typeof source.media_type === "string"
+  ) {
+    return {
+      type: "image",
+      data: source.data,
+      mimeType: source.media_type,
+    };
+  }
+
+  return { type: "text", text: coerceChatContentText(block) };
+}
+
 function resolveJsonSchemaForTool(tool: AnyAgentTool): Record<string, unknown> {
   const params = tool.parameters;
   if (params && typeof params === "object" && "type" in params) {
@@ -59,7 +92,7 @@ export function createPluginToolsMcpHandlers(tools: AnyAgentTool[]) {
             : result;
         return {
           content: Array.isArray(rawContent)
-            ? rawContent
+            ? rawContent.map(toMcpContentBlock)
             : [{ type: "text", text: coerceChatContentText(rawContent) }],
         };
       } catch (err) {
diff --git a/src/mcp/plugin-tools-serve.test.ts b/src/mcp/plugin-tools-serve.test.ts
index c035248a040..0c77ef54233 100644
--- a/src/mcp/plugin-tools-serve.test.ts
+++ b/src/mcp/plugin-tools-serve.test.ts
@@ -1,4 +1,7 @@
 // Plugin MCP serve tests cover serving plugin tools over MCP.
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
+import { CallToolResultSchema } from "@modelcontextprotocol/sdk/types.js";
 import { afterEach, describe, expect, it, vi } from "vitest";
 import {
   type HookContext,
@@ -180,6 +183,82 @@ describe("plugin tools MCP server", () => {
     expect(result.content).toEqual([{ type: "text", text: "Stored." }]);
   });
 
+  it("serializes source-shaped image tool content with pinned MCP image blocks", async () => {
+    const execute = vi.fn().mockResolvedValue({
+      content: [
+        { type: "text", text: "browser screenshot" },
+        {
+          type: "image",
+          source: {
+            type: "base64",
+            media_type: "image/png",
+            data: "iVBORw0KGgo=",
+          },
+        },
+      ],
+    });
+    const tool = {
+      name: "browser_screenshot",
+      description: "Capture a browser screenshot",
+      parameters: { type: "object", properties: {} },
+      execute,
+    } as unknown as AnyAgentTool;
+
+    const handlers = createPluginToolsMcpHandlers([tool]);
+    const result = await handlers.callTool({
+      name: "browser_screenshot",
+      arguments: {},
+    });
+
+    expect(result.content).toEqual([
+      { type: "text", text: "browser screenshot" },
+      { type: "image", data: "iVBORw0KGgo=", mimeType: "image/png" },
+    ]);
+    expect(() => CallToolResultSchema.parse(result)).not.toThrow();
+  });
+
+  it("delivers source-shaped images through a real MCP client", async () => {
+    const execute = vi.fn().mockResolvedValue({
+      content: [
+        { type: "text", text: "browser screenshot" },
+        {
+          type: "image",
+          source: {
+            type: "base64",
+            media_type: "image/png",
+            data: "iVBORw0KGgo=",
+          },
+        },
+      ],
+    });
+    const tool = {
+      name: "browser_screenshot",
+      description: "Capture a browser screenshot",
+      parameters: { type: "object", properties: {} },
+      execute,
+    } as unknown as AnyAgentTool;
+    const { createToolsMcpServer } =
+      await vi.importActual<typeof import("./tools-stdio-server.js")>("./tools-stdio-server.js");
+    const server = createToolsMcpServer({ name: "plugin-tools-image-test", tools: [tool] });
+    const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair();
+    const client = new Client(
+      { name: "plugin-tools-image-test-client", version: "0.0.0" },
+      { capabilities: {} },
+    );
+
+    await Promise.all([server.connect(serverTransport), client.connect(clientTransport)]);
+    try {
+      const result = await client.callTool({ name: "browser_screenshot", arguments: {} });
+      expect(result.content).toEqual([
+        { type: "text", text: "browser screenshot" },
+        { type: "image", data: "iVBORw0KGgo=", mimeType: "image/png" },
+      ]);
+    } finally {
+      await client.close();
+      await server.close();
+    }
+  });
+
   it("serializes plugin tool results that do not use the MCP content envelope", async () => {
     const execute = vi.fn().mockResolvedValue({
       provider: "kitchen-sink-search",