mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-30 23:53:37 +00:00
fix(mcp): include image source for screenshot results (#90902)
* fix(mcp): emit image content with base64 source
* fix(mcp): keep plugin tool images in SDK schema
* test(mcp): exercise image bridge end to end

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
@@ -13,6 +13,39 @@ type CallPluginToolParams = {
|
||||
arguments?: unknown;
|
||||
};
|
||||
|
||||
/**
 * Type guard for plain keyed objects.
 *
 * @param value - Any value to inspect.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  // Arrays report typeof "object" too, so they are excluded explicitly.
  return !Array.isArray(value);
}
|
||||
|
||||
/**
 * Converts one entry of a tool result's content array into the block that is
 * placed in the MCP response.
 *
 * - Records whose `type` is not `"image"` are returned unchanged.
 * - Image records that already carry string `data` and `mimeType` are returned
 *   unchanged.
 * - Image records with a `source` of `{ type: "base64", data, media_type }`
 *   are flattened to `{ type: "image", data, mimeType }`.
 * - Everything else (non-records, and image records matching neither shape)
 *   becomes a text block built with `coerceChatContentText`.
 *
 * @param block - A single raw content entry produced by a tool.
 * @returns The block to emit for that entry.
 */
function toMcpContentBlock(block: unknown): unknown {
  if (isRecord(block)) {
    if (block.type !== "image") {
      return block;
    }

    // Already in the flat data/mimeType form: pass it through untouched.
    if (typeof block.data === "string" && typeof block.mimeType === "string") {
      return block;
    }

    // Source-shaped image: lift the nested base64 payload to the top level.
    const nested = block.source;
    if (isRecord(nested) && nested.type === "base64") {
      const payload = nested.data;
      if (typeof payload === "string") {
        const mediaType = nested.media_type;
        if (typeof mediaType === "string") {
          return { type: "image", data: payload, mimeType: mediaType };
        }
      }
    }
  }

  // Fallback for non-records and for image records with no usable payload.
  return { type: "text", text: coerceChatContentText(block) };
}
|
||||
|
||||
function resolveJsonSchemaForTool(tool: AnyAgentTool): Record<string, unknown> {
|
||||
const params = tool.parameters;
|
||||
if (params && typeof params === "object" && "type" in params) {
|
||||
@@ -59,7 +92,7 @@ export function createPluginToolsMcpHandlers(tools: AnyAgentTool[]) {
|
||||
: result;
|
||||
return {
|
||||
content: Array.isArray(rawContent)
|
||||
? rawContent
|
||||
? rawContent.map(toMcpContentBlock)
|
||||
: [{ type: "text", text: coerceChatContentText(rawContent) }],
|
||||
};
|
||||
} catch (err) {
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
// Plugin MCP serve tests cover serving plugin tools over MCP.
|
||||
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
||||
import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
|
||||
import { CallToolResultSchema } from "@modelcontextprotocol/sdk/types.js";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
type HookContext,
|
||||
@@ -180,6 +183,82 @@ describe("plugin tools MCP server", () => {
|
||||
expect(result.content).toEqual([{ type: "text", text: "Stored." }]);
|
||||
});
|
||||
|
||||
// Handler-level check: a tool returning an image in the nested base64 `source`
// form must come back as a flat { type, data, mimeType } image block that the
// SDK's CallToolResultSchema accepts.
it("serializes source-shaped image tool content with pinned MCP image blocks", async () => {
  const sourceShapedImage = {
    type: "image",
    source: {
      type: "base64",
      media_type: "image/png",
      data: "iVBORw0KGgo=",
    },
  };
  const execute = vi.fn().mockResolvedValue({
    content: [{ type: "text", text: "browser screenshot" }, sourceShapedImage],
  });
  const screenshotTool = {
    name: "browser_screenshot",
    description: "Capture a browser screenshot",
    parameters: { type: "object", properties: {} },
    execute,
  } as unknown as AnyAgentTool;

  const mcpHandlers = createPluginToolsMcpHandlers([screenshotTool]);
  const result = await mcpHandlers.callTool({
    name: "browser_screenshot",
    arguments: {},
  });

  const expectedContent = [
    { type: "text", text: "browser screenshot" },
    { type: "image", data: "iVBORw0KGgo=", mimeType: "image/png" },
  ];
  expect(result.content).toEqual(expectedContent);
  // The whole result must also validate against the SDK's result schema.
  expect(() => CallToolResultSchema.parse(result)).not.toThrow();
});
|
||||
|
||||
// End-to-end check: serve the tool from the actual (unmocked) tools MCP server
// over a linked in-memory transport pair and call it with an SDK client.
it("delivers source-shaped images through a real MCP client", async () => {
  const sourceShapedImage = {
    type: "image",
    source: {
      type: "base64",
      media_type: "image/png",
      data: "iVBORw0KGgo=",
    },
  };
  const execute = vi.fn().mockResolvedValue({
    content: [{ type: "text", text: "browser screenshot" }, sourceShapedImage],
  });
  const screenshotTool = {
    name: "browser_screenshot",
    description: "Capture a browser screenshot",
    parameters: { type: "object", properties: {} },
    execute,
  } as unknown as AnyAgentTool;

  // importActual loads the actual module implementation rather than a mocked one.
  const stdioServerModule =
    await vi.importActual<typeof import("./tools-stdio-server.js")>("./tools-stdio-server.js");
  const server = stdioServerModule.createToolsMcpServer({
    name: "plugin-tools-image-test",
    tools: [screenshotTool],
  });
  const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair();
  const client = new Client(
    { name: "plugin-tools-image-test-client", version: "0.0.0" },
    { capabilities: {} },
  );

  await Promise.all([server.connect(serverTransport), client.connect(clientTransport)]);
  try {
    const result = await client.callTool({ name: "browser_screenshot", arguments: {} });
    const expectedContent = [
      { type: "text", text: "browser screenshot" },
      { type: "image", data: "iVBORw0KGgo=", mimeType: "image/png" },
    ];
    expect(result.content).toEqual(expectedContent);
  } finally {
    // Close the client first, then the server, whether or not the assertion passed.
    await client.close();
    await server.close();
  }
});
|
||||
|
||||
it("serializes plugin tool results that do not use the MCP content envelope", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
provider: "kitchen-sink-search",
|
||||
|
||||
Reference in New Issue
Block a user