fix(agents): migrate remaining media artifacts

This commit is contained in:
Peter Steinberger
2026-03-21 20:57:20 -07:00
parent e7e4c68caf
commit 7ac312b8fe
5 changed files with 196 additions and 9 deletions

View File

@@ -1,5 +1,8 @@
import { describe, expect, test } from "vitest";
import { parseAvailableTags } from "./common.js";
import { imageResult, parseAvailableTags } from "./common.js";
const PNG_1X1_BASE64 =
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAusB9Wn8n0sAAAAASUVORK5CYII=";
describe("parseAvailableTags", () => {
test("returns undefined for non-array inputs", () => {
@@ -26,3 +29,48 @@ describe("parseAvailableTags", () => {
]);
});
});
// Suite for imageResult: checks that the file path is reported through
// details.media.mediaUrl and that no "MEDIA:" marker appears in content.
describe("imageResult", () => {
// Without extraText, content is expected to hold only the image entry, and
// details is expected to carry both `path` and `media.mediaUrl` set to the path.
test("stores media delivery in details.media instead of MEDIA text", async () => {
const result = await imageResult({
label: "test:image",
path: "/tmp/test.png",
base64: PNG_1X1_BASE64,
mimeType: "image/png",
});
expect(result.content).toEqual([
{
type: "image",
data: PNG_1X1_BASE64,
mimeType: "image/png",
},
]);
expect(result.details).toEqual({
path: "/tmp/test.png",
media: {
mediaUrl: "/tmp/test.png",
},
});
});
// With extraText, the text entry is expected first and the image second;
// the serialized content must not contain a "MEDIA:" marker.
test("keeps extra text without MEDIA text fallback", async () => {
const result = await imageResult({
label: "test:image",
path: "/tmp/test.png",
base64: PNG_1X1_BASE64,
mimeType: "image/png",
extraText: "label text",
});
expect(result.content?.[0]).toEqual({
type: "text",
text: "label text",
});
expect(result.content?.[1]).toEqual({
type: "image",
data: PNG_1X1_BASE64,
mimeType: "image/png",
});
// Serialize the whole content array so the check covers every entry.
expect(JSON.stringify(result.content)).not.toContain("MEDIA:");
});
});

View File

@@ -251,19 +251,29 @@ export async function imageResult(params: {
imageSanitization?: ImageSanitizationLimits;
}): Promise<AgentToolResult<unknown>> {
const content: AgentToolResult<unknown>["content"] = [
{
type: "text",
text: params.extraText ?? `MEDIA:${params.path}`,
},
...(params.extraText ? [{ type: "text" as const, text: params.extraText }] : []),
{
type: "image",
data: params.base64,
mimeType: params.mimeType,
},
];
const detailsMedia =
params.details?.media &&
typeof params.details.media === "object" &&
!Array.isArray(params.details.media)
? (params.details.media as Record<string, unknown>)
: undefined;
const result: AgentToolResult<unknown> = {
content,
details: { path: params.path, ...params.details },
details: {
path: params.path,
...params.details,
media: {
...detailsMedia,
mediaUrl: params.path,
},
},
};
return await sanitizeToolResultImages(result, params.label, params.imageSanitization);
}

View File

@@ -7,10 +7,26 @@ const gatewayMocks = vi.hoisted(() => ({
// Mock functions for ./nodes-utils.js, created via vi.hoisted so they can be
// referenced from the vi.mock factory for that module further down this file.
// resolveNodeId and resolveNodeIdFromList yield "node-1", resolveNode resolves
// a node at 127.0.0.1, and listNodes resolves an empty list.
const nodeUtilsMocks = vi.hoisted(() => ({
resolveNodeId: vi.fn(async () => "node-1"),
resolveNode: vi.fn(async () => ({ nodeId: "node-1", remoteIp: "127.0.0.1" })),
listNodes: vi.fn(async () => [] as Array<{ nodeId: string; commands?: string[] }>),
resolveNodeIdFromList: vi.fn(() => "node-1"),
}));
// Mock functions for ../../cli/nodes-camera.js, created via vi.hoisted so the
// vi.mock factory for that module can reference them.
const nodesCameraMocks = vi.hoisted(() => ({
// Returns a fixed /tmp path; the facing value, when given, is part of the file name.
cameraTempPath: vi.fn(({ facing }: { facing?: string }) =>
facing ? `/tmp/camera-${facing}.jpg` : "/tmp/camera.jpg",
),
// Bare mock with no implementation.
parseCameraClipPayload: vi.fn(),
// Default parsed snap: a fixed 800x600 jpg payload, regardless of input.
parseCameraSnapPayload: vi.fn(() => ({
base64: "ZmFrZQ==",
format: "jpg",
width: 800,
height: 600,
})),
// Bare mock with no implementation.
writeCameraClipPayloadToFile: vi.fn(),
// Resolves to undefined; the mock itself writes nothing.
writeCameraPayloadToFile: vi.fn(async () => undefined),
}));
const screenMocks = vi.hoisted(() => ({
parseScreenRecordPayload: vi.fn(() => ({
base64: "ZmFrZQ==",
@@ -31,10 +47,19 @@ vi.mock("./gateway.js", () => ({
// Replace the ./nodes-utils.js exports with the mocks held in nodeUtilsMocks.
vi.mock("./nodes-utils.js", () => ({
resolveNodeId: nodeUtilsMocks.resolveNodeId,
resolveNode: nodeUtilsMocks.resolveNode,
listNodes: nodeUtilsMocks.listNodes,
resolveNodeIdFromList: nodeUtilsMocks.resolveNodeIdFromList,
}));
// Replace the ../../cli/nodes-camera.js exports with the mocks held in nodesCameraMocks.
vi.mock("../../cli/nodes-camera.js", () => ({
cameraTempPath: nodesCameraMocks.cameraTempPath,
parseCameraClipPayload: nodesCameraMocks.parseCameraClipPayload,
parseCameraSnapPayload: nodesCameraMocks.parseCameraSnapPayload,
writeCameraClipPayloadToFile: nodesCameraMocks.writeCameraClipPayloadToFile,
writeCameraPayloadToFile: nodesCameraMocks.writeCameraPayloadToFile,
}));
vi.mock("../../cli/nodes-screen.js", () => ({
parseScreenRecordPayload: screenMocks.parseScreenRecordPayload,
screenRecordTempPath: screenMocks.screenRecordTempPath,
@@ -49,8 +74,12 @@ describe("createNodesTool screen_record duration guardrails", () => {
gatewayMocks.readGatewayCallOptions.mockReset();
gatewayMocks.readGatewayCallOptions.mockReturnValue({});
nodeUtilsMocks.resolveNodeId.mockClear();
nodeUtilsMocks.resolveNode.mockClear();
screenMocks.parseScreenRecordPayload.mockClear();
screenMocks.writeScreenRecordToFile.mockClear();
nodesCameraMocks.cameraTempPath.mockClear();
nodesCameraMocks.parseCameraSnapPayload.mockClear();
nodesCameraMocks.writeCameraPayloadToFile.mockClear();
});
it("marks nodes as owner-only", () => {
@@ -136,4 +165,84 @@ describe("createNodesTool screen_record duration guardrails", () => {
});
expect(prepareCall?.params).not.toHaveProperty("rawCommand");
});
// camera_snap: the result details are expected to list the snap under `snaps`
// and expose its file path through media.mediaUrls, with no "MEDIA:" text in content.
it("returns camera snaps via details.media.mediaUrls", async () => {
// The gateway call resolves with a minimal payload.
// NOTE(review): the expected path and dimensions below presumably come from the
// cameraTempPath and parseCameraSnapPayload mocks — confirm against the tool code.
gatewayMocks.callGatewayTool.mockResolvedValue({ payload: { ok: true } });
const tool = createNodesTool();
const result = await tool.execute("call-1", {
action: "camera_snap",
node: "macbook",
facing: "front",
});
expect(result?.details).toEqual({
snaps: [
{
facing: "front",
path: "/tmp/camera-front.jpg",
width: 800,
height: 600,
},
],
media: {
mediaUrls: ["/tmp/camera-front.jpg"],
},
});
// Falls back to an empty array so the check also passes when content is absent.
expect(JSON.stringify(result?.content ?? [])).not.toContain("MEDIA:");
});
// photos_latest: the result details are expected to list each photo under `photos`
// and expose all file paths through media.mediaUrls, with no "MEDIA:" text in content.
it("returns latest photos via details.media.mediaUrls", async () => {
// The gateway returns two photos; only the first carries a createdAt value.
gatewayMocks.callGatewayTool.mockResolvedValue({
payload: {
photos: [
{ base64: "ZmFrZQ==", format: "jpg", width: 800, height: 600, createdAt: "now" },
{ base64: "YmFy", format: "jpg", width: 1024, height: 768 },
],
},
});
// Queue one temp path per photo so each photo gets a distinct path.
nodesCameraMocks.cameraTempPath
.mockReturnValueOnce("/tmp/photo-1.jpg")
.mockReturnValueOnce("/tmp/photo-2.jpg");
// Queue one parsed payload per photo, matching the gateway payload above.
nodesCameraMocks.parseCameraSnapPayload
.mockReturnValueOnce({
base64: "ZmFrZQ==",
format: "jpg",
width: 800,
height: 600,
})
.mockReturnValueOnce({
base64: "YmFy",
format: "jpg",
width: 1024,
height: 768,
});
const tool = createNodesTool();
const result = await tool.execute("call-1", {
action: "photos_latest",
node: "macbook",
});
// Entries are expected in payload order; createdAt appears only on the first.
expect(result?.details).toEqual({
photos: [
{
index: 0,
path: "/tmp/photo-1.jpg",
width: 800,
height: 600,
createdAt: "now",
},
{
index: 1,
path: "/tmp/photo-2.jpg",
width: 1024,
height: 768,
},
],
media: {
mediaUrls: ["/tmp/photo-1.jpg", "/tmp/photo-2.jpg"],
},
});
// Falls back to an empty array so the check also passes when content is absent.
expect(JSON.stringify(result?.content ?? [])).not.toContain("MEDIA:");
});
});

View File

@@ -327,7 +327,17 @@ export function createNodesTool(options?: {
});
}
const result: AgentToolResult<unknown> = { content, details };
const result: AgentToolResult<unknown> = {
content,
details: {
snaps: details,
media: {
mediaUrls: details
.map((entry) => entry.path)
.filter((path): path is string => typeof path === "string"),
},
},
};
return await sanitizeToolResultImages(result, "nodes:camera_snap", imageSanitization);
}
case "photos_latest": {
@@ -401,7 +411,6 @@ export function createNodesTool(options?: {
invalidPayloadMessage: "invalid photos.latest payload",
});
content.push({ type: "text", text: `MEDIA:${filePath}` });
if (options?.modelHasVision && photo.base64) {
content.push({
type: "image",
@@ -424,7 +433,17 @@ export function createNodesTool(options?: {
});
}
const result: AgentToolResult<unknown> = { content, details };
const result: AgentToolResult<unknown> = {
content,
details: {
photos: details,
media: {
mediaUrls: details
.map((entry) => entry.path)
.filter((path): path is string => typeof path === "string"),
},
},
};
return await sanitizeToolResultImages(result, "nodes:photos_latest", imageSanitization);
}
case "camera_list":