describe("base64 validation", () => {
  /** Wraps a payload in the single-image block list shape passed to the sanitizer. */
  const imageBlocks = (data: string, mimeType: string) => [
    {
      type: "image" as const,
      data,
      mimeType,
    },
  ];

  /**
   * Sanitizes one PNG-labelled image block carrying `data` and asserts that it
   * was replaced by exactly one text block containing every given fragment.
   */
  const expectReplacedByText = async (data: string, ...fragments: string[]) => {
    const out = await sanitizeContentBlocksImages(imageBlocks(data, "image/png"), "test");
    expect(out.length).toBe(1);
    expect(out[0].type).toBe("text");
    if (out[0].type === "text") {
      for (const fragment of fragments) {
        expect(out[0].text).toContain(fragment);
      }
    }
  };

  it("rejects invalid base64 characters and replaces with error text", async () => {
    await expectReplacedByText("not-valid-base64!!!@#$%", "omitted image payload", "invalid");
  });

  it("strips data URL prefix and processes valid base64", async () => {
    // A tiny solid-red JPEG is enough to exercise the happy path.
    const redSquare = sharp({
      create: {
        width: 10,
        height: 10,
        channels: 3,
        background: { r: 255, g: 0, b: 0 },
      },
    });
    const jpeg = await redSquare.jpeg().toBuffer();
    const dataUrl = `data:image/jpeg;base64,${jpeg.toString("base64")}`;

    const out = await sanitizeContentBlocksImages(imageBlocks(dataUrl, "image/jpeg"), "test");
    expect(out.length).toBe(1);
    expect(out[0].type).toBe("image");
  });

  it("rejects base64 with invalid padding", async () => {
    // Three trailing '=' is more padding than any encoder emits.
    await expectReplacedByText("SGVsbG8===", "omitted image payload");
  });

  it("rejects base64 with invalid length", async () => {
    // Five unpadded characters leave a remainder of 1, which cannot be decoded.
    await expectReplacedByText("AAAAA", "omitted image payload");
  });

  it("handles empty base64 data gracefully", async () => {
    await expectReplacedByText(" ", "omitted empty image payload");
  });
});
// Optional RFC 2397 header: "data:", any media type/parameters (possibly none), then ";base64,".
const DATA_URL_BASE64_PREFIX_REGEX = /^data:[^,]*;base64,/i;
// Every character a payload may contain: standard and URL-safe alphabets plus '=' padding.
const BASE64_CANDIDATE_REGEX = /^[A-Za-z0-9+/=_-]+$/;

/**
 * Validates base64 image data and returns it in canonical, standard-alphabet form.
 *
 * Normalization steps:
 * - removes all whitespace (surrounding and embedded, e.g. line-wrapped payloads)
 * - strips a data URL header such as "data:image/png;base64,", including headers
 *   with extra parameters or no media type
 * - translates the URL-safe alphabet ('-', '_') to the standard one ('+', '/')
 * - rewrites trailing padding so the result length is always a multiple of 4
 *
 * @param base64 - Raw base64 text, optionally wrapped in a base64 data URL.
 * @returns The cleaned, correctly padded standard base64 string.
 * @throws Error "Base64 data is empty" when nothing but whitespace, a header, or padding remains.
 * @throws Error "Base64 data contains invalid characters" for characters outside the alphabets.
 * @throws Error "Base64 data has invalid padding" for more than two trailing '=' or '=' inside the payload.
 * @throws Error "Base64 data has invalid length" when the unpadded length leaves a remainder of 1.
 */
function validateAndNormalizeBase64(base64: string): string {
  // Whitespace carries no information in base64; dropping it up front also lets
  // the header regex work on payloads that were wrapped across lines.
  const compact = base64.replace(/\s+/g, "");
  const data = compact.replace(DATA_URL_BASE64_PREFIX_REGEX, "");

  if (!data) {
    throw new Error("Base64 data is empty");
  }

  // Node's Buffer.from silently ignores invalid chars, but Anthropic API rejects them
  if (!BASE64_CANDIDATE_REGEX.test(data)) {
    throw new Error("Base64 data contains invalid characters");
  }

  // Count trailing '=' by hand instead of a regex so the cost is bounded by the
  // padding length rather than the size of the whole payload.
  let bodyLength = data.length;
  while (bodyLength > 0 && data[bodyLength - 1] === "=") {
    bodyLength -= 1;
  }
  const paddingLength = data.length - bodyLength;

  // No encoder emits more than two padding characters; treat that as corruption.
  if (paddingLength > 2) {
    throw new Error("Base64 data has invalid padding");
  }

  const body = data.slice(0, bodyLength);
  if (!body) {
    // Only padding was supplied, so there is nothing to decode.
    throw new Error("Base64 data is empty");
  }

  // '=' is only meaningful as trailing padding, never inside the payload.
  if (body.includes("=")) {
    throw new Error("Base64 data has invalid padding");
  }

  // Each 4-character group encodes 3 bytes; a lone leftover character cannot
  // encode even one byte, so a remainder of 1 is always invalid.
  const remainder = body.length % 4;
  if (remainder === 1) {
    throw new Error("Base64 data has invalid length");
  }

  // Emit the standard alphabet with exact padding so strict decoders accept it.
  const standardBody = body.replace(/-/g, "+").replace(/_/g, "/");
  return standardBody + "=".repeat((4 - remainder) % 4);
}
inferredMimeType ?? block.mimeType; const resized = await resizeImageBase64IfNeeded({