diff --git a/src/media/web-media.test.ts b/src/media/web-media.test.ts
index d1c78d86740..069db3d76fd 100644
--- a/src/media/web-media.test.ts
+++ b/src/media/web-media.test.ts
@@ -185,6 +185,35 @@ describe("loadWebMedia", () => {
     });
   });
 
+  it("allows host-read CSV files", async () => {
+    const csvFile = path.join(fixtureRoot, "data.csv");
+    await fs.writeFile(csvFile, "name,value\nfoo,1\nbar,2\n", "utf8");
+    const result = await loadWebMedia(csvFile, {
+      maxBytes: 1024 * 1024,
+      localRoots: "any",
+      readFile: async (filePath) => await fs.readFile(filePath),
+      hostReadCapability: true,
+    });
+    expect(result.kind).toBe("document");
+    expect(result.contentType).toBe("text/csv");
+  });
+
+  it("rejects binary data disguised as a CSV file", async () => {
+    const fakeCsv = path.join(fixtureRoot, "evil.csv");
+    // Write the decoded PNG bytes (binary, not text) under a .csv filename
+    await fs.writeFile(fakeCsv, Buffer.from(TINY_PNG_BASE64, "base64"));
+    await expect(
+      loadWebMedia(fakeCsv, {
+        maxBytes: 1024 * 1024,
+        localRoots: "any",
+        readFile: async (filePath) => await fs.readFile(filePath),
+        hostReadCapability: true,
+      }),
+    ).rejects.toMatchObject({
+      code: "path-not-allowed",
+    });
+  });
+
   it("rejects traversal-style canvas media paths before filesystem access", async () => {
     await expect(
       loadWebMedia(`${CANVAS_HOST_PATH}/documents/../collection.media/tiny.png`),
diff --git a/src/media/web-media.ts b/src/media/web-media.ts
index 9205d6fb273..ec987bbe3fb 100644
--- a/src/media/web-media.ts
+++ b/src/media/web-media.ts
@@ -83,6 +83,7 @@ const HOST_READ_ALLOWED_DOCUMENT_MIMES = new Set([
   "application/vnd.openxmlformats-officedocument.presentationml.presentation",
   "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
   "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+  "text/csv",
 ]);
 
 const MB = 1024 * 1024;
@@ -133,6 +134,13 @@ function assertHostReadMediaAllowed(params: {
     return;
   }
   const normalizedMime = normalizeMimeType(params.contentType);
+  // CSV exception: content sniffers report text/plain for CSV because CSV is structurally
+  // indistinguishable from plain text at the byte level. Allow it only when both hold:
+  // - params.contentType normalizes to text/csv (presumably extension-derived; operator intent)
+  // - The content sniffed as text/plain (confirming valid text, not binary data)
+  if (sniffedMime === "text/plain" && normalizedMime === "text/csv") {
+    return;
+  }
   if (
     params.kind === "document" &&
     normalizedMime &&