From 856c88f25fd5a51b7b13bddb63729b745efce72c Mon Sep 17 00:00:00 2001 From: Frank Yang Date: Wed, 15 Apr 2026 17:07:39 +0800 Subject: [PATCH] fix: validate full host-read text payload --- src/media/web-media.test.ts | 24 ++++++++++++++++++++++++ src/media/web-media.ts | 28 +++++++++++++--------------- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/src/media/web-media.test.ts b/src/media/web-media.test.ts index fbfd6d062f9..a35a6db94d7 100644 --- a/src/media/web-media.test.ts +++ b/src/media/web-media.test.ts @@ -250,6 +250,30 @@ describe("loadWebMedia", () => { }); }); + it.each([ + { label: "CSV", fileName: "prefix-tail.csv" }, + { label: "Markdown", fileName: "prefix-tail.md" }, + ])( + "rejects $label files with a text prefix and binary tail after the old sample window", + async ({ fileName }) => { + const fakeTextFile = path.join(fixtureRoot, fileName); + const textPrefix = Buffer.from(`name,value\n${"row,1\n".repeat(1400)}`, "utf8"); + expect(textPrefix.length).toBeGreaterThan(8192); + const binaryTail = Buffer.from([0x00, 0xff, 0x10, 0x80]); + await fs.writeFile(fakeTextFile, Buffer.concat([textPrefix, binaryTail])); + await expect( + loadWebMedia(fakeTextFile, { + maxBytes: 1024 * 1024, + localRoots: "any", + readFile: async (filePath) => await fs.readFile(filePath), + hostReadCapability: true, + }), + ).rejects.toMatchObject({ + code: "path-not-allowed", + }); + }, + ); + it("rejects traversal-style canvas media paths before filesystem access", async () => { await expect( loadWebMedia(`${CANVAS_HOST_PATH}/documents/../collection.media/tiny.png`), diff --git a/src/media/web-media.ts b/src/media/web-media.ts index 9fa01e942e1..b2689164a6b 100644 --- a/src/media/web-media.ts +++ b/src/media/web-media.ts @@ -89,7 +89,6 @@ const HOST_READ_ALLOWED_DOCUMENT_MIMES = new Set([ // file-type returns undefined (no magic bytes) for plain-text formats like CSV and // Markdown, so host-read needs an explicit "this really decodes as text" fallback. 
const HOST_READ_TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/markdown"]); -const HOST_READ_TEXT_SAMPLE_BYTES = 8192; const MB = 1024 * 1024; const WORDISH_CHAR = /[\p{L}\p{N}]/u; @@ -155,30 +154,29 @@ function getTextStats(text: string): { printableRatio: number; wordishRatio: num return { printableRatio: printable / total, wordishRatio: wordish / total }; } -function decodeHostReadTextSample(buffer: Buffer): string | undefined { - const sample = buffer.subarray(0, Math.min(buffer.length, HOST_READ_TEXT_SAMPLE_BYTES)); - if (sample.length === 0) { +function decodeHostReadText(buffer: Buffer): string | undefined { + if (buffer.length === 0) { return ""; } - const utf16Charset = resolveUtf16Charset(sample); + const utf16Charset = resolveUtf16Charset(buffer); try { if (utf16Charset === "utf-16be") { - const evenSample = sample.length % 2 === 0 ? sample : sample.subarray(0, sample.length - 1); - if (evenSample.length === 0) { + const evenBuffer = buffer.length % 2 === 0 ? buffer : buffer.subarray(0, buffer.length - 1); + if (evenBuffer.length === 0) { return ""; } - const swapped = Buffer.alloc(evenSample.length); - for (let i = 0; i + 1 < evenSample.length; i += 2) { - swapped[i] = evenSample[i + 1]; - swapped[i + 1] = evenSample[i]; + const swapped = Buffer.alloc(evenBuffer.length); + for (let i = 0; i + 1 < evenBuffer.length; i += 2) { + swapped[i] = evenBuffer[i + 1]; + swapped[i + 1] = evenBuffer[i]; } return new TextDecoder("utf-16le").decode(swapped); } if (utf16Charset === "utf-16le") { - const evenSample = sample.length % 2 === 0 ? sample : sample.subarray(0, sample.length - 1); - return new TextDecoder("utf-16le").decode(evenSample); + const evenBuffer = buffer.length % 2 === 0 ? 
buffer : buffer.subarray(0, buffer.length - 1); + return new TextDecoder("utf-16le").decode(evenBuffer); } - return new TextDecoder("utf-8", { fatal: true }).decode(sample); + return new TextDecoder("utf-8", { fatal: true }).decode(buffer); } catch { return undefined; } @@ -191,7 +189,7 @@ function isValidatedHostReadText(buffer?: Buffer): boolean { if (buffer.length === 0) { return true; } - const text = decodeHostReadTextSample(buffer); + const text = decodeHostReadText(buffer); if (text === undefined) { return false; }