From ac89e9d964d33193fae41e6512e2d72e75940cb9 Mon Sep 17 00:00:00 2001 From: Frank Yang Date: Wed, 15 Apr 2026 18:28:55 +0800 Subject: [PATCH] fix: allow single-byte host-read text --- src/media/web-media.test.ts | 51 +++++++++++++++++++++++++++++++++++++ src/media/web-media.ts | 25 +++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/src/media/web-media.test.ts b/src/media/web-media.test.ts index dc0d7f7d254..da0a9ae4abb 100644 --- a/src/media/web-media.test.ts +++ b/src/media/web-media.test.ts @@ -303,6 +303,57 @@ describe("loadWebMedia", () => { }, ); + it.each([ + { + label: "CSV", + fileName: "legacy.csv", + contentType: "text/csv", + body: Buffer.from("caf\xe9,ni\xf1o\n", "latin1"), + }, + { + label: "Markdown", + fileName: "legacy.md", + contentType: "text/markdown", + body: Buffer.from("R\xe9sum\xe9\nni\xf1o\n", "latin1"), + }, + ])( + "loads valid single-byte encoded %s files when host-read capability is enabled", + async ({ fileName, contentType, body }) => { + const textFile = path.join(fixtureRoot, fileName); + await fs.writeFile(textFile, body); + const result = await loadWebMedia(textFile, { + maxBytes: 1024 * 1024, + localRoots: "any", + readFile: async (filePath) => await fs.readFile(filePath), + hostReadCapability: true, + }); + expect(result.kind).toBe("document"); + expect(result.contentType).toBe(contentType); + }, + ); + + it.each([ + { label: "CSV", fileName: "high-bytes.csv" }, + { label: "Markdown", fileName: "high-bytes.md" }, + ])("rejects high-byte opaque data disguised as %s", async ({ fileName }) => { + const fakeTextFile = path.join(fixtureRoot, fileName); + const opaqueBinary = Buffer.alloc(9000); + for (let i = 0; i < opaqueBinary.length; i += 1) { + opaqueBinary[i] = 0xa0 + (i % 96); + } + await fs.writeFile(fakeTextFile, opaqueBinary); + await expect( + loadWebMedia(fakeTextFile, { + maxBytes: 1024 * 1024, + localRoots: "any", + readFile: async (filePath) => await fs.readFile(filePath), + hostReadCapability: true, + }), + ).rejects.toMatchObject({ + code: "path-not-allowed", + }); + }); + it("rejects traversal-style canvas media paths before filesystem access", async () => { await expect( loadWebMedia(`${CANVAS_HOST_PATH}/documents/../collection.media/tiny.png`), diff --git a/src/media/web-media.ts b/src/media/web-media.ts index 008e80a8a59..01713b7ba18 100644 --- a/src/media/web-media.ts +++ b/src/media/web-media.ts @@ -148,6 +148,25 @@ function getTextStats(text: string): { printableRatio: number } { return { printableRatio: printable / total }; } +function hasSingleByteTextShape(buffer: Buffer): boolean { + if (buffer.length === 0) { + return true; + } + let asciiText = 0; + let control = 0; + for (const byte of buffer) { + if (byte === 9 || byte === 10 || byte === 13 || (byte >= 0x20 && byte <= 0x7e)) { + asciiText += 1; + continue; + } + if (byte < 0x20 || byte === 0x7f) { + control += 1; + } + } + const total = buffer.length; + return control === 0 && asciiText / total >= 0.5; +} + function decodeHostReadText(buffer: Buffer): string | undefined { if (buffer.length === 0) { return ""; @@ -172,7 +191,11 @@ function decodeHostReadText(buffer: Buffer): string | undefined { } return new TextDecoder("utf-8", { fatal: true }).decode(buffer); } catch { - return undefined; + if (!hasSingleByteTextShape(buffer)) { + return undefined; + } + // WHATWG latin1 decodes common Excel-style single-byte exports via Windows-1252 mapping. + return new TextDecoder("latin1").decode(buffer); } }