From a55d38ed6c5fc3b6493ebff04ac99cc261c9a1cb Mon Sep 17 00:00:00 2001 From: Chen Chia Yang Date: Wed, 15 Apr 2026 18:48:32 +0800 Subject: [PATCH] fix(media): tighten hasSingleByteTextShape to reject mixed ASCII/high-byte blobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Raise the ASCII floor to 70% and add an explicit 30% high-byte cap. The previous 50% threshold accepted alternating 0x41/0xFF buffers (50% ASCII, 0 control bytes), which decoded through Latin-1 and passed the printable-ratio gate — allowing opaque binary data to slip through as a CSV or Markdown document. Real single-byte text exports (e.g. Excel Latin-1 CSVs with accented chars like é, ñ) rarely exceed 20-25% high bytes, so the tighter thresholds do not regress legitimate input. Adds a regression test: 9000 bytes alternating 'A'/0xFF must be rejected as path-not-allowed. Co-Authored-By: Claude Sonnet 4.6 --- src/media/web-media.test.ts | 25 +++++++++++++++++++++++++ src/media/web-media.ts | 3 ++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/media/web-media.test.ts b/src/media/web-media.test.ts index da0a9ae4abb..f46dca2136e 100644 --- a/src/media/web-media.test.ts +++ b/src/media/web-media.test.ts @@ -332,6 +332,31 @@ describe("loadWebMedia", () => { }, ); + it.each([ + { label: "CSV", fileName: "alternating-high.csv" }, + { label: "Markdown", fileName: "alternating-high.md" }, + ])("rejects alternating ASCII/high-byte data disguised as %s", async ({ fileName }) => { + const fakeTextFile = path.join(fixtureRoot, fileName); + // Alternating 0x41 ('A') and 0xFF — exactly 50% ASCII, 50% high bytes. + // With the old 50% threshold hasSingleByteTextShape would accept this; + // the tightened 70%/30% thresholds must reject it. + const mixed = Buffer.alloc(9000); + for (let i = 0; i < mixed.length; i += 1) { + mixed[i] = i % 2 === 0 ? 0x41 : 0xff; + } + await fs.writeFile(fakeTextFile, mixed); + await expect( + loadWebMedia(fakeTextFile, { + maxBytes: 1024 * 1024, + localRoots: "any", + readFile: async (filePath) => await fs.readFile(filePath), + hostReadCapability: true, + }), + ).rejects.toMatchObject({ + code: "path-not-allowed", + }); + }); + it.each([ { label: "CSV", fileName: "high-bytes.csv" }, { label: "Markdown", fileName: "high-bytes.md" }, diff --git a/src/media/web-media.ts b/src/media/web-media.ts index 01713b7ba18..c42256ac8e3 100644 --- a/src/media/web-media.ts +++ b/src/media/web-media.ts @@ -164,7 +164,8 @@ function hasSingleByteTextShape(buffer: Buffer): boolean { } } const total = buffer.length; - return control === 0 && asciiText / total >= 0.5; + const highBytes = total - asciiText - control; + return control === 0 && asciiText / total >= 0.7 && highBytes / total <= 0.3; } function decodeHostReadText(buffer: Buffer): string | undefined {