diff --git a/src/media/web-media.test.ts b/src/media/web-media.test.ts
index da0a9ae4abb..f46dca2136e 100644
--- a/src/media/web-media.test.ts
+++ b/src/media/web-media.test.ts
@@ -332,6 +332,31 @@ describe("loadWebMedia", () => {
     },
   );
 
+  it.each([
+    { label: "CSV", fileName: "alternating-high.csv" },
+    { label: "Markdown", fileName: "alternating-high.md" },
+  ])("rejects alternating ASCII/high-byte data disguised as %s", async ({ fileName }) => {
+    const fakeTextFile = path.join(fixtureRoot, fileName);
+    // Alternating 0x41 ('A') and 0xFF — exactly 50% ASCII, 50% high bytes.
+    // With the old 50% threshold hasSingleByteTextShape would accept this;
+    // the tightened 70%/30% thresholds must reject it.
+    const mixed = Buffer.alloc(9000);
+    for (let i = 0; i < mixed.length; i += 1) {
+      mixed[i] = i % 2 === 0 ? 0x41 : 0xff;
+    }
+    await fs.writeFile(fakeTextFile, mixed);
+    await expect(
+      loadWebMedia(fakeTextFile, {
+        maxBytes: 1024 * 1024,
+        localRoots: "any",
+        readFile: async (filePath) => await fs.readFile(filePath),
+        hostReadCapability: true,
+      }),
+    ).rejects.toMatchObject({
+      code: "path-not-allowed",
+    });
+  });
+
   it.each([
     { label: "CSV", fileName: "high-bytes.csv" },
     { label: "Markdown", fileName: "high-bytes.md" },
diff --git a/src/media/web-media.ts b/src/media/web-media.ts
index 01713b7ba18..c42256ac8e3 100644
--- a/src/media/web-media.ts
+++ b/src/media/web-media.ts
@@ -164,7 +164,8 @@ function hasSingleByteTextShape(buffer: Buffer): boolean {
     }
   }
   const total = buffer.length;
-  return control === 0 && asciiText / total >= 0.5;
+  const highBytes = total - asciiText - control;
+  return control === 0 && asciiText / total >= 0.7 && highBytes / total <= 0.3;
 }
 
 function decodeHostReadText(buffer: Buffer): string | undefined {