From f653bcc54e1ebdc66f10ccb0fa5cc5c922f306c5 Mon Sep 17 00:00:00 2001 From: Chen Chia Yang Date: Wed, 15 Apr 2026 15:35:23 +0800 Subject: [PATCH] =?UTF-8?q?fix(media):=20address=20Greptile=20review=20?= =?UTF-8?q?=E2=80=94=20fix=20dead-code=20exception=20and=20broken=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues flagged by Greptile: 1. CSV/Markdown exception was dead code. file-type v22 returns undefined (not "text/plain") for plain-text buffers that have no binary magic bytes. The guard `sniffedMime === "text/plain"` was therefore always false, so the early-return never fired and CSV uploads continued to be rejected. Fix: check `!sniffedMime` (no binary signature) and add a null-byte scan of the first 8 KiB to rule out binary data that happens to have no known magic bytes. Pass buffer into assertHostReadMediaAllowed to enable this. 2. "rejects binary disguised as CSV" test used PNG bytes. assertHostReadMediaAllowed allows all image kinds unconditionally (sniffedKind === "image" → early return), so the promise resolved instead of rejecting. The test would have failed with "Received promise resolved". Fix: use ZIP magic bytes (PK\x03\x04). file-type detects application/zip, which is not image/audio/video, so it falls through to the final throw. --- src/media/web-media.test.ts | 5 +++-- src/media/web-media.ts | 22 ++++++++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/media/web-media.test.ts b/src/media/web-media.test.ts index 94376d2a4f5..213c43354e7 100644 --- a/src/media/web-media.test.ts +++ b/src/media/web-media.test.ts @@ -213,8 +213,9 @@ describe("loadWebMedia", () => { it("rejects binary data disguised as a CSV file", async () => { const fakeCsv = path.join(fixtureRoot, "evil.csv"); - // Write a PNG header — binary, not text - await fs.writeFile(fakeCsv, Buffer.from(TINY_PNG_BASE64, "base64")); + // Write ZIP magic bytes — file-type detects application/zip (not image, not CSV), + // so it is rejected by the host-read policy rather than allowed as an image. + await fs.writeFile(fakeCsv, Buffer.from([0x50, 0x4b, 0x03, 0x04])); await expect( loadWebMedia(fakeCsv, { maxBytes: 1024 * 1024, diff --git a/src/media/web-media.ts b/src/media/web-media.ts index c7a9c203efc..244f31500ea 100644 --- a/src/media/web-media.ts +++ b/src/media/web-media.ts @@ -86,6 +86,9 @@ const HOST_READ_ALLOWED_DOCUMENT_MIMES = new Set([ "text/csv", "text/markdown", ]); +// file-type returns undefined (no magic bytes) for plain-text formats like CSV and +// Markdown. These MIME types are allowed via extension + null-byte check only. +const HOST_READ_TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/markdown"]); const MB = 1024 * 1024; function formatMb(bytes: number, digits = 2): string { @@ -115,6 +118,7 @@ function assertHostReadMediaAllowed(params: { contentType?: string; filePath?: string; kind: MediaKind | undefined; + buffer?: Buffer; }): void { const sniffedKind = kindFromMime(params.sniffedContentType); if (sniffedKind === "image" || sniffedKind === "audio" || sniffedKind === "video") { @@ -135,13 +139,18 @@ function assertHostReadMediaAllowed(params: { return; } const normalizedMime = normalizeMimeType(params.contentType); - // CSV and Markdown exception: content sniffers report text/plain for both formats - // because they are structurally indistinguishable from plain text at the byte level. - // Allow them when: + // CSV / Markdown exception: file-type v22 returns undefined (not "text/plain") for + // plain-text buffers that have no binary magic bytes. Allow these formats when: + // - sniffedMime is undefined (no binary signature detected by file-type) // - The extension-derived MIME is text/csv or text/markdown (operator intent) - // - The content sniffed as text/plain (confirming valid text, not binary data) - const TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/markdown"]); - if (sniffedMime === "text/plain" && normalizedMime && TEXT_PLAIN_ALIASES.has(normalizedMime)) { + // - The buffer contains no null bytes (rules out binary data with no known signature) + if ( + !sniffedMime && + normalizedMime && + HOST_READ_TEXT_PLAIN_ALIASES.has(normalizedMime) && + params.buffer && + !params.buffer.subarray(0, 8192).includes(0x00) + ) { return; } if ( @@ -403,6 +412,7 @@ async function loadWebMediaInternal( contentType: mime, filePath: mediaUrl, kind, + buffer: data, }); } let fileName = path.basename(mediaUrl) || undefined;