fix(media): address Greptile review — fix dead-code exception and broken test

Two issues flagged by Greptile:

1. CSV/Markdown exception was dead code.
   file-type v22 returns undefined (not "text/plain") for plain-text buffers
   that have no binary magic bytes. The guard `sniffedMime === "text/plain"`
   was therefore always false, so the early-return never fired and CSV uploads
   continued to be rejected.

   Fix: check `!sniffedMime` (no binary signature) and add a null-byte scan
   of the first 8 KiB to rule out binary data that happens to have no known
   magic bytes. Pass buffer into assertHostReadMediaAllowed to enable this.

2. "rejects binary disguised as CSV" test used PNG bytes.
   assertHostReadMediaAllowed allows all image kinds unconditionally
   (sniffedKind === "image" → early return), so the promise resolved instead
   of rejecting. The test would have failed with "Received promise resolved".

   Fix: use ZIP magic bytes (PK\x03\x04). file-type detects application/zip,
   which is not image/audio/video, so it falls through to the final throw.
This commit is contained in:
Chen Chia Yang
2026-04-15 15:35:23 +08:00
committed by Frank Yang
parent cbc040c3ab
commit f653bcc54e
2 changed files with 19 additions and 8 deletions

View File

@@ -213,8 +213,9 @@ describe("loadWebMedia", () => {
it("rejects binary data disguised as a CSV file", async () => {
const fakeCsv = path.join(fixtureRoot, "evil.csv");
// Write a PNG header — binary, not text
await fs.writeFile(fakeCsv, Buffer.from(TINY_PNG_BASE64, "base64"));
// Write ZIP magic bytes — file-type detects application/zip (not image, not CSV),
// so it is rejected by the host-read policy rather than allowed as an image.
await fs.writeFile(fakeCsv, Buffer.from([0x50, 0x4b, 0x03, 0x04]));
await expect(
loadWebMedia(fakeCsv, {
maxBytes: 1024 * 1024,

View File

@@ -86,6 +86,9 @@ const HOST_READ_ALLOWED_DOCUMENT_MIMES = new Set([
"text/csv",
"text/markdown",
]);
// file-type returns undefined (no magic bytes) for plain-text formats like CSV and
// Markdown. These MIME types are allowed via extension + null-byte check only.
const HOST_READ_TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/markdown"]);
const MB = 1024 * 1024;
function formatMb(bytes: number, digits = 2): string {
@@ -115,6 +118,7 @@ function assertHostReadMediaAllowed(params: {
contentType?: string;
filePath?: string;
kind: MediaKind | undefined;
buffer?: Buffer;
}): void {
const sniffedKind = kindFromMime(params.sniffedContentType);
if (sniffedKind === "image" || sniffedKind === "audio" || sniffedKind === "video") {
@@ -135,13 +139,18 @@ function assertHostReadMediaAllowed(params: {
return;
}
const normalizedMime = normalizeMimeType(params.contentType);
// CSV and Markdown exception: content sniffers report text/plain for both formats
// because they are structurally indistinguishable from plain text at the byte level.
// Allow them when:
// CSV / Markdown exception: file-type v22 returns undefined (not "text/plain") for
// plain-text buffers that have no binary magic bytes. Allow these formats when:
// - sniffedMime is undefined (no binary signature detected by file-type)
// - The extension-derived MIME is text/csv or text/markdown (operator intent)
// - The content sniffed as text/plain (confirming valid text, not binary data)
const TEXT_PLAIN_ALIASES = new Set(["text/csv", "text/markdown"]);
if (sniffedMime === "text/plain" && normalizedMime && TEXT_PLAIN_ALIASES.has(normalizedMime)) {
// - The buffer contains no null bytes (rules out binary data with no known signature)
if (
!sniffedMime &&
normalizedMime &&
HOST_READ_TEXT_PLAIN_ALIASES.has(normalizedMime) &&
params.buffer &&
!params.buffer.subarray(0, 8192).includes(0x00)
) {
return;
}
if (
@@ -403,6 +412,7 @@ async function loadWebMediaInternal(
contentType: mime,
filePath: mediaUrl,
kind,
buffer: data,
});
}
let fileName = path.basename(mediaUrl) || undefined;