From e7b87217a2113c5011545c9d4dcd174eb506fb55 Mon Sep 17 00:00:00 2001 From: volcano303 <75143900+volcano303@users.noreply.github.com> Date: Mon, 27 Apr 2026 18:13:41 +0200 Subject: [PATCH] fix(media): anchor sanitizeMimeType regex and reject trailing junk Add an end anchor to the type/subtype match and explicitly accept the RFC 9110 ;parameter tail. Inputs like "image/png")).toBeUndefined(); + expect(sanitizeMimeType("image/png\nx-injected: yes")).toBeUndefined(); + expect(sanitizeMimeType("application/json garbage data")).toBeUndefined(); + expect(sanitizeMimeType("image/png/extra")).toBeUndefined(); + }); + + it("rejects an embedded newline before the parameter separator", () => { + expect(sanitizeMimeType("image/png\n;charset=utf-8")).toBeUndefined(); + expect(sanitizeMimeType("image/png \n; charset=utf-8")).toBeUndefined(); + }); + + it("rejects a bare or whitespace-only parameter section", () => { + expect(sanitizeMimeType("image/png;")).toBeUndefined(); + expect(sanitizeMimeType("image/png; ")).toBeUndefined(); + expect(sanitizeMimeType("image/png;\t")).toBeUndefined(); + }); + + it("rejects empty, whitespace, or non-string input", () => { + expect(sanitizeMimeType("")).toBeUndefined(); + expect(sanitizeMimeType(" ")).toBeUndefined(); + expect(sanitizeMimeType(undefined)).toBeUndefined(); + }); + + it("rejects values without a subtype", () => { + expect(sanitizeMimeType("image/")).toBeUndefined(); + expect(sanitizeMimeType("/png")).toBeUndefined(); + expect(sanitizeMimeType("image")).toBeUndefined(); + }); +}); diff --git a/src/media-understanding/apply.ts b/src/media-understanding/apply.ts index 2c8b65f4ed2..135440ebc2b 100644 --- a/src/media-understanding/apply.ts +++ b/src/media-understanding/apply.ts @@ -76,12 +76,15 @@ const TEXT_EXT_MIME = new Map([ [".xml", "application/xml"], ]); -function sanitizeMimeType(value?: string): string | undefined { +// Reject inputs with trailing junk after the type/subtype to defend against +// callers that compare the 
original string elsewhere; permit the standard +// `;param=value` parameter tail (RFC 9110 §8.3) and discard it. +export function sanitizeMimeType(value?: string): string | undefined { const trimmed = normalizeOptionalLowercaseString(value); if (!trimmed) { return undefined; } - const match = trimmed.match(/^([a-z0-9!#$&^_.+-]+\/[a-z0-9!#$&^_.+-]+)/); + const match = trimmed.match(/^([a-z0-9!#$&^_.+-]+\/[a-z0-9!#$&^_.+-]+)(?:[ \t]*;[ \t]*\S.*)?$/); return match?.[1]; }