From e7b87217a2113c5011545c9d4dcd174eb506fb55 Mon Sep 17 00:00:00 2001
From: volcano303 <75143900+volcano303@users.noreply.github.com>
Date: Mon, 27 Apr 2026 18:13:41 +0200
Subject: [PATCH] fix(media): anchor sanitizeMimeType regex and reject trailing
junk
Add an end anchor to the type/subtype match and explicitly accept the
RFC 9110 ;parameter tail. Inputs like "image/png")).toBeUndefined();
+ expect(sanitizeMimeType("image/png\nx-injected: yes")).toBeUndefined();
+ expect(sanitizeMimeType("application/json garbage data")).toBeUndefined();
+ expect(sanitizeMimeType("image/png/extra")).toBeUndefined();
+ });
+
+ it("rejects an embedded newline before the parameter separator", () => {
+ expect(sanitizeMimeType("image/png\n;charset=utf-8")).toBeUndefined();
+ expect(sanitizeMimeType("image/png \n; charset=utf-8")).toBeUndefined();
+ });
+
+ it("rejects a bare or whitespace-only parameter section", () => {
+ expect(sanitizeMimeType("image/png;")).toBeUndefined();
+ expect(sanitizeMimeType("image/png; ")).toBeUndefined();
+ expect(sanitizeMimeType("image/png;\t")).toBeUndefined();
+ });
+
+ it("rejects empty, whitespace, or non-string input", () => {
+ expect(sanitizeMimeType("")).toBeUndefined();
+ expect(sanitizeMimeType(" ")).toBeUndefined();
+ expect(sanitizeMimeType(undefined)).toBeUndefined();
+ });
+
+ it("rejects values without a subtype", () => {
+ expect(sanitizeMimeType("image/")).toBeUndefined();
+ expect(sanitizeMimeType("/png")).toBeUndefined();
+ expect(sanitizeMimeType("image")).toBeUndefined();
+ });
+});
diff --git a/src/media-understanding/apply.ts b/src/media-understanding/apply.ts
index 2c8b65f4ed2..135440ebc2b 100644
--- a/src/media-understanding/apply.ts
+++ b/src/media-understanding/apply.ts
@@ -76,12 +76,15 @@ const TEXT_EXT_MIME = new Map([
[".xml", "application/xml"],
]);
-function sanitizeMimeType(value?: string): string | undefined {
+// Reject inputs with trailing junk after the type/subtype to defend against
+// callers that compare the original string elsewhere; permit the standard
+// `;param=value` parameter tail (RFC 9110 §8.3) and discard it.
+export function sanitizeMimeType(value?: string): string | undefined {
const trimmed = normalizeOptionalLowercaseString(value);
if (!trimmed) {
return undefined;
}
- const match = trimmed.match(/^([a-z0-9!#$&^_.+-]+\/[a-z0-9!#$&^_.+-]+)/);
+ const match = trimmed.match(/^([a-z0-9!#$&^_.+-]+\/[a-z0-9!#$&^_.+-]+)(?:[ \t]*;[ \t]*\S.*)?$/);
return match?.[1];
}