diff --git a/src/media-understanding/apply.sanitize-mime.test.ts b/src/media-understanding/apply.sanitize-mime.test.ts
new file mode 100644
index 00000000000..7d57dadd747
--- /dev/null
+++ b/src/media-understanding/apply.sanitize-mime.test.ts
@@ -0,0 +1,55 @@
+import { describe, expect, it } from "vitest";
+import { sanitizeMimeType } from "./apply.js";
+
+describe("sanitizeMimeType", () => {
+  it("returns a clean MIME for a well-formed value", () => {
+    expect(sanitizeMimeType("image/png")).toBe("image/png");
+    expect(sanitizeMimeType("application/json")).toBe("application/json");
+  });
+
+  it("lowercases the result", () => {
+    expect(sanitizeMimeType("IMAGE/PNG")).toBe("image/png");
+    expect(sanitizeMimeType("Application/JSON")).toBe("application/json");
+  });
+
+  it("trims surrounding whitespace", () => {
+    expect(sanitizeMimeType(" image/png ")).toBe("image/png");
+  });
+
+  it("accepts the RFC 9110 ;parameter tail and strips it", () => {
+    expect(sanitizeMimeType("text/html; charset=utf-8")).toBe("text/html");
+    expect(sanitizeMimeType("application/json;charset=utf-8")).toBe("application/json");
+    expect(sanitizeMimeType("multipart/form-data; boundary=xxx")).toBe("multipart/form-data");
+  });
+
+  it("rejects values with trailing junk that is not a parameter", () => {
+    // `<` is not a valid type/subtype character and does not start a `;` parameter tail.
+    expect(sanitizeMimeType("image/png<script>")).toBeUndefined();
+    expect(sanitizeMimeType("image/png\nx-injected: yes")).toBeUndefined();
+    expect(sanitizeMimeType("application/json garbage data")).toBeUndefined();
+    expect(sanitizeMimeType("image/png/extra")).toBeUndefined();
+  });
+
+  it("rejects an embedded newline before the parameter separator", () => {
+    expect(sanitizeMimeType("image/png\n;charset=utf-8")).toBeUndefined();
+    expect(sanitizeMimeType("image/png \n; charset=utf-8")).toBeUndefined();
+  });
+
+  it("rejects a bare or whitespace-only parameter section", () => {
+    expect(sanitizeMimeType("image/png;")).toBeUndefined();
+    expect(sanitizeMimeType("image/png; ")).toBeUndefined();
+    expect(sanitizeMimeType("image/png;\t")).toBeUndefined();
+  });
+
+  it("rejects empty, whitespace, or non-string input", () => {
+    expect(sanitizeMimeType("")).toBeUndefined();
+    expect(sanitizeMimeType(" ")).toBeUndefined();
+    expect(sanitizeMimeType(undefined)).toBeUndefined();
+  });
+
+  it("rejects values without a subtype", () => {
+    expect(sanitizeMimeType("image/")).toBeUndefined();
+    expect(sanitizeMimeType("/png")).toBeUndefined();
+    expect(sanitizeMimeType("image")).toBeUndefined();
+  });
+});
diff --git a/src/media-understanding/apply.ts b/src/media-understanding/apply.ts
index 2c8b65f4ed2..135440ebc2b 100644
--- a/src/media-understanding/apply.ts
+++ b/src/media-understanding/apply.ts
@@ -76,12 +76,15 @@ const TEXT_EXT_MIME = new Map([
   [".xml", "application/xml"],
 ]);
 
-function sanitizeMimeType(value?: string): string | undefined {
+// Reject inputs with trailing junk after the type/subtype to defend against
+// callers that compare the original string elsewhere; permit the standard
+// `;param=value` parameter tail (RFC 9110 §8.3) and discard it.
+export function sanitizeMimeType(value?: string): string | undefined {
   const trimmed = normalizeOptionalLowercaseString(value);
   if (!trimmed) {
     return undefined;
   }
-  const match = trimmed.match(/^([a-z0-9!#$&^_.+-]+\/[a-z0-9!#$&^_.+-]+)/);
+  const match = trimmed.match(/^([a-z0-9!#$&^_.+-]+\/[a-z0-9!#$&^_.+-]+)(?:[ \t]*;[ \t]*\S.*)?$/);
   return match?.[1];
 }
 