diff --git a/src/media-understanding/apply.sanitize-mime.test.ts b/src/media-understanding/apply.sanitize-mime.test.ts
new file mode 100644
index 00000000000..7d57dadd747
--- /dev/null
+++ b/src/media-understanding/apply.sanitize-mime.test.ts
@@ -0,0 +1,54 @@
+import { describe, expect, it } from "vitest";
+import { sanitizeMimeType } from "./apply.js";
+
+describe("sanitizeMimeType", () => {
+ it("returns a clean MIME for a well-formed value", () => {
+ expect(sanitizeMimeType("image/png")).toBe("image/png");
+ expect(sanitizeMimeType("application/json")).toBe("application/json");
+ });
+
+ it("lowercases the result", () => {
+ expect(sanitizeMimeType("IMAGE/PNG")).toBe("image/png");
+ expect(sanitizeMimeType("Application/JSON")).toBe("application/json");
+ });
+
+ it("trims surrounding whitespace", () => {
+ expect(sanitizeMimeType(" image/png ")).toBe("image/png");
+ });
+
+ it("accepts the RFC 9110 ;parameter tail and strips it", () => {
+ expect(sanitizeMimeType("text/html; charset=utf-8")).toBe("text/html");
+ expect(sanitizeMimeType("application/json;charset=utf-8")).toBe("application/json");
+ expect(sanitizeMimeType("multipart/form-data; boundary=xxx")).toBe("multipart/form-data");
+ });
+
+ it("rejects values with trailing junk that is not a parameter", () => {
+ expect(sanitizeMimeType("image/png,text/html")).toBeUndefined(); // comma is not a token char
+ expect(sanitizeMimeType("image/png\nx-injected: yes")).toBeUndefined();
+ expect(sanitizeMimeType("application/json garbage data")).toBeUndefined();
+ expect(sanitizeMimeType("image/png/extra")).toBeUndefined();
+ });
+
+ it("rejects an embedded newline before the parameter separator", () => {
+ expect(sanitizeMimeType("image/png\n;charset=utf-8")).toBeUndefined();
+ expect(sanitizeMimeType("image/png \n; charset=utf-8")).toBeUndefined();
+ });
+
+ it("rejects a bare or whitespace-only parameter section", () => {
+ expect(sanitizeMimeType("image/png;")).toBeUndefined();
+ expect(sanitizeMimeType("image/png; ")).toBeUndefined();
+ expect(sanitizeMimeType("image/png;\t")).toBeUndefined();
+ });
+
+ it("rejects empty, whitespace, or non-string input", () => {
+ expect(sanitizeMimeType("")).toBeUndefined();
+ expect(sanitizeMimeType(" ")).toBeUndefined();
+ expect(sanitizeMimeType(undefined)).toBeUndefined();
+ });
+
+ it("rejects values without a subtype", () => {
+ expect(sanitizeMimeType("image/")).toBeUndefined();
+ expect(sanitizeMimeType("/png")).toBeUndefined();
+ expect(sanitizeMimeType("image")).toBeUndefined();
+ });
+});
diff --git a/src/media-understanding/apply.ts b/src/media-understanding/apply.ts
index 2c8b65f4ed2..135440ebc2b 100644
--- a/src/media-understanding/apply.ts
+++ b/src/media-understanding/apply.ts
@@ -76,12 +76,15 @@ const TEXT_EXT_MIME = new Map([
[".xml", "application/xml"],
]);
-function sanitizeMimeType(value?: string): string | undefined {
+// Reject inputs with trailing junk after the type/subtype to defend against
+// callers that compare the original string elsewhere; permit the standard
+// `;param=value` parameter tail (RFC 9110 §8.3) and discard it.
+export function sanitizeMimeType(value?: string): string | undefined {
 const trimmed = normalizeOptionalLowercaseString(value);
 if (!trimmed) {
 return undefined;
 }
- const match = trimmed.match(/^([a-z0-9!#$&^_.+-]+\/[a-z0-9!#$&^_.+-]+)/);
+ const match = trimmed.match(/^([a-z0-9!#$&^_.+-]+\/[a-z0-9!#$&^_.+-]+)(?:[ \t]*;[ \t]*\S.*)?$/);
 return match?.[1];
 }