fix(media): tighten sanitizeMimeType anchoring (#73229)

* fix(media): tighten sanitizeMimeType anchoring
2026-05-06 10:40:43 +00:00 · 2026-04-27 21:48:36 -07:00
parent d59f001507
commit 6fadc56802
3 changed files with 29 additions and 4 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -49,6 +49,7 @@ Docs: https://docs.openclaw.ai
 - Gateway/hooks: route non-delivered hook completion and error summaries to the target agent's main session instead of the default agent session, preserving multi-agent hook isolation. Fixes #24693; carries forward #68667. Thanks @abersonFAC and @bluesky6868.
 - Control UI/models: request the configured Gateway model-list view so dashboards with only `models.providers.*.models` show those configured models first instead of flooding the picker with the full built-in catalog. Fixes #65405. Thanks @wbyanclaw.
 - CLI/models: keep default-model and allowlist pickers on explicit `models.providers.*.models` entries when `models.mode` is `replace` instead of loading the full built-in catalog. Fixes #64950. Thanks @mrozentsvayg.
+- Media/security: tighten media-understanding MIME sanitization so parameterized MIME values stay end-anchored and malformed whitespace or suffix payloads are rejected before file-context handling. Fixes #9795; carries forward #68225 with related review/test context from #61016/#68456. Thanks @ymaxgit, @bluesky6868, and @shamsulalam1114.
 - Discord: own the Carbon interaction listener and hand off Discord slash/component handling asynchronously, so compaction or long session locks no longer trip `InteractionEventListener` listener timeouts. Fixes #73204. Thanks @slideshow-dingo.
 - Compaction/diagnostics: keep unknown compaction failure classifications stable while logging sanitized detail for unclassified provider errors such as missing Ollama provider adapters. Thanks @gzsiang.
 - Models/fallbacks: record first-class `model.fallback_step` trajectory events with from/to models, failure detail, chain position, and final outcome so support exports preserve the primary model failure even when a later fallback also fails. Fixes #71744. Thanks @nikolaykazakovvs-ux.
--- a/src/media-understanding/apply.sanitize-mime.test.ts
+++ b/src/media-understanding/apply.sanitize-mime.test.ts
@@ -20,6 +20,10 @@ describe("sanitizeMimeType", () => {
    expect(sanitizeMimeType("text/html; charset=utf-8")).toBe("text/html");
    expect(sanitizeMimeType("application/json;charset=utf-8")).toBe("application/json");
    expect(sanitizeMimeType("multipart/form-data; boundary=xxx")).toBe("multipart/form-data");
+    expect(sanitizeMimeType('text/plain; charset="utf-8"')).toBe("text/plain");
+    expect(sanitizeMimeType("text/plain; filename*=utf-8''file%20name.txt")).toBe("text/plain");
+    expect(sanitizeMimeType('text/plain; title="a;b"')).toBe("text/plain");
+    expect(sanitizeMimeType('text/plain; title="a\\\"b"')).toBe("text/plain");
  });

  it("rejects values with trailing junk that is not a parameter", () => {
@@ -32,6 +36,7 @@ describe("sanitizeMimeType", () => {
  it("rejects an embedded newline before the parameter separator", () => {
    expect(sanitizeMimeType("image/png\n;charset=utf-8")).toBeUndefined();
    expect(sanitizeMimeType("image/png \n; charset=utf-8")).toBeUndefined();
+    expect(sanitizeMimeType("image/png; charset=utf-8\n; boundary=xxx")).toBeUndefined();
  });

  it("rejects a bare or whitespace-only parameter section", () => {
@@ -40,6 +45,16 @@ describe("sanitizeMimeType", () => {
    expect(sanitizeMimeType("image/png;\t")).toBeUndefined();
  });

+  it("rejects malformed parameter tails", () => {
+    expect(sanitizeMimeType("image/png; charset")).toBeUndefined();
+    expect(sanitizeMimeType("image/png; charset=utf-8<script>")).toBeUndefined();
+    expect(sanitizeMimeType("image/png; charset=utf-8 garbage")).toBeUndefined();
+  });
+
+  it("rejects non-ASCII values before lowercasing the result", () => {
+    expect(sanitizeMimeType("\u212Amage/png")).toBeUndefined();
+  });
+
  it("rejects empty, whitespace, or non-string input", () => {
    expect(sanitizeMimeType("")).toBeUndefined();
    expect(sanitizeMimeType("   ")).toBeUndefined();
--- a/src/media-understanding/apply.ts
+++ b/src/media-understanding/apply.ts
@@ -12,7 +12,7 @@ import {
 import { wrapExternalContent } from "../security/external-content.js";
 import {
  normalizeLowercaseStringOrEmpty,
-  normalizeOptionalLowercaseString,
+  normalizeOptionalString,
 } from "../shared/string-coerce.js";
 import type { ActiveMediaModel } from "./active-model.types.js";
 import { resolveAttachmentKind } from "./attachments.js";
@@ -79,13 +79,22 @@ const TEXT_EXT_MIME = new Map<string, string>([
 // Reject inputs with trailing junk after the type/subtype to defend against
 // callers that compare the original string elsewhere; permit the standard
 // `;param=value` parameter tail (RFC 9110 §8.3) and discard it.
+const MIME_TYPE = String.raw`([a-z0-9!#$&^_.+-]+/[a-z0-9!#$&^_.+-]+)`;
+const HTTP_TOKEN = String.raw`[a-z0-9!#$%&'*+.^_\x60|~-]+`;
+const HTTP_QUOTED_STRING = String.raw`"(?:[\t !#-\[\]-~]|\\[\t -~])*"`;
+const MIME_PARAMETER = String.raw`[ \t]*;[ \t]*${HTTP_TOKEN}=(?:${HTTP_TOKEN}|${HTTP_QUOTED_STRING})`;
+const MIME_TYPE_WITH_OPTIONAL_PARAMS = new RegExp(
+  String.raw`^${MIME_TYPE}(?:${MIME_PARAMETER})*$`,
+  "i",
+);
+
 export function sanitizeMimeType(value?: string): string | undefined {
-  const trimmed = normalizeOptionalLowercaseString(value);
+  const trimmed = normalizeOptionalString(value);
  if (!trimmed) {
    return undefined;
  }
-  const match = trimmed.match(/^([a-z0-9!#$&^_.+-]+\/[a-z0-9!#$&^_.+-]+)(?:[ \t]*;[ \t]*\S.*)?$/);
-  return match?.[1];
+  const match = trimmed.match(MIME_TYPE_WITH_OPTIONAL_PARAMS);
+  return match?.[1]?.toLowerCase();
 }

 function resolveFileLimits(cfg: OpenClawConfig) {