From df192c514cae2a834a40a4554d22f242adbc070f Mon Sep 17 00:00:00 2001 From: Agustin Rivera <31522568+eleqtrizit@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:46:20 -0700 Subject: [PATCH] fix(media): fail closed on attachment canonicalization (#66022) * fix(media): fail closed on attachment canonicalization * fix(media): clarify attachment skip failures * fix(media): preserve attachment URL fallback * fix(media): preserve getPath URL fallback on blocked local paths * changelog: note media attachment canonicalization fail-closed (#66022) --------- Co-authored-by: Devin Robison --- CHANGELOG.md | 1 + src/media-understanding/attachments.cache.ts | 145 ++++++++++++------ src/media-understanding/errors.ts | 1 + .../media-understanding-misc.test.ts | 31 +++- .../media-understanding-url-fallback.test.ts | 109 +++++++++++++ 5 files changed, 240 insertions(+), 47 deletions(-) create mode 100644 src/media-understanding/media-understanding-url-fallback.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c92e685eba..d9904bcf2a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai - Models/Codex: include `apiKey` in the codex provider catalog output so the Pi ModelRegistry validator no longer rejects the entry and silently drops all custom models from every provider in `models.json`. (#66180) Thanks @hoyyeva. - Slack/interactions: apply the configured global `allowFrom` owner allowlist to channel block-action and modal interactive events, require an expected sender id for cross-verification, and reject ambiguous channel types so interactive triggers can no longer bypass the documented allowlist intent in channels without a `users` list. Open-by-default behavior is preserved when no allowlists are configured. (#66028) Thanks @eleqtrizit. +- Media-understanding/attachments: fail closed when a local attachment path cannot be canonically resolved via `realpath`, so a `realpath` error can no longer downgrade the canonical-roots allowlist check to a non-canonical comparison; attachments that also have a URL still fall back to the network fetch path. (#66022) Thanks @eleqtrizit. ## 2026.4.14-beta.1 diff --git a/src/media-understanding/attachments.cache.ts b/src/media-understanding/attachments.cache.ts index 7f52fd462da..4e0f06d5da0 100644 --- a/src/media-understanding/attachments.cache.ts +++ b/src/media-understanding/attachments.cache.ts @@ -86,6 +86,7 @@ export class MediaAttachmentCache { timeoutMs: number; }): Promise { const entry = await this.ensureEntry(params.attachmentIndex); + const url = entry.attachment.url?.trim(); if (entry.buffer) { if (entry.buffer.length > params.maxBytes) { throw new MediaUnderstandingSkipError( @@ -102,39 +103,48 @@ export class MediaAttachmentCache { } if (entry.resolvedPath) { - const size = await this.ensureLocalStat(entry); - if (entry.resolvedPath) { - if (size !== undefined && size > params.maxBytes) { - throw new MediaUnderstandingSkipError( - "maxBytes", - `Attachment ${params.attachmentIndex + 1} exceeds maxBytes ${params.maxBytes}`, - ); - } - const { buffer, filePath } = await this.readLocalBuffer({ - attachmentIndex: params.attachmentIndex, - filePath: entry.resolvedPath, - maxBytes: params.maxBytes, - }); - entry.resolvedPath = filePath; - entry.buffer = buffer; - entry.bufferMime = - entry.bufferMime ?? - entry.attachment.mime ?? - (await detectMime({ + try { + const size = await this.ensureLocalStat(entry); + if (entry.resolvedPath) { + if (size !== undefined && size > params.maxBytes) { + throw new MediaUnderstandingSkipError( + "maxBytes", + `Attachment ${params.attachmentIndex + 1} exceeds maxBytes ${params.maxBytes}`, + ); + } + const { buffer, filePath } = await this.readLocalBuffer({ + attachmentIndex: params.attachmentIndex, + filePath: entry.resolvedPath, + maxBytes: params.maxBytes, + }); + entry.resolvedPath = filePath; + entry.buffer = buffer; + entry.bufferMime = + entry.bufferMime ?? + entry.attachment.mime ?? + (await detectMime({ + buffer, + filePath, + })); + entry.bufferFileName = path.basename(filePath) || `media-${params.attachmentIndex + 1}`; + return { buffer, - filePath, - })); - entry.bufferFileName = path.basename(filePath) || `media-${params.attachmentIndex + 1}`; - return { - buffer, - mime: entry.bufferMime, - fileName: entry.bufferFileName, - size: buffer.length, - }; + mime: entry.bufferMime, + fileName: entry.bufferFileName, + size: buffer.length, + }; + } + } catch (err) { + if ( + !(err instanceof MediaUnderstandingSkipError) || + !url || + (err.reason !== "blocked" && err.reason !== "empty") + ) { + throw err; + } } } - const url = entry.attachment.url?.trim(); if (!url) { throw new MediaUnderstandingSkipError( "empty", @@ -186,13 +196,22 @@ export class MediaAttachmentCache { const entry = await this.ensureEntry(params.attachmentIndex); if (entry.resolvedPath) { if (params.maxBytes) { - const size = await this.ensureLocalStat(entry); - if (entry.resolvedPath) { - if (size !== undefined && size > params.maxBytes) { - throw new MediaUnderstandingSkipError( - "maxBytes", - `Attachment ${params.attachmentIndex + 1} exceeds maxBytes ${params.maxBytes}`, - ); + try { + const size = await this.ensureLocalStat(entry); + if (entry.resolvedPath) { + if (size !== undefined && size > params.maxBytes) { + throw new MediaUnderstandingSkipError( + "maxBytes", + `Attachment ${params.attachmentIndex + 1} exceeds maxBytes ${params.maxBytes}`, + ); + } + } + } catch (err) { + if ( + !(err instanceof MediaUnderstandingSkipError) || + (err.reason !== "blocked" && err.reason !== "empty") + ) { + throw err; } } } @@ -279,7 +298,10 @@ export class MediaAttachmentCache { `Blocked attachment path outside allowed roots: ${entry.attachment.path ?? entry.attachment.url ?? "(unknown)"}`, ); } - return undefined; + throw new MediaUnderstandingSkipError( + "blocked", + `Attachment ${entry.attachment.index + 1} path is outside allowed roots.`, + ); } if (entry.statSize !== undefined) { return entry.statSize; @@ -289,9 +311,19 @@ export class MediaAttachmentCache { const stat = await fs.stat(currentPath); if (!stat.isFile()) { entry.resolvedPath = undefined; - return undefined; + throw new MediaUnderstandingSkipError( + "empty", + `Attachment ${entry.attachment.index + 1} path is not a regular file.`, + ); + } + const canonicalPath = await this.resolveCanonicalLocalPath(currentPath); + if (!canonicalPath) { + entry.resolvedPath = undefined; + throw new MediaUnderstandingSkipError( + "blocked", + `Attachment ${entry.attachment.index + 1} could not be canonicalized.`, + ); } - const canonicalPath = await fs.realpath(currentPath).catch(() => currentPath); const canonicalRoots = await this.getCanonicalLocalPathRoots(); if (!isInboundPathAllowed({ filePath: canonicalPath, roots: canonicalRoots })) { entry.resolvedPath = undefined; @@ -300,12 +332,18 @@ export class MediaAttachmentCache { `Blocked canonicalized attachment path outside allowed roots: ${canonicalPath}`, ); } - return undefined; + throw new MediaUnderstandingSkipError( + "blocked", + `Attachment ${entry.attachment.index + 1} path is outside allowed roots.`, + ); } entry.resolvedPath = canonicalPath; entry.statSize = stat.size; return stat.size; } catch (err) { + if (err instanceof MediaUnderstandingSkipError) { + throw err; + } entry.resolvedPath = undefined; if (shouldLogVerbose()) { logVerbose(`Failed to read attachment ${entry.attachment.index + 1}: ${String(err)}`); @@ -346,15 +384,21 @@ export class MediaAttachmentCache { if (!stat.isFile()) { throw new MediaUnderstandingSkipError( "empty", - `Attachment ${params.attachmentIndex + 1} has no path or URL.`, + `Attachment ${params.attachmentIndex + 1} path is not a regular file.`, + ); + } + const canonicalPath = await this.resolveCanonicalLocalPath(params.filePath); + if (!canonicalPath) { + throw new MediaUnderstandingSkipError( + "blocked", + `Attachment ${params.attachmentIndex + 1} could not be canonicalized.`, ); } - const canonicalPath = await fs.realpath(params.filePath).catch(() => params.filePath); const canonicalRoots = await this.getCanonicalLocalPathRoots(); if (!isInboundPathAllowed({ filePath: canonicalPath, roots: canonicalRoots })) { throw new MediaUnderstandingSkipError( - "empty", - `Attachment ${params.attachmentIndex + 1} has no path or URL.`, + "blocked", + `Attachment ${params.attachmentIndex + 1} path is outside allowed roots.`, ); } const buffer = await handle.readFile(); @@ -369,4 +413,17 @@ export class MediaAttachmentCache { await handle.close().catch(() => {}); } } + + private async resolveCanonicalLocalPath(filePath: string): Promise { + try { + return await fs.realpath(filePath); + } catch (err) { + if (shouldLogVerbose()) { + logVerbose( + `Blocked attachment path when canonicalization failed: ${filePath} (${String(err)})`, + ); + } + return undefined; + } + } } diff --git a/src/media-understanding/errors.ts b/src/media-understanding/errors.ts index 8f0b8b78aa0..77a31368178 100644 --- a/src/media-understanding/errors.ts +++ b/src/media-understanding/errors.ts @@ -3,6 +3,7 @@ export type MediaUnderstandingSkipReason = | "timeout" | "unsupported" | "empty" + | "blocked" | "tooSmall"; export class MediaUnderstandingSkipError extends Error { diff --git a/src/media-understanding/media-understanding-misc.test.ts b/src/media-understanding/media-understanding-misc.test.ts index 1f7751ce1bb..8794eb01469 100644 --- a/src/media-understanding/media-understanding-misc.test.ts +++ b/src/media-understanding/media-understanding-misc.test.ts @@ -70,7 +70,7 @@ describe("media understanding attachments SSRF", () => { await expect( cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }), - ).rejects.toThrow(/has no path or URL/i); + ).rejects.toThrow(/outside allowed roots/i); }); it("blocks directory attachments even inside configured roots", async () => { @@ -85,7 +85,7 @@ describe("media understanding attachments SSRF", () => { await expect( cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }), - ).rejects.toThrow(/has no path or URL/i); + ).rejects.toThrow(/not a regular file/i); }); }); @@ -106,7 +106,7 @@ describe("media understanding attachments SSRF", () => { await expect( cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }), - ).rejects.toThrow(/has no path or URL/i); + ).rejects.toThrow(/outside allowed roots/i); }); }); @@ -169,4 +169,29 @@ describe("media understanding attachments SSRF", () => { expect(openedFlags).toBe(fsConstants.O_RDONLY | fsConstants.O_NOFOLLOW); }); }); + + it("rejects local attachments when canonicalization fails", async () => { + await withTempDir({ prefix: "openclaw-media-cache-realpath-failure-" }, async (base) => { + const allowedRoot = path.join(base, "allowed"); + const attachmentPath = path.join(allowedRoot, "voice-note.m4a"); + await fs.mkdir(allowedRoot, { recursive: true }); + await fs.writeFile(attachmentPath, "ok"); + + const cache = new MediaAttachmentCache([{ index: 0, path: attachmentPath }], { + localPathRoots: [allowedRoot], + }); + const originalRealpath = fs.realpath.bind(fs); + + vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => { + if (String(candidatePath) === attachmentPath) { + throw new Error("EACCES"); + } + return await originalRealpath(candidatePath); + }); + + await expect( + cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }), + ).rejects.toThrow(/could not be canonicalized/i); + }); + }); }); diff --git a/src/media-understanding/media-understanding-url-fallback.test.ts b/src/media-understanding/media-understanding-url-fallback.test.ts new file mode 100644 index 00000000000..350deb57eed --- /dev/null +++ b/src/media-understanding/media-understanding-url-fallback.test.ts @@ -0,0 +1,109 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { withTempDir } from "../test-helpers/temp-dir.js"; +import { withFetchPreconnect } from "../test-utils/fetch-mock.js"; +import { MediaAttachmentCache } from "./attachments.js"; + +const originalFetch = globalThis.fetch; + +describe("media understanding attachment URL fallback", () => { + afterEach(() => { + globalThis.fetch = originalFetch; + vi.restoreAllMocks(); + }); + + it("getPath falls back to URL fetch when local path is blocked", async () => { + await withTempDir({ prefix: "openclaw-media-cache-getpath-url-fallback-" }, async (base) => { + const allowedRoot = path.join(base, "allowed"); + const attachmentPath = path.join(allowedRoot, "voice-note.m4a"); + const fallbackUrl = "https://example.com/fallback.jpg"; + await fs.mkdir(allowedRoot, { recursive: true }); + await fs.writeFile(attachmentPath, "ok"); + + const cache = new MediaAttachmentCache( + [{ index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" }], + { + localPathRoots: [allowedRoot], + }, + ); + const originalRealpath = fs.realpath.bind(fs); + const fetchSpy = vi.fn( + async () => + new Response(Buffer.from("fallback-buffer"), { + status: 200, + headers: { + "content-type": "image/jpeg", + }, + }), + ); + + globalThis.fetch = withFetchPreconnect(fetchSpy); + vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => { + if (String(candidatePath) === attachmentPath) { + throw new Error("EACCES"); + } + return await originalRealpath(candidatePath); + }); + + const result = await cache.getPath({ + attachmentIndex: 0, + maxBytes: 1024, + timeoutMs: 1000, + }); + // getPath should fall through to getBuffer URL fetch, write a temp file, + // and return a path to that temp file instead of throwing. + expect(result.path).toBeTruthy(); + expect(fetchSpy).toHaveBeenCalledTimes(1); + expect(fetchSpy).toHaveBeenCalledWith(fallbackUrl, expect.anything()); + // Clean up the temp file + if (result.cleanup) { + await result.cleanup(); + } + }); + }); + + it("falls back to URL fetch when local attachment canonicalization fails", async () => { + await withTempDir({ prefix: "openclaw-media-cache-url-fallback-" }, async (base) => { + const allowedRoot = path.join(base, "allowed"); + const attachmentPath = path.join(allowedRoot, "voice-note.m4a"); + const fallbackUrl = "https://example.com/fallback.jpg"; + await fs.mkdir(allowedRoot, { recursive: true }); + await fs.writeFile(attachmentPath, "ok"); + + const cache = new MediaAttachmentCache( + [{ index: 0, path: attachmentPath, url: fallbackUrl, mime: "image/jpeg" }], + { + localPathRoots: [allowedRoot], + }, + ); + const originalRealpath = fs.realpath.bind(fs); + const fetchSpy = vi.fn( + async () => + new Response(Buffer.from("fallback-buffer"), { + status: 200, + headers: { + "content-type": "image/jpeg", + }, + }), + ); + + globalThis.fetch = withFetchPreconnect(fetchSpy); + vi.spyOn(fs, "realpath").mockImplementation(async (candidatePath) => { + if (String(candidatePath) === attachmentPath) { + throw new Error("EACCES"); + } + return await originalRealpath(candidatePath); + }); + + const result = await cache.getBuffer({ + attachmentIndex: 0, + maxBytes: 1024, + timeoutMs: 1000, + }); + expect(result.buffer.toString()).toBe("fallback-buffer"); + expect(fetchSpy).toHaveBeenCalledTimes(1); + expect(fetchSpy).toHaveBeenCalledWith(fallbackUrl, expect.anything()); + }); + }); +});