From 86556fcd47a1f3a113d6f6343958cb7df8f87b3d Mon Sep 17 00:00:00 2001 From: Neerav Makwana <261249544+neeravmakwana@users.noreply.github.com> Date: Fri, 24 Apr 2026 21:08:34 -0400 Subject: [PATCH] fix(media): apply web fetch ssrf policy to media --- src/agents/tools/image-generate-tool.test.ts | 28 +++++++++++++++++++ src/agents/tools/image-generate-tool.ts | 5 ++++ src/agents/tools/image-tool.test.ts | 27 ++++++++++++++++++ src/agents/tools/image-tool.ts | 2 ++ src/media-understanding/apply.ts | 1 + src/media-understanding/attachments.cache.ts | 11 +++++++- .../audio-transcription-runner.ts | 8 +++--- .../media-understanding-misc.test.ts | 23 +++++++++++++++ src/media-understanding/runtime.ts | 1 + 9 files changed, 101 insertions(+), 5 deletions(-) diff --git a/src/agents/tools/image-generate-tool.test.ts b/src/agents/tools/image-generate-tool.test.ts index 1f53fe09a0e..74e2f6782e1 100644 --- a/src/agents/tools/image-generate-tool.test.ts +++ b/src/agents/tools/image-generate-tool.test.ts @@ -702,6 +702,34 @@ describe("createImageGenerateTool", () => { ); }); + it("passes web_fetch SSRF policy to remote reference images", async () => { + stubImageGenerationProviders(); + stubEditedImageFlow({ width: 1024, height: 1024 }); + const tool = requireImageGenerateTool( + createImageGenerateTool({ + config: { + agents: { + defaults: { imageGenerationModel: { primary: "google/gemini-3-pro-image-preview" } }, + }, + tools: { web: { fetch: { ssrfPolicy: { allowRfc2544BenchmarkRange: true } } } }, + }, + workspaceDir: process.cwd(), + }), + ); + + await tool.execute("call-edit-rfc2544", { + prompt: "Use this reference.", + image: "http://198.18.0.153/reference.png", + }); + + expect(webMedia.loadWebMedia).toHaveBeenCalledWith( + "http://198.18.0.153/reference.png", + expect.objectContaining({ + ssrfPolicy: { allowRfc2544BenchmarkRange: true }, + }), + ); + }); + it("ignores non-finite mediaMaxMb when loading reference images", async () => { stubImageGenerationProviders(); stubEditedImageFlow({ width: 3200, height: 1800 }); diff --git a/src/agents/tools/image-generate-tool.ts b/src/agents/tools/image-generate-tool.ts index d3946ecb80c..18368e8965e 100644 --- a/src/agents/tools/image-generate-tool.ts +++ b/src/agents/tools/image-generate-tool.ts @@ -18,6 +18,7 @@ import type { ImageGenerationResolution, ImageGenerationSourceImage, } from "../../image-generation/types.js"; +import type { SsrFPolicy } from "../../infra/net/ssrf.js"; import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js"; import { getImageMetadata } from "../../media/image-ops.js"; import { @@ -415,6 +416,7 @@ async function loadReferenceImages(params: { maxBytes?: number; workspaceDir?: string; sandboxConfig: { root: string; bridge: SandboxFsBridge; workspaceOnly: boolean } | null; + ssrfPolicy?: SsrFPolicy; }): Promise< Array<{ sourceImage: ImageGenerationSourceImage; @@ -491,6 +493,7 @@ async function loadReferenceImages(params: { : await loadWebMedia(resolvedPath ?? resolvedImage, { maxBytes: params.maxBytes, localRoots, + ssrfPolicy: params.ssrfPolicy, }); if (media.kind !== "image") { throw new ToolInputError(`Unsupported media type: ${media.kind}`); @@ -549,6 +552,7 @@ export function createImageGenerateTool(options?: { } const effectiveCfg = applyImageGenerationModelConfigDefaults(cfg, imageGenerationModelConfig) ?? cfg; + const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy; const sandboxConfig = options?.sandbox && options.sandbox.root.trim() ? { @@ -646,6 +650,7 @@ export function createImageGenerateTool(options?: { maxBytes: configuredMediaMaxBytes, workspaceDir: options?.workspaceDir, sandboxConfig, + ssrfPolicy: remoteMediaSsrfPolicy, }); const inputImages = loadedReferenceImages.map((entry) => entry.sourceImage); const modeCaps = diff --git a/src/agents/tools/image-tool.test.ts b/src/agents/tools/image-tool.test.ts index 0b446ef92be..716f1915945 100644 --- a/src/agents/tools/image-tool.test.ts +++ b/src/agents/tools/image-tool.test.ts @@ -1172,6 +1172,33 @@ describe("image tool implicit imageModel config", () => { }); }); + it("passes web_fetch SSRF policy to remote image references", async () => { + const fetch = vi.fn(async (input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url; + if (url.startsWith("http://198.18.0.153/")) { + return new Response(Buffer.from(ONE_PIXEL_PNG_B64, "base64"), { + headers: { "content-type": "image/png" }, + }); + } + return new Response( + JSON.stringify({ content: "ok", base_resp: { status_code: 0, status_msg: "" } }), + ); + }); + global.fetch = withFetchPreconnect(fetch); + vi.stubEnv("MINIMAX_API_KEY", "minimax-test"); + + await withTempAgentDir(async (agentDir) => { + const cfg: OpenClawConfig = { + ...createMinimaxImageConfig(), + tools: { web: { fetch: { ssrfPolicy: { allowRfc2544BenchmarkRange: true } } } }, + }; + const tool = createRequiredImageTool({ config: cfg, agentDir }); + + await expectImageToolExecOk(tool, "http://198.18.0.153/reference.png"); + expect(fetch).toHaveBeenCalledWith("http://198.18.0.153/reference.png", expect.any(Object)); + }); + }); + it("sandboxes image paths like the read tool", async () => { await withTempSandboxState(async ({ agentDir, sandboxRoot }) => { await fs.writeFile(path.join(sandboxRoot, "img.png"), "fake", "utf8"); diff --git a/src/agents/tools/image-tool.ts b/src/agents/tools/image-tool.ts index ae71bb3f76a..42bb93a9f51 100644 --- a/src/agents/tools/image-tool.ts +++ b/src/agents/tools/image-tool.ts @@ -307,6 +307,7 @@ export function createImageTool(options?: { if (!imageModelConfig) { return null; } + const remoteMediaSsrfPolicy = options?.config?.tools?.web?.fetch?.ssrfPolicy; // If model has native vision, images in the prompt are auto-injected // so this tool is only needed when image wasn't provided in the prompt @@ -491,6 +492,7 @@ export function createImageTool(options?: { : await loadWebMedia(resolvedPath ?? resolvedImage, { maxBytes, localRoots: mediaLocalRoots, + ssrfPolicy: remoteMediaSsrfPolicy, }); if (media.kind !== "image") { throw new Error(`Unsupported media type: ${media.kind}`); diff --git a/src/media-understanding/apply.ts b/src/media-understanding/apply.ts index 34e69ee8e70..2b87dfa87b0 100644 --- a/src/media-understanding/apply.ts +++ b/src/media-understanding/apply.ts @@ -495,6 +495,7 @@ export async function applyMediaUnderstanding(params: { const providerRegistry = buildProviderRegistry(params.providers, cfg); const cache = createMediaAttachmentCache(attachments, { localPathRoots: resolveMediaAttachmentLocalRoots({ cfg, ctx }), + ssrfPolicy: cfg.tools?.web?.fetch?.ssrfPolicy, }); try { diff --git a/src/media-understanding/attachments.cache.ts b/src/media-understanding/attachments.cache.ts index c5664a7ab82..d3f18ce6e7d 100644 --- a/src/media-understanding/attachments.cache.ts +++ b/src/media-understanding/attachments.cache.ts @@ -2,6 +2,7 @@ import { constants as fsConstants } from "node:fs"; import fs from "node:fs/promises"; import path from "node:path"; import { logVerbose, shouldLogVerbose } from "../globals.js"; +import type { SsrFPolicy } from "../infra/net/ssrf.js"; import { isAbortError } from "../infra/unhandled-rejections.js"; import { fetchRemoteMedia, MediaFetchError } from "../media/fetch.js"; import { isInboundPathAllowed, mergeInboundPathRoots } from "../media/inbound-path-policy.js"; @@ -51,6 +52,7 @@ function getDefaultLocalPathRoots(): readonly string[] { export type MediaAttachmentCacheOptions = { localPathRoots?: readonly string[]; includeDefaultLocalPathRoots?: boolean; + ssrfPolicy?: SsrFPolicy; }; function resolveRequestUrl(input: RequestInfo | URL): string { @@ -67,10 +69,12 @@ export class MediaAttachmentCache { private readonly entries = new Map(); private readonly attachments: MediaAttachment[]; private readonly localPathRoots: readonly string[]; + private readonly ssrfPolicy: SsrFPolicy | undefined; private canonicalLocalPathRoots?: Promise; constructor(attachments: MediaAttachment[], options?: MediaAttachmentCacheOptions) { this.attachments = attachments; + this.ssrfPolicy = options?.ssrfPolicy; this.localPathRoots = options?.includeDefaultLocalPathRoots === false ? mergeInboundPathRoots(options.localPathRoots) @@ -155,7 +159,12 @@ export class MediaAttachmentCache { try { const fetchImpl = (input: RequestInfo | URL, init?: RequestInit) => fetchWithTimeout(resolveRequestUrl(input), init ?? {}, params.timeoutMs, globalThis.fetch); - const fetched = await fetchRemoteMedia({ url, fetchImpl, maxBytes: params.maxBytes }); + const fetched = await fetchRemoteMedia({ + url, + fetchImpl, + maxBytes: params.maxBytes, + ssrfPolicy: this.ssrfPolicy, + }); entry.buffer = fetched.buffer; entry.bufferMime = entry.attachment.mime ?? diff --git a/src/media-understanding/audio-transcription-runner.ts b/src/media-understanding/audio-transcription-runner.ts index 2110cd85039..8033f4b8dc3 100644 --- a/src/media-understanding/audio-transcription-runner.ts +++ b/src/media-understanding/audio-transcription-runner.ts @@ -24,10 +24,10 @@ export async function runAudioTranscription(params: { } const providerRegistry = buildProviderRegistry(params.providers, params.cfg); - const cache = createMediaAttachmentCache( - attachments, - params.localPathRoots ? { localPathRoots: params.localPathRoots } : undefined, - ); + const cache = createMediaAttachmentCache(attachments, { + ...(params.localPathRoots ? { localPathRoots: params.localPathRoots } : {}), + ssrfPolicy: params.cfg.tools?.web?.fetch?.ssrfPolicy, + }); try { const result = await runCapability({ diff --git a/src/media-understanding/media-understanding-misc.test.ts b/src/media-understanding/media-understanding-misc.test.ts index a8414a740e4..dd845db6e06 100644 --- a/src/media-understanding/media-understanding-misc.test.ts +++ b/src/media-understanding/media-understanding-misc.test.ts @@ -67,6 +67,29 @@ describe("media understanding attachments SSRF", () => { expect(fetchSpy).not.toHaveBeenCalled(); }); + it("allows RFC2544 benchmark-range URLs only when media fetch policy opts in", async () => { + const url = "http://198.18.0.153/file.jpg"; + const deniedCache = new MediaAttachmentCache([{ index: 0, url }]); + await expect( + deniedCache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }), + ).rejects.toThrow(/private|internal|blocked/i); + + const fetchSpy = vi.fn().mockResolvedValue( + new Response("image", { + headers: { "content-type": "image/jpeg" }, + }), + ); + globalThis.fetch = withFetchPreconnect(fetchSpy); + const allowedCache = new MediaAttachmentCache([{ index: 0, url }], { + ssrfPolicy: { allowRfc2544BenchmarkRange: true }, + }); + + await expect( + allowedCache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }), + ).resolves.toMatchObject({ mime: "image/jpeg" }); + expect(fetchSpy).toHaveBeenCalledTimes(1); + }); + it("reads local attachments inside configured roots", async () => { await withLocalAttachmentCache("openclaw-media-cache-allowed-", async ({ cache }) => { const result = await cache.getBuffer({ attachmentIndex: 0, maxBytes: 1024, timeoutMs: 1000 }); diff --git a/src/media-understanding/runtime.ts b/src/media-understanding/runtime.ts index 20fea18f925..3efc20844a7 100644 --- a/src/media-understanding/runtime.ts +++ b/src/media-understanding/runtime.ts @@ -68,6 +68,7 @@ export async function runMediaUnderstandingFile( const providerRegistry = buildProviderRegistry(undefined, params.cfg); const cache = createMediaAttachmentCache(attachments, { localPathRoots: [path.dirname(params.filePath)], + ssrfPolicy: params.cfg.tools?.web?.fetch?.ssrfPolicy, }); try {