fix(media): cover generation reference media ssrf policy

This commit is contained in:
Neerav Makwana
2026-04-24 21:19:07 -04:00
committed by Peter Steinberger
parent 86556fcd47
commit 1bb5a96577
5 changed files with 139 additions and 3 deletions

View File

@@ -705,6 +705,26 @@ describe("createImageGenerateTool", () => {
it("passes web_fetch SSRF policy to remote reference images", async () => {
stubImageGenerationProviders();
stubEditedImageFlow({ width: 1024, height: 1024 });
const defaultTool = requireImageGenerateTool(
createImageGenerateTool({
config: {
agents: {
defaults: { imageGenerationModel: { primary: "google/gemini-3-pro-image-preview" } },
},
},
workspaceDir: process.cwd(),
}),
);
await defaultTool.execute("call-edit-rfc2544-default", {
prompt: "Use this reference.",
image: "http://198.18.0.153/reference.png",
});
expect(webMedia.loadWebMedia).toHaveBeenLastCalledWith(
"http://198.18.0.153/reference.png",
expect.not.objectContaining({ ssrfPolicy: expect.anything() }),
);
const tool = requireImageGenerateTool(
createImageGenerateTool({
config: {

View File

@@ -1,6 +1,7 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../config/config.js";
import * as mediaStore from "../../media/store.js";
import * as webMedia from "../../media/web-media.js";
import * as musicGenerationRuntime from "../../music-generation/runtime.js";
import * as musicGenerateBackground from "./music-generate-background.js";
import { createMusicGenerateTool } from "./music-generate-tool.js";
@@ -92,9 +93,15 @@ const musicGenerateBackgroundMocks = vi.hoisted(() => ({
vi.mock("../../config/config.js", () => configMocks);
vi.mock("../../media/store.js", () => mediaStoreMocks);
vi.mock("../../media/web-media.js", () => ({
loadWebMedia: vi.fn(),
}));
// Partial mock: every real export of web-media.js is kept, and only loadWebMedia
// is swapped for a bare vi.fn() that individual tests can stub or spy on.
vi.mock("../../media/web-media.js", async () => {
  type WebMediaModule = typeof import("../../media/web-media.js");
  const realModule = await vi.importActual<WebMediaModule>("../../media/web-media.js");
  return { ...realModule, loadWebMedia: vi.fn() };
});
vi.mock("../../music-generation/runtime.js", () => musicGenerationRuntimeMocks);
vi.mock("./music-generate-background.js", () => musicGenerateBackgroundMocks);
vi.mock("../../tasks/runtime-internal.js", () => taskRuntimeInternalMocks);
@@ -509,4 +516,63 @@ describe("createMusicGenerateTool", () => {
},
});
});
// Checks that the SSRF policy configured under tools.web.fetch is handed to
// loadWebMedia when music_generate loads a reference image.
it("passes web_fetch SSRF policy when loading reference images", async () => {
  // The host lies in 198.18.0.0/15, the RFC 2544 benchmarking range that the
  // configured policy below explicitly allows.
  const referenceImageUrl = "http://198.18.0.153/reference.png";
  const configuredSsrfPolicy = { allowRfc2544BenchmarkRange: true };

  // A single provider that accepts one input image for edits. Its own
  // generateMusic throws if it is ever invoked; the runtime-level generateMusic
  // is stubbed separately below.
  vi.spyOn(musicGenerationRuntime, "listRuntimeMusicGenerationProviders").mockReturnValue([
    {
      id: "minimax",
      defaultModel: "music-2.5+",
      models: ["music-2.5+"],
      capabilities: {
        edit: { enabled: true, maxInputImages: 1 },
      },
      generateMusic: vi.fn(async () => {
        throw new Error("not used");
      }),
    },
  ]);

  // Reference image load resolves to a tiny in-memory PNG stand-in.
  vi.spyOn(webMedia, "loadWebMedia").mockResolvedValue({
    kind: "image",
    buffer: Buffer.from("image"),
    contentType: "image/png",
  });

  // Generation and persistence are stubbed so execute() can run to completion.
  vi.spyOn(musicGenerationRuntime, "generateMusic").mockResolvedValue({
    provider: "minimax",
    model: "music-2.5+",
    attempts: [],
    ignoredOverrides: [],
    tracks: [{ buffer: Buffer.from("music"), mimeType: "audio/mpeg" }],
  });
  vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
    path: "/tmp/generated-night-drive.mp3",
    id: "generated-night-drive.mp3",
    size: 11,
    contentType: "audio/mpeg",
  });

  const musicTool = createMusicGenerateTool({
    config: asConfig({
      agents: {
        defaults: {
          musicGenerationModel: { primary: "minimax/music-2.5+" },
        },
      },
      tools: { web: { fetch: { ssrfPolicy: configuredSsrfPolicy } } },
    }),
  });
  if (!musicTool) {
    throw new Error("expected music_generate tool");
  }

  await musicTool.execute("call-1", {
    prompt: "night-drive synthwave",
    image: referenceImageUrl,
  });

  // The loader must receive the same URL together with the configured policy.
  expect(webMedia.loadWebMedia).toHaveBeenCalledWith(
    referenceImageUrl,
    expect.objectContaining({ ssrfPolicy: configuredSsrfPolicy }),
  );
});
});

View File

@@ -2,6 +2,7 @@ import { Type } from "typebox";
import { loadConfig } from "../../config/config.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { formatErrorMessage } from "../../infra/errors.js";
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js";
import {
@@ -236,6 +237,7 @@ async function loadReferenceImages(params: {
inputs: string[];
workspaceDir?: string;
sandboxConfig: { root: string; bridge: SandboxFsBridge; workspaceOnly: boolean } | null;
ssrfPolicy?: SsrFPolicy;
}): Promise<
Array<{
sourceImage: MusicGenerationSourceImage;
@@ -303,6 +305,7 @@ async function loadReferenceImages(params: {
})
: await loadWebMedia(resolvedPath ?? resolvedInput, {
localRoots,
ssrfPolicy: params.ssrfPolicy,
});
if (media.kind !== "image") {
throw new ToolInputError(`Unsupported media type: ${media.kind ?? "unknown"}`);
@@ -540,10 +543,12 @@ export function createMusicGenerateTool(options?: {
musicGenerationModelConfig,
modelOverride: model,
});
const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy;
const loadedReferenceImages = await loadReferenceImages({
inputs: imageInputs,
workspaceDir: options?.workspaceDir,
sandboxConfig,
ssrfPolicy: remoteMediaSsrfPolicy,
});
validateMusicGenerationCapabilities({
provider: selectedProvider,

View File

@@ -1,6 +1,7 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../config/config.js";
import * as mediaStore from "../../media/store.js";
import * as webMedia from "../../media/web-media.js";
import * as videoGenerationRuntime from "../../video-generation/runtime.js";
import * as videoGenerateBackground from "./video-generate-background.js";
import { createVideoGenerateTool } from "./video-generate-tool.js";
@@ -755,6 +756,43 @@ describe("createVideoGenerateTool", () => {
expect(call.inputImages?.[1]?.role).toBe("last_frame");
});
// Checks that the SSRF policy configured under tools.web.fetch is handed to
// loadWebMedia when video_generate loads a reference image asset.
it("passes web_fetch SSRF policy when loading reference assets", async () => {
  const referenceImagePath = "/tmp/reference.png";
  const configuredSsrfPolicy = { allowRfc2544BenchmarkRange: true };

  // Provider accepts a single input image for image-to-video.
  mockVideoPluginProvider({
    imageToVideo: { enabled: true, maxInputImages: 1 },
  });

  // Reference asset load resolves to a tiny in-memory PNG stand-in.
  vi.spyOn(webMedia, "loadWebMedia").mockResolvedValue({
    kind: "image",
    buffer: Buffer.from("image"),
    contentType: "image/png",
  });
  mockSavedVideoResult();

  const videoTool = createVideoGenerateTool({
    config: asConfig({
      agents: {
        defaults: {
          videoGenerationModel: { primary: "video-plugin/vid-v1" },
        },
      },
      tools: { web: { fetch: { ssrfPolicy: configuredSsrfPolicy } } },
    }),
  });
  if (!videoTool) {
    throw new Error("expected video_generate tool");
  }

  await videoTool.execute("call-1", {
    prompt: "lobster",
    image: referenceImagePath,
  });

  // The loader must receive the same input together with the configured policy.
  expect(webMedia.loadWebMedia).toHaveBeenCalledWith(
    referenceImagePath,
    expect.objectContaining({ ssrfPolicy: configuredSsrfPolicy }),
  );
});
it("rejects audio data: URLs via the templated rejection branch", async () => {
mockVideoPluginProvider({
maxInputAudios: 1,

View File

@@ -2,6 +2,7 @@ import { Type } from "typebox";
import { loadConfig } from "../../config/config.js";
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { formatErrorMessage } from "../../infra/errors.js";
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js";
import {
@@ -430,6 +431,7 @@ async function loadReferenceAssets(params: {
maxBytes?: number;
workspaceDir?: string;
sandboxConfig: { root: string; bridge: SandboxFsBridge; workspaceOnly: boolean } | null;
ssrfPolicy?: SsrFPolicy;
}): Promise<
Array<{
sourceAsset: VideoGenerationSourceAsset;
@@ -520,6 +522,7 @@ async function loadReferenceAssets(params: {
: await loadWebMedia(resolvedPath ?? resolvedInput, {
maxBytes: params.maxBytes,
localRoots,
ssrfPolicy: params.ssrfPolicy,
});
if (media.kind !== params.expectedKind) {
throw new ToolInputError(`Unsupported media type: ${media.kind ?? "unknown"}`);
@@ -810,6 +813,7 @@ export function createVideoGenerateTool(options?: {
const action = resolveAction(args);
const effectiveCfg =
applyVideoGenerationModelConfigDefaults(cfg, videoGenerationModelConfig) ?? cfg;
const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy;
if (action === "list") {
return createVideoGenerateListActionResult(effectiveCfg);
@@ -900,6 +904,7 @@ export function createVideoGenerateTool(options?: {
expectedKind: "image",
workspaceDir: options?.workspaceDir,
sandboxConfig,
ssrfPolicy: remoteMediaSsrfPolicy,
});
// Attach roles to the loaded image assets (positional, by index into images[]).
for (let i = 0; i < loadedReferenceImages.length; i++) {
@@ -913,6 +918,7 @@ export function createVideoGenerateTool(options?: {
expectedKind: "video",
workspaceDir: options?.workspaceDir,
sandboxConfig,
ssrfPolicy: remoteMediaSsrfPolicy,
});
for (let i = 0; i < loadedReferenceVideos.length; i++) {
const role = videoRoles[i];
@@ -925,6 +931,7 @@ export function createVideoGenerateTool(options?: {
expectedKind: "audio",
workspaceDir: options?.workspaceDir,
sandboxConfig,
ssrfPolicy: remoteMediaSsrfPolicy,
});
for (let i = 0; i < loadedReferenceAudios.length; i++) {
const role = audioRoles[i];