mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:20:43 +00:00
fix(media): cover generation reference media ssrf policy
This commit is contained in:
committed by
Peter Steinberger
parent
86556fcd47
commit
1bb5a96577
@@ -705,6 +705,26 @@ describe("createImageGenerateTool", () => {
|
||||
it("passes web_fetch SSRF policy to remote reference images", async () => {
|
||||
stubImageGenerationProviders();
|
||||
stubEditedImageFlow({ width: 1024, height: 1024 });
|
||||
const defaultTool = requireImageGenerateTool(
|
||||
createImageGenerateTool({
|
||||
config: {
|
||||
agents: {
|
||||
defaults: { imageGenerationModel: { primary: "google/gemini-3-pro-image-preview" } },
|
||||
},
|
||||
},
|
||||
workspaceDir: process.cwd(),
|
||||
}),
|
||||
);
|
||||
|
||||
await defaultTool.execute("call-edit-rfc2544-default", {
|
||||
prompt: "Use this reference.",
|
||||
image: "http://198.18.0.153/reference.png",
|
||||
});
|
||||
expect(webMedia.loadWebMedia).toHaveBeenLastCalledWith(
|
||||
"http://198.18.0.153/reference.png",
|
||||
expect.not.objectContaining({ ssrfPolicy: expect.anything() }),
|
||||
);
|
||||
|
||||
const tool = requireImageGenerateTool(
|
||||
createImageGenerateTool({
|
||||
config: {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import * as mediaStore from "../../media/store.js";
|
||||
import * as webMedia from "../../media/web-media.js";
|
||||
import * as musicGenerationRuntime from "../../music-generation/runtime.js";
|
||||
import * as musicGenerateBackground from "./music-generate-background.js";
|
||||
import { createMusicGenerateTool } from "./music-generate-tool.js";
|
||||
@@ -92,9 +93,15 @@ const musicGenerateBackgroundMocks = vi.hoisted(() => ({
|
||||
|
||||
vi.mock("../../config/config.js", () => configMocks);
|
||||
vi.mock("../../media/store.js", () => mediaStoreMocks);
|
||||
// Partially mock the web-media module: every real export is kept via
// vi.importActual and only loadWebMedia is swapped for a bare vi.fn(), which
// individual tests then configure with vi.spyOn(...).mockResolvedValue(...).
// A single registration is used; a second stub-only vi.mock for the same path
// would be redundant and would hide the module's other exports.
vi.mock("../../media/web-media.js", async () => {
  const actual = await vi.importActual<typeof import("../../media/web-media.js")>(
    "../../media/web-media.js",
  );
  return {
    ...actual,
    loadWebMedia: vi.fn(),
  };
});
|
||||
vi.mock("../../music-generation/runtime.js", () => musicGenerationRuntimeMocks);
|
||||
vi.mock("./music-generate-background.js", () => musicGenerateBackgroundMocks);
|
||||
vi.mock("../../tasks/runtime-internal.js", () => taskRuntimeInternalMocks);
|
||||
@@ -509,4 +516,63 @@ describe("createMusicGenerateTool", () => {
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
// Regression test: the SSRF policy configured under tools.web.fetch must be
// forwarded to loadWebMedia when music_generate loads a reference image.
it("passes web_fetch SSRF policy when loading reference images", async () => {
  const referenceImageUrl = "http://198.18.0.153/reference.png";
  const musicModel = "music-2.5+";

  // One edit-capable provider; generation itself goes through the mocked
  // runtime below, so the provider's own generateMusic must never run.
  vi.spyOn(musicGenerationRuntime, "listRuntimeMusicGenerationProviders").mockReturnValue([
    {
      id: "minimax",
      defaultModel: musicModel,
      models: [musicModel],
      capabilities: {
        edit: { enabled: true, maxInputImages: 1 },
      },
      generateMusic: vi.fn(async () => {
        throw new Error("not used");
      }),
    },
  ]);

  // Stub the media loader so no real fetch happens; only its arguments matter.
  vi.spyOn(webMedia, "loadWebMedia").mockResolvedValue({
    kind: "image",
    buffer: Buffer.from("image"),
    contentType: "image/png",
  });

  vi.spyOn(musicGenerationRuntime, "generateMusic").mockResolvedValue({
    provider: "minimax",
    model: musicModel,
    attempts: [],
    ignoredOverrides: [],
    tracks: [{ buffer: Buffer.from("music"), mimeType: "audio/mpeg" }],
  });

  vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
    path: "/tmp/generated-night-drive.mp3",
    id: "generated-night-drive.mp3",
    size: 11,
    contentType: "audio/mpeg",
  });

  const toolConfig = asConfig({
    agents: {
      defaults: {
        musicGenerationModel: { primary: "minimax/music-2.5+" },
      },
    },
    tools: { web: { fetch: { ssrfPolicy: { allowRfc2544BenchmarkRange: true } } } },
  });
  const musicTool = createMusicGenerateTool({ config: toolConfig });
  if (!musicTool) {
    throw new Error("expected music_generate tool");
  }

  await musicTool.execute("call-1", {
    prompt: "night-drive synthwave",
    image: referenceImageUrl,
  });

  // The loader must have received the configured policy alongside the URL.
  const expectedLoadOptions = expect.objectContaining({
    ssrfPolicy: { allowRfc2544BenchmarkRange: true },
  });
  expect(webMedia.loadWebMedia).toHaveBeenCalledWith(referenceImageUrl, expectedLoadOptions);
});
|
||||
});
|
||||
|
||||
@@ -2,6 +2,7 @@ import { Type } from "typebox";
|
||||
import { loadConfig } from "../../config/config.js";
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import { formatErrorMessage } from "../../infra/errors.js";
|
||||
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
|
||||
import { createSubsystemLogger } from "../../logging/subsystem.js";
|
||||
import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js";
|
||||
import {
|
||||
@@ -236,6 +237,7 @@ async function loadReferenceImages(params: {
|
||||
inputs: string[];
|
||||
workspaceDir?: string;
|
||||
sandboxConfig: { root: string; bridge: SandboxFsBridge; workspaceOnly: boolean } | null;
|
||||
ssrfPolicy?: SsrFPolicy;
|
||||
}): Promise<
|
||||
Array<{
|
||||
sourceImage: MusicGenerationSourceImage;
|
||||
@@ -303,6 +305,7 @@ async function loadReferenceImages(params: {
|
||||
})
|
||||
: await loadWebMedia(resolvedPath ?? resolvedInput, {
|
||||
localRoots,
|
||||
ssrfPolicy: params.ssrfPolicy,
|
||||
});
|
||||
if (media.kind !== "image") {
|
||||
throw new ToolInputError(`Unsupported media type: ${media.kind ?? "unknown"}`);
|
||||
@@ -540,10 +543,12 @@ export function createMusicGenerateTool(options?: {
|
||||
musicGenerationModelConfig,
|
||||
modelOverride: model,
|
||||
});
|
||||
const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy;
|
||||
const loadedReferenceImages = await loadReferenceImages({
|
||||
inputs: imageInputs,
|
||||
workspaceDir: options?.workspaceDir,
|
||||
sandboxConfig,
|
||||
ssrfPolicy: remoteMediaSsrfPolicy,
|
||||
});
|
||||
validateMusicGenerationCapabilities({
|
||||
provider: selectedProvider,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import * as mediaStore from "../../media/store.js";
|
||||
import * as webMedia from "../../media/web-media.js";
|
||||
import * as videoGenerationRuntime from "../../video-generation/runtime.js";
|
||||
import * as videoGenerateBackground from "./video-generate-background.js";
|
||||
import { createVideoGenerateTool } from "./video-generate-tool.js";
|
||||
@@ -755,6 +756,43 @@ describe("createVideoGenerateTool", () => {
|
||||
expect(call.inputImages?.[1]?.role).toBe("last_frame");
|
||||
});
|
||||
|
||||
// Regression test: the SSRF policy configured under tools.web.fetch must be
// forwarded to loadWebMedia when video_generate loads a reference asset.
it("passes web_fetch SSRF policy when loading reference assets", async () => {
  const referenceImagePath = "/tmp/reference.png";

  mockVideoPluginProvider({
    imageToVideo: { enabled: true, maxInputImages: 1 },
  });

  // Stub the media loader; the test only inspects the arguments it receives.
  vi.spyOn(webMedia, "loadWebMedia").mockResolvedValue({
    kind: "image",
    buffer: Buffer.from("image"),
    contentType: "image/png",
  });
  mockSavedVideoResult();

  const toolConfig = asConfig({
    agents: {
      defaults: {
        videoGenerationModel: { primary: "video-plugin/vid-v1" },
      },
    },
    tools: { web: { fetch: { ssrfPolicy: { allowRfc2544BenchmarkRange: true } } } },
  });
  const videoTool = createVideoGenerateTool({ config: toolConfig });
  if (!videoTool) {
    throw new Error("expected video_generate tool");
  }

  await videoTool.execute("call-1", {
    prompt: "lobster",
    image: referenceImagePath,
  });

  // The loader must have received the configured policy alongside the path.
  const expectedLoadOptions = expect.objectContaining({
    ssrfPolicy: { allowRfc2544BenchmarkRange: true },
  });
  expect(webMedia.loadWebMedia).toHaveBeenCalledWith(referenceImagePath, expectedLoadOptions);
});
|
||||
|
||||
it("rejects audio data: URLs via the templated rejection branch", async () => {
|
||||
mockVideoPluginProvider({
|
||||
maxInputAudios: 1,
|
||||
|
||||
@@ -2,6 +2,7 @@ import { Type } from "typebox";
|
||||
import { loadConfig } from "../../config/config.js";
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import { formatErrorMessage } from "../../infra/errors.js";
|
||||
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
|
||||
import { createSubsystemLogger } from "../../logging/subsystem.js";
|
||||
import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js";
|
||||
import {
|
||||
@@ -430,6 +431,7 @@ async function loadReferenceAssets(params: {
|
||||
maxBytes?: number;
|
||||
workspaceDir?: string;
|
||||
sandboxConfig: { root: string; bridge: SandboxFsBridge; workspaceOnly: boolean } | null;
|
||||
ssrfPolicy?: SsrFPolicy;
|
||||
}): Promise<
|
||||
Array<{
|
||||
sourceAsset: VideoGenerationSourceAsset;
|
||||
@@ -520,6 +522,7 @@ async function loadReferenceAssets(params: {
|
||||
: await loadWebMedia(resolvedPath ?? resolvedInput, {
|
||||
maxBytes: params.maxBytes,
|
||||
localRoots,
|
||||
ssrfPolicy: params.ssrfPolicy,
|
||||
});
|
||||
if (media.kind !== params.expectedKind) {
|
||||
throw new ToolInputError(`Unsupported media type: ${media.kind ?? "unknown"}`);
|
||||
@@ -810,6 +813,7 @@ export function createVideoGenerateTool(options?: {
|
||||
const action = resolveAction(args);
|
||||
const effectiveCfg =
|
||||
applyVideoGenerationModelConfigDefaults(cfg, videoGenerationModelConfig) ?? cfg;
|
||||
const remoteMediaSsrfPolicy = effectiveCfg.tools?.web?.fetch?.ssrfPolicy;
|
||||
|
||||
if (action === "list") {
|
||||
return createVideoGenerateListActionResult(effectiveCfg);
|
||||
@@ -900,6 +904,7 @@ export function createVideoGenerateTool(options?: {
|
||||
expectedKind: "image",
|
||||
workspaceDir: options?.workspaceDir,
|
||||
sandboxConfig,
|
||||
ssrfPolicy: remoteMediaSsrfPolicy,
|
||||
});
|
||||
// Attach roles to the loaded image assets (positional, by index into images[]).
|
||||
for (let i = 0; i < loadedReferenceImages.length; i++) {
|
||||
@@ -913,6 +918,7 @@ export function createVideoGenerateTool(options?: {
|
||||
expectedKind: "video",
|
||||
workspaceDir: options?.workspaceDir,
|
||||
sandboxConfig,
|
||||
ssrfPolicy: remoteMediaSsrfPolicy,
|
||||
});
|
||||
for (let i = 0; i < loadedReferenceVideos.length; i++) {
|
||||
const role = videoRoles[i];
|
||||
@@ -925,6 +931,7 @@ export function createVideoGenerateTool(options?: {
|
||||
expectedKind: "audio",
|
||||
workspaceDir: options?.workspaceDir,
|
||||
sandboxConfig,
|
||||
ssrfPolicy: remoteMediaSsrfPolicy,
|
||||
});
|
||||
for (let i = 0; i < loadedReferenceAudios.length; i++) {
|
||||
const role = audioRoles[i];
|
||||
|
||||
Reference in New Issue
Block a user