From 0902ee723b7629b120bf01ebbaa700b8ac95232c Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 29 May 2026 15:46:27 +0200 Subject: [PATCH] fix(provider): bound Vydra and Comfy media downloads --- CHANGELOG.md | 2 +- .../comfy/image-generation-provider.test.ts | 57 ++++++++++++++++ .../comfy/music-generation-provider.test.ts | 65 +++++++++++++++++++ .../comfy/video-generation-provider.test.ts | 59 +++++++++++++++++ extensions/comfy/workflow-runtime.ts | 40 +++++++++++- .../vydra/image-generation-provider.test.ts | 22 +++++++ extensions/vydra/image-generation-provider.ts | 2 + extensions/vydra/shared.ts | 40 +++++++++--- extensions/vydra/speech-provider.test.ts | 33 ++++++++++ extensions/vydra/speech-provider.ts | 2 + .../vydra/video-generation-provider.test.ts | 23 +++++++ extensions/vydra/video-generation-provider.ts | 2 + 12 files changed, 336 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bab4ec4205..d76d4fe182d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ Docs: https://docs.openclaw.ai ### Fixes -- Providers: bound generated video downloads from OpenAI, Runway, xAI, MiniMax, BytePlus, DashScope-compatible, FAL, OpenRouter, and Google providers, and bound generated FAL image downloads. +- Providers: bound generated media downloads from OpenAI, Runway, xAI, MiniMax, BytePlus, DashScope-compatible, FAL, OpenRouter, Google, Vydra, and Comfy providers. - Cron: retry recurring jobs after transient model rate limits before waiting for the next scheduled slot. ## 2026.5.28 diff --git a/extensions/comfy/image-generation-provider.test.ts b/extensions/comfy/image-generation-provider.test.ts index 92d0bd53467..cff790126a9 100644 --- a/extensions/comfy/image-generation-provider.test.ts +++ b/extensions/comfy/image-generation-provider.test.ts @@ -201,6 +201,63 @@ describe("comfy image-generation provider", () => { }); }); + it("rejects generated image downloads that exceed the configured media cap", async () => { + setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ prompt_id: "local-prompt-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + "local-prompt-1": { + outputs: { + "9": { + images: [{ filename: "generated.png", subfolder: "", type: "output" }], + }, + }, + }, + }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("too-large"), { + status: 200, + headers: { "content-type": "image/png" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildComfyImageGenerationProvider(); + await expect( + provider.generateImage({ + provider: "comfy", + model: "workflow", + prompt: "draw a lobster", + cfg: { + ...buildComfyConfig({ + workflow: { + "6": { inputs: { text: "" } }, + "9": { inputs: {} }, + }, + promptNodeId: "6", + outputNodeId: "9", + }), + agents: { defaults: { mediaMaxMb: 0.000001 } }, + } as never, + }), + ).rejects.toThrow("Comfy image output download exceeds 1 bytes"); + }); + it("reports malformed local workflow submit JSON as a provider error", async () => { setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); const release = vi.fn(async () => {}); diff --git a/extensions/comfy/music-generation-provider.test.ts b/extensions/comfy/music-generation-provider.test.ts index b19c7ba6902..8b5a0d18b28 100644 --- a/extensions/comfy/music-generation-provider.test.ts +++ b/extensions/comfy/music-generation-provider.test.ts @@ -98,4 +98,69 @@ describe("comfy music-generation provider", () => { }, }); }); + + it("rejects generated music downloads that exceed the configured media cap", async () => { + setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ prompt_id: "music-job-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + "music-job-1": { + outputs: { + "9": { + audio: [{ filename: "song.mp3", subfolder: "", type: "output" }], + }, + }, + }, + }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("too-large"), { + status: 200, + headers: { "content-type": "audio/mpeg" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildComfyMusicGenerationProvider(); + await expect( + provider.generateMusic({ + provider: "comfy", + model: "workflow", + prompt: "gentle ambient synth loop", + cfg: { + plugins: { + entries: { + comfy: { + config: { + music: { + workflow: { + "6": { inputs: { text: "" } }, + "9": { inputs: {} }, + }, + promptNodeId: "6", + outputNodeId: "9", + }, + }, + }, + }, + }, + agents: { defaults: { mediaMaxMb: 0.000001 } }, + } as never, + }), + ).rejects.toThrow("Comfy music output download exceeds 1 bytes"); + }); }); diff --git a/extensions/comfy/video-generation-provider.test.ts b/extensions/comfy/video-generation-provider.test.ts index c901a90db73..2d47eef597f 100644 --- a/extensions/comfy/video-generation-provider.test.ts +++ b/extensions/comfy/video-generation-provider.test.ts @@ -144,6 +144,65 @@ describe("comfy video-generation provider", () => { }); }); + it("rejects generated video downloads that exceed the configured media cap", async () => { + setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); + fetchWithSsrFGuardMock + .mockResolvedValueOnce({ + response: new Response(JSON.stringify({ prompt_id: "local-video-1" }), { + status: 200, + headers: { "content-type": "application/json" }, + }), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ + "local-video-1": { + outputs: { + "9": { + gifs: [{ filename: "generated.mp4", subfolder: "", type: "output" }], + }, + }, + }, + }), + { + status: 200, + headers: { "content-type": "application/json" }, + }, + ), + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: new Response(Buffer.from("too-large"), { + status: 200, + headers: { "content-type": "video/mp4" }, + }), + release: vi.fn(async () => {}), + }); + + const provider = buildComfyVideoGenerationProvider(); + await expect( + provider.generateVideo({ + provider: "comfy", + model: "workflow", + prompt: "animate a lobster", + cfg: { + ...buildComfyConfig({ + video: { + workflow: { + "6": { inputs: { text: "" } }, + "9": { inputs: {} }, + }, + promptNodeId: "6", + outputNodeId: "9", + }, + }), + agents: { defaults: { mediaMaxMb: 0.000001 } }, + } as never, + }), + ).rejects.toThrow("Comfy video output download exceeds 1 bytes"); + }); + it("uses cloud endpoints for video workflows", async () => { mockComfyProviderApiKey(); setComfyFetchGuardForTesting(fetchWithSsrFGuardMock); diff --git a/extensions/comfy/workflow-runtime.ts b/extensions/comfy/workflow-runtime.ts index b08405072f6..03a275caf49 100644 --- a/extensions/comfy/workflow-runtime.ts +++ b/extensions/comfy/workflow-runtime.ts @@ -12,6 +12,7 @@ import { normalizeBaseUrl, resolveProviderHttpRequestConfig, } from "openclaw/plugin-sdk/provider-http"; +import { readResponseWithLimit } from "openclaw/plugin-sdk/response-limit-runtime"; import { normalizeSecretInputString, resolveSecretInputString, @@ -39,6 +40,8 @@ const DEFAULT_PROMPT_INPUT_NAME = "text"; const DEFAULT_INPUT_IMAGE_INPUT_NAME = "image"; const DEFAULT_POLL_INTERVAL_MS = 1_500; const DEFAULT_TIMEOUT_MS = 5 * 60_000; +const DEFAULT_GENERATED_IMAGE_MAX_BYTES = 6 * 1024 * 1024; +const DEFAULT_GENERATED_MEDIA_MAX_BYTES = 16 * 1024 * 1024; export const DEFAULT_COMFY_MODEL = "workflow"; @@ -113,6 +116,19 @@ export function setComfyFetchGuardForTesting(impl: typeof fetchWithSsrFGuard | n comfyFetchGuard = impl ?? fetchWithSsrFGuard; } +function resolveComfyGeneratedOutputMaxBytes(params: { + cfg: OpenClawConfig; + capability: ComfyCapability; +}): number { + const configured = params.cfg.agents?.defaults?.mediaMaxMb; + if (typeof configured === "number" && Number.isFinite(configured) && configured > 0) { + return Math.floor(configured * 1024 * 1024); + } + return params.capability === "image" + ? DEFAULT_GENERATED_IMAGE_MAX_BYTES + : DEFAULT_GENERATED_MEDIA_MAX_BYTES; +} + function readConfigBoolean(config: ComfyProviderConfig, key: string): boolean | undefined { return asBoolean(config[key]); } @@ -505,6 +521,7 @@ async function downloadOutputFile(params: { file: ComfyOutputFile; mode: ComfyMode; capability: ComfyCapability; + maxBytes: number; }): Promise<{ buffer: Buffer; mimeType: string }> { const fileName = normalizeOptionalString(params.file.filename) || normalizeOptionalString(params.file.name); @@ -557,7 +574,15 @@ async function downloadOutputFile(params: { normalizeOptionalString(redirected.response.headers.get("content-type")) || "application/octet-stream"; return { - buffer: Buffer.from(await redirected.response.arrayBuffer()), + buffer: await readResponseWithLimit(redirected.response, params.maxBytes, { + chunkTimeoutMs: params.timeoutMs, + onOverflow: ({ maxBytes }) => + new Error(`Comfy ${params.capability} output download exceeds ${maxBytes} bytes`), + onIdleTimeout: ({ chunkTimeoutMs }) => + new Error( + `Comfy ${params.capability} output download stalled after ${chunkTimeoutMs}ms`, + ), + }), mimeType, }; } finally { @@ -570,7 +595,13 @@ async function downloadOutputFile(params: { normalizeOptionalString(firstResponse.response.headers.get("content-type")) || "application/octet-stream"; return { - buffer: Buffer.from(await firstResponse.response.arrayBuffer()), + buffer: await readResponseWithLimit(firstResponse.response, params.maxBytes, { + chunkTimeoutMs: params.timeoutMs, + onOverflow: ({ maxBytes }) => + new Error(`Comfy ${params.capability} output download exceeds ${maxBytes} bytes`), + onIdleTimeout: ({ chunkTimeoutMs }) => + new Error(`Comfy ${params.capability} output download stalled after ${chunkTimeoutMs}ms`), + }), mimeType, }; } finally { @@ -794,6 +825,10 @@ export async function runComfyWorkflow(params: { } const assets: ComfyGeneratedAsset[] = []; + const maxOutputBytes = resolveComfyGeneratedOutputMaxBytes({ + cfg: params.cfg, + capability: params.capability, + }); let assetIndex = 0; for (const output of outputFiles) { const downloaded = await downloadOutputFile({ @@ -805,6 +840,7 @@ export async function runComfyWorkflow(params: { file: output.file, mode, capability: params.capability, + maxBytes: maxOutputBytes, }); assetIndex += 1; const originalName = diff --git a/extensions/vydra/image-generation-provider.test.ts b/extensions/vydra/image-generation-provider.test.ts index 0a9860e49bd..9099b6c3db4 100644 --- a/extensions/vydra/image-generation-provider.test.ts +++ b/extensions/vydra/image-generation-provider.test.ts @@ -71,6 +71,28 @@ describe("vydra image-generation provider", () => { }); }); + it("rejects generated image downloads that exceed the configured media cap", async () => { + stubVydraApiKey(); + stubFetch( + jsonResponse({ + jobId: "job-123", + status: "completed", + imageUrl: "https://cdn.vydra.ai/generated/test.png", + }), + binaryResponse("too-large", "image/png"), + ); + + const provider = buildVydraImageGenerationProvider(); + await expect( + provider.generateImage({ + provider: "vydra", + model: "grok-imagine", + prompt: "draw a cat", + cfg: { agents: { defaults: { mediaMaxMb: 0.000001 } } }, + }), + ).rejects.toThrow("Vydra image download exceeds 1 bytes"); + }); + it("passes request SSRF policy to the image creation request", async () => { stubVydraApiKey(); const fetchMock = stubFetch( diff --git a/extensions/vydra/image-generation-provider.ts b/extensions/vydra/image-generation-provider.ts index fc71fb88626..066448e057c 100644 --- a/extensions/vydra/image-generation-provider.ts +++ b/extensions/vydra/image-generation-provider.ts @@ -6,6 +6,7 @@ import { downloadVydraAsset, extractVydraResultUrls, resolveCompletedVydraPayload, + resolveVydraGeneratedMediaMaxBytes, resolveVydraResponseJobId, resolveVydraResponseStatus, resolveVydraRequestContext, @@ -92,6 +93,7 @@ export function buildVydraImageGenerationProvider(): ImageGenerationProvider { kind: "image", timeoutMs: req.timeoutMs, fetchFn, + maxBytes: resolveVydraGeneratedMediaMaxBytes({ cfg: req.cfg, kind: "image" }), }); return { images: [ diff --git a/extensions/vydra/shared.ts b/extensions/vydra/shared.ts index 894d4bdf732..04681915fb5 100644 --- a/extensions/vydra/shared.ts +++ b/extensions/vydra/shared.ts @@ -11,6 +11,7 @@ import { type ProviderOperationDeadline, type ProviderOperationTimeoutMs, } from "openclaw/plugin-sdk/provider-http"; +import { readResponseWithLimit } from "openclaw/plugin-sdk/response-limit-runtime"; import { normalizeOptionalLowercaseString, normalizeOptionalString, @@ -22,6 +23,9 @@ export const DEFAULT_VYDRA_VIDEO_MODEL = "veo3"; export const DEFAULT_VYDRA_SPEECH_MODEL = "elevenlabs/tts"; export const DEFAULT_VYDRA_VOICE_ID = "21m00Tcm4TlvDq8ikWAM"; const DEFAULT_HTTP_TIMEOUT_MS = 120_000; +const DEFAULT_GENERATED_IMAGE_MAX_BYTES = 6 * 1024 * 1024; +const DEFAULT_GENERATED_AUDIO_MAX_BYTES = 16 * 1024 * 1024; +const DEFAULT_GENERATED_VIDEO_MAX_BYTES = 16 * 1024 * 1024; const POLL_INTERVAL_MS = 2_500; const MAX_POLL_ATTEMPTS = 120; type VydraAuthStore = Parameters[0]["store"]; @@ -210,27 +214,47 @@ function resolveVydraHttpTimeoutMs(timeoutMs: ProviderOperationTimeoutMs | undef return resolved; } +export function resolveVydraGeneratedMediaMaxBytes(params: { + cfg: { agents?: { defaults?: { mediaMaxMb?: number } } }; + kind: VydraMediaKind; +}): number { + const configured = params.cfg.agents?.defaults?.mediaMaxMb; + if (typeof configured === "number" && Number.isFinite(configured) && configured > 0) { + return Math.floor(configured * 1024 * 1024); + } + if (params.kind === "image") { + return DEFAULT_GENERATED_IMAGE_MAX_BYTES; + } + if (params.kind === "audio") { + return DEFAULT_GENERATED_AUDIO_MAX_BYTES; + } + return DEFAULT_GENERATED_VIDEO_MAX_BYTES; +} + export async function downloadVydraAsset(params: { url: string; kind: VydraMediaKind; timeoutMs?: ProviderOperationTimeoutMs; fetchFn: typeof fetch; + maxBytes: number; }): Promise<{ buffer: Buffer; mimeType: string; fileName: string }> { - const response = await fetchWithTimeout( - params.url, - { method: "GET" }, - resolveVydraHttpTimeoutMs(params.timeoutMs), - params.fetchFn, - ); + const timeoutMs = resolveVydraHttpTimeoutMs(params.timeoutMs); + const response = await fetchWithTimeout(params.url, { method: "GET" }, timeoutMs, params.fetchFn); await assertOkOrThrowHttpError(response, `Vydra ${params.kind} download failed`); const mimeType = response.headers.get("content-type")?.trim() || (params.kind === "image" ? "image/png" : params.kind === "audio" ? "audio/mpeg" : "video/mp4"); - const arrayBuffer = await response.arrayBuffer(); + const buffer = await readResponseWithLimit(response, params.maxBytes, { + chunkTimeoutMs: timeoutMs, + onOverflow: ({ maxBytes }) => + new Error(`Vydra ${params.kind} download exceeds ${maxBytes} bytes`), + onIdleTimeout: ({ chunkTimeoutMs }) => + new Error(`Vydra ${params.kind} download stalled after ${chunkTimeoutMs}ms`), + }); const extension = resolveVydraFileExtension(params.kind, mimeType); const fileStem = params.kind === "image" ? "image" : params.kind === "audio" ? "audio" : "video"; return { - buffer: Buffer.from(arrayBuffer), + buffer, mimeType, fileName: `${fileStem}-1.${extension}`, }; diff --git a/extensions/vydra/speech-provider.test.ts b/extensions/vydra/speech-provider.test.ts index 17fd9fc7478..23d208f4616 100644 --- a/extensions/vydra/speech-provider.test.ts +++ b/extensions/vydra/speech-provider.test.ts @@ -69,4 +69,37 @@ describe("vydra speech provider", () => { expect(result.fileExtension).toBe(".mp3"); expect(result.audioBuffer).toEqual(Buffer.from("mp3-data")); }); + + it("rejects generated audio downloads that exceed the configured media cap", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + new Response( + JSON.stringify({ + audioUrl: "https://cdn.vydra.ai/generated/test.mp3", + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ) + .mockResolvedValueOnce( + new Response(Buffer.from("too-large"), { + status: 200, + headers: { "Content-Type": "audio/mpeg" }, + }), + ); + vi.stubGlobal("fetch", fetchMock); + + await expect( + provider.synthesize({ + text: "OpenClaw test", + cfg: { agents: { defaults: { mediaMaxMb: 0.000001 } } } as never, + providerConfig: { apiKey: "vydra-test-key" }, + target: "audio-file", + timeoutMs: 30_000, + }), + ).rejects.toThrow("Vydra audio download exceeds 1 bytes"); + }); }); diff --git a/extensions/vydra/speech-provider.ts b/extensions/vydra/speech-provider.ts index 43e8630cdff..c60981f9f36 100644 --- a/extensions/vydra/speech-provider.ts +++ b/extensions/vydra/speech-provider.ts @@ -17,6 +17,7 @@ import { downloadVydraAsset, extractVydraResultUrls, normalizeVydraBaseUrl, + resolveVydraGeneratedMediaMaxBytes, trimToUndefined, } from "./shared.js"; @@ -137,6 +138,7 @@ export function buildVydraSpeechProvider(): SpeechProviderPlugin { kind: "audio", timeoutMs: req.timeoutMs, fetchFn, + maxBytes: resolveVydraGeneratedMediaMaxBytes({ cfg: req.cfg, kind: "audio" }), }); return { audioBuffer: audio.buffer, diff --git a/extensions/vydra/video-generation-provider.test.ts b/extensions/vydra/video-generation-provider.test.ts index cea449e8ab9..56926ec1a1a 100644 --- a/extensions/vydra/video-generation-provider.test.ts +++ b/extensions/vydra/video-generation-provider.test.ts @@ -72,6 +72,29 @@ describe("vydra video-generation provider", () => { }); }); + it("rejects generated video downloads that exceed the configured media cap", async () => { + stubVydraApiKey(); + stubFetch( + jsonResponse({ jobId: "job-123", status: "processing" }), + jsonResponse({ + jobId: "job-123", + status: "completed", + videoUrl: "https://cdn.vydra.ai/generated/test.mp4", + }), + binaryResponse("too-large", "video/mp4"), + ); + + const provider = buildVydraVideoGenerationProvider(); + await expect( + provider.generateVideo({ + provider: "vydra", + model: "veo3", + prompt: "tiny city at sunrise", + cfg: { agents: { defaults: { mediaMaxMb: 0.000001 } } }, + }), + ).rejects.toThrow("Vydra video download exceeds 1 bytes"); + }); + it("requires a remote image url for kling", async () => { stubVydraApiKey(); vi.stubGlobal("fetch", vi.fn()); diff --git a/extensions/vydra/video-generation-provider.ts b/extensions/vydra/video-generation-provider.ts index 6513d3a6dd8..7503996a786 100644 --- a/extensions/vydra/video-generation-provider.ts +++ b/extensions/vydra/video-generation-provider.ts @@ -12,6 +12,7 @@ import { downloadVydraAsset, extractVydraResultUrls, resolveCompletedVydraPayload, + resolveVydraGeneratedMediaMaxBytes, resolveVydraResponseJobId, resolveVydraResponseStatus, resolveVydraRequestContext, @@ -131,6 +132,7 @@ export function buildVydraVideoGenerationProvider(): VideoGenerationProvider { defaultTimeoutMs: DEFAULT_VYDRA_VIDEO_TIMEOUT_MS, }), fetchFn, + maxBytes: resolveVydraGeneratedMediaMaxBytes({ cfg: req.cfg, kind: "video" }), }); return { videos: [