fix(provider): bound Vydra and Comfy media downloads

This commit is contained in:
Vincent Koc
2026-05-29 15:46:27 +02:00
parent c093e4508d
commit 0902ee723b
12 changed files with 336 additions and 11 deletions

View File

@@ -6,7 +6,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Providers: bound generated video downloads from OpenAI, Runway, xAI, MiniMax, BytePlus, DashScope-compatible, FAL, OpenRouter, and Google providers, and bound generated FAL image downloads.
- Providers: bound generated media downloads from OpenAI, Runway, xAI, MiniMax, BytePlus, DashScope-compatible, FAL, OpenRouter, Google, Vydra, and Comfy providers.
- Cron: retry recurring jobs after transient model rate limits before waiting for the next scheduled slot.
## 2026.5.28

View File

@@ -201,6 +201,63 @@ describe("comfy image-generation provider", () => {
});
});
// Verifies that a Comfy image output whose body is larger than the configured
// media cap makes generateImage reject with the overflow error.
it("rejects generated image downloads that exceed the configured media cap", async () => {
// Route the provider's guarded fetches through the mock so no network is touched.
setComfyFetchGuardForTesting(fetchWithSsrFGuardMock);
fetchWithSsrFGuardMock
// 1st queued result: JSON carrying the prompt id (presumably the workflow-submit reply).
.mockResolvedValueOnce({
response: new Response(JSON.stringify({ prompt_id: "local-prompt-1" }), {
status: 200,
headers: { "content-type": "application/json" },
}),
release: vi.fn(async () => {}),
})
// 2nd queued result: JSON keyed by that prompt id, listing one image file under
// output node "9" (presumably the history/poll reply).
.mockResolvedValueOnce({
response: new Response(
JSON.stringify({
"local-prompt-1": {
outputs: {
"9": {
images: [{ filename: "generated.png", subfolder: "", type: "output" }],
},
},
},
}),
{
status: 200,
headers: { "content-type": "application/json" },
},
),
release: vi.fn(async () => {}),
})
// 3rd queued result: the binary body for the output file; "too-large" is 9 bytes.
.mockResolvedValueOnce({
response: new Response(Buffer.from("too-large"), {
status: 200,
headers: { "content-type": "image/png" },
}),
release: vi.fn(async () => {}),
});
const provider = buildComfyImageGenerationProvider();
await expect(
provider.generateImage({
provider: "comfy",
model: "workflow",
prompt: "draw a lobster",
cfg: {
...buildComfyConfig({
workflow: {
"6": { inputs: { text: "" } },
"9": { inputs: {} },
},
promptNodeId: "6",
outputNodeId: "9",
}),
// 0.000001 MB * 1024 * 1024 floors to a 1-byte cap, so the 9-byte body overflows.
agents: { defaults: { mediaMaxMb: 0.000001 } },
} as never,
}),
).rejects.toThrow("Comfy image output download exceeds 1 bytes");
});
it("reports malformed local workflow submit JSON as a provider error", async () => {
setComfyFetchGuardForTesting(fetchWithSsrFGuardMock);
const release = vi.fn(async () => {});

View File

@@ -98,4 +98,69 @@ describe("comfy music-generation provider", () => {
},
});
});
// Verifies that a Comfy audio output whose body is larger than the configured
// media cap makes generateMusic reject with the overflow error.
it("rejects generated music downloads that exceed the configured media cap", async () => {
// Route the provider's guarded fetches through the mock so no network is touched.
setComfyFetchGuardForTesting(fetchWithSsrFGuardMock);
fetchWithSsrFGuardMock
// 1st queued result: JSON carrying the prompt id (presumably the workflow-submit reply).
.mockResolvedValueOnce({
response: new Response(JSON.stringify({ prompt_id: "music-job-1" }), {
status: 200,
headers: { "content-type": "application/json" },
}),
release: vi.fn(async () => {}),
})
// 2nd queued result: JSON keyed by that prompt id, listing one audio file under
// output node "9" (presumably the history/poll reply).
.mockResolvedValueOnce({
response: new Response(
JSON.stringify({
"music-job-1": {
outputs: {
"9": {
audio: [{ filename: "song.mp3", subfolder: "", type: "output" }],
},
},
},
}),
{
status: 200,
headers: { "content-type": "application/json" },
},
),
release: vi.fn(async () => {}),
})
// 3rd queued result: the binary body for the output file; "too-large" is 9 bytes.
.mockResolvedValueOnce({
response: new Response(Buffer.from("too-large"), {
status: 200,
headers: { "content-type": "audio/mpeg" },
}),
release: vi.fn(async () => {}),
});
const provider = buildComfyMusicGenerationProvider();
await expect(
provider.generateMusic({
provider: "comfy",
model: "workflow",
prompt: "gentle ambient synth loop",
cfg: {
// Plugin config is spelled out inline here; the music workflow sits under `music`.
plugins: {
entries: {
comfy: {
config: {
music: {
workflow: {
"6": { inputs: { text: "" } },
"9": { inputs: {} },
},
promptNodeId: "6",
outputNodeId: "9",
},
},
},
},
},
// 0.000001 MB * 1024 * 1024 floors to a 1-byte cap, so the 9-byte body overflows.
agents: { defaults: { mediaMaxMb: 0.000001 } },
} as never,
}),
).rejects.toThrow("Comfy music output download exceeds 1 bytes");
});
});

View File

@@ -144,6 +144,65 @@ describe("comfy video-generation provider", () => {
});
});
// Verifies that a Comfy video output whose body is larger than the configured
// media cap makes generateVideo reject with the overflow error.
it("rejects generated video downloads that exceed the configured media cap", async () => {
// Route the provider's guarded fetches through the mock so no network is touched.
setComfyFetchGuardForTesting(fetchWithSsrFGuardMock);
fetchWithSsrFGuardMock
// 1st queued result: JSON carrying the prompt id (presumably the workflow-submit reply).
.mockResolvedValueOnce({
response: new Response(JSON.stringify({ prompt_id: "local-video-1" }), {
status: 200,
headers: { "content-type": "application/json" },
}),
release: vi.fn(async () => {}),
})
// 2nd queued result: JSON keyed by that prompt id; the video file is listed under the
// `gifs` key of output node "9" (presumably the history/poll reply).
.mockResolvedValueOnce({
response: new Response(
JSON.stringify({
"local-video-1": {
outputs: {
"9": {
gifs: [{ filename: "generated.mp4", subfolder: "", type: "output" }],
},
},
},
}),
{
status: 200,
headers: { "content-type": "application/json" },
},
),
release: vi.fn(async () => {}),
})
// 3rd queued result: the binary body for the output file; "too-large" is 9 bytes.
.mockResolvedValueOnce({
response: new Response(Buffer.from("too-large"), {
status: 200,
headers: { "content-type": "video/mp4" },
}),
release: vi.fn(async () => {}),
});
const provider = buildComfyVideoGenerationProvider();
await expect(
provider.generateVideo({
provider: "comfy",
model: "workflow",
prompt: "animate a lobster",
cfg: {
...buildComfyConfig({
video: {
workflow: {
"6": { inputs: { text: "" } },
"9": { inputs: {} },
},
promptNodeId: "6",
outputNodeId: "9",
},
}),
// 0.000001 MB * 1024 * 1024 floors to a 1-byte cap, so the 9-byte body overflows.
agents: { defaults: { mediaMaxMb: 0.000001 } },
} as never,
}),
).rejects.toThrow("Comfy video output download exceeds 1 bytes");
});
it("uses cloud endpoints for video workflows", async () => {
mockComfyProviderApiKey();
setComfyFetchGuardForTesting(fetchWithSsrFGuardMock);

View File

@@ -12,6 +12,7 @@ import {
normalizeBaseUrl,
resolveProviderHttpRequestConfig,
} from "openclaw/plugin-sdk/provider-http";
import { readResponseWithLimit } from "openclaw/plugin-sdk/response-limit-runtime";
import {
normalizeSecretInputString,
resolveSecretInputString,
@@ -39,6 +40,8 @@ const DEFAULT_PROMPT_INPUT_NAME = "text";
const DEFAULT_INPUT_IMAGE_INPUT_NAME = "image";
const DEFAULT_POLL_INTERVAL_MS = 1_500;
const DEFAULT_TIMEOUT_MS = 5 * 60_000;
// Fallback download cap for Comfy image outputs (6 MiB), used when
// agents.defaults.mediaMaxMb is not a positive finite number.
const DEFAULT_GENERATED_IMAGE_MAX_BYTES = 6 * 1024 * 1024;
// Fallback download cap for every non-image Comfy capability (16 MiB).
const DEFAULT_GENERATED_MEDIA_MAX_BYTES = 16 * 1024 * 1024;
export const DEFAULT_COMFY_MODEL = "workflow";
@@ -113,6 +116,19 @@ export function setComfyFetchGuardForTesting(impl: typeof fetchWithSsrFGuard | n
comfyFetchGuard = impl ?? fetchWithSsrFGuard;
}
/**
 * Picks the byte cap applied when downloading a single generated Comfy output.
 *
 * A positive, finite `agents.defaults.mediaMaxMb` wins and is converted to whole
 * bytes (rounded down). Otherwise the built-in default for the capability is used:
 * the image default for `"image"`, the shared media default for everything else.
 *
 * @param params.cfg - config whose `agents.defaults.mediaMaxMb` may override the cap.
 * @param params.capability - Comfy capability the output belongs to.
 * @returns the maximum number of bytes to accept for one output file.
 */
function resolveComfyGeneratedOutputMaxBytes(params: {
  cfg: OpenClawConfig;
  capability: ComfyCapability;
}): number {
  const overrideMb = params.cfg.agents?.defaults?.mediaMaxMb;
  // No usable override (missing, non-numeric, NaN/Infinity, zero or negative): use defaults.
  if (typeof overrideMb !== "number" || !Number.isFinite(overrideMb) || overrideMb <= 0) {
    if (params.capability === "image") {
      return DEFAULT_GENERATED_IMAGE_MAX_BYTES;
    }
    return DEFAULT_GENERATED_MEDIA_MAX_BYTES;
  }
  return Math.floor(overrideMb * 1024 * 1024);
}
/**
 * Looks up `key` on the Comfy provider config and passes the stored value
 * through `asBoolean`.
 *
 * @param config - Comfy provider config object to read from.
 * @param key - property name to look up.
 * @returns whatever `asBoolean` yields for the raw value: a boolean or `undefined`.
 */
function readConfigBoolean(config: ComfyProviderConfig, key: string): boolean | undefined {
  const rawValue = config[key];
  return asBoolean(rawValue);
}
@@ -505,6 +521,7 @@ async function downloadOutputFile(params: {
file: ComfyOutputFile;
mode: ComfyMode;
capability: ComfyCapability;
maxBytes: number;
}): Promise<{ buffer: Buffer; mimeType: string }> {
const fileName =
normalizeOptionalString(params.file.filename) || normalizeOptionalString(params.file.name);
@@ -557,7 +574,15 @@ async function downloadOutputFile(params: {
normalizeOptionalString(redirected.response.headers.get("content-type")) ||
"application/octet-stream";
return {
buffer: Buffer.from(await redirected.response.arrayBuffer()),
buffer: await readResponseWithLimit(redirected.response, params.maxBytes, {
chunkTimeoutMs: params.timeoutMs,
onOverflow: ({ maxBytes }) =>
new Error(`Comfy ${params.capability} output download exceeds ${maxBytes} bytes`),
onIdleTimeout: ({ chunkTimeoutMs }) =>
new Error(
`Comfy ${params.capability} output download stalled after ${chunkTimeoutMs}ms`,
),
}),
mimeType,
};
} finally {
@@ -570,7 +595,13 @@ async function downloadOutputFile(params: {
normalizeOptionalString(firstResponse.response.headers.get("content-type")) ||
"application/octet-stream";
return {
buffer: Buffer.from(await firstResponse.response.arrayBuffer()),
buffer: await readResponseWithLimit(firstResponse.response, params.maxBytes, {
chunkTimeoutMs: params.timeoutMs,
onOverflow: ({ maxBytes }) =>
new Error(`Comfy ${params.capability} output download exceeds ${maxBytes} bytes`),
onIdleTimeout: ({ chunkTimeoutMs }) =>
new Error(`Comfy ${params.capability} output download stalled after ${chunkTimeoutMs}ms`),
}),
mimeType,
};
} finally {
@@ -794,6 +825,10 @@ export async function runComfyWorkflow(params: {
}
const assets: ComfyGeneratedAsset[] = [];
const maxOutputBytes = resolveComfyGeneratedOutputMaxBytes({
cfg: params.cfg,
capability: params.capability,
});
let assetIndex = 0;
for (const output of outputFiles) {
const downloaded = await downloadOutputFile({
@@ -805,6 +840,7 @@ export async function runComfyWorkflow(params: {
file: output.file,
mode,
capability: params.capability,
maxBytes: maxOutputBytes,
});
assetIndex += 1;
const originalName =

View File

@@ -71,6 +71,28 @@ describe("vydra image-generation provider", () => {
});
});
// Verifies that a Vydra image whose body is larger than the configured media cap
// makes generateImage reject with the overflow error.
it("rejects generated image downloads that exceed the configured media cap", async () => {
stubVydraApiKey();
// NOTE(review): stubFetch is defined outside this hunk; presumably it serves these
// responses in order — a completed job pointing at the image URL, then the binary body.
stubFetch(
jsonResponse({
jobId: "job-123",
status: "completed",
imageUrl: "https://cdn.vydra.ai/generated/test.png",
}),
binaryResponse("too-large", "image/png"),
);
const provider = buildVydraImageGenerationProvider();
await expect(
provider.generateImage({
provider: "vydra",
model: "grok-imagine",
prompt: "draw a cat",
// 0.000001 MB * 1024 * 1024 floors to a 1-byte cap; "too-large" is 9 characters.
cfg: { agents: { defaults: { mediaMaxMb: 0.000001 } } },
}),
).rejects.toThrow("Vydra image download exceeds 1 bytes");
});
it("passes request SSRF policy to the image creation request", async () => {
stubVydraApiKey();
const fetchMock = stubFetch(

View File

@@ -6,6 +6,7 @@ import {
downloadVydraAsset,
extractVydraResultUrls,
resolveCompletedVydraPayload,
resolveVydraGeneratedMediaMaxBytes,
resolveVydraResponseJobId,
resolveVydraResponseStatus,
resolveVydraRequestContext,
@@ -92,6 +93,7 @@ export function buildVydraImageGenerationProvider(): ImageGenerationProvider {
kind: "image",
timeoutMs: req.timeoutMs,
fetchFn,
maxBytes: resolveVydraGeneratedMediaMaxBytes({ cfg: req.cfg, kind: "image" }),
});
return {
images: [

View File

@@ -11,6 +11,7 @@ import {
type ProviderOperationDeadline,
type ProviderOperationTimeoutMs,
} from "openclaw/plugin-sdk/provider-http";
import { readResponseWithLimit } from "openclaw/plugin-sdk/response-limit-runtime";
import {
normalizeOptionalLowercaseString,
normalizeOptionalString,
@@ -22,6 +23,9 @@ export const DEFAULT_VYDRA_VIDEO_MODEL = "veo3";
export const DEFAULT_VYDRA_SPEECH_MODEL = "elevenlabs/tts";
export const DEFAULT_VYDRA_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
const DEFAULT_HTTP_TIMEOUT_MS = 120_000;
// Fallback download caps, used when agents.defaults.mediaMaxMb is not a positive
// finite number: 6 MiB for images, 16 MiB for audio, 16 MiB for video.
const DEFAULT_GENERATED_IMAGE_MAX_BYTES = 6 * 1024 * 1024;
const DEFAULT_GENERATED_AUDIO_MAX_BYTES = 16 * 1024 * 1024;
// Also applies to any media kind that is neither "image" nor "audio".
const DEFAULT_GENERATED_VIDEO_MAX_BYTES = 16 * 1024 * 1024;
const POLL_INTERVAL_MS = 2_500;
const MAX_POLL_ATTEMPTS = 120;
type VydraAuthStore = Parameters<typeof resolveApiKeyForProvider>[0]["store"];
@@ -210,27 +214,47 @@ function resolveVydraHttpTimeoutMs(timeoutMs: ProviderOperationTimeoutMs | undef
return resolved;
}
/**
 * Resolves the byte cap applied when downloading a generated Vydra asset.
 *
 * A positive, finite `agents.defaults.mediaMaxMb` takes precedence and is converted
 * to whole bytes (rounded down). Otherwise the per-kind default applies: image,
 * audio, or — for any other kind — video.
 *
 * @param params.cfg - config whose `agents.defaults.mediaMaxMb` may override the cap.
 * @param params.kind - media kind being downloaded.
 * @returns the maximum number of bytes to accept for the asset.
 */
export function resolveVydraGeneratedMediaMaxBytes(params: {
  cfg: { agents?: { defaults?: { mediaMaxMb?: number } } };
  kind: VydraMediaKind;
}): number {
  const overrideMb = params.cfg.agents?.defaults?.mediaMaxMb;
  // No usable override (missing, non-numeric, NaN/Infinity, zero or negative): use defaults.
  if (typeof overrideMb !== "number" || !Number.isFinite(overrideMb) || overrideMb <= 0) {
    switch (params.kind) {
      case "image":
        return DEFAULT_GENERATED_IMAGE_MAX_BYTES;
      case "audio":
        return DEFAULT_GENERATED_AUDIO_MAX_BYTES;
      default:
        return DEFAULT_GENERATED_VIDEO_MAX_BYTES;
    }
  }
  return Math.floor(overrideMb * 1024 * 1024);
}
export async function downloadVydraAsset(params: {
url: string;
kind: VydraMediaKind;
timeoutMs?: ProviderOperationTimeoutMs;
fetchFn: typeof fetch;
maxBytes: number;
}): Promise<{ buffer: Buffer; mimeType: string; fileName: string }> {
const response = await fetchWithTimeout(
params.url,
{ method: "GET" },
resolveVydraHttpTimeoutMs(params.timeoutMs),
params.fetchFn,
);
const timeoutMs = resolveVydraHttpTimeoutMs(params.timeoutMs);
const response = await fetchWithTimeout(params.url, { method: "GET" }, timeoutMs, params.fetchFn);
await assertOkOrThrowHttpError(response, `Vydra ${params.kind} download failed`);
const mimeType =
response.headers.get("content-type")?.trim() ||
(params.kind === "image" ? "image/png" : params.kind === "audio" ? "audio/mpeg" : "video/mp4");
const arrayBuffer = await response.arrayBuffer();
const buffer = await readResponseWithLimit(response, params.maxBytes, {
chunkTimeoutMs: timeoutMs,
onOverflow: ({ maxBytes }) =>
new Error(`Vydra ${params.kind} download exceeds ${maxBytes} bytes`),
onIdleTimeout: ({ chunkTimeoutMs }) =>
new Error(`Vydra ${params.kind} download stalled after ${chunkTimeoutMs}ms`),
});
const extension = resolveVydraFileExtension(params.kind, mimeType);
const fileStem = params.kind === "image" ? "image" : params.kind === "audio" ? "audio" : "video";
return {
buffer: Buffer.from(arrayBuffer),
buffer,
mimeType,
fileName: `${fileStem}-1.${extension}`,
};

View File

@@ -69,4 +69,37 @@ describe("vydra speech provider", () => {
expect(result.fileExtension).toBe(".mp3");
expect(result.audioBuffer).toEqual(Buffer.from("mp3-data"));
});
// Verifies that synthesized Vydra audio whose body is larger than the configured
// media cap makes synthesize reject with the overflow error.
it("rejects generated audio downloads that exceed the configured media cap", async () => {
const fetchMock = vi
.fn()
// 1st fetch result: JSON pointing at the generated audio URL.
.mockResolvedValueOnce(
new Response(
JSON.stringify({
audioUrl: "https://cdn.vydra.ai/generated/test.mp3",
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
)
// 2nd fetch result: the binary audio body; "too-large" is 9 bytes.
.mockResolvedValueOnce(
new Response(Buffer.from("too-large"), {
status: 200,
headers: { "Content-Type": "audio/mpeg" },
}),
);
vi.stubGlobal("fetch", fetchMock);
await expect(
provider.synthesize({
text: "OpenClaw test",
// 0.000001 MB * 1024 * 1024 floors to a 1-byte cap, so the 9-byte body overflows.
cfg: { agents: { defaults: { mediaMaxMb: 0.000001 } } } as never,
providerConfig: { apiKey: "vydra-test-key" },
target: "audio-file",
timeoutMs: 30_000,
}),
).rejects.toThrow("Vydra audio download exceeds 1 bytes");
});
});

View File

@@ -17,6 +17,7 @@ import {
downloadVydraAsset,
extractVydraResultUrls,
normalizeVydraBaseUrl,
resolveVydraGeneratedMediaMaxBytes,
trimToUndefined,
} from "./shared.js";
@@ -137,6 +138,7 @@ export function buildVydraSpeechProvider(): SpeechProviderPlugin {
kind: "audio",
timeoutMs: req.timeoutMs,
fetchFn,
maxBytes: resolveVydraGeneratedMediaMaxBytes({ cfg: req.cfg, kind: "audio" }),
});
return {
audioBuffer: audio.buffer,

View File

@@ -72,6 +72,29 @@ describe("vydra video-generation provider", () => {
});
});
// Verifies that a Vydra video whose body is larger than the configured media cap
// makes generateVideo reject with the overflow error.
it("rejects generated video downloads that exceed the configured media cap", async () => {
stubVydraApiKey();
// NOTE(review): stubFetch is defined outside this hunk; presumably it serves these
// responses in order — a "processing" job, the completed job with the video URL,
// then the binary body.
stubFetch(
jsonResponse({ jobId: "job-123", status: "processing" }),
jsonResponse({
jobId: "job-123",
status: "completed",
videoUrl: "https://cdn.vydra.ai/generated/test.mp4",
}),
binaryResponse("too-large", "video/mp4"),
);
const provider = buildVydraVideoGenerationProvider();
await expect(
provider.generateVideo({
provider: "vydra",
model: "veo3",
prompt: "tiny city at sunrise",
// 0.000001 MB * 1024 * 1024 floors to a 1-byte cap; "too-large" is 9 characters.
cfg: { agents: { defaults: { mediaMaxMb: 0.000001 } } },
}),
).rejects.toThrow("Vydra video download exceeds 1 bytes");
});
it("requires a remote image url for kling", async () => {
stubVydraApiKey();
vi.stubGlobal("fetch", vi.fn());

View File

@@ -12,6 +12,7 @@ import {
downloadVydraAsset,
extractVydraResultUrls,
resolveCompletedVydraPayload,
resolveVydraGeneratedMediaMaxBytes,
resolveVydraResponseJobId,
resolveVydraResponseStatus,
resolveVydraRequestContext,
@@ -131,6 +132,7 @@ export function buildVydraVideoGenerationProvider(): VideoGenerationProvider {
defaultTimeoutMs: DEFAULT_VYDRA_VIDEO_TIMEOUT_MS,
}),
fetchFn,
maxBytes: resolveVydraGeneratedMediaMaxBytes({ cfg: req.cfg, kind: "video" }),
});
return {
videos: [