mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-03 18:04:06 +00:00
fix(provider): bound Vydra and Comfy media downloads
This commit is contained in:
@@ -6,7 +6,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Providers: bound generated video downloads from OpenAI, Runway, xAI, MiniMax, BytePlus, DashScope-compatible, FAL, OpenRouter, and Google providers, and bound generated FAL image downloads.
|
||||
- Providers: bound generated media downloads from OpenAI, Runway, xAI, MiniMax, BytePlus, DashScope-compatible, FAL, OpenRouter, Google, Vydra, and Comfy providers.
|
||||
- Cron: retry recurring jobs after transient model rate limits before waiting for the next scheduled slot.
|
||||
|
||||
## 2026.5.28
|
||||
|
||||
@@ -201,6 +201,63 @@ describe("comfy image-generation provider", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// An image output larger than the configured media cap must reject the whole
// generateImage call. mediaMaxMb 0.000001 floors to a 1-byte limit, so the
// 9-byte "too-large" body is over the cap.
it("rejects generated image downloads that exceed the configured media cap", async () => {
  setComfyFetchGuardForTesting(fetchWithSsrFGuardMock);

  // Wraps a response in the { response, release } shape the guard mock resolves with.
  const guarded = (response: Response) => ({ response, release: vi.fn(async () => {}) });
  const jsonReply = (body: unknown) =>
    new Response(JSON.stringify(body), {
      status: 200,
      headers: { "content-type": "application/json" },
    });

  const promptReply = jsonReply({ prompt_id: "local-prompt-1" });
  const outputsReply = jsonReply({
    "local-prompt-1": {
      outputs: {
        "9": {
          images: [{ filename: "generated.png", subfolder: "", type: "output" }],
        },
      },
    },
  });
  const oversizedReply = new Response(Buffer.from("too-large"), {
    status: 200,
    headers: { "content-type": "image/png" },
  });

  // Replies are consumed in the order they are queued here.
  fetchWithSsrFGuardMock
    .mockResolvedValueOnce(guarded(promptReply))
    .mockResolvedValueOnce(guarded(outputsReply))
    .mockResolvedValueOnce(guarded(oversizedReply));

  const provider = buildComfyImageGenerationProvider();
  const pending = provider.generateImage({
    provider: "comfy",
    model: "workflow",
    prompt: "draw a lobster",
    cfg: {
      ...buildComfyConfig({
        workflow: {
          "6": { inputs: { text: "" } },
          "9": { inputs: {} },
        },
        promptNodeId: "6",
        outputNodeId: "9",
      }),
      agents: { defaults: { mediaMaxMb: 0.000001 } },
    } as never,
  });

  await expect(pending).rejects.toThrow("Comfy image output download exceeds 1 bytes");
});
|
||||
|
||||
it("reports malformed local workflow submit JSON as a provider error", async () => {
|
||||
setComfyFetchGuardForTesting(fetchWithSsrFGuardMock);
|
||||
const release = vi.fn(async () => {});
|
||||
|
||||
@@ -98,4 +98,69 @@ describe("comfy music-generation provider", () => {
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
// An audio output larger than the configured media cap must reject the whole
// generateMusic call. mediaMaxMb 0.000001 yields the 1-byte limit named in the
// expected error message.
it("rejects generated music downloads that exceed the configured media cap", async () => {
  setComfyFetchGuardForTesting(fetchWithSsrFGuardMock);

  // Wraps a response in the { response, release } shape the guard mock resolves with.
  const guarded = (response: Response) => ({ response, release: vi.fn(async () => {}) });
  const jsonReply = (body: unknown) =>
    new Response(JSON.stringify(body), {
      status: 200,
      headers: { "content-type": "application/json" },
    });

  const promptReply = jsonReply({ prompt_id: "music-job-1" });
  const outputsReply = jsonReply({
    "music-job-1": {
      outputs: {
        "9": {
          audio: [{ filename: "song.mp3", subfolder: "", type: "output" }],
        },
      },
    },
  });
  const oversizedReply = new Response(Buffer.from("too-large"), {
    status: 200,
    headers: { "content-type": "audio/mpeg" },
  });

  // Replies are consumed in the order they are queued here.
  fetchWithSsrFGuardMock
    .mockResolvedValueOnce(guarded(promptReply))
    .mockResolvedValueOnce(guarded(outputsReply))
    .mockResolvedValueOnce(guarded(oversizedReply));

  const provider = buildComfyMusicGenerationProvider();
  const pending = provider.generateMusic({
    provider: "comfy",
    model: "workflow",
    prompt: "gentle ambient synth loop",
    cfg: {
      plugins: {
        entries: {
          comfy: {
            config: {
              music: {
                workflow: {
                  "6": { inputs: { text: "" } },
                  "9": { inputs: {} },
                },
                promptNodeId: "6",
                outputNodeId: "9",
              },
            },
          },
        },
      },
      agents: { defaults: { mediaMaxMb: 0.000001 } },
    } as never,
  });

  await expect(pending).rejects.toThrow("Comfy music output download exceeds 1 bytes");
});
|
||||
});
|
||||
|
||||
@@ -144,6 +144,65 @@ describe("comfy video-generation provider", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// A video output larger than the configured media cap must reject the whole
// generateVideo call. mediaMaxMb 0.000001 yields the 1-byte limit named in the
// expected error message.
it("rejects generated video downloads that exceed the configured media cap", async () => {
  setComfyFetchGuardForTesting(fetchWithSsrFGuardMock);

  // Wraps a response in the { response, release } shape the guard mock resolves with.
  const guarded = (response: Response) => ({ response, release: vi.fn(async () => {}) });
  const jsonReply = (body: unknown) =>
    new Response(JSON.stringify(body), {
      status: 200,
      headers: { "content-type": "application/json" },
    });

  const promptReply = jsonReply({ prompt_id: "local-video-1" });
  const outputsReply = jsonReply({
    "local-video-1": {
      outputs: {
        "9": {
          gifs: [{ filename: "generated.mp4", subfolder: "", type: "output" }],
        },
      },
    },
  });
  const oversizedReply = new Response(Buffer.from("too-large"), {
    status: 200,
    headers: { "content-type": "video/mp4" },
  });

  // Replies are consumed in the order they are queued here.
  fetchWithSsrFGuardMock
    .mockResolvedValueOnce(guarded(promptReply))
    .mockResolvedValueOnce(guarded(outputsReply))
    .mockResolvedValueOnce(guarded(oversizedReply));

  const provider = buildComfyVideoGenerationProvider();
  const pending = provider.generateVideo({
    provider: "comfy",
    model: "workflow",
    prompt: "animate a lobster",
    cfg: {
      ...buildComfyConfig({
        video: {
          workflow: {
            "6": { inputs: { text: "" } },
            "9": { inputs: {} },
          },
          promptNodeId: "6",
          outputNodeId: "9",
        },
      }),
      agents: { defaults: { mediaMaxMb: 0.000001 } },
    } as never,
  });

  await expect(pending).rejects.toThrow("Comfy video output download exceeds 1 bytes");
});
|
||||
|
||||
it("uses cloud endpoints for video workflows", async () => {
|
||||
mockComfyProviderApiKey();
|
||||
setComfyFetchGuardForTesting(fetchWithSsrFGuardMock);
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
normalizeBaseUrl,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import { readResponseWithLimit } from "openclaw/plugin-sdk/response-limit-runtime";
|
||||
import {
|
||||
normalizeSecretInputString,
|
||||
resolveSecretInputString,
|
||||
@@ -39,6 +40,8 @@ const DEFAULT_PROMPT_INPUT_NAME = "text";
|
||||
const DEFAULT_INPUT_IMAGE_INPUT_NAME = "image";
|
||||
const DEFAULT_POLL_INTERVAL_MS = 1_500;
|
||||
const DEFAULT_TIMEOUT_MS = 5 * 60_000;
|
||||
const DEFAULT_GENERATED_IMAGE_MAX_BYTES = 6 * 1024 * 1024;
|
||||
const DEFAULT_GENERATED_MEDIA_MAX_BYTES = 16 * 1024 * 1024;
|
||||
|
||||
export const DEFAULT_COMFY_MODEL = "workflow";
|
||||
|
||||
@@ -113,6 +116,19 @@ export function setComfyFetchGuardForTesting(impl: typeof fetchWithSsrFGuard | n
|
||||
comfyFetchGuard = impl ?? fetchWithSsrFGuard;
|
||||
}
|
||||
|
||||
/**
 * Picks the byte limit for a downloaded Comfy output.
 *
 * A finite, positive `agents.defaults.mediaMaxMb` takes precedence and is
 * converted to whole bytes (MiB * 1024 * 1024, rounded down). Any other value
 * falls back to the built-in default: the image default for the "image"
 * capability, the generic media default for every other capability.
 *
 * @param params.cfg - Config object read for `agents.defaults.mediaMaxMb`.
 * @param params.capability - Capability that selects the fallback default.
 * @returns The maximum number of bytes to accept.
 */
function resolveComfyGeneratedOutputMaxBytes(params: {
  cfg: OpenClawConfig;
  capability: ComfyCapability;
}): number {
  const mediaMaxMb = params.cfg.agents?.defaults?.mediaMaxMb;

  // Missing, non-numeric, NaN/Infinity, zero and negative values all mean "no override".
  if (typeof mediaMaxMb !== "number" || !Number.isFinite(mediaMaxMb) || mediaMaxMb <= 0) {
    if (params.capability === "image") {
      return DEFAULT_GENERATED_IMAGE_MAX_BYTES;
    }
    return DEFAULT_GENERATED_MEDIA_MAX_BYTES;
  }

  return Math.floor(mediaMaxMb * 1024 * 1024);
}
|
||||
|
||||
function readConfigBoolean(config: ComfyProviderConfig, key: string): boolean | undefined {
|
||||
return asBoolean(config[key]);
|
||||
}
|
||||
@@ -505,6 +521,7 @@ async function downloadOutputFile(params: {
|
||||
file: ComfyOutputFile;
|
||||
mode: ComfyMode;
|
||||
capability: ComfyCapability;
|
||||
maxBytes: number;
|
||||
}): Promise<{ buffer: Buffer; mimeType: string }> {
|
||||
const fileName =
|
||||
normalizeOptionalString(params.file.filename) || normalizeOptionalString(params.file.name);
|
||||
@@ -557,7 +574,15 @@ async function downloadOutputFile(params: {
|
||||
normalizeOptionalString(redirected.response.headers.get("content-type")) ||
|
||||
"application/octet-stream";
|
||||
return {
|
||||
buffer: Buffer.from(await redirected.response.arrayBuffer()),
|
||||
buffer: await readResponseWithLimit(redirected.response, params.maxBytes, {
|
||||
chunkTimeoutMs: params.timeoutMs,
|
||||
onOverflow: ({ maxBytes }) =>
|
||||
new Error(`Comfy ${params.capability} output download exceeds ${maxBytes} bytes`),
|
||||
onIdleTimeout: ({ chunkTimeoutMs }) =>
|
||||
new Error(
|
||||
`Comfy ${params.capability} output download stalled after ${chunkTimeoutMs}ms`,
|
||||
),
|
||||
}),
|
||||
mimeType,
|
||||
};
|
||||
} finally {
|
||||
@@ -570,7 +595,13 @@ async function downloadOutputFile(params: {
|
||||
normalizeOptionalString(firstResponse.response.headers.get("content-type")) ||
|
||||
"application/octet-stream";
|
||||
return {
|
||||
buffer: Buffer.from(await firstResponse.response.arrayBuffer()),
|
||||
buffer: await readResponseWithLimit(firstResponse.response, params.maxBytes, {
|
||||
chunkTimeoutMs: params.timeoutMs,
|
||||
onOverflow: ({ maxBytes }) =>
|
||||
new Error(`Comfy ${params.capability} output download exceeds ${maxBytes} bytes`),
|
||||
onIdleTimeout: ({ chunkTimeoutMs }) =>
|
||||
new Error(`Comfy ${params.capability} output download stalled after ${chunkTimeoutMs}ms`),
|
||||
}),
|
||||
mimeType,
|
||||
};
|
||||
} finally {
|
||||
@@ -794,6 +825,10 @@ export async function runComfyWorkflow(params: {
|
||||
}
|
||||
|
||||
const assets: ComfyGeneratedAsset[] = [];
|
||||
const maxOutputBytes = resolveComfyGeneratedOutputMaxBytes({
|
||||
cfg: params.cfg,
|
||||
capability: params.capability,
|
||||
});
|
||||
let assetIndex = 0;
|
||||
for (const output of outputFiles) {
|
||||
const downloaded = await downloadOutputFile({
|
||||
@@ -805,6 +840,7 @@ export async function runComfyWorkflow(params: {
|
||||
file: output.file,
|
||||
mode,
|
||||
capability: params.capability,
|
||||
maxBytes: maxOutputBytes,
|
||||
});
|
||||
assetIndex += 1;
|
||||
const originalName =
|
||||
|
||||
@@ -71,6 +71,28 @@ describe("vydra image-generation provider", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// An image download larger than the configured media cap must reject the
// generateImage call. mediaMaxMb 0.000001 yields the 1-byte limit named in the
// expected error message.
it("rejects generated image downloads that exceed the configured media cap", async () => {
  stubVydraApiKey();

  const completedJob = jsonResponse({
    jobId: "job-123",
    status: "completed",
    imageUrl: "https://cdn.vydra.ai/generated/test.png",
  });
  const oversizedImage = binaryResponse("too-large", "image/png");
  stubFetch(completedJob, oversizedImage);

  const provider = buildVydraImageGenerationProvider();
  const pending = provider.generateImage({
    provider: "vydra",
    model: "grok-imagine",
    prompt: "draw a cat",
    cfg: { agents: { defaults: { mediaMaxMb: 0.000001 } } },
  });

  await expect(pending).rejects.toThrow("Vydra image download exceeds 1 bytes");
});
|
||||
|
||||
it("passes request SSRF policy to the image creation request", async () => {
|
||||
stubVydraApiKey();
|
||||
const fetchMock = stubFetch(
|
||||
|
||||
@@ -6,6 +6,7 @@ import {
|
||||
downloadVydraAsset,
|
||||
extractVydraResultUrls,
|
||||
resolveCompletedVydraPayload,
|
||||
resolveVydraGeneratedMediaMaxBytes,
|
||||
resolveVydraResponseJobId,
|
||||
resolveVydraResponseStatus,
|
||||
resolveVydraRequestContext,
|
||||
@@ -92,6 +93,7 @@ export function buildVydraImageGenerationProvider(): ImageGenerationProvider {
|
||||
kind: "image",
|
||||
timeoutMs: req.timeoutMs,
|
||||
fetchFn,
|
||||
maxBytes: resolveVydraGeneratedMediaMaxBytes({ cfg: req.cfg, kind: "image" }),
|
||||
});
|
||||
return {
|
||||
images: [
|
||||
|
||||
@@ -11,6 +11,7 @@ import {
|
||||
type ProviderOperationDeadline,
|
||||
type ProviderOperationTimeoutMs,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import { readResponseWithLimit } from "openclaw/plugin-sdk/response-limit-runtime";
|
||||
import {
|
||||
normalizeOptionalLowercaseString,
|
||||
normalizeOptionalString,
|
||||
@@ -22,6 +23,9 @@ export const DEFAULT_VYDRA_VIDEO_MODEL = "veo3";
|
||||
export const DEFAULT_VYDRA_SPEECH_MODEL = "elevenlabs/tts";
|
||||
export const DEFAULT_VYDRA_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
|
||||
const DEFAULT_HTTP_TIMEOUT_MS = 120_000;
|
||||
const DEFAULT_GENERATED_IMAGE_MAX_BYTES = 6 * 1024 * 1024;
|
||||
const DEFAULT_GENERATED_AUDIO_MAX_BYTES = 16 * 1024 * 1024;
|
||||
const DEFAULT_GENERATED_VIDEO_MAX_BYTES = 16 * 1024 * 1024;
|
||||
const POLL_INTERVAL_MS = 2_500;
|
||||
const MAX_POLL_ATTEMPTS = 120;
|
||||
type VydraAuthStore = Parameters<typeof resolveApiKeyForProvider>[0]["store"];
|
||||
@@ -210,27 +214,47 @@ function resolveVydraHttpTimeoutMs(timeoutMs: ProviderOperationTimeoutMs | undef
|
||||
return resolved;
|
||||
}
|
||||
|
||||
/**
 * Picks the byte limit for a downloaded Vydra asset.
 *
 * A finite, positive `agents.defaults.mediaMaxMb` takes precedence and is
 * converted to whole bytes (MiB * 1024 * 1024, rounded down). Any other value
 * falls back to the per-kind default: image, audio, or (for every other kind)
 * video.
 *
 * @param params.cfg - Config object read for `agents.defaults.mediaMaxMb`.
 * @param params.kind - Media kind that selects the fallback default.
 * @returns The maximum number of bytes to accept.
 */
export function resolveVydraGeneratedMediaMaxBytes(params: {
  cfg: { agents?: { defaults?: { mediaMaxMb?: number } } };
  kind: VydraMediaKind;
}): number {
  const mediaMaxMb = params.cfg.agents?.defaults?.mediaMaxMb;

  // Missing, non-numeric, NaN/Infinity, zero and negative values all mean "no override".
  if (typeof mediaMaxMb !== "number" || !Number.isFinite(mediaMaxMb) || mediaMaxMb <= 0) {
    switch (params.kind) {
      case "image":
        return DEFAULT_GENERATED_IMAGE_MAX_BYTES;
      case "audio":
        return DEFAULT_GENERATED_AUDIO_MAX_BYTES;
      default:
        return DEFAULT_GENERATED_VIDEO_MAX_BYTES;
    }
  }

  return Math.floor(mediaMaxMb * 1024 * 1024);
}
|
||||
|
||||
export async function downloadVydraAsset(params: {
|
||||
url: string;
|
||||
kind: VydraMediaKind;
|
||||
timeoutMs?: ProviderOperationTimeoutMs;
|
||||
fetchFn: typeof fetch;
|
||||
maxBytes: number;
|
||||
}): Promise<{ buffer: Buffer; mimeType: string; fileName: string }> {
|
||||
const response = await fetchWithTimeout(
|
||||
params.url,
|
||||
{ method: "GET" },
|
||||
resolveVydraHttpTimeoutMs(params.timeoutMs),
|
||||
params.fetchFn,
|
||||
);
|
||||
const timeoutMs = resolveVydraHttpTimeoutMs(params.timeoutMs);
|
||||
const response = await fetchWithTimeout(params.url, { method: "GET" }, timeoutMs, params.fetchFn);
|
||||
await assertOkOrThrowHttpError(response, `Vydra ${params.kind} download failed`);
|
||||
const mimeType =
|
||||
response.headers.get("content-type")?.trim() ||
|
||||
(params.kind === "image" ? "image/png" : params.kind === "audio" ? "audio/mpeg" : "video/mp4");
|
||||
const arrayBuffer = await response.arrayBuffer();
|
||||
const buffer = await readResponseWithLimit(response, params.maxBytes, {
|
||||
chunkTimeoutMs: timeoutMs,
|
||||
onOverflow: ({ maxBytes }) =>
|
||||
new Error(`Vydra ${params.kind} download exceeds ${maxBytes} bytes`),
|
||||
onIdleTimeout: ({ chunkTimeoutMs }) =>
|
||||
new Error(`Vydra ${params.kind} download stalled after ${chunkTimeoutMs}ms`),
|
||||
});
|
||||
const extension = resolveVydraFileExtension(params.kind, mimeType);
|
||||
const fileStem = params.kind === "image" ? "image" : params.kind === "audio" ? "audio" : "video";
|
||||
return {
|
||||
buffer: Buffer.from(arrayBuffer),
|
||||
buffer,
|
||||
mimeType,
|
||||
fileName: `${fileStem}-1.${extension}`,
|
||||
};
|
||||
|
||||
@@ -69,4 +69,37 @@ describe("vydra speech provider", () => {
|
||||
expect(result.fileExtension).toBe(".mp3");
|
||||
expect(result.audioBuffer).toEqual(Buffer.from("mp3-data"));
|
||||
});
|
||||
|
||||
// An audio download larger than the configured media cap must reject the
// synthesize call. mediaMaxMb 0.000001 yields the 1-byte limit named in the
// expected error message.
it("rejects generated audio downloads that exceed the configured media cap", async () => {
  const audioUrlReply = new Response(
    JSON.stringify({
      audioUrl: "https://cdn.vydra.ai/generated/test.mp3",
    }),
    {
      status: 200,
      headers: { "Content-Type": "application/json" },
    },
  );
  const oversizedAudio = new Response(Buffer.from("too-large"), {
    status: 200,
    headers: { "Content-Type": "audio/mpeg" },
  });

  // Replies are consumed in the order they are queued here.
  const fetchMock = vi.fn();
  fetchMock.mockResolvedValueOnce(audioUrlReply);
  fetchMock.mockResolvedValueOnce(oversizedAudio);
  vi.stubGlobal("fetch", fetchMock);

  const pending = provider.synthesize({
    text: "OpenClaw test",
    cfg: { agents: { defaults: { mediaMaxMb: 0.000001 } } } as never,
    providerConfig: { apiKey: "vydra-test-key" },
    target: "audio-file",
    timeoutMs: 30_000,
  });

  await expect(pending).rejects.toThrow("Vydra audio download exceeds 1 bytes");
});
|
||||
});
|
||||
|
||||
@@ -17,6 +17,7 @@ import {
|
||||
downloadVydraAsset,
|
||||
extractVydraResultUrls,
|
||||
normalizeVydraBaseUrl,
|
||||
resolveVydraGeneratedMediaMaxBytes,
|
||||
trimToUndefined,
|
||||
} from "./shared.js";
|
||||
|
||||
@@ -137,6 +138,7 @@ export function buildVydraSpeechProvider(): SpeechProviderPlugin {
|
||||
kind: "audio",
|
||||
timeoutMs: req.timeoutMs,
|
||||
fetchFn,
|
||||
maxBytes: resolveVydraGeneratedMediaMaxBytes({ cfg: req.cfg, kind: "audio" }),
|
||||
});
|
||||
return {
|
||||
audioBuffer: audio.buffer,
|
||||
|
||||
@@ -72,6 +72,29 @@ describe("vydra video-generation provider", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// A video download larger than the configured media cap must reject the
// generateVideo call. mediaMaxMb 0.000001 yields the 1-byte limit named in the
// expected error message.
it("rejects generated video downloads that exceed the configured media cap", async () => {
  stubVydraApiKey();

  const processingJob = jsonResponse({ jobId: "job-123", status: "processing" });
  const completedJob = jsonResponse({
    jobId: "job-123",
    status: "completed",
    videoUrl: "https://cdn.vydra.ai/generated/test.mp4",
  });
  const oversizedVideo = binaryResponse("too-large", "video/mp4");
  stubFetch(processingJob, completedJob, oversizedVideo);

  const provider = buildVydraVideoGenerationProvider();
  const pending = provider.generateVideo({
    provider: "vydra",
    model: "veo3",
    prompt: "tiny city at sunrise",
    cfg: { agents: { defaults: { mediaMaxMb: 0.000001 } } },
  });

  await expect(pending).rejects.toThrow("Vydra video download exceeds 1 bytes");
});
|
||||
|
||||
it("requires a remote image url for kling", async () => {
|
||||
stubVydraApiKey();
|
||||
vi.stubGlobal("fetch", vi.fn());
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
downloadVydraAsset,
|
||||
extractVydraResultUrls,
|
||||
resolveCompletedVydraPayload,
|
||||
resolveVydraGeneratedMediaMaxBytes,
|
||||
resolveVydraResponseJobId,
|
||||
resolveVydraResponseStatus,
|
||||
resolveVydraRequestContext,
|
||||
@@ -131,6 +132,7 @@ export function buildVydraVideoGenerationProvider(): VideoGenerationProvider {
|
||||
defaultTimeoutMs: DEFAULT_VYDRA_VIDEO_TIMEOUT_MS,
|
||||
}),
|
||||
fetchFn,
|
||||
maxBytes: resolveVydraGeneratedMediaMaxBytes({ cfg: req.cfg, kind: "video" }),
|
||||
});
|
||||
return {
|
||||
videos: [
|
||||
|
||||
Reference in New Issue
Block a user