fix(media): preserve oversized video generation delivery

This commit is contained in:
Peter Steinberger
2026-04-25 12:41:31 +01:00
parent 150f3e472b
commit 3507efa4ec
8 changed files with 143 additions and 14 deletions

View File

@@ -55,6 +55,12 @@ While a job is in flight, duplicate `video_generate` calls in the same session r
Outside of session-backed agent runs (for example, direct tool invocations), the tool falls back to inline generation and returns the final media path in the same turn.
Generated video files are saved under OpenClaw-managed media storage when the
provider returns bytes. The default generated-video save cap follows the video
media limit; setting `agents.defaults.mediaMaxMb` overrides it (for example, to allow larger renders).
When a provider also returns a hosted output URL, OpenClaw can deliver that URL
instead of failing the task if local persistence rejects an oversized file.
### Task lifecycle
Each `video_generate` request moves through four states:

View File

@@ -138,6 +138,7 @@ describe("fal video generation provider", () => {
);
expect(result.videos).toHaveLength(1);
expect(result.videos[0]?.mimeType).toBe("video/mp4");
expect(result.videos[0]?.url).toBe("https://fal.run/files/video.mp4");
expect(result.metadata).toEqual({
requestId: "req-123",
});

View File

@@ -97,6 +97,7 @@ async function downloadFalVideo(
const mimeType = normalizeOptionalString(response.headers.get("content-type")) ?? "video/mp4";
const arrayBuffer = await response.arrayBuffer();
return {
url,
buffer: Buffer.from(arrayBuffer),
mimeType,
fileName: `video-1.${mimeType.includes("webm") ? "webm" : "mp4"}`,

View File

@@ -19,7 +19,10 @@ import type {
ImageGenerationSourceImage,
} from "../../image-generation/types.js";
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js";
import {
resolveConfiguredMediaMaxBytes,
resolveGeneratedMediaMaxBytes,
} from "../../media/configured-max-bytes.js";
import { getImageMetadata } from "../../media/image-ops.js";
import {
classifyMediaReferenceSource,
@@ -646,6 +649,7 @@ export function createImageGenerateTool(options?: {
});
const count = resolveRequestedCount(params);
const configuredMediaMaxBytes = resolveConfiguredMediaMaxBytes(effectiveCfg);
const mediaMaxBytes = resolveGeneratedMediaMaxBytes(effectiveCfg, "image");
const loadedReferenceImages = await loadReferenceImages({
imageInputs,
maxBytes: configuredMediaMaxBytes,
@@ -728,7 +732,7 @@ export function createImageGenerateTool(options?: {
image.buffer,
image.mimeType,
"tool-image-generation",
configuredMediaMaxBytes,
mediaMaxBytes,
filename || image.fileName,
),
),

View File

@@ -4,7 +4,7 @@ import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { formatErrorMessage } from "../../infra/errors.js";
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js";
import { resolveGeneratedMediaMaxBytes } from "../../media/configured-max-bytes.js";
import {
classifyMediaReferenceSource,
normalizeMediaReferenceSource,
@@ -389,14 +389,14 @@ async function executeMusicGenerationJob(params: {
progressSummary: "Saving generated music",
});
}
const configuredMediaMaxBytes = resolveConfiguredMediaMaxBytes(params.effectiveCfg);
const mediaMaxBytes = resolveGeneratedMediaMaxBytes(params.effectiveCfg, "audio");
const savedTracks = await Promise.all(
result.tracks.map((track) =>
saveMediaBuffer(
track.buffer,
track.mimeType,
"tool-music-generation",
configuredMediaMaxBytes,
mediaMaxBytes,
params.filename || track.fileName,
),
),

View File

@@ -1,5 +1,6 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../config/config.js";
import { MAX_VIDEO_BYTES } from "../../media/constants.js";
import * as mediaStore from "../../media/store.js";
import * as webMedia from "../../media/web-media.js";
import * as videoGenerationRuntime from "../../video-generation/runtime.js";
@@ -185,6 +186,51 @@ describe("createVideoGenerateTool", () => {
expect(taskExecutorMocks.completeTaskRunByRunId).not.toHaveBeenCalled();
});
// Verifies the byte cap handed to saveMediaBuffer when the config carries no
// mediaMaxMb setting: the fourth argument is expected to be MAX_VIDEO_BYTES.
it("uses the video media cap when mediaMaxMb is not configured", async () => {
// Provider stub: a single buffer-backed video with no hosted URL.
vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
provider: "qwen",
model: "wan2.6-t2v",
attempts: [],
ignoredOverrides: [],
videos: [
{
buffer: Buffer.from("video-bytes"),
mimeType: "video/mp4",
fileName: "lobster.mp4",
},
],
});
// Stub persistence so the test can inspect the arguments without touching disk.
const saveSpy = vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
path: "/tmp/generated-lobster.mp4",
id: "generated-lobster.mp4",
size: 11,
contentType: "video/mp4",
});
// The config only selects the video model; no mediaMaxMb is set under agents.defaults.
const tool = createVideoGenerateTool({
config: asConfig({
agents: {
defaults: {
videoGenerationModel: { primary: "qwen/wan2.6-t2v" },
},
},
}),
});
if (!tool) {
throw new Error("expected video_generate tool");
}
await tool.execute("call-default-cap", { prompt: "friendly lobster surfing" });
// The save call must pass the provider's buffer, MIME type and file name
// through unchanged, with MAX_VIDEO_BYTES as the size cap.
expect(saveSpy).toHaveBeenCalledWith(
Buffer.from("video-bytes"),
"video/mp4",
"tool-video-generation",
MAX_VIDEO_BYTES,
"lobster.mp4",
);
});
it("surfaces url-only generated videos without saving local files", async () => {
vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
provider: "vydra",
@@ -233,6 +279,56 @@ describe("createVideoGenerateTool", () => {
});
});
// Verifies that when saving a generated video fails with a size-limit error
// and the provider also supplied a URL, the tool result references that URL
// instead of surfacing the failure.
it("falls back to the provider URL when generated video persistence exceeds the media cap", async () => {
// Provider stub: one video carrying both a buffer and a hosted URL.
vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
provider: "fal",
model: "fal-ai/minimax/video-01-live",
attempts: [],
ignoredOverrides: [],
videos: [
{
buffer: Buffer.from("large-video-bytes"),
url: "https://fal.run/files/generated-lobster.mp4",
mimeType: "video/mp4",
fileName: "lobster.mp4",
},
],
});
// Make the single save attempt reject with a "Media exceeds <N>MB limit" message.
vi.spyOn(mediaStore, "saveMediaBuffer").mockRejectedValueOnce(
new Error("Media exceeds 16MB limit"),
);
const tool = createVideoGenerateTool({
config: asConfig({
agents: {
defaults: {
videoGenerationModel: { primary: "fal/fal-ai/minimax/video-01-live" },
},
},
}),
});
if (!tool) {
throw new Error("expected video_generate tool");
}
const result = await tool.execute("call-url-fallback", {
prompt: "friendly lobster surfing",
});
// The first content entry is read as text; a missing entry falls back to "".
const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";
expect(text).toContain("Generated 1 video with fal/fal-ai/minimax/video-01-live.");
expect(text).toContain("MEDIA:https://fal.run/files/generated-lobster.mp4");
// The structured details must report the provider URL both as a media URL
// and in `paths`, and still count the video as generated.
expect(result.details).toMatchObject({
provider: "fal",
model: "fal-ai/minimax/video-01-live",
count: 1,
media: {
mediaUrls: ["https://fal.run/files/generated-lobster.mp4"],
},
paths: ["https://fal.run/files/generated-lobster.mp4"],
});
});
it("starts background generation and wakes the session with url-only MEDIA lines", async () => {
taskExecutorMocks.createRunningTaskRun.mockReturnValue({
taskId: "task-123",

View File

@@ -4,7 +4,7 @@ import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { formatErrorMessage } from "../../infra/errors.js";
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js";
import { resolveGeneratedMediaMaxBytes } from "../../media/configured-max-bytes.js";
import {
classifyMediaReferenceSource,
normalizeMediaReferenceSource,
@@ -559,6 +559,10 @@ type ExecutedVideoGeneration = {
wakeResult: string;
};
/**
 * Reports whether `error` is an `Error` whose message is exactly
 * "Media exceeds <digits>MB limit".
 *
 * @param error - Any caught value; non-Error values never match.
 * @returns `true` only for an Error with a fully matching message.
 */
function isGeneratedMediaSizeLimitError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  // Anchored at both ends so messages with extra text around them do not match.
  const sizeLimitMessage = /^Media exceeds \d+MB limit$/;
  return sizeLimitMessage.test(error.message);
}
async function executeVideoGenerationJob(params: {
effectiveCfg: OpenClawConfig;
prompt: string;
@@ -628,18 +632,30 @@ async function executeVideoGenerationJob(params: {
);
}
const configuredMediaMaxBytes = resolveConfiguredMediaMaxBytes(params.effectiveCfg);
const savedVideos = await Promise.all(
bufferVideos.map((video) =>
saveMediaBuffer(
const mediaMaxBytes = resolveGeneratedMediaMaxBytes(params.effectiveCfg, "video");
const savedVideos: Array<Awaited<ReturnType<typeof saveMediaBuffer>>> = [];
for (const video of bufferVideos) {
try {
const saved = await saveMediaBuffer(
video.buffer,
video.mimeType,
"tool-video-generation",
configuredMediaMaxBytes,
mediaMaxBytes,
params.filename || video.fileName,
),
),
);
);
savedVideos.push(saved);
} catch (error) {
if (video.url && isGeneratedMediaSizeLimitError(error)) {
urlOnlyVideos.push({
url: video.url,
mimeType: video.mimeType,
fileName: video.fileName,
});
continue;
}
throw error;
}
}
const totalCount = savedVideos.length + urlOnlyVideos.length;
const requestedDurationSeconds =
result.normalization?.durationSeconds?.requested ??

View File

@@ -1,4 +1,5 @@
import type { OpenClawConfig } from "../config/types.openclaw.js";
import { maxBytesForKind, type MediaKind } from "./constants.js";
const MB = 1024 * 1024;
@@ -10,6 +11,10 @@ export function resolveConfiguredMediaMaxBytes(cfg?: OpenClawConfig): number | u
return undefined;
}
/**
 * Picks the byte cap to use when saving generated media of the given kind.
 *
 * Uses the result of `resolveConfiguredMediaMaxBytes(cfg)` when it is neither
 * `null` nor `undefined`; otherwise falls back to `maxBytesForKind(kind)`.
 *
 * @param cfg - Config passed through to `resolveConfiguredMediaMaxBytes`; may be undefined.
 * @param kind - Media kind used to look up the fallback cap.
 * @returns The configured cap when present, else the per-kind cap.
 */
export function resolveGeneratedMediaMaxBytes(cfg: OpenClawConfig | undefined, kind: MediaKind) {
  const configuredCap = resolveConfiguredMediaMaxBytes(cfg);
  if (configuredCap == null) {
    // The per-kind fallback is only looked up when no configured cap exists.
    return maxBytesForKind(kind);
  }
  return configuredCap;
}
export function resolveChannelAccountMediaMaxMb(params: {
cfg: OpenClawConfig;
channel?: string | null;