diff --git a/docs/tools/video-generation.md b/docs/tools/video-generation.md index 1deb0aa436a..c62af2c1daa 100644 --- a/docs/tools/video-generation.md +++ b/docs/tools/video-generation.md @@ -55,6 +55,12 @@ While a job is in flight, duplicate `video_generate` calls in the same session r Outside of session-backed agent runs (for example, direct tool invocations), the tool falls back to inline generation and returns the final media path in the same turn. +Generated video files are saved under OpenClaw-managed media storage when the +provider returns bytes. The default generated-video save cap follows the video +media limit; setting `agents.defaults.mediaMaxMb` replaces that default (for example, to allow larger renders). +When a provider also returns a hosted output URL, OpenClaw can deliver that URL +instead of failing the task if local persistence rejects an oversized file. + ### Task lifecycle Each `video_generate` request moves through four states: diff --git a/extensions/fal/video-generation-provider.test.ts b/extensions/fal/video-generation-provider.test.ts index 319e4a2215d..71ddc8ab701 100644 --- a/extensions/fal/video-generation-provider.test.ts +++ b/extensions/fal/video-generation-provider.test.ts @@ -138,6 +138,7 @@ describe("fal video generation provider", () => { ); expect(result.videos).toHaveLength(1); expect(result.videos[0]?.mimeType).toBe("video/mp4"); + expect(result.videos[0]?.url).toBe("https://fal.run/files/video.mp4"); expect(result.metadata).toEqual({ requestId: "req-123", }); diff --git a/extensions/fal/video-generation-provider.ts b/extensions/fal/video-generation-provider.ts index 116a52ec646..ce4d145b201 100644 --- a/extensions/fal/video-generation-provider.ts +++ b/extensions/fal/video-generation-provider.ts @@ -97,6 +97,7 @@ async function downloadFalVideo( const mimeType = normalizeOptionalString(response.headers.get("content-type")) ?? 
"video/mp4"; const arrayBuffer = await response.arrayBuffer(); return { + url, buffer: Buffer.from(arrayBuffer), mimeType, fileName: `video-1.${mimeType.includes("webm") ? "webm" : "mp4"}`, diff --git a/src/agents/tools/image-generate-tool.ts b/src/agents/tools/image-generate-tool.ts index d154ff789dc..7b616814517 100644 --- a/src/agents/tools/image-generate-tool.ts +++ b/src/agents/tools/image-generate-tool.ts @@ -19,7 +19,10 @@ import type { ImageGenerationSourceImage, } from "../../image-generation/types.js"; import type { SsrFPolicy } from "../../infra/net/ssrf.js"; -import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js"; +import { + resolveConfiguredMediaMaxBytes, + resolveGeneratedMediaMaxBytes, +} from "../../media/configured-max-bytes.js"; import { getImageMetadata } from "../../media/image-ops.js"; import { classifyMediaReferenceSource, @@ -646,6 +649,7 @@ export function createImageGenerateTool(options?: { }); const count = resolveRequestedCount(params); const configuredMediaMaxBytes = resolveConfiguredMediaMaxBytes(effectiveCfg); + const mediaMaxBytes = resolveGeneratedMediaMaxBytes(effectiveCfg, "image"); const loadedReferenceImages = await loadReferenceImages({ imageInputs, maxBytes: configuredMediaMaxBytes, @@ -728,7 +732,7 @@ export function createImageGenerateTool(options?: { image.buffer, image.mimeType, "tool-image-generation", - configuredMediaMaxBytes, + mediaMaxBytes, filename || image.fileName, ), ), diff --git a/src/agents/tools/music-generate-tool.ts b/src/agents/tools/music-generate-tool.ts index 648986eb25c..001a3ebdab9 100644 --- a/src/agents/tools/music-generate-tool.ts +++ b/src/agents/tools/music-generate-tool.ts @@ -4,7 +4,7 @@ import type { OpenClawConfig } from "../../config/types.openclaw.js"; import { formatErrorMessage } from "../../infra/errors.js"; import type { SsrFPolicy } from "../../infra/net/ssrf.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; -import { 
resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js"; +import { resolveGeneratedMediaMaxBytes } from "../../media/configured-max-bytes.js"; import { classifyMediaReferenceSource, normalizeMediaReferenceSource, @@ -389,14 +389,14 @@ async function executeMusicGenerationJob(params: { progressSummary: "Saving generated music", }); } - const configuredMediaMaxBytes = resolveConfiguredMediaMaxBytes(params.effectiveCfg); + const mediaMaxBytes = resolveGeneratedMediaMaxBytes(params.effectiveCfg, "audio"); const savedTracks = await Promise.all( result.tracks.map((track) => saveMediaBuffer( track.buffer, track.mimeType, "tool-music-generation", - configuredMediaMaxBytes, + mediaMaxBytes, params.filename || track.fileName, ), ), diff --git a/src/agents/tools/video-generate-tool.test.ts b/src/agents/tools/video-generate-tool.test.ts index e15879cff14..b24fe985730 100644 --- a/src/agents/tools/video-generate-tool.test.ts +++ b/src/agents/tools/video-generate-tool.test.ts @@ -1,5 +1,6 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../../config/config.js"; +import { MAX_VIDEO_BYTES } from "../../media/constants.js"; import * as mediaStore from "../../media/store.js"; import * as webMedia from "../../media/web-media.js"; import * as videoGenerationRuntime from "../../video-generation/runtime.js"; @@ -185,6 +186,51 @@ describe("createVideoGenerateTool", () => { expect(taskExecutorMocks.completeTaskRunByRunId).not.toHaveBeenCalled(); }); + it("uses the video media cap when mediaMaxMb is not configured", async () => { + vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({ + provider: "qwen", + model: "wan2.6-t2v", + attempts: [], + ignoredOverrides: [], + videos: [ + { + buffer: Buffer.from("video-bytes"), + mimeType: "video/mp4", + fileName: "lobster.mp4", + }, + ], + }); + const saveSpy = vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({ + path: 
"/tmp/generated-lobster.mp4", + id: "generated-lobster.mp4", + size: 11, + contentType: "video/mp4", + }); + + const tool = createVideoGenerateTool({ + config: asConfig({ + agents: { + defaults: { + videoGenerationModel: { primary: "qwen/wan2.6-t2v" }, + }, + }, + }), + }); + if (!tool) { + throw new Error("expected video_generate tool"); + } + + await tool.execute("call-default-cap", { prompt: "friendly lobster surfing" }); + + expect(saveSpy).toHaveBeenCalledWith( + Buffer.from("video-bytes"), + "video/mp4", + "tool-video-generation", + MAX_VIDEO_BYTES, + "lobster.mp4", + ); + }); + it("surfaces url-only generated videos without saving local files", async () => { vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({ provider: "vydra", @@ -233,6 +279,56 @@ describe("createVideoGenerateTool", () => { }); }); + it("falls back to the provider URL when generated video persistence exceeds the media cap", async () => { + vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({ + provider: "fal", + model: "fal-ai/minimax/video-01-live", + attempts: [], + ignoredOverrides: [], + videos: [ + { + buffer: Buffer.from("large-video-bytes"), + url: "https://fal.run/files/generated-lobster.mp4", + mimeType: "video/mp4", + fileName: "lobster.mp4", + }, + ], + }); + vi.spyOn(mediaStore, "saveMediaBuffer").mockRejectedValueOnce( + new Error("Media exceeds 16MB limit"), + ); + + const tool = createVideoGenerateTool({ + config: asConfig({ + agents: { + defaults: { + videoGenerationModel: { primary: "fal/fal-ai/minimax/video-01-live" }, + }, + }, + }), + }); + if (!tool) { + throw new Error("expected video_generate tool"); + } + + const result = await tool.execute("call-url-fallback", { + prompt: "friendly lobster surfing", + }); + const text = (result.content?.[0] as { text: string } | undefined)?.text ?? 
""; + + expect(text).toContain("Generated 1 video with fal/fal-ai/minimax/video-01-live."); + expect(text).toContain("MEDIA:https://fal.run/files/generated-lobster.mp4"); + expect(result.details).toMatchObject({ + provider: "fal", + model: "fal-ai/minimax/video-01-live", + count: 1, + media: { + mediaUrls: ["https://fal.run/files/generated-lobster.mp4"], + }, + paths: ["https://fal.run/files/generated-lobster.mp4"], + }); + }); + it("starts background generation and wakes the session with url-only MEDIA lines", async () => { taskExecutorMocks.createRunningTaskRun.mockReturnValue({ taskId: "task-123", diff --git a/src/agents/tools/video-generate-tool.ts b/src/agents/tools/video-generate-tool.ts index 045f4be33b6..835c12d31b0 100644 --- a/src/agents/tools/video-generate-tool.ts +++ b/src/agents/tools/video-generate-tool.ts @@ -4,7 +4,7 @@ import type { OpenClawConfig } from "../../config/types.openclaw.js"; import { formatErrorMessage } from "../../infra/errors.js"; import type { SsrFPolicy } from "../../infra/net/ssrf.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; -import { resolveConfiguredMediaMaxBytes } from "../../media/configured-max-bytes.js"; +import { resolveGeneratedMediaMaxBytes } from "../../media/configured-max-bytes.js"; import { classifyMediaReferenceSource, normalizeMediaReferenceSource, @@ -559,6 +559,10 @@ type ExecutedVideoGeneration = { wakeResult: string; }; +function isGeneratedMediaSizeLimitError(error: unknown): boolean { + return error instanceof Error && /^Media exceeds \d+MB limit$/.test(error.message); +} + async function executeVideoGenerationJob(params: { effectiveCfg: OpenClawConfig; prompt: string; @@ -628,18 +632,30 @@ async function executeVideoGenerationJob(params: { ); } - const configuredMediaMaxBytes = resolveConfiguredMediaMaxBytes(params.effectiveCfg); - const savedVideos = await Promise.all( - bufferVideos.map((video) => - saveMediaBuffer( + const mediaMaxBytes = 
/**
 * Resolves the byte cap to apply when persisting generated media of `kind`.
 *
 * The value from `resolveConfiguredMediaMaxBytes(cfg)` wins whenever it is
 * neither `null` nor `undefined`; otherwise the result of
 * `maxBytesForKind(kind)` is used. The configured value replaces the per-kind
 * value outright — it is not combined with it.
 *
 * @param cfg - Configuration to read the configured cap from; may be `undefined`.
 * @param kind - Media kind used to look up the fallback cap.
 * @returns The configured cap when present, else the per-kind cap.
 */
export function resolveGeneratedMediaMaxBytes(cfg: OpenClawConfig | undefined, kind: MediaKind) {
  const configuredCap = resolveConfiguredMediaMaxBytes(cfg);
  // `!= null` matches the nullish check `??` performs (null and undefined only).
  if (configuredCap != null) {
    return configuredCap;
  }
  return maxBytesForKind(kind);
}