video_generate: support url-only delivery (#61988) (thanks @xieyongliang)

Co-authored-by: George Zhang <georgezhangtj97@gmail.com>
This commit is contained in:
xieyongliang
2026-04-11 18:08:30 +08:00
committed by GitHub
parent 52800131d2
commit e0a2c568b2
11 changed files with 281 additions and 37 deletions

View File

@@ -6,6 +6,8 @@ Docs: https://docs.openclaw.ai
### Changes
- Tools/video_generate: allow providers and plugins to return URL-only generated video assets so agent delivery and `openclaw capability video generate --output ...` can forward or stream large videos without requiring the full file in memory first. (#61988) Thanks @xieyongliang.
### Fixes
- WhatsApp: honor the configured default account when the active listener helper is used without an explicit account id, so named default accounts do not get registered under `default`. (#53918) Thanks @yhyatt.

View File

@@ -1,2 +1,2 @@
7a9bb7a5e4b243e2123af94301ba363d57eddab2baa6378d16cd37a1cb8a55f7 plugin-sdk-api-baseline.json
2bdca027d5fda72399479569927cd34d18b56b242e4b12ac45e7c2352e551c77 plugin-sdk-api-baseline.jsonl
7a5c71593c9efbb936b9632f0b381a6c603e9bce44706b312a0172504fa51ef6 plugin-sdk-api-baseline.json
0b044de57266d20561838a5ae0edbaacaa53b323d4c8c068e701a48f92f0a264 plugin-sdk-api-baseline.jsonl

View File

@@ -127,7 +127,55 @@ describe("createVideoGenerateTool", () => {
expect(taskExecutorMocks.completeTaskRunByRunId).not.toHaveBeenCalled();
});
it("starts background generation and wakes the session with MEDIA lines", async () => {
// Synchronous tool path for a provider result that carries only a URL (no
// buffer): nothing may be written through mediaStore.saveMediaBuffer, and the
// URL must be surfaced as a MEDIA: line as well as in details.media.mediaUrls
// and details.paths.
it("surfaces url-only generated videos without saving local files", async () => {
// Provider result with a single asset that has `url` but no `buffer`.
vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
provider: "vydra",
model: "veo3",
attempts: [],
ignoredOverrides: [],
videos: [
{
url: "https://example.com/generated-lobster.mp4",
mimeType: "video/mp4",
fileName: "lobster.mp4",
},
],
metadata: { taskId: "task-1" },
});
// Spy without a mock implementation: the only expectation is that it is never called.
const saveSpy = vi.spyOn(mediaStore, "saveMediaBuffer");
const tool = createVideoGenerateTool({
config: asConfig({
agents: {
defaults: {
videoGenerationModel: { primary: "vydra/veo3" },
},
},
}),
});
if (!tool) {
throw new Error("expected video_generate tool");
}
const result = await tool.execute("call-url", { prompt: "friendly lobster surfing" });
const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";
expect(saveSpy).not.toHaveBeenCalled();
// The url-only asset still counts toward the "Generated N video(s)" summary.
expect(text).toContain("Generated 1 video with vydra/veo3.");
expect(text).toContain("MEDIA:https://example.com/generated-lobster.mp4");
expect(result.details).toMatchObject({
provider: "vydra",
model: "veo3",
count: 1,
media: {
mediaUrls: ["https://example.com/generated-lobster.mp4"],
},
paths: ["https://example.com/generated-lobster.mp4"],
metadata: { taskId: "task-1" },
});
});
it("starts background generation and wakes the session with url-only MEDIA lines", async () => {
taskExecutorMocks.createRunningTaskRun.mockReturnValue({
taskId: "task-123",
runtime: "cli",
@@ -143,33 +191,28 @@ describe("createVideoGenerateTool", () => {
const wakeSpy = vi
.spyOn(videoGenerateBackground, "wakeVideoGenerationTaskCompletion")
.mockResolvedValue(undefined);
const saveSpy = vi.spyOn(mediaStore, "saveMediaBuffer");
vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
provider: "qwen",
model: "wan2.6-t2v",
provider: "vydra",
model: "veo3",
attempts: [],
ignoredOverrides: [],
videos: [
{
buffer: Buffer.from("video-bytes"),
url: "https://example.com/generated-lobster.mp4",
mimeType: "video/mp4",
fileName: "lobster.mp4",
},
],
metadata: { taskId: "task-1" },
});
vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
path: "/tmp/generated-lobster.mp4",
id: "generated-lobster.mp4",
size: 11,
contentType: "video/mp4",
});
let scheduledWork: (() => Promise<void>) | undefined;
const tool = createVideoGenerateTool({
config: asConfig({
agents: {
defaults: {
videoGenerationModel: { primary: "qwen/wan2.6-t2v" },
videoGenerationModel: { primary: "vydra/veo3" },
},
},
}),
@@ -200,6 +243,7 @@ describe("createVideoGenerateTool", () => {
});
expect(typeof scheduledWork).toBe("function");
await scheduledWork?.();
expect(saveSpy).not.toHaveBeenCalled();
expect(taskExecutorMocks.recordTaskRunProgressByRunId).toHaveBeenCalledWith(
expect.objectContaining({
runId: expect.stringMatching(/^tool:video_generate:/),
@@ -217,7 +261,8 @@ describe("createVideoGenerateTool", () => {
taskId: "task-123",
}),
status: "ok",
result: expect.stringContaining("MEDIA:/tmp/generated-lobster.mp4"),
mediaUrls: ["https://example.com/generated-lobster.mp4"],
result: expect.stringContaining("MEDIA:https://example.com/generated-lobster.mp4"),
}),
);
});

View File

@@ -535,6 +535,10 @@ type ExecutedVideoGeneration = {
provider: string;
model: string;
savedPaths: string[];
/** URLs of url-only assets that were not saved locally. */
urlOnlyUrls: string[];
/** Total generated video count, including url-only assets. */
count: number;
contentText: string;
details: Record<string, unknown>;
wakeResult: string;
@@ -587,8 +591,28 @@ async function executeVideoGenerationJob(params: {
});
}
// Split the provider output into two delivery groups:
//   - bufferVideos: assets that carry raw bytes,
//   - urlOnlyVideos: assets that carry only a URL.
// An asset with both is treated as a buffer asset. An asset with neither
// cannot be delivered and aborts the whole job.
const urlOnlyVideos: Array<{ url: string; mimeType: string; fileName?: string }> = [];
const bufferVideos: Array<(typeof result.videos)[number] & { buffer: Buffer }> = [];
for (const asset of result.videos) {
  const { buffer, url, mimeType, fileName } = asset;
  if (buffer) {
    bufferVideos.push(asset as (typeof result.videos)[number] & { buffer: Buffer });
  } else if (url) {
    // Keep only the fields needed to reference the remote asset.
    urlOnlyVideos.push({ url, mimeType, fileName });
  } else {
    throw new Error(
      `Provider ${result.provider} returned a video asset with neither buffer nor url — cannot deliver.`,
    );
  }
}
const savedVideos = await Promise.all(
result.videos.map((video) =>
bufferVideos.map((video) =>
saveMediaBuffer(
video.buffer,
video.mimeType,
@@ -598,6 +622,7 @@ async function executeVideoGenerationJob(params: {
),
),
);
const totalCount = savedVideos.length + urlOnlyVideos.length;
const requestedDurationSeconds =
result.normalization?.durationSeconds?.requested ??
(typeof result.metadata?.requestedDurationSeconds === "number" &&
@@ -646,8 +671,12 @@ async function executeVideoGenerationJob(params: {
typeof result.metadata?.requestedSize === "string" &&
result.metadata.requestedSize === params.size &&
Boolean(normalizedAspectRatio));
// Every deliverable reference for this run: local paths of saved buffer
// videos first, followed by the URLs of url-only videos.
const allMediaUrls = [
...savedVideos.map((video) => video.path),
...urlOnlyVideos.map((video) => video.url),
];
const lines = [
`Generated ${savedVideos.length} video${savedVideos.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
`Generated ${totalCount} video${totalCount === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
...(warning ? [`Warning: ${warning}`] : []),
typeof requestedDurationSeconds === "number" &&
typeof normalizedDurationSeconds === "number" &&
@@ -655,22 +684,25 @@ async function executeVideoGenerationJob(params: {
? `Duration normalized: requested ${requestedDurationSeconds}s; used ${normalizedDurationSeconds}s.`
: null,
...savedVideos.map((video) => `MEDIA:${video.path}`),
...urlOnlyVideos.map((video) => `MEDIA:${video.url}`),
].filter((entry): entry is string => Boolean(entry));
return {
provider: result.provider,
model: result.model,
savedPaths: savedVideos.map((video) => video.path),
urlOnlyUrls: urlOnlyVideos.map((video) => video.url),
count: totalCount,
contentText: lines.join("\n"),
wakeResult: lines.join("\n"),
details: {
provider: result.provider,
model: result.model,
count: savedVideos.length,
count: totalCount,
media: {
mediaUrls: savedVideos.map((video) => video.path),
mediaUrls: allMediaUrls,
},
paths: savedVideos.map((video) => video.path),
paths: allMediaUrls,
...buildTaskRunDetails(params.taskHandle),
...buildMediaReferenceDetails({
entries: params.loadedReferenceImages,
@@ -931,7 +963,7 @@ export function createVideoGenerateTool(options?: {
handle: taskHandle,
provider: executed.provider,
model: executed.model,
count: executed.savedPaths.length,
count: executed.count,
paths: executed.savedPaths,
});
try {
@@ -941,7 +973,7 @@ export function createVideoGenerateTool(options?: {
status: "ok",
statusLabel: "completed successfully",
result: executed.wakeResult,
mediaUrls: executed.savedPaths,
mediaUrls: [...executed.savedPaths, ...executed.urlOnlyUrls],
});
} catch (error) {
log.warn("Video generation completion wake failed after successful generation", {
@@ -1025,7 +1057,7 @@ export function createVideoGenerateTool(options?: {
handle: taskHandle,
provider: executed.provider,
model: executed.model,
count: executed.savedPaths.length,
count: executed.count,
paths: executed.savedPaths,
});

View File

@@ -2,7 +2,7 @@ import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { Command } from "commander";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { runRegisteredCli } from "../test-utils/command-runner.js";
import { registerCapabilityCli } from "./capability-cli.js";
@@ -58,6 +58,7 @@ const mocks = vi.hoisted(() => ({
model: "gpt-4.1-mini",
})),
generateImage: vi.fn(),
generateVideo: vi.fn(),
transcribeAudioFile: vi.fn(async () => ({ text: "meeting notes" })),
textToSpeech: vi.fn(async () => ({
success: true,
@@ -202,7 +203,7 @@ vi.mock("../image-generation/runtime.js", () => ({
}));
vi.mock("../video-generation/runtime.js", () => ({
generateVideo: vi.fn(),
generateVideo: mocks.generateVideo,
listRuntimeVideoGenerationProviders: vi.fn(() => []),
}));
@@ -238,6 +239,10 @@ vi.mock("../web-fetch/runtime.js", () => ({
}));
describe("capability cli", () => {
// Remove any globals stubbed via vi.stubGlobal (tests in this suite stub
// `fetch`) after each test.
afterEach(() => {
vi.unstubAllGlobals();
});
beforeEach(() => {
mocks.runtime.log.mockClear();
mocks.runtime.error.mockClear();
@@ -278,6 +283,7 @@ describe("capability cli", () => {
}) as never);
mocks.describeImageFile.mockClear();
mocks.generateImage.mockReset();
mocks.generateVideo.mockReset();
mocks.transcribeAudioFile.mockClear();
mocks.textToSpeech.mockClear();
mocks.setTtsProvider.mockClear();
@@ -434,6 +440,85 @@ describe("capability cli", () => {
);
});
// CLI path for a url-only provider result combined with --output: the video
// is fetched from the provider URL and written next to the requested output
// base, and the JSON result reports the written path, MIME type and size.
it("streams url-only generated videos to --output paths", async () => {
mocks.generateVideo.mockResolvedValue({
provider: "vydra",
model: "veo3",
attempts: [],
videos: [
{
url: "https://example.com/generated-video.mp4",
mimeType: "video/mp4",
fileName: "provider-name.mp4",
},
],
});
// Replace global fetch with a stub that answers 200 with an 11-byte body
// ("video-bytes"); the size assertion below relies on that length.
const fetchMock = vi.fn(
async () =>
new Response(Buffer.from("video-bytes"), {
status: 200,
headers: { "content-type": "video/mp4" },
}),
);
vi.stubGlobal("fetch", fetchMock);
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-video-generate-"));
// --output is given without an extension.
const outputBase = path.join(tempDir, "result");
await runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: [
"capability",
"video",
"generate",
"--prompt",
"friendly lobster",
"--output",
outputBase,
"--json",
],
});
// Expected file keeps the --output base name ("result") with ".mp4" appended,
// not the provider-supplied file name ("provider-name.mp4").
const outputPath = `${outputBase}.mp4`;
expect(fetchMock).toHaveBeenCalledWith(
"https://example.com/generated-video.mp4",
expect.objectContaining({ signal: expect.any(AbortSignal) }),
);
expect(await fs.readFile(outputPath, "utf8")).toBe("video-bytes");
expect(mocks.runtime.writeJson).toHaveBeenCalledWith(
expect.objectContaining({
capability: "video.generate",
provider: "vydra",
outputs: [
expect.objectContaining({
path: outputPath,
mimeType: "video/mp4",
size: 11,
}),
],
}),
);
});
// A provider asset with neither `buffer` nor `url` must make the CLI command
// fail ("exit 1") and report which asset index was undeliverable.
it("fails video generate when a provider returns an undeliverable asset", async () => {
mocks.generateVideo.mockResolvedValue({
provider: "vydra",
model: "veo3",
attempts: [],
// Only a MIME type: no bytes and no URL to fetch them from.
videos: [{ mimeType: "video/mp4" }],
});
await expect(
runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: ["capability", "video", "generate", "--prompt", "friendly lobster", "--json"],
}),
).rejects.toThrow("exit 1");
expect(mocks.runtime.error).toHaveBeenCalledWith(
expect.stringContaining("Video asset at index 0 has neither buffer nor url"),
);
});
it("routes audio transcribe through transcription, not realtime", async () => {
await runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,

View File

@@ -815,17 +815,55 @@ async function runVideoGenerate(params: { prompt: string; model?: string; output
modelOverride: params.model,
});
const outputs = await Promise.all(
result.videos.map(async (video, index) => ({
...(await writeOutputAsset({
buffer: video.buffer,
mimeType: video.mimeType,
originalFilename: video.fileName,
outputPath: params.output,
outputIndex: index,
outputCount: result.videos.length,
subdir: "generated",
})),
})),
result.videos.map(async (video, index) => {
  // Every asset must carry raw bytes, a URL to fetch them from, or both.
  if (!video.buffer && !video.url) {
    throw new Error(`Video asset at index ${index} has neither buffer nor url`);
  }
  // Provider-supplied bytes take precedence; the URL is only fetched when no
  // buffer was returned.
  let videoBuffer = video.buffer;
  if (!videoBuffer && video.url) {
    // NOTE(review): the 120s timeout signal presumably also bounds reading the
    // response body, so very large downloads could abort mid-stream — confirm
    // this budget is intended to cover the whole transfer.
    const response = await fetch(video.url, { signal: AbortSignal.timeout(120_000) });
    if (!response.ok) {
      throw new Error(`Failed to download video from ${video.url}: ${response.status}`);
    }
    const outputTarget = params.output;
    if (outputTarget && response.body) {
      // Stream straight to disk so the full video never has to sit in memory.
      const { pipeline } = await import("node:stream/promises");
      const { Readable } = await import("node:stream");
      const { createWriteStream } = await import("node:fs");
      const mimeType = normalizeMimeType(video.mimeType);
      // Extension preference: MIME type, then provider file name, then --output.
      const ext =
        extensionForMime(mimeType) ||
        path.extname(video.fileName ?? "") ||
        path.extname(outputTarget);
      const parsed = path.parse(path.resolve(outputTarget));
      // A single result keeps the plain base name; multiple results get a
      // 1-based "-N" suffix so they do not overwrite each other.
      const filePath =
        result.videos.length <= 1
          ? path.join(parsed.dir, `${parsed.name}${ext}`)
          : path.join(parsed.dir, `${parsed.name}-${String(index + 1)}${ext}`);
      await fs.mkdir(path.dirname(filePath), { recursive: true });
      try {
        await pipeline(
          Readable.fromWeb(response.body as import("node:stream/web").ReadableStream),
          createWriteStream(filePath),
        );
      } catch (error) {
        // pipeline() tears down both streams on failure but leaves whatever
        // was already written; remove the truncated file so a failed download
        // is never mistaken for a finished video.
        try {
          await fs.rm(filePath, { force: true });
        } catch {
          // Best-effort cleanup: the download error below is the one to report.
        }
        throw error;
      }
      const stat = await fs.stat(filePath);
      return { path: filePath, mimeType: video.mimeType, size: stat.size };
    }
    // No --output (or no readable body): buffer the download and fall through
    // to the regular asset writer.
    videoBuffer = Buffer.from(await response.arrayBuffer());
  }
  if (!videoBuffer) {
    // Not reachable after the guard at the top; kept so the compiler narrows
    // the type without a non-null assertion.
    throw new Error(`Video asset at index ${index} has neither buffer nor url`);
  }
  return {
    ...(await writeOutputAsset({
      buffer: videoBuffer,
      mimeType: video.mimeType,
      originalFilename: video.fileName,
      outputPath: params.output,
      outputIndex: index,
      outputCount: result.videos.length,
      subdir: "generated",
    })),
  };
}),
);
return {
ok: true,

View File

@@ -22,7 +22,12 @@ import type {
} from "../video-generation/types.js";
export type GeneratedVideoAsset = {
buffer: Buffer;
/** Raw video bytes. Either buffer or url must be present. */
buffer?: Buffer;
/** Pre-signed or provider-hosted URL for the video. When set and buffer is
* absent, callers can deliver or download the asset without requiring the
* provider to materialize the full file in memory first. */
url?: string;
mimeType: string;
fileName?: string;
metadata?: Record<string, unknown>;

View File

@@ -25,6 +25,7 @@ export type GenerateVideoParams = {
inputImages?: VideoGenerationSourceAsset[];
inputVideos?: VideoGenerationSourceAsset[];
inputAudios?: VideoGenerationSourceAsset[];
/** Arbitrary provider-specific options forwarded as-is to provider.generateVideo. */
providerOptions?: Record<string, unknown>;
};

View File

@@ -517,6 +517,30 @@ describe("video-generation runtime", () => {
).rejects.toThrow(/supports at most 4s per video, 6s requested/);
});
// Runtime-level guard: generateVideo must reject when a provider returns an
// asset that has neither `buffer` nor `url`.
it("rejects provider results that contain undeliverable assets", async () => {
mocks.resolveAgentModelPrimaryValue.mockReturnValue("video-plugin/vid-v1");
mocks.getVideoGenerationProvider.mockReturnValue({
id: "video-plugin",
capabilities: {},
// Provider stub whose single asset carries only a MIME type.
generateVideo: async () => ({
videos: [{ mimeType: "video/mp4" }],
}),
});
await expect(
generateVideo({
cfg: {
agents: {
defaults: {
videoGenerationModel: { primary: "video-plugin/vid-v1" },
},
},
} as OpenClawConfig,
prompt: "animate a cat",
}),
).rejects.toThrow(/neither buffer nor url is set/);
});
it("lists runtime video-generation providers through the provider registry", () => {
const providers: VideoGenerationProvider[] = [
{

View File

@@ -265,6 +265,13 @@ export async function generateVideo(
if (!Array.isArray(result.videos) || result.videos.length === 0) {
throw new Error("Video generation provider returned no videos.");
}
// Reject the provider result if any asset carries neither raw bytes nor a
// URL; the error names the index of the first offending asset.
for (const [index, video] of result.videos.entries()) {
if (!video.buffer && !video.url) {
throw new Error(
`Video generation provider returned an undeliverable asset at index ${index}: neither buffer nor url is set.`,
);
}
}
return {
videos: result.videos,
provider: candidate.provider,

View File

@@ -3,7 +3,12 @@ import type { OpenClawConfig } from "../config/types.openclaw.js";
import type { MediaNormalizationEntry } from "../media-generation/normalization.types.js";
export type GeneratedVideoAsset = {
buffer: Buffer;
/** Raw video bytes. Either buffer or url must be present; when both are set, buffer is used. */
buffer?: Buffer;
/** External URL for the video (for example a pre-signed cloud storage URL).
* When set and buffer is absent, delivery surfaces can forward the URL
* without downloading the full video into memory first. */
url?: string;
mimeType: string;
fileName?: string;
metadata?: Record<string, unknown>;