mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 01:31:08 +00:00
Co-authored-by: George Zhang <georgezhangtj97@gmail.com>
This commit is contained in:
@@ -6,6 +6,8 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Changes
|
||||
|
||||
- Tools/video_generate: allow providers and plugins to return URL-only generated video assets so agent delivery and `openclaw capability video generate --output ...` can forward or stream large videos without requiring the full file in memory first. (#61988) Thanks @xieyongliang.
|
||||
|
||||
### Fixes
|
||||
|
||||
- WhatsApp: honor the configured default account when the active listener helper is used without an explicit account id, so named default accounts do not get registered under `default`. (#53918) Thanks @yhyatt.
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
7a9bb7a5e4b243e2123af94301ba363d57eddab2baa6378d16cd37a1cb8a55f7 plugin-sdk-api-baseline.json
|
||||
2bdca027d5fda72399479569927cd34d18b56b242e4b12ac45e7c2352e551c77 plugin-sdk-api-baseline.jsonl
|
||||
7a5c71593c9efbb936b9632f0b381a6c603e9bce44706b312a0172504fa51ef6 plugin-sdk-api-baseline.json
|
||||
0b044de57266d20561838a5ae0edbaacaa53b323d4c8c068e701a48f92f0a264 plugin-sdk-api-baseline.jsonl
|
||||
|
||||
@@ -127,7 +127,55 @@ describe("createVideoGenerateTool", () => {
|
||||
expect(taskExecutorMocks.completeTaskRunByRunId).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("starts background generation and wakes the session with MEDIA lines", async () => {
|
||||
it("surfaces url-only generated videos without saving local files", async () => {
|
||||
vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
|
||||
provider: "vydra",
|
||||
model: "veo3",
|
||||
attempts: [],
|
||||
ignoredOverrides: [],
|
||||
videos: [
|
||||
{
|
||||
url: "https://example.com/generated-lobster.mp4",
|
||||
mimeType: "video/mp4",
|
||||
fileName: "lobster.mp4",
|
||||
},
|
||||
],
|
||||
metadata: { taskId: "task-1" },
|
||||
});
|
||||
const saveSpy = vi.spyOn(mediaStore, "saveMediaBuffer");
|
||||
|
||||
const tool = createVideoGenerateTool({
|
||||
config: asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
videoGenerationModel: { primary: "vydra/veo3" },
|
||||
},
|
||||
},
|
||||
}),
|
||||
});
|
||||
if (!tool) {
|
||||
throw new Error("expected video_generate tool");
|
||||
}
|
||||
|
||||
const result = await tool.execute("call-url", { prompt: "friendly lobster surfing" });
|
||||
const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";
|
||||
|
||||
expect(saveSpy).not.toHaveBeenCalled();
|
||||
expect(text).toContain("Generated 1 video with vydra/veo3.");
|
||||
expect(text).toContain("MEDIA:https://example.com/generated-lobster.mp4");
|
||||
expect(result.details).toMatchObject({
|
||||
provider: "vydra",
|
||||
model: "veo3",
|
||||
count: 1,
|
||||
media: {
|
||||
mediaUrls: ["https://example.com/generated-lobster.mp4"],
|
||||
},
|
||||
paths: ["https://example.com/generated-lobster.mp4"],
|
||||
metadata: { taskId: "task-1" },
|
||||
});
|
||||
});
|
||||
|
||||
it("starts background generation and wakes the session with url-only MEDIA lines", async () => {
|
||||
taskExecutorMocks.createRunningTaskRun.mockReturnValue({
|
||||
taskId: "task-123",
|
||||
runtime: "cli",
|
||||
@@ -143,33 +191,28 @@ describe("createVideoGenerateTool", () => {
|
||||
const wakeSpy = vi
|
||||
.spyOn(videoGenerateBackground, "wakeVideoGenerationTaskCompletion")
|
||||
.mockResolvedValue(undefined);
|
||||
const saveSpy = vi.spyOn(mediaStore, "saveMediaBuffer");
|
||||
vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
|
||||
provider: "qwen",
|
||||
model: "wan2.6-t2v",
|
||||
provider: "vydra",
|
||||
model: "veo3",
|
||||
attempts: [],
|
||||
ignoredOverrides: [],
|
||||
videos: [
|
||||
{
|
||||
buffer: Buffer.from("video-bytes"),
|
||||
url: "https://example.com/generated-lobster.mp4",
|
||||
mimeType: "video/mp4",
|
||||
fileName: "lobster.mp4",
|
||||
},
|
||||
],
|
||||
metadata: { taskId: "task-1" },
|
||||
});
|
||||
vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
|
||||
path: "/tmp/generated-lobster.mp4",
|
||||
id: "generated-lobster.mp4",
|
||||
size: 11,
|
||||
contentType: "video/mp4",
|
||||
});
|
||||
|
||||
let scheduledWork: (() => Promise<void>) | undefined;
|
||||
const tool = createVideoGenerateTool({
|
||||
config: asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
videoGenerationModel: { primary: "qwen/wan2.6-t2v" },
|
||||
videoGenerationModel: { primary: "vydra/veo3" },
|
||||
},
|
||||
},
|
||||
}),
|
||||
@@ -200,6 +243,7 @@ describe("createVideoGenerateTool", () => {
|
||||
});
|
||||
expect(typeof scheduledWork).toBe("function");
|
||||
await scheduledWork?.();
|
||||
expect(saveSpy).not.toHaveBeenCalled();
|
||||
expect(taskExecutorMocks.recordTaskRunProgressByRunId).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
runId: expect.stringMatching(/^tool:video_generate:/),
|
||||
@@ -217,7 +261,8 @@ describe("createVideoGenerateTool", () => {
|
||||
taskId: "task-123",
|
||||
}),
|
||||
status: "ok",
|
||||
result: expect.stringContaining("MEDIA:/tmp/generated-lobster.mp4"),
|
||||
mediaUrls: ["https://example.com/generated-lobster.mp4"],
|
||||
result: expect.stringContaining("MEDIA:https://example.com/generated-lobster.mp4"),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
@@ -535,6 +535,10 @@ type ExecutedVideoGeneration = {
|
||||
provider: string;
|
||||
model: string;
|
||||
savedPaths: string[];
|
||||
/** URLs of url-only assets that were not saved locally. */
|
||||
urlOnlyUrls: string[];
|
||||
/** Total generated video count, including url-only assets. */
|
||||
count: number;
|
||||
contentText: string;
|
||||
details: Record<string, unknown>;
|
||||
wakeResult: string;
|
||||
@@ -587,8 +591,28 @@ async function executeVideoGenerationJob(params: {
|
||||
});
|
||||
}
|
||||
|
||||
const urlOnlyVideos: Array<{ url: string; mimeType: string; fileName?: string }> = [];
|
||||
const bufferVideos: Array<(typeof result.videos)[number] & { buffer: Buffer }> = [];
|
||||
for (const video of result.videos) {
|
||||
if (video.buffer) {
|
||||
bufferVideos.push(video as (typeof result.videos)[number] & { buffer: Buffer });
|
||||
continue;
|
||||
}
|
||||
if (video.url) {
|
||||
urlOnlyVideos.push({
|
||||
url: video.url,
|
||||
mimeType: video.mimeType,
|
||||
fileName: video.fileName,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
throw new Error(
|
||||
`Provider ${result.provider} returned a video asset with neither buffer nor url — cannot deliver.`,
|
||||
);
|
||||
}
|
||||
|
||||
const savedVideos = await Promise.all(
|
||||
result.videos.map((video) =>
|
||||
bufferVideos.map((video) =>
|
||||
saveMediaBuffer(
|
||||
video.buffer,
|
||||
video.mimeType,
|
||||
@@ -598,6 +622,7 @@ async function executeVideoGenerationJob(params: {
|
||||
),
|
||||
),
|
||||
);
|
||||
const totalCount = savedVideos.length + urlOnlyVideos.length;
|
||||
const requestedDurationSeconds =
|
||||
result.normalization?.durationSeconds?.requested ??
|
||||
(typeof result.metadata?.requestedDurationSeconds === "number" &&
|
||||
@@ -646,8 +671,12 @@ async function executeVideoGenerationJob(params: {
|
||||
typeof result.metadata?.requestedSize === "string" &&
|
||||
result.metadata.requestedSize === params.size &&
|
||||
Boolean(normalizedAspectRatio));
|
||||
const allMediaUrls = [
|
||||
...savedVideos.map((video) => video.path),
|
||||
...urlOnlyVideos.map((video) => video.url),
|
||||
];
|
||||
const lines = [
|
||||
`Generated ${savedVideos.length} video${savedVideos.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
|
||||
`Generated ${totalCount} video${totalCount === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
|
||||
...(warning ? [`Warning: ${warning}`] : []),
|
||||
typeof requestedDurationSeconds === "number" &&
|
||||
typeof normalizedDurationSeconds === "number" &&
|
||||
@@ -655,22 +684,25 @@ async function executeVideoGenerationJob(params: {
|
||||
? `Duration normalized: requested ${requestedDurationSeconds}s; used ${normalizedDurationSeconds}s.`
|
||||
: null,
|
||||
...savedVideos.map((video) => `MEDIA:${video.path}`),
|
||||
...urlOnlyVideos.map((video) => `MEDIA:${video.url}`),
|
||||
].filter((entry): entry is string => Boolean(entry));
|
||||
|
||||
return {
|
||||
provider: result.provider,
|
||||
model: result.model,
|
||||
savedPaths: savedVideos.map((video) => video.path),
|
||||
urlOnlyUrls: urlOnlyVideos.map((video) => video.url),
|
||||
count: totalCount,
|
||||
contentText: lines.join("\n"),
|
||||
wakeResult: lines.join("\n"),
|
||||
details: {
|
||||
provider: result.provider,
|
||||
model: result.model,
|
||||
count: savedVideos.length,
|
||||
count: totalCount,
|
||||
media: {
|
||||
mediaUrls: savedVideos.map((video) => video.path),
|
||||
mediaUrls: allMediaUrls,
|
||||
},
|
||||
paths: savedVideos.map((video) => video.path),
|
||||
paths: allMediaUrls,
|
||||
...buildTaskRunDetails(params.taskHandle),
|
||||
...buildMediaReferenceDetails({
|
||||
entries: params.loadedReferenceImages,
|
||||
@@ -931,7 +963,7 @@ export function createVideoGenerateTool(options?: {
|
||||
handle: taskHandle,
|
||||
provider: executed.provider,
|
||||
model: executed.model,
|
||||
count: executed.savedPaths.length,
|
||||
count: executed.count,
|
||||
paths: executed.savedPaths,
|
||||
});
|
||||
try {
|
||||
@@ -941,7 +973,7 @@ export function createVideoGenerateTool(options?: {
|
||||
status: "ok",
|
||||
statusLabel: "completed successfully",
|
||||
result: executed.wakeResult,
|
||||
mediaUrls: executed.savedPaths,
|
||||
mediaUrls: [...executed.savedPaths, ...executed.urlOnlyUrls],
|
||||
});
|
||||
} catch (error) {
|
||||
log.warn("Video generation completion wake failed after successful generation", {
|
||||
@@ -1025,7 +1057,7 @@ export function createVideoGenerateTool(options?: {
|
||||
handle: taskHandle,
|
||||
provider: executed.provider,
|
||||
model: executed.model,
|
||||
count: executed.savedPaths.length,
|
||||
count: executed.count,
|
||||
paths: executed.savedPaths,
|
||||
});
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { Command } from "commander";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { runRegisteredCli } from "../test-utils/command-runner.js";
|
||||
import { registerCapabilityCli } from "./capability-cli.js";
|
||||
|
||||
@@ -58,6 +58,7 @@ const mocks = vi.hoisted(() => ({
|
||||
model: "gpt-4.1-mini",
|
||||
})),
|
||||
generateImage: vi.fn(),
|
||||
generateVideo: vi.fn(),
|
||||
transcribeAudioFile: vi.fn(async () => ({ text: "meeting notes" })),
|
||||
textToSpeech: vi.fn(async () => ({
|
||||
success: true,
|
||||
@@ -202,7 +203,7 @@ vi.mock("../image-generation/runtime.js", () => ({
|
||||
}));
|
||||
|
||||
vi.mock("../video-generation/runtime.js", () => ({
|
||||
generateVideo: vi.fn(),
|
||||
generateVideo: mocks.generateVideo,
|
||||
listRuntimeVideoGenerationProviders: vi.fn(() => []),
|
||||
}));
|
||||
|
||||
@@ -238,6 +239,10 @@ vi.mock("../web-fetch/runtime.js", () => ({
|
||||
}));
|
||||
|
||||
describe("capability cli", () => {
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
mocks.runtime.log.mockClear();
|
||||
mocks.runtime.error.mockClear();
|
||||
@@ -278,6 +283,7 @@ describe("capability cli", () => {
|
||||
}) as never);
|
||||
mocks.describeImageFile.mockClear();
|
||||
mocks.generateImage.mockReset();
|
||||
mocks.generateVideo.mockReset();
|
||||
mocks.transcribeAudioFile.mockClear();
|
||||
mocks.textToSpeech.mockClear();
|
||||
mocks.setTtsProvider.mockClear();
|
||||
@@ -434,6 +440,85 @@ describe("capability cli", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("streams url-only generated videos to --output paths", async () => {
|
||||
mocks.generateVideo.mockResolvedValue({
|
||||
provider: "vydra",
|
||||
model: "veo3",
|
||||
attempts: [],
|
||||
videos: [
|
||||
{
|
||||
url: "https://example.com/generated-video.mp4",
|
||||
mimeType: "video/mp4",
|
||||
fileName: "provider-name.mp4",
|
||||
},
|
||||
],
|
||||
});
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(Buffer.from("video-bytes"), {
|
||||
status: 200,
|
||||
headers: { "content-type": "video/mp4" },
|
||||
}),
|
||||
);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-video-generate-"));
|
||||
const outputBase = path.join(tempDir, "result");
|
||||
|
||||
await runRegisteredCli({
|
||||
register: registerCapabilityCli as (program: Command) => void,
|
||||
argv: [
|
||||
"capability",
|
||||
"video",
|
||||
"generate",
|
||||
"--prompt",
|
||||
"friendly lobster",
|
||||
"--output",
|
||||
outputBase,
|
||||
"--json",
|
||||
],
|
||||
});
|
||||
|
||||
const outputPath = `${outputBase}.mp4`;
|
||||
expect(fetchMock).toHaveBeenCalledWith(
|
||||
"https://example.com/generated-video.mp4",
|
||||
expect.objectContaining({ signal: expect.any(AbortSignal) }),
|
||||
);
|
||||
expect(await fs.readFile(outputPath, "utf8")).toBe("video-bytes");
|
||||
expect(mocks.runtime.writeJson).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
capability: "video.generate",
|
||||
provider: "vydra",
|
||||
outputs: [
|
||||
expect.objectContaining({
|
||||
path: outputPath,
|
||||
mimeType: "video/mp4",
|
||||
size: 11,
|
||||
}),
|
||||
],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("fails video generate when a provider returns an undeliverable asset", async () => {
|
||||
mocks.generateVideo.mockResolvedValue({
|
||||
provider: "vydra",
|
||||
model: "veo3",
|
||||
attempts: [],
|
||||
videos: [{ mimeType: "video/mp4" }],
|
||||
});
|
||||
|
||||
await expect(
|
||||
runRegisteredCli({
|
||||
register: registerCapabilityCli as (program: Command) => void,
|
||||
argv: ["capability", "video", "generate", "--prompt", "friendly lobster", "--json"],
|
||||
}),
|
||||
).rejects.toThrow("exit 1");
|
||||
expect(mocks.runtime.error).toHaveBeenCalledWith(
|
||||
expect.stringContaining("Video asset at index 0 has neither buffer nor url"),
|
||||
);
|
||||
});
|
||||
|
||||
it("routes audio transcribe through transcription, not realtime", async () => {
|
||||
await runRegisteredCli({
|
||||
register: registerCapabilityCli as (program: Command) => void,
|
||||
|
||||
@@ -815,17 +815,55 @@ async function runVideoGenerate(params: { prompt: string; model?: string; output
|
||||
modelOverride: params.model,
|
||||
});
|
||||
const outputs = await Promise.all(
|
||||
result.videos.map(async (video, index) => ({
|
||||
...(await writeOutputAsset({
|
||||
buffer: video.buffer,
|
||||
mimeType: video.mimeType,
|
||||
originalFilename: video.fileName,
|
||||
outputPath: params.output,
|
||||
outputIndex: index,
|
||||
outputCount: result.videos.length,
|
||||
subdir: "generated",
|
||||
})),
|
||||
})),
|
||||
result.videos.map(async (video, index) => {
|
||||
if (!video.buffer && !video.url) {
|
||||
throw new Error(`Video asset at index ${index} has neither buffer nor url`);
|
||||
}
|
||||
|
||||
let videoBuffer = video.buffer;
|
||||
if (!videoBuffer && video.url) {
|
||||
const response = await fetch(video.url, { signal: AbortSignal.timeout(120_000) });
|
||||
if (!response.ok) {
|
||||
throw new Error(`Failed to download video from ${video.url}: ${response.status}`);
|
||||
}
|
||||
if (params.output && response.body) {
|
||||
const { pipeline } = await import("node:stream/promises");
|
||||
const { Readable } = await import("node:stream");
|
||||
const { createWriteStream } = await import("node:fs");
|
||||
const mimeType = normalizeMimeType(video.mimeType);
|
||||
const ext =
|
||||
extensionForMime(mimeType) ||
|
||||
path.extname(video.fileName ?? "") ||
|
||||
path.extname(params.output ?? "");
|
||||
const resolvedOutput = path.resolve(params.output);
|
||||
const parsed = path.parse(resolvedOutput);
|
||||
const filePath =
|
||||
result.videos.length <= 1
|
||||
? path.join(parsed.dir, `${parsed.name}${ext}`)
|
||||
: path.join(parsed.dir, `${parsed.name}-${String(index + 1)}${ext}`);
|
||||
await fs.mkdir(path.dirname(filePath), { recursive: true });
|
||||
await pipeline(
|
||||
Readable.fromWeb(response.body as import("node:stream/web").ReadableStream),
|
||||
createWriteStream(filePath),
|
||||
);
|
||||
const stat = await fs.stat(filePath);
|
||||
return { path: filePath, mimeType: video.mimeType, size: stat.size };
|
||||
}
|
||||
videoBuffer = Buffer.from(await response.arrayBuffer());
|
||||
}
|
||||
|
||||
return {
|
||||
...(await writeOutputAsset({
|
||||
buffer: videoBuffer!,
|
||||
mimeType: video.mimeType,
|
||||
originalFilename: video.fileName,
|
||||
outputPath: params.output,
|
||||
outputIndex: index,
|
||||
outputCount: result.videos.length,
|
||||
subdir: "generated",
|
||||
})),
|
||||
};
|
||||
}),
|
||||
);
|
||||
return {
|
||||
ok: true,
|
||||
|
||||
@@ -22,7 +22,12 @@ import type {
|
||||
} from "../video-generation/types.js";
|
||||
|
||||
export type GeneratedVideoAsset = {
|
||||
buffer: Buffer;
|
||||
/** Raw video bytes. Either buffer or url must be present. */
|
||||
buffer?: Buffer;
|
||||
/** Pre-signed or provider-hosted URL for the video. When set and buffer is
|
||||
* absent, callers can deliver or download the asset without requiring the
|
||||
* provider to materialize the full file in memory first. */
|
||||
url?: string;
|
||||
mimeType: string;
|
||||
fileName?: string;
|
||||
metadata?: Record<string, unknown>;
|
||||
|
||||
@@ -25,6 +25,7 @@ export type GenerateVideoParams = {
|
||||
inputImages?: VideoGenerationSourceAsset[];
|
||||
inputVideos?: VideoGenerationSourceAsset[];
|
||||
inputAudios?: VideoGenerationSourceAsset[];
|
||||
/** Arbitrary provider-specific options forwarded as-is to provider.generateVideo. */
|
||||
providerOptions?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
|
||||
@@ -517,6 +517,30 @@ describe("video-generation runtime", () => {
|
||||
).rejects.toThrow(/supports at most 4s per video, 6s requested/);
|
||||
});
|
||||
|
||||
it("rejects provider results that contain undeliverable assets", async () => {
|
||||
mocks.resolveAgentModelPrimaryValue.mockReturnValue("video-plugin/vid-v1");
|
||||
mocks.getVideoGenerationProvider.mockReturnValue({
|
||||
id: "video-plugin",
|
||||
capabilities: {},
|
||||
generateVideo: async () => ({
|
||||
videos: [{ mimeType: "video/mp4" }],
|
||||
}),
|
||||
});
|
||||
|
||||
await expect(
|
||||
generateVideo({
|
||||
cfg: {
|
||||
agents: {
|
||||
defaults: {
|
||||
videoGenerationModel: { primary: "video-plugin/vid-v1" },
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
prompt: "animate a cat",
|
||||
}),
|
||||
).rejects.toThrow(/neither buffer nor url is set/);
|
||||
});
|
||||
|
||||
it("lists runtime video-generation providers through the provider registry", () => {
|
||||
const providers: VideoGenerationProvider[] = [
|
||||
{
|
||||
|
||||
@@ -265,6 +265,13 @@ export async function generateVideo(
|
||||
if (!Array.isArray(result.videos) || result.videos.length === 0) {
|
||||
throw new Error("Video generation provider returned no videos.");
|
||||
}
|
||||
for (const [index, video] of result.videos.entries()) {
|
||||
if (!video.buffer && !video.url) {
|
||||
throw new Error(
|
||||
`Video generation provider returned an undeliverable asset at index ${index}: neither buffer nor url is set.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
return {
|
||||
videos: result.videos,
|
||||
provider: candidate.provider,
|
||||
|
||||
@@ -3,7 +3,12 @@ import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import type { MediaNormalizationEntry } from "../media-generation/normalization.types.js";
|
||||
|
||||
export type GeneratedVideoAsset = {
|
||||
buffer: Buffer;
|
||||
/** Raw video bytes. Required for local delivery; omit when url is provided instead. */
|
||||
buffer?: Buffer;
|
||||
/** External URL for the video (for example a pre-signed cloud storage URL).
|
||||
* When set and buffer is absent, delivery surfaces can forward the URL
|
||||
* without downloading the full video into memory first. */
|
||||
url?: string;
|
||||
mimeType: string;
|
||||
fileName?: string;
|
||||
metadata?: Record<string, unknown>;
|
||||
|
||||
Reference in New Issue
Block a user