feat(agents): add video_generate tool

This commit is contained in:
Peter Steinberger
2026-04-05 18:42:08 +01:00
parent b5e87be7f0
commit 5790435975
26 changed files with 1249 additions and 35 deletions

View File

@@ -31,6 +31,7 @@ import { createSessionsYieldTool } from "./tools/sessions-yield-tool.js";
import { createSubagentsTool } from "./tools/subagents-tool.js";
import { createTtsTool } from "./tools/tts-tool.js";
import { createUpdatePlanTool } from "./tools/update-plan-tool.js";
import { createVideoGenerateTool } from "./tools/video-generate-tool.js";
import { createWebFetchTool, createWebSearchTool } from "./tools/web-tools.js";
import { resolveWorkspaceRoot } from "./workspace-dir.js";
@@ -159,6 +160,13 @@ export function createOpenClawTools(
sandbox,
fsPolicy: options?.fsPolicy,
});
const videoGenerateTool = createVideoGenerateTool({
config: options?.config,
agentDir: options?.agentDir,
workspaceDir,
sandbox,
fsPolicy: options?.fsPolicy,
});
const pdfTool = options?.agentDir?.trim()
? createPdfTool({
config: options?.config,
@@ -216,6 +224,7 @@ export function createOpenClawTools(
config: options?.config,
}),
...(imageGenerateTool ? [imageGenerateTool] : []),
...(videoGenerateTool ? [videoGenerateTool] : []),
createGatewayTool({
agentSessionKey: options?.agentSessionKey,
config: options?.config,

View File

@@ -0,0 +1,91 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import * as videoGenerationRuntime from "../video-generation/runtime.js";
import { createOpenClawTools } from "./openclaw-tools.js";
// Disable plugin tool discovery so only core tools are registered in these tests.
vi.mock("../plugins/tools.js", () => ({
  resolvePluginTools: () => [],
  copyPluginToolMeta: () => undefined,
  getPluginToolMeta: () => undefined,
}));
/** Narrow a plain test fixture object to OpenClawConfig without full construction. */
function asConfig(value: unknown): OpenClawConfig {
  const config = value as OpenClawConfig;
  return config;
}
/**
 * Replace the runtime provider registry with a single stubbed "qwen" provider.
 * Its generateVideo throws because these tests only exercise tool registration,
 * never actual generation.
 */
function stubVideoGenerationProviders() {
  const qwenStub = {
    id: "qwen",
    defaultModel: "wan2.6-t2v",
    models: ["wan2.6-t2v"],
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 4,
      maxDurationSeconds: 10,
      supportsSize: true,
      supportsAspectRatio: true,
      supportsResolution: true,
      supportsAudio: true,
      supportsWatermark: true,
    },
    generateVideo: vi.fn(async () => {
      throw new Error("not used");
    }),
  };
  vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([
    qwenStub,
  ]);
}
// Registration behavior of video_generate: the tool must appear when either
// explicit config or an env-authenticated provider exists, and stay hidden otherwise.
describe("openclaw tools video generation registration", () => {
  beforeEach(() => {
    // Blank out every env var a qwen-compatible provider could pick up for auth,
    // so each test controls auth availability explicitly.
    vi.stubEnv("QWEN_API_KEY", "");
    vi.stubEnv("MODELSTUDIO_API_KEY", "");
    vi.stubEnv("DASHSCOPE_API_KEY", "");
  });
  afterEach(() => {
    vi.restoreAllMocks();
    vi.unstubAllEnvs();
  });
  it("registers video_generate when video-generation config is present", () => {
    // Explicit agents.defaults.videoGenerationModel is sufficient on its own.
    const tools = createOpenClawTools({
      config: asConfig({
        agents: {
          defaults: {
            videoGenerationModel: {
              primary: "qwen/wan2.6-t2v",
            },
          },
        },
      }),
      agentDir: "/tmp/openclaw-agent-main",
    });
    expect(tools.map((tool) => tool.name)).toContain("video_generate");
  });
  it("registers video_generate when a compatible provider has env-backed auth", () => {
    // No config, but the stubbed qwen provider plus QWEN_API_KEY enables registration.
    stubVideoGenerationProviders();
    vi.stubEnv("QWEN_API_KEY", "qwen-test");
    const tools = createOpenClawTools({
      config: asConfig({}),
      agentDir: "/tmp/openclaw-agent-main",
    });
    expect(tools.map((tool) => tool.name)).toContain("video_generate");
  });
  it("omits video_generate when config is absent and no compatible provider auth exists", () => {
    stubVideoGenerationProviders();
    const tools = createOpenClawTools({
      config: asConfig({}),
      agentDir: "/tmp/openclaw-agent-main",
    });
    expect(tools.map((tool) => tool.name)).not.toContain("video_generate");
  });
});

View File

@@ -46,6 +46,9 @@ vi.mock("./tools/gateway-tool.js", () => ({
vi.mock("./tools/image-generate-tool.js", () => ({
createImageGenerateTool: mockToolFactory("image_generate_stub"),
}));
vi.mock("./tools/video-generate-tool.js", () => ({
createVideoGenerateTool: mockToolFactory("video_generate_stub"),
}));
vi.mock("./tools/image-tool.js", () => ({
createImageTool: mockToolFactory("image_stub"),
}));

View File

@@ -265,6 +265,10 @@ describe("extractToolResultMediaPaths", () => {
expect(isToolResultMediaTrusted("image_generate")).toBe(true);
});
// video_generate saves its own outputs, so its MEDIA: paths are trusted for delivery.
it("trusts video_generate local MEDIA paths", () => {
  expect(isToolResultMediaTrusted("video_generate")).toBe(true);
});
it("does not trust local MEDIA paths for MCP-provenance results", () => {
expect(
filterToolResultMediaUrls("browser", ["/tmp/screenshot.png"], {

View File

@@ -156,6 +156,7 @@ const TRUSTED_TOOL_RESULT_MEDIA = new Set([
"sessions_spawn",
"subagents",
"tts",
"video_generate",
"web_fetch",
"web_search",
"x_search",

View File

@@ -32,6 +32,7 @@ const coreTools = [
stubActionTool("session_status", ["get", "show"]),
stubTool("tts"),
stubTool("image_generate"),
stubTool("video_generate"),
stubTool("web_fetch"),
stubTool("image"),
stubTool("pdf"),

View File

@@ -23,6 +23,10 @@ vi.mock("../tools/image-generate-tool.js", () => ({
createImageGenerateTool: () => stubTool("image_generate"),
}));
vi.mock("../tools/video-generate-tool.js", () => ({
createVideoGenerateTool: () => stubTool("video_generate"),
}));
vi.mock("../tools/web-tools.js", () => ({
createWebSearchTool: () => null,
createWebFetchTool: () => null,

View File

@@ -10,6 +10,7 @@ describe("tool-catalog", () => {
expect(policy!.allow).toContain("x_search");
expect(policy!.allow).toContain("web_fetch");
expect(policy!.allow).toContain("image_generate");
expect(policy!.allow).toContain("video_generate");
expect(policy!.allow).toContain("update_plan");
});
});

View File

@@ -277,6 +277,14 @@ const CORE_TOOL_DEFINITIONS: CoreToolDefinition[] = [
profiles: ["coding"],
includeInOpenClawGroup: true,
},
{
id: "video_generate",
label: "video_generate",
description: "Video generation",
sectionId: "media",
profiles: ["coding"],
includeInOpenClawGroup: true,
},
{
id: "tts",
label: "tts",

View File

@@ -640,6 +640,28 @@ export const TOOL_DISPLAY_CONFIG: ToolDisplayConfig = {
},
},
},
video_generate: {
emoji: "🎬",
title: "Video Generation",
actions: {
generate: {
label: "generate",
detailKeys: [
"prompt",
"model",
"durationSeconds",
"resolution",
"aspectRatio",
"audio",
"watermark",
],
},
list: {
label: "list",
detailKeys: ["provider", "model"],
},
},
},
pdf: {
emoji: "📑",
title: "PDF",

View File

@@ -32,9 +32,16 @@ export function applyImageGenerationModelConfigDefaults(
return applyAgentDefaultModelConfig(cfg, "imageGenerationModel", imageGenerationModelConfig);
}
export function applyVideoGenerationModelConfigDefaults(
cfg: OpenClawConfig | undefined,
videoGenerationModelConfig: ToolModelConfig,
): OpenClawConfig | undefined {
return applyAgentDefaultModelConfig(cfg, "videoGenerationModel", videoGenerationModelConfig);
}
function applyAgentDefaultModelConfig(
cfg: OpenClawConfig | undefined,
key: "imageModel" | "imageGenerationModel",
key: "imageModel" | "imageGenerationModel" | "videoGenerationModel",
modelConfig: ToolModelConfig,
): OpenClawConfig | undefined {
if (!cfg) {

View File

@@ -0,0 +1,91 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../config/config.js";
import * as mediaStore from "../../media/store.js";
import * as videoGenerationRuntime from "../../video-generation/runtime.js";
import { createVideoGenerateTool } from "./video-generate-tool.js";
/** Cast an arbitrary test fixture to OpenClawConfig for factory calls. */
function asConfig(value: unknown): OpenClawConfig {
  const cfg = value as OpenClawConfig;
  return cfg;
}
// Unit tests for the video_generate tool factory and its generate flow
// (runtime + media store are stubbed; no network or disk I/O happens).
describe("createVideoGenerateTool", () => {
  beforeEach(() => {
    vi.restoreAllMocks();
  });
  afterEach(() => {
    vi.unstubAllEnvs();
  });
  it("returns null when no video-generation config or auth-backed provider is available", () => {
    // Zero registered providers + empty config → the factory declines to register.
    vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([]);
    expect(createVideoGenerateTool({ config: asConfig({}) })).toBeNull();
  });
  it("registers when video-generation config is present", () => {
    expect(
      createVideoGenerateTool({
        config: asConfig({
          agents: {
            defaults: {
              videoGenerationModel: { primary: "qwen/wan2.6-t2v" },
            },
          },
        }),
      }),
    ).not.toBeNull();
  });
  it("generates videos, saves them, and emits MEDIA paths", async () => {
    // Stub the generation runtime to return one in-memory mp4.
    vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
      provider: "qwen",
      model: "wan2.6-t2v",
      attempts: [],
      videos: [
        {
          buffer: Buffer.from("video-bytes"),
          mimeType: "video/mp4",
          fileName: "lobster.mp4",
        },
      ],
      metadata: { taskId: "task-1" },
    });
    // Stub persistence so the tool reports a deterministic media path.
    vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
      path: "/tmp/generated-lobster.mp4",
      id: "generated-lobster.mp4",
      size: 11,
      contentType: "video/mp4",
    });
    const tool = createVideoGenerateTool({
      config: asConfig({
        agents: {
          defaults: {
            videoGenerationModel: { primary: "qwen/wan2.6-t2v" },
          },
        },
      }),
    });
    expect(tool).not.toBeNull();
    if (!tool) {
      throw new Error("expected video_generate tool");
    }
    const result = await tool.execute("call-1", { prompt: "friendly lobster surfing" });
    const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";
    // Text payload: one summary line plus one MEDIA: line per saved video.
    expect(text).toContain("Generated 1 video with qwen/wan2.6-t2v.");
    expect(text).toContain("MEDIA:/tmp/generated-lobster.mp4");
    expect(result.details).toMatchObject({
      provider: "qwen",
      model: "wan2.6-t2v",
      count: 1,
      media: {
        mediaUrls: ["/tmp/generated-lobster.mp4"],
      },
      paths: ["/tmp/generated-lobster.mp4"],
      metadata: { taskId: "task-1" },
    });
  });
});

View File

@@ -0,0 +1,735 @@
import { Type } from "@sinclair/typebox";
import type { OpenClawConfig } from "../../config/config.js";
import { loadConfig } from "../../config/config.js";
import { saveMediaBuffer } from "../../media/store.js";
import { loadWebMedia } from "../../media/web-media.js";
import { readSnakeCaseParamRaw } from "../../param-key.js";
import { getProviderEnvVars } from "../../secrets/provider-env-vars.js";
import { resolveUserPath } from "../../utils.js";
import { parseVideoGenerationModelRef } from "../../video-generation/model-ref.js";
import {
generateVideo,
listRuntimeVideoGenerationProviders,
} from "../../video-generation/runtime.js";
import type {
VideoGenerationProvider,
VideoGenerationResolution,
VideoGenerationSourceAsset,
} from "../../video-generation/types.js";
import { normalizeProviderId } from "../provider-id.js";
import {
ToolInputError,
readNumberParam,
readStringArrayParam,
readStringParam,
} from "./common.js";
import { decodeDataUrl } from "./image-tool.helpers.js";
import {
applyVideoGenerationModelConfigDefaults,
resolveMediaToolLocalRoots,
} from "./media-tool-shared.js";
import {
buildToolModelConfigFromCandidates,
coerceToolModelConfig,
hasAuthForProvider,
hasToolModelConfig,
resolveDefaultModelRef,
type ToolModelConfig,
} from "./model-config.helpers.js";
import {
createSandboxBridgeReadFile,
resolveSandboxedBridgeMediaPath,
type AnyAgentTool,
type SandboxFsBridge,
type ToolFsPolicy,
} from "./tool-runtime.helpers.js";
// Tool-wide caps on reference inputs; individual providers may enforce tighter
// limits through their declared capabilities (see validateVideoGenerationCapabilities).
const MAX_INPUT_IMAGES = 5;
const MAX_INPUT_VIDEOS = 4;
// Aspect ratios accepted by normalizeAspectRatio. Whether the selected provider
// supports aspect-ratio hints at all is validated separately.
const SUPPORTED_ASPECT_RATIOS = new Set([
  "1:1",
  "2:3",
  "3:2",
  "3:4",
  "4:3",
  "4:5",
  "5:4",
  "9:16",
  "16:9",
  "21:9",
]);
// Tool input schema. All fields are optional at the schema level; the "generate"
// action additionally requires `prompt`, enforced at execute time.
const VideoGenerateToolSchema = Type.Object({
  action: Type.Optional(
    Type.String({
      description:
        'Optional action: "generate" (default) or "list" to inspect available providers/models.',
    }),
  ),
  prompt: Type.Optional(Type.String({ description: "Video generation prompt." })),
  // Singular and plural reference inputs are merged and de-duplicated later
  // (see normalizeReferenceInputs).
  image: Type.Optional(
    Type.String({
      description: "Optional single reference image path or URL.",
    }),
  ),
  images: Type.Optional(
    Type.Array(Type.String(), {
      description: `Optional reference images (up to ${MAX_INPUT_IMAGES}).`,
    }),
  ),
  video: Type.Optional(
    Type.String({
      description: "Optional single reference video path or URL.",
    }),
  ),
  videos: Type.Optional(
    Type.Array(Type.String(), {
      description: `Optional reference videos (up to ${MAX_INPUT_VIDEOS}).`,
    }),
  ),
  model: Type.Optional(
    Type.String({ description: "Optional provider/model override, e.g. qwen/wan2.6-t2v." }),
  ),
  filename: Type.Optional(
    Type.String({
      description:
        "Optional output filename hint. OpenClaw preserves the basename and saves under its managed media directory.",
    }),
  ),
  // The remaining hints are checked against the selected provider's
  // capabilities before generation starts.
  size: Type.Optional(
    Type.String({
      description: "Optional size hint like 1280x720 or 1920x1080 when the provider supports it.",
    }),
  ),
  aspectRatio: Type.Optional(
    Type.String({
      description:
        "Optional aspect ratio hint: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, or 21:9.",
    }),
  ),
  resolution: Type.Optional(
    Type.String({
      description: "Optional resolution hint: 480P, 720P, or 1080P.",
    }),
  ),
  durationSeconds: Type.Optional(
    Type.Number({
      description: "Optional target duration in seconds.",
      minimum: 1,
    }),
  ),
  audio: Type.Optional(
    Type.Boolean({
      description: "Optional audio toggle when the provider supports generated audio.",
    }),
  ),
  watermark: Type.Optional(
    Type.Boolean({
      description: "Optional watermark toggle when the provider supports it.",
    }),
  ),
});
/** Env var names that can supply auth for a provider (shown as hints in "list" output). */
function getVideoGenerationProviderAuthEnvVars(providerId: string): string[] {
  const envVars = getProviderEnvVars(providerId);
  return envVars;
}
/**
 * Build an ordered list of "provider/model" refs to try when no explicit
 * videoGenerationModel config exists. The configured primary provider (if
 * any) comes first, the remaining configured providers follow alphabetically.
 * Only providers with a default model and working config/auth are included,
 * and each ref appears at most once.
 */
function resolveVideoGenerationModelCandidates(params: {
  cfg?: OpenClawConfig;
  agentDir?: string;
}): Array<string | undefined> {
  // Collect at most one default ref per configured provider.
  const defaultsByProvider = new Map<string, string>();
  for (const provider of listRuntimeVideoGenerationProviders({ config: params.cfg })) {
    const providerId = provider.id.trim();
    const modelId = provider.defaultModel?.trim();
    if (!providerId || !modelId || defaultsByProvider.has(providerId)) {
      continue;
    }
    const configured = isVideoGenerationProviderConfigured({
      provider,
      cfg: params.cfg,
      agentDir: params.agentDir,
    });
    if (configured) {
      defaultsByProvider.set(providerId, `${providerId}/${modelId}`);
    }
  }
  // Primary provider first, then everyone else in sorted order.
  const primaryProvider = resolveDefaultModelRef(params.cfg).provider;
  const secondaryProviders = [...defaultsByProvider.keys()]
    .filter((providerId) => providerId !== primaryProvider)
    .toSorted();
  const refs: string[] = [];
  const emitted = new Set<string>();
  for (const providerId of [primaryProvider, ...secondaryProviders]) {
    const ref = defaultsByProvider.get(providerId);
    if (ref && !emitted.has(ref)) {
      emitted.add(ref);
      refs.push(ref);
    }
  }
  return refs;
}
/**
 * Resolve the effective video-generation model config for the tool.
 * Explicit agents.defaults.videoGenerationModel wins; otherwise auto-discovered
 * provider defaults are tried. Returns null when nothing usable is found
 * (the caller then hides the tool entirely).
 */
export function resolveVideoGenerationModelConfigForTool(params: {
  cfg?: OpenClawConfig;
  agentDir?: string;
}): ToolModelConfig | null {
  const explicit = coerceToolModelConfig(params.cfg?.agents?.defaults?.videoGenerationModel);
  if (!hasToolModelConfig(explicit)) {
    return buildToolModelConfigFromCandidates({
      explicit,
      agentDir: params.agentDir,
      candidates: resolveVideoGenerationModelCandidates(params),
      isProviderConfigured: (providerId) =>
        isVideoGenerationProviderConfigured({
          providerId,
          cfg: params.cfg,
          agentDir: params.agentDir,
        }),
    });
  }
  return explicit;
}
/**
 * Determine whether a video-generation provider has usable configuration.
 * Accepts either a resolved provider object or a raw provider id (matched
 * against provider ids and aliases after normalization). Providers may supply
 * their own isConfigured hook; otherwise generic auth lookup is used.
 */
function isVideoGenerationProviderConfigured(params: {
  provider?: VideoGenerationProvider;
  providerId?: string;
  cfg?: OpenClawConfig;
  agentDir?: string;
}): boolean {
  let provider = params.provider;
  if (!provider) {
    // Normalize once instead of per-candidate inside the find callback.
    const wantedId = normalizeProviderId(params.providerId ?? "");
    provider = listRuntimeVideoGenerationProviders({ config: params.cfg }).find(
      (candidate) =>
        normalizeProviderId(candidate.id) === wantedId ||
        (candidate.aliases ?? []).some((alias) => normalizeProviderId(alias) === wantedId),
    );
  }
  if (!provider) {
    // Unknown provider id: fall back to a plain auth lookup when an id was given.
    if (!params.providerId) {
      return false;
    }
    return hasAuthForProvider({ provider: params.providerId, agentDir: params.agentDir });
  }
  if (provider.isConfigured) {
    return provider.isConfigured({ cfg: params.cfg, agentDir: params.agentDir });
  }
  return hasAuthForProvider({ provider: provider.id, agentDir: params.agentDir });
}
/** Parse the optional "action" arg. Missing/empty defaults to "generate"; anything else must be "generate" or "list". */
function resolveAction(args: Record<string, unknown>): "generate" | "list" {
  const raw = readStringParam(args, "action");
  if (!raw) {
    return "generate";
  }
  switch (raw.trim().toLowerCase()) {
    case "generate":
      return "generate";
    case "list":
      return "list";
    default:
      throw new ToolInputError('action must be "generate" or "list"');
  }
}
/** Uppercase and validate an optional resolution hint; only 480P/720P/1080P pass. */
function normalizeResolution(raw: string | undefined): VideoGenerationResolution | undefined {
  const upper = raw?.trim().toUpperCase();
  if (!upper) {
    return undefined;
  }
  switch (upper) {
    case "480P":
    case "720P":
    case "1080P":
      return upper;
    default:
      throw new ToolInputError("resolution must be one of 480P, 720P, or 1080P");
  }
}
/** Trim and validate an optional aspect-ratio hint against SUPPORTED_ASPECT_RATIOS. */
function normalizeAspectRatio(raw: string | undefined): string | undefined {
  const trimmed = raw?.trim();
  if (!trimmed) {
    return undefined;
  }
  if (!SUPPORTED_ASPECT_RATIOS.has(trimmed)) {
    throw new ToolInputError(
      "aspectRatio must be one of 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, or 21:9",
    );
  }
  return trimmed;
}
/**
 * Read an optional boolean arg, accepting real booleans plus the strings
 * "true"/"false" (case-insensitive, trimmed). Any other value yields undefined.
 */
function readBooleanParam(params: Record<string, unknown>, key: string): boolean | undefined {
  const raw = readSnakeCaseParamRaw(params, key);
  if (typeof raw === "boolean") {
    return raw;
  }
  if (typeof raw !== "string") {
    return undefined;
  }
  switch (raw.trim().toLowerCase()) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      return undefined;
  }
}
/**
 * Merge the singular and plural reference args into one de-duplicated list.
 * Values are trimmed; a leading "@" (attachment syntax) is ignored for the
 * de-dup key but preserved in the returned value. Throws when more than
 * maxCount distinct entries remain.
 */
function normalizeReferenceInputs(params: {
  args: Record<string, unknown>;
  singularKey: "image" | "video";
  pluralKey: "images" | "videos";
  maxCount: number;
}): string[] {
  const single = readStringParam(params.args, params.singularKey);
  const multiple = readStringArrayParam(params.args, params.pluralKey) ?? [];
  const seen = new Set<string>();
  const result: string[] = [];
  for (const raw of single ? [single, ...multiple] : multiple) {
    const trimmed = raw.trim();
    const dedupeKey = trimmed.startsWith("@") ? trimmed.slice(1).trim() : trimmed;
    if (!dedupeKey || seen.has(dedupeKey)) {
      continue;
    }
    seen.add(dedupeKey);
    result.push(trimmed);
  }
  if (result.length > params.maxCount) {
    throw new ToolInputError(
      `Too many reference ${params.pluralKey}: ${result.length} provided, maximum is ${params.maxCount}.`,
    );
  }
  return result;
}
/**
 * Resolve the provider implementation matching the explicit model override,
 * falling back to the configured primary model. Returns undefined when neither
 * ref parses or no registered provider matches by id or alias.
 */
function resolveSelectedVideoGenerationProvider(params: {
  config?: OpenClawConfig;
  videoGenerationModelConfig: ToolModelConfig;
  modelOverride?: string;
}): VideoGenerationProvider | undefined {
  const selectedRef =
    parseVideoGenerationModelRef(params.modelOverride) ??
    parseVideoGenerationModelRef(params.videoGenerationModelConfig.primary);
  if (!selectedRef) {
    return undefined;
  }
  const wantedId = normalizeProviderId(selectedRef.provider);
  const matchesWanted = (provider: VideoGenerationProvider): boolean => {
    if (normalizeProviderId(provider.id) === wantedId) {
      return true;
    }
    return (provider.aliases ?? []).some((alias) => normalizeProviderId(alias) === wantedId);
  };
  return listRuntimeVideoGenerationProviders({ config: params.config }).find(matchesWanted);
}
/**
 * Enforce the selected provider's declared capabilities against the request.
 * No-op when no provider was resolved (the runtime surfaces its own error
 * later). Throws ToolInputError on the first violation found.
 */
function validateVideoGenerationCapabilities(params: {
  provider: VideoGenerationProvider | undefined;
  inputImageCount: number;
  inputVideoCount: number;
  size?: string;
  aspectRatio?: string;
  resolution?: VideoGenerationResolution;
  durationSeconds?: number;
  audio?: boolean;
  watermark?: boolean;
}) {
  const provider = params.provider;
  if (!provider) {
    return;
  }
  const caps = provider.capabilities;
  if (params.inputImageCount > 0) {
    // Providers that don't declare a limit fall back to the tool-wide cap.
    const maxInputImages = caps.maxInputImages ?? MAX_INPUT_IMAGES;
    if (params.inputImageCount > maxInputImages) {
      throw new ToolInputError(
        `${provider.id} supports at most ${maxInputImages} reference image${maxInputImages === 1 ? "" : "s"}.`,
      );
    }
  }
  if (params.inputVideoCount > 0) {
    const maxInputVideos = caps.maxInputVideos ?? MAX_INPUT_VIDEOS;
    if (params.inputVideoCount > maxInputVideos) {
      throw new ToolInputError(
        `${provider.id} supports at most ${maxInputVideos} reference video${maxInputVideos === 1 ? "" : "s"}.`,
      );
    }
  }
  // Optional hints are rejected outright when unsupported, rather than
  // silently dropped, so the model learns the provider's limits.
  if (params.size && !caps.supportsSize) {
    throw new ToolInputError(`${provider.id} does not support size overrides.`);
  }
  if (params.aspectRatio && !caps.supportsAspectRatio) {
    throw new ToolInputError(`${provider.id} does not support aspectRatio overrides.`);
  }
  if (params.resolution && !caps.supportsResolution) {
    throw new ToolInputError(`${provider.id} does not support resolution overrides.`);
  }
  if (
    typeof params.durationSeconds === "number" &&
    Number.isFinite(params.durationSeconds) &&
    typeof caps.maxDurationSeconds === "number" &&
    params.durationSeconds > caps.maxDurationSeconds
  ) {
    throw new ToolInputError(
      `${provider.id} supports at most ${caps.maxDurationSeconds} seconds per video.`,
    );
  }
  if (typeof params.audio === "boolean" && !caps.supportsAudio) {
    throw new ToolInputError(`${provider.id} does not support audio toggles.`);
  }
  if (typeof params.watermark === "boolean" && !caps.supportsWatermark) {
    throw new ToolInputError(`${provider.id} does not support watermark toggles.`);
  }
}
// Sandbox wiring handed in by the tool factory: the sandbox root plus the fs
// bridge used to resolve and read files across the sandbox boundary.
type VideoGenerateSandboxConfig = {
  root: string;
  bridge: SandboxFsBridge;
};
/**
 * Load user-supplied reference media (file paths, file:// URLs, data: URLs,
 * or http(s) URLs) into in-memory source assets for the generation runtime.
 *
 * Sandbox mode: remote http(s) URLs are rejected, "~" is not expanded, and
 * paths go through the sandbox fs bridge (which may rewrite them — reported
 * via rewrittenFrom). Outside a sandbox, http(s) URLs are passed through by
 * reference so the provider can fetch them directly.
 * Throws ToolInputError for empty/unsupported references or media whose
 * detected kind does not match expectedKind.
 */
async function loadReferenceAssets(params: {
  inputs: string[];
  expectedKind: "image" | "video";
  maxBytes?: number;
  workspaceDir?: string;
  sandboxConfig: { root: string; bridge: SandboxFsBridge; workspaceOnly: boolean } | null;
}): Promise<
  Array<{
    sourceAsset: VideoGenerationSourceAsset;
    resolvedInput: string;
    rewrittenFrom?: string;
  }>
> {
  const loaded: Array<{
    sourceAsset: VideoGenerationSourceAsset;
    resolvedInput: string;
    rewrittenFrom?: string;
  }> = [];
  for (const rawInput of params.inputs) {
    const trimmed = rawInput.trim();
    // A leading "@" (chat-attachment syntax) is stripped before resolution.
    const inputRaw = trimmed.startsWith("@") ? trimmed.slice(1).trim() : trimmed;
    if (!inputRaw) {
      throw new ToolInputError(`${params.expectedKind} required (empty string in array)`);
    }
    // Classify the reference. A Windows drive path (C:\...) also matches the
    // generic scheme regex, so it is carved out explicitly.
    const looksLikeWindowsDrivePath = /^[a-zA-Z]:[\\/]/.test(inputRaw);
    const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(inputRaw);
    const isFileUrl = /^file:/i.test(inputRaw);
    const isHttpUrl = /^https?:\/\//i.test(inputRaw);
    const isDataUrl = /^data:/i.test(inputRaw);
    if (hasScheme && !looksLikeWindowsDrivePath && !isFileUrl && !isHttpUrl && !isDataUrl) {
      throw new ToolInputError(
        `Unsupported ${params.expectedKind} reference: ${rawInput}. Use a file path, a file:// URL, a data: URL, or an http(s) URL.`,
      );
    }
    if (params.sandboxConfig && isHttpUrl) {
      throw new ToolInputError(
        `Sandboxed video_generate does not allow remote ${params.expectedKind} URLs.`,
      );
    }
    // Tilde expansion only applies outside the sandbox; sandbox paths are
    // resolved by the bridge below.
    const resolvedInput = (() => {
      if (params.sandboxConfig) {
        return inputRaw;
      }
      if (inputRaw.startsWith("~")) {
        return resolveUserPath(inputRaw);
      }
      return inputRaw;
    })();
    // Non-sandboxed remote URLs are forwarded as-is; the provider fetches them.
    if (isHttpUrl && !params.sandboxConfig) {
      loaded.push({
        sourceAsset: { url: resolvedInput },
        resolvedInput,
      });
      continue;
    }
    // NOTE(review): isFileUrl matches case-insensitively, but only a lowercase
    // "file://" prefix is stripped here — an uppercase FILE:// reference falls
    // through as a raw path. Confirm loadWebMedia tolerates that.
    const resolvedPathInfo: { resolved: string; rewrittenFrom?: string } = isDataUrl
      ? { resolved: "" }
      : params.sandboxConfig
        ? await resolveSandboxedBridgeMediaPath({
            sandbox: params.sandboxConfig,
            mediaPath: resolvedInput,
            inboundFallbackDir: "media/inbound",
          })
        : {
            resolved: resolvedInput.startsWith("file://")
              ? resolvedInput.slice("file://".length)
              : resolvedInput,
          };
    const resolvedPath = isDataUrl ? null : resolvedPathInfo.resolved;
    const localRoots = resolveMediaToolLocalRoots(
      params.workspaceDir,
      {
        workspaceOnly: params.sandboxConfig?.workspaceOnly === true,
      },
      resolvedPath ? [resolvedPath] : undefined,
    );
    // data: URLs are only decodable for images; video payloads are too large
    // to pass inline, so they are rejected.
    const media = isDataUrl
      ? params.expectedKind === "image"
        ? decodeDataUrl(resolvedInput)
        : (() => {
            throw new ToolInputError("Video data: URLs are not supported for video_generate.");
          })()
      : params.sandboxConfig
        ? await loadWebMedia(resolvedPath ?? resolvedInput, {
            maxBytes: params.maxBytes,
            sandboxValidated: true,
            readFile: createSandboxBridgeReadFile({ sandbox: params.sandboxConfig }),
          })
        : await loadWebMedia(resolvedPath ?? resolvedInput, {
            maxBytes: params.maxBytes,
            localRoots,
          });
    if (media.kind !== params.expectedKind) {
      throw new ToolInputError(`Unsupported media type: ${media.kind ?? "unknown"}`);
    }
    // decodeDataUrl and loadWebMedia return slightly different shapes; probe
    // for the fields rather than asserting one of them.
    const mimeType = "mimeType" in media ? media.mimeType : media.contentType;
    const fileName = "fileName" in media ? media.fileName : undefined;
    loaded.push({
      sourceAsset: {
        buffer: media.buffer,
        mimeType,
        fileName,
      },
      resolvedInput,
      ...(resolvedPathInfo.rewrittenFrom ? { rewrittenFrom: resolvedPathInfo.rewrittenFrom } : {}),
    });
  }
  return loaded;
}
/**
 * Build the video_generate agent tool.
 *
 * Returns null when no video-generation model config can be resolved (no
 * explicit agents.defaults.videoGenerationModel and no configured provider
 * default) — the tool is then not registered at all.
 *
 * Actions:
 *  - "list": report registered providers, their models, capabilities, and
 *    auth env-var hints; no generation happens.
 *  - "generate" (default): load reference media, validate provider
 *    capabilities, run generateVideo, save each returned video via
 *    saveMediaBuffer, and emit one "MEDIA:<path>" line per saved file.
 */
export function createVideoGenerateTool(options?: {
  config?: OpenClawConfig;
  agentDir?: string;
  workspaceDir?: string;
  sandbox?: VideoGenerateSandboxConfig;
  fsPolicy?: ToolFsPolicy;
}): AnyAgentTool | null {
  const cfg: OpenClawConfig = options?.config ?? loadConfig();
  const videoGenerationModelConfig = resolveVideoGenerationModelConfigForTool({
    cfg,
    agentDir: options?.agentDir,
  });
  if (!videoGenerationModelConfig) {
    return null;
  }
  // Fold the fs policy's workspaceOnly flag into the sandbox wiring so
  // loadReferenceAssets sees a single config object.
  const sandboxConfig = options?.sandbox
    ? {
        root: options.sandbox.root,
        bridge: options.sandbox.bridge,
        workspaceOnly: options.fsPolicy?.workspaceOnly === true,
      }
    : null;
  return {
    label: "Video Generation",
    name: "video_generate",
    displaySummary: "Generate videos",
    description:
      "Generate videos using configured providers. Generated videos are saved under OpenClaw-managed media storage and delivered automatically as attachments.",
    parameters: VideoGenerateToolSchema,
    execute: async (_toolCallId, rawArgs) => {
      const args = rawArgs as Record<string, unknown>;
      const action = resolveAction(args);
      // Overlay the resolved model defaults so the runtime's provider/model
      // selection matches what this factory resolved at registration time.
      const effectiveCfg =
        applyVideoGenerationModelConfigDefaults(cfg, videoGenerationModelConfig) ?? cfg;
      if (action === "list") {
        const providers = listRuntimeVideoGenerationProviders({ config: effectiveCfg });
        if (providers.length === 0) {
          return {
            content: [{ type: "text", text: "No video-generation providers are registered." }],
            details: { providers: [] },
          };
        }
        // One "id: default=... | models=... | capabilities=... | auth=..." line
        // per provider; empty segments are dropped.
        const lines = providers.map((provider) => {
          const authHints = getVideoGenerationProviderAuthEnvVars(provider.id);
          const capabilities = [
            provider.capabilities.maxVideos ? `maxVideos=${provider.capabilities.maxVideos}` : null,
            provider.capabilities.maxInputImages
              ? `maxInputImages=${provider.capabilities.maxInputImages}`
              : null,
            provider.capabilities.maxInputVideos
              ? `maxInputVideos=${provider.capabilities.maxInputVideos}`
              : null,
            provider.capabilities.maxDurationSeconds
              ? `maxDurationSeconds=${provider.capabilities.maxDurationSeconds}`
              : null,
            provider.capabilities.supportsResolution ? "resolution" : null,
            provider.capabilities.supportsAspectRatio ? "aspectRatio" : null,
            provider.capabilities.supportsSize ? "size" : null,
            provider.capabilities.supportsAudio ? "audio" : null,
            provider.capabilities.supportsWatermark ? "watermark" : null,
          ]
            .filter((entry): entry is string => Boolean(entry))
            .join(", ");
          return [
            `${provider.id}: default=${provider.defaultModel ?? "none"}`,
            provider.models?.length ? `models=${provider.models.join(", ")}` : null,
            capabilities ? `capabilities=${capabilities}` : null,
            authHints.length > 0 ? `auth=${authHints.join(" / ")}` : null,
          ]
            .filter((entry): entry is string => Boolean(entry))
            .join(" | ");
        });
        return {
          content: [{ type: "text", text: lines.join("\n") }],
          details: {
            providers: providers.map((provider) => ({
              id: provider.id,
              defaultModel: provider.defaultModel,
              models: provider.models ?? [],
              authEnvVars: getVideoGenerationProviderAuthEnvVars(provider.id),
              capabilities: provider.capabilities,
            })),
          },
        };
      }
      // --- generate action ---
      const prompt = readStringParam(args, "prompt", { required: true });
      const model = readStringParam(args, "model");
      const filename = readStringParam(args, "filename");
      const size = readStringParam(args, "size");
      const aspectRatio = normalizeAspectRatio(readStringParam(args, "aspectRatio"));
      const resolution = normalizeResolution(readStringParam(args, "resolution"));
      const durationSeconds = readNumberParam(args, "durationSeconds", {
        integer: true,
        strict: true,
      });
      const audio = readBooleanParam(args, "audio");
      const watermark = readBooleanParam(args, "watermark");
      // Merge singular/plural reference args and enforce the tool-wide caps.
      const imageInputs = normalizeReferenceInputs({
        args,
        singularKey: "image",
        pluralKey: "images",
        maxCount: MAX_INPUT_IMAGES,
      });
      const videoInputs = normalizeReferenceInputs({
        args,
        singularKey: "video",
        pluralKey: "videos",
        maxCount: MAX_INPUT_VIDEOS,
      });
      const selectedProvider = resolveSelectedVideoGenerationProvider({
        config: effectiveCfg,
        videoGenerationModelConfig,
        modelOverride: model,
      });
      const loadedReferenceImages = await loadReferenceAssets({
        inputs: imageInputs,
        expectedKind: "image",
        workspaceDir: options?.workspaceDir,
        sandboxConfig,
      });
      const loadedReferenceVideos = await loadReferenceAssets({
        inputs: videoInputs,
        expectedKind: "video",
        workspaceDir: options?.workspaceDir,
        sandboxConfig,
      });
      // Fail fast on provider capability violations before any network call.
      validateVideoGenerationCapabilities({
        provider: selectedProvider,
        inputImageCount: loadedReferenceImages.length,
        inputVideoCount: loadedReferenceVideos.length,
        size,
        aspectRatio,
        resolution,
        durationSeconds,
        audio,
        watermark,
      });
      const result = await generateVideo({
        cfg: effectiveCfg,
        prompt,
        agentDir: options?.agentDir,
        modelOverride: model,
        size,
        aspectRatio,
        resolution,
        durationSeconds,
        audio,
        watermark,
        inputImages: loadedReferenceImages.map((entry) => entry.sourceAsset),
        inputVideos: loadedReferenceVideos.map((entry) => entry.sourceAsset),
      });
      // Persist every returned video. When `filename` is given it is used as
      // the hint for every video; presumably saveMediaBuffer uniquifies
      // colliding basenames — TODO confirm for multi-video results.
      const savedVideos = await Promise.all(
        result.videos.map((video) =>
          saveMediaBuffer(
            video.buffer,
            video.mimeType,
            "tool-video-generation",
            undefined,
            filename || video.fileName,
          ),
        ),
      );
      // Summary line plus one MEDIA: path per saved video; the MEDIA: prefix
      // marks paths for downstream attachment delivery.
      const lines = [
        `Generated ${savedVideos.length} video${savedVideos.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
        ...savedVideos.map((video) => `MEDIA:${video.path}`),
      ];
      return {
        content: [{ type: "text", text: lines.join("\n") }],
        details: {
          provider: result.provider,
          model: result.model,
          count: savedVideos.length,
          media: {
            mediaUrls: savedVideos.map((video) => video.path),
          },
          paths: savedVideos.map((video) => video.path),
          // Echo reference inputs back: singular keys for one reference,
          // array form for several, nothing when none were given.
          ...(loadedReferenceImages.length === 1
            ? {
                image: loadedReferenceImages[0]?.resolvedInput,
                ...(loadedReferenceImages[0]?.rewrittenFrom
                  ? { rewrittenFrom: loadedReferenceImages[0].rewrittenFrom }
                  : {}),
              }
            : loadedReferenceImages.length > 1
              ? {
                  images: loadedReferenceImages.map((entry) => ({
                    image: entry.resolvedInput,
                    ...(entry.rewrittenFrom ? { rewrittenFrom: entry.rewrittenFrom } : {}),
                  })),
                }
              : {}),
          ...(loadedReferenceVideos.length === 1
            ? {
                video: loadedReferenceVideos[0]?.resolvedInput,
                ...(loadedReferenceVideos[0]?.rewrittenFrom
                  ? { videoRewrittenFrom: loadedReferenceVideos[0].rewrittenFrom }
                  : {}),
              }
            : loadedReferenceVideos.length > 1
              ? {
                  videos: loadedReferenceVideos.map((entry) => ({
                    video: entry.resolvedInput,
                    ...(entry.rewrittenFrom ? { rewrittenFrom: entry.rewrittenFrom } : {}),
                  })),
                }
              : {}),
          // Echo only the hints the caller actually provided.
          ...(size ? { size } : {}),
          ...(aspectRatio ? { aspectRatio } : {}),
          ...(resolution ? { resolution } : {}),
          ...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
          ...(typeof audio === "boolean" ? { audio } : {}),
          ...(typeof watermark === "boolean" ? { watermark } : {}),
          ...(filename ? { filename } : {}),
          attempts: result.attempts,
          metadata: result.metadata,
        },
      };
    },
  };
}

View File

@@ -242,6 +242,37 @@ describe("config cli", () => {
expect(written.gateway?.auth).toEqual({ mode: "token" });
});
// `config set agents.defaults.videoGenerationModel.primary` must merge into
// existing agent defaults instead of replacing them wholesale.
it("writes agents.defaults.videoGenerationModel.primary without disturbing sibling defaults", async () => {
  const resolved: OpenClawConfig = {
    agents: {
      defaults: {
        model: "openai/gpt-5.4",
        imageGenerationModel: {
          primary: "openai/gpt-image-1",
        },
      },
    },
  };
  setSnapshot(resolved, resolved);
  await runConfigCommand([
    "config",
    "set",
    "agents.defaults.videoGenerationModel.primary",
    "qwen/wan2.6-t2v",
  ]);
  expect(mockWriteConfigFile).toHaveBeenCalledTimes(1);
  const written = mockWriteConfigFile.mock.calls[0]?.[0];
  // Sibling defaults survive verbatim; only the new key is added.
  expect(written.agents?.defaults?.model).toBe("openai/gpt-5.4");
  expect(written.agents?.defaults?.imageGenerationModel).toEqual({
    primary: "openai/gpt-image-1",
  });
  expect(written.agents?.defaults?.videoGenerationModel).toEqual({
    primary: "qwen/wan2.6-t2v",
  });
});
it("drops gateway.auth.password when switching mode to token", async () => {
const resolved: OpenClawConfig = {
gateway: {

View File

@@ -40,4 +40,25 @@ describe("generated base config schema", () => {
expect(hooksInternalProperties?.handlers).toBeUndefined();
expect(uiHints["hooks.internal.handlers"]).toBeUndefined();
});
// The generated public config schema must expose videoGenerationModel under
// agents.defaults, including UI hints for both of its sub-fields.
it("includes videoGenerationModel in the public schema payload", () => {
  // The generated schema is loosely typed; narrow just enough to reach
  // agents.defaults.properties.
  const agentDefaultsProperties = (
    GENERATED_BASE_CONFIG_SCHEMA.schema as {
      properties?: {
        agents?: {
          properties?: {
            defaults?: {
              properties?: Record<string, unknown>;
            };
          };
        };
      };
    }
  ).properties?.agents?.properties?.defaults?.properties;
  const uiHints = GENERATED_BASE_CONFIG_SCHEMA.uiHints as Record<string, unknown>;
  expect(agentDefaultsProperties?.videoGenerationModel).toBeDefined();
  expect(uiHints["agents.defaults.videoGenerationModel.primary"]).toBeDefined();
  expect(uiHints["agents.defaults.videoGenerationModel.fallbacks"]).toBeDefined();
});
});

View File

@@ -11,4 +11,15 @@ describe("agent defaults schema", () => {
}),
).not.toThrow();
});
// videoGenerationModel (primary + fallbacks) must parse as a valid agent default.
it("accepts videoGenerationModel", () => {
  expect(() =>
    AgentDefaultsSchema.parse({
      videoGenerationModel: {
        primary: "qwen/wan2.6-t2v",
        fallbacks: ["minimax/video-01"],
      },
    }),
  ).not.toThrow();
});
});