fix(cli): forward video generation options

This commit is contained in:
Peter Steinberger
2026-04-25 11:29:15 +01:00
parent b85b106b10
commit 87aa0f813c
3 changed files with 135 additions and 4 deletions

View File

@@ -114,7 +114,7 @@ This table maps common inference tasks to the corresponding infer command.
| Describe an image file | `openclaw infer image describe --file ./image.png --json` | `--model` must be an image-capable `<provider/model>` |
| Transcribe audio | `openclaw infer audio transcribe --file ./memo.m4a --json` | `--model` must be `<provider/model>` |
| Synthesize speech | `openclaw infer tts convert --text "..." --output ./speech.mp3 --json` | `tts status` is gateway-oriented |
| Generate a video | `openclaw infer video generate --prompt "..." --json` | |
| Generate a video | `openclaw infer video generate --prompt "..." --json` | Supports provider hints such as `--resolution` |
| Describe a video file | `openclaw infer video describe --file ./clip.mp4 --json` | `--model` must be `<provider/model>` |
| Search the web | `openclaw infer web search --query "..." --json` | |
| Fetch a web page | `openclaw infer web fetch --url https://example.com --json` | |
@@ -223,13 +223,14 @@ Use `video` for generation and description.
```bash
openclaw infer video generate --prompt "cinematic sunset over the ocean" --json
openclaw infer video generate --prompt "slow drone shot over a forest lake" --json
openclaw infer video generate --prompt "slow drone shot over a forest lake" --resolution 768P --duration 6 --json
openclaw infer video describe --file ./clip.mp4 --json
openclaw infer video describe --file ./clip.mp4 --model openai/gpt-4.1-mini --json
```
Notes:
- `video generate` accepts `--size`, `--aspect-ratio`, `--resolution`, `--duration`, `--audio`, `--watermark`, and `--timeout-ms`, and forwards them to the video-generation runtime. `--resolution` is case-insensitive and must be one of `480P`, `720P`, `768P`, or `1080P`.
- `--model` must be `<provider/model>` for `video describe`.
## Web

View File

@@ -577,6 +577,61 @@ describe("capability cli", () => {
);
});
it("passes video generation parameters through to runtime", async () => {
  // Stubbed runtime result: a single in-memory MP4 asset and no recorded attempts.
  const stubbedResult = {
    provider: "minimax",
    model: "MiniMax-Hailuo-2.3",
    attempts: [],
    videos: [
      {
        buffer: Buffer.from("video-bytes"),
        mimeType: "video/mp4",
        fileName: "provider-name.mp4",
      },
    ],
  };
  mocks.generateVideo.mockResolvedValue(stubbedResult);

  // Every generation option is supplied. The resolution is deliberately
  // lower-case so the assertion below also covers upper-casing by the CLI.
  const argv = [
    "capability",
    "video",
    "generate",
    "--prompt",
    "friendly lobster",
    "--model",
    "minimax/MiniMax-Hailuo-2.3",
    "--size",
    "1280x768",
    "--aspect-ratio",
    "16:9",
    "--resolution",
    "768p",
    "--duration",
    "6",
    "--audio",
    "--watermark",
    "--timeout-ms",
    "300000",
    "--json",
  ];
  await runRegisteredCli({
    register: registerCapabilityCli as (program: Command) => void,
    argv,
  });

  // Numeric flags must arrive as numbers and boolean flags as `true`.
  const forwardedOptions = {
    prompt: "friendly lobster",
    modelOverride: "minimax/MiniMax-Hailuo-2.3",
    size: "1280x768",
    aspectRatio: "16:9",
    resolution: "768P",
    durationSeconds: 6,
    audio: true,
    watermark: true,
    timeoutMs: 300000,
  };
  expect(mocks.generateVideo).toHaveBeenCalledWith(expect.objectContaining(forwardedOptions));
});
it("fails video generate when a provider returns an undeliverable asset", async () => {
mocks.generateVideo.mockResolvedValue({
provider: "vydra",

View File

@@ -61,6 +61,7 @@ import {
textToSpeech,
} from "../tts/tts.js";
import { generateVideo, listRuntimeVideoGenerationProviders } from "../video-generation/runtime.js";
import type { VideoGenerationResolution } from "../video-generation/types.js";
import {
isWebFetchProviderConfigured,
resolveWebFetchDefinition,
@@ -267,7 +268,19 @@ const CAPABILITY_METADATA: CapabilityMetadata[] = [
id: "video.generate",
description: "Generate video files with configured video providers.",
transports: ["local"],
flags: ["--prompt", "--model", "--output", "--json"],
flags: [
"--prompt",
"--model",
"--size",
"--aspect-ratio",
"--resolution",
"--duration",
"--audio",
"--watermark",
"--timeout-ms",
"--output",
"--json",
],
resultShape: "saved video files plus attempts",
},
{
@@ -822,7 +835,48 @@ async function runAudioTranscribe(params: {
} satisfies CapabilityEnvelope;
}
async function runVideoGenerate(params: { prompt: string; model?: string; output?: string }) {
/**
 * Converts an optional CLI option value into a finite number.
 *
 * @param raw - Value as received from the option parser.
 * @param label - Option name used as the prefix of the error message.
 * @returns `undefined` when `raw` is `undefined` or a blank string; otherwise
 *   the result of `Number(raw)`.
 * @throws Error when the converted value is `NaN` or infinite.
 */
function parseOptionalFiniteNumber(
  raw: string | number | undefined,
  label: string,
): number | undefined {
  // A missing option and a whitespace-only string both mean "not provided".
  const isBlankString = typeof raw === "string" && raw.trim().length === 0;
  if (raw === undefined || isBlankString) {
    return undefined;
  }

  const parsed = Number(raw);
  if (Number.isFinite(parsed)) {
    return parsed;
  }
  throw new Error(`${label} must be a finite number`);
}
/**
 * Normalizes a user-supplied resolution hint to its canonical upper-case form.
 *
 * @param raw - Resolution text from the CLI; surrounding whitespace and letter
 *   case are ignored.
 * @returns `undefined` when `raw` is missing or blank; otherwise the matching
 *   upper-case resolution literal.
 * @throws Error when the value is not one of 480P, 720P, 768P, or 1080P.
 */
function normalizeVideoResolution(raw: string | undefined): VideoGenerationResolution | undefined {
  const candidate = raw?.trim().toUpperCase();
  if (candidate === undefined || candidate === "") {
    return undefined;
  }

  switch (candidate) {
    case "480P":
    case "720P":
    case "768P":
    case "1080P":
      // The switch narrows `candidate` to the accepted literal union.
      return candidate;
    default:
      throw new Error("video resolution must be one of 480P, 720P, 768P, or 1080P");
  }
}
async function runVideoGenerate(params: {
prompt: string;
model?: string;
output?: string;
size?: string;
aspectRatio?: string;
resolution?: VideoGenerationResolution;
durationSeconds?: number;
audio?: boolean;
watermark?: boolean;
timeoutMs?: number;
}) {
const cfg = loadConfig();
const agentDir = resolveAgentDir(cfg, resolveDefaultAgentId(cfg));
const result = await generateVideo({
@@ -830,6 +884,13 @@ async function runVideoGenerate(params: { prompt: string; model?: string; output
agentDir,
prompt: params.prompt,
modelOverride: params.model,
size: params.size,
aspectRatio: params.aspectRatio,
resolution: params.resolution,
durationSeconds: params.durationSeconds,
audio: params.audio,
watermark: params.watermark,
timeoutMs: params.timeoutMs,
});
const outputs = await Promise.all(
result.videos.map(async (video, index) => {
@@ -1680,6 +1741,13 @@ export function registerCapabilityCli(program: Command) {
.description("Generate video")
.requiredOption("--prompt <text>", "Prompt text")
.option("--model <provider/model>", "Model override")
.option("--size <size>", "Size hint like 1280x720")
.option("--aspect-ratio <ratio>", "Aspect ratio hint like 16:9")
.option("--resolution <value>", "Resolution hint: 480P, 720P, 768P, or 1080P")
.option("--duration <seconds>", "Target duration in seconds")
.option("--audio", "Enable generated audio when supported")
.option("--watermark", "Request provider watermark when supported")
.option("--timeout-ms <ms>", "Provider request timeout in milliseconds")
.option("--output <path>", "Output path")
.option("--json", "Output JSON", false)
.action(async (opts) => {
@@ -1688,6 +1756,13 @@ export function registerCapabilityCli(program: Command) {
prompt: String(opts.prompt),
model: opts.model as string | undefined,
output: opts.output as string | undefined,
size: opts.size as string | undefined,
aspectRatio: opts.aspectRatio as string | undefined,
resolution: normalizeVideoResolution(opts.resolution as string | undefined),
durationSeconds: parseOptionalFiniteNumber(opts.duration, "--duration"),
audio: opts.audio === true ? true : undefined,
watermark: opts.watermark === true ? true : undefined,
timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"),
});
emitJsonOrText(defaultRuntime, Boolean(opts.json), result, formatEnvelopeForText);
});