mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 17:20:45 +00:00
fix(cli): forward video generation options
This commit is contained in:
@@ -114,7 +114,7 @@ This table maps common inference tasks to the corresponding infer command.
|
||||
| Describe an image file | `openclaw infer image describe --file ./image.png --json` | `--model` must be an image-capable `<provider/model>` |
|
||||
| Transcribe audio | `openclaw infer audio transcribe --file ./memo.m4a --json` | `--model` must be `<provider/model>` |
|
||||
| Synthesize speech | `openclaw infer tts convert --text "..." --output ./speech.mp3 --json` | `tts status` is gateway-oriented |
|
||||
| Generate a video | `openclaw infer video generate --prompt "..." --json` | |
|
||||
| Generate a video | `openclaw infer video generate --prompt "..." --json` | Supports provider hints such as `--resolution` |
|
||||
| Describe a video file | `openclaw infer video describe --file ./clip.mp4 --json` | `--model` must be `<provider/model>` |
|
||||
| Search the web | `openclaw infer web search --query "..." --json` | |
|
||||
| Fetch a web page | `openclaw infer web fetch --url https://example.com --json` | |
|
||||
@@ -223,13 +223,14 @@ Use `video` for generation and description.
|
||||
|
||||
```bash
|
||||
openclaw infer video generate --prompt "cinematic sunset over the ocean" --json
|
||||
openclaw infer video generate --prompt "slow drone shot over a forest lake" --json
|
||||
openclaw infer video generate --prompt "slow drone shot over a forest lake" --resolution 768P --duration 6 --json
|
||||
openclaw infer video describe --file ./clip.mp4 --json
|
||||
openclaw infer video describe --file ./clip.mp4 --model openai/gpt-4.1-mini --json
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
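- `video generate` accepts `--size`, `--aspect-ratio`, `--resolution`, `--duration`, `--audio`, `--watermark`, and `--timeout-ms`, and forwards them to the video-generation runtime. `--resolution` is case-insensitive and must be one of `480P`, `720P`, `768P`, or `1080P`; `--duration` and `--timeout-ms` must be finite numbers.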
|
||||
- `--model` must be `<provider/model>` for `video describe`.
|
||||
|
||||
## Web
|
||||
|
||||
@@ -577,6 +577,61 @@ describe("capability cli", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("passes video generation parameters through to runtime", async () => {
  // Stub the runtime so the CLI has a single generated video to handle.
  mocks.generateVideo.mockResolvedValue({
    provider: "minimax",
    model: "MiniMax-Hailuo-2.3",
    attempts: [],
    videos: [
      {
        buffer: Buffer.from("video-bytes"),
        mimeType: "video/mp4",
        fileName: "provider-name.mp4",
      },
    ],
  });

  // Exercise every video-generation option in one invocation. The resolution
  // is deliberately lowercase and the numeric options are strings, as they
  // would arrive from a shell.
  await runRegisteredCli({
    register: registerCapabilityCli as (program: Command) => void,
    argv: [
      "capability",
      "video",
      "generate",
      "--prompt",
      "friendly lobster",
      "--model",
      "minimax/MiniMax-Hailuo-2.3",
      "--size",
      "1280x768",
      "--aspect-ratio",
      "16:9",
      "--resolution",
      "768p",
      "--duration",
      "6",
      "--audio",
      "--watermark",
      "--timeout-ms",
      "300000",
      "--json",
    ],
  });

  // The runtime must receive normalized values: upper-cased resolution,
  // numeric duration/timeout, and boolean flags set to true.
  expect(mocks.generateVideo).toHaveBeenCalledWith(
    expect.objectContaining({
      prompt: "friendly lobster",
      modelOverride: "minimax/MiniMax-Hailuo-2.3",
      size: "1280x768",
      aspectRatio: "16:9",
      resolution: "768P",
      durationSeconds: 6,
      audio: true,
      watermark: true,
      timeoutMs: 300000,
    }),
  );
});
|
||||
|
||||
it("fails video generate when a provider returns an undeliverable asset", async () => {
|
||||
mocks.generateVideo.mockResolvedValue({
|
||||
provider: "vydra",
|
||||
|
||||
@@ -61,6 +61,7 @@ import {
|
||||
textToSpeech,
|
||||
} from "../tts/tts.js";
|
||||
import { generateVideo, listRuntimeVideoGenerationProviders } from "../video-generation/runtime.js";
|
||||
import type { VideoGenerationResolution } from "../video-generation/types.js";
|
||||
import {
|
||||
isWebFetchProviderConfigured,
|
||||
resolveWebFetchDefinition,
|
||||
@@ -267,7 +268,19 @@ const CAPABILITY_METADATA: CapabilityMetadata[] = [
|
||||
id: "video.generate",
|
||||
description: "Generate video files with configured video providers.",
|
||||
transports: ["local"],
|
||||
flags: ["--prompt", "--model", "--output", "--json"],
|
||||
flags: [
|
||||
"--prompt",
|
||||
"--model",
|
||||
"--size",
|
||||
"--aspect-ratio",
|
||||
"--resolution",
|
||||
"--duration",
|
||||
"--audio",
|
||||
"--watermark",
|
||||
"--timeout-ms",
|
||||
"--output",
|
||||
"--json",
|
||||
],
|
||||
resultShape: "saved video files plus attempts",
|
||||
},
|
||||
{
|
||||
@@ -822,7 +835,48 @@ async function runAudioTranscribe(params: {
|
||||
} satisfies CapabilityEnvelope;
|
||||
}
|
||||
|
||||
async function runVideoGenerate(params: { prompt: string; model?: string; output?: string }) {
|
||||
/**
 * Parses an optional numeric CLI option value.
 *
 * @param raw - Raw option value as received from the command-line parser.
 * @param label - Option name used as the prefix of the error message
 *   (for example `--duration`).
 * @returns The parsed number, or `undefined` when the value is absent
 *   (`undefined`/`null`) or a blank string.
 * @throws Error when the value does not convert to a finite number
 *   (`NaN`, `Infinity`, or non-numeric text).
 */
function parseOptionalFiniteNumber(
  raw: string | number | null | undefined,
  label: string,
): number | undefined {
  // `== null` matches both undefined and null. Without it, Number(null) would
  // yield 0 and be forwarded as if the user had supplied a value.
  if (raw == null) {
    return undefined;
  }
  // A blank string means "not provided"; Number("") would otherwise be 0.
  if (typeof raw === "string" && raw.trim() === "") {
    return undefined;
  }
  const value = Number(raw);
  if (!Number.isFinite(value)) {
    throw new Error(`${label} must be a finite number`);
  }
  return value;
}
|
||||
|
||||
/**
 * Normalizes a user-supplied `--resolution` value.
 *
 * The input is trimmed and upper-cased before matching, so `768p` and
 * ` 768P ` are both accepted.
 *
 * @param raw - Raw option value, or `undefined` when the option was omitted.
 * @returns One of `480P`, `720P`, `768P`, or `1080P`; `undefined` when the
 *   input is missing or blank.
 * @throws Error when the value is not one of the accepted resolutions.
 */
function normalizeVideoResolution(raw: string | undefined): VideoGenerationResolution | undefined {
  const normalized = raw?.trim().toUpperCase();
  if (!normalized) {
    return undefined;
  }
  // The switch narrows `normalized` to the accepted literal values, so the
  // return needs no type assertion.
  switch (normalized) {
    case "480P":
    case "720P":
    case "768P":
    case "1080P":
      return normalized;
    default:
      throw new Error("video resolution must be one of 480P, 720P, 768P, or 1080P");
  }
}
|
||||
|
||||
async function runVideoGenerate(params: {
|
||||
prompt: string;
|
||||
model?: string;
|
||||
output?: string;
|
||||
size?: string;
|
||||
aspectRatio?: string;
|
||||
resolution?: VideoGenerationResolution;
|
||||
durationSeconds?: number;
|
||||
audio?: boolean;
|
||||
watermark?: boolean;
|
||||
timeoutMs?: number;
|
||||
}) {
|
||||
const cfg = loadConfig();
|
||||
const agentDir = resolveAgentDir(cfg, resolveDefaultAgentId(cfg));
|
||||
const result = await generateVideo({
|
||||
@@ -830,6 +884,13 @@ async function runVideoGenerate(params: { prompt: string; model?: string; output
|
||||
agentDir,
|
||||
prompt: params.prompt,
|
||||
modelOverride: params.model,
|
||||
size: params.size,
|
||||
aspectRatio: params.aspectRatio,
|
||||
resolution: params.resolution,
|
||||
durationSeconds: params.durationSeconds,
|
||||
audio: params.audio,
|
||||
watermark: params.watermark,
|
||||
timeoutMs: params.timeoutMs,
|
||||
});
|
||||
const outputs = await Promise.all(
|
||||
result.videos.map(async (video, index) => {
|
||||
@@ -1680,6 +1741,13 @@ export function registerCapabilityCli(program: Command) {
|
||||
.description("Generate video")
|
||||
.requiredOption("--prompt <text>", "Prompt text")
|
||||
.option("--model <provider/model>", "Model override")
|
||||
.option("--size <size>", "Size hint like 1280x720")
|
||||
.option("--aspect-ratio <ratio>", "Aspect ratio hint like 16:9")
|
||||
.option("--resolution <value>", "Resolution hint: 480P, 720P, 768P, or 1080P")
|
||||
.option("--duration <seconds>", "Target duration in seconds")
|
||||
.option("--audio", "Enable generated audio when supported")
|
||||
.option("--watermark", "Request provider watermark when supported")
|
||||
.option("--timeout-ms <ms>", "Provider request timeout in milliseconds")
|
||||
.option("--output <path>", "Output path")
|
||||
.option("--json", "Output JSON", false)
|
||||
.action(async (opts) => {
|
||||
@@ -1688,6 +1756,13 @@ export function registerCapabilityCli(program: Command) {
|
||||
prompt: String(opts.prompt),
|
||||
model: opts.model as string | undefined,
|
||||
output: opts.output as string | undefined,
|
||||
size: opts.size as string | undefined,
|
||||
aspectRatio: opts.aspectRatio as string | undefined,
|
||||
resolution: normalizeVideoResolution(opts.resolution as string | undefined),
|
||||
durationSeconds: parseOptionalFiniteNumber(opts.duration, "--duration"),
|
||||
audio: opts.audio === true ? true : undefined,
|
||||
watermark: opts.watermark === true ? true : undefined,
|
||||
timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"),
|
||||
});
|
||||
emitJsonOrText(defaultRuntime, Boolean(opts.json), result, formatEnvelopeForText);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user