mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 01:31:08 +00:00
feat(agents): add video_generate tool
This commit is contained in:
@@ -191,6 +191,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Update/npm: prefer the npm binary that owns the installed global OpenClaw prefix so mixed Homebrew-plus-nvm setups update the right install. (#60153) Thanks @jayeshp19.
|
||||
- Windows/restart: clean up stale gateway listeners before Windows self-restart and treat listener and argv probe failures as inconclusive, so scheduled-task relaunch no longer falls into an `EADDRINUSE` retry loop. (#60480) Thanks @arifahmedjoy.
|
||||
- Plugins: suppress trust-warning noise during non-activating snapshot and CLI metadata loads. (#61427) Thanks @gumadeiras.
|
||||
- Agents/video generation: accept `agents.defaults.videoGenerationModel` in strict config validation and `openclaw config set/get`, so gateways using `video_generate` no longer fail to boot after enabling a video model.
|
||||
|
||||
## 2026.4.2
|
||||
|
||||
|
||||
@@ -1030,6 +1030,31 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"video_generate": {
|
||||
"emoji": "🎬",
|
||||
"title": "Video Generation",
|
||||
"actions": {
|
||||
"generate": {
|
||||
"label": "generate",
|
||||
"detailKeys": [
|
||||
"prompt",
|
||||
"model",
|
||||
"durationSeconds",
|
||||
"resolution",
|
||||
"aspectRatio",
|
||||
"audio",
|
||||
"watermark"
|
||||
]
|
||||
},
|
||||
"list": {
|
||||
"label": "list",
|
||||
"detailKeys": [
|
||||
"provider",
|
||||
"model"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"pdf": {
|
||||
"emoji": "📑",
|
||||
"title": "PDF",
|
||||
|
||||
@@ -30,7 +30,7 @@ Related:
|
||||
falls back to `agents.defaults.imageModel`, then the resolved session/default
|
||||
model.
|
||||
- `agents.defaults.imageGenerationModel` is used by the shared image-generation capability. If omitted, `image_generate` can still infer an auth-backed provider default. It tries the current default provider first, then the remaining registered image-generation providers in provider-id order. If you set a specific provider/model, also configure that provider's auth/API key.
|
||||
- `agents.defaults.videoGenerationModel` is used by the shared video-generation capability. Unlike image generation, this does not infer a provider default today. Set an explicit `provider/model` such as `qwen/wan2.6-t2v`, and configure that provider's auth/API key too.
|
||||
- `agents.defaults.videoGenerationModel` is used by the shared video-generation capability. If omitted, `video_generate` can still infer an auth-backed provider default. It tries the current default provider first, then the remaining registered video-generation providers in provider-id order. If you set a specific provider/model, also configure that provider's auth/API key.
|
||||
- Per-agent defaults can override `agents.defaults.model` via `agents.list[].model` plus bindings (see [/concepts/multi-agent](/concepts/multi-agent)).
|
||||
|
||||
## Quick model policy
|
||||
@@ -252,4 +252,5 @@ This applies whenever OpenClaw regenerates `models.json`, including command-driv
|
||||
- [Model Providers](/concepts/model-providers) — provider routing and auth
|
||||
- [Model Failover](/concepts/model-failover) — fallback chains
|
||||
- [Image Generation](/tools/image-generation) — image model configuration
|
||||
- [Video Generation](/tools/video-generation) — video model configuration
|
||||
- [Configuration Reference](/gateway/configuration-reference#agent-defaults) — model config keys
|
||||
|
||||
@@ -1026,9 +1026,9 @@ Time format in system prompt. Default: `auto` (OS preference).
|
||||
- If you select a provider/model directly, configure the matching provider auth/API key too (for example `GEMINI_API_KEY` or `GOOGLE_API_KEY` for `google/*`, `OPENAI_API_KEY` for `openai/*`, `FAL_KEY` for `fal/*`).
|
||||
- If omitted, `image_generate` can still infer an auth-backed provider default. It tries the current default provider first, then the remaining registered image-generation providers in provider-id order.
|
||||
- `videoGenerationModel`: accepts either a string (`"provider/model"`) or an object (`{ primary, fallbacks }`).
|
||||
- Used by the shared video-generation capability.
|
||||
- Used by the shared video-generation capability and the built-in `video_generate` tool.
|
||||
- Typical values: `qwen/wan2.6-t2v`, `qwen/wan2.6-i2v`, `qwen/wan2.6-r2v`, `qwen/wan2.6-r2v-flash`, or `qwen/wan2.7-r2v`.
|
||||
- Set this explicitly before using shared video generation. Unlike `imageGenerationModel`, the video-generation runtime does not infer a provider default yet.
|
||||
- If omitted, `video_generate` can still infer an auth-backed provider default. It tries the current default provider first, then the remaining registered video-generation providers in provider-id order.
|
||||
- If you select a provider/model directly, configure the matching provider auth/API key too.
|
||||
- The bundled Qwen video-generation provider currently supports up to 1 output video, 1 input image, 4 input videos, 10 seconds duration, and provider-level `size`, `aspectRatio`, `resolution`, `audio`, and `watermark` options.
|
||||
- `pdfModel`: accepts either a string (`"provider/model"`) or an object (`{ primary, fallbacks }`).
|
||||
@@ -1936,12 +1936,12 @@ Defaults for Talk mode (macOS/iOS/Android).
|
||||
|
||||
Local onboarding defaults new local configs to `tools.profile: "coding"` when unset (existing explicit profiles are preserved).
|
||||
|
||||
| Profile | Includes |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------- |
|
||||
| `minimal` | `session_status` only |
|
||||
| `coding` | `group:fs`, `group:runtime`, `group:web`, `group:sessions`, `group:memory`, `cron`, `image`, `image_generate` |
|
||||
| `messaging` | `group:messaging`, `sessions_list`, `sessions_history`, `sessions_send`, `session_status` |
|
||||
| `full` | No restriction (same as unset) |
|
||||
| Profile | Includes |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `minimal` | `session_status` only |
|
||||
| `coding` | `group:fs`, `group:runtime`, `group:web`, `group:sessions`, `group:memory`, `cron`, `image`, `image_generate`, `video_generate` |
|
||||
| `messaging` | `group:messaging`, `sessions_list`, `sessions_history`, `sessions_send`, `session_status` |
|
||||
| `full` | No restriction (same as unset) |
|
||||
|
||||
### Tool groups
|
||||
|
||||
@@ -1957,7 +1957,7 @@ Local onboarding defaults new local configs to `tools.profile: "coding"` when un
|
||||
| `group:messaging` | `message` |
|
||||
| `group:nodes` | `nodes` |
|
||||
| `group:agents` | `agents_list` |
|
||||
| `group:media` | `image`, `image_generate`, `tts` |
|
||||
| `group:media` | `image`, `image_generate`, `video_generate`, `tts` |
|
||||
| `group:openclaw` | All built-in tools (excludes provider plugins) |
|
||||
|
||||
### `tools.allow` / `tools.deny`
|
||||
|
||||
@@ -98,7 +98,7 @@ Available groups:
|
||||
- `group:messaging`: `message`
|
||||
- `group:nodes`: `nodes`
|
||||
- `group:agents`: `agents_list`
|
||||
- `group:media`: `image`, `image_generate`, `tts`
|
||||
- `group:media`: `image`, `image_generate`, `video_generate`, `tts`
|
||||
- `group:openclaw`: all built-in OpenClaw tools (excludes provider plugins)
|
||||
|
||||
## Elevated: exec-only "run on host"
|
||||
|
||||
@@ -123,6 +123,9 @@ Current bundled Qwen video-generation limits:
|
||||
- Up to **4** input videos
|
||||
- Up to **10 seconds** duration
|
||||
- Supports `size`, `aspectRatio`, `resolution`, `audio`, and `watermark`
|
||||
- Reference image/video mode currently requires **remote http(s) URLs**. Local
|
||||
file paths are rejected up front because the DashScope video endpoint does not
|
||||
accept uploaded local buffers for those references.
|
||||
|
||||
See [Qwen / Model Studio](/providers/qwen_modelstudio) for endpoint-level detail
|
||||
and compatibility notes.
|
||||
|
||||
@@ -53,25 +53,28 @@ OpenClaw has three layers that work together:
|
||||
|
||||
These tools ship with OpenClaw and are available without installing any plugins:
|
||||
|
||||
| Tool | What it does | Page |
|
||||
| ------------------------------------------ | --------------------------------------------------------------------- | --------------------------------------- |
|
||||
| `exec` / `process` | Run shell commands, manage background processes | [Exec](/tools/exec) |
|
||||
| `code_execution` | Run sandboxed remote Python analysis | [Code Execution](/tools/code-execution) |
|
||||
| `browser` | Control a Chromium browser (navigate, click, screenshot) | [Browser](/tools/browser) |
|
||||
| `web_search` / `x_search` / `web_fetch` | Search the web, search X posts, fetch page content | [Web](/tools/web) |
|
||||
| `read` / `write` / `edit` | File I/O in the workspace | |
|
||||
| `apply_patch` | Multi-hunk file patches | [Apply Patch](/tools/apply-patch) |
|
||||
| `message` | Send messages across all channels | [Agent Send](/tools/agent-send) |
|
||||
| `canvas` | Drive node Canvas (present, eval, snapshot) | |
|
||||
| `nodes` | Discover and target paired devices | |
|
||||
| `cron` / `gateway` | Manage scheduled jobs; inspect, patch, restart, or update the gateway | |
|
||||
| `image` / `image_generate` | Analyze or generate images | |
|
||||
| `tts` | One-shot text-to-speech conversion | [TTS](/tools/tts) |
|
||||
| `sessions_*` / `subagents` / `agents_list` | Session management, status, and sub-agent orchestration | [Sub-agents](/tools/subagents) |
|
||||
| `session_status` | Lightweight `/status`-style readback and session model override | [Session Tools](/concepts/session-tool) |
|
||||
| Tool | What it does | Page |
|
||||
| ------------------------------------------ | --------------------------------------------------------------------- | ------------------------------------------- |
|
||||
| `exec` / `process` | Run shell commands, manage background processes | [Exec](/tools/exec) |
|
||||
| `code_execution` | Run sandboxed remote Python analysis | [Code Execution](/tools/code-execution) |
|
||||
| `browser` | Control a Chromium browser (navigate, click, screenshot) | [Browser](/tools/browser) |
|
||||
| `web_search` / `x_search` / `web_fetch` | Search the web, search X posts, fetch page content | [Web](/tools/web) |
|
||||
| `read` / `write` / `edit` | File I/O in the workspace | |
|
||||
| `apply_patch` | Multi-hunk file patches | [Apply Patch](/tools/apply-patch) |
|
||||
| `message` | Send messages across all channels | [Agent Send](/tools/agent-send) |
|
||||
| `canvas` | Drive node Canvas (present, eval, snapshot) | |
|
||||
| `nodes` | Discover and target paired devices | |
|
||||
| `cron` / `gateway` | Manage scheduled jobs; inspect, patch, restart, or update the gateway | |
|
||||
| `image` / `image_generate` | Analyze or generate images | [Image Generation](/tools/image-generation) |
|
||||
| `video_generate` | Generate videos | [Video Generation](/tools/video-generation) |
|
||||
| `tts` | One-shot text-to-speech conversion | [TTS](/tools/tts) |
|
||||
| `sessions_*` / `subagents` / `agents_list` | Session management, status, and sub-agent orchestration | [Sub-agents](/tools/subagents) |
|
||||
| `session_status` | Lightweight `/status`-style readback and session model override | [Session Tools](/concepts/session-tool) |
|
||||
|
||||
For image work, use `image` for analysis and `image_generate` for generation or editing. If you target `openai/*`, `google/*`, `fal/*`, or another non-default image provider, configure that provider's auth/API key first.
|
||||
|
||||
For video work, use `video_generate`. If you target `qwen/*` or another non-default video provider, configure that provider's auth/API key first.
|
||||
|
||||
`session_status` is the lightweight status/readback tool in the sessions group.
|
||||
It answers `/status`-style questions about the current session and can
|
||||
optionally set a per-session model override; `model=default` clears that
|
||||
@@ -121,12 +124,12 @@ config. Deny always wins over allow.
|
||||
`tools.profile` sets a base allowlist before `allow`/`deny` is applied.
|
||||
Per-agent override: `agents.list[].tools.profile`.
|
||||
|
||||
| Profile | What it includes |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------- |
|
||||
| `full` | No restriction (same as unset) |
|
||||
| `coding` | `group:fs`, `group:runtime`, `group:web`, `group:sessions`, `group:memory`, `cron`, `image`, `image_generate` |
|
||||
| `messaging` | `group:messaging`, `sessions_list`, `sessions_history`, `sessions_send`, `session_status` |
|
||||
| `minimal` | `session_status` only |
|
||||
| Profile | What it includes |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `full` | No restriction (same as unset) |
|
||||
| `coding` | `group:fs`, `group:runtime`, `group:web`, `group:sessions`, `group:memory`, `cron`, `image`, `image_generate`, `video_generate` |
|
||||
| `messaging` | `group:messaging`, `sessions_list`, `sessions_history`, `sessions_send`, `session_status` |
|
||||
| `minimal` | `session_status` only |
|
||||
|
||||
### Tool groups
|
||||
|
||||
@@ -144,7 +147,7 @@ Use `group:*` shorthands in allow/deny lists:
|
||||
| `group:messaging` | message |
|
||||
| `group:nodes` | nodes |
|
||||
| `group:agents` | agents_list |
|
||||
| `group:media` | image, image_generate, tts |
|
||||
| `group:media` | image, image_generate, video_generate, tts |
|
||||
| `group:openclaw` | All built-in OpenClaw tools (excludes plugin tools) |
|
||||
|
||||
`sessions_history` returns a bounded, safety-filtered recall view. It strips
|
||||
|
||||
109
docs/tools/video-generation.md
Normal file
109
docs/tools/video-generation.md
Normal file
@@ -0,0 +1,109 @@
|
||||
---
|
||||
summary: "Generate videos using configured providers such as Qwen"
|
||||
read_when:
|
||||
- Generating videos via the agent
|
||||
- Configuring video generation providers and models
|
||||
- Understanding the video_generate tool parameters
|
||||
title: "Video Generation"
|
||||
---
|
||||
|
||||
# Video Generation
|
||||
|
||||
The `video_generate` tool lets the agent create videos using your configured providers. Generated videos are delivered automatically as media attachments in the agent's reply.
|
||||
|
||||
<Note>
|
||||
The tool only appears when at least one video-generation provider is available. If you don't see `video_generate` in your agent's tools, configure `agents.defaults.videoGenerationModel` or set up a provider API key.
|
||||
</Note>
|
||||
|
||||
## Quick start
|
||||
|
||||
1. Set an API key for at least one provider (for example `QWEN_API_KEY`).
|
||||
2. Optionally set your preferred model:
|
||||
|
||||
```json5
|
||||
{
|
||||
agents: {
|
||||
defaults: {
|
||||
videoGenerationModel: "qwen/wan2.6-t2v",
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
3. Ask the agent: _"Generate a 5-second cinematic video of a friendly lobster surfing at sunset."_
|
||||
|
||||
The agent calls `video_generate` automatically. No tool allow-listing needed — it's enabled by default when a provider is available.
|
||||
|
||||
## Supported providers
|
||||
|
||||
| Provider | Default model | Reference inputs | API key |
|
||||
| -------- | ------------- | ---------------- | ---------------------------------------------------------- |
|
||||
| Qwen | `wan2.6-t2v` | Yes, remote URLs | `QWEN_API_KEY`, `MODELSTUDIO_API_KEY`, `DASHSCOPE_API_KEY` |
|
||||
|
||||
Use `action: "list"` to inspect available providers and models at runtime:
|
||||
|
||||
```
|
||||
/tool video_generate action=list
|
||||
```
|
||||
|
||||
## Tool parameters
|
||||
|
||||
| Parameter | Type | Description |
|
||||
| ----------------- | -------- | ------------------------------------------------------------------------------------- |
|
||||
| `prompt` | string | Video generation prompt (required for `action: "generate"`) |
|
||||
| `action` | string | `"generate"` (default) or `"list"` to inspect providers |
|
||||
| `model` | string | Provider/model override, e.g. `qwen/wan2.6-t2v` |
|
||||
| `image` | string | Single reference image path or URL |
|
||||
| `images`          | string[] | Multiple reference images (tool accepts up to 5; provider limits may be lower — the bundled Qwen provider currently accepts 1 input image) |
|
||||
| `video` | string | Single reference video path or URL |
|
||||
| `videos` | string[] | Multiple reference videos (up to 4) |
|
||||
| `size` | string | Size hint when the provider supports it |
|
||||
| `aspectRatio` | string | Aspect ratio: `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9` |
|
||||
| `resolution` | string | Resolution hint: `480P`, `720P`, or `1080P` |
|
||||
| `durationSeconds` | number | Target duration in seconds |
|
||||
| `audio` | boolean | Enable generated audio when the provider supports it |
|
||||
| `watermark` | boolean | Toggle provider watermarking when supported |
|
||||
| `filename` | string | Output filename hint |
|
||||
|
||||
Not all providers support all parameters. The tool validates provider capability limits before it submits the request.
|
||||
|
||||
## Configuration
|
||||
|
||||
### Model selection
|
||||
|
||||
```json5
|
||||
{
|
||||
agents: {
|
||||
defaults: {
|
||||
videoGenerationModel: {
|
||||
primary: "qwen/wan2.6-t2v",
|
||||
fallbacks: ["qwen/wan2.6-r2v-flash"],
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
### Provider selection order
|
||||
|
||||
When generating a video, OpenClaw tries providers in this order:
|
||||
|
||||
1. **`model` parameter** from the tool call (if the agent specifies one)
|
||||
2. **`videoGenerationModel.primary`** from config
|
||||
3. **`videoGenerationModel.fallbacks`** in order
|
||||
4. **Auto-detection** — uses auth-backed provider defaults only:
|
||||
- current default provider first
|
||||
- remaining registered video-generation providers in provider-id order
|
||||
|
||||
If a provider fails, the next candidate is tried automatically. If all fail, the error includes details from each attempt.
|
||||
|
||||
## Qwen reference inputs
|
||||
|
||||
The bundled Qwen provider supports text-to-video plus image/video reference modes, but the upstream DashScope video endpoint currently requires **remote http(s) URLs** for reference inputs. Local file paths and uploaded buffers are rejected up front instead of being silently ignored.
|
||||
|
||||
## Related
|
||||
|
||||
- [Tools Overview](/tools) — all available agent tools
|
||||
- [Qwen](/providers/qwen) — Qwen-specific setup and limits
|
||||
- [Configuration Reference](/gateway/configuration-reference#agent-defaults) — `videoGenerationModel` config
|
||||
- [Models](/concepts/models) — model configuration and failover
|
||||
@@ -107,4 +107,21 @@ describe("qwen video generation provider", () => {
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("fails fast when reference inputs are local buffers instead of remote URLs", async () => {
|
||||
const provider = buildQwenVideoGenerationProvider();
|
||||
|
||||
await expect(
|
||||
provider.generateVideo({
|
||||
provider: "qwen",
|
||||
model: "wan2.6-i2v",
|
||||
prompt: "animate this local frame",
|
||||
cfg: {},
|
||||
inputImages: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
|
||||
}),
|
||||
).rejects.toThrow(
|
||||
"Qwen video generation currently requires remote http(s) URLs for reference images/videos.",
|
||||
);
|
||||
expect(postJsonRequestMock).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -90,7 +90,22 @@ function resolveReferenceUrls(
|
||||
.filter((value): value is string => Boolean(value));
|
||||
}
|
||||
|
||||
function assertQwenReferenceInputsSupported(
|
||||
inputImages: VideoGenerationSourceAsset[] | undefined,
|
||||
inputVideos: VideoGenerationSourceAsset[] | undefined,
|
||||
): void {
|
||||
const unsupported = [...(inputImages ?? []), ...(inputVideos ?? [])].some(
|
||||
(asset) => !asset.url?.trim() && asset.buffer,
|
||||
);
|
||||
if (unsupported) {
|
||||
throw new Error(
|
||||
"Qwen video generation currently requires remote http(s) URLs for reference images/videos.",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function buildQwenVideoGenerationInput(req: VideoGenerationRequest): Record<string, unknown> {
|
||||
assertQwenReferenceInputsSupported(req.inputImages, req.inputVideos);
|
||||
const input: Record<string, unknown> = {
|
||||
prompt: req.prompt,
|
||||
};
|
||||
|
||||
@@ -31,6 +31,7 @@ import { createSessionsYieldTool } from "./tools/sessions-yield-tool.js";
|
||||
import { createSubagentsTool } from "./tools/subagents-tool.js";
|
||||
import { createTtsTool } from "./tools/tts-tool.js";
|
||||
import { createUpdatePlanTool } from "./tools/update-plan-tool.js";
|
||||
import { createVideoGenerateTool } from "./tools/video-generate-tool.js";
|
||||
import { createWebFetchTool, createWebSearchTool } from "./tools/web-tools.js";
|
||||
import { resolveWorkspaceRoot } from "./workspace-dir.js";
|
||||
|
||||
@@ -159,6 +160,13 @@ export function createOpenClawTools(
|
||||
sandbox,
|
||||
fsPolicy: options?.fsPolicy,
|
||||
});
|
||||
const videoGenerateTool = createVideoGenerateTool({
|
||||
config: options?.config,
|
||||
agentDir: options?.agentDir,
|
||||
workspaceDir,
|
||||
sandbox,
|
||||
fsPolicy: options?.fsPolicy,
|
||||
});
|
||||
const pdfTool = options?.agentDir?.trim()
|
||||
? createPdfTool({
|
||||
config: options?.config,
|
||||
@@ -216,6 +224,7 @@ export function createOpenClawTools(
|
||||
config: options?.config,
|
||||
}),
|
||||
...(imageGenerateTool ? [imageGenerateTool] : []),
|
||||
...(videoGenerateTool ? [videoGenerateTool] : []),
|
||||
createGatewayTool({
|
||||
agentSessionKey: options?.agentSessionKey,
|
||||
config: options?.config,
|
||||
|
||||
91
src/agents/openclaw-tools.video-generation.test.ts
Normal file
91
src/agents/openclaw-tools.video-generation.test.ts
Normal file
@@ -0,0 +1,91 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import * as videoGenerationRuntime from "../video-generation/runtime.js";
|
||||
import { createOpenClawTools } from "./openclaw-tools.js";
|
||||
|
||||
vi.mock("../plugins/tools.js", () => ({
|
||||
resolvePluginTools: () => [],
|
||||
copyPluginToolMeta: () => undefined,
|
||||
getPluginToolMeta: () => undefined,
|
||||
}));
|
||||
|
||||
function asConfig(value: unknown): OpenClawConfig {
|
||||
return value as OpenClawConfig;
|
||||
}
|
||||
|
||||
function stubVideoGenerationProviders() {
|
||||
vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([
|
||||
{
|
||||
id: "qwen",
|
||||
defaultModel: "wan2.6-t2v",
|
||||
models: ["wan2.6-t2v"],
|
||||
capabilities: {
|
||||
maxVideos: 1,
|
||||
maxInputImages: 1,
|
||||
maxInputVideos: 4,
|
||||
maxDurationSeconds: 10,
|
||||
supportsSize: true,
|
||||
supportsAspectRatio: true,
|
||||
supportsResolution: true,
|
||||
supportsAudio: true,
|
||||
supportsWatermark: true,
|
||||
},
|
||||
generateVideo: vi.fn(async () => {
|
||||
throw new Error("not used");
|
||||
}),
|
||||
},
|
||||
]);
|
||||
}
|
||||
|
||||
describe("openclaw tools video generation registration", () => {
|
||||
beforeEach(() => {
|
||||
vi.stubEnv("QWEN_API_KEY", "");
|
||||
vi.stubEnv("MODELSTUDIO_API_KEY", "");
|
||||
vi.stubEnv("DASHSCOPE_API_KEY", "");
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
vi.unstubAllEnvs();
|
||||
});
|
||||
|
||||
it("registers video_generate when video-generation config is present", () => {
|
||||
const tools = createOpenClawTools({
|
||||
config: asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
videoGenerationModel: {
|
||||
primary: "qwen/wan2.6-t2v",
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
agentDir: "/tmp/openclaw-agent-main",
|
||||
});
|
||||
|
||||
expect(tools.map((tool) => tool.name)).toContain("video_generate");
|
||||
});
|
||||
|
||||
it("registers video_generate when a compatible provider has env-backed auth", () => {
|
||||
stubVideoGenerationProviders();
|
||||
vi.stubEnv("QWEN_API_KEY", "qwen-test");
|
||||
|
||||
const tools = createOpenClawTools({
|
||||
config: asConfig({}),
|
||||
agentDir: "/tmp/openclaw-agent-main",
|
||||
});
|
||||
|
||||
expect(tools.map((tool) => tool.name)).toContain("video_generate");
|
||||
});
|
||||
|
||||
it("omits video_generate when config is absent and no compatible provider auth exists", () => {
|
||||
stubVideoGenerationProviders();
|
||||
|
||||
const tools = createOpenClawTools({
|
||||
config: asConfig({}),
|
||||
agentDir: "/tmp/openclaw-agent-main",
|
||||
});
|
||||
|
||||
expect(tools.map((tool) => tool.name)).not.toContain("video_generate");
|
||||
});
|
||||
});
|
||||
@@ -46,6 +46,9 @@ vi.mock("./tools/gateway-tool.js", () => ({
|
||||
vi.mock("./tools/image-generate-tool.js", () => ({
|
||||
createImageGenerateTool: mockToolFactory("image_generate_stub"),
|
||||
}));
|
||||
vi.mock("./tools/video-generate-tool.js", () => ({
|
||||
createVideoGenerateTool: mockToolFactory("video_generate_stub"),
|
||||
}));
|
||||
vi.mock("./tools/image-tool.js", () => ({
|
||||
createImageTool: mockToolFactory("image_stub"),
|
||||
}));
|
||||
|
||||
@@ -265,6 +265,10 @@ describe("extractToolResultMediaPaths", () => {
|
||||
expect(isToolResultMediaTrusted("image_generate")).toBe(true);
|
||||
});
|
||||
|
||||
it("trusts video_generate local MEDIA paths", () => {
|
||||
expect(isToolResultMediaTrusted("video_generate")).toBe(true);
|
||||
});
|
||||
|
||||
it("does not trust local MEDIA paths for MCP-provenance results", () => {
|
||||
expect(
|
||||
filterToolResultMediaUrls("browser", ["/tmp/screenshot.png"], {
|
||||
|
||||
@@ -156,6 +156,7 @@ const TRUSTED_TOOL_RESULT_MEDIA = new Set([
|
||||
"sessions_spawn",
|
||||
"subagents",
|
||||
"tts",
|
||||
"video_generate",
|
||||
"web_fetch",
|
||||
"web_search",
|
||||
"x_search",
|
||||
|
||||
@@ -32,6 +32,7 @@ const coreTools = [
|
||||
stubActionTool("session_status", ["get", "show"]),
|
||||
stubTool("tts"),
|
||||
stubTool("image_generate"),
|
||||
stubTool("video_generate"),
|
||||
stubTool("web_fetch"),
|
||||
stubTool("image"),
|
||||
stubTool("pdf"),
|
||||
|
||||
@@ -23,6 +23,10 @@ vi.mock("../tools/image-generate-tool.js", () => ({
|
||||
createImageGenerateTool: () => stubTool("image_generate"),
|
||||
}));
|
||||
|
||||
vi.mock("../tools/video-generate-tool.js", () => ({
|
||||
createVideoGenerateTool: () => stubTool("video_generate"),
|
||||
}));
|
||||
|
||||
vi.mock("../tools/web-tools.js", () => ({
|
||||
createWebSearchTool: () => null,
|
||||
createWebFetchTool: () => null,
|
||||
|
||||
@@ -10,6 +10,7 @@ describe("tool-catalog", () => {
|
||||
expect(policy!.allow).toContain("x_search");
|
||||
expect(policy!.allow).toContain("web_fetch");
|
||||
expect(policy!.allow).toContain("image_generate");
|
||||
expect(policy!.allow).toContain("video_generate");
|
||||
expect(policy!.allow).toContain("update_plan");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -277,6 +277,14 @@ const CORE_TOOL_DEFINITIONS: CoreToolDefinition[] = [
|
||||
profiles: ["coding"],
|
||||
includeInOpenClawGroup: true,
|
||||
},
|
||||
{
|
||||
id: "video_generate",
|
||||
label: "video_generate",
|
||||
description: "Video generation",
|
||||
sectionId: "media",
|
||||
profiles: ["coding"],
|
||||
includeInOpenClawGroup: true,
|
||||
},
|
||||
{
|
||||
id: "tts",
|
||||
label: "tts",
|
||||
|
||||
@@ -640,6 +640,28 @@ export const TOOL_DISPLAY_CONFIG: ToolDisplayConfig = {
|
||||
},
|
||||
},
|
||||
},
|
||||
video_generate: {
|
||||
emoji: "🎬",
|
||||
title: "Video Generation",
|
||||
actions: {
|
||||
generate: {
|
||||
label: "generate",
|
||||
detailKeys: [
|
||||
"prompt",
|
||||
"model",
|
||||
"durationSeconds",
|
||||
"resolution",
|
||||
"aspectRatio",
|
||||
"audio",
|
||||
"watermark",
|
||||
],
|
||||
},
|
||||
list: {
|
||||
label: "list",
|
||||
detailKeys: ["provider", "model"],
|
||||
},
|
||||
},
|
||||
},
|
||||
pdf: {
|
||||
emoji: "📑",
|
||||
title: "PDF",
|
||||
|
||||
@@ -32,9 +32,16 @@ export function applyImageGenerationModelConfigDefaults(
|
||||
return applyAgentDefaultModelConfig(cfg, "imageGenerationModel", imageGenerationModelConfig);
|
||||
}
|
||||
|
||||
export function applyVideoGenerationModelConfigDefaults(
|
||||
cfg: OpenClawConfig | undefined,
|
||||
videoGenerationModelConfig: ToolModelConfig,
|
||||
): OpenClawConfig | undefined {
|
||||
return applyAgentDefaultModelConfig(cfg, "videoGenerationModel", videoGenerationModelConfig);
|
||||
}
|
||||
|
||||
function applyAgentDefaultModelConfig(
|
||||
cfg: OpenClawConfig | undefined,
|
||||
key: "imageModel" | "imageGenerationModel",
|
||||
key: "imageModel" | "imageGenerationModel" | "videoGenerationModel",
|
||||
modelConfig: ToolModelConfig,
|
||||
): OpenClawConfig | undefined {
|
||||
if (!cfg) {
|
||||
|
||||
91
src/agents/tools/video-generate-tool.test.ts
Normal file
91
src/agents/tools/video-generate-tool.test.ts
Normal file
@@ -0,0 +1,91 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import * as mediaStore from "../../media/store.js";
|
||||
import * as videoGenerationRuntime from "../../video-generation/runtime.js";
|
||||
import { createVideoGenerateTool } from "./video-generate-tool.js";
|
||||
|
||||
function asConfig(value: unknown): OpenClawConfig {
|
||||
return value as OpenClawConfig;
|
||||
}
|
||||
|
||||
describe("createVideoGenerateTool", () => {
  beforeEach(() => {
    // Reset spies between tests so provider/media mocks don't leak.
    vi.restoreAllMocks();
  });

  afterEach(() => {
    vi.unstubAllEnvs();
  });

  it("returns null when no video-generation config or auth-backed provider is available", () => {
    // With zero runtime providers and an empty config, the factory must opt out.
    vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([]);

    expect(createVideoGenerateTool({ config: asConfig({}) })).toBeNull();
  });

  it("registers when video-generation config is present", () => {
    // An explicit agents.defaults.videoGenerationModel is enough to register.
    expect(
      createVideoGenerateTool({
        config: asConfig({
          agents: {
            defaults: {
              videoGenerationModel: { primary: "qwen/wan2.6-t2v" },
            },
          },
        }),
      }),
    ).not.toBeNull();
  });

  it("generates videos, saves them, and emits MEDIA paths", async () => {
    // Stub the provider call so no network/auth is needed.
    vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
      provider: "qwen",
      model: "wan2.6-t2v",
      attempts: [],
      videos: [
        {
          buffer: Buffer.from("video-bytes"),
          mimeType: "video/mp4",
          fileName: "lobster.mp4",
        },
      ],
      metadata: { taskId: "task-1" },
    });
    // Stub persistence: the tool should surface this exact saved path.
    vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
      path: "/tmp/generated-lobster.mp4",
      id: "generated-lobster.mp4",
      size: 11,
      contentType: "video/mp4",
    });

    const tool = createVideoGenerateTool({
      config: asConfig({
        agents: {
          defaults: {
            videoGenerationModel: { primary: "qwen/wan2.6-t2v" },
          },
        },
      }),
    });
    expect(tool).not.toBeNull();
    if (!tool) {
      throw new Error("expected video_generate tool");
    }

    const result = await tool.execute("call-1", { prompt: "friendly lobster surfing" });
    const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";

    // The saved path must appear both as a MEDIA: line and in details.
    expect(text).toContain("Generated 1 video with qwen/wan2.6-t2v.");
    expect(text).toContain("MEDIA:/tmp/generated-lobster.mp4");
    expect(result.details).toMatchObject({
      provider: "qwen",
      model: "wan2.6-t2v",
      count: 1,
      media: {
        mediaUrls: ["/tmp/generated-lobster.mp4"],
      },
      paths: ["/tmp/generated-lobster.mp4"],
      metadata: { taskId: "task-1" },
    });
  });
});
|
||||
735
src/agents/tools/video-generate-tool.ts
Normal file
735
src/agents/tools/video-generate-tool.ts
Normal file
@@ -0,0 +1,735 @@
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import { loadConfig } from "../../config/config.js";
|
||||
import { saveMediaBuffer } from "../../media/store.js";
|
||||
import { loadWebMedia } from "../../media/web-media.js";
|
||||
import { readSnakeCaseParamRaw } from "../../param-key.js";
|
||||
import { getProviderEnvVars } from "../../secrets/provider-env-vars.js";
|
||||
import { resolveUserPath } from "../../utils.js";
|
||||
import { parseVideoGenerationModelRef } from "../../video-generation/model-ref.js";
|
||||
import {
|
||||
generateVideo,
|
||||
listRuntimeVideoGenerationProviders,
|
||||
} from "../../video-generation/runtime.js";
|
||||
import type {
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationResolution,
|
||||
VideoGenerationSourceAsset,
|
||||
} from "../../video-generation/types.js";
|
||||
import { normalizeProviderId } from "../provider-id.js";
|
||||
import {
|
||||
ToolInputError,
|
||||
readNumberParam,
|
||||
readStringArrayParam,
|
||||
readStringParam,
|
||||
} from "./common.js";
|
||||
import { decodeDataUrl } from "./image-tool.helpers.js";
|
||||
import {
|
||||
applyVideoGenerationModelConfigDefaults,
|
||||
resolveMediaToolLocalRoots,
|
||||
} from "./media-tool-shared.js";
|
||||
import {
|
||||
buildToolModelConfigFromCandidates,
|
||||
coerceToolModelConfig,
|
||||
hasAuthForProvider,
|
||||
hasToolModelConfig,
|
||||
resolveDefaultModelRef,
|
||||
type ToolModelConfig,
|
||||
} from "./model-config.helpers.js";
|
||||
import {
|
||||
createSandboxBridgeReadFile,
|
||||
resolveSandboxedBridgeMediaPath,
|
||||
type AnyAgentTool,
|
||||
type SandboxFsBridge,
|
||||
type ToolFsPolicy,
|
||||
} from "./tool-runtime.helpers.js";
|
||||
|
||||
// Tool-level upper bounds on reference assets; providers may declare tighter
// limits via capabilities.maxInputImages / maxInputVideos.
const MAX_INPUT_IMAGES = 5;
const MAX_INPUT_VIDEOS = 4;
// Aspect-ratio hints accepted by normalizeAspectRatio().
const SUPPORTED_ASPECT_RATIOS = new Set([
  "1:1",
  "2:3",
  "3:2",
  "3:4",
  "4:3",
  "4:5",
  "5:4",
  "9:16",
  "16:9",
  "21:9",
]);
|
||||
|
||||
// Tool-call parameter schema. Every field is optional at the schema level;
// the "generate" action additionally requires `prompt` at runtime (enforced
// in execute() via readStringParam(..., { required: true })).
const VideoGenerateToolSchema = Type.Object({
  action: Type.Optional(
    Type.String({
      description:
        'Optional action: "generate" (default) or "list" to inspect available providers/models.',
    }),
  ),
  prompt: Type.Optional(Type.String({ description: "Video generation prompt." })),
  image: Type.Optional(
    Type.String({
      description: "Optional single reference image path or URL.",
    }),
  ),
  images: Type.Optional(
    Type.Array(Type.String(), {
      description: `Optional reference images (up to ${MAX_INPUT_IMAGES}).`,
    }),
  ),
  video: Type.Optional(
    Type.String({
      description: "Optional single reference video path or URL.",
    }),
  ),
  videos: Type.Optional(
    Type.Array(Type.String(), {
      description: `Optional reference videos (up to ${MAX_INPUT_VIDEOS}).`,
    }),
  ),
  model: Type.Optional(
    Type.String({ description: "Optional provider/model override, e.g. qwen/wan2.6-t2v." }),
  ),
  filename: Type.Optional(
    Type.String({
      description:
        "Optional output filename hint. OpenClaw preserves the basename and saves under its managed media directory.",
    }),
  ),
  size: Type.Optional(
    Type.String({
      description: "Optional size hint like 1280x720 or 1920x1080 when the provider supports it.",
    }),
  ),
  aspectRatio: Type.Optional(
    Type.String({
      description:
        "Optional aspect ratio hint: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, or 21:9.",
    }),
  ),
  resolution: Type.Optional(
    Type.String({
      description: "Optional resolution hint: 480P, 720P, or 1080P.",
    }),
  ),
  durationSeconds: Type.Optional(
    Type.Number({
      description: "Optional target duration in seconds.",
      minimum: 1,
    }),
  ),
  audio: Type.Optional(
    Type.Boolean({
      description: "Optional audio toggle when the provider supports generated audio.",
    }),
  ),
  watermark: Type.Optional(
    Type.Boolean({
      description: "Optional watermark toggle when the provider supports it.",
    }),
  ),
});
|
||||
|
||||
// Env-var names that may carry credentials for a provider; surfaced as auth
// hints in the "list" action output. Thin alias over the shared secrets helper.
function getVideoGenerationProviderAuthEnvVars(providerId: string): string[] {
  return getProviderEnvVars(providerId);
}
|
||||
|
||||
function resolveVideoGenerationModelCandidates(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
}): Array<string | undefined> {
|
||||
const providerDefaults = new Map<string, string>();
|
||||
for (const provider of listRuntimeVideoGenerationProviders({ config: params.cfg })) {
|
||||
const providerId = provider.id.trim();
|
||||
const modelId = provider.defaultModel?.trim();
|
||||
if (
|
||||
!providerId ||
|
||||
!modelId ||
|
||||
providerDefaults.has(providerId) ||
|
||||
!isVideoGenerationProviderConfigured({
|
||||
provider,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
})
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
providerDefaults.set(providerId, `${providerId}/${modelId}`);
|
||||
}
|
||||
|
||||
const primaryProvider = resolveDefaultModelRef(params.cfg).provider;
|
||||
const orderedProviders = [
|
||||
primaryProvider,
|
||||
...[...providerDefaults.keys()]
|
||||
.filter((providerId) => providerId !== primaryProvider)
|
||||
.toSorted(),
|
||||
];
|
||||
const orderedRefs: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
for (const providerId of orderedProviders) {
|
||||
const ref = providerDefaults.get(providerId);
|
||||
if (!ref || seen.has(ref)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(ref);
|
||||
orderedRefs.push(ref);
|
||||
}
|
||||
return orderedRefs;
|
||||
}
|
||||
|
||||
export function resolveVideoGenerationModelConfigForTool(params: {
|
||||
cfg?: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
}): ToolModelConfig | null {
|
||||
const explicit = coerceToolModelConfig(params.cfg?.agents?.defaults?.videoGenerationModel);
|
||||
if (hasToolModelConfig(explicit)) {
|
||||
return explicit;
|
||||
}
|
||||
return buildToolModelConfigFromCandidates({
|
||||
explicit,
|
||||
agentDir: params.agentDir,
|
||||
candidates: resolveVideoGenerationModelCandidates(params),
|
||||
isProviderConfigured: (providerId) =>
|
||||
isVideoGenerationProviderConfigured({
|
||||
providerId,
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
function isVideoGenerationProviderConfigured(params: {
|
||||
provider?: VideoGenerationProvider;
|
||||
providerId?: string;
|
||||
cfg?: OpenClawConfig;
|
||||
agentDir?: string;
|
||||
}): boolean {
|
||||
const provider =
|
||||
params.provider ??
|
||||
listRuntimeVideoGenerationProviders({ config: params.cfg }).find((candidate) => {
|
||||
const normalizedId = normalizeProviderId(params.providerId ?? "");
|
||||
return (
|
||||
normalizeProviderId(candidate.id) === normalizedId ||
|
||||
(candidate.aliases ?? []).some((alias) => normalizeProviderId(alias) === normalizedId)
|
||||
);
|
||||
});
|
||||
if (!provider) {
|
||||
return params.providerId
|
||||
? hasAuthForProvider({ provider: params.providerId, agentDir: params.agentDir })
|
||||
: false;
|
||||
}
|
||||
if (provider.isConfigured) {
|
||||
return provider.isConfigured({
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
});
|
||||
}
|
||||
return hasAuthForProvider({ provider: provider.id, agentDir: params.agentDir });
|
||||
}
|
||||
|
||||
function resolveAction(args: Record<string, unknown>): "generate" | "list" {
|
||||
const raw = readStringParam(args, "action");
|
||||
if (!raw) {
|
||||
return "generate";
|
||||
}
|
||||
const normalized = raw.trim().toLowerCase();
|
||||
if (normalized === "generate" || normalized === "list") {
|
||||
return normalized;
|
||||
}
|
||||
throw new ToolInputError('action must be "generate" or "list"');
|
||||
}
|
||||
|
||||
function normalizeResolution(raw: string | undefined): VideoGenerationResolution | undefined {
|
||||
const normalized = raw?.trim().toUpperCase();
|
||||
if (!normalized) {
|
||||
return undefined;
|
||||
}
|
||||
if (normalized === "480P" || normalized === "720P" || normalized === "1080P") {
|
||||
return normalized;
|
||||
}
|
||||
throw new ToolInputError("resolution must be one of 480P, 720P, or 1080P");
|
||||
}
|
||||
|
||||
function normalizeAspectRatio(raw: string | undefined): string | undefined {
|
||||
const normalized = raw?.trim();
|
||||
if (!normalized) {
|
||||
return undefined;
|
||||
}
|
||||
if (SUPPORTED_ASPECT_RATIOS.has(normalized)) {
|
||||
return normalized;
|
||||
}
|
||||
throw new ToolInputError(
|
||||
"aspectRatio must be one of 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, or 21:9",
|
||||
);
|
||||
}
|
||||
|
||||
function readBooleanParam(params: Record<string, unknown>, key: string): boolean | undefined {
|
||||
const raw = readSnakeCaseParamRaw(params, key);
|
||||
if (typeof raw === "boolean") {
|
||||
return raw;
|
||||
}
|
||||
if (typeof raw === "string") {
|
||||
const normalized = raw.trim().toLowerCase();
|
||||
if (normalized === "true") {
|
||||
return true;
|
||||
}
|
||||
if (normalized === "false") {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function normalizeReferenceInputs(params: {
|
||||
args: Record<string, unknown>;
|
||||
singularKey: "image" | "video";
|
||||
pluralKey: "images" | "videos";
|
||||
maxCount: number;
|
||||
}): string[] {
|
||||
const single = readStringParam(params.args, params.singularKey);
|
||||
const multiple = readStringArrayParam(params.args, params.pluralKey);
|
||||
const combined = [...(single ? [single] : []), ...(multiple ?? [])];
|
||||
const deduped: string[] = [];
|
||||
const seen = new Set<string>();
|
||||
for (const candidate of combined) {
|
||||
const trimmed = candidate.trim();
|
||||
const dedupe = trimmed.startsWith("@") ? trimmed.slice(1).trim() : trimmed;
|
||||
if (!dedupe || seen.has(dedupe)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(dedupe);
|
||||
deduped.push(trimmed);
|
||||
}
|
||||
if (deduped.length > params.maxCount) {
|
||||
throw new ToolInputError(
|
||||
`Too many reference ${params.pluralKey}: ${deduped.length} provided, maximum is ${params.maxCount}.`,
|
||||
);
|
||||
}
|
||||
return deduped;
|
||||
}
|
||||
|
||||
function resolveSelectedVideoGenerationProvider(params: {
|
||||
config?: OpenClawConfig;
|
||||
videoGenerationModelConfig: ToolModelConfig;
|
||||
modelOverride?: string;
|
||||
}): VideoGenerationProvider | undefined {
|
||||
const selectedRef =
|
||||
parseVideoGenerationModelRef(params.modelOverride) ??
|
||||
parseVideoGenerationModelRef(params.videoGenerationModelConfig.primary);
|
||||
if (!selectedRef) {
|
||||
return undefined;
|
||||
}
|
||||
const selectedProvider = normalizeProviderId(selectedRef.provider);
|
||||
return listRuntimeVideoGenerationProviders({ config: params.config }).find(
|
||||
(provider) =>
|
||||
normalizeProviderId(provider.id) === selectedProvider ||
|
||||
(provider.aliases ?? []).some((alias) => normalizeProviderId(alias) === selectedProvider),
|
||||
);
|
||||
}
|
||||
|
||||
function validateVideoGenerationCapabilities(params: {
|
||||
provider: VideoGenerationProvider | undefined;
|
||||
inputImageCount: number;
|
||||
inputVideoCount: number;
|
||||
size?: string;
|
||||
aspectRatio?: string;
|
||||
resolution?: VideoGenerationResolution;
|
||||
durationSeconds?: number;
|
||||
audio?: boolean;
|
||||
watermark?: boolean;
|
||||
}) {
|
||||
const provider = params.provider;
|
||||
if (!provider) {
|
||||
return;
|
||||
}
|
||||
const caps = provider.capabilities;
|
||||
if (params.inputImageCount > 0) {
|
||||
const maxInputImages = caps.maxInputImages ?? MAX_INPUT_IMAGES;
|
||||
if (params.inputImageCount > maxInputImages) {
|
||||
throw new ToolInputError(
|
||||
`${provider.id} supports at most ${maxInputImages} reference image${maxInputImages === 1 ? "" : "s"}.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
if (params.inputVideoCount > 0) {
|
||||
const maxInputVideos = caps.maxInputVideos ?? MAX_INPUT_VIDEOS;
|
||||
if (params.inputVideoCount > maxInputVideos) {
|
||||
throw new ToolInputError(
|
||||
`${provider.id} supports at most ${maxInputVideos} reference video${maxInputVideos === 1 ? "" : "s"}.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
if (params.size && !caps.supportsSize) {
|
||||
throw new ToolInputError(`${provider.id} does not support size overrides.`);
|
||||
}
|
||||
if (params.aspectRatio && !caps.supportsAspectRatio) {
|
||||
throw new ToolInputError(`${provider.id} does not support aspectRatio overrides.`);
|
||||
}
|
||||
if (params.resolution && !caps.supportsResolution) {
|
||||
throw new ToolInputError(`${provider.id} does not support resolution overrides.`);
|
||||
}
|
||||
if (
|
||||
typeof params.durationSeconds === "number" &&
|
||||
Number.isFinite(params.durationSeconds) &&
|
||||
typeof caps.maxDurationSeconds === "number" &&
|
||||
params.durationSeconds > caps.maxDurationSeconds
|
||||
) {
|
||||
throw new ToolInputError(
|
||||
`${provider.id} supports at most ${caps.maxDurationSeconds} seconds per video.`,
|
||||
);
|
||||
}
|
||||
if (typeof params.audio === "boolean" && !caps.supportsAudio) {
|
||||
throw new ToolInputError(`${provider.id} does not support audio toggles.`);
|
||||
}
|
||||
if (typeof params.watermark === "boolean" && !caps.supportsWatermark) {
|
||||
throw new ToolInputError(`${provider.id} does not support watermark toggles.`);
|
||||
}
|
||||
}
|
||||
|
||||
// Sandbox wiring for the tool: the sandbox root plus the fs bridge used to
// resolve and read media paths from inside the sandbox.
type VideoGenerateSandboxConfig = {
  root: string;
  bridge: SandboxFsBridge;
};
|
||||
|
||||
/**
 * Load user-supplied reference images/videos into provider-ready source
 * assets. Accepts plain paths, `~` paths (expanded only outside the sandbox),
 * file:// URLs, data: URLs (images only), and http(s) URLs. In sandbox mode
 * remote URLs are rejected and paths are resolved via the sandbox bridge;
 * outside the sandbox http(s) URLs are passed through by reference without
 * downloading.
 *
 * @throws ToolInputError for empty, unsupported-scheme, or wrongly-typed inputs.
 */
async function loadReferenceAssets(params: {
  inputs: string[];
  expectedKind: "image" | "video";
  maxBytes?: number;
  workspaceDir?: string;
  sandboxConfig: { root: string; bridge: SandboxFsBridge; workspaceOnly: boolean } | null;
}): Promise<
  Array<{
    sourceAsset: VideoGenerationSourceAsset;
    resolvedInput: string;
    rewrittenFrom?: string;
  }>
> {
  const loaded: Array<{
    sourceAsset: VideoGenerationSourceAsset;
    resolvedInput: string;
    rewrittenFrom?: string;
  }> = [];

  for (const rawInput of params.inputs) {
    // Strip the optional "@" attachment prefix before classifying the input.
    const trimmed = rawInput.trim();
    const inputRaw = trimmed.startsWith("@") ? trimmed.slice(1).trim() : trimmed;
    if (!inputRaw) {
      throw new ToolInputError(`${params.expectedKind} required (empty string in array)`);
    }
    // Classify: Windows drive paths look like schemes ("C:\...") and must not
    // be rejected by the unknown-scheme check below.
    const looksLikeWindowsDrivePath = /^[a-zA-Z]:[\\/]/.test(inputRaw);
    const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(inputRaw);
    const isFileUrl = /^file:/i.test(inputRaw);
    const isHttpUrl = /^https?:\/\//i.test(inputRaw);
    const isDataUrl = /^data:/i.test(inputRaw);
    if (hasScheme && !looksLikeWindowsDrivePath && !isFileUrl && !isHttpUrl && !isDataUrl) {
      throw new ToolInputError(
        `Unsupported ${params.expectedKind} reference: ${rawInput}. Use a file path, a file:// URL, a data: URL, or an http(s) URL.`,
      );
    }
    if (params.sandboxConfig && isHttpUrl) {
      throw new ToolInputError(
        `Sandboxed video_generate does not allow remote ${params.expectedKind} URLs.`,
      );
    }

    // "~" expansion is host-only; sandbox paths are resolved by the bridge.
    const resolvedInput = (() => {
      if (params.sandboxConfig) {
        return inputRaw;
      }
      if (inputRaw.startsWith("~")) {
        return resolveUserPath(inputRaw);
      }
      return inputRaw;
    })();

    // Outside the sandbox, remote URLs are forwarded by reference (no download).
    if (isHttpUrl && !params.sandboxConfig) {
      loaded.push({
        sourceAsset: { url: resolvedInput },
        resolvedInput,
      });
      continue;
    }

    // Resolve a concrete filesystem path: sandbox bridge resolution, or a
    // plain file:// prefix strip on the host. data: URLs have no path.
    const resolvedPathInfo: { resolved: string; rewrittenFrom?: string } = isDataUrl
      ? { resolved: "" }
      : params.sandboxConfig
        ? await resolveSandboxedBridgeMediaPath({
            sandbox: params.sandboxConfig,
            mediaPath: resolvedInput,
            inboundFallbackDir: "media/inbound",
          })
        : {
            resolved: resolvedInput.startsWith("file://")
              ? resolvedInput.slice("file://".length)
              : resolvedInput,
          };
    const resolvedPath = isDataUrl ? null : resolvedPathInfo.resolved;
    const localRoots = resolveMediaToolLocalRoots(
      params.workspaceDir,
      {
        workspaceOnly: params.sandboxConfig?.workspaceOnly === true,
      },
      resolvedPath ? [resolvedPath] : undefined,
    );
    // Load the bytes: inline data: URLs (images only), sandbox-bridge reads,
    // or host reads restricted to the computed local roots.
    const media = isDataUrl
      ? params.expectedKind === "image"
        ? decodeDataUrl(resolvedInput)
        : (() => {
            throw new ToolInputError("Video data: URLs are not supported for video_generate.");
          })()
      : params.sandboxConfig
        ? await loadWebMedia(resolvedPath ?? resolvedInput, {
            maxBytes: params.maxBytes,
            sandboxValidated: true,
            readFile: createSandboxBridgeReadFile({ sandbox: params.sandboxConfig }),
          })
        : await loadWebMedia(resolvedPath ?? resolvedInput, {
            maxBytes: params.maxBytes,
            localRoots,
          });
    if (media.kind !== params.expectedKind) {
      throw new ToolInputError(`Unsupported media type: ${media.kind ?? "unknown"}`);
    }
    // The two loaders expose slightly different shapes; normalize here.
    const mimeType = "mimeType" in media ? media.mimeType : media.contentType;
    const fileName = "fileName" in media ? media.fileName : undefined;
    loaded.push({
      sourceAsset: {
        buffer: media.buffer,
        mimeType,
        fileName,
      },
      resolvedInput,
      ...(resolvedPathInfo.rewrittenFrom ? { rewrittenFrom: resolvedPathInfo.rewrittenFrom } : {}),
    });
  }

  return loaded;
}
|
||||
|
||||
/**
 * Build the `video_generate` agent tool, or return null when no video model
 * is configured and no auth-backed provider default can be resolved.
 *
 * Actions: "list" describes registered providers/capabilities/auth hints;
 * "generate" (default, requires `prompt`) calls the provider, persists every
 * returned video via saveMediaBuffer, and emits `MEDIA:<path>` lines so the
 * gateway attaches the files automatically.
 */
export function createVideoGenerateTool(options?: {
  config?: OpenClawConfig;
  agentDir?: string;
  workspaceDir?: string;
  sandbox?: VideoGenerateSandboxConfig;
  fsPolicy?: ToolFsPolicy;
}): AnyAgentTool | null {
  const cfg: OpenClawConfig = options?.config ?? loadConfig();
  const videoGenerationModelConfig = resolveVideoGenerationModelConfigForTool({
    cfg,
    agentDir: options?.agentDir,
  });
  // No usable model config: the tool does not register at all.
  if (!videoGenerationModelConfig) {
    return null;
  }

  const sandboxConfig = options?.sandbox
    ? {
        root: options.sandbox.root,
        bridge: options.sandbox.bridge,
        workspaceOnly: options.fsPolicy?.workspaceOnly === true,
      }
    : null;

  return {
    label: "Video Generation",
    name: "video_generate",
    displaySummary: "Generate videos",
    description:
      "Generate videos using configured providers. Generated videos are saved under OpenClaw-managed media storage and delivered automatically as attachments.",
    parameters: VideoGenerateToolSchema,
    execute: async (_toolCallId, rawArgs) => {
      const args = rawArgs as Record<string, unknown>;
      const action = resolveAction(args);
      // Fold the resolved model config back into the config used downstream.
      const effectiveCfg =
        applyVideoGenerationModelConfigDefaults(cfg, videoGenerationModelConfig) ?? cfg;

      if (action === "list") {
        const providers = listRuntimeVideoGenerationProviders({ config: effectiveCfg });
        if (providers.length === 0) {
          return {
            content: [{ type: "text", text: "No video-generation providers are registered." }],
            details: { providers: [] },
          };
        }
        // One human-readable summary line per provider; falsy capability
        // values (0/false/undefined) are simply omitted.
        const lines = providers.map((provider) => {
          const authHints = getVideoGenerationProviderAuthEnvVars(provider.id);
          const capabilities = [
            provider.capabilities.maxVideos ? `maxVideos=${provider.capabilities.maxVideos}` : null,
            provider.capabilities.maxInputImages
              ? `maxInputImages=${provider.capabilities.maxInputImages}`
              : null,
            provider.capabilities.maxInputVideos
              ? `maxInputVideos=${provider.capabilities.maxInputVideos}`
              : null,
            provider.capabilities.maxDurationSeconds
              ? `maxDurationSeconds=${provider.capabilities.maxDurationSeconds}`
              : null,
            provider.capabilities.supportsResolution ? "resolution" : null,
            provider.capabilities.supportsAspectRatio ? "aspectRatio" : null,
            provider.capabilities.supportsSize ? "size" : null,
            provider.capabilities.supportsAudio ? "audio" : null,
            provider.capabilities.supportsWatermark ? "watermark" : null,
          ]
            .filter((entry): entry is string => Boolean(entry))
            .join(", ");
          return [
            `${provider.id}: default=${provider.defaultModel ?? "none"}`,
            provider.models?.length ? `models=${provider.models.join(", ")}` : null,
            capabilities ? `capabilities=${capabilities}` : null,
            authHints.length > 0 ? `auth=${authHints.join(" / ")}` : null,
          ]
            .filter((entry): entry is string => Boolean(entry))
            .join(" | ");
        });
        return {
          content: [{ type: "text", text: lines.join("\n") }],
          details: {
            providers: providers.map((provider) => ({
              id: provider.id,
              defaultModel: provider.defaultModel,
              models: provider.models ?? [],
              authEnvVars: getVideoGenerationProviderAuthEnvVars(provider.id),
              capabilities: provider.capabilities,
            })),
          },
        };
      }

      // --- "generate" path: parse and validate the inputs. ---
      const prompt = readStringParam(args, "prompt", { required: true });
      const model = readStringParam(args, "model");
      const filename = readStringParam(args, "filename");
      const size = readStringParam(args, "size");
      const aspectRatio = normalizeAspectRatio(readStringParam(args, "aspectRatio"));
      const resolution = normalizeResolution(readStringParam(args, "resolution"));
      const durationSeconds = readNumberParam(args, "durationSeconds", {
        integer: true,
        strict: true,
      });
      const audio = readBooleanParam(args, "audio");
      const watermark = readBooleanParam(args, "watermark");
      const imageInputs = normalizeReferenceInputs({
        args,
        singularKey: "image",
        pluralKey: "images",
        maxCount: MAX_INPUT_IMAGES,
      });
      const videoInputs = normalizeReferenceInputs({
        args,
        singularKey: "video",
        pluralKey: "videos",
        maxCount: MAX_INPUT_VIDEOS,
      });

      // Resolve the target provider (may be undefined; capability validation
      // is then skipped and the runtime enforces its own limits).
      const selectedProvider = resolveSelectedVideoGenerationProvider({
        config: effectiveCfg,
        videoGenerationModelConfig,
        modelOverride: model,
      });
      const loadedReferenceImages = await loadReferenceAssets({
        inputs: imageInputs,
        expectedKind: "image",
        workspaceDir: options?.workspaceDir,
        sandboxConfig,
      });
      const loadedReferenceVideos = await loadReferenceAssets({
        inputs: videoInputs,
        expectedKind: "video",
        workspaceDir: options?.workspaceDir,
        sandboxConfig,
      });
      validateVideoGenerationCapabilities({
        provider: selectedProvider,
        inputImageCount: loadedReferenceImages.length,
        inputVideoCount: loadedReferenceVideos.length,
        size,
        aspectRatio,
        resolution,
        durationSeconds,
        audio,
        watermark,
      });

      const result = await generateVideo({
        cfg: effectiveCfg,
        prompt,
        agentDir: options?.agentDir,
        modelOverride: model,
        size,
        aspectRatio,
        resolution,
        durationSeconds,
        audio,
        watermark,
        inputImages: loadedReferenceImages.map((entry) => entry.sourceAsset),
        inputVideos: loadedReferenceVideos.map((entry) => entry.sourceAsset),
      });
      // Persist every returned video under managed media storage. The
      // user-supplied filename hint (if any) is applied to each saved video.
      const savedVideos = await Promise.all(
        result.videos.map((video) =>
          saveMediaBuffer(
            video.buffer,
            video.mimeType,
            "tool-video-generation",
            undefined,
            filename || video.fileName,
          ),
        ),
      );
      // MEDIA: lines trigger automatic attachment delivery downstream.
      const lines = [
        `Generated ${savedVideos.length} video${savedVideos.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
        ...savedVideos.map((video) => `MEDIA:${video.path}`),
      ];

      return {
        content: [{ type: "text", text: lines.join("\n") }],
        details: {
          provider: result.provider,
          model: result.model,
          count: savedVideos.length,
          media: {
            mediaUrls: savedVideos.map((video) => video.path),
          },
          paths: savedVideos.map((video) => video.path),
          // Singular keys for one reference asset, plural arrays for several.
          ...(loadedReferenceImages.length === 1
            ? {
                image: loadedReferenceImages[0]?.resolvedInput,
                ...(loadedReferenceImages[0]?.rewrittenFrom
                  ? { rewrittenFrom: loadedReferenceImages[0].rewrittenFrom }
                  : {}),
              }
            : loadedReferenceImages.length > 1
              ? {
                  images: loadedReferenceImages.map((entry) => ({
                    image: entry.resolvedInput,
                    ...(entry.rewrittenFrom ? { rewrittenFrom: entry.rewrittenFrom } : {}),
                  })),
                }
              : {}),
          ...(loadedReferenceVideos.length === 1
            ? {
                video: loadedReferenceVideos[0]?.resolvedInput,
                ...(loadedReferenceVideos[0]?.rewrittenFrom
                  ? { videoRewrittenFrom: loadedReferenceVideos[0].rewrittenFrom }
                  : {}),
              }
            : loadedReferenceVideos.length > 1
              ? {
                  videos: loadedReferenceVideos.map((entry) => ({
                    video: entry.resolvedInput,
                    ...(entry.rewrittenFrom ? { rewrittenFrom: entry.rewrittenFrom } : {}),
                  })),
                }
              : {}),
          // Echo only the options the caller actually supplied.
          ...(size ? { size } : {}),
          ...(aspectRatio ? { aspectRatio } : {}),
          ...(resolution ? { resolution } : {}),
          ...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
          ...(typeof audio === "boolean" ? { audio } : {}),
          ...(typeof watermark === "boolean" ? { watermark } : {}),
          ...(filename ? { filename } : {}),
          attempts: result.attempts,
          metadata: result.metadata,
        },
      };
    },
  };
}
|
||||
@@ -242,6 +242,37 @@ describe("config cli", () => {
|
||||
expect(written.gateway?.auth).toEqual({ mode: "token" });
|
||||
});
|
||||
|
||||
it("writes agents.defaults.videoGenerationModel.primary without disturbing sibling defaults", async () => {
  // Seed a snapshot with existing sibling defaults that must survive the set.
  const resolved: OpenClawConfig = {
    agents: {
      defaults: {
        model: "openai/gpt-5.4",
        imageGenerationModel: {
          primary: "openai/gpt-image-1",
        },
      },
    },
  };
  setSnapshot(resolved, resolved);

  await runConfigCommand([
    "config",
    "set",
    "agents.defaults.videoGenerationModel.primary",
    "qwen/wan2.6-t2v",
  ]);

  // One write: siblings untouched, new nested key created.
  expect(mockWriteConfigFile).toHaveBeenCalledTimes(1);
  const written = mockWriteConfigFile.mock.calls[0]?.[0];
  expect(written.agents?.defaults?.model).toBe("openai/gpt-5.4");
  expect(written.agents?.defaults?.imageGenerationModel).toEqual({
    primary: "openai/gpt-image-1",
  });
  expect(written.agents?.defaults?.videoGenerationModel).toEqual({
    primary: "qwen/wan2.6-t2v",
  });
});
|
||||
|
||||
it("drops gateway.auth.password when switching mode to token", async () => {
|
||||
const resolved: OpenClawConfig = {
|
||||
gateway: {
|
||||
|
||||
@@ -40,4 +40,25 @@ describe("generated base config schema", () => {
|
||||
expect(hooksInternalProperties?.handlers).toBeUndefined();
|
||||
expect(uiHints["hooks.internal.handlers"]).toBeUndefined();
|
||||
});
|
||||
|
||||
it("includes videoGenerationModel in the public schema payload", () => {
  // Drill into the generated JSON schema: agents -> defaults -> properties.
  const agentDefaultsProperties = (
    GENERATED_BASE_CONFIG_SCHEMA.schema as {
      properties?: {
        agents?: {
          properties?: {
            defaults?: {
              properties?: Record<string, unknown>;
            };
          };
        };
      };
    }
  ).properties?.agents?.properties?.defaults?.properties;
  const uiHints = GENERATED_BASE_CONFIG_SCHEMA.uiHints as Record<string, unknown>;

  // Both the schema property and its UI hints must be published.
  expect(agentDefaultsProperties?.videoGenerationModel).toBeDefined();
  expect(uiHints["agents.defaults.videoGenerationModel.primary"]).toBeDefined();
  expect(uiHints["agents.defaults.videoGenerationModel.fallbacks"]).toBeDefined();
});
|
||||
});
|
||||
|
||||
@@ -11,4 +11,15 @@ describe("agent defaults schema", () => {
|
||||
}),
|
||||
).not.toThrow();
|
||||
});
|
||||
|
||||
it("accepts videoGenerationModel", () => {
  // Strict defaults validation must allow primary + fallbacks refs.
  expect(() =>
    AgentDefaultsSchema.parse({
      videoGenerationModel: {
        primary: "qwen/wan2.6-t2v",
        fallbacks: ["minimax/video-01"],
      },
    }),
  ).not.toThrow();
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user