From c3aeb71f74f8df1161e33f6ada91bc0f12668cd5 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sat, 11 Apr 2026 02:57:47 +0100
Subject: [PATCH] feat(fal): add HeyGen video-agent model

---
 docs/providers/fal.md                         | 16 ++++
 docs/tools/video-generation.md                | 42 ++++++---
 extensions/fal/index.ts                       |  8 +-
 extensions/fal/openclaw.plugin.json           |  2 +-
 .../fal/video-generation-provider.test.ts     | 88 +++++++++++++++++++
 extensions/fal/video-generation-provider.ts   |  8 +-
 6 files changed, 144 insertions(+), 20 deletions(-)

diff --git a/docs/providers/fal.md b/docs/providers/fal.md
index 1eb70c1c935..1ae888cce2f 100644
--- a/docs/providers/fal.md
+++ b/docs/providers/fal.md
@@ -69,6 +69,8 @@ The bundled `fal` video-generation provider defaults to
 
 - Modes: text-to-video and single-image reference flows
 - Runtime: queue-backed submit/status/result flow for long-running jobs
+- HeyGen video-agent model ref:
+  - `fal/fal-ai/heygen/v2/video-agent`
 - Seedance 2.0 model refs:
   - `fal/bytedance/seedance-2.0/fast/text-to-video`
   - `fal/bytedance/seedance-2.0/fast/image-to-video`
@@ -89,6 +91,20 @@ To use Seedance 2.0 as the default video model:
 }
 ```
 
+To use HeyGen video-agent as the default video model:
+
+```json5
+{
+  agents: {
+    defaults: {
+      videoGenerationModel: {
+        primary: "fal/fal-ai/heygen/v2/video-agent",
+      },
+    },
+  },
+}
+```
+
 ## Related
 
 - [Image Generation](/tools/image-generation)
diff --git a/docs/tools/video-generation.md b/docs/tools/video-generation.md
index e60bd614f84..ee77c800f73 100644
--- a/docs/tools/video-generation.md
+++ b/docs/tools/video-generation.md
@@ -201,6 +201,20 @@ entries.
 }
 ```
 
+HeyGen video-agent on fal can be pinned with:
+
+```json5
+{
+  agents: {
+    defaults: {
+      videoGenerationModel: {
+        primary: "fal/fal-ai/heygen/v2/video-agent",
+      },
+    },
+  },
+}
+```
+
 Seedance 2.0 on fal can be pinned with:
 
 ```json5
@@ -217,20 +231,20 @@ Seedance 2.0 on fal can be pinned with:
 
 ## Provider notes
 
-| Provider | Notes                                                                                                                                                       |
-| -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| Alibaba  | Uses DashScope/Model Studio async endpoint. Reference images and videos must be remote `http(s)` URLs.                                                      |
-| BytePlus | Single image reference only.                                                                                                                                |
-| ComfyUI  | Workflow-driven local or cloud execution. Supports text-to-video and image-to-video through the configured graph.                                           |
-| fal      | Uses queue-backed flow for long-running jobs. Single image reference only. Includes Seedance 2.0 text-to-video and image-to-video model refs.               |
-| Google   | Uses Gemini/Veo. Supports one image or one video reference.                                                                                                 |
-| MiniMax  | Single image reference only.                                                                                                                                |
-| OpenAI   | Only `size` override is forwarded. Other style overrides (`aspectRatio`, `resolution`, `audio`, `watermark`) are ignored with a warning.                    |
-| Qwen     | Same DashScope backend as Alibaba. Reference inputs must be remote `http(s)` URLs; local files are rejected upfront.                                        |
-| Runway   | Supports local files via data URIs. Video-to-video requires `runway/gen4_aleph`. Text-only runs expose `16:9` and `9:16` aspect ratios.                     |
-| Together | Single image reference only.                                                                                                                                |
-| Vydra    | Uses `https://www.vydra.ai/api/v1` directly to avoid auth-dropping redirects. `veo3` is bundled as text-to-video only; `kling` requires a remote image URL. |
-| xAI      | Supports text-to-video, image-to-video, and remote video edit/extend flows.                                                                                 |
+| Provider | Notes                                                                                                                                                                |
+| -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Alibaba  | Uses DashScope/Model Studio async endpoint. Reference images and videos must be remote `http(s)` URLs.                                                               |
+| BytePlus | Single image reference only.                                                                                                                                         |
+| ComfyUI  | Workflow-driven local or cloud execution. Supports text-to-video and image-to-video through the configured graph.                                                    |
+| fal      | Uses queue-backed flow for long-running jobs. Single image reference only. Includes HeyGen video-agent and Seedance 2.0 text-to-video and image-to-video model refs. |
+| Google   | Uses Gemini/Veo. Supports one image or one video reference.                                                                                                          |
+| MiniMax  | Single image reference only.                                                                                                                                         |
+| OpenAI   | Only `size` override is forwarded. Other style overrides (`aspectRatio`, `resolution`, `audio`, `watermark`) are ignored with a warning.                             |
+| Qwen     | Same DashScope backend as Alibaba. Reference inputs must be remote `http(s)` URLs; local files are rejected upfront.                                                 |
+| Runway   | Supports local files via data URIs. Video-to-video requires `runway/gen4_aleph`. Text-only runs expose `16:9` and `9:16` aspect ratios.                              |
+| Together | Single image reference only.                                                                                                                                         |
+| Vydra    | Uses `https://www.vydra.ai/api/v1` directly to avoid auth-dropping redirects. `veo3` is bundled as text-to-video only; `kling` requires a remote image URL.          |
+| xAI      | Supports text-to-video, image-to-video, and remote video edit/extend flows.                                                                                          |
 
 ## Provider capability modes
 
diff --git a/extensions/fal/index.ts b/extensions/fal/index.ts
index 6a370c8bf10..87cedfadfc2 100644
--- a/extensions/fal/index.ts
+++ b/extensions/fal/index.ts
@@ -9,7 +9,7 @@ const PROVIDER_ID = "fal";
 export default definePluginEntry({
   id: PROVIDER_ID,
   name: "fal Provider",
-  description: "Bundled fal image generation provider",
+  description: "Bundled fal image and video generation provider",
   register(api) {
     api.registerProvider({
       id: PROVIDER_ID,
@@ -21,7 +21,7 @@ export default definePluginEntry({
           providerId: PROVIDER_ID,
           methodId: "api-key",
           label: "fal API key",
-          hint: "Image generation API key",
+          hint: "Image and video generation API key",
           optionKey: "falApiKey",
           flagName: "--fal-api-key",
           envVar: "FAL_KEY",
@@ -32,10 +32,10 @@ export default definePluginEntry({
           wizard: {
             choiceId: "fal-api-key",
             choiceLabel: "fal API key",
-            choiceHint: "Image generation API key",
+            choiceHint: "Image and video generation API key",
             groupId: "fal",
             groupLabel: "fal",
-            groupHint: "Image generation",
+            groupHint: "Image and video generation",
             onboardingScopes: ["image-generation"],
           },
         }),
diff --git a/extensions/fal/openclaw.plugin.json b/extensions/fal/openclaw.plugin.json
index 2aaa36d8bff..ae3b061e7ce 100644
--- a/extensions/fal/openclaw.plugin.json
+++ b/extensions/fal/openclaw.plugin.json
@@ -13,7 +13,7 @@
       "choiceLabel": "fal API key",
       "groupId": "fal",
       "groupLabel": "fal",
-      "groupHint": "Image generation",
+      "groupHint": "Image and video generation",
       "onboardingScopes": ["image-generation"],
       "optionKey": "falApiKey",
       "cliFlag": "--fal-api-key",
diff --git a/extensions/fal/video-generation-provider.test.ts b/extensions/fal/video-generation-provider.test.ts
index 42dd607fdf0..35ca92b9b82 100644
--- a/extensions/fal/video-generation-provider.test.ts
+++ b/extensions/fal/video-generation-provider.test.ts
@@ -121,6 +121,7 @@ describe("fal video generation provider", () => {
 
     expect(provider.models).toEqual(
       expect.arrayContaining([
+        "fal-ai/heygen/v2/video-agent",
         "bytedance/seedance-2.0/fast/text-to-video",
         "bytedance/seedance-2.0/fast/image-to-video",
         "bytedance/seedance-2.0/text-to-video",
@@ -129,6 +130,93 @@ describe("fal video generation provider", () => {
     );
   });
 
+  it("submits HeyGen video-agent requests without unsupported fal controls", async () => {
+    vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
+      apiKey: "fal-key",
+      source: "env",
+      mode: "api-key",
+    });
+    vi.spyOn(providerHttp, "resolveProviderHttpRequestConfig").mockReturnValue({
+      baseUrl: "https://fal.run",
+      allowPrivateNetwork: false,
+      headers: new Headers({
+        Authorization: "Key fal-key",
+        "Content-Type": "application/json",
+      }),
+      dispatcherPolicy: undefined,
+      requestConfig: createMockRequestConfig(),
+    });
+    vi.spyOn(providerHttp, "assertOkOrThrowHttpError").mockResolvedValue(undefined);
+    _setFalVideoFetchGuardForTesting(fetchGuardMock as never);
+    fetchGuardMock
+      .mockResolvedValueOnce({
+        response: {
+          json: async () => ({
+            request_id: "heygen-req-123",
+            status_url:
+              "https://queue.fal.run/fal-ai/heygen/v2/video-agent/requests/heygen-req-123/status",
+            response_url:
+              "https://queue.fal.run/fal-ai/heygen/v2/video-agent/requests/heygen-req-123",
+          }),
+        },
+        release: vi.fn(async () => {}),
+      })
+      .mockResolvedValueOnce({
+        response: {
+          json: async () => ({
+            status: "COMPLETED",
+          }),
+        },
+        release: vi.fn(async () => {}),
+      })
+      .mockResolvedValueOnce({
+        response: {
+          json: async () => ({
+            status: "COMPLETED",
+            response: {
+              video: { url: "https://fal.run/files/heygen.mp4" },
+            },
+          }),
+        },
+        release: vi.fn(async () => {}),
+      })
+      .mockResolvedValueOnce({
+        response: {
+          headers: new Headers({ "content-type": "video/mp4" }),
+          arrayBuffer: async () => Buffer.from("heygen-mp4-bytes"),
+        },
+        release: vi.fn(async () => {}),
+      });
+
+    const provider = buildFalVideoGenerationProvider();
+    const result = await provider.generateVideo({
+      provider: "fal",
+      model: "fal-ai/heygen/v2/video-agent",
+      prompt: "A founder explains OpenClaw in a concise studio video",
+      durationSeconds: 8,
+      aspectRatio: "16:9",
+      resolution: "720P",
+      audio: true,
+      cfg: {},
+    });
+
+    expect(fetchGuardMock).toHaveBeenNthCalledWith(
+      1,
+      expect.objectContaining({
+        url: "https://queue.fal.run/fal-ai/heygen/v2/video-agent",
+      }),
+    );
+    const submitBody = JSON.parse(
+      String(fetchGuardMock.mock.calls[0]?.[0]?.init?.body ?? "{}"),
+    ) as Record<string, unknown>;
+    expect(submitBody).toEqual({
+      prompt: "A founder explains OpenClaw in a concise studio video",
+    });
+    expect(result.metadata).toEqual({
+      requestId: "heygen-req-123",
+    });
+  });
+
   it("submits Seedance 2 requests with fal schema fields", async () => {
     vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
       apiKey: "fal-key",
diff --git a/extensions/fal/video-generation-provider.ts b/extensions/fal/video-generation-provider.ts
index ef2ddc25d79..116a52ec646 100644
--- a/extensions/fal/video-generation-provider.ts
+++ b/extensions/fal/video-generation-provider.ts
@@ -22,6 +22,7 @@ import type {
 const DEFAULT_FAL_BASE_URL = "https://fal.run";
 const DEFAULT_FAL_QUEUE_BASE_URL = "https://queue.fal.run";
 const DEFAULT_FAL_VIDEO_MODEL = "fal-ai/minimax/video-01-live";
+const HEYGEN_VIDEO_AGENT_MODEL = "fal-ai/heygen/v2/video-agent";
 const SEEDANCE_2_VIDEO_MODELS = [
   "bytedance/seedance-2.0/fast/text-to-video",
   "bytedance/seedance-2.0/fast/image-to-video",
@@ -126,6 +127,10 @@ function isFalSeedance2Model(model: string): boolean {
   return SEEDANCE_2_VIDEO_MODELS.includes(model as (typeof SEEDANCE_2_VIDEO_MODELS)[number]);
 }
 
+function isFalHeyGenVideoAgentModel(model: string): boolean {
+  return normalizeLowercaseStringOrEmpty(model) === HEYGEN_VIDEO_AGENT_MODEL;
+}
+
 function resolveFalResolution(resolution: VideoGenerationRequest["resolution"], model: string) {
   if (!resolution) {
     return undefined;
@@ -168,7 +173,7 @@ function buildFalVideoRequestBody(params: {
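-  // MiniMax Live on fal currently documents prompt + optional image_url only.
-  // Keep the default model conservative so queue requests do not hang behind
-  // unsupported knobs such as duration/resolution/aspect-ratio overrides.
-  if (isFalMiniMaxLiveModel(params.model)) {
+  // MiniMax Live on fal currently documents prompt + optional image_url only;
+  // HeyGen video-agent is sent the same minimal body. Keep both conservative so
+  // queue requests do not hang behind unsupported duration/resolution/aspect-ratio overrides.
+  if (isFalMiniMaxLiveModel(params.model) || isFalHeyGenVideoAgentModel(params.model)) {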
     return requestBody;
   }
   const aspectRatio = normalizeOptionalString(params.req.aspectRatio);
@@ -285,6 +290,7 @@ export function buildFalVideoGenerationProvider(): VideoGenerationProvider {
     defaultModel: DEFAULT_FAL_VIDEO_MODEL,
     models: [
       DEFAULT_FAL_VIDEO_MODEL,
+      HEYGEN_VIDEO_AGENT_MODEL,
       ...SEEDANCE_2_VIDEO_MODELS,
       "fal-ai/kling-video/v2.1/master/text-to-video",
       "fal-ai/wan/v2.2-a14b/text-to-video",