feat(fal): add HeyGen video-agent model

This commit is contained in:
Peter Steinberger
2026-04-11 02:57:47 +01:00
parent c40d2a424d
commit c3aeb71f74
6 changed files with 144 additions and 20 deletions

View File

@@ -69,6 +69,8 @@ The bundled `fal` video-generation provider defaults to
- Modes: text-to-video and single-image reference flows
- Runtime: queue-backed submit/status/result flow for long-running jobs
- HeyGen video-agent model ref:
- `fal/fal-ai/heygen/v2/video-agent`
- Seedance 2.0 model refs:
- `fal/bytedance/seedance-2.0/fast/text-to-video`
- `fal/bytedance/seedance-2.0/fast/image-to-video`
@@ -89,6 +91,20 @@ To use Seedance 2.0 as the default video model:
}
```
To use HeyGen video-agent as the default video model:
```json5
{
agents: {
defaults: {
videoGenerationModel: {
primary: "fal/fal-ai/heygen/v2/video-agent",
},
},
},
}
```
## Related
- [Image Generation](/tools/image-generation)

View File

@@ -201,6 +201,20 @@ entries.
}
```
HeyGen video-agent on fal can be pinned with:
```json5
{
agents: {
defaults: {
videoGenerationModel: {
primary: "fal/fal-ai/heygen/v2/video-agent",
},
},
},
}
```
Seedance 2.0 on fal can be pinned with:
```json5
@@ -217,20 +231,20 @@ Seedance 2.0 on fal can be pinned with:
## Provider notes
| Provider | Notes |
| -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Alibaba | Uses DashScope/Model Studio async endpoint. Reference images and videos must be remote `http(s)` URLs. |
| BytePlus | Single image reference only. |
| ComfyUI | Workflow-driven local or cloud execution. Supports text-to-video and image-to-video through the configured graph. |
| fal | Uses queue-backed flow for long-running jobs. Single image reference only. Includes Seedance 2.0 text-to-video and image-to-video model refs. |
| Google | Uses Gemini/Veo. Supports one image or one video reference. |
| MiniMax | Single image reference only. |
| OpenAI | Only `size` override is forwarded. Other style overrides (`aspectRatio`, `resolution`, `audio`, `watermark`) are ignored with a warning. |
| Qwen | Same DashScope backend as Alibaba. Reference inputs must be remote `http(s)` URLs; local files are rejected upfront. |
| Runway | Supports local files via data URIs. Video-to-video requires `runway/gen4_aleph`. Text-only runs expose `16:9` and `9:16` aspect ratios. |
| Together | Single image reference only. |
| Vydra | Uses `https://www.vydra.ai/api/v1` directly to avoid auth-dropping redirects. `veo3` is bundled as text-to-video only; `kling` requires a remote image URL. |
| xAI | Supports text-to-video, image-to-video, and remote video edit/extend flows. |
| Provider | Notes |
| -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Alibaba | Uses DashScope/Model Studio async endpoint. Reference images and videos must be remote `http(s)` URLs. |
| BytePlus | Single image reference only. |
| ComfyUI | Workflow-driven local or cloud execution. Supports text-to-video and image-to-video through the configured graph. |
| fal | Uses queue-backed flow for long-running jobs. Single image reference only. Includes HeyGen video-agent and Seedance 2.0 text-to-video and image-to-video model refs. |
| Google | Uses Gemini/Veo. Supports one image or one video reference. |
| MiniMax | Single image reference only. |
| OpenAI | Only `size` override is forwarded. Other style overrides (`aspectRatio`, `resolution`, `audio`, `watermark`) are ignored with a warning. |
| Qwen | Same DashScope backend as Alibaba. Reference inputs must be remote `http(s)` URLs; local files are rejected upfront. |
| Runway | Supports local files via data URIs. Video-to-video requires `runway/gen4_aleph`. Text-only runs expose `16:9` and `9:16` aspect ratios. |
| Together | Single image reference only. |
| Vydra | Uses `https://www.vydra.ai/api/v1` directly to avoid auth-dropping redirects. `veo3` is bundled as text-to-video only; `kling` requires a remote image URL. |
| xAI | Supports text-to-video, image-to-video, and remote video edit/extend flows. |
## Provider capability modes

View File

@@ -9,7 +9,7 @@ const PROVIDER_ID = "fal";
export default definePluginEntry({
id: PROVIDER_ID,
name: "fal Provider",
description: "Bundled fal image generation provider",
description: "Bundled fal image and video generation provider",
register(api) {
api.registerProvider({
id: PROVIDER_ID,
@@ -21,7 +21,7 @@ export default definePluginEntry({
providerId: PROVIDER_ID,
methodId: "api-key",
label: "fal API key",
hint: "Image generation API key",
hint: "Image and video generation API key",
optionKey: "falApiKey",
flagName: "--fal-api-key",
envVar: "FAL_KEY",
@@ -32,10 +32,10 @@ export default definePluginEntry({
wizard: {
choiceId: "fal-api-key",
choiceLabel: "fal API key",
choiceHint: "Image generation API key",
choiceHint: "Image and video generation API key",
groupId: "fal",
groupLabel: "fal",
groupHint: "Image generation",
groupHint: "Image and video generation",
onboardingScopes: ["image-generation"],
},
}),

View File

@@ -13,7 +13,7 @@
"choiceLabel": "fal API key",
"groupId": "fal",
"groupLabel": "fal",
"groupHint": "Image generation",
"groupHint": "Image and video generation",
"onboardingScopes": ["image-generation"],
"optionKey": "falApiKey",
"cliFlag": "--fal-api-key",

View File

@@ -121,6 +121,7 @@ describe("fal video generation provider", () => {
expect(provider.models).toEqual(
expect.arrayContaining([
"fal-ai/heygen/v2/video-agent",
"bytedance/seedance-2.0/fast/text-to-video",
"bytedance/seedance-2.0/fast/image-to-video",
"bytedance/seedance-2.0/text-to-video",
@@ -129,6 +130,93 @@ describe("fal video generation provider", () => {
);
});
// Verifies the HeyGen video-agent path strips unsupported fal controls:
// although the request below sets duration/aspect-ratio/resolution/audio,
// the submitted queue body must contain only the prompt.
it("submits HeyGen video-agent requests without unsupported fal controls", async () => {
// Stub auth + HTTP config resolution so no real credentials or network setup is needed.
vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
apiKey: "fal-key",
source: "env",
mode: "api-key",
});
vi.spyOn(providerHttp, "resolveProviderHttpRequestConfig").mockReturnValue({
baseUrl: "https://fal.run",
allowPrivateNetwork: false,
headers: new Headers({
Authorization: "Key fal-key",
"Content-Type": "application/json",
}),
dispatcherPolicy: undefined,
requestConfig: createMockRequestConfig(),
});
vi.spyOn(providerHttp, "assertOkOrThrowHttpError").mockResolvedValue(undefined);
_setFalVideoFetchGuardForTesting(fetchGuardMock as never);
// Four queued fetch responses mirroring fal's queue flow:
// 1) submit (returns request_id + status/response URLs),
// 2) status poll (COMPLETED),
// 3) result fetch (video URL),
// 4) video download (mp4 bytes).
fetchGuardMock
.mockResolvedValueOnce({
response: {
json: async () => ({
request_id: "heygen-req-123",
status_url:
"https://queue.fal.run/fal-ai/heygen/v2/video-agent/requests/heygen-req-123/status",
response_url:
"https://queue.fal.run/fal-ai/heygen/v2/video-agent/requests/heygen-req-123",
}),
},
release: vi.fn(async () => {}),
})
.mockResolvedValueOnce({
response: {
json: async () => ({
status: "COMPLETED",
}),
},
release: vi.fn(async () => {}),
})
.mockResolvedValueOnce({
response: {
json: async () => ({
status: "COMPLETED",
response: {
video: { url: "https://fal.run/files/heygen.mp4" },
},
}),
},
release: vi.fn(async () => {}),
})
.mockResolvedValueOnce({
response: {
headers: new Headers({ "content-type": "video/mp4" }),
arrayBuffer: async () => Buffer.from("heygen-mp4-bytes"),
},
release: vi.fn(async () => {}),
});
const provider = buildFalVideoGenerationProvider();
// Request deliberately includes knobs HeyGen does not accept on fal.
const result = await provider.generateVideo({
provider: "fal",
model: "fal-ai/heygen/v2/video-agent",
prompt: "A founder explains OpenClaw in a concise studio video",
durationSeconds: 8,
aspectRatio: "16:9",
resolution: "720P",
audio: true,
cfg: {},
});
// Submission must target the queue endpoint for the HeyGen model ref.
expect(fetchGuardMock).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
url: "https://queue.fal.run/fal-ai/heygen/v2/video-agent",
}),
);
const submitBody = JSON.parse(
String(fetchGuardMock.mock.calls[0]?.[0]?.init?.body ?? "{}"),
) as Record<string, unknown>;
// toEqual (not objectContaining): the body must be exactly { prompt } —
// duration/aspect-ratio/resolution/audio overrides are dropped.
expect(submitBody).toEqual({
prompt: "A founder explains OpenClaw in a concise studio video",
});
expect(result.metadata).toEqual({
requestId: "heygen-req-123",
});
});
it("submits Seedance 2 requests with fal schema fields", async () => {
vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
apiKey: "fal-key",

View File

@@ -22,6 +22,7 @@ import type {
// Synchronous fal endpoint base URL (direct model invocation).
const DEFAULT_FAL_BASE_URL = "https://fal.run";
// Queue endpoint base URL used for the long-running submit/status/result flow.
const DEFAULT_FAL_QUEUE_BASE_URL = "https://queue.fal.run";
// Default video model; kept conservative (prompt + optional image_url only).
const DEFAULT_FAL_VIDEO_MODEL = "fal-ai/minimax/video-01-live";
// HeyGen video-agent model ref; matched case-insensitively against model ids.
const HEYGEN_VIDEO_AGENT_MODEL = "fal-ai/heygen/v2/video-agent";
const SEEDANCE_2_VIDEO_MODELS = [
"bytedance/seedance-2.0/fast/text-to-video",
"bytedance/seedance-2.0/fast/image-to-video",
@@ -126,6 +127,10 @@ function isFalSeedance2Model(model: string): boolean {
return SEEDANCE_2_VIDEO_MODELS.includes(model as (typeof SEEDANCE_2_VIDEO_MODELS)[number]);
}
/**
 * Whether the given model id refers to the fal-hosted HeyGen video-agent
 * endpoint. The comparison is case-insensitive: the id is run through the
 * shared lowercase normalizer before being matched against the canonical
 * (already lowercase) model ref.
 */
function isFalHeyGenVideoAgentModel(model: string): boolean {
  const normalizedModel = normalizeLowercaseStringOrEmpty(model);
  return normalizedModel === HEYGEN_VIDEO_AGENT_MODEL;
}
function resolveFalResolution(resolution: VideoGenerationRequest["resolution"], model: string) {
if (!resolution) {
return undefined;
@@ -168,7 +173,7 @@ function buildFalVideoRequestBody(params: {
// MiniMax Live on fal currently documents prompt + optional image_url only,
// and the HeyGen video-agent likewise accepts a prompt-centric payload.
// Keep these models conservative so queue requests do not hang behind
// unsupported knobs such as duration/resolution/aspect-ratio overrides.
if (isFalMiniMaxLiveModel(params.model)) {
if (isFalMiniMaxLiveModel(params.model) || isFalHeyGenVideoAgentModel(params.model)) {
return requestBody;
}
const aspectRatio = normalizeOptionalString(params.req.aspectRatio);
@@ -285,6 +290,7 @@ export function buildFalVideoGenerationProvider(): VideoGenerationProvider {
defaultModel: DEFAULT_FAL_VIDEO_MODEL,
models: [
DEFAULT_FAL_VIDEO_MODEL,
HEYGEN_VIDEO_AGENT_MODEL,
...SEEDANCE_2_VIDEO_MODELS,
"fal-ai/kling-video/v2.1/master/text-to-video",
"fal-ai/wan/v2.2-a14b/text-to-video",