From c3aeb71f74f8df1161e33f6ada91bc0f12668cd5 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 11 Apr 2026 02:57:47 +0100 Subject: [PATCH] feat(fal): add HeyGen video-agent model --- docs/providers/fal.md | 16 ++++ docs/tools/video-generation.md | 42 ++++++--- extensions/fal/index.ts | 8 +- extensions/fal/openclaw.plugin.json | 2 +- .../fal/video-generation-provider.test.ts | 88 +++++++++++++++++++ extensions/fal/video-generation-provider.ts | 8 +- 6 files changed, 144 insertions(+), 20 deletions(-) diff --git a/docs/providers/fal.md b/docs/providers/fal.md index 1eb70c1c935..1ae888cce2f 100644 --- a/docs/providers/fal.md +++ b/docs/providers/fal.md @@ -69,6 +69,8 @@ The bundled `fal` video-generation provider defaults to - Modes: text-to-video and single-image reference flows - Runtime: queue-backed submit/status/result flow for long-running jobs +- HeyGen video-agent model ref: + - `fal/fal-ai/heygen/v2/video-agent` - Seedance 2.0 model refs: - `fal/bytedance/seedance-2.0/fast/text-to-video` - `fal/bytedance/seedance-2.0/fast/image-to-video` @@ -89,6 +91,20 @@ To use Seedance 2.0 as the default video model: } ``` +To use HeyGen video-agent as the default video model: + +```json5 +{ + agents: { + defaults: { + videoGenerationModel: { + primary: "fal/fal-ai/heygen/v2/video-agent", + }, + }, + }, +} +``` + ## Related - [Image Generation](/tools/image-generation) diff --git a/docs/tools/video-generation.md b/docs/tools/video-generation.md index e60bd614f84..ee77c800f73 100644 --- a/docs/tools/video-generation.md +++ b/docs/tools/video-generation.md @@ -201,6 +201,20 @@ entries. 
} ``` +HeyGen video-agent on fal can be pinned with: + +```json5 +{ + agents: { + defaults: { + videoGenerationModel: { + primary: "fal/fal-ai/heygen/v2/video-agent", + }, + }, + }, +} +``` + Seedance 2.0 on fal can be pinned with: ```json5 @@ -217,20 +231,20 @@ Seedance 2.0 on fal can be pinned with: ## Provider notes -| Provider | Notes | -| -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Alibaba | Uses DashScope/Model Studio async endpoint. Reference images and videos must be remote `http(s)` URLs. | -| BytePlus | Single image reference only. | -| ComfyUI | Workflow-driven local or cloud execution. Supports text-to-video and image-to-video through the configured graph. | -| fal | Uses queue-backed flow for long-running jobs. Single image reference only. Includes Seedance 2.0 text-to-video and image-to-video model refs. | -| Google | Uses Gemini/Veo. Supports one image or one video reference. | -| MiniMax | Single image reference only. | -| OpenAI | Only `size` override is forwarded. Other style overrides (`aspectRatio`, `resolution`, `audio`, `watermark`) are ignored with a warning. | -| Qwen | Same DashScope backend as Alibaba. Reference inputs must be remote `http(s)` URLs; local files are rejected upfront. | -| Runway | Supports local files via data URIs. Video-to-video requires `runway/gen4_aleph`. Text-only runs expose `16:9` and `9:16` aspect ratios. | -| Together | Single image reference only. | -| Vydra | Uses `https://www.vydra.ai/api/v1` directly to avoid auth-dropping redirects. `veo3` is bundled as text-to-video only; `kling` requires a remote image URL. | -| xAI | Supports text-to-video, image-to-video, and remote video edit/extend flows. 
| +| Provider | Notes | +| -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Alibaba | Uses DashScope/Model Studio async endpoint. Reference images and videos must be remote `http(s)` URLs. | +| BytePlus | Single image reference only. | +| ComfyUI | Workflow-driven local or cloud execution. Supports text-to-video and image-to-video through the configured graph. | +| fal | Uses queue-backed flow for long-running jobs. Single image reference only. Includes HeyGen video-agent and Seedance 2.0 text-to-video and image-to-video model refs. | +| Google | Uses Gemini/Veo. Supports one image or one video reference. | +| MiniMax | Single image reference only. | +| OpenAI | Only `size` override is forwarded. Other style overrides (`aspectRatio`, `resolution`, `audio`, `watermark`) are ignored with a warning. | +| Qwen | Same DashScope backend as Alibaba. Reference inputs must be remote `http(s)` URLs; local files are rejected upfront. | +| Runway | Supports local files via data URIs. Video-to-video requires `runway/gen4_aleph`. Text-only runs expose `16:9` and `9:16` aspect ratios. | +| Together | Single image reference only. | +| Vydra | Uses `https://www.vydra.ai/api/v1` directly to avoid auth-dropping redirects. `veo3` is bundled as text-to-video only; `kling` requires a remote image URL. | +| xAI | Supports text-to-video, image-to-video, and remote video edit/extend flows. 
| ## Provider capability modes diff --git a/extensions/fal/index.ts b/extensions/fal/index.ts index 6a370c8bf10..87cedfadfc2 100644 --- a/extensions/fal/index.ts +++ b/extensions/fal/index.ts @@ -9,7 +9,7 @@ const PROVIDER_ID = "fal"; export default definePluginEntry({ id: PROVIDER_ID, name: "fal Provider", - description: "Bundled fal image generation provider", + description: "Bundled fal image and video generation provider", register(api) { api.registerProvider({ id: PROVIDER_ID, @@ -21,7 +21,7 @@ export default definePluginEntry({ providerId: PROVIDER_ID, methodId: "api-key", label: "fal API key", - hint: "Image generation API key", + hint: "Image and video generation API key", optionKey: "falApiKey", flagName: "--fal-api-key", envVar: "FAL_KEY", @@ -32,10 +32,10 @@ export default definePluginEntry({ wizard: { choiceId: "fal-api-key", choiceLabel: "fal API key", - choiceHint: "Image generation API key", + choiceHint: "Image and video generation API key", groupId: "fal", groupLabel: "fal", - groupHint: "Image generation", + groupHint: "Image and video generation", onboardingScopes: ["image-generation"], }, }), diff --git a/extensions/fal/openclaw.plugin.json b/extensions/fal/openclaw.plugin.json index 2aaa36d8bff..ae3b061e7ce 100644 --- a/extensions/fal/openclaw.plugin.json +++ b/extensions/fal/openclaw.plugin.json @@ -13,7 +13,7 @@ "choiceLabel": "fal API key", "groupId": "fal", "groupLabel": "fal", - "groupHint": "Image generation", + "groupHint": "Image and video generation", "onboardingScopes": ["image-generation"], "optionKey": "falApiKey", "cliFlag": "--fal-api-key", diff --git a/extensions/fal/video-generation-provider.test.ts b/extensions/fal/video-generation-provider.test.ts index 42dd607fdf0..35ca92b9b82 100644 --- a/extensions/fal/video-generation-provider.test.ts +++ b/extensions/fal/video-generation-provider.test.ts @@ -121,6 +121,7 @@ describe("fal video generation provider", () => { expect(provider.models).toEqual( expect.arrayContaining([ + 
"fal-ai/heygen/v2/video-agent", "bytedance/seedance-2.0/fast/text-to-video", "bytedance/seedance-2.0/fast/image-to-video", "bytedance/seedance-2.0/text-to-video", @@ -129,6 +130,93 @@ describe("fal video generation provider", () => { ); }); + it("submits HeyGen video-agent requests without unsupported fal controls", async () => { + vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "fal-key", + source: "env", + mode: "api-key", + }); + vi.spyOn(providerHttp, "resolveProviderHttpRequestConfig").mockReturnValue({ + baseUrl: "https://fal.run", + allowPrivateNetwork: false, + headers: new Headers({ + Authorization: "Key fal-key", + "Content-Type": "application/json", + }), + dispatcherPolicy: undefined, + requestConfig: createMockRequestConfig(), + }); + vi.spyOn(providerHttp, "assertOkOrThrowHttpError").mockResolvedValue(undefined); + _setFalVideoFetchGuardForTesting(fetchGuardMock as never); + fetchGuardMock + .mockResolvedValueOnce({ + response: { + json: async () => ({ + request_id: "heygen-req-123", + status_url: + "https://queue.fal.run/fal-ai/heygen/v2/video-agent/requests/heygen-req-123/status", + response_url: + "https://queue.fal.run/fal-ai/heygen/v2/video-agent/requests/heygen-req-123", + }), + }, + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: { + json: async () => ({ + status: "COMPLETED", + }), + }, + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: { + json: async () => ({ + status: "COMPLETED", + response: { + video: { url: "https://fal.run/files/heygen.mp4" }, + }, + }), + }, + release: vi.fn(async () => {}), + }) + .mockResolvedValueOnce({ + response: { + headers: new Headers({ "content-type": "video/mp4" }), + arrayBuffer: async () => Buffer.from("heygen-mp4-bytes"), + }, + release: vi.fn(async () => {}), + }); + + const provider = buildFalVideoGenerationProvider(); + const result = await provider.generateVideo({ + provider: "fal", + model: 
"fal-ai/heygen/v2/video-agent", + prompt: "A founder explains OpenClaw in a concise studio video", + durationSeconds: 8, + aspectRatio: "16:9", + resolution: "720P", + audio: true, + cfg: {}, + }); + + expect(fetchGuardMock).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ + url: "https://queue.fal.run/fal-ai/heygen/v2/video-agent", + }), + ); + const submitBody = JSON.parse( + String(fetchGuardMock.mock.calls[0]?.[0]?.init?.body ?? "{}"), + ) as Record<string, unknown>; + expect(submitBody).toEqual({ + prompt: "A founder explains OpenClaw in a concise studio video", + }); + expect(result.metadata).toEqual({ + requestId: "heygen-req-123", + }); + }); + it("submits Seedance 2 requests with fal schema fields", async () => { vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({ apiKey: "fal-key", diff --git a/extensions/fal/video-generation-provider.ts b/extensions/fal/video-generation-provider.ts index ef2ddc25d79..116a52ec646 100644 --- a/extensions/fal/video-generation-provider.ts +++ b/extensions/fal/video-generation-provider.ts @@ -22,6 +22,7 @@ import type { const DEFAULT_FAL_BASE_URL = "https://fal.run"; const DEFAULT_FAL_QUEUE_BASE_URL = "https://queue.fal.run"; const DEFAULT_FAL_VIDEO_MODEL = "fal-ai/minimax/video-01-live"; +const HEYGEN_VIDEO_AGENT_MODEL = "fal-ai/heygen/v2/video-agent"; const SEEDANCE_2_VIDEO_MODELS = [ "bytedance/seedance-2.0/fast/text-to-video", "bytedance/seedance-2.0/fast/image-to-video", @@ -126,6 +127,10 @@ function isFalSeedance2Model(model: string): boolean { return SEEDANCE_2_VIDEO_MODELS.includes(model as (typeof SEEDANCE_2_VIDEO_MODELS)[number]); } +function isFalHeyGenVideoAgentModel(model: string): boolean { + return normalizeLowercaseStringOrEmpty(model) === HEYGEN_VIDEO_AGENT_MODEL; +} + function resolveFalResolution(resolution: VideoGenerationRequest["resolution"], model: string) { if (!resolution) { return undefined; @@ -168,7 +173,7 @@ function buildFalVideoRequestBody(params: { // MiniMax Live on fal currently
documents prompt + optional image_url only. // Keep the default model conservative so queue requests do not hang behind // unsupported knobs such as duration/resolution/aspect-ratio overrides. - if (isFalMiniMaxLiveModel(params.model)) { + if (isFalMiniMaxLiveModel(params.model) || isFalHeyGenVideoAgentModel(params.model)) { return requestBody; } const aspectRatio = normalizeOptionalString(params.req.aspectRatio); @@ -285,6 +290,7 @@ export function buildFalVideoGenerationProvider(): VideoGenerationProvider { defaultModel: DEFAULT_FAL_VIDEO_MODEL, models: [ DEFAULT_FAL_VIDEO_MODEL, + HEYGEN_VIDEO_AGENT_MODEL, ...SEEDANCE_2_VIDEO_MODELS, "fal-ai/kling-video/v2.1/master/text-to-video", "fal-ai/wan/v2.2-a14b/text-to-video",