feat(fal): add HeyGen video-agent model

This commit is contained in:
Peter Steinberger
2026-04-11 02:57:47 +01:00
parent c40d2a424d
commit c3aeb71f74
6 changed files with 144 additions and 20 deletions

View File

@@ -69,6 +69,8 @@ The bundled `fal` video-generation provider defaults to
- Modes: text-to-video and single-image reference flows
- Runtime: queue-backed submit/status/result flow for long-running jobs
- HeyGen video-agent model ref:
- `fal/fal-ai/heygen/v2/video-agent`
- Seedance 2.0 model refs:
- `fal/bytedance/seedance-2.0/fast/text-to-video`
- `fal/bytedance/seedance-2.0/fast/image-to-video`
@@ -89,6 +91,20 @@ To use Seedance 2.0 as the default video model:
}
```
To use HeyGen video-agent as the default video model:
```json5
{
agents: {
defaults: {
videoGenerationModel: {
primary: "fal/fal-ai/heygen/v2/video-agent",
},
},
},
}
```
## Related
- [Image Generation](/tools/image-generation)

View File

@@ -201,6 +201,20 @@ entries.
}
```
HeyGen video-agent on fal can be pinned with:
```json5
{
agents: {
defaults: {
videoGenerationModel: {
primary: "fal/fal-ai/heygen/v2/video-agent",
},
},
},
}
```
Seedance 2.0 on fal can be pinned with:
```json5
@@ -217,20 +231,20 @@ Seedance 2.0 on fal can be pinned with:
## Provider notes
| Provider | Notes |
| -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Alibaba | Uses DashScope/Model Studio async endpoint. Reference images and videos must be remote `http(s)` URLs. |
| BytePlus | Single image reference only. |
| ComfyUI | Workflow-driven local or cloud execution. Supports text-to-video and image-to-video through the configured graph. |
| fal | Uses queue-backed flow for long-running jobs. Single image reference only. Includes Seedance 2.0 text-to-video and image-to-video model refs. |
| Google | Uses Gemini/Veo. Supports one image or one video reference. |
| MiniMax | Single image reference only. |
| OpenAI | Only `size` override is forwarded. Other style overrides (`aspectRatio`, `resolution`, `audio`, `watermark`) are ignored with a warning. |
| Qwen | Same DashScope backend as Alibaba. Reference inputs must be remote `http(s)` URLs; local files are rejected upfront. |
| Runway | Supports local files via data URIs. Video-to-video requires `runway/gen4_aleph`. Text-only runs expose `16:9` and `9:16` aspect ratios. |
| Together | Single image reference only. |
| Vydra | Uses `https://www.vydra.ai/api/v1` directly to avoid auth-dropping redirects. `veo3` is bundled as text-to-video only; `kling` requires a remote image URL. |
| xAI | Supports text-to-video, image-to-video, and remote video edit/extend flows. |
| Provider | Notes |
| -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Alibaba | Uses DashScope/Model Studio async endpoint. Reference images and videos must be remote `http(s)` URLs. |
| BytePlus | Single image reference only. |
| ComfyUI | Workflow-driven local or cloud execution. Supports text-to-video and image-to-video through the configured graph. |
| fal | Uses queue-backed flow for long-running jobs. Single image reference only. Includes HeyGen video-agent and Seedance 2.0 text-to-video and image-to-video model refs. |
| Google | Uses Gemini/Veo. Supports one image or one video reference. |
| MiniMax | Single image reference only. |
| OpenAI | Only `size` override is forwarded. Other style overrides (`aspectRatio`, `resolution`, `audio`, `watermark`) are ignored with a warning. |
| Qwen | Same DashScope backend as Alibaba. Reference inputs must be remote `http(s)` URLs; local files are rejected upfront. |
| Runway | Supports local files via data URIs. Video-to-video requires `runway/gen4_aleph`. Text-only runs expose `16:9` and `9:16` aspect ratios. |
| Together | Single image reference only. |
| Vydra | Uses `https://www.vydra.ai/api/v1` directly to avoid auth-dropping redirects. `veo3` is bundled as text-to-video only; `kling` requires a remote image URL. |
| xAI | Supports text-to-video, image-to-video, and remote video edit/extend flows. |
## Provider capability modes

View File

@@ -9,7 +9,7 @@ const PROVIDER_ID = "fal";
export default definePluginEntry({
id: PROVIDER_ID,
name: "fal Provider",
description: "Bundled fal image generation provider",
description: "Bundled fal image and video generation provider",
register(api) {
api.registerProvider({
id: PROVIDER_ID,
@@ -21,7 +21,7 @@ export default definePluginEntry({
providerId: PROVIDER_ID,
methodId: "api-key",
label: "fal API key",
hint: "Image generation API key",
hint: "Image and video generation API key",
optionKey: "falApiKey",
flagName: "--fal-api-key",
envVar: "FAL_KEY",
@@ -32,10 +32,10 @@ export default definePluginEntry({
wizard: {
choiceId: "fal-api-key",
choiceLabel: "fal API key",
choiceHint: "Image generation API key",
choiceHint: "Image and video generation API key",
groupId: "fal",
groupLabel: "fal",
groupHint: "Image generation",
groupHint: "Image and video generation",
onboardingScopes: ["image-generation"],
},
}),

View File

@@ -13,7 +13,7 @@
"choiceLabel": "fal API key",
"groupId": "fal",
"groupLabel": "fal",
"groupHint": "Image generation",
"groupHint": "Image and video generation",
"onboardingScopes": ["image-generation"],
"optionKey": "falApiKey",
"cliFlag": "--fal-api-key",

View File

@@ -121,6 +121,7 @@ describe("fal video generation provider", () => {
expect(provider.models).toEqual(
expect.arrayContaining([
"fal-ai/heygen/v2/video-agent",
"bytedance/seedance-2.0/fast/text-to-video",
"bytedance/seedance-2.0/fast/image-to-video",
"bytedance/seedance-2.0/text-to-video",
@@ -129,6 +130,93 @@ describe("fal video generation provider", () => {
);
});
// Verifies the HeyGen video-agent path strips unsupported fal controls:
// although the request below sets duration/aspect-ratio/resolution/audio,
// the submitted queue body must contain only the prompt.
it("submits HeyGen video-agent requests without unsupported fal controls", async () => {
// Stub auth + HTTP config resolution so no real credentials or network setup is needed.
vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
apiKey: "fal-key",
source: "env",
mode: "api-key",
});
vi.spyOn(providerHttp, "resolveProviderHttpRequestConfig").mockReturnValue({
baseUrl: "https://fal.run",
allowPrivateNetwork: false,
headers: new Headers({
Authorization: "Key fal-key",
"Content-Type": "application/json",
}),
dispatcherPolicy: undefined,
requestConfig: createMockRequestConfig(),
});
vi.spyOn(providerHttp, "assertOkOrThrowHttpError").mockResolvedValue(undefined);
_setFalVideoFetchGuardForTesting(fetchGuardMock as never);
// Four queued fetch responses mirroring fal's queue flow:
// 1) submit (returns request_id + status/response URLs),
// 2) status poll (COMPLETED),
// 3) result fetch (video URL),
// 4) video download (mp4 bytes).
fetchGuardMock
.mockResolvedValueOnce({
response: {
json: async () => ({
request_id: "heygen-req-123",
status_url:
"https://queue.fal.run/fal-ai/heygen/v2/video-agent/requests/heygen-req-123/status",
response_url:
"https://queue.fal.run/fal-ai/heygen/v2/video-agent/requests/heygen-req-123",
}),
},
release: vi.fn(async () => {}),
})
.mockResolvedValueOnce({
response: {
json: async () => ({
status: "COMPLETED",
}),
},
release: vi.fn(async () => {}),
})
.mockResolvedValueOnce({
response: {
json: async () => ({
status: "COMPLETED",
response: {
video: { url: "https://fal.run/files/heygen.mp4" },
},
}),
},
release: vi.fn(async () => {}),
})
.mockResolvedValueOnce({
response: {
headers: new Headers({ "content-type": "video/mp4" }),
arrayBuffer: async () => Buffer.from("heygen-mp4-bytes"),
},
release: vi.fn(async () => {}),
});
const provider = buildFalVideoGenerationProvider();
// Request deliberately includes knobs HeyGen does not accept on fal.
const result = await provider.generateVideo({
provider: "fal",
model: "fal-ai/heygen/v2/video-agent",
prompt: "A founder explains OpenClaw in a concise studio video",
durationSeconds: 8,
aspectRatio: "16:9",
resolution: "720P",
audio: true,
cfg: {},
});
// Submission must target the queue endpoint for the HeyGen model ref.
expect(fetchGuardMock).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
url: "https://queue.fal.run/fal-ai/heygen/v2/video-agent",
}),
);
const submitBody = JSON.parse(
String(fetchGuardMock.mock.calls[0]?.[0]?.init?.body ?? "{}"),
) as Record<string, unknown>;
// toEqual (not objectContaining): the body must be exactly { prompt } —
// duration/aspect-ratio/resolution/audio overrides are dropped.
expect(submitBody).toEqual({
prompt: "A founder explains OpenClaw in a concise studio video",
});
expect(result.metadata).toEqual({
requestId: "heygen-req-123",
});
});
it("submits Seedance 2 requests with fal schema fields", async () => {
vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
apiKey: "fal-key",

View File

@@ -22,6 +22,7 @@ import type {
// Synchronous fal endpoint base URL (direct model invocation).
const DEFAULT_FAL_BASE_URL = "https://fal.run";
// Queue endpoint base URL used for the long-running submit/status/result flow.
const DEFAULT_FAL_QUEUE_BASE_URL = "https://queue.fal.run";
// Default video model; kept conservative (prompt + optional image_url only).
const DEFAULT_FAL_VIDEO_MODEL = "fal-ai/minimax/video-01-live";
// HeyGen video-agent model ref; matched case-insensitively against model ids.
const HEYGEN_VIDEO_AGENT_MODEL = "fal-ai/heygen/v2/video-agent";
const SEEDANCE_2_VIDEO_MODELS = [
"bytedance/seedance-2.0/fast/text-to-video",
"bytedance/seedance-2.0/fast/image-to-video",
@@ -126,6 +127,10 @@ function isFalSeedance2Model(model: string): boolean {
return SEEDANCE_2_VIDEO_MODELS.includes(model as (typeof SEEDANCE_2_VIDEO_MODELS)[number]);
}
/**
 * Whether the given model id refers to the fal-hosted HeyGen video-agent
 * endpoint. The comparison is case-insensitive: the id is run through the
 * shared lowercase normalizer before being matched against the canonical
 * (already lowercase) model ref.
 */
function isFalHeyGenVideoAgentModel(model: string): boolean {
  const normalizedModel = normalizeLowercaseStringOrEmpty(model);
  return normalizedModel === HEYGEN_VIDEO_AGENT_MODEL;
}
function resolveFalResolution(resolution: VideoGenerationRequest["resolution"], model: string) {
if (!resolution) {
return undefined;
@@ -168,7 +173,7 @@ function buildFalVideoRequestBody(params: {
// MiniMax Live on fal currently documents prompt + optional image_url only,
// and the HeyGen video-agent likewise accepts a prompt-centric payload.
// Keep these models conservative so queue requests do not hang behind
// unsupported knobs such as duration/resolution/aspect-ratio overrides.
if (isFalMiniMaxLiveModel(params.model)) {
if (isFalMiniMaxLiveModel(params.model) || isFalHeyGenVideoAgentModel(params.model)) {
return requestBody;
}
const aspectRatio = normalizeOptionalString(params.req.aspectRatio);
@@ -285,6 +290,7 @@ export function buildFalVideoGenerationProvider(): VideoGenerationProvider {
defaultModel: DEFAULT_FAL_VIDEO_MODEL,
models: [
DEFAULT_FAL_VIDEO_MODEL,
HEYGEN_VIDEO_AGENT_MODEL,
...SEEDANCE_2_VIDEO_MODELS,
"fal-ai/kling-video/v2.1/master/text-to-video",
"fal-ai/wan/v2.2-a14b/text-to-video",