mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-28 09:33:06 +00:00
feat(video): add provider support and discord fallback
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
---
|
||||
summary: "Generate videos using configured providers such as Qwen"
|
||||
summary: "Generate videos using configured providers such as OpenAI, Google, Qwen, and MiniMax"
|
||||
read_when:
|
||||
- Generating videos via the agent
|
||||
- Configuring video generation providers and models
|
||||
@@ -17,7 +17,7 @@ The tool only appears when at least one video-generation provider is available.
|
||||
|
||||
## Quick start
|
||||
|
||||
1. Set an API key for at least one provider (for example `QWEN_API_KEY`).
|
||||
1. Set an API key for at least one provider (for example `OPENAI_API_KEY`, `GEMINI_API_KEY`, or `QWEN_API_KEY`).
|
||||
2. Optionally set your preferred model:
|
||||
|
||||
```json5
|
||||
@@ -36,9 +36,15 @@ The agent calls `video_generate` automatically. No tool allow-listing needed —
|
||||
|
||||
## Supported providers
|
||||
|
||||
| Provider | Default model | Reference inputs | API key |
|
||||
| -------- | ------------- | ---------------- | ---------------------------------------------------------- |
|
||||
| Qwen | `wan2.6-t2v` | Yes, remote URLs | `QWEN_API_KEY`, `MODELSTUDIO_API_KEY`, `DASHSCOPE_API_KEY` |
|
||||
| Provider | Default model | Reference inputs | API key |
|
||||
| -------- | ------------------------------- | ------------------ | ---------------------------------------------------------- |
|
||||
| BytePlus | `seedance-1-0-lite-t2v-250428` | 1 image | `BYTEPLUS_API_KEY` |
|
||||
| fal | `fal-ai/minimax/video-01-live` | 1 image | `FAL_KEY` |
|
||||
| Google | `veo-3.1-fast-generate-preview` | 1 image or 1 video | `GEMINI_API_KEY`, `GOOGLE_API_KEY` |
|
||||
| MiniMax | `MiniMax-Hailuo-2.3` | 1 image | `MINIMAX_API_KEY` |
|
||||
| OpenAI | `sora-2` | 1 image or 1 video | `OPENAI_API_KEY` |
|
||||
| Qwen | `wan2.6-t2v` | Yes, remote URLs | `QWEN_API_KEY`, `MODELSTUDIO_API_KEY`, `DASHSCOPE_API_KEY` |
|
||||
| Together | `Wan-AI/Wan2.2-T2V-A14B` | 1 image | `TOGETHER_API_KEY` |
|
||||
|
||||
Use `action: "list"` to inspect available providers and models at runtime:
|
||||
|
||||
@@ -97,6 +103,13 @@ When generating a video, OpenClaw tries providers in this order:
|
||||
|
||||
If a provider fails, the next candidate is tried automatically. If all fail, the error includes details from each attempt.
|
||||
|
||||
## Provider notes
|
||||
|
||||
- OpenAI uses the native video endpoint and currently defaults to `sora-2`.
|
||||
- Google uses Gemini/Veo and supports a single image or video reference input.
|
||||
- MiniMax, Together, BytePlus, and fal currently support a single image reference input.
|
||||
- Qwen supports image/video references, but the upstream DashScope video endpoint currently requires remote `http(s)` URLs for those references.
|
||||
|
||||
## Qwen reference inputs
|
||||
|
||||
The bundled Qwen provider supports text-to-video plus image/video reference modes, but the upstream DashScope video endpoint currently requires **remote http(s) URLs** for reference inputs. Local file paths and uploaded buffers are rejected up front instead of being silently ignored.
|
||||
|
||||
@@ -3,6 +3,7 @@ import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-aut
|
||||
import { ensureModelAllowlistEntry } from "openclaw/plugin-sdk/provider-onboard";
|
||||
import { BYTEPLUS_CODING_MODEL_CATALOG, BYTEPLUS_MODEL_CATALOG } from "./models.js";
|
||||
import { buildBytePlusCodingProvider, buildBytePlusProvider } from "./provider-catalog.js";
|
||||
import { buildBytePlusVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
const PROVIDER_ID = "byteplus";
|
||||
const BYTEPLUS_DEFAULT_MODEL_REF = "byteplus-plan/ark-code-latest";
|
||||
@@ -78,5 +79,6 @@ export default definePluginEntry({
|
||||
return [...byteplusModels, ...byteplusPlanModels];
|
||||
},
|
||||
});
|
||||
api.registerVideoGenerationProvider(buildBytePlusVideoGenerationProvider());
|
||||
},
|
||||
});
|
||||
|
||||
@@ -20,6 +20,9 @@
|
||||
"cliDescription": "BytePlus API key"
|
||||
}
|
||||
],
|
||||
"contracts": {
|
||||
"videoGenerationProviders": ["byteplus"]
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
|
||||
8
extensions/byteplus/plugin-registration.contract.test.ts
Normal file
8
extensions/byteplus/plugin-registration.contract.test.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
|
||||
|
||||
describePluginRegistrationContract({
|
||||
pluginId: "byteplus",
|
||||
providerIds: ["byteplus", "byteplus-plan"],
|
||||
videoGenerationProviderIds: ["byteplus"],
|
||||
requireGenerateVideo: true,
|
||||
});
|
||||
88
extensions/byteplus/video-generation-provider.test.ts
Normal file
88
extensions/byteplus/video-generation-provider.test.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildBytePlusVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
const {
|
||||
resolveApiKeyForProviderMock,
|
||||
postJsonRequestMock,
|
||||
fetchWithTimeoutMock,
|
||||
assertOkOrThrowHttpErrorMock,
|
||||
resolveProviderHttpRequestConfigMock,
|
||||
} = vi.hoisted(() => ({
|
||||
resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "byteplus-key" })),
|
||||
postJsonRequestMock: vi.fn(),
|
||||
fetchWithTimeoutMock: vi.fn(),
|
||||
assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
|
||||
resolveProviderHttpRequestConfigMock: vi.fn((params) => ({
|
||||
baseUrl: params.baseUrl ?? params.defaultBaseUrl,
|
||||
allowPrivateNetwork: false,
|
||||
headers: new Headers(params.defaultHeaders),
|
||||
dispatcherPolicy: undefined,
|
||||
})),
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({
|
||||
resolveApiKeyForProvider: resolveApiKeyForProviderMock,
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/provider-http", () => ({
|
||||
assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
|
||||
fetchWithTimeout: fetchWithTimeoutMock,
|
||||
postJsonRequest: postJsonRequestMock,
|
||||
resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
|
||||
}));
|
||||
|
||||
describe("byteplus video generation provider", () => {
|
||||
afterEach(() => {
|
||||
resolveApiKeyForProviderMock.mockClear();
|
||||
postJsonRequestMock.mockReset();
|
||||
fetchWithTimeoutMock.mockReset();
|
||||
assertOkOrThrowHttpErrorMock.mockClear();
|
||||
resolveProviderHttpRequestConfigMock.mockClear();
|
||||
});
|
||||
|
||||
it("creates a content-generation task, polls, and downloads the video", async () => {
|
||||
postJsonRequestMock.mockResolvedValue({
|
||||
response: {
|
||||
json: async () => ({
|
||||
id: "task_123",
|
||||
}),
|
||||
},
|
||||
release: vi.fn(async () => {}),
|
||||
});
|
||||
fetchWithTimeoutMock
|
||||
.mockResolvedValueOnce({
|
||||
json: async () => ({
|
||||
id: "task_123",
|
||||
status: "succeeded",
|
||||
content: {
|
||||
video_url: "https://example.com/byteplus.mp4",
|
||||
},
|
||||
model: "seedance-1-0-lite-t2v-250428",
|
||||
}),
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
headers: new Headers({ "content-type": "video/mp4" }),
|
||||
arrayBuffer: async () => Buffer.from("mp4-bytes"),
|
||||
});
|
||||
|
||||
const provider = buildBytePlusVideoGenerationProvider();
|
||||
const result = await provider.generateVideo({
|
||||
provider: "byteplus",
|
||||
model: "seedance-1-0-lite-t2v-250428",
|
||||
prompt: "A lantern floats upward into the night sky",
|
||||
cfg: {},
|
||||
});
|
||||
|
||||
expect(postJsonRequestMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
url: "https://ark.ap-southeast.bytepluses.com/api/v3/contents/generations/tasks",
|
||||
}),
|
||||
);
|
||||
expect(result.videos).toHaveLength(1);
|
||||
expect(result.metadata).toEqual(
|
||||
expect.objectContaining({
|
||||
taskId: "task_123",
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
253
extensions/byteplus/video-generation-provider.ts
Normal file
253
extensions/byteplus/video-generation-provider.ts
Normal file
@@ -0,0 +1,253 @@
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
fetchWithTimeout,
|
||||
postJsonRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
import { BYTEPLUS_BASE_URL } from "./models.js";
|
||||
|
||||
const DEFAULT_BYTEPLUS_VIDEO_MODEL = "seedance-1-0-lite-t2v-250428";
|
||||
const DEFAULT_TIMEOUT_MS = 120_000;
|
||||
const POLL_INTERVAL_MS = 5_000;
|
||||
const MAX_POLL_ATTEMPTS = 120;
|
||||
|
||||
type BytePlusTaskCreateResponse = {
|
||||
id?: string;
|
||||
};
|
||||
|
||||
type BytePlusTaskResponse = {
|
||||
id?: string;
|
||||
model?: string;
|
||||
status?: "running" | "failed" | "queued" | "succeeded" | "cancelled";
|
||||
error?: {
|
||||
code?: string;
|
||||
message?: string;
|
||||
};
|
||||
content?: {
|
||||
video_url?: string;
|
||||
last_frame_url?: string;
|
||||
file_url?: string;
|
||||
};
|
||||
duration?: number;
|
||||
ratio?: string;
|
||||
resolution?: string;
|
||||
};
|
||||
|
||||
/**
 * Picks the base URL for BytePlus video requests.
 *
 * @param req - The video generation request whose `cfg` may carry a
 *   `models.providers.byteplus.baseUrl` override.
 * @returns The trimmed override when it is non-empty, otherwise
 *   `BYTEPLUS_BASE_URL`.
 */
function resolveBytePlusVideoBaseUrl(req: VideoGenerationRequest): string {
  const configured = req.cfg?.models?.providers?.byteplus?.baseUrl?.trim();
  if (configured) {
    return configured;
  }
  return BYTEPLUS_BASE_URL;
}
|
||||
|
||||
/**
 * Encodes raw bytes as a base64 `data:` URL.
 *
 * @param buffer - Bytes to embed.
 * @param mimeType - Media type placed in the URL header, used verbatim.
 * @returns A string of the form `data:<mimeType>;base64,<payload>`.
 */
function toDataUrl(buffer: Buffer, mimeType: string): string {
  const encoded = buffer.toString("base64");
  return ["data:", mimeType, ";base64,", encoded].join("");
}
|
||||
|
||||
/**
 * Resolves the single reference image of a request into a URL string that can
 * be placed in the BytePlus request body.
 *
 * Only the first entry of `req.inputImages` is considered.
 *
 * @returns `undefined` when no reference image was supplied; the trimmed
 *   `url` when one is set; otherwise a base64 data URL built from `buffer`
 *   (MIME type defaults to `image/png`).
 * @throws Error when the image has neither a non-empty `url` nor a `buffer`.
 */
function resolveBytePlusImageUrl(req: VideoGenerationRequest): string | undefined {
  const reference = req.inputImages?.[0];
  if (!reference) {
    return undefined;
  }

  const remoteUrl = reference.url?.trim();
  if (remoteUrl) {
    return remoteUrl;
  }

  if (reference.buffer) {
    const mimeType = reference.mimeType?.trim() || "image/png";
    return toDataUrl(reference.buffer, mimeType);
  }

  throw new Error("BytePlus reference image is missing image data.");
}
|
||||
|
||||
/**
 * Polls a BytePlus content-generation task until it reaches a terminal state.
 *
 * Issues up to `MAX_POLL_ATTEMPTS` GET requests against
 * `<baseUrl>/contents/generations/tasks/<taskId>`, waiting `POLL_INTERVAL_MS`
 * between consecutive attempts.
 *
 * @param params.taskId - Identifier returned when the task was created.
 * @param params.headers - Headers sent with every status request.
 * @param params.timeoutMs - Per-request timeout; defaults to `DEFAULT_TIMEOUT_MS`.
 * @param params.baseUrl - Base URL the task path is appended to.
 * @param params.fetchFn - Fetch implementation handed to `fetchWithTimeout`.
 * @returns The task payload once its status is `"succeeded"`.
 * @throws Error when the task reports `"failed"` or `"cancelled"` (using the
 *   upstream error message when present), when a status request is rejected
 *   by `assertOkOrThrowHttpError`, or when the attempt budget is exhausted.
 */
async function pollBytePlusTask(params: {
  taskId: string;
  headers: Headers;
  timeoutMs?: number;
  baseUrl: string;
  fetchFn: typeof fetch;
}): Promise<BytePlusTaskResponse> {
  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt += 1) {
    const response = await fetchWithTimeout(
      `${params.baseUrl}/contents/generations/tasks/${params.taskId}`,
      {
        method: "GET",
        headers: params.headers,
      },
      params.timeoutMs ?? DEFAULT_TIMEOUT_MS,
      params.fetchFn,
    );
    await assertOkOrThrowHttpError(response, "BytePlus video status request failed");
    const payload = (await response.json()) as BytePlusTaskResponse;
    switch (payload.status?.trim()) {
      case "succeeded":
        return payload;
      case "failed":
      case "cancelled":
        throw new Error(payload.error?.message?.trim() || "BytePlus video generation failed");
      case "queued":
      case "running":
      default:
        // Unknown or missing statuses are treated as "still in progress".
        // Only sleep when another poll will follow; waiting after the last
        // attempt would just delay the timeout error below.
        if (attempt < MAX_POLL_ATTEMPTS - 1) {
          await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
        }
        break;
    }
  }
  throw new Error(`BytePlus video generation task ${params.taskId} did not finish in time`);
}
|
||||
|
||||
/**
 * Downloads a finished BytePlus video and wraps it as a generated asset.
 *
 * @param params.url - Location of the rendered video.
 * @param params.timeoutMs - Request timeout; defaults to `DEFAULT_TIMEOUT_MS`.
 * @param params.fetchFn - Fetch implementation handed to `fetchWithTimeout`.
 * @returns The video bytes, the MIME type from the `content-type` response
 *   header (falling back to `video/mp4`), and a `video-1.<ext>` file name
 *   whose extension is `webm` when the MIME type mentions it, else `mp4`.
 * @throws Whatever `assertOkOrThrowHttpError` raises for a rejected response.
 */
async function downloadBytePlusVideo(params: {
  url: string;
  timeoutMs?: number;
  fetchFn: typeof fetch;
}): Promise<GeneratedVideoAsset> {
  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const response = await fetchWithTimeout(
    params.url,
    { method: "GET" },
    requestTimeoutMs,
    params.fetchFn,
  );
  await assertOkOrThrowHttpError(response, "BytePlus generated video download failed");

  const reportedType = response.headers.get("content-type")?.trim();
  const mimeType = reportedType || "video/mp4";
  const extension = mimeType.includes("webm") ? "webm" : "mp4";
  const bytes = Buffer.from(await response.arrayBuffer());

  return {
    buffer: bytes,
    mimeType,
    fileName: `video-1.${extension}`,
  };
}
|
||||
|
||||
/**
 * Builds the BytePlus video-generation provider.
 *
 * `generateVideo` submits a content-generation task, polls it until it
 * finishes, then downloads the resulting video.
 *
 * @returns A provider with id `"byteplus"` that accepts at most one reference
 *   image and no reference videos.
 */
export function buildBytePlusVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "byteplus",
    label: "BytePlus",
    defaultModel: DEFAULT_BYTEPLUS_VIDEO_MODEL,
    models: [
      DEFAULT_BYTEPLUS_VIDEO_MODEL,
      "seedance-1-0-lite-i2v-250428",
      "seedance-1-0-pro-250528",
      "seedance-1-5-pro-251215",
    ],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "byteplus",
        agentDir,
      }),
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 0,
      maxDurationSeconds: 12,
      supportsAspectRatio: true,
      supportsResolution: true,
      supportsAudio: true,
      supportsWatermark: true,
    },
    /**
     * Generates one video for `req`.
     *
     * @throws Error when video reference inputs are supplied, when no API key
     *   resolves, when the create response lacks a task id, when the task
     *   fails or times out, or when the finished task carries no video URL.
     */
    async generateVideo(req) {
      if ((req.inputVideos?.length ?? 0) > 0) {
        throw new Error("BytePlus video generation does not support video reference inputs.");
      }
      const auth = await resolveApiKeyForProvider({
        provider: "byteplus",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("BytePlus API key missing");
      }

      const fetchFn = fetch;
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: resolveBytePlusVideoBaseUrl(req),
          defaultBaseUrl: BYTEPLUS_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
            "Content-Type": "application/json",
          },
          provider: "byteplus",
          capability: "video",
          transport: "http",
        });

      // Resolve the model once so the value sent upstream and the value
      // reported back to the caller cannot drift apart (a blank or
      // whitespace-only `req.model` falls back to the default in both places).
      const model = req.model?.trim() || DEFAULT_BYTEPLUS_VIDEO_MODEL;

      const content: Array<Record<string, unknown>> = [{ type: "text", text: req.prompt }];
      const imageUrl = resolveBytePlusImageUrl(req);
      if (imageUrl) {
        // The reference image is sent as the first frame of the clip.
        content.push({
          type: "image_url",
          image_url: { url: imageUrl },
          role: "first_frame",
        });
      }

      // Optional knobs are only included when the caller set them.
      const body: Record<string, unknown> = {
        model,
        content,
      };
      if (req.aspectRatio?.trim()) {
        body.ratio = req.aspectRatio.trim();
      }
      if (req.resolution) {
        body.resolution = req.resolution;
      }
      if (typeof req.durationSeconds === "number" && Number.isFinite(req.durationSeconds)) {
        // Whole seconds, never below 1.
        body.duration = Math.max(1, Math.round(req.durationSeconds));
      }
      if (typeof req.audio === "boolean") {
        body.generate_audio = req.audio;
      }
      if (typeof req.watermark === "boolean") {
        body.watermark = req.watermark;
      }

      const { response, release } = await postJsonRequest({
        url: `${baseUrl}/contents/generations/tasks`,
        headers,
        body,
        timeoutMs: req.timeoutMs,
        fetchFn,
        allowPrivateNetwork,
        dispatcherPolicy,
      });
      try {
        await assertOkOrThrowHttpError(response, "BytePlus video generation failed");
        const submitted = (await response.json()) as BytePlusTaskCreateResponse;
        const taskId = submitted.id?.trim();
        if (!taskId) {
          throw new Error("BytePlus video generation response missing task id");
        }
        // NOTE(review): polling and download go through fetchWithTimeout and do
        // not receive allowPrivateNetwork / dispatcherPolicy — confirm intended.
        const completed = await pollBytePlusTask({
          taskId,
          headers,
          timeoutMs: req.timeoutMs,
          baseUrl,
          fetchFn,
        });
        const videoUrl = completed.content?.video_url?.trim();
        if (!videoUrl) {
          throw new Error("BytePlus video generation completed without a video URL");
        }
        const video = await downloadBytePlusVideo({
          url: videoUrl,
          timeoutMs: req.timeoutMs,
          fetchFn,
        });
        return {
          videos: [video],
          // Prefer the model the upstream reports; otherwise the one submitted.
          model: completed.model ?? model,
          metadata: {
            taskId,
            status: completed.status,
            videoUrl,
            ratio: completed.ratio,
            resolution: completed.resolution,
            duration: completed.duration,
          },
        };
      } finally {
        await release();
      }
    },
  };
}
|
||||
@@ -242,6 +242,44 @@ describe("deliverDiscordReply", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("sends text first and videos as a separate media-only follow-up", async () => {
|
||||
await deliverDiscordReply({
|
||||
replies: [
|
||||
{
|
||||
text: "done — i kicked off a 5s Molty clip",
|
||||
mediaUrls: ["/tmp/molty.mp4"],
|
||||
},
|
||||
],
|
||||
target: "channel:654",
|
||||
token: "token",
|
||||
runtime,
|
||||
cfg,
|
||||
textLimit: 2000,
|
||||
replyToId: "reply-1",
|
||||
});
|
||||
|
||||
expect(sendMessageDiscordMock).toHaveBeenCalledTimes(2);
|
||||
expect(sendMessageDiscordMock).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
"channel:654",
|
||||
"done — i kicked off a 5s Molty clip",
|
||||
expect.objectContaining({
|
||||
token: "token",
|
||||
replyTo: "reply-1",
|
||||
}),
|
||||
);
|
||||
expect(sendMessageDiscordMock).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
"channel:654",
|
||||
"",
|
||||
expect.objectContaining({
|
||||
token: "token",
|
||||
mediaUrl: "/tmp/molty.mp4",
|
||||
replyTo: "reply-1",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("forwards cfg to Discord send helpers", async () => {
|
||||
await deliverDiscordReply({
|
||||
replies: [{ text: "cfg path" }],
|
||||
|
||||
@@ -40,6 +40,8 @@ export type DiscordThreadBindingLookup = {
|
||||
|
||||
type ResolvedRetryConfig = Required<RetryConfig>;
|
||||
|
||||
const DISCORD_VIDEO_MEDIA_EXTENSIONS = new Set([".avi", ".m4v", ".mkv", ".mov", ".mp4", ".webm"]);
|
||||
|
||||
const DISCORD_DELIVERY_RETRY_DEFAULTS: ResolvedRetryConfig = {
|
||||
attempts: 3,
|
||||
minDelayMs: 1000,
|
||||
@@ -75,6 +77,31 @@ function resolveDeliveryRetryConfig(retry?: RetryConfig): ResolvedRetryConfig {
|
||||
return resolveRetryConfig(DISCORD_DELIVERY_RETRY_DEFAULTS, retry);
|
||||
}
|
||||
|
||||
/**
 * Reduces a media URL or local path to a lowercase path suitable for
 * file-extension checks.
 *
 * Input that parses as a URL yields its `pathname`; anything else is treated
 * as a plain path and has everything from the first `#`, then from the first
 * `?`, removed.
 *
 * @param mediaUrl - URL or path; surrounding whitespace is ignored.
 * @returns The lowercase path, or `""` for blank input.
 */
function normalizeMediaPathForExtension(mediaUrl: string): string {
  const candidate = mediaUrl.trim();
  if (candidate.length === 0) {
    return "";
  }

  let pathPart: string;
  try {
    pathPart = new URL(candidate).pathname;
  } catch {
    // Not an absolute URL: strip the fragment first, then the query.
    const hashAt = candidate.indexOf("#");
    const beforeHash = hashAt === -1 ? candidate : candidate.slice(0, hashAt);
    const queryAt = beforeHash.indexOf("?");
    pathPart = queryAt === -1 ? beforeHash : beforeHash.slice(0, queryAt);
  }
  return pathPart.toLowerCase();
}
|
||||
|
||||
/**
 * Heuristically decides whether a media URL or path points at a video, based
 * solely on its file extension.
 *
 * @param mediaUrl - URL or local path of the attachment.
 * @returns `true` when the normalized path ends with one of
 *   `DISCORD_VIDEO_MEDIA_EXTENSIONS`.
 */
function isLikelyDiscordVideoMedia(mediaUrl: string): boolean {
  const mediaPath = normalizeMediaPathForExtension(mediaUrl);
  return Array.from(DISCORD_VIDEO_MEDIA_EXTENSIONS).some((extension) =>
    mediaPath.endsWith(extension),
  );
}
|
||||
|
||||
async function sendWithRetry(
|
||||
fn: () => Promise<unknown>,
|
||||
retryConfig: ResolvedRetryConfig,
|
||||
@@ -402,6 +429,51 @@ export async function deliverDiscordReply(params: {
|
||||
continue;
|
||||
}
|
||||
|
||||
const shouldSplitVideoMediaReply =
|
||||
reply.text.trim().length > 0 &&
|
||||
reply.mediaUrls.some((mediaUrl) => isLikelyDiscordVideoMedia(mediaUrl));
|
||||
if (shouldSplitVideoMediaReply) {
|
||||
await sendDiscordChunkWithFallback({
|
||||
cfg: params.cfg,
|
||||
target: params.target,
|
||||
text: reply.text,
|
||||
token: params.token,
|
||||
rest: params.rest,
|
||||
accountId: params.accountId,
|
||||
maxLinesPerMessage: params.maxLinesPerMessage,
|
||||
replyTo: resolvePayloadReplyTo(),
|
||||
binding,
|
||||
chunkMode: params.chunkMode,
|
||||
username: persona.username,
|
||||
avatarUrl: persona.avatarUrl,
|
||||
channelId,
|
||||
request,
|
||||
retryConfig,
|
||||
});
|
||||
await sendMediaWithLeadingCaption({
|
||||
mediaUrls: reply.mediaUrls,
|
||||
caption: "",
|
||||
send: async ({ mediaUrl }) => {
|
||||
const replyTo = resolvePayloadReplyTo();
|
||||
await sendWithRetry(
|
||||
() =>
|
||||
sendMessageDiscord(params.target, "", {
|
||||
cfg: params.cfg,
|
||||
token: params.token,
|
||||
rest: params.rest,
|
||||
mediaUrl,
|
||||
accountId: params.accountId,
|
||||
mediaLocalRoots: params.mediaLocalRoots,
|
||||
replyTo,
|
||||
}),
|
||||
retryConfig,
|
||||
);
|
||||
},
|
||||
});
|
||||
deliveredAny = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
await sendMediaWithLeadingCaption({
|
||||
mediaUrls: reply.mediaUrls,
|
||||
caption: reply.text,
|
||||
|
||||
@@ -2,6 +2,7 @@ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
|
||||
import { createProviderApiKeyAuthMethod } from "openclaw/plugin-sdk/provider-auth-api-key";
|
||||
import { buildFalImageGenerationProvider } from "./image-generation-provider.js";
|
||||
import { applyFalConfig, FAL_DEFAULT_IMAGE_MODEL_REF } from "./onboard.js";
|
||||
import { buildFalVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
const PROVIDER_ID = "fal";
|
||||
|
||||
@@ -41,5 +42,6 @@ export default definePluginEntry({
|
||||
],
|
||||
});
|
||||
api.registerImageGenerationProvider(buildFalImageGenerationProvider());
|
||||
api.registerVideoGenerationProvider(buildFalVideoGenerationProvider());
|
||||
},
|
||||
});
|
||||
|
||||
@@ -22,7 +22,8 @@
|
||||
}
|
||||
],
|
||||
"contracts": {
|
||||
"imageGenerationProviders": ["fal"]
|
||||
"imageGenerationProviders": ["fal"],
|
||||
"videoGenerationProviders": ["fal"]
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
|
||||
10
extensions/fal/plugin-registration.contract.test.ts
Normal file
10
extensions/fal/plugin-registration.contract.test.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
|
||||
|
||||
describePluginRegistrationContract({
|
||||
pluginId: "fal",
|
||||
providerIds: ["fal"],
|
||||
imageGenerationProviderIds: ["fal"],
|
||||
videoGenerationProviderIds: ["fal"],
|
||||
requireGenerateImage: true,
|
||||
requireGenerateVideo: true,
|
||||
});
|
||||
68
extensions/fal/video-generation-provider.test.ts
Normal file
68
extensions/fal/video-generation-provider.test.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import * as providerAuth from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import * as providerHttp from "openclaw/plugin-sdk/provider-http";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
_setFalVideoFetchGuardForTesting,
|
||||
buildFalVideoGenerationProvider,
|
||||
} from "./video-generation-provider.js";
|
||||
|
||||
describe("fal video generation provider", () => {
|
||||
const fetchGuardMock = vi.fn();
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
fetchGuardMock.mockReset();
|
||||
_setFalVideoFetchGuardForTesting(null);
|
||||
});
|
||||
|
||||
it("posts to the model endpoint and downloads the returned video URL", async () => {
|
||||
vi.spyOn(providerAuth, "resolveApiKeyForProvider").mockResolvedValue({
|
||||
apiKey: "fal-key",
|
||||
source: "env",
|
||||
});
|
||||
vi.spyOn(providerHttp, "resolveProviderHttpRequestConfig").mockReturnValue({
|
||||
baseUrl: "https://fal.run",
|
||||
allowPrivateNetwork: false,
|
||||
headers: new Headers({
|
||||
Authorization: "Key fal-key",
|
||||
"Content-Type": "application/json",
|
||||
}),
|
||||
dispatcherPolicy: undefined,
|
||||
});
|
||||
vi.spyOn(providerHttp, "assertOkOrThrowHttpError").mockResolvedValue(undefined);
|
||||
_setFalVideoFetchGuardForTesting(fetchGuardMock as never);
|
||||
fetchGuardMock
|
||||
.mockResolvedValueOnce({
|
||||
response: {
|
||||
json: async () => ({
|
||||
video: { url: "https://fal.run/files/video.mp4" },
|
||||
}),
|
||||
},
|
||||
release: vi.fn(async () => {}),
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
response: {
|
||||
headers: new Headers({ "content-type": "video/mp4" }),
|
||||
arrayBuffer: async () => Buffer.from("mp4-bytes"),
|
||||
},
|
||||
release: vi.fn(async () => {}),
|
||||
});
|
||||
|
||||
const provider = buildFalVideoGenerationProvider();
|
||||
const result = await provider.generateVideo({
|
||||
provider: "fal",
|
||||
model: "fal-ai/minimax/video-01-live",
|
||||
prompt: "A spaceship emerges from the clouds",
|
||||
cfg: {},
|
||||
});
|
||||
|
||||
expect(fetchGuardMock).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
expect.objectContaining({
|
||||
url: "https://fal.run/fal-ai/minimax/video-01-live",
|
||||
}),
|
||||
);
|
||||
expect(result.videos).toHaveLength(1);
|
||||
expect(result.videos[0]?.mimeType).toBe("video/mp4");
|
||||
});
|
||||
});
|
||||
184
extensions/fal/video-generation-provider.ts
Normal file
184
extensions/fal/video-generation-provider.ts
Normal file
@@ -0,0 +1,184 @@
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import {
|
||||
fetchWithSsrFGuard,
|
||||
type SsrFPolicy,
|
||||
ssrfPolicyFromDangerouslyAllowPrivateNetwork,
|
||||
} from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
|
||||
const DEFAULT_FAL_BASE_URL = "https://fal.run";
|
||||
const DEFAULT_FAL_VIDEO_MODEL = "fal-ai/minimax/video-01-live";
|
||||
const DEFAULT_TIMEOUT_MS = 180_000;
|
||||
|
||||
type FalVideoResponse = {
|
||||
video?: {
|
||||
url?: string;
|
||||
content_type?: string;
|
||||
};
|
||||
videos?: Array<{
|
||||
url?: string;
|
||||
content_type?: string;
|
||||
}>;
|
||||
prompt?: string;
|
||||
};
|
||||
|
||||
let falFetchGuard = fetchWithSsrFGuard;
|
||||
|
||||
/**
 * Test hook: swaps the guarded fetch implementation used by the fal video
 * provider.
 *
 * @param impl - Replacement implementation, or `null` to restore
 *   `fetchWithSsrFGuard`.
 */
export function _setFalVideoFetchGuardForTesting(impl: typeof fetchWithSsrFGuard | null): void {
  if (impl === null || impl === undefined) {
    falFetchGuard = fetchWithSsrFGuard;
    return;
  }
  falFetchGuard = impl;
}
|
||||
|
||||
/**
 * Encodes raw bytes as a base64 `data:` URL.
 *
 * @param buffer - Bytes to embed.
 * @param mimeType - Media type placed in the URL header, used verbatim.
 * @returns A string of the form `data:<mimeType>;base64,<payload>`.
 */
function toDataUrl(buffer: Buffer, mimeType: string): string {
  const payload = buffer.toString("base64");
  return "data:" + mimeType + ";base64," + payload;
}
|
||||
|
||||
/**
 * Maps the resolved private-network flag to the SSRF policy argument of the
 * guarded fetch.
 *
 * @param allowPrivateNetwork - Whether private-network targets are permitted.
 * @returns The policy from `ssrfPolicyFromDangerouslyAllowPrivateNetwork(true)`
 *   when the flag is set; otherwise `undefined` (no explicit policy passed).
 */
function buildPolicy(allowPrivateNetwork: boolean): SsrFPolicy | undefined {
  if (!allowPrivateNetwork) {
    return undefined;
  }
  return ssrfPolicyFromDangerouslyAllowPrivateNetwork(true);
}
|
||||
|
||||
/**
 * Selects the video entry to download from a fal response.
 *
 * The singular `video` field wins when it has a non-blank `url`; otherwise the
 * first element of `videos` with a non-blank `url` is used.
 *
 * @param payload - Parsed fal response body.
 * @returns The chosen entry (its `url` is not trimmed here), or `undefined`
 *   when no entry carries a usable URL.
 */
function extractFalVideoEntry(payload: FalVideoResponse) {
  const single = payload.video;
  if (single?.url?.trim()) {
    return single;
  }
  for (const candidate of payload.videos ?? []) {
    if (candidate.url?.trim()) {
      return candidate;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Downloads a generated fal video through the guarded fetch and wraps it as a
 * generated asset.
 *
 * @param url - Location of the rendered video.
 * @param policy - SSRF policy forwarded to the guarded fetch, if any.
 * @returns The video bytes, the MIME type from the `content-type` response
 *   header (falling back to `video/mp4`), and a `video-1.<ext>` file name
 *   whose extension is `webm` when the MIME type mentions it, else `mp4`.
 * @throws Whatever `assertOkOrThrowHttpError` raises for a rejected response.
 */
async function downloadFalVideo(
  url: string,
  policy: SsrFPolicy | undefined,
): Promise<GeneratedVideoAsset> {
  const guarded = await falFetchGuard({
    url,
    timeoutMs: DEFAULT_TIMEOUT_MS,
    policy,
    auditContext: "fal-video-download",
  });
  try {
    const download = guarded.response;
    await assertOkOrThrowHttpError(download, "fal generated video download failed");

    const reportedType = download.headers.get("content-type")?.trim();
    const mimeType = reportedType || "video/mp4";
    const extension = mimeType.includes("webm") ? "webm" : "mp4";
    const bytes = Buffer.from(await download.arrayBuffer());

    return {
      buffer: bytes,
      mimeType,
      fileName: `video-1.${extension}`,
    };
  } finally {
    // Always hand the guarded fetch's resources back, even on failure.
    await guarded.release();
  }
}
|
||||
|
||||
/**
 * Builds the fal video-generation provider.
 *
 * `generateVideo` POSTs the prompt (plus optional settings and one reference
 * image) to `<baseUrl>/<model>` through the guarded fetch, then downloads the
 * video URL found in the response.
 *
 * @returns A provider with id `"fal"` that accepts at most one reference image
 *   and no reference videos.
 */
export function buildFalVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "fal",
    label: "fal",
    defaultModel: DEFAULT_FAL_VIDEO_MODEL,
    models: [
      DEFAULT_FAL_VIDEO_MODEL,
      "fal-ai/kling-video/v2.1/master/text-to-video",
      "fal-ai/wan/v2.2-a14b/text-to-video",
      "fal-ai/wan/v2.2-a14b/image-to-video",
    ],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "fal",
        agentDir,
      }),
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 0,
      supportsAspectRatio: true,
      supportsResolution: true,
      supportsSize: true,
    },
    /**
     * Generates one video for `req`.
     *
     * @throws Error when video reference inputs are supplied, when no API key
     *   resolves, when the reference image has neither a URL nor a buffer,
     *   when the request is rejected, or when the response has no output URL.
     */
    async generateVideo(req) {
      if ((req.inputVideos?.length ?? 0) > 0) {
        throw new Error("fal video generation does not support video reference inputs.");
      }
      const auth = await resolveApiKeyForProvider({
        provider: "fal",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("fal API key missing");
      }
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: req.cfg?.models?.providers?.fal?.baseUrl?.trim(),
          defaultBaseUrl: DEFAULT_FAL_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Key ${auth.apiKey}`,
            "Content-Type": "application/json",
          },
          provider: "fal",
          capability: "video",
          transport: "http",
        });

      // Resolve the model once; it names both the endpoint and the result.
      const model = req.model?.trim() || DEFAULT_FAL_VIDEO_MODEL;

      // Optional knobs are only included when the caller set them.
      const requestBody: Record<string, unknown> = {
        prompt: req.prompt,
      };
      if (req.aspectRatio?.trim()) {
        requestBody.aspect_ratio = req.aspectRatio.trim();
      }
      if (req.size?.trim()) {
        requestBody.size = req.size.trim();
      }
      if (req.resolution) {
        requestBody.resolution = req.resolution;
      }
      if (typeof req.durationSeconds === "number" && Number.isFinite(req.durationSeconds)) {
        // Whole seconds, never below 1.
        requestBody.duration = Math.max(1, Math.round(req.durationSeconds));
      }

      const referenceImage = req.inputImages?.[0];
      if (referenceImage) {
        const referenceUrl = referenceImage.url?.trim();
        if (referenceUrl) {
          requestBody.image_url = referenceUrl;
        } else if (referenceImage.buffer) {
          requestBody.image_url = toDataUrl(
            referenceImage.buffer,
            referenceImage.mimeType?.trim() || "image/png",
          );
        } else {
          // An `undefined` image_url would be dropped by JSON.stringify and the
          // reference silently ignored; reject up front instead.
          throw new Error("fal reference image is missing image data.");
        }
      }

      const { response, release } = await falFetchGuard({
        url: `${baseUrl}/${model}`,
        init: {
          method: "POST",
          headers,
          body: JSON.stringify(requestBody),
        },
        timeoutMs: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
        policy: buildPolicy(allowPrivateNetwork),
        dispatcherPolicy,
        auditContext: "fal-video-generate",
      });
      try {
        await assertOkOrThrowHttpError(response, "fal video generation failed");
        const payload = (await response.json()) as FalVideoResponse;
        const entry = extractFalVideoEntry(payload);
        const url = entry?.url?.trim();
        if (!url) {
          throw new Error("fal video generation response missing output URL");
        }
        const video = await downloadFalVideo(url, buildPolicy(allowPrivateNetwork));
        return {
          videos: [video],
          model,
          metadata: payload.prompt ? { prompt: payload.prompt } : undefined,
        };
      } finally {
        await release();
      }
    },
  };
}
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
} from "./api.js";
|
||||
import { isModernGoogleModel, resolveGoogleGeminiForwardCompatModel } from "./provider-models.js";
|
||||
import { createGeminiWebSearchProvider } from "./src/gemini-web-search-provider.js";
|
||||
import { buildGoogleVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
let googleImageGenerationProviderPromise: Promise<ImageGenerationProvider> | null = null;
|
||||
let googleMediaUnderstandingProviderPromise: Promise<MediaUnderstandingProvider> | null = null;
|
||||
@@ -163,6 +164,7 @@ export default definePluginEntry({
|
||||
});
|
||||
api.registerImageGenerationProvider(createLazyGoogleImageGenerationProvider());
|
||||
api.registerMediaUnderstandingProvider(createLazyGoogleMediaUnderstandingProvider());
|
||||
api.registerVideoGenerationProvider(buildGoogleVideoGenerationProvider());
|
||||
api.registerWebSearchProvider(createGeminiWebSearchProvider());
|
||||
},
|
||||
});
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
"contracts": {
|
||||
"mediaUnderstandingProviders": ["google"],
|
||||
"imageGenerationProviders": ["google"],
|
||||
"videoGenerationProviders": ["google"],
|
||||
"webSearchProviders": ["gemini"]
|
||||
},
|
||||
"configSchema": {
|
||||
|
||||
@@ -4,6 +4,9 @@
|
||||
"private": true,
|
||||
"description": "OpenClaw Google plugin",
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"@google/genai": "^1.48.0"
|
||||
},
|
||||
"openclaw": {
|
||||
"extensions": [
|
||||
"./index.ts"
|
||||
|
||||
14
extensions/google/plugin-registration.contract.test.ts
Normal file
14
extensions/google/plugin-registration.contract.test.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
|
||||
|
||||
// Registration contract for the Google plugin: the ids listed here are what the shared
// contract helper is asked to verify against the plugin's registrations.
describePluginRegistrationContract({
  // Identity.
  pluginId: "google",
  providerIds: ["google"],
  cliBackendIds: ["google-gemini-cli"],

  // Capability providers.
  mediaUnderstandingProviderIds: ["google"],
  imageGenerationProviderIds: ["google"],
  videoGenerationProviderIds: ["google"],
  webSearchProviderIds: ["gemini"],

  // Entry points the registered providers must expose.
  requireDescribeImages: true,
  requireGenerateImage: true,
  requireGenerateVideo: true,
});
|
||||
117
extensions/google/video-generation-provider.test.ts
Normal file
117
extensions/google/video-generation-provider.test.ts
Normal file
@@ -0,0 +1,117 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const { GoogleGenAIMock, generateVideosMock, getVideosOperationMock } = vi.hoisted(() => {
|
||||
const generateVideosMock = vi.fn();
|
||||
const getVideosOperationMock = vi.fn();
|
||||
const GoogleGenAIMock = vi.fn(function GoogleGenAI() {
|
||||
return {
|
||||
models: {
|
||||
generateVideos: generateVideosMock,
|
||||
},
|
||||
operations: {
|
||||
getVideosOperation: getVideosOperationMock,
|
||||
},
|
||||
files: {
|
||||
download: vi.fn(),
|
||||
},
|
||||
};
|
||||
});
|
||||
return { GoogleGenAIMock, generateVideosMock, getVideosOperationMock };
|
||||
});
|
||||
|
||||
vi.mock("@google/genai", () => ({
|
||||
GoogleGenAI: GoogleGenAIMock,
|
||||
}));
|
||||
|
||||
import * as providerAuthRuntime from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import { buildGoogleVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
describe("google video generation provider", () => {
  const VEO_MODEL = "veo-3.1-fast-generate-preview";

  /** Makes the auth runtime hand back a fixed env-sourced API key. */
  function stubGoogleApiKey() {
    return vi.spyOn(providerAuthRuntime, "resolveApiKeyForProvider").mockResolvedValue({
      apiKey: "google-key",
      source: "env",
    });
  }

  afterEach(() => {
    vi.restoreAllMocks();
    generateVideosMock.mockReset();
    getVideosOperationMock.mockReset();
    GoogleGenAIMock.mockClear();
  });

  it("submits generation and returns inline video bytes", async () => {
    stubGoogleApiKey();

    // First call returns a pending operation; the poll then returns the finished one.
    const operationName = "operations/123";
    generateVideosMock.mockResolvedValue({ done: false, name: operationName });
    getVideosOperationMock.mockResolvedValue({
      done: true,
      name: operationName,
      response: {
        generatedVideos: [
          {
            video: {
              videoBytes: Buffer.from("mp4-bytes").toString("base64"),
              mimeType: "video/mp4",
            },
          },
        ],
      },
    });

    const result = await buildGoogleVideoGenerationProvider().generateVideo({
      provider: "google",
      model: VEO_MODEL,
      prompt: "A tiny robot watering a windowsill garden",
      cfg: {},
      aspectRatio: "16:9",
      resolution: "720P",
      durationSeconds: 3,
      audio: true,
    });

    // The 3 second request is expected to reach the SDK as 4, and "720P" as "720p".
    const expectedConfig = expect.objectContaining({
      numberOfVideos: 1,
      durationSeconds: 4,
      aspectRatio: "16:9",
      resolution: "720p",
      generateAudio: true,
    });
    expect(generateVideosMock).toHaveBeenCalledWith(
      expect.objectContaining({
        model: VEO_MODEL,
        prompt: "A tiny robot watering a windowsill garden",
        config: expectedConfig,
      }),
    );
    expect(result.videos).toHaveLength(1);
    expect(result.videos[0]?.mimeType).toBe("video/mp4");

    // Without a configured base URL, neither baseUrl nor apiVersion may be forced on the SDK.
    const untouchedHttpOptions = expect.not.objectContaining({
      baseUrl: expect.anything(),
      apiVersion: expect.anything(),
    });
    expect(GoogleGenAIMock).toHaveBeenCalledWith(
      expect.objectContaining({
        apiKey: "google-key",
        httpOptions: untouchedHttpOptions,
      }),
    );
  });

  it("rejects mixed image and video inputs", async () => {
    stubGoogleApiKey();
    const provider = buildGoogleVideoGenerationProvider();

    const pending = provider.generateVideo({
      provider: "google",
      model: VEO_MODEL,
      prompt: "Animate",
      cfg: {},
      inputImages: [{ buffer: Buffer.from("img"), mimeType: "image/png" }],
      inputVideos: [{ buffer: Buffer.from("vid"), mimeType: "video/mp4" }],
    });

    await expect(pending).rejects.toThrow(
      "Google video generation does not support image and video inputs together.",
    );
  });
});
|
||||
251
extensions/google/video-generation-provider.ts
Normal file
251
extensions/google/video-generation-provider.ts
Normal file
@@ -0,0 +1,251 @@
|
||||
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { GoogleGenAI } from "@google/genai";
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
import { normalizeGoogleApiBaseUrl } from "./api.js";
|
||||
|
||||
const DEFAULT_GOOGLE_VIDEO_MODEL = "veo-3.1-fast-generate-preview";
|
||||
const DEFAULT_TIMEOUT_MS = 180_000;
|
||||
const POLL_INTERVAL_MS = 10_000;
|
||||
const MAX_POLL_ATTEMPTS = 90;
|
||||
const GOOGLE_VIDEO_MIN_DURATION_SECONDS = 4;
|
||||
const GOOGLE_VIDEO_MAX_DURATION_SECONDS = 8;
|
||||
|
||||
/**
 * Reads the optional Google base URL override from the request configuration.
 *
 * @param req - Video generation request whose `cfg.models.providers.google.baseUrl` is inspected.
 * @returns The override passed through `normalizeGoogleApiBaseUrl`, or `undefined` when the
 *   setting is absent or blank after trimming.
 */
function resolveConfiguredGoogleVideoBaseUrl(req: VideoGenerationRequest): string | undefined {
  const override = req.cfg?.models?.providers?.google?.baseUrl?.trim();
  if (!override) {
    return undefined;
  }
  return normalizeGoogleApiBaseUrl(override);
}
|
||||
|
||||
/**
 * Picks the aspect ratio to request for a generated video.
 *
 * An explicit `aspectRatio` of "16:9" or "9:16" (after trimming) wins. Otherwise a
 * `WIDTHxHEIGHT` size string is classified by orientation: landscape and square sizes map to
 * "16:9", portrait sizes to "9:16".
 *
 * @returns The chosen ratio, or `undefined` when neither input yields one.
 */
function resolveAspectRatio(params: {
  aspectRatio?: string;
  size?: string;
}): "16:9" | "9:16" | undefined {
  const explicit = params.aspectRatio?.trim();
  if (explicit === "16:9" || explicit === "9:16") {
    return explicit;
  }

  // A missing or blank size cannot match the pattern, so it falls out as `undefined` here.
  const dimensions = /^(\d+)x(\d+)$/u.exec(params.size?.trim() ?? "");
  if (dimensions === null) {
    return undefined;
  }

  const [widthPx, heightPx] = [dimensions[1], dimensions[2]].map((digits) =>
    Number.parseInt(digits ?? "", 10),
  );
  // Extremely long digit runs parse to Infinity; treat those as unusable.
  if (!Number.isFinite(widthPx) || !Number.isFinite(heightPx)) {
    return undefined;
  }
  if (widthPx >= heightPx) {
    return "16:9";
  }
  return "9:16";
}
|
||||
|
||||
/**
 * Picks the output resolution to request for a generated video.
 *
 * An explicit `resolution` of "720P" or "1080P" wins. It is trimmed and compared
 * case-insensitively, so "720p" or " 1080P " are honoured instead of being silently ignored.
 * Otherwise a `WIDTHxHEIGHT` size string is classified by its longest edge: 1920 and above maps
 * to "1080p", 1280 and above to "720p".
 *
 * @param params.resolution - Requested resolution label, if any.
 * @param params.size - Requested pixel size in `WIDTHxHEIGHT` form, if any.
 * @returns "720p" or "1080p", or `undefined` when neither input maps to one of them.
 */
function resolveResolution(params: {
  resolution?: string;
  size?: string;
}): "720p" | "1080p" | undefined {
  const requested = params.resolution?.trim().toUpperCase();
  if (requested === "720P") {
    return "720p";
  }
  if (requested === "1080P") {
    return "1080p";
  }

  const size = params.size?.trim();
  if (!size) {
    return undefined;
  }
  const match = /^(\d+)x(\d+)$/u.exec(size);
  if (!match) {
    return undefined;
  }
  const width = Number.parseInt(match[1] ?? "", 10);
  const height = Number.parseInt(match[2] ?? "", 10);
  const longestEdge = Math.max(width, height);
  if (longestEdge >= 1920) {
    return "1080p";
  }
  // Sizes below 1280 on the longest edge have no matching label; leave the field unset.
  return longestEdge >= 1280 ? "720p" : undefined;
}
|
||||
|
||||
/**
 * Normalises a requested clip length to a whole number of seconds inside the
 * `GOOGLE_VIDEO_MIN_DURATION_SECONDS`..`GOOGLE_VIDEO_MAX_DURATION_SECONDS` window.
 *
 * @param durationSeconds - Requested duration; may be fractional or missing.
 * @returns The rounded, clamped duration, or `undefined` when the input is not a finite number.
 */
function resolveDurationSeconds(durationSeconds: number | undefined): number | undefined {
  if (typeof durationSeconds === "number" && Number.isFinite(durationSeconds)) {
    const wholeSeconds = Math.round(durationSeconds);
    const raisedToMinimum = Math.max(GOOGLE_VIDEO_MIN_DURATION_SECONDS, wholeSeconds);
    return Math.min(GOOGLE_VIDEO_MAX_DURATION_SECONDS, raisedToMinimum);
  }
  return undefined;
}
|
||||
|
||||
/**
 * Converts the first reference image of the request into an inline `{ imageBytes, mimeType }`
 * payload.
 *
 * Only buffer-backed inputs are used; an input without a buffer yields `undefined`. The MIME
 * type defaults to "image/png" when missing or blank.
 */
function resolveInputImage(req: VideoGenerationRequest) {
  const source = req.inputImages?.[0];
  const bytes = source?.buffer;
  if (!source || !bytes) {
    return undefined;
  }
  const mimeType = source.mimeType?.trim() || "image/png";
  return { imageBytes: bytes.toString("base64"), mimeType };
}
|
||||
|
||||
/**
 * Converts the first reference video of the request into an inline `{ videoBytes, mimeType }`
 * payload.
 *
 * Only buffer-backed inputs are used; an input without a buffer yields `undefined`. The MIME
 * type defaults to "video/mp4" when missing or blank.
 */
function resolveInputVideo(req: VideoGenerationRequest) {
  const source = req.inputVideos?.[0];
  const bytes = source?.buffer;
  if (!source || !bytes) {
    return undefined;
  }
  const mimeType = source.mimeType?.trim() || "video/mp4";
  return { videoBytes: bytes.toString("base64"), mimeType };
}
|
||||
|
||||
/**
 * Fetches a generated video through the SDK's file download call and returns it as an
 * in-memory asset.
 *
 * The SDK call writes to a path on disk, so the file is staged in a fresh temporary directory,
 * read back into a buffer, and the directory is removed again whether or not the download
 * succeeded.
 *
 * @param params.client - SDK client whose `files.download` performs the transfer.
 * @param params.file - File handle from the generation response, passed through untouched.
 * @param params.index - Zero-based position of the video; used for the 1-based file name.
 */
async function downloadGeneratedVideo(params: {
  client: GoogleGenAI;
  file: unknown;
  index: number;
}): Promise<GeneratedVideoAsset> {
  const fileName = `video-${params.index + 1}.mp4`;
  const scratchDir = await mkdtemp(path.join(os.tmpdir(), "openclaw-google-video-"));
  try {
    const stagedPath = path.join(scratchDir, fileName);
    await params.client.files.download({
      file: params.file as never,
      downloadPath: stagedPath,
    });
    const buffer = await readFile(stagedPath);
    return { buffer, mimeType: "video/mp4", fileName };
  } finally {
    await rm(scratchDir, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Builds the Google video generation provider.
 *
 * `generateVideo` validates the reference inputs (at most one image or one video, never both),
 * resolves the API key, submits the job through the `@google/genai` SDK, polls the returned
 * operation until it reports `done`, and returns the resulting videos as in-memory assets.
 *
 * @throws Error when inputs are invalid, the API key is missing, polling exceeds
 *   `MAX_POLL_ATTEMPTS`, the operation reports an error, or no video is returned.
 */
export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "google",
    label: "Google",
    defaultModel: DEFAULT_GOOGLE_VIDEO_MODEL,
    models: [
      DEFAULT_GOOGLE_VIDEO_MODEL,
      "veo-3.1-generate-preview",
      "veo-3.1-lite-generate-preview",
      "veo-3.0-fast-generate-001",
      "veo-3.0-generate-001",
      "veo-2.0-generate-001",
    ],
    isConfigured: ({ agentDir }) => isProviderApiKeyConfigured({ provider: "google", agentDir }),
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 1,
      maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS,
      supportsAspectRatio: true,
      supportsResolution: true,
      supportsSize: true,
      supportsAudio: true,
    },
    async generateVideo(req) {
      // Validate reference inputs before touching auth or the network.
      const imageCount = req.inputImages?.length ?? 0;
      const videoCount = req.inputVideos?.length ?? 0;
      if (imageCount > 1) {
        throw new Error("Google video generation supports at most one input image.");
      }
      if (videoCount > 1) {
        throw new Error("Google video generation supports at most one input video.");
      }
      if (imageCount > 0 && videoCount > 0) {
        throw new Error(
          "Google video generation does not support image and video inputs together.",
        );
      }

      const { apiKey } = await resolveApiKeyForProvider({
        provider: "google",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!apiKey) {
        throw new Error("Google API key missing");
      }

      const baseUrl = resolveConfiguredGoogleVideoBaseUrl(req);
      const durationSeconds = resolveDurationSeconds(req.durationSeconds);
      const client = new GoogleGenAI({
        apiKey,
        httpOptions: {
          // Only override the SDK's endpoint when one is configured.
          ...(baseUrl ? { baseUrl } : {}),
          timeout: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
        },
      });

      const model = req.model?.trim() || DEFAULT_GOOGLE_VIDEO_MODEL;
      const image = resolveInputImage(req);
      const video = resolveInputVideo(req);
      const aspectRatio = resolveAspectRatio({ aspectRatio: req.aspectRatio, size: req.size });
      const resolution = resolveResolution({ resolution: req.resolution, size: req.size });

      let operation = await client.models.generateVideos({
        model,
        prompt: req.prompt,
        image,
        video,
        config: {
          numberOfVideos: 1,
          // Optional settings are only sent when they resolved to a value.
          ...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
          ...(aspectRatio ? { aspectRatio } : {}),
          ...(resolution ? { resolution } : {}),
          ...(req.audio === true ? { generateAudio: true } : {}),
        },
      });

      // Poll the operation until it reports completion or the attempt budget runs out.
      let completedPolls = 0;
      while (!operation.done) {
        if (completedPolls >= MAX_POLL_ATTEMPTS) {
          throw new Error("Google video generation did not finish in time");
        }
        await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
        operation = await client.operations.getVideosOperation({ operation });
        completedPolls += 1;
      }

      if (operation.error) {
        throw new Error(JSON.stringify(operation.error));
      }
      const generatedVideos = operation.response?.generatedVideos ?? [];
      if (generatedVideos.length === 0) {
        throw new Error("Google video generation response missing generated videos");
      }

      const videos = await Promise.all(
        generatedVideos.map(async (entry, index) => {
          const handle = entry.video;
          if (!handle) {
            throw new Error("Google generated video missing file handle");
          }
          if (handle.videoBytes) {
            // Inline payload: decode it directly, no download needed.
            return {
              buffer: Buffer.from(handle.videoBytes, "base64"),
              mimeType: handle.mimeType?.trim() || "video/mp4",
              fileName: `video-${index + 1}.mp4`,
            };
          }
          return await downloadGeneratedVideo({ client, file: handle, index });
        }),
      );

      const operationName = operation.name;
      return {
        videos,
        model,
        metadata: operationName ? { operationName } : undefined,
      };
    },
  };
}
|
||||
@@ -28,6 +28,7 @@ import { applyMinimaxApiConfig, applyMinimaxApiConfigCn } from "./onboard.js";
|
||||
import { buildMinimaxPortalProvider, buildMinimaxProvider } from "./provider-catalog.js";
|
||||
import { buildMinimaxSpeechProvider } from "./speech-provider.js";
|
||||
import { createMiniMaxWebSearchProvider } from "./src/minimax-web-search-provider.js";
|
||||
import { buildMinimaxVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
const API_PROVIDER_ID = "minimax";
|
||||
const PORTAL_PROVIDER_ID = "minimax-portal";
|
||||
@@ -313,6 +314,7 @@ export default definePluginEntry({
|
||||
});
|
||||
api.registerImageGenerationProvider(buildMinimaxImageGenerationProvider());
|
||||
api.registerImageGenerationProvider(buildMinimaxPortalImageGenerationProvider());
|
||||
api.registerVideoGenerationProvider(buildMinimaxVideoGenerationProvider());
|
||||
api.registerSpeechProvider(buildMinimaxSpeechProvider());
|
||||
api.registerWebSearchProvider(createMiniMaxWebSearchProvider());
|
||||
},
|
||||
|
||||
@@ -64,6 +64,7 @@
|
||||
"speechProviders": ["minimax"],
|
||||
"mediaUnderstandingProviders": ["minimax", "minimax-portal"],
|
||||
"imageGenerationProviders": ["minimax", "minimax-portal"],
|
||||
"videoGenerationProviders": ["minimax"],
|
||||
"webSearchProviders": ["minimax"]
|
||||
},
|
||||
"uiHints": {
|
||||
|
||||
14
extensions/minimax/plugin-registration.contract.test.ts
Normal file
14
extensions/minimax/plugin-registration.contract.test.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
|
||||
|
||||
// Registration contract for the MiniMax plugin: the ids listed here are what the shared
// contract helper is asked to verify against the plugin's registrations.
describePluginRegistrationContract({
  // Identity.
  pluginId: "minimax",
  providerIds: ["minimax", "minimax-portal"],

  // Capability providers.
  speechProviderIds: ["minimax"],
  mediaUnderstandingProviderIds: ["minimax", "minimax-portal"],
  imageGenerationProviderIds: ["minimax", "minimax-portal"],
  videoGenerationProviderIds: ["minimax"],
  webSearchProviderIds: ["minimax"],

  // Entry points the registered providers must expose.
  requireDescribeImages: true,
  requireGenerateImage: true,
  requireGenerateVideo: true,
});
|
||||
159
extensions/minimax/video-generation-provider.test.ts
Normal file
159
extensions/minimax/video-generation-provider.test.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildMinimaxVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
const {
|
||||
resolveApiKeyForProviderMock,
|
||||
postJsonRequestMock,
|
||||
fetchWithTimeoutMock,
|
||||
assertOkOrThrowHttpErrorMock,
|
||||
resolveProviderHttpRequestConfigMock,
|
||||
} = vi.hoisted(() => ({
|
||||
resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "minimax-key" })),
|
||||
postJsonRequestMock: vi.fn(),
|
||||
fetchWithTimeoutMock: vi.fn(),
|
||||
assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
|
||||
resolveProviderHttpRequestConfigMock: vi.fn((params) => ({
|
||||
baseUrl: params.baseUrl ?? params.defaultBaseUrl,
|
||||
allowPrivateNetwork: false,
|
||||
headers: new Headers(params.defaultHeaders),
|
||||
dispatcherPolicy: undefined,
|
||||
})),
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({
|
||||
resolveApiKeyForProvider: resolveApiKeyForProviderMock,
|
||||
}));
|
||||
|
||||
vi.mock("openclaw/plugin-sdk/provider-http", () => ({
|
||||
assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
|
||||
fetchWithTimeout: fetchWithTimeoutMock,
|
||||
postJsonRequest: postJsonRequestMock,
|
||||
resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
|
||||
}));
|
||||
|
||||
describe("minimax video generation provider", () => {
  /** Minimal stand-in for a JSON HTTP response. */
  const jsonResponse = (body: unknown) => ({ json: async () => body });

  /** Minimal stand-in for the binary video download response. */
  const mp4Response = () => ({
    headers: new Headers({ "content-type": "video/mp4" }),
    arrayBuffer: async () => Buffer.from("mp4-bytes"),
  });

  /** Makes the task-creation POST succeed with the given task id. */
  function stubSubmittedTask(taskId: string) {
    postJsonRequestMock.mockResolvedValue({
      response: jsonResponse({ task_id: taskId, base_resp: { status_code: 0 } }),
      release: vi.fn(async () => {}),
    });
  }

  /** Runs a plain text-to-video request through a fresh provider instance. */
  function generateFoxVideo() {
    return buildMinimaxVideoGenerationProvider().generateVideo({
      provider: "minimax",
      model: "MiniMax-Hailuo-2.3",
      prompt: "A fox sprints across snowy hills",
      cfg: {},
    });
  }

  afterEach(() => {
    resolveApiKeyForProviderMock.mockClear();
    postJsonRequestMock.mockReset();
    fetchWithTimeoutMock.mockReset();
    assertOkOrThrowHttpErrorMock.mockClear();
    resolveProviderHttpRequestConfigMock.mockClear();
  });

  it("creates a task, polls status, and downloads the generated video", async () => {
    stubSubmittedTask("task-123");
    // GET #1: status poll (already finished). GET #2: the video bytes.
    fetchWithTimeoutMock
      .mockResolvedValueOnce(
        jsonResponse({
          task_id: "task-123",
          status: "Success",
          video_url: "https://example.com/out.mp4",
          file_id: "file-1",
          base_resp: { status_code: 0 },
        }),
      )
      .mockResolvedValueOnce(mp4Response());

    const result = await generateFoxVideo();

    expect(postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://api.minimax.io/v1/video_generation",
      }),
    );
    expect(result.videos).toHaveLength(1);
    expect(result.metadata).toEqual(
      expect.objectContaining({
        taskId: "task-123",
        fileId: "file-1",
      }),
    );
  });

  it("downloads via file_id when the status response omits video_url", async () => {
    stubSubmittedTask("task-456");
    // GET #1: status poll without video_url. GET #2: file metadata. GET #3: the video bytes.
    fetchWithTimeoutMock
      .mockResolvedValueOnce(
        jsonResponse({
          task_id: "task-456",
          status: "Success",
          file_id: "file-9",
          base_resp: { status_code: 0 },
        }),
      )
      .mockResolvedValueOnce(
        jsonResponse({
          file: {
            file_id: "file-9",
            filename: "output_aigc.mp4",
            download_url: "https://example.com/download.mp4",
          },
          base_resp: { status_code: 0 },
        }),
      )
      .mockResolvedValueOnce(mp4Response());

    const result = await generateFoxVideo();

    const anyGetRequest = expect.objectContaining({ method: "GET" });
    expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith(
      2,
      "https://api.minimax.io/v1/files/retrieve?file_id=file-9",
      anyGetRequest,
      expect.any(Number),
      expect.any(Function),
    );
    expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith(
      3,
      "https://example.com/download.mp4",
      anyGetRequest,
      expect.any(Number),
      expect.any(Function),
    );
    expect(result.videos).toHaveLength(1);
    expect(result.metadata).toEqual(
      expect.objectContaining({
        taskId: "task-456",
        fileId: "file-9",
        videoUrl: undefined,
      }),
    );
  });
});
|
||||
319
extensions/minimax/video-generation-provider.ts
Normal file
319
extensions/minimax/video-generation-provider.ts
Normal file
@@ -0,0 +1,319 @@
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
fetchWithTimeout,
|
||||
postJsonRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
|
||||
const DEFAULT_MINIMAX_VIDEO_BASE_URL = "https://api.minimax.io";
|
||||
const DEFAULT_MINIMAX_VIDEO_MODEL = "MiniMax-Hailuo-2.3";
|
||||
const DEFAULT_TIMEOUT_MS = 120_000;
|
||||
const POLL_INTERVAL_MS = 10_000;
|
||||
const MAX_POLL_ATTEMPTS = 90;
|
||||
|
||||
type MinimaxBaseResp = {
|
||||
status_code?: number;
|
||||
status_msg?: string;
|
||||
};
|
||||
|
||||
type MinimaxCreateResponse = {
|
||||
task_id?: string;
|
||||
base_resp?: MinimaxBaseResp;
|
||||
};
|
||||
|
||||
type MinimaxQueryResponse = {
|
||||
task_id?: string;
|
||||
status?: string;
|
||||
file_id?: string;
|
||||
video_url?: string;
|
||||
base_resp?: MinimaxBaseResp;
|
||||
};
|
||||
|
||||
type MinimaxFileRetrieveResponse = {
|
||||
file?: {
|
||||
download_url?: string;
|
||||
filename?: string;
|
||||
};
|
||||
base_resp?: MinimaxBaseResp;
|
||||
};
|
||||
|
||||
/**
 * Determines the origin used for MiniMax video API calls.
 *
 * A configured `models.providers.minimax.baseUrl` is reduced to its origin, so any path on the
 * configured URL is dropped. A missing, blank or unparseable value falls back to
 * `DEFAULT_MINIMAX_VIDEO_BASE_URL`.
 */
function resolveMinimaxVideoBaseUrl(
  cfg: Parameters<typeof resolveApiKeyForProvider>[0]["cfg"],
): string {
  const configured = cfg?.models?.providers?.minimax?.baseUrl?.trim();
  if (configured) {
    try {
      return new URL(configured).origin;
    } catch {
      // Not a parseable URL; use the default below.
    }
  }
  return DEFAULT_MINIMAX_VIDEO_BASE_URL;
}
|
||||
|
||||
/**
 * Throws when a MiniMax response envelope carries a non-zero numeric `status_code`.
 *
 * A missing envelope, a non-numeric code, or a code of 0 all pass silently.
 *
 * @param baseResp - The `base_resp` object of a MiniMax response, if present.
 * @param context - Prefix for the error message describing the failed operation.
 * @throws Error formatted as `<context> (<code>): <status_msg or "unknown error">`.
 */
function assertMinimaxBaseResp(baseResp: MinimaxBaseResp | undefined, context: string): void {
  const statusCode = baseResp?.status_code;
  if (typeof statusCode !== "number" || statusCode === 0) {
    return;
  }
  const detail = baseResp?.status_msg ?? "unknown error";
  throw new Error(`${context} (${statusCode}): ${detail}`);
}
|
||||
|
||||
/**
 * Encodes binary content as a base64 `data:` URL with the given MIME type.
 */
function toDataUrl(buffer: Buffer, mimeType: string): string {
  const encoded = buffer.toString("base64");
  return ["data:", mimeType, ";base64,", encoded].join("");
}
|
||||
|
||||
/**
 * Produces the first-frame image value for an image-to-video request.
 *
 * A non-blank `url` on the first input image is used as-is (trimmed); otherwise the image
 * buffer is inlined as a data URL, defaulting the MIME type to "image/png".
 *
 * @returns The URL or data URL, or `undefined` when the request has no input image.
 * @throws Error when the input image has neither a usable URL nor a buffer.
 */
function resolveFirstFrameImage(req: VideoGenerationRequest): string | undefined {
  const firstImage = req.inputImages?.[0];
  if (!firstImage) {
    return undefined;
  }

  const remoteUrl = firstImage.url?.trim();
  if (remoteUrl) {
    return remoteUrl;
  }

  if (firstImage.buffer) {
    return toDataUrl(firstImage.buffer, firstImage.mimeType?.trim() || "image/png");
  }
  throw new Error("MiniMax image-to-video input is missing image data.");
}
|
||||
|
||||
/**
 * Polls the MiniMax task status endpoint until the task succeeds or fails.
 *
 * Up to `MAX_POLL_ATTEMPTS` status requests are made, waiting `POLL_INTERVAL_MS` after each
 * response that is neither "Success" nor "Fail" (including unrecognised statuses).
 *
 * @param params.taskId - Task id returned when the generation was submitted.
 * @param params.headers - Headers sent with each status request.
 * @param params.timeoutMs - Per-request timeout; defaults to `DEFAULT_TIMEOUT_MS`.
 * @param params.baseUrl - API origin the status path is appended to.
 * @param params.fetchFn - Fetch implementation handed to `fetchWithTimeout`.
 * @returns The status payload of the successful task.
 * @throws Error when a request fails, the envelope reports an error, the task reports "Fail",
 *   or the attempt budget is exhausted.
 */
async function pollMinimaxVideo(params: {
  taskId: string;
  headers: Headers;
  timeoutMs?: number;
  baseUrl: string;
  fetchFn: typeof fetch;
}): Promise<MinimaxQueryResponse> {
  // The status endpoint is the same for every attempt, so build it once.
  const statusUrl = new URL(`${params.baseUrl}/v1/query/video_generation`);
  statusUrl.searchParams.set("task_id", params.taskId);
  const endpoint = statusUrl.toString();
  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;

  let attemptsLeft = MAX_POLL_ATTEMPTS;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;

    const response = await fetchWithTimeout(
      endpoint,
      { method: "GET", headers: params.headers },
      requestTimeoutMs,
      params.fetchFn,
    );
    await assertOkOrThrowHttpError(response, "MiniMax video status request failed");
    const payload = (await response.json()) as MinimaxQueryResponse;
    assertMinimaxBaseResp(payload.base_resp, "MiniMax video generation failed");

    const status = payload.status?.trim();
    if (status === "Success") {
      return payload;
    }
    if (status === "Fail") {
      throw new Error(payload.base_resp?.status_msg?.trim() || "MiniMax video generation failed");
    }
    // "Preparing", "Processing" and anything unrecognised: wait, then ask again.
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
  }
  throw new Error(`MiniMax video generation task ${params.taskId} did not finish in time`);
}
|
||||
|
||||
/**
 * Downloads a generated video from a direct URL into memory.
 *
 * The MIME type comes from the response's `content-type` header (default "video/mp4"); the
 * file name is `video-1.webm` when that type mentions "webm" and `video-1.mp4` otherwise.
 *
 * @throws Error via `assertOkOrThrowHttpError` when the download response is not OK.
 */
async function downloadVideoFromUrl(params: {
  url: string;
  timeoutMs?: number;
  fetchFn: typeof fetch;
}): Promise<GeneratedVideoAsset> {
  const { url, timeoutMs, fetchFn } = params;
  const response = await fetchWithTimeout(
    url,
    { method: "GET" },
    timeoutMs ?? DEFAULT_TIMEOUT_MS,
    fetchFn,
  );
  await assertOkOrThrowHttpError(response, "MiniMax generated video download failed");

  const mimeType = response.headers.get("content-type")?.trim() || "video/mp4";
  const bytes = await response.arrayBuffer();
  const extension = mimeType.includes("webm") ? "webm" : "mp4";
  return {
    buffer: Buffer.from(bytes),
    mimeType,
    fileName: `video-1.${extension}`,
  };
}
|
||||
|
||||
/**
 * Downloads a generated video identified only by a MiniMax `file_id`.
 *
 * The file metadata is fetched from `/v1/files/retrieve` first (authenticated with the given
 * headers) to obtain its `download_url`; the bytes are then fetched from that URL without the
 * API headers. The file name reported in the metadata is preferred over the generated default.
 *
 * @throws Error when the metadata request fails, the envelope reports an error, the metadata
 *   has no `download_url`, or the download itself fails.
 */
async function downloadVideoFromFileId(params: {
  fileId: string;
  headers: Headers;
  timeoutMs?: number;
  baseUrl: string;
  fetchFn: typeof fetch;
}): Promise<GeneratedVideoAsset> {
  const retrieveUrl = new URL(`${params.baseUrl}/v1/files/retrieve`);
  retrieveUrl.searchParams.set("file_id", params.fileId);

  const metadataResponse = await fetchWithTimeout(
    retrieveUrl.toString(),
    { method: "GET", headers: params.headers },
    params.timeoutMs ?? DEFAULT_TIMEOUT_MS,
    params.fetchFn,
  );
  await assertOkOrThrowHttpError(
    metadataResponse,
    "MiniMax generated video metadata request failed",
  );
  const metadata = (await metadataResponse.json()) as MinimaxFileRetrieveResponse;
  assertMinimaxBaseResp(metadata.base_resp, "MiniMax generated video metadata request failed");

  const downloadUrl = metadata.file?.download_url?.trim();
  if (!downloadUrl) {
    throw new Error("MiniMax generated video metadata missing download_url");
  }

  // The binary fetch is the same as for a direct video URL; only the file name differs.
  const asset = await downloadVideoFromUrl({
    url: downloadUrl,
    timeoutMs: params.timeoutMs,
    fetchFn: params.fetchFn,
  });
  return {
    ...asset,
    fileName: metadata.file?.filename?.trim() || asset.fileName,
  };
}
|
||||
|
||||
/**
 * Builds the MiniMax video generation provider.
 *
 * `generateVideo` rejects video reference inputs, resolves the API key, submits a generation
 * task, polls it to completion, and downloads the result, preferring the direct `video_url`
 * and falling back to a `file_id` lookup.
 *
 * @throws Error when video inputs are supplied, the API key is missing, the API reports a
 *   failure, or the finished task exposes neither a video URL nor a file id.
 */
export function buildMinimaxVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "minimax",
    label: "MiniMax",
    defaultModel: DEFAULT_MINIMAX_VIDEO_MODEL,
    models: [
      DEFAULT_MINIMAX_VIDEO_MODEL,
      "MiniMax-Hailuo-2.3-Fast",
      "MiniMax-Hailuo-02",
      "I2V-01-Director",
      "I2V-01-live",
      "I2V-01",
    ],
    isConfigured: ({ agentDir }) => isProviderApiKeyConfigured({ provider: "minimax", agentDir }),
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 0,
      maxDurationSeconds: 10,
      supportsResolution: true,
      supportsWatermark: false,
    },
    async generateVideo(req) {
      const videoInputCount = req.inputVideos?.length ?? 0;
      if (videoInputCount > 0) {
        throw new Error("MiniMax video generation does not support video reference inputs.");
      }

      const { apiKey } = await resolveApiKeyForProvider({
        provider: "minimax",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!apiKey) {
        throw new Error("MiniMax API key missing");
      }

      const fetchFn = fetch;
      const httpConfig = resolveProviderHttpRequestConfig({
        baseUrl: resolveMinimaxVideoBaseUrl(req.cfg),
        defaultBaseUrl: DEFAULT_MINIMAX_VIDEO_BASE_URL,
        allowPrivateNetwork: false,
        defaultHeaders: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        provider: "minimax",
        capability: "video",
        transport: "http",
      });
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } = httpConfig;

      // Assemble the task payload; optional fields are only sent when the request sets them.
      const model = req.model?.trim() || DEFAULT_MINIMAX_VIDEO_MODEL;
      const firstFrameImage = resolveFirstFrameImage(req);
      const hasDuration =
        typeof req.durationSeconds === "number" && Number.isFinite(req.durationSeconds);
      const body: Record<string, unknown> = {
        model,
        prompt: req.prompt,
        ...(firstFrameImage ? { first_frame_image: firstFrameImage } : {}),
        ...(req.resolution ? { resolution: req.resolution } : {}),
        ...(hasDuration ? { duration: Math.max(1, Math.round(req.durationSeconds as number)) } : {}),
      };

      const { response, release } = await postJsonRequest({
        url: `${baseUrl}/v1/video_generation`,
        headers,
        body,
        timeoutMs: req.timeoutMs,
        fetchFn,
        allowPrivateNetwork,
        dispatcherPolicy,
      });
      try {
        await assertOkOrThrowHttpError(response, "MiniMax video generation failed");
        const submitted = (await response.json()) as MinimaxCreateResponse;
        assertMinimaxBaseResp(submitted.base_resp, "MiniMax video generation failed");
        const taskId = submitted.task_id?.trim();
        if (!taskId) {
          throw new Error("MiniMax video generation response missing task_id");
        }

        const completed = await pollMinimaxVideo({
          taskId,
          headers,
          timeoutMs: req.timeoutMs,
          baseUrl,
          fetchFn,
        });
        const videoUrl = completed.video_url?.trim();
        const fileId = completed.file_id?.trim();

        // Prefer the direct URL; fall back to resolving the file id.
        let video: GeneratedVideoAsset;
        if (videoUrl) {
          video = await downloadVideoFromUrl({
            url: videoUrl,
            timeoutMs: req.timeoutMs,
            fetchFn,
          });
        } else if (fileId) {
          video = await downloadVideoFromFileId({
            fileId,
            headers,
            timeoutMs: req.timeoutMs,
            baseUrl,
            fetchFn,
          });
        } else {
          throw new Error("MiniMax video generation completed without a video URL or file_id");
        }

        return {
          videos: [video],
          model,
          metadata: {
            taskId,
            status: completed.status,
            fileId,
            videoUrl,
          },
        };
      } finally {
        await release();
      }
    },
  };
}
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
import { buildOpenAIRealtimeTranscriptionProvider } from "./realtime-transcription-provider.js";
|
||||
import { buildOpenAIRealtimeVoiceProvider } from "./realtime-voice-provider.js";
|
||||
import { buildOpenAISpeechProvider } from "./speech-provider.js";
|
||||
import { buildOpenAIVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
export default definePluginEntry({
|
||||
id: "openai",
|
||||
@@ -43,5 +44,6 @@ export default definePluginEntry({
|
||||
api.registerSpeechProvider(buildOpenAISpeechProvider());
|
||||
api.registerMediaUnderstandingProvider(openaiMediaUnderstandingProvider);
|
||||
api.registerMediaUnderstandingProvider(openaiCodexMediaUnderstandingProvider);
|
||||
api.registerVideoGenerationProvider(buildOpenAIVideoGenerationProvider());
|
||||
},
|
||||
});
|
||||
|
||||
@@ -39,7 +39,8 @@
|
||||
"realtimeTranscriptionProviders": ["openai"],
|
||||
"realtimeVoiceProviders": ["openai"],
|
||||
"mediaUnderstandingProviders": ["openai", "openai-codex"],
|
||||
"imageGenerationProviders": ["openai"]
|
||||
"imageGenerationProviders": ["openai"],
|
||||
"videoGenerationProviders": ["openai"]
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
|
||||
15
extensions/openai/plugin-registration.contract.test.ts
Normal file
15
extensions/openai/plugin-registration.contract.test.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
|
||||
|
||||
// Registration contract for the OpenAI plugin: lists every provider id the
// plugin is expected to register, per capability.
describePluginRegistrationContract({
  pluginId: "openai",

  // Provider ids per capability.
  providerIds: ["openai", "openai-codex"],
  speechProviderIds: ["openai"],
  realtimeTranscriptionProviderIds: ["openai"],
  realtimeVoiceProviderIds: ["openai"],
  mediaUnderstandingProviderIds: ["openai", "openai-codex"],
  imageGenerationProviderIds: ["openai"],
  videoGenerationProviderIds: ["openai"],
  cliBackendIds: ["codex-cli"],

  // Generation entry points required by the contract.
  requireGenerateImage: true,
  requireGenerateVideo: true,
});
|
||||
114
extensions/openai/video-generation-provider.test.ts
Normal file
114
extensions/openai/video-generation-provider.test.ts
Normal file
@@ -0,0 +1,114 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildOpenAIVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
// Shared mock handles, created through vi.hoisted so the vi.mock factories
// below can reference them.
const {
  resolveApiKeyForProviderMock,
  postTranscriptionRequestMock,
  fetchWithTimeoutMock,
  assertOkOrThrowHttpErrorMock,
  resolveProviderHttpRequestConfigMock,
} = vi.hoisted(() => {
  return {
    // Always resolves a fixed API key.
    resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "openai-key" })),
    // Configured per test.
    postTranscriptionRequestMock: vi.fn(),
    fetchWithTimeoutMock: vi.fn(),
    // Never throws: every response is treated as OK.
    assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
    // Echoes the requested base URL (or the default) and wraps the default headers.
    resolveProviderHttpRequestConfigMock: vi.fn((request) => {
      return {
        baseUrl: request.baseUrl ?? request.defaultBaseUrl,
        allowPrivateNetwork: false,
        headers: new Headers(request.defaultHeaders),
        dispatcherPolicy: undefined,
      };
    }),
  };
});

// Replace the auth runtime with the fixed-key mock.
vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => {
  return { resolveApiKeyForProvider: resolveApiKeyForProviderMock };
});

// Replace the HTTP helpers used by the provider with the mocks above.
vi.mock("openclaw/plugin-sdk/provider-http", () => {
  return {
    assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
    fetchWithTimeout: fetchWithTimeoutMock,
    postTranscriptionRequest: postTranscriptionRequestMock,
    resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
  };
});
|
||||
|
||||
describe("openai video generation provider", () => {
  // Per-test mocks are fully reset; mocks with a fixed implementation only
  // have their call history cleared.
  afterEach(() => {
    postTranscriptionRequestMock.mockReset();
    fetchWithTimeoutMock.mockReset();
    resolveApiKeyForProviderMock.mockClear();
    assertOkOrThrowHttpErrorMock.mockClear();
    resolveProviderHttpRequestConfigMock.mockClear();
  });

  it("creates, polls, and downloads a Sora video", async () => {
    // Response to the create (POST) call: the job is queued.
    const createResponse = {
      json: async () => ({
        id: "vid_123",
        model: "sora-2",
        status: "queued",
      }),
    };
    // First poll already reports completion.
    const statusResponse = {
      json: async () => ({
        id: "vid_123",
        model: "sora-2",
        status: "completed",
        seconds: "4",
        size: "720x1280",
      }),
    };
    // Content download response.
    const downloadResponse = {
      headers: new Headers({ "content-type": "video/mp4" }),
      arrayBuffer: async () => Buffer.from("mp4-bytes"),
    };

    postTranscriptionRequestMock.mockResolvedValue({
      response: createResponse,
      release: vi.fn(async () => {}),
    });
    fetchWithTimeoutMock.mockResolvedValueOnce(statusResponse);
    fetchWithTimeoutMock.mockResolvedValueOnce(downloadResponse);

    const provider = buildOpenAIVideoGenerationProvider();
    const result = await provider.generateVideo({
      provider: "openai",
      model: "sora-2",
      prompt: "A paper airplane gliding through golden hour light",
      cfg: {},
      durationSeconds: 4,
    });

    const expectedCreateCall = expect.objectContaining({
      url: "https://api.openai.com/v1/videos",
    });
    expect(postTranscriptionRequestMock).toHaveBeenCalledWith(expectedCreateCall);
    expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith(
      1,
      "https://api.openai.com/v1/videos/vid_123",
      expect.objectContaining({ method: "GET" }),
      120000,
      fetch,
    );
    expect(result.videos).toHaveLength(1);
    expect(result.videos[0]?.mimeType).toBe("video/mp4");
    const expectedMetadata = expect.objectContaining({
      videoId: "vid_123",
      status: "completed",
    });
    expect(result.metadata).toEqual(expectedMetadata);
  });

  it("rejects multiple reference assets", async () => {
    const provider = buildOpenAIVideoGenerationProvider();

    // One image plus one video is two reference assets in total.
    const pending = provider.generateVideo({
      provider: "openai",
      model: "sora-2",
      prompt: "Animate these",
      cfg: {},
      inputImages: [{ buffer: Buffer.from("a"), mimeType: "image/png" }],
      inputVideos: [{ buffer: Buffer.from("b"), mimeType: "video/mp4" }],
    });

    await expect(pending).rejects.toThrow(
      "OpenAI video generation supports at most one reference image or video.",
    );
  });
});
|
||||
291
extensions/openai/video-generation-provider.ts
Normal file
291
extensions/openai/video-generation-provider.ts
Normal file
@@ -0,0 +1,291 @@
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
fetchWithTimeout,
|
||||
postTranscriptionRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
|
||||
/** Base URL used when the config does not override the OpenAI provider URL. */
const DEFAULT_OPENAI_VIDEO_BASE_URL = "https://api.openai.com/v1";

/** Model used when the request does not name one. */
const DEFAULT_OPENAI_VIDEO_MODEL = "sora-2";

/** Per-request timeout applied to status and download calls when none is given. */
const DEFAULT_TIMEOUT_MS = 120_000;

/** Delay between two status polls. */
const POLL_INTERVAL_MS = 2_500;

/** Upper bound on status polls before giving up. */
const MAX_POLL_ATTEMPTS = 120;

/** Clip lengths (seconds) that requested durations are snapped to. */
const OPENAI_VIDEO_SECONDS = [4, 8, 12] as const;

/** Output sizes that may be sent as the `size` form field. */
const OPENAI_VIDEO_SIZES = ["720x1280", "1280x720", "1024x1792", "1792x1024"] as const;

/** Job states handled by the polling loop. */
type OpenAIVideoStatus = "queued" | "in_progress" | "completed" | "failed";

/** Subset of the video job payload this provider reads. */
type OpenAIVideoResponse = {
  id?: string;
  model?: string;
  status?: OpenAIVideoStatus;
  prompt?: string | null;
  seconds?: string;
  size?: string;
  error?: {
    code?: string;
    message?: string;
  } | null;
};
|
||||
|
||||
/**
 * Picks the base URL for OpenAI video calls.
 *
 * @param req - Generation request whose config may carry an `openai` provider `baseUrl`.
 * @returns The trimmed configured URL, or the default when it is missing or blank.
 */
function resolveOpenAIVideoBaseUrl(req: VideoGenerationRequest): string {
  const configured = req.cfg?.models?.providers?.openai?.baseUrl?.trim();
  if (configured) {
    return configured;
  }
  return DEFAULT_OPENAI_VIDEO_BASE_URL;
}
|
||||
|
||||
/**
 * Copies a Node `Buffer` into a freshly allocated `ArrayBuffer` of the same length.
 *
 * @param buffer - Source bytes.
 * @returns A standalone `ArrayBuffer` holding a copy of the bytes.
 */
function toBlobBytes(buffer: Buffer): ArrayBuffer {
  const bytes = new ArrayBuffer(buffer.byteLength);
  const view = new Uint8Array(bytes);
  view.set(buffer);
  return bytes;
}
|
||||
|
||||
/**
 * Snaps a requested duration to the closest entry of `OPENAI_VIDEO_SECONDS`.
 *
 * The input is rounded and clamped up to the smallest supported length first.
 * On a tie the earlier (shorter) entry wins because only strictly closer
 * candidates replace the current choice.
 *
 * @param durationSeconds - Requested length in seconds, if any.
 * @returns The chosen length as a string, or `undefined` when the input is
 *   not a finite number.
 */
function resolveDurationSeconds(durationSeconds: number | undefined): "4" | "8" | "12" | undefined {
  const usable = typeof durationSeconds === "number" && Number.isFinite(durationSeconds);
  if (!usable) {
    return undefined;
  }
  const target = Math.max(OPENAI_VIDEO_SECONDS[0], Math.round(durationSeconds));
  let closest: (typeof OPENAI_VIDEO_SECONDS)[number] = OPENAI_VIDEO_SECONDS[0];
  for (const candidate of OPENAI_VIDEO_SECONDS) {
    if (Math.abs(candidate - target) < Math.abs(closest - target)) {
      closest = candidate;
    }
  }
  return String(closest) as "4" | "8" | "12";
}
|
||||
|
||||
/**
 * Chooses the `size` value to send, in priority order:
 * 1. an explicit `size` that is one of `OPENAI_VIDEO_SIZES`,
 * 2. a size mapped from a known `aspectRatio`,
 * 3. `"1792x1024"` when `resolution` is exactly `"1080P"`.
 *
 * @returns The chosen size, or `undefined` when nothing matches.
 */
function resolveSize(params: {
  size?: string;
  aspectRatio?: string;
  resolution?: string;
}): (typeof OPENAI_VIDEO_SIZES)[number] | undefined {
  type OpenAIVideoSize = (typeof OPENAI_VIDEO_SIZES)[number];

  // 1. Explicit size, accepted only when it is on the supported list.
  const requested = params.size?.trim();
  const supported = OPENAI_VIDEO_SIZES.find((candidate) => candidate === requested);
  if (supported) {
    return supported;
  }

  // 2. Aspect ratio lookup.
  const sizeByAspectRatio = new Map<string, OpenAIVideoSize>([
    ["9:16", "720x1280"],
    ["16:9", "1280x720"],
    ["4:7", "1024x1792"],
    ["7:4", "1792x1024"],
  ]);
  const ratio = params.aspectRatio?.trim();
  const fromRatio = ratio === undefined ? undefined : sizeByAspectRatio.get(ratio);
  if (fromRatio) {
    return fromRatio;
  }

  // 3. Resolution hint.
  return params.resolution === "1080P" ? "1792x1024" : undefined;
}
|
||||
|
||||
/**
 * Turns the request's single reference image or video into a `File` for upload.
 *
 * @param req - Generation request; images and videos are counted together.
 * @returns `null` when no reference asset was supplied, otherwise a `File`
 *   with a resolved MIME type and file name.
 * @throws Error when more than one asset is supplied, or when the asset has
 *   no in-memory `buffer`.
 */
function resolveReferenceAsset(req: VideoGenerationRequest) {
  const candidates = [...(req.inputImages ?? []), ...(req.inputVideos ?? [])];
  if (candidates.length > 1) {
    throw new Error("OpenAI video generation supports at most one reference image or video.");
  }
  if (candidates.length === 0) {
    return null;
  }

  const reference = candidates[0];
  if (!reference?.buffer) {
    throw new Error(
      "OpenAI video generation currently requires local image/video uploads for reference assets.",
    );
  }

  // With a single asset, a non-empty inputVideos means that asset is the video.
  const hasVideoInput = (req.inputVideos?.length ?? 0) > 0;
  const mimeType = reference.mimeType?.trim() || (hasVideoInput ? "video/mp4" : "image/png");

  let extension = "png";
  if (mimeType.includes("video")) {
    extension = "mp4";
  } else if (mimeType.includes("jpeg")) {
    extension = "jpg";
  } else if (mimeType.includes("webp")) {
    extension = "webp";
  }

  const fallbackStem = hasVideoInput ? "reference-video" : "reference-image";
  const fileName = reference.fileName?.trim() || `${fallbackStem}.${extension}`;
  return new File([toBlobBytes(reference.buffer)], fileName, { type: mimeType });
}
|
||||
|
||||
/**
 * Polls `GET {baseUrl}/videos/{videoId}` until the job completes or fails.
 *
 * Makes at most `MAX_POLL_ATTEMPTS` requests and waits `POLL_INTERVAL_MS`
 * after every non-terminal response. `timeoutMs` (default
 * `DEFAULT_TIMEOUT_MS`) is passed to each individual request.
 *
 * @returns The payload of the first response whose status is `"completed"`.
 * @throws Error with the job's error message (or a generic one) when the
 *   status is `"failed"`, and when the attempts run out.
 */
async function pollOpenAIVideo(params: {
  videoId: string;
  headers: Headers;
  timeoutMs?: number;
  baseUrl: string;
  fetchFn: typeof fetch;
}): Promise<OpenAIVideoResponse> {
  const { videoId, headers, baseUrl, fetchFn } = params;
  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const statusUrl = `${baseUrl}/videos/${videoId}`;

  let attemptsLeft = MAX_POLL_ATTEMPTS;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;

    const statusResponse = await fetchWithTimeout(
      statusUrl,
      { method: "GET", headers },
      requestTimeoutMs,
      fetchFn,
    );
    await assertOkOrThrowHttpError(statusResponse, "OpenAI video status request failed");

    const job = (await statusResponse.json()) as OpenAIVideoResponse;
    switch (job.status) {
      case "completed":
        return job;
      case "failed":
        throw new Error(job.error?.message?.trim() || "OpenAI video generation failed");
      default:
        // Any other status: keep waiting.
        break;
    }

    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
  }

  throw new Error(`OpenAI video generation task ${videoId} did not finish in time`);
}
|
||||
|
||||
/**
 * Downloads the rendered video from `{baseUrl}/videos/{videoId}/content?variant=video`.
 *
 * The caller's headers are copied and an `Accept: application/binary` entry
 * is added for the download request.
 *
 * @returns The video bytes, the response content type (default `video/mp4`)
 *   and a file name whose extension is `webm` when the content type mentions
 *   it, `mp4` otherwise.
 */
async function downloadOpenAIVideo(params: {
  videoId: string;
  headers: Headers;
  timeoutMs?: number;
  baseUrl: string;
  fetchFn: typeof fetch;
}): Promise<GeneratedVideoAsset> {
  const contentUrl = new URL(`${params.baseUrl}/videos/${params.videoId}/content`);
  contentUrl.searchParams.set("variant", "video");

  // Plain-object copy of the incoming headers plus the Accept entry.
  const headerInit: Record<string, string> = Object.fromEntries(params.headers.entries());
  headerInit.Accept = "application/binary";

  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const downloadResponse = await fetchWithTimeout(
    contentUrl.href,
    { method: "GET", headers: new Headers(headerInit) },
    requestTimeoutMs,
    params.fetchFn,
  );
  await assertOkOrThrowHttpError(downloadResponse, "OpenAI video download failed");

  const mimeType = downloadResponse.headers.get("content-type")?.trim() || "video/mp4";
  const payload = await downloadResponse.arrayBuffer();
  const extension = mimeType.includes("webm") ? "webm" : "mp4";

  return {
    buffer: Buffer.from(payload),
    mimeType,
    fileName: `video-1.${extension}`,
  };
}
|
||||
|
||||
/**
 * Builds the OpenAI video-generation provider.
 *
 * `generateVideo` submits a multipart create request to `{baseUrl}/videos`,
 * polls the job until it completes, then downloads the rendered video.
 *
 * @returns A provider exposing the default model `sora-2` plus `sora-2-pro`.
 */
export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "openai",
    label: "OpenAI",
    defaultModel: DEFAULT_OPENAI_VIDEO_MODEL,
    models: [DEFAULT_OPENAI_VIDEO_MODEL, "sora-2-pro"],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "openai",
        agentDir,
      }),
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 1,
      maxDurationSeconds: 12,
      supportsSize: true,
    },
    /**
     * Generates one video for the request.
     *
     * @throws Error when no API key is resolved, when the create response has
     *   no id, or when a helper (reference asset, polling, download) fails.
     */
    async generateVideo(req) {
      const auth = await resolveApiKeyForProvider({
        provider: "openai",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("OpenAI API key missing");
      }

      // Resolve the model once so the value sent to the API and the fallback
      // reported in the result can never disagree (a blank `req.model` used to
      // be reported verbatim although the default model had been requested).
      const requestedModel = req.model?.trim() || DEFAULT_OPENAI_VIDEO_MODEL;

      const fetchFn = fetch;
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: resolveOpenAIVideoBaseUrl(req),
          defaultBaseUrl: DEFAULT_OPENAI_VIDEO_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
          },
          provider: "openai",
          capability: "video",
          transport: "http",
        });

      // Optional fields are only added when they resolve to a value.
      const form = new FormData();
      form.set("prompt", req.prompt);
      form.set("model", requestedModel);
      const seconds = resolveDurationSeconds(req.durationSeconds);
      if (seconds) {
        form.set("seconds", seconds);
      }
      const size = resolveSize({
        size: req.size,
        aspectRatio: req.aspectRatio,
        resolution: req.resolution,
      });
      if (size) {
        form.set("size", size);
      }
      const referenceAsset = resolveReferenceAsset(req);
      if (referenceAsset) {
        form.set("input_reference", referenceAsset);
      }

      // Send the form without any preset Content-Type header.
      const multipartHeaders = new Headers(headers);
      multipartHeaders.delete("Content-Type");
      const { response, release } = await postTranscriptionRequest({
        url: `${baseUrl}/videos`,
        headers: multipartHeaders,
        body: form,
        timeoutMs: req.timeoutMs,
        fetchFn,
        allowPrivateNetwork,
        dispatcherPolicy,
      });

      try {
        await assertOkOrThrowHttpError(response, "OpenAI video generation failed");
        const submitted = (await response.json()) as OpenAIVideoResponse;
        const videoId = submitted.id?.trim();
        if (!videoId) {
          throw new Error("OpenAI video generation response missing video id");
        }
        const completed = await pollOpenAIVideo({
          videoId,
          headers,
          timeoutMs: req.timeoutMs,
          baseUrl,
          fetchFn,
        });
        const video = await downloadOpenAIVideo({
          videoId,
          headers,
          timeoutMs: req.timeoutMs,
          baseUrl,
          fetchFn,
        });
        return {
          videos: [video],
          // Prefer what the API reports; otherwise the model actually requested.
          model: completed.model ?? submitted.model ?? requestedModel,
          metadata: {
            videoId,
            status: completed.status,
            seconds: completed.seconds ?? submitted.seconds,
            size: completed.size ?? submitted.size,
          },
        };
      } finally {
        // Release the create request's resources on success and failure alike.
        await release();
      }
    },
  };
}
|
||||
@@ -1,6 +1,7 @@
|
||||
import { defineSingleProviderPluginEntry } from "openclaw/plugin-sdk/provider-entry";
|
||||
import { applyTogetherConfig, TOGETHER_DEFAULT_MODEL_REF } from "./onboard.js";
|
||||
import { buildTogetherProvider } from "./provider-catalog.js";
|
||||
import { buildTogetherVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
const PROVIDER_ID = "together";
|
||||
|
||||
@@ -35,4 +36,7 @@ export default defineSingleProviderPluginEntry({
|
||||
? "rate_limit"
|
||||
: undefined,
|
||||
},
|
||||
register(api) {
|
||||
api.registerVideoGenerationProvider(buildTogetherVideoGenerationProvider());
|
||||
},
|
||||
});
|
||||
|
||||
@@ -20,6 +20,9 @@
|
||||
"cliDescription": "Together AI API key"
|
||||
}
|
||||
],
|
||||
"contracts": {
|
||||
"videoGenerationProviders": ["together"]
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
|
||||
8
extensions/together/plugin-registration.contract.test.ts
Normal file
8
extensions/together/plugin-registration.contract.test.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
|
||||
|
||||
// Registration contract for the Together plugin: one provider id, which must
// also be registered as a video-generation provider.
describePluginRegistrationContract({
  pluginId: "together",

  providerIds: ["together"],
  videoGenerationProviderIds: ["together"],

  requireGenerateVideo: true,
});
|
||||
86
extensions/together/video-generation-provider.test.ts
Normal file
86
extensions/together/video-generation-provider.test.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildTogetherVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
// Shared mock handles, created through vi.hoisted so the vi.mock factories
// below can reference them.
const {
  resolveApiKeyForProviderMock,
  postJsonRequestMock,
  fetchWithTimeoutMock,
  assertOkOrThrowHttpErrorMock,
  resolveProviderHttpRequestConfigMock,
} = vi.hoisted(() => {
  return {
    // Always resolves a fixed API key.
    resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "together-key" })),
    // Configured per test.
    postJsonRequestMock: vi.fn(),
    fetchWithTimeoutMock: vi.fn(),
    // Never throws: every response is treated as OK.
    assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
    // Echoes the requested base URL (or the default) and wraps the default headers.
    resolveProviderHttpRequestConfigMock: vi.fn((request) => {
      return {
        baseUrl: request.baseUrl ?? request.defaultBaseUrl,
        allowPrivateNetwork: false,
        headers: new Headers(request.defaultHeaders),
        dispatcherPolicy: undefined,
      };
    }),
  };
});

// Replace the auth runtime with the fixed-key mock.
vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => {
  return { resolveApiKeyForProvider: resolveApiKeyForProviderMock };
});

// Replace the HTTP helpers used by the provider with the mocks above.
vi.mock("openclaw/plugin-sdk/provider-http", () => {
  return {
    assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
    fetchWithTimeout: fetchWithTimeoutMock,
    postJsonRequest: postJsonRequestMock,
    resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
  };
});
|
||||
|
||||
describe("together video generation provider", () => {
  // Per-test mocks are fully reset; mocks with a fixed implementation only
  // have their call history cleared.
  afterEach(() => {
    postJsonRequestMock.mockReset();
    fetchWithTimeoutMock.mockReset();
    resolveApiKeyForProviderMock.mockClear();
    assertOkOrThrowHttpErrorMock.mockClear();
    resolveProviderHttpRequestConfigMock.mockClear();
  });

  it("creates a video, polls completion, and downloads the output", async () => {
    // Response to the create (POST) call: the job is still running.
    const createResponse = {
      json: async () => ({
        id: "video_123",
        status: "in_progress",
      }),
    };
    // First poll already reports completion with an output URL.
    const statusResponse = {
      json: async () => ({
        id: "video_123",
        status: "completed",
        outputs: { video_url: "https://example.com/together.mp4" },
      }),
    };
    // Download of the generated file.
    const downloadResponse = {
      headers: new Headers({ "content-type": "video/mp4" }),
      arrayBuffer: async () => Buffer.from("mp4-bytes"),
    };

    postJsonRequestMock.mockResolvedValue({
      response: createResponse,
      release: vi.fn(async () => {}),
    });
    fetchWithTimeoutMock.mockResolvedValueOnce(statusResponse);
    fetchWithTimeoutMock.mockResolvedValueOnce(downloadResponse);

    const provider = buildTogetherVideoGenerationProvider();
    const result = await provider.generateVideo({
      provider: "together",
      model: "Wan-AI/Wan2.2-T2V-A14B",
      prompt: "A bicycle weaving through a rainy neon street",
      cfg: {},
    });

    const expectedCreateCall = expect.objectContaining({
      url: "https://api.together.xyz/v1/videos",
    });
    expect(postJsonRequestMock).toHaveBeenCalledWith(expectedCreateCall);
    expect(result.videos).toHaveLength(1);
    const expectedMetadata = expect.objectContaining({
      videoId: "video_123",
    });
    expect(result.metadata).toEqual(expectedMetadata);
  });
});
|
||||
235
extensions/together/video-generation-provider.ts
Normal file
235
extensions/together/video-generation-provider.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
fetchWithTimeout,
|
||||
postJsonRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
import { TOGETHER_BASE_URL } from "./models.js";
|
||||
|
||||
/** Model used when the request does not name one. */
const DEFAULT_TOGETHER_VIDEO_MODEL = "Wan-AI/Wan2.2-T2V-A14B";

/** Per-request timeout applied to status and download calls when none is given. */
const DEFAULT_TIMEOUT_MS = 120_000;

/** Delay between two status polls. */
const POLL_INTERVAL_MS = 5_000;

/** Upper bound on status polls before giving up. */
const MAX_POLL_ATTEMPTS = 120;

/** One output entry; the URL may appear under either key. */
type TogetherVideoOutput = {
  video_url?: string;
  url?: string;
};

/** Subset of the video job payload this provider reads. */
type TogetherVideoResponse = {
  id?: string;
  model?: string;
  status?: "in_progress" | "completed" | "failed";
  error?: {
    code?: string;
    message?: string;
  } | null;
  // Accepted both as a single object and as a list of entries.
  outputs?: TogetherVideoOutput | Array<TogetherVideoOutput>;
};
|
||||
|
||||
/**
 * Picks the base URL for Together video calls.
 *
 * @param req - Generation request whose config may carry a `together` provider `baseUrl`.
 * @returns The trimmed configured URL, or `TOGETHER_BASE_URL` when it is missing or blank.
 */
function resolveTogetherVideoBaseUrl(req: VideoGenerationRequest): string {
  const configured = req.cfg?.models?.providers?.together?.baseUrl?.trim();
  if (configured) {
    return configured;
  }
  return TOGETHER_BASE_URL;
}
|
||||
|
||||
/**
 * Encodes bytes as a base64 `data:` URL.
 *
 * @param buffer - Raw bytes to embed.
 * @param mimeType - MIME type placed in the URL header, used verbatim.
 * @returns `data:<mimeType>;base64,<payload>`.
 */
function toDataUrl(buffer: Buffer, mimeType: string): string {
  const encoded = buffer.toString("base64");
  return ["data:", mimeType, ";base64,", encoded].join("");
}
|
||||
|
||||
/**
 * Pulls the generated video URL out of a job payload.
 *
 * `outputs` may be a single object or an array. In both shapes `video_url`
 * is preferred over `url`, and values are trimmed. For an array, the first
 * entry that yields a non-empty URL wins.
 *
 * @returns The URL if one is found; `undefined` when an array holds none.
 */
function extractTogetherVideoUrl(payload: TogetherVideoResponse): string | undefined {
  const { outputs } = payload;

  // Single-object (or missing) shape.
  if (!Array.isArray(outputs)) {
    return outputs?.video_url?.trim() || outputs?.url?.trim();
  }

  // Array shape: scan in order.
  for (const output of outputs) {
    const candidate = output.video_url?.trim() || output.url?.trim();
    if (!candidate) {
      continue;
    }
    return candidate;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Polls `GET {baseUrl}/videos/{videoId}` until the job completes or fails.
 *
 * Makes at most `MAX_POLL_ATTEMPTS` requests and waits `POLL_INTERVAL_MS`
 * after every non-terminal response. `timeoutMs` (default
 * `DEFAULT_TIMEOUT_MS`) is passed to each individual request.
 *
 * @returns The payload of the first response whose status is `"completed"`.
 * @throws Error with the job's error message (or a generic one) when the
 *   status is `"failed"`, and when the attempts run out.
 */
async function pollTogetherVideo(params: {
  videoId: string;
  headers: Headers;
  timeoutMs?: number;
  baseUrl: string;
  fetchFn: typeof fetch;
}): Promise<TogetherVideoResponse> {
  const { videoId, headers, baseUrl, fetchFn } = params;
  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const statusUrl = `${baseUrl}/videos/${videoId}`;

  let attemptsLeft = MAX_POLL_ATTEMPTS;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;

    const statusResponse = await fetchWithTimeout(
      statusUrl,
      { method: "GET", headers },
      requestTimeoutMs,
      fetchFn,
    );
    await assertOkOrThrowHttpError(statusResponse, "Together video status request failed");

    const job = (await statusResponse.json()) as TogetherVideoResponse;
    switch (job.status) {
      case "completed":
        return job;
      case "failed":
        throw new Error(job.error?.message?.trim() || "Together video generation failed");
      default:
        // Any other status: keep waiting.
        break;
    }

    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
  }

  throw new Error(`Together video generation task ${videoId} did not finish in time`);
}
|
||||
|
||||
/**
 * Downloads the generated video from the given URL with a plain GET
 * (no extra request headers are set here).
 *
 * @returns The video bytes, the response content type (default `video/mp4`)
 *   and a file name whose extension is `webm` when the content type mentions
 *   it, `mp4` otherwise.
 */
async function downloadTogetherVideo(params: {
  url: string;
  timeoutMs?: number;
  fetchFn: typeof fetch;
}): Promise<GeneratedVideoAsset> {
  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const downloadResponse = await fetchWithTimeout(
    params.url,
    { method: "GET" },
    requestTimeoutMs,
    params.fetchFn,
  );
  await assertOkOrThrowHttpError(downloadResponse, "Together generated video download failed");

  const mimeType = downloadResponse.headers.get("content-type")?.trim() || "video/mp4";
  const payload = await downloadResponse.arrayBuffer();
  const extension = mimeType.includes("webm") ? "webm" : "mp4";

  return {
    buffer: Buffer.from(payload),
    mimeType,
    fileName: `video-1.${extension}`,
  };
}
|
||||
|
||||
/**
 * Builds the Together video-generation provider.
 *
 * `generateVideo` posts a JSON create request to `{baseUrl}/videos`, polls
 * the job until it completes, then downloads the video from the URL found in
 * the completed payload.
 *
 * @returns A provider whose default model is `DEFAULT_TOGETHER_VIDEO_MODEL`.
 */
export function buildTogetherVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "together",
    label: "Together",
    defaultModel: DEFAULT_TOGETHER_VIDEO_MODEL,
    models: [
      DEFAULT_TOGETHER_VIDEO_MODEL,
      "Wan-AI/Wan2.2-I2V-A14B",
      "minimax/Hailuo-02",
      "Kwai/Kling-2.1-Master",
    ],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "together",
        agentDir,
      }),
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 0,
      maxDurationSeconds: 12,
      supportsSize: true,
    },
    /**
     * Generates one video for the request.
     *
     * @throws Error when video reference inputs are supplied, when no API key
     *   is resolved, when the reference image has neither URL nor buffer, when
     *   the create response has no id, or when the completed job exposes no
     *   output URL.
     */
    async generateVideo(req) {
      if ((req.inputVideos?.length ?? 0) > 0) {
        throw new Error("Together video generation does not support video reference inputs.");
      }
      const auth = await resolveApiKeyForProvider({
        provider: "together",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("Together API key missing");
      }

      // Resolve the model once so the value sent to the API and the fallback
      // reported in the result can never disagree (a blank `req.model` used to
      // be reported verbatim although the default model had been requested).
      const requestedModel = req.model?.trim() || DEFAULT_TOGETHER_VIDEO_MODEL;

      const fetchFn = fetch;
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: resolveTogetherVideoBaseUrl(req),
          defaultBaseUrl: TOGETHER_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
            "Content-Type": "application/json",
          },
          provider: "together",
          capability: "video",
          transport: "http",
        });

      const body: Record<string, unknown> = {
        model: requestedModel,
        prompt: req.prompt,
      };
      // Duration is sent as a whole number of seconds (at least 1), as a string.
      if (typeof req.durationSeconds === "number" && Number.isFinite(req.durationSeconds)) {
        body.seconds = String(Math.max(1, Math.round(req.durationSeconds)));
      }
      // A "<width>x<height>" size is split into numeric fields; other formats are ignored.
      if (req.size?.trim()) {
        const match = /^(\d+)x(\d+)$/u.exec(req.size.trim());
        if (match) {
          body.width = Number.parseInt(match[1] ?? "", 10);
          body.height = Number.parseInt(match[2] ?? "", 10);
        }
      }
      // Only the first input image is used; a URL wins over inline bytes.
      if (req.inputImages?.[0]) {
        const input = req.inputImages[0];
        const value = input.url?.trim()
          ? input.url.trim()
          : input.buffer
            ? toDataUrl(input.buffer, input.mimeType?.trim() || "image/png")
            : undefined;
        if (!value) {
          throw new Error("Together reference image is missing image data.");
        }
        body.reference_images = [value];
      }

      const { response, release } = await postJsonRequest({
        url: `${baseUrl}/videos`,
        headers,
        body,
        timeoutMs: req.timeoutMs,
        fetchFn,
        allowPrivateNetwork,
        dispatcherPolicy,
      });
      try {
        await assertOkOrThrowHttpError(response, "Together video generation failed");
        const submitted = (await response.json()) as TogetherVideoResponse;
        const videoId = submitted.id?.trim();
        if (!videoId) {
          throw new Error("Together video generation response missing id");
        }
        const completed = await pollTogetherVideo({
          videoId,
          headers,
          timeoutMs: req.timeoutMs,
          baseUrl,
          fetchFn,
        });
        const videoUrl = extractTogetherVideoUrl(completed);
        if (!videoUrl) {
          throw new Error("Together video generation completed without an output URL");
        }
        const video = await downloadTogetherVideo({
          url: videoUrl,
          timeoutMs: req.timeoutMs,
          fetchFn,
        });
        return {
          videos: [video],
          // Prefer what the API reports; otherwise the model actually requested.
          model: completed.model ?? requestedModel,
          metadata: {
            videoId,
            status: completed.status,
            videoUrl,
          },
        };
      } finally {
        // Release the create request's resources on success and failure alike.
        await release();
      }
    },
  };
}
|
||||
123
extensions/video-generation-providers.live.test.ts
Normal file
123
extensions/video-generation-providers.live.test.ts
Normal file
@@ -0,0 +1,123 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { collectProviderApiKeys } from "../src/agents/live-auth-keys.js";
|
||||
import { isLiveTestEnabled } from "../src/agents/live-test-helpers.js";
|
||||
import type { OpenClawConfig } from "../src/config/config.js";
|
||||
import { getProviderEnvVars } from "../src/secrets/provider-env-vars.js";
|
||||
import {
|
||||
DEFAULT_LIVE_VIDEO_MODELS,
|
||||
parseCsvFilter,
|
||||
parseProviderModelMap,
|
||||
} from "../src/video-generation/live-test-helpers.js";
|
||||
import { parseVideoGenerationModelRef } from "../src/video-generation/model-ref.js";
|
||||
import {
|
||||
registerProviderPlugin,
|
||||
requireRegisteredProvider,
|
||||
} from "../test/helpers/plugins/provider-registration.js";
|
||||
import byteplusPlugin from "./byteplus/index.js";
|
||||
import falPlugin from "./fal/index.js";
|
||||
import googlePlugin from "./google/index.js";
|
||||
import minimaxPlugin from "./minimax/index.js";
|
||||
import openaiPlugin from "./openai/index.js";
|
||||
import qwenPlugin from "./qwen/index.js";
|
||||
import togetherPlugin from "./together/index.js";
|
||||
|
||||
// Gate for the whole live suite.
const LIVE = isLiveTestEnabled();

// Optional provider filter, read from OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS.
const providerFilter = parseCsvFilter(process.env.OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS);

// Optional per-provider model overrides, read from OPENCLAW_LIVE_VIDEO_GENERATION_MODELS.
const envModelMap = parseProviderModelMap(process.env.OPENCLAW_LIVE_VIDEO_GENERATION_MODELS);

/** One provider plugin exercised by the live suite. */
type LiveProviderCase = {
  /** Plugin entry whose `register` hook is invoked. */
  plugin: { register: (api: unknown) => void | Promise<void> };
  /** Id passed when registering the plugin. */
  pluginId: string;
  /** Name passed when registering the plugin. */
  pluginName: string;
  /** Id of the video provider the plugin is expected to register. */
  providerId: string;
};
|
||||
|
||||
// Every bundled video provider, narrowed by the optional provider filter and
// ordered by provider id.
const CASES: LiveProviderCase[] = [
  {
    plugin: byteplusPlugin,
    pluginId: "byteplus",
    pluginName: "BytePlus Provider",
    providerId: "byteplus",
  },
  {
    plugin: falPlugin,
    pluginId: "fal",
    pluginName: "fal Provider",
    providerId: "fal",
  },
  {
    plugin: googlePlugin,
    pluginId: "google",
    pluginName: "Google Provider",
    providerId: "google",
  },
  {
    plugin: minimaxPlugin,
    pluginId: "minimax",
    pluginName: "MiniMax Provider",
    providerId: "minimax",
  },
  {
    plugin: openaiPlugin,
    pluginId: "openai",
    pluginName: "OpenAI Provider",
    providerId: "openai",
  },
  {
    plugin: qwenPlugin,
    pluginId: "qwen",
    pluginName: "Qwen Provider",
    providerId: "qwen",
  },
  {
    plugin: togetherPlugin,
    pluginId: "together",
    pluginName: "Together Provider",
    providerId: "together",
  },
]
  // Without a filter every case is kept.
  .filter((candidate) => !providerFilter || providerFilter.has(candidate.providerId))
  .toSorted((a, b) => a.providerId.localeCompare(b.providerId));
|
||||
|
||||
/**
 * Presents an ad-hoc test literal as an `OpenClawConfig`.
 *
 * This is a compile-time cast only: the value is returned as-is and is not validated.
 */
function asConfig(value: unknown): OpenClawConfig {
  const config = value as OpenClawConfig;
  return config;
}
|
||||
|
||||
/**
 * Picks the model string to hand to a provider for a live run.
 *
 * When `modelRef` parses as a video-generation model ref whose provider equals `providerId`,
 * the parsed model part is returned; in every other case `modelRef` is returned unchanged.
 */
function resolveProviderModelForLiveTest(providerId: string, modelRef: string): string {
  const ref = parseVideoGenerationModelRef(modelRef);
  return ref && ref.provider === providerId ? ref.model : modelRef;
}
|
||||
|
||||
// Live end-to-end check: each selected provider must return at least one video for a short
// prompt. A provider without credentials or without a model ref is registered as skipped, plus
// an extra skipped entry naming the env vars that would enable it.
describe.skipIf(!LIVE)("video generation provider live", () => {
  for (const { plugin, pluginId, pluginName, providerId } of CASES) {
    // An explicit env override wins over the built-in default model for the provider.
    const modelRef = envModelMap.get(providerId) ?? DEFAULT_LIVE_VIDEO_MODELS[providerId];
    const hasAuth = collectProviderApiKeys(providerId).length > 0;
    const expectedEnvVars = getProviderEnvVars(providerId).join(", ");
    const runnable = hasAuth && Boolean(modelRef);
    const registerTest = runnable ? it : it.skip;

    registerTest(
      `generates a short video via ${providerId}`,
      async () => {
        const registered = await registerProviderPlugin({
          plugin,
          id: pluginId,
          name: pluginName,
        });
        const provider = requireRegisteredProvider(
          registered.videoProviders,
          providerId,
          "video provider",
        );
        const caps = provider.capabilities;
        // Ask for at most 3 seconds, or less when the provider declares a shorter maximum.
        const durationSeconds = Math.min(caps.maxDurationSeconds ?? 3, 3);
        const providerModel = resolveProviderModelForLiveTest(providerId, modelRef!);

        const result = await provider.generateVideo({
          provider: providerId,
          model: providerModel,
          prompt:
            "A tiny paper diorama city at sunrise with slow cinematic camera motion and no text.",
          cfg: asConfig({ plugins: { enabled: true } }),
          agentDir: "/tmp/openclaw-live-video",
          durationSeconds,
          // Tuning options are only sent when the provider reports support for them.
          ...(caps.supportsAspectRatio ? { aspectRatio: "16:9" } : {}),
          ...(caps.supportsResolution ? { resolution: "480P" as const } : {}),
          ...(caps.supportsAudio ? { audio: false } : {}),
          ...(caps.supportsWatermark ? { watermark: false } : {}),
        });

        expect(result.videos.length).toBeGreaterThan(0);
        const firstVideo = result.videos[0];
        expect(firstVideo?.mimeType.startsWith("video/")).toBe(true);
        // More than 1 KiB of payload rules out an empty or placeholder buffer.
        expect(firstVideo?.buffer.byteLength).toBeGreaterThan(1024);
      },
      8 * 60_000,
    );

    if (!runnable) {
      it.skip(`skips ${providerId} without live auth/model (${expectedEnvVars || "no env vars"})`, () => {});
    }
  }
});
|
||||
8
pnpm-lock.yaml
generated
8
pnpm-lock.yaml
generated
@@ -398,7 +398,11 @@ importers:
|
||||
|
||||
extensions/github-copilot: {}
|
||||
|
||||
extensions/google: {}
|
||||
extensions/google:
|
||||
dependencies:
|
||||
'@google/genai':
|
||||
specifier: ^1.48.0
|
||||
version: 1.48.0(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))
|
||||
|
||||
extensions/googlechat:
|
||||
dependencies:
|
||||
@@ -7490,7 +7494,7 @@ snapshots:
|
||||
discord-api-types: 0.38.44
|
||||
prism-media: 1.3.5(@discordjs/opus@0.10.0)(opusscript@0.1.1)
|
||||
tslib: 2.8.1
|
||||
ws: 8.19.0
|
||||
ws: 8.20.0
|
||||
transitivePeerDependencies:
|
||||
- '@discordjs/opus'
|
||||
- bufferutil
|
||||
|
||||
@@ -281,6 +281,53 @@ describe("handleToolExecutionEnd media emission", () => {
|
||||
expect(ctx.state.pendingToolMediaUrls).toEqual(["/tmp/generated.png"]);
|
||||
});
|
||||
|
||||
it("emits provider inventory output for compact video_generate list results", async () => {
  // The listing text returned by the tool; it must be forwarded to emitToolOutput verbatim.
  const inventoryText = [
    "openai: default=sora-2 | models=sora-2",
    "google: default=veo-3.1-fast-generate-preview | models=veo-3.1-fast-generate-preview",
  ].join("\n");
  const providers = [
    { id: "openai", defaultModel: "sora-2", models: ["sora-2"] },
    {
      id: "google",
      defaultModel: "veo-3.1-fast-generate-preview",
      models: ["veo-3.1-fast-generate-preview"],
    },
  ];
  // General tool output is switched off, so any emission comes from the inventory path.
  const ctx = createMockContext({
    shouldEmitToolOutput: false,
    onToolResult: vi.fn(),
    toolResultFormat: "plain",
  });

  await handleToolExecutionEnd(ctx, {
    type: "tool_execution_end",
    toolName: "video_generate",
    toolCallId: "tc-1",
    isError: false,
    result: {
      content: [{ type: "text", text: inventoryText }],
      details: { providers },
    },
  });

  expect(ctx.emitToolOutput).toHaveBeenCalledWith(
    "video_generate",
    undefined,
    inventoryText,
    expect.any(Object),
  );
  // A provider listing carries no media, so nothing may be queued for delivery.
  expect(ctx.state.pendingToolMediaUrls).toEqual([]);
});
|
||||
|
||||
it("does NOT emit media for error results", async () => {
|
||||
const onToolResult = vi.fn();
|
||||
const ctx = createMockContext({ shouldEmitToolOutput: false, onToolResult });
|
||||
|
||||
@@ -295,6 +295,30 @@ function collectEmittedToolOutputMediaUrls(
|
||||
return filterToolResultMediaUrls(toolName, mediaUrls, result);
|
||||
}
|
||||
|
||||
// Tools whose provider-inventory results qualify for compact output
// (checked by shouldEmitCompactToolOutput).
const COMPACT_PROVIDER_INVENTORY_TOOLS = new Set<string>([
  "image_generate",
  "video_generate",
]);
|
||||
|
||||
/**
 * Reports whether a tool result carries a provider inventory.
 *
 * @param result Raw tool result of unknown shape.
 * @returns `true` only when `result` is a non-null object whose details record has an array
 *   under `providers`.
 */
function hasProviderInventoryDetails(result: unknown): boolean {
  if (typeof result !== "object" || !result) {
    return false;
  }
  const providers = readToolResultDetailsRecord(result)?.providers;
  return Array.isArray(providers);
}
|
||||
|
||||
/**
 * Decides whether a tool result qualifies for compact provider-inventory output.
 *
 * All three conditions must hold, evaluated in this order: the tool is one of the compact
 * inventory tools, the result carries provider inventory details, and there is non-blank
 * output text to show.
 */
function shouldEmitCompactToolOutput(params: {
  toolName: string;
  result: unknown;
  outputText?: string;
}): boolean {
  const { toolName, result, outputText } = params;
  return (
    COMPACT_PROVIDER_INVENTORY_TOOLS.has(toolName) &&
    hasProviderInventoryDetails(result) &&
    Boolean(outputText?.trim())
  );
}
|
||||
|
||||
function readExecApprovalPendingDetails(result: unknown): {
|
||||
approvalId: string;
|
||||
approvalSlug: string;
|
||||
@@ -448,8 +472,10 @@ async function emitToolResultOutput(params: {
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.shouldEmitToolOutput()) {
|
||||
const outputText = extractToolResultText(sanitizedResult);
|
||||
const outputText = extractToolResultText(sanitizedResult);
|
||||
const shouldEmitOutput =
|
||||
ctx.shouldEmitToolOutput() || shouldEmitCompactToolOutput({ toolName, result, outputText });
|
||||
if (shouldEmitOutput) {
|
||||
if (outputText) {
|
||||
if (ctx.params.toolResultFormat === "plain") {
|
||||
emittedToolOutputMediaUrls = collectEmittedToolOutputMediaUrls(
|
||||
|
||||
@@ -310,6 +310,92 @@ describe("fetchWithSsrFGuard hardening", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("ignores dispatcher support markers on ambient global fetch", async () => {
  const globals = globalThis as Record<string, unknown>;
  const originalGlobalFetch = globalThis.fetch;
  const runtimeFetch = vi.fn(async () => okResponse());

  // Minimal stand-ins for the undici dispatcher classes; they only record their options.
  class MockAgent {
    constructor(readonly options: unknown) {}
  }
  class MockEnvHttpProxyAgent {
    constructor(readonly options: unknown) {}
  }
  class MockProxyAgent {
    constructor(readonly options: unknown) {}
  }

  // A global fetch that advertises dispatcher support through the marker property.
  // The guard is expected to ignore the marker and never call it.
  let globalFetchCalls = 0;
  const countingFetch = async () => {
    globalFetchCalls += 1;
    throw new Error("ambient global fetch should not be used when a dispatcher is attached");
  };
  const flaggedGlobalFetch = Object.assign(countingFetch, {
    __openclawAcceptsDispatcher: true as const,
  });

  globals.fetch = flaggedGlobalFetch as typeof fetch;
  globals[TEST_UNDICI_RUNTIME_DEPS_KEY] = {
    Agent: MockAgent,
    EnvHttpProxyAgent: MockEnvHttpProxyAgent,
    ProxyAgent: MockProxyAgent,
    fetch: runtimeFetch,
  };

  try {
    const result = await fetchWithSsrFGuard({
      url: "https://public.example/resource",
      lookupFn: createPublicLookup(),
    });

    expect(runtimeFetch).toHaveBeenCalledTimes(1);
    expect(globalFetchCalls).toBe(0);
    await result.release();
  } finally {
    // Always put the real global fetch back, even when an expectation fails.
    globals.fetch = originalGlobalFetch;
  }
});
|
||||
|
||||
it("treats explicit fetchImpl equal to ambient global fetch as non-dispatcher-capable", async () => {
  const globals = globalThis as Record<string, unknown>;
  const originalGlobalFetch = globalThis.fetch;
  const runtimeFetch = vi.fn(async () => okResponse());

  // Minimal stand-ins for the undici dispatcher classes; they only record their options.
  class MockAgent {
    constructor(readonly options: unknown) {}
  }
  class MockEnvHttpProxyAgent {
    constructor(readonly options: unknown) {}
  }
  class MockProxyAgent {
    constructor(readonly options: unknown) {}
  }

  // Installed as the global fetch and then passed back explicitly as `fetchImpl`;
  // the guard must still route the request through the runtime fetch instead.
  let globalFetchCalls = 0;
  const globalFetch = async () => {
    globalFetchCalls += 1;
    throw new Error("ambient global fetch should not be used when a dispatcher is attached");
  };

  globals.fetch = globalFetch as typeof fetch;
  globals[TEST_UNDICI_RUNTIME_DEPS_KEY] = {
    Agent: MockAgent,
    EnvHttpProxyAgent: MockEnvHttpProxyAgent,
    ProxyAgent: MockProxyAgent,
    fetch: runtimeFetch,
  };

  try {
    const result = await fetchWithSsrFGuard({
      url: "https://public.example/resource",
      fetchImpl: globalThis.fetch,
      lookupFn: createPublicLookup(),
    });

    expect(runtimeFetch).toHaveBeenCalledTimes(1);
    expect(globalFetchCalls).toBe(0);
    await result.release();
  } finally {
    // Always put the real global fetch back, even when an expectation fails.
    globals.fetch = originalGlobalFetch;
  }
});
|
||||
|
||||
it("keeps explicit proxy transport policy when DNS pinning is disabled", async () => {
|
||||
const lookupFn = createPublicLookup();
|
||||
(globalThis as Record<string, unknown>)[TEST_UNDICI_RUNTIME_DEPS_KEY] = {
|
||||
|
||||
@@ -16,9 +16,6 @@ import { loadUndiciRuntimeDeps } from "./undici-runtime.js";
|
||||
|
||||
type FetchLike = (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>;
|
||||
type DispatcherAwareRequestInit = RequestInit & { dispatcher?: Dispatcher };
|
||||
type DispatcherCompatibleFetch = FetchLike & {
|
||||
__openclawAcceptsDispatcher?: boolean;
|
||||
};
|
||||
|
||||
export const GUARDED_FETCH_MODE = {
|
||||
STRICT: "strict",
|
||||
@@ -165,6 +162,17 @@ function isMockedFetch(fetchImpl: FetchLike | undefined): boolean {
|
||||
return typeof (fetchImpl as FetchLike & { mock?: unknown }).mock === "object";
|
||||
}
|
||||
|
||||
/**
 * Reports whether `fetchImpl` is the very same function object as `globalFetch`.
 *
 * Both values must be functions; a missing value on either side yields `false`.
 */
function isAmbientGlobalFetch(params: {
  fetchImpl: FetchLike | undefined;
  globalFetch: FetchLike | undefined;
}): boolean {
  const { fetchImpl, globalFetch } = params;
  if (typeof fetchImpl !== "function" || typeof globalFetch !== "function") {
    return false;
  }
  // Identity comparison: an equivalent but separate wrapper does not count.
  return fetchImpl === globalFetch;
}
|
||||
|
||||
export function retainSafeHeadersForCrossOriginRedirectHeaders(
|
||||
headers?: HeadersInit,
|
||||
): Record<string, string> | undefined {
|
||||
@@ -302,11 +310,13 @@ export async function fetchWithSsrFGuard(params: GuardedFetchOptions): Promise<G
|
||||
};
|
||||
|
||||
const supportsDispatcherInit =
|
||||
params.fetchImpl !== undefined ||
|
||||
isMockedFetch(defaultFetch) ||
|
||||
(defaultFetch as DispatcherCompatibleFetch).__openclawAcceptsDispatcher === true;
|
||||
// Explicit caller stubs, test-installed global fetch mocks, and
|
||||
// dispatcher-aware wrappers should win.
|
||||
(params.fetchImpl !== undefined &&
|
||||
!isAmbientGlobalFetch({
|
||||
fetchImpl: params.fetchImpl,
|
||||
globalFetch: globalThis.fetch,
|
||||
})) ||
|
||||
isMockedFetch(defaultFetch);
|
||||
// Explicit caller stubs and test-installed fetch mocks should win.
|
||||
// Otherwise, fall back to undici's fetch whenever we attach a dispatcher,
|
||||
// because the default global fetch path will not honor per-request
|
||||
// dispatchers.
|
||||
|
||||
89
src/video-generation/live-test-helpers.ts
Normal file
89
src/video-generation/live-test-helpers.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
import type { AuthProfileStore } from "../agents/auth-profiles.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
|
||||
/**
 * Default model ref per provider id, in `provider/model` form.
 *
 * The `provider/` prefix is derived from the key, so each ref always names the provider it is
 * filed under.
 */
export const DEFAULT_LIVE_VIDEO_MODELS: Record<string, string> = Object.fromEntries(
  (
    [
      ["byteplus", "seedance-1-0-lite-t2v-250428"],
      ["fal", "fal-ai/minimax/video-01-live"],
      ["google", "veo-3.1-fast-generate-preview"],
      ["minimax", "MiniMax-Hailuo-2.3"],
      ["openai", "sora-2"],
      ["qwen", "wan2.6-t2v"],
      ["together", "Wan-AI/Wan2.2-T2V-A14B"],
    ] as const
  ).map(([provider, model]) => [provider, `${provider}/${model}`] as const),
);
|
||||
|
||||
/**
 * Masks an API key so it can appear in live-test diagnostics without exposing the secret.
 *
 * @param value Raw key; surrounding whitespace is ignored.
 * @returns
 *   - `"none"` when the key is `undefined`, empty, or whitespace-only;
 *   - `"***"` when the key has 12 characters or fewer, because showing the usual 8-character
 *     prefix and 4-character suffix would reveal the whole key;
 *   - otherwise the first 8 and last 4 characters joined by `...`.
 */
export function redactLiveApiKey(value: string | undefined): string {
  const visiblePrefix = 8;
  const visibleSuffix = 4;
  const trimmed = value?.trim();
  if (!trimmed) {
    return "none";
  }
  // Never echo a key that the prefix and suffix windows would cover completely.
  if (trimmed.length <= visiblePrefix + visibleSuffix) {
    return "***";
  }
  return `${trimmed.slice(0, visiblePrefix)}...${trimmed.slice(-visibleSuffix)}`;
}
|
||||
|
||||
/**
 * Parses a comma-separated allow-list such as `"openai, google"`.
 *
 * Entries are trimmed and lower-cased; blank entries are dropped.
 *
 * @param raw Raw list, typically an environment variable value.
 * @returns A set of normalized entries, or `null` (meaning "no filter") when the input is
 *   missing, blank, contains no entries, or is the word `all` in any letter case.
 */
export function parseCsvFilter(raw?: string): Set<string> | null {
  // Normalize case before the sentinel check so "ALL" and "All" behave like "all" instead of
  // producing a filter that matches nothing.
  const normalized = raw?.trim().toLowerCase();
  if (!normalized || normalized === "all") {
    return null;
  }
  const values = normalized
    .split(",")
    .map((entry) => entry.trim())
    .filter(Boolean);
  return values.length > 0 ? new Set(values) : null;
}
|
||||
|
||||
/**
 * Parses a comma-separated list of `provider/model` refs into a per-provider map.
 *
 * Keys are the lower-cased text before the first `/`; values are the whole trimmed ref.
 * Tokens that are blank, have no `/`, start with `/`, or end with `/` are ignored. When a
 * provider appears more than once, the last ref wins.
 *
 * @param raw Raw list, typically an environment variable value.
 */
export function parseProviderModelMap(raw?: string): Map<string, string> {
  const modelsByProvider = new Map<string, string>();
  const modelRefs = (raw ?? "").split(",").map((token) => token.trim());
  modelRefs.forEach((modelRef) => {
    const separator = modelRef.indexOf("/");
    // A blank token yields -1 here, so it is rejected by the same check.
    const hasProvider = separator > 0;
    const hasModel = separator !== modelRef.length - 1;
    if (hasProvider && hasModel) {
      const provider = modelRef.slice(0, separator).trim().toLowerCase();
      modelsByProvider.set(provider, modelRef);
    }
  });
  return modelsByProvider;
}
|
||||
|
||||
/**
 * Collects the video-generation model refs configured under
 * `agents.defaults.videoGenerationModel`, keyed by lower-cased provider id.
 *
 * The setting may be a single ref string or an object with `primary` and `fallbacks`. Refs are
 * read in the order primary, then fallbacks; a later ref for the same provider replaces an
 * earlier one. Refs that are blank or not of the form `provider/model` are ignored.
 */
export function resolveConfiguredLiveVideoModels(cfg: OpenClawConfig): Map<string, string> {
  const configured = cfg.agents?.defaults?.videoGenerationModel;
  const candidates =
    typeof configured === "string"
      ? [configured]
      : [configured?.primary, ...(configured?.fallbacks ?? [])];

  const resolved = new Map<string, string>();
  for (const candidate of candidates) {
    const modelRef = candidate?.trim();
    if (!modelRef) {
      continue;
    }
    const separator = modelRef.indexOf("/");
    const missingProvider = separator <= 0;
    const missingModel = separator === modelRef.length - 1;
    if (missingProvider || missingModel) {
      continue;
    }
    resolved.set(modelRef.slice(0, separator).trim().toLowerCase(), modelRef);
  }
  return resolved;
}
|
||||
|
||||
/**
 * Chooses the auth profile store to use for a live video run.
 *
 * @returns A fresh, empty version-1 store when live keys are present and profile keys are not
 *   required; `undefined` otherwise.
 */
export function resolveLiveVideoAuthStore(params: {
  requireProfileKeys: boolean;
  hasLiveKeys: boolean;
}): AuthProfileStore | undefined {
  const useEmptyStore = !params.requireProfileKeys && params.hasLiveKeys;
  return useEmptyStore ? { version: 1, profiles: {} } : undefined;
}
|
||||
@@ -3,6 +3,7 @@ import type {
|
||||
MediaUnderstandingProviderPlugin,
|
||||
ProviderPlugin,
|
||||
SpeechProviderPlugin,
|
||||
VideoGenerationProviderPlugin,
|
||||
} from "../../../src/plugins/types.js";
|
||||
import { createTestPluginApi } from "./plugin-api.js";
|
||||
|
||||
@@ -11,6 +12,7 @@ type RegisteredProviderCollections = {
|
||||
speechProviders: SpeechProviderPlugin[];
|
||||
mediaProviders: MediaUnderstandingProviderPlugin[];
|
||||
imageProviders: ImageGenerationProviderPlugin[];
|
||||
videoProviders: VideoGenerationProviderPlugin[];
|
||||
};
|
||||
|
||||
type ProviderPluginModule = {
|
||||
@@ -26,6 +28,7 @@ export async function registerProviderPlugin(params: {
|
||||
const speechProviders: SpeechProviderPlugin[] = [];
|
||||
const mediaProviders: MediaUnderstandingProviderPlugin[] = [];
|
||||
const imageProviders: ImageGenerationProviderPlugin[] = [];
|
||||
const videoProviders: VideoGenerationProviderPlugin[] = [];
|
||||
|
||||
await params.plugin.register(
|
||||
createTestPluginApi({
|
||||
@@ -46,10 +49,13 @@ export async function registerProviderPlugin(params: {
|
||||
registerImageGenerationProvider: (provider) => {
|
||||
imageProviders.push(provider);
|
||||
},
|
||||
registerVideoGenerationProvider: (provider) => {
|
||||
videoProviders.push(provider);
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
return { providers, speechProviders, mediaProviders, imageProviders };
|
||||
return { providers, speechProviders, mediaProviders, imageProviders, videoProviders };
|
||||
}
|
||||
|
||||
export function requireRegisteredProvider<T extends { id: string }>(
|
||||
|
||||
Reference in New Issue
Block a user