Files
openclaw/extensions/google/video-generation-provider.ts
2026-04-06 23:50:27 +01:00

296 lines
9.7 KiB
TypeScript

import { mkdtemp, readFile, rm } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { GoogleGenAI } from "@google/genai";
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
import type {
GeneratedVideoAsset,
VideoGenerationProvider,
VideoGenerationRequest,
} from "openclaw/plugin-sdk/video-generation";
import { normalizeGoogleApiBaseUrl } from "./api.js";
/** Model id used when the request does not name one (or names a blank one). */
const DEFAULT_GOOGLE_VIDEO_MODEL = "veo-3.1-fast-generate-preview";
/** Fallback for the SDK client's `httpOptions.timeout` when the request carries no `timeoutMs`. */
const DEFAULT_TIMEOUT_MS = 180_000;
/** Delay between successive polls of the long-running video operation. */
const POLL_INTERVAL_MS = 10_000;
/**
 * Maximum number of polls before generation is abandoned with an error.
 * Combined with POLL_INTERVAL_MS this caps the polling sleep time at 90 * 10s = 15 minutes.
 */
const MAX_POLL_ATTEMPTS = 90;
/** Clip lengths (seconds) that requests are snapped to, in ascending order. */
const GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS = [4, 6, 8] as const;
/** Smallest allowed duration; relies on the list above being sorted ascending. */
const GOOGLE_VIDEO_MIN_DURATION_SECONDS = GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS[0];
/** Largest allowed duration; relies on the list above being sorted ascending. */
const GOOGLE_VIDEO_MAX_DURATION_SECONDS =
GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS[GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS.length - 1];
/**
 * Reads the optional Google base URL override from the request configuration.
 *
 * @param req - Request whose `cfg.models.providers.google.baseUrl` is consulted.
 * @returns The trimmed setting passed through `normalizeGoogleApiBaseUrl`, or
 *   `undefined` when the setting is missing or blank.
 */
function resolveConfiguredGoogleVideoBaseUrl(req: VideoGenerationRequest): string | undefined {
  const rawBaseUrl = req.cfg?.models?.providers?.google?.baseUrl?.trim();
  if (!rawBaseUrl) {
    return undefined;
  }
  return normalizeGoogleApiBaseUrl(rawBaseUrl);
}
/**
 * Parses a `"<width>x<height>"` size string (for example `"1280x720"`).
 *
 * Surrounding whitespace is ignored. Only decimal digits separated by a single
 * lowercase `x` are accepted.
 *
 * @param size - Raw size string, possibly `undefined`.
 * @returns The parsed dimensions, or `undefined` when the input is missing,
 *   malformed, overflows to a non-finite number, or has a zero dimension.
 */
function parseVideoSize(size: string | undefined): { width: number; height: number } | undefined {
  const trimmed = size?.trim();
  if (!trimmed) {
    return undefined;
  }
  const match = /^(\d+)x(\d+)$/u.exec(trimmed);
  if (!match) {
    return undefined;
  }
  const width = Number.parseInt(match[1] ?? "", 10);
  const height = Number.parseInt(match[2] ?? "", 10);
  // Extremely long digit runs parse to Infinity; treat them as invalid.
  if (!Number.isFinite(width) || !Number.isFinite(height)) {
    return undefined;
  }
  // A zero-sized frame carries no aspect-ratio or resolution information, so
  // reject it instead of letting callers derive a bogus orientation from it.
  if (width <= 0 || height <= 0) {
    return undefined;
  }
  return { width, height };
}
/**
 * Picks the aspect ratio to request.
 *
 * An explicit `aspectRatio` of `"16:9"` or `"9:16"` (after trimming) wins.
 * Otherwise the orientation is derived from `size`: portrait sizes map to
 * `"9:16"`, landscape and square sizes map to `"16:9"`.
 *
 * @param params - Optional explicit aspect ratio and optional size string.
 * @returns The aspect ratio, or `undefined` when neither input is usable.
 */
function resolveAspectRatio(params: {
  aspectRatio?: string;
  size?: string;
}): "16:9" | "9:16" | undefined {
  const requested = params.aspectRatio?.trim();
  if (requested === "16:9") {
    return "16:9";
  }
  if (requested === "9:16") {
    return "9:16";
  }
  const dimensions = parseVideoSize(params.size);
  if (!dimensions) {
    return undefined;
  }
  const isPortrait = dimensions.height > dimensions.width;
  return isPortrait ? "9:16" : "16:9";
}
/**
 * Picks the output resolution to request.
 *
 * An explicit `resolution` of `"720P"` or `"1080P"` is mapped to its lowercase
 * form. Otherwise the longest edge of `size` decides: at least 1920 pixels
 * gives `"1080p"`, at least 1280 pixels gives `"720p"`.
 *
 * @param params - Optional explicit resolution and optional size string.
 * @returns The resolution, or `undefined` when nothing usable was supplied or
 *   the size is smaller than 1280 on its longest edge.
 */
function resolveResolution(params: {
  resolution?: string;
  size?: string;
}): "720p" | "1080p" | undefined {
  switch (params.resolution) {
    case "720P":
      return "720p";
    case "1080P":
      return "1080p";
    default:
      break;
  }
  const dimensions = parseVideoSize(params.size);
  if (!dimensions) {
    return undefined;
  }
  const longestEdge = Math.max(dimensions.width, dimensions.height);
  if (longestEdge >= 1920) {
    return "1080p";
  }
  if (longestEdge >= 1280) {
    return "720p";
  }
  return undefined;
}
/**
 * Snaps a requested duration to one of the allowed clip lengths.
 *
 * The value is rounded to whole seconds, clamped to the allowed minimum and
 * maximum, and then replaced by the nearest allowed length. When two allowed
 * lengths are equally near, the longer one is chosen.
 *
 * @param durationSeconds - Requested duration in seconds, if any.
 * @returns The snapped duration, or `undefined` when the input is not a finite number.
 */
function resolveDurationSeconds(durationSeconds: number | undefined): number | undefined {
  if (typeof durationSeconds !== "number" || !Number.isFinite(durationSeconds)) {
    return undefined;
  }
  const wholeSeconds = Math.round(durationSeconds);
  const clamped = Math.min(
    GOOGLE_VIDEO_MAX_DURATION_SECONDS,
    Math.max(GOOGLE_VIDEO_MIN_DURATION_SECONDS, wholeSeconds),
  );
  let closest: number = GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS[0];
  for (const candidate of GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS) {
    const candidateGap = Math.abs(candidate - clamped);
    const closestGap = Math.abs(closest - clamped);
    const isNearer = candidateGap < closestGap;
    // Ties go to the longer duration.
    const winsTie = candidateGap === closestGap && candidate > closest;
    if (isNearer || winsTie) {
      closest = candidate;
    }
  }
  return closest;
}
/**
 * Converts the first input image of the request into a base64 payload.
 *
 * @param req - Request whose first `inputImages` entry is used; others are ignored here.
 * @returns `{ imageBytes, mimeType }`, with the MIME type falling back to
 *   `"image/png"` when absent or blank, or `undefined` when there is no first
 *   image or it has no buffer.
 */
function resolveInputImage(req: VideoGenerationRequest) {
  const firstImage = req.inputImages?.[0];
  const imageBuffer = firstImage?.buffer;
  if (!imageBuffer) {
    return undefined;
  }
  const imageBytes = imageBuffer.toString("base64");
  const declaredMimeType = firstImage?.mimeType?.trim();
  return {
    imageBytes,
    mimeType: declaredMimeType || "image/png",
  };
}
/**
 * Converts the first input video of the request into a base64 payload.
 *
 * @param req - Request whose first `inputVideos` entry is used; others are ignored here.
 * @returns `{ videoBytes, mimeType }`, with the MIME type falling back to
 *   `"video/mp4"` when absent or blank, or `undefined` when there is no first
 *   video or it has no buffer.
 */
function resolveInputVideo(req: VideoGenerationRequest) {
  const firstVideo = req.inputVideos?.[0];
  const videoBuffer = firstVideo?.buffer;
  if (!videoBuffer) {
    return undefined;
  }
  const videoBytes = videoBuffer.toString("base64");
  const declaredMimeType = firstVideo?.mimeType?.trim();
  return {
    videoBytes,
    mimeType: declaredMimeType || "video/mp4",
  };
}
/**
 * Downloads a generated video through the SDK client and returns it as an in-memory asset.
 *
 * The file is written into a freshly created temporary directory, read back
 * into a buffer, and the directory is removed afterwards whether or not the
 * download succeeded.
 *
 * @param params.client - SDK client used for `files.download`.
 * @param params.file - File handle from the generation response, passed to the SDK as-is.
 * @param params.index - Zero-based position of the video; used for the 1-based file name.
 * @returns The video buffer labelled as `video/mp4` with a `video-<n>.mp4` file name.
 */
async function downloadGeneratedVideo(params: {
  client: GoogleGenAI;
  file: unknown;
  index: number;
}): Promise<GeneratedVideoAsset> {
  const { client, file, index } = params;
  const fileName = `video-${index + 1}.mp4`;
  const scratchDir = await mkdtemp(path.join(os.tmpdir(), "openclaw-google-video-"));
  const downloadPath = path.join(scratchDir, fileName);
  try {
    await client.files.download({ file: file as never, downloadPath });
    const buffer = await readFile(downloadPath);
    return { buffer, mimeType: "video/mp4", fileName };
  } finally {
    // Always clean up the scratch directory, including on download failure.
    await rm(scratchDir, { recursive: true, force: true });
  }
}
/**
 * Builds the Google video generation provider.
 *
 * The returned provider advertises text-to-video, image-to-video and
 * video-to-video capabilities (one output video, one optional input asset) and
 * implements `generateVideo` on top of the `@google/genai` SDK:
 *
 * 1. validate that at most one input image or one input video is supplied,
 * 2. resolve the API key and optional base URL override,
 * 3. start a `generateVideos` operation,
 * 4. poll it every `POLL_INTERVAL_MS` for at most `MAX_POLL_ATTEMPTS` polls,
 * 5. return inline video bytes directly, or download the file when only a handle is returned.
 *
 * @returns A provider object with id `"google"`.
 */
export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "google",
    label: "Google",
    defaultModel: DEFAULT_GOOGLE_VIDEO_MODEL,
    models: [
      DEFAULT_GOOGLE_VIDEO_MODEL,
      "veo-3.1-generate-preview",
      "veo-3.1-lite-generate-preview",
      "veo-3.0-fast-generate-001",
      "veo-3.0-generate-001",
      "veo-2.0-generate-001",
    ],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "google",
        agentDir,
      }),
    capabilities: {
      generate: {
        maxVideos: 1,
        maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS,
        supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS,
        aspectRatios: ["16:9", "9:16"],
        resolutions: ["720P", "1080P"],
        supportsAspectRatio: true,
        supportsResolution: true,
        supportsSize: true,
        supportsAudio: true,
      },
      imageToVideo: {
        enabled: true,
        maxVideos: 1,
        maxInputImages: 1,
        maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS,
        supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS,
        aspectRatios: ["16:9", "9:16"],
        resolutions: ["720P", "1080P"],
        supportsAspectRatio: true,
        supportsResolution: true,
        supportsSize: true,
        supportsAudio: true,
      },
      videoToVideo: {
        enabled: true,
        maxVideos: 1,
        maxInputVideos: 1,
        maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS,
        supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS,
        aspectRatios: ["16:9", "9:16"],
        resolutions: ["720P", "1080P"],
        supportsAspectRatio: true,
        supportsResolution: true,
        supportsSize: true,
        supportsAudio: true,
      },
    },
    /**
     * Generates a single video for the request.
     *
     * @throws Error when more than one input image or video is supplied, when
     *   both kinds are supplied together, when no API key can be resolved,
     *   when the operation does not finish within the polling budget, when the
     *   operation reports an error, or when the response contains no video.
     */
    async generateVideo(req) {
      const inputImageCount = req.inputImages?.length ?? 0;
      const inputVideoCount = req.inputVideos?.length ?? 0;
      if (inputImageCount > 1) {
        throw new Error("Google video generation supports at most one input image.");
      }
      if (inputVideoCount > 1) {
        throw new Error("Google video generation supports at most one input video.");
      }
      if (inputImageCount > 0 && inputVideoCount > 0) {
        throw new Error(
          "Google video generation does not support image and video inputs together.",
        );
      }

      const auth = await resolveApiKeyForProvider({
        provider: "google",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("Google API key missing");
      }

      // Resolve every derived request parameter exactly once so the value sent
      // to the API and the value reported back to the caller cannot diverge.
      const configuredBaseUrl = resolveConfiguredGoogleVideoBaseUrl(req);
      const model = req.model?.trim() || DEFAULT_GOOGLE_VIDEO_MODEL;
      const durationSeconds = resolveDurationSeconds(req.durationSeconds);
      const aspectRatio = resolveAspectRatio({ aspectRatio: req.aspectRatio, size: req.size });
      const resolution = resolveResolution({ resolution: req.resolution, size: req.size });

      const client = new GoogleGenAI({
        apiKey: auth.apiKey,
        httpOptions: {
          ...(configuredBaseUrl ? { baseUrl: configuredBaseUrl } : {}),
          timeout: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
        },
      });

      // Optional settings are only included when resolved, so unset values are
      // omitted from the request config instead of being sent as `undefined`.
      let operation = await client.models.generateVideos({
        model,
        prompt: req.prompt,
        image: resolveInputImage(req),
        video: resolveInputVideo(req),
        config: {
          numberOfVideos: 1,
          ...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
          ...(aspectRatio ? { aspectRatio } : {}),
          ...(resolution ? { resolution } : {}),
          ...(req.audio === true ? { generateAudio: true } : {}),
        },
      });

      // Poll the long-running operation until it reports done or the attempt
      // budget is exhausted.
      for (let attempt = 0; !(operation.done ?? false); attempt += 1) {
        if (attempt >= MAX_POLL_ATTEMPTS) {
          throw new Error("Google video generation did not finish in time");
        }
        await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
        operation = await client.operations.getVideosOperation({ operation });
      }
      if (operation.error) {
        throw new Error(JSON.stringify(operation.error));
      }

      const generatedVideos = operation.response?.generatedVideos ?? [];
      if (generatedVideos.length === 0) {
        throw new Error("Google video generation response missing generated videos");
      }
      const videos = await Promise.all(
        generatedVideos.map(async (entry, index) => {
          const inline = entry.video;
          // Prefer bytes embedded in the response; no extra download needed.
          if (inline?.videoBytes) {
            return {
              buffer: Buffer.from(inline.videoBytes, "base64"),
              mimeType: inline.mimeType?.trim() || "video/mp4",
              fileName: `video-${index + 1}.mp4`,
            };
          }
          if (!inline) {
            throw new Error("Google generated video missing file handle");
          }
          // Otherwise treat the entry as a file handle and fetch it via the SDK.
          return await downloadGeneratedVideo({
            client,
            file: inline,
            index,
          });
        }),
      );
      return {
        videos,
        model,
        metadata: operation.name
          ? {
              operationName: operation.name,
            }
          : undefined,
      };
    },
  };
}