mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-16 19:51:11 +00:00
296 lines
9.7 KiB
TypeScript
296 lines
9.7 KiB
TypeScript
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { GoogleGenAI } from "@google/genai";
|
|
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
|
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
|
import type {
|
|
GeneratedVideoAsset,
|
|
VideoGenerationProvider,
|
|
VideoGenerationRequest,
|
|
} from "openclaw/plugin-sdk/video-generation";
|
|
import { normalizeGoogleApiBaseUrl } from "./api.js";
|
|
|
|
/** Model used when a request does not name one. */
const DEFAULT_GOOGLE_VIDEO_MODEL = "veo-3.1-fast-generate-preview";

/** HTTP timeout (ms) given to the Google client when the request sets no `timeoutMs`. */
const DEFAULT_TIMEOUT_MS = 180_000;

/** Delay (ms) between two polls of a pending video operation. */
const POLL_INTERVAL_MS = 10_000;

/** Number of polls allowed before generation is reported as not finishing in time. */
const MAX_POLL_ATTEMPTS = 90;

/**
 * Clip lengths (seconds) that requested durations are snapped to.
 * Must stay sorted ascending: the min/max constants below read the first and last entry.
 */
const GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS = [4, 6, 8] as const;

/** Shortest allowed clip length, in seconds. */
const GOOGLE_VIDEO_MIN_DURATION_SECONDS = GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS[0];

/** Longest allowed clip length, in seconds. */
const GOOGLE_VIDEO_MAX_DURATION_SECONDS =
  GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS[GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS.length - 1];
|
|
|
|
/**
 * Reads the optional Google base URL override from the request configuration.
 *
 * @param req - Video generation request; only `cfg.models.providers.google.baseUrl` is read.
 * @returns The configured URL passed through `normalizeGoogleApiBaseUrl`, or
 * `undefined` when no override is set or it is blank after trimming.
 */
function resolveConfiguredGoogleVideoBaseUrl(req: VideoGenerationRequest): string | undefined {
  const configured = req.cfg?.models?.providers?.google?.baseUrl?.trim();
  return configured ? normalizeGoogleApiBaseUrl(configured) : undefined;
}
|
|
|
|
/**
 * Parses a `WIDTHxHEIGHT` size string such as `1280x720`.
 *
 * Surrounding whitespace is ignored and the separator may be `x` or `X`.
 *
 * @param size - Raw size string, if any.
 * @returns The parsed dimensions, or `undefined` when the input is missing,
 * blank, malformed, or names a dimension that is zero or not finite.
 */
function parseVideoSize(size: string | undefined): { width: number; height: number } | undefined {
  const trimmed = size?.trim();
  if (!trimmed) {
    return undefined;
  }
  const match = /^(\d+)x(\d+)$/iu.exec(trimmed);
  if (!match) {
    return undefined;
  }
  const width = Number.parseInt(match[1] ?? "", 10);
  const height = Number.parseInt(match[2] ?? "", 10);
  // Very long digit runs parse to Infinity, and a zero edge describes no frame
  // at all; neither can be used to derive an aspect ratio or resolution.
  const isUsable = (value: number): boolean => Number.isFinite(value) && value > 0;
  if (!isUsable(width) || !isUsable(height)) {
    return undefined;
  }
  return { width, height };
}
|
|
|
|
/**
 * Chooses the aspect ratio to send with a request.
 *
 * An explicit `aspectRatio` of `16:9` or `9:16` (after trimming) wins. Any other
 * value is ignored and the ratio is derived from `size` instead: width greater
 * than or equal to height (square included) maps to `16:9`, otherwise `9:16`.
 *
 * @param params - Requested aspect ratio and/or `WIDTHxHEIGHT` size.
 * @returns The chosen ratio, or `undefined` when neither input is usable.
 */
function resolveAspectRatio(params: {
  aspectRatio?: string;
  size?: string;
}): "16:9" | "9:16" | undefined {
  const direct = params.aspectRatio?.trim();
  if (direct === "16:9" || direct === "9:16") {
    return direct;
  }
  const parsedSize = parseVideoSize(params.size);
  if (!parsedSize) {
    return undefined;
  }
  return parsedSize.width >= parsedSize.height ? "16:9" : "9:16";
}
|
|
|
|
/**
 * Chooses the resolution to send with a request.
 *
 * An explicit `resolution` of `720P` or `1080P` wins; it is trimmed and compared
 * case-insensitively so `"720p"` and `" 1080P "` are honoured too. Otherwise the
 * longer edge of `size` decides: at least 1920 maps to `1080p`, at least 1280
 * maps to `720p`, and anything smaller yields no resolution.
 *
 * @param params - Requested resolution and/or `WIDTHxHEIGHT` size.
 * @returns The resolution in the lower-case form used in the request config,
 * or `undefined` when neither input is usable.
 */
function resolveResolution(params: {
  resolution?: string;
  size?: string;
}): "720p" | "1080p" | undefined {
  const requested = params.resolution?.trim().toUpperCase();
  if (requested === "720P") {
    return "720p";
  }
  if (requested === "1080P") {
    return "1080p";
  }
  const parsedSize = parseVideoSize(params.size);
  if (!parsedSize) {
    return undefined;
  }
  const maxEdge = Math.max(parsedSize.width, parsedSize.height);
  if (maxEdge >= 1920) {
    return "1080p";
  }
  if (maxEdge >= 1280) {
    return "720p";
  }
  return undefined;
}
|
|
|
|
/**
 * Snaps a requested duration to one of `GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS`.
 *
 * The value is rounded to a whole second, clamped to the allowed min/max, and
 * then replaced by the closest allowed entry. When two entries are equally
 * close, the longer one is chosen.
 *
 * @param durationSeconds - Requested clip length in seconds, if any.
 * @returns The snapped duration, or `undefined` when the input is not a finite number.
 */
function resolveDurationSeconds(durationSeconds: number | undefined): number | undefined {
  if (typeof durationSeconds !== "number" || !Number.isFinite(durationSeconds)) {
    return undefined;
  }
  const rounded = Math.min(
    GOOGLE_VIDEO_MAX_DURATION_SECONDS,
    Math.max(GOOGLE_VIDEO_MIN_DURATION_SECONDS, Math.round(durationSeconds)),
  );
  return GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS.reduce((best, current) => {
    const currentDistance = Math.abs(current - rounded);
    const bestDistance = Math.abs(best - rounded);
    if (currentDistance < bestDistance) {
      return current;
    }
    // Tie: prefer the longer clip.
    if (currentDistance === bestDistance && current > best) {
      return current;
    }
    return best;
  });
}
|
|
|
|
/**
 * Converts the first input image of a request into the inline payload passed
 * as `image` to `generateVideos`.
 *
 * @param req - Video generation request; only `inputImages[0]` is read.
 * @returns Base64 image bytes plus MIME type (`image/png` when the input's
 * MIME type is missing or blank), or `undefined` when there is no first image
 * or it carries no buffer.
 */
function resolveInputImage(req: VideoGenerationRequest) {
  const input = req.inputImages?.[0];
  if (!input?.buffer) {
    return undefined;
  }
  return {
    imageBytes: input.buffer.toString("base64"),
    // `||` rather than `??` so an empty string also falls back to the default.
    mimeType: input.mimeType?.trim() || "image/png",
  };
}
|
|
|
|
/**
 * Converts the first input video of a request into the inline payload passed
 * as `video` to `generateVideos`.
 *
 * @param req - Video generation request; only `inputVideos[0]` is read.
 * @returns Base64 video bytes plus MIME type (`video/mp4` when the input's
 * MIME type is missing or blank), or `undefined` when there is no first video
 * or it carries no buffer.
 */
function resolveInputVideo(req: VideoGenerationRequest) {
  const input = req.inputVideos?.[0];
  if (!input?.buffer) {
    return undefined;
  }
  return {
    videoBytes: input.buffer.toString("base64"),
    // `||` rather than `??` so an empty string also falls back to the default.
    mimeType: input.mimeType?.trim() || "video/mp4",
  };
}
|
|
|
|
/**
 * Downloads a generated video through the SDK's file API and returns it in memory.
 *
 * The SDK download writes to a path on disk, so the file is staged in a fresh
 * temporary directory, read back into a buffer, and the directory is removed
 * again whether or not the download succeeded.
 *
 * @param params.client - Google GenAI client used for the download.
 * @param params.file - File handle taken from the operation response.
 * @param params.index - Zero-based position of the video; used for the 1-based file name.
 * @returns The video bytes labelled as `video/mp4`.
 */
async function downloadGeneratedVideo(params: {
  client: GoogleGenAI;
  file: unknown;
  index: number;
}): Promise<GeneratedVideoAsset> {
  const fileName = `video-${params.index + 1}.mp4`;
  const tempDir = await mkdtemp(path.join(os.tmpdir(), "openclaw-google-video-"));
  const downloadPath = path.join(tempDir, fileName);
  try {
    await params.client.files.download({
      // The handle arrives untyped from the caller; the cast defers to the SDK's own parameter type.
      file: params.file as never,
      downloadPath,
    });
    const buffer = await readFile(downloadPath);
    return {
      buffer,
      mimeType: "video/mp4",
      fileName,
    };
  } finally {
    await rm(tempDir, { recursive: true, force: true });
  }
}
|
|
|
|
/**
 * Builds the `google` video generation provider.
 *
 * The returned provider advertises its models and capabilities, reports whether
 * a Google API key is configured, and implements `generateVideo` by starting a
 * `generateVideos` operation, polling it until it is done, and collecting the
 * resulting video bytes.
 *
 * @returns The provider definition.
 */
export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "google",
    label: "Google",
    defaultModel: DEFAULT_GOOGLE_VIDEO_MODEL,
    models: [
      DEFAULT_GOOGLE_VIDEO_MODEL,
      "veo-3.1-generate-preview",
      "veo-3.1-lite-generate-preview",
      "veo-3.0-fast-generate-001",
      "veo-3.0-generate-001",
      "veo-2.0-generate-001",
    ],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "google",
        agentDir,
      }),
    capabilities: {
      generate: {
        maxVideos: 1,
        maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS,
        supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS,
        aspectRatios: ["16:9", "9:16"],
        resolutions: ["720P", "1080P"],
        supportsAspectRatio: true,
        supportsResolution: true,
        supportsSize: true,
        supportsAudio: true,
      },
      imageToVideo: {
        enabled: true,
        maxVideos: 1,
        maxInputImages: 1,
        maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS,
        supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS,
        aspectRatios: ["16:9", "9:16"],
        resolutions: ["720P", "1080P"],
        supportsAspectRatio: true,
        supportsResolution: true,
        supportsSize: true,
        supportsAudio: true,
      },
      videoToVideo: {
        enabled: true,
        maxVideos: 1,
        maxInputVideos: 1,
        maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS,
        supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS,
        aspectRatios: ["16:9", "9:16"],
        resolutions: ["720P", "1080P"],
        supportsAspectRatio: true,
        supportsResolution: true,
        supportsSize: true,
        supportsAudio: true,
      },
    },
    /**
     * Generates a single video for the request.
     *
     * @throws Error when more than one image or video is supplied, when both
     * kinds of input are supplied, when no API key resolves, when polling
     * exceeds `MAX_POLL_ATTEMPTS`, when the operation reports an error, or
     * when the response contains no usable video.
     */
    async generateVideo(req) {
      // Reject input combinations up front, before any credentials are resolved.
      if ((req.inputImages?.length ?? 0) > 1) {
        throw new Error("Google video generation supports at most one input image.");
      }
      if ((req.inputVideos?.length ?? 0) > 1) {
        throw new Error("Google video generation supports at most one input video.");
      }
      if ((req.inputImages?.length ?? 0) > 0 && (req.inputVideos?.length ?? 0) > 0) {
        throw new Error(
          "Google video generation does not support image and video inputs together.",
        );
      }

      const auth = await resolveApiKeyForProvider({
        provider: "google",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("Google API key missing");
      }

      // Resolve every derived request parameter exactly once.
      const configuredBaseUrl = resolveConfiguredGoogleVideoBaseUrl(req);
      const model = req.model?.trim() || DEFAULT_GOOGLE_VIDEO_MODEL;
      const durationSeconds = resolveDurationSeconds(req.durationSeconds);
      const aspectRatio = resolveAspectRatio({ aspectRatio: req.aspectRatio, size: req.size });
      const resolution = resolveResolution({ resolution: req.resolution, size: req.size });

      const client = new GoogleGenAI({
        apiKey: auth.apiKey,
        httpOptions: {
          ...(configuredBaseUrl ? { baseUrl: configuredBaseUrl } : {}),
          timeout: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
        },
      });

      // Optional settings are only included when a value was resolved, so the
      // config never carries explicit `undefined` entries.
      let operation = await client.models.generateVideos({
        model,
        prompt: req.prompt,
        image: resolveInputImage(req),
        video: resolveInputVideo(req),
        config: {
          numberOfVideos: 1,
          ...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
          ...(aspectRatio ? { aspectRatio } : {}),
          ...(resolution ? { resolution } : {}),
          ...(req.audio === true ? { generateAudio: true } : {}),
        },
      });

      // Poll until the operation reports completion, giving up after
      // MAX_POLL_ATTEMPTS waits of POLL_INTERVAL_MS each.
      for (let attempt = 0; !(operation.done ?? false); attempt += 1) {
        if (attempt >= MAX_POLL_ATTEMPTS) {
          throw new Error("Google video generation did not finish in time");
        }
        await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
        operation = await client.operations.getVideosOperation({ operation });
      }
      if (operation.error) {
        throw new Error(JSON.stringify(operation.error));
      }

      const generatedVideos = operation.response?.generatedVideos ?? [];
      if (generatedVideos.length === 0) {
        throw new Error("Google video generation response missing generated videos");
      }

      const videos = await Promise.all(
        generatedVideos.map(async (entry, index) => {
          const inline = entry.video;
          // Use bytes embedded in the response when present; otherwise download via the file handle.
          if (inline?.videoBytes) {
            return {
              buffer: Buffer.from(inline.videoBytes, "base64"),
              mimeType: inline.mimeType?.trim() || "video/mp4",
              fileName: `video-${index + 1}.mp4`,
            };
          }
          if (!inline) {
            throw new Error("Google generated video missing file handle");
          }
          return await downloadGeneratedVideo({
            client,
            file: inline,
            index,
          });
        }),
      );

      return {
        videos,
        model,
        metadata: operation.name
          ? {
              operationName: operation.name,
            }
          : undefined,
      };
    },
  };
}
|