import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation"; import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; import { assertOkOrThrowHttpError, postJsonRequest, resolveProviderHttpRequestConfig, } from "openclaw/plugin-sdk/provider-http"; import { DEFAULT_GOOGLE_API_BASE_URL, normalizeGoogleApiBaseUrl, normalizeGoogleModelId, parseGeminiAuth, } from "./api.js"; const DEFAULT_GOOGLE_IMAGE_MODEL = "gemini-3.1-flash-image-preview"; const DEFAULT_OUTPUT_MIME = "image/png"; const GOOGLE_SUPPORTED_SIZES = [ "1024x1024", "1024x1536", "1536x1024", "1024x1792", "1792x1024", ] as const; const GOOGLE_SUPPORTED_ASPECT_RATIOS = [ "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9", ] as const; type GoogleInlineDataPart = { mimeType?: string; mime_type?: string; data?: string; }; type GoogleGenerateImageResponse = { candidates?: Array<{ content?: { parts?: Array<{ text?: string; inlineData?: GoogleInlineDataPart; inline_data?: GoogleInlineDataPart; }>; }; }>; }; function resolveGoogleBaseUrl(cfg: Parameters[0]["cfg"]): string { return normalizeGoogleApiBaseUrl(cfg?.models?.providers?.google?.baseUrl); } function normalizeGoogleImageModel(model: string | undefined): string { const trimmed = model?.trim(); return normalizeGoogleModelId(trimmed || DEFAULT_GOOGLE_IMAGE_MODEL); } function mapSizeToImageConfig( size: string | undefined, ): { aspectRatio?: string; imageSize?: "2K" | "4K" } | undefined { const trimmed = size?.trim(); if (!trimmed) { return undefined; } const normalized = trimmed.toLowerCase(); const mapping = new Map([ ["1024x1024", "1:1"], ["1024x1536", "2:3"], ["1536x1024", "3:2"], ["1024x1792", "9:16"], ["1792x1024", "16:9"], ]); const aspectRatio = mapping.get(normalized); const [widthRaw, heightRaw] = normalized.split("x"); const width = Number.parseInt(widthRaw ?? "", 10); const height = Number.parseInt(heightRaw ?? "", 10); const longestEdge = Math.max(width, height); const imageSize = longestEdge >= 3072 ? "4K" : longestEdge >= 1536 ? "2K" : undefined; if (!aspectRatio && !imageSize) { return undefined; } return { ...(aspectRatio ? { aspectRatio } : {}), ...(imageSize ? { imageSize } : {}), }; } export function buildGoogleImageGenerationProvider(): ImageGenerationProvider { return { id: "google", label: "Google", defaultModel: DEFAULT_GOOGLE_IMAGE_MODEL, models: [DEFAULT_GOOGLE_IMAGE_MODEL, "gemini-3-pro-image-preview"], capabilities: { generate: { maxCount: 4, supportsSize: true, supportsAspectRatio: true, supportsResolution: true, }, edit: { enabled: true, maxCount: 4, maxInputImages: 5, supportsSize: true, supportsAspectRatio: true, supportsResolution: true, }, geometry: { sizes: [...GOOGLE_SUPPORTED_SIZES], aspectRatios: [...GOOGLE_SUPPORTED_ASPECT_RATIOS], resolutions: ["1K", "2K", "4K"], }, }, async generateImage(req) { const auth = await resolveApiKeyForProvider({ provider: "google", cfg: req.cfg, agentDir: req.agentDir, store: req.authStore, }); if (!auth.apiKey) { throw new Error("Google API key missing"); } const model = normalizeGoogleImageModel(req.model); const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } = resolveProviderHttpRequestConfig({ baseUrl: resolveGoogleBaseUrl(req.cfg), defaultBaseUrl: DEFAULT_GOOGLE_API_BASE_URL, allowPrivateNetwork: Boolean(req.cfg?.models?.providers?.google?.baseUrl?.trim()), defaultHeaders: parseGeminiAuth(auth.apiKey).headers, provider: "google", api: "google-generative-ai", capability: "image", transport: "http", }); const imageConfig = mapSizeToImageConfig(req.size); const inputParts = (req.inputImages ?? []).map((image) => ({ inlineData: { mimeType: image.mimeType, data: image.buffer.toString("base64"), }, })); const resolvedImageConfig = { ...imageConfig, ...(req.aspectRatio?.trim() ? { aspectRatio: req.aspectRatio.trim() } : {}), ...(req.resolution ? { imageSize: req.resolution } : {}), }; const { response: res, release } = await postJsonRequest({ url: `${baseUrl}/models/${model}:generateContent`, headers, body: { contents: [ { role: "user", parts: [...inputParts, { text: req.prompt }], }, ], generationConfig: { responseModalities: ["TEXT", "IMAGE"], ...(Object.keys(resolvedImageConfig).length > 0 ? { imageConfig: resolvedImageConfig } : {}), }, }, timeoutMs: 60_000, fetchFn: fetch, allowPrivateNetwork, dispatcherPolicy, }); try { await assertOkOrThrowHttpError(res, "Google image generation failed"); const payload = (await res.json()) as GoogleGenerateImageResponse; let imageIndex = 0; const images = (payload.candidates ?? []) .flatMap((candidate) => candidate.content?.parts ?? []) .map((part) => { const inline = part.inlineData ?? part.inline_data; const data = inline?.data?.trim(); if (!data) { return null; } const mimeType = inline?.mimeType ?? inline?.mime_type ?? DEFAULT_OUTPUT_MIME; const extension = mimeType.includes("jpeg") ? "jpg" : (mimeType.split("/")[1] ?? "png"); imageIndex += 1; return { buffer: Buffer.from(data, "base64"), mimeType, fileName: `image-${imageIndex}.${extension}`, }; }) .filter((entry): entry is NonNullable => entry !== null); if (images.length === 0) { throw new Error("Google image generation response missing image data"); } return { images, model, }; } finally { await release(); } }, }; }