From 94686c63fb692008f642bc5c72732a8f817e6401 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 11:09:46 +0100 Subject: [PATCH] fix(google): fall back to rest for veo sdk 404 --- CHANGELOG.md | 1 + .../google/video-generation-provider.test.ts | 86 +++++++ .../google/video-generation-provider.ts | 228 +++++++++++++++--- 3 files changed, 288 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bedc9e8a02b..ceb185648e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai - OpenAI image generation: use `gpt-5.5` for the Codex OAuth responses transport instead of the retired `gpt-5.4` model, fixing 500s from ChatGPT Codex image generation. Fixes #71513. Thanks @baolongl. - Google video generation: download direct MLDev Veo `video.uri` results instead of passing them through the Files API path, fixing 404s after successful generation/polling. Fixes #71200. Thanks @panhaishan. +- Google video generation: fall back to the REST `predictLongRunning` Veo endpoint for text-only SDK 404s while keeping reference image/video generation on the SDK path. Fixes #62309 and #63008. (#62343) Thanks @leoleedev. - MiniMax music generation: switch the bundled default model from the unsupported `music-2.5+` id to the current `music-2.6` API model. Fixes #64870 and addresses the music default from #62315. Thanks @noahclanman and @edwardzheng1. - Google media generation: strip a configured trailing `/v1beta` from Google music/video provider base URLs before calling the Google GenAI SDK, preventing doubled `/v1beta/v1beta` paths. Fixes #63240. (#63258) Thanks @Hybirdss. - Discord: restore direct-message voice-note preflight transcription and classify URL-only Ogg/Opus voice attachments as audio while skipping partial attachments without usable URLs. Fixes #61314 and #64803. diff --git a/extensions/google/video-generation-provider.test.ts b/extensions/google/video-generation-provider.test.ts index 75ee8cdeaf1..f7c6aa8a009 100644 --- a/extensions/google/video-generation-provider.test.ts +++ b/extensions/google/video-generation-provider.test.ts @@ -188,6 +188,92 @@ describe("google video generation provider", () => { expect(result.videos[0]?.mimeType).toBe("video/mp4"); }); + it("falls back to REST predictLongRunning when text-only SDK video generation returns 404", async () => { + vi.spyOn(providerAuthRuntime, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "google-key", + source: "env", + mode: "api-key", + }); + generateVideosMock.mockRejectedValue(Object.assign(new Error("sdk 404"), { status: 404 })); + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + new Response( + JSON.stringify({ + done: true, + name: "operations/rest-123", + response: { + generateVideoResponse: { + generatedSamples: [ + { + video: { + uri: "https://generativelanguage.googleapis.com/v1beta/files/rest-video:download?alt=media", + mimeType: "video/mp4", + }, + }, + ], + }, + }, + }), + ), + ) + .mockResolvedValueOnce( + new Response("rest-video", { + status: 200, + statusText: "OK", + headers: { "content-type": "video/mp4" }, + }), + ); + vi.stubGlobal("fetch", fetchMock); + + const provider = buildGoogleVideoGenerationProvider(); + const result = await provider.generateVideo({ + provider: "google", + model: "google/models/veo-3.1-fast-generate-preview", + prompt: "A tiny robot watering a windowsill garden", + cfg: {}, + durationSeconds: 3, + }); + + expect(fetchMock).toHaveBeenCalledTimes(2); + expect(String(fetchMock.mock.calls[0]?.[0])).toBe( + "https://generativelanguage.googleapis.com/v1beta/models/veo-3.1-fast-generate-preview:predictLongRunning", + ); + expect(JSON.parse(String(fetchMock.mock.calls[0]?.[1]?.body))).toEqual({ + instances: [{ prompt: "A tiny robot watering a windowsill garden" }], + parameters: { durationSeconds: 4 }, + }); + expect(String(fetchMock.mock.calls[1]?.[0])).toBe( + "https://generativelanguage.googleapis.com/v1beta/files/rest-video:download?alt=media&key=google-key", + ); + expect(downloadMock).not.toHaveBeenCalled(); + expect(result.videos[0]?.buffer).toEqual(Buffer.from("rest-video")); + }); + + it("does not fall back to REST when SDK video generation with reference inputs returns 404", async () => { + vi.spyOn(providerAuthRuntime, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "google-key", + source: "env", + mode: "api-key", + }); + generateVideosMock.mockRejectedValue(Object.assign(new Error("sdk 404"), { status: 404 })); + const fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + + const provider = buildGoogleVideoGenerationProvider(); + await expect( + provider.generateVideo({ + provider: "google", + model: "veo-3.1-fast-generate-preview", + prompt: "Animate this sketch", + cfg: {}, + inputImages: [{ buffer: Buffer.from("img"), mimeType: "image/png" }], + }), + ).rejects.toThrow("sdk 404"); + + expect(fetchMock).not.toHaveBeenCalled(); + }); + it("does NOT strip /v1beta when it appears mid-path (end-anchor proof)", async () => { vi.spyOn(providerAuthRuntime, "resolveApiKeyForProvider").mockResolvedValue({ apiKey: "google-key", diff --git a/extensions/google/video-generation-provider.ts b/extensions/google/video-generation-provider.ts index 387a81abcae..602527206c7 100644 --- a/extensions/google/video-generation-provider.ts +++ b/extensions/google/video-generation-provider.ts @@ -13,7 +13,7 @@ import type { VideoGenerationProvider, VideoGenerationRequest, } from "openclaw/plugin-sdk/video-generation"; -import { resolveGoogleGenerativeAiApiOrigin } from "./api.js"; +import { parseGeminiAuth, resolveGoogleGenerativeAiApiOrigin } from "./api.js"; import { createGoogleVideoGenerationProviderMetadata, DEFAULT_GOOGLE_VIDEO_MODEL, @@ -26,12 +26,32 @@ import { createGoogleGenAI, type GoogleGenAIClient } from "./google-genai-runtim const DEFAULT_TIMEOUT_MS = 180_000; const POLL_INTERVAL_MS = 10_000; const MAX_POLL_ATTEMPTS = 90; +const GOOGLE_VIDEO_EMPTY_RESULT_MESSAGE = + "Google video generation response missing generated videos"; function resolveConfiguredGoogleVideoBaseUrl(req: VideoGenerationRequest): string | undefined { const configured = normalizeOptionalString(req.cfg?.models?.providers?.google?.baseUrl); return configured ? resolveGoogleGenerativeAiApiOrigin(configured) : undefined; } +function resolveGoogleVideoRestBaseUrl(configuredBaseUrl?: string): string { + return `${configuredBaseUrl ?? "https://generativelanguage.googleapis.com"}/v1beta`; +} + +function resolveGoogleVideoRestModelPath(model: string): string { + const trimmed = normalizeOptionalString(model) || DEFAULT_GOOGLE_VIDEO_MODEL; + if (trimmed.startsWith("google/models/")) { + return trimmed.slice("google/".length); + } + if (trimmed.startsWith("models/")) { + return trimmed; + } + if (trimmed.startsWith("google/")) { + return `models/${trimmed.slice("google/".length)}`; + } + return `models/${trimmed}`; +} + function parseVideoSize(size: string | undefined): { width: number; height: number } | undefined { const trimmed = normalizeOptionalString(size); if (!trimmed) { @@ -220,6 +240,120 @@ async function downloadGeneratedVideoFromUri(params: { }; } +function extractGoogleApiErrorCode(error: unknown): number | undefined { + const status = (error as { status?: unknown } | undefined)?.status; + if (typeof status === "number") { + return status; + } + const message = error instanceof Error ? error.message : String(error); + try { + const parsed = JSON.parse(message) as { code?: unknown; error?: { code?: unknown } }; + const code = typeof parsed.code === "number" ? parsed.code : parsed.error?.code; + return typeof code === "number" ? code : undefined; + } catch { + return /\b404\b/u.test(message) ? 404 : undefined; + } +} + +function extractGeneratedVideos(operation: unknown): Array<{ video?: unknown }> { + const response = (operation as { response?: Record }).response; + const generatedVideos = response?.generatedVideos; + if (Array.isArray(generatedVideos) && generatedVideos.length > 0) { + return generatedVideos as Array<{ video?: unknown }>; + } + const generatedSamples = (response?.generateVideoResponse as { generatedSamples?: unknown }) + ?.generatedSamples; + return Array.isArray(generatedSamples) ? (generatedSamples as Array<{ video?: unknown }>) : []; +} + +async function requestGoogleVideoJson(params: { + url: string; + method: "GET" | "POST"; + headers: Record; + deadline: ReturnType; + body?: unknown; +}): Promise { + const controller = new AbortController(); + const timeout = setTimeout( + () => controller.abort(), + resolveProviderOperationTimeoutMs({ + deadline: params.deadline, + defaultTimeoutMs: DEFAULT_TIMEOUT_MS, + }), + ); + try { + const response = await fetch(params.url, { + method: params.method, + headers: params.headers, + ...(params.body === undefined ? {} : { body: JSON.stringify(params.body) }), + signal: controller.signal, + }); + const text = await response.text(); + const payload = text ? (JSON.parse(text) as unknown) : {}; + if (!response.ok) { + throw new Error(typeof payload === "string" ? payload : JSON.stringify(payload ?? null)); + } + return payload; + } finally { + clearTimeout(timeout); + } +} + +async function generateGoogleVideoViaRest(params: { + baseUrl: string; + headers: Record; + deadline: ReturnType; + model: string; + prompt: string; + durationSeconds?: number; + aspectRatio?: "16:9" | "9:16"; + resolution?: "720p" | "1080p"; + audio?: boolean; +}): Promise { + let operation = await requestGoogleVideoJson({ + url: `${params.baseUrl}/${resolveGoogleVideoRestModelPath(params.model)}:predictLongRunning`, + method: "POST", + headers: params.headers, + deadline: params.deadline, + body: { + instances: [{ prompt: params.prompt }], + parameters: { + ...(typeof params.durationSeconds === "number" + ? { durationSeconds: params.durationSeconds } + : {}), + ...(params.aspectRatio ? { aspectRatio: params.aspectRatio } : {}), + ...(params.resolution ? { resolution: params.resolution } : {}), + ...(params.audio === true ? { generateAudio: true } : {}), + }, + }, + }); + + for (let attempt = 0; !((operation as { done?: boolean }).done ?? false); attempt += 1) { + if (attempt >= MAX_POLL_ATTEMPTS) { + throw new Error("Google video generation did not finish in time"); + } + await waitProviderOperationPollInterval({ + deadline: params.deadline, + pollIntervalMs: POLL_INTERVAL_MS, + }); + const operationName = (operation as { name?: unknown }).name; + if (typeof operationName !== "string" || !operationName) { + throw new Error("Google video operation response missing name for polling"); + } + operation = await requestGoogleVideoJson({ + url: `${params.baseUrl}/${operationName}`, + method: "GET", + headers: params.headers, + deadline: params.deadline, + }); + } + const error = (operation as { error?: unknown }).error; + if (error) { + throw new Error(JSON.stringify(error)); + } + return operation; +} + export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider { return { ...createGoogleVideoGenerationProviderMetadata(), @@ -247,7 +381,14 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider { const apiKey = auth.apiKey; const configuredBaseUrl = resolveConfiguredGoogleVideoBaseUrl(req); + const restBaseUrl = resolveGoogleVideoRestBaseUrl(configuredBaseUrl); + const authHeaders = parseGeminiAuth(apiKey).headers; const durationSeconds = resolveDurationSeconds(req.durationSeconds); + const model = normalizeOptionalString(req.model) || DEFAULT_GOOGLE_VIDEO_MODEL; + const aspectRatio = resolveAspectRatio({ aspectRatio: req.aspectRatio, size: req.size }); + const resolution = resolveResolution({ resolution: req.resolution, size: req.size }); + const hasReferenceInputs = + (req.inputImages?.length ?? 0) > 0 || (req.inputVideos?.length ?? 0) > 0; const deadline = createProviderOperationDeadline({ timeoutMs: req.timeoutMs, label: "Google video generation", @@ -262,37 +403,70 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider { }), }, }); - let operation = await client.models.generateVideos({ - model: normalizeOptionalString(req.model) || DEFAULT_GOOGLE_VIDEO_MODEL, - prompt: req.prompt, - image: resolveInputImage(req), - video: resolveInputVideo(req), - config: { - ...(typeof durationSeconds === "number" ? { durationSeconds } : {}), - ...(resolveAspectRatio({ aspectRatio: req.aspectRatio, size: req.size }) - ? { aspectRatio: resolveAspectRatio({ aspectRatio: req.aspectRatio, size: req.size }) } - : {}), - ...(resolveResolution({ resolution: req.resolution, size: req.size }) - ? { resolution: resolveResolution({ resolution: req.resolution, size: req.size }) } - : {}), - ...(req.audio === true ? { generateAudio: true } : {}), - }, - }); - - for (let attempt = 0; !(operation.done ?? false); attempt += 1) { - if (attempt >= MAX_POLL_ATTEMPTS) { - throw new Error("Google video generation did not finish in time"); + let usedRestFallback = false; + let operation; + try { + operation = await client.models.generateVideos({ + model, + prompt: req.prompt, + image: resolveInputImage(req), + video: resolveInputVideo(req), + config: { + ...(typeof durationSeconds === "number" ? { durationSeconds } : {}), + ...(aspectRatio ? { aspectRatio } : {}), + ...(resolution ? { resolution } : {}), + ...(req.audio === true ? { generateAudio: true } : {}), + }, + }); + } catch (error) { + if (hasReferenceInputs || extractGoogleApiErrorCode(error) !== 404) { + throw error; + } + usedRestFallback = true; + operation = await generateGoogleVideoViaRest({ + baseUrl: restBaseUrl, + headers: authHeaders, + deadline, + model, + prompt: req.prompt, + durationSeconds, + aspectRatio, + resolution, + audio: req.audio, + }); + } + + if (!usedRestFallback) { + for (let attempt = 0; !(operation.done ?? false); attempt += 1) { + if (attempt >= MAX_POLL_ATTEMPTS) { + throw new Error("Google video generation did not finish in time"); + } + await waitProviderOperationPollInterval({ deadline, pollIntervalMs: POLL_INTERVAL_MS }); + resolveProviderOperationTimeoutMs({ deadline, defaultTimeoutMs: DEFAULT_TIMEOUT_MS }); + operation = await client.operations.getVideosOperation({ operation }); } - await waitProviderOperationPollInterval({ deadline, pollIntervalMs: POLL_INTERVAL_MS }); - resolveProviderOperationTimeoutMs({ deadline, defaultTimeoutMs: DEFAULT_TIMEOUT_MS }); - operation = await client.operations.getVideosOperation({ operation }); } if (operation.error) { throw new Error(JSON.stringify(operation.error)); } - const generatedVideos = operation.response?.generatedVideos ?? []; + let generatedVideos = extractGeneratedVideos(operation); + if (generatedVideos.length === 0 && !hasReferenceInputs && !usedRestFallback) { + usedRestFallback = true; + operation = await generateGoogleVideoViaRest({ + baseUrl: restBaseUrl, + headers: authHeaders, + deadline, + model, + prompt: req.prompt, + durationSeconds, + aspectRatio, + resolution, + audio: req.audio, + }); + generatedVideos = extractGeneratedVideos(operation); + } if (generatedVideos.length === 0) { - throw new Error("Google video generation response missing generated videos"); + throw new Error(GOOGLE_VIDEO_EMPTY_RESULT_MESSAGE); } const videos = await Promise.all( generatedVideos.map(async (entry, index) => { @@ -326,7 +500,7 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider { ); return { videos, - model: normalizeOptionalString(req.model) || DEFAULT_GOOGLE_VIDEO_MODEL, + model, metadata: operation.name ? { operationName: operation.name,