mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 08:10:44 +00:00
fix(google): fall back to rest for veo sdk 404
This commit is contained in:
@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- OpenAI image generation: use `gpt-5.5` for the Codex OAuth responses transport instead of the retired `gpt-5.4` model, fixing 500s from ChatGPT Codex image generation. Fixes #71513. Thanks @baolongl.
|
||||
- Google video generation: download direct MLDev Veo `video.uri` results instead of passing them through the Files API path, fixing 404s after successful generation/polling. Fixes #71200. Thanks @panhaishan.
|
||||
- Google video generation: fall back to the REST `predictLongRunning` Veo endpoint for text-only SDK 404s while keeping reference image/video generation on the SDK path. Fixes #62309 and #63008. (#62343) Thanks @leoleedev.
|
||||
- MiniMax music generation: switch the bundled default model from the unsupported `music-2.5+` id to the current `music-2.6` API model. Fixes #64870 and addresses the music default from #62315. Thanks @noahclanman and @edwardzheng1.
|
||||
- Google media generation: strip a configured trailing `/v1beta` from Google music/video provider base URLs before calling the Google GenAI SDK, preventing doubled `/v1beta/v1beta` paths. Fixes #63240. (#63258) Thanks @Hybirdss.
|
||||
- Discord: restore direct-message voice-note preflight transcription and classify URL-only Ogg/Opus voice attachments as audio while skipping partial attachments without usable URLs. Fixes #61314 and #64803.
|
||||
|
||||
@@ -188,6 +188,92 @@ describe("google video generation provider", () => {
|
||||
expect(result.videos[0]?.mimeType).toBe("video/mp4");
|
||||
});
|
||||
|
||||
it("falls back to REST predictLongRunning when text-only SDK video generation returns 404", async () => {
|
||||
vi.spyOn(providerAuthRuntime, "resolveApiKeyForProvider").mockResolvedValue({
|
||||
apiKey: "google-key",
|
||||
source: "env",
|
||||
mode: "api-key",
|
||||
});
|
||||
generateVideosMock.mockRejectedValue(Object.assign(new Error("sdk 404"), { status: 404 }));
|
||||
const fetchMock = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce(
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
done: true,
|
||||
name: "operations/rest-123",
|
||||
response: {
|
||||
generateVideoResponse: {
|
||||
generatedSamples: [
|
||||
{
|
||||
video: {
|
||||
uri: "https://generativelanguage.googleapis.com/v1beta/files/rest-video:download?alt=media",
|
||||
mimeType: "video/mp4",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
}),
|
||||
),
|
||||
)
|
||||
.mockResolvedValueOnce(
|
||||
new Response("rest-video", {
|
||||
status: 200,
|
||||
statusText: "OK",
|
||||
headers: { "content-type": "video/mp4" },
|
||||
}),
|
||||
);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
const provider = buildGoogleVideoGenerationProvider();
|
||||
const result = await provider.generateVideo({
|
||||
provider: "google",
|
||||
model: "google/models/veo-3.1-fast-generate-preview",
|
||||
prompt: "A tiny robot watering a windowsill garden",
|
||||
cfg: {},
|
||||
durationSeconds: 3,
|
||||
});
|
||||
|
||||
expect(fetchMock).toHaveBeenCalledTimes(2);
|
||||
expect(String(fetchMock.mock.calls[0]?.[0])).toBe(
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/veo-3.1-fast-generate-preview:predictLongRunning",
|
||||
);
|
||||
expect(JSON.parse(String(fetchMock.mock.calls[0]?.[1]?.body))).toEqual({
|
||||
instances: [{ prompt: "A tiny robot watering a windowsill garden" }],
|
||||
parameters: { durationSeconds: 4 },
|
||||
});
|
||||
expect(String(fetchMock.mock.calls[1]?.[0])).toBe(
|
||||
"https://generativelanguage.googleapis.com/v1beta/files/rest-video:download?alt=media&key=google-key",
|
||||
);
|
||||
expect(downloadMock).not.toHaveBeenCalled();
|
||||
expect(result.videos[0]?.buffer).toEqual(Buffer.from("rest-video"));
|
||||
});
|
||||
|
||||
it("does not fall back to REST when SDK video generation with reference inputs returns 404", async () => {
|
||||
vi.spyOn(providerAuthRuntime, "resolveApiKeyForProvider").mockResolvedValue({
|
||||
apiKey: "google-key",
|
||||
source: "env",
|
||||
mode: "api-key",
|
||||
});
|
||||
generateVideosMock.mockRejectedValue(Object.assign(new Error("sdk 404"), { status: 404 }));
|
||||
const fetchMock = vi.fn();
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
const provider = buildGoogleVideoGenerationProvider();
|
||||
await expect(
|
||||
provider.generateVideo({
|
||||
provider: "google",
|
||||
model: "veo-3.1-fast-generate-preview",
|
||||
prompt: "Animate this sketch",
|
||||
cfg: {},
|
||||
inputImages: [{ buffer: Buffer.from("img"), mimeType: "image/png" }],
|
||||
}),
|
||||
).rejects.toThrow("sdk 404");
|
||||
|
||||
expect(fetchMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("does NOT strip /v1beta when it appears mid-path (end-anchor proof)", async () => {
|
||||
vi.spyOn(providerAuthRuntime, "resolveApiKeyForProvider").mockResolvedValue({
|
||||
apiKey: "google-key",
|
||||
|
||||
@@ -13,7 +13,7 @@ import type {
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
import { resolveGoogleGenerativeAiApiOrigin } from "./api.js";
|
||||
import { parseGeminiAuth, resolveGoogleGenerativeAiApiOrigin } from "./api.js";
|
||||
import {
|
||||
createGoogleVideoGenerationProviderMetadata,
|
||||
DEFAULT_GOOGLE_VIDEO_MODEL,
|
||||
@@ -26,12 +26,32 @@ import { createGoogleGenAI, type GoogleGenAIClient } from "./google-genai-runtim
|
||||
const DEFAULT_TIMEOUT_MS = 180_000;
|
||||
const POLL_INTERVAL_MS = 10_000;
|
||||
const MAX_POLL_ATTEMPTS = 90;
|
||||
const GOOGLE_VIDEO_EMPTY_RESULT_MESSAGE =
|
||||
"Google video generation response missing generated videos";
|
||||
|
||||
function resolveConfiguredGoogleVideoBaseUrl(req: VideoGenerationRequest): string | undefined {
|
||||
const configured = normalizeOptionalString(req.cfg?.models?.providers?.google?.baseUrl);
|
||||
return configured ? resolveGoogleGenerativeAiApiOrigin(configured) : undefined;
|
||||
}
|
||||
|
||||
function resolveGoogleVideoRestBaseUrl(configuredBaseUrl?: string): string {
|
||||
return `${configuredBaseUrl ?? "https://generativelanguage.googleapis.com"}/v1beta`;
|
||||
}
|
||||
|
||||
function resolveGoogleVideoRestModelPath(model: string): string {
|
||||
const trimmed = normalizeOptionalString(model) || DEFAULT_GOOGLE_VIDEO_MODEL;
|
||||
if (trimmed.startsWith("google/models/")) {
|
||||
return trimmed.slice("google/".length);
|
||||
}
|
||||
if (trimmed.startsWith("models/")) {
|
||||
return trimmed;
|
||||
}
|
||||
if (trimmed.startsWith("google/")) {
|
||||
return `models/${trimmed.slice("google/".length)}`;
|
||||
}
|
||||
return `models/${trimmed}`;
|
||||
}
|
||||
|
||||
function parseVideoSize(size: string | undefined): { width: number; height: number } | undefined {
|
||||
const trimmed = normalizeOptionalString(size);
|
||||
if (!trimmed) {
|
||||
@@ -220,6 +240,120 @@ async function downloadGeneratedVideoFromUri(params: {
|
||||
};
|
||||
}
|
||||
|
||||
function extractGoogleApiErrorCode(error: unknown): number | undefined {
|
||||
const status = (error as { status?: unknown } | undefined)?.status;
|
||||
if (typeof status === "number") {
|
||||
return status;
|
||||
}
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
try {
|
||||
const parsed = JSON.parse(message) as { code?: unknown; error?: { code?: unknown } };
|
||||
const code = typeof parsed.code === "number" ? parsed.code : parsed.error?.code;
|
||||
return typeof code === "number" ? code : undefined;
|
||||
} catch {
|
||||
return /\b404\b/u.test(message) ? 404 : undefined;
|
||||
}
|
||||
}
|
||||
|
||||
function extractGeneratedVideos(operation: unknown): Array<{ video?: unknown }> {
|
||||
const response = (operation as { response?: Record<string, unknown> }).response;
|
||||
const generatedVideos = response?.generatedVideos;
|
||||
if (Array.isArray(generatedVideos) && generatedVideos.length > 0) {
|
||||
return generatedVideos as Array<{ video?: unknown }>;
|
||||
}
|
||||
const generatedSamples = (response?.generateVideoResponse as { generatedSamples?: unknown })
|
||||
?.generatedSamples;
|
||||
return Array.isArray(generatedSamples) ? (generatedSamples as Array<{ video?: unknown }>) : [];
|
||||
}
|
||||
|
||||
async function requestGoogleVideoJson(params: {
|
||||
url: string;
|
||||
method: "GET" | "POST";
|
||||
headers: Record<string, string>;
|
||||
deadline: ReturnType<typeof createProviderOperationDeadline>;
|
||||
body?: unknown;
|
||||
}): Promise<unknown> {
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(
|
||||
() => controller.abort(),
|
||||
resolveProviderOperationTimeoutMs({
|
||||
deadline: params.deadline,
|
||||
defaultTimeoutMs: DEFAULT_TIMEOUT_MS,
|
||||
}),
|
||||
);
|
||||
try {
|
||||
const response = await fetch(params.url, {
|
||||
method: params.method,
|
||||
headers: params.headers,
|
||||
...(params.body === undefined ? {} : { body: JSON.stringify(params.body) }),
|
||||
signal: controller.signal,
|
||||
});
|
||||
const text = await response.text();
|
||||
const payload = text ? (JSON.parse(text) as unknown) : {};
|
||||
if (!response.ok) {
|
||||
throw new Error(typeof payload === "string" ? payload : JSON.stringify(payload ?? null));
|
||||
}
|
||||
return payload;
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
}
|
||||
|
||||
async function generateGoogleVideoViaRest(params: {
|
||||
baseUrl: string;
|
||||
headers: Record<string, string>;
|
||||
deadline: ReturnType<typeof createProviderOperationDeadline>;
|
||||
model: string;
|
||||
prompt: string;
|
||||
durationSeconds?: number;
|
||||
aspectRatio?: "16:9" | "9:16";
|
||||
resolution?: "720p" | "1080p";
|
||||
audio?: boolean;
|
||||
}): Promise<unknown> {
|
||||
let operation = await requestGoogleVideoJson({
|
||||
url: `${params.baseUrl}/${resolveGoogleVideoRestModelPath(params.model)}:predictLongRunning`,
|
||||
method: "POST",
|
||||
headers: params.headers,
|
||||
deadline: params.deadline,
|
||||
body: {
|
||||
instances: [{ prompt: params.prompt }],
|
||||
parameters: {
|
||||
...(typeof params.durationSeconds === "number"
|
||||
? { durationSeconds: params.durationSeconds }
|
||||
: {}),
|
||||
...(params.aspectRatio ? { aspectRatio: params.aspectRatio } : {}),
|
||||
...(params.resolution ? { resolution: params.resolution } : {}),
|
||||
...(params.audio === true ? { generateAudio: true } : {}),
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
for (let attempt = 0; !((operation as { done?: boolean }).done ?? false); attempt += 1) {
|
||||
if (attempt >= MAX_POLL_ATTEMPTS) {
|
||||
throw new Error("Google video generation did not finish in time");
|
||||
}
|
||||
await waitProviderOperationPollInterval({
|
||||
deadline: params.deadline,
|
||||
pollIntervalMs: POLL_INTERVAL_MS,
|
||||
});
|
||||
const operationName = (operation as { name?: unknown }).name;
|
||||
if (typeof operationName !== "string" || !operationName) {
|
||||
throw new Error("Google video operation response missing name for polling");
|
||||
}
|
||||
operation = await requestGoogleVideoJson({
|
||||
url: `${params.baseUrl}/${operationName}`,
|
||||
method: "GET",
|
||||
headers: params.headers,
|
||||
deadline: params.deadline,
|
||||
});
|
||||
}
|
||||
const error = (operation as { error?: unknown }).error;
|
||||
if (error) {
|
||||
throw new Error(JSON.stringify(error));
|
||||
}
|
||||
return operation;
|
||||
}
|
||||
|
||||
export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
|
||||
return {
|
||||
...createGoogleVideoGenerationProviderMetadata(),
|
||||
@@ -247,7 +381,14 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
|
||||
const apiKey = auth.apiKey;
|
||||
|
||||
const configuredBaseUrl = resolveConfiguredGoogleVideoBaseUrl(req);
|
||||
const restBaseUrl = resolveGoogleVideoRestBaseUrl(configuredBaseUrl);
|
||||
const authHeaders = parseGeminiAuth(apiKey).headers;
|
||||
const durationSeconds = resolveDurationSeconds(req.durationSeconds);
|
||||
const model = normalizeOptionalString(req.model) || DEFAULT_GOOGLE_VIDEO_MODEL;
|
||||
const aspectRatio = resolveAspectRatio({ aspectRatio: req.aspectRatio, size: req.size });
|
||||
const resolution = resolveResolution({ resolution: req.resolution, size: req.size });
|
||||
const hasReferenceInputs =
|
||||
(req.inputImages?.length ?? 0) > 0 || (req.inputVideos?.length ?? 0) > 0;
|
||||
const deadline = createProviderOperationDeadline({
|
||||
timeoutMs: req.timeoutMs,
|
||||
label: "Google video generation",
|
||||
@@ -262,37 +403,70 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
|
||||
}),
|
||||
},
|
||||
});
|
||||
let operation = await client.models.generateVideos({
|
||||
model: normalizeOptionalString(req.model) || DEFAULT_GOOGLE_VIDEO_MODEL,
|
||||
prompt: req.prompt,
|
||||
image: resolveInputImage(req),
|
||||
video: resolveInputVideo(req),
|
||||
config: {
|
||||
...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
|
||||
...(resolveAspectRatio({ aspectRatio: req.aspectRatio, size: req.size })
|
||||
? { aspectRatio: resolveAspectRatio({ aspectRatio: req.aspectRatio, size: req.size }) }
|
||||
: {}),
|
||||
...(resolveResolution({ resolution: req.resolution, size: req.size })
|
||||
? { resolution: resolveResolution({ resolution: req.resolution, size: req.size }) }
|
||||
: {}),
|
||||
...(req.audio === true ? { generateAudio: true } : {}),
|
||||
},
|
||||
});
|
||||
|
||||
for (let attempt = 0; !(operation.done ?? false); attempt += 1) {
|
||||
if (attempt >= MAX_POLL_ATTEMPTS) {
|
||||
throw new Error("Google video generation did not finish in time");
|
||||
let usedRestFallback = false;
|
||||
let operation;
|
||||
try {
|
||||
operation = await client.models.generateVideos({
|
||||
model,
|
||||
prompt: req.prompt,
|
||||
image: resolveInputImage(req),
|
||||
video: resolveInputVideo(req),
|
||||
config: {
|
||||
...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
|
||||
...(aspectRatio ? { aspectRatio } : {}),
|
||||
...(resolution ? { resolution } : {}),
|
||||
...(req.audio === true ? { generateAudio: true } : {}),
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
if (hasReferenceInputs || extractGoogleApiErrorCode(error) !== 404) {
|
||||
throw error;
|
||||
}
|
||||
usedRestFallback = true;
|
||||
operation = await generateGoogleVideoViaRest({
|
||||
baseUrl: restBaseUrl,
|
||||
headers: authHeaders,
|
||||
deadline,
|
||||
model,
|
||||
prompt: req.prompt,
|
||||
durationSeconds,
|
||||
aspectRatio,
|
||||
resolution,
|
||||
audio: req.audio,
|
||||
});
|
||||
}
|
||||
|
||||
if (!usedRestFallback) {
|
||||
for (let attempt = 0; !(operation.done ?? false); attempt += 1) {
|
||||
if (attempt >= MAX_POLL_ATTEMPTS) {
|
||||
throw new Error("Google video generation did not finish in time");
|
||||
}
|
||||
await waitProviderOperationPollInterval({ deadline, pollIntervalMs: POLL_INTERVAL_MS });
|
||||
resolveProviderOperationTimeoutMs({ deadline, defaultTimeoutMs: DEFAULT_TIMEOUT_MS });
|
||||
operation = await client.operations.getVideosOperation({ operation });
|
||||
}
|
||||
await waitProviderOperationPollInterval({ deadline, pollIntervalMs: POLL_INTERVAL_MS });
|
||||
resolveProviderOperationTimeoutMs({ deadline, defaultTimeoutMs: DEFAULT_TIMEOUT_MS });
|
||||
operation = await client.operations.getVideosOperation({ operation });
|
||||
}
|
||||
if (operation.error) {
|
||||
throw new Error(JSON.stringify(operation.error));
|
||||
}
|
||||
const generatedVideos = operation.response?.generatedVideos ?? [];
|
||||
let generatedVideos = extractGeneratedVideos(operation);
|
||||
if (generatedVideos.length === 0 && !hasReferenceInputs && !usedRestFallback) {
|
||||
usedRestFallback = true;
|
||||
operation = await generateGoogleVideoViaRest({
|
||||
baseUrl: restBaseUrl,
|
||||
headers: authHeaders,
|
||||
deadline,
|
||||
model,
|
||||
prompt: req.prompt,
|
||||
durationSeconds,
|
||||
aspectRatio,
|
||||
resolution,
|
||||
audio: req.audio,
|
||||
});
|
||||
generatedVideos = extractGeneratedVideos(operation);
|
||||
}
|
||||
if (generatedVideos.length === 0) {
|
||||
throw new Error("Google video generation response missing generated videos");
|
||||
throw new Error(GOOGLE_VIDEO_EMPTY_RESULT_MESSAGE);
|
||||
}
|
||||
const videos = await Promise.all(
|
||||
generatedVideos.map(async (entry, index) => {
|
||||
@@ -326,7 +500,7 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
|
||||
);
|
||||
return {
|
||||
videos,
|
||||
model: normalizeOptionalString(req.model) || DEFAULT_GOOGLE_VIDEO_MODEL,
|
||||
model,
|
||||
metadata: operation.name
|
||||
? {
|
||||
operationName: operation.name,
|
||||
|
||||
Reference in New Issue
Block a user