From dd978bf9754411719fc4a74b5e731946de463eac Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 6 Apr 2026 18:49:16 +0100 Subject: [PATCH] fix: stabilize media live provider coverage --- .../google/music-generation-provider.ts | 40 +- .../minimax/music-generation-provider.test.ts | 79 ++++ .../minimax/music-generation-provider.ts | 38 +- .../music-generation-providers.live.test.ts | 19 +- .../openai/image-generation-provider.test.ts | 46 ++- .../openai/image-generation-provider.ts | 51 +-- .../openai/video-generation-provider.test.ts | 69 +++- .../openai/video-generation-provider.ts | 84 +++-- .../video-generation-providers.live.test.ts | 91 +++-- extensions/xai/api.ts | 6 +- extensions/xai/code-execution.test.ts | 2 +- extensions/xai/onboard.test.ts | 2 +- extensions/xai/onboard.ts | 2 +- extensions/xai/provider-catalog.ts | 2 +- extensions/xai/setup-api.ts | 2 +- extensions/xai/src/code-execution-shared.ts | 2 +- extensions/xai/src/tool-auth-shared.test.ts | 2 +- extensions/xai/src/web-search-shared.ts | 2 +- extensions/xai/src/x-search-shared.ts | 2 +- extensions/xai/stream.ts | 4 +- extensions/xai/web-search.test.ts | 6 +- extensions/xai/x-search.test.ts | 2 +- src/agents/tools/video-generate-tool.ts | 11 +- src/image-generation/live-test-helpers.ts | 2 + src/image-generation/runtime.live.test.ts | 351 ++++++++++-------- .../live-test-helpers.test.ts | 20 +- src/video-generation/live-test-helpers.ts | 26 +- src/video-generation/types.ts | 2 +- 28 files changed, 654 insertions(+), 311 deletions(-) diff --git a/extensions/google/music-generation-provider.ts b/extensions/google/music-generation-provider.ts index da64156d034..e4c98cf7e83 100644 --- a/extensions/google/music-generation-provider.ts +++ b/extensions/google/music-generation-provider.ts @@ -66,26 +66,28 @@ function extractTracks(params: { payload: GoogleGenerateMusicResponse; model: st } { const lyrics: string[] = []; const tracks: GeneratedMusicAsset[] = []; - for (const part of params.payload.candidates?.[0]?.content?.parts ?? []) { - if (part.text?.trim()) { - lyrics.push(part.text.trim()); - continue; - } - const inline = part.inlineData ?? part.inline_data; - const data = inline?.data?.trim(); - if (!data) { - continue; - } - const mimeType = inline?.mimeType?.trim() || inline?.mime_type?.trim() || "audio/mpeg"; - tracks.push({ - buffer: Buffer.from(data, "base64"), - mimeType, - fileName: resolveTrackFileName({ - index: tracks.length, + for (const candidate of params.payload.candidates ?? []) { + for (const part of candidate.content?.parts ?? []) { + if (part.text?.trim()) { + lyrics.push(part.text.trim()); + continue; + } + const inline = part.inlineData ?? part.inline_data; + const data = inline?.data?.trim(); + if (!data) { + continue; + } + const mimeType = inline?.mimeType?.trim() || inline?.mime_type?.trim() || "audio/mpeg"; + tracks.push({ + buffer: Buffer.from(data, "base64"), mimeType, - model: params.model, - }), - }); + fileName: resolveTrackFileName({ + index: tracks.length, + mimeType, + model: params.model, + }), + }); + } } return { tracks, lyrics }; } diff --git a/extensions/minimax/music-generation-provider.test.ts b/extensions/minimax/music-generation-provider.test.ts index 2528c8b28a0..e87da12236f 100644 --- a/extensions/minimax/music-generation-provider.test.ts +++ b/extensions/minimax/music-generation-provider.test.ts @@ -70,13 +70,23 @@ describe("minimax music generation provider", () => { expect(postJsonRequestMock).toHaveBeenCalledWith( expect.objectContaining({ url: "https://api.minimax.io/v1/music_generation", + headers: expect.objectContaining({ + get: expect.any(Function), + }), body: expect.objectContaining({ model: "music-2.5+", lyrics: "our city wakes", output_format: "url", + audio_setting: { + sample_rate: 44100, + bitrate: 256000, + format: "mp3", + }, }), }), ); + const headers = postJsonRequestMock.mock.calls[0]?.[0]?.headers as Headers | undefined; + expect(headers?.get("content-type")).toBe("application/json"); expect(result.tracks).toHaveLength(1); expect(result.lyrics).toEqual(["our city wakes"]); expect(result.metadata).toEqual( @@ -87,6 +97,41 @@ describe("minimax music generation provider", () => { ); }); + it("downloads tracks when url output is returned in data.audio", async () => { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + data: { + audio: "https://example.com/url-audio.mp3", + }, + base_resp: { status_code: 0 }, + }), + }, + release: vi.fn(async () => {}), + }); + fetchWithTimeoutMock.mockResolvedValue({ + headers: new Headers({ "content-type": "audio/mpeg" }), + arrayBuffer: async () => Buffer.from("mp3-bytes"), + }); + + const provider = buildMinimaxMusicGenerationProvider(); + const result = await provider.generateMusic({ + provider: "minimax", + model: "music-2.5+", + prompt: "upbeat dance-pop with female vocals", + cfg: {}, + lyrics: "our city wakes", + }); + + expect(fetchWithTimeoutMock).toHaveBeenCalledWith( + "https://example.com/url-audio.mp3", + { method: "GET" }, + 120000, + fetch, + ); + expect(result.tracks[0]?.buffer.byteLength).toBeGreaterThan(0); + }); + it("rejects instrumental requests that also include lyrics", async () => { const provider = buildMinimaxMusicGenerationProvider(); @@ -101,4 +146,38 @@ describe("minimax music generation provider", () => { }), ).rejects.toThrow("cannot use lyrics when instrumental=true"); }); + + it("uses lyrics optimizer when lyrics are omitted", async () => { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + task_id: "task-456", + audio_url: "https://example.com/out.mp3", + base_resp: { status_code: 0 }, + }), + }, + release: vi.fn(async () => {}), + }); + fetchWithTimeoutMock.mockResolvedValue({ + headers: new Headers({ "content-type": "audio/mpeg" }), + arrayBuffer: async () => Buffer.from("mp3-bytes"), + }); + + const provider = buildMinimaxMusicGenerationProvider(); + await provider.generateMusic({ + provider: "minimax", + model: "music-2.5+", + prompt: "upbeat dance-pop", + cfg: {}, + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + model: "music-2.5+", + lyrics_optimizer: true, + }), + }), + ); + }); }); diff --git a/extensions/minimax/music-generation-provider.ts b/extensions/minimax/music-generation-provider.ts index f63556a1f58..8bdd58aef51 100644 --- a/extensions/minimax/music-generation-provider.ts +++ b/extensions/minimax/music-generation-provider.ts @@ -77,6 +77,11 @@ function decodePossibleText(data: string): string { return trimmed; } +function isLikelyRemoteUrl(value: string | undefined): boolean { + const trimmed = value?.trim(); + return Boolean(trimmed && /^https?:\/\//iu.test(trimmed)); +} + async function downloadTrackFromUrl(params: { url: string; timeoutMs?: number; @@ -106,6 +111,14 @@ function buildPrompt(req: MusicGenerationRequest): string { return parts.join("\n\n"); } +function resolveMinimaxMusicModel(model: string | undefined): string { + const trimmed = model?.trim(); + if (!trimmed) { + return DEFAULT_MINIMAX_MUSIC_MODEL; + } + return trimmed; +} + export function buildMinimaxMusicGenerationProvider(): MusicGenerationProvider { return { id: "minimax", @@ -161,26 +174,27 @@ export function buildMinimaxMusicGenerationProvider(): MusicGenerationProvider { Authorization: `Bearer ${auth.apiKey}`, }, }); + const jsonHeaders = new Headers(headers); + jsonHeaders.set("Content-Type", "application/json"); - const model = req.model?.trim() || DEFAULT_MINIMAX_MUSIC_MODEL; + const model = resolveMinimaxMusicModel(req.model); + const lyrics = req.lyrics?.trim(); const body = { model, prompt: buildPrompt(req), ...(req.instrumental === true ? { is_instrumental: true } : {}), - ...(req.lyrics?.trim() - ? { lyrics: req.lyrics.trim() } - : req.instrumental === true - ? {} - : { lyrics_optimizer: true }), + ...(lyrics ? { lyrics } : req.instrumental === true ? {} : { lyrics_optimizer: true }), output_format: "url", audio_setting: { + sample_rate: 44_100, + bitrate: 256_000, format: "mp3", }, }; const { response: res, release } = await postJsonRequest({ url: `${baseUrl}/v1/music_generation`, - headers, + headers: jsonHeaders, body, timeoutMs: req.timeoutMs ?? DEFAULT_TIMEOUT_MS, fetchFn, @@ -194,8 +208,12 @@ export function buildMinimaxMusicGenerationProvider(): MusicGenerationProvider { const payload = (await res.json()) as MinimaxMusicCreateResponse; assertMinimaxBaseResp(payload.base_resp, "MiniMax music generation failed"); - const audioUrl = payload.audio_url?.trim() || payload.data?.audio_url?.trim(); - const inlineAudio = payload.audio?.trim() || payload.data?.audio?.trim(); + const audioCandidate = payload.audio?.trim() || payload.data?.audio?.trim(); + const audioUrl = + payload.audio_url?.trim() || + payload.data?.audio_url?.trim() || + (isLikelyRemoteUrl(audioCandidate) ? audioCandidate : undefined); + const inlineAudio = isLikelyRemoteUrl(audioCandidate) ? undefined : audioCandidate; const lyrics = decodePossibleText(payload.lyrics ?? payload.data?.lyrics ?? ""); const track = audioUrl @@ -223,7 +241,7 @@ export function buildMinimaxMusicGenerationProvider(): MusicGenerationProvider { ...(payload.task_id?.trim() ? { taskId: payload.task_id.trim() } : {}), ...(audioUrl ? { audioUrl } : {}), instrumental: req.instrumental === true, - ...(req.lyrics?.trim() ? { requestedLyrics: true } : {}), + ...(lyrics ? { requestedLyrics: true } : {}), ...(typeof req.durationSeconds === "number" ? { requestedDurationSeconds: req.durationSeconds } : {}), diff --git a/extensions/music-generation-providers.live.test.ts b/extensions/music-generation-providers.live.test.ts index ccd414bed77..3e274f36331 100644 --- a/extensions/music-generation-providers.live.test.ts +++ b/extensions/music-generation-providers.live.test.ts @@ -113,6 +113,20 @@ function maybeLoadShellEnvForMusicProviders(providerIds: string[]): void { }); } +function resolveLiveLyrics(providerId: string): string | undefined { + if (providerId !== "minimax") { + return undefined; + } + return [ + "[Verse]", + "Streetlights shimmer while we race the dawn", + "Neon echoes carry us along", + "[Chorus]", + "Hold the night inside this song", + "We run together bright and strong", + ].join("\n"); +} + describeLive("music generation provider live", () => { it( "covers generate plus declared edit paths with shell/profile auth", @@ -167,6 +181,7 @@ describeLive("music generation provider live", () => { ); const providerModel = resolveProviderModelForLiveTest(testCase.providerId, modelRef); const generateCaps = provider.capabilities.generate; + const liveLyrics = resolveLiveLyrics(testCase.providerId); try { const result = await provider.generateMusic({ @@ -178,7 +193,8 @@ describeLive("music generation provider live", () => { authStore, ...(generateCaps?.supportsDuration ? { durationSeconds: 12 } : {}), ...(generateCaps?.supportsFormat ? { format: "mp3" as const } : {}), - ...(generateCaps?.supportsInstrumental ? { instrumental: true } : {}), + ...(liveLyrics ? { lyrics: liveLyrics } : {}), + ...(generateCaps?.supportsInstrumental && !liveLyrics ? { instrumental: true } : {}), }); expect(result.tracks.length).toBeGreaterThan(0); @@ -233,6 +249,7 @@ describeLive("music generation provider live", () => { ); if (attempted.length === 0) { + expect(failures).toEqual([]); console.warn("[live:music-generation] no provider had usable auth; skipping assertions"); return; } diff --git a/extensions/openai/image-generation-provider.test.ts b/extensions/openai/image-generation-provider.test.ts index 31824ab758c..ead608934bf 100644 --- a/extensions/openai/image-generation-provider.test.ts +++ b/extensions/openai/image-generation-provider.test.ts @@ -4,13 +4,11 @@ import { buildOpenAIImageGenerationProvider } from "./image-generation-provider. const { resolveApiKeyForProviderMock, postJsonRequestMock, - postTranscriptionRequestMock, assertOkOrThrowHttpErrorMock, resolveProviderHttpRequestConfigMock, } = vi.hoisted(() => ({ resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "openai-key" })), postJsonRequestMock: vi.fn(), - postTranscriptionRequestMock: vi.fn(), assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), resolveProviderHttpRequestConfigMock: vi.fn((params) => ({ baseUrl: params.baseUrl ?? params.defaultBaseUrl, @@ -27,7 +25,6 @@ vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({ vi.mock("openclaw/plugin-sdk/provider-http", () => ({ assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, postJsonRequest: postJsonRequestMock, - postTranscriptionRequest: postTranscriptionRequestMock, resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, })); @@ -35,7 +32,6 @@ describe("openai image generation provider", () => { afterEach(() => { resolveApiKeyForProviderMock.mockClear(); postJsonRequestMock.mockReset(); - postTranscriptionRequestMock.mockReset(); assertOkOrThrowHttpErrorMock.mockClear(); resolveProviderHttpRequestConfigMock.mockClear(); }); @@ -80,4 +76,46 @@ describe("openai image generation provider", () => { ); expect(result.images).toHaveLength(1); }); + + it("uses JSON image_url edits for input-image requests", async () => { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + data: [{ b64_json: Buffer.from("png-bytes").toString("base64") }], + }), + }, + release: vi.fn(async () => {}), + }); + + const provider = buildOpenAIImageGenerationProvider(); + const result = await provider.generateImage({ + provider: "openai", + model: "gpt-image-1", + prompt: "Change only the background to pale blue", + cfg: {}, + inputImages: [ + { + buffer: Buffer.from("png-bytes"), + mimeType: "image/png", + fileName: "reference.png", + }, + ], + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://api.openai.com/v1/images/edits", + body: expect.objectContaining({ + model: "gpt-image-1", + prompt: "Change only the background to pale blue", + images: [ + { + image_url: "data:image/png;base64,cG5nLWJ5dGVz", + }, + ], + }), + }), + ); + expect(result.images).toHaveLength(1); + }); }); diff --git a/extensions/openai/image-generation-provider.ts b/extensions/openai/image-generation-provider.ts index 8a1a1a5e429..943b8fd9988 100644 --- a/extensions/openai/image-generation-provider.ts +++ b/extensions/openai/image-generation-provider.ts @@ -4,7 +4,6 @@ import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runt import { assertOkOrThrowHttpError, postJsonRequest, - postTranscriptionRequest, resolveProviderHttpRequestConfig, } from "openclaw/plugin-sdk/provider-http"; import { OPENAI_DEFAULT_IMAGE_MODEL as DEFAULT_OPENAI_IMAGE_MODEL } from "./default-models.js"; @@ -12,7 +11,6 @@ import { OPENAI_DEFAULT_IMAGE_MODEL as DEFAULT_OPENAI_IMAGE_MODEL } from "./defa const DEFAULT_OPENAI_IMAGE_BASE_URL = "https://api.openai.com/v1"; const DEFAULT_OUTPUT_MIME = "image/png"; const DEFAULT_SIZE = "1024x1024"; -const DEFAULT_INPUT_IMAGE_MIME = "image/png"; const OPENAI_SUPPORTED_SIZES = ["1024x1024", "1024x1536", "1536x1024"] as const; const OPENAI_MAX_INPUT_IMAGES = 5; @@ -28,20 +26,8 @@ function resolveOpenAIBaseUrl(cfg: Parameters[0 return direct || DEFAULT_OPENAI_IMAGE_BASE_URL; } -function inferFileExtensionFromMimeType(mimeType: string): string { - if (mimeType.includes("jpeg")) { - return "jpg"; - } - if (mimeType.includes("webp")) { - return "webp"; - } - return "png"; -} - -function toBlobBytes(buffer: Buffer): ArrayBuffer { - const arrayBuffer = new ArrayBuffer(buffer.byteLength); - new Uint8Array(arrayBuffer).set(buffer); - return arrayBuffer; +function toDataUrl(buffer: Buffer, mimeType: string): string { + return `data:${mimeType};base64,${buffer.toString("base64")}`; } export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider { @@ -103,27 +89,20 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider { const size = req.size ?? DEFAULT_SIZE; const requestResult = isEdit ? await (() => { - const form = new FormData(); - form.set("model", model); - form.set("prompt", req.prompt); - form.set("n", String(count)); - form.set("size", size); - inputImages.forEach((image, index) => { - const mimeType = image.mimeType?.trim() || DEFAULT_INPUT_IMAGE_MIME; - const extension = inferFileExtensionFromMimeType(mimeType); - const fileName = image.fileName?.trim() || `image-${index + 1}.${extension}`; - form.append( - "image", - new Blob([toBlobBytes(image.buffer)], { type: mimeType }), - fileName, - ); - }); - const multipartHeaders = new Headers(headers); - multipartHeaders.delete("Content-Type"); - return postTranscriptionRequest({ + const jsonHeaders = new Headers(headers); + jsonHeaders.set("Content-Type", "application/json"); + return postJsonRequest({ url: `${baseUrl}/images/edits`, - headers: multipartHeaders, - body: form, + headers: jsonHeaders, + body: { + model, + prompt: req.prompt, + n: count, + size, + images: inputImages.map((image) => ({ + image_url: toDataUrl(image.buffer, image.mimeType?.trim() || DEFAULT_OUTPUT_MIME), + })), + }, timeoutMs: req.timeoutMs, fetchFn: fetch, allowPrivateNetwork, diff --git a/extensions/openai/video-generation-provider.test.ts b/extensions/openai/video-generation-provider.test.ts index dcda2af6800..d8034049b55 100644 --- a/extensions/openai/video-generation-provider.test.ts +++ b/extensions/openai/video-generation-provider.test.ts @@ -4,14 +4,12 @@ import { buildOpenAIVideoGenerationProvider } from "./video-generation-provider. const { resolveApiKeyForProviderMock, postJsonRequestMock, - postTranscriptionRequestMock, fetchWithTimeoutMock, assertOkOrThrowHttpErrorMock, resolveProviderHttpRequestConfigMock, } = vi.hoisted(() => ({ resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "openai-key" })), postJsonRequestMock: vi.fn(), - postTranscriptionRequestMock: vi.fn(), fetchWithTimeoutMock: vi.fn(), assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), resolveProviderHttpRequestConfigMock: vi.fn((params) => ({ @@ -30,7 +28,6 @@ vi.mock("openclaw/plugin-sdk/provider-http", () => ({ assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, fetchWithTimeout: fetchWithTimeoutMock, postJsonRequest: postJsonRequestMock, - postTranscriptionRequest: postTranscriptionRequestMock, resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, })); @@ -38,7 +35,6 @@ describe("openai video generation provider", () => { afterEach(() => { resolveApiKeyForProviderMock.mockClear(); postJsonRequestMock.mockReset(); - postTranscriptionRequestMock.mockReset(); fetchWithTimeoutMock.mockReset(); assertOkOrThrowHttpErrorMock.mockClear(); resolveProviderHttpRequestConfigMock.mockClear(); @@ -84,7 +80,6 @@ describe("openai video generation provider", () => { url: "https://api.openai.com/v1/videos", }), ); - expect(postTranscriptionRequestMock).not.toHaveBeenCalled(); expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith( 1, "https://api.openai.com/v1/videos/vid_123", @@ -102,8 +97,8 @@ describe("openai video generation provider", () => { ); }); - it("uses multipart when a reference asset is present", async () => { - postTranscriptionRequestMock.mockResolvedValue({ + it("uses JSON input_reference.image_url for image-to-video requests", async () => { + postJsonRequestMock.mockResolvedValue({ response: { json: async () => ({ id: "vid_456", @@ -135,12 +130,68 @@ describe("openai video generation provider", () => { inputImages: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }], }); - expect(postJsonRequestMock).not.toHaveBeenCalled(); - expect(postTranscriptionRequestMock).toHaveBeenCalledWith( + expect(postJsonRequestMock).toHaveBeenCalledWith( expect.objectContaining({ url: "https://api.openai.com/v1/videos", + body: expect.objectContaining({ + input_reference: { + image_url: "data:image/png;base64,cG5nLWJ5dGVz", + }, + }), + }), + ); + expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith( + 1, + "https://api.openai.com/v1/videos/vid_456", + expect.objectContaining({ + method: "GET", + }), + 120000, + fetch, + ); + }); + + it("uses multipart input_reference for video-to-video uploads", async () => { + fetchWithTimeoutMock + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ + id: "vid_789", + model: "sora-2", + status: "queued", + }), + }) + .mockResolvedValueOnce({ + json: async () => ({ + id: "vid_789", + model: "sora-2", + status: "completed", + }), + }) + .mockResolvedValueOnce({ + headers: new Headers({ "content-type": "video/mp4" }), + arrayBuffer: async () => Buffer.from("mp4-bytes"), + }); + + const provider = buildOpenAIVideoGenerationProvider(); + await provider.generateVideo({ + provider: "openai", + model: "sora-2", + prompt: "Remix this clip", + cfg: {}, + inputVideos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }], + }); + + expect(postJsonRequestMock).not.toHaveBeenCalled(); + expect(fetchWithTimeoutMock).toHaveBeenNthCalledWith( + 1, + "https://api.openai.com/v1/videos", + expect.objectContaining({ + method: "POST", body: expect.any(FormData), }), + 120000, + fetch, ); }); diff --git a/extensions/openai/video-generation-provider.ts b/extensions/openai/video-generation-provider.ts index 0ff392f998f..acf9d1837bd 100644 --- a/extensions/openai/video-generation-provider.ts +++ b/extensions/openai/video-generation-provider.ts @@ -4,7 +4,6 @@ import { assertOkOrThrowHttpError, fetchWithTimeout, postJsonRequest, - postTranscriptionRequest, resolveProviderHttpRequestConfig, } from "openclaw/plugin-sdk/provider-http"; import type { @@ -47,6 +46,10 @@ function toBlobBytes(buffer: Buffer): ArrayBuffer { return arrayBuffer; } +function toDataUrl(buffer: Buffer, mimeType: string): string { + return `data:${mimeType};base64,${buffer.toString("base64")}`; +} + function resolveDurationSeconds(durationSeconds: number | undefined): "4" | "8" | "12" | undefined { if (typeof durationSeconds !== "number" || !Number.isFinite(durationSeconds)) { return undefined; @@ -245,36 +248,67 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider { aspectRatio: req.aspectRatio, resolution: req.resolution, }); + const inputImage = req.inputImages?.[0]; const referenceAsset = resolveReferenceAsset(req); + const requestUrl = `${baseUrl}/videos`; const requestResult = referenceAsset - ? await (() => { - const form = new FormData(); - form.set("prompt", req.prompt); - form.set("model", model); - if (seconds) { - form.set("seconds", seconds); - } - if (size) { - form.set("size", size); - } - form.set("input_reference", referenceAsset); - const multipartHeaders = new Headers(headers); - multipartHeaders.delete("Content-Type"); - return postTranscriptionRequest({ - url: `${baseUrl}/videos`, - headers: multipartHeaders, - body: form, - timeoutMs: req.timeoutMs, - fetchFn, - allowPrivateNetwork, - dispatcherPolicy, - }); - })() + ? inputImage?.buffer + ? await (() => { + const jsonHeaders = new Headers(headers); + jsonHeaders.set("Content-Type", "application/json"); + return postJsonRequest({ + url: requestUrl, + headers: jsonHeaders, + body: { + prompt: req.prompt, + model, + ...(seconds ? { seconds } : {}), + ...(size ? { size } : {}), + input_reference: { + image_url: toDataUrl( + inputImage.buffer, + inputImage.mimeType?.trim() || "image/png", + ), + }, + }, + timeoutMs: req.timeoutMs, + fetchFn, + allowPrivateNetwork, + dispatcherPolicy, + }); + })() + : await (() => { + const form = new FormData(); + form.set("prompt", req.prompt); + form.set("model", model); + if (seconds) { + form.set("seconds", seconds); + } + if (size) { + form.set("size", size); + } + form.set("input_reference", referenceAsset); + const multipartHeaders = new Headers(headers); + multipartHeaders.delete("Content-Type"); + return fetchWithTimeout( + requestUrl, + { + method: "POST", + headers: multipartHeaders, + body: form, + }, + req.timeoutMs ?? DEFAULT_TIMEOUT_MS, + fetchFn, + ).then((response) => ({ + response, + release: async () => {}, + })); + })() : await (() => { const jsonHeaders = new Headers(headers); jsonHeaders.set("Content-Type", "application/json"); return postJsonRequest({ - url: `${baseUrl}/videos`, + url: requestUrl, headers: jsonHeaders, body: { prompt: req.prompt, diff --git a/extensions/video-generation-providers.live.test.ts b/extensions/video-generation-providers.live.test.ts index d1829d38d46..ecac63e3143 100644 --- a/extensions/video-generation-providers.live.test.ts +++ b/extensions/video-generation-providers.live.test.ts @@ -9,6 +9,7 @@ import { getShellEnvAppliedKeys, loadShellEnvFallback } from "../src/infra/shell import { encodePngRgba, fillPixel } from "../src/media/png-encode.js"; import { getProviderEnvVars } from "../src/secrets/provider-env-vars.js"; import { + canRunBufferBackedImageToVideoLiveLane, canRunBufferBackedVideoToVideoLiveLane, DEFAULT_LIVE_VIDEO_MODELS, parseCsvFilter, @@ -16,6 +17,7 @@ import { redactLiveApiKey, resolveConfiguredLiveVideoModels, resolveLiveVideoAuthStore, + resolveLiveVideoResolution, } from "../src/video-generation/live-test-helpers.js"; import { parseVideoGenerationModelRef } from "../src/video-generation/model-ref.js"; import { @@ -94,9 +96,9 @@ function withPluginsEnabled(cfg: OpenClawConfig): OpenClawConfig { }; } -function createEditReferencePng(): Buffer { - const width = 192; - const height = 192; +function createEditReferencePng(params?: { width?: number; height?: number }): Buffer { + const width = params?.width ?? 384; + const height = params?.height ?? 384; const buf = Buffer.alloc(width * height * 4, 255); for (let y = 0; y < height; y += 1) { @@ -105,14 +107,18 @@ function createEditReferencePng(): Buffer { } } - for (let y = 24; y < 168; y += 1) { - for (let x = 24; x < 168; x += 1) { + const outerInsetX = Math.max(1, Math.floor(width / 8)); + const outerInsetY = Math.max(1, Math.floor(height / 8)); + for (let y = outerInsetY; y < height - outerInsetY; y += 1) { + for (let x = outerInsetX; x < width - outerInsetX; x += 1) { fillPixel(buf, x, y, width, 76, 154, 255, 255); } } - for (let y = 48; y < 144; y += 1) { - for (let x = 48; x < 144; x += 1) { + const innerInsetX = Math.max(1, Math.floor(width / 4)); + const innerInsetY = Math.max(1, Math.floor(height / 4)); + for (let y = innerInsetY; y < height - innerInsetY; y += 1) { + for (let x = innerInsetX; x < width - innerInsetX; x += 1) { fillPixel(buf, x, y, width, 255, 255, 255, 255); } } @@ -200,6 +206,12 @@ describeLive("video generation provider live", () => { const imageToVideoCaps = provider.capabilities.imageToVideo; const videoToVideoCaps = provider.capabilities.videoToVideo; const durationSeconds = Math.min(generateCaps?.maxDurationSeconds ?? 3, 3); + const liveResolution = resolveLiveVideoResolution({ + providerId: testCase.providerId, + modelRef, + }); + const liveSize = testCase.providerId === "openai" ? "1280x720" : undefined; + const logPrefix = `[live:video-generation] provider=${testCase.providerId} model=${providerModel}`; let generatedVideo = null as { buffer: Buffer; mimeType: string; @@ -207,6 +219,8 @@ describeLive("video generation provider live", () => { } | null; try { + const startedAt = Date.now(); + console.error(`${logPrefix} mode=generate start auth=${authLabel}`); const result = await provider.generateVideo({ provider: testCase.providerId, model: providerModel, @@ -216,8 +230,9 @@ describeLive("video generation provider live", () => { agentDir, authStore, durationSeconds, + ...(generateCaps?.supportsSize && liveSize ? { size: liveSize } : {}), ...(generateCaps?.supportsAspectRatio ? { aspectRatio: "16:9" } : {}), - ...(generateCaps?.supportsResolution ? { resolution: "480P" as const } : {}), + ...(generateCaps?.supportsResolution ? { resolution: liveResolution } : {}), ...(generateCaps?.supportsAudio ? { audio: false } : {}), ...(generateCaps?.supportsWatermark ? { watermark: false } : {}), }); @@ -227,20 +242,38 @@ describeLive("video generation provider live", () => { expect(result.videos[0]?.buffer.byteLength).toBeGreaterThan(1024); generatedVideo = result.videos[0] ?? null; attempted.push(`${testCase.providerId}:generate:${providerModel} (${authLabel})`); - } catch (error) { - failures.push( - `${testCase.providerId}:generate (${authLabel}): ${ - error instanceof Error ? error.message : String(error) - }`, + console.error( + `${logPrefix} mode=generate done ms=${Date.now() - startedAt} videos=${result.videos.length}`, ); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + failures.push(`${testCase.providerId}:generate (${authLabel}): ${message}`); + console.error(`${logPrefix} mode=generate failed error=${message}`); continue; } if (!imageToVideoCaps?.enabled) { continue; } + if ( + !canRunBufferBackedImageToVideoLiveLane({ + providerId: testCase.providerId, + modelRef, + }) + ) { + skipped.push( + `${testCase.providerId}:imageToVideo requires remote URL or model-specific input`, + ); + continue; + } try { + const startedAt = Date.now(); + console.error(`${logPrefix} mode=imageToVideo start auth=${authLabel}`); + const referenceImage = + testCase.providerId === "openai" + ? createEditReferencePng({ width: 1280, height: 720 }) + : createEditReferencePng(); const result = await provider.generateVideo({ provider: testCase.providerId, model: providerModel, @@ -250,15 +283,16 @@ describeLive("video generation provider live", () => { agentDir, authStore, durationSeconds, + ...(imageToVideoCaps.supportsSize && liveSize ? { size: liveSize } : {}), inputImages: [ { - buffer: createEditReferencePng(), + buffer: referenceImage, mimeType: "image/png", fileName: "reference.png", }, ], ...(imageToVideoCaps.supportsAspectRatio ? { aspectRatio: "16:9" } : {}), - ...(imageToVideoCaps.supportsResolution ? { resolution: "480P" as const } : {}), + ...(imageToVideoCaps.supportsResolution ? { resolution: liveResolution } : {}), ...(imageToVideoCaps.supportsAudio ? { audio: false } : {}), ...(imageToVideoCaps.supportsWatermark ? { watermark: false } : {}), }); @@ -267,12 +301,13 @@ describeLive("video generation provider live", () => { expect(result.videos[0]?.mimeType.startsWith("video/")).toBe(true); expect(result.videos[0]?.buffer.byteLength).toBeGreaterThan(1024); attempted.push(`${testCase.providerId}:imageToVideo:${providerModel} (${authLabel})`); - } catch (error) { - failures.push( - `${testCase.providerId}:imageToVideo (${authLabel}): ${ - error instanceof Error ? error.message : String(error) - }`, + console.error( + `${logPrefix} mode=imageToVideo done ms=${Date.now() - startedAt} videos=${result.videos.length}`, ); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + failures.push(`${testCase.providerId}:imageToVideo (${authLabel}): ${message}`); + console.error(`${logPrefix} mode=imageToVideo failed error=${message}`); } if (!videoToVideoCaps?.enabled) { @@ -295,6 +330,8 @@ describeLive("video generation provider live", () => { } try { + const startedAt = Date.now(); + console.error(`${logPrefix} mode=videoToVideo start auth=${authLabel}`); const result = await provider.generateVideo({ provider: testCase.providerId, model: providerModel, @@ -305,7 +342,7 @@ describeLive("video generation provider live", () => { durationSeconds: Math.min(videoToVideoCaps.maxDurationSeconds ?? durationSeconds, 3), inputVideos: [generatedVideo], ...(videoToVideoCaps.supportsAspectRatio ? { aspectRatio: "16:9" } : {}), - ...(videoToVideoCaps.supportsResolution ? { resolution: "480P" as const } : {}), + ...(videoToVideoCaps.supportsResolution ? { resolution: liveResolution } : {}), ...(videoToVideoCaps.supportsAudio ? { audio: false } : {}), ...(videoToVideoCaps.supportsWatermark ? { watermark: false } : {}), }); @@ -314,12 +351,13 @@ describeLive("video generation provider live", () => { expect(result.videos[0]?.mimeType.startsWith("video/")).toBe(true); expect(result.videos[0]?.buffer.byteLength).toBeGreaterThan(1024); attempted.push(`${testCase.providerId}:videoToVideo:${providerModel} (${authLabel})`); - } catch (error) { - failures.push( - `${testCase.providerId}:videoToVideo (${authLabel}): ${ - error instanceof Error ? error.message : String(error) - }`, + console.error( + `${logPrefix} mode=videoToVideo done ms=${Date.now() - startedAt} videos=${result.videos.length}`, ); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + failures.push(`${testCase.providerId}:videoToVideo (${authLabel}): ${message}`); + console.error(`${logPrefix} mode=videoToVideo failed error=${message}`); } } @@ -328,6 +366,7 @@ describeLive("video generation provider live", () => { ); if (attempted.length === 0) { + expect(failures).toEqual([]); console.warn("[live:video-generation] no provider had usable auth; skipping assertions"); return; } diff --git a/extensions/xai/api.ts b/extensions/xai/api.ts index 3208079cb44..1a0d9fe8bb9 100644 --- a/extensions/xai/api.ts +++ b/extensions/xai/api.ts @@ -3,11 +3,11 @@ import { normalizeNativeXaiModelId, normalizeProviderId, resolveProviderEndpoint, -} from "@openclaw/plugin-sdk/provider-model-shared"; +} from "openclaw/plugin-sdk/provider-model-shared"; import { applyXaiModelCompat, resolveXaiModelCompatPatch, -} from "@openclaw/plugin-sdk/provider-tools"; +} from "openclaw/plugin-sdk/provider-tools"; export { buildXaiProvider } from "./provider-catalog.js"; export { applyXaiConfig, applyXaiProviderConfig } from "./onboard.js"; @@ -27,7 +27,7 @@ export { HTML_ENTITY_TOOL_CALL_ARGUMENTS_ENCODING, XAI_TOOL_SCHEMA_PROFILE, resolveXaiModelCompatPatch, -} from "@openclaw/plugin-sdk/provider-tools"; +} from "openclaw/plugin-sdk/provider-tools"; function isXaiNativeEndpoint(baseUrl: unknown): boolean { return ( diff --git a/extensions/xai/code-execution.test.ts b/extensions/xai/code-execution.test.ts index 876cebc7d10..4810bebcc8a 100644 --- a/extensions/xai/code-execution.test.ts +++ b/extensions/xai/code-execution.test.ts @@ -1,4 +1,4 @@ -import { withFetchPreconnect } from "@openclaw/plugin-sdk/testing"; +import { withFetchPreconnect } from "openclaw/plugin-sdk/testing"; import { afterEach, describe, expect, it, vi } from "vitest"; import { createCodeExecutionTool } from "./code-execution.js"; diff --git a/extensions/xai/onboard.test.ts b/extensions/xai/onboard.test.ts index a8c0f330f5a..769ed6ef6ef 100644 --- a/extensions/xai/onboard.test.ts +++ b/extensions/xai/onboard.test.ts @@ -1,7 +1,7 @@ import { resolveAgentModelFallbackValues, resolveAgentModelPrimaryValue, -} from "@openclaw/plugin-sdk/provider-onboard"; +} from "openclaw/plugin-sdk/provider-onboard"; import { describe, expect, it } from "vitest"; import { createConfigWithFallbacks, diff --git a/extensions/xai/onboard.ts b/extensions/xai/onboard.ts index 9a9cddd0bfa..bf4b4967fdf 100644 --- a/extensions/xai/onboard.ts +++ b/extensions/xai/onboard.ts @@ -1,7 +1,7 @@ import { createDefaultModelsPresetAppliers, type OpenClawConfig, -} from "@openclaw/plugin-sdk/provider-onboard"; +} from "openclaw/plugin-sdk/provider-onboard"; import { XAI_BASE_URL, XAI_DEFAULT_MODEL_ID } from "./model-definitions.js"; import { buildXaiCatalogModels } from "./model-definitions.js"; diff --git a/extensions/xai/provider-catalog.ts b/extensions/xai/provider-catalog.ts index 9dca2fb1957..f8c248d60de 100644 --- a/extensions/xai/provider-catalog.ts +++ b/extensions/xai/provider-catalog.ts @@ -1,4 +1,4 @@ -import type { ModelProviderConfig } from "@openclaw/plugin-sdk/provider-model-shared"; +import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-shared"; import { buildXaiCatalogModels, XAI_BASE_URL } from "./model-definitions.js"; export function buildXaiProvider( diff --git a/extensions/xai/setup-api.ts b/extensions/xai/setup-api.ts index 57e617c85e3..9e7a0382d48 100644 --- a/extensions/xai/setup-api.ts +++ b/extensions/xai/setup-api.ts @@ -1,4 +1,4 @@ -import { definePluginEntry } from "@openclaw/plugin-sdk/plugin-entry"; +import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; function isRecord(value: unknown): value is Record { return Boolean(value) && typeof value === "object" && !Array.isArray(value); diff --git a/extensions/xai/src/code-execution-shared.ts b/extensions/xai/src/code-execution-shared.ts index 3c34c269bde..f274348eba5 100644 --- a/extensions/xai/src/code-execution-shared.ts +++ b/extensions/xai/src/code-execution-shared.ts @@ -1,4 +1,4 @@ -import { postTrustedWebToolsJson } from "@openclaw/plugin-sdk/provider-web-search"; +import { postTrustedWebToolsJson } from "openclaw/plugin-sdk/provider-web-search"; import { buildXaiResponsesToolBody, resolveXaiResponseTextAndCitations, diff --git a/extensions/xai/src/tool-auth-shared.test.ts b/extensions/xai/src/tool-auth-shared.test.ts index 6a6bd80f3ee..2c281b6a06e 100644 --- a/extensions/xai/src/tool-auth-shared.test.ts +++ b/extensions/xai/src/tool-auth-shared.test.ts @@ -1,4 +1,4 @@ -import { NON_ENV_SECRETREF_MARKER } from "@openclaw/plugin-sdk/provider-auth-runtime"; +import { NON_ENV_SECRETREF_MARKER } from "openclaw/plugin-sdk/provider-auth-runtime"; import { afterEach, describe, expect, it, vi } from "vitest"; import { isXaiToolEnabled, diff --git a/extensions/xai/src/web-search-shared.ts b/extensions/xai/src/web-search-shared.ts index f50e249a8f7..d79efc42be4 100644 --- a/extensions/xai/src/web-search-shared.ts +++ b/extensions/xai/src/web-search-shared.ts @@ -1,4 +1,4 @@ -import { postTrustedWebToolsJson, wrapWebContent } from "@openclaw/plugin-sdk/provider-web-search"; +import { postTrustedWebToolsJson, wrapWebContent } from "openclaw/plugin-sdk/provider-web-search"; import { normalizeXaiModelId } from "../model-id.js"; import { buildXaiResponsesToolBody, diff --git a/extensions/xai/src/x-search-shared.ts b/extensions/xai/src/x-search-shared.ts index c32384aed84..53a41f4ed10 100644 --- a/extensions/xai/src/x-search-shared.ts +++ b/extensions/xai/src/x-search-shared.ts @@ -1,4 +1,4 @@ -import { postTrustedWebToolsJson, wrapWebContent } from "@openclaw/plugin-sdk/provider-web-search"; +import { postTrustedWebToolsJson, wrapWebContent } from "openclaw/plugin-sdk/provider-web-search"; import { buildXaiResponsesToolBody, resolveXaiResponseTextCitationsAndInline, diff --git a/extensions/xai/stream.ts b/extensions/xai/stream.ts index c55050b85e2..412f24a0e82 100644 --- a/extensions/xai/stream.ts +++ b/extensions/xai/stream.ts @@ -1,10 +1,10 @@ import type { StreamFn } from "@mariozechner/pi-agent-core"; import { streamSimple } from "@mariozechner/pi-ai"; -import type { ProviderWrapStreamFnContext } from "@openclaw/plugin-sdk/plugin-entry"; +import type { ProviderWrapStreamFnContext } from "openclaw/plugin-sdk/plugin-entry"; import { composeProviderStreamWrappers, createToolStreamWrapper, -} from "@openclaw/plugin-sdk/provider-stream-shared"; +} from "openclaw/plugin-sdk/provider-stream-shared"; const XAI_FAST_MODEL_IDS = new Map([ ["grok-3", "grok-3-fast"], diff --git a/extensions/xai/web-search.test.ts b/extensions/xai/web-search.test.ts index 3e422730de7..9681a6fdecf 100644 --- a/extensions/xai/web-search.test.ts +++ b/extensions/xai/web-search.test.ts @@ -1,6 +1,6 @@ -import { NON_ENV_SECRETREF_MARKER } from "@openclaw/plugin-sdk/provider-auth-runtime"; -import { createNonExitingRuntime } from "@openclaw/plugin-sdk/runtime-env"; -import { withEnv } from "@openclaw/plugin-sdk/testing"; +import { NON_ENV_SECRETREF_MARKER } from "openclaw/plugin-sdk/provider-auth-runtime"; +import { createNonExitingRuntime } from "openclaw/plugin-sdk/runtime-env"; +import { withEnv } from "openclaw/plugin-sdk/testing"; import { describe, expect, it, vi } from "vitest"; import { capturePluginRegistration } from "../../src/plugins/captured-registration.js"; import { createWizardPrompter } from "../../test/helpers/wizard-prompter.js"; diff --git a/extensions/xai/x-search.test.ts b/extensions/xai/x-search.test.ts index fa2c2199ce6..f6e57052abe 100644 --- a/extensions/xai/x-search.test.ts +++ b/extensions/xai/x-search.test.ts @@ -1,4 +1,4 @@ -import { withFetchPreconnect } from "@openclaw/plugin-sdk/testing"; +import { withFetchPreconnect } from "openclaw/plugin-sdk/testing"; import { afterEach, describe, expect, it, vi } from "vitest"; import { createXSearchTool } from "./x-search.js"; diff --git a/src/agents/tools/video-generate-tool.ts b/src/agents/tools/video-generate-tool.ts index 0b30f2d7311..8801a252b0e 100644 --- a/src/agents/tools/video-generate-tool.ts +++ b/src/agents/tools/video-generate-tool.ts @@ -124,7 +124,7 @@ const VideoGenerateToolSchema = Type.Object({ ), resolution: Type.Optional( Type.String({ - description: "Optional resolution hint: 480P, 720P, or 1080P.", + description: "Optional resolution hint: 480P, 720P, 768P, or 1080P.", }), ), durationSeconds: Type.Optional( @@ -175,10 +175,15 @@ function normalizeResolution(raw: string | undefined): VideoGenerationResolution if (!normalized) { return undefined; } - if (normalized === "480P" || normalized === "720P" || normalized === "1080P") { + if ( + normalized === "480P" || + normalized === "720P" || + normalized === "768P" || + normalized === "1080P" + ) { return normalized; } - throw new ToolInputError("resolution must be one of 480P, 720P, or 1080P"); + throw new ToolInputError("resolution must be one of 480P, 720P, 768P, or 1080P"); } function normalizeAspectRatio(raw: string | undefined): string | undefined { diff --git a/src/image-generation/live-test-helpers.ts b/src/image-generation/live-test-helpers.ts index 40e06938375..1aafdf8ec16 100644 --- a/src/image-generation/live-test-helpers.ts +++ b/src/image-generation/live-test-helpers.ts @@ -2,7 +2,9 @@ import type { AuthProfileStore } from "../agents/auth-profiles.js"; import type { OpenClawConfig } from "../config/config.js"; export const DEFAULT_LIVE_IMAGE_MODELS: Record = { + fal: "fal/fal-ai/flux/dev", google: "google/gemini-3.1-flash-image-preview", + minimax: "minimax/image-01", openai: "openai/gpt-image-1", vydra: "vydra/grok-imagine", }; diff --git a/src/image-generation/runtime.live.test.ts b/src/image-generation/runtime.live.test.ts index 3c35526bcc7..d6356551cd8 100644 --- a/src/image-generation/runtime.live.test.ts +++ b/src/image-generation/runtime.live.test.ts @@ -1,17 +1,22 @@ import { describe, expect, it } from "vitest"; +import falPlugin from "../../extensions/fal/index.js"; +import googlePlugin from "../../extensions/google/index.js"; +import minimaxPlugin from "../../extensions/minimax/index.js"; +import openaiPlugin from "../../extensions/openai/index.js"; +import vydraPlugin from "../../extensions/vydra/index.js"; +import { + registerProviderPlugin, + requireRegisteredProvider, +} from "../../test/helpers/plugins/provider-registration.js"; import { resolveOpenClawAgentDir } from "../agents/agent-paths.js"; import { collectProviderApiKeys } from "../agents/live-auth-keys.js"; -import { isLiveTestEnabled } from "../agents/live-test-helpers.js"; +import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "../agents/live-test-helpers.js"; import { resolveApiKeyForProvider } from "../agents/model-auth.js"; -import type { OpenClawConfig } from "../config/config.js"; -import { loadConfig } from "../config/config.js"; +import { loadConfig, type OpenClawConfig } from "../config/config.js"; import { isTruthyEnvValue } from "../infra/env.js"; import { getShellEnvAppliedKeys, loadShellEnvFallback } from "../infra/shell-env.js"; import { encodePngRgba, fillPixel } from "../media/png-encode.js"; -import { - imageGenerationProviderContractRegistry, - providerContractRegistry, -} from "../plugins/contracts/registry.js"; +import { getProviderEnvVars } from "../secrets/provider-env-vars.js"; import { DEFAULT_LIVE_IMAGE_MODELS, parseCaseFilter, @@ -21,11 +26,21 @@ import { resolveConfiguredLiveImageModels, resolveLiveImageAuthStore, } from "./live-test-helpers.js"; -import { generateImage } from "./runtime.js"; const LIVE = isLiveTestEnabled(); -const REQUIRE_PROFILE_KEYS = isTruthyEnvValue(process.env.OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS); +const REQUIRE_PROFILE_KEYS = + isLiveProfileKeyModeEnabled() || isTruthyEnvValue(process.env.OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS); const describeLive = LIVE ? describe : describe.skip; +const providerFilter = parseCsvFilter(process.env.OPENCLAW_LIVE_IMAGE_GENERATION_PROVIDERS); +const caseFilter = parseCaseFilter(process.env.OPENCLAW_LIVE_IMAGE_GENERATION_CASES); +const envModelMap = parseProviderModelMap(process.env.OPENCLAW_LIVE_IMAGE_GENERATION_MODELS); + +type LiveProviderCase = { + plugin: Parameters[0]["plugin"]; + pluginId: string; + pluginName: string; + providerId: string; +}; type LiveImageCase = { id: string; @@ -37,6 +52,36 @@ type LiveImageCase = { inputImages?: Array<{ buffer: Buffer; mimeType: string; fileName?: string }>; }; +const PROVIDER_CASES: LiveProviderCase[] = [ + { plugin: falPlugin, pluginId: "fal", pluginName: "fal Provider", providerId: "fal" }, + { + plugin: googlePlugin, + pluginId: "google", + pluginName: "Google Provider", + providerId: "google", + }, + { + plugin: minimaxPlugin, + pluginId: "minimax", + pluginName: "MiniMax Provider", + providerId: "minimax", + }, + { + plugin: openaiPlugin, + pluginId: "openai", + pluginName: "OpenAI Provider", + providerId: "openai", + }, + { + plugin: vydraPlugin, + pluginId: "vydra", + pluginName: "Vydra Provider", + providerId: "vydra", + }, +] + .filter((entry) => (providerFilter ? providerFilter.has(entry.providerId) : true)) + .toSorted((left, right) => left.providerId.localeCompare(right.providerId)); + function createEditReferencePng(): Buffer { const width = 192; const height = 192; @@ -73,14 +118,9 @@ function withPluginsEnabled(cfg: OpenClawConfig): OpenClawConfig { }; } -function resolveProviderEnvVars(providerId: string): string[] { - const entry = providerContractRegistry.find((candidate) => candidate.provider.id === providerId); - return entry?.provider.envVars ?? []; -} - function maybeLoadShellEnvForImageProviders(providerIds: string[]): void { const expectedKeys = [ - ...new Set(providerIds.flatMap((providerId) => resolveProviderEnvVars(providerId))), + ...new Set(providerIds.flatMap((providerId) => getProviderEnvVars(providerId))), ]; if (expectedKeys.length === 0) { return; @@ -93,158 +133,157 @@ function maybeLoadShellEnvForImageProviders(providerIds: string[]): void { }); } -async function resolveLiveAuthForProvider( - provider: string, - cfg: ReturnType, - agentDir: string, -) { - const authStore = resolveLiveImageAuthStore({ - requireProfileKeys: REQUIRE_PROFILE_KEYS, - hasLiveKeys: collectProviderApiKeys(provider).length > 0, - }); - try { - const auth = await resolveApiKeyForProvider({ provider, cfg, agentDir, store: authStore }); - return { auth, authStore }; - } catch { - return null; +function resolveProviderModelForLiveTest(providerId: string, modelRef: string): string { + const slash = modelRef.indexOf("/"); + if (slash <= 0 || slash === modelRef.length - 1) { + return modelRef; } + return modelRef.slice(0, slash) === providerId ? modelRef.slice(slash + 1) : modelRef; +} + +function buildLiveCases(params: { + providerId: string; + modelRef: string; + editEnabled: boolean; +}): LiveImageCase[] { + const generatePrompt = + "Create a minimal flat illustration of an orange cat face sticker on a white background."; + const editPrompt = + "Change ONLY the background to a pale blue gradient. Keep the subject, framing, and style identical."; + const cases: LiveImageCase[] = [ + { + id: `${params.providerId}:generate`, + providerId: params.providerId, + modelRef: params.modelRef, + prompt: generatePrompt, + size: "1024x1024", + }, + ]; + if (params.editEnabled) { + cases.push({ + id: `${params.providerId}:edit`, + providerId: params.providerId, + modelRef: params.modelRef, + prompt: editPrompt, + resolution: "2K", + inputImages: [ + { + buffer: createEditReferencePng(), + mimeType: "image/png", + fileName: "reference.png", + }, + ], + }); + } + return cases; } describeLive("image generation live (provider sweep)", () => { - it("generates images for every configured image-generation variant with available auth", async () => { - const cfg = withPluginsEnabled(loadConfig()); - const agentDir = resolveOpenClawAgentDir(); - const providerFilter = parseCsvFilter(process.env.OPENCLAW_LIVE_IMAGE_GENERATION_PROVIDERS); - const caseFilter = parseCaseFilter(process.env.OPENCLAW_LIVE_IMAGE_GENERATION_CASES); - const envModelMap = parseProviderModelMap(process.env.OPENCLAW_LIVE_IMAGE_GENERATION_MODELS); - const configuredModels = resolveConfiguredLiveImageModels(cfg); - const availableProviders = imageGenerationProviderContractRegistry - .map((entry) => entry.provider.id) - .toSorted((left, right) => left.localeCompare(right)) - .filter((providerId) => (providerFilter ? providerFilter.has(providerId) : true)); - const liveCases: LiveImageCase[] = []; + it( + "generates images for every configured image-generation variant with available auth", + async () => { + const cfg = withPluginsEnabled(loadConfig()); + const configuredModels = resolveConfiguredLiveImageModels(cfg); + const agentDir = resolveOpenClawAgentDir(); + const attempted: string[] = []; + const skipped: string[] = []; + const failures: string[] = []; - if (availableProviders.includes("google")) { - liveCases.push( - { - id: "google:flash-generate", - providerId: "google", - modelRef: - envModelMap.get("google") ?? - configuredModels.get("google") ?? - DEFAULT_LIVE_IMAGE_MODELS.google, - prompt: - "Create a minimal flat illustration of an orange cat face sticker on a white background.", - size: "1024x1024", - }, - { - id: "google:pro-generate", - providerId: "google", - modelRef: "google/gemini-3-pro-image-preview", - prompt: - "Create a minimal flat illustration of an orange cat face sticker on a white background.", - size: "1024x1024", - }, - { - id: "google:pro-edit", - providerId: "google", - modelRef: "google/gemini-3-pro-image-preview", - prompt: - "Change ONLY the background to a pale blue gradient. Keep the subject, framing, and style identical.", - resolution: "2K", - inputImages: [ - { - buffer: createEditReferencePng(), - mimeType: "image/png", - fileName: "reference.png", - }, - ], - }, - ); - } - if (availableProviders.includes("openai")) { - liveCases.push({ - id: "openai:default-generate", - providerId: "openai", - modelRef: - envModelMap.get("openai") ?? - configuredModels.get("openai") ?? - DEFAULT_LIVE_IMAGE_MODELS.openai, - prompt: - "Create a minimal flat illustration of an orange cat face sticker on a white background.", - size: "1024x1024", - }); - } - if (availableProviders.includes("vydra")) { - liveCases.push({ - id: "vydra:default-generate", - providerId: "vydra", - modelRef: - envModelMap.get("vydra") ?? - configuredModels.get("vydra") ?? - DEFAULT_LIVE_IMAGE_MODELS.vydra, - prompt: - "Create a minimal flat illustration of an orange cat face sticker on a white background.", - }); - } + maybeLoadShellEnvForImageProviders(PROVIDER_CASES.map((entry) => entry.providerId)); - const selectedCases = liveCases.filter((entry) => - caseFilter ? caseFilter.has(entry.id.toLowerCase()) : true, - ); + for (const providerCase of PROVIDER_CASES) { + const modelRef = + envModelMap.get(providerCase.providerId) ?? + configuredModels.get(providerCase.providerId) ?? + DEFAULT_LIVE_IMAGE_MODELS[providerCase.providerId]; + if (!modelRef) { + skipped.push(`${providerCase.providerId}: no model configured`); + continue; + } - maybeLoadShellEnvForImageProviders(availableProviders); - - const attempted: string[] = []; - const skipped: string[] = []; - const failures: string[] = []; - - for (const testCase of selectedCases) { - if (!testCase.modelRef) { - skipped.push(`${testCase.id}: no model configured`); - continue; - } - const resolvedAuth = await resolveLiveAuthForProvider(testCase.providerId, cfg, agentDir); - if (!resolvedAuth) { - skipped.push(`${testCase.id}: no auth`); - continue; - } - - try { - const result = await generateImage({ - cfg, - agentDir, - authStore: resolvedAuth.authStore, - modelOverride: testCase.modelRef, - prompt: testCase.prompt, - size: testCase.size, - resolution: testCase.resolution, - inputImages: testCase.inputImages, + const hasLiveKeys = collectProviderApiKeys(providerCase.providerId).length > 0; + const authStore = resolveLiveImageAuthStore({ + requireProfileKeys: REQUIRE_PROFILE_KEYS, + hasLiveKeys, }); + let authLabel = "unresolved"; + try { + const auth = await resolveApiKeyForProvider({ + provider: providerCase.providerId, + cfg, + agentDir, + store: authStore, + }); + authLabel = `${auth.source} ${redactLiveApiKey(auth.apiKey)}`; + } catch { + skipped.push(`${providerCase.providerId}: no usable auth`); + continue; + } - attempted.push( - `${testCase.id}:${result.model} (${resolvedAuth.auth.source} ${redactLiveApiKey(resolvedAuth.auth.apiKey)})`, - ); - expect(result.provider).toBe(testCase.providerId); - expect(result.images.length).toBeGreaterThan(0); - expect(result.images[0]?.mimeType.startsWith("image/")).toBe(true); - expect(result.images[0]?.buffer.byteLength).toBeGreaterThan(512); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - failures.push( - `${testCase.id} (${resolvedAuth.auth.source} ${redactLiveApiKey(resolvedAuth.auth.apiKey)}): ${message}`, + const { imageProviders } = await registerProviderPlugin({ + plugin: providerCase.plugin, + id: providerCase.pluginId, + name: providerCase.pluginName, + }); + const provider = requireRegisteredProvider( + imageProviders, + providerCase.providerId, + "image provider", ); + const providerModel = resolveProviderModelForLiveTest(providerCase.providerId, modelRef); + const liveCases = buildLiveCases({ + providerId: providerCase.providerId, + modelRef, + editEnabled: provider.capabilities.edit?.enabled ?? false, + }).filter((entry) => (caseFilter ? caseFilter.has(entry.id.toLowerCase()) : true)); + + for (const testCase of liveCases) { + const startedAt = Date.now(); + console.error( + `[live:image-generation] starting ${testCase.id} model=${providerModel} auth=${authLabel}`, + ); + try { + const result = await provider.generateImage({ + provider: providerCase.providerId, + model: providerModel, + prompt: testCase.prompt, + cfg, + agentDir, + authStore, + size: testCase.size, + resolution: testCase.resolution, + inputImages: testCase.inputImages, + timeoutMs: 60_000, + }); + + expect(result.images.length).toBeGreaterThan(0); + expect(result.images[0]?.mimeType.startsWith("image/")).toBe(true); + expect(result.images[0]?.buffer.byteLength).toBeGreaterThan(512); + attempted.push(`${testCase.id}:${result.model} (${authLabel})`); + console.error( + `[live:image-generation] done ${testCase.id} ms=${Date.now() - startedAt} images=${result.images.length}`, + ); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + failures.push(`${testCase.id} (${authLabel}): ${message}`); + console.error( + `[live:image-generation] failed ${testCase.id} ms=${Date.now() - startedAt} error=${message}`, + ); + } + } } - } - console.log( - `[live:image-generation] attempted=${attempted.join(", ") || "none"} skipped=${skipped.join(", ") || "none"} failures=${failures.join(" | ") || "none"} shellEnv=${getShellEnvAppliedKeys().join(", ") || "none"}`, - ); + console.log( + `[live:image-generation] attempted=${attempted.join(", ") || "none"} skipped=${skipped.join(", ") || "none"} failures=${failures.join(" | ") || "none"} shellEnv=${getShellEnvAppliedKeys().join(", ") || "none"}`, + ); - if (attempted.length === 0) { - console.warn("[live:image-generation] no provider had usable auth; skipping assertions"); - return; - } - expect(failures).toEqual([]); - expect(attempted.length).toBeGreaterThan(0); - }, 180_000); + if (attempted.length === 0) { + expect(failures).toEqual([]); + console.warn("[live:image-generation] no provider had usable auth; skipping assertions"); + return; + } + expect(failures).toEqual([]); + }, + 10 * 60_000, + ); }); diff --git a/src/video-generation/live-test-helpers.test.ts b/src/video-generation/live-test-helpers.test.ts index 392c1eba914..507bc34e801 100644 --- a/src/video-generation/live-test-helpers.test.ts +++ b/src/video-generation/live-test-helpers.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; import { + canRunBufferBackedImageToVideoLiveLane, canRunBufferBackedVideoToVideoLiveLane, parseCsvFilter, parseProviderModelMap, @@ -86,13 +87,13 @@ describe("video-generation live-test helpers", () => { providerId: "google", modelRef: "google/veo-3.1-fast-generate-preview", }), - ).toBe(true); + ).toBe(false); expect( canRunBufferBackedVideoToVideoLiveLane({ providerId: "openai", modelRef: "openai/sora-2", }), - ).toBe(true); + ).toBe(false); expect( canRunBufferBackedVideoToVideoLiveLane({ providerId: "runway", @@ -124,4 +125,19 @@ describe("video-generation live-test helpers", () => { }), ).toBe(false); }); + + it("runs buffer-backed image-to-video only for providers that accept bundled image inputs", () => { + expect( + canRunBufferBackedImageToVideoLiveLane({ + providerId: "openai", + modelRef: "openai/sora-2", + }), + ).toBe(true); + expect( + canRunBufferBackedImageToVideoLiveLane({ + providerId: "vydra", + modelRef: "vydra/veo3", + }), + ).toBe(false); + }); }); diff --git a/src/video-generation/live-test-helpers.ts b/src/video-generation/live-test-helpers.ts index e9b77c50dfb..662e058b8c7 100644 --- a/src/video-generation/live-test-helpers.ts +++ b/src/video-generation/live-test-helpers.ts @@ -11,10 +11,23 @@ export const DEFAULT_LIVE_VIDEO_MODELS: Record = { qwen: "qwen/wan2.6-t2v", runway: "runway/gen4.5", together: "together/Wan-AI/Wan2.2-T2V-A14B", + vydra: "vydra/veo3", xai: "xai/grok-imagine-video", }; -const REMOTE_URL_VIDEO_TO_VIDEO_PROVIDERS = new Set(["alibaba", "qwen", "xai"]); +const REMOTE_URL_VIDEO_TO_VIDEO_PROVIDERS = new Set(["alibaba", "google", "openai", "qwen", "xai"]); +const BUFFER_BACKED_IMAGE_TO_VIDEO_UNSUPPORTED_PROVIDERS = new Set(["vydra"]); + +export function resolveLiveVideoResolution(params: { + providerId: string; + modelRef: string; +}): "480P" | "768P" | "1080P" { + const providerId = params.providerId.trim().toLowerCase(); + if (providerId === "minimax") { + return "768P"; + } + return "480P"; +} export function redactLiveApiKey(value: string | undefined): string { const trimmed = value?.trim(); @@ -99,6 +112,17 @@ export function canRunBufferBackedVideoToVideoLiveLane(params: { return model === "gen4_aleph"; } +export function canRunBufferBackedImageToVideoLiveLane(params: { + providerId: string; + modelRef: string; +}): boolean { + const providerId = params.providerId.trim().toLowerCase(); + if (BUFFER_BACKED_IMAGE_TO_VIDEO_UNSUPPORTED_PROVIDERS.has(providerId)) { + return false; + } + return true; +} + export function resolveLiveVideoAuthStore(params: { requireProfileKeys: boolean; hasLiveKeys: boolean; diff --git a/src/video-generation/types.ts b/src/video-generation/types.ts index 4947218367b..8bcfd8a73f8 100644 --- a/src/video-generation/types.ts +++ b/src/video-generation/types.ts @@ -8,7 +8,7 @@ export type GeneratedVideoAsset = { metadata?: Record; }; -export type VideoGenerationResolution = "480P" | "720P" | "1080P"; +export type VideoGenerationResolution = "480P" | "720P" | "768P" | "1080P"; export type VideoGenerationSourceAsset = { url?: string;