From cd5b1653f60025b80df8429fc5fe49bb29c98da8 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 6 Apr 2026 15:24:16 +0100 Subject: [PATCH] feat: declare explicit media provider capabilities --- CHANGELOG.md | 1 + docs/help/testing.md | 35 +++ docs/plugins/sdk-provider-plugins.md | 13 +- docs/tools/music-generation.md | 51 ++++ docs/tools/video-generation.md | 51 +++- .../alibaba/video-generation-provider.ts | 40 ++- .../byteplus/video-generation-provider.ts | 29 +- .../comfy/music-generation-provider.test.ts | 2 +- extensions/comfy/music-generation-provider.ts | 6 +- extensions/comfy/video-generation-provider.ts | 29 +- extensions/fal/video-generation-provider.ts | 23 +- .../google/music-generation-provider.ts | 29 +- .../google/video-generation-provider.ts | 40 ++- .../minimax/music-generation-provider.ts | 17 +- .../minimax/video-generation-provider.ts | 26 +- .../music-generation-providers.live.test.ts | 223 +++++++++++--- .../openai/video-generation-provider.ts | 28 +- extensions/qwen/video-generation-provider.ts | 40 ++- .../runway/video-generation-provider.ts | 23 +- .../together/video-generation-provider.ts | 20 +- .../video-generation-core/src/runtime.test.ts | 8 +- .../video-generation-core/src/runtime.ts | 14 +- .../video-generation-providers.live.test.ts | 278 +++++++++++++++--- extensions/vydra/video-generation-provider.ts | 14 +- extensions/xai/video-generation-provider.ts | 28 +- .../tools/music-generate-tool.actions.ts | 56 ++-- src/agents/tools/music-generate-tool.test.ts | 26 +- src/agents/tools/music-generate-tool.ts | 18 +- .../tools/video-generate-tool.actions.ts | 66 +++-- src/agents/tools/video-generate-tool.test.ts | 17 +- src/agents/tools/video-generate-tool.ts | 6 + src/music-generation/capabilities.test.ts | 77 +++++ src/music-generation/capabilities.ts | 47 +++ src/music-generation/live-test-helpers.ts | 80 +++++ .../provider-capabilities.contract.test.ts | 33 +++ src/music-generation/runtime.test.ts | 86 +++++- 
src/music-generation/runtime.ts | 18 +- src/music-generation/types.ts | 16 +- src/plugin-sdk/music-generation.ts | 3 + .../contracts/speech-vitest-registry.ts | 114 ++++--- src/video-generation/capabilities.test.ts | 24 +- src/video-generation/capabilities.ts | 57 +--- .../live-test-helpers.test.ts | 127 ++++++++ src/video-generation/live-test-helpers.ts | 21 ++ .../provider-capabilities.contract.test.ts | 44 +++ src/video-generation/runtime.test.ts | 12 +- 46 files changed, 1623 insertions(+), 393 deletions(-) create mode 100644 src/music-generation/capabilities.test.ts create mode 100644 src/music-generation/capabilities.ts create mode 100644 src/music-generation/provider-capabilities.contract.test.ts create mode 100644 src/video-generation/live-test-helpers.test.ts create mode 100644 src/video-generation/provider-capabilities.contract.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 470d5c79c7d..ef13bfcebf5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai ### Changes - Plugins/webhooks: add a bundled webhook ingress plugin so external automation can create and drive bound TaskFlows through per-route shared-secret endpoints. (#61892) Thanks @mbelinky. +- Tools/media: document per-provider music and video generation capabilities, and add shared live video-to-video sweep coverage for providers that support local reference clips. ### Fixes diff --git a/docs/help/testing.md b/docs/help/testing.md index 840b6e25a37..e3c66dda905 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -475,10 +475,45 @@ If you want to rely on env keys (e.g. 
exported in your `~/.profile`), run local - Exercises the shared bundled music-generation provider path - Currently covers Google and MiniMax - Loads provider env vars from your login shell (`~/.profile`) before probing + - Uses live/env API keys ahead of stored auth profiles by default, so stale test keys in `auth-profiles.json` do not mask real shell credentials - Skips providers with no usable auth/profile/model + - Runs both declared runtime modes when available: + - `generate` with prompt-only input + - `edit` when the provider declares `capabilities.edit.enabled` + - Current shared-lane coverage: + - `google`: `generate`, `edit` + - `minimax`: `generate` + - `comfy`: separate Comfy live file, not this shared sweep - Optional narrowing: - `OPENCLAW_LIVE_MUSIC_GENERATION_PROVIDERS="google,minimax"` - `OPENCLAW_LIVE_MUSIC_GENERATION_MODELS="google/lyria-3-clip-preview,minimax/music-2.5+"` +- Optional auth behavior: + - `OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS=1` to force profile-store auth and ignore env-only overrides + +## Video generation live + +- Test: `extensions/video-generation-providers.live.test.ts` +- Enable: `OPENCLAW_LIVE_TEST=1 pnpm test:live -- extensions/video-generation-providers.live.test.ts` +- Scope: + - Exercises the shared bundled video-generation provider path + - Loads provider env vars from your login shell (`~/.profile`) before probing + - Uses live/env API keys ahead of stored auth profiles by default, so stale test keys in `auth-profiles.json` do not mask real shell credentials + - Skips providers with no usable auth/profile/model + - Runs all declared runtime modes when available: + - `generate` with prompt-only input + - `imageToVideo` when the provider declares `capabilities.imageToVideo.enabled` + - `videoToVideo` when the provider declares `capabilities.videoToVideo.enabled` and the selected provider/model accepts buffer-backed local video input in the shared sweep + - Current `videoToVideo` live coverage: + - `google` + - `openai` + 
- `runway` only when the selected model is `runway/gen4_aleph` + - Current declared-but-skipped `videoToVideo` providers in the shared sweep: + - `alibaba`, `qwen`, `xai` because those paths currently require remote `http(s)` / MP4 reference URLs +- Optional narrowing: + - `OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS="google,openai,runway"` + - `OPENCLAW_LIVE_VIDEO_GENERATION_MODELS="google/veo-3.1-fast-generate-preview,openai/sora-2,runway/gen4_aleph"` +- Optional auth behavior: + - `OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS=1` to force profile-store auth and ignore env-only overrides ## Docker runners (optional "works in Linux" checks) diff --git a/docs/plugins/sdk-provider-plugins.md b/docs/plugins/sdk-provider-plugins.md index bf9e38bd775..76c325fa2e2 100644 --- a/docs/plugins/sdk-provider-plugins.md +++ b/docs/plugins/sdk-provider-plugins.md @@ -643,10 +643,15 @@ API key auth, and dynamic model resolution. [Internals: Capability Ownership](/plugins/architecture#capability-ownership-model). For video generation, prefer the mode-aware capability shape shown above: - `generate`, `imageToVideo`, and `videoToVideo`. The older flat fields such - as `maxInputImages`, `maxInputVideos`, and `maxDurationSeconds` still work - as aggregate fallback caps, but they cannot describe per-mode limits or - disabled transform modes as cleanly. + `generate`, `imageToVideo`, and `videoToVideo`. Flat aggregate fields such + as `maxInputImages`, `maxInputVideos`, and `maxDurationSeconds` are not + enough to advertise transform-mode support or disabled modes cleanly. + + Music-generation providers should follow the same pattern: + `generate` for prompt-only generation and `edit` for reference-image-based + generation. Flat aggregate fields such as `maxInputImages`, + `supportsLyrics`, and `supportsFormat` are not enough to advertise edit + support; explicit `generate` / `edit` blocks are the expected contract. 
diff --git a/docs/tools/music-generation.md b/docs/tools/music-generation.md index 31d3bf9692d..7452521491b 100644 --- a/docs/tools/music-generation.md +++ b/docs/tools/music-generation.md @@ -85,6 +85,17 @@ Example: | Google | `lyria-3-clip-preview` | Up to 10 images | `lyrics`, `instrumental`, `format` | `GEMINI_API_KEY`, `GOOGLE_API_KEY` | | MiniMax | `music-2.5+` | None | `lyrics`, `instrumental`, `durationSeconds`, `format=mp3` | `MINIMAX_API_KEY` | +### Declared capability matrix + +This is the explicit mode contract used by `music_generate`, contract tests, +and the shared live sweep. + +| Provider | `generate` | `edit` | Edit limit | Shared live lanes | +| -------- | ---------- | ------ | ---------- | ------------------------------------------------------------------------- | +| ComfyUI | Yes | Yes | 1 image | Not in the shared sweep; covered by `extensions/comfy/comfy.live.test.ts` | +| Google | Yes | Yes | 10 images | `generate`, `edit` | +| MiniMax | Yes | No | None | `generate` | + Use `action: "list"` to inspect available shared providers and models at runtime: @@ -174,6 +185,36 @@ error includes details from each attempt. - ComfyUI support is workflow-driven and depends on the configured graph plus node mapping for prompt/output fields. +## Provider capability modes + +The shared music-generation contract now supports explicit mode declarations: + +- `generate` for prompt-only generation +- `edit` when the request includes one or more reference images + +New provider implementations should prefer explicit mode blocks: + +```typescript +capabilities: { + generate: { + maxTracks: 1, + supportsLyrics: true, + supportsFormat: true, + }, + edit: { + enabled: true, + maxTracks: 1, + maxInputImages: 1, + supportsFormat: true, + }, +} +``` + +Legacy flat fields such as `maxInputImages`, `supportsLyrics`, and +`supportsFormat` are not enough to advertise edit support. 
Providers should +declare `generate` and `edit` explicitly so live tests, contract tests, and +the shared `music_generate` tool can validate mode support deterministically. + ## Choosing the right path - Use the shared provider-backed path when you want model selection, provider failover, and the built-in async task/status flow. @@ -188,6 +229,16 @@ Opt-in live coverage for the shared bundled providers: OPENCLAW_LIVE_TEST=1 pnpm test:live -- extensions/music-generation-providers.live.test.ts ``` +This live file loads missing provider env vars from `~/.profile`, prefers +live/env API keys ahead of stored auth profiles by default, and runs both +`generate` and declared `edit` coverage when the provider enables edit mode. + +Today that means: + +- `google`: `generate` plus `edit` +- `minimax`: `generate` only +- `comfy`: separate Comfy live coverage, not the shared provider sweep + Opt-in live coverage for the bundled ComfyUI music path: ```bash diff --git a/docs/tools/video-generation.md b/docs/tools/video-generation.md index 04aafead8cc..82cac135bc1 100644 --- a/docs/tools/video-generation.md +++ b/docs/tools/video-generation.md @@ -79,6 +79,26 @@ Some providers accept additional or alternate API key env vars. See individual [ Run `video_generate action=list` to inspect available providers, models, and runtime modes at runtime. +### Declared capability matrix + +This is the explicit mode contract used by `video_generate`, contract tests, +and the shared live sweep. 
+ +| Provider | `generate` | `imageToVideo` | `videoToVideo` | Shared live lanes today | +| -------- | ---------- | -------------- | -------------- | ---------------------------------------------------------------------------------------------------------- | +| Alibaba | Yes | Yes | Yes | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs | +| BytePlus | Yes | Yes | No | `generate`, `imageToVideo` | +| ComfyUI | Yes | Yes | No | Not in the shared sweep; workflow-specific coverage lives with Comfy tests | +| fal | Yes | Yes | No | `generate`, `imageToVideo` | +| Google | Yes | Yes | Yes | `generate`, `imageToVideo`, `videoToVideo` | +| MiniMax | Yes | Yes | No | `generate`, `imageToVideo` | +| OpenAI | Yes | Yes | Yes | `generate`, `imageToVideo`, `videoToVideo` | +| Qwen | Yes | Yes | Yes | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs | +| Runway | Yes | Yes | Yes | `generate`, `imageToVideo`; `videoToVideo` runs only when the selected model is `runway/gen4_aleph` | +| Together | Yes | Yes | No | `generate`, `imageToVideo` | +| Vydra | Yes | Yes | No | `generate`, `imageToVideo` | +| xAI | Yes | Yes | Yes | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider currently needs a remote MP4 URL | + ## Tool parameters ### Required @@ -201,9 +221,34 @@ capabilities: { } ``` -Legacy flat fields such as `maxInputImages` and `maxInputVideos` still work as -backward-compatible aggregate caps, but they cannot express per-mode limits as -precisely. +Flat aggregate fields such as `maxInputImages` and `maxInputVideos` are not +enough to advertise transform-mode support. Providers should declare +`generate`, `imageToVideo`, and `videoToVideo` explicitly so live tests, +contract tests, and the shared `video_generate` tool can validate mode support +deterministically. 
+ +## Live tests + +Opt-in live coverage for the shared bundled providers: + +```bash +OPENCLAW_LIVE_TEST=1 pnpm test:live -- extensions/video-generation-providers.live.test.ts +``` + +This live file loads missing provider env vars from `~/.profile`, prefers +live/env API keys ahead of stored auth profiles by default, and runs the +declared modes it can exercise safely with local media: + +- `generate` for every provider in the sweep +- `imageToVideo` when `capabilities.imageToVideo.enabled` +- `videoToVideo` when `capabilities.videoToVideo.enabled` and the provider/model + accepts buffer-backed local video input in the shared sweep + +Today the shared `videoToVideo` live lane covers: + +- `google` +- `openai` +- `runway` only when you select `runway/gen4_aleph` ## Configuration diff --git a/extensions/alibaba/video-generation-provider.ts b/extensions/alibaba/video-generation-provider.ts index 9f3cff78681..b53648b2b26 100644 --- a/extensions/alibaba/video-generation-provider.ts +++ b/extensions/alibaba/video-generation-provider.ts @@ -198,15 +198,37 @@ export function buildAlibabaVideoGenerationProvider(): VideoGenerationProvider { agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 4, - maxDurationSeconds: 10, - supportsSize: true, - supportsAspectRatio: true, - supportsResolution: true, - supportsAudio: true, - supportsWatermark: true, + generate: { + maxVideos: 1, + maxDurationSeconds: 10, + supportsSize: true, + supportsAspectRatio: true, + supportsResolution: true, + supportsAudio: true, + supportsWatermark: true, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + maxDurationSeconds: 10, + supportsSize: true, + supportsAspectRatio: true, + supportsResolution: true, + supportsAudio: true, + supportsWatermark: true, + }, + videoToVideo: { + enabled: true, + maxVideos: 1, + maxInputVideos: 4, + maxDurationSeconds: 10, + supportsSize: true, + supportsAspectRatio: true, + supportsResolution: true, + 
supportsAudio: true, + supportsWatermark: true, + }, }, async generateVideo(req): Promise { const fetchFn = fetch; diff --git a/extensions/byteplus/video-generation-provider.ts b/extensions/byteplus/video-generation-provider.ts index 25ba40b7525..c0b84a4aaa3 100644 --- a/extensions/byteplus/video-generation-provider.ts +++ b/extensions/byteplus/video-generation-provider.ts @@ -135,14 +135,27 @@ export function buildBytePlusVideoGenerationProvider(): VideoGenerationProvider agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 0, - maxDurationSeconds: 12, - supportsAspectRatio: true, - supportsResolution: true, - supportsAudio: true, - supportsWatermark: true, + generate: { + maxVideos: 1, + maxDurationSeconds: 12, + supportsAspectRatio: true, + supportsResolution: true, + supportsAudio: true, + supportsWatermark: true, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + maxDurationSeconds: 12, + supportsAspectRatio: true, + supportsResolution: true, + supportsAudio: true, + supportsWatermark: true, + }, + videoToVideo: { + enabled: false, + }, }, async generateVideo(req) { if ((req.inputVideos?.length ?? 
0) > 0) { diff --git a/extensions/comfy/music-generation-provider.test.ts b/extensions/comfy/music-generation-provider.test.ts index 7fbe8b77051..225a9887a23 100644 --- a/extensions/comfy/music-generation-provider.test.ts +++ b/extensions/comfy/music-generation-provider.test.ts @@ -12,7 +12,7 @@ describe("comfy music-generation provider", () => { expect(provider.defaultModel).toBe("workflow"); expect(provider.models).toEqual(["workflow"]); - expect(provider.capabilities.maxInputImages).toBe(1); + expect(provider.capabilities.edit?.maxInputImages).toBe(1); }); it("runs a music workflow and returns audio outputs", async () => { diff --git a/extensions/comfy/music-generation-provider.ts b/extensions/comfy/music-generation-provider.ts index 1ecc11cde10..a582a90ed02 100644 --- a/extensions/comfy/music-generation-provider.ts +++ b/extensions/comfy/music-generation-provider.ts @@ -50,7 +50,11 @@ export function buildComfyMusicGenerationProvider(): MusicGenerationProvider { capability: "music", }), capabilities: { - maxInputImages: COMFY_MAX_INPUT_IMAGES, + generate: {}, + edit: { + enabled: true, + maxInputImages: COMFY_MAX_INPUT_IMAGES, + }, }, async generateMusic(req) { if ((req.inputImages?.length ?? 
0) > COMFY_MAX_INPUT_IMAGES) { diff --git a/extensions/comfy/video-generation-provider.ts b/extensions/comfy/video-generation-provider.ts index 81fff490051..bb7465d2622 100644 --- a/extensions/comfy/video-generation-provider.ts +++ b/extensions/comfy/video-generation-provider.ts @@ -39,14 +39,27 @@ export function buildComfyVideoGenerationProvider(): VideoGenerationProvider { capability: "video", }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 0, - supportsSize: false, - supportsAspectRatio: false, - supportsResolution: false, - supportsAudio: false, - supportsWatermark: false, + generate: { + maxVideos: 1, + supportsSize: false, + supportsAspectRatio: false, + supportsResolution: false, + supportsAudio: false, + supportsWatermark: false, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + supportsSize: false, + supportsAspectRatio: false, + supportsResolution: false, + supportsAudio: false, + supportsWatermark: false, + }, + videoToVideo: { + enabled: false, + }, }, async generateVideo(req) { if ((req.inputImages?.length ?? 0) > 1) { diff --git a/extensions/fal/video-generation-provider.ts b/extensions/fal/video-generation-provider.ts index d6aaf70c06d..52feb3e25eb 100644 --- a/extensions/fal/video-generation-provider.ts +++ b/extensions/fal/video-generation-provider.ts @@ -251,12 +251,23 @@ export function buildFalVideoGenerationProvider(): VideoGenerationProvider { agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 0, - supportsAspectRatio: true, - supportsResolution: true, - supportsSize: true, + generate: { + maxVideos: 1, + supportsAspectRatio: true, + supportsResolution: true, + supportsSize: true, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + supportsAspectRatio: true, + supportsResolution: true, + supportsSize: true, + }, + videoToVideo: { + enabled: false, + }, }, async generateVideo(req) { if ((req.inputVideos?.length ?? 
0) > 0) { diff --git a/extensions/google/music-generation-provider.ts b/extensions/google/music-generation-provider.ts index ce51a1b19e7..da64156d034 100644 --- a/extensions/google/music-generation-provider.ts +++ b/extensions/google/music-generation-provider.ts @@ -102,14 +102,27 @@ export function buildGoogleMusicGenerationProvider(): MusicGenerationProvider { agentDir, }), capabilities: { - maxTracks: 1, - maxInputImages: GOOGLE_MAX_INPUT_IMAGES, - supportsLyrics: true, - supportsInstrumental: true, - supportsFormat: true, - supportedFormatsByModel: { - [DEFAULT_GOOGLE_MUSIC_MODEL]: ["mp3"], - [GOOGLE_PRO_MUSIC_MODEL]: ["mp3", "wav"], + generate: { + maxTracks: 1, + supportsLyrics: true, + supportsInstrumental: true, + supportsFormat: true, + supportedFormatsByModel: { + [DEFAULT_GOOGLE_MUSIC_MODEL]: ["mp3"], + [GOOGLE_PRO_MUSIC_MODEL]: ["mp3", "wav"], + }, + }, + edit: { + enabled: true, + maxTracks: 1, + maxInputImages: GOOGLE_MAX_INPUT_IMAGES, + supportsLyrics: true, + supportsInstrumental: true, + supportsFormat: true, + supportedFormatsByModel: { + [DEFAULT_GOOGLE_MUSIC_MODEL]: ["mp3"], + [GOOGLE_PRO_MUSIC_MODEL]: ["mp3", "wav"], + }, }, }, async generateMusic(req) { diff --git a/extensions/google/video-generation-provider.ts b/extensions/google/video-generation-provider.ts index 32833b9938f..4b04ce99aeb 100644 --- a/extensions/google/video-generation-provider.ts +++ b/extensions/google/video-generation-provider.ts @@ -158,15 +158,37 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider { agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 1, - maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS, - supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS, - supportsAspectRatio: true, - supportsResolution: true, - supportsSize: true, - supportsAudio: true, + generate: { + maxVideos: 1, + maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS, + supportedDurationSeconds: 
GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS, + supportsAspectRatio: true, + supportsResolution: true, + supportsSize: true, + supportsAudio: true, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS, + supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS, + supportsAspectRatio: true, + supportsResolution: true, + supportsSize: true, + supportsAudio: true, + }, + videoToVideo: { + enabled: true, + maxVideos: 1, + maxInputVideos: 1, + maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS, + supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS, + supportsAspectRatio: true, + supportsResolution: true, + supportsSize: true, + supportsAudio: true, + }, }, async generateVideo(req) { if ((req.inputImages?.length ?? 0) > 1) { diff --git a/extensions/minimax/music-generation-provider.ts b/extensions/minimax/music-generation-provider.ts index ee5ac8d2396..f63556a1f58 100644 --- a/extensions/minimax/music-generation-provider.ts +++ b/extensions/minimax/music-generation-provider.ts @@ -118,12 +118,17 @@ export function buildMinimaxMusicGenerationProvider(): MusicGenerationProvider { agentDir, }), capabilities: { - maxTracks: 1, - supportsLyrics: true, - supportsInstrumental: true, - supportsDuration: true, - supportsFormat: true, - supportedFormats: ["mp3"], + generate: { + maxTracks: 1, + supportsLyrics: true, + supportsInstrumental: true, + supportsDuration: true, + supportsFormat: true, + supportedFormats: ["mp3"], + }, + edit: { + enabled: false, + }, }, async generateMusic(req) { if ((req.inputImages?.length ?? 
0) > 0) { diff --git a/extensions/minimax/video-generation-provider.ts b/extensions/minimax/video-generation-provider.ts index 30cf9099052..88058d2b215 100644 --- a/extensions/minimax/video-generation-provider.ts +++ b/extensions/minimax/video-generation-provider.ts @@ -228,13 +228,25 @@ export function buildMinimaxVideoGenerationProvider(): VideoGenerationProvider { agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 0, - maxDurationSeconds: 10, - supportedDurationSecondsByModel: MINIMAX_MODEL_ALLOWED_DURATIONS, - supportsResolution: true, - supportsWatermark: false, + generate: { + maxVideos: 1, + maxDurationSeconds: 10, + supportedDurationSecondsByModel: MINIMAX_MODEL_ALLOWED_DURATIONS, + supportsResolution: true, + supportsWatermark: false, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + maxDurationSeconds: 10, + supportedDurationSecondsByModel: MINIMAX_MODEL_ALLOWED_DURATIONS, + supportsResolution: true, + supportsWatermark: false, + }, + videoToVideo: { + enabled: false, + }, }, async generateVideo(req) { if ((req.inputVideos?.length ?? 
0) > 0) { diff --git a/extensions/music-generation-providers.live.test.ts b/extensions/music-generation-providers.live.test.ts index 52cd3212c84..ccd414bed77 100644 --- a/extensions/music-generation-providers.live.test.ts +++ b/extensions/music-generation-providers.live.test.ts @@ -1,14 +1,21 @@ import { describe, expect, it } from "vitest"; +import { resolveOpenClawAgentDir } from "../src/agents/agent-paths.js"; import { collectProviderApiKeys } from "../src/agents/live-auth-keys.js"; -import { isLiveTestEnabled } from "../src/agents/live-test-helpers.js"; -import type { OpenClawConfig } from "../src/config/config.js"; -import { DEFAULT_LIVE_MUSIC_MODELS } from "../src/music-generation/live-test-helpers.js"; -import { parseMusicGenerationModelRef } from "../src/music-generation/model-ref.js"; -import { getProviderEnvVars } from "../src/secrets/provider-env-vars.js"; +import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "../src/agents/live-test-helpers.js"; +import { resolveApiKeyForProvider } from "../src/agents/model-auth.js"; +import { loadConfig, type OpenClawConfig } from "../src/config/config.js"; +import { isTruthyEnvValue } from "../src/infra/env.js"; +import { getShellEnvAppliedKeys, loadShellEnvFallback } from "../src/infra/shell-env.js"; +import { encodePngRgba, fillPixel } from "../src/media/png-encode.js"; import { + DEFAULT_LIVE_MUSIC_MODELS, parseCsvFilter, parseProviderModelMap, -} from "../src/video-generation/live-test-helpers.js"; + redactLiveApiKey, + resolveConfiguredLiveMusicModels, + resolveLiveMusicAuthStore, +} from "../src/music-generation/live-test-helpers.js"; +import { getProviderEnvVars } from "../src/secrets/provider-env-vars.js"; import { registerProviderPlugin, requireRegisteredProvider, @@ -17,6 +24,9 @@ import googlePlugin from "./google/index.js"; import minimaxPlugin from "./minimax/index.js"; const LIVE = isLiveTestEnabled(); +const REQUIRE_PROFILE_KEYS = + isLiveProfileKeyModeEnabled() || 
isTruthyEnvValue(process.env.OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS); +const describeLive = LIVE ? describe : describe.skip; const providerFilter = parseCsvFilter(process.env.OPENCLAW_LIVE_MUSIC_GENERATION_PROVIDERS); const envModelMap = parseProviderModelMap(process.env.OPENCLAW_LIVE_MUSIC_GENERATION_MODELS); @@ -44,29 +54,107 @@ const CASES: LiveProviderCase[] = [ .filter((entry) => (providerFilter ? providerFilter.has(entry.providerId) : true)) .toSorted((left, right) => left.providerId.localeCompare(right.providerId)); -function asConfig(value: unknown): OpenClawConfig { - return value as OpenClawConfig; +function withPluginsEnabled(cfg: OpenClawConfig): OpenClawConfig { + return { + ...cfg, + plugins: { + ...cfg.plugins, + enabled: true, + }, + }; +} + +function createEditReferencePng(): Buffer { + const width = 192; + const height = 192; + const buf = Buffer.alloc(width * height * 4, 255); + + for (let y = 0; y < height; y += 1) { + for (let x = 0; x < width; x += 1) { + fillPixel(buf, x, y, width, 250, 246, 240, 255); + } + } + + for (let y = 24; y < 168; y += 1) { + for (let x = 24; x < 168; x += 1) { + fillPixel(buf, x, y, width, 255, 143, 77, 255); + } + } + + for (let y = 48; y < 144; y += 1) { + for (let x = 48; x < 144; x += 1) { + fillPixel(buf, x, y, width, 34, 40, 49, 255); + } + } + + return encodePngRgba(buf, width, height); } function resolveProviderModelForLiveTest(providerId: string, modelRef: string): string { - const parsed = parseMusicGenerationModelRef(modelRef); - if (parsed && parsed.provider === providerId) { - return parsed.model; + const slash = modelRef.indexOf("/"); + if (slash <= 0 || slash === modelRef.length - 1) { + return modelRef; } - return modelRef; + return modelRef.slice(0, slash) === providerId ? modelRef.slice(slash + 1) : modelRef; } -describe.skipIf(!LIVE)("music generation provider live", () => { - for (const testCase of CASES) { - const modelRef = - envModelMap.get(testCase.providerId) ?? 
DEFAULT_LIVE_MUSIC_MODELS[testCase.providerId]; - const hasAuth = collectProviderApiKeys(testCase.providerId).length > 0; - const expectedEnvVars = getProviderEnvVars(testCase.providerId).join(", "); +function maybeLoadShellEnvForMusicProviders(providerIds: string[]): void { + const expectedKeys = [ + ...new Set(providerIds.flatMap((providerId) => getProviderEnvVars(providerId))), + ]; + if (expectedKeys.length === 0) { + return; + } + loadShellEnvFallback({ + enabled: true, + env: process.env, + expectedKeys, + logger: { warn: (message: string) => console.warn(message) }, + }); +} + +describeLive("music generation provider live", () => { + it( + "covers generate plus declared edit paths with shell/profile auth", + async () => { + const cfg = withPluginsEnabled(loadConfig()); + const configuredModels = resolveConfiguredLiveMusicModels(cfg); + const agentDir = resolveOpenClawAgentDir(); + const attempted: string[] = []; + const skipped: string[] = []; + const failures: string[] = []; + + maybeLoadShellEnvForMusicProviders(CASES.map((entry) => entry.providerId)); + + for (const testCase of CASES) { + const modelRef = + envModelMap.get(testCase.providerId) ?? + configuredModels.get(testCase.providerId) ?? + DEFAULT_LIVE_MUSIC_MODELS[testCase.providerId]; + if (!modelRef) { + skipped.push(`${testCase.providerId}: no model configured`); + continue; + } + + const hasLiveKeys = collectProviderApiKeys(testCase.providerId).length > 0; + const authStore = resolveLiveMusicAuthStore({ + requireProfileKeys: REQUIRE_PROFILE_KEYS, + hasLiveKeys, + }); + let authLabel = "unresolved"; + try { + const auth = await resolveApiKeyForProvider({ + provider: testCase.providerId, + cfg, + agentDir, + store: authStore, + }); + authLabel = `${auth.source} ${redactLiveApiKey(auth.apiKey)}`; + } catch { + skipped.push(`${testCase.providerId}: no usable auth`); + continue; + } - const liveIt = hasAuth && modelRef ? 
it : it.skip; - liveIt( - `generates a short track via ${testCase.providerId}`, - async () => { const { musicProviders } = await registerProviderPlugin({ plugin: testCase.plugin, id: testCase.pluginId, @@ -78,27 +166,78 @@ describe.skipIf(!LIVE)("music generation provider live", () => { "music provider", ); const providerModel = resolveProviderModelForLiveTest(testCase.providerId, modelRef); + const generateCaps = provider.capabilities.generate; - const result = await provider.generateMusic({ - provider: testCase.providerId, - model: providerModel, - prompt: "Upbeat instrumental synthwave with warm neon pads and a simple driving beat.", - cfg: asConfig({ plugins: { enabled: true } }), - agentDir: "/tmp/openclaw-live-music", - instrumental: true, - ...(provider.capabilities.supportsDuration ? { durationSeconds: 12 } : {}), - ...(provider.capabilities.supportsFormat ? { format: "mp3" as const } : {}), - }); + try { + const result = await provider.generateMusic({ + provider: testCase.providerId, + model: providerModel, + prompt: "Upbeat instrumental synthwave with warm neon pads and a simple driving beat.", + cfg, + agentDir, + authStore, + ...(generateCaps?.supportsDuration ? { durationSeconds: 12 } : {}), + ...(generateCaps?.supportsFormat ? { format: "mp3" as const } : {}), + ...(generateCaps?.supportsInstrumental ? { instrumental: true } : {}), + }); - expect(result.tracks.length).toBeGreaterThan(0); - expect(result.tracks[0]?.mimeType.startsWith("audio/")).toBe(true); - expect(result.tracks[0]?.buffer.byteLength).toBeGreaterThan(1024); - }, - 6 * 60_000, - ); + expect(result.tracks.length).toBeGreaterThan(0); + expect(result.tracks[0]?.mimeType.startsWith("audio/")).toBe(true); + expect(result.tracks[0]?.buffer.byteLength).toBeGreaterThan(1024); + attempted.push(`${testCase.providerId}:generate:${providerModel} (${authLabel})`); + } catch (error) { + failures.push( + `${testCase.providerId}:generate (${authLabel}): ${ + error instanceof Error ? 
error.message : String(error) + }`, + ); + continue; + } - if (!hasAuth || !modelRef) { - it.skip(`skips ${testCase.providerId} without live auth/model (${expectedEnvVars || "no env vars"})`, () => {}); - } - } + if (!provider.capabilities.edit?.enabled) { + continue; + } + + try { + const result = await provider.generateMusic({ + provider: testCase.providerId, + model: providerModel, + prompt: "Turn the reference cover art into a short dramatic trailer sting.", + cfg, + agentDir, + authStore, + inputImages: [ + { + buffer: createEditReferencePng(), + mimeType: "image/png", + fileName: "reference.png", + }, + ], + }); + + expect(result.tracks.length).toBeGreaterThan(0); + expect(result.tracks[0]?.mimeType.startsWith("audio/")).toBe(true); + expect(result.tracks[0]?.buffer.byteLength).toBeGreaterThan(1024); + attempted.push(`${testCase.providerId}:edit:${providerModel} (${authLabel})`); + } catch (error) { + failures.push( + `${testCase.providerId}:edit (${authLabel}): ${ + error instanceof Error ? 
error.message : String(error) + }`, + ); + } + } + + console.log( + `[live:music-generation] attempted=${attempted.join(", ") || "none"} skipped=${skipped.join(", ") || "none"} failures=${failures.join(" | ") || "none"} shellEnv=${getShellEnvAppliedKeys().join(", ") || "none"}`, + ); + + if (attempted.length === 0) { + console.warn("[live:music-generation] no provider had usable auth; skipping assertions"); + return; + } + expect(failures).toEqual([]); + }, + 10 * 60_000, + ); }); diff --git a/extensions/openai/video-generation-provider.ts b/extensions/openai/video-generation-provider.ts index 78bea502e4e..0ff392f998f 100644 --- a/extensions/openai/video-generation-provider.ts +++ b/extensions/openai/video-generation-provider.ts @@ -190,12 +190,28 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider { agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 1, - maxDurationSeconds: 12, - supportedDurationSeconds: OPENAI_VIDEO_SECONDS, - supportsSize: true, + generate: { + maxVideos: 1, + maxDurationSeconds: 12, + supportedDurationSeconds: OPENAI_VIDEO_SECONDS, + supportsSize: true, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + maxDurationSeconds: 12, + supportedDurationSeconds: OPENAI_VIDEO_SECONDS, + supportsSize: true, + }, + videoToVideo: { + enabled: true, + maxVideos: 1, + maxInputVideos: 1, + maxDurationSeconds: 12, + supportedDurationSeconds: OPENAI_VIDEO_SECONDS, + supportsSize: true, + }, }, async generateVideo(req) { const auth = await resolveApiKeyForProvider({ diff --git a/extensions/qwen/video-generation-provider.ts b/extensions/qwen/video-generation-provider.ts index 53ab5713802..83ccec5b516 100644 --- a/extensions/qwen/video-generation-provider.ts +++ b/extensions/qwen/video-generation-provider.ts @@ -226,15 +226,37 @@ export function buildQwenVideoGenerationProvider(): VideoGenerationProvider { agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - 
maxInputVideos: 4, - maxDurationSeconds: 10, - supportsSize: true, - supportsAspectRatio: true, - supportsResolution: true, - supportsAudio: true, - supportsWatermark: true, + generate: { + maxVideos: 1, + maxDurationSeconds: 10, + supportsSize: true, + supportsAspectRatio: true, + supportsResolution: true, + supportsAudio: true, + supportsWatermark: true, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + maxDurationSeconds: 10, + supportsSize: true, + supportsAspectRatio: true, + supportsResolution: true, + supportsAudio: true, + supportsWatermark: true, + }, + videoToVideo: { + enabled: true, + maxVideos: 1, + maxInputVideos: 4, + maxDurationSeconds: 10, + supportsSize: true, + supportsAspectRatio: true, + supportsResolution: true, + supportsAudio: true, + supportsWatermark: true, + }, }, async generateVideo(req): Promise { const fetchFn = fetch; diff --git a/extensions/runway/video-generation-provider.ts b/extensions/runway/video-generation-provider.ts index a8ffb59d8f0..d1062ef80d1 100644 --- a/extensions/runway/video-generation-provider.ts +++ b/extensions/runway/video-generation-provider.ts @@ -261,11 +261,24 @@ export function buildRunwayVideoGenerationProvider(): VideoGenerationProvider { agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 1, - maxDurationSeconds: MAX_DURATION_SECONDS, - supportsAspectRatio: true, + generate: { + maxVideos: 1, + maxDurationSeconds: MAX_DURATION_SECONDS, + supportsAspectRatio: true, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + maxDurationSeconds: MAX_DURATION_SECONDS, + supportsAspectRatio: true, + }, + videoToVideo: { + enabled: true, + maxVideos: 1, + maxInputVideos: 1, + supportsAspectRatio: true, + }, }, async generateVideo(req): Promise { const auth = await resolveApiKeyForProvider({ diff --git a/extensions/together/video-generation-provider.ts b/extensions/together/video-generation-provider.ts index 3b5f9d317ac..9c0380b29ab 
100644 --- a/extensions/together/video-generation-provider.ts +++ b/extensions/together/video-generation-provider.ts @@ -126,11 +126,21 @@ export function buildTogetherVideoGenerationProvider(): VideoGenerationProvider agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 0, - maxDurationSeconds: 12, - supportsSize: true, + generate: { + maxVideos: 1, + maxDurationSeconds: 12, + supportsSize: true, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + maxDurationSeconds: 12, + supportsSize: true, + }, + videoToVideo: { + enabled: false, + }, }, async generateVideo(req) { if ((req.inputVideos?.length ?? 0) > 0) { diff --git a/extensions/video-generation-core/src/runtime.test.ts b/extensions/video-generation-core/src/runtime.test.ts index cdbfc1fb1a9..145579170f0 100644 --- a/extensions/video-generation-core/src/runtime.test.ts +++ b/extensions/video-generation-core/src/runtime.test.ts @@ -126,7 +126,9 @@ describe("video-generation runtime", () => { defaultModel: "vid-v1", models: ["vid-v1"], capabilities: { - supportsAudio: true, + generate: { + supportsAudio: true, + }, }, generateVideo: async () => ({ videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }], @@ -177,7 +179,9 @@ describe("video-generation runtime", () => { mocks.getVideoGenerationProvider.mockReturnValue({ id: "openai", capabilities: { - supportsSize: true, + generate: { + supportsSize: true, + }, }, generateVideo: async (req) => { seenRequest = { diff --git a/extensions/video-generation-core/src/runtime.ts b/extensions/video-generation-core/src/runtime.ts index 07760ab32c1..a9af75d5b9c 100644 --- a/extensions/video-generation-core/src/runtime.ts +++ b/extensions/video-generation-core/src/runtime.ts @@ -53,30 +53,24 @@ function resolveVideoGenerationModeCapabilities(params: { if (mode === "generate") { return { mode, - capabilities: capabilities.generate ?? 
capabilities, + capabilities: capabilities.generate, }; } if (mode === "imageToVideo") { return { mode, - capabilities: capabilities.imageToVideo ?? { - ...capabilities, - enabled: (capabilities.maxInputImages ?? 0) > 0, - }, + capabilities: capabilities.imageToVideo, }; } if (mode === "videoToVideo") { return { mode, - capabilities: capabilities.videoToVideo ?? { - ...capabilities, - enabled: (capabilities.maxInputVideos ?? 0) > 0, - }, + capabilities: capabilities.videoToVideo, }; } return { mode, - capabilities, + capabilities: undefined, }; } diff --git a/extensions/video-generation-providers.live.test.ts b/extensions/video-generation-providers.live.test.ts index cbe171102f7..d1829d38d46 100644 --- a/extensions/video-generation-providers.live.test.ts +++ b/extensions/video-generation-providers.live.test.ts @@ -1,12 +1,21 @@ import { describe, expect, it } from "vitest"; +import { resolveOpenClawAgentDir } from "../src/agents/agent-paths.js"; import { collectProviderApiKeys } from "../src/agents/live-auth-keys.js"; -import { isLiveTestEnabled } from "../src/agents/live-test-helpers.js"; -import type { OpenClawConfig } from "../src/config/config.js"; +import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "../src/agents/live-test-helpers.js"; +import { resolveApiKeyForProvider } from "../src/agents/model-auth.js"; +import { loadConfig, type OpenClawConfig } from "../src/config/config.js"; +import { isTruthyEnvValue } from "../src/infra/env.js"; +import { getShellEnvAppliedKeys, loadShellEnvFallback } from "../src/infra/shell-env.js"; +import { encodePngRgba, fillPixel } from "../src/media/png-encode.js"; import { getProviderEnvVars } from "../src/secrets/provider-env-vars.js"; import { + canRunBufferBackedVideoToVideoLiveLane, DEFAULT_LIVE_VIDEO_MODELS, parseCsvFilter, parseProviderModelMap, + redactLiveApiKey, + resolveConfiguredLiveVideoModels, + resolveLiveVideoAuthStore, } from "../src/video-generation/live-test-helpers.js"; import { 
parseVideoGenerationModelRef } from "../src/video-generation/model-ref.js"; import { @@ -26,6 +35,9 @@ import vydraPlugin from "./vydra/index.js"; import xaiPlugin from "./xai/index.js"; const LIVE = isLiveTestEnabled(); +const REQUIRE_PROFILE_KEYS = + isLiveProfileKeyModeEnabled() || isTruthyEnvValue(process.env.OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS); +const describeLive = LIVE ? describe : describe.skip; const providerFilter = parseCsvFilter(process.env.OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS); const envModelMap = parseProviderModelMap(process.env.OPENCLAW_LIVE_VIDEO_GENERATION_MODELS); @@ -72,8 +84,40 @@ const CASES: LiveProviderCase[] = [ .filter((entry) => (providerFilter ? providerFilter.has(entry.providerId) : true)) .toSorted((left, right) => left.providerId.localeCompare(right.providerId)); -function asConfig(value: unknown): OpenClawConfig { - return value as OpenClawConfig; +function withPluginsEnabled(cfg: OpenClawConfig): OpenClawConfig { + return { + ...cfg, + plugins: { + ...cfg.plugins, + enabled: true, + }, + }; +} + +function createEditReferencePng(): Buffer { + const width = 192; + const height = 192; + const buf = Buffer.alloc(width * height * 4, 255); + + for (let y = 0; y < height; y += 1) { + for (let x = 0; x < width; x += 1) { + fillPixel(buf, x, y, width, 238, 247, 255, 255); + } + } + + for (let y = 24; y < 168; y += 1) { + for (let x = 24; x < 168; x += 1) { + fillPixel(buf, x, y, width, 76, 154, 255, 255); + } + } + + for (let y = 48; y < 144; y += 1) { + for (let x = 48; x < 144; x += 1) { + fillPixel(buf, x, y, width, 255, 255, 255, 255); + } + } + + return encodePngRgba(buf, width, height); } function resolveProviderModelForLiveTest(providerId: string, modelRef: string): string { @@ -84,17 +128,63 @@ function resolveProviderModelForLiveTest(providerId: string, modelRef: string): return modelRef; } -describe.skipIf(!LIVE)("video generation provider live", () => { - for (const testCase of CASES) { - const modelRef = - 
envModelMap.get(testCase.providerId) ?? DEFAULT_LIVE_VIDEO_MODELS[testCase.providerId]; - const hasAuth = collectProviderApiKeys(testCase.providerId).length > 0; - const expectedEnvVars = getProviderEnvVars(testCase.providerId).join(", "); +function maybeLoadShellEnvForVideoProviders(providerIds: string[]): void { + const expectedKeys = [ + ...new Set(providerIds.flatMap((providerId) => getProviderEnvVars(providerId))), + ]; + if (expectedKeys.length === 0) { + return; + } + loadShellEnvFallback({ + enabled: true, + env: process.env, + expectedKeys, + logger: { warn: (message: string) => console.warn(message) }, + }); +} + +describeLive("video generation provider live", () => { + it( + "covers declared video-generation modes with shell/profile auth", + async () => { + const cfg = withPluginsEnabled(loadConfig()); + const configuredModels = resolveConfiguredLiveVideoModels(cfg); + const agentDir = resolveOpenClawAgentDir(); + const attempted: string[] = []; + const skipped: string[] = []; + const failures: string[] = []; + + maybeLoadShellEnvForVideoProviders(CASES.map((entry) => entry.providerId)); + + for (const testCase of CASES) { + const modelRef = + envModelMap.get(testCase.providerId) ?? + configuredModels.get(testCase.providerId) ?? + DEFAULT_LIVE_VIDEO_MODELS[testCase.providerId]; + if (!modelRef) { + skipped.push(`${testCase.providerId}: no model configured`); + continue; + } + + const hasLiveKeys = collectProviderApiKeys(testCase.providerId).length > 0; + const authStore = resolveLiveVideoAuthStore({ + requireProfileKeys: REQUIRE_PROFILE_KEYS, + hasLiveKeys, + }); + let authLabel = "unresolved"; + try { + const auth = await resolveApiKeyForProvider({ + provider: testCase.providerId, + cfg, + agentDir, + store: authStore, + }); + authLabel = `${auth.source} ${redactLiveApiKey(auth.apiKey)}`; + } catch { + skipped.push(`${testCase.providerId}: no usable auth`); + continue; + } - const liveIt = hasAuth && modelRef ? 
it : it.skip; - liveIt( - `generates a short video via ${testCase.providerId}`, - async () => { const { videoProviders } = await registerProviderPlugin({ plugin: testCase.plugin, id: testCase.pluginId, @@ -105,32 +195,144 @@ describe.skipIf(!LIVE)("video generation provider live", () => { testCase.providerId, "video provider", ); - const durationSeconds = Math.min(provider.capabilities.maxDurationSeconds ?? 3, 3); const providerModel = resolveProviderModelForLiveTest(testCase.providerId, modelRef); + const generateCaps = provider.capabilities.generate; + const imageToVideoCaps = provider.capabilities.imageToVideo; + const videoToVideoCaps = provider.capabilities.videoToVideo; + const durationSeconds = Math.min(generateCaps?.maxDurationSeconds ?? 3, 3); + let generatedVideo = null as { + buffer: Buffer; + mimeType: string; + fileName?: string; + } | null; - const result = await provider.generateVideo({ - provider: testCase.providerId, - model: providerModel, - prompt: - "A tiny paper diorama city at sunrise with slow cinematic camera motion and no text.", - cfg: asConfig({ plugins: { enabled: true } }), - agentDir: "/tmp/openclaw-live-video", - durationSeconds, - ...(provider.capabilities.supportsAspectRatio ? { aspectRatio: "16:9" } : {}), - ...(provider.capabilities.supportsResolution ? { resolution: "480P" as const } : {}), - ...(provider.capabilities.supportsAudio ? { audio: false } : {}), - ...(provider.capabilities.supportsWatermark ? { watermark: false } : {}), - }); + try { + const result = await provider.generateVideo({ + provider: testCase.providerId, + model: providerModel, + prompt: + "A tiny paper diorama city at sunrise with slow cinematic camera motion and no text.", + cfg, + agentDir, + authStore, + durationSeconds, + ...(generateCaps?.supportsAspectRatio ? { aspectRatio: "16:9" } : {}), + ...(generateCaps?.supportsResolution ? { resolution: "480P" as const } : {}), + ...(generateCaps?.supportsAudio ? 
{ audio: false } : {}), + ...(generateCaps?.supportsWatermark ? { watermark: false } : {}), + }); - expect(result.videos.length).toBeGreaterThan(0); - expect(result.videos[0]?.mimeType.startsWith("video/")).toBe(true); - expect(result.videos[0]?.buffer.byteLength).toBeGreaterThan(1024); - }, - 8 * 60_000, - ); + expect(result.videos.length).toBeGreaterThan(0); + expect(result.videos[0]?.mimeType.startsWith("video/")).toBe(true); + expect(result.videos[0]?.buffer.byteLength).toBeGreaterThan(1024); + generatedVideo = result.videos[0] ?? null; + attempted.push(`${testCase.providerId}:generate:${providerModel} (${authLabel})`); + } catch (error) { + failures.push( + `${testCase.providerId}:generate (${authLabel}): ${ + error instanceof Error ? error.message : String(error) + }`, + ); + continue; + } - if (!hasAuth || !modelRef) { - it.skip(`skips ${testCase.providerId} without live auth/model (${expectedEnvVars || "no env vars"})`, () => {}); - } - } + if (!imageToVideoCaps?.enabled) { + continue; + } + + try { + const result = await provider.generateVideo({ + provider: testCase.providerId, + model: providerModel, + prompt: + "Animate the reference art with subtle parallax motion and drifting camera movement.", + cfg, + agentDir, + authStore, + durationSeconds, + inputImages: [ + { + buffer: createEditReferencePng(), + mimeType: "image/png", + fileName: "reference.png", + }, + ], + ...(imageToVideoCaps.supportsAspectRatio ? { aspectRatio: "16:9" } : {}), + ...(imageToVideoCaps.supportsResolution ? { resolution: "480P" as const } : {}), + ...(imageToVideoCaps.supportsAudio ? { audio: false } : {}), + ...(imageToVideoCaps.supportsWatermark ? 
{ watermark: false } : {}), + }); + + expect(result.videos.length).toBeGreaterThan(0); + expect(result.videos[0]?.mimeType.startsWith("video/")).toBe(true); + expect(result.videos[0]?.buffer.byteLength).toBeGreaterThan(1024); + attempted.push(`${testCase.providerId}:imageToVideo:${providerModel} (${authLabel})`); + } catch (error) { + failures.push( + `${testCase.providerId}:imageToVideo (${authLabel}): ${ + error instanceof Error ? error.message : String(error) + }`, + ); + } + + if (!videoToVideoCaps?.enabled) { + continue; + } + if ( + !canRunBufferBackedVideoToVideoLiveLane({ + providerId: testCase.providerId, + modelRef, + }) + ) { + skipped.push( + `${testCase.providerId}:videoToVideo requires remote URL or model-specific input`, + ); + continue; + } + if (!generatedVideo?.buffer) { + skipped.push(`${testCase.providerId}:videoToVideo missing generated seed video`); + continue; + } + + try { + const result = await provider.generateVideo({ + provider: testCase.providerId, + model: providerModel, + prompt: "Rework the reference clip into a brighter, steadier cinematic continuation.", + cfg, + agentDir, + authStore, + durationSeconds: Math.min(videoToVideoCaps.maxDurationSeconds ?? durationSeconds, 3), + inputVideos: [generatedVideo], + ...(videoToVideoCaps.supportsAspectRatio ? { aspectRatio: "16:9" } : {}), + ...(videoToVideoCaps.supportsResolution ? { resolution: "480P" as const } : {}), + ...(videoToVideoCaps.supportsAudio ? { audio: false } : {}), + ...(videoToVideoCaps.supportsWatermark ? { watermark: false } : {}), + }); + + expect(result.videos.length).toBeGreaterThan(0); + expect(result.videos[0]?.mimeType.startsWith("video/")).toBe(true); + expect(result.videos[0]?.buffer.byteLength).toBeGreaterThan(1024); + attempted.push(`${testCase.providerId}:videoToVideo:${providerModel} (${authLabel})`); + } catch (error) { + failures.push( + `${testCase.providerId}:videoToVideo (${authLabel}): ${ + error instanceof Error ? 
error.message : String(error) + }`, + ); + } + } + + console.log( + `[live:video-generation] attempted=${attempted.join(", ") || "none"} skipped=${skipped.join(", ") || "none"} failures=${failures.join(" | ") || "none"} shellEnv=${getShellEnvAppliedKeys().join(", ") || "none"}`, + ); + + if (attempted.length === 0) { + console.warn("[live:video-generation] no provider had usable auth; skipping assertions"); + return; + } + expect(failures).toEqual([]); + }, + 15 * 60_000, + ); }); diff --git a/extensions/vydra/video-generation-provider.ts b/extensions/vydra/video-generation-provider.ts index 4342f5e2ed7..be09b246a03 100644 --- a/extensions/vydra/video-generation-provider.ts +++ b/extensions/vydra/video-generation-provider.ts @@ -63,9 +63,17 @@ export function buildVydraVideoGenerationProvider(): VideoGenerationProvider { agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 0, + generate: { + maxVideos: 1, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + }, + videoToVideo: { + enabled: false, + }, }, async generateVideo(req) { if ((req.inputVideos?.length ?? 
0) > 0) { diff --git a/extensions/xai/video-generation-provider.ts b/extensions/xai/video-generation-provider.ts index 9cfbc435b79..62276dd5183 100644 --- a/extensions/xai/video-generation-provider.ts +++ b/extensions/xai/video-generation-provider.ts @@ -254,12 +254,28 @@ export function buildXaiVideoGenerationProvider(): VideoGenerationProvider { agentDir, }), capabilities: { - maxVideos: 1, - maxInputImages: 1, - maxInputVideos: 1, - maxDurationSeconds: 15, - supportsAspectRatio: true, - supportsResolution: true, + generate: { + maxVideos: 1, + maxDurationSeconds: 15, + supportsAspectRatio: true, + supportsResolution: true, + }, + imageToVideo: { + enabled: true, + maxVideos: 1, + maxInputImages: 1, + maxDurationSeconds: 15, + supportsAspectRatio: true, + supportsResolution: true, + }, + videoToVideo: { + enabled: true, + maxVideos: 1, + maxInputVideos: 1, + maxDurationSeconds: 15, + supportsAspectRatio: true, + supportsResolution: true, + }, }, async generateVideo(req) { const auth = await resolveApiKeyForProvider({ diff --git a/src/agents/tools/music-generate-tool.actions.ts b/src/agents/tools/music-generate-tool.actions.ts index 5c477ef5c00..253035d572c 100644 --- a/src/agents/tools/music-generate-tool.actions.ts +++ b/src/agents/tools/music-generate-tool.actions.ts @@ -1,4 +1,5 @@ import type { OpenClawConfig } from "../../config/config.js"; +import { listSupportedMusicGenerationModes } from "../../music-generation/capabilities.js"; import { listRuntimeMusicGenerationProviders } from "../../music-generation/runtime.js"; import { getProviderEnvVars } from "../../secrets/provider-env-vars.js"; import { @@ -16,6 +17,35 @@ function getMusicGenerationProviderAuthEnvVars(providerId: string): string[] { return getProviderEnvVars(providerId); } +function summarizeMusicGenerationCapabilities( + provider: ReturnType[number], +): string { + const supportedModes = listSupportedMusicGenerationModes(provider); + const generate = provider.capabilities.generate; + const edit 
= provider.capabilities.edit; + const capabilities = [ + supportedModes.length > 0 ? `modes=${supportedModes.join("/")}` : null, + generate?.maxTracks ? `maxTracks=${generate.maxTracks}` : null, + edit?.maxInputImages ? `maxInputImages=${edit.maxInputImages}` : null, + generate?.maxDurationSeconds ? `maxDurationSeconds=${generate.maxDurationSeconds}` : null, + generate?.supportsLyrics ? "lyrics" : null, + generate?.supportsInstrumental ? "instrumental" : null, + generate?.supportsDuration ? "duration" : null, + generate?.supportsFormat ? "format" : null, + generate?.supportedFormats?.length + ? `supportedFormats=${generate.supportedFormats.join("/")}` + : null, + generate?.supportedFormatsByModel && Object.keys(generate.supportedFormatsByModel).length > 0 + ? `supportedFormatsByModel=${Object.entries(generate.supportedFormatsByModel) + .map(([modelId, formats]) => `${modelId}:${formats.join("/")}`) + .join("; ")}` + : null, + ] + .filter((entry): entry is string => Boolean(entry)) + .join(", "); + return capabilities; +} + export function createMusicGenerateListActionResult( config?: OpenClawConfig, ): MusicGenerateActionResult { @@ -28,30 +58,7 @@ export function createMusicGenerateListActionResult( } const lines = providers.map((provider) => { const authHints = getMusicGenerationProviderAuthEnvVars(provider.id); - const capabilities = [ - provider.capabilities.maxTracks ? `maxTracks=${provider.capabilities.maxTracks}` : null, - provider.capabilities.maxInputImages - ? `maxInputImages=${provider.capabilities.maxInputImages}` - : null, - provider.capabilities.maxDurationSeconds - ? `maxDurationSeconds=${provider.capabilities.maxDurationSeconds}` - : null, - provider.capabilities.supportsLyrics ? "lyrics" : null, - provider.capabilities.supportsInstrumental ? "instrumental" : null, - provider.capabilities.supportsDuration ? "duration" : null, - provider.capabilities.supportsFormat ? "format" : null, - provider.capabilities.supportedFormats?.length - ? 
`supportedFormats=${provider.capabilities.supportedFormats.join("/")}` - : null, - provider.capabilities.supportedFormatsByModel && - Object.keys(provider.capabilities.supportedFormatsByModel).length > 0 - ? `supportedFormatsByModel=${Object.entries(provider.capabilities.supportedFormatsByModel) - .map(([modelId, formats]) => `${modelId}:${formats.join("/")}`) - .join("; ")}` - : null, - ] - .filter((entry): entry is string => Boolean(entry)) - .join(", "); + const capabilities = summarizeMusicGenerationCapabilities(provider); return [ `${provider.id}: default=${provider.defaultModel ?? "none"}`, provider.models?.length ? `models=${provider.models.join(", ")}` : null, @@ -68,6 +75,7 @@ export function createMusicGenerateListActionResult( id: provider.id, defaultModel: provider.defaultModel, models: provider.models ?? [], + modes: listSupportedMusicGenerationModes(provider), authEnvVars: getMusicGenerationProviderAuthEnvVars(provider.id), capabilities: provider.capabilities, })), diff --git a/src/agents/tools/music-generate-tool.test.ts b/src/agents/tools/music-generate-tool.test.ts index 075b1dd49a6..0f16a15fb85 100644 --- a/src/agents/tools/music-generate-tool.test.ts +++ b/src/agents/tools/music-generate-tool.test.ts @@ -241,12 +241,14 @@ describe("createMusicGenerateTool", () => { defaultModel: "music-2.5+", models: ["music-2.5+"], capabilities: { - maxTracks: 1, - supportsLyrics: true, - supportsInstrumental: true, - supportsDuration: true, - supportsFormat: true, - supportedFormats: ["mp3"], + generate: { + maxTracks: 1, + supportsLyrics: true, + supportsInstrumental: true, + supportsDuration: true, + supportsFormat: true, + supportedFormats: ["mp3"], + }, }, generateMusic: vi.fn(async () => { throw new Error("not used"); @@ -280,11 +282,13 @@ describe("createMusicGenerateTool", () => { defaultModel: "lyria-3-clip-preview", models: ["lyria-3-clip-preview"], capabilities: { - supportsLyrics: true, - supportsInstrumental: true, - supportsFormat: true, - 
supportedFormatsByModel: { - "lyria-3-clip-preview": ["mp3"], + generate: { + supportsLyrics: true, + supportsInstrumental: true, + supportsFormat: true, + supportedFormatsByModel: { + "lyria-3-clip-preview": ["mp3"], + }, }, }, generateMusic: vi.fn(async () => { diff --git a/src/agents/tools/music-generate-tool.ts b/src/agents/tools/music-generate-tool.ts index bdbc57db5ca..f3da868bbe7 100644 --- a/src/agents/tools/music-generate-tool.ts +++ b/src/agents/tools/music-generate-tool.ts @@ -4,6 +4,7 @@ import { loadConfig } from "../../config/config.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; import { saveMediaBuffer } from "../../media/store.js"; import { loadWebMedia } from "../../media/web-media.js"; +import { resolveMusicGenerationModeCapabilities } from "../../music-generation/capabilities.js"; import { parseMusicGenerationModelRef } from "../../music-generation/model-ref.js"; import { generateMusic, @@ -213,15 +214,28 @@ function validateMusicGenerationCapabilities(params: { if (!provider) { return; } - const caps = provider.capabilities; + const { capabilities: caps } = resolveMusicGenerationModeCapabilities({ + provider, + inputImageCount: params.inputImageCount, + }); if (params.inputImageCount > 0) { - const maxInputImages = caps.maxInputImages ?? MAX_INPUT_IMAGES; + if (!caps) { + throw new ToolInputError(`${provider.id} does not support reference-image edit inputs.`); + } + if ("enabled" in caps && !caps.enabled) { + throw new ToolInputError(`${provider.id} does not support reference-image edit inputs.`); + } + const maxInputImages = + ("maxInputImages" in caps ? caps.maxInputImages : undefined) ?? MAX_INPUT_IMAGES; if (params.inputImageCount > maxInputImages) { throw new ToolInputError( `${provider.id} supports at most ${maxInputImages} reference image${maxInputImages === 1 ? 
"" : "s"}.`, ); } } + if (!caps) { + return; + } if ( typeof params.durationSeconds === "number" && caps.supportsDuration && diff --git a/src/agents/tools/video-generate-tool.actions.ts b/src/agents/tools/video-generate-tool.actions.ts index 5bfe5690caa..71df0057ec8 100644 --- a/src/agents/tools/video-generate-tool.actions.ts +++ b/src/agents/tools/video-generate-tool.actions.ts @@ -17,6 +17,39 @@ function getVideoGenerationProviderAuthEnvVars(providerId: string): string[] { return getProviderEnvVars(providerId); } +function summarizeVideoGenerationCapabilities( + provider: ReturnType[number], +): string { + const supportedModes = listSupportedVideoGenerationModes(provider); + const generate = provider.capabilities.generate; + const imageToVideo = provider.capabilities.imageToVideo; + const videoToVideo = provider.capabilities.videoToVideo; + const capabilities = [ + supportedModes.length > 0 ? `modes=${supportedModes.join("/")}` : null, + generate?.maxVideos ? `maxVideos=${generate.maxVideos}` : null, + imageToVideo?.maxInputImages ? `maxInputImages=${imageToVideo.maxInputImages}` : null, + videoToVideo?.maxInputVideos ? `maxInputVideos=${videoToVideo.maxInputVideos}` : null, + generate?.maxDurationSeconds ? `maxDurationSeconds=${generate.maxDurationSeconds}` : null, + generate?.supportedDurationSeconds?.length + ? `supportedDurationSeconds=${generate.supportedDurationSeconds.join("/")}` + : null, + generate?.supportedDurationSecondsByModel && + Object.keys(generate.supportedDurationSecondsByModel).length > 0 + ? `supportedDurationSecondsByModel=${Object.entries(generate.supportedDurationSecondsByModel) + .map(([modelId, durations]) => `${modelId}:${durations.join("/")}`) + .join("; ")}` + : null, + generate?.supportsResolution ? "resolution" : null, + generate?.supportsAspectRatio ? "aspectRatio" : null, + generate?.supportsSize ? "size" : null, + generate?.supportsAudio ? "audio" : null, + generate?.supportsWatermark ? 
"watermark" : null, + ] + .filter((entry): entry is string => Boolean(entry)) + .join(", "); + return capabilities; +} + export function createVideoGenerateListActionResult( config?: OpenClawConfig, ): VideoGenerateActionResult { @@ -29,38 +62,7 @@ export function createVideoGenerateListActionResult( } const lines = providers.map((provider) => { const authHints = getVideoGenerationProviderAuthEnvVars(provider.id); - const supportedModes = listSupportedVideoGenerationModes(provider); - const capabilities = [ - supportedModes.length > 0 ? `modes=${supportedModes.join("/")}` : null, - provider.capabilities.maxVideos ? `maxVideos=${provider.capabilities.maxVideos}` : null, - provider.capabilities.maxInputImages - ? `maxInputImages=${provider.capabilities.maxInputImages}` - : null, - provider.capabilities.maxInputVideos - ? `maxInputVideos=${provider.capabilities.maxInputVideos}` - : null, - provider.capabilities.maxDurationSeconds - ? `maxDurationSeconds=${provider.capabilities.maxDurationSeconds}` - : null, - provider.capabilities.supportedDurationSeconds?.length - ? `supportedDurationSeconds=${provider.capabilities.supportedDurationSeconds.join("/")}` - : null, - provider.capabilities.supportedDurationSecondsByModel && - Object.keys(provider.capabilities.supportedDurationSecondsByModel).length > 0 - ? `supportedDurationSecondsByModel=${Object.entries( - provider.capabilities.supportedDurationSecondsByModel, - ) - .map(([modelId, durations]) => `${modelId}:${durations.join("/")}`) - .join("; ")}` - : null, - provider.capabilities.supportsResolution ? "resolution" : null, - provider.capabilities.supportsAspectRatio ? "aspectRatio" : null, - provider.capabilities.supportsSize ? "size" : null, - provider.capabilities.supportsAudio ? "audio" : null, - provider.capabilities.supportsWatermark ? 
"watermark" : null, - ] - .filter((entry): entry is string => Boolean(entry)) - .join(", "); + const capabilities = summarizeVideoGenerationCapabilities(provider); return [ `${provider.id}: default=${provider.defaultModel ?? "none"}`, provider.models?.length ? `models=${provider.models.join(", ")}` : null, diff --git a/src/agents/tools/video-generate-tool.test.ts b/src/agents/tools/video-generate-tool.test.ts index ac139711969..631121d48b3 100644 --- a/src/agents/tools/video-generate-tool.test.ts +++ b/src/agents/tools/video-generate-tool.test.ts @@ -305,9 +305,16 @@ describe("createVideoGenerateTool", () => { defaultModel: "veo-3.1-fast-generate-preview", models: ["veo-3.1-fast-generate-preview"], capabilities: { - maxInputImages: 1, - maxDurationSeconds: 8, - supportedDurationSeconds: [4, 6, 8], + generate: { + maxDurationSeconds: 8, + supportedDurationSeconds: [4, 6, 8], + }, + imageToVideo: { + enabled: true, + maxInputImages: 1, + maxDurationSeconds: 8, + supportedDurationSeconds: [4, 6, 8], + }, }, generateVideo: vi.fn(async () => { throw new Error("not used"); @@ -389,7 +396,9 @@ describe("createVideoGenerateTool", () => { defaultModel: "sora-2", models: ["sora-2"], capabilities: { - supportsSize: true, + generate: { + supportsSize: true, + }, }, generateVideo: vi.fn(async () => { throw new Error("not used"); diff --git a/src/agents/tools/video-generate-tool.ts b/src/agents/tools/video-generate-tool.ts index 1a170abd146..0b30f2d7311 100644 --- a/src/agents/tools/video-generate-tool.ts +++ b/src/agents/tools/video-generate-tool.ts @@ -281,6 +281,12 @@ function validateVideoGenerationCapabilities(params: { inputImageCount: params.inputImageCount, inputVideoCount: params.inputVideoCount, }); + if (!caps && mode === "imageToVideo" && params.inputVideoCount === 0) { + throw new ToolInputError(`${provider.id} does not support image-to-video reference inputs.`); + } + if (!caps && mode === "videoToVideo" && params.inputImageCount === 0) { + throw new 
ToolInputError(`${provider.id} does not support video-to-video reference inputs.`); + } if (!caps) { return; } diff --git a/src/music-generation/capabilities.test.ts b/src/music-generation/capabilities.test.ts new file mode 100644 index 00000000000..ce84eb175c5 --- /dev/null +++ b/src/music-generation/capabilities.test.ts @@ -0,0 +1,77 @@ +import { describe, expect, it } from "vitest"; +import { + listSupportedMusicGenerationModes, + resolveMusicGenerationMode, + resolveMusicGenerationModeCapabilities, +} from "./capabilities.js"; +import type { MusicGenerationProvider } from "./types.js"; + +function createProvider( + capabilities: MusicGenerationProvider["capabilities"], +): MusicGenerationProvider { + return { + id: "music-plugin", + capabilities, + async generateMusic() { + throw new Error("not used"); + }, + }; +} + +describe("music-generation capabilities", () => { + it("requires explicit edit capabilities before advertising edit mode", () => { + const provider = createProvider({ + maxInputImages: 2, + }); + + expect(listSupportedMusicGenerationModes(provider)).toEqual(["generate"]); + }); + + it("prefers explicit edit capabilities for reference-image requests", () => { + const provider = createProvider({ + supportsDuration: true, + edit: { + enabled: true, + maxInputImages: 1, + supportsDuration: false, + supportsLyrics: true, + }, + }); + + expect( + resolveMusicGenerationModeCapabilities({ + provider, + inputImageCount: 1, + }), + ).toEqual({ + mode: "edit", + capabilities: { + enabled: true, + maxInputImages: 1, + supportsDuration: false, + supportsLyrics: true, + }, + }); + }); + + it("detects generate vs edit mode from reference images", () => { + expect(resolveMusicGenerationMode({ inputImageCount: 0 })).toBe("generate"); + expect(resolveMusicGenerationMode({ inputImageCount: 1 })).toBe("edit"); + }); + + it("does not infer edit capabilities from aggregate fields", () => { + const provider = createProvider({ + maxInputImages: 1, + }); + + expect( + 
resolveMusicGenerationModeCapabilities({ + provider, + inputImageCount: 1, + }), + ).toEqual({ + mode: "edit", + capabilities: undefined, + }); + }); +}); diff --git a/src/music-generation/capabilities.ts b/src/music-generation/capabilities.ts new file mode 100644 index 00000000000..09b71f98c14 --- /dev/null +++ b/src/music-generation/capabilities.ts @@ -0,0 +1,47 @@ +import type { + MusicGenerationEditCapabilities, + MusicGenerationMode, + MusicGenerationModeCapabilities, + MusicGenerationProvider, +} from "./types.js"; + +export function resolveMusicGenerationMode(params: { + inputImageCount?: number; +}): MusicGenerationMode { + return (params.inputImageCount ?? 0) > 0 ? "edit" : "generate"; +} + +export function listSupportedMusicGenerationModes( + provider: Pick, +): MusicGenerationMode[] { + const modes: MusicGenerationMode[] = ["generate"]; + const edit = provider.capabilities.edit; + if (edit?.enabled) { + modes.push("edit"); + } + return modes; +} + +export function resolveMusicGenerationModeCapabilities(params: { + provider?: Pick; + inputImageCount?: number; +}): { + mode: MusicGenerationMode; + capabilities: MusicGenerationModeCapabilities | MusicGenerationEditCapabilities | undefined; +} { + const mode = resolveMusicGenerationMode(params); + const capabilities = params.provider?.capabilities; + if (!capabilities) { + return { mode, capabilities: undefined }; + } + if (mode === "generate") { + return { + mode, + capabilities: capabilities.generate, + }; + } + return { + mode, + capabilities: capabilities.edit, + }; +} diff --git a/src/music-generation/live-test-helpers.ts b/src/music-generation/live-test-helpers.ts index 0ac204f3211..ecca280c4c7 100644 --- a/src/music-generation/live-test-helpers.ts +++ b/src/music-generation/live-test-helpers.ts @@ -1,4 +1,84 @@ +import type { AuthProfileStore } from "../agents/auth-profiles.js"; +import type { OpenClawConfig } from "../config/config.js"; + export const DEFAULT_LIVE_MUSIC_MODELS: Record = { google: 
"google/lyria-3-clip-preview", minimax: "minimax/music-2.5+", }; + +export function redactLiveApiKey(value: string | undefined): string { + const trimmed = value?.trim(); + if (!trimmed) { + return "none"; + } + if (trimmed.length <= 12) { + return trimmed; + } + return `${trimmed.slice(0, 8)}...${trimmed.slice(-4)}`; +} + +export function parseCsvFilter(raw?: string): Set | null { + const trimmed = raw?.trim(); + if (!trimmed || trimmed === "all") { + return null; + } + const values = trimmed + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + return values.length > 0 ? new Set(values) : null; +} + +export function parseProviderModelMap(raw?: string): Map { + const entries = new Map(); + for (const token of raw?.split(",") ?? []) { + const trimmed = token.trim(); + if (!trimmed) { + continue; + } + const slash = trimmed.indexOf("/"); + if (slash <= 0 || slash === trimmed.length - 1) { + continue; + } + entries.set(trimmed.slice(0, slash).trim().toLowerCase(), trimmed); + } + return entries; +} + +export function resolveConfiguredLiveMusicModels(cfg: OpenClawConfig): Map { + const resolved = new Map(); + const configured = cfg.agents?.defaults?.musicGenerationModel; + const add = (value: string | undefined) => { + const trimmed = value?.trim(); + if (!trimmed) { + return; + } + const slash = trimmed.indexOf("/"); + if (slash <= 0 || slash === trimmed.length - 1) { + return; + } + resolved.set(trimmed.slice(0, slash).trim().toLowerCase(), trimmed); + }; + if (typeof configured === "string") { + add(configured); + return resolved; + } + add(configured?.primary); + for (const fallback of configured?.fallbacks ?? 
[]) { + add(fallback); + } + return resolved; +} + +export function resolveLiveMusicAuthStore(params: { + requireProfileKeys: boolean; + hasLiveKeys: boolean; +}): AuthProfileStore | undefined { + if (params.requireProfileKeys || !params.hasLiveKeys) { + return undefined; + } + return { + version: 1, + profiles: {}, + }; +} diff --git a/src/music-generation/provider-capabilities.contract.test.ts b/src/music-generation/provider-capabilities.contract.test.ts new file mode 100644 index 00000000000..174d9f755c5 --- /dev/null +++ b/src/music-generation/provider-capabilities.contract.test.ts @@ -0,0 +1,33 @@ +import { describe, expect, it } from "vitest"; +import { musicGenerationProviderContractRegistry } from "../plugins/contracts/registry.js"; +import { listSupportedMusicGenerationModes } from "./capabilities.js"; + +describe("bundled music-generation provider capabilities", () => { + it("declares explicit generate/edit support for every bundled provider", () => { + expect(musicGenerationProviderContractRegistry.length).toBeGreaterThan(0); + + for (const entry of musicGenerationProviderContractRegistry) { + const { provider } = entry; + expect( + provider.capabilities.generate, + `${provider.id} missing generate capabilities`, + ).toBeDefined(); + expect(provider.capabilities.edit, `${provider.id} missing edit capabilities`).toBeDefined(); + + const edit = provider.capabilities.edit; + if (!edit) { + continue; + } + + if (edit.enabled) { + expect( + edit.maxInputImages ?? 
0, + `${provider.id} edit.enabled requires maxInputImages`, + ).toBeGreaterThan(0); + expect(listSupportedMusicGenerationModes(provider)).toContain("edit"); + } else { + expect(listSupportedMusicGenerationModes(provider)).toEqual(["generate"]); + } + } + }); +}); diff --git a/src/music-generation/runtime.test.ts b/src/music-generation/runtime.test.ts index a74f8add681..5fc146accdb 100644 --- a/src/music-generation/runtime.test.ts +++ b/src/music-generation/runtime.test.ts @@ -136,7 +136,9 @@ describe("music-generation runtime", () => { defaultModel: "track-v1", models: ["track-v1"], capabilities: { - supportsDuration: true, + generate: { + supportsDuration: true, + }, }, generateMusic: async () => ({ tracks: [{ buffer: Buffer.from("mp3-bytes"), mimeType: "audio/mpeg" }], @@ -164,11 +166,13 @@ describe("music-generation runtime", () => { mocks.getMusicGenerationProvider.mockReturnValue({ id: "google", capabilities: { - supportsLyrics: true, - supportsInstrumental: true, - supportsFormat: true, - supportedFormatsByModel: { - "lyria-3-clip-preview": ["mp3"], + generate: { + supportsLyrics: true, + supportsInstrumental: true, + supportsFormat: true, + supportedFormatsByModel: { + "lyria-3-clip-preview": ["mp3"], + }, }, }, generateMusic: async (req) => { @@ -211,4 +215,74 @@ describe("music-generation runtime", () => { { key: "format", value: "wav" }, ]); }); + + it("uses mode-specific capabilities for edit requests", async () => { + let seenRequest: + | { + lyrics?: string; + instrumental?: boolean; + durationSeconds?: number; + format?: string; + } + | undefined; + mocks.resolveAgentModelPrimaryValue.mockReturnValue("google/lyria-3-pro-preview"); + mocks.getMusicGenerationProvider.mockReturnValue({ + id: "google", + capabilities: { + generate: { + supportsLyrics: false, + supportsInstrumental: false, + supportsFormat: true, + supportedFormats: ["mp3"], + }, + edit: { + enabled: true, + maxInputImages: 1, + supportsLyrics: true, + supportsInstrumental: true, + 
supportsDuration: false, + supportsFormat: false, + }, + }, + generateMusic: async (req) => { + seenRequest = { + lyrics: req.lyrics, + instrumental: req.instrumental, + durationSeconds: req.durationSeconds, + format: req.format, + }; + return { + tracks: [{ buffer: Buffer.from("mp3-bytes"), mimeType: "audio/mpeg" }], + model: "lyria-3-pro-preview", + }; + }, + }); + + const result = await generateMusic({ + cfg: { + agents: { + defaults: { + musicGenerationModel: { primary: "google/lyria-3-pro-preview" }, + }, + }, + } as OpenClawConfig, + prompt: "turn this cover image into a trailer cue", + lyrics: "rise up", + instrumental: true, + durationSeconds: 30, + format: "mp3", + inputImages: [{ buffer: Buffer.from("png"), mimeType: "image/png" }], + }); + + expect(seenRequest).toEqual({ + lyrics: "rise up", + instrumental: true, + durationSeconds: undefined, + format: undefined, + }); + expect(result.ignoredOverrides).toEqual([ + { key: "durationSeconds", value: 30 }, + { key: "format", value: "mp3" }, + ]); + }); }); diff --git a/src/music-generation/runtime.ts b/src/music-generation/runtime.ts index 5f1c96a1026..1b78d65f728 100644 --- a/src/music-generation/runtime.ts +++ b/src/music-generation/runtime.ts @@ -8,6 +8,7 @@ import { resolveCapabilityModelCandidates, throwCapabilityGenerationFailure, } from "../media-generation/runtime-shared.js"; +import { resolveMusicGenerationModeCapabilities } from "./capabilities.js"; import { parseMusicGenerationModelRef } from "./model-ref.js"; import { getMusicGenerationProvider, listMusicGenerationProviders } from "./provider-registry.js"; import type { @@ -54,14 +55,28 @@ function resolveProviderMusicGenerationOverrides(params: { instrumental?: boolean; durationSeconds?: number; format?: MusicGenerationOutputFormat; + inputImages?: MusicGenerationSourceImage[]; }) { - const caps = params.provider.capabilities; + const { capabilities: caps } = resolveMusicGenerationModeCapabilities({ + provider: params.provider, + 
inputImageCount: params.inputImages?.length ?? 0, + }); const ignoredOverrides: MusicGenerationIgnoredOverride[] = []; let lyrics = params.lyrics; let instrumental = params.instrumental; let durationSeconds = params.durationSeconds; let format = params.format; + if (!caps) { + return { + lyrics, + instrumental, + durationSeconds, + format, + ignoredOverrides, + }; + } + if (lyrics?.trim() && !caps.supportsLyrics) { ignoredOverrides.push({ key: "lyrics", value: lyrics }); lyrics = undefined; @@ -142,6 +157,7 @@ export async function generateMusic( instrumental: params.instrumental, durationSeconds: params.durationSeconds, format: params.format, + inputImages: params.inputImages, }); const result: MusicGenerationResult = await provider.generateMusic({ provider: candidate.provider, diff --git a/src/music-generation/types.ts b/src/music-generation/types.ts index 0a5dfd5e734..cec2ea74046 100644 --- a/src/music-generation/types.ts +++ b/src/music-generation/types.ts @@ -50,9 +50,10 @@ export type MusicGenerationIgnoredOverride = { value: string | boolean | number; }; -export type MusicGenerationProviderCapabilities = { +export type MusicGenerationMode = "generate" | "edit"; + +export type MusicGenerationModeCapabilities = { maxTracks?: number; - maxInputImages?: number; maxDurationSeconds?: number; supportsLyrics?: boolean; supportsInstrumental?: boolean; @@ -62,6 +63,17 @@ export type MusicGenerationProviderCapabilities = { supportedFormatsByModel?: Readonly>; }; +export type MusicGenerationEditCapabilities = MusicGenerationModeCapabilities & { + enabled: boolean; + maxInputImages?: number; +}; + +export type MusicGenerationProviderCapabilities = MusicGenerationModeCapabilities & { + maxInputImages?: number; + generate?: MusicGenerationModeCapabilities; + edit?: MusicGenerationEditCapabilities; +}; + export type MusicGenerationProvider = { id: string; aliases?: string[]; diff --git a/src/plugin-sdk/music-generation.ts b/src/plugin-sdk/music-generation.ts index 
a2e9c33e944..adb90e1ab2c 100644 --- a/src/plugin-sdk/music-generation.ts +++ b/src/plugin-sdk/music-generation.ts @@ -2,6 +2,9 @@ export type { GeneratedMusicAsset, + MusicGenerationEditCapabilities, + MusicGenerationMode, + MusicGenerationModeCapabilities, MusicGenerationProvider, MusicGenerationProviderCapabilities, MusicGenerationRequest, diff --git a/src/plugins/contracts/speech-vitest-registry.ts b/src/plugins/contracts/speech-vitest-registry.ts index 6bbac8712ad..1c834b74977 100644 --- a/src/plugins/contracts/speech-vitest-registry.ts +++ b/src/plugins/contracts/speech-vitest-registry.ts @@ -1,8 +1,4 @@ -import { createJiti } from "jiti"; import { loadBundledCapabilityRuntimeRegistry } from "../bundled-capability-runtime.js"; -import { resolveBundledPluginRepoEntryPath } from "../bundled-plugin-metadata.js"; -import { createCapturedPluginRegistration } from "../captured-registration.js"; -import type { OpenClawPluginDefinition } from "../types.js"; import type { ImageGenerationProviderPlugin, MediaUnderstandingProviderPlugin, @@ -85,58 +81,62 @@ const VITEST_CONTRACT_PLUGIN_IDS = { function loadVitestVideoGenerationFallbackEntries( pluginIds: readonly string[], ): VideoGenerationProviderContractEntry[] { - const jiti = createJiti(import.meta.url, { - interopDefault: true, - moduleCache: false, - fsCache: false, + return loadVitestCapabilityContractEntries({ + contract: "videoGenerationProviders", + pluginSdkResolution: "src", + pluginIds, + pickEntries: (registry) => + registry.videoGenerationProviders.map((entry) => ({ + pluginId: entry.pluginId, + provider: entry.provider, + })), }); - const repoRoot = process.cwd(); - return pluginIds.flatMap((pluginId) => { - const modulePath = resolveBundledPluginRepoEntryPath({ - rootDir: repoRoot, - pluginId, - preferBuilt: true, - }); - if (!modulePath) { - return []; - } - try { - const mod = jiti(modulePath) as - | OpenClawPluginDefinition - | { default?: OpenClawPluginDefinition }; - const plugin = - (mod as { 
default?: OpenClawPluginDefinition }).default ?? - (mod as OpenClawPluginDefinition); - if (typeof plugin?.register !== "function") { - return []; - } - const captured = createCapturedPluginRegistration(); - void plugin.register(captured.api); - return captured.videoGenerationProviders.map((provider) => ({ - pluginId, - provider, - })); - } catch { - return []; - } +} + +function loadVitestMusicGenerationFallbackEntries( + pluginIds: readonly string[], +): MusicGenerationProviderContractEntry[] { + return loadVitestCapabilityContractEntries({ + contract: "musicGenerationProviders", + pluginSdkResolution: "src", + pluginIds, + pickEntries: (registry) => + registry.musicGenerationProviders.map((entry) => ({ + pluginId: entry.pluginId, + provider: entry.provider, + })), }); } +function hasExplicitVideoGenerationModes(provider: VideoGenerationProviderPlugin): boolean { + return Boolean( + provider.capabilities.generate && + provider.capabilities.imageToVideo && + provider.capabilities.videoToVideo, + ); +} + +function hasExplicitMusicGenerationModes(provider: MusicGenerationProviderPlugin): boolean { + return Boolean(provider.capabilities.generate && provider.capabilities.edit); +} + function loadVitestCapabilityContractEntries(params: { contract: ManifestContractKey; + pluginIds?: readonly string[]; + pluginSdkResolution?: "dist" | "src"; pickEntries: (registry: ReturnType) => Array<{ pluginId: string; provider: T; }>; }): Array<{ pluginId: string; provider: T }> { - const pluginIds = VITEST_CONTRACT_PLUGIN_IDS[params.contract]; + const pluginIds = [...(params.pluginIds ?? VITEST_CONTRACT_PLUGIN_IDS[params.contract])]; if (pluginIds.length === 0) { return []; } const bulkEntries = params.pickEntries( loadBundledCapabilityRuntimeRegistry({ pluginIds, - pluginSdkResolution: "dist", + pluginSdkResolution: params.pluginSdkResolution ?? 
"dist", }), ); const coveredPluginIds = new Set(bulkEntries.map((entry) => entry.pluginId)); @@ -148,7 +148,7 @@ function loadVitestCapabilityContractEntries(params: { .pickEntries( loadBundledCapabilityRuntimeRegistry({ pluginIds: [pluginId], - pluginSdkResolution: "dist", + pluginSdkResolution: params.pluginSdkResolution ?? "dist", }), ) .filter((entry) => entry.pluginId === pluginId), @@ -220,17 +220,27 @@ export function loadVitestVideoGenerationProviderContractRegistry(): VideoGenera })), }); const coveredPluginIds = new Set(entries.map((entry) => entry.pluginId)); + const stalePluginIds = new Set( + entries + .filter((entry) => !hasExplicitVideoGenerationModes(entry.provider)) + .map((entry) => entry.pluginId), + ); const missingPluginIds = VITEST_CONTRACT_PLUGIN_IDS.videoGenerationProviders.filter( - (pluginId) => !coveredPluginIds.has(pluginId), + (pluginId) => !coveredPluginIds.has(pluginId) || stalePluginIds.has(pluginId), ); if (missingPluginIds.length === 0) { return entries; } - return [...entries, ...loadVitestVideoGenerationFallbackEntries(missingPluginIds)]; + const replacementEntries = loadVitestVideoGenerationFallbackEntries(missingPluginIds); + const replacedPluginIds = new Set(replacementEntries.map((entry) => entry.pluginId)); + return [ + ...entries.filter((entry) => !replacedPluginIds.has(entry.pluginId)), + ...replacementEntries, + ]; } export function loadVitestMusicGenerationProviderContractRegistry(): MusicGenerationProviderContractEntry[] { - return loadVitestCapabilityContractEntries({ + const entries = loadVitestCapabilityContractEntries({ contract: "musicGenerationProviders", pickEntries: (registry) => registry.musicGenerationProviders.map((entry) => ({ @@ -238,4 +248,22 @@ export function loadVitestMusicGenerationProviderContractRegistry(): MusicGenera provider: entry.provider, })), }); + const coveredPluginIds = new Set(entries.map((entry) => entry.pluginId)); + const stalePluginIds = new Set( + entries + .filter((entry) => 
!hasExplicitMusicGenerationModes(entry.provider)) + .map((entry) => entry.pluginId), + ); + const missingPluginIds = VITEST_CONTRACT_PLUGIN_IDS.musicGenerationProviders.filter( + (pluginId) => !coveredPluginIds.has(pluginId) || stalePluginIds.has(pluginId), + ); + if (missingPluginIds.length === 0) { + return entries; + } + const replacementEntries = loadVitestMusicGenerationFallbackEntries(missingPluginIds); + const replacedPluginIds = new Set(replacementEntries.map((entry) => entry.pluginId)); + return [ + ...entries.filter((entry) => !replacedPluginIds.has(entry.pluginId)), + ...replacementEntries, + ]; } diff --git a/src/video-generation/capabilities.test.ts b/src/video-generation/capabilities.test.ts index 553a4231d16..dab2e2f16fb 100644 --- a/src/video-generation/capabilities.test.ts +++ b/src/video-generation/capabilities.test.ts @@ -19,17 +19,13 @@ function createProvider( } describe("video-generation capabilities", () => { - it("derives legacy modes from aggregate input limits", () => { + it("requires explicit transform capabilities before advertising transform modes", () => { const provider = createProvider({ maxInputImages: 1, maxInputVideos: 2, }); - expect(listSupportedVideoGenerationModes(provider)).toEqual([ - "generate", - "imageToVideo", - "videoToVideo", - ]); + expect(listSupportedVideoGenerationModes(provider)).toEqual(["generate"]); }); it("prefers explicit mode capabilities for image-to-video requests", () => { @@ -60,7 +56,7 @@ describe("video-generation capabilities", () => { }); }); - it("falls back to aggregate capabilities for mixed reference requests", () => { + it("does not infer transform capabilities for mixed reference requests", () => { const provider = createProvider({ maxInputImages: 1, maxInputVideos: 4, @@ -76,19 +72,7 @@ describe("video-generation capabilities", () => { }), ).toEqual({ mode: null, - capabilities: { - maxVideos: undefined, - maxInputImages: 1, - maxInputVideos: 4, - maxDurationSeconds: undefined, - 
supportedDurationSeconds: undefined, - supportedDurationSecondsByModel: undefined, - supportsSize: undefined, - supportsAspectRatio: undefined, - supportsResolution: undefined, - supportsAudio: true, - supportsWatermark: undefined, - }, + capabilities: undefined, }); }); }); diff --git a/src/video-generation/capabilities.ts b/src/video-generation/capabilities.ts index cce3c0963b2..d2d789700c7 100644 --- a/src/video-generation/capabilities.ts +++ b/src/video-generation/capabilities.ts @@ -2,46 +2,9 @@ import type { VideoGenerationMode, VideoGenerationModeCapabilities, VideoGenerationProvider, - VideoGenerationProviderCapabilities, VideoGenerationTransformCapabilities, } from "./types.js"; -function pickModeCapabilities( - capabilities: VideoGenerationProviderCapabilities, -): VideoGenerationModeCapabilities { - return { - maxVideos: capabilities.maxVideos, - maxInputImages: capabilities.maxInputImages, - maxInputVideos: capabilities.maxInputVideos, - maxDurationSeconds: capabilities.maxDurationSeconds, - supportedDurationSeconds: capabilities.supportedDurationSeconds, - supportedDurationSecondsByModel: capabilities.supportedDurationSecondsByModel, - supportsSize: capabilities.supportsSize, - supportsAspectRatio: capabilities.supportsAspectRatio, - supportsResolution: capabilities.supportsResolution, - supportsAudio: capabilities.supportsAudio, - supportsWatermark: capabilities.supportsWatermark, - }; -} - -function deriveLegacyImageToVideoCapabilities( - capabilities: VideoGenerationProviderCapabilities, -): VideoGenerationTransformCapabilities { - return { - ...pickModeCapabilities(capabilities), - enabled: (capabilities.maxInputImages ?? 0) > 0, - }; -} - -function deriveLegacyVideoToVideoCapabilities( - capabilities: VideoGenerationProviderCapabilities, -): VideoGenerationTransformCapabilities { - return { - ...pickModeCapabilities(capabilities), - enabled: (capabilities.maxInputVideos ?? 
0) > 0, - }; -} - export function resolveVideoGenerationMode(params: { inputImageCount?: number; inputVideoCount?: number; @@ -64,16 +27,12 @@ export function listSupportedVideoGenerationModes( provider: Pick, ): VideoGenerationMode[] { const modes: VideoGenerationMode[] = ["generate"]; - const imageToVideo = - provider.capabilities.imageToVideo ?? - deriveLegacyImageToVideoCapabilities(provider.capabilities); - if (imageToVideo.enabled) { + const imageToVideo = provider.capabilities.imageToVideo; + if (imageToVideo?.enabled) { modes.push("imageToVideo"); } - const videoToVideo = - provider.capabilities.videoToVideo ?? - deriveLegacyVideoToVideoCapabilities(provider.capabilities); - if (videoToVideo.enabled) { + const videoToVideo = provider.capabilities.videoToVideo; + if (videoToVideo?.enabled) { modes.push("videoToVideo"); } return modes; @@ -95,23 +54,23 @@ export function resolveVideoGenerationModeCapabilities(params: { if (mode === "generate") { return { mode, - capabilities: capabilities.generate ?? pickModeCapabilities(capabilities), + capabilities: capabilities.generate, }; } if (mode === "imageToVideo") { return { mode, - capabilities: capabilities.imageToVideo ?? deriveLegacyImageToVideoCapabilities(capabilities), + capabilities: capabilities.imageToVideo, }; } if (mode === "videoToVideo") { return { mode, - capabilities: capabilities.videoToVideo ?? 
deriveLegacyVideoToVideoCapabilities(capabilities), + capabilities: capabilities.videoToVideo, }; } return { mode, - capabilities: pickModeCapabilities(capabilities), + capabilities: undefined, }; } diff --git a/src/video-generation/live-test-helpers.test.ts b/src/video-generation/live-test-helpers.test.ts new file mode 100644 index 00000000000..392c1eba914 --- /dev/null +++ b/src/video-generation/live-test-helpers.test.ts @@ -0,0 +1,127 @@ +import { describe, expect, it } from "vitest"; +import type { OpenClawConfig } from "../config/config.js"; +import { + canRunBufferBackedVideoToVideoLiveLane, + parseCsvFilter, + parseProviderModelMap, + redactLiveApiKey, + resolveConfiguredLiveVideoModels, + resolveLiveVideoAuthStore, +} from "./live-test-helpers.js"; + +describe("video-generation live-test helpers", () => { + it("parses provider filters and treats empty/all as unfiltered", () => { + expect(parseCsvFilter()).toBeNull(); + expect(parseCsvFilter("all")).toBeNull(); + expect(parseCsvFilter(" google , openai ")).toEqual(new Set(["google", "openai"])); + }); + + it("parses provider model overrides by provider id", () => { + expect( + parseProviderModelMap("google/veo-3.1-fast-generate-preview, openai/sora-2, invalid"), + ).toEqual( + new Map([ + ["google", "google/veo-3.1-fast-generate-preview"], + ["openai", "openai/sora-2"], + ]), + ); + }); + + it("collects configured models from primary and fallbacks", () => { + const cfg = { + agents: { + defaults: { + videoGenerationModel: { + primary: "google/veo-3.1-fast-generate-preview", + fallbacks: ["openai/sora-2", "invalid"], + }, + }, + }, + } as OpenClawConfig; + + expect(resolveConfiguredLiveVideoModels(cfg)).toEqual( + new Map([ + ["google", "google/veo-3.1-fast-generate-preview"], + ["openai", "openai/sora-2"], + ]), + ); + }); + + it("uses an empty auth store when live env keys should override stale profiles", () => { + expect( + resolveLiveVideoAuthStore({ + requireProfileKeys: false, + hasLiveKeys: true, + }), 
+ ).toEqual({ + version: 1, + profiles: {}, + }); + }); + + it("keeps profile-store mode when requested or when no live keys exist", () => { + expect( + resolveLiveVideoAuthStore({ + requireProfileKeys: true, + hasLiveKeys: true, + }), + ).toBeUndefined(); + expect( + resolveLiveVideoAuthStore({ + requireProfileKeys: false, + hasLiveKeys: false, + }), + ).toBeUndefined(); + }); + + it("redacts live API keys for diagnostics", () => { + expect(redactLiveApiKey(undefined)).toBe("none"); + expect(redactLiveApiKey("short-key")).toBe("short-key"); + expect(redactLiveApiKey("sk-proj-1234567890")).toBe("sk-proj-...7890"); + }); + + it("runs buffer-backed video-to-video only for supported providers/models", () => { + expect( + canRunBufferBackedVideoToVideoLiveLane({ + providerId: "google", + modelRef: "google/veo-3.1-fast-generate-preview", + }), + ).toBe(true); + expect( + canRunBufferBackedVideoToVideoLiveLane({ + providerId: "openai", + modelRef: "openai/sora-2", + }), + ).toBe(true); + expect( + canRunBufferBackedVideoToVideoLiveLane({ + providerId: "runway", + modelRef: "runway/gen4_aleph", + }), + ).toBe(true); + expect( + canRunBufferBackedVideoToVideoLiveLane({ + providerId: "runway", + modelRef: "runway/gen4.5", + }), + ).toBe(false); + expect( + canRunBufferBackedVideoToVideoLiveLane({ + providerId: "alibaba", + modelRef: "alibaba/wan2.6-r2v", + }), + ).toBe(false); + expect( + canRunBufferBackedVideoToVideoLiveLane({ + providerId: "qwen", + modelRef: "qwen/wan2.6-r2v", + }), + ).toBe(false); + expect( + canRunBufferBackedVideoToVideoLiveLane({ + providerId: "xai", + modelRef: "xai/grok-imagine-video", + }), + ).toBe(false); + }); +}); diff --git a/src/video-generation/live-test-helpers.ts b/src/video-generation/live-test-helpers.ts index 21007975929..e9b77c50dfb 100644 --- a/src/video-generation/live-test-helpers.ts +++ b/src/video-generation/live-test-helpers.ts @@ -14,6 +14,8 @@ export const DEFAULT_LIVE_VIDEO_MODELS: Record = { xai: "xai/grok-imagine-video", 
}; +const REMOTE_URL_VIDEO_TO_VIDEO_PROVIDERS = new Set(["alibaba", "qwen", "xai"]); + export function redactLiveApiKey(value: string | undefined): string { const trimmed = value?.trim(); if (!trimmed) { @@ -78,6 +80,25 @@ export function resolveConfiguredLiveVideoModels(cfg: OpenClawConfig): Map { + it("declares explicit mode support for every bundled provider", () => { + expect(videoGenerationProviderContractRegistry.length).toBeGreaterThan(0); + + for (const entry of videoGenerationProviderContractRegistry) { + const { provider } = entry; + expect( + provider.capabilities.generate, + `${provider.id} missing generate capabilities`, + ).toBeDefined(); + expect( + provider.capabilities.imageToVideo, + `${provider.id} missing imageToVideo capabilities`, + ).toBeDefined(); + expect( + provider.capabilities.videoToVideo, + `${provider.id} missing videoToVideo capabilities`, + ).toBeDefined(); + + const supportedModes = listSupportedVideoGenerationModes(provider); + const imageToVideo = provider.capabilities.imageToVideo; + const videoToVideo = provider.capabilities.videoToVideo; + + if (imageToVideo?.enabled) { + expect( + imageToVideo.maxInputImages ?? 0, + `${provider.id} imageToVideo.enabled requires maxInputImages`, + ).toBeGreaterThan(0); + expect(supportedModes).toContain("imageToVideo"); + } + if (videoToVideo?.enabled) { + expect( + videoToVideo.maxInputVideos ?? 
0, + `${provider.id} videoToVideo.enabled requires maxInputVideos`, + ).toBeGreaterThan(0); + expect(supportedModes).toContain("videoToVideo"); + } + } + }); +}); diff --git a/src/video-generation/runtime.test.ts b/src/video-generation/runtime.test.ts index fd363f76f1d..49aa2f81898 100644 --- a/src/video-generation/runtime.test.ts +++ b/src/video-generation/runtime.test.ts @@ -136,7 +136,9 @@ describe("video-generation runtime", () => { defaultModel: "vid-v1", models: ["vid-v1"], capabilities: { - supportsAudio: true, + generate: { + supportsAudio: true, + }, }, generateVideo: async () => ({ videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }], @@ -157,7 +159,9 @@ describe("video-generation runtime", () => { mocks.getVideoGenerationProvider.mockReturnValue({ id: "video-plugin", capabilities: { - supportedDurationSeconds: [4, 6, 8], + generate: { + supportedDurationSeconds: [4, 6, 8], + }, }, generateVideo: async (req) => { seenDurationSeconds = req.durationSeconds; @@ -203,7 +207,9 @@ describe("video-generation runtime", () => { mocks.getVideoGenerationProvider.mockReturnValue({ id: "openai", capabilities: { - supportsSize: true, + generate: { + supportsSize: true, + }, }, generateVideo: async (req) => { seenRequest = {