From f45390416565253dfd686c8ea68bddd0f85ecbb9 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 17 May 2026 02:05:22 +0100 Subject: [PATCH] feat: add fal and OpenRouter music generation (#82789) * feat: add fal and OpenRouter music generation * fix: repair music generation CI gates * chore: refresh proof gate --- CHANGELOG.md | 1 + .../.generated/plugin-sdk-api-baseline.sha256 | 4 +- docs/plugins/manifest.md | 36 +- docs/plugins/reference/fal.md | 2 +- docs/plugins/reference/openrouter.md | 2 +- docs/providers/fal.md | 41 ++- docs/providers/openrouter.md | 29 ++ docs/tools/media-overview.md | 4 +- docs/tools/music-generation.md | 49 ++- extensions/fal/index.ts | 4 +- .../fal/music-generation-provider.test.ts | 200 ++++++++++ extensions/fal/music-generation-provider.ts | 219 +++++++++++ extensions/fal/openclaw.plugin.json | 5 +- .../fal/plugin-registration.contract.test.ts | 1 + extensions/fal/provider-contract-api.ts | 8 +- extensions/fal/provider-registration.ts | 8 +- extensions/fal/test-api.ts | 1 + .../music-generation-providers.live.test.ts | 16 +- extensions/openrouter/api.ts | 1 + extensions/openrouter/index.test.ts | 19 +- extensions/openrouter/index.ts | 3 + .../music-generation-provider.test.ts | 226 ++++++++++++ .../openrouter/music-generation-provider.ts | 344 ++++++++++++++++++ extensions/openrouter/openclaw.plugin.json | 2 + .../openrouter/provider-contract-api.ts | 1 + extensions/openrouter/test-api.ts | 1 + .../tools/music-generate-tool.actions.ts | 11 + src/cli/command-catalog.ts | 5 + src/commands/auth-choice-options.test.ts | 27 +- src/flows/provider-flow.ts | 2 +- src/gateway/origin-check.test.ts | 18 + .../provider-capabilities.contract.test.ts | 8 +- src/media-generation/runtime-shared.test.ts | 26 ++ src/media-generation/runtime-shared.ts | 24 ++ src/model-catalog/provider-index/normalize.ts | 4 +- src/model-catalog/provider-index/types.ts | 2 +- src/music-generation/live-test-helpers.ts | 2 + src/music-generation/normalization.ts | 
22 +- src/music-generation/provider-assets.ts | 110 ++++++ src/music-generation/runtime.test.ts | 58 +++ src/music-generation/types.ts | 2 + src/pairing/setup-code.test.ts | 22 ++ src/plugin-sdk/index.ts | 2 +- src/plugin-sdk/music-generation.ts | 6 + .../plugin-registration-contract-cases.ts | 2 + src/plugins/inspect-shape.ts | 4 + src/plugins/manifest.ts | 7 +- .../official-external-plugin-catalog.ts | 2 +- src/plugins/provider-auth-choices.ts | 2 +- src/plugins/provider-install-catalog.ts | 8 +- src/plugins/provider-validation.ts | 10 +- src/plugins/provider-wizard.ts | 2 +- src/plugins/types.ts | 2 +- test/test-env.ts | 5 + 54 files changed, 1535 insertions(+), 87 deletions(-) create mode 100644 extensions/fal/music-generation-provider.test.ts create mode 100644 extensions/fal/music-generation-provider.ts create mode 100644 extensions/openrouter/music-generation-provider.test.ts create mode 100644 extensions/openrouter/music-generation-provider.ts create mode 100644 src/music-generation/provider-assets.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 8593fb46b0a..83eac67ac92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai - Security/audit: add `security.audit.suppressions` for intentionally accepted audit findings, keeping suppressed matches out of the active summary while preserving them in JSON output with an active suppression notice. (#76949) Thanks @100menotu001. - Agents/subagents: label delegated task and subagent completion handoffs as ready for parent review, and tell requester agents to review/verify results before calling them done. (#78985) Thanks @100menotu001. +- Providers/media: add fal and OpenRouter music-generation providers for the shared `music_generate` tool, including fal MiniMax/ACE/Stable Audio endpoints and OpenRouter Lyria audio output. - Control UI: show provider quota usage in the Overview card and Chat header, and recover stale Chat in-progress state after missed terminal events. 
(#82647) - Mac app remote setup can now be preconfigured from `openclaw-mac configure-remote`, skips onboarding when config is already complete, supports direct LAN/Tailnet gateway URLs, allows private same-origin Control UI loads, and owns the SSH tunnel process when SSH is selected. - Providers/xAI: add xAI Grok OAuth login for SuperGrok subscribers, letting `xai/*` models and xAI media/tool providers authenticate without `XAI_API_KEY`. diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index fd14b9ca91e..950fb4b0a2e 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -1b2d60a1ce15bdac9db5259df0480a6073646faf1de81d88bf53dc6e43ae2949 plugin-sdk-api-baseline.json -d76b67aa2618604da379147f44ac0746850bc5f5174404c979dc82ec6c45e05d plugin-sdk-api-baseline.jsonl +2c665b045d30f690c5fd6adb89481a003d5cc55ab4eed1a0456ef47136f6b684 plugin-sdk-api-baseline.json +f4b6c016576cd19409356ef23d18da0e54cb6c5904f864049461ace921e1f72c plugin-sdk-api-baseline.jsonl diff --git a/docs/plugins/manifest.md b/docs/plugins/manifest.md index 9feba6f0ea8..e9b24edf124 100644 --- a/docs/plugins/manifest.md +++ b/docs/plugins/manifest.md @@ -328,24 +328,24 @@ OpenClaw reads this before provider runtime loads. Provider setup lists use these manifest choices, descriptor-derived setup choices, and install-catalog metadata without loading provider runtime. -| Field | Required | Type | What it means | -| --------------------- | -------- | ----------------------------------------------- | -------------------------------------------------------------------------------------------------------- | -| `provider` | Yes | `string` | Provider id this choice belongs to. | -| `method` | Yes | `string` | Auth method id to dispatch to. | -| `choiceId` | Yes | `string` | Stable auth-choice id used by onboarding and CLI flows. | -| `choiceLabel` | No | `string` | User-facing label. 
If omitted, OpenClaw falls back to `choiceId`. | -| `choiceHint` | No | `string` | Short helper text for the picker. | -| `assistantPriority` | No | `number` | Lower values sort earlier in assistant-driven interactive pickers. | -| `assistantVisibility` | No | `"visible"` \| `"manual-only"` | Hide the choice from assistant pickers while still allowing manual CLI selection. | -| `deprecatedChoiceIds` | No | `string[]` | Legacy choice ids that should redirect users to this replacement choice. | -| `groupId` | No | `string` | Optional group id for grouping related choices. | -| `groupLabel` | No | `string` | User-facing label for that group. | -| `groupHint` | No | `string` | Short helper text for the group. | -| `optionKey` | No | `string` | Internal option key for simple one-flag auth flows. | -| `cliFlag` | No | `string` | CLI flag name, such as `--openrouter-api-key`. | -| `cliOption` | No | `string` | Full CLI option shape, such as `--openrouter-api-key `. | -| `cliDescription` | No | `string` | Description used in CLI help. | -| `onboardingScopes` | No | `Array<"text-inference" \| "image-generation">` | Which onboarding surfaces this choice should appear in. If omitted, it defaults to `["text-inference"]`. | +| Field | Required | Type | What it means | +| --------------------- | -------- | --------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------- | +| `provider` | Yes | `string` | Provider id this choice belongs to. | +| `method` | Yes | `string` | Auth method id to dispatch to. | +| `choiceId` | Yes | `string` | Stable auth-choice id used by onboarding and CLI flows. | +| `choiceLabel` | No | `string` | User-facing label. If omitted, OpenClaw falls back to `choiceId`. | +| `choiceHint` | No | `string` | Short helper text for the picker. | +| `assistantPriority` | No | `number` | Lower values sort earlier in assistant-driven interactive pickers. 
| +| `assistantVisibility` | No | `"visible"` \| `"manual-only"` | Hide the choice from assistant pickers while still allowing manual CLI selection. | +| `deprecatedChoiceIds` | No | `string[]` | Legacy choice ids that should redirect users to this replacement choice. | +| `groupId` | No | `string` | Optional group id for grouping related choices. | +| `groupLabel` | No | `string` | User-facing label for that group. | +| `groupHint` | No | `string` | Short helper text for the group. | +| `optionKey` | No | `string` | Internal option key for simple one-flag auth flows. | +| `cliFlag` | No | `string` | CLI flag name, such as `--openrouter-api-key`. | +| `cliOption` | No | `string` | Full CLI option shape, such as `--openrouter-api-key `. | +| `cliDescription` | No | `string` | Description used in CLI help. | +| `onboardingScopes` | No | `Array<"text-inference" \| "image-generation" \| "music-generation">` | Which onboarding surfaces this choice should appear in. If omitted, it defaults to `["text-inference"]`. | ## commandAliases reference diff --git a/docs/plugins/reference/fal.md b/docs/plugins/reference/fal.md index 6ee215ef102..35be46a6e4e 100644 --- a/docs/plugins/reference/fal.md +++ b/docs/plugins/reference/fal.md @@ -16,7 +16,7 @@ Adds fal model provider support to OpenClaw. ## Surface -providers: fal; contracts: imageGenerationProviders, videoGenerationProviders +providers: fal; contracts: imageGenerationProviders, musicGenerationProviders, videoGenerationProviders ## Related docs diff --git a/docs/plugins/reference/openrouter.md b/docs/plugins/reference/openrouter.md index c4ad0777a88..3dc2ae14cdd 100644 --- a/docs/plugins/reference/openrouter.md +++ b/docs/plugins/reference/openrouter.md @@ -16,7 +16,7 @@ Adds OpenRouter model provider support to OpenClaw. 
## Surface -providers: openrouter; contracts: imageGenerationProviders, mediaUnderstandingProviders, speechProviders, videoGenerationProviders +providers: openrouter; contracts: imageGenerationProviders, mediaUnderstandingProviders, musicGenerationProviders, speechProviders, videoGenerationProviders ## Related docs diff --git a/docs/providers/fal.md b/docs/providers/fal.md index d86fd3161ed..16a93995d89 100644 --- a/docs/providers/fal.md +++ b/docs/providers/fal.md @@ -1,13 +1,14 @@ --- -summary: "fal image and video generation setup in OpenClaw" +summary: "fal image, video, and music generation setup in OpenClaw" title: "Fal" read_when: - You want to use fal image generation in OpenClaw - You need the FAL_KEY auth flow - - You want fal defaults for image_generate or video_generate + - You want fal defaults for image_generate, video_generate, or music_generate --- -OpenClaw ships a bundled `fal` provider for hosted image and video generation. +OpenClaw ships a bundled `fal` provider for hosted image, video, and music +generation. | Property | Value | | -------- | ------------------------------------------------------------- | @@ -151,6 +152,35 @@ The bundled `fal` video-generation provider defaults to +## Music generation + +The bundled `fal` plugin also registers a music-generation provider for the +shared `music_generate` tool. + +| Capability | Value | +| ------------- | ------------------------------------------------------------------------------------------------------ | +| Default model | `fal/fal-ai/minimax-music/v2.6` | +| Models | `fal-ai/minimax-music/v2.6`, `fal-ai/ace-step/prompt-to-audio`, `fal-ai/stable-audio-25/text-to-audio` | +| Runtime | Synchronous request plus generated audio download | + +Use fal as the default music provider: + +```json5 +{ + agents: { + defaults: { + musicGenerationModel: { + primary: "fal/fal-ai/minimax-music/v2.6", + }, + }, + }, +} +``` + +`fal-ai/minimax-music/v2.6` supports explicit lyrics and instrumental mode. 
+ACE-Step and Stable Audio are prompt-to-audio endpoints; choose them with the +`model` override when you want those model families. + Use `openclaw models list --provider fal` to see the full list of available fal models, including any recently added entries. @@ -165,7 +195,10 @@ models, including any recently added entries. Shared video tool parameters and provider selection. + + Shared music tool parameters and provider selection. + - Agent defaults including image and video model selection. + Agent defaults including image, video, and music model selection. diff --git a/docs/providers/openrouter.md b/docs/providers/openrouter.md index 769b0c574cb..1c469c40393 100644 --- a/docs/providers/openrouter.md +++ b/docs/providers/openrouter.md @@ -4,6 +4,7 @@ read_when: - You want a single API key for many LLMs - You want to run models via OpenRouter in OpenClaw - You want to use OpenRouter for image generation + - You want to use OpenRouter for music generation - You want to use OpenRouter for video generation title: "OpenRouter" --- @@ -107,6 +108,34 @@ second durations, `720P`/`1080P` resolutions, and `16:9`/`9:16` aspect ratios. Video-to-video is not registered for OpenRouter because the upstream video generation API currently accepts text and image references. +## Music generation + +OpenRouter can also back the `music_generate` tool through chat completions +audio output. Use an OpenRouter audio model under +`agents.defaults.musicGenerationModel`: + +```json5 +{ + env: { OPENROUTER_API_KEY: "sk-or-..." }, + agents: { + defaults: { + musicGenerationModel: { + primary: "openrouter/google/lyria-3-pro-preview", + timeoutMs: 180_000, + }, + }, + }, +} +``` + +The bundled OpenRouter music provider defaults to +`google/lyria-3-pro-preview` and also exposes +`google/lyria-3-clip-preview`. OpenClaw sends `modalities: ["text", +"audio"]`, enables streaming, collects the streamed audio chunks, and saves +the result as generated media for channel delivery. 
Reference images are +accepted for Lyria models through the shared `music_generate image=...` +parameter. + ## Text-to-speech OpenRouter can also be used as a TTS provider through its OpenAI-compatible diff --git a/docs/tools/media-overview.md b/docs/tools/media-overview.md index 0717a24f5fc..87a84c74e53 100644 --- a/docs/tools/media-overview.md +++ b/docs/tools/media-overview.md @@ -60,7 +60,7 @@ telephony, meetings, browser realtime, and native push-to-talk clients. | DeepInfra | ✓ | ✓ | | ✓ | ✓ | | ✓ | | Deepgram | | | | | ✓ | ✓ | | | ElevenLabs | | | | ✓ | ✓ | | | -| fal | ✓ | ✓ | | | | | | +| fal | ✓ | ✓ | ✓ | | | | | | Google | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | Gradium | | | | ✓ | | | | | Local CLI | | | | ✓ | | | | @@ -68,7 +68,7 @@ telephony, meetings, browser realtime, and native push-to-talk clients. | MiniMax | ✓ | ✓ | ✓ | ✓ | | | | | Mistral | | | | | ✓ | | | | OpenAI | ✓ | ✓ | | ✓ | ✓ | ✓ | ✓ | -| OpenRouter | ✓ | ✓ | | ✓ | ✓ | | ✓ | +| OpenRouter | ✓ | ✓ | ✓ | ✓ | ✓ | | ✓ | | Qwen | | ✓ | | | | | | | Runway | | ✓ | | | | | | | SenseAudio | | | | | ✓ | | | diff --git a/docs/tools/music-generation.md b/docs/tools/music-generation.md index 02af34f8029..66c0f69ca6c 100644 --- a/docs/tools/music-generation.md +++ b/docs/tools/music-generation.md @@ -1,5 +1,5 @@ --- -summary: "Generate music via music_generate across Google Lyria, MiniMax, and ComfyUI workflows" +summary: "Generate music via music_generate across ComfyUI, fal, Google Lyria, MiniMax, and OpenRouter workflows" read_when: - Generating music or audio via the agent - Configuring music-generation providers and models @@ -9,8 +9,8 @@ sidebarTitle: "Music generation" --- The `music_generate` tool lets the agent create music or audio through the -shared music-generation capability with configured providers — Google, -MiniMax, and workflow-configured ComfyUI today. +shared music-generation capability with configured providers — ComfyUI, +fal, Google, MiniMax, and OpenRouter today. 
For session-backed agent runs, OpenClaw starts music generation as a background task, tracks it in the task ledger, then wakes the agent again @@ -94,22 +94,26 @@ Generate an energetic chiptune loop about launching a rocket at sunrise. ## Supported providers -| Provider | Default model | Reference inputs | Supported controls | Auth | -| -------- | ---------------------- | ---------------- | --------------------------------------------------------- | -------------------------------------- | -| ComfyUI | `workflow` | Up to 1 image | Workflow-defined music or audio | `COMFY_API_KEY`, `COMFY_CLOUD_API_KEY` | -| Google | `lyria-3-clip-preview` | Up to 10 images | `lyrics`, `instrumental`, `format` | `GEMINI_API_KEY`, `GOOGLE_API_KEY` | -| MiniMax | `music-2.6` | None | `lyrics`, `instrumental`, `durationSeconds`, `format=mp3` | `MINIMAX_API_KEY` or MiniMax OAuth | +| Provider | Default model | Reference inputs | Supported controls | Auth | +| ---------- | ---------------------------- | ---------------- | --------------------------------------------------------- | -------------------------------------- | +| ComfyUI | `workflow` | Up to 1 image | Workflow-defined music or audio | `COMFY_API_KEY`, `COMFY_CLOUD_API_KEY` | +| fal | `fal-ai/minimax-music/v2.6` | None | `lyrics`, `instrumental`, `durationSeconds`, `format` | `FAL_KEY` or `FAL_API_KEY` | +| Google | `lyria-3-clip-preview` | Up to 10 images | `lyrics`, `instrumental`, `format` | `GEMINI_API_KEY`, `GOOGLE_API_KEY` | +| MiniMax | `music-2.6` | None | `lyrics`, `instrumental`, `durationSeconds`, `format=mp3` | `MINIMAX_API_KEY` or MiniMax OAuth | +| OpenRouter | `google/lyria-3-pro-preview` | Up to 1 image | `lyrics`, `instrumental`, `durationSeconds`, `format` | `OPENROUTER_API_KEY` | ### Capability matrix The explicit mode contract used by `music_generate`, contract tests, and the shared live sweep: -| Provider | `generate` | `edit` | Edit limit | Shared live lanes | -| -------- | :--------: | :----: | ---------- 
| ------------------------------------------------------------------------- | -| ComfyUI | ✓ | ✓ | 1 image | Not in the shared sweep; covered by `extensions/comfy/comfy.live.test.ts` | -| Google | ✓ | ✓ | 10 images | `generate`, `edit` | -| MiniMax | ✓ | — | None | `generate` | +| Provider | `generate` | `edit` | Edit limit | Shared live lanes | +| ---------- | :--------: | :----: | ---------- | ------------------------------------------------------------------------- | +| ComfyUI | ✓ | ✓ | 1 image | Not in the shared sweep; covered by `extensions/comfy/comfy.live.test.ts` | +| fal | ✓ | — | None | `generate` | +| Google | ✓ | ✓ | 10 images | `generate`, `edit` | +| MiniMax | ✓ | — | None | `generate` | +| OpenRouter | ✓ | ✓ | 1 image | `generate`, `edit` | Use `action: "list"` to inspect available shared providers and models at runtime: @@ -225,7 +229,7 @@ openclaw tasks cancel defaults: { musicGenerationModel: { primary: "google/lyria-3-clip-preview", - fallbacks: ["minimax/music-2.6"], + fallbacks: ["fal/fal-ai/minimax-music/v2.6", "minimax/music-2.6"], }, }, }, @@ -258,6 +262,12 @@ explicit `model`, `primary`, and `fallbacks` entries. shared `music_generate` tool through the music-generation provider registry. + + Uses fal model endpoints through the shared provider auth path. The + bundled provider defaults to `fal-ai/minimax-music/v2.6` and also exposes + `fal-ai/ace-step/prompt-to-audio` and + `fal-ai/stable-audio-25/text-to-audio` for prompt-to-audio requests. + Uses Lyria 3 batch generation. The current bundled flow supports prompt, optional lyrics text, and optional reference images. @@ -267,6 +277,11 @@ explicit `model`, `primary`, and `fallbacks` entries. lyrics, instrumental mode, duration steering, and mp3 output through either `minimax` API-key auth or `minimax-portal` OAuth. + + Uses OpenRouter chat completions audio output with streaming enabled. 
The + bundled provider defaults to `google/lyria-3-pro-preview` and also exposes + `google/lyria-3-clip-preview`. + ## Choosing the right path @@ -278,8 +293,8 @@ explicit `model`, `primary`, and `fallbacks` entries. If you are debugging ComfyUI-specific behavior, see [ComfyUI](/providers/comfy). If you are debugging shared provider -behavior, start with [Google (Gemini)](/providers/google) or -[MiniMax](/providers/minimax). +behavior, start with [fal](/providers/fal), [Google (Gemini)](/providers/google), +[MiniMax](/providers/minimax), or [OpenRouter](/providers/openrouter). ## Provider capability modes @@ -331,7 +346,9 @@ profiles by default, and runs both `generate` and declared `edit` coverage when the provider enables edit mode. Coverage today: - `google`: `generate` plus `edit` +- `fal`: `generate` only - `minimax`: `generate` only +- `openrouter`: `generate` plus `edit` - `comfy`: separate Comfy live coverage, not the shared provider sweep Opt-in live coverage for the bundled ComfyUI music path: diff --git a/extensions/fal/index.ts b/extensions/fal/index.ts index 3d38d821272..d46327ec7aa 100644 --- a/extensions/fal/index.ts +++ b/extensions/fal/index.ts @@ -1,5 +1,6 @@ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; import { buildFalImageGenerationProvider } from "./image-generation-provider.js"; +import { buildFalMusicGenerationProvider } from "./music-generation-provider.js"; import { createFalProvider } from "./provider-registration.js"; import { buildFalVideoGenerationProvider } from "./video-generation-provider.js"; @@ -8,10 +9,11 @@ const PROVIDER_ID = "fal"; export default definePluginEntry({ id: PROVIDER_ID, name: "fal Provider", - description: "Bundled fal image and video generation provider", + description: "Bundled fal image, video, and music generation provider", register(api) { api.registerProvider(createFalProvider()); api.registerImageGenerationProvider(buildFalImageGenerationProvider()); + 
api.registerMusicGenerationProvider(buildFalMusicGenerationProvider()); api.registerVideoGenerationProvider(buildFalVideoGenerationProvider()); }, }); diff --git a/extensions/fal/music-generation-provider.test.ts b/extensions/fal/music-generation-provider.test.ts new file mode 100644 index 00000000000..c9914de38f6 --- /dev/null +++ b/extensions/fal/music-generation-provider.test.ts @@ -0,0 +1,200 @@ +import { expectExplicitMusicGenerationCapabilities } from "openclaw/plugin-sdk/provider-test-contracts"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { buildFalMusicGenerationProvider } from "./music-generation-provider.js"; + +const { + assertOkOrThrowHttpErrorMock, + postJsonRequestMock, + resolveApiKeyForProviderMock, + resolveProviderHttpRequestConfigMock, +} = vi.hoisted(() => ({ + assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), + postJsonRequestMock: vi.fn(), + resolveApiKeyForProviderMock: vi.fn(async () => ({ + apiKey: "fal-key", + source: "env", + mode: "api-key", + })), + resolveProviderHttpRequestConfigMock: vi.fn((params: Record) => ({ + baseUrl: params.baseUrl ?? 
params.defaultBaseUrl, + allowPrivateNetwork: false, + headers: new Headers(params.defaultHeaders as HeadersInit | undefined), + dispatcherPolicy: undefined, + })), +})); + +vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({ + resolveApiKeyForProvider: resolveApiKeyForProviderMock, +})); + +vi.mock("openclaw/plugin-sdk/provider-http", async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, + postJsonRequest: postJsonRequestMock, + resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, + }; +}); + +function postRequest(): Record { + const request = postJsonRequestMock.mock.calls[0]?.[0]; + if (!request || typeof request !== "object" || Array.isArray(request)) { + throw new Error("expected fal music request"); + } + return request as Record; +} + +describe("fal music generation provider", () => { + afterEach(() => { + assertOkOrThrowHttpErrorMock.mockClear(); + postJsonRequestMock.mockReset(); + resolveApiKeyForProviderMock.mockClear(); + resolveProviderHttpRequestConfigMock.mockClear(); + vi.unstubAllGlobals(); + }); + + it("declares explicit mode capabilities", () => { + expectExplicitMusicGenerationCapabilities(buildFalMusicGenerationProvider()); + }); + + it("submits MiniMax music through fal and downloads the generated track", async () => { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + audio: { + url: "https://v3b.fal.media/files/b/kangaroo/out.mp3", + content_type: "audio/mpeg", + file_name: "out.mp3", + }, + }), + }, + release: vi.fn(async () => {}), + }); + const fetchMock = vi.fn( + async () => + new Response(Buffer.from("mp3-bytes"), { + headers: { "content-type": "application/octet-stream" }, + }), + ); + vi.stubGlobal("fetch", fetchMock); + + const result = await buildFalMusicGenerationProvider().generateMusic({ + provider: "fal", + model: "", + prompt: "city pop chorus", + cfg: {}, + lyrics: 
"[Verse]\nNeon rain", + durationSeconds: 42, + format: "mp3", + }); + + expect(postRequest().url).toBe("https://fal.run/fal-ai/minimax-music/v2.6"); + expect(postRequest().body).toEqual({ + prompt: "city pop chorus", + lyrics: "[Verse]\nNeon rain", + duration: 42, + audio_setting: { + sample_rate: 44100, + bitrate: 256000, + format: "mp3", + }, + }); + expect(fetchMock).toHaveBeenCalledWith( + "https://v3b.fal.media/files/b/kangaroo/out.mp3", + expect.objectContaining({ method: "GET" }), + ); + expect(result.model).toBe("fal-ai/minimax-music/v2.6"); + expect(result.tracks[0]?.mimeType).toBe("audio/mpeg"); + expect(result.tracks[0]?.buffer).toEqual(Buffer.from("mp3-bytes")); + expect(result.tracks[0]?.fileName).toBe("out.mp3"); + expect(result.metadata?.audioUrl).toBe("https://v3b.fal.media/files/b/kangaroo/out.mp3"); + }); + + it("rejects MiniMax lyrics requests that also ask for instrumental output", async () => { + await expect( + buildFalMusicGenerationProvider().generateMusic({ + provider: "fal", + model: "fal-ai/minimax-music/v2.6", + prompt: "city pop chorus", + cfg: {}, + lyrics: "[Verse]\nNeon rain", + instrumental: true, + }), + ).rejects.toThrow("fal MiniMax music generation cannot use lyrics when instrumental=true."); + + expect(postJsonRequestMock).not.toHaveBeenCalled(); + }); + + it("maps ACE-Step duration and instrumental controls", async () => { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + audio: { url: "https://example.com/out.wav", content_type: "audio/wav" }, + seed: 42, + tags: "lofi, chill", + }), + }, + release: vi.fn(async () => {}), + }); + vi.stubGlobal( + "fetch", + vi.fn( + async () => + new Response(Buffer.from("wav-bytes"), { + headers: { "content-type": "audio/wav" }, + }), + ), + ); + + await buildFalMusicGenerationProvider().generateMusic({ + provider: "fal", + model: "fal-ai/ace-step/prompt-to-audio", + prompt: "lofi beach loop", + cfg: {}, + instrumental: true, + durationSeconds: 30, + }); + + 
expect(postRequest().url).toBe("https://fal.run/fal-ai/ace-step/prompt-to-audio"); + expect(postRequest().body).toEqual({ + prompt: "lofi beach loop", + instrumental: true, + duration: 30, + }); + }); + + it("maps Stable Audio duration controls", async () => { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + audio: "https://example.com/stable.wav", + }), + }, + release: vi.fn(async () => {}), + }); + vi.stubGlobal( + "fetch", + vi.fn( + async () => + new Response(Buffer.from("wav-bytes"), { + headers: { "content-type": "audio/wav" }, + }), + ), + ); + + await buildFalMusicGenerationProvider().generateMusic({ + provider: "fal", + model: "fal-ai/stable-audio-25/text-to-audio", + prompt: "orchestral hit", + cfg: {}, + durationSeconds: 12, + }); + + expect(postRequest().url).toBe("https://fal.run/fal-ai/stable-audio-25/text-to-audio"); + expect(postRequest().body).toEqual({ + prompt: "orchestral hit", + seconds_total: 12, + }); + }); +}); diff --git a/extensions/fal/music-generation-provider.ts b/extensions/fal/music-generation-provider.ts new file mode 100644 index 00000000000..101e6e8fc63 --- /dev/null +++ b/extensions/fal/music-generation-provider.ts @@ -0,0 +1,219 @@ +import { + downloadGeneratedMusicAsset, + extractGeneratedMusicFileCandidates, + type MusicGenerationProvider, + type MusicGenerationRequest, +} from "openclaw/plugin-sdk/music-generation"; +import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; +import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; +import { + assertOkOrThrowHttpError, + postJsonRequest, + resolveProviderHttpRequestConfig, +} from "openclaw/plugin-sdk/provider-http"; +import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime"; + +const DEFAULT_FAL_BASE_URL = "https://fal.run"; +const DEFAULT_FAL_MUSIC_MODEL = "fal-ai/minimax-music/v2.6"; +const FAL_ACE_STEP_MODEL = "fal-ai/ace-step/prompt-to-audio"; +const 
FAL_STABLE_AUDIO_MODEL = "fal-ai/stable-audio-25/text-to-audio"; +const DEFAULT_TIMEOUT_MS = 180_000; + +const FAL_MUSIC_MODELS = [ + DEFAULT_FAL_MUSIC_MODEL, + FAL_ACE_STEP_MODEL, + FAL_STABLE_AUDIO_MODEL, +] as const; + +function resolveFalMusicModel(model: string | undefined): string { + return normalizeOptionalString(model) ?? DEFAULT_FAL_MUSIC_MODEL; +} + +function resolveFalMusicBaseUrl(req: MusicGenerationRequest): string | undefined { + return normalizeOptionalString(req.cfg?.models?.providers?.fal?.baseUrl); +} + +function buildFalMinimaxBody(req: MusicGenerationRequest): Record { + const lyrics = normalizeOptionalString(req.lyrics); + if (lyrics && req.instrumental === true) { + throw new Error("fal MiniMax music generation cannot use lyrics when instrumental=true."); + } + return { + prompt: req.prompt, + ...(lyrics ? { lyrics } : {}), + ...(req.instrumental === true ? { is_instrumental: true } : {}), + ...(!lyrics && req.instrumental !== true ? { lyrics_optimizer: true } : {}), + ...(typeof req.durationSeconds === "number" ? { duration: req.durationSeconds } : {}), + audio_setting: { + sample_rate: 44_100, + bitrate: 256_000, + format: req.format ?? "mp3", + }, + }; +} + +function buildFalAceStepBody(req: MusicGenerationRequest): Record { + if (normalizeOptionalString(req.lyrics)) { + throw new Error("fal ACE-Step music generation does not support explicit lyrics."); + } + return { + prompt: req.prompt, + ...(req.instrumental === true ? { instrumental: true } : {}), + ...(typeof req.durationSeconds === "number" ? 
{ duration: req.durationSeconds } : {}), + }; +} + +function buildFalStableAudioBody(req: MusicGenerationRequest): Record { + if (normalizeOptionalString(req.lyrics)) { + throw new Error("fal Stable Audio music generation does not support explicit lyrics."); + } + if (req.instrumental === true) { + throw new Error("fal Stable Audio music generation does not support instrumental mode."); + } + return { + prompt: req.prompt, + ...(typeof req.durationSeconds === "number" ? { seconds_total: req.durationSeconds } : {}), + }; +} + +function buildFalMusicRequestBody( + req: MusicGenerationRequest, + model: string, +): Record { + if (model === FAL_ACE_STEP_MODEL) { + return buildFalAceStepBody(req); + } + if (model === FAL_STABLE_AUDIO_MODEL) { + return buildFalStableAudioBody(req); + } + return buildFalMinimaxBody(req); +} + +function resolveFalMusicMetadata(payload: unknown): Record | undefined { + if (!payload || typeof payload !== "object" || Array.isArray(payload)) { + return undefined; + } + const metadata: Record = {}; + for (const key of ["seed", "tags"]) { + const value = (payload as Record)[key]; + if (value !== undefined && value !== null) { + metadata[key] = value; + } + } + return Object.keys(metadata).length > 0 ? 
metadata : undefined; +} + +export function buildFalMusicGenerationProvider(): MusicGenerationProvider { + return { + id: "fal", + label: "fal", + defaultModel: DEFAULT_FAL_MUSIC_MODEL, + models: [...FAL_MUSIC_MODELS], + isConfigured: ({ agentDir }) => + isProviderApiKeyConfigured({ + provider: "fal", + agentDir, + }), + capabilities: { + generate: { + maxTracks: 1, + maxDurationSeconds: 240, + supportsLyrics: true, + supportsLyricsByModel: { + [FAL_ACE_STEP_MODEL]: false, + [FAL_STABLE_AUDIO_MODEL]: false, + }, + supportsInstrumental: true, + supportsInstrumentalByModel: { + [FAL_STABLE_AUDIO_MODEL]: false, + }, + supportsDuration: true, + supportsFormat: true, + supportedFormats: ["mp3", "wav"], + supportedFormatsByModel: { + [DEFAULT_FAL_MUSIC_MODEL]: ["mp3"], + [FAL_ACE_STEP_MODEL]: ["wav"], + [FAL_STABLE_AUDIO_MODEL]: ["wav"], + }, + }, + edit: { + enabled: false, + }, + }, + async generateMusic(req) { + if ((req.inputImages?.length ?? 0) > 0) { + throw new Error("fal music generation does not support image reference inputs."); + } + + const auth = await resolveApiKeyForProvider({ + provider: "fal", + cfg: req.cfg, + agentDir: req.agentDir, + store: req.authStore, + }); + if (!auth.apiKey) { + throw new Error("fal API key missing"); + } + + const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } = + resolveProviderHttpRequestConfig({ + baseUrl: resolveFalMusicBaseUrl(req), + defaultBaseUrl: DEFAULT_FAL_BASE_URL, + allowPrivateNetwork: false, + defaultHeaders: { + Authorization: `Key ${auth.apiKey}`, + "Content-Type": "application/json", + }, + provider: "fal", + capability: "audio", + transport: "http", + }); + const model = resolveFalMusicModel(req.model); + const { response, release } = await postJsonRequest({ + url: `${baseUrl}/${model}`, + headers, + body: buildFalMusicRequestBody(req, model), + timeoutMs: req.timeoutMs ?? 
DEFAULT_TIMEOUT_MS, + fetchFn: fetch, + allowPrivateNetwork, + dispatcherPolicy, + }); + + try { + await assertOkOrThrowHttpError(response, "fal music generation failed"); + const payload = await response.json(); + const [candidate] = extractGeneratedMusicFileCandidates(payload); + if (!candidate) { + throw new Error("fal music generation response missing audio output"); + } + const track = await downloadGeneratedMusicAsset({ + candidate, + timeoutMs: req.timeoutMs ?? DEFAULT_TIMEOUT_MS, + fetchFn: fetch, + provider: "fal", + requestFailedMessage: "fal generated music download failed", + }); + const lyrics = + typeof payload === "object" && payload && !Array.isArray(payload) + ? normalizeOptionalString((payload as Record).lyrics) + : undefined; + return { + tracks: [track], + model, + ...(lyrics ? { lyrics: [lyrics] } : {}), + metadata: { + ...resolveFalMusicMetadata(payload), + ...(track.metadata?.url ? { audioUrl: track.metadata.url } : {}), + instrumental: req.instrumental === true, + ...(req.format ? { requestedFormat: req.format } : {}), + ...(typeof req.durationSeconds === "number" + ? 
{ requestedDurationSeconds: req.durationSeconds } + : {}), + }, + }; + } finally { + await release(); + } + }, + }; +} diff --git a/extensions/fal/openclaw.plugin.json b/extensions/fal/openclaw.plugin.json index 13cbb55e69d..ecd1b8602c7 100644 --- a/extensions/fal/openclaw.plugin.json +++ b/extensions/fal/openclaw.plugin.json @@ -16,8 +16,8 @@ "choiceLabel": "fal API key", "groupId": "fal", "groupLabel": "fal", - "groupHint": "Image and video generation", - "onboardingScopes": ["image-generation"], + "groupHint": "Image, video, and music generation", + "onboardingScopes": ["image-generation", "music-generation"], "optionKey": "falApiKey", "cliFlag": "--fal-api-key", "cliOption": "--fal-api-key ", @@ -26,6 +26,7 @@ ], "contracts": { "imageGenerationProviders": ["fal"], + "musicGenerationProviders": ["fal"], "videoGenerationProviders": ["fal"] }, "configSchema": { diff --git a/extensions/fal/plugin-registration.contract.test.ts b/extensions/fal/plugin-registration.contract.test.ts index 5533d2cc894..ad8ad719563 100644 --- a/extensions/fal/plugin-registration.contract.test.ts +++ b/extensions/fal/plugin-registration.contract.test.ts @@ -4,6 +4,7 @@ describePluginRegistrationContract({ pluginId: "fal", providerIds: ["fal"], imageGenerationProviderIds: ["fal"], + musicGenerationProviderIds: ["fal"], videoGenerationProviderIds: ["fal"], requireGenerateImage: true, requireGenerateVideo: true, diff --git a/extensions/fal/provider-contract-api.ts b/extensions/fal/provider-contract-api.ts index 66fbf2fd532..0730d153d7e 100644 --- a/extensions/fal/provider-contract-api.ts +++ b/extensions/fal/provider-contract-api.ts @@ -14,16 +14,16 @@ export function createFalProvider(): ProviderPlugin { id: "api-key", kind: "api_key", label: "fal API key", - hint: "Image and video generation API key", + hint: "Image, video, and music generation API key", run: async () => ({ profiles: [], defaultModel: FAL_DEFAULT_IMAGE_MODEL_REF }), wizard: { choiceId: "fal-api-key", choiceLabel: "fal API 
key", - choiceHint: "Image and video generation API key", + choiceHint: "Image, video, and music generation API key", groupId: "fal", groupLabel: "fal", - groupHint: "Image and video generation", - onboardingScopes: ["image-generation"], + groupHint: "Image, video, and music generation", + onboardingScopes: ["image-generation", "music-generation"], }, }, ], diff --git a/extensions/fal/provider-registration.ts b/extensions/fal/provider-registration.ts index d62c879f444..0cb67d50956 100644 --- a/extensions/fal/provider-registration.ts +++ b/extensions/fal/provider-registration.ts @@ -15,7 +15,7 @@ export function createFalProvider(): ProviderPlugin { providerId: PROVIDER_ID, methodId: "api-key", label: "fal API key", - hint: "Image and video generation API key", + hint: "Image, video, and music generation API key", optionKey: "falApiKey", flagName: "--fal-api-key", envVar: "FAL_KEY", @@ -26,11 +26,11 @@ export function createFalProvider(): ProviderPlugin { wizard: { choiceId: "fal-api-key", choiceLabel: "fal API key", - choiceHint: "Image and video generation API key", + choiceHint: "Image, video, and music generation API key", groupId: "fal", groupLabel: "fal", - groupHint: "Image and video generation", - onboardingScopes: ["image-generation"], + groupHint: "Image, video, and music generation", + onboardingScopes: ["image-generation", "music-generation"], }, }), ], diff --git a/extensions/fal/test-api.ts b/extensions/fal/test-api.ts index e386763147c..24caaec6e9f 100644 --- a/extensions/fal/test-api.ts +++ b/extensions/fal/test-api.ts @@ -1,2 +1,3 @@ export { buildFalImageGenerationProvider } from "./image-generation-provider.js"; +export { buildFalMusicGenerationProvider } from "./music-generation-provider.js"; export { buildFalVideoGenerationProvider } from "./video-generation-provider.js"; diff --git a/extensions/music-generation-providers.live.test.ts b/extensions/music-generation-providers.live.test.ts index 6218e1a3e38..a7d9b979a2f 100644 --- 
a/extensions/music-generation-providers.live.test.ts +++ b/extensions/music-generation-providers.live.test.ts @@ -30,8 +30,10 @@ import { resolveLiveMusicAuthStore, } from "openclaw/plugin-sdk/test-env"; import { describe, expect, it } from "vitest"; +import falPlugin from "./fal/index.js"; import googlePlugin from "./google/index.js"; import minimaxPlugin from "./minimax/index.js"; +import openrouterPlugin from "./openrouter/index.js"; import { maybeLoadShellEnvForGenerationProviders } from "./test-support/generation-live-test-helpers.js"; const LIVE = isLiveTestEnabled(); @@ -49,6 +51,12 @@ type LiveProviderCase = { }; const CASES: LiveProviderCase[] = [ + { + plugin: falPlugin, + pluginId: "fal", + pluginName: "fal Provider", + providerId: "fal", + }, { plugin: googlePlugin, pluginId: "google", @@ -61,6 +69,12 @@ const CASES: LiveProviderCase[] = [ pluginName: "MiniMax Provider", providerId: "minimax", }, + { + plugin: openrouterPlugin, + pluginId: "openrouter", + pluginName: "OpenRouter Provider", + providerId: "openrouter", + }, ] .filter((entry) => (providerFilter ? providerFilter.has(entry.providerId) : true)) .toSorted((left, right) => left.providerId.localeCompare(right.providerId)); @@ -130,7 +144,7 @@ function resolveLiveLyrics(providerId: string): string | undefined { function resolveLiveMusicSkipReason(providerId: string, error: unknown): string | null { const message = error instanceof Error ? 
error.message : String(error); if ( - providerId === "google" && + (providerId === "google" || providerId === "openrouter") && message.toLowerCase().includes("music generation response missing audio data") ) { return "transient no-audio response"; diff --git a/extensions/openrouter/api.ts b/extensions/openrouter/api.ts index cf2b950b176..a853538b1d8 100644 --- a/extensions/openrouter/api.ts +++ b/extensions/openrouter/api.ts @@ -1,4 +1,5 @@ export { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js"; +export { buildOpenRouterMusicGenerationProvider } from "./music-generation-provider.js"; export { buildOpenrouterProvider, isOpenRouterProxyReasoningUnsupportedModel, diff --git a/extensions/openrouter/index.test.ts b/extensions/openrouter/index.test.ts index 7bef3e91996..11ec56a42b6 100644 --- a/extensions/openrouter/index.test.ts +++ b/extensions/openrouter/index.test.ts @@ -16,12 +16,18 @@ import { resolveThinkingProfile } from "./provider-policy-api.js"; describe("openrouter provider hooks", () => { it("registers OpenRouter speech alongside model, media, and catalog providers", async () => { - const { providers, speechProviders, mediaProviders, imageProviders, videoProviders } = - await registerProviderPlugin({ - plugin: openrouterPlugin, - id: "openrouter", - name: "OpenRouter Provider", - }); + const { + providers, + speechProviders, + mediaProviders, + imageProviders, + musicProviders, + videoProviders, + } = await registerProviderPlugin({ + plugin: openrouterPlugin, + id: "openrouter", + name: "OpenRouter Provider", + }); const modelCatalogProvider = expectUnifiedModelCatalogProviderRegistration({ plugin: openrouterPlugin, pluginId: "openrouter", @@ -34,6 +40,7 @@ describe("openrouter provider hooks", () => { expect(speechProviders.map((provider) => provider.id)).toEqual(["openrouter"]); expect(mediaProviders.map((provider) => provider.id)).toEqual(["openrouter"]); expect(imageProviders.map((provider) => 
provider.id)).toEqual(["openrouter"]); + expect(musicProviders.map((provider) => provider.id)).toEqual(["openrouter"]); expect(videoProviders.map((provider) => provider.id)).toEqual(["openrouter"]); expect(modelCatalogProvider.liveCatalog).toBeTypeOf("function"); }); diff --git a/extensions/openrouter/index.ts b/extensions/openrouter/index.ts index 626edef4319..e29f178f890 100644 --- a/extensions/openrouter/index.ts +++ b/extensions/openrouter/index.ts @@ -14,6 +14,7 @@ import { } from "openclaw/plugin-sdk/provider-stream-family"; import { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js"; import { openrouterMediaUnderstandingProvider } from "./media-understanding-provider.js"; +import { buildOpenRouterMusicGenerationProvider } from "./music-generation-provider.js"; import { applyOpenrouterConfig, OPENROUTER_DEFAULT_MODEL_REF } from "./onboard.js"; import { buildOpenrouterProvider, @@ -114,6 +115,7 @@ export default definePluginEntry({ groupId: "openrouter", groupLabel: "OpenRouter", groupHint: "API key", + onboardingScopes: ["text-inference", "music-generation"], }, }), ], @@ -168,6 +170,7 @@ export default definePluginEntry({ }); api.registerMediaUnderstandingProvider(openrouterMediaUnderstandingProvider); api.registerImageGenerationProvider(buildOpenRouterImageGenerationProvider()); + api.registerMusicGenerationProvider(buildOpenRouterMusicGenerationProvider()); api.registerVideoGenerationProvider(buildOpenRouterVideoGenerationProvider()); api.registerModelCatalogProvider({ provider: PROVIDER_ID, diff --git a/extensions/openrouter/music-generation-provider.test.ts b/extensions/openrouter/music-generation-provider.test.ts new file mode 100644 index 00000000000..9bdface8b05 --- /dev/null +++ b/extensions/openrouter/music-generation-provider.test.ts @@ -0,0 +1,226 @@ +import { expectExplicitMusicGenerationCapabilities } from "openclaw/plugin-sdk/provider-test-contracts"; +import { afterEach, describe, expect, it, vi } from "vitest"; 
+import { buildOpenRouterMusicGenerationProvider } from "./music-generation-provider.js"; + +const { + assertOkOrThrowHttpErrorMock, + postJsonRequestMock, + resolveApiKeyForProviderMock, + resolveProviderHttpRequestConfigMock, +} = vi.hoisted(() => ({ + assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), + postJsonRequestMock: vi.fn(), + resolveApiKeyForProviderMock: vi.fn(async () => ({ + apiKey: "openrouter-key", + source: "env", + mode: "api-key", + })), + resolveProviderHttpRequestConfigMock: vi.fn((params: Record) => ({ + baseUrl: params.baseUrl ?? params.defaultBaseUrl, + allowPrivateNetwork: false, + headers: new Headers(params.defaultHeaders as HeadersInit | undefined), + dispatcherPolicy: undefined, + })), +})); + +vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({ + resolveApiKeyForProvider: resolveApiKeyForProviderMock, +})); + +vi.mock("openclaw/plugin-sdk/provider-http", async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, + postJsonRequest: postJsonRequestMock, + resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, + }; +}); + +function sseResponse(lines: string[]): Response { + const encoder = new TextEncoder(); + return new Response( + new ReadableStream({ + start(controller) { + for (const line of lines) { + controller.enqueue(encoder.encode(line)); + } + controller.close(); + }, + }), + { status: 200, headers: { "content-type": "text/event-stream" } }, + ); +} + +function stalledSseResponse(line: string): Response { + const encoder = new TextEncoder(); + return new Response( + new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(line)); + }, + cancel() {}, + }), + { status: 200, headers: { "content-type": "text/event-stream" } }, + ); +} + +function postRequest(): Record { + const request = postJsonRequestMock.mock.calls[0]?.[0]; + if (!request || typeof request !== "object" || 
Array.isArray(request)) { + throw new Error("expected OpenRouter music request"); + } + return request as Record; +} + +describe("openrouter music generation provider", () => { + afterEach(() => { + assertOkOrThrowHttpErrorMock.mockClear(); + postJsonRequestMock.mockReset(); + resolveApiKeyForProviderMock.mockClear(); + resolveProviderHttpRequestConfigMock.mockClear(); + }); + + it("declares explicit mode capabilities", () => { + expectExplicitMusicGenerationCapabilities(buildOpenRouterMusicGenerationProvider()); + }); + + it("streams OpenRouter audio chunks into a generated music asset", async () => { + const release = vi.fn(async () => {}); + const audioBase64 = Buffer.from("wav-bytes").toString("base64"); + postJsonRequestMock.mockResolvedValue({ + response: sseResponse([ + `data: ${JSON.stringify({ choices: [{ delta: { audio: { transcript: "line " } } }] })}\n`, + `data: ${JSON.stringify({ choices: [{ delta: { audio: { data: audioBase64.slice(0, 4) } } }] })}\n`, + `data: ${JSON.stringify({ choices: [{ delta: { audio: { data: audioBase64.slice(4), transcript: "two" } } }] })}\n`, + "data: [DONE]\n", + ]), + release, + }); + + const result = await buildOpenRouterMusicGenerationProvider().generateMusic({ + provider: "openrouter", + model: "", + prompt: "bright soundtrack", + cfg: {}, + instrumental: true, + format: "wav", + }); + + expect(postRequest().url).toBe("https://openrouter.ai/api/v1/chat/completions"); + expect(postRequest().body).toEqual({ + model: "google/lyria-3-pro-preview", + messages: [ + { + role: "user", + content: + "bright soundtrack\n\nInstrumental only. 
No vocals, no sung lyrics, no spoken word.", + }, + ], + modalities: ["text", "audio"], + audio: { format: "wav" }, + stream: true, + }); + expect(result.tracks[0]?.mimeType).toBe("audio/wav"); + expect(result.tracks[0]?.buffer).toEqual(Buffer.from("wav-bytes")); + expect(result.lyrics).toEqual(["line two"]); + expect(release).toHaveBeenCalledOnce(); + }); + + it("decodes independently padded OpenRouter audio chunks", async () => { + postJsonRequestMock.mockResolvedValue({ + response: sseResponse([ + `data: ${JSON.stringify({ choices: [{ delta: { audio: { data: Buffer.from("a").toString("base64") } } }] })}\n`, + `data: ${JSON.stringify({ choices: [{ delta: { audio: { data: Buffer.from("b").toString("base64") } } }] })}\n`, + "data: [DONE]\n", + ]), + release: vi.fn(async () => {}), + }); + + const result = await buildOpenRouterMusicGenerationProvider().generateMusic({ + provider: "openrouter", + model: "google/lyria-3-pro-preview", + prompt: "chunked soundtrack", + cfg: {}, + }); + + expect(result.tracks[0]?.buffer).toEqual(Buffer.from("ab")); + }); + + it("sends reference images as multimodal message content", async () => { + postJsonRequestMock.mockResolvedValue({ + response: sseResponse([ + `data: ${JSON.stringify({ choices: [{ delta: { audio: { data: Buffer.from("mp3").toString("base64") } } }] })}\n`, + "data: [DONE]\n", + ]), + release: vi.fn(async () => {}), + }); + + await buildOpenRouterMusicGenerationProvider().generateMusic({ + provider: "openrouter", + model: "google/lyria-3-clip-preview", + prompt: "score this image", + cfg: {}, + format: "mp3", + inputImages: [{ buffer: Buffer.from("png"), mimeType: "image/png" }], + }); + + expect(postRequest().body).toEqual( + expect.objectContaining({ + model: "google/lyria-3-clip-preview", + audio: { format: "mp3" }, + messages: [ + { + role: "user", + content: [ + { type: "text", text: "score this image" }, + { + type: "image_url", + image_url: { + url: 
`data:image/png;base64,${Buffer.from("png").toString("base64")}`, + }, + }, + ], + }, + ], + }), + ); + }); + + it("times out stalled OpenRouter audio streams after headers", async () => { + postJsonRequestMock.mockResolvedValue({ + response: stalledSseResponse( + `data: ${JSON.stringify({ choices: [{ delta: { audio: { transcript: "start" } } }] })}\n`, + ), + release: vi.fn(async () => {}), + }); + + await expect( + buildOpenRouterMusicGenerationProvider().generateMusic({ + provider: "openrouter", + model: "google/lyria-3-clip-preview", + prompt: "never finish", + cfg: {}, + timeoutMs: 1, + }), + ).rejects.toThrow("OpenRouter music generation timed out after 1ms"); + }); + + it("rejects OpenRouter streams that end before completion", async () => { + postJsonRequestMock.mockResolvedValue({ + response: sseResponse([ + `data: ${JSON.stringify({ choices: [{ delta: { audio: { data: Buffer.from("partial").toString("base64") } } }] })}\n`, + ]), + release: vi.fn(async () => {}), + }); + + await expect( + buildOpenRouterMusicGenerationProvider().generateMusic({ + provider: "openrouter", + model: "google/lyria-3-clip-preview", + prompt: "interrupted", + cfg: {}, + }), + ).rejects.toThrow("OpenRouter music generation stream ended before completion"); + }); +}); diff --git a/extensions/openrouter/music-generation-provider.ts b/extensions/openrouter/music-generation-provider.ts new file mode 100644 index 00000000000..60c55a29fa9 --- /dev/null +++ b/extensions/openrouter/music-generation-provider.ts @@ -0,0 +1,344 @@ +import type { + MusicGenerationProvider, + MusicGenerationRequest, + MusicGenerationSourceImage, +} from "openclaw/plugin-sdk/music-generation"; +import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; +import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; +import { + assertOkOrThrowHttpError, + postJsonRequest, + resolveProviderHttpRequestConfig, +} from "openclaw/plugin-sdk/provider-http"; +import { 
normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime"; +import { OPENROUTER_BASE_URL } from "./provider-catalog.js"; + +const DEFAULT_OPENROUTER_MUSIC_MODEL = "google/lyria-3-pro-preview"; +const OPENROUTER_CLIP_MUSIC_MODEL = "google/lyria-3-clip-preview"; +const DEFAULT_TIMEOUT_MS = 180_000; +const OPENROUTER_MUSIC_MODELS = [ + DEFAULT_OPENROUTER_MUSIC_MODEL, + OPENROUTER_CLIP_MUSIC_MODEL, +] as const; + +type OpenRouterAudioStreamResult = { + audioBuffer: Buffer; + transcript: string; +}; + +type OpenRouterStreamDeadline = { + deadlineAtMs: number; + timeoutMs: number; +}; + +function isRecord(value: unknown): value is Record { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + +function resolveOpenRouterMusicModel(model: string | undefined): string { + return normalizeOptionalString(model) ?? DEFAULT_OPENROUTER_MUSIC_MODEL; +} + +function outputFormatToMimeType(format: "mp3" | "wav" | undefined): string { + return format === "mp3" ? "audio/mpeg" : "audio/wav"; +} + +function imageToContentPart(image: MusicGenerationSourceImage): { + type: "image_url"; + image_url: { url: string }; +} { + const url = + normalizeOptionalString(image.url) ?? + (image.buffer + ? `data:${normalizeOptionalString(image.mimeType) ?? "image/png"};base64,${image.buffer.toString("base64")}` + : undefined); + if (!url) { + throw new Error("OpenRouter music generation reference image is missing data."); + } + return { + type: "image_url", + image_url: { url }, + }; +} + +function buildOpenRouterMusicPrompt(req: MusicGenerationRequest): string { + const parts = [req.prompt.trim()]; + const lyrics = normalizeOptionalString(req.lyrics); + if (req.instrumental === true) { + parts.push("Instrumental only. 
No vocals, no sung lyrics, no spoken word."); + } + if (lyrics) { + parts.push(`Lyrics:\n${lyrics}`); + } + if (typeof req.durationSeconds === "number") { + parts.push(`Target duration: about ${Math.round(req.durationSeconds)} seconds.`); + } + return parts.join("\n\n"); +} + +function buildOpenRouterMessageContent( + req: MusicGenerationRequest, +): + | string + | Array<{ type: "text"; text: string } | { type: "image_url"; image_url: { url: string } }> { + const prompt = buildOpenRouterMusicPrompt(req); + const images = req.inputImages ?? []; + if (images.length === 0) { + return prompt; + } + return [{ type: "text", text: prompt }, ...images.map((image) => imageToContentPart(image))]; +} + +function readDeltaAudio(part: unknown): { data?: string; transcript?: string } | undefined { + if (!isRecord(part)) { + return undefined; + } + const choices = part.choices; + if (!Array.isArray(choices)) { + return undefined; + } + const first = choices[0]; + if (!isRecord(first)) { + return undefined; + } + const delta = first.delta; + if (!isRecord(delta)) { + return undefined; + } + const audio = delta.audio; + if (!isRecord(audio)) { + return undefined; + } + return { + data: normalizeOptionalString(audio.data), + transcript: typeof audio.transcript === "string" ? 
audio.transcript : undefined, + }; +} + +function processOpenRouterSseLine( + line: string, + result: { audioBuffers: Buffer[]; transcriptChunks: string[] }, +): boolean { + if (!line.startsWith("data:")) { + return false; + } + const data = line.slice("data:".length).trim(); + if (!data) { + return false; + } + if (data === "[DONE]") { + return true; + } + const audio = readDeltaAudio(JSON.parse(data)); + if (audio?.data) { + result.audioBuffers.push(Buffer.from(audio.data, "base64")); + } + if (audio?.transcript) { + result.transcriptChunks.push(audio.transcript); + } + return false; +} + +function createOpenRouterStreamDeadline(timeoutMs: number): OpenRouterStreamDeadline { + return { + deadlineAtMs: Date.now() + Math.max(1, Math.floor(timeoutMs)), + timeoutMs, + }; +} + +function resolveOpenRouterStreamRemainingMs(deadline: OpenRouterStreamDeadline): number { + const remainingMs = deadline.deadlineAtMs - Date.now(); + if (remainingMs <= 0) { + throw new Error(`OpenRouter music generation timed out after ${deadline.timeoutMs}ms`); + } + return Math.max(1, remainingMs); +} + +async function readOpenRouterStreamChunk( + reader: ReadableStreamDefaultReader, + deadline: OpenRouterStreamDeadline, +): Promise> { + const timeoutMs = resolveOpenRouterStreamRemainingMs(deadline); + let timeoutId: ReturnType | undefined; + try { + return await Promise.race([ + reader.read(), + new Promise((_, reject) => { + timeoutId = setTimeout(() => { + reject(new Error(`OpenRouter music generation timed out after ${deadline.timeoutMs}ms`)); + }, timeoutMs); + }), + ]); + } catch (error) { + await reader.cancel().catch(() => {}); + throw error; + } finally { + if (timeoutId) { + clearTimeout(timeoutId); + } + } +} + +async function readOpenRouterAudioStream( + response: Response, + deadline: OpenRouterStreamDeadline, +): Promise { + if (!response.body) { + throw new Error("OpenRouter music generation response missing stream body"); + } + const reader = response.body.getReader(); + 
const decoder = new TextDecoder(); + const result = { audioBuffers: [] as Buffer[], transcriptChunks: [] as string[] }; + let buffer = ""; + let doneSeen = false; + for (;;) { + const { value, done } = await readOpenRouterStreamChunk(reader, deadline); + if (done) { + break; + } + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split(/\r?\n/u); + buffer = lines.pop() ?? ""; + for (const line of lines) { + if (processOpenRouterSseLine(line.trim(), result)) { + doneSeen = true; + await reader.cancel(); + return { + audioBuffer: Buffer.concat(result.audioBuffers), + transcript: result.transcriptChunks.join(""), + }; + } + } + } + resolveOpenRouterStreamRemainingMs(deadline); + buffer += decoder.decode(); + if (buffer.trim()) { + for (const line of buffer.split(/\r?\n/u)) { + if (processOpenRouterSseLine(line.trim(), result)) { + doneSeen = true; + } + } + } + if (!doneSeen) { + throw new Error("OpenRouter music generation stream ended before completion"); + } + return { + audioBuffer: Buffer.concat(result.audioBuffers), + transcript: result.transcriptChunks.join(""), + }; +} + +export function buildOpenRouterMusicGenerationProvider(): MusicGenerationProvider { + return { + id: "openrouter", + label: "OpenRouter", + defaultModel: DEFAULT_OPENROUTER_MUSIC_MODEL, + models: [...OPENROUTER_MUSIC_MODELS], + isConfigured: ({ agentDir }) => + isProviderApiKeyConfigured({ + provider: "openrouter", + agentDir, + }), + capabilities: { + generate: { + maxTracks: 1, + maxDurationSeconds: 180, + supportsLyrics: true, + supportsInstrumental: true, + supportsDuration: true, + supportsFormat: true, + supportedFormats: ["mp3", "wav"], + }, + edit: { + enabled: true, + maxTracks: 1, + maxInputImages: 1, + maxDurationSeconds: 180, + supportsLyrics: true, + supportsInstrumental: true, + supportsDuration: true, + supportsFormat: true, + supportedFormats: ["mp3", "wav"], + }, + }, + async generateMusic(req) { + if ((req.inputImages?.length ?? 
0) > 1) { + throw new Error("OpenRouter music generation supports at most one reference image."); + } + const auth = await resolveApiKeyForProvider({ + provider: "openrouter", + cfg: req.cfg, + agentDir: req.agentDir, + store: req.authStore, + }); + if (!auth.apiKey) { + throw new Error("OpenRouter API key missing"); + } + + const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } = + resolveProviderHttpRequestConfig({ + baseUrl: req.cfg?.models?.providers?.openrouter?.baseUrl, + defaultBaseUrl: OPENROUTER_BASE_URL, + allowPrivateNetwork: false, + defaultHeaders: { + Authorization: `Bearer ${auth.apiKey}`, + "Content-Type": "application/json", + "HTTP-Referer": "https://openclaw.ai", + "X-OpenRouter-Title": "OpenClaw", + }, + provider: "openrouter", + capability: "audio", + transport: "http", + }); + const model = resolveOpenRouterMusicModel(req.model); + const format = req.format ?? "wav"; + const timeoutMs = req.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const streamDeadline = createOpenRouterStreamDeadline(timeoutMs); + const { response, release } = await postJsonRequest({ + url: `${baseUrl}/chat/completions`, + headers, + body: { + model, + messages: [{ role: "user", content: buildOpenRouterMessageContent(req) }], + modalities: ["text", "audio"], + audio: { format }, + stream: true, + }, + timeoutMs, + fetchFn: fetch, + allowPrivateNetwork, + dispatcherPolicy, + }); + + try { + await assertOkOrThrowHttpError(response, "OpenRouter music generation failed"); + const streamResult = await readOpenRouterAudioStream(response, streamDeadline); + if (streamResult.audioBuffer.byteLength === 0) { + throw new Error("OpenRouter music generation response missing audio data"); + } + return { + tracks: [ + { + buffer: streamResult.audioBuffer, + mimeType: outputFormatToMimeType(format), + fileName: `track-1.${format}`, + }, + ], + model, + ...(streamResult.transcript ? { lyrics: [streamResult.transcript] } : {}), + metadata: { + inputImageCount: req.inputImages?.length ?? 
0, + instrumental: req.instrumental === true, + requestedFormat: format, + }, + }; + } finally { + await release(); + } + }, + }; +} + +export const _openRouterMusicTestInternals = { + readOpenRouterAudioStream, +}; diff --git a/extensions/openrouter/openclaw.plugin.json b/extensions/openrouter/openclaw.plugin.json index 87153173207..7c7332b876c 100644 --- a/extensions/openrouter/openclaw.plugin.json +++ b/extensions/openrouter/openclaw.plugin.json @@ -47,6 +47,7 @@ "groupId": "openrouter", "groupLabel": "OpenRouter", "groupHint": "API key", + "onboardingScopes": ["text-inference", "music-generation"], "optionKey": "openrouterApiKey", "cliFlag": "--openrouter-api-key", "cliOption": "--openrouter-api-key ", @@ -56,6 +57,7 @@ "contracts": { "mediaUnderstandingProviders": ["openrouter"], "imageGenerationProviders": ["openrouter"], + "musicGenerationProviders": ["openrouter"], "videoGenerationProviders": ["openrouter"], "speechProviders": ["openrouter"] }, diff --git a/extensions/openrouter/provider-contract-api.ts b/extensions/openrouter/provider-contract-api.ts index 792f0ec5b0f..4031472c64e 100644 --- a/extensions/openrouter/provider-contract-api.ts +++ b/extensions/openrouter/provider-contract-api.ts @@ -19,6 +19,7 @@ export function createOpenrouterProvider(): ProviderPlugin { groupId: "openrouter", groupLabel: "OpenRouter", groupHint: "API key", + onboardingScopes: ["text-inference", "music-generation"], }, }, ], diff --git a/extensions/openrouter/test-api.ts b/extensions/openrouter/test-api.ts index 8c6d24e135e..2e864bdaa7b 100644 --- a/extensions/openrouter/test-api.ts +++ b/extensions/openrouter/test-api.ts @@ -1,3 +1,4 @@ export { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js"; +export { buildOpenRouterMusicGenerationProvider } from "./music-generation-provider.js"; export { openrouterMediaUnderstandingProvider } from "./media-understanding-provider.js"; export { buildOpenRouterSpeechProvider } from "./speech-provider.js"; diff 
--git a/src/agents/tools/music-generate-tool.actions.ts b/src/agents/tools/music-generate-tool.actions.ts index 07bce53f4eb..83e449dbd29 100644 --- a/src/agents/tools/music-generate-tool.actions.ts +++ b/src/agents/tools/music-generate-tool.actions.ts @@ -27,7 +27,18 @@ function summarizeMusicGenerationCapabilities( edit?.maxInputImages ? `maxInputImages=${edit.maxInputImages}` : null, generate?.maxDurationSeconds ? `maxDurationSeconds=${generate.maxDurationSeconds}` : null, generate?.supportsLyrics ? "lyrics" : null, + generate?.supportsLyricsByModel && Object.keys(generate.supportsLyricsByModel).length > 0 + ? `supportsLyricsByModel=${Object.entries(generate.supportsLyricsByModel) + .map(([modelId, supported]) => `${modelId}:${supported}`) + .join("; ")}` + : null, generate?.supportsInstrumental ? "instrumental" : null, + generate?.supportsInstrumentalByModel && + Object.keys(generate.supportsInstrumentalByModel).length > 0 + ? `supportsInstrumentalByModel=${Object.entries(generate.supportsInstrumentalByModel) + .map(([modelId, supported]) => `${modelId}:${supported}`) + .join("; ")}` + : null, generate?.supportsDuration ? "duration" : null, generate?.supportsFormat ? 
"format" : null, generate?.supportedFormats?.length diff --git a/src/cli/command-catalog.ts b/src/cli/command-catalog.ts index f4d69fe5c2e..bcfdf1d4410 100644 --- a/src/cli/command-catalog.ts +++ b/src/cli/command-catalog.ts @@ -110,6 +110,11 @@ export const cliCommandCatalog: readonly CliCommandCatalogEntry[] = [ exact: true, policy: { bypassConfigGuard: true, loadPlugins: "never", networkProxy: "bypass" }, }, + { + commandPath: ["config", "models"], + exact: true, + policy: { bypassConfigGuard: true, loadPlugins: "never", networkProxy: "bypass" }, + }, { commandPath: ["migrate"], policy: { bypassConfigGuard: true, loadPlugins: "never", networkProxy: "bypass" }, diff --git a/src/commands/auth-choice-options.test.ts b/src/commands/auth-choice-options.test.ts index d960ef860b1..048c0bec765 100644 --- a/src/commands/auth-choice-options.test.ts +++ b/src/commands/auth-choice-options.test.ts @@ -22,15 +22,15 @@ vi.mock("./auth-choice-legacy.js", () => ({ })); function includesOnboardingScope( - scopes: readonly ("text-inference" | "image-generation")[] | undefined, - scope: "text-inference" | "image-generation", + scopes: readonly ("text-inference" | "image-generation" | "music-generation")[] | undefined, + scope: "text-inference" | "image-generation" | "music-generation", ): boolean { return scopes ? scopes.includes(scope) : scope === "text-inference"; } vi.mock("../flows/provider-flow.js", () => ({ resolveProviderSetupFlowContributions: vi.fn( - (params?: { scope?: "text-inference" | "image-generation" }) => { + (params?: { scope?: "text-inference" | "image-generation" | "music-generation" }) => { const scope = params?.scope ?? 
"text-inference"; return [ ...resolveManifestProviderAuthChoices() @@ -619,7 +619,7 @@ describe("buildAuthChoiceOptions", () => { expect(openCodeValues).toContain("opencode-go"); }); - it("hides image-generation-only providers from the interactive auth picker", () => { + it("hides media-generation-only providers from the interactive auth picker", () => { resolveManifestProviderAuthChoices.mockReturnValue([ { pluginId: "fal", @@ -631,6 +631,16 @@ describe("buildAuthChoiceOptions", () => { groupLabel: "fal", onboardingScopes: ["image-generation"], }, + { + pluginId: "openrouter", + providerId: "openrouter", + methodId: "api-key", + choiceId: "openrouter-api-key", + choiceLabel: "OpenRouter API key", + groupId: "openrouter", + groupLabel: "OpenRouter", + onboardingScopes: ["music-generation"], + }, { pluginId: "openai", providerId: "openai", @@ -649,6 +659,13 @@ describe("buildAuthChoiceOptions", () => { groupLabel: "Local image runtime", onboardingScopes: ["image-generation"], }, + { + value: "local-music-runtime", + label: "Local music runtime", + groupId: "local-music-runtime", + groupLabel: "Local music runtime", + onboardingScopes: ["music-generation"], + }, { value: "ollama", label: "Ollama", @@ -663,6 +680,8 @@ describe("buildAuthChoiceOptions", () => { expect(optionValues).toContain("openai-api-key"); expect(optionValues).toContain("ollama"); expect(optionValues).not.toContain("fal-api-key"); + expect(optionValues).not.toContain("openrouter-api-key"); expect(optionValues).not.toContain("local-image-runtime"); + expect(optionValues).not.toContain("local-music-runtime"); }); }); diff --git a/src/flows/provider-flow.ts b/src/flows/provider-flow.ts index 96eddc1f342..b1e3247b030 100644 --- a/src/flows/provider-flow.ts +++ b/src/flows/provider-flow.ts @@ -5,7 +5,7 @@ import * as providerInstallCatalog from "../plugins/provider-install-catalog.js" import type { FlowContribution, FlowOption } from "./types.js"; import { sortFlowContributionsByLabel } from 
"./types.js"; -type ProviderFlowScope = "text-inference" | "image-generation"; +type ProviderFlowScope = "text-inference" | "image-generation" | "music-generation"; const DEFAULT_PROVIDER_FLOW_SCOPE: ProviderFlowScope = "text-inference"; diff --git a/src/gateway/origin-check.test.ts b/src/gateway/origin-check.test.ts index 8c43bdb49c1..03ff571b041 100644 --- a/src/gateway/origin-check.test.ts +++ b/src/gateway/origin-check.test.ts @@ -36,6 +36,24 @@ describe("checkBrowserOrigin", () => { }, expected: { ok: true as const, matchedBy: "private-same-origin" as const }, }, + { + name: "accepts same-origin loopback host for local clients", + input: { + requestHost: "127.0.0.1:18789", + origin: "http://127.0.0.1:18789", + isLocalClient: true, + }, + expected: { ok: true as const, matchedBy: "private-same-origin" as const }, + }, + { + name: "rejects same-origin loopback host for non-local clients", + input: { + requestHost: "127.0.0.1:18789", + origin: "http://127.0.0.1:18789", + isLocalClient: false, + }, + expected: { ok: false as const, reason: "origin not allowed" }, + }, { name: "rejects same-origin public host without dangerous fallback", input: { diff --git a/src/media-generation/provider-capabilities.contract.test.ts b/src/media-generation/provider-capabilities.contract.test.ts index 3cfe301854c..b788fe29937 100644 --- a/src/media-generation/provider-capabilities.contract.test.ts +++ b/src/media-generation/provider-capabilities.contract.test.ts @@ -18,7 +18,13 @@ const EXPECTED_BUNDLED_VIDEO_PROVIDER_PLUGIN_IDS = [ "xai", ] as const; -const EXPECTED_BUNDLED_MUSIC_PROVIDER_PLUGIN_IDS = ["comfy", "google", "minimax"] as const; +const EXPECTED_BUNDLED_MUSIC_PROVIDER_PLUGIN_IDS = [ + "comfy", + "fal", + "google", + "minimax", + "openrouter", +] as const; const EXPECTED_BUNDLED_VIDEO_PROVIDER_IDS_BY_PLUGIN: Record = { minimax: ["minimax", "minimax-portal"], diff --git a/src/media-generation/runtime-shared.test.ts b/src/media-generation/runtime-shared.test.ts index 
24f3a517caf..50312ad67e1 100644 --- a/src/media-generation/runtime-shared.test.ts +++ b/src/media-generation/runtime-shared.test.ts @@ -207,6 +207,32 @@ describe("media-generation runtime shared candidates", () => { expect(candidates).toEqual([{ provider: "fal", model: "fal-ai/flux/dev" }]); }); + + it("prefers explicit provider refs over colliding slash-containing model IDs", () => { + const candidates = resolveCapabilityModelCandidates({ + cfg: {} as OpenClawConfig, + modelConfig: { + primary: "google/lyria-3-pro-preview", + }, + parseModelRef, + listProviders: () => [ + { + id: "google", + defaultModel: "lyria-3-clip-preview", + models: ["lyria-3-clip-preview", "lyria-3-pro-preview"], + isConfigured: () => true, + }, + { + id: "openrouter", + defaultModel: "google/lyria-3-clip-preview", + models: ["google/lyria-3-clip-preview", "google/lyria-3-pro-preview"], + isConfigured: () => true, + }, + ], + }); + + expect(candidates[0]).toEqual({ provider: "google", model: "lyria-3-pro-preview" }); + }); }); describe("media-generation runtime shared normalization", () => { diff --git a/src/media-generation/runtime-shared.ts b/src/media-generation/runtime-shared.ts index 27453014f9c..76309c32d2c 100644 --- a/src/media-generation/runtime-shared.ts +++ b/src/media-generation/runtime-shared.ts @@ -178,6 +178,21 @@ function resolveProviderModelOnlyRef(params: { return provider ? { provider: provider.id, model } : null; } +function hasCapabilityProviderId(params: { + providerId: string | undefined; + providers: CapabilityProviderCandidate[]; +}): boolean { + const providerId = normalizeOptionalString(params.providerId); + if (!providerId) { + return false; + } + return params.providers.some( + (provider) => + provider.id === providerId || + (provider.aliases ?? 
[]).some((alias) => normalizeOptionalString(alias) === providerId), + ); +} + export function resolveCapabilityModelCandidates(params: { cfg: OpenClawConfig; modelConfig: AgentModelConfig | undefined; @@ -203,6 +218,15 @@ export function resolveCapabilityModelCandidates(params: { if (!options.useProviderMetadata) { return parsed; } + if ( + parsed && + hasCapabilityProviderId({ + providerId: parsed.provider, + providers: getProviders(), + }) + ) { + return parsed; + } return resolveProviderModelOnlyRef({ raw: trimmed, providers: getProviders() }) ?? parsed; }; const add = (raw: string | undefined, options: { useProviderMetadata: boolean }) => { diff --git a/src/model-catalog/provider-index/normalize.ts b/src/model-catalog/provider-index/normalize.ts index a0f29f32b5c..f73a73cf3df 100644 --- a/src/model-catalog/provider-index/normalize.ts +++ b/src/model-catalog/provider-index/normalize.ts @@ -95,8 +95,8 @@ function normalizeOnboardingScopes( value: unknown, ): OpenClawProviderIndexProviderAuthChoice["onboardingScopes"] | undefined { const scopes = normalizeTrimmedStringList(value).filter( - (scope): scope is "text-inference" | "image-generation" => - scope === "text-inference" || scope === "image-generation", + (scope): scope is "text-inference" | "image-generation" | "music-generation" => + scope === "text-inference" || scope === "image-generation" || scope === "music-generation", ); return scopes.length > 0 ? 
[...new Set(scopes)] : undefined; } diff --git a/src/model-catalog/provider-index/types.ts b/src/model-catalog/provider-index/types.ts index 0529afcea1a..1d8353c9452 100644 --- a/src/model-catalog/provider-index/types.ts +++ b/src/model-catalog/provider-index/types.ts @@ -29,7 +29,7 @@ export type OpenClawProviderIndexProviderAuthChoice = { cliFlag?: string; cliOption?: string; cliDescription?: string; - onboardingScopes?: readonly ("text-inference" | "image-generation")[]; + onboardingScopes?: readonly ("text-inference" | "image-generation" | "music-generation")[]; }; export type OpenClawProviderIndexProvider = { diff --git a/src/music-generation/live-test-helpers.ts b/src/music-generation/live-test-helpers.ts index a0034c6072b..c8221fecafc 100644 --- a/src/music-generation/live-test-helpers.ts +++ b/src/music-generation/live-test-helpers.ts @@ -10,8 +10,10 @@ import { export { parseProviderModelMap, redactLiveApiKey }; export const DEFAULT_LIVE_MUSIC_MODELS: Record = { + fal: "fal/fal-ai/minimax-music/v2.6", google: "google/lyria-3-clip-preview", minimax: "minimax/music-2.6", + openrouter: "openrouter/google/lyria-3-pro-preview", }; export function parseCsvFilter(raw?: string): Set | null { diff --git a/src/music-generation/normalization.ts b/src/music-generation/normalization.ts index be4abdf1e55..3fa54709c2b 100644 --- a/src/music-generation/normalization.ts +++ b/src/music-generation/normalization.ts @@ -20,6 +20,14 @@ type ResolvedMusicGenerationOverrides = { normalization?: MusicGenerationNormalization; }; +function resolveModelBooleanSupport( + model: string, + defaultSupport: boolean | undefined, + supportByModel: Readonly> | undefined, +): boolean { + return supportByModel?.[model] ?? 
defaultSupport === true; +} + export function resolveMusicGenerationOverrides(params: { provider: MusicGenerationProvider; model: string; @@ -50,12 +58,22 @@ export function resolveMusicGenerationOverrides(params: { }; } - if (lyrics?.trim() && !caps.supportsLyrics) { + if ( + lyrics?.trim() && + !resolveModelBooleanSupport(params.model, caps.supportsLyrics, caps.supportsLyricsByModel) + ) { ignoredOverrides.push({ key: "lyrics", value: lyrics }); lyrics = undefined; } - if (typeof instrumental === "boolean" && !caps.supportsInstrumental) { + if ( + typeof instrumental === "boolean" && + !resolveModelBooleanSupport( + params.model, + caps.supportsInstrumental, + caps.supportsInstrumentalByModel, + ) + ) { ignoredOverrides.push({ key: "instrumental", value: instrumental }); instrumental = undefined; } diff --git a/src/music-generation/provider-assets.ts b/src/music-generation/provider-assets.ts new file mode 100644 index 00000000000..cdebe073d29 --- /dev/null +++ b/src/music-generation/provider-assets.ts @@ -0,0 +1,110 @@ +import { fetchProviderDownloadResponse } from "../media-understanding/shared.js"; +import { extensionForMime } from "../media/mime.js"; +import { normalizeOptionalString } from "../shared/string-coerce.js"; +import type { GeneratedMusicAsset } from "./types.js"; + +export type GeneratedMusicFileCandidate = { + url: string; + mimeType?: string; + fileName?: string; +}; + +function isRecord(value: unknown): value is Record { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + +function normalizeSpecificAudioMimeType(value: unknown): string | undefined { + const mimeType = normalizeOptionalString(value)?.split(";")[0]?.trim().toLowerCase(); + if (!mimeType || mimeType === "application/octet-stream" || mimeType === "binary/octet-stream") { + return undefined; + } + return mimeType; +} + +function pushGeneratedMusicFileCandidate( + candidates: GeneratedMusicFileCandidate[], + value: unknown, +): void { + if (typeof value 
=== "string") { + const url = normalizeOptionalString(value); + if (url) { + candidates.push({ url }); + } + return; + } + if (!isRecord(value)) { + return; + } + const url = normalizeOptionalString(value.url); + if (!url) { + return; + } + candidates.push({ + url, + ...(normalizeOptionalString(value.content_type) + ? { mimeType: normalizeOptionalString(value.content_type) } + : {}), + ...(normalizeOptionalString(value.file_name) + ? { fileName: normalizeOptionalString(value.file_name) } + : {}), + }); +} + +export function extractGeneratedMusicFileCandidates( + payload: unknown, + keys: readonly string[] = ["audio", "audio_file"], +): GeneratedMusicFileCandidate[] { + if (!isRecord(payload)) { + return []; + } + const candidates: GeneratedMusicFileCandidate[] = []; + for (const key of keys) { + pushGeneratedMusicFileCandidate(candidates, payload[key]); + } + return candidates; +} + +export function generatedMusicAssetFromBase64(params: { + base64: string; + mimeType: string; + index?: number; + fileName?: string; +}): GeneratedMusicAsset { + const ext = extensionForMime(params.mimeType)?.replace(/^\./u, "") || "mp3"; + return { + buffer: Buffer.from(params.base64, "base64"), + mimeType: params.mimeType, + fileName: params.fileName ?? `track-${(params.index ?? 0) + 1}.${ext}`, + }; +} + +export async function downloadGeneratedMusicAsset(params: { + candidate: GeneratedMusicFileCandidate; + timeoutMs: number; + fetchFn: typeof fetch; + provider: string; + requestFailedMessage: string; + index?: number; +}): Promise { + const response = await fetchProviderDownloadResponse({ + url: params.candidate.url, + init: { method: "GET" }, + timeoutMs: params.timeoutMs, + fetchFn: params.fetchFn, + provider: params.provider, + requestFailedMessage: params.requestFailedMessage, + }); + const mimeType = + normalizeSpecificAudioMimeType(response.headers.get("content-type")) ?? + normalizeSpecificAudioMimeType(params.candidate.mimeType) ?? 
+ "audio/mpeg"; + const ext = extensionForMime(mimeType)?.replace(/^\./u, "") || "mp3"; + return { + buffer: Buffer.from(await response.arrayBuffer()), + mimeType, + fileName: params.candidate.fileName ?? `track-${(params.index ?? 0) + 1}.${ext}`, + metadata: { + url: params.candidate.url, + }, + }; +} diff --git a/src/music-generation/runtime.test.ts b/src/music-generation/runtime.test.ts index 882e3b87990..18595b1d0e9 100644 --- a/src/music-generation/runtime.test.ts +++ b/src/music-generation/runtime.test.ts @@ -281,6 +281,64 @@ describe("music-generation runtime", () => { ]); }); + it("ignores model-specific unsupported lyrics and instrumental overrides", async () => { + let seenRequest: + | { + lyrics?: string; + instrumental?: boolean; + } + | undefined; + providers = [ + { + id: "fal", + capabilities: { + generate: { + supportsLyrics: true, + supportsLyricsByModel: { + "fal-ai/stable-audio-25/text-to-audio": false, + }, + supportsInstrumental: true, + supportsInstrumentalByModel: { + "fal-ai/stable-audio-25/text-to-audio": false, + }, + }, + }, + generateMusic: async (req) => { + seenRequest = { + lyrics: req.lyrics, + instrumental: req.instrumental, + }; + return { + tracks: [{ buffer: Buffer.from("wav-bytes"), mimeType: "audio/wav" }], + model: "fal-ai/stable-audio-25/text-to-audio", + }; + }, + }, + ]; + + const result = await runGenerateMusic({ + cfg: { + agents: { + defaults: { + musicGenerationModel: { primary: "fal/fal-ai/stable-audio-25/text-to-audio" }, + }, + }, + } as OpenClawConfig, + prompt: "orchestral hit", + lyrics: "rise up", + instrumental: true, + }); + + expect(seenRequest).toEqual({ + lyrics: undefined, + instrumental: undefined, + }); + expect(result.ignoredOverrides).toEqual([ + { key: "lyrics", value: "rise up" }, + { key: "instrumental", value: true }, + ]); + }); + it("uses mode-specific capabilities for edit requests", async () => { let seenRequest: | { diff --git a/src/music-generation/types.ts b/src/music-generation/types.ts 
index 064e31900ec..4094b6dd69d 100644 --- a/src/music-generation/types.ts +++ b/src/music-generation/types.ts @@ -57,7 +57,9 @@ export type MusicGenerationModeCapabilities = { maxTracks?: number; maxDurationSeconds?: number; supportsLyrics?: boolean; + supportsLyricsByModel?: Readonly>; supportsInstrumental?: boolean; + supportsInstrumentalByModel?: Readonly>; supportsDuration?: boolean; supportsFormat?: boolean; supportedFormats?: readonly MusicGenerationOutputFormat[]; diff --git a/src/pairing/setup-code.test.ts b/src/pairing/setup-code.test.ts index c8adb27e601..3dbfbbc8c51 100644 --- a/src/pairing/setup-code.test.ts +++ b/src/pairing/setup-code.test.ts @@ -551,6 +551,28 @@ describe("pairing setup code", () => { }); }); + it("allows tailnet bind setup urls when gateway TLS is enabled", async () => { + await expectResolvedSetupSuccessCase({ + config: { + gateway: { + bind: "tailnet", + tls: { + enabled: true, + }, + auth: { mode: "token", token: "tok_123" }, + }, + } satisfies ResolveSetupConfig, + options: { + networkInterfaces: () => createIpv4NetworkInterfaces("100.64.0.9"), + } satisfies ResolveSetupOptions, + expected: { + authLabel: "token", + url: "wss://100.64.0.9:18789", + urlSource: "gateway.bind=tailnet", + }, + }); + }); + it.each([ { name: "errors when gateway is loopback only", diff --git a/src/plugin-sdk/index.ts b/src/plugin-sdk/index.ts index 8e6e747ba8a..39f3e590a2b 100644 --- a/src/plugin-sdk/index.ts +++ b/src/plugin-sdk/index.ts @@ -103,7 +103,7 @@ export type { } from "../plugins/memory-state.js"; export type { CliBackendConfig } from "../config/types.js"; export type * from "./image-generation.js"; -export * from "./music-generation.js"; +export type * from "./music-generation.js"; export type { SecretInput, SecretRef } from "../config/types.secrets.js"; export type { RuntimeEnv } from "../runtime.js"; export type { HookEntry } from "../hooks/types.js"; diff --git a/src/plugin-sdk/music-generation.ts b/src/plugin-sdk/music-generation.ts 
index adb90e1ab2c..b5ce6686a66 100644 --- a/src/plugin-sdk/music-generation.ts +++ b/src/plugin-sdk/music-generation.ts @@ -12,3 +12,9 @@ export type { MusicGenerationSourceImage, MusicGenerationOutputFormat, } from "../music-generation/types.js"; +export { + downloadGeneratedMusicAsset, + extractGeneratedMusicFileCandidates, + generatedMusicAssetFromBase64, + type GeneratedMusicFileCandidate, +} from "../music-generation/provider-assets.js"; diff --git a/src/plugin-sdk/test-helpers/plugin-registration-contract-cases.ts b/src/plugin-sdk/test-helpers/plugin-registration-contract-cases.ts index 4133a90c5c2..aa1602fae0b 100644 --- a/src/plugin-sdk/test-helpers/plugin-registration-contract-cases.ts +++ b/src/plugin-sdk/test-helpers/plugin-registration-contract-cases.ts @@ -44,6 +44,7 @@ export const pluginRegistrationContractCases = { pluginId: "fal", providerIds: ["fal"], imageGenerationProviderIds: ["fal"], + musicGenerationProviderIds: ["fal"], }, firecrawl: { pluginId: "firecrawl", @@ -115,6 +116,7 @@ export const pluginRegistrationContractCases = { providerIds: ["openrouter"], mediaUnderstandingProviderIds: ["openrouter"], imageGenerationProviderIds: ["openrouter"], + musicGenerationProviderIds: ["openrouter"], videoGenerationProviderIds: ["openrouter"], requireDescribeImages: true, requireGenerateImage: true, diff --git a/src/plugins/inspect-shape.ts b/src/plugins/inspect-shape.ts index cab3be209f5..41f3ee003f8 100644 --- a/src/plugins/inspect-shape.ts +++ b/src/plugins/inspect-shape.ts @@ -9,6 +9,8 @@ export type PluginCapabilityKind = | "realtime-voice" | "media-understanding" | "image-generation" + | "video-generation" + | "music-generation" | "web-search" | "agent-harness" | "context-engine" @@ -44,6 +46,8 @@ function buildPluginCapabilityEntries( { kind: "realtime-voice" as const, ids: plugin.realtimeVoiceProviderIds }, { kind: "media-understanding" as const, ids: plugin.mediaUnderstandingProviderIds }, { kind: "image-generation" as const, ids: 
plugin.imageGenerationProviderIds }, + { kind: "video-generation" as const, ids: plugin.videoGenerationProviderIds }, + { kind: "music-generation" as const, ids: plugin.musicGenerationProviderIds }, { kind: "web-search" as const, ids: plugin.webSearchProviderIds }, { kind: "agent-harness" as const, ids: plugin.agentHarnessIds }, { diff --git a/src/plugins/manifest.ts b/src/plugins/manifest.ts index 8d680cfafa2..51125c69a98 100644 --- a/src/plugins/manifest.ts +++ b/src/plugins/manifest.ts @@ -502,7 +502,10 @@ export type PluginManifestProviderAuthChoice = { onboardingScopes?: PluginManifestOnboardingScope[]; }; -export type PluginManifestOnboardingScope = "text-inference" | "image-generation"; +export type PluginManifestOnboardingScope = + | "text-inference" + | "image-generation" + | "music-generation"; export type PluginManifestLoadResult = | { ok: true; manifest: PluginManifest; manifestPath: string } @@ -1365,7 +1368,7 @@ function normalizeProviderAuthChoices( const cliDescription = normalizeOptionalString(entry.cliDescription) ?? 
""; const onboardingScopes = normalizeTrimmedStringList(entry.onboardingScopes).filter( (scope): scope is PluginManifestOnboardingScope => - scope === "text-inference" || scope === "image-generation", + scope === "text-inference" || scope === "image-generation" || scope === "music-generation", ); normalized.push({ provider, diff --git a/src/plugins/official-external-plugin-catalog.ts b/src/plugins/official-external-plugin-catalog.ts index 34f73806436..8d43060d4e8 100644 --- a/src/plugins/official-external-plugin-catalog.ts +++ b/src/plugins/official-external-plugin-catalog.ts @@ -26,7 +26,7 @@ export type OfficialExternalProviderAuthChoice = { cliFlag?: string; cliOption?: string; cliDescription?: string; - onboardingScopes?: readonly ("text-inference" | "image-generation")[]; + onboardingScopes?: readonly ("text-inference" | "image-generation" | "music-generation")[]; }; export type OfficialExternalProviderCatalogProvider = { diff --git a/src/plugins/provider-auth-choices.ts b/src/plugins/provider-auth-choices.ts index eaa8b65d493..2bf96cf5c8a 100644 --- a/src/plugins/provider-auth-choices.ts +++ b/src/plugins/provider-auth-choices.ts @@ -24,7 +24,7 @@ export type ProviderAuthChoiceMetadata = { cliFlag?: string; cliOption?: string; cliDescription?: string; - onboardingScopes?: ("text-inference" | "image-generation")[]; + onboardingScopes?: ("text-inference" | "image-generation" | "music-generation")[]; }; export type ProviderOnboardAuthFlag = { diff --git a/src/plugins/provider-install-catalog.ts b/src/plugins/provider-install-catalog.ts index da4c42b371f..55db4937b09 100644 --- a/src/plugins/provider-install-catalog.ts +++ b/src/plugins/provider-install-catalog.ts @@ -262,13 +262,15 @@ function resolveProviderIndexInstallCatalogEntries(params: { return entries; } -function isProviderFlowScope(value: unknown): value is "text-inference" | "image-generation" { - return value === "text-inference" || value === "image-generation"; +function isProviderFlowScope( + 
value: unknown, +): value is "text-inference" | "image-generation" | "music-generation" { + return value === "text-inference" || value === "image-generation" || value === "music-generation"; } function normalizeProviderAuthChoiceScopes( scopes: OfficialExternalProviderAuthChoice["onboardingScopes"], -): ("text-inference" | "image-generation")[] | undefined { +): ("text-inference" | "image-generation" | "music-generation")[] | undefined { if (!Array.isArray(scopes)) { return undefined; } diff --git a/src/plugins/provider-validation.ts b/src/plugins/provider-validation.ts index 9db93ae44b0..33f7cc402b1 100644 --- a/src/plugins/provider-validation.ts +++ b/src/plugins/provider-validation.ts @@ -16,13 +16,15 @@ function normalizeTextList(values: string[] | undefined): string[] | undefined { } function normalizeOnboardingScopes( - values: Array<"text-inference" | "image-generation"> | undefined, -): Array<"text-inference" | "image-generation"> | undefined { + values: Array<"text-inference" | "image-generation" | "music-generation"> | undefined, +): Array<"text-inference" | "image-generation" | "music-generation"> | undefined { const normalized = Array.from( new Set( (values ?? 
[]).filter( - (value): value is "text-inference" | "image-generation" => - value === "text-inference" || value === "image-generation", + (value): value is "text-inference" | "image-generation" | "music-generation" => + value === "text-inference" || + value === "image-generation" || + value === "music-generation", ), ), ); diff --git a/src/plugins/provider-wizard.ts b/src/plugins/provider-wizard.ts index 61d16a0b930..a1a71640e49 100644 --- a/src/plugins/provider-wizard.ts +++ b/src/plugins/provider-wizard.ts @@ -24,7 +24,7 @@ export type ProviderWizardOption = { groupId: string; groupLabel: string; groupHint?: string; - onboardingScopes?: Array<"text-inference" | "image-generation">; + onboardingScopes?: Array<"text-inference" | "image-generation" | "music-generation">; assistantPriority?: number; assistantVisibility?: "visible" | "manual-only"; onboardingFeatured?: boolean; diff --git a/src/plugins/types.ts b/src/plugins/types.ts index 62f32b372de..f54a2b4a457 100644 --- a/src/plugins/types.ts +++ b/src/plugins/types.ts @@ -1129,7 +1129,7 @@ export type ProviderPluginWizardSetup = { * Interactive onboarding surfaces where this auth choice should appear. * Defaults to `["text-inference"]` when omitted. */ - onboardingScopes?: Array<"text-inference" | "image-generation">; + onboardingScopes?: Array<"text-inference" | "image-generation" | "music-generation">; /** * Optional model-allowlist prompt policy applied after this auth choice is * selected in configure/onboarding flows. 
diff --git a/test/test-env.ts b/test/test-env.ts index ba2677f29f6..91bf9497f6e 100644 --- a/test/test-env.ts +++ b/test/test-env.ts @@ -305,6 +305,7 @@ function sanitizeLiveConfig(raw: string): string { defaults?: Record; list?: Array>; }; + diagnostics?: Record; } = JSON5.parse(raw); if (!parsed || typeof parsed !== "object") { @@ -328,6 +329,10 @@ function sanitizeLiveConfig(raw: string): string { }); } + if (parsed.diagnostics && typeof parsed.diagnostics === "object") { + delete parsed.diagnostics.memoryPressureSnapshot; + } + if (!isTruthyEnvValue(process.env.OPENCLAW_LIVE_TEST_NORMALIZE_CONFIG)) { return `${JSON.stringify(parsed, null, 2)}\n`; }