feat: add fal and OpenRouter music generation (#82789)

* feat: add fal and OpenRouter music generation

* fix: repair music generation CI gates

* chore: refresh proof gate
This commit is contained in:
Peter Steinberger
2026-05-17 02:05:22 +01:00
committed by GitHub
parent 562d460d75
commit f453904165
54 changed files with 1535 additions and 87 deletions

View File

@@ -27,7 +27,18 @@ function summarizeMusicGenerationCapabilities(
edit?.maxInputImages ? `maxInputImages=${edit.maxInputImages}` : null,
generate?.maxDurationSeconds ? `maxDurationSeconds=${generate.maxDurationSeconds}` : null,
generate?.supportsLyrics ? "lyrics" : null,
generate?.supportsLyricsByModel && Object.keys(generate.supportsLyricsByModel).length > 0
? `supportsLyricsByModel=${Object.entries(generate.supportsLyricsByModel)
.map(([modelId, supported]) => `${modelId}:${supported}`)
.join("; ")}`
: null,
generate?.supportsInstrumental ? "instrumental" : null,
generate?.supportsInstrumentalByModel &&
Object.keys(generate.supportsInstrumentalByModel).length > 0
? `supportsInstrumentalByModel=${Object.entries(generate.supportsInstrumentalByModel)
.map(([modelId, supported]) => `${modelId}:${supported}`)
.join("; ")}`
: null,
generate?.supportsDuration ? "duration" : null,
generate?.supportsFormat ? "format" : null,
generate?.supportedFormats?.length

View File

@@ -110,6 +110,11 @@ export const cliCommandCatalog: readonly CliCommandCatalogEntry[] = [
exact: true,
policy: { bypassConfigGuard: true, loadPlugins: "never", networkProxy: "bypass" },
},
{
commandPath: ["config", "models"],
exact: true,
policy: { bypassConfigGuard: true, loadPlugins: "never", networkProxy: "bypass" },
},
{
commandPath: ["migrate"],
policy: { bypassConfigGuard: true, loadPlugins: "never", networkProxy: "bypass" },

View File

@@ -22,15 +22,15 @@ vi.mock("./auth-choice-legacy.js", () => ({
}));
function includesOnboardingScope(
scopes: readonly ("text-inference" | "image-generation")[] | undefined,
scope: "text-inference" | "image-generation",
scopes: readonly ("text-inference" | "image-generation" | "music-generation")[] | undefined,
scope: "text-inference" | "image-generation" | "music-generation",
): boolean {
return scopes ? scopes.includes(scope) : scope === "text-inference";
}
vi.mock("../flows/provider-flow.js", () => ({
resolveProviderSetupFlowContributions: vi.fn(
(params?: { scope?: "text-inference" | "image-generation" }) => {
(params?: { scope?: "text-inference" | "image-generation" | "music-generation" }) => {
const scope = params?.scope ?? "text-inference";
return [
...resolveManifestProviderAuthChoices()
@@ -619,7 +619,7 @@ describe("buildAuthChoiceOptions", () => {
expect(openCodeValues).toContain("opencode-go");
});
it("hides image-generation-only providers from the interactive auth picker", () => {
it("hides media-generation-only providers from the interactive auth picker", () => {
resolveManifestProviderAuthChoices.mockReturnValue([
{
pluginId: "fal",
@@ -631,6 +631,16 @@ describe("buildAuthChoiceOptions", () => {
groupLabel: "fal",
onboardingScopes: ["image-generation"],
},
{
pluginId: "openrouter",
providerId: "openrouter",
methodId: "api-key",
choiceId: "openrouter-api-key",
choiceLabel: "OpenRouter API key",
groupId: "openrouter",
groupLabel: "OpenRouter",
onboardingScopes: ["music-generation"],
},
{
pluginId: "openai",
providerId: "openai",
@@ -649,6 +659,13 @@ describe("buildAuthChoiceOptions", () => {
groupLabel: "Local image runtime",
onboardingScopes: ["image-generation"],
},
{
value: "local-music-runtime",
label: "Local music runtime",
groupId: "local-music-runtime",
groupLabel: "Local music runtime",
onboardingScopes: ["music-generation"],
},
{
value: "ollama",
label: "Ollama",
@@ -663,6 +680,8 @@ describe("buildAuthChoiceOptions", () => {
expect(optionValues).toContain("openai-api-key");
expect(optionValues).toContain("ollama");
expect(optionValues).not.toContain("fal-api-key");
expect(optionValues).not.toContain("openrouter-api-key");
expect(optionValues).not.toContain("local-image-runtime");
expect(optionValues).not.toContain("local-music-runtime");
});
});

View File

@@ -5,7 +5,7 @@ import * as providerInstallCatalog from "../plugins/provider-install-catalog.js"
import type { FlowContribution, FlowOption } from "./types.js";
import { sortFlowContributionsByLabel } from "./types.js";
type ProviderFlowScope = "text-inference" | "image-generation";
type ProviderFlowScope = "text-inference" | "image-generation" | "music-generation";
const DEFAULT_PROVIDER_FLOW_SCOPE: ProviderFlowScope = "text-inference";

View File

@@ -36,6 +36,24 @@ describe("checkBrowserOrigin", () => {
},
expected: { ok: true as const, matchedBy: "private-same-origin" as const },
},
{
name: "accepts same-origin loopback host for local clients",
input: {
requestHost: "127.0.0.1:18789",
origin: "http://127.0.0.1:18789",
isLocalClient: true,
},
expected: { ok: true as const, matchedBy: "private-same-origin" as const },
},
{
name: "rejects same-origin loopback host for non-local clients",
input: {
requestHost: "127.0.0.1:18789",
origin: "http://127.0.0.1:18789",
isLocalClient: false,
},
expected: { ok: false as const, reason: "origin not allowed" },
},
{
name: "rejects same-origin public host without dangerous fallback",
input: {

View File

@@ -18,7 +18,13 @@ const EXPECTED_BUNDLED_VIDEO_PROVIDER_PLUGIN_IDS = [
"xai",
] as const;
const EXPECTED_BUNDLED_MUSIC_PROVIDER_PLUGIN_IDS = ["comfy", "google", "minimax"] as const;
const EXPECTED_BUNDLED_MUSIC_PROVIDER_PLUGIN_IDS = [
"comfy",
"fal",
"google",
"minimax",
"openrouter",
] as const;
const EXPECTED_BUNDLED_VIDEO_PROVIDER_IDS_BY_PLUGIN: Record<string, readonly string[]> = {
minimax: ["minimax", "minimax-portal"],

View File

@@ -207,6 +207,32 @@ describe("media-generation runtime shared candidates", () => {
expect(candidates).toEqual([{ provider: "fal", model: "fal-ai/flux/dev" }]);
});
// The primary ref "google/lyria-3-pro-preview" can be read two ways: as provider
// "google" with model "lyria-3-pro-preview", or as a bare model id that the
// "openrouter" provider also lists. The explicit provider reading must come first.
it("prefers explicit provider refs over colliding slash-containing model IDs", () => {
const candidates = resolveCapabilityModelCandidates({
cfg: {} as OpenClawConfig,
modelConfig: {
primary: "google/lyria-3-pro-preview",
},
parseModelRef,
listProviders: () => [
{
id: "google",
defaultModel: "lyria-3-clip-preview",
models: ["lyria-3-clip-preview", "lyria-3-pro-preview"],
isConfigured: () => true,
},
{
// This provider's model ids contain a slash and collide with the "google/..." ref.
id: "openrouter",
defaultModel: "google/lyria-3-clip-preview",
models: ["google/lyria-3-clip-preview", "google/lyria-3-pro-preview"],
isConfigured: () => true,
},
],
});
expect(candidates[0]).toEqual({ provider: "google", model: "lyria-3-pro-preview" });
});
});
describe("media-generation runtime shared normalization", () => {

View File

@@ -178,6 +178,21 @@ function resolveProviderModelOnlyRef(params: {
return provider ? { provider: provider.id, model } : null;
}
/**
 * Reports whether `providerId` names one of the given capability providers,
 * either through the provider's `id` or through one of its aliases.
 *
 * The requested id and every alias are passed through `normalizeOptionalString`
 * before comparison; a provider's own `id` is compared as-is.
 *
 * @param params.providerId - Provider id to look up; may be undefined.
 * @param params.providers - Candidate providers to search.
 * @returns `true` when some provider matches; `false` when none does or when
 *   the normalized id is empty/undefined.
 */
function hasCapabilityProviderId(params: {
  providerId: string | undefined;
  providers: CapabilityProviderCandidate[];
}): boolean {
  const wantedId = normalizeOptionalString(params.providerId);
  if (!wantedId) {
    return false;
  }
  for (const candidate of params.providers) {
    // Direct id match wins before aliases are inspected.
    if (candidate.id === wantedId) {
      return true;
    }
    const aliases = candidate.aliases ?? [];
    if (aliases.some((alias) => normalizeOptionalString(alias) === wantedId)) {
      return true;
    }
  }
  return false;
}
export function resolveCapabilityModelCandidates(params: {
cfg: OpenClawConfig;
modelConfig: AgentModelConfig | undefined;
@@ -203,6 +218,15 @@ export function resolveCapabilityModelCandidates(params: {
if (!options.useProviderMetadata) {
return parsed;
}
if (
parsed &&
hasCapabilityProviderId({
providerId: parsed.provider,
providers: getProviders(),
})
) {
return parsed;
}
return resolveProviderModelOnlyRef({ raw: trimmed, providers: getProviders() }) ?? parsed;
};
const add = (raw: string | undefined, options: { useProviderMetadata: boolean }) => {

View File

@@ -95,8 +95,8 @@ function normalizeOnboardingScopes(
value: unknown,
): OpenClawProviderIndexProviderAuthChoice["onboardingScopes"] | undefined {
const scopes = normalizeTrimmedStringList(value).filter(
(scope): scope is "text-inference" | "image-generation" =>
scope === "text-inference" || scope === "image-generation",
(scope): scope is "text-inference" | "image-generation" | "music-generation" =>
scope === "text-inference" || scope === "image-generation" || scope === "music-generation",
);
return scopes.length > 0 ? [...new Set(scopes)] : undefined;
}

View File

@@ -29,7 +29,7 @@ export type OpenClawProviderIndexProviderAuthChoice = {
cliFlag?: string;
cliOption?: string;
cliDescription?: string;
onboardingScopes?: readonly ("text-inference" | "image-generation")[];
onboardingScopes?: readonly ("text-inference" | "image-generation" | "music-generation")[];
};
export type OpenClawProviderIndexProvider = {

View File

@@ -10,8 +10,10 @@ import {
export { parseProviderModelMap, redactLiveApiKey };
/**
 * Default music model ref per provider id.
 *
 * Each value starts with its own provider key followed by `/` and a model id;
 * the model id itself may contain further slashes (see `fal` and `openrouter`).
 * NOTE(review): presumably consumed by the live music-generation tests — confirm against callers.
 */
export const DEFAULT_LIVE_MUSIC_MODELS: Record<string, string> = {
fal: "fal/fal-ai/minimax-music/v2.6",
google: "google/lyria-3-clip-preview",
minimax: "minimax/music-2.6",
openrouter: "openrouter/google/lyria-3-pro-preview",
};
export function parseCsvFilter(raw?: string): Set<string> | null {

View File

@@ -20,6 +20,14 @@ type ResolvedMusicGenerationOverrides = {
normalization?: MusicGenerationNormalization;
};
/**
 * Resolves whether `model` supports a boolean capability, letting a per-model
 * override take precedence over the provider-wide default.
 *
 * Only the map's own boolean entries count as overrides. A plain
 * `supportByModel[model]` lookup would also see inherited members such as
 * `constructor` or `toString`, so a model id with one of those names would
 * yield a truthy non-boolean value instead of falling back to the default.
 *
 * @param model - Model id used as the lookup key.
 * @param defaultSupport - Provider-wide support flag; anything other than
 *   `true` counts as unsupported.
 * @param supportByModel - Optional per-model overrides keyed by model id.
 * @returns The per-model override when one exists, otherwise whether
 *   `defaultSupport` is exactly `true`.
 */
function resolveModelBooleanSupport(
  model: string,
  defaultSupport: boolean | undefined,
  supportByModel: Readonly<Record<string, boolean>> | undefined,
): boolean {
  if (supportByModel && Object.prototype.hasOwnProperty.call(supportByModel, model)) {
    // Widen to unknown: the map may come from unvalidated provider metadata.
    const override: unknown = supportByModel[model];
    if (typeof override === "boolean") {
      return override;
    }
  }
  return defaultSupport === true;
}
export function resolveMusicGenerationOverrides(params: {
provider: MusicGenerationProvider;
model: string;
@@ -50,12 +58,22 @@ export function resolveMusicGenerationOverrides(params: {
};
}
if (lyrics?.trim() && !caps.supportsLyrics) {
if (
lyrics?.trim() &&
!resolveModelBooleanSupport(params.model, caps.supportsLyrics, caps.supportsLyricsByModel)
) {
ignoredOverrides.push({ key: "lyrics", value: lyrics });
lyrics = undefined;
}
if (typeof instrumental === "boolean" && !caps.supportsInstrumental) {
if (
typeof instrumental === "boolean" &&
!resolveModelBooleanSupport(
params.model,
caps.supportsInstrumental,
caps.supportsInstrumentalByModel,
)
) {
ignoredOverrides.push({ key: "instrumental", value: instrumental });
instrumental = undefined;
}

View File

@@ -0,0 +1,110 @@
import { fetchProviderDownloadResponse } from "../media-understanding/shared.js";
import { extensionForMime } from "../media/mime.js";
import { normalizeOptionalString } from "../shared/string-coerce.js";
import type { GeneratedMusicAsset } from "./types.js";
/**
 * A reference to a generated audio file found in a provider response payload,
 * before the file itself has been downloaded.
 */
export type GeneratedMusicFileCandidate = {
/** URL the audio is fetched from. */
url: string;
/** Content type reported alongside the URL, when the payload carried one. */
mimeType?: string;
/** File name reported alongside the URL, when the payload carried one. */
fileName?: string;
};
/**
 * Type guard for plain keyed objects: accepts any non-null object that is not
 * an array, and rejects primitives, `null`, `undefined`, and arrays.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return !Array.isArray(value);
}
/**
 * Reduces a raw content-type value to a bare, lowercase MIME type and discards
 * values that say nothing about the actual format.
 *
 * Parameters after the first `;` are dropped. Despite the name, the result is
 * not checked for an `audio/` prefix; only empty values and the generic
 * octet-stream types are rejected.
 *
 * @param value - Raw content type, e.g. a header value or a payload field.
 * @returns The normalized MIME type, or `undefined` when it is missing, empty,
 *   `application/octet-stream`, or `binary/octet-stream`.
 */
function normalizeSpecificAudioMimeType(value: unknown): string | undefined {
  const raw = normalizeOptionalString(value);
  const essence = raw?.split(";")[0]?.trim().toLowerCase();
  if (!essence) {
    return undefined;
  }
  const isGenericBinary =
    essence === "application/octet-stream" || essence === "binary/octet-stream";
  return isGenericBinary ? undefined : essence;
}
/**
 * Appends a file candidate to `candidates` when `value` describes one.
 *
 * Two shapes are recognized:
 * - a string, treated as the URL itself;
 * - an object with a `url` field and optional `content_type` / `file_name`.
 *
 * Anything else, or a value whose URL normalizes to nothing, is ignored.
 *
 * @param candidates - Output list, mutated in place.
 * @param value - Untyped payload value to inspect.
 */
function pushGeneratedMusicFileCandidate(
  candidates: GeneratedMusicFileCandidate[],
  value: unknown,
): void {
  if (typeof value === "string") {
    const directUrl = normalizeOptionalString(value);
    if (directUrl) {
      candidates.push({ url: directUrl });
    }
    return;
  }
  if (!isRecord(value)) {
    return;
  }
  const url = normalizeOptionalString(value.url);
  if (!url) {
    return;
  }
  const candidate: GeneratedMusicFileCandidate = { url };
  // Optional fields are only set when present, so absent values never appear as keys.
  const mimeType = normalizeOptionalString(value.content_type);
  if (mimeType) {
    candidate.mimeType = mimeType;
  }
  const fileName = normalizeOptionalString(value.file_name);
  if (fileName) {
    candidate.fileName = fileName;
  }
  candidates.push(candidate);
}
/**
 * Collects generated-audio file candidates from a provider response payload.
 *
 * Each listed key is read from the payload in order, and any value that looks
 * like a file reference (a URL string or an object with a `url`) contributes
 * one candidate.
 *
 * @param payload - Untyped provider response; non-object payloads yield `[]`.
 * @param keys - Payload keys to inspect; defaults to `audio` and `audio_file`.
 * @returns Candidates in the order of `keys`; always a fresh array.
 */
export function extractGeneratedMusicFileCandidates(
  payload: unknown,
  keys: readonly string[] = ["audio", "audio_file"],
): GeneratedMusicFileCandidate[] {
  const found: GeneratedMusicFileCandidate[] = [];
  if (isRecord(payload)) {
    const record = payload;
    keys.forEach((key) => {
      pushGeneratedMusicFileCandidate(found, record[key]);
    });
  }
  return found;
}
/**
 * Builds a music asset from base64-encoded audio data.
 *
 * @param params.base64 - Base64-encoded audio bytes.
 * @param params.mimeType - MIME type stored on the asset and used to pick the
 *   default file extension.
 * @param params.index - Zero-based track index used in the default file name;
 *   treated as 0 when omitted.
 * @param params.fileName - Explicit file name; when omitted the name is
 *   `track-<index + 1>.<ext>`, with `mp3` as the extension if none can be
 *   derived from the MIME type.
 * @returns The decoded asset with `buffer`, `mimeType`, and `fileName`.
 */
export function generatedMusicAssetFromBase64(params: {
  base64: string;
  mimeType: string;
  index?: number;
  fileName?: string;
}): GeneratedMusicAsset {
  const rawExtension = extensionForMime(params.mimeType);
  // Drop a leading dot if present; fall back to mp3 for unknown/empty extensions.
  const extension = rawExtension?.replace(/^\./u, "") || "mp3";
  const trackNumber = (params.index ?? 0) + 1;
  const buffer = Buffer.from(params.base64, "base64");
  return {
    buffer,
    mimeType: params.mimeType,
    fileName: params.fileName ?? `track-${trackNumber}.${extension}`,
  };
}
/**
 * Downloads a generated audio file and wraps it as a music asset.
 *
 * The MIME type is chosen from the first source that gives a specific type:
 * the response `content-type` header, then the candidate's own `mimeType`,
 * then `audio/mpeg`.
 *
 * @param params.candidate - File reference to download.
 * @param params.timeoutMs - Timeout forwarded to the download helper.
 * @param params.fetchFn - Fetch implementation forwarded to the download helper.
 * @param params.provider - Provider id forwarded to the download helper.
 * @param params.requestFailedMessage - Failure message forwarded to the download helper.
 * @param params.index - Zero-based track index used in the default file name;
 *   treated as 0 when omitted.
 * @returns The asset with the downloaded bytes, resolved MIME type, a file name
 *   (the candidate's, or `track-<index + 1>.<ext>`), and the source URL in
 *   `metadata.url`.
 *
 * Errors thrown by `fetchProviderDownloadResponse` or while reading the body
 * are not caught here and propagate to the caller.
 */
export async function downloadGeneratedMusicAsset(params: {
  candidate: GeneratedMusicFileCandidate;
  timeoutMs: number;
  fetchFn: typeof fetch;
  provider: string;
  requestFailedMessage: string;
  index?: number;
}): Promise<GeneratedMusicAsset> {
  const { candidate } = params;
  const response = await fetchProviderDownloadResponse({
    url: candidate.url,
    init: { method: "GET" },
    timeoutMs: params.timeoutMs,
    fetchFn: params.fetchFn,
    provider: params.provider,
    requestFailedMessage: params.requestFailedMessage,
  });

  const headerMimeType = normalizeSpecificAudioMimeType(response.headers.get("content-type"));
  const mimeType =
    headerMimeType ?? normalizeSpecificAudioMimeType(candidate.mimeType) ?? "audio/mpeg";
  // Drop a leading dot if present; fall back to mp3 for unknown/empty extensions.
  const extension = extensionForMime(mimeType)?.replace(/^\./u, "") || "mp3";
  const trackNumber = (params.index ?? 0) + 1;

  const buffer = Buffer.from(await response.arrayBuffer());
  return {
    buffer,
    mimeType,
    fileName: candidate.fileName ?? `track-${trackNumber}.${extension}`,
    metadata: {
      url: candidate.url,
    },
  };
}

View File

@@ -281,6 +281,64 @@ describe("music-generation runtime", () => {
]);
});
// The provider advertises lyrics/instrumental support by default but turns both off
// for one specific model. A request for that model must have both overrides stripped
// before reaching the provider, and both must be reported back as ignored.
it("ignores model-specific unsupported lyrics and instrumental overrides", async () => {
// Captures the override fields the provider actually receives.
let seenRequest:
| {
lyrics?: string;
instrumental?: boolean;
}
| undefined;
providers = [
{
id: "fal",
capabilities: {
generate: {
supportsLyrics: true,
// Per-model entry disables lyrics despite the provider-wide `true`.
supportsLyricsByModel: {
"fal-ai/stable-audio-25/text-to-audio": false,
},
supportsInstrumental: true,
// Per-model entry disables instrumental despite the provider-wide `true`.
supportsInstrumentalByModel: {
"fal-ai/stable-audio-25/text-to-audio": false,
},
},
},
generateMusic: async (req) => {
seenRequest = {
lyrics: req.lyrics,
instrumental: req.instrumental,
};
return {
tracks: [{ buffer: Buffer.from("wav-bytes"), mimeType: "audio/wav" }],
model: "fal-ai/stable-audio-25/text-to-audio",
};
},
},
];
const result = await runGenerateMusic({
cfg: {
agents: {
defaults: {
musicGenerationModel: { primary: "fal/fal-ai/stable-audio-25/text-to-audio" },
},
},
} as OpenClawConfig,
prompt: "orchestral hit",
lyrics: "rise up",
instrumental: true,
});
// Neither override may reach the provider for this model.
expect(seenRequest).toEqual({
lyrics: undefined,
instrumental: undefined,
});
// Both dropped overrides are surfaced with their original values.
expect(result.ignoredOverrides).toEqual([
{ key: "lyrics", value: "rise up" },
{ key: "instrumental", value: true },
]);
});
it("uses mode-specific capabilities for edit requests", async () => {
let seenRequest:
| {

View File

@@ -57,7 +57,9 @@ export type MusicGenerationModeCapabilities = {
maxTracks?: number;
maxDurationSeconds?: number;
supportsLyrics?: boolean;
supportsLyricsByModel?: Readonly<Record<string, boolean>>;
supportsInstrumental?: boolean;
supportsInstrumentalByModel?: Readonly<Record<string, boolean>>;
supportsDuration?: boolean;
supportsFormat?: boolean;
supportedFormats?: readonly MusicGenerationOutputFormat[];

View File

@@ -551,6 +551,28 @@ describe("pairing setup code", () => {
});
});
// With `bind: "tailnet"` and gateway TLS enabled, the setup URL must resolve to a
// `wss://` URL on the tailnet IPv4 address provided by the stubbed network interfaces.
it("allows tailnet bind setup urls when gateway TLS is enabled", async () => {
await expectResolvedSetupSuccessCase({
config: {
gateway: {
bind: "tailnet",
tls: {
enabled: true,
},
auth: { mode: "token", token: "tok_123" },
},
} satisfies ResolveSetupConfig,
options: {
// Stubbed interface list exposing a single address, 100.64.0.9.
networkInterfaces: () => createIpv4NetworkInterfaces("100.64.0.9"),
} satisfies ResolveSetupOptions,
expected: {
authLabel: "token",
url: "wss://100.64.0.9:18789",
urlSource: "gateway.bind=tailnet",
},
});
});
it.each([
{
name: "errors when gateway is loopback only",

View File

@@ -103,7 +103,7 @@ export type {
} from "../plugins/memory-state.js";
export type { CliBackendConfig } from "../config/types.js";
export type * from "./image-generation.js";
export * from "./music-generation.js";
export type * from "./music-generation.js";
export type { SecretInput, SecretRef } from "../config/types.secrets.js";
export type { RuntimeEnv } from "../runtime.js";
export type { HookEntry } from "../hooks/types.js";

View File

@@ -12,3 +12,9 @@ export type {
MusicGenerationSourceImage,
MusicGenerationOutputFormat,
} from "../music-generation/types.js";
export {
downloadGeneratedMusicAsset,
extractGeneratedMusicFileCandidates,
generatedMusicAssetFromBase64,
type GeneratedMusicFileCandidate,
} from "../music-generation/provider-assets.js";

View File

@@ -44,6 +44,7 @@ export const pluginRegistrationContractCases = {
pluginId: "fal",
providerIds: ["fal"],
imageGenerationProviderIds: ["fal"],
musicGenerationProviderIds: ["fal"],
},
firecrawl: {
pluginId: "firecrawl",
@@ -115,6 +116,7 @@ export const pluginRegistrationContractCases = {
providerIds: ["openrouter"],
mediaUnderstandingProviderIds: ["openrouter"],
imageGenerationProviderIds: ["openrouter"],
musicGenerationProviderIds: ["openrouter"],
videoGenerationProviderIds: ["openrouter"],
requireDescribeImages: true,
requireGenerateImage: true,

View File

@@ -9,6 +9,8 @@ export type PluginCapabilityKind =
| "realtime-voice"
| "media-understanding"
| "image-generation"
| "video-generation"
| "music-generation"
| "web-search"
| "agent-harness"
| "context-engine"
@@ -44,6 +46,8 @@ function buildPluginCapabilityEntries(
{ kind: "realtime-voice" as const, ids: plugin.realtimeVoiceProviderIds },
{ kind: "media-understanding" as const, ids: plugin.mediaUnderstandingProviderIds },
{ kind: "image-generation" as const, ids: plugin.imageGenerationProviderIds },
{ kind: "video-generation" as const, ids: plugin.videoGenerationProviderIds },
{ kind: "music-generation" as const, ids: plugin.musicGenerationProviderIds },
{ kind: "web-search" as const, ids: plugin.webSearchProviderIds },
{ kind: "agent-harness" as const, ids: plugin.agentHarnessIds },
{

View File

@@ -502,7 +502,10 @@ export type PluginManifestProviderAuthChoice = {
onboardingScopes?: PluginManifestOnboardingScope[];
};
export type PluginManifestOnboardingScope = "text-inference" | "image-generation";
export type PluginManifestOnboardingScope =
| "text-inference"
| "image-generation"
| "music-generation";
export type PluginManifestLoadResult =
| { ok: true; manifest: PluginManifest; manifestPath: string }
@@ -1365,7 +1368,7 @@ function normalizeProviderAuthChoices(
const cliDescription = normalizeOptionalString(entry.cliDescription) ?? "";
const onboardingScopes = normalizeTrimmedStringList(entry.onboardingScopes).filter(
(scope): scope is PluginManifestOnboardingScope =>
scope === "text-inference" || scope === "image-generation",
scope === "text-inference" || scope === "image-generation" || scope === "music-generation",
);
normalized.push({
provider,

View File

@@ -26,7 +26,7 @@ export type OfficialExternalProviderAuthChoice = {
cliFlag?: string;
cliOption?: string;
cliDescription?: string;
onboardingScopes?: readonly ("text-inference" | "image-generation")[];
onboardingScopes?: readonly ("text-inference" | "image-generation" | "music-generation")[];
};
export type OfficialExternalProviderCatalogProvider = {

View File

@@ -24,7 +24,7 @@ export type ProviderAuthChoiceMetadata = {
cliFlag?: string;
cliOption?: string;
cliDescription?: string;
onboardingScopes?: ("text-inference" | "image-generation")[];
onboardingScopes?: ("text-inference" | "image-generation" | "music-generation")[];
};
export type ProviderOnboardAuthFlag = {

View File

@@ -262,13 +262,15 @@ function resolveProviderIndexInstallCatalogEntries(params: {
return entries;
}
function isProviderFlowScope(value: unknown): value is "text-inference" | "image-generation" {
return value === "text-inference" || value === "image-generation";
function isProviderFlowScope(
value: unknown,
): value is "text-inference" | "image-generation" | "music-generation" {
return value === "text-inference" || value === "image-generation" || value === "music-generation";
}
function normalizeProviderAuthChoiceScopes(
scopes: OfficialExternalProviderAuthChoice["onboardingScopes"],
): ("text-inference" | "image-generation")[] | undefined {
): ("text-inference" | "image-generation" | "music-generation")[] | undefined {
if (!Array.isArray(scopes)) {
return undefined;
}

View File

@@ -16,13 +16,15 @@ function normalizeTextList(values: string[] | undefined): string[] | undefined {
}
function normalizeOnboardingScopes(
values: Array<"text-inference" | "image-generation"> | undefined,
): Array<"text-inference" | "image-generation"> | undefined {
values: Array<"text-inference" | "image-generation" | "music-generation"> | undefined,
): Array<"text-inference" | "image-generation" | "music-generation"> | undefined {
const normalized = Array.from(
new Set(
(values ?? []).filter(
(value): value is "text-inference" | "image-generation" =>
value === "text-inference" || value === "image-generation",
(value): value is "text-inference" | "image-generation" | "music-generation" =>
value === "text-inference" ||
value === "image-generation" ||
value === "music-generation",
),
),
);

View File

@@ -24,7 +24,7 @@ export type ProviderWizardOption = {
groupId: string;
groupLabel: string;
groupHint?: string;
onboardingScopes?: Array<"text-inference" | "image-generation">;
onboardingScopes?: Array<"text-inference" | "image-generation" | "music-generation">;
assistantPriority?: number;
assistantVisibility?: "visible" | "manual-only";
onboardingFeatured?: boolean;

View File

@@ -1129,7 +1129,7 @@ export type ProviderPluginWizardSetup = {
* Interactive onboarding surfaces where this auth choice should appear.
* Defaults to `["text-inference"]` when omitted.
*/
onboardingScopes?: Array<"text-inference" | "image-generation">;
onboardingScopes?: Array<"text-inference" | "image-generation" | "music-generation">;
/**
* Optional model-allowlist prompt policy applied after this auth choice is
* selected in configure/onboarding flows.