feat: preserve media intent across provider fallback

This commit is contained in:
Peter Steinberger
2026-04-06 23:18:34 +01:00
parent ee04ba0386
commit a463a33eee
27 changed files with 1354 additions and 79 deletions

View File

@@ -732,6 +732,47 @@ describe("createImageGenerateTool", () => {
});
});
// When the runtime translates a requested size into an aspect ratio, the tool
// should surface the normalized `aspectRatio` in details and omit the raw
// `size` key, while passing runtime metadata through untouched.
it("surfaces normalized image geometry from runtime metadata", async () => {
// Stub the runtime: one PNG plus size→aspect-ratio normalization metadata.
vi.spyOn(imageGenerationRuntime, "generateImage").mockResolvedValue({
provider: "minimax",
model: "image-01",
attempts: [],
ignoredOverrides: [],
images: [
{
buffer: Buffer.from("png-out"),
mimeType: "image/png",
fileName: "generated.png",
},
],
metadata: {
requestedSize: "1280x720",
normalizedAspectRatio: "16:9",
},
});
// Stub persistence so no real file is written.
vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValue({
path: "/tmp/generated.png",
id: "generated.png",
size: 7,
contentType: "image/png",
});
const tool = createToolWithPrimaryImageModel("minimax/image-01");
const result = await tool.execute("call-minimax-generate", {
prompt: "A lobster at the movies",
size: "1280x720",
});
expect(result.details).toMatchObject({
aspectRatio: "16:9",
metadata: {
requestedSize: "1280x720",
normalizedAspectRatio: "16:9",
},
});
// The size was translated to an aspect ratio, so `size` must not leak through.
expect(result.details).not.toHaveProperty("size");
});
it("rejects unsupported aspect ratios", async () => {
const tool = createImageGenerateTool({
config: {

View File

@@ -615,6 +615,25 @@ export function createImageGenerateTool(options?: {
ignoredOverrides.length > 0
? `Ignored unsupported overrides for ${result.provider}/${result.model}: ${ignoredOverrides.map(formatIgnoredImageGenerationOverride).join(", ")}.`
: undefined;
const normalizedSize =
typeof result.metadata?.normalizedSize === "string" && result.metadata.normalizedSize.trim()
? result.metadata.normalizedSize
: undefined;
const normalizedAspectRatio =
typeof result.metadata?.normalizedAspectRatio === "string" &&
result.metadata.normalizedAspectRatio.trim()
? result.metadata.normalizedAspectRatio
: undefined;
const normalizedResolution =
typeof result.metadata?.normalizedResolution === "string" &&
result.metadata.normalizedResolution.trim()
? result.metadata.normalizedResolution
: undefined;
const sizeTranslatedToAspectRatio =
!normalizedSize &&
typeof result.metadata?.requestedSize === "string" &&
result.metadata.requestedSize === size &&
Boolean(normalizedAspectRatio);
const savedImages = await Promise.all(
result.images.map((image) =>
@@ -664,9 +683,15 @@ export function createImageGenerateTool(options?: {
})),
}
: {}),
...(resolution ? { resolution } : {}),
...(size ? { size } : {}),
...(aspectRatio ? { aspectRatio } : {}),
...(normalizedResolution || resolution
? { resolution: normalizedResolution ?? resolution }
: {}),
...(normalizedSize || (size && !sizeTranslatedToAspectRatio)
? { size: normalizedSize ?? size }
: {}),
...(normalizedAspectRatio || aspectRatio
? { aspectRatio: normalizedAspectRatio ?? aspectRatio }
: {}),
...(filename ? { filename } : {}),
attempts: result.attempts,
metadata: result.metadata,

View File

@@ -355,4 +355,55 @@ describe("createMusicGenerateTool", () => {
expect(result.details).not.toHaveProperty("durationSeconds");
expect(result.details).not.toHaveProperty("format");
});
// When the runtime reports a normalized duration, the tool should use it as
// the applied `durationSeconds`, keep the request under
// `requestedDurationSeconds`, and mention the normalization in the text output.
it("surfaces normalized durations from runtime metadata", async () => {
// Stub the runtime: one track, requested 45s but normalized to 30s.
vi.spyOn(musicGenerationRuntime, "generateMusic").mockResolvedValue({
provider: "minimax",
model: "music-2.5+",
attempts: [],
ignoredOverrides: [],
tracks: [
{
buffer: Buffer.from("music-bytes"),
mimeType: "audio/mpeg",
fileName: "night-drive.mp3",
},
],
metadata: {
requestedDurationSeconds: 45,
normalizedDurationSeconds: 30,
},
});
// Stub persistence so no real file is written.
vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
path: "/tmp/generated-night-drive.mp3",
id: "generated-night-drive.mp3",
size: 11,
contentType: "audio/mpeg",
});
const tool = createMusicGenerateTool({
config: asConfig({
agents: {
defaults: {
musicGenerationModel: { primary: "minimax/music-2.5+" },
},
},
}),
});
if (!tool) {
throw new Error("expected music_generate tool");
}
const result = await tool.execute("call-1", {
prompt: "night-drive synthwave",
durationSeconds: 45,
});
// The human-readable output should flag the requested→applied clamp.
const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";
expect(text).toContain("Duration normalized: requested 45s; used 30s.");
expect(result.details).toMatchObject({
durationSeconds: 30,
requestedDurationSeconds: 45,
});
});
});

View File

@@ -236,17 +236,6 @@ function validateMusicGenerationCapabilities(params: {
if (!caps) {
return;
}
if (
typeof params.durationSeconds === "number" &&
caps.supportsDuration &&
typeof caps.maxDurationSeconds === "number"
) {
if (params.durationSeconds > caps.maxDurationSeconds) {
throw new ToolInputError(
`${provider.id} supports at most ${caps.maxDurationSeconds} seconds per track.`,
);
}
}
}
type MusicGenerateSandboxConfig = {
@@ -418,6 +407,21 @@ async function executeMusicGenerationJob(params: {
);
const ignoredOverrides = result.ignoredOverrides ?? [];
const ignoredOverrideKeys = new Set(ignoredOverrides.map((entry) => entry.key));
const requestedDurationSeconds =
typeof result.metadata?.requestedDurationSeconds === "number" &&
Number.isFinite(result.metadata.requestedDurationSeconds)
? result.metadata.requestedDurationSeconds
: params.durationSeconds;
const runtimeNormalizedDurationSeconds =
typeof result.metadata?.normalizedDurationSeconds === "number" &&
Number.isFinite(result.metadata.normalizedDurationSeconds)
? result.metadata.normalizedDurationSeconds
: undefined;
const appliedDurationSeconds =
runtimeNormalizedDurationSeconds ??
(!ignoredOverrideKeys.has("durationSeconds") && typeof params.durationSeconds === "number"
? params.durationSeconds
: undefined);
const warning =
ignoredOverrides.length > 0
? `Ignored unsupported overrides for ${result.provider}/${result.model}: ${ignoredOverrides.map((entry) => `${entry.key}=${String(entry.value)}`).join(", ")}.`
@@ -425,9 +429,14 @@ async function executeMusicGenerationJob(params: {
const lines = [
`Generated ${savedTracks.length} track${savedTracks.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
...(warning ? [`Warning: ${warning}`] : []),
typeof requestedDurationSeconds === "number" &&
typeof appliedDurationSeconds === "number" &&
requestedDurationSeconds !== appliedDurationSeconds
? `Duration normalized: requested ${requestedDurationSeconds}s; used ${appliedDurationSeconds}s.`
: null,
...(result.lyrics?.length ? ["Lyrics returned.", ...result.lyrics] : []),
...savedTracks.map((track) => `MEDIA:${track.path}`),
];
].filter((entry): entry is string => Boolean(entry));
return {
provider: result.provider,
model: result.model,
@@ -456,8 +465,13 @@ async function executeMusicGenerationJob(params: {
...(!ignoredOverrideKeys.has("instrumental") && typeof params.instrumental === "boolean"
? { instrumental: params.instrumental }
: {}),
...(!ignoredOverrideKeys.has("durationSeconds") && typeof params.durationSeconds === "number"
? { durationSeconds: params.durationSeconds }
...(typeof appliedDurationSeconds === "number"
? { durationSeconds: appliedDurationSeconds }
: {}),
...(typeof requestedDurationSeconds === "number" &&
typeof appliedDurationSeconds === "number" &&
requestedDurationSeconds !== appliedDurationSeconds
? { requestedDurationSeconds }
: {}),
...(!ignoredOverrideKeys.has("format") && params.format ? { format: params.format } : {}),
...(params.filename ? { filename: params.filename } : {}),

View File

@@ -298,6 +298,59 @@ describe("createVideoGenerateTool", () => {
});
});
// Mirrors the image-tool test: a size translated to an aspect ratio by the
// runtime should surface as `aspectRatio` in details, with `size` omitted.
it("surfaces normalized video geometry from runtime metadata", async () => {
// Stub the runtime: one MP4 plus size→aspect-ratio normalization metadata.
vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
provider: "runway",
model: "gen4.5",
attempts: [],
ignoredOverrides: [],
videos: [
{
buffer: Buffer.from("video-bytes"),
mimeType: "video/mp4",
fileName: "lobster.mp4",
},
],
metadata: {
requestedSize: "1280x720",
normalizedAspectRatio: "16:9",
},
});
// Stub persistence so no real file is written.
vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
path: "/tmp/generated-lobster.mp4",
id: "generated-lobster.mp4",
size: 11,
contentType: "video/mp4",
});
const tool = createVideoGenerateTool({
config: asConfig({
agents: {
defaults: {
videoGenerationModel: { primary: "runway/gen4.5" },
},
},
}),
});
if (!tool) {
throw new Error("expected video_generate tool");
}
const result = await tool.execute("call-1", {
prompt: "friendly lobster surfing",
size: "1280x720",
});
expect(result.details).toMatchObject({
aspectRatio: "16:9",
metadata: {
requestedSize: "1280x720",
normalizedAspectRatio: "16:9",
},
});
// The size was translated to an aspect ratio, so `size` must not leak through.
expect(result.details).not.toHaveProperty("size");
});
it("lists supported provider durations when advertised", async () => {
vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([
{

View File

@@ -11,7 +11,6 @@ import {
resolveVideoGenerationMode,
resolveVideoGenerationModeCapabilities,
} from "../../video-generation/capabilities.js";
import { resolveVideoGenerationSupportedDurations } from "../../video-generation/duration-support.js";
import { parseVideoGenerationModelRef } from "../../video-generation/model-ref.js";
import {
generateVideo,
@@ -327,22 +326,6 @@ function validateVideoGenerationCapabilities(params: {
);
}
}
if (
typeof params.durationSeconds === "number" &&
Number.isFinite(params.durationSeconds) &&
!resolveVideoGenerationSupportedDurations({
provider,
model: params.model,
inputImageCount: params.inputImageCount,
inputVideoCount: params.inputVideoCount,
}) &&
typeof caps.maxDurationSeconds === "number" &&
params.durationSeconds > caps.maxDurationSeconds
) {
throw new ToolInputError(
`${provider.id} supports at most ${caps.maxDurationSeconds} seconds per video.`,
);
}
}
function formatIgnoredVideoGenerationOverride(override: VideoGenerationIgnoredOverride): string {
@@ -566,6 +549,25 @@ async function executeVideoGenerationJob(params: {
(entry): entry is number => typeof entry === "number" && Number.isFinite(entry),
)
: undefined;
const normalizedSize =
typeof result.metadata?.normalizedSize === "string" && result.metadata.normalizedSize.trim()
? result.metadata.normalizedSize
: undefined;
const normalizedAspectRatio =
typeof result.metadata?.normalizedAspectRatio === "string" &&
result.metadata.normalizedAspectRatio.trim()
? result.metadata.normalizedAspectRatio
: undefined;
const normalizedResolution =
typeof result.metadata?.normalizedResolution === "string" &&
result.metadata.normalizedResolution.trim()
? result.metadata.normalizedResolution
: undefined;
const sizeTranslatedToAspectRatio =
!normalizedSize &&
typeof result.metadata?.requestedSize === "string" &&
result.metadata.requestedSize === params.size &&
Boolean(normalizedAspectRatio);
const lines = [
`Generated ${savedVideos.length} video${savedVideos.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
...(warning ? [`Warning: ${warning}`] : []),
@@ -629,12 +631,15 @@ async function executeVideoGenerationJob(params: {
})),
}
: {}),
...(!ignoredOverrideKeys.has("size") && params.size ? { size: params.size } : {}),
...(!ignoredOverrideKeys.has("aspectRatio") && params.aspectRatio
? { aspectRatio: params.aspectRatio }
...(normalizedSize ||
(!ignoredOverrideKeys.has("size") && params.size && !sizeTranslatedToAspectRatio)
? { size: normalizedSize ?? params.size }
: {}),
...(!ignoredOverrideKeys.has("resolution") && params.resolution
? { resolution: params.resolution }
...(normalizedAspectRatio || (!ignoredOverrideKeys.has("aspectRatio") && params.aspectRatio)
? { aspectRatio: normalizedAspectRatio ?? params.aspectRatio }
: {}),
...(normalizedResolution || (!ignoredOverrideKeys.has("resolution") && params.resolution)
? { resolution: normalizedResolution ?? params.resolution }
: {}),
...(typeof normalizedDurationSeconds === "number"
? { durationSeconds: normalizedDurationSeconds }

View File

@@ -60,5 +60,6 @@ describe("generated base config schema", () => {
expect(agentDefaultsProperties?.videoGenerationModel).toBeDefined();
expect(uiHints["agents.defaults.videoGenerationModel.primary"]).toBeDefined();
expect(uiHints["agents.defaults.videoGenerationModel.fallbacks"]).toBeDefined();
expect(uiHints["agents.defaults.mediaGenerationAutoProviderFallback"]).toBeDefined();
});
});

View File

@@ -3080,6 +3080,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
},
],
},
mediaGenerationAutoProviderFallback: {
type: "boolean",
title: "Media Generation Auto Provider Fallback",
description:
"When true (default), shared image, music, and video generation automatically appends other auth-backed provider defaults after explicit primary/fallback refs. Set false to disable implicit cross-provider fallback while keeping explicit fallbacks.",
},
pdfModel: {
anyOf: [
{
@@ -24995,6 +25001,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Ordered fallback music-generation models (provider/model).",
tags: ["reliability"],
},
"agents.defaults.mediaGenerationAutoProviderFallback": {
label: "Media Generation Auto Provider Fallback",
help: "When true (default), shared image, music, and video generation automatically appends other auth-backed provider defaults after explicit primary/fallback refs. Set false to disable implicit cross-provider fallback while keeping explicit fallbacks.",
tags: ["reliability"],
},
"agents.defaults.pdfModel.primary": {
label: "PDF Model",
help: "Optional PDF model (provider/model) for the PDF analysis tool. Defaults to imageModel, then session model.",

View File

@@ -1097,6 +1097,8 @@ export const FIELD_HELP: Record<string, string> = {
"Optional music-generation model (provider/model) used by the shared music generation capability.",
"agents.defaults.musicGenerationModel.fallbacks":
"Ordered fallback music-generation models (provider/model).",
"agents.defaults.mediaGenerationAutoProviderFallback":
"When true (default), shared image, music, and video generation automatically appends other auth-backed provider defaults after explicit primary/fallback refs. Set false to disable implicit cross-provider fallback while keeping explicit fallbacks.",
"agents.defaults.pdfModel.primary":
"Optional PDF model (provider/model) for the PDF analysis tool. Defaults to imageModel, then session model.",
"agents.defaults.pdfModel.fallbacks": "Ordered fallback PDF models (provider/model).",

View File

@@ -500,6 +500,7 @@ export const FIELD_LABELS: Record<string, string> = {
"agents.defaults.videoGenerationModel.fallbacks": "Video Generation Model Fallbacks",
"agents.defaults.musicGenerationModel.primary": "Music Generation Model",
"agents.defaults.musicGenerationModel.fallbacks": "Music Generation Model Fallbacks",
"agents.defaults.mediaGenerationAutoProviderFallback": "Media Generation Auto Provider Fallback",
"agents.defaults.pdfModel.primary": "PDF Model",
"agents.defaults.pdfModel.fallbacks": "PDF Model Fallbacks",
"agents.defaults.pdfMaxBytesMb": "PDF Max Size (MB)",

View File

@@ -132,6 +132,13 @@ export type AgentDefaultsConfig = {
videoGenerationModel?: AgentModelConfig;
/** Optional music-generation model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */
musicGenerationModel?: AgentModelConfig;
/**
* When true (default), shared image/music/video generation appends other
* auth-backed provider defaults after explicit primary/fallback refs. Set to
* false to disable implicit cross-provider fallback while keeping explicit
* fallbacks.
*/
mediaGenerationAutoProviderFallback?: boolean;
/** Optional PDF-capable model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */
pdfModel?: AgentModelConfig;
/** Maximum PDF file size in megabytes (default: 10). */

View File

@@ -23,6 +23,14 @@ describe("agent defaults schema", () => {
).not.toThrow();
});
// The new optional agent-defaults flag must parse without throwing.
it("accepts mediaGenerationAutoProviderFallback", () => {
expect(() =>
AgentDefaultsSchema.parse({
mediaGenerationAutoProviderFallback: false,
}),
).not.toThrow();
});
it("accepts contextInjection: always", () => {
const result = AgentDefaultsSchema.parse({ contextInjection: "always" })!;
expect(result.contextInjection).toBe("always");

View File

@@ -23,6 +23,7 @@ export const AgentDefaultsSchema = z
imageGenerationModel: AgentModelSchema.optional(),
videoGenerationModel: AgentModelSchema.optional(),
musicGenerationModel: AgentModelSchema.optional(),
mediaGenerationAutoProviderFallback: z.boolean().optional(),
pdfModel: AgentModelSchema.optional(),
pdfMaxBytesMb: z.number().positive().optional(),
pdfMaxPages: z.number().int().positive().optional(),

View File

@@ -12,6 +12,7 @@ const mocks = vi.hoisted(() => {
(providerId: string, config?: OpenClawConfig) => ImageGenerationProvider | undefined
>(() => undefined),
getProviderEnvVars: vi.fn<(providerId: string) => string[]>(() => []),
resolveProviderAuthEnvVarCandidates: vi.fn(() => ({})),
isFailoverError: vi.fn<(err: unknown) => boolean>(() => false),
listImageGenerationProviders: vi.fn<(config?: OpenClawConfig) => ImageGenerationProvider[]>(
() => [],
@@ -49,9 +50,14 @@ vi.mock("../config/model-input.js", () => ({
vi.mock("../logging/subsystem.js", () => ({
createSubsystemLogger: mocks.createSubsystemLogger,
}));
vi.mock("../secrets/provider-env-vars.js", () => ({
getProviderEnvVars: mocks.getProviderEnvVars,
}));
vi.mock("../secrets/provider-env-vars.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("../secrets/provider-env-vars.js")>();
return {
...actual,
getProviderEnvVars: mocks.getProviderEnvVars,
resolveProviderAuthEnvVarCandidates: mocks.resolveProviderAuthEnvVarCandidates,
};
});
vi.mock("./model-ref.js", () => ({
parseImageGenerationModelRef: mocks.parseImageGenerationModelRef,
}));
@@ -67,6 +73,8 @@ describe("image-generation runtime", () => {
mocks.getImageGenerationProvider.mockReset();
mocks.getProviderEnvVars.mockReset();
mocks.getProviderEnvVars.mockReturnValue([]);
mocks.resolveProviderAuthEnvVarCandidates.mockReset();
mocks.resolveProviderAuthEnvVarCandidates.mockReturnValue({});
mocks.isFailoverError.mockReset();
mocks.isFailoverError.mockReturnValue(false);
mocks.listImageGenerationProviders.mockReset();
@@ -132,6 +140,80 @@ describe("image-generation runtime", () => {
expect(result.ignoredOverrides).toEqual([]);
});
// With no explicit image model configured, the runtime should auto-detect
// configured providers, try one (openai) first, record its failure in
// `attempts`, and fall through to the next configured provider (google).
it("auto-detects and falls through to another configured image-generation provider by default", async () => {
// Provider registry: openai fails at generate time; google succeeds.
mocks.getImageGenerationProvider.mockImplementation((providerId: string) => {
if (providerId === "openai") {
return {
id: "openai",
defaultModel: "gpt-image-1",
capabilities: {
generate: {},
edit: { enabled: true },
},
isConfigured: () => true,
async generateImage() {
throw new Error("OpenAI API key missing");
},
};
}
if (providerId === "google") {
return {
id: "google",
defaultModel: "gemini-3.1-flash-image-preview",
capabilities: {
generate: {},
edit: { enabled: true },
},
isConfigured: () => true,
async generateImage() {
return {
images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
model: "gemini-3.1-flash-image-preview",
};
},
};
}
return undefined;
});
// Both providers advertise themselves as configured for auto-detection.
mocks.listImageGenerationProviders.mockReturnValue([
{
id: "openai",
defaultModel: "gpt-image-1",
capabilities: {
generate: {},
edit: { enabled: true },
},
isConfigured: () => true,
generateImage: async () => ({ images: [] }),
},
{
id: "google",
defaultModel: "gemini-3.1-flash-image-preview",
capabilities: {
generate: {},
edit: { enabled: true },
},
isConfigured: () => true,
generateImage: async () => ({ images: [] }),
},
]);
// Empty config: no imageGenerationModel, so candidates come from auto-detect.
const result = await generateImage({
cfg: {} as OpenClawConfig,
prompt: "draw a cat",
});
expect(result.provider).toBe("google");
expect(result.model).toBe("gemini-3.1-flash-image-preview");
// The failed openai attempt is preserved with its error message.
expect(result.attempts).toEqual([
{
provider: "openai",
model: "gpt-image-1",
error: "OpenAI API key missing",
},
]);
});
it("drops unsupported provider geometry overrides and reports them", async () => {
let seenRequest:
| {
@@ -196,6 +278,71 @@ describe("image-generation runtime", () => {
]);
});
// A provider that only supports aspect ratios should receive the requested
// size translated to the closest supported ratio (1280x720 → 16:9), with
// nothing reported as ignored and the translation recorded in metadata.
it("maps requested size to the closest supported fallback geometry", async () => {
// Capture the geometry fields the provider actually receives.
let seenRequest:
| {
size?: string;
aspectRatio?: string;
resolution?: string;
}
| undefined;
mocks.resolveAgentModelPrimaryValue.mockReturnValue("minimax/image-01");
mocks.getImageGenerationProvider.mockReturnValue({
id: "minimax",
capabilities: {
generate: {
supportsSize: false,
supportsAspectRatio: true,
supportsResolution: false,
},
edit: {
enabled: true,
supportsSize: false,
supportsAspectRatio: true,
supportsResolution: false,
},
geometry: {
aspectRatios: ["1:1", "16:9"],
},
},
async generateImage(req) {
seenRequest = {
size: req.size,
aspectRatio: req.aspectRatio,
resolution: req.resolution,
};
return {
images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
model: "image-01",
};
},
});
const result = await generateImage({
cfg: {
agents: {
defaults: {
imageGenerationModel: { primary: "minimax/image-01" },
},
},
} as OpenClawConfig,
prompt: "draw a cat",
size: "1280x720",
});
// Size is dropped from the provider request, replaced by the derived ratio.
expect(seenRequest).toEqual({
size: undefined,
aspectRatio: "16:9",
resolution: undefined,
});
// The translation is a normalization, not an ignored override.
expect(result.ignoredOverrides).toEqual([]);
expect(result.metadata).toMatchObject({
requestedSize: "1280x720",
normalizedAspectRatio: "16:9",
aspectRatioDerivedFromSize: "16:9",
});
});
it("lists runtime image-generation providers through the provider registry", () => {
const providers: ImageGenerationProvider[] = [
{
@@ -232,6 +379,7 @@ describe("image-generation runtime", () => {
{
id: "vision-one",
defaultModel: "paint-v1",
isConfigured: () => false,
capabilities: {
generate: {},
edit: { enabled: false },
@@ -243,6 +391,7 @@ describe("image-generation runtime", () => {
{
id: "vision-two",
defaultModel: "paint-v2",
isConfigured: () => false,
capabilities: {
generate: {},
edit: { enabled: false },

View File

@@ -5,6 +5,10 @@ import type { OpenClawConfig } from "../config/config.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import {
buildNoCapabilityModelConfiguredMessage,
deriveAspectRatioFromSize,
resolveClosestAspectRatio,
resolveClosestResolution,
resolveClosestSize,
resolveCapabilityModelCandidates,
throwCapabilityGenerationFailure,
} from "../media-generation/runtime-shared.js";
@@ -71,29 +75,79 @@ function resolveProviderImageGenerationOverrides(params: {
let aspectRatio = params.aspectRatio;
let resolution = params.resolution;
if (
size &&
(!modeCaps.supportsSize ||
((geometry?.sizes?.length ?? 0) > 0 && !geometry?.sizes?.includes(size)))
) {
if (size && (geometry?.sizes?.length ?? 0) > 0 && modeCaps.supportsSize) {
size = resolveClosestSize({
requestedSize: size,
supportedSizes: geometry?.sizes,
});
}
if (!modeCaps.supportsSize && size) {
let translated = false;
if (modeCaps.supportsAspectRatio) {
const normalizedAspectRatio = resolveClosestAspectRatio({
requestedAspectRatio: aspectRatio,
requestedSize: size,
supportedAspectRatios: geometry?.aspectRatios,
});
if (normalizedAspectRatio) {
aspectRatio = normalizedAspectRatio;
translated = true;
}
}
if (!translated) {
ignoredOverrides.push({ key: "size", value: size });
}
size = undefined;
}
if (aspectRatio && (geometry?.aspectRatios?.length ?? 0) > 0 && modeCaps.supportsAspectRatio) {
aspectRatio = resolveClosestAspectRatio({
requestedAspectRatio: aspectRatio,
requestedSize: size,
supportedAspectRatios: geometry?.aspectRatios,
});
} else if (!modeCaps.supportsAspectRatio && aspectRatio) {
const derivedSize =
modeCaps.supportsSize && !size
? resolveClosestSize({
requestedSize: params.size,
requestedAspectRatio: aspectRatio,
supportedSizes: geometry?.sizes,
})
: undefined;
let translated = false;
if (derivedSize) {
size = derivedSize;
translated = true;
}
if (!translated) {
ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio });
}
aspectRatio = undefined;
}
if (resolution && (geometry?.resolutions?.length ?? 0) > 0 && modeCaps.supportsResolution) {
resolution = resolveClosestResolution({
requestedResolution: resolution,
supportedResolutions: geometry?.resolutions,
});
} else if (!modeCaps.supportsResolution && resolution) {
ignoredOverrides.push({ key: "resolution", value: resolution });
resolution = undefined;
}
if (size && !modeCaps.supportsSize) {
ignoredOverrides.push({ key: "size", value: size });
size = undefined;
}
if (
aspectRatio &&
(!modeCaps.supportsAspectRatio ||
((geometry?.aspectRatios?.length ?? 0) > 0 && !geometry?.aspectRatios?.includes(aspectRatio)))
) {
if (aspectRatio && !modeCaps.supportsAspectRatio) {
ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio });
aspectRatio = undefined;
}
if (
resolution &&
(!modeCaps.supportsResolution ||
((geometry?.resolutions?.length ?? 0) > 0 && !geometry?.resolutions?.includes(resolution)))
) {
if (resolution && !modeCaps.supportsResolution) {
ignoredOverrides.push({ key: "resolution", value: resolution });
resolution = undefined;
}
@@ -114,6 +168,8 @@ export async function generateImage(
modelConfig: params.cfg.agents?.defaults?.imageGenerationModel,
modelOverride: params.modelOverride,
parseModelRef: parseImageGenerationModelRef,
agentDir: params.agentDir,
listProviders: listImageGenerationProviders,
});
if (candidates.length === 0) {
throw new Error(buildNoImageGenerationModelConfiguredMessage(params.cfg));
@@ -164,7 +220,33 @@ export async function generateImage(
provider: candidate.provider,
model: result.model ?? candidate.model,
attempts,
metadata: result.metadata,
metadata: {
...result.metadata,
...(params.size && sanitized.size && params.size !== sanitized.size
? { requestedSize: params.size, normalizedSize: sanitized.size }
: {}),
...((params.aspectRatio &&
sanitized.aspectRatio &&
params.aspectRatio !== sanitized.aspectRatio) ||
(!params.aspectRatio && params.size && sanitized.aspectRatio)
? {
...(params.size ? { requestedSize: params.size } : {}),
...(params.aspectRatio ? { requestedAspectRatio: params.aspectRatio } : {}),
normalizedAspectRatio: sanitized.aspectRatio,
...(params.size
? { aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size) }
: {}),
}
: {}),
...(params.resolution &&
sanitized.resolution &&
params.resolution !== sanitized.resolution
? {
requestedResolution: params.resolution,
normalizedResolution: sanitized.resolution,
}
: {}),
},
ignoredOverrides: sanitized.ignoredOverrides,
};
} catch (err) {

View File

@@ -0,0 +1,161 @@
import { describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import {
deriveAspectRatioFromSize,
normalizeDurationToClosestMax,
resolveCapabilityModelCandidates,
resolveClosestAspectRatio,
resolveClosestResolution,
resolveClosestSize,
} from "./runtime-shared.js";
// Minimal "provider/model" parser for these tests: splits on the FIRST slash
// (so the model segment may itself contain slashes, e.g. "fal/fal-ai/flux/dev").
// Returns null for blank input, a missing separator, or a leading/trailing slash.
function parseModelRef(raw?: string) {
  const value = raw?.trim() ?? "";
  const separator = value.indexOf("/");
  // separator < 1 rejects "no slash" (-1) and a leading slash (0);
  // separator >= length - 1 rejects a trailing slash (empty model).
  if (separator < 1 || separator >= value.length - 1) {
    return null;
  }
  return {
    provider: value.slice(0, separator),
    model: value.slice(separator + 1),
  };
}
describe("media-generation runtime shared candidates", () => {
  it("appends auth-backed provider defaults after explicit refs by default", () => {
    // Session default model pins "openai" as the current default provider.
    const cfg = {
      agents: {
        defaults: {
          model: {
            primary: "openai/gpt-5.4",
          },
        },
      },
    } as OpenClawConfig;
    // Three configured providers, each advertising a default model.
    const configuredProviders = [
      {
        id: "google",
        defaultModel: "gemini-3.1-flash-image-preview",
        isConfigured: () => true,
      },
      {
        id: "openai",
        defaultModel: "gpt-image-1",
        isConfigured: () => true,
      },
      {
        id: "minimax",
        defaultModel: "image-01",
        isConfigured: () => true,
      },
    ];
    const candidates = resolveCapabilityModelCandidates({
      cfg,
      modelConfig: {
        primary: "google/gemini-3.1-flash-image-preview",
        fallbacks: ["fal/fal-ai/flux/dev"],
      },
      parseModelRef,
      listProviders: () => configuredProviders,
    });
    // Explicit primary/fallback refs stay first; implicit provider defaults
    // follow with the session default provider (openai) ahead of the rest,
    // and the already-listed google ref is deduplicated.
    expect(candidates).toEqual([
      { provider: "google", model: "gemini-3.1-flash-image-preview" },
      { provider: "fal", model: "fal-ai/flux/dev" },
      { provider: "openai", model: "gpt-image-1" },
      { provider: "minimax", model: "image-01" },
    ]);
  });

  it("auto-detects auth-backed provider defaults when no explicit media model is configured", () => {
    // With no media model at all, candidates are purely the provider defaults.
    const candidates = resolveCapabilityModelCandidates({
      cfg: {} as OpenClawConfig,
      modelConfig: undefined,
      parseModelRef,
      listProviders: () => [
        { id: "openai", defaultModel: "gpt-image-1", isConfigured: () => true },
        { id: "fal", defaultModel: "fal-ai/flux/dev", isConfigured: () => true },
      ],
    });
    expect(candidates).toEqual([
      { provider: "openai", model: "gpt-image-1" },
      { provider: "fal", model: "fal-ai/flux/dev" },
    ]);
  });

  it("disables implicit provider expansion when mediaGenerationAutoProviderFallback=false", () => {
    // Opting out keeps the explicit primary but drops implicit provider defaults.
    const candidates = resolveCapabilityModelCandidates({
      cfg: {
        agents: {
          defaults: {
            mediaGenerationAutoProviderFallback: false,
          },
        },
      } as OpenClawConfig,
      modelConfig: {
        primary: "google/gemini-3.1-flash-image-preview",
      },
      parseModelRef,
      listProviders: () => [
        { id: "openai", defaultModel: "gpt-image-1", isConfigured: () => true },
      ],
    });
    expect(candidates).toEqual([{ provider: "google", model: "gemini-3.1-flash-image-preview" }]);
  });
});
describe("media-generation runtime shared normalization", () => {
  it("derives reduced aspect ratios from size strings", () => {
    // "WxH" strings reduce to lowest-terms "W:H" ratios.
    expect(deriveAspectRatioFromSize("1280x720")).toBe("16:9");
    expect(deriveAspectRatioFromSize("1024x1536")).toBe("2:3");
  });

  it("maps unsupported sizes to the closest supported size", () => {
    const closest = resolveClosestSize({
      requestedSize: "1792x1024",
      supportedSizes: ["1024x1024", "1024x1536", "1536x1024"],
    });
    expect(closest).toBe("1536x1024");
  });

  it("maps unsupported aspect ratios to the closest supported aspect ratio", () => {
    const closest = resolveClosestAspectRatio({
      requestedAspectRatio: "17:10",
      supportedAspectRatios: ["1:1", "4:3", "16:9"],
    });
    expect(closest).toBe("16:9");
  });

  it("maps unsupported resolutions to the closest supported resolution", () => {
    const closest = resolveClosestResolution({
      requestedResolution: "2K",
      supportedResolutions: ["1K", "4K"],
    });
    expect(closest).toBe("1K");
  });

  it("clamps durations to the closest supported max", () => {
    // Above the max clamps down; at or below the max passes through untouched.
    expect(normalizeDurationToClosestMax(12, 8)).toBe(8);
    expect(normalizeDurationToClosestMax(6, 8)).toBe(6);
  });
});

View File

@@ -1,3 +1,7 @@
import { listProfilesForProvider } from "../agents/auth-profiles.js";
import { ensureAuthProfileStore } from "../agents/auth-profiles.js";
import { DEFAULT_PROVIDER } from "../agents/defaults.js";
import { resolveEnvApiKey } from "../agents/model-auth-env.js";
import type { FallbackAttempt } from "../agents/model-fallback.types.js";
import type { OpenClawConfig } from "../config/config.js";
import {
@@ -12,11 +16,110 @@ export type ParsedProviderModelRef = {
model: string;
};
// Resolution tiers in ascending order — assumed to drive closest-resolution
// matching elsewhere in this module; confirm against the comparator.
const IMAGE_RESOLUTION_ORDER = ["1K", "2K", "4K"] as const;
// Minimal provider shape needed to build implicit capability fallbacks:
// an id, an optional default model, and an optional configuration probe.
type CapabilityProviderCandidate = {
id: string;
defaultModel?: string | null;
// Provider-supplied auth/config check; when absent, env API keys and stored
// auth profiles are consulted instead (see isCapabilityProviderConfigured).
isConfigured?: (ctx: { cfg?: OpenClawConfig; agentDir?: string }) => boolean;
};
// Parsed "W:H" aspect-ratio string. `value` is presumably width/height as a
// single number for distance comparisons — confirm against the parser.
type ParsedAspectRatio = {
width: number;
height: number;
value: number;
};
// Parsed "WxH" size string. `aspectRatio` is presumably width/height and
// `area` width*height, used to score closeness — confirm against the parser.
type ParsedSize = {
width: number;
height: number;
aspectRatio: number;
area: number;
};
// Derives the provider id from the configured session model ("provider/model"),
// falling back to DEFAULT_PROVIDER when the config is absent or malformed
// (blank, no slash, or leading slash).
function resolveCurrentDefaultProviderId(cfg?: OpenClawConfig): string {
  const primary = resolveAgentModelPrimaryValue(cfg?.agents?.defaults?.model)?.trim();
  if (!primary) {
    return DEFAULT_PROVIDER;
  }
  const separatorIndex = primary.indexOf("/");
  if (separatorIndex <= 0) {
    return DEFAULT_PROVIDER;
  }
  return primary.slice(0, separatorIndex).trim() || DEFAULT_PROVIDER;
}
// Decides whether a provider has usable auth/config, in priority order:
// 1. the provider's own isConfigured probe, when it supplies one;
// 2. an API key resolvable from the environment;
// 3. stored auth profiles under the agent directory (no keychain prompt).
function isCapabilityProviderConfigured(params: {
  provider: CapabilityProviderCandidate;
  cfg?: OpenClawConfig;
  agentDir?: string;
}): boolean {
  const { provider, cfg, agentDir } = params;
  if (provider.isConfigured) {
    return provider.isConfigured({ cfg, agentDir });
  }
  if (resolveEnvApiKey(provider.id)?.apiKey) {
    return true;
  }
  const dir = agentDir?.trim();
  if (!dir) {
    // No agent dir → no profile store to consult.
    return false;
  }
  const store = ensureAuthProfileStore(dir, {
    allowKeychainPrompt: false,
  });
  return listProfilesForProvider(store, provider.id).length > 0;
}
// Builds the implicit cross-provider fallback list: one "provider/defaultModel"
// ref per configured provider, ordered with the session's current default
// provider first and the remaining providers alphabetically after it.
function resolveAutoCapabilityFallbackRefs(params: {
  cfg: OpenClawConfig;
  agentDir?: string;
  listProviders: (cfg?: OpenClawConfig) => CapabilityProviderCandidate[];
}): string[] {
  // One ref per provider id; first listing wins on duplicates.
  const refsByProvider = new Map<string, string>();
  for (const provider of params.listProviders(params.cfg)) {
    const providerId = provider.id.trim();
    const modelId = provider.defaultModel?.trim();
    if (!providerId || !modelId || refsByProvider.has(providerId)) {
      continue;
    }
    const configured = isCapabilityProviderConfigured({
      provider,
      cfg: params.cfg,
      agentDir: params.agentDir,
    });
    if (!configured) {
      continue;
    }
    refsByProvider.set(providerId, `${providerId}/${modelId}`);
  }
  const defaultProvider = resolveCurrentDefaultProviderId(params.cfg);
  // filter() already copies, so sorting in place does not mutate the map keys.
  const remainingProviders = [...refsByProvider.keys()]
    .filter((providerId) => providerId !== defaultProvider)
    .sort();
  const orderedRefs: string[] = [];
  for (const providerId of [defaultProvider, ...remainingProviders]) {
    const ref = refsByProvider.get(providerId);
    if (ref) {
      orderedRefs.push(ref);
    }
  }
  return orderedRefs;
}
export function resolveCapabilityModelCandidates(params: {
cfg: OpenClawConfig;
modelConfig: AgentModelConfig | undefined;
modelOverride?: string;
parseModelRef: (raw: string | undefined) => ParsedProviderModelRef | null;
agentDir?: string;
listProviders?: (cfg?: OpenClawConfig) => CapabilityProviderCandidate[];
autoProviderFallback?: boolean;
}): ParsedProviderModelRef[] {
const candidates: ParsedProviderModelRef[] = [];
const seen = new Set<string>();
@@ -38,9 +141,237 @@ export function resolveCapabilityModelCandidates(params: {
for (const fallback of resolveAgentModelFallbackValues(params.modelConfig)) {
add(fallback);
}
const autoProviderFallbackEnabled =
params.autoProviderFallback ??
params.cfg.agents?.defaults?.mediaGenerationAutoProviderFallback !== false;
if (autoProviderFallbackEnabled && params.listProviders) {
for (const candidate of resolveAutoCapabilityFallbackRefs({
cfg: params.cfg,
agentDir: params.agentDir,
listProviders: params.listProviders,
})) {
add(candidate);
}
}
return candidates;
}
/**
 * Return true when `next` beats the current `best` score (or there is no
 * incumbent). Scores compare lexicographically: primary, then secondary
 * (both "smaller wins"), then the tertiary label as a deterministic
 * tie-breaker.
 */
function compareScores(
  next: { primary: number; secondary: number; tertiary: string },
  best: { primary: number; secondary: number; tertiary: string } | null,
): boolean {
  // No incumbent: any candidate wins.
  if (best === null) {
    return true;
  }
  // First differing numeric tier decides.
  for (const tier of ["primary", "secondary"] as const) {
    if (next[tier] !== best[tier]) {
      return next[tier] < best[tier];
    }
  }
  // Full numeric tie: fall back to a lexicographic label comparison.
  return next.tertiary.localeCompare(best.tertiary) < 0;
}
/**
 * Parse a "W:H" aspect-ratio string (decimals allowed, whitespace around the
 * colon tolerated) into its numeric parts plus the W/H quotient.
 * Returns null for blank, malformed, or non-positive input.
 */
function parseAspectRatioValue(raw?: string | null): ParsedAspectRatio | null {
  const text = raw?.trim() ?? "";
  const match = text.match(/^(\d+(?:\.\d+)?)\s*:\s*(\d+(?:\.\d+)?)$/);
  if (match === null) {
    return null;
  }
  const [, widthText, heightText] = match;
  const width = Number(widthText);
  const height = Number(heightText);
  const usable = Number.isFinite(width) && Number.isFinite(height) && width > 0 && height > 0;
  return usable ? { width, height, value: width / height } : null;
}
/**
 * Parse a "WIDTHxHEIGHT" size string (case-insensitive "x", whitespace
 * around it tolerated) into integers plus derived aspect ratio and area.
 * Returns null for blank, malformed, or non-positive input.
 */
function parseSizeValue(raw?: string | null): ParsedSize | null {
  const text = raw?.trim() ?? "";
  const match = text.match(/^(\d+)\s*x\s*(\d+)$/i);
  if (match === null) {
    return null;
  }
  const width = Number(match[1]);
  const height = Number(match[2]);
  if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
    return null;
  }
  return {
    width,
    height,
    aspectRatio: width / height,
    area: width * height,
  };
}
/**
 * Euclidean GCD of two magnitudes (signs ignored). Returns 1 when both
 * inputs are zero so callers can divide by the result unconditionally.
 */
function greatestCommonDivisor(a: number, b: number): number {
  const euclid = (x: number, y: number): number => (y === 0 ? x : euclid(y, x % y));
  const result = euclid(Math.abs(a), Math.abs(b));
  return result || 1;
}
/**
 * Reduce a "WIDTHxHEIGHT" size string to its simplest "W:H" aspect ratio
 * (e.g. "1280x720" -> "16:9"). Returns undefined when the size is absent or
 * cannot be parsed.
 */
export function deriveAspectRatioFromSize(size?: string): string | undefined {
  const parsed = parseSizeValue(size);
  if (parsed === null) {
    return undefined;
  }
  const { width, height } = parsed;
  const scale = greatestCommonDivisor(width, height);
  return `${width / scale}:${height / scale}`;
}
/**
 * Pick the supported aspect ratio closest to the caller's request.
 * With no supported list, the request (or a ratio derived from the requested
 * size) passes through untouched. An exact match wins immediately; otherwise
 * candidates are ranked by log-ratio distance, then cross-multiplication
 * difference, then label. Returns undefined when nothing comparable exists.
 */
export function resolveClosestAspectRatio(params: {
  requestedAspectRatio?: string;
  requestedSize?: string;
  supportedAspectRatios?: readonly string[];
}): string | undefined {
  const supported = (params.supportedAspectRatios ?? []).filter(
    (entry) => entry.trim().length > 0,
  );
  if (supported.length === 0) {
    return params.requestedAspectRatio ?? deriveAspectRatioFromSize(params.requestedSize);
  }
  if (params.requestedAspectRatio && supported.includes(params.requestedAspectRatio)) {
    return params.requestedAspectRatio;
  }
  // Fall back to the ratio implied by the requested size when the explicit
  // ratio is absent or unparseable.
  const requested =
    parseAspectRatioValue(params.requestedAspectRatio) ??
    parseAspectRatioValue(deriveAspectRatioFromSize(params.requestedSize));
  if (requested === null) {
    return undefined;
  }
  let best:
    | { value: string; score: { primary: number; secondary: number; tertiary: string } }
    | null = null;
  for (const candidate of supported) {
    const parsed = parseAspectRatioValue(candidate);
    if (parsed === null) {
      continue;
    }
    const score = {
      // Log-ratio distance treats too-wide and too-tall mismatches symmetrically.
      primary: Math.abs(Math.log(parsed.value / requested.value)),
      // Cross-multiplication difference breaks near-ties without division error.
      secondary: Math.abs(parsed.width * requested.height - requested.width * parsed.height),
      tertiary: candidate,
    };
    if (compareScores(score, best?.score ?? null)) {
      best = { value: candidate, score };
    }
  }
  return best?.value;
}
/**
 * Pick the supported size closest to the caller's request.
 * With no supported list the requested size passes through; an exact match
 * wins immediately. Otherwise candidates are ranked by aspect-ratio shape
 * first, then by area: closest area when a concrete size was requested, or
 * smallest area when only an aspect ratio is known. Returns undefined when
 * neither a size nor a ratio can be parsed from the request.
 */
export function resolveClosestSize(params: {
  requestedSize?: string;
  requestedAspectRatio?: string;
  supportedSizes?: readonly string[];
}): string | undefined {
  const supported = (params.supportedSizes ?? []).filter((entry) => entry.trim().length > 0);
  if (supported.length === 0) {
    return params.requestedSize;
  }
  if (params.requestedSize && supported.includes(params.requestedSize)) {
    return params.requestedSize;
  }
  const requestedSize = parseSizeValue(params.requestedSize);
  const requestedRatio = parseAspectRatioValue(params.requestedAspectRatio);
  if (requestedSize === null && requestedRatio === null) {
    return undefined;
  }
  // At least one of the two parsed, so this target is always defined.
  const targetRatio = requestedSize?.aspectRatio ?? requestedRatio!.value;
  let best:
    | { value: string; score: { primary: number; secondary: number; tertiary: string } }
    | null = null;
  for (const candidate of supported) {
    const parsed = parseSizeValue(candidate);
    if (parsed === null) {
      continue;
    }
    const score = {
      primary: Math.abs(Math.log(parsed.aspectRatio / targetRatio)),
      secondary: requestedSize
        ? Math.abs(Math.log(parsed.area / requestedSize.area))
        : parsed.area,
      tertiary: candidate,
    };
    if (compareScores(score, best?.score ?? null)) {
      best = { value: candidate, score };
    }
  }
  return best?.value;
}
/**
 * Pick the supported resolution tier closest to the requested one, where
 * distance is positional within `order` (defaults to IMAGE_RESOLUTION_ORDER).
 * Ties prefer the earlier tier in `order`, then the label. With no supported
 * list the request passes through; a request absent from `order` yields
 * undefined, as do candidates absent from `order` (they are skipped).
 */
export function resolveClosestResolution<TResolution extends string>(params: {
  requestedResolution?: TResolution;
  supportedResolutions?: readonly TResolution[];
  order?: readonly TResolution[];
}): TResolution | undefined {
  const supported = (params.supportedResolutions ?? []).filter(
    (entry) => entry.trim().length > 0,
  );
  if (supported.length === 0) {
    return params.requestedResolution;
  }
  if (params.requestedResolution && supported.includes(params.requestedResolution)) {
    return params.requestedResolution;
  }
  const tierOrder = params.order ?? (IMAGE_RESOLUTION_ORDER as readonly TResolution[]);
  const requestedIndex = params.requestedResolution
    ? tierOrder.indexOf(params.requestedResolution)
    : -1;
  if (requestedIndex < 0) {
    return undefined;
  }
  let best:
    | { value: TResolution; score: { primary: number; secondary: number; tertiary: string } }
    | null = null;
  for (const candidate of supported) {
    const candidateIndex = tierOrder.indexOf(candidate);
    if (candidateIndex < 0) {
      continue;
    }
    const score = {
      primary: Math.abs(candidateIndex - requestedIndex),
      secondary: candidateIndex,
      tertiary: candidate,
    };
    if (compareScores(score, best?.score ?? null)) {
      best = { value: candidate, score };
    }
  }
  return best?.value;
}
/**
 * Clamp a requested duration to a provider's maximum.
 * Both values are rounded to whole seconds with a floor of 1. A missing or
 * non-finite request yields undefined; a missing, non-finite, or
 * non-positive max leaves the rounded request uncapped.
 */
export function normalizeDurationToClosestMax(
  durationSeconds?: number,
  maxDurationSeconds?: number,
) {
  const isUsable = (value?: number): value is number =>
    typeof value === "number" && Number.isFinite(value);
  if (!isUsable(durationSeconds)) {
    return undefined;
  }
  const requested = Math.max(1, Math.round(durationSeconds));
  if (!isUsable(maxDurationSeconds) || maxDurationSeconds <= 0) {
    return requested;
  }
  const cap = Math.max(1, Math.round(maxDurationSeconds));
  return requested <= cap ? requested : cap;
}
export function throwCapabilityGenerationFailure(params: {
capabilityLabel: string;
attempts: FallbackAttempt[];

View File

@@ -12,6 +12,7 @@ const mocks = vi.hoisted(() => {
(providerId: string, config?: OpenClawConfig) => MusicGenerationProvider | undefined
>(() => undefined),
getProviderEnvVars: vi.fn<(providerId: string) => string[]>(() => []),
resolveProviderAuthEnvVarCandidates: vi.fn(() => ({})),
isFailoverError: vi.fn<(err: unknown) => boolean>(() => false),
listMusicGenerationProviders: vi.fn<(config?: OpenClawConfig) => MusicGenerationProvider[]>(
() => [],
@@ -49,9 +50,14 @@ vi.mock("../config/model-input.js", () => ({
vi.mock("../logging/subsystem.js", () => ({
createSubsystemLogger: mocks.createSubsystemLogger,
}));
vi.mock("../secrets/provider-env-vars.js", () => ({
getProviderEnvVars: mocks.getProviderEnvVars,
}));
vi.mock("../secrets/provider-env-vars.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("../secrets/provider-env-vars.js")>();
return {
...actual,
getProviderEnvVars: mocks.getProviderEnvVars,
resolveProviderAuthEnvVarCandidates: mocks.resolveProviderAuthEnvVarCandidates,
};
});
vi.mock("./model-ref.js", () => ({
parseMusicGenerationModelRef: mocks.parseMusicGenerationModelRef,
}));
@@ -67,6 +73,8 @@ describe("music-generation runtime", () => {
mocks.getMusicGenerationProvider.mockReset();
mocks.getProviderEnvVars.mockReset();
mocks.getProviderEnvVars.mockReturnValue([]);
mocks.resolveProviderAuthEnvVarCandidates.mockReset();
mocks.resolveProviderAuthEnvVarCandidates.mockReturnValue({});
mocks.isFailoverError.mockReset();
mocks.isFailoverError.mockReturnValue(false);
mocks.listMusicGenerationProviders.mockReset();
@@ -129,6 +137,68 @@ describe("music-generation runtime", () => {
]);
});
// Auto provider fallback: when the default provider's generation call fails,
// the runtime should fall through to the next configured provider and record
// the failed attempt (provider, model, error message).
it("auto-detects and falls through to another configured music-generation provider by default", async () => {
  // google resolves but always fails; minimax resolves and succeeds.
  mocks.getMusicGenerationProvider.mockImplementation((providerId: string) => {
    if (providerId === "google") {
      return {
        id: "google",
        defaultModel: "lyria-3-clip-preview",
        capabilities: {},
        isConfigured: () => true,
        async generateMusic() {
          throw new Error("Google music generation response missing audio data");
        },
      };
    }
    if (providerId === "minimax") {
      return {
        id: "minimax",
        defaultModel: "music-2.5+",
        capabilities: {},
        isConfigured: () => true,
        async generateMusic() {
          return {
            tracks: [{ buffer: Buffer.from("mp3-bytes"), mimeType: "audio/mpeg" }],
            model: "music-2.5+",
          };
        },
      };
    }
    return undefined;
  });
  // Both providers advertise themselves as configured so auto-fallback
  // considers each of them as a candidate.
  mocks.listMusicGenerationProviders.mockReturnValue([
    {
      id: "google",
      defaultModel: "lyria-3-clip-preview",
      capabilities: {},
      isConfigured: () => true,
      generateMusic: async () => ({ tracks: [] }),
    },
    {
      id: "minimax",
      defaultModel: "music-2.5+",
      capabilities: {},
      isConfigured: () => true,
      generateMusic: async () => ({ tracks: [] }),
    },
  ]);
  // Empty config: the runtime must discover candidates on its own.
  const result = await generateMusic({
    cfg: {} as OpenClawConfig,
    prompt: "play a synth line",
  });
  expect(result.provider).toBe("minimax");
  expect(result.model).toBe("music-2.5+");
  // The failed google attempt is surfaced for observability.
  expect(result.attempts).toEqual([
    {
      provider: "google",
      model: "lyria-3-clip-preview",
      error: "Google music generation response missing audio data",
    },
  ]);
});
it("lists runtime music-generation providers through the provider registry", () => {
const providers: MusicGenerationProvider[] = [
{
@@ -285,4 +355,52 @@ describe("music-generation runtime", () => {
{ key: "format", value: "mp3" },
]);
});
// Duration normalization: a request above the provider's maxDurationSeconds
// should be clamped to the max and reported via result metadata, not dropped
// as an ignored override.
it("normalizes requested durations to the closest supported max duration", async () => {
  // Captures the durationSeconds the provider actually receives.
  let seenRequest:
    | {
        durationSeconds?: number;
      }
    | undefined;
  mocks.resolveAgentModelPrimaryValue.mockReturnValue("minimax/music-2.5+");
  // Provider supports durations but caps them at 30 seconds.
  mocks.getMusicGenerationProvider.mockReturnValue({
    id: "minimax",
    capabilities: {
      generate: {
        supportsDuration: true,
        maxDurationSeconds: 30,
      },
    },
    generateMusic: async (req) => {
      seenRequest = {
        durationSeconds: req.durationSeconds,
      };
      return {
        tracks: [{ buffer: Buffer.from("mp3-bytes"), mimeType: "audio/mpeg" }],
        model: "music-2.5+",
      };
    },
  });
  // Request 45s, well above the 30s cap.
  const result = await generateMusic({
    cfg: {
      agents: {
        defaults: {
          musicGenerationModel: { primary: "minimax/music-2.5+" },
        },
      },
    } as OpenClawConfig,
    prompt: "energetic arcade anthem",
    durationSeconds: 45,
  });
  // The provider saw the clamped value.
  expect(seenRequest).toEqual({
    durationSeconds: 30,
  });
  // Clamping is a normalization, so nothing lands in ignoredOverrides…
  expect(result.ignoredOverrides).toEqual([]);
  // …and both the original and normalized values surface in metadata.
  expect(result.metadata).toMatchObject({
    requestedDurationSeconds: 45,
    normalizedDurationSeconds: 30,
  });
});
});

View File

@@ -5,6 +5,7 @@ import type { OpenClawConfig } from "../config/config.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import {
buildNoCapabilityModelConfiguredMessage,
normalizeDurationToClosestMax,
resolveCapabilityModelCandidates,
throwCapabilityGenerationFailure,
} from "../media-generation/runtime-shared.js";
@@ -90,6 +91,8 @@ function resolveProviderMusicGenerationOverrides(params: {
if (typeof durationSeconds === "number" && !caps.supportsDuration) {
ignoredOverrides.push({ key: "durationSeconds", value: durationSeconds });
durationSeconds = undefined;
} else if (typeof durationSeconds === "number") {
durationSeconds = normalizeDurationToClosestMax(durationSeconds, caps.maxDurationSeconds);
}
if (format) {
@@ -121,6 +124,8 @@ export async function generateMusic(
modelConfig: params.cfg.agents?.defaults?.musicGenerationModel,
modelOverride: params.modelOverride,
parseModelRef: parseMusicGenerationModelRef,
agentDir: params.agentDir,
listProviders: listMusicGenerationProviders,
});
if (candidates.length === 0) {
throw new Error(
@@ -181,7 +186,17 @@ export async function generateMusic(
model: result.model ?? candidate.model,
attempts,
lyrics: result.lyrics,
metadata: result.metadata,
metadata: {
...result.metadata,
...(typeof params.durationSeconds === "number" &&
typeof sanitized.durationSeconds === "number" &&
params.durationSeconds !== sanitized.durationSeconds
? {
requestedDurationSeconds: params.durationSeconds,
normalizedDurationSeconds: sanitized.durationSeconds,
}
: {}),
},
ignoredOverrides: sanitized.ignoredOverrides,
};
} catch (err) {

View File

@@ -9,6 +9,7 @@ const mocks = vi.hoisted(() => {
createSubsystemLogger: vi.fn(() => ({ debug })),
describeFailoverError: vi.fn(),
getProviderEnvVars: vi.fn<(providerId: string) => string[]>(() => []),
resolveProviderAuthEnvVarCandidates: vi.fn(() => ({})),
getVideoGenerationProvider: vi.fn<
(providerId: string, config?: OpenClawConfig) => VideoGenerationProvider | undefined
>(() => undefined),
@@ -49,9 +50,14 @@ vi.mock("../config/model-input.js", () => ({
vi.mock("../logging/subsystem.js", () => ({
createSubsystemLogger: mocks.createSubsystemLogger,
}));
vi.mock("../secrets/provider-env-vars.js", () => ({
getProviderEnvVars: mocks.getProviderEnvVars,
}));
vi.mock("../secrets/provider-env-vars.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("../secrets/provider-env-vars.js")>();
return {
...actual,
getProviderEnvVars: mocks.getProviderEnvVars,
resolveProviderAuthEnvVarCandidates: mocks.resolveProviderAuthEnvVarCandidates,
};
});
vi.mock("./model-ref.js", () => ({
parseVideoGenerationModelRef: mocks.parseVideoGenerationModelRef,
}));
@@ -66,6 +72,8 @@ describe("video-generation runtime", () => {
mocks.describeFailoverError.mockReset();
mocks.getProviderEnvVars.mockReset();
mocks.getProviderEnvVars.mockReturnValue([]);
mocks.resolveProviderAuthEnvVarCandidates.mockReset();
mocks.resolveProviderAuthEnvVarCandidates.mockReturnValue({});
mocks.getVideoGenerationProvider.mockReset();
mocks.isFailoverError.mockReset();
mocks.isFailoverError.mockReturnValue(false);
@@ -129,6 +137,68 @@ describe("video-generation runtime", () => {
]);
});
// Auto provider fallback: when the default video provider fails (e.g. a
// moderation block), the runtime should retry the next configured provider
// and record the failed attempt.
it("auto-detects and falls through to another configured video-generation provider by default", async () => {
  // openai resolves but always fails; runway resolves and succeeds.
  mocks.getVideoGenerationProvider.mockImplementation((providerId: string) => {
    if (providerId === "openai") {
      return {
        id: "openai",
        defaultModel: "sora-2",
        capabilities: {},
        isConfigured: () => true,
        async generateVideo() {
          throw new Error("Your request was blocked by our moderation system.");
        },
      };
    }
    if (providerId === "runway") {
      return {
        id: "runway",
        defaultModel: "gen4.5",
        capabilities: {},
        isConfigured: () => true,
        async generateVideo() {
          return {
            videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }],
            model: "gen4.5",
          };
        },
      };
    }
    return undefined;
  });
  // Both providers advertise themselves as configured so auto-fallback
  // considers each of them as a candidate.
  mocks.listVideoGenerationProviders.mockReturnValue([
    {
      id: "openai",
      defaultModel: "sora-2",
      capabilities: {},
      isConfigured: () => true,
      generateVideo: async () => ({ videos: [] }),
    },
    {
      id: "runway",
      defaultModel: "gen4.5",
      capabilities: {},
      isConfigured: () => true,
      generateVideo: async () => ({ videos: [] }),
    },
  ]);
  // Empty config: the runtime must discover candidates on its own.
  const result = await generateVideo({
    cfg: {} as OpenClawConfig,
    prompt: "animate a cat",
  });
  expect(result.provider).toBe("runway");
  expect(result.model).toBe("gen4.5");
  // The failed openai attempt is surfaced for observability.
  expect(result.attempts).toEqual([
    {
      provider: "openai",
      model: "sora-2",
      error: "Your request was blocked by our moderation system.",
    },
  ]);
});
it("lists runtime video-generation providers through the provider registry", () => {
const providers: VideoGenerationProvider[] = [
{
@@ -303,7 +373,6 @@ describe("video-generation runtime", () => {
} as OpenClawConfig,
prompt: "animate a lobster",
size: "1280x720",
aspectRatio: "16:9",
inputImages: [{ buffer: Buffer.from("png"), mimeType: "image/png" }],
});
@@ -312,7 +381,12 @@ describe("video-generation runtime", () => {
aspectRatio: "16:9",
resolution: undefined,
});
expect(result.ignoredOverrides).toEqual([{ key: "size", value: "1280x720" }]);
expect(result.ignoredOverrides).toEqual([]);
expect(result.metadata).toMatchObject({
requestedSize: "1280x720",
normalizedAspectRatio: "16:9",
aspectRatioDerivedFromSize: "16:9",
});
});
it("builds a generic config hint without hardcoded provider ids", async () => {

View File

@@ -5,6 +5,8 @@ import type { OpenClawConfig } from "../config/config.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import {
buildNoCapabilityModelConfiguredMessage,
deriveAspectRatioFromSize,
resolveClosestAspectRatio,
resolveCapabilityModelCandidates,
throwCapabilityGenerationFailure,
} from "../media-generation/runtime-shared.js";
@@ -96,7 +98,20 @@ function resolveProviderVideoGenerationOverrides(params: {
}
if (size && !caps.supportsSize) {
ignoredOverrides.push({ key: "size", value: size });
let translated = false;
if (caps.supportsAspectRatio) {
const normalizedAspectRatio = resolveClosestAspectRatio({
requestedAspectRatio: aspectRatio,
requestedSize: size,
});
if (normalizedAspectRatio) {
aspectRatio = normalizedAspectRatio;
translated = true;
}
}
if (!translated) {
ignoredOverrides.push({ key: "size", value: size });
}
size = undefined;
}
@@ -138,6 +153,8 @@ export async function generateVideo(
modelConfig: params.cfg.agents?.defaults?.videoGenerationModel,
modelOverride: params.modelOverride,
parseModelRef: parseVideoGenerationModelRef,
agentDir: params.agentDir,
listProviders: listVideoGenerationProviders,
});
if (candidates.length === 0) {
throw new Error(buildNoVideoGenerationModelConfiguredMessage(params.cfg));
@@ -212,17 +229,39 @@ export async function generateVideo(
model: result.model ?? candidate.model,
attempts,
ignoredOverrides: sanitized.ignoredOverrides,
metadata:
typeof requestedDurationSeconds === "number" &&
metadata: {
...result.metadata,
...((params.size && sanitized.aspectRatio && params.size !== sanitized.size) ||
(params.aspectRatio &&
sanitized.aspectRatio &&
params.aspectRatio !== sanitized.aspectRatio)
? {
...(params.size ? { requestedSize: params.size } : {}),
...(params.aspectRatio ? { requestedAspectRatio: params.aspectRatio } : {}),
normalizedAspectRatio: sanitized.aspectRatio,
...(params.size
? { aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size) }
: {}),
}
: {}),
...(params.resolution &&
sanitized.resolution &&
params.resolution !== sanitized.resolution
? {
requestedResolution: params.resolution,
normalizedResolution: sanitized.resolution,
}
: {}),
...(typeof requestedDurationSeconds === "number" &&
typeof normalizedDurationSeconds === "number" &&
requestedDurationSeconds !== normalizedDurationSeconds
? {
...result.metadata,
requestedDurationSeconds,
normalizedDurationSeconds,
...(supportedDurationSeconds ? { supportedDurationSeconds } : {}),
}
: result.metadata,
: {}),
},
};
} catch (err) {
lastError = err;