mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-03 01:40:23 +00:00
feat: preserve media intent across provider fallback
This commit is contained in:
@@ -732,6 +732,47 @@ describe("createImageGenerateTool", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("surfaces normalized image geometry from runtime metadata", async () => {
|
||||
vi.spyOn(imageGenerationRuntime, "generateImage").mockResolvedValue({
|
||||
provider: "minimax",
|
||||
model: "image-01",
|
||||
attempts: [],
|
||||
ignoredOverrides: [],
|
||||
images: [
|
||||
{
|
||||
buffer: Buffer.from("png-out"),
|
||||
mimeType: "image/png",
|
||||
fileName: "generated.png",
|
||||
},
|
||||
],
|
||||
metadata: {
|
||||
requestedSize: "1280x720",
|
||||
normalizedAspectRatio: "16:9",
|
||||
},
|
||||
});
|
||||
vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValue({
|
||||
path: "/tmp/generated.png",
|
||||
id: "generated.png",
|
||||
size: 7,
|
||||
contentType: "image/png",
|
||||
});
|
||||
|
||||
const tool = createToolWithPrimaryImageModel("minimax/image-01");
|
||||
const result = await tool.execute("call-minimax-generate", {
|
||||
prompt: "A lobster at the movies",
|
||||
size: "1280x720",
|
||||
});
|
||||
|
||||
expect(result.details).toMatchObject({
|
||||
aspectRatio: "16:9",
|
||||
metadata: {
|
||||
requestedSize: "1280x720",
|
||||
normalizedAspectRatio: "16:9",
|
||||
},
|
||||
});
|
||||
expect(result.details).not.toHaveProperty("size");
|
||||
});
|
||||
|
||||
it("rejects unsupported aspect ratios", async () => {
|
||||
const tool = createImageGenerateTool({
|
||||
config: {
|
||||
|
||||
@@ -615,6 +615,25 @@ export function createImageGenerateTool(options?: {
|
||||
ignoredOverrides.length > 0
|
||||
? `Ignored unsupported overrides for ${result.provider}/${result.model}: ${ignoredOverrides.map(formatIgnoredImageGenerationOverride).join(", ")}.`
|
||||
: undefined;
|
||||
const normalizedSize =
|
||||
typeof result.metadata?.normalizedSize === "string" && result.metadata.normalizedSize.trim()
|
||||
? result.metadata.normalizedSize
|
||||
: undefined;
|
||||
const normalizedAspectRatio =
|
||||
typeof result.metadata?.normalizedAspectRatio === "string" &&
|
||||
result.metadata.normalizedAspectRatio.trim()
|
||||
? result.metadata.normalizedAspectRatio
|
||||
: undefined;
|
||||
const normalizedResolution =
|
||||
typeof result.metadata?.normalizedResolution === "string" &&
|
||||
result.metadata.normalizedResolution.trim()
|
||||
? result.metadata.normalizedResolution
|
||||
: undefined;
|
||||
const sizeTranslatedToAspectRatio =
|
||||
!normalizedSize &&
|
||||
typeof result.metadata?.requestedSize === "string" &&
|
||||
result.metadata.requestedSize === size &&
|
||||
Boolean(normalizedAspectRatio);
|
||||
|
||||
const savedImages = await Promise.all(
|
||||
result.images.map((image) =>
|
||||
@@ -664,9 +683,15 @@ export function createImageGenerateTool(options?: {
|
||||
})),
|
||||
}
|
||||
: {}),
|
||||
...(resolution ? { resolution } : {}),
|
||||
...(size ? { size } : {}),
|
||||
...(aspectRatio ? { aspectRatio } : {}),
|
||||
...(normalizedResolution || resolution
|
||||
? { resolution: normalizedResolution ?? resolution }
|
||||
: {}),
|
||||
...(normalizedSize || (size && !sizeTranslatedToAspectRatio)
|
||||
? { size: normalizedSize ?? size }
|
||||
: {}),
|
||||
...(normalizedAspectRatio || aspectRatio
|
||||
? { aspectRatio: normalizedAspectRatio ?? aspectRatio }
|
||||
: {}),
|
||||
...(filename ? { filename } : {}),
|
||||
attempts: result.attempts,
|
||||
metadata: result.metadata,
|
||||
|
||||
@@ -355,4 +355,55 @@ describe("createMusicGenerateTool", () => {
|
||||
expect(result.details).not.toHaveProperty("durationSeconds");
|
||||
expect(result.details).not.toHaveProperty("format");
|
||||
});
|
||||
|
||||
it("surfaces normalized durations from runtime metadata", async () => {
|
||||
vi.spyOn(musicGenerationRuntime, "generateMusic").mockResolvedValue({
|
||||
provider: "minimax",
|
||||
model: "music-2.5+",
|
||||
attempts: [],
|
||||
ignoredOverrides: [],
|
||||
tracks: [
|
||||
{
|
||||
buffer: Buffer.from("music-bytes"),
|
||||
mimeType: "audio/mpeg",
|
||||
fileName: "night-drive.mp3",
|
||||
},
|
||||
],
|
||||
metadata: {
|
||||
requestedDurationSeconds: 45,
|
||||
normalizedDurationSeconds: 30,
|
||||
},
|
||||
});
|
||||
vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
|
||||
path: "/tmp/generated-night-drive.mp3",
|
||||
id: "generated-night-drive.mp3",
|
||||
size: 11,
|
||||
contentType: "audio/mpeg",
|
||||
});
|
||||
|
||||
const tool = createMusicGenerateTool({
|
||||
config: asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
musicGenerationModel: { primary: "minimax/music-2.5+" },
|
||||
},
|
||||
},
|
||||
}),
|
||||
});
|
||||
if (!tool) {
|
||||
throw new Error("expected music_generate tool");
|
||||
}
|
||||
|
||||
const result = await tool.execute("call-1", {
|
||||
prompt: "night-drive synthwave",
|
||||
durationSeconds: 45,
|
||||
});
|
||||
const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";
|
||||
|
||||
expect(text).toContain("Duration normalized: requested 45s; used 30s.");
|
||||
expect(result.details).toMatchObject({
|
||||
durationSeconds: 30,
|
||||
requestedDurationSeconds: 45,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -236,17 +236,6 @@ function validateMusicGenerationCapabilities(params: {
|
||||
if (!caps) {
|
||||
return;
|
||||
}
|
||||
if (
|
||||
typeof params.durationSeconds === "number" &&
|
||||
caps.supportsDuration &&
|
||||
typeof caps.maxDurationSeconds === "number"
|
||||
) {
|
||||
if (params.durationSeconds > caps.maxDurationSeconds) {
|
||||
throw new ToolInputError(
|
||||
`${provider.id} supports at most ${caps.maxDurationSeconds} seconds per track.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type MusicGenerateSandboxConfig = {
|
||||
@@ -418,6 +407,21 @@ async function executeMusicGenerationJob(params: {
|
||||
);
|
||||
const ignoredOverrides = result.ignoredOverrides ?? [];
|
||||
const ignoredOverrideKeys = new Set(ignoredOverrides.map((entry) => entry.key));
|
||||
const requestedDurationSeconds =
|
||||
typeof result.metadata?.requestedDurationSeconds === "number" &&
|
||||
Number.isFinite(result.metadata.requestedDurationSeconds)
|
||||
? result.metadata.requestedDurationSeconds
|
||||
: params.durationSeconds;
|
||||
const runtimeNormalizedDurationSeconds =
|
||||
typeof result.metadata?.normalizedDurationSeconds === "number" &&
|
||||
Number.isFinite(result.metadata.normalizedDurationSeconds)
|
||||
? result.metadata.normalizedDurationSeconds
|
||||
: undefined;
|
||||
const appliedDurationSeconds =
|
||||
runtimeNormalizedDurationSeconds ??
|
||||
(!ignoredOverrideKeys.has("durationSeconds") && typeof params.durationSeconds === "number"
|
||||
? params.durationSeconds
|
||||
: undefined);
|
||||
const warning =
|
||||
ignoredOverrides.length > 0
|
||||
? `Ignored unsupported overrides for ${result.provider}/${result.model}: ${ignoredOverrides.map((entry) => `${entry.key}=${String(entry.value)}`).join(", ")}.`
|
||||
@@ -425,9 +429,14 @@ async function executeMusicGenerationJob(params: {
|
||||
const lines = [
|
||||
`Generated ${savedTracks.length} track${savedTracks.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
|
||||
...(warning ? [`Warning: ${warning}`] : []),
|
||||
typeof requestedDurationSeconds === "number" &&
|
||||
typeof appliedDurationSeconds === "number" &&
|
||||
requestedDurationSeconds !== appliedDurationSeconds
|
||||
? `Duration normalized: requested ${requestedDurationSeconds}s; used ${appliedDurationSeconds}s.`
|
||||
: null,
|
||||
...(result.lyrics?.length ? ["Lyrics returned.", ...result.lyrics] : []),
|
||||
...savedTracks.map((track) => `MEDIA:${track.path}`),
|
||||
];
|
||||
].filter((entry): entry is string => Boolean(entry));
|
||||
return {
|
||||
provider: result.provider,
|
||||
model: result.model,
|
||||
@@ -456,8 +465,13 @@ async function executeMusicGenerationJob(params: {
|
||||
...(!ignoredOverrideKeys.has("instrumental") && typeof params.instrumental === "boolean"
|
||||
? { instrumental: params.instrumental }
|
||||
: {}),
|
||||
...(!ignoredOverrideKeys.has("durationSeconds") && typeof params.durationSeconds === "number"
|
||||
? { durationSeconds: params.durationSeconds }
|
||||
...(typeof appliedDurationSeconds === "number"
|
||||
? { durationSeconds: appliedDurationSeconds }
|
||||
: {}),
|
||||
...(typeof requestedDurationSeconds === "number" &&
|
||||
typeof appliedDurationSeconds === "number" &&
|
||||
requestedDurationSeconds !== appliedDurationSeconds
|
||||
? { requestedDurationSeconds }
|
||||
: {}),
|
||||
...(!ignoredOverrideKeys.has("format") && params.format ? { format: params.format } : {}),
|
||||
...(params.filename ? { filename: params.filename } : {}),
|
||||
|
||||
@@ -298,6 +298,59 @@ describe("createVideoGenerateTool", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("surfaces normalized video geometry from runtime metadata", async () => {
|
||||
vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
|
||||
provider: "runway",
|
||||
model: "gen4.5",
|
||||
attempts: [],
|
||||
ignoredOverrides: [],
|
||||
videos: [
|
||||
{
|
||||
buffer: Buffer.from("video-bytes"),
|
||||
mimeType: "video/mp4",
|
||||
fileName: "lobster.mp4",
|
||||
},
|
||||
],
|
||||
metadata: {
|
||||
requestedSize: "1280x720",
|
||||
normalizedAspectRatio: "16:9",
|
||||
},
|
||||
});
|
||||
vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
|
||||
path: "/tmp/generated-lobster.mp4",
|
||||
id: "generated-lobster.mp4",
|
||||
size: 11,
|
||||
contentType: "video/mp4",
|
||||
});
|
||||
|
||||
const tool = createVideoGenerateTool({
|
||||
config: asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
videoGenerationModel: { primary: "runway/gen4.5" },
|
||||
},
|
||||
},
|
||||
}),
|
||||
});
|
||||
if (!tool) {
|
||||
throw new Error("expected video_generate tool");
|
||||
}
|
||||
|
||||
const result = await tool.execute("call-1", {
|
||||
prompt: "friendly lobster surfing",
|
||||
size: "1280x720",
|
||||
});
|
||||
|
||||
expect(result.details).toMatchObject({
|
||||
aspectRatio: "16:9",
|
||||
metadata: {
|
||||
requestedSize: "1280x720",
|
||||
normalizedAspectRatio: "16:9",
|
||||
},
|
||||
});
|
||||
expect(result.details).not.toHaveProperty("size");
|
||||
});
|
||||
|
||||
it("lists supported provider durations when advertised", async () => {
|
||||
vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([
|
||||
{
|
||||
|
||||
@@ -11,7 +11,6 @@ import {
|
||||
resolveVideoGenerationMode,
|
||||
resolveVideoGenerationModeCapabilities,
|
||||
} from "../../video-generation/capabilities.js";
|
||||
import { resolveVideoGenerationSupportedDurations } from "../../video-generation/duration-support.js";
|
||||
import { parseVideoGenerationModelRef } from "../../video-generation/model-ref.js";
|
||||
import {
|
||||
generateVideo,
|
||||
@@ -327,22 +326,6 @@ function validateVideoGenerationCapabilities(params: {
|
||||
);
|
||||
}
|
||||
}
|
||||
if (
|
||||
typeof params.durationSeconds === "number" &&
|
||||
Number.isFinite(params.durationSeconds) &&
|
||||
!resolveVideoGenerationSupportedDurations({
|
||||
provider,
|
||||
model: params.model,
|
||||
inputImageCount: params.inputImageCount,
|
||||
inputVideoCount: params.inputVideoCount,
|
||||
}) &&
|
||||
typeof caps.maxDurationSeconds === "number" &&
|
||||
params.durationSeconds > caps.maxDurationSeconds
|
||||
) {
|
||||
throw new ToolInputError(
|
||||
`${provider.id} supports at most ${caps.maxDurationSeconds} seconds per video.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function formatIgnoredVideoGenerationOverride(override: VideoGenerationIgnoredOverride): string {
|
||||
@@ -566,6 +549,25 @@ async function executeVideoGenerationJob(params: {
|
||||
(entry): entry is number => typeof entry === "number" && Number.isFinite(entry),
|
||||
)
|
||||
: undefined;
|
||||
const normalizedSize =
|
||||
typeof result.metadata?.normalizedSize === "string" && result.metadata.normalizedSize.trim()
|
||||
? result.metadata.normalizedSize
|
||||
: undefined;
|
||||
const normalizedAspectRatio =
|
||||
typeof result.metadata?.normalizedAspectRatio === "string" &&
|
||||
result.metadata.normalizedAspectRatio.trim()
|
||||
? result.metadata.normalizedAspectRatio
|
||||
: undefined;
|
||||
const normalizedResolution =
|
||||
typeof result.metadata?.normalizedResolution === "string" &&
|
||||
result.metadata.normalizedResolution.trim()
|
||||
? result.metadata.normalizedResolution
|
||||
: undefined;
|
||||
const sizeTranslatedToAspectRatio =
|
||||
!normalizedSize &&
|
||||
typeof result.metadata?.requestedSize === "string" &&
|
||||
result.metadata.requestedSize === params.size &&
|
||||
Boolean(normalizedAspectRatio);
|
||||
const lines = [
|
||||
`Generated ${savedVideos.length} video${savedVideos.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
|
||||
...(warning ? [`Warning: ${warning}`] : []),
|
||||
@@ -629,12 +631,15 @@ async function executeVideoGenerationJob(params: {
|
||||
})),
|
||||
}
|
||||
: {}),
|
||||
...(!ignoredOverrideKeys.has("size") && params.size ? { size: params.size } : {}),
|
||||
...(!ignoredOverrideKeys.has("aspectRatio") && params.aspectRatio
|
||||
? { aspectRatio: params.aspectRatio }
|
||||
...(normalizedSize ||
|
||||
(!ignoredOverrideKeys.has("size") && params.size && !sizeTranslatedToAspectRatio)
|
||||
? { size: normalizedSize ?? params.size }
|
||||
: {}),
|
||||
...(!ignoredOverrideKeys.has("resolution") && params.resolution
|
||||
? { resolution: params.resolution }
|
||||
...(normalizedAspectRatio || (!ignoredOverrideKeys.has("aspectRatio") && params.aspectRatio)
|
||||
? { aspectRatio: normalizedAspectRatio ?? params.aspectRatio }
|
||||
: {}),
|
||||
...(normalizedResolution || (!ignoredOverrideKeys.has("resolution") && params.resolution)
|
||||
? { resolution: normalizedResolution ?? params.resolution }
|
||||
: {}),
|
||||
...(typeof normalizedDurationSeconds === "number"
|
||||
? { durationSeconds: normalizedDurationSeconds }
|
||||
|
||||
@@ -60,5 +60,6 @@ describe("generated base config schema", () => {
|
||||
expect(agentDefaultsProperties?.videoGenerationModel).toBeDefined();
|
||||
expect(uiHints["agents.defaults.videoGenerationModel.primary"]).toBeDefined();
|
||||
expect(uiHints["agents.defaults.videoGenerationModel.fallbacks"]).toBeDefined();
|
||||
expect(uiHints["agents.defaults.mediaGenerationAutoProviderFallback"]).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -3080,6 +3080,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
},
|
||||
],
|
||||
},
|
||||
mediaGenerationAutoProviderFallback: {
|
||||
type: "boolean",
|
||||
title: "Media Generation Auto Provider Fallback",
|
||||
description:
|
||||
"When true (default), shared image, music, and video generation automatically appends other auth-backed provider defaults after explicit primary/fallback refs. Set false to disable implicit cross-provider fallback while keeping explicit fallbacks.",
|
||||
},
|
||||
pdfModel: {
|
||||
anyOf: [
|
||||
{
|
||||
@@ -24995,6 +25001,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
help: "Ordered fallback music-generation models (provider/model).",
|
||||
tags: ["reliability"],
|
||||
},
|
||||
"agents.defaults.mediaGenerationAutoProviderFallback": {
|
||||
label: "Media Generation Auto Provider Fallback",
|
||||
help: "When true (default), shared image, music, and video generation automatically appends other auth-backed provider defaults after explicit primary/fallback refs. Set false to disable implicit cross-provider fallback while keeping explicit fallbacks.",
|
||||
tags: ["reliability"],
|
||||
},
|
||||
"agents.defaults.pdfModel.primary": {
|
||||
label: "PDF Model",
|
||||
help: "Optional PDF model (provider/model) for the PDF analysis tool. Defaults to imageModel, then session model.",
|
||||
|
||||
@@ -1097,6 +1097,8 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"Optional music-generation model (provider/model) used by the shared music generation capability.",
|
||||
"agents.defaults.musicGenerationModel.fallbacks":
|
||||
"Ordered fallback music-generation models (provider/model).",
|
||||
"agents.defaults.mediaGenerationAutoProviderFallback":
|
||||
"When true (default), shared image, music, and video generation automatically appends other auth-backed provider defaults after explicit primary/fallback refs. Set false to disable implicit cross-provider fallback while keeping explicit fallbacks.",
|
||||
"agents.defaults.pdfModel.primary":
|
||||
"Optional PDF model (provider/model) for the PDF analysis tool. Defaults to imageModel, then session model.",
|
||||
"agents.defaults.pdfModel.fallbacks": "Ordered fallback PDF models (provider/model).",
|
||||
|
||||
@@ -500,6 +500,7 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.videoGenerationModel.fallbacks": "Video Generation Model Fallbacks",
|
||||
"agents.defaults.musicGenerationModel.primary": "Music Generation Model",
|
||||
"agents.defaults.musicGenerationModel.fallbacks": "Music Generation Model Fallbacks",
|
||||
"agents.defaults.mediaGenerationAutoProviderFallback": "Media Generation Auto Provider Fallback",
|
||||
"agents.defaults.pdfModel.primary": "PDF Model",
|
||||
"agents.defaults.pdfModel.fallbacks": "PDF Model Fallbacks",
|
||||
"agents.defaults.pdfMaxBytesMb": "PDF Max Size (MB)",
|
||||
|
||||
@@ -132,6 +132,13 @@ export type AgentDefaultsConfig = {
|
||||
videoGenerationModel?: AgentModelConfig;
|
||||
/** Optional music-generation model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */
|
||||
musicGenerationModel?: AgentModelConfig;
|
||||
/**
|
||||
* When true (default), shared image/music/video generation appends other
|
||||
* auth-backed provider defaults after explicit primary/fallback refs. Set to
|
||||
* false to disable implicit cross-provider fallback while keeping explicit
|
||||
* fallbacks.
|
||||
*/
|
||||
mediaGenerationAutoProviderFallback?: boolean;
|
||||
/** Optional PDF-capable model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */
|
||||
pdfModel?: AgentModelConfig;
|
||||
/** Maximum PDF file size in megabytes (default: 10). */
|
||||
|
||||
@@ -23,6 +23,14 @@ describe("agent defaults schema", () => {
|
||||
).not.toThrow();
|
||||
});
|
||||
|
||||
it("accepts mediaGenerationAutoProviderFallback", () => {
|
||||
expect(() =>
|
||||
AgentDefaultsSchema.parse({
|
||||
mediaGenerationAutoProviderFallback: false,
|
||||
}),
|
||||
).not.toThrow();
|
||||
});
|
||||
|
||||
it("accepts contextInjection: always", () => {
|
||||
const result = AgentDefaultsSchema.parse({ contextInjection: "always" })!;
|
||||
expect(result.contextInjection).toBe("always");
|
||||
|
||||
@@ -23,6 +23,7 @@ export const AgentDefaultsSchema = z
|
||||
imageGenerationModel: AgentModelSchema.optional(),
|
||||
videoGenerationModel: AgentModelSchema.optional(),
|
||||
musicGenerationModel: AgentModelSchema.optional(),
|
||||
mediaGenerationAutoProviderFallback: z.boolean().optional(),
|
||||
pdfModel: AgentModelSchema.optional(),
|
||||
pdfMaxBytesMb: z.number().positive().optional(),
|
||||
pdfMaxPages: z.number().int().positive().optional(),
|
||||
|
||||
@@ -12,6 +12,7 @@ const mocks = vi.hoisted(() => {
|
||||
(providerId: string, config?: OpenClawConfig) => ImageGenerationProvider | undefined
|
||||
>(() => undefined),
|
||||
getProviderEnvVars: vi.fn<(providerId: string) => string[]>(() => []),
|
||||
resolveProviderAuthEnvVarCandidates: vi.fn(() => ({})),
|
||||
isFailoverError: vi.fn<(err: unknown) => boolean>(() => false),
|
||||
listImageGenerationProviders: vi.fn<(config?: OpenClawConfig) => ImageGenerationProvider[]>(
|
||||
() => [],
|
||||
@@ -49,9 +50,14 @@ vi.mock("../config/model-input.js", () => ({
|
||||
vi.mock("../logging/subsystem.js", () => ({
|
||||
createSubsystemLogger: mocks.createSubsystemLogger,
|
||||
}));
|
||||
vi.mock("../secrets/provider-env-vars.js", () => ({
|
||||
getProviderEnvVars: mocks.getProviderEnvVars,
|
||||
}));
|
||||
vi.mock("../secrets/provider-env-vars.js", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("../secrets/provider-env-vars.js")>();
|
||||
return {
|
||||
...actual,
|
||||
getProviderEnvVars: mocks.getProviderEnvVars,
|
||||
resolveProviderAuthEnvVarCandidates: mocks.resolveProviderAuthEnvVarCandidates,
|
||||
};
|
||||
});
|
||||
vi.mock("./model-ref.js", () => ({
|
||||
parseImageGenerationModelRef: mocks.parseImageGenerationModelRef,
|
||||
}));
|
||||
@@ -67,6 +73,8 @@ describe("image-generation runtime", () => {
|
||||
mocks.getImageGenerationProvider.mockReset();
|
||||
mocks.getProviderEnvVars.mockReset();
|
||||
mocks.getProviderEnvVars.mockReturnValue([]);
|
||||
mocks.resolveProviderAuthEnvVarCandidates.mockReset();
|
||||
mocks.resolveProviderAuthEnvVarCandidates.mockReturnValue({});
|
||||
mocks.isFailoverError.mockReset();
|
||||
mocks.isFailoverError.mockReturnValue(false);
|
||||
mocks.listImageGenerationProviders.mockReset();
|
||||
@@ -132,6 +140,80 @@ describe("image-generation runtime", () => {
|
||||
expect(result.ignoredOverrides).toEqual([]);
|
||||
});
|
||||
|
||||
it("auto-detects and falls through to another configured image-generation provider by default", async () => {
|
||||
mocks.getImageGenerationProvider.mockImplementation((providerId: string) => {
|
||||
if (providerId === "openai") {
|
||||
return {
|
||||
id: "openai",
|
||||
defaultModel: "gpt-image-1",
|
||||
capabilities: {
|
||||
generate: {},
|
||||
edit: { enabled: true },
|
||||
},
|
||||
isConfigured: () => true,
|
||||
async generateImage() {
|
||||
throw new Error("OpenAI API key missing");
|
||||
},
|
||||
};
|
||||
}
|
||||
if (providerId === "google") {
|
||||
return {
|
||||
id: "google",
|
||||
defaultModel: "gemini-3.1-flash-image-preview",
|
||||
capabilities: {
|
||||
generate: {},
|
||||
edit: { enabled: true },
|
||||
},
|
||||
isConfigured: () => true,
|
||||
async generateImage() {
|
||||
return {
|
||||
images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
|
||||
model: "gemini-3.1-flash-image-preview",
|
||||
};
|
||||
},
|
||||
};
|
||||
}
|
||||
return undefined;
|
||||
});
|
||||
mocks.listImageGenerationProviders.mockReturnValue([
|
||||
{
|
||||
id: "openai",
|
||||
defaultModel: "gpt-image-1",
|
||||
capabilities: {
|
||||
generate: {},
|
||||
edit: { enabled: true },
|
||||
},
|
||||
isConfigured: () => true,
|
||||
generateImage: async () => ({ images: [] }),
|
||||
},
|
||||
{
|
||||
id: "google",
|
||||
defaultModel: "gemini-3.1-flash-image-preview",
|
||||
capabilities: {
|
||||
generate: {},
|
||||
edit: { enabled: true },
|
||||
},
|
||||
isConfigured: () => true,
|
||||
generateImage: async () => ({ images: [] }),
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await generateImage({
|
||||
cfg: {} as OpenClawConfig,
|
||||
prompt: "draw a cat",
|
||||
});
|
||||
|
||||
expect(result.provider).toBe("google");
|
||||
expect(result.model).toBe("gemini-3.1-flash-image-preview");
|
||||
expect(result.attempts).toEqual([
|
||||
{
|
||||
provider: "openai",
|
||||
model: "gpt-image-1",
|
||||
error: "OpenAI API key missing",
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("drops unsupported provider geometry overrides and reports them", async () => {
|
||||
let seenRequest:
|
||||
| {
|
||||
@@ -196,6 +278,71 @@ describe("image-generation runtime", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("maps requested size to the closest supported fallback geometry", async () => {
|
||||
let seenRequest:
|
||||
| {
|
||||
size?: string;
|
||||
aspectRatio?: string;
|
||||
resolution?: string;
|
||||
}
|
||||
| undefined;
|
||||
mocks.resolveAgentModelPrimaryValue.mockReturnValue("minimax/image-01");
|
||||
mocks.getImageGenerationProvider.mockReturnValue({
|
||||
id: "minimax",
|
||||
capabilities: {
|
||||
generate: {
|
||||
supportsSize: false,
|
||||
supportsAspectRatio: true,
|
||||
supportsResolution: false,
|
||||
},
|
||||
edit: {
|
||||
enabled: true,
|
||||
supportsSize: false,
|
||||
supportsAspectRatio: true,
|
||||
supportsResolution: false,
|
||||
},
|
||||
geometry: {
|
||||
aspectRatios: ["1:1", "16:9"],
|
||||
},
|
||||
},
|
||||
async generateImage(req) {
|
||||
seenRequest = {
|
||||
size: req.size,
|
||||
aspectRatio: req.aspectRatio,
|
||||
resolution: req.resolution,
|
||||
};
|
||||
return {
|
||||
images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
|
||||
model: "image-01",
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const result = await generateImage({
|
||||
cfg: {
|
||||
agents: {
|
||||
defaults: {
|
||||
imageGenerationModel: { primary: "minimax/image-01" },
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
prompt: "draw a cat",
|
||||
size: "1280x720",
|
||||
});
|
||||
|
||||
expect(seenRequest).toEqual({
|
||||
size: undefined,
|
||||
aspectRatio: "16:9",
|
||||
resolution: undefined,
|
||||
});
|
||||
expect(result.ignoredOverrides).toEqual([]);
|
||||
expect(result.metadata).toMatchObject({
|
||||
requestedSize: "1280x720",
|
||||
normalizedAspectRatio: "16:9",
|
||||
aspectRatioDerivedFromSize: "16:9",
|
||||
});
|
||||
});
|
||||
|
||||
it("lists runtime image-generation providers through the provider registry", () => {
|
||||
const providers: ImageGenerationProvider[] = [
|
||||
{
|
||||
@@ -232,6 +379,7 @@ describe("image-generation runtime", () => {
|
||||
{
|
||||
id: "vision-one",
|
||||
defaultModel: "paint-v1",
|
||||
isConfigured: () => false,
|
||||
capabilities: {
|
||||
generate: {},
|
||||
edit: { enabled: false },
|
||||
@@ -243,6 +391,7 @@ describe("image-generation runtime", () => {
|
||||
{
|
||||
id: "vision-two",
|
||||
defaultModel: "paint-v2",
|
||||
isConfigured: () => false,
|
||||
capabilities: {
|
||||
generate: {},
|
||||
edit: { enabled: false },
|
||||
|
||||
@@ -5,6 +5,10 @@ import type { OpenClawConfig } from "../config/config.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import {
|
||||
buildNoCapabilityModelConfiguredMessage,
|
||||
deriveAspectRatioFromSize,
|
||||
resolveClosestAspectRatio,
|
||||
resolveClosestResolution,
|
||||
resolveClosestSize,
|
||||
resolveCapabilityModelCandidates,
|
||||
throwCapabilityGenerationFailure,
|
||||
} from "../media-generation/runtime-shared.js";
|
||||
@@ -71,29 +75,79 @@ function resolveProviderImageGenerationOverrides(params: {
|
||||
let aspectRatio = params.aspectRatio;
|
||||
let resolution = params.resolution;
|
||||
|
||||
if (
|
||||
size &&
|
||||
(!modeCaps.supportsSize ||
|
||||
((geometry?.sizes?.length ?? 0) > 0 && !geometry?.sizes?.includes(size)))
|
||||
) {
|
||||
if (size && (geometry?.sizes?.length ?? 0) > 0 && modeCaps.supportsSize) {
|
||||
size = resolveClosestSize({
|
||||
requestedSize: size,
|
||||
supportedSizes: geometry?.sizes,
|
||||
});
|
||||
}
|
||||
|
||||
if (!modeCaps.supportsSize && size) {
|
||||
let translated = false;
|
||||
if (modeCaps.supportsAspectRatio) {
|
||||
const normalizedAspectRatio = resolveClosestAspectRatio({
|
||||
requestedAspectRatio: aspectRatio,
|
||||
requestedSize: size,
|
||||
supportedAspectRatios: geometry?.aspectRatios,
|
||||
});
|
||||
if (normalizedAspectRatio) {
|
||||
aspectRatio = normalizedAspectRatio;
|
||||
translated = true;
|
||||
}
|
||||
}
|
||||
if (!translated) {
|
||||
ignoredOverrides.push({ key: "size", value: size });
|
||||
}
|
||||
size = undefined;
|
||||
}
|
||||
|
||||
if (aspectRatio && (geometry?.aspectRatios?.length ?? 0) > 0 && modeCaps.supportsAspectRatio) {
|
||||
aspectRatio = resolveClosestAspectRatio({
|
||||
requestedAspectRatio: aspectRatio,
|
||||
requestedSize: size,
|
||||
supportedAspectRatios: geometry?.aspectRatios,
|
||||
});
|
||||
} else if (!modeCaps.supportsAspectRatio && aspectRatio) {
|
||||
const derivedSize =
|
||||
modeCaps.supportsSize && !size
|
||||
? resolveClosestSize({
|
||||
requestedSize: params.size,
|
||||
requestedAspectRatio: aspectRatio,
|
||||
supportedSizes: geometry?.sizes,
|
||||
})
|
||||
: undefined;
|
||||
let translated = false;
|
||||
if (derivedSize) {
|
||||
size = derivedSize;
|
||||
translated = true;
|
||||
}
|
||||
if (!translated) {
|
||||
ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio });
|
||||
}
|
||||
aspectRatio = undefined;
|
||||
}
|
||||
|
||||
if (resolution && (geometry?.resolutions?.length ?? 0) > 0 && modeCaps.supportsResolution) {
|
||||
resolution = resolveClosestResolution({
|
||||
requestedResolution: resolution,
|
||||
supportedResolutions: geometry?.resolutions,
|
||||
});
|
||||
} else if (!modeCaps.supportsResolution && resolution) {
|
||||
ignoredOverrides.push({ key: "resolution", value: resolution });
|
||||
resolution = undefined;
|
||||
}
|
||||
|
||||
if (size && !modeCaps.supportsSize) {
|
||||
ignoredOverrides.push({ key: "size", value: size });
|
||||
size = undefined;
|
||||
}
|
||||
|
||||
if (
|
||||
aspectRatio &&
|
||||
(!modeCaps.supportsAspectRatio ||
|
||||
((geometry?.aspectRatios?.length ?? 0) > 0 && !geometry?.aspectRatios?.includes(aspectRatio)))
|
||||
) {
|
||||
if (aspectRatio && !modeCaps.supportsAspectRatio) {
|
||||
ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio });
|
||||
aspectRatio = undefined;
|
||||
}
|
||||
|
||||
if (
|
||||
resolution &&
|
||||
(!modeCaps.supportsResolution ||
|
||||
((geometry?.resolutions?.length ?? 0) > 0 && !geometry?.resolutions?.includes(resolution)))
|
||||
) {
|
||||
if (resolution && !modeCaps.supportsResolution) {
|
||||
ignoredOverrides.push({ key: "resolution", value: resolution });
|
||||
resolution = undefined;
|
||||
}
|
||||
@@ -114,6 +168,8 @@ export async function generateImage(
|
||||
modelConfig: params.cfg.agents?.defaults?.imageGenerationModel,
|
||||
modelOverride: params.modelOverride,
|
||||
parseModelRef: parseImageGenerationModelRef,
|
||||
agentDir: params.agentDir,
|
||||
listProviders: listImageGenerationProviders,
|
||||
});
|
||||
if (candidates.length === 0) {
|
||||
throw new Error(buildNoImageGenerationModelConfiguredMessage(params.cfg));
|
||||
@@ -164,7 +220,33 @@ export async function generateImage(
|
||||
provider: candidate.provider,
|
||||
model: result.model ?? candidate.model,
|
||||
attempts,
|
||||
metadata: result.metadata,
|
||||
metadata: {
|
||||
...result.metadata,
|
||||
...(params.size && sanitized.size && params.size !== sanitized.size
|
||||
? { requestedSize: params.size, normalizedSize: sanitized.size }
|
||||
: {}),
|
||||
...((params.aspectRatio &&
|
||||
sanitized.aspectRatio &&
|
||||
params.aspectRatio !== sanitized.aspectRatio) ||
|
||||
(!params.aspectRatio && params.size && sanitized.aspectRatio)
|
||||
? {
|
||||
...(params.size ? { requestedSize: params.size } : {}),
|
||||
...(params.aspectRatio ? { requestedAspectRatio: params.aspectRatio } : {}),
|
||||
normalizedAspectRatio: sanitized.aspectRatio,
|
||||
...(params.size
|
||||
? { aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size) }
|
||||
: {}),
|
||||
}
|
||||
: {}),
|
||||
...(params.resolution &&
|
||||
sanitized.resolution &&
|
||||
params.resolution !== sanitized.resolution
|
||||
? {
|
||||
requestedResolution: params.resolution,
|
||||
normalizedResolution: sanitized.resolution,
|
||||
}
|
||||
: {}),
|
||||
},
|
||||
ignoredOverrides: sanitized.ignoredOverrides,
|
||||
};
|
||||
} catch (err) {
|
||||
|
||||
161
src/media-generation/runtime-shared.test.ts
Normal file
161
src/media-generation/runtime-shared.test.ts
Normal file
@@ -0,0 +1,161 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
deriveAspectRatioFromSize,
|
||||
normalizeDurationToClosestMax,
|
||||
resolveCapabilityModelCandidates,
|
||||
resolveClosestAspectRatio,
|
||||
resolveClosestResolution,
|
||||
resolveClosestSize,
|
||||
} from "./runtime-shared.js";
|
||||
|
||||
/**
 * Test helper that splits a `provider/model` reference at its first slash.
 *
 * Returns `null` when the input is missing or blank after trimming, when it
 * contains no slash, or when the slash is the first or last character (which
 * would leave an empty provider or model). Everything after the first slash
 * is kept as the model id, so nested ids such as `fal/fal-ai/flux/dev` keep
 * their remaining slashes.
 */
function parseModelRef(raw?: string) {
  const trimmed = raw?.trim();
  if (!trimmed) {
    return null;
  }
  const slash = trimmed.indexOf("/");
  // slash <= 0 covers both "no slash" (-1) and "leading slash" (0).
  if (slash <= 0 || slash === trimmed.length - 1) {
    return null;
  }
  return {
    provider: trimmed.slice(0, slash),
    model: trimmed.slice(slash + 1),
  };
}
|
||||
|
||||
// Exercises resolveCapabilityModelCandidates: explicit model refs come first,
// followed by default models of configured providers unless that expansion is
// switched off in config.
describe("media-generation runtime shared candidates", () => {
  it("appends auth-backed provider defaults after explicit refs by default", () => {
    const cfg = {
      agents: {
        defaults: {
          model: {
            primary: "openai/gpt-5.4",
          },
        },
      },
    } as OpenClawConfig;

    const candidates = resolveCapabilityModelCandidates({
      cfg,
      modelConfig: {
        primary: "google/gemini-3.1-flash-image-preview",
        fallbacks: ["fal/fal-ai/flux/dev"],
      },
      parseModelRef,
      listProviders: () => [
        {
          id: "google",
          defaultModel: "gemini-3.1-flash-image-preview",
          isConfigured: () => true,
        },
        {
          id: "openai",
          defaultModel: "gpt-image-1",
          isConfigured: () => true,
        },
        {
          id: "minimax",
          defaultModel: "image-01",
          isConfigured: () => true,
        },
      ],
    });

    // The google default repeats the explicit primary, so it shows up once;
    // the remaining provider defaults are appended after the explicit refs.
    expect(candidates).toEqual([
      { provider: "google", model: "gemini-3.1-flash-image-preview" },
      { provider: "fal", model: "fal-ai/flux/dev" },
      { provider: "openai", model: "gpt-image-1" },
      { provider: "minimax", model: "image-01" },
    ]);
  });

  it("auto-detects auth-backed provider defaults when no explicit media model is configured", () => {
    const candidates = resolveCapabilityModelCandidates({
      cfg: {} as OpenClawConfig,
      modelConfig: undefined,
      parseModelRef,
      listProviders: () => [
        {
          id: "openai",
          defaultModel: "gpt-image-1",
          isConfigured: () => true,
        },
        {
          id: "fal",
          defaultModel: "fal-ai/flux/dev",
          isConfigured: () => true,
        },
      ],
    });

    expect(candidates).toEqual([
      { provider: "openai", model: "gpt-image-1" },
      { provider: "fal", model: "fal-ai/flux/dev" },
    ]);
  });

  it("disables implicit provider expansion when mediaGenerationAutoProviderFallback=false", () => {
    const candidates = resolveCapabilityModelCandidates({
      cfg: {
        agents: {
          defaults: {
            mediaGenerationAutoProviderFallback: false,
          },
        },
      } as OpenClawConfig,
      modelConfig: {
        primary: "google/gemini-3.1-flash-image-preview",
      },
      parseModelRef,
      listProviders: () => [
        {
          id: "openai",
          defaultModel: "gpt-image-1",
          isConfigured: () => true,
        },
      ],
    });

    // Only the explicit primary remains; the configured openai default is not added.
    expect(candidates).toEqual([{ provider: "google", model: "gemini-3.1-flash-image-preview" }]);
  });
});
|
||||
|
||||
// Exercises the geometry / resolution / duration normalization helpers that map
// a requested value onto the closest value a provider supports.
describe("media-generation runtime shared normalization", () => {
  it("derives reduced aspect ratios from size strings", () => {
    expect(deriveAspectRatioFromSize("1280x720")).toBe("16:9");
    expect(deriveAspectRatioFromSize("1024x1536")).toBe("2:3");
  });

  it("maps unsupported sizes to the closest supported size", () => {
    expect(
      resolveClosestSize({
        requestedSize: "1792x1024",
        supportedSizes: ["1024x1024", "1024x1536", "1536x1024"],
      }),
    ).toBe("1536x1024");
  });

  it("maps unsupported aspect ratios to the closest supported aspect ratio", () => {
    expect(
      resolveClosestAspectRatio({
        requestedAspectRatio: "17:10",
        supportedAspectRatios: ["1:1", "4:3", "16:9"],
      }),
    ).toBe("16:9");
  });

  it("maps unsupported resolutions to the closest supported resolution", () => {
    // "1K" and "4K" are equally far from "2K"; the lower entry is expected.
    expect(
      resolveClosestResolution({
        requestedResolution: "2K",
        supportedResolutions: ["1K", "4K"],
      }),
    ).toBe("1K");
  });

  it("clamps durations to the closest supported max", () => {
    expect(normalizeDurationToClosestMax(12, 8)).toBe(8);
    expect(normalizeDurationToClosestMax(6, 8)).toBe(6);
  });
});
|
||||
@@ -1,3 +1,7 @@
|
||||
import { listProfilesForProvider } from "../agents/auth-profiles.js";
|
||||
import { ensureAuthProfileStore } from "../agents/auth-profiles.js";
|
||||
import { DEFAULT_PROVIDER } from "../agents/defaults.js";
|
||||
import { resolveEnvApiKey } from "../agents/model-auth-env.js";
|
||||
import type { FallbackAttempt } from "../agents/model-fallback.types.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
@@ -12,11 +16,110 @@ export type ParsedProviderModelRef = {
|
||||
model: string;
|
||||
};
|
||||
|
||||
// Ascending list of resolution labels; resolveClosestResolution falls back to
// this ordering when the caller does not pass its own `order`.
const IMAGE_RESOLUTION_ORDER = ["1K", "2K", "4K"] as const;
|
||||
|
||||
/** Minimal provider shape consumed when auto-detecting fallback candidates. */
type CapabilityProviderCandidate = {
  /** Provider identifier; used as the `provider` half of a `provider/model` ref. */
  id: string;
  /** Model paired with the provider id; providers without one are skipped. */
  defaultModel?: string | null;
  /** Optional provider-supplied check; when present its result decides whether the provider counts as configured. */
  isConfigured?: (ctx: { cfg?: OpenClawConfig; agentDir?: string }) => boolean;
};
|
||||
|
||||
/** Result of parsing a `W:H` aspect-ratio string. */
type ParsedAspectRatio = {
  /** Left-hand term of the ratio. */
  width: number;
  /** Right-hand term of the ratio. */
  height: number;
  /** Numeric ratio, `width / height`. */
  value: number;
};
|
||||
|
||||
/** Result of parsing a `WxH` size string. */
type ParsedSize = {
  /** Width term of the size. */
  width: number;
  /** Height term of the size. */
  height: number;
  /** Numeric ratio, `width / height`. */
  aspectRatio: number;
  /** Product `width * height`. */
  area: number;
};
|
||||
|
||||
/**
 * Extracts the provider id from the configured default agent model ref.
 *
 * The primary value of `cfg.agents.defaults.model` is read and the text before
 * its first slash is returned. `DEFAULT_PROVIDER` is returned when no primary
 * value is configured, when it has no slash or starts with one, or when the
 * provider part is blank after trimming.
 */
function resolveCurrentDefaultProviderId(cfg?: OpenClawConfig): string {
  const configured = resolveAgentModelPrimaryValue(cfg?.agents?.defaults?.model);
  const trimmed = configured?.trim();
  if (!trimmed) {
    return DEFAULT_PROVIDER;
  }
  const slash = trimmed.indexOf("/");
  // slash <= 0 covers both "no slash" (-1) and "leading slash" (0).
  if (slash <= 0) {
    return DEFAULT_PROVIDER;
  }
  const provider = trimmed.slice(0, slash).trim();
  return provider || DEFAULT_PROVIDER;
}
|
||||
|
||||
/**
 * Reports whether a provider looks usable for auto-fallback.
 *
 * Checks run in this order:
 * 1. If the provider exposes `isConfigured`, its result is returned as-is.
 * 2. Otherwise, an API key resolved by `resolveEnvApiKey` counts as configured.
 * 3. Otherwise, the auth profile store under `agentDir` is consulted (opened
 *    with `allowKeychainPrompt: false`); at least one profile for the provider
 *    counts as configured. Without a non-blank `agentDir` the answer is `false`.
 */
function isCapabilityProviderConfigured(params: {
  provider: CapabilityProviderCandidate;
  cfg?: OpenClawConfig;
  agentDir?: string;
}): boolean {
  if (params.provider.isConfigured) {
    return params.provider.isConfigured({
      cfg: params.cfg,
      agentDir: params.agentDir,
    });
  }
  if (resolveEnvApiKey(params.provider.id)?.apiKey) {
    return true;
  }
  const agentDir = params.agentDir?.trim();
  if (!agentDir) {
    return false;
  }
  const store = ensureAuthProfileStore(agentDir, {
    allowKeychainPrompt: false,
  });
  return listProfilesForProvider(store, params.provider.id).length > 0;
}
|
||||
|
||||
/**
 * Builds the ordered list of `provider/model` refs used as implicit fallbacks.
 *
 * A provider contributes its default model when it has a non-blank id and
 * default model, has not been seen already (first occurrence wins), and passes
 * `isCapabilityProviderConfigured`. The resulting refs are ordered with the
 * current default provider first (only if it contributed a ref), followed by
 * the remaining provider ids in `toSorted()` default string order.
 */
function resolveAutoCapabilityFallbackRefs(params: {
  cfg: OpenClawConfig;
  agentDir?: string;
  listProviders: (cfg?: OpenClawConfig) => CapabilityProviderCandidate[];
}): string[] {
  const providerDefaults = new Map<string, string>();
  for (const provider of params.listProviders(params.cfg)) {
    const providerId = provider.id.trim();
    const modelId = provider.defaultModel?.trim();
    if (
      !providerId ||
      !modelId ||
      providerDefaults.has(providerId) ||
      !isCapabilityProviderConfigured({
        provider,
        cfg: params.cfg,
        agentDir: params.agentDir,
      })
    ) {
      continue;
    }
    providerDefaults.set(providerId, `${providerId}/${modelId}`);
  }

  const defaultProvider = resolveCurrentDefaultProviderId(params.cfg);
  const orderedProviders = [
    defaultProvider,
    ...[...providerDefaults.keys()]
      .filter((providerId) => providerId !== defaultProvider)
      .toSorted(),
  ];
  // The default provider may have no entry in the map; flatMap drops it then.
  return orderedProviders.flatMap((providerId) => {
    const ref = providerDefaults.get(providerId);
    return ref ? [ref] : [];
  });
}
|
||||
|
||||
export function resolveCapabilityModelCandidates(params: {
|
||||
cfg: OpenClawConfig;
|
||||
modelConfig: AgentModelConfig | undefined;
|
||||
modelOverride?: string;
|
||||
parseModelRef: (raw: string | undefined) => ParsedProviderModelRef | null;
|
||||
agentDir?: string;
|
||||
listProviders?: (cfg?: OpenClawConfig) => CapabilityProviderCandidate[];
|
||||
autoProviderFallback?: boolean;
|
||||
}): ParsedProviderModelRef[] {
|
||||
const candidates: ParsedProviderModelRef[] = [];
|
||||
const seen = new Set<string>();
|
||||
@@ -38,9 +141,237 @@ export function resolveCapabilityModelCandidates(params: {
|
||||
for (const fallback of resolveAgentModelFallbackValues(params.modelConfig)) {
|
||||
add(fallback);
|
||||
}
|
||||
const autoProviderFallbackEnabled =
|
||||
params.autoProviderFallback ??
|
||||
params.cfg.agents?.defaults?.mediaGenerationAutoProviderFallback !== false;
|
||||
if (autoProviderFallbackEnabled && params.listProviders) {
|
||||
for (const candidate of resolveAutoCapabilityFallbackRefs({
|
||||
cfg: params.cfg,
|
||||
agentDir: params.agentDir,
|
||||
listProviders: params.listProviders,
|
||||
})) {
|
||||
add(candidate);
|
||||
}
|
||||
}
|
||||
return candidates;
|
||||
}
|
||||
|
||||
/**
 * Decides whether `next` should replace `best` as the preferred score.
 *
 * Scores compare lexicographically, lower being better: `primary` first, then
 * `secondary`, and finally `tertiary` via `localeCompare` so that full ties
 * between distinct candidates are still broken by their string. With no current
 * best (`null`) the answer is always `true`; an identical score returns `false`,
 * so the earlier candidate is kept.
 */
function compareScores(
  next: { primary: number; secondary: number; tertiary: string },
  best: { primary: number; secondary: number; tertiary: string } | null,
): boolean {
  if (!best) {
    return true;
  }
  if (next.primary !== best.primary) {
    return next.primary < best.primary;
  }
  if (next.secondary !== best.secondary) {
    return next.secondary < best.secondary;
  }
  return next.tertiary.localeCompare(best.tertiary) < 0;
}
|
||||
|
||||
/**
 * Parses a `W:H` aspect-ratio string into its numeric parts.
 *
 * Both terms may be integers or decimals, and whitespace around the colon is
 * allowed. Returns `null` for missing or blank input, for text that does not
 * match the `W:H` shape, and when either term is non-finite or not greater
 * than zero.
 */
function parseAspectRatioValue(raw?: string | null): ParsedAspectRatio | null {
  const trimmed = raw?.trim();
  if (!trimmed) {
    return null;
  }
  const match = /^(\d+(?:\.\d+)?)\s*:\s*(\d+(?:\.\d+)?)$/.exec(trimmed);
  if (!match) {
    return null;
  }
  const width = Number(match[1]);
  const height = Number(match[2]);
  if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
    return null;
  }
  return {
    width,
    height,
    value: width / height,
  };
}
|
||||
|
||||
/**
 * Parses a `WxH` size string (case-insensitive `x`, integer terms only).
 *
 * Whitespace around the `x` is allowed. Returns `null` for missing or blank
 * input, for text that does not match the `WxH` shape, and when either term is
 * non-finite or not greater than zero. On success the derived `aspectRatio`
 * (`width / height`) and `area` (`width * height`) are included.
 */
function parseSizeValue(raw?: string | null): ParsedSize | null {
  const trimmed = raw?.trim();
  if (!trimmed) {
    return null;
  }
  const match = /^(\d+)\s*x\s*(\d+)$/i.exec(trimmed);
  if (!match) {
    return null;
  }
  const width = Number(match[1]);
  const height = Number(match[2]);
  if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
    return null;
  }
  return {
    width,
    height,
    aspectRatio: width / height,
    area: width * height,
  };
}
|
||||
|
||||
/**
 * Euclidean greatest common divisor of the absolute values of `a` and `b`.
 *
 * Returns 1 instead of 0 when both inputs are zero, so the result can be used
 * as a divisor without a separate zero check.
 */
function greatestCommonDivisor(a: number, b: number): number {
  let left = Math.abs(a);
  let right = Math.abs(b);
  while (right !== 0) {
    const next = left % right;
    left = right;
    right = next;
  }
  return left || 1;
}
|
||||
|
||||
/**
 * Converts a `WxH` size string into a reduced `W:H` aspect-ratio string.
 *
 * Both terms are divided by their greatest common divisor, e.g. `1280x720`
 * becomes `16:9`.
 *
 * @param size - Size string to convert.
 * @returns The reduced ratio, or `undefined` when `size` cannot be parsed.
 */
export function deriveAspectRatioFromSize(size?: string): string | undefined {
  const parsed = parseSizeValue(size);
  if (!parsed) {
    return undefined;
  }
  const divisor = greatestCommonDivisor(parsed.width, parsed.height);
  return `${parsed.width / divisor}:${parsed.height / divisor}`;
}
|
||||
|
||||
/**
 * Picks the supported aspect ratio closest to what the caller asked for.
 *
 * Resolution order:
 * - With no (non-blank) supported ratios, the requested ratio is returned
 *   unchanged, or else the ratio derived from `requestedSize`.
 * - A requested ratio that appears verbatim in the supported list is returned.
 * - Otherwise the target is the parsed requested ratio, falling back to the
 *   ratio derived from `requestedSize`; `undefined` is returned if neither
 *   parses.
 * - Supported entries that parse are ranked by the absolute log of their ratio
 *   relative to the target, then by the cross-multiplied difference of the
 *   terms, then by the candidate string.
 *
 * @returns The chosen ratio string, or `undefined` when nothing can be chosen.
 */
export function resolveClosestAspectRatio(params: {
  requestedAspectRatio?: string;
  requestedSize?: string;
  supportedAspectRatios?: readonly string[];
}): string | undefined {
  const supported = params.supportedAspectRatios?.filter((entry) => entry.trim().length > 0) ?? [];
  if (supported.length === 0) {
    return params.requestedAspectRatio ?? deriveAspectRatioFromSize(params.requestedSize);
  }
  if (params.requestedAspectRatio && supported.includes(params.requestedAspectRatio)) {
    return params.requestedAspectRatio;
  }
  const requested =
    parseAspectRatioValue(params.requestedAspectRatio) ??
    parseAspectRatioValue(deriveAspectRatioFromSize(params.requestedSize));
  if (!requested) {
    return undefined;
  }

  let bestValue: string | undefined;
  let bestScore: { primary: number; secondary: number; tertiary: string } | null = null;
  for (const candidate of supported) {
    const parsed = parseAspectRatioValue(candidate);
    if (!parsed) {
      continue;
    }
    const score = {
      // Log distance treats "twice as wide" and "half as wide" as equally far.
      primary: Math.abs(Math.log(parsed.value / requested.value)),
      secondary: Math.abs(parsed.width * requested.height - requested.width * parsed.height),
      tertiary: candidate,
    };
    if (compareScores(score, bestScore)) {
      bestValue = candidate;
      bestScore = score;
    }
  }
  return bestValue;
}
|
||||
|
||||
/**
 * Picks the supported size closest to what the caller asked for.
 *
 * Resolution order:
 * - With no (non-blank) supported sizes, `requestedSize` is returned unchanged.
 * - A requested size that appears verbatim in the supported list is returned.
 * - Otherwise the target aspect ratio is taken from the parsed requested size,
 *   falling back to the parsed `requestedAspectRatio`; `undefined` is returned
 *   if neither parses.
 * - Supported entries that parse are ranked by the absolute log of their
 *   aspect ratio relative to the target, then by area: log distance to the
 *   requested area when a size was requested, otherwise the smaller area wins.
 *   The candidate string breaks any remaining tie.
 *
 * @returns The chosen size string, or `undefined` when nothing can be chosen.
 */
export function resolveClosestSize(params: {
  requestedSize?: string;
  requestedAspectRatio?: string;
  supportedSizes?: readonly string[];
}): string | undefined {
  const supported = params.supportedSizes?.filter((entry) => entry.trim().length > 0) ?? [];
  if (supported.length === 0) {
    return params.requestedSize;
  }
  if (params.requestedSize && supported.includes(params.requestedSize)) {
    return params.requestedSize;
  }
  const requested = parseSizeValue(params.requestedSize);
  const requestedAspectRatio = parseAspectRatioValue(params.requestedAspectRatio);
  // Resolved once up front: the target does not change per candidate, and
  // narrowing here avoids a non-null assertion inside the loop.
  const targetAspectRatio = requested?.aspectRatio ?? requestedAspectRatio?.value;
  if (targetAspectRatio === undefined) {
    return undefined;
  }

  let bestValue: string | undefined;
  let bestScore: { primary: number; secondary: number; tertiary: string } | null = null;
  for (const candidate of supported) {
    const parsed = parseSizeValue(candidate);
    if (!parsed) {
      continue;
    }
    const score = {
      primary: Math.abs(Math.log(parsed.aspectRatio / targetAspectRatio)),
      secondary: requested ? Math.abs(Math.log(parsed.area / requested.area)) : parsed.area,
      tertiary: candidate,
    };
    if (compareScores(score, bestScore)) {
      bestValue = candidate;
      bestScore = score;
    }
  }
  return bestValue;
}
|
||||
|
||||
/**
 * Picks the supported resolution closest to the requested one.
 *
 * Resolution order:
 * - With no (non-blank) supported resolutions, `requestedResolution` is
 *   returned unchanged.
 * - A requested resolution that is itself supported is returned.
 * - Otherwise both the request and each candidate are located in `order`
 *   (defaulting to `IMAGE_RESOLUTION_ORDER`). A request that is missing or not
 *   in `order` yields `undefined`; candidates not in `order` are skipped.
 * - Candidates are ranked by index distance from the request; on a tie the
 *   candidate with the lower index in `order` wins.
 *
 * @returns The chosen resolution, or `undefined` when nothing can be chosen.
 */
export function resolveClosestResolution<TResolution extends string>(params: {
  requestedResolution?: TResolution;
  supportedResolutions?: readonly TResolution[];
  order?: readonly TResolution[];
}): TResolution | undefined {
  const supported = params.supportedResolutions?.filter((entry) => entry.trim().length > 0) ?? [];
  if (supported.length === 0) {
    return params.requestedResolution;
  }
  if (params.requestedResolution && supported.includes(params.requestedResolution)) {
    return params.requestedResolution;
  }
  const order = params.order ?? (IMAGE_RESOLUTION_ORDER as readonly TResolution[]);
  const requestedIndex = params.requestedResolution
    ? order.indexOf(params.requestedResolution)
    : -1;
  if (requestedIndex < 0) {
    return undefined;
  }

  let bestValue: TResolution | undefined;
  let bestScore: { primary: number; secondary: number; tertiary: string } | null = null;
  for (const candidate of supported) {
    const candidateIndex = order.indexOf(candidate);
    if (candidateIndex < 0) {
      continue;
    }
    const score = {
      primary: Math.abs(candidateIndex - requestedIndex),
      secondary: candidateIndex,
      tertiary: candidate,
    };
    if (compareScores(score, bestScore)) {
      bestValue = candidate;
      bestScore = score;
    }
  }
  return bestValue;
}
|
||||
|
||||
/**
 * Rounds a requested duration to whole seconds and caps it at a provider max.
 *
 * @param durationSeconds - Requested duration; anything that is not a finite
 *   number yields `undefined`.
 * @param maxDurationSeconds - Upper bound. Ignored unless it is a finite number
 *   greater than zero; when used it is itself rounded and floored at 1.
 * @returns The duration rounded to the nearest integer and never below 1,
 *   capped at the rounded maximum when one applies; `undefined` when no usable
 *   duration was given.
 */
export function normalizeDurationToClosestMax(
  durationSeconds?: number,
  maxDurationSeconds?: number,
): number | undefined {
  if (typeof durationSeconds !== "number" || !Number.isFinite(durationSeconds)) {
    return undefined;
  }
  const rounded = Math.max(1, Math.round(durationSeconds));
  if (
    typeof maxDurationSeconds !== "number" ||
    !Number.isFinite(maxDurationSeconds) ||
    maxDurationSeconds <= 0
  ) {
    return rounded;
  }
  return Math.min(rounded, Math.max(1, Math.round(maxDurationSeconds)));
}
|
||||
|
||||
export function throwCapabilityGenerationFailure(params: {
|
||||
capabilityLabel: string;
|
||||
attempts: FallbackAttempt[];
|
||||
|
||||
@@ -12,6 +12,7 @@ const mocks = vi.hoisted(() => {
|
||||
(providerId: string, config?: OpenClawConfig) => MusicGenerationProvider | undefined
|
||||
>(() => undefined),
|
||||
getProviderEnvVars: vi.fn<(providerId: string) => string[]>(() => []),
|
||||
resolveProviderAuthEnvVarCandidates: vi.fn(() => ({})),
|
||||
isFailoverError: vi.fn<(err: unknown) => boolean>(() => false),
|
||||
listMusicGenerationProviders: vi.fn<(config?: OpenClawConfig) => MusicGenerationProvider[]>(
|
||||
() => [],
|
||||
@@ -49,9 +50,14 @@ vi.mock("../config/model-input.js", () => ({
|
||||
vi.mock("../logging/subsystem.js", () => ({
|
||||
createSubsystemLogger: mocks.createSubsystemLogger,
|
||||
}));
|
||||
vi.mock("../secrets/provider-env-vars.js", () => ({
|
||||
getProviderEnvVars: mocks.getProviderEnvVars,
|
||||
}));
|
||||
vi.mock("../secrets/provider-env-vars.js", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("../secrets/provider-env-vars.js")>();
|
||||
return {
|
||||
...actual,
|
||||
getProviderEnvVars: mocks.getProviderEnvVars,
|
||||
resolveProviderAuthEnvVarCandidates: mocks.resolveProviderAuthEnvVarCandidates,
|
||||
};
|
||||
});
|
||||
vi.mock("./model-ref.js", () => ({
|
||||
parseMusicGenerationModelRef: mocks.parseMusicGenerationModelRef,
|
||||
}));
|
||||
@@ -67,6 +73,8 @@ describe("music-generation runtime", () => {
|
||||
mocks.getMusicGenerationProvider.mockReset();
|
||||
mocks.getProviderEnvVars.mockReset();
|
||||
mocks.getProviderEnvVars.mockReturnValue([]);
|
||||
mocks.resolveProviderAuthEnvVarCandidates.mockReset();
|
||||
mocks.resolveProviderAuthEnvVarCandidates.mockReturnValue({});
|
||||
mocks.isFailoverError.mockReset();
|
||||
mocks.isFailoverError.mockReturnValue(false);
|
||||
mocks.listMusicGenerationProviders.mockReset();
|
||||
@@ -129,6 +137,68 @@ describe("music-generation runtime", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
// With an empty config and two configured providers, the first provider's
// failure must be recorded as an attempt and the second provider's result returned.
it("auto-detects and falls through to another configured music-generation provider by default", async () => {
  mocks.getMusicGenerationProvider.mockImplementation((providerId: string) => {
    if (providerId === "google") {
      return {
        id: "google",
        defaultModel: "lyria-3-clip-preview",
        capabilities: {},
        isConfigured: () => true,
        async generateMusic() {
          throw new Error("Google music generation response missing audio data");
        },
      };
    }
    if (providerId === "minimax") {
      return {
        id: "minimax",
        defaultModel: "music-2.5+",
        capabilities: {},
        isConfigured: () => true,
        async generateMusic() {
          return {
            tracks: [{ buffer: Buffer.from("mp3-bytes"), mimeType: "audio/mpeg" }],
            model: "music-2.5+",
          };
        },
      };
    }
    return undefined;
  });
  mocks.listMusicGenerationProviders.mockReturnValue([
    {
      id: "google",
      defaultModel: "lyria-3-clip-preview",
      capabilities: {},
      isConfigured: () => true,
      generateMusic: async () => ({ tracks: [] }),
    },
    {
      id: "minimax",
      defaultModel: "music-2.5+",
      capabilities: {},
      isConfigured: () => true,
      generateMusic: async () => ({ tracks: [] }),
    },
  ]);

  const result = await generateMusic({
    cfg: {} as OpenClawConfig,
    prompt: "play a synth line",
  });

  expect(result.provider).toBe("minimax");
  expect(result.model).toBe("music-2.5+");
  expect(result.attempts).toEqual([
    {
      provider: "google",
      model: "lyria-3-clip-preview",
      error: "Google music generation response missing audio data",
    },
  ]);
});
|
||||
|
||||
it("lists runtime music-generation providers through the provider registry", () => {
|
||||
const providers: MusicGenerationProvider[] = [
|
||||
{
|
||||
@@ -285,4 +355,52 @@ describe("music-generation runtime", () => {
|
||||
{ key: "format", value: "mp3" },
|
||||
]);
|
||||
});
|
||||
|
||||
// A 45s request against a provider capped at 30s must reach the provider as
// 30s, be reported in metadata, and not be listed as an ignored override.
it("normalizes requested durations to the closest supported max duration", async () => {
  let seenRequest:
    | {
        durationSeconds?: number;
      }
    | undefined;
  mocks.resolveAgentModelPrimaryValue.mockReturnValue("minimax/music-2.5+");
  mocks.getMusicGenerationProvider.mockReturnValue({
    id: "minimax",
    capabilities: {
      generate: {
        supportsDuration: true,
        maxDurationSeconds: 30,
      },
    },
    generateMusic: async (req) => {
      // Capture what the provider actually receives after normalization.
      seenRequest = {
        durationSeconds: req.durationSeconds,
      };
      return {
        tracks: [{ buffer: Buffer.from("mp3-bytes"), mimeType: "audio/mpeg" }],
        model: "music-2.5+",
      };
    },
  });

  const result = await generateMusic({
    cfg: {
      agents: {
        defaults: {
          musicGenerationModel: { primary: "minimax/music-2.5+" },
        },
      },
    } as OpenClawConfig,
    prompt: "energetic arcade anthem",
    durationSeconds: 45,
  });

  expect(seenRequest).toEqual({
    durationSeconds: 30,
  });
  expect(result.ignoredOverrides).toEqual([]);
  expect(result.metadata).toMatchObject({
    requestedDurationSeconds: 45,
    normalizedDurationSeconds: 30,
  });
});
|
||||
});
|
||||
|
||||
@@ -5,6 +5,7 @@ import type { OpenClawConfig } from "../config/config.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import {
|
||||
buildNoCapabilityModelConfiguredMessage,
|
||||
normalizeDurationToClosestMax,
|
||||
resolveCapabilityModelCandidates,
|
||||
throwCapabilityGenerationFailure,
|
||||
} from "../media-generation/runtime-shared.js";
|
||||
@@ -90,6 +91,8 @@ function resolveProviderMusicGenerationOverrides(params: {
|
||||
if (typeof durationSeconds === "number" && !caps.supportsDuration) {
|
||||
ignoredOverrides.push({ key: "durationSeconds", value: durationSeconds });
|
||||
durationSeconds = undefined;
|
||||
} else if (typeof durationSeconds === "number") {
|
||||
durationSeconds = normalizeDurationToClosestMax(durationSeconds, caps.maxDurationSeconds);
|
||||
}
|
||||
|
||||
if (format) {
|
||||
@@ -121,6 +124,8 @@ export async function generateMusic(
|
||||
modelConfig: params.cfg.agents?.defaults?.musicGenerationModel,
|
||||
modelOverride: params.modelOverride,
|
||||
parseModelRef: parseMusicGenerationModelRef,
|
||||
agentDir: params.agentDir,
|
||||
listProviders: listMusicGenerationProviders,
|
||||
});
|
||||
if (candidates.length === 0) {
|
||||
throw new Error(
|
||||
@@ -181,7 +186,17 @@ export async function generateMusic(
|
||||
model: result.model ?? candidate.model,
|
||||
attempts,
|
||||
lyrics: result.lyrics,
|
||||
metadata: result.metadata,
|
||||
metadata: {
|
||||
...result.metadata,
|
||||
...(typeof params.durationSeconds === "number" &&
|
||||
typeof sanitized.durationSeconds === "number" &&
|
||||
params.durationSeconds !== sanitized.durationSeconds
|
||||
? {
|
||||
requestedDurationSeconds: params.durationSeconds,
|
||||
normalizedDurationSeconds: sanitized.durationSeconds,
|
||||
}
|
||||
: {}),
|
||||
},
|
||||
ignoredOverrides: sanitized.ignoredOverrides,
|
||||
};
|
||||
} catch (err) {
|
||||
|
||||
@@ -9,6 +9,7 @@ const mocks = vi.hoisted(() => {
|
||||
createSubsystemLogger: vi.fn(() => ({ debug })),
|
||||
describeFailoverError: vi.fn(),
|
||||
getProviderEnvVars: vi.fn<(providerId: string) => string[]>(() => []),
|
||||
resolveProviderAuthEnvVarCandidates: vi.fn(() => ({})),
|
||||
getVideoGenerationProvider: vi.fn<
|
||||
(providerId: string, config?: OpenClawConfig) => VideoGenerationProvider | undefined
|
||||
>(() => undefined),
|
||||
@@ -49,9 +50,14 @@ vi.mock("../config/model-input.js", () => ({
|
||||
vi.mock("../logging/subsystem.js", () => ({
|
||||
createSubsystemLogger: mocks.createSubsystemLogger,
|
||||
}));
|
||||
vi.mock("../secrets/provider-env-vars.js", () => ({
|
||||
getProviderEnvVars: mocks.getProviderEnvVars,
|
||||
}));
|
||||
vi.mock("../secrets/provider-env-vars.js", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("../secrets/provider-env-vars.js")>();
|
||||
return {
|
||||
...actual,
|
||||
getProviderEnvVars: mocks.getProviderEnvVars,
|
||||
resolveProviderAuthEnvVarCandidates: mocks.resolveProviderAuthEnvVarCandidates,
|
||||
};
|
||||
});
|
||||
vi.mock("./model-ref.js", () => ({
|
||||
parseVideoGenerationModelRef: mocks.parseVideoGenerationModelRef,
|
||||
}));
|
||||
@@ -66,6 +72,8 @@ describe("video-generation runtime", () => {
|
||||
mocks.describeFailoverError.mockReset();
|
||||
mocks.getProviderEnvVars.mockReset();
|
||||
mocks.getProviderEnvVars.mockReturnValue([]);
|
||||
mocks.resolveProviderAuthEnvVarCandidates.mockReset();
|
||||
mocks.resolveProviderAuthEnvVarCandidates.mockReturnValue({});
|
||||
mocks.getVideoGenerationProvider.mockReset();
|
||||
mocks.isFailoverError.mockReset();
|
||||
mocks.isFailoverError.mockReturnValue(false);
|
||||
@@ -129,6 +137,68 @@ describe("video-generation runtime", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
// With an empty config and two configured providers, the first provider's
// failure must be recorded as an attempt and the second provider's result returned.
it("auto-detects and falls through to another configured video-generation provider by default", async () => {
  mocks.getVideoGenerationProvider.mockImplementation((providerId: string) => {
    if (providerId === "openai") {
      return {
        id: "openai",
        defaultModel: "sora-2",
        capabilities: {},
        isConfigured: () => true,
        async generateVideo() {
          throw new Error("Your request was blocked by our moderation system.");
        },
      };
    }
    if (providerId === "runway") {
      return {
        id: "runway",
        defaultModel: "gen4.5",
        capabilities: {},
        isConfigured: () => true,
        async generateVideo() {
          return {
            videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }],
            model: "gen4.5",
          };
        },
      };
    }
    return undefined;
  });
  mocks.listVideoGenerationProviders.mockReturnValue([
    {
      id: "openai",
      defaultModel: "sora-2",
      capabilities: {},
      isConfigured: () => true,
      generateVideo: async () => ({ videos: [] }),
    },
    {
      id: "runway",
      defaultModel: "gen4.5",
      capabilities: {},
      isConfigured: () => true,
      generateVideo: async () => ({ videos: [] }),
    },
  ]);

  const result = await generateVideo({
    cfg: {} as OpenClawConfig,
    prompt: "animate a cat",
  });

  expect(result.provider).toBe("runway");
  expect(result.model).toBe("gen4.5");
  expect(result.attempts).toEqual([
    {
      provider: "openai",
      model: "sora-2",
      error: "Your request was blocked by our moderation system.",
    },
  ]);
});
|
||||
|
||||
it("lists runtime video-generation providers through the provider registry", () => {
|
||||
const providers: VideoGenerationProvider[] = [
|
||||
{
|
||||
@@ -303,7 +373,6 @@ describe("video-generation runtime", () => {
|
||||
} as OpenClawConfig,
|
||||
prompt: "animate a lobster",
|
||||
size: "1280x720",
|
||||
aspectRatio: "16:9",
|
||||
inputImages: [{ buffer: Buffer.from("png"), mimeType: "image/png" }],
|
||||
});
|
||||
|
||||
@@ -312,7 +381,12 @@ describe("video-generation runtime", () => {
|
||||
aspectRatio: "16:9",
|
||||
resolution: undefined,
|
||||
});
|
||||
expect(result.ignoredOverrides).toEqual([{ key: "size", value: "1280x720" }]);
|
||||
expect(result.ignoredOverrides).toEqual([]);
|
||||
expect(result.metadata).toMatchObject({
|
||||
requestedSize: "1280x720",
|
||||
normalizedAspectRatio: "16:9",
|
||||
aspectRatioDerivedFromSize: "16:9",
|
||||
});
|
||||
});
|
||||
|
||||
it("builds a generic config hint without hardcoded provider ids", async () => {
|
||||
|
||||
@@ -5,6 +5,8 @@ import type { OpenClawConfig } from "../config/config.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import {
|
||||
buildNoCapabilityModelConfiguredMessage,
|
||||
deriveAspectRatioFromSize,
|
||||
resolveClosestAspectRatio,
|
||||
resolveCapabilityModelCandidates,
|
||||
throwCapabilityGenerationFailure,
|
||||
} from "../media-generation/runtime-shared.js";
|
||||
@@ -96,7 +98,20 @@ function resolveProviderVideoGenerationOverrides(params: {
|
||||
}
|
||||
|
||||
if (size && !caps.supportsSize) {
|
||||
ignoredOverrides.push({ key: "size", value: size });
|
||||
let translated = false;
|
||||
if (caps.supportsAspectRatio) {
|
||||
const normalizedAspectRatio = resolveClosestAspectRatio({
|
||||
requestedAspectRatio: aspectRatio,
|
||||
requestedSize: size,
|
||||
});
|
||||
if (normalizedAspectRatio) {
|
||||
aspectRatio = normalizedAspectRatio;
|
||||
translated = true;
|
||||
}
|
||||
}
|
||||
if (!translated) {
|
||||
ignoredOverrides.push({ key: "size", value: size });
|
||||
}
|
||||
size = undefined;
|
||||
}
|
||||
|
||||
@@ -138,6 +153,8 @@ export async function generateVideo(
|
||||
modelConfig: params.cfg.agents?.defaults?.videoGenerationModel,
|
||||
modelOverride: params.modelOverride,
|
||||
parseModelRef: parseVideoGenerationModelRef,
|
||||
agentDir: params.agentDir,
|
||||
listProviders: listVideoGenerationProviders,
|
||||
});
|
||||
if (candidates.length === 0) {
|
||||
throw new Error(buildNoVideoGenerationModelConfiguredMessage(params.cfg));
|
||||
@@ -212,17 +229,39 @@ export async function generateVideo(
|
||||
model: result.model ?? candidate.model,
|
||||
attempts,
|
||||
ignoredOverrides: sanitized.ignoredOverrides,
|
||||
metadata:
|
||||
typeof requestedDurationSeconds === "number" &&
|
||||
metadata: {
|
||||
...result.metadata,
|
||||
...((params.size && sanitized.aspectRatio && params.size !== sanitized.size) ||
|
||||
(params.aspectRatio &&
|
||||
sanitized.aspectRatio &&
|
||||
params.aspectRatio !== sanitized.aspectRatio)
|
||||
? {
|
||||
...(params.size ? { requestedSize: params.size } : {}),
|
||||
...(params.aspectRatio ? { requestedAspectRatio: params.aspectRatio } : {}),
|
||||
normalizedAspectRatio: sanitized.aspectRatio,
|
||||
...(params.size
|
||||
? { aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size) }
|
||||
: {}),
|
||||
}
|
||||
: {}),
|
||||
...(params.resolution &&
|
||||
sanitized.resolution &&
|
||||
params.resolution !== sanitized.resolution
|
||||
? {
|
||||
requestedResolution: params.resolution,
|
||||
normalizedResolution: sanitized.resolution,
|
||||
}
|
||||
: {}),
|
||||
...(typeof requestedDurationSeconds === "number" &&
|
||||
typeof normalizedDurationSeconds === "number" &&
|
||||
requestedDurationSeconds !== normalizedDurationSeconds
|
||||
? {
|
||||
...result.metadata,
|
||||
requestedDurationSeconds,
|
||||
normalizedDurationSeconds,
|
||||
...(supportedDurationSeconds ? { supportedDurationSeconds } : {}),
|
||||
}
|
||||
: result.metadata,
|
||||
: {}),
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
lastError = err;
|
||||
|
||||
Reference in New Issue
Block a user