refactor: share media normalization across runtimes

This commit is contained in:
Peter Steinberger
2026-04-06 23:46:31 +01:00
parent 1e7f39abdb
commit 425592cf9c
23 changed files with 751 additions and 413 deletions

View File

@@ -616,24 +616,29 @@ export function createImageGenerateTool(options?: {
? `Ignored unsupported overrides for ${result.provider}/${result.model}: ${ignoredOverrides.map(formatIgnoredImageGenerationOverride).join(", ")}.`
: undefined;
const normalizedSize =
typeof result.metadata?.normalizedSize === "string" && result.metadata.normalizedSize.trim()
result.normalization?.size?.applied ??
(typeof result.metadata?.normalizedSize === "string" &&
result.metadata.normalizedSize.trim()
? result.metadata.normalizedSize
: undefined;
: undefined);
const normalizedAspectRatio =
typeof result.metadata?.normalizedAspectRatio === "string" &&
result.normalization?.aspectRatio?.applied ??
(typeof result.metadata?.normalizedAspectRatio === "string" &&
result.metadata.normalizedAspectRatio.trim()
? result.metadata.normalizedAspectRatio
: undefined;
: undefined);
const normalizedResolution =
typeof result.metadata?.normalizedResolution === "string" &&
result.normalization?.resolution?.applied ??
(typeof result.metadata?.normalizedResolution === "string" &&
result.metadata.normalizedResolution.trim()
? result.metadata.normalizedResolution
: undefined;
: undefined);
const sizeTranslatedToAspectRatio =
!normalizedSize &&
typeof result.metadata?.requestedSize === "string" &&
result.metadata.requestedSize === size &&
Boolean(normalizedAspectRatio);
result.normalization?.aspectRatio?.derivedFrom === "size" ||
(!normalizedSize &&
typeof result.metadata?.requestedSize === "string" &&
result.metadata.requestedSize === size &&
Boolean(normalizedAspectRatio));
const savedImages = await Promise.all(
result.images.map((image) =>
@@ -694,6 +699,7 @@ export function createImageGenerateTool(options?: {
: {}),
...(filename ? { filename } : {}),
attempts: result.attempts,
...(result.normalization ? { normalization: result.normalization } : {}),
metadata: result.metadata,
...(warning ? { warning } : {}),
...(ignoredOverrides.length > 0 ? { ignoredOverrides } : {}),

View File

@@ -408,15 +408,17 @@ async function executeMusicGenerationJob(params: {
const ignoredOverrides = result.ignoredOverrides ?? [];
const ignoredOverrideKeys = new Set(ignoredOverrides.map((entry) => entry.key));
const requestedDurationSeconds =
typeof result.metadata?.requestedDurationSeconds === "number" &&
result.normalization?.durationSeconds?.requested ??
(typeof result.metadata?.requestedDurationSeconds === "number" &&
Number.isFinite(result.metadata.requestedDurationSeconds)
? result.metadata.requestedDurationSeconds
: params.durationSeconds;
: params.durationSeconds);
const runtimeNormalizedDurationSeconds =
typeof result.metadata?.normalizedDurationSeconds === "number" &&
result.normalization?.durationSeconds?.applied ??
(typeof result.metadata?.normalizedDurationSeconds === "number" &&
Number.isFinite(result.metadata.normalizedDurationSeconds)
? result.metadata.normalizedDurationSeconds
: undefined;
: undefined);
const appliedDurationSeconds =
runtimeNormalizedDurationSeconds ??
(!ignoredOverrideKeys.has("durationSeconds") && typeof params.durationSeconds === "number"
@@ -492,6 +494,7 @@ async function executeMusicGenerationJob(params: {
: {}),
...(result.lyrics?.length ? { lyrics: result.lyrics } : {}),
attempts: result.attempts,
...(result.normalization ? { normalization: result.normalization } : {}),
metadata: result.metadata,
...(warning ? { warning } : {}),
...(ignoredOverrides.length > 0 ? { ignoredOverrides } : {}),

View File

@@ -529,10 +529,11 @@ async function executeVideoGenerationJob(params: {
),
);
const requestedDurationSeconds =
typeof result.metadata?.requestedDurationSeconds === "number" &&
result.normalization?.durationSeconds?.requested ??
(typeof result.metadata?.requestedDurationSeconds === "number" &&
Number.isFinite(result.metadata.requestedDurationSeconds)
? result.metadata.requestedDurationSeconds
: params.durationSeconds;
: params.durationSeconds);
const ignoredOverrides = result.ignoredOverrides ?? [];
const ignoredOverrideKeys = new Set(ignoredOverrides.map((entry) => entry.key));
const warning =
@@ -540,34 +541,41 @@ async function executeVideoGenerationJob(params: {
? `Ignored unsupported overrides for ${result.provider}/${result.model}: ${ignoredOverrides.map(formatIgnoredVideoGenerationOverride).join(", ")}.`
: undefined;
const normalizedDurationSeconds =
typeof result.metadata?.normalizedDurationSeconds === "number" &&
result.normalization?.durationSeconds?.applied ??
(typeof result.metadata?.normalizedDurationSeconds === "number" &&
Number.isFinite(result.metadata.normalizedDurationSeconds)
? result.metadata.normalizedDurationSeconds
: requestedDurationSeconds;
const supportedDurationSeconds = Array.isArray(result.metadata?.supportedDurationSeconds)
? result.metadata.supportedDurationSeconds.filter(
(entry): entry is number => typeof entry === "number" && Number.isFinite(entry),
)
: undefined;
: requestedDurationSeconds);
const supportedDurationSeconds =
result.normalization?.durationSeconds?.supportedValues ??
(Array.isArray(result.metadata?.supportedDurationSeconds)
? result.metadata.supportedDurationSeconds.filter(
(entry): entry is number => typeof entry === "number" && Number.isFinite(entry),
)
: undefined);
const normalizedSize =
typeof result.metadata?.normalizedSize === "string" && result.metadata.normalizedSize.trim()
result.normalization?.size?.applied ??
(typeof result.metadata?.normalizedSize === "string" && result.metadata.normalizedSize.trim()
? result.metadata.normalizedSize
: undefined;
: undefined);
const normalizedAspectRatio =
typeof result.metadata?.normalizedAspectRatio === "string" &&
result.normalization?.aspectRatio?.applied ??
(typeof result.metadata?.normalizedAspectRatio === "string" &&
result.metadata.normalizedAspectRatio.trim()
? result.metadata.normalizedAspectRatio
: undefined;
: undefined);
const normalizedResolution =
typeof result.metadata?.normalizedResolution === "string" &&
result.normalization?.resolution?.applied ??
(typeof result.metadata?.normalizedResolution === "string" &&
result.metadata.normalizedResolution.trim()
? result.metadata.normalizedResolution
: undefined;
: undefined);
const sizeTranslatedToAspectRatio =
!normalizedSize &&
typeof result.metadata?.requestedSize === "string" &&
result.metadata.requestedSize === params.size &&
Boolean(normalizedAspectRatio);
result.normalization?.aspectRatio?.derivedFrom === "size" ||
(!normalizedSize &&
typeof result.metadata?.requestedSize === "string" &&
result.metadata.requestedSize === params.size &&
Boolean(normalizedAspectRatio));
const lines = [
`Generated ${savedVideos.length} video${savedVideos.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
...(warning ? [`Warning: ${warning}`] : []),
@@ -660,6 +668,7 @@ async function executeVideoGenerationJob(params: {
: {}),
...(params.filename ? { filename: params.filename } : {}),
attempts: result.attempts,
...(result.normalization ? { normalization: result.normalization } : {}),
metadata: result.metadata,
...(warning ? { warning } : {}),
...(ignoredOverrides.length > 0 ? { ignoredOverrides } : {}),

View File

@@ -0,0 +1,204 @@
import {
hasMediaNormalizationEntry,
resolveClosestAspectRatio,
resolveClosestResolution,
resolveClosestSize,
type MediaNormalizationEntry,
} from "../media-generation/runtime-shared.js";
import type {
ImageGenerationIgnoredOverride,
ImageGenerationNormalization,
ImageGenerationProvider,
ImageGenerationResolution,
ImageGenerationSourceImage,
} from "./types.js";
export type ResolvedImageGenerationOverrides = {
size?: string;
aspectRatio?: string;
resolution?: ImageGenerationResolution;
ignoredOverrides: ImageGenerationIgnoredOverride[];
normalization?: ImageGenerationNormalization;
};
/**
 * Collapse an all-empty normalization record to `undefined` so callers can
 * cheaply test "was anything normalized?" and omit the field entirely.
 */
function finalizeImageNormalization(
  normalization: ImageGenerationNormalization,
): ImageGenerationNormalization | undefined {
  if (hasMediaNormalizationEntry(normalization.size)) {
    return normalization;
  }
  if (hasMediaNormalizationEntry(normalization.aspectRatio)) {
    return normalization;
  }
  if (hasMediaNormalizationEntry(normalization.resolution)) {
    return normalization;
  }
  return undefined;
}
/**
 * Clamp requested image-generation overrides (size / aspect ratio / resolution)
 * to what the selected provider supports for the active mode — edit when input
 * images are present, otherwise generate.
 *
 * Values are snapped to the closest supported option, translated between size
 * and aspect ratio when only one of the two is accepted, or dropped with an
 * `ignoredOverrides` entry when they cannot be honored at all. Every change is
 * mirrored into `normalization` so callers can report what actually ran.
 */
export function resolveImageGenerationOverrides(params: {
  provider: ImageGenerationProvider;
  size?: string;
  aspectRatio?: string;
  resolution?: ImageGenerationResolution;
  inputImages?: ImageGenerationSourceImage[];
}): ResolvedImageGenerationOverrides {
  // Edit mode and plain generation can advertise different capabilities.
  const hasInputImages = (params.inputImages?.length ?? 0) > 0;
  const modeCaps = hasInputImages
    ? params.provider.capabilities.edit
    : params.provider.capabilities.generate;
  const geometry = params.provider.capabilities.geometry;
  const ignoredOverrides: ImageGenerationIgnoredOverride[] = [];
  const normalization: ImageGenerationNormalization = {};
  let size = params.size;
  let aspectRatio = params.aspectRatio;
  let resolution = params.resolution;
  // Snap the requested size to the closest size the provider lists.
  if (size && (geometry?.sizes?.length ?? 0) > 0 && modeCaps.supportsSize) {
    const normalizedSize = resolveClosestSize({
      requestedSize: size,
      supportedSizes: geometry?.sizes,
    });
    if (normalizedSize && normalizedSize !== size) {
      normalization.size = {
        requested: size,
        applied: normalizedSize,
      };
    }
    size = normalizedSize;
  }
  // Size unsupported: try to honor the intent by deriving an aspect ratio
  // from it; otherwise the size override is dropped and reported.
  if (!modeCaps.supportsSize && size) {
    let translated = false;
    if (modeCaps.supportsAspectRatio) {
      const normalizedAspectRatio = resolveClosestAspectRatio({
        requestedAspectRatio: aspectRatio,
        requestedSize: size,
        supportedAspectRatios: geometry?.aspectRatios,
      });
      if (normalizedAspectRatio) {
        aspectRatio = normalizedAspectRatio;
        normalization.aspectRatio = {
          applied: normalizedAspectRatio,
          derivedFrom: "size",
        };
        translated = true;
      }
    }
    if (!translated) {
      ignoredOverrides.push({ key: "size", value: size });
    }
    size = undefined;
  }
  // Snap the aspect ratio, or — when unsupported — fall back to deriving a
  // size from it before giving up on the override.
  if (aspectRatio && (geometry?.aspectRatios?.length ?? 0) > 0 && modeCaps.supportsAspectRatio) {
    const normalizedAspectRatio = resolveClosestAspectRatio({
      requestedAspectRatio: aspectRatio,
      requestedSize: size,
      supportedAspectRatios: geometry?.aspectRatios,
    });
    if (normalizedAspectRatio && normalizedAspectRatio !== aspectRatio) {
      normalization.aspectRatio = {
        requested: aspectRatio,
        applied: normalizedAspectRatio,
      };
    }
    aspectRatio = normalizedAspectRatio;
  } else if (!modeCaps.supportsAspectRatio && aspectRatio) {
    const derivedSize =
      modeCaps.supportsSize && !size
        ? resolveClosestSize({
            requestedSize: params.size,
            requestedAspectRatio: aspectRatio,
            supportedSizes: geometry?.sizes,
          })
        : undefined;
    if (derivedSize) {
      size = derivedSize;
      normalization.size = {
        applied: derivedSize,
        derivedFrom: "aspectRatio",
      };
    } else {
      ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio });
    }
    aspectRatio = undefined;
  }
  // Snap the resolution, or drop it when the mode cannot honor it.
  if (resolution && (geometry?.resolutions?.length ?? 0) > 0 && modeCaps.supportsResolution) {
    const normalizedResolution = resolveClosestResolution({
      requestedResolution: resolution,
      supportedResolutions: geometry?.resolutions,
    });
    if (normalizedResolution && normalizedResolution !== resolution) {
      normalization.resolution = {
        requested: resolution,
        applied: normalizedResolution,
      };
    }
    resolution = normalizedResolution;
  } else if (!modeCaps.supportsResolution && resolution) {
    ignoredOverrides.push({ key: "resolution", value: resolution });
    resolution = undefined;
  }
  // Final sweep: anything still set that the mode cannot accept is dropped.
  if (size && !modeCaps.supportsSize) {
    ignoredOverrides.push({ key: "size", value: size });
    size = undefined;
  }
  if (aspectRatio && !modeCaps.supportsAspectRatio) {
    ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio });
    aspectRatio = undefined;
  }
  if (resolution && !modeCaps.supportsResolution) {
    ignoredOverrides.push({ key: "resolution", value: resolution });
    resolution = undefined;
  }
  // Backfill normalization entries for values that ended up different from the
  // request but were not recorded above.
  if (
    !normalization.aspectRatio &&
    aspectRatio &&
    ((!params.aspectRatio && params.size) || params.aspectRatio !== aspectRatio)
  ) {
    const entry: MediaNormalizationEntry<string> = {
      applied: aspectRatio,
      ...(params.aspectRatio ? { requested: params.aspectRatio } : {}),
      ...(!params.aspectRatio && params.size ? { derivedFrom: "size" } : {}),
    };
    normalization.aspectRatio = entry;
  }
  if (!normalization.size && size && params.size && params.size !== size) {
    normalization.size = {
      requested: params.size,
      applied: size,
    };
  }
  // NOTE: a second "aspect ratio derived from size" backfill used to live here;
  // it was unreachable because the (!params.aspectRatio && params.size) case is
  // already covered by the backfill condition above, so it has been removed.
  if (
    !normalization.resolution &&
    resolution &&
    params.resolution &&
    params.resolution !== resolution
  ) {
    normalization.resolution = {
      requested: params.resolution,
      applied: resolution,
    };
  }
  return {
    size,
    aspectRatio,
    resolution,
    ignoredOverrides,
    normalization: finalizeImageNormalization(normalization),
  };
}

View File

@@ -6,17 +6,16 @@ import { createSubsystemLogger } from "../logging/subsystem.js";
import {
buildNoCapabilityModelConfiguredMessage,
deriveAspectRatioFromSize,
resolveClosestAspectRatio,
resolveClosestResolution,
resolveClosestSize,
resolveCapabilityModelCandidates,
throwCapabilityGenerationFailure,
} from "../media-generation/runtime-shared.js";
import { parseImageGenerationModelRef } from "./model-ref.js";
import { resolveImageGenerationOverrides } from "./normalization.js";
import { getImageGenerationProvider, listImageGenerationProviders } from "./provider-registry.js";
import type {
GeneratedImageAsset,
ImageGenerationIgnoredOverride,
ImageGenerationNormalization,
ImageGenerationResolution,
ImageGenerationResult,
ImageGenerationSourceImage,
@@ -42,6 +41,7 @@ export type GenerateImageRuntimeResult = {
provider: string;
model: string;
attempts: FallbackAttempt[];
normalization?: ImageGenerationNormalization;
metadata?: Record<string, unknown>;
ignoredOverrides: ImageGenerationIgnoredOverride[];
};
@@ -58,108 +58,6 @@ export function listRuntimeImageGenerationProviders(params?: { config?: OpenClaw
return listImageGenerationProviders(params?.config);
}
// Legacy, provider-registry-bound variant of the override sanitizer (superseded
// by the shared resolveImageGenerationOverrides). Snaps size/aspect-ratio/
// resolution overrides to the provider's capabilities, translating between size
// and aspect ratio when only one is supported, and collects what was dropped in
// `ignoredOverrides`. Unlike the shared version it records no normalization
// metadata.
function resolveProviderImageGenerationOverrides(params: {
provider: NonNullable<ReturnType<typeof getImageGenerationProvider>>;
size?: string;
aspectRatio?: string;
resolution?: ImageGenerationResolution;
inputImages?: ImageGenerationSourceImage[];
}) {
// Edit mode (input images present) and generation can differ in capabilities.
const hasInputImages = (params.inputImages?.length ?? 0) > 0;
const modeCaps = hasInputImages
? params.provider.capabilities.edit
: params.provider.capabilities.generate;
const geometry = params.provider.capabilities.geometry;
const ignoredOverrides: ImageGenerationIgnoredOverride[] = [];
let size = params.size;
let aspectRatio = params.aspectRatio;
let resolution = params.resolution;
// Snap a supported size to the closest value the provider lists.
if (size && (geometry?.sizes?.length ?? 0) > 0 && modeCaps.supportsSize) {
size = resolveClosestSize({
requestedSize: size,
supportedSizes: geometry?.sizes,
});
}
// Size unsupported: translate it into an aspect ratio when possible,
// otherwise drop the size override and report it.
if (!modeCaps.supportsSize && size) {
let translated = false;
if (modeCaps.supportsAspectRatio) {
const normalizedAspectRatio = resolveClosestAspectRatio({
requestedAspectRatio: aspectRatio,
requestedSize: size,
supportedAspectRatios: geometry?.aspectRatios,
});
if (normalizedAspectRatio) {
aspectRatio = normalizedAspectRatio;
translated = true;
}
}
if (!translated) {
ignoredOverrides.push({ key: "size", value: size });
}
size = undefined;
}
// Snap the aspect ratio, or — when unsupported — try deriving a size from it
// before giving up on the override.
if (aspectRatio && (geometry?.aspectRatios?.length ?? 0) > 0 && modeCaps.supportsAspectRatio) {
aspectRatio = resolveClosestAspectRatio({
requestedAspectRatio: aspectRatio,
requestedSize: size,
supportedAspectRatios: geometry?.aspectRatios,
});
} else if (!modeCaps.supportsAspectRatio && aspectRatio) {
const derivedSize =
modeCaps.supportsSize && !size
? resolveClosestSize({
requestedSize: params.size,
requestedAspectRatio: aspectRatio,
supportedSizes: geometry?.sizes,
})
: undefined;
let translated = false;
if (derivedSize) {
size = derivedSize;
translated = true;
}
if (!translated) {
ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio });
}
aspectRatio = undefined;
}
// Snap the resolution, or drop it when the mode cannot honor it.
if (resolution && (geometry?.resolutions?.length ?? 0) > 0 && modeCaps.supportsResolution) {
resolution = resolveClosestResolution({
requestedResolution: resolution,
supportedResolutions: geometry?.resolutions,
});
} else if (!modeCaps.supportsResolution && resolution) {
ignoredOverrides.push({ key: "resolution", value: resolution });
resolution = undefined;
}
// Final sweep: drop anything still set that the mode cannot accept.
if (size && !modeCaps.supportsSize) {
ignoredOverrides.push({ key: "size", value: size });
size = undefined;
}
if (aspectRatio && !modeCaps.supportsAspectRatio) {
ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio });
aspectRatio = undefined;
}
if (resolution && !modeCaps.supportsResolution) {
ignoredOverrides.push({ key: "resolution", value: resolution });
resolution = undefined;
}
return {
size,
aspectRatio,
resolution,
ignoredOverrides,
};
}
export async function generateImage(
params: GenerateImageParams,
): Promise<GenerateImageRuntimeResult> {
@@ -192,7 +90,7 @@ export async function generateImage(
}
try {
const sanitized = resolveProviderImageGenerationOverrides({
const sanitized = resolveImageGenerationOverrides({
provider,
size: params.size,
aspectRatio: params.aspectRatio,
@@ -220,30 +118,35 @@ export async function generateImage(
provider: candidate.provider,
model: result.model ?? candidate.model,
attempts,
normalization: sanitized.normalization,
metadata: {
...result.metadata,
...(params.size && sanitized.size && params.size !== sanitized.size
? { requestedSize: params.size, normalizedSize: sanitized.size }
: {}),
...((params.aspectRatio &&
sanitized.aspectRatio &&
params.aspectRatio !== sanitized.aspectRatio) ||
(!params.aspectRatio && params.size && sanitized.aspectRatio)
...(sanitized.normalization?.size?.requested !== undefined &&
sanitized.normalization.size.applied !== undefined
? {
...(params.size ? { requestedSize: params.size } : {}),
...(params.aspectRatio ? { requestedAspectRatio: params.aspectRatio } : {}),
normalizedAspectRatio: sanitized.aspectRatio,
...(params.size
? { aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size) }
requestedSize: sanitized.normalization.size.requested,
normalizedSize: sanitized.normalization.size.applied,
}
: {}),
...(sanitized.normalization?.aspectRatio?.applied !== undefined
? {
...(sanitized.normalization.aspectRatio.requested !== undefined
? { requestedAspectRatio: sanitized.normalization.aspectRatio.requested }
: {}),
normalizedAspectRatio: sanitized.normalization.aspectRatio.applied,
...(sanitized.normalization.aspectRatio.derivedFrom === "size" && params.size
? {
requestedSize: params.size,
aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size),
}
: {}),
}
: {}),
...(params.resolution &&
sanitized.resolution &&
params.resolution !== sanitized.resolution
...(sanitized.normalization?.resolution?.requested !== undefined &&
sanitized.normalization.resolution.applied !== undefined
? {
requestedResolution: params.resolution,
normalizedResolution: sanitized.resolution,
requestedResolution: sanitized.normalization.resolution.requested,
normalizedResolution: sanitized.normalization.resolution.applied,
}
: {}),
},

View File

@@ -1,5 +1,6 @@
import type { AuthProfileStore } from "../agents/auth-profiles.js";
import type { OpenClawConfig } from "../config/config.js";
import type { MediaNormalizationEntry } from "../media-generation/runtime-shared.js";
export type GeneratedImageAsset = {
buffer: Buffer;
@@ -69,6 +70,12 @@ export type ImageGenerationGeometryCapabilities = {
resolutions?: ImageGenerationResolution[];
};
export type ImageGenerationNormalization = {
size?: MediaNormalizationEntry<string>;
aspectRatio?: MediaNormalizationEntry<string>;
resolution?: MediaNormalizationEntry<ImageGenerationResolution>;
};
export type ImageGenerationProviderCapabilities = {
generate: ImageGenerationModeCapabilities;
edit: ImageGenerationEditCapabilities;

View File

@@ -16,6 +16,27 @@ export type ParsedProviderModelRef = {
model: string;
};
/** Primitive value kinds a media normalization entry may carry. */
export type MediaNormalizationValue = string | number | boolean;

/**
 * Records how a requested media parameter was adjusted: what the caller asked
 * for, what was actually applied, what the applied value was derived from, and
 * which values the provider supports.
 */
export type MediaNormalizationEntry<TValue extends MediaNormalizationValue> = {
  requested?: TValue;
  applied?: TValue;
  derivedFrom?: string;
  supportedValues?: readonly TValue[];
};

/**
 * Type guard: true when the entry exists and carries at least one piece of
 * information (a requested/applied value, a derivation source, or a non-empty
 * supported-values list).
 */
export function hasMediaNormalizationEntry<TValue extends MediaNormalizationValue>(
  entry: MediaNormalizationEntry<TValue> | undefined,
): entry is MediaNormalizationEntry<TValue> {
  if (!entry) {
    return false;
  }
  if (entry.requested !== undefined || entry.applied !== undefined) {
    return true;
  }
  if (entry.derivedFrom !== undefined) {
    return true;
  }
  return (entry.supportedValues?.length ?? 0) > 0;
}
const IMAGE_RESOLUTION_ORDER = ["1K", "2K", "4K"] as const;
type CapabilityProviderCandidate = {

View File

@@ -0,0 +1,105 @@
import {
hasMediaNormalizationEntry,
normalizeDurationToClosestMax,
} from "../media-generation/runtime-shared.js";
import { resolveMusicGenerationModeCapabilities } from "./capabilities.js";
import type {
MusicGenerationIgnoredOverride,
MusicGenerationNormalization,
MusicGenerationOutputFormat,
MusicGenerationProvider,
MusicGenerationSourceImage,
} from "./types.js";
export type ResolvedMusicGenerationOverrides = {
lyrics?: string;
instrumental?: boolean;
durationSeconds?: number;
format?: MusicGenerationOutputFormat;
ignoredOverrides: MusicGenerationIgnoredOverride[];
normalization?: MusicGenerationNormalization;
};
/**
 * Sanitize music-generation overrides against the capabilities of the selected
 * provider/model and mode.
 *
 * Overrides the capabilities cannot honor are dropped and reported through
 * `ignoredOverrides`; a duration clamped to the provider maximum is reported
 * through `normalization.durationSeconds`.
 */
export function resolveMusicGenerationOverrides(params: {
  provider: MusicGenerationProvider;
  model: string;
  lyrics?: string;
  instrumental?: boolean;
  durationSeconds?: number;
  format?: MusicGenerationOutputFormat;
  inputImages?: MusicGenerationSourceImage[];
}): ResolvedMusicGenerationOverrides {
  const { capabilities: caps } = resolveMusicGenerationModeCapabilities({
    provider: params.provider,
    inputImageCount: params.inputImages?.length ?? 0,
  });
  const ignoredOverrides: MusicGenerationIgnoredOverride[] = [];
  // No capability information: pass everything through untouched.
  if (!caps) {
    return {
      lyrics: params.lyrics,
      instrumental: params.instrumental,
      durationSeconds: params.durationSeconds,
      format: params.format,
      ignoredOverrides,
    };
  }
  // Lyrics: only kept when the provider supports them.
  let lyrics = params.lyrics;
  if (lyrics?.trim() && !caps.supportsLyrics) {
    ignoredOverrides.push({ key: "lyrics", value: lyrics });
    lyrics = undefined;
  }
  // Instrumental toggle: dropped when unsupported.
  let instrumental = params.instrumental;
  if (typeof instrumental === "boolean" && !caps.supportsInstrumental) {
    ignoredOverrides.push({ key: "instrumental", value: instrumental });
    instrumental = undefined;
  }
  // Duration: dropped when unsupported, otherwise clamped toward the
  // provider's maximum (recording the adjustment when it changes the value).
  let durationSeconds = params.durationSeconds;
  let durationEntry: MusicGenerationNormalization["durationSeconds"];
  if (typeof durationSeconds === "number") {
    if (!caps.supportsDuration) {
      ignoredOverrides.push({ key: "durationSeconds", value: durationSeconds });
      durationSeconds = undefined;
    } else {
      const clamped = normalizeDurationToClosestMax(durationSeconds, caps.maxDurationSeconds);
      if (typeof clamped === "number" && clamped !== durationSeconds) {
        durationEntry = {
          requested: durationSeconds,
          applied: clamped,
        };
      }
      durationSeconds = clamped;
    }
  }
  // Output format: dropped when formats are unsupported entirely or the
  // model's (or provider's) allow-list does not contain the requested one.
  let format = params.format;
  if (format) {
    const supportedFormats =
      caps.supportedFormatsByModel?.[params.model] ?? caps.supportedFormats ?? [];
    const unsupported =
      !caps.supportsFormat || (supportedFormats.length > 0 && !supportedFormats.includes(format));
    if (unsupported) {
      ignoredOverrides.push({ key: "format", value: format });
      format = undefined;
    }
  }
  return {
    lyrics,
    instrumental,
    durationSeconds,
    format,
    ignoredOverrides,
    normalization: durationEntry ? { durationSeconds: durationEntry } : undefined,
  };
}

View File

@@ -5,16 +5,16 @@ import type { OpenClawConfig } from "../config/config.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import {
buildNoCapabilityModelConfiguredMessage,
normalizeDurationToClosestMax,
resolveCapabilityModelCandidates,
throwCapabilityGenerationFailure,
} from "../media-generation/runtime-shared.js";
import { resolveMusicGenerationModeCapabilities } from "./capabilities.js";
import { parseMusicGenerationModelRef } from "./model-ref.js";
import { resolveMusicGenerationOverrides } from "./normalization.js";
import { getMusicGenerationProvider, listMusicGenerationProviders } from "./provider-registry.js";
import type {
GeneratedMusicAsset,
MusicGenerationIgnoredOverride,
MusicGenerationNormalization,
MusicGenerationOutputFormat,
MusicGenerationResult,
MusicGenerationSourceImage,
@@ -41,6 +41,7 @@ export type GenerateMusicRuntimeResult = {
model: string;
attempts: FallbackAttempt[];
lyrics?: string[];
normalization?: MusicGenerationNormalization;
metadata?: Record<string, unknown>;
ignoredOverrides: MusicGenerationIgnoredOverride[];
};
@@ -49,73 +50,6 @@ export function listRuntimeMusicGenerationProviders(params?: { config?: OpenClaw
return listMusicGenerationProviders(params?.config);
}
// Legacy, provider-registry-bound variant of the override sanitizer (superseded
// by the shared resolveMusicGenerationOverrides). Drops overrides the
// provider/model cannot honor, clamps the duration to the provider maximum, and
// reports everything dropped via `ignoredOverrides`. Unlike the shared version
// it records no normalization metadata.
function resolveProviderMusicGenerationOverrides(params: {
provider: NonNullable<ReturnType<typeof getMusicGenerationProvider>>;
model: string;
lyrics?: string;
instrumental?: boolean;
durationSeconds?: number;
format?: MusicGenerationOutputFormat;
inputImages?: MusicGenerationSourceImage[];
}) {
const { capabilities: caps } = resolveMusicGenerationModeCapabilities({
provider: params.provider,
inputImageCount: params.inputImages?.length ?? 0,
});
const ignoredOverrides: MusicGenerationIgnoredOverride[] = [];
let lyrics = params.lyrics;
let instrumental = params.instrumental;
let durationSeconds = params.durationSeconds;
let format = params.format;
// No capability information: pass everything through untouched.
if (!caps) {
return {
lyrics,
instrumental,
durationSeconds,
format,
ignoredOverrides,
};
}
// Lyrics: only kept when the provider supports them.
if (lyrics?.trim() && !caps.supportsLyrics) {
ignoredOverrides.push({ key: "lyrics", value: lyrics });
lyrics = undefined;
}
// Instrumental toggle: dropped when unsupported.
if (typeof instrumental === "boolean" && !caps.supportsInstrumental) {
ignoredOverrides.push({ key: "instrumental", value: instrumental });
instrumental = undefined;
}
// Duration: dropped when unsupported, otherwise clamped toward the maximum.
if (typeof durationSeconds === "number" && !caps.supportsDuration) {
ignoredOverrides.push({ key: "durationSeconds", value: durationSeconds });
durationSeconds = undefined;
} else if (typeof durationSeconds === "number") {
durationSeconds = normalizeDurationToClosestMax(durationSeconds, caps.maxDurationSeconds);
}
// Output format: dropped when formats are unsupported entirely or the
// model/provider allow-list does not contain the requested one.
if (format) {
const supportedFormats =
caps.supportedFormatsByModel?.[params.model] ?? caps.supportedFormats ?? [];
if (
!caps.supportsFormat ||
(supportedFormats.length > 0 && !supportedFormats.includes(format))
) {
ignoredOverrides.push({ key: "format", value: format });
format = undefined;
}
}
return {
lyrics,
instrumental,
durationSeconds,
format,
ignoredOverrides,
};
}
export async function generateMusic(
params: GenerateMusicParams,
): Promise<GenerateMusicRuntimeResult> {
@@ -155,7 +89,7 @@ export async function generateMusic(
}
try {
const sanitized = resolveProviderMusicGenerationOverrides({
const sanitized = resolveMusicGenerationOverrides({
provider,
model: candidate.model,
lyrics: params.lyrics,
@@ -186,14 +120,14 @@ export async function generateMusic(
model: result.model ?? candidate.model,
attempts,
lyrics: result.lyrics,
normalization: sanitized.normalization,
metadata: {
...result.metadata,
...(typeof params.durationSeconds === "number" &&
typeof sanitized.durationSeconds === "number" &&
params.durationSeconds !== sanitized.durationSeconds
...(sanitized.normalization?.durationSeconds?.requested !== undefined &&
sanitized.normalization.durationSeconds.applied !== undefined
? {
requestedDurationSeconds: params.durationSeconds,
normalizedDurationSeconds: sanitized.durationSeconds,
requestedDurationSeconds: sanitized.normalization.durationSeconds.requested,
normalizedDurationSeconds: sanitized.normalization.durationSeconds.applied,
}
: {}),
},

View File

@@ -1,5 +1,6 @@
import type { AuthProfileStore } from "../agents/auth-profiles.js";
import type { OpenClawConfig } from "../config/config.js";
import type { MediaNormalizationEntry } from "../media-generation/runtime-shared.js";
export type MusicGenerationOutputFormat = "mp3" | "wav";
@@ -74,6 +75,10 @@ export type MusicGenerationProviderCapabilities = MusicGenerationModeCapabilitie
edit?: MusicGenerationEditCapabilities;
};
export type MusicGenerationNormalization = {
durationSeconds?: MediaNormalizationEntry<number>;
};
export type MusicGenerationProvider = {
id: string;
aliases?: string[];

View File

@@ -0,0 +1,246 @@
import {
hasMediaNormalizationEntry,
resolveClosestAspectRatio,
resolveClosestResolution,
resolveClosestSize,
} from "../media-generation/runtime-shared.js";
import { resolveVideoGenerationModeCapabilities } from "./capabilities.js";
import {
normalizeVideoGenerationDuration,
resolveVideoGenerationSupportedDurations,
} from "./duration-support.js";
import type {
VideoGenerationIgnoredOverride,
VideoGenerationNormalization,
VideoGenerationProvider,
VideoGenerationResolution,
} from "./types.js";
export type ResolvedVideoGenerationOverrides = {
size?: string;
aspectRatio?: string;
resolution?: VideoGenerationResolution;
durationSeconds?: number;
supportedDurationSeconds?: readonly number[];
audio?: boolean;
watermark?: boolean;
ignoredOverrides: VideoGenerationIgnoredOverride[];
normalization?: VideoGenerationNormalization;
};
export function resolveVideoGenerationOverrides(params: {
provider: VideoGenerationProvider;
model: string;
size?: string;
aspectRatio?: string;
resolution?: VideoGenerationResolution;
durationSeconds?: number;
audio?: boolean;
watermark?: boolean;
inputImageCount?: number;
inputVideoCount?: number;
}): ResolvedVideoGenerationOverrides {
const { capabilities: caps } = resolveVideoGenerationModeCapabilities({
provider: params.provider,
inputImageCount: params.inputImageCount,
inputVideoCount: params.inputVideoCount,
});
const ignoredOverrides: VideoGenerationIgnoredOverride[] = [];
const normalization: VideoGenerationNormalization = {};
// Start from the caller-requested overrides; each may be normalized to a
// supported value, translated into another dimension (size <-> aspect ratio),
// or dropped with an `ignoredOverrides` entry, based on provider capabilities.
let size = params.size;
let aspectRatio = params.aspectRatio;
let resolution = params.resolution;
let audio = params.audio;
let watermark = params.watermark;
if (caps) {
  // Size supported and the provider advertises concrete sizes: snap the
  // request to the closest supported size and record the adjustment.
  if (size && (caps.sizes?.length ?? 0) > 0 && caps.supportsSize) {
    const normalizedSize = resolveClosestSize({
      requestedSize: size,
      requestedAspectRatio: aspectRatio,
      supportedSizes: caps.sizes,
    });
    if (normalizedSize && normalizedSize !== size) {
      normalization.size = {
        requested: size,
        applied: normalizedSize,
      };
    }
    // NOTE(review): if resolveClosestSize returns undefined the requested size
    // is cleared without an ignoredOverrides entry — confirm this is intended.
    size = normalizedSize;
  }
  // Size not supported: try to preserve the caller's intent by deriving a
  // supported aspect ratio from the requested size; otherwise report the
  // size override as ignored. Either way, size is consumed here.
  if (!caps.supportsSize && size) {
    let translated = false;
    if (caps.supportsAspectRatio) {
      const normalizedAspectRatio = resolveClosestAspectRatio({
        requestedAspectRatio: aspectRatio,
        requestedSize: size,
        supportedAspectRatios: caps.aspectRatios,
      });
      if (normalizedAspectRatio) {
        aspectRatio = normalizedAspectRatio;
        normalization.aspectRatio = {
          applied: normalizedAspectRatio,
          derivedFrom: "size",
        };
        translated = true;
      }
    }
    if (!translated) {
      ignoredOverrides.push({ key: "size", value: size });
    }
    size = undefined;
  }
  // Aspect ratio supported with an explicit candidate list: snap to the
  // closest supported ratio. (If supportsAspectRatio is true but the list is
  // empty, the value passes through unchanged — neither branch fires.)
  if (aspectRatio && (caps.aspectRatios?.length ?? 0) > 0 && caps.supportsAspectRatio) {
    const normalizedAspectRatio = resolveClosestAspectRatio({
      requestedAspectRatio: aspectRatio,
      requestedSize: size,
      supportedAspectRatios: caps.aspectRatios,
    });
    if (normalizedAspectRatio && normalizedAspectRatio !== aspectRatio) {
      normalization.aspectRatio = {
        requested: aspectRatio,
        applied: normalizedAspectRatio,
      };
    }
    // NOTE(review): same silent-drop path as size above when the resolver
    // returns undefined — confirm intended.
    aspectRatio = normalizedAspectRatio;
  } else if (!caps.supportsAspectRatio && aspectRatio) {
    // Aspect ratio not supported: if the provider takes sizes and no size has
    // been resolved yet, derive one from the ratio (using the original
    // params.size as a hint); otherwise the ratio override is ignored.
    const derivedSize =
      caps.supportsSize && !size
        ? resolveClosestSize({
            requestedSize: params.size,
            requestedAspectRatio: aspectRatio,
            supportedSizes: caps.sizes,
          })
        : undefined;
    if (derivedSize) {
      size = derivedSize;
      normalization.size = {
        applied: derivedSize,
        derivedFrom: "aspectRatio",
      };
    } else {
      ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio });
    }
    aspectRatio = undefined;
  }
  // Resolution: snap to the closest supported value, or ignore when the
  // provider does not accept a resolution at all.
  if (resolution && (caps.resolutions?.length ?? 0) > 0 && caps.supportsResolution) {
    const normalizedResolution = resolveClosestResolution({
      requestedResolution: resolution,
      supportedResolutions: caps.resolutions,
    });
    if (normalizedResolution && normalizedResolution !== resolution) {
      normalization.resolution = {
        requested: resolution,
        applied: normalizedResolution,
      };
    }
    resolution = normalizedResolution;
  } else if (resolution && !caps.supportsResolution) {
    ignoredOverrides.push({ key: "resolution", value: resolution });
    resolution = undefined;
  }
  // Boolean toggles have no "closest" value: drop them when unsupported.
  // typeof check ensures an explicit `false` is also reported, not just `true`.
  if (typeof audio === "boolean" && !caps.supportsAudio) {
    ignoredOverrides.push({ key: "audio", value: audio });
    audio = undefined;
  }
  if (typeof watermark === "boolean" && !caps.supportsWatermark) {
    ignoredOverrides.push({ key: "watermark", value: watermark });
    watermark = undefined;
  }
}
// Defensive safety net: clear anything that survived above despite the
// capability flag being off. Given the branches above these look unreachable
// when caps is set, but they guarantee the invariant regardless.
if (caps && size && !caps.supportsSize) {
  ignoredOverrides.push({ key: "size", value: size });
  size = undefined;
}
if (caps && aspectRatio && !caps.supportsAspectRatio) {
  ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio });
  aspectRatio = undefined;
}
if (caps && resolution && !caps.supportsResolution) {
  ignoredOverrides.push({ key: "resolution", value: resolution });
  resolution = undefined;
}
// Fallback normalization records: ensure an entry exists whenever the
// effective value differs from what the caller asked for, even if no branch
// above wrote one explicitly.
if (!normalization.size && size && params.size && params.size !== size) {
  normalization.size = {
    requested: params.size,
    applied: size,
  };
}
if (
  !normalization.aspectRatio &&
  aspectRatio &&
  ((!params.aspectRatio && params.size) || params.aspectRatio !== aspectRatio)
) {
  // An aspect ratio with no requested ratio but a requested size must have
  // been derived from that size.
  normalization.aspectRatio = {
    applied: aspectRatio,
    ...(params.aspectRatio ? { requested: params.aspectRatio } : {}),
    ...(!params.aspectRatio && params.size ? { derivedFrom: "size" } : {}),
  };
}
if (
  !normalization.resolution &&
  resolution &&
  params.resolution &&
  params.resolution !== resolution
) {
  normalization.resolution = {
    requested: params.resolution,
    applied: resolution,
  };
}
// Duration: clamp the request to a whole number of seconds (minimum 1), then
// let the provider/model-aware helper pick the effective duration. The set of
// supported durations is attached to the record when the value changed.
const requestedDurationSeconds =
  typeof params.durationSeconds === "number" && Number.isFinite(params.durationSeconds)
    ? Math.max(1, Math.round(params.durationSeconds))
    : undefined;
const durationSeconds = normalizeVideoGenerationDuration({
  provider: params.provider,
  model: params.model,
  durationSeconds: requestedDurationSeconds,
  inputImageCount: params.inputImageCount ?? 0,
  inputVideoCount: params.inputVideoCount ?? 0,
});
const supportedDurationSeconds = resolveVideoGenerationSupportedDurations({
  provider: params.provider,
  model: params.model,
  inputImageCount: params.inputImageCount ?? 0,
  inputVideoCount: params.inputVideoCount ?? 0,
});
if (
  typeof requestedDurationSeconds === "number" &&
  typeof durationSeconds === "number" &&
  requestedDurationSeconds !== durationSeconds
) {
  normalization.durationSeconds = {
    requested: requestedDurationSeconds,
    applied: durationSeconds,
    ...(supportedDurationSeconds?.length ? { supportedValues: supportedDurationSeconds } : {}),
  };
}
// Only surface `normalization` when at least one entry carries real content,
// so callers can use its presence as "something was adjusted".
return {
  size,
  aspectRatio,
  resolution,
  durationSeconds,
  supportedDurationSeconds,
  audio,
  watermark,
  ignoredOverrides,
  normalization:
    hasMediaNormalizationEntry(normalization.size) ||
    hasMediaNormalizationEntry(normalization.aspectRatio) ||
    hasMediaNormalizationEntry(normalization.resolution) ||
    hasMediaNormalizationEntry(normalization.durationSeconds)
      ? normalization
      : undefined,
};
}

View File

@@ -6,20 +6,16 @@ import { createSubsystemLogger } from "../logging/subsystem.js";
import {
buildNoCapabilityModelConfiguredMessage,
deriveAspectRatioFromSize,
resolveClosestAspectRatio,
resolveCapabilityModelCandidates,
throwCapabilityGenerationFailure,
} from "../media-generation/runtime-shared.js";
import { resolveVideoGenerationModeCapabilities } from "./capabilities.js";
import {
normalizeVideoGenerationDuration,
resolveVideoGenerationSupportedDurations,
} from "./duration-support.js";
import { parseVideoGenerationModelRef } from "./model-ref.js";
import { resolveVideoGenerationOverrides } from "./normalization.js";
import { getVideoGenerationProvider, listVideoGenerationProviders } from "./provider-registry.js";
import type {
GeneratedVideoAsset,
VideoGenerationIgnoredOverride,
VideoGenerationNormalization,
VideoGenerationResolution,
VideoGenerationResult,
VideoGenerationSourceAsset,
@@ -48,6 +44,7 @@ export type GenerateVideoRuntimeResult = {
provider: string;
model: string;
attempts: FallbackAttempt[];
normalization?: VideoGenerationNormalization;
metadata?: Record<string, unknown>;
ignoredOverrides: VideoGenerationIgnoredOverride[];
};
@@ -64,87 +61,6 @@ export function listRuntimeVideoGenerationProviders(params?: { config?: OpenClaw
return listVideoGenerationProviders(params?.config);
}
/**
 * Drops or translates caller overrides the resolved provider mode cannot
 * honor. A requested size may be translated into the closest supported aspect
 * ratio when only ratios are accepted; every override that cannot be honored
 * is reported through the returned `ignoredOverrides` list.
 */
function resolveProviderVideoGenerationOverrides(params: {
  provider: NonNullable<ReturnType<typeof getVideoGenerationProvider>>;
  size?: string;
  aspectRatio?: string;
  resolution?: VideoGenerationResolution;
  audio?: boolean;
  watermark?: boolean;
  inputImageCount?: number;
  inputVideoCount?: number;
}) {
  const { capabilities } = resolveVideoGenerationModeCapabilities({
    provider: params.provider,
    inputImageCount: params.inputImageCount,
    inputVideoCount: params.inputVideoCount,
  });
  const dropped: VideoGenerationIgnoredOverride[] = [];
  let effectiveSize = params.size;
  let effectiveAspectRatio = params.aspectRatio;
  let effectiveResolution = params.resolution;
  let effectiveAudio = params.audio;
  let effectiveWatermark = params.watermark;
  // Without capability metadata every override passes through untouched.
  if (capabilities) {
    if (effectiveSize && !capabilities.supportsSize) {
      // Preserve the caller's intent where possible: map size -> aspect ratio.
      const fallbackRatio = capabilities.supportsAspectRatio
        ? resolveClosestAspectRatio({
            requestedAspectRatio: effectiveAspectRatio,
            requestedSize: effectiveSize,
          })
        : undefined;
      if (fallbackRatio) {
        effectiveAspectRatio = fallbackRatio;
      } else {
        dropped.push({ key: "size", value: effectiveSize });
      }
      // The size itself is consumed either way.
      effectiveSize = undefined;
    }
    if (effectiveAspectRatio && !capabilities.supportsAspectRatio) {
      dropped.push({ key: "aspectRatio", value: effectiveAspectRatio });
      effectiveAspectRatio = undefined;
    }
    if (effectiveResolution && !capabilities.supportsResolution) {
      dropped.push({ key: "resolution", value: effectiveResolution });
      effectiveResolution = undefined;
    }
    // typeof checks ensure an explicit `false` is also reported as ignored.
    if (typeof effectiveAudio === "boolean" && !capabilities.supportsAudio) {
      dropped.push({ key: "audio", value: effectiveAudio });
      effectiveAudio = undefined;
    }
    if (typeof effectiveWatermark === "boolean" && !capabilities.supportsWatermark) {
      dropped.push({ key: "watermark", value: effectiveWatermark });
      effectiveWatermark = undefined;
    }
  }
  return {
    size: effectiveSize,
    aspectRatio: effectiveAspectRatio,
    resolution: effectiveResolution,
    audio: effectiveAudio,
    watermark: effectiveWatermark,
    ignoredOverrides: dropped,
  };
}
export async function generateVideo(
params: GenerateVideoParams,
): Promise<GenerateVideoRuntimeResult> {
@@ -177,33 +93,18 @@ export async function generateVideo(
}
try {
const sanitized = resolveProviderVideoGenerationOverrides({
const sanitized = resolveVideoGenerationOverrides({
provider,
model: candidate.model,
size: params.size,
aspectRatio: params.aspectRatio,
resolution: params.resolution,
durationSeconds: params.durationSeconds,
audio: params.audio,
watermark: params.watermark,
inputImageCount: params.inputImages?.length ?? 0,
inputVideoCount: params.inputVideos?.length ?? 0,
});
const requestedDurationSeconds =
typeof params.durationSeconds === "number" && Number.isFinite(params.durationSeconds)
? Math.max(1, Math.round(params.durationSeconds))
: undefined;
const normalizedDurationSeconds = normalizeVideoGenerationDuration({
provider,
model: candidate.model,
durationSeconds: requestedDurationSeconds,
inputImageCount: params.inputImages?.length ?? 0,
inputVideoCount: params.inputVideos?.length ?? 0,
});
const supportedDurationSeconds = resolveVideoGenerationSupportedDurations({
provider,
model: candidate.model,
inputImageCount: params.inputImages?.length ?? 0,
inputVideoCount: params.inputVideos?.length ?? 0,
});
const result: VideoGenerationResult = await provider.generateVideo({
provider: candidate.provider,
model: candidate.model,
@@ -214,7 +115,7 @@ export async function generateVideo(
size: sanitized.size,
aspectRatio: sanitized.aspectRatio,
resolution: sanitized.resolution,
durationSeconds: normalizedDurationSeconds,
durationSeconds: sanitized.durationSeconds,
audio: sanitized.audio,
watermark: sanitized.watermark,
inputImages: params.inputImages,
@@ -228,37 +129,49 @@ export async function generateVideo(
provider: candidate.provider,
model: result.model ?? candidate.model,
attempts,
normalization: sanitized.normalization,
ignoredOverrides: sanitized.ignoredOverrides,
metadata: {
...result.metadata,
...((params.size && sanitized.aspectRatio && params.size !== sanitized.size) ||
(params.aspectRatio &&
sanitized.aspectRatio &&
params.aspectRatio !== sanitized.aspectRatio)
...(sanitized.normalization?.size?.requested !== undefined &&
sanitized.normalization.size.applied !== undefined
? {
...(params.size ? { requestedSize: params.size } : {}),
...(params.aspectRatio ? { requestedAspectRatio: params.aspectRatio } : {}),
normalizedAspectRatio: sanitized.aspectRatio,
...(params.size
? { aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size) }
requestedSize: sanitized.normalization.size.requested,
normalizedSize: sanitized.normalization.size.applied,
}
: {}),
...(sanitized.normalization?.aspectRatio?.applied !== undefined
? {
...(sanitized.normalization.aspectRatio.requested !== undefined
? { requestedAspectRatio: sanitized.normalization.aspectRatio.requested }
: {}),
normalizedAspectRatio: sanitized.normalization.aspectRatio.applied,
...(sanitized.normalization.aspectRatio.derivedFrom === "size" && params.size
? {
requestedSize: params.size,
aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size),
}
: {}),
}
: {}),
...(params.resolution &&
sanitized.resolution &&
params.resolution !== sanitized.resolution
...(sanitized.normalization?.resolution?.requested !== undefined &&
sanitized.normalization.resolution.applied !== undefined
? {
requestedResolution: params.resolution,
normalizedResolution: sanitized.resolution,
requestedResolution: sanitized.normalization.resolution.requested,
normalizedResolution: sanitized.normalization.resolution.applied,
}
: {}),
...(typeof requestedDurationSeconds === "number" &&
typeof normalizedDurationSeconds === "number" &&
requestedDurationSeconds !== normalizedDurationSeconds
...(sanitized.normalization?.durationSeconds?.requested !== undefined &&
sanitized.normalization.durationSeconds.applied !== undefined
? {
requestedDurationSeconds,
normalizedDurationSeconds,
...(supportedDurationSeconds ? { supportedDurationSeconds } : {}),
requestedDurationSeconds: sanitized.normalization.durationSeconds.requested,
normalizedDurationSeconds: sanitized.normalization.durationSeconds.applied,
...(sanitized.normalization.durationSeconds.supportedValues?.length
? {
supportedDurationSeconds:
sanitized.normalization.durationSeconds.supportedValues,
}
: {}),
}
: {}),
},

View File

@@ -1,5 +1,6 @@
import type { AuthProfileStore } from "../agents/auth-profiles.js";
import type { OpenClawConfig } from "../config/config.js";
import type { MediaNormalizationEntry } from "../media-generation/runtime-shared.js";
export type GeneratedVideoAsset = {
buffer: Buffer;
@@ -61,6 +62,9 @@ export type VideoGenerationModeCapabilities = {
maxDurationSeconds?: number;
supportedDurationSeconds?: readonly number[];
supportedDurationSecondsByModel?: Readonly<Record<string, readonly number[]>>;
sizes?: readonly string[];
aspectRatios?: readonly string[];
resolutions?: readonly VideoGenerationResolution[];
supportsSize?: boolean;
supportsAspectRatio?: boolean;
supportsResolution?: boolean;
@@ -78,6 +82,13 @@ export type VideoGenerationProviderCapabilities = VideoGenerationModeCapabilitie
videoToVideo?: VideoGenerationTransformCapabilities;
};
/**
 * Per-field record of how requested video-generation parameters were adjusted
 * to match provider capabilities. Each entry carries the requested vs. applied
 * value (and, where applicable, which other field the value was derived from)
 * via `MediaNormalizationEntry`.
 */
export type VideoGenerationNormalization = {
  /** Size adjustment (e.g. snapped to the closest supported size). */
  size?: MediaNormalizationEntry<string>;
  /** Aspect-ratio adjustment; may be derived from a requested size. */
  aspectRatio?: MediaNormalizationEntry<string>;
  /** Resolution adjustment to a provider-supported resolution. */
  resolution?: MediaNormalizationEntry<VideoGenerationResolution>;
  /** Duration adjustment, in whole seconds. */
  durationSeconds?: MediaNormalizationEntry<number>;
};
export type VideoGenerationProvider = {
id: string;
aliases?: string[];