diff --git a/CHANGELOG.md b/CHANGELOG.md index 239e62dd565..4f0eeb5343e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ Docs: https://docs.openclaw.ai - Providers/Google: add Gemma 4 model support and keep Google fallback resolution on the requested provider path so native Google Gemma routes work again. (#61507) Thanks @eyjohn. - Providers/Anthropic: restore Claude CLI as the preferred local Anthropic path in onboarding, model-auth guidance, doctor flows, and Docker Claude CLI live lanes again. - ACP/ACPX plugin: bump the bundled `acpx` pin to `0.5.1` so plugin-local installs and strict version checks pick up the latest published runtime release. (#62148) Thanks @onutc. +- Tools/media generation: auto-fallback across auth-backed image, music, and video providers by default, and remap fallback size, aspect ratio, resolution, and duration hints to the closest supported option instead of dropping intent on provider switches. +- Tools/media generation: report applied fallback geometry and duration settings consistently in tool results, add a shared normalization contract for image/music/video runtimes, and simplify the bundled image-generation-core runtime test to only verify the plugin-sdk re-export seam. ### Fixes diff --git a/docs/tools/image-generation.md b/docs/tools/image-generation.md index 70f68eb744d..c187ea368d5 100644 --- a/docs/tools/image-generation.md +++ b/docs/tools/image-generation.md @@ -70,6 +70,8 @@ Use `action: "list"` to inspect available providers and models at runtime: Not all providers support all parameters. When a fallback provider supports a nearby geometry option instead of the exact requested one, OpenClaw remaps to the closest supported size, aspect ratio, or resolution before submission. Truly unsupported overrides are still reported in the tool result. +Tool results report the applied settings. When OpenClaw remaps geometry during provider fallback, the returned `size`, `aspectRatio`, and `resolution` values reflect what was actually sent, and `details.normalization` captures the requested-to-applied translation. + ## Configuration ### Model selection diff --git a/docs/tools/music-generation.md b/docs/tools/music-generation.md index ab5458e83b7..e6a8cb39592 100644 --- a/docs/tools/music-generation.md +++ b/docs/tools/music-generation.md @@ -136,6 +136,8 @@ uses a shorter maximum than the requested value, OpenClaw automatically clamps to the closest supported duration. Truly unsupported optional hints are ignored with a warning when the selected provider or model cannot honor them. +Tool results report the applied settings. When OpenClaw clamps duration during provider fallback, the returned `durationSeconds` reflects the submitted value and `details.normalization.durationSeconds` shows the requested-to-applied mapping. + ## Async behavior for the shared provider-backed path - Session-backed agent runs: `music_generate` creates a background task, returns a started/task response immediately, and posts the finished track later in a follow-up agent message. diff --git a/docs/tools/video-generation.md b/docs/tools/video-generation.md index a0141b36dd7..65221b0099e 100644 --- a/docs/tools/video-generation.md +++ b/docs/tools/video-generation.md @@ -156,6 +156,8 @@ and the shared live sweep. Not all providers support all parameters. OpenClaw already normalizes duration to the closest provider-supported value, and it also remaps translated geometry hints such as size-to-aspect-ratio when a fallback provider exposes a different control surface. Truly unsupported overrides are ignored on a best-effort basis and reported as warnings in the tool result. Hard capability limits (such as too many reference inputs) fail before submission. +Tool results report the applied settings. When OpenClaw remaps duration or geometry during provider fallback, the returned `durationSeconds`, `size`, `aspectRatio`, and `resolution` values reflect what was submitted, and `details.normalization` captures the requested-to-applied translation. + Reference inputs also select the runtime mode: - No reference media: `generate` diff --git a/extensions/acpx/src/acpx-runtime.d.ts b/extensions/acpx/src/acpx-runtime.d.ts deleted file mode 100644 index 0535b8195df..00000000000 --- a/extensions/acpx/src/acpx-runtime.d.ts +++ /dev/null @@ -1,55 +0,0 @@ -declare module "acpx/runtime" { - export const ACPX_BACKEND_ID: string; - - export type AcpRuntimeDoctorReport = - import("../../../src/acp/runtime/types.js").AcpRuntimeDoctorReport; - export type AcpRuntimeEnsureInput = - import("../../../src/acp/runtime/types.js").AcpRuntimeEnsureInput; - export type AcpRuntimeEvent = import("../../../src/acp/runtime/types.js").AcpRuntimeEvent; - export type AcpRuntimeHandle = import("../../../src/acp/runtime/types.js").AcpRuntimeHandle; - export type AcpRuntimeTurnInput = import("../../../src/acp/runtime/types.js").AcpRuntimeTurnInput; - export type AcpRuntimeStatus = import("../../../src/acp/runtime/types.js").AcpRuntimeStatus; - export type AcpRuntimeCapabilities = - import("../../../src/acp/runtime/types.js").AcpRuntimeCapabilities; - - export type AcpSessionStore = { - load(sessionId: string): Promise; - save(record: unknown): Promise; - }; - - export type AcpAgentRegistry = { - resolve(agentId: string): string; - list(): string[]; - }; - - export type AcpRuntimeOptions = { - cwd: string; - sessionStore: AcpSessionStore; - agentRegistry: AcpAgentRegistry; - permissionMode: string; - mcpServers?: unknown[]; - nonInteractivePermissions?: unknown; - timeoutMs?: number; - }; - - export class AcpxRuntime { - constructor(options: AcpRuntimeOptions, testOptions?: unknown); - isHealthy(): boolean; - probeAvailability(): Promise; - doctor(): Promise; - ensureSession(input: AcpRuntimeEnsureInput): Promise; - runTurn(input: AcpRuntimeTurnInput): AsyncIterable; - getCapabilities(input?: { handle?: AcpRuntimeHandle }): AcpRuntimeCapabilities; - getStatus(input: { handle: AcpRuntimeHandle; signal?: AbortSignal }): Promise; - setMode(input: { handle: AcpRuntimeHandle; mode: string }): Promise; - setConfigOption(input: { handle: AcpRuntimeHandle; key: string; value: string }): Promise; - cancel(input: { handle: AcpRuntimeHandle; reason?: string }): Promise; - close(input: { handle: AcpRuntimeHandle; reason: string }): Promise; - } - - export function createAcpRuntime(...args: unknown[]): unknown; - export function createAgentRegistry(...args: unknown[]): AcpAgentRegistry; - export function createFileSessionStore(...args: unknown[]): AcpSessionStore; - export function decodeAcpxRuntimeHandleState(...args: unknown[]): unknown; - export function encodeAcpxRuntimeHandleState(...args: unknown[]): unknown; -} diff --git a/extensions/acpx/src/runtime.ts b/extensions/acpx/src/runtime.ts index 6ca0b466012..021642c2782 100644 --- a/extensions/acpx/src/runtime.ts +++ b/extensions/acpx/src/runtime.ts @@ -12,7 +12,7 @@ import { type AcpRuntimeHandle, type AcpRuntimeOptions, type AcpRuntimeStatus, -} from "acpx/runtime"; +} from "acpx/dist/runtime.js"; import type { AcpRuntime } from "../runtime-api.js"; type AcpSessionStore = AcpRuntimeOptions["sessionStore"]; diff --git a/extensions/google/video-generation-provider.ts b/extensions/google/video-generation-provider.ts index 6be6c437aaa..e2c5ea8bdf4 100644 --- a/extensions/google/video-generation-provider.ts +++ b/extensions/google/video-generation-provider.ts @@ -164,6 +164,8 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider { maxVideos: 1, maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS, supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS, + aspectRatios: ["16:9", "9:16"], + resolutions: ["720P", "1080P"], supportsAspectRatio: true, supportsResolution: true, supportsSize: true, @@ -175,6 +177,8 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider { maxInputImages: 1, maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS, supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS, + aspectRatios: ["16:9", "9:16"], + resolutions: ["720P", "1080P"], supportsAspectRatio: true, supportsResolution: true, supportsSize: true, @@ -186,6 +190,8 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider { maxInputVideos: 1, maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS, supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS, + aspectRatios: ["16:9", "9:16"], + resolutions: ["720P", "1080P"], supportsAspectRatio: true, supportsResolution: true, supportsSize: true, diff --git a/extensions/openai/video-generation-provider.ts b/extensions/openai/video-generation-provider.ts index d8745f75711..e7a88bb7ca8 100644 --- a/extensions/openai/video-generation-provider.ts +++ b/extensions/openai/video-generation-provider.ts @@ -190,6 +190,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider { maxDurationSeconds: 12, supportedDurationSeconds: OPENAI_VIDEO_SECONDS, supportsSize: true, + sizes: OPENAI_VIDEO_SIZES, }, imageToVideo: { enabled: true, @@ -198,6 +199,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider { maxDurationSeconds: 12, supportedDurationSeconds: OPENAI_VIDEO_SECONDS, supportsSize: true, + sizes: OPENAI_VIDEO_SIZES, }, videoToVideo: { enabled: true, @@ -206,6 +208,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider { maxDurationSeconds: 12, supportedDurationSeconds: OPENAI_VIDEO_SECONDS, supportsSize: true, + sizes: OPENAI_VIDEO_SIZES, }, }, async generateVideo(req) { diff --git a/extensions/runway/video-generation-provider.ts b/extensions/runway/video-generation-provider.ts index d1062ef80d1..1b409f47c9d 100644 --- a/extensions/runway/video-generation-provider.ts +++ b/extensions/runway/video-generation-provider.ts @@ -45,6 +45,8 @@ const IMAGE_MODELS = new Set([ "veo3", ]); const VIDEO_MODELS = new Set(["gen4_aleph"]); +const RUNWAY_TEXT_ASPECT_RATIOS = ["16:9", "9:16"] as const; +const RUNWAY_EDIT_ASPECT_RATIOS = ["1:1", "16:9", "9:16", "3:4", "4:3", "21:9"] as const; function resolveRunwayBaseUrl(req: VideoGenerationRequest): string { return req.cfg?.models?.providers?.runway?.baseUrl?.trim() || DEFAULT_RUNWAY_BASE_URL; @@ -264,6 +266,7 @@ export function buildRunwayVideoGenerationProvider(): VideoGenerationProvider { generate: { maxVideos: 1, maxDurationSeconds: MAX_DURATION_SECONDS, + aspectRatios: RUNWAY_TEXT_ASPECT_RATIOS, supportsAspectRatio: true, }, imageToVideo: { @@ -271,12 +274,14 @@ export function buildRunwayVideoGenerationProvider(): VideoGenerationProvider { maxVideos: 1, maxInputImages: 1, maxDurationSeconds: MAX_DURATION_SECONDS, + aspectRatios: RUNWAY_EDIT_ASPECT_RATIOS, supportsAspectRatio: true, }, videoToVideo: { enabled: true, maxVideos: 1, maxInputVideos: 1, + aspectRatios: RUNWAY_EDIT_ASPECT_RATIOS, supportsAspectRatio: true, }, }, diff --git a/extensions/xai/video-generation-provider.ts b/extensions/xai/video-generation-provider.ts index 62276dd5183..42937edb9cb 100644 --- a/extensions/xai/video-generation-provider.ts +++ b/extensions/xai/video-generation-provider.ts @@ -257,6 +257,8 @@ export function buildXaiVideoGenerationProvider(): VideoGenerationProvider { generate: { maxVideos: 1, maxDurationSeconds: 15, + aspectRatios: [...XAI_VIDEO_ASPECT_RATIOS], + resolutions: ["480P", "720P"], supportsAspectRatio: true, supportsResolution: true, }, @@ -265,6 +267,8 @@ export function buildXaiVideoGenerationProvider(): VideoGenerationProvider { maxVideos: 1, maxInputImages: 1, maxDurationSeconds: 15, + aspectRatios: [...XAI_VIDEO_ASPECT_RATIOS], + resolutions: ["480P", "720P"], supportsAspectRatio: true, supportsResolution: true, }, diff --git a/src/agents/tools/image-generate-tool.ts b/src/agents/tools/image-generate-tool.ts index acbe7b8980b..ceae69bfc15 100644 --- a/src/agents/tools/image-generate-tool.ts +++ b/src/agents/tools/image-generate-tool.ts @@ -616,24 +616,29 @@ export function createImageGenerateTool(options?: { ? `Ignored unsupported overrides for ${result.provider}/${result.model}: ${ignoredOverrides.map(formatIgnoredImageGenerationOverride).join(", ")}.` : undefined; const normalizedSize = - typeof result.metadata?.normalizedSize === "string" && result.metadata.normalizedSize.trim() + result.normalization?.size?.applied ?? + (typeof result.metadata?.normalizedSize === "string" && + result.metadata.normalizedSize.trim() ? result.metadata.normalizedSize - : undefined; + : undefined); const normalizedAspectRatio = - typeof result.metadata?.normalizedAspectRatio === "string" && + result.normalization?.aspectRatio?.applied ?? + (typeof result.metadata?.normalizedAspectRatio === "string" && result.metadata.normalizedAspectRatio.trim() ? result.metadata.normalizedAspectRatio - : undefined; + : undefined); const normalizedResolution = - typeof result.metadata?.normalizedResolution === "string" && + result.normalization?.resolution?.applied ?? + (typeof result.metadata?.normalizedResolution === "string" && result.metadata.normalizedResolution.trim() ? result.metadata.normalizedResolution - : undefined; + : undefined); const sizeTranslatedToAspectRatio = - !normalizedSize && - typeof result.metadata?.requestedSize === "string" && - result.metadata.requestedSize === size && - Boolean(normalizedAspectRatio); + result.normalization?.aspectRatio?.derivedFrom === "size" || + (!normalizedSize && + typeof result.metadata?.requestedSize === "string" && + result.metadata.requestedSize === size && + Boolean(normalizedAspectRatio)); const savedImages = await Promise.all( result.images.map((image) => @@ -694,6 +699,7 @@ export function createImageGenerateTool(options?: { : {}), ...(filename ? { filename } : {}), attempts: result.attempts, + ...(result.normalization ? { normalization: result.normalization } : {}), metadata: result.metadata, ...(warning ? { warning } : {}), ...(ignoredOverrides.length > 0 ? { ignoredOverrides } : {}), diff --git a/src/agents/tools/music-generate-tool.ts b/src/agents/tools/music-generate-tool.ts index ed8584656f2..1b90445f0cf 100644 --- a/src/agents/tools/music-generate-tool.ts +++ b/src/agents/tools/music-generate-tool.ts @@ -408,15 +408,17 @@ async function executeMusicGenerationJob(params: { const ignoredOverrides = result.ignoredOverrides ?? []; const ignoredOverrideKeys = new Set(ignoredOverrides.map((entry) => entry.key)); const requestedDurationSeconds = - typeof result.metadata?.requestedDurationSeconds === "number" && + result.normalization?.durationSeconds?.requested ?? + (typeof result.metadata?.requestedDurationSeconds === "number" && Number.isFinite(result.metadata.requestedDurationSeconds) ? result.metadata.requestedDurationSeconds - : params.durationSeconds; + : params.durationSeconds); const runtimeNormalizedDurationSeconds = - typeof result.metadata?.normalizedDurationSeconds === "number" && + result.normalization?.durationSeconds?.applied ?? + (typeof result.metadata?.normalizedDurationSeconds === "number" && Number.isFinite(result.metadata.normalizedDurationSeconds) ? result.metadata.normalizedDurationSeconds - : undefined; + : undefined); const appliedDurationSeconds = runtimeNormalizedDurationSeconds ?? (!ignoredOverrideKeys.has("durationSeconds") && typeof params.durationSeconds === "number" @@ -492,6 +494,7 @@ async function executeMusicGenerationJob(params: { : {}), ...(result.lyrics?.length ? { lyrics: result.lyrics } : {}), attempts: result.attempts, + ...(result.normalization ? { normalization: result.normalization } : {}), metadata: result.metadata, ...(warning ? { warning } : {}), ...(ignoredOverrides.length > 0 ? { ignoredOverrides } : {}), diff --git a/src/agents/tools/video-generate-tool.ts b/src/agents/tools/video-generate-tool.ts index a378ae82f1d..52abb4261de 100644 --- a/src/agents/tools/video-generate-tool.ts +++ b/src/agents/tools/video-generate-tool.ts @@ -529,10 +529,11 @@ async function executeVideoGenerationJob(params: { ), ); const requestedDurationSeconds = - typeof result.metadata?.requestedDurationSeconds === "number" && + result.normalization?.durationSeconds?.requested ?? + (typeof result.metadata?.requestedDurationSeconds === "number" && Number.isFinite(result.metadata.requestedDurationSeconds) ? result.metadata.requestedDurationSeconds - : params.durationSeconds; + : params.durationSeconds); const ignoredOverrides = result.ignoredOverrides ?? []; const ignoredOverrideKeys = new Set(ignoredOverrides.map((entry) => entry.key)); const warning = @@ -540,34 +541,41 @@ async function executeVideoGenerationJob(params: { ? `Ignored unsupported overrides for ${result.provider}/${result.model}: ${ignoredOverrides.map(formatIgnoredVideoGenerationOverride).join(", ")}.` : undefined; const normalizedDurationSeconds = - typeof result.metadata?.normalizedDurationSeconds === "number" && + result.normalization?.durationSeconds?.applied ?? + (typeof result.metadata?.normalizedDurationSeconds === "number" && Number.isFinite(result.metadata.normalizedDurationSeconds) ? result.metadata.normalizedDurationSeconds - : requestedDurationSeconds; - const supportedDurationSeconds = Array.isArray(result.metadata?.supportedDurationSeconds) - ? result.metadata.supportedDurationSeconds.filter( - (entry): entry is number => typeof entry === "number" && Number.isFinite(entry), - ) - : undefined; + : requestedDurationSeconds); + const supportedDurationSeconds = + result.normalization?.durationSeconds?.supportedValues ?? + (Array.isArray(result.metadata?.supportedDurationSeconds) + ? result.metadata.supportedDurationSeconds.filter( + (entry): entry is number => typeof entry === "number" && Number.isFinite(entry), + ) + : undefined); const normalizedSize = - typeof result.metadata?.normalizedSize === "string" && result.metadata.normalizedSize.trim() + result.normalization?.size?.applied ?? + (typeof result.metadata?.normalizedSize === "string" && result.metadata.normalizedSize.trim() ? result.metadata.normalizedSize - : undefined; + : undefined); const normalizedAspectRatio = - typeof result.metadata?.normalizedAspectRatio === "string" && + result.normalization?.aspectRatio?.applied ?? + (typeof result.metadata?.normalizedAspectRatio === "string" && result.metadata.normalizedAspectRatio.trim() ? result.metadata.normalizedAspectRatio - : undefined; + : undefined); const normalizedResolution = - typeof result.metadata?.normalizedResolution === "string" && + result.normalization?.resolution?.applied ?? + (typeof result.metadata?.normalizedResolution === "string" && result.metadata.normalizedResolution.trim() ? result.metadata.normalizedResolution - : undefined; + : undefined); const sizeTranslatedToAspectRatio = - !normalizedSize && - typeof result.metadata?.requestedSize === "string" && - result.metadata.requestedSize === params.size && - Boolean(normalizedAspectRatio); + result.normalization?.aspectRatio?.derivedFrom === "size" || + (!normalizedSize && + typeof result.metadata?.requestedSize === "string" && + result.metadata.requestedSize === params.size && + Boolean(normalizedAspectRatio)); const lines = [ `Generated ${savedVideos.length} video${savedVideos.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`, ...(warning ? [`Warning: ${warning}`] : []), @@ -660,6 +668,7 @@ async function executeVideoGenerationJob(params: { : {}), ...(params.filename ? { filename: params.filename } : {}), attempts: result.attempts, + ...(result.normalization ? { normalization: result.normalization } : {}), metadata: result.metadata, ...(warning ? { warning } : {}), ...(ignoredOverrides.length > 0 ? { ignoredOverrides } : {}), diff --git a/src/image-generation/normalization.ts b/src/image-generation/normalization.ts new file mode 100644 index 00000000000..f20fa883c69 --- /dev/null +++ b/src/image-generation/normalization.ts @@ -0,0 +1,204 @@ +import { + hasMediaNormalizationEntry, + resolveClosestAspectRatio, + resolveClosestResolution, + resolveClosestSize, + type MediaNormalizationEntry, +} from "../media-generation/runtime-shared.js"; +import type { + ImageGenerationIgnoredOverride, + ImageGenerationNormalization, + ImageGenerationProvider, + ImageGenerationResolution, + ImageGenerationSourceImage, +} from "./types.js"; + +export type ResolvedImageGenerationOverrides = { + size?: string; + aspectRatio?: string; + resolution?: ImageGenerationResolution; + ignoredOverrides: ImageGenerationIgnoredOverride[]; + normalization?: ImageGenerationNormalization; +}; + +function finalizeImageNormalization( + normalization: ImageGenerationNormalization, +): ImageGenerationNormalization | undefined { + return hasMediaNormalizationEntry(normalization.size) || + hasMediaNormalizationEntry(normalization.aspectRatio) || + hasMediaNormalizationEntry(normalization.resolution) + ? normalization + : undefined; +} + +export function resolveImageGenerationOverrides(params: { + provider: ImageGenerationProvider; + size?: string; + aspectRatio?: string; + resolution?: ImageGenerationResolution; + inputImages?: ImageGenerationSourceImage[]; +}): ResolvedImageGenerationOverrides { + const hasInputImages = (params.inputImages?.length ?? 0) > 0; + const modeCaps = hasInputImages + ? params.provider.capabilities.edit + : params.provider.capabilities.generate; + const geometry = params.provider.capabilities.geometry; + const ignoredOverrides: ImageGenerationIgnoredOverride[] = []; + const normalization: ImageGenerationNormalization = {}; + let size = params.size; + let aspectRatio = params.aspectRatio; + let resolution = params.resolution; + + if (size && (geometry?.sizes?.length ?? 0) > 0 && modeCaps.supportsSize) { + const normalizedSize = resolveClosestSize({ + requestedSize: size, + supportedSizes: geometry?.sizes, + }); + if (normalizedSize && normalizedSize !== size) { + normalization.size = { + requested: size, + applied: normalizedSize, + }; + } + size = normalizedSize; + } + + if (!modeCaps.supportsSize && size) { + let translated = false; + if (modeCaps.supportsAspectRatio) { + const normalizedAspectRatio = resolveClosestAspectRatio({ + requestedAspectRatio: aspectRatio, + requestedSize: size, + supportedAspectRatios: geometry?.aspectRatios, + }); + if (normalizedAspectRatio) { + aspectRatio = normalizedAspectRatio; + normalization.aspectRatio = { + applied: normalizedAspectRatio, + derivedFrom: "size", + }; + translated = true; + } + } + if (!translated) { + ignoredOverrides.push({ key: "size", value: size }); + } + size = undefined; + } + + if (aspectRatio && (geometry?.aspectRatios?.length ?? 0) > 0 && modeCaps.supportsAspectRatio) { + const normalizedAspectRatio = resolveClosestAspectRatio({ + requestedAspectRatio: aspectRatio, + requestedSize: size, + supportedAspectRatios: geometry?.aspectRatios, + }); + if (normalizedAspectRatio && normalizedAspectRatio !== aspectRatio) { + normalization.aspectRatio = { + requested: aspectRatio, + applied: normalizedAspectRatio, + }; + } + aspectRatio = normalizedAspectRatio; + } else if (!modeCaps.supportsAspectRatio && aspectRatio) { + const derivedSize = + modeCaps.supportsSize && !size + ? resolveClosestSize({ + requestedSize: params.size, + requestedAspectRatio: aspectRatio, + supportedSizes: geometry?.sizes, + }) + : undefined; + let translated = false; + if (derivedSize) { + size = derivedSize; + normalization.size = { + applied: derivedSize, + derivedFrom: "aspectRatio", + }; + translated = true; + } + if (!translated) { + ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio }); + } + aspectRatio = undefined; + } + + if (resolution && (geometry?.resolutions?.length ?? 0) > 0 && modeCaps.supportsResolution) { + const normalizedResolution = resolveClosestResolution({ + requestedResolution: resolution, + supportedResolutions: geometry?.resolutions, + }); + if (normalizedResolution && normalizedResolution !== resolution) { + normalization.resolution = { + requested: resolution, + applied: normalizedResolution, + }; + } + resolution = normalizedResolution; + } else if (!modeCaps.supportsResolution && resolution) { + ignoredOverrides.push({ key: "resolution", value: resolution }); + resolution = undefined; + } + + if (size && !modeCaps.supportsSize) { + ignoredOverrides.push({ key: "size", value: size }); + size = undefined; + } + + if (aspectRatio && !modeCaps.supportsAspectRatio) { + ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio }); + aspectRatio = undefined; + } + + if (resolution && !modeCaps.supportsResolution) { + ignoredOverrides.push({ key: "resolution", value: resolution }); + resolution = undefined; + } + + if ( + !normalization.aspectRatio && + aspectRatio && + ((!params.aspectRatio && params.size) || params.aspectRatio !== aspectRatio) + ) { + const entry: MediaNormalizationEntry = { + applied: aspectRatio, + ...(params.aspectRatio ? { requested: params.aspectRatio } : {}), + ...(!params.aspectRatio && params.size ? { derivedFrom: "size" } : {}), + }; + normalization.aspectRatio = entry; + } + + if (!normalization.size && size && params.size && params.size !== size) { + normalization.size = { + requested: params.size, + applied: size, + }; + } + + if (!normalization.aspectRatio && !params.aspectRatio && params.size && aspectRatio) { + normalization.aspectRatio = { + applied: aspectRatio, + derivedFrom: "size", + }; + } + + if ( + !normalization.resolution && + resolution && + params.resolution && + params.resolution !== resolution + ) { + normalization.resolution = { + requested: params.resolution, + applied: resolution, + }; + } + + return { + size, + aspectRatio, + resolution, + ignoredOverrides, + normalization: finalizeImageNormalization(normalization), + }; +} diff --git a/src/image-generation/runtime.ts b/src/image-generation/runtime.ts index 825be486165..985bad44bf8 100644 --- a/src/image-generation/runtime.ts +++ b/src/image-generation/runtime.ts @@ -6,17 +6,16 @@ import { createSubsystemLogger } from "../logging/subsystem.js"; import { buildNoCapabilityModelConfiguredMessage, deriveAspectRatioFromSize, - resolveClosestAspectRatio, - resolveClosestResolution, - resolveClosestSize, resolveCapabilityModelCandidates, throwCapabilityGenerationFailure, } from "../media-generation/runtime-shared.js"; import { parseImageGenerationModelRef } from "./model-ref.js"; +import { resolveImageGenerationOverrides } from "./normalization.js"; import { getImageGenerationProvider, listImageGenerationProviders } from "./provider-registry.js"; import type { GeneratedImageAsset, ImageGenerationIgnoredOverride, + ImageGenerationNormalization, ImageGenerationResolution, ImageGenerationResult, ImageGenerationSourceImage, @@ -42,6 +41,7 @@ export type GenerateImageRuntimeResult = { provider: string; model: string; attempts: FallbackAttempt[]; + normalization?: ImageGenerationNormalization; metadata?: Record; ignoredOverrides: ImageGenerationIgnoredOverride[]; }; @@ -58,108 +58,6 @@ export function listRuntimeImageGenerationProviders(params?: { config?: OpenClaw return listImageGenerationProviders(params?.config); } -function resolveProviderImageGenerationOverrides(params: { - provider: NonNullable>; - size?: string; - aspectRatio?: string; - resolution?: ImageGenerationResolution; - inputImages?: ImageGenerationSourceImage[]; -}) { - const hasInputImages = (params.inputImages?.length ?? 0) > 0; - const modeCaps = hasInputImages - ? params.provider.capabilities.edit - : params.provider.capabilities.generate; - const geometry = params.provider.capabilities.geometry; - const ignoredOverrides: ImageGenerationIgnoredOverride[] = []; - let size = params.size; - let aspectRatio = params.aspectRatio; - let resolution = params.resolution; - - if (size && (geometry?.sizes?.length ?? 0) > 0 && modeCaps.supportsSize) { - size = resolveClosestSize({ - requestedSize: size, - supportedSizes: geometry?.sizes, - }); - } - - if (!modeCaps.supportsSize && size) { - let translated = false; - if (modeCaps.supportsAspectRatio) { - const normalizedAspectRatio = resolveClosestAspectRatio({ - requestedAspectRatio: aspectRatio, - requestedSize: size, - supportedAspectRatios: geometry?.aspectRatios, - }); - if (normalizedAspectRatio) { - aspectRatio = normalizedAspectRatio; - translated = true; - } - } - if (!translated) { - ignoredOverrides.push({ key: "size", value: size }); - } - size = undefined; - } - - if (aspectRatio && (geometry?.aspectRatios?.length ?? 0) > 0 && modeCaps.supportsAspectRatio) { - aspectRatio = resolveClosestAspectRatio({ - requestedAspectRatio: aspectRatio, - requestedSize: size, - supportedAspectRatios: geometry?.aspectRatios, - }); - } else if (!modeCaps.supportsAspectRatio && aspectRatio) { - const derivedSize = - modeCaps.supportsSize && !size - ? resolveClosestSize({ - requestedSize: params.size, - requestedAspectRatio: aspectRatio, - supportedSizes: geometry?.sizes, - }) - : undefined; - let translated = false; - if (derivedSize) { - size = derivedSize; - translated = true; - } - if (!translated) { - ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio }); - } - aspectRatio = undefined; - } - - if (resolution && (geometry?.resolutions?.length ?? 0) > 0 && modeCaps.supportsResolution) { - resolution = resolveClosestResolution({ - requestedResolution: resolution, - supportedResolutions: geometry?.resolutions, - }); - } else if (!modeCaps.supportsResolution && resolution) { - ignoredOverrides.push({ key: "resolution", value: resolution }); - resolution = undefined; - } - - if (size && !modeCaps.supportsSize) { - ignoredOverrides.push({ key: "size", value: size }); - size = undefined; - } - - if (aspectRatio && !modeCaps.supportsAspectRatio) { - ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio }); - aspectRatio = undefined; - } - - if (resolution && !modeCaps.supportsResolution) { - ignoredOverrides.push({ key: "resolution", value: resolution }); - resolution = undefined; - } - - return { - size, - aspectRatio, - resolution, - ignoredOverrides, - }; -} - export async function generateImage( params: GenerateImageParams, ): Promise { @@ -192,7 +90,7 @@ export async function generateImage( } try { - const sanitized = resolveProviderImageGenerationOverrides({ + const sanitized = resolveImageGenerationOverrides({ provider, size: params.size, aspectRatio: params.aspectRatio, @@ -220,30 +118,35 @@ export async function generateImage( provider: candidate.provider, model: result.model ?? candidate.model, attempts, + normalization: sanitized.normalization, metadata: { ...result.metadata, - ...(params.size && sanitized.size && params.size !== sanitized.size - ? { requestedSize: params.size, normalizedSize: sanitized.size } - : {}), - ...((params.aspectRatio && - sanitized.aspectRatio && - params.aspectRatio !== sanitized.aspectRatio) || - (!params.aspectRatio && params.size && sanitized.aspectRatio) + ...(sanitized.normalization?.size?.requested !== undefined && + sanitized.normalization.size.applied !== undefined ? { - ...(params.size ? { requestedSize: params.size } : {}), - ...(params.aspectRatio ? { requestedAspectRatio: params.aspectRatio } : {}), - normalizedAspectRatio: sanitized.aspectRatio, - ...(params.size - ? { aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size) } + requestedSize: sanitized.normalization.size.requested, + normalizedSize: sanitized.normalization.size.applied, + } + : {}), + ...(sanitized.normalization?.aspectRatio?.applied !== undefined + ? { + ...(sanitized.normalization.aspectRatio.requested !== undefined + ? { requestedAspectRatio: sanitized.normalization.aspectRatio.requested } + : {}), + normalizedAspectRatio: sanitized.normalization.aspectRatio.applied, + ...(sanitized.normalization.aspectRatio.derivedFrom === "size" && params.size + ? { + requestedSize: params.size, + aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size), + } : {}), } : {}), - ...(params.resolution && - sanitized.resolution && - params.resolution !== sanitized.resolution + ...(sanitized.normalization?.resolution?.requested !== undefined && + sanitized.normalization.resolution.applied !== undefined ? { - requestedResolution: params.resolution, - normalizedResolution: sanitized.resolution, + requestedResolution: sanitized.normalization.resolution.requested, + normalizedResolution: sanitized.normalization.resolution.applied, } : {}), }, diff --git a/src/image-generation/types.ts b/src/image-generation/types.ts index 40664d9dfa6..9d3682e64c7 100644 --- a/src/image-generation/types.ts +++ b/src/image-generation/types.ts @@ -1,5 +1,6 @@ import type { AuthProfileStore } from "../agents/auth-profiles.js"; import type { OpenClawConfig } from "../config/config.js"; +import type { MediaNormalizationEntry } from "../media-generation/runtime-shared.js"; export type GeneratedImageAsset = { buffer: Buffer; @@ -69,6 +70,12 @@ export type ImageGenerationGeometryCapabilities = { resolutions?: ImageGenerationResolution[]; }; +export type ImageGenerationNormalization = { + size?: MediaNormalizationEntry; + aspectRatio?: MediaNormalizationEntry; + resolution?: MediaNormalizationEntry; +}; + export type ImageGenerationProviderCapabilities = { generate: ImageGenerationModeCapabilities; edit: ImageGenerationEditCapabilities; diff --git a/src/media-generation/runtime-shared.ts b/src/media-generation/runtime-shared.ts index de44159c643..90b14fb1961 100644 --- a/src/media-generation/runtime-shared.ts +++ b/src/media-generation/runtime-shared.ts @@ -16,6 +16,27 @@ export type ParsedProviderModelRef = { model: string; }; +export type MediaNormalizationValue = string | number | boolean; + +export type MediaNormalizationEntry = { + requested?: TValue; + applied?: TValue; + derivedFrom?: string; + supportedValues?: readonly TValue[]; +}; + +export function hasMediaNormalizationEntry( + entry: MediaNormalizationEntry | undefined, +): entry is MediaNormalizationEntry { + return Boolean( + entry && + (entry.requested !== undefined || + entry.applied !== undefined || + entry.derivedFrom !== undefined || + (entry.supportedValues?.length ?? 0) > 0), + ); +} + const IMAGE_RESOLUTION_ORDER = ["1K", "2K", "4K"] as const; type CapabilityProviderCandidate = { diff --git a/src/music-generation/normalization.ts b/src/music-generation/normalization.ts new file mode 100644 index 00000000000..b30983b605b --- /dev/null +++ b/src/music-generation/normalization.ts @@ -0,0 +1,105 @@ +import { + hasMediaNormalizationEntry, + normalizeDurationToClosestMax, +} from "../media-generation/runtime-shared.js"; +import { resolveMusicGenerationModeCapabilities } from "./capabilities.js"; +import type { + MusicGenerationIgnoredOverride, + MusicGenerationNormalization, + MusicGenerationOutputFormat, + MusicGenerationProvider, + MusicGenerationSourceImage, +} from "./types.js"; + +export type ResolvedMusicGenerationOverrides = { + lyrics?: string; + instrumental?: boolean; + durationSeconds?: number; + format?: MusicGenerationOutputFormat; + ignoredOverrides: MusicGenerationIgnoredOverride[]; + normalization?: MusicGenerationNormalization; +}; + +export function resolveMusicGenerationOverrides(params: { + provider: MusicGenerationProvider; + model: string; + lyrics?: string; + instrumental?: boolean; + durationSeconds?: number; + format?: MusicGenerationOutputFormat; + inputImages?: MusicGenerationSourceImage[]; +}): ResolvedMusicGenerationOverrides { + const { capabilities: caps } = resolveMusicGenerationModeCapabilities({ + provider: params.provider, + inputImageCount: params.inputImages?.length ?? 0, + }); + const ignoredOverrides: MusicGenerationIgnoredOverride[] = []; + const normalization: MusicGenerationNormalization = {}; + let lyrics = params.lyrics; + let instrumental = params.instrumental; + let durationSeconds = params.durationSeconds; + let format = params.format; + + if (!caps) { + return { + lyrics, + instrumental, + durationSeconds, + format, + ignoredOverrides, + }; + } + + if (lyrics?.trim() && !caps.supportsLyrics) { + ignoredOverrides.push({ key: "lyrics", value: lyrics }); + lyrics = undefined; + } + + if (typeof instrumental === "boolean" && !caps.supportsInstrumental) { + ignoredOverrides.push({ key: "instrumental", value: instrumental }); + instrumental = undefined; + } + + if (typeof durationSeconds === "number" && !caps.supportsDuration) { + ignoredOverrides.push({ key: "durationSeconds", value: durationSeconds }); + durationSeconds = undefined; + } else if (typeof durationSeconds === "number") { + const normalizedDurationSeconds = normalizeDurationToClosestMax( + durationSeconds, + caps.maxDurationSeconds, + ); + if ( + typeof normalizedDurationSeconds === "number" && + normalizedDurationSeconds !== durationSeconds + ) { + normalization.durationSeconds = { + requested: durationSeconds, + applied: normalizedDurationSeconds, + }; + } + durationSeconds = normalizedDurationSeconds; + } + + if (format) { + const supportedFormats = + caps.supportedFormatsByModel?.[params.model] ?? caps.supportedFormats ?? []; + if ( + !caps.supportsFormat || + (supportedFormats.length > 0 && !supportedFormats.includes(format)) + ) { + ignoredOverrides.push({ key: "format", value: format }); + format = undefined; + } + } + + return { + lyrics, + instrumental, + durationSeconds, + format, + ignoredOverrides, + normalization: hasMediaNormalizationEntry(normalization.durationSeconds) + ? normalization + : undefined, + }; +} diff --git a/src/music-generation/runtime.ts b/src/music-generation/runtime.ts index 042838c058e..df519722730 100644 --- a/src/music-generation/runtime.ts +++ b/src/music-generation/runtime.ts @@ -5,16 +5,16 @@ import type { OpenClawConfig } from "../config/config.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { buildNoCapabilityModelConfiguredMessage, - normalizeDurationToClosestMax, resolveCapabilityModelCandidates, throwCapabilityGenerationFailure, } from "../media-generation/runtime-shared.js"; -import { resolveMusicGenerationModeCapabilities } from "./capabilities.js"; import { parseMusicGenerationModelRef } from "./model-ref.js"; +import { resolveMusicGenerationOverrides } from "./normalization.js"; import { getMusicGenerationProvider, listMusicGenerationProviders } from "./provider-registry.js"; import type { GeneratedMusicAsset, MusicGenerationIgnoredOverride, + MusicGenerationNormalization, MusicGenerationOutputFormat, MusicGenerationResult, MusicGenerationSourceImage, @@ -41,6 +41,7 @@ export type GenerateMusicRuntimeResult = { model: string; attempts: FallbackAttempt[]; lyrics?: string[]; + normalization?: MusicGenerationNormalization; metadata?: Record; ignoredOverrides: MusicGenerationIgnoredOverride[]; }; @@ -49,73 +50,6 @@ export function listRuntimeMusicGenerationProviders(params?: { config?: OpenClaw return listMusicGenerationProviders(params?.config); } -function resolveProviderMusicGenerationOverrides(params: { - provider: NonNullable>; - model: string; - lyrics?: string; - instrumental?: boolean; - durationSeconds?: number; - format?: MusicGenerationOutputFormat; - inputImages?: MusicGenerationSourceImage[]; -}) { - const { capabilities: caps } = resolveMusicGenerationModeCapabilities({ - provider: params.provider, - inputImageCount: params.inputImages?.length ?? 0, - }); - const ignoredOverrides: MusicGenerationIgnoredOverride[] = []; - let lyrics = params.lyrics; - let instrumental = params.instrumental; - let durationSeconds = params.durationSeconds; - let format = params.format; - - if (!caps) { - return { - lyrics, - instrumental, - durationSeconds, - format, - ignoredOverrides, - }; - } - - if (lyrics?.trim() && !caps.supportsLyrics) { - ignoredOverrides.push({ key: "lyrics", value: lyrics }); - lyrics = undefined; - } - - if (typeof instrumental === "boolean" && !caps.supportsInstrumental) { - ignoredOverrides.push({ key: "instrumental", value: instrumental }); - instrumental = undefined; - } - - if (typeof durationSeconds === "number" && !caps.supportsDuration) { - ignoredOverrides.push({ key: "durationSeconds", value: durationSeconds }); - durationSeconds = undefined; - } else if (typeof durationSeconds === "number") { - durationSeconds = normalizeDurationToClosestMax(durationSeconds, caps.maxDurationSeconds); - } - - if (format) { - const supportedFormats = - caps.supportedFormatsByModel?.[params.model] ?? caps.supportedFormats ?? []; - if ( - !caps.supportsFormat || - (supportedFormats.length > 0 && !supportedFormats.includes(format)) - ) { - ignoredOverrides.push({ key: "format", value: format }); - format = undefined; - } - } - - return { - lyrics, - instrumental, - durationSeconds, - format, - ignoredOverrides, - }; -} - export async function generateMusic( params: GenerateMusicParams, ): Promise { @@ -155,7 +89,7 @@ export async function generateMusic( } try { - const sanitized = resolveProviderMusicGenerationOverrides({ + const sanitized = resolveMusicGenerationOverrides({ provider, model: candidate.model, lyrics: params.lyrics, @@ -186,14 +120,14 @@ export async function generateMusic( model: result.model ?? candidate.model, attempts, lyrics: result.lyrics, + normalization: sanitized.normalization, metadata: { ...result.metadata, - ...(typeof params.durationSeconds === "number" && - typeof sanitized.durationSeconds === "number" && - params.durationSeconds !== sanitized.durationSeconds + ...(sanitized.normalization?.durationSeconds?.requested !== undefined && + sanitized.normalization.durationSeconds.applied !== undefined ? { - requestedDurationSeconds: params.durationSeconds, - normalizedDurationSeconds: sanitized.durationSeconds, + requestedDurationSeconds: sanitized.normalization.durationSeconds.requested, + normalizedDurationSeconds: sanitized.normalization.durationSeconds.applied, } : {}), }, diff --git a/src/music-generation/types.ts b/src/music-generation/types.ts index cec2ea74046..e5e9338b859 100644 --- a/src/music-generation/types.ts +++ b/src/music-generation/types.ts @@ -1,5 +1,6 @@ import type { AuthProfileStore } from "../agents/auth-profiles.js"; import type { OpenClawConfig } from "../config/config.js"; +import type { MediaNormalizationEntry } from "../media-generation/runtime-shared.js"; export type MusicGenerationOutputFormat = "mp3" | "wav"; @@ -74,6 +75,10 @@ export type MusicGenerationProviderCapabilities = MusicGenerationModeCapabilitie edit?: MusicGenerationEditCapabilities; }; +export type MusicGenerationNormalization = { + durationSeconds?: MediaNormalizationEntry; +}; + export type MusicGenerationProvider = { id: string; aliases?: string[]; diff --git a/src/video-generation/normalization.ts b/src/video-generation/normalization.ts new file mode 100644 index 00000000000..030cc8aeea3 --- /dev/null +++ b/src/video-generation/normalization.ts @@ -0,0 +1,246 @@ +import { + hasMediaNormalizationEntry, + resolveClosestAspectRatio, + resolveClosestResolution, + resolveClosestSize, +} from "../media-generation/runtime-shared.js"; +import { resolveVideoGenerationModeCapabilities } from "./capabilities.js"; +import { + normalizeVideoGenerationDuration, + resolveVideoGenerationSupportedDurations, +} from "./duration-support.js"; +import type { + VideoGenerationIgnoredOverride, + VideoGenerationNormalization, + VideoGenerationProvider, + VideoGenerationResolution, +} from "./types.js"; + +export type ResolvedVideoGenerationOverrides = { + size?: string; + aspectRatio?: string; + resolution?: VideoGenerationResolution; + durationSeconds?: number; + supportedDurationSeconds?: readonly number[]; + audio?: boolean; + watermark?: boolean; + ignoredOverrides: VideoGenerationIgnoredOverride[]; + normalization?: VideoGenerationNormalization; +}; + +export function resolveVideoGenerationOverrides(params: { + provider: VideoGenerationProvider; + model: string; + size?: string; + aspectRatio?: string; + resolution?: VideoGenerationResolution; + durationSeconds?: number; + audio?: boolean; + watermark?: boolean; + inputImageCount?: number; + inputVideoCount?: number; +}): ResolvedVideoGenerationOverrides { + const { capabilities: caps } = resolveVideoGenerationModeCapabilities({ + provider: params.provider, + inputImageCount: params.inputImageCount, + inputVideoCount: params.inputVideoCount, + }); + const ignoredOverrides: VideoGenerationIgnoredOverride[] = []; + const normalization: VideoGenerationNormalization = {}; + let size = params.size; + let aspectRatio = params.aspectRatio; + let resolution = params.resolution; + let audio = params.audio; + let watermark = params.watermark; + + if (caps) { + if (size && (caps.sizes?.length ?? 0) > 0 && caps.supportsSize) { + const normalizedSize = resolveClosestSize({ + requestedSize: size, + requestedAspectRatio: aspectRatio, + supportedSizes: caps.sizes, + }); + if (normalizedSize && normalizedSize !== size) { + normalization.size = { + requested: size, + applied: normalizedSize, + }; + } + size = normalizedSize; + } + + if (!caps.supportsSize && size) { + let translated = false; + if (caps.supportsAspectRatio) { + const normalizedAspectRatio = resolveClosestAspectRatio({ + requestedAspectRatio: aspectRatio, + requestedSize: size, + supportedAspectRatios: caps.aspectRatios, + }); + if (normalizedAspectRatio) { + aspectRatio = normalizedAspectRatio; + normalization.aspectRatio = { + applied: normalizedAspectRatio, + derivedFrom: "size", + }; + translated = true; + } + } + if (!translated) { + ignoredOverrides.push({ key: "size", value: size }); + } + size = undefined; + } + + if (aspectRatio && (caps.aspectRatios?.length ?? 0) > 0 && caps.supportsAspectRatio) { + const normalizedAspectRatio = resolveClosestAspectRatio({ + requestedAspectRatio: aspectRatio, + requestedSize: size, + supportedAspectRatios: caps.aspectRatios, + }); + if (normalizedAspectRatio && normalizedAspectRatio !== aspectRatio) { + normalization.aspectRatio = { + requested: aspectRatio, + applied: normalizedAspectRatio, + }; + } + aspectRatio = normalizedAspectRatio; + } else if (!caps.supportsAspectRatio && aspectRatio) { + const derivedSize = + caps.supportsSize && !size + ? resolveClosestSize({ + requestedSize: params.size, + requestedAspectRatio: aspectRatio, + supportedSizes: caps.sizes, + }) + : undefined; + if (derivedSize) { + size = derivedSize; + normalization.size = { + applied: derivedSize, + derivedFrom: "aspectRatio", + }; + } else { + ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio }); + } + aspectRatio = undefined; + } + + if (resolution && (caps.resolutions?.length ?? 0) > 0 && caps.supportsResolution) { + const normalizedResolution = resolveClosestResolution({ + requestedResolution: resolution, + supportedResolutions: caps.resolutions, + }); + if (normalizedResolution && normalizedResolution !== resolution) { + normalization.resolution = { + requested: resolution, + applied: normalizedResolution, + }; + } + resolution = normalizedResolution; + } else if (resolution && !caps.supportsResolution) { + ignoredOverrides.push({ key: "resolution", value: resolution }); + resolution = undefined; + } + + if (typeof audio === "boolean" && !caps.supportsAudio) { + ignoredOverrides.push({ key: "audio", value: audio }); + audio = undefined; + } + + if (typeof watermark === "boolean" && !caps.supportsWatermark) { + ignoredOverrides.push({ key: "watermark", value: watermark }); + watermark = undefined; + } + } + + if (caps && size && !caps.supportsSize) { + ignoredOverrides.push({ key: "size", value: size }); + size = undefined; + } + if (caps && aspectRatio && !caps.supportsAspectRatio) { + ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio }); + aspectRatio = undefined; + } + if (caps && resolution && !caps.supportsResolution) { + ignoredOverrides.push({ key: "resolution", value: resolution }); + resolution = undefined; + } + + if (!normalization.size && size && params.size && params.size !== size) { + normalization.size = { + requested: params.size, + applied: size, + }; + } + if ( + !normalization.aspectRatio && + aspectRatio && + ((!params.aspectRatio && params.size) || params.aspectRatio !== aspectRatio) + ) { + normalization.aspectRatio = { + applied: aspectRatio, + ...(params.aspectRatio ? { requested: params.aspectRatio } : {}), + ...(!params.aspectRatio && params.size ? { derivedFrom: "size" } : {}), + }; + } + if ( + !normalization.resolution && + resolution && + params.resolution && + params.resolution !== resolution + ) { + normalization.resolution = { + requested: params.resolution, + applied: resolution, + }; + } + + const requestedDurationSeconds = + typeof params.durationSeconds === "number" && Number.isFinite(params.durationSeconds) + ? Math.max(1, Math.round(params.durationSeconds)) + : undefined; + const durationSeconds = normalizeVideoGenerationDuration({ + provider: params.provider, + model: params.model, + durationSeconds: requestedDurationSeconds, + inputImageCount: params.inputImageCount ?? 0, + inputVideoCount: params.inputVideoCount ?? 0, + }); + const supportedDurationSeconds = resolveVideoGenerationSupportedDurations({ + provider: params.provider, + model: params.model, + inputImageCount: params.inputImageCount ?? 0, + inputVideoCount: params.inputVideoCount ?? 0, + }); + + if ( + typeof requestedDurationSeconds === "number" && + typeof durationSeconds === "number" && + requestedDurationSeconds !== durationSeconds + ) { + normalization.durationSeconds = { + requested: requestedDurationSeconds, + applied: durationSeconds, + ...(supportedDurationSeconds?.length ? { supportedValues: supportedDurationSeconds } : {}), + }; + } + + return { + size, + aspectRatio, + resolution, + durationSeconds, + supportedDurationSeconds, + audio, + watermark, + ignoredOverrides, + normalization: + hasMediaNormalizationEntry(normalization.size) || + hasMediaNormalizationEntry(normalization.aspectRatio) || + hasMediaNormalizationEntry(normalization.resolution) || + hasMediaNormalizationEntry(normalization.durationSeconds) + ? normalization + : undefined, + }; +} diff --git a/src/video-generation/runtime.ts b/src/video-generation/runtime.ts index e5785986823..097baa03b09 100644 --- a/src/video-generation/runtime.ts +++ b/src/video-generation/runtime.ts @@ -6,20 +6,16 @@ import { createSubsystemLogger } from "../logging/subsystem.js"; import { buildNoCapabilityModelConfiguredMessage, deriveAspectRatioFromSize, - resolveClosestAspectRatio, resolveCapabilityModelCandidates, throwCapabilityGenerationFailure, } from "../media-generation/runtime-shared.js"; -import { resolveVideoGenerationModeCapabilities } from "./capabilities.js"; -import { - normalizeVideoGenerationDuration, - resolveVideoGenerationSupportedDurations, -} from "./duration-support.js"; import { parseVideoGenerationModelRef } from "./model-ref.js"; +import { resolveVideoGenerationOverrides } from "./normalization.js"; import { getVideoGenerationProvider, listVideoGenerationProviders } from "./provider-registry.js"; import type { GeneratedVideoAsset, VideoGenerationIgnoredOverride, + VideoGenerationNormalization, VideoGenerationResolution, VideoGenerationResult, VideoGenerationSourceAsset, @@ -48,6 +44,7 @@ export type GenerateVideoRuntimeResult = { provider: string; model: string; attempts: FallbackAttempt[]; + normalization?: VideoGenerationNormalization; metadata?: Record; ignoredOverrides: VideoGenerationIgnoredOverride[]; }; @@ -64,87 +61,6 @@ export function listRuntimeVideoGenerationProviders(params?: { config?: OpenClaw return listVideoGenerationProviders(params?.config); } -function resolveProviderVideoGenerationOverrides(params: { - provider: NonNullable>; - size?: string; - aspectRatio?: string; - resolution?: VideoGenerationResolution; - audio?: boolean; - watermark?: boolean; - inputImageCount?: number; - inputVideoCount?: number; -}) { - const { capabilities: caps } = resolveVideoGenerationModeCapabilities({ - provider: params.provider, - inputImageCount: params.inputImageCount, - inputVideoCount: params.inputVideoCount, - }); - const ignoredOverrides: VideoGenerationIgnoredOverride[] = []; - let size = params.size; - let aspectRatio = params.aspectRatio; - let resolution = params.resolution; - let audio = params.audio; - let watermark = params.watermark; - - if (!caps) { - return { - size, - aspectRatio, - resolution, - audio, - watermark, - ignoredOverrides, - }; - } - - if (size && !caps.supportsSize) { - let translated = false; - if (caps.supportsAspectRatio) { - const normalizedAspectRatio = resolveClosestAspectRatio({ - requestedAspectRatio: aspectRatio, - requestedSize: size, - }); - if (normalizedAspectRatio) { - aspectRatio = normalizedAspectRatio; - translated = true; - } - } - if (!translated) { - ignoredOverrides.push({ key: "size", value: size }); - } - size = undefined; - } - - if (aspectRatio && !caps.supportsAspectRatio) { - ignoredOverrides.push({ key: "aspectRatio", value: aspectRatio }); - aspectRatio = undefined; - } - - if (resolution && !caps.supportsResolution) { - ignoredOverrides.push({ key: "resolution", value: resolution }); - resolution = undefined; - } - - if (typeof audio === "boolean" && !caps.supportsAudio) { - ignoredOverrides.push({ key: "audio", value: audio }); - audio = undefined; - } - - if (typeof watermark === "boolean" && !caps.supportsWatermark) { - ignoredOverrides.push({ key: "watermark", value: watermark }); - watermark = undefined; - } - - return { - size, - aspectRatio, - resolution, - audio, - watermark, - ignoredOverrides, - }; -} - export async function generateVideo( params: GenerateVideoParams, ): Promise { @@ -177,33 +93,18 @@ export async function generateVideo( } try { - const sanitized = resolveProviderVideoGenerationOverrides({ + const sanitized = resolveVideoGenerationOverrides({ provider, + model: candidate.model, size: params.size, aspectRatio: params.aspectRatio, resolution: params.resolution, + durationSeconds: params.durationSeconds, audio: params.audio, watermark: params.watermark, inputImageCount: params.inputImages?.length ?? 0, inputVideoCount: params.inputVideos?.length ?? 0, }); - const requestedDurationSeconds = - typeof params.durationSeconds === "number" && Number.isFinite(params.durationSeconds) - ? Math.max(1, Math.round(params.durationSeconds)) - : undefined; - const normalizedDurationSeconds = normalizeVideoGenerationDuration({ - provider, - model: candidate.model, - durationSeconds: requestedDurationSeconds, - inputImageCount: params.inputImages?.length ?? 0, - inputVideoCount: params.inputVideos?.length ?? 0, - }); - const supportedDurationSeconds = resolveVideoGenerationSupportedDurations({ - provider, - model: candidate.model, - inputImageCount: params.inputImages?.length ?? 0, - inputVideoCount: params.inputVideos?.length ?? 0, - }); const result: VideoGenerationResult = await provider.generateVideo({ provider: candidate.provider, model: candidate.model, @@ -214,7 +115,7 @@ export async function generateVideo( size: sanitized.size, aspectRatio: sanitized.aspectRatio, resolution: sanitized.resolution, - durationSeconds: normalizedDurationSeconds, + durationSeconds: sanitized.durationSeconds, audio: sanitized.audio, watermark: sanitized.watermark, inputImages: params.inputImages, @@ -228,37 +129,49 @@ export async function generateVideo( provider: candidate.provider, model: result.model ?? candidate.model, attempts, + normalization: sanitized.normalization, ignoredOverrides: sanitized.ignoredOverrides, metadata: { ...result.metadata, - ...((params.size && sanitized.aspectRatio && params.size !== sanitized.size) || - (params.aspectRatio && - sanitized.aspectRatio && - params.aspectRatio !== sanitized.aspectRatio) + ...(sanitized.normalization?.size?.requested !== undefined && + sanitized.normalization.size.applied !== undefined ? { - ...(params.size ? { requestedSize: params.size } : {}), - ...(params.aspectRatio ? { requestedAspectRatio: params.aspectRatio } : {}), - normalizedAspectRatio: sanitized.aspectRatio, - ...(params.size - ? { aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size) } + requestedSize: sanitized.normalization.size.requested, + normalizedSize: sanitized.normalization.size.applied, + } + : {}), + ...(sanitized.normalization?.aspectRatio?.applied !== undefined + ? { + ...(sanitized.normalization.aspectRatio.requested !== undefined + ? { requestedAspectRatio: sanitized.normalization.aspectRatio.requested } + : {}), + normalizedAspectRatio: sanitized.normalization.aspectRatio.applied, + ...(sanitized.normalization.aspectRatio.derivedFrom === "size" && params.size + ? { + requestedSize: params.size, + aspectRatioDerivedFromSize: deriveAspectRatioFromSize(params.size), + } : {}), } : {}), - ...(params.resolution && - sanitized.resolution && - params.resolution !== sanitized.resolution + ...(sanitized.normalization?.resolution?.requested !== undefined && + sanitized.normalization.resolution.applied !== undefined ? { - requestedResolution: params.resolution, - normalizedResolution: sanitized.resolution, + requestedResolution: sanitized.normalization.resolution.requested, + normalizedResolution: sanitized.normalization.resolution.applied, } : {}), - ...(typeof requestedDurationSeconds === "number" && - typeof normalizedDurationSeconds === "number" && - requestedDurationSeconds !== normalizedDurationSeconds + ...(sanitized.normalization?.durationSeconds?.requested !== undefined && + sanitized.normalization.durationSeconds.applied !== undefined ? { - requestedDurationSeconds, - normalizedDurationSeconds, - ...(supportedDurationSeconds ? { supportedDurationSeconds } : {}), + requestedDurationSeconds: sanitized.normalization.durationSeconds.requested, + normalizedDurationSeconds: sanitized.normalization.durationSeconds.applied, + ...(sanitized.normalization.durationSeconds.supportedValues?.length + ? { + supportedDurationSeconds: + sanitized.normalization.durationSeconds.supportedValues, + } + : {}), } : {}), }, diff --git a/src/video-generation/types.ts b/src/video-generation/types.ts index 8bcfd8a73f8..5d2442dbdea 100644 --- a/src/video-generation/types.ts +++ b/src/video-generation/types.ts @@ -1,5 +1,6 @@ import type { AuthProfileStore } from "../agents/auth-profiles.js"; import type { OpenClawConfig } from "../config/config.js"; +import type { MediaNormalizationEntry } from "../media-generation/runtime-shared.js"; export type GeneratedVideoAsset = { buffer: Buffer; @@ -61,6 +62,9 @@ export type VideoGenerationModeCapabilities = { maxDurationSeconds?: number; supportedDurationSeconds?: readonly number[]; supportedDurationSecondsByModel?: Readonly>; + sizes?: readonly string[]; + aspectRatios?: readonly string[]; + resolutions?: readonly VideoGenerationResolution[]; supportsSize?: boolean; supportsAspectRatio?: boolean; supportsResolution?: boolean; @@ -78,6 +82,13 @@ export type VideoGenerationProviderCapabilities = VideoGenerationModeCapabilitie videoToVideo?: VideoGenerationTransformCapabilities; }; +export type VideoGenerationNormalization = { + size?: MediaNormalizationEntry; + aspectRatio?: MediaNormalizationEntry; + resolution?: MediaNormalizationEntry; + durationSeconds?: MediaNormalizationEntry; +}; + export type VideoGenerationProvider = { id: string; aliases?: string[];