fix(video): recover generation parameter fallbacks

This commit is contained in:
Vincent Koc
2026-05-04 22:26:10 -07:00
parent f126f72d63
commit 177167c846
13 changed files with 177 additions and 58 deletions

View File

@@ -68,6 +68,7 @@ Docs: https://docs.openclaw.ai
- Update/restart: probe managed Gateway restarts with the service environment and add a Docker product lane that exercises candidate-owned `openclaw update --yes --json` restarts, so SecretRef-backed local gateway auth cannot regress behind mocked restart checks. Thanks @vincentkoc.
- Webhooks/Gmail/Windows: resolve `gcloud`, `gog`, and `tailscale` PATH/PATHEXT shims before setup and watcher spawns, using the Windows-safe `.cmd` wrapper for long-lived `gog serve` processes. (#74881, fixes #54470) Thanks @Angfr95.
- Video generation: accept provider-specific aspect-ratio and resolution hints at the tool boundary, normalize `720P` to MiniMax's supported `768P`, and stop sending Google `generateAudio` on Gemini video requests so provider fallback can recover from model-specific parameter differences. Thanks @vincentkoc.
- Plugins/install: honor the beta update channel for onboarding and doctor-managed plugin installs by requesting floating npm and ClawHub specs with `@beta` while keeping persistent install records on the catalog default. Thanks @vincentkoc.
- WhatsApp/onboarding: canonicalize setup and pairing allowlist entries to WhatsApp's digit-only phone ids while still accepting E.164, JID, and `whatsapp:` inputs, so personal-phone allowlists match WhatsApp Web sender ids after setup. Thanks @vincentkoc.
- Gateway/startup: load provider plugins that own explicitly configured image, video, or music generation defaults so generation tools become live after gateway restart instead of remaining catalog-only. Fixes #77244. Thanks @buyuangtampan, @Nikoxx99, and @vincentkoc.

View File

@@ -198,9 +198,9 @@ role or use `first_frame` for single-image image-to-video.
### Style controls
<ParamField path="aspectRatio" type="string">
`1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, or `adaptive`.
Aspect-ratio hint such as `1:1`, `16:9`, `9:16`, `adaptive`, or a provider-specific value. OpenClaw normalizes or ignores unsupported values per provider.
</ParamField>
<ParamField path="resolution" type="string">`480P`, `720P`, `768P`, or `1080P`.</ParamField>
<ParamField path="resolution" type="string">Resolution hint such as `480P`, `720P`, `768P`, `1080P`, `4K`, or a provider-specific value. OpenClaw normalizes or ignores unsupported values per provider.</ParamField>
<ParamField path="durationSeconds" type="number">
Target duration in seconds (rounded to the nearest provider-supported value).
</ParamField>

View File

@@ -88,7 +88,7 @@ export function createGoogleVideoGenerationProviderMetadata(): Omit<
supportsAspectRatio: true,
supportsResolution: true,
supportsSize: true,
supportsAudio: true,
supportsAudio: false,
},
imageToVideo: {
enabled: true,
@@ -101,7 +101,7 @@ export function createGoogleVideoGenerationProviderMetadata(): Omit<
supportsAspectRatio: true,
supportsResolution: true,
supportsSize: true,
supportsAudio: true,
supportsAudio: false,
},
videoToVideo: {
enabled: true,
@@ -114,7 +114,7 @@ export function createGoogleVideoGenerationProviderMetadata(): Omit<
supportsAspectRatio: true,
supportsResolution: true,
supportsSize: true,
supportsAudio: true,
supportsAudio: false,
},
},
};

View File

@@ -86,11 +86,11 @@ describe("google video generation provider", () => {
durationSeconds: 4,
aspectRatio: "16:9",
resolution: "720p",
generateAudio: true,
}),
}),
);
expect(request?.config).not.toHaveProperty("numberOfVideos");
expect(request?.config).not.toHaveProperty("generateAudio");
expect(result.videos).toHaveLength(1);
expect(result.videos[0]?.mimeType).toBe("video/mp4");
expect(createGoogleGenAIMock).toHaveBeenCalledWith(

View File

@@ -322,7 +322,6 @@ async function generateGoogleVideoViaRest(params: {
durationSeconds?: number;
aspectRatio?: "16:9" | "9:16";
resolution?: "720p" | "1080p";
audio?: boolean;
}): Promise<unknown> {
let operation = await requestGoogleVideoJson({
url: `${params.baseUrl}/${resolveGoogleVideoRestModelPath(params.model)}:predictLongRunning`,
@@ -337,7 +336,6 @@ async function generateGoogleVideoViaRest(params: {
: {}),
...(params.aspectRatio ? { aspectRatio: params.aspectRatio } : {}),
...(params.resolution ? { resolution: params.resolution } : {}),
...(params.audio === true ? { generateAudio: true } : {}),
},
},
});
@@ -429,7 +427,6 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
...(aspectRatio ? { aspectRatio } : {}),
...(resolution ? { resolution } : {}),
...(req.audio === true ? { generateAudio: true } : {}),
},
});
} catch (error) {
@@ -446,7 +443,6 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
durationSeconds,
aspectRatio,
resolution,
audio: req.audio,
});
}
@@ -480,7 +476,6 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
durationSeconds,
aspectRatio,
resolution,
audio: req.audio,
});
generatedVideos = extractGeneratedVideos(operation);
}

View File

@@ -64,6 +64,7 @@ describe("minimax video generation provider", () => {
prompt: "A fox sprints across snowy hills",
cfg: {},
durationSeconds: 5,
resolution: "720P",
});
expect(postJsonRequestMock).toHaveBeenCalledWith(
@@ -71,6 +72,7 @@ describe("minimax video generation provider", () => {
url: "https://api.minimax.io/v1/video_generation",
body: expect.objectContaining({
duration: 6,
resolution: "768P",
}),
}),
);

View File

@@ -25,6 +25,12 @@ const MINIMAX_MODEL_ALLOWED_DURATIONS: Readonly<Record<string, readonly number[]
"MiniMax-Hailuo-2.3": [6, 10],
"MiniMax-Hailuo-02": [6, 10],
};
/**
 * Resolution values listed per MiniMax model id.
 *
 * `resolveResolution` looks a model up here: a requested value that appears in
 * the model's list is used as-is, and a model with no entry (or an empty list)
 * gets the requested value forwarded without any adjustment.
 */
const MINIMAX_MODEL_ALLOWED_RESOLUTIONS: Readonly<Record<string, readonly string[]>> = {
"MiniMax-Hailuo-2.3": ["768P", "1080P"],
"MiniMax-Hailuo-2.3-Fast": ["768P", "1080P"],
"MiniMax-Hailuo-02": ["768P", "1080P"],
};
/**
 * Ranking of known resolution labels, lowest first.
 *
 * `resolveResolution` compares positions in this tuple (not pixel counts) to
 * pick the allowed value nearest to a requested one; labels missing from this
 * tuple have no rank and cannot be mapped to a neighbour.
 */
const MINIMAX_RESOLUTION_ORDER = ["480P", "720P", "768P", "1080P"] as const;
type MinimaxBaseResp = {
status_code?: number;
@@ -112,6 +118,43 @@ function resolveDurationSeconds(params: {
);
}
/**
 * Chooses the resolution string to send to MiniMax for a given model.
 *
 * The requested value is passed through `normalizeOptionalString` (assumed to
 * yield `undefined` for missing/blank input — confirm against the helper) and
 * upper-cased before any comparison.
 *
 * @param params.model - Model id used to look up `MINIMAX_MODEL_ALLOWED_RESOLUTIONS`.
 * @param params.resolution - Caller-requested resolution, if any.
 * @returns
 * - `undefined` when nothing usable was requested, or when the model has an
 *   allow-list that excludes the value and the value has no position in
 *   `MINIMAX_RESOLUTION_ORDER`;
 * - the upper-cased requested value when the model has no (or an empty)
 *   allow-list, or when the allow-list contains it;
 * - otherwise the allow-listed value whose position in
 *   `MINIMAX_RESOLUTION_ORDER` is nearest to the requested one. On a tie the
 *   entry that comes first in the allow-list wins.
 */
function resolveResolution(params: {
  model: string;
  resolution: string | undefined;
}): string | undefined {
  const wanted = normalizeOptionalString(params.resolution)?.toUpperCase();
  if (!wanted) {
    return undefined;
  }

  const supported = MINIMAX_MODEL_ALLOWED_RESOLUTIONS[params.model];
  // Unknown model or empty allow-list: nothing to validate against, so forward as-is.
  if (!supported?.length || supported.includes(wanted)) {
    return wanted;
  }

  // Position of a label in the ordering tuple; -1 when the label is unranked.
  const rankOf = (label: string): number =>
    MINIMAX_RESOLUTION_ORDER.indexOf(label as (typeof MINIMAX_RESOLUTION_ORDER)[number]);

  const wantedRank = rankOf(wanted);
  if (wantedRank < 0) {
    // An unranked request cannot be mapped to a neighbour.
    return undefined;
  }

  let closest: string | undefined;
  for (const candidate of supported) {
    if (closest === undefined) {
      // The first allow-listed entry seeds the search unconditionally.
      closest = candidate;
      continue;
    }
    const candidateRank = rankOf(candidate);
    if (candidateRank < 0) {
      // Unranked candidates never displace the current pick.
      continue;
    }
    const closestRank = rankOf(closest);
    const candidateWins =
      closestRank < 0 ||
      Math.abs(candidateRank - wantedRank) < Math.abs(closestRank - wantedRank);
    if (candidateWins) {
      closest = candidate;
    }
  }
  return closest;
}
async function pollMinimaxVideo(params: {
taskId: string;
headers: Headers;
@@ -246,6 +289,7 @@ function buildMinimaxVideoProvider(providerId: string): VideoGenerationProvider
maxVideos: 1,
maxDurationSeconds: 10,
supportedDurationSecondsByModel: MINIMAX_MODEL_ALLOWED_DURATIONS,
resolutions: ["768P", "1080P"],
supportsResolution: true,
supportsWatermark: false,
},
@@ -255,6 +299,7 @@ function buildMinimaxVideoProvider(providerId: string): VideoGenerationProvider
maxInputImages: 1,
maxDurationSeconds: 10,
supportedDurationSecondsByModel: MINIMAX_MODEL_ALLOWED_DURATIONS,
resolutions: ["768P", "1080P"],
supportsResolution: true,
supportsWatermark: false,
},
@@ -303,8 +348,12 @@ function buildMinimaxVideoProvider(providerId: string): VideoGenerationProvider
if (firstFrameImage) {
body.first_frame_image = firstFrameImage;
}
if (req.resolution) {
body.resolution = req.resolution;
const resolution = resolveResolution({
model,
resolution: req.resolution,
});
if (resolution) {
body.resolution = resolution;
}
const durationSeconds = resolveDurationSeconds({
model,

View File

@@ -1073,17 +1073,22 @@ describe("createVideoGenerateTool", () => {
expect(generateSpy).toHaveBeenCalledWith(expect.objectContaining({ aspectRatio: "adaptive" }));
});
it("rejects unsupported aspectRatio values", async () => {
it("accepts provider-specific aspectRatio and resolution values and forwards them to the runtime", async () => {
mockVideoPluginProvider();
const generateSpy = mockSavedVideoResult();
const tool = createVideoPluginTool();
await expect(
tool.execute("call-1", {
prompt: "lobster",
await tool.execute("call-1", {
prompt: "lobster",
aspectRatio: "17:9",
resolution: "draft-large",
});
expect(generateSpy).toHaveBeenCalledWith(
expect.objectContaining({
aspectRatio: "17:9",
resolution: "draft-large",
}),
).rejects.toThrow(
"aspectRatio must be one of 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9, or adaptive",
);
});
});

View File

@@ -77,26 +77,6 @@ const log = createSubsystemLogger("agents/tools/video-generate");
const MAX_INPUT_IMAGES = 9;
const MAX_INPUT_VIDEOS = 4;
const MAX_INPUT_AUDIOS = 3;
const SUPPORTED_ASPECT_RATIOS = new Set([
"1:1",
"2:3",
"3:2",
"3:4",
"4:3",
"4:5",
"5:4",
"9:16",
"16:9",
"21:9",
// Provider-specific sentinel: accepted at the tool boundary, then forwarded
// to the active provider only if that provider declares "adaptive" in its
// capabilities.aspectRatios list. Providers that do not declare it see the
// value pushed into `ignoredOverrides` in the normalization layer so the
// tool surfaces a user-visible "ignored override" warning rather than
// silently dropping the request. Seedance uses this to auto-detect the
// ratio from input image dimensions.
"adaptive",
]);
const VideoGenerateToolSchema = Type.Object({
action: Type.Optional(
@@ -184,12 +164,13 @@ const VideoGenerateToolSchema = Type.Object({
aspectRatio: Type.Optional(
Type.String({
description:
'Optional aspect ratio hint: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9, or "adaptive".',
'Optional aspect ratio hint such as 1:1, 16:9, 9:16, "adaptive", or a provider-specific value. OpenClaw normalizes or ignores unsupported values per provider.',
}),
),
resolution: Type.Optional(
Type.String({
description: "Optional resolution hint: 480P, 720P, 768P, or 1080P.",
description:
"Optional resolution hint such as 480P, 720P, 768P, 1080P, 4K, or a provider-specific value. OpenClaw normalizes or ignores unsupported values per provider.",
}),
),
durationSeconds: Type.Optional(
@@ -254,19 +235,15 @@ function resolveAction(args: Record<string, unknown>): "generate" | "list" | "st
}
function normalizeResolution(raw: string | undefined): VideoGenerationResolution | undefined {
const normalized = raw?.trim().toUpperCase();
const normalized = raw?.trim();
if (!normalized) {
return undefined;
}
if (
normalized === "480P" ||
normalized === "720P" ||
normalized === "768P" ||
normalized === "1080P"
) {
return normalized;
const uppercase = normalized.toUpperCase();
if (/^\d+P$/.test(uppercase) || /^\d+K$/.test(uppercase)) {
return uppercase;
}
throw new ToolInputError("resolution must be one of 480P, 720P, 768P, or 1080P");
return normalized;
}
function normalizeAspectRatio(raw: string | undefined): string | undefined {
@@ -274,12 +251,7 @@ function normalizeAspectRatio(raw: string | undefined): string | undefined {
if (!normalized) {
return undefined;
}
if (SUPPORTED_ASPECT_RATIOS.has(normalized)) {
return normalized;
}
throw new ToolInputError(
"aspectRatio must be one of 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9, or adaptive",
);
return normalized;
}
/**

View File

@@ -33,7 +33,7 @@ export type GeneratedVideoAsset = {
metadata?: Record<string, unknown>;
};
export type VideoGenerationResolution = "480P" | "720P" | "768P" | "1080P";
export type VideoGenerationResolution = "480P" | "720P" | "768P" | "1080P" | (string & {});
/**
* Canonical semantic role hints for reference assets (first/last frame,

View File

@@ -16,6 +16,13 @@ import type {
VideoGenerationResolution,
} from "./types.js";
/**
 * Known resolution labels listed from lowest to highest.
 *
 * Passed as the `order` argument to `resolveClosestResolution` when
 * `resolveVideoGenerationOverrides` maps a requested resolution onto a
 * provider's supported list.
 */
const VIDEO_RESOLUTION_ORDER: readonly VideoGenerationResolution[] = [
"480P",
"720P",
"768P",
"1080P",
];
export type ResolvedVideoGenerationOverrides = {
size?: string;
aspectRatio?: string;
@@ -138,12 +145,15 @@ export function resolveVideoGenerationOverrides(params: {
const normalizedResolution = resolveClosestResolution({
requestedResolution: resolution,
supportedResolutions: caps.resolutions,
order: VIDEO_RESOLUTION_ORDER,
});
if (normalizedResolution && normalizedResolution !== resolution) {
normalization.resolution = {
requested: resolution,
applied: normalizedResolution,
};
} else if (!normalizedResolution) {
ignoredOverrides.push({ key: "resolution", value: resolution });
}
resolution = normalizedResolution;
} else if (resolution && !caps.supportsResolution) {

View File

@@ -690,6 +690,91 @@ describe("video-generation runtime", () => {
]);
});
// Verifies that a requested resolution the provider does not list ("720P") is
// replaced by a listed one ("768P") before the provider is called, and that the
// substitution is reported as a normalization rather than an ignored override.
it("normalizes video resolutions against provider-supported values", async () => {
// Captures the resolution the stub provider actually receives.
let seenResolution: string | undefined;
// NOTE(review): `providers` is declared outside this view; presumably it is the
// registry the runtime under test resolves providers from — confirm in the harness.
providers = [
{
id: "minimax",
capabilities: {
generate: {
supportsResolution: true,
resolutions: ["768P", "1080P"],
},
},
generateVideo: async (req) => {
seenResolution = req.resolution;
return {
videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }],
model: "MiniMax-Hailuo-2.3",
};
},
},
];
const result = await runGenerateVideo({
cfg: {
agents: {
defaults: {
videoGenerationModel: { primary: "minimax/MiniMax-Hailuo-2.3" },
},
},
} as OpenClawConfig,
prompt: "animate a lobster",
resolution: "720P",
});
// The provider must see the substituted value, not the original request.
expect(seenResolution).toBe("768P");
// A successful substitution is not an ignored override.
expect(result.ignoredOverrides).toEqual([]);
expect(result.normalization).toMatchObject({
resolution: {
requested: "720P",
applied: "768P",
},
});
// The same requested/applied pair is also expected in the result metadata.
expect(result.metadata).toMatchObject({
requestedResolution: "720P",
normalizedResolution: "768P",
});
});
// Verifies that a requested resolution which cannot be mapped onto the
// provider's listed values ("4K" against ["768P", "1080P"]) is dropped: the
// provider receives no resolution and the value is reported as ignored.
it("ignores unparseable video resolutions instead of sending them to providers", async () => {
// Captures the resolution the stub provider actually receives.
let seenResolution: string | undefined;
// NOTE(review): `providers` is declared outside this view; presumably it is the
// registry the runtime under test resolves providers from — confirm in the harness.
providers = [
{
id: "minimax",
capabilities: {
generate: {
supportsResolution: true,
resolutions: ["768P", "1080P"],
},
},
generateVideo: async (req) => {
seenResolution = req.resolution;
return {
videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }],
model: "MiniMax-Hailuo-2.3",
};
},
},
];
const result = await runGenerateVideo({
cfg: {
agents: {
defaults: {
videoGenerationModel: { primary: "minimax/MiniMax-Hailuo-2.3" },
},
},
} as OpenClawConfig,
prompt: "animate a lobster",
resolution: "4K",
});
// Nothing is forwarded to the provider for the unmapped value.
expect(seenResolution).toBeUndefined();
// The dropped value is surfaced to the caller as an ignored override.
expect(result.ignoredOverrides).toEqual([{ key: "resolution", value: "4K" }]);
// No substitution happened, so no normalization record is expected.
expect(result.normalization).toBeUndefined();
});
it("uses mode-specific capabilities for image-to-video requests", async () => {
let seenRequest:
| {

View File

@@ -14,7 +14,7 @@ export type GeneratedVideoAsset = {
metadata?: Record<string, unknown>;
};
export type VideoGenerationResolution = "480P" | "720P" | "768P" | "1080P";
export type VideoGenerationResolution = "480P" | "720P" | "768P" | "1080P" | (string & {});
/**
* Canonical semantic role hints for reference assets. The list covers the