mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:30:42 +00:00
fix(video): recover generation parameter fallbacks
This commit is contained in:
@@ -68,6 +68,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Update/restart: probe managed Gateway restarts with the service environment and add a Docker product lane that exercises candidate-owned `openclaw update --yes --json` restarts, so SecretRef-backed local gateway auth cannot regress behind mocked restart checks. Thanks @vincentkoc.
|
||||
- Webhooks/Gmail/Windows: resolve `gcloud`, `gog`, and `tailscale` PATH/PATHEXT shims before setup and watcher spawns, using the Windows-safe `.cmd` wrapper for long-lived `gog serve` processes. (#74881, fixes #54470) Thanks @Angfr95.
|
||||
- Video generation: accept provider-specific aspect-ratio and resolution hints at the tool boundary, normalize `720P` to MiniMax's supported `768P`, and stop sending Google `generateAudio` on Gemini video requests so provider fallback can recover from model-specific parameter differences. Thanks @vincentkoc.
|
||||
- Plugins/install: honor the beta update channel for onboarding and doctor-managed plugin installs by requesting floating npm and ClawHub specs with `@beta` while keeping persistent install records on the catalog default. Thanks @vincentkoc.
|
||||
- WhatsApp/onboarding: canonicalize setup and pairing allowlist entries to WhatsApp's digit-only phone ids while still accepting E.164, JID, and `whatsapp:` inputs, so personal-phone allowlists match WhatsApp Web sender ids after setup. Thanks @vincentkoc.
|
||||
- Gateway/startup: load provider plugins that own explicitly configured image, video, or music generation defaults so generation tools become live after gateway restart instead of remaining catalog-only. Fixes #77244. Thanks @buyuangtampan, @Nikoxx99, and @vincentkoc.
|
||||
|
||||
@@ -198,9 +198,9 @@ role or use `first_frame` for single-image image-to-video.
|
||||
### Style controls
|
||||
|
||||
<ParamField path="aspectRatio" type="string">
|
||||
`1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, or `adaptive`.
|
||||
Aspect-ratio hint such as `1:1`, `16:9`, `9:16`, `adaptive`, or a provider-specific value. OpenClaw normalizes or ignores unsupported values per provider.
|
||||
</ParamField>
|
||||
<ParamField path="resolution" type="string">`480P`, `720P`, `768P`, or `1080P`.</ParamField>
|
||||
<ParamField path="resolution" type="string">Resolution hint such as `480P`, `720P`, `768P`, `1080P`, `4K`, or a provider-specific value. OpenClaw normalizes or ignores unsupported values per provider.</ParamField>
|
||||
<ParamField path="durationSeconds" type="number">
|
||||
Target duration in seconds (rounded to nearest provider-supported value).
|
||||
</ParamField>
|
||||
|
||||
@@ -88,7 +88,7 @@ export function createGoogleVideoGenerationProviderMetadata(): Omit<
|
||||
supportsAspectRatio: true,
|
||||
supportsResolution: true,
|
||||
supportsSize: true,
|
||||
supportsAudio: true,
|
||||
supportsAudio: false,
|
||||
},
|
||||
imageToVideo: {
|
||||
enabled: true,
|
||||
@@ -101,7 +101,7 @@ export function createGoogleVideoGenerationProviderMetadata(): Omit<
|
||||
supportsAspectRatio: true,
|
||||
supportsResolution: true,
|
||||
supportsSize: true,
|
||||
supportsAudio: true,
|
||||
supportsAudio: false,
|
||||
},
|
||||
videoToVideo: {
|
||||
enabled: true,
|
||||
@@ -114,7 +114,7 @@ export function createGoogleVideoGenerationProviderMetadata(): Omit<
|
||||
supportsAspectRatio: true,
|
||||
supportsResolution: true,
|
||||
supportsSize: true,
|
||||
supportsAudio: true,
|
||||
supportsAudio: false,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
@@ -86,11 +86,11 @@ describe("google video generation provider", () => {
|
||||
durationSeconds: 4,
|
||||
aspectRatio: "16:9",
|
||||
resolution: "720p",
|
||||
generateAudio: true,
|
||||
}),
|
||||
}),
|
||||
);
|
||||
expect(request?.config).not.toHaveProperty("numberOfVideos");
|
||||
expect(request?.config).not.toHaveProperty("generateAudio");
|
||||
expect(result.videos).toHaveLength(1);
|
||||
expect(result.videos[0]?.mimeType).toBe("video/mp4");
|
||||
expect(createGoogleGenAIMock).toHaveBeenCalledWith(
|
||||
|
||||
@@ -322,7 +322,6 @@ async function generateGoogleVideoViaRest(params: {
|
||||
durationSeconds?: number;
|
||||
aspectRatio?: "16:9" | "9:16";
|
||||
resolution?: "720p" | "1080p";
|
||||
audio?: boolean;
|
||||
}): Promise<unknown> {
|
||||
let operation = await requestGoogleVideoJson({
|
||||
url: `${params.baseUrl}/${resolveGoogleVideoRestModelPath(params.model)}:predictLongRunning`,
|
||||
@@ -337,7 +336,6 @@ async function generateGoogleVideoViaRest(params: {
|
||||
: {}),
|
||||
...(params.aspectRatio ? { aspectRatio: params.aspectRatio } : {}),
|
||||
...(params.resolution ? { resolution: params.resolution } : {}),
|
||||
...(params.audio === true ? { generateAudio: true } : {}),
|
||||
},
|
||||
},
|
||||
});
|
||||
@@ -429,7 +427,6 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
|
||||
...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
|
||||
...(aspectRatio ? { aspectRatio } : {}),
|
||||
...(resolution ? { resolution } : {}),
|
||||
...(req.audio === true ? { generateAudio: true } : {}),
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
@@ -446,7 +443,6 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
|
||||
durationSeconds,
|
||||
aspectRatio,
|
||||
resolution,
|
||||
audio: req.audio,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -480,7 +476,6 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
|
||||
durationSeconds,
|
||||
aspectRatio,
|
||||
resolution,
|
||||
audio: req.audio,
|
||||
});
|
||||
generatedVideos = extractGeneratedVideos(operation);
|
||||
}
|
||||
|
||||
@@ -64,6 +64,7 @@ describe("minimax video generation provider", () => {
|
||||
prompt: "A fox sprints across snowy hills",
|
||||
cfg: {},
|
||||
durationSeconds: 5,
|
||||
resolution: "720P",
|
||||
});
|
||||
|
||||
expect(postJsonRequestMock).toHaveBeenCalledWith(
|
||||
@@ -71,6 +72,7 @@ describe("minimax video generation provider", () => {
|
||||
url: "https://api.minimax.io/v1/video_generation",
|
||||
body: expect.objectContaining({
|
||||
duration: 6,
|
||||
resolution: "768P",
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
@@ -25,6 +25,12 @@ const MINIMAX_MODEL_ALLOWED_DURATIONS: Readonly<Record<string, readonly number[]
|
||||
"MiniMax-Hailuo-2.3": [6, 10],
|
||||
"MiniMax-Hailuo-02": [6, 10],
|
||||
};
|
||||
// Per-model list of resolution labels that resolveResolution() treats as
// allowed. A model with no entry here is not restricted: resolveResolution()
// returns the uppercased request unchanged for it.
const MINIMAX_MODEL_ALLOWED_RESOLUTIONS: Readonly<Record<string, readonly string[]>> = {
|
||||
"MiniMax-Hailuo-2.3": ["768P", "1080P"],
|
||||
"MiniMax-Hailuo-2.3-Fast": ["768P", "1080P"],
|
||||
"MiniMax-Hailuo-02": ["768P", "1080P"],
|
||||
};
|
||||
// Resolution labels listed from lowest to highest. resolveResolution() uses
// the position in this tuple (not the pixel count) to measure how far an
// allowed value is from the requested one.
const MINIMAX_RESOLUTION_ORDER = ["480P", "720P", "768P", "1080P"] as const;
|
||||
|
||||
type MinimaxBaseResp = {
|
||||
status_code?: number;
|
||||
@@ -112,6 +118,43 @@ function resolveDurationSeconds(params: {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Maps a requested resolution label to the value to use for a MiniMax model.
 *
 * @param params.model - Model name used as the key into
 *   MINIMAX_MODEL_ALLOWED_RESOLUTIONS.
 * @param params.resolution - Requested label; compared case-insensitively
 *   because it is uppercased before any lookup.
 * @returns
 *   - `undefined` when normalizeOptionalString() yields no value
 *     (NOTE(review): presumably blank/whitespace input — confirm against the
 *     helper), or when the model is restricted and the request is not one of
 *     the labels in MINIMAX_RESOLUTION_ORDER;
 *   - the uppercased request when the model has no (or an empty) allowed list,
 *     or the request is already allowed;
 *   - otherwise the allowed label whose position in MINIMAX_RESOLUTION_ORDER
 *     is closest to the request's position.
 */
function resolveResolution(params: {
|
||||
model: string;
|
||||
resolution: string | undefined;
|
||||
}): string | undefined {
|
||||
const requested = normalizeOptionalString(params.resolution)?.toUpperCase();
|
||||
if (!requested) {
|
||||
return undefined;
|
||||
}
|
||||
const allowed = MINIMAX_MODEL_ALLOWED_RESOLUTIONS[params.model];
|
||||
// Unrestricted model, or the request is already an allowed value: pass the
// uppercased request through untouched.
if (!allowed || allowed.length === 0 || allowed.includes(requested)) {
|
||||
return requested;
|
||||
}
|
||||
const requestedIndex = MINIMAX_RESOLUTION_ORDER.indexOf(
|
||||
requested as (typeof MINIMAX_RESOLUTION_ORDER)[number],
|
||||
);
|
||||
// A label that is not in the ordering cannot be ranked against the allowed
// values, so no substitute is chosen.
if (requestedIndex < 0) {
|
||||
return undefined;
|
||||
}
|
||||
// No initial value is passed to reduce(): `allowed` is non-empty here
// (checked above), so the first allowed entry seeds `best`.
return allowed.reduce((best, current) => {
|
||||
const currentIndex = MINIMAX_RESOLUTION_ORDER.indexOf(
|
||||
current as (typeof MINIMAX_RESOLUTION_ORDER)[number],
|
||||
);
|
||||
const bestIndex = MINIMAX_RESOLUTION_ORDER.indexOf(
|
||||
best as (typeof MINIMAX_RESOLUTION_ORDER)[number],
|
||||
);
|
||||
// Allowed entries missing from the ordering never win over a ranked one.
if (currentIndex < 0) {
|
||||
return best;
|
||||
}
|
||||
if (bestIndex < 0) {
|
||||
return current;
|
||||
}
|
||||
// Strict `<`: on equal distance the entry that appears earlier in the
// allowed list is kept.
return Math.abs(currentIndex - requestedIndex) < Math.abs(bestIndex - requestedIndex)
|
||||
? current
|
||||
: best;
|
||||
});
|
||||
}
|
||||
|
||||
async function pollMinimaxVideo(params: {
|
||||
taskId: string;
|
||||
headers: Headers;
|
||||
@@ -246,6 +289,7 @@ function buildMinimaxVideoProvider(providerId: string): VideoGenerationProvider
|
||||
maxVideos: 1,
|
||||
maxDurationSeconds: 10,
|
||||
supportedDurationSecondsByModel: MINIMAX_MODEL_ALLOWED_DURATIONS,
|
||||
resolutions: ["768P", "1080P"],
|
||||
supportsResolution: true,
|
||||
supportsWatermark: false,
|
||||
},
|
||||
@@ -255,6 +299,7 @@ function buildMinimaxVideoProvider(providerId: string): VideoGenerationProvider
|
||||
maxInputImages: 1,
|
||||
maxDurationSeconds: 10,
|
||||
supportedDurationSecondsByModel: MINIMAX_MODEL_ALLOWED_DURATIONS,
|
||||
resolutions: ["768P", "1080P"],
|
||||
supportsResolution: true,
|
||||
supportsWatermark: false,
|
||||
},
|
||||
@@ -303,8 +348,12 @@ function buildMinimaxVideoProvider(providerId: string): VideoGenerationProvider
|
||||
if (firstFrameImage) {
|
||||
body.first_frame_image = firstFrameImage;
|
||||
}
|
||||
if (req.resolution) {
|
||||
body.resolution = req.resolution;
|
||||
const resolution = resolveResolution({
|
||||
model,
|
||||
resolution: req.resolution,
|
||||
});
|
||||
if (resolution) {
|
||||
body.resolution = resolution;
|
||||
}
|
||||
const durationSeconds = resolveDurationSeconds({
|
||||
model,
|
||||
|
||||
@@ -1073,17 +1073,22 @@ describe("createVideoGenerateTool", () => {
|
||||
expect(generateSpy).toHaveBeenCalledWith(expect.objectContaining({ aspectRatio: "adaptive" }));
|
||||
});
|
||||
|
||||
it("rejects unsupported aspectRatio values", async () => {
|
||||
it("accepts provider-specific aspectRatio and resolution values and forwards them to the runtime", async () => {
|
||||
mockVideoPluginProvider();
|
||||
const generateSpy = mockSavedVideoResult();
|
||||
const tool = createVideoPluginTool();
|
||||
|
||||
await expect(
|
||||
tool.execute("call-1", {
|
||||
prompt: "lobster",
|
||||
await tool.execute("call-1", {
|
||||
prompt: "lobster",
|
||||
aspectRatio: "17:9",
|
||||
resolution: "draft-large",
|
||||
});
|
||||
|
||||
expect(generateSpy).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
aspectRatio: "17:9",
|
||||
resolution: "draft-large",
|
||||
}),
|
||||
).rejects.toThrow(
|
||||
"aspectRatio must be one of 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9, or adaptive",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -77,26 +77,6 @@ const log = createSubsystemLogger("agents/tools/video-generate");
|
||||
const MAX_INPUT_IMAGES = 9;
|
||||
const MAX_INPUT_VIDEOS = 4;
|
||||
const MAX_INPUT_AUDIOS = 3;
|
||||
const SUPPORTED_ASPECT_RATIOS = new Set([
|
||||
"1:1",
|
||||
"2:3",
|
||||
"3:2",
|
||||
"3:4",
|
||||
"4:3",
|
||||
"4:5",
|
||||
"5:4",
|
||||
"9:16",
|
||||
"16:9",
|
||||
"21:9",
|
||||
// Provider-specific sentinel: accepted at the tool boundary, then forwarded
|
||||
// to the active provider only if that provider declares "adaptive" in its
|
||||
// capabilities.aspectRatios list. Providers that do not declare it see the
|
||||
// value pushed into `ignoredOverrides` in the normalization layer so the
|
||||
// tool surfaces a user-visible "ignored override" warning rather than
|
||||
// silently dropping the request. Seedance uses this to auto-detect the
|
||||
// ratio from input image dimensions.
|
||||
"adaptive",
|
||||
]);
|
||||
|
||||
const VideoGenerateToolSchema = Type.Object({
|
||||
action: Type.Optional(
|
||||
@@ -184,12 +164,13 @@ const VideoGenerateToolSchema = Type.Object({
|
||||
aspectRatio: Type.Optional(
|
||||
Type.String({
|
||||
description:
|
||||
'Optional aspect ratio hint: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9, or "adaptive".',
|
||||
'Optional aspect ratio hint such as 1:1, 16:9, 9:16, "adaptive", or a provider-specific value. OpenClaw normalizes or ignores unsupported values per provider.',
|
||||
}),
|
||||
),
|
||||
resolution: Type.Optional(
|
||||
Type.String({
|
||||
description: "Optional resolution hint: 480P, 720P, 768P, or 1080P.",
|
||||
description:
|
||||
"Optional resolution hint such as 480P, 720P, 768P, 1080P, 4K, or a provider-specific value. OpenClaw normalizes or ignores unsupported values per provider.",
|
||||
}),
|
||||
),
|
||||
durationSeconds: Type.Optional(
|
||||
@@ -254,19 +235,15 @@ function resolveAction(args: Record<string, unknown>): "generate" | "list" | "st
|
||||
}
|
||||
|
||||
function normalizeResolution(raw: string | undefined): VideoGenerationResolution | undefined {
|
||||
const normalized = raw?.trim().toUpperCase();
|
||||
const normalized = raw?.trim();
|
||||
if (!normalized) {
|
||||
return undefined;
|
||||
}
|
||||
if (
|
||||
normalized === "480P" ||
|
||||
normalized === "720P" ||
|
||||
normalized === "768P" ||
|
||||
normalized === "1080P"
|
||||
) {
|
||||
return normalized;
|
||||
const uppercase = normalized.toUpperCase();
|
||||
if (/^\d+P$/.test(uppercase) || /^\d+K$/.test(uppercase)) {
|
||||
return uppercase;
|
||||
}
|
||||
throw new ToolInputError("resolution must be one of 480P, 720P, 768P, or 1080P");
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function normalizeAspectRatio(raw: string | undefined): string | undefined {
|
||||
@@ -274,12 +251,7 @@ function normalizeAspectRatio(raw: string | undefined): string | undefined {
|
||||
if (!normalized) {
|
||||
return undefined;
|
||||
}
|
||||
if (SUPPORTED_ASPECT_RATIOS.has(normalized)) {
|
||||
return normalized;
|
||||
}
|
||||
throw new ToolInputError(
|
||||
"aspectRatio must be one of 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9, or adaptive",
|
||||
);
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -33,7 +33,7 @@ export type GeneratedVideoAsset = {
|
||||
metadata?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
export type VideoGenerationResolution = "480P" | "720P" | "768P" | "1080P";
|
||||
export type VideoGenerationResolution = "480P" | "720P" | "768P" | "1080P" | (string & {});
|
||||
|
||||
/**
|
||||
* Canonical semantic role hints for reference assets (first/last frame,
|
||||
|
||||
@@ -16,6 +16,13 @@ import type {
|
||||
VideoGenerationResolution,
|
||||
} from "./types.js";
|
||||
|
||||
const VIDEO_RESOLUTION_ORDER: readonly VideoGenerationResolution[] = [
|
||||
"480P",
|
||||
"720P",
|
||||
"768P",
|
||||
"1080P",
|
||||
];
|
||||
|
||||
export type ResolvedVideoGenerationOverrides = {
|
||||
size?: string;
|
||||
aspectRatio?: string;
|
||||
@@ -138,12 +145,15 @@ export function resolveVideoGenerationOverrides(params: {
|
||||
const normalizedResolution = resolveClosestResolution({
|
||||
requestedResolution: resolution,
|
||||
supportedResolutions: caps.resolutions,
|
||||
order: VIDEO_RESOLUTION_ORDER,
|
||||
});
|
||||
if (normalizedResolution && normalizedResolution !== resolution) {
|
||||
normalization.resolution = {
|
||||
requested: resolution,
|
||||
applied: normalizedResolution,
|
||||
};
|
||||
} else if (!normalizedResolution) {
|
||||
ignoredOverrides.push({ key: "resolution", value: resolution });
|
||||
}
|
||||
resolution = normalizedResolution;
|
||||
} else if (resolution && !caps.supportsResolution) {
|
||||
|
||||
@@ -690,6 +690,91 @@ describe("video-generation runtime", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("normalizes video resolutions against provider-supported values", async () => {
|
||||
let seenResolution: string | undefined;
|
||||
providers = [
|
||||
{
|
||||
id: "minimax",
|
||||
capabilities: {
|
||||
generate: {
|
||||
supportsResolution: true,
|
||||
resolutions: ["768P", "1080P"],
|
||||
},
|
||||
},
|
||||
generateVideo: async (req) => {
|
||||
seenResolution = req.resolution;
|
||||
return {
|
||||
videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }],
|
||||
model: "MiniMax-Hailuo-2.3",
|
||||
};
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const result = await runGenerateVideo({
|
||||
cfg: {
|
||||
agents: {
|
||||
defaults: {
|
||||
videoGenerationModel: { primary: "minimax/MiniMax-Hailuo-2.3" },
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
prompt: "animate a lobster",
|
||||
resolution: "720P",
|
||||
});
|
||||
|
||||
expect(seenResolution).toBe("768P");
|
||||
expect(result.ignoredOverrides).toEqual([]);
|
||||
expect(result.normalization).toMatchObject({
|
||||
resolution: {
|
||||
requested: "720P",
|
||||
applied: "768P",
|
||||
},
|
||||
});
|
||||
expect(result.metadata).toMatchObject({
|
||||
requestedResolution: "720P",
|
||||
normalizedResolution: "768P",
|
||||
});
|
||||
});
|
||||
|
||||
it("ignores unparseable video resolutions instead of sending them to providers", async () => {
|
||||
let seenResolution: string | undefined;
|
||||
providers = [
|
||||
{
|
||||
id: "minimax",
|
||||
capabilities: {
|
||||
generate: {
|
||||
supportsResolution: true,
|
||||
resolutions: ["768P", "1080P"],
|
||||
},
|
||||
},
|
||||
generateVideo: async (req) => {
|
||||
seenResolution = req.resolution;
|
||||
return {
|
||||
videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }],
|
||||
model: "MiniMax-Hailuo-2.3",
|
||||
};
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const result = await runGenerateVideo({
|
||||
cfg: {
|
||||
agents: {
|
||||
defaults: {
|
||||
videoGenerationModel: { primary: "minimax/MiniMax-Hailuo-2.3" },
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
prompt: "animate a lobster",
|
||||
resolution: "4K",
|
||||
});
|
||||
|
||||
expect(seenResolution).toBeUndefined();
|
||||
expect(result.ignoredOverrides).toEqual([{ key: "resolution", value: "4K" }]);
|
||||
expect(result.normalization).toBeUndefined();
|
||||
});
|
||||
|
||||
it("uses mode-specific capabilities for image-to-video requests", async () => {
|
||||
let seenRequest:
|
||||
| {
|
||||
|
||||
@@ -14,7 +14,7 @@ export type GeneratedVideoAsset = {
|
||||
metadata?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
export type VideoGenerationResolution = "480P" | "720P" | "768P" | "1080P";
|
||||
export type VideoGenerationResolution = "480P" | "720P" | "768P" | "1080P" | (string & {});
|
||||
|
||||
/**
|
||||
* Canonical semantic role hints for reference assets. The list covers the
|
||||
|
||||
Reference in New Issue
Block a user