mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-27 22:42:31 +00:00
fix: route openai video edits to edits endpoint
This commit is contained in:
@@ -57,6 +57,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Codex app-server: restart the native app-server and retry once when server-side compaction times out, so preflight compaction stalls recover instead of failing every dispatch. (#85500)
|
||||
- Restore Control UI gateway token pairing [AI]. (#85459) Thanks @pgondhi987.
|
||||
- OpenAI video: honor the provider's configured private-network request opt-in for local/custom video endpoints, so explicitly trusted mock and self-hosted providers are not blocked. Thanks @shakkernerd.
|
||||
- OpenAI video: send uploaded video edit requests to the documented `/videos/edits` endpoint, uploading the clip in the `video` form field instead of posting it to `/videos` as an MP4 `input_reference`. Thanks @shakkernerd.
|
||||
- CLI/update: repair managed npm plugin `openclaw` peer links during post-core convergence and reject stale or wrong-target peer links before restart. (#83794) Thanks @fuller-stack-dev.
|
||||
- CLI/agents: default new omitted-account bindings to all accounts when the channel has multiple configured accounts, and clarify account-scope docs. (#49769) Thanks @Gcaufy.
|
||||
- Codex app-server: let authorized `/codex` control commands such as `/codex detach` escape plugin-owned conversation bindings while keeping unknown or unauthorized slash text routed to the bound plugin. Fixes #85157. (#85188) Thanks @TurboTheTurtle.
|
||||
|
||||
@@ -552,7 +552,7 @@ request. Plugin dependencies are expected to be present before runtime load.
|
||||
- Current declared-but-skipped `videoToVideo` providers in the shared sweep:
|
||||
- `alibaba`, `qwen`, `xai` because those paths currently require remote `http(s)` / MP4 reference URLs
|
||||
- `google` because the current shared Gemini/Veo lane uses local buffer-backed input and that path is not accepted in the shared sweep
|
||||
- `openai` because the current shared lane lacks org-specific video inpaint/remix access guarantees
|
||||
- `openai` because the current shared lane lacks org-specific video edit access guarantees
|
||||
- Optional narrowing:
|
||||
- `OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS="deepinfra,google,openai,runway"`
|
||||
- `OPENCLAW_LIVE_VIDEO_GENERATION_MODELS="google/veo-3.1-fast-generate-preview,openai/sora-2,runway/gen4_aleph"`
|
||||
|
||||
@@ -516,9 +516,13 @@ The bundled `openai` plugin registers video generation through the `video_genera
|
||||
| Default model | `openai/sora-2` |
|
||||
| Modes | Text-to-video, image-to-video, single-video edit |
|
||||
| Reference inputs | 1 image or 1 video |
|
||||
| Size overrides | Supported |
|
||||
| Size overrides | Supported for text-to-video and image-to-video |
|
||||
| Other overrides | `aspectRatio`, `resolution`, `audio`, `watermark` are ignored with a tool warning |
|
||||
|
||||
OpenAI image-to-video requests use `POST /v1/videos` with an image
|
||||
`input_reference`. Single-video edits use `POST /v1/videos/edits` with the
|
||||
uploaded video in the `video` field.
|
||||
|
||||
```json5
|
||||
{
|
||||
agents: {
|
||||
|
||||
@@ -137,22 +137,22 @@ runtime modes at runtime.
|
||||
The explicit mode contract used by `video_generate`, contract tests, and
|
||||
the shared live sweep:
|
||||
|
||||
| Provider | `generate` | `imageToVideo` | `videoToVideo` | Shared live lanes today |
|
||||
| ---------- | :--------: | :------------: | :------------: | ---------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Alibaba | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
|
||||
| BytePlus | ✓ | ✓ | - | `generate`, `imageToVideo` |
|
||||
| ComfyUI | ✓ | ✓ | - | Not in the shared sweep; workflow-specific coverage lives with Comfy tests |
|
||||
| DeepInfra | ✓ | - | - | `generate`; native DeepInfra video schemas are text-to-video in the bundled contract |
|
||||
| fal | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` only when using Seedance reference-to-video |
|
||||
| Google | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because the current buffer-backed Gemini/Veo sweep does not accept that input |
|
||||
| MiniMax | ✓ | ✓ | - | `generate`, `imageToVideo` |
|
||||
| OpenAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because this org/input path currently needs provider-side inpaint/remix access |
|
||||
| OpenRouter | ✓ | ✓ | - | `generate`, `imageToVideo` |
|
||||
| Qwen | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
|
||||
| Runway | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` runs only when the selected model is `runway/gen4_aleph` |
|
||||
| Together | ✓ | ✓ | - | `generate`, `imageToVideo` |
|
||||
| Vydra | ✓ | ✓ | - | `generate`; shared `imageToVideo` skipped because bundled `veo3` is text-only and bundled `kling` requires a remote image URL |
|
||||
| xAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider currently needs a remote MP4 URL |
|
||||
| Provider | `generate` | `imageToVideo` | `videoToVideo` | Shared live lanes today |
|
||||
| ---------- | :--------: | :------------: | :------------: | --------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Alibaba | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
|
||||
| BytePlus | ✓ | ✓ | - | `generate`, `imageToVideo` |
|
||||
| ComfyUI | ✓ | ✓ | - | Not in the shared sweep; workflow-specific coverage lives with Comfy tests |
|
||||
| DeepInfra | ✓ | - | - | `generate`; native DeepInfra video schemas are text-to-video in the bundled contract |
|
||||
| fal | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` only when using Seedance reference-to-video |
|
||||
| Google | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because the current buffer-backed Gemini/Veo sweep does not accept that input |
|
||||
| MiniMax | ✓ | ✓ | - | `generate`, `imageToVideo` |
|
||||
| OpenAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because this org/input path currently needs provider-side video edit access |
|
||||
| OpenRouter | ✓ | ✓ | - | `generate`, `imageToVideo` |
|
||||
| Qwen | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
|
||||
| Runway | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` runs only when the selected model is `runway/gen4_aleph` |
|
||||
| Together | ✓ | ✓ | - | `generate`, `imageToVideo` |
|
||||
| Vydra | ✓ | ✓ | - | `generate`; shared `imageToVideo` skipped because bundled `veo3` is text-only and bundled `kling` requires a remote image URL |
|
||||
| xAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider currently needs a remote MP4 URL |
|
||||
|
||||
## Tool parameters
|
||||
|
||||
|
||||
@@ -98,6 +98,16 @@ describe("openai video generation provider", () => {
|
||||
expectExplicitVideoGenerationCapabilities(buildOpenAIVideoGenerationProvider());
|
||||
});
|
||||
|
||||
it("does not claim size or duration controls for OpenAI video edits", () => {
|
||||
const provider = buildOpenAIVideoGenerationProvider();
|
||||
|
||||
expect(provider.capabilities.videoToVideo).toEqual({
|
||||
enabled: true,
|
||||
maxVideos: 1,
|
||||
maxInputVideos: 1,
|
||||
});
|
||||
});
|
||||
|
||||
it("uses JSON for text-only Sora requests", async () => {
|
||||
postJsonRequestMock.mockResolvedValue({
|
||||
response: {
|
||||
@@ -440,7 +450,7 @@ describe("openai video generation provider", () => {
|
||||
expect(secondRelease).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("uses multipart input_reference for video-to-video uploads", async () => {
|
||||
it("uses the video edits endpoint for video-to-video uploads", async () => {
|
||||
fetchWithTimeoutMock
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
@@ -473,8 +483,13 @@ describe("openai video generation provider", () => {
|
||||
|
||||
expect(postJsonRequestMock).not.toHaveBeenCalled();
|
||||
const createRequest = postMultipartRequest();
|
||||
expect(createRequest.url).toBe("https://api.openai.com/v1/videos");
|
||||
expect(createRequest.url).toBe("https://api.openai.com/v1/videos/edits");
|
||||
expect(createRequest.body).toBeInstanceOf(FormData);
|
||||
const form = createRequest.body as FormData;
|
||||
expect(form.get("prompt")).toBe("Remix this clip");
|
||||
expect(form.get("model")).toBe("sora-2");
|
||||
expect(form.get("video")).toBeInstanceOf(File);
|
||||
expect(form.get("input_reference")).toBeNull();
|
||||
expect(createRequest.timeoutMs).toBe(120000);
|
||||
expect(createRequest.fetchFn).toBe(fetch);
|
||||
expect(createRequest.allowPrivateNetwork).toBe(false);
|
||||
@@ -523,7 +538,7 @@ describe("openai video generation provider", () => {
|
||||
|
||||
expect(postJsonRequestMock).not.toHaveBeenCalled();
|
||||
const createRequest = postMultipartRequest();
|
||||
expect(createRequest.url).toBe("http://127.0.0.1:44080/v1/videos");
|
||||
expect(createRequest.url).toBe("http://127.0.0.1:44080/v1/videos/edits");
|
||||
expect(createRequest.body).toBeInstanceOf(FormData);
|
||||
expect(createRequest.allowPrivateNetwork).toBe(true);
|
||||
expect(pollProviderOperationRequest().allowPrivateNetwork).toBe(true);
|
||||
|
||||
@@ -39,6 +39,13 @@ type OpenAIVideoRequestPolicy = {
|
||||
|
||||
type OpenAIVideoStatus = "queued" | "in_progress" | "completed" | "failed";
|
||||
|
||||
type OpenAIReferenceAsset = {
|
||||
kind: "image" | "video";
|
||||
file: File;
|
||||
buffer: Buffer;
|
||||
mimeType: string;
|
||||
};
|
||||
|
||||
type OpenAIVideoResponse = {
|
||||
id?: string;
|
||||
model?: string;
|
||||
@@ -99,7 +106,7 @@ function resolveSize(params: {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function resolveReferenceAsset(req: VideoGenerationRequest) {
|
||||
function resolveReferenceAsset(req: VideoGenerationRequest): OpenAIReferenceAsset | null {
|
||||
const allAssets = [...(req.inputImages ?? []), ...(req.inputVideos ?? [])];
|
||||
if (allAssets.length === 0) {
|
||||
return null;
|
||||
@@ -113,15 +120,20 @@ function resolveReferenceAsset(req: VideoGenerationRequest) {
|
||||
"OpenAI video generation currently requires local image/video uploads for reference assets.",
|
||||
);
|
||||
}
|
||||
const kind = (req.inputVideos?.length ?? 0) > 0 ? "video" : "image";
|
||||
const mimeType =
|
||||
normalizeOptionalString(asset.mimeType) ||
|
||||
((req.inputVideos?.length ?? 0) > 0 ? "video/mp4" : "image/png");
|
||||
normalizeOptionalString(asset.mimeType) || (kind === "video" ? "video/mp4" : "image/png");
|
||||
const extension =
|
||||
extensionForMime(mimeType)?.slice(1) ?? (mimeType.startsWith("video/") ? "mp4" : "png");
|
||||
const fileName =
|
||||
normalizeOptionalString(asset.fileName) ||
|
||||
`${(req.inputVideos?.length ?? 0) > 0 ? "reference-video" : "reference-image"}.${extension}`;
|
||||
return new File([toBlobBytes(asset.buffer)], fileName, { type: mimeType });
|
||||
`${kind === "video" ? "reference-video" : "reference-image"}.${extension}`;
|
||||
return {
|
||||
kind,
|
||||
file: new File([toBlobBytes(asset.buffer)], fileName, { type: mimeType }),
|
||||
buffer: asset.buffer,
|
||||
mimeType,
|
||||
};
|
||||
}
|
||||
|
||||
async function pollOpenAIVideo(
|
||||
@@ -285,10 +297,6 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
|
||||
enabled: true,
|
||||
maxVideos: 1,
|
||||
maxInputVideos: 1,
|
||||
maxDurationSeconds: 12,
|
||||
supportedDurationSeconds: OPENAI_VIDEO_SECONDS,
|
||||
supportsSize: true,
|
||||
sizes: OPENAI_VIDEO_SIZES,
|
||||
},
|
||||
},
|
||||
async generateVideo(req) {
|
||||
@@ -328,16 +336,14 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
|
||||
aspectRatio: req.aspectRatio,
|
||||
resolution: req.resolution,
|
||||
});
|
||||
const inputImage = req.inputImages?.[0];
|
||||
const referenceAsset = resolveReferenceAsset(req);
|
||||
const requestUrl = `${baseUrl}/videos`;
|
||||
const requestResult = referenceAsset
|
||||
? inputImage?.buffer
|
||||
? referenceAsset.kind === "image"
|
||||
? await (() => {
|
||||
const jsonHeaders = new Headers(headers);
|
||||
jsonHeaders.set("Content-Type", "application/json");
|
||||
return postJsonRequest({
|
||||
url: requestUrl,
|
||||
url: `${baseUrl}/videos`,
|
||||
headers: jsonHeaders,
|
||||
body: {
|
||||
prompt: req.prompt,
|
||||
@@ -345,10 +351,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
|
||||
...(seconds ? { seconds } : {}),
|
||||
...(size ? { size } : {}),
|
||||
input_reference: {
|
||||
image_url: toOpenAIDataUrl(
|
||||
inputImage.buffer,
|
||||
normalizeOptionalString(inputImage.mimeType) ?? "image/png",
|
||||
),
|
||||
image_url: toOpenAIDataUrl(referenceAsset.buffer, referenceAsset.mimeType),
|
||||
},
|
||||
},
|
||||
timeoutMs: resolveProviderOperationTimeoutMs({
|
||||
@@ -364,17 +367,11 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
|
||||
const form = new FormData();
|
||||
form.set("prompt", req.prompt);
|
||||
form.set("model", model);
|
||||
if (seconds) {
|
||||
form.set("seconds", seconds);
|
||||
}
|
||||
if (size) {
|
||||
form.set("size", size);
|
||||
}
|
||||
form.set("input_reference", referenceAsset);
|
||||
form.set("video", referenceAsset.file);
|
||||
const multipartHeaders = new Headers(headers);
|
||||
multipartHeaders.delete("Content-Type");
|
||||
return postMultipartRequest({
|
||||
url: requestUrl,
|
||||
url: `${baseUrl}/videos/edits`,
|
||||
headers: multipartHeaders,
|
||||
body: form,
|
||||
timeoutMs: resolveProviderOperationTimeoutMs({
|
||||
@@ -390,7 +387,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
|
||||
const jsonHeaders = new Headers(headers);
|
||||
jsonHeaders.set("Content-Type", "application/json");
|
||||
return postJsonRequest({
|
||||
url: requestUrl,
|
||||
url: `${baseUrl}/videos`,
|
||||
headers: jsonHeaders,
|
||||
body: {
|
||||
prompt: req.prompt,
|
||||
|
||||
Reference in New Issue
Block a user