fix: route openai video edits to edits endpoint

This commit is contained in:
Shakker
2026-05-23 00:50:33 +01:00
committed by Shakker
parent 227b4bffee
commit 6c3fcb8bfc
6 changed files with 64 additions and 47 deletions

View File

@@ -57,6 +57,7 @@ Docs: https://docs.openclaw.ai
- Codex app-server: restart the native app-server and retry once when server-side compaction times out, so preflight compaction stalls recover instead of failing every dispatch. (#85500)
- Restore Control UI gateway token pairing [AI]. (#85459) Thanks @pgondhi987.
- OpenAI video: honor configured provider request private-network opt-in for local/custom video endpoints so explicitly trusted mock and self-hosted providers are not blocked. Thanks @shakkernerd.
- OpenAI video: send uploaded video edit requests to the documented `/videos/edits` endpoint with a `video` file instead of posting MP4 references to `/videos`. Thanks @shakkernerd.
- CLI/update: repair managed npm plugin `openclaw` peer links during post-core convergence and reject stale or wrong-target peer links before restart. (#83794) Thanks @fuller-stack-dev.
- CLI/agents: default new omitted-account bindings to all accounts when the channel has multiple configured accounts, and clarify account-scope docs. (#49769) Thanks @Gcaufy.
- Codex app-server: let authorized `/codex` control commands such as `/codex detach` escape plugin-owned conversation bindings while keeping unknown or unauthorized slash text routed to the bound plugin. Fixes #85157. (#85188) Thanks @TurboTheTurtle.

View File

@@ -552,7 +552,7 @@ request. Plugin dependencies are expected to be present before runtime load.
- Current declared-but-skipped `videoToVideo` providers in the shared sweep:
- `alibaba`, `qwen`, `xai` because those paths currently require remote `http(s)` / MP4 reference URLs
- `google` because the current shared Gemini/Veo lane uses local buffer-backed input and that path is not accepted in the shared sweep
- `openai` because the current shared lane lacks org-specific video inpaint/remix access guarantees
- `openai` because the current shared lane lacks org-specific video edit access guarantees
- Optional narrowing:
- `OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS="deepinfra,google,openai,runway"`
- `OPENCLAW_LIVE_VIDEO_GENERATION_MODELS="google/veo-3.1-fast-generate-preview,openai/sora-2,runway/gen4_aleph"`

View File

@@ -516,9 +516,13 @@ The bundled `openai` plugin registers video generation through the `video_genera
| Default model | `openai/sora-2` |
| Modes | Text-to-video, image-to-video, single-video edit |
| Reference inputs | 1 image or 1 video |
| Size overrides | Supported |
| Size overrides | Supported for text-to-video and image-to-video |
| Other overrides | `aspectRatio`, `resolution`, `audio`, `watermark` are ignored with a tool warning |
OpenAI image-to-video requests use `POST /v1/videos` with an image
`input_reference`. Single-video edits use `POST /v1/videos/edits` with the
uploaded video in the `video` field.
```json5
{
agents: {

View File

@@ -137,22 +137,22 @@ runtime modes at runtime.
The explicit mode contract used by `video_generate`, contract tests, and
the shared live sweep:
| Provider | `generate` | `imageToVideo` | `videoToVideo` | Shared live lanes today |
| ---------- | :--------: | :------------: | :------------: | ---------------------------------------------------------------------------------------------------------------------------------------- |
| Alibaba | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
| BytePlus | ✓ | ✓ | - | `generate`, `imageToVideo` |
| ComfyUI | ✓ | ✓ | - | Not in the shared sweep; workflow-specific coverage lives with Comfy tests |
| DeepInfra | ✓ | - | - | `generate`; native DeepInfra video schemas are text-to-video in the bundled contract |
| fal | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` only when using Seedance reference-to-video |
| Google | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because the current buffer-backed Gemini/Veo sweep does not accept that input |
| MiniMax | ✓ | ✓ | - | `generate`, `imageToVideo` |
| OpenAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because this org/input path currently needs provider-side inpaint/remix access |
| OpenRouter | ✓ | ✓ | - | `generate`, `imageToVideo` |
| Qwen | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
| Runway | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` runs only when the selected model is `runway/gen4_aleph` |
| Together | ✓ | ✓ | - | `generate`, `imageToVideo` |
| Vydra | ✓ | ✓ | - | `generate`; shared `imageToVideo` skipped because bundled `veo3` is text-only and bundled `kling` requires a remote image URL |
| xAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider currently needs a remote MP4 URL |
| Provider | `generate` | `imageToVideo` | `videoToVideo` | Shared live lanes today |
| ---------- | :--------: | :------------: | :------------: | --------------------------------------------------------------------------------------------------------------------------------------- |
| Alibaba | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
| BytePlus | ✓ | ✓ | - | `generate`, `imageToVideo` |
| ComfyUI | ✓ | ✓ | - | Not in the shared sweep; workflow-specific coverage lives with Comfy tests |
| DeepInfra | ✓ | - | - | `generate`; native DeepInfra video schemas are text-to-video in the bundled contract |
| fal | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` only when using Seedance reference-to-video |
| Google | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because the current buffer-backed Gemini/Veo sweep does not accept that input |
| MiniMax | ✓ | ✓ | - | `generate`, `imageToVideo` |
| OpenAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; shared `videoToVideo` skipped because this org/input path currently needs provider-side video edit access |
| OpenRouter | ✓ | ✓ | - | `generate`, `imageToVideo` |
| Qwen | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs |
| Runway | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` runs only when the selected model is `runway/gen4_aleph` |
| Together | ✓ | ✓ | - | `generate`, `imageToVideo` |
| Vydra | ✓ | ✓ | - | `generate`; shared `imageToVideo` skipped because bundled `veo3` is text-only and bundled `kling` requires a remote image URL |
| xAI | ✓ | ✓ | ✓ | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider currently needs a remote MP4 URL |
## Tool parameters

View File

@@ -98,6 +98,16 @@ describe("openai video generation provider", () => {
expectExplicitVideoGenerationCapabilities(buildOpenAIVideoGenerationProvider());
});
// Pins the exact shape of the provider's videoToVideo capability block. The
// expected object lists only `enabled`, `maxVideos`, and `maxInputVideos`, so a
// defined size or duration field on the capability would fail the comparison.
it("does not claim size or duration controls for OpenAI video edits", () => {
const provider = buildOpenAIVideoGenerationProvider();
expect(provider.capabilities.videoToVideo).toEqual({
enabled: true,
maxVideos: 1,
maxInputVideos: 1,
});
});
it("uses JSON for text-only Sora requests", async () => {
postJsonRequestMock.mockResolvedValue({
response: {
@@ -440,7 +450,7 @@ describe("openai video generation provider", () => {
expect(secondRelease).toHaveBeenCalledTimes(1);
});
it("uses multipart input_reference for video-to-video uploads", async () => {
it("uses the video edits endpoint for video-to-video uploads", async () => {
fetchWithTimeoutMock
.mockResolvedValueOnce({
ok: true,
@@ -473,8 +483,13 @@ describe("openai video generation provider", () => {
expect(postJsonRequestMock).not.toHaveBeenCalled();
const createRequest = postMultipartRequest();
expect(createRequest.url).toBe("https://api.openai.com/v1/videos");
expect(createRequest.url).toBe("https://api.openai.com/v1/videos/edits");
expect(createRequest.body).toBeInstanceOf(FormData);
const form = createRequest.body as FormData;
expect(form.get("prompt")).toBe("Remix this clip");
expect(form.get("model")).toBe("sora-2");
expect(form.get("video")).toBeInstanceOf(File);
expect(form.get("input_reference")).toBeNull();
expect(createRequest.timeoutMs).toBe(120000);
expect(createRequest.fetchFn).toBe(fetch);
expect(createRequest.allowPrivateNetwork).toBe(false);
@@ -523,7 +538,7 @@ describe("openai video generation provider", () => {
expect(postJsonRequestMock).not.toHaveBeenCalled();
const createRequest = postMultipartRequest();
expect(createRequest.url).toBe("http://127.0.0.1:44080/v1/videos");
expect(createRequest.url).toBe("http://127.0.0.1:44080/v1/videos/edits");
expect(createRequest.body).toBeInstanceOf(FormData);
expect(createRequest.allowPrivateNetwork).toBe(true);
expect(pollProviderOperationRequest().allowPrivateNetwork).toBe(true);

View File

@@ -39,6 +39,13 @@ type OpenAIVideoRequestPolicy = {
// Closed set of status strings recognized for a video job.
// NOTE(review): presumably mirrors the `status` field of the provider's video
// response — confirm against the polling code, which is not visible here.
type OpenAIVideoStatus = "queued" | "in_progress" | "completed" | "failed";
/**
 * A single reference input (image or video) resolved from a video generation
 * request. It carries both an upload-ready `File` and the raw bytes, so callers
 * can pick the representation that matches the request they are building.
 */
type OpenAIReferenceAsset = {
// "video" when the request supplies input videos, otherwise "image".
kind: "image" | "video";
// File built from the asset bytes, named and typed for multipart upload.
file: File;
// Raw asset bytes, kept for callers that encode the asset inline (e.g. as a data URL).
buffer: Buffer;
// MIME type of the asset; defaults to "video/mp4" or "image/png" by kind when
// the asset does not provide one.
mimeType: string;
};
type OpenAIVideoResponse = {
id?: string;
model?: string;
@@ -99,7 +106,7 @@ function resolveSize(params: {
return undefined;
}
function resolveReferenceAsset(req: VideoGenerationRequest) {
function resolveReferenceAsset(req: VideoGenerationRequest): OpenAIReferenceAsset | null {
const allAssets = [...(req.inputImages ?? []), ...(req.inputVideos ?? [])];
if (allAssets.length === 0) {
return null;
@@ -113,15 +120,20 @@ function resolveReferenceAsset(req: VideoGenerationRequest) {
"OpenAI video generation currently requires local image/video uploads for reference assets.",
);
}
const kind = (req.inputVideos?.length ?? 0) > 0 ? "video" : "image";
const mimeType =
normalizeOptionalString(asset.mimeType) ||
((req.inputVideos?.length ?? 0) > 0 ? "video/mp4" : "image/png");
normalizeOptionalString(asset.mimeType) || (kind === "video" ? "video/mp4" : "image/png");
const extension =
extensionForMime(mimeType)?.slice(1) ?? (mimeType.startsWith("video/") ? "mp4" : "png");
const fileName =
normalizeOptionalString(asset.fileName) ||
`${(req.inputVideos?.length ?? 0) > 0 ? "reference-video" : "reference-image"}.${extension}`;
return new File([toBlobBytes(asset.buffer)], fileName, { type: mimeType });
`${kind === "video" ? "reference-video" : "reference-image"}.${extension}`;
return {
kind,
file: new File([toBlobBytes(asset.buffer)], fileName, { type: mimeType }),
buffer: asset.buffer,
mimeType,
};
}
async function pollOpenAIVideo(
@@ -285,10 +297,6 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
enabled: true,
maxVideos: 1,
maxInputVideos: 1,
maxDurationSeconds: 12,
supportedDurationSeconds: OPENAI_VIDEO_SECONDS,
supportsSize: true,
sizes: OPENAI_VIDEO_SIZES,
},
},
async generateVideo(req) {
@@ -328,16 +336,14 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
aspectRatio: req.aspectRatio,
resolution: req.resolution,
});
const inputImage = req.inputImages?.[0];
const referenceAsset = resolveReferenceAsset(req);
const requestUrl = `${baseUrl}/videos`;
const requestResult = referenceAsset
? inputImage?.buffer
? referenceAsset.kind === "image"
? await (() => {
const jsonHeaders = new Headers(headers);
jsonHeaders.set("Content-Type", "application/json");
return postJsonRequest({
url: requestUrl,
url: `${baseUrl}/videos`,
headers: jsonHeaders,
body: {
prompt: req.prompt,
@@ -345,10 +351,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
...(seconds ? { seconds } : {}),
...(size ? { size } : {}),
input_reference: {
image_url: toOpenAIDataUrl(
inputImage.buffer,
normalizeOptionalString(inputImage.mimeType) ?? "image/png",
),
image_url: toOpenAIDataUrl(referenceAsset.buffer, referenceAsset.mimeType),
},
},
timeoutMs: resolveProviderOperationTimeoutMs({
@@ -364,17 +367,11 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
const form = new FormData();
form.set("prompt", req.prompt);
form.set("model", model);
if (seconds) {
form.set("seconds", seconds);
}
if (size) {
form.set("size", size);
}
form.set("input_reference", referenceAsset);
form.set("video", referenceAsset.file);
const multipartHeaders = new Headers(headers);
multipartHeaders.delete("Content-Type");
return postMultipartRequest({
url: requestUrl,
url: `${baseUrl}/videos/edits`,
headers: multipartHeaders,
body: form,
timeoutMs: resolveProviderOperationTimeoutMs({
@@ -390,7 +387,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
const jsonHeaders = new Headers(headers);
jsonHeaders.set("Content-Type", "application/json");
return postJsonRequest({
url: requestUrl,
url: `${baseUrl}/videos`,
headers: jsonHeaders,
body: {
prompt: req.prompt,