From 6c3fcb8bfca11176208e9fbed7e7b7eb14e30d5b Mon Sep 17 00:00:00 2001
From: Shakker <shakkerdroid@gmail.com>
Date: Sat, 23 May 2026 00:50:33 +0100
Subject: [PATCH] fix: route openai video edits to edits endpoint

---
 CHANGELOG.md                                  |  1 +
 docs/help/testing-live.md                     |  2 +-
 docs/providers/openai.md                      |  6 ++-
 docs/tools/video-generation.md                | 32 ++++++------
 .../openai/video-generation-provider.test.ts  | 21 ++++++--
 .../openai/video-generation-provider.ts       | 49 +++++++++----------
 6 files changed, 64 insertions(+), 47 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a200e0fcf92..5fb2ffa4d1f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -57,6 +57,7 @@ Docs: https://docs.openclaw.ai
 - Codex app-server: restart the native app-server and retry once when server-side compaction times out, so preflight compaction stalls recover instead of failing every dispatch. (#85500)
 - Restore Control UI gateway token pairing [AI]. (#85459) Thanks @pgondhi987.
 - OpenAI video: honor configured provider request private-network opt-in for local/custom video endpoints so explicitly trusted mock and self-hosted providers are not blocked. Thanks @shakkernerd.
+- OpenAI video: send uploaded video edit requests to the documented `/videos/edits` endpoint with a `video` file instead of posting MP4 references to `/videos`. Thanks @shakkernerd.
 - CLI/update: repair managed npm plugin `openclaw` peer links during post-core convergence and reject stale or wrong-target peer links before restart. (#83794) Thanks @fuller-stack-dev.
 - CLI/agents: default new omitted-account bindings to all accounts when the channel has multiple configured accounts, and clarify account-scope docs. (#49769) Thanks @Gcaufy.
 - Codex app-server: let authorized `/codex` control commands such as `/codex detach` escape plugin-owned conversation bindings while keeping unknown or unauthorized slash text routed to the bound plugin. Fixes #85157. (#85188) Thanks @TurboTheTurtle.
diff --git a/docs/help/testing-live.md b/docs/help/testing-live.md
index 0b8810ec57f..7c5fbc31b2f 100644
--- a/docs/help/testing-live.md
+++ b/docs/help/testing-live.md
@@ -552,7 +552,7 @@ request. Plugin dependencies are expected to be present before runtime load.
   - Current declared-but-skipped `videoToVideo` providers in the shared sweep:
     - `alibaba`, `qwen`, `xai` because those paths currently require remote `http(s)` / MP4 reference URLs
     - `google` because the current shared Gemini/Veo lane uses local buffer-backed input and that path is not accepted in the shared sweep
-    - `openai` because the current shared lane lacks org-specific video inpaint/remix access guarantees
+    - `openai` because the current shared lane lacks org-specific video edit access guarantees
 - Optional narrowing:
   - `OPENCLAW_LIVE_VIDEO_GENERATION_PROVIDERS="deepinfra,google,openai,runway"`
   - `OPENCLAW_LIVE_VIDEO_GENERATION_MODELS="google/veo-3.1-fast-generate-preview,openai/sora-2,runway/gen4_aleph"`
diff --git a/docs/providers/openai.md b/docs/providers/openai.md
index cf6ca10b33f..74ff11175e4 100644
--- a/docs/providers/openai.md
+++ b/docs/providers/openai.md
@@ -516,9 +516,13 @@ The bundled `openai` plugin registers video generation through the `video_genera
 | Default model    | `openai/sora-2`                                                                   |
 | Modes            | Text-to-video, image-to-video, single-video edit                                  |
 | Reference inputs | 1 image or 1 video                                                                |
-| Size overrides   | Supported                                                                         |
+| Size overrides   | Supported for text-to-video and image-to-video                                    |
 | Other overrides  | `aspectRatio`, `resolution`, `audio`, `watermark` are ignored with a tool warning |
 
+OpenAI image-to-video requests use `POST /v1/videos` with an image
+`input_reference`. Single-video edits use `POST /v1/videos/edits` with the uploaded
+video in the `video` field; size and duration overrides are not forwarded for edits.
+
 ```json5
 {
   agents: {
diff --git a/docs/tools/video-generation.md b/docs/tools/video-generation.md
index def7108bfcc..2497adce239 100644
--- a/docs/tools/video-generation.md
+++ b/docs/tools/video-generation.md
@@ -137,22 +137,22 @@ runtime modes at runtime.
 The explicit mode contract used by `video_generate`, contract tests, and
 the shared live sweep:
 
-| Provider   | `generate` | `imageToVideo` | `videoToVideo` | Shared live lanes today                                                                                                                  |
-| ---------- | :--------: | :------------: | :------------: | ---------------------------------------------------------------------------------------------------------------------------------------- |
-| Alibaba    |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs                               |
-| BytePlus   |     ✓      |       ✓        |       -        | `generate`, `imageToVideo`                                                                                                               |
-| ComfyUI    |     ✓      |       ✓        |       -        | Not in the shared sweep; workflow-specific coverage lives with Comfy tests                                                               |
-| DeepInfra  |     ✓      |       -        |       -        | `generate`; native DeepInfra video schemas are text-to-video in the bundled contract                                                     |
-| fal        |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; `videoToVideo` only when using Seedance reference-to-video                                                   |
-| Google     |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; shared `videoToVideo` skipped because the current buffer-backed Gemini/Veo sweep does not accept that input  |
-| MiniMax    |     ✓      |       ✓        |       -        | `generate`, `imageToVideo`                                                                                                               |
-| OpenAI     |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; shared `videoToVideo` skipped because this org/input path currently needs provider-side inpaint/remix access |
-| OpenRouter |     ✓      |       ✓        |       -        | `generate`, `imageToVideo`                                                                                                               |
-| Qwen       |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs                               |
-| Runway     |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; `videoToVideo` runs only when the selected model is `runway/gen4_aleph`                                      |
-| Together   |     ✓      |       ✓        |       -        | `generate`, `imageToVideo`                                                                                                               |
-| Vydra      |     ✓      |       ✓        |       -        | `generate`; shared `imageToVideo` skipped because bundled `veo3` is text-only and bundled `kling` requires a remote image URL            |
-| xAI        |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider currently needs a remote MP4 URL                                |
+| Provider   | `generate` | `imageToVideo` | `videoToVideo` | Shared live lanes today                                                                                                                 |
+| ---------- | :--------: | :------------: | :------------: | --------------------------------------------------------------------------------------------------------------------------------------- |
+| Alibaba    |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs                              |
+| BytePlus   |     ✓      |       ✓        |       -        | `generate`, `imageToVideo`                                                                                                              |
+| ComfyUI    |     ✓      |       ✓        |       -        | Not in the shared sweep; workflow-specific coverage lives with Comfy tests                                                              |
+| DeepInfra  |     ✓      |       -        |       -        | `generate`; native DeepInfra video schemas are text-to-video in the bundled contract                                                    |
+| fal        |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; `videoToVideo` only when using Seedance reference-to-video                                                  |
+| Google     |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; shared `videoToVideo` skipped because the current buffer-backed Gemini/Veo sweep does not accept that input |
+| MiniMax    |     ✓      |       ✓        |       -        | `generate`, `imageToVideo`                                                                                                              |
+| OpenAI     |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; shared `videoToVideo` skipped because this org/input path currently needs provider-side video edit access   |
+| OpenRouter |     ✓      |       ✓        |       -        | `generate`, `imageToVideo`                                                                                                              |
+| Qwen       |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider needs remote `http(s)` video URLs                              |
+| Runway     |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; `videoToVideo` runs only when the selected model is `runway/gen4_aleph`                                     |
+| Together   |     ✓      |       ✓        |       -        | `generate`, `imageToVideo`                                                                                                              |
+| Vydra      |     ✓      |       ✓        |       -        | `generate`; shared `imageToVideo` skipped because bundled `veo3` is text-only and bundled `kling` requires a remote image URL           |
+| xAI        |     ✓      |       ✓        |       ✓        | `generate`, `imageToVideo`; `videoToVideo` skipped because this provider currently needs a remote MP4 URL                               |
 
 ## Tool parameters
 
diff --git a/extensions/openai/video-generation-provider.test.ts b/extensions/openai/video-generation-provider.test.ts
index 356178317d9..532373522cf 100644
--- a/extensions/openai/video-generation-provider.test.ts
+++ b/extensions/openai/video-generation-provider.test.ts
@@ -98,6 +98,16 @@ describe("openai video generation provider", () => {
     expectExplicitVideoGenerationCapabilities(buildOpenAIVideoGenerationProvider());
   });
 
+  it("does not claim size or duration controls for OpenAI video edits", () => {
+    const provider = buildOpenAIVideoGenerationProvider();
+
+    expect(provider.capabilities.videoToVideo).toEqual({
+      enabled: true,
+      maxVideos: 1,
+      maxInputVideos: 1,
+    });
+  });
+
   it("uses JSON for text-only Sora requests", async () => {
     postJsonRequestMock.mockResolvedValue({
       response: {
@@ -440,7 +450,7 @@ describe("openai video generation provider", () => {
     expect(secondRelease).toHaveBeenCalledTimes(1);
   });
 
-  it("uses multipart input_reference for video-to-video uploads", async () => {
+  it("uses the video edits endpoint for video-to-video uploads", async () => {
     fetchWithTimeoutMock
       .mockResolvedValueOnce({
         ok: true,
@@ -473,8 +483,13 @@ describe("openai video generation provider", () => {
 
     expect(postJsonRequestMock).not.toHaveBeenCalled();
     const createRequest = postMultipartRequest();
-    expect(createRequest.url).toBe("https://api.openai.com/v1/videos");
+    expect(createRequest.url).toBe("https://api.openai.com/v1/videos/edits");
     expect(createRequest.body).toBeInstanceOf(FormData);
+    const form = createRequest.body as FormData;
+    expect(form.get("prompt")).toBe("Remix this clip");
+    expect(form.get("model")).toBe("sora-2");
+    expect(form.get("video")).toBeInstanceOf(File);
+    expect(form.get("input_reference")).toBeNull();
     expect(createRequest.timeoutMs).toBe(120000);
     expect(createRequest.fetchFn).toBe(fetch);
     expect(createRequest.allowPrivateNetwork).toBe(false);
@@ -523,7 +538,7 @@ describe("openai video generation provider", () => {
 
     expect(postJsonRequestMock).not.toHaveBeenCalled();
     const createRequest = postMultipartRequest();
-    expect(createRequest.url).toBe("http://127.0.0.1:44080/v1/videos");
+    expect(createRequest.url).toBe("http://127.0.0.1:44080/v1/videos/edits");
     expect(createRequest.body).toBeInstanceOf(FormData);
     expect(createRequest.allowPrivateNetwork).toBe(true);
     expect(pollProviderOperationRequest().allowPrivateNetwork).toBe(true);
diff --git a/extensions/openai/video-generation-provider.ts b/extensions/openai/video-generation-provider.ts
index ba881cf15e5..a489cdb660c 100644
--- a/extensions/openai/video-generation-provider.ts
+++ b/extensions/openai/video-generation-provider.ts
@@ -39,6 +39,13 @@ type OpenAIVideoRequestPolicy = {
 
 type OpenAIVideoStatus = "queued" | "in_progress" | "completed" | "failed";
 
+type OpenAIReferenceAsset = {
+  kind: "image" | "video";
+  file: File;
+  buffer: Buffer;
+  mimeType: string;
+};
+
 type OpenAIVideoResponse = {
   id?: string;
   model?: string;
@@ -99,7 +106,7 @@ function resolveSize(params: {
   return undefined;
 }
 
-function resolveReferenceAsset(req: VideoGenerationRequest) {
+function resolveReferenceAsset(req: VideoGenerationRequest): OpenAIReferenceAsset | null {
   const allAssets = [...(req.inputImages ?? []), ...(req.inputVideos ?? [])];
   if (allAssets.length === 0) {
     return null;
@@ -113,15 +120,20 @@ function resolveReferenceAsset(req: VideoGenerationRequest) {
       "OpenAI video generation currently requires local image/video uploads for reference assets.",
     );
   }
+  const kind = (req.inputVideos?.length ?? 0) > 0 ? "video" : "image";
   const mimeType =
-    normalizeOptionalString(asset.mimeType) ||
-    ((req.inputVideos?.length ?? 0) > 0 ? "video/mp4" : "image/png");
+    normalizeOptionalString(asset.mimeType) || (kind === "video" ? "video/mp4" : "image/png");
   const extension =
     extensionForMime(mimeType)?.slice(1) ?? (mimeType.startsWith("video/") ? "mp4" : "png");
   const fileName =
     normalizeOptionalString(asset.fileName) ||
-    `${(req.inputVideos?.length ?? 0) > 0 ? "reference-video" : "reference-image"}.${extension}`;
-  return new File([toBlobBytes(asset.buffer)], fileName, { type: mimeType });
+    `${kind === "video" ? "reference-video" : "reference-image"}.${extension}`;
+  return {
+    kind,
+    file: new File([toBlobBytes(asset.buffer)], fileName, { type: mimeType }),
+    buffer: asset.buffer,
+    mimeType,
+  };
 }
 
 async function pollOpenAIVideo(
@@ -285,10 +297,6 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
         enabled: true,
         maxVideos: 1,
         maxInputVideos: 1,
-        maxDurationSeconds: 12,
-        supportedDurationSeconds: OPENAI_VIDEO_SECONDS,
-        supportsSize: true,
-        sizes: OPENAI_VIDEO_SIZES,
       },
     },
     async generateVideo(req) {
@@ -328,16 +336,14 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
         aspectRatio: req.aspectRatio,
         resolution: req.resolution,
       });
-      const inputImage = req.inputImages?.[0];
       const referenceAsset = resolveReferenceAsset(req);
-      const requestUrl = `${baseUrl}/videos`;
       const requestResult = referenceAsset
-        ? inputImage?.buffer
+        ? referenceAsset.kind === "image"
           ? await (() => {
               const jsonHeaders = new Headers(headers);
               jsonHeaders.set("Content-Type", "application/json");
               return postJsonRequest({
-                url: requestUrl,
+                url: `${baseUrl}/videos`,
                 headers: jsonHeaders,
                 body: {
                   prompt: req.prompt,
@@ -345,10 +351,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
                   ...(seconds ? { seconds } : {}),
                   ...(size ? { size } : {}),
                   input_reference: {
-                    image_url: toOpenAIDataUrl(
-                      inputImage.buffer,
-                      normalizeOptionalString(inputImage.mimeType) ?? "image/png",
-                    ),
+                    image_url: toOpenAIDataUrl(referenceAsset.buffer, referenceAsset.mimeType),
                   },
                 },
                 timeoutMs: resolveProviderOperationTimeoutMs({
@@ -364,17 +367,11 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
               const form = new FormData();
               form.set("prompt", req.prompt);
               form.set("model", model);
-              if (seconds) {
-                form.set("seconds", seconds);
-              }
-              if (size) {
-                form.set("size", size);
-              }
-              form.set("input_reference", referenceAsset);
+              form.set("video", referenceAsset.file);
               const multipartHeaders = new Headers(headers);
               multipartHeaders.delete("Content-Type");
               return postMultipartRequest({
-                url: requestUrl,
+                url: `${baseUrl}/videos/edits`,
                 headers: multipartHeaders,
                 body: form,
                 timeoutMs: resolveProviderOperationTimeoutMs({
@@ -390,7 +387,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
             const jsonHeaders = new Headers(headers);
             jsonHeaders.set("Content-Type", "application/json");
             return postJsonRequest({
-              url: requestUrl,
+              url: `${baseUrl}/videos`,
               headers: jsonHeaders,
               body: {
                 prompt: req.prompt,