From 648422a6c1c3906db611d39b52bf7a9be9b9d9e5 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 23 Apr 2026 20:47:08 +0100 Subject: [PATCH] fix(openai): send image edits as multipart uploads (#70657) --- CHANGELOG.md | 1 + .../.generated/plugin-sdk-api-baseline.sha256 | 4 +- .../openai/image-generation-provider.test.ts | 95 +++++++++++++++---- .../openai/image-generation-provider.ts | 62 ++++++++---- extensions/openai/index.test.ts | 65 +++++++++---- src/media-understanding/shared.ts | 30 ++++++ src/plugin-sdk/provider-http.ts | 1 + 7 files changed, 197 insertions(+), 61 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d64ddf99ca1..c3fe0582085 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Auto-reply/system events: route async exec-event completion replies through the persisted session delivery context, so long-running command results return to the originating channel instead of being dropped when live origin metadata is missing. (#70258) Thanks @wzfukui. +- OpenAI/image generation: send reference-image edits as guarded multipart uploads instead of JSON data URLs, restoring complex multi-reference `gpt-image-2` edits. Fixes #70642. Thanks @dashhuang. - QA channel/security: reject non-HTTP(S) inbound attachment URLs before media fetch, and log rejected schemes so suspicious or misconfigured payloads are visible during debugging. (#70708) Thanks @vincentkoc. - Plugins/install: link the host OpenClaw package into external plugins that declare `openclaw` as a peer dependency, so peer-only plugin SDK imports resolve after install without bundling a duplicate host package. (#70462) Thanks @anishesg. - Teams/security: require shared Bot Framework audience tokens to name the configured Teams app via verified `appid` or `azp`, blocking cross-bot token replay on the global audience. (#70724) Thanks @vincentkoc. diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index 24c0af2d683..567b7a08ee1 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -562ba42e1b8da3a263a47961ea9bf9c2a56a736f475c15079cb4a05693451a33 plugin-sdk-api-baseline.json -1ed1dd8d6f7d04539f4e11fec66b62f5efa58c09b9f4de2867cbbffad2aebfb1 plugin-sdk-api-baseline.jsonl +bc55649a80027756f37892424598653a81fec4bff7b074358fe34d08c7696ebc plugin-sdk-api-baseline.json +312a29d50b4959e4a8e242bb7559548d895a2e03d5ed1b5a395b1133de090578 plugin-sdk-api-baseline.jsonl diff --git a/extensions/openai/image-generation-provider.test.ts b/extensions/openai/image-generation-provider.test.ts index faf891fc06e..626866217bd 100644 --- a/extensions/openai/image-generation-provider.test.ts +++ b/extensions/openai/image-generation-provider.test.ts @@ -4,11 +4,13 @@ import { buildOpenAIImageGenerationProvider } from "./image-generation-provider. const { resolveApiKeyForProviderMock, postJsonRequestMock, + postMultipartRequestMock, assertOkOrThrowHttpErrorMock, resolveProviderHttpRequestConfigMock, } = vi.hoisted(() => ({ resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "openai-key" })), postJsonRequestMock: vi.fn(), + postMultipartRequestMock: vi.fn(), assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), resolveProviderHttpRequestConfigMock: vi.fn((params) => ({ baseUrl: params.baseUrl ?? params.defaultBaseUrl, @@ -25,16 +27,22 @@ vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({ vi.mock("openclaw/plugin-sdk/provider-http", () => ({ assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, postJsonRequest: postJsonRequestMock, + postMultipartRequest: postMultipartRequestMock, resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, })); function mockGeneratedPngResponse() { + const response = { + json: async () => ({ + data: [{ b64_json: Buffer.from("png-bytes").toString("base64") }], + }), + }; postJsonRequestMock.mockResolvedValue({ - response: { - json: async () => ({ - data: [{ b64_json: Buffer.from("png-bytes").toString("base64") }], - }), - }, + response, + release: vi.fn(async () => {}), + }); + postMultipartRequestMock.mockResolvedValue({ + response, release: vi.fn(async () => {}), }); } @@ -43,6 +51,7 @@ describe("openai image generation provider", () => { afterEach(() => { resolveApiKeyForProviderMock.mockClear(); postJsonRequestMock.mockReset(); + postMultipartRequestMock.mockReset(); assertOkOrThrowHttpErrorMock.mockClear(); resolveProviderHttpRequestConfigMock.mockClear(); vi.unstubAllEnvs(); @@ -212,28 +221,71 @@ describe("openai image generation provider", () => { ], }); - expect(postJsonRequestMock).toHaveBeenCalledWith( + expect(postMultipartRequestMock).toHaveBeenCalledWith( expect.objectContaining({ url: "https://api.openai.com/v1/images/edits", - body: expect.objectContaining({ - model: "gpt-image-2", - prompt: "Change only the background to pale blue", - n: 2, - size: "1024x1536", - images: [ - { - image_url: "data:image/png;base64,cG5nLWJ5dGVz", - }, - { - image_url: "data:image/jpeg;base64,anBlZy1ieXRlcw==", - }, - ], - }), + body: expect.any(FormData), + allowPrivateNetwork: false, + dispatcherPolicy: undefined, + fetchFn: fetch, }), ); + const editCallArgs = postMultipartRequestMock.mock.calls[0]?.[0] as { + headers: Headers; + body: FormData; + }; + expect(editCallArgs.headers.has("Content-Type")).toBe(false); + const form = editCallArgs.body; + expect(form.get("model")).toBe("gpt-image-2"); + expect(form.get("prompt")).toBe("Change only the background to pale blue"); + expect(form.get("n")).toBe("2"); + expect(form.get("size")).toBe("1024x1536"); + const images = form.getAll("image[]") as File[]; + expect(images).toHaveLength(2); + expect(images[0]?.name).toBe("reference.png"); + expect(images[0]?.type).toBe("image/png"); + expect(images[1]?.name).toBe("style.jpg"); + expect(images[1]?.type).toBe("image/jpeg"); + expect(postJsonRequestMock).not.toHaveBeenCalledWith( + expect.objectContaining({ url: "https://api.openai.com/v1/images/edits" }), + ); expect(result.images).toHaveLength(1); }); + it("forwards SSRF guard fields to multipart edit requests", async () => { + mockGeneratedPngResponse(); + + const provider = buildOpenAIImageGenerationProvider(); + await provider.generateImage({ + provider: "openai", + model: "gpt-image-2", + prompt: "Edit cat", + cfg: { + models: { + providers: { + openai: { + baseUrl: "http://127.0.0.1:44080/v1", + models: [], + }, + }, + }, + }, + inputImages: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }], + }); + + expect(postMultipartRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "http://127.0.0.1:44080/v1/images/edits", + allowPrivateNetwork: false, + dispatcherPolicy: undefined, + fetchFn: fetch, + }), + ); + expect(postJsonRequestMock).not.toHaveBeenCalledWith( + expect.objectContaining({ url: "http://127.0.0.1:44080/v1/images/edits" }), + ); + }); + describe("azure openai support", () => { it("uses api-key header and deployment-scoped URL for Azure .openai.azure.com hosts", async () => { mockGeneratedPngResponse(); @@ -386,9 +438,10 @@ describe("openai image generation provider", () => { ], }); - expect(postJsonRequestMock).toHaveBeenCalledWith( + expect(postMultipartRequestMock).toHaveBeenCalledWith( expect.objectContaining({ url: "https://myresource.openai.azure.com/openai/deployments/gpt-image-2/images/edits?api-version=2024-12-01-preview", + body: expect.any(FormData), }), ); }); diff --git a/extensions/openai/image-generation-provider.ts b/extensions/openai/image-generation-provider.ts index 32115fb56d5..bf3906f5f28 100644 --- a/extensions/openai/image-generation-provider.ts +++ b/extensions/openai/image-generation-provider.ts @@ -1,3 +1,4 @@ +import path from "node:path"; import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation"; import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; @@ -5,10 +6,11 @@ import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runt import { assertOkOrThrowHttpError, postJsonRequest, + postMultipartRequest, resolveProviderHttpRequestConfig, } from "openclaw/plugin-sdk/provider-http"; import { OPENAI_DEFAULT_IMAGE_MODEL as DEFAULT_OPENAI_IMAGE_MODEL } from "./default-models.js"; -import { resolveConfiguredOpenAIBaseUrl, toOpenAIDataUrl } from "./shared.js"; +import { resolveConfiguredOpenAIBaseUrl } from "./shared.js"; const DEFAULT_OPENAI_IMAGE_BASE_URL = "https://api.openai.com/v1"; const DEFAULT_OUTPUT_MIME = "image/png"; @@ -55,7 +57,10 @@ function buildAzureImageUrl( model: string, action: "generations" | "edits", ): string { - const cleanBase = rawBaseUrl.replace(/\/+$/, "").replace(/\/openai\/v1$/, "").replace(/\/v1$/, ""); + const cleanBase = rawBaseUrl + .replace(/\/+$/, "") + .replace(/\/openai\/v1$/, "") + .replace(/\/v1$/, ""); return `${cleanBase}/openai/deployments/${model}/images/${action}?api-version=${resolveAzureApiVersion()}`; } @@ -80,6 +85,20 @@ type OpenAIImageApiResponse = { }>; }; +function inferImageUploadFileName(params: { + fileName?: string; + mimeType?: string; + index: number; +}): string { + const fileName = params.fileName?.trim(); + if (fileName) { + return path.basename(fileName); + } + const mimeType = params.mimeType?.trim().toLowerCase() || DEFAULT_OUTPUT_MIME; + const ext = mimeType === "image/jpeg" ? "jpg" : mimeType.replace(/^image\//, "") || "png"; + return `image-${params.index + 1}.${ext}`; +} + export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider { return { id: "openai", @@ -146,23 +165,30 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider { : `${baseUrl}/images/${isEdit ? "edits" : "generations"}`; const requestResult = isEdit ? await (() => { - const jsonHeaders = new Headers(headers); - jsonHeaders.set("Content-Type", "application/json"); - return postJsonRequest({ + const form = new FormData(); + form.set("model", model); + form.set("prompt", req.prompt); + form.set("n", String(count)); + form.set("size", size); + for (const [index, image] of inputImages.entries()) { + const mimeType = image.mimeType?.trim() || DEFAULT_OUTPUT_MIME; + form.append( + "image[]", + new Blob([new Uint8Array(image.buffer)], { type: mimeType }), + inferImageUploadFileName({ + fileName: image.fileName, + mimeType, + index, + }), + ); + } + + const multipartHeaders = new Headers(headers); + multipartHeaders.delete("Content-Type"); + return postMultipartRequest({ url, - headers: jsonHeaders, - body: { - model, - prompt: req.prompt, - n: count, - size, - images: inputImages.map((image) => ({ - image_url: toOpenAIDataUrl( - image.buffer, - image.mimeType?.trim() || DEFAULT_OUTPUT_MIME, - ), - })), - }, + headers: multipartHeaders, + body: form, timeoutMs: req.timeoutMs, fetchFn: fetch, allowPrivateNetwork, diff --git a/extensions/openai/index.test.ts b/extensions/openai/index.test.ts index b8b2a373ca8..d79233b0dc3 100644 --- a/extensions/openai/index.test.ts +++ b/extensions/openai/index.test.ts @@ -122,8 +122,23 @@ function mockOpenAIImageApiResponse(params: { } as Response, release: vi.fn(async () => {}), }); + const postMultipartRequestSpy = vi.spyOn(providerHttp, "postMultipartRequest").mockResolvedValue({ + finalUrl: params.finalUrl, + response: { + ok: true, + json: async () => ({ + data: [ + { + b64_json: Buffer.from(params.imageData).toString("base64"), + ...(params.revisedPrompt ? { revised_prompt: params.revisedPrompt } : {}), + }, + ], + }), + } as Response, + release: vi.fn(async () => {}), + }); vi.spyOn(providerHttp, "assertOkOrThrowHttpError").mockResolvedValue(undefined); - return { resolveApiKeySpy, postJsonRequestSpy }; + return { resolveApiKeySpy, postJsonRequestSpy, postMultipartRequestSpy }; } describe("openai plugin", () => { @@ -190,10 +205,11 @@ describe("openai plugin", () => { }); it("submits reference-image edits to the OpenAI Images edits endpoint", async () => { - const { resolveApiKeySpy, postJsonRequestSpy } = mockOpenAIImageApiResponse({ - finalUrl: "https://api.openai.com/v1/images/edits", - imageData: "edited-image", - }); + const { resolveApiKeySpy, postJsonRequestSpy, postMultipartRequestSpy } = + mockOpenAIImageApiResponse({ + finalUrl: "https://api.openai.com/v1/images/edits", + imageData: "edited-image", + }); const provider = buildOpenAIImageGenerationProvider(); const authStore = { version: 1, profiles: {} }; @@ -218,25 +234,34 @@ describe("openai plugin", () => { store: authStore, }), ); - expect(postJsonRequestSpy).toHaveBeenCalledWith( + expect(postMultipartRequestSpy).toHaveBeenCalledWith( expect.objectContaining({ url: "https://api.openai.com/v1/images/edits", - body: { - model: "gpt-image-2", - prompt: "Edit this image", - n: 2, - size: "1536x1024", - images: [ - { - image_url: "data:image/png;base64,eA==", - }, - { - image_url: "data:image/jpeg;base64,eQ==", - }, - ], - }, + body: expect.any(FormData), + allowPrivateNetwork: false, + dispatcherPolicy: undefined, + fetchFn: fetch, }), ); + const editCallArgs = postMultipartRequestSpy.mock.calls[0]?.[0] as { + headers: Headers; + body: FormData; + }; + expect(editCallArgs.headers.has("Content-Type")).toBe(false); + const form = editCallArgs.body; + expect(form.get("model")).toBe("gpt-image-2"); + expect(form.get("prompt")).toBe("Edit this image"); + expect(form.get("n")).toBe("2"); + expect(form.get("size")).toBe("1536x1024"); + const images = form.getAll("image[]") as File[]; + expect(images).toHaveLength(2); + expect(images[0]?.name).toBe("image-1.png"); + expect(images[0]?.type).toBe("image/png"); + expect(images[1]?.name).toBe("ref.jpg"); + expect(images[1]?.type).toBe("image/jpeg"); + expect(postJsonRequestSpy).not.toHaveBeenCalledWith( + expect.objectContaining({ url: "https://api.openai.com/v1/images/edits" }), + ); expect(result).toEqual({ images: [ { diff --git a/src/media-understanding/shared.ts b/src/media-understanding/shared.ts index 785b77e62f8..9bdc31e7a0e 100644 --- a/src/media-understanding/shared.ts +++ b/src/media-understanding/shared.ts @@ -424,6 +424,36 @@ export async function postJsonRequest(params: { ); } +export async function postMultipartRequest(params: { + url: string; + headers: Headers; + body: BodyInit; + timeoutMs?: number; + fetchFn: typeof fetch; + pinDns?: boolean; + allowPrivateNetwork?: boolean; + dispatcherPolicy?: PinnedDispatcherPolicy; + auditContext?: string; + /** + * Override the guarded-fetch mode. Defaults to an auto-upgrade to + * `TRUSTED_ENV_PROXY` when `HTTP_PROXY`/`HTTPS_PROXY` is configured in the + * environment; pass `"strict"` to force pinned-DNS even inside a proxy. + */ + mode?: GuardedFetchMode; +}) { + return fetchWithTimeoutGuarded( + params.url, + { + method: "POST", + headers: params.headers, + body: params.body, + }, + params.timeoutMs, + params.fetchFn, + resolveGuardedPostRequestOptions(params), + ); +} + export async function readErrorResponse(res: Response): Promise { let reader: ReadableStreamDefaultReader | undefined; try { diff --git a/src/plugin-sdk/provider-http.ts b/src/plugin-sdk/provider-http.ts index 9847a72e92e..0a6fe79ea97 100644 --- a/src/plugin-sdk/provider-http.ts +++ b/src/plugin-sdk/provider-http.ts @@ -10,6 +10,7 @@ export { normalizeBaseUrl, pollProviderOperationJson, postJsonRequest, + postMultipartRequest, postTranscriptionRequest, resolveProviderOperationTimeoutMs, resolveProviderHttpRequestConfig,