diff --git a/extensions/openai/image-generation-provider.test.ts b/extensions/openai/image-generation-provider.test.ts index aa137b99589..734a62a7363 100644 --- a/extensions/openai/image-generation-provider.test.ts +++ b/extensions/openai/image-generation-provider.test.ts @@ -390,6 +390,66 @@ describe("openai image generation provider", () => { expect(result.images).toHaveLength(1); }); + it("normalizes legacy gpt-image-1 sizes before native OpenAI generation", async () => { + mockGeneratedPngResponse(); + + const provider = buildOpenAIImageGenerationProvider(); + const result = await provider.generateImage({ + provider: "openai", + model: "gpt-image-1", + prompt: "Create a wide Matrix QA image", + cfg: {}, + size: "2048x1152", + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://api.openai.com/v1/images/generations", + body: expect.objectContaining({ + model: "gpt-image-1", + size: "1536x1024", + }), + }), + ); + expect(result.metadata).toEqual({ + requestedSize: "2048x1152", + normalizedSize: "1536x1024", + }); + }); + + it("does not normalize model-specific sizes for custom OpenAI-compatible endpoints", async () => { + mockGeneratedPngResponse(); + + const provider = buildOpenAIImageGenerationProvider(); + const result = await provider.generateImage({ + provider: "openai", + model: "gpt-image-1", + prompt: "Create a wide local-provider image", + cfg: { + models: { + providers: { + openai: { + baseUrl: "https://openai-compatible.example.com/v1", + models: [], + }, + }, + }, + }, + size: "2048x1152", + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://openai-compatible.example.com/v1/images/generations", + body: expect.objectContaining({ + model: "gpt-image-1", + size: "2048x1152", + }), + }), + ); + expect(result.metadata).toBeUndefined(); + }); + it("forwards output and OpenAI-only options on direct generations", async () => { mockGeneratedPngResponse(); diff 
--git a/extensions/openai/image-generation-provider.ts b/extensions/openai/image-generation-provider.ts index eb22549bdc7..024e6908703 100644 --- a/extensions/openai/image-generation-provider.ts +++ b/extensions/openai/image-generation-provider.ts @@ -7,6 +7,7 @@ import type { ImageGenerationSourceImage, } from "openclaw/plugin-sdk/image-generation"; import { createSubsystemLogger } from "openclaw/plugin-sdk/logging-core"; +import { resolveClosestSize } from "openclaw/plugin-sdk/media-generation-runtime"; import { ensureAuthProfileStore, isProviderApiKeyConfigured, @@ -45,6 +46,7 @@ const OPENAI_SUPPORTED_SIZES = [ "3840x2160", "2160x3840", ] as const; +const OPENAI_LEGACY_IMAGE_SIZES = ["1024x1024", "1536x1024", "1024x1536"] as const; const OPENAI_MAX_INPUT_IMAGES = 5; const OPENAI_MAX_IMAGE_RESULTS = 4; const MAX_CODEX_IMAGE_SSE_BYTES = 64 * 1024 * 1024; @@ -217,6 +219,46 @@ function resolveOpenAIImageRequestModel( return model; } +function resolveNativeOpenAIImageSizesForModel(model: string): readonly string[] { + switch (model) { + case "gpt-image-1": + case "gpt-image-1-mini": + return OPENAI_LEGACY_IMAGE_SIZES; + default: + return OPENAI_SUPPORTED_SIZES; + } +} + +function resolveOpenAIImageRequestSize(params: { + model: string; + requestedSize?: string; + applyNativeLimits: boolean; +}): { + size: string; + metadata?: Record<string, string>; +} { + const requestedSize = params.requestedSize ?? DEFAULT_SIZE; + if (!params.applyNativeLimits) { + return { size: requestedSize }; + } + const supportedSizes = resolveNativeOpenAIImageSizesForModel(params.model); + const size = + resolveClosestSize({ + requestedSize, + supportedSizes, + }) ??
DEFAULT_SIZE; + if (size === requestedSize) { + return { size }; + } + return { + size, + metadata: { + requestedSize, + normalizedSize: size, + }, + }; +} + function shouldAllowPrivateImageEndpoint(req: { provider: string; cfg: OpenClawConfig | undefined; @@ -587,7 +629,12 @@ async function generateOpenAICodexImage(params: { allowTransparentDefaultReroute: true, }); const count = resolveOpenAIImageCount(req.count); - const size = req.size ?? DEFAULT_SIZE; + const sizeResolution = resolveOpenAIImageRequestSize({ + model, + requestedSize: req.size, + applyNativeLimits: true, + }); + const size = sizeResolution.size; const timeoutMs = resolveOpenAIImageTimeoutMs(req.timeoutMs); const openai = req.providerOptions?.openai; const background = openai?.background ?? req.background; @@ -660,6 +707,7 @@ async function generateOpenAICodexImage(params: { ), model, metadata: { + ...sizeResolution.metadata, responses: results.map((result) => result.metadata).filter(Boolean), }, }; @@ -752,8 +800,13 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider { allowTransparentDefaultReroute: publicOpenAIBaseUrl, }); const count = resolveOpenAIImageCount(req.count); - const size = req.size ?? DEFAULT_SIZE; const timeoutMs = resolveOpenAIImageTimeoutMs(req.timeoutMs, { isAzure }); + const sizeResolution = resolveOpenAIImageRequestSize({ + model, + requestedSize: req.size, + applyNativeLimits: publicOpenAIBaseUrl || isAzure, + }); + const size = sizeResolution.size; const url = isAzure ? buildAzureImageUrl(rawBaseUrl, model, isEdit ? "edits" : "generations") : `${baseUrl}/images/${isEdit ? "edits" : "generations"}`; @@ -842,6 +895,7 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider { return { images, model, + ...(sizeResolution.metadata ? { metadata: sizeResolution.metadata } : {}), }; } finally { await release();