From b66f01bdca7bb08f48c90c7b135b68b57200fed0 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 19:58:33 +0100 Subject: [PATCH] fix: expose transparent image infer options --- CHANGELOG.md | 4 + docs/cli/infer.md | 6 ++ docs/providers/openai.md | 14 ++++ docs/tools/image-generation.md | 16 ++++ src/cli/capability-cli.test.ts | 142 +++++++++++++++++++++++++++++++++ src/cli/capability-cli.ts | 50 ++++++++++++ 6 files changed, 232 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd173921a41..f71c8d91907 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ Docs: https://docs.openclaw.ai ### Changes +- CLI/image generation: expose `--output-format` and + `--openai-background` on `openclaw infer image generate` and + `openclaw infer image edit` for transparent-background OpenAI image runs. + Thanks @steipete. - Browser/config: allow local managed Chrome launch discovery and post-launch CDP readiness timeouts to be raised for slower hosts such as Raspberry Pi. Fixes #66803. Thanks @beat843796. diff --git a/docs/cli/infer.md b/docs/cli/infer.md index f5dbd32fdda..dca170e0421 100644 --- a/docs/cli/infer.md +++ b/docs/cli/infer.md @@ -156,7 +156,9 @@ Use `image` for generation, edit, and description. 
```bash openclaw infer image generate --prompt "friendly lobster illustration" --json openclaw infer image generate --prompt "cinematic product photo of headphones" --json +openclaw infer image generate --model openai/gpt-image-1.5 --output-format png --openai-background transparent --prompt "simple red circle sticker on a transparent background" --json openclaw infer image generate --prompt "slow image backend" --timeout-ms 180000 --json +openclaw infer image edit --file ./logo.png --model openai/gpt-image-1.5 --output-format png --openai-background transparent --prompt "keep the logo, remove the background" --json openclaw infer image describe --file ./photo.jpg --json openclaw infer image describe --file ./ui-screenshot.png --model openai/gpt-4.1-mini --json openclaw infer image describe --file ./photo.jpg --model ollama/qwen2.5vl:7b --json @@ -165,6 +167,10 @@ openclaw infer image describe --file ./photo.jpg --model ollama/qwen2.5vl:7b --j Notes: - Use `image edit` when starting from existing input files. +- Use `--output-format png --openai-background transparent` with + `--model openai/gpt-image-1.5` for transparent-background OpenAI PNG output. + Both flags are available on `image generate` and `image edit`; only + `--openai-background` is OpenAI-specific (`--output-format` is a general hint). - Use `image providers --json` to verify which bundled image providers are discoverable, configured, selected, and which generation/edit capabilities each provider exposes. diff --git a/docs/providers/openai.md b/docs/providers/openai.md index bfaacf22ea5..809e00c983f 100644 --- a/docs/providers/openai.md +++ b/docs/providers/openai.md @@ -267,6 +267,20 @@ OpenAI Codex OAuth routes by rewriting default `openai/gpt-image-2` transparent requests to `gpt-image-1.5`; Azure and custom OpenAI-compatible endpoints keep their configured deployment/model names. 
+The same setting is exposed for headless CLI runs: + +```bash +openclaw infer image generate \ + --model openai/gpt-image-1.5 \ + --output-format png \ + --openai-background transparent \ + --prompt "A simple red circle sticker on a transparent background" \ + --json +``` + +Use the same `--output-format` and `--openai-background` flags with +`openclaw infer image edit` when starting from an input file. + For Codex OAuth installs, keep the same `openai/gpt-image-2` ref. When an `openai-codex` OAuth profile is configured, OpenClaw resolves that stored OAuth access token and sends image requests through the Codex Responses backend. It diff --git a/docs/tools/image-generation.md b/docs/tools/image-generation.md index b9bcaa19c3b..95a16a6e948 100644 --- a/docs/tools/image-generation.md +++ b/docs/tools/image-generation.md @@ -289,6 +289,22 @@ OpenAI or OpenAI Codex OAuth route, OpenClaw rewrites the provider request to `gpt-image-1.5`. Azure and custom OpenAI-compatible endpoints keep their configured deployment/model names. +For headless CLI generation, use the equivalent `openclaw infer` flags: + +```bash +openclaw infer image generate \ + --model openai/gpt-image-1.5 \ + --output-format png \ + --openai-background transparent \ + --prompt "A simple red circle sticker on a transparent background" \ + --json +``` + +The same `--output-format` and `--openai-background` flags are available on +`openclaw infer image edit`. Other bundled providers can return PNGs and may +preserve alpha when their backend emits it, but OpenClaw only exposes an +explicit transparent-background control for OpenAI image generation. 
+ Generate one 4K landscape image: ``` diff --git a/src/cli/capability-cli.test.ts b/src/cli/capability-cli.test.ts index 4f845283558..8a6e984dbf0 100644 --- a/src/cli/capability-cli.test.ts +++ b/src/cli/capability-cli.test.ts @@ -553,6 +553,148 @@ describe("capability cli", () => { ); }); + it("passes image output format and OpenAI background hints through to generation runtime", async () => { + mocks.generateImage.mockResolvedValue({ + provider: "openai", + model: "gpt-image-1.5", + attempts: [], + images: [ + { + buffer: Buffer.from("png-bytes"), + mimeType: "image/png", + fileName: "transparent.png", + }, + ], + }); + + await runRegisteredCli({ + register: registerCapabilityCli as (program: Command) => void, + argv: [ + "capability", + "image", + "generate", + "--prompt", + "transparent sticker", + "--model", + "openai/gpt-image-1.5", + "--output-format", + "png", + "--openai-background", + "transparent", + "--json", + ], + }); + + expect(mocks.generateImage).toHaveBeenCalledWith( + expect.objectContaining({ + prompt: "transparent sticker", + modelOverride: "openai/gpt-image-1.5", + outputFormat: "png", + providerOptions: { + openai: { + background: "transparent", + }, + }, + }), + ); + }); + + it("passes image output format and OpenAI background hints through to edit runtime", async () => { + mocks.generateImage.mockResolvedValue({ + provider: "openai", + model: "gpt-image-1.5", + attempts: [], + images: [ + { + buffer: Buffer.from("png-bytes"), + mimeType: "image/png", + fileName: "transparent-edit.png", + }, + ], + }); + const inputPath = path.join(os.tmpdir(), `openclaw-image-edit-${Date.now()}.png`); + await fs.writeFile(inputPath, Buffer.from("png-input")); + + await runRegisteredCli({ + register: registerCapabilityCli as (program: Command) => void, + argv: [ + "capability", + "image", + "edit", + "--file", + inputPath, + "--prompt", + "make background transparent", + "--model", + "openai/gpt-image-1.5", + "--output-format", + "png", + 
"--openai-background", + "transparent", + "--json", + ], + }); + + expect(mocks.generateImage).toHaveBeenCalledWith( + expect.objectContaining({ + prompt: "make background transparent", + modelOverride: "openai/gpt-image-1.5", + outputFormat: "png", + providerOptions: { + openai: { + background: "transparent", + }, + }, + inputImages: [ + expect.objectContaining({ + fileName: path.basename(inputPath), + }), + ], + }), + ); + }); + + it("rejects unsupported image output format and OpenAI background hints", async () => { + await expect( + runRegisteredCli({ + register: registerCapabilityCli as (program: Command) => void, + argv: [ + "capability", + "image", + "generate", + "--prompt", + "transparent sticker", + "--output-format", + "gif", + "--json", + ], + }), + ).rejects.toThrow("exit 1"); + expect(mocks.runtime.error).toHaveBeenCalledWith( + "Error: --output-format must be one of png, jpeg, or webp", + ); + + mocks.runtime.error.mockClear(); + await expect( + runRegisteredCli({ + register: registerCapabilityCli as (program: Command) => void, + argv: [ + "capability", + "image", + "generate", + "--prompt", + "transparent sticker", + "--openai-background", + "clear", + "--json", + ], + }), + ).rejects.toThrow("exit 1"); + expect(mocks.runtime.error).toHaveBeenCalledWith( + "Error: --openai-background must be one of transparent, opaque, or auto", + ); + }); + it("streams url-only generated videos to --output paths", async () => { mocks.generateVideo.mockResolvedValue({ provider: "vydra", diff --git a/src/cli/capability-cli.ts b/src/cli/capability-cli.ts index ea97cedd9e1..b3bf460d1e4 100644 --- a/src/cli/capability-cli.ts +++ b/src/cli/capability-cli.ts @@ -22,6 +22,10 @@ import { buildGatewayConnectionDetailsWithResolvers } from "../gateway/connectio import { isLoopbackHost } from "../gateway/net.js"; import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../gateway/protocol/client-info.js"; import { generateImage, listRuntimeImageGenerationProviders } from 
"../image-generation/runtime.js"; +import type { + ImageGenerationOpenAIBackground, + ImageGenerationOutputFormat, +} from "../image-generation/types.js"; import { buildMediaUnderstandingRegistry } from "../media-understanding/provider-registry.js"; import { describeImageFile, @@ -78,6 +82,8 @@ import { removeCommandByName } from "./program/command-tree.js"; import { collectOption } from "./program/helpers.js"; type CapabilityTransport = "local" | "gateway"; +const IMAGE_OUTPUT_FORMATS = ["png", "jpeg", "webp"] as const; +const OPENAI_IMAGE_BACKGROUNDS = ["transparent", "opaque", "auto"] as const; type CapabilityMetadata = { id: string; @@ -702,6 +708,8 @@ async function runImageGenerate(params: { size?: string; aspectRatio?: string; resolution?: "1K" | "2K" | "4K"; + outputFormat?: ImageGenerationOutputFormat; + openaiBackground?: ImageGenerationOpenAIBackground; file?: string[]; output?: string; timeoutMs?: number; @@ -728,6 +736,10 @@ async function runImageGenerate(params: { size: params.size, aspectRatio: params.aspectRatio, resolution: params.resolution, + outputFormat: params.outputFormat, + providerOptions: params.openaiBackground + ? 
{ openai: { background: params.openaiBackground } } + : undefined, timeoutMs: params.timeoutMs, inputImages, }); @@ -851,6 +863,32 @@ function parseOptionalFiniteNumber( return value; } +function normalizeImageOutputFormat( + raw: string | undefined, +): ImageGenerationOutputFormat | undefined { + const normalized = normalizeLowercaseStringOrEmpty(raw); + if (!normalized) { + return undefined; + } + if ((IMAGE_OUTPUT_FORMATS as readonly string[]).includes(normalized)) { + return normalized as ImageGenerationOutputFormat; + } + throw new Error("--output-format must be one of png, jpeg, or webp"); +} + +function normalizeOpenAIImageBackground( + raw: string | undefined, +): ImageGenerationOpenAIBackground | undefined { + const normalized = normalizeLowercaseStringOrEmpty(raw); + if (!normalized) { + return undefined; + } + if ((OPENAI_IMAGE_BACKGROUNDS as readonly string[]).includes(normalized)) { + return normalized as ImageGenerationOpenAIBackground; + } + throw new Error("--openai-background must be one of transparent, opaque, or auto"); +} + function normalizeVideoResolution(raw: string | undefined): VideoGenerationResolution | undefined { const normalized = raw?.trim().toUpperCase(); if (!normalized) { @@ -1438,6 +1476,8 @@ export function registerCapabilityCli(program: Command) { .option("--size ", "Size hint like 1024x1024") .option("--aspect-ratio ", "Aspect ratio hint like 16:9") .option("--resolution ", "Resolution hint: 1K, 2K, or 4K") + .option("--output-format ", "Output format hint: png, jpeg, or webp") + .option("--openai-background ", "OpenAI background hint: transparent, opaque, or auto") .option("--timeout-ms ", "Provider request timeout in milliseconds") .option("--output ", "Output path") .option("--json", "Output JSON", false) @@ -1451,6 +1491,10 @@ export function registerCapabilityCli(program: Command) { size: opts.size as string | undefined, aspectRatio: opts.aspectRatio as string | undefined, resolution: opts.resolution as "1K" | "2K" | "4K" 
| undefined, + outputFormat: normalizeImageOutputFormat(opts.outputFormat as string | undefined), + openaiBackground: normalizeOpenAIImageBackground( + opts.openaiBackground as string | undefined, + ), timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"), output: opts.output as string | undefined, }); @@ -1464,6 +1508,8 @@ export function registerCapabilityCli(program: Command) { .requiredOption("--file ", "Input file", collectOption, []) .requiredOption("--prompt ", "Prompt text") .option("--model ", "Model override") + .option("--output-format ", "Output format hint: png, jpeg, or webp") + .option("--openai-background ", "OpenAI background hint: transparent, opaque, or auto") .option("--timeout-ms ", "Provider request timeout in milliseconds") .option("--output ", "Output path") .option("--json", "Output JSON", false) @@ -1475,6 +1521,10 @@ export function registerCapabilityCli(program: Command) { prompt: String(opts.prompt), model: opts.model as string | undefined, file: files, + outputFormat: normalizeImageOutputFormat(opts.outputFormat as string | undefined), + openaiBackground: normalizeOpenAIImageBackground( + opts.openaiBackground as string | undefined, + ), timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"), output: opts.output as string | undefined, });