fix: expose transparent image infer options

This commit is contained in:
Peter Steinberger
2026-04-25 19:58:33 +01:00
parent cd7a8f870b
commit b66f01bdca
6 changed files with 232 additions and 0 deletions

View File

@@ -8,6 +8,10 @@ Docs: https://docs.openclaw.ai
### Changes
- CLI/image generation: expose `--output-format` and
`--openai-background` on `openclaw infer image generate` and
`openclaw infer image edit` for transparent-background OpenAI image runs.
Thanks @steipete.
- Browser/config: allow local managed Chrome launch discovery and post-launch
CDP readiness timeouts to be raised for slower hosts such as Raspberry Pi.
Fixes #66803. Thanks @beat843796.

View File

@@ -156,7 +156,9 @@ Use `image` for generation, edit, and description.
```bash
openclaw infer image generate --prompt "friendly lobster illustration" --json
openclaw infer image generate --prompt "cinematic product photo of headphones" --json
openclaw infer image generate --model openai/gpt-image-1.5 --output-format png --openai-background transparent --prompt "simple red circle sticker on a transparent background" --json
openclaw infer image generate --prompt "slow image backend" --timeout-ms 180000 --json
openclaw infer image edit --file ./logo.png --model openai/gpt-image-1.5 --output-format png --openai-background transparent --prompt "keep the logo, remove the background" --json
openclaw infer image describe --file ./photo.jpg --json
openclaw infer image describe --file ./ui-screenshot.png --model openai/gpt-4.1-mini --json
openclaw infer image describe --file ./photo.jpg --model ollama/qwen2.5vl:7b --json
@@ -165,6 +167,10 @@ openclaw infer image describe --file ./photo.jpg --model ollama/qwen2.5vl:7b --j
Notes:
- Use `image edit` when starting from existing input files.
- Use `--output-format png --openai-background transparent` with
`--model openai/gpt-image-1.5` for transparent-background OpenAI PNG output.
These OpenAI-specific flags are available on both `image generate` and
`image edit`.
- Use `image providers --json` to verify which bundled image providers are
discoverable, configured, selected, and which generation/edit capabilities
each provider exposes.

View File

@@ -267,6 +267,20 @@ OpenAI Codex OAuth routes by rewriting default `openai/gpt-image-2` transparent
requests to `gpt-image-1.5`; Azure and custom OpenAI-compatible endpoints keep
their configured deployment/model names.
The same setting is exposed for headless CLI runs:
```bash
openclaw infer image generate \
--model openai/gpt-image-1.5 \
--output-format png \
--openai-background transparent \
--prompt "A simple red circle sticker on a transparent background" \
--json
```
Use the same `--output-format` and `--openai-background` flags with
`openclaw infer image edit` when starting from an input file.
For Codex OAuth installs, keep the same `openai/gpt-image-2` ref. When an
`openai-codex` OAuth profile is configured, OpenClaw resolves that stored OAuth
access token and sends image requests through the Codex Responses backend. It

View File

@@ -289,6 +289,22 @@ OpenAI or OpenAI Codex OAuth route, OpenClaw rewrites the provider request to
`gpt-image-1.5`. Azure and custom OpenAI-compatible endpoints keep their
configured deployment/model names.
For headless CLI generation, use the equivalent `openclaw infer` flags:
```bash
openclaw infer image generate \
--model openai/gpt-image-1.5 \
--output-format png \
--openai-background transparent \
--prompt "A simple red circle sticker on a transparent background" \
--json
```
The same `--output-format` and `--openai-background` flags are available on
`openclaw infer image edit`. Other bundled providers can return PNGs and may
preserve alpha when their backend emits it, but OpenClaw only exposes an
explicit transparent-background control for OpenAI image generation.
Generate one 4K landscape image:
```

View File

@@ -553,6 +553,148 @@ describe("capability cli", () => {
);
});
it("passes image output format and OpenAI background hints through to generation runtime", async () => {
mocks.generateImage.mockResolvedValue({
provider: "openai",
model: "gpt-image-1.5",
attempts: [],
images: [
{
buffer: Buffer.from("png-bytes"),
mimeType: "image/png",
fileName: "transparent.png",
},
],
});
await runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: [
"capability",
"image",
"generate",
"--prompt",
"transparent sticker",
"--model",
"openai/gpt-image-1.5",
"--output-format",
"png",
"--openai-background",
"transparent",
"--json",
],
});
expect(mocks.generateImage).toHaveBeenCalledWith(
expect.objectContaining({
prompt: "transparent sticker",
modelOverride: "openai/gpt-image-1.5",
outputFormat: "png",
providerOptions: {
openai: {
background: "transparent",
},
},
}),
);
});
it("passes image output format and OpenAI background hints through to edit runtime", async () => {
mocks.generateImage.mockResolvedValue({
provider: "openai",
model: "gpt-image-1.5",
attempts: [],
images: [
{
buffer: Buffer.from("png-bytes"),
mimeType: "image/png",
fileName: "transparent-edit.png",
},
],
});
const inputPath = path.join(os.tmpdir(), `openclaw-image-edit-${Date.now()}.png`);
await fs.writeFile(inputPath, Buffer.from("png-input"));
await runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: [
"capability",
"image",
"edit",
"--file",
inputPath,
"--prompt",
"make background transparent",
"--model",
"openai/gpt-image-1.5",
"--output-format",
"png",
"--openai-background",
"transparent",
"--json",
],
});
expect(mocks.generateImage).toHaveBeenCalledWith(
expect.objectContaining({
prompt: "make background transparent",
modelOverride: "openai/gpt-image-1.5",
outputFormat: "png",
providerOptions: {
openai: {
background: "transparent",
},
},
inputImages: [
expect.objectContaining({
fileName: path.basename(inputPath),
}),
],
}),
);
});
it("rejects unsupported image output format and OpenAI background hints", async () => {
await expect(
runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: [
"capability",
"image",
"generate",
"--prompt",
"transparent sticker",
"--output-format",
"gif",
"--json",
],
}),
).rejects.toThrow("exit 1");
expect(mocks.runtime.error).toHaveBeenCalledWith(
"Error: --output-format must be one of png, jpeg, or webp",
);
mocks.runtime.error.mockClear();
await expect(
runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: [
"capability",
"image",
"generate",
"--prompt",
"transparent sticker",
"--openai-background",
"clear",
"--json",
],
}),
).rejects.toThrow("exit 1");
expect(mocks.runtime.error).toHaveBeenCalledWith(
"Error: --openai-background must be one of transparent, opaque, or auto",
);
});
it("streams url-only generated videos to --output paths", async () => {
mocks.generateVideo.mockResolvedValue({
provider: "vydra",

View File

@@ -22,6 +22,10 @@ import { buildGatewayConnectionDetailsWithResolvers } from "../gateway/connectio
import { isLoopbackHost } from "../gateway/net.js";
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../gateway/protocol/client-info.js";
import { generateImage, listRuntimeImageGenerationProviders } from "../image-generation/runtime.js";
import type {
ImageGenerationOpenAIBackground,
ImageGenerationOutputFormat,
} from "../image-generation/types.js";
import { buildMediaUnderstandingRegistry } from "../media-understanding/provider-registry.js";
import {
describeImageFile,
@@ -78,6 +82,8 @@ import { removeCommandByName } from "./program/command-tree.js";
import { collectOption } from "./program/helpers.js";
type CapabilityTransport = "local" | "gateway";
const IMAGE_OUTPUT_FORMATS = ["png", "jpeg", "webp"] as const;
const OPENAI_IMAGE_BACKGROUNDS = ["transparent", "opaque", "auto"] as const;
type CapabilityMetadata = {
id: string;
@@ -702,6 +708,8 @@ async function runImageGenerate(params: {
size?: string;
aspectRatio?: string;
resolution?: "1K" | "2K" | "4K";
outputFormat?: ImageGenerationOutputFormat;
openaiBackground?: ImageGenerationOpenAIBackground;
file?: string[];
output?: string;
timeoutMs?: number;
@@ -728,6 +736,10 @@ async function runImageGenerate(params: {
size: params.size,
aspectRatio: params.aspectRatio,
resolution: params.resolution,
outputFormat: params.outputFormat,
providerOptions: params.openaiBackground
? { openai: { background: params.openaiBackground } }
: undefined,
timeoutMs: params.timeoutMs,
inputImages,
});
@@ -851,6 +863,32 @@ function parseOptionalFiniteNumber(
return value;
}
/**
 * Validate the `--output-format` CLI value.
 *
 * @param raw - Raw flag value as parsed by the CLI, if provided.
 * @returns The lowercased format when it is one of the supported values,
 *   or `undefined` when the flag was omitted/empty (provider default applies).
 * @throws Error when a non-empty value is not png, jpeg, or webp.
 */
function normalizeImageOutputFormat(
  raw: string | undefined,
): ImageGenerationOutputFormat | undefined {
  const value = normalizeLowercaseStringOrEmpty(raw);
  if (!value) {
    return undefined;
  }
  const isKnownFormat = (IMAGE_OUTPUT_FORMATS as readonly string[]).includes(value);
  if (!isKnownFormat) {
    throw new Error("--output-format must be one of png, jpeg, or webp");
  }
  return value as ImageGenerationOutputFormat;
}
/**
 * Validate the `--openai-background` CLI value.
 *
 * @param raw - Raw flag value as parsed by the CLI, if provided.
 * @returns The lowercased background hint when it is a supported value,
 *   or `undefined` when the flag was omitted/empty (no hint is sent).
 * @throws Error when a non-empty value is not transparent, opaque, or auto.
 */
function normalizeOpenAIImageBackground(
  raw: string | undefined,
): ImageGenerationOpenAIBackground | undefined {
  const value = normalizeLowercaseStringOrEmpty(raw);
  if (!value) {
    return undefined;
  }
  const isKnownBackground = (OPENAI_IMAGE_BACKGROUNDS as readonly string[]).includes(value);
  if (!isKnownBackground) {
    throw new Error("--openai-background must be one of transparent, opaque, or auto");
  }
  return value as ImageGenerationOpenAIBackground;
}
function normalizeVideoResolution(raw: string | undefined): VideoGenerationResolution | undefined {
const normalized = raw?.trim().toUpperCase();
if (!normalized) {
@@ -1438,6 +1476,8 @@ export function registerCapabilityCli(program: Command) {
.option("--size <size>", "Size hint like 1024x1024")
.option("--aspect-ratio <ratio>", "Aspect ratio hint like 16:9")
.option("--resolution <value>", "Resolution hint: 1K, 2K, or 4K")
.option("--output-format <format>", "Output format hint: png, jpeg, or webp")
.option("--openai-background <value>", "OpenAI background hint: transparent, opaque, or auto")
.option("--timeout-ms <ms>", "Provider request timeout in milliseconds")
.option("--output <path>", "Output path")
.option("--json", "Output JSON", false)
@@ -1451,6 +1491,10 @@ export function registerCapabilityCli(program: Command) {
size: opts.size as string | undefined,
aspectRatio: opts.aspectRatio as string | undefined,
resolution: opts.resolution as "1K" | "2K" | "4K" | undefined,
outputFormat: normalizeImageOutputFormat(opts.outputFormat as string | undefined),
openaiBackground: normalizeOpenAIImageBackground(
opts.openaiBackground as string | undefined,
),
timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"),
output: opts.output as string | undefined,
});
@@ -1464,6 +1508,8 @@ export function registerCapabilityCli(program: Command) {
.requiredOption("--file <path>", "Input file", collectOption, [])
.requiredOption("--prompt <text>", "Prompt text")
.option("--model <provider/model>", "Model override")
.option("--output-format <format>", "Output format hint: png, jpeg, or webp")
.option("--openai-background <value>", "OpenAI background hint: transparent, opaque, or auto")
.option("--timeout-ms <ms>", "Provider request timeout in milliseconds")
.option("--output <path>", "Output path")
.option("--json", "Output JSON", false)
@@ -1475,6 +1521,10 @@ export function registerCapabilityCli(program: Command) {
prompt: String(opts.prompt),
model: opts.model as string | undefined,
file: files,
outputFormat: normalizeImageOutputFormat(opts.outputFormat as string | undefined),
openaiBackground: normalizeOpenAIImageBackground(
opts.openaiBackground as string | undefined,
),
timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"),
output: opts.output as string | undefined,
});