fix: expose transparent image infer options

This commit is contained in:
Peter Steinberger
2026-04-25 19:58:33 +01:00
parent cd7a8f870b
commit b66f01bdca
6 changed files with 232 additions and 0 deletions

View File

@@ -8,6 +8,10 @@ Docs: https://docs.openclaw.ai
### Changes
- CLI/image generation: expose `--output-format` and
`--openai-background` on `openclaw infer image generate` and
`openclaw infer image edit` for transparent-background OpenAI image runs.
Thanks @steipete.
- Browser/config: allow local managed Chrome launch discovery and post-launch
CDP readiness timeouts to be raised for slower hosts such as Raspberry Pi.
Fixes #66803. Thanks @beat843796.

View File

@@ -156,7 +156,9 @@ Use `image` for generation, edit, and description.
```bash
openclaw infer image generate --prompt "friendly lobster illustration" --json
openclaw infer image generate --prompt "cinematic product photo of headphones" --json
openclaw infer image generate --model openai/gpt-image-1.5 --output-format png --openai-background transparent --prompt "simple red circle sticker on a transparent background" --json
openclaw infer image generate --prompt "slow image backend" --timeout-ms 180000 --json
openclaw infer image edit --file ./logo.png --model openai/gpt-image-1.5 --output-format png --openai-background transparent --prompt "keep the logo, remove the background" --json
openclaw infer image describe --file ./photo.jpg --json
openclaw infer image describe --file ./ui-screenshot.png --model openai/gpt-4.1-mini --json
openclaw infer image describe --file ./photo.jpg --model ollama/qwen2.5vl:7b --json
@@ -165,6 +167,10 @@ openclaw infer image describe --file ./photo.jpg --model ollama/qwen2.5vl:7b --j
Notes:
- Use `image edit` when starting from existing input files.
- Use `--output-format png --openai-background transparent` with
`--model openai/gpt-image-1.5` for transparent-background OpenAI PNG output.
These OpenAI-specific flags are available on both `image generate` and
`image edit`.
- Use `image providers --json` to verify which bundled image providers are
discoverable, configured, selected, and which generation/edit capabilities
each provider exposes.

View File

@@ -267,6 +267,20 @@ OpenAI Codex OAuth routes by rewriting default `openai/gpt-image-2` transparent
requests to `gpt-image-1.5`; Azure and custom OpenAI-compatible endpoints keep
their configured deployment/model names.
The same setting is exposed for headless CLI runs:
```bash
openclaw infer image generate \
--model openai/gpt-image-1.5 \
--output-format png \
--openai-background transparent \
--prompt "A simple red circle sticker on a transparent background" \
--json
```
Use the same `--output-format` and `--openai-background` flags with
`openclaw infer image edit` when starting from an input file.
For Codex OAuth installs, keep the same `openai/gpt-image-2` ref. When an
`openai-codex` OAuth profile is configured, OpenClaw resolves that stored OAuth
access token and sends image requests through the Codex Responses backend. It

View File

@@ -289,6 +289,22 @@ OpenAI or OpenAI Codex OAuth route, OpenClaw rewrites the provider request to
`gpt-image-1.5`. Azure and custom OpenAI-compatible endpoints keep their
configured deployment/model names.
For headless CLI generation, use the equivalent `openclaw infer` flags:
```bash
openclaw infer image generate \
--model openai/gpt-image-1.5 \
--output-format png \
--openai-background transparent \
--prompt "A simple red circle sticker on a transparent background" \
--json
```
The same `--output-format` and `--openai-background` flags are available on
`openclaw infer image edit`. Other bundled providers can return PNGs and may
preserve alpha when their backend emits it, but OpenClaw only exposes an
explicit transparent-background control for OpenAI image generation.
Generate one 4K landscape image:
```

View File

@@ -553,6 +553,148 @@ describe("capability cli", () => {
);
});
it("passes image output format and OpenAI background hints through to generation runtime", async () => {
mocks.generateImage.mockResolvedValue({
provider: "openai",
model: "gpt-image-1.5",
attempts: [],
images: [
{
buffer: Buffer.from("png-bytes"),
mimeType: "image/png",
fileName: "transparent.png",
},
],
});
await runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: [
"capability",
"image",
"generate",
"--prompt",
"transparent sticker",
"--model",
"openai/gpt-image-1.5",
"--output-format",
"png",
"--openai-background",
"transparent",
"--json",
],
});
expect(mocks.generateImage).toHaveBeenCalledWith(
expect.objectContaining({
prompt: "transparent sticker",
modelOverride: "openai/gpt-image-1.5",
outputFormat: "png",
providerOptions: {
openai: {
background: "transparent",
},
},
}),
);
});
it("passes image output format and OpenAI background hints through to edit runtime", async () => {
mocks.generateImage.mockResolvedValue({
provider: "openai",
model: "gpt-image-1.5",
attempts: [],
images: [
{
buffer: Buffer.from("png-bytes"),
mimeType: "image/png",
fileName: "transparent-edit.png",
},
],
});
const inputPath = path.join(os.tmpdir(), `openclaw-image-edit-${Date.now()}.png`);
await fs.writeFile(inputPath, Buffer.from("png-input"));
await runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: [
"capability",
"image",
"edit",
"--file",
inputPath,
"--prompt",
"make background transparent",
"--model",
"openai/gpt-image-1.5",
"--output-format",
"png",
"--openai-background",
"transparent",
"--json",
],
});
expect(mocks.generateImage).toHaveBeenCalledWith(
expect.objectContaining({
prompt: "make background transparent",
modelOverride: "openai/gpt-image-1.5",
outputFormat: "png",
providerOptions: {
openai: {
background: "transparent",
},
},
inputImages: [
expect.objectContaining({
fileName: path.basename(inputPath),
}),
],
}),
);
});
it("rejects unsupported image output format and OpenAI background hints", async () => {
await expect(
runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: [
"capability",
"image",
"generate",
"--prompt",
"transparent sticker",
"--output-format",
"gif",
"--json",
],
}),
).rejects.toThrow("exit 1");
expect(mocks.runtime.error).toHaveBeenCalledWith(
"Error: --output-format must be one of png, jpeg, or webp",
);
mocks.runtime.error.mockClear();
await expect(
runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: [
"capability",
"image",
"generate",
"--prompt",
"transparent sticker",
"--openai-background",
"clear",
"--json",
],
}),
).rejects.toThrow("exit 1");
expect(mocks.runtime.error).toHaveBeenCalledWith(
"Error: --openai-background must be one of transparent, opaque, or auto",
);
});
it("streams url-only generated videos to --output paths", async () => {
mocks.generateVideo.mockResolvedValue({
provider: "vydra",

View File

@@ -22,6 +22,10 @@ import { buildGatewayConnectionDetailsWithResolvers } from "../gateway/connectio
import { isLoopbackHost } from "../gateway/net.js";
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../gateway/protocol/client-info.js";
import { generateImage, listRuntimeImageGenerationProviders } from "../image-generation/runtime.js";
import type {
ImageGenerationOpenAIBackground,
ImageGenerationOutputFormat,
} from "../image-generation/types.js";
import { buildMediaUnderstandingRegistry } from "../media-understanding/provider-registry.js";
import {
describeImageFile,
@@ -78,6 +82,8 @@ import { removeCommandByName } from "./program/command-tree.js";
import { collectOption } from "./program/helpers.js";
type CapabilityTransport = "local" | "gateway";
const IMAGE_OUTPUT_FORMATS = ["png", "jpeg", "webp"] as const;
const OPENAI_IMAGE_BACKGROUNDS = ["transparent", "opaque", "auto"] as const;
type CapabilityMetadata = {
id: string;
@@ -702,6 +708,8 @@ async function runImageGenerate(params: {
size?: string;
aspectRatio?: string;
resolution?: "1K" | "2K" | "4K";
outputFormat?: ImageGenerationOutputFormat;
openaiBackground?: ImageGenerationOpenAIBackground;
file?: string[];
output?: string;
timeoutMs?: number;
@@ -728,6 +736,10 @@ async function runImageGenerate(params: {
size: params.size,
aspectRatio: params.aspectRatio,
resolution: params.resolution,
outputFormat: params.outputFormat,
providerOptions: params.openaiBackground
? { openai: { background: params.openaiBackground } }
: undefined,
timeoutMs: params.timeoutMs,
inputImages,
});
@@ -851,6 +863,32 @@ function parseOptionalFiniteNumber(
return value;
}
/**
 * Validate the `--output-format` CLI value.
 *
 * @param raw - Raw flag value as parsed by the CLI, if provided.
 * @returns The lowercased format when it is one of the supported values,
 *   or `undefined` when the flag was omitted/empty (provider default applies).
 * @throws Error when a non-empty value is not png, jpeg, or webp.
 */
function normalizeImageOutputFormat(
  raw: string | undefined,
): ImageGenerationOutputFormat | undefined {
  const value = normalizeLowercaseStringOrEmpty(raw);
  if (!value) {
    return undefined;
  }
  const isKnownFormat = (IMAGE_OUTPUT_FORMATS as readonly string[]).includes(value);
  if (!isKnownFormat) {
    throw new Error("--output-format must be one of png, jpeg, or webp");
  }
  return value as ImageGenerationOutputFormat;
}
/**
 * Validate the `--openai-background` CLI value.
 *
 * @param raw - Raw flag value as parsed by the CLI, if provided.
 * @returns The lowercased background hint when it is a supported value,
 *   or `undefined` when the flag was omitted/empty (no hint is sent).
 * @throws Error when a non-empty value is not transparent, opaque, or auto.
 */
function normalizeOpenAIImageBackground(
  raw: string | undefined,
): ImageGenerationOpenAIBackground | undefined {
  const value = normalizeLowercaseStringOrEmpty(raw);
  if (!value) {
    return undefined;
  }
  const isKnownBackground = (OPENAI_IMAGE_BACKGROUNDS as readonly string[]).includes(value);
  if (!isKnownBackground) {
    throw new Error("--openai-background must be one of transparent, opaque, or auto");
  }
  return value as ImageGenerationOpenAIBackground;
}
function normalizeVideoResolution(raw: string | undefined): VideoGenerationResolution | undefined {
const normalized = raw?.trim().toUpperCase();
if (!normalized) {
@@ -1438,6 +1476,8 @@ export function registerCapabilityCli(program: Command) {
.option("--size <size>", "Size hint like 1024x1024")
.option("--aspect-ratio <ratio>", "Aspect ratio hint like 16:9")
.option("--resolution <value>", "Resolution hint: 1K, 2K, or 4K")
.option("--output-format <format>", "Output format hint: png, jpeg, or webp")
.option("--openai-background <value>", "OpenAI background hint: transparent, opaque, or auto")
.option("--timeout-ms <ms>", "Provider request timeout in milliseconds")
.option("--output <path>", "Output path")
.option("--json", "Output JSON", false)
@@ -1451,6 +1491,10 @@ export function registerCapabilityCli(program: Command) {
size: opts.size as string | undefined,
aspectRatio: opts.aspectRatio as string | undefined,
resolution: opts.resolution as "1K" | "2K" | "4K" | undefined,
outputFormat: normalizeImageOutputFormat(opts.outputFormat as string | undefined),
openaiBackground: normalizeOpenAIImageBackground(
opts.openaiBackground as string | undefined,
),
timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"),
output: opts.output as string | undefined,
});
@@ -1464,6 +1508,8 @@ export function registerCapabilityCli(program: Command) {
.requiredOption("--file <path>", "Input file", collectOption, [])
.requiredOption("--prompt <text>", "Prompt text")
.option("--model <provider/model>", "Model override")
.option("--output-format <format>", "Output format hint: png, jpeg, or webp")
.option("--openai-background <value>", "OpenAI background hint: transparent, opaque, or auto")
.option("--timeout-ms <ms>", "Provider request timeout in milliseconds")
.option("--output <path>", "Output path")
.option("--json", "Output JSON", false)
@@ -1475,6 +1521,10 @@ export function registerCapabilityCli(program: Command) {
prompt: String(opts.prompt),
model: opts.model as string | undefined,
file: files,
outputFormat: normalizeImageOutputFormat(opts.outputFormat as string | undefined),
openaiBackground: normalizeOpenAIImageBackground(
opts.openaiBackground as string | undefined,
),
timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"),
output: opts.output as string | undefined,
});