mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:50:43 +00:00
fix: expose transparent image infer options
This commit is contained in:
@@ -8,6 +8,10 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Changes

- CLI/image generation: expose `--output-format` and `--openai-background` on `openclaw infer image generate` and `openclaw infer image edit` for transparent-background OpenAI image runs. Thanks @steipete.
- Browser/config: allow local managed Chrome launch discovery and post-launch CDP readiness timeouts to be raised for slower hosts such as Raspberry Pi. Fixes #66803. Thanks @beat843796.
|
||||
|
||||
@@ -156,7 +156,9 @@ Use `image` for generation, edit, and description.
|
||||
```bash
openclaw infer image generate --prompt "friendly lobster illustration" --json
openclaw infer image generate --prompt "cinematic product photo of headphones" --json
openclaw infer image generate --model openai/gpt-image-1.5 --output-format png --openai-background transparent --prompt "simple red circle sticker on a transparent background" --json
openclaw infer image generate --prompt "slow image backend" --timeout-ms 180000 --json
openclaw infer image edit --file ./logo.png --model openai/gpt-image-1.5 --output-format png --openai-background transparent --prompt "keep the logo, remove the background" --json
openclaw infer image describe --file ./photo.jpg --json
openclaw infer image describe --file ./ui-screenshot.png --model openai/gpt-4.1-mini --json
openclaw infer image describe --file ./photo.jpg --model ollama/qwen2.5vl:7b --json
||||
@@ -165,6 +167,10 @@ openclaw infer image describe --file ./photo.jpg --model ollama/qwen2.5vl:7b --j
|
||||
Notes:

- Use `image edit` when starting from existing input files.
- Use `--output-format png --openai-background transparent` with `--model openai/gpt-image-1.5` for transparent-background OpenAI PNG output. These OpenAI-specific flags are available on both `image generate` and `image edit`.
- Use `image providers --json` to verify which bundled image providers are discoverable, configured, selected, and which generation/edit capabilities each provider exposes.
|
||||
|
||||
@@ -267,6 +267,20 @@ OpenAI Codex OAuth routes by rewriting default `openai/gpt-image-2` transparent
|
||||
requests to `gpt-image-1.5`; Azure and custom OpenAI-compatible endpoints keep
|
||||
their configured deployment/model names.
|
||||
|
||||
The same setting is exposed for headless CLI runs:
|
||||
|
||||
```bash
|
||||
openclaw infer image generate \
|
||||
--model openai/gpt-image-1.5 \
|
||||
--output-format png \
|
||||
--openai-background transparent \
|
||||
--prompt "A simple red circle sticker on a transparent background" \
|
||||
--json
|
||||
```
|
||||
|
||||
Use the same `--output-format` and `--openai-background` flags with
|
||||
`openclaw infer image edit` when starting from an input file.
|
||||
|
||||
For Codex OAuth installs, keep the same `openai/gpt-image-2` ref. When an
|
||||
`openai-codex` OAuth profile is configured, OpenClaw resolves that stored OAuth
|
||||
access token and sends image requests through the Codex Responses backend. It
|
||||
|
||||
@@ -289,6 +289,22 @@ OpenAI or OpenAI Codex OAuth route, OpenClaw rewrites the provider request to
|
||||
`gpt-image-1.5`. Azure and custom OpenAI-compatible endpoints keep their
|
||||
configured deployment/model names.
|
||||
|
||||
For headless CLI generation, use the equivalent `openclaw infer` flags:
|
||||
|
||||
```bash
|
||||
openclaw infer image generate \
|
||||
--model openai/gpt-image-1.5 \
|
||||
--output-format png \
|
||||
--openai-background transparent \
|
||||
--prompt "A simple red circle sticker on a transparent background" \
|
||||
--json
|
||||
```
|
||||
|
||||
The same `--output-format` and `--openai-background` flags are available on
|
||||
`openclaw infer image edit`. Other bundled providers can return PNGs and may
|
||||
preserve alpha when their backend emits it, but OpenClaw only exposes an
|
||||
explicit transparent-background control for OpenAI image generation.
|
||||
|
||||
Generate one 4K landscape image:
|
||||
|
||||
```
|
||||
|
||||
@@ -553,6 +553,148 @@ describe("capability cli", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("passes image output format and OpenAI background hints through to generation runtime", async () => {
|
||||
mocks.generateImage.mockResolvedValue({
|
||||
provider: "openai",
|
||||
model: "gpt-image-1.5",
|
||||
attempts: [],
|
||||
images: [
|
||||
{
|
||||
buffer: Buffer.from("png-bytes"),
|
||||
mimeType: "image/png",
|
||||
fileName: "transparent.png",
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
await runRegisteredCli({
|
||||
register: registerCapabilityCli as (program: Command) => void,
|
||||
argv: [
|
||||
"capability",
|
||||
"image",
|
||||
"generate",
|
||||
"--prompt",
|
||||
"transparent sticker",
|
||||
"--model",
|
||||
"openai/gpt-image-1.5",
|
||||
"--output-format",
|
||||
"png",
|
||||
"--openai-background",
|
||||
"transparent",
|
||||
"--json",
|
||||
],
|
||||
});
|
||||
|
||||
expect(mocks.generateImage).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
prompt: "transparent sticker",
|
||||
modelOverride: "openai/gpt-image-1.5",
|
||||
outputFormat: "png",
|
||||
providerOptions: {
|
||||
openai: {
|
||||
background: "transparent",
|
||||
},
|
||||
},
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("passes image output format and OpenAI background hints through to edit runtime", async () => {
|
||||
mocks.generateImage.mockResolvedValue({
|
||||
provider: "openai",
|
||||
model: "gpt-image-1.5",
|
||||
attempts: [],
|
||||
images: [
|
||||
{
|
||||
buffer: Buffer.from("png-bytes"),
|
||||
mimeType: "image/png",
|
||||
fileName: "transparent-edit.png",
|
||||
},
|
||||
],
|
||||
});
|
||||
const inputPath = path.join(os.tmpdir(), `openclaw-image-edit-${Date.now()}.png`);
|
||||
await fs.writeFile(inputPath, Buffer.from("png-input"));
|
||||
|
||||
await runRegisteredCli({
|
||||
register: registerCapabilityCli as (program: Command) => void,
|
||||
argv: [
|
||||
"capability",
|
||||
"image",
|
||||
"edit",
|
||||
"--file",
|
||||
inputPath,
|
||||
"--prompt",
|
||||
"make background transparent",
|
||||
"--model",
|
||||
"openai/gpt-image-1.5",
|
||||
"--output-format",
|
||||
"png",
|
||||
"--openai-background",
|
||||
"transparent",
|
||||
"--json",
|
||||
],
|
||||
});
|
||||
|
||||
expect(mocks.generateImage).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
prompt: "make background transparent",
|
||||
modelOverride: "openai/gpt-image-1.5",
|
||||
outputFormat: "png",
|
||||
providerOptions: {
|
||||
openai: {
|
||||
background: "transparent",
|
||||
},
|
||||
},
|
||||
inputImages: [
|
||||
expect.objectContaining({
|
||||
fileName: path.basename(inputPath),
|
||||
}),
|
||||
],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects unsupported image output format and OpenAI background hints", async () => {
|
||||
await expect(
|
||||
runRegisteredCli({
|
||||
register: registerCapabilityCli as (program: Command) => void,
|
||||
argv: [
|
||||
"capability",
|
||||
"image",
|
||||
"generate",
|
||||
"--prompt",
|
||||
"transparent sticker",
|
||||
"--output-format",
|
||||
"gif",
|
||||
"--json",
|
||||
],
|
||||
}),
|
||||
).rejects.toThrow("exit 1");
|
||||
expect(mocks.runtime.error).toHaveBeenCalledWith(
|
||||
"Error: --output-format must be one of png, jpeg, or webp",
|
||||
);
|
||||
|
||||
mocks.runtime.error.mockClear();
|
||||
await expect(
|
||||
runRegisteredCli({
|
||||
register: registerCapabilityCli as (program: Command) => void,
|
||||
argv: [
|
||||
"capability",
|
||||
"image",
|
||||
"generate",
|
||||
"--prompt",
|
||||
"transparent sticker",
|
||||
"--openai-background",
|
||||
"clear",
|
||||
"--json",
|
||||
],
|
||||
}),
|
||||
).rejects.toThrow("exit 1");
|
||||
expect(mocks.runtime.error).toHaveBeenCalledWith(
|
||||
"Error: --openai-background must be one of transparent, opaque, or auto",
|
||||
);
|
||||
});
|
||||
|
||||
it("streams url-only generated videos to --output paths", async () => {
|
||||
mocks.generateVideo.mockResolvedValue({
|
||||
provider: "vydra",
|
||||
|
||||
@@ -22,6 +22,10 @@ import { buildGatewayConnectionDetailsWithResolvers } from "../gateway/connectio
|
||||
import { isLoopbackHost } from "../gateway/net.js";
|
||||
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../gateway/protocol/client-info.js";
|
||||
import { generateImage, listRuntimeImageGenerationProviders } from "../image-generation/runtime.js";
|
||||
import type {
|
||||
ImageGenerationOpenAIBackground,
|
||||
ImageGenerationOutputFormat,
|
||||
} from "../image-generation/types.js";
|
||||
import { buildMediaUnderstandingRegistry } from "../media-understanding/provider-registry.js";
|
||||
import {
|
||||
describeImageFile,
|
||||
@@ -78,6 +82,8 @@ import { removeCommandByName } from "./program/command-tree.js";
|
||||
import { collectOption } from "./program/helpers.js";
|
||||
|
||||
type CapabilityTransport = "local" | "gateway";
|
||||
const IMAGE_OUTPUT_FORMATS = ["png", "jpeg", "webp"] as const;
|
||||
const OPENAI_IMAGE_BACKGROUNDS = ["transparent", "opaque", "auto"] as const;
|
||||
|
||||
type CapabilityMetadata = {
|
||||
id: string;
|
||||
@@ -702,6 +708,8 @@ async function runImageGenerate(params: {
|
||||
size?: string;
|
||||
aspectRatio?: string;
|
||||
resolution?: "1K" | "2K" | "4K";
|
||||
outputFormat?: ImageGenerationOutputFormat;
|
||||
openaiBackground?: ImageGenerationOpenAIBackground;
|
||||
file?: string[];
|
||||
output?: string;
|
||||
timeoutMs?: number;
|
||||
@@ -728,6 +736,10 @@ async function runImageGenerate(params: {
|
||||
size: params.size,
|
||||
aspectRatio: params.aspectRatio,
|
||||
resolution: params.resolution,
|
||||
outputFormat: params.outputFormat,
|
||||
providerOptions: params.openaiBackground
|
||||
? { openai: { background: params.openaiBackground } }
|
||||
: undefined,
|
||||
timeoutMs: params.timeoutMs,
|
||||
inputImages,
|
||||
});
|
||||
@@ -851,6 +863,32 @@ function parseOptionalFiniteNumber(
|
||||
return value;
|
||||
}
|
||||
|
||||
function normalizeImageOutputFormat(
|
||||
raw: string | undefined,
|
||||
): ImageGenerationOutputFormat | undefined {
|
||||
const normalized = normalizeLowercaseStringOrEmpty(raw);
|
||||
if (!normalized) {
|
||||
return undefined;
|
||||
}
|
||||
if ((IMAGE_OUTPUT_FORMATS as readonly string[]).includes(normalized)) {
|
||||
return normalized as ImageGenerationOutputFormat;
|
||||
}
|
||||
throw new Error("--output-format must be one of png, jpeg, or webp");
|
||||
}
|
||||
|
||||
function normalizeOpenAIImageBackground(
|
||||
raw: string | undefined,
|
||||
): ImageGenerationOpenAIBackground | undefined {
|
||||
const normalized = normalizeLowercaseStringOrEmpty(raw);
|
||||
if (!normalized) {
|
||||
return undefined;
|
||||
}
|
||||
if ((OPENAI_IMAGE_BACKGROUNDS as readonly string[]).includes(normalized)) {
|
||||
return normalized as ImageGenerationOpenAIBackground;
|
||||
}
|
||||
throw new Error("--openai-background must be one of transparent, opaque, or auto");
|
||||
}
|
||||
|
||||
function normalizeVideoResolution(raw: string | undefined): VideoGenerationResolution | undefined {
|
||||
const normalized = raw?.trim().toUpperCase();
|
||||
if (!normalized) {
|
||||
@@ -1438,6 +1476,8 @@ export function registerCapabilityCli(program: Command) {
|
||||
.option("--size <size>", "Size hint like 1024x1024")
|
||||
.option("--aspect-ratio <ratio>", "Aspect ratio hint like 16:9")
|
||||
.option("--resolution <value>", "Resolution hint: 1K, 2K, or 4K")
|
||||
.option("--output-format <format>", "Output format hint: png, jpeg, or webp")
|
||||
.option("--openai-background <value>", "OpenAI background hint: transparent, opaque, or auto")
|
||||
.option("--timeout-ms <ms>", "Provider request timeout in milliseconds")
|
||||
.option("--output <path>", "Output path")
|
||||
.option("--json", "Output JSON", false)
|
||||
@@ -1451,6 +1491,10 @@ export function registerCapabilityCli(program: Command) {
|
||||
size: opts.size as string | undefined,
|
||||
aspectRatio: opts.aspectRatio as string | undefined,
|
||||
resolution: opts.resolution as "1K" | "2K" | "4K" | undefined,
|
||||
outputFormat: normalizeImageOutputFormat(opts.outputFormat as string | undefined),
|
||||
openaiBackground: normalizeOpenAIImageBackground(
|
||||
opts.openaiBackground as string | undefined,
|
||||
),
|
||||
timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"),
|
||||
output: opts.output as string | undefined,
|
||||
});
|
||||
@@ -1464,6 +1508,8 @@ export function registerCapabilityCli(program: Command) {
|
||||
.requiredOption("--file <path>", "Input file", collectOption, [])
|
||||
.requiredOption("--prompt <text>", "Prompt text")
|
||||
.option("--model <provider/model>", "Model override")
|
||||
.option("--output-format <format>", "Output format hint: png, jpeg, or webp")
|
||||
.option("--openai-background <value>", "OpenAI background hint: transparent, opaque, or auto")
|
||||
.option("--timeout-ms <ms>", "Provider request timeout in milliseconds")
|
||||
.option("--output <path>", "Output path")
|
||||
.option("--json", "Output JSON", false)
|
||||
@@ -1475,6 +1521,10 @@ export function registerCapabilityCli(program: Command) {
|
||||
prompt: String(opts.prompt),
|
||||
model: opts.model as string | undefined,
|
||||
file: files,
|
||||
outputFormat: normalizeImageOutputFormat(opts.outputFormat as string | undefined),
|
||||
openaiBackground: normalizeOpenAIImageBackground(
|
||||
opts.openaiBackground as string | undefined,
|
||||
),
|
||||
timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"),
|
||||
output: opts.output as string | undefined,
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user