feat: expose generic image background option

This commit is contained in:
Peter Steinberger
2026-04-25 20:21:46 +01:00
parent 03fd7df929
commit 75d64cd4b8
19 changed files with 173 additions and 59 deletions

View File

@@ -8,9 +8,10 @@ Docs: https://docs.openclaw.ai
### Changes
- CLI/image generation: expose `--output-format` and
`--openai-background` on `openclaw infer image generate` and
`openclaw infer image edit` for transparent-background OpenAI image runs.
- CLI/image generation: expose generic `--background` on
`openclaw infer image generate` and `openclaw infer image edit`, keep
`--openai-background` as an OpenAI alias, and let fal image generation honor
`--output-format png|jpeg`. Thanks @steipete.
- Browser/config: allow local managed Chrome launch discovery and post-launch
CDP readiness timeouts to be raised for slower hosts such as Raspberry Pi.
Fixes #66803. Thanks @beat843796.

View File

@@ -156,9 +156,9 @@ Use `image` for generation, edit, and description.
```bash
openclaw infer image generate --prompt "friendly lobster illustration" --json
openclaw infer image generate --prompt "cinematic product photo of headphones" --json
openclaw infer image generate --model openai/gpt-image-1.5 --output-format png --openai-background transparent --prompt "simple red circle sticker on a transparent background" --json
openclaw infer image generate --model openai/gpt-image-1.5 --output-format png --background transparent --prompt "simple red circle sticker on a transparent background" --json
openclaw infer image generate --prompt "slow image backend" --timeout-ms 180000 --json
openclaw infer image edit --file ./logo.png --model openai/gpt-image-1.5 --output-format png --openai-background transparent --prompt "keep the logo, remove the background" --json
openclaw infer image edit --file ./logo.png --model openai/gpt-image-1.5 --output-format png --background transparent --prompt "keep the logo, remove the background" --json
openclaw infer image describe --file ./photo.jpg --json
openclaw infer image describe --file ./ui-screenshot.png --model openai/gpt-4.1-mini --json
openclaw infer image describe --file ./photo.jpg --model ollama/qwen2.5vl:7b --json
@@ -167,10 +167,10 @@ openclaw infer image describe --file ./photo.jpg --model ollama/qwen2.5vl:7b --j
Notes:
- Use `image edit` when starting from existing input files.
- Use `--output-format png --openai-background transparent` with
`--model openai/gpt-image-1.5` for transparent-background OpenAI PNG output.
These OpenAI-specific flags are available on both `image generate` and
`image edit`.
- Use `--output-format png --background transparent` with
`--model openai/gpt-image-1.5` for transparent-background OpenAI PNG output;
`--openai-background` remains available as an OpenAI-specific alias. Providers
that do not declare background support report the hint as an ignored override.
- Use `image providers --json` to verify which bundled image providers are
discoverable, configured, selected, and which generation/edit capabilities
each provider exposes.

View File

@@ -50,11 +50,16 @@ The bundled `fal` image-generation provider defaults to
| Size overrides | Supported |
| Aspect ratio | Supported |
| Resolution | Supported |
| Output format | `png` or `jpeg` |
<Warning>
The fal image edit endpoint does **not** support `aspectRatio` overrides.
</Warning>
Use `outputFormat: "png"` when you want PNG output. fal does not declare an
explicit transparent-background control in OpenClaw, so `background:
"transparent"` is reported as an ignored override for fal models.
To use fal as the default image provider:
```json5

View File

@@ -262,7 +262,8 @@ PNG/WebP output; the current `gpt-image-2` API rejects
For a transparent-background request, agents should call `image_generate` with
`model: "openai/gpt-image-1.5"`, `outputFormat: "png"` or `"webp"`, and
`openai.background: "transparent"`. OpenClaw also protects the public OpenAI and
`background: "transparent"`; the older `openai.background` provider option is
still accepted. OpenClaw also protects the public OpenAI and
OpenAI Codex OAuth routes by rewriting default `openai/gpt-image-2` transparent
requests to `gpt-image-1.5`; Azure and custom OpenAI-compatible endpoints keep
their configured deployment/model names.
@@ -273,13 +274,14 @@ The same setting is exposed for headless CLI runs:
openclaw infer image generate \
--model openai/gpt-image-1.5 \
--output-format png \
--openai-background transparent \
--background transparent \
--prompt "A simple red circle sticker on a transparent background" \
--json
```
Use the same `--output-format` and `--openai-background` flags with
Use the same `--output-format` and `--background` flags with
`openclaw infer image edit` when starting from an input file.
`--openai-background` remains available as an OpenAI-specific alias.
For Codex OAuth installs, keep the same `openai/gpt-image-2` ref. When an
`openai-codex` OAuth profile is configured, OpenClaw resolves that stored OAuth
@@ -302,7 +304,7 @@ Generate:
Generate a transparent PNG:
```
/tool image_generate model=openai/gpt-image-1.5 prompt="A simple red circle sticker on a transparent background" outputFormat=png openai='{"background":"transparent"}'
/tool image_generate model=openai/gpt-image-1.5 prompt="A simple red circle sticker on a transparent background" outputFormat=png background=transparent
```
Edit:

View File

@@ -60,8 +60,10 @@ The agent calls `image_generate` automatically. No tool allow-listing needed —
The same `image_generate` tool handles text-to-image and reference-image
editing. Use `image` for one reference or `images` for multiple references.
Provider-supported output hints such as `quality`, `outputFormat`, and
OpenAI-specific `background` are forwarded when available and reported as
ignored when a provider does not support them.
`background` are forwarded when available and reported as ignored when a
provider does not support them. Current bundled transparent-background support
is OpenAI-specific; other providers may still preserve PNG alpha if their
backend emits it.
## Supported providers
@@ -126,6 +128,11 @@ Quality hint when the provider supports it.
Output format hint when the provider supports it.
</ParamField>
<ParamField path="background" type="'transparent' | 'opaque' | 'auto'">
Background hint when the provider supports it. Use `transparent` with
`outputFormat: "png"` or `"webp"` for transparency-capable providers.
</ParamField>
<ParamField path="count" type="number">
Number of images to generate (1–4).
</ParamField>
@@ -268,6 +275,11 @@ image model. OpenClaw routes default `gpt-image-2` transparent-background
requests to `gpt-image-1.5`. `openai.outputCompression` applies to JPEG/WebP
outputs.
The top-level `background` hint is provider-neutral and currently maps to the
same OpenAI `background` request field when the OpenAI provider is selected.
Providers that do not declare background support return it in `ignoredOverrides`
instead of receiving the unsupported parameter.
When asking an agent for a transparent-background OpenAI image, the expected
tool call is:
@@ -276,9 +288,7 @@ tool call is:
"model": "openai/gpt-image-1.5",
"prompt": "A simple red circle sticker on a transparent background",
"outputFormat": "png",
"openai": {
"background": "transparent"
}
"background": "transparent"
}
```
@@ -295,15 +305,16 @@ For headless CLI generation, use the equivalent `openclaw infer` flags:
openclaw infer image generate \
--model openai/gpt-image-1.5 \
--output-format png \
--openai-background transparent \
--background transparent \
--prompt "A simple red circle sticker on a transparent background" \
--json
```
The same `--output-format` and `--openai-background` flags are available on
`openclaw infer image edit`. Other bundled providers can return PNGs and may
preserve alpha when their backend emits it, but OpenClaw only exposes an
explicit transparent-background control for OpenAI image generation.
The same `--output-format` and `--background` flags are available on
`openclaw infer image edit`; `--openai-background` remains available as an
OpenAI-specific alias. Current bundled providers other than OpenAI do not
declare explicit background control, so `background: "transparent"` is reported
as ignored for them.
Generate one 4K landscape image:
@@ -314,7 +325,7 @@ Generate one 4K landscape image:
Generate a transparent PNG:
```
/tool image_generate action=generate model=openai/gpt-image-1.5 prompt="A simple red circle sticker on a transparent background" outputFormat=png openai='{"background":"transparent"}'
/tool image_generate action=generate model=openai/gpt-image-1.5 prompt="A simple red circle sticker on a transparent background" outputFormat=png background=transparent
```
Generate two square images:

View File

@@ -76,6 +76,7 @@ describe("fal image-generation provider", () => {
cfg: {},
count: 2,
size: "1536x1024",
outputFormat: "jpeg",
});
expectFalJsonPost({
@@ -85,7 +86,7 @@ describe("fal image-generation provider", () => {
prompt: "draw a cat",
image_size: { width: 1536, height: 1024 },
num_images: 2,
output_format: "png",
output_format: "jpeg",
},
});
expect(fetchWithSsrFGuardMock).toHaveBeenNthCalledWith(

View File

@@ -25,6 +25,7 @@ const DEFAULT_FAL_BASE_URL = "https://fal.run";
const DEFAULT_FAL_IMAGE_MODEL = "fal-ai/flux/dev";
const DEFAULT_FAL_EDIT_SUBPATH = "image-to-image";
const DEFAULT_OUTPUT_FORMAT = "png";
const FAL_OUTPUT_FORMATS = ["png", "jpeg"] as const;
const FAL_SUPPORTED_SIZES = [
"1024x1024",
"1024x1536",
@@ -292,6 +293,9 @@ export function buildFalImageGenerationProvider(): ImageGenerationProvider {
aspectRatios: [...FAL_SUPPORTED_ASPECT_RATIOS],
resolutions: ["1K", "2K", "4K"],
},
output: {
formats: [...FAL_OUTPUT_FORMATS],
},
},
async generateImage(req) {
const auth = await resolveApiKeyForProvider({
@@ -333,7 +337,7 @@ export function buildFalImageGenerationProvider(): ImageGenerationProvider {
const requestBody: Record<string, unknown> = {
prompt: req.prompt,
num_images: req.count ?? 1,
output_format: DEFAULT_OUTPUT_FORMAT,
output_format: req.outputFormat ?? DEFAULT_OUTPUT_FORMAT,
};
if (imageSize !== undefined) {
requestBody.image_size = imageSize;

View File

@@ -206,6 +206,7 @@ describe("openai image generation provider", () => {
expect(provider.capabilities.output).toEqual({
formats: ["png", "jpeg", "webp"],
qualities: ["low", "medium", "high", "auto"],
backgrounds: ["transparent", "opaque", "auto"],
});
});
@@ -443,11 +444,7 @@ describe("openai image generation provider", () => {
prompt: "Transparent sticker",
cfg: {},
outputFormat: "png",
providerOptions: {
openai: {
background: "transparent",
},
},
background: "transparent",
});
expect(postJsonRequestMock).toHaveBeenCalledWith(

View File

@@ -52,6 +52,7 @@ const MAX_CODEX_IMAGE_BASE64_CHARS = 64 * 1024 * 1024;
const LOG_VALUE_MAX_CHARS = 256;
const MOCK_OPENAI_PROVIDER_ID = "mock-openai";
const OPENAI_OUTPUT_FORMATS = ["png", "jpeg", "webp"] as const;
const OPENAI_BACKGROUNDS = ["transparent", "opaque", "auto"] as const;
const OPENAI_QUALITIES = ["low", "medium", "high", "auto"] as const;
const OPENAI_IMAGE_MODELS = [
DEFAULT_OPENAI_IMAGE_MODEL,
@@ -174,10 +175,11 @@ function appendOpenAIImageOptions(
req: Parameters<ImageGenerationProvider["generateImage"]>[0],
): void {
const openai = req.providerOptions?.openai;
const background = openai?.background ?? req.background;
const entries: Record<string, unknown> = {
...(req.quality !== undefined ? { quality: req.quality } : {}),
...(req.outputFormat !== undefined ? { output_format: req.outputFormat } : {}),
...(openai?.background !== undefined ? { background: openai.background } : {}),
...(background !== undefined ? { background } : {}),
...(openai?.moderation !== undefined ? { moderation: openai.moderation } : {}),
...(openai?.outputCompression !== undefined
? { output_compression: openai.outputCompression }
@@ -201,7 +203,7 @@ function resolveOpenAIImageRequestModel(
if (
options?.allowTransparentDefaultReroute === true &&
model === DEFAULT_OPENAI_IMAGE_MODEL &&
req.providerOptions?.openai?.background === "transparent"
(req.providerOptions?.openai?.background ?? req.background) === "transparent"
) {
return OPENAI_TRANSPARENT_BACKGROUND_IMAGE_MODEL;
}
@@ -513,6 +515,7 @@ function createOpenAIImageGenerationProviderBase(params: {
output: {
formats: [...OPENAI_OUTPUT_FORMATS],
qualities: [...OPENAI_QUALITIES],
backgrounds: [...OPENAI_BACKGROUNDS],
},
},
generateImage: params.generateImage,
@@ -580,6 +583,7 @@ async function generateOpenAICodexImage(params: {
const size = req.size ?? DEFAULT_SIZE;
const timeoutMs = resolveOpenAIImageTimeoutMs(req.timeoutMs);
const openai = req.providerOptions?.openai;
const background = openai?.background ?? req.background;
headers.set("Content-Type", "application/json");
const content: Array<Record<string, unknown>> = [
{ type: "input_text", text: req.prompt },
@@ -610,7 +614,7 @@ async function generateOpenAICodexImage(params: {
size,
...(req.quality !== undefined ? { quality: req.quality } : {}),
...(req.outputFormat !== undefined ? { output_format: req.outputFormat } : {}),
...(openai?.background !== undefined ? { background: openai.background } : {}),
...(background !== undefined ? { background } : {}),
...(openai?.outputCompression !== undefined
? { output_compression: openai.outputCompression }
: {}),

View File

@@ -225,7 +225,8 @@ describe("createImageGenerateTool", () => {
const tool = requireImageGenerateTool(createImageGenerateTool({ config: {} }));
expect(tool.description).toContain('outputFormat="png" or "webp"');
expect(tool.description).toContain('openai.background="transparent"');
expect(tool.description).toContain('background="transparent"');
expect(tool.description).toContain("openai.background");
expect(tool.description).toContain("gpt-image-1.5");
expect(JSON.stringify(tool.parameters)).toContain("openai/gpt-image-1.5");
});

View File

@@ -8,6 +8,7 @@ import {
} from "../../image-generation/runtime.js";
import type {
ImageGenerationIgnoredOverride,
ImageGenerationBackground,
ImageGenerationOpenAIBackground,
ImageGenerationOpenAIModeration,
ImageGenerationOpenAIOptions,
@@ -62,7 +63,7 @@ const MAX_INPUT_IMAGES = 5;
const DEFAULT_RESOLUTION: ImageGenerationResolution = "1K";
const SUPPORTED_QUALITIES = ["low", "medium", "high", "auto"] as const;
const SUPPORTED_OUTPUT_FORMATS = ["png", "jpeg", "webp"] as const;
const SUPPORTED_OPENAI_BACKGROUNDS = ["transparent", "opaque", "auto"] as const;
const SUPPORTED_BACKGROUNDS = ["transparent", "opaque", "auto"] as const;
const SUPPORTED_OPENAI_MODERATIONS = ["low", "auto"] as const;
const SUPPORTED_ASPECT_RATIOS = new Set([
"1:1",
@@ -131,9 +132,13 @@ const ImageGenerateToolSchema = Type.Object({
outputFormat: optionalStringEnum(SUPPORTED_OUTPUT_FORMATS, {
description: "Optional output format hint: png, jpeg, or webp when the provider supports it.",
}),
background: optionalStringEnum(SUPPORTED_BACKGROUNDS, {
description:
"Optional background hint: transparent, opaque, or auto when the provider supports it. For transparent output use outputFormat png or webp.",
}),
openai: Type.Optional(
Type.Object({
background: optionalStringEnum(SUPPORTED_OPENAI_BACKGROUNDS, {
background: optionalStringEnum(SUPPORTED_BACKGROUNDS, {
description:
"OpenAI-only background hint: transparent, opaque, or auto. For transparent output use outputFormat png or webp; OpenClaw routes the default OpenAI image model to gpt-image-1.5 for this mode.",
}),
@@ -270,12 +275,23 @@ function normalizeOpenAIBackground(
if (!normalized) {
return undefined;
}
if ((SUPPORTED_OPENAI_BACKGROUNDS as readonly string[]).includes(normalized)) {
if ((SUPPORTED_BACKGROUNDS as readonly string[]).includes(normalized)) {
return normalized as ImageGenerationOpenAIBackground;
}
throw new ToolInputError("openai.background must be one of transparent, opaque, or auto");
}
function normalizeBackground(raw: string | undefined): ImageGenerationBackground | undefined {
const normalized = raw?.trim().toLowerCase();
if (!normalized) {
return undefined;
}
if ((SUPPORTED_BACKGROUNDS as readonly string[]).includes(normalized)) {
return normalized as ImageGenerationBackground;
}
throw new ToolInputError("background must be one of transparent, opaque, or auto");
}
function normalizeOpenAIModeration(
raw: string | undefined,
): ImageGenerationOpenAIModeration | undefined {
@@ -574,7 +590,7 @@ export function createImageGenerateTool(options?: {
label: "Image Generation",
name: "image_generate",
description:
'Generate new images or edit reference images with the configured or inferred image-generation model. For transparent OpenAI backgrounds, use outputFormat="png" or "webp" and openai.background="transparent"; OpenClaw routes the default OpenAI image model to gpt-image-1.5 for that mode. Set agents.defaults.imageGenerationModel.primary to pick a provider/model. Providers declare their own auth/readiness; use action="list" to inspect registered providers, models, readiness, and auth hints. Generated images are delivered automatically from the tool result as MEDIA paths.',
'Generate new images or edit reference images with the configured or inferred image-generation model. For transparent backgrounds, use outputFormat="png" or "webp" and background="transparent"; OpenAI also accepts openai.background and OpenClaw routes the default OpenAI image model to gpt-image-1.5 for that mode. Set agents.defaults.imageGenerationModel.primary to pick a provider/model. Providers declare their own auth/readiness; use action="list" to inspect registered providers, models, readiness, and auth hints. Generated images are delivered automatically from the tool result as MEDIA paths.',
parameters: ImageGenerateToolSchema,
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
@@ -616,6 +632,12 @@ export function createImageGenerateTool(options?: {
if ((provider.capabilities.geometry?.aspectRatios?.length ?? 0) > 0) {
caps.push(`aspect ratios ${provider.capabilities.geometry?.aspectRatios?.join(", ")}`);
}
if ((provider.capabilities.output?.formats?.length ?? 0) > 0) {
caps.push(`formats ${provider.capabilities.output?.formats?.join("/")}`);
}
if ((provider.capabilities.output?.backgrounds?.length ?? 0) > 0) {
caps.push(`backgrounds ${provider.capabilities.output?.backgrounds?.join("/")}`);
}
const modelLine =
provider.models.length > 0
? `models: ${provider.models.join(", ")}`
@@ -645,6 +667,7 @@ export function createImageGenerateTool(options?: {
const timeoutMs = readGenerationTimeoutMs(params) ?? imageGenerationModelConfig.timeoutMs;
const quality = normalizeQuality(readStringParam(params, "quality"));
const outputFormat = normalizeOutputFormat(readStringParam(params, "outputFormat"));
const background = normalizeBackground(readStringParam(params, "background"));
const providerOptions = normalizeProviderOptions(params);
const selectedProvider = resolveSelectedImageGenerationProvider({
config: effectiveCfg,
@@ -693,6 +716,7 @@ export function createImageGenerateTool(options?: {
resolution,
quality,
outputFormat,
background,
count,
inputImages,
timeoutMs,
@@ -780,6 +804,7 @@ export function createImageGenerateTool(options?: {
: {}),
...(quality ? { quality } : {}),
...(outputFormat ? { outputFormat } : {}),
...(background ? { background } : {}),
...(filename ? { filename } : {}),
...(timeoutMs !== undefined ? { timeoutMs } : {}),
attempts: result.attempts,

View File

@@ -553,7 +553,7 @@ describe("capability cli", () => {
);
});
it("passes image output format and OpenAI background hints through to generation runtime", async () => {
it("passes image output format and generic background hints through to generation runtime", async () => {
mocks.generateImage.mockResolvedValue({
provider: "openai",
model: "gpt-image-1.5",
@@ -579,7 +579,7 @@ describe("capability cli", () => {
"openai/gpt-image-1.5",
"--output-format",
"png",
"--openai-background",
"--background",
"transparent",
"--json",
],
@@ -590,11 +590,8 @@ describe("capability cli", () => {
prompt: "transparent sticker",
modelOverride: "openai/gpt-image-1.5",
outputFormat: "png",
providerOptions: {
openai: {
background: "transparent",
},
},
background: "transparent",
providerOptions: undefined,
}),
);
});
@@ -640,6 +637,7 @@ describe("capability cli", () => {
prompt: "make background transparent",
modelOverride: "openai/gpt-image-1.5",
outputFormat: "png",
background: undefined,
providerOptions: {
openai: {
background: "transparent",
@@ -654,7 +652,7 @@ describe("capability cli", () => {
);
});
it("rejects unsupported image output format and OpenAI background hints", async () => {
it("rejects unsupported image output format and background hints", async () => {
await expect(
runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
@@ -693,6 +691,26 @@ describe("capability cli", () => {
expect(mocks.runtime.error).toHaveBeenCalledWith(
"Error: --openai-background must be one of transparent, opaque, or auto",
);
mocks.runtime.error.mockClear();
await expect(
runRegisteredCli({
register: registerCapabilityCli as (program: Command) => void,
argv: [
"capability",
"image",
"generate",
"--prompt",
"transparent sticker",
"--background",
"clear",
"--json",
],
}),
).rejects.toThrow("exit 1");
expect(mocks.runtime.error).toHaveBeenCalledWith(
"Error: --background must be one of transparent, opaque, or auto",
);
});
it("streams url-only generated videos to --output paths", async () => {

View File

@@ -23,7 +23,7 @@ import { isLoopbackHost } from "../gateway/net.js";
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../gateway/protocol/client-info.js";
import { generateImage, listRuntimeImageGenerationProviders } from "../image-generation/runtime.js";
import type {
ImageGenerationOpenAIBackground,
ImageGenerationBackground,
ImageGenerationOutputFormat,
} from "../image-generation/types.js";
import { buildMediaUnderstandingRegistry } from "../media-understanding/provider-registry.js";
@@ -83,7 +83,7 @@ import { collectOption } from "./program/helpers.js";
type CapabilityTransport = "local" | "gateway";
const IMAGE_OUTPUT_FORMATS = ["png", "jpeg", "webp"] as const;
const OPENAI_IMAGE_BACKGROUNDS = ["transparent", "opaque", "auto"] as const;
const IMAGE_BACKGROUNDS = ["transparent", "opaque", "auto"] as const;
type CapabilityMetadata = {
id: string;
@@ -101,6 +101,7 @@ type CapabilityEnvelope = {
model?: string;
attempts: Array<Record<string, unknown>>;
outputs: Array<Record<string, unknown>>;
ignoredOverrides?: Array<Record<string, unknown>>;
error?: string;
};
@@ -390,6 +391,9 @@ function formatEnvelopeForText(value: unknown): string {
`${envelope.capability} via ${envelope.transport}`,
...(envelope.provider ? [`provider: ${envelope.provider}`] : []),
...(envelope.model ? [`model: ${envelope.model}`] : []),
...(envelope.ignoredOverrides && envelope.ignoredOverrides.length > 0
? [`ignoredOverrides: ${JSON.stringify(envelope.ignoredOverrides)}`]
: []),
`outputs: ${String(envelope.outputs.length)}`,
];
for (const output of envelope.outputs) {
@@ -709,7 +713,8 @@ async function runImageGenerate(params: {
aspectRatio?: string;
resolution?: "1K" | "2K" | "4K";
outputFormat?: ImageGenerationOutputFormat;
openaiBackground?: ImageGenerationOpenAIBackground;
background?: ImageGenerationBackground;
openaiBackground?: ImageGenerationBackground;
file?: string[];
output?: string;
timeoutMs?: number;
@@ -737,6 +742,7 @@ async function runImageGenerate(params: {
aspectRatio: params.aspectRatio,
resolution: params.resolution,
outputFormat: params.outputFormat,
background: params.background,
providerOptions: params.openaiBackground
? { openai: { background: params.openaiBackground } }
: undefined,
@@ -771,6 +777,7 @@ async function runImageGenerate(params: {
model: result.model,
attempts: result.attempts,
outputs,
ignoredOverrides: result.ignoredOverrides,
} satisfies CapabilityEnvelope;
}
@@ -876,17 +883,18 @@ function normalizeImageOutputFormat(
throw new Error("--output-format must be one of png, jpeg, or webp");
}
function normalizeOpenAIImageBackground(
function normalizeImageBackground(
raw: string | undefined,
): ImageGenerationOpenAIBackground | undefined {
label = "--background",
): ImageGenerationBackground | undefined {
const normalized = normalizeLowercaseStringOrEmpty(raw);
if (!normalized) {
return undefined;
}
if ((OPENAI_IMAGE_BACKGROUNDS as readonly string[]).includes(normalized)) {
return normalized as ImageGenerationOpenAIBackground;
if ((IMAGE_BACKGROUNDS as readonly string[]).includes(normalized)) {
return normalized as ImageGenerationBackground;
}
throw new Error("--openai-background must be one of transparent, opaque, or auto");
throw new Error(`${label} must be one of transparent, opaque, or auto`);
}
function normalizeVideoResolution(raw: string | undefined): VideoGenerationResolution | undefined {
@@ -1477,6 +1485,7 @@ export function registerCapabilityCli(program: Command) {
.option("--aspect-ratio <ratio>", "Aspect ratio hint like 16:9")
.option("--resolution <value>", "Resolution hint: 1K, 2K, or 4K")
.option("--output-format <format>", "Output format hint: png, jpeg, or webp")
.option("--background <value>", "Background hint: transparent, opaque, or auto")
.option("--openai-background <value>", "OpenAI background hint: transparent, opaque, or auto")
.option("--timeout-ms <ms>", "Provider request timeout in milliseconds")
.option("--output <path>", "Output path")
@@ -1492,8 +1501,10 @@ export function registerCapabilityCli(program: Command) {
aspectRatio: opts.aspectRatio as string | undefined,
resolution: opts.resolution as "1K" | "2K" | "4K" | undefined,
outputFormat: normalizeImageOutputFormat(opts.outputFormat as string | undefined),
openaiBackground: normalizeOpenAIImageBackground(
background: normalizeImageBackground(opts.background as string | undefined),
openaiBackground: normalizeImageBackground(
opts.openaiBackground as string | undefined,
"--openai-background",
),
timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"),
output: opts.output as string | undefined,
@@ -1509,6 +1520,7 @@ export function registerCapabilityCli(program: Command) {
.requiredOption("--prompt <text>", "Prompt text")
.option("--model <provider/model>", "Model override")
.option("--output-format <format>", "Output format hint: png, jpeg, or webp")
.option("--background <value>", "Background hint: transparent, opaque, or auto")
.option("--openai-background <value>", "OpenAI background hint: transparent, opaque, or auto")
.option("--timeout-ms <ms>", "Provider request timeout in milliseconds")
.option("--output <path>", "Output path")
@@ -1522,8 +1534,10 @@ export function registerCapabilityCli(program: Command) {
model: opts.model as string | undefined,
file: files,
outputFormat: normalizeImageOutputFormat(opts.outputFormat as string | undefined),
openaiBackground: normalizeOpenAIImageBackground(
background: normalizeImageBackground(opts.background as string | undefined),
openaiBackground: normalizeImageBackground(
opts.openaiBackground as string | undefined,
"--openai-background",
),
timeoutMs: parseOptionalFiniteNumber(opts.timeoutMs, "--timeout-ms"),
output: opts.output as string | undefined,

View File

@@ -6,6 +6,7 @@ import {
type MediaNormalizationEntry,
} from "../media-generation/runtime-shared.js";
import type {
ImageGenerationBackground,
ImageGenerationIgnoredOverride,
ImageGenerationNormalization,
ImageGenerationOutputFormat,
@@ -21,6 +22,7 @@ export type ResolvedImageGenerationOverrides = {
resolution?: ImageGenerationResolution;
quality?: ImageGenerationQuality;
outputFormat?: ImageGenerationOutputFormat;
background?: ImageGenerationBackground;
ignoredOverrides: ImageGenerationIgnoredOverride[];
normalization?: ImageGenerationNormalization;
};
@@ -42,6 +44,7 @@ export function resolveImageGenerationOverrides(params: {
resolution?: ImageGenerationResolution;
quality?: ImageGenerationQuality;
outputFormat?: ImageGenerationOutputFormat;
background?: ImageGenerationBackground;
inputImages?: ImageGenerationSourceImage[];
}): ResolvedImageGenerationOverrides {
const hasInputImages = (params.inputImages?.length ?? 0) > 0;
@@ -56,6 +59,7 @@ export function resolveImageGenerationOverrides(params: {
let resolution = params.resolution;
let quality = params.quality;
let outputFormat = params.outputFormat;
let background = params.background;
if (size && (geometry?.sizes?.length ?? 0) > 0 && modeCaps.supportsSize) {
const normalizedSize = resolveClosestSize({
@@ -175,6 +179,12 @@ export function resolveImageGenerationOverrides(params: {
outputFormat = undefined;
}
const supportedBackgrounds = params.provider.capabilities.output?.backgrounds;
if (background && !(supportedBackgrounds ?? []).includes(background)) {
ignoredOverrides.push({ key: "background", value: background });
background = undefined;
}
if (
!normalization.aspectRatio &&
aspectRatio &&
@@ -220,6 +230,7 @@ export function resolveImageGenerationOverrides(params: {
resolution,
quality,
outputFormat,
background,
ignoredOverrides,
normalization: finalizeImageNormalization(normalization),
};

View File

@@ -3,6 +3,7 @@ import type { FallbackAttempt } from "../agents/model-fallback.types.js";
import type { OpenClawConfig } from "../config/types.openclaw.js";
import type {
GeneratedImageAsset,
ImageGenerationBackground,
ImageGenerationIgnoredOverride,
ImageGenerationNormalization,
ImageGenerationOutputFormat,
@@ -25,6 +26,7 @@ export type GenerateImageParams = {
resolution?: ImageGenerationResolution;
quality?: ImageGenerationQuality;
outputFormat?: ImageGenerationOutputFormat;
background?: ImageGenerationBackground;
inputImages?: ImageGenerationSourceImage[];
/** Optional per-request provider timeout in milliseconds. */
timeoutMs?: number;

View File

@@ -268,6 +268,7 @@ describe("image-generation runtime", () => {
| {
quality?: string;
outputFormat?: string;
background?: string;
providerOptions?: unknown;
}
| undefined;
@@ -285,12 +286,14 @@ describe("image-generation runtime", () => {
output: {
qualities: ["low", "medium", "high", "auto"],
formats: ["png", "jpeg", "webp"],
backgrounds: ["transparent", "opaque", "auto"],
},
},
async generateImage(req) {
seenRequest = {
quality: req.quality,
outputFormat: req.outputFormat,
background: req.background,
providerOptions: req.providerOptions,
};
return {
@@ -310,6 +313,7 @@ describe("image-generation runtime", () => {
prompt: "draw a cheap preview",
quality: "low",
outputFormat: "jpeg",
background: "opaque",
providerOptions: {
openai: {
background: "opaque",
@@ -323,6 +327,7 @@ describe("image-generation runtime", () => {
expect(seenRequest).toEqual({
quality: "low",
outputFormat: "jpeg",
background: "opaque",
providerOptions: {
openai: {
background: "opaque",
@@ -340,6 +345,7 @@ describe("image-generation runtime", () => {
| {
quality?: string;
outputFormat?: string;
background?: string;
}
| undefined;
mocks.resolveAgentModelPrimaryValue.mockReturnValue("vydra/grok-imagine");
@@ -355,6 +361,7 @@ describe("image-generation runtime", () => {
seenRequest = {
quality: req.quality,
outputFormat: req.outputFormat,
background: req.background,
};
return {
images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
@@ -373,15 +380,18 @@ describe("image-generation runtime", () => {
prompt: "draw a cat",
quality: "low",
outputFormat: "jpeg",
background: "transparent",
});
expect(seenRequest).toEqual({
quality: undefined,
outputFormat: undefined,
background: undefined,
});
expect(result.ignoredOverrides).toEqual([
{ key: "quality", value: "low" },
{ key: "outputFormat", value: "jpeg" },
{ key: "background", value: "transparent" },
]);
});

View File

@@ -77,6 +77,7 @@ export async function generateImage(
resolution: params.resolution,
quality: params.quality,
outputFormat: params.outputFormat,
background: params.background,
inputImages: params.inputImages,
});
const result: ImageGenerationResult = await provider.generateImage({
@@ -92,6 +93,7 @@ export async function generateImage(
resolution: sanitized.resolution,
quality: sanitized.quality,
outputFormat: sanitized.outputFormat,
background: sanitized.background,
inputImages: params.inputImages,
...(timeoutMs !== undefined ? { timeoutMs } : {}),
providerOptions: params.providerOptions,

View File

@@ -16,7 +16,9 @@ export type ImageGenerationQuality = "low" | "medium" | "high" | "auto";
export type ImageGenerationOutputFormat = "png" | "jpeg" | "webp";
export type ImageGenerationOpenAIBackground = "transparent" | "opaque" | "auto";
export type ImageGenerationBackground = "transparent" | "opaque" | "auto";
export type ImageGenerationOpenAIBackground = ImageGenerationBackground;
export type ImageGenerationOpenAIModeration = "low" | "auto";
@@ -36,7 +38,8 @@ export type ImageGenerationIgnoredOverrideKey =
| "aspectRatio"
| "resolution"
| "quality"
| "outputFormat";
| "outputFormat"
| "background";
export type ImageGenerationIgnoredOverride = {
key: ImageGenerationIgnoredOverrideKey;
@@ -69,6 +72,7 @@ export type ImageGenerationRequest = {
resolution?: ImageGenerationResolution;
quality?: ImageGenerationQuality;
outputFormat?: ImageGenerationOutputFormat;
background?: ImageGenerationBackground;
inputImages?: ImageGenerationSourceImage[];
providerOptions?: ImageGenerationProviderOptions;
};
@@ -100,6 +104,7 @@ export type ImageGenerationGeometryCapabilities = {
export type ImageGenerationOutputCapabilities = {
qualities?: ImageGenerationQuality[];
formats?: ImageGenerationOutputFormat[];
backgrounds?: ImageGenerationBackground[];
};
export type ImageGenerationNormalization = {

View File

@@ -2,6 +2,7 @@
export type {
GeneratedImageAsset,
ImageGenerationBackground,
ImageGenerationOpenAIBackground,
ImageGenerationOpenAIModeration,
ImageGenerationOpenAIOptions,