feat(openai): pass image output options (#70503)

Co-authored-by: Peter Steinberger <steipete@gmail.com>
Co-authored-by: Otto Deng <ottodeng@users.noreply.github.com>
This commit is contained in:
Otto Deng
2026-04-24 07:17:12 +08:00
committed by GitHub
parent b9a0795761
commit de3f3b8f93
16 changed files with 602 additions and 28 deletions

View File

@@ -11,6 +11,7 @@ Docs: https://docs.openclaw.ai
- Codex harness: add structured debug logging for embedded harness selection decisions so `/status` stays simple while gateway logs explain auto-selection and Pi fallback reasons. (#70760) Thanks @100yenadmin.
- Providers/OpenAI: add forward-compatible `gpt-5.5` and `gpt-5.5-pro` support for OpenAI API keys, OpenAI Codex OAuth, and the Codex CLI default model.
- Providers/OpenAI: add image generation and reference-image editing through Codex OAuth, so `openai/gpt-image-2` works without an `OPENAI_API_KEY`. Fixes #70703.
- Image generation: let agents request provider-supported quality and output format hints, and pass OpenAI-specific background, moderation, compression, and user hints through the `image_generate` tool. (#70503) Thanks @ottodeng.
### Fixes

View File

@@ -1,2 +1,2 @@
748236b1167a22be8d5ae19905078df77becd4d52cec9a3bb45c78ab2abddb9f plugin-sdk-api-baseline.json
9ca0549360dd2a634e7b7e56a17d7a3978ee797d9ad80ce5a4a57c84c35a83d3 plugin-sdk-api-baseline.jsonl
64c5f94fe0234da8ae2312ab30694ebc5675091fadebac92c106210f45a66e91 plugin-sdk-api-baseline.json
fd00bb4cd8f1e32503f94e8542db95235ec641eb62ae45d6d4b653d9ff60cb09 plugin-sdk-api-baseline.jsonl

View File

@@ -14,7 +14,7 @@ detail unless a doc page explicitly promotes them.
For the plugin authoring guide, see [Plugin SDK overview](/plugins/sdk-overview).
### Plugin entry
## Plugin entry
| Subpath | Key exports |
| --------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- |

View File

@@ -92,6 +92,14 @@ Aspect ratio: `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `
Resolution hint.
</ParamField>
<ParamField path="quality" type="'low' | 'medium' | 'high' | 'auto'">
Quality hint when the provider supports it.
</ParamField>
<ParamField path="outputFormat" type="'png' | 'jpeg' | 'webp'">
Output format hint when the provider supports it.
</ParamField>
<ParamField path="count" type="number">
Number of images to generate (1-4).
</ParamField>
@@ -104,7 +112,11 @@ Optional provider request timeout in milliseconds.
Output filename hint.
</ParamField>
Not all providers support all parameters. When a fallback provider supports a nearby geometry option instead of the exact requested one, OpenClaw remaps to the closest supported size, aspect ratio, or resolution before submission. Truly unsupported overrides are still reported in the tool result.
<ParamField path="openai" type="object">
OpenAI-only hints: `background`, `moderation`, `outputCompression`, and `user`.
</ParamField>
Not all providers support all parameters. When a fallback provider supports a nearby geometry option instead of the exact requested one, OpenClaw remaps to the closest supported size, aspect ratio, or resolution before submission. Unsupported output hints such as `quality` or `outputFormat` are dropped for providers that do not declare support and are reported in the tool result.
Tool results report the applied settings. When OpenClaw remaps geometry during provider fallback, the returned `size`, `aspectRatio`, and `resolution` values reflect what was actually sent, and `details.normalization` captures the requested-to-applied translation.
@@ -173,9 +185,29 @@ image-generation and image-editing requests should use `gpt-image-2`.
`gpt-image-2` supports both text-to-image generation and reference-image
editing through the same `image_generate` tool. OpenClaw forwards `prompt`,
`count`, `size`, and reference images to OpenAI. OpenAI does not receive
`aspectRatio` or `resolution` directly; when possible OpenClaw maps those into a
supported `size`, otherwise the tool reports them as ignored overrides.
`count`, `size`, `quality`, `outputFormat`, and reference images to OpenAI.
OpenAI does not receive `aspectRatio` or `resolution` directly; when possible
OpenClaw maps those into a supported `size`, otherwise the tool reports them as
ignored overrides.
OpenAI-specific options live under the `openai` object:
```json
{
"quality": "low",
"outputFormat": "jpeg",
"openai": {
"background": "opaque",
"moderation": "low",
"outputCompression": 60,
"user": "end-user-42"
}
}
```
`openai.background` accepts `transparent`, `opaque`, or `auto`; transparent
outputs require `outputFormat` `png` or `webp`. `openai.outputCompression`
applies to JPEG/WebP outputs.
Generate one 4K landscape image:

View File

@@ -198,6 +198,10 @@ describe("openai image generation provider", () => {
expect(provider.capabilities.geometry?.sizes).toEqual(
expect.arrayContaining(["2048x2048", "3840x2160", "2160x3840"]),
);
expect(provider.capabilities.output).toEqual({
formats: ["png", "jpeg", "webp"],
qualities: ["low", "medium", "high", "auto"],
});
});
it("reports configured when either OpenAI API key auth or Codex OAuth auth is available", () => {
@@ -339,6 +343,50 @@ describe("openai image generation provider", () => {
expect(result.images).toHaveLength(1);
});
// Direct generation path (OpenAI API key): the provider should forward the
// quality/outputFormat hints and every openai.* provider option to the JSON
// images endpoint, translated to the snake_case keys the API expects.
it("forwards output and OpenAI-only options on direct generations", async () => {
mockGeneratedPngResponse();
const provider = buildOpenAIImageGenerationProvider();
const result = await provider.generateImage({
provider: "openai",
model: "gpt-image-2",
prompt: "Cheap JPEG preview",
cfg: {},
quality: "low",
outputFormat: "jpeg",
providerOptions: {
openai: {
background: "opaque",
moderation: "low",
outputCompression: 60,
user: "end-user-42",
},
},
});
// camelCase request fields must arrive as snake_case API fields
// (outputFormat -> output_format, outputCompression -> output_compression).
expect(postJsonRequestMock).toHaveBeenCalledWith(
expect.objectContaining({
url: "https://api.openai.com/v1/images/generations",
body: {
model: "gpt-image-2",
prompt: "Cheap JPEG preview",
n: 1,
size: "1024x1024",
quality: "low",
output_format: "jpeg",
background: "opaque",
moderation: "low",
output_compression: 60,
user: "end-user-42",
},
}),
);
// The reported asset metadata should reflect the requested jpeg format,
// not the png default.
expect(result.images[0]).toMatchObject({
mimeType: "image/jpeg",
fileName: "image-1.jpg",
});
});
it("allows loopback image requests for the synthetic mock-openai provider", async () => {
mockGeneratedPngResponse();
@@ -463,6 +511,44 @@ describe("openai image generation provider", () => {
expect(result.images).toHaveLength(1);
});
// Edit path (reference images present) posts multipart/form-data; output
// hints and openai.* options must be appended as string form fields.
it("forwards output and OpenAI-only options on multipart edits", async () => {
mockGeneratedPngResponse();
const provider = buildOpenAIImageGenerationProvider();
const result = await provider.generateImage({
provider: "openai",
model: "gpt-image-2",
prompt: "Edit as WebP",
cfg: {},
inputImages: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
quality: "high",
outputFormat: "webp",
providerOptions: {
openai: {
background: "transparent",
moderation: "auto",
outputCompression: 75,
user: "end-user-99",
},
},
});
const editCallArgs = postMultipartRequestMock.mock.calls[0]?.[0] as {
body: FormData;
};
const form = editCallArgs.body;
expect(form.get("quality")).toBe("high");
expect(form.get("output_format")).toBe("webp");
expect(form.get("background")).toBe("transparent");
expect(form.get("moderation")).toBe("auto");
// FormData stringifies values, so the numeric compression arrives as "75".
expect(form.get("output_compression")).toBe("75");
expect(form.get("user")).toBe("end-user-99");
expect(result.images[0]).toMatchObject({
mimeType: "image/webp",
fileName: "image-1.webp",
});
});
it("falls back to Codex OAuth image generation through Responses streaming", async () => {
mockCodexAuthOnly();
mockCodexImageStream({ imageData: "codex-image", revisedPrompt: "revised codex prompt" });
@@ -477,6 +563,14 @@ describe("openai image generation provider", () => {
authStore,
count: 1,
size: "1024x1536",
quality: "low",
outputFormat: "jpeg",
providerOptions: {
openai: {
background: "opaque",
outputCompression: 55,
},
},
});
expect(resolveApiKeyForProviderMock).toHaveBeenCalledWith(
@@ -517,6 +611,10 @@ describe("openai image generation provider", () => {
type: "image_generation",
model: "gpt-image-2",
size: "1024x1536",
quality: "low",
output_format: "jpeg",
background: "opaque",
output_compression: 55,
},
],
tool_choice: { type: "image_generation" },
@@ -530,8 +628,8 @@ describe("openai image generation provider", () => {
expect(result.images).toEqual([
{
buffer: Buffer.from("codex-image"),
mimeType: "image/png",
fileName: "image-1.png",
mimeType: "image/jpeg",
fileName: "image-1.jpg",
revisedPrompt: "revised codex prompt",
},
]);

View File

@@ -1,6 +1,7 @@
import path from "node:path";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import type {
ImageGenerationOutputFormat,
ImageGenerationProvider,
ImageGenerationResult,
ImageGenerationSourceImage,
@@ -28,6 +29,7 @@ const DEFAULT_OPENAI_CODEX_IMAGE_BASE_URL = "https://chatgpt.com/backend-api/cod
const OPENAI_CODEX_IMAGE_INSTRUCTIONS = "You are an image generation assistant.";
const DEFAULT_OPENAI_IMAGE_TIMEOUT_MS = 180_000;
const DEFAULT_OUTPUT_MIME = "image/png";
const DEFAULT_OUTPUT_EXTENSION = "png";
const DEFAULT_SIZE = "1024x1024";
const OPENAI_SUPPORTED_SIZES = [
"1024x1024",
@@ -45,6 +47,8 @@ const MAX_CODEX_IMAGE_SSE_EVENTS = 512;
const MAX_CODEX_IMAGE_BASE64_CHARS = 64 * 1024 * 1024;
const LOG_VALUE_MAX_CHARS = 256;
const MOCK_OPENAI_PROVIDER_ID = "mock-openai";
const OPENAI_OUTPUT_FORMATS = ["png", "jpeg", "webp"] as const;
const OPENAI_QUALITIES = ["low", "medium", "high", "auto"] as const;
const log = createSubsystemLogger("image-generation/openai");
const AZURE_HOSTNAME_SUFFIXES = [
@@ -139,6 +143,46 @@ function buildAzureImageUrl(
return `${cleanBase}/openai/deployments/${model}/images/${action}?api-version=${resolveAzureApiVersion()}`;
}
/**
 * Map a requested image output format to its MIME type and file extension.
 *
 * Falls back to PNG for "png", an omitted format, or any unexpected runtime
 * value (inputs may bypass schema validation when callers are untyped).
 */
function resolveOutputMime(outputFormat?: ImageGenerationOutputFormat): {
  mimeType: string;
  extension: string;
} {
  switch (outputFormat) {
    case "jpeg":
      return { mimeType: "image/jpeg", extension: "jpg" };
    case "webp":
      return { mimeType: "image/webp", extension: "webp" };
    default:
      // "png", undefined, and the defensive fallback all share the PNG
      // defaults; the previous version duplicated this return after the
      // switch, leaving unreachable dead code.
      return { mimeType: DEFAULT_OUTPUT_MIME, extension: DEFAULT_OUTPUT_EXTENSION };
  }
}
/**
 * Copy output hints (quality/outputFormat) and OpenAI-only provider options
 * from an image-generation request onto an outgoing payload.
 *
 * Works for both JSON bodies (plain records) and multipart edits (FormData);
 * FormData values are stringified, JSON values are passed through unchanged.
 * Keys are renamed to the snake_case names the OpenAI Images API expects.
 */
function appendOpenAIImageOptions(
  target: Record<string, unknown> | FormData,
  req: Parameters<ImageGenerationProvider["generateImage"]>[0],
): void {
  const opts = req.providerOptions?.openai;
  const pairs: Array<[string, unknown]> = [];
  if (req.quality !== undefined) {
    pairs.push(["quality", req.quality]);
  }
  if (req.outputFormat !== undefined) {
    pairs.push(["output_format", req.outputFormat]);
  }
  if (opts?.background !== undefined) {
    pairs.push(["background", opts.background]);
  }
  if (opts?.moderation !== undefined) {
    pairs.push(["moderation", opts.moderation]);
  }
  if (opts?.outputCompression !== undefined) {
    pairs.push(["output_compression", opts.outputCompression]);
  }
  if (opts?.user !== undefined) {
    pairs.push(["user", opts.user]);
  }
  for (const [key, value] of pairs) {
    if (target instanceof FormData) {
      target.set(key, String(value));
    } else {
      target[key] = value;
    }
  }
}
function shouldAllowPrivateImageEndpoint(req: {
provider: string;
cfg: OpenClawConfig | undefined;
@@ -341,15 +385,17 @@ function decodeCodexImagePayload(payload: string): Buffer {
function toCodexImage(
entry: { result?: string; revised_prompt?: string },
index: number,
outputFormat?: ImageGenerationOutputFormat,
): ImageGenerationResult["images"][number] | null {
if (typeof entry.result !== "string" || entry.result.length === 0) {
return null;
}
const output = resolveOutputMime(outputFormat);
return Object.assign(
{
buffer: decodeCodexImagePayload(entry.result),
mimeType: DEFAULT_OUTPUT_MIME,
fileName: `image-${index + 1}.png`,
mimeType: output.mimeType,
fileName: `image-${index + 1}.${output.extension}`,
},
entry.revised_prompt ? { revisedPrompt: entry.revised_prompt } : {},
);
@@ -358,6 +404,7 @@ function toCodexImage(
function extractCodexImageGenerationResult(params: {
body: string;
model: string;
outputFormat?: ImageGenerationOutputFormat;
}): ImageGenerationResult {
const events = parseCodexImageGenerationEvents(params.body);
const failure = events.find(
@@ -380,12 +427,14 @@ function extractCodexImageGenerationResult(params: {
event.item.result.length > 0,
)
.slice(0, OPENAI_MAX_IMAGE_RESULTS)
.map((event, index) => (event.item ? toCodexImage(event.item, index) : null))
.map((event, index) =>
event.item ? toCodexImage(event.item, index, params.outputFormat) : null,
)
.filter((image): image is NonNullable<typeof image> => image !== null);
const completedOutputImages = (completedResponse?.response?.output ?? [])
.filter((entry) => entry.type === "image_generation_call")
.slice(0, OPENAI_MAX_IMAGE_RESULTS)
.map((entry, index) => toCodexImage(entry, index))
.map((entry, index) => toCodexImage(entry, index, params.outputFormat))
.filter((image): image is NonNullable<typeof image> => image !== null);
const images = outputItemImages.length > 0 ? outputItemImages : completedOutputImages;
@@ -433,6 +482,10 @@ function createOpenAIImageGenerationProviderBase(params: {
geometry: {
sizes: [...OPENAI_SUPPORTED_SIZES],
},
output: {
formats: [...OPENAI_OUTPUT_FORMATS],
qualities: [...OPENAI_QUALITIES],
},
},
generateImage: params.generateImage,
};
@@ -494,6 +547,7 @@ async function generateOpenAICodexImage(params: {
const count = resolveOpenAIImageCount(req.count);
const size = req.size ?? DEFAULT_SIZE;
const timeoutMs = resolveOpenAIImageTimeoutMs(req.timeoutMs);
const openai = req.providerOptions?.openai;
headers.set("Content-Type", "application/json");
const content: Array<Record<string, unknown>> = [
{ type: "input_text", text: req.prompt },
@@ -522,6 +576,12 @@ async function generateOpenAICodexImage(params: {
type: "image_generation",
model,
size,
...(req.quality !== undefined ? { quality: req.quality } : {}),
...(req.outputFormat !== undefined ? { output_format: req.outputFormat } : {}),
...(openai?.background !== undefined ? { background: openai.background } : {}),
...(openai?.outputCompression !== undefined
? { output_compression: openai.outputCompression }
: {}),
},
],
tool_choice: { type: "image_generation" },
@@ -540,6 +600,7 @@ async function generateOpenAICodexImage(params: {
extractCodexImageGenerationResult({
body: await readResponseBodyText(response),
model,
outputFormat: req.outputFormat,
}),
);
} finally {
@@ -547,10 +608,11 @@ async function generateOpenAICodexImage(params: {
}
}
const images = results.flatMap((result) => result.images);
const output = resolveOutputMime(req.outputFormat);
return {
images: images.map((image, index) =>
Object.assign({}, image, {
fileName: `image-${index + 1}.png`,
fileName: `image-${index + 1}.${output.extension}`,
}),
),
model,
@@ -657,6 +719,7 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider {
form.set("prompt", req.prompt);
form.set("n", String(count));
form.set("size", size);
appendOpenAIImageOptions(form, req);
for (const [index, image] of inputImages.entries()) {
const mimeType = image.mimeType?.trim() || DEFAULT_OUTPUT_MIME;
form.append(
@@ -685,15 +748,17 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider {
: await (() => {
const jsonHeaders = new Headers(headers);
jsonHeaders.set("Content-Type", "application/json");
const body: Record<string, unknown> = {
model,
prompt: req.prompt,
n: count,
size,
};
appendOpenAIImageOptions(body, req);
return postJsonRequest({
url,
headers: jsonHeaders,
body: {
model,
prompt: req.prompt,
n: count,
size,
},
body,
timeoutMs,
fetchFn: fetch,
allowPrivateNetwork,
@@ -708,6 +773,7 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider {
);
const data = (await response.json()) as OpenAIImageApiResponse;
const output = resolveOutputMime(req.outputFormat);
const images = (data.data ?? [])
.map((entry, index) => {
if (!entry.b64_json) {
@@ -716,8 +782,8 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider {
return Object.assign(
{
buffer: Buffer.from(entry.b64_json, `base64`),
mimeType: DEFAULT_OUTPUT_MIME,
fileName: `image-${index + 1}.png`,
mimeType: output.mimeType,
fileName: `image-${index + 1}.${output.extension}`,
},
entry.revised_prompt ? { revisedPrompt: entry.revised_prompt } : {},
);

View File

@@ -59,9 +59,9 @@ const allowedRawFetchCallsites = new Set([
bundledPluginCallsite("qqbot", "src/engine/tools/channel-api.ts", 178),
bundledPluginCallsite("qqbot", "src/engine/utils/stt.ts", 87),
bundledPluginCallsite("signal", "src/install-signal-cli.ts", 224),
bundledPluginCallsite("slack", "src/monitor/media.ts", 99),
bundledPluginCallsite("slack", "src/monitor/media.ts", 118),
bundledPluginCallsite("slack", "src/monitor/media.ts", 123),
bundledPluginCallsite("slack", "src/monitor/media.ts", 106),
bundledPluginCallsite("slack", "src/monitor/media.ts", 125),
bundledPluginCallsite("slack", "src/monitor/media.ts", 130),
bundledPluginCallsite("venice", "models.ts", 552),
bundledPluginCallsite("vercel-ai-gateway", "models.ts", 181),
bundledPluginCallsite("voice-call", "src/providers/twilio/api.ts", 23),

View File

@@ -478,6 +478,62 @@ describe("createImageGenerateTool", () => {
expect(text).toContain("MEDIA:/tmp/generated-2.png");
});
// Tool layer: `image_generate` arguments (quality/outputFormat plus the
// nested `openai` object) must reach the runtime as request fields and
// providerOptions, and the applied hints must be echoed in result details.
it("forwards output hints and OpenAI provider options", async () => {
const generateImage = vi.spyOn(imageGenerationRuntime, "generateImage").mockResolvedValue({
provider: "openai",
model: "gpt-image-2",
attempts: [],
ignoredOverrides: [],
images: [
{
buffer: Buffer.from("jpg-out"),
mimeType: "image/jpeg",
fileName: "preview.jpg",
},
],
});
vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValue({
path: "/tmp/generated.jpg",
id: "generated.jpg",
size: 5,
contentType: "image/jpeg",
});
const tool = createToolWithPrimaryImageModel("openai/gpt-image-2");
const result = await tool.execute("call-openai-hints", {
prompt: "Cheap preview",
quality: "low",
outputFormat: "jpeg",
openai: {
background: "opaque",
moderation: "low",
outputCompression: 60,
user: "end-user-42",
},
});
// The flat `openai` tool argument becomes providerOptions.openai downstream.
expect(generateImage).toHaveBeenCalledWith(
expect.objectContaining({
quality: "low",
outputFormat: "jpeg",
providerOptions: {
openai: {
background: "opaque",
moderation: "low",
outputCompression: 60,
user: "end-user-42",
},
},
}),
);
// Applied hints surface in the tool result so callers can see what was used.
expect(result).toMatchObject({
details: {
quality: "low",
outputFormat: "jpeg",
},
});
});
it("includes MEDIA paths in content text so follow-up replies use the real saved file", async () => {
vi.spyOn(imageGenerationRuntime, "listRuntimeImageGenerationProviders").mockReturnValue([
{

View File

@@ -8,7 +8,13 @@ import {
} from "../../image-generation/runtime.js";
import type {
ImageGenerationIgnoredOverride,
ImageGenerationOpenAIBackground,
ImageGenerationOpenAIModeration,
ImageGenerationOpenAIOptions,
ImageGenerationOutputFormat,
ImageGenerationProvider,
ImageGenerationProviderOptions,
ImageGenerationQuality,
ImageGenerationResolution,
ImageGenerationSourceImage,
} from "../../image-generation/types.js";
@@ -18,6 +24,7 @@ import { saveMediaBuffer } from "../../media/store.js";
import { loadWebMedia } from "../../media/web-media.js";
import { getProviderEnvVars } from "../../secrets/provider-env-vars.js";
import { resolveUserPath } from "../../utils.js";
import { optionalStringEnum } from "../schema/string-enum.js";
import { ToolInputError, readNumberParam, readStringParam } from "./common.js";
import { decodeDataUrl } from "./image-tool.helpers.js";
import {
@@ -44,6 +51,10 @@ const DEFAULT_COUNT = 1;
const MAX_COUNT = 4;
const MAX_INPUT_IMAGES = 5;
const DEFAULT_RESOLUTION: ImageGenerationResolution = "1K";
const SUPPORTED_QUALITIES = ["low", "medium", "high", "auto"] as const;
const SUPPORTED_OUTPUT_FORMATS = ["png", "jpeg", "webp"] as const;
const SUPPORTED_OPENAI_BACKGROUNDS = ["transparent", "opaque", "auto"] as const;
const SUPPORTED_OPENAI_MODERATIONS = ["low", "auto"] as const;
const SUPPORTED_ASPECT_RATIOS = new Set([
"1:1",
"2:3",
@@ -102,6 +113,34 @@ const ImageGenerateToolSchema = Type.Object({
"Optional resolution hint: 1K, 2K, or 4K. Useful for Google edit/generation flows.",
}),
),
quality: optionalStringEnum(SUPPORTED_QUALITIES, {
description: "Optional quality hint: low, medium, high, or auto when the provider supports it.",
}),
outputFormat: optionalStringEnum(SUPPORTED_OUTPUT_FORMATS, {
description: "Optional output format hint: png, jpeg, or webp when the provider supports it.",
}),
openai: Type.Optional(
Type.Object({
background: optionalStringEnum(SUPPORTED_OPENAI_BACKGROUNDS, {
description: "OpenAI-only background hint: transparent, opaque, or auto.",
}),
moderation: optionalStringEnum(SUPPORTED_OPENAI_MODERATIONS, {
description: "OpenAI-only moderation hint: low or auto.",
}),
outputCompression: Type.Optional(
Type.Number({
description: "OpenAI-only compression level for jpeg/webp outputFormat, 0-100.",
minimum: 0,
maximum: 100,
}),
),
user: Type.Optional(
Type.String({
description: "OpenAI-only stable end-user identifier for abuse monitoring.",
}),
),
}),
),
count: Type.Optional(
Type.Number({
description: `Optional number of images to request (1-${MAX_COUNT}).`,
@@ -176,6 +215,85 @@ function normalizeAspectRatio(raw: string | undefined): string | undefined {
);
}
/**
 * Validate and canonicalize a user-supplied quality hint.
 *
 * Trims and lowercases the input; returns undefined for missing/blank input.
 * @throws ToolInputError when the value is not a supported quality.
 */
function normalizeQuality(raw: string | undefined): ImageGenerationQuality | undefined {
  const value = raw?.trim().toLowerCase();
  if (!value) {
    return undefined;
  }
  const match = SUPPORTED_QUALITIES.find((candidate) => candidate === value);
  if (match) {
    return match;
  }
  throw new ToolInputError("quality must be one of low, medium, high, or auto");
}
/**
 * Validate and canonicalize a user-supplied output-format hint.
 *
 * Trims and lowercases the input; returns undefined for missing/blank input.
 * @throws ToolInputError when the value is not a supported format.
 */
function normalizeOutputFormat(raw: string | undefined): ImageGenerationOutputFormat | undefined {
  const value = raw?.trim().toLowerCase();
  if (!value) {
    return undefined;
  }
  const match = SUPPORTED_OUTPUT_FORMATS.find((candidate) => candidate === value);
  if (match) {
    return match;
  }
  throw new ToolInputError("outputFormat must be one of png, jpeg, or webp");
}
/**
 * Validate and canonicalize the OpenAI-only background hint.
 *
 * Trims and lowercases the input; returns undefined for missing/blank input.
 * @throws ToolInputError when the value is not a supported background.
 */
function normalizeOpenAIBackground(
  raw: string | undefined,
): ImageGenerationOpenAIBackground | undefined {
  const value = raw?.trim().toLowerCase();
  if (!value) {
    return undefined;
  }
  const match = SUPPORTED_OPENAI_BACKGROUNDS.find((candidate) => candidate === value);
  if (match) {
    return match;
  }
  throw new ToolInputError("openai.background must be one of transparent, opaque, or auto");
}
/**
 * Validate and canonicalize the OpenAI-only moderation hint.
 *
 * Trims and lowercases the input; returns undefined for missing/blank input.
 * @throws ToolInputError when the value is not a supported moderation level.
 */
function normalizeOpenAIModeration(
  raw: string | undefined,
): ImageGenerationOpenAIModeration | undefined {
  const value = raw?.trim().toLowerCase();
  if (!value) {
    return undefined;
  }
  const match = SUPPORTED_OPENAI_MODERATIONS.find((candidate) => candidate === value);
  if (match) {
    return match;
  }
  throw new ToolInputError("openai.moderation must be one of low or auto");
}
/**
 * Read a plain-object parameter from a tool-arguments record.
 *
 * Arrays, null, undefined, and non-object values all collapse to an empty
 * record so callers can read nested keys without null checks.
 */
function readRecordParam(params: Record<string, unknown>, key: string): Record<string, unknown> {
  const value = params[key];
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
/**
 * Extract and validate the OpenAI-only option bag from tool arguments.
 *
 * Only keys the user actually supplied appear in the result, so callers can
 * test `Object.keys(...).length` to detect "no OpenAI options were given".
 * @throws ToolInputError when a nested value fails validation or when
 *   outputCompression falls outside 0-100.
 */
function normalizeOpenAIOptions(args: Record<string, unknown>): ImageGenerationOpenAIOptions {
  const raw = readRecordParam(args, "openai");
  // Validate in declaration order so the first invalid field wins the error.
  const background = normalizeOpenAIBackground(readStringParam(raw, "background"));
  const moderation = normalizeOpenAIModeration(readStringParam(raw, "moderation"));
  const outputCompression = readNumberParam(raw, "outputCompression", { integer: true });
  const user = readStringParam(raw, "user");
  if (outputCompression !== undefined && (outputCompression < 0 || outputCompression > 100)) {
    throw new ToolInputError("openai.outputCompression must be between 0 and 100");
  }
  const options: ImageGenerationOpenAIOptions = {};
  if (background) {
    options.background = background;
  }
  if (moderation) {
    options.moderation = moderation;
  }
  if (outputCompression !== undefined) {
    options.outputCompression = outputCompression;
  }
  if (user) {
    options.user = user;
  }
  return options;
}
/**
 * Bundle provider-specific option bags for the image-generation runtime.
 *
 * Currently only OpenAI options exist; returns undefined when none were
 * supplied so request payloads stay free of empty objects.
 */
function normalizeProviderOptions(
  args: Record<string, unknown>,
): ImageGenerationProviderOptions | undefined {
  const openai = normalizeOpenAIOptions(args);
  if (Object.keys(openai).length === 0) {
    return undefined;
  }
  return { openai };
}
function normalizeReferenceImages(args: Record<string, unknown>): string[] {
return normalizeMediaReferenceInputs({
args,
@@ -498,6 +616,9 @@ export function createImageGenerateTool(options?: {
const aspectRatio = normalizeAspectRatio(readStringParam(params, "aspectRatio"));
const explicitResolution = normalizeResolution(readStringParam(params, "resolution"));
const timeoutMs = readGenerationTimeoutMs(params);
const quality = normalizeQuality(readStringParam(params, "quality"));
const outputFormat = normalizeOutputFormat(readStringParam(params, "outputFormat"));
const providerOptions = normalizeProviderOptions(params);
const selectedProvider = resolveSelectedImageGenerationProvider({
config: effectiveCfg,
imageGenerationModelConfig,
@@ -541,9 +662,12 @@ export function createImageGenerateTool(options?: {
size,
aspectRatio,
resolution,
quality,
outputFormat,
count,
inputImages,
timeoutMs,
providerOptions,
});
const ignoredOverrides = result.ignoredOverrides ?? [];
const displayProvider = sanitizeInlineDirectiveText(result.provider);
@@ -625,6 +749,8 @@ export function createImageGenerateTool(options?: {
...(normalizedAspectRatio || aspectRatio
? { aspectRatio: normalizedAspectRatio ?? aspectRatio }
: {}),
...(quality ? { quality } : {}),
...(outputFormat ? { outputFormat } : {}),
...(filename ? { filename } : {}),
...(timeoutMs !== undefined ? { timeoutMs } : {}),
attempts: result.attempts,

View File

@@ -8,7 +8,9 @@ import {
import type {
ImageGenerationIgnoredOverride,
ImageGenerationNormalization,
ImageGenerationOutputFormat,
ImageGenerationProvider,
ImageGenerationQuality,
ImageGenerationResolution,
ImageGenerationSourceImage,
} from "./types.js";
@@ -17,6 +19,8 @@ export type ResolvedImageGenerationOverrides = {
size?: string;
aspectRatio?: string;
resolution?: ImageGenerationResolution;
quality?: ImageGenerationQuality;
outputFormat?: ImageGenerationOutputFormat;
ignoredOverrides: ImageGenerationIgnoredOverride[];
normalization?: ImageGenerationNormalization;
};
@@ -36,6 +40,8 @@ export function resolveImageGenerationOverrides(params: {
size?: string;
aspectRatio?: string;
resolution?: ImageGenerationResolution;
quality?: ImageGenerationQuality;
outputFormat?: ImageGenerationOutputFormat;
inputImages?: ImageGenerationSourceImage[];
}): ResolvedImageGenerationOverrides {
const hasInputImages = (params.inputImages?.length ?? 0) > 0;
@@ -48,6 +54,8 @@ export function resolveImageGenerationOverrides(params: {
let size = params.size;
let aspectRatio = params.aspectRatio;
let resolution = params.resolution;
let quality = params.quality;
let outputFormat = params.outputFormat;
if (size && (geometry?.sizes?.length ?? 0) > 0 && modeCaps.supportsSize) {
const normalizedSize = resolveClosestSize({
@@ -155,6 +163,18 @@ export function resolveImageGenerationOverrides(params: {
resolution = undefined;
}
const supportedQualities = params.provider.capabilities.output?.qualities;
if (quality && !(supportedQualities ?? []).includes(quality)) {
ignoredOverrides.push({ key: "quality", value: quality });
quality = undefined;
}
const supportedFormats = params.provider.capabilities.output?.formats;
if (outputFormat && !(supportedFormats ?? []).includes(outputFormat)) {
ignoredOverrides.push({ key: "outputFormat", value: outputFormat });
outputFormat = undefined;
}
if (
!normalization.aspectRatio &&
aspectRatio &&
@@ -198,6 +218,8 @@ export function resolveImageGenerationOverrides(params: {
size,
aspectRatio,
resolution,
quality,
outputFormat,
ignoredOverrides,
normalization: finalizeImageNormalization(normalization),
};

View File

@@ -5,7 +5,10 @@ import type {
GeneratedImageAsset,
ImageGenerationIgnoredOverride,
ImageGenerationNormalization,
ImageGenerationOutputFormat,
ImageGenerationProvider,
ImageGenerationProviderOptions,
ImageGenerationQuality,
ImageGenerationResolution,
ImageGenerationSourceImage,
} from "./types.js";
@@ -20,9 +23,12 @@ export type GenerateImageParams = {
size?: string;
aspectRatio?: string;
resolution?: ImageGenerationResolution;
quality?: ImageGenerationQuality;
outputFormat?: ImageGenerationOutputFormat;
inputImages?: ImageGenerationSourceImage[];
/** Optional per-request provider timeout in milliseconds. */
timeoutMs?: number;
providerOptions?: ImageGenerationProviderOptions;
};
export type GenerateImageRuntimeResult = {

View File

@@ -221,6 +221,128 @@ describe("image-generation runtime", () => {
]);
});
// When the selected provider declares output capabilities covering the
// requested hints, the runtime must pass quality/outputFormat (and the
// untouched providerOptions) through to the provider with nothing ignored.
it("filters image output hints by provider capabilities", async () => {
let seenRequest:
| {
quality?: string;
outputFormat?: string;
providerOptions?: unknown;
}
| undefined;
mocks.resolveAgentModelPrimaryValue.mockReturnValue("openai/gpt-image-2");
mocks.getImageGenerationProvider.mockReturnValue({
id: "openai",
capabilities: {
generate: {
supportsSize: true,
},
edit: {
enabled: true,
supportsSize: true,
},
// Declares support for every quality/format requested below.
output: {
qualities: ["low", "medium", "high", "auto"],
formats: ["png", "jpeg", "webp"],
},
},
async generateImage(req) {
seenRequest = {
quality: req.quality,
outputFormat: req.outputFormat,
providerOptions: req.providerOptions,
};
return {
images: [{ buffer: Buffer.from("jpeg-bytes"), mimeType: "image/jpeg" }],
};
},
});
const result = await generateImage({
cfg: {
agents: {
defaults: {
imageGenerationModel: { primary: "openai/gpt-image-2" },
},
},
} as OpenClawConfig,
prompt: "draw a cheap preview",
quality: "low",
outputFormat: "jpeg",
providerOptions: {
openai: {
background: "opaque",
moderation: "low",
outputCompression: 60,
user: "end-user-42",
},
},
});
expect(seenRequest).toEqual({
quality: "low",
outputFormat: "jpeg",
providerOptions: {
openai: {
background: "opaque",
moderation: "low",
outputCompression: 60,
user: "end-user-42",
},
},
});
// Supported hints must not be reported as ignored overrides.
expect(result.ignoredOverrides).toEqual([]);
});
// Providers that do not declare output capabilities must not receive the
// hints; the runtime strips them before the provider call and reports each
// one as an ignored override so the tool result can surface the mismatch.
it("drops unsupported image output hints and reports them", async () => {
let seenRequest:
| {
quality?: string;
outputFormat?: string;
}
| undefined;
mocks.resolveAgentModelPrimaryValue.mockReturnValue("vydra/grok-imagine");
mocks.getImageGenerationProvider.mockReturnValue({
id: "vydra",
// No `output` capability block: quality/format hints are unsupported.
capabilities: {
generate: {},
edit: {
enabled: false,
},
},
async generateImage(req) {
seenRequest = {
quality: req.quality,
outputFormat: req.outputFormat,
};
return {
images: [{ buffer: Buffer.from("png-bytes"), mimeType: "image/png" }],
};
},
});
const result = await generateImage({
cfg: {
agents: {
defaults: {
imageGenerationModel: { primary: "vydra/grok-imagine" },
},
},
} as OpenClawConfig,
prompt: "draw a cat",
quality: "low",
outputFormat: "jpeg",
});
expect(seenRequest).toEqual({
quality: undefined,
outputFormat: undefined,
});
expect(result.ignoredOverrides).toEqual([
{ key: "quality", value: "low" },
{ key: "outputFormat", value: "jpeg" },
]);
});
it("maps requested size to the closest supported fallback geometry", async () => {
let seenRequest:
| {

View File

@@ -71,6 +71,8 @@ export async function generateImage(
size: params.size,
aspectRatio: params.aspectRatio,
resolution: params.resolution,
quality: params.quality,
outputFormat: params.outputFormat,
inputImages: params.inputImages,
});
const result: ImageGenerationResult = await provider.generateImage({
@@ -84,8 +86,11 @@ export async function generateImage(
size: sanitized.size,
aspectRatio: sanitized.aspectRatio,
resolution: sanitized.resolution,
quality: sanitized.quality,
outputFormat: sanitized.outputFormat,
inputImages: params.inputImages,
...(params.timeoutMs !== undefined ? { timeoutMs: params.timeoutMs } : {}),
providerOptions: params.providerOptions,
});
if (!Array.isArray(result.images) || result.images.length === 0) {
throw new Error("Image generation provider returned no images.");

View File

@@ -12,7 +12,31 @@ export type GeneratedImageAsset = {
export type ImageGenerationResolution = "1K" | "2K" | "4K";
export type ImageGenerationIgnoredOverrideKey = "size" | "aspectRatio" | "resolution";
export type ImageGenerationQuality = "low" | "medium" | "high" | "auto";
export type ImageGenerationOutputFormat = "png" | "jpeg" | "webp";
export type ImageGenerationOpenAIBackground = "transparent" | "opaque" | "auto";
export type ImageGenerationOpenAIModeration = "low" | "auto";
export type ImageGenerationOpenAIOptions = {
background?: ImageGenerationOpenAIBackground;
moderation?: ImageGenerationOpenAIModeration;
outputCompression?: number;
user?: string;
};
export type ImageGenerationProviderOptions = {
openai?: ImageGenerationOpenAIOptions;
};
export type ImageGenerationIgnoredOverrideKey =
| "size"
| "aspectRatio"
| "resolution"
| "quality"
| "outputFormat";
export type ImageGenerationIgnoredOverride = {
key: ImageGenerationIgnoredOverrideKey;
@@ -43,7 +67,10 @@ export type ImageGenerationRequest = {
size?: string;
aspectRatio?: string;
resolution?: ImageGenerationResolution;
quality?: ImageGenerationQuality;
outputFormat?: ImageGenerationOutputFormat;
inputImages?: ImageGenerationSourceImage[];
providerOptions?: ImageGenerationProviderOptions;
};
export type ImageGenerationResult = {
@@ -70,6 +97,11 @@ export type ImageGenerationGeometryCapabilities = {
resolutions?: ImageGenerationResolution[];
};
export type ImageGenerationOutputCapabilities = {
qualities?: ImageGenerationQuality[];
formats?: ImageGenerationOutputFormat[];
};
export type ImageGenerationNormalization = {
size?: MediaNormalizationEntry<string>;
aspectRatio?: MediaNormalizationEntry<string>;
@@ -80,6 +112,7 @@ export type ImageGenerationProviderCapabilities = {
generate: ImageGenerationModeCapabilities;
edit: ImageGenerationEditCapabilities;
geometry?: ImageGenerationGeometryCapabilities;
output?: ImageGenerationOutputCapabilities;
};
export type ImageGenerationProvider = {

View File

@@ -16,6 +16,7 @@ const SAFE_BIN_DOC_DEFAULTS_START = '[//]: # "SAFE_BIN_DEFAULTS:START"';
const SAFE_BIN_DOC_DEFAULTS_END = '[//]: # "SAFE_BIN_DEFAULTS:END"';
const SAFE_BIN_DOC_DENIED_FLAGS_START = '[//]: # "SAFE_BIN_DENIED_FLAGS:START"';
const SAFE_BIN_DOC_DENIED_FLAGS_END = '[//]: # "SAFE_BIN_DENIED_FLAGS:END"';
const SAFE_BIN_DOC_PATH = "docs/tools/exec-approvals-advanced.md";
function normalizeGeneratedDocBlock(block: string): string {
const lines = block.split("\n");
@@ -187,7 +188,7 @@ describe("exec safe bin policy denied-flag matrix", () => {
describe("exec safe bin policy docs parity", () => {
it("keeps default safe-bin docs in sync with policy defaults", () => {
const docsPath = path.resolve(process.cwd(), "docs/tools/exec-approvals.md");
const docsPath = path.resolve(process.cwd(), SAFE_BIN_DOC_PATH);
const docs = fs.readFileSync(docsPath, "utf8").replaceAll("\r\n", "\n");
const start = docs.indexOf(SAFE_BIN_DOC_DEFAULTS_START);
const end = docs.indexOf(SAFE_BIN_DOC_DEFAULTS_END);
@@ -199,7 +200,7 @@ describe("exec safe bin policy docs parity", () => {
});
it("keeps denied-flag docs in sync with policy fixtures", () => {
const docsPath = path.resolve(process.cwd(), "docs/tools/exec-approvals.md");
const docsPath = path.resolve(process.cwd(), SAFE_BIN_DOC_PATH);
const docs = fs.readFileSync(docsPath, "utf8").replaceAll("\r\n", "\n");
const start = docs.indexOf(SAFE_BIN_DOC_DENIED_FLAGS_START);
const end = docs.indexOf(SAFE_BIN_DOC_DENIED_FLAGS_END);

View File

@@ -2,8 +2,14 @@
export type {
GeneratedImageAsset,
ImageGenerationOpenAIBackground,
ImageGenerationOpenAIModeration,
ImageGenerationOpenAIOptions,
ImageGenerationOutputFormat,
ImageGenerationProvider,
ImageGenerationProviderConfiguredContext,
ImageGenerationProviderOptions,
ImageGenerationQuality,
ImageGenerationResolution,
ImageGenerationRequest,
ImageGenerationResult,