fix: support transparent OpenAI image generation

This commit is contained in:
Peter Steinberger
2026-04-25 19:28:25 +01:00
parent 0bf4876add
commit de0097a23c
9 changed files with 362 additions and 26 deletions

View File

@@ -1,8 +1,45 @@
import { describe, expect, it } from "vitest";
import type { ResponseObject } from "./openai-ws-connection.js";
import { buildAssistantMessageFromResponse } from "./openai-ws-message-conversion.js";
import { buildAssistantMessageFromResponse, convertTools } from "./openai-ws-message-conversion.js";
describe("openai ws message conversion", () => {
it("preserves image_generate transparent-background guidance in OpenAI tool payloads", () => {
  // Guidance text that has to survive the conversion into the OpenAI tool payload.
  const toolGuidance =
    'Generate images. For transparent OpenAI backgrounds, use outputFormat="png" or "webp" and openai.background="transparent"; OpenClaw routes the default OpenAI image model to gpt-image-1.5 for that mode.';
  const modelGuidance =
    "Optional provider/model override; use openai/gpt-image-1.5 for transparent OpenAI backgrounds.";
  const backgroundGuidance =
    "For transparent output use outputFormat png or webp; OpenClaw routes the default OpenAI image model to gpt-image-1.5 for this mode.";

  const [convertedTool] = convertTools([
    {
      name: "image_generate",
      description: toolGuidance,
      parameters: {
        type: "object",
        properties: {
          model: { type: "string", description: modelGuidance },
          outputFormat: { type: "string", enum: ["png", "jpeg", "webp"] },
          openai: {
            type: "object",
            properties: {
              background: {
                type: "string",
                enum: ["transparent", "opaque", "auto"],
                description: backgroundGuidance,
              },
            },
          },
        },
      },
    },
  ]);

  // The top-level description must still carry the transparent-background hint.
  expect(convertedTool?.description).toContain('openai.background="transparent"');
  expect(convertedTool?.description).toContain("gpt-image-1.5");

  // Nested parameter descriptions and enum values are checked on the serialized schema.
  const serializedParameters = JSON.stringify(convertedTool?.parameters);
  expect(serializedParameters).toContain("openai/gpt-image-1.5");
  expect(serializedParameters).toContain("transparent");
});
it("preserves cached token usage from responses usage details", () => {
const response: ResponseObject = {
id: "resp_123",

View File

@@ -218,6 +218,18 @@ describe("createImageGenerateTool", () => {
expect(createImageGenerateTool({ config: {} })).toBeNull();
});
it("tells agents how to request transparent OpenAI backgrounds", () => {
  vi.stubEnv("OPENAI_API_KEY", "openai-key");
  stubImageGenerationProviders();

  const createdTool = createImageGenerateTool({ config: {} });
  const imageTool = requireImageGenerateTool(createdTool);

  // Phrases the tool description must expose, checked in the original order.
  const requiredDescriptionPhrases = [
    'outputFormat="png" or "webp"',
    'openai.background="transparent"',
    "gpt-image-1.5",
  ];
  for (const phrase of requiredDescriptionPhrases) {
    expect(imageTool.description).toContain(phrase);
  }

  // The model override hint lives in the parameter schema, not the description.
  const serializedParameters = JSON.stringify(imageTool.parameters);
  expect(serializedParameters).toContain("openai/gpt-image-1.5");
});
it("matches image-generation providers across canonical provider aliases", () => {
vi.spyOn(imageGenerationRuntime, "listRuntimeImageGenerationProviders").mockReturnValue([
{
@@ -595,6 +607,62 @@ describe("createImageGenerateTool", () => {
});
});
it("forwards transparent OpenAI background requests with a PNG output format", async () => {
  // Stub the runtime so the test can inspect exactly what the tool forwards.
  const generateImageSpy = vi
    .spyOn(imageGenerationRuntime, "generateImage")
    .mockResolvedValue({
      provider: "openai",
      model: "gpt-image-1.5",
      attempts: [],
      ignoredOverrides: [],
      images: [
        {
          buffer: Buffer.from("png-out"),
          mimeType: "image/png",
          fileName: "transparent.png",
        },
      ],
    });
  // Stub the media store's save call with a canned PNG record.
  vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValue({
    path: "/tmp/transparent.png",
    id: "transparent.png",
    size: 7,
    contentType: "image/png",
  });

  const imageTool = createToolWithPrimaryImageModel("openai/gpt-image-1.5");
  const executionResult = await imageTool.execute("call-openai-transparent", {
    prompt: "A transparent badge",
    outputFormat: "png",
    openai: {
      background: "transparent",
    },
  });

  // The configured primary model must be present in the config handed to the runtime.
  const expectedDefaults = expect.objectContaining({
    imageGenerationModel: { primary: "openai/gpt-image-1.5" },
  });
  const expectedConfig = expect.objectContaining({
    agents: expect.objectContaining({ defaults: expectedDefaults }),
  });
  // The OpenAI background option is expected under providerOptions.openai.
  const expectedRequest = expect.objectContaining({
    cfg: expectedConfig,
    outputFormat: "png",
    providerOptions: {
      openai: {
        background: "transparent",
      },
    },
  });
  expect(generateImageSpy).toHaveBeenCalledWith(expectedRequest);

  expect(executionResult).toMatchObject({
    details: {
      provider: "openai",
      model: "gpt-image-1.5",
      outputFormat: "png",
    },
  });
});
it("includes MEDIA paths in content text so follow-up replies use the real saved file", async () => {
vi.spyOn(imageGenerationRuntime, "listRuntimeImageGenerationProviders").mockReturnValue([
{

View File

@@ -96,7 +96,10 @@ const ImageGenerateToolSchema = Type.Object({
}),
),
model: Type.Optional(
Type.String({ description: "Optional provider/model override, e.g. openai/gpt-image-2." }),
Type.String({
description:
"Optional provider/model override, e.g. openai/gpt-image-2; use openai/gpt-image-1.5 for transparent OpenAI backgrounds.",
}),
),
filename: Type.Optional(
Type.String({
@@ -131,7 +134,8 @@ const ImageGenerateToolSchema = Type.Object({
openai: Type.Optional(
Type.Object({
background: optionalStringEnum(SUPPORTED_OPENAI_BACKGROUNDS, {
description: "OpenAI-only background hint: transparent, opaque, or auto.",
description:
"OpenAI-only background hint: transparent, opaque, or auto. For transparent output use outputFormat png or webp; OpenClaw routes the default OpenAI image model to gpt-image-1.5 for this mode.",
}),
moderation: optionalStringEnum(SUPPORTED_OPENAI_MODERATIONS, {
description: "OpenAI-only moderation hint: low or auto.",
@@ -570,7 +574,7 @@ export function createImageGenerateTool(options?: {
label: "Image Generation",
name: "image_generate",
description:
'Generate new images or edit reference images with the configured or inferred image-generation model. Set agents.defaults.imageGenerationModel.primary to pick a provider/model. Providers declare their own auth/readiness; use action="list" to inspect registered providers, models, readiness, and auth hints. Generated images are delivered automatically from the tool result as MEDIA paths.',
'Generate new images or edit reference images with the configured or inferred image-generation model. For transparent OpenAI backgrounds, use outputFormat="png" or "webp" and openai.background="transparent"; OpenClaw routes the default OpenAI image model to gpt-image-1.5 for that mode. Set agents.defaults.imageGenerationModel.primary to pick a provider/model. Providers declare their own auth/readiness; use action="list" to inspect registered providers, models, readiness, and auth hints. Generated images are delivered automatically from the tool result as MEDIA paths.',
parameters: ImageGenerateToolSchema,
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;