feat(openai): add codex oauth image generation

This commit is contained in:
Peter Steinberger
2026-04-23 21:33:43 +01:00
parent 7ce36b4d12
commit c84a2f5244
8 changed files with 435 additions and 23 deletions

View File

@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
- Agents/subagents: add optional forked context for native `sessions_spawn` runs so agents can let a child inherit the requester transcript when needed, while keeping clean isolated sessions as the default; includes prompt guidance, context-engine hook metadata, docs, and QA coverage.
- Providers/OpenAI: add forward-compatible `gpt-5.5` and `gpt-5.5-pro` support for OpenAI API keys, OpenAI Codex OAuth, and the Codex CLI default model.
- Providers/OpenAI Codex: add image generation and reference-image editing through Codex OAuth, so `openai-codex/gpt-image-2` works without an `OPENAI_API_KEY`. Fixes #70703.
### Fixes

View File

@@ -210,12 +210,23 @@ See [Image Generation](/tools/image-generation) for shared tool parameters, prov
editing. `gpt-image-1` remains usable as an explicit model override, but new
OpenAI image workflows should use `openai/gpt-image-2`.
The `openai-codex` provider also exposes `gpt-image-2` for image generation and
reference-image editing through OpenAI Codex OAuth. Use
`openai-codex/gpt-image-2` when the agent is signed in with Codex OAuth but does
not have an `OPENAI_API_KEY`.
Generate:
```
/tool image_generate model=openai/gpt-image-2 prompt="A polished launch poster for OpenClaw on macOS" size=3840x2160 count=1
```
Generate with Codex OAuth:
```
/tool image_generate model=openai-codex/gpt-image-2 prompt="A polished launch poster for OpenClaw on macOS" size=3840x2160 count=1
```
Edit:
```

View File

@@ -36,15 +36,16 @@ The agent calls `image_generate` automatically. No tool allow-listing needed —
## Supported providers
| Provider | Default model | Edit support | API key |
| -------- | -------------------------------- | ---------------------------------- | ----------------------------------------------------- |
| OpenAI | `gpt-image-2` | Yes (up to 5 images) | `OPENAI_API_KEY` |
| Google | `gemini-3.1-flash-image-preview` | Yes | `GEMINI_API_KEY` or `GOOGLE_API_KEY` |
| fal | `fal-ai/flux/dev` | Yes | `FAL_KEY` |
| MiniMax | `image-01` | Yes (subject reference) | `MINIMAX_API_KEY` or MiniMax OAuth (`minimax-portal`) |
| ComfyUI | `workflow` | Yes (1 image, workflow-configured) | `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` for cloud |
| Vydra | `grok-imagine` | No | `VYDRA_API_KEY` |
| xAI | `grok-imagine-image` | Yes (up to 5 images) | `XAI_API_KEY` |
| Provider | Default model | Edit support | API key |
| ------------ | -------------------------------- | ---------------------------------- | ----------------------------------------------------- |
| OpenAI | `gpt-image-2` | Yes (up to 4 images) | `OPENAI_API_KEY` |
| OpenAI Codex | `gpt-image-2` | Yes (up to 4 images) | OpenAI Codex OAuth |
| Google | `gemini-3.1-flash-image-preview` | Yes | `GEMINI_API_KEY` or `GOOGLE_API_KEY` |
| fal | `fal-ai/flux/dev` | Yes | `FAL_KEY` |
| MiniMax | `image-01` | Yes (subject reference) | `MINIMAX_API_KEY` or MiniMax OAuth (`minimax-portal`) |
| ComfyUI | `workflow` | Yes (1 image, workflow-configured) | `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` for cloud |
| Vydra | `grok-imagine` | No | `VYDRA_API_KEY` |
| xAI | `grok-imagine-image` | Yes (up to 5 images) | `XAI_API_KEY` |
Use `action: "list"` to inspect available providers and models at runtime:

View File

@@ -1,5 +1,8 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildOpenAIImageGenerationProvider } from "./image-generation-provider.js";
import {
buildOpenAICodexImageGenerationProvider,
buildOpenAIImageGenerationProvider,
} from "./image-generation-provider.js";
const {
resolveApiKeyForProviderMock,
@@ -47,6 +50,32 @@ function mockGeneratedPngResponse() {
});
}
/**
 * Stubs postJsonRequest with a canned Codex Responses SSE stream that carries
 * one generated image followed by a completion event with usage metadata.
 */
function mockCodexImageStream(params: { imageData?: string; revisedPrompt?: string } = {}) {
  const encodedImage = Buffer.from(params.imageData ?? "codex-png-bytes").toString("base64");
  const imageItem: Record<string, unknown> = {
    type: "image_generation_call",
    result: encodedImage,
  };
  if (params.revisedPrompt) {
    imageItem.revised_prompt = params.revisedPrompt;
  }
  const sseEvents: unknown[] = [
    { type: "response.output_item.done", item: imageItem },
    {
      type: "response.completed",
      response: {
        usage: { input_tokens: 10, output_tokens: 20, total_tokens: 30 },
        tool_usage: { image_gen: { total_tokens: 30 } },
      },
    },
  ];
  let ssePayload = "";
  for (const event of sseEvents) {
    ssePayload += `data: ${JSON.stringify(event)}\n\n`;
  }
  postJsonRequestMock.mockImplementation(async () => ({
    response: new Response(ssePayload),
    release: vi.fn(async () => {}),
  }));
}
describe("openai image generation provider", () => {
afterEach(() => {
resolveApiKeyForProviderMock.mockClear();
@@ -252,6 +281,132 @@ describe("openai image generation provider", () => {
expect(result.images).toHaveLength(1);
});
// Verifies the Codex provider goes through the Responses SSE endpoint with
// OAuth headers and the image_generation tool, not the Images API.
it("registers Codex OAuth image generation through Responses streaming", async () => {
  mockCodexImageStream({ imageData: "codex-image", revisedPrompt: "revised codex prompt" });
  const provider = buildOpenAICodexImageGenerationProvider();
  const authStore = { version: 1, profiles: {} };
  const result = await provider.generateImage({
    provider: "openai-codex",
    model: "gpt-image-2",
    prompt: "Draw a Codex lighthouse",
    cfg: {},
    authStore,
    count: 1,
    size: "1024x1536",
  });
  // OAuth credentials are resolved via the provider auth store.
  expect(resolveApiKeyForProviderMock).toHaveBeenCalledWith(
    expect.objectContaining({
      provider: "openai-codex",
      store: authStore,
    }),
  );
  expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith(
    expect.objectContaining({
      defaultBaseUrl: "https://chatgpt.com/backend-api/codex",
      defaultHeaders: expect.objectContaining({
        Authorization: "Bearer openai-key",
        Accept: "text/event-stream",
      }),
      provider: "openai-codex",
      api: "openai-codex-responses",
      capability: "image",
    }),
  );
  // The outer Responses call pins a text model; the image model and size
  // travel in the image_generation tool config.
  expect(postJsonRequestMock).toHaveBeenCalledWith(
    expect.objectContaining({
      url: "https://chatgpt.com/backend-api/codex/responses",
      body: expect.objectContaining({
        model: "gpt-5.4",
        instructions: "You are an image generation assistant.",
        stream: true,
        store: false,
        tools: [
          {
            type: "image_generation",
            model: "gpt-image-2",
            size: "1024x1536",
          },
        ],
        tool_choice: { type: "image_generation" },
      }),
    }),
  );
  // The multipart Images edit path must never be used for Codex OAuth.
  expect(postMultipartRequestMock).not.toHaveBeenCalled();
  expect(result.images).toEqual([
    {
      buffer: Buffer.from("codex-image"),
      mimeType: "image/png",
      fileName: "image-1.png",
      revisedPrompt: "revised codex prompt",
    },
  ]);
  expect(result.metadata).toEqual({
    responses: [
      {
        usage: { input_tokens: 10, output_tokens: 20, total_tokens: 30 },
        toolUsage: { image_gen: { total_tokens: 30 } },
      },
    ],
  });
});
// Verifies reference images are inlined as base64 input_image parts of the
// Responses request instead of hitting the /images/edits endpoint.
it("sends Codex reference images as Responses input images", async () => {
  mockCodexImageStream();
  const provider = buildOpenAICodexImageGenerationProvider();
  await provider.generateImage({
    provider: "openai-codex",
    model: "gpt-image-2",
    prompt: "Use the reference image",
    cfg: {},
    inputImages: [
      { buffer: Buffer.from("png-bytes"), mimeType: "image/png", fileName: "ref.png" },
    ],
  });
  const body = postJsonRequestMock.mock.calls[0]?.[0].body as {
    input: Array<{ content: Array<Record<string, string>> }>;
  };
  expect(body.input[0]?.content).toEqual([
    { type: "input_text", text: "Use the reference image" },
    {
      type: "input_image",
      image_url: `data:image/png;base64,${Buffer.from("png-bytes").toString("base64")}`,
      detail: "auto",
    },
  ]);
  expect(postJsonRequestMock).not.toHaveBeenCalledWith(
    expect.objectContaining({ url: expect.stringContaining("/images/edits") }),
  );
  expect(postMultipartRequestMock).not.toHaveBeenCalled();
});
// Verifies count > 1 fans out to one Responses request per image and that the
// returned file names are renumbered sequentially across requests.
it("satisfies Codex count by issuing one Responses request per image", async () => {
  mockCodexImageStream({ imageData: "codex-image" });
  const provider = buildOpenAICodexImageGenerationProvider();
  const result = await provider.generateImage({
    provider: "openai-codex",
    model: "gpt-image-2",
    prompt: "Draw two Codex icons",
    cfg: {},
    count: 2,
  });
  expect(postJsonRequestMock).toHaveBeenCalledTimes(2);
  const firstBody = postJsonRequestMock.mock.calls[0]?.[0].body as {
    tools: Array<Record<string, unknown>>;
  };
  expect(firstBody.tools[0]).toEqual({
    type: "image_generation",
    model: "gpt-image-2",
    size: "1024x1024",
  });
  expect(result.images.map((image) => image.fileName)).toEqual(["image-1.png", "image-2.png"]);
});
it("forwards SSRF guard fields to multipart edit requests", async () => {
mockGeneratedPngResponse();

View File

@@ -1,6 +1,10 @@
import path from "node:path";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation";
import type {
ImageGenerationProvider,
ImageGenerationResult,
ImageGenerationSourceImage,
} from "openclaw/plugin-sdk/image-generation";
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
import {
@@ -13,6 +17,8 @@ import { OPENAI_DEFAULT_IMAGE_MODEL as DEFAULT_OPENAI_IMAGE_MODEL } from "./defa
import { resolveConfiguredOpenAIBaseUrl } from "./shared.js";
// Images API base URL for the plain OpenAI provider.
const DEFAULT_OPENAI_IMAGE_BASE_URL = "https://api.openai.com/v1";
// Codex Responses backend used when authenticating via Codex OAuth.
const DEFAULT_OPENAI_CODEX_IMAGE_BASE_URL = "https://chatgpt.com/backend-api/codex";
// Instructions sent with every Codex image generation Responses request.
const OPENAI_CODEX_IMAGE_INSTRUCTIONS = "You are an image generation assistant.";
// Fallback MIME type when a source/result image carries none.
const DEFAULT_OUTPUT_MIME = "image/png";
// Default requested image size when the caller does not specify one.
const DEFAULT_SIZE = "1024x1024";
const OPENAI_SUPPORTED_SIZES = [
@@ -85,6 +91,24 @@ type OpenAIImageApiResponse = {
}>;
};
// Minimal shape of a Codex Responses SSE event as consumed by the image
// generation flow. Every field is optional because intermediaries may emit
// partial or unrelated payloads.
type OpenAICodexImageGenerationEvent = {
  type?: string;
  // Present on `response.output_item.done`; `result` carries base64 image data.
  item?: {
    type?: string;
    result?: string;
    revised_prompt?: string;
  };
  // Present on `response.completed`; usage/tool_usage are passed through opaquely.
  response?: {
    usage?: unknown;
    tool_usage?: unknown;
  };
  // Present on `response.failed` / `error` events.
  error?: {
    code?: string;
    message?: string;
  };
  message?: string;
};
function inferImageUploadFileName(params: {
fileName?: string;
mimeType?: string;
@@ -99,17 +123,115 @@ function inferImageUploadFileName(params: {
return `image-${params.index + 1}.${ext}`;
}
export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider {
/** Encodes a source image as a base64 `data:` URL for Responses input_image parts. */
function toOpenAIDataUrl(image: ImageGenerationSourceImage): string {
  const trimmedMime = image.mimeType?.trim();
  const base64 = Buffer.from(image.buffer).toString("base64");
  return `data:${trimmedMime || DEFAULT_OUTPUT_MIME};base64,${base64}`;
}
/**
 * Drains a fetch Response body to a string, decoding incrementally so the
 * stream is consumed as it arrives rather than buffered by text().
 */
async function readResponseBodyText(response: Response): Promise<string> {
  const stream = response.body;
  // Some Response instances expose no readable stream; fall back to text().
  if (!stream) {
    return await response.text();
  }
  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let collected = "";
  try {
    for (;;) {
      const chunk = await reader.read();
      if (chunk.value) {
        // stream:true keeps multi-byte sequences split across chunks intact.
        collected += decoder.decode(chunk.value, { stream: !chunk.done });
      }
      if (chunk.done) {
        return collected + decoder.decode();
      }
    }
  } finally {
    reader.releaseLock();
  }
}
/**
 * Parses a raw Codex Responses SSE body into JSON events.
 *
 * Accepts `data:` lines with or without the single optional leading space the
 * SSE format allows, skips blank payloads and the `[DONE]` sentinel, and
 * silently drops non-JSON payloads from intermediaries (failed HTTP statuses
 * are handled before this parser runs).
 */
function parseCodexImageGenerationEvents(body: string): OpenAICodexImageGenerationEvent[] {
  const events: OpenAICodexImageGenerationEvent[] = [];
  for (const line of body.split(/\r?\n/)) {
    if (!line.startsWith("data:")) {
      continue;
    }
    // Slice past "data:" and trim: handles both "data: {...}" and "data:{...}",
    // since the SSE spec makes the space after the colon optional.
    const data = line.slice(5).trim();
    if (!data || data === "[DONE]") {
      continue;
    }
    try {
      events.push(JSON.parse(data) as OpenAICodexImageGenerationEvent);
    } catch {
      // Ignore non-JSON SSE payloads from intermediaries; failed HTTP statuses
      // are handled before this parser runs.
    }
  }
  return events;
}
/**
 * Folds a Codex Responses SSE body into an ImageGenerationResult.
 * Throws when the stream reports a failure event; otherwise collects every
 * completed image_generation_call item and the completion usage metadata.
 */
function extractCodexImageGenerationResult(params: {
  body: string;
  model: string;
}): ImageGenerationResult {
  const events = parseCodexImageGenerationEvents(params.body);
  for (const event of events) {
    if (event.type !== "response.failed" && event.type !== "error") {
      continue;
    }
    let message = event.error?.message ?? event.message;
    if (message == null) {
      message = event.error?.code
        ? `OpenAI Codex image generation failed (${event.error.code})`
        : "";
    }
    throw new Error(message || "OpenAI Codex image generation failed");
  }
  const images: ImageGenerationResult["images"] = [];
  for (const event of events) {
    if (event.type !== "response.output_item.done") {
      continue;
    }
    const item = event.item;
    if (item?.type !== "image_generation_call") {
      continue;
    }
    if (typeof item.result !== "string" || item.result.length === 0) {
      continue;
    }
    images.push({
      buffer: Buffer.from(item.result, "base64"),
      mimeType: DEFAULT_OUTPUT_MIME,
      fileName: `image-${images.length + 1}.png`,
      ...(item.revised_prompt ? { revisedPrompt: item.revised_prompt } : {}),
    });
  }
  const completed = events.find((event) => event.type === "response.completed");
  return {
    id: "openai",
    label: "OpenAI",
    images,
    model: params.model,
    ...(completed?.response
      ? {
          metadata: {
            usage: completed.response.usage,
            toolUsage: completed.response.tool_usage,
          },
        }
      : {}),
  };
}
function createOpenAIImageGenerationProviderBase(params: {
id: "openai" | "openai-codex";
label: string;
isConfigured: ImageGenerationProvider["isConfigured"];
generateImage: ImageGenerationProvider["generateImage"];
}): ImageGenerationProvider {
return {
id: params.id,
label: params.label,
defaultModel: DEFAULT_OPENAI_IMAGE_MODEL,
models: [DEFAULT_OPENAI_IMAGE_MODEL],
isConfigured: ({ agentDir }) =>
isProviderApiKeyConfigured({
provider: "openai",
agentDir,
}),
isConfigured: params.isConfigured,
capabilities: {
generate: {
maxCount: 4,
@@ -129,6 +251,19 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider {
sizes: [...OPENAI_SUPPORTED_SIZES],
},
},
generateImage: params.generateImage,
};
}
export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider {
return createOpenAIImageGenerationProviderBase({
id: "openai",
label: "OpenAI",
isConfigured: ({ agentDir }) =>
isProviderApiKeyConfigured({
provider: "openai",
agentDir,
}),
async generateImage(req) {
const inputImages = req.inputImages ?? [];
const isEdit = inputImages.length > 0;
@@ -245,5 +380,110 @@ export function buildOpenAIImageGenerationProvider(): ImageGenerationProvider {
await release();
}
},
};
});
}
/**
 * Builds the `openai-codex` image generation provider, which drives
 * `gpt-image-2` through the Codex Responses SSE endpoint using Codex OAuth
 * credentials instead of an OPENAI_API_KEY.
 */
export function buildOpenAICodexImageGenerationProvider(): ImageGenerationProvider {
  return createOpenAIImageGenerationProviderBase({
    id: "openai-codex",
    label: "OpenAI Codex",
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "openai-codex",
        agentDir,
      }),
    async generateImage(req) {
      const inputImages = req.inputImages ?? [];
      // The OAuth access token is surfaced through the provider API-key path.
      const auth = await resolveApiKeyForProvider({
        provider: "openai-codex",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("OpenAI Codex OAuth missing");
      }
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          defaultBaseUrl: DEFAULT_OPENAI_CODEX_IMAGE_BASE_URL,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
            Accept: "text/event-stream",
          },
          provider: "openai-codex",
          api: "openai-codex-responses",
          capability: "image",
          transport: "http",
        });
      const model = req.model || DEFAULT_OPENAI_IMAGE_MODEL;
      const count = req.count ?? 1;
      const size = req.size ?? DEFAULT_SIZE;
      headers.set("Content-Type", "application/json");
      // Single user turn: the prompt plus any reference images inlined as
      // base64 data URLs (no multipart /images/edits upload on this path).
      const content: Array<Record<string, unknown>> = [
        { type: "input_text", text: req.prompt },
        ...inputImages.map((image) => ({
          type: "input_image",
          image_url: toOpenAIDataUrl(image),
          detail: "auto",
        })),
      ];
      const results: ImageGenerationResult[] = [];
      // Each Responses call yields one image, so satisfy `count` by issuing
      // one request per requested image.
      for (let index = 0; index < count; index += 1) {
        const requestResult = await postJsonRequest({
          url: `${baseUrl}/responses`,
          headers,
          body: {
            // The outer Responses call pins a text model; the image model and
            // size travel in the image_generation tool config below.
            model: "gpt-5.4",
            input: [
              {
                role: "user",
                content,
              },
            ],
            instructions: OPENAI_CODEX_IMAGE_INSTRUCTIONS,
            tools: [
              {
                type: "image_generation",
                model,
                size,
              },
            ],
            tool_choice: { type: "image_generation" },
            stream: true,
            store: false,
          },
          timeoutMs: req.timeoutMs,
          fetchFn: fetch,
          allowPrivateNetwork,
          dispatcherPolicy,
        });
        const { response, release } = requestResult;
        try {
          await assertOkOrThrowHttpError(response, "OpenAI Codex image generation failed");
          results.push(
            extractCodexImageGenerationResult({
              body: await readResponseBodyText(response),
              model,
            }),
          );
        } finally {
          // Always release the pooled connection, even on parse/HTTP errors.
          await release();
        }
      }
      const images = results.flatMap((result) => result.images);
      return {
        // Renumber file names so multi-request batches stay sequential
        // (each per-request result numbers its own images from 1).
        images: images.map((image, index) =>
          Object.assign({}, image, {
            fileName: `image-${index + 1}.png`,
          }),
        ),
        model,
        metadata: {
          responses: results.map((result) => result.metadata).filter(Boolean),
        },
      };
    },
  });
}

View File

@@ -2,7 +2,10 @@ import { resolvePluginConfigObject } from "openclaw/plugin-sdk/config-runtime";
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
import { buildProviderToolCompatFamilyHooks } from "openclaw/plugin-sdk/provider-tools";
import { buildOpenAICodexCliBackend } from "./cli-backend.js";
import { buildOpenAIImageGenerationProvider } from "./image-generation-provider.js";
import {
buildOpenAICodexImageGenerationProvider,
buildOpenAIImageGenerationProvider,
} from "./image-generation-provider.js";
import {
openaiCodexMediaUnderstandingProvider,
openaiMediaUnderstandingProvider,
@@ -49,6 +52,7 @@ export default definePluginEntry({
api.registerProvider(buildProviderWithPromptContribution(buildOpenAICodexProviderPlugin()));
api.registerMemoryEmbeddingProvider(openAiMemoryEmbeddingProviderAdapter);
api.registerImageGenerationProvider(buildOpenAIImageGenerationProvider());
api.registerImageGenerationProvider(buildOpenAICodexImageGenerationProvider());
api.registerRealtimeTranscriptionProvider(buildOpenAIRealtimeTranscriptionProvider());
api.registerRealtimeVoiceProvider(buildOpenAIRealtimeVoiceProvider());
api.registerSpeechProvider(buildOpenAISpeechProvider());

View File

@@ -54,7 +54,7 @@
"realtimeVoiceProviders": ["openai"],
"memoryEmbeddingProviders": ["openai"],
"mediaUnderstandingProviders": ["openai", "openai-codex"],
"imageGenerationProviders": ["openai"],
"imageGenerationProviders": ["openai", "openai-codex"],
"videoGenerationProviders": ["openai"]
},
"mediaUnderstandingProviderMetadata": {

View File

@@ -104,7 +104,7 @@ export const pluginRegistrationContractCases = {
realtimeTranscriptionProviderIds: ["openai"],
realtimeVoiceProviderIds: ["openai"],
mediaUnderstandingProviderIds: ["openai", "openai-codex"],
imageGenerationProviderIds: ["openai"],
imageGenerationProviderIds: ["openai", "openai-codex"],
requireSpeechVoices: true,
requireDescribeImages: true,
requireGenerateImage: true,