mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:10:44 +00:00
feat: add OpenRouter image generation (#67668)
Adds OpenRouter image generation support for image_generate. Fixes #55066. Thanks @notamicrodose.
This commit is contained in:
committed by
GitHub
parent
3c5ee63c66
commit
0f026addaa
@@ -11,6 +11,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Codex harness: add structured debug logging for embedded harness selection decisions so `/status` stays simple while gateway logs explain auto-selection and Pi fallback reasons. (#70760) Thanks @100yenadmin.
|
||||
- Dependencies/Pi: update bundled Pi packages to `0.70.0`, use Pi's upstream `gpt-5.5` catalog metadata for OpenAI and OpenAI Codex, and keep only local `gpt-5.5-pro` forward-compat handling.
|
||||
- Providers/OpenAI: add image generation and reference-image editing through Codex OAuth, so `openai/gpt-image-2` works without an `OPENAI_API_KEY`. Fixes #70703.
|
||||
- Providers/OpenRouter: add image generation and reference-image editing through `image_generate`, so OpenRouter image models work with `OPENROUTER_API_KEY`. Fixes #55066 via #67668. Thanks @notamicrodose.
|
||||
- Image generation: let agents request provider-supported quality and output format hints, and pass OpenAI-specific background, moderation, compression, and user hints through the `image_generate` tool. (#70503) Thanks @ottodeng.
|
||||
|
||||
### Fixes
|
||||
|
||||
@@ -868,12 +868,13 @@ If you want to rely on env keys (e.g. exported in your `~/.profile`), run local
|
||||
- `google`
|
||||
- `minimax`
|
||||
- `openai`
|
||||
- `openrouter`
|
||||
- `vydra`
|
||||
- `xai`
|
||||
- Optional narrowing:
|
||||
- `OPENCLAW_LIVE_IMAGE_GENERATION_PROVIDERS="openai,google,xai"`
|
||||
- `OPENCLAW_LIVE_IMAGE_GENERATION_MODELS="openai/gpt-image-2,google/gemini-3.1-flash-image-preview,xai/grok-imagine-image"`
|
||||
- `OPENCLAW_LIVE_IMAGE_GENERATION_CASES="google:flash-generate,google:pro-edit,xai:default-generate,xai:default-edit"`
|
||||
- `OPENCLAW_LIVE_IMAGE_GENERATION_PROVIDERS="openai,google,openrouter,xai"`
|
||||
- `OPENCLAW_LIVE_IMAGE_GENERATION_MODELS="openai/gpt-image-2,google/gemini-3.1-flash-image-preview,openrouter/google/gemini-3.1-flash-image-preview,xai/grok-imagine-image"`
|
||||
- `OPENCLAW_LIVE_IMAGE_GENERATION_CASES="google:flash-generate,google:pro-edit,openrouter:generate,xai:default-generate,xai:default-edit"`
|
||||
- Optional auth behavior:
|
||||
- `OPENCLAW_LIVE_REQUIRE_PROFILE_KEYS=1` to force profile-store auth and ignore env-only overrides
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ summary: "Use OpenRouter's unified API to access many models in OpenClaw"
|
||||
read_when:
|
||||
- You want a single API key for many LLMs
|
||||
- You want to run models via OpenRouter in OpenClaw
|
||||
- You want to use OpenRouter for image generation
|
||||
title: "OpenRouter"
|
||||
---
|
||||
|
||||
@@ -59,6 +60,25 @@ Bundled fallback examples:
|
||||
| `openrouter/openrouter/healer-alpha` | OpenRouter Healer Alpha route |
|
||||
| `openrouter/openrouter/hunter-alpha` | OpenRouter Hunter Alpha route |
|
||||
|
||||
## Image generation
|
||||
|
||||
OpenRouter can also back the `image_generate` tool. Use an OpenRouter image model under `agents.defaults.imageGenerationModel`:
|
||||
|
||||
```json5
|
||||
{
|
||||
env: { OPENROUTER_API_KEY: "sk-or-..." },
|
||||
agents: {
|
||||
defaults: {
|
||||
imageGenerationModel: {
|
||||
primary: "openrouter/google/gemini-3.1-flash-image-preview",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
OpenClaw sends image requests to OpenRouter's chat completions image API with `modalities: ["image", "text"]`. Gemini image models receive supported `aspectRatio` and `resolution` hints through OpenRouter's `image_config`.
|
||||
|
||||
## Authentication and headers
|
||||
|
||||
OpenRouter uses a Bearer token with your API key under the hood.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
summary: "Generate and edit images using configured providers (OpenAI, OpenAI Codex OAuth, Google Gemini, fal, MiniMax, ComfyUI, Vydra, xAI)"
|
||||
summary: "Generate and edit images using configured providers (OpenAI, OpenAI Codex OAuth, Google Gemini, OpenRouter, fal, MiniMax, ComfyUI, Vydra, xAI)"
|
||||
read_when:
|
||||
- Generating images via the agent
|
||||
- Configuring image generation providers and models
|
||||
@@ -15,7 +15,7 @@ The tool only appears when at least one image generation provider is available.
|
||||
|
||||
## Quick start
|
||||
|
||||
1. Set an API key for at least one provider (for example `OPENAI_API_KEY` or `GEMINI_API_KEY`) or sign in with OpenAI Codex OAuth.
|
||||
1. Set an API key for at least one provider (for example `OPENAI_API_KEY`, `GEMINI_API_KEY`, or `OPENROUTER_API_KEY`) or sign in with OpenAI Codex OAuth.
|
||||
2. Optionally set your preferred model:
|
||||
|
||||
```json5
|
||||
@@ -46,15 +46,16 @@ The agent calls `image_generate` automatically. No tool allow-listing needed —
|
||||
|
||||
## Supported providers
|
||||
|
||||
| Provider | Default model | Edit support | Auth |
|
||||
| -------- | -------------------------------- | ---------------------------------- | ----------------------------------------------------- |
|
||||
| OpenAI | `gpt-image-2` | Yes (up to 4 images) | `OPENAI_API_KEY` or OpenAI Codex OAuth |
|
||||
| Google | `gemini-3.1-flash-image-preview` | Yes | `GEMINI_API_KEY` or `GOOGLE_API_KEY` |
|
||||
| fal | `fal-ai/flux/dev` | Yes | `FAL_KEY` |
|
||||
| MiniMax | `image-01` | Yes (subject reference) | `MINIMAX_API_KEY` or MiniMax OAuth (`minimax-portal`) |
|
||||
| ComfyUI | `workflow` | Yes (1 image, workflow-configured) | `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` for cloud |
|
||||
| Vydra | `grok-imagine` | No | `VYDRA_API_KEY` |
|
||||
| xAI | `grok-imagine-image` | Yes (up to 5 images) | `XAI_API_KEY` |
|
||||
| Provider | Default model | Edit support | Auth |
|
||||
| ---------- | --------------------------------------- | ---------------------------------- | ----------------------------------------------------- |
|
||||
| OpenAI | `gpt-image-2` | Yes (up to 4 images) | `OPENAI_API_KEY` or OpenAI Codex OAuth |
|
||||
| OpenRouter | `google/gemini-3.1-flash-image-preview` | Yes (up to 5 input images) | `OPENROUTER_API_KEY` |
|
||||
| Google | `gemini-3.1-flash-image-preview` | Yes | `GEMINI_API_KEY` or `GOOGLE_API_KEY` |
|
||||
| fal | `fal-ai/flux/dev` | Yes | `FAL_KEY` |
|
||||
| MiniMax | `image-01` | Yes (subject reference) | `MINIMAX_API_KEY` or MiniMax OAuth (`minimax-portal`) |
|
||||
| ComfyUI | `workflow` | Yes (1 image, workflow-configured) | `COMFY_API_KEY` or `COMFY_CLOUD_API_KEY` for cloud |
|
||||
| Vydra | `grok-imagine` | No | `VYDRA_API_KEY` |
|
||||
| xAI | `grok-imagine-image` | Yes (up to 5 images) | `XAI_API_KEY` |
|
||||
|
||||
Use `action: "list"` to inspect available providers and models at runtime:
|
||||
|
||||
@@ -134,7 +135,11 @@ Tool results report the applied settings. When OpenClaw remaps geometry during p
|
||||
defaults: {
|
||||
imageGenerationModel: {
|
||||
primary: "openai/gpt-image-2",
|
||||
fallbacks: ["google/gemini-3.1-flash-image-preview", "fal/fal-ai/flux/dev"],
|
||||
fallbacks: [
|
||||
"openrouter/google/gemini-3.1-flash-image-preview",
|
||||
"google/gemini-3.1-flash-image-preview",
|
||||
"fal/fal-ai/flux/dev",
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -167,13 +172,31 @@ Notes:
|
||||
|
||||
### Image editing
|
||||
|
||||
OpenAI, Google, fal, MiniMax, ComfyUI, and xAI support editing reference images. Pass a reference image path or URL:
|
||||
OpenAI, OpenRouter, Google, fal, MiniMax, ComfyUI, and xAI support editing reference images. Pass a reference image path or URL:
|
||||
|
||||
```
|
||||
"Generate a watercolor version of this photo" + image: "/path/to/photo.jpg"
|
||||
```
|
||||
|
||||
OpenAI, Google, and xAI support up to 5 reference images via the `images` parameter. fal, MiniMax, and ComfyUI support 1.
|
||||
OpenAI, OpenRouter, Google, and xAI support up to 5 reference images via the `images` parameter. fal, MiniMax, and ComfyUI support 1.
|
||||
|
||||
### OpenRouter image models
|
||||
|
||||
OpenRouter image generation uses the same `OPENROUTER_API_KEY` as OpenRouter chat models and routes requests through OpenRouter's chat completions image API. Select OpenRouter image models with the `openrouter/` prefix:
|
||||
|
||||
```json5
|
||||
{
|
||||
agents: {
|
||||
defaults: {
|
||||
imageGenerationModel: {
|
||||
primary: "openrouter/google/gemini-3.1-flash-image-preview",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
OpenClaw forwards `prompt`, `count`, reference images, and Gemini-compatible `aspectRatio` / `resolution` hints to OpenRouter. Current built-in OpenRouter image model shortcuts include `google/gemini-3.1-flash-image-preview`, `google/gemini-3-pro-image-preview`, and `openai/gpt-5.4-image-2`; use `action: "list"` to see what your configured plugin exposes.
|
||||
|
||||
### OpenAI `gpt-image-2`
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
export { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js";
|
||||
export { buildOpenrouterProvider } from "./provider-catalog.js";
|
||||
export {
|
||||
applyOpenrouterConfig,
|
||||
|
||||
201
extensions/openrouter/image-generation-provider.test.ts
Normal file
201
extensions/openrouter/image-generation-provider.test.ts
Normal file
@@ -0,0 +1,201 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
buildOpenRouterImageGenerationProvider,
|
||||
extractOpenRouterImagesFromResponse,
|
||||
} from "./image-generation-provider.js";
|
||||
|
||||
// Shared mock functions for the provider's SDK dependencies. They are created
// through vi.hoisted so the vi.mock factories further down can reference them
// (see Vitest's vi.hoisted / vi.mock hoisting rules).
const {
|
||||
assertOkOrThrowHttpErrorMock,
|
||||
postJsonRequestMock,
|
||||
resolveApiKeyForProviderMock,
|
||||
resolveProviderHttpRequestConfigMock,
|
||||
} = vi.hoisted(() => ({
|
||||
// Resolves without throwing, i.e. every mocked HTTP response is treated as OK.
assertOkOrThrowHttpErrorMock: vi.fn(async () => {}),
|
||||
// No default behaviour: each test supplies its own resolved value.
postJsonRequestMock: vi.fn(),
|
||||
// Always yields a fixed API key.
resolveApiKeyForProviderMock: vi.fn(async () => ({ apiKey: "openrouter-key" })),
|
||||
// Echoes the requested base URL (falling back to the default base URL, then to
// the public OpenRouter URL) and wraps the default headers in a Headers object.
resolveProviderHttpRequestConfigMock: vi.fn((params: Record<string, unknown>) => ({
|
||||
baseUrl: params.baseUrl ?? params.defaultBaseUrl ?? "https://openrouter.ai/api/v1",
|
||||
allowPrivateNetwork: false,
|
||||
headers: new Headers(params.defaultHeaders as HeadersInit | undefined),
|
||||
dispatcherPolicy: undefined,
|
||||
})),
|
||||
}));
|
||||
|
||||
// Replace the auth runtime module so API key resolution goes through the mock.
vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({
|
||||
resolveApiKeyForProvider: resolveApiKeyForProviderMock,
|
||||
}));
|
||||
|
||||
// Replace the HTTP helper module so no network request is made by these tests.
vi.mock("openclaw/plugin-sdk/provider-http", () => ({
|
||||
assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock,
|
||||
postJsonRequest: postJsonRequestMock,
|
||||
resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock,
|
||||
}));
|
||||
|
||||
describe("openrouter image generation provider", () => {
  /** Base64-encodes a UTF-8 fixture string. */
  const toBase64 = (text: string): string => Buffer.from(text).toString("base64");

  /**
   * Makes the mocked HTTP call resolve with `payload` as its JSON body and
   * returns the `release` spy handed to the provider.
   */
  const stubCompletion = (payload: unknown, release = vi.fn(async () => {})) => {
    postJsonRequestMock.mockResolvedValue({
      response: { json: async () => payload },
      release,
    });
    return release;
  };

  afterEach(() => {
    // The HTTP mock is fully reset so a stubbed response never leaks into the next test;
    // the remaining mocks keep their default implementations and only drop call history.
    postJsonRequestMock.mockReset();
    assertOkOrThrowHttpErrorMock.mockClear();
    resolveApiKeyForProviderMock.mockClear();
    resolveProviderHttpRequestConfigMock.mockClear();
  });

  it("builds provider metadata and capabilities", () => {
    const { id, label, defaultModel, models, capabilities } =
      buildOpenRouterImageGenerationProvider();

    expect(id).toBe("openrouter");
    expect(label).toBe("OpenRouter");
    expect(defaultModel).toBe("google/gemini-3.1-flash-image-preview");
    expect(models).toContain("google/gemini-3-pro-image-preview");
    expect(capabilities.generate.maxCount).toBe(4);
    expect(capabilities.generate.supportsAspectRatio).toBe(true);
    expect(capabilities.edit.enabled).toBe(true);
    expect(capabilities.edit.maxInputImages).toBe(5);
  });

  it("sends chat completion image requests with Gemini image config and count", async () => {
    const releaseSpy = stubCompletion({
      choices: [
        {
          message: {
            images: [{ imageUrl: { url: `data:image/png;base64,${toBase64("png-one")}` } }],
          },
        },
      ],
    });

    const outcome = await buildOpenRouterImageGenerationProvider().generateImage({
      provider: "openrouter",
      model: "google/gemini-3.1-flash-image-preview",
      prompt: "draw a sticker",
      aspectRatio: "16:9",
      resolution: "2K",
      count: 2,
      timeoutMs: 12_345,
      cfg: {
        models: {
          providers: {
            openrouter: { baseUrl: "https://custom.openrouter.test/api/v1" },
          },
        },
      } as never,
    });

    expect(resolveApiKeyForProviderMock).toHaveBeenCalledWith(
      expect.objectContaining({ provider: "openrouter" }),
    );
    expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith(
      expect.objectContaining({
        provider: "openrouter",
        capability: "image",
        baseUrl: "https://custom.openrouter.test/api/v1",
      }),
    );

    const expectedBody = expect.objectContaining({
      model: "google/gemini-3.1-flash-image-preview",
      modalities: ["image", "text"],
      n: 2,
      image_config: { aspect_ratio: "16:9", image_size: "2K" },
      messages: [{ role: "user", content: "draw a sticker" }],
    });
    expect(postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://custom.openrouter.test/api/v1/chat/completions",
        timeoutMs: 12_345,
        body: expectedBody,
      }),
    );

    const [firstImage] = outcome.images;
    expect(firstImage?.buffer.toString()).toBe("png-one");
    expect(firstImage?.mimeType).toBe("image/png");
    expect(releaseSpy).toHaveBeenCalledOnce();
  });

  it("sends reference images as data URLs for edit-style requests", async () => {
    stubCompletion({
      choices: [
        {
          message: {
            content: [
              {
                type: "image_url",
                image_url: { url: `data:image/webp;base64,${toBase64("webp-one")}` },
              },
            ],
          },
        },
      ],
    });

    const outcome = await buildOpenRouterImageGenerationProvider().generateImage({
      provider: "openrouter",
      model: "google/gemini-3.1-flash-image-preview",
      prompt: "turn this into watercolor",
      inputImages: [{ buffer: Buffer.from("source-image"), mimeType: "image/png" }],
      cfg: {} as never,
    });

    const sentBody = postJsonRequestMock.mock.calls[0]?.[0].body as {
      messages?: Array<{ content?: unknown }>;
    };
    expect(sentBody.messages?.[0]?.content).toEqual([
      { type: "text", text: "turn this into watercolor" },
      {
        type: "image_url",
        image_url: { url: `data:image/png;base64,${toBase64("source-image")}` },
      },
    ]);

    const [firstImage] = outcome.images;
    expect(firstImage?.buffer.toString()).toBe("webp-one");
    expect(firstImage?.mimeType).toBe("image/webp");
  });

  it("extracts image fallbacks from string content and raw b64 parts", () => {
    const inlinePng = toBase64("png-inline");
    const rawPart = toBase64("raw-inline");

    const extracted = extractOpenRouterImagesFromResponse({
      choices: [
        { message: { content: `done data:image/png;base64,${inlinePng}` } },
        { message: { content: [{ b64_json: rawPart }] } },
      ],
    });

    const decoded = extracted.map((image) => image.buffer.toString());
    expect(decoded).toEqual(["png-inline", "raw-inline"]);
  });
});
|
||||
302
extensions/openrouter/image-generation-provider.ts
Normal file
302
extensions/openrouter/image-generation-provider.ts
Normal file
@@ -0,0 +1,302 @@
|
||||
import type {
|
||||
GeneratedImageAsset,
|
||||
ImageGenerationProvider,
|
||||
ImageGenerationRequest,
|
||||
} from "openclaw/plugin-sdk/image-generation";
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
postJsonRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
|
||||
import { OPENROUTER_BASE_URL } from "./provider-catalog.js";
|
||||
|
||||
/** Model used when a request does not name one. */
const DEFAULT_MODEL = "google/gemini-3.1-flash-image-preview";

/** MIME type assumed for image payloads that do not declare one. */
const DEFAULT_OUTPUT_MIME = "image/png";

/** Request timeout applied when the caller does not provide `timeoutMs`. */
const DEFAULT_TIMEOUT_MS = 90_000;

/** Upper bound for the number of images requested in a single call. */
const MAX_IMAGE_RESULTS = 4;

/** Model ids advertised by this provider; the default model is listed first. */
const SUPPORTED_MODELS = [
  DEFAULT_MODEL,
  "google/gemini-3-pro-image-preview",
  "openai/gpt-5.4-image-2",
] as const;

/** Aspect ratios advertised in the provider's geometry capabilities. */
const SUPPORTED_ASPECT_RATIOS = [
  "1:1",
  "2:3", "3:2",
  "3:4", "4:3",
  "4:5", "5:4",
  "9:16", "16:9",
  "21:9",
] as const;
|
||||
|
||||
/** One image reference; the URL may sit under `image_url` or `imageUrl`. */
type OpenRouterImageEntry = {
  image_url?: { url?: string };
  imageUrl?: { url?: string };
};

/** Message portion of a choice: free-form content plus an optional image list. */
type OpenRouterChatMessage = {
  content?: string | unknown[] | null;
  images?: OpenRouterImageEntry[];
};

/** Subset of the chat completion response body that image extraction reads. */
type OpenRouterChatCompletionResponse = {
  choices?: Array<{ message?: OpenRouterChatMessage }>;
};
|
||||
|
||||
/**
 * Splits a base64 `data:` URL into its MIME type and payload.
 *
 * @param dataUrl - Candidate string of the form `data:<mime>;base64,<payload>`.
 * @returns The MIME type and the base64 payload, or `undefined` when the string
 *   does not have that form or either part is empty.
 */
function parseDataUrl(dataUrl: string): { mimeType: string; data: string } | undefined {
  // The `s` flag lets the payload span line breaks.
  const parts = /^data:([^;]+);base64,(.+)$/s.exec(dataUrl);
  const mimeType = parts?.[1];
  const data = parts?.[2];
  return mimeType && data ? { mimeType, data } : undefined;
}
|
||||
|
||||
/**
 * Picks a file extension for an image MIME type.
 *
 * Matching is case-insensitive. JPEG, WebP and GIF are recognised by substring;
 * any other type uses its subtype with parameters (`; q=1`) and structured
 * suffixes (`+xml`) removed, so `image/svg+xml` yields `svg`.
 *
 * @param mimeType - MIME type such as `image/png`.
 * @returns The extension without a leading dot; `png` when no subtype can be derived.
 */
function fileExtensionForMimeType(mimeType: string): string {
  const normalized = mimeType.trim().toLowerCase();
  if (normalized.includes("jpeg") || normalized.includes("jpg")) {
    return "jpg";
  }
  if (normalized.includes("webp")) {
    return "webp";
  }
  if (normalized.includes("gif")) {
    return "gif";
  }
  const rawSubtype = normalized.split("/")[1] ?? "";
  const subtype = rawSubtype.split(/[;+]/)[0]?.trim() ?? "";
  // `||` on purpose: an empty subtype (e.g. "image/") must also fall back to png,
  // otherwise the generated file name would end in a bare dot.
  return subtype || "png";
}
|
||||
|
||||
/**
 * Wraps a base64 payload as a generated image asset.
 *
 * @param params.base64 - Base64-encoded image bytes.
 * @param params.index - Zero-based position of the image; the file name uses `index + 1`.
 * @param params.mimeType - MIME type of the payload; defaults to `DEFAULT_OUTPUT_MIME`.
 * @returns The decoded buffer with its MIME type and a file name of the form
 *   `image-<n>.<ext>`.
 */
function toGeneratedImage(params: {
  base64: string;
  index: number;
  mimeType?: string;
}): GeneratedImageAsset {
  const resolvedMimeType = params.mimeType ?? DEFAULT_OUTPUT_MIME;
  const decoded = Buffer.from(params.base64, "base64");
  const extension = fileExtensionForMimeType(resolvedMimeType);
  const ordinal = params.index + 1;
  return {
    buffer: decoded,
    mimeType: resolvedMimeType,
    fileName: `image-${ordinal}.${extension}`,
  };
}
|
||||
|
||||
/**
 * Appends the image encoded in `dataUrl` to `images`.
 *
 * Strings that `parseDataUrl` rejects (anything that is not a base64 data URL)
 * are ignored, leaving `images` untouched.
 *
 * @param images - Accumulator that receives the decoded asset (mutated in place).
 * @param dataUrl - Candidate base64 `data:` URL.
 */
function pushDataUrlImage(images: GeneratedImageAsset[], dataUrl: string): void {
  const decoded = parseDataUrl(dataUrl);
  if (decoded) {
    const asset = toGeneratedImage({
      base64: decoded.data,
      // The next free slot determines the asset's file name.
      index: images.length,
      mimeType: decoded.mimeType,
    });
    images.push(asset);
  }
}
|
||||
|
||||
/**
 * Extracts at most one image from a single content part and appends it to `images`.
 *
 * Shapes are tried in this order, stopping at the first that yields data:
 * 1. `type: "image_url"` with a URL under `image_url` or `imageUrl`;
 * 2. a raw base64 string under `b64_json`;
 * 3. `inlineData` / `inline_data` with `data` and an optional
 *    `mimeType` / `mime_type`.
 *
 * @param images - Accumulator that receives the decoded asset (mutated in place).
 * @param part - One element of a message's content array; non-objects are ignored.
 */
function extractImagesFromPart(images: GeneratedImageAsset[], part: unknown): void {
  if (typeof part !== "object" || !part) {
    return;
  }
  const asString = (candidate: unknown): string | undefined =>
    typeof candidate === "string" ? candidate : undefined;
  const fields = part as Record<string, unknown>;

  if (fields.type === "image_url") {
    const urlHolder = (fields.image_url ?? fields.imageUrl) as Record<string, unknown> | undefined;
    const url = asString(urlHolder?.url);
    if (url) {
      // Parts with a usable URL stop here, even if the URL is not a data URL.
      pushDataUrlImage(images, url);
      return;
    }
  }

  const rawBase64 = asString(fields.b64_json);
  if (rawBase64) {
    images.push(toGeneratedImage({ base64: rawBase64, index: images.length }));
    return;
  }

  const inline = (fields.inlineData ?? fields.inline_data) as Record<string, unknown> | undefined;
  const payload = asString(inline?.data)?.trim();
  if (payload) {
    const mimeType =
      asString(inline?.mimeType) ?? asString(inline?.mime_type) ?? DEFAULT_OUTPUT_MIME;
    images.push(toGeneratedImage({ base64: payload, index: images.length, mimeType }));
  }
}
|
||||
|
||||
/**
 * Collects every image found in a chat completion response body.
 *
 * For each choice, images are gathered from `message.images` first, then from
 * `message.content`: data URLs embedded in string content, or image parts when
 * the content is an array.
 *
 * @param body - Parsed chat completion response.
 * @returns The decoded images in encounter order; empty when none are present.
 */
export function extractOpenRouterImagesFromResponse(
  body: OpenRouterChatCompletionResponse,
): GeneratedImageAsset[] {
  const collected: GeneratedImageAsset[] = [];
  const embeddedDataUrl = /data:image\/[^;]+;base64,[A-Za-z0-9+/=]+/g;

  for (const choice of body.choices ?? []) {
    const { message } = choice;
    if (!message) {
      continue;
    }

    for (const entry of message.images ?? []) {
      const url = entry.image_url?.url ?? entry.imageUrl?.url;
      if (typeof url === "string") {
        pushDataUrlImage(collected, url);
      }
    }

    const { content } = message;
    if (Array.isArray(content)) {
      for (const part of content) {
        extractImagesFromPart(collected, part);
      }
    } else if (typeof content === "string" && content.length > 0) {
      // Plain-text replies may embed one or more data URLs between other words.
      for (const found of content.matchAll(embeddedDataUrl)) {
        pushDataUrlImage(collected, found[0]);
      }
    }
  }

  return collected;
}
|
||||
|
||||
/**
 * Encodes an image as a base64 `data:` URL.
 *
 * @param image - Raw bytes and their MIME type.
 * @returns A string of the form `data:<mimeType>;base64,<payload>`.
 */
function toDataUrl(image: { buffer: Buffer; mimeType: string }): string {
  const { buffer, mimeType } = image;
  const payload = buffer.toString("base64");
  return "data:" + mimeType + ";base64," + payload;
}
|
||||
|
||||
/**
 * Normalises the requested image count.
 *
 * @param count - Caller-supplied count, possibly missing or not finite.
 * @returns The truncated count clamped to `1..MAX_IMAGE_RESULTS`; `1` when
 *   `count` is not a finite number.
 */
function resolveImageCount(count: number | undefined): number {
  if (typeof count === "number" && Number.isFinite(count)) {
    const whole = Math.trunc(count);
    if (whole < 1) {
      return 1;
    }
    return whole > MAX_IMAGE_RESULTS ? MAX_IMAGE_RESULTS : whole;
  }
  return 1;
}
|
||||
|
||||
/**
 * Tells whether a model id belongs to the Google Gemini family.
 *
 * @param model - Model id without the `openrouter/` provider prefix.
 * @returns `true` when the id starts with `google/gemini-`.
 */
function isGeminiImageModel(model: string): boolean {
  const geminiPrefix = "google/gemini-";
  return model.startsWith(geminiPrefix);
}
|
||||
|
||||
/**
 * Builds the `content` of the user message sent to the chat completions endpoint.
 *
 * @param req - Image generation request.
 * @returns The bare prompt when there are no input images; otherwise a parts
 *   array with the prompt as a text part followed by one `image_url` part per
 *   input image, each encoded as a data URL.
 */
function buildMessageContent(
  req: ImageGenerationRequest,
):
  | string
  | Array<{ type: "text"; text: string } | { type: "image_url"; image_url: { url: string } }> {
  const references = req.inputImages ?? [];
  if (references.length > 0) {
    const imageParts = references.map((reference) => ({
      type: "image_url" as const,
      image_url: { url: toDataUrl(reference) },
    }));
    return [{ type: "text", text: req.prompt }, ...imageParts];
  }
  return req.prompt;
}
|
||||
|
||||
/**
 * Builds the `image_config` hints for a request.
 *
 * Hints are produced only for Gemini models: `aspect_ratio` from
 * `req.aspectRatio` and `image_size` from `req.resolution`, each included only
 * when it normalises to a non-empty string.
 *
 * @param req - Image generation request carrying the optional hints.
 * @param model - Resolved model id.
 * @returns The hint map; empty for non-Gemini models or when no hint is set.
 */
function buildImageConfig(req: ImageGenerationRequest, model: string): Record<string, string> {
  const hints: Record<string, string> = {};
  if (isGeminiImageModel(model)) {
    const ratio = normalizeOptionalString(req.aspectRatio);
    if (ratio) {
      hints.aspect_ratio = ratio;
    }
    const size = normalizeOptionalString(req.resolution);
    if (size) {
      hints.image_size = size;
    }
  }
  return hints;
}
|
||||
|
||||
/**
 * Creates the OpenRouter image generation provider.
 *
 * The provider posts a single user message to `<baseUrl>/chat/completions`
 * with `modalities: ["image", "text"]` and decodes every image found in the
 * response.
 *
 * `generateImage` throws when no API key resolves, when
 * `assertOkOrThrowHttpError` rejects the response, or when the response holds
 * no image data. In the last case an error message reported in the response
 * body is surfaced instead of the generic "missing image data" text.
 *
 * @returns The provider definition (metadata, capabilities, `generateImage`).
 */
export function buildOpenRouterImageGenerationProvider(): ImageGenerationProvider {
  return {
    id: "openrouter",
    label: "OpenRouter",
    defaultModel: DEFAULT_MODEL,
    models: [...SUPPORTED_MODELS],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({ provider: "openrouter", agentDir }),
    capabilities: {
      generate: {
        maxCount: MAX_IMAGE_RESULTS,
        supportsSize: false,
        supportsAspectRatio: true,
        supportsResolution: true,
      },
      edit: {
        enabled: true,
        maxCount: MAX_IMAGE_RESULTS,
        maxInputImages: 5,
        supportsSize: false,
        supportsAspectRatio: true,
        supportsResolution: true,
      },
      geometry: {
        aspectRatios: [...SUPPORTED_ASPECT_RATIOS],
        resolutions: ["1K", "2K", "4K"],
      },
    },
    async generateImage(req) {
      const auth = await resolveApiKeyForProvider({
        provider: "openrouter",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("OpenRouter API key missing");
      }

      const model = normalizeOptionalString(req.model) ?? DEFAULT_MODEL;
      const imageConfig = buildImageConfig(req, model);
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: req.cfg?.models?.providers?.openrouter?.baseUrl,
          defaultBaseUrl: OPENROUTER_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
            "HTTP-Referer": "https://openclaw.ai",
            "X-OpenRouter-Title": "OpenClaw",
          },
          provider: "openrouter",
          capability: "image",
          transport: "http",
        });

      // A configured base URL may end in "/"; strip it so the endpoint never
      // contains "//chat/completions".
      const endpoint = `${String(baseUrl).replace(/\/+$/, "")}/chat/completions`;

      const { response, release } = await postJsonRequest({
        url: endpoint,
        headers,
        body: {
          model,
          messages: [{ role: "user", content: buildMessageContent(req) }],
          modalities: ["image", "text"],
          n: resolveImageCount(req.count),
          // Omit image_config entirely when there are no hints to send.
          ...(Object.keys(imageConfig).length > 0 ? { image_config: imageConfig } : {}),
        },
        timeoutMs: req.timeoutMs ?? DEFAULT_TIMEOUT_MS,
        fetchFn: fetch,
        allowPrivateNetwork,
        dispatcherPolicy,
      });

      try {
        await assertOkOrThrowHttpError(response, "OpenRouter image generation failed");
        const payload = (await response.json()) as OpenRouterChatCompletionResponse & {
          error?: { message?: unknown };
        };
        const images = extractOpenRouterImagesFromResponse(payload);
        if (images.length === 0) {
          // A successful HTTP status can still carry an error object in the body;
          // report its message rather than hiding it behind the generic text.
          const upstreamMessage = payload.error?.message;
          if (typeof upstreamMessage === "string" && upstreamMessage.trim().length > 0) {
            throw new Error(`OpenRouter image generation failed: ${upstreamMessage.trim()}`);
          }
          throw new Error("OpenRouter image generation response missing image data");
        }
        return { images, model };
      } finally {
        // Always hand the connection back, including when extraction throws.
        await release();
      }
    },
  };
}
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
getOpenRouterModelCapabilities,
|
||||
loadOpenRouterModelCapabilities,
|
||||
} from "openclaw/plugin-sdk/provider-stream-family";
|
||||
import { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js";
|
||||
import { openrouterMediaUnderstandingProvider } from "./media-understanding-provider.js";
|
||||
import { applyOpenrouterConfig, OPENROUTER_DEFAULT_MODEL_REF } from "./onboard.js";
|
||||
import {
|
||||
@@ -143,5 +144,6 @@ export default definePluginEntry({
|
||||
isCacheTtlEligible: (ctx) => isOpenRouterCacheTtlModel(ctx.modelId),
|
||||
});
|
||||
api.registerMediaUnderstandingProvider(openrouterMediaUnderstandingProvider);
|
||||
api.registerImageGenerationProvider(buildOpenRouterImageGenerationProvider());
|
||||
},
|
||||
});
|
||||
|
||||
@@ -21,7 +21,8 @@
|
||||
}
|
||||
],
|
||||
"contracts": {
|
||||
"mediaUnderstandingProviders": ["openrouter"]
|
||||
"mediaUnderstandingProviders": ["openrouter"],
|
||||
"imageGenerationProviders": ["openrouter"]
|
||||
},
|
||||
"mediaUnderstandingProviderMetadata": {
|
||||
"openrouter": {
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
export { buildOpenRouterImageGenerationProvider } from "./image-generation-provider.js";
|
||||
export { openrouterMediaUnderstandingProvider } from "./media-understanding-provider.js";
|
||||
|
||||
@@ -15,6 +15,7 @@ export const DEFAULT_LIVE_IMAGE_MODELS: Record<string, string> = {
|
||||
google: "google/gemini-3.1-flash-image-preview",
|
||||
minimax: "minimax/image-01",
|
||||
openai: "openai/gpt-image-2",
|
||||
openrouter: "openrouter/google/gemini-3.1-flash-image-preview",
|
||||
vydra: "vydra/grok-imagine",
|
||||
xai: "xai/grok-imagine-image",
|
||||
};
|
||||
|
||||
@@ -113,7 +113,9 @@ export const pluginRegistrationContractCases = {
|
||||
pluginId: "openrouter",
|
||||
providerIds: ["openrouter"],
|
||||
mediaUnderstandingProviderIds: ["openrouter"],
|
||||
imageGenerationProviderIds: ["openrouter"],
|
||||
requireDescribeImages: true,
|
||||
requireGenerateImage: true,
|
||||
},
|
||||
perplexity: {
|
||||
pluginId: "perplexity",
|
||||
|
||||
@@ -74,6 +74,11 @@ const PROVIDER_CASES: LiveProviderCase[] = [
|
||||
pluginName: "OpenAI Provider",
|
||||
providerId: "openai",
|
||||
},
|
||||
{
|
||||
pluginId: "openrouter",
|
||||
pluginName: "OpenRouter Provider",
|
||||
providerId: "openrouter",
|
||||
},
|
||||
{
|
||||
pluginId: "vydra",
|
||||
pluginName: "Vydra Provider",
|
||||
|
||||
Reference in New Issue
Block a user