diff --git a/.github/labeler.yml b/.github/labeler.yml index ddfdc2c0d93..ebd77d64925 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -315,6 +315,11 @@ - changed-files: - any-glob-to-any-file: - "extensions/lmstudio/**" +"extensions: litellm": + - changed-files: + - any-glob-to-any-file: + - "extensions/litellm/**" + - "docs/providers/litellm.md" "extensions: openai": - changed-files: - any-glob-to-any-file: diff --git a/CHANGELOG.md b/CHANGELOG.md index 0761d2755ec..c7deea66c3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Docs: https://docs.openclaw.ai - Providers/Xiaomi: add MiMo TTS as a bundled speech provider with MP3/WAV output and voice-note Opus transcoding. Fixes #52376. (#55614) Thanks @zoujiejun. - Providers/ElevenLabs: include `eleven_v3` in the bundled TTS model catalog so model selection surfaces can offer ElevenLabs v3. (#68321) Thanks @itsuzef. - Providers/Local CLI TTS: add a bundled local command speech provider with file/stdout input, voice-note Opus conversion, and telephony PCM output. (#56239) Thanks @solar2ain. +- Providers/LiteLLM: register `litellm` as an image-generation provider so `image_generate model=litellm/...` calls and `agents.defaults.imageGenerationModel.fallbacks` entries resolve through the LiteLLM proxy. Thanks @zqchris. ### Fixes diff --git a/docs/providers/litellm.md b/docs/providers/litellm.md index 65f0c75c68d..617bd545fa7 100644 --- a/docs/providers/litellm.md +++ b/docs/providers/litellm.md @@ -108,6 +108,38 @@ export LITELLM_API_KEY="sk-litellm-key" ## Advanced configuration +### Image generation + +LiteLLM can also back the `image_generate` tool through OpenAI-compatible +`/images/generations` and `/images/edits` routes. 
Configure a LiteLLM image +model under `agents.defaults.imageGenerationModel`: + +```json5 +{ + models: { + providers: { + litellm: { + baseUrl: "http://localhost:4000", + apiKey: "${LITELLM_API_KEY}", + }, + }, + }, + agents: { + defaults: { + imageGenerationModel: { + primary: "litellm/gpt-image-2", + timeoutMs: 180000, + }, + }, + }, +} +``` + +Loopback LiteLLM URLs such as `http://localhost:4000` work without a global +private-network override. For a LAN-hosted proxy, set +`models.providers.litellm.request.allowPrivateNetwork: true`; note that the API key +will be sent to the configured proxy host. + Create a dedicated key for OpenClaw with spend limits: diff --git a/docs/tools/image-generation.md b/docs/tools/image-generation.md index e4ee385ee68..715b11ae5e1 100644 --- a/docs/tools/image-generation.md +++ b/docs/tools/image-generation.md @@ -1,5 +1,5 @@ --- -summary: "Generate and edit images using configured providers (OpenAI, OpenAI Codex OAuth, Google Gemini, OpenRouter, fal, MiniMax, ComfyUI, Vydra, xAI)" +summary: "Generate and edit images using configured providers (OpenAI, OpenAI Codex OAuth, Google Gemini, OpenRouter, LiteLLM, fal, MiniMax, ComfyUI, Vydra, xAI)" read_when: - Generating images via the agent - Configuring image generation providers and models @@ -53,6 +53,7 @@ The agent calls `image_generate` automatically. 
No tool allow-listing needed — | OpenAI image generation with API billing | `openai/gpt-image-2` | `OPENAI_API_KEY` | | OpenAI image generation with Codex subscription auth | `openai/gpt-image-2` | OpenAI Codex OAuth | | OpenRouter image generation | `openrouter/google/gemini-3.1-flash-image-preview` | `OPENROUTER_API_KEY` | +| LiteLLM image generation | `litellm/gpt-image-2` | `LITELLM_API_KEY` | | Google Gemini image generation | `google/gemini-3.1-flash-image-preview` | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | The same `image_generate` tool handles text-to-image and reference-image @@ -67,6 +68,7 @@ ignored when a provider does not support them. | ---------- | --------------------------------------- | ---------------------------------- | ----------------------------------------------------- | | OpenAI | `gpt-image-2` | Yes (up to 4 images) | `OPENAI_API_KEY` or OpenAI Codex OAuth | | OpenRouter | `google/gemini-3.1-flash-image-preview` | Yes (up to 5 input images) | `OPENROUTER_API_KEY` | +| LiteLLM | `gpt-image-2` | Yes (up to 5 input images) | `LITELLM_API_KEY` | | Google | `gemini-3.1-flash-image-preview` | Yes | `GEMINI_API_KEY` or `GOOGLE_API_KEY` | | fal | `fal-ai/flux/dev` | Yes | `FAL_KEY` | | MiniMax | `image-01` | Yes (subject reference) | `MINIMAX_API_KEY` or MiniMax OAuth (`minimax-portal`) | diff --git a/extensions/litellm/image-generation-provider.test.ts b/extensions/litellm/image-generation-provider.test.ts new file mode 100644 index 00000000000..5cd835de33e --- /dev/null +++ b/extensions/litellm/image-generation-provider.test.ts @@ -0,0 +1,331 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { buildLitellmImageGenerationProvider } from "./image-generation-provider.js"; + +const { + resolveApiKeyForProviderMock, + postJsonRequestMock, + assertOkOrThrowHttpErrorMock, + resolveProviderHttpRequestConfigMock, + sanitizeConfiguredModelProviderRequestMock, +} = vi.hoisted(() => ({ + resolveApiKeyForProviderMock: vi.fn(async () => 
({ apiKey: "litellm-key" })), + postJsonRequestMock: vi.fn(), + assertOkOrThrowHttpErrorMock: vi.fn(async () => {}), + resolveProviderHttpRequestConfigMock: vi.fn((params) => ({ + baseUrl: params.baseUrl ?? params.defaultBaseUrl, + allowPrivateNetwork: Boolean(params.allowPrivateNetwork ?? params.request?.allowPrivateNetwork), + headers: new Headers(params.defaultHeaders), + dispatcherPolicy: undefined as unknown, + })), + sanitizeConfiguredModelProviderRequestMock: vi.fn((request) => request), +})); + +vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => ({ + resolveApiKeyForProvider: resolveApiKeyForProviderMock, +})); + +vi.mock("openclaw/plugin-sdk/provider-http", () => ({ + assertOkOrThrowHttpError: assertOkOrThrowHttpErrorMock, + postJsonRequest: postJsonRequestMock, + resolveProviderHttpRequestConfig: resolveProviderHttpRequestConfigMock, + sanitizeConfiguredModelProviderRequest: sanitizeConfiguredModelProviderRequestMock, +})); + +function mockGeneratedPngResponse() { + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + data: [{ b64_json: Buffer.from("png-bytes").toString("base64") }], + }), + }, + release: vi.fn(async () => {}), + }); +} + +describe("litellm image generation provider", () => { + afterEach(() => { + resolveApiKeyForProviderMock.mockClear(); + postJsonRequestMock.mockReset(); + assertOkOrThrowHttpErrorMock.mockClear(); + resolveProviderHttpRequestConfigMock.mockClear(); + sanitizeConfiguredModelProviderRequestMock.mockClear(); + }); + + it("declares litellm id and OpenAI-compatible size hints", () => { + const provider = buildLitellmImageGenerationProvider(); + + expect(provider.id).toBe("litellm"); + expect(provider.label).toBe("LiteLLM"); + expect(provider.defaultModel).toBe("gpt-image-2"); + expect(provider.capabilities.geometry?.sizes).toEqual( + expect.arrayContaining(["1024x1024", "2048x2048", "3840x2160"]), + ); + expect(provider.capabilities.edit?.enabled).toBe(true); + }); + + it("defaults to the 
loopback proxy and allows private network for localhost", async () => { + mockGeneratedPngResponse(); + + const provider = buildLitellmImageGenerationProvider(); + await provider.generateImage({ + provider: "litellm", + model: "gpt-image-2", + prompt: "Draw a QA lighthouse", + cfg: {}, + }); + + expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith( + expect.objectContaining({ + baseUrl: "http://localhost:4000", + allowPrivateNetwork: true, + }), + ); + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "http://localhost:4000/images/generations", + allowPrivateNetwork: true, + }), + ); + }); + + it("honors configured baseUrl and keeps private-network off for public endpoints", async () => { + mockGeneratedPngResponse(); + + const provider = buildLitellmImageGenerationProvider(); + await provider.generateImage({ + provider: "litellm", + model: "gpt-image-2", + prompt: "campaign hero", + cfg: { + models: { + providers: { + litellm: { + baseUrl: "https://proxy.example.com/v1", + models: [], + }, + }, + }, + }, + }); + + expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith( + expect.objectContaining({ + baseUrl: "https://proxy.example.com/v1", + allowPrivateNetwork: undefined, + }), + ); + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "https://proxy.example.com/v1/images/generations", + allowPrivateNetwork: false, + }), + ); + }); + + it("forwards count and size overrides on generation requests", async () => { + mockGeneratedPngResponse(); + + const provider = buildLitellmImageGenerationProvider(); + await provider.generateImage({ + provider: "litellm", + model: "dall-e-3", + prompt: "two landscape variants", + cfg: {}, + count: 2, + size: "3840x2160", + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "http://localhost:4000/images/generations", + body: { + model: "dall-e-3", + prompt: "two landscape variants", + n: 2, + size: 
"3840x2160", + }, + }), + ); + }); + + it("routes to the edit endpoint when input images are provided", async () => { + mockGeneratedPngResponse(); + + const provider = buildLitellmImageGenerationProvider(); + await provider.generateImage({ + provider: "litellm", + model: "gpt-image-2", + prompt: "refine the hero", + cfg: {}, + inputImages: [ + { + buffer: Buffer.from("fake-input"), + mimeType: "image/png", + }, + ], + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ + url: "http://localhost:4000/images/edits", + }), + ); + const call = postJsonRequestMock.mock.calls[0][0] as { body: { images: unknown[] } }; + expect(call.body.images).toHaveLength(1); + }); + + it("throws a clear error when the API key is missing", async () => { + resolveApiKeyForProviderMock.mockResolvedValueOnce({ apiKey: "" }); + + const provider = buildLitellmImageGenerationProvider(); + await expect( + provider.generateImage({ + provider: "litellm", + model: "gpt-image-2", + prompt: "x", + cfg: {}, + }), + ).rejects.toThrow("LiteLLM API key missing"); + }); + + it("forwards dispatcherPolicy from resolveProviderHttpRequestConfig to postJsonRequest", async () => { + const dispatcherPolicy = { proxyUrl: "http://corp-proxy:3128" } as unknown; + resolveProviderHttpRequestConfigMock.mockReturnValueOnce({ + baseUrl: "https://proxy.example.com/v1", + allowPrivateNetwork: false, + headers: new Headers({ Authorization: "Bearer litellm-key" }), + dispatcherPolicy, + }); + mockGeneratedPngResponse(); + + const provider = buildLitellmImageGenerationProvider(); + await provider.generateImage({ + provider: "litellm", + model: "gpt-image-2", + prompt: "hi", + cfg: { + models: { + providers: { + litellm: { baseUrl: "https://proxy.example.com/v1", models: [] }, + }, + }, + }, + }); + + expect(postJsonRequestMock).toHaveBeenCalledWith(expect.objectContaining({ dispatcherPolicy })); + }); + + it("auto-allows private network for loopback-style baseUrls", async () => { + const 
cases = [ + "http://localhost:4000", + "http://127.0.0.1:4000", + "http://[::1]:4000", + "http://host.docker.internal:4000", + "https://localhost:4000", + ] as const; + for (const baseUrl of cases) { + resolveProviderHttpRequestConfigMock.mockClear(); + mockGeneratedPngResponse(); + const provider = buildLitellmImageGenerationProvider(); + await provider.generateImage({ + provider: "litellm", + model: "gpt-image-2", + prompt: "x", + cfg: { models: { providers: { litellm: { baseUrl, models: [] } } } }, + }); + expect( + resolveProviderHttpRequestConfigMock, + `expected allowPrivateNetwork=true for ${baseUrl}`, + ).toHaveBeenCalledWith(expect.objectContaining({ allowPrivateNetwork: true })); + } + }); + + it("requires explicit private-network opt-in for LAN and internal baseUrls", async () => { + const cases = [ + "http://10.0.0.42:4000", + "http://192.168.5.10:4000", + "http://172.16.0.5:4000", + "https://192.168.5.10:4000", + "http://printer.local:4000", + "http://proxy.internal:4000", + "https://metadata.google.internal", + ] as const; + for (const baseUrl of cases) { + resolveProviderHttpRequestConfigMock.mockClear(); + mockGeneratedPngResponse(); + const provider = buildLitellmImageGenerationProvider(); + await provider.generateImage({ + provider: "litellm", + model: "gpt-image-2", + prompt: "x", + cfg: { models: { providers: { litellm: { baseUrl, models: [] } } } }, + }); + expect( + resolveProviderHttpRequestConfigMock, + `expected no automatic allowPrivateNetwork for ${baseUrl}`, + ).toHaveBeenCalledWith(expect.objectContaining({ allowPrivateNetwork: undefined })); + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ allowPrivateNetwork: false }), + ); + } + }); + + it("honors explicit private-network opt-in for a LAN LiteLLM proxy", async () => { + mockGeneratedPngResponse(); + + const provider = buildLitellmImageGenerationProvider(); + await provider.generateImage({ + provider: "litellm", + model: "gpt-image-2", + prompt: "x", + 
cfg: { + models: { + providers: { + litellm: { + baseUrl: "http://192.168.5.10:4000", + request: { allowPrivateNetwork: true }, + models: [], + }, + }, + }, + }, + }); + + expect(resolveProviderHttpRequestConfigMock).toHaveBeenCalledWith( + expect.objectContaining({ + allowPrivateNetwork: undefined, + request: { allowPrivateNetwork: true }, + }), + ); + expect(postJsonRequestMock).toHaveBeenCalledWith( + expect.objectContaining({ allowPrivateNetwork: true }), + ); + }); + + it("does not allow private network for public hosts that embed private strings in the URL", async () => { + // Must not be fooled by an attacker-controlled URL that mentions + // "host.docker.internal" (or any private-looking literal) in the path, + // query string, or fragment. Only the parsed hostname should count. + const cases = [ + "https://evil.example.com/?target=host.docker.internal", + "https://evil.example.com/host.docker.internal/foo", + "https://evil.example.com/redirect?to=127.0.0.1", + "https://public-api.openai.com/v1", + ] as const; + for (const baseUrl of cases) { + resolveProviderHttpRequestConfigMock.mockClear(); + mockGeneratedPngResponse(); + const provider = buildLitellmImageGenerationProvider(); + await provider.generateImage({ + provider: "litellm", + model: "gpt-image-2", + prompt: "x", + cfg: { models: { providers: { litellm: { baseUrl, models: [] } } } }, + }); + expect( + resolveProviderHttpRequestConfigMock, + `expected allowPrivateNetwork=false for ${baseUrl}`, + ).toHaveBeenCalledWith(expect.objectContaining({ allowPrivateNetwork: undefined })); + } + }); +}); diff --git a/extensions/litellm/image-generation-provider.ts b/extensions/litellm/image-generation-provider.ts new file mode 100644 index 00000000000..ec904ecc62b --- /dev/null +++ b/extensions/litellm/image-generation-provider.ts @@ -0,0 +1,220 @@ +import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime"; +import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation"; 
+import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; +import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; +import { + assertOkOrThrowHttpError, + postJsonRequest, + resolveProviderHttpRequestConfig, + sanitizeConfiguredModelProviderRequest, +} from "openclaw/plugin-sdk/provider-http"; +import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime"; +import { LITELLM_BASE_URL } from "./onboard.js"; + +const DEFAULT_OUTPUT_MIME = "image/png"; +const DEFAULT_SIZE = "1024x1024"; +const DEFAULT_LITELLM_IMAGE_MODEL = "gpt-image-2"; +const LITELLM_SUPPORTED_SIZES = [ + "256x256", + "512x512", + "1024x1024", + "1024x1536", + "1024x1792", + "1536x1024", + "1792x1024", + "2048x2048", + "2048x1152", + "3840x2160", + "2160x3840", +] as const; +const LITELLM_MAX_INPUT_IMAGES = 5; + +type LitellmProviderConfig = NonNullable< + NonNullable<OpenClawConfig["models"]>["providers"] +>[string]; + +function resolveLitellmProviderConfig( + cfg: OpenClawConfig | undefined, +): LitellmProviderConfig | undefined { + return cfg?.models?.providers?.litellm; +} + +function resolveConfiguredLitellmBaseUrl(cfg: OpenClawConfig | undefined): string { + return normalizeOptionalString(resolveLitellmProviderConfig(cfg)?.baseUrl) ?? LITELLM_BASE_URL; +} + +// LiteLLM's default proxy is loopback. Auto-enable private-network access only +// for loopback-style hosts; LAN/custom private endpoints should use the +// explicit models.providers.litellm.request.allowPrivateNetwork opt-in. +function isAutoAllowedLitellmHostname(hostname: string): boolean { + if (!hostname) { + return false; + } + // Strip IPv6 brackets if any: "[::1]" -> "::1". + const host = + hostname.startsWith("[") && hostname.endsWith("]") ? 
hostname.slice(1, -1) : hostname; + const lowered = host.toLowerCase(); + if ( + lowered === "localhost" || + lowered === "host.docker.internal" || + lowered.endsWith(".localhost") + ) { + return true; + } + if (lowered === "127.0.0.1" || lowered.startsWith("127.")) { + return true; + } + if (lowered === "::1" || lowered === "0:0:0:0:0:0:0:1") { + return true; + } + return false; +} + +function shouldAutoAllowPrivateLitellmEndpoint(baseUrl: string): boolean { + try { + const parsed = new URL(baseUrl); + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { + return false; + } + return isAutoAllowedLitellmHostname(parsed.hostname); + } catch { + return false; + } +} + +function toDataUrl(buffer: Buffer, mimeType: string): string { + return `data:${mimeType};base64,${buffer.toString("base64")}`; +} + +type LitellmImageApiResponse = { + data?: Array<{ + b64_json?: string; + revised_prompt?: string; + }>; +}; + +export function buildLitellmImageGenerationProvider(): ImageGenerationProvider { + return { + id: "litellm", + label: "LiteLLM", + defaultModel: DEFAULT_LITELLM_IMAGE_MODEL, + models: [DEFAULT_LITELLM_IMAGE_MODEL], + isConfigured: ({ agentDir }) => + isProviderApiKeyConfigured({ + provider: "litellm", + agentDir, + }), + capabilities: { + generate: { + maxCount: 4, + supportsSize: true, + supportsAspectRatio: false, + supportsResolution: false, + }, + edit: { + enabled: true, + maxCount: 4, + maxInputImages: LITELLM_MAX_INPUT_IMAGES, + supportsSize: true, + supportsAspectRatio: false, + supportsResolution: false, + }, + geometry: { + sizes: [...LITELLM_SUPPORTED_SIZES], + }, + }, + async generateImage(req) { + const inputImages = req.inputImages ?? 
[]; + const isEdit = inputImages.length > 0; + const auth = await resolveApiKeyForProvider({ + provider: "litellm", + cfg: req.cfg, + agentDir: req.agentDir, + store: req.authStore, + }); + if (!auth.apiKey) { + throw new Error("LiteLLM API key missing"); + } + const providerConfig = resolveLitellmProviderConfig(req.cfg); + const resolvedBaseUrl = resolveConfiguredLitellmBaseUrl(req.cfg); + const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } = + resolveProviderHttpRequestConfig({ + baseUrl: resolvedBaseUrl, + defaultBaseUrl: LITELLM_BASE_URL, + allowPrivateNetwork: shouldAutoAllowPrivateLitellmEndpoint(resolvedBaseUrl) + ? true + : undefined, + request: sanitizeConfiguredModelProviderRequest(providerConfig?.request), + defaultHeaders: { + Authorization: `Bearer ${auth.apiKey}`, + }, + provider: "litellm", + capability: "image", + transport: "http", + }); + + const model = req.model || DEFAULT_LITELLM_IMAGE_MODEL; + const count = req.count ?? 1; + const size = req.size ?? DEFAULT_SIZE; + + const jsonHeaders = new Headers(headers); + jsonHeaders.set("Content-Type", "application/json"); + const endpoint = isEdit ? "images/edits" : "images/generations"; + const body = isEdit + ? { + model, + prompt: req.prompt, + n: count, + size, + images: inputImages.map((image) => ({ + image_url: toDataUrl(image.buffer, image.mimeType?.trim() || DEFAULT_OUTPUT_MIME), + })), + } + : { + model, + prompt: req.prompt, + n: count, + size, + }; + const { response, release } = await postJsonRequest({ + url: `${baseUrl}/${endpoint}`, + headers: jsonHeaders, + body, + timeoutMs: req.timeoutMs, + fetchFn: fetch, + allowPrivateNetwork, + dispatcherPolicy, + }); + try { + await assertOkOrThrowHttpError( + response, + isEdit ? "LiteLLM image edit failed" : "LiteLLM image generation failed", + ); + + const data = (await response.json()) as LitellmImageApiResponse; + const images = (data.data ?? 
[]) + .map((entry, index) => { + if (!entry.b64_json) { + return null; + } + return Object.assign( + { + buffer: Buffer.from(entry.b64_json, `base64`), + mimeType: DEFAULT_OUTPUT_MIME, + fileName: `image-${index + 1}.png`, + }, + entry.revised_prompt ? { revisedPrompt: entry.revised_prompt } : {}, + ); + }) + .filter((entry): entry is NonNullable<typeof entry> => entry !== null); + + return { + images, + model, + }; + } finally { + await release(); + } + }, + }; +} diff --git a/extensions/litellm/index.ts b/extensions/litellm/index.ts index a2bb3d5e9bd..8a2c17f66d5 100644 --- a/extensions/litellm/index.ts +++ b/extensions/litellm/index.ts @@ -1,4 +1,5 @@ import { defineSingleProviderPluginEntry } from "openclaw/plugin-sdk/provider-entry"; +import { buildLitellmImageGenerationProvider } from "./image-generation-provider.js"; import { applyLitellmConfig, LITELLM_DEFAULT_MODEL_REF } from "./onboard.js"; import { buildLitellmProvider } from "./provider-catalog.js"; @@ -38,4 +39,7 @@ export default defineSingleProviderPluginEntry({ allowExplicitBaseUrl: true, }, }, + register(api) { + api.registerImageGenerationProvider(buildLitellmImageGenerationProvider()); + }, }); diff --git a/extensions/litellm/openclaw.plugin.json b/extensions/litellm/openclaw.plugin.json index d36cdb1e228..b7492e1a4be 100644 --- a/extensions/litellm/openclaw.plugin.json +++ b/extensions/litellm/openclaw.plugin.json @@ -21,6 +21,9 @@ "cliDescription": "LiteLLM API key" } ], + "contracts": { + "imageGenerationProviders": ["litellm"] + }, "configSchema": { "type": "object", "additionalProperties": false,