From aa2d5aaa0cd67169487cfc73805145411c56b58d Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 16 Mar 2026 22:56:14 -0700 Subject: [PATCH] feat(plugins): add image generation capability --- extensions/lobster/src/lobster-tool.test.ts | 1 + extensions/openai/index.ts | 2 + extensions/test-utils/plugin-api.ts | 1 + extensions/test-utils/plugin-runtime-mock.ts | 6 + package.json | 16 ++ scripts/lib/plugin-sdk-entrypoints.json | 4 + src/auto-reply/reply/route-reply.test.ts | 1 + .../channel-setup/plugin-install.test.ts | 1 + src/config/schema.help.ts | 4 + src/config/schema.labels.ts | 2 + src/config/types.agent-defaults.ts | 2 + src/config/zod-schema.agent-defaults.ts | 1 + src/gateway/server-plugins.test.ts | 1 + src/gateway/test-helpers.mocks.ts | 1 + src/image-generation/provider-registry.ts | 71 ++++++++ src/image-generation/providers/openai.test.ts | 55 ++++++ src/image-generation/providers/openai.ts | 79 +++++++++ src/image-generation/runtime.test.ts | 81 +++++++++ src/image-generation/runtime.ts | 162 ++++++++++++++++++ src/image-generation/types.ts | 33 ++++ src/media-understanding/runtime.ts | 34 ++++ src/plugin-sdk/image-generation-runtime.ts | 3 + src/plugin-sdk/image-generation.ts | 10 ++ src/plugin-sdk/index.ts | 1 + src/plugin-sdk/media-understanding-runtime.ts | 9 + src/plugin-sdk/speech-runtime.ts | 3 + src/plugins/captured-registration.ts | 7 + .../contracts/registry.contract.test.ts | 36 ++++ src/plugins/contracts/registry.ts | 13 ++ src/plugins/hooks.test-helpers.ts | 2 + src/plugins/loader.ts | 1 + src/plugins/registry.ts | 24 +++ src/plugins/runtime/index.test.ts | 7 + src/plugins/runtime/index.ts | 12 +- src/plugins/runtime/types-core.ts | 11 +- src/plugins/types.ts | 3 + src/test-utils/channel-plugins.ts | 1 + src/tts/runtime.ts | 4 + 38 files changed, 701 insertions(+), 4 deletions(-) create mode 100644 src/image-generation/provider-registry.ts create mode 100644 src/image-generation/providers/openai.test.ts create mode 100644 src/image-generation/providers/openai.ts create mode 100644 src/image-generation/runtime.test.ts create mode 100644 src/image-generation/runtime.ts create mode 100644 src/image-generation/types.ts create mode 100644 src/plugin-sdk/image-generation-runtime.ts create mode 100644 src/plugin-sdk/image-generation.ts create mode 100644 src/plugin-sdk/media-understanding-runtime.ts create mode 100644 src/plugin-sdk/speech-runtime.ts create mode 100644 src/tts/runtime.ts diff --git a/extensions/lobster/src/lobster-tool.test.ts b/extensions/lobster/src/lobster-tool.test.ts index cba95624f07..b154e067116 100644 --- a/extensions/lobster/src/lobster-tool.test.ts +++ b/extensions/lobster/src/lobster-tool.test.ts @@ -46,6 +46,7 @@ function fakeApi(overrides: Partial = {}): OpenClawPluginApi registerProvider() {}, registerSpeechProvider() {}, registerMediaUnderstandingProvider() {}, + registerImageGenerationProvider() {}, registerWebSearchProvider() {}, registerInteractiveHandler() {}, registerHook() {}, diff --git a/extensions/openai/index.ts b/extensions/openai/index.ts index d22b7275691..dd8bbdd615d 100644 --- a/extensions/openai/index.ts +++ b/extensions/openai/index.ts @@ -1,4 +1,5 @@ import { emptyPluginConfigSchema, type OpenClawPluginApi } from "openclaw/plugin-sdk/core"; +import { buildOpenAIImageGenerationProvider } from "openclaw/plugin-sdk/image-generation"; import { buildOpenAISpeechProvider } from "openclaw/plugin-sdk/speech"; import { openaiMediaUnderstandingProvider } from "./media-understanding-provider.js"; import { buildOpenAICodexProviderPlugin } from "./openai-codex-provider.js"; @@ -14,6 +15,7 @@ const openAIPlugin = { api.registerProvider(buildOpenAICodexProviderPlugin()); api.registerSpeechProvider(buildOpenAISpeechProvider()); api.registerMediaUnderstandingProvider(openaiMediaUnderstandingProvider); + api.registerImageGenerationProvider(buildOpenAIImageGenerationProvider()); }, }; diff --git a/extensions/test-utils/plugin-api.ts b/extensions/test-utils/plugin-api.ts index 2080359d961..bb94c326ee8 100644 --- a/extensions/test-utils/plugin-api.ts +++ b/extensions/test-utils/plugin-api.ts @@ -17,6 +17,7 @@ export function createTestPluginApi(api: TestPluginApiInput): OpenClawPluginApi registerProvider() {}, registerSpeechProvider() {}, registerMediaUnderstandingProvider() {}, + registerImageGenerationProvider() {}, registerWebSearchProvider() {}, registerInteractiveHandler() {}, registerCommand() {}, diff --git a/extensions/test-utils/plugin-runtime-mock.ts b/extensions/test-utils/plugin-runtime-mock.ts index c9f2c44cf10..fbc9bcdc7fd 100644 --- a/extensions/test-utils/plugin-runtime-mock.ts +++ b/extensions/test-utils/plugin-runtime-mock.ts @@ -110,11 +110,17 @@ export function createPluginRuntimeMock(overrides: DeepPartial = runFile: vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["runFile"], describeImageFile: vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["describeImageFile"], + describeImageFileWithModel: + vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["describeImageFileWithModel"], describeVideoFile: vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["describeVideoFile"], transcribeAudioFile: vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["transcribeAudioFile"], }, + imageGeneration: { + generate: vi.fn() as unknown as PluginRuntime["imageGeneration"]["generate"], + listProviders: vi.fn() as unknown as PluginRuntime["imageGeneration"]["listProviders"], + }, webSearch: { listProviders: vi.fn() as unknown as PluginRuntime["webSearch"]["listProviders"], search: vi.fn() as unknown as PluginRuntime["webSearch"]["search"], diff --git a/package.json b/package.json index 002dff9d4e5..4bb825d0d7a 100644 --- a/package.json +++ b/package.json @@ -102,6 +102,10 @@ "types": "./dist/plugin-sdk/media-runtime.d.ts", "default": "./dist/plugin-sdk/media-runtime.js" }, + "./plugin-sdk/media-understanding-runtime": { + "types": "./dist/plugin-sdk/media-understanding-runtime.d.ts", + "default": "./dist/plugin-sdk/media-understanding-runtime.js" + }, "./plugin-sdk/conversation-runtime": { "types": "./dist/plugin-sdk/conversation-runtime.d.ts", "default": "./dist/plugin-sdk/conversation-runtime.js" @@ -114,6 +118,10 @@ "types": "./dist/plugin-sdk/agent-runtime.d.ts", "default": "./dist/plugin-sdk/agent-runtime.js" }, + "./plugin-sdk/speech-runtime": { + "types": "./dist/plugin-sdk/speech-runtime.d.ts", + "default": "./dist/plugin-sdk/speech-runtime.js" + }, "./plugin-sdk/plugin-runtime": { "types": "./dist/plugin-sdk/plugin-runtime.d.ts", "default": "./dist/plugin-sdk/plugin-runtime.js" @@ -378,6 +386,14 @@ "types": "./dist/plugin-sdk/provider-web-search.d.ts", "default": "./dist/plugin-sdk/provider-web-search.js" }, + "./plugin-sdk/image-generation": { + "types": "./dist/plugin-sdk/image-generation.d.ts", + "default": "./dist/plugin-sdk/image-generation.js" + }, + "./plugin-sdk/image-generation-runtime": { + "types": "./dist/plugin-sdk/image-generation-runtime.d.ts", + "default": "./dist/plugin-sdk/image-generation-runtime.js" + }, "./plugin-sdk/reply-history": { "types": "./dist/plugin-sdk/reply-history.d.ts", "default": "./dist/plugin-sdk/reply-history.js" diff --git a/scripts/lib/plugin-sdk-entrypoints.json b/scripts/lib/plugin-sdk-entrypoints.json index ce8b623577f..205982588fd 100644 --- a/scripts/lib/plugin-sdk-entrypoints.json +++ b/scripts/lib/plugin-sdk-entrypoints.json @@ -15,9 +15,11 @@ "channel-runtime", "infra-runtime", "media-runtime", + "media-understanding-runtime", "conversation-runtime", "text-runtime", "agent-runtime", + "speech-runtime", "plugin-runtime", "security-runtime", "gateway-runtime", @@ -84,6 +86,8 @@ "provider-stream", "provider-usage", "provider-web-search", + "image-generation", + "image-generation-runtime", "reply-history", "media-understanding", "google", diff --git a/src/auto-reply/reply/route-reply.test.ts b/src/auto-reply/reply/route-reply.test.ts index 4c5dd7be889..98fd1144f77 100644 --- a/src/auto-reply/reply/route-reply.test.ts +++ b/src/auto-reply/reply/route-reply.test.ts @@ -93,6 +93,7 @@ const createRegistry = (channels: PluginRegistry["channels"]): PluginRegistry => providers: [], speechProviders: [], mediaUnderstandingProviders: [], + imageGenerationProviders: [], webSearchProviders: [], gatewayHandlers: {}, httpRoutes: [], diff --git a/src/commands/channel-setup/plugin-install.test.ts b/src/commands/channel-setup/plugin-install.test.ts index 96ca60e2197..88c70bc26ef 100644 --- a/src/commands/channel-setup/plugin-install.test.ts +++ b/src/commands/channel-setup/plugin-install.test.ts @@ -339,6 +339,7 @@ describe("ensureChannelSetupPluginInstalled", () => { providerIds: [], speechProviderIds: [], mediaUnderstandingProviderIds: [], + imageGenerationProviderIds: [], webSearchProviderIds: [], gatewayMethods: [], cliCommands: [], diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 1f4aa63ff62..779abbb609b 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -1019,6 +1019,10 @@ export const FIELD_HELP: Record = { "agents.defaults.imageModel.primary": "Optional image model (provider/model) used when the primary model lacks image input.", "agents.defaults.imageModel.fallbacks": "Ordered fallback image models (provider/model).", + "agents.defaults.imageGenerationModel.primary": + "Optional image-generation model (provider/model) used by the shared image generation capability.", + "agents.defaults.imageGenerationModel.fallbacks": + "Ordered fallback image-generation models (provider/model).", "agents.defaults.pdfModel.primary": "Optional PDF model (provider/model) for the PDF analysis tool. Defaults to imageModel, then session model.", "agents.defaults.pdfModel.fallbacks": "Ordered fallback PDF models (provider/model).", diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index c3e820a7d4b..62302e976af 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -454,6 +454,8 @@ export const FIELD_LABELS: Record = { "agents.defaults.model.fallbacks": "Model Fallbacks", "agents.defaults.imageModel.primary": "Image Model", "agents.defaults.imageModel.fallbacks": "Image Model Fallbacks", + "agents.defaults.imageGenerationModel.primary": "Image Generation Model", + "agents.defaults.imageGenerationModel.fallbacks": "Image Generation Model Fallbacks", "agents.defaults.pdfModel.primary": "PDF Model", "agents.defaults.pdfModel.fallbacks": "PDF Model Fallbacks", "agents.defaults.pdfMaxBytesMb": "PDF Max Size (MB)", diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts index e5613c7649d..68506e8be3c 100644 --- a/src/config/types.agent-defaults.ts +++ b/src/config/types.agent-defaults.ts @@ -122,6 +122,8 @@ export type AgentDefaultsConfig = { model?: AgentModelConfig; /** Optional image-capable model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */ imageModel?: AgentModelConfig; + /** Optional image-generation model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */ + imageGenerationModel?: AgentModelConfig; /** Optional PDF-capable model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */ pdfModel?: AgentModelConfig; /** Maximum PDF file size in megabytes (default: 10). */ diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts index b2cc5603c90..a631ae725b8 100644 --- a/src/config/zod-schema.agent-defaults.ts +++ b/src/config/zod-schema.agent-defaults.ts @@ -18,6 +18,7 @@ export const AgentDefaultsSchema = z .object({ model: AgentModelSchema.optional(), imageModel: AgentModelSchema.optional(), + imageGenerationModel: AgentModelSchema.optional(), pdfModel: AgentModelSchema.optional(), pdfMaxBytesMb: z.number().positive().optional(), pdfMaxPages: z.number().int().positive().optional(), diff --git a/src/gateway/server-plugins.test.ts b/src/gateway/server-plugins.test.ts index 184cb706762..ddaaa64c02b 100644 --- a/src/gateway/server-plugins.test.ts +++ b/src/gateway/server-plugins.test.ts @@ -31,6 +31,7 @@ const createRegistry = (diagnostics: PluginDiagnostic[]): PluginRegistry => ({ providers: [], speechProviders: [], mediaUnderstandingProviders: [], + imageGenerationProviders: [], webSearchProviders: [], gatewayHandlers: {}, httpRoutes: [], diff --git a/src/gateway/test-helpers.mocks.ts b/src/gateway/test-helpers.mocks.ts index 3617bc896bd..36d24537a14 100644 --- a/src/gateway/test-helpers.mocks.ts +++ b/src/gateway/test-helpers.mocks.ts @@ -148,6 +148,7 @@ const createStubPluginRegistry = (): PluginRegistry => ({ providers: [], speechProviders: [], mediaUnderstandingProviders: [], + imageGenerationProviders: [], webSearchProviders: [], gatewayHandlers: {}, httpRoutes: [], diff --git a/src/image-generation/provider-registry.ts b/src/image-generation/provider-registry.ts new file mode 100644 index 00000000000..500c7c9a34a --- /dev/null +++ b/src/image-generation/provider-registry.ts @@ -0,0 +1,71 @@ +import { normalizeProviderId } from "../agents/model-selection.js"; +import type { OpenClawConfig } from "../config/config.js"; +import { loadOpenClawPlugins } from "../plugins/loader.js"; +import { getActivePluginRegistry } from "../plugins/runtime.js"; +import type { ImageGenerationProviderPlugin } from "../plugins/types.js"; + +const BUILTIN_IMAGE_GENERATION_PROVIDERS: readonly ImageGenerationProviderPlugin[] = []; + +function normalizeImageGenerationProviderId(id: string | undefined): string | undefined { + const normalized = normalizeProviderId(id ?? ""); + return normalized || undefined; +} + +function resolvePluginImageGenerationProviders( + cfg?: OpenClawConfig, +): ImageGenerationProviderPlugin[] { + const active = getActivePluginRegistry(); + const registry = + (active?.imageGenerationProviders?.length ?? 0) > 0 || !cfg + ? active + : loadOpenClawPlugins({ config: cfg }); + return registry?.imageGenerationProviders?.map((entry) => entry.provider) ?? []; +} + +function buildProviderMaps(cfg?: OpenClawConfig): { + canonical: Map; + aliases: Map; +} { + const canonical = new Map(); + const aliases = new Map(); + const register = (provider: ImageGenerationProviderPlugin) => { + const id = normalizeImageGenerationProviderId(provider.id); + if (!id) { + return; + } + canonical.set(id, provider); + aliases.set(id, provider); + for (const alias of provider.aliases ?? []) { + const normalizedAlias = normalizeImageGenerationProviderId(alias); + if (normalizedAlias) { + aliases.set(normalizedAlias, provider); + } + } + }; + + for (const provider of BUILTIN_IMAGE_GENERATION_PROVIDERS) { + register(provider); + } + for (const provider of resolvePluginImageGenerationProviders(cfg)) { + register(provider); + } + + return { canonical, aliases }; +} + +export function listImageGenerationProviders( + cfg?: OpenClawConfig, +): ImageGenerationProviderPlugin[] { + return [...buildProviderMaps(cfg).canonical.values()]; +} + +export function getImageGenerationProvider( + providerId: string | undefined, + cfg?: OpenClawConfig, +): ImageGenerationProviderPlugin | undefined { + const normalized = normalizeImageGenerationProviderId(providerId); + if (!normalized) { + return undefined; + } + return buildProviderMaps(cfg).aliases.get(normalized); +} diff --git a/src/image-generation/providers/openai.test.ts b/src/image-generation/providers/openai.test.ts new file mode 100644 index 00000000000..a55e6107d3b --- /dev/null +++ b/src/image-generation/providers/openai.test.ts @@ -0,0 +1,55 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import * as modelAuth from "../../agents/model-auth.js"; +import { buildOpenAIImageGenerationProvider } from "./openai.js"; + +describe("OpenAI image-generation provider", () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it("generates PNG buffers from the OpenAI Images API", async () => { + vi.spyOn(modelAuth, "resolveApiKeyForProvider").mockResolvedValue({ + apiKey: "sk-test", + source: "env", + mode: "api-key", + }); + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ + data: [ + { + b64_json: Buffer.from("png-data").toString("base64"), + revised_prompt: "revised", + }, + ], + }), + }); + vi.stubGlobal("fetch", fetchMock); + + const provider = buildOpenAIImageGenerationProvider(); + const result = await provider.generateImage({ + provider: "openai", + model: "gpt-image-1", + prompt: "draw a cat", + cfg: {}, + }); + + expect(fetchMock).toHaveBeenCalledWith( + "https://api.openai.com/v1/images/generations", + expect.objectContaining({ + method: "POST", + }), + ); + expect(result).toEqual({ + images: [ + { + buffer: Buffer.from("png-data"), + mimeType: "image/png", + fileName: "image-1.png", + revisedPrompt: "revised", + }, + ], + model: "gpt-image-1", + }); + }); +}); diff --git a/src/image-generation/providers/openai.ts b/src/image-generation/providers/openai.ts new file mode 100644 index 00000000000..0c7788fb5d5 --- /dev/null +++ b/src/image-generation/providers/openai.ts @@ -0,0 +1,79 @@ +import { resolveApiKeyForProvider } from "../../agents/model-auth.js"; +import type { ImageGenerationProviderPlugin } from "../../plugins/types.js"; + +const DEFAULT_OPENAI_IMAGE_BASE_URL = "https://api.openai.com/v1"; +const DEFAULT_OPENAI_IMAGE_MODEL = "gpt-image-1"; +const DEFAULT_OUTPUT_MIME = "image/png"; +const DEFAULT_SIZE = "1024x1024"; + +type OpenAIImageApiResponse = { + data?: Array<{ + b64_json?: string; + revised_prompt?: string; + }>; +}; + +function resolveOpenAIBaseUrl(cfg: Parameters[0]["cfg"]): string { + const direct = cfg?.models?.providers?.openai?.baseUrl?.trim(); + return direct || DEFAULT_OPENAI_IMAGE_BASE_URL; +} + +export function buildOpenAIImageGenerationProvider(): ImageGenerationProviderPlugin { + return { + id: "openai", + label: "OpenAI", + supportedSizes: ["1024x1024", "1024x1536", "1536x1024"], + async generateImage(req) { + const auth = await resolveApiKeyForProvider({ + provider: "openai", + cfg: req.cfg, + agentDir: req.agentDir, + }); + if (!auth.apiKey) { + throw new Error("OpenAI API key missing"); + } + + const response = await fetch(`${resolveOpenAIBaseUrl(req.cfg)}/images/generations`, { + method: "POST", + headers: { + Authorization: `Bearer ${auth.apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: req.model || DEFAULT_OPENAI_IMAGE_MODEL, + prompt: req.prompt, + n: req.count ?? 1, + size: req.size ?? DEFAULT_SIZE, + response_format: "b64_json", + }), + }); + + if (!response.ok) { + const text = await response.text().catch(() => ""); + throw new Error( + `OpenAI image generation failed (${response.status}): ${text || response.statusText}`, + ); + } + + const data = (await response.json()) as OpenAIImageApiResponse; + const images = (data.data ?? []) + .map((entry, index) => { + if (!entry.b64_json) { + return null; + } + return { + buffer: Buffer.from(entry.b64_json, "base64"), + mimeType: DEFAULT_OUTPUT_MIME, + fileName: `image-${index + 1}.png`, + ...(entry.revised_prompt ? { revisedPrompt: entry.revised_prompt } : {}), + }; + }) + .filter((entry): entry is NonNullable => entry !== null); + + return { + images, + model: req.model || DEFAULT_OPENAI_IMAGE_MODEL, + }; + }, + }; +} diff --git a/src/image-generation/runtime.test.ts b/src/image-generation/runtime.test.ts new file mode 100644 index 00000000000..4ef478b3349 --- /dev/null +++ b/src/image-generation/runtime.test.ts @@ -0,0 +1,81 @@ +import { afterEach, describe, expect, it } from "vitest"; +import type { OpenClawConfig } from "../config/config.js"; +import { createEmptyPluginRegistry } from "../plugins/registry.js"; +import { setActivePluginRegistry } from "../plugins/runtime.js"; +import { generateImage, listRuntimeImageGenerationProviders } from "./runtime.js"; + +describe("image-generation runtime helpers", () => { + afterEach(() => { + setActivePluginRegistry(createEmptyPluginRegistry()); + }); + + it("generates images through the active image-generation registry", async () => { + const pluginRegistry = createEmptyPluginRegistry(); + pluginRegistry.imageGenerationProviders.push({ + pluginId: "image-plugin", + pluginName: "Image Plugin", + source: "test", + provider: { + id: "image-plugin", + async generateImage() { + return { + images: [ + { + buffer: Buffer.from("png-bytes"), + mimeType: "image/png", + fileName: "sample.png", + }, + ], + model: "img-v1", + }; + }, + }, + }); + setActivePluginRegistry(pluginRegistry); + + const cfg = { + agents: { + defaults: { + imageGenerationModel: { + primary: "image-plugin/img-v1", + }, + }, + }, + } as OpenClawConfig; + + const result = await generateImage({ + cfg, + prompt: "draw a cat", + agentDir: "/tmp/agent", + }); + + expect(result.provider).toBe("image-plugin"); + expect(result.model).toBe("img-v1"); + expect(result.attempts).toEqual([]); + expect(result.images).toEqual([ + { + buffer: Buffer.from("png-bytes"), + mimeType: "image/png", + fileName: "sample.png", + }, + ]); + }); + + it("lists runtime image-generation providers from the active registry", () => { + const pluginRegistry = createEmptyPluginRegistry(); + pluginRegistry.imageGenerationProviders.push({ + pluginId: "image-plugin", + pluginName: "Image Plugin", + source: "test", + provider: { + id: "image-plugin", + generateImage: async () => ({ + images: [{ buffer: Buffer.from("x"), mimeType: "image/png" }], + }), + }, + }); + setActivePluginRegistry(pluginRegistry); + + expect(listRuntimeImageGenerationProviders()).toMatchObject([{ id: "image-plugin" }]); + }); +}); diff --git a/src/image-generation/runtime.ts b/src/image-generation/runtime.ts new file mode 100644 index 00000000000..8c9104edd5d --- /dev/null +++ b/src/image-generation/runtime.ts @@ -0,0 +1,162 @@ +import { describeFailoverError, isFailoverError } from "../agents/failover-error.js"; +import type { FallbackAttempt } from "../agents/model-fallback.types.js"; +import type { OpenClawConfig } from "../config/config.js"; +import { + resolveAgentModelFallbackValues, + resolveAgentModelPrimaryValue, +} from "../config/model-input.js"; +import { createSubsystemLogger } from "../logging/subsystem.js"; +import { getImageGenerationProvider, listImageGenerationProviders } from "./provider-registry.js"; +import type { GeneratedImageAsset, ImageGenerationResult } from "./types.js"; + +const log = createSubsystemLogger("image-generation"); + +export type GenerateImageParams = { + cfg: OpenClawConfig; + prompt: string; + agentDir?: string; + modelOverride?: string; + count?: number; + size?: string; +}; + +export type GenerateImageRuntimeResult = { + images: GeneratedImageAsset[]; + provider: string; + model: string; + attempts: FallbackAttempt[]; + metadata?: Record; +}; + +function parseModelRef(raw: string | undefined): { provider: string; model: string } | null { + const trimmed = raw?.trim(); + if (!trimmed) { + return null; + } + const slashIndex = trimmed.indexOf("/"); + if (slashIndex <= 0 || slashIndex === trimmed.length - 1) { + return null; + } + return { + provider: trimmed.slice(0, slashIndex).trim(), + model: trimmed.slice(slashIndex + 1).trim(), + }; +} + +function resolveImageGenerationCandidates(params: { + cfg: OpenClawConfig; + modelOverride?: string; +}): Array<{ provider: string; model: string }> { + const candidates: Array<{ provider: string; model: string }> = []; + const seen = new Set(); + const add = (raw: string | undefined) => { + const parsed = parseModelRef(raw); + if (!parsed) { + return; + } + const key = `${parsed.provider}/${parsed.model}`; + if (seen.has(key)) { + return; + } + seen.add(key); + candidates.push(parsed); + }; + + add(params.modelOverride); + add(resolveAgentModelPrimaryValue(params.cfg.agents?.defaults?.imageGenerationModel)); + for (const fallback of resolveAgentModelFallbackValues( + params.cfg.agents?.defaults?.imageGenerationModel, + )) { + add(fallback); + } + return candidates; +} + +function throwImageGenerationFailure(params: { + attempts: FallbackAttempt[]; + lastError: unknown; +}): never { + if (params.attempts.length <= 1 && params.lastError) { + throw params.lastError; + } + const summary = + params.attempts.length > 0 + ? params.attempts + .map((attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error}`) + .join(" | ") + : "unknown"; + throw new Error(`All image generation models failed (${params.attempts.length}): ${summary}`, { + cause: params.lastError instanceof Error ? params.lastError : undefined, + }); +} + +export function listRuntimeImageGenerationProviders(params?: { config?: OpenClawConfig }) { + return listImageGenerationProviders(params?.config); +} + +export async function generateImage( + params: GenerateImageParams, +): Promise { + const candidates = resolveImageGenerationCandidates({ + cfg: params.cfg, + modelOverride: params.modelOverride, + }); + if (candidates.length === 0) { + throw new Error( + "No image-generation model configured. Set agents.defaults.imageGenerationModel.primary or agents.defaults.imageGenerationModel.fallbacks.", + ); + } + + const attempts: FallbackAttempt[] = []; + let lastError: unknown; + + for (const candidate of candidates) { + const provider = getImageGenerationProvider(candidate.provider, params.cfg); + if (!provider) { + const error = `No image-generation provider registered for ${candidate.provider}`; + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error, + }); + lastError = new Error(error); + continue; + } + + try { + const result: ImageGenerationResult = await provider.generateImage({ + provider: candidate.provider, + model: candidate.model, + prompt: params.prompt, + cfg: params.cfg, + agentDir: params.agentDir, + count: params.count, + size: params.size, + }); + if (!Array.isArray(result.images) || result.images.length === 0) { + throw new Error("Image generation provider returned no images."); + } + return { + images: result.images, + provider: candidate.provider, + model: result.model ?? candidate.model, + attempts, + metadata: result.metadata, + }; + } catch (err) { + lastError = err; + const described = isFailoverError(err) ? describeFailoverError(err) : undefined; + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error: described?.message ?? (err instanceof Error ? err.message : String(err)), + reason: described?.reason, + status: described?.status, + code: described?.code, + }); + log.debug(`image-generation candidate failed: ${candidate.provider}/${candidate.model}`); + } + } + + throwImageGenerationFailure({ attempts, lastError }); +} diff --git a/src/image-generation/types.ts b/src/image-generation/types.ts new file mode 100644 index 00000000000..ff33d6079ee --- /dev/null +++ b/src/image-generation/types.ts @@ -0,0 +1,33 @@ +import type { OpenClawConfig } from "../config/config.js"; + +export type GeneratedImageAsset = { + buffer: Buffer; + mimeType: string; + fileName?: string; + revisedPrompt?: string; + metadata?: Record; +}; + +export type ImageGenerationRequest = { + provider: string; + model: string; + prompt: string; + cfg: OpenClawConfig; + agentDir?: string; + count?: number; + size?: string; +}; + +export type ImageGenerationResult = { + images: GeneratedImageAsset[]; + model?: string; + metadata?: Record; +}; + +export type ImageGenerationProvider = { + id: string; + aliases?: string[]; + label?: string; + supportedSizes?: string[]; + generateImage: (req: ImageGenerationRequest) => Promise; +}; diff --git a/src/media-understanding/runtime.ts b/src/media-understanding/runtime.ts index 043baf81f91..74f125135dd 100644 --- a/src/media-understanding/runtime.ts +++ b/src/media-understanding/runtime.ts @@ -1,6 +1,8 @@ +import fs from "node:fs/promises"; import path from "node:path"; import type { MsgContext } from "../auto-reply/templating.js"; import type { OpenClawConfig } from "../config/config.js"; +import { getMediaUnderstandingProvider } from "./providers/index.js"; import { buildProviderRegistry, createMediaAttachmentCache, @@ -90,6 +92,38 @@ export async function describeImageFile(params: { return await runMediaUnderstandingFile({ ...params, capability: "image" }); } +export async function describeImageFileWithModel(params: { + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + provider: string; + model: string; + prompt: string; + maxTokens?: number; + timeoutMs?: number; +}) { + const timeoutMs = params.timeoutMs ?? 30_000; + const providerRegistry = buildProviderRegistry(undefined, params.cfg); + const provider = getMediaUnderstandingProvider(params.provider, providerRegistry); + if (!provider?.describeImage) { + throw new Error(`Provider does not support image analysis: ${params.provider}`); + } + const buffer = await fs.readFile(params.filePath); + return await provider.describeImage({ + buffer, + fileName: path.basename(params.filePath), + mime: params.mime, + provider: params.provider, + model: params.model, + prompt: params.prompt, + maxTokens: params.maxTokens, + timeoutMs, + cfg: params.cfg, + agentDir: params.agentDir ?? "", + }); +} + export async function describeVideoFile(params: { filePath: string; cfg: OpenClawConfig; diff --git a/src/plugin-sdk/image-generation-runtime.ts b/src/plugin-sdk/image-generation-runtime.ts new file mode 100644 index 00000000000..54f91d0d558 --- /dev/null +++ b/src/plugin-sdk/image-generation-runtime.ts @@ -0,0 +1,3 @@ +// Public runtime-facing image-generation helpers for feature/channel plugins. + +export { generateImage, listRuntimeImageGenerationProviders } from "../image-generation/runtime.js"; diff --git a/src/plugin-sdk/image-generation.ts b/src/plugin-sdk/image-generation.ts new file mode 100644 index 00000000000..9ca98074743 --- /dev/null +++ b/src/plugin-sdk/image-generation.ts @@ -0,0 +1,10 @@ +// Public image-generation helpers and types for provider plugins. + +export type { + GeneratedImageAsset, + ImageGenerationProvider, + ImageGenerationRequest, + ImageGenerationResult, +} from "../image-generation/types.js"; + +export { buildOpenAIImageGenerationProvider } from "../image-generation/providers/openai.js"; diff --git a/src/plugin-sdk/index.ts b/src/plugin-sdk/index.ts index 20af3448e8f..1f9198d4e7f 100644 --- a/src/plugin-sdk/index.ts +++ b/src/plugin-sdk/index.ts @@ -40,6 +40,7 @@ export type { export type { OpenClawConfig } from "../config/config.js"; /** @deprecated Use OpenClawConfig instead */ export type { OpenClawConfig as ClawdbotConfig } from "../config/config.js"; +export * from "./image-generation.js"; export type { SecretInput, SecretRef } from "../config/types.secrets.js"; export type { RuntimeEnv } from "../runtime.js"; export type { HookEntry } from "../hooks/types.js"; diff --git a/src/plugin-sdk/media-understanding-runtime.ts b/src/plugin-sdk/media-understanding-runtime.ts new file mode 100644 index 00000000000..5a4c6cdff65 --- /dev/null +++ b/src/plugin-sdk/media-understanding-runtime.ts @@ -0,0 +1,9 @@ +// Public runtime-facing media-understanding helpers for feature/channel plugins. + +export { + describeImageFile, + describeImageFileWithModel, + describeVideoFile, + runMediaUnderstandingFile, + transcribeAudioFile, +} from "../media-understanding/runtime.js"; diff --git a/src/plugin-sdk/speech-runtime.ts b/src/plugin-sdk/speech-runtime.ts new file mode 100644 index 00000000000..afe192c4f53 --- /dev/null +++ b/src/plugin-sdk/speech-runtime.ts @@ -0,0 +1,3 @@ +// Public runtime-facing speech helpers for feature/channel plugins. + +export { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "../tts/runtime.js"; diff --git a/src/plugins/captured-registration.ts b/src/plugins/captured-registration.ts index dd5ba78a9c4..fd2c359b463 100644 --- a/src/plugins/captured-registration.ts +++ b/src/plugins/captured-registration.ts @@ -1,5 +1,6 @@ import type { AnyAgentTool, + ImageGenerationProviderPlugin, MediaUnderstandingProviderPlugin, OpenClawPluginApi, ProviderPlugin, @@ -12,6 +13,7 @@ export type CapturedPluginRegistration = { providers: ProviderPlugin[]; speechProviders: SpeechProviderPlugin[]; mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[]; + imageGenerationProviders: ImageGenerationProviderPlugin[]; webSearchProviders: WebSearchProviderPlugin[]; tools: AnyAgentTool[]; }; @@ -20,6 +22,7 @@ export function createCapturedPluginRegistration(): CapturedPluginRegistration { const providers: ProviderPlugin[] = []; const speechProviders: SpeechProviderPlugin[] = []; const mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[] = []; + const imageGenerationProviders: ImageGenerationProviderPlugin[] = []; const webSearchProviders: WebSearchProviderPlugin[] = []; const tools: AnyAgentTool[] = []; @@ -27,6 +30,7 @@ export function createCapturedPluginRegistration(): CapturedPluginRegistration { providers, speechProviders, mediaUnderstandingProviders, + imageGenerationProviders, webSearchProviders, tools, api: { @@ -39,6 +43,9 @@ export function createCapturedPluginRegistration(): CapturedPluginRegistration { registerMediaUnderstandingProvider(provider: MediaUnderstandingProviderPlugin) { mediaUnderstandingProviders.push(provider); }, + registerImageGenerationProvider(provider: ImageGenerationProviderPlugin) { + imageGenerationProviders.push(provider); + }, registerWebSearchProvider(provider: WebSearchProviderPlugin) { webSearchProviders.push(provider); }, diff --git a/src/plugins/contracts/registry.contract.test.ts b/src/plugins/contracts/registry.contract.test.ts index f7b89c2296e..762612cc45a 100644 --- a/src/plugins/contracts/registry.contract.test.ts +++ b/src/plugins/contracts/registry.contract.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest"; import { loadPluginManifestRegistry } from "../manifest-registry.js"; import { resolvePluginWebSearchProviders } from "../web-search-providers.js"; import { + imageGenerationProviderContractRegistry, mediaUnderstandingProviderContractRegistry, pluginRegistrationContractRegistry, providerContractPluginIds, @@ -56,6 +57,23 @@ function findMediaUnderstandingProviderForPlugin(pluginId: string) { return entry.provider; } +function findImageGenerationProviderIdsForPlugin(pluginId: string) { + return imageGenerationProviderContractRegistry + .filter((entry) => entry.pluginId === pluginId) + .map((entry) => entry.provider.id) + .toSorted((left, right) => left.localeCompare(right)); +} + +function findImageGenerationProviderForPlugin(pluginId: string) { + const entry = imageGenerationProviderContractRegistry.find( + (candidate) => candidate.pluginId === pluginId, + ); + if (!entry) { + throw new Error(`image-generation provider contract missing for ${pluginId}`); + } + return entry.provider; +} + function findRegistrationForPlugin(pluginId: string) { const entry = pluginRegistrationContractRegistry.find( (candidate) => candidate.pluginId === pluginId, @@ -108,6 +126,10 @@ describe("plugin contract registry", () => { ).toEqual(bundledWebSearchPluginIds); }); + it("does not duplicate bundled image-generation provider ids", () => { + const ids = imageGenerationProviderContractRegistry.map((entry) => entry.provider.id); + expect(ids).toEqual([...new Set(ids)]); + }); it("keeps multi-provider plugin ownership explicit", () => { expect(findProviderIdsForPlugin("google")).toEqual(["google", "google-gemini-cli"]); expect(findProviderIdsForPlugin("minimax")).toEqual(["minimax", "minimax-portal"]); @@ -142,11 +164,16 @@ describe("plugin contract registry", () => { expect(findMediaUnderstandingProviderIdsForPlugin("zai")).toEqual(["zai"]); }); + it("keeps bundled image-generation ownership explicit", () => { + expect(findImageGenerationProviderIdsForPlugin("openai")).toEqual(["openai"]); + }); + it("keeps bundled provider and web search tool ownership explicit", () => { expect(findRegistrationForPlugin("firecrawl")).toMatchObject({ providerIds: [], speechProviderIds: [], mediaUnderstandingProviderIds: [], + imageGenerationProviderIds: [], webSearchProviderIds: ["firecrawl"], toolNames: ["firecrawl_search", "firecrawl_scrape"], }); @@ -157,16 +184,19 @@ describe("plugin contract registry", () => { providerIds: ["openai", "openai-codex"], speechProviderIds: ["openai"], mediaUnderstandingProviderIds: ["openai"], + imageGenerationProviderIds: ["openai"], }); expect(findRegistrationForPlugin("elevenlabs")).toMatchObject({ providerIds: [], speechProviderIds: ["elevenlabs"], mediaUnderstandingProviderIds: [], + imageGenerationProviderIds: [], }); expect(findRegistrationForPlugin("microsoft")).toMatchObject({ providerIds: [], speechProviderIds: ["microsoft"], mediaUnderstandingProviderIds: [], + imageGenerationProviderIds: [], }); }); @@ -213,4 +243,10 @@ describe("plugin contract registry", () => { expect.any(Function), ); }); + + it("keeps bundled image-generation support explicit", () => { + expect(findImageGenerationProviderForPlugin("openai").generateImage).toEqual( + expect.any(Function), + ); + }); }); diff --git a/src/plugins/contracts/registry.ts b/src/plugins/contracts/registry.ts index 8ab7422c1e2..a4d2f815d7b 100644 --- a/src/plugins/contracts/registry.ts +++ b/src/plugins/contracts/registry.ts @@ -37,6 +37,7 @@ import xiaomiPlugin from "../../../extensions/xiaomi/index.js"; import zaiPlugin from "../../../extensions/zai/index.js"; import { createCapturedPluginRegistration } from "../captured-registration.js"; import type { + ImageGenerationProviderPlugin, MediaUnderstandingProviderPlugin, ProviderPlugin, SpeechProviderPlugin, @@ -62,12 +63,14 @@ type WebSearchProviderContractEntry = CapabilityContractEntry; type MediaUnderstandingProviderContractEntry = CapabilityContractEntry; +type ImageGenerationProviderContractEntry = CapabilityContractEntry; type PluginRegistrationContractEntry = { pluginId: string; providerIds: string[]; speechProviderIds: string[]; mediaUnderstandingProviderIds: string[]; + imageGenerationProviderIds: string[]; webSearchProviderIds: string[]; toolNames: string[]; }; @@ -128,6 +131,8 @@ const bundledMediaUnderstandingPlugins: RegistrablePlugin[] = [ zaiPlugin, ]; +const bundledImageGenerationPlugins: RegistrablePlugin[] = [openAIPlugin]; + function captureRegistrations(plugin: RegistrablePlugin) { const captured = createCapturedPluginRegistration(); plugin.register(captured.api); @@ -207,12 +212,19 @@ export const mediaUnderstandingProviderContractRegistry: MediaUnderstandingProvi select: (captured) => captured.mediaUnderstandingProviders, }); +export const imageGenerationProviderContractRegistry: ImageGenerationProviderContractEntry[] = + buildCapabilityContractRegistry({ + plugins: bundledImageGenerationPlugins, + select: (captured) => captured.imageGenerationProviders, + }); + const bundledPluginRegistrationList = [ ...new Map( [ ...bundledProviderPlugins, ...bundledSpeechPlugins, ...bundledMediaUnderstandingPlugins, + ...bundledImageGenerationPlugins, ...bundledWebSearchPlugins, ].map((plugin) => [plugin.id, plugin]), ).values(), @@ -228,6 +240,7 @@ export const pluginRegistrationContractRegistry: PluginRegistrationContractEntry mediaUnderstandingProviderIds: captured.mediaUnderstandingProviders.map( (provider) => provider.id, ), + imageGenerationProviderIds: captured.imageGenerationProviders.map((provider) => provider.id), webSearchProviderIds: captured.webSearchProviders.map((provider) => provider.id), toolNames: captured.tools.map((tool) => tool.name), }; diff --git a/src/plugins/hooks.test-helpers.ts b/src/plugins/hooks.test-helpers.ts index ea01163d4b0..559f70a1dc7 100644 --- a/src/plugins/hooks.test-helpers.ts +++ b/src/plugins/hooks.test-helpers.ts @@ -19,6 +19,7 @@ export function createMockPluginRegistry( providerIds: [], speechProviderIds: [], mediaUnderstandingProviderIds: [], + imageGenerationProviderIds: [], webSearchProviderIds: [], gatewayMethods: [], cliCommands: [], @@ -43,6 +44,7 @@ export function createMockPluginRegistry( providers: [], speechProviders: [], mediaUnderstandingProviders: [], + imageGenerationProviders: [], webSearchProviders: [], httpRoutes: [], gatewayHandlers: {}, diff --git a/src/plugins/loader.ts b/src/plugins/loader.ts index 86273793006..8d064d477c3 100644 --- a/src/plugins/loader.ts +++ b/src/plugins/loader.ts @@ -497,6 +497,7 @@ function createPluginRecord(params: { providerIds: [], speechProviderIds: [], mediaUnderstandingProviderIds: [], + imageGenerationProviderIds: [], webSearchProviderIds: [], gatewayMethods: [], cliCommands: [], diff --git a/src/plugins/registry.ts b/src/plugins/registry.ts index c81c2253e0a..ca4e40ee54c 100644 --- a/src/plugins/registry.ts +++ b/src/plugins/registry.ts @@ -22,6 +22,7 @@ import { stripPromptMutationFieldsFromLegacyHookResult, } from "./types.js"; import type { + ImageGenerationProviderPlugin, OpenClawPluginApi, OpenClawPluginChannelRegistration, OpenClawPluginCliRegistrar, @@ -116,6 +117,8 @@ export type PluginSpeechProviderRegistration = PluginOwnedProviderRegistration; export type PluginMediaUnderstandingProviderRegistration = PluginOwnedProviderRegistration; +export type PluginImageGenerationProviderRegistration = + PluginOwnedProviderRegistration; export type PluginWebSearchProviderRegistration = PluginOwnedProviderRegistration; @@ -165,6 +168,7 @@ export type PluginRecord = { providerIds: string[]; speechProviderIds: string[]; mediaUnderstandingProviderIds: string[]; + imageGenerationProviderIds: string[]; webSearchProviderIds: string[]; gatewayMethods: string[]; cliCommands: string[]; @@ -187,6 +191,7 @@ export type PluginRegistry = { providers: PluginProviderRegistration[]; speechProviders: PluginSpeechProviderRegistration[]; mediaUnderstandingProviders: PluginMediaUnderstandingProviderRegistration[]; + imageGenerationProviders: PluginImageGenerationProviderRegistration[]; webSearchProviders: PluginWebSearchProviderRegistration[]; gatewayHandlers: GatewayRequestHandlers; httpRoutes: PluginHttpRouteRegistration[]; @@ -234,6 +239,7 @@ export function createEmptyPluginRegistry(): PluginRegistry { providers: [], speechProviders: [], mediaUnderstandingProviders: [], + imageGenerationProviders: [], webSearchProviders: [], gatewayHandlers: {}, httpRoutes: [], @@ -631,6 +637,19 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) { }); }; + const registerImageGenerationProvider = ( + record: PluginRecord, + provider: ImageGenerationProviderPlugin, + ) => { + registerUniqueProviderLike({ + record, + provider, + kindLabel: "image-generation provider", + registrations: registry.imageGenerationProviders, + ownedIds: record.imageGenerationProviderIds, + }); + }; + const registerWebSearchProvider = (record: PluginRecord, provider: WebSearchProviderPlugin) => { registerUniqueProviderLike({ record, @@ -857,6 +876,10 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) { registrationMode === "full" ? (provider) => registerMediaUnderstandingProvider(record, provider) : () => {}, + registerImageGenerationProvider: + registrationMode === "full" + ? (provider) => registerImageGenerationProvider(record, provider) + : () => {}, registerWebSearchProvider: registrationMode === "full" ? (provider) => registerWebSearchProvider(record, provider) @@ -932,6 +955,7 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) { registerProvider, registerSpeechProvider, registerMediaUnderstandingProvider, + registerImageGenerationProvider, registerWebSearchProvider, registerGatewayMethod, registerCli, diff --git a/src/plugins/runtime/index.test.ts b/src/plugins/runtime/index.test.ts index 2022ac07d37..5ffbd60aa2e 100644 --- a/src/plugins/runtime/index.test.ts +++ b/src/plugins/runtime/index.test.ts @@ -59,10 +59,17 @@ describe("plugin runtime command execution", () => { const runtime = createPluginRuntime(); expect(typeof runtime.mediaUnderstanding.runFile).toBe("function"); expect(typeof runtime.mediaUnderstanding.describeImageFile).toBe("function"); + expect(typeof runtime.mediaUnderstanding.describeImageFileWithModel).toBe("function"); expect(typeof runtime.mediaUnderstanding.describeVideoFile).toBe("function"); expect(runtime.mediaUnderstanding.transcribeAudioFile).toBe(runtime.stt.transcribeAudioFile); }); + it("exposes runtime.imageGeneration helpers", () => { + const runtime = createPluginRuntime(); + expect(typeof runtime.imageGeneration.generate).toBe("function"); + expect(typeof runtime.imageGeneration.listProviders).toBe("function"); + }); + it("exposes runtime.webSearch helpers", () => { const runtime = createPluginRuntime(); expect(typeof runtime.webSearch.listProviders).toBe("function"); diff --git a/src/plugins/runtime/index.ts b/src/plugins/runtime/index.ts index cd76a21916b..3f5b80d1caa 100644 --- a/src/plugins/runtime/index.ts +++ b/src/plugins/runtime/index.ts @@ -4,13 +4,18 @@ import { resolveApiKeyForProvider as resolveApiKeyForProviderRaw, } from "../../agents/model-auth.js"; import { resolveStateDir } from "../../config/paths.js"; +import { + generateImage, + listRuntimeImageGenerationProviders, +} from "../../image-generation/runtime.js"; import { describeImageFile, + describeImageFileWithModel, describeVideoFile, runMediaUnderstandingFile, transcribeAudioFile, } from "../../media-understanding/runtime.js"; -import { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "../../tts/tts.js"; +import { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "../../tts/runtime.js"; import { listWebSearchProviders, runWebSearch } from "../../web-search/runtime.js"; import { createRuntimeAgent } from "./runtime-agent.js"; import { createRuntimeChannel } from "./runtime-channel.js"; @@ -145,9 +150,14 @@ export function createPluginRuntime(_options: CreatePluginRuntimeOptions = {}): mediaUnderstanding: { runFile: runMediaUnderstandingFile, describeImageFile, + describeImageFileWithModel, describeVideoFile, transcribeAudioFile, }, + imageGeneration: { + generate: generateImage, + listProviders: listRuntimeImageGenerationProviders, + }, webSearch: { listProviders: listWebSearchProviders, search: runWebSearch, diff --git a/src/plugins/runtime/types-core.ts b/src/plugins/runtime/types-core.ts index 528c488d987..e5951a1ce57 100644 --- a/src/plugins/runtime/types-core.ts +++ b/src/plugins/runtime/types-core.ts @@ -47,16 +47,21 @@ export type PluginRuntimeCore = { resizeToJpeg: typeof import("../../media/image-ops.js").resizeToJpeg; }; tts: { - textToSpeech: typeof import("../../tts/tts.js").textToSpeech; - textToSpeechTelephony: typeof import("../../tts/tts.js").textToSpeechTelephony; - listVoices: typeof import("../../tts/tts.js").listSpeechVoices; + textToSpeech: typeof import("../../tts/runtime.js").textToSpeech; + textToSpeechTelephony: typeof import("../../tts/runtime.js").textToSpeechTelephony; + listVoices: typeof import("../../tts/runtime.js").listSpeechVoices; }; mediaUnderstanding: { runFile: typeof import("../../media-understanding/runtime.js").runMediaUnderstandingFile; describeImageFile: typeof import("../../media-understanding/runtime.js").describeImageFile; + describeImageFileWithModel: typeof import("../../media-understanding/runtime.js").describeImageFileWithModel; describeVideoFile: typeof import("../../media-understanding/runtime.js").describeVideoFile; transcribeAudioFile: typeof import("../../media-understanding/runtime.js").transcribeAudioFile; }; + imageGeneration: { + generate: typeof import("../../image-generation/runtime.js").generateImage; + listProviders: typeof import("../../image-generation/runtime.js").listRuntimeImageGenerationProviders; + }; webSearch: { listProviders: typeof import("../../web-search/runtime.js").listWebSearchProviders; search: typeof import("../../web-search/runtime.js").runWebSearch; diff --git a/src/plugins/types.ts b/src/plugins/types.ts index 52cb2787977..6deb59669f1 100644 --- a/src/plugins/types.ts +++ b/src/plugins/types.ts @@ -22,6 +22,7 @@ import type { ModelProviderConfig } from "../config/types.js"; import type { GatewayRequestHandler } from "../gateway/server-methods/types.js"; import type { InternalHookHandler } from "../hooks/internal-hooks.js"; import type { HookEntry } from "../hooks/types.js"; +import type { ImageGenerationProvider } from "../image-generation/types.js"; import type { ProviderUsageSnapshot } from "../infra/provider-usage.types.js"; import type { MediaUnderstandingProvider } from "../media-understanding/types.js"; import type { RuntimeEnv } from "../runtime.js"; @@ -890,6 +891,7 @@ export type PluginSpeechProviderEntry = SpeechProviderPlugin & { }; export type MediaUnderstandingProviderPlugin = MediaUnderstandingProvider; +export type ImageGenerationProviderPlugin = ImageGenerationProvider; export type OpenClawPluginGatewayMethod = { method: string; @@ -1251,6 +1253,7 @@ export type OpenClawPluginApi = { registerProvider: (provider: ProviderPlugin) => void; registerSpeechProvider: (provider: SpeechProviderPlugin) => void; registerMediaUnderstandingProvider: (provider: MediaUnderstandingProviderPlugin) => void; + registerImageGenerationProvider: (provider: ImageGenerationProviderPlugin) => void; registerWebSearchProvider: (provider: WebSearchProviderPlugin) => void; registerInteractiveHandler: (registration: PluginInteractiveHandlerRegistration) => void; /** diff --git a/src/test-utils/channel-plugins.ts b/src/test-utils/channel-plugins.ts index 1283ac9f506..6ecf718f895 100644 --- a/src/test-utils/channel-plugins.ts +++ b/src/test-utils/channel-plugins.ts @@ -28,6 +28,7 @@ export const createTestRegistry = (channels: TestChannelRegistration[] = []): Pl providers: [], speechProviders: [], mediaUnderstandingProviders: [], + imageGenerationProviders: [], webSearchProviders: [], gatewayHandlers: {}, httpRoutes: [], diff --git a/src/tts/runtime.ts b/src/tts/runtime.ts new file mode 100644 index 00000000000..2235a1124e0 --- /dev/null +++ b/src/tts/runtime.ts @@ -0,0 +1,4 @@ +// Shared runtime-facing speech helpers. Keep channel/feature plugins on this +// boundary instead of importing the full TTS orchestrator module directly. + +export { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "./tts.js";