feat(plugins): add image generation capability

This commit is contained in:
Peter Steinberger
2026-03-16 22:56:14 -07:00
parent c79ade10e6
commit aa2d5aaa0c
38 changed files with 701 additions and 4 deletions

View File

@@ -46,6 +46,7 @@ function fakeApi(overrides: Partial<OpenClawPluginApi> = {}): OpenClawPluginApi
registerProvider() {},
registerSpeechProvider() {},
registerMediaUnderstandingProvider() {},
registerImageGenerationProvider() {},
registerWebSearchProvider() {},
registerInteractiveHandler() {},
registerHook() {},

View File

@@ -1,4 +1,5 @@
import { emptyPluginConfigSchema, type OpenClawPluginApi } from "openclaw/plugin-sdk/core";
import { buildOpenAIImageGenerationProvider } from "openclaw/plugin-sdk/image-generation";
import { buildOpenAISpeechProvider } from "openclaw/plugin-sdk/speech";
import { openaiMediaUnderstandingProvider } from "./media-understanding-provider.js";
import { buildOpenAICodexProviderPlugin } from "./openai-codex-provider.js";
@@ -14,6 +15,7 @@ const openAIPlugin = {
api.registerProvider(buildOpenAICodexProviderPlugin());
api.registerSpeechProvider(buildOpenAISpeechProvider());
api.registerMediaUnderstandingProvider(openaiMediaUnderstandingProvider);
api.registerImageGenerationProvider(buildOpenAIImageGenerationProvider());
},
};

View File

@@ -17,6 +17,7 @@ export function createTestPluginApi(api: TestPluginApiInput): OpenClawPluginApi
registerProvider() {},
registerSpeechProvider() {},
registerMediaUnderstandingProvider() {},
registerImageGenerationProvider() {},
registerWebSearchProvider() {},
registerInteractiveHandler() {},
registerCommand() {},

View File

@@ -110,11 +110,17 @@ export function createPluginRuntimeMock(overrides: DeepPartial<PluginRuntime> =
runFile: vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["runFile"],
describeImageFile:
vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["describeImageFile"],
describeImageFileWithModel:
vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["describeImageFileWithModel"],
describeVideoFile:
vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["describeVideoFile"],
transcribeAudioFile:
vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["transcribeAudioFile"],
},
imageGeneration: {
generate: vi.fn() as unknown as PluginRuntime["imageGeneration"]["generate"],
listProviders: vi.fn() as unknown as PluginRuntime["imageGeneration"]["listProviders"],
},
webSearch: {
listProviders: vi.fn() as unknown as PluginRuntime["webSearch"]["listProviders"],
search: vi.fn() as unknown as PluginRuntime["webSearch"]["search"],

View File

@@ -102,6 +102,10 @@
"types": "./dist/plugin-sdk/media-runtime.d.ts",
"default": "./dist/plugin-sdk/media-runtime.js"
},
"./plugin-sdk/media-understanding-runtime": {
"types": "./dist/plugin-sdk/media-understanding-runtime.d.ts",
"default": "./dist/plugin-sdk/media-understanding-runtime.js"
},
"./plugin-sdk/conversation-runtime": {
"types": "./dist/plugin-sdk/conversation-runtime.d.ts",
"default": "./dist/plugin-sdk/conversation-runtime.js"
@@ -114,6 +118,10 @@
"types": "./dist/plugin-sdk/agent-runtime.d.ts",
"default": "./dist/plugin-sdk/agent-runtime.js"
},
"./plugin-sdk/speech-runtime": {
"types": "./dist/plugin-sdk/speech-runtime.d.ts",
"default": "./dist/plugin-sdk/speech-runtime.js"
},
"./plugin-sdk/plugin-runtime": {
"types": "./dist/plugin-sdk/plugin-runtime.d.ts",
"default": "./dist/plugin-sdk/plugin-runtime.js"
@@ -378,6 +386,14 @@
"types": "./dist/plugin-sdk/provider-web-search.d.ts",
"default": "./dist/plugin-sdk/provider-web-search.js"
},
"./plugin-sdk/image-generation": {
"types": "./dist/plugin-sdk/image-generation.d.ts",
"default": "./dist/plugin-sdk/image-generation.js"
},
"./plugin-sdk/image-generation-runtime": {
"types": "./dist/plugin-sdk/image-generation-runtime.d.ts",
"default": "./dist/plugin-sdk/image-generation-runtime.js"
},
"./plugin-sdk/reply-history": {
"types": "./dist/plugin-sdk/reply-history.d.ts",
"default": "./dist/plugin-sdk/reply-history.js"

View File

@@ -15,9 +15,11 @@
"channel-runtime",
"infra-runtime",
"media-runtime",
"media-understanding-runtime",
"conversation-runtime",
"text-runtime",
"agent-runtime",
"speech-runtime",
"plugin-runtime",
"security-runtime",
"gateway-runtime",
@@ -84,6 +86,8 @@
"provider-stream",
"provider-usage",
"provider-web-search",
"image-generation",
"image-generation-runtime",
"reply-history",
"media-understanding",
"google",

View File

@@ -93,6 +93,7 @@ const createRegistry = (channels: PluginRegistry["channels"]): PluginRegistry =>
providers: [],
speechProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webSearchProviders: [],
gatewayHandlers: {},
httpRoutes: [],

View File

@@ -339,6 +339,7 @@ describe("ensureChannelSetupPluginInstalled", () => {
providerIds: [],
speechProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webSearchProviderIds: [],
gatewayMethods: [],
cliCommands: [],

View File

@@ -1019,6 +1019,10 @@ export const FIELD_HELP: Record<string, string> = {
"agents.defaults.imageModel.primary":
"Optional image model (provider/model) used when the primary model lacks image input.",
"agents.defaults.imageModel.fallbacks": "Ordered fallback image models (provider/model).",
"agents.defaults.imageGenerationModel.primary":
"Optional image-generation model (provider/model) used by the shared image generation capability.",
"agents.defaults.imageGenerationModel.fallbacks":
"Ordered fallback image-generation models (provider/model).",
"agents.defaults.pdfModel.primary":
"Optional PDF model (provider/model) for the PDF analysis tool. Defaults to imageModel, then session model.",
"agents.defaults.pdfModel.fallbacks": "Ordered fallback PDF models (provider/model).",

View File

@@ -454,6 +454,8 @@ export const FIELD_LABELS: Record<string, string> = {
"agents.defaults.model.fallbacks": "Model Fallbacks",
"agents.defaults.imageModel.primary": "Image Model",
"agents.defaults.imageModel.fallbacks": "Image Model Fallbacks",
"agents.defaults.imageGenerationModel.primary": "Image Generation Model",
"agents.defaults.imageGenerationModel.fallbacks": "Image Generation Model Fallbacks",
"agents.defaults.pdfModel.primary": "PDF Model",
"agents.defaults.pdfModel.fallbacks": "PDF Model Fallbacks",
"agents.defaults.pdfMaxBytesMb": "PDF Max Size (MB)",

View File

@@ -122,6 +122,8 @@ export type AgentDefaultsConfig = {
model?: AgentModelConfig;
/** Optional image-capable model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */
imageModel?: AgentModelConfig;
/** Optional image-generation model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */
imageGenerationModel?: AgentModelConfig;
/** Optional PDF-capable model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */
pdfModel?: AgentModelConfig;
/** Maximum PDF file size in megabytes (default: 10). */

View File

@@ -18,6 +18,7 @@ export const AgentDefaultsSchema = z
.object({
model: AgentModelSchema.optional(),
imageModel: AgentModelSchema.optional(),
imageGenerationModel: AgentModelSchema.optional(),
pdfModel: AgentModelSchema.optional(),
pdfMaxBytesMb: z.number().positive().optional(),
pdfMaxPages: z.number().int().positive().optional(),

View File

@@ -31,6 +31,7 @@ const createRegistry = (diagnostics: PluginDiagnostic[]): PluginRegistry => ({
providers: [],
speechProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webSearchProviders: [],
gatewayHandlers: {},
httpRoutes: [],

View File

@@ -148,6 +148,7 @@ const createStubPluginRegistry = (): PluginRegistry => ({
providers: [],
speechProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webSearchProviders: [],
gatewayHandlers: {},
httpRoutes: [],

View File

@@ -0,0 +1,71 @@
import { normalizeProviderId } from "../agents/model-selection.js";
import type { OpenClawConfig } from "../config/config.js";
import { loadOpenClawPlugins } from "../plugins/loader.js";
import { getActivePluginRegistry } from "../plugins/runtime.js";
import type { ImageGenerationProviderPlugin } from "../plugins/types.js";
// Providers registered ahead of any plugin-supplied ones when the lookup maps are built.
// Currently empty, so every image-generation provider comes from the plugin registry.
const BUILTIN_IMAGE_GENERATION_PROVIDERS: readonly ImageGenerationProviderPlugin[] = [];
/**
 * Runs a provider id through `normalizeProviderId`, substituting "" for a missing id.
 *
 * @returns the normalized id, or `undefined` when normalization produces a falsy value.
 */
function normalizeImageGenerationProviderId(id: string | undefined): string | undefined {
  const rawId = id ?? "";
  const providerId = normalizeProviderId(rawId);
  if (providerId) {
    return providerId;
  }
  return undefined;
}
/**
 * Collects the image-generation providers contributed by plugins.
 *
 * The active plugin registry is used when it already holds at least one
 * image-generation provider, or when no config is supplied. Otherwise plugins are
 * loaded from `cfg` and that registry is used instead.
 */
function resolvePluginImageGenerationProviders(
  cfg?: OpenClawConfig,
): ImageGenerationProviderPlugin[] {
  const activeRegistry = getActivePluginRegistry();
  const activeProviderCount = activeRegistry?.imageGenerationProviders?.length ?? 0;
  const registry =
    activeProviderCount <= 0 && cfg ? loadOpenClawPlugins({ config: cfg }) : activeRegistry;
  const registrations = registry?.imageGenerationProviders;
  if (registrations == null) {
    return [];
  }
  return registrations.map(({ provider }) => provider);
}
/**
 * Builds the lookup tables for image-generation providers.
 *
 * - `canonical` maps each normalized provider id to its provider.
 * - `aliases` maps every normalized id AND every normalized alias to its provider.
 *
 * Built-in providers are registered first, then plugin providers, so a later
 * registration with the same key replaces an earlier one.
 */
function buildProviderMaps(cfg?: OpenClawConfig): {
  canonical: Map<string, ImageGenerationProviderPlugin>;
  aliases: Map<string, ImageGenerationProviderPlugin>;
} {
  const canonical = new Map<string, ImageGenerationProviderPlugin>();
  const aliases = new Map<string, ImageGenerationProviderPlugin>();
  const orderedProviders = [
    ...BUILTIN_IMAGE_GENERATION_PROVIDERS,
    ...resolvePluginImageGenerationProviders(cfg),
  ];
  for (const provider of orderedProviders) {
    const canonicalId = normalizeImageGenerationProviderId(provider.id);
    if (!canonicalId) {
      // Providers whose id normalizes to nothing cannot be looked up; skip them.
      continue;
    }
    canonical.set(canonicalId, provider);
    // The canonical id is resolvable through the alias table as well.
    aliases.set(canonicalId, provider);
    for (const rawAlias of provider.aliases ?? []) {
      const aliasKey = normalizeImageGenerationProviderId(rawAlias);
      if (!aliasKey) {
        continue;
      }
      aliases.set(aliasKey, provider);
    }
  }
  return { canonical, aliases };
}
/**
 * Lists every registered image-generation provider once, keyed by canonical id
 * (aliases do not produce extra entries).
 */
export function listImageGenerationProviders(
  cfg?: OpenClawConfig,
): ImageGenerationProviderPlugin[] {
  const { canonical } = buildProviderMaps(cfg);
  return Array.from(canonical.values());
}
/**
 * Looks up an image-generation provider by id or alias.
 *
 * @returns the matching provider, or `undefined` when the id normalizes to nothing
 *   or no provider is registered under it.
 */
export function getImageGenerationProvider(
  providerId: string | undefined,
  cfg?: OpenClawConfig,
): ImageGenerationProviderPlugin | undefined {
  const lookupKey = normalizeImageGenerationProviderId(providerId);
  return lookupKey ? buildProviderMaps(cfg).aliases.get(lookupKey) : undefined;
}

View File

@@ -0,0 +1,55 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import * as modelAuth from "../../agents/model-auth.js";
import { buildOpenAIImageGenerationProvider } from "./openai.js";
describe("OpenAI image-generation provider", () => {
  afterEach(() => {
    // `restoreAllMocks()` only resets spies. A global replaced via `vi.stubGlobal`
    // must be reverted explicitly, otherwise the fake `fetch` leaks into later tests.
    vi.unstubAllGlobals();
    vi.restoreAllMocks();
  });

  it("generates PNG buffers from the OpenAI Images API", async () => {
    // Bypass real credential resolution.
    vi.spyOn(modelAuth, "resolveApiKeyForProvider").mockResolvedValue({
      apiKey: "sk-test",
      source: "env",
      mode: "api-key",
    });
    // Minimal successful Images API response carrying one base64-encoded image.
    const fetchMock = vi.fn().mockResolvedValue({
      ok: true,
      json: async () => ({
        data: [
          {
            b64_json: Buffer.from("png-data").toString("base64"),
            revised_prompt: "revised",
          },
        ],
      }),
    });
    vi.stubGlobal("fetch", fetchMock);

    const provider = buildOpenAIImageGenerationProvider();
    const result = await provider.generateImage({
      provider: "openai",
      model: "gpt-image-1",
      prompt: "draw a cat",
      cfg: {},
    });

    expect(fetchMock).toHaveBeenCalledWith(
      "https://api.openai.com/v1/images/generations",
      expect.objectContaining({
        method: "POST",
      }),
    );
    expect(result).toEqual({
      images: [
        {
          buffer: Buffer.from("png-data"),
          mimeType: "image/png",
          fileName: "image-1.png",
          revisedPrompt: "revised",
        },
      ],
      model: "gpt-image-1",
    });
  });
});

View File

@@ -0,0 +1,79 @@
import { resolveApiKeyForProvider } from "../../agents/model-auth.js";
import type { ImageGenerationProviderPlugin } from "../../plugins/types.js";
// Base URL used when `models.providers.openai.baseUrl` is not set in the config.
const DEFAULT_OPENAI_IMAGE_BASE_URL = "https://api.openai.com/v1";
// Model sent to the API (and reported back) when the request does not name one.
const DEFAULT_OPENAI_IMAGE_MODEL = "gpt-image-1";
// MIME type stamped on every decoded image returned by this provider.
const DEFAULT_OUTPUT_MIME = "image/png";
// Size sent to the API when the request does not specify one.
const DEFAULT_SIZE = "1024x1024";
/**
 * The subset of the image-generation response body that this provider reads.
 * Every field is optional because the JSON is cast to this type, not validated.
 */
type OpenAIImageApiResponse = {
data?: Array<{
// Base64-encoded image bytes; entries without it are dropped by the provider.
b64_json?: string;
// Copied to `revisedPrompt` on the generated asset when present.
revised_prompt?: string;
}>;
};
/**
 * Resolves the base URL for OpenAI image requests.
 *
 * Uses `models.providers.openai.baseUrl` from the config when it is set to a
 * non-blank value, otherwise falls back to `DEFAULT_OPENAI_IMAGE_BASE_URL`.
 * Trailing slashes are removed so that appending `/images/generations` never
 * produces a `//` in the request path.
 *
 * @param cfg - the config to read the override from (may be undefined).
 * @returns a base URL without a trailing slash.
 */
function resolveOpenAIBaseUrl(cfg: Parameters<typeof resolveApiKeyForProvider>[0]["cfg"]): string {
  const configured = cfg?.models?.providers?.openai?.baseUrl?.trim().replace(/\/+$/, "");
  return configured || DEFAULT_OPENAI_IMAGE_BASE_URL;
}
/**
 * Builds the OpenAI image-generation provider plugin.
 *
 * `generateImage` resolves the OpenAI API key, POSTs the prompt to
 * `<baseUrl>/images/generations`, and decodes each base64 image in the response
 * into a PNG-typed asset. Response entries without `b64_json` are skipped.
 *
 * @returns a provider with id "openai".
 * @throws from `generateImage` when no API key is resolved or the HTTP response is not ok.
 */
export function buildOpenAIImageGenerationProvider(): ImageGenerationProviderPlugin {
  return {
    id: "openai",
    label: "OpenAI",
    supportedSizes: ["1024x1024", "1024x1536", "1536x1024"],
    async generateImage(req) {
      const auth = await resolveApiKeyForProvider({
        provider: "openai",
        cfg: req.cfg,
        agentDir: req.agentDir,
      });
      if (!auth.apiKey) {
        throw new Error("OpenAI API key missing");
      }
      // Resolve once so the request body and the reported model always agree.
      const model = req.model || DEFAULT_OPENAI_IMAGE_MODEL;
      // Per the OpenAI Images API reference, GPT image models always return base64 and
      // do not support `response_format`. Other models keep the explicit b64_json
      // request so they do not answer with URLs.
      const isGptImageModel = model.trim().toLowerCase().startsWith("gpt-image");
      const response = await fetch(`${resolveOpenAIBaseUrl(req.cfg)}/images/generations`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${auth.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model,
          prompt: req.prompt,
          n: req.count ?? 1,
          size: req.size ?? DEFAULT_SIZE,
          ...(isGptImageModel ? {} : { response_format: "b64_json" }),
        }),
      });
      if (!response.ok) {
        // Best effort: prefer the response body, fall back to the status text.
        const text = await response.text().catch(() => "");
        throw new Error(
          `OpenAI image generation failed (${response.status}): ${text || response.statusText}`,
        );
      }
      const data = (await response.json()) as OpenAIImageApiResponse;
      const images = (data.data ?? [])
        .map((entry, index) => {
          if (!entry.b64_json) {
            return null;
          }
          return {
            buffer: Buffer.from(entry.b64_json, "base64"),
            mimeType: DEFAULT_OUTPUT_MIME,
            // File names follow the position in the response, including skipped entries.
            fileName: `image-${index + 1}.png`,
            ...(entry.revised_prompt ? { revisedPrompt: entry.revised_prompt } : {}),
          };
        })
        .filter((entry): entry is NonNullable<typeof entry> => entry !== null);
      return {
        images,
        model,
      };
    },
  };
}

View File

@@ -0,0 +1,81 @@
import { afterEach, describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { createEmptyPluginRegistry } from "../plugins/registry.js";
import { setActivePluginRegistry } from "../plugins/runtime.js";
import { generateImage, listRuntimeImageGenerationProviders } from "./runtime.js";
describe("image-generation runtime helpers", () => {
  type TestProvider = ReturnType<
    typeof createEmptyPluginRegistry
  >["imageGenerationProviders"][number]["provider"];

  /** Activates a fresh registry whose only image-generation provider is `provider`. */
  const activateRegistryWith = (provider: TestProvider) => {
    const registry = createEmptyPluginRegistry();
    registry.imageGenerationProviders.push({
      pluginId: "image-plugin",
      pluginName: "Image Plugin",
      source: "test",
      provider,
    });
    setActivePluginRegistry(registry);
  };

  /** Builds the sample asset the fake provider returns and the test expects. */
  const sampleImage = () => ({
    buffer: Buffer.from("png-bytes"),
    mimeType: "image/png",
    fileName: "sample.png",
  });

  // Leave an empty registry behind so providers never leak between tests.
  afterEach(() => {
    setActivePluginRegistry(createEmptyPluginRegistry());
  });

  it("generates images through the active image-generation registry", async () => {
    activateRegistryWith({
      id: "image-plugin",
      async generateImage() {
        return { images: [sampleImage()], model: "img-v1" };
      },
    });
    const cfg = {
      agents: {
        defaults: {
          imageGenerationModel: {
            primary: "image-plugin/img-v1",
          },
        },
      },
    } as OpenClawConfig;

    const result = await generateImage({
      cfg,
      prompt: "draw a cat",
      agentDir: "/tmp/agent",
    });

    expect(result.provider).toBe("image-plugin");
    expect(result.model).toBe("img-v1");
    // The first candidate succeeded, so no failed attempts were recorded.
    expect(result.attempts).toEqual([]);
    expect(result.images).toEqual([sampleImage()]);
  });

  it("lists runtime image-generation providers from the active registry", () => {
    activateRegistryWith({
      id: "image-plugin",
      generateImage: async () => ({
        images: [{ buffer: Buffer.from("x"), mimeType: "image/png" }],
      }),
    });

    expect(listRuntimeImageGenerationProviders()).toMatchObject([{ id: "image-plugin" }]);
  });
});

View File

@@ -0,0 +1,162 @@
import { describeFailoverError, isFailoverError } from "../agents/failover-error.js";
import type { FallbackAttempt } from "../agents/model-fallback.types.js";
import type { OpenClawConfig } from "../config/config.js";
import {
resolveAgentModelFallbackValues,
resolveAgentModelPrimaryValue,
} from "../config/model-input.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { getImageGenerationProvider, listImageGenerationProviders } from "./provider-registry.js";
import type { GeneratedImageAsset, ImageGenerationResult } from "./types.js";
const log = createSubsystemLogger("image-generation");
/** Input accepted by `generateImage`. */
export type GenerateImageParams = {
// Config supplying `agents.defaults.imageGenerationModel`; also forwarded to the provider.
cfg: OpenClawConfig;
// Text prompt forwarded unchanged to the provider.
prompt: string;
// Forwarded to the provider as-is.
agentDir?: string;
// Optional "provider/model" reference tried before the configured primary and fallbacks.
modelOverride?: string;
// Forwarded to the provider as-is.
count?: number;
// Forwarded to the provider as-is.
size?: string;
};
/** Outcome of a successful `generateImage` call. */
export type GenerateImageRuntimeResult = {
// Images returned by the provider; always non-empty on success.
images: GeneratedImageAsset[];
// Provider part of the candidate reference that succeeded.
provider: string;
// Model reported by the provider, or the candidate's model when none was reported.
model: string;
// Failed candidates tried before the successful one, in order (empty if the first succeeded).
attempts: FallbackAttempt[];
// Provider-supplied metadata, passed through untouched.
metadata?: Record<string, unknown>;
};
/**
 * Splits a "provider/model" reference at its first slash.
 *
 * @returns the trimmed provider and model, or `null` when the input is blank, has
 *   no slash, or has nothing before or after the first slash.
 */
function parseModelRef(raw: string | undefined): { provider: string; model: string } | null {
  const ref = raw?.trim();
  if (!ref) {
    return null;
  }
  const separatorAt = ref.indexOf("/");
  const hasProviderPart = separatorAt > 0;
  const hasModelPart = separatorAt !== ref.length - 1;
  if (!hasProviderPart || !hasModelPart) {
    return null;
  }
  const provider = ref.slice(0, separatorAt).trim();
  // Everything after the first slash belongs to the model, including further slashes.
  const model = ref.slice(separatorAt + 1).trim();
  return { provider, model };
}
/**
 * Produces the ordered, de-duplicated list of provider/model candidates to try:
 * the explicit override first, then the configured primary, then each configured
 * fallback. References that do not parse as "provider/model" are ignored.
 */
function resolveImageGenerationCandidates(params: {
  cfg: OpenClawConfig;
  modelOverride?: string;
}): Array<{ provider: string; model: string }> {
  const configured = params.cfg.agents?.defaults?.imageGenerationModel;
  const rawRefs: Array<string | undefined> = [
    params.modelOverride,
    resolveAgentModelPrimaryValue(configured),
    ...resolveAgentModelFallbackValues(configured),
  ];
  // A Map keeps first-insertion order, so the earliest occurrence of a reference wins.
  const unique = new Map<string, { provider: string; model: string }>();
  for (const rawRef of rawRefs) {
    const candidate = parseModelRef(rawRef);
    if (!candidate) {
      continue;
    }
    const dedupeKey = `${candidate.provider}/${candidate.model}`;
    if (!unique.has(dedupeKey)) {
      unique.set(dedupeKey, candidate);
    }
  }
  return [...unique.values()];
}
/**
 * Raises the terminal error after every candidate has failed.
 *
 * With at most one recorded attempt and a truthy last error, that error is
 * rethrown unchanged. Otherwise a summary error listing every attempt is thrown,
 * with the last error attached as `cause` when it is an `Error`.
 */
function throwImageGenerationFailure(params: {
  attempts: FallbackAttempt[];
  lastError: unknown;
}): never {
  const { attempts, lastError } = params;
  if (lastError && attempts.length <= 1) {
    throw lastError;
  }
  let summary = "unknown";
  if (attempts.length > 0) {
    const lines = attempts.map(
      (attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error}`,
    );
    summary = lines.join(" | ");
  }
  const cause = lastError instanceof Error ? lastError : undefined;
  throw new Error(`All image generation models failed (${attempts.length}): ${summary}`, {
    cause,
  });
}
/** Lists the registered image-generation providers, optionally resolving them against `params.config`. */
export function listRuntimeImageGenerationProviders(params?: { config?: OpenClawConfig }) {
  const config = params?.config;
  return listImageGenerationProviders(config);
}
/**
 * Generates images by trying each configured provider/model candidate in order
 * and returning the first result that contains at least one image.
 *
 * A candidate fails when no provider is registered for it, when the provider
 * throws, or when it returns no images. Each failure is recorded in `attempts`
 * and the next candidate is tried.
 *
 * @throws when no candidate is configured, or when every candidate fails.
 */
export async function generateImage(
  params: GenerateImageParams,
): Promise<GenerateImageRuntimeResult> {
  const { cfg, prompt, agentDir, count, size, modelOverride } = params;
  const candidates = resolveImageGenerationCandidates({ cfg, modelOverride });
  if (candidates.length === 0) {
    throw new Error(
      "No image-generation model configured. Set agents.defaults.imageGenerationModel.primary or agents.defaults.imageGenerationModel.fallbacks.",
    );
  }

  const attempts: FallbackAttempt[] = [];
  let lastError: unknown;

  for (const { provider: providerId, model } of candidates) {
    const provider = getImageGenerationProvider(providerId, cfg);
    if (!provider) {
      // An unknown provider counts as a failed attempt rather than aborting the chain.
      const message = `No image-generation provider registered for ${providerId}`;
      attempts.push({ provider: providerId, model, error: message });
      lastError = new Error(message);
      continue;
    }

    try {
      const result: ImageGenerationResult = await provider.generateImage({
        provider: providerId,
        model,
        prompt,
        cfg,
        agentDir,
        count,
        size,
      });
      const producedImages = Array.isArray(result.images) && result.images.length > 0;
      if (!producedImages) {
        // Thrown inside the try so an empty result falls through to the next candidate.
        throw new Error("Image generation provider returned no images.");
      }
      return {
        images: result.images,
        provider: providerId,
        model: result.model ?? model,
        attempts,
        metadata: result.metadata,
      };
    } catch (err) {
      lastError = err;
      // Failover errors carry structured reason/status/code; other errors only a message.
      const described = isFailoverError(err) ? describeFailoverError(err) : undefined;
      attempts.push({
        provider: providerId,
        model,
        error: described?.message ?? (err instanceof Error ? err.message : String(err)),
        reason: described?.reason,
        status: described?.status,
        code: described?.code,
      });
      log.debug(`image-generation candidate failed: ${providerId}/${model}`);
    }
  }

  throwImageGenerationFailure({ attempts, lastError });
}

View File

@@ -0,0 +1,33 @@
import type { OpenClawConfig } from "../config/config.js";
/** One generated image as returned by an image-generation provider. */
export type GeneratedImageAsset = {
// Image bytes.
buffer: Buffer;
// MIME type describing `buffer`.
mimeType: string;
// Optional file name for the image.
fileName?: string;
// Optional revised prompt reported by the provider for this image.
revisedPrompt?: string;
// Free-form, provider-defined extra data.
metadata?: Record<string, unknown>;
};
/** Request handed to an image-generation provider's `generateImage`. */
export type ImageGenerationRequest = {
// Provider id the request was routed to.
provider: string;
// Model name to generate with.
model: string;
// Text prompt describing the desired image.
prompt: string;
// Active configuration, available to the provider (e.g. for auth or endpoint settings).
cfg: OpenClawConfig;
// Optional agent directory.
agentDir?: string;
// Optional number of images requested.
count?: number;
// Optional image size string.
size?: string;
};
/** Value returned by an image-generation provider's `generateImage`. */
export type ImageGenerationResult = {
// Generated images.
images: GeneratedImageAsset[];
// Optional model name reported by the provider.
model?: string;
// Free-form, provider-defined extra data for the whole result.
metadata?: Record<string, unknown>;
};
/** Contract implemented by an image-generation provider. */
export type ImageGenerationProvider = {
// Provider id.
id: string;
// Optional alternative ids for the same provider.
aliases?: string[];
// Optional label for the provider.
label?: string;
// Optional list of size strings the provider supports.
supportedSizes?: string[];
// Generates images for the given request.
generateImage: (req: ImageGenerationRequest) => Promise<ImageGenerationResult>;
};

View File

@@ -1,6 +1,8 @@
import fs from "node:fs/promises";
import path from "node:path";
import type { MsgContext } from "../auto-reply/templating.js";
import type { OpenClawConfig } from "../config/config.js";
import { getMediaUnderstandingProvider } from "./providers/index.js";
import {
buildProviderRegistry,
createMediaAttachmentCache,
@@ -90,6 +92,38 @@ export async function describeImageFile(params: {
return await runMediaUnderstandingFile({ ...params, capability: "image" });
}
/**
 * Describes an image file using an explicitly chosen provider and model.
 *
 * Looks the provider up in a registry built from `cfg`, reads the file from disk,
 * and calls the provider's `describeImage` with the bytes and the file's base name.
 * `timeoutMs` defaults to 30 seconds and a missing `agentDir` is passed as "".
 *
 * @throws when the provider is unknown or has no `describeImage`, or when the file cannot be read.
 */
export async function describeImageFileWithModel(params: {
  filePath: string;
  cfg: OpenClawConfig;
  agentDir?: string;
  mime?: string;
  provider: string;
  model: string;
  prompt: string;
  maxTokens?: number;
  timeoutMs?: number;
}) {
  const { filePath, cfg, mime, model, prompt, maxTokens } = params;
  const providerId = params.provider;
  const timeoutMs = params.timeoutMs ?? 30_000;
  const registry = buildProviderRegistry(undefined, cfg);
  const imageProvider = getMediaUnderstandingProvider(providerId, registry);
  if (!imageProvider?.describeImage) {
    throw new Error(`Provider does not support image analysis: ${providerId}`);
  }
  // Read the file only after the provider check so unsupported providers fail without I/O.
  const buffer = await fs.readFile(filePath);
  const fileName = path.basename(filePath);
  return await imageProvider.describeImage({
    buffer,
    fileName,
    mime,
    provider: providerId,
    model,
    prompt,
    maxTokens,
    timeoutMs,
    cfg,
    agentDir: params.agentDir ?? "",
  });
}
export async function describeVideoFile(params: {
filePath: string;
cfg: OpenClawConfig;

View File

@@ -0,0 +1,3 @@
// Public runtime-facing image-generation helpers for feature/channel plugins.
export { generateImage, listRuntimeImageGenerationProviders } from "../image-generation/runtime.js";

View File

@@ -0,0 +1,10 @@
// Public image-generation helpers and types for provider plugins.
export type {
GeneratedImageAsset,
ImageGenerationProvider,
ImageGenerationRequest,
ImageGenerationResult,
} from "../image-generation/types.js";
export { buildOpenAIImageGenerationProvider } from "../image-generation/providers/openai.js";

View File

@@ -40,6 +40,7 @@ export type {
export type { OpenClawConfig } from "../config/config.js";
/** @deprecated Use OpenClawConfig instead */
export type { OpenClawConfig as ClawdbotConfig } from "../config/config.js";
export * from "./image-generation.js";
export type { SecretInput, SecretRef } from "../config/types.secrets.js";
export type { RuntimeEnv } from "../runtime.js";
export type { HookEntry } from "../hooks/types.js";

View File

@@ -0,0 +1,9 @@
// Public runtime-facing media-understanding helpers for feature/channel plugins.
export {
describeImageFile,
describeImageFileWithModel,
describeVideoFile,
runMediaUnderstandingFile,
transcribeAudioFile,
} from "../media-understanding/runtime.js";

View File

@@ -0,0 +1,3 @@
// Public runtime-facing speech helpers for feature/channel plugins.
export { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "../tts/runtime.js";

View File

@@ -1,5 +1,6 @@
import type {
AnyAgentTool,
ImageGenerationProviderPlugin,
MediaUnderstandingProviderPlugin,
OpenClawPluginApi,
ProviderPlugin,
@@ -12,6 +13,7 @@ export type CapturedPluginRegistration = {
providers: ProviderPlugin[];
speechProviders: SpeechProviderPlugin[];
mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[];
imageGenerationProviders: ImageGenerationProviderPlugin[];
webSearchProviders: WebSearchProviderPlugin[];
tools: AnyAgentTool[];
};
@@ -20,6 +22,7 @@ export function createCapturedPluginRegistration(): CapturedPluginRegistration {
const providers: ProviderPlugin[] = [];
const speechProviders: SpeechProviderPlugin[] = [];
const mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[] = [];
const imageGenerationProviders: ImageGenerationProviderPlugin[] = [];
const webSearchProviders: WebSearchProviderPlugin[] = [];
const tools: AnyAgentTool[] = [];
@@ -27,6 +30,7 @@ export function createCapturedPluginRegistration(): CapturedPluginRegistration {
providers,
speechProviders,
mediaUnderstandingProviders,
imageGenerationProviders,
webSearchProviders,
tools,
api: {
@@ -39,6 +43,9 @@ export function createCapturedPluginRegistration(): CapturedPluginRegistration {
registerMediaUnderstandingProvider(provider: MediaUnderstandingProviderPlugin) {
mediaUnderstandingProviders.push(provider);
},
registerImageGenerationProvider(provider: ImageGenerationProviderPlugin) {
imageGenerationProviders.push(provider);
},
registerWebSearchProvider(provider: WebSearchProviderPlugin) {
webSearchProviders.push(provider);
},

View File

@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import { loadPluginManifestRegistry } from "../manifest-registry.js";
import { resolvePluginWebSearchProviders } from "../web-search-providers.js";
import {
imageGenerationProviderContractRegistry,
mediaUnderstandingProviderContractRegistry,
pluginRegistrationContractRegistry,
providerContractPluginIds,
@@ -56,6 +57,23 @@ function findMediaUnderstandingProviderForPlugin(pluginId: string) {
return entry.provider;
}
/** Returns the locale-sorted image-generation provider ids that the given plugin registers. */
function findImageGenerationProviderIdsForPlugin(pluginId: string) {
  const ownedIds = imageGenerationProviderContractRegistry.flatMap((entry) =>
    entry.pluginId === pluginId ? [entry.provider.id] : [],
  );
  return ownedIds.toSorted((left, right) => left.localeCompare(right));
}
/**
 * Returns the first image-generation provider registered by the given plugin.
 *
 * @throws when the plugin has no image-generation provider in the contract registry.
 */
function findImageGenerationProviderForPlugin(pluginId: string) {
  for (const candidate of imageGenerationProviderContractRegistry) {
    if (candidate.pluginId === pluginId) {
      return candidate.provider;
    }
  }
  throw new Error(`image-generation provider contract missing for ${pluginId}`);
}
function findRegistrationForPlugin(pluginId: string) {
const entry = pluginRegistrationContractRegistry.find(
(candidate) => candidate.pluginId === pluginId,
@@ -108,6 +126,10 @@ describe("plugin contract registry", () => {
).toEqual(bundledWebSearchPluginIds);
});
it("does not duplicate bundled image-generation provider ids", () => {
const ids = imageGenerationProviderContractRegistry.map((entry) => entry.provider.id);
expect(ids).toEqual([...new Set(ids)]);
});
it("keeps multi-provider plugin ownership explicit", () => {
expect(findProviderIdsForPlugin("google")).toEqual(["google", "google-gemini-cli"]);
expect(findProviderIdsForPlugin("minimax")).toEqual(["minimax", "minimax-portal"]);
@@ -142,11 +164,16 @@ describe("plugin contract registry", () => {
expect(findMediaUnderstandingProviderIdsForPlugin("zai")).toEqual(["zai"]);
});
it("keeps bundled image-generation ownership explicit", () => {
expect(findImageGenerationProviderIdsForPlugin("openai")).toEqual(["openai"]);
});
it("keeps bundled provider and web search tool ownership explicit", () => {
expect(findRegistrationForPlugin("firecrawl")).toMatchObject({
providerIds: [],
speechProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webSearchProviderIds: ["firecrawl"],
toolNames: ["firecrawl_search", "firecrawl_scrape"],
});
@@ -157,16 +184,19 @@ describe("plugin contract registry", () => {
providerIds: ["openai", "openai-codex"],
speechProviderIds: ["openai"],
mediaUnderstandingProviderIds: ["openai"],
imageGenerationProviderIds: ["openai"],
});
expect(findRegistrationForPlugin("elevenlabs")).toMatchObject({
providerIds: [],
speechProviderIds: ["elevenlabs"],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
});
expect(findRegistrationForPlugin("microsoft")).toMatchObject({
providerIds: [],
speechProviderIds: ["microsoft"],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
});
});
@@ -213,4 +243,10 @@ describe("plugin contract registry", () => {
expect.any(Function),
);
});
it("keeps bundled image-generation support explicit", () => {
expect(findImageGenerationProviderForPlugin("openai").generateImage).toEqual(
expect.any(Function),
);
});
});

View File

@@ -37,6 +37,7 @@ import xiaomiPlugin from "../../../extensions/xiaomi/index.js";
import zaiPlugin from "../../../extensions/zai/index.js";
import { createCapturedPluginRegistration } from "../captured-registration.js";
import type {
ImageGenerationProviderPlugin,
MediaUnderstandingProviderPlugin,
ProviderPlugin,
SpeechProviderPlugin,
@@ -62,12 +63,14 @@ type WebSearchProviderContractEntry = CapabilityContractEntry<WebSearchProviderP
type SpeechProviderContractEntry = CapabilityContractEntry<SpeechProviderPlugin>;
type MediaUnderstandingProviderContractEntry =
CapabilityContractEntry<MediaUnderstandingProviderPlugin>;
type ImageGenerationProviderContractEntry = CapabilityContractEntry<ImageGenerationProviderPlugin>;
type PluginRegistrationContractEntry = {
pluginId: string;
providerIds: string[];
speechProviderIds: string[];
mediaUnderstandingProviderIds: string[];
imageGenerationProviderIds: string[];
webSearchProviderIds: string[];
toolNames: string[];
};
@@ -128,6 +131,8 @@ const bundledMediaUnderstandingPlugins: RegistrablePlugin[] = [
zaiPlugin,
];
const bundledImageGenerationPlugins: RegistrablePlugin[] = [openAIPlugin];
function captureRegistrations(plugin: RegistrablePlugin) {
const captured = createCapturedPluginRegistration();
plugin.register(captured.api);
@@ -207,12 +212,19 @@ export const mediaUnderstandingProviderContractRegistry: MediaUnderstandingProvi
select: (captured) => captured.mediaUnderstandingProviders,
});
export const imageGenerationProviderContractRegistry: ImageGenerationProviderContractEntry[] =
buildCapabilityContractRegistry({
plugins: bundledImageGenerationPlugins,
select: (captured) => captured.imageGenerationProviders,
});
const bundledPluginRegistrationList = [
...new Map(
[
...bundledProviderPlugins,
...bundledSpeechPlugins,
...bundledMediaUnderstandingPlugins,
...bundledImageGenerationPlugins,
...bundledWebSearchPlugins,
].map((plugin) => [plugin.id, plugin]),
).values(),
@@ -228,6 +240,7 @@ export const pluginRegistrationContractRegistry: PluginRegistrationContractEntry
mediaUnderstandingProviderIds: captured.mediaUnderstandingProviders.map(
(provider) => provider.id,
),
imageGenerationProviderIds: captured.imageGenerationProviders.map((provider) => provider.id),
webSearchProviderIds: captured.webSearchProviders.map((provider) => provider.id),
toolNames: captured.tools.map((tool) => tool.name),
};

View File

@@ -19,6 +19,7 @@ export function createMockPluginRegistry(
providerIds: [],
speechProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webSearchProviderIds: [],
gatewayMethods: [],
cliCommands: [],
@@ -43,6 +44,7 @@ export function createMockPluginRegistry(
providers: [],
speechProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webSearchProviders: [],
httpRoutes: [],
gatewayHandlers: {},

View File

@@ -497,6 +497,7 @@ function createPluginRecord(params: {
providerIds: [],
speechProviderIds: [],
mediaUnderstandingProviderIds: [],
imageGenerationProviderIds: [],
webSearchProviderIds: [],
gatewayMethods: [],
cliCommands: [],

View File

@@ -22,6 +22,7 @@ import {
stripPromptMutationFieldsFromLegacyHookResult,
} from "./types.js";
import type {
ImageGenerationProviderPlugin,
OpenClawPluginApi,
OpenClawPluginChannelRegistration,
OpenClawPluginCliRegistrar,
@@ -116,6 +117,8 @@ export type PluginSpeechProviderRegistration =
PluginOwnedProviderRegistration<SpeechProviderPlugin>;
export type PluginMediaUnderstandingProviderRegistration =
PluginOwnedProviderRegistration<MediaUnderstandingProviderPlugin>;
/**
 * Registration record for an image-generation provider; this is the element
 * type of `PluginRegistry.imageGenerationProviders`. It reuses the same
 * `PluginOwnedProviderRegistration` wrapper as the speech, media-understanding,
 * and web-search provider registrations declared alongside it.
 */
export type PluginImageGenerationProviderRegistration =
PluginOwnedProviderRegistration<ImageGenerationProviderPlugin>;
export type PluginWebSearchProviderRegistration =
PluginOwnedProviderRegistration<WebSearchProviderPlugin>;
@@ -165,6 +168,7 @@ export type PluginRecord = {
providerIds: string[];
speechProviderIds: string[];
mediaUnderstandingProviderIds: string[];
imageGenerationProviderIds: string[];
webSearchProviderIds: string[];
gatewayMethods: string[];
cliCommands: string[];
@@ -187,6 +191,7 @@ export type PluginRegistry = {
providers: PluginProviderRegistration[];
speechProviders: PluginSpeechProviderRegistration[];
mediaUnderstandingProviders: PluginMediaUnderstandingProviderRegistration[];
imageGenerationProviders: PluginImageGenerationProviderRegistration[];
webSearchProviders: PluginWebSearchProviderRegistration[];
gatewayHandlers: GatewayRequestHandlers;
httpRoutes: PluginHttpRouteRegistration[];
@@ -234,6 +239,7 @@ export function createEmptyPluginRegistry(): PluginRegistry {
providers: [],
speechProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webSearchProviders: [],
gatewayHandlers: {},
httpRoutes: [],
@@ -631,6 +637,19 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
});
};
/**
 * Registers an image-generation provider on behalf of a plugin.
 *
 * All bookkeeping is delegated to `registerUniqueProviderLike`, which is handed
 * the registry-wide list of image-generation registrations and the plugin
 * record's own list of image-generation provider ids.
 *
 * @param record - Plugin record the provider is being registered for.
 * @param provider - Image-generation provider supplied by the plugin.
 */
const registerImageGenerationProvider = (
  record: PluginRecord,
  provider: ImageGenerationProviderPlugin,
) => {
  // Name the two collections up front so the delegation below reads as a
  // plain description of "what goes where".
  const { imageGenerationProviders: registrations } = registry;
  const { imageGenerationProviderIds: ownedIds } = record;

  registerUniqueProviderLike({
    record,
    provider,
    kindLabel: "image-generation provider",
    registrations,
    ownedIds,
  });
};
const registerWebSearchProvider = (record: PluginRecord, provider: WebSearchProviderPlugin) => {
registerUniqueProviderLike({
record,
@@ -857,6 +876,10 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
registrationMode === "full"
? (provider) => registerMediaUnderstandingProvider(record, provider)
: () => {},
registerImageGenerationProvider:
registrationMode === "full"
? (provider) => registerImageGenerationProvider(record, provider)
: () => {},
registerWebSearchProvider:
registrationMode === "full"
? (provider) => registerWebSearchProvider(record, provider)
@@ -932,6 +955,7 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
registerProvider,
registerSpeechProvider,
registerMediaUnderstandingProvider,
registerImageGenerationProvider,
registerWebSearchProvider,
registerGatewayMethod,
registerCli,

View File

@@ -59,10 +59,17 @@ describe("plugin runtime command execution", () => {
const runtime = createPluginRuntime();
expect(typeof runtime.mediaUnderstanding.runFile).toBe("function");
expect(typeof runtime.mediaUnderstanding.describeImageFile).toBe("function");
expect(typeof runtime.mediaUnderstanding.describeImageFileWithModel).toBe("function");
expect(typeof runtime.mediaUnderstanding.describeVideoFile).toBe("function");
expect(runtime.mediaUnderstanding.transcribeAudioFile).toBe(runtime.stt.transcribeAudioFile);
});
it("exposes runtime.imageGeneration helpers", () => {
  // Each helper on the imageGeneration section must be wired up as a callable.
  const { imageGeneration } = createPluginRuntime();
  for (const helper of ["generate", "listProviders"] as const) {
    expect(typeof imageGeneration[helper]).toBe("function");
  }
});
it("exposes runtime.webSearch helpers", () => {
const runtime = createPluginRuntime();
expect(typeof runtime.webSearch.listProviders).toBe("function");

View File

@@ -4,13 +4,18 @@ import {
resolveApiKeyForProvider as resolveApiKeyForProviderRaw,
} from "../../agents/model-auth.js";
import { resolveStateDir } from "../../config/paths.js";
import {
generateImage,
listRuntimeImageGenerationProviders,
} from "../../image-generation/runtime.js";
import {
describeImageFile,
describeImageFileWithModel,
describeVideoFile,
runMediaUnderstandingFile,
transcribeAudioFile,
} from "../../media-understanding/runtime.js";
import { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "../../tts/tts.js";
import { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "../../tts/runtime.js";
import { listWebSearchProviders, runWebSearch } from "../../web-search/runtime.js";
import { createRuntimeAgent } from "./runtime-agent.js";
import { createRuntimeChannel } from "./runtime-channel.js";
@@ -145,9 +150,14 @@ export function createPluginRuntime(_options: CreatePluginRuntimeOptions = {}):
mediaUnderstanding: {
runFile: runMediaUnderstandingFile,
describeImageFile,
describeImageFileWithModel,
describeVideoFile,
transcribeAudioFile,
},
imageGeneration: {
generate: generateImage,
listProviders: listRuntimeImageGenerationProviders,
},
webSearch: {
listProviders: listWebSearchProviders,
search: runWebSearch,

View File

@@ -47,16 +47,21 @@ export type PluginRuntimeCore = {
resizeToJpeg: typeof import("../../media/image-ops.js").resizeToJpeg;
};
tts: {
textToSpeech: typeof import("../../tts/tts.js").textToSpeech;
textToSpeechTelephony: typeof import("../../tts/tts.js").textToSpeechTelephony;
listVoices: typeof import("../../tts/tts.js").listSpeechVoices;
textToSpeech: typeof import("../../tts/runtime.js").textToSpeech;
textToSpeechTelephony: typeof import("../../tts/runtime.js").textToSpeechTelephony;
listVoices: typeof import("../../tts/runtime.js").listSpeechVoices;
};
mediaUnderstanding: {
runFile: typeof import("../../media-understanding/runtime.js").runMediaUnderstandingFile;
describeImageFile: typeof import("../../media-understanding/runtime.js").describeImageFile;
describeImageFileWithModel: typeof import("../../media-understanding/runtime.js").describeImageFileWithModel;
describeVideoFile: typeof import("../../media-understanding/runtime.js").describeVideoFile;
transcribeAudioFile: typeof import("../../media-understanding/runtime.js").transcribeAudioFile;
};
imageGeneration: {
generate: typeof import("../../image-generation/runtime.js").generateImage;
listProviders: typeof import("../../image-generation/runtime.js").listRuntimeImageGenerationProviders;
};
webSearch: {
listProviders: typeof import("../../web-search/runtime.js").listWebSearchProviders;
search: typeof import("../../web-search/runtime.js").runWebSearch;

View File

@@ -22,6 +22,7 @@ import type { ModelProviderConfig } from "../config/types.js";
import type { GatewayRequestHandler } from "../gateway/server-methods/types.js";
import type { InternalHookHandler } from "../hooks/internal-hooks.js";
import type { HookEntry } from "../hooks/types.js";
import type { ImageGenerationProvider } from "../image-generation/types.js";
import type { ProviderUsageSnapshot } from "../infra/provider-usage.types.js";
import type { MediaUnderstandingProvider } from "../media-understanding/types.js";
import type { RuntimeEnv } from "../runtime.js";
@@ -890,6 +891,7 @@ export type PluginSpeechProviderEntry = SpeechProviderPlugin & {
};
export type MediaUnderstandingProviderPlugin = MediaUnderstandingProvider;
/**
 * Plugin-facing name for an image-generation provider. This is a plain alias
 * of `ImageGenerationProvider` from `../image-generation/types.js`, and it is
 * the parameter type of `OpenClawPluginApi.registerImageGenerationProvider`.
 */
export type ImageGenerationProviderPlugin = ImageGenerationProvider;
export type OpenClawPluginGatewayMethod = {
method: string;
@@ -1251,6 +1253,7 @@ export type OpenClawPluginApi = {
registerProvider: (provider: ProviderPlugin) => void;
registerSpeechProvider: (provider: SpeechProviderPlugin) => void;
registerMediaUnderstandingProvider: (provider: MediaUnderstandingProviderPlugin) => void;
registerImageGenerationProvider: (provider: ImageGenerationProviderPlugin) => void;
registerWebSearchProvider: (provider: WebSearchProviderPlugin) => void;
registerInteractiveHandler: (registration: PluginInteractiveHandlerRegistration) => void;
/**

View File

@@ -28,6 +28,7 @@ export const createTestRegistry = (channels: TestChannelRegistration[] = []): Pl
providers: [],
speechProviders: [],
mediaUnderstandingProviders: [],
imageGenerationProviders: [],
webSearchProviders: [],
gatewayHandlers: {},
httpRoutes: [],

4
src/tts/runtime.ts Normal file
View File

@@ -0,0 +1,4 @@
// Shared runtime-facing speech helpers. Keep channel/feature plugins on this
// boundary instead of importing the full TTS orchestrator module directly.
// This module contains no logic of its own: it only re-exports, from ./tts.js,
// the helpers that the plugin runtime's `tts` section is typed against.
export { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "./tts.js";