mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-20 14:30:57 +00:00
feat(plugins): add image generation capability
This commit is contained in:
@@ -46,6 +46,7 @@ function fakeApi(overrides: Partial<OpenClawPluginApi> = {}): OpenClawPluginApi
|
||||
registerProvider() {},
|
||||
registerSpeechProvider() {},
|
||||
registerMediaUnderstandingProvider() {},
|
||||
registerImageGenerationProvider() {},
|
||||
registerWebSearchProvider() {},
|
||||
registerInteractiveHandler() {},
|
||||
registerHook() {},
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { emptyPluginConfigSchema, type OpenClawPluginApi } from "openclaw/plugin-sdk/core";
|
||||
import { buildOpenAIImageGenerationProvider } from "openclaw/plugin-sdk/image-generation";
|
||||
import { buildOpenAISpeechProvider } from "openclaw/plugin-sdk/speech";
|
||||
import { openaiMediaUnderstandingProvider } from "./media-understanding-provider.js";
|
||||
import { buildOpenAICodexProviderPlugin } from "./openai-codex-provider.js";
|
||||
@@ -14,6 +15,7 @@ const openAIPlugin = {
|
||||
api.registerProvider(buildOpenAICodexProviderPlugin());
|
||||
api.registerSpeechProvider(buildOpenAISpeechProvider());
|
||||
api.registerMediaUnderstandingProvider(openaiMediaUnderstandingProvider);
|
||||
api.registerImageGenerationProvider(buildOpenAIImageGenerationProvider());
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ export function createTestPluginApi(api: TestPluginApiInput): OpenClawPluginApi
|
||||
registerProvider() {},
|
||||
registerSpeechProvider() {},
|
||||
registerMediaUnderstandingProvider() {},
|
||||
registerImageGenerationProvider() {},
|
||||
registerWebSearchProvider() {},
|
||||
registerInteractiveHandler() {},
|
||||
registerCommand() {},
|
||||
|
||||
@@ -110,11 +110,17 @@ export function createPluginRuntimeMock(overrides: DeepPartial<PluginRuntime> =
|
||||
runFile: vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["runFile"],
|
||||
describeImageFile:
|
||||
vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["describeImageFile"],
|
||||
describeImageFileWithModel:
|
||||
vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["describeImageFileWithModel"],
|
||||
describeVideoFile:
|
||||
vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["describeVideoFile"],
|
||||
transcribeAudioFile:
|
||||
vi.fn() as unknown as PluginRuntime["mediaUnderstanding"]["transcribeAudioFile"],
|
||||
},
|
||||
imageGeneration: {
|
||||
generate: vi.fn() as unknown as PluginRuntime["imageGeneration"]["generate"],
|
||||
listProviders: vi.fn() as unknown as PluginRuntime["imageGeneration"]["listProviders"],
|
||||
},
|
||||
webSearch: {
|
||||
listProviders: vi.fn() as unknown as PluginRuntime["webSearch"]["listProviders"],
|
||||
search: vi.fn() as unknown as PluginRuntime["webSearch"]["search"],
|
||||
|
||||
16
package.json
16
package.json
@@ -102,6 +102,10 @@
|
||||
"types": "./dist/plugin-sdk/media-runtime.d.ts",
|
||||
"default": "./dist/plugin-sdk/media-runtime.js"
|
||||
},
|
||||
"./plugin-sdk/media-understanding-runtime": {
|
||||
"types": "./dist/plugin-sdk/media-understanding-runtime.d.ts",
|
||||
"default": "./dist/plugin-sdk/media-understanding-runtime.js"
|
||||
},
|
||||
"./plugin-sdk/conversation-runtime": {
|
||||
"types": "./dist/plugin-sdk/conversation-runtime.d.ts",
|
||||
"default": "./dist/plugin-sdk/conversation-runtime.js"
|
||||
@@ -114,6 +118,10 @@
|
||||
"types": "./dist/plugin-sdk/agent-runtime.d.ts",
|
||||
"default": "./dist/plugin-sdk/agent-runtime.js"
|
||||
},
|
||||
"./plugin-sdk/speech-runtime": {
|
||||
"types": "./dist/plugin-sdk/speech-runtime.d.ts",
|
||||
"default": "./dist/plugin-sdk/speech-runtime.js"
|
||||
},
|
||||
"./plugin-sdk/plugin-runtime": {
|
||||
"types": "./dist/plugin-sdk/plugin-runtime.d.ts",
|
||||
"default": "./dist/plugin-sdk/plugin-runtime.js"
|
||||
@@ -378,6 +386,14 @@
|
||||
"types": "./dist/plugin-sdk/provider-web-search.d.ts",
|
||||
"default": "./dist/plugin-sdk/provider-web-search.js"
|
||||
},
|
||||
"./plugin-sdk/image-generation": {
|
||||
"types": "./dist/plugin-sdk/image-generation.d.ts",
|
||||
"default": "./dist/plugin-sdk/image-generation.js"
|
||||
},
|
||||
"./plugin-sdk/image-generation-runtime": {
|
||||
"types": "./dist/plugin-sdk/image-generation-runtime.d.ts",
|
||||
"default": "./dist/plugin-sdk/image-generation-runtime.js"
|
||||
},
|
||||
"./plugin-sdk/reply-history": {
|
||||
"types": "./dist/plugin-sdk/reply-history.d.ts",
|
||||
"default": "./dist/plugin-sdk/reply-history.js"
|
||||
|
||||
@@ -15,9 +15,11 @@
|
||||
"channel-runtime",
|
||||
"infra-runtime",
|
||||
"media-runtime",
|
||||
"media-understanding-runtime",
|
||||
"conversation-runtime",
|
||||
"text-runtime",
|
||||
"agent-runtime",
|
||||
"speech-runtime",
|
||||
"plugin-runtime",
|
||||
"security-runtime",
|
||||
"gateway-runtime",
|
||||
@@ -84,6 +86,8 @@
|
||||
"provider-stream",
|
||||
"provider-usage",
|
||||
"provider-web-search",
|
||||
"image-generation",
|
||||
"image-generation-runtime",
|
||||
"reply-history",
|
||||
"media-understanding",
|
||||
"google",
|
||||
|
||||
@@ -93,6 +93,7 @@ const createRegistry = (channels: PluginRegistry["channels"]): PluginRegistry =>
|
||||
providers: [],
|
||||
speechProviders: [],
|
||||
mediaUnderstandingProviders: [],
|
||||
imageGenerationProviders: [],
|
||||
webSearchProviders: [],
|
||||
gatewayHandlers: {},
|
||||
httpRoutes: [],
|
||||
|
||||
@@ -339,6 +339,7 @@ describe("ensureChannelSetupPluginInstalled", () => {
|
||||
providerIds: [],
|
||||
speechProviderIds: [],
|
||||
mediaUnderstandingProviderIds: [],
|
||||
imageGenerationProviderIds: [],
|
||||
webSearchProviderIds: [],
|
||||
gatewayMethods: [],
|
||||
cliCommands: [],
|
||||
|
||||
@@ -1019,6 +1019,10 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"agents.defaults.imageModel.primary":
|
||||
"Optional image model (provider/model) used when the primary model lacks image input.",
|
||||
"agents.defaults.imageModel.fallbacks": "Ordered fallback image models (provider/model).",
|
||||
"agents.defaults.imageGenerationModel.primary":
|
||||
"Optional image-generation model (provider/model) used by the shared image generation capability.",
|
||||
"agents.defaults.imageGenerationModel.fallbacks":
|
||||
"Ordered fallback image-generation models (provider/model).",
|
||||
"agents.defaults.pdfModel.primary":
|
||||
"Optional PDF model (provider/model) for the PDF analysis tool. Defaults to imageModel, then session model.",
|
||||
"agents.defaults.pdfModel.fallbacks": "Ordered fallback PDF models (provider/model).",
|
||||
|
||||
@@ -454,6 +454,8 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.model.fallbacks": "Model Fallbacks",
|
||||
"agents.defaults.imageModel.primary": "Image Model",
|
||||
"agents.defaults.imageModel.fallbacks": "Image Model Fallbacks",
|
||||
"agents.defaults.imageGenerationModel.primary": "Image Generation Model",
|
||||
"agents.defaults.imageGenerationModel.fallbacks": "Image Generation Model Fallbacks",
|
||||
"agents.defaults.pdfModel.primary": "PDF Model",
|
||||
"agents.defaults.pdfModel.fallbacks": "PDF Model Fallbacks",
|
||||
"agents.defaults.pdfMaxBytesMb": "PDF Max Size (MB)",
|
||||
|
||||
@@ -122,6 +122,8 @@ export type AgentDefaultsConfig = {
|
||||
model?: AgentModelConfig;
|
||||
/** Optional image-capable model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */
|
||||
imageModel?: AgentModelConfig;
|
||||
/** Optional image-generation model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */
|
||||
imageGenerationModel?: AgentModelConfig;
|
||||
/** Optional PDF-capable model and fallbacks (provider/model). Accepts string or {primary,fallbacks}. */
|
||||
pdfModel?: AgentModelConfig;
|
||||
/** Maximum PDF file size in megabytes (default: 10). */
|
||||
|
||||
@@ -18,6 +18,7 @@ export const AgentDefaultsSchema = z
|
||||
.object({
|
||||
model: AgentModelSchema.optional(),
|
||||
imageModel: AgentModelSchema.optional(),
|
||||
imageGenerationModel: AgentModelSchema.optional(),
|
||||
pdfModel: AgentModelSchema.optional(),
|
||||
pdfMaxBytesMb: z.number().positive().optional(),
|
||||
pdfMaxPages: z.number().int().positive().optional(),
|
||||
|
||||
@@ -31,6 +31,7 @@ const createRegistry = (diagnostics: PluginDiagnostic[]): PluginRegistry => ({
|
||||
providers: [],
|
||||
speechProviders: [],
|
||||
mediaUnderstandingProviders: [],
|
||||
imageGenerationProviders: [],
|
||||
webSearchProviders: [],
|
||||
gatewayHandlers: {},
|
||||
httpRoutes: [],
|
||||
|
||||
@@ -148,6 +148,7 @@ const createStubPluginRegistry = (): PluginRegistry => ({
|
||||
providers: [],
|
||||
speechProviders: [],
|
||||
mediaUnderstandingProviders: [],
|
||||
imageGenerationProviders: [],
|
||||
webSearchProviders: [],
|
||||
gatewayHandlers: {},
|
||||
httpRoutes: [],
|
||||
|
||||
71
src/image-generation/provider-registry.ts
Normal file
71
src/image-generation/provider-registry.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
import { normalizeProviderId } from "../agents/model-selection.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { loadOpenClawPlugins } from "../plugins/loader.js";
|
||||
import { getActivePluginRegistry } from "../plugins/runtime.js";
|
||||
import type { ImageGenerationProviderPlugin } from "../plugins/types.js";
|
||||
|
||||
/** Providers bundled with the core itself; the list is currently empty. */
const BUILTIN_IMAGE_GENERATION_PROVIDERS: ReadonlyArray<ImageGenerationProviderPlugin> = [];

/**
 * Normalizes a provider id through `normalizeProviderId`, treating a missing
 * id as the empty string.
 *
 * @param id - Raw provider id, possibly undefined.
 * @returns The normalized id, or `undefined` when normalization yields a falsy value.
 */
function normalizeImageGenerationProviderId(id: string | undefined): string | undefined {
  const candidate = id ?? "";
  return normalizeProviderId(candidate) || undefined;
}
|
||||
|
||||
/**
 * Collects the image-generation providers contributed by plugins.
 *
 * The active plugin registry is used when it already holds at least one
 * image-generation provider, or when no config is supplied; otherwise the
 * plugins are loaded from the given config.
 *
 * @param cfg - Optional config used to load plugins when the active registry has no providers.
 * @returns The provider objects unwrapped from their registry entries (empty when none exist).
 */
function resolvePluginImageGenerationProviders(
  cfg?: OpenClawConfig,
): ImageGenerationProviderPlugin[] {
  const active = getActivePluginRegistry();
  const activeProviderCount = active?.imageGenerationProviders?.length ?? 0;

  if (!cfg || activeProviderCount > 0) {
    return active?.imageGenerationProviders?.map((entry) => entry.provider) ?? [];
  }

  const loaded = loadOpenClawPlugins({ config: cfg });
  return loaded?.imageGenerationProviders?.map((entry) => entry.provider) ?? [];
}
|
||||
|
||||
/**
 * Builds two lookup tables over every known image-generation provider.
 *
 * - `canonical` maps each normalized provider id to its provider.
 * - `aliases` maps the normalized id and every normalized alias to the provider.
 *
 * Built-in providers are registered first, then plugin providers, so a later
 * entry with the same key overwrites an earlier one. Providers whose id
 * normalizes to nothing are ignored.
 *
 * @param cfg - Optional config forwarded to plugin provider resolution.
 */
function buildProviderMaps(cfg?: OpenClawConfig): {
  canonical: Map<string, ImageGenerationProviderPlugin>;
  aliases: Map<string, ImageGenerationProviderPlugin>;
} {
  const byId = new Map<string, ImageGenerationProviderPlugin>();
  const byAlias = new Map<string, ImageGenerationProviderPlugin>();

  const allProviders = [
    ...BUILTIN_IMAGE_GENERATION_PROVIDERS,
    ...resolvePluginImageGenerationProviders(cfg),
  ];

  for (const provider of allProviders) {
    const id = normalizeImageGenerationProviderId(provider.id);
    if (!id) {
      continue;
    }
    byId.set(id, provider);
    // The canonical id is always resolvable through the alias table as well.
    byAlias.set(id, provider);

    const declaredAliases = provider.aliases ?? [];
    for (let i = 0; i < declaredAliases.length; i += 1) {
      const aliasKey = normalizeImageGenerationProviderId(declaredAliases[i]);
      if (aliasKey) {
        byAlias.set(aliasKey, provider);
      }
    }
  }

  return { canonical: byId, aliases: byAlias };
}
|
||||
|
||||
/**
 * Lists every registered image-generation provider once, keyed by canonical id.
 *
 * @param cfg - Optional config forwarded to provider resolution.
 * @returns A fresh array of providers (aliases do not produce duplicates).
 */
export function listImageGenerationProviders(
  cfg?: OpenClawConfig,
): ImageGenerationProviderPlugin[] {
  const { canonical } = buildProviderMaps(cfg);
  return Array.from(canonical.values());
}
|
||||
|
||||
/**
 * Looks up an image-generation provider by id or alias.
 *
 * @param providerId - Provider id or alias; normalized before lookup.
 * @param cfg - Optional config forwarded to provider resolution.
 * @returns The matching provider, or `undefined` when the id is empty or unknown.
 */
export function getImageGenerationProvider(
  providerId: string | undefined,
  cfg?: OpenClawConfig,
): ImageGenerationProviderPlugin | undefined {
  const lookupKey = normalizeImageGenerationProviderId(providerId);
  return lookupKey ? buildProviderMaps(cfg).aliases.get(lookupKey) : undefined;
}
|
||||
55
src/image-generation/providers/openai.test.ts
Normal file
55
src/image-generation/providers/openai.test.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import * as modelAuth from "../../agents/model-auth.js";
|
||||
import { buildOpenAIImageGenerationProvider } from "./openai.js";
|
||||
|
||||
describe("OpenAI image-generation provider", () => {
  // Restore spied functions after every case.
  afterEach(() => {
    vi.restoreAllMocks();
  });

  it("generates PNG buffers from the OpenAI Images API", async () => {
    // Fake Images API payload: one base64-encoded image plus a revised prompt.
    const apiPayload = {
      data: [
        {
          b64_json: Buffer.from("png-data").toString("base64"),
          revised_prompt: "revised",
        },
      ],
    };
    const fetchMock = vi.fn().mockResolvedValue({
      ok: true,
      json: async () => apiPayload,
    });

    vi.spyOn(modelAuth, "resolveApiKeyForProvider").mockResolvedValue({
      apiKey: "sk-test",
      source: "env",
      mode: "api-key",
    });
    vi.stubGlobal("fetch", fetchMock);

    const result = await buildOpenAIImageGenerationProvider().generateImage({
      provider: "openai",
      model: "gpt-image-1",
      prompt: "draw a cat",
      cfg: {},
    });

    // The request goes to the default endpoint as a POST.
    expect(fetchMock).toHaveBeenCalledWith(
      "https://api.openai.com/v1/images/generations",
      expect.objectContaining({ method: "POST" }),
    );
    // The base64 payload is decoded and labelled as a PNG.
    expect(result).toEqual({
      images: [
        {
          buffer: Buffer.from("png-data"),
          mimeType: "image/png",
          fileName: "image-1.png",
          revisedPrompt: "revised",
        },
      ],
      model: "gpt-image-1",
    });
  });
});
|
||||
79
src/image-generation/providers/openai.ts
Normal file
79
src/image-generation/providers/openai.ts
Normal file
@@ -0,0 +1,79 @@
|
||||
import { resolveApiKeyForProvider } from "../../agents/model-auth.js";
|
||||
import type { ImageGenerationProviderPlugin } from "../../plugins/types.js";
|
||||
|
||||
/** Base URL used when the config does not override the OpenAI endpoint. */
const DEFAULT_OPENAI_IMAGE_BASE_URL = "https://api.openai.com/v1";
/** Model requested when the caller does not name one. */
const DEFAULT_OPENAI_IMAGE_MODEL = "gpt-image-1";
/** MIME type attached to every decoded image. */
const DEFAULT_OUTPUT_MIME = "image/png";
/** Image size requested when the caller does not specify one. */
const DEFAULT_SIZE = "1024x1024";

/** Subset of the Images API response body that this provider reads. */
type OpenAIImageApiResponse = {
  data?: Array<{
    b64_json?: string;
    revised_prompt?: string;
  }>;
};

/**
 * Resolves the base URL for OpenAI image requests.
 *
 * Reads `models.providers.openai.baseUrl` from the config, trims surrounding
 * whitespace and strips trailing slashes so that appending a `/path` never
 * yields a double slash. Falls back to the default endpoint when the
 * configured value is missing or empty after cleanup.
 *
 * @param cfg - Config object (may be undefined).
 * @returns A base URL without a trailing slash.
 */
function resolveOpenAIBaseUrl(cfg: Parameters<typeof resolveApiKeyForProvider>[0]["cfg"]): string {
  const direct = cfg?.models?.providers?.openai?.baseUrl?.trim().replace(/\/+$/, "");
  return direct || DEFAULT_OPENAI_IMAGE_BASE_URL;
}
|
||||
|
||||
/**
 * Builds the OpenAI image-generation provider plugin.
 *
 * The returned provider resolves an API key for `openai`, posts the prompt to
 * `<base URL>/images/generations`, and decodes every `b64_json` entry of the
 * response into a PNG-labelled buffer. Entries without `b64_json` are dropped.
 *
 * @returns A provider whose `generateImage` rejects when no API key is
 *   available or when the HTTP response is not OK.
 */
export function buildOpenAIImageGenerationProvider(): ImageGenerationProviderPlugin {
  return {
    id: "openai",
    label: "OpenAI",
    supportedSizes: ["1024x1024", "1024x1536", "1536x1024"],
    async generateImage(req) {
      const auth = await resolveApiKeyForProvider({
        provider: "openai",
        cfg: req.cfg,
        agentDir: req.agentDir,
      });
      if (!auth.apiKey) {
        throw new Error("OpenAI API key missing");
      }

      // Resolve the model once so the request and the result always agree.
      const model = req.model || DEFAULT_OPENAI_IMAGE_MODEL;
      // GPT image models always return base64 data and do not support the
      // `response_format` parameter; only send it to other models, which
      // would otherwise answer with URLs.
      const supportsResponseFormat = !model.toLowerCase().startsWith("gpt-image");

      const response = await fetch(`${resolveOpenAIBaseUrl(req.cfg)}/images/generations`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${auth.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model,
          prompt: req.prompt,
          n: req.count ?? 1,
          size: req.size ?? DEFAULT_SIZE,
          ...(supportsResponseFormat ? { response_format: "b64_json" } : {}),
        }),
      });

      if (!response.ok) {
        // Best effort: prefer the response body, fall back to the status text.
        const text = await response.text().catch(() => "");
        throw new Error(
          `OpenAI image generation failed (${response.status}): ${text || response.statusText}`,
        );
      }

      const data = (await response.json()) as OpenAIImageApiResponse;
      const images = (data.data ?? [])
        .map((entry, index) => {
          if (!entry.b64_json) {
            return null;
          }
          return {
            buffer: Buffer.from(entry.b64_json, "base64"),
            mimeType: DEFAULT_OUTPUT_MIME,
            // File names follow the entry's position in the response (1-based).
            fileName: `image-${index + 1}.png`,
            ...(entry.revised_prompt ? { revisedPrompt: entry.revised_prompt } : {}),
          };
        })
        .filter((entry): entry is NonNullable<typeof entry> => entry !== null);

      return {
        images,
        model,
      };
    },
  };
}
|
||||
81
src/image-generation/runtime.test.ts
Normal file
81
src/image-generation/runtime.test.ts
Normal file
@@ -0,0 +1,81 @@
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { createEmptyPluginRegistry } from "../plugins/registry.js";
|
||||
import { setActivePluginRegistry } from "../plugins/runtime.js";
|
||||
import { generateImage, listRuntimeImageGenerationProviders } from "./runtime.js";
|
||||
|
||||
describe("image-generation runtime helpers", () => {
  // Reset the active registry so cases cannot observe each other's providers.
  afterEach(() => {
    setActivePluginRegistry(createEmptyPluginRegistry());
  });

  it("generates images through the active image-generation registry", async () => {
    const registry = createEmptyPluginRegistry();
    registry.imageGenerationProviders.push({
      pluginId: "image-plugin",
      pluginName: "Image Plugin",
      source: "test",
      provider: {
        id: "image-plugin",
        generateImage: async () => ({
          images: [
            {
              buffer: Buffer.from("png-bytes"),
              mimeType: "image/png",
              fileName: "sample.png",
            },
          ],
          model: "img-v1",
        }),
      },
    });
    setActivePluginRegistry(registry);

    // Point the default image-generation model at the fake provider.
    const cfg = {
      agents: {
        defaults: {
          imageGenerationModel: { primary: "image-plugin/img-v1" },
        },
      },
    } as OpenClawConfig;

    const { provider, model, attempts, images } = await generateImage({
      cfg,
      prompt: "draw a cat",
      agentDir: "/tmp/agent",
    });

    expect(provider).toBe("image-plugin");
    expect(model).toBe("img-v1");
    expect(attempts).toEqual([]);
    expect(images).toEqual([
      {
        buffer: Buffer.from("png-bytes"),
        mimeType: "image/png",
        fileName: "sample.png",
      },
    ]);
  });

  it("lists runtime image-generation providers from the active registry", () => {
    const registry = createEmptyPluginRegistry();
    registry.imageGenerationProviders.push({
      pluginId: "image-plugin",
      pluginName: "Image Plugin",
      source: "test",
      provider: {
        id: "image-plugin",
        async generateImage() {
          return {
            images: [{ buffer: Buffer.from("x"), mimeType: "image/png" }],
          };
        },
      },
    });
    setActivePluginRegistry(registry);

    const listed = listRuntimeImageGenerationProviders();
    expect(listed).toMatchObject([{ id: "image-plugin" }]);
  });
});
|
||||
162
src/image-generation/runtime.ts
Normal file
162
src/image-generation/runtime.ts
Normal file
@@ -0,0 +1,162 @@
|
||||
import { describeFailoverError, isFailoverError } from "../agents/failover-error.js";
|
||||
import type { FallbackAttempt } from "../agents/model-fallback.types.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
resolveAgentModelFallbackValues,
|
||||
resolveAgentModelPrimaryValue,
|
||||
} from "../config/model-input.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { getImageGenerationProvider, listImageGenerationProviders } from "./provider-registry.js";
|
||||
import type { GeneratedImageAsset, ImageGenerationResult } from "./types.js";
|
||||
|
||||
/** Subsystem logger for the image-generation runtime. */
const log = createSubsystemLogger("image-generation");

/** Input accepted by `generateImage`. */
export type GenerateImageParams = {
  /** Config used to resolve candidate models and providers. */
  cfg: OpenClawConfig;
  /** Text prompt forwarded to the provider. */
  prompt: string;
  /** Agent directory forwarded to the provider, when known. */
  agentDir?: string;
  /** `provider/model` reference tried before the configured models. */
  modelOverride?: string;
  /** Requested image count, forwarded to the provider as-is. */
  count?: number;
  /** Requested image size, forwarded to the provider as-is. */
  size?: string;
};

/** Outcome of a successful `generateImage` call. */
export type GenerateImageRuntimeResult = {
  /** Images returned by the provider that succeeded. */
  images: Array<GeneratedImageAsset>;
  /** Provider id of the candidate that succeeded. */
  provider: string;
  /** Model reported by the provider, or the candidate model when it reported none. */
  model: string;
  /** Failed candidates tried before the successful one. */
  attempts: Array<FallbackAttempt>;
  /** Provider-supplied metadata, passed through unchanged. */
  metadata?: Record<string, unknown>;
};
|
||||
|
||||
/**
 * Splits a `provider/model` reference at its first slash.
 *
 * @param raw - Reference text; surrounding whitespace is ignored.
 * @returns The trimmed provider and model, or `null` when the input is empty,
 *   has no slash, starts with the slash, or ends with it. Any further slashes
 *   stay part of the model.
 */
function parseModelRef(raw: string | undefined): { provider: string; model: string } | null {
  const ref = raw?.trim();
  if (!ref) {
    return null;
  }

  const separatorAt = ref.indexOf("/");
  const hasProviderPart = separatorAt > 0;
  const hasModelPart = separatorAt !== ref.length - 1;
  if (!hasProviderPart || !hasModelPart) {
    return null;
  }

  const provider = ref.substring(0, separatorAt).trim();
  const model = ref.substring(separatorAt + 1).trim();
  return { provider, model };
}
|
||||
|
||||
/**
 * Produces the ordered list of provider/model candidates to try.
 *
 * Order: the explicit override, then the configured primary
 * image-generation model, then the configured fallbacks. References that
 * fail to parse are skipped, and a `provider/model` pair is kept only the
 * first time it appears.
 */
function resolveImageGenerationCandidates(params: {
  cfg: OpenClawConfig;
  modelOverride?: string;
}): Array<{ provider: string; model: string }> {
  const modelConfig = params.cfg.agents?.defaults?.imageGenerationModel;
  const rawRefs: Array<string | undefined> = [
    params.modelOverride,
    resolveAgentModelPrimaryValue(modelConfig),
    ...resolveAgentModelFallbackValues(modelConfig),
  ];

  // A Map keeps first-seen insertion order, which doubles as priority order.
  const unique = new Map<string, { provider: string; model: string }>();
  for (const raw of rawRefs) {
    const parsed = parseModelRef(raw);
    if (!parsed) {
      continue;
    }
    const key = `${parsed.provider}/${parsed.model}`;
    if (!unique.has(key)) {
      unique.set(key, parsed);
    }
  }
  return [...unique.values()];
}
|
||||
|
||||
/**
 * Raises the terminal error after every candidate failed.
 *
 * With at most one recorded attempt and a truthy `lastError`, that error is
 * rethrown untouched. Otherwise a summary error listing each attempt is
 * thrown, carrying `lastError` as its `cause` when it is an `Error`.
 *
 * @throws Always.
 */
function throwImageGenerationFailure(params: {
  attempts: FallbackAttempt[];
  lastError: unknown;
}): never {
  const { attempts, lastError } = params;

  const singleFailure = attempts.length <= 1;
  if (singleFailure && lastError) {
    throw lastError;
  }

  let summary = "unknown";
  if (attempts.length > 0) {
    const lines: string[] = [];
    for (const attempt of attempts) {
      lines.push(`${attempt.provider}/${attempt.model}: ${attempt.error}`);
    }
    summary = lines.join(" | ");
  }

  const cause = lastError instanceof Error ? lastError : undefined;
  throw new Error(`All image generation models failed (${attempts.length}): ${summary}`, {
    cause,
  });
}
|
||||
|
||||
/**
 * Lists the image-generation providers visible to the runtime.
 *
 * @param params - Optional wrapper; its `config` is forwarded to the provider registry.
 */
export function listRuntimeImageGenerationProviders(params?: { config?: OpenClawConfig }) {
  const config = params?.config;
  return listImageGenerationProviders(config);
}
|
||||
|
||||
/**
 * Generates images by trying each configured provider/model candidate in order.
 *
 * Candidates come from the override, the configured primary model and its
 * fallbacks. A candidate fails when no provider is registered for it, when
 * the provider throws, or when it returns no images. Every failure is
 * recorded in `attempts`, and the first candidate that succeeds ends the
 * search.
 *
 * @param params - Prompt, config and optional override/count/size/agentDir.
 * @returns The images plus the provider/model that produced them and the
 *   failed attempts that preceded the success.
 * @throws When no candidate is configured, or when every candidate fails.
 */
export async function generateImage(
  params: GenerateImageParams,
): Promise<GenerateImageRuntimeResult> {
  const { cfg, prompt, agentDir, count, size, modelOverride } = params;

  const candidates = resolveImageGenerationCandidates({ cfg, modelOverride });
  if (candidates.length === 0) {
    throw new Error(
      "No image-generation model configured. Set agents.defaults.imageGenerationModel.primary or agents.defaults.imageGenerationModel.fallbacks.",
    );
  }

  const attempts: FallbackAttempt[] = [];
  let lastError: unknown;

  for (const { provider: providerId, model } of candidates) {
    const provider = getImageGenerationProvider(providerId, cfg);
    if (!provider) {
      // An unknown provider counts as a failed attempt so later fallbacks still run.
      const error = `No image-generation provider registered for ${providerId}`;
      attempts.push({ provider: providerId, model, error });
      lastError = new Error(error);
      continue;
    }

    try {
      const result: ImageGenerationResult = await provider.generateImage({
        provider: providerId,
        model,
        prompt,
        cfg,
        agentDir,
        count,
        size,
      });

      const producedImages = Array.isArray(result.images) && result.images.length > 0;
      if (!producedImages) {
        // Thrown inside the try so an empty result falls through to the next candidate.
        throw new Error("Image generation provider returned no images.");
      }

      return {
        images: result.images,
        provider: providerId,
        model: result.model ?? model,
        attempts,
        metadata: result.metadata,
      };
    } catch (err) {
      lastError = err;
      const described = isFailoverError(err) ? describeFailoverError(err) : undefined;
      const message = described?.message ?? (err instanceof Error ? err.message : String(err));
      attempts.push({
        provider: providerId,
        model,
        error: message,
        reason: described?.reason,
        status: described?.status,
        code: described?.code,
      });
      log.debug(`image-generation candidate failed: ${providerId}/${model}`);
    }
  }

  throwImageGenerationFailure({ attempts, lastError });
}
|
||||
33
src/image-generation/types.ts
Normal file
33
src/image-generation/types.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
|
||||
/** A single generated image together with optional descriptive details. */
export type GeneratedImageAsset = {
  /** Raw image bytes. */
  buffer: Buffer;
  /** MIME type of `buffer`. */
  mimeType: string;
  /** Suggested file name, when the provider supplies one. */
  fileName?: string;
  /** Prompt as revised by the provider, when reported. */
  revisedPrompt?: string;
  /** Free-form per-image metadata. */
  metadata?: Record<string, unknown>;
};

/** Request handed to a provider's `generateImage`. */
export type ImageGenerationRequest = {
  /** Provider id the request was routed to. */
  provider: string;
  /** Model to use. */
  model: string;
  /** Text prompt describing the desired image. */
  prompt: string;
  /** Config available to the provider. */
  cfg: OpenClawConfig;
  /** Agent directory, when known. */
  agentDir?: string;
  /** Number of images requested. */
  count?: number;
  /** Requested image size. */
  size?: string;
};

/** Result returned by a provider's `generateImage`. */
export type ImageGenerationResult = {
  /** Generated images. */
  images: Array<GeneratedImageAsset>;
  /** Model that produced the images, when the provider reports it. */
  model?: string;
  /** Free-form result-level metadata. */
  metadata?: Record<string, unknown>;
};

/** Contract implemented by an image-generation provider. */
export type ImageGenerationProvider = {
  /** Provider id. */
  id: string;
  /** Alternative ids for the provider. */
  aliases?: Array<string>;
  /** Human-readable name. */
  label?: string;
  /** Sizes the provider advertises. */
  supportedSizes?: Array<string>;
  /** Generates images for the given request. */
  generateImage: (req: ImageGenerationRequest) => Promise<ImageGenerationResult>;
};
|
||||
@@ -1,6 +1,8 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { getMediaUnderstandingProvider } from "./providers/index.js";
|
||||
import {
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
@@ -90,6 +92,38 @@ export async function describeImageFile(params: {
|
||||
return await runMediaUnderstandingFile({ ...params, capability: "image" });
|
||||
}
|
||||
|
||||
/**
 * Describes an image file with an explicitly chosen provider and model.
 *
 * Looks up the named media-understanding provider in a registry built from
 * the config, reads the file from disk, and delegates to the provider's
 * `describeImage`.
 *
 * @param params - File path, config, provider/model, prompt and optional
 *   limits. `timeoutMs` defaults to 30 seconds, and a missing `agentDir` is
 *   passed on as an empty string.
 * @returns Whatever the provider's `describeImage` resolves to.
 * @throws When the provider is unknown or has no `describeImage`, or when
 *   reading the file fails.
 */
export async function describeImageFileWithModel(params: {
  filePath: string;
  cfg: OpenClawConfig;
  agentDir?: string;
  mime?: string;
  provider: string;
  model: string;
  prompt: string;
  maxTokens?: number;
  timeoutMs?: number;
}) {
  const { cfg, filePath, provider: providerId } = params;
  const timeoutMs = params.timeoutMs ?? 30_000;

  const provider = getMediaUnderstandingProvider(
    providerId,
    buildProviderRegistry(undefined, cfg),
  );
  if (!provider?.describeImage) {
    throw new Error(`Provider does not support image analysis: ${providerId}`);
  }

  // Read the file only after the provider is known to support images.
  const buffer = await fs.readFile(filePath);
  const fileName = path.basename(filePath);

  return await provider.describeImage({
    buffer,
    fileName,
    mime: params.mime,
    provider: providerId,
    model: params.model,
    prompt: params.prompt,
    maxTokens: params.maxTokens,
    timeoutMs,
    cfg,
    agentDir: params.agentDir ?? "",
  });
}
|
||||
|
||||
export async function describeVideoFile(params: {
|
||||
filePath: string;
|
||||
cfg: OpenClawConfig;
|
||||
|
||||
3
src/plugin-sdk/image-generation-runtime.ts
Normal file
3
src/plugin-sdk/image-generation-runtime.ts
Normal file
@@ -0,0 +1,3 @@
|
||||
// Public runtime-facing image-generation helpers for feature/channel plugins.
|
||||
|
||||
export { generateImage, listRuntimeImageGenerationProviders } from "../image-generation/runtime.js";
|
||||
10
src/plugin-sdk/image-generation.ts
Normal file
10
src/plugin-sdk/image-generation.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
// Public image-generation helpers and types for provider plugins.
|
||||
|
||||
export type {
|
||||
GeneratedImageAsset,
|
||||
ImageGenerationProvider,
|
||||
ImageGenerationRequest,
|
||||
ImageGenerationResult,
|
||||
} from "../image-generation/types.js";
|
||||
|
||||
export { buildOpenAIImageGenerationProvider } from "../image-generation/providers/openai.js";
|
||||
@@ -40,6 +40,7 @@ export type {
|
||||
export type { OpenClawConfig } from "../config/config.js";
|
||||
/** @deprecated Use OpenClawConfig instead */
|
||||
export type { OpenClawConfig as ClawdbotConfig } from "../config/config.js";
|
||||
export * from "./image-generation.js";
|
||||
export type { SecretInput, SecretRef } from "../config/types.secrets.js";
|
||||
export type { RuntimeEnv } from "../runtime.js";
|
||||
export type { HookEntry } from "../hooks/types.js";
|
||||
|
||||
9
src/plugin-sdk/media-understanding-runtime.ts
Normal file
9
src/plugin-sdk/media-understanding-runtime.ts
Normal file
@@ -0,0 +1,9 @@
|
||||
// Public runtime-facing media-understanding helpers for feature/channel plugins.
|
||||
|
||||
export {
|
||||
describeImageFile,
|
||||
describeImageFileWithModel,
|
||||
describeVideoFile,
|
||||
runMediaUnderstandingFile,
|
||||
transcribeAudioFile,
|
||||
} from "../media-understanding/runtime.js";
|
||||
3
src/plugin-sdk/speech-runtime.ts
Normal file
3
src/plugin-sdk/speech-runtime.ts
Normal file
@@ -0,0 +1,3 @@
|
||||
// Public runtime-facing speech helpers for feature/channel plugins.
|
||||
|
||||
export { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "../tts/runtime.js";
|
||||
@@ -1,5 +1,6 @@
|
||||
import type {
|
||||
AnyAgentTool,
|
||||
ImageGenerationProviderPlugin,
|
||||
MediaUnderstandingProviderPlugin,
|
||||
OpenClawPluginApi,
|
||||
ProviderPlugin,
|
||||
@@ -12,6 +13,7 @@ export type CapturedPluginRegistration = {
|
||||
providers: ProviderPlugin[];
|
||||
speechProviders: SpeechProviderPlugin[];
|
||||
mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[];
|
||||
imageGenerationProviders: ImageGenerationProviderPlugin[];
|
||||
webSearchProviders: WebSearchProviderPlugin[];
|
||||
tools: AnyAgentTool[];
|
||||
};
|
||||
@@ -20,6 +22,7 @@ export function createCapturedPluginRegistration(): CapturedPluginRegistration {
|
||||
const providers: ProviderPlugin[] = [];
|
||||
const speechProviders: SpeechProviderPlugin[] = [];
|
||||
const mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[] = [];
|
||||
const imageGenerationProviders: ImageGenerationProviderPlugin[] = [];
|
||||
const webSearchProviders: WebSearchProviderPlugin[] = [];
|
||||
const tools: AnyAgentTool[] = [];
|
||||
|
||||
@@ -27,6 +30,7 @@ export function createCapturedPluginRegistration(): CapturedPluginRegistration {
|
||||
providers,
|
||||
speechProviders,
|
||||
mediaUnderstandingProviders,
|
||||
imageGenerationProviders,
|
||||
webSearchProviders,
|
||||
tools,
|
||||
api: {
|
||||
@@ -39,6 +43,9 @@ export function createCapturedPluginRegistration(): CapturedPluginRegistration {
|
||||
registerMediaUnderstandingProvider(provider: MediaUnderstandingProviderPlugin) {
|
||||
mediaUnderstandingProviders.push(provider);
|
||||
},
|
||||
registerImageGenerationProvider(provider: ImageGenerationProviderPlugin) {
|
||||
imageGenerationProviders.push(provider);
|
||||
},
|
||||
registerWebSearchProvider(provider: WebSearchProviderPlugin) {
|
||||
webSearchProviders.push(provider);
|
||||
},
|
||||
|
||||
@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
|
||||
import { loadPluginManifestRegistry } from "../manifest-registry.js";
|
||||
import { resolvePluginWebSearchProviders } from "../web-search-providers.js";
|
||||
import {
|
||||
imageGenerationProviderContractRegistry,
|
||||
mediaUnderstandingProviderContractRegistry,
|
||||
pluginRegistrationContractRegistry,
|
||||
providerContractPluginIds,
|
||||
@@ -56,6 +57,23 @@ function findMediaUnderstandingProviderForPlugin(pluginId: string) {
|
||||
return entry.provider;
|
||||
}
|
||||
|
||||
/**
 * Collects the ids of all image-generation providers that the contract
 * registry attributes to `pluginId`.
 *
 * @param pluginId - Plugin whose registry entries should be selected.
 * @returns A new array of provider ids ordered with `localeCompare`.
 */
function findImageGenerationProviderIdsForPlugin(pluginId: string) {
  const ownedIds: string[] = [];
  for (const { pluginId: owner, provider } of imageGenerationProviderContractRegistry) {
    if (owner === pluginId) {
      ownedIds.push(provider.id);
    }
  }
  return ownedIds.toSorted((a, b) => a.localeCompare(b));
}
|
||||
|
||||
/**
 * Returns the provider of the first image-generation contract entry owned by
 * `pluginId`.
 *
 * @param pluginId - Plugin whose image-generation provider is wanted.
 * @returns The `provider` of the first matching registry entry.
 * @throws Error when the registry has no entry for `pluginId`.
 */
function findImageGenerationProviderForPlugin(pluginId: string) {
  for (const candidate of imageGenerationProviderContractRegistry) {
    if (candidate.pluginId === pluginId) {
      return candidate.provider;
    }
  }
  throw new Error(`image-generation provider contract missing for ${pluginId}`);
}
|
||||
|
||||
function findRegistrationForPlugin(pluginId: string) {
|
||||
const entry = pluginRegistrationContractRegistry.find(
|
||||
(candidate) => candidate.pluginId === pluginId,
|
||||
@@ -108,6 +126,10 @@ describe("plugin contract registry", () => {
|
||||
).toEqual(bundledWebSearchPluginIds);
|
||||
});
|
||||
|
||||
// Every provider id in the image-generation contract registry must appear once:
// the id list has to equal its own de-duplicated copy.
it("does not duplicate bundled image-generation provider ids", () => {
  const registeredIds = imageGenerationProviderContractRegistry.map(({ provider }) => provider.id);
  const uniqueIds = Array.from(new Set(registeredIds));
  expect(registeredIds).toEqual(uniqueIds);
});
|
||||
it("keeps multi-provider plugin ownership explicit", () => {
|
||||
expect(findProviderIdsForPlugin("google")).toEqual(["google", "google-gemini-cli"]);
|
||||
expect(findProviderIdsForPlugin("minimax")).toEqual(["minimax", "minimax-portal"]);
|
||||
@@ -142,11 +164,16 @@ describe("plugin contract registry", () => {
|
||||
expect(findMediaUnderstandingProviderIdsForPlugin("zai")).toEqual(["zai"]);
|
||||
});
|
||||
|
||||
// Pins which image-generation provider ids the "openai" plugin owns.
it("keeps bundled image-generation ownership explicit", () => {
  const openAIOwnedIds = findImageGenerationProviderIdsForPlugin("openai");
  expect(openAIOwnedIds).toEqual(["openai"]);
});
|
||||
|
||||
it("keeps bundled provider and web search tool ownership explicit", () => {
|
||||
expect(findRegistrationForPlugin("firecrawl")).toMatchObject({
|
||||
providerIds: [],
|
||||
speechProviderIds: [],
|
||||
mediaUnderstandingProviderIds: [],
|
||||
imageGenerationProviderIds: [],
|
||||
webSearchProviderIds: ["firecrawl"],
|
||||
toolNames: ["firecrawl_search", "firecrawl_scrape"],
|
||||
});
|
||||
@@ -157,16 +184,19 @@ describe("plugin contract registry", () => {
|
||||
providerIds: ["openai", "openai-codex"],
|
||||
speechProviderIds: ["openai"],
|
||||
mediaUnderstandingProviderIds: ["openai"],
|
||||
imageGenerationProviderIds: ["openai"],
|
||||
});
|
||||
expect(findRegistrationForPlugin("elevenlabs")).toMatchObject({
|
||||
providerIds: [],
|
||||
speechProviderIds: ["elevenlabs"],
|
||||
mediaUnderstandingProviderIds: [],
|
||||
imageGenerationProviderIds: [],
|
||||
});
|
||||
expect(findRegistrationForPlugin("microsoft")).toMatchObject({
|
||||
providerIds: [],
|
||||
speechProviderIds: ["microsoft"],
|
||||
mediaUnderstandingProviderIds: [],
|
||||
imageGenerationProviderIds: [],
|
||||
});
|
||||
});
|
||||
|
||||
@@ -213,4 +243,10 @@ describe("plugin contract registry", () => {
|
||||
expect.any(Function),
|
||||
);
|
||||
});
|
||||
|
||||
// The provider registered by the "openai" plugin must expose a callable
// `generateImage` member.
it("keeps bundled image-generation support explicit", () => {
  const openAIProvider = findImageGenerationProviderForPlugin("openai");
  expect(openAIProvider.generateImage).toEqual(expect.any(Function));
});
|
||||
});
|
||||
|
||||
@@ -37,6 +37,7 @@ import xiaomiPlugin from "../../../extensions/xiaomi/index.js";
|
||||
import zaiPlugin from "../../../extensions/zai/index.js";
|
||||
import { createCapturedPluginRegistration } from "../captured-registration.js";
|
||||
import type {
|
||||
ImageGenerationProviderPlugin,
|
||||
MediaUnderstandingProviderPlugin,
|
||||
ProviderPlugin,
|
||||
SpeechProviderPlugin,
|
||||
@@ -62,12 +63,14 @@ type WebSearchProviderContractEntry = CapabilityContractEntry<WebSearchProviderP
|
||||
type SpeechProviderContractEntry = CapabilityContractEntry<SpeechProviderPlugin>;
|
||||
type MediaUnderstandingProviderContractEntry =
|
||||
CapabilityContractEntry<MediaUnderstandingProviderPlugin>;
|
||||
// Capability contract entry specialised to image-generation provider plugins.
type ImageGenerationProviderContractEntry = CapabilityContractEntry<ImageGenerationProviderPlugin>;
|
||||
|
||||
/**
 * Per-plugin summary of what a plugin registered, grouped by registration
 * kind. Each list holds the ids (or tool names) captured for that kind.
 */
type PluginRegistrationContractEntry = {
  /** Id of the plugin the summary belongs to. */
  pluginId: string;
  /** Ids of the registered model providers. */
  providerIds: string[];
  /** Ids of the registered speech providers. */
  speechProviderIds: string[];
  /** Ids of the registered media-understanding providers. */
  mediaUnderstandingProviderIds: string[];
  /** Ids of the registered image-generation providers. */
  imageGenerationProviderIds: string[];
  /** Ids of the registered web-search providers. */
  webSearchProviderIds: string[];
  /** Names of the registered agent tools. */
  toolNames: string[];
};
|
||||
@@ -128,6 +131,8 @@ const bundledMediaUnderstandingPlugins: RegistrablePlugin[] = [
|
||||
zaiPlugin,
|
||||
];
|
||||
|
||||
// Bundled plugins whose captured registrations feed the image-generation
// provider contract registry.
const bundledImageGenerationPlugins: RegistrablePlugin[] = [openAIPlugin];
|
||||
|
||||
function captureRegistrations(plugin: RegistrablePlugin) {
|
||||
const captured = createCapturedPluginRegistration();
|
||||
plugin.register(captured.api);
|
||||
@@ -207,12 +212,19 @@ export const mediaUnderstandingProviderContractRegistry: MediaUnderstandingProvi
|
||||
select: (captured) => captured.mediaUnderstandingProviders,
|
||||
});
|
||||
|
||||
/**
 * Contract registry for image-generation providers, built from the bundled
 * image-generation plugins by selecting each capture's
 * `imageGenerationProviders` list.
 */
export const imageGenerationProviderContractRegistry: ImageGenerationProviderContractEntry[] =
  buildCapabilityContractRegistry({
    plugins: bundledImageGenerationPlugins,
    select: ({ imageGenerationProviders }) => imageGenerationProviders,
  });
|
||||
|
||||
const bundledPluginRegistrationList = [
|
||||
...new Map(
|
||||
[
|
||||
...bundledProviderPlugins,
|
||||
...bundledSpeechPlugins,
|
||||
...bundledMediaUnderstandingPlugins,
|
||||
...bundledImageGenerationPlugins,
|
||||
...bundledWebSearchPlugins,
|
||||
].map((plugin) => [plugin.id, plugin]),
|
||||
).values(),
|
||||
@@ -228,6 +240,7 @@ export const pluginRegistrationContractRegistry: PluginRegistrationContractEntry
|
||||
mediaUnderstandingProviderIds: captured.mediaUnderstandingProviders.map(
|
||||
(provider) => provider.id,
|
||||
),
|
||||
imageGenerationProviderIds: captured.imageGenerationProviders.map((provider) => provider.id),
|
||||
webSearchProviderIds: captured.webSearchProviders.map((provider) => provider.id),
|
||||
toolNames: captured.tools.map((tool) => tool.name),
|
||||
};
|
||||
|
||||
@@ -19,6 +19,7 @@ export function createMockPluginRegistry(
|
||||
providerIds: [],
|
||||
speechProviderIds: [],
|
||||
mediaUnderstandingProviderIds: [],
|
||||
imageGenerationProviderIds: [],
|
||||
webSearchProviderIds: [],
|
||||
gatewayMethods: [],
|
||||
cliCommands: [],
|
||||
@@ -43,6 +44,7 @@ export function createMockPluginRegistry(
|
||||
providers: [],
|
||||
speechProviders: [],
|
||||
mediaUnderstandingProviders: [],
|
||||
imageGenerationProviders: [],
|
||||
webSearchProviders: [],
|
||||
httpRoutes: [],
|
||||
gatewayHandlers: {},
|
||||
|
||||
@@ -497,6 +497,7 @@ function createPluginRecord(params: {
|
||||
providerIds: [],
|
||||
speechProviderIds: [],
|
||||
mediaUnderstandingProviderIds: [],
|
||||
imageGenerationProviderIds: [],
|
||||
webSearchProviderIds: [],
|
||||
gatewayMethods: [],
|
||||
cliCommands: [],
|
||||
|
||||
@@ -22,6 +22,7 @@ import {
|
||||
stripPromptMutationFieldsFromLegacyHookResult,
|
||||
} from "./types.js";
|
||||
import type {
|
||||
ImageGenerationProviderPlugin,
|
||||
OpenClawPluginApi,
|
||||
OpenClawPluginChannelRegistration,
|
||||
OpenClawPluginCliRegistrar,
|
||||
@@ -116,6 +117,8 @@ export type PluginSpeechProviderRegistration =
|
||||
PluginOwnedProviderRegistration<SpeechProviderPlugin>;
|
||||
export type PluginMediaUnderstandingProviderRegistration =
|
||||
PluginOwnedProviderRegistration<MediaUnderstandingProviderPlugin>;
|
||||
/** Image-generation flavour of `PluginOwnedProviderRegistration`. */
export type PluginImageGenerationProviderRegistration =
  PluginOwnedProviderRegistration<ImageGenerationProviderPlugin>;
|
||||
export type PluginWebSearchProviderRegistration =
|
||||
PluginOwnedProviderRegistration<WebSearchProviderPlugin>;
|
||||
|
||||
@@ -165,6 +168,7 @@ export type PluginRecord = {
|
||||
providerIds: string[];
|
||||
speechProviderIds: string[];
|
||||
mediaUnderstandingProviderIds: string[];
|
||||
imageGenerationProviderIds: string[];
|
||||
webSearchProviderIds: string[];
|
||||
gatewayMethods: string[];
|
||||
cliCommands: string[];
|
||||
@@ -187,6 +191,7 @@ export type PluginRegistry = {
|
||||
providers: PluginProviderRegistration[];
|
||||
speechProviders: PluginSpeechProviderRegistration[];
|
||||
mediaUnderstandingProviders: PluginMediaUnderstandingProviderRegistration[];
|
||||
imageGenerationProviders: PluginImageGenerationProviderRegistration[];
|
||||
webSearchProviders: PluginWebSearchProviderRegistration[];
|
||||
gatewayHandlers: GatewayRequestHandlers;
|
||||
httpRoutes: PluginHttpRouteRegistration[];
|
||||
@@ -234,6 +239,7 @@ export function createEmptyPluginRegistry(): PluginRegistry {
|
||||
providers: [],
|
||||
speechProviders: [],
|
||||
mediaUnderstandingProviders: [],
|
||||
imageGenerationProviders: [],
|
||||
webSearchProviders: [],
|
||||
gatewayHandlers: {},
|
||||
httpRoutes: [],
|
||||
@@ -631,6 +637,19 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Registers an image-generation provider on behalf of a plugin.
 *
 * Delegates to `registerUniqueProviderLike`, handing it the registry-wide
 * image-generation provider list and the plugin record's own id list.
 * NOTE(review): duplicate handling lives in that helper and is not visible
 * here; confirm its behaviour before relying on it.
 *
 * @param record - Plugin record that owns the provider.
 * @param provider - Image-generation provider being registered.
 */
const registerImageGenerationProvider = (
  record: PluginRecord,
  provider: ImageGenerationProviderPlugin,
) => {
  const { imageGenerationProviders: registrations } = registry;
  const { imageGenerationProviderIds: ownedIds } = record;
  registerUniqueProviderLike({
    record,
    provider,
    kindLabel: "image-generation provider",
    registrations,
    ownedIds,
  });
};
|
||||
|
||||
const registerWebSearchProvider = (record: PluginRecord, provider: WebSearchProviderPlugin) => {
|
||||
registerUniqueProviderLike({
|
||||
record,
|
||||
@@ -857,6 +876,10 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
|
||||
registrationMode === "full"
|
||||
? (provider) => registerMediaUnderstandingProvider(record, provider)
|
||||
: () => {},
|
||||
registerImageGenerationProvider:
|
||||
registrationMode === "full"
|
||||
? (provider) => registerImageGenerationProvider(record, provider)
|
||||
: () => {},
|
||||
registerWebSearchProvider:
|
||||
registrationMode === "full"
|
||||
? (provider) => registerWebSearchProvider(record, provider)
|
||||
@@ -932,6 +955,7 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) {
|
||||
registerProvider,
|
||||
registerSpeechProvider,
|
||||
registerMediaUnderstandingProvider,
|
||||
registerImageGenerationProvider,
|
||||
registerWebSearchProvider,
|
||||
registerGatewayMethod,
|
||||
registerCli,
|
||||
|
||||
@@ -59,10 +59,17 @@ describe("plugin runtime command execution", () => {
|
||||
const runtime = createPluginRuntime();
|
||||
expect(typeof runtime.mediaUnderstanding.runFile).toBe("function");
|
||||
expect(typeof runtime.mediaUnderstanding.describeImageFile).toBe("function");
|
||||
expect(typeof runtime.mediaUnderstanding.describeImageFileWithModel).toBe("function");
|
||||
expect(typeof runtime.mediaUnderstanding.describeVideoFile).toBe("function");
|
||||
expect(runtime.mediaUnderstanding.transcribeAudioFile).toBe(runtime.stt.transcribeAudioFile);
|
||||
});
|
||||
|
||||
// The plugin runtime must expose `generate` and `listProviders` as functions
// under `imageGeneration`.
it("exposes runtime.imageGeneration helpers", () => {
  const { imageGeneration } = createPluginRuntime();
  expect(typeof imageGeneration.generate).toBe("function");
  expect(typeof imageGeneration.listProviders).toBe("function");
});
|
||||
|
||||
it("exposes runtime.webSearch helpers", () => {
|
||||
const runtime = createPluginRuntime();
|
||||
expect(typeof runtime.webSearch.listProviders).toBe("function");
|
||||
|
||||
@@ -4,13 +4,18 @@ import {
|
||||
resolveApiKeyForProvider as resolveApiKeyForProviderRaw,
|
||||
} from "../../agents/model-auth.js";
|
||||
import { resolveStateDir } from "../../config/paths.js";
|
||||
import {
|
||||
generateImage,
|
||||
listRuntimeImageGenerationProviders,
|
||||
} from "../../image-generation/runtime.js";
|
||||
import {
|
||||
describeImageFile,
|
||||
describeImageFileWithModel,
|
||||
describeVideoFile,
|
||||
runMediaUnderstandingFile,
|
||||
transcribeAudioFile,
|
||||
} from "../../media-understanding/runtime.js";
|
||||
import { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "../../tts/tts.js";
|
||||
import { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "../../tts/runtime.js";
|
||||
import { listWebSearchProviders, runWebSearch } from "../../web-search/runtime.js";
|
||||
import { createRuntimeAgent } from "./runtime-agent.js";
|
||||
import { createRuntimeChannel } from "./runtime-channel.js";
|
||||
@@ -145,9 +150,14 @@ export function createPluginRuntime(_options: CreatePluginRuntimeOptions = {}):
|
||||
mediaUnderstanding: {
|
||||
runFile: runMediaUnderstandingFile,
|
||||
describeImageFile,
|
||||
describeImageFileWithModel,
|
||||
describeVideoFile,
|
||||
transcribeAudioFile,
|
||||
},
|
||||
imageGeneration: {
|
||||
generate: generateImage,
|
||||
listProviders: listRuntimeImageGenerationProviders,
|
||||
},
|
||||
webSearch: {
|
||||
listProviders: listWebSearchProviders,
|
||||
search: runWebSearch,
|
||||
|
||||
@@ -47,16 +47,21 @@ export type PluginRuntimeCore = {
|
||||
resizeToJpeg: typeof import("../../media/image-ops.js").resizeToJpeg;
|
||||
};
|
||||
tts: {
|
||||
textToSpeech: typeof import("../../tts/tts.js").textToSpeech;
|
||||
textToSpeechTelephony: typeof import("../../tts/tts.js").textToSpeechTelephony;
|
||||
listVoices: typeof import("../../tts/tts.js").listSpeechVoices;
|
||||
textToSpeech: typeof import("../../tts/runtime.js").textToSpeech;
|
||||
textToSpeechTelephony: typeof import("../../tts/runtime.js").textToSpeechTelephony;
|
||||
listVoices: typeof import("../../tts/runtime.js").listSpeechVoices;
|
||||
};
|
||||
mediaUnderstanding: {
|
||||
runFile: typeof import("../../media-understanding/runtime.js").runMediaUnderstandingFile;
|
||||
describeImageFile: typeof import("../../media-understanding/runtime.js").describeImageFile;
|
||||
describeImageFileWithModel: typeof import("../../media-understanding/runtime.js").describeImageFileWithModel;
|
||||
describeVideoFile: typeof import("../../media-understanding/runtime.js").describeVideoFile;
|
||||
transcribeAudioFile: typeof import("../../media-understanding/runtime.js").transcribeAudioFile;
|
||||
};
|
||||
imageGeneration: {
|
||||
generate: typeof import("../../image-generation/runtime.js").generateImage;
|
||||
listProviders: typeof import("../../image-generation/runtime.js").listRuntimeImageGenerationProviders;
|
||||
};
|
||||
webSearch: {
|
||||
listProviders: typeof import("../../web-search/runtime.js").listWebSearchProviders;
|
||||
search: typeof import("../../web-search/runtime.js").runWebSearch;
|
||||
|
||||
@@ -22,6 +22,7 @@ import type { ModelProviderConfig } from "../config/types.js";
|
||||
import type { GatewayRequestHandler } from "../gateway/server-methods/types.js";
|
||||
import type { InternalHookHandler } from "../hooks/internal-hooks.js";
|
||||
import type { HookEntry } from "../hooks/types.js";
|
||||
import type { ImageGenerationProvider } from "../image-generation/types.js";
|
||||
import type { ProviderUsageSnapshot } from "../infra/provider-usage.types.js";
|
||||
import type { MediaUnderstandingProvider } from "../media-understanding/types.js";
|
||||
import type { RuntimeEnv } from "../runtime.js";
|
||||
@@ -890,6 +891,7 @@ export type PluginSpeechProviderEntry = SpeechProviderPlugin & {
|
||||
};
|
||||
|
||||
export type MediaUnderstandingProviderPlugin = MediaUnderstandingProvider;
|
||||
/** Plugin-facing alias of the `ImageGenerationProvider` type from the image-generation module. */
export type ImageGenerationProviderPlugin = ImageGenerationProvider;
|
||||
|
||||
export type OpenClawPluginGatewayMethod = {
|
||||
method: string;
|
||||
@@ -1251,6 +1253,7 @@ export type OpenClawPluginApi = {
|
||||
registerProvider: (provider: ProviderPlugin) => void;
|
||||
registerSpeechProvider: (provider: SpeechProviderPlugin) => void;
|
||||
registerMediaUnderstandingProvider: (provider: MediaUnderstandingProviderPlugin) => void;
|
||||
registerImageGenerationProvider: (provider: ImageGenerationProviderPlugin) => void;
|
||||
registerWebSearchProvider: (provider: WebSearchProviderPlugin) => void;
|
||||
registerInteractiveHandler: (registration: PluginInteractiveHandlerRegistration) => void;
|
||||
/**
|
||||
|
||||
@@ -28,6 +28,7 @@ export const createTestRegistry = (channels: TestChannelRegistration[] = []): Pl
|
||||
providers: [],
|
||||
speechProviders: [],
|
||||
mediaUnderstandingProviders: [],
|
||||
imageGenerationProviders: [],
|
||||
webSearchProviders: [],
|
||||
gatewayHandlers: {},
|
||||
httpRoutes: [],
|
||||
|
||||
4
src/tts/runtime.ts
Normal file
4
src/tts/runtime.ts
Normal file
@@ -0,0 +1,4 @@
|
||||
// Shared runtime-facing speech helpers. Keep channel/feature plugins on this
|
||||
// boundary instead of importing the full TTS orchestrator module directly.
|
||||
|
||||
export { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "./tts.js";
|
||||
Reference in New Issue
Block a user