diff --git a/extensions/anthropic/index.ts b/extensions/anthropic/index.ts index a2491dfbd87..aad11b99a5b 100644 --- a/extensions/anthropic/index.ts +++ b/extensions/anthropic/index.ts @@ -23,6 +23,7 @@ import { import { buildTokenProfileId, validateAnthropicSetupToken } from "../../src/commands/auth-token.js"; import { applyAuthProfileConfig } from "../../src/commands/onboard-auth.js"; import { fetchClaudeUsage } from "../../src/infra/provider-usage.fetch.js"; +import { anthropicProvider } from "../../src/media-understanding/providers/anthropic/index.js"; import { createProviderApiKeyAuthMethod } from "../../src/plugins/provider-api-key-auth.js"; import type { ProviderAuthResult } from "../../src/plugins/types.js"; import { normalizeSecretInput } from "../../src/utils/normalize-secret-input.js"; @@ -394,6 +395,7 @@ const anthropicPlugin = { profileId: ctx.profileId, }), }); + api.registerMediaUnderstandingProvider(anthropicProvider); }, }; diff --git a/extensions/google/index.ts b/extensions/google/index.ts index 59d417e9349..177de77e49d 100644 --- a/extensions/google/index.ts +++ b/extensions/google/index.ts @@ -7,6 +7,7 @@ import { GOOGLE_GEMINI_DEFAULT_MODEL, applyGoogleGeminiModelDefault, } from "../../src/commands/google-gemini-model-default.js"; +import { googleProvider } from "../../src/media-understanding/providers/google/index.js"; import { emptyPluginConfigSchema } from "../../src/plugins/config-schema.js"; import { createProviderApiKeyAuthMethod } from "../../src/plugins/provider-api-key-auth.js"; import type { OpenClawPluginApi } from "../../src/plugins/types.js"; @@ -51,6 +52,7 @@ const googlePlugin = { isModernModelRef: ({ modelId }) => isModernGoogleModel(modelId), }); registerGoogleGeminiCliProvider(api); + api.registerMediaUnderstandingProvider(googleProvider); api.registerWebSearchProvider( createPluginBackedWebSearchProvider({ id: "gemini", diff --git a/extensions/lobster/src/lobster-tool.test.ts b/extensions/lobster/src/lobster-tool.test.ts index 0ed5c0eda97..cba95624f07 100644 --- a/extensions/lobster/src/lobster-tool.test.ts +++ b/extensions/lobster/src/lobster-tool.test.ts @@ -45,6 +45,7 @@ function fakeApi(overrides: Partial = {}): OpenClawPluginApi registerService() {}, registerProvider() {}, registerSpeechProvider() {}, + registerMediaUnderstandingProvider() {}, registerWebSearchProvider() {}, registerInteractiveHandler() {}, registerHook() {}, diff --git a/extensions/minimax/index.ts b/extensions/minimax/index.ts index 9330e9c4651..8325f6bb078 100644 --- a/extensions/minimax/index.ts +++ b/extensions/minimax/index.ts @@ -9,6 +9,10 @@ import { import { ensureAuthProfileStore, listProfilesForProvider } from "../../src/agents/auth-profiles.js"; import { MINIMAX_OAUTH_MARKER } from "../../src/agents/model-auth-markers.js"; import { fetchMinimaxUsage } from "../../src/infra/provider-usage.fetch.js"; +import { + minimaxPortalProvider, + minimaxProvider, +} from "../../src/media-understanding/providers/minimax/index.js"; import { createProviderApiKeyAuthMethod } from "../../src/plugins/provider-api-key-auth.js"; import { loginMiniMaxPortalOAuth, type MiniMaxRegion } from "./oauth.js"; import { applyMinimaxApiConfig, applyMinimaxApiConfigCn } from "./onboard.js"; @@ -270,6 +274,8 @@ const minimaxPlugin = { ], isModernModelRef: ({ modelId }) => isModernMiniMaxModel(modelId), }); + api.registerMediaUnderstandingProvider(minimaxProvider); + api.registerMediaUnderstandingProvider(minimaxPortalProvider); }, }; diff --git a/extensions/mistral/index.ts b/extensions/mistral/index.ts index 10211480a29..7e252281555 100644 --- a/extensions/mistral/index.ts +++ b/extensions/mistral/index.ts @@ -1,4 +1,5 @@ import { emptyPluginConfigSchema, type OpenClawPluginApi } from "openclaw/plugin-sdk/core"; +import { mistralProvider } from "../../src/media-understanding/providers/mistral/index.js"; import { createProviderApiKeyAuthMethod } from "../../src/plugins/provider-api-key-auth.js"; import { applyMistralConfig, MISTRAL_DEFAULT_MODEL_REF } from "./onboard.js"; @@ -50,6 +51,7 @@ const mistralPlugin = { ], }, }); + api.registerMediaUnderstandingProvider(mistralProvider); }, }; diff --git a/extensions/moonshot/index.ts b/extensions/moonshot/index.ts index 09605ccff85..5cf18d96d8b 100644 --- a/extensions/moonshot/index.ts +++ b/extensions/moonshot/index.ts @@ -7,6 +7,7 @@ import { getScopedCredentialValue, setScopedCredentialValue, } from "../../src/agents/tools/web-search-plugin-factory.js"; +import { moonshotProvider } from "../../src/media-understanding/providers/moonshot/index.js"; import { emptyPluginConfigSchema } from "../../src/plugins/config-schema.js"; import { createProviderApiKeyAuthMethod } from "../../src/plugins/provider-api-key-auth.js"; import type { OpenClawPluginApi } from "../../src/plugins/types.js"; @@ -99,6 +100,7 @@ const moonshotPlugin = { return createMoonshotThinkingWrapper(ctx.streamFn, thinkingType); }, }); + api.registerMediaUnderstandingProvider(moonshotProvider); api.registerWebSearchProvider( createPluginBackedWebSearchProvider({ id: "kimi", diff --git a/extensions/openai/index.ts b/extensions/openai/index.ts index cd528f72211..2fd57473693 100644 --- a/extensions/openai/index.ts +++ b/extensions/openai/index.ts @@ -1,4 +1,5 @@ import { emptyPluginConfigSchema, type OpenClawPluginApi } from "openclaw/plugin-sdk/core"; +import { openaiProvider } from "../../src/media-understanding/providers/openai/index.js"; import { buildOpenAISpeechProvider } from "../../src/tts/providers/openai.js"; import { buildOpenAICodexProviderPlugin } from "./openai-codex-provider.js"; import { buildOpenAIProvider } from "./openai-provider.js"; @@ -12,6 +13,7 @@ const openAIPlugin = { api.registerProvider(buildOpenAIProvider()); api.registerProvider(buildOpenAICodexProviderPlugin()); api.registerSpeechProvider(buildOpenAISpeechProvider()); + api.registerMediaUnderstandingProvider(openaiProvider); }, }; diff --git a/extensions/test-utils/plugin-api.ts b/extensions/test-utils/plugin-api.ts index 281e151aeb7..82fe818fdec 100644 --- a/extensions/test-utils/plugin-api.ts +++ b/extensions/test-utils/plugin-api.ts @@ -16,6 +16,7 @@ export function createTestPluginApi(api: TestPluginApiInput): OpenClawPluginApi registerService() {}, registerProvider() {}, registerSpeechProvider() {}, + registerMediaUnderstandingProvider() {}, registerWebSearchProvider() {}, registerInteractiveHandler() {}, registerCommand() {}, diff --git a/extensions/zai/index.ts b/extensions/zai/index.ts index aee000ec412..f38058dd9e9 100644 --- a/extensions/zai/index.ts +++ b/extensions/zai/index.ts @@ -24,6 +24,7 @@ import { applyAuthProfileConfig } from "../../src/commands/onboard-auth.js"; import type { SecretInput } from "../../src/config/types.secrets.js"; import { resolveRequiredHomeDir } from "../../src/infra/home-dir.js"; import { fetchZaiUsage } from "../../src/infra/provider-usage.fetch.js"; +import { zaiProvider } from "../../src/media-understanding/providers/zai/index.js"; import { normalizeOptionalSecretInput } from "../../src/utils/normalize-secret-input.js"; import { detectZaiEndpoint, type ZaiEndpointId } from "./detect.js"; import { applyZaiConfig, applyZaiProviderConfig, ZAI_DEFAULT_MODEL_REF } from "./onboard.js"; @@ -334,6 +335,7 @@ const zaiPlugin = { fetchUsageSnapshot: async (ctx) => await fetchZaiUsage(ctx.token, ctx.timeoutMs, ctx.fetchFn), isCacheTtlEligible: () => true, }); + api.registerMediaUnderstandingProvider(zaiProvider); }, }; diff --git a/src/auto-reply/reply/route-reply.test.ts b/src/auto-reply/reply/route-reply.test.ts index 5bf5f5c2cec..4c5dd7be889 100644 --- a/src/auto-reply/reply/route-reply.test.ts +++ b/src/auto-reply/reply/route-reply.test.ts @@ -92,6 +92,7 @@ const createRegistry = (channels: PluginRegistry["channels"]): PluginRegistry => })), providers: [], speechProviders: [], + mediaUnderstandingProviders: [], webSearchProviders: [], gatewayHandlers: {}, httpRoutes: [], diff --git a/src/commands/channel-setup/plugin-install.test.ts b/src/commands/channel-setup/plugin-install.test.ts index 5ad6399fa4a..96ca60e2197 100644 --- a/src/commands/channel-setup/plugin-install.test.ts +++ b/src/commands/channel-setup/plugin-install.test.ts @@ -338,6 +338,7 @@ describe("ensureChannelSetupPluginInstalled", () => { channelIds: [], providerIds: [], speechProviderIds: [], + mediaUnderstandingProviderIds: [], webSearchProviderIds: [], gatewayMethods: [], cliCommands: [], diff --git a/src/gateway/server-plugins.test.ts b/src/gateway/server-plugins.test.ts index 58f5c9da4eb..184cb706762 100644 --- a/src/gateway/server-plugins.test.ts +++ b/src/gateway/server-plugins.test.ts @@ -30,6 +30,7 @@ const createRegistry = (diagnostics: PluginDiagnostic[]): PluginRegistry => ({ commands: [], providers: [], speechProviders: [], + mediaUnderstandingProviders: [], webSearchProviders: [], gatewayHandlers: {}, httpRoutes: [], diff --git a/src/gateway/test-helpers.mocks.ts b/src/gateway/test-helpers.mocks.ts index e05fcc85320..3617bc896bd 100644 --- a/src/gateway/test-helpers.mocks.ts +++ b/src/gateway/test-helpers.mocks.ts @@ -147,6 +147,7 @@ const createStubPluginRegistry = (): PluginRegistry => ({ channelSetups: [], providers: [], speechProviders: [], + mediaUnderstandingProviders: [], webSearchProviders: [], gatewayHandlers: {}, httpRoutes: [], diff --git a/src/media-understanding/providers/index.test.ts b/src/media-understanding/providers/index.test.ts index 9294d44acd5..3441b3a9a25 100644 --- a/src/media-understanding/providers/index.test.ts +++ b/src/media-understanding/providers/index.test.ts @@ -1,7 +1,13 @@ -import { describe, expect, it } from "vitest"; +import { afterEach, describe, expect, it } from "vitest"; +import { createEmptyPluginRegistry } from "../../plugins/registry.js"; +import { setActivePluginRegistry } from "../../plugins/runtime.js"; import { buildMediaUnderstandingRegistry, getMediaUnderstandingProvider } from "./index.js"; describe("media-understanding provider registry", () => { + afterEach(() => { + setActivePluginRegistry(createEmptyPluginRegistry()); + }); + it("registers the Mistral provider", () => { const registry = buildMediaUnderstandingRegistry(); const provider = getMediaUnderstandingProvider("mistral", registry); @@ -32,4 +38,27 @@ describe("media-understanding provider registry", () => { expect(provider?.id).toBe("minimax-portal"); expect(provider?.capabilities).toEqual(["image"]); }); + + it("merges plugin-registered media providers into the active registry", async () => { + const pluginRegistry = createEmptyPluginRegistry(); + pluginRegistry.mediaUnderstandingProviders.push({ + pluginId: "google", + pluginName: "Google Plugin", + source: "test", + provider: { + id: "google", + capabilities: ["image", "audio", "video"], + describeImage: async () => ({ text: "plugin image" }), + transcribeAudio: async () => ({ text: "plugin audio" }), + describeVideo: async () => ({ text: "plugin video" }), + }, + }); + setActivePluginRegistry(pluginRegistry); + + const registry = buildMediaUnderstandingRegistry(); + const provider = getMediaUnderstandingProvider("gemini", registry); + + expect(provider?.id).toBe("google"); + expect(await provider?.describeVideo?.({} as never)).toEqual({ text: "plugin video" }); + }); }); diff --git a/src/media-understanding/providers/index.ts b/src/media-understanding/providers/index.ts index 0ceaa78fd80..6c2e484dbe5 100644 --- a/src/media-understanding/providers/index.ts +++ b/src/media-understanding/providers/index.ts @@ -1,4 +1,5 @@ import { normalizeProviderId } from "../../agents/model-selection.js"; +import { getActivePluginRegistry } from "../../plugins/runtime.js"; import type { MediaUnderstandingProvider } from "../types.js"; import { anthropicProvider } from "./anthropic/index.js"; import { deepgramProvider } from "./deepgram/index.js"; @@ -23,6 +24,22 @@ const PROVIDERS: MediaUnderstandingProvider[] = [ deepgramProvider, ]; +function mergeProviderIntoRegistry( + registry: Map, + provider: MediaUnderstandingProvider, +) { + const normalizedKey = normalizeMediaProviderId(provider.id); + const existing = registry.get(normalizedKey); + const merged = existing + ? { + ...existing, + ...provider, + capabilities: provider.capabilities ?? existing.capabilities, + } + : provider; + registry.set(normalizedKey, merged); +} + export function normalizeMediaProviderId(id: string): string { const normalized = normalizeProviderId(id); if (normalized === "gemini") { @@ -36,7 +53,10 @@ export function buildMediaUnderstandingRegistry( ): Map { const registry = new Map(); for (const provider of PROVIDERS) { - registry.set(normalizeMediaProviderId(provider.id), provider); + mergeProviderIntoRegistry(registry, provider); + } + for (const entry of getActivePluginRegistry()?.mediaUnderstandingProviders ?? []) { + mergeProviderIntoRegistry(registry, entry.provider); } if (overrides) { for (const [key, provider] of Object.entries(overrides)) { diff --git a/src/plugin-sdk/core.ts b/src/plugin-sdk/core.ts index 00621521067..13b075e3352 100644 --- a/src/plugin-sdk/core.ts +++ b/src/plugin-sdk/core.ts @@ -1,5 +1,6 @@ export type { AnyAgentTool, + MediaUnderstandingProviderPlugin, OpenClawPluginConfigSchema, ProviderDiscoveryContext, ProviderCatalogContext, diff --git a/src/plugin-sdk/index.ts b/src/plugin-sdk/index.ts index 1e78ee1c7e2..c5ba9d90541 100644 --- a/src/plugin-sdk/index.ts +++ b/src/plugin-sdk/index.ts @@ -108,6 +108,7 @@ export { ACP_ERROR_CODES, AcpRuntimeError } from "../acp/runtime/errors.js"; export type { AcpRuntimeErrorCode } from "../acp/runtime/errors.js"; export type { AnyAgentTool, + MediaUnderstandingProviderPlugin, OpenClawPluginConfigSchema, OpenClawPluginApi, OpenClawPluginService, diff --git a/src/plugins/contracts/loader.contract.test.ts b/src/plugins/contracts/loader.contract.test.ts index a42c24712ec..874a94a0b5e 100644 --- a/src/plugins/contracts/loader.contract.test.ts +++ b/src/plugins/contracts/loader.contract.test.ts @@ -19,6 +19,7 @@ describe("plugin loader contract", () => { loadOpenClawPluginsMock.mockReset(); loadOpenClawPluginsMock.mockReturnValue({ providers: [], + mediaUnderstandingProviders: [], webSearchProviders: [], }); }); diff --git a/src/plugins/contracts/registry.contract.test.ts b/src/plugins/contracts/registry.contract.test.ts index 48da6c3d9a1..06430449808 100644 --- a/src/plugins/contracts/registry.contract.test.ts +++ b/src/plugins/contracts/registry.contract.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it } from "vitest"; import { + mediaUnderstandingProviderContractRegistry, pluginRegistrationContractRegistry, providerContractRegistry, speechProviderContractRegistry, @@ -35,6 +36,13 @@ function findSpeechProviderForPlugin(pluginId: string) { return entry.provider; } +function findMediaUnderstandingProviderIdsForPlugin(pluginId: string) { + return mediaUnderstandingProviderContractRegistry + .filter((entry) => entry.pluginId === pluginId) + .map((entry) => entry.provider.id) + .toSorted((left, right) => left.localeCompare(right)); +} + function findRegistrationForPlugin(pluginId: string) { const entry = pluginRegistrationContractRegistry.find( (candidate) => candidate.pluginId === pluginId, @@ -61,6 +69,11 @@ describe("plugin contract registry", () => { expect(ids).toEqual([...new Set(ids)]); }); + it("does not duplicate bundled media provider ids", () => { + const ids = mediaUnderstandingProviderContractRegistry.map((entry) => entry.provider.id); + expect(ids).toEqual([...new Set(ids)]); + }); + it("keeps multi-provider plugin ownership explicit", () => { expect(findProviderIdsForPlugin("google")).toEqual(["google", "google-gemini-cli"]); expect(findProviderIdsForPlugin("minimax")).toEqual(["minimax", "minimax-portal"]); @@ -82,10 +95,24 @@ describe("plugin contract registry", () => { expect(findSpeechProviderIdsForPlugin("openai")).toEqual(["openai"]); }); + it("keeps bundled media-understanding ownership explicit", () => { + expect(findMediaUnderstandingProviderIdsForPlugin("anthropic")).toEqual(["anthropic"]); + expect(findMediaUnderstandingProviderIdsForPlugin("google")).toEqual(["google"]); + expect(findMediaUnderstandingProviderIdsForPlugin("minimax")).toEqual([ + "minimax", + "minimax-portal", + ]); + expect(findMediaUnderstandingProviderIdsForPlugin("mistral")).toEqual(["mistral"]); + expect(findMediaUnderstandingProviderIdsForPlugin("moonshot")).toEqual(["moonshot"]); + expect(findMediaUnderstandingProviderIdsForPlugin("openai")).toEqual(["openai"]); + expect(findMediaUnderstandingProviderIdsForPlugin("zai")).toEqual(["zai"]); + }); + it("keeps bundled provider and web search tool ownership explicit", () => { expect(findRegistrationForPlugin("firecrawl")).toMatchObject({ providerIds: [], speechProviderIds: [], + mediaUnderstandingProviderIds: [], webSearchProviderIds: ["firecrawl"], toolNames: ["firecrawl_search", "firecrawl_scrape"], }); @@ -95,14 +122,17 @@ describe("plugin contract registry", () => { expect(findRegistrationForPlugin("openai")).toMatchObject({ providerIds: ["openai", "openai-codex"], speechProviderIds: ["openai"], + mediaUnderstandingProviderIds: ["openai"], }); expect(findRegistrationForPlugin("elevenlabs")).toMatchObject({ providerIds: [], speechProviderIds: ["elevenlabs"], + mediaUnderstandingProviderIds: [], }); expect(findRegistrationForPlugin("microsoft")).toMatchObject({ providerIds: [], speechProviderIds: ["microsoft"], + mediaUnderstandingProviderIds: [], }); }); diff --git a/src/plugins/contracts/registry.ts b/src/plugins/contracts/registry.ts index 1dc997d7b2e..14dbb17262c 100644 --- a/src/plugins/contracts/registry.ts +++ b/src/plugins/contracts/registry.ts @@ -35,7 +35,12 @@ import xaiPlugin from "../../../extensions/xai/index.js"; import xiaomiPlugin from "../../../extensions/xiaomi/index.js"; import zaiPlugin from "../../../extensions/zai/index.js"; import { createCapturedPluginRegistration } from "../../test-utils/plugin-registration.js"; -import type { ProviderPlugin, SpeechProviderPlugin, WebSearchProviderPlugin } from "../types.js"; +import type { + MediaUnderstandingProviderPlugin, + ProviderPlugin, + SpeechProviderPlugin, + WebSearchProviderPlugin, +} from "../types.js"; type RegistrablePlugin = { id: string; @@ -58,10 +63,16 @@ type SpeechProviderContractEntry = { provider: SpeechProviderPlugin; }; +type MediaUnderstandingProviderContractEntry = { + pluginId: string; + provider: MediaUnderstandingProviderPlugin; +}; + type PluginRegistrationContractEntry = { pluginId: string; providerIds: string[]; speechProviderIds: string[]; + mediaUnderstandingProviderIds: string[]; webSearchProviderIds: string[]; toolNames: string[]; }; @@ -111,6 +122,16 @@ const bundledWebSearchPlugins: Array { + const captured = captureRegistrations(plugin); + return captured.mediaUnderstandingProviders.map((provider) => ({ + pluginId: plugin.id, + provider, + })); + }); + const bundledPluginRegistrationList = [ ...new Map( - [...bundledProviderPlugins, ...bundledSpeechPlugins, ...bundledWebSearchPlugins].map( - (plugin) => [plugin.id, plugin], - ), + [ + ...bundledProviderPlugins, + ...bundledSpeechPlugins, + ...bundledMediaUnderstandingPlugins, + ...bundledWebSearchPlugins, + ].map((plugin) => [plugin.id, plugin]), ).values(), ]; @@ -161,6 +194,9 @@ export const pluginRegistrationContractRegistry: PluginRegistrationContractEntry pluginId: plugin.id, providerIds: captured.providers.map((provider) => provider.id), speechProviderIds: captured.speechProviders.map((provider) => provider.id), + mediaUnderstandingProviderIds: captured.mediaUnderstandingProviders.map( + (provider) => provider.id, + ), webSearchProviderIds: captured.webSearchProviders.map((provider) => provider.id), toolNames: captured.tools.map((tool) => tool.name), }; diff --git a/src/plugins/hooks.test-helpers.ts b/src/plugins/hooks.test-helpers.ts index 7954257e714..ea01163d4b0 100644 --- a/src/plugins/hooks.test-helpers.ts +++ b/src/plugins/hooks.test-helpers.ts @@ -17,6 +17,9 @@ export function createMockPluginRegistry( hookNames: [], channelIds: [], providerIds: [], + speechProviderIds: [], + mediaUnderstandingProviderIds: [], + webSearchProviderIds: [], gatewayMethods: [], cliCommands: [], services: [], @@ -35,13 +38,18 @@ export function createMockPluginRegistry( source: "test", })), tools: [], + channels: [], + channelSetups: [], + providers: [], + speechProviders: [], + mediaUnderstandingProviders: [], + webSearchProviders: [], httpRoutes: [], - channelRegistrations: [], gatewayHandlers: {}, cliRegistrars: [], services: [], - providers: [], commands: [], + diagnostics: [], } as unknown as PluginRegistry; } diff --git a/src/plugins/loader.ts b/src/plugins/loader.ts index a2e05fc06b9..873fff6b9bf 100644 --- a/src/plugins/loader.ts +++ b/src/plugins/loader.ts @@ -495,6 +495,7 @@ function createPluginRecord(params: { channelIds: [], providerIds: [], speechProviderIds: [], + mediaUnderstandingProviderIds: [], webSearchProviderIds: [], gatewayMethods: [], cliCommands: [], diff --git a/src/plugins/registry.ts b/src/plugins/registry.ts index 231e6f267aa..bad444289ac 100644 --- a/src/plugins/registry.ts +++ b/src/plugins/registry.ts @@ -31,6 +31,7 @@ import type { OpenClawPluginHttpRouteHandler, OpenClawPluginHttpRouteParams, OpenClawPluginHookOptions, + MediaUnderstandingProviderPlugin, ProviderPlugin, OpenClawPluginService, OpenClawPluginToolContext, @@ -119,6 +120,14 @@ export type PluginSpeechProviderRegistration = { rootDir?: string; }; +export type PluginMediaUnderstandingProviderRegistration = { + pluginId: string; + pluginName?: string; + provider: MediaUnderstandingProviderPlugin; + source: string; + rootDir?: string; +}; + export type PluginHookRegistration = { pluginId: string; entry: HookEntry; @@ -164,6 +173,7 @@ export type PluginRecord = { channelIds: string[]; providerIds: string[]; speechProviderIds: string[]; + mediaUnderstandingProviderIds: string[]; webSearchProviderIds: string[]; gatewayMethods: string[]; cliCommands: string[]; @@ -185,6 +195,7 @@ export type PluginRegistry = { channelSetups: PluginChannelSetupRegistration[]; providers: PluginProviderRegistration[]; speechProviders: PluginSpeechProviderRegistration[]; + mediaUnderstandingProviders: PluginMediaUnderstandingProviderRegistration[]; webSearchProviders: PluginWebSearchProviderRegistration[]; gatewayHandlers: GatewayRequestHandlers; httpRoutes: PluginHttpRouteRegistration[]; @@ -231,6 +242,7 @@ export function createEmptyPluginRegistry(): PluginRegistry { channelSetups: [], providers: [], speechProviders: [], + mediaUnderstandingProviders: [], webSearchProviders: [], gatewayHandlers: {}, httpRoutes: [], @@ -593,6 +605,40 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) { }); }; + const registerMediaUnderstandingProvider = ( + record: PluginRecord, + provider: MediaUnderstandingProviderPlugin, + ) => { + const id = provider.id.trim(); + if (!id) { + pushDiagnostic({ + level: "error", + pluginId: record.id, + source: record.source, + message: "media provider registration missing id", + }); + return; + } + const existing = registry.mediaUnderstandingProviders.find((entry) => entry.provider.id === id); + if (existing) { + pushDiagnostic({ + level: "error", + pluginId: record.id, + source: record.source, + message: `media provider already registered: ${id} (${existing.pluginId})`, + }); + return; + } + record.mediaUnderstandingProviderIds.push(id); + registry.mediaUnderstandingProviders.push({ + pluginId: record.id, + pluginName: record.name, + provider, + source: record.source, + rootDir: record.rootDir, + }); + }; + const registerWebSearchProvider = (record: PluginRecord, provider: WebSearchProviderPlugin) => { const id = provider.id.trim(); if (!id) { @@ -836,6 +882,10 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) { registrationMode === "full" ? (provider) => registerSpeechProvider(record, provider) : () => {}, + registerMediaUnderstandingProvider: + registrationMode === "full" + ? (provider) => registerMediaUnderstandingProvider(record, provider) + : () => {}, registerWebSearchProvider: registrationMode === "full" ? (provider) => registerWebSearchProvider(record, provider) @@ -910,6 +960,7 @@ export function createPluginRegistry(registryParams: PluginRegistryParams) { registerChannel, registerProvider, registerSpeechProvider, + registerMediaUnderstandingProvider, registerWebSearchProvider, registerGatewayMethod, registerCli, diff --git a/src/plugins/types.ts b/src/plugins/types.ts index 0add5cdcf42..23e761940df 100644 --- a/src/plugins/types.ts +++ b/src/plugins/types.ts @@ -25,6 +25,7 @@ import type { GatewayRequestHandler } from "../gateway/server-methods/types.js"; import type { InternalHookHandler } from "../hooks/internal-hooks.js"; import type { HookEntry } from "../hooks/types.js"; import type { ProviderUsageSnapshot } from "../infra/provider-usage.types.js"; +import type { MediaUnderstandingProvider } from "../media-understanding/types.js"; import type { RuntimeEnv } from "../runtime.js"; import type { RuntimeWebSearchMetadata } from "../secrets/runtime-web-tools.types.js"; import type { @@ -881,6 +882,8 @@ export type PluginSpeechProviderEntry = SpeechProviderPlugin & { pluginId: string; }; +export type MediaUnderstandingProviderPlugin = MediaUnderstandingProvider; + export type OpenClawPluginGatewayMethod = { method: string; handler: GatewayRequestHandler; @@ -1240,6 +1243,7 @@ export type OpenClawPluginApi = { registerService: (service: OpenClawPluginService) => void; registerProvider: (provider: ProviderPlugin) => void; registerSpeechProvider: (provider: SpeechProviderPlugin) => void; + registerMediaUnderstandingProvider: (provider: MediaUnderstandingProviderPlugin) => void; registerWebSearchProvider: (provider: WebSearchProviderPlugin) => void; registerInteractiveHandler: (registration: PluginInteractiveHandlerRegistration) => void; /** diff --git a/src/test-utils/channel-plugins.ts b/src/test-utils/channel-plugins.ts index 588c1ca7db6..1283ac9f506 100644 --- a/src/test-utils/channel-plugins.ts +++ b/src/test-utils/channel-plugins.ts @@ -27,6 +27,7 @@ export const createTestRegistry = (channels: TestChannelRegistration[] = []): Pl })), providers: [], speechProviders: [], + mediaUnderstandingProviders: [], webSearchProviders: [], gatewayHandlers: {}, httpRoutes: [], diff --git a/src/test-utils/plugin-registration.ts b/src/test-utils/plugin-registration.ts index 6231dedf17b..de8e5422ccf 100644 --- a/src/test-utils/plugin-registration.ts +++ b/src/test-utils/plugin-registration.ts @@ -1,5 +1,6 @@ import type { AnyAgentTool, + MediaUnderstandingProviderPlugin, OpenClawPluginApi, ProviderPlugin, SpeechProviderPlugin, @@ -10,6 +11,7 @@ export type CapturedPluginRegistration = { api: OpenClawPluginApi; providers: ProviderPlugin[]; speechProviders: SpeechProviderPlugin[]; + mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[]; webSearchProviders: WebSearchProviderPlugin[]; tools: AnyAgentTool[]; }; @@ -17,12 +19,14 @@ export type CapturedPluginRegistration = { export function createCapturedPluginRegistration(): CapturedPluginRegistration { const providers: ProviderPlugin[] = []; const speechProviders: SpeechProviderPlugin[] = []; + const mediaUnderstandingProviders: MediaUnderstandingProviderPlugin[] = []; const webSearchProviders: WebSearchProviderPlugin[] = []; const tools: AnyAgentTool[] = []; return { providers, speechProviders, + mediaUnderstandingProviders, webSearchProviders, tools, api: { @@ -32,6 +36,9 @@ export function createCapturedPluginRegistration(): CapturedPluginRegistration { registerSpeechProvider(provider: SpeechProviderPlugin) { speechProviders.push(provider); }, + registerMediaUnderstandingProvider(provider: MediaUnderstandingProviderPlugin) { + mediaUnderstandingProviders.push(provider); + }, registerWebSearchProvider(provider: WebSearchProviderPlugin) { webSearchProviders.push(provider); },