diff --git a/extensions/ollama/index.test.ts b/extensions/ollama/index.test.ts index 8e74a32df6f..43a86220371 100644 --- a/extensions/ollama/index.test.ts +++ b/extensions/ollama/index.test.ts @@ -495,4 +495,42 @@ describe("ollama plugin", () => { expect(baseStreamFn).toHaveBeenCalledTimes(1); expect(payloadSeen?.think).toBeUndefined(); }); + + it("registers an image-capable media understanding provider so image tool can route ollama/*", () => { + const mediaProviders: Array<{ + id: string; + capabilities?: string[]; + defaultModels?: Record<string, string>; + autoPriority?: Record<string, number>; + describeImage?: unknown; + describeImages?: unknown; + }> = []; + + plugin.register( + createTestPluginApi({ + id: "ollama", + name: "Ollama", + source: "test", + config: {}, + pluginConfig: {}, + runtime: {} as never, + registerProvider() {}, + registerMediaUnderstandingProvider(provider) { + mediaProviders.push(provider); + }, + }), + ); + + expect(mediaProviders).toHaveLength(1); + const [ollamaMedia] = mediaProviders; + expect(ollamaMedia.id).toBe("ollama"); + expect(ollamaMedia.capabilities).toEqual(["image"]); + expect(typeof ollamaMedia.describeImage).toBe("function"); + expect(typeof ollamaMedia.describeImages).toBe("function"); + // Intentional: no defaultModels or autoPriority. Ollama vision models are + // user-installed (llava, qwen2.5vl, …) with no universal default, and we + // don't want Ollama to auto-steal image duty from configured providers. 
+ expect(ollamaMedia.defaultModels).toBeUndefined(); + expect(ollamaMedia.autoPriority).toBeUndefined(); + }); }); diff --git a/extensions/ollama/index.ts b/extensions/ollama/index.ts index 4542bd80ec0..29d77050f40 100644 --- a/extensions/ollama/index.ts +++ b/extensions/ollama/index.ts @@ -25,6 +25,7 @@ import { DEFAULT_OLLAMA_EMBEDDING_MODEL, createOllamaEmbeddingProvider, } from "./src/embedding-provider.js"; +import { ollamaMediaUnderstandingProvider } from "./src/media-understanding-provider.js"; import { ollamaMemoryEmbeddingProviderAdapter } from "./src/memory-embedding-adapter.js"; import { createConfiguredOllamaCompatStreamWrapper, @@ -55,6 +56,7 @@ export default definePluginEntry({ description: "Bundled Ollama provider plugin", register(api: OpenClawPluginApi) { api.registerMemoryEmbeddingProvider(ollamaMemoryEmbeddingProviderAdapter); + api.registerMediaUnderstandingProvider(ollamaMediaUnderstandingProvider); const pluginConfig = (api.pluginConfig ?? {}) as OllamaPluginConfig; api.registerWebSearchProvider(createOllamaWebSearchProvider()); api.registerProvider({ diff --git a/extensions/ollama/src/media-understanding-provider.ts b/extensions/ollama/src/media-understanding-provider.ts new file mode 100644 index 00000000000..307e70862d3 --- /dev/null +++ b/extensions/ollama/src/media-understanding-provider.ts @@ -0,0 +1,18 @@ +import { + describeImageWithModel, + describeImagesWithModel, + type MediaUnderstandingProvider, +} from "openclaw/plugin-sdk/media-understanding"; +import { OLLAMA_PROVIDER_ID } from "./discovery-shared.js"; + +// Ollama vision support depends on which models the user has pulled (llava, +// qwen2.5vl, llama3.2-vision, …) — there is no single canonical default. We +// register the provider so the image tool can route `ollama/<model>` +// requests, but leave `defaultModels` and `autoPriority` unset so Ollama +// only participates when the user explicitly configures an image model. 
+export const ollamaMediaUnderstandingProvider: MediaUnderstandingProvider = { + id: OLLAMA_PROVIDER_ID, + capabilities: ["image"], + describeImage: describeImageWithModel, + describeImages: describeImagesWithModel, +};