From 9a22cd212b1e4a01d571680a329657f38f0dd54c Mon Sep 17 00:00:00 2001 From: soloclz Date: Wed, 22 Apr 2026 02:47:39 +0800 Subject: [PATCH] fix(ollama): register media-understanding provider so image tool can route ollama/* models Ollama chat models already support image inputs (extensions/ollama/src/stream.ts extracts image parts and forwards them via the Ollama API), but the ollama plugin did not register a MediaUnderstandingProvider. The image tool's provider registry therefore had no 'ollama' entry, so requests like `imageModel: 'ollama/qwen2.5vl:7b'` failed to resolve and fell back to unrelated providers. Register ollamaMediaUnderstandingProvider with: - capabilities: ['image'] - describeImage/describeImages wired to the shared core helpers (reuses the same pi-ai complete path Ollama chat already goes through) - no defaultModels or autoPriority: Ollama vision support depends on which model the user has pulled, so we don't pick a canonical default and don't auto-steal image duty from configured providers. Fixes #69071 (and supersedes #60280). 
--- extensions/ollama/index.test.ts | 38 +++++++++++++++++++ extensions/ollama/index.ts | 2 + .../src/media-understanding-provider.ts | 18 +++++++++ 3 files changed, 58 insertions(+) create mode 100644 extensions/ollama/src/media-understanding-provider.ts diff --git a/extensions/ollama/index.test.ts b/extensions/ollama/index.test.ts index 8e74a32df6f..43a86220371 100644 --- a/extensions/ollama/index.test.ts +++ b/extensions/ollama/index.test.ts @@ -495,4 +495,42 @@ describe("ollama plugin", () => { expect(baseStreamFn).toHaveBeenCalledTimes(1); expect(payloadSeen?.think).toBeUndefined(); }); + + it("registers an image-capable media understanding provider so image tool can route ollama/*", () => { + const mediaProviders: Array<{ + id: string; + capabilities?: string[]; + defaultModels?: Record; + autoPriority?: Record; + describeImage?: unknown; + describeImages?: unknown; + }> = []; + + plugin.register( + createTestPluginApi({ + id: "ollama", + name: "Ollama", + source: "test", + config: {}, + pluginConfig: {}, + runtime: {} as never, + registerProvider() {}, + registerMediaUnderstandingProvider(provider) { + mediaProviders.push(provider); + }, + }), + ); + + expect(mediaProviders).toHaveLength(1); + const [ollamaMedia] = mediaProviders; + expect(ollamaMedia.id).toBe("ollama"); + expect(ollamaMedia.capabilities).toEqual(["image"]); + expect(typeof ollamaMedia.describeImage).toBe("function"); + expect(typeof ollamaMedia.describeImages).toBe("function"); + // Intentional: no defaultModels or autoPriority. Ollama vision models are + // user-installed (llava, qwen2.5vl, …) with no universal default, and we + // don't want Ollama to auto-steal image duty from configured providers. 
+ expect(ollamaMedia.defaultModels).toBeUndefined(); + expect(ollamaMedia.autoPriority).toBeUndefined(); + }); }); diff --git a/extensions/ollama/index.ts b/extensions/ollama/index.ts index 4542bd80ec0..29d77050f40 100644 --- a/extensions/ollama/index.ts +++ b/extensions/ollama/index.ts @@ -25,6 +25,7 @@ import { DEFAULT_OLLAMA_EMBEDDING_MODEL, createOllamaEmbeddingProvider, } from "./src/embedding-provider.js"; +import { ollamaMediaUnderstandingProvider } from "./src/media-understanding-provider.js"; import { ollamaMemoryEmbeddingProviderAdapter } from "./src/memory-embedding-adapter.js"; import { createConfiguredOllamaCompatStreamWrapper, @@ -55,6 +56,7 @@ export default definePluginEntry({ description: "Bundled Ollama provider plugin", register(api: OpenClawPluginApi) { api.registerMemoryEmbeddingProvider(ollamaMemoryEmbeddingProviderAdapter); + api.registerMediaUnderstandingProvider(ollamaMediaUnderstandingProvider); const pluginConfig = (api.pluginConfig ?? {}) as OllamaPluginConfig; api.registerWebSearchProvider(createOllamaWebSearchProvider()); api.registerProvider({ diff --git a/extensions/ollama/src/media-understanding-provider.ts b/extensions/ollama/src/media-understanding-provider.ts new file mode 100644 index 00000000000..307e70862d3 --- /dev/null +++ b/extensions/ollama/src/media-understanding-provider.ts @@ -0,0 +1,18 @@ +import { + describeImageWithModel, + describeImagesWithModel, + type MediaUnderstandingProvider, +} from "openclaw/plugin-sdk/media-understanding"; +import { OLLAMA_PROVIDER_ID } from "./discovery-shared.js"; + +// Ollama vision support depends on which models the user has pulled (llava, +// qwen2.5vl, llama3.2-vision, …) — there is no single canonical default. We +// register the provider so the image tool can route `ollama/<model>` +// requests, but leave `defaultModels` and `autoPriority` unset so Ollama +// only participates when the user explicitly configures an image model. 
+export const ollamaMediaUnderstandingProvider: MediaUnderstandingProvider = { + id: OLLAMA_PROVIDER_ID, + capabilities: ["image"], + describeImage: describeImageWithModel, + describeImages: describeImagesWithModel, +};