fix(ollama): register media-understanding provider so image tool can route ollama/* models

Ollama chat models already support image inputs (extensions/ollama/src/stream.ts extracts image parts and forwards them via the Ollama API), but the ollama plugin did not register a MediaUnderstandingProvider. The image tool's provider registry therefore had no 'ollama' entry, so requests like `imageModel: 'ollama/qwen2.5vl:7b'` failed to resolve and fell back to unrelated providers.

Register ollamaMediaUnderstandingProvider with:

- capabilities: ['image']
- describeImage/describeImages wired to the shared core helpers (reuses the same pi-ai complete path Ollama chat already goes through)
- no defaultModels or autoPriority: Ollama vision support depends on which model the user has pulled, so we don't pick a canonical default and don't auto-steal image duty from configured providers.

Fixes #69071 (and supersedes #60280).
2026-05-06 11:40:42 +00:00 · 2026-04-22 02:47:39 +08:00
parent b2f96f7f05
commit 9a22cd212b
3 changed files with 58 additions and 0 deletions
--- a/extensions/ollama/index.test.ts
+++ b/extensions/ollama/index.test.ts
@@ -495,4 +495,42 @@ describe("ollama plugin", () => {
    expect(baseStreamFn).toHaveBeenCalledTimes(1);
    expect(payloadSeen?.think).toBeUndefined();
  });
+
+  it("registers an image-capable media understanding provider so image tool can route ollama/*", () => {
+    const mediaProviders: Array<{
+      id: string;
+      capabilities?: string[];
+      defaultModels?: Record<string, string>;
+      autoPriority?: Record<string, number>;
+      describeImage?: unknown;
+      describeImages?: unknown;
+    }> = []; // filled by the registerMediaUnderstandingProvider stub below
+
+    plugin.register(
+      createTestPluginApi({
+        id: "ollama",
+        name: "Ollama",
+        source: "test",
+        config: {},
+        pluginConfig: {},
+        runtime: {} as never,
+        registerProvider() {}, // no-op stub: provider registration is not inspected by this test
+        registerMediaUnderstandingProvider(provider) {
+          mediaProviders.push(provider); // capture the registration for the assertions below
+        },
+      }),
+    );
+
+    expect(mediaProviders).toHaveLength(1); // exactly one media-understanding provider is registered
+    const [ollamaMedia] = mediaProviders;
+    expect(ollamaMedia.id).toBe("ollama");
+    expect(ollamaMedia.capabilities).toEqual(["image"]); // image is the only declared capability
+    expect(typeof ollamaMedia.describeImage).toBe("function"); // single-image entry point is present
+    expect(typeof ollamaMedia.describeImages).toBe("function"); // multi-image entry point is present
+    // Intentional: no defaultModels or autoPriority. Ollama vision models are
+    // user-installed (llava, qwen2.5vl, …) with no universal default, and we
+    // don't want Ollama to auto-steal image duty from configured providers.
+    expect(ollamaMedia.defaultModels).toBeUndefined();
+    expect(ollamaMedia.autoPriority).toBeUndefined();
+  });
 });
--- a/extensions/ollama/index.ts
+++ b/extensions/ollama/index.ts
@@ -25,6 +25,7 @@ import {
  DEFAULT_OLLAMA_EMBEDDING_MODEL,
  createOllamaEmbeddingProvider,
 } from "./src/embedding-provider.js";
+import { ollamaMediaUnderstandingProvider } from "./src/media-understanding-provider.js";
 import { ollamaMemoryEmbeddingProviderAdapter } from "./src/memory-embedding-adapter.js";
 import {
  createConfiguredOllamaCompatStreamWrapper,
@@ -55,6 +56,7 @@ export default definePluginEntry({
  description: "Bundled Ollama provider plugin",
  register(api: OpenClawPluginApi) {
    api.registerMemoryEmbeddingProvider(ollamaMemoryEmbeddingProviderAdapter);
+    api.registerMediaUnderstandingProvider(ollamaMediaUnderstandingProvider);
    const pluginConfig = (api.pluginConfig ?? {}) as OllamaPluginConfig;
    api.registerWebSearchProvider(createOllamaWebSearchProvider());
    api.registerProvider({
--- a/extensions/ollama/src/media-understanding-provider.ts
+++ b/extensions/ollama/src/media-understanding-provider.ts
@@ -0,0 +1,18 @@
+import {
+  describeImageWithModel,
+  describeImagesWithModel,
+  type MediaUnderstandingProvider,
+} from "openclaw/plugin-sdk/media-understanding";
+import { OLLAMA_PROVIDER_ID } from "./discovery-shared.js";
+
+// Ollama vision support depends on which models the user has pulled (llava,
+// qwen2.5vl, llama3.2-vision, …) — there is no single canonical default. We
+// register the provider so the image tool can route `ollama/<vision-model>`
+// requests, but leave `defaultModels` and `autoPriority` unset so Ollama
+// only participates when the user explicitly configures an image model.
+export const ollamaMediaUnderstandingProvider: MediaUnderstandingProvider = {
+  id: OLLAMA_PROVIDER_ID, // same provider id constant exported by ./discovery-shared.js
+  capabilities: ["image"], // image is the only capability declared here
+  describeImage: describeImageWithModel, // helper imported from the plugin SDK, passed through unchanged
+  describeImages: describeImagesWithModel, // helper imported from the plugin SDK, passed through unchanged
+};