fix(ollama): register media-understanding provider so image tool can route ollama/* models

Ollama chat models already support image inputs (extensions/ollama/src/stream.ts
extracts image parts and forwards them via the Ollama API), but the ollama
plugin did not register a MediaUnderstandingProvider. The image tool's provider
registry therefore had no 'ollama' entry, so requests like
`imageModel: 'ollama/qwen2.5vl:7b'` failed to resolve and fell back to
unrelated providers.

Register ollamaMediaUnderstandingProvider with:
- capabilities: ['image']
- describeImage/describeImages wired to the shared core helpers (reuses the
  same pi-ai complete path Ollama chat already goes through)
- no defaultModels or autoPriority: Ollama vision support depends on which
  model the user has pulled, so we don't pick a canonical default and don't
  auto-steal image duty from configured providers.

Fixes #69071 (and supersedes #60280).
This commit is contained in:
soloclz
2026-04-22 02:47:39 +08:00
committed by Peter Steinberger
parent b2f96f7f05
commit 9a22cd212b
3 changed files with 58 additions and 0 deletions

View File

@@ -495,4 +495,42 @@ describe("ollama plugin", () => {
expect(baseStreamFn).toHaveBeenCalledTimes(1);
expect(payloadSeen?.think).toBeUndefined();
});
it("registers an image-capable media understanding provider so image tool can route ollama/*", () => {
  // Only the fields this test inspects; the real provider type lives in the SDK.
  type CapturedMediaProvider = {
    id: string;
    capabilities?: string[];
    defaultModels?: Record<string, string>;
    autoPriority?: Record<string, number>;
    describeImage?: unknown;
    describeImages?: unknown;
  };
  const captured: CapturedMediaProvider[] = [];

  // Stub plugin API that records every media-understanding registration and
  // ignores regular provider registration.
  const api = createTestPluginApi({
    id: "ollama",
    name: "Ollama",
    source: "test",
    config: {},
    pluginConfig: {},
    runtime: {} as never,
    registerProvider() {},
    registerMediaUnderstandingProvider(provider) {
      captured.push(provider);
    },
  });
  plugin.register(api);

  // Exactly one media provider is expected, keyed by the ollama provider id.
  expect(captured).toHaveLength(1);
  const registered = captured[0];
  expect(registered.id).toBe("ollama");
  expect(registered.capabilities).toEqual(["image"]);

  // Both the single-image and multi-image entry points must be callable.
  expect(typeof registered.describeImage).toBe("function");
  expect(typeof registered.describeImages).toBe("function");

  // Deliberately absent: vision models for Ollama are whatever the user has
  // pulled (llava, qwen2.5vl, ...), so there is no universal default model,
  // and Ollama must not take over image duty from providers the user has
  // configured explicitly.
  expect(registered.defaultModels).toBeUndefined();
  expect(registered.autoPriority).toBeUndefined();
});
});

View File

@@ -25,6 +25,7 @@ import {
DEFAULT_OLLAMA_EMBEDDING_MODEL,
createOllamaEmbeddingProvider,
} from "./src/embedding-provider.js";
import { ollamaMediaUnderstandingProvider } from "./src/media-understanding-provider.js";
import { ollamaMemoryEmbeddingProviderAdapter } from "./src/memory-embedding-adapter.js";
import {
createConfiguredOllamaCompatStreamWrapper,
@@ -55,6 +56,7 @@ export default definePluginEntry({
description: "Bundled Ollama provider plugin",
register(api: OpenClawPluginApi) {
api.registerMemoryEmbeddingProvider(ollamaMemoryEmbeddingProviderAdapter);
api.registerMediaUnderstandingProvider(ollamaMediaUnderstandingProvider);
const pluginConfig = (api.pluginConfig ?? {}) as OllamaPluginConfig;
api.registerWebSearchProvider(createOllamaWebSearchProvider());
api.registerProvider({

View File

@@ -0,0 +1,18 @@
import {
describeImageWithModel,
describeImagesWithModel,
type MediaUnderstandingProvider,
} from "openclaw/plugin-sdk/media-understanding";
import { OLLAMA_PROVIDER_ID } from "./discovery-shared.js";
/**
 * Media-understanding provider entry for the Ollama plugin.
 *
 * Ollama vision support depends on which models the user has pulled (llava,
 * qwen2.5vl, llama3.2-vision, ...), so there is no single canonical default.
 * The provider is registered so the image tool can route
 * `ollama/<vision-model>` requests, while `defaultModels` and `autoPriority`
 * are intentionally left unset so that Ollama only participates when the user
 * explicitly configures an image model.
 */
export const ollamaMediaUnderstandingProvider: MediaUnderstandingProvider = {
id: OLLAMA_PROVIDER_ID,
// Image is the only capability declared for this provider.
capabilities: ["image"],
// Both entry points are the helpers imported from
// "openclaw/plugin-sdk/media-understanding", passed through unchanged.
describeImage: describeImageWithModel,
describeImages: describeImagesWithModel,
};