mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 11:40:42 +00:00
fix(ollama): register media-understanding provider so image tool can route ollama/* models
Ollama chat models already support image inputs (extensions/ollama/src/stream.ts extracts image parts and forwards them via the Ollama API), but the ollama plugin did not register a MediaUnderstandingProvider. The image tool's provider registry therefore had no 'ollama' entry, so requests like `imageModel: 'ollama/qwen2.5vl:7b'` failed to resolve and fell back to unrelated providers.

Register ollamaMediaUnderstandingProvider with:
- capabilities: ['image']
- describeImage/describeImages wired to the shared core helpers (reuses the same pi-ai complete path Ollama chat already goes through)
- no defaultModels or autoPriority: Ollama vision support depends on which model the user has pulled, so we don't pick a canonical default and don't auto-steal image duty from configured providers.

Fixes #69071 (and supersedes #60280).
This commit is contained in:
committed by
Peter Steinberger
parent
b2f96f7f05
commit
9a22cd212b
@@ -495,4 +495,42 @@ describe("ollama plugin", () => {
|
||||
expect(baseStreamFn).toHaveBeenCalledTimes(1);
|
||||
expect(payloadSeen?.think).toBeUndefined();
|
||||
});
|
||||
|
||||
it("registers an image-capable media understanding provider so image tool can route ollama/*", () => {
  // Structural view of just the fields this test inspects on whatever the
  // plugin hands to registerMediaUnderstandingProvider.
  type CapturedMediaProvider = {
    id: string;
    capabilities?: string[];
    defaultModels?: Record<string, string>;
    autoPriority?: Record<string, number>;
    describeImage?: unknown;
    describeImages?: unknown;
  };
  const captured: CapturedMediaProvider[] = [];

  // Build a test API whose media-understanding hook records each registration.
  const api = createTestPluginApi({
    id: "ollama",
    name: "Ollama",
    source: "test",
    config: {},
    pluginConfig: {},
    runtime: {} as never,
    registerProvider: () => {},
    registerMediaUnderstandingProvider: (provider) => {
      captured.push(provider);
    },
  });
  plugin.register(api);

  // Exactly one media-understanding provider is registered, and it is Ollama's.
  expect(captured).toHaveLength(1);
  const ollamaMedia = captured[0];
  expect(ollamaMedia.id).toBe("ollama");
  expect(ollamaMedia.capabilities).toEqual(["image"]);
  expect(typeof ollamaMedia.describeImage).toBe("function");
  expect(typeof ollamaMedia.describeImages).toBe("function");

  // Deliberately absent: defaultModels and autoPriority. Which vision model
  // exists (llava, qwen2.5vl, …) depends on what the user installed, so there
  // is no universal default, and Ollama must not auto-steal image duty from
  // providers the user has configured.
  expect(ollamaMedia.defaultModels).toBeUndefined();
  expect(ollamaMedia.autoPriority).toBeUndefined();
});
|
||||
});
|
||||
|
||||
@@ -25,6 +25,7 @@ import {
|
||||
DEFAULT_OLLAMA_EMBEDDING_MODEL,
|
||||
createOllamaEmbeddingProvider,
|
||||
} from "./src/embedding-provider.js";
|
||||
import { ollamaMediaUnderstandingProvider } from "./src/media-understanding-provider.js";
|
||||
import { ollamaMemoryEmbeddingProviderAdapter } from "./src/memory-embedding-adapter.js";
|
||||
import {
|
||||
createConfiguredOllamaCompatStreamWrapper,
|
||||
@@ -55,6 +56,7 @@ export default definePluginEntry({
|
||||
description: "Bundled Ollama provider plugin",
|
||||
register(api: OpenClawPluginApi) {
|
||||
api.registerMemoryEmbeddingProvider(ollamaMemoryEmbeddingProviderAdapter);
|
||||
api.registerMediaUnderstandingProvider(ollamaMediaUnderstandingProvider);
|
||||
const pluginConfig = (api.pluginConfig ?? {}) as OllamaPluginConfig;
|
||||
api.registerWebSearchProvider(createOllamaWebSearchProvider());
|
||||
api.registerProvider({
|
||||
|
||||
18
extensions/ollama/src/media-understanding-provider.ts
Normal file
18
extensions/ollama/src/media-understanding-provider.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import {
|
||||
describeImageWithModel,
|
||||
describeImagesWithModel,
|
||||
type MediaUnderstandingProvider,
|
||||
} from "openclaw/plugin-sdk/media-understanding";
|
||||
import { OLLAMA_PROVIDER_ID } from "./discovery-shared.js";
|
||||
|
||||
/**
 * Media-understanding provider entry for Ollama.
 *
 * Declares the `image` capability and delegates `describeImage` /
 * `describeImages` to the shared `describeImageWithModel` /
 * `describeImagesWithModel` helpers from the plugin SDK.
 *
 * `defaultModels` and `autoPriority` are intentionally left unset: which
 * vision models are available (llava, qwen2.5vl, llama3.2-vision, …) depends
 * on what the user has pulled, so there is no single canonical default.
 * Registering the provider lets the image tool route `ollama/<vision-model>`
 * requests, while Ollama only participates when the user explicitly
 * configures an image model.
 */
export const ollamaMediaUnderstandingProvider: MediaUnderstandingProvider = {
  id: OLLAMA_PROVIDER_ID,
  capabilities: ["image"],
  describeImage: describeImageWithModel,
  describeImages: describeImagesWithModel,
};
|
||||
Reference in New Issue
Block a user