From 3da58226bfe6917ebc2010e79fe561fb6f1917ca Mon Sep 17 00:00:00 2001
From: Luke <92253590+ImLukeF@users.noreply.github.com>
Date: Sat, 11 Apr 2026 22:30:24 +1000
Subject: [PATCH] Ollama: cache model show metadata (#64753)

Merged via squash.

Prepared head SHA: de56dfb91657019b209a37642ea2fb0f26158a24
Co-authored-by: ImLukeF <92253590+ImLukeF@users.noreply.github.com>
Co-authored-by: ImLukeF <92253590+ImLukeF@users.noreply.github.com>
Reviewed-by: @ImLukeF
---
 CHANGELOG.md                                  |   1 +
 extensions/ollama/src/provider-models.test.ts | 118 ++++++++++++++++++
 extensions/ollama/src/provider-models.ts      |  67 +++++++++-
 3 files changed, 181 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5011a4ffec7..46f9cba1af2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
 
 - Tools/video_generate: allow providers and plugins to return URL-only generated video assets so agent delivery and `openclaw capability video generate --output ...` can forward or stream large videos without requiring the full file in memory first. (#61988) Thanks @xieyongliang.
 - Models/providers: surface how configured OpenAI-compatible endpoints are classified in embedded-agent debug logs, so local and proxy routing issues are easier to diagnose. (#64754) Thanks @ImLukeF.
+- Ollama: cache `/api/show` context-window and capability metadata during model discovery so repeated picker refreshes stop refetching unchanged models, while still retrying after empty responses and invalidating on digest changes. (#64753) Thanks @ImLukeF.
 
 ### Fixes
 
diff --git a/extensions/ollama/src/provider-models.test.ts b/extensions/ollama/src/provider-models.test.ts
index bf1b568454a..ed6ce868a01 100644
--- a/extensions/ollama/src/provider-models.test.ts
+++ b/extensions/ollama/src/provider-models.test.ts
@@ -3,12 +3,14 @@ import { jsonResponse, requestBodyText, requestUrl } from "../../../src/test-hel
 import {
   buildOllamaModelDefinition,
   enrichOllamaModelsWithContext,
+  resetOllamaModelShowInfoCacheForTest,
   resolveOllamaApiBase,
   type OllamaTagModel,
 } from "./provider-models.js";
 
 describe("ollama provider models", () => {
   afterEach(() => {
+    resetOllamaModelShowInfoCacheForTest();
     vi.unstubAllGlobals();
   });
 
@@ -80,6 +82,122 @@ describe("ollama provider models", () => {
     ]);
   });
 
+  it("reuses cached /api/show metadata when the model digest is unchanged", async () => {
+    const models: OllamaTagModel[] = [
+      { name: "qwen3:32b", digest: "sha256:abc123", modified_at: "2026-04-11T00:00:00Z" },
+    ];
+    const fetchMock = vi.fn(async () =>
+      jsonResponse({
+        model_info: { "qwen3.context_length": 131072 },
+        capabilities: ["thinking", "tools"],
+      }),
+    );
+    vi.stubGlobal("fetch", fetchMock);
+
+    const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", models);
+    const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", models);
+
+    expect(first).toEqual(second);
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+  });
+
+  it("refreshes cached /api/show metadata when the model digest changes", async () => {
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce(
+        jsonResponse({
+          model_info: { "qwen3.context_length": 131072 },
+          capabilities: ["thinking", "tools"],
+        }),
+      )
+      .mockResolvedValueOnce(
+        jsonResponse({
+          model_info: { "qwen3.context_length": 262144 },
+          capabilities: ["vision", "thinking", "tools"],
+        }),
+      );
+    vi.stubGlobal("fetch", fetchMock);
+
+    const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [
+      { name: "qwen3:32b", digest: "sha256:abc123" },
+    ]);
+    const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [
+      { name: "qwen3:32b", digest: "sha256:def456" },
+    ]);
+
+    expect(first).toEqual([
+      {
+        name: "qwen3:32b",
+        digest: "sha256:abc123",
+        contextWindow: 131072,
+        capabilities: ["thinking", "tools"],
+      },
+    ]);
+    expect(second).toEqual([
+      {
+        name: "qwen3:32b",
+        digest: "sha256:def456",
+        contextWindow: 262144,
+        capabilities: ["vision", "thinking", "tools"],
+      },
+    ]);
+    expect(fetchMock).toHaveBeenCalledTimes(2);
+  });
+
+  it("retries /api/show after an empty result for the same digest", async () => {
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce(jsonResponse({}))
+      .mockResolvedValueOnce(
+        jsonResponse({
+          model_info: { "qwen3.context_length": 131072 },
+          capabilities: ["thinking", "tools"],
+        }),
+      );
+    vi.stubGlobal("fetch", fetchMock);
+
+    const model: OllamaTagModel = { name: "qwen3:32b", digest: "sha256:abc123" };
+    const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [model]);
+    const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [model]);
+
+    expect(first).toEqual([
+      {
+        name: "qwen3:32b",
+        digest: "sha256:abc123",
+        contextWindow: undefined,
+        capabilities: undefined,
+      },
+    ]);
+    expect(second).toEqual([
+      {
+        name: "qwen3:32b",
+        digest: "sha256:abc123",
+        contextWindow: 131072,
+        capabilities: ["thinking", "tools"],
+      },
+    ]);
+    expect(fetchMock).toHaveBeenCalledTimes(2);
+  });
+
+  it("normalizes /v1 base URLs before fetching and reuses the same cache entry", async () => {
+    const model: OllamaTagModel = { name: "qwen3:32b", digest: "sha256:abc123" };
+    const fetchMock = vi.fn(async (input: string | URL | Request, init?: RequestInit) => {
+      expect(requestUrl(input)).toBe("http://127.0.0.1:11434/api/show");
+      expect(JSON.parse(requestBodyText(init?.body))).toEqual({ name: "qwen3:32b" });
+      return jsonResponse({
+        model_info: { "qwen3.context_length": 131072 },
+        capabilities: ["thinking", "tools"],
+      });
+    });
+    vi.stubGlobal("fetch", fetchMock);
+
+    const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434/v1/", [model]);
+    const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [model]);
+
+    expect(first).toEqual(second);
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+  });
+
   it("buildOllamaModelDefinition sets input to text+image when vision capability is present", () => {
     const visionModel = buildOllamaModelDefinition("kimi-k2.5:cloud", 262144, [
       "vision",
diff --git a/extensions/ollama/src/provider-models.ts b/extensions/ollama/src/provider-models.ts
index 3dce0ef9887..e7482273f59 100644
--- a/extensions/ollama/src/provider-models.ts
+++ b/extensions/ollama/src/provider-models.ts
@@ -1,5 +1,5 @@
 import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-onboard";
-import { fetchWithSsrFGuard, type SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime";
+import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
 import {
   OLLAMA_DEFAULT_BASE_URL,
   OLLAMA_DEFAULT_CONTEXT_WINDOW,
@@ -29,8 +29,10 @@ export type OllamaModelWithContext = OllamaTagModel & {
 };
 
 const OLLAMA_SHOW_CONCURRENCY = 8;
+const MAX_OLLAMA_SHOW_CACHE_ENTRIES = 256;
+const ollamaModelShowInfoCache = new Map<string, Promise<OllamaModelShowInfo>>();
 
-export function buildOllamaBaseUrlSsrFPolicy(baseUrl: string): SsrFPolicy | undefined {
+export function buildOllamaBaseUrlSsrFPolicy(baseUrl: string) {
   const trimmed = baseUrl.trim();
   if (!trimmed) {
     return undefined;
@@ -62,20 +64,46 @@ export type OllamaModelShowInfo = {
   capabilities?: string[];
 };
 
+function buildOllamaModelShowCacheKey(
+  apiBase: string,
+  model: Pick<OllamaTagModel, "name" | "digest" | "modified_at">,
+): string | undefined {
+  const version = model.digest?.trim() || model.modified_at?.trim();
+  if (!version) {
+    return undefined;
+  }
+  return `${resolveOllamaApiBase(apiBase)}|${model.name}|${version}`;
+}
+
+function setOllamaModelShowCacheEntry(key: string, value: Promise<OllamaModelShowInfo>): void {
+  if (ollamaModelShowInfoCache.size >= MAX_OLLAMA_SHOW_CACHE_ENTRIES) {
+    const oldestKey = ollamaModelShowInfoCache.keys().next().value;
+    if (typeof oldestKey === "string") {
+      ollamaModelShowInfoCache.delete(oldestKey);
+    }
+  }
+  ollamaModelShowInfoCache.set(key, value);
+}
+
+function hasCachedOllamaModelShowInfo(info: OllamaModelShowInfo): boolean {
+  return typeof info.contextWindow === "number" || (info.capabilities?.length ?? 0) > 0;
+}
+
 export async function queryOllamaModelShowInfo(
   apiBase: string,
   modelName: string,
 ): Promise<OllamaModelShowInfo> {
+  const normalizedApiBase = resolveOllamaApiBase(apiBase);
   try {
     const { response, release } = await fetchWithSsrFGuard({
-      url: `${apiBase}/api/show`,
+      url: `${normalizedApiBase}/api/show`,
       init: {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body: JSON.stringify({ name: modelName }),
         signal: AbortSignal.timeout(3000),
       },
-      policy: buildOllamaBaseUrlSsrFPolicy(apiBase),
+      policy: buildOllamaBaseUrlSsrFPolicy(normalizedApiBase),
       auditContext: "ollama-provider-models.show",
     });
     try {
@@ -117,6 +145,31 @@ export async function queryOllamaModelShowInfo(
   }
 }
 
+async function queryOllamaModelShowInfoCached(
+  apiBase: string,
+  model: Pick<OllamaTagModel, "name" | "digest" | "modified_at">,
+): Promise<OllamaModelShowInfo> {
+  const normalizedApiBase = resolveOllamaApiBase(apiBase);
+  const cacheKey = buildOllamaModelShowCacheKey(normalizedApiBase, model);
+  if (!cacheKey) {
+    return await queryOllamaModelShowInfo(normalizedApiBase, model.name);
+  }
+
+  const cached = ollamaModelShowInfoCache.get(cacheKey);
+  if (cached) {
+    return await cached;
+  }
+
+  const pending = queryOllamaModelShowInfo(normalizedApiBase, model.name).then((result) => {
+    if (!hasCachedOllamaModelShowInfo(result)) {
+      ollamaModelShowInfoCache.delete(cacheKey);
+    }
+    return result;
+  });
+  setOllamaModelShowCacheEntry(cacheKey, pending);
+  return await pending;
+}
+
 /** @deprecated Use queryOllamaModelShowInfo instead. */
 export async function queryOllamaContextWindow(
   apiBase: string,
@@ -136,7 +189,7 @@ export async function enrichOllamaModelsWithContext(
     const batch = models.slice(index, index + concurrency);
     const batchResults = await Promise.all(
       batch.map(async (model) => {
-        const showInfo = await queryOllamaModelShowInfo(apiBase, model.name);
+        const showInfo = await queryOllamaModelShowInfoCached(apiBase, model);
         return {
           ...model,
           contextWindow: showInfo.contextWindow,
@@ -198,3 +251,7 @@ export async function fetchOllamaModels(
     return { reachable: false, models: [] };
   }
 }
+
+export function resetOllamaModelShowInfoCacheForTest(): void {
+  ollamaModelShowInfoCache.clear();
+}