Ollama: cache model show metadata (#64753)

Merged via squash.

Prepared head SHA: de56dfb916
Co-authored-by: ImLukeF <92253590+ImLukeF@users.noreply.github.com>
Reviewed-by: @ImLukeF
This commit is contained in:
Luke
2026-04-11 22:30:24 +10:00
committed by GitHub
parent af428d9b8a
commit 3da58226bf
3 changed files with 181 additions and 5 deletions

View File

@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
- Tools/video_generate: allow providers and plugins to return URL-only generated video assets so agent delivery and `openclaw capability video generate --output ...` can forward or stream large videos without requiring the full file in memory first. (#61988) Thanks @xieyongliang.
- Models/providers: surface how configured OpenAI-compatible endpoints are classified in embedded-agent debug logs, so local and proxy routing issues are easier to diagnose. (#64754) Thanks @ImLukeF.
- Ollama: cache `/api/show` context-window and capability metadata during model discovery so repeated picker refreshes stop refetching unchanged models, while still retrying after empty responses and invalidating on digest changes. (#64753) Thanks @ImLukeF.
### Fixes

View File

@@ -3,12 +3,14 @@ import { jsonResponse, requestBodyText, requestUrl } from "../../../src/test-hel
import {
buildOllamaModelDefinition,
enrichOllamaModelsWithContext,
resetOllamaModelShowInfoCacheForTest,
resolveOllamaApiBase,
type OllamaTagModel,
} from "./provider-models.js";
describe("ollama provider models", () => {
afterEach(function () {
  // Keep cases independent: drop cached /api/show results and restore the
  // real global fetch after every test.
  resetOllamaModelShowInfoCacheForTest();
  vi.unstubAllGlobals();
});
@@ -80,6 +82,122 @@ describe("ollama provider models", () => {
]);
});
it("reuses cached /api/show metadata when the model digest is unchanged", async () => {
  const tagModels: OllamaTagModel[] = [
    { name: "qwen3:32b", digest: "sha256:abc123", modified_at: "2026-04-11T00:00:00Z" },
  ];
  // Every /api/show call yields the same payload; a repeat enrichment pass
  // with an unchanged digest must be served from the cache, not the network.
  const showFetch = vi.fn(async () =>
    jsonResponse({
      model_info: { "qwen3.context_length": 131072 },
      capabilities: ["thinking", "tools"],
    }),
  );
  vi.stubGlobal("fetch", showFetch);
  const initial = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", tagModels);
  const repeat = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", tagModels);
  expect(repeat).toEqual(initial);
  expect(showFetch).toHaveBeenCalledTimes(1);
});
it("refreshes cached /api/show metadata when the model digest changes", async () => {
  const oldDigest = "sha256:abc123";
  const newDigest = "sha256:def456";
  // First pull reports a 128k context; after a re-pull (new digest) the model
  // grows to 256k and gains vision — the cache must not mask the update.
  const showFetch = vi
    .fn()
    .mockResolvedValueOnce(
      jsonResponse({
        model_info: { "qwen3.context_length": 131072 },
        capabilities: ["thinking", "tools"],
      }),
    )
    .mockResolvedValueOnce(
      jsonResponse({
        model_info: { "qwen3.context_length": 262144 },
        capabilities: ["vision", "thinking", "tools"],
      }),
    );
  vi.stubGlobal("fetch", showFetch);
  const beforePull = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [
    { name: "qwen3:32b", digest: oldDigest },
  ]);
  const afterPull = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [
    { name: "qwen3:32b", digest: newDigest },
  ]);
  expect(beforePull).toEqual([
    {
      name: "qwen3:32b",
      digest: oldDigest,
      contextWindow: 131072,
      capabilities: ["thinking", "tools"],
    },
  ]);
  expect(afterPull).toEqual([
    {
      name: "qwen3:32b",
      digest: newDigest,
      contextWindow: 262144,
      capabilities: ["vision", "thinking", "tools"],
    },
  ]);
  expect(showFetch).toHaveBeenCalledTimes(2);
});
it("retries /api/show after an empty result for the same digest", async () => {
  // An empty /api/show body must not be cached: the next enrichment pass for
  // the same digest should hit the network again and pick up real metadata.
  const showFetch = vi
    .fn()
    .mockResolvedValueOnce(jsonResponse({}))
    .mockResolvedValueOnce(
      jsonResponse({
        model_info: { "qwen3.context_length": 131072 },
        capabilities: ["thinking", "tools"],
      }),
    );
  vi.stubGlobal("fetch", showFetch);
  const tagModel: OllamaTagModel = { name: "qwen3:32b", digest: "sha256:abc123" };
  const emptyPass = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [tagModel]);
  const retryPass = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [tagModel]);
  expect(emptyPass).toEqual([
    {
      name: "qwen3:32b",
      digest: "sha256:abc123",
      contextWindow: undefined,
      capabilities: undefined,
    },
  ]);
  expect(retryPass).toEqual([
    {
      name: "qwen3:32b",
      digest: "sha256:abc123",
      contextWindow: 131072,
      capabilities: ["thinking", "tools"],
    },
  ]);
  expect(showFetch).toHaveBeenCalledTimes(2);
});
it("normalizes /v1 base URLs before fetching and reuses the same cache entry", async () => {
  const tagModel: OllamaTagModel = { name: "qwen3:32b", digest: "sha256:abc123" };
  const showFetch = vi.fn(async (input: string | URL | Request, init?: RequestInit) => {
    // Both base-URL spellings must resolve to the canonical /api/show endpoint
    // with the model name in the POST body.
    expect(requestUrl(input)).toBe("http://127.0.0.1:11434/api/show");
    expect(JSON.parse(requestBodyText(init?.body))).toEqual({ name: "qwen3:32b" });
    return jsonResponse({
      model_info: { "qwen3.context_length": 131072 },
      capabilities: ["thinking", "tools"],
    });
  });
  vi.stubGlobal("fetch", showFetch);
  const viaV1Suffix = await enrichOllamaModelsWithContext("http://127.0.0.1:11434/v1/", [tagModel]);
  const viaRoot = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [tagModel]);
  expect(viaV1Suffix).toEqual(viaRoot);
  expect(showFetch).toHaveBeenCalledTimes(1);
});
it("buildOllamaModelDefinition sets input to text+image when vision capability is present", () => {
const visionModel = buildOllamaModelDefinition("kimi-k2.5:cloud", 262144, [
"vision",

View File

@@ -1,5 +1,5 @@
import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-onboard";
import { fetchWithSsrFGuard, type SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime";
import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
import {
OLLAMA_DEFAULT_BASE_URL,
OLLAMA_DEFAULT_CONTEXT_WINDOW,
@@ -29,8 +29,10 @@ export type OllamaModelWithContext = OllamaTagModel & {
};
const OLLAMA_SHOW_CONCURRENCY = 8;
const MAX_OLLAMA_SHOW_CACHE_ENTRIES = 256;
const ollamaModelShowInfoCache = new Map<string, Promise<OllamaModelShowInfo>>();
export function buildOllamaBaseUrlSsrFPolicy(baseUrl: string): SsrFPolicy | undefined {
export function buildOllamaBaseUrlSsrFPolicy(baseUrl: string) {
const trimmed = baseUrl.trim();
if (!trimmed) {
return undefined;
@@ -62,20 +64,46 @@ export type OllamaModelShowInfo = {
capabilities?: string[];
};
/**
 * Builds the cache key for a model's /api/show metadata, or returns
 * `undefined` when the model carries no version stamp (in which case the
 * result must not be cached at all).
 */
function buildOllamaModelShowCacheKey(
  apiBase: string,
  model: Pick<OllamaTagModel, "name" | "digest" | "modified_at">,
): string | undefined {
  // Prefer the content digest as the version stamp; fall back to the
  // modification timestamp when the digest is absent or blank.
  const digest = model.digest?.trim();
  const stamp = digest ? digest : model.modified_at?.trim();
  if (!stamp) {
    return undefined;
  }
  // Key by normalized base so "/v1"-suffixed and bare URLs share one entry.
  return [resolveOllamaApiBase(apiBase), model.name, stamp].join("|");
}
/**
 * Inserts a pending /api/show lookup into the bounded cache, evicting the
 * stalest entry only when the insert would actually grow the map past
 * MAX_OLLAMA_SHOW_CACHE_ENTRIES.
 */
function setOllamaModelShowCacheEntry(key: string, value: Promise<OllamaModelShowInfo>): void {
  // Remove any existing entry first: an overwrite cannot grow the map, so it
  // must never trigger an eviction of an unrelated key (the original code
  // evicted the oldest entry even on overwrite). Re-inserting also refreshes
  // the key's insertion position, making eviction roughly least-recently-set.
  ollamaModelShowInfoCache.delete(key);
  if (ollamaModelShowInfoCache.size >= MAX_OLLAMA_SHOW_CACHE_ENTRIES) {
    // Map iterates keys in insertion order, so the first key is the stalest.
    const oldestKey = ollamaModelShowInfoCache.keys().next().value;
    if (typeof oldestKey === "string") {
      ollamaModelShowInfoCache.delete(oldestKey);
    }
  }
  ollamaModelShowInfoCache.set(key, value);
}
/**
 * True when a /api/show probe produced real metadata worth keeping in the
 * cache: a numeric context window or at least one capability.
 */
function hasCachedOllamaModelShowInfo(info: OllamaModelShowInfo): boolean {
  if (typeof info.contextWindow === "number") {
    return true;
  }
  const caps = info.capabilities;
  return caps !== undefined && caps.length > 0;
}
export async function queryOllamaModelShowInfo(
apiBase: string,
modelName: string,
): Promise<OllamaModelShowInfo> {
const normalizedApiBase = resolveOllamaApiBase(apiBase);
try {
const { response, release } = await fetchWithSsrFGuard({
url: `${apiBase}/api/show`,
url: `${normalizedApiBase}/api/show`,
init: {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ name: modelName }),
signal: AbortSignal.timeout(3000),
},
policy: buildOllamaBaseUrlSsrFPolicy(apiBase),
policy: buildOllamaBaseUrlSsrFPolicy(normalizedApiBase),
auditContext: "ollama-provider-models.show",
});
try {
@@ -117,6 +145,31 @@ export async function queryOllamaModelShowInfo(
}
}
async function queryOllamaModelShowInfoCached(
apiBase: string,
model: Pick<OllamaTagModel, "name" | "digest" | "modified_at">,
): Promise<OllamaModelShowInfo> {
const normalizedApiBase = resolveOllamaApiBase(apiBase);
const cacheKey = buildOllamaModelShowCacheKey(normalizedApiBase, model);
if (!cacheKey) {
return await queryOllamaModelShowInfo(normalizedApiBase, model.name);
}
const cached = ollamaModelShowInfoCache.get(cacheKey);
if (cached) {
return await cached;
}
const pending = queryOllamaModelShowInfo(normalizedApiBase, model.name).then((result) => {
if (!hasCachedOllamaModelShowInfo(result)) {
ollamaModelShowInfoCache.delete(cacheKey);
}
return result;
});
setOllamaModelShowCacheEntry(cacheKey, pending);
return await pending;
}
/** @deprecated Use queryOllamaModelShowInfo instead. */
export async function queryOllamaContextWindow(
apiBase: string,
@@ -136,7 +189,7 @@ export async function enrichOllamaModelsWithContext(
const batch = models.slice(index, index + concurrency);
const batchResults = await Promise.all(
batch.map(async (model) => {
const showInfo = await queryOllamaModelShowInfo(apiBase, model.name);
const showInfo = await queryOllamaModelShowInfoCached(apiBase, model);
return {
...model,
contextWindow: showInfo.contextWindow,
@@ -198,3 +251,7 @@ export async function fetchOllamaModels(
return { reachable: false, models: [] };
}
}
/** Test-only hook: discard every cached /api/show lookup between test cases. */
export function resetOllamaModelShowInfoCacheForTest(): void {
  for (const cachedKey of [...ollamaModelShowInfoCache.keys()]) {
    ollamaModelShowInfoCache.delete(cachedKey);
  }
}