Ollama: cache model show metadata (#64753)

Merged via squash.

Prepared head SHA: de56dfb916
Co-authored-by: ImLukeF <92253590+ImLukeF@users.noreply.github.com>
Reviewed-by: @ImLukeF
This commit is contained in:
Luke
2026-04-11 22:30:24 +10:00
committed by GitHub
parent af428d9b8a
commit 3da58226bf
3 changed files with 181 additions and 5 deletions

View File

@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
- Tools/video_generate: allow providers and plugins to return URL-only generated video assets so agent delivery and `openclaw capability video generate --output ...` can forward or stream large videos without requiring the full file in memory first. (#61988) Thanks @xieyongliang.
- Models/providers: surface how configured OpenAI-compatible endpoints are classified in embedded-agent debug logs, so local and proxy routing issues are easier to diagnose. (#64754) Thanks @ImLukeF.
- Ollama: cache `/api/show` context-window and capability metadata during model discovery so repeated picker refreshes stop refetching unchanged models, while still retrying after empty responses and invalidating on digest changes. (#64753) Thanks @ImLukeF.
### Fixes

View File

@@ -3,12 +3,14 @@ import { jsonResponse, requestBodyText, requestUrl } from "../../../src/test-hel
import {
buildOllamaModelDefinition,
enrichOllamaModelsWithContext,
resetOllamaModelShowInfoCacheForTest,
resolveOllamaApiBase,
type OllamaTagModel,
} from "./provider-models.js";
describe("ollama provider models", () => {
afterEach(function () {
  // Keep cases independent: drop cached /api/show results and restore the
  // real global fetch after every test.
  resetOllamaModelShowInfoCacheForTest();
  vi.unstubAllGlobals();
});
@@ -80,6 +82,122 @@ describe("ollama provider models", () => {
]);
});
it("reuses cached /api/show metadata when the model digest is unchanged", async () => {
  const tagModels: OllamaTagModel[] = [
    { name: "qwen3:32b", digest: "sha256:abc123", modified_at: "2026-04-11T00:00:00Z" },
  ];
  // Every /api/show call yields the same payload; a repeat enrichment pass
  // with an unchanged digest must be served from the cache, not the network.
  const showFetch = vi.fn(async () =>
    jsonResponse({
      model_info: { "qwen3.context_length": 131072 },
      capabilities: ["thinking", "tools"],
    }),
  );
  vi.stubGlobal("fetch", showFetch);
  const initial = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", tagModels);
  const repeat = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", tagModels);
  expect(repeat).toEqual(initial);
  expect(showFetch).toHaveBeenCalledTimes(1);
});
it("refreshes cached /api/show metadata when the model digest changes", async () => {
  const oldDigest = "sha256:abc123";
  const newDigest = "sha256:def456";
  // First pull reports a 128k context; after a re-pull (new digest) the model
  // grows to 256k and gains vision — the cache must not mask the update.
  const showFetch = vi
    .fn()
    .mockResolvedValueOnce(
      jsonResponse({
        model_info: { "qwen3.context_length": 131072 },
        capabilities: ["thinking", "tools"],
      }),
    )
    .mockResolvedValueOnce(
      jsonResponse({
        model_info: { "qwen3.context_length": 262144 },
        capabilities: ["vision", "thinking", "tools"],
      }),
    );
  vi.stubGlobal("fetch", showFetch);
  const beforePull = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [
    { name: "qwen3:32b", digest: oldDigest },
  ]);
  const afterPull = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [
    { name: "qwen3:32b", digest: newDigest },
  ]);
  expect(beforePull).toEqual([
    {
      name: "qwen3:32b",
      digest: oldDigest,
      contextWindow: 131072,
      capabilities: ["thinking", "tools"],
    },
  ]);
  expect(afterPull).toEqual([
    {
      name: "qwen3:32b",
      digest: newDigest,
      contextWindow: 262144,
      capabilities: ["vision", "thinking", "tools"],
    },
  ]);
  expect(showFetch).toHaveBeenCalledTimes(2);
});
it("retries /api/show after an empty result for the same digest", async () => {
  // An empty /api/show body must not be cached: the next enrichment pass for
  // the same digest should hit the network again and pick up real metadata.
  const showFetch = vi
    .fn()
    .mockResolvedValueOnce(jsonResponse({}))
    .mockResolvedValueOnce(
      jsonResponse({
        model_info: { "qwen3.context_length": 131072 },
        capabilities: ["thinking", "tools"],
      }),
    );
  vi.stubGlobal("fetch", showFetch);
  const tagModel: OllamaTagModel = { name: "qwen3:32b", digest: "sha256:abc123" };
  const emptyPass = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [tagModel]);
  const retryPass = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [tagModel]);
  expect(emptyPass).toEqual([
    {
      name: "qwen3:32b",
      digest: "sha256:abc123",
      contextWindow: undefined,
      capabilities: undefined,
    },
  ]);
  expect(retryPass).toEqual([
    {
      name: "qwen3:32b",
      digest: "sha256:abc123",
      contextWindow: 131072,
      capabilities: ["thinking", "tools"],
    },
  ]);
  expect(showFetch).toHaveBeenCalledTimes(2);
});
it("normalizes /v1 base URLs before fetching and reuses the same cache entry", async () => {
  const tagModel: OllamaTagModel = { name: "qwen3:32b", digest: "sha256:abc123" };
  const showFetch = vi.fn(async (input: string | URL | Request, init?: RequestInit) => {
    // Both base-URL spellings must resolve to the canonical /api/show endpoint
    // with the model name in the POST body.
    expect(requestUrl(input)).toBe("http://127.0.0.1:11434/api/show");
    expect(JSON.parse(requestBodyText(init?.body))).toEqual({ name: "qwen3:32b" });
    return jsonResponse({
      model_info: { "qwen3.context_length": 131072 },
      capabilities: ["thinking", "tools"],
    });
  });
  vi.stubGlobal("fetch", showFetch);
  const viaV1Suffix = await enrichOllamaModelsWithContext("http://127.0.0.1:11434/v1/", [tagModel]);
  const viaRoot = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [tagModel]);
  expect(viaV1Suffix).toEqual(viaRoot);
  expect(showFetch).toHaveBeenCalledTimes(1);
});
it("buildOllamaModelDefinition sets input to text+image when vision capability is present", () => {
const visionModel = buildOllamaModelDefinition("kimi-k2.5:cloud", 262144, [
"vision",

View File

@@ -1,5 +1,5 @@
import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-onboard";
import { fetchWithSsrFGuard, type SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime";
import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
import {
OLLAMA_DEFAULT_BASE_URL,
OLLAMA_DEFAULT_CONTEXT_WINDOW,
@@ -29,8 +29,10 @@ export type OllamaModelWithContext = OllamaTagModel & {
};
const OLLAMA_SHOW_CONCURRENCY = 8;
const MAX_OLLAMA_SHOW_CACHE_ENTRIES = 256;
const ollamaModelShowInfoCache = new Map<string, Promise<OllamaModelShowInfo>>();
export function buildOllamaBaseUrlSsrFPolicy(baseUrl: string): SsrFPolicy | undefined {
export function buildOllamaBaseUrlSsrFPolicy(baseUrl: string) {
const trimmed = baseUrl.trim();
if (!trimmed) {
return undefined;
@@ -62,20 +64,46 @@ export type OllamaModelShowInfo = {
capabilities?: string[];
};
/**
 * Builds the cache key for a model's /api/show metadata, or returns
 * `undefined` when the model carries no version stamp (in which case the
 * result must not be cached at all).
 */
function buildOllamaModelShowCacheKey(
  apiBase: string,
  model: Pick<OllamaTagModel, "name" | "digest" | "modified_at">,
): string | undefined {
  // Prefer the content digest as the version stamp; fall back to the
  // modification timestamp when the digest is absent or blank.
  const digest = model.digest?.trim();
  const stamp = digest ? digest : model.modified_at?.trim();
  if (!stamp) {
    return undefined;
  }
  // Key by normalized base so "/v1"-suffixed and bare URLs share one entry.
  return [resolveOllamaApiBase(apiBase), model.name, stamp].join("|");
}
/**
 * Inserts a pending /api/show lookup into the bounded cache, evicting the
 * stalest entry only when the insert would actually grow the map past
 * MAX_OLLAMA_SHOW_CACHE_ENTRIES.
 */
function setOllamaModelShowCacheEntry(key: string, value: Promise<OllamaModelShowInfo>): void {
  // Remove any existing entry first: an overwrite cannot grow the map, so it
  // must never trigger an eviction of an unrelated key (the original code
  // evicted the oldest entry even on overwrite). Re-inserting also refreshes
  // the key's insertion position, making eviction roughly least-recently-set.
  ollamaModelShowInfoCache.delete(key);
  if (ollamaModelShowInfoCache.size >= MAX_OLLAMA_SHOW_CACHE_ENTRIES) {
    // Map iterates keys in insertion order, so the first key is the stalest.
    const oldestKey = ollamaModelShowInfoCache.keys().next().value;
    if (typeof oldestKey === "string") {
      ollamaModelShowInfoCache.delete(oldestKey);
    }
  }
  ollamaModelShowInfoCache.set(key, value);
}
/**
 * True when a /api/show probe produced real metadata worth keeping in the
 * cache: a numeric context window or at least one capability.
 */
function hasCachedOllamaModelShowInfo(info: OllamaModelShowInfo): boolean {
  if (typeof info.contextWindow === "number") {
    return true;
  }
  const caps = info.capabilities;
  return caps !== undefined && caps.length > 0;
}
export async function queryOllamaModelShowInfo(
apiBase: string,
modelName: string,
): Promise<OllamaModelShowInfo> {
const normalizedApiBase = resolveOllamaApiBase(apiBase);
try {
const { response, release } = await fetchWithSsrFGuard({
url: `${apiBase}/api/show`,
url: `${normalizedApiBase}/api/show`,
init: {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ name: modelName }),
signal: AbortSignal.timeout(3000),
},
policy: buildOllamaBaseUrlSsrFPolicy(apiBase),
policy: buildOllamaBaseUrlSsrFPolicy(normalizedApiBase),
auditContext: "ollama-provider-models.show",
});
try {
@@ -117,6 +145,31 @@ export async function queryOllamaModelShowInfo(
}
}
async function queryOllamaModelShowInfoCached(
apiBase: string,
model: Pick<OllamaTagModel, "name" | "digest" | "modified_at">,
): Promise<OllamaModelShowInfo> {
const normalizedApiBase = resolveOllamaApiBase(apiBase);
const cacheKey = buildOllamaModelShowCacheKey(normalizedApiBase, model);
if (!cacheKey) {
return await queryOllamaModelShowInfo(normalizedApiBase, model.name);
}
const cached = ollamaModelShowInfoCache.get(cacheKey);
if (cached) {
return await cached;
}
const pending = queryOllamaModelShowInfo(normalizedApiBase, model.name).then((result) => {
if (!hasCachedOllamaModelShowInfo(result)) {
ollamaModelShowInfoCache.delete(cacheKey);
}
return result;
});
setOllamaModelShowCacheEntry(cacheKey, pending);
return await pending;
}
/** @deprecated Use queryOllamaModelShowInfo instead. */
export async function queryOllamaContextWindow(
apiBase: string,
@@ -136,7 +189,7 @@ export async function enrichOllamaModelsWithContext(
const batch = models.slice(index, index + concurrency);
const batchResults = await Promise.all(
batch.map(async (model) => {
const showInfo = await queryOllamaModelShowInfo(apiBase, model.name);
const showInfo = await queryOllamaModelShowInfoCached(apiBase, model);
return {
...model,
contextWindow: showInfo.contextWindow,
@@ -198,3 +251,7 @@ export async function fetchOllamaModels(
return { reachable: false, models: [] };
}
}
/** Test-only hook: discard every cached /api/show lookup between test cases. */
export function resetOllamaModelShowInfoCacheForTest(): void {
  for (const cachedKey of [...ollamaModelShowInfoCache.keys()]) {
    ollamaModelShowInfoCache.delete(cachedKey);
  }
}