From 3da58226bfe6917ebc2010e79fe561fb6f1917ca Mon Sep 17 00:00:00 2001
From: Luke <92253590+ImLukeF@users.noreply.github.com>
Date: Sat, 11 Apr 2026 22:30:24 +1000
Subject: [PATCH] Ollama: cache model show metadata (#64753)

Merged via squash.

Prepared head SHA: de56dfb91657019b209a37642ea2fb0f26158a24
Co-authored-by: ImLukeF <92253590+ImLukeF@users.noreply.github.com>
Co-authored-by: ImLukeF <92253590+ImLukeF@users.noreply.github.com>
Reviewed-by: @ImLukeF
---
 CHANGELOG.md                                  |   1 +
 extensions/ollama/src/provider-models.test.ts | 118 ++++++++++++++++++
 extensions/ollama/src/provider-models.ts      |  67 +++++++++-
 3 files changed, 181 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5011a4ffec7..46f9cba1af2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
 
 - Tools/video_generate: allow providers and plugins to return URL-only generated video assets so agent delivery and `openclaw capability video generate --output ...` can forward or stream large videos without requiring the full file in memory first. (#61988) Thanks @xieyongliang.
 - Models/providers: surface how configured OpenAI-compatible endpoints are classified in embedded-agent debug logs, so local and proxy routing issues are easier to diagnose. (#64754) Thanks @ImLukeF.
+- Ollama: cache `/api/show` context-window and capability metadata during model discovery so repeated picker refreshes stop refetching unchanged models, while still retrying after empty responses and invalidating on digest changes. (#64753) Thanks @ImLukeF.
 
 ### Fixes
 
diff --git a/extensions/ollama/src/provider-models.test.ts b/extensions/ollama/src/provider-models.test.ts
index bf1b568454a..ed6ce868a01 100644
--- a/extensions/ollama/src/provider-models.test.ts
+++ b/extensions/ollama/src/provider-models.test.ts
@@ -3,12 +3,14 @@ import { jsonResponse, requestBodyText, requestUrl } from "../../../src/test-hel
 import {
   buildOllamaModelDefinition,
   enrichOllamaModelsWithContext,
+  resetOllamaModelShowInfoCacheForTest,
   resolveOllamaApiBase,
   type OllamaTagModel,
 } from "./provider-models.js";
 
 describe("ollama provider models", () => {
   afterEach(() => {
+    resetOllamaModelShowInfoCacheForTest();
     vi.unstubAllGlobals();
   });
 
@@ -80,6 +82,122 @@ describe("ollama provider models", () => {
     ]);
   });
 
+  it("reuses cached /api/show metadata when the model digest is unchanged", async () => {
+    const models: OllamaTagModel[] = [
+      { name: "qwen3:32b", digest: "sha256:abc123", modified_at: "2026-04-11T00:00:00Z" },
+    ];
+    const fetchMock = vi.fn(async () =>
+      jsonResponse({
+        model_info: { "qwen3.context_length": 131072 },
+        capabilities: ["thinking", "tools"],
+      }),
+    );
+    vi.stubGlobal("fetch", fetchMock);
+    // Two enrichment passes over the same model list (identical name and digest).
+    const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", models);
+    const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", models);
+    // Equal results from a single fetch show the second pass was served from the cache.
+    expect(first).toEqual(second);
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+  });
+
+  it("refreshes cached /api/show metadata when the model digest changes", async () => {
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce(
+        jsonResponse({
+          model_info: { "qwen3.context_length": 131072 },
+          capabilities: ["thinking", "tools"],
+        }),
+      )
+      .mockResolvedValueOnce(
+        jsonResponse({
+          model_info: { "qwen3.context_length": 262144 },
+          capabilities: ["vision", "thinking", "tools"],
+        }),
+      );
+    vi.stubGlobal("fetch", fetchMock);
+    // Same model name enriched twice, but the digest differs between the two passes.
+    const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [
+      { name: "qwen3:32b", digest: "sha256:abc123" },
+    ]);
+    const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [
+      { name: "qwen3:32b", digest: "sha256:def456" },
+    ]);
+    // Each digest gets its own /api/show response, so both mocked replies are consumed.
+    expect(first).toEqual([
+      {
+        name: "qwen3:32b",
+        digest: "sha256:abc123",
+        contextWindow: 131072,
+        capabilities: ["thinking", "tools"],
+      },
+    ]);
+    expect(second).toEqual([
+      {
+        name: "qwen3:32b",
+        digest: "sha256:def456",
+        contextWindow: 262144,
+        capabilities: ["vision", "thinking", "tools"],
+      },
+    ]);
+    expect(fetchMock).toHaveBeenCalledTimes(2);
+  });
+
+  it("retries /api/show after an empty result for the same digest", async () => {
+    const fetchMock = vi
+      .fn()
+      .mockResolvedValueOnce(jsonResponse({}))
+      .mockResolvedValueOnce(
+        jsonResponse({
+          model_info: { "qwen3.context_length": 131072 },
+          capabilities: ["thinking", "tools"],
+        }),
+      );
+    vi.stubGlobal("fetch", fetchMock);
+    // The first reply is empty and the second has metadata; the model digest never changes.
+    const model: OllamaTagModel = { name: "qwen3:32b", digest: "sha256:abc123" };
+    const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [model]);
+    const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [model]);
+    // The empty first result must not be reused: the second pass fetches again and is enriched.
+    expect(first).toEqual([
+      {
+        name: "qwen3:32b",
+        digest: "sha256:abc123",
+        contextWindow: undefined,
+        capabilities: undefined,
+      },
+    ]);
+    expect(second).toEqual([
+      {
+        name: "qwen3:32b",
+        digest: "sha256:abc123",
+        contextWindow: 131072,
+        capabilities: ["thinking", "tools"],
+      },
+    ]);
+    expect(fetchMock).toHaveBeenCalledTimes(2);
+  });
+
+  it("normalizes /v1 base URLs before fetching and reuses the same cache entry", async () => {
+    const model: OllamaTagModel = { name: "qwen3:32b", digest: "sha256:abc123" };
+    const fetchMock = vi.fn(async (input: string | URL | Request, init?: RequestInit) => {
+      expect(requestUrl(input)).toBe("http://127.0.0.1:11434/api/show");
+      expect(JSON.parse(requestBodyText(init?.body))).toEqual({ name: "qwen3:32b" });
+      return jsonResponse({
+        model_info: { "qwen3.context_length": 131072 },
+        capabilities: ["thinking", "tools"],
+      });
+    });
+    vi.stubGlobal("fetch", fetchMock);
+    // First pass uses a /v1/ base URL, second the bare host; the mock asserts the fetched URL.
+    const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434/v1/", [model]);
+    const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [model]);
+    // A single fetch across both passes shows the two base URL forms share one cache entry.
+    expect(first).toEqual(second);
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+  });
+
   it("buildOllamaModelDefinition sets input to text+image when vision capability is present", () => {
     const visionModel = buildOllamaModelDefinition("kimi-k2.5:cloud", 262144, [
       "vision",
diff --git a/extensions/ollama/src/provider-models.ts b/extensions/ollama/src/provider-models.ts
index 3dce0ef9887..e7482273f59 100644
--- a/extensions/ollama/src/provider-models.ts
+++ b/extensions/ollama/src/provider-models.ts
@@ -1,5 +1,5 @@
 import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-onboard";
-import { fetchWithSsrFGuard, type SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime";
+import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
 import {
   OLLAMA_DEFAULT_BASE_URL,
   OLLAMA_DEFAULT_CONTEXT_WINDOW,
@@ -29,8 +29,10 @@ export type OllamaModelWithContext = OllamaTagModel & {
 };
 
 const OLLAMA_SHOW_CONCURRENCY = 8;
+const MAX_OLLAMA_SHOW_CACHE_ENTRIES = 256;
+const ollamaModelShowInfoCache = new Map<string, Promise<OllamaModelShowInfo>>();
 
-export function buildOllamaBaseUrlSsrFPolicy(baseUrl: string): SsrFPolicy | undefined {
+export function buildOllamaBaseUrlSsrFPolicy(baseUrl: string) {
   const trimmed = baseUrl.trim();
   if (!trimmed) {
     return undefined;
@@ -62,20 +64,46 @@ export type OllamaModelShowInfo = {
   capabilities?: string[];
 };
 
+/** Builds the show-info cache key, or undefined when the model has no digest/modified_at. */
+function buildOllamaModelShowCacheKey(
+  apiBase: string,
+  model: Pick<OllamaTagModel, "name" | "digest" | "modified_at">,
+): string | undefined {
+  // The trimmed digest is the preferred version marker; modified_at is the fallback.
+  const revision = model.digest?.trim() || model.modified_at?.trim();
+  // Key layout: resolved API base | model name | version marker.
+  return revision ? `${resolveOllamaApiBase(apiBase)}|${model.name}|${revision}` : undefined;
+}
+
+/** Stores a lookup promise; a full cache first evicts its oldest entry (Map insertion order). */
+function setOllamaModelShowCacheEntry(key: string, value: Promise<OllamaModelShowInfo>): void {
+  const isFull = ollamaModelShowInfoCache.size >= MAX_OLLAMA_SHOW_CACHE_ENTRIES;
+  const evictKey = isFull ? ollamaModelShowInfoCache.keys().next().value : undefined;
+  if (typeof evictKey === "string") {
+    ollamaModelShowInfoCache.delete(evictKey);
+  }
+  ollamaModelShowInfoCache.set(key, value);
+}
+/** True when a show result has at least one capability or a numeric context window. */
+function hasCachedOllamaModelShowInfo(info: OllamaModelShowInfo): boolean {
+  return Boolean(info.capabilities?.length) || typeof info.contextWindow === "number";
+}
+
 export async function queryOllamaModelShowInfo(
   apiBase: string,
   modelName: string,
 ): Promise<OllamaModelShowInfo> {
+  const normalizedApiBase = resolveOllamaApiBase(apiBase);
   try {
     const { response, release } = await fetchWithSsrFGuard({
-      url: `${apiBase}/api/show`,
+      url: `${normalizedApiBase}/api/show`,
       init: {
         method: "POST",
         headers: { "Content-Type": "application/json" },
         body: JSON.stringify({ name: modelName }),
         signal: AbortSignal.timeout(3000),
       },
-      policy: buildOllamaBaseUrlSsrFPolicy(apiBase),
+      policy: buildOllamaBaseUrlSsrFPolicy(normalizedApiBase),
       auditContext: "ollama-provider-models.show",
     });
     try {
@@ -117,6 +145,31 @@ export async function queryOllamaModelShowInfo(
   }
 }
 
+/** Cached /api/show lookup: callers for the same base/model/version share one stored promise. */
+async function queryOllamaModelShowInfoCached(
+  apiBase: string,
+  model: Pick<OllamaTagModel, "name" | "digest" | "modified_at">,
+): Promise<OllamaModelShowInfo> {
+  const normalizedApiBase = resolveOllamaApiBase(apiBase);
+  const cacheKey = buildOllamaModelShowCacheKey(normalizedApiBase, model);
+  if (!cacheKey) {
+    return await queryOllamaModelShowInfo(normalizedApiBase, model.name);
+  }
+  const cached = ollamaModelShowInfoCache.get(cacheKey);
+  if (cached) {
+    return await cached;
+  }
+  const pending = queryOllamaModelShowInfo(normalizedApiBase, model.name);
+  setOllamaModelShowCacheEntry(cacheKey, pending);
+  // Keep the entry only for a result with metadata; empty results and rejections are retried.
+  const keep = await pending.then(hasCachedOllamaModelShowInfo, () => false);
+  // Drop only our own promise: after an eviction the key may already hold a newer lookup.
+  if (!keep && ollamaModelShowInfoCache.get(cacheKey) === pending) {
+    ollamaModelShowInfoCache.delete(cacheKey);
+  }
+  return await pending;
+}
+
 /** @deprecated Use queryOllamaModelShowInfo instead. */
 export async function queryOllamaContextWindow(
   apiBase: string,
@@ -136,7 +189,7 @@ export async function enrichOllamaModelsWithContext(
     const batch = models.slice(index, index + concurrency);
     const batchResults = await Promise.all(
       batch.map(async (model) => {
-        const showInfo = await queryOllamaModelShowInfo(apiBase, model.name);
+        const showInfo = await queryOllamaModelShowInfoCached(apiBase, model);
         return {
           ...model,
           contextWindow: showInfo.contextWindow,
@@ -198,3 +251,7 @@ export async function fetchOllamaModels(
     return { reachable: false, models: [] };
   }
 }
+/** Test-only hook: clears the module-level /api/show metadata cache. */
+export function resetOllamaModelShowInfoCacheForTest(): void {
+  ollamaModelShowInfoCache.clear();
+}