mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-12 01:31:08 +00:00
Ollama: cache model show metadata (#64753)
Merged via squash.
Prepared head SHA: de56dfb916
Co-authored-by: ImLukeF <92253590+ImLukeF@users.noreply.github.com>
Reviewed-by: @ImLukeF
This commit is contained in:
@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
- Tools/video_generate: allow providers and plugins to return URL-only generated video assets so agent delivery and `openclaw capability video generate --output ...` can forward or stream large videos without requiring the full file in memory first. (#61988) Thanks @xieyongliang.
|
||||
- Models/providers: surface how configured OpenAI-compatible endpoints are classified in embedded-agent debug logs, so local and proxy routing issues are easier to diagnose. (#64754) Thanks @ImLukeF.
|
||||
- Ollama: cache `/api/show` context-window and capability metadata during model discovery so repeated picker refreshes stop refetching unchanged models, while still retrying after empty responses and invalidating on digest changes. (#64753) Thanks @ImLukeF.
|
||||
|
||||
### Fixes
|
||||
|
||||
|
||||
@@ -3,12 +3,14 @@ import { jsonResponse, requestBodyText, requestUrl } from "../../../src/test-hel
|
||||
import {
|
||||
buildOllamaModelDefinition,
|
||||
enrichOllamaModelsWithContext,
|
||||
resetOllamaModelShowInfoCacheForTest,
|
||||
resolveOllamaApiBase,
|
||||
type OllamaTagModel,
|
||||
} from "./provider-models.js";
|
||||
|
||||
describe("ollama provider models", () => {
|
||||
// Keep each test isolated: wipe the module-level /api/show metadata cache and
// restore the real global fetch after every test case.
afterEach(() => {
  resetOllamaModelShowInfoCacheForTest();
  vi.unstubAllGlobals();
});
|
||||
|
||||
@@ -80,6 +82,122 @@ describe("ollama provider models", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
// Two discovery passes over the same model (same digest) must hit /api/show
// only once: the second pass is served entirely from the in-memory cache.
it("reuses cached /api/show metadata when the model digest is unchanged", async () => {
  const models: OllamaTagModel[] = [
    { name: "qwen3:32b", digest: "sha256:abc123", modified_at: "2026-04-11T00:00:00Z" },
  ];
  // Stub fetch with a canned /api/show payload so call counts can be asserted.
  const fetchMock = vi.fn(async () =>
    jsonResponse({
      model_info: { "qwen3.context_length": 131072 },
      capabilities: ["thinking", "tools"],
    }),
  );
  vi.stubGlobal("fetch", fetchMock);

  const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", models);
  const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", models);

  // Same enrichment result both times, but only one network round-trip.
  expect(first).toEqual(second);
  expect(fetchMock).toHaveBeenCalledTimes(1);
});
|
||||
|
||||
// A changed digest means the model was updated on the server, so the cache
// entry must be bypassed and /api/show fetched again for the new version.
it("refreshes cached /api/show metadata when the model digest changes", async () => {
  // First response: old model version; second response: updated version with
  // a larger context window and an extra capability.
  const fetchMock = vi
    .fn()
    .mockResolvedValueOnce(
      jsonResponse({
        model_info: { "qwen3.context_length": 131072 },
        capabilities: ["thinking", "tools"],
      }),
    )
    .mockResolvedValueOnce(
      jsonResponse({
        model_info: { "qwen3.context_length": 262144 },
        capabilities: ["vision", "thinking", "tools"],
      }),
    );
  vi.stubGlobal("fetch", fetchMock);

  const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [
    { name: "qwen3:32b", digest: "sha256:abc123" },
  ]);
  const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [
    { name: "qwen3:32b", digest: "sha256:def456" },
  ]);

  // Each digest maps to its own metadata — no stale reuse across versions.
  expect(first).toEqual([
    {
      name: "qwen3:32b",
      digest: "sha256:abc123",
      contextWindow: 131072,
      capabilities: ["thinking", "tools"],
    },
  ]);
  expect(second).toEqual([
    {
      name: "qwen3:32b",
      digest: "sha256:def456",
      contextWindow: 262144,
      capabilities: ["vision", "thinking", "tools"],
    },
  ]);
  expect(fetchMock).toHaveBeenCalledTimes(2);
});
|
||||
|
||||
// An empty /api/show response must not be cached: the next discovery pass for
// the same digest should retry the request and pick up the real metadata.
it("retries /api/show after an empty result for the same digest", async () => {
  const fetchMock = vi
    .fn()
    // First call: empty body (no model_info, no capabilities).
    .mockResolvedValueOnce(jsonResponse({}))
    // Second call: the real metadata becomes available.
    .mockResolvedValueOnce(
      jsonResponse({
        model_info: { "qwen3.context_length": 131072 },
        capabilities: ["thinking", "tools"],
      }),
    );
  vi.stubGlobal("fetch", fetchMock);

  const model: OllamaTagModel = { name: "qwen3:32b", digest: "sha256:abc123" };
  const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [model]);
  const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [model]);

  // First pass surfaces the empty result as-is...
  expect(first).toEqual([
    {
      name: "qwen3:32b",
      digest: "sha256:abc123",
      contextWindow: undefined,
      capabilities: undefined,
    },
  ]);
  // ...and the second pass refetched (2 calls) instead of caching the miss.
  expect(second).toEqual([
    {
      name: "qwen3:32b",
      digest: "sha256:abc123",
      contextWindow: 131072,
      capabilities: ["thinking", "tools"],
    },
  ]);
  expect(fetchMock).toHaveBeenCalledTimes(2);
});
|
||||
|
||||
// Base URLs with an OpenAI-compatible "/v1" suffix must be normalized before
// fetching, so "…/v1/" and the bare host share a single cache entry.
it("normalizes /v1 base URLs before fetching and reuses the same cache entry", async () => {
  const model: OllamaTagModel = { name: "qwen3:32b", digest: "sha256:abc123" };
  const fetchMock = vi.fn(async (input: string | URL | Request, init?: RequestInit) => {
    // The request must target the native Ollama endpoint, not …/v1/api/show.
    expect(requestUrl(input)).toBe("http://127.0.0.1:11434/api/show");
    expect(JSON.parse(requestBodyText(init?.body))).toEqual({ name: "qwen3:32b" });
    return jsonResponse({
      model_info: { "qwen3.context_length": 131072 },
      capabilities: ["thinking", "tools"],
    });
  });
  vi.stubGlobal("fetch", fetchMock);

  const first = await enrichOllamaModelsWithContext("http://127.0.0.1:11434/v1/", [model]);
  const second = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", [model]);

  // Both spellings resolve to the same cache key: one fetch, identical output.
  expect(first).toEqual(second);
  expect(fetchMock).toHaveBeenCalledTimes(1);
});
|
||||
|
||||
it("buildOllamaModelDefinition sets input to text+image when vision capability is present", () => {
|
||||
const visionModel = buildOllamaModelDefinition("kimi-k2.5:cloud", 262144, [
|
||||
"vision",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-onboard";
|
||||
import { fetchWithSsrFGuard, type SsrFPolicy } from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
import { fetchWithSsrFGuard } from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
import {
|
||||
OLLAMA_DEFAULT_BASE_URL,
|
||||
OLLAMA_DEFAULT_CONTEXT_WINDOW,
|
||||
@@ -29,8 +29,10 @@ export type OllamaModelWithContext = OllamaTagModel & {
|
||||
};
|
||||
|
||||
const OLLAMA_SHOW_CONCURRENCY = 8;
|
||||
const MAX_OLLAMA_SHOW_CACHE_ENTRIES = 256;
|
||||
const ollamaModelShowInfoCache = new Map<string, Promise<OllamaModelShowInfo>>();
|
||||
|
||||
export function buildOllamaBaseUrlSsrFPolicy(baseUrl: string): SsrFPolicy | undefined {
|
||||
export function buildOllamaBaseUrlSsrFPolicy(baseUrl: string) {
|
||||
const trimmed = baseUrl.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
@@ -62,20 +64,46 @@ export type OllamaModelShowInfo = {
|
||||
capabilities?: string[];
|
||||
};
|
||||
|
||||
function buildOllamaModelShowCacheKey(
|
||||
apiBase: string,
|
||||
model: Pick<OllamaTagModel, "name" | "digest" | "modified_at">,
|
||||
): string | undefined {
|
||||
const version = model.digest?.trim() || model.modified_at?.trim();
|
||||
if (!version) {
|
||||
return undefined;
|
||||
}
|
||||
return `${resolveOllamaApiBase(apiBase)}|${model.name}|${version}`;
|
||||
}
|
||||
|
||||
function setOllamaModelShowCacheEntry(key: string, value: Promise<OllamaModelShowInfo>): void {
|
||||
if (ollamaModelShowInfoCache.size >= MAX_OLLAMA_SHOW_CACHE_ENTRIES) {
|
||||
const oldestKey = ollamaModelShowInfoCache.keys().next().value;
|
||||
if (typeof oldestKey === "string") {
|
||||
ollamaModelShowInfoCache.delete(oldestKey);
|
||||
}
|
||||
}
|
||||
ollamaModelShowInfoCache.set(key, value);
|
||||
}
|
||||
|
||||
function hasCachedOllamaModelShowInfo(info: OllamaModelShowInfo): boolean {
|
||||
return typeof info.contextWindow === "number" || (info.capabilities?.length ?? 0) > 0;
|
||||
}
|
||||
|
||||
export async function queryOllamaModelShowInfo(
|
||||
apiBase: string,
|
||||
modelName: string,
|
||||
): Promise<OllamaModelShowInfo> {
|
||||
const normalizedApiBase = resolveOllamaApiBase(apiBase);
|
||||
try {
|
||||
const { response, release } = await fetchWithSsrFGuard({
|
||||
url: `${apiBase}/api/show`,
|
||||
url: `${normalizedApiBase}/api/show`,
|
||||
init: {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ name: modelName }),
|
||||
signal: AbortSignal.timeout(3000),
|
||||
},
|
||||
policy: buildOllamaBaseUrlSsrFPolicy(apiBase),
|
||||
policy: buildOllamaBaseUrlSsrFPolicy(normalizedApiBase),
|
||||
auditContext: "ollama-provider-models.show",
|
||||
});
|
||||
try {
|
||||
@@ -117,6 +145,31 @@ export async function queryOllamaModelShowInfo(
|
||||
}
|
||||
}
|
||||
|
||||
async function queryOllamaModelShowInfoCached(
|
||||
apiBase: string,
|
||||
model: Pick<OllamaTagModel, "name" | "digest" | "modified_at">,
|
||||
): Promise<OllamaModelShowInfo> {
|
||||
const normalizedApiBase = resolveOllamaApiBase(apiBase);
|
||||
const cacheKey = buildOllamaModelShowCacheKey(normalizedApiBase, model);
|
||||
if (!cacheKey) {
|
||||
return await queryOllamaModelShowInfo(normalizedApiBase, model.name);
|
||||
}
|
||||
|
||||
const cached = ollamaModelShowInfoCache.get(cacheKey);
|
||||
if (cached) {
|
||||
return await cached;
|
||||
}
|
||||
|
||||
const pending = queryOllamaModelShowInfo(normalizedApiBase, model.name).then((result) => {
|
||||
if (!hasCachedOllamaModelShowInfo(result)) {
|
||||
ollamaModelShowInfoCache.delete(cacheKey);
|
||||
}
|
||||
return result;
|
||||
});
|
||||
setOllamaModelShowCacheEntry(cacheKey, pending);
|
||||
return await pending;
|
||||
}
|
||||
|
||||
/** @deprecated Use queryOllamaModelShowInfo instead. */
|
||||
export async function queryOllamaContextWindow(
|
||||
apiBase: string,
|
||||
@@ -136,7 +189,7 @@ export async function enrichOllamaModelsWithContext(
|
||||
const batch = models.slice(index, index + concurrency);
|
||||
const batchResults = await Promise.all(
|
||||
batch.map(async (model) => {
|
||||
const showInfo = await queryOllamaModelShowInfo(apiBase, model.name);
|
||||
const showInfo = await queryOllamaModelShowInfoCached(apiBase, model);
|
||||
return {
|
||||
...model,
|
||||
contextWindow: showInfo.contextWindow,
|
||||
@@ -198,3 +251,7 @@ export async function fetchOllamaModels(
|
||||
return { reachable: false, models: [] };
|
||||
}
|
||||
}
|
||||
|
||||
/** Test-only helper: wipe the /api/show metadata cache between test cases. */
export function resetOllamaModelShowInfoCacheForTest(): void {
  ollamaModelShowInfoCache.clear();
}
|
||||
|
||||
Reference in New Issue
Block a user