From 69daef8246f15bd8af6500e73f6f88210fe5d56e Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Mon, 27 Apr 2026 02:31:46 +0100
Subject: [PATCH] fix: honor Ollama Modelfile num_ctx discovery

---
 CHANGELOG.md                                  |  1 +
 docs/providers/ollama.md                      |  4 +-
 extensions/ollama/src/provider-models.test.ts | 60 +++++++++++++++++++
 extensions/ollama/src/provider-models.ts      | 25 ++++++++
 4 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36e50c9d8cd..033b8f07fda 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@ Docs: https://docs.openclaw.ai
 - Logging: write validated diagnostic trace context as top-level `traceId`, `spanId`, `parentSpanId`, and `traceFlags` fields in file-log JSONL records so traced requests and model calls are easier to correlate in log processors. Refs #40353. Thanks @liangruochong44-ui.
 - Logging/sessions: apply configured redaction patterns to persisted session transcript text and accept escaped character classes in safe custom redaction regexes, so transcript JSONL no longer keeps matching sensitive text in the clear. Fixes #42982. Thanks @panpan0000.
 - Providers/Ollama: honor `/api/show` capabilities when registering local models so non-tool Ollama models no longer receive the agent tool surface, and keep native Ollama thinking opt-in instead of enabling it by default. Fixes #64710 and duplicate #65343. Thanks @yuan-b, @netherby, @xilopaint, and @Diyforfun2026.
+- Providers/Ollama: read larger custom Modelfile `PARAMETER num_ctx` values from `/api/show` so auto-discovered Ollama models with expanded context no longer stay pinned to the base model context. Fixes #68344. Thanks @neeravmakwana.
 - Providers/Ollama: expose native Ollama thinking effort levels so `/think max` is accepted for reasoning-capable Ollama models and maps to Ollama's highest supported `think` effort. Fixes #71584. Thanks @g0st1n.
 - Providers/Ollama: strip the active custom Ollama provider prefix before native chat and embedding requests, so custom provider ids like `ollama-spark/qwen3:32b` reach Ollama as the real model name. Fixes #72353. Thanks @maximus-dss and @hclsys.
 - Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md
index 27f5aead375..0233c70f6fb 100644
--- a/docs/providers/ollama.md
+++ b/docs/providers/ollama.md
@@ -155,7 +155,7 @@ When you set `OLLAMA_API_KEY` (or an auth profile) and **do not** define `models
 | Behavior             | Detail                                                                                                                                                               |
 | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | Catalog query        | Queries `/api/tags`                                                                                                                                                  |
-| Capability detection | Uses best-effort `/api/show` lookups to read `contextWindow` and detect capabilities (including vision)                                                              |
+| Capability detection | Uses best-effort `/api/show` lookups to read `contextWindow`, expanded `num_ctx` Modelfile parameters, and capabilities including vision/tools                       |
 | Vision models        | Models with a `vision` capability reported by `/api/show` are marked as image-capable (`input: ["text", "image"]`), so OpenClaw auto-injects images into the prompt  |
 | Reasoning detection  | Marks `reasoning` with a model-name heuristic (`r1`, `reasoning`, `think`)                                                                                           |
 | Token limits         | Sets `maxTokens` to the default Ollama max-token cap used by OpenClaw                                                                                                |
@@ -399,7 +399,7 @@
 
 For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-search).
 
-For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, otherwise it falls back to the default Ollama context window used by OpenClaw.
+For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, including larger `PARAMETER num_ctx` values from custom Modelfiles. Otherwise it falls back to the default Ollama context window used by OpenClaw.
 
 You can override `contextWindow` and `maxTokens` in explicit provider config:
 
diff --git a/extensions/ollama/src/provider-models.test.ts b/extensions/ollama/src/provider-models.test.ts
index 76f85fbf34f..ea4dbf8933d 100644
--- a/extensions/ollama/src/provider-models.test.ts
+++ b/extensions/ollama/src/provider-models.test.ts
@@ -3,6 +3,7 @@ import { jsonResponse, requestBodyText, requestUrl } from "../../../src/test-hel
 import {
   buildOllamaModelDefinition,
   enrichOllamaModelsWithContext,
+  parseOllamaNumCtxParameter,
   resetOllamaModelShowInfoCacheForTest,
   resolveOllamaApiBase,
   type OllamaTagModel,
@@ -42,6 +43,58 @@ describe("ollama provider models", () => {
     ]);
   });
 
+  it("uses Modelfile num_ctx when it expands the discovered context window", async () => {
+    const models: OllamaTagModel[] = [{ name: "llama3-32k:latest" }];
+    const fetchMock = vi.fn(async () =>
+      jsonResponse({
+        model_info: { "llama.context_length": 8192 },
+        parameters: 'stop "<|eot_id|>"\nnum_ctx 32768\nnum_keep 5',
+        capabilities: ["completion"],
+      }),
+    );
+    vi.stubGlobal("fetch", fetchMock);
+
+    const enriched = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", models);
+
+    expect(enriched).toEqual([
+      {
+        name: "llama3-32k:latest",
+        contextWindow: 32768,
+        capabilities: ["completion"],
+      },
+    ]);
+  });
+
+  it("keeps the larger native context window when Modelfile num_ctx is smaller", async () => {
+    const models: OllamaTagModel[] = [{ name: "llama3.2:latest" }];
+    const fetchMock = vi.fn(async () =>
+      jsonResponse({
+        model_info: { "llama.context_length": 131072 },
+        parameters: "num_ctx 4096",
+      }),
+    );
+    vi.stubGlobal("fetch", fetchMock);
+
+    const enriched = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", models);
+
+    expect(enriched[0]?.contextWindow).toBe(131072);
+  });
+
+  it("uses positive num_ctx when /api/show omits model context metadata", async () => {
+    const models: OllamaTagModel[] = [{ name: "custom-model:latest" }];
+    const fetchMock = vi.fn(async () =>
+      jsonResponse({
+        model_info: {},
+        parameters: "num_ctx 16384",
+      }),
+    );
+    vi.stubGlobal("fetch", fetchMock);
+
+    const enriched = await enrichOllamaModelsWithContext("http://127.0.0.1:11434", models);
+
+    expect(enriched[0]?.contextWindow).toBe(16384);
+  });
+
   it("sets models with vision capability from /api/show capabilities", async () => {
     const models: OllamaTagModel[] = [{ name: "kimi-k2.5:cloud" }, { name: "glm-5.1:cloud" }];
     const fetchMock = vi.fn(async (input: string | URL | Request, init?: RequestInit) => {
@@ -225,4 +278,11 @@ describe("ollama provider models", () => {
     expect(model.reasoning).toBe(false);
     expect(model.compat?.supportsTools).toBe(false);
   });
+
+  it("parses the last positive Modelfile num_ctx value", () => {
+    expect(parseOllamaNumCtxParameter("num_ctx 8192\nnum_ctx 32768")).toBe(32768);
+    expect(parseOllamaNumCtxParameter("temperature 0.8\nnum_ctx -1\nnum_ctx 0")).toBeUndefined();
+    expect(parseOllamaNumCtxParameter('stop "<|eot_id|>"')).toBeUndefined();
+    expect(parseOllamaNumCtxParameter({ num_ctx: 8192 })).toBeUndefined();
+  });
 });
diff --git a/extensions/ollama/src/provider-models.ts b/extensions/ollama/src/provider-models.ts
index f3c891fae94..401c88ab83b 100644
--- a/extensions/ollama/src/provider-models.ts
+++ b/extensions/ollama/src/provider-models.ts
@@ -95,6 +95,25 @@ function hasCachedOllamaModelShowInfo(info: OllamaModelShowInfo): boolean {
   return typeof info.contextWindow === "number" || (info.capabilities?.length ?? 0) > 0;
 }
 
+export function parseOllamaNumCtxParameter(parameters: unknown): number | undefined {
+  if (typeof parameters !== "string" || !parameters.trim()) {
+    return undefined;
+  }
+
+  let lastValue: number | undefined;
+  for (const rawLine of parameters.split(/\r?\n/)) {
+    const match = rawLine.trim().match(/^num_ctx\s+(-?\d+)\b/);
+    if (!match) {
+      continue;
+    }
+    const parsed = Number.parseInt(match[1], 10);
+    if (Number.isFinite(parsed) && parsed > 0) {
+      lastValue = parsed;
+    }
+  }
+  return lastValue;
+}
+
 export async function queryOllamaModelShowInfo(
   apiBase: string,
   modelName: string,
@@ -119,6 +138,7 @@ export async function queryOllamaModelShowInfo(
     const data = (await response.json()) as {
       model_info?: Record<string, unknown>;
       capabilities?: unknown;
+      parameters?: unknown;
    };
 
    let contextWindow: number | undefined;
@@ -138,6 +158,11 @@ export async function queryOllamaModelShowInfo(
      }
    }
 
+    const paramCtx = parseOllamaNumCtxParameter(data.parameters);
+    if (paramCtx !== undefined && (contextWindow === undefined || paramCtx > contextWindow)) {
+      contextWindow = paramCtx;
+    }
+
    const capabilities = Array.isArray(data.capabilities)
      ? (data.capabilities as unknown[]).filter((c): c is string => typeof c === "string")
      : undefined;
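-- 
Below is a minimal standalone sketch of the num_ctx precedence this patch introduces, for reviewers who want to try it outside the test suite. The import path, file name, Modelfile contents, and numeric values are illustrative assumptions; only `parseOllamaNumCtxParameter` and the larger-value-wins comparison mirror the patch above.

// sketch.ts -- illustrative only, not part of the patch
import { parseOllamaNumCtxParameter } from "./extensions/ollama/src/provider-models";

// /api/show reports Modelfile parameters as one "key value" pair per line,
// e.g. for a hypothetical custom model built from:
//   FROM llama3
//   PARAMETER num_ctx 32768
const parameters = 'stop "<|eot_id|>"\nnum_ctx 32768\nnum_keep 5';

// Hypothetical base-model context length from model_info["llama.context_length"].
const nativeContext: number | undefined = 8192;

// Last positive num_ctx wins; non-string or non-positive input yields undefined.
const paramCtx = parseOllamaNumCtxParameter(parameters); // 32768

// The patch only ever widens the window: a smaller or missing num_ctx keeps
// the native context length, matching the tests above.
const contextWindow =
  paramCtx !== undefined && (nativeContext === undefined || paramCtx > nativeContext)
    ? paramCtx
    : nativeContext;

console.log(contextWindow); // 32768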