From aa071e0b60613f5ed49345a1c9b9bbdaa10cae19 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Mon, 27 Apr 2026 03:08:03 +0100
Subject: [PATCH] fix(ollama): forward native model params

---
 CHANGELOG.md                                 |  1 +
 docs/providers/ollama.md                     |  5 ++
 extensions/ollama/ollama.live.test.ts        |  7 +-
 extensions/ollama/src/stream-runtime.test.ts | 27 ++++++-
 extensions/ollama/src/stream.ts              | 80 +++++++++++++++++++-
 5 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0313dea5da4..f874cbde582 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
 - Providers/Ollama: honor `/api/show` capabilities when registering local models so non-tool Ollama models no longer receive the agent tool surface, and keep native Ollama thinking opt-in instead of enabling it by default. Fixes #64710 and duplicate #65343. Thanks @yuan-b, @netherby, @xilopaint, and @Diyforfun2026.
 - Providers/Ollama: read larger custom Modelfile `PARAMETER num_ctx` values from `/api/show` so auto-discovered Ollama models with expanded context no longer stay pinned to the base model context. Fixes #68344. Thanks @neeravmakwana.
 - Providers/Ollama: honor configured model `params.num_ctx` in native and OpenAI-compatible Ollama requests so local models can cap runtime context without rebuilding Modelfiles. Fixes #44550 and #52206; supersedes #69464. Thanks @taitruong, @armi0024, and @LokiCode404.
+- Providers/Ollama: forward whitelisted native Ollama model params such as `temperature`, `top_p`, and top-level `think` so users can disable API-level thinking or tune local models from config without proxy shims. Fixes #48010. Thanks @tangzhi, @pandego, @maweibin, @Adam-Researchh, and @EmpireCreator.
 - Providers/Ollama: expose native Ollama thinking effort levels so `/think max` is accepted for reasoning-capable Ollama models and maps to Ollama's highest supported `think` effort. Fixes #71584. Thanks @g0st1n.
 - Providers/Ollama: strip the active custom Ollama provider prefix before native chat and embedding requests, so custom provider ids like `ollama-spark/qwen3:32b` reach Ollama as the real model name. Fixes #72353. Thanks @maximus-dss and @hclsys.
 - Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md
index 692b25e20fd..acd8c1a5e8e 100644
--- a/docs/providers/ollama.md
+++ b/docs/providers/ollama.md
@@ -403,6 +403,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s

 You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid, zero, negative, and non-finite values are ignored and fall back to `contextWindow`.
+Native Ollama model entries also accept the common Ollama runtime options under `params`, including `temperature`, `top_p`, `top_k`, `min_p`, `num_predict`, `stop`, `repeat_penalty`, `num_batch`, `num_thread`, and `use_mmap`. OpenClaw forwards only Ollama request keys, so OpenClaw runtime params such as `streaming` are not leaked to Ollama. Use `params.think` or `params.thinking` to send top-level Ollama `think`; `false` disables API-level thinking for Qwen-style thinking models.
+
 ```json5
 {
   models: {
@@ -415,6 +417,9 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
         maxTokens: 65536,
         params: {
           num_ctx: 32768,
+          temperature: 0.7,
+          top_p: 0.9,
+          thinking: false,
         },
       }
     ]
diff --git a/extensions/ollama/ollama.live.test.ts b/extensions/ollama/ollama.live.test.ts
index 88304ddadcf..b4fb48a1b1e 100644
--- a/extensions/ollama/ollama.live.test.ts
+++ b/extensions/ollama/ollama.live.test.ts
@@ -26,7 +26,8 @@ describe.skipIf(!LIVE)("ollama live", () => {
   let payload:
     | {
         model?: string;
-        options?: { num_ctx?: number };
+        think?: boolean;
+        options?: { num_ctx?: number; top_p?: number };
         tools?: Array<{
           function?: {
             parameters?: {
@@ -43,7 +44,7 @@
       api: "ollama",
       provider: PROVIDER_ID,
       contextWindow: 8192,
-      params: { num_ctx: 4096 },
+      params: { num_ctx: 4096, top_p: 0.9, thinking: false },
     } as never,
     {
       messages: [{ role: "user", content: "Reply exactly OK." }],
@@ -82,6 +83,8 @@
     expect(events.some((event) => (event as { type?: string }).type === "done")).toBe(true);
     expect(payload?.model).toBe(CHAT_MODEL);
     expect(payload?.options?.num_ctx).toBe(4096);
+    expect(payload?.options?.top_p).toBe(0.9);
+    expect(payload?.think).toBe(false);
     const properties = payload?.tools?.[0]?.function?.parameters?.properties;
     expect(properties?.city?.type).toBe("string");
     expect(properties?.units?.type).toBe("string");
diff --git a/extensions/ollama/src/stream-runtime.test.ts b/extensions/ollama/src/stream-runtime.test.ts
index 4d7e8958f4c..a34862aaf4c 100644
--- a/extensions/ollama/src/stream-runtime.test.ts
+++ b/extensions/ollama/src/stream-runtime.test.ts
@@ -919,6 +919,7 @@ async function createOllamaTestStream(params: {
   options?: {
     apiKey?: string;
     maxTokens?: number;
+    temperature?: number;
     signal?: AbortSignal;
     headers?: Record<string, string>;
   };
@@ -1205,7 +1206,17 @@ describe("createOllamaStreamFn", () => {
     async (fetchMock) => {
       const stream = await createOllamaTestStream({
         baseUrl: "http://ollama-host:11434",
-        model: { params: { num_ctx: 32768 }, contextWindow: 131072 },
+        model: {
+          params: {
+            num_ctx: 32768,
+            temperature: 0.2,
+            top_p: 0.9,
+            thinking: false,
+            streaming: false,
+          },
+          contextWindow: 131072,
+        },
+        options: { temperature: 0.7, maxTokens: 55 },
       });

       const events = await collectStreamEvents(stream);
@@ -1216,9 +1227,21 @@
       if (typeof requestInit.body !== "string") {
         throw new Error("Expected string request body");
       }
       const requestBody = JSON.parse(requestInit.body) as {
-        options: { num_ctx?: number };
+        think?: boolean;
+        options: {
+          num_ctx?: number;
+          num_predict?: number;
+          temperature?: number;
+          top_p?: number;
+          streaming?: boolean;
+        };
       };
       expect(requestBody.options.num_ctx).toBe(32768);
+      expect(requestBody.options.num_predict).toBe(55);
+      expect(requestBody.options.temperature).toBe(0.7);
+      expect(requestBody.options.top_p).toBe(0.9);
+      expect(requestBody.options.streaming).toBeUndefined();
+      expect(requestBody.think).toBe(false);
     },
   );
 });
diff --git a/extensions/ollama/src/stream.ts b/extensions/ollama/src/stream.ts
index 6845b47a713..29fd46523d1 100644
--- a/extensions/ollama/src/stream.ts
+++ b/extensions/ollama/src/stream.ts
@@ -152,7 +152,31 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
   });
 }

-type OllamaThinkValue = boolean | "low" | "medium" | "high";
+type OllamaThinkValue = boolean | "low" | "medium" | "high" | "max";
+
+const OLLAMA_OPTION_PARAM_KEYS = new Set([
+  "num_keep",
+  "seed",
+  "num_predict",
+  "top_k",
+  "top_p",
+  "min_p",
+  "typical_p",
+  "repeat_last_n",
+  "temperature",
+  "repeat_penalty",
+  "presence_penalty",
+  "frequency_penalty",
+  "stop",
+  "num_ctx",
+  "num_batch",
+  "num_gpu",
+  "main_gpu",
+  "use_mmap",
+  "num_thread",
+]);
+
+const OLLAMA_TOP_LEVEL_PARAM_KEYS = new Set(["format", "keep_alive", "truncate", "shift"]);

 function createOllamaThinkingWrapper(
   baseFn: StreamFn | undefined,
@@ -181,6 +205,22 @@ function resolveOllamaThinkValue(thinkingLevel: unknown): OllamaThinkValue | und
   return undefined;
 }

+function resolveOllamaThinkParamValue(
+  params: Record<string, unknown> | undefined,
+): OllamaThinkValue | undefined {
+  const raw = params?.think ?? params?.thinking;
+  if (typeof raw === "boolean") {
+    return raw;
+  }
+  if (raw === "off") {
+    return false;
+  }
+  if (raw === "low" || raw === "medium" || raw === "high" || raw === "max") {
+    return raw;
+  }
+  return undefined;
+}
+
 function resolveOllamaConfiguredNumCtx(model: ProviderRuntimeModel): number | undefined {
   const raw = model.params?.num_ctx;
   if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
@@ -196,6 +236,39 @@ function resolveOllamaNumCtx(model: ProviderRuntimeModel): number {
   );
 }

+function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string, unknown> {
+  const options: Record<string, unknown> = {};
+  const params = model.params;
+  if (params && typeof params === "object" && !Array.isArray(params)) {
+    for (const [key, value] of Object.entries(params)) {
+      if (value !== undefined && OLLAMA_OPTION_PARAM_KEYS.has(key)) {
+        options[key] = value;
+      }
+    }
+  }
+  options.num_ctx = resolveOllamaNumCtx(model);
+  return options;
+}
+
+function resolveOllamaTopLevelParams(
+  model: ProviderRuntimeModel,
+): Record<string, unknown> | undefined {
+  const requestParams: Record<string, unknown> = {};
+  const params = model.params;
+  if (params && typeof params === "object" && !Array.isArray(params)) {
+    for (const [key, value] of Object.entries(params)) {
+      if (value !== undefined && OLLAMA_TOP_LEVEL_PARAM_KEYS.has(key)) {
+        requestParams[key] = value;
+      }
+    }
+  }
+  const think = resolveOllamaThinkParamValue(params);
+  if (think !== undefined) {
+    requestParams.think = think;
+  }
+  return Object.keys(requestParams).length > 0 ? requestParams : undefined;
+}
+
 function isOllamaCloudKimiModelRef(modelId: string): boolean {
   const normalizedModelId = normalizeLowercaseStringOrEmpty(modelId);
   return normalizedModelId.startsWith("kimi-k") && normalizedModelId.includes(":cloud");
 }
@@ -257,6 +330,7 @@ export function buildOllamaChatRequest(params: {
   messages: OllamaChatMessage[];
   tools?: OllamaTool[];
   options?: Record<string, unknown>;
+  requestParams?: Record<string, unknown>;
   stream?: boolean;
 }): OllamaChatRequest {
   return {
@@ -265,6 +339,7 @@ export function buildOllamaChatRequest(params: {
     stream: params.stream ?? true,
     ...(params.tools && params.tools.length > 0 ? { tools: params.tools } : {}),
     ...(params.options ? { options: params.options } : {}),
+    ...params.requestParams,
   };
 }
@@ -754,7 +829,7 @@ export function createOllamaStreamFn(
   );

   const ollamaTools = extractOllamaTools(context.tools);
-  const ollamaOptions: Record<string, unknown> = { num_ctx: resolveOllamaNumCtx(model) };
+  const ollamaOptions: Record<string, unknown> = resolveOllamaModelOptions(model);
   if (typeof options?.temperature === "number") {
     ollamaOptions.temperature = options.temperature;
   }
@@ -769,6 +844,7 @@
     stream: true,
     tools: ollamaTools,
     options: ollamaOptions,
+    requestParams: resolveOllamaTopLevelParams(model),
   });
   options?.onPayload?.(body, model);
   const headers: Record<string, string> = {
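
Reviewer sketch (not part of the patch): the change boils down to a whitelist split over `model.params`, with unknown keys dropped. The helper below is a hypothetical, abbreviated re-implementation for illustration only; the key lists are trimmed copies of `OLLAMA_OPTION_PARAM_KEYS` and `OLLAMA_TOP_LEVEL_PARAM_KEYS`, and `splitOllamaParams` does not exist in the codebase.

```ts
// Abbreviated whitelists; the real sets in extensions/ollama/src/stream.ts carry more keys.
const OPTION_KEYS = new Set(["num_ctx", "num_predict", "temperature", "top_p", "top_k", "min_p", "stop"]);
const TOP_LEVEL_KEYS = new Set(["format", "keep_alive"]);

// Hypothetical helper mirroring resolveOllamaModelOptions + resolveOllamaTopLevelParams.
function splitOllamaParams(params: Record<string, unknown>): {
  options: Record<string, unknown>; // nested under `options` in the /api/chat body
  topLevel: Record<string, unknown>; // spread onto the request root
} {
  const options: Record<string, unknown> = {};
  const topLevel: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(params)) {
    if (value === undefined) continue;
    if (OPTION_KEYS.has(key)) {
      options[key] = value;
    } else if (TOP_LEVEL_KEYS.has(key)) {
      topLevel[key] = value;
    } else if (key === "think" || key === "thinking") {
      topLevel.think = value; // `false` disables API-level thinking
    }
    // Anything else (e.g. OpenClaw's own `streaming` flag) is dropped, never forwarded.
  }
  return { options, topLevel };
}

// Mirrors the docs example above: only whitelisted keys reach the Ollama payload.
const { options, topLevel } = splitOllamaParams({
  num_ctx: 32768,
  temperature: 0.7,
  top_p: 0.9,
  thinking: false,
  streaming: false,
});
console.log(options); // { num_ctx: 32768, temperature: 0.7, top_p: 0.9 }
console.log(topLevel); // { think: false }
```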