From aa071e0b60613f5ed49345a1c9b9bbdaa10cae19 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Mon, 27 Apr 2026 03:08:03 +0100
Subject: [PATCH] fix(ollama): forward native model params

---
 CHANGELOG.md                                 |  1 +
 docs/providers/ollama.md                     |  5 ++
 extensions/ollama/ollama.live.test.ts        |  7 +-
 extensions/ollama/src/stream-runtime.test.ts | 27 ++++++-
 extensions/ollama/src/stream.ts              | 80 +++++++++++++++++++-
 5 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0313dea5da4..f874cbde582 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
 - Providers/Ollama: honor `/api/show` capabilities when registering local models so non-tool Ollama models no longer receive the agent tool surface, and keep native Ollama thinking opt-in instead of enabling it by default. Fixes #64710 and duplicate #65343. Thanks @yuan-b, @netherby, @xilopaint, and @Diyforfun2026.
 - Providers/Ollama: read larger custom Modelfile `PARAMETER num_ctx` values from `/api/show` so auto-discovered Ollama models with expanded context no longer stay pinned to the base model context. Fixes #68344. Thanks @neeravmakwana.
 - Providers/Ollama: honor configured model `params.num_ctx` in native and OpenAI-compatible Ollama requests so local models can cap runtime context without rebuilding Modelfiles. Fixes #44550 and #52206; supersedes #69464. Thanks @taitruong, @armi0024, and @LokiCode404.
+- Providers/Ollama: forward whitelisted native Ollama model params such as `temperature`, `top_p`, and top-level `think` so users can disable API-level thinking or tune local models from config without proxy shims. Fixes #48010. Thanks @tangzhi, @pandego, @maweibin, @Adam-Researchh, and @EmpireCreator.
 - Providers/Ollama: expose native Ollama thinking effort levels so `/think max` is accepted for reasoning-capable Ollama models and maps to Ollama's highest supported `think` effort. Fixes #71584. Thanks @g0st1n.
 - Providers/Ollama: strip the active custom Ollama provider prefix before native chat and embedding requests, so custom provider ids like `ollama-spark/qwen3:32b` reach Ollama as the real model name. Fixes #72353. Thanks @maximus-dss and @hclsys.
 - Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md
index 692b25e20fd..acd8c1a5e8e 100644
--- a/docs/providers/ollama.md
+++ b/docs/providers/ollama.md
@@ -403,6 +403,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s

 You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid, zero, negative, and non-finite values are ignored and fall back to `contextWindow`.
+Native Ollama model entries also accept the common Ollama runtime options under `params`, including `temperature`, `top_p`, `top_k`, `min_p`, `num_predict`, `stop`, `repeat_penalty`, `num_batch`, `num_thread`, and `use_mmap`. OpenClaw forwards only Ollama request keys, so OpenClaw runtime params such as `streaming` are not leaked to Ollama. Use `params.think` or `params.thinking` to send top-level Ollama `think`; `false` disables API-level thinking for Qwen-style thinking models.
+
 ```json5
 {
   models: {
@@ -415,6 +417,9 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
         maxTokens: 65536,
         params: {
           num_ctx: 32768,
+          temperature: 0.7,
+          top_p: 0.9,
+          thinking: false,
         },
       }
     ]
diff --git a/extensions/ollama/ollama.live.test.ts b/extensions/ollama/ollama.live.test.ts
index 88304ddadcf..b4fb48a1b1e 100644
--- a/extensions/ollama/ollama.live.test.ts
+++ b/extensions/ollama/ollama.live.test.ts
@@ -26,7 +26,8 @@ describe.skipIf(!LIVE)("ollama live", () => {
   let payload:
     | {
         model?: string;
-        options?: { num_ctx?: number };
+        think?: boolean;
+        options?: { num_ctx?: number; top_p?: number };
         tools?: Array<{
           function?: {
             parameters?: {
@@ -43,7 +44,7 @@
       api: "ollama",
       provider: PROVIDER_ID,
       contextWindow: 8192,
-      params: { num_ctx: 4096 },
+      params: { num_ctx: 4096, top_p: 0.9, thinking: false },
     } as never,
     {
       messages: [{ role: "user", content: "Reply exactly OK." }],
@@ -82,6 +83,8 @@
     expect(events.some((event) => (event as { type?: string }).type === "done")).toBe(true);
     expect(payload?.model).toBe(CHAT_MODEL);
     expect(payload?.options?.num_ctx).toBe(4096);
+    expect(payload?.options?.top_p).toBe(0.9);
+    expect(payload?.think).toBe(false);
     const properties = payload?.tools?.[0]?.function?.parameters?.properties;
     expect(properties?.city?.type).toBe("string");
     expect(properties?.units?.type).toBe("string");
diff --git a/extensions/ollama/src/stream-runtime.test.ts b/extensions/ollama/src/stream-runtime.test.ts
index 4d7e8958f4c..a34862aaf4c 100644
--- a/extensions/ollama/src/stream-runtime.test.ts
+++ b/extensions/ollama/src/stream-runtime.test.ts
@@ -919,6 +919,7 @@ async function createOllamaTestStream(params: {
   options?: {
     apiKey?: string;
     maxTokens?: number;
+    temperature?: number;
     signal?: AbortSignal;
     headers?: Record<string, string>;
   };
@@ -1205,7 +1206,17 @@ describe("createOllamaStreamFn", () => {
     async (fetchMock) => {
       const stream = await createOllamaTestStream({
         baseUrl: "http://ollama-host:11434",
-        model: { params: { num_ctx: 32768 }, contextWindow: 131072 },
+        model: {
+          params: {
+            num_ctx: 32768,
+            temperature: 0.2,
+            top_p: 0.9,
+            thinking: false,
+            streaming: false,
+          },
+          contextWindow: 131072,
+        },
+        options: { temperature: 0.7, maxTokens: 55 },
       });

       const events = await collectStreamEvents(stream);
@@ -1216,9 +1227,21 @@
       if (typeof requestInit.body !== "string") {
         throw new Error("Expected string request body");
       }
       const requestBody = JSON.parse(requestInit.body) as {
-        options: { num_ctx?: number };
+        think?: boolean;
+        options: {
+          num_ctx?: number;
+          num_predict?: number;
+          temperature?: number;
+          top_p?: number;
+          streaming?: boolean;
+        };
       };
       expect(requestBody.options.num_ctx).toBe(32768);
+      expect(requestBody.options.num_predict).toBe(55);
+      expect(requestBody.options.temperature).toBe(0.7);
+      expect(requestBody.options.top_p).toBe(0.9);
+      expect(requestBody.options.streaming).toBeUndefined();
+      expect(requestBody.think).toBe(false);
     },
   );
 });
diff --git a/extensions/ollama/src/stream.ts b/extensions/ollama/src/stream.ts
index 6845b47a713..29fd46523d1 100644
--- a/extensions/ollama/src/stream.ts
+++ b/extensions/ollama/src/stream.ts
@@ -152,7 +152,31 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
   });
 }

-type OllamaThinkValue = boolean | "low" | "medium" | "high";
+type OllamaThinkValue = boolean | "low" | "medium" | "high" | "max";
+
+const OLLAMA_OPTION_PARAM_KEYS = new Set([
+  "num_keep",
+  "seed",
+  "num_predict",
+  "top_k",
+  "top_p",
+  "min_p",
+  "typical_p",
+  "repeat_last_n",
+  "temperature",
+  "repeat_penalty",
+  "presence_penalty",
+  "frequency_penalty",
+  "stop",
+  "num_ctx",
+  "num_batch",
+  "num_gpu",
+  "main_gpu",
+  "use_mmap",
+  "num_thread",
+]);
+
+const OLLAMA_TOP_LEVEL_PARAM_KEYS = new Set(["format", "keep_alive", "truncate", "shift"]);

 function createOllamaThinkingWrapper(
   baseFn: StreamFn | undefined,
@@ -181,6 +205,22 @@ function resolveOllamaThinkValue(thinkingLevel: unknown): OllamaThinkValue | und
   return undefined;
 }

+function resolveOllamaThinkParamValue(
+  params: Record<string, unknown> | undefined,
+): OllamaThinkValue | undefined {
+  const raw = params?.think ?? params?.thinking;
+  if (typeof raw === "boolean") {
+    return raw;
+  }
+  if (raw === "off") {
+    return false;
+  }
+  if (raw === "low" || raw === "medium" || raw === "high" || raw === "max") {
+    return raw;
+  }
+  return undefined;
+}
+
 function resolveOllamaConfiguredNumCtx(model: ProviderRuntimeModel): number | undefined {
   const raw = model.params?.num_ctx;
   if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
@@ -196,6 +236,39 @@ function resolveOllamaNumCtx(model: ProviderRuntimeModel): number {
   );
 }

+function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string, unknown> {
+  const options: Record<string, unknown> = {};
+  const params = model.params;
+  if (params && typeof params === "object" && !Array.isArray(params)) {
+    for (const [key, value] of Object.entries(params)) {
+      if (value !== undefined && OLLAMA_OPTION_PARAM_KEYS.has(key)) {
+        options[key] = value;
+      }
+    }
+  }
+  options.num_ctx = resolveOllamaNumCtx(model);
+  return options;
+}
+
+function resolveOllamaTopLevelParams(
+  model: ProviderRuntimeModel,
+): Record<string, unknown> | undefined {
+  const requestParams: Record<string, unknown> = {};
+  const params = model.params;
+  if (params && typeof params === "object" && !Array.isArray(params)) {
+    for (const [key, value] of Object.entries(params)) {
+      if (value !== undefined && OLLAMA_TOP_LEVEL_PARAM_KEYS.has(key)) {
+        requestParams[key] = value;
+      }
+    }
+  }
+  const think = resolveOllamaThinkParamValue(params);
+  if (think !== undefined) {
+    requestParams.think = think;
+  }
+  return Object.keys(requestParams).length > 0 ? requestParams : undefined;
+}
+
 function isOllamaCloudKimiModelRef(modelId: string): boolean {
   const normalizedModelId = normalizeLowercaseStringOrEmpty(modelId);
   return normalizedModelId.startsWith("kimi-k") && normalizedModelId.includes(":cloud");
 }
@@ -257,6 +330,7 @@ export function buildOllamaChatRequest(params: {
   messages: OllamaChatMessage[];
   tools?: OllamaTool[];
   options?: Record<string, unknown>;
+  requestParams?: Record<string, unknown>;
   stream?: boolean;
 }): OllamaChatRequest {
   return {
@@ -265,6 +339,7 @@ export function buildOllamaChatRequest(params: {
     stream: params.stream ?? true,
     ...(params.tools && params.tools.length > 0 ? { tools: params.tools } : {}),
     ...(params.options ? { options: params.options } : {}),
+    ...params.requestParams,
   };
 }
@@ -754,7 +829,7 @@ export function createOllamaStreamFn(
   );

   const ollamaTools = extractOllamaTools(context.tools);
-  const ollamaOptions: Record<string, unknown> = { num_ctx: resolveOllamaNumCtx(model) };
+  const ollamaOptions: Record<string, unknown> = resolveOllamaModelOptions(model);
   if (typeof options?.temperature === "number") {
     ollamaOptions.temperature = options.temperature;
   }
@@ -769,6 +844,7 @@
     stream: true,
     tools: ollamaTools,
     options: ollamaOptions,
+    requestParams: resolveOllamaTopLevelParams(model),
   });
   options?.onPayload?.(body, model);
   const headers: Record<string, string> = {
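
Reviewer sketch (not part of the patch): the change boils down to a whitelist split over `model.params`, with unknown keys dropped. The helper below is a hypothetical, abbreviated re-implementation for illustration only; the key lists are trimmed copies of `OLLAMA_OPTION_PARAM_KEYS` and `OLLAMA_TOP_LEVEL_PARAM_KEYS`, and `splitOllamaParams` does not exist in the codebase.

```ts
// Abbreviated whitelists; the real sets in extensions/ollama/src/stream.ts carry more keys.
const OPTION_KEYS = new Set(["num_ctx", "num_predict", "temperature", "top_p", "top_k", "min_p", "stop"]);
const TOP_LEVEL_KEYS = new Set(["format", "keep_alive"]);

// Hypothetical helper mirroring resolveOllamaModelOptions + resolveOllamaTopLevelParams.
function splitOllamaParams(params: Record<string, unknown>): {
  options: Record<string, unknown>; // nested under `options` in the /api/chat body
  topLevel: Record<string, unknown>; // spread onto the request root
} {
  const options: Record<string, unknown> = {};
  const topLevel: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(params)) {
    if (value === undefined) continue;
    if (OPTION_KEYS.has(key)) {
      options[key] = value;
    } else if (TOP_LEVEL_KEYS.has(key)) {
      topLevel[key] = value;
    } else if (key === "think" || key === "thinking") {
      topLevel.think = value; // `false` disables API-level thinking
    }
    // Anything else (e.g. OpenClaw's own `streaming` flag) is dropped, never forwarded.
  }
  return { options, topLevel };
}

// Mirrors the docs example above: only whitelisted keys reach the Ollama payload.
const { options, topLevel } = splitOllamaParams({
  num_ctx: 32768,
  temperature: 0.7,
  top_p: 0.9,
  thinking: false,
  streaming: false,
});
console.log(options); // { num_ctx: 32768, temperature: 0.7, top_p: 0.9 }
console.log(topLevel); // { think: false }
```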