fix(ollama): forward native model params
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
 - Providers/Ollama: honor `/api/show` capabilities when registering local models so non-tool Ollama models no longer receive the agent tool surface, and keep native Ollama thinking opt-in instead of enabling it by default. Fixes #64710 and duplicate #65343. Thanks @yuan-b, @netherby, @xilopaint, and @Diyforfun2026.
 - Providers/Ollama: read larger custom Modelfile `PARAMETER num_ctx` values from `/api/show` so auto-discovered Ollama models with expanded context no longer stay pinned to the base model context. Fixes #68344. Thanks @neeravmakwana.
 - Providers/Ollama: honor configured model `params.num_ctx` in native and OpenAI-compatible Ollama requests so local models can cap runtime context without rebuilding Modelfiles. Fixes #44550 and #52206; supersedes #69464. Thanks @taitruong, @armi0024, and @LokiCode404.
+- Providers/Ollama: forward whitelisted native Ollama model params such as `temperature`, `top_p`, and top-level `think` so users can disable API-level thinking or tune local models from config without proxy shims. Fixes #48010. Thanks @tangzhi, @pandego, @maweibin, @Adam-Researchh, and @EmpireCreator.
 - Providers/Ollama: expose native Ollama thinking effort levels so `/think max` is accepted for reasoning-capable Ollama models and maps to Ollama's highest supported `think` effort. Fixes #71584. Thanks @g0st1n.
 - Providers/Ollama: strip the active custom Ollama provider prefix before native chat and embedding requests, so custom provider ids like `ollama-spark/qwen3:32b` reach Ollama as the real model name. Fixes #72353. Thanks @maximus-dss and @hclsys.
 - Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
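
As a sketch of the tool-call argument handling in the last entry above: preserving integers beyond `Number.MAX_SAFE_INTEGER` requires access to the raw number text. One way to do that (an assumption for illustration, not the repo's actual helper) is the `JSON.parse` source-access reviver available in recent V8 / Node 21+:

```ts
// Hypothetical sketch, not OpenClaw's implementation: keep unsafe integers
// intact as BigInt by reading the raw source text passed to the JSON.parse
// reviver's third argument (recent V8 / Node 21+).
function parseToolArguments(raw: string): Record<string, unknown> {
  return JSON.parse(raw, (_key, value, context?: { source?: string }) => {
    if (
      typeof value === "number" &&
      Number.isInteger(value) &&
      !Number.isSafeInteger(value) &&
      context?.source !== undefined
    ) {
      return BigInt(context.source); // full precision survives
    }
    return value;
  });
}

// 2^60 + 1 would silently round to ...976 as a plain double:
console.log(parseToolArguments('{"id": 1152921504606846977}'));
// -> { id: 1152921504606846977n }
```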
@@ -403,6 +403,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
 
 You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Non-numeric, zero, negative, and non-finite values are ignored and the request falls back to `contextWindow`.
 
+Native Ollama model entries also accept the common Ollama runtime options under `params`, including `temperature`, `top_p`, `top_k`, `min_p`, `num_predict`, `stop`, `repeat_penalty`, `num_batch`, `num_thread`, and `use_mmap`. OpenClaw forwards only whitelisted Ollama request keys, so OpenClaw runtime params such as `streaming` are not leaked to Ollama. Use `params.think` or `params.thinking` to send top-level Ollama `think`; `false` disables API-level thinking for Qwen-style thinking models.
+
 ```json5
 {
   models: {
@@ -415,6 +417,9 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
         maxTokens: 65536,
         params: {
           num_ctx: 32768,
+          temperature: 0.7,
+          top_p: 0.9,
+          thinking: false,
         },
       }
     ]
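
Given that config, the native `/api/chat` body comes out roughly as below (a sketch following the forwarding rules above; the model id is hypothetical). Whitelisted option keys nest under `options`, and `params.thinking: false` is sent as top-level `think`:

```ts
// Illustrative request body shape (sketch, not captured output).
const body = {
  model: "qwen3:32b", // hypothetical model id
  messages: [{ role: "user", content: "Reply exactly OK." }],
  stream: true,
  options: { num_ctx: 32768, temperature: 0.7, top_p: 0.9 },
  think: false, // from params.thinking; disables API-level thinking
};
```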
@@ -26,7 +26,8 @@ describe.skipIf(!LIVE)("ollama live", () => {
   let payload:
     | {
         model?: string;
-        options?: { num_ctx?: number };
+        think?: boolean;
+        options?: { num_ctx?: number; top_p?: number };
         tools?: Array<{
           function?: {
             parameters?: {
@@ -43,7 +44,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
         api: "ollama",
         provider: PROVIDER_ID,
         contextWindow: 8192,
-        params: { num_ctx: 4096 },
+        params: { num_ctx: 4096, top_p: 0.9, thinking: false },
       } as never,
       {
         messages: [{ role: "user", content: "Reply exactly OK." }],
@@ -82,6 +83,8 @@ describe.skipIf(!LIVE)("ollama live", () => {
     expect(events.some((event) => (event as { type?: string }).type === "done")).toBe(true);
     expect(payload?.model).toBe(CHAT_MODEL);
     expect(payload?.options?.num_ctx).toBe(4096);
+    expect(payload?.options?.top_p).toBe(0.9);
+    expect(payload?.think).toBe(false);
     const properties = payload?.tools?.[0]?.function?.parameters?.properties;
     expect(properties?.city?.type).toBe("string");
     expect(properties?.units?.type).toBe("string");
@@ -919,6 +919,7 @@ async function createOllamaTestStream(params: {
   options?: {
     apiKey?: string;
     maxTokens?: number;
+    temperature?: number;
     signal?: AbortSignal;
     headers?: Record<string, string>;
   };
@@ -1205,7 +1206,17 @@ describe("createOllamaStreamFn", () => {
     async (fetchMock) => {
       const stream = await createOllamaTestStream({
         baseUrl: "http://ollama-host:11434",
-        model: { params: { num_ctx: 32768 }, contextWindow: 131072 },
+        model: {
+          params: {
+            num_ctx: 32768,
+            temperature: 0.2,
+            top_p: 0.9,
+            thinking: false,
+            streaming: false,
+          },
+          contextWindow: 131072,
+        },
+        options: { temperature: 0.7, maxTokens: 55 },
       });
 
       const events = await collectStreamEvents(stream);
@@ -1216,9 +1227,21 @@ describe("createOllamaStreamFn", () => {
         throw new Error("Expected string request body");
       }
       const requestBody = JSON.parse(requestInit.body) as {
-        options: { num_ctx?: number };
+        think?: boolean;
+        options: {
+          num_ctx?: number;
+          num_predict?: number;
+          temperature?: number;
+          top_p?: number;
+          streaming?: boolean;
+        };
       };
       expect(requestBody.options.num_ctx).toBe(32768);
+      expect(requestBody.options.num_predict).toBe(55);
+      expect(requestBody.options.temperature).toBe(0.7);
+      expect(requestBody.options.top_p).toBe(0.9);
+      expect(requestBody.options.streaming).toBeUndefined();
+      expect(requestBody.think).toBe(false);
     },
   );
 });
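
The assertions above pin down the precedence: the model's `temperature: 0.2` loses to the call-site `temperature: 0.7`, `maxTokens: 55` surfaces as Ollama's `num_predict`, and the non-Ollama key `streaming` never reaches the request. A minimal sketch of that merge order (names hypothetical):

```ts
// Hypothetical sketch of the merge order the test asserts: model params
// seed the options map, then call-site options override matching keys.
const modelParams = { num_ctx: 32768, temperature: 0.2, top_p: 0.9 };
const callOptions = { temperature: 0.7, maxTokens: 55 };

const options = {
  ...modelParams,
  temperature: callOptions.temperature, // call-site wins: 0.7 replaces 0.2
  num_predict: callOptions.maxTokens,   // OpenClaw maxTokens -> Ollama num_predict
};

console.log(options);
// -> { num_ctx: 32768, temperature: 0.7, top_p: 0.9, num_predict: 55 }
```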
@@ -152,7 +152,31 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
   });
 }
 
-type OllamaThinkValue = boolean | "low" | "medium" | "high";
+type OllamaThinkValue = boolean | "low" | "medium" | "high" | "max";
+
+const OLLAMA_OPTION_PARAM_KEYS = new Set([
+  "num_keep",
+  "seed",
+  "num_predict",
+  "top_k",
+  "top_p",
+  "min_p",
+  "typical_p",
+  "repeat_last_n",
+  "temperature",
+  "repeat_penalty",
+  "presence_penalty",
+  "frequency_penalty",
+  "stop",
+  "num_ctx",
+  "num_batch",
+  "num_gpu",
+  "main_gpu",
+  "use_mmap",
+  "num_thread",
+]);
+
+const OLLAMA_TOP_LEVEL_PARAM_KEYS = new Set(["format", "keep_alive", "truncate", "shift"]);
 
 function createOllamaThinkingWrapper(
   baseFn: StreamFn | undefined,
@@ -181,6 +205,22 @@ function resolveOllamaThinkValue(thinkingLevel: unknown): OllamaThinkValue | und
   return undefined;
 }
 
+function resolveOllamaThinkParamValue(
+  params: Record<string, unknown> | undefined,
+): OllamaThinkValue | undefined {
+  const raw = params?.think ?? params?.thinking;
+  if (typeof raw === "boolean") {
+    return raw;
+  }
+  if (raw === "off") {
+    return false;
+  }
+  if (raw === "low" || raw === "medium" || raw === "high" || raw === "max") {
+    return raw;
+  }
+  return undefined;
+}
+
 function resolveOllamaConfiguredNumCtx(model: ProviderRuntimeModel): number | undefined {
   const raw = model.params?.num_ctx;
   if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
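
The new helper accepts booleans, `"off"` as an alias for `false`, and the effort strings, ignoring everything else. A vitest-style sketch of the mapping (the import is hypothetical; the diff keeps the helper module-private):

```ts
import { expect, it } from "vitest";
// Hypothetical import: the diff defines this helper privately in the provider module.
import { resolveOllamaThinkParamValue } from "./ollama";

it("maps config think params to Ollama think values", () => {
  expect(resolveOllamaThinkParamValue({ thinking: false })).toBe(false);
  expect(resolveOllamaThinkParamValue({ think: "off" })).toBe(false); // alias for false
  expect(resolveOllamaThinkParamValue({ think: "max" })).toBe("max");
  expect(resolveOllamaThinkParamValue({ think: 1 })).toBeUndefined(); // unsupported value is ignored
  expect(resolveOllamaThinkParamValue(undefined)).toBeUndefined();
});
```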
@@ -196,6 +236,39 @@ function resolveOllamaNumCtx(model: ProviderRuntimeModel): number {
   );
 }
 
+function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string, unknown> {
+  const options: Record<string, unknown> = {};
+  const params = model.params;
+  if (params && typeof params === "object" && !Array.isArray(params)) {
+    for (const [key, value] of Object.entries(params)) {
+      if (value !== undefined && OLLAMA_OPTION_PARAM_KEYS.has(key)) {
+        options[key] = value;
+      }
+    }
+  }
+  options.num_ctx = resolveOllamaNumCtx(model);
+  return options;
+}
+
+function resolveOllamaTopLevelParams(
+  model: ProviderRuntimeModel,
+): Record<string, unknown> | undefined {
+  const requestParams: Record<string, unknown> = {};
+  const params = model.params;
+  if (params && typeof params === "object" && !Array.isArray(params)) {
+    for (const [key, value] of Object.entries(params)) {
+      if (value !== undefined && OLLAMA_TOP_LEVEL_PARAM_KEYS.has(key)) {
+        requestParams[key] = value;
+      }
+    }
+  }
+  const think = resolveOllamaThinkParamValue(params);
+  if (think !== undefined) {
+    requestParams.think = think;
+  }
+  return Object.keys(requestParams).length > 0 ? requestParams : undefined;
+}
+
 function isOllamaCloudKimiModelRef(modelId: string): boolean {
   const normalizedModelId = normalizeLowercaseStringOrEmpty(modelId);
   return normalizedModelId.startsWith("kimi-k") && normalizedModelId.includes(":cloud");
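
Together the two resolvers above implement the whitelist split: option keys nest under `options`, the four top-level keys plus the normalized `think` go on the request root, and anything else, such as OpenClaw's own `streaming` flag, is dropped. A minimal self-contained sketch (whitelists abridged, names hypothetical; `think` normalization omitted):

```ts
// Hypothetical, abridged sketch of the whitelist split implemented above.
const OPTION_KEYS = new Set(["num_ctx", "temperature", "top_p"]); // abridged
const TOP_LEVEL_KEYS = new Set(["format", "keep_alive", "truncate", "shift"]);

function splitOllamaParams(params: Record<string, unknown>): {
  options: Record<string, unknown>;
  topLevel: Record<string, unknown>;
} {
  const options: Record<string, unknown> = {};
  const topLevel: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(params)) {
    if (value === undefined) continue;
    if (OPTION_KEYS.has(key)) options[key] = value;
    else if (TOP_LEVEL_KEYS.has(key)) topLevel[key] = value;
    // other keys (e.g. OpenClaw's `streaming`) are not forwarded;
    // think/thinking is normalized separately into topLevel.think
  }
  return { options, topLevel };
}

console.log(splitOllamaParams({ num_ctx: 32768, top_p: 0.9, streaming: false }));
// -> { options: { num_ctx: 32768, top_p: 0.9 }, topLevel: {} }
```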
@@ -257,6 +330,7 @@ export function buildOllamaChatRequest(params: {
   messages: OllamaChatMessage[];
   tools?: OllamaTool[];
   options?: Record<string, unknown>;
+  requestParams?: Record<string, unknown>;
   stream?: boolean;
 }): OllamaChatRequest {
   return {
@@ -265,6 +339,7 @@
     stream: params.stream ?? true,
     ...(params.tools && params.tools.length > 0 ? { tools: params.tools } : {}),
     ...(params.options ? { options: params.options } : {}),
+    ...params.requestParams,
   };
 }
 
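
Because `...params.requestParams` is spread last and spreading `undefined` is a no-op, requests without top-level params are unchanged, while keys like `think` and `keep_alive` land on the request root. An illustrative call (values hypothetical):

```ts
// Illustrative call; buildOllamaChatRequest is the helper shown above.
const body = buildOllamaChatRequest({
  model: "qwen3:32b", // hypothetical model id
  messages: [{ role: "user", content: "Reply exactly OK." }],
  options: { num_ctx: 32768, top_p: 0.9 },
  requestParams: { think: false, keep_alive: "5m" },
});
// -> { model: "qwen3:32b", messages: [...], stream: true,
//      options: { num_ctx: 32768, top_p: 0.9 }, think: false, keep_alive: "5m" }
```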
@@ -754,7 +829,7 @@ export function createOllamaStreamFn(
   );
   const ollamaTools = extractOllamaTools(context.tools);
 
-  const ollamaOptions: Record<string, unknown> = { num_ctx: resolveOllamaNumCtx(model) };
+  const ollamaOptions: Record<string, unknown> = resolveOllamaModelOptions(model);
   if (typeof options?.temperature === "number") {
     ollamaOptions.temperature = options.temperature;
   }
@@ -769,6 +844,7 @@
     stream: true,
     tools: ollamaTools,
     options: ollamaOptions,
+    requestParams: resolveOllamaTopLevelParams(model),
   });
   options?.onPayload?.(body, model);
   const headers: Record<string, string> = {