From d4f91a354ec1abd4fad902ed53dd2ebe956ee361 Mon Sep 17 00:00:00 2001
From: Zihao WAN <80884605+WZH8898@users.noreply.github.com>
Date: Wed, 22 Apr 2026 06:49:16 +0200
Subject: [PATCH] fix(ollama): forward think:false for qwen3 chat requests
 (#69967)

Forward the top-level Ollama think flag on native /api/chat requests so
--thinking off sends think:false.

Thanks @WZH8898.
---
 CHANGELOG.md                                 |   1 +
 docs/providers/ollama.md                     |   2 +
 extensions/ollama/src/stream-runtime.test.ts | 106 +++++++++++++++++++
 extensions/ollama/src/stream.ts              |  13 +--
 4 files changed, 114 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ccfb4b1af3..43f82f49da7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- Ollama: forward OpenClaw thinking control to native `/api/chat` requests as top-level `think`, so `/think off` and `openclaw agent --thinking off` suppress thinking on models such as qwen3 instead of idling until the watchdog fires. Fixes #69902. (#69967) Thanks @WZH8898.
 - Memory-core/dreaming: suppress the startup-only managed dreaming cron unavailable warning when the cron service is still attaching, while preserving the runtime warning if cron genuinely remains unavailable. Fixes #69939. (#69941) Thanks @Sanjays2402.
 - Mattermost: suppress reasoning-only payloads even when they arrive as blockquoted `> Reasoning:` text, preventing `/reasoning on` from leaking thinking into channel posts. (#69927) Thanks @lawrence3699.
 - Discord: read `channel.parentId` through a safe accessor in the slash-command, reaction, and model-picker paths so partial `GuildThreadChannel` prototype getters no longer throw `Cannot access rawData on partial Channel` when commands like `/new` run from inside a thread. Fixes #69861. (#69908) Thanks @neeravmakwana.
diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md
index 84d4fb70170..ab3f6d35b57 100644
--- a/docs/providers/ollama.md
+++ b/docs/providers/ollama.md
@@ -463,6 +463,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
 
 OpenClaw's Ollama integration uses the **native Ollama API** (`/api/chat`) by default, which fully supports streaming and tool calling simultaneously. No special configuration is needed.
 
+For native `/api/chat` requests, OpenClaw also forwards thinking control directly to Ollama: `/think off` and `openclaw agent --thinking off` send top-level `think: false`, while non-`off` thinking levels send `think: true`.
+
 If you need to use the OpenAI-compatible endpoint, see the "Legacy OpenAI-compatible mode" section above. Streaming and tool calling may not work simultaneously in that mode.
 
diff --git a/extensions/ollama/src/stream-runtime.test.ts b/extensions/ollama/src/stream-runtime.test.ts
index 298a45255fb..8597d6b537c 100644
--- a/extensions/ollama/src/stream-runtime.test.ts
+++ b/extensions/ollama/src/stream-runtime.test.ts
@@ -96,6 +96,112 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
       options: { num_ctx: 262144 },
     });
   });
+
+  it("forwards think=false on native Ollama chat requests when thinking is off", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
+        const model = {
+          api: "ollama",
+          provider: "ollama",
+          id: "qwen3:32b",
+          contextWindow: 131072,
+        };
+
+        const wrapped = createConfiguredOllamaCompatStreamWrapper({
+          provider: "ollama",
+          modelId: "qwen3:32b",
+          model,
+          streamFn: baseStreamFn,
+          thinkingLevel: "off",
+        } as never);
+        if (!wrapped) {
+          throw new Error("Expected wrapped Ollama stream function");
+        }
+
+        const stream = await Promise.resolve(
+          wrapped(
+            model as never,
+            {
+              messages: [{ role: "user", content: "hello" }],
+            } as never,
+            {} as never,
+          ),
+        );
+
+        await collectStreamEvents(stream);
+
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          think?: boolean;
+          options?: { think?: boolean; num_ctx?: number };
+        };
+        expect(requestBody.think).toBe(false);
+        expect(requestBody.options?.think).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
+      },
+    );
+  });
+
+  it("forwards think=true on native Ollama chat requests when thinking is enabled", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
+        const model = {
+          api: "ollama",
+          provider: "ollama",
+          id: "qwen3:32b",
+          contextWindow: 131072,
+        };
+
+        const wrapped = createConfiguredOllamaCompatStreamWrapper({
+          provider: "ollama",
+          modelId: "qwen3:32b",
+          model,
+          streamFn: baseStreamFn,
+          thinkingLevel: "low",
+        } as never);
+        if (!wrapped) {
+          throw new Error("Expected wrapped Ollama stream function");
+        }
+
+        const stream = await Promise.resolve(
+          wrapped(
+            model as never,
+            {
+              messages: [{ role: "user", content: "hello" }],
+            } as never,
+            {} as never,
+          ),
+        );
+
+        await collectStreamEvents(stream);
+
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          think?: boolean;
+          options?: { think?: boolean; num_ctx?: number };
+        };
+        expect(requestBody.think).toBe(true);
+        expect(requestBody.options?.think).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
+      },
+    );
+  });
 });
 
 describe("convertToOllamaMessages", () => {
diff --git a/extensions/ollama/src/stream.ts b/extensions/ollama/src/stream.ts
index aea126f6b53..62aa4d86597 100644
--- a/extensions/ollama/src/stream.ts
+++ b/extensions/ollama/src/stream.ts
@@ -153,14 +153,10 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
 
 function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
   const streamFn = baseFn ?? streamSimple;
-  return (model, context, options) => {
-    if (model.api !== "ollama") {
-      return streamFn(model, context, options);
-    }
-    return streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
+  return (model, context, options) =>
+    streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
       payloadRecord.think = think;
     });
-  };
 }
 
 function resolveOllamaCompatNumCtx(model: ProviderRuntimeModel): number {
@@ -178,6 +174,7 @@ export function createConfiguredOllamaCompatStreamWrapper(
   let streamFn = ctx.streamFn;
   const model = ctx.model;
   let injectNumCtx = false;
+  const isNativeOllamaTransport = model?.api === "ollama";
 
   if (model) {
     const providerId =
@@ -199,9 +196,9 @@ export function createConfiguredOllamaCompatStreamWrapper(
     streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaCompatNumCtx(model));
   }
 
-  if (ctx.thinkingLevel === "off") {
+  if (isNativeOllamaTransport && ctx.thinkingLevel === "off") {
     streamFn = createOllamaThinkingWrapper(streamFn, false);
-  } else if (ctx.thinkingLevel) {
+  } else if (isNativeOllamaTransport && ctx.thinkingLevel) {
     // Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive, max)
    // should enable Ollama's native thinking mode.
     streamFn = createOllamaThinkingWrapper(streamFn, true);
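
A minimal sketch (not part of the patch) of the native `/api/chat` request body that the new tests assert, for readers skimming the diff: the model name, user message, and `num_ctx` value come from the test fixtures above, while the surrounding object shape is only an assumption for illustration.

// Sketch inferred from the test assertions above, not taken from the runtime
// implementation: `think` is forwarded at the top level and is never nested
// under `options`, which keeps carrying only `num_ctx`.
const exampleNativeChatBody = {
  model: "qwen3:32b",
  messages: [{ role: "user", content: "hello" }],
  think: false, // `/think off` or `--thinking off`; non-`off` levels send `think: true`
  options: { num_ctx: 131072 },
};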