From d4f91a354ec1abd4fad902ed53dd2ebe956ee361 Mon Sep 17 00:00:00 2001
From: Zihao WAN <80884605+WZH8898@users.noreply.github.com>
Date: Wed, 22 Apr 2026 06:49:16 +0200
Subject: [PATCH] fix(ollama): forward think:false for qwen3 chat requests
 (#69967)

Forward the top-level Ollama think flag on native /api/chat requests so
--thinking off sends think:false.

Thanks @WZH8898.
---
 CHANGELOG.md                                 |   1 +
 docs/providers/ollama.md                     |   2 +
 extensions/ollama/src/stream-runtime.test.ts | 106 +++++++++++++++++++
 extensions/ollama/src/stream.ts              |  13 +--
 4 files changed, 114 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ccfb4b1af3..43f82f49da7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- Ollama: forward OpenClaw thinking control to native `/api/chat` requests as top-level `think`, so `/think off` and `openclaw agent --thinking off` suppress thinking on models such as qwen3 instead of idling until the watchdog fires. Fixes #69902. (#69967) Thanks @WZH8898.
 - Memory-core/dreaming: suppress the startup-only managed dreaming cron unavailable warning when the cron service is still attaching, while preserving the runtime warning if cron genuinely remains unavailable. Fixes #69939. (#69941) Thanks @Sanjays2402.
 - Mattermost: suppress reasoning-only payloads even when they arrive as blockquoted `> Reasoning:` text, preventing `/reasoning on` from leaking thinking into channel posts. (#69927) Thanks @lawrence3699.
 - Discord: read `channel.parentId` through a safe accessor in the slash-command, reaction, and model-picker paths so partial `GuildThreadChannel` prototype getters no longer throw `Cannot access rawData on partial Channel` when commands like `/new` run from inside a thread. Fixes #69861. (#69908) Thanks @neeravmakwana.
diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md
index 84d4fb70170..ab3f6d35b57 100644
--- a/docs/providers/ollama.md
+++ b/docs/providers/ollama.md
@@ -463,6 +463,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
 
 OpenClaw's Ollama integration uses the **native Ollama API** (`/api/chat`) by default, which fully supports streaming and tool calling simultaneously. No special configuration is needed.
 
+For native `/api/chat` requests, OpenClaw also forwards thinking control directly to Ollama: `/think off` and `openclaw agent --thinking off` send top-level `think: false`, while non-`off` thinking levels send `think: true`.
+
 If you need to use the OpenAI-compatible endpoint, see the "Legacy OpenAI-compatible mode" section above. Streaming and tool calling may not work simultaneously in that mode.
 
diff --git a/extensions/ollama/src/stream-runtime.test.ts b/extensions/ollama/src/stream-runtime.test.ts
index 298a45255fb..8597d6b537c 100644
--- a/extensions/ollama/src/stream-runtime.test.ts
+++ b/extensions/ollama/src/stream-runtime.test.ts
@@ -96,6 +96,112 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
       options: { num_ctx: 262144 },
     });
   });
+
+  it("forwards think=false on native Ollama chat requests when thinking is off", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
+        const model = {
+          api: "ollama",
+          provider: "ollama",
+          id: "qwen3:32b",
+          contextWindow: 131072,
+        };
+
+        const wrapped = createConfiguredOllamaCompatStreamWrapper({
+          provider: "ollama",
+          modelId: "qwen3:32b",
+          model,
+          streamFn: baseStreamFn,
+          thinkingLevel: "off",
+        } as never);
+        if (!wrapped) {
+          throw new Error("Expected wrapped Ollama stream function");
+        }
+
+        const stream = await Promise.resolve(
+          wrapped(
+            model as never,
+            {
+              messages: [{ role: "user", content: "hello" }],
+            } as never,
+            {} as never,
+          ),
+        );
+
+        await collectStreamEvents(stream);
+
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          think?: boolean;
+          options?: { think?: boolean; num_ctx?: number };
+        };
+        expect(requestBody.think).toBe(false);
+        expect(requestBody.options?.think).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
+      },
+    );
+  });
+
+  it("forwards think=true on native Ollama chat requests when thinking is enabled", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
+        const model = {
+          api: "ollama",
+          provider: "ollama",
+          id: "qwen3:32b",
+          contextWindow: 131072,
+        };
+
+        const wrapped = createConfiguredOllamaCompatStreamWrapper({
+          provider: "ollama",
+          modelId: "qwen3:32b",
+          model,
+          streamFn: baseStreamFn,
+          thinkingLevel: "low",
+        } as never);
+        if (!wrapped) {
+          throw new Error("Expected wrapped Ollama stream function");
+        }
+
+        const stream = await Promise.resolve(
+          wrapped(
+            model as never,
+            {
+              messages: [{ role: "user", content: "hello" }],
+            } as never,
+            {} as never,
+          ),
+        );
+
+        await collectStreamEvents(stream);
+
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          think?: boolean;
+          options?: { think?: boolean; num_ctx?: number };
+        };
+        expect(requestBody.think).toBe(true);
+        expect(requestBody.options?.think).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
+      },
+    );
+  });
 });
 
 describe("convertToOllamaMessages", () => {
diff --git a/extensions/ollama/src/stream.ts b/extensions/ollama/src/stream.ts
index aea126f6b53..62aa4d86597 100644
--- a/extensions/ollama/src/stream.ts
+++ b/extensions/ollama/src/stream.ts
@@ -153,14 +153,10 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
 
 function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
   const streamFn = baseFn ?? streamSimple;
-  return (model, context, options) => {
-    if (model.api !== "ollama") {
-      return streamFn(model, context, options);
-    }
-    return streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
+  return (model, context, options) =>
+    streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
       payloadRecord.think = think;
     });
-  };
 }
 
 function resolveOllamaCompatNumCtx(model: ProviderRuntimeModel): number {
@@ -178,6 +174,7 @@ export function createConfiguredOllamaCompatStreamWrapper(
   let streamFn = ctx.streamFn;
   const model = ctx.model;
   let injectNumCtx = false;
+  const isNativeOllamaTransport = model?.api === "ollama";
 
   if (model) {
     const providerId =
@@ -199,9 +196,9 @@ export function createConfiguredOllamaCompatStreamWrapper(
     streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaCompatNumCtx(model));
   }
 
-  if (ctx.thinkingLevel === "off") {
+  if (isNativeOllamaTransport && ctx.thinkingLevel === "off") {
     streamFn = createOllamaThinkingWrapper(streamFn, false);
-  } else if (ctx.thinkingLevel) {
+  } else if (isNativeOllamaTransport && ctx.thinkingLevel) {
     // Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive, max)
    // should enable Ollama's native thinking mode.
     streamFn = createOllamaThinkingWrapper(streamFn, true);
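
A minimal sketch (not part of the patch) of the native `/api/chat` request body that the new tests assert, for readers skimming the diff: the model name, user message, and `num_ctx` value come from the test fixtures above, while the surrounding object shape is only an assumption for illustration.

// Sketch inferred from the test assertions above, not taken from the runtime
// implementation: `think` is forwarded at the top level and is never nested
// under `options`, which keeps carrying only `num_ctx`.
const exampleNativeChatBody = {
  model: "qwen3:32b",
  messages: [{ role: "user", content: "hello" }],
  think: false, // `/think off` or `--thinking off`; non-`off` levels send `think: true`
  options: { num_ctx: 131072 },
};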