fix(ollama): forward think:false for qwen3 chat requests (#69967)

Forward top-level Ollama think flags on native /api/chat requests so --thinking off sends think:false.

Thanks @WZH8898.
This commit is contained in:
Zihao WAN
2026-04-22 06:49:16 +02:00
committed by GitHub
parent 276c00015c
commit d4f91a354e
4 changed files with 114 additions and 8 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Ollama: forward OpenClaw thinking control to native `/api/chat` requests as top-level `think`, so `/think off` and `openclaw agent --thinking off` suppress thinking on models such as qwen3 instead of idling until the watchdog fires. Fixes #69902. (#69967) Thanks @WZH8898.
- Memory-core/dreaming: suppress the startup-only managed dreaming cron unavailable warning when the cron service is still attaching, while preserving the runtime warning if cron genuinely remains unavailable. Fixes #69939. (#69941) Thanks @Sanjays2402.
- Mattermost: suppress reasoning-only payloads even when they arrive as blockquoted `> Reasoning:` text, preventing `/reasoning on` from leaking thinking into channel posts. (#69927) Thanks @lawrence3699.
- Discord: read `channel.parentId` through a safe accessor in the slash-command, reaction, and model-picker paths so partial `GuildThreadChannel` prototype getters no longer throw `Cannot access rawData on partial Channel` when commands like `/new` run from inside a thread. Fixes #69861. (#69908) Thanks @neeravmakwana.

View File

@@ -463,6 +463,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
<Accordion title="Streaming configuration">
OpenClaw's Ollama integration uses the **native Ollama API** (`/api/chat`) by default, which fully supports streaming and tool calling simultaneously. No special configuration is needed.
For native `/api/chat` requests, OpenClaw also forwards thinking control directly to Ollama: `/think off` and `openclaw agent --thinking off` send top-level `think: false`, while non-`off` thinking levels send `think: true`.
<Tip>
If you need to use the OpenAI-compatible endpoint, see the "Legacy OpenAI-compatible mode" section above. Streaming and tool calling may not work simultaneously in that mode.
</Tip>

View File

@@ -96,6 +96,112 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
options: { num_ctx: 262144 },
});
});
// With thinkingLevel "off", the configured wrapper must put `think: false` at the
// top level of the native /api/chat request body, leave `options.think` unset, and
// still carry the model's context window through as `options.num_ctx`.
it("forwards think=false on native Ollama chat requests when thinking is off", async () => {
  // Minimal two-chunk NDJSON reply: one content chunk followed by the final "done" chunk.
  const ndjsonReply = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];
  const qwenModel = {
    api: "ollama",
    provider: "ollama",
    id: "qwen3:32b",
    contextWindow: 131072,
  };

  await withMockNdjsonFetch(ndjsonReply, async (fetchMock) => {
    const thinkingOffStream = createConfiguredOllamaCompatStreamWrapper({
      provider: "ollama",
      modelId: "qwen3:32b",
      model: qwenModel,
      streamFn: createOllamaStreamFn("http://ollama-host:11434"),
      thinkingLevel: "off",
    } as never);
    if (!thinkingOffStream) {
      throw new Error("Expected wrapped Ollama stream function");
    }

    const chatContext = { messages: [{ role: "user", content: "hello" }] };
    // The wrapper's result is normalised through Promise.resolve before draining.
    const events = await Promise.resolve(
      thinkingOffStream(qwenModel as never, chatContext as never, {} as never),
    );
    await collectStreamEvents(events);

    // Inspect the JSON body that was handed to the (mocked) fetch call.
    const init = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof init.body !== "string") {
      throw new Error("Expected string request body");
    }
    const payload = JSON.parse(init.body) as {
      think?: boolean;
      options?: { think?: boolean; num_ctx?: number };
    };

    expect(payload.think).toBe(false);
    expect(payload.options?.think).toBeUndefined();
    expect(payload.options?.num_ctx).toBe(131072);
  });
});
// With a non-"off" thinkingLevel ("low" here), the configured wrapper must put
// `think: true` at the top level of the native /api/chat request body, leave
// `options.think` unset, and still send the context window as `options.num_ctx`.
it("forwards think=true on native Ollama chat requests when thinking is enabled", async () => {
  // Minimal two-chunk NDJSON reply: one content chunk followed by the final "done" chunk.
  const ndjsonReply = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];
  const qwenModel = {
    api: "ollama",
    provider: "ollama",
    id: "qwen3:32b",
    contextWindow: 131072,
  };

  await withMockNdjsonFetch(ndjsonReply, async (fetchMock) => {
    const thinkingOnStream = createConfiguredOllamaCompatStreamWrapper({
      provider: "ollama",
      modelId: "qwen3:32b",
      model: qwenModel,
      streamFn: createOllamaStreamFn("http://ollama-host:11434"),
      thinkingLevel: "low",
    } as never);
    if (!thinkingOnStream) {
      throw new Error("Expected wrapped Ollama stream function");
    }

    const chatContext = { messages: [{ role: "user", content: "hello" }] };
    // The wrapper's result is normalised through Promise.resolve before draining.
    const events = await Promise.resolve(
      thinkingOnStream(qwenModel as never, chatContext as never, {} as never),
    );
    await collectStreamEvents(events);

    // Inspect the JSON body that was handed to the (mocked) fetch call.
    const init = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof init.body !== "string") {
      throw new Error("Expected string request body");
    }
    const payload = JSON.parse(init.body) as {
      think?: boolean;
      options?: { think?: boolean; num_ctx?: number };
    };

    expect(payload.think).toBe(true);
    expect(payload.options?.think).toBeUndefined();
    expect(payload.options?.num_ctx).toBe(131072);
  });
});
});
describe("convertToOllamaMessages", () => {

View File

@@ -153,14 +153,10 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
const streamFn = baseFn ?? streamSimple;
return (model, context, options) => {
if (model.api !== "ollama") {
return streamFn(model, context, options);
}
return streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
return (model, context, options) =>
streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
payloadRecord.think = think;
});
};
}
function resolveOllamaCompatNumCtx(model: ProviderRuntimeModel): number {
@@ -178,6 +174,7 @@ export function createConfiguredOllamaCompatStreamWrapper(
let streamFn = ctx.streamFn;
const model = ctx.model;
let injectNumCtx = false;
const isNativeOllamaTransport = model?.api === "ollama";
if (model) {
const providerId =
@@ -199,9 +196,9 @@ export function createConfiguredOllamaCompatStreamWrapper(
streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaCompatNumCtx(model));
}
if (ctx.thinkingLevel === "off") {
if (isNativeOllamaTransport && ctx.thinkingLevel === "off") {
streamFn = createOllamaThinkingWrapper(streamFn, false);
} else if (ctx.thinkingLevel) {
} else if (isNativeOllamaTransport && ctx.thinkingLevel) {
// Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive, max)
// should enable Ollama's native thinking mode.
streamFn = createOllamaThinkingWrapper(streamFn, true);