Mirror of https://github.com/openclaw/openclaw.git, synced 2026-05-06 06:40:44 +00:00.
fix(ollama): forward think:false for qwen3 chat requests (#69967)
Forward top-level Ollama think flags on native /api/chat requests so --thinking off sends think:false.

Thanks @WZH8898.
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai

### Fixes

- Ollama: forward OpenClaw thinking control to native `/api/chat` requests as top-level `think`, so `/think off` and `openclaw agent --thinking off` suppress thinking on models such as qwen3 instead of idling until the watchdog fires (a request-body sketch follows this hunk). Fixes #69902. (#69967) Thanks @WZH8898.
- Memory-core/dreaming: suppress the startup-only "managed dreaming cron unavailable" warning while the cron service is still attaching, but preserve the runtime warning if cron genuinely remains unavailable. Fixes #69939. (#69941) Thanks @Sanjays2402.
- Mattermost: suppress reasoning-only payloads even when they arrive as blockquoted `> Reasoning:` text, preventing `/reasoning on` from leaking thinking into channel posts. (#69927) Thanks @lawrence3699.
- Discord: read `channel.parentId` through a safe accessor in the slash-command, reaction, and model-picker paths so partial `GuildThreadChannel` prototype getters no longer throw `Cannot access rawData on partial Channel` when commands like `/new` run from inside a thread. Fixes #69861. (#69908) Thanks @neeravmakwana.
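For context on the Ollama entry above, the change is about where the flag lives in the request body. Here is a minimal TypeScript sketch of the native `/api/chat` payload with thinking suppressed; the concrete values and the `stream` field are illustrative assumptions — the only thing the fix itself guarantees is that `think` sits at the top level rather than inside `options`:

```ts
// Illustrative request body for Ollama's native /api/chat endpoint.
// Values are hypothetical; the fixed behavior is `think` at top level.
const chatRequest = {
  model: "qwen3:32b",
  messages: [{ role: "user", content: "hello" }],
  stream: true, // assumed default; not part of this fix
  think: false, // `--thinking off` now lands here, not under `options`
  options: { num_ctx: 131072 },
};
```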
@@ -463,6 +463,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s

<Accordion title="Streaming configuration">

OpenClaw's Ollama integration uses the **native Ollama API** (`/api/chat`) by default, which fully supports streaming and tool calling simultaneously. No special configuration is needed.

For native `/api/chat` requests, OpenClaw also forwards thinking control directly to Ollama: `/think off` and `openclaw agent --thinking off` send top-level `think: false`, while non-`off` thinking levels send `think: true`.

<Tip>
If you need to use the OpenAI-compatible endpoint, see the "Legacy OpenAI-compatible mode" section above. Streaming and tool calling may not work simultaneously in that mode.
</Tip>
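The docs paragraph above maps OpenClaw thinking levels onto a single boolean. A hedged sketch of that mapping, with the level names taken from the code comment later in this commit; the real resolution logic in OpenClaw may differ:

```ts
// Sketch only: every non-"off" level enables Ollama's native thinking.
type ThinkLevel =
  | "off" | "minimal" | "low" | "medium"
  | "high" | "xhigh" | "adaptive" | "max";

function thinkFlagFor(level?: ThinkLevel): boolean | undefined {
  if (level === undefined) return undefined; // no flag forwarded at all
  return level !== "off"; // "off" -> think:false, otherwise think:true
}
```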
@@ -96,6 +96,112 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
      options: { num_ctx: 262144 },
    });
  });

  it("forwards think=false on native Ollama chat requests when thinking is off", async () => {
    await withMockNdjsonFetch(
      [
        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
      ],
      async (fetchMock) => {
        const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
        const model = {
          api: "ollama",
          provider: "ollama",
          id: "qwen3:32b",
          contextWindow: 131072,
        };

        const wrapped = createConfiguredOllamaCompatStreamWrapper({
          provider: "ollama",
          modelId: "qwen3:32b",
          model,
          streamFn: baseStreamFn,
          thinkingLevel: "off",
        } as never);
        if (!wrapped) {
          throw new Error("Expected wrapped Ollama stream function");
        }

        const stream = await Promise.resolve(
          wrapped(
            model as never,
            {
              messages: [{ role: "user", content: "hello" }],
            } as never,
            {} as never,
          ),
        );

        await collectStreamEvents(stream);

        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
        if (typeof requestInit.body !== "string") {
          throw new Error("Expected string request body");
        }
        const requestBody = JSON.parse(requestInit.body) as {
          think?: boolean;
          options?: { think?: boolean; num_ctx?: number };
        };
        expect(requestBody.think).toBe(false);
        expect(requestBody.options?.think).toBeUndefined();
        expect(requestBody.options?.num_ctx).toBe(131072);
      },
    );
  });

  it("forwards think=true on native Ollama chat requests when thinking is enabled", async () => {
    await withMockNdjsonFetch(
      [
        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
      ],
      async (fetchMock) => {
        const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
        const model = {
          api: "ollama",
          provider: "ollama",
          id: "qwen3:32b",
          contextWindow: 131072,
        };

        const wrapped = createConfiguredOllamaCompatStreamWrapper({
          provider: "ollama",
          modelId: "qwen3:32b",
          model,
          streamFn: baseStreamFn,
          thinkingLevel: "low",
        } as never);
        if (!wrapped) {
          throw new Error("Expected wrapped Ollama stream function");
        }

        const stream = await Promise.resolve(
          wrapped(
            model as never,
            {
              messages: [{ role: "user", content: "hello" }],
            } as never,
            {} as never,
          ),
        );

        await collectStreamEvents(stream);

        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
        if (typeof requestInit.body !== "string") {
          throw new Error("Expected string request body");
        }
        const requestBody = JSON.parse(requestInit.body) as {
          think?: boolean;
          options?: { think?: boolean; num_ctx?: number };
        };
        expect(requestBody.think).toBe(true);
        expect(requestBody.options?.think).toBeUndefined();
        expect(requestBody.options?.num_ctx).toBe(131072);
      },
    );
  });
});

describe("convertToOllamaMessages", () => {
@@ -153,14 +153,10 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
 
 function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
   const streamFn = baseFn ?? streamSimple;
-  return (model, context, options) => {
-    if (model.api !== "ollama") {
-      return streamFn(model, context, options);
-    }
-    return streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
+  return (model, context, options) =>
+    streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
       payloadRecord.think = think;
     });
-  };
 }
 
 function resolveOllamaCompatNumCtx(model: ProviderRuntimeModel): number {
@@ -178,6 +174,7 @@ export function createConfiguredOllamaCompatStreamWrapper(
   let streamFn = ctx.streamFn;
   const model = ctx.model;
   let injectNumCtx = false;
+  const isNativeOllamaTransport = model?.api === "ollama";
 
   if (model) {
     const providerId =
@@ -199,9 +196,9 @@ export function createConfiguredOllamaCompatStreamWrapper(
     streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaCompatNumCtx(model));
   }
 
-  if (ctx.thinkingLevel === "off") {
+  if (isNativeOllamaTransport && ctx.thinkingLevel === "off") {
     streamFn = createOllamaThinkingWrapper(streamFn, false);
-  } else if (ctx.thinkingLevel) {
+  } else if (isNativeOllamaTransport && ctx.thinkingLevel) {
     // Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive, max)
     // should enable Ollama's native thinking mode.
     streamFn = createOllamaThinkingWrapper(streamFn, true);
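`streamWithPayloadPatch` is used but not defined in this diff. A hedged sketch of the payload-patching idea it appears to implement — an assumed shape, not OpenClaw's actual helper:

```ts
// Hypothetical sketch of the payload-patching idea behind
// streamWithPayloadPatch; NOT OpenClaw's actual helper.
type PayloadPatch = (payload: Record<string, unknown>) => void;

function patchJsonBody(init: RequestInit, patch: PayloadPatch): RequestInit {
  if (typeof init.body !== "string") {
    return init; // only JSON string bodies are patched
  }
  const payload = JSON.parse(init.body) as Record<string, unknown>;
  patch(payload); // e.g. (p) => { p.think = false; }
  return { ...init, body: JSON.stringify(payload) };
}
```

Patching at this layer is what puts `think` at the top level of the JSON body, next to `model` and `messages`, rather than nested under `options` — exactly what the tests above assert.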