diff --git a/CHANGELOG.md b/CHANGELOG.md index ef15f14a3e8..0a91a7e44b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai - Gateway/WebSocket: log expected startup `1013 gateway starting` retry closes at debug instead of warn while preserving WARN for unexpected pre-connect failures. Fixes #76361. (#82457) Thanks @IWhatsskill. - Providers/Xiaomi: strip synthetic empty array `items` from MiMo tool schemas while preserving typed array items, avoiding strict OpenAI-compatible schema rejection. - Telegram: send the transcript-backed full final answer after progress-mode tool drafts when the dispatcher final payload is an ellipsis-truncated snapshot. Fixes #82409. Thanks @PashaGanson. +- Providers/Ollama: omit truthy native `think` values from requests to models marked non-reasoning, while still sending them for thinking-capable models and keeping explicit `think: false`. (#82445) Thanks @leno23. - CLI/context engines: bootstrap and finalize non-legacy context engines for CLI turns while preserving transcript snapshots and deferred maintenance ownership. (#81869) Thanks @sahilsatralkar. - Telegram: persist polling updates through restart replay so queued same-topic messages resume in order instead of losing context after a gateway restart. (#82256) Thanks @VACInc. - Gateway/Gmail: abort in-flight Gmail watcher startup and hot-reload restarts before shutdown so reloads cannot spawn `gog serve` after the Gateway is closing. Thanks @frankekn. 
diff --git a/extensions/ollama/src/stream-runtime.test.ts b/extensions/ollama/src/stream-runtime.test.ts index 920ce386c04..fbb0138efa5 100644 --- a/extensions/ollama/src/stream-runtime.test.ts +++ b/extensions/ollama/src/stream-runtime.test.ts @@ -292,6 +292,113 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => { ); }); + it("does not forward truthy configured native Ollama thinking for non-reasoning models", async () => { + await withMockNdjsonFetch( + [ + '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}', + '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}', + ], + async (fetchMock) => { + const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434"); + const model = { + api: "ollama", + provider: "ollama", + id: "llama3.2:latest", + contextWindow: 8192, + reasoning: false, + params: { thinking: "medium" }, + }; + + const wrapped = createConfiguredOllamaCompatStreamWrapper({ + provider: "ollama", + modelId: "llama3.2:latest", + model, + streamFn: baseStreamFn, + thinkingLevel: "off", + } as never); + if (!wrapped) { + throw new Error("Expected wrapped Ollama stream function"); + } + + const stream = await Promise.resolve( + wrapped( + model as never, + { + messages: [{ role: "user", content: "hello" }], + } as never, + {} as never, + ), + ); + + await collectStreamEvents(stream); + + const requestInit = getGuardedFetchCall(fetchMock).init ?? 
{}; + if (typeof requestInit.body !== "string") { + throw new Error("Expected string request body"); + } + const requestBody = JSON.parse(requestInit.body) as { + think?: string; + options?: { think?: string }; + }; + expect(requestBody.think).toBeUndefined(); + expect(requestBody.options?.think).toBeUndefined(); + }, + ); + }); + + it("does not forward runtime native Ollama thinking for non-reasoning models", async () => { + await withMockNdjsonFetch( + [ + '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}', + '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}', + ], + async (fetchMock) => { + const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434"); + const model = { + api: "ollama", + provider: "ollama", + id: "llama3.2:latest", + contextWindow: 8192, + reasoning: false, + }; + + const wrapped = createConfiguredOllamaCompatStreamWrapper({ + provider: "ollama", + modelId: "llama3.2:latest", + model, + streamFn: baseStreamFn, + thinkingLevel: "low", + } as never); + if (!wrapped) { + throw new Error("Expected wrapped Ollama stream function"); + } + + const stream = await Promise.resolve( + wrapped( + model as never, + { + messages: [{ role: "user", content: "hello" }], + } as never, + {} as never, + ), + ); + + await collectStreamEvents(stream); + + const requestInit = getGuardedFetchCall(fetchMock).init ?? 
{}; + if (typeof requestInit.body !== "string") { + throw new Error("Expected string request body"); + } + const requestBody = JSON.parse(requestInit.body) as { + think?: string; + options?: { think?: string }; + }; + expect(requestBody.think).toBeUndefined(); + expect(requestBody.options?.think).toBeUndefined(); + }, + ); + }); + it("forwards the native think effort on native Ollama chat requests when thinking is enabled", async () => { await withMockNdjsonFetch( [ diff --git a/extensions/ollama/src/stream.ts b/extensions/ollama/src/stream.ts index d51368fc7d2..ef704215ad1 100644 --- a/extensions/ollama/src/stream.ts +++ b/extensions/ollama/src/stream.ts @@ -278,6 +278,15 @@ function resolveOllamaThinkParamValue( return undefined; } +function shouldForwardNativeOllamaThink( + model: ProviderRuntimeModel | undefined, + think: OllamaThinkValue, +): boolean { + // Ollama accepts top-level `think` as the native chat contract, but rejects + // truthy values for models known not to expose thinking support. + return think === false || model?.reasoning !== false; +} + function resolveOllamaConfiguredNumCtx(model: ProviderRuntimeModel): number | undefined { const raw = model.params?.num_ctx; if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) { @@ -341,7 +350,7 @@ function resolveOllamaTopLevelParams( } } const think = resolveOllamaThinkParamValue(params); - if (think !== undefined) { + if (think !== undefined && shouldForwardNativeOllamaThink(model, think)) { requestParams.think = think; } return Object.keys(requestParams).length > 0 ? requestParams : undefined; @@ -390,7 +399,7 @@ export function createConfiguredOllamaCompatStreamWrapper( runtimeThinkValue === false && configuredThinkValue !== undefined ? undefined : runtimeThinkValue; - if (ollamaThinkValue !== undefined) { + if (ollamaThinkValue !== undefined && shouldForwardNativeOllamaThink(model, ollamaThinkValue)) { streamFn = createOllamaThinkingWrapper(streamFn, ollamaThinkValue); }