fix: omit Ollama think for non-reasoning models

Preserve native Ollama thinking controls for supported models and explicit think=false, but avoid sending truthy think payloads for models marked reasoning=false.

Co-authored-by: 吴杨帆 <85487201+leno23@users.noreply.github.com>
This commit is contained in:
吴杨帆
2026-05-16 22:10:12 +08:00
committed by GitHub
parent caf8fa2ebf
commit eebdbabae9
3 changed files with 119 additions and 2 deletions

View File

@@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai
- Gateway/WebSocket: log expected startup `1013 gateway starting` retry closes at debug instead of warn while preserving WARN for unexpected pre-connect failures. Fixes #76361. (#82457) Thanks @IWhatsskill.
- Providers/Xiaomi: strip synthetic empty array `items` from MiMo tool schemas while preserving typed array items, avoiding strict OpenAI-compatible schema rejection.
- Telegram: send the transcript-backed full final answer after progress-mode tool drafts when the dispatcher final payload is an ellipsis-truncated snapshot. Fixes #82409. Thanks @PashaGanson.
- Providers/Ollama: omit truthy native `think` payloads for models marked non-reasoning while preserving supported thinking models and explicit `think: false`. (#82445) Thanks @leno23.
- CLI/context engines: bootstrap and finalize non-legacy context engines for CLI turns while preserving transcript snapshots and deferred maintenance ownership. (#81869) Thanks @sahilsatralkar.
- Telegram: persist polling updates through restart replay so queued same-topic messages resume in order instead of losing context after a gateway restart. (#82256) Thanks @VACInc.
- Gateway/Gmail: abort in-flight Gmail watcher startup and hot-reload restarts before shutdown so reloads cannot spawn `gog serve` after the Gateway is closing. Thanks @frankekn.

View File

@@ -292,6 +292,113 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
);
});
// Regression test: a model flagged `reasoning: false` that also carries a
// configured `params.thinking` value must not produce a `think` field in the
// outgoing Ollama request body.
it("does not forward truthy configured native Ollama thinking for non-reasoning models", async () => {
await withMockNdjsonFetch(
// Two NDJSON lines handed to the fetch mock: one assistant content chunk and a
// final `done: true` chunk (presumably served as the streamed response body).
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
],
async (fetchMock) => {
const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
// Model is explicitly marked non-reasoning while its params request
// "medium" thinking; this is the conflicting configuration under test.
const model = {
api: "ollama",
provider: "ollama",
id: "llama3.2:latest",
contextWindow: 8192,
reasoning: false,
params: { thinking: "medium" },
};
// Runtime thinking level is "off", so the configured `params.thinking` is the
// only truthy thinking input supplied in this test.
const wrapped = createConfiguredOllamaCompatStreamWrapper({
provider: "ollama",
modelId: "llama3.2:latest",
model,
streamFn: baseStreamFn,
thinkingLevel: "off",
} as never);
if (!wrapped) {
throw new Error("Expected wrapped Ollama stream function");
}
// Promise.resolve lets the call be awaited whether the wrapper returns the
// stream directly or a promise of it.
const stream = await Promise.resolve(
wrapped(
model as never,
{
messages: [{ role: "user", content: "hello" }],
} as never,
{} as never,
),
);
// Await the stream's events before inspecting the recorded fetch call.
await collectStreamEvents(stream);
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
if (typeof requestInit.body !== "string") {
throw new Error("Expected string request body");
}
const requestBody = JSON.parse(requestInit.body) as {
think?: string;
options?: { think?: string };
};
// Neither the top-level `think` nor a nested `options.think` may be present.
expect(requestBody.think).toBeUndefined();
expect(requestBody.options?.think).toBeUndefined();
},
);
});
// Regression test: a runtime thinking level ("low") must not produce a `think`
// field in the outgoing Ollama request body when the model is flagged
// `reasoning: false`.
it("does not forward runtime native Ollama thinking for non-reasoning models", async () => {
await withMockNdjsonFetch(
// Two NDJSON lines handed to the fetch mock: one assistant content chunk and a
// final `done: true` chunk (presumably served as the streamed response body).
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
],
async (fetchMock) => {
const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
// Non-reasoning model with no configured thinking params; the only thinking
// input in this test is the runtime `thinkingLevel` below.
const model = {
api: "ollama",
provider: "ollama",
id: "llama3.2:latest",
contextWindow: 8192,
reasoning: false,
};
const wrapped = createConfiguredOllamaCompatStreamWrapper({
provider: "ollama",
modelId: "llama3.2:latest",
model,
streamFn: baseStreamFn,
thinkingLevel: "low",
} as never);
if (!wrapped) {
throw new Error("Expected wrapped Ollama stream function");
}
// Promise.resolve lets the call be awaited whether the wrapper returns the
// stream directly or a promise of it.
const stream = await Promise.resolve(
wrapped(
model as never,
{
messages: [{ role: "user", content: "hello" }],
} as never,
{} as never,
),
);
// Await the stream's events before inspecting the recorded fetch call.
await collectStreamEvents(stream);
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
if (typeof requestInit.body !== "string") {
throw new Error("Expected string request body");
}
const requestBody = JSON.parse(requestInit.body) as {
think?: string;
options?: { think?: string };
};
// Neither the top-level `think` nor a nested `options.think` may be present.
expect(requestBody.think).toBeUndefined();
expect(requestBody.options?.think).toBeUndefined();
},
);
});
it("forwards the native think effort on native Ollama chat requests when thinking is enabled", async () => {
await withMockNdjsonFetch(
[

View File

@@ -278,6 +278,15 @@ function resolveOllamaThinkParamValue(
return undefined;
}
/**
 * Decides whether a resolved native `think` value may be attached to an
 * Ollama request for the given model.
 *
 * Ollama accepts top-level `think` as the native chat contract, but rejects
 * truthy values for models known not to expose thinking support.
 *
 * @param model - Runtime model metadata; may be undefined when unavailable.
 * @param think - The already-resolved think value that would be sent.
 * @returns `true` when `think` is exactly `false`, or when the model is not
 *   explicitly marked `reasoning: false`; otherwise `false`.
 */
function shouldForwardNativeOllamaThink(
  model: ProviderRuntimeModel | undefined,
  think: OllamaThinkValue,
): boolean {
  // An explicit opt-out is always forwarded, regardless of the model.
  if (think === false) {
    return true;
  }
  // Any other value is withheld only for models explicitly flagged as
  // non-reasoning; a missing model or an unset flag still forwards.
  const markedNonReasoning = model?.reasoning === false;
  return !markedNonReasoning;
}
function resolveOllamaConfiguredNumCtx(model: ProviderRuntimeModel): number | undefined {
const raw = model.params?.num_ctx;
if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
@@ -341,7 +350,7 @@ function resolveOllamaTopLevelParams(
}
}
const think = resolveOllamaThinkParamValue(params);
if (think !== undefined) {
if (think !== undefined && shouldForwardNativeOllamaThink(model, think)) {
requestParams.think = think;
}
return Object.keys(requestParams).length > 0 ? requestParams : undefined;
@@ -390,7 +399,7 @@ export function createConfiguredOllamaCompatStreamWrapper(
runtimeThinkValue === false && configuredThinkValue !== undefined
? undefined
: runtimeThinkValue;
if (ollamaThinkValue !== undefined) {
if (ollamaThinkValue !== undefined && shouldForwardNativeOllamaThink(model, ollamaThinkValue)) {
streamFn = createOllamaThinkingWrapper(streamFn, ollamaThinkValue);
}