mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-18 12:04:47 +00:00
fix: omit Ollama think for non-reasoning models
Preserve native Ollama thinking controls for supported models and explicit think=false, but avoid sending truthy think payloads for models marked reasoning=false.

Co-authored-by: 吴杨帆 <85487201+leno23@users.noreply.github.com>
This commit is contained in:
@@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway/WebSocket: log expected startup `1013 gateway starting` retry closes at debug instead of warn while preserving WARN for unexpected pre-connect failures. Fixes #76361. (#82457) Thanks @IWhatsskill.
|
||||
- Providers/Xiaomi: strip synthetic empty array `items` from MiMo tool schemas while preserving typed array items, avoiding strict OpenAI-compatible schema rejection.
|
||||
- Telegram: send the transcript-backed full final answer after progress-mode tool drafts when the dispatcher final payload is an ellipsis-truncated snapshot. Fixes #82409. Thanks @PashaGanson.
|
||||
- Providers/Ollama: omit truthy native `think` payloads for models marked non-reasoning while preserving supported thinking models and explicit `think: false`. (#82445) Thanks @leno23.
|
||||
- CLI/context engines: bootstrap and finalize non-legacy context engines for CLI turns while preserving transcript snapshots and deferred maintenance ownership. (#81869) Thanks @sahilsatralkar.
|
||||
- Telegram: persist polling updates through restart replay so queued same-topic messages resume in order instead of losing context after a gateway restart. (#82256) Thanks @VACInc.
|
||||
- Gateway/Gmail: abort in-flight Gmail watcher startup and hot-reload restarts before shutdown so reloads cannot spawn `gog serve` after the Gateway is closing. Thanks @frankekn.
|
||||
|
||||
@@ -292,6 +292,113 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
|
||||
);
|
||||
});
|
||||
|
||||
// Configured-params path: the model carries `params.thinking: "medium"` but is
// flagged `reasoning: false`, and the runtime thinking level is "off". The
// outgoing request body must carry no `think` field, neither at the top level
// nor under `options`.
it("does not forward truthy configured native Ollama thinking for non-reasoning models", async () => {
  await withMockNdjsonFetch(
    [
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
    ],
    async (fetchMock) => {
      const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
      const model = {
        api: "ollama",
        provider: "ollama",
        id: "llama3.2:latest",
        contextWindow: 8192,
        // Marks the model as not supporting thinking.
        reasoning: false,
        // Truthy configured thinking value that must be suppressed.
        params: { thinking: "medium" },
      };

      const wrapped = createConfiguredOllamaCompatStreamWrapper({
        provider: "ollama",
        modelId: "llama3.2:latest",
        model,
        streamFn: baseStreamFn,
        thinkingLevel: "off",
      } as never);
      if (!wrapped) {
        throw new Error("Expected wrapped Ollama stream function");
      }

      const stream = await Promise.resolve(
        wrapped(
          model as never,
          {
            messages: [{ role: "user", content: "hello" }],
          } as never,
          {} as never,
        ),
      );

      // Drain the stream so the mocked fetch call has been issued.
      await collectStreamEvents(stream);

      const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
      if (typeof requestInit.body !== "string") {
        throw new Error("Expected string request body");
      }
      // `think` may be a boolean or an effort string when present.
      const requestBody = JSON.parse(requestInit.body) as {
        think?: boolean | string;
        options?: { think?: boolean | string };
      };
      expect(requestBody.think).toBeUndefined();
      expect(requestBody.options?.think).toBeUndefined();
    },
  );
});
|
||||
|
||||
// Runtime path: the model has no configured thinking params but is flagged
// `reasoning: false`, while the runtime thinking level is "low". The outgoing
// request body must carry no `think` field, neither at the top level nor
// under `options`.
it("does not forward runtime native Ollama thinking for non-reasoning models", async () => {
  await withMockNdjsonFetch(
    [
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
    ],
    async (fetchMock) => {
      const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
      const model = {
        api: "ollama",
        provider: "ollama",
        id: "llama3.2:latest",
        contextWindow: 8192,
        // Marks the model as not supporting thinking.
        reasoning: false,
      };

      const wrapped = createConfiguredOllamaCompatStreamWrapper({
        provider: "ollama",
        modelId: "llama3.2:latest",
        model,
        streamFn: baseStreamFn,
        // Truthy runtime thinking level that must be suppressed.
        thinkingLevel: "low",
      } as never);
      if (!wrapped) {
        throw new Error("Expected wrapped Ollama stream function");
      }

      const stream = await Promise.resolve(
        wrapped(
          model as never,
          {
            messages: [{ role: "user", content: "hello" }],
          } as never,
          {} as never,
        ),
      );

      // Drain the stream so the mocked fetch call has been issued.
      await collectStreamEvents(stream);

      const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
      if (typeof requestInit.body !== "string") {
        throw new Error("Expected string request body");
      }
      // `think` may be a boolean or an effort string when present.
      const requestBody = JSON.parse(requestInit.body) as {
        think?: boolean | string;
        options?: { think?: boolean | string };
      };
      expect(requestBody.think).toBeUndefined();
      expect(requestBody.options?.think).toBeUndefined();
    },
  );
});
|
||||
|
||||
it("forwards the native think effort on native Ollama chat requests when thinking is enabled", async () => {
|
||||
await withMockNdjsonFetch(
|
||||
[
|
||||
|
||||
@@ -278,6 +278,15 @@ function resolveOllamaThinkParamValue(
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Decides whether a resolved native `think` value should be sent to Ollama.
 *
 * An explicit `false` is always forwarded. Any other value is forwarded unless
 * the model is explicitly marked `reasoning: false`; a missing model or a
 * missing `reasoning` flag does not block forwarding.
 *
 * @param model - Runtime model descriptor, if one is available.
 * @param think - The already-resolved think value for the request.
 * @returns `true` when `think` should be included in the request.
 */
function shouldForwardNativeOllamaThink(
  model: ProviderRuntimeModel | undefined,
  think: OllamaThinkValue,
): boolean {
  // Ollama accepts top-level `think` as the native chat contract, but rejects
  // truthy values for models known not to expose thinking support.
  if (think === false) {
    return true;
  }
  // Only an explicit `reasoning: false` suppresses the value.
  return model?.reasoning !== false;
}
|
||||
|
||||
function resolveOllamaConfiguredNumCtx(model: ProviderRuntimeModel): number | undefined {
|
||||
const raw = model.params?.num_ctx;
|
||||
if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
|
||||
@@ -341,7 +350,7 @@ function resolveOllamaTopLevelParams(
|
||||
}
|
||||
}
|
||||
const think = resolveOllamaThinkParamValue(params);
|
||||
if (think !== undefined) {
|
||||
if (think !== undefined && shouldForwardNativeOllamaThink(model, think)) {
|
||||
requestParams.think = think;
|
||||
}
|
||||
return Object.keys(requestParams).length > 0 ? requestParams : undefined;
|
||||
@@ -390,7 +399,7 @@ export function createConfiguredOllamaCompatStreamWrapper(
|
||||
runtimeThinkValue === false && configuredThinkValue !== undefined
|
||||
? undefined
|
||||
: runtimeThinkValue;
|
||||
if (ollamaThinkValue !== undefined) {
|
||||
if (ollamaThinkValue !== undefined && shouldForwardNativeOllamaThink(model, ollamaThinkValue)) {
|
||||
streamFn = createOllamaThinkingWrapper(streamFn, ollamaThinkValue);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user