fix: enable thinking support for the ollama api (#62712)

Merged via squash.

Prepared head SHA: c0b995035e
Co-authored-by: hoyyeva <63033505+hoyyeva@users.noreply.github.com>
Co-authored-by: BruceMacD <5853428+BruceMacD@users.noreply.github.com>
Reviewed-by: @BruceMacD
This commit is contained in:
Eva H
2026-04-08 13:26:18 -07:00
committed by GitHub
parent 37fb1eb9ad
commit d7bf97adb3
4 changed files with 457 additions and 16 deletions

View File

@@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai
- Codex CLI: pass OpenClaw's system prompt through Codex's `model_instructions_file` config override so fresh Codex CLI sessions receive the same prompt guidance as Claude CLI sessions.
- Matrix/gateway: wait for Matrix sync readiness before marking startup successful, keep Matrix background handler failures contained, and route fatal Matrix sync stops through channel-level restart handling instead of crashing the whole gateway. (#62779) Thanks @gumadeiras.
- Browser/security: re-run blocked-destination safety checks after interaction-driven main-frame navigations from click, evaluate, hook-triggered click, and batched action flows, so browser interactions cannot bypass the SSRF quarantine when they land on forbidden URLs. (#63226) Thanks @eleqtrizit.
- Providers/Ollama: allow Ollama models using the native `api: "ollama"` path to optionally display thinking output when `/think` is set to a non-off level. (#62712) Thanks @hoyyeva.
## 2026.4.8

View File

@@ -445,4 +445,111 @@ describe("ollama plugin", () => {
expect(payloadSeen?.think).toBe(false);
expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
});
it("wraps native Ollama payloads with top-level think=true when thinking is enabled", () => {
  const provider = registerProvider();
  let capturedPayload: Record<string, unknown> | undefined;
  // Fake downstream stream fn: hands a representative payload to onPayload so
  // the wrapper's patching can be observed, then records the patched result.
  const innerStreamFn = vi.fn((_model, _context, options) => {
    const payload: Record<string, unknown> = {
      messages: [],
      options: { num_ctx: 65536 },
      stream: true,
    };
    options?.onPayload?.(payload, _model);
    capturedPayload = payload;
    return {} as never;
  });
  const wrapped = provider.wrapStreamFn?.({
    config: {
      models: {
        providers: {
          ollama: { api: "ollama", baseUrl: "http://127.0.0.1:11434", models: [] },
        },
      },
    },
    provider: "ollama",
    modelId: "qwen3.5:9b",
    thinkingLevel: "low",
    model: {
      api: "ollama",
      provider: "ollama",
      id: "qwen3.5:9b",
      baseUrl: "http://127.0.0.1:11434",
      contextWindow: 131_072,
    },
    streamFn: innerStreamFn,
  });
  expect(typeof wrapped).toBe("function");
  void wrapped?.(
    { api: "ollama", provider: "ollama", id: "qwen3.5:9b" } as never,
    {} as never,
    {},
  );
  expect(innerStreamFn).toHaveBeenCalledTimes(1);
  // A non-"off" thinking level must surface as a top-level think=true,
  // never nested inside the options object.
  expect(capturedPayload?.think).toBe(true);
  expect((capturedPayload?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
});
it("does not set think param when thinkingLevel is undefined", () => {
  const provider = registerProvider();
  let capturedPayload: Record<string, unknown> | undefined;
  // Stub downstream stream fn; surfaces the payload through onPayload so the
  // wrapper's (absence of) patching can be inspected afterwards.
  const innerStreamFn = vi.fn((_model, _context, options) => {
    const payload: Record<string, unknown> = {
      messages: [],
      options: { num_ctx: 65536 },
      stream: true,
    };
    options?.onPayload?.(payload, _model);
    capturedPayload = payload;
    return {} as never;
  });
  const wrapped = provider.wrapStreamFn?.({
    config: {
      models: {
        providers: {
          ollama: { api: "ollama", baseUrl: "http://127.0.0.1:11434", models: [] },
        },
      },
    },
    provider: "ollama",
    modelId: "qwen3.5:9b",
    thinkingLevel: undefined,
    model: {
      api: "ollama",
      provider: "ollama",
      id: "qwen3.5:9b",
      baseUrl: "http://127.0.0.1:11434",
      contextWindow: 131_072,
    },
    streamFn: innerStreamFn,
  });
  expect(typeof wrapped).toBe("function");
  void wrapped?.(
    { api: "ollama", provider: "ollama", id: "qwen3.5:9b" } as never,
    {} as never,
    {},
  );
  expect(innerStreamFn).toHaveBeenCalledTimes(1);
  // With no thinking level configured, the payload must be left untouched.
  expect(capturedPayload?.think).toBeUndefined();
});
});

View File

@@ -0,0 +1,228 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { buildAssistantMessage, createOllamaStreamFn } from "./stream.js";
/**
 * Builds a minimal Ollama /api/chat response object for tests.
 * Optional `thinking`, `reasoning`, and `tool_calls` keys are only present on
 * the message when the corresponding param is supplied, mirroring how the
 * real API omits absent fields.
 */
function makeOllamaResponse(params: {
  content?: string;
  thinking?: string;
  reasoning?: string;
  tool_calls?: Array<{ function: { name: string; arguments: Record<string, unknown> } }>;
}) {
  const message: {
    role: "assistant";
    content: string;
    thinking?: string;
    reasoning?: string;
    tool_calls?: Array<{ function: { name: string; arguments: Record<string, unknown> } }>;
  } = {
    role: "assistant",
    content: params.content ?? "",
  };
  if (params.thinking != null) {
    message.thinking = params.thinking;
  }
  if (params.reasoning != null) {
    message.reasoning = params.reasoning;
  }
  if (params.tool_calls) {
    message.tool_calls = params.tool_calls;
  }
  return {
    model: "qwen3.5",
    created_at: new Date().toISOString(),
    message,
    done: true,
    prompt_eval_count: 100,
    eval_count: 50,
  };
}
// Shared model descriptor passed to buildAssistantMessage in the tests below.
const MODEL_INFO = { api: "ollama", provider: "ollama", id: "qwen3.5" };
// buildAssistantMessage must surface Ollama's thinking/reasoning text as a
// leading "thinking" content block, and degrade cleanly when it is absent.
describe("buildAssistantMessage", () => {
  it("includes thinking block when response has thinking field", () => {
    const resp = makeOllamaResponse({ thinking: "Let me think about this", content: "The answer is 42" });
    const assistant = buildAssistantMessage(resp, MODEL_INFO);
    expect(assistant.content).toHaveLength(2);
    expect(assistant.content[0]).toEqual({ type: "thinking", thinking: "Let me think about this" });
    expect(assistant.content[1]).toEqual({ type: "text", text: "The answer is 42" });
  });
  it("includes thinking block when response has reasoning field", () => {
    const resp = makeOllamaResponse({ reasoning: "Step by step analysis", content: "Result is 7" });
    const assistant = buildAssistantMessage(resp, MODEL_INFO);
    expect(assistant.content).toHaveLength(2);
    expect(assistant.content[0]).toEqual({ type: "thinking", thinking: "Step by step analysis" });
    expect(assistant.content[1]).toEqual({ type: "text", text: "Result is 7" });
  });
  it("prefers thinking over reasoning when both are present", () => {
    const resp = makeOllamaResponse({
      thinking: "From thinking field",
      reasoning: "From reasoning field",
      content: "Answer",
    });
    // `thinking` wins when both fields arrive on the same message.
    expect(buildAssistantMessage(resp, MODEL_INFO).content[0]).toEqual({
      type: "thinking",
      thinking: "From thinking field",
    });
  });
  it("omits thinking block when no thinking or reasoning field", () => {
    const assistant = buildAssistantMessage(makeOllamaResponse({ content: "Just text" }), MODEL_INFO);
    expect(assistant.content).toHaveLength(1);
    expect(assistant.content[0]).toEqual({ type: "text", text: "Just text" });
  });
  it("omits thinking block when thinking field is empty", () => {
    // An empty-string thinking field must not produce an empty thinking block.
    const assistant = buildAssistantMessage(makeOllamaResponse({ thinking: "", content: "Just text" }), MODEL_INFO);
    expect(assistant.content).toHaveLength(1);
    expect(assistant.content[0]).toEqual({ type: "text", text: "Just text" });
  });
});
// Verifies that createOllamaStreamFn translates Ollama's native `thinking`
// chunk field into thinking_start / thinking_delta / thinking_end events,
// interleaved correctly with the text block events.
describe("createOllamaStreamFn thinking events", () => {
// Restore the real global fetch after each test (stubbed via vi.stubGlobal).
afterEach(() => vi.unstubAllGlobals());
// Encodes the chunks as newline-delimited JSON and serves them in a single
// ReadableStream enqueue, mimicking an /api/chat streaming response body.
function makeNdjsonBody(chunks: Array<Record<string, unknown>>): ReadableStream<Uint8Array> {
const encoder = new TextEncoder();
const lines = chunks.map((c) => JSON.stringify(c) + "\n").join("");
return new ReadableStream({
start(controller) {
controller.enqueue(encoder.encode(lines));
controller.close();
},
});
}
it("emits thinking_start, thinking_delta, and thinking_end events for thinking content", async () => {
// Fixture: two thinking-only chunks, one text chunk, then the terminal
// done chunk carrying usage counts.
const thinkingChunks = [
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:00Z",
message: { role: "assistant", content: "", thinking: "Step 1" },
done: false,
},
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:01Z",
message: { role: "assistant", content: "", thinking: " and step 2" },
done: false,
},
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:02Z",
message: { role: "assistant", content: "The answer", thinking: "" },
done: false,
},
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:03Z",
message: { role: "assistant", content: "" },
done: true,
done_reason: "stop",
prompt_eval_count: 10,
eval_count: 5,
},
];
const body = makeNdjsonBody(thinkingChunks);
// Stub fetch so the stream fn consumes the fixture instead of the network.
const fetchMock = vi.fn().mockResolvedValue({
ok: true,
body,
});
vi.stubGlobal("fetch", fetchMock);
const streamFn = createOllamaStreamFn("http://localhost:11434");
const stream = streamFn(
{ api: "ollama", provider: "ollama", id: "qwen3.5", contextWindow: 65536 } as never,
{ messages: [{ role: "user", content: "test" }] } as never,
{},
);
// Drain the async event stream so all assertions run on the full sequence.
const events: Array<{ type: string; [key: string]: unknown }> = [];
for await (const event of stream as AsyncIterable<{ type: string; [key: string]: unknown }>) {
events.push(event);
}
const eventTypes = events.map((e) => e.type);
expect(eventTypes).toContain("thinking_start");
expect(eventTypes).toContain("thinking_delta");
expect(eventTypes).toContain("thinking_end");
expect(eventTypes).toContain("text_start");
expect(eventTypes).toContain("text_delta");
expect(eventTypes).toContain("done");
// thinking_start comes before text_start
const thinkingStartIndex = eventTypes.indexOf("thinking_start");
const textStartIndex = eventTypes.indexOf("text_start");
expect(thinkingStartIndex).toBeLessThan(textStartIndex);
// thinking_end comes before text_start
const thinkingEndIndex = eventTypes.indexOf("thinking_end");
expect(thinkingEndIndex).toBeLessThan(textStartIndex);
// Thinking deltas have correct content
const thinkingDeltas = events.filter((e) => e.type === "thinking_delta");
expect(thinkingDeltas).toHaveLength(2);
expect(thinkingDeltas[0].delta).toBe("Step 1");
expect(thinkingDeltas[1].delta).toBe(" and step 2");
// Content index: thinking at 0, text at 1
const thinkingStart = events.find((e) => e.type === "thinking_start");
expect(thinkingStart?.contentIndex).toBe(0);
const textStart = events.find((e) => e.type === "text_start");
expect(textStart?.contentIndex).toBe(1);
// Final message has thinking block
const done = events.find((e) => e.type === "done") as { message?: { content: unknown[] } };
const content = done?.message?.content ?? [];
expect(content[0]).toMatchObject({ type: "thinking", thinking: "Step 1 and step 2" });
expect(content[1]).toMatchObject({ type: "text", text: "The answer" });
});
// Regression guard: a plain text-only stream must not grow thinking events,
// and its text block must keep content index 0.
it("streams without thinking events when no thinking content is present", async () => {
const chunks = [
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:00Z",
message: { role: "assistant", content: "Hello" },
done: false,
},
{
model: "qwen3.5",
created_at: "2026-01-01T00:00:01Z",
message: { role: "assistant", content: "" },
done: true,
done_reason: "stop",
prompt_eval_count: 10,
eval_count: 5,
},
];
const body = makeNdjsonBody(chunks);
vi.stubGlobal("fetch", vi.fn().mockResolvedValue({ ok: true, body }));
const streamFn = createOllamaStreamFn("http://localhost:11434");
const stream = streamFn(
{ api: "ollama", provider: "ollama", id: "qwen3.5", contextWindow: 65536 } as never,
{ messages: [{ role: "user", content: "test" }] } as never,
{},
);
const events: Array<{ type: string }> = [];
for await (const event of stream as AsyncIterable<{ type: string }>) {
events.push(event);
}
const eventTypes = events.map((e) => e.type);
expect(eventTypes).not.toContain("thinking_start");
expect(eventTypes).not.toContain("thinking_delta");
expect(eventTypes).not.toContain("thinking_end");
expect(eventTypes).toContain("text_start");
expect(eventTypes).toContain("text_delta");
expect(eventTypes).toContain("done");
// Text content index should be 0 (no thinking block)
const textStart = events.find((e) => e.type === "text_start") as { contentIndex?: number };
expect(textStart?.contentIndex).toBe(0);
});
});

View File

@@ -4,6 +4,7 @@ import type {
AssistantMessage,
StopReason,
TextContent,
ThinkingContent,
ToolCall,
Tool,
Usage,
@@ -148,14 +149,14 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
});
}
function createOllamaThinkingOffWrapper(baseFn: StreamFn | undefined): StreamFn {
function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
const streamFn = baseFn ?? streamSimple;
return (model, context, options) => {
if (model.api !== "ollama") {
return streamFn(model, context, options);
}
return streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
payloadRecord.think = false;
payloadRecord.think = think;
});
};
}
@@ -197,7 +198,11 @@ export function createConfiguredOllamaCompatStreamWrapper(
}
if (ctx.thinkingLevel === "off") {
streamFn = createOllamaThinkingOffWrapper(streamFn);
streamFn = createOllamaThinkingWrapper(streamFn, false);
} else if (ctx.thinkingLevel) {
// Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive)
// should enable Ollama's native thinking mode.
streamFn = createOllamaThinkingWrapper(streamFn, true);
}
if (normalizeProviderId(ctx.provider) === "ollama" && isOllamaCloudKimiModelRef(ctx.modelId)) {
@@ -511,7 +516,11 @@ export function buildAssistantMessage(
response: OllamaChatResponse,
modelInfo: StreamModelDescriptor,
): AssistantMessage {
const content: (TextContent | ToolCall)[] = [];
const content: (TextContent | ThinkingContent | ToolCall)[] = [];
const thinking = response.message.thinking ?? response.message.reasoning ?? "";
if (thinking) {
content.push({ type: "thinking", thinking });
}
const text = response.message.content || "";
if (text) {
content.push({ type: "text", text });
@@ -654,39 +663,78 @@ export function createOllamaStreamFn(
const reader = response.body.getReader();
let accumulatedContent = "";
let accumulatedThinking = "";
const accumulatedToolCalls: OllamaToolCall[] = [];
let finalResponse: OllamaChatResponse | undefined;
const modelInfo = { api: model.api, provider: model.provider, id: model.id };
let streamStarted = false;
let thinkingStarted = false;
let thinkingEnded = false;
let textBlockStarted = false;
let textBlockClosed = false;
// Content index tracking: thinking block (if present) is index 0,
// text block follows at index 1 (or 0 when no thinking).
const textContentIndex = () => (thinkingStarted ? 1 : 0);
const buildCurrentContent = (): (TextContent | ThinkingContent | ToolCall)[] => {
const parts: (TextContent | ThinkingContent | ToolCall)[] = [];
if (accumulatedThinking) {
parts.push({
type: "thinking",
thinking: accumulatedThinking,
});
}
if (accumulatedContent) {
parts.push({ type: "text", text: accumulatedContent });
}
return parts;
};
const closeThinkingBlock = () => {
if (!thinkingStarted || thinkingEnded) {
return;
}
thinkingEnded = true;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: buildCurrentContent(),
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({
type: "thinking_end",
contentIndex: 0,
content: accumulatedThinking,
partial,
});
};
const closeTextBlock = () => {
if (!streamStarted || textBlockClosed) {
if (!textBlockStarted || textBlockClosed) {
return;
}
textBlockClosed = true;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: [{ type: "text", text: accumulatedContent }],
content: buildCurrentContent(),
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({
type: "text_end",
contentIndex: 0,
contentIndex: textContentIndex(),
content: accumulatedContent,
partial,
});
};
for await (const chunk of parseNdjsonStream(reader)) {
if (chunk.message?.content) {
const delta = chunk.message.content;
// Handle thinking/reasoning deltas from Ollama's native think mode.
const thinkingDelta = chunk.message?.thinking ?? chunk.message?.reasoning;
if (thinkingDelta) {
if (!streamStarted) {
streamStarted = true;
// Emit start/text_start with an empty partial before accumulating
// the first delta, matching the Anthropic/OpenAI provider contract.
const emptyPartial = buildStreamAssistantMessage({
model: modelInfo,
content: [],
@@ -694,19 +742,72 @@ export function createOllamaStreamFn(
usage: buildUsageWithNoCost({}),
});
stream.push({ type: "start", partial: emptyPartial });
stream.push({ type: "text_start", contentIndex: 0, partial: emptyPartial });
}
if (!thinkingStarted) {
thinkingStarted = true;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: buildCurrentContent(),
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({ type: "thinking_start", contentIndex: 0, partial });
}
accumulatedThinking += thinkingDelta;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: buildCurrentContent(),
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({
type: "thinking_delta",
contentIndex: 0,
delta: thinkingDelta,
partial,
});
}
if (chunk.message?.content) {
const delta = chunk.message.content;
// Transition from thinking to text: close the thinking block first.
if (thinkingStarted && !thinkingEnded) {
closeThinkingBlock();
}
if (!streamStarted) {
streamStarted = true;
const emptyPartial = buildStreamAssistantMessage({
model: modelInfo,
content: [],
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({ type: "start", partial: emptyPartial });
}
if (!textBlockStarted) {
textBlockStarted = true;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: buildCurrentContent(),
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({ type: "text_start", contentIndex: textContentIndex(), partial });
}
accumulatedContent += delta;
const partial = buildStreamAssistantMessage({
model: modelInfo,
content: [{ type: "text", text: accumulatedContent }],
content: buildCurrentContent(),
stopReason: "stop",
usage: buildUsageWithNoCost({}),
});
stream.push({ type: "text_delta", contentIndex: 0, delta, partial });
stream.push({ type: "text_delta", contentIndex: textContentIndex(), delta, partial });
}
if (chunk.message?.tool_calls) {
closeThinkingBlock();
closeTextBlock();
accumulatedToolCalls.push(...chunk.message.tool_calls);
}
@@ -721,13 +822,17 @@ export function createOllamaStreamFn(
}
finalResponse.message.content = accumulatedContent;
if (accumulatedThinking) {
finalResponse.message.thinking = accumulatedThinking;
}
if (accumulatedToolCalls.length > 0) {
finalResponse.message.tool_calls = accumulatedToolCalls;
}
const assistantMessage = buildAssistantMessage(finalResponse, modelInfo);
// Close the text block if we emitted any text_delta events.
// Close any open blocks before emitting the done event.
closeThinkingBlock();
closeTextBlock();
stream.push({