mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-14 18:51:04 +00:00
fix: enable thinking support for the ollama api (#62712)
Merged via squash.
Prepared head SHA: c0b995035e
Co-authored-by: hoyyeva <63033505+hoyyeva@users.noreply.github.com>
Co-authored-by: BruceMacD <5853428+BruceMacD@users.noreply.github.com>
Reviewed-by: @BruceMacD
This commit is contained in:
@@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Codex CLI: pass OpenClaw's system prompt through Codex's `model_instructions_file` config override so fresh Codex CLI sessions receive the same prompt guidance as Claude CLI sessions.
|
||||
- Matrix/gateway: wait for Matrix sync readiness before marking startup successful, keep Matrix background handler failures contained, and route fatal Matrix sync stops through channel-level restart handling instead of crashing the whole gateway. (#62779) Thanks @gumadeiras.
|
||||
- Browser/security: re-run blocked-destination safety checks after interaction-driven main-frame navigations from click, evaluate, hook-triggered click, and batched action flows, so browser interactions cannot bypass the SSRF quarantine when they land on forbidden URLs. (#63226) Thanks @eleqtrizit.
|
||||
- Providers/Ollama: enable thinking on the native `api: "ollama"` path: when `/think` is set to a non-off level, requests now send top-level `think: true` and the model's thinking output is surfaced as thinking blocks. (#62712) Thanks @hoyyeva.
|
||||
|
||||
## 2026.4.8
|
||||
|
||||
|
||||
@@ -445,4 +445,111 @@ describe("ollama plugin", () => {
|
||||
expect(payloadSeen?.think).toBe(false);
|
||||
expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
|
||||
});
|
||||
|
||||
// A non-off thinking level must turn on Ollama's native thinking mode: the
// wrapped stream function sets `think: true` at the top level of the request
// payload and must not add a `think` key inside the nested `options` object.
it("wraps native Ollama payloads with top-level think=true when thinking is enabled", () => {
  const provider = registerProvider();
  let payloadSeen: Record<string, unknown> | undefined;
  // Stand-in for the underlying stream function: builds a minimal chat payload,
  // passes it to the caller-supplied onPayload hook, then records it so the
  // assertions below can inspect whatever the hook changed.
  const baseStreamFn = vi.fn((model, _context, options) => {
    const payload: Record<string, unknown> = {
      messages: [],
      options: { num_ctx: 65536 },
      stream: true,
    };
    options?.onPayload?.(payload, model);
    payloadSeen = payload;
    return {} as never;
  });

  const wrapped = provider.wrapStreamFn?.({
    config: {
      models: {
        providers: {
          ollama: {
            api: "ollama",
            baseUrl: "http://127.0.0.1:11434",
            models: [],
          },
        },
      },
    },
    provider: "ollama",
    modelId: "qwen3.5:9b",
    thinkingLevel: "low",
    model: {
      api: "ollama",
      provider: "ollama",
      id: "qwen3.5:9b",
      baseUrl: "http://127.0.0.1:11434",
      contextWindow: 131_072,
    },
    streamFn: baseStreamFn,
  });

  expect(typeof wrapped).toBe("function");
  // The return value is irrelevant here; only the payload side effect matters.
  void wrapped?.(
    {
      api: "ollama",
      provider: "ollama",
      id: "qwen3.5:9b",
    } as never,
    {} as never,
    {},
  );
  expect(baseStreamFn).toHaveBeenCalledTimes(1);
  expect(payloadSeen?.think).toBe(true);
  expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
});
|
||||
|
||||
// With no thinking level configured the wrapper must leave the payload alone:
// no `think` key at the top level, and none inside the nested `options` object.
it("does not set think param when thinkingLevel is undefined", () => {
  const provider = registerProvider();
  let payloadSeen: Record<string, unknown> | undefined;
  // Stand-in for the underlying stream function: builds a minimal chat payload,
  // passes it to the caller-supplied onPayload hook, then records it so the
  // assertions below can inspect whatever the hook changed.
  const baseStreamFn = vi.fn((model, _context, options) => {
    const payload: Record<string, unknown> = {
      messages: [],
      options: { num_ctx: 65536 },
      stream: true,
    };
    options?.onPayload?.(payload, model);
    payloadSeen = payload;
    return {} as never;
  });

  const wrapped = provider.wrapStreamFn?.({
    config: {
      models: {
        providers: {
          ollama: {
            api: "ollama",
            baseUrl: "http://127.0.0.1:11434",
            models: [],
          },
        },
      },
    },
    provider: "ollama",
    modelId: "qwen3.5:9b",
    thinkingLevel: undefined,
    model: {
      api: "ollama",
      provider: "ollama",
      id: "qwen3.5:9b",
      baseUrl: "http://127.0.0.1:11434",
      contextWindow: 131_072,
    },
    streamFn: baseStreamFn,
  });

  expect(typeof wrapped).toBe("function");
  // The return value is irrelevant here; only the payload side effect matters.
  void wrapped?.(
    {
      api: "ollama",
      provider: "ollama",
      id: "qwen3.5:9b",
    } as never,
    {} as never,
    {},
  );
  expect(baseStreamFn).toHaveBeenCalledTimes(1);
  expect(payloadSeen?.think).toBeUndefined();
  // Same nested-options check the think=true / think=false tests make.
  expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
});
|
||||
});
|
||||
|
||||
228
extensions/ollama/src/stream.test.ts
Normal file
228
extensions/ollama/src/stream.test.ts
Normal file
@@ -0,0 +1,228 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildAssistantMessage, createOllamaStreamFn } from "./stream.js";
|
||||
|
||||
/**
 * Builds a completed (`done: true`) Ollama chat response fixture.
 *
 * `thinking`, `reasoning`, and `tool_calls` are added to the message only when
 * the caller supplies them, so tests can exercise the "field absent" path.
 * For `thinking` / `reasoning` only `null`/`undefined` counts as absent — an
 * empty string is kept on the message.
 *
 * @param params - Optional message fields; `content` defaults to `""`.
 * @returns A response object with a fixed model name and token counts and a
 *   `created_at` timestamp taken at call time.
 */
function makeOllamaResponse(params: {
  content?: string;
  thinking?: string;
  reasoning?: string;
  tool_calls?: Array<{ function: { name: string; arguments: Record<string, unknown> } }>;
}) {
  return {
    model: "qwen3.5",
    created_at: new Date().toISOString(),
    message: {
      role: "assistant" as const,
      content: params.content ?? "",
      ...(params.thinking != null ? { thinking: params.thinking } : {}),
      ...(params.reasoning != null ? { reasoning: params.reasoning } : {}),
      ...(params.tool_calls ? { tool_calls: params.tool_calls } : {}),
    },
    done: true,
    prompt_eval_count: 100,
    eval_count: 50,
  };
}
|
||||
|
||||
// Minimal model descriptor passed as the second argument to buildAssistantMessage in the tests below.
const MODEL_INFO = { api: "ollama", provider: "ollama", id: "qwen3.5" };
|
||||
|
||||
// Covers how buildAssistantMessage maps the `thinking` / `reasoning` fields of
// an Ollama response message onto a leading thinking content block.
describe("buildAssistantMessage", () => {
  it("includes thinking block when response has thinking field", () => {
    const response = makeOllamaResponse({
      thinking: "Let me think about this",
      content: "The answer is 42",
    });
    const msg = buildAssistantMessage(response, MODEL_INFO);
    expect(msg.content).toHaveLength(2);
    expect(msg.content[0]).toEqual({ type: "thinking", thinking: "Let me think about this" });
    expect(msg.content[1]).toEqual({ type: "text", text: "The answer is 42" });
  });

  it("includes thinking block when response has reasoning field", () => {
    const response = makeOllamaResponse({
      reasoning: "Step by step analysis",
      content: "Result is 7",
    });
    const msg = buildAssistantMessage(response, MODEL_INFO);
    expect(msg.content).toHaveLength(2);
    expect(msg.content[0]).toEqual({ type: "thinking", thinking: "Step by step analysis" });
    expect(msg.content[1]).toEqual({ type: "text", text: "Result is 7" });
  });

  it("prefers thinking over reasoning when both are present", () => {
    const response = makeOllamaResponse({
      thinking: "From thinking field",
      reasoning: "From reasoning field",
      content: "Answer",
    });
    const msg = buildAssistantMessage(response, MODEL_INFO);
    // Exactly one thinking block: the reasoning text must not be emitted as well.
    expect(msg.content).toHaveLength(2);
    expect(msg.content[0]).toEqual({ type: "thinking", thinking: "From thinking field" });
    expect(msg.content[1]).toEqual({ type: "text", text: "Answer" });
  });

  it("omits thinking block when no thinking or reasoning field", () => {
    const response = makeOllamaResponse({
      content: "Just text",
    });
    const msg = buildAssistantMessage(response, MODEL_INFO);
    expect(msg.content).toHaveLength(1);
    expect(msg.content[0]).toEqual({ type: "text", text: "Just text" });
  });

  it("omits thinking block when thinking field is empty", () => {
    const response = makeOllamaResponse({
      thinking: "",
      content: "Just text",
    });
    const msg = buildAssistantMessage(response, MODEL_INFO);
    expect(msg.content).toHaveLength(1);
    expect(msg.content[0]).toEqual({ type: "text", text: "Just text" });
  });
});
|
||||
|
||||
// End-to-end checks of the event sequence createOllamaStreamFn produces when
// the (stubbed) Ollama endpoint streams NDJSON chunks with and without
// thinking content.
describe("createOllamaStreamFn thinking events", () => {
  // Each test stubs the global fetch; restore it so stubs do not leak.
  afterEach(() => vi.unstubAllGlobals());

  /** Serialises the chunks as newline-delimited JSON and serves them as one stream read. */
  function makeNdjsonBody(chunks: Array<Record<string, unknown>>): ReadableStream<Uint8Array> {
    const encoder = new TextEncoder();
    const lines = chunks.map((c) => JSON.stringify(c) + "\n").join("");
    return new ReadableStream({
      start(controller) {
        controller.enqueue(encoder.encode(lines));
        controller.close();
      },
    });
  }

  it("emits thinking_start, thinking_delta, and thinking_end events for thinking content", async () => {
    // Two thinking-only chunks, one text chunk, then the terminating chunk.
    const thinkingChunks = [
      {
        model: "qwen3.5",
        created_at: "2026-01-01T00:00:00Z",
        message: { role: "assistant", content: "", thinking: "Step 1" },
        done: false,
      },
      {
        model: "qwen3.5",
        created_at: "2026-01-01T00:00:01Z",
        message: { role: "assistant", content: "", thinking: " and step 2" },
        done: false,
      },
      {
        model: "qwen3.5",
        created_at: "2026-01-01T00:00:02Z",
        message: { role: "assistant", content: "The answer", thinking: "" },
        done: false,
      },
      {
        model: "qwen3.5",
        created_at: "2026-01-01T00:00:03Z",
        message: { role: "assistant", content: "" },
        done: true,
        done_reason: "stop",
        prompt_eval_count: 10,
        eval_count: 5,
      },
    ];

    const body = makeNdjsonBody(thinkingChunks);
    const fetchMock = vi.fn().mockResolvedValue({
      ok: true,
      body,
    });
    vi.stubGlobal("fetch", fetchMock);

    const streamFn = createOllamaStreamFn("http://localhost:11434");
    const stream = streamFn(
      { api: "ollama", provider: "ollama", id: "qwen3.5", contextWindow: 65536 } as never,
      { messages: [{ role: "user", content: "test" }] } as never,
      {},
    );

    const events: Array<{ type: string; [key: string]: unknown }> = [];
    for await (const event of stream as AsyncIterable<{ type: string; [key: string]: unknown }>) {
      events.push(event);
    }

    const eventTypes = events.map((e) => e.type);

    expect(eventTypes).toContain("thinking_start");
    expect(eventTypes).toContain("thinking_delta");
    expect(eventTypes).toContain("thinking_end");
    expect(eventTypes).toContain("text_start");
    expect(eventTypes).toContain("text_delta");
    expect(eventTypes).toContain("done");

    // thinking_start comes before text_start
    const thinkingStartIndex = eventTypes.indexOf("thinking_start");
    const textStartIndex = eventTypes.indexOf("text_start");
    expect(thinkingStartIndex).toBeLessThan(textStartIndex);

    // thinking_end comes before text_start
    const thinkingEndIndex = eventTypes.indexOf("thinking_end");
    expect(thinkingEndIndex).toBeLessThan(textStartIndex);

    // Thinking deltas arrive in order with the exact chunk text (and nothing
    // for the chunk whose `thinking` is the empty string).
    const thinkingDeltas = events.filter((e) => e.type === "thinking_delta").map((e) => e.delta);
    expect(thinkingDeltas).toEqual(["Step 1", " and step 2"]);

    // Content index: thinking at 0, text at 1
    const thinkingStart = events.find((e) => e.type === "thinking_start");
    expect(thinkingStart?.contentIndex).toBe(0);
    const textStart = events.find((e) => e.type === "text_start");
    expect(textStart?.contentIndex).toBe(1);

    // Final message has thinking block
    const done = events.find((e) => e.type === "done") as
      | { message?: { content: unknown[] } }
      | undefined;
    const content = done?.message?.content ?? [];
    expect(content[0]).toMatchObject({ type: "thinking", thinking: "Step 1 and step 2" });
    expect(content[1]).toMatchObject({ type: "text", text: "The answer" });
  });

  it("streams without thinking events when no thinking content is present", async () => {
    const chunks = [
      {
        model: "qwen3.5",
        created_at: "2026-01-01T00:00:00Z",
        message: { role: "assistant", content: "Hello" },
        done: false,
      },
      {
        model: "qwen3.5",
        created_at: "2026-01-01T00:00:01Z",
        message: { role: "assistant", content: "" },
        done: true,
        done_reason: "stop",
        prompt_eval_count: 10,
        eval_count: 5,
      },
    ];

    const body = makeNdjsonBody(chunks);
    vi.stubGlobal("fetch", vi.fn().mockResolvedValue({ ok: true, body }));

    const streamFn = createOllamaStreamFn("http://localhost:11434");
    const stream = streamFn(
      { api: "ollama", provider: "ollama", id: "qwen3.5", contextWindow: 65536 } as never,
      { messages: [{ role: "user", content: "test" }] } as never,
      {},
    );

    const events: Array<{ type: string }> = [];
    for await (const event of stream as AsyncIterable<{ type: string }>) {
      events.push(event);
    }

    const eventTypes = events.map((e) => e.type);
    expect(eventTypes).not.toContain("thinking_start");
    expect(eventTypes).not.toContain("thinking_delta");
    expect(eventTypes).not.toContain("thinking_end");
    expect(eventTypes).toContain("text_start");
    expect(eventTypes).toContain("text_delta");
    expect(eventTypes).toContain("done");

    // Text content index should be 0 (no thinking block)
    const textStart = events.find((e) => e.type === "text_start") as
      | { contentIndex?: number }
      | undefined;
    expect(textStart?.contentIndex).toBe(0);
  });
});
|
||||
@@ -4,6 +4,7 @@ import type {
|
||||
AssistantMessage,
|
||||
StopReason,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
ToolCall,
|
||||
Tool,
|
||||
Usage,
|
||||
@@ -148,14 +149,14 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Wraps a stream function so requests on the native Ollama API carry an
 * explicit top-level `think` flag.
 *
 * @param baseFn - Stream function to wrap; `streamSimple` is used when undefined.
 * @param think - Value written to the payload's `think` field.
 * @returns A stream function that patches the payload for models whose `api`
 *   is `"ollama"` and passes every other model straight through to the base
 *   function.
 */
function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
  const streamFn = baseFn ?? streamSimple;
  return (model, context, options) => {
    if (model.api !== "ollama") {
      return streamFn(model, context, options);
    }
    return streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
      payloadRecord.think = think;
    });
  };
}
|
||||
@@ -197,7 +198,11 @@ export function createConfiguredOllamaCompatStreamWrapper(
|
||||
}
|
||||
|
||||
if (ctx.thinkingLevel === "off") {
|
||||
streamFn = createOllamaThinkingOffWrapper(streamFn);
|
||||
streamFn = createOllamaThinkingWrapper(streamFn, false);
|
||||
} else if (ctx.thinkingLevel) {
|
||||
// Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive)
|
||||
// should enable Ollama's native thinking mode.
|
||||
streamFn = createOllamaThinkingWrapper(streamFn, true);
|
||||
}
|
||||
|
||||
if (normalizeProviderId(ctx.provider) === "ollama" && isOllamaCloudKimiModelRef(ctx.modelId)) {
|
||||
@@ -511,7 +516,11 @@ export function buildAssistantMessage(
|
||||
response: OllamaChatResponse,
|
||||
modelInfo: StreamModelDescriptor,
|
||||
): AssistantMessage {
|
||||
const content: (TextContent | ToolCall)[] = [];
|
||||
const content: (TextContent | ThinkingContent | ToolCall)[] = [];
|
||||
const thinking = response.message.thinking ?? response.message.reasoning ?? "";
|
||||
if (thinking) {
|
||||
content.push({ type: "thinking", thinking });
|
||||
}
|
||||
const text = response.message.content || "";
|
||||
if (text) {
|
||||
content.push({ type: "text", text });
|
||||
@@ -654,39 +663,78 @@ export function createOllamaStreamFn(
|
||||
|
||||
const reader = response.body.getReader();
|
||||
let accumulatedContent = "";
|
||||
let accumulatedThinking = "";
|
||||
const accumulatedToolCalls: OllamaToolCall[] = [];
|
||||
let finalResponse: OllamaChatResponse | undefined;
|
||||
const modelInfo = { api: model.api, provider: model.provider, id: model.id };
|
||||
let streamStarted = false;
|
||||
let thinkingStarted = false;
|
||||
let thinkingEnded = false;
|
||||
let textBlockStarted = false;
|
||||
let textBlockClosed = false;
|
||||
|
||||
// Content index tracking: thinking block (if present) is index 0,
|
||||
// text block follows at index 1 (or 0 when no thinking).
|
||||
const textContentIndex = () => (thinkingStarted ? 1 : 0);
|
||||
|
||||
const buildCurrentContent = (): (TextContent | ThinkingContent | ToolCall)[] => {
|
||||
const parts: (TextContent | ThinkingContent | ToolCall)[] = [];
|
||||
if (accumulatedThinking) {
|
||||
parts.push({
|
||||
type: "thinking",
|
||||
thinking: accumulatedThinking,
|
||||
});
|
||||
}
|
||||
if (accumulatedContent) {
|
||||
parts.push({ type: "text", text: accumulatedContent });
|
||||
}
|
||||
return parts;
|
||||
};
|
||||
|
||||
const closeThinkingBlock = () => {
|
||||
if (!thinkingStarted || thinkingEnded) {
|
||||
return;
|
||||
}
|
||||
thinkingEnded = true;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: buildCurrentContent(),
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: 0,
|
||||
content: accumulatedThinking,
|
||||
partial,
|
||||
});
|
||||
};
|
||||
|
||||
const closeTextBlock = () => {
|
||||
if (!streamStarted || textBlockClosed) {
|
||||
if (!textBlockStarted || textBlockClosed) {
|
||||
return;
|
||||
}
|
||||
textBlockClosed = true;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: [{ type: "text", text: accumulatedContent }],
|
||||
content: buildCurrentContent(),
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: 0,
|
||||
contentIndex: textContentIndex(),
|
||||
content: accumulatedContent,
|
||||
partial,
|
||||
});
|
||||
};
|
||||
|
||||
for await (const chunk of parseNdjsonStream(reader)) {
|
||||
if (chunk.message?.content) {
|
||||
const delta = chunk.message.content;
|
||||
|
||||
// Handle thinking/reasoning deltas from Ollama's native think mode.
|
||||
const thinkingDelta = chunk.message?.thinking ?? chunk.message?.reasoning;
|
||||
if (thinkingDelta) {
|
||||
if (!streamStarted) {
|
||||
streamStarted = true;
|
||||
// Emit start/text_start with an empty partial before accumulating
|
||||
// the first delta, matching the Anthropic/OpenAI provider contract.
|
||||
const emptyPartial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: [],
|
||||
@@ -694,19 +742,72 @@ export function createOllamaStreamFn(
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({ type: "start", partial: emptyPartial });
|
||||
stream.push({ type: "text_start", contentIndex: 0, partial: emptyPartial });
|
||||
}
|
||||
if (!thinkingStarted) {
|
||||
thinkingStarted = true;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: buildCurrentContent(),
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({ type: "thinking_start", contentIndex: 0, partial });
|
||||
}
|
||||
accumulatedThinking += thinkingDelta;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: buildCurrentContent(),
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: 0,
|
||||
delta: thinkingDelta,
|
||||
partial,
|
||||
});
|
||||
}
|
||||
|
||||
if (chunk.message?.content) {
|
||||
const delta = chunk.message.content;
|
||||
|
||||
// Transition from thinking to text: close the thinking block first.
|
||||
if (thinkingStarted && !thinkingEnded) {
|
||||
closeThinkingBlock();
|
||||
}
|
||||
|
||||
if (!streamStarted) {
|
||||
streamStarted = true;
|
||||
const emptyPartial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: [],
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({ type: "start", partial: emptyPartial });
|
||||
}
|
||||
if (!textBlockStarted) {
|
||||
textBlockStarted = true;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: buildCurrentContent(),
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({ type: "text_start", contentIndex: textContentIndex(), partial });
|
||||
}
|
||||
|
||||
accumulatedContent += delta;
|
||||
const partial = buildStreamAssistantMessage({
|
||||
model: modelInfo,
|
||||
content: [{ type: "text", text: accumulatedContent }],
|
||||
content: buildCurrentContent(),
|
||||
stopReason: "stop",
|
||||
usage: buildUsageWithNoCost({}),
|
||||
});
|
||||
stream.push({ type: "text_delta", contentIndex: 0, delta, partial });
|
||||
stream.push({ type: "text_delta", contentIndex: textContentIndex(), delta, partial });
|
||||
}
|
||||
if (chunk.message?.tool_calls) {
|
||||
closeThinkingBlock();
|
||||
closeTextBlock();
|
||||
accumulatedToolCalls.push(...chunk.message.tool_calls);
|
||||
}
|
||||
@@ -721,13 +822,17 @@ export function createOllamaStreamFn(
|
||||
}
|
||||
|
||||
finalResponse.message.content = accumulatedContent;
|
||||
if (accumulatedThinking) {
|
||||
finalResponse.message.thinking = accumulatedThinking;
|
||||
}
|
||||
if (accumulatedToolCalls.length > 0) {
|
||||
finalResponse.message.tool_calls = accumulatedToolCalls;
|
||||
}
|
||||
|
||||
const assistantMessage = buildAssistantMessage(finalResponse, modelInfo);
|
||||
|
||||
// Close the text block if we emitted any text_delta events.
|
||||
// Close any open blocks before emitting the done event.
|
||||
closeThinkingBlock();
|
||||
closeTextBlock();
|
||||
|
||||
stream.push({
|
||||
|
||||
Reference in New Issue
Block a user