Files
openclaw/extensions/ollama/src/stream-runtime.test.ts
2026-05-27 02:41:30 +01:00

2803 lines
100 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { afterEach, describe, expect, it, vi } from "vitest";
// Vitest hoists vi.mock() calls to the top of the module, ahead of the imports.
// Creating the mock function through vi.hoisted() makes sure it already exists
// when the mock factory below runs.
const { fetchWithSsrFGuardMock } = vi.hoisted(() => ({
fetchWithSsrFGuardMock: vi.fn(),
}));
// Swap the SDK's SSRF-guarded fetch for the shared mock so tests can inspect the
// calls made through it without touching the network.
vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
fetchWithSsrFGuard: fetchWithSsrFGuardMock,
}));
import {
buildOllamaChatRequest,
createConfiguredOllamaCompatStreamWrapper,
createConfiguredOllamaStreamFn,
createOllamaStreamFn,
convertToOllamaMessages,
buildAssistantMessage,
parseNdjsonStream,
resolveOllamaBaseUrlForRun,
} from "./stream.js";
/**
 * Shape of a captured guarded-fetch call as the tests in this file read it
 * (they inspect `init`, `signal` and `timeoutMs`).
 * NOTE(review): presumably mirrors the argument object accepted by
 * `fetchWithSsrFGuard` — confirm against the plugin SDK.
 */
type GuardedFetchCall = {
// Request target.
url: string;
// Fetch init; tests read `init.body` and `init.signal` from it.
init?: RequestInit;
policy?: unknown;
// Abort signal supplied next to `init` rather than inside it.
signal?: AbortSignal;
// Request timeout in milliseconds.
timeoutMs?: number;
auditContext?: string;
};
/**
 * Narrows an unknown value to a string-keyed record.
 *
 * Any non-null value whose `typeof` is `"object"` is accepted, which includes arrays.
 *
 * @param value - Candidate value to narrow.
 * @param label - Human-readable name used in the failure message.
 * @returns The same reference, typed as `Record<string, unknown>`.
 * @throws Error with message `expected <label>` when `value` is not a non-null object.
 */
function requireRecord(value: unknown, label: string): Record<string, unknown> {
  const isObjectLike = typeof value === "object" && value !== null;
  if (isObjectLike) {
    return value as Record<string, unknown>;
  }
  throw new Error(`expected ${label}`);
}
/**
 * Narrows an unknown value to a header map.
 *
 * Only object-ness is verified (via `requireRecord`); header values are not
 * checked to be strings.
 *
 * @throws Error with message `expected request headers` when `value` is not an object.
 */
function requireHeaders(value: unknown): Record<string, string> {
  const headers = requireRecord(value, "request headers");
  return headers as Record<string, string>;
}
/**
 * Asserts that `value` is a `toolCall` content block with the expected tool
 * name and arguments. The block's `id` is intentionally not checked here.
 *
 * @param value - Content block under test; must be an object.
 * @param expected - Tool name (strict equality) and arguments (deep equality).
 */
function expectToolCallContent(
  value: unknown,
  expected: { name: string; arguments: Record<string, unknown> },
) {
  const { type, name, arguments: args } = requireRecord(value, "tool call content");
  expect(type).toBe("toolCall");
  expect(name).toBe(expected.name);
  expect(args).toEqual(expected.arguments);
}
/**
 * Asserts the shape of one iterator step (`{ done, value }`).
 *
 * When `expected.type` is omitted the step must carry no value at all.
 * Otherwise the step value must be an event of that type, and `delta` /
 * `content` are compared only when they are supplied in `expected`.
 *
 * @param value - Result returned from an iterator's `next()` call.
 * @param expected - Expected `done` flag plus optional event fields.
 */
function expectIteratorEvent(
  value: unknown,
  expected: { type?: string; delta?: string; content?: string; done: boolean },
) {
  const { type, delta, content, done } = expected;
  const step = requireRecord(value, "iterator result");
  expect(step.done).toBe(done);

  // No event type expected: the step must not carry a value.
  if (type === undefined) {
    expect(step.value).toBeUndefined();
    return;
  }

  const event = requireRecord(step.value, "iterator result value");
  expect(event.type).toBe(type);
  if (delta !== undefined) {
    expect(event.delta).toBe(delta);
  }
  if (content !== undefined) {
    expect(event.content).toBe(content);
  }
}
// Reset the shared guarded-fetch mock after every test so recorded calls and
// stubbed implementations from one test cannot leak into the next.
afterEach(() => {
fetchWithSsrFGuardMock.mockReset();
});
// Request-body construction: tool omission and model-id prefix handling.
describe("buildOllamaChatRequest", () => {
  it("omits tools when none are provided", () => {
    const request = buildOllamaChatRequest({
      modelId: "qwen3.5:9b",
      messages: [{ role: "user", content: "hello" }],
      options: { num_ctx: 65536 },
    });

    // Deep equality also proves that no `tools` key is emitted.
    expect(request).toEqual({
      model: "qwen3.5:9b",
      messages: [{ role: "user", content: "hello" }],
      stream: true,
      options: { num_ctx: 65536 },
    });
  });

  it("strips the ollama/ prefix from chat model ids", () => {
    expect(
      buildOllamaChatRequest({
        modelId: "ollama/qwen3:14b-q8_0",
        messages: [{ role: "user", content: "hello" }],
      }).model,
    ).toBe("qwen3:14b-q8_0");
  });

  it("strips the active custom provider prefix from chat model ids", () => {
    expect(
      buildOllamaChatRequest({
        modelId: "ollama-spark/qwen3:32b",
        providerId: "ollama-spark",
        messages: [{ role: "user", content: "hello" }],
      }).model,
    ).toBe("qwen3:32b");
  });

  it("keeps unrelated slash-containing Ollama model ids intact", () => {
    // "library/" is not the active provider id, so it stays part of the model name.
    expect(
      buildOllamaChatRequest({
        modelId: "library/qwen3:32b",
        providerId: "ollama-spark",
        messages: [{ role: "user", content: "hello" }],
      }).model,
    ).toBe("library/qwen3:32b");
  });
});
// Covers the configured compat wrapper (payload patching for OpenAI-compatible
// requests) and the native Ollama chat path (think forwarding, timeouts, abort
// signals, model-id normalisation and tool-schema type hints).
describe("createConfiguredOllamaCompatStreamWrapper", () => {
  /** Base URL every native-chat case in this suite points the stream function at. */
  const nativeBaseUrl = "http://ollama-host:11434";

  /**
   * Returns a fresh copy of the two-line NDJSON reply shared by the native-chat
   * cases: one content chunk followed by the terminal `done` chunk.
   * A new array is built per call so no test can observe another test's mutations.
   */
  function okChatNdjson(): string[] {
    return [
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
    ];
  }

  /**
   * Parses the JSON body of the request captured by the mocked guarded fetch.
   *
   * @returns The parsed body as `unknown`; callers narrow it to the fields they assert on.
   * @throws Error `Expected string request body` when the captured body is not a string.
   */
  function readSentRequestBody(fetchMock: Parameters<typeof getGuardedFetchCall>[0]): unknown {
    const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof requestInit.body !== "string") {
      throw new Error("Expected string request body");
    }
    return JSON.parse(requestInit.body);
  }

  /**
   * Wraps a native Ollama stream function for `model`, sends a single
   * `hello` user turn through the wrapper and drains the resulting stream.
   *
   * @param model - Model descriptor; its `id` is also used as the wrapper's `modelId`.
   * @param thinkingLevel - Runtime thinking level handed to the wrapper.
   * @throws Error `Expected wrapped Ollama stream function` when no wrapper is returned.
   */
  async function sendWrappedHelloChat(
    model: Record<string, unknown> & { id: string },
    thinkingLevel: string,
  ): Promise<void> {
    const baseStreamFn = createOllamaStreamFn(nativeBaseUrl);
    const wrapped = createConfiguredOllamaCompatStreamWrapper({
      provider: "ollama",
      modelId: model.id,
      model,
      streamFn: baseStreamFn,
      thinkingLevel,
    } as never);
    if (!wrapped) {
      throw new Error("Expected wrapped Ollama stream function");
    }
    const stream = await Promise.resolve(
      wrapped(
        model as never,
        {
          messages: [{ role: "user", content: "hello" }],
        } as never,
        {} as never,
      ),
    );
    await collectStreamEvents(stream);
  }

  it("adds Moonshot thinking config for Ollama cloud Kimi compat requests", async () => {
    let patchedPayload: Record<string, unknown> | undefined;
    // The base stream only reports a payload; the wrapper is expected to patch it.
    const baseStreamFn = vi.fn((_model, _context, options) => {
      options?.onPayload?.({ tool_choice: "auto" });
      return (async function* () {})();
    });
    const model = {
      api: "openai-completions",
      provider: "ollama",
      id: "kimi-k2.5:cloud",
      contextWindow: 262144,
      params: { num_ctx: 65536 },
    };
    const wrapped = createConfiguredOllamaCompatStreamWrapper({
      provider: "ollama",
      modelId: "kimi-k2.5:cloud",
      model,
      streamFn: baseStreamFn,
      thinkingLevel: "high",
      extraParams: {},
    } as never);
    await wrapped?.(
      model as never,
      { messages: [] } as never,
      {
        onPayload: (payload: unknown) => {
          patchedPayload = payload as Record<string, unknown>;
        },
      } as never,
    );
    const payload = requireRecord(patchedPayload, "patched payload");
    expect(payload.thinking).toEqual({ type: "enabled" });
    expect(payload.options).toEqual({ num_ctx: 65536 });
  });

  it("falls back to contextWindow when configured num_ctx is invalid", async () => {
    let patchedPayload: Record<string, unknown> | undefined;
    const baseStreamFn = vi.fn((_model, _context, options) => {
      options?.onPayload?.({});
      return (async function* () {})();
    });
    const model = {
      api: "openai-completions",
      provider: "ollama",
      id: "qwen3:32b",
      contextWindow: 131072,
      // Zero is not a usable context size, so contextWindow should win.
      params: { num_ctx: 0 },
    };
    const wrapped = createConfiguredOllamaCompatStreamWrapper({
      provider: "ollama",
      modelId: "qwen3:32b",
      model,
      streamFn: baseStreamFn,
    } as never);
    await wrapped?.(
      model as never,
      { messages: [] } as never,
      {
        onPayload: (payload: unknown) => {
          patchedPayload = payload as Record<string, unknown>;
        },
      } as never,
    );
    const payload = requireRecord(patchedPayload, "patched payload");
    expect(payload.options).toEqual({ num_ctx: 131072 });
  });

  it("forwards think=false on native Ollama chat requests when thinking is off", async () => {
    await withMockNdjsonFetch(okChatNdjson(), async (fetchMock) => {
      await sendWrappedHelloChat(
        {
          api: "ollama",
          provider: "ollama",
          id: "qwen3:32b",
          contextWindow: 131072,
        },
        "off",
      );
      const requestBody = readSentRequestBody(fetchMock) as {
        think?: boolean;
        options?: { think?: boolean; num_ctx?: number };
      };
      // `think` belongs at the top level of the body, never inside `options`.
      expect(requestBody.think).toBe(false);
      expect(requestBody.options?.think).toBeUndefined();
      expect(requestBody.options?.num_ctx).toBeUndefined();
    });
  });

  it("does not overwrite configured native Ollama params.thinking with implicit off", async () => {
    await withMockNdjsonFetch(okChatNdjson(), async (fetchMock) => {
      await sendWrappedHelloChat(
        {
          api: "ollama",
          provider: "ollama",
          id: "qwen3:32b",
          contextWindow: 131072,
          params: { thinking: "medium" },
        },
        "off",
      );
      const requestBody = readSentRequestBody(fetchMock) as { think?: string };
      expect(requestBody.think).toBe("medium");
    });
  });

  it("does not forward truthy configured native Ollama thinking for non-reasoning models", async () => {
    await withMockNdjsonFetch(okChatNdjson(), async (fetchMock) => {
      await sendWrappedHelloChat(
        {
          api: "ollama",
          provider: "ollama",
          id: "llama3.2:latest",
          contextWindow: 8192,
          reasoning: false,
          params: { thinking: "medium" },
        },
        "off",
      );
      const requestBody = readSentRequestBody(fetchMock) as {
        think?: string;
        options?: { think?: string };
      };
      expect(requestBody.think).toBeUndefined();
      expect(requestBody.options?.think).toBeUndefined();
    });
  });

  it("does not forward runtime native Ollama thinking for non-reasoning models", async () => {
    await withMockNdjsonFetch(okChatNdjson(), async (fetchMock) => {
      await sendWrappedHelloChat(
        {
          api: "ollama",
          provider: "ollama",
          id: "llama3.2:latest",
          contextWindow: 8192,
          reasoning: false,
        },
        "low",
      );
      const requestBody = readSentRequestBody(fetchMock) as {
        think?: string;
        options?: { think?: string };
      };
      expect(requestBody.think).toBeUndefined();
      expect(requestBody.options?.think).toBeUndefined();
    });
  });

  it("forwards the native think effort on native Ollama chat requests when thinking is enabled", async () => {
    await withMockNdjsonFetch(okChatNdjson(), async (fetchMock) => {
      await sendWrappedHelloChat(
        {
          api: "ollama",
          provider: "ollama",
          id: "qwen3:32b",
          contextWindow: 131072,
        },
        "low",
      );
      const requestBody = readSentRequestBody(fetchMock) as {
        think?: boolean | string;
        options?: { think?: boolean | string; num_ctx?: number };
      };
      expect(requestBody.think).toBe("low");
      expect(requestBody.options?.think).toBeUndefined();
      expect(requestBody.options?.num_ctx).toBeUndefined();
    });
  });

  it("passes resolved provider request timeouts to native Ollama chat fetches", async () => {
    await withMockNdjsonFetch(okChatNdjson(), async (fetchMock) => {
      const stream = await createOllamaTestStream({
        baseUrl: nativeBaseUrl,
        model: { requestTimeoutMs: 450_000 },
      });
      await collectStreamEvents(stream);
      expect(getGuardedFetchCall(fetchMock).timeoutMs).toBe(450_000);
    });
  });

  it("passes caller abort signals at guard level when a timeout is present", async () => {
    await withMockNdjsonFetch(okChatNdjson(), async (fetchMock) => {
      const signal = new AbortController().signal;
      const stream = await createOllamaTestStream({
        baseUrl: nativeBaseUrl,
        options: { signal, timeoutMs: 123_456 },
      });
      await collectStreamEvents(stream);
      const request = getGuardedFetchCall(fetchMock);
      expect(request.timeoutMs).toBe(123_456);
      // The signal must travel next to `init`, not inside it.
      expect(request.signal).toBe(signal);
      expect(request.init?.signal).toBeUndefined();
    });
  });

  it("maps native Ollama max thinking to think=high on the wire", async () => {
    await withMockNdjsonFetch(okChatNdjson(), async (fetchMock) => {
      await sendWrappedHelloChat(
        {
          api: "ollama",
          provider: "ollama",
          id: "gpt-oss:20b",
          contextWindow: 131072,
        },
        "max",
      );
      const requestBody = readSentRequestBody(fetchMock) as {
        think?: boolean | string;
        options?: { think?: boolean | string; num_ctx?: number };
      };
      expect(requestBody.think).toBe("high");
      expect(requestBody.options?.think).toBeUndefined();
      expect(requestBody.options?.num_ctx).toBeUndefined();
    });
  });

  it("sends custom-provider Ollama chat requests with the bare Ollama model id", async () => {
    await withMockNdjsonFetch(okChatNdjson(), async (fetchMock) => {
      // Unwrapped stream function: exercises id normalisation on the native path itself.
      const streamFn = createOllamaStreamFn(nativeBaseUrl);
      const model = {
        api: "ollama",
        provider: "ollama-spark",
        id: "ollama-spark/qwen3:32b",
        contextWindow: 131072,
      };
      const stream = await Promise.resolve(
        streamFn(
          model as never,
          {
            messages: [{ role: "user", content: "hello" }],
          } as never,
          {} as never,
        ),
      );
      await collectStreamEvents(stream);
      const requestBody = readSentRequestBody(fetchMock) as { model?: string };
      expect(requestBody.model).toBe("qwen3:32b");
    });
  });

  it("adds direct type hints to native Ollama tool schemas before sending them", async () => {
    await withMockNdjsonFetch(okChatNdjson(), async (fetchMock) => {
      const streamFn = createOllamaStreamFn(nativeBaseUrl);
      const model = {
        api: "ollama",
        provider: "ollama",
        id: "qwen3:32b",
        contextWindow: 131072,
      };
      const stream = await Promise.resolve(
        streamFn(
          model as never,
          {
            messages: [{ role: "user", content: "hello" }],
            tools: [
              {
                name: "search",
                description: "search",
                // Deliberately omits every direct `type` so the hints must be inferred.
                parameters: {
                  properties: {
                    query: {
                      anyOf: [{ type: "string" }, { type: "null" }],
                    },
                    tags: {
                      items: { type: "string" },
                    },
                  },
                  required: ["query"],
                },
              },
            ],
          } as never,
          {} as never,
        ),
      );
      await collectStreamEvents(stream);
      const requestBody = readSentRequestBody(fetchMock) as {
        tools?: Array<{
          function?: {
            parameters?: {
              type?: string;
              properties?: Record<string, { type?: string }>;
            };
          };
        }>;
      };
      const parameters = requestBody.tools?.[0]?.function?.parameters;
      expect(parameters?.type).toBe("object");
      expect(parameters?.properties?.query?.type).toBe("string");
      expect(parameters?.properties?.tags?.type).toBe("array");
    });
  });
});
// Conversion of SDK-style chat messages into the Ollama chat message shape:
// plain text, image parts, system prompt, assistant tool calls (names, ids,
// arguments) and tool results.
describe("convertToOllamaMessages", () => {
// --- user and system messages ---
it("converts user text messages", () => {
const messages = [{ role: "user", content: "hello" }];
const result = convertToOllamaMessages(messages);
expect(result).toEqual([{ role: "user", content: "hello" }]);
});
it("converts user messages with content parts", () => {
const messages = [
{
role: "user",
content: [
{ type: "text", text: "describe this" },
{ type: "image", data: "base64data" },
],
},
];
const result = convertToOllamaMessages(messages);
// Text parts become `content`; image data is collected into `images`.
expect(result).toEqual([{ role: "user", content: "describe this", images: ["base64data"] }]);
});
it("prepends system message when provided", () => {
const messages = [{ role: "user", content: "hello" }];
const result = convertToOllamaMessages(messages, "You are helpful.");
expect(result[0]).toEqual({ role: "system", content: "You are helpful." });
expect(result[1]).toEqual({ role: "user", content: "hello" });
});
// --- assistant tool calls: ids and names ---
it("converts assistant messages with toolCall content blocks", () => {
const messages = [
{
role: "assistant",
content: [
{ type: "text", text: "Let me check." },
{ type: "toolCall", id: "call_1", name: "bash", arguments: { command: "ls" } },
],
},
];
const result = convertToOllamaMessages(messages);
expect(result[0].role).toBe("assistant");
expect(result[0].content).toBe("Let me check.");
expect(result[0].tool_calls).toEqual([
{ id: "call_1", function: { name: "bash", arguments: { command: "ls" } } },
]);
});
it("preserves assistant tool-call ids before Ollama replay", () => {
const messages = [
{
role: "assistant",
content: [
{
type: "toolCall",
id: "fc_ollama_123",
name: "bash",
arguments: { command: "pwd" },
},
],
},
];
const result = convertToOllamaMessages(messages);
expect(result[0].tool_calls).toEqual([
{ id: "fc_ollama_123", function: { name: "bash", arguments: { command: "pwd" } } },
]);
});
it("normalizes provider-prefixed tool-call names before Ollama replay", () => {
// Covers both block shapes (`toolCall` with `arguments`, `tool_use` with `input`)
// and both separators (`functions.` and `tools/`).
const messages = [
{
role: "assistant",
content: [
{ type: "toolCall", id: "call_1", name: "functions.exec", arguments: { command: "pwd" } },
{ type: "tool_use", id: "call_2", name: "tools/read", input: { path: "README.md" } },
],
},
];
const result = convertToOllamaMessages(messages);
expect(result[0].tool_calls).toEqual([
{ id: "call_1", function: { name: "exec", arguments: { command: "pwd" } } },
{ id: "call_2", function: { name: "read", arguments: { path: "README.md" } } },
]);
});
it("preserves exact allowlisted tool-prefix names before Ollama replay", () => {
// Names that merely look prefixed must survive when they are real tool names.
const messages = [
{
role: "assistant",
content: [
{ type: "toolCall", id: "call_1", name: "tool_a", arguments: { value: 1 } },
{ type: "tool_use", id: "call_2", name: "tools_invoke_test", input: { value: 2 } },
{ type: "toolCall", id: "call_3", name: "function-run", arguments: { value: 3 } },
],
},
];
const result = convertToOllamaMessages(messages, undefined, {
availableToolNames: new Set(["tool_a", "tools_invoke_test", "function-run"]),
});
expect(result[0].tool_calls).toEqual([
{ id: "call_1", function: { name: "tool_a", arguments: { value: 1 } } },
{ id: "call_2", function: { name: "tools_invoke_test", arguments: { value: 2 } } },
{ id: "call_3", function: { name: "function-run", arguments: { value: 3 } } },
]);
});
it("strips underscore and dash provider prefixes only when the suffix is allowlisted", () => {
const messages = [
{
role: "assistant",
content: [
{ type: "toolCall", id: "call_1", name: "tools_exec", arguments: { command: "pwd" } },
{ type: "tool_use", id: "call_2", name: "function-read", input: { path: "." } },
{ type: "toolCall", id: "call_3", name: "tool_missing", arguments: {} },
],
},
];
const result = convertToOllamaMessages(messages, undefined, {
availableToolNames: new Set(["exec", "read"]),
});
// "missing" is not an available tool, so `tool_missing` is expected unchanged.
expect(result[0].tool_calls).toEqual([
{ id: "call_1", function: { name: "exec", arguments: { command: "pwd" } } },
{ id: "call_2", function: { name: "read", arguments: { path: "." } } },
{ id: "call_3", function: { name: "tool_missing", arguments: {} } },
]);
});
it("keeps non-prefixed Ollama replay tool names intact", () => {
// No allowlist is passed here; none of these names may be rewritten.
const messages = [
{
role: "assistant",
content: [
{ type: "toolCall", id: "call_1", name: "functionshell", arguments: {} },
{ type: "toolCall", id: "call_2", name: "tooling", arguments: {} },
{ type: "toolCall", id: "call_3", name: "tools", arguments: {} },
{ type: "toolCall", id: "call_4", name: "tool_a", arguments: {} },
],
},
];
const result = convertToOllamaMessages(messages);
expect(result[0].tool_calls).toEqual([
{ id: "call_1", function: { name: "functionshell", arguments: {} } },
{ id: "call_2", function: { name: "tooling", arguments: {} } },
{ id: "call_3", function: { name: "tools", arguments: {} } },
{ id: "call_4", function: { name: "tool_a", arguments: {} } },
]);
});
// --- assistant tool calls: argument deserialization ---
it("deserializes string arguments back to objects for Ollama (round-trip fix)", () => {
// When tool calls round-trip through OpenAI-format storage, arguments
// are serialized as a JSON string. Ollama expects an object.
const messages = [
{
role: "assistant",
content: [
{
type: "toolCall",
id: "call_2",
name: "Read",
arguments: '{"file_path":"/tmp/test.txt"}',
},
],
},
];
const result = convertToOllamaMessages(messages);
expect(result[0].tool_calls).toEqual([
{ id: "call_2", function: { name: "Read", arguments: { file_path: "/tmp/test.txt" } } },
]);
});
it("handles tool_use blocks with string input (Anthropic format round-trip)", () => {
const messages = [
{
role: "assistant",
content: [
{ type: "tool_use", id: "toolu_1", name: "exec", input: '{"command":"echo hello"}' },
],
},
];
const result = convertToOllamaMessages(messages);
expect(result[0].tool_calls).toEqual([
{ id: "toolu_1", function: { name: "exec", arguments: { command: "echo hello" } } },
]);
});
it("preserves unsafe integers as strings when replay args are deserialized", () => {
// Both values exceed Number.MAX_SAFE_INTEGER; the expectation is that they come
// back as exact digit strings, including the one nested inside an object.
const messages = [
{
role: "assistant",
content: [
{
type: "toolCall",
id: "call_3",
name: "read",
arguments: '{"path":9223372036854775807,"nested":{"thread":1234567890123456789}}',
},
],
},
];
const result = convertToOllamaMessages(messages);
expect(result[0].tool_calls).toEqual([
{
id: "call_3",
function: {
name: "read",
arguments: {
path: "9223372036854775807",
nested: { thread: "1234567890123456789" },
},
},
},
]);
});
// --- tool results ---
it("converts tool result messages with 'tool' role", () => {
const messages = [{ role: "tool", content: "file1.txt\nfile2.txt" }];
const result = convertToOllamaMessages(messages);
expect(result).toEqual([{ role: "tool", content: "file1.txt\nfile2.txt" }]);
});
it("converts SDK 'toolResult' role to Ollama 'tool' role", () => {
const messages = [{ role: "toolResult", content: "command output here" }];
const result = convertToOllamaMessages(messages);
expect(result).toEqual([{ role: "tool", content: "command output here" }]);
});
it("includes tool_name from SDK toolResult messages", () => {
const messages = [{ role: "toolResult", content: "file contents here", toolName: "read" }];
const result = convertToOllamaMessages(messages);
expect(result).toEqual([{ role: "tool", content: "file contents here", tool_name: "read" }]);
});
it("omits tool_name when not provided in toolResult", () => {
const messages = [{ role: "toolResult", content: "output" }];
const result = convertToOllamaMessages(messages);
expect(result).toEqual([{ role: "tool", content: "output" }]);
// toEqual ignores undefined-valued keys, so assert the key is truly absent.
expect(result[0]).not.toHaveProperty("tool_name");
});
// --- edge cases ---
it("handles empty messages array", () => {
const result = convertToOllamaMessages([]);
expect(result).toStrictEqual([]);
});
});
describe("buildAssistantMessage", () => {
const modelInfo = { api: "ollama", provider: "ollama", id: "qwen3:32b" };
it("builds text-only response", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: { role: "assistant" as const, content: "Hello!" },
done: true,
prompt_eval_count: 10,
eval_count: 5,
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.role).toBe("assistant");
expect(result.content).toEqual([{ type: "text", text: "Hello!" }]);
expect(result.stopReason).toBe("stop");
expect(result.usage.input).toBe(10);
expect(result.usage.output).toBe(5);
expect(result.usage.totalTokens).toBe(15);
});
it("keeps thinking-only output when content is empty", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
thinking: "Thinking output",
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.stopReason).toBe("stop");
expect(result.content).toEqual([{ type: "thinking", thinking: "Thinking output" }]);
});
it("keeps reasoning-only output when content and thinking are empty", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
reasoning: "Reasoning output",
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.stopReason).toBe("stop");
expect(result.content).toEqual([{ type: "thinking", thinking: "Reasoning output" }]);
});
it("strips inline reasoning prefix from kimi cloud visible text", () => {
const response = {
model: "kimi-k2.6:cloud",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content:
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. Final answer only.",
},
done: true,
};
const result = buildAssistantMessage(response, {
api: "ollama",
provider: "ollama",
id: "kimi-k2.6:cloud",
});
expect(result.content).toEqual([{ type: "text", text: "Final answer only." }]);
});
it("strips inline reasoning for provider-qualified Kimi cloud refs", () => {
const response = {
model: "kimi-k2.6:cloud",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content:
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. Final answer only.",
},
done: true,
};
const result = buildAssistantMessage(response, {
api: "ollama",
provider: "ollama",
id: "ollama/kimi-k2.6:cloud",
});
expect(result.content).toEqual([{ type: "text", text: "Final answer only." }]);
});
it("strips inline reasoning when the Kimi boundary is followed by whitespace", () => {
const response = {
model: "kimi-k2.6:cloud",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content:
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. Final answer only.",
},
done: true,
};
const result = buildAssistantMessage(response, {
api: "ollama",
provider: "ollama",
id: "kimi-k2.6:cloud",
});
expect(result.content).toEqual([{ type: "text", text: "Final answer only." }]);
});
it("strips inline reasoning before short Kimi cloud answers", () => {
const response = {
model: "kimi-k2.6:cloud",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content:
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. OK.",
},
done: true,
};
const result = buildAssistantMessage(response, {
api: "ollama",
provider: "ollama",
id: "kimi-k2.6:cloud",
});
expect(result.content).toEqual([{ type: "text", text: "OK." }]);
});
it("strips inline reasoning when the Kimi boundary has no visible answer", () => {
const response = {
model: "kimi-k2.6:cloud",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content:
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. ",
},
done: true,
};
const result = buildAssistantMessage(response, {
api: "ollama",
provider: "ollama",
id: "kimi-k2.6:cloud",
});
expect(result.content).toEqual([]);
});
it("does not strip inline boundary marker on non-kimi models", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "intro keep this intact",
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.content).toEqual([{ type: "text", text: "intro keep this intact" }]);
});
it("does not treat emoji variation selectors as Kimi inline-reasoning boundaries", () => {
const response = {
model: "kimi-k2.6:cloud",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content:
"This is a normal Kimi cloud answer with enough length to cross the prefix threshold and no hidden reasoning leak. ☀sunshine should remain visible to the user.",
},
done: true,
};
const result = buildAssistantMessage(response, {
api: "ollama",
provider: "ollama",
id: "kimi-k2.6:cloud",
});
expect(result.content).toEqual([
{
type: "text",
text: "This is a normal Kimi cloud answer with enough length to cross the prefix threshold and no hidden reasoning leak. ☀sunshine should remain visible to the user.",
},
]);
});
it("estimates usage when Ollama omits eval counters", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: { role: "assistant" as const, content: "Estimated output" },
done: true,
};
const result = buildAssistantMessage(response, modelInfo, { input: 11, output: 4 });
expect(result.usage.input).toBe(11);
expect(result.usage.output).toBe(4);
expect(result.usage.totalTokens).toBe(15);
});
it("preserves explicit zero usage counters from Ollama", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: { role: "assistant" as const, content: "" },
done: true,
prompt_eval_count: 0,
eval_count: 0,
};
const result = buildAssistantMessage(response, modelInfo, { input: 11, output: 4 });
expect(result.usage.input).toBe(0);
expect(result.usage.output).toBe(0);
expect(result.usage.totalTokens).toBe(0);
});
it("builds response with tool calls", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
tool_calls: [{ function: { name: "bash", arguments: { command: "ls -la" } } }],
},
done: true,
prompt_eval_count: 20,
eval_count: 10,
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.stopReason).toBe("toolUse");
expect(result.content.length).toBe(1); // toolCall only (empty content is skipped)
expect(result.content[0].type).toBe("toolCall");
const toolCall = result.content[0] as {
type: "toolCall";
id: string;
name: string;
arguments: Record<string, unknown>;
};
expect(toolCall.name).toBe("bash");
expect(toolCall.arguments).toEqual({ command: "ls -la" });
expect(toolCall.id).toMatch(/^ollama_call_[0-9a-f-]{36}$/);
});
it("preserves Ollama response tool-call ids", () => {
const response = {
model: "gemini-3-flash-preview:cloud",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
tool_calls: [
{ id: "fc_ollama_real_1", function: { name: "bash", arguments: { command: "pwd" } } },
],
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expectToolCallContent(result.content[0], { name: "bash", arguments: { command: "pwd" } });
expect((result.content[0] as { id?: string }).id).toBe("fc_ollama_real_1");
});
it("preserves parallel Ollama response tool-call ids independently", () => {
const response = {
model: "gemini-3-flash-preview:cloud",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
tool_calls: [
{ id: "fc_ollama_real_1", function: { name: "read", arguments: { path: "a.txt" } } },
{ id: "fc_ollama_real_2", function: { name: "exec", arguments: { command: "date" } } },
],
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.content.map((part) => (part as { id?: string }).id)).toEqual([
"fc_ollama_real_1",
"fc_ollama_real_2",
]);
expectToolCallContent(result.content[0], { name: "read", arguments: { path: "a.txt" } });
expectToolCallContent(result.content[1], { name: "exec", arguments: { command: "date" } });
});
it("normalizes provider-prefixed tool-call names in Ollama responses", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
tool_calls: [
{ function: { name: "functions.exec", arguments: { command: "pwd" } } },
{ function: { name: "tools/read", arguments: { path: "README.md" } } },
],
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.content).toHaveLength(2);
expectToolCallContent(result.content[0], { name: "exec", arguments: { command: "pwd" } });
expectToolCallContent(result.content[1], { name: "read", arguments: { path: "README.md" } });
});
it("preserves exact allowlisted tool-prefix names in Ollama responses", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
tool_calls: [
{ function: { name: "tool_a", arguments: { value: 1 } } },
{ function: { name: "tools_invoke_test", arguments: { value: 2 } } },
{ function: { name: "function-run", arguments: { value: 3 } } },
],
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo, undefined, {
availableToolNames: new Set(["tool_a", "tools_invoke_test", "function-run"]),
});
expect(result.content).toHaveLength(3);
expectToolCallContent(result.content[0], { name: "tool_a", arguments: { value: 1 } });
expectToolCallContent(result.content[1], {
name: "tools_invoke_test",
arguments: { value: 2 },
});
expectToolCallContent(result.content[2], { name: "function-run", arguments: { value: 3 } });
});
it("keeps non-prefixed Ollama response tool names intact", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
tool_calls: [
{ function: { name: "functionshell", arguments: {} } },
{ function: { name: "tooling", arguments: {} } },
{ function: { name: "tools", arguments: {} } },
{ function: { name: "tool_a", arguments: {} } },
],
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.content).toHaveLength(4);
expectToolCallContent(result.content[0], { name: "functionshell", arguments: {} });
expectToolCallContent(result.content[1], { name: "tooling", arguments: {} });
expectToolCallContent(result.content[2], { name: "tools", arguments: {} });
expectToolCallContent(result.content[3], { name: "tool_a", arguments: {} });
});
it("parses stringified tool call arguments from Ollama responses", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
tool_calls: [{ function: { name: "bash", arguments: '{"command":"ls","path":"/tmp"}' } }],
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expectToolCallContent(result.content[0], {
name: "bash",
arguments: { command: "ls", path: "/tmp" },
});
});
it("preserves unsafe integers in stringified tool call arguments", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
tool_calls: [
{
function: {
name: "send",
arguments: '{"target":9223372036854775807,"nested":{"thread":1234567890123456789}}',
},
},
],
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expectToolCallContent(result.content[0], {
name: "send",
arguments: {
target: "9223372036854775807",
nested: { thread: "1234567890123456789" },
},
});
});
it("falls back to empty arguments for malformed stringified tool call arguments", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: {
role: "assistant" as const,
content: "",
tool_calls: [{ function: { name: "bash", arguments: '{"command":"ls"' } }],
},
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expectToolCallContent(result.content[0], { name: "bash", arguments: {} });
});
it("sets all costs to zero for local models", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
message: { role: "assistant" as const, content: "ok" },
done: true,
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.usage.cost).toEqual({
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
total: 0,
});
});
});
/**
 * Builds a minimal stand-in for a `ReadableStreamDefaultReader` over NDJSON.
 *
 * All `lines` are joined with newlines (plus a trailing newline) and handed
 * out as a single UTF-8 encoded chunk on the first `read()`; every later
 * `read()` reports the stream as finished.
 *
 * @param lines NDJSON records, one JSON document per entry.
 * @returns An object exposing `read`, `releaseLock`, `cancel` and `closed`,
 *   cast to the reader type (it is not a real stream reader).
 */
function mockNdjsonReader(lines: string[]): ReadableStreamDefaultReader<Uint8Array> {
  const encoder = new TextEncoder();
  const payload = `${lines.join("\n")}\n`;
  let delivered = false;

  const read = async () => {
    if (!delivered) {
      // First call: emit the whole payload in one chunk.
      delivered = true;
      return { done: false as const, value: encoder.encode(payload) };
    }
    return { done: true as const, value: undefined };
  };

  const fakeReader = {
    read,
    releaseLock: () => {},
    cancel: async () => {},
    closed: Promise.resolve(undefined),
  };
  return fakeReader as unknown as ReadableStreamDefaultReader<Uint8Array>;
}
/**
 * Streams the given NDJSON `lines` through a mocked Ollama fetch and asserts
 * that the last emitted event is a `done` event whose message content equals
 * `expectedContent`.
 *
 * @param lines NDJSON records the mocked fetch should return.
 * @param expectedContent Value the final message content is compared against.
 * @throws Error("Expected done event") when the stream yields no events or
 *   the last event is not of type `done`.
 */
async function expectDoneEventContent(lines: string[], expectedContent: unknown) {
  const assertFinalContent = async () => {
    const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
    const emitted = await collectStreamEvents(stream);
    const lastEvent = emitted.at(-1);
    const endedWithDone = lastEvent !== undefined && lastEvent.type === "done";
    if (!lastEvent || lastEvent.type !== "done" || !endedWithDone) {
      throw new Error("Expected done event");
    }
    expect(lastEvent.message.content).toEqual(expectedContent);
  };
  await withMockNdjsonFetch(lines, assertFinalContent);
}
describe("parseNdjsonStream", () => {
  // Feeds `lines` through a mock reader and returns every chunk that
  // parseNdjsonStream yields, in order.
  async function drainNdjson(lines: string[]) {
    const parsed = [];
    for await (const chunk of parseNdjsonStream(mockNdjsonReader(lines))) {
      parsed.push(chunk);
    }
    return parsed;
  }

  it("parses text-only streaming chunks", async () => {
    const chunks = await drainNdjson([
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"Hello"},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":" world"},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":5,"eval_count":2}',
    ]);

    expect(chunks).toHaveLength(3);
    expect(chunks[0].message.content).toBe("Hello");
    expect(chunks[1].message.content).toBe(" world");
    expect(chunks[2].done).toBe(true);
  });

  it("parses tool_calls from intermediate chunk (not final)", async () => {
    // Ollama sends tool_calls in a done:false chunk; the final done:true chunk carries none.
    const chunks = await drainNdjson([
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"bash","arguments":{"command":"ls"}}}]},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":10,"eval_count":5}',
    ]);

    expect(chunks).toHaveLength(2);
    expect(chunks[0].done).toBe(false);
    expect(chunks[0].message.tool_calls).toHaveLength(1);
    expect(chunks[0].message.tool_calls?.[0]?.function.name).toBe("bash");
    expect(chunks[1].done).toBe(true);
    expect(chunks[1].message.tool_calls).toBeUndefined();
  });

  it("accumulates tool_calls across multiple intermediate chunks", async () => {
    const chunks = await drainNdjson([
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"read","arguments":{"path":"/tmp/a"}}}]},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"bash","arguments":{"command":"ls"}}}]},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true}',
    ]);

    // Simulate the accumulation logic from createOllamaStreamFn
    const accumulatedToolCalls = chunks.flatMap((chunk) => chunk.message?.tool_calls ?? []);

    expect(accumulatedToolCalls).toHaveLength(2);
    expect(accumulatedToolCalls[0].function.name).toBe("read");
    expect(accumulatedToolCalls[1].function.name).toBe("bash");
    // Final done:true chunk has no tool_calls
    expect(chunks[2].message.tool_calls).toBeUndefined();
  });

  it("preserves unsafe integer tool arguments as exact strings", async () => {
    const [toolChunk] = await drainNdjson([
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"send","arguments":{"target":1234567890123456789,"nested":{"thread":9223372036854775807}}}}]},"done":false}',
    ]);

    const args = toolChunk?.message.tool_calls?.[0]?.function.arguments as
      | { target?: unknown; nested?: { thread?: unknown } }
      | undefined;
    expect(args?.target).toBe("1234567890123456789");
    expect(args?.nested?.thread).toBe("9223372036854775807");
  });

  it("keeps safe integer tool arguments as numbers", async () => {
    const [toolChunk] = await drainNdjson([
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"send","arguments":{"retries":3,"delayMs":2500}}}]},"done":false}',
    ]);

    const args = toolChunk?.message.tool_calls?.[0]?.function.arguments as
      | { retries?: unknown; delayMs?: unknown }
      | undefined;
    expect(args?.retries).toBe(3);
    expect(args?.delayMs).toBe(2500);
  });
});
/**
 * Installs a guarded-fetch mock that answers every call with a fresh
 * HTTP 200 NDJSON response built from `lines`, then awaits `run`.
 *
 * The mock implementation is left installed after `run` settles; this helper
 * does not reset it.
 *
 * @param lines NDJSON records; joined with newlines plus a trailing newline.
 * @param run Test body, invoked with the mocked fetch so it can inspect calls.
 */
async function withMockNdjsonFetch(
  lines: string[],
  run: (fetchMock: typeof fetchWithSsrFGuardMock) => Promise<void>,
): Promise<void> {
  const respondWithNdjson = async () => {
    // Built per call: a Response body can only be consumed once.
    const ndjsonBody = lines.join("\n") + "\n";
    const response = new Response(ndjsonBody, {
      status: 200,
      headers: { "Content-Type": "application/x-ndjson" },
    });
    return { response, release: vi.fn(async () => undefined) };
  };

  fetchWithSsrFGuardMock.mockImplementation(respondWithNdjson);
  await run(fetchWithSsrFGuardMock);
}
/**
 * Creates a fetch stub whose NDJSON response body is fed manually by the test.
 *
 * @returns
 *   - `fetchImpl`: resolves to a 200 NDJSON `Response` backed by one shared
 *     `ReadableStream`, plus a mocked `release` callback.
 *   - `pushLine`: enqueues `line` followed by a newline onto that stream.
 *   - `close`: closes the stream.
 *   `pushLine` and `close` throw "NDJSON controller not initialized" if the
 *   stream controller was never captured.
 */
function createControlledNdjsonFetch(): {
  fetchImpl: () => Promise<{ response: Response; release: () => Promise<void> }>;
  pushLine: (line: string) => void;
  close: () => void;
} {
  const encoder = new TextEncoder();
  let controller: ReadableStreamDefaultController<Uint8Array> | undefined;
  const body = new ReadableStream<Uint8Array>({
    start(streamController) {
      // Keep a handle so the test can drive the stream from outside.
      controller = streamController;
    },
  });

  // Single guard shared by pushLine/close.
  const requireController = (): ReadableStreamDefaultController<Uint8Array> => {
    if (!controller) {
      throw new Error("NDJSON controller not initialized");
    }
    return controller;
  };

  const fetchImpl = async () => {
    const response = new Response(body, {
      status: 200,
      headers: { "Content-Type": "application/x-ndjson" },
    });
    return { response, release: vi.fn(async () => undefined) };
  };

  return {
    fetchImpl,
    pushLine(line: string) {
      requireController().enqueue(encoder.encode(`${line}\n`));
    },
    close() {
      requireController().close();
    },
  };
}
/**
 * Returns the first argument of the first recorded call on `fetchMock`.
 *
 * @param fetchMock The mocked guarded fetch to inspect.
 * @returns The recorded call parameters, or `{ url: "" }` when the mock has
 *   not been called (or was called without a first argument).
 */
function getGuardedFetchCall(fetchMock: typeof fetchWithSsrFGuardMock): GuardedFetchCall {
  const firstCallArg = fetchMock.mock.calls[0]?.[0] as GuardedFetchCall | undefined;
  if (firstCallArg === undefined || firstCallArg === null) {
    return { url: "" };
  }
  return firstCallArg;
}
/**
 * Creates an Ollama stream function for `params.baseUrl` and invokes it with
 * a canned single-message context (`user: "hello"`).
 *
 * @param params.baseUrl Base URL passed to `createOllamaStreamFn`.
 * @param params.defaultHeaders Optional headers passed to `createOllamaStreamFn`.
 * @param params.model Fields spread over the default model descriptor
 *   (`qwen3:32b`, api `ollama`, provider `custom-ollama`, 131072 context).
 * @param params.options Call options forwarded as-is; defaults to `{}`.
 * @returns Whatever the created stream function returns.
 */
async function createOllamaTestStream(params: {
  baseUrl: string;
  defaultHeaders?: Record<string, string>;
  model?: Record<string, unknown>;
  options?: {
    apiKey?: string;
    maxTokens?: number;
    temperature?: number;
    signal?: AbortSignal;
    timeoutMs?: number;
    headers?: Record<string, string>;
  };
}) {
  const { baseUrl, defaultHeaders, model, options } = params;
  const streamFn = createOllamaStreamFn(baseUrl, defaultHeaders);
  type StreamArgs = Parameters<typeof streamFn>;

  // Test fixtures are deliberately loose; cast them to the stream function's parameter types.
  const modelArg = {
    id: "qwen3:32b",
    api: "ollama",
    provider: "custom-ollama",
    contextWindow: 131072,
    ...model,
  } as unknown as StreamArgs[0];
  const contextArg = {
    messages: [{ role: "user", content: "hello" }],
  } as unknown as StreamArgs[1];
  const optionsArg = (options ?? {}) as unknown as StreamArgs[2];

  return streamFn(modelArg, contextArg, optionsArg);
}
/**
 * Drains an async iterable into an array.
 *
 * @param stream Source to consume until it completes.
 * @returns Every yielded item, in emission order.
 */
async function collectStreamEvents<T>(stream: AsyncIterable<T>): Promise<T[]> {
  const collected: T[] = [];
  for await (const item of stream) {
    collected[collected.length] = item;
  }
  return collected;
}
/**
 * Awaits the iterator's next result, giving up after `timeoutMs`.
 *
 * @param iterator Async iterator to advance by one step.
 * @param timeoutMs Maximum wait in milliseconds (default 100).
 * @returns The iterator result, or the string `"timeout"` if the deadline
 *   fires first. On timeout the `next()` call stays pending and its eventual
 *   result is not observed by this helper.
 */
async function nextEventWithin<T>(
  iterator: AsyncIterator<T>,
  timeoutMs = 100,
): Promise<IteratorResult<T> | "timeout"> {
  let timer: NodeJS.Timeout | undefined;
  try {
    // Advance the iterator first, then arm the deadline.
    const pendingResult = iterator.next();
    const deadline = new Promise<"timeout">((resolve) => {
      timer = setTimeout(() => resolve("timeout"), timeoutMs);
    });
    return await Promise.race([pendingResult, deadline]);
  } finally {
    // Always disarm the deadline so it cannot outlive this call.
    if (timer) {
      clearTimeout(timer);
    }
  }
}
describe("createOllamaStreamFn streaming events", () => {
it("emits start, text_start, text_delta, text_end, done for text responses", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"Hello"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":" world"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":5,"eval_count":2}',
],
async () => {
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
const events = await collectStreamEvents(stream);
const types = events.map((e) => e.type);
expect(types).toEqual([
"start",
"text_start",
"text_delta",
"text_delta",
"text_end",
"done",
]);
// text_delta events carry incremental deltas
const deltas = events.filter((e) => e.type === "text_delta");
expect(deltas[0]?.contentIndex).toBe(0);
expect(deltas[0]?.delta).toBe("Hello");
expect(deltas[1]?.contentIndex).toBe(0);
expect(deltas[1]?.delta).toBe(" world");
// text_end carries the full accumulated content
const textEnd = events.find((e) => e.type === "text_end");
expect(textEnd?.contentIndex).toBe(0);
expect(textEnd?.content).toBe("Hello world");
// start/text_start carry empty partials (before any content accumulates)
const startEvent = events.find((e) => e.type === "start");
expect(startEvent?.partial.content).toStrictEqual([]);
const textStartEvent = events.find((e) => e.type === "text_start");
expect(textStartEvent?.partial.content).toStrictEqual([]);
// text_delta partials accumulate content progressively
expect(deltas[0].partial.content).toEqual([{ type: "text", text: "Hello" }]);
expect(deltas[1].partial.content).toEqual([{ type: "text", text: "Hello world" }]);
// done event contains the final message
const doneEvent = events.at(-1);
expect(doneEvent?.type).toBe("done");
if (doneEvent?.type === "done") {
expect(doneEvent.message.content).toEqual([{ type: "text", text: "Hello world" }]);
}
},
);
});
it("emits only done for tool-call-only responses (no text content)", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"bash","arguments":{"command":"ls"}}}]},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":10,"eval_count":5}',
],
async () => {
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
const events = await collectStreamEvents(stream);
// No text content means no start/text_start/text_delta/text_end events
const types = events.map((e) => e.type);
expect(types).toEqual(["done"]);
const doneEvent = events[0];
if (doneEvent.type === "done") {
expect(doneEvent.reason).toBe("toolUse");
}
},
);
});
it("estimates usage when the final Ollama chunk omits counters", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"Estimated answer"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true}',
],
async () => {
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
const events = await collectStreamEvents(stream);
const doneEvent = events.at(-1);
expect(doneEvent?.type).toBe("done");
if (doneEvent?.type === "done") {
expect(doneEvent.message.usage.input).toBeGreaterThan(0);
expect(doneEvent.message.usage.output).toBeGreaterThan(0);
expect(doneEvent.message.usage.totalTokens).toBeGreaterThan(0);
}
},
);
});
it("counts image payloads in prompt usage estimates when Ollama omits counters", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"vision answer"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true}',
],
async () => {
const streamFn = createOllamaStreamFn("http://ollama-host:11434");
const stream = await Promise.resolve(
streamFn(
{
id: "llava",
api: "ollama",
provider: "custom-ollama",
contextWindow: 131072,
} as never,
{
messages: [
{
role: "user",
content: [{ type: "image", data: "a".repeat(400) }],
},
],
} as never,
{} as never,
),
);
const events = await collectStreamEvents(stream);
const doneEvent = events.at(-1);
expect(doneEvent?.type).toBe("done");
if (doneEvent?.type === "done") {
expect(doneEvent.message.usage.input).toBeGreaterThan(50);
}
},
);
});
it("emits text streaming events before done for mixed text + tool responses", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"Let me check."},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"bash","arguments":{"command":"ls"}}}]},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":10,"eval_count":5}',
],
async () => {
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
const events = await collectStreamEvents(stream);
const types = events.map((e) => e.type);
expect(types).toEqual(["start", "text_start", "text_delta", "text_end", "done"]);
const doneEvent = events.at(-1);
if (doneEvent?.type === "done") {
expect(doneEvent.reason).toBe("toolUse");
}
},
);
});
it("emits text_end as soon as Ollama switches from text to tool calls", async () => {
const controlledFetch = createControlledNdjsonFetch();
fetchWithSsrFGuardMock.mockImplementation(controlledFetch.fetchImpl);
try {
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
const iterator = stream[Symbol.asyncIterator]();
controlledFetch.pushLine(
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"Let me check."},"done":false}',
);
const startEvent = await nextEventWithin(iterator);
const textStartEvent = await nextEventWithin(iterator);
const textDeltaEvent = await nextEventWithin(iterator);
expect(startEvent).not.toBe("timeout");
expect(textStartEvent).not.toBe("timeout");
expect(textDeltaEvent).not.toBe("timeout");
expectIteratorEvent(startEvent, { type: "start", done: false });
expectIteratorEvent(textStartEvent, { type: "text_start", done: false });
expectIteratorEvent(textDeltaEvent, {
type: "text_delta",
delta: "Let me check.",
done: false,
});
controlledFetch.pushLine(
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"bash","arguments":{"command":"ls"}}}]},"done":false}',
);
const textEndEvent = await nextEventWithin(iterator);
expect(textEndEvent).not.toBe("timeout");
expectIteratorEvent(textEndEvent, {
type: "text_end",
content: "Let me check.",
done: false,
});
if (textEndEvent !== "timeout") {
const textEndValue = requireRecord(textEndEvent.value, "text_end value");
expect(textEndValue.contentIndex).toBe(0);
expect(requireRecord(textEndValue.partial, "text_end partial").content).toEqual([
{ type: "text", text: "Let me check." },
]);
}
controlledFetch.pushLine(
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":10,"eval_count":5}',
);
controlledFetch.close();
const doneEvent = await nextEventWithin(iterator);
expect(doneEvent).not.toBe("timeout");
if (doneEvent !== "timeout" && doneEvent.done === false) {
expectIteratorEvent(doneEvent, { type: "done", done: false });
expect(requireRecord(doneEvent.value, "done value").reason).toBe("toolUse");
const streamEnd = await nextEventWithin(iterator);
expect(streamEnd).not.toBe("timeout");
expectIteratorEvent(streamEnd, { done: true });
} else {
expectIteratorEvent(doneEvent, { done: true });
}
} finally {
fetchWithSsrFGuardMock.mockReset();
}
});
it("emits error without text_end when stream fails mid-response", async () => {
// Simulate a stream that sends one content chunk then ends without done:true.
// The stream function throws "Ollama API stream ended without a final response".
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"partial"},"done":false}',
],
async () => {
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
const events = await collectStreamEvents(stream);
const types = events.map((e) => e.type);
// Should have streaming events for the partial content, then error (no text_end).
expect(types).toEqual(["start", "text_start", "text_delta", "error"]);
const errorEvent = events.at(-1);
expect(errorEvent?.type).toBe("error");
},
);
});
it("emits an error instead of accepting garbled Kimi visible text", async () => {
const garbled =
'$$"##"%#"##"####""$""""##""$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$' +
'#"$"$"""$""""#$"""$"""%"%###"""#%""""&"#"""$"""#"#""""%#""""&"#"""$"""$"""#%"""';
await withMockNdjsonFetch(
[
JSON.stringify({
model: "kimi-k2.5:cloud",
created_at: "t",
message: { role: "assistant", content: garbled },
done: false,
}),
'{"model":"kimi-k2.5:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
],
async () => {
const stream = await createOllamaTestStream({
baseUrl: "http://ollama-host:11434",
model: { id: "kimi-k2.5:cloud", provider: "ollama" },
});
const events = await collectStreamEvents(stream);
const types = events.map((e) => e.type);
expect(types).toEqual(["error"]);
const errorEvent = events.at(-1);
expect(errorEvent?.type).toBe("error");
if (errorEvent?.type === "error") {
expect(errorEvent.error.errorMessage).toContain("garbled visible text");
}
},
);
});
it("buffers Kimi inline reasoning until the streaming boundary is safe", async () => {
const controlledFetch = createControlledNdjsonFetch();
fetchWithSsrFGuardMock.mockImplementation(controlledFetch.fetchImpl);
try {
const stream = await createOllamaTestStream({
baseUrl: "http://ollama-host:11434",
model: { id: "kimi-k2.6:cloud", provider: "ollama" },
});
const iterator = stream[Symbol.asyncIterator]();
controlledFetch.pushLine(
JSON.stringify({
model: "kimi-k2.6:cloud",
created_at: "t",
message: {
role: "assistant",
content:
"The user is asking for a short answer. I should reason privately before answering. I need to avoid showing this planning text to the user.",
},
done: false,
}),
);
const pendingStartEvent = iterator.next();
expect(
await Promise.race([
pendingStartEvent.then(() => "event" as const),
new Promise<"timeout">((resolve) => setTimeout(() => resolve("timeout"), 100)),
]),
).toBe("timeout");
controlledFetch.pushLine(
JSON.stringify({
model: "kimi-k2.6:cloud",
created_at: "t",
message: { role: "assistant", content: " OK." },
done: false,
}),
);
const startEvent = await pendingStartEvent;
expectIteratorEvent(startEvent, { type: "start", done: false });
const textStartEvent = await nextEventWithin(iterator);
expect(textStartEvent).not.toBe("timeout");
expectIteratorEvent(textStartEvent, { type: "text_start", done: false });
const textDeltaEvent = await nextEventWithin(iterator);
expect(textDeltaEvent).not.toBe("timeout");
expectIteratorEvent(textDeltaEvent, {
type: "text_delta",
delta: "OK.",
done: false,
});
if (textDeltaEvent !== "timeout" && textDeltaEvent.done === false) {
const value = requireRecord(textDeltaEvent.value, "text_delta value");
expect(JSON.stringify(value)).not.toContain("The user is asking");
}
controlledFetch.pushLine(
'{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
);
controlledFetch.close();
const textEndEvent = await nextEventWithin(iterator);
expect(textEndEvent).not.toBe("timeout");
expectIteratorEvent(textEndEvent, {
type: "text_end",
content: "OK.",
done: false,
});
const doneEvent = await nextEventWithin(iterator);
expect(doneEvent).not.toBe("timeout");
expectIteratorEvent(doneEvent, { type: "done", done: false });
if (doneEvent !== "timeout" && doneEvent.done === false) {
const value = requireRecord(doneEvent.value, "done value");
expect(JSON.stringify(value)).not.toContain("The user is asking");
}
const streamEnd = await nextEventWithin(iterator);
expect(streamEnd).not.toBe("timeout");
expectIteratorEvent(streamEnd, { done: true });
} finally {
fetchWithSsrFGuardMock.mockReset();
}
});
it("streams marker-less Kimi answers after the bounded sanitizer window", async () => {
const controlledFetch = createControlledNdjsonFetch();
fetchWithSsrFGuardMock.mockImplementation(controlledFetch.fetchImpl);
try {
const stream = await createOllamaTestStream({
baseUrl: "http://ollama-host:11434",
model: { id: "kimi-k2.6:cloud", provider: "ollama" },
});
const iterator = stream[Symbol.asyncIterator]();
const visibleAnswer = "This is a normal Kimi cloud answer without hidden reasoning. ".repeat(
12,
);
controlledFetch.pushLine(
JSON.stringify({
model: "kimi-k2.6:cloud",
created_at: "t",
message: { role: "assistant", content: visibleAnswer },
done: false,
}),
);
const startEvent = await nextEventWithin(iterator);
expect(startEvent).not.toBe("timeout");
expectIteratorEvent(startEvent, { type: "start", done: false });
const textStartEvent = await nextEventWithin(iterator);
expect(textStartEvent).not.toBe("timeout");
expectIteratorEvent(textStartEvent, { type: "text_start", done: false });
const textDeltaEvent = await nextEventWithin(iterator);
expect(textDeltaEvent).not.toBe("timeout");
expectIteratorEvent(textDeltaEvent, {
type: "text_delta",
delta: visibleAnswer,
done: false,
});
controlledFetch.pushLine(
'{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
);
controlledFetch.close();
const textEndEvent = await nextEventWithin(iterator);
expect(textEndEvent).not.toBe("timeout");
expectIteratorEvent(textEndEvent, {
type: "text_end",
content: visibleAnswer,
done: false,
});
const doneEvent = await nextEventWithin(iterator);
expect(doneEvent).not.toBe("timeout");
expectIteratorEvent(doneEvent, { type: "done", done: false });
const streamEnd = await nextEventWithin(iterator);
expect(streamEnd).not.toBe("timeout");
expectIteratorEvent(streamEnd, { done: true });
} finally {
fetchWithSsrFGuardMock.mockReset();
}
});
it("keeps Kimi deltas append-only after the bounded sanitizer window is bypassed", async () => {
const longPrefix = "This Kimi cloud output has streamed past the sanitizer window. ".repeat(10);
await withMockNdjsonFetch(
[
JSON.stringify({
model: "kimi-k2.6:cloud",
created_at: "t",
message: { role: "assistant", content: longPrefix },
done: false,
}),
JSON.stringify({
model: "kimi-k2.6:cloud",
created_at: "t",
message: { role: "assistant", content: " OK." },
done: false,
}),
'{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
],
async () => {
const stream = await createOllamaTestStream({
baseUrl: "http://ollama-host:11434",
model: { id: "kimi-k2.6:cloud", provider: "ollama" },
});
const events = await collectStreamEvents(stream);
const deltas = events.filter((event) => event.type === "text_delta");
expect(deltas.map((event) => event.delta)).toEqual([longPrefix, " OK."]);
const rawText = `${longPrefix} OK.`;
const textEnd = events.find((event) => event.type === "text_end");
expect(textEnd?.content).toBe(rawText);
const doneEvent = events.find((event) => event.type === "done");
expect(doneEvent?.message.content).toEqual([{ type: "text", text: rawText }]);
},
);
});
it("does not reject punctuation-heavy text from unrelated Ollama models", async () => {
const punctuationHeavy =
'$$"##"%#"##"####""$""""##""$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$' +
'#"$"$"""$""""#$"""$"""%"%###"""#%""""&"#"""$"""#"#""""%#""""&"#"""$"""$"""#%"""';
await withMockNdjsonFetch(
[
JSON.stringify({
model: "qwen3:32b",
created_at: "t",
message: { role: "assistant", content: punctuationHeavy },
done: false,
}),
'{"model":"qwen3:32b","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
],
async () => {
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
const events = await collectStreamEvents(stream);
expect(events.map((e) => e.type)).toEqual([
"start",
"text_start",
"text_delta",
"text_end",
"done",
]);
},
);
});
it("emits a single text_delta for single-chunk responses", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"one shot"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
],
async () => {
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
const events = await collectStreamEvents(stream);
const types = events.map((e) => e.type);
expect(types).toEqual(["start", "text_start", "text_delta", "text_end", "done"]);
const textStart = events.find((e) => e.type === "text_start");
expect(textStart?.partial.content).toEqual([]);
const delta = events.find((e) => e.type === "text_delta");
expect(delta?.delta).toBe("one shot");
},
);
});
it("sanitizes Kimi inline reasoning in text_delta, text_end, partial, and done output", async () => {
  // The first chunk mixes a private planning prefix with the start of the
  // visible answer; every emitted event must expose only the visible part.
  const kimiModelId = "kimi-k2.6:cloud";
  const firstVisibleChunk = "Final answer";
  const secondVisibleChunk = " only.";
  const fullVisibleAnswer = "Final answer only.";
  const ndjsonLines = [
    JSON.stringify({
      model: kimiModelId,
      created_at: "t",
      message: {
        role: "assistant",
        content:
          "I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. Final answer",
      },
      done: false,
    }),
    JSON.stringify({
      model: kimiModelId,
      created_at: "t",
      message: { role: "assistant", content: secondVisibleChunk },
      done: false,
    }),
    '{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":3,"eval_count":4}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async () => {
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      model: { id: kimiModelId, provider: "ollama" },
    });
    const events = await collectStreamEvents(stream);

    // Event sequence: one text block carrying two deltas.
    const eventTypes = events.map((event) => event.type);
    expect(eventTypes).toEqual([
      "start",
      "text_start",
      "text_delta",
      "text_delta",
      "text_end",
      "done",
    ]);

    const textStart = events.find((event) => event.type === "text_start");
    expect(textStart?.partial.content).toEqual([]);

    // Deltas and their partial snapshots contain sanitized text only.
    const textDeltas = events.filter((event) => event.type === "text_delta");
    expect(textDeltas).toHaveLength(2);
    const [firstDelta, secondDelta] = textDeltas;
    expect(firstDelta?.delta).toBe(firstVisibleChunk);
    expect(secondDelta?.delta).toBe(secondVisibleChunk);
    expect(firstDelta?.partial.content).toEqual([{ type: "text", text: firstVisibleChunk }]);
    expect(secondDelta?.partial.content).toEqual([{ type: "text", text: fullVisibleAnswer }]);

    const textEnd = events.find((event) => event.type === "text_end");
    expect(textEnd?.content).toBe(fullVisibleAnswer);
    expect(textEnd?.partial.content).toEqual([{ type: "text", text: fullVisibleAnswer }]);

    // The final message must match the sanitized stream output.
    const lastEvent = events.at(-1);
    expect(lastEvent?.type).toBe("done");
    if (lastEvent?.type === "done") {
      expect(lastEvent.message.content).toEqual([{ type: "text", text: fullVisibleAnswer }]);
    }
  });
});
it("does not re-sanitize visible Kimi stream output before done", async () => {
  // The visible text is deliberately long so that a second sanitization pass
  // would be tempted to treat it as a reasoning prefix and strip it.
  const visibleAnswer =
    "This visible answer is intentionally long enough to look like a reasoning prefix if it is sanitized a second time. keep this marker visible.";
  const kimiModelId = "kimi-k2.6:cloud";
  const ndjsonLines = [
    JSON.stringify({
      model: kimiModelId,
      created_at: "t",
      message: {
        role: "assistant",
        content:
          "I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. ",
      },
      done: false,
    }),
    JSON.stringify({
      model: kimiModelId,
      created_at: "t",
      message: { role: "assistant", content: visibleAnswer },
      done: false,
    }),
    '{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":3,"eval_count":4}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async () => {
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      model: { id: kimiModelId, provider: "ollama" },
    });
    const events = await collectStreamEvents(stream);

    const textEnd = events.find((candidate) => candidate.type === "text_end");
    const lastEvent = events.at(-1);

    // Both the text_end payload and the final message keep the answer intact.
    expect(textEnd?.content).toBe(visibleAnswer);
    expect(lastEvent?.type).toBe("done");
    if (lastEvent?.type === "done") {
      expect(lastEvent.message.content).toEqual([{ type: "text", text: visibleAnswer }]);
    }
  });
});
it("does not leak Kimi inline reasoning when a boundary is followed by tool calls only", async () => {
  // Planning text that is followed by a tool call and no visible answer.
  const hiddenPrefix = [
    "I should think privately and not leak this planning text in the answer. ",
    "I need to keep deciding what tool to call before showing any visible text.",
  ].join("");
  const kimiModelId = "kimi-k2.6:cloud";
  const ndjsonLines = [
    JSON.stringify({
      model: kimiModelId,
      created_at: "t",
      message: { role: "assistant", content: hiddenPrefix + " " },
      done: false,
    }),
    JSON.stringify({
      model: kimiModelId,
      created_at: "t",
      message: {
        role: "assistant",
        content: "",
        tool_calls: [{ function: { name: "bash", arguments: { command: "ls" } } }],
      },
      done: false,
    }),
    '{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async () => {
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      model: { id: kimiModelId, provider: "ollama" },
    });
    const events = await collectStreamEvents(stream);

    // Only the terminal event is expected; no text events are emitted.
    const eventTypes = events.map((event) => event.type);
    expect(eventTypes).toEqual(["done"]);

    const lastEvent = events.at(-1);
    expect(lastEvent?.type).toBe("done");
    if (lastEvent?.type === "done") {
      expect(lastEvent.message.content).toEqual([
        {
          type: "toolCall",
          id: expect.any(String),
          name: "bash",
          arguments: { command: "ls" },
        },
      ]);
      // The planning text must not appear anywhere in the serialized event.
      const serializedDone = JSON.stringify(lastEvent);
      expect(serializedDone).not.toContain("I should think privately");
    }
  });
});
});
describe("createOllamaStreamFn", () => {
it("normalizes /v1 baseUrl and maps maxTokens + signal", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    const abortSignal = new AbortController().signal;
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434/v1/",
      options: { maxTokens: 123, signal: abortSignal },
    });
    const events = await collectStreamEvents(stream);
    expect(events.at(-1)?.type).toBe("done");
    expect(fetchMock).toHaveBeenCalledTimes(1);

    // The trailing /v1/ is dropped in favour of the native /api/chat endpoint,
    // and the abort signal is passed on the guarded call, not inside init.
    const guardedCall = getGuardedFetchCall(fetchMock);
    expect(guardedCall.url).toBe("http://ollama-host:11434/api/chat");
    expect(guardedCall.auditContext).toBe("ollama-stream.chat");
    expect(guardedCall.signal).toBe(abortSignal);

    const { signal: initSignal, body } = guardedCall.init ?? {};
    expect(initSignal).toBeUndefined();
    if (typeof body !== "string") {
      throw new Error("Expected string request body");
    }

    type ChatRequestBody = {
      options?: { num_ctx?: number; num_predict?: number };
    };
    const { options: ollamaOptions } = JSON.parse(body) as ChatRequestBody;
    if (!ollamaOptions) {
      throw new Error("Expected Ollama request options");
    }
    // maxTokens maps to num_predict; num_ctx stays unset.
    expect(ollamaOptions.num_ctx).toBeUndefined();
    expect(ollamaOptions.num_predict).toBe(123);
  });
});
it("uses configured params.num_ctx for native Ollama chat options", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      model: {
        params: {
          num_ctx: 32768,
          temperature: 0.2,
          top_p: 0.9,
          thinking: false,
          streaming: false,
        },
        contextWindow: 131072,
      },
      options: { temperature: 0.7, maxTokens: 55 },
    });
    const events = await collectStreamEvents(stream);
    expect(events.at(-1)?.type).toBe("done");

    const { body } = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof body !== "string") {
      throw new Error("Expected string request body");
    }

    type ChatRequestBody = {
      think?: boolean;
      options: {
        num_ctx?: number;
        num_predict?: number;
        temperature?: number;
        top_p?: number;
        streaming?: boolean;
      };
    };
    const payload = JSON.parse(body) as ChatRequestBody;
    const sentOptions = payload.options;

    // params.num_ctx wins over contextWindow; the per-request temperature wins
    // over params.temperature; `streaming` is not forwarded as an option and
    // `thinking` surfaces as the top-level `think` flag.
    expect(sentOptions.num_ctx).toBe(32768);
    expect(sentOptions.num_predict).toBe(55);
    expect(sentOptions.temperature).toBe(0.7);
    expect(sentOptions.top_p).toBe(0.9);
    expect(sentOptions.streaming).toBeUndefined();
    expect(payload.think).toBe(false);
  });
});
it("sets top_p=1 for native Ollama greedy sampling requests", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    // temperature 0 is requested while the model config carries top_p 0.9.
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      model: {
        params: {
          num_ctx: 4096,
          top_p: 0.9,
          thinking: false,
        },
      },
      options: { temperature: 0 },
    });
    const events = await collectStreamEvents(stream);
    expect(events.at(-1)?.type).toBe("done");

    const { body } = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof body !== "string") {
      throw new Error("Expected string request body");
    }

    type SamplingRequestBody = {
      options: {
        temperature?: number;
        top_p?: number;
      };
    };
    const { options: sampling } = JSON.parse(body) as SamplingRequestBody;
    // The configured top_p is replaced by 1 for the greedy request.
    expect(sampling.temperature).toBe(0);
    expect(sampling.top_p).toBe(1);
  });
});
it("sets top_p=1 for native Ollama greedy requests without configured top_p", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    // No top_p is configured on the model; temperature 0 is requested.
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      model: {
        params: {
          num_ctx: 4096,
          thinking: false,
        },
      },
      options: { temperature: 0 },
    });
    const events = await collectStreamEvents(stream);
    expect(events.at(-1)?.type).toBe("done");

    const { body } = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof body !== "string") {
      throw new Error("Expected string request body");
    }

    type SamplingRequestBody = {
      options: {
        temperature?: number;
        top_p?: number;
      };
    };
    const { options: sampling } = JSON.parse(body) as SamplingRequestBody;
    expect(sampling.temperature).toBe(0);
    expect(sampling.top_p).toBe(1);
  });
});
it("preserves configured top_p for native Ollama non-greedy sampling requests", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    // A non-zero temperature must leave the configured top_p untouched.
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      model: {
        params: {
          top_p: 0.9,
        },
      },
      options: { temperature: 0.2 },
    });
    const events = await collectStreamEvents(stream);
    expect(events.at(-1)?.type).toBe("done");

    const { body } = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof body !== "string") {
      throw new Error("Expected string request body");
    }

    type SamplingRequestBody = {
      options: {
        temperature?: number;
        top_p?: number;
      };
    };
    const { options: sampling } = JSON.parse(body) as SamplingRequestBody;
    expect(sampling.temperature).toBe(0.2);
    expect(sampling.top_p).toBe(0.9);
  });
});
it("omits num_ctx when the model has no params.num_ctx and no catalog window", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      // Override the helper default contextWindow back to undefined so the
      // request body should leave Ollama's Modelfile to decide num_ctx.
      model: { contextWindow: undefined },
    });
    await collectStreamEvents(stream);

    const { body } = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof body !== "string") {
      throw new Error("Expected string request body");
    }

    type ContextRequestBody = {
      options?: { num_ctx?: number };
    };
    const payload = JSON.parse(body) as ContextRequestBody;
    expect(payload.options?.num_ctx).toBeUndefined();
  });
});
it("does not fall back to catalog contextWindow as native Ollama num_ctx", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    // Only contextWindow is set; it must not be forwarded as num_ctx.
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      model: { contextWindow: 32768 },
    });
    await collectStreamEvents(stream);

    const { body } = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof body !== "string") {
      throw new Error("Expected string request body");
    }

    type ContextRequestBody = {
      options?: { num_ctx?: number };
    };
    const payload = JSON.parse(body) as ContextRequestBody;
    expect(payload.options?.num_ctx).toBeUndefined();
  });
});
it("does not fall back to catalog maxTokens as native Ollama num_ctx", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      // The helper default contextWindow is overridden back to undefined so
      // the right side of `model.contextWindow ?? model.maxTokens` is the
      // load-bearing branch.
      model: { contextWindow: undefined, maxTokens: 65536 },
    });
    await collectStreamEvents(stream);

    const { body } = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof body !== "string") {
      throw new Error("Expected string request body");
    }

    type ContextRequestBody = {
      options?: { num_ctx?: number };
    };
    const payload = JSON.parse(body) as ContextRequestBody;
    expect(payload.options?.num_ctx).toBeUndefined();
  });
});
it("maps configured native Ollama params.thinking=max to the stable top-level think value", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      model: { params: { thinking: "max" } },
    });
    const events = await collectStreamEvents(stream);
    expect(events.at(-1)?.type).toBe("done");

    const { body } = getGuardedFetchCall(fetchMock).init ?? {};
    if (typeof body !== "string") {
      throw new Error("Expected string request body");
    }

    type ThinkRequestBody = {
      think?: string;
      options?: { think?: string };
    };
    const payload = JSON.parse(body) as ThinkRequestBody;
    // "max" is sent as top-level think "high" and never placed under options.
    expect(payload.think).toBe("high");
    expect(payload.options?.think).toBeUndefined();
  });
});
it("uses the default loopback policy when baseUrl is empty", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    const stream = await createOllamaTestStream({ baseUrl: "" });
    const events = await collectStreamEvents(stream);
    expect(events.at(-1)?.type).toBe("done");

    // An empty baseUrl resolves to the loopback endpoint, with an SSRF policy
    // that allowlists that host and permits private-network access.
    const guardedCall = getGuardedFetchCall(fetchMock);
    expect(guardedCall.url).toBe("http://127.0.0.1:11434/api/chat");
    const ssrfPolicy = requireRecord(guardedCall.policy, "ssrf policy");
    expect(ssrfPolicy.hostnameAllowlist).toEqual(["127.0.0.1"]);
    expect(ssrfPolicy.allowPrivateNetwork).toBe(true);
  });
});
it("merges default headers and allows request headers to override them", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    // "X-Trace" is present in both header sets; the request value should win.
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      defaultHeaders: {
        "X-OLLAMA-KEY": "provider-secret",
        "X-Trace": "default",
      },
      options: {
        headers: {
          "X-Trace": "request",
          "X-Request-Only": "1",
        },
      },
    });
    const events = await collectStreamEvents(stream);
    expect(events.at(-1)?.type).toBe("done");

    const sentInit = getGuardedFetchCall(fetchMock).init ?? {};
    const sentHeaders = requireHeaders(sentInit.headers);
    expect(sentHeaders["Content-Type"]).toBe("application/json");
    expect(sentHeaders["X-OLLAMA-KEY"]).toBe("provider-secret");
    expect(sentHeaders["X-Trace"]).toBe("request");
    expect(sentHeaders["X-Request-Only"]).toBe("1");
  });
});
it("preserves an explicit Authorization header when apiKey is a local marker", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    const stream = await createOllamaTestStream({
      baseUrl: "http://ollama-host:11434",
      defaultHeaders: {
        Authorization: "Bearer proxy-token",
      },
      options: {
        apiKey: "ollama-local", // pragma: allowlist secret
        headers: {
          Authorization: "Bearer proxy-token",
        },
      },
    });
    await collectStreamEvents(stream);

    // The "ollama-local" apiKey must not replace the explicit header.
    const sentInit = getGuardedFetchCall(fetchMock).init ?? {};
    const sentHeaders = requireHeaders(sentInit.headers);
    expect(sentHeaders.Authorization).toBe("Bearer proxy-token");
  });
});
it("allows a real apiKey to override an explicit Authorization header", async () => {
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
  ];

  await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
    // The stream function is built with a proxy Authorization default header.
    const streamFn = createOllamaStreamFn("http://ollama-host:11434", {
      Authorization: "Bearer proxy-token",
    });
    const modelArg = {
      id: "qwen3:32b",
      api: "ollama",
      provider: "custom-ollama",
      contextWindow: 131072,
    } as never;
    const contextArg = {
      messages: [{ role: "user", content: "hello" }],
    } as never;
    const optionsArg = {
      apiKey: "real-token", // pragma: allowlist secret
    } as never;

    const stream = await Promise.resolve(streamFn(modelArg, contextArg, optionsArg));
    await collectStreamEvents(stream);

    // A non-marker apiKey is sent as the bearer token instead.
    const sentInit = getGuardedFetchCall(fetchMock).init ?? {};
    const sentHeaders = requireHeaders(sentInit.headers);
    expect(sentHeaders.Authorization).toBe("Bearer real-token");
  });
});
it("surfaces non-2xx HTTP response as status-prefixed error", async () => {
  type StreamErrorEvent = { type: "error"; error: { errorMessage?: string } };

  // Make the guarded fetch resolve with a 503 instead of an NDJSON stream.
  const unavailableResponse = new Response("Service Unavailable", {
    status: 503,
    statusText: "Service Unavailable",
  });
  fetchWithSsrFGuardMock.mockResolvedValue({
    response: unavailableResponse,
    release: vi.fn(async () => undefined),
  });

  try {
    const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
    const events = await collectStreamEvents(stream);
    const failure = events.find((event) => event.type === "error") as
      | StreamErrorEvent
      | undefined;
    if (!failure) {
      throw new Error("expected Ollama stream error event");
    }
    // The error message must start with the HTTP status code so that
    // extractLeadingHttpStatus can parse it for failover/retry logic.
    expect(failure.error.errorMessage).toMatch(/^503\b/);
  } finally {
    // Always clear the mocked response set up by this test.
    fetchWithSsrFGuardMock.mockReset();
  }
});
it("keeps thinking chunks when no final content is emitted", async () => {
  // Two `thinking` fragments and no visible content at all.
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","thinking":"reasoned"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","thinking":" output"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
  ];
  // The fragments are concatenated into a single thinking block.
  await expectDoneEventContent(ndjsonLines, [{ type: "thinking", thinking: "reasoned output" }]);
});
it("keeps streamed content after earlier thinking chunks", async () => {
  // One `thinking` fragment followed by two visible content fragments.
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","thinking":"internal"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"final"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":" answer"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
  ];
  // The thinking block comes first, then the joined text block.
  await expectDoneEventContent(ndjsonLines, [
    { type: "thinking", thinking: "internal" },
    { type: "text", text: "final answer" },
  ]);
});
it("keeps reasoning chunks when no final content is emitted", async () => {
  // Same scenario as the `thinking` field, using the `reasoning` field name.
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"reasoned"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":" output"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
  ];
  // `reasoning` fragments end up in a single thinking block.
  await expectDoneEventContent(ndjsonLines, [{ type: "thinking", thinking: "reasoned output" }]);
});
it("keeps streamed content after earlier reasoning chunks", async () => {
  // One `reasoning` fragment followed by two visible content fragments.
  const ndjsonLines = [
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"internal"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":"final"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":" answer"},"done":false}',
    '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
  ];
  // The thinking block comes first, then the joined text block.
  await expectDoneEventContent(ndjsonLines, [
    { type: "thinking", thinking: "internal" },
    { type: "text", text: "final answer" },
  ]);
});
});
describe("resolveOllamaBaseUrlForRun", () => {
  // Precedence under test: provider baseUrl, then model baseUrl, then the
  // loopback default.
  it("prefers provider baseUrl over model baseUrl", () => {
    const resolved = resolveOllamaBaseUrlForRun({
      modelBaseUrl: "http://model-host:11434",
      providerBaseUrl: "http://provider-host:11434",
    });
    expect(resolved).toBe("http://provider-host:11434");
  });

  it("falls back to model baseUrl when provider baseUrl is missing", () => {
    const resolved = resolveOllamaBaseUrlForRun({
      modelBaseUrl: "http://model-host:11434",
    });
    expect(resolved).toBe("http://model-host:11434");
  });

  it("falls back to native default when neither baseUrl is configured", () => {
    const resolved = resolveOllamaBaseUrlForRun({});
    expect(resolved).toBe("http://127.0.0.1:11434");
  });
});
describe("createConfiguredOllamaStreamFn", () => {
  it("uses provider-level baseUrl when model baseUrl is absent", async () => {
    const ndjsonLines = [
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
    ];

    await withMockNdjsonFetch(ndjsonLines, async (fetchMock) => {
      // The configured model supplies headers only; the base URL comes from
      // the provider.
      const streamFn = createConfiguredOllamaStreamFn({
        model: {
          headers: { Authorization: "Bearer proxy-token" },
        },
        providerBaseUrl: "http://provider-host:11434/v1",
      });
      const modelArg = {
        id: "qwen3:32b",
        api: "ollama",
        provider: "custom-ollama",
        contextWindow: 131072,
      } as never;
      const contextArg = {
        messages: [{ role: "user", content: "hello" }],
      } as never;
      const optionsArg = {
        apiKey: "ollama-local", // pragma: allowlist secret
      } as never;

      const stream = await Promise.resolve(streamFn(modelArg, contextArg, optionsArg));
      await collectStreamEvents(stream);

      // The provider URL is used without its /v1 suffix, and the model-level
      // Authorization header is still sent with the "ollama-local" apiKey.
      const guardedCall = getGuardedFetchCall(fetchMock);
      expect(guardedCall.url).toBe("http://provider-host:11434/api/chat");
      const sentInit = guardedCall.init ?? {};
      const sentHeaders = requireHeaders(sentInit.headers);
      expect(sentHeaders.Authorization).toBe("Bearer proxy-token");
    });
  });
});