mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-05 19:22:52 +00:00
2803 lines
100 KiB
TypeScript
2803 lines
100 KiB
TypeScript
import { afterEach, describe, expect, it, vi } from "vitest";
|
||
|
||
const { fetchWithSsrFGuardMock } = vi.hoisted(() => ({
|
||
fetchWithSsrFGuardMock: vi.fn(),
|
||
}));
|
||
|
||
vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
|
||
fetchWithSsrFGuard: fetchWithSsrFGuardMock,
|
||
}));
|
||
|
||
import {
|
||
buildOllamaChatRequest,
|
||
createConfiguredOllamaCompatStreamWrapper,
|
||
createConfiguredOllamaStreamFn,
|
||
createOllamaStreamFn,
|
||
convertToOllamaMessages,
|
||
buildAssistantMessage,
|
||
parseNdjsonStream,
|
||
resolveOllamaBaseUrlForRun,
|
||
} from "./stream.js";
|
||
|
||
type GuardedFetchCall = {
|
||
url: string;
|
||
init?: RequestInit;
|
||
policy?: unknown;
|
||
signal?: AbortSignal;
|
||
timeoutMs?: number;
|
||
auditContext?: string;
|
||
};
|
||
|
||
function requireRecord(value: unknown, label: string): Record<string, unknown> {
|
||
if (!value || typeof value !== "object") {
|
||
throw new Error(`expected ${label}`);
|
||
}
|
||
return value as Record<string, unknown>;
|
||
}
|
||
|
||
function requireHeaders(value: unknown): Record<string, string> {
|
||
return requireRecord(value, "request headers") as Record<string, string>;
|
||
}
|
||
|
||
function expectToolCallContent(
|
||
value: unknown,
|
||
expected: { name: string; arguments: Record<string, unknown> },
|
||
) {
|
||
const content = requireRecord(value, "tool call content");
|
||
expect(content.type).toBe("toolCall");
|
||
expect(content.name).toBe(expected.name);
|
||
expect(content.arguments).toEqual(expected.arguments);
|
||
}
|
||
|
||
function expectIteratorEvent(
|
||
value: unknown,
|
||
expected: { type?: string; delta?: string; content?: string; done: boolean },
|
||
) {
|
||
const result = requireRecord(value, "iterator result");
|
||
expect(result.done).toBe(expected.done);
|
||
if (expected.type !== undefined) {
|
||
const event = requireRecord(result.value, "iterator result value");
|
||
expect(event.type).toBe(expected.type);
|
||
if (expected.delta !== undefined) {
|
||
expect(event.delta).toBe(expected.delta);
|
||
}
|
||
if (expected.content !== undefined) {
|
||
expect(event.content).toBe(expected.content);
|
||
}
|
||
} else {
|
||
expect(result.value).toBeUndefined();
|
||
}
|
||
}
|
||
|
||
afterEach(() => {
|
||
fetchWithSsrFGuardMock.mockReset();
|
||
});
|
||
|
||
describe("buildOllamaChatRequest", () => {
|
||
it("omits tools when none are provided", () => {
|
||
expect(
|
||
buildOllamaChatRequest({
|
||
modelId: "qwen3.5:9b",
|
||
messages: [{ role: "user", content: "hello" }],
|
||
options: { num_ctx: 65536 },
|
||
}),
|
||
).toEqual({
|
||
model: "qwen3.5:9b",
|
||
messages: [{ role: "user", content: "hello" }],
|
||
stream: true,
|
||
options: { num_ctx: 65536 },
|
||
});
|
||
});
|
||
|
||
it("strips the ollama/ prefix from chat model ids", () => {
|
||
const request = buildOllamaChatRequest({
|
||
modelId: "ollama/qwen3:14b-q8_0",
|
||
messages: [{ role: "user", content: "hello" }],
|
||
});
|
||
expect(request.model).toBe("qwen3:14b-q8_0");
|
||
});
|
||
|
||
it("strips the active custom provider prefix from chat model ids", () => {
|
||
const request = buildOllamaChatRequest({
|
||
modelId: "ollama-spark/qwen3:32b",
|
||
providerId: "ollama-spark",
|
||
messages: [{ role: "user", content: "hello" }],
|
||
});
|
||
expect(request.model).toBe("qwen3:32b");
|
||
});
|
||
|
||
it("keeps unrelated slash-containing Ollama model ids intact", () => {
|
||
const request = buildOllamaChatRequest({
|
||
modelId: "library/qwen3:32b",
|
||
providerId: "ollama-spark",
|
||
messages: [{ role: "user", content: "hello" }],
|
||
});
|
||
expect(request.model).toBe("library/qwen3:32b");
|
||
});
|
||
});
|
||
|
||
describe("createConfiguredOllamaCompatStreamWrapper", () => {
|
||
it("adds Moonshot thinking config for Ollama cloud Kimi compat requests", async () => {
|
||
let patchedPayload: Record<string, unknown> | undefined;
|
||
const baseStreamFn = vi.fn((_model, _context, options) => {
|
||
options?.onPayload?.({ tool_choice: "auto" });
|
||
return (async function* () {})();
|
||
});
|
||
const model = {
|
||
api: "openai-completions",
|
||
provider: "ollama",
|
||
id: "kimi-k2.5:cloud",
|
||
contextWindow: 262144,
|
||
params: { num_ctx: 65536 },
|
||
};
|
||
|
||
const wrapped = createConfiguredOllamaCompatStreamWrapper({
|
||
provider: "ollama",
|
||
modelId: "kimi-k2.5:cloud",
|
||
model,
|
||
streamFn: baseStreamFn,
|
||
thinkingLevel: "high",
|
||
extraParams: {},
|
||
} as never);
|
||
|
||
await wrapped?.(
|
||
model as never,
|
||
{ messages: [] } as never,
|
||
{
|
||
onPayload: (payload: unknown) => {
|
||
patchedPayload = payload as Record<string, unknown>;
|
||
},
|
||
} as never,
|
||
);
|
||
|
||
const payload = requireRecord(patchedPayload, "patched payload");
|
||
expect(payload.thinking).toEqual({ type: "enabled" });
|
||
expect(payload.options).toEqual({ num_ctx: 65536 });
|
||
});
|
||
|
||
it("falls back to contextWindow when configured num_ctx is invalid", async () => {
|
||
let patchedPayload: Record<string, unknown> | undefined;
|
||
const baseStreamFn = vi.fn((_model, _context, options) => {
|
||
options?.onPayload?.({});
|
||
return (async function* () {})();
|
||
});
|
||
const model = {
|
||
api: "openai-completions",
|
||
provider: "ollama",
|
||
id: "qwen3:32b",
|
||
contextWindow: 131072,
|
||
params: { num_ctx: 0 },
|
||
};
|
||
|
||
const wrapped = createConfiguredOllamaCompatStreamWrapper({
|
||
provider: "ollama",
|
||
modelId: "qwen3:32b",
|
||
model,
|
||
streamFn: baseStreamFn,
|
||
} as never);
|
||
|
||
await wrapped?.(
|
||
model as never,
|
||
{ messages: [] } as never,
|
||
{
|
||
onPayload: (payload: unknown) => {
|
||
patchedPayload = payload as Record<string, unknown>;
|
||
},
|
||
} as never,
|
||
);
|
||
|
||
const payload = requireRecord(patchedPayload, "patched payload");
|
||
expect(payload.options).toEqual({ num_ctx: 131072 });
|
||
});
|
||
|
||
it("forwards think=false on native Ollama chat requests when thinking is off", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
|
||
const model = {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "qwen3:32b",
|
||
contextWindow: 131072,
|
||
};
|
||
|
||
const wrapped = createConfiguredOllamaCompatStreamWrapper({
|
||
provider: "ollama",
|
||
modelId: "qwen3:32b",
|
||
model,
|
||
streamFn: baseStreamFn,
|
||
thinkingLevel: "off",
|
||
} as never);
|
||
if (!wrapped) {
|
||
throw new Error("Expected wrapped Ollama stream function");
|
||
}
|
||
|
||
const stream = await Promise.resolve(
|
||
wrapped(
|
||
model as never,
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
} as never,
|
||
{} as never,
|
||
),
|
||
);
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
think?: boolean;
|
||
options?: { think?: boolean; num_ctx?: number };
|
||
};
|
||
expect(requestBody.think).toBe(false);
|
||
expect(requestBody.options?.think).toBeUndefined();
|
||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||
},
|
||
);
|
||
});
|
||
|
||
it("does not overwrite configured native Ollama params.thinking with implicit off", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
|
||
const model = {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "qwen3:32b",
|
||
contextWindow: 131072,
|
||
params: { thinking: "medium" },
|
||
};
|
||
|
||
const wrapped = createConfiguredOllamaCompatStreamWrapper({
|
||
provider: "ollama",
|
||
modelId: "qwen3:32b",
|
||
model,
|
||
streamFn: baseStreamFn,
|
||
thinkingLevel: "off",
|
||
} as never);
|
||
if (!wrapped) {
|
||
throw new Error("Expected wrapped Ollama stream function");
|
||
}
|
||
|
||
const stream = await Promise.resolve(
|
||
wrapped(
|
||
model as never,
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
} as never,
|
||
{} as never,
|
||
),
|
||
);
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as { think?: string };
|
||
expect(requestBody.think).toBe("medium");
|
||
},
|
||
);
|
||
});
|
||
|
||
it("does not forward truthy configured native Ollama thinking for non-reasoning models", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
|
||
const model = {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "llama3.2:latest",
|
||
contextWindow: 8192,
|
||
reasoning: false,
|
||
params: { thinking: "medium" },
|
||
};
|
||
|
||
const wrapped = createConfiguredOllamaCompatStreamWrapper({
|
||
provider: "ollama",
|
||
modelId: "llama3.2:latest",
|
||
model,
|
||
streamFn: baseStreamFn,
|
||
thinkingLevel: "off",
|
||
} as never);
|
||
if (!wrapped) {
|
||
throw new Error("Expected wrapped Ollama stream function");
|
||
}
|
||
|
||
const stream = await Promise.resolve(
|
||
wrapped(
|
||
model as never,
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
} as never,
|
||
{} as never,
|
||
),
|
||
);
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
think?: string;
|
||
options?: { think?: string };
|
||
};
|
||
expect(requestBody.think).toBeUndefined();
|
||
expect(requestBody.options?.think).toBeUndefined();
|
||
},
|
||
);
|
||
});
|
||
|
||
it("does not forward runtime native Ollama thinking for non-reasoning models", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
|
||
const model = {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "llama3.2:latest",
|
||
contextWindow: 8192,
|
||
reasoning: false,
|
||
};
|
||
|
||
const wrapped = createConfiguredOllamaCompatStreamWrapper({
|
||
provider: "ollama",
|
||
modelId: "llama3.2:latest",
|
||
model,
|
||
streamFn: baseStreamFn,
|
||
thinkingLevel: "low",
|
||
} as never);
|
||
if (!wrapped) {
|
||
throw new Error("Expected wrapped Ollama stream function");
|
||
}
|
||
|
||
const stream = await Promise.resolve(
|
||
wrapped(
|
||
model as never,
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
} as never,
|
||
{} as never,
|
||
),
|
||
);
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
think?: string;
|
||
options?: { think?: string };
|
||
};
|
||
expect(requestBody.think).toBeUndefined();
|
||
expect(requestBody.options?.think).toBeUndefined();
|
||
},
|
||
);
|
||
});
|
||
|
||
it("forwards the native think effort on native Ollama chat requests when thinking is enabled", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
|
||
const model = {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "qwen3:32b",
|
||
contextWindow: 131072,
|
||
};
|
||
|
||
const wrapped = createConfiguredOllamaCompatStreamWrapper({
|
||
provider: "ollama",
|
||
modelId: "qwen3:32b",
|
||
model,
|
||
streamFn: baseStreamFn,
|
||
thinkingLevel: "low",
|
||
} as never);
|
||
if (!wrapped) {
|
||
throw new Error("Expected wrapped Ollama stream function");
|
||
}
|
||
|
||
const stream = await Promise.resolve(
|
||
wrapped(
|
||
model as never,
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
} as never,
|
||
{} as never,
|
||
),
|
||
);
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
think?: boolean | string;
|
||
options?: { think?: boolean | string; num_ctx?: number };
|
||
};
|
||
expect(requestBody.think).toBe("low");
|
||
expect(requestBody.options?.think).toBeUndefined();
|
||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||
},
|
||
);
|
||
});
|
||
|
||
it("passes resolved provider request timeouts to native Ollama chat fetches", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: { requestTimeoutMs: 450_000 },
|
||
});
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
expect(getGuardedFetchCall(fetchMock).timeoutMs).toBe(450_000);
|
||
},
|
||
);
|
||
});
|
||
|
||
it("passes caller abort signals at guard level when a timeout is present", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const signal = new AbortController().signal;
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
options: { signal, timeoutMs: 123_456 },
|
||
});
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const request = getGuardedFetchCall(fetchMock);
|
||
expect(request.timeoutMs).toBe(123_456);
|
||
expect(request.signal).toBe(signal);
|
||
expect(request.init?.signal).toBeUndefined();
|
||
},
|
||
);
|
||
});
|
||
|
||
it("maps native Ollama max thinking to think=high on the wire", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
|
||
const model = {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "gpt-oss:20b",
|
||
contextWindow: 131072,
|
||
};
|
||
|
||
const wrapped = createConfiguredOllamaCompatStreamWrapper({
|
||
provider: "ollama",
|
||
modelId: "gpt-oss:20b",
|
||
model,
|
||
streamFn: baseStreamFn,
|
||
thinkingLevel: "max",
|
||
} as never);
|
||
if (!wrapped) {
|
||
throw new Error("Expected wrapped Ollama stream function");
|
||
}
|
||
|
||
const stream = await Promise.resolve(
|
||
wrapped(
|
||
model as never,
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
} as never,
|
||
{} as never,
|
||
),
|
||
);
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
think?: boolean | string;
|
||
options?: { think?: boolean | string; num_ctx?: number };
|
||
};
|
||
expect(requestBody.think).toBe("high");
|
||
expect(requestBody.options?.think).toBeUndefined();
|
||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||
},
|
||
);
|
||
});
|
||
|
||
it("sends custom-provider Ollama chat requests with the bare Ollama model id", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const streamFn = createOllamaStreamFn("http://ollama-host:11434");
|
||
const model = {
|
||
api: "ollama",
|
||
provider: "ollama-spark",
|
||
id: "ollama-spark/qwen3:32b",
|
||
contextWindow: 131072,
|
||
};
|
||
|
||
const stream = await Promise.resolve(
|
||
streamFn(
|
||
model as never,
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
} as never,
|
||
{} as never,
|
||
),
|
||
);
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as { model?: string };
|
||
expect(requestBody.model).toBe("qwen3:32b");
|
||
},
|
||
);
|
||
});
|
||
|
||
it("adds direct type hints to native Ollama tool schemas before sending them", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const streamFn = createOllamaStreamFn("http://ollama-host:11434");
|
||
const model = {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "qwen3:32b",
|
||
contextWindow: 131072,
|
||
};
|
||
|
||
const stream = await Promise.resolve(
|
||
streamFn(
|
||
model as never,
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
tools: [
|
||
{
|
||
name: "search",
|
||
description: "search",
|
||
parameters: {
|
||
properties: {
|
||
query: {
|
||
anyOf: [{ type: "string" }, { type: "null" }],
|
||
},
|
||
tags: {
|
||
items: { type: "string" },
|
||
},
|
||
},
|
||
required: ["query"],
|
||
},
|
||
},
|
||
],
|
||
} as never,
|
||
{} as never,
|
||
),
|
||
);
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
tools?: Array<{
|
||
function?: {
|
||
parameters?: {
|
||
type?: string;
|
||
properties?: Record<string, { type?: string }>;
|
||
};
|
||
};
|
||
}>;
|
||
};
|
||
const parameters = requestBody.tools?.[0]?.function?.parameters;
|
||
expect(parameters?.type).toBe("object");
|
||
expect(parameters?.properties?.query?.type).toBe("string");
|
||
expect(parameters?.properties?.tags?.type).toBe("array");
|
||
},
|
||
);
|
||
});
|
||
});
|
||
|
||
describe("convertToOllamaMessages", () => {
|
||
it("converts user text messages", () => {
|
||
const messages = [{ role: "user", content: "hello" }];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result).toEqual([{ role: "user", content: "hello" }]);
|
||
});
|
||
|
||
it("converts user messages with content parts", () => {
|
||
const messages = [
|
||
{
|
||
role: "user",
|
||
content: [
|
||
{ type: "text", text: "describe this" },
|
||
{ type: "image", data: "base64data" },
|
||
],
|
||
},
|
||
];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result).toEqual([{ role: "user", content: "describe this", images: ["base64data"] }]);
|
||
});
|
||
|
||
it("prepends system message when provided", () => {
|
||
const messages = [{ role: "user", content: "hello" }];
|
||
const result = convertToOllamaMessages(messages, "You are helpful.");
|
||
expect(result[0]).toEqual({ role: "system", content: "You are helpful." });
|
||
expect(result[1]).toEqual({ role: "user", content: "hello" });
|
||
});
|
||
|
||
it("converts assistant messages with toolCall content blocks", () => {
|
||
const messages = [
|
||
{
|
||
role: "assistant",
|
||
content: [
|
||
{ type: "text", text: "Let me check." },
|
||
{ type: "toolCall", id: "call_1", name: "bash", arguments: { command: "ls" } },
|
||
],
|
||
},
|
||
];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result[0].role).toBe("assistant");
|
||
expect(result[0].content).toBe("Let me check.");
|
||
expect(result[0].tool_calls).toEqual([
|
||
{ id: "call_1", function: { name: "bash", arguments: { command: "ls" } } },
|
||
]);
|
||
});
|
||
|
||
it("preserves assistant tool-call ids before Ollama replay", () => {
|
||
const messages = [
|
||
{
|
||
role: "assistant",
|
||
content: [
|
||
{
|
||
type: "toolCall",
|
||
id: "fc_ollama_123",
|
||
name: "bash",
|
||
arguments: { command: "pwd" },
|
||
},
|
||
],
|
||
},
|
||
];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result[0].tool_calls).toEqual([
|
||
{ id: "fc_ollama_123", function: { name: "bash", arguments: { command: "pwd" } } },
|
||
]);
|
||
});
|
||
|
||
it("normalizes provider-prefixed tool-call names before Ollama replay", () => {
|
||
const messages = [
|
||
{
|
||
role: "assistant",
|
||
content: [
|
||
{ type: "toolCall", id: "call_1", name: "functions.exec", arguments: { command: "pwd" } },
|
||
{ type: "tool_use", id: "call_2", name: "tools/read", input: { path: "README.md" } },
|
||
],
|
||
},
|
||
];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result[0].tool_calls).toEqual([
|
||
{ id: "call_1", function: { name: "exec", arguments: { command: "pwd" } } },
|
||
{ id: "call_2", function: { name: "read", arguments: { path: "README.md" } } },
|
||
]);
|
||
});
|
||
|
||
it("preserves exact allowlisted tool-prefix names before Ollama replay", () => {
|
||
const messages = [
|
||
{
|
||
role: "assistant",
|
||
content: [
|
||
{ type: "toolCall", id: "call_1", name: "tool_a", arguments: { value: 1 } },
|
||
{ type: "tool_use", id: "call_2", name: "tools_invoke_test", input: { value: 2 } },
|
||
{ type: "toolCall", id: "call_3", name: "function-run", arguments: { value: 3 } },
|
||
],
|
||
},
|
||
];
|
||
const result = convertToOllamaMessages(messages, undefined, {
|
||
availableToolNames: new Set(["tool_a", "tools_invoke_test", "function-run"]),
|
||
});
|
||
expect(result[0].tool_calls).toEqual([
|
||
{ id: "call_1", function: { name: "tool_a", arguments: { value: 1 } } },
|
||
{ id: "call_2", function: { name: "tools_invoke_test", arguments: { value: 2 } } },
|
||
{ id: "call_3", function: { name: "function-run", arguments: { value: 3 } } },
|
||
]);
|
||
});
|
||
|
||
it("strips underscore and dash provider prefixes only when the suffix is allowlisted", () => {
|
||
const messages = [
|
||
{
|
||
role: "assistant",
|
||
content: [
|
||
{ type: "toolCall", id: "call_1", name: "tools_exec", arguments: { command: "pwd" } },
|
||
{ type: "tool_use", id: "call_2", name: "function-read", input: { path: "." } },
|
||
{ type: "toolCall", id: "call_3", name: "tool_missing", arguments: {} },
|
||
],
|
||
},
|
||
];
|
||
const result = convertToOllamaMessages(messages, undefined, {
|
||
availableToolNames: new Set(["exec", "read"]),
|
||
});
|
||
expect(result[0].tool_calls).toEqual([
|
||
{ id: "call_1", function: { name: "exec", arguments: { command: "pwd" } } },
|
||
{ id: "call_2", function: { name: "read", arguments: { path: "." } } },
|
||
{ id: "call_3", function: { name: "tool_missing", arguments: {} } },
|
||
]);
|
||
});
|
||
|
||
it("keeps non-prefixed Ollama replay tool names intact", () => {
|
||
const messages = [
|
||
{
|
||
role: "assistant",
|
||
content: [
|
||
{ type: "toolCall", id: "call_1", name: "functionshell", arguments: {} },
|
||
{ type: "toolCall", id: "call_2", name: "tooling", arguments: {} },
|
||
{ type: "toolCall", id: "call_3", name: "tools", arguments: {} },
|
||
{ type: "toolCall", id: "call_4", name: "tool_a", arguments: {} },
|
||
],
|
||
},
|
||
];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result[0].tool_calls).toEqual([
|
||
{ id: "call_1", function: { name: "functionshell", arguments: {} } },
|
||
{ id: "call_2", function: { name: "tooling", arguments: {} } },
|
||
{ id: "call_3", function: { name: "tools", arguments: {} } },
|
||
{ id: "call_4", function: { name: "tool_a", arguments: {} } },
|
||
]);
|
||
});
|
||
|
||
it("deserializes string arguments back to objects for Ollama (round-trip fix)", () => {
|
||
// When tool calls round-trip through OpenAI-format storage, arguments
|
||
// are serialized as a JSON string. Ollama expects an object.
|
||
const messages = [
|
||
{
|
||
role: "assistant",
|
||
content: [
|
||
{
|
||
type: "toolCall",
|
||
id: "call_2",
|
||
name: "Read",
|
||
arguments: '{"file_path":"/tmp/test.txt"}',
|
||
},
|
||
],
|
||
},
|
||
];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result[0].tool_calls).toEqual([
|
||
{ id: "call_2", function: { name: "Read", arguments: { file_path: "/tmp/test.txt" } } },
|
||
]);
|
||
});
|
||
|
||
it("handles tool_use blocks with string input (Anthropic format round-trip)", () => {
|
||
const messages = [
|
||
{
|
||
role: "assistant",
|
||
content: [
|
||
{ type: "tool_use", id: "toolu_1", name: "exec", input: '{"command":"echo hello"}' },
|
||
],
|
||
},
|
||
];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result[0].tool_calls).toEqual([
|
||
{ id: "toolu_1", function: { name: "exec", arguments: { command: "echo hello" } } },
|
||
]);
|
||
});
|
||
|
||
it("preserves unsafe integers as strings when replay args are deserialized", () => {
|
||
const messages = [
|
||
{
|
||
role: "assistant",
|
||
content: [
|
||
{
|
||
type: "toolCall",
|
||
id: "call_3",
|
||
name: "read",
|
||
arguments: '{"path":9223372036854775807,"nested":{"thread":1234567890123456789}}',
|
||
},
|
||
],
|
||
},
|
||
];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result[0].tool_calls).toEqual([
|
||
{
|
||
id: "call_3",
|
||
function: {
|
||
name: "read",
|
||
arguments: {
|
||
path: "9223372036854775807",
|
||
nested: { thread: "1234567890123456789" },
|
||
},
|
||
},
|
||
},
|
||
]);
|
||
});
|
||
it("converts tool result messages with 'tool' role", () => {
|
||
const messages = [{ role: "tool", content: "file1.txt\nfile2.txt" }];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result).toEqual([{ role: "tool", content: "file1.txt\nfile2.txt" }]);
|
||
});
|
||
|
||
it("converts SDK 'toolResult' role to Ollama 'tool' role", () => {
|
||
const messages = [{ role: "toolResult", content: "command output here" }];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result).toEqual([{ role: "tool", content: "command output here" }]);
|
||
});
|
||
|
||
it("includes tool_name from SDK toolResult messages", () => {
|
||
const messages = [{ role: "toolResult", content: "file contents here", toolName: "read" }];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result).toEqual([{ role: "tool", content: "file contents here", tool_name: "read" }]);
|
||
});
|
||
|
||
it("omits tool_name when not provided in toolResult", () => {
|
||
const messages = [{ role: "toolResult", content: "output" }];
|
||
const result = convertToOllamaMessages(messages);
|
||
expect(result).toEqual([{ role: "tool", content: "output" }]);
|
||
expect(result[0]).not.toHaveProperty("tool_name");
|
||
});
|
||
|
||
it("handles empty messages array", () => {
|
||
const result = convertToOllamaMessages([]);
|
||
expect(result).toStrictEqual([]);
|
||
});
|
||
});
|
||
|
||
describe("buildAssistantMessage", () => {
|
||
const modelInfo = { api: "ollama", provider: "ollama", id: "qwen3:32b" };
|
||
|
||
it("builds text-only response", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: { role: "assistant" as const, content: "Hello!" },
|
||
done: true,
|
||
prompt_eval_count: 10,
|
||
eval_count: 5,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expect(result.role).toBe("assistant");
|
||
expect(result.content).toEqual([{ type: "text", text: "Hello!" }]);
|
||
expect(result.stopReason).toBe("stop");
|
||
expect(result.usage.input).toBe(10);
|
||
expect(result.usage.output).toBe(5);
|
||
expect(result.usage.totalTokens).toBe(15);
|
||
});
|
||
|
||
it("keeps thinking-only output when content is empty", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
thinking: "Thinking output",
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expect(result.stopReason).toBe("stop");
|
||
expect(result.content).toEqual([{ type: "thinking", thinking: "Thinking output" }]);
|
||
});
|
||
|
||
it("keeps reasoning-only output when content and thinking are empty", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
reasoning: "Reasoning output",
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expect(result.stopReason).toBe("stop");
|
||
expect(result.content).toEqual([{ type: "thinking", thinking: "Reasoning output" }]);
|
||
});
|
||
|
||
it("strips inline reasoning prefix from kimi cloud visible text", () => {
|
||
const response = {
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content:
|
||
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. ️Final answer only.",
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "kimi-k2.6:cloud",
|
||
});
|
||
expect(result.content).toEqual([{ type: "text", text: "Final answer only." }]);
|
||
});
|
||
|
||
it("strips inline reasoning for provider-qualified Kimi cloud refs", () => {
|
||
const response = {
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content:
|
||
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. ️Final answer only.",
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "ollama/kimi-k2.6:cloud",
|
||
});
|
||
expect(result.content).toEqual([{ type: "text", text: "Final answer only." }]);
|
||
});
|
||
|
||
it("strips inline reasoning when the Kimi boundary is followed by whitespace", () => {
|
||
const response = {
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content:
|
||
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. ️ Final answer only.",
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "kimi-k2.6:cloud",
|
||
});
|
||
expect(result.content).toEqual([{ type: "text", text: "Final answer only." }]);
|
||
});
|
||
|
||
it("strips inline reasoning before short Kimi cloud answers", () => {
|
||
const response = {
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content:
|
||
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. ️ OK.",
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "kimi-k2.6:cloud",
|
||
});
|
||
expect(result.content).toEqual([{ type: "text", text: "OK." }]);
|
||
});
|
||
|
||
it("strips inline reasoning when the Kimi boundary has no visible answer", () => {
|
||
const response = {
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content:
|
||
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. ️ ",
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "kimi-k2.6:cloud",
|
||
});
|
||
expect(result.content).toEqual([]);
|
||
});
|
||
|
||
it("does not strip inline boundary marker on non-kimi models", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "intro ️keep this intact",
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expect(result.content).toEqual([{ type: "text", text: "intro ️keep this intact" }]);
|
||
});
|
||
|
||
it("does not treat emoji variation selectors as Kimi inline-reasoning boundaries", () => {
|
||
const response = {
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content:
|
||
"This is a normal Kimi cloud answer with enough length to cross the prefix threshold and no hidden reasoning leak. ☀️sunshine should remain visible to the user.",
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, {
|
||
api: "ollama",
|
||
provider: "ollama",
|
||
id: "kimi-k2.6:cloud",
|
||
});
|
||
expect(result.content).toEqual([
|
||
{
|
||
type: "text",
|
||
text: "This is a normal Kimi cloud answer with enough length to cross the prefix threshold and no hidden reasoning leak. ☀️sunshine should remain visible to the user.",
|
||
},
|
||
]);
|
||
});
|
||
|
||
it("estimates usage when Ollama omits eval counters", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: { role: "assistant" as const, content: "Estimated output" },
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo, { input: 11, output: 4 });
|
||
expect(result.usage.input).toBe(11);
|
||
expect(result.usage.output).toBe(4);
|
||
expect(result.usage.totalTokens).toBe(15);
|
||
});
|
||
|
||
it("preserves explicit zero usage counters from Ollama", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: { role: "assistant" as const, content: "" },
|
||
done: true,
|
||
prompt_eval_count: 0,
|
||
eval_count: 0,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo, { input: 11, output: 4 });
|
||
expect(result.usage.input).toBe(0);
|
||
expect(result.usage.output).toBe(0);
|
||
expect(result.usage.totalTokens).toBe(0);
|
||
});
|
||
|
||
it("builds response with tool calls", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
tool_calls: [{ function: { name: "bash", arguments: { command: "ls -la" } } }],
|
||
},
|
||
done: true,
|
||
prompt_eval_count: 20,
|
||
eval_count: 10,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expect(result.stopReason).toBe("toolUse");
|
||
expect(result.content.length).toBe(1); // toolCall only (empty content is skipped)
|
||
expect(result.content[0].type).toBe("toolCall");
|
||
const toolCall = result.content[0] as {
|
||
type: "toolCall";
|
||
id: string;
|
||
name: string;
|
||
arguments: Record<string, unknown>;
|
||
};
|
||
expect(toolCall.name).toBe("bash");
|
||
expect(toolCall.arguments).toEqual({ command: "ls -la" });
|
||
expect(toolCall.id).toMatch(/^ollama_call_[0-9a-f-]{36}$/);
|
||
});
|
||
|
||
it("preserves Ollama response tool-call ids", () => {
|
||
const response = {
|
||
model: "gemini-3-flash-preview:cloud",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
tool_calls: [
|
||
{ id: "fc_ollama_real_1", function: { name: "bash", arguments: { command: "pwd" } } },
|
||
],
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expectToolCallContent(result.content[0], { name: "bash", arguments: { command: "pwd" } });
|
||
expect((result.content[0] as { id?: string }).id).toBe("fc_ollama_real_1");
|
||
});
|
||
|
||
it("preserves parallel Ollama response tool-call ids independently", () => {
|
||
const response = {
|
||
model: "gemini-3-flash-preview:cloud",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
tool_calls: [
|
||
{ id: "fc_ollama_real_1", function: { name: "read", arguments: { path: "a.txt" } } },
|
||
{ id: "fc_ollama_real_2", function: { name: "exec", arguments: { command: "date" } } },
|
||
],
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expect(result.content.map((part) => (part as { id?: string }).id)).toEqual([
|
||
"fc_ollama_real_1",
|
||
"fc_ollama_real_2",
|
||
]);
|
||
expectToolCallContent(result.content[0], { name: "read", arguments: { path: "a.txt" } });
|
||
expectToolCallContent(result.content[1], { name: "exec", arguments: { command: "date" } });
|
||
});
|
||
|
||
it("normalizes provider-prefixed tool-call names in Ollama responses", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
tool_calls: [
|
||
{ function: { name: "functions.exec", arguments: { command: "pwd" } } },
|
||
{ function: { name: "tools/read", arguments: { path: "README.md" } } },
|
||
],
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expect(result.content).toHaveLength(2);
|
||
expectToolCallContent(result.content[0], { name: "exec", arguments: { command: "pwd" } });
|
||
expectToolCallContent(result.content[1], { name: "read", arguments: { path: "README.md" } });
|
||
});
|
||
|
||
it("preserves exact allowlisted tool-prefix names in Ollama responses", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
tool_calls: [
|
||
{ function: { name: "tool_a", arguments: { value: 1 } } },
|
||
{ function: { name: "tools_invoke_test", arguments: { value: 2 } } },
|
||
{ function: { name: "function-run", arguments: { value: 3 } } },
|
||
],
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo, undefined, {
|
||
availableToolNames: new Set(["tool_a", "tools_invoke_test", "function-run"]),
|
||
});
|
||
expect(result.content).toHaveLength(3);
|
||
expectToolCallContent(result.content[0], { name: "tool_a", arguments: { value: 1 } });
|
||
expectToolCallContent(result.content[1], {
|
||
name: "tools_invoke_test",
|
||
arguments: { value: 2 },
|
||
});
|
||
expectToolCallContent(result.content[2], { name: "function-run", arguments: { value: 3 } });
|
||
});
|
||
|
||
it("keeps non-prefixed Ollama response tool names intact", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
tool_calls: [
|
||
{ function: { name: "functionshell", arguments: {} } },
|
||
{ function: { name: "tooling", arguments: {} } },
|
||
{ function: { name: "tools", arguments: {} } },
|
||
{ function: { name: "tool_a", arguments: {} } },
|
||
],
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expect(result.content).toHaveLength(4);
|
||
expectToolCallContent(result.content[0], { name: "functionshell", arguments: {} });
|
||
expectToolCallContent(result.content[1], { name: "tooling", arguments: {} });
|
||
expectToolCallContent(result.content[2], { name: "tools", arguments: {} });
|
||
expectToolCallContent(result.content[3], { name: "tool_a", arguments: {} });
|
||
});
|
||
|
||
it("parses stringified tool call arguments from Ollama responses", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
tool_calls: [{ function: { name: "bash", arguments: '{"command":"ls","path":"/tmp"}' } }],
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expectToolCallContent(result.content[0], {
|
||
name: "bash",
|
||
arguments: { command: "ls", path: "/tmp" },
|
||
});
|
||
});
|
||
|
||
it("preserves unsafe integers in stringified tool call arguments", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
tool_calls: [
|
||
{
|
||
function: {
|
||
name: "send",
|
||
arguments: '{"target":9223372036854775807,"nested":{"thread":1234567890123456789}}',
|
||
},
|
||
},
|
||
],
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expectToolCallContent(result.content[0], {
|
||
name: "send",
|
||
arguments: {
|
||
target: "9223372036854775807",
|
||
nested: { thread: "1234567890123456789" },
|
||
},
|
||
});
|
||
});
|
||
|
||
it("falls back to empty arguments for malformed stringified tool call arguments", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: {
|
||
role: "assistant" as const,
|
||
content: "",
|
||
tool_calls: [{ function: { name: "bash", arguments: '{"command":"ls"' } }],
|
||
},
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expectToolCallContent(result.content[0], { name: "bash", arguments: {} });
|
||
});
|
||
|
||
it("sets all costs to zero for local models", () => {
|
||
const response = {
|
||
model: "qwen3:32b",
|
||
created_at: "2026-01-01T00:00:00Z",
|
||
message: { role: "assistant" as const, content: "ok" },
|
||
done: true,
|
||
};
|
||
const result = buildAssistantMessage(response, modelInfo);
|
||
expect(result.usage.cost).toEqual({
|
||
input: 0,
|
||
output: 0,
|
||
cacheRead: 0,
|
||
cacheWrite: 0,
|
||
total: 0,
|
||
});
|
||
});
|
||
});
|
||
|
||
// Helper: build a ReadableStreamDefaultReader from NDJSON lines
|
||
function mockNdjsonReader(lines: string[]): ReadableStreamDefaultReader<Uint8Array> {
|
||
const encoder = new TextEncoder();
|
||
const payload = lines.join("\n") + "\n";
|
||
let consumed = false;
|
||
return {
|
||
read: async () => {
|
||
if (consumed) {
|
||
return { done: true as const, value: undefined };
|
||
}
|
||
consumed = true;
|
||
return { done: false as const, value: encoder.encode(payload) };
|
||
},
|
||
releaseLock: () => {},
|
||
cancel: async () => {},
|
||
closed: Promise.resolve(undefined),
|
||
} as unknown as ReadableStreamDefaultReader<Uint8Array>;
|
||
}
|
||
|
||
async function expectDoneEventContent(lines: string[], expectedContent: unknown) {
|
||
await withMockNdjsonFetch(lines, async () => {
|
||
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
const doneEvent = events.at(-1);
|
||
if (!doneEvent || doneEvent.type !== "done") {
|
||
throw new Error("Expected done event");
|
||
}
|
||
|
||
expect(doneEvent.message.content).toEqual(expectedContent);
|
||
});
|
||
}
|
||
|
||
describe("parseNdjsonStream", () => {
|
||
it("parses text-only streaming chunks", async () => {
|
||
const reader = mockNdjsonReader([
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"Hello"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":" world"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":5,"eval_count":2}',
|
||
]);
|
||
const chunks = [];
|
||
for await (const chunk of parseNdjsonStream(reader)) {
|
||
chunks.push(chunk);
|
||
}
|
||
expect(chunks).toHaveLength(3);
|
||
expect(chunks[0].message.content).toBe("Hello");
|
||
expect(chunks[1].message.content).toBe(" world");
|
||
expect(chunks[2].done).toBe(true);
|
||
});
|
||
|
||
it("parses tool_calls from intermediate chunk (not final)", async () => {
|
||
// Ollama sends tool_calls in done:false chunk, final done:true has no tool_calls
|
||
const reader = mockNdjsonReader([
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"bash","arguments":{"command":"ls"}}}]},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":10,"eval_count":5}',
|
||
]);
|
||
const chunks = [];
|
||
for await (const chunk of parseNdjsonStream(reader)) {
|
||
chunks.push(chunk);
|
||
}
|
||
expect(chunks).toHaveLength(2);
|
||
expect(chunks[0].done).toBe(false);
|
||
expect(chunks[0].message.tool_calls).toHaveLength(1);
|
||
expect(chunks[0].message.tool_calls![0].function.name).toBe("bash");
|
||
expect(chunks[1].done).toBe(true);
|
||
expect(chunks[1].message.tool_calls).toBeUndefined();
|
||
});
|
||
|
||
it("accumulates tool_calls across multiple intermediate chunks", async () => {
|
||
const reader = mockNdjsonReader([
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"read","arguments":{"path":"/tmp/a"}}}]},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"bash","arguments":{"command":"ls"}}}]},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true}',
|
||
]);
|
||
|
||
// Simulate the accumulation logic from createOllamaStreamFn
|
||
const accumulatedToolCalls: Array<{
|
||
function: { name: string; arguments: unknown };
|
||
}> = [];
|
||
const chunks = [];
|
||
for await (const chunk of parseNdjsonStream(reader)) {
|
||
chunks.push(chunk);
|
||
if (chunk.message?.tool_calls) {
|
||
accumulatedToolCalls.push(...chunk.message.tool_calls);
|
||
}
|
||
}
|
||
expect(accumulatedToolCalls).toHaveLength(2);
|
||
expect(accumulatedToolCalls[0].function.name).toBe("read");
|
||
expect(accumulatedToolCalls[1].function.name).toBe("bash");
|
||
// Final done:true chunk has no tool_calls
|
||
expect(chunks[2].message.tool_calls).toBeUndefined();
|
||
});
|
||
|
||
it("preserves unsafe integer tool arguments as exact strings", async () => {
|
||
const reader = mockNdjsonReader([
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"send","arguments":{"target":1234567890123456789,"nested":{"thread":9223372036854775807}}}}]},"done":false}',
|
||
]);
|
||
|
||
const chunks = [];
|
||
for await (const chunk of parseNdjsonStream(reader)) {
|
||
chunks.push(chunk);
|
||
}
|
||
|
||
const args = chunks[0]?.message.tool_calls?.[0]?.function.arguments as
|
||
| { target?: unknown; nested?: { thread?: unknown } }
|
||
| undefined;
|
||
expect(args?.target).toBe("1234567890123456789");
|
||
expect(args?.nested?.thread).toBe("9223372036854775807");
|
||
});
|
||
|
||
it("keeps safe integer tool arguments as numbers", async () => {
|
||
const reader = mockNdjsonReader([
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"send","arguments":{"retries":3,"delayMs":2500}}}]},"done":false}',
|
||
]);
|
||
|
||
const chunks = [];
|
||
for await (const chunk of parseNdjsonStream(reader)) {
|
||
chunks.push(chunk);
|
||
}
|
||
|
||
const args = chunks[0]?.message.tool_calls?.[0]?.function.arguments as
|
||
| { retries?: unknown; delayMs?: unknown }
|
||
| undefined;
|
||
expect(args?.retries).toBe(3);
|
||
expect(args?.delayMs).toBe(2500);
|
||
});
|
||
});
|
||
|
||
async function withMockNdjsonFetch(
|
||
lines: string[],
|
||
run: (fetchMock: typeof fetchWithSsrFGuardMock) => Promise<void>,
|
||
): Promise<void> {
|
||
fetchWithSsrFGuardMock.mockImplementation(async () => {
|
||
const payload = lines.join("\n");
|
||
return {
|
||
response: new Response(`${payload}\n`, {
|
||
status: 200,
|
||
headers: { "Content-Type": "application/x-ndjson" },
|
||
}),
|
||
release: vi.fn(async () => undefined),
|
||
};
|
||
});
|
||
await run(fetchWithSsrFGuardMock);
|
||
}
|
||
|
||
function createControlledNdjsonFetch(): {
|
||
fetchImpl: () => Promise<{ response: Response; release: () => Promise<void> }>;
|
||
pushLine: (line: string) => void;
|
||
close: () => void;
|
||
} {
|
||
const encoder = new TextEncoder();
|
||
let controller: ReadableStreamDefaultController<Uint8Array> | undefined;
|
||
const body = new ReadableStream<Uint8Array>({
|
||
start(streamController) {
|
||
controller = streamController;
|
||
},
|
||
});
|
||
return {
|
||
fetchImpl: async () => ({
|
||
response: new Response(body, {
|
||
status: 200,
|
||
headers: { "Content-Type": "application/x-ndjson" },
|
||
}),
|
||
release: vi.fn(async () => undefined),
|
||
}),
|
||
pushLine(line: string) {
|
||
if (!controller) {
|
||
throw new Error("NDJSON controller not initialized");
|
||
}
|
||
controller.enqueue(encoder.encode(`${line}\n`));
|
||
},
|
||
close() {
|
||
if (!controller) {
|
||
throw new Error("NDJSON controller not initialized");
|
||
}
|
||
controller.close();
|
||
},
|
||
};
|
||
}
|
||
|
||
function getGuardedFetchCall(fetchMock: typeof fetchWithSsrFGuardMock): GuardedFetchCall {
|
||
return (fetchMock.mock.calls.at(0)?.[0] as GuardedFetchCall | undefined) ?? { url: "" };
|
||
}
|
||
|
||
async function createOllamaTestStream(params: {
|
||
baseUrl: string;
|
||
defaultHeaders?: Record<string, string>;
|
||
model?: Record<string, unknown>;
|
||
options?: {
|
||
apiKey?: string;
|
||
maxTokens?: number;
|
||
temperature?: number;
|
||
signal?: AbortSignal;
|
||
timeoutMs?: number;
|
||
headers?: Record<string, string>;
|
||
};
|
||
}) {
|
||
const streamFn = createOllamaStreamFn(params.baseUrl, params.defaultHeaders);
|
||
return streamFn(
|
||
{
|
||
id: "qwen3:32b",
|
||
api: "ollama",
|
||
provider: "custom-ollama",
|
||
contextWindow: 131072,
|
||
...params.model,
|
||
} as unknown as Parameters<typeof streamFn>[0],
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
} as unknown as Parameters<typeof streamFn>[1],
|
||
(params.options ?? {}) as unknown as Parameters<typeof streamFn>[2],
|
||
);
|
||
}
|
||
|
||
async function collectStreamEvents<T>(stream: AsyncIterable<T>): Promise<T[]> {
|
||
const events: T[] = [];
|
||
for await (const event of stream) {
|
||
events.push(event);
|
||
}
|
||
return events;
|
||
}
|
||
|
||
async function nextEventWithin<T>(
|
||
iterator: AsyncIterator<T>,
|
||
timeoutMs = 100,
|
||
): Promise<IteratorResult<T> | "timeout"> {
|
||
let timer: NodeJS.Timeout | undefined;
|
||
try {
|
||
return await Promise.race([
|
||
iterator.next(),
|
||
new Promise<"timeout">((resolve) => {
|
||
timer = setTimeout(() => resolve("timeout"), timeoutMs);
|
||
}),
|
||
]);
|
||
} finally {
|
||
if (timer) {
|
||
clearTimeout(timer);
|
||
}
|
||
}
|
||
}
|
||
|
||
describe("createOllamaStreamFn streaming events", () => {
|
||
it("emits start, text_start, text_delta, text_end, done for text responses", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"Hello"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":" world"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":5,"eval_count":2}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
const types = events.map((e) => e.type);
|
||
expect(types).toEqual([
|
||
"start",
|
||
"text_start",
|
||
"text_delta",
|
||
"text_delta",
|
||
"text_end",
|
||
"done",
|
||
]);
|
||
|
||
// text_delta events carry incremental deltas
|
||
const deltas = events.filter((e) => e.type === "text_delta");
|
||
expect(deltas[0]?.contentIndex).toBe(0);
|
||
expect(deltas[0]?.delta).toBe("Hello");
|
||
expect(deltas[1]?.contentIndex).toBe(0);
|
||
expect(deltas[1]?.delta).toBe(" world");
|
||
|
||
// text_end carries the full accumulated content
|
||
const textEnd = events.find((e) => e.type === "text_end");
|
||
expect(textEnd?.contentIndex).toBe(0);
|
||
expect(textEnd?.content).toBe("Hello world");
|
||
|
||
// start/text_start carry empty partials (before any content accumulates)
|
||
const startEvent = events.find((e) => e.type === "start");
|
||
expect(startEvent?.partial.content).toStrictEqual([]);
|
||
const textStartEvent = events.find((e) => e.type === "text_start");
|
||
expect(textStartEvent?.partial.content).toStrictEqual([]);
|
||
|
||
// text_delta partials accumulate content progressively
|
||
expect(deltas[0].partial.content).toEqual([{ type: "text", text: "Hello" }]);
|
||
expect(deltas[1].partial.content).toEqual([{ type: "text", text: "Hello world" }]);
|
||
|
||
// done event contains the final message
|
||
const doneEvent = events.at(-1);
|
||
expect(doneEvent?.type).toBe("done");
|
||
if (doneEvent?.type === "done") {
|
||
expect(doneEvent.message.content).toEqual([{ type: "text", text: "Hello world" }]);
|
||
}
|
||
},
|
||
);
|
||
});
|
||
|
||
it("emits only done for tool-call-only responses (no text content)", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"bash","arguments":{"command":"ls"}}}]},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":10,"eval_count":5}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
// No text content means no start/text_start/text_delta/text_end events
|
||
const types = events.map((e) => e.type);
|
||
expect(types).toEqual(["done"]);
|
||
const doneEvent = events[0];
|
||
if (doneEvent.type === "done") {
|
||
expect(doneEvent.reason).toBe("toolUse");
|
||
}
|
||
},
|
||
);
|
||
});
|
||
|
||
it("estimates usage when the final Ollama chunk omits counters", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"Estimated answer"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
const doneEvent = events.at(-1);
|
||
expect(doneEvent?.type).toBe("done");
|
||
if (doneEvent?.type === "done") {
|
||
expect(doneEvent.message.usage.input).toBeGreaterThan(0);
|
||
expect(doneEvent.message.usage.output).toBeGreaterThan(0);
|
||
expect(doneEvent.message.usage.totalTokens).toBeGreaterThan(0);
|
||
}
|
||
},
|
||
);
|
||
});
|
||
|
||
it("counts image payloads in prompt usage estimates when Ollama omits counters", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"vision answer"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true}',
|
||
],
|
||
async () => {
|
||
const streamFn = createOllamaStreamFn("http://ollama-host:11434");
|
||
const stream = await Promise.resolve(
|
||
streamFn(
|
||
{
|
||
id: "llava",
|
||
api: "ollama",
|
||
provider: "custom-ollama",
|
||
contextWindow: 131072,
|
||
} as never,
|
||
{
|
||
messages: [
|
||
{
|
||
role: "user",
|
||
content: [{ type: "image", data: "a".repeat(400) }],
|
||
},
|
||
],
|
||
} as never,
|
||
{} as never,
|
||
),
|
||
);
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
const doneEvent = events.at(-1);
|
||
expect(doneEvent?.type).toBe("done");
|
||
if (doneEvent?.type === "done") {
|
||
expect(doneEvent.message.usage.input).toBeGreaterThan(50);
|
||
}
|
||
},
|
||
);
|
||
});
|
||
|
||
it("emits text streaming events before done for mixed text + tool responses", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"Let me check."},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"bash","arguments":{"command":"ls"}}}]},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":10,"eval_count":5}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
const types = events.map((e) => e.type);
|
||
expect(types).toEqual(["start", "text_start", "text_delta", "text_end", "done"]);
|
||
const doneEvent = events.at(-1);
|
||
if (doneEvent?.type === "done") {
|
||
expect(doneEvent.reason).toBe("toolUse");
|
||
}
|
||
},
|
||
);
|
||
});
|
||
|
||
it("emits text_end as soon as Ollama switches from text to tool calls", async () => {
|
||
const controlledFetch = createControlledNdjsonFetch();
|
||
fetchWithSsrFGuardMock.mockImplementation(controlledFetch.fetchImpl);
|
||
|
||
try {
|
||
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
|
||
const iterator = stream[Symbol.asyncIterator]();
|
||
|
||
controlledFetch.pushLine(
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"Let me check."},"done":false}',
|
||
);
|
||
|
||
const startEvent = await nextEventWithin(iterator);
|
||
const textStartEvent = await nextEventWithin(iterator);
|
||
const textDeltaEvent = await nextEventWithin(iterator);
|
||
|
||
expect(startEvent).not.toBe("timeout");
|
||
expect(textStartEvent).not.toBe("timeout");
|
||
expect(textDeltaEvent).not.toBe("timeout");
|
||
expectIteratorEvent(startEvent, { type: "start", done: false });
|
||
expectIteratorEvent(textStartEvent, { type: "text_start", done: false });
|
||
expectIteratorEvent(textDeltaEvent, {
|
||
type: "text_delta",
|
||
delta: "Let me check.",
|
||
done: false,
|
||
});
|
||
|
||
controlledFetch.pushLine(
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"bash","arguments":{"command":"ls"}}}]},"done":false}',
|
||
);
|
||
|
||
const textEndEvent = await nextEventWithin(iterator);
|
||
expect(textEndEvent).not.toBe("timeout");
|
||
expectIteratorEvent(textEndEvent, {
|
||
type: "text_end",
|
||
content: "Let me check.",
|
||
done: false,
|
||
});
|
||
if (textEndEvent !== "timeout") {
|
||
const textEndValue = requireRecord(textEndEvent.value, "text_end value");
|
||
expect(textEndValue.contentIndex).toBe(0);
|
||
expect(requireRecord(textEndValue.partial, "text_end partial").content).toEqual([
|
||
{ type: "text", text: "Let me check." },
|
||
]);
|
||
}
|
||
|
||
controlledFetch.pushLine(
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":10,"eval_count":5}',
|
||
);
|
||
controlledFetch.close();
|
||
|
||
const doneEvent = await nextEventWithin(iterator);
|
||
expect(doneEvent).not.toBe("timeout");
|
||
if (doneEvent !== "timeout" && doneEvent.done === false) {
|
||
expectIteratorEvent(doneEvent, { type: "done", done: false });
|
||
expect(requireRecord(doneEvent.value, "done value").reason).toBe("toolUse");
|
||
|
||
const streamEnd = await nextEventWithin(iterator);
|
||
expect(streamEnd).not.toBe("timeout");
|
||
expectIteratorEvent(streamEnd, { done: true });
|
||
} else {
|
||
expectIteratorEvent(doneEvent, { done: true });
|
||
}
|
||
} finally {
|
||
fetchWithSsrFGuardMock.mockReset();
|
||
}
|
||
});
|
||
|
||
it("emits error without text_end when stream fails mid-response", async () => {
|
||
// Simulate a stream that sends one content chunk then ends without done:true.
|
||
// The stream function throws "Ollama API stream ended without a final response".
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"partial"},"done":false}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
const types = events.map((e) => e.type);
|
||
// Should have streaming events for the partial content, then error (no text_end).
|
||
expect(types).toEqual(["start", "text_start", "text_delta", "error"]);
|
||
const errorEvent = events.at(-1);
|
||
expect(errorEvent?.type).toBe("error");
|
||
},
|
||
);
|
||
});
|
||
|
||
it("emits an error instead of accepting garbled Kimi visible text", async () => {
|
||
const garbled =
|
||
'$$"##"%#"##"####""$""""##""$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$' +
|
||
'#"$"$"""$""""#$"""$"""%"%###"""#%""""&"#"""$"""#"#""""%#""""&"#"""$"""$"""#%"""';
|
||
await withMockNdjsonFetch(
|
||
[
|
||
JSON.stringify({
|
||
model: "kimi-k2.5:cloud",
|
||
created_at: "t",
|
||
message: { role: "assistant", content: garbled },
|
||
done: false,
|
||
}),
|
||
'{"model":"kimi-k2.5:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: { id: "kimi-k2.5:cloud", provider: "ollama" },
|
||
});
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
const types = events.map((e) => e.type);
|
||
expect(types).toEqual(["error"]);
|
||
const errorEvent = events.at(-1);
|
||
expect(errorEvent?.type).toBe("error");
|
||
if (errorEvent?.type === "error") {
|
||
expect(errorEvent.error.errorMessage).toContain("garbled visible text");
|
||
}
|
||
},
|
||
);
|
||
});
|
||
|
||
it("buffers Kimi inline reasoning until the streaming boundary is safe", async () => {
|
||
const controlledFetch = createControlledNdjsonFetch();
|
||
fetchWithSsrFGuardMock.mockImplementation(controlledFetch.fetchImpl);
|
||
|
||
try {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: { id: "kimi-k2.6:cloud", provider: "ollama" },
|
||
});
|
||
const iterator = stream[Symbol.asyncIterator]();
|
||
|
||
controlledFetch.pushLine(
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: {
|
||
role: "assistant",
|
||
content:
|
||
"The user is asking for a short answer. I should reason privately before answering. I need to avoid showing this planning text to the user.",
|
||
},
|
||
done: false,
|
||
}),
|
||
);
|
||
|
||
const pendingStartEvent = iterator.next();
|
||
expect(
|
||
await Promise.race([
|
||
pendingStartEvent.then(() => "event" as const),
|
||
new Promise<"timeout">((resolve) => setTimeout(() => resolve("timeout"), 100)),
|
||
]),
|
||
).toBe("timeout");
|
||
|
||
controlledFetch.pushLine(
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: { role: "assistant", content: " ️ OK." },
|
||
done: false,
|
||
}),
|
||
);
|
||
|
||
const startEvent = await pendingStartEvent;
|
||
expectIteratorEvent(startEvent, { type: "start", done: false });
|
||
|
||
const textStartEvent = await nextEventWithin(iterator);
|
||
expect(textStartEvent).not.toBe("timeout");
|
||
expectIteratorEvent(textStartEvent, { type: "text_start", done: false });
|
||
|
||
const textDeltaEvent = await nextEventWithin(iterator);
|
||
expect(textDeltaEvent).not.toBe("timeout");
|
||
expectIteratorEvent(textDeltaEvent, {
|
||
type: "text_delta",
|
||
delta: "OK.",
|
||
done: false,
|
||
});
|
||
if (textDeltaEvent !== "timeout" && textDeltaEvent.done === false) {
|
||
const value = requireRecord(textDeltaEvent.value, "text_delta value");
|
||
expect(JSON.stringify(value)).not.toContain("The user is asking");
|
||
}
|
||
|
||
controlledFetch.pushLine(
|
||
'{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
|
||
);
|
||
controlledFetch.close();
|
||
|
||
const textEndEvent = await nextEventWithin(iterator);
|
||
expect(textEndEvent).not.toBe("timeout");
|
||
expectIteratorEvent(textEndEvent, {
|
||
type: "text_end",
|
||
content: "OK.",
|
||
done: false,
|
||
});
|
||
|
||
const doneEvent = await nextEventWithin(iterator);
|
||
expect(doneEvent).not.toBe("timeout");
|
||
expectIteratorEvent(doneEvent, { type: "done", done: false });
|
||
if (doneEvent !== "timeout" && doneEvent.done === false) {
|
||
const value = requireRecord(doneEvent.value, "done value");
|
||
expect(JSON.stringify(value)).not.toContain("The user is asking");
|
||
}
|
||
|
||
const streamEnd = await nextEventWithin(iterator);
|
||
expect(streamEnd).not.toBe("timeout");
|
||
expectIteratorEvent(streamEnd, { done: true });
|
||
} finally {
|
||
fetchWithSsrFGuardMock.mockReset();
|
||
}
|
||
});
|
||
|
||
it("streams marker-less Kimi answers after the bounded sanitizer window", async () => {
|
||
const controlledFetch = createControlledNdjsonFetch();
|
||
fetchWithSsrFGuardMock.mockImplementation(controlledFetch.fetchImpl);
|
||
|
||
try {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: { id: "kimi-k2.6:cloud", provider: "ollama" },
|
||
});
|
||
const iterator = stream[Symbol.asyncIterator]();
|
||
const visibleAnswer = "This is a normal Kimi cloud answer without hidden reasoning. ".repeat(
|
||
12,
|
||
);
|
||
|
||
controlledFetch.pushLine(
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: { role: "assistant", content: visibleAnswer },
|
||
done: false,
|
||
}),
|
||
);
|
||
|
||
const startEvent = await nextEventWithin(iterator);
|
||
expect(startEvent).not.toBe("timeout");
|
||
expectIteratorEvent(startEvent, { type: "start", done: false });
|
||
|
||
const textStartEvent = await nextEventWithin(iterator);
|
||
expect(textStartEvent).not.toBe("timeout");
|
||
expectIteratorEvent(textStartEvent, { type: "text_start", done: false });
|
||
|
||
const textDeltaEvent = await nextEventWithin(iterator);
|
||
expect(textDeltaEvent).not.toBe("timeout");
|
||
expectIteratorEvent(textDeltaEvent, {
|
||
type: "text_delta",
|
||
delta: visibleAnswer,
|
||
done: false,
|
||
});
|
||
|
||
controlledFetch.pushLine(
|
||
'{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
|
||
);
|
||
controlledFetch.close();
|
||
|
||
const textEndEvent = await nextEventWithin(iterator);
|
||
expect(textEndEvent).not.toBe("timeout");
|
||
expectIteratorEvent(textEndEvent, {
|
||
type: "text_end",
|
||
content: visibleAnswer,
|
||
done: false,
|
||
});
|
||
|
||
const doneEvent = await nextEventWithin(iterator);
|
||
expect(doneEvent).not.toBe("timeout");
|
||
expectIteratorEvent(doneEvent, { type: "done", done: false });
|
||
|
||
const streamEnd = await nextEventWithin(iterator);
|
||
expect(streamEnd).not.toBe("timeout");
|
||
expectIteratorEvent(streamEnd, { done: true });
|
||
} finally {
|
||
fetchWithSsrFGuardMock.mockReset();
|
||
}
|
||
});
|
||
|
||
it("keeps Kimi deltas append-only after the bounded sanitizer window is bypassed", async () => {
|
||
const longPrefix = "This Kimi cloud output has streamed past the sanitizer window. ".repeat(10);
|
||
await withMockNdjsonFetch(
|
||
[
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: { role: "assistant", content: longPrefix },
|
||
done: false,
|
||
}),
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: { role: "assistant", content: " ️ OK." },
|
||
done: false,
|
||
}),
|
||
'{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: { id: "kimi-k2.6:cloud", provider: "ollama" },
|
||
});
|
||
const events = await collectStreamEvents(stream);
|
||
const deltas = events.filter((event) => event.type === "text_delta");
|
||
expect(deltas.map((event) => event.delta)).toEqual([longPrefix, " ️ OK."]);
|
||
|
||
const rawText = `${longPrefix} ️ OK.`;
|
||
const textEnd = events.find((event) => event.type === "text_end");
|
||
expect(textEnd?.content).toBe(rawText);
|
||
const doneEvent = events.find((event) => event.type === "done");
|
||
expect(doneEvent?.message.content).toEqual([{ type: "text", text: rawText }]);
|
||
},
|
||
);
|
||
});
|
||
|
||
it("does not reject punctuation-heavy text from unrelated Ollama models", async () => {
|
||
const punctuationHeavy =
|
||
'$$"##"%#"##"####""$""""##""$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$' +
|
||
'#"$"$"""$""""#$"""$"""%"%###"""#%""""&"#"""$"""#"#""""%#""""&"#"""$"""$"""#%"""';
|
||
await withMockNdjsonFetch(
|
||
[
|
||
JSON.stringify({
|
||
model: "qwen3:32b",
|
||
created_at: "t",
|
||
message: { role: "assistant", content: punctuationHeavy },
|
||
done: false,
|
||
}),
|
||
'{"model":"qwen3:32b","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
expect(events.map((e) => e.type)).toEqual([
|
||
"start",
|
||
"text_start",
|
||
"text_delta",
|
||
"text_end",
|
||
"done",
|
||
]);
|
||
},
|
||
);
|
||
});
|
||
|
||
it("emits a single text_delta for single-chunk responses", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"one shot"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
const types = events.map((e) => e.type);
|
||
expect(types).toEqual(["start", "text_start", "text_delta", "text_end", "done"]);
|
||
|
||
const textStart = events.find((e) => e.type === "text_start");
|
||
expect(textStart?.partial.content).toEqual([]);
|
||
const delta = events.find((e) => e.type === "text_delta");
|
||
expect(delta?.delta).toBe("one shot");
|
||
},
|
||
);
|
||
});
|
||
|
||
it("sanitizes Kimi inline reasoning in text_delta, text_end, partial, and done output", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: {
|
||
role: "assistant",
|
||
content:
|
||
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. ️Final answer",
|
||
},
|
||
done: false,
|
||
}),
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: { role: "assistant", content: " only." },
|
||
done: false,
|
||
}),
|
||
'{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":3,"eval_count":4}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: { id: "kimi-k2.6:cloud", provider: "ollama" },
|
||
});
|
||
const events = await collectStreamEvents(stream);
|
||
const types = events.map((e) => e.type);
|
||
expect(types).toEqual([
|
||
"start",
|
||
"text_start",
|
||
"text_delta",
|
||
"text_delta",
|
||
"text_end",
|
||
"done",
|
||
]);
|
||
|
||
const textStart = events.find((e) => e.type === "text_start");
|
||
expect(textStart?.partial.content).toEqual([]);
|
||
const deltas = events.filter((e) => e.type === "text_delta");
|
||
expect(deltas).toHaveLength(2);
|
||
expect(deltas[0]?.delta).toBe("Final answer");
|
||
expect(deltas[1]?.delta).toBe(" only.");
|
||
expect(deltas[0]?.partial.content).toEqual([{ type: "text", text: "Final answer" }]);
|
||
expect(deltas[1]?.partial.content).toEqual([{ type: "text", text: "Final answer only." }]);
|
||
|
||
const textEnd = events.find((e) => e.type === "text_end");
|
||
expect(textEnd?.content).toBe("Final answer only.");
|
||
expect(textEnd?.partial.content).toEqual([{ type: "text", text: "Final answer only." }]);
|
||
|
||
const doneEvent = events.at(-1);
|
||
expect(doneEvent?.type).toBe("done");
|
||
if (doneEvent?.type === "done") {
|
||
expect(doneEvent.message.content).toEqual([{ type: "text", text: "Final answer only." }]);
|
||
}
|
||
},
|
||
);
|
||
});
|
||
|
||
it("does not re-sanitize visible Kimi stream output before done", async () => {
|
||
const visibleAnswer =
|
||
"This visible answer is intentionally long enough to look like a reasoning prefix if it is sanitized a second time. ️ keep this marker visible.";
|
||
await withMockNdjsonFetch(
|
||
[
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: {
|
||
role: "assistant",
|
||
content:
|
||
"I should think privately and not leak this planning text in the answer. I need to keep deciding what to say next. ️",
|
||
},
|
||
done: false,
|
||
}),
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: { role: "assistant", content: visibleAnswer },
|
||
done: false,
|
||
}),
|
||
'{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":3,"eval_count":4}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: { id: "kimi-k2.6:cloud", provider: "ollama" },
|
||
});
|
||
const events = await collectStreamEvents(stream);
|
||
const textEnd = events.find((event) => event.type === "text_end");
|
||
const doneEvent = events.at(-1);
|
||
|
||
expect(textEnd?.content).toBe(visibleAnswer);
|
||
expect(doneEvent?.type).toBe("done");
|
||
if (doneEvent?.type === "done") {
|
||
expect(doneEvent.message.content).toEqual([{ type: "text", text: visibleAnswer }]);
|
||
}
|
||
},
|
||
);
|
||
});
|
||
|
||
it("does not leak Kimi inline reasoning when a boundary is followed by tool calls only", async () => {
|
||
const hiddenPrefix =
|
||
"I should think privately and not leak this planning text in the answer. " +
|
||
"I need to keep deciding what tool to call before showing any visible text.";
|
||
await withMockNdjsonFetch(
|
||
[
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: { role: "assistant", content: `${hiddenPrefix} ️ ` },
|
||
done: false,
|
||
}),
|
||
JSON.stringify({
|
||
model: "kimi-k2.6:cloud",
|
||
created_at: "t",
|
||
message: {
|
||
role: "assistant",
|
||
content: "",
|
||
tool_calls: [{ function: { name: "bash", arguments: { command: "ls" } } }],
|
||
},
|
||
done: false,
|
||
}),
|
||
'{"model":"kimi-k2.6:cloud","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":20,"eval_count":40}',
|
||
],
|
||
async () => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: { id: "kimi-k2.6:cloud", provider: "ollama" },
|
||
});
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
expect(events.map((e) => e.type)).toEqual(["done"]);
|
||
const doneEvent = events.at(-1);
|
||
expect(doneEvent?.type).toBe("done");
|
||
if (doneEvent?.type === "done") {
|
||
expect(doneEvent.message.content).toEqual([
|
||
{
|
||
type: "toolCall",
|
||
id: expect.any(String),
|
||
name: "bash",
|
||
arguments: { command: "ls" },
|
||
},
|
||
]);
|
||
expect(JSON.stringify(doneEvent)).not.toContain("I should think privately");
|
||
}
|
||
},
|
||
);
|
||
});
|
||
});
|
||
|
||
describe("createOllamaStreamFn", () => {
|
||
it("normalizes /v1 baseUrl and maps maxTokens + signal", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const signal = new AbortController().signal;
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434/v1/",
|
||
options: { maxTokens: 123, signal },
|
||
});
|
||
|
||
const events = await collectStreamEvents(stream);
|
||
expect(events.at(-1)?.type).toBe("done");
|
||
|
||
expect(fetchMock).toHaveBeenCalledTimes(1);
|
||
const request = getGuardedFetchCall(fetchMock);
|
||
expect(request.url).toBe("http://ollama-host:11434/api/chat");
|
||
expect(request.auditContext).toBe("ollama-stream.chat");
|
||
expect(request.signal).toBe(signal);
|
||
const requestInit = request.init ?? {};
|
||
expect(requestInit.signal).toBeUndefined();
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
options?: { num_ctx?: number; num_predict?: number };
|
||
};
|
||
if (!requestBody.options) {
|
||
throw new Error("Expected Ollama request options");
|
||
}
|
||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||
expect(requestBody.options.num_predict).toBe(123);
|
||
},
|
||
);
|
||
});
|
||
|
||
it("uses configured params.num_ctx for native Ollama chat options", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: {
|
||
params: {
|
||
num_ctx: 32768,
|
||
temperature: 0.2,
|
||
top_p: 0.9,
|
||
thinking: false,
|
||
streaming: false,
|
||
},
|
||
contextWindow: 131072,
|
||
},
|
||
options: { temperature: 0.7, maxTokens: 55 },
|
||
});
|
||
|
||
const events = await collectStreamEvents(stream);
|
||
expect(events.at(-1)?.type).toBe("done");
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
think?: boolean;
|
||
options: {
|
||
num_ctx?: number;
|
||
num_predict?: number;
|
||
temperature?: number;
|
||
top_p?: number;
|
||
streaming?: boolean;
|
||
};
|
||
};
|
||
expect(requestBody.options.num_ctx).toBe(32768);
|
||
expect(requestBody.options.num_predict).toBe(55);
|
||
expect(requestBody.options.temperature).toBe(0.7);
|
||
expect(requestBody.options.top_p).toBe(0.9);
|
||
expect(requestBody.options.streaming).toBeUndefined();
|
||
expect(requestBody.think).toBe(false);
|
||
},
|
||
);
|
||
});
|
||
|
||
it("sets top_p=1 for native Ollama greedy sampling requests", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: {
|
||
params: {
|
||
num_ctx: 4096,
|
||
top_p: 0.9,
|
||
thinking: false,
|
||
},
|
||
},
|
||
options: { temperature: 0 },
|
||
});
|
||
|
||
const events = await collectStreamEvents(stream);
|
||
expect(events.at(-1)?.type).toBe("done");
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
options: {
|
||
temperature?: number;
|
||
top_p?: number;
|
||
};
|
||
};
|
||
expect(requestBody.options.temperature).toBe(0);
|
||
expect(requestBody.options.top_p).toBe(1);
|
||
},
|
||
);
|
||
});
|
||
|
||
it("sets top_p=1 for native Ollama greedy requests without configured top_p", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: {
|
||
params: {
|
||
num_ctx: 4096,
|
||
thinking: false,
|
||
},
|
||
},
|
||
options: { temperature: 0 },
|
||
});
|
||
|
||
const events = await collectStreamEvents(stream);
|
||
expect(events.at(-1)?.type).toBe("done");
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
options: {
|
||
temperature?: number;
|
||
top_p?: number;
|
||
};
|
||
};
|
||
expect(requestBody.options.temperature).toBe(0);
|
||
expect(requestBody.options.top_p).toBe(1);
|
||
},
|
||
);
|
||
});
|
||
|
||
it("preserves configured top_p for native Ollama non-greedy sampling requests", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: {
|
||
params: {
|
||
top_p: 0.9,
|
||
},
|
||
},
|
||
options: { temperature: 0.2 },
|
||
});
|
||
|
||
const events = await collectStreamEvents(stream);
|
||
expect(events.at(-1)?.type).toBe("done");
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
options: {
|
||
temperature?: number;
|
||
top_p?: number;
|
||
};
|
||
};
|
||
expect(requestBody.options.temperature).toBe(0.2);
|
||
expect(requestBody.options.top_p).toBe(0.9);
|
||
},
|
||
);
|
||
});
|
||
|
||
it("omits num_ctx when the model has no params.num_ctx and no catalog window", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
// Override the helper default contextWindow back to undefined so the
|
||
// request body should leave Ollama's Modelfile to decide num_ctx.
|
||
model: { contextWindow: undefined },
|
||
});
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
options?: { num_ctx?: number };
|
||
};
|
||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||
},
|
||
);
|
||
});
|
||
|
||
it("does not fall back to catalog contextWindow as native Ollama num_ctx", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: { contextWindow: 32768 },
|
||
});
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
options?: { num_ctx?: number };
|
||
};
|
||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||
},
|
||
);
|
||
});
|
||
|
||
it("does not fall back to catalog maxTokens as native Ollama num_ctx", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
// The helper default contextWindow is overridden back to undefined so
|
||
// the right side of `model.contextWindow ?? model.maxTokens` is the
|
||
// load-bearing branch.
|
||
model: { contextWindow: undefined, maxTokens: 65536 },
|
||
});
|
||
|
||
await collectStreamEvents(stream);
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
options?: { num_ctx?: number };
|
||
};
|
||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||
},
|
||
);
|
||
});
|
||
|
||
it("maps configured native Ollama params.thinking=max to the stable top-level think value", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
model: { params: { thinking: "max" } },
|
||
});
|
||
|
||
const events = await collectStreamEvents(stream);
|
||
expect(events.at(-1)?.type).toBe("done");
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
if (typeof requestInit.body !== "string") {
|
||
throw new Error("Expected string request body");
|
||
}
|
||
const requestBody = JSON.parse(requestInit.body) as {
|
||
think?: string;
|
||
options?: { think?: string };
|
||
};
|
||
expect(requestBody.think).toBe("high");
|
||
expect(requestBody.options?.think).toBeUndefined();
|
||
},
|
||
);
|
||
});
|
||
|
||
it("uses the default loopback policy when baseUrl is empty", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({ baseUrl: "" });
|
||
|
||
const events = await collectStreamEvents(stream);
|
||
expect(events.at(-1)?.type).toBe("done");
|
||
|
||
const request = getGuardedFetchCall(fetchMock);
|
||
expect(request.url).toBe("http://127.0.0.1:11434/api/chat");
|
||
const policy = requireRecord(request.policy, "ssrf policy");
|
||
expect(policy.hostnameAllowlist).toEqual(["127.0.0.1"]);
|
||
expect(policy.allowPrivateNetwork).toBe(true);
|
||
},
|
||
);
|
||
});
|
||
|
||
it("merges default headers and allows request headers to override them", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
defaultHeaders: {
|
||
"X-OLLAMA-KEY": "provider-secret",
|
||
"X-Trace": "default",
|
||
},
|
||
options: {
|
||
headers: {
|
||
"X-Trace": "request",
|
||
"X-Request-Only": "1",
|
||
},
|
||
},
|
||
});
|
||
|
||
const events = await collectStreamEvents(stream);
|
||
expect(events.at(-1)?.type).toBe("done");
|
||
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
const headers = requireHeaders(requestInit.headers);
|
||
expect(headers["Content-Type"]).toBe("application/json");
|
||
expect(headers["X-OLLAMA-KEY"]).toBe("provider-secret");
|
||
expect(headers["X-Trace"]).toBe("request");
|
||
expect(headers["X-Request-Only"]).toBe("1");
|
||
},
|
||
);
|
||
});
|
||
|
||
it("preserves an explicit Authorization header when apiKey is a local marker", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const stream = await createOllamaTestStream({
|
||
baseUrl: "http://ollama-host:11434",
|
||
defaultHeaders: {
|
||
Authorization: "Bearer proxy-token",
|
||
},
|
||
options: {
|
||
apiKey: "ollama-local", // pragma: allowlist secret
|
||
headers: {
|
||
Authorization: "Bearer proxy-token",
|
||
},
|
||
},
|
||
});
|
||
|
||
await collectStreamEvents(stream);
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
expect(requireHeaders(requestInit.headers).Authorization).toBe("Bearer proxy-token");
|
||
},
|
||
);
|
||
});
|
||
|
||
it("allows a real apiKey to override an explicit Authorization header", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const streamFn = createOllamaStreamFn("http://ollama-host:11434", {
|
||
Authorization: "Bearer proxy-token",
|
||
});
|
||
const stream = await Promise.resolve(
|
||
streamFn(
|
||
{
|
||
id: "qwen3:32b",
|
||
api: "ollama",
|
||
provider: "custom-ollama",
|
||
contextWindow: 131072,
|
||
} as never,
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
} as never,
|
||
{
|
||
apiKey: "real-token", // pragma: allowlist secret
|
||
} as never,
|
||
),
|
||
);
|
||
|
||
await collectStreamEvents(stream);
|
||
const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
|
||
expect(requireHeaders(requestInit.headers).Authorization).toBe("Bearer real-token");
|
||
},
|
||
);
|
||
});
|
||
|
||
it("surfaces non-2xx HTTP response as status-prefixed error", async () => {
|
||
fetchWithSsrFGuardMock.mockResolvedValue({
|
||
response: new Response("Service Unavailable", {
|
||
status: 503,
|
||
statusText: "Service Unavailable",
|
||
}),
|
||
release: vi.fn(async () => undefined),
|
||
});
|
||
try {
|
||
const stream = await createOllamaTestStream({ baseUrl: "http://ollama-host:11434" });
|
||
const events = await collectStreamEvents(stream);
|
||
|
||
const errorEvent = events.find((e) => e.type === "error") as
|
||
| { type: "error"; error: { errorMessage?: string } }
|
||
| undefined;
|
||
if (!errorEvent) {
|
||
throw new Error("expected Ollama stream error event");
|
||
}
|
||
// The error message must start with the HTTP status code so that
|
||
// extractLeadingHttpStatus can parse it for failover/retry logic.
|
||
expect(errorEvent.error.errorMessage).toMatch(/^503\b/);
|
||
} finally {
|
||
fetchWithSsrFGuardMock.mockReset();
|
||
}
|
||
});
|
||
|
||
it("keeps thinking chunks when no final content is emitted", async () => {
|
||
await expectDoneEventContent(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","thinking":"reasoned"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","thinking":" output"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
|
||
],
|
||
[{ type: "thinking", thinking: "reasoned output" }],
|
||
);
|
||
});
|
||
|
||
it("keeps streamed content after earlier thinking chunks", async () => {
|
||
await expectDoneEventContent(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","thinking":"internal"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"final"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":" answer"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
|
||
],
|
||
[
|
||
{ type: "thinking", thinking: "internal" },
|
||
{ type: "text", text: "final answer" },
|
||
],
|
||
);
|
||
});
|
||
|
||
it("keeps reasoning chunks when no final content is emitted", async () => {
|
||
await expectDoneEventContent(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"reasoned"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":" output"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
|
||
],
|
||
[{ type: "thinking", thinking: "reasoned output" }],
|
||
);
|
||
});
|
||
|
||
it("keeps streamed content after earlier reasoning chunks", async () => {
|
||
await expectDoneEventContent(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"internal"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"final"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":" answer"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":2}',
|
||
],
|
||
[
|
||
{ type: "thinking", thinking: "internal" },
|
||
{ type: "text", text: "final answer" },
|
||
],
|
||
);
|
||
});
|
||
});
|
||
|
||
describe("resolveOllamaBaseUrlForRun", () => {
|
||
it("prefers provider baseUrl over model baseUrl", () => {
|
||
expect(
|
||
resolveOllamaBaseUrlForRun({
|
||
modelBaseUrl: "http://model-host:11434",
|
||
providerBaseUrl: "http://provider-host:11434",
|
||
}),
|
||
).toBe("http://provider-host:11434");
|
||
});
|
||
|
||
it("falls back to model baseUrl when provider baseUrl is missing", () => {
|
||
expect(
|
||
resolveOllamaBaseUrlForRun({
|
||
modelBaseUrl: "http://model-host:11434",
|
||
}),
|
||
).toBe("http://model-host:11434");
|
||
});
|
||
|
||
it("falls back to native default when neither baseUrl is configured", () => {
|
||
expect(resolveOllamaBaseUrlForRun({})).toBe("http://127.0.0.1:11434");
|
||
});
|
||
});
|
||
|
||
describe("createConfiguredOllamaStreamFn", () => {
|
||
it("uses provider-level baseUrl when model baseUrl is absent", async () => {
|
||
await withMockNdjsonFetch(
|
||
[
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
|
||
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
|
||
],
|
||
async (fetchMock) => {
|
||
const streamFn = createConfiguredOllamaStreamFn({
|
||
model: {
|
||
headers: { Authorization: "Bearer proxy-token" },
|
||
},
|
||
providerBaseUrl: "http://provider-host:11434/v1",
|
||
});
|
||
const stream = await Promise.resolve(
|
||
streamFn(
|
||
{
|
||
id: "qwen3:32b",
|
||
api: "ollama",
|
||
provider: "custom-ollama",
|
||
contextWindow: 131072,
|
||
} as never,
|
||
{
|
||
messages: [{ role: "user", content: "hello" }],
|
||
} as never,
|
||
{
|
||
apiKey: "ollama-local", // pragma: allowlist secret
|
||
} as never,
|
||
),
|
||
);
|
||
|
||
await collectStreamEvents(stream);
|
||
const request = getGuardedFetchCall(fetchMock);
|
||
expect(request.url).toBe("http://provider-host:11434/api/chat");
|
||
const requestInit = request.init ?? {};
|
||
expect(requireHeaders(requestInit.headers).Authorization).toBe("Bearer proxy-token");
|
||
},
|
||
);
|
||
});
|
||
});
|