fix(ollama): expose native thinking effort levels

Peter Steinberger
2026-04-26 22:49:06 +01:00
parent 2cd23957c0
commit ff570f3a61
7 changed files with 107 additions and 20 deletions

View File

@@ -69,7 +69,9 @@ function registerProviderWithPluginConfig(pluginConfig: Record<string, unknown>)
   return registerProviderMock.mock.calls[0]?.[0];
 }
 
-function captureWrappedOllamaPayload(thinkingLevel: "off" | "low" | undefined) {
+function captureWrappedOllamaPayload(
+  thinkingLevel: "off" | "minimal" | "low" | "medium" | "high" | "max" | undefined,
+) {
   const provider = registerProvider();
   let payloadSeen: Record<string, unknown> | undefined;
   const baseStreamFn = vi.fn((_model, _context, options) => {
@@ -528,7 +530,7 @@ describe("ollama plugin", () => {
     expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
   });
 
-  it("keeps native Ollama thinking off by default while exposing an opt-in toggle", () => {
+  it("keeps native Ollama thinking off by default while exposing opt-in effort levels", () => {
     const provider = registerProvider();
     expect(
@@ -549,15 +551,22 @@
         reasoning: true,
       }),
     ).toEqual({
-      levels: [{ id: "off" }, { id: "low", label: "on" }],
+      levels: [{ id: "off" }, { id: "low" }, { id: "medium" }, { id: "high" }, { id: "max" }],
       defaultLevel: "off",
     });
   });
 
-  it("wraps native Ollama payloads with top-level think=true when thinking is enabled", () => {
+  it("wraps native Ollama payloads with top-level think effort when thinking is enabled", () => {
     const { baseStreamFn, payloadSeen } = captureWrappedOllamaPayload("low");
     expect(baseStreamFn).toHaveBeenCalledTimes(1);
-    expect(payloadSeen?.think).toBe(true);
+    expect(payloadSeen?.think).toBe("low");
     expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
   });
+
+  it("maps native Ollama max thinking to the highest supported wire effort", () => {
+    const { baseStreamFn, payloadSeen } = captureWrappedOllamaPayload("max");
+    expect(baseStreamFn).toHaveBeenCalledTimes(1);
+    expect(payloadSeen?.think).toBe("high");
+    expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
+  });

View File

@@ -167,7 +167,10 @@ export default definePluginEntry({
       usesOllamaOpenAICompatTransport(model) ? { supportsUsageInStreaming: true } : undefined,
   resolveReasoningOutputMode: () => "native",
   resolveThinkingProfile: ({ reasoning }) => ({
-    levels: reasoning === true ? [{ id: "off" }, { id: "low", label: "on" }] : [{ id: "off" }],
+    levels:
+      reasoning === true
+        ? [{ id: "off" }, { id: "low" }, { id: "medium" }, { id: "high" }, { id: "max" }]
+        : [{ id: "off" }],
     defaultLevel: "off",
   }),
   wrapStreamFn: createConfiguredOllamaCompatStreamWrapper,

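For a reasoning-capable model, the profile above now advertises five effort levels instead of the old off/on pair. A minimal sketch of the two shapes resolveThinkingProfile can return (the ThinkingProfile type name here is an assumption for illustration, not a plugin export):

    type ThinkingProfile = {
      levels: Array<{ id: string; label?: string }>;
      defaultLevel: string;
    };

    // reasoning === true
    const withReasoning: ThinkingProfile = {
      levels: [{ id: "off" }, { id: "low" }, { id: "medium" }, { id: "high" }, { id: "max" }],
      defaultLevel: "off",
    };

    // any other reasoning value
    const withoutReasoning: ThinkingProfile = {
      levels: [{ id: "off" }],
      defaultLevel: "off",
    };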
View File

@@ -150,7 +150,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
     );
   });
 
-  it("forwards think=true on native Ollama chat requests when thinking is enabled", async () => {
+  it("forwards the native think effort on native Ollama chat requests when thinking is enabled", async () => {
     await withMockNdjsonFetch(
       [
         '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
@@ -193,10 +193,63 @@
           throw new Error("Expected string request body");
         }
         const requestBody = JSON.parse(requestInit.body) as {
-          think?: boolean;
-          options?: { think?: boolean; num_ctx?: number };
+          think?: boolean | string;
+          options?: { think?: boolean | string; num_ctx?: number };
         };
-        expect(requestBody.think).toBe(true);
+        expect(requestBody.think).toBe("low");
         expect(requestBody.options?.think).toBeUndefined();
         expect(requestBody.options?.num_ctx).toBe(131072);
       },
     );
   });
 
+  it("maps native Ollama max thinking to think=high on the wire", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
+        const model = {
+          api: "ollama",
+          provider: "ollama",
+          id: "gpt-oss:20b",
+          contextWindow: 131072,
+        };
+        const wrapped = createConfiguredOllamaCompatStreamWrapper({
+          provider: "ollama",
+          modelId: "gpt-oss:20b",
+          model,
+          streamFn: baseStreamFn,
+          thinkingLevel: "max",
+        } as never);
+        if (!wrapped) {
+          throw new Error("Expected wrapped Ollama stream function");
+        }
+        const stream = await Promise.resolve(
+          wrapped(
+            model as never,
+            {
+              messages: [{ role: "user", content: "hello" }],
+            } as never,
+            {} as never,
+          ),
+        );
+        await collectStreamEvents(stream);
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          think?: boolean | string;
+          options?: { think?: boolean | string; num_ctx?: number };
+        };
+        expect(requestBody.think).toBe("high");
+        expect(requestBody.options?.think).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
+      },

View File

@@ -151,7 +151,12 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
   });
 }
 
-function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
+type OllamaThinkValue = boolean | "low" | "medium" | "high";
+
+function createOllamaThinkingWrapper(
+  baseFn: StreamFn | undefined,
+  think: OllamaThinkValue,
+): StreamFn {
   const streamFn = baseFn ?? streamSimple;
   return (model, context, options) =>
     streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
@@ -159,6 +164,22 @@
     });
 }
 
+function resolveOllamaThinkValue(thinkingLevel: unknown): OllamaThinkValue | undefined {
+  if (thinkingLevel === "off") {
+    return false;
+  }
+  if (thinkingLevel === "low" || thinkingLevel === "medium" || thinkingLevel === "high") {
+    return thinkingLevel;
+  }
+  if (thinkingLevel === "minimal") {
+    return "low";
+  }
+  if (thinkingLevel === "xhigh" || thinkingLevel === "adaptive" || thinkingLevel === "max") {
+    return "high";
+  }
+  return undefined;
+}
+
 function resolveOllamaCompatNumCtx(model: ProviderRuntimeModel): number {
   return Math.max(1, Math.floor(model.contextWindow ?? model.maxTokens ?? DEFAULT_CONTEXT_TOKENS));
 }
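For reference, the new resolveOllamaThinkValue helper collapses the wider ThinkLevel vocabulary onto the three efforts Ollama's wire format accepts. A sketch of the expected input/output pairs, assuming the function behaves exactly as in the hunk above:

    resolveOllamaThinkValue("off");      // false — explicitly disables native thinking
    resolveOllamaThinkValue("minimal");  // "low" — rounded up to the lowest wire effort
    resolveOllamaThinkValue("low");      // "low"
    resolveOllamaThinkValue("medium");   // "medium"
    resolveOllamaThinkValue("high");     // "high"
    resolveOllamaThinkValue("xhigh");    // "high" — clamped to the highest wire effort
    resolveOllamaThinkValue("adaptive"); // "high"
    resolveOllamaThinkValue("max");      // "high"
    resolveOllamaThinkValue(undefined);  // undefined — payload left untouched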
@@ -196,12 +217,11 @@
     streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaCompatNumCtx(model));
   }
 
-  if (isNativeOllamaTransport && ctx.thinkingLevel === "off") {
-    streamFn = createOllamaThinkingWrapper(streamFn, false);
-  } else if (isNativeOllamaTransport && ctx.thinkingLevel) {
-    // Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive, max)
-    // should enable Ollama's native thinking mode.
-    streamFn = createOllamaThinkingWrapper(streamFn, true);
+  const ollamaThinkValue = isNativeOllamaTransport
+    ? resolveOllamaThinkValue(ctx.thinkingLevel)
+    : undefined;
+  if (ollamaThinkValue !== undefined) {
+    streamFn = createOllamaThinkingWrapper(streamFn, ollamaThinkValue);
   }
 
   if (normalizeProviderId(ctx.provider) === "ollama" && isOllamaCloudKimiModelRef(ctx.modelId)) {
@@ -310,7 +330,7 @@ interface OllamaChatRequest {
   stream: boolean;
   tools?: OllamaTool[];
   options?: Record<string, unknown>;
-  think?: boolean;
+  think?: OllamaThinkValue;
 }
 
 interface OllamaChatMessage {
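Putting the pieces together, a stream wrapped with thinkingLevel "max" should now reach Ollama's native chat endpoint with a body along these lines. This is a hedged sketch built from the assertions in the tests above; field order and the message content are illustrative, and model/messages are assumed members of OllamaChatRequest since the interface is truncated here:

    const body = {
      model: "gpt-oss:20b",
      messages: [{ role: "user", content: "hello" }],
      stream: true,
      options: { num_ctx: 131072 }, // think is deliberately absent from options
      think: "high",                // "max" clamped to the top native effort
    };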