fix(ollama): expose native thinking effort levels

Peter Steinberger
2026-04-26 22:49:06 +01:00
parent 2cd23957c0
commit ff570f3a61
7 changed files with 107 additions and 20 deletions

View File

@@ -69,7 +69,9 @@ function registerProviderWithPluginConfig(pluginConfig: Record<string, unknown>)
   return registerProviderMock.mock.calls[0]?.[0];
 }
 
-function captureWrappedOllamaPayload(thinkingLevel: "off" | "low" | undefined) {
+function captureWrappedOllamaPayload(
+  thinkingLevel: "off" | "minimal" | "low" | "medium" | "high" | "max" | undefined,
+) {
   const provider = registerProvider();
   let payloadSeen: Record<string, unknown> | undefined;
   const baseStreamFn = vi.fn((_model, _context, options) => {
@@ -528,7 +530,7 @@ describe("ollama plugin", () => {
     expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
   });
 
-  it("keeps native Ollama thinking off by default while exposing an opt-in toggle", () => {
+  it("keeps native Ollama thinking off by default while exposing opt-in effort levels", () => {
     const provider = registerProvider();
     expect(
@@ -549,15 +551,22 @@
         reasoning: true,
       }),
     ).toEqual({
-      levels: [{ id: "off" }, { id: "low", label: "on" }],
+      levels: [{ id: "off" }, { id: "low" }, { id: "medium" }, { id: "high" }, { id: "max" }],
       defaultLevel: "off",
     });
   });
 
-  it("wraps native Ollama payloads with top-level think=true when thinking is enabled", () => {
+  it("wraps native Ollama payloads with top-level think effort when thinking is enabled", () => {
     const { baseStreamFn, payloadSeen } = captureWrappedOllamaPayload("low");
     expect(baseStreamFn).toHaveBeenCalledTimes(1);
-    expect(payloadSeen?.think).toBe(true);
+    expect(payloadSeen?.think).toBe("low");
     expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
   });
+
+  it("maps native Ollama max thinking to the highest supported wire effort", () => {
+    const { baseStreamFn, payloadSeen } = captureWrappedOllamaPayload("max");
+    expect(baseStreamFn).toHaveBeenCalledTimes(1);
+    expect(payloadSeen?.think).toBe("high");
+    expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
+  });

View File

@@ -167,7 +167,10 @@ export default definePluginEntry({
       usesOllamaOpenAICompatTransport(model) ? { supportsUsageInStreaming: true } : undefined,
   resolveReasoningOutputMode: () => "native",
   resolveThinkingProfile: ({ reasoning }) => ({
-    levels: reasoning === true ? [{ id: "off" }, { id: "low", label: "on" }] : [{ id: "off" }],
+    levels:
+      reasoning === true
+        ? [{ id: "off" }, { id: "low" }, { id: "medium" }, { id: "high" }, { id: "max" }]
+        : [{ id: "off" }],
     defaultLevel: "off",
   }),
   wrapStreamFn: createConfiguredOllamaCompatStreamWrapper,

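For a reasoning-capable model, the profile above now advertises five effort levels instead of the old off/on pair. A minimal sketch of the two shapes resolveThinkingProfile can return (the ThinkingProfile type name here is an assumption for illustration, not a plugin export):

    type ThinkingProfile = {
      levels: Array<{ id: string; label?: string }>;
      defaultLevel: string;
    };

    // reasoning === true
    const withReasoning: ThinkingProfile = {
      levels: [{ id: "off" }, { id: "low" }, { id: "medium" }, { id: "high" }, { id: "max" }],
      defaultLevel: "off",
    };

    // any other reasoning value
    const withoutReasoning: ThinkingProfile = {
      levels: [{ id: "off" }],
      defaultLevel: "off",
    };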
View File

@@ -150,7 +150,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
     );
   });
 
-  it("forwards think=true on native Ollama chat requests when thinking is enabled", async () => {
+  it("forwards the native think effort on native Ollama chat requests when thinking is enabled", async () => {
     await withMockNdjsonFetch(
       [
         '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
@@ -193,10 +193,63 @@
           throw new Error("Expected string request body");
         }
         const requestBody = JSON.parse(requestInit.body) as {
-          think?: boolean;
-          options?: { think?: boolean; num_ctx?: number };
+          think?: boolean | string;
+          options?: { think?: boolean | string; num_ctx?: number };
         };
-        expect(requestBody.think).toBe(true);
+        expect(requestBody.think).toBe("low");
         expect(requestBody.options?.think).toBeUndefined();
         expect(requestBody.options?.num_ctx).toBe(131072);
       },
     );
   });
 
+  it("maps native Ollama max thinking to think=high on the wire", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
+        const model = {
+          api: "ollama",
+          provider: "ollama",
+          id: "gpt-oss:20b",
+          contextWindow: 131072,
+        };
+        const wrapped = createConfiguredOllamaCompatStreamWrapper({
+          provider: "ollama",
+          modelId: "gpt-oss:20b",
+          model,
+          streamFn: baseStreamFn,
+          thinkingLevel: "max",
+        } as never);
+        if (!wrapped) {
+          throw new Error("Expected wrapped Ollama stream function");
+        }
+        const stream = await Promise.resolve(
+          wrapped(
+            model as never,
+            {
+              messages: [{ role: "user", content: "hello" }],
+            } as never,
+            {} as never,
+          ),
+        );
+        await collectStreamEvents(stream);
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          think?: boolean | string;
+          options?: { think?: boolean | string; num_ctx?: number };
+        };
+        expect(requestBody.think).toBe("high");
+        expect(requestBody.options?.think).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
+      },

View File

@@ -151,7 +151,12 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
   });
 }
 
-function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
+type OllamaThinkValue = boolean | "low" | "medium" | "high";
+
+function createOllamaThinkingWrapper(
+  baseFn: StreamFn | undefined,
+  think: OllamaThinkValue,
+): StreamFn {
   const streamFn = baseFn ?? streamSimple;
   return (model, context, options) =>
     streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
@@ -159,6 +164,22 @@
     });
 }
 
+function resolveOllamaThinkValue(thinkingLevel: unknown): OllamaThinkValue | undefined {
+  if (thinkingLevel === "off") {
+    return false;
+  }
+  if (thinkingLevel === "low" || thinkingLevel === "medium" || thinkingLevel === "high") {
+    return thinkingLevel;
+  }
+  if (thinkingLevel === "minimal") {
+    return "low";
+  }
+  if (thinkingLevel === "xhigh" || thinkingLevel === "adaptive" || thinkingLevel === "max") {
+    return "high";
+  }
+  return undefined;
+}
+
 function resolveOllamaCompatNumCtx(model: ProviderRuntimeModel): number {
   return Math.max(1, Math.floor(model.contextWindow ?? model.maxTokens ?? DEFAULT_CONTEXT_TOKENS));
 }
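For reference, the new resolveOllamaThinkValue helper collapses the wider ThinkLevel vocabulary onto the three efforts Ollama's wire format accepts. A sketch of the expected input/output pairs, assuming the function behaves exactly as in the hunk above:

    resolveOllamaThinkValue("off");      // false — explicitly disables native thinking
    resolveOllamaThinkValue("minimal");  // "low" — rounded up to the lowest wire effort
    resolveOllamaThinkValue("low");      // "low"
    resolveOllamaThinkValue("medium");   // "medium"
    resolveOllamaThinkValue("high");     // "high"
    resolveOllamaThinkValue("xhigh");    // "high" — clamped to the highest wire effort
    resolveOllamaThinkValue("adaptive"); // "high"
    resolveOllamaThinkValue("max");      // "high"
    resolveOllamaThinkValue(undefined);  // undefined — payload left untouched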
@@ -196,12 +217,11 @@
     streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaCompatNumCtx(model));
   }
 
-  if (isNativeOllamaTransport && ctx.thinkingLevel === "off") {
-    streamFn = createOllamaThinkingWrapper(streamFn, false);
-  } else if (isNativeOllamaTransport && ctx.thinkingLevel) {
-    // Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive, max)
-    // should enable Ollama's native thinking mode.
-    streamFn = createOllamaThinkingWrapper(streamFn, true);
+  const ollamaThinkValue = isNativeOllamaTransport
+    ? resolveOllamaThinkValue(ctx.thinkingLevel)
+    : undefined;
+  if (ollamaThinkValue !== undefined) {
+    streamFn = createOllamaThinkingWrapper(streamFn, ollamaThinkValue);
   }
 
   if (normalizeProviderId(ctx.provider) === "ollama" && isOllamaCloudKimiModelRef(ctx.modelId)) {
@@ -310,7 +330,7 @@ interface OllamaChatRequest {
   stream: boolean;
   tools?: OllamaTool[];
   options?: Record<string, unknown>;
-  think?: boolean;
+  think?: OllamaThinkValue;
 }
 
 interface OllamaChatMessage {
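Putting the pieces together, a stream wrapped with thinkingLevel "max" should now reach Ollama's native chat endpoint with a body along these lines. This is a hedged sketch built from the assertions in the tests above; field order and the message content are illustrative, and model/messages are assumed members of OllamaChatRequest since the interface is truncated here:

    const body = {
      model: "gpt-oss:20b",
      messages: [{ role: "user", content: "hello" }],
      stream: true,
      options: { num_ctx: 131072 }, // think is deliberately absent from options
      think: "high",                // "max" clamped to the top native effort
    };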