fix(ollama): expose native thinking efforts
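
The native Ollama transport previously collapsed every non-off thinking
level into a bare top-level think=true. Ollama's chat API also accepts
the string efforts "low", "medium", and "high", so the wrapper now
forwards those directly and folds the remaining levels onto the nearest
supported effort (minimal -> low; xhigh, adaptive, and max -> high),
while "off" still sends think=false. The thinking profile exposed for
reasoning-capable models grows from an on/off toggle to the full set of
effort levels.

For illustration, a native /api/chat request at level "low" now carries
the effort at the top level of the payload instead of options.think (a
sketch assembled from the request shape the tests below assert; the
stream flag is assumed from the OllamaChatRequest interface, and fields
the tests do not pin down are omitted):

    {
      "model": "gpt-oss:20b",
      "messages": [{ "role": "user", "content": "hello" }],
      "stream": true,
      "think": "low",
      "options": { "num_ctx": 131072 }
    }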
@@ -69,7 +69,9 @@ function registerProviderWithPluginConfig(pluginConfig: Record<string, unknown>)
   return registerProviderMock.mock.calls[0]?.[0];
 }
 
-function captureWrappedOllamaPayload(thinkingLevel: "off" | "low" | undefined) {
+function captureWrappedOllamaPayload(
+  thinkingLevel: "off" | "minimal" | "low" | "medium" | "high" | "max" | undefined,
+) {
   const provider = registerProvider();
   let payloadSeen: Record<string, unknown> | undefined;
   const baseStreamFn = vi.fn((_model, _context, options) => {
@@ -528,7 +530,7 @@ describe("ollama plugin", () => {
     expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
   });
 
-  it("keeps native Ollama thinking off by default while exposing an opt-in toggle", () => {
+  it("keeps native Ollama thinking off by default while exposing opt-in effort levels", () => {
     const provider = registerProvider();
 
     expect(
@@ -549,15 +551,22 @@ describe("ollama plugin", () => {
         reasoning: true,
       }),
     ).toEqual({
-      levels: [{ id: "off" }, { id: "low", label: "on" }],
+      levels: [{ id: "off" }, { id: "low" }, { id: "medium" }, { id: "high" }, { id: "max" }],
       defaultLevel: "off",
     });
   });
 
-  it("wraps native Ollama payloads with top-level think=true when thinking is enabled", () => {
+  it("wraps native Ollama payloads with top-level think effort when thinking is enabled", () => {
     const { baseStreamFn, payloadSeen } = captureWrappedOllamaPayload("low");
     expect(baseStreamFn).toHaveBeenCalledTimes(1);
-    expect(payloadSeen?.think).toBe(true);
+    expect(payloadSeen?.think).toBe("low");
     expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
   });
+
+  it("maps native Ollama max thinking to the highest supported wire effort", () => {
+    const { baseStreamFn, payloadSeen } = captureWrappedOllamaPayload("max");
+    expect(baseStreamFn).toHaveBeenCalledTimes(1);
+    expect(payloadSeen?.think).toBe("high");
+    expect((payloadSeen?.options as Record<string, unknown> | undefined)?.think).toBeUndefined();
+  });
+
@@ -167,7 +167,10 @@ export default definePluginEntry({
       usesOllamaOpenAICompatTransport(model) ? { supportsUsageInStreaming: true } : undefined,
     resolveReasoningOutputMode: () => "native",
     resolveThinkingProfile: ({ reasoning }) => ({
-      levels: reasoning === true ? [{ id: "off" }, { id: "low", label: "on" }] : [{ id: "off" }],
+      levels:
+        reasoning === true
+          ? [{ id: "off" }, { id: "low" }, { id: "medium" }, { id: "high" }, { id: "max" }]
+          : [{ id: "off" }],
       defaultLevel: "off",
     }),
     wrapStreamFn: createConfiguredOllamaCompatStreamWrapper,
@@ -150,7 +150,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
     );
   });
 
-  it("forwards think=true on native Ollama chat requests when thinking is enabled", async () => {
+  it("forwards the native think effort on native Ollama chat requests when thinking is enabled", async () => {
     await withMockNdjsonFetch(
       [
         '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
@@ -193,10 +193,63 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
           throw new Error("Expected string request body");
         }
         const requestBody = JSON.parse(requestInit.body) as {
-          think?: boolean;
-          options?: { think?: boolean; num_ctx?: number };
+          think?: boolean | string;
+          options?: { think?: boolean | string; num_ctx?: number };
         };
-        expect(requestBody.think).toBe(true);
+        expect(requestBody.think).toBe("low");
         expect(requestBody.options?.think).toBeUndefined();
         expect(requestBody.options?.num_ctx).toBe(131072);
       },
     );
   });
+
+  it("maps native Ollama max thinking to think=high on the wire", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const baseStreamFn = createOllamaStreamFn("http://ollama-host:11434");
+        const model = {
+          api: "ollama",
+          provider: "ollama",
+          id: "gpt-oss:20b",
+          contextWindow: 131072,
+        };
+
+        const wrapped = createConfiguredOllamaCompatStreamWrapper({
+          provider: "ollama",
+          modelId: "gpt-oss:20b",
+          model,
+          streamFn: baseStreamFn,
+          thinkingLevel: "max",
+        } as never);
+        if (!wrapped) {
+          throw new Error("Expected wrapped Ollama stream function");
+        }
+
+        const stream = await Promise.resolve(
+          wrapped(
+            model as never,
+            {
+              messages: [{ role: "user", content: "hello" }],
+            } as never,
+            {} as never,
+          ),
+        );
+
+        await collectStreamEvents(stream);
+
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          think?: boolean | string;
+          options?: { think?: boolean | string; num_ctx?: number };
+        };
+        expect(requestBody.think).toBe("high");
+        expect(requestBody.options?.think).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
+      },
+    );
+  });
@@ -151,7 +151,12 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
   });
 }
 
-function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolean): StreamFn {
+type OllamaThinkValue = boolean | "low" | "medium" | "high";
+
+function createOllamaThinkingWrapper(
+  baseFn: StreamFn | undefined,
+  think: OllamaThinkValue,
+): StreamFn {
   const streamFn = baseFn ?? streamSimple;
   return (model, context, options) =>
     streamWithPayloadPatch(streamFn, model, context, options, (payloadRecord) => {
@@ -159,6 +164,22 @@ function createOllamaThinkingWrapper(baseFn: StreamFn | undefined, think: boolea
     });
 }
 
+function resolveOllamaThinkValue(thinkingLevel: unknown): OllamaThinkValue | undefined {
+  if (thinkingLevel === "off") {
+    return false;
+  }
+  if (thinkingLevel === "low" || thinkingLevel === "medium" || thinkingLevel === "high") {
+    return thinkingLevel;
+  }
+  if (thinkingLevel === "minimal") {
+    return "low";
+  }
+  if (thinkingLevel === "xhigh" || thinkingLevel === "adaptive" || thinkingLevel === "max") {
+    return "high";
+  }
+  return undefined;
+}
+
 function resolveOllamaCompatNumCtx(model: ProviderRuntimeModel): number {
   return Math.max(1, Math.floor(model.contextWindow ?? model.maxTokens ?? DEFAULT_CONTEXT_TOKENS));
 }
@@ -196,12 +217,11 @@ export function createConfiguredOllamaCompatStreamWrapper(
     streamFn = wrapOllamaCompatNumCtx(streamFn, resolveOllamaCompatNumCtx(model));
   }
 
-  if (isNativeOllamaTransport && ctx.thinkingLevel === "off") {
-    streamFn = createOllamaThinkingWrapper(streamFn, false);
-  } else if (isNativeOllamaTransport && ctx.thinkingLevel) {
-    // Any non-off ThinkLevel (minimal, low, medium, high, xhigh, adaptive, max)
-    // should enable Ollama's native thinking mode.
-    streamFn = createOllamaThinkingWrapper(streamFn, true);
+  const ollamaThinkValue = isNativeOllamaTransport
+    ? resolveOllamaThinkValue(ctx.thinkingLevel)
+    : undefined;
+  if (ollamaThinkValue !== undefined) {
+    streamFn = createOllamaThinkingWrapper(streamFn, ollamaThinkValue);
   }
 
   if (normalizeProviderId(ctx.provider) === "ollama" && isOllamaCloudKimiModelRef(ctx.modelId)) {
@@ -310,7 +330,7 @@ interface OllamaChatRequest {
   stream: boolean;
   tools?: OllamaTool[];
   options?: Record<string, unknown>;
-  think?: boolean;
+  think?: OllamaThinkValue;
 }
 
 interface OllamaChatMessage {
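
Taken together, the new resolveOllamaThinkValue helper pins down the
level-to-wire mapping. As a standalone TypeScript sketch that restates
the diff above (a summary, not additional behavior):

    // Value sent as top-level `think` on native Ollama chat requests.
    type OllamaThinkValue = boolean | "low" | "medium" | "high";

    // Levels with a direct Ollama counterpart pass through; the rest fold
    // onto the nearest supported effort. Unknown levels leave the payload
    // untouched (no thinking wrapper is installed).
    const thinkOnTheWire: Record<string, OllamaThinkValue> = {
      off: false, // explicitly disables thinking
      minimal: "low",
      low: "low",
      medium: "medium",
      high: "high",
      xhigh: "high",
      adaptive: "high",
      max: "high",
    };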