fix(ollama): forward native model params

Peter Steinberger
2026-04-27 03:08:03 +01:00
parent f4cf7e3b4f
commit aa071e0b60
5 changed files with 114 additions and 6 deletions


@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
- Providers/Ollama: honor `/api/show` capabilities when registering local models so non-tool Ollama models no longer receive the agent tool surface, and keep native Ollama thinking opt-in instead of enabling it by default. Fixes #64710 and duplicate #65343. Thanks @yuan-b, @netherby, @xilopaint, and @Diyforfun2026.
- Providers/Ollama: read larger custom Modelfile `PARAMETER num_ctx` values from `/api/show` so auto-discovered Ollama models with expanded context no longer stay pinned to the base model context. Fixes #68344. Thanks @neeravmakwana.
- Providers/Ollama: honor configured model `params.num_ctx` in native and OpenAI-compatible Ollama requests so local models can cap runtime context without rebuilding Modelfiles. Fixes #44550 and #52206; supersedes #69464. Thanks @taitruong, @armi0024, and @LokiCode404.
- Providers/Ollama: forward whitelisted native Ollama model params such as `temperature`, `top_p`, and top-level `think` so users can disable API-level thinking or tune local models from config without proxy shims (a sketch of the forwarding rules follows this list). Fixes #48010. Thanks @tangzhi, @pandego, @maweibin, @Adam-Researchh, and @EmpireCreator.
- Providers/Ollama: expose native Ollama thinking effort levels so `/think max` is accepted for reasoning-capable Ollama models and maps to Ollama's highest supported `think` effort. Fixes #71584. Thanks @g0st1n.
- Providers/Ollama: strip the active custom Ollama provider prefix before native chat and embedding requests, so custom provider ids like `ollama-spark/qwen3:32b` reach Ollama as the real model name. Fixes #72353. Thanks @maximus-dss and @hclsys.
- Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
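
For reference, a minimal standalone sketch of the forwarding rules described in the `params` entry above, assuming a reduced whitelist; `splitParams` and `OPTION_KEYS` are illustrative names, not OpenClaw's actual implementation (the real helpers appear in the provider diff below):

```ts
// Illustrative split of configured model params into native Ollama request
// fields; OPTION_KEYS is a subset of the commit's OLLAMA_OPTION_PARAM_KEYS.
const OPTION_KEYS = new Set([
  "temperature", "top_p", "top_k", "min_p",
  "num_predict", "stop", "repeat_penalty", "num_ctx",
]);

type ThinkValue = boolean | "low" | "medium" | "high" | "max";

function splitParams(params: Record<string, unknown>): {
  options: Record<string, unknown>;
  think?: ThinkValue;
} {
  const options: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(params)) {
    // Only whitelisted Ollama option keys are forwarded; OpenClaw-internal
    // keys such as `streaming` are dropped.
    if (value !== undefined && OPTION_KEYS.has(key)) {
      options[key] = value;
    }
  }
  // `think`/`thinking` travel at the top level of the request, not inside
  // `options`; "off" is accepted as an alias for false.
  const raw = params.think ?? params.thinking;
  const think: ThinkValue | undefined =
    typeof raw === "boolean"
      ? raw
      : raw === "off"
        ? false
        : raw === "low" || raw === "medium" || raw === "high" || raw === "max"
          ? raw
          : undefined;
  return think === undefined ? { options } : { options, think };
}

// `streaming` is dropped; `thinking: false` becomes top-level `think: false`.
console.log(splitParams({ temperature: 0.7, top_p: 0.9, thinking: false, streaming: true }));
// -> { options: { temperature: 0.7, top_p: 0.9 }, think: false }
```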


@@ -403,6 +403,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid `num_ctx` values (zero, negative, or non-finite) are ignored and fall back to `contextWindow`.
Native Ollama model entries also accept the common Ollama runtime options under `params`, including `temperature`, `top_p`, `top_k`, `min_p`, `num_predict`, `stop`, `repeat_penalty`, `num_batch`, `num_thread`, and `use_mmap`. OpenClaw forwards only whitelisted Ollama option keys, so OpenClaw runtime params such as `streaming` never leak to Ollama. Use `params.think` or `params.thinking` to send the top-level Ollama `think` field: `false` (or `"off"`) disables API-level thinking for Qwen-style thinking models, and the effort strings `"low"`, `"medium"`, `"high"`, and `"max"` are passed through for reasoning-capable models.
```json5
{
  models: {
@@ -415,6 +417,9 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
        maxTokens: 65536,
        params: {
          num_ctx: 32768,
          temperature: 0.7,
          top_p: 0.9,
          thinking: false,
        },
      }
    ]
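
For illustration, a rough sketch of the native `/api/chat` body the adapter would assemble for the entry above; the model id and message are placeholders, and unrelated fields are omitted:

```ts
// Whitelisted params land under `options`; `thinking: false` is sent as the
// top-level `think` field. Placeholder model id and message, for shape only.
const body = {
  model: "qwen3:32b",
  messages: [{ role: "user", content: "Reply exactly OK." }],
  stream: true,
  think: false,
  options: { num_ctx: 32768, temperature: 0.7, top_p: 0.9 },
};
```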


@@ -26,7 +26,8 @@ describe.skipIf(!LIVE)("ollama live", () => {
    let payload:
      | {
          model?: string;
          options?: { num_ctx?: number };
          think?: boolean;
          options?: { num_ctx?: number; top_p?: number };
          tools?: Array<{
            function?: {
              parameters?: {
@@ -43,7 +44,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
        api: "ollama",
        provider: PROVIDER_ID,
        contextWindow: 8192,
        params: { num_ctx: 4096 },
        params: { num_ctx: 4096, top_p: 0.9, thinking: false },
      } as never,
      {
        messages: [{ role: "user", content: "Reply exactly OK." }],
@@ -82,6 +83,8 @@ describe.skipIf(!LIVE)("ollama live", () => {
    expect(events.some((event) => (event as { type?: string }).type === "done")).toBe(true);
    expect(payload?.model).toBe(CHAT_MODEL);
    expect(payload?.options?.num_ctx).toBe(4096);
    expect(payload?.options?.top_p).toBe(0.9);
    expect(payload?.think).toBe(false);
    const properties = payload?.tools?.[0]?.function?.parameters?.properties;
    expect(properties?.city?.type).toBe("string");
    expect(properties?.units?.type).toBe("string");


@@ -919,6 +919,7 @@ async function createOllamaTestStream(params: {
  options?: {
    apiKey?: string;
    maxTokens?: number;
    temperature?: number;
    signal?: AbortSignal;
    headers?: Record<string, string>;
  };
@@ -1205,7 +1206,17 @@ describe("createOllamaStreamFn", () => {
    async (fetchMock) => {
      const stream = await createOllamaTestStream({
        baseUrl: "http://ollama-host:11434",
        model: { params: { num_ctx: 32768 }, contextWindow: 131072 },
        model: {
          params: {
            num_ctx: 32768,
            temperature: 0.2,
            top_p: 0.9,
            thinking: false,
            streaming: false,
          },
          contextWindow: 131072,
        },
        options: { temperature: 0.7, maxTokens: 55 },
      });
      const events = await collectStreamEvents(stream);
@@ -1216,9 +1227,21 @@ describe("createOllamaStreamFn", () => {
        throw new Error("Expected string request body");
      }
      const requestBody = JSON.parse(requestInit.body) as {
        options: { num_ctx?: number };
        think?: boolean;
        options: {
          num_ctx?: number;
          num_predict?: number;
          temperature?: number;
          top_p?: number;
          streaming?: boolean;
        };
      };
      expect(requestBody.options.num_ctx).toBe(32768);
      expect(requestBody.options.num_predict).toBe(55);
      expect(requestBody.options.temperature).toBe(0.7);
      expect(requestBody.options.top_p).toBe(0.9);
      expect(requestBody.options.streaming).toBeUndefined();
      expect(requestBody.think).toBe(false);
    },
  );
});


@@ -152,7 +152,31 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
  });
}
type OllamaThinkValue = boolean | "low" | "medium" | "high";
type OllamaThinkValue = boolean | "low" | "medium" | "high" | "max";
// Ollama runtime options that may be forwarded from model `params` into the
// request's `options` object; everything else is dropped.
const OLLAMA_OPTION_PARAM_KEYS = new Set([
  "num_keep",
  "seed",
  "num_predict",
  "top_k",
  "top_p",
  "min_p",
  "typical_p",
  "repeat_last_n",
  "temperature",
  "repeat_penalty",
  "presence_penalty",
  "frequency_penalty",
  "stop",
  "num_ctx",
  "num_batch",
  "num_gpu",
  "main_gpu",
  "use_mmap",
  "num_thread",
]);
// Params that travel at the top level of the chat request rather than in `options`.
const OLLAMA_TOP_LEVEL_PARAM_KEYS = new Set(["format", "keep_alive", "truncate", "shift"]);
function createOllamaThinkingWrapper(
  baseFn: StreamFn | undefined,
@@ -181,6 +205,22 @@ function resolveOllamaThinkValue(thinkingLevel: unknown): OllamaThinkValue | und
  return undefined;
}
// Resolve a top-level `think` value from model params: `thinking` is accepted
// as an alias for `think`, and "off" maps to false.
function resolveOllamaThinkParamValue(
  params: Record<string, unknown> | undefined,
): OllamaThinkValue | undefined {
  const raw = params?.think ?? params?.thinking;
  if (typeof raw === "boolean") {
    return raw;
  }
  if (raw === "off") {
    return false;
  }
  if (raw === "low" || raw === "medium" || raw === "high" || raw === "max") {
    return raw;
  }
  return undefined;
}
function resolveOllamaConfiguredNumCtx(model: ProviderRuntimeModel): number | undefined {
  const raw = model.params?.num_ctx;
  if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
@@ -196,6 +236,39 @@ function resolveOllamaNumCtx(model: ProviderRuntimeModel): number {
  );
}
// Copy whitelisted runtime options from model params into the request
// `options`, then pin `num_ctx` to the resolved context value.
function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string, unknown> {
  const options: Record<string, unknown> = {};
  const params = model.params;
  if (params && typeof params === "object" && !Array.isArray(params)) {
    for (const [key, value] of Object.entries(params)) {
      if (value !== undefined && OLLAMA_OPTION_PARAM_KEYS.has(key)) {
        options[key] = value;
      }
    }
  }
  options.num_ctx = resolveOllamaNumCtx(model);
  return options;
}
// Collect params that belong at the top level of the chat request (`format`,
// `keep_alive`, and friends, plus `think`); returns undefined when there are none.
function resolveOllamaTopLevelParams(
  model: ProviderRuntimeModel,
): Record<string, unknown> | undefined {
  const requestParams: Record<string, unknown> = {};
  const params = model.params;
  if (params && typeof params === "object" && !Array.isArray(params)) {
    for (const [key, value] of Object.entries(params)) {
      if (value !== undefined && OLLAMA_TOP_LEVEL_PARAM_KEYS.has(key)) {
        requestParams[key] = value;
      }
    }
  }
  const think = resolveOllamaThinkParamValue(params);
  if (think !== undefined) {
    requestParams.think = think;
  }
  return Object.keys(requestParams).length > 0 ? requestParams : undefined;
}
function isOllamaCloudKimiModelRef(modelId: string): boolean {
  const normalizedModelId = normalizeLowercaseStringOrEmpty(modelId);
  return normalizedModelId.startsWith("kimi-k") && normalizedModelId.includes(":cloud");
@@ -257,6 +330,7 @@ export function buildOllamaChatRequest(params: {
  messages: OllamaChatMessage[];
  tools?: OllamaTool[];
  options?: Record<string, unknown>;
  requestParams?: Record<string, unknown>;
  stream?: boolean;
}): OllamaChatRequest {
  return {
@@ -265,6 +339,7 @@ export function buildOllamaChatRequest(params: {
    stream: params.stream ?? true,
    ...(params.tools && params.tools.length > 0 ? { tools: params.tools } : {}),
    ...(params.options ? { options: params.options } : {}),
    ...params.requestParams,
  };
}
@@ -754,7 +829,7 @@ export function createOllamaStreamFn(
  );
  const ollamaTools = extractOllamaTools(context.tools);
  const ollamaOptions: Record<string, unknown> = { num_ctx: resolveOllamaNumCtx(model) };
  const ollamaOptions: Record<string, unknown> = resolveOllamaModelOptions(model);
  if (typeof options?.temperature === "number") {
    ollamaOptions.temperature = options.temperature;
  }
@@ -769,6 +844,7 @@ export function createOllamaStreamFn(
    stream: true,
    tools: ollamaTools,
    options: ollamaOptions,
    requestParams: resolveOllamaTopLevelParams(model),
  });
  options?.onPayload?.(body, model);
  const headers: Record<string, string> = {
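
To close the loop, a minimal standalone sketch of how the new helpers compose into the outgoing request; `buildRequest` is an illustrative mimic of `buildOllamaChatRequest` above, not the module's actual export:

```ts
// Illustrative composition: whitelisted options plus top-level request params
// spread into one /api/chat body, mirroring `...params.requestParams` above.
type ChatMessage = { role: string; content: string };

function buildRequest(params: {
  model: string;
  messages: ChatMessage[];
  options?: Record<string, unknown>;
  requestParams?: Record<string, unknown>;
  stream?: boolean;
}) {
  return {
    model: params.model,
    messages: params.messages,
    stream: params.stream ?? true,
    ...(params.options ? { options: params.options } : {}),
    // Top-level `think`, `format`, `keep_alive`, ... land beside `options`.
    ...params.requestParams,
  };
}

console.log(
  buildRequest({
    model: "qwen3:8b", // placeholder id
    messages: [{ role: "user", content: "hi" }],
    options: { num_ctx: 32768, top_p: 0.9 },
    requestParams: { think: false },
  }),
);
// -> { model: "qwen3:8b", messages: [...], stream: true,
//      options: { num_ctx: 32768, top_p: 0.9 }, think: false }
```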