fix(ollama): forward native model params
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
 - Providers/Ollama: honor `/api/show` capabilities when registering local models so non-tool Ollama models no longer receive the agent tool surface, and keep native Ollama thinking opt-in instead of enabling it by default. Fixes #64710 and duplicate #65343. Thanks @yuan-b, @netherby, @xilopaint, and @Diyforfun2026.
 - Providers/Ollama: read larger custom Modelfile `PARAMETER num_ctx` values from `/api/show` so auto-discovered Ollama models with expanded context no longer stay pinned to the base model context. Fixes #68344. Thanks @neeravmakwana.
 - Providers/Ollama: honor configured model `params.num_ctx` in native and OpenAI-compatible Ollama requests so local models can cap runtime context without rebuilding Modelfiles. Fixes #44550 and #52206; supersedes #69464. Thanks @taitruong, @armi0024, and @LokiCode404.
+- Providers/Ollama: forward whitelisted native Ollama model params such as `temperature`, `top_p`, and top-level `think` so users can disable API-level thinking or tune local models from config without proxy shims. Fixes #48010. Thanks @tangzhi, @pandego, @maweibin, @Adam-Researchh, and @EmpireCreator.
 - Providers/Ollama: expose native Ollama thinking effort levels so `/think max` is accepted for reasoning-capable Ollama models and maps to Ollama's highest supported `think` effort. Fixes #71584. Thanks @g0st1n.
 - Providers/Ollama: strip the active custom Ollama provider prefix before native chat and embedding requests, so custom provider ids like `ollama-spark/qwen3:32b` reach Ollama as the real model name. Fixes #72353. Thanks @maximus-dss and @hclsys.
 - Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge.
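
As a sketch of the tool-call argument handling in the last entry above: preserving integers beyond `Number.MAX_SAFE_INTEGER` requires access to the raw number text. One way to do that (an assumption for illustration, not the repo's actual helper) is the `JSON.parse` source-access reviver available in recent V8 / Node 21+:

```ts
// Hypothetical sketch, not OpenClaw's implementation: keep unsafe integers
// intact as BigInt by reading the raw source text passed to the JSON.parse
// reviver's third argument (recent V8 / Node 21+).
function parseToolArguments(raw: string): Record<string, unknown> {
  return JSON.parse(raw, (_key, value, context?: { source?: string }) => {
    if (
      typeof value === "number" &&
      Number.isInteger(value) &&
      !Number.isSafeInteger(value) &&
      context?.source !== undefined
    ) {
      return BigInt(context.source); // full precision survives
    }
    return value;
  });
}

// 2^60 + 1 would silently round to ...976 as a plain double:
console.log(parseToolArguments('{"id": 1152921504606846977}'));
// -> { id: 1152921504606846977n }
```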
@@ -403,6 +403,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
 
 You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Non-numeric, zero, negative, and non-finite values are ignored and the request falls back to `contextWindow`.
 
+Native Ollama model entries also accept the common Ollama runtime options under `params`, including `temperature`, `top_p`, `top_k`, `min_p`, `num_predict`, `stop`, `repeat_penalty`, `num_batch`, `num_thread`, and `use_mmap`. OpenClaw forwards only whitelisted Ollama request keys, so OpenClaw runtime params such as `streaming` are not leaked to Ollama. Use `params.think` or `params.thinking` to send top-level Ollama `think`; `false` disables API-level thinking for Qwen-style thinking models.
+
 ```json5
 {
   models: {
@@ -415,6 +417,9 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
         maxTokens: 65536,
         params: {
           num_ctx: 32768,
+          temperature: 0.7,
+          top_p: 0.9,
+          thinking: false,
         },
       }
     ]
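
Given that config, the native `/api/chat` body comes out roughly as below (a sketch following the forwarding rules above; the model id is hypothetical). Whitelisted option keys nest under `options`, and `params.thinking: false` is sent as top-level `think`:

```ts
// Illustrative request body shape (sketch, not captured output).
const body = {
  model: "qwen3:32b", // hypothetical model id
  messages: [{ role: "user", content: "Reply exactly OK." }],
  stream: true,
  options: { num_ctx: 32768, temperature: 0.7, top_p: 0.9 },
  think: false, // from params.thinking; disables API-level thinking
};
```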
@@ -26,7 +26,8 @@ describe.skipIf(!LIVE)("ollama live", () => {
   let payload:
     | {
         model?: string;
-        options?: { num_ctx?: number };
+        think?: boolean;
+        options?: { num_ctx?: number; top_p?: number };
         tools?: Array<{
           function?: {
             parameters?: {
@@ -43,7 +44,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
         api: "ollama",
         provider: PROVIDER_ID,
         contextWindow: 8192,
-        params: { num_ctx: 4096 },
+        params: { num_ctx: 4096, top_p: 0.9, thinking: false },
       } as never,
       {
         messages: [{ role: "user", content: "Reply exactly OK." }],
@@ -82,6 +83,8 @@ describe.skipIf(!LIVE)("ollama live", () => {
     expect(events.some((event) => (event as { type?: string }).type === "done")).toBe(true);
     expect(payload?.model).toBe(CHAT_MODEL);
     expect(payload?.options?.num_ctx).toBe(4096);
+    expect(payload?.options?.top_p).toBe(0.9);
+    expect(payload?.think).toBe(false);
     const properties = payload?.tools?.[0]?.function?.parameters?.properties;
     expect(properties?.city?.type).toBe("string");
     expect(properties?.units?.type).toBe("string");
@@ -919,6 +919,7 @@ async function createOllamaTestStream(params: {
   options?: {
     apiKey?: string;
     maxTokens?: number;
+    temperature?: number;
     signal?: AbortSignal;
     headers?: Record<string, string>;
   };
@@ -1205,7 +1206,17 @@ describe("createOllamaStreamFn", () => {
     async (fetchMock) => {
       const stream = await createOllamaTestStream({
         baseUrl: "http://ollama-host:11434",
-        model: { params: { num_ctx: 32768 }, contextWindow: 131072 },
+        model: {
+          params: {
+            num_ctx: 32768,
+            temperature: 0.2,
+            top_p: 0.9,
+            thinking: false,
+            streaming: false,
+          },
+          contextWindow: 131072,
+        },
+        options: { temperature: 0.7, maxTokens: 55 },
       });
 
       const events = await collectStreamEvents(stream);
@@ -1216,9 +1227,21 @@ describe("createOllamaStreamFn", () => {
         throw new Error("Expected string request body");
       }
       const requestBody = JSON.parse(requestInit.body) as {
-        options: { num_ctx?: number };
+        think?: boolean;
+        options: {
+          num_ctx?: number;
+          num_predict?: number;
+          temperature?: number;
+          top_p?: number;
+          streaming?: boolean;
+        };
       };
       expect(requestBody.options.num_ctx).toBe(32768);
+      expect(requestBody.options.num_predict).toBe(55);
+      expect(requestBody.options.temperature).toBe(0.7);
+      expect(requestBody.options.top_p).toBe(0.9);
+      expect(requestBody.options.streaming).toBeUndefined();
+      expect(requestBody.think).toBe(false);
     },
   );
 });
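
The assertions above pin down the precedence: the model's `temperature: 0.2` loses to the call-site `temperature: 0.7`, `maxTokens: 55` surfaces as Ollama's `num_predict`, and the non-Ollama key `streaming` never reaches the request. A minimal sketch of that merge order (names hypothetical):

```ts
// Hypothetical sketch of the merge order the test asserts: model params
// seed the options map, then call-site options override matching keys.
const modelParams = { num_ctx: 32768, temperature: 0.2, top_p: 0.9 };
const callOptions = { temperature: 0.7, maxTokens: 55 };

const options = {
  ...modelParams,
  temperature: callOptions.temperature, // call-site wins: 0.7 replaces 0.2
  num_predict: callOptions.maxTokens,   // OpenClaw maxTokens -> Ollama num_predict
};

console.log(options);
// -> { num_ctx: 32768, temperature: 0.7, top_p: 0.9, num_predict: 55 }
```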
@@ -152,7 +152,31 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
   });
 }
 
-type OllamaThinkValue = boolean | "low" | "medium" | "high";
+type OllamaThinkValue = boolean | "low" | "medium" | "high" | "max";
+
+const OLLAMA_OPTION_PARAM_KEYS = new Set([
+  "num_keep",
+  "seed",
+  "num_predict",
+  "top_k",
+  "top_p",
+  "min_p",
+  "typical_p",
+  "repeat_last_n",
+  "temperature",
+  "repeat_penalty",
+  "presence_penalty",
+  "frequency_penalty",
+  "stop",
+  "num_ctx",
+  "num_batch",
+  "num_gpu",
+  "main_gpu",
+  "use_mmap",
+  "num_thread",
+]);
+
+const OLLAMA_TOP_LEVEL_PARAM_KEYS = new Set(["format", "keep_alive", "truncate", "shift"]);
 
 function createOllamaThinkingWrapper(
   baseFn: StreamFn | undefined,
@@ -181,6 +205,22 @@ function resolveOllamaThinkValue(thinkingLevel: unknown): OllamaThinkValue | und
   return undefined;
 }
 
+function resolveOllamaThinkParamValue(
+  params: Record<string, unknown> | undefined,
+): OllamaThinkValue | undefined {
+  const raw = params?.think ?? params?.thinking;
+  if (typeof raw === "boolean") {
+    return raw;
+  }
+  if (raw === "off") {
+    return false;
+  }
+  if (raw === "low" || raw === "medium" || raw === "high" || raw === "max") {
+    return raw;
+  }
+  return undefined;
+}
+
 function resolveOllamaConfiguredNumCtx(model: ProviderRuntimeModel): number | undefined {
   const raw = model.params?.num_ctx;
   if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
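
The new helper accepts booleans, `"off"` as an alias for `false`, and the effort strings, ignoring everything else. A vitest-style sketch of the mapping (the import is hypothetical; the diff keeps the helper module-private):

```ts
import { expect, it } from "vitest";
// Hypothetical import: the diff defines this helper privately in the provider module.
import { resolveOllamaThinkParamValue } from "./ollama";

it("maps config think params to Ollama think values", () => {
  expect(resolveOllamaThinkParamValue({ thinking: false })).toBe(false);
  expect(resolveOllamaThinkParamValue({ think: "off" })).toBe(false); // alias for false
  expect(resolveOllamaThinkParamValue({ think: "max" })).toBe("max");
  expect(resolveOllamaThinkParamValue({ think: 1 })).toBeUndefined(); // unsupported value is ignored
  expect(resolveOllamaThinkParamValue(undefined)).toBeUndefined();
});
```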
@@ -196,6 +236,39 @@ function resolveOllamaNumCtx(model: ProviderRuntimeModel): number {
   );
 }
 
+function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string, unknown> {
+  const options: Record<string, unknown> = {};
+  const params = model.params;
+  if (params && typeof params === "object" && !Array.isArray(params)) {
+    for (const [key, value] of Object.entries(params)) {
+      if (value !== undefined && OLLAMA_OPTION_PARAM_KEYS.has(key)) {
+        options[key] = value;
+      }
+    }
+  }
+  options.num_ctx = resolveOllamaNumCtx(model);
+  return options;
+}
+
+function resolveOllamaTopLevelParams(
+  model: ProviderRuntimeModel,
+): Record<string, unknown> | undefined {
+  const requestParams: Record<string, unknown> = {};
+  const params = model.params;
+  if (params && typeof params === "object" && !Array.isArray(params)) {
+    for (const [key, value] of Object.entries(params)) {
+      if (value !== undefined && OLLAMA_TOP_LEVEL_PARAM_KEYS.has(key)) {
+        requestParams[key] = value;
+      }
+    }
+  }
+  const think = resolveOllamaThinkParamValue(params);
+  if (think !== undefined) {
+    requestParams.think = think;
+  }
+  return Object.keys(requestParams).length > 0 ? requestParams : undefined;
+}
+
 function isOllamaCloudKimiModelRef(modelId: string): boolean {
   const normalizedModelId = normalizeLowercaseStringOrEmpty(modelId);
   return normalizedModelId.startsWith("kimi-k") && normalizedModelId.includes(":cloud");
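
Together the two resolvers above implement the whitelist split: option keys nest under `options`, the four top-level keys plus the normalized `think` go on the request root, and anything else, such as OpenClaw's own `streaming` flag, is dropped. A minimal self-contained sketch (whitelists abridged, names hypothetical; `think` normalization omitted):

```ts
// Hypothetical, abridged sketch of the whitelist split implemented above.
const OPTION_KEYS = new Set(["num_ctx", "temperature", "top_p"]); // abridged
const TOP_LEVEL_KEYS = new Set(["format", "keep_alive", "truncate", "shift"]);

function splitOllamaParams(params: Record<string, unknown>): {
  options: Record<string, unknown>;
  topLevel: Record<string, unknown>;
} {
  const options: Record<string, unknown> = {};
  const topLevel: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(params)) {
    if (value === undefined) continue;
    if (OPTION_KEYS.has(key)) options[key] = value;
    else if (TOP_LEVEL_KEYS.has(key)) topLevel[key] = value;
    // other keys (e.g. OpenClaw's `streaming`) are not forwarded;
    // think/thinking is normalized separately into topLevel.think
  }
  return { options, topLevel };
}

console.log(splitOllamaParams({ num_ctx: 32768, top_p: 0.9, streaming: false }));
// -> { options: { num_ctx: 32768, top_p: 0.9 }, topLevel: {} }
```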
@@ -257,6 +330,7 @@ export function buildOllamaChatRequest(params: {
   messages: OllamaChatMessage[];
   tools?: OllamaTool[];
   options?: Record<string, unknown>;
+  requestParams?: Record<string, unknown>;
   stream?: boolean;
 }): OllamaChatRequest {
   return {
@@ -265,6 +339,7 @@
     stream: params.stream ?? true,
     ...(params.tools && params.tools.length > 0 ? { tools: params.tools } : {}),
     ...(params.options ? { options: params.options } : {}),
+    ...params.requestParams,
   };
 }
 
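
Because `...params.requestParams` is spread last and spreading `undefined` is a no-op, requests without top-level params are unchanged, while keys like `think` and `keep_alive` land on the request root. An illustrative call (values hypothetical):

```ts
// Illustrative call; buildOllamaChatRequest is the helper shown above.
const body = buildOllamaChatRequest({
  model: "qwen3:32b", // hypothetical model id
  messages: [{ role: "user", content: "Reply exactly OK." }],
  options: { num_ctx: 32768, top_p: 0.9 },
  requestParams: { think: false, keep_alive: "5m" },
});
// -> { model: "qwen3:32b", messages: [...], stream: true,
//      options: { num_ctx: 32768, top_p: 0.9 }, think: false, keep_alive: "5m" }
```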
@@ -754,7 +829,7 @@ export function createOllamaStreamFn(
   );
   const ollamaTools = extractOllamaTools(context.tools);
 
-  const ollamaOptions: Record<string, unknown> = { num_ctx: resolveOllamaNumCtx(model) };
+  const ollamaOptions: Record<string, unknown> = resolveOllamaModelOptions(model);
   if (typeof options?.temperature === "number") {
     ollamaOptions.temperature = options.temperature;
   }
@@ -769,6 +844,7 @@
     stream: true,
     tools: ollamaTools,
     options: ollamaOptions,
+    requestParams: resolveOllamaTopLevelParams(model),
   });
   options?.onPayload?.(body, model);
   const headers: Record<string, string> = {