diff --git a/CHANGELOG.md b/CHANGELOG.md index 686a3c02e16..79dd190cd7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,7 @@ Docs: https://docs.openclaw.ai - Providers/Ollama: preserve explicit configured model input modalities when merging discovered provider metadata so custom vision models keep image support instead of silently dropping attachments. Fixes #39690; carries forward #39785. Thanks @Skrblik and @Mriris. - Providers/Ollama: estimate native Ollama transcript usage when `/api/chat` omits prompt/eval counters while preserving exact zero counters, keeping local model runs visible in usage surfaces. Carries forward #39112. Thanks @TylonHH. - Agents/Ollama: retry native Ollama turns that finish without user-visible text, including unsigned thinking-only responses, so constrained reasoning turns can continue instead of surfacing an empty reply. Carries forward #66552 and #61223. Thanks @yfge and @L3G. +- Docs/Ollama: expand setup recipes for local, LAN, cloud, multi-host, web search, embeddings, thinking control, and large-context troubleshooting. Thanks @codex. - Providers/PDF/Ollama: add bounded network timeouts for Ollama model pulls and native Anthropic/Gemini PDF analysis requests so unresponsive provider endpoints no longer hang sessions indefinitely. Fixes #54142; supersedes #54144 and #54145. Thanks @jinduwang1001-max and @arkyu2077. - LLM Task/Ollama: accept model overrides that already include the selected provider prefix, avoiding doubled ids such as `ollama/ollama/llama3.2:latest`, and live-verify local Ollama JSON tasks return parsed output. Fixes #50052. Thanks @ralphy-maplebots and @Hollychou924. - Memory/doctor: treat Ollama memory embeddings as key-optional so `openclaw doctor` no longer warns about a missing API key when the gateway reports embeddings are ready. Fixes #46584. Thanks @fengly78. diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md index d1dfa0813b1..ee5a81c35ad 100644 --- a/docs/providers/ollama.md +++ b/docs/providers/ollama.md @@ -335,6 +335,222 @@ OpenClaw rejects image-description requests for models that are not marked image +## Common recipes + +Use these as starting points and replace model IDs with the exact names from `ollama list` or `openclaw models list --provider ollama`. + + + + Use this when Ollama runs on the same machine as the Gateway and you want OpenClaw to discover the installed models automatically. + + ```bash + ollama serve + ollama pull gemma4 + export OLLAMA_API_KEY="ollama-local" + openclaw models list --provider ollama + openclaw models set ollama/gemma4 + ``` + + This path keeps config minimal. Do not add a `models.providers.ollama` block unless you want to define models manually. + + + + + Use native Ollama URLs for LAN hosts. Do not add `/v1`. + + ```json5 + { + models: { + providers: { + ollama: { + baseUrl: "http://gpu-box.local:11434", + apiKey: "ollama-local", + api: "ollama", + timeoutSeconds: 300, + contextWindow: 32768, + maxTokens: 8192, + models: [ + { + id: "qwen3.5:9b", + name: "qwen3.5:9b", + reasoning: true, + input: ["text"], + params: { + num_ctx: 32768, + keep_alive: "15m", + }, + }, + ], + }, + }, + }, + agents: { + defaults: { + model: { primary: "ollama/qwen3.5:9b" }, + }, + }, + } + ``` + + `contextWindow` is the OpenClaw-side context budget. `params.num_ctx` is sent to Ollama for the request. Keep them aligned when your hardware cannot run the model's full advertised context. + + + + + Use this when you do not run a local daemon and want hosted Ollama models directly. 
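+
+  Before wiring the key into OpenClaw, you can sanity-check hosted access with a direct request (a minimal sketch, assuming the hosted native `/api/chat` endpoint and a model your account can use; the key and model ID below are placeholders):
+
+  ```bash
+  # Placeholder key and model ID; expect a JSON chat response if access is working.
+  curl https://ollama.com/api/chat \
+    -H "Authorization: Bearer your-ollama-api-key" \
+    -d '{
+      "model": "kimi-k2.5:cloud",
+      "messages": [{"role": "user", "content": "Reply with exactly: ok"}],
+      "stream": false
+    }'
+  ```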
+ + ```bash + export OLLAMA_API_KEY="your-ollama-api-key" + ``` + + ```json5 + { + models: { + providers: { + ollama: { + baseUrl: "https://ollama.com", + apiKey: "OLLAMA_API_KEY", + api: "ollama", + models: [ + { + id: "kimi-k2.5:cloud", + name: "kimi-k2.5:cloud", + reasoning: false, + input: ["text", "image"], + contextWindow: 128000, + maxTokens: 8192, + }, + ], + }, + }, + }, + agents: { + defaults: { + model: { primary: "ollama/kimi-k2.5:cloud" }, + }, + }, + } + ``` + + + + + Use this when a local or LAN Ollama daemon is signed in with `ollama signin` and should serve both local models and `:cloud` models. + + ```bash + ollama signin + ollama pull gemma4 + ``` + + ```json5 + { + models: { + providers: { + ollama: { + baseUrl: "http://127.0.0.1:11434", + apiKey: "ollama-local", + api: "ollama", + timeoutSeconds: 300, + models: [ + { id: "gemma4", name: "gemma4", input: ["text"] }, + { id: "kimi-k2.5:cloud", name: "kimi-k2.5:cloud", input: ["text", "image"] }, + ], + }, + }, + }, + agents: { + defaults: { + model: { + primary: "ollama/gemma4", + fallbacks: ["ollama/kimi-k2.5:cloud"], + }, + }, + }, + } + ``` + + + + + Use custom provider IDs when you have more than one Ollama server. Each provider gets its own host, models, auth, timeout, and model refs. + + ```json5 + { + models: { + providers: { + "ollama-fast": { + baseUrl: "http://mini.local:11434", + apiKey: "ollama-local", + api: "ollama", + contextWindow: 32768, + models: [{ id: "gemma4", name: "gemma4", input: ["text"] }], + }, + "ollama-large": { + baseUrl: "http://gpu-box.local:11434", + apiKey: "ollama-local", + api: "ollama", + timeoutSeconds: 420, + contextWindow: 131072, + maxTokens: 16384, + models: [{ id: "qwen3.5:27b", name: "qwen3.5:27b", input: ["text"] }], + }, + }, + }, + agents: { + defaults: { + model: { + primary: "ollama-fast/gemma4", + fallbacks: ["ollama-large/qwen3.5:27b"], + }, + }, + }, + } + ``` + + When OpenClaw sends the request, the active provider prefix is stripped so `ollama-large/qwen3.5:27b` reaches Ollama as `qwen3.5:27b`. + + + + + Some local models can answer simple prompts but struggle with the full agent tool surface. Start by limiting tools and context before changing global runtime settings. + + ```json5 + { + agents: { + defaults: { + experimental: { + localModelLean: true, + }, + model: { primary: "ollama/gemma4" }, + }, + }, + models: { + providers: { + ollama: { + baseUrl: "http://127.0.0.1:11434", + apiKey: "ollama-local", + api: "ollama", + contextWindow: 32768, + models: [ + { + id: "gemma4", + name: "gemma4", + input: ["text"], + params: { num_ctx: 32768 }, + compat: { supportsTools: false }, + }, + ], + }, + }, + }, + } + ``` + + Use `compat.supportsTools: false` only when the model or server reliably fails on tool schemas. It trades agent capability for stability. + + + + ### Model selection Once configured, all your Ollama models are available: @@ -383,6 +599,24 @@ headers, body streaming, and the total guarded-fetch abort. `params.keep_alive` is forwarded to Ollama as top-level `keep_alive` on native `/api/chat` requests; set it per model when first-turn load time is the bottleneck. 
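+
+For reference, the native request shape looks roughly like this (an illustrative hand-written `/api/chat` call, not a capture of OpenClaw traffic): `keep_alive` sits at the top level next to `model` and `messages`, while `num_ctx` stays inside `options`.
+
+```bash
+# Top-level keep_alive keeps the model loaded after the turn; num_ctx rides in options.
+curl http://127.0.0.1:11434/api/chat -d '{
+  "model": "gemma4",
+  "messages": [{"role": "user", "content": "Reply with exactly: ok"}],
+  "keep_alive": "15m",
+  "options": { "num_ctx": 32768 },
+  "stream": false
+}'
+```
+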
+### Quick verification + +```bash +# Ollama daemon visible to this machine +curl http://127.0.0.1:11434/api/tags + +# OpenClaw catalog and selected model +openclaw models list --provider ollama +openclaw models status + +# Direct model smoke +openclaw infer model run \ + --model ollama/gemma4 \ + --prompt "Reply with exactly: ok" +``` + +For remote hosts, replace `127.0.0.1` with the host used in `baseUrl`. If `curl` works but OpenClaw does not, check whether the Gateway runs on a different machine, container, or service account. + ## Ollama Web Search OpenClaw supports **Ollama Web Search** as a bundled `web_search` provider. @@ -407,6 +641,30 @@ Choose **Ollama Web Search** during `openclaw onboard` or `openclaw configure -- } ``` +For direct hosted search through Ollama Cloud: + +```json5 +{ + models: { + providers: { + ollama: { + baseUrl: "https://ollama.com", + apiKey: "OLLAMA_API_KEY", + api: "ollama", + models: [{ id: "kimi-k2.5:cloud", name: "kimi-k2.5:cloud", input: ["text"] }], + }, + }, + }, + tools: { + web: { + search: { provider: "ollama" }, + }, + }, +} +``` + +For a signed-in local daemon, OpenClaw uses the daemon's `/api/experimental/web_search` proxy. For `https://ollama.com`, it calls the hosted `/api/web_search` endpoint directly. + For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-search). @@ -495,6 +753,34 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s + + For native Ollama models, OpenClaw forwards thinking control as Ollama expects it: top-level `think`, not `options.think`. + + ```bash + openclaw agent --model ollama/gemma4 --thinking off + openclaw agent --model ollama/gemma4 --thinking low + ``` + + You can also set a model default: + + ```json5 + { + agents: { + defaults: { + models: { + "ollama/gemma4": { + thinking: "low", + }, + }, + }, + }, + } + ``` + + Per-model `params.think` or `params.thinking` can disable or force Ollama API thinking for a specific configured model. Runtime commands such as `/think off` still apply to the active run. + + + OpenClaw treats models with names such as `deepseek-r1`, `reasoning`, or `think` as reasoning-capable by default. @@ -533,6 +819,25 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s } ``` + For a remote embedding host, keep auth scoped to that host: + + ```json5 + { + agents: { + defaults: { + memorySearch: { + provider: "ollama", + remote: { + baseUrl: "http://gpu-box.local:11434", + model: "nomic-embed-text", + apiKey: "ollama-local", + }, + }, + }, + }, + } + ``` + @@ -590,6 +895,45 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s + + Verify from the same machine and runtime that runs the Gateway: + + ```bash + openclaw gateway status --deep + curl http://ollama-host:11434/api/tags + ``` + + Common causes: + + - `baseUrl` points at `localhost`, but the Gateway runs in Docker or on another host. + - The URL uses `/v1`, which selects OpenAI-compatible behavior instead of native Ollama. + - The remote host needs firewall or LAN binding changes on the Ollama side. + - The model is present on your laptop's daemon but not on the remote daemon. + + + + + This usually means the provider is using OpenAI-compatible mode or the model cannot handle tool schemas. 
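+
+  To separate the two causes, you can send a tool schema straight to the daemon (a minimal sketch against the native `/api/chat` endpoint; the model name and `get_weather` function are placeholders): a tool-capable model should answer with a `tool_calls` entry rather than an error.
+
+  ```bash
+  # Placeholder model and tool definition; checks the daemon/model, not OpenClaw config.
+  curl http://ollama-host:11434/api/chat -d '{
+    "model": "gemma4",
+    "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
+    "tools": [{
+      "type": "function",
+      "function": {
+        "name": "get_weather",
+        "description": "Get the current weather for a city",
+        "parameters": {
+          "type": "object",
+          "properties": { "city": { "type": "string" } },
+          "required": ["city"]
+        }
+      }
+    }],
+    "stream": false
+  }'
+  ```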
+ + Prefer native Ollama mode: + + ```json5 + { + models: { + providers: { + ollama: { + baseUrl: "http://ollama-host:11434", + api: "ollama", + }, + }, + }, + } + ``` + + If a small local model still fails on tool schemas, set `compat.supportsTools: false` on that model entry and retest. + + + Large local models can need a long first load before streaming begins. Keep the timeout scoped to the Ollama provider, and optionally ask Ollama to keep the model loaded between turns: @@ -615,6 +959,33 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s If the host itself is slow to accept connections, `timeoutSeconds` also extends the guarded Undici connect timeout for this provider. + + + Many Ollama models advertise contexts that are larger than your hardware can run comfortably. Cap both OpenClaw's budget and Ollama's request context: + + ```json5 + { + models: { + providers: { + ollama: { + contextWindow: 32768, + maxTokens: 8192, + models: [ + { + id: "qwen3.5:9b", + name: "qwen3.5:9b", + params: { num_ctx: 32768 }, + }, + ], + }, + }, + }, + } + ``` + + Lower `contextWindow` first if the prompt ingestion phase is slow. Lower `maxTokens` if generation runs too long. + + diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts index e7268b5787c..e43ba2f5cee 100644 --- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts +++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts @@ -109,6 +109,8 @@ const GEMINI_INCOMPLETE_TURN_PROVIDER_IDS = new Set([ "google-gemini-cli", ]); const GEMINI_INCOMPLETE_TURN_MODEL_ID_PATTERN = /^gemini(?:[.-]|$)/; +// Ollama native `/api/chat` can finish with only thinking/internal blocks when +// constrained, but it should not inherit the stricter planning-only/ack prompts. const OLLAMA_INCOMPLETE_TURN_PROVIDER_ID_PATTERN = /^ollama(?:-|$)/; const DEFAULT_PLANNING_ONLY_RETRY_LIMIT = 1; const STRICT_AGENTIC_PLANNING_ONLY_RETRY_LIMIT = 2; @@ -550,6 +552,9 @@ export function resolveEmptyResponseRetryInstruction(params: { modelId: params.modelId, executionContract: params.executionContract, }) || + // Keep the generic zero-usage stop retry for providers that expose a + // provider-neutral "nothing was generated" signal, even outside the + // provider allowlist above. isZeroUsageEmptyStopAssistantTurn( params.attempt.currentAttemptAssistant ?? params.attempt.lastAssistant ?? null, ) @@ -582,6 +587,10 @@ function shouldApplyNonVisibleTurnRetryGuard(params: { if (shouldApplyPlanningOnlyRetryGuard(params)) { return true; } + // Non-visible final turns are narrower than planning-only turns: there is no + // user text to classify, just a replay-safe empty/thinking-only result. Ollama + // gets this continuation guard without getting the planning-only or ack + // fast-path wording, which would be too opinionated for local models. return OLLAMA_INCOMPLETE_TURN_PROVIDER_ID_PATTERN.test( normalizeLowercaseStringOrEmpty(params.provider ?? ""), );