From c1b9af27707119591cebea1916ff5710b1e35486 Mon Sep 17 00:00:00 2001
From: Chunyue Wang <80630709+openperf@users.noreply.github.com>
Date: Sun, 3 May 2026 19:47:36 +0800
Subject: [PATCH] fix(ollama): restore catalog-driven num_ctx for native
 /api/chat (#76181)

Co-authored-by: openperf <80630709+openperf@users.noreply.github.com>
Reviewed-by: @openperf
---
 CHANGELOG.md                                 |   1 +
 extensions/ollama/src/stream-runtime.test.ts |  93 +++++++++++++-
 extensions/ollama/src/stream.ts              |  30 ++++-
 src/agents/pi-embedded-runner/run/attempt.ts |   1 +
 .../run/llm-idle-timeout.test.ts             | 115 ++++++++++++++++++
 .../run/llm-idle-timeout.ts                  |  87 +++++++++++++
 6 files changed, 322 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 903b41f5147..ae13e16276d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -144,6 +144,7 @@ Docs: https://docs.openclaw.ai
 - Slack: allow draft preview streaming in top-level DMs when `replyToMode` is `off` while keeping Slack native streaming and assistant thread status gated on reply threads. Fixes #56480. (#56544) Thanks @HangGlidersRule.
 - Control UI/chat: remove the delete-confirm popover outside-click listener on every dismiss path, so Cancel, Delete, outside clicks, and same-button toggles no longer leave stale document listeners behind. Refs #75590 and #69982. Thanks @Ricardo-M-L.
 - Memory-core: treat exhausted file watcher limits as non-fatal for builtin memory auto-sync while preserving fatal handling for unrelated disk-full errors. (#73357) Thanks @solodmd.
+- Providers/Ollama: restore catalog context-window forwarding as `num_ctx` for native `/api/chat` requests; fixes tool selection and context truncation regressions on models with catalog entries (qwen3, llama3, gemma3, …) when no explicit `params.num_ctx` was configured. Fixes #76117. (#76181) Thanks @openperf.
 
 ## 2026.5.2
 
diff --git a/extensions/ollama/src/stream-runtime.test.ts b/extensions/ollama/src/stream-runtime.test.ts
index a5176643e96..1d4dea85289 100644
--- a/extensions/ollama/src/stream-runtime.test.ts
+++ b/extensions/ollama/src/stream-runtime.test.ts
@@ -208,7 +208,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
         };
         expect(requestBody.think).toBe(false);
         expect(requestBody.options?.think).toBeUndefined();
-        expect(requestBody.options?.num_ctx).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
       },
     );
   });
@@ -310,7 +310,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
         };
         expect(requestBody.think).toBe("low");
         expect(requestBody.options?.think).toBeUndefined();
-        expect(requestBody.options?.num_ctx).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
       },
     );
   });
@@ -405,7 +405,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
         };
         expect(requestBody.think).toBe("high");
         expect(requestBody.options?.think).toBeUndefined();
-        expect(requestBody.options?.num_ctx).toBeUndefined();
+        expect(requestBody.options?.num_ctx).toBe(131072);
       },
     );
   });
@@ -1602,7 +1602,9 @@ describe("createOllamaStreamFn", () => {
         if (!requestBody.options) {
           throw new Error("Expected Ollama request options");
         }
-        expect(requestBody.options?.num_ctx).toBeUndefined();
+        // Catalog `contextWindow` flows through as `num_ctx` so the request
+        // does not silently truncate to Ollama's small Modelfile default.
+        expect(requestBody.options?.num_ctx).toBe(131072);
         expect(requestBody.options.num_predict).toBe(123);
       },
     );
@@ -1657,6 +1659,89 @@ describe("createOllamaStreamFn", () => {
     );
   });
 
+  it("omits num_ctx when the model has no params.num_ctx and no catalog window", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const stream = await createOllamaTestStream({
+          baseUrl: "http://ollama-host:11434",
+          // Override the helper default contextWindow back to undefined so the
+          // request body should leave Ollama's Modelfile to decide num_ctx.
+          model: { contextWindow: undefined },
+        });
+
+        await collectStreamEvents(stream);
+
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          options?: { num_ctx?: number };
+        };
+        expect(requestBody.options?.num_ctx).toBeUndefined();
+      },
+    );
+  });
+
+  it("falls back to catalog contextWindow as num_ctx when params.num_ctx is unset", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const stream = await createOllamaTestStream({
+          baseUrl: "http://ollama-host:11434",
+          model: { contextWindow: 32768 },
+        });
+
+        await collectStreamEvents(stream);
+
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          options?: { num_ctx?: number };
+        };
+        expect(requestBody.options?.num_ctx).toBe(32768);
+      },
+    );
+  });
+
+  it("falls back to catalog maxTokens as num_ctx when contextWindow is absent", async () => {
+    await withMockNdjsonFetch(
+      [
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
+        '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
+      ],
+      async (fetchMock) => {
+        const stream = await createOllamaTestStream({
+          baseUrl: "http://ollama-host:11434",
+          // The helper default contextWindow is overridden back to undefined so
+          // the right side of `model.contextWindow ?? model.maxTokens` is the
+          // load-bearing branch.
+          model: { contextWindow: undefined, maxTokens: 65536 },
+        });
+
+        await collectStreamEvents(stream);
+
+        const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
+        if (typeof requestInit.body !== "string") {
+          throw new Error("Expected string request body");
+        }
+        const requestBody = JSON.parse(requestInit.body) as {
+          options?: { num_ctx?: number };
+        };
+        expect(requestBody.options?.num_ctx).toBe(65536);
+      },
+    );
+  });
+
   it("maps configured native Ollama params.thinking=max to the stable top-level think value", async () => {
     await withMockNdjsonFetch(
       [
diff --git a/extensions/ollama/src/stream.ts b/extensions/ollama/src/stream.ts
index e6348e736b1..440fc732f64 100644
--- a/extensions/ollama/src/stream.ts
+++ b/extensions/ollama/src/stream.ts
@@ -290,6 +290,34 @@ function resolveOllamaNumCtx(model: ProviderRuntimeModel): number {
   );
 }
 
+/**
+ * Resolves num_ctx for native /api/chat requests:
+ * 1. explicit `params.num_ctx` set on the model wins,
+ * 2. otherwise the catalog `contextWindow` / `maxTokens` is forwarded so
+ *    OpenClaw's known model windows survive the trip and `/api/chat` does
+ *    not silently truncate to Ollama's small Modelfile default (typically
+ *    2048 tokens) — which is too small for a system prompt plus tool
+ *    definitions and produces "model picks wrong tools / says nonsense"
+ *    symptoms on agent turns,
+ * 3. when neither is known, return undefined so the Modelfile decides.
+ *
+ * This intentionally differs from `resolveOllamaNumCtx` by not falling back
+ * to `DEFAULT_CONTEXT_TOKENS`: that constant is a sane wrapper-side guess for
+ * the OpenAI-compat path, but on the native path we prefer to leave num_ctx
+ * absent rather than guess a window for an unknown model.
+ */
+function resolveOllamaNativeNumCtx(model: ProviderRuntimeModel): number | undefined {
+  const configured = resolveOllamaConfiguredNumCtx(model);
+  if (configured !== undefined) {
+    return configured;
+  }
+  const catalog = model.contextWindow ?? model.maxTokens;
+  if (typeof catalog === "number" && Number.isFinite(catalog) && catalog > 0) {
+    return Math.floor(catalog);
+  }
+  return undefined;
+}
+
 function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string, unknown> {
   const options: Record<string, unknown> = {};
   const params = model.params;
@@ -303,7 +331,7 @@ function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string, unknown>
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
 0) {
     activeSession.agent.streamFn = streamWithIdleTimeout(
diff --git a/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts b/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts
index a750667dc8d..151efee66c5 100644
--- a/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts
+++ b/src/agents/pi-embedded-runner/run/llm-idle-timeout.test.ts
@@ -85,6 +85,121 @@ describe("resolveLlmIdleTimeoutMs", () => {
     const cfg = { agents: { defaults: { timeoutSeconds: 300 } } } as OpenClawConfig;
     expect(resolveLlmIdleTimeoutMs({ cfg, trigger: "cron" })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
   });
+
+  it.each([
+    "http://localhost:11434",
+    "http://127.0.0.1:11434",
+    "http://127.0.0.2:11434",
+    "http://127.255.255.254:11434",
+    "http://0.0.0.0:11434",
+    "http://[::1]:11434",
+    "http://my-rig.local:11434",
+    "http://10.0.0.5:11434",
+    "http://172.16.5.10:11434",
+    "http://172.31.99.1:11434",
+    "http://192.168.1.20:11434",
+    "http://100.64.0.5:11434",
+    "http://100.127.255.254:11434",
+    // RFC 4193 IPv6 unique local (Tailscale IPv6 mesh fd7a:115c:a1e0::/48
+    // falls inside fc00::/7).
+    "http://[fc00::1]:11434",
+    "http://[fd00::1]:11434",
+    "http://[fd7a:115c:a1e0::dead:beef]:11434",
+    "http://[fdff:ffff:ffff:ffff:ffff:ffff:ffff:ffff]:11434",
+    // RFC 4291 IPv6 link-local.
+ "http://[fe80::1]:11434", + "http://[fe9a::1]:11434", + "http://[feab:cd::1]:11434", + "http://[febf::1]:11434", + ])("disables the default idle watchdog for local provider baseUrl %s", (baseUrl) => { + expect(resolveLlmIdleTimeoutMs({ model: { baseUrl } })).toBe(0); + }); + + it.each([ + "http://172.32.0.1:11434", + "http://192.169.1.1:11434", + "http://100.63.255.254:11434", + "http://100.128.0.1:11434", + ])("keeps the default idle watchdog for non-private IPv4 baseUrl %s", (baseUrl) => { + expect(resolveLlmIdleTimeoutMs({ model: { baseUrl } })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); + }); + + // Node's URL parser normalizes every IPv4-mapped loopback form + // (`::ffff:127.0.0.1`, `::ffff:7F00:1`, mixed case, …) to the canonical + // `::ffff:7f00:1`. Exercise the user-facing input shapes here so the full + // parse → lowercase → bracket-strip → exact-match chain is regression-tested + // against future URL parser behavior, not just the canonical literal. + it.each([ + "http://[::ffff:127.0.0.1]:11434", + "http://[::ffff:7f00:1]:11434", + "http://[::FFFF:127.0.0.1]:11434", + ])("disables the default idle watchdog for IPv4-mapped loopback baseUrl %s", (baseUrl) => { + expect(resolveLlmIdleTimeoutMs({ model: { baseUrl } })).toBe(0); + }); + + it.each([ + // Just outside fc00::/7 (fe.. and 00fc::/16 are not unique-local). + "http://[fec0::1]:11434", + "http://[fbff::1]:11434", + // Just outside fe80::/10 (fec0:: was deprecated site-local, fe7f:: not LL). + "http://[fe7f::1]:11434", + // Public IPv6. + "http://[2001:db8::1]:11434", + // Abbreviated `fc::1` expands to 00fc:0:0:...:1, first byte is 0x00, not + // 0xfc — outside fc00::/7. Strict first-hextet match keeps this remote. + "http://[fc::1]:11434", + // IPv4-mapped IPv6 outside loopback (private RFC 1918 in mapped form is + // intentionally not matched, mirroring the SSRF policy helper). 
+ "http://[::ffff:10.0.0.5]:11434", + "http://[::ffff:192.168.1.20]:11434", + ])("keeps the default idle watchdog for non-private IPv6 baseUrl %s", (baseUrl) => { + expect(resolveLlmIdleTimeoutMs({ model: { baseUrl } })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); + }); + + it.each([ + "http://10.0.0.5evil:11434", + "http://127.0.0.1foo:11434", + "http://192.168.1.20attacker.com:11434", + "http://10.0.0.5.evil.com:11434", + "http://1.2.3.4.5:11434", + ])( + "keeps the default idle watchdog for numeric-looking hostnames that are not IPv4 literals (%s)", + (baseUrl) => { + expect(resolveLlmIdleTimeoutMs({ model: { baseUrl } })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); + }, + ); + + it("keeps the default idle watchdog for remote provider baseUrls", () => { + expect(resolveLlmIdleTimeoutMs({ model: { baseUrl: "https://api.openai.com/v1" } })).toBe( + DEFAULT_LLM_IDLE_TIMEOUT_MS, + ); + expect(resolveLlmIdleTimeoutMs({ model: { baseUrl: "https://ollama.com" } })).toBe( + DEFAULT_LLM_IDLE_TIMEOUT_MS, + ); + }); + + it("ignores malformed baseUrl and keeps the default idle watchdog", () => { + expect(resolveLlmIdleTimeoutMs({ model: { baseUrl: "not-a-url" } })).toBe( + DEFAULT_LLM_IDLE_TIMEOUT_MS, + ); + expect(resolveLlmIdleTimeoutMs({ model: { baseUrl: "" } })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS); + }); + + it("still honors an explicit provider request timeout for local providers", () => { + expect( + resolveLlmIdleTimeoutMs({ + model: { baseUrl: "http://127.0.0.1:11434" }, + modelRequestTimeoutMs: 600_000, + }), + ).toBe(600_000); + }); + + it("still applies agents.defaults.timeoutSeconds cap for local providers", () => { + const cfg = { agents: { defaults: { timeoutSeconds: 30 } } } as OpenClawConfig; + expect(resolveLlmIdleTimeoutMs({ cfg, model: { baseUrl: "http://127.0.0.1:11434" } })).toBe( + 30_000, + ); + }); }); describe("streamWithIdleTimeout", () => { diff --git a/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts b/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts index 2d680247894..a282ce90801 100644 --- a/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts +++ b/src/agents/pi-embedded-runner/run/llm-idle-timeout.ts @@ -15,6 +15,82 @@ export const DEFAULT_LLM_IDLE_TIMEOUT_MS = DEFAULT_LLM_IDLE_TIMEOUT_SECONDS * 10 */ const MAX_SAFE_TIMEOUT_MS = 2_147_000_000; +/** + * Detects loopback / private-network / `.local` base URLs. Local providers + * (Ollama, LM Studio, llama.cpp) legitimately stay silent for many minutes + * during prompt evaluation and thinking, so the network-silence-as-hang + * heuristic that motivates the default idle watchdog does not apply. + * + * Coverage scope: + * - IPv4 loopback (RFC 5735, full 127/8), RFC 1918 private, RFC 6598 shared + * CGNAT (100.64/10 — Tailscale/Headscale IPv4 mesh), `0.0.0.0`, `localhost`, + * and `*.local` mDNS (RFC 6762). + * - IPv6 loopback `::1`, IPv6 unique local `fc00::/7` (RFC 4193 — Tailscale's + * IPv6 mesh `fd7a:115c:a1e0::/48` falls in this range), and IPv6 link-local + * `fe80::/10` (RFC 4291). + * - IPv4-mapped IPv6 covers loopback only (`::ffff:127.0.0.1`, + * `::ffff:7f00:1`); private IPv4 in mapped form is intentionally not + * matched, mirroring the SSRF-policy helper in + * `src/cron/isolated-agent/model-preflight.runtime.ts`. + * - DNS-resolved local aliases (e.g. an `/etc/hosts` entry mapping a custom + * hostname to a private IP) are not detected: classification keys on + * `URL.hostname` so resolution would have to happen here, and adding + * sync/async DNS to the watchdog hot path is disproportionate. 
+ *   Affected users can use the IP directly or set
+ *   `models.providers.<provider>.timeoutSeconds` explicitly.
+ */
+function isLocalProviderBaseUrl(baseUrl: string): boolean {
+  let host: string;
+  try {
+    host = new URL(baseUrl).hostname.toLowerCase();
+  } catch {
+    return false;
+  }
+  if (host.startsWith("[") && host.endsWith("]")) {
+    host = host.slice(1, -1);
+  }
+  if (
+    host === "localhost" ||
+    host === "0.0.0.0" ||
+    host === "::1" ||
+    host === "::ffff:7f00:1" ||
+    host === "::ffff:127.0.0.1" ||
+    host.endsWith(".local")
+  ) {
+    return true;
+  }
+  // IPv6 unique local (RFC 4193, fc00::/7) and link-local (RFC 4291,
+  // fe80::/10). The full first hextet is required so an abbreviated `fc::1`
+  // (which expands to `00fc:0:0:...` and is therefore not in fc00::/7)
+  // correctly stays on the cloud path. The first regex requires four hex
+  // digits then a colon; a zone identifier such as `fe80::1%eth0` is fine
+  // because the prefix still matches at the start.
+  if (/^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host)) {
+    return true;
+  }
+  // Require a strict IPv4 literal before parsing; `Number.parseInt` is
+  // permissive and would otherwise let `10.0.0.5evil` parse to [10,0,0,5]
+  // and disable the watchdog for a non-IP hostname.
+  if (!/^\d+\.\d+\.\d+\.\d+$/.test(host)) {
+    return false;
+  }
+  const octets = host.split(".").map((part) => Number.parseInt(part, 10));
+  if (octets.some((p) => !Number.isInteger(p) || p < 0 || p > 255)) {
+    return false;
+  }
+  const [a, b] = octets;
+  // RFC 5735 loopback (127/8 — full range, not just .0.1; container/sandbox
+  // setups commonly bind 127.0.0.2+), RFC 1918 private IPv4, and RFC 6598
+  // shared CGNAT (100.64/10 — used by Tailscale and similar mesh VPNs).
+  return (
+    a === 127 ||
+    a === 10 ||
+    (a === 172 && b !== undefined && b >= 16 && b <= 31) ||
+    (a === 192 && b === 168) ||
+    (a === 100 && b !== undefined && b >= 64 && b <= 127)
+  );
+}
+
 /**
  * Resolves the LLM idle timeout from configuration.
  * @returns Idle timeout in milliseconds, or 0 to disable
@@ -24,6 +100,7 @@ export function resolveLlmIdleTimeoutMs(params?: {
   trigger?: EmbeddedRunTrigger;
   runTimeoutMs?: number;
   modelRequestTimeoutMs?: number;
+  model?: { baseUrl?: string };
 }): number {
   const clampTimeoutMs = (valueMs: number) => Math.min(Math.floor(valueMs), MAX_SAFE_TIMEOUT_MS);
   const clampImplicitTimeoutMs = (valueMs: number) =>
@@ -72,6 +149,16 @@ export function resolveLlmIdleTimeoutMs(params?: {
     return 0;
   }
 
+  // The default watchdog is a network-silence-as-hang guard for cloud
+  // providers. Local providers can legitimately stream nothing for many
+  // minutes during prompt evaluation or thinking, so falling back to the
+  // default would abort valid local runs. Skip it when the baseUrl points
+  // at a loopback / private-network / `.local` host.
+  const baseUrl = params?.model?.baseUrl;
+  if (typeof baseUrl === "string" && baseUrl.length > 0 && isLocalProviderBaseUrl(baseUrl)) {
+    return 0;
+  }
+
   return DEFAULT_LLM_IDLE_TIMEOUT_MS;
 }
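
Note: the num_ctx resolution order above can be exercised standalone. A minimal sketch, not part of the patch: `Model` is a hypothetical slice of `ProviderRuntimeModel`, and the configured branch is assumed to reduce to a validated `params.num_ctx` read (the role `resolveOllamaConfiguredNumCtx` plays in the real code).

// Hypothetical slice of ProviderRuntimeModel, for illustration only.
type Model = {
  params?: { num_ctx?: number };
  contextWindow?: number;
  maxTokens?: number;
};

// Mirrors the three-step order documented on resolveOllamaNativeNumCtx:
// explicit params.num_ctx wins, then the catalog window, then undefined.
function resolveNativeNumCtx(model: Model): number | undefined {
  const configured = model.params?.num_ctx;
  if (typeof configured === "number" && Number.isFinite(configured) && configured > 0) {
    return Math.floor(configured); // step 1: explicit configuration wins
  }
  const catalog = model.contextWindow ?? model.maxTokens;
  if (typeof catalog === "number" && Number.isFinite(catalog) && catalog > 0) {
    return Math.floor(catalog); // step 2: forward the catalog window
  }
  return undefined; // step 3: leave num_ctx to the Modelfile
}

console.log(resolveNativeNumCtx({ params: { num_ctx: 8192 }, contextWindow: 131072 })); // 8192
console.log(resolveNativeNumCtx({ contextWindow: 32768 })); // 32768
console.log(resolveNativeNumCtx({ maxTokens: 65536 })); // 65536
console.log(resolveNativeNumCtx({})); // undefined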
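
The IPv6 half of `isLocalProviderBaseUrl` can be checked in isolation as well. The two regexes below are copied from the patch; the URL handling assumes Node's WHATWG `URL`, which keeps brackets on IPv6 hostnames and serializes them in canonical compressed form (so `fc::1` stays abbreviated rather than expanding to `00fc:...`).

const uniqueLocal = /^f[cd][0-9a-f]{2}:/; // fc00::/7 (RFC 4193)
const linkLocal = /^fe[89ab][0-9a-f]:/; // fe80::/10 (RFC 4291)

for (const baseUrl of [
  "http://[fd7a:115c:a1e0::1]:11434", // Tailscale ULA, inside fc00::/7
  "http://[fe80::1]:11434", // link-local, inside fe80::/10
  "http://[fc::1]:11434", // expands to 00fc:..., outside fc00::/7
  "http://[fec0::1]:11434", // deprecated site-local, outside both ranges
]) {
  // Same pipeline as the patch: parse, lowercase, strip brackets, prefix-match.
  let host = new URL(baseUrl).hostname.toLowerCase();
  if (host.startsWith("[") && host.endsWith("]")) {
    host = host.slice(1, -1);
  }
  console.log(host, uniqueLocal.test(host) || linkLocal.test(host));
}
// fd7a:115c:a1e0::1 true
// fe80::1 true
// fc::1 false
// fec0::1 false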
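
Finally, the strict IPv4-literal gate exists because `Number.parseInt` ignores trailing junk; without the regex, a hostname like `10.0.0.5evil` would parse cleanly and silently disable the watchdog. A quick check with the same regex as the patch:

console.log(Number.parseInt("5evil", 10)); // 5, trailing junk is ignored

const strictIpv4 = /^\d+\.\d+\.\d+\.\d+$/;
for (const host of ["10.0.0.5", "10.0.0.5evil", "1.2.3.4.5"]) {
  console.log(host, strictIpv4.test(host));
}
// 10.0.0.5 true (goes on to the octet range check)
// 10.0.0.5evil false (rejected before any parseInt call)
// 1.2.3.4.5 false (five dotted groups is not an IPv4 literal)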