mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:20:43 +00:00
fix(ollama): restore catalog-driven num_ctx for native /api/chat (#76181)
Merged via squash.
Prepared head SHA: d3142252d5
Co-authored-by: openperf <80630709+openperf@users.noreply.github.com>
Co-authored-by: openperf <80630709+openperf@users.noreply.github.com>
Reviewed-by: @openperf
This commit is contained in:
@@ -144,6 +144,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Slack: allow draft preview streaming in top-level DMs when `replyToMode` is `off` while keeping Slack native streaming and assistant thread status gated on reply threads. Fixes #56480. (#56544) Thanks @HangGlidersRule.
|
||||
- Control UI/chat: remove the delete-confirm popover outside-click listener on every dismiss path, so Cancel, Delete, outside clicks, and same-button toggles no longer leave stale document listeners behind. Refs #75590 and #69982. Thanks @Ricardo-M-L.
|
||||
- Memory-core: treat exhausted file watcher limits as non-fatal for builtin memory auto-sync while preserving fatal handling for unrelated disk-full errors. (#73357) Thanks @solodmd.
|
||||
- Providers/Ollama: restore catalog context-window forwarding as `num_ctx` for native `/api/chat` requests; fixes tool selection and context truncation regressions on models with catalog entries (qwen3, llama3, gemma3, …) when no explicit `params.num_ctx` was configured. Fixes #76117. (#76181) Thanks @openperf.
|
||||
|
||||
## 2026.5.2
|
||||
|
||||
|
||||
@@ -208,7 +208,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
|
||||
};
|
||||
expect(requestBody.think).toBe(false);
|
||||
expect(requestBody.options?.think).toBeUndefined();
|
||||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||||
expect(requestBody.options?.num_ctx).toBe(131072);
|
||||
},
|
||||
);
|
||||
});
|
||||
@@ -310,7 +310,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
|
||||
};
|
||||
expect(requestBody.think).toBe("low");
|
||||
expect(requestBody.options?.think).toBeUndefined();
|
||||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||||
expect(requestBody.options?.num_ctx).toBe(131072);
|
||||
},
|
||||
);
|
||||
});
|
||||
@@ -405,7 +405,7 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
|
||||
};
|
||||
expect(requestBody.think).toBe("high");
|
||||
expect(requestBody.options?.think).toBeUndefined();
|
||||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||||
expect(requestBody.options?.num_ctx).toBe(131072);
|
||||
},
|
||||
);
|
||||
});
|
||||
@@ -1602,7 +1602,9 @@ describe("createOllamaStreamFn", () => {
|
||||
if (!requestBody.options) {
|
||||
throw new Error("Expected Ollama request options");
|
||||
}
|
||||
expect(requestBody.options?.num_ctx).toBeUndefined();
|
||||
// Catalog `contextWindow` flows through as `num_ctx` so the request
|
||||
// does not silently truncate to Ollama's small Modelfile default.
|
||||
expect(requestBody.options?.num_ctx).toBe(131072);
|
||||
expect(requestBody.options.num_predict).toBe(123);
|
||||
},
|
||||
);
|
||||
@@ -1657,6 +1659,89 @@ describe("createOllamaStreamFn", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("omits num_ctx when the model has no params.num_ctx and no catalog window", async () => {
  // Minimal native /api/chat NDJSON exchange: one content delta followed by
  // the done:true stats record, so the stream terminates cleanly.
  await withMockNdjsonFetch(
    [
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
    ],
    async (fetchMock) => {
      const stream = await createOllamaTestStream({
        baseUrl: "http://ollama-host:11434",
        // Override the helper default contextWindow back to undefined so the
        // request body should leave Ollama's Modelfile to decide num_ctx.
        model: { contextWindow: undefined },
      });

      // Drain the stream so the HTTP request is actually issued.
      await collectStreamEvents(stream);

      const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
      if (typeof requestInit.body !== "string") {
        throw new Error("Expected string request body");
      }
      const requestBody = JSON.parse(requestInit.body) as {
        options?: { num_ctx?: number };
      };
      // With neither params.num_ctx nor a catalog window known, the native
      // path must not guess a value.
      expect(requestBody.options?.num_ctx).toBeUndefined();
    },
  );
});
|
||||
|
||||
it("falls back to catalog contextWindow as num_ctx when params.num_ctx is unset", async () => {
  // Minimal native /api/chat NDJSON exchange: one content delta followed by
  // the done:true stats record, so the stream terminates cleanly.
  await withMockNdjsonFetch(
    [
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
    ],
    async (fetchMock) => {
      const stream = await createOllamaTestStream({
        baseUrl: "http://ollama-host:11434",
        // No explicit params.num_ctx is configured, so the catalog
        // contextWindow is expected to flow through to the request body.
        model: { contextWindow: 32768 },
      });

      // Drain the stream so the HTTP request is actually issued.
      await collectStreamEvents(stream);

      const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
      if (typeof requestInit.body !== "string") {
        throw new Error("Expected string request body");
      }
      const requestBody = JSON.parse(requestInit.body) as {
        options?: { num_ctx?: number };
      };
      expect(requestBody.options?.num_ctx).toBe(32768);
    },
  );
});
|
||||
|
||||
it("falls back to catalog maxTokens as num_ctx when contextWindow is absent", async () => {
  // Minimal native /api/chat NDJSON exchange: one content delta followed by
  // the done:true stats record, so the stream terminates cleanly.
  await withMockNdjsonFetch(
    [
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
      '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
    ],
    async (fetchMock) => {
      const stream = await createOllamaTestStream({
        baseUrl: "http://ollama-host:11434",
        // The helper default contextWindow is overridden back to undefined so
        // the right side of `model.contextWindow ?? model.maxTokens` is the
        // load-bearing branch.
        model: { contextWindow: undefined, maxTokens: 65536 },
      });

      // Drain the stream so the HTTP request is actually issued.
      await collectStreamEvents(stream);

      const requestInit = getGuardedFetchCall(fetchMock).init ?? {};
      if (typeof requestInit.body !== "string") {
        throw new Error("Expected string request body");
      }
      const requestBody = JSON.parse(requestInit.body) as {
        options?: { num_ctx?: number };
      };
      expect(requestBody.options?.num_ctx).toBe(65536);
    },
  );
});
|
||||
|
||||
it("maps configured native Ollama params.thinking=max to the stable top-level think value", async () => {
|
||||
await withMockNdjsonFetch(
|
||||
[
|
||||
|
||||
@@ -290,6 +290,34 @@ function resolveOllamaNumCtx(model: ProviderRuntimeModel): number {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves num_ctx for native /api/chat requests:
|
||||
* 1. explicit `params.num_ctx` set on the model wins,
|
||||
* 2. otherwise the catalog `contextWindow` / `maxTokens` is forwarded so
|
||||
* OpenClaw's known model windows survive the trip and `/api/chat` does
|
||||
* not silently truncate to Ollama's small Modelfile default (typically
|
||||
* 2048 tokens) — which is too small for a system prompt plus tool
|
||||
* definitions and produces "model picks wrong tools / says nonsense"
|
||||
* symptoms on agent turns,
|
||||
* 3. when neither is known, return undefined so the Modelfile decides.
|
||||
*
|
||||
* This intentionally differs from `resolveOllamaNumCtx` by not falling back
|
||||
* to `DEFAULT_CONTEXT_TOKENS`: that constant is a sane wrapper-side guess for
|
||||
* the OpenAI-compat path, but on the native path we prefer to leave num_ctx
|
||||
* absent rather than guess a window for an unknown model.
|
||||
*/
|
||||
function resolveOllamaNativeNumCtx(model: ProviderRuntimeModel): number | undefined {
|
||||
const configured = resolveOllamaConfiguredNumCtx(model);
|
||||
if (configured !== undefined) {
|
||||
return configured;
|
||||
}
|
||||
const catalog = model.contextWindow ?? model.maxTokens;
|
||||
if (typeof catalog === "number" && Number.isFinite(catalog) && catalog > 0) {
|
||||
return Math.floor(catalog);
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string, unknown> {
|
||||
const options: Record<string, unknown> = {};
|
||||
const params = model.params;
|
||||
@@ -303,7 +331,7 @@ function resolveOllamaModelOptions(model: ProviderRuntimeModel): Record<string,
|
||||
}
|
||||
}
|
||||
}
|
||||
const numCtx = resolveOllamaConfiguredNumCtx(model);
|
||||
const numCtx = resolveOllamaNativeNumCtx(model);
|
||||
if (numCtx !== undefined) {
|
||||
options.num_ctx = numCtx;
|
||||
}
|
||||
|
||||
@@ -2141,6 +2141,7 @@ export async function runEmbeddedAttempt(
|
||||
trigger: params.trigger,
|
||||
runTimeoutMs: params.timeoutMs !== configuredRunTimeoutMs ? params.timeoutMs : undefined,
|
||||
modelRequestTimeoutMs: (params.model as { requestTimeoutMs?: number }).requestTimeoutMs,
|
||||
model: params.model as { baseUrl?: string },
|
||||
});
|
||||
if (idleTimeoutMs > 0) {
|
||||
activeSession.agent.streamFn = streamWithIdleTimeout(
|
||||
|
||||
@@ -85,6 +85,121 @@ describe("resolveLlmIdleTimeoutMs", () => {
|
||||
const cfg = { agents: { defaults: { timeoutSeconds: 300 } } } as OpenClawConfig;
|
||||
expect(resolveLlmIdleTimeoutMs({ cfg, trigger: "cron" })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
it.each([
|
||||
"http://localhost:11434",
|
||||
"http://127.0.0.1:11434",
|
||||
"http://127.0.0.2:11434",
|
||||
"http://127.255.255.254:11434",
|
||||
"http://0.0.0.0:11434",
|
||||
"http://[::1]:11434",
|
||||
"http://my-rig.local:11434",
|
||||
"http://10.0.0.5:11434",
|
||||
"http://172.16.5.10:11434",
|
||||
"http://172.31.99.1:11434",
|
||||
"http://192.168.1.20:11434",
|
||||
"http://100.64.0.5:11434",
|
||||
"http://100.127.255.254:11434",
|
||||
// RFC 4193 IPv6 unique local (Tailscale IPv6 mesh fd7a:115c:a1e0::/48
|
||||
// falls inside fc00::/7).
|
||||
"http://[fc00::1]:11434",
|
||||
"http://[fd00::1]:11434",
|
||||
"http://[fd7a:115c:a1e0::dead:beef]:11434",
|
||||
"http://[fdff:ffff:ffff:ffff:ffff:ffff:ffff:ffff]:11434",
|
||||
// RFC 4291 IPv6 link-local.
|
||||
"http://[fe80::1]:11434",
|
||||
"http://[fe9a::1]:11434",
|
||||
"http://[feab:cd::1]:11434",
|
||||
"http://[febf::1]:11434",
|
||||
])("disables the default idle watchdog for local provider baseUrl %s", (baseUrl) => {
|
||||
expect(resolveLlmIdleTimeoutMs({ model: { baseUrl } })).toBe(0);
|
||||
});
|
||||
|
||||
it.each([
|
||||
"http://172.32.0.1:11434",
|
||||
"http://192.169.1.1:11434",
|
||||
"http://100.63.255.254:11434",
|
||||
"http://100.128.0.1:11434",
|
||||
])("keeps the default idle watchdog for non-private IPv4 baseUrl %s", (baseUrl) => {
|
||||
expect(resolveLlmIdleTimeoutMs({ model: { baseUrl } })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
// Node's URL parser normalizes every IPv4-mapped loopback form
|
||||
// (`::ffff:127.0.0.1`, `::ffff:7F00:1`, mixed case, …) to the canonical
|
||||
// `::ffff:7f00:1`. Exercise the user-facing input shapes here so the full
|
||||
// parse → lowercase → bracket-strip → exact-match chain is regression-tested
|
||||
// against future URL parser behavior, not just the canonical literal.
|
||||
it.each([
|
||||
"http://[::ffff:127.0.0.1]:11434",
|
||||
"http://[::ffff:7f00:1]:11434",
|
||||
"http://[::FFFF:127.0.0.1]:11434",
|
||||
])("disables the default idle watchdog for IPv4-mapped loopback baseUrl %s", (baseUrl) => {
|
||||
expect(resolveLlmIdleTimeoutMs({ model: { baseUrl } })).toBe(0);
|
||||
});
|
||||
|
||||
it.each([
|
||||
// Just outside fc00::/7 (fe.. and 00fc::/16 are not unique-local).
|
||||
"http://[fec0::1]:11434",
|
||||
"http://[fbff::1]:11434",
|
||||
// Just outside fe80::/10 (fec0:: was deprecated site-local, fe7f:: not LL).
|
||||
"http://[fe7f::1]:11434",
|
||||
// Public IPv6.
|
||||
"http://[2001:db8::1]:11434",
|
||||
// Abbreviated `fc::1` expands to 00fc:0:0:...:1, first byte is 0x00, not
|
||||
// 0xfc — outside fc00::/7. Strict first-hextet match keeps this remote.
|
||||
"http://[fc::1]:11434",
|
||||
// IPv4-mapped IPv6 outside loopback (private RFC 1918 in mapped form is
|
||||
// intentionally not matched, mirroring the SSRF policy helper).
|
||||
"http://[::ffff:10.0.0.5]:11434",
|
||||
"http://[::ffff:192.168.1.20]:11434",
|
||||
])("keeps the default idle watchdog for non-private IPv6 baseUrl %s", (baseUrl) => {
|
||||
expect(resolveLlmIdleTimeoutMs({ model: { baseUrl } })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
it.each([
|
||||
"http://10.0.0.5evil:11434",
|
||||
"http://127.0.0.1foo:11434",
|
||||
"http://192.168.1.20attacker.com:11434",
|
||||
"http://10.0.0.5.evil.com:11434",
|
||||
"http://1.2.3.4.5:11434",
|
||||
])(
|
||||
"keeps the default idle watchdog for numeric-looking hostnames that are not IPv4 literals (%s)",
|
||||
(baseUrl) => {
|
||||
expect(resolveLlmIdleTimeoutMs({ model: { baseUrl } })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
|
||||
},
|
||||
);
|
||||
|
||||
it("keeps the default idle watchdog for remote provider baseUrls", () => {
|
||||
expect(resolveLlmIdleTimeoutMs({ model: { baseUrl: "https://api.openai.com/v1" } })).toBe(
|
||||
DEFAULT_LLM_IDLE_TIMEOUT_MS,
|
||||
);
|
||||
expect(resolveLlmIdleTimeoutMs({ model: { baseUrl: "https://ollama.com" } })).toBe(
|
||||
DEFAULT_LLM_IDLE_TIMEOUT_MS,
|
||||
);
|
||||
});
|
||||
|
||||
it("ignores malformed baseUrl and keeps the default idle watchdog", () => {
|
||||
expect(resolveLlmIdleTimeoutMs({ model: { baseUrl: "not-a-url" } })).toBe(
|
||||
DEFAULT_LLM_IDLE_TIMEOUT_MS,
|
||||
);
|
||||
expect(resolveLlmIdleTimeoutMs({ model: { baseUrl: "" } })).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
it("still honors an explicit provider request timeout for local providers", () => {
|
||||
expect(
|
||||
resolveLlmIdleTimeoutMs({
|
||||
model: { baseUrl: "http://127.0.0.1:11434" },
|
||||
modelRequestTimeoutMs: 600_000,
|
||||
}),
|
||||
).toBe(600_000);
|
||||
});
|
||||
|
||||
it("still applies agents.defaults.timeoutSeconds cap for local providers", () => {
|
||||
const cfg = { agents: { defaults: { timeoutSeconds: 30 } } } as OpenClawConfig;
|
||||
expect(resolveLlmIdleTimeoutMs({ cfg, model: { baseUrl: "http://127.0.0.1:11434" } })).toBe(
|
||||
30_000,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("streamWithIdleTimeout", () => {
|
||||
|
||||
@@ -15,6 +15,82 @@ export const DEFAULT_LLM_IDLE_TIMEOUT_MS = DEFAULT_LLM_IDLE_TIMEOUT_SECONDS * 10
|
||||
*/
|
||||
const MAX_SAFE_TIMEOUT_MS = 2_147_000_000;
|
||||
|
||||
/**
|
||||
* Detects loopback / private-network / `.local` base URLs. Local providers
|
||||
* (Ollama, LM Studio, llama.cpp) legitimately stay silent for many minutes
|
||||
* during prompt evaluation and thinking, so the network-silence-as-hang
|
||||
* heuristic that motivates the default idle watchdog does not apply.
|
||||
*
|
||||
* Coverage scope:
|
||||
* - IPv4 loopback (RFC 5735, full 127/8), RFC 1918 private, RFC 6598 shared
|
||||
* CGNAT (100.64/10 — Tailscale/Headscale IPv4 mesh), `0.0.0.0`, `localhost`,
|
||||
* and `*.local` mDNS (RFC 6762).
|
||||
* - IPv6 loopback `::1`, IPv6 unique local `fc00::/7` (RFC 4193 — Tailscale's
|
||||
* IPv6 mesh `fd7a:115c:a1e0::/48` falls in this range), and IPv6 link-local
|
||||
* `fe80::/10` (RFC 4291).
|
||||
* - IPv4-mapped IPv6 covers loopback only (`::ffff:127.0.0.1`,
|
||||
* `::ffff:7f00:1`); private IPv4 in mapped form is intentionally not
|
||||
* matched, mirroring the SSRF-policy helper in
|
||||
* `src/cron/isolated-agent/model-preflight.runtime.ts`.
|
||||
* - DNS-resolved local aliases (e.g. an `/etc/hosts` entry mapping a custom
|
||||
* hostname to a private IP) are not detected: classification keys on
|
||||
* `URL.hostname` so resolution would have to happen here, and adding
|
||||
* sync/async DNS to the watchdog hot path is disproportionate. Affected
|
||||
* users can use the IP directly or set
|
||||
* `models.providers.<id>.timeoutSeconds` explicitly.
|
||||
*/
|
||||
function isLocalProviderBaseUrl(baseUrl: string): boolean {
|
||||
let host: string;
|
||||
try {
|
||||
host = new URL(baseUrl).hostname.toLowerCase();
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
if (host.startsWith("[") && host.endsWith("]")) {
|
||||
host = host.slice(1, -1);
|
||||
}
|
||||
if (
|
||||
host === "localhost" ||
|
||||
host === "0.0.0.0" ||
|
||||
host === "::1" ||
|
||||
host === "::ffff:7f00:1" ||
|
||||
host === "::ffff:127.0.0.1" ||
|
||||
host.endsWith(".local")
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
// IPv6 unique local (RFC 4193, fc00::/7) and link-local (RFC 4291,
|
||||
// fe80::/10). The full first hextet is required so an abbreviated `fc::1`
|
||||
// (which expands to `00fc:0:0:...` and is therefore not in fc00::/7)
|
||||
// correctly stays on the cloud path. The first regex requires four hex
|
||||
// digits then a colon; a zone identifier such as `fe80::1%eth0` is fine
|
||||
// because the prefix still matches at the start.
|
||||
if (/^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host)) {
|
||||
return true;
|
||||
}
|
||||
// Require a strict IPv4 literal before parsing; `Number.parseInt` is
|
||||
// permissive and would otherwise let `10.0.0.5evil` parse to [10,0,0,5]
|
||||
// and disable the watchdog for a non-IP hostname.
|
||||
if (!/^\d+\.\d+\.\d+\.\d+$/.test(host)) {
|
||||
return false;
|
||||
}
|
||||
const octets = host.split(".").map((part) => Number.parseInt(part, 10));
|
||||
if (octets.some((p) => !Number.isInteger(p) || p < 0 || p > 255)) {
|
||||
return false;
|
||||
}
|
||||
const [a, b] = octets;
|
||||
// RFC 5735 loopback (127/8 — full range, not just .0.1; container/sandbox
|
||||
// setups commonly bind 127.0.0.2+), RFC 1918 private IPv4, and RFC 6598
|
||||
// shared CGNAT (100.64/10 — used by Tailscale and similar mesh VPNs).
|
||||
return (
|
||||
a === 127 ||
|
||||
a === 10 ||
|
||||
(a === 172 && b !== undefined && b >= 16 && b <= 31) ||
|
||||
(a === 192 && b === 168) ||
|
||||
(a === 100 && b !== undefined && b >= 64 && b <= 127)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves the LLM idle timeout from configuration.
|
||||
* @returns Idle timeout in milliseconds, or 0 to disable
|
||||
@@ -24,6 +100,7 @@ export function resolveLlmIdleTimeoutMs(params?: {
|
||||
trigger?: EmbeddedRunTrigger;
|
||||
runTimeoutMs?: number;
|
||||
modelRequestTimeoutMs?: number;
|
||||
model?: { baseUrl?: string };
|
||||
}): number {
|
||||
const clampTimeoutMs = (valueMs: number) => Math.min(Math.floor(valueMs), MAX_SAFE_TIMEOUT_MS);
|
||||
const clampImplicitTimeoutMs = (valueMs: number) =>
|
||||
@@ -72,6 +149,16 @@ export function resolveLlmIdleTimeoutMs(params?: {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// The default watchdog is a network-silence-as-hang guard for cloud providers.
|
||||
// Local providers can legitimately stream nothing for many minutes during
|
||||
// prompt evaluation or thinking, so falling back to the default would abort
|
||||
// valid local runs. Honor it only when the user has not opted out via the
|
||||
// baseUrl pointing at loopback / private-network / `.local`.
|
||||
const baseUrl = params?.model?.baseUrl;
|
||||
if (typeof baseUrl === "string" && baseUrl.length > 0 && isLocalProviderBaseUrl(baseUrl)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return DEFAULT_LLM_IDLE_TIMEOUT_MS;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user