fix(ollama): scope request timeouts to providers

Peter Steinberger
2026-04-27 04:54:57 +01:00
parent 6b6f8ab1aa
commit 18b76e3995
21 changed files with 227 additions and 6 deletions

View File

@@ -45,6 +45,7 @@ Docs: https://docs.openclaw.ai
- Providers/Ollama: accept OpenAI SDK-style `baseURL` as an alias for `baseUrl` across discovery, streaming, setup pulls, embeddings, and web search so remote Ollama hosts are not silently ignored. Fixes #62533; supersedes #62549. Thanks @Julien-BKK and @Linux2010.
- Providers/Ollama: scope synthetic local auth and embedding bearer headers to declared Ollama host boundaries so cloud keys are not sent to local/self-hosted embedding endpoints and remote/cloud Ollama endpoints no longer receive the `ollama-local` marker as if it were a real token. Supersedes #69261 and #69857; refs #43945. Thanks @hyspacex, @maxramsay, and @Meli73.
- Providers/Ollama: resolve custom-named local Ollama providers such as `ollama-remote` through the Ollama synthetic-auth hook so subagents no longer miss `ollama-local` auth and silently fall back to cloud models. Fixes #43945. Thanks @Meli73 and @maxramsay.
- Providers/Ollama: add provider-scoped model request timeouts, thread them through guarded fetch connect/header/body/abort handling, and document `params.keep_alive` for cold local models so first-turn Ollama loads no longer require global agent timeout changes. Fixes #64541 and #68796; supersedes #65143 and #66511. Thanks @LittleJakub, @Juankcba, @uninhibite-scholar, and @yfge.
- Providers/PDF/Ollama: add bounded network timeouts for Ollama model pulls and native Anthropic/Gemini PDF analysis requests so unresponsive provider endpoints no longer hang sessions indefinitely. Fixes #54142; supersedes #54144 and #54145. Thanks @jinduwang1001-max and @arkyu2077.
- Memory/doctor: treat Ollama memory embeddings as key-optional so `openclaw doctor` no longer warns about a missing API key when the gateway reports embeddings are ready. Fixes #46584. Thanks @fengly78.
- Agents/Ollama: apply provider-owned replay turn normalization to native Ollama chat so Cloud models no longer reject non-alternating replay history in agent/Gateway runs. Fixes #71697. Thanks @ismael-81.

View File

@@ -1,4 +1,4 @@
0c3eaaee031f0adec2fcfc8a3a6a0d80dfc19d4d1c10b0ff4249b30e04b3c47d config-baseline.json
420269ce22f17382cb253c80a232329e943296be101cda313506341ae39cc674 config-baseline.core.json
6fceeca87ecf3245c9f3a184f1ec66c8dee8df6e5a14c6d9d1924557f8d36408 config-baseline.json
15b6223907d0930307e950752e6498edc40f7df597e8e36914490f7611eab413 config-baseline.core.json
07963db49502132f26db396c56b36e018b110e6c55a68b3cb012d3ec96f43901 config-baseline.channel.json
74b74cb18ac37c0acaa765f398f1f9edbcee4c43567f02d45c89598a1e13afb4 config-baseline.plugin.json

View File

@@ -163,6 +163,7 @@ surfaces, while Codex native hooks remain a separate lower-level Codex mechanism
- `agent.wait` default: 30s (just the wait). `timeoutMs` param overrides.
- Agent runtime: `agents.defaults.timeoutSeconds` default 172800s (48 hours); enforced in `runEmbeddedPiAgent` abort timer.
- LLM idle timeout: `agents.defaults.llm.idleTimeoutSeconds` aborts a model request when no response chunks arrive before the idle window. Set it explicitly for slow local models or reasoning/tool-call providers; set it to 0 to disable. If it is not set, OpenClaw uses `agents.defaults.timeoutSeconds` when configured, otherwise 120s. Cron-triggered runs with no explicit LLM or agent timeout disable the idle watchdog and rely on the cron outer timeout.
- Provider HTTP request timeout: `models.providers.<id>.timeoutSeconds` applies only to that provider's model HTTP fetches, including connect, headers, body, and total guarded-fetch abort handling. Use this for slow local/self-hosted providers such as Ollama before raising the whole agent runtime timeout.
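For orientation, a minimal json5 sketch of how the provider-scoped timeout sits alongside the agent-level settings above (the agent value is the documented default; the Ollama value is illustrative):
```json5
{
  agents: {
    defaults: { timeoutSeconds: 172800 }, // whole agent run budget (48h default)
  },
  models: {
    providers: {
      ollama: {
        timeoutSeconds: 300, // HTTP budget per model request, this provider only
      },
    },
  },
}
```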
## Where things can end early

View File

@@ -429,6 +429,7 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
- `models.providers.*.api`: request adapter (`openai-completions`, `openai-responses`, `anthropic-messages`, `google-generative-ai`, etc.).
- `models.providers.*.apiKey`: provider credential (prefer SecretRef/env substitution).
- `models.providers.*.auth`: auth strategy (`api-key`, `token`, `oauth`, `aws-sdk`).
- `models.providers.*.timeoutSeconds`: optional per-provider model HTTP request timeout in seconds, including connect, headers, body, and total request abort handling.
- `models.providers.*.injectNumCtxForOpenAICompat`: for Ollama + `openai-completions`, inject `options.num_ctx` into requests (default: `true`).
- `models.providers.*.authHeader`: force credential transport in the `Authorization` header when required.
- `models.providers.*.baseUrl`: upstream API base URL.
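As a brief illustration, these fields combine per provider; a hedged sketch for a self-hosted Ollama endpoint (host and values are examples, not defaults):
```json5
{
  models: {
    providers: {
      ollama: {
        baseUrl: "http://ollama-host:11434",
        api: "ollama",
        apiKey: "ollama-local",
        timeoutSeconds: 300, // per-provider HTTP request timeout
      },
    },
  },
}
```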

View File

@@ -296,6 +296,16 @@ OpenClaw rejects image-description requests for models that are not marked image
apiKey: "ollama-local",
baseUrl: "http://ollama-host:11434", // No /v1 - use native Ollama API URL
api: "ollama", // Set explicitly to guarantee native tool-calling behavior
timeoutSeconds: 300, // Optional: give cold local models longer to connect and stream
models: [
{
id: "qwen3:32b",
name: "qwen3:32b",
params: {
keep_alive: "15m", // Optional: keep the model loaded between turns
},
},
],
},
},
},
@@ -330,6 +340,33 @@ Custom Ollama provider ids are also supported. When a model ref uses the active
provider prefix, such as `ollama-spark/qwen3:32b`, OpenClaw strips only that
prefix before calling Ollama so the server receives `qwen3:32b`.
For slow local models, prefer provider-scoped request tuning before raising the
whole agent runtime timeout:
```json5
{
models: {
providers: {
ollama: {
timeoutSeconds: 300,
models: [
{
id: "gemma4:26b",
name: "gemma4:26b",
params: { keep_alive: "15m" },
},
],
},
},
},
}
```
`timeoutSeconds` applies to the model HTTP request, including connection setup,
headers, body streaming, and the total guarded-fetch abort. `params.keep_alive`
is forwarded to Ollama as top-level `keep_alive` on native `/api/chat` requests;
set it per model when first-turn load time is the bottleneck.
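For reference, a sketch of the native `/api/chat` body this produces, following the field placement exercised by this commit's tests (message content and values are illustrative):
```json5
{
  model: "qwen3:32b",
  messages: [{ role: "user", content: "Reply exactly OK." }],
  keep_alive: "15m", // forwarded top-level, not nested under options
  options: { num_ctx: 4096 }, // other model params stay under options
}
```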
## Ollama Web Search
OpenClaw supports **Ollama Web Search** as a bundled `web_search` provider.
@@ -535,6 +572,32 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
```
</Accordion>
<Accordion title="Cold local model times out">
Large local models can take a long time to load before streaming begins. Keep the timeout scoped to the Ollama provider, and optionally ask Ollama to keep the model loaded between turns:
```json5
{
models: {
providers: {
ollama: {
timeoutSeconds: 300,
models: [
{
id: "gemma4:26b",
name: "gemma4:26b",
params: { keep_alive: "15m" },
},
],
},
},
},
}
```
If the host itself is slow to accept connections, `timeoutSeconds` also extends the guarded Undici connect timeout for this provider.
</Accordion>
</AccordionGroup>
<Note>

View File

@@ -27,6 +27,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
| {
model?: string;
think?: boolean;
keep_alive?: string;
options?: { num_ctx?: number; top_p?: number };
tools?: Array<{
function?: {
@@ -44,7 +45,8 @@ describe.skipIf(!LIVE)("ollama live", () => {
api: "ollama",
provider: PROVIDER_ID,
contextWindow: 8192,
params: { num_ctx: 4096, top_p: 0.9, thinking: false },
params: { num_ctx: 4096, top_p: 0.9, thinking: false, keep_alive: "5m" },
requestTimeoutMs: 120_000,
} as never,
{
messages: [{ role: "user", content: "Reply exactly OK." }],
@@ -85,6 +87,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
expect(payload?.options?.num_ctx).toBe(4096);
expect(payload?.options?.top_p).toBe(0.9);
expect(payload?.think).toBe(false);
expect(payload?.keep_alive).toBe("5m");
const properties = payload?.tools?.[0]?.function?.parameters?.properties;
expect(properties?.city?.type).toBe("string");
expect(properties?.units?.type).toBe("string");

View File

@@ -23,6 +23,7 @@ type GuardedFetchCall = {
url: string;
init?: RequestInit;
policy?: unknown;
timeoutMs?: number;
auditContext?: string;
};
@@ -264,6 +265,25 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
);
});
it("passes resolved provider request timeouts to native Ollama chat fetches", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
],
async (fetchMock) => {
const stream = await createOllamaTestStream({
baseUrl: "http://ollama-host:11434",
model: { requestTimeoutMs: 450_000 },
});
await collectStreamEvents(stream);
expect(getGuardedFetchCall(fetchMock).timeoutMs).toBe(450_000);
},
);
});
it("maps native Ollama max thinking to think=high on the wire", async () => {
await withMockNdjsonFetch(
[

View File

@@ -817,6 +817,15 @@ function resolveOllamaModelHeaders(model: {
return model.headers as Record<string, string>;
}
function resolveOllamaRequestTimeoutMs(
model: object,
options: { requestTimeoutMs?: unknown } | undefined,
): number | undefined {
const raw =
options?.requestTimeoutMs ?? (model as { requestTimeoutMs?: unknown }).requestTimeoutMs;
return typeof raw === "number" && Number.isFinite(raw) && raw > 0 ? Math.floor(raw) : undefined;
}
export function createOllamaStreamFn(
baseUrl: string,
defaultHeaders?: Record<string, string>,
@@ -874,6 +883,10 @@ export function createOllamaStreamFn(
signal: options?.signal,
},
policy: ssrfPolicy,
timeoutMs: resolveOllamaRequestTimeoutMs(
model,
options as { requestTimeoutMs?: unknown } | undefined,
),
auditContext: "ollama-stream.chat",
});

View File

@@ -22,6 +22,7 @@ export type InlineProviderConfig = {
models?: ModelDefinitionConfig[];
headers?: unknown;
authHeader?: boolean;
timeoutSeconds?: ModelProviderConfig["timeoutSeconds"];
request?: ModelProviderConfig["request"];
};

View File

@@ -414,6 +414,35 @@ describe("resolveModel", () => {
});
});
it("resolves provider request timeout metadata for configured provider models", () => {
mockDiscoveredModel(discoverModels, {
provider: "ollama",
modelId: "qwen3:32b",
templateModel: {
...makeModel("qwen3:32b"),
provider: "ollama",
},
});
const cfg = {
models: {
providers: {
ollama: {
baseUrl: "http://localhost:11434",
timeoutSeconds: 300,
models: [makeModel("qwen3:32b")],
},
},
},
} as unknown as OpenClawConfig;
const result = resolveModelForTest("ollama", "qwen3:32b", "/tmp/agent", cfg);
expect(result.error).toBeUndefined();
expect((result.model as { requestTimeoutMs?: number } | undefined)?.requestTimeoutMs).toBe(
300_000,
);
});
it("applies agent default model params without explicit provider config", () => {
mockDiscoveredModel(discoverModels, {
provider: "ollama",

View File

@@ -260,6 +260,17 @@ function resolveProviderTransport(params: {
};
}
function resolveProviderRequestTimeoutMs(timeoutSeconds: unknown): number | undefined {
if (
typeof timeoutSeconds !== "number" ||
!Number.isFinite(timeoutSeconds) ||
timeoutSeconds <= 0
) {
return undefined;
}
return Math.floor(timeoutSeconds) * 1000;
}
function matchesProviderScopedModelId(params: {
candidateId?: string;
provider: string;
@@ -430,6 +441,7 @@ function applyConfiguredProviderOverrides(params: {
preferDiscoveredModelMetadata?: boolean;
}): ProviderRuntimeModel {
const { discoveredModel, providerConfig, modelId } = params;
const requestTimeoutMs = resolveProviderRequestTimeoutMs(providerConfig?.timeoutSeconds);
const defaultModelParams = findConfiguredAgentModelParams({
cfg: params.cfg,
provider: params.provider,
@@ -471,6 +483,7 @@ function applyConfiguredProviderOverrides(params: {
!configuredModel &&
!providerConfig.baseUrl &&
!providerConfig.api &&
requestTimeoutMs === undefined &&
!providerHeaders &&
!providerRequest
) {
@@ -481,6 +494,7 @@ function applyConfiguredProviderOverrides(params: {
return {
...discoveredModel,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
headers: discoveredHeaders,
};
}
@@ -531,6 +545,7 @@ function applyConfiguredProviderOverrides(params: {
contextTokens: metadataOverrideModel?.contextTokens ?? discoveredModel.contextTokens,
maxTokens: metadataOverrideModel?.maxTokens ?? discoveredModel.maxTokens,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
headers: requestConfig.headers,
compat: metadataOverrideModel?.compat ?? discoveredModel.compat,
},
@@ -547,6 +562,7 @@ function resolveExplicitModelWithRegistry(params: {
}): { kind: "resolved"; model: Model<Api> } | { kind: "suppressed" } | undefined {
const { provider, modelId, modelRegistry, cfg, agentDir, runtimeHooks } = params;
const providerConfig = resolveConfiguredProviderConfig(cfg, provider);
const requestTimeoutMs = resolveProviderRequestTimeoutMs(providerConfig?.timeoutSeconds);
if (
shouldSuppressBuiltInModel({
provider,
@@ -578,6 +594,7 @@ function resolveExplicitModelWithRegistry(params: {
model: {
...inlineMatch,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
} as Model<Api>,
runtimeHooks,
}),
@@ -627,6 +644,7 @@ function resolveExplicitModelWithRegistry(params: {
model: {
...fallbackInlineMatch,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
} as Model<Api>,
runtimeHooks,
}),
@@ -699,6 +717,7 @@ function resolveConfiguredFallbackModel(params: {
}): Model<Api> | undefined {
const { provider, modelId, cfg, agentDir, runtimeHooks } = params;
const providerConfig = resolveConfiguredProviderConfig(cfg, provider);
const requestTimeoutMs = resolveProviderRequestTimeoutMs(providerConfig?.timeoutSeconds);
const configuredModel = findConfiguredProviderModel(providerConfig, provider, modelId);
const providerHeaders = sanitizeModelHeaders(providerConfig?.headers, {
stripSecretRefMarkers: true,
@@ -763,6 +782,7 @@ function resolveConfiguredFallbackModel(params: {
providerConfig?.models?.[0]?.maxTokens ??
DEFAULT_CONTEXT_TOKENS,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
headers: requestConfig.headers,
} as Model<Api>,
providerRequest,

View File

@@ -94,6 +94,26 @@ describe("buildGuardedModelFetch", () => {
);
});
it("threads resolved provider timeout metadata into the shared guarded fetch seam", async () => {
const { buildGuardedModelFetch } = await import("./provider-transport-fetch.js");
const model = {
id: "qwen3:32b",
provider: "ollama",
api: "ollama",
baseUrl: "http://127.0.0.1:11434",
requestTimeoutMs: 300_000,
} as unknown as Model<"ollama">;
const fetcher = buildGuardedModelFetch(model);
await fetcher("http://127.0.0.1:11434/api/chat", { method: "POST" });
expect(fetchWithSsrFGuardMock).toHaveBeenCalledWith(
expect.objectContaining({
timeoutMs: 300_000,
}),
);
});
it("does not force explicit debug proxy overrides onto plain HTTP model transports", async () => {
process.env.OPENCLAW_DEBUG_PROXY_ENABLED = "1";
process.env.OPENCLAW_DEBUG_PROXY_URL = "http://127.0.0.1:7799";

View File

@@ -154,9 +154,23 @@ function resolveModelRequestPolicy(model: Model<Api>) {
});
}
function resolveModelRequestTimeoutMs(
model: Model<Api>,
timeoutMs: number | undefined,
): number | undefined {
if (timeoutMs !== undefined) {
return timeoutMs;
}
const modelTimeoutMs = (model as { requestTimeoutMs?: unknown }).requestTimeoutMs;
return typeof modelTimeoutMs === "number" && Number.isFinite(modelTimeoutMs) && modelTimeoutMs > 0
? Math.floor(modelTimeoutMs)
: undefined;
}
export function buildGuardedModelFetch(model: Model<Api>, timeoutMs?: number): typeof fetch {
const requestConfig = resolveModelRequestPolicy(model);
const dispatcherPolicy = buildProviderRequestDispatcherPolicy(requestConfig);
const requestTimeoutMs = resolveModelRequestTimeoutMs(model, timeoutMs);
return async (input, init) => {
const request = input instanceof Request ? new Request(input, init) : undefined;
const url =
@@ -189,7 +203,7 @@ export function buildGuardedModelFetch(model: Model<Api>, timeoutMs?: number): t
},
},
dispatcherPolicy,
timeoutMs,
timeoutMs: requestTimeoutMs,
// Provider transport intentionally keeps the secure default and never
// replays unsafe request bodies across cross-origin redirects.
allowCrossOriginUnsafeRedirectReplay: false,

View File

@@ -1554,6 +1554,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
},
timeoutSeconds: {
type: "integer",
exclusiveMinimum: 0,
maximum: 9007199254740991,
title: "Model Provider Request Timeout",
description:
"Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
},
injectNumCtxForOpenAICompat: {
type: "boolean",
title: "Model Provider Inject num_ctx (OpenAI Compat)",
@@ -26477,6 +26485,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
tags: ["models"],
},
"models.providers.*.timeoutSeconds": {
label: "Model Provider Request Timeout",
help: "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
tags: ["performance", "models"],
},
"models.providers.*.injectNumCtxForOpenAICompat": {
label: "Model Provider Inject num_ctx (OpenAI Compat)",
help: "Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.",

View File

@@ -826,6 +826,8 @@ export const FIELD_HELP: Record<string, string> = {
'Selects provider auth style: "api-key" for API key auth, "token" for bearer token auth, "oauth" for OAuth credentials, and "aws-sdk" for AWS credential resolution. Match this to your provider requirements.',
"models.providers.*.api":
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
"models.providers.*.timeoutSeconds":
"Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
"models.providers.*.injectNumCtxForOpenAICompat":
"Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.",
"models.providers.*.headers":

View File

@@ -515,6 +515,7 @@ export const FIELD_LABELS: Record<string, string> = {
"models.providers.*.apiKey": "Model Provider API Key", // pragma: allowlist secret
"models.providers.*.auth": "Model Provider Auth Mode",
"models.providers.*.api": "Model Provider API Adapter",
"models.providers.*.timeoutSeconds": "Model Provider Request Timeout",
"models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)",
"models.providers.*.headers": "Model Provider Headers",
"models.providers.*.authHeader": "Model Provider Authorization Header",

View File

@@ -119,6 +119,7 @@ export type ModelProviderConfig = {
apiKey?: SecretInput;
auth?: ModelProviderAuthMode;
api?: ModelApi;
timeoutSeconds?: number;
injectNumCtxForOpenAICompat?: boolean;
headers?: Record<string, SecretInput>;
authHeader?: boolean;

View File

@@ -357,6 +357,7 @@ export const ModelProviderSchema = z
.union([z.literal("api-key"), z.literal("aws-sdk"), z.literal("oauth"), z.literal("token")])
.optional(),
api: ModelApiSchema.optional(),
timeoutSeconds: z.number().int().positive().optional(),
injectNumCtxForOpenAICompat: z.boolean().optional(),
headers: z.record(z.string(), SecretInputSchema.register(sensitive)).optional(),
authHeader: z.boolean().optional(),

View File

@@ -126,6 +126,7 @@ describe("createPinnedDispatcher", () => {
expect(agentCtor).toHaveBeenCalledWith({
connect: {
lookup,
timeout: 123_456,
},
allowH2: false,
bodyTimeout: 123_456,
@@ -265,6 +266,9 @@ describe("createPinnedDispatcher", () => {
autoSelectFamily: false,
lookup,
},
connect: {
timeout: 654_321,
},
allowH2: false,
bodyTimeout: 654_321,
headersTimeout: 654_321,

View File

@@ -23,6 +23,10 @@ const HTTP1_ONLY_DISPATCHER_OPTIONS = Object.freeze({
allowH2: false as const,
});
function isObjectRecord(value: unknown): value is Record<string, unknown> {
return Boolean(value && typeof value === "object" && !Array.isArray(value));
}
function isUndiciRuntimeDeps(value: unknown): value is UndiciRuntimeDeps {
return (
typeof value === "object" &&
@@ -62,8 +66,16 @@ function withHttp1OnlyDispatcherOptions<T extends object | undefined>(
// Enforce HTTP/1.1-only — must come after options to prevent accidental override
Object.assign(base, HTTP1_ONLY_DISPATCHER_OPTIONS);
if (timeoutMs !== undefined && Number.isFinite(timeoutMs) && timeoutMs > 0) {
(base as Record<string, unknown>).bodyTimeout = timeoutMs;
(base as Record<string, unknown>).headersTimeout = timeoutMs;
const normalizedTimeoutMs = Math.floor(timeoutMs);
const baseRecord = base as Record<string, unknown>;
baseRecord.bodyTimeout = normalizedTimeoutMs;
baseRecord.headersTimeout = normalizedTimeoutMs;
// undici also accepts `connect` as a custom connector function; only merge a
// connect timeout when `connect` is absent or a plain options object.
if (typeof baseRecord.connect !== "function") {
baseRecord.connect = {
...(isObjectRecord(baseRecord.connect) ? baseRecord.connect : {}),
timeout: normalizedTimeoutMs,
};
}
}
return base;
}

View File

@@ -7,4 +7,5 @@ import type { Api, Model } from "@mariozechner/pi-ai";
export type ProviderRuntimeModel = Model<Api> & {
contextTokens?: number;
params?: Record<string, unknown>;
requestTimeoutMs?: number;
};