fix(ollama): scope request timeouts to providers

Peter Steinberger
2026-04-27 04:54:57 +01:00
parent 6b6f8ab1aa
commit 18b76e3995
21 changed files with 227 additions and 6 deletions

View File

@@ -45,6 +45,7 @@ Docs: https://docs.openclaw.ai
- Providers/Ollama: accept OpenAI SDK-style `baseURL` as an alias for `baseUrl` across discovery, streaming, setup pulls, embeddings, and web search so remote Ollama hosts are not silently ignored. Fixes #62533; supersedes #62549. Thanks @Julien-BKK and @Linux2010.
- Providers/Ollama: scope synthetic local auth and embedding bearer headers to declared Ollama host boundaries so cloud keys are not sent to local/self-hosted embedding endpoints and remote/cloud Ollama endpoints no longer receive the `ollama-local` marker as if it were a real token. Supersedes #69261 and #69857; refs #43945. Thanks @hyspacex, @maxramsay, and @Meli73.
- Providers/Ollama: resolve custom-named local Ollama providers such as `ollama-remote` through the Ollama synthetic-auth hook so subagents no longer miss `ollama-local` auth and silently fall back to cloud models. Fixes #43945. Thanks @Meli73 and @maxramsay.
- Providers/Ollama: add provider-scoped model request timeouts, thread them through guarded fetch connect/header/body/abort handling, and document `params.keep_alive` for cold local models so first-turn Ollama loads no longer require global agent timeout changes. Fixes #64541 and #68796; supersedes #65143 and #66511. Thanks @LittleJakub, @Juankcba, @uninhibite-scholar, and @yfge.
- Providers/PDF/Ollama: add bounded network timeouts for Ollama model pulls and native Anthropic/Gemini PDF analysis requests so unresponsive provider endpoints no longer hang sessions indefinitely. Fixes #54142; supersedes #54144 and #54145. Thanks @jinduwang1001-max and @arkyu2077.
- Memory/doctor: treat Ollama memory embeddings as key-optional so `openclaw doctor` no longer warns about a missing API key when the gateway reports embeddings are ready. Fixes #46584. Thanks @fengly78.
- Agents/Ollama: apply provider-owned replay turn normalization to native Ollama chat so Cloud models no longer reject non-alternating replay history in agent/Gateway runs. Fixes #71697. Thanks @ismael-81.

View File

@@ -1,4 +1,4 @@
0c3eaaee031f0adec2fcfc8a3a6a0d80dfc19d4d1c10b0ff4249b30e04b3c47d config-baseline.json
420269ce22f17382cb253c80a232329e943296be101cda313506341ae39cc674 config-baseline.core.json
6fceeca87ecf3245c9f3a184f1ec66c8dee8df6e5a14c6d9d1924557f8d36408 config-baseline.json
15b6223907d0930307e950752e6498edc40f7df597e8e36914490f7611eab413 config-baseline.core.json
07963db49502132f26db396c56b36e018b110e6c55a68b3cb012d3ec96f43901 config-baseline.channel.json
74b74cb18ac37c0acaa765f398f1f9edbcee4c43567f02d45c89598a1e13afb4 config-baseline.plugin.json

View File

@@ -163,6 +163,7 @@ surfaces, while Codex native hooks remain a separate lower-level Codex mechanism
- `agent.wait` default: 30s (just the wait). `timeoutMs` param overrides.
- Agent runtime: `agents.defaults.timeoutSeconds` default 172800s (48 hours); enforced in `runEmbeddedPiAgent` abort timer.
- LLM idle timeout: `agents.defaults.llm.idleTimeoutSeconds` aborts a model request when no response chunks arrive before the idle window. Set it explicitly for slow local models or reasoning/tool-call providers; set it to 0 to disable. If it is not set, OpenClaw uses `agents.defaults.timeoutSeconds` when configured, otherwise 120s. Cron-triggered runs with no explicit LLM or agent timeout disable the idle watchdog and rely on the cron outer timeout.
- Provider HTTP request timeout: `models.providers.<id>.timeoutSeconds` applies only to that provider's model HTTP fetches, including connect, headers, body, and total guarded-fetch abort handling. Use this for slow local/self-hosted providers such as Ollama before raising the whole agent runtime timeout.
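For orientation, a minimal json5 sketch of how the provider-scoped timeout sits alongside the agent-level settings above (the agent value is the documented default; the Ollama value is illustrative):
```json5
{
  agents: {
    defaults: { timeoutSeconds: 172800 }, // whole agent run budget (48h default)
  },
  models: {
    providers: {
      ollama: {
        timeoutSeconds: 300, // HTTP budget per model request, this provider only
      },
    },
  },
}
```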
## Where things can end early

View File

@@ -429,6 +429,7 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
- `models.providers.*.api`: request adapter (`openai-completions`, `openai-responses`, `anthropic-messages`, `google-generative-ai`, etc.).
- `models.providers.*.apiKey`: provider credential (prefer SecretRef/env substitution).
- `models.providers.*.auth`: auth strategy (`api-key`, `token`, `oauth`, `aws-sdk`).
- `models.providers.*.timeoutSeconds`: optional per-provider model HTTP request timeout in seconds, including connect, headers, body, and total request abort handling.
- `models.providers.*.injectNumCtxForOpenAICompat`: for Ollama + `openai-completions`, inject `options.num_ctx` into requests (default: `true`).
- `models.providers.*.authHeader`: force credential transport in the `Authorization` header when required.
- `models.providers.*.baseUrl`: upstream API base URL.
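As a brief illustration, these fields combine per provider; a hedged sketch for a self-hosted Ollama endpoint (host and values are examples, not defaults):
```json5
{
  models: {
    providers: {
      ollama: {
        baseUrl: "http://ollama-host:11434",
        api: "ollama",
        apiKey: "ollama-local",
        timeoutSeconds: 300, // per-provider HTTP request timeout
      },
    },
  },
}
```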

View File

@@ -296,6 +296,16 @@ OpenClaw rejects image-description requests for models that are not marked image
apiKey: "ollama-local",
baseUrl: "http://ollama-host:11434", // No /v1 - use native Ollama API URL
api: "ollama", // Set explicitly to guarantee native tool-calling behavior
timeoutSeconds: 300, // Optional: give cold local models longer to connect and stream
models: [
{
id: "qwen3:32b",
name: "qwen3:32b",
params: {
keep_alive: "15m", // Optional: keep the model loaded between turns
},
},
],
},
},
},
@@ -330,6 +340,33 @@ Custom Ollama provider ids are also supported. When a model ref uses the active
provider prefix, such as `ollama-spark/qwen3:32b`, OpenClaw strips only that
prefix before calling Ollama so the server receives `qwen3:32b`.
For slow local models, prefer provider-scoped request tuning before raising the
whole agent runtime timeout:
```json5
{
models: {
providers: {
ollama: {
timeoutSeconds: 300,
models: [
{
id: "gemma4:26b",
name: "gemma4:26b",
params: { keep_alive: "15m" },
},
],
},
},
},
}
```
`timeoutSeconds` applies to the model HTTP request, including connection setup,
headers, body streaming, and the total guarded-fetch abort. `params.keep_alive`
is forwarded to Ollama as top-level `keep_alive` on native `/api/chat` requests;
set it per model when first-turn load time is the bottleneck.
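For reference, a sketch of the native `/api/chat` body this produces, following the field placement exercised by this commit's tests (message content and values are illustrative):
```json5
{
  model: "qwen3:32b",
  messages: [{ role: "user", content: "Reply exactly OK." }],
  keep_alive: "15m", // forwarded top-level, not nested under options
  options: { num_ctx: 4096 }, // other model params stay under options
}
```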
## Ollama Web Search
OpenClaw supports **Ollama Web Search** as a bundled `web_search` provider.
@@ -535,6 +572,32 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
```
</Accordion>
<Accordion title="Cold local model times out">
Large local models can take a long time to load before streaming begins. Keep the timeout scoped to the Ollama provider, and optionally ask Ollama to keep the model loaded between turns:
```json5
{
models: {
providers: {
ollama: {
timeoutSeconds: 300,
models: [
{
id: "gemma4:26b",
name: "gemma4:26b",
params: { keep_alive: "15m" },
},
],
},
},
},
}
```
If the host itself is slow to accept connections, `timeoutSeconds` also extends the guarded Undici connect timeout for this provider.
</Accordion>
</AccordionGroup>
<Note>

View File

@@ -27,6 +27,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
| {
model?: string;
think?: boolean;
keep_alive?: string;
options?: { num_ctx?: number; top_p?: number };
tools?: Array<{
function?: {
@@ -44,7 +45,8 @@ describe.skipIf(!LIVE)("ollama live", () => {
api: "ollama",
provider: PROVIDER_ID,
contextWindow: 8192,
params: { num_ctx: 4096, top_p: 0.9, thinking: false },
params: { num_ctx: 4096, top_p: 0.9, thinking: false, keep_alive: "5m" },
requestTimeoutMs: 120_000,
} as never,
{
messages: [{ role: "user", content: "Reply exactly OK." }],
@@ -85,6 +87,7 @@ describe.skipIf(!LIVE)("ollama live", () => {
expect(payload?.options?.num_ctx).toBe(4096);
expect(payload?.options?.top_p).toBe(0.9);
expect(payload?.think).toBe(false);
expect(payload?.keep_alive).toBe("5m");
const properties = payload?.tools?.[0]?.function?.parameters?.properties;
expect(properties?.city?.type).toBe("string");
expect(properties?.units?.type).toBe("string");

View File

@@ -23,6 +23,7 @@ type GuardedFetchCall = {
url: string;
init?: RequestInit;
policy?: unknown;
timeoutMs?: number;
auditContext?: string;
};
@@ -264,6 +265,25 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => {
);
});
it("passes resolved provider request timeouts to native Ollama chat fetches", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}',
'{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}',
],
async (fetchMock) => {
const stream = await createOllamaTestStream({
baseUrl: "http://ollama-host:11434",
model: { requestTimeoutMs: 450_000 },
});
await collectStreamEvents(stream);
expect(getGuardedFetchCall(fetchMock).timeoutMs).toBe(450_000);
},
);
});
it("maps native Ollama max thinking to think=high on the wire", async () => {
await withMockNdjsonFetch(
[

View File

@@ -817,6 +817,15 @@ function resolveOllamaModelHeaders(model: {
return model.headers as Record<string, string>;
}
function resolveOllamaRequestTimeoutMs(
model: object,
options: { requestTimeoutMs?: unknown } | undefined,
): number | undefined {
const raw =
options?.requestTimeoutMs ?? (model as { requestTimeoutMs?: unknown }).requestTimeoutMs;
return typeof raw === "number" && Number.isFinite(raw) && raw > 0 ? Math.floor(raw) : undefined;
}
export function createOllamaStreamFn(
baseUrl: string,
defaultHeaders?: Record<string, string>,
@@ -874,6 +883,10 @@ export function createOllamaStreamFn(
signal: options?.signal,
},
policy: ssrfPolicy,
timeoutMs: resolveOllamaRequestTimeoutMs(
model,
options as { requestTimeoutMs?: unknown } | undefined,
),
auditContext: "ollama-stream.chat",
});

View File

@@ -22,6 +22,7 @@ export type InlineProviderConfig = {
models?: ModelDefinitionConfig[];
headers?: unknown;
authHeader?: boolean;
timeoutSeconds?: ModelProviderConfig["timeoutSeconds"];
request?: ModelProviderConfig["request"];
};

View File

@@ -414,6 +414,35 @@ describe("resolveModel", () => {
});
});
it("resolves provider request timeout metadata for configured provider models", () => {
mockDiscoveredModel(discoverModels, {
provider: "ollama",
modelId: "qwen3:32b",
templateModel: {
...makeModel("qwen3:32b"),
provider: "ollama",
},
});
const cfg = {
models: {
providers: {
ollama: {
baseUrl: "http://localhost:11434",
timeoutSeconds: 300,
models: [makeModel("qwen3:32b")],
},
},
},
} as unknown as OpenClawConfig;
const result = resolveModelForTest("ollama", "qwen3:32b", "/tmp/agent", cfg);
expect(result.error).toBeUndefined();
expect((result.model as { requestTimeoutMs?: number } | undefined)?.requestTimeoutMs).toBe(
300_000,
);
});
it("applies agent default model params without explicit provider config", () => {
mockDiscoveredModel(discoverModels, {
provider: "ollama",

View File

@@ -260,6 +260,17 @@ function resolveProviderTransport(params: {
};
}
function resolveProviderRequestTimeoutMs(timeoutSeconds: unknown): number | undefined {
if (
typeof timeoutSeconds !== "number" ||
!Number.isFinite(timeoutSeconds) ||
timeoutSeconds <= 0
) {
return undefined;
}
return Math.floor(timeoutSeconds) * 1000;
}
function matchesProviderScopedModelId(params: {
candidateId?: string;
provider: string;
@@ -430,6 +441,7 @@ function applyConfiguredProviderOverrides(params: {
preferDiscoveredModelMetadata?: boolean;
}): ProviderRuntimeModel {
const { discoveredModel, providerConfig, modelId } = params;
const requestTimeoutMs = resolveProviderRequestTimeoutMs(providerConfig?.timeoutSeconds);
const defaultModelParams = findConfiguredAgentModelParams({
cfg: params.cfg,
provider: params.provider,
@@ -471,6 +483,7 @@ function applyConfiguredProviderOverrides(params: {
!configuredModel &&
!providerConfig.baseUrl &&
!providerConfig.api &&
requestTimeoutMs === undefined &&
!providerHeaders &&
!providerRequest
) {
@@ -481,6 +494,7 @@ function applyConfiguredProviderOverrides(params: {
return {
...discoveredModel,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
headers: discoveredHeaders,
};
}
@@ -531,6 +545,7 @@ function applyConfiguredProviderOverrides(params: {
contextTokens: metadataOverrideModel?.contextTokens ?? discoveredModel.contextTokens,
maxTokens: metadataOverrideModel?.maxTokens ?? discoveredModel.maxTokens,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
headers: requestConfig.headers,
compat: metadataOverrideModel?.compat ?? discoveredModel.compat,
},
@@ -547,6 +562,7 @@ function resolveExplicitModelWithRegistry(params: {
}): { kind: "resolved"; model: Model<Api> } | { kind: "suppressed" } | undefined {
const { provider, modelId, modelRegistry, cfg, agentDir, runtimeHooks } = params;
const providerConfig = resolveConfiguredProviderConfig(cfg, provider);
const requestTimeoutMs = resolveProviderRequestTimeoutMs(providerConfig?.timeoutSeconds);
if (
shouldSuppressBuiltInModel({
provider,
@@ -578,6 +594,7 @@ function resolveExplicitModelWithRegistry(params: {
model: {
...inlineMatch,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
} as Model<Api>,
runtimeHooks,
}),
@@ -627,6 +644,7 @@ function resolveExplicitModelWithRegistry(params: {
model: {
...fallbackInlineMatch,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
} as Model<Api>,
runtimeHooks,
}),
@@ -699,6 +717,7 @@ function resolveConfiguredFallbackModel(params: {
}): Model<Api> | undefined {
const { provider, modelId, cfg, agentDir, runtimeHooks } = params;
const providerConfig = resolveConfiguredProviderConfig(cfg, provider);
const requestTimeoutMs = resolveProviderRequestTimeoutMs(providerConfig?.timeoutSeconds);
const configuredModel = findConfiguredProviderModel(providerConfig, provider, modelId);
const providerHeaders = sanitizeModelHeaders(providerConfig?.headers, {
stripSecretRefMarkers: true,
@@ -763,6 +782,7 @@ function resolveConfiguredFallbackModel(params: {
providerConfig?.models?.[0]?.maxTokens ??
DEFAULT_CONTEXT_TOKENS,
...(resolvedParams ? { params: resolvedParams } : {}),
...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
headers: requestConfig.headers,
} as Model<Api>,
providerRequest,

View File

@@ -94,6 +94,26 @@ describe("buildGuardedModelFetch", () => {
);
});
it("threads resolved provider timeout metadata into the shared guarded fetch seam", async () => {
const { buildGuardedModelFetch } = await import("./provider-transport-fetch.js");
const model = {
id: "qwen3:32b",
provider: "ollama",
api: "ollama",
baseUrl: "http://127.0.0.1:11434",
requestTimeoutMs: 300_000,
} as unknown as Model<"ollama">;
const fetcher = buildGuardedModelFetch(model);
await fetcher("http://127.0.0.1:11434/api/chat", { method: "POST" });
expect(fetchWithSsrFGuardMock).toHaveBeenCalledWith(
expect.objectContaining({
timeoutMs: 300_000,
}),
);
});
it("does not force explicit debug proxy overrides onto plain HTTP model transports", async () => {
process.env.OPENCLAW_DEBUG_PROXY_ENABLED = "1";
process.env.OPENCLAW_DEBUG_PROXY_URL = "http://127.0.0.1:7799";

View File

@@ -154,9 +154,23 @@ function resolveModelRequestPolicy(model: Model<Api>) {
});
}
function resolveModelRequestTimeoutMs(
model: Model<Api>,
timeoutMs: number | undefined,
): number | undefined {
if (timeoutMs !== undefined) {
return timeoutMs;
}
const modelTimeoutMs = (model as { requestTimeoutMs?: unknown }).requestTimeoutMs;
return typeof modelTimeoutMs === "number" && Number.isFinite(modelTimeoutMs) && modelTimeoutMs > 0
? Math.floor(modelTimeoutMs)
: undefined;
}
export function buildGuardedModelFetch(model: Model<Api>, timeoutMs?: number): typeof fetch {
const requestConfig = resolveModelRequestPolicy(model);
const dispatcherPolicy = buildProviderRequestDispatcherPolicy(requestConfig);
const requestTimeoutMs = resolveModelRequestTimeoutMs(model, timeoutMs);
return async (input, init) => {
const request = input instanceof Request ? new Request(input, init) : undefined;
const url =
@@ -189,7 +203,7 @@ export function buildGuardedModelFetch(model: Model<Api>, timeoutMs?: number): t
},
},
dispatcherPolicy,
timeoutMs,
timeoutMs: requestTimeoutMs,
// Provider transport intentionally keeps the secure default and never
// replays unsafe request bodies across cross-origin redirects.
allowCrossOriginUnsafeRedirectReplay: false,

View File

@@ -1554,6 +1554,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
},
timeoutSeconds: {
type: "integer",
exclusiveMinimum: 0,
maximum: 9007199254740991,
title: "Model Provider Request Timeout",
description:
"Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
},
injectNumCtxForOpenAICompat: {
type: "boolean",
title: "Model Provider Inject num_ctx (OpenAI Compat)",
@@ -26477,6 +26485,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
tags: ["models"],
},
"models.providers.*.timeoutSeconds": {
label: "Model Provider Request Timeout",
help: "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
tags: ["performance", "models"],
},
"models.providers.*.injectNumCtxForOpenAICompat": {
label: "Model Provider Inject num_ctx (OpenAI Compat)",
help: "Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.",

View File

@@ -826,6 +826,8 @@ export const FIELD_HELP: Record<string, string> = {
'Selects provider auth style: "api-key" for API key auth, "token" for bearer token auth, "oauth" for OAuth credentials, and "aws-sdk" for AWS credential resolution. Match this to your provider requirements.',
"models.providers.*.api":
"Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.",
"models.providers.*.timeoutSeconds":
"Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
"models.providers.*.injectNumCtxForOpenAICompat":
"Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.",
"models.providers.*.headers":

View File

@@ -515,6 +515,7 @@ export const FIELD_LABELS: Record<string, string> = {
"models.providers.*.apiKey": "Model Provider API Key", // pragma: allowlist secret
"models.providers.*.auth": "Model Provider Auth Mode",
"models.providers.*.api": "Model Provider API Adapter",
"models.providers.*.timeoutSeconds": "Model Provider Request Timeout",
"models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)",
"models.providers.*.headers": "Model Provider Headers",
"models.providers.*.authHeader": "Model Provider Authorization Header",

View File

@@ -119,6 +119,7 @@ export type ModelProviderConfig = {
apiKey?: SecretInput;
auth?: ModelProviderAuthMode;
api?: ModelApi;
timeoutSeconds?: number;
injectNumCtxForOpenAICompat?: boolean;
headers?: Record<string, SecretInput>;
authHeader?: boolean;

View File

@@ -357,6 +357,7 @@ export const ModelProviderSchema = z
.union([z.literal("api-key"), z.literal("aws-sdk"), z.literal("oauth"), z.literal("token")])
.optional(),
api: ModelApiSchema.optional(),
timeoutSeconds: z.number().int().positive().optional(),
injectNumCtxForOpenAICompat: z.boolean().optional(),
headers: z.record(z.string(), SecretInputSchema.register(sensitive)).optional(),
authHeader: z.boolean().optional(),

View File

@@ -126,6 +126,7 @@ describe("createPinnedDispatcher", () => {
expect(agentCtor).toHaveBeenCalledWith({
connect: {
lookup,
timeout: 123_456,
},
allowH2: false,
bodyTimeout: 123_456,
@@ -265,6 +266,9 @@ describe("createPinnedDispatcher", () => {
autoSelectFamily: false,
lookup,
},
connect: {
timeout: 654_321,
},
allowH2: false,
bodyTimeout: 654_321,
headersTimeout: 654_321,

View File

@@ -23,6 +23,10 @@ const HTTP1_ONLY_DISPATCHER_OPTIONS = Object.freeze({
allowH2: false as const,
});
function isObjectRecord(value: unknown): value is Record<string, unknown> {
return Boolean(value && typeof value === "object" && !Array.isArray(value));
}
function isUndiciRuntimeDeps(value: unknown): value is UndiciRuntimeDeps {
return (
typeof value === "object" &&
@@ -62,8 +66,16 @@ function withHttp1OnlyDispatcherOptions<T extends object | undefined>(
// Enforce HTTP/1.1-only — must come after options to prevent accidental override
Object.assign(base, HTTP1_ONLY_DISPATCHER_OPTIONS);
if (timeoutMs !== undefined && Number.isFinite(timeoutMs) && timeoutMs > 0) {
(base as Record<string, unknown>).bodyTimeout = timeoutMs;
(base as Record<string, unknown>).headersTimeout = timeoutMs;
const normalizedTimeoutMs = Math.floor(timeoutMs);
const baseRecord = base as Record<string, unknown>;
baseRecord.bodyTimeout = normalizedTimeoutMs;
baseRecord.headersTimeout = normalizedTimeoutMs;
// undici also accepts `connect` as a custom connector function; only merge a
// connect timeout when `connect` is absent or a plain options object.
if (typeof baseRecord.connect !== "function") {
baseRecord.connect = {
...(isObjectRecord(baseRecord.connect) ? baseRecord.connect : {}),
timeout: normalizedTimeoutMs,
};
}
}
return base;
}

View File

@@ -7,4 +7,5 @@ import type { Api, Model } from "@mariozechner/pi-ai";
export type ProviderRuntimeModel = Model<Api> & {
contextTokens?: number;
params?: Record<string, unknown>;
requestTimeoutMs?: number;
};