diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e1036a39fe..0eb0f60ee60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ Docs: https://docs.openclaw.ai - Providers/Ollama: accept OpenAI SDK-style `baseURL` as an alias for `baseUrl` across discovery, streaming, setup pulls, embeddings, and web search so remote Ollama hosts are not silently ignored. Fixes #62533; supersedes #62549. Thanks @Julien-BKK and @Linux2010. - Providers/Ollama: scope synthetic local auth and embedding bearer headers to declared Ollama host boundaries so cloud keys are not sent to local/self-hosted embedding endpoints and remote/cloud Ollama endpoints no longer receive the `ollama-local` marker as if it were a real token. Supersedes #69261 and #69857; refs #43945. Thanks @hyspacex, @maxramsay, and @Meli73. - Providers/Ollama: resolve custom-named local Ollama providers such as `ollama-remote` through the Ollama synthetic-auth hook so subagents no longer miss `ollama-local` auth and silently fall back to cloud models. Fixes #43945. Thanks @Meli73 and @maxramsay. +- Providers/Ollama: add provider-scoped model request timeouts, thread them through guarded fetch connect/header/body/abort handling, and document `params.keep_alive` for cold local models so first-turn Ollama loads no longer require global agent timeout changes. Fixes #64541 and #68796; supersedes #65143 and #66511. Thanks @LittleJakub, @Juankcba, @uninhibite-scholar, and @yfge. - Providers/PDF/Ollama: add bounded network timeouts for Ollama model pulls and native Anthropic/Gemini PDF analysis requests so unresponsive provider endpoints no longer hang sessions indefinitely. Fixes #54142; supersedes #54144 and #54145. Thanks @jinduwang1001-max and @arkyu2077. - Memory/doctor: treat Ollama memory embeddings as key-optional so `openclaw doctor` no longer warns about a missing API key when the gateway reports embeddings are ready. Fixes #46584. Thanks @fengly78. 
- Agents/Ollama: apply provider-owned replay turn normalization to native Ollama chat so Cloud models no longer reject non-alternating replay history in agent/Gateway runs. Fixes #71697. Thanks @ismael-81. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 15ec791ff4e..45b535a755c 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -0c3eaaee031f0adec2fcfc8a3a6a0d80dfc19d4d1c10b0ff4249b30e04b3c47d config-baseline.json -420269ce22f17382cb253c80a232329e943296be101cda313506341ae39cc674 config-baseline.core.json +6fceeca87ecf3245c9f3a184f1ec66c8dee8df6e5a14c6d9d1924557f8d36408 config-baseline.json +15b6223907d0930307e950752e6498edc40f7df597e8e36914490f7611eab413 config-baseline.core.json 07963db49502132f26db396c56b36e018b110e6c55a68b3cb012d3ec96f43901 config-baseline.channel.json 74b74cb18ac37c0acaa765f398f1f9edbcee4c43567f02d45c89598a1e13afb4 config-baseline.plugin.json diff --git a/docs/concepts/agent-loop.md b/docs/concepts/agent-loop.md index 6f9c5f57afc..9916974f26a 100644 --- a/docs/concepts/agent-loop.md +++ b/docs/concepts/agent-loop.md @@ -163,6 +163,7 @@ surfaces, while Codex native hooks remain a separate lower-level Codex mechanism - `agent.wait` default: 30s (just the wait). `timeoutMs` param overrides. - Agent runtime: `agents.defaults.timeoutSeconds` default 172800s (48 hours); enforced in `runEmbeddedPiAgent` abort timer. - LLM idle timeout: `agents.defaults.llm.idleTimeoutSeconds` aborts a model request when no response chunks arrive before the idle window. Set it explicitly for slow local models or reasoning/tool-call providers; set it to 0 to disable. If it is not set, OpenClaw uses `agents.defaults.timeoutSeconds` when configured, otherwise 120s. Cron-triggered runs with no explicit LLM or agent timeout disable the idle watchdog and rely on the cron outer timeout. 
+- Provider HTTP request timeout: `models.providers.*.timeoutSeconds` applies only to that provider's model HTTP fetches, including connect, headers, body, and total guarded-fetch abort handling. Use this for slow local/self-hosted providers such as Ollama before raising the whole agent runtime timeout. ## Where things can end early diff --git a/docs/gateway/config-tools.md b/docs/gateway/config-tools.md index 1a3b2e55b07..8fd0c269a66 100644 --- a/docs/gateway/config-tools.md +++ b/docs/gateway/config-tools.md @@ -429,6 +429,7 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi - `models.providers.*.api`: request adapter (`openai-completions`, `openai-responses`, `anthropic-messages`, `google-generative-ai`, etc). - `models.providers.*.apiKey`: provider credential (prefer SecretRef/env substitution). - `models.providers.*.auth`: auth strategy (`api-key`, `token`, `oauth`, `aws-sdk`). + - `models.providers.*.timeoutSeconds`: optional per-provider model HTTP request timeout in seconds, including connect, headers, body, and total request abort handling. - `models.providers.*.injectNumCtxForOpenAICompat`: for Ollama + `openai-completions`, inject `options.num_ctx` into requests (default: `true`). - `models.providers.*.authHeader`: force credential transport in the `Authorization` header when required. - `models.providers.*.baseUrl`: upstream API base URL. 
diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md index cb88cfc2ffd..86462d99a60 100644 --- a/docs/providers/ollama.md +++ b/docs/providers/ollama.md @@ -296,6 +296,16 @@ OpenClaw rejects image-description requests for models that are not marked image apiKey: "ollama-local", baseUrl: "http://ollama-host:11434", // No /v1 - use native Ollama API URL api: "ollama", // Set explicitly to guarantee native tool-calling behavior + timeoutSeconds: 300, // Optional: give cold local models longer to connect and stream + models: [ + { + id: "qwen3:32b", + name: "qwen3:32b", + params: { + keep_alive: "15m", // Optional: keep the model loaded between turns + }, + }, + ], }, }, }, @@ -330,6 +340,33 @@ Custom Ollama provider ids are also supported. When a model ref uses the active provider prefix, such as `ollama-spark/qwen3:32b`, OpenClaw strips only that prefix before calling Ollama so the server receives `qwen3:32b`. +For slow local models, prefer provider-scoped request tuning before raising the +whole agent runtime timeout: + +```json5 +{ + models: { + providers: { + ollama: { + timeoutSeconds: 300, + models: [ + { + id: "gemma4:26b", + name: "gemma4:26b", + params: { keep_alive: "15m" }, + }, + ], + }, + }, + }, +} +``` + +`timeoutSeconds` applies to the model HTTP request, including connection setup, +headers, body streaming, and the total guarded-fetch abort. `params.keep_alive` +is forwarded to Ollama as top-level `keep_alive` on native `/api/chat` requests; +set it per model when first-turn load time is the bottleneck. + ## Ollama Web Search OpenClaw supports **Ollama Web Search** as a bundled `web_search` provider. @@ -535,6 +572,32 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s ``` + + + Large local models can need a long first load before streaming begins. 
Keep the timeout scoped to the Ollama provider, and optionally ask Ollama to keep the model loaded between turns: + + ```json5 + { + models: { + providers: { + ollama: { + timeoutSeconds: 300, + models: [ + { + id: "gemma4:26b", + name: "gemma4:26b", + params: { keep_alive: "15m" }, + }, + ], + }, + }, + }, + } + ``` + + If the host itself is slow to accept connections, `timeoutSeconds` also extends the guarded Undici connect timeout for this provider. + + diff --git a/extensions/ollama/ollama.live.test.ts b/extensions/ollama/ollama.live.test.ts index b4fb48a1b1e..fa955b85431 100644 --- a/extensions/ollama/ollama.live.test.ts +++ b/extensions/ollama/ollama.live.test.ts @@ -27,6 +27,7 @@ describe.skipIf(!LIVE)("ollama live", () => { | { model?: string; think?: boolean; + keep_alive?: string; options?: { num_ctx?: number; top_p?: number }; tools?: Array<{ function?: { @@ -44,7 +45,8 @@ describe.skipIf(!LIVE)("ollama live", () => { api: "ollama", provider: PROVIDER_ID, contextWindow: 8192, - params: { num_ctx: 4096, top_p: 0.9, thinking: false }, + params: { num_ctx: 4096, top_p: 0.9, thinking: false, keep_alive: "5m" }, + requestTimeoutMs: 120_000, } as never, { messages: [{ role: "user", content: "Reply exactly OK." 
}], @@ -85,6 +87,7 @@ describe.skipIf(!LIVE)("ollama live", () => { expect(payload?.options?.num_ctx).toBe(4096); expect(payload?.options?.top_p).toBe(0.9); expect(payload?.think).toBe(false); + expect(payload?.keep_alive).toBe("5m"); const properties = payload?.tools?.[0]?.function?.parameters?.properties; expect(properties?.city?.type).toBe("string"); expect(properties?.units?.type).toBe("string"); diff --git a/extensions/ollama/src/stream-runtime.test.ts b/extensions/ollama/src/stream-runtime.test.ts index 4fc712f26bd..1845a3b0047 100644 --- a/extensions/ollama/src/stream-runtime.test.ts +++ b/extensions/ollama/src/stream-runtime.test.ts @@ -23,6 +23,7 @@ type GuardedFetchCall = { url: string; init?: RequestInit; policy?: unknown; + timeoutMs?: number; auditContext?: string; }; @@ -264,6 +265,25 @@ describe("createConfiguredOllamaCompatStreamWrapper", () => { ); }); + it("passes resolved provider request timeouts to native Ollama chat fetches", async () => { + await withMockNdjsonFetch( + [ + '{"model":"m","created_at":"t","message":{"role":"assistant","content":"ok"},"done":false}', + '{"model":"m","created_at":"t","message":{"role":"assistant","content":""},"done":true,"prompt_eval_count":1,"eval_count":1}', + ], + async (fetchMock) => { + const stream = await createOllamaTestStream({ + baseUrl: "http://ollama-host:11434", + model: { requestTimeoutMs: 450_000 }, + }); + + await collectStreamEvents(stream); + + expect(getGuardedFetchCall(fetchMock).timeoutMs).toBe(450_000); + }, + ); + }); + it("maps native Ollama max thinking to think=high on the wire", async () => { await withMockNdjsonFetch( [ diff --git a/extensions/ollama/src/stream.ts b/extensions/ollama/src/stream.ts index aeac03084c5..d9461be70d9 100644 --- a/extensions/ollama/src/stream.ts +++ b/extensions/ollama/src/stream.ts @@ -817,6 +817,15 @@ function resolveOllamaModelHeaders(model: { return model.headers as Record; } +function resolveOllamaRequestTimeoutMs( + model: object, + options: { 
requestTimeoutMs?: unknown } | undefined, +): number | undefined { + const raw = + options?.requestTimeoutMs ?? (model as { requestTimeoutMs?: unknown }).requestTimeoutMs; + return typeof raw === "number" && Number.isFinite(raw) && raw > 0 ? Math.floor(raw) : undefined; +} + export function createOllamaStreamFn( baseUrl: string, defaultHeaders?: Record, @@ -874,6 +883,10 @@ export function createOllamaStreamFn( signal: options?.signal, }, policy: ssrfPolicy, + timeoutMs: resolveOllamaRequestTimeoutMs( + model, + options as { requestTimeoutMs?: unknown } | undefined, + ), auditContext: "ollama-stream.chat", }); diff --git a/src/agents/pi-embedded-runner/model.inline-provider.ts b/src/agents/pi-embedded-runner/model.inline-provider.ts index 27cd295c7fb..511fd35dd7e 100644 --- a/src/agents/pi-embedded-runner/model.inline-provider.ts +++ b/src/agents/pi-embedded-runner/model.inline-provider.ts @@ -22,6 +22,7 @@ export type InlineProviderConfig = { models?: ModelDefinitionConfig[]; headers?: unknown; authHeader?: boolean; + timeoutSeconds?: ModelProviderConfig["timeoutSeconds"]; request?: ModelProviderConfig["request"]; }; diff --git a/src/agents/pi-embedded-runner/model.test.ts b/src/agents/pi-embedded-runner/model.test.ts index a3b2839a44b..9f4436f4c74 100644 --- a/src/agents/pi-embedded-runner/model.test.ts +++ b/src/agents/pi-embedded-runner/model.test.ts @@ -414,6 +414,35 @@ describe("resolveModel", () => { }); }); + it("resolves provider request timeout metadata for configured provider models", () => { + mockDiscoveredModel(discoverModels, { + provider: "ollama", + modelId: "qwen3:32b", + templateModel: { + ...makeModel("qwen3:32b"), + provider: "ollama", + }, + }); + const cfg = { + models: { + providers: { + ollama: { + baseUrl: "http://localhost:11434", + timeoutSeconds: 300, + models: [makeModel("qwen3:32b")], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("ollama", "qwen3:32b", "/tmp/agent", cfg); + + 
expect(result.error).toBeUndefined(); + expect((result.model as { requestTimeoutMs?: number } | undefined)?.requestTimeoutMs).toBe( + 300_000, + ); + }); + it("applies agent default model params without explicit provider config", () => { mockDiscoveredModel(discoverModels, { provider: "ollama", diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index 2586218bce4..bf290fe95bc 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -260,6 +260,17 @@ function resolveProviderTransport(params: { }; } +function resolveProviderRequestTimeoutMs(timeoutSeconds: unknown): number | undefined { + if ( + typeof timeoutSeconds !== "number" || + !Number.isFinite(timeoutSeconds) || + timeoutSeconds <= 0 + ) { + return undefined; + } + return Math.floor(timeoutSeconds) * 1000; +} + function matchesProviderScopedModelId(params: { candidateId?: string; provider: string; @@ -430,6 +441,7 @@ function applyConfiguredProviderOverrides(params: { preferDiscoveredModelMetadata?: boolean; }): ProviderRuntimeModel { const { discoveredModel, providerConfig, modelId } = params; + const requestTimeoutMs = resolveProviderRequestTimeoutMs(providerConfig?.timeoutSeconds); const defaultModelParams = findConfiguredAgentModelParams({ cfg: params.cfg, provider: params.provider, @@ -471,6 +483,7 @@ function applyConfiguredProviderOverrides(params: { !configuredModel && !providerConfig.baseUrl && !providerConfig.api && + requestTimeoutMs === undefined && !providerHeaders && !providerRequest ) { @@ -481,6 +494,7 @@ function applyConfiguredProviderOverrides(params: { return { ...discoveredModel, ...(resolvedParams ? { params: resolvedParams } : {}), + ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}), headers: discoveredHeaders, }; } @@ -531,6 +545,7 @@ function applyConfiguredProviderOverrides(params: { contextTokens: metadataOverrideModel?.contextTokens ?? 
discoveredModel.contextTokens, maxTokens: metadataOverrideModel?.maxTokens ?? discoveredModel.maxTokens, ...(resolvedParams ? { params: resolvedParams } : {}), + ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}), headers: requestConfig.headers, compat: metadataOverrideModel?.compat ?? discoveredModel.compat, }, @@ -547,6 +562,7 @@ function resolveExplicitModelWithRegistry(params: { }): { kind: "resolved"; model: Model } | { kind: "suppressed" } | undefined { const { provider, modelId, modelRegistry, cfg, agentDir, runtimeHooks } = params; const providerConfig = resolveConfiguredProviderConfig(cfg, provider); + const requestTimeoutMs = resolveProviderRequestTimeoutMs(providerConfig?.timeoutSeconds); if ( shouldSuppressBuiltInModel({ provider, @@ -578,6 +594,7 @@ function resolveExplicitModelWithRegistry(params: { model: { ...inlineMatch, ...(resolvedParams ? { params: resolvedParams } : {}), + ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}), } as Model, runtimeHooks, }), @@ -627,6 +644,7 @@ function resolveExplicitModelWithRegistry(params: { model: { ...fallbackInlineMatch, ...(resolvedParams ? { params: resolvedParams } : {}), + ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}), } as Model, runtimeHooks, }), @@ -699,6 +717,7 @@ function resolveConfiguredFallbackModel(params: { }): Model | undefined { const { provider, modelId, cfg, agentDir, runtimeHooks } = params; const providerConfig = resolveConfiguredProviderConfig(cfg, provider); + const requestTimeoutMs = resolveProviderRequestTimeoutMs(providerConfig?.timeoutSeconds); const configuredModel = findConfiguredProviderModel(providerConfig, provider, modelId); const providerHeaders = sanitizeModelHeaders(providerConfig?.headers, { stripSecretRefMarkers: true, @@ -763,6 +782,7 @@ function resolveConfiguredFallbackModel(params: { providerConfig?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS, ...(resolvedParams ? 
{ params: resolvedParams } : {}), + ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}), headers: requestConfig.headers, } as Model, providerRequest, diff --git a/src/agents/provider-transport-fetch.test.ts b/src/agents/provider-transport-fetch.test.ts index 7ed262e0647..c60e6529887 100644 --- a/src/agents/provider-transport-fetch.test.ts +++ b/src/agents/provider-transport-fetch.test.ts @@ -94,6 +94,26 @@ describe("buildGuardedModelFetch", () => { ); }); + it("threads resolved provider timeout metadata into the shared guarded fetch seam", async () => { + const { buildGuardedModelFetch } = await import("./provider-transport-fetch.js"); + const model = { + id: "qwen3:32b", + provider: "ollama", + api: "ollama", + baseUrl: "http://127.0.0.1:11434", + requestTimeoutMs: 300_000, + } as unknown as Model<"ollama">; + + const fetcher = buildGuardedModelFetch(model); + await fetcher("http://127.0.0.1:11434/api/chat", { method: "POST" }); + + expect(fetchWithSsrFGuardMock).toHaveBeenCalledWith( + expect.objectContaining({ + timeoutMs: 300_000, + }), + ); + }); + it("does not force explicit debug proxy overrides onto plain HTTP model transports", async () => { process.env.OPENCLAW_DEBUG_PROXY_ENABLED = "1"; process.env.OPENCLAW_DEBUG_PROXY_URL = "http://127.0.0.1:7799"; diff --git a/src/agents/provider-transport-fetch.ts b/src/agents/provider-transport-fetch.ts index dadc4308242..434c6411b6e 100644 --- a/src/agents/provider-transport-fetch.ts +++ b/src/agents/provider-transport-fetch.ts @@ -154,9 +154,23 @@ function resolveModelRequestPolicy(model: Model) { }); } +function resolveModelRequestTimeoutMs( + model: Model, + timeoutMs: number | undefined, +): number | undefined { + if (timeoutMs !== undefined) { + return timeoutMs; + } + const modelTimeoutMs = (model as { requestTimeoutMs?: unknown }).requestTimeoutMs; + return typeof modelTimeoutMs === "number" && Number.isFinite(modelTimeoutMs) && modelTimeoutMs > 0 + ? 
Math.floor(modelTimeoutMs) + : undefined; +} + export function buildGuardedModelFetch(model: Model, timeoutMs?: number): typeof fetch { const requestConfig = resolveModelRequestPolicy(model); const dispatcherPolicy = buildProviderRequestDispatcherPolicy(requestConfig); + const requestTimeoutMs = resolveModelRequestTimeoutMs(model, timeoutMs); return async (input, init) => { const request = input instanceof Request ? new Request(input, init) : undefined; const url = @@ -189,7 +203,7 @@ export function buildGuardedModelFetch(model: Model, timeoutMs?: number): t }, }, dispatcherPolicy, - timeoutMs, + timeoutMs: requestTimeoutMs, // Provider transport intentionally keeps the secure default and never // replays unsafe request bodies across cross-origin redirects. allowCrossOriginUnsafeRedirectReplay: false, diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 4addd3d5ba8..227955b3ed2 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -1554,6 +1554,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { description: "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.", }, + timeoutSeconds: { + type: "integer", + exclusiveMinimum: 0, + maximum: 9007199254740991, + title: "Model Provider Request Timeout", + description: + "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. 
Use this for slow local or self-hosted model servers instead of changing global agent timeouts.", + }, injectNumCtxForOpenAICompat: { type: "boolean", title: "Model Provider Inject num_ctx (OpenAI Compat)", @@ -26477,6 +26485,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { help: "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.", tags: ["models"], }, + "models.providers.*.timeoutSeconds": { + label: "Model Provider Request Timeout", + help: "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.", + tags: ["performance", "models"], + }, "models.providers.*.injectNumCtxForOpenAICompat": { label: "Model Provider Inject num_ctx (OpenAI Compat)", help: "Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index c4e873858ba..c02d9c65830 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -826,6 +826,8 @@ export const FIELD_HELP: Record = { 'Selects provider auth style: "api-key" for API key auth, "token" for bearer token auth, "oauth" for OAuth credentials, and "aws-sdk" for AWS credential resolution. Match this to your provider requirements.', "models.providers.*.api": "Provider API adapter selection controlling request/response compatibility handling for model calls. 
Use the adapter that matches your upstream provider protocol to avoid feature mismatch.", + "models.providers.*.timeoutSeconds": + "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.", "models.providers.*.injectNumCtxForOpenAICompat": "Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.", "models.providers.*.headers": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 8806ed461c6..d794df3f008 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -515,6 +515,7 @@ export const FIELD_LABELS: Record = { "models.providers.*.apiKey": "Model Provider API Key", // pragma: allowlist secret "models.providers.*.auth": "Model Provider Auth Mode", "models.providers.*.api": "Model Provider API Adapter", + "models.providers.*.timeoutSeconds": "Model Provider Request Timeout", "models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)", "models.providers.*.headers": "Model Provider Headers", "models.providers.*.authHeader": "Model Provider Authorization Header", diff --git a/src/config/types.models.ts b/src/config/types.models.ts index 07ee3da6662..fed8fcc258c 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -119,6 +119,7 @@ export type ModelProviderConfig = { apiKey?: SecretInput; auth?: ModelProviderAuthMode; api?: ModelApi; + timeoutSeconds?: number; injectNumCtxForOpenAICompat?: boolean; headers?: Record; authHeader?: boolean; diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index d8b49977df9..c2a018c86f8 100644 --- a/src/config/zod-schema.core.ts +++ 
b/src/config/zod-schema.core.ts @@ -357,6 +357,7 @@ export const ModelProviderSchema = z .union([z.literal("api-key"), z.literal("aws-sdk"), z.literal("oauth"), z.literal("token")]) .optional(), api: ModelApiSchema.optional(), + timeoutSeconds: z.number().int().positive().optional(), injectNumCtxForOpenAICompat: z.boolean().optional(), headers: z.record(z.string(), SecretInputSchema.register(sensitive)).optional(), authHeader: z.boolean().optional(), diff --git a/src/infra/net/ssrf.dispatcher.test.ts b/src/infra/net/ssrf.dispatcher.test.ts index 5a2b4825493..586fed39105 100644 --- a/src/infra/net/ssrf.dispatcher.test.ts +++ b/src/infra/net/ssrf.dispatcher.test.ts @@ -126,6 +126,7 @@ describe("createPinnedDispatcher", () => { expect(agentCtor).toHaveBeenCalledWith({ connect: { lookup, + timeout: 123_456, }, allowH2: false, bodyTimeout: 123_456, @@ -265,6 +266,9 @@ describe("createPinnedDispatcher", () => { autoSelectFamily: false, lookup, }, + connect: { + timeout: 654_321, + }, allowH2: false, bodyTimeout: 654_321, headersTimeout: 654_321, diff --git a/src/infra/net/undici-runtime.ts b/src/infra/net/undici-runtime.ts index c6e7c23b0b6..d08d05a09e7 100644 --- a/src/infra/net/undici-runtime.ts +++ b/src/infra/net/undici-runtime.ts @@ -23,6 +23,10 @@ const HTTP1_ONLY_DISPATCHER_OPTIONS = Object.freeze({ allowH2: false as const, }); +function isObjectRecord(value: unknown): value is Record { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + function isUndiciRuntimeDeps(value: unknown): value is UndiciRuntimeDeps { return ( typeof value === "object" && @@ -62,8 +66,16 @@ function withHttp1OnlyDispatcherOptions( // Enforce HTTP/1.1-only — must come after options to prevent accidental override Object.assign(base, HTTP1_ONLY_DISPATCHER_OPTIONS); if (timeoutMs !== undefined && Number.isFinite(timeoutMs) && timeoutMs > 0) { - (base as Record).bodyTimeout = timeoutMs; - (base as Record).headersTimeout = timeoutMs; + const normalizedTimeoutMs 
= Math.floor(timeoutMs); + const baseRecord = base as Record; + baseRecord.bodyTimeout = normalizedTimeoutMs; + baseRecord.headersTimeout = normalizedTimeoutMs; + if (typeof baseRecord.connect !== "function") { + baseRecord.connect = { + ...(isObjectRecord(baseRecord.connect) ? baseRecord.connect : {}), + timeout: normalizedTimeoutMs, + }; + } } return base; } diff --git a/src/plugins/provider-runtime-model.types.ts b/src/plugins/provider-runtime-model.types.ts index 7c07fbc4d04..c961ad34591 100644 --- a/src/plugins/provider-runtime-model.types.ts +++ b/src/plugins/provider-runtime-model.types.ts @@ -7,4 +7,5 @@ import type { Api, Model } from "@mariozechner/pi-ai"; export type ProviderRuntimeModel = Model & { contextTokens?: number; params?: Record; + requestTimeoutMs?: number; };