diff --git a/CHANGELOG.md b/CHANGELOG.md index 58d09a7ac79..793f6b37c4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ Docs: https://docs.openclaw.ai - Providers/Ollama: parse stringified native tool-call arguments before dispatch, preserving unsafe integer values so Ollama tool use receives structured parameters. Fixes #69735; supersedes #69910. Thanks @rongshuzhao and @yfge. - Providers/Ollama: skip ambient localhost discovery unless Ollama auth or meaningful config opts in, preventing unexpected probes to `127.0.0.1:11434` for users who are not using Ollama. Fixes #56939; supersedes #57116. Thanks @IanxDev and @tsukhani. - Providers/Ollama: skip implicit localhost discovery when a custom remote `api: "ollama"` provider is configured, while still treating `127/8` loopback hosts as local. Carries forward #43224. Thanks @issacthekaylon. +- Providers/models: honor provider-level `contextWindow`, `contextTokens`, and `maxTokens` as defaults when resolving discovered models, so local Ollama and other self-hosted providers can cap all models without repeating per-model entries. Fixes #44786; carries forward #44955. Thanks @voltwake and @maweibin. - Providers/Ollama: move memory embeddings to Ollama's current `/api/embed` endpoint with batched `input` requests while preserving vector normalization and custom provider auth/header overrides. Fixes #39983. Thanks @sskkcc and @LiudengZhang. - Providers/Ollama: route local web search through Ollama's signed `/api/experimental/web_search` daemon proxy, use hosted `/api/web_search` directly for `ollama.com`, and keep `OLLAMA_API_KEY` scoped to cloud fallback auth. Fixes #69132. Thanks @yoon1012 and @hyspacex. - Providers/Ollama: accept OpenAI SDK-style `baseURL` as an alias for `baseUrl` across discovery, streaming, setup pulls, embeddings, and web search so remote Ollama hosts are not silently ignored. Fixes #62533; supersedes #62549. Thanks @Julien-BKK and @Linux2010. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 45b535a755c..a094ea0918f 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -6fceeca87ecf3245c9f3a184f1ec66c8dee8df6e5a14c6d9d1924557f8d36408 config-baseline.json -15b6223907d0930307e950752e6498edc40f7df597e8e36914490f7611eab413 config-baseline.core.json +c4b54de7557cd14b35a629585ad706a4e7de411cc725bcbce921f22bfaf14ada config-baseline.json +3fd4da36f28b508f8e6ac4fceb18262244d8ed70df15244192032ec71027bb4f config-baseline.core.json 07963db49502132f26db396c56b36e018b110e6c55a68b3cb012d3ec96f43901 config-baseline.channel.json 74b74cb18ac37c0acaa765f398f1f9edbcee4c43567f02d45c89598a1e13afb4 config-baseline.plugin.json diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index 01535f1d78e..f2fe80d9f44 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -16,7 +16,7 @@ Reference for **LLM/model providers** (not chat channels like WhatsApp/Telegram) - Model refs use `provider/model` (example: `opencode/claude-opus-4-6`). - `agents.defaults.models` acts as an allowlist when set. - CLI helpers: `openclaw onboard`, `openclaw models list`, `openclaw models set `. - - `models.providers.*.models[].contextWindow` is native model metadata; `contextTokens` is the effective runtime cap. 
+ - `models.providers.*.contextWindow` / `contextTokens` / `maxTokens` set provider-level defaults; `models.providers.*.models[].contextWindow` / `contextTokens` / `maxTokens` override them per model. - Fallback rules, cooldown probes, and session-override persistence: [Model failover](/concepts/model-failover). diff --git a/docs/gateway/config-tools.md b/docs/gateway/config-tools.md index 8fd0c269a66..8504d83ad9b 100644 --- a/docs/gateway/config-tools.md +++ b/docs/gateway/config-tools.md @@ -429,6 +429,9 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi - `models.providers.*.api`: request adapter (`openai-completions`, `openai-responses`, `anthropic-messages`, `google-generative-ai`, etc). - `models.providers.*.apiKey`: provider credential (prefer SecretRef/env substitution). - `models.providers.*.auth`: auth strategy (`api-key`, `token`, `oauth`, `aws-sdk`). + - `models.providers.*.contextWindow`: default native context window for models under this provider when the model entry does not set `contextWindow`. + - `models.providers.*.contextTokens`: default effective runtime context cap for models under this provider when the model entry does not set `contextTokens`. + - `models.providers.*.maxTokens`: default output-token cap for models under this provider when the model entry does not set `maxTokens`. - `models.providers.*.timeoutSeconds`: optional per-provider model HTTP request timeout in seconds, including connect, headers, body, and total request abort handling. - `models.providers.*.injectNumCtxForOpenAICompat`: for Ollama + `openai-completions`, inject `options.num_ctx` into requests (default: `true`). - `models.providers.*.authHeader`: force credential transport in the `Authorization` header when required. @@ -447,8 +450,8 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi - `models.providers.*.models`: explicit provider model catalog entries. - - `models.providers.*.models.*.contextWindow`: native model context window metadata. - - `models.providers.*.models.*.contextTokens`: optional runtime context cap. Use this when you want a smaller effective context budget than the model's native `contextWindow`; `openclaw models list` shows both values when they differ. + - `models.providers.*.models.*.contextWindow`: native model context window metadata. This overrides provider-level `contextWindow` for that model. + - `models.providers.*.models.*.contextTokens`: optional runtime context cap. This overrides provider-level `contextTokens`. Use it when you want a smaller effective context budget than the model's native `contextWindow`; `openclaw models list` shows both values when they differ. - `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. Empty/omitted `baseUrl` keeps default OpenAI behavior. - `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure text `messages[].content` arrays into plain strings before sending the request. 
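Taken together, the new config-tools.md keys work like this — a minimal config sketch (values are illustrative; the model ids and `baseUrl` are borrowed from examples elsewhere in this diff):

```json5
{
  models: {
    providers: {
      ollama: {
        baseUrl: "http://localhost:11434",
        // Provider-level defaults: inherited by every model entry (and every
        // discovered model) that does not set its own values.
        contextWindow: 16384,
        maxTokens: 4096,
        models: [
          { id: "llama3.3" }, // inherits contextWindow 16384 and maxTokens 4096
          { id: "qwen3.5:9b", contextWindow: 32768 }, // model-level value wins
        ],
      },
    },
  },
}
```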
diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md index 2f7b3e3bb19..d1dfa0813b1 100644 --- a/docs/providers/ollama.md +++ b/docs/providers/ollama.md @@ -462,7 +462,7 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, including larger `PARAMETER num_ctx` values from custom Modelfiles. Otherwise it falls back to the default Ollama context window used by OpenClaw. - You can override `contextWindow` and `maxTokens` in explicit provider config. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid, zero, negative, and non-finite values are ignored and fall back to `contextWindow`. + You can set provider-level `contextWindow`, `contextTokens`, and `maxTokens` defaults for every model under that Ollama provider, then override them per model when needed. To cap Ollama's per-request runtime context without rebuilding a Modelfile, set `params.num_ctx`; OpenClaw sends it as `options.num_ctx` for both native Ollama and the OpenAI-compatible Ollama adapter. Invalid, zero, negative, and non-finite values are ignored and fall back to `contextWindow`. Native Ollama model entries also accept the common Ollama runtime options under `params`, including `temperature`, `top_p`, `top_k`, `min_p`, `num_predict`, `stop`, `repeat_penalty`, `num_batch`, `num_thread`, and `use_mmap`. OpenClaw forwards only Ollama request keys, so OpenClaw runtime params such as `streaming` are not leaked to Ollama. Use `params.think` or `params.thinking` to send top-level Ollama `think`; `false` disables API-level thinking for Qwen-style thinking models. 
@@ -471,6 +471,7 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s models: { providers: { ollama: { + contextWindow: 32768, models: [ { id: "llama3.3", diff --git a/src/agents/context.test.ts b/src/agents/context.test.ts index 67f89f38ba6..3be43f89893 100644 --- a/src/agents/context.test.ts +++ b/src/agents/context.test.ts @@ -187,6 +187,23 @@ describe("applyConfiguredContextWindows", () => { expect(cache.get("custom/model")).toBe(200_000); }); + + it("uses provider-level context defaults for configured model entries", () => { + const cache = new Map(); + applyConfiguredContextWindows({ + cache, + modelsConfig: { + providers: { + ollama: { + contextWindow: 8_192, + models: [{ id: "qwen3.5:9b" }], + }, + }, + }, + }); + + expect(cache.get("qwen3.5:9b")).toBe(8_192); + }); }); describe("createSessionManagerRuntimeRegistry", () => { @@ -210,6 +227,50 @@ }); describe("resolveContextTokensForModel", () => { + it("uses provider-level context defaults when no model-level cap is set", () => { + const result = resolveContextTokensForModel({ + cfg: { + models: { + providers: { + ollama: { + baseUrl: "http://localhost:11434", + contextWindow: 8_192, + models: [], + }, + }, + }, + }, + provider: "ollama", + model: "qwen3.5:9b", + fallbackContextTokens: 216_000, + allowAsyncLoad: false, + }); + + expect(result).toBe(8_192); + }); + + it("prefers model-level context caps over provider-level defaults", () => { + const result = resolveContextTokensForModel({ + cfg: { + models: { + providers: { + ollama: { + baseUrl: "http://localhost:11434", + contextWindow: 8_192, + models: [{ ...testModelContextWindow("qwen3.5:9b", 216_000), contextTokens: 16_000 }], + }, + }, + }, + }, + provider: "ollama", + model: "qwen3.5:9b", + fallbackContextTokens: 216_000, + allowAsyncLoad: false, + }); + + expect(result).toBe(16_000); + }); + it("returns 1M context when anthropic context1m is enabled for opus/sonnet", () => { const result = resolveContextTokensForModel({ cfg: { diff --git a/src/agents/context.ts b/src/agents/context.ts index aabc43650b4..4dd6da58e47 100644 --- a/src/agents/context.ts +++ b/src/agents/context.ts @@ -21,7 +21,11 @@ type ModelRegistryLike = { getAll: () => ModelEntry[]; }; type ConfigModelEntry = { id?: string; contextWindow?: number; contextTokens?: number }; -type ProviderConfigEntry = { models?: ConfigModelEntry[] }; +type ProviderConfigEntry = { + contextWindow?: number; + contextTokens?: number; + models?: ConfigModelEntry[]; +}; type ModelsConfig = { providers?: Record<string, ProviderConfigEntry> }; type AgentModelEntry = { params?: Record<string, unknown> }; @@ -83,7 +87,11 @@ export function applyConfiguredContextWindows(params: { ? model.contextTokens : typeof model?.contextWindow === "number" ? model.contextWindow - : undefined; + : typeof provider?.contextTokens === "number" + ? provider.contextTokens + : typeof provider?.contextWindow === "number" + ? provider.contextWindow + : undefined; if (!modelId || !contextTokens || contextTokens <= 0) { continue; } @@ -340,30 +348,41 @@ function resolveConfiguredProviderContextTokens( // Mirror the lookup order in pi-embedded-runner/model.ts: exact key first, // then normalized fallback. This prevents alias collisions from picking the // wrong configured cap based on Object.entries iteration order. + function readProviderContextTokens(providerConfig: ProviderConfigEntry | undefined) { + return typeof providerConfig?.contextTokens === "number" + ? 
providerConfig.contextTokens + : typeof providerConfig?.contextWindow === "number" + ? providerConfig.contextWindow + : undefined; + } + function findContextTokens(matchProviderId: (id: string) => boolean): number | undefined { for (const [providerId, providerConfig] of Object.entries(providers!)) { if (!matchProviderId(providerId)) { continue; } - if (!Array.isArray(providerConfig?.models)) { - continue; - } - for (const m of providerConfig.models) { - const contextTokens = - typeof m?.contextTokens === "number" - ? m.contextTokens - : typeof m?.contextWindow === "number" - ? m.contextWindow - : undefined; - if ( - typeof m?.id === "string" && - m.id === model && - typeof contextTokens === "number" && - contextTokens > 0 - ) { - return contextTokens; + if (Array.isArray(providerConfig?.models)) { + for (const m of providerConfig.models) { + const contextTokens = + typeof m?.contextTokens === "number" + ? m.contextTokens + : typeof m?.contextWindow === "number" + ? m.contextWindow + : undefined; + if ( + typeof m?.id === "string" && + m.id === model && + typeof contextTokens === "number" && + contextTokens > 0 + ) { + return contextTokens; + } } } + const providerContextTokens = readProviderContextTokens(providerConfig); + if (typeof providerContextTokens === "number" && providerContextTokens > 0) { + return providerContextTokens; + } } return undefined; } diff --git a/src/agents/pi-embedded-runner/model.inline-provider.ts b/src/agents/pi-embedded-runner/model.inline-provider.ts index 511fd35dd7e..e00e665909d 100644 --- a/src/agents/pi-embedded-runner/model.inline-provider.ts +++ b/src/agents/pi-embedded-runner/model.inline-provider.ts @@ -20,6 +20,9 @@ export type InlineProviderConfig = { baseUrl?: string; api?: ModelDefinitionConfig["api"]; models?: ModelDefinitionConfig[]; + contextWindow?: ModelProviderConfig["contextWindow"]; + contextTokens?: ModelProviderConfig["contextTokens"]; + maxTokens?: ModelProviderConfig["maxTokens"]; headers?: unknown; authHeader?: boolean; timeoutSeconds?: ModelProviderConfig["timeoutSeconds"]; @@ -154,6 +157,9 @@ export function buildInlineProviderModels( return attachModelProviderRequestTransport( { ...model, + contextWindow: model.contextWindow ?? entry?.contextWindow, + contextTokens: model.contextTokens ?? entry?.contextTokens, + maxTokens: model.maxTokens ?? 
entry?.maxTokens, input: resolveProviderModelInput({ provider: trimmed, modelId: model.id, diff --git a/src/agents/pi-embedded-runner/model.test.ts b/src/agents/pi-embedded-runner/model.test.ts index 9f4436f4c74..22980337139 100644 --- a/src/agents/pi-embedded-runner/model.test.ts +++ b/src/agents/pi-embedded-runner/model.test.ts @@ -443,6 +443,77 @@ describe("resolveModel", () => { ); }); + it("uses provider-level context defaults over discovered metadata", () => { + mockDiscoveredModel(discoverModels, { + provider: "ollama", + modelId: "qwen3.5:9b", + templateModel: { + ...makeModel("qwen3.5:9b"), + provider: "ollama", + contextWindow: 216_000, + contextTokens: 216_000, + maxTokens: 65_536, + }, + }); + const cfg = { + models: { + providers: { + ollama: { + baseUrl: "http://localhost:11434", + contextWindow: 8_192, + contextTokens: 8_000, + models: [{ id: "qwen3.5:9b", name: "qwen3.5:9b" }], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("ollama", "qwen3.5:9b", "/tmp/agent", cfg); + + expect(result.error).toBeUndefined(); + expect(result.model?.contextWindow).toBe(8_192); + expect((result.model as { contextTokens?: number } | undefined)?.contextTokens).toBe(8_000); + expect(result.model?.maxTokens).toBe(8_192); + }); + + it("keeps per-model context values above provider-level defaults", () => { + mockDiscoveredModel(discoverModels, { + provider: "ollama", + modelId: "qwen3.5:9b", + templateModel: { + ...makeModel("qwen3.5:9b"), + provider: "ollama", + contextWindow: 216_000, + maxTokens: 65_536, + }, + }); + const cfg = { + models: { + providers: { + ollama: { + baseUrl: "http://localhost:11434", + contextWindow: 8_192, + maxTokens: 4_096, + models: [ + { + id: "qwen3.5:9b", + name: "qwen3.5:9b", + contextWindow: 16_384, + maxTokens: 12_000, + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("ollama", "qwen3.5:9b", "/tmp/agent", cfg); + + expect(result.error).toBeUndefined(); + expect(result.model?.contextWindow).toBe(16_384); + expect(result.model?.maxTokens).toBe(12_000); + }); + it("applies agent default model params without explicit provider config", () => { mockDiscoveredModel(discoverModels, { provider: "ollama", diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index bf290fe95bc..9679813a7ff 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -483,6 +483,9 @@ function applyConfiguredProviderOverrides(params: { !configuredModel && !providerConfig.baseUrl && !providerConfig.api && + providerConfig.contextWindow === undefined && + providerConfig.contextTokens === undefined && + providerConfig.maxTokens === undefined && requestTimeoutMs === undefined && !providerHeaders && !providerRequest @@ -518,6 +521,10 @@ function applyConfiguredProviderOverrides(params: { cfg: params.cfg, runtimeHooks: params.runtimeHooks, }); + const resolvedContextWindow = + metadataOverrideModel?.contextWindow ?? providerConfig.contextWindow; + const resolvedMaxTokens = + metadataOverrideModel?.maxTokens ?? providerConfig.maxTokens ?? discoveredModel.maxTokens; const requestConfig = resolveProviderRequestConfig({ provider: params.provider, api: @@ -541,9 +548,15 @@ function applyConfiguredProviderOverrides(params: { reasoning: metadataOverrideModel?.reasoning ?? discoveredModel.reasoning, input: normalizedInput, cost: metadataOverrideModel?.cost ?? discoveredModel.cost, - contextWindow: metadataOverrideModel?.contextWindow ?? 
discoveredModel.contextWindow, - contextTokens: metadataOverrideModel?.contextTokens ?? discoveredModel.contextTokens, - maxTokens: metadataOverrideModel?.maxTokens ?? discoveredModel.maxTokens, + contextWindow: resolvedContextWindow ?? discoveredModel.contextWindow, + contextTokens: + metadataOverrideModel?.contextTokens ?? + providerConfig.contextTokens ?? + discoveredModel.contextTokens, + maxTokens: + typeof resolvedContextWindow === "number" + ? Math.min(resolvedMaxTokens, resolvedContextWindow) + : resolvedMaxTokens, ...(resolvedParams ? { params: resolvedParams } : {}), ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}), headers: requestConfig.headers, @@ -774,11 +787,16 @@ function resolveConfiguredFallbackModel(params: { cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: configuredModel?.contextWindow ?? + providerConfig?.contextWindow ?? providerConfig?.models?.[0]?.contextWindow ?? DEFAULT_CONTEXT_TOKENS, - contextTokens: configuredModel?.contextTokens ?? providerConfig?.models?.[0]?.contextTokens, + contextTokens: + configuredModel?.contextTokens ?? + providerConfig?.contextTokens ?? + providerConfig?.models?.[0]?.contextTokens, maxTokens: configuredModel?.maxTokens ?? + providerConfig?.maxTokens ?? providerConfig?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS, ...(resolvedParams ? { params: resolvedParams } : {}), diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 227955b3ed2..d7013291a7f 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -1554,6 +1554,28 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { description: "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.", }, + contextWindow: { + type: "number", + exclusiveMinimum: 0, + title: "Model Provider Context Window", + description: + "Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.", + }, + contextTokens: { + type: "integer", + exclusiveMinimum: 0, + maximum: 9007199254740991, + title: "Model Provider Context Tokens", + description: + "Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.", + }, + maxTokens: { + type: "number", + exclusiveMinimum: 0, + title: "Model Provider Max Tokens", + description: + "Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.", + }, timeoutSeconds: { type: "integer", exclusiveMinimum: 0, @@ -26485,6 +26507,21 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { help: "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.", tags: ["models"], }, + "models.providers.*.contextWindow": { + label: "Model Provider Context Window", + help: "Default native context window applied to models under this provider when a model entry does not set contextWindow. 
Use model-level contextWindow for per-model overrides.", + tags: ["models"], + }, + "models.providers.*.contextTokens": { + label: "Model Provider Context Tokens", + help: "Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.", + tags: ["security", "auth", "models"], + }, + "models.providers.*.maxTokens": { + label: "Model Provider Max Tokens", + help: "Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.", + tags: ["security", "auth", "performance", "models"], + }, "models.providers.*.timeoutSeconds": { label: "Model Provider Request Timeout", help: "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.", diff --git a/src/config/schema.help.quality.test.ts b/src/config/schema.help.quality.test.ts index ef359c16861..03521d6fb0f 100644 --- a/src/config/schema.help.quality.test.ts +++ b/src/config/schema.help.quality.test.ts @@ -367,6 +367,9 @@ const TARGET_KEYS = [ "models.providers.*.baseUrl", "models.providers.*.apiKey", "models.providers.*.api", + "models.providers.*.contextWindow", + "models.providers.*.contextTokens", + "models.providers.*.maxTokens", "models.providers.*.headers", "models.providers.*.models", "agents", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index c02d9c65830..a5518dc24ed 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -826,6 +826,12 @@ export const FIELD_HELP: Record = { 'Selects provider auth style: "api-key" for API key auth, "token" for bearer token auth, "oauth" for OAuth credentials, and "aws-sdk" for AWS credential resolution. Match this to your provider requirements.', "models.providers.*.api": "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.", + "models.providers.*.contextWindow": + "Default native context window applied to models under this provider when a model entry does not set contextWindow. Use model-level contextWindow for per-model overrides.", + "models.providers.*.contextTokens": + "Default effective runtime context cap applied to models under this provider when a model entry does not set contextTokens. Use this when runtime should budget below the native contextWindow.", + "models.providers.*.maxTokens": + "Default maximum output token budget applied to models under this provider when a model entry does not set maxTokens.", "models.providers.*.timeoutSeconds": "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. 
Use this for slow local or self-hosted model servers instead of changing global agent timeouts.", "models.providers.*.injectNumCtxForOpenAICompat": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index d794df3f008..f641e4da4c4 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -515,6 +515,9 @@ export const FIELD_LABELS: Record<string, string> = { "models.providers.*.apiKey": "Model Provider API Key", // pragma: allowlist secret "models.providers.*.auth": "Model Provider Auth Mode", "models.providers.*.api": "Model Provider API Adapter", + "models.providers.*.contextWindow": "Model Provider Context Window", + "models.providers.*.contextTokens": "Model Provider Context Tokens", + "models.providers.*.maxTokens": "Model Provider Max Tokens", "models.providers.*.timeoutSeconds": "Model Provider Request Timeout", "models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)", "models.providers.*.headers": "Model Provider Headers", diff --git a/src/config/types.models.ts b/src/config/types.models.ts index fed8fcc258c..30b3ae2f554 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -119,6 +119,9 @@ export type ModelProviderConfig = { apiKey?: SecretInput; auth?: ModelProviderAuthMode; api?: ModelApi; + contextWindow?: number; + contextTokens?: number; + maxTokens?: number; timeoutSeconds?: number; injectNumCtxForOpenAICompat?: boolean; headers?: Record<string, SecretInput>; diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index c2a018c86f8..a8ccaf80de6 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -357,6 +357,9 @@ export const ModelProviderSchema = z .union([z.literal("api-key"), z.literal("aws-sdk"), z.literal("oauth"), z.literal("token")]) .optional(), api: ModelApiSchema.optional(), + contextWindow: z.number().positive().optional(), + contextTokens: z.number().int().positive().optional(), + maxTokens: z.number().positive().optional(), timeoutSeconds: z.number().int().positive().optional(), injectNumCtxForOpenAICompat: z.boolean().optional(), headers: z.record(z.string(), SecretInputSchema.register(sensitive)).optional(),
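The net resolution order this diff implements is: model entry, then provider-level default, then discovered metadata, with `maxTokens` additionally clamped to the resolved `contextWindow` whenever a configured window is present. A minimal standalone sketch of that precedence — `resolveCaps` and `Caps` are hypothetical names for illustration, not identifiers from this codebase:

```ts
type Caps = { contextWindow?: number; contextTokens?: number; maxTokens?: number };

// Mirrors the precedence added in applyConfiguredProviderOverrides:
// model entry -> provider default -> discovered metadata.
function resolveCaps(model: Caps, provider: Caps, discovered: Caps): Caps {
  const configuredWindow = model.contextWindow ?? provider.contextWindow;
  const contextWindow = configuredWindow ?? discovered.contextWindow;
  const contextTokens =
    model.contextTokens ?? provider.contextTokens ?? discovered.contextTokens;
  const rawMaxTokens = model.maxTokens ?? provider.maxTokens ?? discovered.maxTokens;
  // A configured window caps the output budget; discovered metadata alone does not.
  const maxTokens =
    typeof configuredWindow === "number" && typeof rawMaxTokens === "number"
      ? Math.min(rawMaxTokens, configuredWindow)
      : rawMaxTokens;
  return { contextWindow, contextTokens, maxTokens };
}

// Matches the new model.test.ts expectation: a provider-level window of 8_192
// caps the discovered maxTokens of 65_536 down to 8_192.
resolveCaps({}, { contextWindow: 8_192 }, { contextWindow: 216_000, maxTokens: 65_536 });
// => { contextWindow: 8_192, contextTokens: undefined, maxTokens: 8_192 }
```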