diff --git a/CHANGELOG.md b/CHANGELOG.md
index e3a95b12c94..f5376b15b58 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,7 @@ Docs: https://docs.openclaw.ai
 - Web search/Exa: accept `plugins.entries.exa.config.webSearch.baseUrl`, normalize it to the Exa `/search` endpoint, and partition cached results by endpoint. Fixes #54928 and supersedes #54939. Thanks @mrpl327 and @lyfuci.
 - Web search/MiniMax: include MiniMax Search in the web-search setup flow and let `MINIMAX_API_KEY` participate in MiniMax Search auto-detection. Supersedes #65828. Thanks @Jah-yee.
 - Plugins/ClawHub: preserve official source-linked trust through archive installs, so OpenClaw can install trusted ClawHub plugin packages that trigger the built-in dangerous-pattern scanner. Thanks @vincentkoc.
+- Providers/LM Studio: allow `models.providers.lmstudio.params.preload: false` to skip OpenClaw's native model-load call so LM Studio JIT loading, idle TTL, and auto-evict can own the model lifecycle. Fixes #75921. Thanks @garyd9.
 - Telegram: inherit the process DNS result order for Bot API transport and downgrade recovered sticky IPv4 fallback promotions to debug logs, while keeping pinned-IP escalation warnings visible. Fixes #75904. Thanks @highfly-hi and @neeravmakwana.
 - Web search/MiniMax: allow `MINIMAX_OAUTH_TOKEN` to satisfy MiniMax Search credentials, so OAuth-authorized MiniMax Token Plan setups do not need a separate web-search key. Fixes #65768. Thanks @kikibrian and @zhouhe-xydt.
 - Providers/MiniMax: derive Coding Plan usage polling from the configured MiniMax base URL, so global setups no longer query the CN usage host. Fixes #65054. Thanks @sixone74 and @Yanhu007.
diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256
index c4d12df1dde..3e953061db2 100644
--- a/docs/.generated/config-baseline.sha256
+++ b/docs/.generated/config-baseline.sha256
@@ -1,4 +1,4 @@
-051884bad7339a302ecb75e5f61831b1726c6f0360de27485aac76097570c808  config-baseline.json
-80e6e8dce647aef2d1310de55a81d27de52cca47fc24bd7ad81b80f43a72b84c  config-baseline.core.json
+94f7879b0771e81973c0749c719c19283fdc26e0e42fe6536f8ee563be6a44e5  config-baseline.json
+a38ea77d2f0f0188f14ce0e3a8a564ff80e51415849359042f51921eb01ec2d9  config-baseline.core.json
 eab8a85eefa2792fb8b98a07698e5ec31ff0b6f8af6222767e8049dcc5c4f529  config-baseline.channel.json
 6bd6c72b17801072b2d3285c82f4c21adcc95f0edffc1e6f64e767d0a07b678f  config-baseline.plugin.json
diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md
index b5b127a98d7..d1f39c38f50 100644
--- a/docs/concepts/model-providers.md
+++ b/docs/concepts/model-providers.md
@@ -555,7 +555,7 @@ Then set a model (replace with one of the IDs returned by `http://localhost:1234
 }
 ```
 
-OpenClaw uses LM Studio's native `/api/v1/models` and `/api/v1/models/load` for discovery + auto-load, with `/v1/chat/completions` for inference by default. See [/providers/lmstudio](/providers/lmstudio) for setup and troubleshooting.
+OpenClaw uses LM Studio's native `/api/v1/models` and `/api/v1/models/load` for discovery + auto-load, with `/v1/chat/completions` for inference by default. If you want LM Studio JIT loading, TTL, and auto-evict to own the model lifecycle, set `models.providers.lmstudio.params.preload: false`. See [/providers/lmstudio](/providers/lmstudio) for setup and troubleshooting.
 
 ### Ollama
 
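For reviewers, a minimal sketch (not part of the diff) of the user-facing shape the new changelog entry describes. `LmstudioProviderSlice` is a local stand-in for the relevant fields of `ModelProviderConfig` in `src/config/types.models.ts`; the base URL and model ID are illustrative placeholders.

```ts
// Local stand-in for the slice of ModelProviderConfig this PR touches.
type LmstudioProviderSlice = {
  baseUrl?: string;
  params?: Record<string, unknown>;
  models?: Array<{ id: string }>;
};

const lmstudio: LmstudioProviderSlice = {
  baseUrl: "http://localhost:1234/v1",
  // With preload disabled, OpenClaw skips its native /api/v1/models/load call
  // and LM Studio's JIT loading, idle TTL, and auto-evict own the lifecycle.
  params: { preload: false },
  models: [{ id: "qwen3-8b-instruct" }],
};
```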
diff --git a/docs/providers/lmstudio.md b/docs/providers/lmstudio.md
index 5af70dc7de0..aa3d43d33a7 100644
--- a/docs/providers/lmstudio.md
+++ b/docs/providers/lmstudio.md
@@ -176,7 +176,22 @@ If setup reports HTTP 401, verify your API key:
 
 ### Just-in-time model loading
 
-LM Studio supports just-in-time (JIT) model loading, where models are loaded on first request. Make sure you have this enabled to avoid 'Model not loaded' errors.
+LM Studio supports just-in-time (JIT) model loading, where models are loaded on first request. OpenClaw preloads models through LM Studio's native load endpoint by default, which helps when JIT is disabled. To let LM Studio's JIT, idle TTL, and auto-evict behavior own the model lifecycle, disable OpenClaw's preload step:
+
+```json5
+{
+  models: {
+    providers: {
+      lmstudio: {
+        baseUrl: "http://localhost:1234/v1",
+        api: "openai-completions",
+        params: { preload: false },
+        models: [{ id: "qwen/qwen3.5-9b" }],
+      },
+    },
+  },
+}
+```
 
 ### LAN or tailnet LM Studio host
 
diff --git a/extensions/lmstudio/src/stream.test.ts b/extensions/lmstudio/src/stream.test.ts
index a82e7c0b730..0ee0177594d 100644
--- a/extensions/lmstudio/src/stream.test.ts
+++ b/extensions/lmstudio/src/stream.test.ts
@@ -199,6 +199,49 @@ describe("lmstudio stream wrapper", () => {
     expect(baseStream).toHaveBeenCalledTimes(1);
   });
 
+  it("skips native model preload when provider params disable it", async () => {
+    const baseStream = buildDoneStreamFn();
+    const wrapped = wrapLmstudioInferencePreload({
+      provider: "lmstudio",
+      modelId: "qwen3-8b-instruct",
+      config: {
+        models: {
+          providers: {
+            lmstudio: {
+              baseUrl: "http://localhost:1234",
+              params: { preload: false },
+              models: [],
+            },
+          },
+        },
+      },
+      streamFn: baseStream,
+    } as never);
+
+    const events = await collectEvents(
+      wrapped(
+        {
+          provider: "lmstudio",
+          api: "openai-completions",
+          id: "qwen3-8b-instruct",
+        } as never,
+        { messages: [] } as never,
+        undefined as never,
+      ),
+    );
+
+    expect(events).toEqual([expect.objectContaining({ type: "done" })]);
+    expect(ensureLmstudioModelLoadedMock).not.toHaveBeenCalled();
+    expect(baseStream).toHaveBeenCalledTimes(1);
+    expect(baseStream).toHaveBeenCalledWith(
+      expect.objectContaining({
+        compat: expect.objectContaining({ supportsUsageInStreaming: true }),
+      }),
+      expect.anything(),
+      undefined,
+    );
+  });
+
   it("dedupes concurrent preload requests for the same model and context", async () => {
     let resolvePreload: (() => void) | undefined;
     ensureLmstudioModelLoadedMock.mockImplementationOnce(
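The preload gate this test exercises is small enough to sanity-check in isolation. The sketch below copies `toRecord` and `shouldPreloadLmstudioModels` verbatim from the stream.ts hunk that follows; only the `console.log` truth-table lines are added for illustration.

```ts
// Copied from extensions/lmstudio/src/stream.ts below; standalone for illustration.
function toRecord(value: unknown): Record<string, unknown> | undefined {
  return value && typeof value === "object" ? (value as Record<string, unknown>) : undefined;
}

// Preload stays on unless `params.preload` is explicitly boolean `false`.
function shouldPreloadLmstudioModels(value: unknown): boolean {
  const providerConfig = toRecord(value);
  const params = toRecord(providerConfig?.params);
  return params?.preload !== false;
}

console.log(shouldPreloadLmstudioModels(undefined)); // true: provider not configured
console.log(shouldPreloadLmstudioModels({ params: {} })); // true: preload unset
console.log(shouldPreloadLmstudioModels({ params: { preload: "false" } })); // true: not boolean false
console.log(shouldPreloadLmstudioModels({ params: { preload: false } })); // false: skip native load
```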
diff --git a/extensions/lmstudio/src/stream.ts b/extensions/lmstudio/src/stream.ts
index 7631117e5a2..c94926f3b25 100644
--- a/extensions/lmstudio/src/stream.ts
+++ b/extensions/lmstudio/src/stream.ts
@@ -121,6 +121,22 @@ function toRecord(value: unknown): Record<string, unknown> | undefined {
   return value && typeof value === "object" ? (value as Record<string, unknown>) : undefined;
 }
 
+function shouldPreloadLmstudioModels(value: unknown): boolean {
+  const providerConfig = toRecord(value);
+  const params = toRecord(providerConfig?.params);
+  return params?.preload !== false;
+}
+
+function withLmstudioUsageCompat(model: StreamModel): StreamModel {
+  return {
+    ...model,
+    compat: {
+      ...(model.compat && typeof model.compat === "object" ? model.compat : {}),
+      supportsUsageInStreaming: true,
+    },
+  };
+}
+
 function resolveContextToolNames(context: StreamContext): Set<string> {
   const tools = (context as { tools?: unknown }).tools;
   if (!Array.isArray(tools)) {
@@ -381,7 +397,15 @@ export function wrapLmstudioInferencePreload(ctx: ProviderWrapStreamFnContext):
     if (!modelKey) {
       return underlying(model, context, options);
     }
-    const providerBaseUrl = ctx.config?.models?.providers?.[LMSTUDIO_PROVIDER_ID]?.baseUrl;
+    const providerConfig = ctx.config?.models?.providers?.[LMSTUDIO_PROVIDER_ID];
+    if (!shouldPreloadLmstudioModels(providerConfig)) {
+      const stream = underlying(withLmstudioUsageCompat(model), context, options);
+      return (async () => {
+        const resolvedStream = stream instanceof Promise ? await stream : stream;
+        return wrapLmstudioPlainTextToolCalls(resolvedStream, context);
+      })();
+    }
+    const providerBaseUrl = providerConfig?.baseUrl;
     const resolvedBaseUrl = resolveLmstudioInferenceBase(
       typeof model.baseUrl === "string" ? model.baseUrl : providerBaseUrl,
     );
@@ -454,14 +478,7 @@ export function wrapLmstudioInferencePreload(ctx: ProviderWrapStreamFnContext):
       // LM Studio uses OpenAI-compatible streaming usage payloads when requested via
       // `stream_options.include_usage`. Force this compat flag at call time so usage
      // reporting remains enabled even when catalog entries omitted compat metadata.
-      const modelWithUsageCompat = {
-        ...model,
-        compat: {
-          ...(model.compat && typeof model.compat === "object" ? model.compat : {}),
-          supportsUsageInStreaming: true,
-        },
-      };
-      const stream = underlying(modelWithUsageCompat, context, options);
+      const stream = underlying(withLmstudioUsageCompat(model), context, options);
       const resolvedStream = stream instanceof Promise ? await stream : stream;
       return wrapLmstudioPlainTextToolCalls(resolvedStream, context);
     })();
diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts
index 7c5f63335e0..96fa59249a4 100644
--- a/src/config/schema.base.generated.ts
+++ b/src/config/schema.base.generated.ts
@@ -1688,6 +1688,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
           description:
             "Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.",
         },
+        params: {
+          type: "object",
+          propertyNames: {
+            type: "string",
+          },
+          additionalProperties: {},
+          title: "Model Provider Runtime Parameters",
+          description:
+            "Provider-specific runtime parameters interpreted by provider plugins. Keep keys documented by the provider, and prefer explicit provider docs over ad hoc shared assumptions.",
+        },
         headers: {
           type: "object",
           propertyNames: {
@@ -26994,6 +27004,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
     help: "Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.",
     tags: ["models"],
   },
+  "models.providers.*.params": {
+    label: "Model Provider Runtime Parameters",
+    help: "Provider-specific runtime parameters interpreted by provider plugins. Keep keys documented by the provider, and prefer explicit provider docs over ad hoc shared assumptions.",
+    tags: ["models"],
+  },
   "models.providers.*.headers": {
     label: "Model Provider Headers",
     help: "Static HTTP headers merged into provider requests for tenant routing, proxy auth, or custom gateway requirements. Use this sparingly and keep sensitive header values in secrets.",
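The generated schema entry above pairs with the zod rule added in `zod-schema.core.ts` at the end of this diff. A minimal sketch, assuming zod v3 and trimming the provider object down to just the `params` slice:

```ts
import { z } from "zod";

// Mirrors `params: z.record(z.string(), z.unknown()).optional()` from
// zod-schema.core.ts; the wrapping object is trimmed for illustration.
const ProviderParamsSlice = z.object({
  params: z.record(z.string(), z.unknown()).optional(),
});

// Arbitrary provider-documented keys pass through untyped.
console.log(ProviderParamsSlice.parse({ params: { preload: false } }));
// Omitting `params` entirely is also valid.
console.log(ProviderParamsSlice.parse({}));
```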
diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts
index 9a81525af3e..46f4eb4cf34 100644
--- a/src/config/schema.help.ts
+++ b/src/config/schema.help.ts
@@ -866,6 +866,8 @@ export const FIELD_HELP: Record<string, string> = {
     "Optional per-provider model request timeout in seconds. Applies to provider HTTP fetches, including connect, headers, body, and total request abort handling. Use this for slow local or self-hosted model servers instead of changing global agent timeouts.",
   "models.providers.*.injectNumCtxForOpenAICompat":
     "Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.",
+  "models.providers.*.params":
+    "Provider-specific runtime parameters interpreted by provider plugins. Keep keys documented by the provider, and prefer explicit provider docs over ad hoc shared assumptions.",
   "models.providers.*.headers":
     "Static HTTP headers merged into provider requests for tenant routing, proxy auth, or custom gateway requirements. Use this sparingly and keep sensitive header values in secrets.",
   "models.providers.*.authHeader":
diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts
index b9d78eeafed..8ce53676f99 100644
--- a/src/config/schema.labels.ts
+++ b/src/config/schema.labels.ts
@@ -535,6 +535,7 @@ export const FIELD_LABELS: Record<string, string> = {
   "models.providers.*.maxTokens": "Model Provider Max Tokens",
   "models.providers.*.timeoutSeconds": "Model Provider Request Timeout",
   "models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)",
+  "models.providers.*.params": "Model Provider Runtime Parameters",
   "models.providers.*.headers": "Model Provider Headers",
   "models.providers.*.authHeader": "Model Provider Authorization Header",
   "models.providers.*.request": "Model Provider Request Overrides",
diff --git a/src/config/types.models.ts b/src/config/types.models.ts
index 8d494cee851..9b524a5a7c2 100644
--- a/src/config/types.models.ts
+++ b/src/config/types.models.ts
@@ -124,6 +124,8 @@ export type ModelProviderConfig = {
   maxTokens?: number;
   timeoutSeconds?: number;
   injectNumCtxForOpenAICompat?: boolean;
+  /** Provider-specific runtime parameters interpreted by provider plugins. */
+  params?: Record<string, unknown>;
   headers?: Record<string, string>;
   authHeader?: boolean;
   request?: ConfiguredModelProviderRequest;
diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts
index 94836170364..4921c42b47a 100644
--- a/src/config/zod-schema.core.ts
+++ b/src/config/zod-schema.core.ts
@@ -362,6 +362,7 @@ const ModelProviderSchema = z
     maxTokens: z.number().positive().optional(),
     timeoutSeconds: z.number().int().positive().optional(),
     injectNumCtxForOpenAICompat: z.boolean().optional(),
+    params: z.record(z.string(), z.unknown()).optional(),
     headers: z.record(z.string(), SecretInputSchema.register(sensitive)).optional(),
     authHeader: z.boolean().optional(),
     request: ConfiguredModelProviderRequestSchema,
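Taken together, `types.models.ts` and `zod-schema.core.ts` expose `params` as an untyped bag that each provider plugin interprets for itself. A hedged sketch of how a plugin might pull a typed value out of it; `readBooleanParam` is a hypothetical helper for illustration, not an OpenClaw API.

```ts
// Hypothetical helper: read a boolean out of the untyped `params` bag that
// ModelProviderConfig now carries, falling back when the key is absent.
function readBooleanParam(
  params: Record<string, unknown> | undefined,
  key: string,
  fallback: boolean,
): boolean {
  const value = params?.[key];
  // Only an explicit boolean overrides the fallback; strings like "false"
  // or other malformed values fall through to the default.
  return typeof value === "boolean" ? value : fallback;
}

// Mirrors the lmstudio gate: preload defaults to true unless explicitly false.
console.log(readBooleanParam({ preload: false }, "preload", true)); // false
console.log(readBooleanParam(undefined, "preload", true)); // true
```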