From e153eceea5a0b446488694ece41f183a5c21073f Mon Sep 17 00:00:00 2001 From: rendrag-git Date: Wed, 27 May 2026 12:32:18 +0000 Subject: [PATCH] fix(vllm): wire configured thinking params Move vLLM Qwen thinking control onto configured model compat metadata and carry it through catalog/model-selection/runtime thinking contexts. Also migrate legacy provider/default request params in doctor and keep Pi/runtime model rows buildable with explicit reasoning defaults. Thanks @rendrag-git. Co-authored-by: rendrag-git <253747599+rendrag-git@users.noreply.github.com> --- docs/gateway/config-agents.md | 4 +- docs/gateway/config-tools.md | 2 +- docs/plugins/sdk-migration.md | 5 + docs/plugins/sdk-provider-plugins.md | 1 + docs/providers/vllm.md | 32 +- docs/tools/thinking.md | 1 + extensions/vllm/index.ts | 2 + .../vllm/provider-discovery.contract.test.ts | 21 + extensions/vllm/provider-policy-api.test.ts | 62 ++ extensions/vllm/provider-policy-api.ts | 1 + extensions/vllm/stream.test.ts | 83 ++- extensions/vllm/stream.ts | 38 +- extensions/vllm/thinking-policy.ts | 65 ++ src/agents/model-catalog.test.ts | 94 +++ src/agents/model-catalog.ts | 56 +- src/agents/model-selection-shared.ts | 31 +- src/agents/model-selection.test.ts | 90 +++ src/agents/pi-embedded-runner/model.test.ts | 95 +++ src/agents/pi-embedded-runner/model.ts | 97 ++- src/auto-reply/reply/model-selection.test.ts | 111 ++- src/auto-reply/reply/model-selection.ts | 36 +- src/auto-reply/thinking.shared.ts | 1 + src/auto-reply/thinking.test.ts | 58 ++ src/auto-reply/thinking.ts | 23 +- .../shared/legacy-config-migrate.test.ts | 641 ++++++++++++++++++ ...legacy-config-migrations.runtime.models.ts | 585 ++++++++++++++++ src/gateway/server-methods/models.test.ts | 38 ++ src/gateway/server-methods/models.ts | 16 +- src/plugins/provider-thinking.types.ts | 10 + 29 files changed, 2214 insertions(+), 85 deletions(-) create mode 100644 extensions/vllm/provider-policy-api.test.ts create mode 100644 extensions/vllm/provider-policy-api.ts create mode 100644 extensions/vllm/thinking-policy.ts diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md index eeefc67b794..8e65e745de6 100644 --- a/docs/gateway/config-agents.md +++ b/docs/gateway/config-agents.md @@ -459,8 +459,8 @@ Time format in system prompt. Default: `auto` (OS preference). - `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details. - `models.providers.openrouter.params.provider`: OpenRouter-wide default provider-routing policy. OpenClaw forwards this to OpenRouter's request `provider` object; per-model `agents.defaults.models["openrouter/"].params.provider` and agent params override by key. See [OpenRouter provider routing](/providers/openrouter#advanced-configuration). - `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward. -- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. For vLLM Qwen thinking controls, set `params.qwenThinkingFormat` to `"chat-template"` or `"top-level"` on that model entry. -- `compat.thinkingFormat`: OpenAI-compatible thinking payload style. Use `"together"` for Together-style `reasoning.enabled`, `"qwen"` for Qwen-style top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family backends that support request-level chat-template kwargs, such as vLLM. OpenClaw maps disabled thinking to `false` and enabled thinking to `true`. +- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. Configured vLLM Qwen and Nemotron thinking models expose binary `/think` choices (`off`, `on`) instead of the multi-level effort ladder. +- `compat.thinkingFormat`: OpenAI-compatible thinking payload style. Use `"together"` for Together-style `reasoning.enabled`, `"qwen"` for Qwen-style top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family backends that support request-level chat-template kwargs, such as vLLM. OpenClaw maps disabled thinking to `false` and enabled thinking to `true`, and configured vLLM Qwen models expose binary `/think` choices for these formats. - `compat.supportedReasoningEfforts`: per-model OpenAI-compatible reasoning effort list. Include `"xhigh"` for custom endpoints that truly accept it; OpenClaw then exposes `/think xhigh` in command menus, Gateway session rows, session patch validation, agent CLI validation, and `llm-task` validation for that configured provider/model. Use `compat.reasoningEffortMap` when the backend wants a provider-specific value for a canonical level. - `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking). - `localService`: optional provider-level process manager for local/self-hosted model servers. When the selected model belongs to that provider, OpenClaw probes `healthUrl` (or `baseUrl + "/models"`), starts `command` with `args` if the endpoint is down, waits up to `readyTimeoutMs`, then sends the model request. `command` must be an absolute path. `idleStopMs: 0` keeps the process alive until OpenClaw exits; a positive value stops the OpenClaw-spawned process after that many idle milliseconds. See [Local model services](/gateway/local-model-services). diff --git a/docs/gateway/config-tools.md b/docs/gateway/config-tools.md index 4575867c7e4..fcb9ac58489 100644 --- a/docs/gateway/config-tools.md +++ b/docs/gateway/config-tools.md @@ -535,7 +535,7 @@ Configuring a custom/local provider `baseUrl` is also the narrow network trust d - `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. Empty/omitted `baseUrl` keeps default OpenAI behavior. - `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure text `messages[].content` arrays into plain strings before sending the request. - `models.providers.*.models.*.compat.strictMessageKeys`: optional compatibility hint for strict OpenAI-compatible chat endpoints. When `true`, OpenClaw strips outgoing Chat Completions message objects to `role` and `content` before sending the request. - - `models.providers.*.models.*.compat.thinkingFormat`: optional thinking payload hint. Use `"together"` for Together-style `reasoning.enabled`, `"qwen"` for top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family OpenAI-compatible servers that support request-level chat-template kwargs, such as vLLM. + - `models.providers.*.models.*.compat.thinkingFormat`: optional thinking payload hint. Use `"together"` for Together-style `reasoning.enabled`, `"qwen"` for top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family OpenAI-compatible servers that support request-level chat-template kwargs, such as vLLM. Configured vLLM Qwen models expose binary `/think` choices (`off`, `on`) for these formats. diff --git a/docs/plugins/sdk-migration.md b/docs/plugins/sdk-migration.md index 733edfb692b..8ed28dbc2aa 100644 --- a/docs/plugins/sdk-migration.md +++ b/docs/plugins/sdk-migration.md @@ -818,6 +818,11 @@ canonical replacement. ranked level list. OpenClaw downgrades stale stored values by profile rank automatically. + The context includes `provider`, `modelId`, optional merged `reasoning`, + and optional merged model `compat` facts. Provider plugins can use those + catalog facts to expose a model-specific profile only when the configured + request contract supports it. + Implement one hook instead of three. The legacy hooks keep working during the deprecation window but are not composed with the profile result. diff --git a/docs/plugins/sdk-provider-plugins.md b/docs/plugins/sdk-provider-plugins.md index 45337814bd2..355ce68e7ae 100644 --- a/docs/plugins/sdk-provider-plugins.md +++ b/docs/plugins/sdk-provider-plugins.md @@ -501,6 +501,7 @@ API key auth, and dynamic model resolution. - `normalizeConfig` checks the matched provider first, then other hook-capable provider plugins until one actually changes the config. If no provider hook rewrites a supported Google-family config entry, the bundled Google config normalizer still applies. - `resolveConfigApiKey` uses the provider hook when exposed. The bundled `amazon-bedrock` path also has a built-in AWS env-marker resolver here, even though Bedrock runtime auth itself still uses the AWS SDK default chain. + - `resolveThinkingProfile(ctx)` receives the selected `provider`, `modelId`, optional merged `reasoning` catalog hint, and optional merged model `compat` facts. Use `compat` only to select the provider's thinking UI/profile. - `resolveSystemPromptContribution` lets a provider inject cache-aware system-prompt guidance for a model family. Prefer it over `before_prompt_build` when the behavior belongs to one provider/model family and should preserve the stable/dynamic cache split. For detailed descriptions and real-world examples, see [Internals: Provider Runtime Hooks](/plugins/architecture-internals#provider-runtime-hooks). diff --git a/docs/providers/vllm.md b/docs/providers/vllm.md index 2df557de3d1..cbf606a442d 100644 --- a/docs/providers/vllm.md +++ b/docs/providers/vllm.md @@ -145,8 +145,32 @@ wildcard to the visible model catalog: For Qwen models served through vLLM, set - `params.qwenThinkingFormat: "chat-template"` on the model entry when the - server expects Qwen chat-template kwargs. OpenClaw maps `/think off` to: + `compat.thinkingFormat: "qwen-chat-template"` on the configured provider + model row when the server expects Qwen chat-template kwargs. Models + configured this way expose a binary `/think` profile (`off`, `on`) because + Qwen template thinking is an on/off request flag, not an OpenAI-style effort + ladder. + + ```json5 + { + models: { + providers: { + vllm: { + models: [ + { + id: "Qwen/Qwen3-8B", + name: "Qwen3 8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ], + }, + }, + }, + } + ``` + + OpenClaw maps `/think off` to: ```json { @@ -159,8 +183,8 @@ wildcard to the visible model catalog: Non-`off` thinking levels send `enable_thinking: true`. If your endpoint expects DashScope-style top-level flags instead, use - `params.qwenThinkingFormat: "top-level"` to send `enable_thinking` at the - request root. Snake-case `params.qwen_thinking_format` is also accepted. + `compat.thinkingFormat: "qwen"` to send `enable_thinking` at the request + root. diff --git a/docs/tools/thinking.md b/docs/tools/thinking.md index c237bdfc15c..293e242b7aa 100644 --- a/docs/tools/thinking.md +++ b/docs/tools/thinking.md @@ -134,6 +134,7 @@ Malformed local-model reasoning tags are handled conservatively. Closed ` - Provider plugins can expose `resolveThinkingProfile(ctx)` to define the model's supported levels and default. - Provider plugins that proxy Claude models should reuse `resolveClaudeThinkingProfile(modelId)` from `openclaw/plugin-sdk/provider-model-shared` so direct Anthropic and proxy catalogs stay aligned. - Each profile level has a stored canonical `id` (`off`, `minimal`, `low`, `medium`, `high`, `xhigh`, `adaptive`, or `max`) and may include a display `label`. Binary providers use `{ id: "low", label: "on" }`. +- Profile hooks receive merged catalog facts when available, including `reasoning`, `compat.thinkingFormat`, and `compat.supportedReasoningEfforts`. Use those facts to expose binary or custom profiles only when the configured request contract supports the matching payload. - Tool plugins that need to validate an explicit thinking override should use `api.runtime.agent.resolveThinkingPolicy({ provider, model })` plus `api.runtime.agent.normalizeThinkingLevel(...)`; they should not keep their own provider/model level lists. - Tool plugins with access to configured custom model metadata can pass `catalog` into `resolveThinkingPolicy` so `compat.supportedReasoningEfforts` opt-ins are reflected in plugin-side validation. - Published legacy hooks (`supportsXHighThinking`, `isBinaryThinking`, and `resolveDefaultThinkingLevel`) remain as compatibility adapters, but new custom level sets should use `resolveThinkingProfile`. diff --git a/extensions/vllm/index.ts b/extensions/vllm/index.ts index 3130cf04cfc..f2116d2b1d4 100644 --- a/extensions/vllm/index.ts +++ b/extensions/vllm/index.ts @@ -11,6 +11,7 @@ import { VLLM_PROVIDER_LABEL, } from "./api.js"; import { wrapVllmProviderStream } from "./stream.js"; +import { resolveThinkingProfile } from "./thinking-policy.js"; const PROVIDER_ID = "vllm"; @@ -90,6 +91,7 @@ export default definePluginEntry({ "vLLM requires authentication to be registered as a provider. " + 'Set VLLM_API_KEY (any value works) or run "openclaw configure". ' + "See: https://docs.openclaw.ai/providers/vllm", + resolveThinkingProfile, wrapStreamFn: wrapVllmProviderStream, }); }, diff --git a/extensions/vllm/provider-discovery.contract.test.ts b/extensions/vllm/provider-discovery.contract.test.ts index 9b1ca99084f..46194aa3ca1 100644 --- a/extensions/vllm/provider-discovery.contract.test.ts +++ b/extensions/vllm/provider-discovery.contract.test.ts @@ -1,7 +1,28 @@ import { fileURLToPath } from "node:url"; +import { registerSingleProviderPlugin } from "openclaw/plugin-sdk/plugin-test-runtime"; import { describeVllmProviderDiscoveryContract } from "openclaw/plugin-sdk/provider-test-contracts"; +import { describe, expect, it } from "vitest"; +import vllmPlugin from "./index.js"; describeVllmProviderDiscoveryContract({ load: () => import("./index.js"), apiModuleId: fileURLToPath(new URL("./api.js", import.meta.url)), }); + +describe("vLLM provider registration", () => { + it("exposes the binary thinking profile hook", async () => { + const provider = await registerSingleProviderPlugin(vllmPlugin); + + expect( + provider.resolveThinkingProfile?.({ + provider: "vllm", + modelId: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }), + ).toEqual({ + levels: [{ id: "off" }, { id: "low", label: "on" }], + defaultLevel: "off", + }); + }); +}); diff --git a/extensions/vllm/provider-policy-api.test.ts b/extensions/vllm/provider-policy-api.test.ts new file mode 100644 index 00000000000..d9ff393b7bb --- /dev/null +++ b/extensions/vllm/provider-policy-api.test.ts @@ -0,0 +1,62 @@ +import { describe, expect, it } from "vitest"; +import { resolveThinkingProfile } from "./provider-policy-api.js"; + +describe("vLLM provider thinking policy", () => { + it("exposes a binary profile for configured Qwen chat-template models", () => { + expect( + resolveThinkingProfile({ + provider: "vllm", + modelId: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }), + ).toEqual({ + levels: [{ id: "off" }, { id: "low", label: "on" }], + defaultLevel: "off", + }); + }); + + it("uses configured Qwen compat even when catalog reasoning metadata is absent", () => { + expect( + resolveThinkingProfile({ + provider: "vllm", + modelId: "Qwen/Qwen3-8B", + compat: { thinkingFormat: "qwen-chat-template" }, + }), + ).toEqual({ + levels: [{ id: "off" }, { id: "low", label: "on" }], + defaultLevel: "off", + }); + }); + + it("exposes a binary profile for vLLM Nemotron 3 reasoning models", () => { + expect( + resolveThinkingProfile({ + provider: "vllm", + modelId: "nemotron-3-super", + reasoning: true, + }), + ).toEqual({ + levels: [{ id: "off" }, { id: "low", label: "on" }], + defaultLevel: "off", + }); + }); + + it("does not flatten unconfigured or non-reasoning vLLM models", () => { + expect( + resolveThinkingProfile({ + provider: "vllm", + modelId: "Qwen/Qwen3-8B", + reasoning: true, + }), + ).toBeNull(); + expect( + resolveThinkingProfile({ + provider: "vllm", + modelId: "Qwen/Qwen3-8B", + reasoning: false, + compat: { thinkingFormat: "qwen-chat-template" }, + }), + ).toBeNull(); + }); +}); diff --git a/extensions/vllm/provider-policy-api.ts b/extensions/vllm/provider-policy-api.ts new file mode 100644 index 00000000000..edc2c861187 --- /dev/null +++ b/extensions/vllm/provider-policy-api.ts @@ -0,0 +1 @@ +export { resolveThinkingProfile } from "./thinking-policy.js"; diff --git a/extensions/vllm/stream.test.ts b/extensions/vllm/stream.test.ts index 38ca89ce509..2030314db52 100644 --- a/extensions/vllm/stream.test.ts +++ b/extensions/vllm/stream.test.ts @@ -101,10 +101,19 @@ describe("createVllmQwenThinkingWrapper", () => { }); }); - it("skips non-reasoning and non-completions models", () => { + it("patches configured Qwen models unless reasoning is explicitly disabled", () => { + expect(capturePayload({ format: "chat-template", model: { reasoning: undefined } })).toEqual({ + chat_template_kwargs: { + enable_thinking: true, + preserve_thinking: true, + }, + }); expect(capturePayload({ format: "chat-template", model: { reasoning: false } })).toStrictEqual( {}, ); + }); + + it("skips non-completions models", () => { expect( capturePayload({ format: "chat-template", model: { api: "openai-responses" as never } }), ).toStrictEqual({}); @@ -186,7 +195,25 @@ describe("createVllmProviderThinkingWrapper", () => { }); describe("wrapVllmProviderStream", () => { - it("registers when vLLM Qwen thinking format params are configured", () => { + it("registers when vLLM Qwen thinking format compat is configured", () => { + expect( + wrapVllmProviderStream({ + provider: "vllm", + modelId: "Qwen/Qwen3-8B", + extraParams: {}, + model: { + api: "openai-completions", + provider: "vllm", + id: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + } as Model<"openai-completions">, + streamFn: undefined, + } as never), + ).toBeTypeOf("function"); + }); + + it("ignores request params when Qwen thinking format compat is not configured", () => { expect( wrapVllmProviderStream({ provider: "vllm", @@ -200,22 +227,42 @@ describe("wrapVllmProviderStream", () => { } as Model<"openai-completions">, streamFn: undefined, } as never), - ).toBeTypeOf("function"); + ).toBeUndefined(); + }); - expect( - wrapVllmProviderStream({ - provider: "vllm", - modelId: "Qwen/Qwen3-8B", - extraParams: { qwen_thinking_format: "enable_thinking" }, - model: { - api: "openai-completions", - provider: "vllm", - id: "Qwen/Qwen3-8B", - reasoning: true, - } as Model<"openai-completions">, - streamFn: undefined, - } as never), - ).toBeTypeOf("function"); + it("uses model compat for Qwen thinking format", () => { + let captured: Record = {}; + const baseStreamFn: StreamFn = (_model, _context, options) => { + const payload = {}; + options?.onPayload?.(payload, _model); + captured = payload; + return {} as ReturnType; + }; + const model = { + api: "openai-completions", + provider: "vllm", + id: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + } as unknown as Model<"openai-completions">; + const wrapped = wrapVllmProviderStream({ + provider: "vllm", + modelId: "Qwen/Qwen3-8B", + extraParams: {}, + thinkingLevel: "off", + model, + streamFn: baseStreamFn, + } as never); + + expect(wrapped).toBeTypeOf("function"); + void wrapped?.(model, { messages: [] } as Context, {}); + + expect(captured).toEqual({ + chat_template_kwargs: { + enable_thinking: false, + preserve_thinking: true, + }, + }); }); it("skips unconfigured vLLM and non-vLLM providers", () => { @@ -237,7 +284,7 @@ describe("wrapVllmProviderStream", () => { wrapVllmProviderStream({ provider: "openai", modelId: "gpt-5.4", - extraParams: { qwenThinkingFormat: "chat-template" }, + extraParams: {}, model: { api: "openai-completions", provider: "openai", diff --git a/extensions/vllm/stream.ts b/extensions/vllm/stream.ts index 603087bb40c..31ca0764e09 100644 --- a/extensions/vllm/stream.ts +++ b/extensions/vllm/stream.ts @@ -5,43 +5,21 @@ import { createPayloadPatchStreamWrapper, isOpenAICompatibleThinkingEnabled, } from "openclaw/plugin-sdk/provider-stream-shared"; +import { + resolveVllmQwenThinkingFormatFromCompat, + type VllmQwenThinkingFormat, +} from "./thinking-policy.js"; type VllmThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"]; -type VllmQwenThinkingFormat = "chat-template" | "top-level"; function isVllmProviderId(providerId: string): boolean { return normalizeProviderId(providerId) === "vllm"; } -function normalizeQwenThinkingFormat(value: unknown): VllmQwenThinkingFormat | undefined { - if (typeof value !== "string") { - return undefined; - } - const normalized = value.trim().toLowerCase().replace(/_/g, "-"); - if ( - normalized === "chat-template" || - normalized === "chat-template-kwargs" || - normalized === "chat-template-kwarg" || - normalized === "chat-template-arguments" - ) { - return "chat-template"; - } - if ( - normalized === "top-level" || - normalized === "enable-thinking" || - normalized === "request-body" - ) { - return "top-level"; - } - return undefined; -} - function resolveVllmQwenThinkingFormat( - extraParams: ProviderWrapStreamFnContext["extraParams"], + ctx: Pick, ): VllmQwenThinkingFormat | undefined { - return normalizeQwenThinkingFormat( - extraParams?.qwenThinkingFormat ?? extraParams?.qwen_thinking_format, - ); + return resolveVllmQwenThinkingFormatFromCompat(ctx.model?.compat); } function setQwenChatTemplateThinking(payload: Record, enabled: boolean): void { @@ -110,7 +88,7 @@ export function createVllmQwenThinkingWrapper(params: { delete payloadObj.reasoning; }, { - shouldPatch: ({ model }) => model.api === "openai-completions" && model.reasoning, + shouldPatch: ({ model }) => model.api === "openai-completions" && (model.reasoning ?? true), }, ); } @@ -145,7 +123,7 @@ export function wrapVllmProviderStream(ctx: ProviderWrapStreamFnContext): Stream if (!isVllmProviderId(ctx.provider) || (ctx.model && ctx.model.api !== "openai-completions")) { return undefined; } - const qwenFormat = resolveVllmQwenThinkingFormat(ctx.extraParams); + const qwenFormat = resolveVllmQwenThinkingFormat(ctx); const shouldHandleNemotron = ctx.thinkingLevel === "off" && isVllmNemotronModel({ diff --git a/extensions/vllm/thinking-policy.ts b/extensions/vllm/thinking-policy.ts new file mode 100644 index 00000000000..2f397bce1be --- /dev/null +++ b/extensions/vllm/thinking-policy.ts @@ -0,0 +1,65 @@ +import type { + ProviderDefaultThinkingPolicyContext, + ProviderThinkingProfile, +} from "openclaw/plugin-sdk/plugin-entry"; +import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared"; + +export type VllmQwenThinkingFormat = "chat-template" | "top-level"; + +const VLLM_BINARY_THINKING_PROFILE = { + levels: [{ id: "off" }, { id: "low", label: "on" }], + defaultLevel: "off", +} satisfies ProviderThinkingProfile; + +export function normalizeVllmQwenThinkingFormat( + value: unknown, +): VllmQwenThinkingFormat | undefined { + if (typeof value !== "string") { + return undefined; + } + const normalized = value.trim().toLowerCase().replace(/_/g, "-"); + if ( + normalized === "chat-template" || + normalized === "chat-template-kwargs" || + normalized === "chat-template-kwarg" || + normalized === "chat-template-arguments" || + normalized === "qwen-chat-template" + ) { + return "chat-template"; + } + if ( + normalized === "top-level" || + normalized === "enable-thinking" || + normalized === "request-body" || + normalized === "qwen" + ) { + return "top-level"; + } + return undefined; +} + +export function resolveVllmQwenThinkingFormatFromCompat( + compat?: ProviderDefaultThinkingPolicyContext["compat"], +): VllmQwenThinkingFormat | undefined { + return normalizeVllmQwenThinkingFormat(compat?.thinkingFormat); +} + +function isVllmNemotronThinkingModel(modelId: string): boolean { + return /\bnemotron-3(?:[-_](?:nano|super|ultra))?\b/i.test(modelId); +} + +export function resolveThinkingProfile( + ctx: ProviderDefaultThinkingPolicyContext, +): ProviderThinkingProfile | null { + if (normalizeProviderId(ctx.provider) !== "vllm") { + return null; + } + if (ctx.reasoning === false) { + return null; + } + const qwenFormat = resolveVllmQwenThinkingFormatFromCompat(ctx.compat); + if (qwenFormat || (ctx.reasoning === true && isVllmNemotronThinkingModel(ctx.modelId))) { + return VLLM_BINARY_THINKING_PROFILE; + } + return null; +} diff --git a/src/agents/model-catalog.test.ts b/src/agents/model-catalog.test.ts index fa7248610a5..95d1ddd3a7a 100644 --- a/src/agents/model-catalog.test.ts +++ b/src/agents/model-catalog.test.ts @@ -1033,6 +1033,100 @@ describe("loadModelCatalog", () => { expect(entry.contextWindow).toBe(128_000); }); + it("overlays configured model compat onto discovered catalog rows", async () => { + mockPiDiscoveryModels([ + { + id: "Qwen/Qwen3-8B", + name: "Qwen3 8B", + provider: "vllm", + reasoning: false, + compat: { supportsStrictMode: false }, + }, + ]); + + const result = await loadModelCatalog({ + config: { + models: { + providers: { + vllm: { + baseUrl: "http://localhost:9000/v1", + api: "openai-completions", + models: [ + { + id: "vllm/Qwen/Qwen3-8B", + name: "Configured Qwen3 8B", + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig, + }); + + const entry = requireCatalogEntry(result, "vllm", "Qwen/Qwen3-8B"); + expect(result.filter((entry) => entry.provider === "vllm")).toHaveLength(1); + expect(entry.name).toBe("Qwen3 8B"); + expect(entry.reasoning).toBe(true); + expect(entry.compat).toEqual( + expect.objectContaining({ + supportsStrictMode: false, + thinkingFormat: "qwen-chat-template", + }), + ); + }); + + it("overlays configured model compat onto persisted read-only catalog rows", async () => { + readFileMock.mockResolvedValue( + JSON.stringify({ + providers: { + vllm: { + models: [ + { + id: "Qwen/Qwen3-8B", + name: "Qwen3 8B", + reasoning: false, + compat: { supportsStrictMode: false }, + }, + ], + }, + }, + }), + ); + + const result = await loadModelCatalog({ + config: { + models: { + providers: { + vllm: { + baseUrl: "http://localhost:9000/v1", + api: "openai-completions", + models: [ + { + id: "vllm/Qwen/Qwen3-8B", + name: "Configured Qwen3 8B", + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig, + readOnly: true, + }); + + const entry = requireCatalogEntry(result, "vllm", "Qwen/Qwen3-8B"); + expect(result.filter((entry) => entry.provider === "vllm")).toHaveLength(1); + expect(entry.name).toBe("Qwen3 8B"); + expect(entry.reasoning).toBe(true); + expect(entry.compat).toEqual( + expect.objectContaining({ + supportsStrictMode: false, + thinkingFormat: "qwen-chat-template", + }), + ); + }); + it("merges manifest model catalog rows on the normal catalog path", async () => { mockSingleOpenAiCatalogModel(); currentPluginMetadataSnapshotMock.mockReturnValue({ diff --git a/src/agents/model-catalog.ts b/src/agents/model-catalog.ts index b5c6de3b00b..e225e0f29ec 100644 --- a/src/agents/model-catalog.ts +++ b/src/agents/model-catalog.ts @@ -21,6 +21,7 @@ import { resolveDefaultAgentDir } from "./agent-scope.js"; import { modelSupportsInput as modelCatalogEntrySupportsInput } from "./model-catalog-lookup.js"; import type { ModelCatalogEntry, ModelInputType } from "./model-catalog.types.js"; import { + modelKey, normalizeConfiguredProviderCatalogModelId, type ProviderModelIdNormalizationOptions, } from "./model-ref-shared.js"; @@ -112,7 +113,8 @@ function instantiatePiModelRegistry( } function catalogEntryDedupeKey(provider: string, id: string): string { - return `${normalizeProviderId(provider)}::${normalizeLowercaseStringOrEmpty(id)}`; + const normalizedProvider = normalizeProviderId(provider); + return normalizeLowercaseStringOrEmpty(modelKey(normalizedProvider, id)); } function appendCatalogEntriesIfAbsent( @@ -130,6 +132,52 @@ function appendCatalogEntriesIfAbsent( } } +function mergeCatalogCompat( + base: ModelCatalogEntry["compat"] | undefined, + override: ModelCatalogEntry["compat"] | undefined, +): ModelCatalogEntry["compat"] | undefined { + if (!base) { + return override; + } + if (!override) { + return base; + } + return { ...base, ...override }; +} + +function overlayConfiguredCatalogMetadata( + base: ModelCatalogEntry, + configured: ModelCatalogEntry, +): ModelCatalogEntry { + return { + ...base, + ...(configured.contextWindow !== undefined ? { contextWindow: configured.contextWindow } : {}), + ...(configured.contextTokens !== undefined ? { contextTokens: configured.contextTokens } : {}), + ...(configured.reasoning !== undefined ? { reasoning: configured.reasoning } : {}), + ...(configured.input !== undefined ? { input: configured.input } : {}), + compat: mergeCatalogCompat(base.compat, configured.compat), + }; +} + +function mergeConfiguredCatalogEntries( + models: ModelCatalogEntry[], + entries: ModelCatalogEntry[], +): void { + const indexByKey = new Map( + models.map((entry, index) => [catalogEntryDedupeKey(entry.provider, entry.id), index]), + ); + for (const entry of entries) { + const key = catalogEntryDedupeKey(entry.provider, entry.id); + const existingIndex = indexByKey.get(key); + if (existingIndex === undefined) { + models.push(entry); + indexByKey.set(key, models.length - 1); + continue; + } + models[existingIndex] = overlayConfiguredCatalogMetadata(models[existingIndex], entry); + } +} + export function loadManifestModelCatalog(params: { config: OpenClawConfig; workspaceDir?: string; @@ -319,7 +367,7 @@ async function loadReadOnlyPersistedModelCatalog(params?: { manifestPlugins: hasConfiguredProviderModelRows(cfg) ? getManifestPlugins() : undefined, }); if (configuredModels.length > 0) { - appendCatalogEntriesIfAbsent(models, configuredModels); + mergeConfiguredCatalogEntries(models, configuredModels); } return sortModelCatalogEntries(models); } @@ -371,7 +419,7 @@ function loadReadOnlyStaticModelCatalog(params?: { manifestPlugins: configuredManifestPlugins, }); if (configuredModels.length > 0) { - appendCatalogEntriesIfAbsent(models, configuredModels); + mergeConfiguredCatalogEntries(models, configuredModels); } return sortModelCatalogEntries(models); } @@ -537,7 +585,7 @@ export async function loadModelCatalog(params?: { manifestPlugins: hasConfiguredProviderModelRows(cfg) ? getManifestPlugins() : undefined, }); if (configuredModels.length > 0) { - appendCatalogEntriesIfAbsent(models, configuredModels); + mergeConfiguredCatalogEntries(models, configuredModels); } logStage("configured-models-merged", `entries=${models.length}`); diff --git a/src/agents/model-selection-shared.ts b/src/agents/model-selection-shared.ts index 7885a609550..e069e24e0ce 100644 --- a/src/agents/model-selection-shared.ts +++ b/src/agents/model-selection-shared.ts @@ -562,10 +562,6 @@ function buildModelCatalogMetadata( if (rawKey.trim().endsWith("/*")) { continue; } - const alias = ((entryRaw as { alias?: string } | undefined)?.alias ?? "").trim(); - if (!alias) { - continue; - } const key = resolveAllowlistModelKey({ cfg: params.cfg, raw: rawKey, @@ -577,7 +573,10 @@ function buildModelCatalogMetadata( if (!key) { continue; } - aliasByKey.set(key, alias); + const alias = ((entryRaw as { alias?: string } | undefined)?.alias ?? "").trim(); + if (alias) { + aliasByKey.set(key, alias); + } } return { configuredByKey, aliasByKey }; @@ -598,7 +597,10 @@ function applyModelCatalogMetadata(params: { const nextContextTokens = configuredEntry?.contextTokens ?? params.entry.contextTokens; const nextReasoning = configuredEntry?.reasoning ?? params.entry.reasoning; const nextInput = configuredEntry?.input ?? params.entry.input; - const nextCompat = configuredEntry?.compat ?? params.entry.compat; + const nextCompat = + params.entry.compat || configuredEntry?.compat + ? { ...params.entry.compat, ...configuredEntry?.compat } + : undefined; return { ...params.entry, @@ -1180,9 +1182,14 @@ export function buildConfiguredModelCatalog(params: { typeof model?.contextTokens === "number" && model.contextTokens > 0 ? model.contextTokens : undefined; - const reasoning = typeof model?.reasoning === "boolean" ? model.reasoning : undefined; const input = Array.isArray(model?.input) ? model.input : undefined; const compat = model?.compat && typeof model.compat === "object" ? model.compat : undefined; + const reasoning = + typeof model?.reasoning === "boolean" + ? model.reasoning + : isVllmQwenThinkingCompat(providerId, compat) + ? true + : undefined; catalog.push({ provider: providerId, id, @@ -1199,6 +1206,16 @@ export function buildConfiguredModelCatalog(params: { return catalog; } +function isVllmQwenThinkingCompat( + providerId: string, + compat?: { thinkingFormat?: unknown } | null, +): boolean { + return ( + providerId === "vllm" && + (compat?.thinkingFormat === "qwen" || compat?.thinkingFormat === "qwen-chat-template") + ); +} + export function resolveHooksGmailModel( params: { cfg: OpenClawConfig; diff --git a/src/agents/model-selection.test.ts b/src/agents/model-selection.test.ts index 3562d9a84b8..fe3326e1e73 100644 --- a/src/agents/model-selection.test.ts +++ b/src/agents/model-selection.test.ts @@ -828,6 +828,59 @@ describe("model-selection", () => { expect(model?.id).toBe("google/gemini-3.1-pro-preview"); expect(model?.name).toBe("Gemini 3 Pro"); }); + + it("carries configured model compat into catalog entries for provider policy", () => { + const cfg = { + models: { + providers: { + vllm: { + models: [ + { + id: "Qwen/Qwen3-8B", + name: "Qwen 3 8B", + reasoning: true, + compat: { + thinkingFormat: "qwen-chat-template", + }, + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const model = buildConfiguredModelCatalog({ cfg }).find( + (entry) => entry.provider === "vllm" && entry.id === "Qwen/Qwen3-8B", + ); + expect(model?.compat).toEqual({ thinkingFormat: "qwen-chat-template" }); + expect(model?.reasoning).toBe(true); + }); + + it("does not infer reasoning from non-vLLM thinking compat", () => { + const cfg = { + models: { + providers: { + custom: { + models: [ + { + id: "custom-reasoning", + name: "Custom Reasoning", + compat: { + thinkingFormat: "together", + }, + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const model = buildConfiguredModelCatalog({ cfg }).find( + (entry) => entry.provider === "custom" && entry.id === "custom-reasoning", + ); + expect(model?.compat).toEqual({ thinkingFormat: "together" }); + expect(model?.reasoning).toBeUndefined(); + }); }); describe("buildModelAliasIndex", () => { @@ -953,6 +1006,43 @@ describe("model-selection", () => { ]); }); + it("overlays configured provider metadata after manifest model normalization", () => { + const cfg: OpenClawConfig = { + models: { + providers: { + nvidia: { + models: [ + { + id: "llama-fast", + name: "Configured Llama Fast", + contextWindow: 128_000, + reasoning: true, + compat: { thinkingFormat: "qwen" }, + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = buildAllowedModelSet({ + cfg, + catalog: [{ provider: "nvidia", id: "nvidia/llama-fast", name: "Runtime Llama Fast" }], + defaultProvider: "anthropic", + }); + + expect(result.allowedCatalog).toEqual([ + { + provider: "nvidia", + id: "nvidia/llama-fast", + name: "Configured Llama Fast", + contextWindow: 128_000, + reasoning: true, + compat: { thinkingFormat: "qwen" }, + }, + ]); + }); + it("keeps configured provider models visible when the catalog is otherwise allow-any", () => { const cfg: OpenClawConfig = { agents: { diff --git a/src/agents/pi-embedded-runner/model.test.ts b/src/agents/pi-embedded-runner/model.test.ts index c7cac2f1ad6..46a1db5815e 100644 --- a/src/agents/pi-embedded-runner/model.test.ts +++ b/src/agents/pi-embedded-runner/model.test.ts @@ -1499,6 +1499,101 @@ describe("resolveModel", () => { expect(result.model?.reasoning).toBe(true); }); + it("propagates compat from matching configured fallback model", () => { + const cfg = { + models: { + providers: { + vllm: { + baseUrl: "http://localhost:9000", + api: "openai-completions", + models: [ + { + ...makeModel("Qwen/Qwen3-8B"), + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("vllm", "Qwen/Qwen3-8B", "/tmp/agent", cfg); + + expect(result.error).toBeUndefined(); + expect(result.model?.compat).toEqual( + expect.objectContaining({ thinkingFormat: "qwen-chat-template" }), + ); + expect(result.model?.reasoning).toBe(false); + }); + + it("lets configured vLLM Qwen compat override stale discovered reasoning", () => { + mockDiscoveredModel(discoverModels, { + provider: "vllm", + modelId: "Qwen/Qwen3-8B", + templateModel: { + ...makeModel("Qwen/Qwen3-8B"), + provider: "vllm", + api: "openai-completions", + baseUrl: "http://localhost:9000", + reasoning: false, + compat: { supportsStrictMode: false }, + }, + }); + const cfg = { + models: { + providers: { + vllm: { + baseUrl: "http://localhost:9000", + api: "openai-completions", + models: [ + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("vllm", "Qwen/Qwen3-8B", "/tmp/agent", cfg); + + expect(result.error).toBeUndefined(); + expect(result.model?.reasoning).toBe(true); + expect(result.model?.compat).toEqual( + expect.objectContaining({ + supportsStrictMode: false, + thinkingFormat: "qwen-chat-template", + }), + ); + }); + + it("infers reasoning for matching vLLM Qwen compat fallback models", () => { + const cfg = { + models: { + providers: { + vllm: { + baseUrl: "http://localhost:9000", + api: "openai-completions", + models: [ + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = resolveModelForTest("vllm", "Qwen/Qwen3-8B", "/tmp/agent", cfg); + + expect(result.error).toBeUndefined(); + expect(result.model?.reasoning).toBe(true); + }); + it("propagates image input capability from matching configured fallback model", () => { const cfg = { models: { diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index 9c6b2305ac3..fee53cc5242 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -5,7 +5,7 @@ import { type AuthStorage, type ModelRegistry, } from "@earendil-works/pi-coding-agent"; -import type { ModelMediaInputConfig } from "../../config/types.models.js"; +import type { ModelCompatConfig, ModelMediaInputConfig } from "../../config/types.models.js"; import type { OpenClawConfig } from "../../config/types.openclaw.js"; import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js"; import { @@ -688,6 +688,14 @@ function applyConfiguredProviderOverrides(params: { metadataOverrideModel?.contextWindow ?? providerConfig.contextWindow; const resolvedMaxTokens = metadataOverrideModel?.maxTokens ?? providerConfig.maxTokens ?? discoveredModel.maxTokens; + const resolvedCompat = mergeModelCompat(discoveredModel.compat, metadataOverrideModel?.compat); + const resolvedReasoning = resolveMergedConfiguredModelReasoning({ + provider: params.provider, + configuredCompat: metadataOverrideModel?.compat, + resolvedCompat, + configuredReasoning: metadataOverrideModel?.reasoning, + discoveredReasoning: discoveredModel.reasoning, + }); const requestConfig = resolveProviderRequestConfig({ provider: params.provider, api: @@ -710,7 +718,7 @@ function applyConfiguredProviderOverrides(params: { ...discoveredModel, api: requestConfig.api ?? "openai-responses", baseUrl: requestConfig.baseUrl ?? discoveredModel.baseUrl, - reasoning: metadataOverrideModel?.reasoning ?? discoveredModel.reasoning, + reasoning: resolvedReasoning, input: normalizedInput, cost: metadataOverrideModel?.cost ?? discoveredModel.cost, contextWindow: resolvedContextWindow ?? discoveredModel.contextWindow, @@ -725,7 +733,7 @@ function applyConfiguredProviderOverrides(params: { ...(resolvedParams ? { params: resolvedParams } : {}), ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}), headers: requestConfig.headers, - compat: metadataOverrideModel?.compat ?? discoveredModel.compat, + compat: resolvedCompat, mediaInput: mergeModelMediaInput( discoveredModel.mediaInput, metadataOverrideModel?.mediaInput, @@ -778,6 +786,11 @@ function resolveExplicitModelWithRegistry(params: { workspaceDir, model: { ...inlineMatch, + reasoning: resolveConfiguredModelReasoning({ + provider, + compat: inlineMatch.compat, + reasoning: inlineMatch.reasoning, + }), ...(resolvedParams ? { params: resolvedParams } : {}), ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}), } as Model, @@ -842,6 +855,11 @@ function resolveExplicitModelWithRegistry(params: { workspaceDir, model: { ...fallbackInlineMatch, + reasoning: resolveConfiguredModelReasoning({ + provider, + compat: fallbackInlineMatch.compat, + reasoning: fallbackInlineMatch.reasoning, + }), ...(resolvedParams ? { params: resolvedParams } : {}), ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}), } as Model, @@ -961,6 +979,11 @@ function resolveConfiguredFallbackModel(params: { capability: "llm", transport: "stream", }); + const fallbackReasoning = resolveConfiguredFallbackReasoning({ + provider, + compat: configuredModel?.compat, + reasoning: configuredModel?.reasoning, + }); return normalizeResolvedModel({ provider, cfg, @@ -974,7 +997,7 @@ function resolveConfiguredFallbackModel(params: { api: requestConfig.api ?? "openai-responses", provider, baseUrl: requestConfig.baseUrl, - reasoning: configuredModel?.reasoning ?? false, + reasoning: fallbackReasoning, input: resolveProviderModelInput({ provider, modelId, @@ -999,6 +1022,7 @@ function resolveConfiguredFallbackModel(params: { ...(resolvedParams ? { params: resolvedParams } : {}), ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}), headers: requestConfig.headers, + compat: configuredModel?.compat, mediaInput: configuredModel?.mediaInput, } as Model, providerRequest, @@ -1033,6 +1057,71 @@ function shouldCompareProviderRuntimeResolvedModel(params: { ); } +function resolveConfiguredFallbackReasoning(params: { + provider: string; + compat?: { thinkingFormat?: string } | null; + reasoning?: boolean; +}): boolean { + return resolveConfiguredModelReasoning(params) ?? false; +} + +function resolveConfiguredModelReasoning(params: { + provider: string; + compat?: { thinkingFormat?: string } | null; + reasoning?: boolean; +}): boolean | undefined { + if (params.reasoning !== undefined) { + return params.reasoning; + } + return isVllmQwenThinkingCompat(params) ? true : undefined; +} + +function resolveMergedConfiguredModelReasoning(params: { + provider: string; + configuredCompat?: { thinkingFormat?: string } | null; + resolvedCompat?: { thinkingFormat?: string } | null; + configuredReasoning?: boolean; + discoveredReasoning?: boolean; +}): boolean { + if (params.configuredReasoning !== undefined) { + return params.configuredReasoning; + } + if (isVllmQwenThinkingCompat({ provider: params.provider, compat: params.configuredCompat })) { + return true; + } + return ( + resolveConfiguredModelReasoning({ + provider: params.provider, + compat: params.resolvedCompat, + reasoning: params.discoveredReasoning, + }) ?? false + ); +} + +function isVllmQwenThinkingCompat(params: { + provider: string; + compat?: { thinkingFormat?: string } | null; +}): boolean { + const thinkingFormat = params.compat?.thinkingFormat; + return ( + normalizeProviderId(params.provider) === "vllm" && + (thinkingFormat === "qwen" || thinkingFormat === "qwen-chat-template") + ); +} + +function mergeModelCompat( + base: ModelCompatConfig | undefined, + override: ModelCompatConfig | undefined, +): ModelCompatConfig | undefined { + if (!base) { + return override; + } + if (!override) { + return base; + } + return { ...base, ...override }; +} + function preferProviderRuntimeResolvedModel(params: { explicitModel: Model; runtimeResolvedModel?: Model; diff --git a/src/auto-reply/reply/model-selection.test.ts b/src/auto-reply/reply/model-selection.test.ts index e98eb44b4fc..f0d3b2c0659 100644 --- a/src/auto-reply/reply/model-selection.test.ts +++ b/src/auto-reply/reply/model-selection.test.ts @@ -63,7 +63,7 @@ const makeConfiguredModel = (overrides: Record = {}) => ({ id: "gpt-5.4", name: "GPT-5.4", reasoning: true, - input: ["text"], + input: ["text"] as Array<"text">, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: 128_000, maxTokens: 16_384, @@ -215,6 +215,115 @@ describe("createModelSelectionState catalog loading", () => { expect(loadModelCatalog).not.toHaveBeenCalled(); }); + it("keeps configured compat when manifest thinking metadata is used", async () => { + vi.mocked(loadModelCatalog).mockClear(); + vi.mocked(loadManifestModelCatalog).mockReturnValueOnce([ + { provider: "vllm", id: "Qwen/Qwen3-8B", name: "Qwen3", reasoning: true }, + ]); + const cfg = { + agents: { + defaults: { + models: { + "vllm/Qwen/Qwen3-8B": {}, + }, + }, + }, + models: { + providers: { + vllm: { + baseUrl: "http://localhost:9000/v1", + models: [ + makeConfiguredModel({ + id: "Qwen/Qwen3-8B", + name: "Qwen3", + compat: { thinkingFormat: "qwen-chat-template" }, + }), + ], + }, + }, + }, + } as OpenClawConfig; + + const state = await createModelSelectionState({ + cfg, + agentCfg: cfg.agents?.defaults, + defaultProvider: "vllm", + defaultModel: "Qwen/Qwen3-8B", + provider: "vllm", + model: "Qwen/Qwen3-8B", + hasModelDirective: false, + }); + + await expect(state.resolveThinkingCatalog()).resolves.toEqual([ + expect.objectContaining({ + provider: "vllm", + id: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }), + ]); + expect(loadModelCatalog).not.toHaveBeenCalled(); + }); + + it("keeps configured compat when runtime thinking catalog is already loaded", async () => { + vi.mocked(loadModelCatalog).mockClear(); + vi.mocked(loadModelCatalog).mockResolvedValueOnce([ + { + provider: "vllm", + id: "Qwen/Qwen3-8B", + name: "Qwen3", + reasoning: true, + compat: { supportedReasoningEfforts: ["xhigh"] }, + }, + ]); + const cfg = { + agents: { + defaults: { + models: { + "vllm/Qwen/Qwen3-8B": {}, + }, + }, + }, + models: { + providers: { + vllm: { + baseUrl: "http://localhost:9000/v1", + models: [ + makeConfiguredModel({ + id: "Qwen/Qwen3-8B", + name: "Qwen3", + compat: { thinkingFormat: "qwen-chat-template" }, + }), + ], + }, + }, + }, + } as OpenClawConfig; + + const state = await createModelSelectionState({ + cfg, + agentCfg: cfg.agents?.defaults, + defaultProvider: "vllm", + defaultModel: "Qwen/Qwen3-8B", + provider: "vllm", + model: "Qwen/Qwen3-8B", + hasModelDirective: true, + }); + + await expect(state.resolveThinkingCatalog()).resolves.toEqual([ + expect.objectContaining({ + provider: "vllm", + id: "Qwen/Qwen3-8B", + reasoning: true, + compat: { + supportedReasoningEfforts: ["xhigh"], + thinkingFormat: "qwen-chat-template", + }, + }), + ]); + expect(loadModelCatalog).toHaveBeenCalledOnce(); + }); + it("prefers per-agent thinkingDefault over model and global defaults", async () => { vi.mocked(loadModelCatalog).mockClear(); const cfg = { diff --git a/src/auto-reply/reply/model-selection.ts b/src/auto-reply/reply/model-selection.ts index c4ab4106508..eb6bd1f8dc8 100644 --- a/src/auto-reply/reply/model-selection.ts +++ b/src/auto-reply/reply/model-selection.ts @@ -97,10 +97,8 @@ function findSelectedCatalogEntry(params: { model: string; }): ModelCatalogEntry | undefined { const normalizedProvider = normalizeProviderId(params.provider); - return params.catalog?.find( - (entry) => - normalizeProviderId(entry.provider) === normalizedProvider && entry.id === params.model, - ); + const selectedKey = modelKey(normalizedProvider, params.model); + return params.catalog?.find((entry) => modelKey(entry.provider, entry.id) === selectedKey); } export async function createModelSelectionState(params: { @@ -360,6 +358,15 @@ export async function createModelSelectionState(params: { let thinkingCatalog: ModelCatalog | undefined; let manifestModelCatalog: ModelCatalog | null = null; + const buildThinkingCatalog = (catalog: ModelCatalog): ModelCatalog => + createModelVisibilityPolicy({ + cfg, + catalog, + defaultProvider, + defaultModel, + agentId: params.agentId, + ...RUNTIME_MODEL_VISIBILITY_NORMALIZATION, + }).allowedCatalog; const loadManifestCatalogForThinking = async () => { if (manifestModelCatalog) { return manifestModelCatalog; @@ -377,7 +384,11 @@ export async function createModelSelectionState(params: { return thinkingCatalog; } let catalogForThinking = - modelCatalog && modelCatalog.length > 0 ? modelCatalog : allowedModelCatalog; + allowedModelCatalog.length > 0 + ? allowedModelCatalog + : modelCatalog && modelCatalog.length > 0 + ? buildThinkingCatalog(modelCatalog) + : []; let selectedCatalogEntry = findSelectedCatalogEntry({ catalog: catalogForThinking, provider, @@ -387,7 +398,7 @@ export async function createModelSelectionState(params: { // allowlist rows know only provider/id; manifest rows can prove reasoning // support without opening the Pi auth-backed model registry. if (!modelCatalog && selectedCatalogEntry?.reasoning === undefined) { - const manifestCatalog = await loadManifestCatalogForThinking(); + const manifestCatalog = buildThinkingCatalog(await loadManifestCatalogForThinking()); const manifestSelectedEntry = findSelectedCatalogEntry({ catalog: manifestCatalog, provider, @@ -403,13 +414,16 @@ export async function createModelSelectionState(params: { if (shouldHydrateRuntimeCatalog) { modelCatalog = await (await loadModelCatalogRuntime()).loadModelCatalog({ config: cfg }); logStage("catalog-loaded-for-thinking", `entries=${modelCatalog.length}`); - const runtimeSelectedEntry = modelCatalog.find( - (entry) => entry.provider === provider && entry.id === model, - ); + const runtimeCatalog = buildThinkingCatalog(modelCatalog); + const runtimeSelectedEntry = findSelectedCatalogEntry({ + catalog: runtimeCatalog, + provider, + model, + }); catalogForThinking = runtimeSelectedEntry || !catalogForThinking || catalogForThinking.length === 0 - ? modelCatalog.length > 0 - ? modelCatalog + ? runtimeCatalog.length > 0 + ? runtimeCatalog : allowedModelCatalog : allowedModelCatalog; } diff --git a/src/auto-reply/thinking.shared.ts b/src/auto-reply/thinking.shared.ts index 0d1a8b4e707..5629d7aa526 100644 --- a/src/auto-reply/thinking.shared.ts +++ b/src/auto-reply/thinking.shared.ts @@ -27,6 +27,7 @@ export type ThinkingCatalogEntry = { id: string; reasoning?: boolean; compat?: { + thinkingFormat?: string; supportedReasoningEfforts?: readonly string[] | null; } | null; }; diff --git a/src/auto-reply/thinking.test.ts b/src/auto-reply/thinking.test.ts index 680fd9f3e64..cf70b5f744f 100644 --- a/src/auto-reply/thinking.test.ts +++ b/src/auto-reply/thinking.test.ts @@ -255,6 +255,64 @@ describe("listThinkingLevels", () => { ).toBe("max"); }); + it("passes catalog compat into provider thinking profiles", () => { + providerRuntimeMocks.resolveProviderThinkingProfile.mockImplementation(({ context }) => + context.reasoning === true && context.compat?.thinkingFormat === "qwen-chat-template" + ? { + levels: [{ id: "off" }, { id: "low", label: "on" }], + defaultLevel: "off", + } + : undefined, + ); + const catalog = [ + { + provider: "vllm", + id: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ]; + + expect(listThinkingLevelLabels("vllm", "Qwen/Qwen3-8B", catalog)).toEqual(["off", "on"]); + expect( + resolveSupportedThinkingLevel({ + provider: "vllm", + model: "Qwen/Qwen3-8B", + level: "high", + catalog, + }), + ).toBe("low"); + }); + + it("matches provider-qualified catalog ids for provider thinking profiles", () => { + providerRuntimeMocks.resolveProviderThinkingProfile.mockImplementation(({ context }) => + context.reasoning === true && context.compat?.thinkingFormat === "qwen-chat-template" + ? { + levels: [{ id: "off" }, { id: "low", label: "on" }], + defaultLevel: "off", + } + : undefined, + ); + const catalog = [ + { + provider: "vllm", + id: "vllm/Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ]; + + expect(listThinkingLevelLabels("vllm", "Qwen/Qwen3-8B", catalog)).toEqual(["off", "on"]); + expect( + resolveSupportedThinkingLevel({ + provider: "vllm", + model: "Qwen/Qwen3-8B", + level: "high", + catalog, + }), + ).toBe("low"); + }); + it("uses catalog compat reasoning efforts to expose xhigh for configured custom models", () => { const catalog = [ { diff --git a/src/auto-reply/thinking.ts b/src/auto-reply/thinking.ts index fa3c00e1b59..8589c6523b2 100644 --- a/src/auto-reply/thinking.ts +++ b/src/auto-reply/thinking.ts @@ -57,6 +57,22 @@ type ResolvedThinkingProfile = { defaultLevel?: ThinkLevel | null; }; +function buildCatalogModelKey(provider: string, model: string): string { + const providerId = provider.trim(); + const modelId = model.trim(); + if (!providerId) { + return modelId; + } + if (!modelId) { + return providerId; + } + return normalizeOptionalLowercaseString(modelId)?.startsWith( + `${normalizeOptionalLowercaseString(providerId)}/`, + ) + ? modelId + : `${providerId}/${modelId}`; +} + function resolveThinkingPolicyContext(params: { provider?: string | null; model?: string | null; @@ -66,8 +82,12 @@ function resolveThinkingPolicyContext(params: { const normalizedProvider = providerRaw ? normalizeProviderId(providerRaw) : ""; const modelId = normalizeOptionalString(params.model) ?? ""; const modelKey = normalizeOptionalLowercaseString(params.model) ?? ""; + const selectedCatalogKey = + normalizedProvider && modelId ? buildCatalogModelKey(normalizedProvider, modelId) : undefined; const candidate = params.catalog?.find( - (entry) => normalizeProviderId(entry.provider) === normalizedProvider && entry.id === modelId, + (entry) => + selectedCatalogKey !== undefined && + buildCatalogModelKey(normalizeProviderId(entry.provider), entry.id) === selectedCatalogKey, ); return { normalizedProvider, @@ -165,6 +185,7 @@ export function resolveThinkingProfile(params: { provider: context.normalizedProvider, modelId: context.modelId, reasoning: context.reasoning, + compat: context.compat, }; const pluginProfile = resolveProviderThinkingProfile({ provider: context.normalizedProvider, diff --git a/src/commands/doctor/shared/legacy-config-migrate.test.ts b/src/commands/doctor/shared/legacy-config-migrate.test.ts index 2f74cb70bd0..2e2339f8ff2 100644 --- a/src/commands/doctor/shared/legacy-config-migrate.test.ts +++ b/src/commands/doctor/shared/legacy-config-migrate.test.ts @@ -1700,6 +1700,647 @@ describe("legacy model compat migrate", () => { ]); }); + it("moves legacy vLLM Qwen thinking params to model compat", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + models: { + "vllm/Qwen/Qwen3-8B": { + params: { + qwenThinkingFormat: "chat-template", + temperature: 0.2, + }, + }, + }, + }, + }, + models: { + providers: { + vllm: { + models: [{ id: "Qwen/Qwen3-8B", name: "Qwen3 8B" }], + }, + }, + }, + }); + + expect(res.config?.agents?.defaults?.models?.["vllm/Qwen/Qwen3-8B"]?.params).toEqual({ + temperature: 0.2, + }); + expect(res.config?.models?.providers?.vllm?.models?.[0]?.compat).toEqual({ + thinkingFormat: "qwen-chat-template", + }); + expect(res.config?.models?.providers?.vllm?.models?.[0]?.reasoning).toBe(true); + expect(res.changes).toStrictEqual([ + 'Moved agents.defaults.models."vllm/Qwen/Qwen3-8B".params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").', + ]); + }); + + it("moves legacy vLLM Qwen thinking params from normalized agent model refs", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + models: { + "VLLM/Qwen/Qwen3-8B": { + params: { + qwenThinkingFormat: "chat-template", + }, + }, + }, + }, + }, + }); + + expect(res.config?.agents?.defaults?.models?.["VLLM/Qwen/Qwen3-8B"]).not.toHaveProperty( + "params", + ); + expect(res.config?.models?.providers?.vllm?.models).toEqual([ + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ]); + expect(res.changes).toStrictEqual([ + 'Moved agents.defaults.models."VLLM/Qwen/Qwen3-8B".params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").', + ]); + }); + + it("creates a vLLM model row for legacy Qwen top-level thinking params", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + models: { + "vllm/Qwen/Qwen3-8B": { + params: { + qwen_thinking_format: "enable_thinking", + }, + }, + }, + }, + }, + }); + + expect(res.config?.agents?.defaults?.models?.["vllm/Qwen/Qwen3-8B"]).not.toHaveProperty( + "params", + ); + expect(res.config?.models?.providers?.vllm?.models).toEqual([ + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen" }, + }, + ]); + expect(res.changes).toStrictEqual([ + 'Moved agents.defaults.models."vllm/Qwen/Qwen3-8B".params.qwen_thinking_format to models.providers.vllm.models[0].compat.thinkingFormat ("qwen").', + ]); + }); + + it("preserves existing vLLM model compat when removing legacy Qwen thinking params", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + models: { + "vllm/Qwen/Qwen3-8B": { + params: { + qwenThinkingFormat: "top-level", + }, + }, + }, + }, + }, + models: { + providers: { + vllm: { + models: [ + { + id: "Qwen/Qwen3-8B", + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ], + }, + }, + }, + }); + + expect(res.config?.agents?.defaults?.models?.["vllm/Qwen/Qwen3-8B"]).not.toHaveProperty( + "params", + ); + expect(res.config?.models?.providers?.vllm?.models?.[0]?.compat).toEqual({ + thinkingFormat: "qwen-chat-template", + }); + expect(res.config?.models?.providers?.vllm?.models?.[0]?.reasoning).toBe(true); + expect(res.changes).toStrictEqual([ + 'Removed agents.defaults.models."vllm/Qwen/Qwen3-8B".params.qwenThinkingFormat; models.providers.vllm.models[0].compat.thinkingFormat is already "qwen-chat-template".', + ]); + }); + + it("moves legacy vLLM Qwen thinking params onto provider-qualified model rows", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + models: { + "vllm/Qwen/Qwen3-8B": { + params: { + qwenThinkingFormat: "chat-template", + }, + }, + }, + }, + }, + models: { + providers: { + vllm: { + models: [{ id: "vllm/Qwen/Qwen3-8B", name: "Qwen3 8B" }], + }, + }, + }, + }); + + expect(res.config?.models?.providers?.vllm?.models).toEqual([ + { + id: "vllm/Qwen/Qwen3-8B", + name: "Qwen3 8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ]); + expect(res.changes).toStrictEqual([ + 'Moved agents.defaults.models."vllm/Qwen/Qwen3-8B".params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").', + ]); + }); + + it("moves legacy vLLM Qwen model-row params to model compat", () => { + const res = migrateLegacyConfigForTest({ + models: { + providers: { + vllm: { + models: [ + { + id: "Qwen/Qwen3-8B", + name: "Qwen3 8B", + params: { + qwenThinkingFormat: "chat-template", + temperature: 0.2, + }, + }, + ], + }, + }, + }, + }); + + expect(res.config?.models?.providers?.vllm?.models?.[0]).toEqual({ + id: "Qwen/Qwen3-8B", + name: "Qwen3 8B", + reasoning: true, + params: { temperature: 0.2 }, + compat: { thinkingFormat: "qwen-chat-template" }, + }); + expect(res.changes).toStrictEqual([ + 'Moved models.providers.vllm.models[0].params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").', + ]); + }); + + it("moves legacy vLLM Qwen provider params to model compat rows", () => { + const res = migrateLegacyConfigForTest({ + models: { + providers: { + vllm: { + params: { + qwen_thinking_format: "enable_thinking", + temperature: 0.2, + }, + models: [ + { id: "Qwen/Qwen3-8B", name: "Qwen3 8B" }, + { id: "Qwen/Qwen3-14B", name: "Qwen3 14B" }, + ], + }, + }, + }, + }); + + expect(res.config?.models?.providers?.vllm?.params).toEqual({ temperature: 0.2 }); + expect(res.config?.models?.providers?.vllm?.models).toEqual([ + { + id: "Qwen/Qwen3-8B", + name: "Qwen3 8B", + reasoning: true, + compat: { thinkingFormat: "qwen" }, + }, + { + id: "Qwen/Qwen3-14B", + name: "Qwen3 14B", + reasoning: true, + compat: { thinkingFormat: "qwen" }, + }, + ]); + expect(res.changes).toStrictEqual([ + 'Moved models.providers.vllm.params.qwen_thinking_format to models.providers.vllm.models[0].compat.thinkingFormat ("qwen").', + 'Moved models.providers.vllm.params.qwen_thinking_format to models.providers.vllm.models[1].compat.thinkingFormat ("qwen").', + ]); + }); + + it("moves legacy vLLM Qwen provider params to existing and selected model rows", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + model: { primary: "vllm/Qwen/Qwen3-8B" }, + }, + }, + models: { + providers: { + vllm: { + params: { + qwenThinkingFormat: "chat-template", + }, + models: [{ id: "Qwen/Qwen3-14B", name: "Qwen3 14B" }], + }, + }, + }, + }); + + expect(res.config?.models?.providers?.vllm?.models).toEqual([ + { + id: "Qwen/Qwen3-14B", + name: "Qwen3 14B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ]); + expect(res.changes).toStrictEqual([ + 'Moved models.providers.vllm.params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").', + 'Moved models.providers.vllm.params.qwenThinkingFormat to models.providers.vllm.models[1].compat.thinkingFormat ("qwen-chat-template").', + ]); + }); + + it("removes untargeted legacy vLLM Qwen provider params", () => { + const res = migrateLegacyConfigForTest({ + models: { + providers: { + vllm: { + baseUrl: "http://localhost:8000/v1", + params: { + qwenThinkingFormat: "chat-template", + temperature: 0.2, + }, + }, + }, + }, + }); + + expect(res.config?.models?.providers?.vllm).toEqual({ + baseUrl: "http://localhost:8000/v1", + params: { temperature: 0.2 }, + }); + expect(res.changes).toStrictEqual([ + "Removed models.providers.vllm.params.qwenThinkingFormat; no concrete vLLM model row or agent model ref exists, so configure models.providers.vllm.models[].compat.thinkingFormat on each Qwen model that needs it.", + ]); + }); + + it("moves legacy vLLM Qwen provider params using the default selected model", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + model: { primary: "vllm/Qwen/Qwen3-8B" }, + }, + }, + models: { + providers: { + vllm: { + params: { + qwenThinkingFormat: "chat-template", + temperature: 0.2, + }, + }, + }, + }, + }); + + expect(res.config?.models?.providers?.vllm?.params).toEqual({ temperature: 0.2 }); + expect(res.config?.models?.providers?.vllm?.models).toEqual([ + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ]); + expect(res.changes).toStrictEqual([ + 'Moved models.providers.vllm.params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").', + ]); + }); + + it("preserves normalized vLLM provider keys when moving provider params", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + model: { primary: "vllm/Qwen/Qwen3-8B" }, + }, + }, + models: { + providers: { + VLLM: { + baseUrl: "http://localhost:8000/v1", + params: { + qwenThinkingFormat: "chat-template", + temperature: 0.2, + }, + }, + }, + }, + }); + + expect(res.config?.models?.providers?.vllm).toBeUndefined(); + expect(res.config?.models?.providers?.VLLM).toEqual({ + baseUrl: "http://localhost:8000/v1", + params: { temperature: 0.2 }, + models: [ + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ], + }); + expect(res.changes).toStrictEqual([ + 'Moved models.providers.vllm.params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").', + ]); + }); + + it("strips auth profile suffixes when moving legacy vLLM Qwen params", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + model: { primary: "vllm/Qwen/Qwen3-8B@local" }, + }, + }, + models: { + providers: { + vllm: { + params: { + qwenThinkingFormat: "chat-template", + }, + }, + }, + }, + }); + + expect(res.config?.models?.providers?.vllm?.models).toEqual([ + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ]); + }); + + it("moves legacy vLLM Qwen default agent params to the selected model compat row", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + model: { primary: "vllm/Qwen/Qwen3-8B" }, + params: { + qwenThinkingFormat: "chat-template", + temperature: 0.2, + }, + }, + }, + }); + + expect(res.config?.agents?.defaults?.params).toEqual({ temperature: 0.2 }); + expect(res.config?.models?.providers?.vllm?.models).toEqual([ + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ]); + expect(res.changes).toStrictEqual([ + 'Moved agents.defaults.params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").', + ]); + }); + + it("removes untargeted legacy vLLM Qwen default agent params", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + params: { + qwenThinkingFormat: "chat-template", + temperature: 0.2, + }, + }, + }, + }); + + expect(res.config?.agents?.defaults?.params).toEqual({ temperature: 0.2 }); + expect(res.changes).toStrictEqual([ + "Removed agents.defaults.params.qwenThinkingFormat; no concrete vLLM model row or agent model ref exists, so configure models.providers.vllm.models[].compat.thinkingFormat on each Qwen model that needs it.", + ]); + }); + + it("moves legacy vLLM Qwen per-agent params to the agent model compat row", () => { + const res = migrateLegacyConfigForTest({ + agents: { + list: [ + { + id: "local", + model: "vllm/Qwen/Qwen3-8B", + params: { + qwen_thinking_format: "enable_thinking", + temperature: 0.2, + }, + }, + ], + }, + }); + + expect(res.config?.agents?.list?.[0]?.params).toEqual({ temperature: 0.2 }); + expect(res.config?.models?.providers?.vllm?.models).toEqual([ + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen" }, + }, + ]); + expect(res.changes).toStrictEqual([ + 'Moved agents.list[0].params.qwen_thinking_format to models.providers.vllm.models[0].compat.thinkingFormat ("qwen").', + ]); + }); + + it("removes untargeted legacy vLLM Qwen per-agent params", () => { + const res = migrateLegacyConfigForTest({ + agents: { + list: [ + { + id: "local", + params: { + qwen_thinking_format: "enable_thinking", + temperature: 0.2, + }, + }, + ], + }, + }); + + expect(res.config?.agents?.list?.[0]?.params).toEqual({ temperature: 0.2 }); + expect(res.changes).toStrictEqual([ + "Removed agents.list[0].params.qwen_thinking_format; no concrete vLLM model row or agent model ref exists, so configure models.providers.vllm.models[].compat.thinkingFormat on each Qwen model that needs it.", + ]); + }); + + it("moves legacy vLLM Qwen per-agent params using the inherited default model", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + model: "vllm/Qwen/Qwen3-8B", + }, + list: [ + { + id: "local", + params: { + qwenThinkingFormat: "chat-template", + }, + }, + ], + }, + }); + + expect(res.config?.agents?.list?.[0]).not.toHaveProperty("params"); + expect(res.config?.models?.providers?.vllm?.models).toEqual([ + { + id: "Qwen/Qwen3-8B", + name: "Qwen/Qwen3-8B", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + }, + ]); + expect(res.changes).toStrictEqual([ + 'Moved agents.list[0].params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").', + ]); + }); + + it("leaves legacy vLLM Qwen thinking params when the model compat row cannot be written", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + models: { + "vllm/Qwen/Qwen3-8B": { + params: { + qwenThinkingFormat: "chat-template", + }, + }, + }, + }, + }, + models: { + providers: { + vllm: { + models: "malformed", + }, + }, + }, + }); + + expect(res.config).toBeNull(); + expect(res.changes).toStrictEqual([]); + }); + + it("leaves malformed vLLM provider ancestors untouched during legacy Qwen migration", () => { + const res = migrateLegacyConfigForTest({ + agents: { + defaults: { + models: { + "vllm/Qwen/Qwen3-8B": { + params: { + qwenThinkingFormat: "chat-template", + }, + }, + }, + }, + }, + models: { + providers: { + vllm: "malformed", + }, + }, + }); + + expect(res.config).toBeNull(); + expect(res.changes).toStrictEqual([]); + }); + + it("reports legacy vLLM Qwen thinking params before doctor fix", () => { + const raw = { + agents: { + defaults: { + models: { + "vllm/Qwen/Qwen3-8B": { + params: { + qwenThinkingFormat: "chat-template", + }, + }, + }, + }, + }, + }; + + expect(findLegacyConfigIssues(raw).map((issue) => issue.path)).toContain( + "agents.defaults.models", + ); + }); + + it("reports legacy vLLM Qwen thinking params from merged extra-param sources", () => { + const raw = { + agents: { + defaults: { + params: { + qwenThinkingFormat: "chat-template", + }, + }, + list: [ + { + id: "local", + params: { + qwen_thinking_format: "enable_thinking", + }, + }, + ], + }, + }; + + expect(findLegacyConfigIssues(raw).map((issue) => issue.path)).toEqual( + expect.arrayContaining(["agents.defaults.params", "agents"]), + ); + }); + + it("reports legacy vLLM Qwen params from normalized provider keys", () => { + const raw = { + models: { + providers: { + VLLM: { + params: { + qwenThinkingFormat: "chat-template", + }, + }, + }, + }, + }; + + expect(findLegacyConfigIssues(raw).map((issue) => issue.path)).toContain("models.providers"); + }); + it("preserves recognized model compat thinkingFormat values", () => { const res = migrateLegacyConfigForTest({ models: { diff --git a/src/commands/doctor/shared/legacy-config-migrations.runtime.models.ts b/src/commands/doctor/shared/legacy-config-migrations.runtime.models.ts index bb360da569d..91a55962f4a 100644 --- a/src/commands/doctor/shared/legacy-config-migrations.runtime.models.ts +++ b/src/commands/doctor/shared/legacy-config-migrations.runtime.models.ts @@ -1,6 +1,8 @@ import { splitTrailingAuthProfile } from "../../../agents/model-ref-profile.js"; +import { normalizeProviderId } from "../../../agents/provider-id.js"; import { defineLegacyConfigMigration, + ensureRecord, getRecord, type LegacyConfigMigrationSpec, type LegacyConfigRule, @@ -78,6 +80,394 @@ function hasInvalidThinkingFormat(providers: unknown): boolean { return false; } +const LEGACY_VLLM_QWEN_THINKING_FORMAT_KEYS = [ + "qwenThinkingFormat", + "qwen_thinking_format", +] as const; + +function normalizeLegacyVllmQwenThinkingFormat( + value: unknown, +): "qwen" | "qwen-chat-template" | undefined { + if (typeof value !== "string") { + return undefined; + } + const normalized = value + .trim() + .toLowerCase() + .replace(/[_\s]+/g, "-"); + switch (normalized) { + case "chat-template": + case "chat-template-argument": + case "chat-template-arguments": + case "chat-template-kwarg": + case "chat-template-kwargs": + case "qwen-chat-template": + return "qwen-chat-template"; + case "enable-thinking": + case "qwen": + case "request-body": + case "top-level": + return "qwen"; + default: + return undefined; + } +} + +function getLegacyVllmQwenThinkingFormat(params: Record): + | { + key: (typeof LEGACY_VLLM_QWEN_THINKING_FORMAT_KEYS)[number]; + value: unknown; + compat: "qwen" | "qwen-chat-template" | undefined; + } + | undefined { + for (const key of LEGACY_VLLM_QWEN_THINKING_FORMAT_KEYS) { + if (Object.prototype.hasOwnProperty.call(params, key)) { + return { + key, + value: params[key], + compat: normalizeLegacyVllmQwenThinkingFormat(params[key]), + }; + } + } + return undefined; +} + +function parseVllmAgentModelKey(key: string): string | undefined { + const trimmed = splitTrailingAuthProfile(key).model.trim(); + const slashIndex = trimmed.indexOf("/"); + if (slashIndex <= 0) { + return undefined; + } + const providerId = trimmed.slice(0, slashIndex); + if (normalizeProviderId(providerId) !== "vllm") { + return undefined; + } + const modelId = trimmed.slice(slashIndex + 1).trim(); + return modelId && modelId !== "*" ? modelId : undefined; +} + +function hasLegacyVllmQwenThinkingFormat(defaultModels: unknown): boolean { + const models = getRecord(defaultModels); + if (!models) { + return false; + } + for (const [key, entry] of Object.entries(models)) { + if (!parseVllmAgentModelKey(key)) { + continue; + } + const params = getRecord(getRecord(entry)?.params); + if (params && getLegacyVllmQwenThinkingFormat(params)) { + return true; + } + } + return false; +} + +function hasLegacyVllmQwenThinkingProviderParams(provider: unknown): boolean { + const params = getRecord(getRecord(provider)?.params); + return Boolean(params && getLegacyVllmQwenThinkingFormat(params)); +} + +function hasLegacyVllmQwenThinkingModelParams(provider: unknown): boolean { + const models = getRecord(provider)?.models; + if (!Array.isArray(models)) { + return false; + } + return models.some((model) => { + const params = getRecord(getRecord(model)?.params); + return Boolean(params && getLegacyVllmQwenThinkingFormat(params)); + }); +} + +function hasLegacyVllmQwenThinkingParams(params: unknown): boolean { + const record = getRecord(params); + return Boolean(record && getLegacyVllmQwenThinkingFormat(record)); +} + +function hasLegacyVllmQwenThinkingAgentParams(agents: unknown): boolean { + const list = getRecord(agents)?.list; + if (!Array.isArray(list)) { + return false; + } + return list.some((agent) => hasLegacyVllmQwenThinkingParams(getRecord(agent)?.params)); +} + +function findOrCreateVllmModelEntry( + raw: Record, + modelId: string, +): { model: Record; index: number } | undefined { + const modelsRoot = getOrCreateRecord(raw, "models"); + const providers = modelsRoot ? getOrCreateRecord(modelsRoot, "providers") : undefined; + const vllm = providers ? getOrCreateVllmProvider(providers) : undefined; + if (!vllm) { + return undefined; + } + if (vllm.models !== undefined && !Array.isArray(vllm.models)) { + return undefined; + } + + const models = Array.isArray(vllm.models) ? vllm.models : []; + vllm.models = models; + const providerModelId = `vllm/${modelId}`; + for (const [index, model] of models.entries()) { + const record = getRecord(model); + if (record?.id === modelId || record?.id === providerModelId) { + return { model: record, index }; + } + } + + const model = { id: modelId, name: modelId }; + models.push(model); + return { model, index: models.length - 1 }; +} + +function listExistingVllmModelTargets( + raw: Record, +): Array<{ model: Record; index: number }> { + const models = findVllmProvider(getRecord(getRecord(raw.models)?.providers))?.models; + if (!Array.isArray(models)) { + return []; + } + return models.flatMap((model, index) => { + const record = getRecord(model); + return record ? [{ model: record, index }] : []; + }); +} + +function collectVllmModelIdsFromSelection(value: unknown): string[] { + if (typeof value === "string") { + const modelId = parseVllmAgentModelKey(value); + return modelId ? [modelId] : []; + } + const record = getRecord(value); + if (!record) { + return []; + } + const ids: string[] = []; + if (typeof record.primary === "string") { + const primary = parseVllmAgentModelKey(record.primary); + if (primary) { + ids.push(primary); + } + } + if (Array.isArray(record.fallbacks)) { + for (const fallback of record.fallbacks) { + if (typeof fallback !== "string") { + continue; + } + const modelId = parseVllmAgentModelKey(fallback); + if (modelId) { + ids.push(modelId); + } + } + } + return ids; +} + +function collectVllmModelIdsFromAgentModelMap(value: unknown): string[] { + const models = getRecord(value); + if (!models) { + return []; + } + return Object.keys(models).flatMap((key) => { + const modelId = parseVllmAgentModelKey(key); + return modelId ? [modelId] : []; + }); +} + +function createVllmModelTargets( + raw: Record, + modelIds: string[], +): Array<{ model: Record; index: number }> { + const targets: Array<{ model: Record; index: number }> = []; + const seen = new Set>(); + for (const modelId of modelIds) { + const target = findOrCreateVllmModelEntry(raw, modelId); + if (!target || seen.has(target.model)) { + continue; + } + seen.add(target.model); + targets.push(target); + } + return targets; +} + +function combineVllmModelTargets( + ...groups: Array; index: number }>> +): Array<{ model: Record; index: number }> { + const targets: Array<{ model: Record; index: number }> = []; + const seen = new Set>(); + for (const group of groups) { + for (const target of group) { + if (seen.has(target.model)) { + continue; + } + seen.add(target.model); + targets.push(target); + } + } + return targets; +} + +function collectVllmModelIdsFromAgentList(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + return value.flatMap((agent) => { + const record = getRecord(agent); + return record + ? [ + ...collectVllmModelIdsFromSelection(record.model), + ...collectVllmModelIdsFromAgentModelMap(record.models), + ] + : []; + }); +} + +function getOrCreateRecord( + root: Record, + key: string, +): Record | undefined { + if (root[key] === undefined) { + const next: Record = {}; + root[key] = next; + return next; + } + return getRecord(root[key]) ?? undefined; +} + +function findVllmProvider( + providers: Record | null | undefined, +): Record | undefined { + if (!providers) { + return undefined; + } + const key = Object.keys(providers).find((entry) => normalizeProviderId(entry) === "vllm"); + return key ? (getRecord(providers[key]) ?? undefined) : undefined; +} + +function getOrCreateVllmProvider( + providers: Record, +): Record | undefined { + const key = Object.keys(providers).find((entry) => normalizeProviderId(entry) === "vllm"); + if (key) { + return getRecord(providers[key]) ?? undefined; + } + return getOrCreateRecord(providers, "vllm"); +} + +function hasLegacyVllmQwenThinkingNormalizedProvider(providers: unknown): boolean { + const providersRecord = getRecord(providers); + if (!providersRecord || getRecord(providersRecord.vllm)) { + return false; + } + const vllmProvider = findVllmProvider(providersRecord); + return ( + hasLegacyVllmQwenThinkingProviderParams(vllmProvider) || + hasLegacyVllmQwenThinkingModelParams(vllmProvider) + ); +} + +function preserveMigratedVllmQwenReasoning(model: Record): void { + if (model.reasoning === undefined) { + model.reasoning = true; + } +} + +function removeLegacyVllmQwenThinkingParams(params: Record): void { + for (const key of LEGACY_VLLM_QWEN_THINKING_FORMAT_KEYS) { + delete params[key]; + } +} + +function applyLegacyVllmQwenThinkingFormat(params: { + sourcePath: string; + legacyParams: Record; + target: { model: Record; index: number }; + legacyFormat: NonNullable>; + changes: string[]; +}): boolean { + if (!params.legacyFormat.compat) { + removeLegacyVllmQwenThinkingParams(params.legacyParams); + params.changes.push( + `Removed ${params.sourcePath}.${params.legacyFormat.key} (unrecognized value ${JSON.stringify(params.legacyFormat.value)}; configure models.providers.vllm.models[].compat.thinkingFormat if needed).`, + ); + return true; + } + + preserveMigratedVllmQwenReasoning(params.target.model); + const compat = ensureRecord(params.target.model, "compat"); + const currentThinkingFormat = compat.thinkingFormat; + if (typeof currentThinkingFormat === "string" && isModelThinkingFormat(currentThinkingFormat)) { + removeLegacyVllmQwenThinkingParams(params.legacyParams); + params.changes.push( + `Removed ${params.sourcePath}.${params.legacyFormat.key}; models.providers.vllm.models[${params.target.index}].compat.thinkingFormat is already ${JSON.stringify(currentThinkingFormat)}.`, + ); + return true; + } + + compat.thinkingFormat = params.legacyFormat.compat; + removeLegacyVllmQwenThinkingParams(params.legacyParams); + params.changes.push( + `Moved ${params.sourcePath}.${params.legacyFormat.key} to models.providers.vllm.models[${params.target.index}].compat.thinkingFormat (${JSON.stringify(params.legacyFormat.compat)}).`, + ); + return true; +} + +function removeUntargetedLegacyVllmQwenThinkingFormat(params: { + sourcePath: string; + legacyParams: Record; + legacyFormat: NonNullable>; + changes: string[]; +}): void { + removeLegacyVllmQwenThinkingParams(params.legacyParams); + params.changes.push( + `Removed ${params.sourcePath}.${params.legacyFormat.key}; no concrete vLLM model row or agent model ref exists, so configure models.providers.vllm.models[].compat.thinkingFormat on each Qwen model that needs it.`, + ); +} + +const LEGACY_VLLM_QWEN_AGENT_THINKING_FORMAT_RULE: LegacyConfigRule = { + path: ["agents", "defaults", "models"], + message: + 'agents.defaults.models..params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.vllm.models[].compat.thinkingFormat.', + match: (value) => hasLegacyVllmQwenThinkingFormat(value), +}; + +const LEGACY_VLLM_QWEN_PROVIDER_THINKING_FORMAT_RULE: LegacyConfigRule = { + path: ["models", "providers", "vllm", "params"], + message: + 'models.providers.vllm.params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.vllm.models[].compat.thinkingFormat.', + match: (value) => hasLegacyVllmQwenThinkingProviderParams({ params: value }), +}; + +const LEGACY_VLLM_QWEN_PROVIDER_MODEL_THINKING_FORMAT_RULE: LegacyConfigRule = { + path: ["models", "providers", "vllm", "models"], + message: + 'models.providers.vllm.models[*].params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.vllm.models[].compat.thinkingFormat.', + match: (value) => hasLegacyVllmQwenThinkingModelParams({ models: value }), +}; + +const LEGACY_VLLM_QWEN_NORMALIZED_PROVIDER_THINKING_FORMAT_RULE: LegacyConfigRule = { + path: ["models", "providers"], + message: + 'models.providers..params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers..models[].compat.thinkingFormat.', + match: (value) => hasLegacyVllmQwenThinkingNormalizedProvider(value), +}; + +const LEGACY_VLLM_QWEN_DEFAULT_PARAMS_THINKING_FORMAT_RULE: LegacyConfigRule = { + path: ["agents", "defaults", "params"], + message: + 'agents.defaults.params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.vllm.models[].compat.thinkingFormat.', + match: (value) => hasLegacyVllmQwenThinkingParams(value), +}; + +const LEGACY_VLLM_QWEN_AGENT_PARAMS_THINKING_FORMAT_RULE: LegacyConfigRule = { + path: ["agents"], + message: + 'agents.list[].params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.vllm.models[].compat.thinkingFormat.', + match: (value) => hasLegacyVllmQwenThinkingAgentParams(value), +}; + const INVALID_THINKING_FORMAT_RULE: LegacyConfigRule = { path: ["models", "providers"], message: @@ -559,6 +949,201 @@ export const LEGACY_CONFIG_MIGRATIONS_RUNTIME_MODELS: LegacyConfigMigrationSpec[ Object.assign(raw, rewritten.value); }, }), + defineLegacyConfigMigration({ + id: "agents.defaults.models.vllm.params.qwenThinkingFormat->models.providers.vllm.models.compat.thinkingFormat", + describe: "Move legacy vLLM Qwen thinking params to model compat metadata", + legacyRules: [ + LEGACY_VLLM_QWEN_AGENT_THINKING_FORMAT_RULE, + LEGACY_VLLM_QWEN_PROVIDER_THINKING_FORMAT_RULE, + LEGACY_VLLM_QWEN_PROVIDER_MODEL_THINKING_FORMAT_RULE, + LEGACY_VLLM_QWEN_NORMALIZED_PROVIDER_THINKING_FORMAT_RULE, + LEGACY_VLLM_QWEN_DEFAULT_PARAMS_THINKING_FORMAT_RULE, + LEGACY_VLLM_QWEN_AGENT_PARAMS_THINKING_FORMAT_RULE, + ], + apply: (raw, changes) => { + const agentsDefaults = getRecord(getRecord(raw.agents)?.defaults); + const defaultModels = getRecord(agentsDefaults?.models); + if (defaultModels) { + for (const [key, entry] of Object.entries(defaultModels)) { + const modelId = parseVllmAgentModelKey(key); + const entryRecord = getRecord(entry); + const params = getRecord(entryRecord?.params); + if (!modelId || !entryRecord || !params) { + continue; + } + + const legacyFormat = getLegacyVllmQwenThinkingFormat(params); + if (!legacyFormat) { + continue; + } + + const target = legacyFormat.compat ? findOrCreateVllmModelEntry(raw, modelId) : undefined; + if (legacyFormat.compat && !target) { + continue; + } + applyLegacyVllmQwenThinkingFormat({ + sourcePath: `agents.defaults.models.${JSON.stringify(key)}.params`, + legacyParams: params, + target: target ?? { model: {}, index: -1 }, + legacyFormat, + changes, + }); + if (Object.keys(params).length === 0) { + delete entryRecord.params; + } + } + } + + const vllmProvider = findVllmProvider(getRecord(getRecord(raw.models)?.providers)); + const vllmModels = vllmProvider?.models; + if (Array.isArray(vllmModels)) { + for (const [index, model] of vllmModels.entries()) { + const modelRecord = getRecord(model); + const params = getRecord(modelRecord?.params); + if (!modelRecord || !params) { + continue; + } + const legacyFormat = getLegacyVllmQwenThinkingFormat(params); + if (!legacyFormat) { + continue; + } + applyLegacyVllmQwenThinkingFormat({ + sourcePath: `models.providers.vllm.models[${index}].params`, + legacyParams: params, + target: { model: modelRecord, index }, + legacyFormat, + changes, + }); + if (Object.keys(params).length === 0) { + delete modelRecord.params; + } + } + } + + const providerParams = getRecord(vllmProvider?.params); + if (providerParams) { + const providerLegacyFormat = getLegacyVllmQwenThinkingFormat(providerParams); + if (providerLegacyFormat) { + const providerModelIds = [ + ...collectVllmModelIdsFromSelection(agentsDefaults?.model), + ...collectVllmModelIdsFromAgentModelMap(defaultModels), + ...collectVllmModelIdsFromAgentList(getRecord(raw.agents)?.list), + ]; + const targets = combineVllmModelTargets( + listExistingVllmModelTargets(raw), + createVllmModelTargets(raw, providerModelIds), + ); + if (targets.length === 0) { + removeUntargetedLegacyVllmQwenThinkingFormat({ + sourcePath: "models.providers.vllm.params", + legacyParams: providerParams, + legacyFormat: providerLegacyFormat, + changes, + }); + } else { + for (const target of targets) { + applyLegacyVllmQwenThinkingFormat({ + sourcePath: "models.providers.vllm.params", + legacyParams: providerParams, + target, + legacyFormat: providerLegacyFormat, + changes, + }); + } + } + if (Object.keys(providerParams).length === 0) { + delete vllmProvider?.params; + } + } + } + + const defaultParams = getRecord(agentsDefaults?.params); + if (defaultParams) { + const defaultLegacyFormat = getLegacyVllmQwenThinkingFormat(defaultParams); + if (defaultLegacyFormat) { + const defaultModelIds = [ + ...collectVllmModelIdsFromSelection(agentsDefaults?.model), + ...collectVllmModelIdsFromAgentModelMap(defaultModels), + ]; + const targets = + defaultModelIds.length > 0 + ? createVllmModelTargets(raw, defaultModelIds) + : listExistingVllmModelTargets(raw); + if (targets.length === 0) { + removeUntargetedLegacyVllmQwenThinkingFormat({ + sourcePath: "agents.defaults.params", + legacyParams: defaultParams, + legacyFormat: defaultLegacyFormat, + changes, + }); + } else { + for (const target of targets) { + applyLegacyVllmQwenThinkingFormat({ + sourcePath: "agents.defaults.params", + legacyParams: defaultParams, + target, + legacyFormat: defaultLegacyFormat, + changes, + }); + } + } + if (Object.keys(defaultParams).length === 0) { + delete agentsDefaults?.params; + } + } + } + + const agentList = getRecord(raw.agents)?.list; + if (!Array.isArray(agentList)) { + return; + } + for (const [index, agent] of agentList.entries()) { + const agentRecord = getRecord(agent); + const agentParams = getRecord(agentRecord?.params); + const agentLegacyFormat = agentParams + ? getLegacyVllmQwenThinkingFormat(agentParams) + : undefined; + if (!agentRecord || !agentParams || !agentLegacyFormat) { + continue; + } + const explicitAgentModelIds = [ + ...collectVllmModelIdsFromSelection(agentRecord.model), + ...collectVllmModelIdsFromAgentModelMap(agentRecord.models), + ]; + const inheritedDefaultModelIds = [ + ...collectVllmModelIdsFromSelection(agentsDefaults?.model), + ...collectVllmModelIdsFromAgentModelMap(defaultModels), + ]; + const agentModelIds = + explicitAgentModelIds.length > 0 ? explicitAgentModelIds : inheritedDefaultModelIds; + const targets = + agentModelIds.length > 0 + ? createVllmModelTargets(raw, agentModelIds) + : listExistingVllmModelTargets(raw); + if (targets.length === 0) { + removeUntargetedLegacyVllmQwenThinkingFormat({ + sourcePath: `agents.list[${index}].params`, + legacyParams: agentParams, + legacyFormat: agentLegacyFormat, + changes, + }); + } else { + for (const target of targets) { + applyLegacyVllmQwenThinkingFormat({ + sourcePath: `agents.list[${index}].params`, + legacyParams: agentParams, + target, + legacyFormat: agentLegacyFormat, + changes, + }); + } + } + if (Object.keys(agentParams).length === 0) { + delete agentRecord.params; + } + } + }, + }), defineLegacyConfigMigration({ id: "models.providers.*.models.*.compat.thinkingFormat-invalid", describe: "Remove unrecognized compat.thinkingFormat values from provider model entries", diff --git a/src/gateway/server-methods/models.test.ts b/src/gateway/server-methods/models.test.ts index aa95bffc01d..762520a83dc 100644 --- a/src/gateway/server-methods/models.test.ts +++ b/src/gateway/server-methods/models.test.ts @@ -128,6 +128,44 @@ describe("models.list", () => { } }); + it("does not expose runtime params from catalog rows", async () => { + const respond = vi.fn(); + await modelsHandlers["models.list"]({ + req: { + type: "req", + id: "req-models-list-redact-params", + method: "models.list", + params: { view: "all" }, + }, + params: { view: "all" }, + respond, + client: null, + isWebchatConnect: () => false, + context: { + getRuntimeConfig: () => ({}) as OpenClawConfig, + loadGatewayModelCatalog: vi.fn(() => + Promise.resolve([ + { + id: "qwen-local", + name: "Qwen Local", + provider: "vllm", + params: { qwenThinkingFormat: "chat-template" }, + }, + ]), + ), + logGateway: { + debug: vi.fn(), + }, + } as never, + }); + + expect(respond).toHaveBeenCalledWith( + true, + { models: [{ id: "qwen-local", name: "Qwen Local", provider: "vllm" }] }, + undefined, + ); + }); + it("loads the full catalog for provider-scoped configured view and filters only providers", async () => { const catalog = [ { id: "claude-test", name: "Claude Test", provider: "anthropic" }, diff --git a/src/gateway/server-methods/models.ts b/src/gateway/server-methods/models.ts index 2a6dbdd82e6..2412f272c00 100644 --- a/src/gateway/server-methods/models.ts +++ b/src/gateway/server-methods/models.ts @@ -5,6 +5,7 @@ import { type ModelCatalogBrowseView, } from "../../agents/model-catalog-browse.js"; import { resolveVisibleModelCatalog } from "../../agents/model-catalog-visibility.js"; +import type { ModelCatalogEntry } from "../../agents/model-catalog.types.js"; import { resolveDefaultAgentWorkspaceDir } from "../../agents/workspace.js"; import { ErrorCodes, @@ -22,6 +23,17 @@ function resolveModelsListView(params: Record): ModelsListView return typeof params.view === "string" ? (params.view as ModelsListView) : "default"; } +function omitRuntimeModelParams(entry: ModelCatalogEntry): ModelCatalogEntry { + const { params: _params, ...rest } = entry as ModelCatalogEntry & { + params?: Record; + }; + return rest; +} + +function omitRuntimeModelParamsFromCatalog(catalog: ModelCatalogEntry[]): ModelCatalogEntry[] { + return catalog.map(omitRuntimeModelParams); +} + export const modelsHandlers: GatewayRequestHandlers = { "models.list": async ({ params, respond, context }) => { if (!validateModelsListParams(params)) { @@ -56,7 +68,7 @@ export const modelsHandlers: GatewayRequestHandlers = { }, }); if (view === "all") { - respond(true, { models: catalog }, undefined); + respond(true, { models: omitRuntimeModelParamsFromCatalog(catalog) }, undefined); return; } const models = await resolveVisibleModelCatalog({ @@ -67,7 +79,7 @@ export const modelsHandlers: GatewayRequestHandlers = { view, runtimeAuthDiscovery: false, }); - respond(true, { models }, undefined); + respond(true, { models: omitRuntimeModelParamsFromCatalog(models) }, undefined); } catch (err) { respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, String(err))); } diff --git a/src/plugins/provider-thinking.types.ts b/src/plugins/provider-thinking.types.ts index eab175e4608..f9bc40ee61f 100644 --- a/src/plugins/provider-thinking.types.ts +++ b/src/plugins/provider-thinking.types.ts @@ -10,15 +10,25 @@ export type ProviderThinkingPolicyContext = { modelId: string; }; +export type ProviderThinkingModelCompat = { + thinkingFormat?: string; + supportedReasoningEfforts?: readonly string[] | null; +}; + /** * Provider-owned default thinking policy input. * * `reasoning` is the merged catalog hint for the selected model when one is * available. Providers can use it to keep "reasoning model => low" behavior * without re-reading the catalog themselves. + * + * `compat` carries model-level request contract facts for the selected model + * when available. Providers can use it to expose model-specific thinking + * profiles only when the configured payload style supports them. */ export type ProviderDefaultThinkingPolicyContext = ProviderThinkingPolicyContext & { reasoning?: boolean; + compat?: ProviderThinkingModelCompat | null; }; export type ProviderThinkingLevelId =