From e153eceea5a0b446488694ece41f183a5c21073f Mon Sep 17 00:00:00 2001
From: rendrag-git <pearsonmgardner@gmail.com>
Date: Wed, 27 May 2026 12:32:18 +0000
Subject: [PATCH] fix(vllm): wire configured thinking params

Move vLLM Qwen thinking control onto configured model compat metadata and carry it through catalog/model-selection/runtime thinking contexts.

Also migrate legacy provider/default request params in doctor and keep Pi/runtime model rows buildable with explicit reasoning defaults.

Thanks @rendrag-git.

Co-authored-by: rendrag-git <253747599+rendrag-git@users.noreply.github.com>
---
 docs/gateway/config-agents.md                 |   4 +-
 docs/gateway/config-tools.md                  |   2 +-
 docs/plugins/sdk-migration.md                 |   5 +
 docs/plugins/sdk-provider-plugins.md          |   1 +
 docs/providers/vllm.md                        |  32 +-
 docs/tools/thinking.md                        |   1 +
 extensions/vllm/index.ts                      |   2 +
 .../vllm/provider-discovery.contract.test.ts  |  21 +
 extensions/vllm/provider-policy-api.test.ts   |  62 ++
 extensions/vllm/provider-policy-api.ts        |   1 +
 extensions/vllm/stream.test.ts                |  83 ++-
 extensions/vllm/stream.ts                     |  38 +-
 extensions/vllm/thinking-policy.ts            |  65 ++
 src/agents/model-catalog.test.ts              |  94 +++
 src/agents/model-catalog.ts                   |  56 +-
 src/agents/model-selection-shared.ts          |  31 +-
 src/agents/model-selection.test.ts            |  90 +++
 src/agents/pi-embedded-runner/model.test.ts   |  95 +++
 src/agents/pi-embedded-runner/model.ts        |  97 ++-
 src/auto-reply/reply/model-selection.test.ts  | 111 ++-
 src/auto-reply/reply/model-selection.ts       |  36 +-
 src/auto-reply/thinking.shared.ts             |   1 +
 src/auto-reply/thinking.test.ts               |  58 ++
 src/auto-reply/thinking.ts                    |  23 +-
 .../shared/legacy-config-migrate.test.ts      | 641 ++++++++++++++++++
 ...legacy-config-migrations.runtime.models.ts | 585 ++++++++++++++++
 src/gateway/server-methods/models.test.ts     |  38 ++
 src/gateway/server-methods/models.ts          |  16 +-
 src/plugins/provider-thinking.types.ts        |  10 +
 29 files changed, 2214 insertions(+), 85 deletions(-)
 create mode 100644 extensions/vllm/provider-policy-api.test.ts
 create mode 100644 extensions/vllm/provider-policy-api.ts
 create mode 100644 extensions/vllm/thinking-policy.ts
diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md
index eeefc67b794..8e65e745de6 100644
--- a/docs/gateway/config-agents.md
+++ b/docs/gateway/config-agents.md
@@ -459,8 +459,8 @@ Time format in system prompt. Default: `auto` (OS preference).
 - `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
 - `models.providers.openrouter.params.provider`: OpenRouter-wide default provider-routing policy. OpenClaw forwards this to OpenRouter's request `provider` object; per-model `agents.defaults.models["openrouter/<model>"].params.provider` and agent params override by key. See [OpenRouter provider routing](/providers/openrouter#advanced-configuration).
 - `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
-- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. For vLLM Qwen thinking controls, set `params.qwenThinkingFormat` to `"chat-template"` or `"top-level"` on that model entry.
-- `compat.thinkingFormat`: OpenAI-compatible thinking payload style. Use `"together"` for Together-style `reasoning.enabled`, `"qwen"` for Qwen-style top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family backends that support request-level chat-template kwargs, such as vLLM. OpenClaw maps disabled thinking to `false` and enabled thinking to `true`.
+- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. Configured vLLM Qwen and Nemotron thinking models expose binary `/think` choices (`off`, `on`) instead of the multi-level effort ladder.
+- `compat.thinkingFormat`: OpenAI-compatible thinking payload style. Use `"together"` for Together-style `reasoning.enabled`, `"qwen"` for Qwen-style top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family backends that support request-level chat-template kwargs, such as vLLM. OpenClaw maps disabled thinking to `false` and enabled thinking to `true`. Configured vLLM Qwen models expose binary `/think` choices (`off`, `on`) for the `"qwen"` and `"qwen-chat-template"` formats.
 - `compat.supportedReasoningEfforts`: per-model OpenAI-compatible reasoning effort list. Include `"xhigh"` for custom endpoints that truly accept it; OpenClaw then exposes `/think xhigh` in command menus, Gateway session rows, session patch validation, agent CLI validation, and `llm-task` validation for that configured provider/model. Use `compat.reasoningEffortMap` when the backend wants a provider-specific value for a canonical level.
 - `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
 - `localService`: optional provider-level process manager for local/self-hosted model servers. When the selected model belongs to that provider, OpenClaw probes `healthUrl` (or `baseUrl + "/models"`), starts `command` with `args` if the endpoint is down, waits up to `readyTimeoutMs`, then sends the model request. `command` must be an absolute path. `idleStopMs: 0` keeps the process alive until OpenClaw exits; a positive value stops the OpenClaw-spawned process after that many idle milliseconds. See [Local model services](/gateway/local-model-services).
diff --git a/docs/gateway/config-tools.md b/docs/gateway/config-tools.md
index 4575867c7e4..fcb9ac58489 100644
--- a/docs/gateway/config-tools.md
+++ b/docs/gateway/config-tools.md
@@ -535,7 +535,7 @@ Configuring a custom/local provider `baseUrl` is also the narrow network trust d
     - `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. Empty/omitted `baseUrl` keeps default OpenAI behavior.
     - `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure text `messages[].content` arrays into plain strings before sending the request.
     - `models.providers.*.models.*.compat.strictMessageKeys`: optional compatibility hint for strict OpenAI-compatible chat endpoints. When `true`, OpenClaw strips outgoing Chat Completions message objects to `role` and `content` before sending the request.
-    - `models.providers.*.models.*.compat.thinkingFormat`: optional thinking payload hint. Use `"together"` for Together-style `reasoning.enabled`, `"qwen"` for top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family OpenAI-compatible servers that support request-level chat-template kwargs, such as vLLM.
+    - `models.providers.*.models.*.compat.thinkingFormat`: optional thinking payload hint. Use `"together"` for Together-style `reasoning.enabled`, `"qwen"` for top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family OpenAI-compatible servers that support request-level chat-template kwargs, such as vLLM. Configured vLLM Qwen models expose binary `/think` choices (`off`, `on`) for the `"qwen"` and `"qwen-chat-template"` formats.
 
   </Accordion>
   <Accordion title="Amazon Bedrock discovery">
diff --git a/docs/plugins/sdk-migration.md b/docs/plugins/sdk-migration.md
index 733edfb692b..8ed28dbc2aa 100644
--- a/docs/plugins/sdk-migration.md
+++ b/docs/plugins/sdk-migration.md
@@ -818,6 +818,11 @@ canonical replacement.
     ranked level list. OpenClaw downgrades stale stored values by profile
     rank automatically.
 
+    The context includes `provider`, `modelId`, optional merged `reasoning`,
+    and optional merged model `compat` facts. Provider plugins can use those
+    catalog facts to expose a model-specific profile only when the configured
+    request contract supports it.
+
     Implement one hook instead of three. The legacy hooks keep working during
     the deprecation window but are not composed with the profile result.
 
diff --git a/docs/plugins/sdk-provider-plugins.md b/docs/plugins/sdk-provider-plugins.md
index 45337814bd2..355ce68e7ae 100644
--- a/docs/plugins/sdk-provider-plugins.md
+++ b/docs/plugins/sdk-provider-plugins.md
@@ -501,6 +501,7 @@ API key auth, and dynamic model resolution.
 
       - `normalizeConfig` checks the matched provider first, then other hook-capable provider plugins until one actually changes the config. If no provider hook rewrites a supported Google-family config entry, the bundled Google config normalizer still applies.
       - `resolveConfigApiKey` uses the provider hook when exposed. The bundled `amazon-bedrock` path also has a built-in AWS env-marker resolver here, even though Bedrock runtime auth itself still uses the AWS SDK default chain.
+      - `resolveThinkingProfile(ctx)` receives the selected `provider`, `modelId`, optional merged `reasoning` catalog hint, and optional merged model `compat` facts. Use `compat` only to select the provider's thinking UI/profile.
       - `resolveSystemPromptContribution` lets a provider inject cache-aware system-prompt guidance for a model family. Prefer it over `before_prompt_build` when the behavior belongs to one provider/model family and should preserve the stable/dynamic cache split.
 
       For detailed descriptions and real-world examples, see [Internals: Provider Runtime Hooks](/plugins/architecture-internals#provider-runtime-hooks).
diff --git a/docs/providers/vllm.md b/docs/providers/vllm.md
index 2df557de3d1..cbf606a442d 100644
--- a/docs/providers/vllm.md
+++ b/docs/providers/vllm.md
@@ -145,8 +145,32 @@ wildcard to the visible model catalog:
 
   <Accordion title="Qwen thinking controls">
     For Qwen models served through vLLM, set
-    `params.qwenThinkingFormat: "chat-template"` on the model entry when the
-    server expects Qwen chat-template kwargs. OpenClaw maps `/think off` to:
+    `compat.thinkingFormat: "qwen-chat-template"` on the configured provider
+    model row when the server expects Qwen chat-template kwargs. Models
+    configured this way expose a binary `/think` profile (`off`, `on`) because
+    Qwen template thinking is an on/off request flag, not an OpenAI-style effort
+    ladder.
+
+    ```json5
+    {
+      models: {
+        providers: {
+          vllm: {
+            models: [
+              {
+                id: "Qwen/Qwen3-8B",
+                name: "Qwen3 8B",
+                reasoning: true,
+                compat: { thinkingFormat: "qwen-chat-template" },
+              },
+            ],
+          },
+        },
+      },
+    }
+    ```
+
+    OpenClaw maps `/think off` to:
 
     ```json
     {
@@ -159,8 +183,8 @@ wildcard to the visible model catalog:
 
     Non-`off` thinking levels send `enable_thinking: true`. If your endpoint
     expects DashScope-style top-level flags instead, use
-    `params.qwenThinkingFormat: "top-level"` to send `enable_thinking` at the
-    request root. Snake-case `params.qwen_thinking_format` is also accepted.
+    `compat.thinkingFormat: "qwen"` to send `enable_thinking` at the request
+    root.
 
   </Accordion>
 
diff --git a/docs/tools/thinking.md b/docs/tools/thinking.md
index c237bdfc15c..293e242b7aa 100644
--- a/docs/tools/thinking.md
+++ b/docs/tools/thinking.md
@@ -134,6 +134,7 @@ Malformed local-model reasoning tags are handled conservatively. Closed `<think>
 - Provider plugins can expose `resolveThinkingProfile(ctx)` to define the model's supported levels and default.
 - Provider plugins that proxy Claude models should reuse `resolveClaudeThinkingProfile(modelId)` from `openclaw/plugin-sdk/provider-model-shared` so direct Anthropic and proxy catalogs stay aligned.
 - Each profile level has a stored canonical `id` (`off`, `minimal`, `low`, `medium`, `high`, `xhigh`, `adaptive`, or `max`) and may include a display `label`. Binary providers use `{ id: "low", label: "on" }`.
+- Profile hooks receive merged catalog facts when available, including `reasoning`, `compat.thinkingFormat`, and `compat.supportedReasoningEfforts`. Use those facts to expose binary or custom profiles only when the configured request contract supports the matching payload.
 - Tool plugins that need to validate an explicit thinking override should use `api.runtime.agent.resolveThinkingPolicy({ provider, model })` plus `api.runtime.agent.normalizeThinkingLevel(...)`; they should not keep their own provider/model level lists.
 - Tool plugins with access to configured custom model metadata can pass `catalog` into `resolveThinkingPolicy` so `compat.supportedReasoningEfforts` opt-ins are reflected in plugin-side validation.
 - Published legacy hooks (`supportsXHighThinking`, `isBinaryThinking`, and `resolveDefaultThinkingLevel`) remain as compatibility adapters, but new custom level sets should use `resolveThinkingProfile`.
diff --git a/extensions/vllm/index.ts b/extensions/vllm/index.ts
index 3130cf04cfc..f2116d2b1d4 100644
--- a/extensions/vllm/index.ts
+++ b/extensions/vllm/index.ts
@@ -11,6 +11,7 @@ import {
   VLLM_PROVIDER_LABEL,
 } from "./api.js";
 import { wrapVllmProviderStream } from "./stream.js";
+import { resolveThinkingProfile } from "./thinking-policy.js";
 
 const PROVIDER_ID = "vllm";
 
@@ -90,6 +91,7 @@ export default definePluginEntry({
         "vLLM requires authentication to be registered as a provider. " +
         'Set VLLM_API_KEY (any value works) or run "openclaw configure". ' +
         "See: https://docs.openclaw.ai/providers/vllm",
+      resolveThinkingProfile,
       wrapStreamFn: wrapVllmProviderStream,
     });
   },
diff --git a/extensions/vllm/provider-discovery.contract.test.ts b/extensions/vllm/provider-discovery.contract.test.ts
index 9b1ca99084f..46194aa3ca1 100644
--- a/extensions/vllm/provider-discovery.contract.test.ts
+++ b/extensions/vllm/provider-discovery.contract.test.ts
@@ -1,7 +1,28 @@
 import { fileURLToPath } from "node:url";
+import { registerSingleProviderPlugin } from "openclaw/plugin-sdk/plugin-test-runtime";
 import { describeVllmProviderDiscoveryContract } from "openclaw/plugin-sdk/provider-test-contracts";
+import { describe, expect, it } from "vitest";
+import vllmPlugin from "./index.js";
 
 describeVllmProviderDiscoveryContract({
   load: () => import("./index.js"),
   apiModuleId: fileURLToPath(new URL("./api.js", import.meta.url)),
 });
+
+describe("vLLM provider registration", () => {
+  it("exposes the binary thinking profile hook", async () => {
+    const provider = await registerSingleProviderPlugin(vllmPlugin);
+
+    expect(
+      provider.resolveThinkingProfile?.({
+        provider: "vllm",
+        modelId: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      }),
+    ).toEqual({
+      levels: [{ id: "off" }, { id: "low", label: "on" }],
+      defaultLevel: "off",
+    });
+  });
+});
diff --git a/extensions/vllm/provider-policy-api.test.ts b/extensions/vllm/provider-policy-api.test.ts
new file mode 100644
index 00000000000..d9ff393b7bb
--- /dev/null
+++ b/extensions/vllm/provider-policy-api.test.ts
@@ -0,0 +1,62 @@
+import { describe, expect, it } from "vitest";
+import { resolveThinkingProfile } from "./provider-policy-api.js";
+
+describe("vLLM provider thinking policy", () => {
+  it("exposes a binary profile for configured Qwen chat-template models", () => {
+    expect(
+      resolveThinkingProfile({
+        provider: "vllm",
+        modelId: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      }),
+    ).toEqual({
+      levels: [{ id: "off" }, { id: "low", label: "on" }],
+      defaultLevel: "off",
+    });
+  });
+
+  it("uses configured Qwen compat even when catalog reasoning metadata is absent", () => {
+    expect(
+      resolveThinkingProfile({
+        provider: "vllm",
+        modelId: "Qwen/Qwen3-8B",
+        compat: { thinkingFormat: "qwen-chat-template" },
+      }),
+    ).toEqual({
+      levels: [{ id: "off" }, { id: "low", label: "on" }],
+      defaultLevel: "off",
+    });
+  });
+
+  it("exposes a binary profile for vLLM Nemotron 3 reasoning models", () => {
+    expect(
+      resolveThinkingProfile({
+        provider: "vllm",
+        modelId: "nemotron-3-super",
+        reasoning: true,
+      }),
+    ).toEqual({
+      levels: [{ id: "off" }, { id: "low", label: "on" }],
+      defaultLevel: "off",
+    });
+  });
+
+  it("does not flatten unconfigured or non-reasoning vLLM models", () => {
+    expect(
+      resolveThinkingProfile({
+        provider: "vllm",
+        modelId: "Qwen/Qwen3-8B",
+        reasoning: true,
+      }),
+    ).toBeNull();
+    expect(
+      resolveThinkingProfile({
+        provider: "vllm",
+        modelId: "Qwen/Qwen3-8B",
+        reasoning: false,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      }),
+    ).toBeNull();
+  });
+});
diff --git a/extensions/vllm/provider-policy-api.ts b/extensions/vllm/provider-policy-api.ts
new file mode 100644
index 00000000000..edc2c861187
--- /dev/null
+++ b/extensions/vllm/provider-policy-api.ts
@@ -0,0 +1 @@
+export { resolveThinkingProfile } from "./thinking-policy.js";
diff --git a/extensions/vllm/stream.test.ts b/extensions/vllm/stream.test.ts
index 38ca89ce509..2030314db52 100644
--- a/extensions/vllm/stream.test.ts
+++ b/extensions/vllm/stream.test.ts
@@ -101,10 +101,19 @@ describe("createVllmQwenThinkingWrapper", () => {
     });
   });
 
-  it("skips non-reasoning and non-completions models", () => {
+  it("patches configured Qwen models unless reasoning is explicitly disabled", () => {
+    expect(capturePayload({ format: "chat-template", model: { reasoning: undefined } })).toEqual({
+      chat_template_kwargs: {
+        enable_thinking: true,
+        preserve_thinking: true,
+      },
+    });
     expect(capturePayload({ format: "chat-template", model: { reasoning: false } })).toStrictEqual(
       {},
     );
+  });
+
+  it("skips non-completions models", () => {
     expect(
       capturePayload({ format: "chat-template", model: { api: "openai-responses" as never } }),
     ).toStrictEqual({});
@@ -186,7 +195,25 @@ describe("createVllmProviderThinkingWrapper", () => {
 });
 
 describe("wrapVllmProviderStream", () => {
-  it("registers when vLLM Qwen thinking format params are configured", () => {
+  it("registers when vLLM Qwen thinking format compat is configured", () => {
+    expect(
+      wrapVllmProviderStream({
+        provider: "vllm",
+        modelId: "Qwen/Qwen3-8B",
+        extraParams: {},
+        model: {
+          api: "openai-completions",
+          provider: "vllm",
+          id: "Qwen/Qwen3-8B",
+          reasoning: true,
+          compat: { thinkingFormat: "qwen-chat-template" },
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeTypeOf("function");
+  });
+
+  it("ignores request params when Qwen thinking format compat is not configured", () => {
     expect(
       wrapVllmProviderStream({
         provider: "vllm",
@@ -200,22 +227,42 @@ describe("wrapVllmProviderStream", () => {
         } as Model<"openai-completions">,
         streamFn: undefined,
       } as never),
-    ).toBeTypeOf("function");
+    ).toBeUndefined();
+  });
 
-    expect(
-      wrapVllmProviderStream({
-        provider: "vllm",
-        modelId: "Qwen/Qwen3-8B",
-        extraParams: { qwen_thinking_format: "enable_thinking" },
-        model: {
-          api: "openai-completions",
-          provider: "vllm",
-          id: "Qwen/Qwen3-8B",
-          reasoning: true,
-        } as Model<"openai-completions">,
-        streamFn: undefined,
-      } as never),
-    ).toBeTypeOf("function");
+  it("uses model compat for Qwen thinking format", () => {
+    let captured: Record<string, unknown> = {};
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      const payload = {};
+      options?.onPayload?.(payload, _model);
+      captured = payload;
+      return {} as ReturnType<StreamFn>;
+    };
+    const model = {
+      api: "openai-completions",
+      provider: "vllm",
+      id: "Qwen/Qwen3-8B",
+      reasoning: true,
+      compat: { thinkingFormat: "qwen-chat-template" },
+    } as unknown as Model<"openai-completions">;
+    const wrapped = wrapVllmProviderStream({
+      provider: "vllm",
+      modelId: "Qwen/Qwen3-8B",
+      extraParams: {},
+      thinkingLevel: "off",
+      model,
+      streamFn: baseStreamFn,
+    } as never);
+
+    expect(wrapped).toBeTypeOf("function");
+    void wrapped?.(model, { messages: [] } as Context, {});
+
+    expect(captured).toEqual({
+      chat_template_kwargs: {
+        enable_thinking: false,
+        preserve_thinking: true,
+      },
+    });
   });
 
   it("skips unconfigured vLLM and non-vLLM providers", () => {
@@ -237,7 +284,7 @@ describe("wrapVllmProviderStream", () => {
       wrapVllmProviderStream({
         provider: "openai",
         modelId: "gpt-5.4",
-        extraParams: { qwenThinkingFormat: "chat-template" },
+        extraParams: {},
         model: {
           api: "openai-completions",
           provider: "openai",
diff --git a/extensions/vllm/stream.ts b/extensions/vllm/stream.ts
index 603087bb40c..31ca0764e09 100644
--- a/extensions/vllm/stream.ts
+++ b/extensions/vllm/stream.ts
@@ -5,43 +5,21 @@ import {
   createPayloadPatchStreamWrapper,
   isOpenAICompatibleThinkingEnabled,
 } from "openclaw/plugin-sdk/provider-stream-shared";
+import {
+  resolveVllmQwenThinkingFormatFromCompat,
+  type VllmQwenThinkingFormat,
+} from "./thinking-policy.js";
 
 type VllmThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"];
-type VllmQwenThinkingFormat = "chat-template" | "top-level";
 
 function isVllmProviderId(providerId: string): boolean {
   return normalizeProviderId(providerId) === "vllm";
 }
 
-function normalizeQwenThinkingFormat(value: unknown): VllmQwenThinkingFormat | undefined {
-  if (typeof value !== "string") {
-    return undefined;
-  }
-  const normalized = value.trim().toLowerCase().replace(/_/g, "-");
-  if (
-    normalized === "chat-template" ||
-    normalized === "chat-template-kwargs" ||
-    normalized === "chat-template-kwarg" ||
-    normalized === "chat-template-arguments"
-  ) {
-    return "chat-template";
-  }
-  if (
-    normalized === "top-level" ||
-    normalized === "enable-thinking" ||
-    normalized === "request-body"
-  ) {
-    return "top-level";
-  }
-  return undefined;
-}
-
 function resolveVllmQwenThinkingFormat(
-  extraParams: ProviderWrapStreamFnContext["extraParams"],
+  ctx: Pick<ProviderWrapStreamFnContext, "model">,
 ): VllmQwenThinkingFormat | undefined {
-  return normalizeQwenThinkingFormat(
-    extraParams?.qwenThinkingFormat ?? extraParams?.qwen_thinking_format,
-  );
+  return resolveVllmQwenThinkingFormatFromCompat(ctx.model?.compat);
 }
 
 function setQwenChatTemplateThinking(payload: Record<string, unknown>, enabled: boolean): void {
@@ -110,7 +88,7 @@ export function createVllmQwenThinkingWrapper(params: {
       delete payloadObj.reasoning;
     },
     {
-      shouldPatch: ({ model }) => model.api === "openai-completions" && model.reasoning,
+      shouldPatch: ({ model }) => model.api === "openai-completions" && (model.reasoning ?? true),
     },
   );
 }
@@ -145,7 +123,7 @@ export function wrapVllmProviderStream(ctx: ProviderWrapStreamFnContext): Stream
   if (!isVllmProviderId(ctx.provider) || (ctx.model && ctx.model.api !== "openai-completions")) {
     return undefined;
   }
-  const qwenFormat = resolveVllmQwenThinkingFormat(ctx.extraParams);
+  const qwenFormat = resolveVllmQwenThinkingFormat(ctx);
   const shouldHandleNemotron =
     ctx.thinkingLevel === "off" &&
     isVllmNemotronModel({
diff --git a/extensions/vllm/thinking-policy.ts b/extensions/vllm/thinking-policy.ts
new file mode 100644
index 00000000000..2f397bce1be
--- /dev/null
+++ b/extensions/vllm/thinking-policy.ts
@@ -0,0 +1,65 @@
+import type {
+  ProviderDefaultThinkingPolicyContext,
+  ProviderThinkingProfile,
+} from "openclaw/plugin-sdk/plugin-entry";
+import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared";
+
+export type VllmQwenThinkingFormat = "chat-template" | "top-level";
+
+const VLLM_BINARY_THINKING_PROFILE = {
+  levels: [{ id: "off" }, { id: "low", label: "on" }],
+  defaultLevel: "off",
+} satisfies ProviderThinkingProfile;
+
+export function normalizeVllmQwenThinkingFormat(
+  value: unknown,
+): VllmQwenThinkingFormat | undefined {
+  if (typeof value !== "string") {
+    return undefined;
+  }
+  const normalized = value.trim().toLowerCase().replace(/_/g, "-");
+  if (
+    normalized === "chat-template" ||
+    normalized === "chat-template-kwargs" ||
+    normalized === "chat-template-kwarg" ||
+    normalized === "chat-template-arguments" ||
+    normalized === "qwen-chat-template"
+  ) {
+    return "chat-template";
+  }
+  if (
+    normalized === "top-level" ||
+    normalized === "enable-thinking" ||
+    normalized === "request-body" ||
+    normalized === "qwen"
+  ) {
+    return "top-level";
+  }
+  return undefined;
+}
+
+export function resolveVllmQwenThinkingFormatFromCompat(
+  compat?: ProviderDefaultThinkingPolicyContext["compat"],
+): VllmQwenThinkingFormat | undefined {
+  return normalizeVllmQwenThinkingFormat(compat?.thinkingFormat);
+}
+
+function isVllmNemotronThinkingModel(modelId: string): boolean {
+  return /\bnemotron-3(?:[-_](?:nano|super|ultra))?\b/i.test(modelId);
+}
+
+export function resolveThinkingProfile(
+  ctx: ProviderDefaultThinkingPolicyContext,
+): ProviderThinkingProfile | null {
+  if (normalizeProviderId(ctx.provider) !== "vllm") {
+    return null;
+  }
+  if (ctx.reasoning === false) {
+    return null;
+  }
+  const qwenFormat = resolveVllmQwenThinkingFormatFromCompat(ctx.compat);
+  if (qwenFormat || (ctx.reasoning === true && isVllmNemotronThinkingModel(ctx.modelId))) {
+    return VLLM_BINARY_THINKING_PROFILE;
+  }
+  return null;
+}
diff --git a/src/agents/model-catalog.test.ts b/src/agents/model-catalog.test.ts
index fa7248610a5..95d1ddd3a7a 100644
--- a/src/agents/model-catalog.test.ts
+++ b/src/agents/model-catalog.test.ts
@@ -1033,6 +1033,100 @@ describe("loadModelCatalog", () => {
     expect(entry.contextWindow).toBe(128_000);
   });
 
+  it("overlays configured model compat onto discovered catalog rows", async () => {
+    mockPiDiscoveryModels([
+      {
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen3 8B",
+        provider: "vllm",
+        reasoning: false,
+        compat: { supportsStrictMode: false },
+      },
+    ]);
+
+    const result = await loadModelCatalog({
+      config: {
+        models: {
+          providers: {
+            vllm: {
+              baseUrl: "http://localhost:9000/v1",
+              api: "openai-completions",
+              models: [
+                {
+                  id: "vllm/Qwen/Qwen3-8B",
+                  name: "Configured Qwen3 8B",
+                  compat: { thinkingFormat: "qwen-chat-template" },
+                },
+              ],
+            },
+          },
+        },
+      } as unknown as OpenClawConfig,
+    });
+
+    const entry = requireCatalogEntry(result, "vllm", "Qwen/Qwen3-8B");
+    expect(result.filter((entry) => entry.provider === "vllm")).toHaveLength(1);
+    expect(entry.name).toBe("Qwen3 8B");
+    expect(entry.reasoning).toBe(true);
+    expect(entry.compat).toEqual(
+      expect.objectContaining({
+        supportsStrictMode: false,
+        thinkingFormat: "qwen-chat-template",
+      }),
+    );
+  });
+
+  it("overlays configured model compat onto persisted read-only catalog rows", async () => {
+    readFileMock.mockResolvedValue(
+      JSON.stringify({
+        providers: {
+          vllm: {
+            models: [
+              {
+                id: "Qwen/Qwen3-8B",
+                name: "Qwen3 8B",
+                reasoning: false,
+                compat: { supportsStrictMode: false },
+              },
+            ],
+          },
+        },
+      }),
+    );
+
+    const result = await loadModelCatalog({
+      config: {
+        models: {
+          providers: {
+            vllm: {
+              baseUrl: "http://localhost:9000/v1",
+              api: "openai-completions",
+              models: [
+                {
+                  id: "vllm/Qwen/Qwen3-8B",
+                  name: "Configured Qwen3 8B",
+                  compat: { thinkingFormat: "qwen-chat-template" },
+                },
+              ],
+            },
+          },
+        },
+      } as unknown as OpenClawConfig,
+      readOnly: true,
+    });
+
+    const entry = requireCatalogEntry(result, "vllm", "Qwen/Qwen3-8B");
+    expect(result.filter((entry) => entry.provider === "vllm")).toHaveLength(1);
+    expect(entry.name).toBe("Qwen3 8B");
+    expect(entry.reasoning).toBe(true);
+    expect(entry.compat).toEqual(
+      expect.objectContaining({
+        supportsStrictMode: false,
+        thinkingFormat: "qwen-chat-template",
+      }),
+    );
+  });
+
   it("merges manifest model catalog rows on the normal catalog path", async () => {
     mockSingleOpenAiCatalogModel();
     currentPluginMetadataSnapshotMock.mockReturnValue({
diff --git a/src/agents/model-catalog.ts b/src/agents/model-catalog.ts
index b5c6de3b00b..e225e0f29ec 100644
--- a/src/agents/model-catalog.ts
+++ b/src/agents/model-catalog.ts
@@ -21,6 +21,7 @@ import { resolveDefaultAgentDir } from "./agent-scope.js";
 import { modelSupportsInput as modelCatalogEntrySupportsInput } from "./model-catalog-lookup.js";
 import type { ModelCatalogEntry, ModelInputType } from "./model-catalog.types.js";
 import {
+  modelKey,
   normalizeConfiguredProviderCatalogModelId,
   type ProviderModelIdNormalizationOptions,
 } from "./model-ref-shared.js";
@@ -112,7 +113,8 @@ function instantiatePiModelRegistry(
 }
 
 function catalogEntryDedupeKey(provider: string, id: string): string {
-  return `${normalizeProviderId(provider)}::${normalizeLowercaseStringOrEmpty(id)}`;
+  const normalizedProvider = normalizeProviderId(provider);
+  return normalizeLowercaseStringOrEmpty(modelKey(normalizedProvider, id));
 }
 
 function appendCatalogEntriesIfAbsent(
@@ -130,6 +132,52 @@ function appendCatalogEntriesIfAbsent(
   }
 }
 
+/** Shallow-merges two optional compat records; on key clashes the `override` value wins. */
+function mergeCatalogCompat(
+  base: ModelCatalogEntry["compat"] | undefined,
+  override: ModelCatalogEntry["compat"] | undefined,
+): ModelCatalogEntry["compat"] | undefined {
+  if (base && override) {
+    // Later spread wins, so `override` keys replace same-named `base` keys.
+    return { ...base, ...override };
+  }
+  // With one side missing, the other is handed back untouched (no copy is made).
+  return base || override;
+}
+
+/** Overlays defined configured context/reasoning/input values plus merged compat on `base`. */
+function overlayConfiguredCatalogMetadata(
+  base: ModelCatalogEntry,
+  configured: ModelCatalogEntry,
+): ModelCatalogEntry {
+  const overlay: Partial<ModelCatalogEntry> = {
+    ...(configured.contextWindow === undefined ? {} : { contextWindow: configured.contextWindow }),
+    ...(configured.contextTokens === undefined ? {} : { contextTokens: configured.contextTokens }),
+    ...(configured.reasoning === undefined ? {} : { reasoning: configured.reasoning }),
+    ...(configured.input === undefined ? {} : { input: configured.input }),
+  };
+  return { ...base, ...overlay, compat: mergeCatalogCompat(base.compat, configured.compat) };
+}
+
+/** Merges configured rows into `models` in place: new keys are appended, known keys overlaid. */
+function mergeConfiguredCatalogEntries(
+  models: ModelCatalogEntry[],
+  entries: ModelCatalogEntry[],
+): void {
+  const slotByKey = new Map<string, number>(
+    models.map((row, slot) => [catalogEntryDedupeKey(row.provider, row.id), slot]),
+  );
+  for (const configured of entries) {
+    const dedupeKey = catalogEntryDedupeKey(configured.provider, configured.id);
+    const slot = slotByKey.get(dedupeKey);
+    if (slot === undefined) {
+      slotByKey.set(dedupeKey, models.push(configured) - 1);
+      continue;
+    }
+    models[slot] = overlayConfiguredCatalogMetadata(models[slot], configured);
+  }
+}
+
 export function loadManifestModelCatalog(params: {
   config: OpenClawConfig;
   workspaceDir?: string;
@@ -319,7 +367,7 @@ async function loadReadOnlyPersistedModelCatalog(params?: {
     manifestPlugins: hasConfiguredProviderModelRows(cfg) ? getManifestPlugins() : undefined,
   });
   if (configuredModels.length > 0) {
-    appendCatalogEntriesIfAbsent(models, configuredModels);
+    mergeConfiguredCatalogEntries(models, configuredModels);
   }
   return sortModelCatalogEntries(models);
 }
@@ -371,7 +419,7 @@ function loadReadOnlyStaticModelCatalog(params?: {
     manifestPlugins: configuredManifestPlugins,
   });
   if (configuredModels.length > 0) {
-    appendCatalogEntriesIfAbsent(models, configuredModels);
+    mergeConfiguredCatalogEntries(models, configuredModels);
   }
   return sortModelCatalogEntries(models);
 }
@@ -537,7 +585,7 @@ export async function loadModelCatalog(params?: {
         manifestPlugins: hasConfiguredProviderModelRows(cfg) ? getManifestPlugins() : undefined,
       });
       if (configuredModels.length > 0) {
-        appendCatalogEntriesIfAbsent(models, configuredModels);
+        mergeConfiguredCatalogEntries(models, configuredModels);
       }
       logStage("configured-models-merged", `entries=${models.length}`);
 
diff --git a/src/agents/model-selection-shared.ts b/src/agents/model-selection-shared.ts
index 7885a609550..e069e24e0ce 100644
--- a/src/agents/model-selection-shared.ts
+++ b/src/agents/model-selection-shared.ts
@@ -562,10 +562,6 @@ function buildModelCatalogMetadata(
     if (rawKey.trim().endsWith("/*")) {
       continue;
     }
-    const alias = ((entryRaw as { alias?: string } | undefined)?.alias ?? "").trim();
-    if (!alias) {
-      continue;
-    }
     const key = resolveAllowlistModelKey({
       cfg: params.cfg,
       raw: rawKey,
@@ -577,7 +573,10 @@ function buildModelCatalogMetadata(
     if (!key) {
       continue;
     }
-    aliasByKey.set(key, alias);
+    const alias = ((entryRaw as { alias?: string } | undefined)?.alias ?? "").trim();
+    if (alias) {
+      aliasByKey.set(key, alias);
+    }
   }
 
   return { configuredByKey, aliasByKey };
@@ -598,7 +597,10 @@ function applyModelCatalogMetadata(params: {
   const nextContextTokens = configuredEntry?.contextTokens ?? params.entry.contextTokens;
   const nextReasoning = configuredEntry?.reasoning ?? params.entry.reasoning;
   const nextInput = configuredEntry?.input ?? params.entry.input;
-  const nextCompat = configuredEntry?.compat ?? params.entry.compat;
+  const nextCompat =
+    params.entry.compat || configuredEntry?.compat
+      ? { ...params.entry.compat, ...configuredEntry?.compat }
+      : undefined;
 
   return {
     ...params.entry,
@@ -1180,9 +1182,14 @@ export function buildConfiguredModelCatalog(params: {
         typeof model?.contextTokens === "number" && model.contextTokens > 0
           ? model.contextTokens
           : undefined;
-      const reasoning = typeof model?.reasoning === "boolean" ? model.reasoning : undefined;
       const input = Array.isArray(model?.input) ? model.input : undefined;
       const compat = model?.compat && typeof model.compat === "object" ? model.compat : undefined;
+      const reasoning =
+        typeof model?.reasoning === "boolean"
+          ? model.reasoning
+          : isVllmQwenThinkingCompat(providerId, compat)
+            ? true
+            : undefined;
       catalog.push({
         provider: providerId,
         id,
@@ -1199,6 +1206,16 @@ export function buildConfiguredModelCatalog(params: {
   return catalog;
 }
 
+/** True when provider is exactly "vllm" and compat uses a Qwen thinking format. */
+function isVllmQwenThinkingCompat(
+  providerId: string,
+  compat?: { thinkingFormat?: unknown } | null,
+): boolean {
+  const format = compat?.thinkingFormat;
+  const isQwenFormat = format === "qwen" || format === "qwen-chat-template";
+  return providerId === "vllm" && isQwenFormat;
+}
+
 export function resolveHooksGmailModel(
   params: {
     cfg: OpenClawConfig;
diff --git a/src/agents/model-selection.test.ts b/src/agents/model-selection.test.ts
index 3562d9a84b8..fe3326e1e73 100644
--- a/src/agents/model-selection.test.ts
+++ b/src/agents/model-selection.test.ts
@@ -828,6 +828,59 @@ describe("model-selection", () => {
       expect(model?.id).toBe("google/gemini-3.1-pro-preview");
       expect(model?.name).toBe("Gemini 3 Pro");
     });
+
+    it("carries configured model compat into catalog entries for provider policy", () => {
+      const cfg = {
+        models: {
+          providers: {
+            vllm: {
+              models: [
+                {
+                  id: "Qwen/Qwen3-8B",
+                  name: "Qwen 3 8B",
+                  reasoning: true,
+                  compat: {
+                    thinkingFormat: "qwen-chat-template",
+                  },
+                },
+              ],
+            },
+          },
+        },
+      } as unknown as OpenClawConfig;
+
+      const model = buildConfiguredModelCatalog({ cfg }).find(
+        (entry) => entry.provider === "vllm" && entry.id === "Qwen/Qwen3-8B",
+      );
+      expect(model?.compat).toEqual({ thinkingFormat: "qwen-chat-template" });
+      expect(model?.reasoning).toBe(true);
+    });
+
+    it("does not infer reasoning from non-vLLM thinking compat", () => {
+      const cfg = {
+        models: {
+          providers: {
+            custom: {
+              models: [
+                {
+                  id: "custom-reasoning",
+                  name: "Custom Reasoning",
+                  compat: {
+                    thinkingFormat: "together",
+                  },
+                },
+              ],
+            },
+          },
+        },
+      } as unknown as OpenClawConfig;
+
+      const model = buildConfiguredModelCatalog({ cfg }).find(
+        (entry) => entry.provider === "custom" && entry.id === "custom-reasoning",
+      );
+      expect(model?.compat).toEqual({ thinkingFormat: "together" });
+      expect(model?.reasoning).toBeUndefined();
+    });
   });
 
   describe("buildModelAliasIndex", () => {
@@ -953,6 +1006,43 @@ describe("model-selection", () => {
       ]);
     });
 
+    it("overlays configured provider metadata after manifest model normalization", () => {
+      const cfg: OpenClawConfig = {
+        models: {
+          providers: {
+            nvidia: {
+              models: [
+                {
+                  id: "llama-fast",
+                  name: "Configured Llama Fast",
+                  contextWindow: 128_000,
+                  reasoning: true,
+                  compat: { thinkingFormat: "qwen" },
+                },
+              ],
+            },
+          },
+        },
+      } as unknown as OpenClawConfig;
+
+      const result = buildAllowedModelSet({
+        cfg,
+        catalog: [{ provider: "nvidia", id: "nvidia/llama-fast", name: "Runtime Llama Fast" }],
+        defaultProvider: "anthropic",
+      });
+
+      expect(result.allowedCatalog).toEqual([
+        {
+          provider: "nvidia",
+          id: "nvidia/llama-fast",
+          name: "Configured Llama Fast",
+          contextWindow: 128_000,
+          reasoning: true,
+          compat: { thinkingFormat: "qwen" },
+        },
+      ]);
+    });
+
     it("keeps configured provider models visible when the catalog is otherwise allow-any", () => {
       const cfg: OpenClawConfig = {
         agents: {
diff --git a/src/agents/pi-embedded-runner/model.test.ts b/src/agents/pi-embedded-runner/model.test.ts
index c7cac2f1ad6..46a1db5815e 100644
--- a/src/agents/pi-embedded-runner/model.test.ts
+++ b/src/agents/pi-embedded-runner/model.test.ts
@@ -1499,6 +1499,101 @@ describe("resolveModel", () => {
     expect(result.model?.reasoning).toBe(true);
   });
 
+  it("propagates compat from matching configured fallback model", () => {
+    const cfg = {
+      models: {
+        providers: {
+          vllm: {
+            baseUrl: "http://localhost:9000",
+            api: "openai-completions",
+            models: [
+              {
+                ...makeModel("Qwen/Qwen3-8B"),
+                compat: { thinkingFormat: "qwen-chat-template" },
+              },
+            ],
+          },
+        },
+      },
+    } as unknown as OpenClawConfig;
+
+    const result = resolveModelForTest("vllm", "Qwen/Qwen3-8B", "/tmp/agent", cfg);
+
+    expect(result.error).toBeUndefined();
+    expect(result.model?.compat).toEqual(
+      expect.objectContaining({ thinkingFormat: "qwen-chat-template" }),
+    );
+    expect(result.model?.reasoning).toBe(false);
+  });
+
+  it("lets configured vLLM Qwen compat override stale discovered reasoning", () => {
+    mockDiscoveredModel(discoverModels, {
+      provider: "vllm",
+      modelId: "Qwen/Qwen3-8B",
+      templateModel: {
+        ...makeModel("Qwen/Qwen3-8B"),
+        provider: "vllm",
+        api: "openai-completions",
+        baseUrl: "http://localhost:9000",
+        reasoning: false,
+        compat: { supportsStrictMode: false },
+      },
+    });
+    const cfg = {
+      models: {
+        providers: {
+          vllm: {
+            baseUrl: "http://localhost:9000",
+            api: "openai-completions",
+            models: [
+              {
+                id: "Qwen/Qwen3-8B",
+                name: "Qwen/Qwen3-8B",
+                compat: { thinkingFormat: "qwen-chat-template" },
+              },
+            ],
+          },
+        },
+      },
+    } as unknown as OpenClawConfig;
+
+    const result = resolveModelForTest("vllm", "Qwen/Qwen3-8B", "/tmp/agent", cfg);
+
+    expect(result.error).toBeUndefined();
+    expect(result.model?.reasoning).toBe(true);
+    expect(result.model?.compat).toEqual(
+      expect.objectContaining({
+        supportsStrictMode: false,
+        thinkingFormat: "qwen-chat-template",
+      }),
+    );
+  });
+
+  it("infers reasoning for matching vLLM Qwen compat fallback models", () => {
+    const cfg = {
+      models: {
+        providers: {
+          vllm: {
+            baseUrl: "http://localhost:9000",
+            api: "openai-completions",
+            models: [
+              {
+                id: "Qwen/Qwen3-8B",
+                name: "Qwen/Qwen3-8B",
+                compat: { thinkingFormat: "qwen-chat-template" },
+              },
+            ],
+          },
+        },
+      },
+    } as unknown as OpenClawConfig;
+
+    const result = resolveModelForTest("vllm", "Qwen/Qwen3-8B", "/tmp/agent", cfg);
+
+    expect(result.error).toBeUndefined();
+    expect(result.model?.reasoning).toBe(true);
+  });
+
   it("propagates image input capability from matching configured fallback model", () => {
     const cfg = {
       models: {
diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts
index 9c6b2305ac3..fee53cc5242 100644
--- a/src/agents/pi-embedded-runner/model.ts
+++ b/src/agents/pi-embedded-runner/model.ts
@@ -5,7 +5,7 @@ import {
   type AuthStorage,
   type ModelRegistry,
 } from "@earendil-works/pi-coding-agent";
-import type { ModelMediaInputConfig } from "../../config/types.models.js";
+import type { ModelCompatConfig, ModelMediaInputConfig } from "../../config/types.models.js";
 import type { OpenClawConfig } from "../../config/types.openclaw.js";
 import type { ProviderRuntimeModel } from "../../plugins/provider-runtime-model.types.js";
 import {
@@ -688,6 +688,14 @@ function applyConfiguredProviderOverrides(params: {
     metadataOverrideModel?.contextWindow ?? providerConfig.contextWindow;
   const resolvedMaxTokens =
     metadataOverrideModel?.maxTokens ?? providerConfig.maxTokens ?? discoveredModel.maxTokens;
+  const resolvedCompat = mergeModelCompat(discoveredModel.compat, metadataOverrideModel?.compat);
+  const resolvedReasoning = resolveMergedConfiguredModelReasoning({
+    provider: params.provider,
+    configuredCompat: metadataOverrideModel?.compat,
+    resolvedCompat,
+    configuredReasoning: metadataOverrideModel?.reasoning,
+    discoveredReasoning: discoveredModel.reasoning,
+  });
   const requestConfig = resolveProviderRequestConfig({
     provider: params.provider,
     api:
@@ -710,7 +718,7 @@ function applyConfiguredProviderOverrides(params: {
         ...discoveredModel,
         api: requestConfig.api ?? "openai-responses",
         baseUrl: requestConfig.baseUrl ?? discoveredModel.baseUrl,
-        reasoning: metadataOverrideModel?.reasoning ?? discoveredModel.reasoning,
+        reasoning: resolvedReasoning,
         input: normalizedInput,
         cost: metadataOverrideModel?.cost ?? discoveredModel.cost,
         contextWindow: resolvedContextWindow ?? discoveredModel.contextWindow,
@@ -725,7 +733,7 @@ function applyConfiguredProviderOverrides(params: {
         ...(resolvedParams ? { params: resolvedParams } : {}),
         ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
         headers: requestConfig.headers,
-        compat: metadataOverrideModel?.compat ?? discoveredModel.compat,
+        compat: resolvedCompat,
         mediaInput: mergeModelMediaInput(
           discoveredModel.mediaInput,
           metadataOverrideModel?.mediaInput,
@@ -778,6 +786,11 @@ function resolveExplicitModelWithRegistry(params: {
         workspaceDir,
         model: {
           ...inlineMatch,
+          reasoning: resolveConfiguredModelReasoning({
+            provider,
+            compat: inlineMatch.compat,
+            reasoning: inlineMatch.reasoning,
+          }),
           ...(resolvedParams ? { params: resolvedParams } : {}),
           ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
         } as Model<Api>,
@@ -842,6 +855,11 @@ function resolveExplicitModelWithRegistry(params: {
         workspaceDir,
         model: {
           ...fallbackInlineMatch,
+          reasoning: resolveConfiguredModelReasoning({
+            provider,
+            compat: fallbackInlineMatch.compat,
+            reasoning: fallbackInlineMatch.reasoning,
+          }),
           ...(resolvedParams ? { params: resolvedParams } : {}),
           ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
         } as Model<Api>,
@@ -961,6 +979,11 @@ function resolveConfiguredFallbackModel(params: {
     capability: "llm",
     transport: "stream",
   });
+  const fallbackReasoning = resolveConfiguredFallbackReasoning({
+    provider,
+    compat: configuredModel?.compat,
+    reasoning: configuredModel?.reasoning,
+  });
   return normalizeResolvedModel({
     provider,
     cfg,
@@ -974,7 +997,7 @@ function resolveConfiguredFallbackModel(params: {
           api: requestConfig.api ?? "openai-responses",
           provider,
           baseUrl: requestConfig.baseUrl,
-          reasoning: configuredModel?.reasoning ?? false,
+          reasoning: fallbackReasoning,
           input: resolveProviderModelInput({
             provider,
             modelId,
@@ -999,6 +1022,7 @@ function resolveConfiguredFallbackModel(params: {
           ...(resolvedParams ? { params: resolvedParams } : {}),
           ...(requestTimeoutMs !== undefined ? { requestTimeoutMs } : {}),
           headers: requestConfig.headers,
+          compat: configuredModel?.compat,
           mediaInput: configuredModel?.mediaInput,
         } as Model<Api>,
         providerRequest,
@@ -1033,6 +1057,71 @@ function shouldCompareProviderRuntimeResolvedModel(params: {
   );
 }
 
+function resolveConfiguredFallbackReasoning(params: {
+  provider: string;
+  compat?: { thinkingFormat?: string } | null;
+  reasoning?: boolean;
+}): boolean {
+  return resolveConfiguredModelReasoning(params) === true; // undecided means no reasoning
+}
+
+function resolveConfiguredModelReasoning(params: {
+  provider: string;
+  compat?: { thinkingFormat?: string } | null;
+  reasoning?: boolean;
+}): boolean | undefined {
+  // An explicit flag always wins; otherwise vLLM Qwen compat implies reasoning, else undecided.
+  return params.reasoning !== undefined
+    ? params.reasoning
+    : isVllmQwenThinkingCompat(params) || undefined;
+}
+
+function resolveMergedConfiguredModelReasoning(params: {
+  provider: string;
+  configuredCompat?: { thinkingFormat?: string } | null;
+  resolvedCompat?: { thinkingFormat?: string } | null;
+  configuredReasoning?: boolean;
+  discoveredReasoning?: boolean;
+}): boolean {
+  // Precedence: explicit configured flag, configured vLLM Qwen compat, then discovered data.
+  if (params.configuredReasoning !== undefined) {
+    return params.configuredReasoning;
+  }
+  if (isVllmQwenThinkingCompat({ provider: params.provider, compat: params.configuredCompat })) {
+    return true;
+  }
+  const discovered = resolveConfiguredModelReasoning({
+    provider: params.provider,
+    compat: params.resolvedCompat,
+    reasoning: params.discoveredReasoning,
+  });
+  return discovered ?? false;
+}
+
+/** True when the normalized provider id is "vllm" and compat selects a Qwen thinking format. */
+function isVllmQwenThinkingCompat(params: {
+  provider: string;
+  compat?: { thinkingFormat?: string } | null;
+}): boolean {
+  if (normalizeProviderId(params.provider) !== "vllm") {
+    return false;
+  }
+  return ["qwen", "qwen-chat-template"].includes(params.compat?.thinkingFormat ?? "");
+}
+
+/** Combines two optional compat configs; `override` keys replace matching `base` keys. */
+function mergeModelCompat(
+  base: ModelCompatConfig | undefined,
+  override: ModelCompatConfig | undefined,
+): ModelCompatConfig | undefined {
+  if (!base || !override) {
+    // A missing side short-circuits to the other one unchanged, so no new object is built.
+    return base || override;
+  }
+  // Both present: the later spread wins on key clashes.
+  return { ...base, ...override };
+}
+
 function preferProviderRuntimeResolvedModel(params: {
   explicitModel: Model<Api>;
   runtimeResolvedModel?: Model<Api>;
diff --git a/src/auto-reply/reply/model-selection.test.ts b/src/auto-reply/reply/model-selection.test.ts
index e98eb44b4fc..f0d3b2c0659 100644
--- a/src/auto-reply/reply/model-selection.test.ts
+++ b/src/auto-reply/reply/model-selection.test.ts
@@ -63,7 +63,7 @@ const makeConfiguredModel = (overrides: Record<string, unknown> = {}) => ({
   id: "gpt-5.4",
   name: "GPT-5.4",
   reasoning: true,
-  input: ["text"],
+  input: ["text"] as Array<"text">,
   cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
   contextWindow: 128_000,
   maxTokens: 16_384,
@@ -215,6 +215,115 @@ describe("createModelSelectionState catalog loading", () => {
     expect(loadModelCatalog).not.toHaveBeenCalled();
   });
 
+  it("keeps configured compat when manifest thinking metadata is used", async () => {
+    vi.mocked(loadModelCatalog).mockClear();
+    vi.mocked(loadManifestModelCatalog).mockReturnValueOnce([
+      { provider: "vllm", id: "Qwen/Qwen3-8B", name: "Qwen3", reasoning: true },
+    ]);
+    const cfg = {
+      agents: {
+        defaults: {
+          models: {
+            "vllm/Qwen/Qwen3-8B": {},
+          },
+        },
+      },
+      models: {
+        providers: {
+          vllm: {
+            baseUrl: "http://localhost:9000/v1",
+            models: [
+              makeConfiguredModel({
+                id: "Qwen/Qwen3-8B",
+                name: "Qwen3",
+                compat: { thinkingFormat: "qwen-chat-template" },
+              }),
+            ],
+          },
+        },
+      },
+    } as OpenClawConfig;
+
+    const state = await createModelSelectionState({
+      cfg,
+      agentCfg: cfg.agents?.defaults,
+      defaultProvider: "vllm",
+      defaultModel: "Qwen/Qwen3-8B",
+      provider: "vllm",
+      model: "Qwen/Qwen3-8B",
+      hasModelDirective: false,
+    });
+
+    await expect(state.resolveThinkingCatalog()).resolves.toEqual([
+      expect.objectContaining({
+        provider: "vllm",
+        id: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      }),
+    ]);
+    expect(loadModelCatalog).not.toHaveBeenCalled();
+  });
+
+  it("keeps configured compat when runtime thinking catalog is already loaded", async () => {
+    vi.mocked(loadModelCatalog).mockClear();
+    vi.mocked(loadModelCatalog).mockResolvedValueOnce([
+      {
+        provider: "vllm",
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen3",
+        reasoning: true,
+        compat: { supportedReasoningEfforts: ["xhigh"] },
+      },
+    ]);
+    const cfg = {
+      agents: {
+        defaults: {
+          models: {
+            "vllm/Qwen/Qwen3-8B": {},
+          },
+        },
+      },
+      models: {
+        providers: {
+          vllm: {
+            baseUrl: "http://localhost:9000/v1",
+            models: [
+              makeConfiguredModel({
+                id: "Qwen/Qwen3-8B",
+                name: "Qwen3",
+                compat: { thinkingFormat: "qwen-chat-template" },
+              }),
+            ],
+          },
+        },
+      },
+    } as OpenClawConfig;
+
+    const state = await createModelSelectionState({
+      cfg,
+      agentCfg: cfg.agents?.defaults,
+      defaultProvider: "vllm",
+      defaultModel: "Qwen/Qwen3-8B",
+      provider: "vllm",
+      model: "Qwen/Qwen3-8B",
+      hasModelDirective: true,
+    });
+
+    await expect(state.resolveThinkingCatalog()).resolves.toEqual([
+      expect.objectContaining({
+        provider: "vllm",
+        id: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: {
+          supportedReasoningEfforts: ["xhigh"],
+          thinkingFormat: "qwen-chat-template",
+        },
+      }),
+    ]);
+    expect(loadModelCatalog).toHaveBeenCalledOnce();
+  });
+
   it("prefers per-agent thinkingDefault over model and global defaults", async () => {
     vi.mocked(loadModelCatalog).mockClear();
     const cfg = {
diff --git a/src/auto-reply/reply/model-selection.ts b/src/auto-reply/reply/model-selection.ts
index c4ab4106508..eb6bd1f8dc8 100644
--- a/src/auto-reply/reply/model-selection.ts
+++ b/src/auto-reply/reply/model-selection.ts
@@ -97,10 +97,8 @@ function findSelectedCatalogEntry(params: {
   model: string;
 }): ModelCatalogEntry | undefined {
   const normalizedProvider = normalizeProviderId(params.provider);
-  return params.catalog?.find(
-    (entry) =>
-      normalizeProviderId(entry.provider) === normalizedProvider && entry.id === params.model,
-  );
+  const wanted = modelKey(normalizedProvider, params.model); // both sides use normalized ids
+  return params.catalog?.find((e) => modelKey(normalizeProviderId(e.provider), e.id) === wanted);
 }
 
 export async function createModelSelectionState(params: {
@@ -360,6 +358,15 @@ export async function createModelSelectionState(params: {
 
   let thinkingCatalog: ModelCatalog | undefined;
   let manifestModelCatalog: ModelCatalog | null = null;
+  const buildThinkingCatalog = (catalog: ModelCatalog): ModelCatalog =>
+    createModelVisibilityPolicy({
+      cfg,
+      catalog,
+      defaultProvider,
+      defaultModel,
+      agentId: params.agentId,
+      ...RUNTIME_MODEL_VISIBILITY_NORMALIZATION,
+    }).allowedCatalog;
   const loadManifestCatalogForThinking = async () => {
     if (manifestModelCatalog) {
       return manifestModelCatalog;
@@ -377,7 +384,11 @@ export async function createModelSelectionState(params: {
       return thinkingCatalog;
     }
     let catalogForThinking =
-      modelCatalog && modelCatalog.length > 0 ? modelCatalog : allowedModelCatalog;
+      allowedModelCatalog.length > 0
+        ? allowedModelCatalog
+        : modelCatalog && modelCatalog.length > 0
+          ? buildThinkingCatalog(modelCatalog)
+          : [];
     let selectedCatalogEntry = findSelectedCatalogEntry({
       catalog: catalogForThinking,
       provider,
@@ -387,7 +398,7 @@ export async function createModelSelectionState(params: {
     // allowlist rows know only provider/id; manifest rows can prove reasoning
     // support without opening the Pi auth-backed model registry.
     if (!modelCatalog && selectedCatalogEntry?.reasoning === undefined) {
-      const manifestCatalog = await loadManifestCatalogForThinking();
+      const manifestCatalog = buildThinkingCatalog(await loadManifestCatalogForThinking());
       const manifestSelectedEntry = findSelectedCatalogEntry({
         catalog: manifestCatalog,
         provider,
@@ -403,13 +414,16 @@ export async function createModelSelectionState(params: {
     if (shouldHydrateRuntimeCatalog) {
       modelCatalog = await (await loadModelCatalogRuntime()).loadModelCatalog({ config: cfg });
       logStage("catalog-loaded-for-thinking", `entries=${modelCatalog.length}`);
-      const runtimeSelectedEntry = modelCatalog.find(
-        (entry) => entry.provider === provider && entry.id === model,
-      );
+      const runtimeCatalog = buildThinkingCatalog(modelCatalog);
+      const runtimeSelectedEntry = findSelectedCatalogEntry({
+        catalog: runtimeCatalog,
+        provider,
+        model,
+      });
       catalogForThinking =
         runtimeSelectedEntry || !catalogForThinking || catalogForThinking.length === 0
-          ? modelCatalog.length > 0
-            ? modelCatalog
+          ? runtimeCatalog.length > 0
+            ? runtimeCatalog
             : allowedModelCatalog
           : allowedModelCatalog;
     }
diff --git a/src/auto-reply/thinking.shared.ts b/src/auto-reply/thinking.shared.ts
index 0d1a8b4e707..5629d7aa526 100644
--- a/src/auto-reply/thinking.shared.ts
+++ b/src/auto-reply/thinking.shared.ts
@@ -27,6 +27,7 @@ export type ThinkingCatalogEntry = {
   id: string;
   reasoning?: boolean;
   compat?: {
+    thinkingFormat?: string;
     supportedReasoningEfforts?: readonly string[] | null;
   } | null;
 };
diff --git a/src/auto-reply/thinking.test.ts b/src/auto-reply/thinking.test.ts
index 680fd9f3e64..cf70b5f744f 100644
--- a/src/auto-reply/thinking.test.ts
+++ b/src/auto-reply/thinking.test.ts
@@ -255,6 +255,64 @@ describe("listThinkingLevels", () => {
     ).toBe("max");
   });
 
+  it("passes catalog compat into provider thinking profiles", () => {
+    providerRuntimeMocks.resolveProviderThinkingProfile.mockImplementation(({ context }) =>
+      context.reasoning === true && context.compat?.thinkingFormat === "qwen-chat-template"
+        ? {
+            levels: [{ id: "off" }, { id: "low", label: "on" }],
+            defaultLevel: "off",
+          }
+        : undefined,
+    );
+    const catalog = [
+      {
+        provider: "vllm",
+        id: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      },
+    ];
+
+    expect(listThinkingLevelLabels("vllm", "Qwen/Qwen3-8B", catalog)).toEqual(["off", "on"]);
+    expect(
+      resolveSupportedThinkingLevel({
+        provider: "vllm",
+        model: "Qwen/Qwen3-8B",
+        level: "high",
+        catalog,
+      }),
+    ).toBe("low");
+  });
+
+  it("matches provider-qualified catalog ids for provider thinking profiles", () => {
+    providerRuntimeMocks.resolveProviderThinkingProfile.mockImplementation(({ context }) =>
+      context.reasoning === true && context.compat?.thinkingFormat === "qwen-chat-template"
+        ? {
+            levels: [{ id: "off" }, { id: "low", label: "on" }],
+            defaultLevel: "off",
+          }
+        : undefined,
+    );
+    const catalog = [
+      {
+        provider: "vllm",
+        id: "vllm/Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      },
+    ];
+
+    expect(listThinkingLevelLabels("vllm", "Qwen/Qwen3-8B", catalog)).toEqual(["off", "on"]);
+    expect(
+      resolveSupportedThinkingLevel({
+        provider: "vllm",
+        model: "Qwen/Qwen3-8B",
+        level: "high",
+        catalog,
+      }),
+    ).toBe("low");
+  });
+
   it("uses catalog compat reasoning efforts to expose xhigh for configured custom models", () => {
     const catalog = [
       {
diff --git a/src/auto-reply/thinking.ts b/src/auto-reply/thinking.ts
index fa3c00e1b59..8589c6523b2 100644
--- a/src/auto-reply/thinking.ts
+++ b/src/auto-reply/thinking.ts
@@ -57,6 +57,22 @@ type ResolvedThinkingProfile = {
   defaultLevel?: ThinkLevel | null;
 };
 
+/** Builds a `provider/model` key, leaving ids that already carry the provider prefix as-is. */
+function buildCatalogModelKey(provider: string, model: string): string {
+  const trimmedProvider = provider.trim();
+  const trimmedModel = model.trim();
+  if (!trimmedProvider || !trimmedModel) {
+    // With one part blank the other is the whole key (both blank yields "").
+    return trimmedProvider || trimmedModel;
+  }
+  const prefix = `${normalizeOptionalLowercaseString(trimmedProvider)}/`;
+  // Prefix check compares the normalizeOptionalLowercaseString forms; the result keeps its casing.
+  if (normalizeOptionalLowercaseString(trimmedModel)?.startsWith(prefix)) {
+    return trimmedModel;
+  }
+  return `${trimmedProvider}/${trimmedModel}`;
+}
+
 function resolveThinkingPolicyContext(params: {
   provider?: string | null;
   model?: string | null;
@@ -66,8 +82,12 @@ function resolveThinkingPolicyContext(params: {
   const normalizedProvider = providerRaw ? normalizeProviderId(providerRaw) : "";
   const modelId = normalizeOptionalString(params.model) ?? "";
   const modelKey = normalizeOptionalLowercaseString(params.model) ?? "";
+  const selectedCatalogKey =
+    normalizedProvider && modelId ? buildCatalogModelKey(normalizedProvider, modelId) : undefined;
   const candidate = params.catalog?.find(
-    (entry) => normalizeProviderId(entry.provider) === normalizedProvider && entry.id === modelId,
+    (entry) =>
+      selectedCatalogKey !== undefined &&
+      buildCatalogModelKey(normalizeProviderId(entry.provider), entry.id) === selectedCatalogKey,
   );
   return {
     normalizedProvider,
@@ -165,6 +185,7 @@ export function resolveThinkingProfile(params: {
     provider: context.normalizedProvider,
     modelId: context.modelId,
     reasoning: context.reasoning,
+    compat: context.compat,
   };
   const pluginProfile = resolveProviderThinkingProfile({
     provider: context.normalizedProvider,
diff --git a/src/commands/doctor/shared/legacy-config-migrate.test.ts b/src/commands/doctor/shared/legacy-config-migrate.test.ts
index 2f74cb70bd0..2e2339f8ff2 100644
--- a/src/commands/doctor/shared/legacy-config-migrate.test.ts
+++ b/src/commands/doctor/shared/legacy-config-migrate.test.ts
@@ -1700,6 +1700,647 @@ describe("legacy model compat migrate", () => {
     ]);
   });
 
+  it("moves legacy vLLM Qwen thinking params to model compat", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          models: {
+            "vllm/Qwen/Qwen3-8B": {
+              params: {
+                qwenThinkingFormat: "chat-template",
+                temperature: 0.2,
+              },
+            },
+          },
+        },
+      },
+      models: {
+        providers: {
+          vllm: {
+            models: [{ id: "Qwen/Qwen3-8B", name: "Qwen3 8B" }],
+          },
+        },
+      },
+    });
+    // Only the legacy key leaves the agent params; it lands on the matching provider row.
+    expect(res.config?.agents?.defaults?.models?.["vllm/Qwen/Qwen3-8B"]?.params).toEqual({
+      temperature: 0.2,
+    });
+    expect(res.config?.models?.providers?.vllm?.models?.[0]?.compat).toEqual({
+      thinkingFormat: "qwen-chat-template",
+    });
+    expect(res.config?.models?.providers?.vllm?.models?.[0]?.reasoning).toBe(true);
+    expect(res.changes).toStrictEqual([
+      'Moved agents.defaults.models."vllm/Qwen/Qwen3-8B".params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").',
+    ]);
+  });
+
+  it("moves legacy vLLM Qwen thinking params from normalized agent model refs", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          models: {
+            "VLLM/Qwen/Qwen3-8B": {
+              params: {
+                qwenThinkingFormat: "chat-template",
+              },
+            },
+          },
+        },
+      },
+    });
+    // The uppercase provider prefix is still recognized; the row is created from scratch.
+    expect(res.config?.agents?.defaults?.models?.["VLLM/Qwen/Qwen3-8B"]).not.toHaveProperty(
+      "params",
+    );
+    expect(res.config?.models?.providers?.vllm?.models).toEqual([
+      {
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      },
+    ]);
+    expect(res.changes).toStrictEqual([
+      'Moved agents.defaults.models."VLLM/Qwen/Qwen3-8B".params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").',
+    ]);
+  });
+
+  it("creates a vLLM model row for legacy Qwen top-level thinking params", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          models: {
+            "vllm/Qwen/Qwen3-8B": {
+              params: {
+                qwen_thinking_format: "enable_thinking",
+              },
+            },
+          },
+        },
+      },
+    });
+    // The snake_case key with "enable_thinking" maps to compat "qwen" on a newly created row.
+    expect(res.config?.agents?.defaults?.models?.["vllm/Qwen/Qwen3-8B"]).not.toHaveProperty(
+      "params",
+    );
+    expect(res.config?.models?.providers?.vllm?.models).toEqual([
+      {
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen" },
+      },
+    ]);
+    expect(res.changes).toStrictEqual([
+      'Moved agents.defaults.models."vllm/Qwen/Qwen3-8B".params.qwen_thinking_format to models.providers.vllm.models[0].compat.thinkingFormat ("qwen").',
+    ]);
+  });
+
+  it("preserves existing vLLM model compat when removing legacy Qwen thinking params", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          models: {
+            "vllm/Qwen/Qwen3-8B": {
+              params: {
+                qwenThinkingFormat: "top-level",
+              },
+            },
+          },
+        },
+      },
+      models: {
+        providers: {
+          vllm: {
+            models: [
+              {
+                id: "Qwen/Qwen3-8B",
+                compat: { thinkingFormat: "qwen-chat-template" },
+              },
+            ],
+          },
+        },
+      },
+    });
+    // The row's existing compat value wins; the conflicting legacy param is only removed.
+    expect(res.config?.agents?.defaults?.models?.["vllm/Qwen/Qwen3-8B"]).not.toHaveProperty(
+      "params",
+    );
+    expect(res.config?.models?.providers?.vllm?.models?.[0]?.compat).toEqual({
+      thinkingFormat: "qwen-chat-template",
+    });
+    expect(res.config?.models?.providers?.vllm?.models?.[0]?.reasoning).toBe(true);
+    expect(res.changes).toStrictEqual([
+      'Removed agents.defaults.models."vllm/Qwen/Qwen3-8B".params.qwenThinkingFormat; models.providers.vllm.models[0].compat.thinkingFormat is already "qwen-chat-template".',
+    ]);
+  });
+
+  it("moves legacy vLLM Qwen thinking params onto provider-qualified model rows", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          models: {
+            "vllm/Qwen/Qwen3-8B": {
+              params: {
+                qwenThinkingFormat: "chat-template",
+              },
+            },
+          },
+        },
+      },
+      models: {
+        providers: {
+          vllm: {
+            models: [{ id: "vllm/Qwen/Qwen3-8B", name: "Qwen3 8B" }],
+          },
+        },
+      },
+    });
+    // A row whose id already carries the "vllm/" prefix is reused rather than duplicated.
+    expect(res.config?.models?.providers?.vllm?.models).toEqual([
+      {
+        id: "vllm/Qwen/Qwen3-8B",
+        name: "Qwen3 8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      },
+    ]);
+    expect(res.changes).toStrictEqual([
+      'Moved agents.defaults.models."vllm/Qwen/Qwen3-8B".params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").',
+    ]);
+  });
+
+  it("moves legacy vLLM Qwen model-row params to model compat", () => {
+    const res = migrateLegacyConfigForTest({
+      models: {
+        providers: {
+          vllm: {
+            models: [
+              {
+                id: "Qwen/Qwen3-8B",
+                name: "Qwen3 8B",
+                params: {
+                  qwenThinkingFormat: "chat-template",
+                  temperature: 0.2,
+                },
+              },
+            ],
+          },
+        },
+      },
+    });
+    // Params on the model row itself migrate in place; unrelated params remain on the row.
+    expect(res.config?.models?.providers?.vllm?.models?.[0]).toEqual({
+      id: "Qwen/Qwen3-8B",
+      name: "Qwen3 8B",
+      reasoning: true,
+      params: { temperature: 0.2 },
+      compat: { thinkingFormat: "qwen-chat-template" },
+    });
+    expect(res.changes).toStrictEqual([
+      'Moved models.providers.vllm.models[0].params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").',
+    ]);
+  });
+
+  it("moves legacy vLLM Qwen provider params to model compat rows", () => {
+    const res = migrateLegacyConfigForTest({
+      models: {
+        providers: {
+          vllm: {
+            params: {
+              qwen_thinking_format: "enable_thinking",
+              temperature: 0.2,
+            },
+            models: [
+              { id: "Qwen/Qwen3-8B", name: "Qwen3 8B" },
+              { id: "Qwen/Qwen3-14B", name: "Qwen3 14B" },
+            ],
+          },
+        },
+      },
+    });
+    // A provider-level param fans out to every configured row, with one change line per row.
+    expect(res.config?.models?.providers?.vllm?.params).toEqual({ temperature: 0.2 });
+    expect(res.config?.models?.providers?.vllm?.models).toEqual([
+      {
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen3 8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen" },
+      },
+      {
+        id: "Qwen/Qwen3-14B",
+        name: "Qwen3 14B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen" },
+      },
+    ]);
+    expect(res.changes).toStrictEqual([
+      'Moved models.providers.vllm.params.qwen_thinking_format to models.providers.vllm.models[0].compat.thinkingFormat ("qwen").',
+      'Moved models.providers.vllm.params.qwen_thinking_format to models.providers.vllm.models[1].compat.thinkingFormat ("qwen").',
+    ]);
+  });
+
+  it("moves legacy vLLM Qwen provider params to existing and selected model rows", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          model: { primary: "vllm/Qwen/Qwen3-8B" },
+        },
+      },
+      models: {
+        providers: {
+          vllm: {
+            params: {
+              qwenThinkingFormat: "chat-template",
+            },
+            models: [{ id: "Qwen/Qwen3-14B", name: "Qwen3 14B" }],
+          },
+        },
+      },
+    });
+    // The existing row keeps index 0; a row for the selected primary model is appended after it.
+    expect(res.config?.models?.providers?.vllm?.models).toEqual([
+      {
+        id: "Qwen/Qwen3-14B",
+        name: "Qwen3 14B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      },
+      {
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      },
+    ]);
+    expect(res.changes).toStrictEqual([
+      'Moved models.providers.vllm.params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").',
+      'Moved models.providers.vllm.params.qwenThinkingFormat to models.providers.vllm.models[1].compat.thinkingFormat ("qwen-chat-template").',
+    ]);
+  });
+
+  it("removes untargeted legacy vLLM Qwen provider params", () => {
+    const res = migrateLegacyConfigForTest({
+      models: {
+        providers: {
+          vllm: {
+            baseUrl: "http://localhost:8000/v1",
+            params: {
+              qwenThinkingFormat: "chat-template",
+              temperature: 0.2,
+            },
+          },
+        },
+      },
+    });
+    // With no rows and no vLLM model refs, the key is dropped and no `models` array is added.
+    expect(res.config?.models?.providers?.vllm).toEqual({
+      baseUrl: "http://localhost:8000/v1",
+      params: { temperature: 0.2 },
+    });
+    expect(res.changes).toStrictEqual([
+      "Removed models.providers.vllm.params.qwenThinkingFormat; no concrete vLLM model row or agent model ref exists, so configure models.providers.vllm.models[].compat.thinkingFormat on each Qwen model that needs it.",
+    ]);
+  });
+
+  it("moves legacy vLLM Qwen provider params using the default selected model", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          model: { primary: "vllm/Qwen/Qwen3-8B" },
+        },
+      },
+      models: {
+        providers: {
+          vllm: {
+            params: {
+              qwenThinkingFormat: "chat-template",
+              temperature: 0.2,
+            },
+          },
+        },
+      },
+    });
+    // The default primary model ref names the row to create when the provider has none.
+    expect(res.config?.models?.providers?.vllm?.params).toEqual({ temperature: 0.2 });
+    expect(res.config?.models?.providers?.vllm?.models).toEqual([
+      {
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      },
+    ]);
+    expect(res.changes).toStrictEqual([
+      'Moved models.providers.vllm.params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").',
+    ]);
+  });
+
+  it("preserves normalized vLLM provider keys when moving provider params", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          model: { primary: "vllm/Qwen/Qwen3-8B" },
+        },
+      },
+      models: {
+        providers: {
+          VLLM: {
+            baseUrl: "http://localhost:8000/v1",
+            params: {
+              qwenThinkingFormat: "chat-template",
+              temperature: 0.2,
+            },
+          },
+        },
+      },
+    });
+    // The row is written under the original `VLLM` key; change text still says lowercase `vllm`.
+    expect(res.config?.models?.providers?.vllm).toBeUndefined();
+    expect(res.config?.models?.providers?.VLLM).toEqual({
+      baseUrl: "http://localhost:8000/v1",
+      params: { temperature: 0.2 },
+      models: [
+        {
+          id: "Qwen/Qwen3-8B",
+          name: "Qwen/Qwen3-8B",
+          reasoning: true,
+          compat: { thinkingFormat: "qwen-chat-template" },
+        },
+      ],
+    });
+    expect(res.changes).toStrictEqual([
+      'Moved models.providers.vllm.params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").',
+    ]);
+  });
+
+  it("strips auth profile suffixes when moving legacy vLLM Qwen params", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          model: { primary: "vllm/Qwen/Qwen3-8B@local" },
+        },
+      },
+      models: {
+        providers: {
+          vllm: {
+            params: {
+              qwenThinkingFormat: "chat-template",
+            },
+          },
+        },
+      },
+    });
+    // The trailing "@local" part of the ref must not leak into the created row id.
+    expect(res.config?.models?.providers?.vllm?.models).toEqual([
+      {
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      },
+    ]);
+  });
+
+  it("moves legacy vLLM Qwen default agent params to the selected model compat row", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          model: { primary: "vllm/Qwen/Qwen3-8B" },
+          params: {
+            qwenThinkingFormat: "chat-template",
+            temperature: 0.2,
+          },
+        },
+      },
+    });
+    // agents.defaults.params is a source too; the default primary model picks the target row.
+    expect(res.config?.agents?.defaults?.params).toEqual({ temperature: 0.2 });
+    expect(res.config?.models?.providers?.vllm?.models).toEqual([
+      {
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      },
+    ]);
+    expect(res.changes).toStrictEqual([
+      'Moved agents.defaults.params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").',
+    ]);
+  });
+
+  it("removes untargeted legacy vLLM Qwen default agent params", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          params: {
+            qwenThinkingFormat: "chat-template",
+            temperature: 0.2,
+          },
+        },
+      },
+    });
+    // No model is selected, so the key is dropped with guidance instead of being moved.
+    expect(res.config?.agents?.defaults?.params).toEqual({ temperature: 0.2 });
+    expect(res.changes).toStrictEqual([
+      "Removed agents.defaults.params.qwenThinkingFormat; no concrete vLLM model row or agent model ref exists, so configure models.providers.vllm.models[].compat.thinkingFormat on each Qwen model that needs it.",
+    ]);
+  });
+
+  it("moves legacy vLLM Qwen per-agent params to the agent model compat row", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        list: [
+          {
+            id: "local",
+            model: "vllm/Qwen/Qwen3-8B",
+            params: {
+              qwen_thinking_format: "enable_thinking",
+              temperature: 0.2,
+            },
+          },
+        ],
+      },
+    });
+    // The agent's own `model` string identifies the row that receives the compat value.
+    expect(res.config?.agents?.list?.[0]?.params).toEqual({ temperature: 0.2 });
+    expect(res.config?.models?.providers?.vllm?.models).toEqual([
+      {
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen" },
+      },
+    ]);
+    expect(res.changes).toStrictEqual([
+      'Moved agents.list[0].params.qwen_thinking_format to models.providers.vllm.models[0].compat.thinkingFormat ("qwen").',
+    ]);
+  });
+
+  it("removes untargeted legacy vLLM Qwen per-agent params", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        list: [
+          {
+            id: "local",
+            params: {
+              qwen_thinking_format: "enable_thinking",
+              temperature: 0.2,
+            },
+          },
+        ],
+      },
+    });
+    // The agent has no model and there is no default, so the key is removed with guidance.
+    expect(res.config?.agents?.list?.[0]?.params).toEqual({ temperature: 0.2 });
+    expect(res.changes).toStrictEqual([
+      "Removed agents.list[0].params.qwen_thinking_format; no concrete vLLM model row or agent model ref exists, so configure models.providers.vllm.models[].compat.thinkingFormat on each Qwen model that needs it.",
+    ]);
+  });
+
+  it("moves legacy vLLM Qwen per-agent params using the inherited default model", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          model: "vllm/Qwen/Qwen3-8B",
+        },
+        list: [
+          {
+            id: "local",
+            params: {
+              qwenThinkingFormat: "chat-template",
+            },
+          },
+        ],
+      },
+    });
+    // The agent sets no model of its own, so the string-form default model supplies the target.
+    expect(res.config?.agents?.list?.[0]).not.toHaveProperty("params");
+    expect(res.config?.models?.providers?.vllm?.models).toEqual([
+      {
+        id: "Qwen/Qwen3-8B",
+        name: "Qwen/Qwen3-8B",
+        reasoning: true,
+        compat: { thinkingFormat: "qwen-chat-template" },
+      },
+    ]);
+    expect(res.changes).toStrictEqual([
+      'Moved agents.list[0].params.qwenThinkingFormat to models.providers.vllm.models[0].compat.thinkingFormat ("qwen-chat-template").',
+    ]);
+  });
+
+  it("leaves legacy vLLM Qwen thinking params when the model compat row cannot be written", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          models: {
+            "vllm/Qwen/Qwen3-8B": {
+              params: {
+                qwenThinkingFormat: "chat-template",
+              },
+            },
+          },
+        },
+      },
+      models: {
+        providers: {
+          vllm: {
+            models: "malformed",
+          },
+        },
+      },
+    });
+    // A non-array `models` value blocks the write: the result has a null config and no changes.
+    expect(res.config).toBeNull();
+    expect(res.changes).toStrictEqual([]);
+  });
+
+  it("leaves malformed vLLM provider ancestors untouched during legacy Qwen migration", () => {
+    const res = migrateLegacyConfigForTest({
+      agents: {
+        defaults: {
+          models: {
+            "vllm/Qwen/Qwen3-8B": {
+              params: {
+                qwenThinkingFormat: "chat-template",
+              },
+            },
+          },
+        },
+      },
+      models: {
+        providers: {
+          vllm: "malformed",
+        },
+      },
+    });
+    // A non-record provider value is not replaced: the result has a null config and no changes.
+    expect(res.config).toBeNull();
+    expect(res.changes).toStrictEqual([]);
+  });
+
+  it("reports legacy vLLM Qwen thinking params before doctor fix", () => {
+    const raw = {
+      agents: {
+        defaults: {
+          models: {
+            "vllm/Qwen/Qwen3-8B": {
+              params: {
+                qwenThinkingFormat: "chat-template",
+              },
+            },
+          },
+        },
+      },
+    };
+    // Detection only: the issue is reported at the rule path, not at the individual model key.
+    expect(findLegacyConfigIssues(raw).map((issue) => issue.path)).toContain(
+      "agents.defaults.models",
+    );
+  });
+
+  it("reports legacy vLLM Qwen thinking params from merged extra-param sources", () => {
+    const raw = {
+      agents: {
+        defaults: {
+          params: {
+            qwenThinkingFormat: "chat-template",
+          },
+        },
+        list: [
+          {
+            id: "local",
+            params: {
+              qwen_thinking_format: "enable_thinking",
+            },
+          },
+        ],
+      },
+    };
+    // Default params report at "agents.defaults.params"; per-agent params report at "agents".
+    expect(findLegacyConfigIssues(raw).map((issue) => issue.path)).toEqual(
+      expect.arrayContaining(["agents.defaults.params", "agents"]),
+    );
+  });
+
+  it("reports legacy vLLM Qwen params from normalized provider keys", () => {
+    const raw = {
+      models: {
+        providers: {
+          VLLM: {
+            params: {
+              qwenThinkingFormat: "chat-template",
+            },
+          },
+        },
+      },
+    };
+    // An uppercase provider key is reported at the shared "models.providers" path.
+    expect(findLegacyConfigIssues(raw).map((issue) => issue.path)).toContain("models.providers");
+  });
+
   it("preserves recognized model compat thinkingFormat values", () => {
     const res = migrateLegacyConfigForTest({
       models: {
diff --git a/src/commands/doctor/shared/legacy-config-migrations.runtime.models.ts b/src/commands/doctor/shared/legacy-config-migrations.runtime.models.ts
index bb360da569d..91a55962f4a 100644
--- a/src/commands/doctor/shared/legacy-config-migrations.runtime.models.ts
+++ b/src/commands/doctor/shared/legacy-config-migrations.runtime.models.ts
@@ -1,6 +1,8 @@
 import { splitTrailingAuthProfile } from "../../../agents/model-ref-profile.js";
+import { normalizeProviderId } from "../../../agents/provider-id.js";
 import {
   defineLegacyConfigMigration,
+  ensureRecord,
   getRecord,
   type LegacyConfigMigrationSpec,
   type LegacyConfigRule,
@@ -78,6 +80,394 @@ function hasInvalidThinkingFormat(providers: unknown): boolean {
   return false;
 }
 
+const LEGACY_VLLM_QWEN_THINKING_FORMAT_KEYS = [
+  "qwenThinkingFormat", // camelCase spelling; listed first, so lookups check it first
+  "qwen_thinking_format", // snake_case spelling
+] as const;
+
+/**
+ * Maps a legacy Qwen thinking-format value to the compat `thinkingFormat` it stands for.
+ * The value is trimmed, lowercased, and runs of `_`/whitespace become `-` before matching;
+ * non-strings and unknown spellings yield undefined.
+ */
+function normalizeLegacyVllmQwenThinkingFormat(
+  value: unknown,
+): "qwen" | "qwen-chat-template" | undefined {
+  if (typeof value !== "string") {
+    return undefined;
+  }
+  const lowered = value.trim().toLowerCase();
+  const token = lowered.replace(/[_\s]+/g, "-");
+  const chatTemplateAliases = [
+    "chat-template",
+    "chat-template-argument",
+    "chat-template-arguments",
+    "chat-template-kwarg",
+    "chat-template-kwargs",
+    "qwen-chat-template",
+  ];
+  if (chatTemplateAliases.includes(token)) {
+    return "qwen-chat-template";
+  }
+  const topLevelAliases = ["enable-thinking", "qwen", "request-body", "top-level"];
+  return topLevelAliases.includes(token) ? "qwen" : undefined;
+}
+
+/** Returns the first legacy key present on `params`, with its raw value and normalized format. */
+function getLegacyVllmQwenThinkingFormat(params: Record<string, unknown>):
+  | {
+      key: (typeof LEGACY_VLLM_QWEN_THINKING_FORMAT_KEYS)[number];
+      value: unknown;
+      compat: "qwen" | "qwen-chat-template" | undefined;
+    }
+  | undefined {
+  const key = LEGACY_VLLM_QWEN_THINKING_FORMAT_KEYS.find((candidate) =>
+    Object.prototype.hasOwnProperty.call(params, candidate),
+  );
+  if (key === undefined) {
+    return undefined;
+  }
+  // `compat` stays undefined when the stored value is not a recognized spelling.
+  const value = params[key];
+  return { key, value, compat: normalizeLegacyVllmQwenThinkingFormat(value) };
+}
+
+/** Extracts the model id from a `vllm/<model>` ref; undefined for other providers, "" or "*". */
+function parseVllmAgentModelKey(key: string): string | undefined {
+  const ref = splitTrailingAuthProfile(key).model.trim();
+  const separator = ref.indexOf("/");
+  // A ref needs a non-empty provider segment that normalizes to "vllm".
+  const isVllmRef = separator > 0 && normalizeProviderId(ref.slice(0, separator)) === "vllm";
+  if (!isVllmRef) {
+    return undefined;
+  }
+  const rest = ref.slice(separator + 1).trim();
+  const isConcrete = rest !== "" && rest !== "*";
+  return isConcrete ? rest : undefined;
+}
+
+/** True when any vLLM-keyed entry of an agent model map has a legacy thinking-format param. */
+function hasLegacyVllmQwenThinkingFormat(defaultModels: unknown): boolean {
+  const modelMap = getRecord(defaultModels);
+  if (!modelMap) {
+    return false;
+  }
+  return Object.entries(modelMap).some(([ref, entry]) => {
+    // Only refs that parse as concrete vLLM models are considered.
+    if (!parseVllmAgentModelKey(ref)) {
+      return false;
+    }
+    const legacyParams = getRecord(getRecord(entry)?.params);
+    const found = legacyParams ? getLegacyVllmQwenThinkingFormat(legacyParams) : undefined;
+    return found !== undefined;
+  });
+}
+
+/** True when the provider's own `params` record holds a legacy thinking-format key. */
+function hasLegacyVllmQwenThinkingProviderParams(provider: unknown): boolean {
+  return hasLegacyVllmQwenThinkingParams(getRecord(provider)?.params);
+}
+
+/**
+ * True when any row in the provider's `models` array has a legacy thinking-format param.
+ */
+function hasLegacyVllmQwenThinkingModelParams(provider: unknown): boolean {
+  const rows = getRecord(provider)?.models;
+  if (!Array.isArray(rows)) {
+    return false;
+  }
+  return rows.some((row) => hasLegacyVllmQwenThinkingParams(getRecord(row)?.params));
+}
+
+function hasLegacyVllmQwenThinkingParams(params: unknown): boolean {
+  const record = getRecord(params); // a falsy result here makes the function return false
+  return Boolean(record && getLegacyVllmQwenThinkingFormat(record)); // true for either legacy key
+}
+
+/** True when any entry of `agents.list` carries a legacy thinking-format key in its `params`. */
+function hasLegacyVllmQwenThinkingAgentParams(agents: unknown): boolean {
+  const agentList = getRecord(agents)?.list;
+  const entries: unknown[] = Array.isArray(agentList) ? agentList : [];
+  // An absent or non-array list is treated as empty, so the result is false.
+  return entries.some((entry) => hasLegacyVllmQwenThinkingParams(getRecord(entry)?.params));
+}
+
+/**
+ * Finds the vLLM model row whose id is `modelId` or `vllm/<modelId>`, appending a minimal
+ * row when none matches. Unset `models`, `providers`, and provider values are created on
+ * `raw`; returns undefined when one of them, or a defined provider `models`, is unusable.
+ */
+function findOrCreateVllmModelEntry(
+  raw: Record<string, unknown>,
+  modelId: string,
+): { model: Record<string, unknown>; index: number } | undefined {
+  const modelsRoot = getOrCreateRecord(raw, "models");
+  const providers = modelsRoot ? getOrCreateRecord(modelsRoot, "providers") : undefined;
+  const vllm = providers ? getOrCreateVllmProvider(providers) : undefined;
+  if (!vllm || (vllm.models !== undefined && !Array.isArray(vllm.models))) {
+    return undefined;
+  }
+  const rows: unknown[] = Array.isArray(vllm.models) ? vllm.models : [];
+  vllm.models = rows;
+  const acceptedIds: unknown[] = [modelId, `vllm/${modelId}`];
+  const index = rows.findIndex((row) => acceptedIds.includes(getRecord(row)?.id));
+  const existing = index >= 0 ? getRecord(rows[index]) : undefined;
+  if (existing) {
+    return { model: existing, index };
+  }
+  // No matching row yet: append a minimal one named after the model id.
+  const created: Record<string, unknown> = { id: modelId, name: modelId };
+  rows.push(created);
+  return { model: created, index: rows.length - 1 };
+}
+
+/** Lists the record-shaped rows of the existing vLLM provider's `models` array with indexes. */
+function listExistingVllmModelTargets(
+  raw: Record<string, unknown>,
+): Array<{ model: Record<string, unknown>; index: number }> {
+  const providers = getRecord(getRecord(raw.models)?.providers);
+  const rows = findVllmProvider(providers)?.models;
+  // Non-record rows are skipped, but each index still refers to the original array position.
+  return (Array.isArray(rows) ? rows : []).flatMap((row, index) => {
+    const model = getRecord(row);
+    return model ? [{ model, index }] : [];
+  });
+}
+
+/**
+ * Collects vLLM model ids referenced by a model selection value.
+ *
+ * Accepts either a single ref string or a record whose `primary` string and `fallbacks`
+ * string entries are parsed in that order. Anything else yields an empty list.
+ */
+function collectVllmModelIdsFromSelection(value: unknown): string[] {
+  const refs: unknown[] = [];
+  if (typeof value === "string") {
+    refs.push(value);
+  } else {
+    const selection = getRecord(value);
+    if (selection) {
+      refs.push(selection.primary);
+      if (Array.isArray(selection.fallbacks)) {
+        refs.push(...selection.fallbacks);
+      }
+    }
+  }
+  const ids: string[] = [];
+  for (const ref of refs) {
+    // Non-string entries and refs that are not concrete vLLM models contribute nothing.
+    const modelId = typeof ref === "string" ? parseVllmAgentModelKey(ref) : undefined;
+    if (modelId) {
+      ids.push(modelId);
+    }
+  }
+  return ids;
+}
+
+/**
+ * Collects vLLM model ids named by the keys of an agent `models` map; other keys are dropped.
+ */
+function collectVllmModelIdsFromAgentModelMap(value: unknown): string[] {
+  const modelMap = getRecord(value);
+  const refs = modelMap ? Object.keys(modelMap) : [];
+  return refs
+    .map((ref) => parseVllmAgentModelKey(ref))
+    .filter((modelId): modelId is string => Boolean(modelId));
+}
+
+/** Resolves each model id to its (possibly new) vLLM row, skipping failures and repeats. */
+function createVllmModelTargets(
+  raw: Record<string, unknown>,
+  modelIds: string[],
+): Array<{ model: Record<string, unknown>; index: number }> {
+  const seenRows = new Set<Record<string, unknown>>();
+  return modelIds.flatMap((modelId) => {
+    const target = findOrCreateVllmModelEntry(raw, modelId);
+    // Rows are compared by object identity, so ids resolving to one row yield one target.
+    if (target === undefined || seenRows.has(target.model)) {
+      return [];
+    }
+    seenRows.add(target.model);
+    return [target];
+  });
+}
+
+/**
+ * Concatenates target groups in order, keeping only the first target seen for each row.
+ * Rows are compared by object identity, not by id.
+ */
+function combineVllmModelTargets(
+  ...groups: Array<Array<{ model: Record<string, unknown>; index: number }>>
+): Array<{ model: Record<string, unknown>; index: number }> {
+  const seenRows = new Set<Record<string, unknown>>();
+  return groups.flat().filter((target) => {
+    if (seenRows.has(target.model)) {
+      return false;
+    }
+    seenRows.add(target.model);
+    return true;
+  });
+}
+
+/**
+ * Collects vLLM ids per agent: its `model` selection first, then the keys of its `models` map.
+ */
+function collectVllmModelIdsFromAgentList(value: unknown): string[] {
+  const ids: string[] = [];
+  for (const agent of Array.isArray(value) ? value : []) {
+    const record = getRecord(agent);
+    if (record) {
+      ids.push(...collectVllmModelIdsFromSelection(record.model));
+      ids.push(...collectVllmModelIdsFromAgentModelMap(record.models));
+    }
+  }
+  return ids;
+}
+
+/** Returns `root[key]` via getRecord, first storing a new `{}` there when the slot is unset. */
+function getOrCreateRecord(
+  root: Record<string, unknown>,
+  key: string,
+): Record<string, unknown> | undefined {
+  if (root[key] !== undefined) {
+    return getRecord(root[key]) ?? undefined;
+  }
+  const created: Record<string, unknown> = (root[key] = {});
+  return created;
+}
+
+/** Returns the record under the first provider key that normalizes to "vllm", if any. */
+function findVllmProvider(
+  providers: Record<string, unknown> | null | undefined,
+): Record<string, unknown> | undefined {
+  const matchKey = Object.keys(providers ?? {}).find(
+    (candidate) => normalizeProviderId(candidate) === "vllm",
+  );
+  return matchKey ? (getRecord(providers?.[matchKey]) ?? undefined) : undefined;
+}
+
+/** Reads the first provider key normalizing to "vllm"; creates `providers.vllm` if none exists. */
+function getOrCreateVllmProvider(
+  providers: Record<string, unknown>,
+): Record<string, unknown> | undefined {
+  const existingKey = Object.keys(providers).find((id) => normalizeProviderId(id) === "vllm");
+  return existingKey
+    ? (getRecord(providers[existingKey]) ?? undefined)
+    : getOrCreateRecord(providers, "vllm");
+}
+
+// Detects legacy params on a provider whose key merely normalizes to "vllm".
+// Returns false when `providers.vllm` itself is a record.
+function hasLegacyVllmQwenThinkingNormalizedProvider(providers: unknown): boolean {
+  const record = getRecord(providers);
+  if (!record || getRecord(record.vllm)) {
+    return false;
+  }
+  const provider = findVllmProvider(record);
+  const inProviderParams = hasLegacyVllmQwenThinkingProviderParams(provider);
+  return inProviderParams || hasLegacyVllmQwenThinkingModelParams(provider);
+}
+
+/** Defaults `model.reasoning` to true; any value other than undefined is left as configured. */
+function preserveMigratedVllmQwenReasoning(model: Record<string, unknown>): void {
+  const { reasoning = true } = model;
+  model.reasoning = reasoning;
+}
+
+// Deletes every legacy thinking-format key from `params` in place.
+// Both spellings are removed, whichever one was actually present.
+function removeLegacyVllmQwenThinkingParams(params: Record<string, unknown>): void {
+  LEGACY_VLLM_QWEN_THINKING_FORMAT_KEYS.forEach((legacyKey) => delete params[legacyKey]);
+}
+
+/**
+ * Moves one legacy thinking-format param onto `target.model.compat.thinkingFormat`.
+ * The legacy keys are always deleted and one change line is recorded: an unrecognized
+ * value is only removed, and an existing recognized compat value is kept. Returns true.
+ */
+function applyLegacyVllmQwenThinkingFormat(params: {
+  sourcePath: string;
+  legacyParams: Record<string, unknown>;
+  target: { model: Record<string, unknown>; index: number };
+  legacyFormat: NonNullable<ReturnType<typeof getLegacyVllmQwenThinkingFormat>>;
+  changes: string[];
+}): boolean {
+  const { sourcePath, legacyParams, target, legacyFormat, changes } = params;
+  const source = `${sourcePath}.${legacyFormat.key}`;
+  const destination = `models.providers.vllm.models[${target.index}].compat.thinkingFormat`;
+  let message: string;
+  if (!legacyFormat.compat) {
+    message = `Removed ${source} (unrecognized value ${JSON.stringify(legacyFormat.value)}; configure models.providers.vllm.models[].compat.thinkingFormat if needed).`;
+  } else {
+    preserveMigratedVllmQwenReasoning(target.model);
+    const compat = ensureRecord(target.model, "compat");
+    const existing = compat.thinkingFormat;
+    if (typeof existing === "string" && isModelThinkingFormat(existing)) {
+      message = `Removed ${source}; ${destination} is already ${JSON.stringify(existing)}.`;
+    } else {
+      compat.thinkingFormat = legacyFormat.compat;
+      message = `Moved ${source} to ${destination} (${JSON.stringify(legacyFormat.compat)}).`;
+    }
+  }
+  removeLegacyVllmQwenThinkingParams(legacyParams);
+  changes.push(message);
+  return true;
+}
+
+/** Drops a legacy param that has no model row to move to, recording guidance for the user. */
+function removeUntargetedLegacyVllmQwenThinkingFormat(params: {
+  sourcePath: string;
+  legacyParams: Record<string, unknown>;
+  legacyFormat: NonNullable<ReturnType<typeof getLegacyVllmQwenThinkingFormat>>;
+  changes: string[];
+}): void {
+  const message = `Removed ${params.sourcePath}.${params.legacyFormat.key}; no concrete vLLM model row or agent model ref exists, so configure models.providers.vllm.models[].compat.thinkingFormat on each Qwen model that needs it.`;
+  removeLegacyVllmQwenThinkingParams(params.legacyParams);
+  params.changes.push(message);
+}
+
+const LEGACY_VLLM_QWEN_AGENT_THINKING_FORMAT_RULE: LegacyConfigRule = {
+  path: ["agents", "defaults", "models"], // map keyed by agent model ref
+  message:
+    'agents.defaults.models.<vllm-model>.params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.vllm.models[].compat.thinkingFormat.',
+  match: (value) => hasLegacyVllmQwenThinkingFormat(value), // only vLLM-keyed entries count
+};
+
+const LEGACY_VLLM_QWEN_PROVIDER_THINKING_FORMAT_RULE: LegacyConfigRule = {
+  path: ["models", "providers", "vllm", "params"], // exact lowercase "vllm" key only
+  message:
+    'models.providers.vllm.params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.vllm.models[].compat.thinkingFormat.',
+  match: (value) => hasLegacyVllmQwenThinkingProviderParams({ params: value }),
+};
+
+const LEGACY_VLLM_QWEN_PROVIDER_MODEL_THINKING_FORMAT_RULE: LegacyConfigRule = {
+  path: ["models", "providers", "vllm", "models"], // exact lowercase "vllm" key only
+  message:
+    'models.providers.vllm.models[*].params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.vllm.models[].compat.thinkingFormat.',
+  match: (value) => hasLegacyVllmQwenThinkingModelParams({ models: value }),
+};
+
+const LEGACY_VLLM_QWEN_NORMALIZED_PROVIDER_THINKING_FORMAT_RULE: LegacyConfigRule = {
+  path: ["models", "providers"], // covers provider keys that only normalize to "vllm"
+  message:
+    'models.providers.<vllm>.params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.<vllm>.models[].compat.thinkingFormat.',
+  match: (value) => hasLegacyVllmQwenThinkingNormalizedProvider(value),
+};
+
+const LEGACY_VLLM_QWEN_DEFAULT_PARAMS_THINKING_FORMAT_RULE: LegacyConfigRule = {
+  path: ["agents", "defaults", "params"],
+  message:
+    'agents.defaults.params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.vllm.models[].compat.thinkingFormat.',
+  match: (value) => hasLegacyVllmQwenThinkingParams(value), // either legacy key spelling matches
+};
+
+const LEGACY_VLLM_QWEN_AGENT_PARAMS_THINKING_FORMAT_RULE: LegacyConfigRule = {
+  path: ["agents"], // the matcher looks inside agents.list[].params
+  message:
+    'agents.list[].params.qwenThinkingFormat is legacy; run "openclaw doctor --fix" to move it to models.providers.vllm.models[].compat.thinkingFormat.',
+  match: (value) => hasLegacyVllmQwenThinkingAgentParams(value),
+};
+
 const INVALID_THINKING_FORMAT_RULE: LegacyConfigRule = {
   path: ["models", "providers"],
   message:
@@ -559,6 +949,201 @@ export const LEGACY_CONFIG_MIGRATIONS_RUNTIME_MODELS: LegacyConfigMigrationSpec[
       Object.assign(raw, rewritten.value);
     },
   }),
+  defineLegacyConfigMigration({ // migration that relocates legacy vLLM Qwen thinking params onto provider model entries
+    id: "agents.defaults.models.vllm.params.qwenThinkingFormat->models.providers.vllm.models.compat.thinkingFormat",
+    describe: "Move legacy vLLM Qwen thinking params to model compat metadata",
+    legacyRules: [
+      LEGACY_VLLM_QWEN_AGENT_THINKING_FORMAT_RULE,
+      LEGACY_VLLM_QWEN_PROVIDER_THINKING_FORMAT_RULE,
+      LEGACY_VLLM_QWEN_PROVIDER_MODEL_THINKING_FORMAT_RULE,
+      LEGACY_VLLM_QWEN_NORMALIZED_PROVIDER_THINKING_FORMAT_RULE,
+      LEGACY_VLLM_QWEN_DEFAULT_PARAMS_THINKING_FORMAT_RULE,
+      LEGACY_VLLM_QWEN_AGENT_PARAMS_THINKING_FORMAT_RULE,
+    ],
+    apply: (raw, changes) => { // edits raw in place; changes is only forwarded to the helpers below
+      const agentsDefaults = getRecord(getRecord(raw.agents)?.defaults);
+      const defaultModels = getRecord(agentsDefaults?.models);
+      if (defaultModels) { // pass 1: per-model params under agents.defaults.models
+        for (const [key, entry] of Object.entries(defaultModels)) {
+          const modelId = parseVllmAgentModelKey(key); // entries whose key yields no model id are skipped below
+          const entryRecord = getRecord(entry);
+          const params = getRecord(entryRecord?.params);
+          if (!modelId || !entryRecord || !params) {
+            continue;
+          }
+
+          const legacyFormat = getLegacyVllmQwenThinkingFormat(params);
+          if (!legacyFormat) {
+            continue;
+          }
+
+          const target = legacyFormat.compat ? findOrCreateVllmModelEntry(raw, modelId) : undefined; // a provider model entry is only requested when legacyFormat.compat is truthy
+          if (legacyFormat.compat && !target) {
+            continue;
+          }
+          applyLegacyVllmQwenThinkingFormat({
+            sourcePath: `agents.defaults.models.${JSON.stringify(key)}.params`,
+            legacyParams: params,
+            target: target ?? { model: {}, index: -1 }, // throwaway placeholder target when no model entry was requested
+            legacyFormat,
+            changes,
+          });
+          if (Object.keys(params).length === 0) { // drop the params container if nothing is left in it
+            delete entryRecord.params;
+          }
+        }
+      }
+
+      const vllmProvider = findVllmProvider(getRecord(getRecord(raw.models)?.providers));
+      const vllmModels = vllmProvider?.models;
+      if (Array.isArray(vllmModels)) { // pass 2: params on provider model entries, each entry is its own target
+        for (const [index, model] of vllmModels.entries()) {
+          const modelRecord = getRecord(model);
+          const params = getRecord(modelRecord?.params);
+          if (!modelRecord || !params) {
+            continue;
+          }
+          const legacyFormat = getLegacyVllmQwenThinkingFormat(params);
+          if (!legacyFormat) {
+            continue;
+          }
+          applyLegacyVllmQwenThinkingFormat({
+            sourcePath: `models.providers.vllm.models[${index}].params`,
+            legacyParams: params,
+            target: { model: modelRecord, index },
+            legacyFormat,
+            changes,
+          });
+          if (Object.keys(params).length === 0) {
+            delete modelRecord.params;
+          }
+        }
+      }
+
+      const providerParams = getRecord(vllmProvider?.params);
+      if (providerParams) { // pass 3: provider-level params
+        const providerLegacyFormat = getLegacyVllmQwenThinkingFormat(providerParams);
+        if (providerLegacyFormat) {
+          const providerModelIds = [ // vLLM model ids referenced by the default selection, the default model map and the agent list
+            ...collectVllmModelIdsFromSelection(agentsDefaults?.model),
+            ...collectVllmModelIdsFromAgentModelMap(defaultModels),
+            ...collectVllmModelIdsFromAgentList(getRecord(raw.agents)?.list),
+          ];
+          const targets = combineVllmModelTargets( // existing provider models combined with targets for the referenced ids
+            listExistingVllmModelTargets(raw),
+            createVllmModelTargets(raw, providerModelIds),
+          );
+          if (targets.length === 0) { // no target model: hand the legacy params to the untargeted-removal helper
+            removeUntargetedLegacyVllmQwenThinkingFormat({
+              sourcePath: "models.providers.vllm.params",
+              legacyParams: providerParams,
+              legacyFormat: providerLegacyFormat,
+              changes,
+            });
+          } else {
+            for (const target of targets) {
+              applyLegacyVllmQwenThinkingFormat({
+                sourcePath: "models.providers.vllm.params",
+                legacyParams: providerParams,
+                target,
+                legacyFormat: providerLegacyFormat,
+                changes,
+              });
+            }
+          }
+          if (Object.keys(providerParams).length === 0) {
+            delete vllmProvider?.params;
+          }
+        }
+      }
+
+      const defaultParams = getRecord(agentsDefaults?.params);
+      if (defaultParams) { // pass 4: agents.defaults.params
+        const defaultLegacyFormat = getLegacyVllmQwenThinkingFormat(defaultParams);
+        if (defaultLegacyFormat) {
+          const defaultModelIds = [
+            ...collectVllmModelIdsFromSelection(agentsDefaults?.model),
+            ...collectVllmModelIdsFromAgentModelMap(defaultModels),
+          ];
+          const targets = // referenced vLLM ids take priority; otherwise fall back to models already listed on the provider
+            defaultModelIds.length > 0
+              ? createVllmModelTargets(raw, defaultModelIds)
+              : listExistingVllmModelTargets(raw);
+          if (targets.length === 0) {
+            removeUntargetedLegacyVllmQwenThinkingFormat({
+              sourcePath: "agents.defaults.params",
+              legacyParams: defaultParams,
+              legacyFormat: defaultLegacyFormat,
+              changes,
+            });
+          } else {
+            for (const target of targets) {
+              applyLegacyVllmQwenThinkingFormat({
+                sourcePath: "agents.defaults.params",
+                legacyParams: defaultParams,
+                target,
+                legacyFormat: defaultLegacyFormat,
+                changes,
+              });
+            }
+          }
+          if (Object.keys(defaultParams).length === 0) {
+            delete agentsDefaults?.params;
+          }
+        }
+      }
+
+      const agentList = getRecord(raw.agents)?.list;
+      if (!Array.isArray(agentList)) { // pass 5 only runs when agents.list is an array
+        return;
+      }
+      for (const [index, agent] of agentList.entries()) {
+        const agentRecord = getRecord(agent);
+        const agentParams = getRecord(agentRecord?.params);
+        const agentLegacyFormat = agentParams
+          ? getLegacyVllmQwenThinkingFormat(agentParams)
+          : undefined;
+        if (!agentRecord || !agentParams || !agentLegacyFormat) {
+          continue;
+        }
+        const explicitAgentModelIds = [ // ids referenced by this agent's own model / models fields
+          ...collectVllmModelIdsFromSelection(agentRecord.model),
+          ...collectVllmModelIdsFromAgentModelMap(agentRecord.models),
+        ];
+        const inheritedDefaultModelIds = [ // ids referenced by agents.defaults, recomputed for every agent
+          ...collectVllmModelIdsFromSelection(agentsDefaults?.model),
+          ...collectVllmModelIdsFromAgentModelMap(defaultModels),
+        ];
+        const agentModelIds = // the agent's own references win over the inherited defaults
+          explicitAgentModelIds.length > 0 ? explicitAgentModelIds : inheritedDefaultModelIds;
+        const targets =
+          agentModelIds.length > 0
+            ? createVllmModelTargets(raw, agentModelIds)
+            : listExistingVllmModelTargets(raw);
+        if (targets.length === 0) {
+          removeUntargetedLegacyVllmQwenThinkingFormat({
+            sourcePath: `agents.list[${index}].params`,
+            legacyParams: agentParams,
+            legacyFormat: agentLegacyFormat,
+            changes,
+          });
+        } else {
+          for (const target of targets) {
+            applyLegacyVllmQwenThinkingFormat({
+              sourcePath: `agents.list[${index}].params`,
+              legacyParams: agentParams,
+              target,
+              legacyFormat: agentLegacyFormat,
+              changes,
+            });
+          }
+        }
+        if (Object.keys(agentParams).length === 0) {
+          delete agentRecord.params;
+        }
+      }
+    },
+  }),
   defineLegacyConfigMigration({
     id: "models.providers.*.models.*.compat.thinkingFormat-invalid",
     describe: "Remove unrecognized compat.thinkingFormat values from provider model entries",
diff --git a/src/gateway/server-methods/models.test.ts b/src/gateway/server-methods/models.test.ts
index aa95bffc01d..762520a83dc 100644
--- a/src/gateway/server-methods/models.test.ts
+++ b/src/gateway/server-methods/models.test.ts
@@ -128,6 +128,44 @@ describe("models.list", () => {
     }
   });
 
+  it("does not expose runtime params from catalog rows", async () => { // models.list with view "all" must answer without the row's params field
+    const respond = vi.fn();
+    await modelsHandlers["models.list"]({
+      req: {
+        type: "req",
+        id: "req-models-list-redact-params",
+        method: "models.list",
+        params: { view: "all" },
+      },
+      params: { view: "all" },
+      respond,
+      client: null,
+      isWebchatConnect: () => false,
+      context: {
+        getRuntimeConfig: () => ({}) as OpenClawConfig,
+        loadGatewayModelCatalog: vi.fn(() => // stub catalog with a single vllm row
+          Promise.resolve([
+            {
+              id: "qwen-local",
+              name: "Qwen Local",
+              provider: "vllm",
+              params: { qwenThinkingFormat: "chat-template" }, // the field expected to be missing from the response
+            },
+          ]),
+        ),
+        logGateway: {
+          debug: vi.fn(),
+        },
+      } as never, // partial context stub; the cast bypasses the full context type
+    });
+
+    expect(respond).toHaveBeenCalledWith(
+      true,
+      { models: [{ id: "qwen-local", name: "Qwen Local", provider: "vllm" }] }, // same row, params omitted
+      undefined,
+    );
+  });
+
   it("loads the full catalog for provider-scoped configured view and filters only providers", async () => {
     const catalog = [
       { id: "claude-test", name: "Claude Test", provider: "anthropic" },
diff --git a/src/gateway/server-methods/models.ts b/src/gateway/server-methods/models.ts
index 2a6dbdd82e6..2412f272c00 100644
--- a/src/gateway/server-methods/models.ts
+++ b/src/gateway/server-methods/models.ts
@@ -5,6 +5,7 @@ import {
   type ModelCatalogBrowseView,
 } from "../../agents/model-catalog-browse.js";
 import { resolveVisibleModelCatalog } from "../../agents/model-catalog-visibility.js";
+import type { ModelCatalogEntry } from "../../agents/model-catalog.types.js";
 import { resolveDefaultAgentWorkspaceDir } from "../../agents/workspace.js";
 import {
   ErrorCodes,
@@ -22,6 +23,17 @@ function resolveModelsListView(params: Record<string, unknown>): ModelsListView
   return typeof params.view === "string" ? (params.view as ModelsListView) : "default";
 }
 
+function omitRuntimeModelParams(entry: ModelCatalogEntry): ModelCatalogEntry {
+  // Shallow-copy the row, then drop `params` so the caller's object is left untouched.
+  const publicEntry: ModelCatalogEntry & { params?: Record<string, unknown> } = { ...entry };
+  delete publicEntry.params;
+  return publicEntry;
+}
+
+function omitRuntimeModelParamsFromCatalog(catalog: ModelCatalogEntry[]): ModelCatalogEntry[] {
+  return catalog.map((row) => omitRuntimeModelParams(row)); // new array; every row has `params` stripped
+}
+
 export const modelsHandlers: GatewayRequestHandlers = {
   "models.list": async ({ params, respond, context }) => {
     if (!validateModelsListParams(params)) {
@@ -56,7 +68,7 @@ export const modelsHandlers: GatewayRequestHandlers = {
         },
       });
       if (view === "all") {
-        respond(true, { models: catalog }, undefined);
+        respond(true, { models: omitRuntimeModelParamsFromCatalog(catalog) }, undefined);
         return;
       }
       const models = await resolveVisibleModelCatalog({
@@ -67,7 +79,7 @@ export const modelsHandlers: GatewayRequestHandlers = {
         view,
         runtimeAuthDiscovery: false,
       });
-      respond(true, { models }, undefined);
+      respond(true, { models: omitRuntimeModelParamsFromCatalog(models) }, undefined);
     } catch (err) {
       respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, String(err)));
     }
diff --git a/src/plugins/provider-thinking.types.ts b/src/plugins/provider-thinking.types.ts
index eab175e4608..f9bc40ee61f 100644
--- a/src/plugins/provider-thinking.types.ts
+++ b/src/plugins/provider-thinking.types.ts
@@ -10,15 +10,25 @@ export type ProviderThinkingPolicyContext = {
   modelId: string;
 };
 
+export type ProviderThinkingModelCompat = { // model-level compat facts exposed to provider thinking policies via the `compat` context field
+  thinkingFormat?: string; // NOTE(review): presumably the configured thinking payload style for the model; accepted values are not visible here
+  supportedReasoningEfforts?: readonly string[] | null; // NOTE(review): presumably the reasoning effort ids the model accepts; meaning of null vs. absent to be confirmed
+};
+
 /**
  * Provider-owned default thinking policy input.
  *
  * `reasoning` is the merged catalog hint for the selected model when one is
  * available. Providers can use it to keep "reasoning model => low" behavior
  * without re-reading the catalog themselves.
+ *
+ * `compat` carries model-level request contract facts for the selected model
+ * when available. Providers can use it to expose model-specific thinking
+ * profiles only when the configured payload style supports them.
  */
 export type ProviderDefaultThinkingPolicyContext = ProviderThinkingPolicyContext & {
   reasoning?: boolean;
+  compat?: ProviderThinkingModelCompat | null;
 };
 
 export type ProviderThinkingLevelId =