diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md
index 591af8b2be4..f61ab79baf5 100644
--- a/docs/concepts/model-providers.md
+++ b/docs/concepts/model-providers.md
@@ -660,7 +660,7 @@ Example (OpenAI‑compatible):
   - For `api: "openai-completions"` on non-native endpoints (any non-empty `baseUrl` whose host is not `api.openai.com`), OpenClaw forces `compat.supportsDeveloperRole: false` to avoid provider 400 errors for unsupported `developer` roles.
   - Proxy-style OpenAI-compatible routes also skip native OpenAI-only request shaping: no `service_tier`, no Responses `store`, no Completions `store`, no prompt-cache hints, no OpenAI reasoning-compat payload shaping, and no hidden OpenClaw attribution headers.
   - For OpenAI-compatible Completions proxies that need vendor-specific fields, set `agents.defaults.models["provider/model"].params.extra_body` (or `extraBody`) to merge extra JSON into the outbound request body.
-  - For vLLM chat-template controls, set `agents.defaults.models["provider/model"].params.chat_template_kwargs`. OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true` for `vllm/nemotron-3-*` when the session thinking level is off.
+  - For vLLM chat-template controls, set `agents.defaults.models["provider/model"].params.chat_template_kwargs`. The bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true` for `vllm/nemotron-3-*` when the session thinking level is off.
   - For slow local models or remote LAN/tailnet hosts, set `models.providers.<provider>.timeoutSeconds`. This extends provider model HTTP request handling, including connect, headers, body streaming, and the total guarded-fetch abort, without increasing the whole agent runtime timeout.
   - If `baseUrl` is empty/omitted, OpenClaw keeps the default OpenAI behavior (which resolves to `api.openai.com`).
   - For safety, an explicit `compat.supportsDeveloperRole: true` is still overridden on non-native `openai-completions` endpoints.
diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md
index 8e55d906d8b..dee298aedd6 100644
--- a/docs/gateway/config-agents.md
+++ b/docs/gateway/config-agents.md
@@ -371,7 +371,7 @@ Time format in system prompt. Default: `auto` (OS preference).
 - `params`: global default provider parameters applied to all models. Set at `agents.defaults.params` (e.g. `{ cacheRetention: "long" }`).
   - `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
 - `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
-- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. For vLLM Qwen thinking controls, set `params.qwenThinkingFormat` to `"chat-template"` or `"top-level"` on that model entry.
+- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. For vLLM Qwen thinking controls, set `params.qwenThinkingFormat` to `"chat-template"` or `"top-level"` on that model entry.
 - `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
 - `agentRuntime`: default low-level agent runtime policy. Omitted id defaults to OpenClaw Pi. Use `id: "pi"` to force the built-in PI harness, `id: "auto"` to let registered plugin harnesses claim supported models, a registered harness id such as `id: "codex"`, or a supported CLI backend alias such as `id: "claude-cli"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
 - Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.
diff --git a/docs/providers/vllm.md b/docs/providers/vllm.md
index 0d7b9bc61b7..e89457c0641 100644
--- a/docs/providers/vllm.md
+++ b/docs/providers/vllm.md
@@ -153,7 +153,7 @@ Use explicit config when:
 
 vLLM/Nemotron 3 can use chat-template kwargs to control whether reasoning is
 returned as hidden reasoning or visible answer text. When an OpenClaw session
-uses `vllm/nemotron-3-*` with thinking off, OpenClaw sends:
+uses `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin sends:
 
 ```json
 {
diff --git a/extensions/qwen/stream.ts b/extensions/qwen/stream.ts
index 428a0d2c184..c3ec46f91d7 100644
--- a/extensions/qwen/stream.ts
+++ b/extensions/qwen/stream.ts
@@ -1,7 +1,10 @@
 import type { StreamFn } from "@mariozechner/pi-agent-core";
 import type { ProviderWrapStreamFnContext } from "openclaw/plugin-sdk/plugin-entry";
 import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared";
-import { createPayloadPatchStreamWrapper } from "openclaw/plugin-sdk/provider-stream-shared";
+import {
+  createPayloadPatchStreamWrapper,
+  isOpenAICompatibleThinkingEnabled,
+} from "openclaw/plugin-sdk/provider-stream-shared";
 
 type QwenThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"];
 
@@ -15,19 +18,6 @@ function isQwenProviderId(providerId: string): boolean {
   );
 }
 
-function resolveOpenAICompatibleThinkingEnabled(params: {
-  thinkingLevel: QwenThinkingLevel;
-  options: Parameters<StreamFn>[2];
-}): boolean {
-  const options = (params.options ?? {}) as { reasoningEffort?: unknown; reasoning?: unknown };
-  const raw = options.reasoningEffort ?? options.reasoning ?? params.thinkingLevel ?? "high";
-  if (typeof raw !== "string") {
-    return true;
-  }
-  const normalized = raw.trim().toLowerCase();
-  return normalized !== "off" && normalized !== "none";
-}
-
 export function createQwenThinkingWrapper(
   baseStreamFn: StreamFn | undefined,
   thinkingLevel: QwenThinkingLevel,
@@ -35,7 +25,7 @@
   return createPayloadPatchStreamWrapper(
     baseStreamFn,
     ({ payload: payloadObj, options }) => {
-      const enableThinking = resolveOpenAICompatibleThinkingEnabled({ thinkingLevel, options });
+      const enableThinking = isOpenAICompatibleThinkingEnabled({ thinkingLevel, options });
       payloadObj.enable_thinking = enableThinking;
       delete payloadObj.reasoning_effort;
       delete payloadObj.reasoningEffort;
diff --git a/extensions/vllm/stream.test.ts b/extensions/vllm/stream.test.ts
index 73352acbf81..99b26c9e439 100644
--- a/extensions/vllm/stream.test.ts
+++ b/extensions/vllm/stream.test.ts
@@ -1,7 +1,11 @@
 import type { StreamFn } from "@mariozechner/pi-agent-core";
 import type { Context, Model } from "@mariozechner/pi-ai";
 import { describe, expect, it } from "vitest";
-import { createVllmQwenThinkingWrapper, wrapVllmProviderStream } from "./stream.js";
+import {
+  createVllmProviderThinkingWrapper,
+  createVllmQwenThinkingWrapper,
+  wrapVllmProviderStream,
+} from "./stream.js";
 
 function capturePayload(params: {
   format: "chat-template" | "top-level";
@@ -105,6 +109,80 @@
   });
 });
 
+describe("createVllmProviderThinkingWrapper", () => {
+  function captureProviderPayload(params: {
+    thinkingLevel?: "off" | "low" | "medium" | "high" | "xhigh" | "max";
+    initialPayload?: Record<string, unknown>;
+    model?: Partial<Model<"openai-completions">>;
+  }): Record<string, unknown> {
+    let captured: Record<string, unknown> = {};
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      const payload = { ...params.initialPayload };
+      options?.onPayload?.(payload, _model);
+      captured = payload;
+      return {} as ReturnType<StreamFn>;
+    };
+
+    const wrapped = createVllmProviderThinkingWrapper({
+      baseStreamFn,
+      thinkingLevel: params.thinkingLevel ?? "high",
+    });
+    void wrapped(
+      {
+        api: "openai-completions",
+        provider: "vllm",
+        id: "nemotron-3-super",
+        reasoning: true,
+        ...params.model,
+      } as Model<"openai-completions">,
+      { messages: [] } as Context,
+      {},
+    );
+
+    return captured;
+  }
+
+  it("injects Nemotron 3 chat-template kwargs when thinking is off", () => {
+    expect(captureProviderPayload({ thinkingLevel: "off" })).toEqual({
+      chat_template_kwargs: {
+        enable_thinking: false,
+        force_nonempty_content: true,
+      },
+    });
+  });
+
+  it("does not inject Nemotron 3 chat-template kwargs when thinking is enabled", () => {
+    expect(captureProviderPayload({ thinkingLevel: "low" })).toEqual({});
+  });
+
+  it("preserves existing Nemotron 3 chat-template kwargs over defaults", () => {
+    expect(
+      captureProviderPayload({
+        thinkingLevel: "off",
+        initialPayload: {
+          chat_template_kwargs: {
+            enable_thinking: true,
+          },
+        },
+      }),
+    ).toEqual({
+      chat_template_kwargs: {
+        enable_thinking: true,
+        force_nonempty_content: true,
+      },
+    });
+  });
+
+  it("skips non-Nemotron vLLM models", () => {
+    expect(
+      captureProviderPayload({
+        thinkingLevel: "off",
+        model: { id: "Qwen/Qwen3-8B" },
+      }),
+    ).toEqual({});
+  });
+});
+
 describe("wrapVllmProviderStream", () => {
   it("registers when vLLM Qwen thinking format params are configured", () => {
     expect(
@@ -167,4 +245,36 @@
       } as never),
     ).toBeUndefined();
   });
+
+  it("registers for vLLM Nemotron when thinking is off", () => {
+    expect(
+      wrapVllmProviderStream({
+        provider: "vllm",
+        modelId: "nemotron-3-super",
+        extraParams: {},
+        thinkingLevel: "off",
+        model: {
+          api: "openai-completions",
+          provider: "vllm",
+          id: "nemotron-3-super",
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeTypeOf("function");
+
+    expect(
+      wrapVllmProviderStream({
+        provider: "vllm",
+        modelId: "nemotron-3-super",
+        extraParams: {},
+        thinkingLevel: "low",
+        model: {
+          api: "openai-completions",
+          provider: "vllm",
+          id: "nemotron-3-super",
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeUndefined();
+  });
 });
diff --git a/extensions/vllm/stream.ts b/extensions/vllm/stream.ts
index 15050d73e4d..19f5392ea1f 100644
--- a/extensions/vllm/stream.ts
+++ b/extensions/vllm/stream.ts
@@ -1,7 +1,10 @@
 import type { StreamFn } from "@mariozechner/pi-agent-core";
 import type { ProviderWrapStreamFnContext } from "openclaw/plugin-sdk/plugin-entry";
 import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared";
-import { createPayloadPatchStreamWrapper } from "openclaw/plugin-sdk/provider-stream-shared";
+import {
+  createPayloadPatchStreamWrapper,
+  isOpenAICompatibleThinkingEnabled,
+} from "openclaw/plugin-sdk/provider-stream-shared";
 
 type VllmThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"];
 type VllmQwenThinkingFormat = "chat-template" | "top-level";
@@ -41,19 +44,6 @@ function resolveVllmQwenThinkingFormat(
   );
 }
 
-function resolveOpenAICompatibleThinkingEnabled(params: {
-  thinkingLevel: VllmThinkingLevel;
-  options: Parameters<StreamFn>[2];
-}): boolean {
-  const options = (params.options ?? {}) as { reasoningEffort?: unknown; reasoning?: unknown };
-  const raw = options.reasoningEffort ?? options.reasoning ?? params.thinkingLevel ?? "high";
-  if (typeof raw !== "string") {
-    return true;
-  }
-  const normalized = raw.trim().toLowerCase();
-  return normalized !== "off" && normalized !== "none";
-}
-
 function setQwenChatTemplateThinking(payload: Record<string, unknown>, enabled: boolean): void {
   const existing = payload.chat_template_kwargs;
   if (existing && typeof existing === "object" && !Array.isArray(existing)) {
@@ -73,6 +63,31 @@ function setQwenChatTemplateThinking(payload: Record<string, unknown>, enabled:
   };
 }
 
+function isVllmNemotronModel(model: { api?: unknown; provider?: unknown; id?: unknown }): boolean {
+  return (
+    model.api === "openai-completions" &&
+    typeof model.provider === "string" &&
+    normalizeProviderId(model.provider) === "vllm" &&
+    typeof model.id === "string" &&
+    /\bnemotron-3(?:[-_](?:nano|super|ultra))?\b/i.test(model.id)
+  );
+}
+
+function setNemotronThinkingOffChatTemplateKwargs(payload: Record<string, unknown>): void {
+  const defaults = {
+    enable_thinking: false,
+    force_nonempty_content: true,
+  };
+  const existing = payload.chat_template_kwargs;
+  payload.chat_template_kwargs =
+    existing && typeof existing === "object" && !Array.isArray(existing)
+      ? {
+          ...defaults,
+          ...(existing as Record<string, unknown>),
+        }
+      : defaults;
+}
+
 export function createVllmQwenThinkingWrapper(params: {
   baseStreamFn: StreamFn | undefined;
   format: VllmQwenThinkingFormat;
@@ -81,7 +96,7 @@ export function createVllmQwenThinkingWrapper(params: {
   return createPayloadPatchStreamWrapper(
     params.baseStreamFn,
     ({ payload: payloadObj, options }) => {
-      const enableThinking = resolveOpenAICompatibleThinkingEnabled({
+      const enableThinking = isOpenAICompatibleThinkingEnabled({
         thinkingLevel: params.thinkingLevel,
         options,
       });
@@ -100,17 +115,50 @@
   );
 }
 
+export function createVllmProviderThinkingWrapper(params: {
+  baseStreamFn: StreamFn | undefined;
+  qwenFormat?: VllmQwenThinkingFormat;
+  thinkingLevel: VllmThinkingLevel;
+}): StreamFn {
+  const qwenWrapped = params.qwenFormat
+    ? createVllmQwenThinkingWrapper({
+        baseStreamFn: params.baseStreamFn,
+        format: params.qwenFormat,
+        thinkingLevel: params.thinkingLevel,
+      })
+    : params.baseStreamFn;
+  return createPayloadPatchStreamWrapper(
+    qwenWrapped,
+    ({ payload: payloadObj }) => {
+      setNemotronThinkingOffChatTemplateKwargs(payloadObj);
+    },
+    {
+      shouldPatch: ({ model }) =>
+        model.api === "openai-completions" &&
+        params.thinkingLevel === "off" &&
+        isVllmNemotronModel(model),
+    },
+  );
+}
+
 export function wrapVllmProviderStream(ctx: ProviderWrapStreamFnContext): StreamFn | undefined {
   if (!isVllmProviderId(ctx.provider) || (ctx.model && ctx.model.api !== "openai-completions")) {
     return undefined;
   }
-  const format = resolveVllmQwenThinkingFormat(ctx.extraParams);
-  if (!format) {
+  const qwenFormat = resolveVllmQwenThinkingFormat(ctx.extraParams);
+  const shouldHandleNemotron =
+    ctx.thinkingLevel === "off" &&
+    isVllmNemotronModel({
+      api: "openai-completions",
+      provider: ctx.provider,
+      id: ctx.modelId,
+    });
+  if (!qwenFormat && !shouldHandleNemotron) {
     return undefined;
   }
-  return createVllmQwenThinkingWrapper({
+  return createVllmProviderThinkingWrapper({
     baseStreamFn: ctx.streamFn,
-    format,
+    qwenFormat,
     thinkingLevel: ctx.thinkingLevel,
   });
 }
diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts
index 044738e0a5c..22852257130 100644
--- a/src/agents/pi-embedded-runner-extraparams.test.ts
+++ b/src/agents/pi-embedded-runner-extraparams.test.ts
@@ -894,85 +894,6 @@ describe("applyExtraParamsToAgent", () => {
     });
   });
 
-  it("injects vLLM Nemotron chat_template_kwargs when thinking is off", () => {
-    const payload = runResponsesPayloadMutationCase({
-      applyProvider: "vllm",
-      applyModelId: "nemotron-3-super",
-      model: {
-        api: "openai-completions",
-        provider: "vllm",
-        id: "nemotron-3-super",
-        baseUrl: "http://127.0.0.1:8000/v1",
-      } as Model<"openai-completions">,
-      payload: {
-        messages: [],
-      },
-      thinkingLevel: "off",
-    });
-
-    expect(payload.chat_template_kwargs).toEqual({
-      enable_thinking: false,
-      force_nonempty_content: true,
-    });
-  });
-
-  it("does not inject vLLM Nemotron chat_template_kwargs when thinking is enabled", () => {
-    const payload = runResponsesPayloadMutationCase({
-      applyProvider: "vllm",
-      applyModelId: "nemotron-3-super",
-      model: {
-        api: "openai-completions",
-        provider: "vllm",
-        id: "nemotron-3-super",
-        baseUrl: "http://127.0.0.1:8000/v1",
-      } as Model<"openai-completions">,
-      payload: {
-        messages: [],
-      },
-      thinkingLevel: "low",
-    });
-
-    expect(payload).not.toHaveProperty("chat_template_kwargs");
-  });
-
-  it("lets extra_body override generated vLLM Nemotron chat_template_kwargs", () => {
-    const payload = runResponsesPayloadMutationCase({
-      applyProvider: "vllm",
-      applyModelId: "nemotron-3-super",
-      cfg: {
-        agents: {
-          defaults: {
-            models: {
-              "vllm/nemotron-3-super": {
-                params: {
-                  extra_body: {
-                    chat_template_kwargs: {
-                      enable_thinking: true,
-                    },
-                  },
-                },
-              },
-            },
-          },
-        },
-      },
-      model: {
-        api: "openai-completions",
-        provider: "vllm",
-        id: "nemotron-3-super",
-        baseUrl: "http://127.0.0.1:8000/v1",
-      } as Model<"openai-completions">,
-      payload: {
-        messages: [],
-      },
-      thinkingLevel: "off",
-    });
-
-    expect(payload.chat_template_kwargs).toEqual({
-      enable_thinking: true,
-    });
-  });
-
   it("warns and skips invalid chat_template_kwargs params", () => {
     const warnSpy = vi.spyOn(log, "warn").mockImplementation(() => {});
     try {
diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts
index 286f46caa85..aa9cc94a569 100644
--- a/src/agents/pi-embedded-runner/extra-params.ts
+++ b/src/agents/pi-embedded-runner/extra-params.ts
@@ -462,63 +462,25 @@ function resolveChatTemplateKwargsParam(
   return Object.keys(chatTemplateKwargs).length > 0 ? chatTemplateKwargs : undefined;
 }
 
-function isVllmNemotronModel(model: ProviderRuntimeModel): boolean {
-  return (
-    model.api === "openai-completions" &&
-    typeof model.provider === "string" &&
-    model.provider.toLowerCase() === "vllm" &&
-    typeof model.id === "string" &&
-    /\bnemotron-3(?:[-_](?:nano|super|ultra))?\b/i.test(model.id)
-  );
-}
-
-function resolveOpenAICompletionsChatTemplateKwargs(params: {
-  model: ProviderRuntimeModel;
-  thinkingLevel?: ThinkLevel;
-  configured?: Record<string, unknown>;
-}): Record<string, unknown> | undefined {
-  const defaults =
-    params.thinkingLevel === "off" && isVllmNemotronModel(params.model)
-      ? {
-          enable_thinking: false,
-          force_nonempty_content: true,
-        }
-      : undefined;
-  const merged = {
-    ...defaults,
-    ...params.configured,
-  };
-  return Object.keys(merged).length > 0 ? merged : undefined;
-}
-
 function createOpenAICompletionsChatTemplateKwargsWrapper(params: {
   baseStreamFn: StreamFn | undefined;
-  configured?: Record<string, unknown>;
-  thinkingLevel?: ThinkLevel;
+  configured: Record<string, unknown>;
 }): StreamFn {
   const underlying = params.baseStreamFn ?? streamSimple;
   return (model, context, options) => {
     if (model.api !== "openai-completions") {
       return underlying(model, context, options);
     }
-    const chatTemplateKwargs = resolveOpenAICompletionsChatTemplateKwargs({
-      model: model as ProviderRuntimeModel,
-      thinkingLevel: params.thinkingLevel,
-      configured: params.configured,
-    });
-    if (!chatTemplateKwargs) {
-      return underlying(model, context, options);
-    }
     return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => {
       const existing = payloadObj.chat_template_kwargs;
       if (existing && typeof existing === "object" && !Array.isArray(existing)) {
         payloadObj.chat_template_kwargs = {
           ...(existing as Record<string, unknown>),
-          ...chatTemplateKwargs,
+          ...params.configured,
         };
         return;
       }
-      payloadObj.chat_template_kwargs = chatTemplateKwargs;
+      payloadObj.chat_template_kwargs = params.configured;
     });
   };
 }
@@ -614,11 +576,10 @@ function applyPostPluginStreamWrappers(
     "chatTemplateKwargs",
   );
   const configuredChatTemplateKwargs = resolveChatTemplateKwargsParam(rawChatTemplateKwargs);
-  if (configuredChatTemplateKwargs || ctx.thinkingLevel === "off") {
+  if (configuredChatTemplateKwargs) {
     ctx.agent.streamFn = createOpenAICompletionsChatTemplateKwargsWrapper({
       baseStreamFn: ctx.agent.streamFn,
       configured: configuredChatTemplateKwargs,
-      thinkingLevel: ctx.thinkingLevel,
     });
   }
 
diff --git a/src/plugin-sdk/provider-stream-shared.test.ts b/src/plugin-sdk/provider-stream-shared.test.ts
index d152c17ef20..f12ba8e0f16 100644
--- a/src/plugin-sdk/provider-stream-shared.test.ts
+++ b/src/plugin-sdk/provider-stream-shared.test.ts
@@ -7,6 +7,7 @@ import {
   defaultToolStreamExtraParams,
   decodeHtmlEntitiesInObject,
   hasCopilotVisionInput,
+  isOpenAICompatibleThinkingEnabled,
 } from "./provider-stream-shared.js";
 
 type FakeWrappedStream = {
@@ -64,6 +65,43 @@
   });
 });
 
+describe("isOpenAICompatibleThinkingEnabled", () => {
+  it("uses explicit request reasoning before session thinking level", () => {
+    expect(
+      isOpenAICompatibleThinkingEnabled({
+        thinkingLevel: "high",
+        options: { reasoning: "none" } as never,
+      }),
+    ).toBe(false);
+    expect(
+      isOpenAICompatibleThinkingEnabled({
+        thinkingLevel: "off",
+        options: { reasoningEffort: "medium" } as never,
+      }),
+    ).toBe(true);
+  });
+
+  it("treats off and none as disabled", () => {
+    expect(isOpenAICompatibleThinkingEnabled({ thinkingLevel: "off", options: {} })).toBe(false);
+    expect(
+      isOpenAICompatibleThinkingEnabled({
+        thinkingLevel: "high",
+        options: { reasoning: "none" } as never,
+      }),
+    ).toBe(false);
+  });
+
+  it("defaults to enabled for missing or non-string values", () => {
+    expect(isOpenAICompatibleThinkingEnabled({ thinkingLevel: undefined, options: {} })).toBe(true);
+    expect(
+      isOpenAICompatibleThinkingEnabled({
+        thinkingLevel: "off",
+        options: { reasoning: { effort: "off" } } as never,
+      }),
+    ).toBe(true);
+  });
+});
+
 describe("buildCopilotDynamicHeaders", () => {
   it("matches Copilot IDE-style request headers without the legacy Openai-Intent", () => {
     expect(
diff --git a/src/plugin-sdk/provider-stream-shared.ts b/src/plugin-sdk/provider-stream-shared.ts
index c02d0a566a4..065d699966f 100644
--- a/src/plugin-sdk/provider-stream-shared.ts
+++ b/src/plugin-sdk/provider-stream-shared.ts
@@ -154,6 +154,21 @@ export function createPayloadPatchStreamWrapper(
   };
 }
 
+export type OpenAICompatibleThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"];
+
+export function isOpenAICompatibleThinkingEnabled(params: {
+  thinkingLevel: OpenAICompatibleThinkingLevel;
+  options: Parameters<StreamFn>[2];
+}): boolean {
+  const options = (params.options ?? {}) as { reasoningEffort?: unknown; reasoning?: unknown };
+  const raw = options.reasoningEffort ?? options.reasoning ?? params.thinkingLevel ?? "high";
+  if (typeof raw !== "string") {
+    return true;
+  }
+  const normalized = raw.trim().toLowerCase();
+  return normalized !== "off" && normalized !== "none";
+}
+
 export type DeepSeekV4ThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"];
 
 function isDisabledDeepSeekV4ThinkingLevel(thinkingLevel: DeepSeekV4ThinkingLevel): boolean {
diff --git a/src/plugin-sdk/provider-stream.ts b/src/plugin-sdk/provider-stream.ts
index 389d98ce833..ab9c9f42354 100644
--- a/src/plugin-sdk/provider-stream.ts
+++ b/src/plugin-sdk/provider-stream.ts
@@ -43,6 +43,7 @@ export {
   defaultToolStreamExtraParams,
   hasCopilotVisionInput,
   isAnthropicBedrockModel,
+  isOpenAICompatibleThinkingEnabled,
   type ProviderStreamWrapperFactory,
   resolveAnthropicPayloadPolicy,
   resolveMoonshotThinkingType,
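
Taken together, the docs hunks above define a three-level precedence for `chat_template_kwargs` on vLLM completions routes: plugin-generated Nemotron defaults, then explicit `params.chat_template_kwargs`, then `extra_body.chat_template_kwargs`. A minimal standalone sketch of that merge order (the helper name is illustrative, not the production code):

```ts
// Sketch only: models the precedence described in the docs hunks above.
// Later spreads win, so: plugin defaults < configured kwargs < extra_body kwargs.
type Kwargs = Record<string, unknown>;

function mergeChatTemplateKwargsSketch(params: {
  nemotronThinkingOff: boolean; // the plugin injects defaults only in this case
  configured?: Kwargs; // params.chat_template_kwargs
  extraBody?: Kwargs; // params.extra_body.chat_template_kwargs
}): Kwargs {
  const defaults: Kwargs = params.nemotronThinkingOff
    ? { enable_thinking: false, force_nonempty_content: true }
    : {};
  return { ...defaults, ...params.configured, ...params.extraBody };
}

// => { enable_thinking: true, force_nonempty_content: true }
console.log(
  mergeChatTemplateKwargsSketch({
    nemotronThinkingOff: true,
    configured: { enable_thinking: true },
  }),
);
```

This mirrors the new test "preserves existing Nemotron 3 chat-template kwargs over defaults": an explicit `enable_thinking: true` survives, while the uncontested `force_nonempty_content: true` default still lands.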
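For completeness, a hedged sketch of the config shape those docs keys describe. The key paths come from the config-agents.md hunk; the model ids and the TypeScript object-literal framing are placeholders (write the equivalent in whatever format your OpenClaw config file actually uses):

```ts
// Assumed shape, mirroring the documented key paths; ids are placeholders.
export const openclawAgentsConfigSketch = {
  agents: {
    defaults: {
      models: {
        "vllm/nemotron-3-super": {
          params: {
            // Explicit kwargs override the plugin's generated Nemotron defaults,
            chat_template_kwargs: { enable_thinking: true },
            // and extra_body.chat_template_kwargs has final precedence.
            extra_body: { chat_template_kwargs: { force_nonempty_content: false } },
          },
        },
        "vllm/qwen3-8b": {
          // Qwen-on-vLLM thinking control uses a separate knob on that model entry.
          params: { qwenThinkingFormat: "chat-template" }, // or "top-level"
        },
      },
    },
  },
};
```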