diff --git a/CHANGELOG.md b/CHANGELOG.md
index 484055d8ba6..fa742119a64 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
- Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds, and restore `openclaw agent --model` for one-shot CLI runs. Fixes #72751. Thanks @moooV252.
- Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin.
- Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv.
+- Agents/vLLM: honor `compat.thinkingFormat: "qwen-chat-template"` by sending Qwen chat-template thinking kwargs, including preserved thinking for agent loops, and support DashScope-style `qwen` top-level thinking flags. Fixes #72329. Thanks @stavrostzagadouris.
- Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis.
- Agents/exec: apply configured `tools.exec.timeoutSec` to background, `yieldMs`, and node `system.run` commands when no per-call timeout is set, preventing auto-backgrounded and remote node commands from running indefinitely. Fixes #67600; supersedes #67603. Thanks @dlmpx and @kagura-agent.
- Config/doctor: stop masking unknown-key validation diagnostics such as `agents.defaults.llm`, and have `openclaw doctor --fix` remove the retired `agents.defaults.llm` timeout block. Thanks @aidiffuser.
diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md
index 4c36bf8e8d1..b6a2616c0fa 100644
--- a/docs/gateway/config-agents.md
+++ b/docs/gateway/config-agents.md
@@ -371,7 +371,7 @@ Time format in system prompt. Default: `auto` (OS preference).
- `params`: global default provider parameters applied to all models. Set at `agents.defaults.params` (e.g. `{ cacheRetention: "long" }`).
- `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
- `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
-- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override those defaults, and `extra_body.chat_template_kwargs` still has final precedence.
+- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; models with `compat.thinkingFormat: "qwen-chat-template"` map OpenClaw thinking controls to `chat_template_kwargs.enable_thinking` plus `preserve_thinking: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence.
- `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
- `agentRuntime`: default low-level agent runtime policy. Omitted id defaults to OpenClaw Pi. Use `id: "pi"` to force the built-in PI harness, `id: "auto"` to let registered plugin harnesses claim supported models, a registered harness id such as `id: "codex"`, or a supported CLI backend alias such as `id: "claude-cli"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
- Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.
diff --git a/docs/providers/vllm.md b/docs/providers/vllm.md
index 48dc533a676..d1efe226fed 100644
--- a/docs/providers/vllm.md
+++ b/docs/providers/vllm.md
@@ -129,6 +129,27 @@ Use explicit config when:
+
+ For Qwen models served through vLLM, set
+ `compat.thinkingFormat: "qwen-chat-template"` on the model entry when the
+ server expects Qwen chat-template kwargs. OpenClaw maps `/think off` to:
+
+ ```json
+ {
+ "chat_template_kwargs": {
+ "enable_thinking": false,
+ "preserve_thinking": true
+ }
+ }
+ ```
+
+ Non-`off` thinking levels send `enable_thinking: true` (alongside
+ `preserve_thinking: true`). If your endpoint expects DashScope-style
+ top-level flags instead, use `compat.thinkingFormat: "qwen"` to send
+ `enable_thinking` at the request root.
+
+
+
vLLM/Nemotron 3 can use chat-template kwargs to control whether reasoning is
returned as hidden reasoning or visible answer text. When an OpenClaw session
diff --git a/src/agents/openai-completions-compat.ts b/src/agents/openai-completions-compat.ts
index e81fcfa9c87..b0619f5f25c 100644
--- a/src/agents/openai-completions-compat.ts
+++ b/src/agents/openai-completions-compat.ts
@@ -17,7 +17,7 @@ export type OpenAICompletionsCompatDefaults = {
supportsReasoningEffort: boolean;
supportsUsageInStreaming: boolean;
maxTokensField: "max_completion_tokens" | "max_tokens";
- thinkingFormat: "openai" | "openrouter" | "deepseek" | "zai";
+ thinkingFormat: "openai" | "openrouter" | "deepseek" | "zai" | "qwen" | "qwen-chat-template";
visibleReasoningDetailTypes: string[];
supportsStrictMode: boolean;
};
diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts
index e961c131352..bb44991b6fa 100644
--- a/src/agents/openai-transport-stream.test.ts
+++ b/src/agents/openai-transport-stream.test.ts
@@ -1816,6 +1816,78 @@ describe("openai transport stream", () => {
expect(params.stream_options).toMatchObject({ include_usage: true });
});
+ it("maps qwen-chat-template thinking compat to vLLM chat template kwargs", () => {
+ const baseModel = {
+ id: "Qwen/Qwen3-8B",
+ name: "Qwen3 8B",
+ api: "openai-completions",
+ provider: "vllm",
+ baseUrl: "http://127.0.0.1:8000/v1",
+ reasoning: true,
+ input: ["text"],
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+ contextWindow: 32768,
+ maxTokens: 8192,
+ compat: { thinkingFormat: "qwen-chat-template" },
+ } as unknown as Model<"openai-completions">;
+ const context = {
+ systemPrompt: "system",
+ messages: [],
+ tools: [],
+ } as never;
+
+ const disabled = buildOpenAICompletionsParams(baseModel, context, {
+ reasoning: "none",
+ } as never) as {
+ chat_template_kwargs?: { enable_thinking?: unknown; preserve_thinking?: unknown };
+ };
+ const enabled = buildOpenAICompletionsParams(baseModel, context, {
+ reasoning: "medium",
+ } as never) as {
+ chat_template_kwargs?: { enable_thinking?: unknown; preserve_thinking?: unknown };
+ };
+
+ expect(disabled.chat_template_kwargs).toEqual({
+ enable_thinking: false,
+ preserve_thinking: true,
+ });
+ expect(disabled).not.toHaveProperty("reasoning_effort");
+ expect(enabled.chat_template_kwargs).toEqual({
+ enable_thinking: true,
+ preserve_thinking: true,
+ });
+ expect(enabled).not.toHaveProperty("reasoning_effort");
+ });
+
+ it("maps qwen thinking compat to top-level enable_thinking", () => {
+ const params = buildOpenAICompletionsParams(
+ {
+ id: "qwen3.6-plus",
+ name: "Qwen 3.6 Plus",
+ api: "openai-completions",
+ provider: "qwen-custom",
+ baseUrl: "https://example.com/v1",
+ reasoning: true,
+ input: ["text"],
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+ contextWindow: 32768,
+ maxTokens: 8192,
+ compat: { thinkingFormat: "qwen" },
+ } as unknown as Model<"openai-completions">,
+ {
+ systemPrompt: "system",
+ messages: [],
+ tools: [],
+ } as never,
+ {
+ reasoning: "none",
+ } as never,
+ ) as { enable_thinking?: unknown; reasoning_effort?: unknown };
+
+ expect(params.enable_thinking).toBe(false);
+ expect(params).not.toHaveProperty("reasoning_effort");
+ });
+
it("enables streaming usage compat for generic providers on native DashScope endpoints", () => {
const params = buildOpenAICompletionsParams(
{
diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts
index 67045555539..6238ff7699b 100644
--- a/src/agents/openai-transport-stream.ts
+++ b/src/agents/openai-transport-stream.ts
@@ -1631,6 +1631,29 @@ function resolveOpenAICompletionsReasoningEffort(options: OpenAICompletionsOptio
return options?.reasoningEffort ?? options?.reasoning ?? "high";
}
+function isCompletionsThinkingEnabled(effort: string): boolean {
+ return normalizeOpenAIReasoningEffort(effort) !== "none";
+}
+
+function setChatTemplateThinking(params: Record<string, unknown>, enabled: boolean): void {
+ const existing = params.chat_template_kwargs;
+ if (existing && typeof existing === "object" && !Array.isArray(existing)) {
+    const next: Record<string, unknown> = {
+      ...(existing as Record<string, unknown>),
+ enable_thinking: enabled,
+ };
+ if (!Object.hasOwn(next, "preserve_thinking")) {
+ next.preserve_thinking = true;
+ }
+ params.chat_template_kwargs = next;
+ return;
+ }
+ params.chat_template_kwargs = {
+ enable_thinking: enabled,
+ preserve_thinking: true,
+ };
+}
+
function convertTools(
tools: NonNullable,
compat: ReturnType,
@@ -1814,7 +1837,15 @@ export function buildOpenAICompletionsParams(
fallbackMap: compat.reasoningEffortMap,
})
: undefined;
- if (
+ if (compat.thinkingFormat === "qwen" && model.reasoning && completionsReasoningEffort) {
+ params.enable_thinking = isCompletionsThinkingEnabled(completionsReasoningEffort);
+ } else if (
+ compat.thinkingFormat === "qwen-chat-template" &&
+ model.reasoning &&
+ completionsReasoningEffort
+ ) {
+ setChatTemplateThinking(params, isCompletionsThinkingEnabled(completionsReasoningEffort));
+ } else if (
compat.thinkingFormat === "openrouter" &&
model.reasoning &&
resolvedCompletionsReasoningEffort