fix(agents): honor qwen chat-template thinking compat

This commit is contained in:
Peter Steinberger
2026-04-27 11:26:23 +01:00
parent 3db407da40
commit 75c8c1bebe
6 changed files with 128 additions and 3 deletions

View File

@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
- Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds, and restore `openclaw agent --model` for one-shot CLI runs. Fixes #72751. Thanks @moooV252.
- Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin.
- Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv.
- Agents/vLLM: honor `compat.thinkingFormat: "qwen-chat-template"` by sending Qwen chat-template thinking kwargs, including preserved thinking for agent loops, and support DashScope-style `qwen` top-level thinking flags. Fixes #72329. Thanks @stavrostzagadouris.
- Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis.
- Agents/exec: apply configured `tools.exec.timeoutSec` to background, `yieldMs`, and node `system.run` commands when no per-call timeout is set, preventing auto-backgrounded and remote node commands from running indefinitely. Fixes #67600; supersedes #67603. Thanks @dlmpx and @kagura-agent.
- Config/doctor: stop masking unknown-key validation diagnostics such as `agents.defaults.llm`, and have `openclaw doctor --fix` remove the retired `agents.defaults.llm` timeout block. Thanks @aidiffuser.

View File

@@ -371,7 +371,7 @@ Time format in system prompt. Default: `auto` (OS preference).
- `params`: global default provider parameters applied to all models. Set at `agents.defaults.params` (e.g. `{ cacheRetention: "long" }`).
- `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
- `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override those defaults, and `extra_body.chat_template_kwargs` still has final precedence.
- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; models with `compat.thinkingFormat: "qwen-chat-template"` map OpenClaw thinking controls to `chat_template_kwargs.enable_thinking` plus `preserve_thinking: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence.
- `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
- `agentRuntime`: default low-level agent runtime policy. Omitted id defaults to OpenClaw Pi. Use `id: "pi"` to force the built-in PI harness, `id: "auto"` to let registered plugin harnesses claim supported models, a registered harness id such as `id: "codex"`, or a supported CLI backend alias such as `id: "claude-cli"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
- Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.

View File

@@ -129,6 +129,27 @@ Use explicit config when:
</Accordion>
<Accordion title="Qwen thinking controls">
For Qwen models served through vLLM, set
`compat.thinkingFormat: "qwen-chat-template"` on the model entry when the
server expects Qwen chat-template kwargs. OpenClaw maps `/think off` to:
```json
{
"chat_template_kwargs": {
"enable_thinking": false,
"preserve_thinking": true
}
}
```
Non-`off` thinking levels send `enable_thinking: true`. If your endpoint
expects DashScope-style top-level flags instead, use
`compat.thinkingFormat: "qwen"` to send `enable_thinking` at the request
root.
</Accordion>
<Accordion title="Nemotron 3 thinking controls">
vLLM/Nemotron 3 can use chat-template kwargs to control whether reasoning is
returned as hidden reasoning or visible answer text. When an OpenClaw session

View File

@@ -17,7 +17,7 @@ export type OpenAICompletionsCompatDefaults = {
supportsReasoningEffort: boolean;
supportsUsageInStreaming: boolean;
maxTokensField: "max_completion_tokens" | "max_tokens";
thinkingFormat: "openai" | "openrouter" | "deepseek" | "zai";
thinkingFormat: "openai" | "openrouter" | "deepseek" | "zai" | "qwen" | "qwen-chat-template";
visibleReasoningDetailTypes: string[];
supportsStrictMode: boolean;
};

View File

@@ -1816,6 +1816,78 @@ describe("openai transport stream", () => {
expect(params.stream_options).toMatchObject({ include_usage: true });
});
it("maps qwen-chat-template thinking compat to vLLM chat template kwargs", () => {
  // Minimal vLLM-served Qwen model whose compat flag opts into chat-template thinking kwargs.
  const model = {
    id: "Qwen/Qwen3-8B",
    name: "Qwen3 8B",
    api: "openai-completions",
    provider: "vllm",
    baseUrl: "http://127.0.0.1:8000/v1",
    reasoning: true,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 32768,
    maxTokens: 8192,
    compat: { thinkingFormat: "qwen-chat-template" },
  } as unknown as Model<"openai-completions">;
  const context = {
    systemPrompt: "system",
    messages: [],
    tools: [],
  } as never;
  type KwargsParams = {
    chat_template_kwargs?: { enable_thinking?: unknown; preserve_thinking?: unknown };
  };
  // Build once with thinking off ("none") and once with a non-off level ("medium").
  const buildWithReasoning = (reasoning: string): KwargsParams =>
    buildOpenAICompletionsParams(model, context, { reasoning } as never) as KwargsParams;
  const off = buildWithReasoning("none");
  const on = buildWithReasoning("medium");
  // Thinking off must still send kwargs, with enable_thinking: false.
  expect(off.chat_template_kwargs).toEqual({
    enable_thinking: false,
    preserve_thinking: true,
  });
  expect(off).not.toHaveProperty("reasoning_effort");
  // Any non-off level flips enable_thinking on; preserved thinking stays on for agent loops.
  expect(on.chat_template_kwargs).toEqual({
    enable_thinking: true,
    preserve_thinking: true,
  });
  expect(on).not.toHaveProperty("reasoning_effort");
});
it("maps qwen thinking compat to top-level enable_thinking", () => {
  // DashScope-style endpoints expect enable_thinking at the request root,
  // not nested inside chat_template_kwargs.
  const model = {
    id: "qwen3.6-plus",
    name: "Qwen 3.6 Plus",
    api: "openai-completions",
    provider: "qwen-custom",
    baseUrl: "https://example.com/v1",
    reasoning: true,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 32768,
    maxTokens: 8192,
    compat: { thinkingFormat: "qwen" },
  } as unknown as Model<"openai-completions">;
  const context = {
    systemPrompt: "system",
    messages: [],
    tools: [],
  } as never;
  const options = { reasoning: "none" } as never;
  const params = buildOpenAICompletionsParams(model, context, options) as {
    enable_thinking?: unknown;
    reasoning_effort?: unknown;
  };
  expect(params.enable_thinking).toBe(false);
  expect(params).not.toHaveProperty("reasoning_effort");
});
it("enables streaming usage compat for generic providers on native DashScope endpoints", () => {
const params = buildOpenAICompletionsParams(
{

View File

@@ -1631,6 +1631,29 @@ function resolveOpenAICompletionsReasoningEffort(options: OpenAICompletionsOptio
return options?.reasoningEffort ?? options?.reasoning ?? "high";
}
/**
 * Whether the given reasoning effort requests any thinking at all.
 * Normalizes the effort string first so aliases collapse to canonical levels;
 * only the canonical "none" level counts as thinking-disabled.
 */
function isCompletionsThinkingEnabled(effort: string): boolean {
  const normalized = normalizeOpenAIReasoningEffort(effort);
  return normalized !== "none";
}
/**
 * Write Qwen chat-template thinking kwargs onto an outgoing request body.
 *
 * Merges into an existing plain-object `chat_template_kwargs` (arrays and
 * non-objects are discarded), always setting `enable_thinking` to `enabled`
 * while defaulting `preserve_thinking` to `true` only when the caller has not
 * already provided a value for it.
 *
 * @param params  Mutable request body; `params.chat_template_kwargs` is replaced.
 * @param enabled Whether thinking is on for this request.
 */
function setChatTemplateThinking(params: Record<string, unknown>, enabled: boolean): void {
  const current = params.chat_template_kwargs;
  const mergeable =
    typeof current === "object" && current !== null && !Array.isArray(current);
  // Start from a shallow copy of any caller-supplied kwargs so we never mutate input.
  const kwargs: Record<string, unknown> = mergeable
    ? { ...(current as Record<string, unknown>) }
    : {};
  kwargs.enable_thinking = enabled;
  // Respect an explicit caller value (even `false`); otherwise preserve thinking
  // across agent-loop turns by default.
  if (!("preserve_thinking" in kwargs)) {
    kwargs.preserve_thinking = true;
  }
  params.chat_template_kwargs = kwargs;
}
function convertTools(
tools: NonNullable<Context["tools"]>,
compat: ReturnType<typeof getCompat>,
@@ -1814,7 +1837,15 @@ export function buildOpenAICompletionsParams(
fallbackMap: compat.reasoningEffortMap,
})
: undefined;
if (
if (compat.thinkingFormat === "qwen" && model.reasoning && completionsReasoningEffort) {
params.enable_thinking = isCompletionsThinkingEnabled(completionsReasoningEffort);
} else if (
compat.thinkingFormat === "qwen-chat-template" &&
model.reasoning &&
completionsReasoningEffort
) {
setChatTemplateThinking(params, isCompletionsThinkingEnabled(completionsReasoningEffort));
} else if (
compat.thinkingFormat === "openrouter" &&
model.reasoning &&
resolvedCompletionsReasoningEffort