mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:30:42 +00:00
fix(agents): honor qwen chat-template thinking compat
This commit is contained in:
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Agents/OpenAI-compatible: retry replay-safe empty `stop` turns once for `openai-completions` endpoints, so transient empty local backend responses no longer surface as “Agent couldn't generate a response” when a continuation succeeds, and restore `openclaw agent --model` for one-shot CLI runs. Fixes #72751. Thanks @moooV252.
|
||||
- Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin.
|
||||
- Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv.
|
||||
- Agents/vLLM: honor `compat.thinkingFormat: "qwen-chat-template"` by sending Qwen chat-template thinking kwargs, including preserved thinking for agent loops, and support DashScope-style `qwen` top-level thinking flags. Fixes #72329. Thanks @stavrostzagadouris.
|
||||
- Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis.
|
||||
- Agents/exec: apply configured `tools.exec.timeoutSec` to background, `yieldMs`, and node `system.run` commands when no per-call timeout is set, preventing auto-backgrounded and remote node commands from running indefinitely. Fixes #67600; supersedes #67603. Thanks @dlmpx and @kagura-agent.
|
||||
- Config/doctor: stop masking unknown-key validation diagnostics such as `agents.defaults.llm`, and have `openclaw doctor --fix` remove the retired `agents.defaults.llm` timeout block. Thanks @aidiffuser.
|
||||
|
||||
@@ -371,7 +371,7 @@ Time format in system prompt. Default: `auto` (OS preference).
|
||||
- `params`: global default provider parameters applied to all models. Set at `agents.defaults.params` (e.g. `{ cacheRetention: "long" }`).
|
||||
- `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
|
||||
- `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
|
||||
- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override those defaults, and `extra_body.chat_template_kwargs` still has final precedence.
|
||||
- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; models with `compat.thinkingFormat: "qwen-chat-template"` map OpenClaw thinking controls to `chat_template_kwargs.enable_thinking` plus `preserve_thinking: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence.
|
||||
- `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
|
||||
- `agentRuntime`: default low-level agent runtime policy. Omitted id defaults to OpenClaw Pi. Use `id: "pi"` to force the built-in PI harness, `id: "auto"` to let registered plugin harnesses claim supported models, a registered harness id such as `id: "codex"`, or a supported CLI backend alias such as `id: "claude-cli"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
|
||||
- Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.
|
||||
|
||||
@@ -129,6 +129,27 @@ Use explicit config when:
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Qwen thinking controls">
|
||||
For Qwen models served through vLLM, set
|
||||
`compat.thinkingFormat: "qwen-chat-template"` on the model entry when the
|
||||
server expects Qwen chat-template kwargs. OpenClaw maps `/think off` to:
|
||||
|
||||
```json
|
||||
{
|
||||
"chat_template_kwargs": {
|
||||
"enable_thinking": false,
|
||||
"preserve_thinking": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Any thinking level other than `off` sends `enable_thinking: true` while still
sending `preserve_thinking: true`. If your endpoint expects DashScope-style
top-level flags instead, use `compat.thinkingFormat: "qwen"` to send
`enable_thinking` at the request root.
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Nemotron 3 thinking controls">
|
||||
vLLM/Nemotron 3 can use chat-template kwargs to control whether reasoning is
|
||||
returned as hidden reasoning or visible answer text. When an OpenClaw session
|
||||
|
||||
@@ -17,7 +17,7 @@ export type OpenAICompletionsCompatDefaults = {
|
||||
supportsReasoningEffort: boolean;
|
||||
supportsUsageInStreaming: boolean;
|
||||
maxTokensField: "max_completion_tokens" | "max_tokens";
|
||||
thinkingFormat: "openai" | "openrouter" | "deepseek" | "zai";
|
||||
thinkingFormat: "openai" | "openrouter" | "deepseek" | "zai" | "qwen" | "qwen-chat-template";
|
||||
visibleReasoningDetailTypes: string[];
|
||||
supportsStrictMode: boolean;
|
||||
};
|
||||
|
||||
@@ -1816,6 +1816,78 @@ describe("openai transport stream", () => {
|
||||
expect(params.stream_options).toMatchObject({ include_usage: true });
|
||||
});
|
||||
|
||||
it("maps qwen-chat-template thinking compat to vLLM chat template kwargs", () => {
  // The only request-body field this case inspects.
  type ChatTemplateParams = {
    chat_template_kwargs?: { enable_thinking?: unknown; preserve_thinking?: unknown };
  };

  // Reasoning-capable vLLM model entry that opts into the Qwen chat-template format.
  const qwenModel = {
    id: "Qwen/Qwen3-8B",
    name: "Qwen3 8B",
    api: "openai-completions",
    provider: "vllm",
    baseUrl: "http://127.0.0.1:8000/v1",
    reasoning: true,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 32768,
    maxTokens: 8192,
    compat: { thinkingFormat: "qwen-chat-template" },
  } as unknown as Model<"openai-completions">;
  const emptyContext = { systemPrompt: "system", messages: [], tools: [] } as never;

  // Builds the request params for one reasoning level against the shared model/context.
  const buildFor = (reasoning: string): ChatTemplateParams =>
    buildOpenAICompletionsParams(qwenModel, emptyContext, {
      reasoning,
    } as never) as ChatTemplateParams;

  const disabled = buildFor("none");
  const enabled = buildFor("medium");

  // Thinking off: the flag is false, and no reasoning_effort is emitted.
  expect(disabled.chat_template_kwargs).toEqual({
    enable_thinking: false,
    preserve_thinking: true,
  });
  expect(disabled).not.toHaveProperty("reasoning_effort");

  // Thinking on: the flag is true, and still no reasoning_effort.
  expect(enabled.chat_template_kwargs).toEqual({
    enable_thinking: true,
    preserve_thinking: true,
  });
  expect(enabled).not.toHaveProperty("reasoning_effort");
});
|
||||
|
||||
it("maps qwen thinking compat to top-level enable_thinking", () => {
  // Reasoning-capable model entry using the "qwen" thinking format on a custom provider.
  const qwenModel = {
    id: "qwen3.6-plus",
    name: "Qwen 3.6 Plus",
    api: "openai-completions",
    provider: "qwen-custom",
    baseUrl: "https://example.com/v1",
    reasoning: true,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 32768,
    maxTokens: 8192,
    compat: { thinkingFormat: "qwen" },
  } as unknown as Model<"openai-completions">;
  const emptyContext = { systemPrompt: "system", messages: [], tools: [] } as never;
  const thinkingOff = { reasoning: "none" } as never;

  const params = buildOpenAICompletionsParams(qwenModel, emptyContext, thinkingOff) as {
    enable_thinking?: unknown;
    reasoning_effort?: unknown;
  };

  // The flag lands at the request root and reasoning_effort is not sent.
  expect(params.enable_thinking).toBe(false);
  expect(params).not.toHaveProperty("reasoning_effort");
});
|
||||
|
||||
it("enables streaming usage compat for generic providers on native DashScope endpoints", () => {
|
||||
const params = buildOpenAICompletionsParams(
|
||||
{
|
||||
|
||||
@@ -1631,6 +1631,29 @@ function resolveOpenAICompletionsReasoningEffort(options: OpenAICompletionsOptio
|
||||
return options?.reasoningEffort ?? options?.reasoning ?? "high";
|
||||
}
|
||||
|
||||
/**
 * Reports whether a completions reasoning effort counts as "thinking on".
 *
 * @param effort - Reasoning effort string as resolved for the request.
 * @returns `false` only when the effort normalizes to `"none"`; `true` otherwise.
 */
function isCompletionsThinkingEnabled(effort: string): boolean {
  const normalizedEffort = normalizeOpenAIReasoningEffort(effort);
  return normalizedEffort !== "none";
}
|
||||
|
||||
/**
 * Writes Qwen chat-template thinking flags into `params.chat_template_kwargs`.
 *
 * Any plain-object kwargs already on `params` are kept: their keys are copied
 * into a new object, `enable_thinking` is always overwritten with `enabled`,
 * and `preserve_thinking` defaults to `true` unless the existing kwargs
 * already carry that key. A missing, `null`, array, or non-object value is
 * discarded and replaced by the two generated flags.
 *
 * @param params - Request body under construction; its `chat_template_kwargs`
 *   property is reassigned (the previous kwargs object is not mutated).
 * @param enabled - Value to send as `enable_thinking`.
 */
function setChatTemplateThinking(params: Record<string, unknown>, enabled: boolean): void {
  const existing = params.chat_template_kwargs;
  const existingKwargs: Record<string, unknown> =
    existing !== null && typeof existing === "object" && !Array.isArray(existing)
      ? (existing as Record<string, unknown>)
      : {};
  // Spread order encodes precedence: generated default < existing kwargs < forced flag.
  params.chat_template_kwargs = {
    preserve_thinking: true,
    ...existingKwargs,
    enable_thinking: enabled,
  };
}
|
||||
|
||||
function convertTools(
|
||||
tools: NonNullable<Context["tools"]>,
|
||||
compat: ReturnType<typeof getCompat>,
|
||||
@@ -1814,7 +1837,15 @@ export function buildOpenAICompletionsParams(
|
||||
fallbackMap: compat.reasoningEffortMap,
|
||||
})
|
||||
: undefined;
|
||||
if (
|
||||
if (compat.thinkingFormat === "qwen" && model.reasoning && completionsReasoningEffort) {
|
||||
params.enable_thinking = isCompletionsThinkingEnabled(completionsReasoningEffort);
|
||||
} else if (
|
||||
compat.thinkingFormat === "qwen-chat-template" &&
|
||||
model.reasoning &&
|
||||
completionsReasoningEffort
|
||||
) {
|
||||
setChatTemplateThinking(params, isCompletionsThinkingEnabled(completionsReasoningEffort));
|
||||
} else if (
|
||||
compat.thinkingFormat === "openrouter" &&
|
||||
model.reasoning &&
|
||||
resolvedCompletionsReasoningEffort
|
||||
|
||||
Reference in New Issue
Block a user