From c91fffdd67d6458b200a622b91f59c7d34981785 Mon Sep 17 00:00:00 2001 From: Liu Wenyu <117838866+indulgeback@users.noreply.github.com> Date: Sat, 9 May 2026 20:02:39 +0800 Subject: [PATCH] feat(config): support Qwen thinkingFormat values (#79777) ## Summary - allow configured OpenAI-compatible Qwen models to opt into `qwen` and `qwen-chat-template` thinking payloads - preserve those compat values through schema validation and catalog normalization - map OpenClaw `/think` levels to `enable_thinking` or `chat_template_kwargs.enable_thinking` without also sending `reasoning_effort` - clarify docs that request-level chat-template kwargs require compatible backends such as vLLM ## Verification - git diff --check - pnpm exec oxfmt --check --threads=1 CHANGELOG.md docs/gateway/config-agents.md docs/gateway/config-tools.md src/config/zod-schema.core.ts src/config/types.models.ts src/model-catalog/normalize.ts src/agents/openai-transport-stream.ts src/config/config-misc.test.ts src/model-catalog/normalize.test.ts src/agents/openai-transport-stream.test.ts - pnpm config:schema:check - pnpm test src/config/config-misc.test.ts src/model-catalog/normalize.test.ts src/agents/openai-transport-stream.test.ts - GitHub CI on 2404edca39709c347e6567600e413c09cc2c75b6 Thanks @indulgeback. 
--- CHANGELOG.md | 1 + docs/gateway/config-agents.md | 1 + docs/gateway/config-tools.md | 1 + src/agents/openai-transport-stream.test.ts | 66 ++++++++++++++++++++++ src/agents/openai-transport-stream.ts | 45 +++++++++++++++ src/config/config-misc.test.ts | 57 ++++++++++--------- src/config/types.models.ts | 2 +- src/config/zod-schema.core.ts | 2 + src/model-catalog/normalize.test.ts | 2 + src/model-catalog/normalize.ts | 2 + 10 files changed, 151 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd273066ac9..f6350be7ffb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ Docs: https://docs.openclaw.ai - Codex app-server: pin the managed Codex harness and Codex CLI smoke package to `@openai/codex@0.129.0`, defer OpenClaw integration dynamic tools behind Codex tool search by default, and accept current Codex service-tier values so legacy `fast` settings survive the stable harness upgrade as `priority`. - Codex app-server: annotate message-tool-only direct chat turns in the dynamic `message` tool spec so visible replies are sent through `message(action="send")` instead of staying private. (#79704) - Agents/PI: route explicit OpenAI Codex Responses runs through PI's native WebSocket-capable transport and remove OpenClaw's custom OpenAI Responses WebSocket stack while preserving auth injection, run abort signals, and prompt cache boundary stripping. +- Models/config: allow `compat.thinkingFormat` values `qwen` and `qwen-chat-template` for configured OpenAI-compatible Qwen models, preserving them through catalog normalization and mapping `/think` levels to `enable_thinking` or `chat_template_kwargs.enable_thinking`. Fixes #79677. (#79777) Thanks @indulgeback. 
- Codex app-server: default implicit local stdio app-server permissions to guardian when Codex system requirements disallow the YOLO approval, reviewer, or sandbox value, including hostname-scoped remote sandbox entries, avoiding turn-start failures on managed hosts that permit only reviewed approval or narrower sandboxes. - Plugins/install: run managed npm-root install, uninstall, prune, and repair commands from the managed root without a redundant `--prefix .`, avoiding npm 10.9.3 Arborist crashes on native Windows WhatsApp plugin installs. Fixes #78514. (#78902) Thanks @melihselamett-stack. - Discord/voice: stream ElevenLabs TTS directly into Discord playback and send ElevenLabs latency optimization as the documented query parameter so spoken replies can start sooner. diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md index bbdea0635fb..6bc884f1382 100644 --- a/docs/gateway/config-agents.md +++ b/docs/gateway/config-agents.md @@ -393,6 +393,7 @@ Time format in system prompt. Default: `auto` (OS preference). - `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details. - `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward. - `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. 
For `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. For vLLM Qwen thinking controls, set `params.qwenThinkingFormat` to `"chat-template"` or `"top-level"` on that model entry. +- `compat.thinkingFormat`: OpenAI-compatible thinking payload style. Use `"qwen"` for Qwen-style top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family backends that support request-level chat-template kwargs, such as vLLM. OpenClaw maps disabled thinking to `false` and enabled thinking to `true`. - `compat.supportedReasoningEfforts`: per-model OpenAI-compatible reasoning effort list. Include `"xhigh"` for custom endpoints that truly accept it; OpenClaw then exposes `/think xhigh` in command menus, Gateway session rows, session patch validation, agent CLI validation, and `llm-task` validation for that configured provider/model. Use `compat.reasoningEffortMap` when the backend wants a provider-specific value for a canonical level. - `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking). - Runtime policy belongs on providers or models, not on `agents.defaults`. Use `models.providers..agentRuntime` for provider-wide rules or `agents.defaults.models["provider/model"].agentRuntime` / `agents.list[].models["provider/model"].agentRuntime` for model-specific rules. OpenAI agent models on the official OpenAI provider select Codex by default. 
diff --git a/docs/gateway/config-tools.md b/docs/gateway/config-tools.md index 33df72a8b46..439ef2acacb 100644 --- a/docs/gateway/config-tools.md +++ b/docs/gateway/config-tools.md @@ -474,6 +474,7 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi - `models.providers.*.models.*.contextTokens`: optional runtime context cap. This overrides provider-level `contextTokens`; use it when you want a smaller effective context budget than the model's native `contextWindow`; `openclaw models list` shows both values when they differ. - `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. Empty/omitted `baseUrl` keeps default OpenAI behavior. - `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure text `messages[].content` arrays into plain strings before sending the request. + - `models.providers.*.models.*.compat.thinkingFormat`: optional thinking payload hint. Use `"qwen"` for top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family OpenAI-compatible servers that support request-level chat-template kwargs, such as vLLM. 
diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts index be72234da3f..ff217a02322 100644 --- a/src/agents/openai-transport-stream.test.ts +++ b/src/agents/openai-transport-stream.test.ts @@ -2597,6 +2597,72 @@ describe("openai transport stream", () => { expect(disabled.reasoning_effort).toBe("none"); }); + it("maps qwen thinking format to top-level enable_thinking", () => { + const baseModel = { + id: "qwen3.5-32b", + name: "Qwen 3.5 32B", + api: "openai-completions", + provider: "llama-cpp", + baseUrl: "http://127.0.0.1:8080/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 131072, + maxTokens: 8192, + compat: { + thinkingFormat: "qwen", + }, + } as unknown as Model<"openai-completions">; + const context = { + systemPrompt: "system", + messages: [], + tools: [], + } as never; + + const enabled = buildOpenAICompletionsParams(baseModel, context, { + reasoning: "medium", + } as never) as { enable_thinking?: unknown; reasoning_effort?: unknown }; + const disabled = buildOpenAICompletionsParams(baseModel, context, { + reasoning: "off", + } as never) as { enable_thinking?: unknown; reasoning_effort?: unknown }; + + expect(enabled.enable_thinking).toBe(true); + expect(disabled.enable_thinking).toBe(false); + expect(enabled).not.toHaveProperty("reasoning_effort"); + expect(disabled).not.toHaveProperty("reasoning_effort"); + }); + + it("maps qwen-chat-template thinking format to chat_template_kwargs", () => { + const params = buildOpenAICompletionsParams( + { + id: "qwen3.5-32b", + name: "Qwen 3.5 32B", + api: "openai-completions", + provider: "llama-cpp", + baseUrl: "http://127.0.0.1:8080/v1", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 131072, + maxTokens: 8192, + compat: { + thinkingFormat: "qwen-chat-template", + }, + } as unknown as Model<"openai-completions">, + { + 
systemPrompt: "system", + messages: [], + tools: [], + } as never, + { + reasoning: "off", + } as never, + ) as { chat_template_kwargs?: Record<string, unknown>; reasoning_effort?: unknown }; + + expect(params.chat_template_kwargs).toEqual({ enable_thinking: false }); + expect(params).not.toHaveProperty("reasoning_effort"); + }); + it("omits unsupported disabled reasoning for completions providers", () => { const params = buildOpenAICompletionsParams( { diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts index 4e941147f36..9ec19456418 100644 --- a/src/agents/openai-transport-stream.ts +++ b/src/agents/openai-transport-stream.ts @@ -1831,6 +1831,44 @@ function resolveOpenAICompletionsReasoningEffort(options: OpenAICompletionsOptio return options?.reasoningEffort ?? options?.reasoning ?? "high"; } +function isQwenOpenAICompletionsThinkingFormat(format: string): boolean { + return format === "qwen" || format === "qwen-chat-template"; +} + +function isOpenAICompletionsThinkingEnabled(effort: OpenAIReasoningEffort): boolean { + const normalized = effort.trim().toLowerCase(); + return normalized !== "off" && normalized !== "none"; +} + +function setQwenChatTemplateThinking(params: Record<string, unknown>, enabled: boolean): void { + const existing = params.chat_template_kwargs; + params.chat_template_kwargs = + existing && typeof existing === "object" && !Array.isArray(existing) + ?
{ ...(existing as Record<string, unknown>), enable_thinking: enabled } + : { enable_thinking: enabled }; +} + +function applyQwenOpenAICompletionsThinkingParams(params: { + compatThinkingFormat: string; + modelReasoning: boolean; + payload: Record<string, unknown>; + requestedEffort: OpenAIReasoningEffort; +}): boolean { + if ( + !params.modelReasoning || + !isQwenOpenAICompletionsThinkingFormat(params.compatThinkingFormat) + ) { + return false; + } + const enabled = isOpenAICompletionsThinkingEnabled(params.requestedEffort); + if (params.compatThinkingFormat === "qwen-chat-template") { + setQwenChatTemplateThinking(params.payload, enabled); + } else { + params.payload.enable_thinking = enabled; + } + return true; +} + function convertTools( tools: NonNullable, compat: ReturnType, @@ -2030,6 +2068,12 @@ export function buildOpenAICompletionsParams( : undefined; const omitGpt54MiniToolReasoningEffort = isOpenAIGpt54MiniModel(model) && Array.isArray(params.tools) && params.tools.length > 0; + const handledQwenThinkingFormat = applyQwenOpenAICompletionsThinkingParams({ + compatThinkingFormat: compat.thinkingFormat, + modelReasoning: model.reasoning, + payload: params, + requestedEffort: completionsReasoningEffort, + }); if ( compat.thinkingFormat === "openrouter" && model.reasoning && @@ -2042,6 +2086,7 @@ resolvedCompletionsReasoningEffort && model.reasoning && compat.supportsReasoningEffort && + !handledQwenThinkingFormat && !omitGpt54MiniToolReasoningEffort ) { params.reasoning_effort = resolvedCompletionsReasoningEffort; diff --git a/src/config/config-misc.test.ts b/src/config/config-misc.test.ts index 5aed93cf213..226e9b064ce 100644 --- a/src/config/config-misc.test.ts +++ b/src/config/config-misc.test.ts @@ -888,37 +888,40 @@ describe("broadcast", () => { }); describe("model compat config schema", () => { - it("accepts full openai-completions compat fields", () => { - const res = OpenClawSchema.safeParse({ - models: { - providers: { - local: { -
baseUrl: "http://127.0.0.1:1234/v1", - api: "openai-completions", - models: [ - { - id: "qwen3-32b", - name: "Qwen3 32B", - compat: { - supportsUsageInStreaming: true, - supportsStrictMode: false, - requiresStringContent: true, - thinkingFormat: "zai", - requiresToolResultName: true, - requiresAssistantAfterToolResult: false, - requiresThinkingAsText: false, - requiresMistralToolIds: false, - requiresOpenAiAnthropicToolPayload: true, + it.each(["zai", "qwen", "qwen-chat-template"] as const)( + "accepts full openai-completions compat fields with %s thinking format", + (thinkingFormat) => { + const res = OpenClawSchema.safeParse({ + models: { + providers: { + local: { + baseUrl: "http://127.0.0.1:1234/v1", + api: "openai-completions", + models: [ + { + id: "qwen3-32b", + name: "Qwen3 32B", + compat: { + supportsUsageInStreaming: true, + supportsStrictMode: false, + requiresStringContent: true, + thinkingFormat, + requiresToolResultName: true, + requiresAssistantAfterToolResult: false, + requiresThinkingAsText: false, + requiresMistralToolIds: false, + requiresOpenAiAnthropicToolPayload: true, + }, }, - }, - ], + ], + }, }, }, - }, - }); + }); - expect(res.success).toBe(true); - }); + expect(res.success).toBe(true); + }, + ); }); describe("config paths", () => { diff --git a/src/config/types.models.ts b/src/config/types.models.ts index ff1ef052176..a228ac15a19 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -51,7 +51,7 @@ type SupportedAnthropicMessagesCompatFields = Pick< >; type SupportedThinkingFormat = - | Exclude, "qwen" | "qwen-chat-template"> + | NonNullable | "deepseek" | "openrouter"; diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index 4a7b8285052..ca651f15735 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -205,6 +205,8 @@ const ModelCompatSchema = z z.literal("openai"), z.literal("openrouter"), z.literal("deepseek"), + z.literal("qwen"), + 
z.literal("qwen-chat-template"), z.literal("zai"), ]) .optional(), diff --git a/src/model-catalog/normalize.test.ts b/src/model-catalog/normalize.test.ts index bc5a1424808..11c4f54e70b 100644 --- a/src/model-catalog/normalize.test.ts +++ b/src/model-catalog/normalize.test.ts @@ -53,6 +53,7 @@ describe("model catalog normalization", () => { compat: { supportsTools: true, supportsStore: "yes", + thinkingFormat: "qwen-chat-template", unknownFlag: true, }, status: "preview", @@ -137,6 +138,7 @@ describe("model catalog normalization", () => { }, compat: { supportsTools: true, + thinkingFormat: "qwen-chat-template", }, status: "preview", statusReason: "rolling out", diff --git a/src/model-catalog/normalize.ts b/src/model-catalog/normalize.ts index 0c221494d77..8c22a36afa2 100644 --- a/src/model-catalog/normalize.ts +++ b/src/model-catalog/normalize.ts @@ -223,6 +223,8 @@ function normalizeModelCatalogCompat(value: unknown): ModelCompatConfig | undefi thinkingFormat === "openai" || thinkingFormat === "openrouter" || thinkingFormat === "deepseek" || + thinkingFormat === "qwen" || + thinkingFormat === "qwen-chat-template" || thinkingFormat === "zai" ) { compat.thinkingFormat = thinkingFormat;