From ffc7bda443759bf1844544f3d23a23c8e8d9a89d Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 17 May 2026 02:19:01 +0100 Subject: [PATCH] fix(qwen): honor chat-template thinking level --- CHANGELOG.md | 1 + extensions/qwen/stream.test.ts | 105 ++++++++++++++++++++++++++++----- extensions/qwen/stream.ts | 48 ++++++++++++++- 3 files changed, 137 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83eac67ac92..88b837738ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ Docs: https://docs.openclaw.ai - Providers/Anthropic-messages: extract `reasoning_content` from `thinking` blocks during assistant replay so proxy providers that route through the Anthropic-messages transport preserve reasoning context across tool-call follow-up turns. Thanks @Sunnyone2three. - Mac app: let menu gateway/session error text wrap across a few lines and stop rebuilding dynamic Context/Gateway menu rows while the menu is open, reducing flicker. - Mac app: make device pairing approval sheets friendlier, with concise Mac/device copy, shortened identifiers, friendly scope labels, and Approve as the primary action. +- Providers/Qwen: honor session thinking level for `qwen-chat-template` payloads so `/think off` disables nested llama.cpp chat-template thinking controls. Fixes #82768. Thanks @bfox55. - Feishu/wiki: reject numeric wiki space IDs before creating Lark clients and keep numeric-looking IDs documented as quoted opaque strings, preventing JavaScript precision loss in knowledge base calls. Fixes #45301. (#82769) Thanks @hyspacex. - Control UI: simplify Talk settings to Voice, Model, and Sensitivity defaults, with provider, transport, exact VAD, and timing controls behind Advanced. - Telegram: let catch-all mention patterns match captionless group photos, so media-only group messages reach the agent when the group is intentionally configured to respond to all messages. Fixes #44833. (#82756) Thanks @IWhatsskill. diff --git a/extensions/qwen/stream.test.ts b/extensions/qwen/stream.test.ts index 8e30e90a77f..ff34ff696ef 100644 --- a/extensions/qwen/stream.test.ts +++ b/extensions/qwen/stream.test.ts @@ -5,6 +5,7 @@ import { createQwenThinkingWrapper, wrapQwenProviderStream } from "./stream.js"; function capturePayload(params: { thinkingLevel?: "off" | "low" | "medium" | "high" | "xhigh" | "max"; + thinkingFormat?: string; reasoning?: unknown; initialPayload?: Record; model?: Partial>; @@ -17,7 +18,11 @@ function capturePayload(params: { return {} as ReturnType; }; - const wrapped = createQwenThinkingWrapper(baseStreamFn, params.thinkingLevel ?? "high"); + const wrapped = createQwenThinkingWrapper( + baseStreamFn, + params.thinkingLevel ?? "high", + params.thinkingFormat, + ); void wrapped( { api: "openai-completions", @@ -56,6 +61,37 @@ describe("createQwenThinkingWrapper", () => { expect(capturePayload({ thinkingLevel: "high" })).toEqual({ enable_thinking: true }); }); + it("overrides qwen-chat-template thinking with the session level", () => { + expect( + capturePayload({ + thinkingFormat: "qwen-chat-template", + thinkingLevel: "off", + initialPayload: { + chat_template_kwargs: { enable_thinking: true, preserve_thinking: true }, + enable_thinking: true, + reasoning_effort: "high", + }, + }), + ).toEqual({ + chat_template_kwargs: { enable_thinking: false, preserve_thinking: true }, + }); + }); + + it("uses the runtime model qwen-chat-template format when the wrapper context omits it", () => { + expect( + capturePayload({ + thinkingLevel: "off", + model: { compat: { thinkingFormat: "qwen-chat-template" } }, + initialPayload: { + chat_template_kwargs: { enable_thinking: true }, + enable_thinking: true, + }, + }), + ).toEqual({ + chat_template_kwargs: { enable_thinking: false, preserve_thinking: true }, + }); + }); + it("skips non-reasoning and non-completions models", () => { expect(capturePayload({ model: { reasoning: false } })).toStrictEqual({}); expect(capturePayload({ model: { api: "openai-responses" as never } })).toStrictEqual({}); @@ -64,19 +100,18 @@ describe("createQwenThinkingWrapper", () => { describe("wrapQwenProviderStream", () => { it("only registers for Qwen-family OpenAI-compatible providers", () => { - expect( - wrapQwenProviderStream({ - provider: "qwencloud", - modelId: "qwen3.6-plus", - model: { - api: "openai-completions", - provider: "qwen", - id: "qwen3.6-plus", - reasoning: true, - } as Model<"openai-completions">, - streamFn: undefined, - } as never), - ).toBeTypeOf("function"); + const streamFn = wrapQwenProviderStream({ + provider: "qwencloud", + modelId: "qwen3.6-plus", + model: { + api: "openai-completions", + provider: "qwen", + id: "qwen3.6-plus", + reasoning: true, + } as Model<"openai-completions">, + streamFn: undefined, + } as never); + expect(streamFn).toBeTypeOf("function"); expect( wrapQwenProviderStream({ @@ -91,4 +126,46 @@ describe("wrapQwenProviderStream", () => { } as never), ).toBeUndefined(); }); + + it("passes qwen-chat-template format to the Qwen wrapper", () => { + let captured: Record = {}; + const baseStreamFn: StreamFn = (_model, _context, options) => { + const payload = { + chat_template_kwargs: { enable_thinking: true }, + enable_thinking: true, + }; + options?.onPayload?.(payload, _model); + captured = payload; + return {} as ReturnType; + }; + + const wrapped = wrapQwenProviderStream({ + provider: "qwen", + modelId: "qwen3.6-plus", + model: { + api: "openai-completions", + provider: "qwen", + id: "qwen3.6-plus", + reasoning: true, + compat: { thinkingFormat: "qwen-chat-template" }, + } as Model<"openai-completions">, + streamFn: baseStreamFn, + thinkingLevel: "off", + } as never); + + void wrapped?.( + { + api: "openai-completions", + provider: "qwen", + id: "qwen3.6-plus", + reasoning: true, + } as Model<"openai-completions">, + { messages: [] } as Context, + {}, + ); + + expect(captured).toStrictEqual({ + chat_template_kwargs: { enable_thinking: false, preserve_thinking: true }, + }); + }); }); diff --git a/extensions/qwen/stream.ts b/extensions/qwen/stream.ts index c406c85a5c8..bc9a44946d6 100644 --- a/extensions/qwen/stream.ts +++ b/extensions/qwen/stream.ts @@ -7,6 +7,7 @@ import { } from "openclaw/plugin-sdk/provider-stream-shared"; type QwenThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"]; +type QwenThinkingFormat = string | undefined; function isQwenProviderId(providerId: string): boolean { const normalized = normalizeProviderId(providerId); @@ -18,15 +19,52 @@ function isQwenProviderId(providerId: string): boolean { ); } +function setQwenChatTemplateThinking(payload: Record, enabled: boolean): void { + const existing = payload.chat_template_kwargs; + if (existing && typeof existing === "object" && !Array.isArray(existing)) { + const next: Record = { + ...(existing as Record), + enable_thinking: enabled, + }; + if (!Object.hasOwn(next, "preserve_thinking")) { + next.preserve_thinking = true; + } + payload.chat_template_kwargs = next; + return; + } + payload.chat_template_kwargs = { + enable_thinking: enabled, + preserve_thinking: true, + }; +} + +function readQwenThinkingFormatFromModel(model: Parameters[0]): QwenThinkingFormat { + if (model.api !== "openai-completions") { + return undefined; + } + const compat = + model.compat && typeof model.compat === "object" + ? (model.compat as { thinkingFormat?: unknown }) + : undefined; + return typeof compat?.thinkingFormat === "string" ? compat.thinkingFormat : undefined; +} + export function createQwenThinkingWrapper( baseStreamFn: StreamFn | undefined, thinkingLevel: QwenThinkingLevel, + thinkingFormat?: QwenThinkingFormat, ): StreamFn { return createPayloadPatchStreamWrapper( baseStreamFn, - ({ payload: payloadObj, options }) => { + ({ payload: payloadObj, model, options }) => { const enableThinking = isOpenAICompatibleThinkingEnabled({ thinkingLevel, options }); - payloadObj.enable_thinking = enableThinking; + const effectiveThinkingFormat = thinkingFormat ?? readQwenThinkingFormatFromModel(model); + if (effectiveThinkingFormat === "qwen-chat-template") { + setQwenChatTemplateThinking(payloadObj, enableThinking); + delete payloadObj.enable_thinking; + } else { + payloadObj.enable_thinking = enableThinking; + } delete payloadObj.reasoning_effort; delete payloadObj.reasoningEffort; delete payloadObj.reasoning; @@ -41,5 +79,9 @@ export function wrapQwenProviderStream(ctx: ProviderWrapStreamFnContext): Stream if (!isQwenProviderId(ctx.provider) || (ctx.model && ctx.model.api !== "openai-completions")) { return undefined; } - return createQwenThinkingWrapper(ctx.streamFn, ctx.thinkingLevel); + return createQwenThinkingWrapper( + ctx.streamFn, + ctx.thinkingLevel, + ctx.model ? readQwenThinkingFormatFromModel(ctx.model) : undefined, + ); }