diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7dc815cf6d5..d606b2ddf56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,7 +30,7 @@ Docs: https://docs.openclaw.ai
 - Git hooks: skip ignored staged paths when formatting and restaging pre-commit files, so merge commits no longer abort when `.gitignore` newly ignores staged merged content. Fixes #72744. Thanks @100yenadmin.
 - Memory-core/dreaming: add a supported `dreaming.model` knob for Dream Diary narrative subagents, wired through phase config and the existing plugin subagent model-override trust gate. Refs #65963. Thanks @esqandil and @mjamiv.
 - Agents/Anthropic: remove trailing assistant prefill payloads when extended thinking is enabled, so Opus 4.7/Sonnet 4.6 requests do not fail Anthropic's user-final-turn validation. Fixes #72739. Thanks @superandylin.
-- Agents/vLLM: honor `compat.thinkingFormat: "qwen-chat-template"` by sending Qwen chat-template thinking kwargs, including preserved thinking for agent loops, and support DashScope-style `qwen` top-level thinking flags. Fixes #72329. Thanks @stavrostzagadouris.
+- Agents/vLLM/Qwen: add plugin-owned Qwen thinking controls for vLLM chat-template kwargs and DashScope-style top-level `enable_thinking` flags, including preserved thinking for agent loops. Fixes #72329. Thanks @stavrostzagadouris.
 - Memory-core/dreaming: treat request-scoped narrative fallback as expected, skip session cleanup when no subagent run was created, and remove duplicate phase-level cleanup so fallback no longer emits warning noise. Fixes #67152. Thanks @jsompis.
 - Agents/exec: apply configured `tools.exec.timeoutSec` to background, `yieldMs`, and node `system.run` commands when no per-call timeout is set, preventing auto-backgrounded and remote node commands from running indefinitely. Fixes #67600; supersedes #67603. Thanks @dlmpx and @kagura-agent.
 - Config/doctor: stop masking unknown-key validation diagnostics such as `agents.defaults.llm`, and have `openclaw doctor --fix` remove the retired `agents.defaults.llm` timeout block. Thanks @aidiffuser.
diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md
index b6a2616c0fa..8e55d906d8b 100644
--- a/docs/gateway/config-agents.md
+++ b/docs/gateway/config-agents.md
@@ -371,7 +371,7 @@ Time format in system prompt. Default: `auto` (OS preference).
 - `params`: global default provider parameters applied to all models. Set at `agents.defaults.params` (e.g. `{ cacheRetention: "long" }`).
 - `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
 - `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
-- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; models with `compat.thinkingFormat: "qwen-chat-template"` map OpenClaw thinking controls to `chat_template_kwargs.enable_thinking` plus `preserve_thinking: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence.
+- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. For vLLM Qwen thinking controls, set `params.qwenThinkingFormat` to `"chat-template"` or `"top-level"` on that model entry (see the example after this list).
 - `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
 - `agentRuntime`: default low-level agent runtime policy. Omitted id defaults to OpenClaw Pi. Use `id: "pi"` to force the built-in PI harness, `id: "auto"` to let registered plugin harnesses claim supported models, a registered harness id such as `id: "codex"`, or a supported CLI backend alias such as `id: "claude-cli"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
 - Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.
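+
+A minimal sketch of the per-model form (the model id is illustrative; assumes a Qwen model served by vLLM):
+
+```json
+{
+  "agents": {
+    "defaults": {
+      "models": {
+        "vllm/Qwen/Qwen3-8B": {
+          "params": { "qwenThinkingFormat": "chat-template" }
+        }
+      }
+    }
+  }
+}
+```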
diff --git a/docs/providers/qwen.md b/docs/providers/qwen.md
index beea2510e01..bf1e2a4b207 100644
--- a/docs/providers/qwen.md
+++ b/docs/providers/qwen.md
@@ -169,6 +169,13 @@
 Availability can still vary by endpoint and billing plan even when a model is
 present in the bundled catalog.
 
+## Thinking Controls
+
+For reasoning-enabled Qwen Cloud models, the bundled provider maps OpenClaw
+thinking levels to DashScope's top-level `enable_thinking` request flag. Disabled
+thinking sends `enable_thinking: false`; other thinking levels send
+`enable_thinking: true`.
+
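+A disabled-thinking request then carries the flag at the root, roughly (model
+id illustrative):
+
+```json
+{
+  "model": "qwen3.6-plus",
+  "enable_thinking": false
+}
+```
+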
 ## Multimodal add-ons
 
 The `qwen` plugin also exposes multimodal capabilities on the **Standard**
diff --git a/docs/providers/vllm.md b/docs/providers/vllm.md
index d1efe226fed..0d7b9bc61b7 100644
--- a/docs/providers/vllm.md
+++ b/docs/providers/vllm.md
@@ -131,7 +131,7 @@ Use explicit config when:
 
   For Qwen models served through vLLM, set
-  `compat.thinkingFormat: "qwen-chat-template"` on the model entry when the
+  `params.qwenThinkingFormat: "chat-template"` on the model entry when the
   server expects Qwen chat-template kwargs. OpenClaw maps `/think off` to:
 
   ```json
@@ -145,8 +145,8 @@ Use explicit config when:
 
   Non-`off` thinking levels send `enable_thinking: true`.
 
   If your endpoint expects DashScope-style top-level flags instead, use
-  `compat.thinkingFormat: "qwen"` to send `enable_thinking` at the request
-  root.
+  `params.qwenThinkingFormat: "top-level"` to send `enable_thinking` at the
+  request root. Snake-case `params.qwen_thinking_format` is also accepted.
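+
+  With `"top-level"`, OpenClaw maps `/think off` to roughly:
+
+  ```json
+  {
+    "enable_thinking": false
+  }
+  ```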
diff --git a/extensions/qwen/api.ts b/extensions/qwen/api.ts
index 08f87897da1..0117bad9128 100644
--- a/extensions/qwen/api.ts
+++ b/extensions/qwen/api.ts
@@ -31,3 +31,4 @@ export {
   MODELSTUDIO_MODEL_CATALOG,
 } from "./models.js";
 export { buildModelStudioProvider, buildQwenProvider } from "./provider-catalog.js";
+export { createQwenThinkingWrapper, wrapQwenProviderStream } from "./stream.js";
diff --git a/extensions/qwen/index.ts b/extensions/qwen/index.ts
index e7cfd093b11..3b9bce7f8cd 100644
--- a/extensions/qwen/index.ts
+++ b/extensions/qwen/index.ts
@@ -10,6 +10,7 @@ import {
   QWEN_DEFAULT_MODEL_REF,
 } from "./onboard.js";
 import { buildQwenProvider } from "./provider-catalog.js";
+import { wrapQwenProviderStream } from "./stream.js";
 import { buildQwenVideoGenerationProvider } from "./video-generation-provider.js";
 
 const PROVIDER_ID = "qwen";
@@ -165,6 +166,7 @@
   },
   applyNativeStreamingUsageCompat: ({ providerConfig }) =>
     applyQwenNativeStreamingUsageCompat(providerConfig),
+  wrapStreamFn: wrapQwenProviderStream,
   normalizeConfig: ({ providerConfig }) => {
     if (!isQwenCodingPlanBaseUrl(providerConfig.baseUrl)) {
       return undefined;
diff --git a/extensions/qwen/stream.test.ts b/extensions/qwen/stream.test.ts
new file mode 100644
index 00000000000..596fca552b3
--- /dev/null
+++ b/extensions/qwen/stream.test.ts
@@ -0,0 +1,94 @@
+import type { StreamFn } from "@mariozechner/pi-agent-core";
+import type { Context, Model } from "@mariozechner/pi-ai";
+import { describe, expect, it } from "vitest";
+import { createQwenThinkingWrapper, wrapQwenProviderStream } from "./stream.js";
+
+function capturePayload(params: {
+  thinkingLevel?: "off" | "low" | "medium" | "high" | "xhigh" | "max";
+  reasoning?: unknown;
+  initialPayload?: Record<string, unknown>;
+  model?: Partial<Model<"openai-completions">>;
+}): Record<string, unknown> {
+  let captured: Record<string, unknown> = {};
+  const baseStreamFn: StreamFn = (_model, _context, options) => {
+    const payload = { ...params.initialPayload };
+    options?.onPayload?.(payload, _model);
+    captured = payload;
+    return {} as ReturnType<StreamFn>;
+  };
+
+  const wrapped = createQwenThinkingWrapper(baseStreamFn, params.thinkingLevel ?? "high");
+  void wrapped(
+    {
+      api: "openai-completions",
+      provider: "qwen",
+      id: "qwen3.6-plus",
+      reasoning: true,
+      ...params.model,
+    } as Model<"openai-completions">,
+    { messages: [] } as Context,
+    params.reasoning === undefined ? {} : ({ reasoning: params.reasoning } as never),
+  );
+
+  return captured;
+}
+
+describe("createQwenThinkingWrapper", () => {
+  it("maps disabled thinking to Qwen top-level enable_thinking", () => {
+    const payload = capturePayload({
+      reasoning: "none",
+      initialPayload: {
+        reasoning_effort: "high",
+        reasoning: { effort: "high" },
+        reasoningEffort: "high",
+      },
+    });
+
+    expect(payload).toEqual({ enable_thinking: false });
+  });
+
+  it("maps enabled thinking to Qwen top-level enable_thinking", () => {
+    expect(capturePayload({ reasoning: "medium" })).toEqual({ enable_thinking: true });
+  });
+
+  it("falls back to the session thinking level", () => {
+    expect(capturePayload({ thinkingLevel: "off" })).toEqual({ enable_thinking: false });
+    expect(capturePayload({ thinkingLevel: "high" })).toEqual({ enable_thinking: true });
+  });
+
+  it("skips non-reasoning and non-completions models", () => {
+    expect(capturePayload({ model: { reasoning: false } })).toEqual({});
+    expect(capturePayload({ model: { api: "openai-responses" as never } })).toEqual({});
+  });
+});
+
+describe("wrapQwenProviderStream", () => {
+  it("only registers for Qwen-family OpenAI-compatible providers", () => {
+    expect(
+      wrapQwenProviderStream({
+        provider: "qwencloud",
+        modelId: "qwen3.6-plus",
+        model: {
+          api: "openai-completions",
+          provider: "qwen",
+          id: "qwen3.6-plus",
+          reasoning: true,
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeTypeOf("function");
+
+    expect(
+      wrapQwenProviderStream({
+        provider: "openai",
+        modelId: "gpt-5.4",
+        model: {
+          api: "openai-completions",
+          provider: "openai",
+          id: "gpt-5.4",
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeUndefined();
+  });
+});
streamSimple; + return (model, context, options) => { + if (model.api !== "openai-completions" || !model.reasoning) { + return underlying(model, context, options); + } + const enableThinking = resolveOpenAICompatibleThinkingEnabled({ thinkingLevel, options }); + return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => { + payloadObj.enable_thinking = enableThinking; + delete payloadObj.reasoning_effort; + delete payloadObj.reasoningEffort; + delete payloadObj.reasoning; + }); + }; +} + +export function wrapQwenProviderStream(ctx: ProviderWrapStreamFnContext): StreamFn | undefined { + if (!isQwenProviderId(ctx.provider) || (ctx.model && ctx.model.api !== "openai-completions")) { + return undefined; + } + return createQwenThinkingWrapper(ctx.streamFn, ctx.thinkingLevel); +} diff --git a/extensions/vllm/api.ts b/extensions/vllm/api.ts index b01c8e166bc..c8053f4e637 100644 --- a/extensions/vllm/api.ts +++ b/extensions/vllm/api.ts @@ -5,3 +5,4 @@ export { VLLM_PROVIDER_LABEL, } from "./defaults.js"; export { buildVllmProvider } from "./models.js"; +export { createVllmQwenThinkingWrapper, wrapVllmProviderStream } from "./stream.js"; diff --git a/extensions/vllm/index.ts b/extensions/vllm/index.ts index dcc0f4e2dbf..5ccaee19684 100644 --- a/extensions/vllm/index.ts +++ b/extensions/vllm/index.ts @@ -10,6 +10,7 @@ import { VLLM_MODEL_PLACEHOLDER, VLLM_PROVIDER_LABEL, } from "./api.js"; +import { wrapVllmProviderStream } from "./stream.js"; const PROVIDER_ID = "vllm"; @@ -89,6 +90,7 @@ export default definePluginEntry({ "vLLM requires authentication to be registered as a provider. " + 'Set VLLM_API_KEY (any value works) or run "openclaw configure". ' + "See: https://docs.openclaw.ai/providers/vllm", + wrapStreamFn: wrapVllmProviderStream, }); }, }); diff --git a/extensions/vllm/stream.test.ts b/extensions/vllm/stream.test.ts new file mode 100644 index 00000000000..73352acbf81 --- /dev/null +++ b/extensions/vllm/stream.test.ts @@ -0,0 +1,170 @@ +import type { StreamFn } from "@mariozechner/pi-agent-core"; +import type { Context, Model } from "@mariozechner/pi-ai"; +import { describe, expect, it } from "vitest"; +import { createVllmQwenThinkingWrapper, wrapVllmProviderStream } from "./stream.js"; + +function capturePayload(params: { + format: "chat-template" | "top-level"; + thinkingLevel?: "off" | "low" | "medium" | "high" | "xhigh" | "max"; + reasoning?: unknown; + initialPayload?: Record; + model?: Partial>; +}): Record { + let captured: Record = {}; + const baseStreamFn: StreamFn = (_model, _context, options) => { + const payload = { ...params.initialPayload }; + options?.onPayload?.(payload, _model); + captured = payload; + return {} as ReturnType; + }; + + const wrapped = createVllmQwenThinkingWrapper({ + baseStreamFn, + format: params.format, + thinkingLevel: params.thinkingLevel ?? "high", + }); + void wrapped( + { + api: "openai-completions", + provider: "vllm", + id: "Qwen/Qwen3-8B", + reasoning: true, + ...params.model, + } as Model<"openai-completions">, + { messages: [] } as Context, + params.reasoning === undefined ? 
diff --git a/extensions/vllm/stream.test.ts b/extensions/vllm/stream.test.ts
new file mode 100644
index 00000000000..73352acbf81
--- /dev/null
+++ b/extensions/vllm/stream.test.ts
@@ -0,0 +1,170 @@
+import type { StreamFn } from "@mariozechner/pi-agent-core";
+import type { Context, Model } from "@mariozechner/pi-ai";
+import { describe, expect, it } from "vitest";
+import { createVllmQwenThinkingWrapper, wrapVllmProviderStream } from "./stream.js";
+
+function capturePayload(params: {
+  format: "chat-template" | "top-level";
+  thinkingLevel?: "off" | "low" | "medium" | "high" | "xhigh" | "max";
+  reasoning?: unknown;
+  initialPayload?: Record<string, unknown>;
+  model?: Partial<Model<"openai-completions">>;
+}): Record<string, unknown> {
+  let captured: Record<string, unknown> = {};
+  const baseStreamFn: StreamFn = (_model, _context, options) => {
+    const payload = { ...params.initialPayload };
+    options?.onPayload?.(payload, _model);
+    captured = payload;
+    return {} as ReturnType<StreamFn>;
+  };
+
+  const wrapped = createVllmQwenThinkingWrapper({
+    baseStreamFn,
+    format: params.format,
+    thinkingLevel: params.thinkingLevel ?? "high",
+  });
+  void wrapped(
+    {
+      api: "openai-completions",
+      provider: "vllm",
+      id: "Qwen/Qwen3-8B",
+      reasoning: true,
+      ...params.model,
+    } as Model<"openai-completions">,
+    { messages: [] } as Context,
+    params.reasoning === undefined ? {} : ({ reasoning: params.reasoning } as never),
+  );
+
+  return captured;
+}
+
+describe("createVllmQwenThinkingWrapper", () => {
+  it("maps Qwen chat-template thinking off to chat_template_kwargs", () => {
+    const payload = capturePayload({
+      format: "chat-template",
+      reasoning: "none",
+      initialPayload: {
+        reasoning_effort: "high",
+        reasoning: { effort: "high" },
+        reasoningEffort: "high",
+      },
+    });
+
+    expect(payload).toEqual({
+      chat_template_kwargs: {
+        enable_thinking: false,
+        preserve_thinking: true,
+      },
+    });
+  });
+
+  it("maps Qwen chat-template thinking on to chat_template_kwargs", () => {
+    expect(capturePayload({ format: "chat-template", reasoning: "medium" })).toEqual({
+      chat_template_kwargs: {
+        enable_thinking: true,
+        preserve_thinking: true,
+      },
+    });
+  });
+
+  it("preserves explicit chat-template kwargs while setting enable_thinking", () => {
+    expect(
+      capturePayload({
+        format: "chat-template",
+        thinkingLevel: "off",
+        initialPayload: {
+          chat_template_kwargs: {
+            preserve_thinking: false,
+            force_nonempty_content: true,
+          },
+        },
+      }),
+    ).toEqual({
+      chat_template_kwargs: {
+        enable_thinking: false,
+        preserve_thinking: false,
+        force_nonempty_content: true,
+      },
+    });
+  });
+
+  it("maps Qwen top-level thinking format to enable_thinking", () => {
+    expect(capturePayload({ format: "top-level", thinkingLevel: "off" })).toEqual({
+      enable_thinking: false,
+    });
+    expect(capturePayload({ format: "top-level", thinkingLevel: "high" })).toEqual({
+      enable_thinking: true,
+    });
+  });
+
+  it("skips non-reasoning and non-completions models", () => {
+    expect(capturePayload({ format: "chat-template", model: { reasoning: false } })).toEqual({});
+    expect(
+      capturePayload({ format: "chat-template", model: { api: "openai-responses" as never } }),
+    ).toEqual({});
+  });
+});
+
+describe("wrapVllmProviderStream", () => {
+  it("registers when vLLM Qwen thinking format params are configured", () => {
+    expect(
+      wrapVllmProviderStream({
+        provider: "vllm",
+        modelId: "Qwen/Qwen3-8B",
+        extraParams: { qwenThinkingFormat: "chat-template" },
+        model: {
+          api: "openai-completions",
+          provider: "vllm",
+          id: "Qwen/Qwen3-8B",
+          reasoning: true,
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeTypeOf("function");
+
+    expect(
+      wrapVllmProviderStream({
+        provider: "vllm",
+        modelId: "Qwen/Qwen3-8B",
+        extraParams: { qwen_thinking_format: "enable_thinking" },
+        model: {
+          api: "openai-completions",
+          provider: "vllm",
+          id: "Qwen/Qwen3-8B",
+          reasoning: true,
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeTypeOf("function");
+  });
+
+  it("skips unconfigured vLLM and non-vLLM providers", () => {
+    expect(
+      wrapVllmProviderStream({
+        provider: "vllm",
+        modelId: "Qwen/Qwen3-8B",
+        extraParams: {},
+        model: {
+          api: "openai-completions",
+          provider: "vllm",
+          id: "Qwen/Qwen3-8B",
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeUndefined();
+
+    expect(
+      wrapVllmProviderStream({
+        provider: "openai",
+        modelId: "gpt-5.4",
+        extraParams: { qwenThinkingFormat: "chat-template" },
+        model: {
+          api: "openai-completions",
+          provider: "openai",
+          id: "gpt-5.4",
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeUndefined();
+  });
+});
"@mariozechner/pi-agent-core"; +import { streamSimple } from "@mariozechner/pi-ai"; +import type { ProviderWrapStreamFnContext } from "openclaw/plugin-sdk/plugin-entry"; +import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared"; +import { streamWithPayloadPatch } from "openclaw/plugin-sdk/provider-stream-shared"; + +type VllmThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"]; +type VllmQwenThinkingFormat = "chat-template" | "top-level"; + +function isVllmProviderId(providerId: string): boolean { + return normalizeProviderId(providerId) === "vllm"; +} + +function normalizeQwenThinkingFormat(value: unknown): VllmQwenThinkingFormat | undefined { + if (typeof value !== "string") { + return undefined; + } + const normalized = value.trim().toLowerCase().replace(/_/g, "-"); + if ( + normalized === "chat-template" || + normalized === "chat-template-kwargs" || + normalized === "chat-template-kwarg" || + normalized === "chat-template-arguments" + ) { + return "chat-template"; + } + if ( + normalized === "top-level" || + normalized === "enable-thinking" || + normalized === "request-body" + ) { + return "top-level"; + } + return undefined; +} + +function resolveVllmQwenThinkingFormat( + extraParams: ProviderWrapStreamFnContext["extraParams"], +): VllmQwenThinkingFormat | undefined { + return normalizeQwenThinkingFormat( + extraParams?.qwenThinkingFormat ?? extraParams?.qwen_thinking_format, + ); +} + +function resolveOpenAICompatibleThinkingEnabled(params: { + thinkingLevel: VllmThinkingLevel; + options: Parameters[2]; +}): boolean { + const options = (params.options ?? {}) as { reasoningEffort?: unknown; reasoning?: unknown }; + const raw = options.reasoningEffort ?? options.reasoning ?? params.thinkingLevel ?? "high"; + if (typeof raw !== "string") { + return true; + } + const normalized = raw.trim().toLowerCase(); + return normalized !== "off" && normalized !== "none"; +} + +function setQwenChatTemplateThinking(payload: Record, enabled: boolean): void { + const existing = payload.chat_template_kwargs; + if (existing && typeof existing === "object" && !Array.isArray(existing)) { + const next: Record = { + ...(existing as Record), + enable_thinking: enabled, + }; + if (!Object.hasOwn(next, "preserve_thinking")) { + next.preserve_thinking = true; + } + payload.chat_template_kwargs = next; + return; + } + payload.chat_template_kwargs = { + enable_thinking: enabled, + preserve_thinking: true, + }; +} + +export function createVllmQwenThinkingWrapper(params: { + baseStreamFn: StreamFn | undefined; + format: VllmQwenThinkingFormat; + thinkingLevel: VllmThinkingLevel; +}): StreamFn { + const underlying = params.baseStreamFn ?? 
diff --git a/src/agents/openai-completions-compat.ts b/src/agents/openai-completions-compat.ts
index b0619f5f25c..e81fcfa9c87 100644
--- a/src/agents/openai-completions-compat.ts
+++ b/src/agents/openai-completions-compat.ts
@@ -17,7 +17,7 @@ export type OpenAICompletionsCompatDefaults = {
   supportsReasoningEffort: boolean;
   supportsUsageInStreaming: boolean;
   maxTokensField: "max_completion_tokens" | "max_tokens";
-  thinkingFormat: "openai" | "openrouter" | "deepseek" | "zai" | "qwen" | "qwen-chat-template";
+  thinkingFormat: "openai" | "openrouter" | "deepseek" | "zai";
   visibleReasoningDetailTypes: string[];
   supportsStrictMode: boolean;
 };
diff --git a/src/agents/openai-transport-stream.test.ts b/src/agents/openai-transport-stream.test.ts
index bb44991b6fa..e961c131352 100644
--- a/src/agents/openai-transport-stream.test.ts
+++ b/src/agents/openai-transport-stream.test.ts
@@ -1816,78 +1816,6 @@ describe("openai transport stream", () => {
     expect(params.stream_options).toMatchObject({ include_usage: true });
   });
 
-  it("maps qwen-chat-template thinking compat to vLLM chat template kwargs", () => {
-    const baseModel = {
-      id: "Qwen/Qwen3-8B",
-      name: "Qwen3 8B",
-      api: "openai-completions",
-      provider: "vllm",
-      baseUrl: "http://127.0.0.1:8000/v1",
-      reasoning: true,
-      input: ["text"],
-      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-      contextWindow: 32768,
-      maxTokens: 8192,
-      compat: { thinkingFormat: "qwen-chat-template" },
-    } as unknown as Model<"openai-completions">;
-    const context = {
-      systemPrompt: "system",
-      messages: [],
-      tools: [],
-    } as never;
-
-    const disabled = buildOpenAICompletionsParams(baseModel, context, {
-      reasoning: "none",
-    } as never) as {
-      chat_template_kwargs?: { enable_thinking?: unknown; preserve_thinking?: unknown };
-    };
-    const enabled = buildOpenAICompletionsParams(baseModel, context, {
-      reasoning: "medium",
-    } as never) as {
-      chat_template_kwargs?: { enable_thinking?: unknown; preserve_thinking?: unknown };
-    };
-
-    expect(disabled.chat_template_kwargs).toEqual({
-      enable_thinking: false,
-      preserve_thinking: true,
-    });
-    expect(disabled).not.toHaveProperty("reasoning_effort");
-    expect(enabled.chat_template_kwargs).toEqual({
-      enable_thinking: true,
-      preserve_thinking: true,
-    });
-    expect(enabled).not.toHaveProperty("reasoning_effort");
-  });
-
-  it("maps qwen thinking compat to top-level enable_thinking", () => {
-    const params = buildOpenAICompletionsParams(
-      {
-        id: "qwen3.6-plus",
-        name: "Qwen 3.6 Plus",
-        api: "openai-completions",
-        provider: "qwen-custom",
-        baseUrl: "https://example.com/v1",
-        reasoning: true,
-        input: ["text"],
-        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-        contextWindow: 32768,
-        maxTokens: 8192,
-        compat: { thinkingFormat: "qwen" },
-      } as unknown as Model<"openai-completions">,
-      {
-        systemPrompt: "system",
-        messages: [],
-        tools: [],
-      } as never,
-      {
-        reasoning: "none",
-      } as never,
-    ) as { enable_thinking?: unknown; reasoning_effort?: unknown };
-
-    expect(params.enable_thinking).toBe(false);
-    expect(params).not.toHaveProperty("reasoning_effort");
-  });
-
   it("enables streaming usage compat for generic providers on native DashScope endpoints", () => {
     const params = buildOpenAICompletionsParams(
       {
diff --git a/src/agents/openai-transport-stream.ts b/src/agents/openai-transport-stream.ts
index 6238ff7699b..7ae8ee6f453 100644
--- a/src/agents/openai-transport-stream.ts
+++ b/src/agents/openai-transport-stream.ts
@@ -87,8 +87,12 @@ type OpenAICompletionsOptions = BaseStreamOptions & {
   reasoningEffort?: OpenAIReasoningEffort;
 };
 
+type OpenAIModeCompatInput = Omit<ModelCompatConfig, "thinkingFormat"> & {
+  thinkingFormat?: string;
+};
+
 type OpenAIModeModel = Omit<Model<"openai-completions">, "compat"> & {
-  compat?: ModelCompatConfig;
+  compat?: OpenAIModeCompatInput | null;
 };
 
 type MutableAssistantOutput = {
@@ -1592,7 +1596,7 @@ function getCompat(model: OpenAIModeModel): {
     requiresAssistantAfterToolResult:
       compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
     requiresThinkingAsText: compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
-    thinkingFormat: (compat.thinkingFormat as string | undefined) ?? detected.thinkingFormat,
+    thinkingFormat: compat.thinkingFormat ?? detected.thinkingFormat,
     openRouterRouting: (compat.openRouterRouting as Record<string, unknown> | undefined) ?? {},
     vercelGatewayRouting:
       (compat.vercelGatewayRouting as Record<string, unknown> | undefined) ??
@@ -1631,29 +1635,6 @@ function resolveOpenAICompletionsReasoningEffort(options: OpenAICompletionsOptions
   return options?.reasoningEffort ?? options?.reasoning ?? "high";
 }
 
-function isCompletionsThinkingEnabled(effort: string): boolean {
-  return normalizeOpenAIReasoningEffort(effort) !== "none";
-}
-
-function setChatTemplateThinking(params: Record<string, unknown>, enabled: boolean): void {
-  const existing = params.chat_template_kwargs;
-  if (existing && typeof existing === "object" && !Array.isArray(existing)) {
-    const next: Record<string, unknown> = {
-      ...(existing as Record<string, unknown>),
-      enable_thinking: enabled,
-    };
-    if (!Object.hasOwn(next, "preserve_thinking")) {
-      next.preserve_thinking = true;
-    }
-    params.chat_template_kwargs = next;
-    return;
-  }
-  params.chat_template_kwargs = {
-    enable_thinking: enabled,
-    preserve_thinking: true,
-  };
-}
-
 function convertTools(
   tools: NonNullable<Context["tools"]>,
   compat: ReturnType<typeof getCompat>,
@@ -1837,15 +1818,7 @@ export function buildOpenAICompletionsParams(
         fallbackMap: compat.reasoningEffortMap,
       })
     : undefined;
-  if (compat.thinkingFormat === "qwen" && model.reasoning && completionsReasoningEffort) {
-    params.enable_thinking = isCompletionsThinkingEnabled(completionsReasoningEffort);
-  } else if (
-    compat.thinkingFormat === "qwen-chat-template" &&
-    model.reasoning &&
-    completionsReasoningEffort
-  ) {
-    setChatTemplateThinking(params, isCompletionsThinkingEnabled(completionsReasoningEffort));
-  } else if (
+  if (
     compat.thinkingFormat === "openrouter" &&
     model.reasoning &&
     resolvedCompletionsReasoningEffort
"high"; } -function isCompletionsThinkingEnabled(effort: string): boolean { - return normalizeOpenAIReasoningEffort(effort) !== "none"; -} - -function setChatTemplateThinking(params: Record, enabled: boolean): void { - const existing = params.chat_template_kwargs; - if (existing && typeof existing === "object" && !Array.isArray(existing)) { - const next: Record = { - ...(existing as Record), - enable_thinking: enabled, - }; - if (!Object.hasOwn(next, "preserve_thinking")) { - next.preserve_thinking = true; - } - params.chat_template_kwargs = next; - return; - } - params.chat_template_kwargs = { - enable_thinking: enabled, - preserve_thinking: true, - }; -} - function convertTools( tools: NonNullable, compat: ReturnType, @@ -1837,15 +1818,7 @@ export function buildOpenAICompletionsParams( fallbackMap: compat.reasoningEffortMap, }) : undefined; - if (compat.thinkingFormat === "qwen" && model.reasoning && completionsReasoningEffort) { - params.enable_thinking = isCompletionsThinkingEnabled(completionsReasoningEffort); - } else if ( - compat.thinkingFormat === "qwen-chat-template" && - model.reasoning && - completionsReasoningEffort - ) { - setChatTemplateThinking(params, isCompletionsThinkingEnabled(completionsReasoningEffort)); - } else if ( + if ( compat.thinkingFormat === "openrouter" && model.reasoning && resolvedCompletionsReasoningEffort diff --git a/src/config/config-misc.test.ts b/src/config/config-misc.test.ts index f45be41d15d..217d9995435 100644 --- a/src/config/config-misc.test.ts +++ b/src/config/config-misc.test.ts @@ -642,7 +642,7 @@ describe("model compat config schema", () => { supportsUsageInStreaming: true, supportsStrictMode: false, requiresStringContent: true, - thinkingFormat: "qwen", + thinkingFormat: "zai", requiresToolResultName: true, requiresAssistantAfterToolResult: false, requiresThinkingAsText: false, diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index e63da5864dc..91b293642cc 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -3105,14 +3105,6 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { type: "string", const: "zai", }, - { - type: "string", - const: "qwen", - }, - { - type: "string", - const: "qwen-chat-template", - }, ], }, requiresToolResultName: { diff --git a/src/config/types.models.ts b/src/config/types.models.ts index 30b3ae2f554..dcb4474dfcf 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -50,10 +50,9 @@ type SupportedAnthropicMessagesCompatFields = Pick< >; type SupportedThinkingFormat = - | NonNullable + | Exclude, "qwen" | "qwen-chat-template"> | "deepseek" - | "openrouter" - | "qwen-chat-template"; + | "openrouter"; export type ModelCompatConfig = SupportedOpenAICompatFields & SupportedOpenAIResponsesCompatFields & diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index a8ccaf80de6..5d6aa3631f4 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -204,8 +204,6 @@ export const ModelCompatSchema = z z.literal("openrouter"), z.literal("deepseek"), z.literal("zai"), - z.literal("qwen"), - z.literal("qwen-chat-template"), ]) .optional(), requiresToolResultName: z.boolean().optional(), diff --git a/src/model-catalog/normalize.ts b/src/model-catalog/normalize.ts index 325156de436..ff6b72c7489 100644 --- a/src/model-catalog/normalize.ts +++ b/src/model-catalog/normalize.ts @@ -212,9 +212,7 @@ function normalizeModelCatalogCompat(value: unknown): ModelCompatConfig | 
     thinkingFormat === "openai" ||
     thinkingFormat === "openrouter" ||
     thinkingFormat === "deepseek" ||
-    thinkingFormat === "zai" ||
-    thinkingFormat === "qwen" ||
-    thinkingFormat === "qwen-chat-template"
+    thinkingFormat === "zai"
   ) {
     compat.thinkingFormat = thinkingFormat;
   }