feat(config): support Qwen thinkingFormat values (#79777)
## Summary
- allow configured OpenAI-compatible Qwen models to opt into `qwen` and `qwen-chat-template` thinking payloads
- preserve those compat values through schema validation and catalog normalization
- map OpenClaw `/think` levels to `enable_thinking` or `chat_template_kwargs.enable_thinking` without also sending `reasoning_effort`
- clarify in the docs that request-level chat-template kwargs require compatible backends such as vLLM (see the opt-in sketch below)
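
A minimal opt-in sketch for readers skimming the diff. The provider id, base URL, and model id are illustrative placeholders; only `compat.thinkingFormat` and its two new values come from this change:

```ts
// Hypothetical provider entry; only compat.thinkingFormat is new here.
const providers = {
  local: {
    baseUrl: "http://127.0.0.1:8000/v1",
    api: "openai-completions",
    models: [
      {
        id: "qwen3-32b",
        name: "Qwen3 32B",
        reasoning: true,
        // "qwen" sends top-level enable_thinking; "qwen-chat-template"
        // nests it under chat_template_kwargs (vLLM-style backends).
        compat: { thinkingFormat: "qwen-chat-template" },
      },
    ],
  },
};
```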
## Verification
- git diff --check
- pnpm exec oxfmt --check --threads=1 CHANGELOG.md docs/gateway/config-agents.md docs/gateway/config-tools.md src/config/zod-schema.core.ts src/config/types.models.ts src/model-catalog/normalize.ts src/agents/openai-transport-stream.ts src/config/config-misc.test.ts src/model-catalog/normalize.test.ts src/agents/openai-transport-stream.test.ts
- pnpm config:schema:check
- pnpm test src/config/config-misc.test.ts src/model-catalog/normalize.test.ts src/agents/openai-transport-stream.test.ts
- GitHub CI on 2404edca39
Thanks @indulgeback.
@@ -54,6 +54,7 @@ Docs: https://docs.openclaw.ai
- Codex app-server: pin the managed Codex harness and Codex CLI smoke package to `@openai/codex@0.129.0`, defer OpenClaw integration dynamic tools behind Codex tool search by default, and accept current Codex service-tier values so legacy `fast` settings survive the stable harness upgrade as `priority`.
- Codex app-server: annotate message-tool-only direct chat turns in the dynamic `message` tool spec so visible replies are sent through `message(action="send")` instead of staying private. (#79704)
- Agents/PI: route explicit OpenAI Codex Responses runs through PI's native WebSocket-capable transport and remove OpenClaw's custom OpenAI Responses WebSocket stack while preserving auth injection, run abort signals, and prompt cache boundary stripping.
- Models/config: allow `compat.thinkingFormat` values `qwen` and `qwen-chat-template` for configured OpenAI-compatible Qwen models, preserving them through catalog normalization and mapping `/think` levels to `enable_thinking` or `chat_template_kwargs.enable_thinking`. Fixes #79677. (#79777) Thanks @indulgeback.
- Codex app-server: default implicit local stdio app-server permissions to guardian when Codex system requirements disallow the YOLO approval, reviewer, or sandbox value, including hostname-scoped remote sandbox entries, avoiding turn-start failures on managed hosts that permit only reviewed approval or narrower sandboxes.
- Plugins/install: run managed npm-root install, uninstall, prune, and repair commands from the managed root without a redundant `--prefix .`, avoiding npm 10.9.3 Arborist crashes on native Windows WhatsApp plugin installs. Fixes #78514. (#78902) Thanks @melihselamett-stack.
- Discord/voice: stream ElevenLabs TTS directly into Discord playback and send ElevenLabs latency optimization as the documented query parameter so spoken replies can start sooner.
@@ -393,6 +393,7 @@ Time format in system prompt. Default: `auto` (OS preference).
- `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
- `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. For vLLM Qwen thinking controls, set `compat.thinkingFormat` to `"qwen-chat-template"` or `"qwen"` on that model entry.
- `compat.thinkingFormat`: OpenAI-compatible thinking payload style. Use `"qwen"` for Qwen-style top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family backends that support request-level chat-template kwargs, such as vLLM. OpenClaw maps disabled thinking to `false` and enabled thinking to `true` (see the sketch after this list).
- `compat.supportedReasoningEfforts`: per-model OpenAI-compatible reasoning-effort list. Include `"xhigh"` only for custom endpoints that truly accept it; OpenClaw then exposes `/think xhigh` in command menus, Gateway session rows, session patch validation, agent CLI validation, and `llm-task` validation for that configured provider/model. Use `compat.reasoningEffortMap` when the backend wants a provider-specific value for a canonical level.
- `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
- Runtime policy belongs on providers or models, not on `agents.defaults`. Use `models.providers.<provider>.agentRuntime` for provider-wide rules, or `agents.defaults.models["provider/model"].agentRuntime` / `agents.list[].models["provider/model"].agentRuntime` for model-specific rules. OpenAI agent models on the official OpenAI provider select Codex by default.
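
To see how these keys compose, here is a minimal sketch validated the way the repo's schema tests do it. The provider id `local`, the URL, the model id, the `temperature` key, and the relative import path are illustrative assumptions, not values from this change:

```ts
import { OpenClawSchema } from "./src/config/zod-schema.core"; // import path assumed

// Global base params, a per-model override, and the new compat opt-in.
const res = OpenClawSchema.safeParse({
  agents: {
    defaults: {
      params: { temperature: 0.7 }, // global base (illustrative key)
      models: {
        "local/qwen3-32b": {
          // per-model params override the global base by key
          params: { chat_template_kwargs: { enable_thinking: false } },
        },
      },
    },
  },
  models: {
    providers: {
      local: {
        baseUrl: "http://127.0.0.1:8000/v1",
        api: "openai-completions",
        models: [
          { id: "qwen3-32b", name: "Qwen3 32B", compat: { thinkingFormat: "qwen-chat-template" } },
        ],
      },
    },
  },
});
console.log(res.success);
```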
@@ -474,6 +474,7 @@ OpenClaw uses the built-in model catalog. Add custom providers via `models.provi
- `models.providers.*.models.*.contextTokens`: optional runtime context cap. It overrides provider-level `contextTokens`; use it when you want a smaller effective context budget than the model's native `contextWindow`. `openclaw models list` shows both values when they differ.
- `models.providers.*.models.*.compat.supportsDeveloperRole`: optional compatibility hint. For `api: "openai-completions"` with a non-empty non-native `baseUrl` (host not `api.openai.com`), OpenClaw forces this to `false` at runtime. An empty or omitted `baseUrl` keeps default OpenAI behavior.
- `models.providers.*.models.*.compat.requiresStringContent`: optional compatibility hint for string-only OpenAI-compatible chat endpoints. When `true`, OpenClaw flattens pure-text `messages[].content` arrays into plain strings before sending the request.
- `models.providers.*.models.*.compat.thinkingFormat`: optional thinking payload hint. Use `"qwen"` for top-level `enable_thinking`, or `"qwen-chat-template"` for `chat_template_kwargs.enable_thinking` on Qwen-family OpenAI-compatible servers that support request-level chat-template kwargs, such as vLLM (see the payload sketch below).
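
For reference, the two hint values differ only in where the flag lands in the request body. A sketch of the shapes (model id and messages are placeholders; the field names match this change's tests):

```ts
// compat.thinkingFormat: "qwen" -> top-level flag on the request body
const qwenBody = { model: "qwen3-32b", messages: [], enable_thinking: false };

// compat.thinkingFormat: "qwen-chat-template" -> nested under chat_template_kwargs
const qwenChatTemplateBody = {
  model: "qwen3-32b",
  messages: [],
  chat_template_kwargs: { enable_thinking: false },
};
```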
</Accordion>
<Accordion title="Amazon Bedrock discovery">
@@ -2597,6 +2597,72 @@ describe("openai transport stream", () => {
    expect(disabled.reasoning_effort).toBe("none");
  });

  it("maps qwen thinking format to top-level enable_thinking", () => {
    const baseModel = {
      id: "qwen3.5-32b",
      name: "Qwen 3.5 32B",
      api: "openai-completions",
      provider: "llama-cpp",
      baseUrl: "http://127.0.0.1:8080/v1",
      reasoning: true,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 131072,
      maxTokens: 8192,
      compat: {
        thinkingFormat: "qwen",
      },
    } as unknown as Model<"openai-completions">;
    const context = {
      systemPrompt: "system",
      messages: [],
      tools: [],
    } as never;

    const enabled = buildOpenAICompletionsParams(baseModel, context, {
      reasoning: "medium",
    } as never) as { enable_thinking?: unknown; reasoning_effort?: unknown };
    const disabled = buildOpenAICompletionsParams(baseModel, context, {
      reasoning: "off",
    } as never) as { enable_thinking?: unknown; reasoning_effort?: unknown };

    expect(enabled.enable_thinking).toBe(true);
    expect(disabled.enable_thinking).toBe(false);
    expect(enabled).not.toHaveProperty("reasoning_effort");
    expect(disabled).not.toHaveProperty("reasoning_effort");
  });

  it("maps qwen-chat-template thinking format to chat_template_kwargs", () => {
    const params = buildOpenAICompletionsParams(
      {
        id: "qwen3.5-32b",
        name: "Qwen 3.5 32B",
        api: "openai-completions",
        provider: "llama-cpp",
        baseUrl: "http://127.0.0.1:8080/v1",
        reasoning: true,
        input: ["text"],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 131072,
        maxTokens: 8192,
        compat: {
          thinkingFormat: "qwen-chat-template",
        },
      } as unknown as Model<"openai-completions">,
      {
        systemPrompt: "system",
        messages: [],
        tools: [],
      } as never,
      {
        reasoning: "off",
      } as never,
    ) as { chat_template_kwargs?: Record<string, unknown>; reasoning_effort?: unknown };

    expect(params.chat_template_kwargs).toEqual({ enable_thinking: false });
    expect(params).not.toHaveProperty("reasoning_effort");
  });

  it("omits unsupported disabled reasoning for completions providers", () => {
    const params = buildOpenAICompletionsParams(
      {
@@ -1831,6 +1831,44 @@ function resolveOpenAICompletionsReasoningEffort(options: OpenAICompletionsOptio
  return options?.reasoningEffort ?? options?.reasoning ?? "high";
}

// Both Qwen formats share the same enable/disable semantics; they differ only
// in where the flag is placed on the outgoing request body.
function isQwenOpenAICompletionsThinkingFormat(format: string): boolean {
  return format === "qwen" || format === "qwen-chat-template";
}

function isOpenAICompletionsThinkingEnabled(effort: OpenAIReasoningEffort): boolean {
  const normalized = effort.trim().toLowerCase();
  return normalized !== "off" && normalized !== "none";
}

// Merge enable_thinking into any caller-supplied chat_template_kwargs object
// rather than clobbering it; non-object values are replaced wholesale.
function setQwenChatTemplateThinking(params: Record<string, unknown>, enabled: boolean): void {
  const existing = params.chat_template_kwargs;
  params.chat_template_kwargs =
    existing && typeof existing === "object" && !Array.isArray(existing)
      ? { ...(existing as Record<string, unknown>), enable_thinking: enabled }
      : { enable_thinking: enabled };
}

// Returns true when a Qwen thinking payload was applied, so the caller can
// skip emitting reasoning_effort for these formats.
function applyQwenOpenAICompletionsThinkingParams(params: {
  compatThinkingFormat: string;
  modelReasoning: boolean;
  payload: Record<string, unknown>;
  requestedEffort: OpenAIReasoningEffort;
}): boolean {
  if (
    !params.modelReasoning ||
    !isQwenOpenAICompletionsThinkingFormat(params.compatThinkingFormat)
  ) {
    return false;
  }
  const enabled = isOpenAICompletionsThinkingEnabled(params.requestedEffort);
  if (params.compatThinkingFormat === "qwen-chat-template") {
    setQwenChatTemplateThinking(params.payload, enabled);
  } else {
    params.payload.enable_thinking = enabled;
  }
  return true;
}
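
A quick usage sketch of the helper above; the payload literal and the call-site shape are illustrative, but the in/out behavior matches the tests in this change:

```ts
// `payload` stands in for the completions request body being assembled.
const payload: Record<string, unknown> = { model: "qwen3.5-32b" };
const handled = applyQwenOpenAICompletionsThinkingParams({
  compatThinkingFormat: "qwen-chat-template",
  modelReasoning: true,
  payload,
  requestedEffort: "off",
});
// handled === true, payload.chat_template_kwargs deep-equals { enable_thinking: false },
// and the caller skips reasoning_effort because the format was handled.
```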
function convertTools(
  tools: NonNullable<Context["tools"]>,
  compat: ReturnType<typeof getCompat>,
@@ -2030,6 +2068,12 @@ export function buildOpenAICompletionsParams(
      : undefined;
  const omitGpt54MiniToolReasoningEffort =
    isOpenAIGpt54MiniModel(model) && Array.isArray(params.tools) && params.tools.length > 0;
  const handledQwenThinkingFormat = applyQwenOpenAICompletionsThinkingParams({
    compatThinkingFormat: compat.thinkingFormat,
    modelReasoning: model.reasoning,
    payload: params,
    requestedEffort: completionsReasoningEffort,
  });
  if (
    compat.thinkingFormat === "openrouter" &&
    model.reasoning &&
@@ -2042,6 +2086,7 @@ export function buildOpenAICompletionsParams(
    resolvedCompletionsReasoningEffort &&
    model.reasoning &&
    compat.supportsReasoningEffort &&
    !handledQwenThinkingFormat &&
    !omitGpt54MiniToolReasoningEffort
  ) {
    params.reasoning_effort = resolvedCompletionsReasoningEffort;
@@ -888,37 +888,40 @@ describe("broadcast", () => {
});

describe("model compat config schema", () => {
-  it("accepts full openai-completions compat fields", () => {
-    const res = OpenClawSchema.safeParse({
-      models: {
-        providers: {
-          local: {
-            baseUrl: "http://127.0.0.1:1234/v1",
-            api: "openai-completions",
-            models: [
-              {
-                id: "qwen3-32b",
-                name: "Qwen3 32B",
-                compat: {
-                  supportsUsageInStreaming: true,
-                  supportsStrictMode: false,
-                  requiresStringContent: true,
-                  thinkingFormat: "zai",
-                  requiresToolResultName: true,
-                  requiresAssistantAfterToolResult: false,
-                  requiresThinkingAsText: false,
-                  requiresMistralToolIds: false,
-                  requiresOpenAiAnthropicToolPayload: true,
-                },
-              },
-            ],
-          },
-        },
-      },
-    });
-
-    expect(res.success).toBe(true);
-  });
+  it.each(["zai", "qwen", "qwen-chat-template"] as const)(
+    "accepts full openai-completions compat fields with %s thinking format",
+    (thinkingFormat) => {
+      const res = OpenClawSchema.safeParse({
+        models: {
+          providers: {
+            local: {
+              baseUrl: "http://127.0.0.1:1234/v1",
+              api: "openai-completions",
+              models: [
+                {
+                  id: "qwen3-32b",
+                  name: "Qwen3 32B",
+                  compat: {
+                    supportsUsageInStreaming: true,
+                    supportsStrictMode: false,
+                    requiresStringContent: true,
+                    thinkingFormat,
+                    requiresToolResultName: true,
+                    requiresAssistantAfterToolResult: false,
+                    requiresThinkingAsText: false,
+                    requiresMistralToolIds: false,
+                    requiresOpenAiAnthropicToolPayload: true,
+                  },
+                },
+              ],
+            },
+          },
+        },
+      });
+
+      expect(res.success).toBe(true);
+    },
+  );
});

describe("config paths", () => {
@@ -51,7 +51,7 @@ type SupportedAnthropicMessagesCompatFields = Pick<
>;

type SupportedThinkingFormat =
-  | Exclude<NonNullable<OpenAICompletionsCompat["thinkingFormat"]>, "qwen" | "qwen-chat-template">
+  | NonNullable<OpenAICompletionsCompat["thinkingFormat"]>
  | "deepseek"
  | "openrouter";
@@ -205,6 +205,8 @@ const ModelCompatSchema = z
      z.literal("openai"),
      z.literal("openrouter"),
      z.literal("deepseek"),
      z.literal("qwen"),
      z.literal("qwen-chat-template"),
      z.literal("zai"),
    ])
    .optional(),
@@ -53,6 +53,7 @@ describe("model catalog normalization", () => {
        compat: {
          supportsTools: true,
          supportsStore: "yes",
          thinkingFormat: "qwen-chat-template",
          unknownFlag: true,
        },
        status: "preview",
@@ -137,6 +138,7 @@ describe("model catalog normalization", () => {
        },
        compat: {
          supportsTools: true,
          thinkingFormat: "qwen-chat-template",
        },
        status: "preview",
        statusReason: "rolling out",
@@ -223,6 +223,8 @@ function normalizeModelCatalogCompat(value: unknown): ModelCompatConfig | undefi
    thinkingFormat === "openai" ||
    thinkingFormat === "openrouter" ||
    thinkingFormat === "deepseek" ||
    thinkingFormat === "qwen" ||
    thinkingFormat === "qwen-chat-template" ||
    thinkingFormat === "zai"
  ) {
    compat.thinkingFormat = thinkingFormat;
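
And a sketch of what the normalization above does with the fixture from the normalize test earlier in this diff, assuming the function is called directly:

```ts
const normalized = normalizeModelCatalogCompat({
  supportsTools: true,
  thinkingFormat: "qwen-chat-template",
  unknownFlag: true, // unrecognized flags are expected to be dropped
});
// => { supportsTools: true, thinkingFormat: "qwen-chat-template" }
```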