diff --git a/CHANGELOG.md b/CHANGELOG.md
index 46225440d1b..15c4d7e14ef 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -65,6 +65,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
 
+- Providers/Z.AI: map OpenClaw thinking controls to Z.AI's `thinking` payload and add opt-in preserved thinking replay via `params.preserveThinking`, so GLM 5.x can keep prior `reasoning_content` when requested. Fixes #58680. Thanks @xuanmingguo.
 - TTS: strip model-emitted TTS directives from streamed block text before channel delivery, including directives split across adjacent blocks, while preserving the accumulated raw reply for final-mode synthesis. Fixes #38937.
diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md
index d5acd35f5c3..2586567c3dc 100644
--- a/docs/gateway/config-agents.md
+++ b/docs/gateway/config-agents.md
@@ -372,6 +372,7 @@ Time format in system prompt. Default: `auto` (OS preference).
 - `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
 - `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
 - `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override those defaults, and `extra_body.chat_template_kwargs` still has final precedence.
+- `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
 - `embeddedHarness`: default low-level embedded agent runtime policy. Omitted runtime defaults to OpenClaw Pi. Use `runtime: "pi"` to force the built-in PI harness, `runtime: "auto"` to let registered plugin harnesses claim supported models, or a registered harness id such as `runtime: "codex"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
 - Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.
 - `maxConcurrent`: max parallel agent runs across sessions (each session still serialized). Default: 4.
diff --git a/docs/providers/zai.md b/docs/providers/zai.md
index 114965543f1..43f45d7e8df 100644
--- a/docs/providers/zai.md
+++ b/docs/providers/zai.md
@@ -132,6 +132,38 @@ GLM models are available as `zai/` (example: `zai/glm-5`). The default bu
+
+Z.AI thinking follows OpenClaw's `/think` controls.
+With thinking off, OpenClaw sends `thinking: { type: "disabled" }` to avoid
+responses that spend the output budget on `reasoning_content` before visible
+text.
+
+Preserved thinking is opt-in because Z.AI requires the full historical
+`reasoning_content` to be replayed, which increases prompt tokens. Enable it
+per model:
+
+```json5
+{
+  agents: {
+    defaults: {
+      models: {
+        "zai/glm-5.1": {
+          params: { preserveThinking: true },
+        },
+      },
+    },
+  },
+}
+```
+
+When enabled and thinking is on, OpenClaw sends
+`thinking: { type: "enabled", clear_thinking: false }` and replays prior
+`reasoning_content` within the same OpenAI-compatible transcript.
+
+Advanced users can still override the exact provider payload with
+`params.extra_body.thinking`.
+
+
 The bundled Z.AI plugin registers image understanding.
diff --git a/extensions/zai/index.test.ts b/extensions/zai/index.test.ts
index 5081c68ea74..cbf76c05ba8 100644
--- a/extensions/zai/index.test.ts
+++ b/extensions/zai/index.test.ts
@@ -1,6 +1,7 @@
 import type { StreamFn } from "@mariozechner/pi-agent-core";
 import type { Context, Model } from "@mariozechner/pi-ai";
 import { describe, expect, it } from "vitest";
+import { buildOpenAICompletionsParams } from "../../src/agents/openai-transport-stream.js";
 import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js";
 import plugin from "./index.js";
@@ -198,6 +199,169 @@ describe("zai provider plugin", () => {
     expect(capturedPayload).not.toHaveProperty("tool_stream");
   });
 
+  it("maps thinking off to Z.AI thinking disabled", async () => {
+    const provider = await registerSingleProviderPlugin(plugin);
+    let capturedPayload: Record<string, unknown> | undefined;
+    const baseStreamFn: StreamFn = (model, _context, options) => {
+      const payload: Record<string, unknown> = {};
+      options?.onPayload?.(payload as never, model as never);
+      capturedPayload = payload;
+      return {} as ReturnType<StreamFn>;
+    };
+
+    const wrapped = provider.wrapStreamFn?.({
+      provider: "zai",
+      modelId: "glm-5.1",
+      extraParams: {},
+      thinkingLevel: "off",
+      streamFn: baseStreamFn,
+    } as never);
+
+    void wrapped?.(
+      {
+        api: "openai-completions",
+        provider: "zai",
+        id: "glm-5.1",
+      } as Model<"openai-completions">,
+      { messages: [] } as Context,
+      {},
+    );
+
+    expect(capturedPayload).toMatchObject({
+      tool_stream: true,
+      thinking: { type: "disabled" },
+    });
+  });
+
+  it("enables Z.AI preserved thinking only when requested", async () => {
+    const provider = await registerSingleProviderPlugin(plugin);
+    let capturedPayload: Record<string, unknown> | undefined;
+    const baseStreamFn: StreamFn = (model, _context, options) => {
+      const payload: Record<string, unknown> = {};
+      options?.onPayload?.(payload as never, model as never);
+      capturedPayload = payload;
+      return {} as ReturnType<StreamFn>;
+    };
+
+    const wrappedWithoutPreserve = provider.wrapStreamFn?.({
+      provider: "zai",
+      modelId: "glm-5.1",
+      extraParams: {},
+      thinkingLevel: "low",
+      streamFn: baseStreamFn,
+    } as never);
+
+    void wrappedWithoutPreserve?.(
+      {
+        api: "openai-completions",
+        provider: "zai",
+        id: "glm-5.1",
+      } as Model<"openai-completions">,
+      { messages: [] } as Context,
+      {},
+    );
+
+    expect(capturedPayload).toMatchObject({ tool_stream: true });
+    expect(capturedPayload).not.toHaveProperty("thinking");
+
+    const wrappedWithPreserve = provider.wrapStreamFn?.({
+      provider: "zai",
+      modelId: "glm-5.1",
+      extraParams: { preserveThinking: true },
+      thinkingLevel: "low",
+      streamFn: baseStreamFn,
+    } as never);
+
+    void wrappedWithPreserve?.(
+      {
+        api: "openai-completions",
+        provider: "zai",
+        id: "glm-5.1",
+      } as Model<"openai-completions">,
+      { messages: [] } as Context,
+      {},
+    );
+
+    expect(capturedPayload).toMatchObject({
+      tool_stream: true,
+      thinking: { type: "enabled", clear_thinking: false },
+    });
+  });
+
+  it("preserves replayed reasoning_content for Z.AI preserved thinking", async () => {
+    const provider = await registerSingleProviderPlugin(plugin);
+    let capturedPayload: Record<string, unknown> | undefined;
+    const model = {
+      provider: "zai",
+      id: "glm-5.1",
+      name: "GLM 5.1",
+      api: "openai-completions",
+      baseUrl: "https://api.z.ai/api/paas/v4",
+      reasoning: true,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 200_000,
+      maxTokens: 131_072,
+    } as Model<"openai-completions">;
+    const context = {
+      messages: [
+        { role: "user", content: "hi", timestamp: 1 },
+        {
+          role: "assistant",
+          api: "openai-completions",
+          provider: "zai",
+          model: "glm-5.1",
+          content: [
+            {
+              type: "thinking",
+              thinking: "prior reasoning",
+              thinkingSignature: "reasoning_content",
+            },
+            { type: "text", text: "visible reply" },
+          ],
+          usage: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            totalTokens: 0,
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+          },
+          stopReason: "stop",
+          timestamp: 2,
+        },
+        { role: "user", content: "continue", timestamp: 3 },
+      ],
+    } as Context;
+    const baseStreamFn: StreamFn = (streamModel, streamContext, options) => {
+      const payload = buildOpenAICompletionsParams(streamModel as never, streamContext, {
+        reasoning: "high",
+      } as never);
+      options?.onPayload?.(payload as never, streamModel as never);
+      capturedPayload = payload;
+      return {} as ReturnType<StreamFn>;
+    };
+
+    const wrapped = provider.wrapStreamFn?.({
+      provider: "zai",
+      modelId: "glm-5.1",
+      extraParams: { preserve_thinking: true },
+      thinkingLevel: "low",
+      streamFn: baseStreamFn,
+    } as never);
+
+    void wrapped?.(model, context, {});
+
+    expect(capturedPayload).toMatchObject({
+      thinking: { type: "enabled", clear_thinking: false },
+    });
+    expect((capturedPayload?.messages as Array<Record<string, unknown>>)[1]).toMatchObject({
+      role: "assistant",
+      content: "visible reply",
+      reasoning_content: "prior reasoning",
+    });
+  });
+
   it("defaults tool_stream extra params but preserves explicit values", async () => {
     const provider = await registerSingleProviderPlugin(plugin);
diff --git a/extensions/zai/index.ts b/extensions/zai/index.ts
index a60c905053c..210d5b3f0a1 100644
--- a/extensions/zai/index.ts
+++ b/extensions/zai/index.ts
@@ -5,6 +5,7 @@ import {
   type ProviderAuthMethodNonInteractiveContext,
   type ProviderResolveDynamicModelContext,
   type ProviderRuntimeModel,
+  type ProviderWrapStreamFnContext,
 } from "openclaw/plugin-sdk/plugin-entry";
 import {
   applyAuthProfileConfig,
@@ -20,8 +21,11 @@ import {
   normalizeModelCompat,
   OPENAI_COMPATIBLE_REPLAY_HOOKS,
 } from "openclaw/plugin-sdk/provider-model-shared";
-import { TOOL_STREAM_DEFAULT_ON_HOOKS } from "openclaw/plugin-sdk/provider-stream-family";
-import { defaultToolStreamExtraParams } from "openclaw/plugin-sdk/provider-stream-shared";
+import {
+  createPayloadPatchStreamWrapper,
+  createToolStreamWrapper,
+  defaultToolStreamExtraParams,
+} from "openclaw/plugin-sdk/provider-stream-shared";
 import { fetchZaiUsage, resolveLegacyPiAgentAccessToken } from "openclaw/plugin-sdk/provider-usage";
 import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
 import { detectZaiEndpoint, type ZaiEndpointId } from "./detect.js";
@@ -72,6 +76,44 @@ function resolveZaiDefaultModel(modelIdOverride?: string): string {
   return modelIdOverride ? `zai/${modelIdOverride}` : ZAI_DEFAULT_MODEL_REF;
 }
 
+function isTrueParam(value: unknown): boolean {
+  return value === true;
+}
+
+function shouldPreserveZaiThinking(extraParams?: Record<string, unknown>): boolean {
+  return isTrueParam(extraParams?.preserveThinking) || isTrueParam(extraParams?.preserve_thinking);
+}
+
+function isDisabledThinkingLevel(thinkingLevel: ProviderWrapStreamFnContext["thinkingLevel"]) {
+  return thinkingLevel === "off";
+}
+
+function wrapZaiStreamFn(ctx: ProviderWrapStreamFnContext) {
+  let streamFn = createToolStreamWrapper(ctx.streamFn, ctx.extraParams?.tool_stream !== false);
+  const preserveThinking = shouldPreserveZaiThinking(ctx.extraParams);
+
+  if (!isDisabledThinkingLevel(ctx.thinkingLevel) && !preserveThinking) {
+    return streamFn;
+  }
+
+  streamFn = createPayloadPatchStreamWrapper(streamFn, ({ payload, model }) => {
+    if (model.api !== "openai-completions" || model.provider !== PROVIDER_ID) {
+      return;
+    }
+
+    if (isDisabledThinkingLevel(ctx.thinkingLevel)) {
+      payload.thinking = { type: "disabled" };
+      return;
+    }
+
+    if (preserveThinking) {
+      payload.thinking = { type: "enabled", clear_thinking: false };
+    }
+  });
+
+  return streamFn;
+}
+
 async function promptForZaiEndpoint(ctx: ProviderAuthContext): Promise<ZaiEndpointId> {
   return await ctx.prompter.select({
     message: "Select Z.AI endpoint",
@@ -279,7 +321,7 @@ export default definePluginEntry({
   resolveDynamicModel: (ctx) => resolveGlm5ForwardCompatModel(ctx),
   ...OPENAI_COMPATIBLE_REPLAY_HOOKS,
   prepareExtraParams: (ctx) => defaultToolStreamExtraParams(ctx.extraParams),
-  ...TOOL_STREAM_DEFAULT_ON_HOOKS,
+  wrapStreamFn: (ctx) => wrapZaiStreamFn(ctx),
   resolveThinkingProfile: () => ({
     levels: [
       { id: "off", label: "off" },
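---

For reviewers wiring this up: the zai.md docs above note that `params.extra_body.thinking` can still override the mapped payload, but no example is shown. Below is a minimal sketch of that escape hatch, assuming the same config layout as the `preserveThinking` example; the `zai/glm-5.1` model ref and the pinned field values are illustrative only, and the payload shape (`type`, `clear_thinking`) mirrors what `wrapZaiStreamFn` emits in this patch.

```json5
{
  agents: {
    defaults: {
      models: {
        "zai/glm-5.1": {
          params: {
            // Illustrative override: pins the provider `thinking` payload
            // directly, bypassing the mapped /think controls. Field shape
            // follows the values wrapZaiStreamFn sends in this diff.
            extra_body: {
              thinking: { type: "enabled", clear_thinking: false },
            },
          },
        },
      },
    },
  },
}
```

Per the config-agents.md precedence note in this diff, `extra_body` wins over generated request keys, so a pinned `thinking` object takes effect even when `preserveThinking` is unset.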