fix(providers): support zai preserved thinking

Peter Steinberger
2026-04-26 04:35:37 +01:00
parent 844d2bd515
commit b58223510c
5 changed files with 243 additions and 3 deletions

@@ -65,6 +65,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Providers/Z.AI: map OpenClaw thinking controls to Z.AI's `thinking` payload and add opt-in preserved thinking replay via `params.preserveThinking`, so GLM 5.x can keep prior `reasoning_content` when requested. Fixes #58680. Thanks @xuanmingguo.
- TTS: strip model-emitted TTS directives from streamed block text before channel
  delivery, including directives split across adjacent blocks, while preserving
  the accumulated raw reply for final-mode synthesis. Fixes #38937.

@@ -372,6 +372,7 @@ Time format in system prompt. Default: `auto` (OS preference).
- `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key; see the sketch after this list. See [Prompt Caching](/reference/prompt-caching) for details.
- `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override those defaults, and `extra_body.chat_template_kwargs` still has final precedence.
- `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
- `embeddedHarness`: default low-level embedded agent runtime policy. Omitted runtime defaults to OpenClaw Pi. Use `runtime: "pi"` to force the built-in PI harness, `runtime: "auto"` to let registered plugin harnesses claim supported models, or a registered harness id such as `runtime: "codex"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
- Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.
- `maxConcurrent`: max parallel agent runs across sessions (each session still serialized). Default: 4.
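
A minimal sketch of the three `params` scopes (the agent id and the `temperature` values are illustrative assumptions; each later scope overrides the earlier ones by key):

```json5
{
  agents: {
    defaults: {
      // Global base: applies to every provider/model.
      params: { temperature: 0.7 },
      models: {
        // Per-model: overrides the global base for this model ref.
        "zai/glm-5.1": {
          params: { temperature: 0.5, preserveThinking: true },
        },
      },
    },
    list: [
      {
        // Per-agent: wins over both scopes for the matching agent id.
        id: "research",
        params: { temperature: 0.2 },
      },
    ],
  },
}
```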

@@ -132,6 +132,38 @@ GLM models are available as `zai/<model>` (example: `zai/glm-5`). The default bu
</Accordion>
<Accordion title="Thinking and preserved thinking">
Z.AI thinking follows OpenClaw's `/think` controls. With thinking off,
OpenClaw sends `thinking: { type: "disabled" }` to avoid responses that
spend the output budget on `reasoning_content` before visible text.
Preserved thinking is opt-in because Z.AI requires the full historical
`reasoning_content` to be replayed, which increases prompt tokens. Enable it
per model:
```json5
{
  agents: {
    defaults: {
      models: {
        "zai/glm-5.1": {
          params: { preserveThinking: true },
        },
      },
    },
  },
}
```
When enabled and thinking is on, OpenClaw sends
`thinking: { type: "enabled", clear_thinking: false }` and replays prior
`reasoning_content` for the same OpenAI-compatible transcript.
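Illustratively, the resulting request body (shape taken from this commit's
plugin tests, trimmed to the relevant fields) looks roughly like:
```json5
{
  thinking: { type: "enabled", clear_thinking: false },
  messages: [
    { role: "user", content: "hi" },
    {
      role: "assistant",
      content: "visible reply",
      // Prior reasoning is replayed instead of being cleared server-side.
      reasoning_content: "prior reasoning",
    },
    { role: "user", content: "continue" },
  ],
}
```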
Advanced users can still override the exact provider payload with
`params.extra_body.thinking`.
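For example, a sketch of such an override (the exact `thinking` object shown
is an assumption; `extra_body` values are passed through verbatim and win on
key collisions):
```json5
{
  agents: {
    defaults: {
      models: {
        "zai/glm-5.1": {
          params: {
            extra_body: {
              // Merged last into the request body, so this overrides the
              // thinking object OpenClaw would otherwise generate.
              thinking: { type: "enabled", clear_thinking: true },
            },
          },
        },
      },
    },
  },
}
```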
</Accordion>
<Accordion title="Image understanding">
The bundled Z.AI plugin registers image understanding.

@@ -1,6 +1,7 @@
import type { StreamFn } from "@mariozechner/pi-agent-core";
import type { Context, Model } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { buildOpenAICompletionsParams } from "../../src/agents/openai-transport-stream.js";
import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js";
import plugin from "./index.js";
@@ -198,6 +199,169 @@ describe("zai provider plugin", () => {
    expect(capturedPayload).not.toHaveProperty("tool_stream");
  });
  it("maps thinking off to Z.AI thinking disabled", async () => {
    const provider = await registerSingleProviderPlugin(plugin);
    let capturedPayload: Record<string, unknown> | undefined;
    const baseStreamFn: StreamFn = (model, _context, options) => {
      const payload: Record<string, unknown> = {};
      options?.onPayload?.(payload as never, model as never);
      capturedPayload = payload;
      return {} as ReturnType<StreamFn>;
    };
    const wrapped = provider.wrapStreamFn?.({
      provider: "zai",
      modelId: "glm-5.1",
      extraParams: {},
      thinkingLevel: "off",
      streamFn: baseStreamFn,
    } as never);
    void wrapped?.(
      {
        api: "openai-completions",
        provider: "zai",
        id: "glm-5.1",
      } as Model<"openai-completions">,
      { messages: [] } as Context,
      {},
    );
    expect(capturedPayload).toMatchObject({
      tool_stream: true,
      thinking: { type: "disabled" },
    });
  });
  it("enables Z.AI preserved thinking only when requested", async () => {
    const provider = await registerSingleProviderPlugin(plugin);
    let capturedPayload: Record<string, unknown> | undefined;
    const baseStreamFn: StreamFn = (model, _context, options) => {
      const payload: Record<string, unknown> = {};
      options?.onPayload?.(payload as never, model as never);
      capturedPayload = payload;
      return {} as ReturnType<StreamFn>;
    };
    const wrappedWithoutPreserve = provider.wrapStreamFn?.({
      provider: "zai",
      modelId: "glm-5.1",
      extraParams: {},
      thinkingLevel: "low",
      streamFn: baseStreamFn,
    } as never);
    void wrappedWithoutPreserve?.(
      {
        api: "openai-completions",
        provider: "zai",
        id: "glm-5.1",
      } as Model<"openai-completions">,
      { messages: [] } as Context,
      {},
    );
    expect(capturedPayload).toMatchObject({ tool_stream: true });
    expect(capturedPayload).not.toHaveProperty("thinking");
    const wrappedWithPreserve = provider.wrapStreamFn?.({
      provider: "zai",
      modelId: "glm-5.1",
      extraParams: { preserveThinking: true },
      thinkingLevel: "low",
      streamFn: baseStreamFn,
    } as never);
    void wrappedWithPreserve?.(
      {
        api: "openai-completions",
        provider: "zai",
        id: "glm-5.1",
      } as Model<"openai-completions">,
      { messages: [] } as Context,
      {},
    );
    expect(capturedPayload).toMatchObject({
      tool_stream: true,
      thinking: { type: "enabled", clear_thinking: false },
    });
  });
  it("preserves replayed reasoning_content for Z.AI preserved thinking", async () => {
    const provider = await registerSingleProviderPlugin(plugin);
    let capturedPayload: Record<string, unknown> | undefined;
    const model = {
      provider: "zai",
      id: "glm-5.1",
      name: "GLM 5.1",
      api: "openai-completions",
      baseUrl: "https://api.z.ai/api/paas/v4",
      reasoning: true,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 200_000,
      maxTokens: 131_072,
    } as Model<"openai-completions">;
    const context = {
      messages: [
        { role: "user", content: "hi", timestamp: 1 },
        {
          role: "assistant",
          api: "openai-completions",
          provider: "zai",
          model: "glm-5.1",
          content: [
            {
              type: "thinking",
              thinking: "prior reasoning",
              thinkingSignature: "reasoning_content",
            },
            { type: "text", text: "visible reply" },
          ],
          usage: {
            input: 0,
            output: 0,
            cacheRead: 0,
            cacheWrite: 0,
            totalTokens: 0,
            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
          },
          stopReason: "stop",
          timestamp: 2,
        },
        { role: "user", content: "continue", timestamp: 3 },
      ],
    } as Context;
    const baseStreamFn: StreamFn = (streamModel, streamContext, options) => {
      const payload = buildOpenAICompletionsParams(streamModel as never, streamContext, {
        reasoning: "high",
      } as never);
      options?.onPayload?.(payload as never, streamModel as never);
      capturedPayload = payload;
      return {} as ReturnType<StreamFn>;
    };
    const wrapped = provider.wrapStreamFn?.({
      provider: "zai",
      modelId: "glm-5.1",
      extraParams: { preserve_thinking: true },
      thinkingLevel: "low",
      streamFn: baseStreamFn,
    } as never);
    void wrapped?.(model, context, {});
    expect(capturedPayload).toMatchObject({
      thinking: { type: "enabled", clear_thinking: false },
    });
    expect((capturedPayload?.messages as Array<Record<string, unknown>>)[1]).toMatchObject({
      role: "assistant",
      content: "visible reply",
      reasoning_content: "prior reasoning",
    });
  });
  it("defaults tool_stream extra params but preserves explicit values", async () => {
    const provider = await registerSingleProviderPlugin(plugin);

@@ -5,6 +5,7 @@ import {
  type ProviderAuthMethodNonInteractiveContext,
  type ProviderResolveDynamicModelContext,
  type ProviderRuntimeModel,
  type ProviderWrapStreamFnContext,
} from "openclaw/plugin-sdk/plugin-entry";
import {
applyAuthProfileConfig,
@@ -20,8 +21,11 @@ import {
  normalizeModelCompat,
  OPENAI_COMPATIBLE_REPLAY_HOOKS,
} from "openclaw/plugin-sdk/provider-model-shared";
import { TOOL_STREAM_DEFAULT_ON_HOOKS } from "openclaw/plugin-sdk/provider-stream-family";
import { defaultToolStreamExtraParams } from "openclaw/plugin-sdk/provider-stream-shared";
import {
  createPayloadPatchStreamWrapper,
  createToolStreamWrapper,
  defaultToolStreamExtraParams,
} from "openclaw/plugin-sdk/provider-stream-shared";
import { fetchZaiUsage, resolveLegacyPiAgentAccessToken } from "openclaw/plugin-sdk/provider-usage";
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
import { detectZaiEndpoint, type ZaiEndpointId } from "./detect.js";
@@ -72,6 +76,44 @@ function resolveZaiDefaultModel(modelIdOverride?: string): string {
  return modelIdOverride ? `zai/${modelIdOverride}` : ZAI_DEFAULT_MODEL_REF;
}
function isTrueParam(value: unknown): boolean {
  return value === true;
}
function shouldPreserveZaiThinking(extraParams?: Record<string, unknown>): boolean {
  // Accept both spellings so config and raw extra params behave the same.
  return isTrueParam(extraParams?.preserveThinking) || isTrueParam(extraParams?.preserve_thinking);
}
function isDisabledThinkingLevel(thinkingLevel: ProviderWrapStreamFnContext["thinkingLevel"]) {
  return thinkingLevel === "off";
}
function wrapZaiStreamFn(ctx: ProviderWrapStreamFnContext) {
  let streamFn = createToolStreamWrapper(ctx.streamFn, ctx.extraParams?.tool_stream !== false);
  const preserveThinking = shouldPreserveZaiThinking(ctx.extraParams);
  // Thinking is on and preserved thinking was not requested: nothing to patch.
  if (!isDisabledThinkingLevel(ctx.thinkingLevel) && !preserveThinking) {
    return streamFn;
  }
  streamFn = createPayloadPatchStreamWrapper(streamFn, ({ payload, model }) => {
    if (model.api !== "openai-completions" || model.provider !== PROVIDER_ID) {
      return;
    }
    if (isDisabledThinkingLevel(ctx.thinkingLevel)) {
      payload.thinking = { type: "disabled" };
      return;
    }
    if (preserveThinking) {
      payload.thinking = { type: "enabled", clear_thinking: false };
    }
  });
  return streamFn;
}
async function promptForZaiEndpoint(ctx: ProviderAuthContext): Promise<ZaiEndpointId> {
  return await ctx.prompter.select<ZaiEndpointId>({
    message: "Select Z.AI endpoint",
@@ -279,7 +321,7 @@ export default definePluginEntry({
  resolveDynamicModel: (ctx) => resolveGlm5ForwardCompatModel(ctx),
  ...OPENAI_COMPATIBLE_REPLAY_HOOKS,
  prepareExtraParams: (ctx) => defaultToolStreamExtraParams(ctx.extraParams),
  ...TOOL_STREAM_DEFAULT_ON_HOOKS,
  wrapStreamFn: (ctx) => wrapZaiStreamFn(ctx),
  resolveThinkingProfile: () => ({
    levels: [
      { id: "off", label: "off" },