fix(providers): support zai preserved thinking

Peter Steinberger
2026-04-26 04:35:37 +01:00
parent 844d2bd515
commit b58223510c
5 changed files with 243 additions and 3 deletions

@@ -65,6 +65,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Providers/Z.AI: map OpenClaw thinking controls to Z.AI's `thinking` payload and add opt-in preserved thinking replay via `params.preserveThinking`, so GLM 5.x can keep prior `reasoning_content` when requested. Fixes #58680. Thanks @xuanmingguo.
- TTS: strip model-emitted TTS directives from streamed block text before channel
  delivery, including directives split across adjacent blocks, while preserving
  the accumulated raw reply for final-mode synthesis. Fixes #38937.

@@ -372,6 +372,7 @@ Time format in system prompt. Default: `auto` (OS preference).
- `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key; see the sketch after this list. See [Prompt Caching](/reference/prompt-caching) for details.
- `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override those defaults, and `extra_body.chat_template_kwargs` still has final precedence.
- `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
- `embeddedHarness`: default low-level embedded agent runtime policy. Omitted runtime defaults to OpenClaw Pi. Use `runtime: "pi"` to force the built-in PI harness, `runtime: "auto"` to let registered plugin harnesses claim supported models, or a registered harness id such as `runtime: "codex"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
- Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.
- `maxConcurrent`: max parallel agent runs across sessions (each session still serialized). Default: 4.
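
A minimal sketch of the three `params` scopes (the agent id and the `temperature` values are illustrative assumptions; each later scope overrides the earlier ones by key):

```json5
{
  agents: {
    defaults: {
      // Global base: applies to every provider/model.
      params: { temperature: 0.7 },
      models: {
        // Per-model: overrides the global base for this model ref.
        "zai/glm-5.1": {
          params: { temperature: 0.5, preserveThinking: true },
        },
      },
    },
    list: [
      {
        // Per-agent: wins over both scopes for the matching agent id.
        id: "research",
        params: { temperature: 0.2 },
      },
    ],
  },
}
```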

@@ -132,6 +132,38 @@ GLM models are available as `zai/<model>` (example: `zai/glm-5`). The default bu
</Accordion>
<Accordion title="Thinking and preserved thinking">
Z.AI thinking follows OpenClaw's `/think` controls. With thinking off,
OpenClaw sends `thinking: { type: "disabled" }` to avoid responses that
spend the output budget on `reasoning_content` before visible text.
Preserved thinking is opt-in because Z.AI requires the full historical
`reasoning_content` to be replayed, which increases prompt tokens. Enable it
per model:
```json5
{
  agents: {
    defaults: {
      models: {
        "zai/glm-5.1": {
          params: { preserveThinking: true },
        },
      },
    },
  },
}
```
When enabled and thinking is on, OpenClaw sends
`thinking: { type: "enabled", clear_thinking: false }` and replays prior
`reasoning_content` for the same OpenAI-compatible transcript.
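Illustratively, the resulting request body (shape taken from this commit's
plugin tests, trimmed to the relevant fields) looks roughly like:
```json5
{
  thinking: { type: "enabled", clear_thinking: false },
  messages: [
    { role: "user", content: "hi" },
    {
      role: "assistant",
      content: "visible reply",
      // Prior reasoning is replayed instead of being cleared server-side.
      reasoning_content: "prior reasoning",
    },
    { role: "user", content: "continue" },
  ],
}
```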
Advanced users can still override the exact provider payload with
`params.extra_body.thinking`.
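For example, a sketch of such an override (the exact `thinking` object shown
is an assumption; `extra_body` values are passed through verbatim and win on
key collisions):
```json5
{
  agents: {
    defaults: {
      models: {
        "zai/glm-5.1": {
          params: {
            extra_body: {
              // Merged last into the request body, so this overrides the
              // thinking object OpenClaw would otherwise generate.
              thinking: { type: "enabled", clear_thinking: true },
            },
          },
        },
      },
    },
  },
}
```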
</Accordion>
<Accordion title="Image understanding">
The bundled Z.AI plugin registers image understanding.

@@ -1,6 +1,7 @@
import type { StreamFn } from "@mariozechner/pi-agent-core";
import type { Context, Model } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { buildOpenAICompletionsParams } from "../../src/agents/openai-transport-stream.js";
import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js";
import plugin from "./index.js";
@@ -198,6 +199,169 @@ describe("zai provider plugin", () => {
    expect(capturedPayload).not.toHaveProperty("tool_stream");
  });
  it("maps thinking off to Z.AI thinking disabled", async () => {
    const provider = await registerSingleProviderPlugin(plugin);
    let capturedPayload: Record<string, unknown> | undefined;
    const baseStreamFn: StreamFn = (model, _context, options) => {
      const payload: Record<string, unknown> = {};
      options?.onPayload?.(payload as never, model as never);
      capturedPayload = payload;
      return {} as ReturnType<StreamFn>;
    };
    const wrapped = provider.wrapStreamFn?.({
      provider: "zai",
      modelId: "glm-5.1",
      extraParams: {},
      thinkingLevel: "off",
      streamFn: baseStreamFn,
    } as never);
    void wrapped?.(
      {
        api: "openai-completions",
        provider: "zai",
        id: "glm-5.1",
      } as Model<"openai-completions">,
      { messages: [] } as Context,
      {},
    );
    expect(capturedPayload).toMatchObject({
      tool_stream: true,
      thinking: { type: "disabled" },
    });
  });
  it("enables Z.AI preserved thinking only when requested", async () => {
    const provider = await registerSingleProviderPlugin(plugin);
    let capturedPayload: Record<string, unknown> | undefined;
    const baseStreamFn: StreamFn = (model, _context, options) => {
      const payload: Record<string, unknown> = {};
      options?.onPayload?.(payload as never, model as never);
      capturedPayload = payload;
      return {} as ReturnType<StreamFn>;
    };
    const wrappedWithoutPreserve = provider.wrapStreamFn?.({
      provider: "zai",
      modelId: "glm-5.1",
      extraParams: {},
      thinkingLevel: "low",
      streamFn: baseStreamFn,
    } as never);
    void wrappedWithoutPreserve?.(
      {
        api: "openai-completions",
        provider: "zai",
        id: "glm-5.1",
      } as Model<"openai-completions">,
      { messages: [] } as Context,
      {},
    );
    expect(capturedPayload).toMatchObject({ tool_stream: true });
    expect(capturedPayload).not.toHaveProperty("thinking");
    const wrappedWithPreserve = provider.wrapStreamFn?.({
      provider: "zai",
      modelId: "glm-5.1",
      extraParams: { preserveThinking: true },
      thinkingLevel: "low",
      streamFn: baseStreamFn,
    } as never);
    void wrappedWithPreserve?.(
      {
        api: "openai-completions",
        provider: "zai",
        id: "glm-5.1",
      } as Model<"openai-completions">,
      { messages: [] } as Context,
      {},
    );
    expect(capturedPayload).toMatchObject({
      tool_stream: true,
      thinking: { type: "enabled", clear_thinking: false },
    });
  });
  it("preserves replayed reasoning_content for Z.AI preserved thinking", async () => {
    const provider = await registerSingleProviderPlugin(plugin);
    let capturedPayload: Record<string, unknown> | undefined;
    const model = {
      provider: "zai",
      id: "glm-5.1",
      name: "GLM 5.1",
      api: "openai-completions",
      baseUrl: "https://api.z.ai/api/paas/v4",
      reasoning: true,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 200_000,
      maxTokens: 131_072,
    } as Model<"openai-completions">;
    const context = {
      messages: [
        { role: "user", content: "hi", timestamp: 1 },
        {
          role: "assistant",
          api: "openai-completions",
          provider: "zai",
          model: "glm-5.1",
          content: [
            {
              type: "thinking",
              thinking: "prior reasoning",
              thinkingSignature: "reasoning_content",
            },
            { type: "text", text: "visible reply" },
          ],
          usage: {
            input: 0,
            output: 0,
            cacheRead: 0,
            cacheWrite: 0,
            totalTokens: 0,
            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
          },
          stopReason: "stop",
          timestamp: 2,
        },
        { role: "user", content: "continue", timestamp: 3 },
      ],
    } as Context;
    const baseStreamFn: StreamFn = (streamModel, streamContext, options) => {
      const payload = buildOpenAICompletionsParams(streamModel as never, streamContext, {
        reasoning: "high",
      } as never);
      options?.onPayload?.(payload as never, streamModel as never);
      capturedPayload = payload;
      return {} as ReturnType<StreamFn>;
    };
    const wrapped = provider.wrapStreamFn?.({
      provider: "zai",
      modelId: "glm-5.1",
      extraParams: { preserve_thinking: true },
      thinkingLevel: "low",
      streamFn: baseStreamFn,
    } as never);
    void wrapped?.(model, context, {});
    expect(capturedPayload).toMatchObject({
      thinking: { type: "enabled", clear_thinking: false },
    });
    expect((capturedPayload?.messages as Array<Record<string, unknown>>)[1]).toMatchObject({
      role: "assistant",
      content: "visible reply",
      reasoning_content: "prior reasoning",
    });
  });
  it("defaults tool_stream extra params but preserves explicit values", async () => {
    const provider = await registerSingleProviderPlugin(plugin);

@@ -5,6 +5,7 @@ import {
  type ProviderAuthMethodNonInteractiveContext,
  type ProviderResolveDynamicModelContext,
  type ProviderRuntimeModel,
  type ProviderWrapStreamFnContext,
} from "openclaw/plugin-sdk/plugin-entry";
import {
applyAuthProfileConfig,
@@ -20,8 +21,11 @@ import {
  normalizeModelCompat,
  OPENAI_COMPATIBLE_REPLAY_HOOKS,
} from "openclaw/plugin-sdk/provider-model-shared";
import { TOOL_STREAM_DEFAULT_ON_HOOKS } from "openclaw/plugin-sdk/provider-stream-family";
import { defaultToolStreamExtraParams } from "openclaw/plugin-sdk/provider-stream-shared";
import {
  createPayloadPatchStreamWrapper,
  createToolStreamWrapper,
  defaultToolStreamExtraParams,
} from "openclaw/plugin-sdk/provider-stream-shared";
import { fetchZaiUsage, resolveLegacyPiAgentAccessToken } from "openclaw/plugin-sdk/provider-usage";
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
import { detectZaiEndpoint, type ZaiEndpointId } from "./detect.js";
@@ -72,6 +76,44 @@ function resolveZaiDefaultModel(modelIdOverride?: string): string {
  return modelIdOverride ? `zai/${modelIdOverride}` : ZAI_DEFAULT_MODEL_REF;
}
function isTrueParam(value: unknown): boolean {
  return value === true;
}
function shouldPreserveZaiThinking(extraParams?: Record<string, unknown>): boolean {
  // Accept both spellings so config and raw extra params behave the same.
  return isTrueParam(extraParams?.preserveThinking) || isTrueParam(extraParams?.preserve_thinking);
}
function isDisabledThinkingLevel(thinkingLevel: ProviderWrapStreamFnContext["thinkingLevel"]) {
  return thinkingLevel === "off";
}
function wrapZaiStreamFn(ctx: ProviderWrapStreamFnContext) {
  let streamFn = createToolStreamWrapper(ctx.streamFn, ctx.extraParams?.tool_stream !== false);
  const preserveThinking = shouldPreserveZaiThinking(ctx.extraParams);
  // Thinking is on and preserved thinking was not requested: nothing to patch.
  if (!isDisabledThinkingLevel(ctx.thinkingLevel) && !preserveThinking) {
    return streamFn;
  }
  streamFn = createPayloadPatchStreamWrapper(streamFn, ({ payload, model }) => {
    if (model.api !== "openai-completions" || model.provider !== PROVIDER_ID) {
      return;
    }
    if (isDisabledThinkingLevel(ctx.thinkingLevel)) {
      payload.thinking = { type: "disabled" };
      return;
    }
    if (preserveThinking) {
      payload.thinking = { type: "enabled", clear_thinking: false };
    }
  });
  return streamFn;
}
async function promptForZaiEndpoint(ctx: ProviderAuthContext): Promise<ZaiEndpointId> {
  return await ctx.prompter.select<ZaiEndpointId>({
    message: "Select Z.AI endpoint",
@@ -279,7 +321,7 @@ export default definePluginEntry({
  resolveDynamicModel: (ctx) => resolveGlm5ForwardCompatModel(ctx),
  ...OPENAI_COMPATIBLE_REPLAY_HOOKS,
  prepareExtraParams: (ctx) => defaultToolStreamExtraParams(ctx.extraParams),
  ...TOOL_STREAM_DEFAULT_ON_HOOKS,
  wrapStreamFn: (ctx) => wrapZaiStreamFn(ctx),
  resolveThinkingProfile: () => ({
    levels: [
      { id: "off", label: "off" },