mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:10:44 +00:00
fix(providers): support zai preserved thinking
This commit is contained in:
@@ -65,6 +65,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Providers/Z.AI: map OpenClaw thinking controls to Z.AI's `thinking` payload and add opt-in preserved thinking replay via `params.preserveThinking`, so GLM 5.x can keep prior `reasoning_content` when requested. Fixes #58680. Thanks @xuanmingguo.
|
||||
- TTS: strip model-emitted TTS directives from streamed block text before channel
|
||||
delivery, including directives split across adjacent blocks, while preserving
|
||||
the accumulated raw reply for final-mode synthesis. Fixes #38937.
|
||||
|
||||
@@ -372,6 +372,7 @@ Time format in system prompt. Default: `auto` (OS preference).
|
||||
- `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
|
||||
- `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
|
||||
- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override those defaults, and `extra_body.chat_template_kwargs` still has final precedence.
|
||||
- `params.preserveThinking` (alias: `params.preserve_thinking`): Z.AI-only opt-in for preserved thinking. When set to `true` and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
|
||||
- `embeddedHarness`: default low-level embedded agent runtime policy. Omitted runtime defaults to OpenClaw Pi. Use `runtime: "pi"` to force the built-in PI harness, `runtime: "auto"` to let registered plugin harnesses claim supported models, or a registered harness id such as `runtime: "codex"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
|
||||
- Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.
|
||||
- `maxConcurrent`: max parallel agent runs across sessions (each session still serialized). Default: 4.
|
||||
|
||||
@@ -132,6 +132,38 @@ GLM models are available as `zai/<model>` (example: `zai/glm-5`). The default bu
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Thinking and preserved thinking">
|
||||
Z.AI thinking follows OpenClaw's `/think` controls. With thinking off,
|
||||
OpenClaw sends `thinking: { type: "disabled" }` to avoid responses that
|
||||
spend the output budget on `reasoning_content` before visible text.
|
||||
|
||||
Preserved thinking is opt-in because Z.AI requires the full historical
|
||||
`reasoning_content` to be replayed, which increases prompt tokens. Enable it
|
||||
per model:
|
||||
|
||||
```json5
|
||||
{
|
||||
agents: {
|
||||
defaults: {
|
||||
models: {
|
||||
"zai/glm-5.1": {
|
||||
params: { preserveThinking: true },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
When enabled and thinking is on, OpenClaw sends
|
||||
`thinking: { type: "enabled", clear_thinking: false }` and replays prior
|
||||
`reasoning_content` for the same OpenAI-compatible transcript.
|
||||
|
||||
Advanced users can still override the exact provider payload with
|
||||
`params.extra_body.thinking`.
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Image understanding">
|
||||
The bundled Z.AI plugin registers image understanding.
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import type { StreamFn } from "@mariozechner/pi-agent-core";
|
||||
import type { Context, Model } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { buildOpenAICompletionsParams } from "../../src/agents/openai-transport-stream.js";
|
||||
import { registerSingleProviderPlugin } from "../../test/helpers/plugins/plugin-registration.js";
|
||||
import plugin from "./index.js";
|
||||
|
||||
@@ -198,6 +199,169 @@ describe("zai provider plugin", () => {
|
||||
expect(capturedPayload).not.toHaveProperty("tool_stream");
|
||||
});
|
||||
|
||||
it("maps thinking off to Z.AI thinking disabled", async () => {
|
||||
const provider = await registerSingleProviderPlugin(plugin);
|
||||
let capturedPayload: Record<string, unknown> | undefined;
|
||||
const baseStreamFn: StreamFn = (model, _context, options) => {
|
||||
const payload: Record<string, unknown> = {};
|
||||
options?.onPayload?.(payload as never, model as never);
|
||||
capturedPayload = payload;
|
||||
return {} as ReturnType<StreamFn>;
|
||||
};
|
||||
|
||||
const wrapped = provider.wrapStreamFn?.({
|
||||
provider: "zai",
|
||||
modelId: "glm-5.1",
|
||||
extraParams: {},
|
||||
thinkingLevel: "off",
|
||||
streamFn: baseStreamFn,
|
||||
} as never);
|
||||
|
||||
void wrapped?.(
|
||||
{
|
||||
api: "openai-completions",
|
||||
provider: "zai",
|
||||
id: "glm-5.1",
|
||||
} as Model<"openai-completions">,
|
||||
{ messages: [] } as Context,
|
||||
{},
|
||||
);
|
||||
|
||||
expect(capturedPayload).toMatchObject({
|
||||
tool_stream: true,
|
||||
thinking: { type: "disabled" },
|
||||
});
|
||||
});
|
||||
|
||||
it("enables Z.AI preserved thinking only when requested", async () => {
|
||||
const provider = await registerSingleProviderPlugin(plugin);
|
||||
let capturedPayload: Record<string, unknown> | undefined;
|
||||
const baseStreamFn: StreamFn = (model, _context, options) => {
|
||||
const payload: Record<string, unknown> = {};
|
||||
options?.onPayload?.(payload as never, model as never);
|
||||
capturedPayload = payload;
|
||||
return {} as ReturnType<StreamFn>;
|
||||
};
|
||||
|
||||
const wrappedWithoutPreserve = provider.wrapStreamFn?.({
|
||||
provider: "zai",
|
||||
modelId: "glm-5.1",
|
||||
extraParams: {},
|
||||
thinkingLevel: "low",
|
||||
streamFn: baseStreamFn,
|
||||
} as never);
|
||||
|
||||
void wrappedWithoutPreserve?.(
|
||||
{
|
||||
api: "openai-completions",
|
||||
provider: "zai",
|
||||
id: "glm-5.1",
|
||||
} as Model<"openai-completions">,
|
||||
{ messages: [] } as Context,
|
||||
{},
|
||||
);
|
||||
|
||||
expect(capturedPayload).toMatchObject({ tool_stream: true });
|
||||
expect(capturedPayload).not.toHaveProperty("thinking");
|
||||
|
||||
const wrappedWithPreserve = provider.wrapStreamFn?.({
|
||||
provider: "zai",
|
||||
modelId: "glm-5.1",
|
||||
extraParams: { preserveThinking: true },
|
||||
thinkingLevel: "low",
|
||||
streamFn: baseStreamFn,
|
||||
} as never);
|
||||
|
||||
void wrappedWithPreserve?.(
|
||||
{
|
||||
api: "openai-completions",
|
||||
provider: "zai",
|
||||
id: "glm-5.1",
|
||||
} as Model<"openai-completions">,
|
||||
{ messages: [] } as Context,
|
||||
{},
|
||||
);
|
||||
|
||||
expect(capturedPayload).toMatchObject({
|
||||
tool_stream: true,
|
||||
thinking: { type: "enabled", clear_thinking: false },
|
||||
});
|
||||
});
|
||||
|
||||
it("preserves replayed reasoning_content for Z.AI preserved thinking", async () => {
|
||||
const provider = await registerSingleProviderPlugin(plugin);
|
||||
let capturedPayload: Record<string, unknown> | undefined;
|
||||
const model = {
|
||||
provider: "zai",
|
||||
id: "glm-5.1",
|
||||
name: "GLM 5.1",
|
||||
api: "openai-completions",
|
||||
baseUrl: "https://api.z.ai/api/paas/v4",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200_000,
|
||||
maxTokens: 131_072,
|
||||
} as Model<"openai-completions">;
|
||||
const context = {
|
||||
messages: [
|
||||
{ role: "user", content: "hi", timestamp: 1 },
|
||||
{
|
||||
role: "assistant",
|
||||
api: "openai-completions",
|
||||
provider: "zai",
|
||||
model: "glm-5.1",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "prior reasoning",
|
||||
thinkingSignature: "reasoning_content",
|
||||
},
|
||||
{ type: "text", text: "visible reply" },
|
||||
],
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: 2,
|
||||
},
|
||||
{ role: "user", content: "continue", timestamp: 3 },
|
||||
],
|
||||
} as Context;
|
||||
const baseStreamFn: StreamFn = (streamModel, streamContext, options) => {
|
||||
const payload = buildOpenAICompletionsParams(streamModel as never, streamContext, {
|
||||
reasoning: "high",
|
||||
} as never);
|
||||
options?.onPayload?.(payload as never, streamModel as never);
|
||||
capturedPayload = payload;
|
||||
return {} as ReturnType<StreamFn>;
|
||||
};
|
||||
|
||||
const wrapped = provider.wrapStreamFn?.({
|
||||
provider: "zai",
|
||||
modelId: "glm-5.1",
|
||||
extraParams: { preserve_thinking: true },
|
||||
thinkingLevel: "low",
|
||||
streamFn: baseStreamFn,
|
||||
} as never);
|
||||
|
||||
void wrapped?.(model, context, {});
|
||||
|
||||
expect(capturedPayload).toMatchObject({
|
||||
thinking: { type: "enabled", clear_thinking: false },
|
||||
});
|
||||
expect((capturedPayload?.messages as Array<Record<string, unknown>>)[1]).toMatchObject({
|
||||
role: "assistant",
|
||||
content: "visible reply",
|
||||
reasoning_content: "prior reasoning",
|
||||
});
|
||||
});
|
||||
|
||||
it("defaults tool_stream extra params but preserves explicit values", async () => {
|
||||
const provider = await registerSingleProviderPlugin(plugin);
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
type ProviderAuthMethodNonInteractiveContext,
|
||||
type ProviderResolveDynamicModelContext,
|
||||
type ProviderRuntimeModel,
|
||||
type ProviderWrapStreamFnContext,
|
||||
} from "openclaw/plugin-sdk/plugin-entry";
|
||||
import {
|
||||
applyAuthProfileConfig,
|
||||
@@ -20,8 +21,11 @@ import {
|
||||
normalizeModelCompat,
|
||||
OPENAI_COMPATIBLE_REPLAY_HOOKS,
|
||||
} from "openclaw/plugin-sdk/provider-model-shared";
|
||||
import { TOOL_STREAM_DEFAULT_ON_HOOKS } from "openclaw/plugin-sdk/provider-stream-family";
|
||||
import { defaultToolStreamExtraParams } from "openclaw/plugin-sdk/provider-stream-shared";
|
||||
import {
|
||||
createPayloadPatchStreamWrapper,
|
||||
createToolStreamWrapper,
|
||||
defaultToolStreamExtraParams,
|
||||
} from "openclaw/plugin-sdk/provider-stream-shared";
|
||||
import { fetchZaiUsage, resolveLegacyPiAgentAccessToken } from "openclaw/plugin-sdk/provider-usage";
|
||||
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
|
||||
import { detectZaiEndpoint, type ZaiEndpointId } from "./detect.js";
|
||||
@@ -72,6 +76,44 @@ function resolveZaiDefaultModel(modelIdOverride?: string): string {
|
||||
return modelIdOverride ? `zai/${modelIdOverride}` : ZAI_DEFAULT_MODEL_REF;
|
||||
}
|
||||
|
||||
/**
 * Reports whether a loosely-typed param value is the boolean literal `true`.
 *
 * Truthy non-boolean values (for example the string "true" or the number 1)
 * are rejected, so only an explicit boolean opt-in counts.
 *
 * @param value - Raw param value of unknown shape.
 * @returns `true` only when `value` is exactly the boolean `true`.
 */
function isTrueParam(value: unknown): boolean {
  return typeof value === "boolean" && value;
}
|
||||
|
||||
/**
 * Decides whether preserved thinking was requested through the extra params.
 *
 * Both the camelCase key (`preserveThinking`) and the snake_case key
 * (`preserve_thinking`) are accepted; either one must be the boolean `true`.
 *
 * @param extraParams - Optional bag of extra params; may be undefined.
 * @returns `true` when either spelling of the flag is exactly `true`.
 */
function shouldPreserveZaiThinking(extraParams?: Record<string, unknown>): boolean {
  // The camelCase spelling is checked first; the snake_case one is only read as a fallback.
  if (isTrueParam(extraParams?.preserveThinking)) {
    return true;
  }
  return isTrueParam(extraParams?.preserve_thinking);
}
|
||||
|
||||
/**
 * Reports whether the given thinking level means thinking is turned off.
 *
 * @param thinkingLevel - Thinking level taken from the wrap-stream context.
 * @returns `true` only for the `"off"` level.
 */
function isDisabledThinkingLevel(thinkingLevel: ProviderWrapStreamFnContext["thinkingLevel"]) {
  switch (thinkingLevel) {
    case "off":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Builds the Z.AI stream function for a wrap-stream context.
 *
 * The incoming stream function is always passed through
 * `createToolStreamWrapper`, with the flag on unless
 * `extraParams.tool_stream` is exactly `false`. A payload patch is layered on
 * top only when thinking is off or preserved thinking was requested:
 *
 * - thinking level `"off"` sets `payload.thinking = { type: "disabled" }`;
 * - otherwise, with preserved thinking requested, it sets
 *   `payload.thinking = { type: "enabled", clear_thinking: false }`.
 *
 * The patch leaves payloads untouched for models that are not
 * `openai-completions` models of this provider.
 *
 * @param ctx - Wrap-stream context carrying the base stream function, extra
 *   params, and the thinking level.
 * @returns The wrapped stream function.
 */
function wrapZaiStreamFn(ctx: ProviderWrapStreamFnContext) {
  let wrapped = createToolStreamWrapper(ctx.streamFn, ctx.extraParams?.tool_stream !== false);
  const keepReasoning = shouldPreserveZaiThinking(ctx.extraParams);

  // Without a disabled level or a preserve request, the `thinking` field is left alone.
  const needsThinkingPatch = isDisabledThinkingLevel(ctx.thinkingLevel) || keepReasoning;

  if (needsThinkingPatch) {
    wrapped = createPayloadPatchStreamWrapper(wrapped, ({ payload, model }) => {
      const isZaiCompletionsModel =
        model.api === "openai-completions" && model.provider === PROVIDER_ID;
      if (!isZaiCompletionsModel) {
        return;
      }

      // The disabled level wins over a preserve request.
      if (isDisabledThinkingLevel(ctx.thinkingLevel)) {
        payload.thinking = { type: "disabled" };
      } else if (keepReasoning) {
        payload.thinking = { type: "enabled", clear_thinking: false };
      }
    });
  }

  return wrapped;
}
|
||||
|
||||
async function promptForZaiEndpoint(ctx: ProviderAuthContext): Promise<ZaiEndpointId> {
|
||||
return await ctx.prompter.select<ZaiEndpointId>({
|
||||
message: "Select Z.AI endpoint",
|
||||
@@ -279,7 +321,7 @@ export default definePluginEntry({
|
||||
resolveDynamicModel: (ctx) => resolveGlm5ForwardCompatModel(ctx),
|
||||
...OPENAI_COMPATIBLE_REPLAY_HOOKS,
|
||||
prepareExtraParams: (ctx) => defaultToolStreamExtraParams(ctx.extraParams),
|
||||
...TOOL_STREAM_DEFAULT_ON_HOOKS,
|
||||
wrapStreamFn: (ctx) => wrapZaiStreamFn(ctx),
|
||||
resolveThinkingProfile: () => ({
|
||||
levels: [
|
||||
{ id: "off", label: "off" },
|
||||
|
||||
Reference in New Issue
Block a user