mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 21:00:44 +00:00
fix(deepseek): backfill v4 assistant reasoning replay
This commit is contained in:
@@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Gateway/startup: start chat channels without waiting for primary model prewarm, keeping model warmup bounded in the background so Slack and other channels come online promptly when provider discovery is slow. Supersedes #73420. Thanks @dorukardahan.
|
- Gateway/startup: start chat channels without waiting for primary model prewarm, keeping model warmup bounded in the background so Slack and other channels come online promptly when provider discovery is slow. Supersedes #73420. Thanks @dorukardahan.
|
||||||
- Gateway/install: carry env-backed config SecretRefs such as `channels.discord.token` into generated service environments when they are present only in the installing shell, while keeping gateway auth SecretRefs non-persisted. Fixes #67817; supersedes #73426. Thanks @wdimaculangan and @ztexydt-cqh.
|
- Gateway/install: carry env-backed config SecretRefs such as `channels.discord.token` into generated service environments when they are present only in the installing shell, while keeping gateway auth SecretRefs non-persisted. Fixes #67817; supersedes #73426. Thanks @wdimaculangan and @ztexydt-cqh.
|
||||||
- Auto-reply/commands: stop bare `/reset` and `/new` after reset hooks acknowledge the command, so non-ACP channels no longer fall through into empty provider calls while `/reset <message>` and `/new <message>` still seed the next model turn. Fixes #73367 and #73412. Thanks @hoyanhan, @wenxu007, and @amdhelper.
|
- Auto-reply/commands: stop bare `/reset` and `/new` after reset hooks acknowledge the command, so non-ACP channels no longer fall through into empty provider calls while `/reset <message>` and `/new <message>` still seed the next model turn. Fixes #73367 and #73412. Thanks @hoyanhan, @wenxu007, and @amdhelper.
|
||||||
|
- Providers/DeepSeek: backfill DeepSeek V4 `reasoning_content` on replayed plain assistant messages as well as tool-call turns, so thinking sessions with prior tool use no longer fail follow-up requests with missing `reasoning_content`. Fixes #73417; refs #71372. Thanks @34262315716 and @Bartok9.
|
||||||
- Auto-reply: preserve voice-note media from silent turns while continuing to suppress text and non-voice media, so `NO_REPLY` TTS replies still deliver the requested audio bubble. (#73406) Thanks @zqchris.
|
- Auto-reply: preserve voice-note media from silent turns while continuing to suppress text and non-voice media, so `NO_REPLY` TTS replies still deliver the requested audio bubble. (#73406) Thanks @zqchris.
|
||||||
- Channels/Mattermost: stop enqueueing regular inbound posts as system events, so Mattermost user messages reach the model only as user-role inbound-envelope content instead of also appearing as `System: Mattermost message...` directives. Fixes #71795. Thanks @juan-flores077.
|
- Channels/Mattermost: stop enqueueing regular inbound posts as system events, so Mattermost user messages reach the model only as user-role inbound-envelope content instead of also appearing as `System: Mattermost message...` directives. Fixes #71795. Thanks @juan-flores077.
|
||||||
- Agents/Anthropic: send implicit Anthropic beta headers only to direct public Anthropic endpoints, including OAuth, so custom Anthropic-compatible providers no longer mis-handle unsupported beta flags unless explicitly configured. Refs #73346. Thanks @byBrodowski.
|
- Agents/Anthropic: send implicit Anthropic beta headers only to direct public Anthropic endpoints, including OAuth, so custom Anthropic-compatible providers no longer mis-handle unsupported beta flags unless explicitly configured. Refs #73346. Thanks @byBrodowski.
|
||||||
|
|||||||
@@ -84,17 +84,17 @@ calls can continue.
|
|||||||
## Thinking and tools
|
## Thinking and tools
|
||||||
|
|
||||||
DeepSeek V4 thinking sessions have a stricter replay contract than most
|
DeepSeek V4 thinking sessions have a stricter replay contract than most
|
||||||
OpenAI-compatible providers: when a thinking-enabled assistant message includes
|
OpenAI-compatible providers: after a thinking-enabled turn uses tools, DeepSeek
|
||||||
tool calls, DeepSeek expects the prior assistant `reasoning_content` to be sent
|
expects replayed assistant messages from that turn to include
|
||||||
back on the follow-up request. OpenClaw handles this inside the DeepSeek plugin,
|
`reasoning_content` on follow-up requests. OpenClaw handles this inside the
|
||||||
so normal multi-turn tool use works with `deepseek/deepseek-v4-flash` and
|
DeepSeek plugin, so normal multi-turn tool use works with
|
||||||
`deepseek/deepseek-v4-pro`.
|
`deepseek/deepseek-v4-flash` and `deepseek/deepseek-v4-pro`.
|
||||||
|
|
||||||
If you switch an existing session from another OpenAI-compatible provider to a
|
If you switch an existing session from another OpenAI-compatible provider to a
|
||||||
DeepSeek V4 model, older assistant tool-call turns may not have native
|
DeepSeek V4 model, older assistant turns may not have native
|
||||||
DeepSeek `reasoning_content`. OpenClaw fills that missing field for DeepSeek V4
|
DeepSeek `reasoning_content`. OpenClaw fills that missing field on replayed
|
||||||
thinking requests so the provider can accept the replayed tool-call history
|
assistant messages for DeepSeek V4 thinking requests so the provider can accept
|
||||||
without requiring `/new`.
|
the history without requiring `/new`.
|
||||||
|
|
||||||
When thinking is disabled in OpenClaw (including the UI **None** selection),
|
When thinking is disabled in OpenClaw (including the UI **None** selection),
|
||||||
OpenClaw sends DeepSeek `thinking: { type: "disabled" }` and strips replayed
|
OpenClaw sends DeepSeek `thinking: { type: "disabled" }` and strips replayed
|
||||||
|
|||||||
@@ -127,9 +127,9 @@ Use the table below to pick the right model for your use case.
|
|||||||
|
|
||||||
If Venice exposes DeepSeek V4 models such as `venice/deepseek-v4-pro` or
|
If Venice exposes DeepSeek V4 models such as `venice/deepseek-v4-pro` or
|
||||||
`venice/deepseek-v4-flash`, OpenClaw fills the required DeepSeek V4
|
`venice/deepseek-v4-flash`, OpenClaw fills the required DeepSeek V4
|
||||||
`reasoning_content` replay placeholder on assistant tool-call turns when the
|
`reasoning_content` replay placeholder on assistant messages when the proxy
|
||||||
proxy omits it. Venice rejects DeepSeek's native top-level `thinking` control,
|
omits it. Venice rejects DeepSeek's native top-level `thinking` control, so
|
||||||
so OpenClaw keeps that provider-specific replay fix separate from the native
|
OpenClaw keeps that provider-specific replay fix separate from the native
|
||||||
DeepSeek provider's thinking controls.
|
DeepSeek provider's thinking controls.
|
||||||
|
|
||||||
## Built-in catalog (41 total)
|
## Built-in catalog (41 total)
|
||||||
|
|||||||
@@ -176,4 +176,59 @@ describeLive("deepseek plugin live", () => {
|
|||||||
});
|
});
|
||||||
expect(extractNonEmptyAssistantText(result.content).length).toBeGreaterThan(0);
|
expect(extractNonEmptyAssistantText(result.content).length).toBeGreaterThan(0);
|
||||||
}, 60_000);
|
}, 60_000);
|
||||||
|
|
||||||
|
it("accepts V4 thinking replay after a prior plain assistant message", async () => {
|
||||||
|
const context: Context = {
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
role: "user",
|
||||||
|
content: "Say hello.",
|
||||||
|
timestamp: Date.now() - 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
api: "openai-completions",
|
||||||
|
provider: "openai",
|
||||||
|
model: "gpt-5.4",
|
||||||
|
content: [{ type: "text", text: "Hello." }],
|
||||||
|
usage: ZERO_USAGE,
|
||||||
|
stopReason: "stop",
|
||||||
|
timestamp: Date.now() - 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: "user",
|
||||||
|
content: "Reply with exactly: ok",
|
||||||
|
timestamp: Date.now(),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
|
let capturedPayload: Record<string, unknown> | undefined;
|
||||||
|
const streamFn = createDeepSeekV4ThinkingWrapper(streamSimple, "high");
|
||||||
|
expect(streamFn).toBeDefined();
|
||||||
|
|
||||||
|
const stream = streamFn?.(resolveDeepSeekV4LiveModel(), context, {
|
||||||
|
apiKey: DEEPSEEK_KEY,
|
||||||
|
maxTokens: 64,
|
||||||
|
onPayload: (payload) => {
|
||||||
|
capturedPayload = payload as Record<string, unknown>;
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(stream).toBeDefined();
|
||||||
|
|
||||||
|
const result = await (await stream!).result();
|
||||||
|
if (result.stopReason === "error") {
|
||||||
|
throw new Error(
|
||||||
|
result.errorMessage || "DeepSeek V4 plain replay returned error with no message",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const messages = capturedPayload?.messages;
|
||||||
|
expect(Array.isArray(messages)).toBe(true);
|
||||||
|
expect((messages as Array<Record<string, unknown>>)[1]).toMatchObject({
|
||||||
|
role: "assistant",
|
||||||
|
content: "Hello.",
|
||||||
|
reasoning_content: "",
|
||||||
|
});
|
||||||
|
expect(extractNonEmptyAssistantText(result.content).length).toBeGreaterThan(0);
|
||||||
|
}, 60_000);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -309,6 +309,73 @@ describe("deepseek provider plugin", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("adds blank reasoning_content for replayed plain assistant messages", async () => {
|
||||||
|
let capturedPayload: Record<string, unknown> | undefined;
|
||||||
|
const model = {
|
||||||
|
provider: "deepseek",
|
||||||
|
id: "deepseek-v4-pro",
|
||||||
|
name: "DeepSeek V4 Pro",
|
||||||
|
api: "openai-completions",
|
||||||
|
baseUrl: "https://api.deepseek.com",
|
||||||
|
reasoning: true,
|
||||||
|
input: ["text"],
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
contextWindow: 1_000_000,
|
||||||
|
maxTokens: 384_000,
|
||||||
|
compat: {
|
||||||
|
supportsUsageInStreaming: true,
|
||||||
|
supportsReasoningEffort: true,
|
||||||
|
maxTokensField: "max_tokens",
|
||||||
|
},
|
||||||
|
} as Model<"openai-completions">;
|
||||||
|
const context = {
|
||||||
|
messages: [
|
||||||
|
{ role: "user", content: "hi", timestamp: 1 },
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
api: "openai-completions",
|
||||||
|
provider: "openai",
|
||||||
|
model: "gpt-5.4",
|
||||||
|
content: [{ type: "text", text: "Hello." }],
|
||||||
|
usage: {
|
||||||
|
input: 0,
|
||||||
|
output: 0,
|
||||||
|
cacheRead: 0,
|
||||||
|
cacheWrite: 0,
|
||||||
|
totalTokens: 0,
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||||
|
},
|
||||||
|
stopReason: "stop",
|
||||||
|
timestamp: 2,
|
||||||
|
},
|
||||||
|
{ role: "user", content: "next", timestamp: 3 },
|
||||||
|
],
|
||||||
|
} as Context;
|
||||||
|
const baseStreamFn = (
|
||||||
|
streamModel: Model<"openai-completions">,
|
||||||
|
streamContext: Context,
|
||||||
|
options?: { onPayload?: (payload: unknown, model: unknown) => unknown },
|
||||||
|
) => {
|
||||||
|
capturedPayload = buildOpenAICompletionsParams(streamModel, streamContext, {
|
||||||
|
reasoning: "high",
|
||||||
|
} as never);
|
||||||
|
options?.onPayload?.(capturedPayload, streamModel);
|
||||||
|
const stream = createAssistantMessageEventStream();
|
||||||
|
queueMicrotask(() => stream.end());
|
||||||
|
return stream;
|
||||||
|
};
|
||||||
|
|
||||||
|
const wrapThinkingHigh = createDeepSeekV4ThinkingWrapper(baseStreamFn as never, "high");
|
||||||
|
expect(wrapThinkingHigh).toBeDefined();
|
||||||
|
await wrapThinkingHigh?.(model, context, {});
|
||||||
|
|
||||||
|
expect((capturedPayload?.messages as Array<Record<string, unknown>>)[1]).toMatchObject({
|
||||||
|
role: "assistant",
|
||||||
|
content: "Hello.",
|
||||||
|
reasoning_content: "",
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it("strips replayed reasoning_content when DeepSeek V4 thinking is disabled", async () => {
|
it("strips replayed reasoning_content when DeepSeek V4 thinking is disabled", async () => {
|
||||||
let capturedPayload: Record<string, unknown> | undefined;
|
let capturedPayload: Record<string, unknown> | undefined;
|
||||||
const model = {
|
const model = {
|
||||||
|
|||||||
@@ -55,6 +55,7 @@ describe("venice provider plugin", () => {
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{ role: "assistant", content: "done" },
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
(options as { onPayload?: (payload: Record<string, unknown>) => void })?.onPayload?.(payload);
|
(options as { onPayload?: (payload: Record<string, unknown>) => void })?.onPayload?.(payload);
|
||||||
@@ -87,6 +88,11 @@ describe("venice provider plugin", () => {
|
|||||||
],
|
],
|
||||||
reasoning_content: "",
|
reasoning_content: "",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: "done",
|
||||||
|
reasoning_content: "",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ function ensureVeniceDeepSeekV4Replay(payload: Record<string, unknown>): void {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const record = message as Record<string, unknown>;
|
const record = message as Record<string, unknown>;
|
||||||
if (record.role === "assistant" && Array.isArray(record.tool_calls)) {
|
if (record.role === "assistant") {
|
||||||
record.reasoning_content ??= "";
|
record.reasoning_content ??= "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import type { StreamFn } from "@mariozechner/pi-agent-core";
|
|||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import {
|
import {
|
||||||
buildCopilotDynamicHeaders,
|
buildCopilotDynamicHeaders,
|
||||||
|
createDeepSeekV4OpenAICompatibleThinkingWrapper,
|
||||||
createHtmlEntityToolCallArgumentDecodingWrapper,
|
createHtmlEntityToolCallArgumentDecodingWrapper,
|
||||||
createAnthropicThinkingPrefillPayloadWrapper,
|
createAnthropicThinkingPrefillPayloadWrapper,
|
||||||
createPayloadPatchStreamWrapper,
|
createPayloadPatchStreamWrapper,
|
||||||
@@ -104,6 +105,37 @@ describe("isOpenAICompatibleThinkingEnabled", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("createDeepSeekV4OpenAICompatibleThinkingWrapper", () => {
|
||||||
|
it("backfills reasoning_content on every replayed assistant message when thinking is enabled", () => {
|
||||||
|
const payload = {
|
||||||
|
messages: [
|
||||||
|
{ role: "user", content: "read file" },
|
||||||
|
{ role: "assistant", tool_calls: [{ id: "call_1", name: "read" }] },
|
||||||
|
{ role: "tool", content: "ok" },
|
||||||
|
{ role: "assistant", content: "done" },
|
||||||
|
{ role: "assistant", content: "kept", reasoning_content: "native reasoning" },
|
||||||
|
],
|
||||||
|
};
|
||||||
|
const baseStreamFn: StreamFn = (_model, _context, options) => {
|
||||||
|
options?.onPayload?.(payload as never, _model as never);
|
||||||
|
return {} as ReturnType<StreamFn>;
|
||||||
|
};
|
||||||
|
|
||||||
|
const wrapped = createDeepSeekV4OpenAICompatibleThinkingWrapper({
|
||||||
|
baseStreamFn,
|
||||||
|
thinkingLevel: "high",
|
||||||
|
shouldPatchModel: () => true,
|
||||||
|
});
|
||||||
|
void wrapped?.({} as never, {} as never, {});
|
||||||
|
|
||||||
|
expect(payload.messages[0]).not.toHaveProperty("reasoning_content");
|
||||||
|
expect(payload.messages[1]).toHaveProperty("reasoning_content", "");
|
||||||
|
expect(payload.messages[2]).not.toHaveProperty("reasoning_content");
|
||||||
|
expect(payload.messages[3]).toHaveProperty("reasoning_content", "");
|
||||||
|
expect(payload.messages[4]).toHaveProperty("reasoning_content", "native reasoning");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe("buildCopilotDynamicHeaders", () => {
|
describe("buildCopilotDynamicHeaders", () => {
|
||||||
it("matches Copilot IDE-style request headers without the legacy Openai-Intent", () => {
|
it("matches Copilot IDE-style request headers without the legacy Openai-Intent", () => {
|
||||||
expect(
|
expect(
|
||||||
|
|||||||
@@ -259,7 +259,7 @@ function stripDeepSeekV4ReasoningContent(payload: Record<string, unknown>): void
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function ensureDeepSeekV4ToolCallReasoningContent(payload: Record<string, unknown>): void {
|
function ensureDeepSeekV4AssistantReasoningContent(payload: Record<string, unknown>): void {
|
||||||
if (!Array.isArray(payload.messages)) {
|
if (!Array.isArray(payload.messages)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -268,7 +268,7 @@ function ensureDeepSeekV4ToolCallReasoningContent(payload: Record<string, unknow
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const record = message as Record<string, unknown>;
|
const record = message as Record<string, unknown>;
|
||||||
if (record.role !== "assistant" || !Array.isArray(record.tool_calls)) {
|
if (record.role !== "assistant") {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!("reasoning_content" in record)) {
|
if (!("reasoning_content" in record)) {
|
||||||
@@ -302,7 +302,7 @@ export function createDeepSeekV4OpenAICompatibleThinkingWrapper(params: {
|
|||||||
|
|
||||||
payload.thinking = { type: "enabled" };
|
payload.thinking = { type: "enabled" };
|
||||||
payload.reasoning_effort = resolveDeepSeekV4ReasoningEffort(params.thinkingLevel);
|
payload.reasoning_effort = resolveDeepSeekV4ReasoningEffort(params.thinkingLevel);
|
||||||
ensureDeepSeekV4ToolCallReasoningContent(payload);
|
ensureDeepSeekV4AssistantReasoningContent(payload);
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user