mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 04:50:44 +00:00
fix(deepseek): expose V4 max thinking levels (#73008)
Merged via squash.
Prepared head SHA: ef561a59de
Co-authored-by: ai-hpc <183861985+ai-hpc@users.noreply.github.com>
Co-authored-by: hxy91819 <8814856+hxy91819@users.noreply.github.com>
Reviewed-by: @hxy91819
@@ -142,6 +142,7 @@ Docs: https://docs.openclaw.ai
 - ACP/resolver: fall through to thread-bound session resolution when an explicit `--session` token cannot be resolved, while preserving the bad-token diagnostic when no thread binding exists, so Discord slash commands that auto-fill the current thread ID as the positional ACP target no longer return "Unable to resolve session target" errors. Fixes #66299. Thanks @hclsys, @kindomLee, and @martingarramon.
 - Agents/sessions: emit a terminal lifecycle backstop when embedded timeout/error turns return without `agent_end`, so Gateway sessions no longer stay stuck in `running` after failover surfaces a timeout. Fixes #74607. Thanks @millerc79.
 - Gateway/diagnostics: include stuck-session reason hints and recovery skip causes in warnings, so operators can tell whether a lane is waiting on active work, queued work, or stale bookkeeping. Thanks @vincentkoc.
+- Providers/DeepSeek: expose native DeepSeek V4 `xhigh` and `max` thinking levels through the provider `resolveThinkingProfile` hook so `/think xhigh|max` applies the intended effort instead of falling back to base levels. (#73008) Thanks @ai-hpc.
 - Agents/Codex: bound embedded-run cleanup, trajectory flushing, and command-lane task timeouts after runtime failures, so Discord and other chat sessions return to idle instead of staying stuck in processing. Thanks @vincentkoc.
 - Heartbeat/exec: consume successful metadata-only async exec completions silently so Telegram and other chat surfaces no longer ask users for missing command logs after `No session found`. Fixes #74595. Thanks @gkoch02.
 - Web fetch: add a documented `tools.web.fetch.ssrfPolicy.allowIpv6UniqueLocalRange` opt-in and thread it through cache keys and DNS/IP checks so trusted fake-IP proxy stacks using `fc00::/7` can work without broad private-network access. Fixes #74351. Thanks @jeffrey701.
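On the web-fetch item just above: the `fc00::/7` membership test reduces to a one-byte mask. A minimal sketch of the opt-in gate, with function and parameter names invented for illustration (not OpenClaw internals):

```typescript
// fc00::/7 covers every IPv6 address whose first 7 bits are 1111110x,
// i.e. a first byte of 0xfc or 0xfd.
function isIpv6UniqueLocal(addressBytes: Uint8Array): boolean {
  return addressBytes.length === 16 && (addressBytes[0] & 0xfe) === 0xfc;
}

// Invented shape: unique-local targets stay blocked by default and are
// allowed only through the documented opt-in flag.
function blocksUniqueLocal(
  addressBytes: Uint8Array,
  policy: { allowIpv6UniqueLocalRange?: boolean },
): boolean {
  return isIpv6UniqueLocal(addressBytes) && policy.allowIpv6UniqueLocalRange !== true;
}
```

The changelog entry also notes the flag is threaded into cache keys, so fetches made under permissive and strict policies do not share cached results.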
@@ -79,6 +79,8 @@ is available to that process (for example, in `~/.openclaw/.env` or via
 V4 models support DeepSeek's `thinking` control. OpenClaw also replays
 DeepSeek `reasoning_content` on follow-up turns so thinking sessions with tool
 calls can continue.
+Use `/think xhigh` or `/think max` with DeepSeek V4 models to request DeepSeek's
+maximum `reasoning_effort`.
 </Tip>

 ## Thinking and tools
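For context, a hedged sketch of what that request-level effect might look like. Only `reasoning_effort: "max"` is the documented outcome; the field placement and the rest of the payload are assumptions about the OpenAI-compatible completions shape the wrapper patches:

```typescript
// Hedged sketch: an outbound DeepSeek V4 chat completions request after
// `/think max`. Everything besides reasoning_effort is illustrative.
const request = {
  model: "deepseek-v4-pro",
  messages: [{ role: "user", content: "Summarize the incident timeline." }],
  reasoning_effort: "max", // `/think xhigh` and `/think max` both land here
};
```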
@@ -26,6 +26,7 @@ title: "Thinking levels"
 - Anthropic Claude Opus 4.7 does not default to adaptive thinking. Its API effort default remains provider-owned unless you explicitly set a thinking level.
 - Anthropic Claude Opus 4.7 maps `/think xhigh` to adaptive thinking plus `output_config.effort: "xhigh"`, because `/think` is a thinking directive and `xhigh` is the Opus 4.7 effort setting.
 - Anthropic Claude Opus 4.7 also exposes `/think max`; it maps to the same provider-owned max effort path.
+- DeepSeek V4 models expose `/think xhigh|max`; both map to DeepSeek `reasoning_effort: "max"` while lower non-off levels map to `high`.
 - Ollama thinking-capable models expose `/think low|medium|high|max`; `max` maps to native `think: "high"` because Ollama's native API accepts `low`, `medium`, and `high` effort strings.
 - OpenAI GPT models map `/think` through model-specific Responses API effort support. `/think off` sends `reasoning.effort: "none"` only when the target model supports it; otherwise OpenClaw omits the disabled reasoning payload instead of sending an unsupported value.
 - Custom OpenAI-compatible catalog entries can opt into `/think xhigh` by setting `models.providers.<provider>.models[].compat.supportedReasoningEfforts` to include `"xhigh"`. This uses the same compat metadata that maps outbound OpenAI reasoning effort payloads, so menus, session validation, agent CLI, and `llm-task` agree with transport behavior.
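The `compat.supportedReasoningEfforts` path in the last bullet is easier to read as a concrete entry. A hedged sketch, with provider and model ids invented and the structure rendered as a TypeScript literal (the on-disk config format may differ):

```typescript
// Hypothetical entry opting a custom OpenAI-compatible model into /think xhigh,
// following models.providers.<provider>.models[].compat.supportedReasoningEfforts.
const config = {
  models: {
    providers: {
      "my-compat-provider": {
        models: [
          {
            id: "my-reasoning-model",
            compat: {
              // Advertised efforts keep menus, session validation, agent CLI,
              // llm-task, and the outbound payload in agreement.
              supportedReasoningEfforts: ["low", "medium", "high", "xhigh"],
            },
          },
        ],
      },
    },
  },
};
```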
@@ -110,6 +110,37 @@ describe("deepseek provider plugin", () => {
     );
   });

+  it("advertises max thinking levels for DeepSeek V4 models only", async () => {
+    const provider = await registerSingleProviderPlugin(deepseekPlugin);
+    const resolveThinkingProfile = provider.resolveThinkingProfile!;
+    const expectedV4Levels = ["off", "minimal", "low", "medium", "high", "xhigh", "max"];
+
+    expect(
+      resolveThinkingProfile({
+        provider: "deepseek",
+        modelId: "deepseek-v4-pro",
+      } as never)?.levels.map((level) => level.id),
+    ).toEqual(expectedV4Levels);
+    expect(
+      resolveThinkingProfile({
+        provider: "deepseek",
+        modelId: "deepseek-v4-flash",
+      } as never)?.defaultLevel,
+    ).toBe("high");
+    expect(
+      resolveThinkingProfile({
+        provider: "deepseek",
+        modelId: "deepseek-v4-flash",
+      } as never)?.levels.map((level) => level.id),
+    ).toEqual(expectedV4Levels);
+    expect(
+      resolveThinkingProfile({ provider: "deepseek", modelId: "deepseek-chat" } as never),
+    ).toBe(undefined);
+    expect(
+      resolveThinkingProfile({ provider: "deepseek", modelId: "deepseek-reasoner" } as never),
+    ).toBe(undefined);
+  });
+
   it("maps thinking levels to DeepSeek V4 payload controls", async () => {
     let capturedPayload: Record<string, unknown> | undefined;
     const baseStreamFn = (
@@ -1,11 +1,27 @@
+import type { ProviderThinkingProfile } from "openclaw/plugin-sdk/plugin-entry";
 import { readConfiguredProviderCatalogEntries } from "openclaw/plugin-sdk/provider-catalog-shared";
 import { defineSingleProviderPluginEntry } from "openclaw/plugin-sdk/provider-entry";
 import { buildProviderReplayFamilyHooks } from "openclaw/plugin-sdk/provider-model-shared";
+import { isDeepSeekV4ModelId } from "./models.js";
 import { applyDeepSeekConfig, DEEPSEEK_DEFAULT_MODEL_REF } from "./onboard.js";
 import { buildDeepSeekProvider } from "./provider-catalog.js";
 import { createDeepSeekV4ThinkingWrapper } from "./stream.js";

 const PROVIDER_ID = "deepseek";
+const V4_THINKING_LEVEL_IDS = ["off", "minimal", "low", "medium", "high", "xhigh", "max"] as const;
+
+function buildDeepSeekV4ThinkingLevel(id: (typeof V4_THINKING_LEVEL_IDS)[number]) {
+  return { id };
+}
+
+const DEEPSEEK_V4_THINKING_PROFILE = {
+  levels: V4_THINKING_LEVEL_IDS.map(buildDeepSeekV4ThinkingLevel),
+  defaultLevel: "high",
+} satisfies ProviderThinkingProfile;
+
+function resolveDeepSeekV4ThinkingProfile(modelId: string): ProviderThinkingProfile | undefined {
+  return isDeepSeekV4ModelId(modelId) ? DEEPSEEK_V4_THINKING_PROFILE : undefined;
+}

 export default defineSingleProviderPluginEntry({
   id: PROVIDER_ID,
@@ -46,9 +62,7 @@ export default defineSingleProviderPluginEntry({
       /\bdeepseek\b.*(?:input.*too long|context.*exceed)/i.test(errorMessage),
     ...buildProviderReplayFamilyHooks({ family: "openai-compatible" }),
     wrapStreamFn: (ctx) => createDeepSeekV4ThinkingWrapper(ctx.streamFn, ctx.thinkingLevel),
-    isModernModelRef: ({ modelId }) => {
-      const lower = modelId.toLowerCase();
-      return lower === "deepseek-v4-flash" || lower === "deepseek-v4-pro";
-    },
+    resolveThinkingProfile: ({ modelId }) => resolveDeepSeekV4ThinkingProfile(modelId),
+    isModernModelRef: ({ modelId }) => Boolean(resolveDeepSeekV4ThinkingProfile(modelId)),
   },
 });
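The profile above only advertises level ids; the effort mapping itself happens in the shared stream wrapper. A hedged sketch of that mapping as the thinking-levels page describes it for V4 (`xhigh` and `max` both become `reasoning_effort: "max"`, other non-off levels become `"high"`); treating `off` as omitting the field is an assumption:

```typescript
// Sketch only: the real logic lives behind
// createDeepSeekV4OpenAICompatibleThinkingWrapper in the plugin SDK.
type DeepSeekV4ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max";

function toDeepSeekReasoningEffort(
  level: DeepSeekV4ThinkingLevel,
): "high" | "max" | undefined {
  if (level === "off") return undefined; // assumption: no reasoning_effort sent
  return level === "xhigh" || level === "max" ? "max" : "high";
}
```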
@@ -19,3 +19,15 @@ export function buildDeepSeekModelDefinition(
     api: "openai-completions",
   };
 }
+
+const DEEPSEEK_V4_MODEL_IDS = new Set(["deepseek-v4-flash", "deepseek-v4-pro"]);
+
+export function isDeepSeekV4ModelId(modelId: string): boolean {
+  return DEEPSEEK_V4_MODEL_IDS.has(modelId.toLowerCase());
+}
+
+export function isDeepSeekV4ModelRef(model: { provider?: string; id?: unknown }): boolean {
+  return (
+    model.provider === "deepseek" && typeof model.id === "string" && isDeepSeekV4ModelId(model.id)
+  );
+}
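Centralizing the V4 ID check here lets the plugin entry and the stream wrapper share one predicate instead of repeating literal model lists. A quick usage sketch (inputs invented for illustration):

```typescript
import { isDeepSeekV4ModelId, isDeepSeekV4ModelRef } from "./models.js";

console.log(isDeepSeekV4ModelId("DeepSeek-V4-Pro")); // true: lowercased before lookup
console.log(isDeepSeekV4ModelId("deepseek-chat")); // false: not a V4 id
console.log(isDeepSeekV4ModelRef({ provider: "deepseek", id: "deepseek-v4-flash" })); // true
console.log(isDeepSeekV4ModelRef({ provider: "openai", id: "deepseek-v4-flash" })); // false
```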
@@ -1,9 +1,6 @@
 import type { ProviderWrapStreamFnContext } from "openclaw/plugin-sdk/plugin-entry";
 import { createDeepSeekV4OpenAICompatibleThinkingWrapper } from "openclaw/plugin-sdk/provider-stream-shared";
-
-function isDeepSeekV4ModelId(modelId: unknown): boolean {
-  return modelId === "deepseek-v4-flash" || modelId === "deepseek-v4-pro";
-}
+import { isDeepSeekV4ModelRef } from "./models.js";

 export function createDeepSeekV4ThinkingWrapper(
   baseStreamFn: ProviderWrapStreamFnContext["streamFn"],
@@ -12,6 +9,6 @@ export function createDeepSeekV4ThinkingWrapper(
   return createDeepSeekV4OpenAICompatibleThinkingWrapper({
     baseStreamFn,
     thinkingLevel,
-    shouldPatchModel: (model) => model.provider === "deepseek" && isDeepSeekV4ModelId(model.id),
+    shouldPatchModel: isDeepSeekV4ModelRef,
   });
 }