diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md
index 591af8b2be4..f61ab79baf5 100644
--- a/docs/concepts/model-providers.md
+++ b/docs/concepts/model-providers.md
@@ -660,7 +660,7 @@ Example (OpenAI‑compatible):
   - For `api: "openai-completions"` on non-native endpoints (any non-empty `baseUrl` whose host is not `api.openai.com`), OpenClaw forces `compat.supportsDeveloperRole: false` to avoid provider 400 errors for unsupported `developer` roles.
   - Proxy-style OpenAI-compatible routes also skip native OpenAI-only request shaping: no `service_tier`, no Responses `store`, no Completions `store`, no prompt-cache hints, no OpenAI reasoning-compat payload shaping, and no hidden OpenClaw attribution headers.
   - For OpenAI-compatible Completions proxies that need vendor-specific fields, set `agents.defaults.models["provider/model"].params.extra_body` (or `extraBody`) to merge extra JSON into the outbound request body.
-  - For vLLM chat-template controls, set `agents.defaults.models["provider/model"].params.chat_template_kwargs`. OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true` for `vllm/nemotron-3-*` when the session thinking level is off.
+  - For vLLM chat-template controls, set `agents.defaults.models["provider/model"].params.chat_template_kwargs`. The bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true` for `vllm/nemotron-3-*` when the session thinking level is off.
   - For slow local models or remote LAN/tailnet hosts, set `models.providers.<provider>.timeoutSeconds`. This extends provider model HTTP request handling, including connect, headers, body streaming, and the total guarded-fetch abort, without increasing the whole agent runtime timeout.
   - If `baseUrl` is empty/omitted, OpenClaw keeps the default OpenAI behavior (which resolves to `api.openai.com`).
   - For safety, an explicit `compat.supportsDeveloperRole: true` is still overridden on non-native `openai-completions` endpoints.
diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md
index 8e55d906d8b..dee298aedd6 100644
--- a/docs/gateway/config-agents.md
+++ b/docs/gateway/config-agents.md
@@ -371,7 +371,7 @@ Time format in system prompt. Default: `auto` (OS preference).
 - `params`: global default provider parameters applied to all models. Set at `agents.defaults.params` (e.g. `{ cacheRetention: "long" }`).
   - `params` merge precedence (config): `agents.defaults.params` (global base) is overridden by `agents.defaults.models["provider/model"].params` (per-model), then `agents.list[].params` (matching agent id) overrides by key. See [Prompt Caching](/reference/prompt-caching) for details.
 - `params.extra_body`/`params.extraBody`: advanced pass-through JSON merged into `api: "openai-completions"` request bodies for OpenAI-compatible proxies. If it collides with generated request keys, the extra body wins; non-native completions routes still strip OpenAI-only `store` afterward.
-- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, OpenClaw automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. For vLLM Qwen thinking controls, set `params.qwenThinkingFormat` to `"chat-template"` or `"top-level"` on that model entry.
+- `params.chat_template_kwargs`: vLLM/OpenAI-compatible chat-template arguments merged into top-level `api: "openai-completions"` request bodies. For `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin automatically sends `enable_thinking: false` and `force_nonempty_content: true`; explicit `chat_template_kwargs` override generated defaults, and `extra_body.chat_template_kwargs` still has final precedence. For vLLM Qwen thinking controls, set `params.qwenThinkingFormat` to `"chat-template"` or `"top-level"` on that model entry.
 - `params.preserveThinking`: Z.AI-only opt-in for preserved thinking. When enabled and thinking is on, OpenClaw sends `thinking.clear_thinking: false` and replays prior `reasoning_content`; see [Z.AI thinking and preserved thinking](/providers/zai#thinking-and-preserved-thinking).
 - `agentRuntime`: default low-level agent runtime policy. Omitted id defaults to OpenClaw Pi. Use `id: "pi"` to force the built-in PI harness, `id: "auto"` to let registered plugin harnesses claim supported models, a registered harness id such as `id: "codex"`, or a supported CLI backend alias such as `id: "claude-cli"`. Set `fallback: "none"` to disable automatic PI fallback. Explicit plugin runtimes such as `codex` fail closed by default unless you set `fallback: "pi"` in the same override scope. Keep model refs canonical as `provider/model`; select Codex, Claude CLI, Gemini CLI, and other execution backends through runtime config instead of legacy runtime provider prefixes. See [Agent runtimes](/concepts/agent-runtimes) for how this differs from provider/model selection.
 - Config writers that mutate these fields (for example `/models set`, `/models set-image`, and fallback add/remove commands) save canonical object form and preserve existing fallback lists when possible.
diff --git a/docs/providers/vllm.md b/docs/providers/vllm.md
index 0d7b9bc61b7..e89457c0641 100644
--- a/docs/providers/vllm.md
+++ b/docs/providers/vllm.md
@@ -153,7 +153,7 @@ Use explicit config when:
 
 vLLM/Nemotron 3 can use chat-template kwargs to control whether reasoning is
 returned as hidden reasoning or visible answer text. When an OpenClaw session
-uses `vllm/nemotron-3-*` with thinking off, OpenClaw sends:
+uses `vllm/nemotron-3-*` with thinking off, the bundled vLLM plugin sends:
 
 ```json
 {
diff --git a/extensions/qwen/stream.ts b/extensions/qwen/stream.ts
index 428a0d2c184..c3ec46f91d7 100644
--- a/extensions/qwen/stream.ts
+++ b/extensions/qwen/stream.ts
@@ -1,7 +1,10 @@
 import type { StreamFn } from "@mariozechner/pi-agent-core";
 import type { ProviderWrapStreamFnContext } from "openclaw/plugin-sdk/plugin-entry";
 import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared";
-import { createPayloadPatchStreamWrapper } from "openclaw/plugin-sdk/provider-stream-shared";
+import {
+  createPayloadPatchStreamWrapper,
+  isOpenAICompatibleThinkingEnabled,
+} from "openclaw/plugin-sdk/provider-stream-shared";
 
 type QwenThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"];
 
@@ -15,19 +18,6 @@ function isQwenProviderId(providerId: string): boolean {
   );
 }
 
-function resolveOpenAICompatibleThinkingEnabled(params: {
-  thinkingLevel: QwenThinkingLevel;
-  options: Parameters<StreamFn>[2];
-}): boolean {
-  const options = (params.options ?? {}) as { reasoningEffort?: unknown; reasoning?: unknown };
-  const raw = options.reasoningEffort ?? options.reasoning ?? params.thinkingLevel ?? "high";
-  if (typeof raw !== "string") {
-    return true;
-  }
-  const normalized = raw.trim().toLowerCase();
-  return normalized !== "off" && normalized !== "none";
-}
-
 export function createQwenThinkingWrapper(
   baseStreamFn: StreamFn | undefined,
   thinkingLevel: QwenThinkingLevel,
@@ -35,7 +25,7 @@
   return createPayloadPatchStreamWrapper(
     baseStreamFn,
     ({ payload: payloadObj, options }) => {
-      const enableThinking = resolveOpenAICompatibleThinkingEnabled({ thinkingLevel, options });
+      const enableThinking = isOpenAICompatibleThinkingEnabled({ thinkingLevel, options });
       payloadObj.enable_thinking = enableThinking;
       delete payloadObj.reasoning_effort;
       delete payloadObj.reasoningEffort;
diff --git a/extensions/vllm/stream.test.ts b/extensions/vllm/stream.test.ts
index 73352acbf81..99b26c9e439 100644
--- a/extensions/vllm/stream.test.ts
+++ b/extensions/vllm/stream.test.ts
@@ -1,7 +1,11 @@
 import type { StreamFn } from "@mariozechner/pi-agent-core";
 import type { Context, Model } from "@mariozechner/pi-ai";
 import { describe, expect, it } from "vitest";
-import { createVllmQwenThinkingWrapper, wrapVllmProviderStream } from "./stream.js";
+import {
+  createVllmProviderThinkingWrapper,
+  createVllmQwenThinkingWrapper,
+  wrapVllmProviderStream,
+} from "./stream.js";
 
 function capturePayload(params: {
   format: "chat-template" | "top-level";
@@ -105,6 +109,80 @@
   });
 });
 
+describe("createVllmProviderThinkingWrapper", () => {
+  function captureProviderPayload(params: {
+    thinkingLevel?: "off" | "low" | "medium" | "high" | "xhigh" | "max";
+    initialPayload?: Record<string, unknown>;
+    model?: Partial<Model<"openai-completions">>;
+  }): Record<string, unknown> {
+    let captured: Record<string, unknown> = {};
+    const baseStreamFn: StreamFn = (_model, _context, options) => {
+      const payload = { ...params.initialPayload };
+      options?.onPayload?.(payload, _model);
+      captured = payload;
+      return {} as ReturnType<StreamFn>;
+    };
+
+    const wrapped = createVllmProviderThinkingWrapper({
+      baseStreamFn,
+      thinkingLevel: params.thinkingLevel ?? "high",
+    });
+    void wrapped(
+      {
+        api: "openai-completions",
+        provider: "vllm",
+        id: "nemotron-3-super",
+        reasoning: true,
+        ...params.model,
+      } as Model<"openai-completions">,
+      { messages: [] } as Context,
+      {},
+    );
+
+    return captured;
+  }
+
+  it("injects Nemotron 3 chat-template kwargs when thinking is off", () => {
+    expect(captureProviderPayload({ thinkingLevel: "off" })).toEqual({
+      chat_template_kwargs: {
+        enable_thinking: false,
+        force_nonempty_content: true,
+      },
+    });
+  });
+
+  it("does not inject Nemotron 3 chat-template kwargs when thinking is enabled", () => {
+    expect(captureProviderPayload({ thinkingLevel: "low" })).toEqual({});
+  });
+
+  it("preserves existing Nemotron 3 chat-template kwargs over defaults", () => {
+    expect(
+      captureProviderPayload({
+        thinkingLevel: "off",
+        initialPayload: {
+          chat_template_kwargs: {
+            enable_thinking: true,
+          },
+        },
+      }),
+    ).toEqual({
+      chat_template_kwargs: {
+        enable_thinking: true,
+        force_nonempty_content: true,
+      },
+    });
+  });
+
+  it("skips non-Nemotron vLLM models", () => {
+    expect(
+      captureProviderPayload({
+        thinkingLevel: "off",
+        model: { id: "Qwen/Qwen3-8B" },
+      }),
+    ).toEqual({});
+  });
+});
+
 describe("wrapVllmProviderStream", () => {
   it("registers when vLLM Qwen thinking format params are configured", () => {
     expect(
@@ -167,4 +245,36 @@
       } as never),
     ).toBeUndefined();
   });
+
+  it("registers for vLLM Nemotron when thinking is off", () => {
+    expect(
+      wrapVllmProviderStream({
+        provider: "vllm",
+        modelId: "nemotron-3-super",
+        extraParams: {},
+        thinkingLevel: "off",
+        model: {
+          api: "openai-completions",
+          provider: "vllm",
+          id: "nemotron-3-super",
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeTypeOf("function");
+
+    expect(
+      wrapVllmProviderStream({
+        provider: "vllm",
+        modelId: "nemotron-3-super",
+        extraParams: {},
+        thinkingLevel: "low",
+        model: {
+          api: "openai-completions",
+          provider: "vllm",
+          id: "nemotron-3-super",
+        } as Model<"openai-completions">,
+        streamFn: undefined,
+      } as never),
+    ).toBeUndefined();
+  });
 });
diff --git a/extensions/vllm/stream.ts b/extensions/vllm/stream.ts
index 15050d73e4d..19f5392ea1f 100644
--- a/extensions/vllm/stream.ts
+++ b/extensions/vllm/stream.ts
@@ -1,7 +1,10 @@
 import type { StreamFn } from "@mariozechner/pi-agent-core";
 import type { ProviderWrapStreamFnContext } from "openclaw/plugin-sdk/plugin-entry";
 import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared";
-import { createPayloadPatchStreamWrapper } from "openclaw/plugin-sdk/provider-stream-shared";
+import {
+  createPayloadPatchStreamWrapper,
+  isOpenAICompatibleThinkingEnabled,
+} from "openclaw/plugin-sdk/provider-stream-shared";
 
 type VllmThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"];
 type VllmQwenThinkingFormat = "chat-template" | "top-level";
@@ -41,19 +44,6 @@ function resolveVllmQwenThinkingFormat(
   );
 }
 
-function resolveOpenAICompatibleThinkingEnabled(params: {
-  thinkingLevel: VllmThinkingLevel;
-  options: Parameters<StreamFn>[2];
-}): boolean {
-  const options = (params.options ?? {}) as { reasoningEffort?: unknown; reasoning?: unknown };
-  const raw = options.reasoningEffort ?? options.reasoning ?? params.thinkingLevel ?? "high";
-  if (typeof raw !== "string") {
-    return true;
-  }
-  const normalized = raw.trim().toLowerCase();
-  return normalized !== "off" && normalized !== "none";
-}
-
 function setQwenChatTemplateThinking(payload: Record<string, unknown>, enabled: boolean): void {
   const existing = payload.chat_template_kwargs;
   if (existing && typeof existing === "object" && !Array.isArray(existing)) {
@@ -73,6 +63,31 @@ function setQwenChatTemplateThinking(payload: Record<string, unknown>, enabled:
   };
 }
 
+function isVllmNemotronModel(model: { api?: unknown; provider?: unknown; id?: unknown }): boolean {
+  return (
+    model.api === "openai-completions" &&
+    typeof model.provider === "string" &&
+    normalizeProviderId(model.provider) === "vllm" &&
+    typeof model.id === "string" &&
+    /\bnemotron-3(?:[-_](?:nano|super|ultra))?\b/i.test(model.id)
+  );
+}
+
+function setNemotronThinkingOffChatTemplateKwargs(payload: Record<string, unknown>): void {
+  const defaults = {
+    enable_thinking: false,
+    force_nonempty_content: true,
+  };
+  const existing = payload.chat_template_kwargs;
+  payload.chat_template_kwargs =
+    existing && typeof existing === "object" && !Array.isArray(existing)
+      ? {
+          ...defaults,
+          ...(existing as Record<string, unknown>),
+        }
+      : defaults;
+}
+
 export function createVllmQwenThinkingWrapper(params: {
   baseStreamFn: StreamFn | undefined;
   format: VllmQwenThinkingFormat;
@@ -81,7 +96,7 @@ export function createVllmQwenThinkingWrapper(params: {
   return createPayloadPatchStreamWrapper(
     params.baseStreamFn,
     ({ payload: payloadObj, options }) => {
-      const enableThinking = resolveOpenAICompatibleThinkingEnabled({
+      const enableThinking = isOpenAICompatibleThinkingEnabled({
         thinkingLevel: params.thinkingLevel,
         options,
       });
@@ -100,17 +115,50 @@
   );
 }
 
+export function createVllmProviderThinkingWrapper(params: {
+  baseStreamFn: StreamFn | undefined;
+  qwenFormat?: VllmQwenThinkingFormat;
+  thinkingLevel: VllmThinkingLevel;
+}): StreamFn {
+  const qwenWrapped = params.qwenFormat
+    ? createVllmQwenThinkingWrapper({
+        baseStreamFn: params.baseStreamFn,
+        format: params.qwenFormat,
+        thinkingLevel: params.thinkingLevel,
+      })
+    : params.baseStreamFn;
+  return createPayloadPatchStreamWrapper(
+    qwenWrapped,
+    ({ payload: payloadObj }) => {
+      setNemotronThinkingOffChatTemplateKwargs(payloadObj);
+    },
+    {
+      shouldPatch: ({ model }) =>
+        model.api === "openai-completions" &&
+        params.thinkingLevel === "off" &&
+        isVllmNemotronModel(model),
+    },
+  );
+}
+
 export function wrapVllmProviderStream(ctx: ProviderWrapStreamFnContext): StreamFn | undefined {
   if (!isVllmProviderId(ctx.provider) || (ctx.model && ctx.model.api !== "openai-completions")) {
     return undefined;
   }
-  const format = resolveVllmQwenThinkingFormat(ctx.extraParams);
-  if (!format) {
+  const qwenFormat = resolveVllmQwenThinkingFormat(ctx.extraParams);
+  const shouldHandleNemotron =
+    ctx.thinkingLevel === "off" &&
+    isVllmNemotronModel({
+      api: "openai-completions",
+      provider: ctx.provider,
+      id: ctx.modelId,
+    });
+  if (!qwenFormat && !shouldHandleNemotron) {
     return undefined;
   }
-  return createVllmQwenThinkingWrapper({
+  return createVllmProviderThinkingWrapper({
     baseStreamFn: ctx.streamFn,
-    format,
+    qwenFormat,
     thinkingLevel: ctx.thinkingLevel,
   });
 }
diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts
index 044738e0a5c..22852257130 100644
--- a/src/agents/pi-embedded-runner-extraparams.test.ts
+++ b/src/agents/pi-embedded-runner-extraparams.test.ts
@@ -894,85 +894,6 @@ describe("applyExtraParamsToAgent", () => {
     });
   });
 
-  it("injects vLLM Nemotron chat_template_kwargs when thinking is off", () => {
-    const payload = runResponsesPayloadMutationCase({
-      applyProvider: "vllm",
-      applyModelId: "nemotron-3-super",
-      model: {
-        api: "openai-completions",
-        provider: "vllm",
-        id: "nemotron-3-super",
-        baseUrl: "http://127.0.0.1:8000/v1",
-      } as Model<"openai-completions">,
-      payload: {
-        messages: [],
-      },
-      thinkingLevel: "off",
-    });
-
-    expect(payload.chat_template_kwargs).toEqual({
-      enable_thinking: false,
-      force_nonempty_content: true,
-    });
-  });
-
-  it("does not inject vLLM Nemotron chat_template_kwargs when thinking is enabled", () => {
-    const payload = runResponsesPayloadMutationCase({
-      applyProvider: "vllm",
-      applyModelId: "nemotron-3-super",
-      model: {
-        api: "openai-completions",
-        provider: "vllm",
-        id: "nemotron-3-super",
-        baseUrl: "http://127.0.0.1:8000/v1",
-      } as Model<"openai-completions">,
-      payload: {
-        messages: [],
-      },
-      thinkingLevel: "low",
-    });
-
-    expect(payload).not.toHaveProperty("chat_template_kwargs");
-  });
-
-  it("lets extra_body override generated vLLM Nemotron chat_template_kwargs", () => {
-    const payload = runResponsesPayloadMutationCase({
-      applyProvider: "vllm",
-      applyModelId: "nemotron-3-super",
-      cfg: {
-        agents: {
-          defaults: {
-            models: {
-              "vllm/nemotron-3-super": {
-                params: {
-                  extra_body: {
-                    chat_template_kwargs: {
-                      enable_thinking: true,
-                    },
-                  },
-                },
-              },
-            },
-          },
-        },
-      },
-      model: {
-        api: "openai-completions",
-        provider: "vllm",
-        id: "nemotron-3-super",
-        baseUrl: "http://127.0.0.1:8000/v1",
-      } as Model<"openai-completions">,
-      payload: {
-        messages: [],
-      },
-      thinkingLevel: "off",
-    });
-
-    expect(payload.chat_template_kwargs).toEqual({
-      enable_thinking: true,
-    });
-  });
-
   it("warns and skips invalid chat_template_kwargs params", () => {
     const warnSpy = vi.spyOn(log, "warn").mockImplementation(() => {});
     try {
diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts
index 286f46caa85..aa9cc94a569 100644
--- a/src/agents/pi-embedded-runner/extra-params.ts
+++ b/src/agents/pi-embedded-runner/extra-params.ts
@@ -462,63 +462,25 @@ function resolveChatTemplateKwargsParam(
   return Object.keys(chatTemplateKwargs).length > 0 ? chatTemplateKwargs : undefined;
 }
 
-function isVllmNemotronModel(model: ProviderRuntimeModel): boolean {
-  return (
-    model.api === "openai-completions" &&
-    typeof model.provider === "string" &&
-    model.provider.toLowerCase() === "vllm" &&
-    typeof model.id === "string" &&
-    /\bnemotron-3(?:[-_](?:nano|super|ultra))?\b/i.test(model.id)
-  );
-}
-
-function resolveOpenAICompletionsChatTemplateKwargs(params: {
-  model: ProviderRuntimeModel;
-  thinkingLevel?: ThinkLevel;
-  configured?: Record<string, unknown>;
-}): Record<string, unknown> | undefined {
-  const defaults =
-    params.thinkingLevel === "off" && isVllmNemotronModel(params.model)
-      ? {
-          enable_thinking: false,
-          force_nonempty_content: true,
-        }
-      : undefined;
-  const merged = {
-    ...defaults,
-    ...params.configured,
-  };
-  return Object.keys(merged).length > 0 ? merged : undefined;
-}
-
 function createOpenAICompletionsChatTemplateKwargsWrapper(params: {
   baseStreamFn: StreamFn | undefined;
-  configured?: Record<string, unknown>;
-  thinkingLevel?: ThinkLevel;
+  configured: Record<string, unknown>;
 }): StreamFn {
   const underlying = params.baseStreamFn ?? streamSimple;
   return (model, context, options) => {
     if (model.api !== "openai-completions") {
       return underlying(model, context, options);
     }
-    const chatTemplateKwargs = resolveOpenAICompletionsChatTemplateKwargs({
-      model: model as ProviderRuntimeModel,
-      thinkingLevel: params.thinkingLevel,
-      configured: params.configured,
-    });
-    if (!chatTemplateKwargs) {
-      return underlying(model, context, options);
-    }
     return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => {
       const existing = payloadObj.chat_template_kwargs;
       if (existing && typeof existing === "object" && !Array.isArray(existing)) {
         payloadObj.chat_template_kwargs = {
           ...(existing as Record<string, unknown>),
-          ...chatTemplateKwargs,
+          ...params.configured,
         };
         return;
       }
-      payloadObj.chat_template_kwargs = chatTemplateKwargs;
+      payloadObj.chat_template_kwargs = params.configured;
     });
   };
 }
@@ -614,11 +576,10 @@ function applyPostPluginStreamWrappers(
     "chatTemplateKwargs",
   );
   const configuredChatTemplateKwargs = resolveChatTemplateKwargsParam(rawChatTemplateKwargs);
-  if (configuredChatTemplateKwargs || ctx.thinkingLevel === "off") {
+  if (configuredChatTemplateKwargs) {
     ctx.agent.streamFn = createOpenAICompletionsChatTemplateKwargsWrapper({
       baseStreamFn: ctx.agent.streamFn,
       configured: configuredChatTemplateKwargs,
-      thinkingLevel: ctx.thinkingLevel,
     });
   }
 
diff --git a/src/plugin-sdk/provider-stream-shared.test.ts b/src/plugin-sdk/provider-stream-shared.test.ts
index d152c17ef20..f12ba8e0f16 100644
--- a/src/plugin-sdk/provider-stream-shared.test.ts
+++ b/src/plugin-sdk/provider-stream-shared.test.ts
@@ -7,6 +7,7 @@ import {
   defaultToolStreamExtraParams,
   decodeHtmlEntitiesInObject,
   hasCopilotVisionInput,
+  isOpenAICompatibleThinkingEnabled,
 } from "./provider-stream-shared.js";
 
 type FakeWrappedStream = {
@@ -64,6 +65,43 @@
   });
 });
 
+describe("isOpenAICompatibleThinkingEnabled", () => {
+  it("uses explicit request reasoning before session thinking level", () => {
+    expect(
+      isOpenAICompatibleThinkingEnabled({
+        thinkingLevel: "high",
+        options: { reasoning: "none" } as never,
+      }),
+    ).toBe(false);
+    expect(
+      isOpenAICompatibleThinkingEnabled({
+        thinkingLevel: "off",
+        options: { reasoningEffort: "medium" } as never,
+      }),
+    ).toBe(true);
+  });
+
+  it("treats off and none as disabled", () => {
+    expect(isOpenAICompatibleThinkingEnabled({ thinkingLevel: "off", options: {} })).toBe(false);
+    expect(
+      isOpenAICompatibleThinkingEnabled({
+        thinkingLevel: "high",
+        options: { reasoning: "none" } as never,
+      }),
+    ).toBe(false);
+  });
+
+  it("defaults to enabled for missing or non-string values", () => {
+    expect(isOpenAICompatibleThinkingEnabled({ thinkingLevel: undefined, options: {} })).toBe(true);
+    expect(
+      isOpenAICompatibleThinkingEnabled({
+        thinkingLevel: "off",
+        options: { reasoning: { effort: "off" } } as never,
+      }),
+    ).toBe(true);
+  });
+});
+
 describe("buildCopilotDynamicHeaders", () => {
   it("matches Copilot IDE-style request headers without the legacy Openai-Intent", () => {
     expect(
diff --git a/src/plugin-sdk/provider-stream-shared.ts b/src/plugin-sdk/provider-stream-shared.ts
index c02d0a566a4..065d699966f 100644
--- a/src/plugin-sdk/provider-stream-shared.ts
+++ b/src/plugin-sdk/provider-stream-shared.ts
@@ -154,6 +154,21 @@ export function createPayloadPatchStreamWrapper(
   };
 }
 
+export type OpenAICompatibleThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"];
+
+export function isOpenAICompatibleThinkingEnabled(params: {
+  thinkingLevel: OpenAICompatibleThinkingLevel;
+  options: Parameters<StreamFn>[2];
+}): boolean {
+  const options = (params.options ?? {}) as { reasoningEffort?: unknown; reasoning?: unknown };
+  const raw = options.reasoningEffort ?? options.reasoning ?? params.thinkingLevel ?? "high";
+  if (typeof raw !== "string") {
+    return true;
+  }
+  const normalized = raw.trim().toLowerCase();
+  return normalized !== "off" && normalized !== "none";
+}
+
 export type DeepSeekV4ThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"];
 
 function isDisabledDeepSeekV4ThinkingLevel(thinkingLevel: DeepSeekV4ThinkingLevel): boolean {
diff --git a/src/plugin-sdk/provider-stream.ts b/src/plugin-sdk/provider-stream.ts
index 389d98ce833..ab9c9f42354 100644
--- a/src/plugin-sdk/provider-stream.ts
+++ b/src/plugin-sdk/provider-stream.ts
@@ -43,6 +43,7 @@ export {
   defaultToolStreamExtraParams,
   hasCopilotVisionInput,
   isAnthropicBedrockModel,
+  isOpenAICompatibleThinkingEnabled,
   type ProviderStreamWrapperFactory,
   resolveAnthropicPayloadPolicy,
   resolveMoonshotThinkingType,
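
Taken together, the docs hunks above define a three-level precedence for `chat_template_kwargs` on vLLM completions routes: plugin-generated Nemotron defaults, then explicit `params.chat_template_kwargs`, then `extra_body.chat_template_kwargs`. A minimal standalone sketch of that merge order (the helper name is illustrative, not the production code):

```ts
// Sketch only: models the precedence described in the docs hunks above.
// Later spreads win, so: plugin defaults < configured kwargs < extra_body kwargs.
type Kwargs = Record<string, unknown>;

function mergeChatTemplateKwargsSketch(params: {
  nemotronThinkingOff: boolean; // the plugin injects defaults only in this case
  configured?: Kwargs; // params.chat_template_kwargs
  extraBody?: Kwargs; // params.extra_body.chat_template_kwargs
}): Kwargs {
  const defaults: Kwargs = params.nemotronThinkingOff
    ? { enable_thinking: false, force_nonempty_content: true }
    : {};
  return { ...defaults, ...params.configured, ...params.extraBody };
}

// => { enable_thinking: true, force_nonempty_content: true }
console.log(
  mergeChatTemplateKwargsSketch({
    nemotronThinkingOff: true,
    configured: { enable_thinking: true },
  }),
);
```

This mirrors the new test "preserves existing Nemotron 3 chat-template kwargs over defaults": an explicit `enable_thinking: true` survives, while the uncontested `force_nonempty_content: true` default still lands.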
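For completeness, a hedged sketch of the config shape those docs keys describe. The key paths come from the config-agents.md hunk; the model ids and the TypeScript object-literal framing are placeholders (write the equivalent in whatever format your OpenClaw config file actually uses):

```ts
// Assumed shape, mirroring the documented key paths; ids are placeholders.
export const openclawAgentsConfigSketch = {
  agents: {
    defaults: {
      models: {
        "vllm/nemotron-3-super": {
          params: {
            // Explicit kwargs override the plugin's generated Nemotron defaults,
            chat_template_kwargs: { enable_thinking: true },
            // and extra_body.chat_template_kwargs has final precedence.
            extra_body: { chat_template_kwargs: { force_nonempty_content: false } },
          },
        },
        "vllm/qwen3-8b": {
          // Qwen-on-vLLM thinking control uses a separate knob on that model entry.
          params: { qwenThinkingFormat: "chat-template" }, // or "top-level"
        },
      },
    },
  },
};
```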