Files
openclaw/extensions/vllm/thinking-policy.ts
rendrag-git e153eceea5 fix(vllm): wire configured thinking params
Move vLLM Qwen thinking control onto configured model compat metadata and carry it through catalog/model-selection/runtime thinking contexts.

Also migrate legacy provider/default request params in doctor and keep Pi/runtime model rows buildable with explicit reasoning defaults.

Thanks @rendrag-git.

Co-authored-by: rendrag-git <253747599+rendrag-git@users.noreply.github.com>
2026-05-27 13:32:18 +01:00

66 lines
1.9 KiB
TypeScript

import type {
ProviderDefaultThinkingPolicyContext,
ProviderThinkingProfile,
} from "openclaw/plugin-sdk/plugin-entry";
import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared";
/** Canonical identifiers for the two vLLM Qwen thinking formats recognized by this module. */
export type VllmQwenThinkingFormat = "chat-template" | "top-level";

/**
 * Two-level thinking profile: an "off" level and a "low" level that is
 * labelled "on". The default level is "off".
 */
const VLLM_BINARY_THINKING_PROFILE = {
  levels: [
    { id: "off" },
    { id: "low", label: "on" },
  ],
  defaultLevel: "off",
} satisfies ProviderThinkingProfile;
/**
 * Maps a loosely spelled thinking-format value onto its canonical form.
 *
 * Non-string input yields `undefined`. Strings are trimmed, lower-cased, and
 * have every underscore turned into a hyphen before being compared against
 * the accepted spellings.
 *
 * @param value - Raw value of unknown type.
 * @returns `"chat-template"` or `"top-level"` for a recognized spelling,
 *   otherwise `undefined`.
 */
export function normalizeVllmQwenThinkingFormat(
  value: unknown,
): VllmQwenThinkingFormat | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const key = value.trim().toLowerCase().replace(/_/g, "-");
  switch (key) {
    // Accepted spellings of the chat-template format.
    case "chat-template":
    case "chat-template-kwargs":
    case "chat-template-kwarg":
    case "chat-template-arguments":
    case "qwen-chat-template":
      return "chat-template";
    // Accepted spellings of the top-level format.
    case "top-level":
    case "enable-thinking":
    case "request-body":
    case "qwen":
      return "top-level";
    default:
      return undefined;
  }
}
/**
 * Reads `thinkingFormat` from optional compat metadata and normalizes it.
 *
 * @param compat - Optional compat object; when absent, the value passed to
 *   the normalizer is `undefined`.
 * @returns Whatever `normalizeVllmQwenThinkingFormat` returns for the
 *   `thinkingFormat` value.
 */
export function resolveVllmQwenThinkingFormatFromCompat(
  compat?: ProviderDefaultThinkingPolicyContext["compat"],
): VllmQwenThinkingFormat | undefined {
  const rawFormat = compat?.thinkingFormat;
  return normalizeVllmQwenThinkingFormat(rawFormat);
}
/**
 * Tests whether a model id contains `nemotron-3`, optionally followed by a
 * `-`/`_` separated `nano`, `super`, or `ultra` suffix. The match is
 * case-insensitive and bounded by word boundaries on both sides.
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when the pattern matches anywhere in `modelId`.
 */
function isVllmNemotronThinkingModel(modelId: string): boolean {
  const nemotron3Pattern = /\bnemotron-3(?:[-_](?:nano|super|ultra))?\b/i;
  return nemotron3Pattern.test(modelId);
}
/**
 * Chooses the thinking profile for a vLLM model, if any.
 *
 * Returns `null` when the normalized provider id is not `"vllm"` or when
 * `ctx.reasoning` is explicitly `false`. Otherwise the binary profile is
 * returned when either the compat metadata yields a recognized Qwen thinking
 * format, or `ctx.reasoning` is `true` and the model id matches the Nemotron
 * check.
 *
 * @param ctx - Thinking policy context providing `provider`, `reasoning`,
 *   `compat`, and `modelId`.
 * @returns `VLLM_BINARY_THINKING_PROFILE` or `null`.
 */
export function resolveThinkingProfile(
  ctx: ProviderDefaultThinkingPolicyContext,
): ProviderThinkingProfile | null {
  const isVllmProvider = normalizeProviderId(ctx.provider) === "vllm";
  if (!isVllmProvider || ctx.reasoning === false) {
    return null;
  }
  // A recognized Qwen format is sufficient by itself.
  if (resolveVllmQwenThinkingFormatFromCompat(ctx.compat)) {
    return VLLM_BINARY_THINKING_PROFILE;
  }
  // Nemotron models additionally require reasoning to be explicitly enabled.
  const isReasoningNemotron =
    ctx.reasoning === true && isVllmNemotronThinkingModel(ctx.modelId);
  return isReasoningNemotron ? VLLM_BINARY_THINKING_PROFILE : null;
}