fix(vllm): wire configured thinking params

Move vLLM Qwen thinking control onto configured model compat metadata and carry it through catalog/model-selection/runtime thinking contexts. Also migrate legacy provider/default request params in doctor and keep Pi/runtime model rows buildable with explicit reasoning defaults. Thanks @rendrag-git. Co-authored-by: rendrag-git <253747599+rendrag-git@users.noreply.github.com>
2026-05-31 06:34:54 +00:00 · 2026-05-27 12:32:18 +00:00
parent 75221e0550
commit e153eceea5
29 changed files with 2214 additions and 85 deletions
--- a/src/plugins/provider-thinking.types.ts
+++ b/src/plugins/provider-thinking.types.ts
@@ -10,15 +10,25 @@ export type ProviderThinkingPolicyContext = {
  modelId: string;
 };

+export type ProviderThinkingModelCompat = { // Model-level thinking compat hints; both fields are optional.
+  thinkingFormat?: string; // NOTE(review): presumably names the thinking payload style configured for the model — confirm against callers.
+  supportedReasoningEfforts?: readonly string[] | null; // Read-only list of effort ids; may be omitted or null. NOTE(review): null vs. omitted semantics are not shown here — confirm.
+};
+
 /**
 * Provider-owned default thinking policy input.
 *
 * `reasoning` is the merged catalog hint for the selected model when one is
 * available. Providers can use it to keep "reasoning model => low" behavior
 * without re-reading the catalog themselves.
+ *
+ * `compat` carries model-level request contract facts for the selected model
+ * when available. Providers can use it to expose model-specific thinking
+ * profiles only when the configured payload style supports them.
 */
 export type ProviderDefaultThinkingPolicyContext = ProviderThinkingPolicyContext & {
  reasoning?: boolean;
+  compat?: ProviderThinkingModelCompat | null; // Optional; callers may omit it or pass null when no compat data is available.
 };

 export type ProviderThinkingLevelId =