mirror of https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00

Onboarding: add vLLM provider support

committed by Peter Steinberger
parent 54bf5d0f41 · commit e73d881c50
@@ -259,6 +259,32 @@ ollama pull llama3.3
 
 Ollama is automatically detected when running locally at `http://127.0.0.1:11434/v1`. See [/providers/ollama](/providers/ollama) for model recommendations and custom configuration.
 
+### vLLM
+
+vLLM is a local (or self-hosted) OpenAI-compatible server:
+
+- Provider: `vllm`
+- Auth: Optional (depends on your server)
+- Default base URL: `http://127.0.0.1:8000/v1`
+
+To opt in to auto-discovery locally (any value works if your server doesn’t enforce auth):
+
+```bash
+export VLLM_API_KEY="vllm-local"
+```
+
+Then set a model (replace with one of the IDs returned by `/v1/models`):
+
+```json5
+{
+  agents: {
+    defaults: { model: { primary: "vllm/your-model-id" } },
+  },
+}
+```
+
+See [/providers/vllm](/providers/vllm) for details.
+
 ### Local proxies (LM Studio, vLLM, LiteLLM, etc.)
 
 Example (OpenAI‑compatible):
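For context, a minimal sketch of starting the server this section assumes, using vLLM's stock OpenAI-compatible entrypoints (model name illustrative):

```bash
# Recent vLLM releases: serve a model on the default port 8000.
vllm serve meta-llama/Meta-Llama-3-8B-Instruct

# Older releases expose the same server via the module entrypoint:
python -m vllm.entrypoints.openai.api_server \
  --model meta-llama/Meta-Llama-3-8B-Instruct --port 8000
```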
@@ -52,6 +52,7 @@ See [Venice AI](/providers/venice).
 - [MiniMax](/providers/minimax)
 - [Venice (Venice AI, privacy-focused)](/providers/venice)
 - [Ollama (local models)](/providers/ollama)
+- [vLLM (local models)](/providers/vllm)
 - [Qianfan](/providers/qianfan)
 
 ## Transcription providers
docs/providers/vllm.md (new file, 92 lines)
@@ -0,0 +1,92 @@
+---
+summary: "Run OpenClaw with vLLM (OpenAI-compatible local server)"
+read_when:
+  - You want to run OpenClaw against a local vLLM server
+  - You want OpenAI-compatible /v1 endpoints with your own models
+title: "vLLM"
+---
+
+# vLLM
+
+vLLM can serve open-source (and some custom) models via an **OpenAI-compatible** HTTP API. OpenClaw can connect to vLLM using the `openai-completions` API.
+
+OpenClaw can also **auto-discover** available models from vLLM when you opt in with `VLLM_API_KEY` (any value works if your server doesn’t enforce auth) and you do not define an explicit `models.providers.vllm` entry.
+
+## Quick start
+
+1. Start vLLM with an OpenAI-compatible server.
+
+   Your base URL should expose `/v1` endpoints (e.g. `/v1/models`, `/v1/chat/completions`). vLLM commonly runs on:
+
+   - `http://127.0.0.1:8000/v1`
+
+2. Opt in (any value works if no auth is configured):
+
+   ```bash
+   export VLLM_API_KEY="vllm-local"
+   ```
+
+3. Select a model (replace with one of your vLLM model IDs):
+
+   ```json5
+   {
+     agents: {
+       defaults: {
+         model: { primary: "vllm/your-model-id" },
+       },
+     },
+   }
+   ```
+
+## Model discovery (implicit provider)
+
+When `VLLM_API_KEY` is set (or an auth profile exists) and you **do not** define `models.providers.vllm`, OpenClaw will query:
+
+- `GET http://127.0.0.1:8000/v1/models`
+
+…and convert the returned IDs into model entries.
+
+If you set `models.providers.vllm` explicitly, auto-discovery is skipped and you must define models manually.
+
+## Explicit configuration (manual models)
+
+Use explicit config when:
+
+- vLLM runs on a different host/port.
+- You want to pin `contextWindow`/`maxTokens` values.
+- Your server requires a real API key (or you want to control headers).
+
+```json5
+{
+  models: {
+    providers: {
+      vllm: {
+        baseUrl: "http://127.0.0.1:8000/v1",
+        apiKey: "${VLLM_API_KEY}",
+        api: "openai-completions",
+        models: [
+          {
+            id: "your-model-id",
+            name: "Local vLLM Model",
+            reasoning: false,
+            input: ["text"],
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+            contextWindow: 128000,
+            maxTokens: 8192,
+          },
+        ],
+      },
+    },
+  },
+}
+```
+
+## Troubleshooting
+
+- Check the server is reachable:
+
+  ```bash
+  curl http://127.0.0.1:8000/v1/models
+  ```
+
+- If requests fail with auth errors, set a real `VLLM_API_KEY` that matches your server configuration, or configure the provider explicitly under `models.providers.vllm`.
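As a sanity check on the doc above, the discovery endpoint can be queried by hand; the response shape below is the standard OpenAI-compatible `/v1/models` list (model ID illustrative):

```bash
# Each returned id becomes a selectable "vllm/<id>" model after discovery.
curl -s -H "Authorization: Bearer $VLLM_API_KEY" http://127.0.0.1:8000/v1/models
# => {"object":"list","data":[{"id":"meta-llama/Meta-Llama-3-8B-Instruct","object":"model"}]}
```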
@@ -309,6 +309,7 @@ export function resolveEnvApiKey(provider: string): EnvApiKeyResult | null {
     together: "TOGETHER_API_KEY",
     qianfan: "QIANFAN_API_KEY",
     ollama: "OLLAMA_API_KEY",
+    vllm: "VLLM_API_KEY",
   };
   const envVar = envMap[normalized];
   if (!envVar) {
@@ -85,6 +85,16 @@ const OLLAMA_DEFAULT_COST = {
   cacheWrite: 0,
 };
 
+const VLLM_BASE_URL = "http://127.0.0.1:8000/v1";
+const VLLM_DEFAULT_CONTEXT_WINDOW = 128000;
+const VLLM_DEFAULT_MAX_TOKENS = 8192;
+const VLLM_DEFAULT_COST = {
+  input: 0,
+  output: 0,
+  cacheRead: 0,
+  cacheWrite: 0,
+};
+
 export const QIANFAN_BASE_URL = "https://qianfan.baidubce.com/v2";
 export const QIANFAN_DEFAULT_MODEL_ID = "deepseek-v3.2";
 const QIANFAN_DEFAULT_CONTEXT_WINDOW = 98304;
@@ -129,6 +139,11 @@ export function resolveOllamaApiBase(configuredBaseUrl?: string): string {
 }
 
+// Module-scope so discoverVllmModels below can reference it.
+type VllmModelsResponse = {
+  data?: Array<{
+    id?: string;
+  }>;
+};
+
 async function discoverOllamaModels(baseUrl?: string): Promise<ModelDefinitionConfig[]> {
   // Skip Ollama discovery in test environments
   if (process.env.VITEST || process.env.NODE_ENV === "test") {
     return [];
@@ -172,6 +187,59 @@ async function discoverOllamaModels(baseUrl?: string): Promise<ModelDefinitionCo
   }
 }
 
+async function discoverVllmModels(
+  baseUrl: string,
+  apiKey?: string,
+): Promise<ModelDefinitionConfig[]> {
+  // Skip vLLM discovery in test environments
+  if (process.env.VITEST || process.env.NODE_ENV === "test") {
+    return [];
+  }
+
+  const trimmedBaseUrl = baseUrl.trim().replace(/\/+$/, "");
+  const url = `${trimmedBaseUrl}/models`;
+
+  try {
+    const trimmedApiKey = apiKey?.trim();
+    const response = await fetch(url, {
+      headers: trimmedApiKey ? { Authorization: `Bearer ${trimmedApiKey}` } : undefined,
+      signal: AbortSignal.timeout(5000),
+    });
+    if (!response.ok) {
+      console.warn(`Failed to discover vLLM models: ${response.status}`);
+      return [];
+    }
+    const data = (await response.json()) as VllmModelsResponse;
+    const models = data.data ?? [];
+    if (models.length === 0) {
+      console.warn("No vLLM models found on local instance");
+      return [];
+    }
+
+    return models
+      .map((m) => ({ id: typeof m.id === "string" ? m.id.trim() : "" }))
+      .filter((m) => Boolean(m.id))
+      .map((m) => {
+        const modelId = m.id;
+        const lower = modelId.toLowerCase();
+        const isReasoning =
+          lower.includes("r1") || lower.includes("reasoning") || lower.includes("think");
+        return {
+          id: modelId,
+          name: modelId,
+          reasoning: isReasoning,
+          input: ["text"],
+          cost: VLLM_DEFAULT_COST,
+          contextWindow: VLLM_DEFAULT_CONTEXT_WINDOW,
+          maxTokens: VLLM_DEFAULT_MAX_TOKENS,
+        } satisfies ModelDefinitionConfig;
+      });
+  } catch (error) {
+    console.warn(`Failed to discover vLLM models: ${String(error)}`);
+    return [];
+  }
+}
+
 function normalizeApiKeyConfig(value: string): string {
   const trimmed = value.trim();
   const match = /^\$\{([A-Z0-9_]+)\}$/.exec(trimmed);
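To preview what the discovery heuristic above will decide for a live server, here is a rough shell approximation of the `r1`/`reasoning`/`think` substring check (assumes `jq` is installed):

```bash
# Mirror the isReasoning check against the ids a running server reports.
curl -s http://127.0.0.1:8000/v1/models | jq -r '.data[].id' | while read -r id; do
  case "$(printf '%s' "$id" | tr '[:upper:]' '[:lower:]')" in
    *r1*|*reasoning*|*think*) echo "$id -> reasoning: true" ;;
    *)                        echo "$id -> reasoning: false" ;;
  esac
done
```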
@@ -481,6 +549,18 @@ function buildTogetherProvider(): ProviderConfig {
   };
 }
 
+async function buildVllmProvider(params?: {
+  baseUrl?: string;
+  apiKey?: string;
+}): Promise<ProviderConfig> {
+  const baseUrl = (params?.baseUrl?.trim() || VLLM_BASE_URL).replace(/\/+$/, "");
+  const models = await discoverVllmModels(baseUrl, params?.apiKey);
+  return {
+    baseUrl,
+    api: "openai-completions",
+    models,
+  };
+}
+
 export function buildQianfanProvider(): ProviderConfig {
   return {
     baseUrl: QIANFAN_BASE_URL,
@@ -607,6 +687,23 @@ export async function resolveImplicitProviders(params: {
     providers.ollama = { ...(await buildOllamaProvider(ollamaBaseUrl)), apiKey: ollamaKey };
   }
 
+  // vLLM provider - OpenAI-compatible local server (opt-in via env/profile).
+  // If explicitly configured, keep user-defined models/settings as-is.
+  if (!params.explicitProviders?.vllm) {
+    const vllmEnvVar = resolveEnvApiKeyVarName("vllm");
+    const vllmProfileKey = resolveApiKeyFromProfiles({ provider: "vllm", store: authStore });
+    const vllmKey = vllmEnvVar ?? vllmProfileKey;
+    if (vllmKey) {
+      const discoveryApiKey = vllmEnvVar
+        ? (process.env[vllmEnvVar]?.trim() ?? "")
+        : (vllmProfileKey ?? "");
+      providers.vllm = {
+        ...(await buildVllmProvider({ apiKey: discoveryApiKey || undefined })),
+        apiKey: vllmKey,
+      };
+    }
+  }
+
   const togetherKey =
     resolveEnvApiKeyVarName("together") ??
     resolveApiKeyFromProfiles({ provider: "together", store: authStore });
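Once the implicit provider is built, requests go through the standard OpenAI-compatible chat endpoint; a hand-rolled equivalent for smoke testing (model ID illustrative):

```bash
# Roughly what the openai-completions API path sends after discovery picks a model.
curl -s http://127.0.0.1:8000/v1/chat/completions \
  -H "Authorization: Bearer $VLLM_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"model":"meta-llama/Meta-Llama-3-8B-Instruct","messages":[{"role":"user","content":"ping"}]}'
```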
src/agents/models-config.providers.vllm.test.ts (new file, 33 lines)
@@ -0,0 +1,33 @@
+import { mkdtempSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { describe, expect, it } from "vitest";
+import { resolveImplicitProviders } from "./models-config.providers.js";
+
+describe("vLLM provider", () => {
+  it("should not include vllm when no API key is configured", async () => {
+    const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
+    const providers = await resolveImplicitProviders({ agentDir });
+
+    expect(providers?.vllm).toBeUndefined();
+  });
+
+  it("should include vllm when VLLM_API_KEY is set", async () => {
+    const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
+    process.env.VLLM_API_KEY = "test-key";
+
+    try {
+      const providers = await resolveImplicitProviders({ agentDir });
+
+      expect(providers?.vllm).toBeDefined();
+      expect(providers?.vllm?.apiKey).toBe("VLLM_API_KEY");
+      expect(providers?.vllm?.baseUrl).toBe("http://127.0.0.1:8000/v1");
+      expect(providers?.vllm?.api).toBe("openai-completions");
+
+      // Note: discovery is disabled in test environments (VITEST check)
+      expect(providers?.vllm?.models).toEqual([]);
+    } finally {
+      delete process.env.VLLM_API_KEY;
+    }
+  });
+});
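These tests run under Vitest like the rest of the suite; a focused invocation (assuming the repo's standard Vitest setup):

```bash
# Run only the new vLLM provider tests.
npx vitest run src/agents/models-config.providers.vllm.test.ts
```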
@@ -58,7 +58,7 @@ export function registerOnboardCommand(program: Command) {
     .option("--mode <mode>", "Wizard mode: local|remote")
     .option(
       "--auth-choice <choice>",
-      "Auth: setup-token|token|chutes|openai-codex|openai-api-key|xai-api-key|qianfan-api-key|openrouter-api-key|litellm-api-key|ai-gateway-api-key|cloudflare-ai-gateway-api-key|moonshot-api-key|moonshot-api-key-cn|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|zai-coding-global|zai-coding-cn|zai-global|zai-cn|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|custom-api-key|skip|together-api-key",
+      "Auth: setup-token|token|chutes|vllm|openai-codex|openai-api-key|xai-api-key|qianfan-api-key|openrouter-api-key|litellm-api-key|ai-gateway-api-key|cloudflare-ai-gateway-api-key|moonshot-api-key|moonshot-api-key-cn|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|zai-coding-global|zai-coding-cn|zai-global|zai-cn|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|custom-api-key|skip|together-api-key",
     )
     .option(
       "--token-provider <id>",
@@ -134,4 +134,14 @@ describe("buildAuthChoiceOptions", () => {
 
     expect(options.some((opt) => opt.value === "xai-api-key")).toBe(true);
   });
+
+  it("includes vLLM auth choice", () => {
+    const store: AuthProfileStore = { version: 1, profiles: {} };
+    const options = buildAuthChoiceOptions({
+      store,
+      includeSkip: false,
+    });
+
+    expect(options.some((opt) => opt.value === "vllm")).toBe(true);
+  });
 });
@@ -10,6 +10,7 @@ export type AuthChoiceOption = {
 export type AuthChoiceGroupId =
   | "openai"
   | "anthropic"
+  | "vllm"
   | "google"
   | "copilot"
   | "openrouter"
@@ -54,6 +55,12 @@ const AUTH_CHOICE_GROUP_DEFS: {
     hint: "setup-token + API key",
     choices: ["token", "apiKey"],
   },
+  {
+    value: "vllm",
+    label: "vLLM",
+    hint: "Local/self-hosted OpenAI-compatible",
+    choices: ["vllm"],
+  },
   {
     value: "minimax",
     label: "MiniMax",
@@ -182,6 +189,11 @@ export function buildAuthChoiceOptions(params: {
     label: "OpenAI Codex (ChatGPT OAuth)",
   });
   options.push({ value: "chutes", label: "Chutes (OAuth)" });
+  options.push({
+    value: "vllm",
+    label: "vLLM (custom URL + model)",
+    hint: "Local/self-hosted OpenAI-compatible server",
+  });
   options.push({ value: "openai-api-key", label: "OpenAI API key" });
   options.push({ value: "xai-api-key", label: "xAI (Grok) API key" });
   options.push({
@@ -12,6 +12,7 @@ import { applyAuthChoiceMiniMax } from "./auth-choice.apply.minimax.js";
 import { applyAuthChoiceOAuth } from "./auth-choice.apply.oauth.js";
 import { applyAuthChoiceOpenAI } from "./auth-choice.apply.openai.js";
 import { applyAuthChoiceQwenPortal } from "./auth-choice.apply.qwen-portal.js";
+import { applyAuthChoiceVllm } from "./auth-choice.apply.vllm.js";
 import { applyAuthChoiceXAI } from "./auth-choice.apply.xai.js";
 
 export type ApplyAuthChoiceParams = {
@@ -42,6 +43,7 @@ export async function applyAuthChoice(
 ): Promise<ApplyAuthChoiceResult> {
   const handlers: Array<(p: ApplyAuthChoiceParams) => Promise<ApplyAuthChoiceResult | null>> = [
     applyAuthChoiceAnthropic,
+    applyAuthChoiceVllm,
     applyAuthChoiceOpenAI,
     applyAuthChoiceOAuth,
     applyAuthChoiceApiProviders,
src/commands/auth-choice.apply.vllm.ts (new file, 107 lines)
@@ -0,0 +1,107 @@
+import type { OpenClawConfig } from "../config/config.js";
+import type { ApplyAuthChoiceParams, ApplyAuthChoiceResult } from "./auth-choice.apply.js";
+import { upsertAuthProfile } from "../agents/auth-profiles.js";
+
+const VLLM_DEFAULT_BASE_URL = "http://127.0.0.1:8000/v1";
+const VLLM_DEFAULT_CONTEXT_WINDOW = 128000;
+const VLLM_DEFAULT_MAX_TOKENS = 8192;
+const VLLM_DEFAULT_COST = {
+  input: 0,
+  output: 0,
+  cacheRead: 0,
+  cacheWrite: 0,
+};
+
+function applyVllmDefaultModel(cfg: OpenClawConfig, modelRef: string): OpenClawConfig {
+  const existingModel = cfg.agents?.defaults?.model;
+  const fallbacks =
+    existingModel && typeof existingModel === "object" && "fallbacks" in existingModel
+      ? (existingModel as { fallbacks?: string[] }).fallbacks
+      : undefined;
+
+  return {
+    ...cfg,
+    agents: {
+      ...cfg.agents,
+      defaults: {
+        ...cfg.agents?.defaults,
+        model: {
+          ...(fallbacks ? { fallbacks } : undefined),
+          primary: modelRef,
+        },
+      },
+    },
+  };
+}
+
+export async function applyAuthChoiceVllm(
+  params: ApplyAuthChoiceParams,
+): Promise<ApplyAuthChoiceResult | null> {
+  if (params.authChoice !== "vllm") {
+    return null;
+  }
+
+  const baseUrlRaw = await params.prompter.text({
+    message: "vLLM base URL",
+    initialValue: VLLM_DEFAULT_BASE_URL,
+    placeholder: VLLM_DEFAULT_BASE_URL,
+    validate: (value) => (value?.trim() ? undefined : "Required"),
+  });
+  const apiKeyRaw = await params.prompter.text({
+    message: "vLLM API key",
+    placeholder: "sk-... (or any non-empty string)",
+    validate: (value) => (value?.trim() ? undefined : "Required"),
+  });
+  const modelIdRaw = await params.prompter.text({
+    message: "vLLM model",
+    placeholder: "meta-llama/Meta-Llama-3-8B-Instruct",
+    validate: (value) => (value?.trim() ? undefined : "Required"),
+  });
+
+  const baseUrl = String(baseUrlRaw ?? "")
+    .trim()
+    .replace(/\/+$/, "");
+  const apiKey = String(apiKeyRaw ?? "").trim();
+  const modelId = String(modelIdRaw ?? "").trim();
+  const modelRef = `vllm/${modelId}`;
+
+  upsertAuthProfile({
+    profileId: "vllm:default",
+    credential: { type: "api_key", provider: "vllm", key: apiKey },
+    agentDir: params.agentDir,
+  });
+
+  const nextConfig: OpenClawConfig = {
+    ...params.config,
+    models: {
+      ...params.config.models,
+      mode: params.config.models?.mode ?? "merge",
+      providers: {
+        ...params.config.models?.providers,
+        vllm: {
+          baseUrl,
+          api: "openai-completions",
+          apiKey: "VLLM_API_KEY",
+          models: [
+            {
+              id: modelId,
+              name: modelId,
+              reasoning: false,
+              input: ["text"],
+              cost: VLLM_DEFAULT_COST,
+              contextWindow: VLLM_DEFAULT_CONTEXT_WINDOW,
+              maxTokens: VLLM_DEFAULT_MAX_TOKENS,
+            },
+          ],
+        },
+      },
+    },
+  };
+
+  if (!params.setDefaultModel) {
+    return { config: nextConfig, agentModelOverride: modelRef };
+  }
+
+  await params.prompter.note(`Default model set to ${modelRef}`, "Model configured");
+  return { config: applyVllmDefaultModel(nextConfig, modelRef) };
+}
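During interactive onboarding the handler above drives three prompts; the flow looks roughly like this (the `openclaw` binary name is assumed from the repo name, and the answers are illustrative):

```bash
openclaw onboard   # pick "vLLM (custom URL + model)" at the auth step
# vLLM base URL: http://127.0.0.1:8000/v1
# vLLM API key:  sk-local-anything
# vLLM model:    meta-llama/Meta-Llama-3-8B-Instruct
```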
@@ -6,6 +6,7 @@ const PREFERRED_PROVIDER_BY_AUTH_CHOICE: Partial<Record<AuthChoice, string>> = {
   "claude-cli": "anthropic",
   token: "anthropic",
   apiKey: "anthropic",
+  vllm: "vllm",
   "openai-codex": "openai-codex",
   "codex-cli": "openai-codex",
   chutes: "chutes",
@@ -77,6 +77,9 @@ export async function promptAuthConfig(
     ignoreAllowlist: true,
     preferredProvider: resolvePreferredProviderForAuthChoice(authChoice),
   });
+  if (modelSelection.config) {
+    next = modelSelection.config;
+  }
   if (modelSelection.model) {
     next = applyPrimaryModel(next, modelSelection.model);
   }
@@ -20,9 +20,11 @@ const ensureAuthProfileStore = vi.hoisted(() =>
   })),
 );
 const listProfilesForProvider = vi.hoisted(() => vi.fn(() => []));
+const upsertAuthProfile = vi.hoisted(() => vi.fn());
 vi.mock("../agents/auth-profiles.js", () => ({
   ensureAuthProfileStore,
   listProfilesForProvider,
+  upsertAuthProfile,
 }));
 
 const resolveEnvApiKey = vi.hoisted(() => vi.fn(() => undefined));
@@ -68,6 +70,53 @@ describe("promptDefaultModel", () => {
       true,
     );
   });
+
+  it("supports configuring vLLM during onboarding", async () => {
+    loadModelCatalog.mockResolvedValue([
+      {
+        provider: "anthropic",
+        id: "claude-sonnet-4-5",
+        name: "Claude Sonnet 4.5",
+      },
+    ]);
+
+    const select = vi.fn(async (params) => {
+      const vllm = params.options.find((opt: { value: string }) => opt.value === "__vllm__");
+      return (vllm?.value ?? "") as never;
+    });
+    const text = vi
+      .fn()
+      .mockResolvedValueOnce("http://127.0.0.1:8000/v1")
+      .mockResolvedValueOnce("sk-vllm-test")
+      .mockResolvedValueOnce("meta-llama/Meta-Llama-3-8B-Instruct");
+    const prompter = makePrompter({ select, text: text as never });
+    const config = { agents: { defaults: {} } } as OpenClawConfig;
+
+    const result = await promptDefaultModel({
+      config,
+      prompter,
+      allowKeep: false,
+      includeManual: false,
+      includeVllm: true,
+      ignoreAllowlist: true,
+    });
+
+    expect(upsertAuthProfile).toHaveBeenCalledWith(
+      expect.objectContaining({
+        profileId: "vllm:default",
+        credential: expect.objectContaining({ provider: "vllm" }),
+      }),
+    );
+    expect(result.model).toBe("vllm/meta-llama/Meta-Llama-3-8B-Instruct");
+    expect(result.config?.models?.providers?.vllm).toMatchObject({
+      baseUrl: "http://127.0.0.1:8000/v1",
+      api: "openai-completions",
+      apiKey: "VLLM_API_KEY",
+      models: [
+        { id: "meta-llama/Meta-Llama-3-8B-Instruct", name: "meta-llama/Meta-Llama-3-8B-Instruct" },
+      ],
+    });
+  });
 });
 
 describe("promptModelAllowlist", () => {
@@ -1,6 +1,10 @@
 import type { OpenClawConfig } from "../config/config.js";
 import type { WizardPrompter, WizardSelectOption } from "../wizard/prompts.js";
-import { ensureAuthProfileStore, listProfilesForProvider } from "../agents/auth-profiles.js";
+import {
+  ensureAuthProfileStore,
+  listProfilesForProvider,
+  upsertAuthProfile,
+} from "../agents/auth-profiles.js";
 import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js";
 import { getCustomProviderApiKey, resolveEnvApiKey } from "../agents/model-auth.js";
 import { loadModelCatalog } from "../agents/model-catalog.js";
@@ -16,7 +20,17 @@ import { OPENAI_CODEX_DEFAULT_MODEL } from "./openai-codex-model-default.js";
 
 const KEEP_VALUE = "__keep__";
 const MANUAL_VALUE = "__manual__";
+const VLLM_VALUE = "__vllm__";
 const PROVIDER_FILTER_THRESHOLD = 30;
+const VLLM_DEFAULT_BASE_URL = "http://127.0.0.1:8000/v1";
+const VLLM_DEFAULT_CONTEXT_WINDOW = 128000;
+const VLLM_DEFAULT_MAX_TOKENS = 8192;
+const VLLM_DEFAULT_COST = {
+  input: 0,
+  output: 0,
+  cacheRead: 0,
+  cacheWrite: 0,
+};
 
 // Models that are internal routing features and should not be shown in selection lists.
 // These may be valid as defaults (e.g., set automatically during auth flow) but are not
@@ -28,13 +42,14 @@ type PromptDefaultModelParams = {
   prompter: WizardPrompter;
   allowKeep?: boolean;
   includeManual?: boolean;
+  includeVllm?: boolean;
   ignoreAllowlist?: boolean;
   preferredProvider?: string;
   agentDir?: string;
   message?: string;
 };
 
-type PromptDefaultModelResult = { model?: string };
+type PromptDefaultModelResult = { model?: string; config?: OpenClawConfig };
 type PromptModelAllowlistResult = { models?: string[] };
 
 function hasAuthForProvider(
@@ -107,6 +122,7 @@ export async function promptDefaultModel(
   const cfg = params.config;
   const allowKeep = params.allowKeep ?? true;
   const includeManual = params.includeManual ?? true;
+  const includeVllm = params.includeVllm ?? false;
   const ignoreAllowlist = params.ignoreAllowlist ?? false;
   const preferredProviderRaw = params.preferredProvider?.trim();
   const preferredProvider = preferredProviderRaw
@@ -212,6 +228,13 @@ export async function promptDefaultModel(
   if (includeManual) {
     options.push({ value: MANUAL_VALUE, label: "Enter model manually" });
   }
+  if (includeVllm) {
+    options.push({
+      value: VLLM_VALUE,
+      label: "vLLM (custom)",
+      hint: "Enter vLLM URL + API key + model",
+    });
+  }
 
   const seen = new Set<string>();
   const addModelOption = (entry: {
@@ -295,6 +318,65 @@ export async function promptDefaultModel(
       initialValue: configuredRaw || resolvedKey || undefined,
     });
   }
+  if (selection === VLLM_VALUE) {
+    const baseUrlRaw = await params.prompter.text({
+      message: "vLLM base URL",
+      initialValue: VLLM_DEFAULT_BASE_URL,
+      placeholder: VLLM_DEFAULT_BASE_URL,
+      validate: (value) => (value?.trim() ? undefined : "Required"),
+    });
+    const apiKeyRaw = await params.prompter.text({
+      message: "vLLM API key",
+      placeholder: "sk-... (or any non-empty string)",
+      validate: (value) => (value?.trim() ? undefined : "Required"),
+    });
+    const modelIdRaw = await params.prompter.text({
+      message: "vLLM model",
+      placeholder: "meta-llama/Meta-Llama-3-8B-Instruct",
+      validate: (value) => (value?.trim() ? undefined : "Required"),
+    });
+
+    const baseUrl = String(baseUrlRaw ?? "")
+      .trim()
+      .replace(/\/+$/, "");
+    const apiKey = String(apiKeyRaw ?? "").trim();
+    const modelId = String(modelIdRaw ?? "").trim();
+
+    upsertAuthProfile({
+      profileId: "vllm:default",
+      credential: { type: "api_key", provider: "vllm", key: apiKey },
+      agentDir: params.agentDir,
+    });
+
+    const nextConfig: OpenClawConfig = {
+      ...cfg,
+      models: {
+        ...cfg.models,
+        mode: cfg.models?.mode ?? "merge",
+        providers: {
+          ...cfg.models?.providers,
+          vllm: {
+            baseUrl,
+            api: "openai-completions",
+            apiKey: "VLLM_API_KEY",
+            models: [
+              {
+                id: modelId,
+                name: modelId,
+                reasoning: false,
+                input: ["text"],
+                cost: VLLM_DEFAULT_COST,
+                contextWindow: VLLM_DEFAULT_CONTEXT_WINDOW,
+                maxTokens: VLLM_DEFAULT_MAX_TOKENS,
+              },
+            ],
+          },
+        },
+      },
+    };
+
+    return { model: `vllm/${modelId}`, config: nextConfig };
+  }
   return { model: String(selection) };
 }
@@ -330,6 +330,24 @@ describe("onboard (non-interactive): provider auth", () => {
     });
   }, 60_000);
 
+  it("rejects vLLM auth choice in non-interactive mode", async () => {
+    await withOnboardEnv("openclaw-onboard-vllm-non-interactive-", async ({ runtime }) => {
+      await expect(
+        runNonInteractive(
+          {
+            nonInteractive: true,
+            authChoice: "vllm",
+            skipHealth: true,
+            skipChannels: true,
+            skipSkills: true,
+            json: true,
+          },
+          runtime,
+        ),
+      ).rejects.toThrow('Auth choice "vllm" requires interactive mode.');
+    });
+  }, 60_000);
+
   it("stores LiteLLM API key and sets default model", async () => {
     await withOnboardEnv("openclaw-onboard-litellm-", async ({ configPath, runtime }) => {
       await runNonInteractive(
@@ -88,6 +88,17 @@ export async function applyNonInteractiveAuthChoice(params: {
     return null;
   }
 
+  if (authChoice === "vllm") {
+    runtime.error(
+      [
+        'Auth choice "vllm" requires interactive mode.',
+        "Use interactive onboard/configure to enter base URL, API key, and model ID.",
+      ].join("\n"),
+    );
+    runtime.exit(1);
+    return null;
+  }
+
   if (authChoice === "apiKey") {
     const resolved = await resolveNonInteractiveApiKey({
       provider: "anthropic",
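Per the guard above, scripting this choice fails fast; for example (binary and flag names assumed from the surrounding command definitions):

```bash
openclaw onboard --auth-choice vllm --non-interactive
# => Auth choice "vllm" requires interactive mode.
#    Use interactive onboard/configure to enter base URL, API key, and model ID.
```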
@@ -9,6 +9,7 @@ export type AuthChoice =
   | "claude-cli"
   | "token"
   | "chutes"
+  | "vllm"
  | "openai-codex"
   | "openai-api-key"
   | "openrouter-api-key"
@@ -411,7 +411,13 @@ export async function runOnboardingWizard(
       ignoreAllowlist: true,
+      includeVllm: true,
       preferredProvider:
         customPreferredProvider ?? resolvePreferredProviderForAuthChoice(authChoice),
     });
+    if (modelSelection.config) {
+      nextConfig = modelSelection.config;
+    }
     if (modelSelection.model) {
       nextConfig = applyPrimaryModel(nextConfig, modelSelection.model);
     }