From e862e0acb5777ac1ac7624ef497c78a59f277e62 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Mon, 27 Apr 2026 10:37:52 +0100
Subject: [PATCH] fix(providers): guard self-hosted model discovery

---
 CHANGELOG.md                               |  1 +
 .../provider-self-hosted-setup.test.ts     | 79 ++++++++++++++++-
 src/plugins/provider-self-hosted-setup.ts  | 86 +++++++++++++------
 3 files changed, 136 insertions(+), 30 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d79fc7cf2b..063a9d2016a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,6 +38,7 @@ Docs: https://docs.openclaw.ai
 - Agents/Bedrock: stop heartbeat runs from persisting blank user transcript turns and repair existing blank user text messages before replay, preventing AWS Bedrock `ContentBlock` blank-text validation failures. Fixes #72640 and #72622. Thanks @goldzulu.
 - Agents/LM Studio: promote standalone bracketed local-model tool requests into registered tool calls and hide unsupported bracket blocks from visible replies, so MemPalace MCP lookups do not print raw `[tool]` JSON scaffolding in chat. Fixes #66178. Thanks @detroit357.
 - Local models: warn when an assistant reply looks like a tool call but the provider emitted plain text instead of a structured tool invocation, making fake/non-executed tool calls visible in logs. Fixes #51332. Thanks @emilclaw.
+- Local models: route self-hosted OpenAI-compatible model discovery through the guarded fetch path pinned to the configured host, covering vLLM and SGLang setup without reopening local/LAN SSRF probes. Supersedes #46359. Thanks @cdxiaodong.
 - Local models: classify terminated, reset, closed, timeout, and aborted model-call failures and attach a process memory snapshot to the diagnostic event, making LM Studio/Ollama RAM-pressure failures easier to prove from stability bundles. Refs #65551. Thanks @BigWiLLi111.
 - Local models: pass configured provider request timeouts through OpenAI SDK transports and the model idle watchdog so long-running local or custom OpenAI-compatible streams use one timeout knob instead of hitting the SDK's 10-minute default or the 120s idle default. Fixes #63663. Thanks @aidiffuser.
 - LM Studio: trust configured LM Studio loopback, LAN, and tailnet endpoints for guarded model requests by default, preserving explicit private-network opt-outs. Refs #60994. Thanks @tnowakow.
diff --git a/src/plugins/provider-self-hosted-setup.test.ts b/src/plugins/provider-self-hosted-setup.test.ts
index 6cb2e2569df..b0fe8633241 100644
--- a/src/plugins/provider-self-hosted-setup.test.ts
+++ b/src/plugins/provider-self-hosted-setup.test.ts
@@ -1,8 +1,19 @@
 import { beforeEach, describe, expect, it, vi } from "vitest";
-import { configureOpenAICompatibleSelfHostedProviderNonInteractive } from "./provider-self-hosted-setup.js";
+import {
+  configureOpenAICompatibleSelfHostedProviderNonInteractive,
+  discoverOpenAICompatibleLocalModels,
+} from "./provider-self-hosted-setup.js";
 import type { ProviderAuthMethodNonInteractiveContext } from "./types.js";
 
-const upsertAuthProfileWithLock = vi.hoisted(() => vi.fn(async () => null));
+const { fetchWithSsrFGuardMock, upsertAuthProfileWithLock } = vi.hoisted(() => ({
+  fetchWithSsrFGuardMock: vi.fn(),
+  upsertAuthProfileWithLock: vi.fn(async () => null),
+}));
+
+vi.mock("../infra/net/fetch-guard.js", () => ({
+  fetchWithSsrFGuard: fetchWithSsrFGuardMock,
+}));
+
 vi.mock("../agents/auth-profiles/upsert-with-lock.js", () => ({
   upsertAuthProfileWithLock,
 }));
@@ -74,6 +85,70 @@ async function configureSelfHostedTestProvider(params: {
   });
 }
 
+describe("discoverOpenAICompatibleLocalModels", () => {
+  it("uses guarded fetch pinned to the configured self-hosted provider", async () => {
+    const release = vi.fn(async () => undefined);
+    fetchWithSsrFGuardMock.mockResolvedValueOnce({
+      response: new Response(JSON.stringify({ data: [{ id: "Qwen/Qwen3-32B" }] }), {
+        status: 200,
+      }),
+      finalUrl: "http://127.0.0.1:8000/v1/models",
+      release,
+    });
+
+    const models = await discoverOpenAICompatibleLocalModels({
+      baseUrl: "http://127.0.0.1:8000/v1/",
+      apiKey: "self-hosted-test-key",
+      label: "vLLM",
+      env: {},
+    });
+
+    expect(models).toEqual([
+      expect.objectContaining({
+        id: "Qwen/Qwen3-32B",
+        name: "Qwen/Qwen3-32B",
+      }),
+    ]);
+    expect(fetchWithSsrFGuardMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        url: "http://127.0.0.1:8000/v1/models",
+        init: { headers: { Authorization: "Bearer self-hosted-test-key" } },
+        policy: {
+          hostnameAllowlist: ["127.0.0.1"],
+          allowPrivateNetwork: true,
+        },
+        timeoutMs: 5000,
+      }),
+    );
+    expect(release).toHaveBeenCalledOnce();
+  });
+
+  it("does not allowlist always-blocked metadata hostnames", async () => {
+    const release = vi.fn(async () => undefined);
+    fetchWithSsrFGuardMock.mockResolvedValueOnce({
+      response: new Response(JSON.stringify({ data: [{ id: "metadata-probe" }] }), {
+        status: 200,
+      }),
+      finalUrl: "http://metadata.google.internal/v1/models",
+      release,
+    });
+
+    await discoverOpenAICompatibleLocalModels({
+      baseUrl: "http://metadata.google.internal/v1",
+      label: "vLLM",
+      env: {},
+    });
+
+    expect(fetchWithSsrFGuardMock).toHaveBeenCalledWith(
+      expect.objectContaining({
+        url: "http://metadata.google.internal/v1/models",
+        policy: undefined,
+      }),
+    );
+    expect(release).toHaveBeenCalledOnce();
+  });
+});
+
 describe("configureOpenAICompatibleSelfHostedProviderNonInteractive", () => {
   it.each([
     {
diff --git a/src/plugins/provider-self-hosted-setup.ts b/src/plugins/provider-self-hosted-setup.ts
index a49da805167..0e23748e4b8 100644
--- a/src/plugins/provider-self-hosted-setup.ts
+++ b/src/plugins/provider-self-hosted-setup.ts
@@ -7,6 +7,8 @@ import {
 } from "../agents/self-hosted-provider-defaults.js";
 import type { ModelDefinitionConfig } from "../config/types.models.js";
 import type { OpenClawConfig } from "../config/types.openclaw.js";
+import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
"../infra/net/fetch-guard.js"; +import type { SsrFPolicy } from "../infra/net/ssrf.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { normalizeOptionalString, @@ -40,6 +42,26 @@ function isReasoningModelHeuristic(modelId: string): boolean { return /r1|reasoning|think|reason/i.test(modelId); } +const SELF_HOSTED_ALWAYS_BLOCKED_HOSTNAMES = new Set(["metadata.google.internal"]); + +function buildSelfHostedBaseUrlSsrFPolicy(baseUrl: string): SsrFPolicy | undefined { + try { + const parsed = new URL(baseUrl.trim()); + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { + return undefined; + } + if (SELF_HOSTED_ALWAYS_BLOCKED_HOSTNAMES.has(parsed.hostname.toLowerCase())) { + return undefined; + } + return { + hostnameAllowlist: [parsed.hostname], + allowPrivateNetwork: true, + }; + } catch { + return undefined; + } +} + export async function discoverOpenAICompatibleLocalModels(params: { baseUrl: string; apiKey?: string; @@ -58,36 +80,44 @@ export async function discoverOpenAICompatibleLocalModels(params: { try { const trimmedApiKey = normalizeOptionalString(params.apiKey); - const response = await fetch(url, { - headers: trimmedApiKey ? { Authorization: `Bearer ${trimmedApiKey}` } : undefined, - signal: AbortSignal.timeout(5000), + const { response, release } = await fetchWithSsrFGuard({ + url, + init: { + headers: trimmedApiKey ? { Authorization: `Bearer ${trimmedApiKey}` } : undefined, + }, + policy: buildSelfHostedBaseUrlSsrFPolicy(trimmedBaseUrl), + timeoutMs: 5000, }); - if (!response.ok) { - log.warn(`Failed to discover ${params.label} models: ${response.status}`); - return []; - } - const data = (await response.json()) as OpenAICompatModelsResponse; - const models = data.data ?? []; - if (models.length === 0) { - log.warn(`No ${params.label} models found on local instance`); - return []; - } + try { + if (!response.ok) { + log.warn(`Failed to discover ${params.label} models: ${response.status}`); + return []; + } + const data = (await response.json()) as OpenAICompatModelsResponse; + const models = data.data ?? []; + if (models.length === 0) { + log.warn(`No ${params.label} models found on local instance`); + return []; + } - return models - .map((model) => ({ id: normalizeOptionalString(model.id) ?? "" })) - .filter((model) => Boolean(model.id)) - .map((model) => { - const modelId = model.id; - return { - id: modelId, - name: modelId, - reasoning: isReasoningModelHeuristic(modelId), - input: ["text"], - cost: SELF_HOSTED_DEFAULT_COST, - contextWindow: params.contextWindow ?? SELF_HOSTED_DEFAULT_CONTEXT_WINDOW, - maxTokens: params.maxTokens ?? SELF_HOSTED_DEFAULT_MAX_TOKENS, - } satisfies ModelDefinitionConfig; - }); + return models + .map((model) => ({ id: normalizeOptionalString(model.id) ?? "" })) + .filter((model) => Boolean(model.id)) + .map((model) => { + const modelId = model.id; + return { + id: modelId, + name: modelId, + reasoning: isReasoningModelHeuristic(modelId), + input: ["text"], + cost: SELF_HOSTED_DEFAULT_COST, + contextWindow: params.contextWindow ?? SELF_HOSTED_DEFAULT_CONTEXT_WINDOW, + maxTokens: params.maxTokens ?? SELF_HOSTED_DEFAULT_MAX_TOKENS, + } satisfies ModelDefinitionConfig; + }); + } finally { + await release(); + } } catch (error) { log.warn(`Failed to discover ${params.label} models: ${String(error)}`); return [];