fix(web-search): restrict private network guard

This commit is contained in:
Peter Steinberger
2026-05-02 06:39:40 +01:00
parent e052bdcfb6
commit 1771160d2c
12 changed files with 143 additions and 20 deletions

View File

@@ -7,8 +7,8 @@ import {
readResponseText,
resolveCacheTtlMs,
truncateText,
withSelfHostedWebToolsEndpoint,
withStrictWebToolsEndpoint,
withTrustedWebToolsEndpoint,
writeCache,
} from "openclaw/plugin-sdk/provider-web-fetch";
import { normalizeSecretInput } from "openclaw/plugin-sdk/secret-input";
@@ -45,7 +45,7 @@ const FIRECRAWL_SELF_HOSTED_PRIVATE_ERROR =
const FIRECRAWL_HTTP_PRIVATE_ERROR =
"Firecrawl HTTP baseUrl must target a private or internal self-hosted endpoint. Use https:// for public hosts.";
type FirecrawlEndpointMode = "strict" | "trusted";
type FirecrawlEndpointMode = "selfHosted" | "strict";
type FirecrawlResolvedEndpoint = {
url: string;
mode: FirecrawlEndpointMode;
@@ -124,7 +124,7 @@ async function validateFirecrawlBaseUrl(
const isPrivateTarget = await firecrawlEndpointTargetsPrivateNetwork(url, lookupFn);
if (isPrivateTarget) {
return "trusted";
return "selfHosted";
}
if (url.protocol === "http:") {
throw new Error(FIRECRAWL_HTTP_PRIVATE_ERROR);
@@ -161,7 +161,7 @@ async function postFirecrawlJson<T>(
const apiKey = normalizeSecretInput(params.apiKey);
const mode = params.mode ?? (await validateFirecrawlBaseUrl(params.url));
const withEndpoint =
mode === "trusted" ? withTrustedWebToolsEndpoint : withStrictWebToolsEndpoint;
mode === "selfHosted" ? withSelfHostedWebToolsEndpoint : withStrictWebToolsEndpoint;
return await withEndpoint(
{
url: params.url,

View File

@@ -616,7 +616,7 @@ describe("firecrawl tools", () => {
firecrawlClientTesting.resolveEndpoint("http://127.0.0.1:8787", "/v2/scrape"),
).resolves.toEqual({
url: "http://127.0.0.1:8787/v2/scrape",
mode: "trusted",
mode: "selfHosted",
});
await expect(
firecrawlClientTesting.resolveEndpoint(
@@ -625,7 +625,7 @@ describe("firecrawl tools", () => {
),
).resolves.toEqual({
url: "https://host.openshell.internal:444/v2/search",
mode: "trusted",
mode: "selfHosted",
});
await expect(
firecrawlClientTesting.resolveEndpoint("http://api.firecrawl.dev", "/v2/scrape"),
@@ -638,7 +638,7 @@ describe("firecrawl tools", () => {
).rejects.toThrow("Firecrawl baseUrl must use http:// or https://.");
});
it("routes private self-hosted Firecrawl endpoints through the trusted fetch guard", async () => {
it("routes private self-hosted Firecrawl endpoints through the self-hosted fetch guard", async () => {
ssrfMock?.mockRestore();
ssrfMock = mockPinnedHostnameResolution(["127.0.0.1"]);
const fetchSpy = vi.fn(

View File

@@ -66,8 +66,11 @@ describe("searxng client", () => {
it("allows https public hosts", async () => {
await expect(
__testing.validateSearxngBaseUrl("https://search.example.com/searxng"),
).resolves.toBeUndefined();
__testing.validateSearxngBaseUrl(
"https://search.example.com/searxng",
createLookupFn([{ address: "93.184.216.34", family: 4 }]),
),
).resolves.toBe("strict");
});
it("allows cleartext private-network hosts", async () => {
@@ -76,7 +79,16 @@ describe("searxng client", () => {
"http://matrix-synapse:8080",
createLookupFn([{ address: "10.0.0.5", family: 4 }]),
),
).resolves.toBeUndefined();
).resolves.toBe("selfHosted");
});
// Expects validateSearxngBaseUrl to resolve to "selfHosted" for an https:// base URL
// when the lookup function passed alongside it is built from the single address 10.0.0.5.
// NOTE(review): createLookupFn is not visible in this hunk — presumably a DNS lookup stub
// defined elsewhere in the test file; confirm.
it("routes https private-network hosts through the self-hosted guard", async () => {
await expect(
__testing.validateSearxngBaseUrl(
"https://search.internal/searxng",
createLookupFn([{ address: "10.0.0.5", family: 4 }]),
),
).resolves.toBe("selfHosted");
});
it("rejects cleartext public hosts", async () => {

View File

@@ -9,12 +9,16 @@ import {
resolveSearchCount,
resolveSiteName,
resolveTimeoutSeconds,
withSelfHostedWebSearchEndpoint,
withTrustedWebSearchEndpoint,
wrapWebContent,
writeCache,
} from "openclaw/plugin-sdk/provider-web-search";
import {
assertHttpUrlTargetsPrivateNetwork,
isBlockedHostnameOrIp,
isPrivateIpAddress,
resolvePinnedHostnameWithPolicy,
type LookupFn,
} from "openclaw/plugin-sdk/ssrf-runtime";
import {
@@ -25,6 +29,7 @@ import {
const DEFAULT_TIMEOUT_SECONDS = 20;
const MAX_RESPONSE_BYTES = 1_000_000;
type SearxngEndpointMode = "selfHosted" | "strict";
const SEARXNG_SEARCH_CACHE = new Map<
string,
@@ -79,7 +84,31 @@ function buildSearxngSearchUrl(params: {
return url.toString();
}
async function validateSearxngBaseUrl(baseUrl: string, lookupFn?: LookupFn): Promise<void> {
/**
 * Reports whether a SearXNG endpoint URL should be treated as a private-network target.
 *
 * A hostname that matches `isBlockedHostnameOrIp` is reported as private without any
 * lookup. Otherwise the hostname is resolved via `resolvePinnedHostnameWithPolicy`
 * (called with `allowPrivateNetwork` and `allowRfc2544BenchmarkRange` enabled) and the
 * endpoint counts as private only when at least one address was returned and every
 * returned address satisfies `isPrivateIpAddress`.
 *
 * @param url - Parsed endpoint URL; only `url.hostname` is inspected.
 * @param lookupFn - Optional lookup override forwarded to the resolver.
 * @returns `true` for a private target; `false` when any resolved address is not
 *   private, when no addresses were returned, or when resolution throws.
 */
async function searxngEndpointTargetsPrivateNetwork(
  url: URL,
  lookupFn?: LookupFn,
): Promise<boolean> {
  if (isBlockedHostnameOrIp(url.hostname)) {
    return true;
  }
  try {
    const { addresses } = await resolvePinnedHostnameWithPolicy(url.hostname, {
      lookupFn,
      policy: {
        allowPrivateNetwork: true,
        allowRfc2544BenchmarkRange: true,
      },
    });
    // `every` is vacuously true on an empty list; require at least one address so an
    // empty resolution is never classified as a private target.
    return addresses.length > 0 && addresses.every((address) => isPrivateIpAddress(address));
  } catch {
    // Deliberate best-effort: a failed resolution is reported as "not private".
    return false;
  }
}
async function validateSearxngBaseUrl(
baseUrl: string,
lookupFn?: LookupFn,
): Promise<SearxngEndpointMode> {
let parsed: URL;
try {
parsed = new URL(baseUrl);
@@ -98,7 +127,10 @@ async function validateSearxngBaseUrl(baseUrl: string, lookupFn?: LookupFn): Pro
errorMessage:
"SearXNG HTTP base URL must target a trusted private or loopback host. Use https:// for public hosts.",
});
return "selfHosted";
}
return (await searxngEndpointTargetsPrivateNetwork(parsed, lookupFn)) ? "selfHosted" : "strict";
}
function parseSearxngResponseText(text: string, count: number): SearxngResult[] {
@@ -152,7 +184,7 @@ export async function runSearxngSearch(params: {
"SearXNG base URL is not configured. Set SEARXNG_BASE_URL or configure plugins.entries.searxng.config.webSearch.baseUrl.",
);
}
await validateSearxngBaseUrl(baseUrl);
const endpointMode = await validateSearxngBaseUrl(baseUrl);
const cacheKey = normalizeCacheKey(
JSON.stringify({
@@ -177,7 +209,9 @@ export async function runSearxngSearch(params: {
});
const startedAt = Date.now();
const results = await withTrustedWebSearchEndpoint(
const withEndpoint =
endpointMode === "selfHosted" ? withSelfHostedWebSearchEndpoint : withTrustedWebSearchEndpoint;
const results = await withEndpoint(
{
url,
timeoutSeconds,