mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 14:40:43 +00:00
fix(web-search): restrict private network guard
This commit is contained in:
@@ -7,8 +7,8 @@ import {
|
||||
readResponseText,
|
||||
resolveCacheTtlMs,
|
||||
truncateText,
|
||||
withSelfHostedWebToolsEndpoint,
|
||||
withStrictWebToolsEndpoint,
|
||||
withTrustedWebToolsEndpoint,
|
||||
writeCache,
|
||||
} from "openclaw/plugin-sdk/provider-web-fetch";
|
||||
import { normalizeSecretInput } from "openclaw/plugin-sdk/secret-input";
|
||||
@@ -45,7 +45,7 @@ const FIRECRAWL_SELF_HOSTED_PRIVATE_ERROR =
|
||||
const FIRECRAWL_HTTP_PRIVATE_ERROR =
|
||||
"Firecrawl HTTP baseUrl must target a private or internal self-hosted endpoint. Use https:// for public hosts.";
|
||||
|
||||
type FirecrawlEndpointMode = "strict" | "trusted";
|
||||
type FirecrawlEndpointMode = "selfHosted" | "strict";
|
||||
type FirecrawlResolvedEndpoint = {
|
||||
url: string;
|
||||
mode: FirecrawlEndpointMode;
|
||||
@@ -124,7 +124,7 @@ async function validateFirecrawlBaseUrl(
|
||||
|
||||
const isPrivateTarget = await firecrawlEndpointTargetsPrivateNetwork(url, lookupFn);
|
||||
if (isPrivateTarget) {
|
||||
return "trusted";
|
||||
return "selfHosted";
|
||||
}
|
||||
if (url.protocol === "http:") {
|
||||
throw new Error(FIRECRAWL_HTTP_PRIVATE_ERROR);
|
||||
@@ -161,7 +161,7 @@ async function postFirecrawlJson<T>(
|
||||
const apiKey = normalizeSecretInput(params.apiKey);
|
||||
const mode = params.mode ?? (await validateFirecrawlBaseUrl(params.url));
|
||||
const withEndpoint =
|
||||
mode === "trusted" ? withTrustedWebToolsEndpoint : withStrictWebToolsEndpoint;
|
||||
mode === "selfHosted" ? withSelfHostedWebToolsEndpoint : withStrictWebToolsEndpoint;
|
||||
return await withEndpoint(
|
||||
{
|
||||
url: params.url,
|
||||
|
||||
@@ -616,7 +616,7 @@ describe("firecrawl tools", () => {
|
||||
firecrawlClientTesting.resolveEndpoint("http://127.0.0.1:8787", "/v2/scrape"),
|
||||
).resolves.toEqual({
|
||||
url: "http://127.0.0.1:8787/v2/scrape",
|
||||
mode: "trusted",
|
||||
mode: "selfHosted",
|
||||
});
|
||||
await expect(
|
||||
firecrawlClientTesting.resolveEndpoint(
|
||||
@@ -625,7 +625,7 @@ describe("firecrawl tools", () => {
|
||||
),
|
||||
).resolves.toEqual({
|
||||
url: "https://host.openshell.internal:444/v2/search",
|
||||
mode: "trusted",
|
||||
mode: "selfHosted",
|
||||
});
|
||||
await expect(
|
||||
firecrawlClientTesting.resolveEndpoint("http://api.firecrawl.dev", "/v2/scrape"),
|
||||
@@ -638,7 +638,7 @@ describe("firecrawl tools", () => {
|
||||
).rejects.toThrow("Firecrawl baseUrl must use http:// or https://.");
|
||||
});
|
||||
|
||||
it("routes private self-hosted Firecrawl endpoints through the trusted fetch guard", async () => {
|
||||
it("routes private self-hosted Firecrawl endpoints through the self-hosted fetch guard", async () => {
|
||||
ssrfMock?.mockRestore();
|
||||
ssrfMock = mockPinnedHostnameResolution(["127.0.0.1"]);
|
||||
const fetchSpy = vi.fn(
|
||||
|
||||
@@ -66,8 +66,11 @@ describe("searxng client", () => {
|
||||
|
||||
it("allows https public hosts", async () => {
|
||||
await expect(
|
||||
__testing.validateSearxngBaseUrl("https://search.example.com/searxng"),
|
||||
).resolves.toBeUndefined();
|
||||
__testing.validateSearxngBaseUrl(
|
||||
"https://search.example.com/searxng",
|
||||
createLookupFn([{ address: "93.184.216.34", family: 4 }]),
|
||||
),
|
||||
).resolves.toBe("strict");
|
||||
});
|
||||
|
||||
it("allows cleartext private-network hosts", async () => {
|
||||
@@ -76,7 +79,16 @@ describe("searxng client", () => {
|
||||
"http://matrix-synapse:8080",
|
||||
createLookupFn([{ address: "10.0.0.5", family: 4 }]),
|
||||
),
|
||||
).resolves.toBeUndefined();
|
||||
).resolves.toBe("selfHosted");
|
||||
});
|
||||
|
||||
it("routes https private-network hosts through the self-hosted guard", async () => {
|
||||
await expect(
|
||||
__testing.validateSearxngBaseUrl(
|
||||
"https://search.internal/searxng",
|
||||
createLookupFn([{ address: "10.0.0.5", family: 4 }]),
|
||||
),
|
||||
).resolves.toBe("selfHosted");
|
||||
});
|
||||
|
||||
it("rejects cleartext public hosts", async () => {
|
||||
|
||||
@@ -9,12 +9,16 @@ import {
|
||||
resolveSearchCount,
|
||||
resolveSiteName,
|
||||
resolveTimeoutSeconds,
|
||||
withSelfHostedWebSearchEndpoint,
|
||||
withTrustedWebSearchEndpoint,
|
||||
wrapWebContent,
|
||||
writeCache,
|
||||
} from "openclaw/plugin-sdk/provider-web-search";
|
||||
import {
|
||||
assertHttpUrlTargetsPrivateNetwork,
|
||||
isBlockedHostnameOrIp,
|
||||
isPrivateIpAddress,
|
||||
resolvePinnedHostnameWithPolicy,
|
||||
type LookupFn,
|
||||
} from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
import {
|
||||
@@ -25,6 +29,7 @@ import {
|
||||
|
||||
const DEFAULT_TIMEOUT_SECONDS = 20;
|
||||
const MAX_RESPONSE_BYTES = 1_000_000;
|
||||
/**
 * Guard mode chosen for a SearXNG base URL by `validateSearxngBaseUrl`.
 *
 * - `"selfHosted"`: the endpoint was classified as a private-network target;
 *   `runSearxngSearch` sends the request through `withSelfHostedWebSearchEndpoint`.
 * - `"strict"`: any other endpoint; `runSearxngSearch` sends the request
 *   through `withTrustedWebSearchEndpoint`.
 */
type SearxngEndpointMode = "selfHosted" | "strict";
|
||||
|
||||
const SEARXNG_SEARCH_CACHE = new Map<
|
||||
string,
|
||||
@@ -79,7 +84,31 @@ function buildSearxngSearchUrl(params: {
|
||||
return url.toString();
|
||||
}
|
||||
|
||||
async function validateSearxngBaseUrl(baseUrl: string, lookupFn?: LookupFn): Promise<void> {
|
||||
/**
 * Reports whether a SearXNG endpoint should be treated as a private-network target.
 *
 * The hostname is first checked with `isBlockedHostnameOrIp`; a match is
 * reported as private without any lookup. Otherwise the hostname is resolved
 * through `resolvePinnedHostnameWithPolicy` and the endpoint counts as private
 * only when the lookup yields at least one address and every resolved address
 * satisfies `isPrivateIpAddress`.
 *
 * @param url - Parsed endpoint URL; only `hostname` is inspected.
 * @param lookupFn - Optional lookup override forwarded to the resolver.
 * @returns `true` for a blocked hostname/IP or when all resolved addresses are
 *   private; `false` when any address is not private, when no addresses are
 *   returned, or when resolution throws.
 */
async function searxngEndpointTargetsPrivateNetwork(
  url: URL,
  lookupFn?: LookupFn,
): Promise<boolean> {
  if (isBlockedHostnameOrIp(url.hostname)) {
    return true;
  }
  try {
    // NOTE(review): the permissive policy presumably keeps the resolver from
    // rejecting private targets so they can be classified here — confirm
    // against the resolver's policy handling.
    const { addresses } = await resolvePinnedHostnameWithPolicy(url.hostname, {
      lookupFn,
      policy: {
        allowPrivateNetwork: true,
        allowRfc2544BenchmarkRange: true,
      },
    });
    // `every` is vacuously true for an empty list; require at least one
    // address so an empty resolution result is never classified as private.
    return addresses.length > 0 && addresses.every((address) => isPrivateIpAddress(address));
  } catch {
    // Deliberate best-effort: a failed resolution is reported as "not private",
    // which `validateSearxngBaseUrl` maps to the "strict" mode.
    return false;
  }
}
|
||||
|
||||
async function validateSearxngBaseUrl(
|
||||
baseUrl: string,
|
||||
lookupFn?: LookupFn,
|
||||
): Promise<SearxngEndpointMode> {
|
||||
let parsed: URL;
|
||||
try {
|
||||
parsed = new URL(baseUrl);
|
||||
@@ -98,7 +127,10 @@ async function validateSearxngBaseUrl(baseUrl: string, lookupFn?: LookupFn): Pro
|
||||
errorMessage:
|
||||
"SearXNG HTTP base URL must target a trusted private or loopback host. Use https:// for public hosts.",
|
||||
});
|
||||
return "selfHosted";
|
||||
}
|
||||
|
||||
return (await searxngEndpointTargetsPrivateNetwork(parsed, lookupFn)) ? "selfHosted" : "strict";
|
||||
}
|
||||
|
||||
function parseSearxngResponseText(text: string, count: number): SearxngResult[] {
|
||||
@@ -152,7 +184,7 @@ export async function runSearxngSearch(params: {
|
||||
"SearXNG base URL is not configured. Set SEARXNG_BASE_URL or configure plugins.entries.searxng.config.webSearch.baseUrl.",
|
||||
);
|
||||
}
|
||||
await validateSearxngBaseUrl(baseUrl);
|
||||
const endpointMode = await validateSearxngBaseUrl(baseUrl);
|
||||
|
||||
const cacheKey = normalizeCacheKey(
|
||||
JSON.stringify({
|
||||
@@ -177,7 +209,9 @@ export async function runSearxngSearch(params: {
|
||||
});
|
||||
|
||||
const startedAt = Date.now();
|
||||
const results = await withTrustedWebSearchEndpoint(
|
||||
const withEndpoint =
|
||||
endpointMode === "selfHosted" ? withSelfHostedWebSearchEndpoint : withTrustedWebSearchEndpoint;
|
||||
const results = await withEndpoint(
|
||||
{
|
||||
url,
|
||||
timeoutSeconds,
|
||||
|
||||
Reference in New Issue
Block a user