fix(web-search): restrict private network guard

This commit is contained in:
Peter Steinberger
2026-05-02 06:39:40 +01:00
parent e052bdcfb6
commit 1771160d2c
12 changed files with 143 additions and 20 deletions

View File

@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- CLI/directory: report unsupported directory operations for installed channel plugins instead of prompting to reinstall the plugin when it lacks a directory adapter. Fixes #75770. Thanks @lawong888.
- Web search: keep public provider requests on the strict SSRF guard and reserve private-network access for explicit self-hosted SearXNG/Firecrawl endpoints. Fixes #74357 and supersedes #74360. Thanks @fede-kamel.
- Web search/Firecrawl: allow self-hosted private/internal Firecrawl `baseUrl` endpoints, including HTTP for private targets, while keeping hosted Firecrawl on the strict official endpoint. Fixes #63877 and supersedes #59666, #63941, and #74013. Thanks @jhthompson12, @jzakirov, @Mlightsnow, and @shad0wca7.
- Feishu: preserve Feishu/Lark HTTP error bodies for message sends, media sends, and chat member lookups, so HTTP 400 failures include vendor code, message, log id, and troubleshooter details. Fixes #73860. Thanks @desksk.
- Agents/transcripts: avoid reopening large Pi transcript files through the synchronous session manager for maintenance rewrites, persisted tool-result truncation, manual compaction boundary hardening, and queued compaction rotation. Thanks @mariozechner.

View File

@@ -1,2 +1,2 @@
1fbd0ea7f65901d96653458ba414f9ac69dc0142ff3772e48d63de8b9fa5567f plugin-sdk-api-baseline.json
2d29f4e632b05bd365f414096c87a2a3d9718f13fdbf9538824cb32db2902436 plugin-sdk-api-baseline.jsonl
7c25208c10ba075f76719883b7b2aefe4cf5e42328bad3acff1c5055350d344f plugin-sdk-api-baseline.json
6cac90f85065bcbd447911a0c7c54e7d6992278fd1b95a3e78ae4be3f185848a plugin-sdk-api-baseline.jsonl

View File

@@ -85,6 +85,9 @@ Transport rules:
- `https://` works for public or private SearXNG hosts
- `http://` is only accepted for trusted private-network or loopback hosts
- public SearXNG hosts must use `https://`
- private/internal hosts use the self-hosted network guard; public `https://`
hosts stay on the strict web-search guard and cannot redirect to private
addresses
## Environment variable
@@ -112,6 +115,9 @@ key wins first).
- **No API key** -- works with any SearXNG instance out of the box
- **Base URL validation** -- `baseUrl` must be a valid `http://` or `https://`
URL; public hosts must use `https://`
- **Network guard** -- private/internal SearXNG endpoints opt in to
private-network access; public `https://` SearXNG endpoints keep strict SSRF
protection
- **Auto-detection order** -- SearXNG is checked last (order 200) in
auto-detection. API-backed providers with configured keys run first, then
DuckDuckGo (order 100), then Ollama Web Search (order 110)

View File

@@ -7,8 +7,8 @@ import {
readResponseText,
resolveCacheTtlMs,
truncateText,
withSelfHostedWebToolsEndpoint,
withStrictWebToolsEndpoint,
withTrustedWebToolsEndpoint,
writeCache,
} from "openclaw/plugin-sdk/provider-web-fetch";
import { normalizeSecretInput } from "openclaw/plugin-sdk/secret-input";
@@ -45,7 +45,7 @@ const FIRECRAWL_SELF_HOSTED_PRIVATE_ERROR =
const FIRECRAWL_HTTP_PRIVATE_ERROR =
"Firecrawl HTTP baseUrl must target a private or internal self-hosted endpoint. Use https:// for public hosts.";
type FirecrawlEndpointMode = "strict" | "trusted";
type FirecrawlEndpointMode = "selfHosted" | "strict";
type FirecrawlResolvedEndpoint = {
url: string;
mode: FirecrawlEndpointMode;
@@ -124,7 +124,7 @@ async function validateFirecrawlBaseUrl(
const isPrivateTarget = await firecrawlEndpointTargetsPrivateNetwork(url, lookupFn);
if (isPrivateTarget) {
return "trusted";
return "selfHosted";
}
if (url.protocol === "http:") {
throw new Error(FIRECRAWL_HTTP_PRIVATE_ERROR);
@@ -161,7 +161,7 @@ async function postFirecrawlJson<T>(
const apiKey = normalizeSecretInput(params.apiKey);
const mode = params.mode ?? (await validateFirecrawlBaseUrl(params.url));
const withEndpoint =
mode === "trusted" ? withTrustedWebToolsEndpoint : withStrictWebToolsEndpoint;
mode === "selfHosted" ? withSelfHostedWebToolsEndpoint : withStrictWebToolsEndpoint;
return await withEndpoint(
{
url: params.url,

View File

@@ -616,7 +616,7 @@ describe("firecrawl tools", () => {
firecrawlClientTesting.resolveEndpoint("http://127.0.0.1:8787", "/v2/scrape"),
).resolves.toEqual({
url: "http://127.0.0.1:8787/v2/scrape",
mode: "trusted",
mode: "selfHosted",
});
await expect(
firecrawlClientTesting.resolveEndpoint(
@@ -625,7 +625,7 @@ describe("firecrawl tools", () => {
),
).resolves.toEqual({
url: "https://host.openshell.internal:444/v2/search",
mode: "trusted",
mode: "selfHosted",
});
await expect(
firecrawlClientTesting.resolveEndpoint("http://api.firecrawl.dev", "/v2/scrape"),
@@ -638,7 +638,7 @@ describe("firecrawl tools", () => {
).rejects.toThrow("Firecrawl baseUrl must use http:// or https://.");
});
it("routes private self-hosted Firecrawl endpoints through the trusted fetch guard", async () => {
it("routes private self-hosted Firecrawl endpoints through the self-hosted fetch guard", async () => {
ssrfMock?.mockRestore();
ssrfMock = mockPinnedHostnameResolution(["127.0.0.1"]);
const fetchSpy = vi.fn(

View File

@@ -66,8 +66,11 @@ describe("searxng client", () => {
it("allows https public hosts", async () => {
await expect(
__testing.validateSearxngBaseUrl("https://search.example.com/searxng"),
).resolves.toBeUndefined();
__testing.validateSearxngBaseUrl(
"https://search.example.com/searxng",
createLookupFn([{ address: "93.184.216.34", family: 4 }]),
),
).resolves.toBe("strict");
});
it("allows cleartext private-network hosts", async () => {
@@ -76,7 +79,16 @@ describe("searxng client", () => {
"http://matrix-synapse:8080",
createLookupFn([{ address: "10.0.0.5", family: 4 }]),
),
).resolves.toBeUndefined();
).resolves.toBe("selfHosted");
});
// An https:// base URL whose only resolved address is private (10.0.0.5 here)
// must be classified as "selfHosted" rather than "strict".
it("routes https private-network hosts through the self-hosted guard", async () => {
  const privateLookup = createLookupFn([{ address: "10.0.0.5", family: 4 }]);
  const resolvedMode = __testing.validateSearxngBaseUrl(
    "https://search.internal/searxng",
    privateLookup,
  );
  await expect(resolvedMode).resolves.toBe("selfHosted");
});
it("rejects cleartext public hosts", async () => {

View File

@@ -9,12 +9,16 @@ import {
resolveSearchCount,
resolveSiteName,
resolveTimeoutSeconds,
withSelfHostedWebSearchEndpoint,
withTrustedWebSearchEndpoint,
wrapWebContent,
writeCache,
} from "openclaw/plugin-sdk/provider-web-search";
import {
assertHttpUrlTargetsPrivateNetwork,
isBlockedHostnameOrIp,
isPrivateIpAddress,
resolvePinnedHostnameWithPolicy,
type LookupFn,
} from "openclaw/plugin-sdk/ssrf-runtime";
import {
@@ -25,6 +29,7 @@ import {
const DEFAULT_TIMEOUT_SECONDS = 20;
const MAX_RESPONSE_BYTES = 1_000_000;
type SearxngEndpointMode = "selfHosted" | "strict";
const SEARXNG_SEARCH_CACHE = new Map<
string,
@@ -79,7 +84,31 @@ function buildSearxngSearchUrl(params: {
return url.toString();
}
async function validateSearxngBaseUrl(baseUrl: string, lookupFn?: LookupFn): Promise<void> {
/**
 * Classifies a SearXNG base URL as targeting a private/internal network.
 *
 * Returns `true` immediately when `isBlockedHostnameOrIp` flags the hostname.
 * Otherwise the hostname is resolved with a policy that permits private and
 * RFC 2544 benchmark addresses, and the result is `true` only when no resolved
 * address fails `isPrivateIpAddress`.
 *
 * Any error thrown while resolving or classifying yields `false`; the caller
 * (`validateSearxngBaseUrl`) maps `false` to the "strict" endpoint mode.
 *
 * NOTE(review): an empty address list would classify as private here;
 * presumably the resolver never returns one — confirm.
 *
 * @param url - Parsed base URL; only `hostname` is inspected.
 * @param lookupFn - Optional lookup override forwarded to the resolver.
 * @returns Whether the endpoint should be treated as a self-hosted private target.
 */
async function searxngEndpointTargetsPrivateNetwork(
  url: URL,
  lookupFn?: LookupFn,
): Promise<boolean> {
  const host = url.hostname;
  if (isBlockedHostnameOrIp(host)) {
    return true;
  }

  // Resolution must be allowed to succeed for private targets, otherwise we
  // could never observe that the host is private in the first place.
  const permissivePolicy = {
    allowPrivateNetwork: true,
    allowRfc2544BenchmarkRange: true,
  };

  try {
    const resolution = await resolvePinnedHostnameWithPolicy(host, {
      lookupFn,
      policy: permissivePolicy,
    });
    // Private only if there is no resolved address that is non-private.
    const hasPublicAddress = resolution.addresses.some((ip) => !isPrivateIpAddress(ip));
    return !hasPublicAddress;
  } catch {
    // Best-effort classification: failures fall back to "not private".
    return false;
  }
}
async function validateSearxngBaseUrl(
baseUrl: string,
lookupFn?: LookupFn,
): Promise<SearxngEndpointMode> {
let parsed: URL;
try {
parsed = new URL(baseUrl);
@@ -98,7 +127,10 @@ async function validateSearxngBaseUrl(baseUrl: string, lookupFn?: LookupFn): Pro
errorMessage:
"SearXNG HTTP base URL must target a trusted private or loopback host. Use https:// for public hosts.",
});
return "selfHosted";
}
return (await searxngEndpointTargetsPrivateNetwork(parsed, lookupFn)) ? "selfHosted" : "strict";
}
function parseSearxngResponseText(text: string, count: number): SearxngResult[] {
@@ -152,7 +184,7 @@ export async function runSearxngSearch(params: {
"SearXNG base URL is not configured. Set SEARXNG_BASE_URL or configure plugins.entries.searxng.config.webSearch.baseUrl.",
);
}
await validateSearxngBaseUrl(baseUrl);
const endpointMode = await validateSearxngBaseUrl(baseUrl);
const cacheKey = normalizeCacheKey(
JSON.stringify({
@@ -177,7 +209,9 @@ export async function runSearxngSearch(params: {
});
const startedAt = Date.now();
const results = await withTrustedWebSearchEndpoint(
const withEndpoint =
endpointMode === "selfHosted" ? withSelfHostedWebSearchEndpoint : withTrustedWebSearchEndpoint;
const results = await withEndpoint(
{
url,
timeoutSeconds,

View File

@@ -1,6 +1,10 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { fetchWithSsrFGuard, GUARDED_FETCH_MODE } from "../../infra/net/fetch-guard.js";
import { withStrictWebToolsEndpoint, withTrustedWebToolsEndpoint } from "./web-guarded-fetch.js";
import {
withSelfHostedWebToolsEndpoint,
withStrictWebToolsEndpoint,
withTrustedWebToolsEndpoint,
} from "./web-guarded-fetch.js";
vi.mock("../../infra/net/fetch-guard.js", () => {
const GUARDED_FETCH_MODE = {
@@ -26,7 +30,7 @@ describe("web-guarded-fetch", () => {
vi.clearAllMocks();
});
it("uses trusted SSRF policy for trusted web tools endpoints", async () => {
it("uses strict SSRF policy for trusted web tools endpoints", async () => {
vi.mocked(fetchWithSsrFGuard).mockResolvedValue({
response: new Response("ok", { status: 200 }),
finalUrl: "https://example.com",
@@ -38,6 +42,24 @@ describe("web-guarded-fetch", () => {
expect(fetchWithSsrFGuard).toHaveBeenCalledWith(
expect.objectContaining({
url: "https://example.com",
policy: {},
mode: GUARDED_FETCH_MODE.TRUSTED_ENV_PROXY,
}),
);
});
it("uses private-network policy only for self-hosted web tools endpoints", async () => {
vi.mocked(fetchWithSsrFGuard).mockResolvedValue({
response: new Response("ok", { status: 200 }),
finalUrl: "http://127.0.0.1:8080",
release: async () => {},
});
await withSelfHostedWebToolsEndpoint({ url: "http://127.0.0.1:8080" }, async () => undefined);
expect(fetchWithSsrFGuard).toHaveBeenCalledWith(
expect.objectContaining({
url: "http://127.0.0.1:8080",
policy: expect.objectContaining({
dangerouslyAllowPrivateNetwork: true,
allowRfc2544BenchmarkRange: true,

View File

@@ -7,7 +7,8 @@ import {
} from "../../infra/net/fetch-guard.js";
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
const WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY: SsrFPolicy = {
const WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY: SsrFPolicy = {};
const WEB_TOOLS_SELF_HOSTED_NETWORK_SSRF_POLICY: SsrFPolicy = {
dangerouslyAllowPrivateNetwork: true,
allowRfc2544BenchmarkRange: true,
};
@@ -75,6 +76,20 @@ export async function withTrustedWebToolsEndpoint<T>(
);
}
/**
 * Fetches a self-hosted web-tools endpoint through `withWebToolsNetworkGuard`
 * and hands the result to `run`.
 *
 * The request always uses `WEB_TOOLS_SELF_HOSTED_NETWORK_SSRF_POLICY` (which
 * enables `dangerouslyAllowPrivateNetwork` and `allowRfc2544BenchmarkRange`)
 * and `useEnvProxy: true`; both are applied after the caller's options, so they
 * override any same-named fields in `params`.
 *
 * @param params - Endpoint fetch options supplied by the caller.
 * @param run - Callback invoked with the guarded response and its final URL.
 * @returns Whatever `run` resolves to.
 */
export async function withSelfHostedWebToolsEndpoint<T>(
  params: WebToolEndpointFetchOptions,
  run: (result: { response: Response; finalUrl: string }) => Promise<T>,
): Promise<T> {
  const selfHostedParams = {
    ...params,
    policy: WEB_TOOLS_SELF_HOSTED_NETWORK_SSRF_POLICY,
    useEnvProxy: true,
  };
  return await withWebToolsNetworkGuard(selfHostedParams, run);
}
export async function withStrictWebToolsEndpoint<T>(
params: WebToolEndpointFetchOptions,
run: (result: { response: Response; finalUrl: string }) => Promise<T>,

View File

@@ -16,7 +16,7 @@ import {
type WebGuardedFetchModule = Pick<
typeof import("./web-guarded-fetch.js"),
"withTrustedWebToolsEndpoint"
"withSelfHostedWebToolsEndpoint" | "withTrustedWebToolsEndpoint"
>;
let webGuardedFetchPromise: Promise<WebGuardedFetchModule> | null = null;
@@ -28,6 +28,13 @@ async function loadTrustedWebToolsEndpoint(): Promise<
return (await webGuardedFetchPromise).withTrustedWebToolsEndpoint;
}
/**
 * Lazily loads `./web-guarded-fetch.js` and returns its
 * `withSelfHostedWebToolsEndpoint` export.
 *
 * The dynamic import is started at most once: the pending promise is stored in
 * the module-level `webGuardedFetchPromise`, which `loadTrustedWebToolsEndpoint`
 * reuses as well.
 */
async function loadSelfHostedWebToolsEndpoint(): Promise<
  WebGuardedFetchModule["withSelfHostedWebToolsEndpoint"]
> {
  const pendingModule = (webGuardedFetchPromise ??= import("./web-guarded-fetch.js"));
  const { withSelfHostedWebToolsEndpoint } = await pendingModule;
  return withSelfHostedWebToolsEndpoint;
}
export type SearchConfigRecord = (NonNullable<OpenClawConfig["tools"]>["web"] extends infer Web
? Web extends { search?: infer Search }
? Search
@@ -95,6 +102,27 @@ export async function withTrustedWebSearchEndpoint<T>(
);
}
/**
 * Web-search adapter over the lazily loaded `withSelfHostedWebToolsEndpoint`.
 *
 * Forwards `url`, `init`, `timeoutSeconds` and `signal` to the self-hosted
 * web-tools guard and passes only the `Response` on to `run`; the rest of the
 * guard result is not exposed to the callback.
 *
 * @param params - Request URL, fetch init, timeout in seconds and optional abort signal.
 * @param run - Callback that consumes the guarded response.
 * @returns Whatever `run` resolves to.
 */
export async function withSelfHostedWebSearchEndpoint<T>(
  params: {
    url: string;
    timeoutSeconds: number;
    init: RequestInit;
    signal?: AbortSignal;
  },
  run: (response: Response) => Promise<T>,
): Promise<T> {
  const guardedFetch = await loadSelfHostedWebToolsEndpoint();
  const { url, init, timeoutSeconds, signal } = params;
  return guardedFetch({ url, init, timeoutSeconds, signal }, async (guarded) =>
    run(guarded.response),
  );
}
export async function postTrustedWebToolsJson<T>(
params: {
url: string;

View File

@@ -7,6 +7,7 @@ import type {
} from "../plugins/types.js";
export { jsonResult, readNumberParam, readStringParam } from "../agents/tools/common.js";
export {
withSelfHostedWebToolsEndpoint,
withStrictWebToolsEndpoint,
withTrustedWebToolsEndpoint,
} from "../agents/tools/web-guarded-fetch.js";

View File

@@ -33,6 +33,7 @@ export {
resolveSiteName,
postTrustedWebToolsJson,
throwWebSearchApiError,
withSelfHostedWebSearchEndpoint,
withTrustedWebSearchEndpoint,
writeCachedSearchPayload,
} from "../agents/tools/web-search-provider-common.js";
@@ -47,7 +48,10 @@ export {
} from "../agents/tools/web-search-provider-config.js";
export type { SearchConfigRecord } from "../agents/tools/web-search-provider-common.js";
export { resolveWebSearchProviderCredential } from "../agents/tools/web-search-provider-credentials.js";
export { withTrustedWebToolsEndpoint } from "../agents/tools/web-guarded-fetch.js";
export {
withSelfHostedWebToolsEndpoint,
withTrustedWebToolsEndpoint,
} from "../agents/tools/web-guarded-fetch.js";
export { markdownToText, truncateText } from "../agents/tools/web-fetch-utils.js";
export {
DEFAULT_CACHE_TTL_MINUTES,