diff --git a/CHANGELOG.md b/CHANGELOG.md index 5815a221af6..3d03d696b54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai ### Fixes - CLI/directory: report unsupported directory operations for installed channel plugins instead of prompting to reinstall the plugin when it lacks a directory adapter. Fixes #75770. Thanks @lawong888. +- Web search: keep public provider requests on the strict SSRF guard and reserve private-network access for explicit self-hosted SearXNG/Firecrawl endpoints. Fixes #74357 and supersedes #74360. Thanks @fede-kamel. - Web search/Firecrawl: allow self-hosted private/internal Firecrawl `baseUrl` endpoints, including HTTP for private targets, while keeping hosted Firecrawl on the strict official endpoint. Fixes #63877 and supersedes #59666, #63941, and #74013. Thanks @jhthompson12, @jzakirov, @Mlightsnow, and @shad0wca7. - Feishu: preserve Feishu/Lark HTTP error bodies for message sends, media sends, and chat member lookups, so HTTP 400 failures include vendor code, message, log id, and troubleshooter details. Fixes #73860. Thanks @desksk. - Agents/transcripts: avoid reopening large Pi transcript files through the synchronous session manager for maintenance rewrites, persisted tool-result truncation, manual compaction boundary hardening, and queued compaction rotation. Thanks @mariozechner. diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index bbdc6b3a702..dc8766b354f 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -1fbd0ea7f65901d96653458ba414f9ac69dc0142ff3772e48d63de8b9fa5567f plugin-sdk-api-baseline.json -2d29f4e632b05bd365f414096c87a2a3d9718f13fdbf9538824cb32db2902436 plugin-sdk-api-baseline.jsonl +7c25208c10ba075f76719883b7b2aefe4cf5e42328bad3acff1c5055350d344f plugin-sdk-api-baseline.json +6cac90f85065bcbd447911a0c7c54e7d6992278fd1b95a3e78ae4be3f185848a plugin-sdk-api-baseline.jsonl diff --git a/docs/tools/searxng-search.md b/docs/tools/searxng-search.md index cbd7720dd89..2052055326b 100644 --- a/docs/tools/searxng-search.md +++ b/docs/tools/searxng-search.md @@ -85,6 +85,9 @@ Transport rules: - `https://` works for public or private SearXNG hosts - `http://` is only accepted for trusted private-network or loopback hosts - public SearXNG hosts must use `https://` +- private/internal hosts use the self-hosted network guard; public `https://` + hosts stay on the strict web-search guard and cannot redirect to private + addresses ## Environment variable @@ -112,6 +115,9 @@ key wins first). - **No API key** -- works with any SearXNG instance out of the box - **Base URL validation** -- `baseUrl` must be a valid `http://` or `https://` URL; public hosts must use `https://` +- **Network guard** -- private/internal SearXNG endpoints opt in to + private-network access; public `https://` SearXNG endpoints keep strict SSRF + protection - **Auto-detection order** -- SearXNG is checked last (order 200) in auto-detection. API-backed providers with configured keys run first, then DuckDuckGo (order 100), then Ollama Web Search (order 110) diff --git a/extensions/firecrawl/src/firecrawl-client.ts b/extensions/firecrawl/src/firecrawl-client.ts index 99612cbd89b..e68fad3c1c7 100644 --- a/extensions/firecrawl/src/firecrawl-client.ts +++ b/extensions/firecrawl/src/firecrawl-client.ts @@ -7,8 +7,8 @@ import { readResponseText, resolveCacheTtlMs, truncateText, + withSelfHostedWebToolsEndpoint, withStrictWebToolsEndpoint, - withTrustedWebToolsEndpoint, writeCache, } from "openclaw/plugin-sdk/provider-web-fetch"; import { normalizeSecretInput } from "openclaw/plugin-sdk/secret-input"; @@ -45,7 +45,7 @@ const FIRECRAWL_SELF_HOSTED_PRIVATE_ERROR = const FIRECRAWL_HTTP_PRIVATE_ERROR = "Firecrawl HTTP baseUrl must target a private or internal self-hosted endpoint. Use https:// for public hosts."; -type FirecrawlEndpointMode = "strict" | "trusted"; +type FirecrawlEndpointMode = "selfHosted" | "strict"; type FirecrawlResolvedEndpoint = { url: string; mode: FirecrawlEndpointMode; @@ -124,7 +124,7 @@ async function validateFirecrawlBaseUrl( const isPrivateTarget = await firecrawlEndpointTargetsPrivateNetwork(url, lookupFn); if (isPrivateTarget) { - return "trusted"; + return "selfHosted"; } if (url.protocol === "http:") { throw new Error(FIRECRAWL_HTTP_PRIVATE_ERROR); @@ -161,7 +161,7 @@ async function postFirecrawlJson( const apiKey = normalizeSecretInput(params.apiKey); const mode = params.mode ?? (await validateFirecrawlBaseUrl(params.url)); const withEndpoint = - mode === "trusted" ? withTrustedWebToolsEndpoint : withStrictWebToolsEndpoint; + mode === "selfHosted" ? withSelfHostedWebToolsEndpoint : withStrictWebToolsEndpoint; return await withEndpoint( { url: params.url, diff --git a/extensions/firecrawl/src/firecrawl-tools.test.ts b/extensions/firecrawl/src/firecrawl-tools.test.ts index fb031fc114a..7a4ebdf4df8 100644 --- a/extensions/firecrawl/src/firecrawl-tools.test.ts +++ b/extensions/firecrawl/src/firecrawl-tools.test.ts @@ -616,7 +616,7 @@ describe("firecrawl tools", () => { firecrawlClientTesting.resolveEndpoint("http://127.0.0.1:8787", "/v2/scrape"), ).resolves.toEqual({ url: "http://127.0.0.1:8787/v2/scrape", - mode: "trusted", + mode: "selfHosted", }); await expect( firecrawlClientTesting.resolveEndpoint( @@ -625,7 +625,7 @@ describe("firecrawl tools", () => { ), ).resolves.toEqual({ url: "https://host.openshell.internal:444/v2/search", - mode: "trusted", + mode: "selfHosted", }); await expect( firecrawlClientTesting.resolveEndpoint("http://api.firecrawl.dev", "/v2/scrape"), @@ -638,7 +638,7 @@ describe("firecrawl tools", () => { ).rejects.toThrow("Firecrawl baseUrl must use http:// or https://."); }); - it("routes private self-hosted Firecrawl endpoints through the trusted fetch guard", async () => { + it("routes private self-hosted Firecrawl endpoints through the self-hosted fetch guard", async () => { ssrfMock?.mockRestore(); ssrfMock = mockPinnedHostnameResolution(["127.0.0.1"]); const fetchSpy = vi.fn( diff --git a/extensions/searxng/src/searxng-client.test.ts b/extensions/searxng/src/searxng-client.test.ts index 7bff1f7a24e..d6aa090ae23 100644 --- a/extensions/searxng/src/searxng-client.test.ts +++ b/extensions/searxng/src/searxng-client.test.ts @@ -66,8 +66,11 @@ describe("searxng client", () => { it("allows https public hosts", async () => { await expect( - __testing.validateSearxngBaseUrl("https://search.example.com/searxng"), - ).resolves.toBeUndefined(); + __testing.validateSearxngBaseUrl( + "https://search.example.com/searxng", + createLookupFn([{ address: "93.184.216.34", family: 4 }]), + ), + ).resolves.toBe("strict"); }); it("allows cleartext private-network hosts", async () => { @@ -76,7 +79,16 @@ describe("searxng client", () => { "http://matrix-synapse:8080", createLookupFn([{ address: "10.0.0.5", family: 4 }]), ), - ).resolves.toBeUndefined(); + ).resolves.toBe("selfHosted"); + }); + + it("routes https private-network hosts through the self-hosted guard", async () => { + await expect( + __testing.validateSearxngBaseUrl( + "https://search.internal/searxng", + createLookupFn([{ address: "10.0.0.5", family: 4 }]), + ), + ).resolves.toBe("selfHosted"); }); it("rejects cleartext public hosts", async () => { diff --git a/extensions/searxng/src/searxng-client.ts b/extensions/searxng/src/searxng-client.ts index 25d8ed0275b..0634c5f5de9 100644 --- a/extensions/searxng/src/searxng-client.ts +++ b/extensions/searxng/src/searxng-client.ts @@ -9,12 +9,16 @@ import { resolveSearchCount, resolveSiteName, resolveTimeoutSeconds, + withSelfHostedWebSearchEndpoint, withTrustedWebSearchEndpoint, wrapWebContent, writeCache, } from "openclaw/plugin-sdk/provider-web-search"; import { assertHttpUrlTargetsPrivateNetwork, + isBlockedHostnameOrIp, + isPrivateIpAddress, + resolvePinnedHostnameWithPolicy, type LookupFn, } from "openclaw/plugin-sdk/ssrf-runtime"; import { @@ -25,6 +29,7 @@ import { const DEFAULT_TIMEOUT_SECONDS = 20; const MAX_RESPONSE_BYTES = 1_000_000; +type SearxngEndpointMode = "selfHosted" | "strict"; const SEARXNG_SEARCH_CACHE = new Map< string, @@ -79,7 +84,31 @@ function buildSearxngSearchUrl(params: { return url.toString(); } -async function validateSearxngBaseUrl(baseUrl: string, lookupFn?: LookupFn): Promise { +async function searxngEndpointTargetsPrivateNetwork( + url: URL, + lookupFn?: LookupFn, +): Promise { + if (isBlockedHostnameOrIp(url.hostname)) { + return true; + } + try { + const pinned = await resolvePinnedHostnameWithPolicy(url.hostname, { + lookupFn, + policy: { + allowPrivateNetwork: true, + allowRfc2544BenchmarkRange: true, + }, + }); + return pinned.addresses.every((address) => isPrivateIpAddress(address)); + } catch { + return false; + } +} + +async function validateSearxngBaseUrl( + baseUrl: string, + lookupFn?: LookupFn, +): Promise { let parsed: URL; try { parsed = new URL(baseUrl); @@ -98,7 +127,10 @@ async function validateSearxngBaseUrl(baseUrl: string, lookupFn?: LookupFn): Pro errorMessage: "SearXNG HTTP base URL must target a trusted private or loopback host. Use https:// for public hosts.", }); + return "selfHosted"; } + + return (await searxngEndpointTargetsPrivateNetwork(parsed, lookupFn)) ? "selfHosted" : "strict"; } function parseSearxngResponseText(text: string, count: number): SearxngResult[] { @@ -152,7 +184,7 @@ export async function runSearxngSearch(params: { "SearXNG base URL is not configured. Set SEARXNG_BASE_URL or configure plugins.entries.searxng.config.webSearch.baseUrl.", ); } - await validateSearxngBaseUrl(baseUrl); + const endpointMode = await validateSearxngBaseUrl(baseUrl); const cacheKey = normalizeCacheKey( JSON.stringify({ @@ -177,7 +209,9 @@ export async function runSearxngSearch(params: { }); const startedAt = Date.now(); - const results = await withTrustedWebSearchEndpoint( + const withEndpoint = + endpointMode === "selfHosted" ? withSelfHostedWebSearchEndpoint : withTrustedWebSearchEndpoint; + const results = await withEndpoint( { url, timeoutSeconds, diff --git a/src/agents/tools/web-guarded-fetch.test.ts b/src/agents/tools/web-guarded-fetch.test.ts index 005a94ad3da..179a8151e43 100644 --- a/src/agents/tools/web-guarded-fetch.test.ts +++ b/src/agents/tools/web-guarded-fetch.test.ts @@ -1,6 +1,10 @@ import { afterEach, describe, expect, it, vi } from "vitest"; import { fetchWithSsrFGuard, GUARDED_FETCH_MODE } from "../../infra/net/fetch-guard.js"; -import { withStrictWebToolsEndpoint, withTrustedWebToolsEndpoint } from "./web-guarded-fetch.js"; +import { + withSelfHostedWebToolsEndpoint, + withStrictWebToolsEndpoint, + withTrustedWebToolsEndpoint, +} from "./web-guarded-fetch.js"; vi.mock("../../infra/net/fetch-guard.js", () => { const GUARDED_FETCH_MODE = { @@ -26,7 +30,7 @@ describe("web-guarded-fetch", () => { vi.clearAllMocks(); }); - it("uses trusted SSRF policy for trusted web tools endpoints", async () => { + it("uses strict SSRF policy for trusted web tools endpoints", async () => { vi.mocked(fetchWithSsrFGuard).mockResolvedValue({ response: new Response("ok", { status: 200 }), finalUrl: "https://example.com", @@ -38,6 +42,24 @@ describe("web-guarded-fetch", () => { expect(fetchWithSsrFGuard).toHaveBeenCalledWith( expect.objectContaining({ url: "https://example.com", + policy: {}, + mode: GUARDED_FETCH_MODE.TRUSTED_ENV_PROXY, + }), + ); + }); + + it("uses private-network policy only for self-hosted web tools endpoints", async () => { + vi.mocked(fetchWithSsrFGuard).mockResolvedValue({ + response: new Response("ok", { status: 200 }), + finalUrl: "http://127.0.0.1:8080", + release: async () => {}, + }); + + await withSelfHostedWebToolsEndpoint({ url: "http://127.0.0.1:8080" }, async () => undefined); + + expect(fetchWithSsrFGuard).toHaveBeenCalledWith( + expect.objectContaining({ + url: "http://127.0.0.1:8080", policy: expect.objectContaining({ dangerouslyAllowPrivateNetwork: true, allowRfc2544BenchmarkRange: true, diff --git a/src/agents/tools/web-guarded-fetch.ts b/src/agents/tools/web-guarded-fetch.ts index aa4e8274cf9..f1d542fdcf4 100644 --- a/src/agents/tools/web-guarded-fetch.ts +++ b/src/agents/tools/web-guarded-fetch.ts @@ -7,7 +7,8 @@ import { } from "../../infra/net/fetch-guard.js"; import type { SsrFPolicy } from "../../infra/net/ssrf.js"; -const WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY: SsrFPolicy = { +const WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY: SsrFPolicy = {}; +const WEB_TOOLS_SELF_HOSTED_NETWORK_SSRF_POLICY: SsrFPolicy = { dangerouslyAllowPrivateNetwork: true, allowRfc2544BenchmarkRange: true, }; @@ -75,6 +76,20 @@ export async function withTrustedWebToolsEndpoint( ); } +export async function withSelfHostedWebToolsEndpoint( + params: WebToolEndpointFetchOptions, + run: (result: { response: Response; finalUrl: string }) => Promise, +): Promise { + return await withWebToolsNetworkGuard( + { + ...params, + policy: WEB_TOOLS_SELF_HOSTED_NETWORK_SSRF_POLICY, + useEnvProxy: true, + }, + run, + ); +} + export async function withStrictWebToolsEndpoint( params: WebToolEndpointFetchOptions, run: (result: { response: Response; finalUrl: string }) => Promise, diff --git a/src/agents/tools/web-search-provider-common.ts b/src/agents/tools/web-search-provider-common.ts index 7ab52e242b1..bc50d79bcb7 100644 --- a/src/agents/tools/web-search-provider-common.ts +++ b/src/agents/tools/web-search-provider-common.ts @@ -16,7 +16,7 @@ import { type WebGuardedFetchModule = Pick< typeof import("./web-guarded-fetch.js"), - "withTrustedWebToolsEndpoint" + "withSelfHostedWebToolsEndpoint" | "withTrustedWebToolsEndpoint" >; let webGuardedFetchPromise: Promise | null = null; @@ -28,6 +28,13 @@ async function loadTrustedWebToolsEndpoint(): Promise< return (await webGuardedFetchPromise).withTrustedWebToolsEndpoint; } +async function loadSelfHostedWebToolsEndpoint(): Promise< + WebGuardedFetchModule["withSelfHostedWebToolsEndpoint"] +> { + webGuardedFetchPromise ??= import("./web-guarded-fetch.js"); + return (await webGuardedFetchPromise).withSelfHostedWebToolsEndpoint; +} + export type SearchConfigRecord = (NonNullable["web"] extends infer Web ? Web extends { search?: infer Search } ? Search @@ -95,6 +102,27 @@ export async function withTrustedWebSearchEndpoint( ); } +export async function withSelfHostedWebSearchEndpoint( + params: { + url: string; + timeoutSeconds: number; + init: RequestInit; + signal?: AbortSignal; + }, + run: (response: Response) => Promise, +): Promise { + const withSelfHostedWebToolsEndpoint = await loadSelfHostedWebToolsEndpoint(); + return withSelfHostedWebToolsEndpoint( + { + url: params.url, + init: params.init, + timeoutSeconds: params.timeoutSeconds, + signal: params.signal, + }, + async ({ response }) => run(response), + ); +} + export async function postTrustedWebToolsJson( params: { url: string; diff --git a/src/plugin-sdk/provider-web-fetch.ts b/src/plugin-sdk/provider-web-fetch.ts index bf84c71f27f..70b8c77bfa9 100644 --- a/src/plugin-sdk/provider-web-fetch.ts +++ b/src/plugin-sdk/provider-web-fetch.ts @@ -7,6 +7,7 @@ import type { } from "../plugins/types.js"; export { jsonResult, readNumberParam, readStringParam } from "../agents/tools/common.js"; export { + withSelfHostedWebToolsEndpoint, withStrictWebToolsEndpoint, withTrustedWebToolsEndpoint, } from "../agents/tools/web-guarded-fetch.js"; diff --git a/src/plugin-sdk/provider-web-search.ts b/src/plugin-sdk/provider-web-search.ts index 35ef16e877e..632dd02e2ad 100644 --- a/src/plugin-sdk/provider-web-search.ts +++ b/src/plugin-sdk/provider-web-search.ts @@ -33,6 +33,7 @@ export { resolveSiteName, postTrustedWebToolsJson, throwWebSearchApiError, + withSelfHostedWebSearchEndpoint, withTrustedWebSearchEndpoint, writeCachedSearchPayload, } from "../agents/tools/web-search-provider-common.js"; @@ -47,7 +48,10 @@ export { } from "../agents/tools/web-search-provider-config.js"; export type { SearchConfigRecord } from "../agents/tools/web-search-provider-common.js"; export { resolveWebSearchProviderCredential } from "../agents/tools/web-search-provider-credentials.js"; -export { withTrustedWebToolsEndpoint } from "../agents/tools/web-guarded-fetch.js"; +export { + withSelfHostedWebToolsEndpoint, + withTrustedWebToolsEndpoint, +} from "../agents/tools/web-guarded-fetch.js"; export { markdownToText, truncateText } from "../agents/tools/web-fetch-utils.js"; export { DEFAULT_CACHE_TTL_MINUTES,