mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:40:44 +00:00
fix(web-search): restrict private network guard
This commit is contained in:
@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Fixes
|
||||
|
||||
- CLI/directory: report unsupported directory operations for installed channel plugins instead of prompting to reinstall the plugin when it lacks a directory adapter. Fixes #75770. Thanks @lawong888.
|
||||
- Web search: keep public provider requests on the strict SSRF guard and reserve private-network access for explicit self-hosted SearXNG/Firecrawl endpoints. Fixes #74357 and supersedes #74360. Thanks @fede-kamel.
|
||||
- Web search/Firecrawl: allow self-hosted private/internal Firecrawl `baseUrl` endpoints, including HTTP for private targets, while keeping hosted Firecrawl on the strict official endpoint. Fixes #63877 and supersedes #59666, #63941, and #74013. Thanks @jhthompson12, @jzakirov, @Mlightsnow, and @shad0wca7.
|
||||
- Feishu: preserve Feishu/Lark HTTP error bodies for message sends, media sends, and chat member lookups, so HTTP 400 failures include vendor code, message, log id, and troubleshooter details. Fixes #73860. Thanks @desksk.
|
||||
- Agents/transcripts: avoid reopening large Pi transcript files through the synchronous session manager for maintenance rewrites, persisted tool-result truncation, manual compaction boundary hardening, and queued compaction rotation. Thanks @mariozechner.
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
1fbd0ea7f65901d96653458ba414f9ac69dc0142ff3772e48d63de8b9fa5567f plugin-sdk-api-baseline.json
|
||||
2d29f4e632b05bd365f414096c87a2a3d9718f13fdbf9538824cb32db2902436 plugin-sdk-api-baseline.jsonl
|
||||
7c25208c10ba075f76719883b7b2aefe4cf5e42328bad3acff1c5055350d344f plugin-sdk-api-baseline.json
|
||||
6cac90f85065bcbd447911a0c7c54e7d6992278fd1b95a3e78ae4be3f185848a plugin-sdk-api-baseline.jsonl
|
||||
|
||||
@@ -85,6 +85,9 @@ Transport rules:
|
||||
- `https://` works for public or private SearXNG hosts
|
||||
- `http://` is only accepted for trusted private-network or loopback hosts
|
||||
- public SearXNG hosts must use `https://`
|
||||
- private/internal hosts use the self-hosted network guard; public `https://`
|
||||
hosts stay on the strict web-search guard and cannot redirect to private
|
||||
addresses
|
||||
|
||||
## Environment variable
|
||||
|
||||
@@ -112,6 +115,9 @@ key wins first).
|
||||
- **No API key** -- works with any SearXNG instance out of the box
|
||||
- **Base URL validation** -- `baseUrl` must be a valid `http://` or `https://`
|
||||
URL; public hosts must use `https://`
|
||||
- **Network guard** -- private/internal SearXNG endpoints opt in to
|
||||
private-network access; public `https://` SearXNG endpoints keep strict SSRF
|
||||
protection
|
||||
- **Auto-detection order** -- SearXNG is checked last (order 200) in
|
||||
auto-detection. API-backed providers with configured keys run first, then
|
||||
DuckDuckGo (order 100), then Ollama Web Search (order 110)
|
||||
|
||||
@@ -7,8 +7,8 @@ import {
|
||||
readResponseText,
|
||||
resolveCacheTtlMs,
|
||||
truncateText,
|
||||
withSelfHostedWebToolsEndpoint,
|
||||
withStrictWebToolsEndpoint,
|
||||
withTrustedWebToolsEndpoint,
|
||||
writeCache,
|
||||
} from "openclaw/plugin-sdk/provider-web-fetch";
|
||||
import { normalizeSecretInput } from "openclaw/plugin-sdk/secret-input";
|
||||
@@ -45,7 +45,7 @@ const FIRECRAWL_SELF_HOSTED_PRIVATE_ERROR =
|
||||
const FIRECRAWL_HTTP_PRIVATE_ERROR =
|
||||
"Firecrawl HTTP baseUrl must target a private or internal self-hosted endpoint. Use https:// for public hosts.";
|
||||
|
||||
type FirecrawlEndpointMode = "strict" | "trusted";
|
||||
type FirecrawlEndpointMode = "selfHosted" | "strict";
|
||||
type FirecrawlResolvedEndpoint = {
|
||||
url: string;
|
||||
mode: FirecrawlEndpointMode;
|
||||
@@ -124,7 +124,7 @@ async function validateFirecrawlBaseUrl(
|
||||
|
||||
const isPrivateTarget = await firecrawlEndpointTargetsPrivateNetwork(url, lookupFn);
|
||||
if (isPrivateTarget) {
|
||||
return "trusted";
|
||||
return "selfHosted";
|
||||
}
|
||||
if (url.protocol === "http:") {
|
||||
throw new Error(FIRECRAWL_HTTP_PRIVATE_ERROR);
|
||||
@@ -161,7 +161,7 @@ async function postFirecrawlJson<T>(
|
||||
const apiKey = normalizeSecretInput(params.apiKey);
|
||||
const mode = params.mode ?? (await validateFirecrawlBaseUrl(params.url));
|
||||
const withEndpoint =
|
||||
mode === "trusted" ? withTrustedWebToolsEndpoint : withStrictWebToolsEndpoint;
|
||||
mode === "selfHosted" ? withSelfHostedWebToolsEndpoint : withStrictWebToolsEndpoint;
|
||||
return await withEndpoint(
|
||||
{
|
||||
url: params.url,
|
||||
|
||||
@@ -616,7 +616,7 @@ describe("firecrawl tools", () => {
|
||||
firecrawlClientTesting.resolveEndpoint("http://127.0.0.1:8787", "/v2/scrape"),
|
||||
).resolves.toEqual({
|
||||
url: "http://127.0.0.1:8787/v2/scrape",
|
||||
mode: "trusted",
|
||||
mode: "selfHosted",
|
||||
});
|
||||
await expect(
|
||||
firecrawlClientTesting.resolveEndpoint(
|
||||
@@ -625,7 +625,7 @@ describe("firecrawl tools", () => {
|
||||
),
|
||||
).resolves.toEqual({
|
||||
url: "https://host.openshell.internal:444/v2/search",
|
||||
mode: "trusted",
|
||||
mode: "selfHosted",
|
||||
});
|
||||
await expect(
|
||||
firecrawlClientTesting.resolveEndpoint("http://api.firecrawl.dev", "/v2/scrape"),
|
||||
@@ -638,7 +638,7 @@ describe("firecrawl tools", () => {
|
||||
).rejects.toThrow("Firecrawl baseUrl must use http:// or https://.");
|
||||
});
|
||||
|
||||
it("routes private self-hosted Firecrawl endpoints through the trusted fetch guard", async () => {
|
||||
it("routes private self-hosted Firecrawl endpoints through the self-hosted fetch guard", async () => {
|
||||
ssrfMock?.mockRestore();
|
||||
ssrfMock = mockPinnedHostnameResolution(["127.0.0.1"]);
|
||||
const fetchSpy = vi.fn(
|
||||
|
||||
@@ -66,8 +66,11 @@ describe("searxng client", () => {
|
||||
|
||||
it("allows https public hosts", async () => {
|
||||
await expect(
|
||||
__testing.validateSearxngBaseUrl("https://search.example.com/searxng"),
|
||||
).resolves.toBeUndefined();
|
||||
__testing.validateSearxngBaseUrl(
|
||||
"https://search.example.com/searxng",
|
||||
createLookupFn([{ address: "93.184.216.34", family: 4 }]),
|
||||
),
|
||||
).resolves.toBe("strict");
|
||||
});
|
||||
|
||||
it("allows cleartext private-network hosts", async () => {
|
||||
@@ -76,7 +79,16 @@ describe("searxng client", () => {
|
||||
"http://matrix-synapse:8080",
|
||||
createLookupFn([{ address: "10.0.0.5", family: 4 }]),
|
||||
),
|
||||
).resolves.toBeUndefined();
|
||||
).resolves.toBe("selfHosted");
|
||||
});
|
||||
|
||||
it("routes https private-network hosts through the self-hosted guard", async () => {
|
||||
await expect(
|
||||
__testing.validateSearxngBaseUrl(
|
||||
"https://search.internal/searxng",
|
||||
createLookupFn([{ address: "10.0.0.5", family: 4 }]),
|
||||
),
|
||||
).resolves.toBe("selfHosted");
|
||||
});
|
||||
|
||||
it("rejects cleartext public hosts", async () => {
|
||||
|
||||
@@ -9,12 +9,16 @@ import {
|
||||
resolveSearchCount,
|
||||
resolveSiteName,
|
||||
resolveTimeoutSeconds,
|
||||
withSelfHostedWebSearchEndpoint,
|
||||
withTrustedWebSearchEndpoint,
|
||||
wrapWebContent,
|
||||
writeCache,
|
||||
} from "openclaw/plugin-sdk/provider-web-search";
|
||||
import {
|
||||
assertHttpUrlTargetsPrivateNetwork,
|
||||
isBlockedHostnameOrIp,
|
||||
isPrivateIpAddress,
|
||||
resolvePinnedHostnameWithPolicy,
|
||||
type LookupFn,
|
||||
} from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
import {
|
||||
@@ -25,6 +29,7 @@ import {
|
||||
|
||||
const DEFAULT_TIMEOUT_SECONDS = 20;
|
||||
const MAX_RESPONSE_BYTES = 1_000_000;
|
||||
type SearxngEndpointMode = "selfHosted" | "strict";
|
||||
|
||||
const SEARXNG_SEARCH_CACHE = new Map<
|
||||
string,
|
||||
@@ -79,7 +84,31 @@ function buildSearxngSearchUrl(params: {
|
||||
return url.toString();
|
||||
}
|
||||
|
||||
async function validateSearxngBaseUrl(baseUrl: string, lookupFn?: LookupFn): Promise<void> {
|
||||
/**
 * Decides whether a SearXNG base URL points at a private or internal host.
 *
 * A hostname matched by `isBlockedHostnameOrIp` is treated as a private target
 * immediately, before any resolution is attempted. Otherwise the hostname is
 * resolved through `resolvePinnedHostnameWithPolicy` with private-network and
 * RFC 2544 benchmark-range addresses permitted, and the target counts as
 * private only when the resolver returned at least one address and every
 * returned address satisfies `isPrivateIpAddress`.
 *
 * @param url - Parsed SearXNG base URL; only `url.hostname` is inspected.
 * @param lookupFn - Optional lookup override forwarded to the resolver.
 * @returns `true` when the endpoint should be treated as private/internal;
 *   `false` otherwise, including when resolution throws or yields no addresses.
 */
async function searxngEndpointTargetsPrivateNetwork(
  url: URL,
  lookupFn?: LookupFn,
): Promise<boolean> {
  if (isBlockedHostnameOrIp(url.hostname)) {
    return true;
  }
  try {
    const pinned = await resolvePinnedHostnameWithPolicy(url.hostname, {
      lookupFn,
      policy: {
        allowPrivateNetwork: true,
        allowRfc2544BenchmarkRange: true,
      },
    });
    // `every` is vacuously true on an empty array. Without the length check an
    // empty resolution would be classified as private and unlock the
    // self-hosted (private-network) guard for an endpoint we know nothing about.
    return (
      pinned.addresses.length > 0 &&
      pinned.addresses.every((address) => isPrivateIpAddress(address))
    );
  } catch {
    // Deliberate best-effort: a hostname that cannot be resolved is reported as
    // "not private" so the caller falls back to the stricter classification.
    return false;
  }
}
|
||||
|
||||
async function validateSearxngBaseUrl(
|
||||
baseUrl: string,
|
||||
lookupFn?: LookupFn,
|
||||
): Promise<SearxngEndpointMode> {
|
||||
let parsed: URL;
|
||||
try {
|
||||
parsed = new URL(baseUrl);
|
||||
@@ -98,7 +127,10 @@ async function validateSearxngBaseUrl(baseUrl: string, lookupFn?: LookupFn): Pro
|
||||
errorMessage:
|
||||
"SearXNG HTTP base URL must target a trusted private or loopback host. Use https:// for public hosts.",
|
||||
});
|
||||
return "selfHosted";
|
||||
}
|
||||
|
||||
return (await searxngEndpointTargetsPrivateNetwork(parsed, lookupFn)) ? "selfHosted" : "strict";
|
||||
}
|
||||
|
||||
function parseSearxngResponseText(text: string, count: number): SearxngResult[] {
|
||||
@@ -152,7 +184,7 @@ export async function runSearxngSearch(params: {
|
||||
"SearXNG base URL is not configured. Set SEARXNG_BASE_URL or configure plugins.entries.searxng.config.webSearch.baseUrl.",
|
||||
);
|
||||
}
|
||||
await validateSearxngBaseUrl(baseUrl);
|
||||
const endpointMode = await validateSearxngBaseUrl(baseUrl);
|
||||
|
||||
const cacheKey = normalizeCacheKey(
|
||||
JSON.stringify({
|
||||
@@ -177,7 +209,9 @@ export async function runSearxngSearch(params: {
|
||||
});
|
||||
|
||||
const startedAt = Date.now();
|
||||
const results = await withTrustedWebSearchEndpoint(
|
||||
const withEndpoint =
|
||||
endpointMode === "selfHosted" ? withSelfHostedWebSearchEndpoint : withTrustedWebSearchEndpoint;
|
||||
const results = await withEndpoint(
|
||||
{
|
||||
url,
|
||||
timeoutSeconds,
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { fetchWithSsrFGuard, GUARDED_FETCH_MODE } from "../../infra/net/fetch-guard.js";
|
||||
import { withStrictWebToolsEndpoint, withTrustedWebToolsEndpoint } from "./web-guarded-fetch.js";
|
||||
import {
|
||||
withSelfHostedWebToolsEndpoint,
|
||||
withStrictWebToolsEndpoint,
|
||||
withTrustedWebToolsEndpoint,
|
||||
} from "./web-guarded-fetch.js";
|
||||
|
||||
vi.mock("../../infra/net/fetch-guard.js", () => {
|
||||
const GUARDED_FETCH_MODE = {
|
||||
@@ -26,7 +30,7 @@ describe("web-guarded-fetch", () => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it("uses trusted SSRF policy for trusted web tools endpoints", async () => {
|
||||
it("uses strict SSRF policy for trusted web tools endpoints", async () => {
|
||||
vi.mocked(fetchWithSsrFGuard).mockResolvedValue({
|
||||
response: new Response("ok", { status: 200 }),
|
||||
finalUrl: "https://example.com",
|
||||
@@ -38,6 +42,24 @@ describe("web-guarded-fetch", () => {
|
||||
expect(fetchWithSsrFGuard).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
url: "https://example.com",
|
||||
policy: {},
|
||||
mode: GUARDED_FETCH_MODE.TRUSTED_ENV_PROXY,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("uses private-network policy only for self-hosted web tools endpoints", async () => {
|
||||
vi.mocked(fetchWithSsrFGuard).mockResolvedValue({
|
||||
response: new Response("ok", { status: 200 }),
|
||||
finalUrl: "http://127.0.0.1:8080",
|
||||
release: async () => {},
|
||||
});
|
||||
|
||||
await withSelfHostedWebToolsEndpoint({ url: "http://127.0.0.1:8080" }, async () => undefined);
|
||||
|
||||
expect(fetchWithSsrFGuard).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
url: "http://127.0.0.1:8080",
|
||||
policy: expect.objectContaining({
|
||||
dangerouslyAllowPrivateNetwork: true,
|
||||
allowRfc2544BenchmarkRange: true,
|
||||
|
||||
@@ -7,7 +7,8 @@ import {
|
||||
} from "../../infra/net/fetch-guard.js";
|
||||
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
|
||||
|
||||
const WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY: SsrFPolicy = {
|
||||
const WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY: SsrFPolicy = {};
|
||||
const WEB_TOOLS_SELF_HOSTED_NETWORK_SSRF_POLICY: SsrFPolicy = {
|
||||
dangerouslyAllowPrivateNetwork: true,
|
||||
allowRfc2544BenchmarkRange: true,
|
||||
};
|
||||
@@ -75,6 +76,20 @@ export async function withTrustedWebToolsEndpoint<T>(
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Runs `run` through `withWebToolsNetworkGuard` using the self-hosted SSRF
 * policy (`WEB_TOOLS_SELF_HOSTED_NETWORK_SSRF_POLICY`) with `useEnvProxy`
 * enabled.
 *
 * @param params - Endpoint fetch options; spread first, so the `policy` and
 *   `useEnvProxy` values set here take precedence over any same-named keys.
 * @param run - Callback that receives the guarded `response` and `finalUrl`.
 * @returns Whatever `withWebToolsNetworkGuard` resolves with for `run`.
 */
export async function withSelfHostedWebToolsEndpoint<T>(
  params: WebToolEndpointFetchOptions,
  run: (result: { response: Response; finalUrl: string }) => Promise<T>,
): Promise<T> {
  const selfHostedGuardOptions = {
    ...params,
    policy: WEB_TOOLS_SELF_HOSTED_NETWORK_SSRF_POLICY,
    useEnvProxy: true,
  };
  return await withWebToolsNetworkGuard(selfHostedGuardOptions, run);
}
|
||||
|
||||
export async function withStrictWebToolsEndpoint<T>(
|
||||
params: WebToolEndpointFetchOptions,
|
||||
run: (result: { response: Response; finalUrl: string }) => Promise<T>,
|
||||
|
||||
@@ -16,7 +16,7 @@ import {
|
||||
|
||||
type WebGuardedFetchModule = Pick<
|
||||
typeof import("./web-guarded-fetch.js"),
|
||||
"withTrustedWebToolsEndpoint"
|
||||
"withSelfHostedWebToolsEndpoint" | "withTrustedWebToolsEndpoint"
|
||||
>;
|
||||
|
||||
let webGuardedFetchPromise: Promise<WebGuardedFetchModule> | null = null;
|
||||
@@ -28,6 +28,13 @@ async function loadTrustedWebToolsEndpoint(): Promise<
|
||||
return (await webGuardedFetchPromise).withTrustedWebToolsEndpoint;
|
||||
}
|
||||
|
||||
/**
 * Lazily loads `./web-guarded-fetch.js` and returns its
 * `withSelfHostedWebToolsEndpoint` export.
 *
 * The import promise is stored in the module-level `webGuardedFetchPromise`,
 * so the dynamic import is started at most once and reused by later calls.
 */
async function loadSelfHostedWebToolsEndpoint(): Promise<
  WebGuardedFetchModule["withSelfHostedWebToolsEndpoint"]
> {
  if (webGuardedFetchPromise == null) {
    webGuardedFetchPromise = import("./web-guarded-fetch.js");
  }
  const guardedFetchModule = await webGuardedFetchPromise;
  return guardedFetchModule.withSelfHostedWebToolsEndpoint;
}
|
||||
|
||||
export type SearchConfigRecord = (NonNullable<OpenClawConfig["tools"]>["web"] extends infer Web
|
||||
? Web extends { search?: infer Search }
|
||||
? Search
|
||||
@@ -95,6 +102,27 @@ export async function withTrustedWebSearchEndpoint<T>(
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Web-search flavoured wrapper around the lazily loaded
 * `withSelfHostedWebToolsEndpoint`.
 *
 * Forwards `url`, `init`, `timeoutSeconds` and `signal` unchanged and hands
 * only the `Response` to `run`; the `finalUrl` supplied by the underlying
 * helper is not passed on.
 *
 * @param params - Request URL, timeout in seconds, fetch init and an optional
 *   abort signal.
 * @param run - Callback invoked with the guarded response.
 * @returns The value produced by `run`.
 */
export async function withSelfHostedWebSearchEndpoint<T>(
  params: {
    url: string;
    timeoutSeconds: number;
    init: RequestInit;
    signal?: AbortSignal;
  },
  run: (response: Response) => Promise<T>,
): Promise<T> {
  const { url, init, timeoutSeconds, signal } = params;
  const guardedEndpoint = await loadSelfHostedWebToolsEndpoint();
  return guardedEndpoint({ url, init, timeoutSeconds, signal }, async (result) =>
    run(result.response),
  );
}
|
||||
|
||||
export async function postTrustedWebToolsJson<T>(
|
||||
params: {
|
||||
url: string;
|
||||
|
||||
@@ -7,6 +7,7 @@ import type {
|
||||
} from "../plugins/types.js";
|
||||
export { jsonResult, readNumberParam, readStringParam } from "../agents/tools/common.js";
|
||||
export {
|
||||
withSelfHostedWebToolsEndpoint,
|
||||
withStrictWebToolsEndpoint,
|
||||
withTrustedWebToolsEndpoint,
|
||||
} from "../agents/tools/web-guarded-fetch.js";
|
||||
|
||||
@@ -33,6 +33,7 @@ export {
|
||||
resolveSiteName,
|
||||
postTrustedWebToolsJson,
|
||||
throwWebSearchApiError,
|
||||
withSelfHostedWebSearchEndpoint,
|
||||
withTrustedWebSearchEndpoint,
|
||||
writeCachedSearchPayload,
|
||||
} from "../agents/tools/web-search-provider-common.js";
|
||||
@@ -47,7 +48,10 @@ export {
|
||||
} from "../agents/tools/web-search-provider-config.js";
|
||||
export type { SearchConfigRecord } from "../agents/tools/web-search-provider-common.js";
|
||||
export { resolveWebSearchProviderCredential } from "../agents/tools/web-search-provider-credentials.js";
|
||||
export { withTrustedWebToolsEndpoint } from "../agents/tools/web-guarded-fetch.js";
|
||||
export {
|
||||
withSelfHostedWebToolsEndpoint,
|
||||
withTrustedWebToolsEndpoint,
|
||||
} from "../agents/tools/web-guarded-fetch.js";
|
||||
export { markdownToText, truncateText } from "../agents/tools/web-fetch-utils.js";
|
||||
export {
|
||||
DEFAULT_CACHE_TTL_MINUTES,
|
||||
|
||||
Reference in New Issue
Block a user