From 66336bf7c846d79725c236b158d25851f21de915 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 3 May 2026 22:30:01 +0100 Subject: [PATCH] fix: add trusted env proxy opt-in for web fetch --- CHANGELOG.md | 1 + docs/.generated/config-baseline.sha256 | 4 +- docs/tools/web-fetch.md | 20 +++++++ src/agents/tools/web-fetch.ssrf.test.ts | 17 ++++++ src/agents/tools/web-fetch.ts | 10 +++- src/agents/tools/web-tools.fetch.test.ts | 27 ++++++++- src/config/schema.base.generated.ts | 11 ++++ src/config/schema.help.ts | 2 + src/config/schema.labels.ts | 1 + src/config/schema.test.ts | 12 ++++ src/config/types.tools.ts | 2 + src/config/zod-schema.agent-runtime.ts | 1 + src/infra/net/fetch-guard.ssrf.test.ts | 74 ++++++++++++++++++++++++ src/infra/net/fetch-guard.ts | 7 +++ src/infra/net/proxy-env.test.ts | 18 ++++++ src/infra/net/proxy-env.ts | 18 +++--- 16 files changed, 212 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5341435eb6b..d17c6b25ab0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai - Config/messages: coerce boolean `messages.visibleReplies` and `messages.groupChat.visibleReplies` values to the documented enum modes so an intuitive toggle no longer invalidates config and drops channel startup. Fixes #75390. Thanks @scottgl9. - Agents/network: allow trusted web-search providers and configured model-provider hosts to work behind Surge/Clash/sing-box fake-IP DNS by accepting RFC 2544 and IPv6 ULA synthetic answers only for the request's scoped hostname, without broad private-network access. Refs #76530 and #76549. Thanks @zqchris. - Providers: honor env-proxy settings for guarded provider model fetches when no explicit dispatcher policy is configured, preserving explicit transport overrides. Fixes #70453. (#72480) Thanks @mjamiv. +- Web fetch: add a default-off `tools.web.fetch.useTrustedEnvProxy` opt-in for proxy-only environments so `web_fetch` can let an operator-controlled HTTP(S) proxy resolve DNS while preserving default strict DNS pinning and hostname policy checks. Refs #58034 and #62560. Thanks @cosmicnet and @mjamiv. - Feishu: accept and honor `channels.feishu.blockStreaming` at the top level and per account, while keeping the legacy default off so Feishu cards no longer reject documented config or silently drop block replies. Fixes #75555. Thanks @vincentkoc. - Gateway/update: avoid `launchctl kickstart -k` immediately after fresh macOS update bootstraps, and unlink dangling global plugin-runtime symlinks during packaged postinstall and `doctor --fix` so upgrades no longer SIGTERM the newly booted Gateway or leave bundled plugin imports pointed at pruned `plugin-runtime-deps` trees. Completes #76261 and fixes #76466. (#76929) - Google Chat: normalize custom Google auth transport headers before google-auth/gaxios interceptors run, restoring webhook token verification when certificate retrieval expects Fetch `Headers`. Fixes #76742. Thanks @donbowman. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 8ecf61c265f..4bee7427bd9 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -b4cce06ca8c16774e277551ba027591289762ed9cf2490c993fec2051ac19c61 config-baseline.json -bfb7ade43e58c630d0480eaa215ef22bf0d5030136c3e24cdd2c2a4c73d1b663 config-baseline.core.json +056760c0a86627641d8e2993cc0cc987820dc4289c40c67dc8c2c1e8970c1849 config-baseline.json +5b5ebd95939d75496597d9858a375e27544812d0f79dc3b4bf87c794ada2ba08 config-baseline.core.json 7b207901b595ad527026b1f357f63a5cd33123a72eeb66bdac24a8f2e8bb1ac8 config-baseline.channel.json 055fae0d0067a751dc10125af7421da45633f73519c94c982d02b0c4eb2bdf67 config-baseline.plugin.json diff --git a/docs/tools/web-fetch.md b/docs/tools/web-fetch.md index 7904e19b8a2..e4711bbdc43 100644 --- a/docs/tools/web-fetch.md +++ b/docs/tools/web-fetch.md @@ -72,6 +72,7 @@ Truncate output to this many characters. timeoutSeconds: 30, cacheTtlMinutes: 15, maxRedirects: 3, + useTrustedEnvProxy: false, // let a trusted HTTP(S) env proxy resolve DNS readability: true, // use Readability extraction userAgent: "Mozilla/5.0 ...", // override User-Agent ssrfPolicy: { @@ -142,6 +143,22 @@ Current runtime behavior: - If Readability is disabled, `web_fetch` skips straight to the selected provider fallback. If no provider is available, it fails closed. +## Trusted Env Proxy + +If your deployment requires `web_fetch` to go through a trusted outbound +HTTP(S) proxy, set `tools.web.fetch.useTrustedEnvProxy: true`. + +In this mode, OpenClaw still applies hostname-based SSRF checks before sending +the request, but it lets the proxy resolve DNS instead of doing local DNS +pinning. Enable this only when the proxy is operator-controlled and enforces +outbound policy after DNS resolution. + + + If no HTTP(S) proxy env var is configured, or the target host is excluded by + `NO_PROXY`, `web_fetch` falls back to the normal strict path with local DNS + pinning. + + ## Limits and safety - `maxChars` is clamped to `tools.web.fetch.maxCharsCap` @@ -153,6 +170,9 @@ Current runtime behavior: for trusted fake-IP proxy stacks; leave them unset unless your proxy owns those synthetic ranges and enforces its own destination policy - Redirects are checked and limited by `maxRedirects` +- `useTrustedEnvProxy` is an explicit opt-in and should only be enabled for + operator-controlled proxies that still enforce outbound policy after DNS + resolution - `web_fetch` is best-effort -- some sites need the [Web Browser](/tools/browser) ## Tool profiles diff --git a/src/agents/tools/web-fetch.ssrf.test.ts b/src/agents/tools/web-fetch.ssrf.test.ts index db3c5477b22..99243fe3f88 100644 --- a/src/agents/tools/web-fetch.ssrf.test.ts +++ b/src/agents/tools/web-fetch.ssrf.test.ts @@ -36,6 +36,7 @@ function setMockFetch( function createWebFetchToolForTest(params?: { firecrawlApiKey?: string; + useTrustedEnvProxy?: boolean; ssrfPolicy?: { allowRfc2544BenchmarkRange?: boolean; allowIpv6UniqueLocalRange?: boolean }; cacheTtlMinutes?: number; }) { @@ -58,6 +59,7 @@ function createWebFetchToolForTest(params?: { web: { fetch: { cacheTtlMinutes: params?.cacheTtlMinutes ?? 0, + useTrustedEnvProxy: params?.useTrustedEnvProxy, ssrfPolicy: params?.ssrfPolicy, ...(params?.firecrawlApiKey ? { provider: "firecrawl" } : {}), }, @@ -89,6 +91,7 @@ describe("web_fetch SSRF protection", () => { global.fetch = priorFetch; lookupMock.mockClear(); vi.restoreAllMocks(); + vi.unstubAllEnvs(); }); it("blocks localhost hostnames before fetch/firecrawl", async () => { @@ -202,4 +205,18 @@ describe("web_fetch SSRF protection", () => { const stricterTool = createWebFetchToolForTest({ cacheTtlMinutes: 1 }); await expectBlockedUrl(stricterTool, url, /private|internal|blocked/i); }); + + it("still blocks dangerous hostnames when trusted env proxy is explicitly enabled", async () => { + vi.stubEnv("HTTP_PROXY", "http://127.0.0.1:7890"); + vi.stubEnv("http_proxy", "http://127.0.0.1:7890"); + const fetchSpy = setMockFetch(); + const tool = createWebFetchToolForTest({ + useTrustedEnvProxy: true, + cacheTtlMinutes: 1, + }); + + await expectBlockedUrl(tool, "http://localhost/test", /Blocked hostname/i); + expect(fetchSpy).not.toHaveBeenCalled(); + expect(lookupMock).not.toHaveBeenCalled(); + }); }); diff --git a/src/agents/tools/web-fetch.ts b/src/agents/tools/web-fetch.ts index c60e931cb5d..c42033ed48b 100644 --- a/src/agents/tools/web-fetch.ts +++ b/src/agents/tools/web-fetch.ts @@ -117,6 +117,10 @@ function resolveFetchReadabilityEnabled(fetch?: WebFetchConfig): boolean { return true; } +function resolveFetchUseTrustedEnvProxy(fetch?: WebFetchConfig): boolean { + return fetch?.useTrustedEnvProxy === true; +} + function resolveFetchMaxCharsCap(fetch?: WebFetchConfig): number { const raw = fetch && "maxCharsCap" in fetch && typeof fetch.maxCharsCap === "number" @@ -273,6 +277,7 @@ type WebFetchRuntimeParams = { userAgent: string; readabilityEnabled: boolean; config?: OpenClawConfig; + useTrustedEnvProxy: boolean; ssrfPolicy?: { allowRfc2544BenchmarkRange?: boolean; allowIpv6UniqueLocalRange?: boolean; @@ -392,6 +397,7 @@ async function maybeFetchProviderWebFetchPayload( async function runWebFetch(params: WebFetchRuntimeParams): Promise> { const allowRfc2544BenchmarkRange = params.ssrfPolicy?.allowRfc2544BenchmarkRange === true; const allowIpv6UniqueLocalRange = params.ssrfPolicy?.allowIpv6UniqueLocalRange === true; + const useTrustedEnvProxy = params.useTrustedEnvProxy; const ssrfPolicy: SsrFPolicy | undefined = allowRfc2544BenchmarkRange || allowIpv6UniqueLocalRange ? { @@ -400,7 +406,7 @@ async function runWebFetch(params: WebFetchRuntimeParams): Promise { expect(details?.warning).toContain("Response body truncated"); }); - it("keeps DNS pinning for untrusted web_fetch URLs even when HTTP_PROXY is configured", async () => { + it("keeps DNS pinning for web_fetch by default even when HTTP_PROXY is configured", async () => { vi.stubEnv("HTTP_PROXY", "http://127.0.0.1:7890"); const mockFetch = installMockFetch((input: RequestInfo | URL) => Promise.resolve({ @@ -353,6 +353,31 @@ describe("web_fetch extraction fallbacks", () => { expect(requestInit?.dispatcher).not.toBeInstanceOf(EnvHttpProxyAgent); }); + it("uses env proxy dispatch for web_fetch when trusted env proxy is explicitly enabled", async () => { + vi.stubEnv("HTTP_PROXY", "http://127.0.0.1:7890"); + const mockFetch = installMockFetch((input: RequestInfo | URL) => + Promise.resolve({ + ok: true, + status: 200, + headers: makeFetchHeaders({ "content-type": "text/plain" }), + text: async () => "proxy body", + url: resolveRequestUrl(input), + } as Response), + ); + const tool = createFetchTool({ + firecrawl: { enabled: false }, + useTrustedEnvProxy: true, + }); + + await tool?.execute?.("call", { url: "https://example.com/proxy" }); + + const requestInit = mockFetch.mock.calls[0]?.[1] as + | (RequestInit & { dispatcher?: unknown }) + | undefined; + expect(requestInit?.dispatcher).toBeDefined(); + expect(requestInit?.dispatcher).toBeInstanceOf(EnvHttpProxyAgent); + }); + // NOTE: Test for wrapping url/finalUrl/warning fields requires DNS mocking. // The sanitization of these fields is verified by external-content.test.ts tests. diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index 600e2e57efb..3c94eb83ae3 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -8759,6 +8759,12 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { description: "Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).", }, + useTrustedEnvProxy: { + type: "boolean", + title: "Web Fetch Trusted Env Proxy", + description: + "Route web_fetch through a trusted HTTP(S) env proxy and let the proxy resolve DNS. Enable only when that proxy is operator-controlled and enforces outbound policy after DNS resolution.", + }, ssrfPolicy: { type: "object", properties: { @@ -25987,6 +25993,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { help: "Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).", tags: ["tools"], }, + "tools.web.fetch.useTrustedEnvProxy": { + label: "Web Fetch Trusted Env Proxy", + help: "Route web_fetch through a trusted HTTP(S) env proxy and let the proxy resolve DNS. Enable only when that proxy is operator-controlled and enforces outbound policy after DNS resolution.", + tags: ["tools"], + }, "tools.web.fetch.ssrfPolicy": { label: "Web Fetch SSRF Policy", help: "Scoped SSRF policy overrides for web_fetch. Keep this narrow and opt in only for known local-network proxy environments.", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 74200ee706d..d53ce928333 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -834,6 +834,8 @@ export const FIELD_HELP: Record = { "tools.web.fetch.userAgent": "Override User-Agent header for web_fetch requests.", "tools.web.fetch.readability": "Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).", + "tools.web.fetch.useTrustedEnvProxy": + "Route web_fetch through a trusted HTTP(S) env proxy and let the proxy resolve DNS. Enable only when that proxy is operator-controlled and enforces outbound policy after DNS resolution.", "tools.web.fetch.ssrfPolicy": "Scoped SSRF policy overrides for web_fetch. Keep this narrow and opt in only for known local-network proxy environments.", "tools.web.fetch.ssrfPolicy.allowRfc2544BenchmarkRange": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index cb0d853cd6f..ac38254f107 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -296,6 +296,7 @@ export const FIELD_LABELS: Record = { "tools.web.fetch.maxRedirects": "Web Fetch Max Redirects", "tools.web.fetch.userAgent": "Web Fetch User-Agent", "tools.web.fetch.readability": "Web Fetch Readability Extraction", + "tools.web.fetch.useTrustedEnvProxy": "Web Fetch Trusted Env Proxy", "tools.web.fetch.ssrfPolicy": "Web Fetch SSRF Policy", "tools.web.fetch.ssrfPolicy.allowRfc2544BenchmarkRange": "Web Fetch Allow RFC 2544 Benchmark Range", diff --git a/src/config/schema.test.ts b/src/config/schema.test.ts index 1bce4d03ab3..2ef0a6d76ce 100644 --- a/src/config/schema.test.ts +++ b/src/config/schema.test.ts @@ -390,6 +390,18 @@ describe("config schema", () => { }); }); + it("accepts web fetch trusted env proxy opt-in in the runtime zod schema", () => { + const parsed = ToolsSchema.parse({ + web: { + fetch: { + useTrustedEnvProxy: true, + }, + }, + }); + + expect(parsed?.web?.fetch?.useTrustedEnvProxy).toBe(true); + }); + it("rejects allowPrivateNetwork on media-understanding request config", () => { expect(() => ToolsSchema.parse({ diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index 11701b4b5c0..755799a9d84 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -576,6 +576,8 @@ export type ToolsConfig = { userAgent?: string; /** Use Readability to extract main content (default: true). */ readability?: boolean; + /** Route web_fetch through a trusted HTTP(S) env proxy and let the proxy resolve DNS. Enable only when that proxy enforces outbound policy. */ + useTrustedEnvProxy?: boolean; /** SSRF policy configuration for web_fetch. */ ssrfPolicy?: { /** Allow RFC 2544 benchmark range IPs (198.18.0.0/15) for fake-IP proxy compatibility (e.g., Clash TUN mode, Surge). */ diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index e212bf94e13..c1c88c79521 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -351,6 +351,7 @@ const ToolsWebFetchSchema = z maxRedirects: z.number().int().nonnegative().optional(), userAgent: z.string().optional(), readability: z.boolean().optional(), + useTrustedEnvProxy: z.boolean().optional(), ssrfPolicy: z .object({ allowRfc2544BenchmarkRange: z.boolean().optional(), diff --git a/src/infra/net/fetch-guard.ssrf.test.ts b/src/infra/net/fetch-guard.ssrf.test.ts index 7bbbe5ebc54..3a6848f31c2 100644 --- a/src/infra/net/fetch-guard.ssrf.test.ts +++ b/src/infra/net/fetch-guard.ssrf.test.ts @@ -1334,4 +1334,78 @@ describe("fetchWithSsrFGuard hardening", () => { expect(lookupFn).toHaveBeenCalledOnce(); await result.release(); }); + + it("enforces hostnameAllowlist in trusted env proxy mode before dispatch", async () => { + clearProxyEnv(); + vi.stubEnv("HTTPS_PROXY", "http://127.0.0.1:7890"); + const lookupFn = vi.fn() as unknown as LookupFn; + const fetchImpl = vi.fn(async () => okResponse()); + + await expect( + fetchWithSsrFGuard({ + url: "https://not-allowed.example/resource", + fetchImpl, + lookupFn, + mode: GUARDED_FETCH_MODE.TRUSTED_ENV_PROXY, + policy: { hostnameAllowlist: ["*.permitted.example"] }, + }), + ).rejects.toThrow(/allowlist/i); + + expect(lookupFn).not.toHaveBeenCalled(); + expect(fetchImpl).not.toHaveBeenCalled(); + }); + + it("keeps DNS pinning in trusted proxy mode when only ALL_PROXY is configured", async () => { + clearProxyEnv(); + vi.stubEnv("ALL_PROXY", "http://127.0.0.1:7890"); + (globalThis as Record)[TEST_UNDICI_RUNTIME_DEPS_KEY] = { + Agent: agentCtor, + EnvHttpProxyAgent: envHttpProxyAgentCtor, + ProxyAgent: proxyAgentCtor, + fetch: vi.fn(async () => okResponse()), + }; + const lookupFn = createPublicLookup(); + const fetchImpl = vi.fn(async (_input: RequestInfo | URL, init?: RequestInit) => { + const requestInit = init as RequestInit & { dispatcher?: unknown }; + expect(requestInit.dispatcher).toBeDefined(); + expect(getDispatcherClassName(requestInit.dispatcher)).not.toBe("EnvHttpProxyAgent"); + return okResponse(); + }); + + const result = await fetchWithSsrFGuard({ + url: "https://public.example/resource", + fetchImpl, + lookupFn, + mode: GUARDED_FETCH_MODE.TRUSTED_ENV_PROXY, + }); + + expect(fetchImpl).toHaveBeenCalledTimes(1); + expect(lookupFn).toHaveBeenCalledOnce(); + await result.release(); + }); + + it("falls back to DNS pinning when NO_PROXY excludes the target host", async () => { + clearProxyEnv(); + vi.stubEnv("HTTPS_PROXY", "http://proxy.corp:8080"); + vi.stubEnv("HTTP_PROXY", "http://proxy.corp:8080"); + vi.stubEnv("NO_PROXY", "public.example"); + const lookupFn = createPublicLookup(); + const fetchImpl = vi.fn(async (_input: RequestInfo | URL, init?: RequestInit) => { + const requestInit = init as RequestInit & { dispatcher?: unknown }; + expect(requestInit.dispatcher).toBeDefined(); + expect(getDispatcherClassName(requestInit.dispatcher)).not.toBe("EnvHttpProxyAgent"); + return okResponse(); + }); + + const result = await fetchWithSsrFGuard({ + url: "https://public.example/resource", + fetchImpl, + lookupFn, + mode: GUARDED_FETCH_MODE.TRUSTED_ENV_PROXY, + }); + + expect(fetchImpl).toHaveBeenCalledTimes(1); + expect(lookupFn).toHaveBeenCalledOnce(); + await result.release(); + }); }); diff --git a/src/infra/net/fetch-guard.ts b/src/infra/net/fetch-guard.ts index fad52009660..564edf5e9a3 100644 --- a/src/infra/net/fetch-guard.ts +++ b/src/infra/net/fetch-guard.ts @@ -371,6 +371,13 @@ export async function fetchWithSsrFGuard(params: GuardedFetchOptions): Promise { env: { NO_PROXY: "*" } as NodeJS.ProcessEnv, expected: true, }, + { + name: "matches apex hostnames for leading-dot entries", + url: "https://openai.com/v1/chat", + env: { NO_PROXY: ".openai.com" } as NodeJS.ProcessEnv, + expected: true, + }, + { + name: "matches apex hostnames for wildcard-dot entries", + url: "https://openai.com/v1/chat", + env: { NO_PROXY: "*.openai.com" } as NodeJS.ProcessEnv, + expected: true, + }, + { + name: "does not treat wildcard entries inside a list as global bypass", + url: "https://api.openai.com/v1/chat", + env: { NO_PROXY: "localhost,*" } as NodeJS.ProcessEnv, + expected: false, + }, { name: "matches exact hostname", url: "https://api.openai.com/v1/chat", diff --git a/src/infra/net/proxy-env.ts b/src/infra/net/proxy-env.ts index de96d01b1f8..21173e06591 100644 --- a/src/infra/net/proxy-env.ts +++ b/src/infra/net/proxy-env.ts @@ -119,7 +119,7 @@ export function shouldUseEnvHttpProxyForUrl( * - Entries separated by commas OR whitespace (undici splits on `/[,\s]/`) * - Case-insensitive * - Empty or missing → no bypass - * - `*` → bypass everything + * - Bare `*` value → bypass everything * - Exact hostname match * - Leading-dot match (`.example.com` matches `foo.example.com`) * - Leading `*.` wildcard match (`*.example.com` matches `foo.example.com`); @@ -153,6 +153,10 @@ export function matchesNoProxy(targetUrl: string, env: NodeJS.ProcessEnv = proce return false; } + if (raw === "*") { + return true; + } + const targetPort = parsed.port !== "" ? parsed.port @@ -170,10 +174,6 @@ export function matchesNoProxy(targetUrl: string, env: NodeJS.ProcessEnv = proce if (!entry) { continue; } - if (entry === "*") { - return true; - } - let entryHost: string; let entryPort: string | undefined; if (entry.startsWith("[")) { @@ -198,9 +198,10 @@ export function matchesNoProxy(targetUrl: string, env: NodeJS.ProcessEnv = proce } // Mirror undici: strip optional leading `*` followed by `.` so both - // `.example.com` and `*.example.com` normalize to `example.com`. - const normalizedEntry = entryHost.replace(/^\*?\./, ""); - if (!normalizedEntry) { + // `.example.com` and `*.example.com` normalize to `example.com`. That also + // means apex hosts still match those entries after normalization. + const normalizedEntry = entryHost.replace(/^\*\./, "").replace(/^\./, ""); + if (!normalizedEntry || normalizedEntry === "*") { continue; } @@ -211,6 +212,5 @@ export function matchesNoProxy(targetUrl: string, env: NodeJS.ProcessEnv = proce return true; } } - return false; }