feat(tools): add URL allowlist for web_search and web_fetch

Add optional urlAllowlist config at tools.web level that restricts which
URLs can be accessed by web tools:

- Config types (types.tools.ts): Add urlAllowlist?: string[] to tools.web
- Zod schema: Add urlAllowlist field to ToolsWebSchema
- Schema help: Add help text for the new config fields
- web_search: Filter Brave search results by allowlist (provider=brave)
- web_fetch: Block URLs not matching allowlist before fetching
- ssrf.ts: Export normalizeHostnameAllowlist and matchesHostnameAllowlist

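URL matching is done on the URL's hostname only and supports:
- Exact domain match (example.com)
- Wildcard patterns (*.github.com matches subdomains such as api.github.com, but not github.com itself)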

When urlAllowlist is not configured, all URLs are allowed (backwards compatible).

Tests: Add web-tools.url-allowlist.test.ts with 23 tests covering:
- URL allowlist resolution from config
- Wildcard pattern matching
- web_fetch error response format
- Brave search result filtering
This commit is contained in:
smartprogrammer93
2026-02-16 21:09:44 +00:00
committed by Peter Steinberger
parent e179d453c7
commit 6d2e3685d6
7 changed files with 305 additions and 14 deletions

View File

@@ -2,7 +2,11 @@ import { Type } from "@sinclair/typebox";
import type { OpenClawConfig } from "../../config/config.js";
import type { AnyAgentTool } from "./common.js";
import { fetchWithSsrFGuard } from "../../infra/net/fetch-guard.js";
import { SsrFBlockedError } from "../../infra/net/ssrf.js";
import {
matchesHostnameAllowlist,
normalizeHostnameAllowlist,
SsrFBlockedError,
} from "../../infra/net/ssrf.js";
import { logDebug } from "../../logger.js";
import { wrapExternalContent, wrapWebContent } from "../../security/external-content.js";
import { normalizeSecretInput } from "../../utils/normalize-secret-input.js";
@@ -68,6 +72,32 @@ type WebFetchConfig = NonNullable<OpenClawConfig["tools"]>["web"] extends infer
: undefined
: undefined;
type WebConfig = NonNullable<OpenClawConfig["tools"]>["web"];
/**
 * Reads the URL allowlist used by web_fetch from the `tools.web` config section.
 *
 * @param web - The `tools.web` config section, if present.
 * @returns The configured `urlAllowlist` array (same reference, not copied)
 *   when it is a non-empty array; `undefined` when the section is missing, is
 *   not an object, has no `urlAllowlist` key, or the value is not an array or
 *   is empty. Callers treat `undefined` as "no restriction".
 */
export function resolveFetchUrlAllowlist(web?: WebConfig): string[] | undefined {
  const candidate =
    web && typeof web === "object" && "urlAllowlist" in web ? web.urlAllowlist : undefined;
  if (Array.isArray(candidate) && candidate.length > 0) {
    return candidate;
  }
  return undefined;
}
/**
 * Reports whether the hostname of `url` is permitted by `allowlist`.
 *
 * Only the hostname parsed from `url` is compared; the rest of the URL is not
 * consulted. The allowlist is normalized on every call before matching.
 *
 * @param url - Absolute URL string to check.
 * @param allowlist - Raw hostname patterns as configured.
 * @returns The result of the hostname allowlist match, or `false` when `url`
 *   cannot be parsed (or normalizing/matching throws).
 */
export function isUrlAllowedByAllowlist(url: string, allowlist: string[]): boolean {
  try {
    const { hostname: host } = new URL(url);
    return matchesHostnameAllowlist(host, normalizeHostnameAllowlist(allowlist));
  } catch {
    // Anything that cannot be parsed or matched is treated as not allowed.
    return false;
  }
}
type FirecrawlFetchConfig =
| {
enabled?: boolean;
@@ -732,6 +762,7 @@ export function createWebFetchTool(options?: {
(fetch && "userAgent" in fetch && typeof fetch.userAgent === "string" && fetch.userAgent) ||
DEFAULT_FETCH_USER_AGENT;
const maxResponseBytes = resolveFetchMaxResponseBytes(fetch);
const urlAllowlist = resolveFetchUrlAllowlist(options?.config?.tools?.web);
return {
label: "Web Fetch",
name: "web_fetch",
@@ -741,6 +772,25 @@ export function createWebFetchTool(options?: {
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
const url = readStringParam(params, "url", { required: true });
// Check URL against allowlist if configured
if (urlAllowlist && urlAllowlist.length > 0) {
if (!isUrlAllowedByAllowlist(url, urlAllowlist)) {
let hostname: string;
try {
hostname = new URL(url).hostname;
} catch {
hostname = url;
}
return jsonResult({
error: "url_not_allowed",
message: `URL not in allowlist. Allowed domains: ${urlAllowlist.join(", ")}`,
blockedUrl: url,
blockedHostname: hostname,
});
}
}
const extractMode = readStringParam(params, "extractMode") === "text" ? "text" : "markdown";
const maxChars = readNumberParam(params, "maxChars", { integer: true });
const maxCharsCap = resolveFetchMaxCharsCap(fetch);

View File

@@ -2,6 +2,7 @@ import { Type } from "@sinclair/typebox";
import type { OpenClawConfig } from "../../config/config.js";
import type { AnyAgentTool } from "./common.js";
import { formatCliCommand } from "../../cli/command-format.js";
import { matchesHostnameAllowlist, normalizeHostnameAllowlist } from "../../infra/net/ssrf.js";
import { wrapWebContent } from "../../security/external-content.js";
import { normalizeSecretInput } from "../../utils/normalize-secret-input.js";
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
@@ -75,6 +76,43 @@ type WebSearchConfig = NonNullable<OpenClawConfig["tools"]>["web"] extends infer
: undefined
: undefined;
type WebConfig = NonNullable<OpenClawConfig["tools"]>["web"];
/**
 * Reads the URL allowlist used by web_search from the `tools.web` config section.
 *
 * @param web - The `tools.web` config section, if present.
 * @returns The configured `urlAllowlist` array (same reference) when it is a
 *   non-empty array; otherwise `undefined`, which callers treat as
 *   "no filtering".
 */
export function resolveUrlAllowlist(web?: WebConfig): string[] | undefined {
  if (web && typeof web === "object" && "urlAllowlist" in web) {
    const configured = web.urlAllowlist;
    // An empty or non-array value is treated the same as "not configured".
    const usable = Array.isArray(configured) && configured.length > 0;
    return usable ? configured : undefined;
  }
  return undefined;
}
/**
 * Filters search results down to those whose URL hostname matches the allowlist.
 *
 * Generic over the element type so the caller's result shape (any extra fields
 * beyond `url`/`siteName`) is preserved in the returned array's type.
 *
 * @param results - Search results to filter; the array is not mutated.
 * @param allowlist - Raw hostname patterns. An empty list disables filtering
 *   and the input array is returned as-is (same reference).
 * @returns A new array holding the entries that pass the filter.
 *
 * NOTE(review): entries with no `url`, and entries whose `url` cannot be
 * parsed, are deliberately kept. That is fail-open behaviour for an allowlist;
 * confirm it is acceptable for every consumer of the filtered results.
 */
export function filterResultsByAllowlist<T extends { url?: string; siteName?: string }>(
  results: T[],
  allowlist: string[],
): T[] {
  if (allowlist.length === 0) {
    return results;
  }

  // Normalize once up front rather than once per result.
  const normalizedAllowlist = normalizeHostnameAllowlist(allowlist);

  return results.filter((result) => {
    if (!result.url) {
      return true; // Keep entries without URL
    }
    try {
      const hostname = new URL(result.url).hostname;
      return matchesHostnameAllowlist(hostname, normalizedAllowlist);
    } catch {
      return true; // Keep entries with invalid URLs (let them pass through)
    }
  });
}
type BraveSearchResult = {
title?: string;
url?: string;
@@ -566,6 +604,7 @@ async function runWebSearch(params: {
perplexityModel?: string;
grokModel?: string;
grokInlineCitations?: boolean;
urlAllowlist?: string[];
}): Promise<Record<string, unknown>> {
const cacheKey = normalizeCacheKey(
params.provider === "brave"
@@ -688,10 +727,15 @@ async function runWebSearch(params: {
};
});
// Filter results by urlAllowlist if configured
const filteredResults = params.urlAllowlist
? filterResultsByAllowlist(mapped, params.urlAllowlist)
: mapped;
const payload = {
query: params.query,
provider: params.provider,
count: mapped.length,
count: filteredResults.length,
tookMs: Date.now() - start,
externalContent: {
untrusted: true,
@@ -699,7 +743,7 @@ async function runWebSearch(params: {
provider: params.provider,
wrapped: true,
},
results: mapped,
results: filteredResults,
};
writeCache(SEARCH_CACHE, cacheKey, payload, params.cacheTtlMs);
return payload;
@@ -717,6 +761,7 @@ export function createWebSearchTool(options?: {
const provider = resolveSearchProvider(search);
const perplexityConfig = resolvePerplexityConfig(search);
const grokConfig = resolveGrokConfig(search);
const urlAllowlist = resolveUrlAllowlist(options?.config?.tools?.web);
const description =
provider === "perplexity"
@@ -786,6 +831,7 @@ export function createWebSearchTool(options?: {
perplexityModel: resolvePerplexityModel(perplexityConfig),
grokModel: resolveGrokModel(grokConfig),
grokInlineCitations: resolveGrokInlineCitations(grokConfig),
urlAllowlist,
});
return jsonResult(result);
},
@@ -803,4 +849,6 @@ export const __testing = {
resolveGrokModel,
resolveGrokInlineCitations,
extractGrokContent,
resolveUrlAllowlist,
filterResultsByAllowlist,
} as const;

View File

@@ -0,0 +1,193 @@
import { describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { isUrlAllowedByAllowlist, resolveFetchUrlAllowlist } from "./web-fetch.js";
import { filterResultsByAllowlist, resolveUrlAllowlist } from "./web-search.js";
// Unit tests for the web_search side of the shared tools.web.urlAllowlist:
// resolving the allowlist from config and filtering search results by hostname.
describe("web-search urlAllowlist", () => {
  describe("resolveUrlAllowlist", () => {
    it("returns undefined when web config is undefined", () => {
      const result = resolveUrlAllowlist(undefined);
      expect(result).toBeUndefined();
    });

    it("returns undefined when urlAllowlist is not set", () => {
      const result = resolveUrlAllowlist({ search: { enabled: true } });
      expect(result).toBeUndefined();
    });

    it("returns undefined when urlAllowlist is empty array", () => {
      const result = resolveUrlAllowlist({ urlAllowlist: [], search: { enabled: true } });
      expect(result).toBeUndefined();
    });

    it("returns the allowlist when configured", () => {
      const result = resolveUrlAllowlist({
        urlAllowlist: ["example.com", "*.github.com"],
        search: { enabled: true },
      });
      expect(result).toEqual(["example.com", "*.github.com"]);
    });
  });

  describe("filterResultsByAllowlist", () => {
    // Shared fixture: four results on distinct hosts plus one entry with no URL,
    // which the filter always keeps.
    const results = [
      { url: "https://example.com/page", siteName: "example.com" },
      { url: "https://api.github.com/user/repo", siteName: "api.github.com" },
      { url: "https://docs.openclaw.ai/guide", siteName: "docs.openclaw.ai" },
      { url: "https://blocked.org/page", siteName: "blocked.org" },
      { url: undefined, siteName: "unknown" }, // entry without URL
    ];

    it("returns all results when allowlist is empty", () => {
      const result = filterResultsByAllowlist(results, []);
      expect(result).toHaveLength(5);
    });

    it("filters results by exact domain match", () => {
      const result = filterResultsByAllowlist(results, ["example.com"]);
      expect(result).toHaveLength(2); // example.com + entry without URL
      expect(result.map((r) => r.url)).toContain("https://example.com/page");
      expect(result.map((r) => r.url)).not.toContain("https://api.github.com/user/repo");
    });

    it("filters results by wildcard pattern", () => {
      const result = filterResultsByAllowlist(results, ["*.github.com"]);
      expect(result).toHaveLength(2); // api.github.com + entry without URL
      expect(result.map((r) => r.url)).toContain("https://api.github.com/user/repo");
      expect(result.map((r) => r.url)).not.toContain("https://example.com/page");
    });

    it("filters results with multiple patterns", () => {
      const result = filterResultsByAllowlist(results, ["example.com", "*.github.com"]);
      expect(result).toHaveLength(3); // example.com + api.github.com + entry without URL
      expect(result.map((r) => r.url)).toContain("https://example.com/page");
      expect(result.map((r) => r.url)).toContain("https://api.github.com/user/repo");
      expect(result.map((r) => r.url)).not.toContain("https://blocked.org/page");
    });

    // Title fixed: this is an allowlist, so everything NOT listed is dropped.
    it("keeps only allowlisted URLs plus entries without URLs", () => {
      const result = filterResultsByAllowlist(results, ["blocked.org"]);
      // With allowlist ["blocked.org"], we ONLY keep blocked.org URLs and entries without URLs
      expect(result).toHaveLength(2); // blocked.org + entry without URL
      expect(result.map((r) => r.url)).toContain("https://blocked.org/page");
    });
  });
});
// Unit tests for the web_fetch side of the shared tools.web.urlAllowlist:
// resolving the allowlist from config, hostname matching, and the shape of the
// error payload returned for a blocked URL.
describe("web-fetch urlAllowlist", () => {
  describe("resolveFetchUrlAllowlist", () => {
    it("returns undefined when web config is undefined", () => {
      const result = resolveFetchUrlAllowlist(undefined);
      expect(result).toBeUndefined();
    });

    it("returns undefined when urlAllowlist is not set", () => {
      const result = resolveFetchUrlAllowlist({ fetch: { enabled: true } });
      expect(result).toBeUndefined();
    });

    it("returns undefined when urlAllowlist is empty array", () => {
      const result = resolveFetchUrlAllowlist({ urlAllowlist: [], fetch: { enabled: true } });
      expect(result).toBeUndefined();
    });

    it("returns the allowlist when configured", () => {
      const result = resolveFetchUrlAllowlist({
        urlAllowlist: ["example.com", "*.github.com"],
        fetch: { enabled: true },
      });
      expect(result).toEqual(["example.com", "*.github.com"]);
    });
  });

  describe("isUrlAllowedByAllowlist", () => {
    it("allows any URL when allowlist is empty", () => {
      const result = isUrlAllowedByAllowlist("https://example.com/page", []);
      expect(result).toBe(true);
    });

    it("blocks URLs not in allowlist", () => {
      const result = isUrlAllowedByAllowlist("https://blocked.com/page", ["example.com"]);
      expect(result).toBe(false);
    });

    it("allows URLs matching exact domain", () => {
      const result = isUrlAllowedByAllowlist("https://example.com/page", ["example.com"]);
      expect(result).toBe(true);
    });

    it("allows URLs matching wildcard pattern", () => {
      const result = isUrlAllowedByAllowlist("https://api.github.com/users", ["*.github.com"]);
      expect(result).toBe(true);
    });

    it("blocks URLs not matching wildcard pattern", () => {
      const result = isUrlAllowedByAllowlist("https://github.com", ["*.github.com"]);
      // Exact match "github.com" should not match "*.github.com" pattern
      // because *.github.com requires at least one subdomain
      expect(result).toBe(false);
    });

    it("allows subdomain with wildcard pattern", () => {
      const result = isUrlAllowedByAllowlist("https://docs.openclaw.ai/guide", ["*.openclaw.ai"]);
      expect(result).toBe(true);
    });

    it("handles URLs without protocol", () => {
      // "not-a-url" cannot be parsed by the URL constructor, so it is rejected.
      const result = isUrlAllowedByAllowlist("not-a-url", ["example.com"]);
      expect(result).toBe(false);
    });
  });

  describe("web_fetch error response", () => {
    // This test verifies the error format returned when URL is blocked
    it("returns correct error format for blocked URL", () => {
      // Simulate the error response format
      const urlAllowlist = ["example.com"];
      const url = "https://blocked.com/page";

      // Assert the precondition rather than branching on it: guarding the
      // assertions with `if` would let the test pass without checking anything
      // if the URL were ever (wrongly) allowed.
      expect(isUrlAllowedByAllowlist(url, urlAllowlist)).toBe(false);

      const hostname = new URL(url).hostname;
      const errorResponse = {
        error: "url_not_allowed",
        message: `URL not in allowlist. Allowed domains: ${urlAllowlist.join(", ")}`,
        blockedUrl: url,
        blockedHostname: hostname,
      };
      expect(errorResponse.error).toBe("url_not_allowed");
      expect(errorResponse.message).toContain("example.com");
      expect(errorResponse.blockedUrl).toBe("https://blocked.com/page");
      expect(errorResponse.blockedHostname).toBe("blocked.com");
    });
  });
});
// Checks that both tools resolve the same allowlist from one shared
// tools.web config section, and that a missing config yields no allowlist.
describe("integration with config", () => {
  it("reads urlAllowlist from tools.web config", () => {
    const config: OpenClawConfig = {
      tools: {
        web: {
          urlAllowlist: ["example.com", "*.github.com"],
          search: { enabled: true },
          fetch: { enabled: true },
        },
      },
    };
    const webSection = config.tools?.web;

    // Search and fetch read the same tools.web.urlAllowlist value.
    expect(resolveUrlAllowlist(webSection)).toEqual(["example.com", "*.github.com"]);
    expect(resolveFetchUrlAllowlist(webSection)).toEqual(["example.com", "*.github.com"]);
  });

  it("works with undefined config", () => {
    expect(resolveUrlAllowlist(undefined)).toBeUndefined();
    expect(resolveFetchUrlAllowlist(undefined)).toBeUndefined();
  });
});

View File

@@ -93,16 +93,13 @@ export const FIELD_HELP: Record<string, string> = {
"tools.web.search.enabled": "Enable the web_search tool (requires a provider API key).",
"tools.web.search.provider": 'Search provider ("brave" or "perplexity").',
"tools.web.search.apiKey": "Brave Search API key (fallback: BRAVE_API_KEY env var).",
"tools.web.search.maxResults": "Default number of results to return (1-10).",
"tools.web.search.timeoutSeconds": "Timeout in seconds for web_search requests.",
"tools.web.search.cacheTtlMinutes": "Cache TTL in minutes for web_search results.",
"tools.web.search.perplexity.apiKey":
"Perplexity or OpenRouter API key (fallback: PERPLEXITY_API_KEY or OPENROUTER_API_KEY env var).",
"tools.web.search.perplexity.baseUrl":
"Perplexity base URL override (default: https://openrouter.ai/api/v1 or https://api.perplexity.ai).",
"tools.web.search.perplexity.model":
'Perplexity model override (default: "perplexity/sonar-pro").',
"tools.web.search.urlAllowlist":
"Optional URL/domain allowlist for web_search. When configured, Brave search results are filtered to only include URLs from allowed domains.",
"tools.web.urlAllowlist":
"Optional URL/domain allowlist shared by web_search and web_fetch. Accepts domain patterns like 'example.com', '*.github.com', 'docs.openclaw.ai'. When configured, only matching URLs are allowed.",
"tools.web.fetch.enabled": "Enable the web_fetch tool (lightweight HTTP fetch).",
"tools.web.fetch.urlAllowlist":
"Optional URL/domain allowlist for web_fetch. When configured, only URLs matching these patterns can be fetched.",
"tools.web.fetch.maxChars": "Max characters returned by web_fetch (truncated).",
"tools.web.fetch.maxCharsCap":
"Hard cap for web_fetch maxChars (applies to config and tool calls).",

View File

@@ -355,6 +355,8 @@ export type ToolsConfig = {
/** Optional tool policy overrides keyed by provider id or "provider/model". */
byProvider?: Record<string, ToolPolicyConfig>;
web?: {
/** Optional URL/domain allowlist for web tools. When configured, only URLs matching these patterns are allowed. */
urlAllowlist?: string[];
search?: {
/** Enable web search tool (default: true when API key is present). */
enabled?: boolean;

View File

@@ -267,6 +267,7 @@ export const ToolsWebFetchSchema = z
export const ToolsWebSchema = z
.object({
urlAllowlist: z.array(z.string()).optional(),
search: ToolsWebSearchSchema,
fetch: ToolsWebFetchSchema,
})

View File

@@ -33,7 +33,7 @@ function normalizeHostnameSet(values?: string[]): Set<string> {
return new Set(values.map((value) => normalizeHostname(value)).filter(Boolean));
}
function normalizeHostnameAllowlist(values?: string[]): string[] {
export function normalizeHostnameAllowlist(values?: string[]): string[] {
if (!values || values.length === 0) {
return [];
}
@@ -57,7 +57,7 @@ function isHostnameAllowedByPattern(hostname: string, pattern: string): boolean
return hostname === pattern;
}
function matchesHostnameAllowlist(hostname: string, allowlist: string[]): boolean {
export function matchesHostnameAllowlist(hostname: string, allowlist: string[]): boolean {
if (allowlist.length === 0) {
return true;
}