From a483de1787348a9d6ea08ff088b48035e45e7261 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 2 May 2026 08:27:15 +0100 Subject: [PATCH] feat(brave): support configurable search base url --- CHANGELOG.md | 1 + docs/.generated/config-baseline.sha256 | 4 +- docs/tools/brave-search.md | 9 ++ extensions/brave/openclaw.plugin.json | 7 + .../src/brave-web-search-provider.runtime.ts | 104 ++++++++++++++- .../src/brave-web-search-provider.shared.ts | 1 + .../src/brave-web-search-provider.test.ts | 121 ++++++++++++++++++ 7 files changed, 239 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18341be637b..cdb2096585d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai - Cron: keep implicit/default isolated cron announce deliveries out of the main session awareness queue, so isolated jobs do not accumulate in the main conversation. Fixes #61426. Thanks @Lihannon. - Subagents: avoid duplicate parent-visible replies when a parent uses `sessions_send` on its own persistent native subagent session, while preserving announce delivery for async sends. Fixes #73550. Thanks @sylviazhang2006-design. - Web search/Brave: add opt-in `brave.http` diagnostics for Brave request URLs/query params, response status/timing, and cache hit/miss/write events without logging API keys or response bodies. Fixes #55196. Thanks @mecampbellsoup. +- Web search/Brave: add `plugins.entries.brave.config.webSearch.baseUrl` for Brave-compatible proxies, including endpoint-aware cache keys for both web and LLM Context modes. Fixes #19075. Thanks @jkoprax and @vishnukool. - Web search/config: validate explicit `tools.web.search.provider` values against bundled and installed plugin manifests, while warning for stale third-party plugin config. Fixes #53092. Thanks @TinyTb. 
- Web search/SearXNG: retry empty non-general category searches once with the general category, so unsupported category engines do not return empty results when general search has matches. Fixes #73552. Thanks @Loukky. - Agents/sandbox: preserve existing workspace file modes when sandbox edits atomically replace files, so 0644 files do not collapse to 0600 after Write/Edit/apply_patch. Fixes #44077. Thanks @patosullivan. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 0bff9e7bc28..3851000d64f 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -3545cf963a093d50b69904f859544088212e6522905b72710eb7818caa154b89 config-baseline.json +737056ad5544e24250ce91c000ae4a5fe0af751681a529f2e4710b383ef5d4e7 config-baseline.json 2d132b4c2e3b0e0f2524fc1cc889d3be658ad0e40c970b2d367bf27348883658 config-baseline.core.json f42329d45c095881bd226bdb192c235980658fd250606d0c0badc2b12f12f5d3 config-baseline.channel.json -38b16427911ba4ff19240097e5002fb892178fb3cefdc9c50fd98ad2044c02bf config-baseline.plugin.json +726c2fb81319f05be6977cdf5c9598884feafc600e6c76d482be626f4983bc32 config-baseline.plugin.json diff --git a/docs/tools/brave-search.md b/docs/tools/brave-search.md index 8760acdd889..cd4522abf53 100644 --- a/docs/tools/brave-search.md +++ b/docs/tools/brave-search.md @@ -27,6 +27,7 @@ OpenClaw supports Brave Search API as a `web_search` provider. webSearch: { apiKey: "BRAVE_API_KEY_HERE", mode: "web", // or "llm-context" + baseUrl: "https://api.search.brave.com", // optional proxy/base URL override }, }, }, @@ -52,6 +53,12 @@ Legacy `tools.web.search.apiKey` still loads through the compatibility shim, but - `web` (default): normal Brave web search with titles, URLs, and snippets - `llm-context`: Brave LLM Context API with pre-extracted text chunks and sources for grounding +`webSearch.baseUrl` can point Brave requests at a trusted Brave-compatible proxy +or gateway. 
OpenClaw appends `/res/v1/web/search` or `/res/v1/llm/context` to +the configured base URL and keeps the base URL in the cache key. Public +endpoints must use `https://`; `http://` is accepted only for trusted loopback +or private-network proxy hosts. + ## Tool parameters @@ -123,6 +130,8 @@ await web_search({ - `llm-context` mode supports `freshness` and bounded `date_after` + `date_before` ranges. It does not support `ui_lang`; `date_before` without `date_after` is rejected because Brave requires custom freshness ranges to include both start and end dates. - `ui_lang` must include a region subtag like `en-US`. - Results are cached for 15 minutes by default (configurable via `cacheTtlMinutes`). +- Custom `webSearch.baseUrl` values are included in Brave cache identity, so + proxy-specific responses do not collide. - Enable the `brave.http` diagnostics flag to log Brave request URLs/query params, response status/timing, and search-cache hit/miss/write events while troubleshooting. The flag never logs the API key or response bodies, but search queries can be sensitive. ## Related diff --git a/extensions/brave/openclaw.plugin.json b/extensions/brave/openclaw.plugin.json index 1698421ae47..be8204f1257 100644 --- a/extensions/brave/openclaw.plugin.json +++ b/extensions/brave/openclaw.plugin.json @@ -16,6 +16,10 @@ "webSearch.mode": { "label": "Brave Search Mode", "help": "Brave Search mode: web or llm-context." + }, + "webSearch.baseUrl": { + "label": "Brave Search Base URL", + "help": "Optional Brave-compatible API base URL for trusted proxies. Defaults to https://api.search.brave.com." 
} }, "contracts": { @@ -38,6 +42,9 @@ "mode": { "type": "string", "enum": ["web", "llm-context"] + }, + "baseUrl": { + "type": ["string", "object"] } } } diff --git a/extensions/brave/src/brave-web-search-provider.runtime.ts b/extensions/brave/src/brave-web-search-provider.runtime.ts index 2d31d4bfaf8..7a4d2e6b189 100644 --- a/extensions/brave/src/brave-web-search-provider.runtime.ts +++ b/extensions/brave/src/brave-web-search-provider.runtime.ts @@ -14,11 +14,18 @@ import { resolveSearchCount, resolveSearchTimeoutSeconds, resolveSiteName, + withSelfHostedWebSearchEndpoint, withTrustedWebSearchEndpoint, wrapWebContent, writeCachedSearchPayload, } from "openclaw/plugin-sdk/provider-web-search"; import { createSubsystemLogger } from "openclaw/plugin-sdk/runtime-env"; +import { + assertHttpUrlTargetsPrivateNetwork, + isBlockedHostnameOrIp, + isPrivateIpAddress, + resolvePinnedHostnameWithPolicy, +} from "openclaw/plugin-sdk/ssrf-runtime"; import { type BraveLlmContextResponse, mapBraveLlmContextResults, @@ -28,9 +35,11 @@ import { resolveBraveMode, } from "./brave-web-search-provider.shared.js"; -const BRAVE_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"; -const BRAVE_LLM_CONTEXT_ENDPOINT = "https://api.search.brave.com/res/v1/llm/context"; +const DEFAULT_BRAVE_BASE_URL = "https://api.search.brave.com"; +const BRAVE_SEARCH_ENDPOINT_PATH = "/res/v1/web/search"; +const BRAVE_LLM_CONTEXT_ENDPOINT_PATH = "/res/v1/llm/context"; const braveHttpLogger = createSubsystemLogger("brave/http"); +type BraveEndpointMode = "selfHosted" | "strict"; type BraveSearchResult = { title?: string; @@ -79,6 +88,63 @@ function resolveBraveApiKey(searchConfig?: SearchConfigRecord): string | undefin ); } +function resolveBraveBaseUrl(braveConfig: { baseUrl?: unknown } | undefined): string { + const configured = readConfiguredSecretString( + braveConfig?.baseUrl, + "plugins.entries.brave.config.webSearch.baseUrl", + ); + return configured?.replace(/\/+$/u, "") || 
DEFAULT_BRAVE_BASE_URL; +} + +function buildBraveEndpointUrl(params: { baseUrl: string; endpointPath: string }): URL { + const url = new URL(params.baseUrl); + const basePath = url.pathname.replace(/\/+$/u, ""); + url.pathname = `${basePath}${params.endpointPath}`; + url.search = ""; + return url; +} + +async function braveEndpointTargetsPrivateNetwork(url: URL): Promise<boolean> { + if (isBlockedHostnameOrIp(url.hostname)) { + return true; + } + try { + const pinned = await resolvePinnedHostnameWithPolicy(url.hostname, { + policy: { + allowPrivateNetwork: true, + allowRfc2544BenchmarkRange: true, + }, + }); + return pinned.addresses.every((address) => isPrivateIpAddress(address)); + } catch { + return false; + } +} + +async function validateBraveBaseUrl(baseUrl: string): Promise<BraveEndpointMode> { + let parsed: URL; + try { + parsed = new URL(baseUrl); + } catch { + throw new Error("Brave Search base URL must be a valid http:// or https:// URL."); + } + + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { + throw new Error("Brave Search base URL must use http:// or https://."); + } + + if (parsed.protocol === "http:") { + await assertHttpUrlTargetsPrivateNetwork(parsed.toString(), { + dangerouslyAllowPrivateNetwork: true, + errorMessage: + "Brave Search HTTP base URL must target a trusted private or loopback host. Use https:// for public hosts.", + }); + return "selfHosted"; + } + + return (await braveEndpointTargetsPrivateNetwork(parsed)) ? 
"selfHosted" : "strict"; +} + function missingBraveKeyPayload() { return { error: "missing_brave_api_key", @@ -88,6 +154,8 @@ function missingBraveKeyPayload() { } async function runBraveLlmContextSearch(params: { + baseUrl: string; + endpointMode: BraveEndpointMode; query: string; apiKey: string; timeoutSeconds: number; @@ -106,7 +174,10 @@ async function runBraveLlmContextSearch(params: { }>; sources?: BraveLlmContextResponse["sources"]; }> { - const url = new URL(BRAVE_LLM_CONTEXT_ENDPOINT); + const url = buildBraveEndpointUrl({ + baseUrl: params.baseUrl, + endpointPath: BRAVE_LLM_CONTEXT_ENDPOINT_PATH, + }); url.searchParams.set("q", params.query); if (params.country) { url.searchParams.set("country", params.country); @@ -130,7 +201,11 @@ async function runBraveLlmContextSearch(params: { ...describeBraveRequestUrl(url), }); const startedAt = Date.now(); - return withTrustedWebSearchEndpoint( + const withEndpoint = + params.endpointMode === "selfHosted" + ? withSelfHostedWebSearchEndpoint + : withTrustedWebSearchEndpoint; + return withEndpoint( { url: url.toString(), timeoutSeconds: params.timeoutSeconds, @@ -163,6 +238,8 @@ async function runBraveLlmContextSearch(params: { } async function runBraveWebSearch(params: { + baseUrl: string; + endpointMode: BraveEndpointMode; query: string; count: number; apiKey: string; @@ -175,7 +252,10 @@ async function runBraveWebSearch(params: { dateAfter?: string; dateBefore?: string; }): Promise<Array<Record<string, unknown>>> { - const url = new URL(BRAVE_SEARCH_ENDPOINT); + const url = buildBraveEndpointUrl({ + baseUrl: params.baseUrl, + endpointPath: BRAVE_SEARCH_ENDPOINT_PATH, + }); url.searchParams.set("q", params.query); url.searchParams.set("count", String(params.count)); if (params.country) { @@ -205,7 +285,11 @@ async function runBraveWebSearch(params: { ...describeBraveRequestUrl(url), }); const startedAt = Date.now(); - return withTrustedWebSearchEndpoint( + const withEndpoint = + params.endpointMode === "selfHosted" + ? 
withSelfHostedWebSearchEndpoint + : withTrustedWebSearchEndpoint; + return withEndpoint( { url: url.toString(), timeoutSeconds: params.timeoutSeconds, @@ -263,6 +347,8 @@ export async function executeBraveSearch( const braveConfig = resolveBraveConfig(searchConfig); const braveMode = resolveBraveMode(braveConfig); + const braveBaseUrl = resolveBraveBaseUrl(braveConfig); + const braveEndpointMode = await validateBraveBaseUrl(braveBaseUrl); const query = readStringParam(args, "query", { required: true }); const count = readNumberParam(args, "count", { integer: true }) ?? searchConfig?.maxResults ?? undefined; @@ -358,6 +444,7 @@ export async function executeBraveSearch( ? [ "brave", braveMode, + braveBaseUrl, query, country, normalizedLanguage.search_lang, @@ -368,6 +455,7 @@ export async function executeBraveSearch( : [ "brave", braveMode, + braveBaseUrl, query, resolveSearchCount(count, DEFAULT_SEARCH_COUNT), country, @@ -392,6 +480,8 @@ export async function executeBraveSearch( if (braveMode === "llm-context") { const { results, sources } = await runBraveLlmContextSearch({ + baseUrl: braveBaseUrl, + endpointMode: braveEndpointMode, query, apiKey, timeoutSeconds, @@ -434,6 +524,8 @@ export async function executeBraveSearch( } const results = await runBraveWebSearch({ + baseUrl: braveBaseUrl, + endpointMode: braveEndpointMode, query, count: resolveSearchCount(count, DEFAULT_SEARCH_COUNT), apiKey, diff --git a/extensions/brave/src/brave-web-search-provider.shared.ts b/extensions/brave/src/brave-web-search-provider.shared.ts index 704d60e1180..bfe9d6ec86e 100644 --- a/extensions/brave/src/brave-web-search-provider.shared.ts +++ b/extensions/brave/src/brave-web-search-provider.shared.ts @@ -4,6 +4,7 @@ import { } from "openclaw/plugin-sdk/text-runtime"; type BraveConfig = { + baseUrl?: unknown; mode?: string; }; diff --git a/extensions/brave/src/brave-web-search-provider.test.ts b/extensions/brave/src/brave-web-search-provider.test.ts index 24a80ccdf02..4528e615071 
100644 --- a/extensions/brave/src/brave-web-search-provider.test.ts +++ b/extensions/brave/src/brave-web-search-provider.test.ts @@ -168,6 +168,127 @@ describe("brave web search provider", () => { expect(result.ok).toBe(true); }); + it("accepts baseUrl in the Brave plugin config schema", () => { + if (!braveManifest.configSchema) { + throw new Error("Expected Brave manifest config schema"); + } + + const result = validateJsonSchemaValue({ + schema: braveManifest.configSchema, + cacheKey: "test:brave-config-schema-base-url", + value: { + webSearch: { + baseUrl: "https://api.search.brave.com/proxy", + }, + }, + }); + + expect(result.ok).toBe(true); + }); + + it("uses configured Brave baseUrl for web search requests", async () => { + vi.stubEnv("BRAVE_API_KEY", ""); + const mockFetch = vi.fn(async (_input?: unknown, _init?: unknown) => { + return { + ok: true, + json: async () => ({ web: { results: [] } }), + } as Response; + }); + global.fetch = mockFetch as typeof global.fetch; + + const provider = createBraveWebSearchProvider(); + const tool = provider.createTool({ + config: {}, + searchConfig: { + apiKey: "brave-test-key", + brave: { + baseUrl: "https://api.search.brave.com/proxy/", + mode: "web", + }, + }, + }); + if (!tool) { + throw new Error("Expected tool definition"); + } + + await tool.execute({ query: "latest ai news" }); + + const requestUrl = new URL(String(mockFetch.mock.calls[0]?.[0])); + expect(requestUrl.origin).toBe("https://api.search.brave.com"); + expect(requestUrl.pathname).toBe("/proxy/res/v1/web/search"); + }); + + it("uses configured Brave baseUrl for llm-context requests", async () => { + vi.stubEnv("BRAVE_API_KEY", ""); + const mockFetch = installBraveLlmContextFetch(); + const provider = createBraveWebSearchProvider(); + const tool = provider.createTool({ + config: {}, + searchConfig: { + apiKey: "brave-test-key", + brave: { + baseUrl: "https://api.search.brave.com/proxy", + mode: "llm-context", + }, + }, + }); + if (!tool) { + throw new 
Error("Expected tool definition"); + } + + await tool.execute({ query: "latest ai news" }); + + const requestUrl = new URL(String(mockFetch.mock.calls[0]?.[0])); + expect(requestUrl.pathname).toBe("/proxy/res/v1/llm/context"); + }); + + it("keeps Brave cache entries isolated by baseUrl", async () => { + vi.stubEnv("BRAVE_API_KEY", ""); + const mockFetch = vi.fn(async (_input?: unknown, _init?: unknown) => { + return { + ok: true, + json: async () => ({ web: { results: [] } }), + } as Response; + }); + global.fetch = mockFetch as typeof global.fetch; + + const provider = createBraveWebSearchProvider(); + const firstTool = provider.createTool({ + config: {}, + searchConfig: { + apiKey: "brave-test-key", + brave: { + baseUrl: "https://api.search.brave.com/proxy-one", + mode: "web", + }, + }, + }); + const secondTool = provider.createTool({ + config: {}, + searchConfig: { + apiKey: "brave-test-key", + brave: { + baseUrl: "https://api.search.brave.com/proxy-two", + mode: "web", + }, + }, + }); + if (!firstTool || !secondTool) { + throw new Error("Expected tool definitions"); + } + + await firstTool.execute({ query: "base url cache identity" }); + await secondTool.execute({ query: "base url cache identity" }); + + expect(mockFetch).toHaveBeenCalledTimes(2); + expect(new URL(String(mockFetch.mock.calls[0]?.[0])).pathname).toBe( + "/proxy-one/res/v1/web/search", + ); + expect(new URL(String(mockFetch.mock.calls[1]?.[0])).pathname).toBe( + "/proxy-two/res/v1/web/search", + ); + }); + it("rejects invalid Brave mode values in the plugin config schema", () => { if (!braveManifest.configSchema) { throw new Error("Expected Brave manifest config schema");