From eac7a281d5525c4a14c1179a530ba26e03f99e28 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 2 May 2026 08:15:24 +0100 Subject: [PATCH] fix(searxng): retry empty category searches --- CHANGELOG.md | 1 + docs/tools/searxng-search.md | 3 + extensions/searxng/src/searxng-client.test.ts | 102 ++++++++++++++++- extensions/searxng/src/searxng-client.ts | 105 +++++++++++++----- 4 files changed, 179 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a46abd6f60..bd0baf7c05a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai - Subagents: avoid duplicate parent-visible replies when a parent uses `sessions_send` on its own persistent native subagent session, while preserving announce delivery for async sends. Fixes #73550. Thanks @sylviazhang2006-design. - Web search/Brave: add opt-in `brave.http` diagnostics for Brave request URLs/query params, response status/timing, and cache hit/miss/write events without logging API keys or response bodies. Fixes #55196. Thanks @mecampbellsoup. - Web search/config: validate explicit `tools.web.search.provider` values against bundled and installed plugin manifests, while warning for stale third-party plugin config. Fixes #53092. Thanks @TinyTb. +- Web search/SearXNG: retry empty non-general category searches once with the general category, so unsupported category engines do not return empty results when general search has matches. Fixes #73552. Thanks @Loukky. - Agents/sandbox: preserve existing workspace file modes when sandbox edits atomically replace files, so 0644 files do not collapse to 0600 after Write/Edit/apply_patch. Fixes #44077. Thanks @patosullivan. - Agents/models: keep legacy CLI runtime model refs such as `claude-cli/*` in the configured allowlist after canonical runtime migration, so cron `payload.model` overrides keep working. Fixes #75753. Thanks @RyanSandoval. - Codex/app-server: restart the shared Codex app-server client once when it closes during startup thread resume, preserving the existing thread binding instead of retrying `thread/start` on a closed client. Thanks @vincentkoc. diff --git a/docs/tools/searxng-search.md b/docs/tools/searxng-search.md index 9944676f627..9241d1e4740 100644 --- a/docs/tools/searxng-search.md +++ b/docs/tools/searxng-search.md @@ -125,6 +125,9 @@ key wins first). DuckDuckGo (order 100), then Ollama Web Search (order 110) - **Self-hosted** -- you control the instance, queries, and upstream search engines - **Categories** default to `general` when not configured +- **Category fallback** -- if a non-`general` category request succeeds but + returns zero results, OpenClaw retries the same query once with `general` + before returning an empty result set For SearXNG JSON API to work, make sure your SearXNG instance has the `json` diff --git a/extensions/searxng/src/searxng-client.test.ts b/extensions/searxng/src/searxng-client.test.ts index 7d841def6d4..e1ce8f12242 100644 --- a/extensions/searxng/src/searxng-client.test.ts +++ b/extensions/searxng/src/searxng-client.test.ts @@ -1,6 +1,32 @@ import type { LookupFn } from "openclaw/plugin-sdk/ssrf-runtime"; -import { describe, expect, it, vi } from "vitest"; -import { __testing } from "./searxng-client.js"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const endpointMockState = vi.hoisted(() => ({ + calls: [] as Array<{ url: string; timeoutSeconds: number; init: RequestInit }>, + responses: [] as Response[], +})); + +vi.mock("openclaw/plugin-sdk/provider-web-search", async (importOriginal) => { + const actual = await importOriginal(); + const runEndpoint = async ( + params: { url: string; timeoutSeconds: number; init: RequestInit }, + run: (response: Response) => Promise, + ) => { + endpointMockState.calls.push(params); + const response = endpointMockState.responses.shift(); + if (!response) { + throw new Error("Missing mocked SearXNG response."); + } + return await run(response); + }; + return { + ...actual, + withSelfHostedWebSearchEndpoint: vi.fn(runEndpoint), + withTrustedWebSearchEndpoint: vi.fn(runEndpoint), + }; +}); + +import { __testing, runSearxngSearch } from "./searxng-client.js"; function createLookupFn(addresses: Array<{ address: string; family: number }>): LookupFn { return vi.fn(async (_hostname: string, options?: unknown) => { @@ -12,6 +38,12 @@ function createLookupFn(addresses: Array<{ address: string; family: number }>): } describe("searxng client", () => { + beforeEach(() => { + endpointMockState.calls = []; + endpointMockState.responses = []; + __testing.SEARXNG_SEARCH_CACHE.clear(); + }); + it("preserves a configured base-path prefix when building the search URL", () => { expect( __testing.buildSearxngSearchUrl({ @@ -39,6 +71,72 @@ describe("searxng client", () => { ).toEqual([{ title: "One", url: "https://example.com/1", content: "A" }]); }); + it("retries an empty category search with general results", async () => { + endpointMockState.responses.push( + new Response(JSON.stringify({ results: [] }), { status: 200 }), + new Response( + JSON.stringify({ + results: [ + { + title: "Beijing hourly weather", + url: "https://example.com/weather", + content: "Hourly forecast", + }, + ], + }), + { status: 200 }, + ), + ); + + const result = await runSearxngSearch({ + baseUrl: "http://127.0.0.1:8888", + query: "beijing hourly weather", + categories: "weather", + count: 5, + }); + + expect(endpointMockState.calls).toHaveLength(2); + expect(new URL(endpointMockState.calls[0].url).searchParams.get("categories")).toBe("weather"); + expect(new URL(endpointMockState.calls[1].url).searchParams.get("categories")).toBe("general"); + expect(result).toMatchObject({ + provider: "searxng", + count: 1, + results: [ + expect.objectContaining({ + url: "https://example.com/weather", + }), + ], + }); + }); + + it("does not retry empty general category searches", async () => { + endpointMockState.responses.push( + new Response(JSON.stringify({ results: [] }), { status: 200 }), + ); + + const result = await runSearxngSearch({ + baseUrl: "http://127.0.0.1:8888", + query: "openclaw", + categories: "general", + count: 5, + }); + + expect(endpointMockState.calls).toHaveLength(1); + expect(result).toMatchObject({ + provider: "searxng", + count: 0, + results: [], + }); + }); + + it("detects category searches that should retry with general", () => { + expect(__testing.shouldRetryEmptyCategorySearchWithGeneral("weather")).toBe(true); + expect(__testing.shouldRetryEmptyCategorySearchWithGeneral("weather,news")).toBe(true); + expect(__testing.shouldRetryEmptyCategorySearchWithGeneral("general")).toBe(false); + expect(__testing.shouldRetryEmptyCategorySearchWithGeneral("general,news")).toBe(false); + expect(__testing.shouldRetryEmptyCategorySearchWithGeneral(undefined)).toBe(false); + }); + it("preserves img_src from image search results", () => { expect( __testing.parseSearxngResponseText( diff --git a/extensions/searxng/src/searxng-client.ts b/extensions/searxng/src/searxng-client.ts index 0ad17563ca1..ceeb1e10163 100644 --- a/extensions/searxng/src/searxng-client.ts +++ b/extensions/searxng/src/searxng-client.ts @@ -91,6 +91,17 @@ function buildSearxngSearchUrl(params: { return url.toString(); } +function shouldRetryEmptyCategorySearchWithGeneral(categories: string | undefined): boolean { + if (!categories) { + return false; + } + const normalized = categories + .split(",") + .map((category) => category.trim().toLowerCase()) + .filter((category) => category.length > 0); + return normalized.length > 0 && !normalized.includes("general"); +} + async function searxngEndpointTargetsPrivateNetwork( url: URL, lookupFn?: LookupFn, @@ -169,6 +180,54 @@ function parseSearxngResponseText(text: string, count: number): SearxngResult[] return results; } +async function fetchSearxngResults(params: { + baseUrl: string; + query: string; + categories?: string; + language?: string; + timeoutSeconds: number; + count: number; + endpointMode: SearxngEndpointMode; +}): Promise { + const url = buildSearxngSearchUrl({ + baseUrl: params.baseUrl, + query: params.query, + categories: params.categories, + language: params.language, + }); + + const withEndpoint = + params.endpointMode === "selfHosted" + ? withSelfHostedWebSearchEndpoint + : withTrustedWebSearchEndpoint; + return await withEndpoint( + { + url, + timeoutSeconds: params.timeoutSeconds, + init: { + method: "GET", + headers: { + Accept: "application/json", + }, + }, + }, + async (response) => { + if (!response.ok) { + const detail = (await readResponseText(response, { maxBytes: 64_000 })).text; + throw new Error( + `SearXNG search error (${response.status}): ${detail || response.statusText}`, + ); + } + + const body = await readResponseText(response, { maxBytes: MAX_RESPONSE_BYTES }); + if (body.truncated) { + throw new Error("SearXNG response too large."); + } + return parseSearxngResponseText(body.text, params.count); + }, + ); +} + export async function runSearxngSearch(params: { config?: OpenClawConfig; query: string; @@ -208,42 +267,27 @@ export async function runSearxngSearch(params: { return { ...cached.value, cached: true }; } - const url = buildSearxngSearchUrl({ + const startedAt = Date.now(); + let results = await fetchSearxngResults({ baseUrl, query: params.query, categories, language, + timeoutSeconds, + count, + endpointMode, }); - - const startedAt = Date.now(); - const withEndpoint = - endpointMode === "selfHosted" ? withSelfHostedWebSearchEndpoint : withTrustedWebSearchEndpoint; - const results = await withEndpoint( - { - url, + if (results.length === 0 && shouldRetryEmptyCategorySearchWithGeneral(categories)) { + results = await fetchSearxngResults({ + baseUrl, + query: params.query, + categories: "general", + language, timeoutSeconds, - init: { - method: "GET", - headers: { - Accept: "application/json", - }, - }, - }, - async (response) => { - if (!response.ok) { - const detail = (await readResponseText(response, { maxBytes: 64_000 })).text; - throw new Error( - `SearXNG search error (${response.status}): ${detail || response.statusText}`, - ); - } - - const body = await readResponseText(response, { maxBytes: MAX_RESPONSE_BYTES }); - if (body.truncated) { - throw new Error("SearXNG response too large."); - } - return parseSearxngResponseText(body.text, count); - }, - ); + count, + endpointMode, + }); + } const payload = { query: params.query, @@ -273,6 +317,7 @@ export const __testing = { buildSearxngSearchUrl, normalizeSearxngResult, parseSearxngResponseText, + shouldRetryEmptyCategorySearchWithGeneral, validateSearxngBaseUrl, SEARXNG_SEARCH_CACHE, };