fix(searxng): retry empty category searches

This commit is contained in:
Peter Steinberger
2026-05-02 08:15:24 +01:00
parent 49e9cdeb98
commit eac7a281d5
4 changed files with 179 additions and 32 deletions

View File

@@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai
- Subagents: avoid duplicate parent-visible replies when a parent uses `sessions_send` on its own persistent native subagent session, while preserving announce delivery for async sends. Fixes #73550. Thanks @sylviazhang2006-design.
- Web search/Brave: add opt-in `brave.http` diagnostics for Brave request URLs/query params, response status/timing, and cache hit/miss/write events without logging API keys or response bodies. Fixes #55196. Thanks @mecampbellsoup.
- Web search/config: validate explicit `tools.web.search.provider` values against bundled and installed plugin manifests, while warning for stale third-party plugin config. Fixes #53092. Thanks @TinyTb.
- Web search/SearXNG: retry empty non-general category searches once with the general category, so unsupported category engines do not return empty results when general search has matches. Fixes #73552. Thanks @Loukky.
- Agents/sandbox: preserve existing workspace file modes when sandbox edits atomically replace files, so 0644 files do not collapse to 0600 after Write/Edit/apply_patch. Fixes #44077. Thanks @patosullivan.
- Agents/models: keep legacy CLI runtime model refs such as `claude-cli/*` in the configured allowlist after canonical runtime migration, so cron `payload.model` overrides keep working. Fixes #75753. Thanks @RyanSandoval.
- Codex/app-server: restart the shared Codex app-server client once when it closes during startup thread resume, preserving the existing thread binding instead of retrying `thread/start` on a closed client. Thanks @vincentkoc.

View File

@@ -125,6 +125,9 @@ key wins first).
DuckDuckGo (order 100), then Ollama Web Search (order 110)
- **Self-hosted** -- you control the instance, queries, and upstream search engines
- **Categories** default to `general` when not configured
- **Category fallback** -- if a request whose categories do not include
`general` succeeds but returns zero results, OpenClaw retries the same query
once with `general` before returning an empty result set
<Tip>
For SearXNG JSON API to work, make sure your SearXNG instance has the `json`

View File

@@ -1,6 +1,32 @@
import type { LookupFn } from "openclaw/plugin-sdk/ssrf-runtime";
import { describe, expect, it, vi } from "vitest";
import { __testing } from "./searxng-client.js";
import { beforeEach, describe, expect, it, vi } from "vitest";
// Shared state for the mocked endpoint runners defined in the vi.mock factory
// below. It is created through vi.hoisted so that it is already initialized
// when that factory runs (vitest hoists vi.mock calls above the imports).
const endpointMockState = vi.hoisted(() => ({
// Parameters of every mocked endpoint invocation, in call order.
calls: [] as Array<{ url: string; timeoutSeconds: number; init: RequestInit }>,
// Queue of canned responses; the mock takes them from the front, one per call.
responses: [] as Response[],
}));
// Replace both endpoint helpers of the provider-web-search SDK module with a
// stub that records the request parameters and passes a queued Response to the
// caller's callback. Every other export of the module is kept as-is.
vi.mock("openclaw/plugin-sdk/provider-web-search", async (importOriginal) => {
const actual = await importOriginal<typeof import("openclaw/plugin-sdk/provider-web-search")>();
// Stand-in for the endpoint helpers: no network access, just bookkeeping.
const runEndpoint = async (
params: { url: string; timeoutSeconds: number; init: RequestInit },
run: (response: Response) => Promise<unknown>,
) => {
// Remember what was requested so tests can assert on URL and options.
endpointMockState.calls.push(params);
// Consume the next canned response in FIFO order.
const response = endpointMockState.responses.shift();
// Fail loudly when a test triggers more requests than it queued responses for.
if (!response) {
throw new Error("Missing mocked SearXNG response.");
}
return await run(response);
};
return {
...actual,
// Both helpers share the same stub, so either endpoint mode is covered.
withSelfHostedWebSearchEndpoint: vi.fn(runEndpoint),
withTrustedWebSearchEndpoint: vi.fn(runEndpoint),
};
});
import { __testing, runSearxngSearch } from "./searxng-client.js";
function createLookupFn(addresses: Array<{ address: string; family: number }>): LookupFn {
return vi.fn(async (_hostname: string, options?: unknown) => {
@@ -12,6 +38,12 @@ function createLookupFn(addresses: Array<{ address: string; family: number }>):
}
describe("searxng client", () => {
beforeEach(() => {
  // Start every test from a clean slate: empty the search cache exposed via
  // __testing, then drop any queued responses and recorded endpoint calls.
  __testing.SEARXNG_SEARCH_CACHE.clear();
  endpointMockState.responses = [];
  endpointMockState.calls = [];
});
it("preserves a configured base-path prefix when building the search URL", () => {
expect(
__testing.buildSearxngSearchUrl({
@@ -39,6 +71,72 @@ describe("searxng client", () => {
).toEqual([{ title: "One", url: "https://example.com/1", content: "A" }]);
});
it("retries an empty category search with general results", async () => {
  // First request (category search) succeeds but returns nothing.
  const emptyCategoryResponse = new Response(JSON.stringify({ results: [] }), { status: 200 });
  // Second request (general retry) returns a single match.
  const generalResponse = new Response(
    JSON.stringify({
      results: [
        {
          title: "Beijing hourly weather",
          url: "https://example.com/weather",
          content: "Hourly forecast",
        },
      ],
    }),
    { status: 200 },
  );
  endpointMockState.responses.push(emptyCategoryResponse, generalResponse);

  const result = await runSearxngSearch({
    baseUrl: "http://127.0.0.1:8888",
    query: "beijing hourly weather",
    categories: "weather",
    count: 5,
  });

  // Exactly two requests: the original category first, then the fallback.
  const requestedCategories = endpointMockState.calls.map((call) =>
    new URL(call.url).searchParams.get("categories"),
  );
  expect(requestedCategories).toEqual(["weather", "general"]);

  // The caller sees the results of the general retry.
  const expectedShape = {
    provider: "searxng",
    count: 1,
    results: [
      expect.objectContaining({
        url: "https://example.com/weather",
      }),
    ],
  };
  expect(result).toMatchObject(expectedShape);
});
it("does not retry empty general category searches", async () => {
  // Only one response is queued; a second request would make the mock throw.
  const emptyResponse = new Response(JSON.stringify({ results: [] }), { status: 200 });
  endpointMockState.responses.push(emptyResponse);

  const result = await runSearxngSearch({
    baseUrl: "http://127.0.0.1:8888",
    query: "openclaw",
    categories: "general",
    count: 5,
  });

  // A single request was made and the empty result set is returned as-is.
  expect(endpointMockState.calls).toHaveLength(1);
  const expectedShape = {
    provider: "searxng",
    count: 0,
    results: [],
  };
  expect(result).toMatchObject(expectedShape);
});
it("detects category searches that should retry with general", () => {
  // [categories input, whether an empty result should trigger a general retry]
  const expectations: Array<[string | undefined, boolean]> = [
    ["weather", true],
    ["weather,news", true],
    ["general", false],
    ["general,news", false],
    [undefined, false],
  ];
  for (const [categories, shouldRetry] of expectations) {
    expect(__testing.shouldRetryEmptyCategorySearchWithGeneral(categories)).toBe(shouldRetry);
  }
});
it("preserves img_src from image search results", () => {
expect(
__testing.parseSearxngResponseText(

View File

@@ -91,6 +91,17 @@ function buildSearxngSearchUrl(params: {
return url.toString();
}
/**
 * Decides whether an empty result set for `categories` warrants one more
 * search using the `general` category.
 *
 * @param categories - Comma-separated category list; may be undefined or empty.
 * @returns `true` only when the list contains at least one non-blank category
 *   and none of them is `general` (compared case-insensitively, ignoring
 *   surrounding whitespace); `false` otherwise.
 */
function shouldRetryEmptyCategorySearchWithGeneral(categories: string | undefined): boolean {
  if (!categories) {
    return false;
  }
  let sawCategory = false;
  for (const rawCategory of categories.split(",")) {
    const category = rawCategory.trim().toLowerCase();
    if (category.length === 0) {
      // Blank entries (e.g. from "a,,b" or trailing commas) do not count.
      continue;
    }
    if (category === "general") {
      // General was already part of the request; retrying would be pointless.
      return false;
    }
    sawCategory = true;
  }
  return sawCategory;
}
async function searxngEndpointTargetsPrivateNetwork(
url: URL,
lookupFn?: LookupFn,
@@ -169,6 +180,54 @@ function parseSearxngResponseText(text: string, count: number): SearxngResult[]
return results;
}
/**
 * Performs a single SearXNG JSON search request and parses the response.
 *
 * Builds the search URL from the base URL, query and optional
 * categories/language, issues a GET through the endpoint helper selected by
 * `endpointMode`, and passes the response body to `parseSearxngResponseText`.
 *
 * @param params - Request settings. `timeoutSeconds` is forwarded to the
 *   endpoint helper and `count` to the response parser.
 * @returns The parsed search results.
 * @throws Error when the response status is not OK, or when the body read is
 *   reported as truncated at `MAX_RESPONSE_BYTES`.
 */
async function fetchSearxngResults(params: {
  baseUrl: string;
  query: string;
  categories?: string;
  language?: string;
  timeoutSeconds: number;
  count: number;
  endpointMode: SearxngEndpointMode;
}): Promise<SearxngResult[]> {
  const { baseUrl, query, categories, language, timeoutSeconds, count, endpointMode } = params;
  const url = buildSearxngSearchUrl({ baseUrl, query, categories, language });
  // Anything other than "selfHosted" goes through the trusted-endpoint helper.
  const runWithEndpoint =
    endpointMode !== "selfHosted"
      ? withTrustedWebSearchEndpoint
      : withSelfHostedWebSearchEndpoint;
  return await runWithEndpoint(
    {
      url,
      timeoutSeconds,
      init: {
        method: "GET",
        headers: {
          Accept: "application/json",
        },
      },
    },
    async (response) => {
      if (!response.ok) {
        // Include a bounded slice of the error body in the message, falling
        // back to the status text when the body is empty.
        const { text: detail } = await readResponseText(response, { maxBytes: 64_000 });
        throw new Error(
          `SearXNG search error (${response.status}): ${detail || response.statusText}`,
        );
      }
      const { text, truncated } = await readResponseText(response, {
        maxBytes: MAX_RESPONSE_BYTES,
      });
      if (truncated) {
        // A cut-off body cannot be parsed reliably, so reject it outright.
        throw new Error("SearXNG response too large.");
      }
      return parseSearxngResponseText(text, count);
    },
  );
}
export async function runSearxngSearch(params: {
config?: OpenClawConfig;
query: string;
@@ -208,42 +267,27 @@ export async function runSearxngSearch(params: {
return { ...cached.value, cached: true };
}
const url = buildSearxngSearchUrl({
const startedAt = Date.now();
let results = await fetchSearxngResults({
baseUrl,
query: params.query,
categories,
language,
timeoutSeconds,
count,
endpointMode,
});
const startedAt = Date.now();
const withEndpoint =
endpointMode === "selfHosted" ? withSelfHostedWebSearchEndpoint : withTrustedWebSearchEndpoint;
const results = await withEndpoint(
{
url,
if (results.length === 0 && shouldRetryEmptyCategorySearchWithGeneral(categories)) {
results = await fetchSearxngResults({
baseUrl,
query: params.query,
categories: "general",
language,
timeoutSeconds,
init: {
method: "GET",
headers: {
Accept: "application/json",
},
},
},
async (response) => {
if (!response.ok) {
const detail = (await readResponseText(response, { maxBytes: 64_000 })).text;
throw new Error(
`SearXNG search error (${response.status}): ${detail || response.statusText}`,
);
}
const body = await readResponseText(response, { maxBytes: MAX_RESPONSE_BYTES });
if (body.truncated) {
throw new Error("SearXNG response too large.");
}
return parseSearxngResponseText(body.text, count);
},
);
count,
endpointMode,
});
}
const payload = {
query: params.query,
@@ -273,6 +317,7 @@ export const __testing = {
buildSearxngSearchUrl,
normalizeSearxngResult,
parseSearxngResponseText,
shouldRetryEmptyCategorySearchWithGeneral,
validateSearxngBaseUrl,
SEARXNG_SEARCH_CACHE,
};