From 3f0039e2ea1a92a9cff05f7aab05de127bac455c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 29 Apr 2026 16:52:28 +0100 Subject: [PATCH] fix(memory-wiki): reduce people wiki search noise --- AGENTS.md | 2 + CHANGELOG.md | 1 + extensions/memory-wiki/src/compile.test.ts | 40 +++++++ extensions/memory-wiki/src/compile.ts | 41 +++++++- extensions/memory-wiki/src/query.test.ts | 117 +++++++++++++++++++++ extensions/memory-wiki/src/query.ts | 95 ++++++++++++----- 6 files changed, 267 insertions(+), 29 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 6bd25b9a29e..21b401980ed 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -182,6 +182,8 @@ Telegraph style. Root rules only. Read scoped `AGENTS.md` before subtree work. ## Ops / Footguns - Remote install docs: `docs/install/{exe-dev,fly,hetzner}.md`. Parallels smoke: `$openclaw-parallels-smoke`; Discord roundtrip: `parallels-discord-roundtrip`. +- Memory wiki: keep prompt digest tiny. The prompt should only say the wiki exists, prefer `wiki_search` / `wiki_get`, start from `reports/maintainer-agent-directory.md` for people routing, and verify contact data before use. +- People wiki provenance: generated identity, social, contact, and "fun detail" notes need explicit source class/confidence (`maintainer-whois`, Discrawl sample/stat, GitHub profile, maintainer repo file). Do not promote inferred details to facts. - Rebrand/migration/config warnings: run `openclaw doctor`. - Never edit `node_modules`. - Local-only `.agents` ignores: `.git/info/exclude`, not repo `.gitignore`. diff --git a/CHANGELOG.md b/CHANGELOG.md index f29f8dcff19..ce3dedf7dcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Build/Gateway: route restart, shutdown, respawn, diagnostics, command-queue cleanup, and runtime cleanup through one stable gateway lifecycle runtime entry so rebuilt packages do not strand long-running gateways on stale hashed chunks. Carries forward #73964. Thanks @pashpashpash. +- Memory/wiki: keep broad shared-source and generated related-link blocks from turning every page into a search hit, cap noisy backlinks, support all-term searches such as people-routing queries, and prefer readable page body snippets over generated metadata. Thanks @vincentkoc. - Agents/errors: suppress malformed streaming tool-call JSON fragments before they reach chat surfaces while preserving provider request-validation diagnostics. Fixes #59076; keeps #59080 as duplicate coverage. (#59118) Thanks @singleGanghood. - CLI/models: restore provider-filtered `models list --all --provider ` rows for providers without manifest/static catalog coverage, including Anthropic and Amazon Bedrock, while keeping the compatibility fallback off expensive availability and resolver paths. Thanks @shakkernerd. - CLI/models: move the OpenAI listable catalog into the plugin manifest so `models list --all --provider openai` uses the manifest fast path instead of loading provider runtime normalization hooks. Thanks @shakkernerd. diff --git a/extensions/memory-wiki/src/compile.test.ts b/extensions/memory-wiki/src/compile.test.ts index 0ebbc6526f9..df1fe4e1d6f 100644 --- a/extensions/memory-wiki/src/compile.test.ts +++ b/extensions/memory-wiki/src/compile.test.ts @@ -170,6 +170,46 @@ describe("compileMemoryWikiVault", () => { ); }); + it("does not relate every page through a broad shared source", async () => { + const { rootDir, config } = await createVault({ + rootDir: nextCaseRoot(), + initialize: true, + }); + + await fs.writeFile( + path.join(rootDir, "sources", "alpha.md"), + renderWikiMarkdown({ + frontmatter: { pageType: "source", id: "source.alpha", title: "Alpha" }, + body: "# Alpha\n", + }), + "utf8", + ); + + for (let index = 0; index < 30; index += 1) { + await fs.writeFile( + path.join(rootDir, "entities", `entity-${index}.md`), + renderWikiMarkdown({ + frontmatter: { + pageType: "entity", + id: `entity.${index}`, + title: `Entity ${index}`, + sourceIds: ["source.alpha"], + }, + body: `# Entity ${index}\n`, + }), + "utf8", + ); + } + + await compileMemoryWikiVault(config); + + const firstEntity = await fs.readFile(path.join(rootDir, "entities", "entity-0.md"), "utf8"); + const sourcePage = await fs.readFile(path.join(rootDir, "sources", "alpha.md"), "utf8"); + expect(firstEntity).toContain("[Alpha](sources/alpha.md)"); + expect(firstEntity).not.toContain("### Related Pages"); + expect(sourcePage).not.toContain("### Referenced By"); + }); + it("writes dashboard report pages when createDashboards is enabled", async () => { const { rootDir, config } = await createVault({ rootDir: nextCaseRoot(), diff --git a/extensions/memory-wiki/src/compile.ts b/extensions/memory-wiki/src/compile.ts index 2cc483a0fb3..97948006d74 100644 --- a/extensions/memory-wiki/src/compile.ts +++ b/extensions/memory-wiki/src/compile.ts @@ -44,6 +44,8 @@ const COMPILE_PAGE_GROUPS: Array<{ kind: WikiPageKind; dir: string; heading: str ]; const AGENT_DIGEST_PATH = ".openclaw-wiki/cache/agent-digest.json"; const CLAIMS_DIGEST_PATH = ".openclaw-wiki/cache/claims.jsonl"; +const MAX_RELATED_PAGES_PER_SECTION = 12; +const MAX_SHARED_SOURCE_FANOUT = 24; type DashboardPageDefinition = { id: string; @@ -395,12 +397,33 @@ function renderWikiPageLinks(params: { .join("\n"); } +function sharedSourceFanout( + page: WikiPageSummary, + allPages: WikiPageSummary[], +): Map { + const sourceIds = new Set(page.sourceIds); + const counts = new Map(); + for (const candidate of allPages) { + if (candidate.relativePath === page.relativePath) { + continue; + } + for (const sourceId of candidate.sourceIds) { + if (!sourceIds.has(sourceId)) { + continue; + } + counts.set(sourceId, (counts.get(sourceId) ?? 0) + 1); + } + } + return counts; +} + function buildRelatedBlockBody(params: { config: ResolvedMemoryWikiConfig; page: WikiPageSummary; allPages: WikiPageSummary[]; }): string { const candidatePages = params.allPages.filter((candidate) => candidate.kind !== "report"); + const sourceFanout = sharedSourceFanout(params.page, candidatePages); const pagesById = new Map( candidatePages.flatMap((candidate) => candidate.id ? [[candidate.id, candidate] as const] : [], @@ -426,6 +449,10 @@ function buildRelatedBlockBody(params: { ); }), ); + const backlinkPages = + backlinks.length <= MAX_SHARED_SOURCE_FANOUT + ? backlinks.slice(0, MAX_RELATED_PAGES_PER_SECTION) + : []; const relatedPages = uniquePages( candidatePages.filter((candidate) => { if (candidate.relativePath === params.page.relativePath) { @@ -434,15 +461,19 @@ function buildRelatedBlockBody(params: { if (sourcePages.some((sourcePage) => sourcePage.relativePath === candidate.relativePath)) { return false; } - if (backlinks.some((backlink) => backlink.relativePath === candidate.relativePath)) { + if (backlinkPages.some((backlink) => backlink.relativePath === candidate.relativePath)) { return false; } if (params.page.sourceIds.length === 0 || candidate.sourceIds.length === 0) { return false; } - return params.page.sourceIds.some((sourceId) => candidate.sourceIds.includes(sourceId)); + return params.page.sourceIds.some( + (sourceId) => + candidate.sourceIds.includes(sourceId) && + (sourceFanout.get(sourceId) ?? 0) <= MAX_SHARED_SOURCE_FANOUT, + ); }), - ); + ).slice(0, MAX_RELATED_PAGES_PER_SECTION); const sections: string[] = []; if (sourcePages.length > 0) { @@ -451,10 +482,10 @@ function buildRelatedBlockBody(params: { renderWikiPageLinks({ config: params.config, pages: sourcePages }), ); } - if (backlinks.length > 0) { + if (backlinkPages.length > 0) { sections.push( "### Referenced By", - renderWikiPageLinks({ config: params.config, pages: backlinks }), + renderWikiPageLinks({ config: params.config, pages: backlinkPages }), ); } if (relatedPages.length > 0) { diff --git a/extensions/memory-wiki/src/query.test.ts b/extensions/memory-wiki/src/query.test.ts index 8aed7069c02..49d3e66da16 100644 --- a/extensions/memory-wiki/src/query.test.ts +++ b/extensions/memory-wiki/src/query.test.ts @@ -117,6 +117,123 @@ describe("searchMemoryWiki", () => { expect(getActiveMemorySearchManagerMock).not.toHaveBeenCalled(); }); + it("does not match generated related blocks during wiki search", async () => { + const { rootDir, config } = await createQueryVault({ + initialize: true, + }); + await fs.writeFile( + path.join(rootDir, "entities", "alpha.md"), + renderWikiMarkdown({ + frontmatter: { + pageType: "entity", + id: "entity.alpha", + title: "Alpha", + sourceIds: ["source.alpha"], + }, + body: [ + "# Alpha", + "", + "Alpha body.", + "", + "## Related", + "", + "### Related Pages", + "- [Needle Person](entities/needle-person.md)", + "", + "", + ].join("\n"), + }), + "utf8", + ); + await fs.writeFile( + path.join(rootDir, "entities", "needle-person.md"), + renderWikiMarkdown({ + frontmatter: { + pageType: "entity", + id: "entity.needle-person", + title: "Needle Person", + sourceIds: ["source.alpha"], + }, + body: "# Needle Person\n\nNeedle body.\n", + }), + "utf8", + ); + + const results = await searchMemoryWiki({ + config, + query: "Needle Person", + maxResults: 10, + }); + + expect(results.map((result) => result.path)).toEqual(["entities/needle-person.md"]); + }); + + it("matches pages when all query terms appear without an exact phrase", async () => { + const { rootDir, config } = await createQueryVault({ + initialize: true, + }); + await fs.writeFile( + path.join(rootDir, "entities", "brad.md"), + renderWikiMarkdown({ + frontmatter: { + pageType: "entity", + id: "entity.brad", + title: "Maintainer: Brad Groux", + sourceIds: ["source.maintainers"], + }, + body: [ + "# Maintainer: Brad Groux", + "", + "## Agent Card", + "- Maintainer lane: CEO; Microsoft-facing OpenClaw maintainer", + "", + "## AI Notes", + "- Main sample theme is Microsoft ecosystem adoption: Teams, M365, Azure, Foundry, tenants, and pilots.", + "", + ].join("\n"), + }), + "utf8", + ); + + const results = await searchMemoryWiki({ + config, + query: "Brad Microsoft Teams", + maxResults: 10, + }); + + expect(results.map((result) => result.path)).toEqual(["entities/brad.md"]); + expect(results[0]?.snippet).toContain("Teams"); + }); + + it("uses body text instead of frontmatter for fallback snippets", async () => { + const { rootDir, config } = await createQueryVault({ + initialize: true, + }); + await fs.writeFile( + path.join(rootDir, "entities", "alias.md"), + renderWikiMarkdown({ + frontmatter: { + pageType: "entity", + id: "entity.alias", + title: "Alias Carrier", + aliases: ["frontmatter-only-alias"], + sourceIds: ["source.maintainers"], + }, + body: "# Alias Carrier\n\nReadable agent card summary.\n", + }), + "utf8", + ); + + const results = await searchMemoryWiki({ + config, + query: "frontmatter-only-alias", + maxResults: 10, + }); + + expect(results.map((result) => result.path)).toEqual(["entities/alias.md"]); + expect(results[0]?.snippet).toBe("# Alias Carrier"); + }); + it("finds wiki pages by structured claim text and surfaces the claim as the snippet", async () => { const { rootDir, config } = await createQueryVault({ initialize: true, diff --git a/extensions/memory-wiki/src/query.ts b/extensions/memory-wiki/src/query.ts index a86f77acf64..5febc64f1fe 100644 --- a/extensions/memory-wiki/src/query.ts +++ b/extensions/memory-wiki/src/query.ts @@ -18,6 +18,9 @@ import { initializeMemoryWikiVault } from "./vault.js"; const QUERY_DIRS = ["entities", "concepts", "sources", "syntheses", "reports"] as const; const AGENT_DIGEST_PATH = ".openclaw-wiki/cache/agent-digest.json"; const CLAIMS_DIGEST_PATH = ".openclaw-wiki/cache/claims.jsonl"; +const RELATED_BLOCK_PATTERN = + /[\s\S]*?/g; +const MARKDOWN_FRONTMATTER_PATTERN = /^\s*---\r?\n[\s\S]*?\r?\n---\r?\n?/; type QueryDigestPage = { id?: string; @@ -180,20 +183,22 @@ async function readQueryDigestBundle(rootDir: string): Promise - normalizeLowercaseStringOrEmpty(line).includes(queryLower) && line.trim().length > 0, - ); - return ( - matchingLine?.trim() || - raw - .split(/\r?\n/) - .find((line) => line.trim().length > 0) - ?.trim() || - "" - ); + const queryTokens = buildQueryTokens(queryLower); + const searchable = buildSnippetSearchText(raw); + const lines = searchable.split(/\r?\n/).filter((line) => line.trim().length > 0); + const matchingLine = + lines.find((line) => + lineMatchesQuery(normalizeLowercaseStringOrEmpty(line), queryLower, queryTokens), + ) ?? + lines + .map((line) => ({ + line, + hits: queryTokens.filter((token) => normalizeLowercaseStringOrEmpty(line).includes(token)) + .length, + })) + .toSorted((left, right) => right.hits - left.hits) + .find((candidate) => candidate.hits > 0)?.line; + return matchingLine?.trim() || lines.find((line) => line.trim() !== "---")?.trim() || ""; } function buildPageSearchText(page: QueryableWikiPage): string { @@ -211,6 +216,32 @@ function buildPageSearchText(page: QueryableWikiPage): string { .join("\n"); } +function stripGeneratedRelatedBlock(raw: string): string { + return raw.replace(RELATED_BLOCK_PATTERN, ""); +} + +function buildSnippetSearchText(raw: string): string { + return stripGeneratedRelatedBlock(raw).replace(MARKDOWN_FRONTMATTER_PATTERN, ""); +} + +function buildQueryTokens(queryLower: string): string[] { + return [ + ...new Set( + queryLower + .split(/[^a-z0-9@._-]+/i) + .map((token) => token.trim()) + .filter((token) => token.length >= 2), + ), + ]; +} + +function lineMatchesQuery(lineLower: string, queryLower: string, queryTokens: string[]): boolean { + if (queryLower.length > 0 && lineLower.includes(queryLower)) { + return true; + } + return queryTokens.length > 0 && queryTokens.every((token) => lineLower.includes(token)); +} + function buildDigestPageSearchText(page: QueryDigestPage, claims: QueryDigestClaim[]): string { return [ page.title, @@ -407,20 +438,22 @@ function buildPageSnippet(page: QueryableWikiPage, query: string): string { function scorePage(page: QueryableWikiPage, query: string): number { const queryLower = normalizeLowercaseStringOrEmpty(query); + const queryTokens = buildQueryTokens(queryLower); const titleLower = normalizeLowercaseStringOrEmpty(page.title); const pathLower = normalizeLowercaseStringOrEmpty(page.relativePath); const idLower = normalizeLowercaseStringOrEmpty(page.id); const metadataLower = normalizeLowercaseStringOrEmpty(buildPageSearchText(page)); - const rawLower = normalizeLowercaseStringOrEmpty(page.raw); - if ( - !( - titleLower.includes(queryLower) || - pathLower.includes(queryLower) || - idLower.includes(queryLower) || - metadataLower.includes(queryLower) || - rawLower.includes(queryLower) - ) - ) { + const rawLower = normalizeLowercaseStringOrEmpty(stripGeneratedRelatedBlock(page.raw)); + const combinedLower = [titleLower, pathLower, idLower, metadataLower, rawLower].join("\n"); + const hasExactMatch = + titleLower.includes(queryLower) || + pathLower.includes(queryLower) || + idLower.includes(queryLower) || + metadataLower.includes(queryLower) || + rawLower.includes(queryLower); + const hasAllTokens = + queryTokens.length > 0 && queryTokens.every((token) => combinedLower.includes(token)); + if (!hasExactMatch && !hasAllTokens) { return 0; } @@ -440,6 +473,20 @@ function scorePage(page: QueryableWikiPage, query: string): number { } const bodyOccurrences = rawLower.split(queryLower).length - 1; score += Math.min(10, bodyOccurrences); + for (const token of queryTokens) { + if (titleLower.includes(token)) { + score += 8; + } + if (pathLower.includes(token) || idLower.includes(token)) { + score += 6; + } + if (metadataLower.includes(token)) { + score += 4; + } + if (rawLower.includes(token)) { + score += 1; + } + } return score; }