fix(memory-wiki): reduce people wiki search noise

This commit is contained in:
Peter Steinberger
2026-04-29 16:52:28 +01:00
parent c99d680714
commit 3f0039e2ea
6 changed files with 267 additions and 29 deletions

View File

@@ -182,6 +182,8 @@ Telegraph style. Root rules only. Read scoped `AGENTS.md` before subtree work.
## Ops / Footguns
- Remote install docs: `docs/install/{exe-dev,fly,hetzner}.md`. Parallels smoke: `$openclaw-parallels-smoke`; Discord roundtrip: `parallels-discord-roundtrip`.
- Memory wiki: keep prompt digest tiny. The prompt should only say the wiki exists, prefer `wiki_search` / `wiki_get`, start from `reports/maintainer-agent-directory.md` for people routing, and verify contact data before use.
- People wiki provenance: generated identity, social, contact, and "fun detail" notes need explicit source class/confidence (`maintainer-whois`, Discrawl sample/stat, GitHub profile, maintainer repo file). Do not promote inferred details to facts.
- Rebrand/migration/config warnings: run `openclaw doctor`.
- Never edit `node_modules`.
- Local-only `.agents` ignores: `.git/info/exclude`, not repo `.gitignore`.

View File

@@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Build/Gateway: route restart, shutdown, respawn, diagnostics, command-queue cleanup, and runtime cleanup through one stable gateway lifecycle runtime entry so rebuilt packages do not strand long-running gateways on stale hashed chunks. Carries forward #73964. Thanks @pashpashpash.
- Memory/wiki: keep broad shared-source and generated related-link blocks from turning every page into a search hit, cap noisy backlinks, support all-term searches such as people-routing queries, and prefer readable page body snippets over generated metadata. Thanks @vincentkoc.
- Agents/errors: suppress malformed streaming tool-call JSON fragments before they reach chat surfaces while preserving provider request-validation diagnostics. Fixes #59076; keeps #59080 as duplicate coverage. (#59118) Thanks @singleGanghood.
- CLI/models: restore provider-filtered `models list --all --provider <id>` rows for providers without manifest/static catalog coverage, including Anthropic and Amazon Bedrock, while keeping the compatibility fallback off expensive availability and resolver paths. Thanks @shakkernerd.
- CLI/models: move the OpenAI listable catalog into the plugin manifest so `models list --all --provider openai` uses the manifest fast path instead of loading provider runtime normalization hooks. Thanks @shakkernerd.

View File

@@ -170,6 +170,46 @@ describe("compileMemoryWikiVault", () => {
);
});
// Thirty entities all cite one source page. The entity keeps its plain source link, but
// neither a "Related Pages" list nor a "Referenced By" list may be generated from it.
it("does not relate every page through a broad shared source", async () => {
  const { rootDir, config } = await createVault({
    rootDir: nextCaseRoot(),
    initialize: true,
  });

  const sourceFile = path.join(rootDir, "sources", "alpha.md");
  const sourceMarkdown = renderWikiMarkdown({
    frontmatter: { pageType: "source", id: "source.alpha", title: "Alpha" },
    body: "# Alpha\n",
  });
  await fs.writeFile(sourceFile, sourceMarkdown, "utf8");

  const entityCount = 30;
  for (let entityIndex = 0; entityIndex < entityCount; entityIndex += 1) {
    const entityFile = path.join(rootDir, "entities", `entity-${entityIndex}.md`);
    const entityMarkdown = renderWikiMarkdown({
      frontmatter: {
        pageType: "entity",
        id: `entity.${entityIndex}`,
        title: `Entity ${entityIndex}`,
        sourceIds: ["source.alpha"],
      },
      body: `# Entity ${entityIndex}\n`,
    });
    await fs.writeFile(entityFile, entityMarkdown, "utf8");
  }

  await compileMemoryWikiVault(config);

  const firstEntity = await fs.readFile(path.join(rootDir, "entities", "entity-0.md"), "utf8");
  const sourcePage = await fs.readFile(sourceFile, "utf8");

  expect(firstEntity).toContain("[Alpha](sources/alpha.md)");
  expect(firstEntity).not.toContain("### Related Pages");
  expect(sourcePage).not.toContain("### Referenced By");
});
it("writes dashboard report pages when createDashboards is enabled", async () => {
const { rootDir, config } = await createVault({
rootDir: nextCaseRoot(),

View File

@@ -44,6 +44,8 @@ const COMPILE_PAGE_GROUPS: Array<{ kind: WikiPageKind; dir: string; heading: str
];
const AGENT_DIGEST_PATH = ".openclaw-wiki/cache/agent-digest.json";
const CLAIMS_DIGEST_PATH = ".openclaw-wiki/cache/claims.jsonl";
// Cap on how many pages the generated backlink list and related-page list may each contain.
const MAX_RELATED_PAGES_PER_SECTION = 12;
// Breadth threshold: a source shared with more than this many other pages no longer relates
// those pages to each other, and a page with more backlinks than this renders no backlink list.
const MAX_SHARED_SOURCE_FANOUT = 24;
type DashboardPageDefinition = {
id: string;
@@ -395,12 +397,33 @@ function renderWikiPageLinks(params: {
.join("\n");
}
/**
 * Counts, for each source id cited by `page`, how many OTHER pages cite that same source.
 *
 * A page is identified by its `relativePath`; the entry for `page` itself is skipped.
 * Each other page contributes at most 1 per source id, even if its `sourceIds` array
 * repeats the id, so duplicated front matter cannot inflate the fan-out.
 *
 * @param page - The page whose sources are being measured.
 * @param allPages - Candidate pages to scan (may include `page`).
 * @returns Map from source id to the number of other pages sharing it. Source ids that
 *   no other page shares are absent from the map.
 */
function sharedSourceFanout(
  page: WikiPageSummary,
  allPages: WikiPageSummary[],
): Map<string, number> {
  const counts = new Map<string, number>();
  const ownSourceIds = new Set(page.sourceIds);
  if (ownSourceIds.size === 0) {
    // Nothing can be shared; skip the scan entirely.
    return counts;
  }
  for (const candidate of allPages) {
    if (candidate.relativePath === page.relativePath) {
      continue;
    }
    // De-duplicate so one page is counted once per source, not once per mention.
    for (const sourceId of new Set(candidate.sourceIds)) {
      if (ownSourceIds.has(sourceId)) {
        counts.set(sourceId, (counts.get(sourceId) ?? 0) + 1);
      }
    }
  }
  return counts;
}
function buildRelatedBlockBody(params: {
config: ResolvedMemoryWikiConfig;
page: WikiPageSummary;
allPages: WikiPageSummary[];
}): string {
const candidatePages = params.allPages.filter((candidate) => candidate.kind !== "report");
const sourceFanout = sharedSourceFanout(params.page, candidatePages);
const pagesById = new Map(
candidatePages.flatMap((candidate) =>
candidate.id ? [[candidate.id, candidate] as const] : [],
@@ -426,6 +449,10 @@ function buildRelatedBlockBody(params: {
);
}),
);
const backlinkPages =
backlinks.length <= MAX_SHARED_SOURCE_FANOUT
? backlinks.slice(0, MAX_RELATED_PAGES_PER_SECTION)
: [];
const relatedPages = uniquePages(
candidatePages.filter((candidate) => {
if (candidate.relativePath === params.page.relativePath) {
@@ -434,15 +461,19 @@ function buildRelatedBlockBody(params: {
if (sourcePages.some((sourcePage) => sourcePage.relativePath === candidate.relativePath)) {
return false;
}
if (backlinks.some((backlink) => backlink.relativePath === candidate.relativePath)) {
if (backlinkPages.some((backlink) => backlink.relativePath === candidate.relativePath)) {
return false;
}
if (params.page.sourceIds.length === 0 || candidate.sourceIds.length === 0) {
return false;
}
return params.page.sourceIds.some((sourceId) => candidate.sourceIds.includes(sourceId));
return params.page.sourceIds.some(
(sourceId) =>
candidate.sourceIds.includes(sourceId) &&
(sourceFanout.get(sourceId) ?? 0) <= MAX_SHARED_SOURCE_FANOUT,
);
}),
);
).slice(0, MAX_RELATED_PAGES_PER_SECTION);
const sections: string[] = [];
if (sourcePages.length > 0) {
@@ -451,10 +482,10 @@ function buildRelatedBlockBody(params: {
renderWikiPageLinks({ config: params.config, pages: sourcePages }),
);
}
if (backlinks.length > 0) {
if (backlinkPages.length > 0) {
sections.push(
"### Referenced By",
renderWikiPageLinks({ config: params.config, pages: backlinks }),
renderWikiPageLinks({ config: params.config, pages: backlinkPages }),
);
}
if (relatedPages.length > 0) {

View File

@@ -117,6 +117,123 @@ describe("searchMemoryWiki", () => {
expect(getActiveMemorySearchManagerMock).not.toHaveBeenCalled();
});
// "Alpha" mentions "Needle Person" only inside its generated related-links block, so a
// search for that name must return the Needle Person page alone.
it("does not match generated related blocks during wiki search", async () => {
  const { rootDir, config } = await createQueryVault({
    initialize: true,
  });

  const alphaBody = [
    "# Alpha",
    "",
    "Alpha body.",
    "",
    "## Related",
    "<!-- openclaw:wiki:related:start -->",
    "### Related Pages",
    "- [Needle Person](entities/needle-person.md)",
    "<!-- openclaw:wiki:related:end -->",
    "",
  ].join("\n");
  const alphaMarkdown = renderWikiMarkdown({
    frontmatter: {
      pageType: "entity",
      id: "entity.alpha",
      title: "Alpha",
      sourceIds: ["source.alpha"],
    },
    body: alphaBody,
  });
  await fs.writeFile(path.join(rootDir, "entities", "alpha.md"), alphaMarkdown, "utf8");

  const needleMarkdown = renderWikiMarkdown({
    frontmatter: {
      pageType: "entity",
      id: "entity.needle-person",
      title: "Needle Person",
      sourceIds: ["source.alpha"],
    },
    body: "# Needle Person\n\nNeedle body.\n",
  });
  await fs.writeFile(path.join(rootDir, "entities", "needle-person.md"), needleMarkdown, "utf8");

  const results = await searchMemoryWiki({
    config,
    query: "Needle Person",
    maxResults: 10,
  });

  const matchedPaths = results.map((result) => result.path);
  expect(matchedPaths).toEqual(["entities/needle-person.md"]);
});
// The query words appear in the title and on different body lines but never as one
// contiguous phrase; the page must still match, with a snippet that mentions "Teams".
it("matches pages when all query terms appear without an exact phrase", async () => {
  const { rootDir, config } = await createQueryVault({
    initialize: true,
  });

  const bradBody = [
    "# Maintainer: Brad Groux",
    "",
    "## Agent Card",
    "- Maintainer lane: CEO; Microsoft-facing OpenClaw maintainer",
    "",
    "## AI Notes",
    "- Main sample theme is Microsoft ecosystem adoption: Teams, M365, Azure, Foundry, tenants, and pilots.",
    "",
  ].join("\n");
  const bradMarkdown = renderWikiMarkdown({
    frontmatter: {
      pageType: "entity",
      id: "entity.brad",
      title: "Maintainer: Brad Groux",
      sourceIds: ["source.maintainers"],
    },
    body: bradBody,
  });
  await fs.writeFile(path.join(rootDir, "entities", "brad.md"), bradMarkdown, "utf8");

  const results = await searchMemoryWiki({
    config,
    query: "Brad Microsoft Teams",
    maxResults: 10,
  });

  const matchedPaths = results.map((result) => result.path);
  expect(matchedPaths).toEqual(["entities/brad.md"]);
  expect(results[0]?.snippet).toContain("Teams");
});
// The query only matches an alias stored in front matter. The page is still found, but the
// snippet must fall back to the first readable body line rather than front matter text.
it("uses body text instead of frontmatter for fallback snippets", async () => {
  const { rootDir, config } = await createQueryVault({
    initialize: true,
  });

  const aliasMarkdown = renderWikiMarkdown({
    frontmatter: {
      pageType: "entity",
      id: "entity.alias",
      title: "Alias Carrier",
      aliases: ["frontmatter-only-alias"],
      sourceIds: ["source.maintainers"],
    },
    body: "# Alias Carrier\n\nReadable agent card summary.\n",
  });
  await fs.writeFile(path.join(rootDir, "entities", "alias.md"), aliasMarkdown, "utf8");

  const results = await searchMemoryWiki({
    config,
    query: "frontmatter-only-alias",
    maxResults: 10,
  });

  const matchedPaths = results.map((result) => result.path);
  expect(matchedPaths).toEqual(["entities/alias.md"]);
  expect(results[0]?.snippet).toBe("# Alias Carrier");
});
it("finds wiki pages by structured claim text and surfaces the claim as the snippet", async () => {
const { rootDir, config } = await createQueryVault({
initialize: true,

View File

@@ -18,6 +18,9 @@ import { initializeMemoryWikiVault } from "./vault.js";
const QUERY_DIRS = ["entities", "concepts", "sources", "syntheses", "reports"] as const;
const AGENT_DIGEST_PATH = ".openclaw-wiki/cache/agent-digest.json";
const CLAIMS_DIGEST_PATH = ".openclaw-wiki/cache/claims.jsonl";
// Matches a generated related-links block, from its start marker comment through its end
// marker comment. Non-greedy so adjacent blocks stay separate; global so every block matches.
const RELATED_BLOCK_PATTERN =
/<!-- openclaw:wiki:related:start -->[\s\S]*?<!-- openclaw:wiki:related:end -->/g;
// Matches a `---`-fenced front matter block anchored at the start of the text (leading
// whitespace allowed), including the closing fence and one trailing line break if present.
const MARKDOWN_FRONTMATTER_PATTERN = /^\s*---\r?\n[\s\S]*?\r?\n---\r?\n?/;
type QueryDigestPage = {
id?: string;
@@ -180,20 +183,22 @@ async function readQueryDigestBundle(rootDir: string): Promise<QueryDigestBundle
/**
 * Picks a single trimmed line of page text to show as the snippet for a search hit.
 *
 * NOTE(review): this listing shows the superseded body (which scans `raw` directly)
 * followed by its replacement; the description below is of the replacement.
 *
 * The generated related-links block and leading front matter are removed first, and blank
 * lines are ignored. Preference order:
 *   1. the first line that contains the whole query or every query token;
 *   2. otherwise the line that contains the most query tokens (at least one);
 *   3. otherwise the first remaining line that is not a bare `---`;
 *   4. otherwise the empty string.
 *
 * @param raw - Page text to pick the snippet from.
 * @param query - The user's search query.
 * @returns The chosen line, trimmed, or "" when no usable line exists.
 */
function buildSnippet(raw: string, query: string): string {
const queryLower = normalizeLowercaseStringOrEmpty(query);
const matchingLine = raw
.split(/\r?\n/)
.find(
(line) =>
normalizeLowercaseStringOrEmpty(line).includes(queryLower) && line.trim().length > 0,
);
return (
matchingLine?.trim() ||
raw
.split(/\r?\n/)
.find((line) => line.trim().length > 0)
?.trim() ||
""
);
const queryTokens = buildQueryTokens(queryLower);
// Search only readable body text: no front matter, no generated related-links block.
const searchable = buildSnippetSearchText(raw);
const lines = searchable.split(/\r?\n/).filter((line) => line.trim().length > 0);
const matchingLine =
lines.find((line) =>
lineMatchesQuery(normalizeLowercaseStringOrEmpty(line), queryLower, queryTokens),
) ??
// No single line satisfies the query: rank lines by how many distinct tokens they contain.
lines
.map((line) => ({
line,
hits: queryTokens.filter((token) => normalizeLowercaseStringOrEmpty(line).includes(token))
.length,
}))
.toSorted((left, right) => right.hits - left.hits)
.find((candidate) => candidate.hits > 0)?.line;
// Last resort: first non-blank line, skipping a bare `---` fence/rule line.
return matchingLine?.trim() || lines.find((line) => line.trim() !== "---")?.trim() || "";
}
function buildPageSearchText(page: QueryableWikiPage): string {
@@ -211,6 +216,32 @@ function buildPageSearchText(page: QueryableWikiPage): string {
.join("\n");
}
/**
 * Removes every generated related-links block (start marker through end marker) from `raw`.
 *
 * @param raw - Page text that may contain generated related-links blocks.
 * @returns The text with all such blocks deleted; other content is untouched.
 */
function stripGeneratedRelatedBlock(raw: string): string {
  const withoutRelatedBlocks = raw.replace(RELATED_BLOCK_PATTERN, "");
  return withoutRelatedBlocks;
}
/**
 * Produces the text used for snippet selection: the page with its generated related-links
 * block removed and then its leading front matter removed.
 *
 * @param raw - Full page text.
 * @returns The remaining text after both removals.
 */
function buildSnippetSearchText(raw: string): string {
  const withoutRelatedBlock = stripGeneratedRelatedBlock(raw);
  const withoutFrontmatter = withoutRelatedBlock.replace(MARKDOWN_FRONTMATTER_PATTERN, "");
  return withoutFrontmatter;
}
/**
 * Splits a query into its distinct search tokens, in first-seen order.
 *
 * A token is a run of Unicode letters, combining marks, digits, or the characters
 * `@`, `.`, `_`, `-`; everything else separates tokens. Splitting is Unicode-aware so
 * names such as "josé" or non-Latin text stay intact instead of being cut at every
 * non-ASCII character. Tokens shorter than two characters are dropped, as are tokens
 * with no letter or digit (e.g. "--"), which would otherwise become mandatory terms.
 *
 * @param queryLower - The query text; callers in this file pass an already-lowercased
 *   string, and no case folding is done here.
 * @returns De-duplicated tokens; empty when the query yields none.
 */
function buildQueryTokens(queryLower: string): string[] {
  const separator = /[^\p{L}\p{M}\p{N}@._-]+/u;
  const hasWordCharacter = /[\p{L}\p{N}]/u;
  const tokens = queryLower
    .split(separator)
    .filter((token) => token.length >= 2 && hasWordCharacter.test(token));
  return [...new Set(tokens)];
}
/**
 * Decides whether one line of text satisfies a query.
 *
 * A line matches when it contains the whole (non-empty) query as a substring, or when
 * there is at least one token and every token occurs somewhere in the line.
 *
 * @param lineLower - The line to test, already lowercased by the caller.
 * @param queryLower - The full query, already lowercased by the caller.
 * @param queryTokens - Tokens derived from the query.
 * @returns `true` on an exact-phrase or all-tokens match, otherwise `false`.
 */
function lineMatchesQuery(lineLower: string, queryLower: string, queryTokens: string[]): boolean {
  const containsWholeQuery = queryLower.length > 0 && lineLower.includes(queryLower);
  if (containsWholeQuery) {
    return true;
  }
  if (queryTokens.length === 0) {
    return false;
  }
  for (const token of queryTokens) {
    if (!lineLower.includes(token)) {
      return false;
    }
  }
  return true;
}
function buildDigestPageSearchText(page: QueryDigestPage, claims: QueryDigestClaim[]): string {
return [
page.title,
@@ -407,20 +438,22 @@ function buildPageSnippet(page: QueryableWikiPage, query: string): string {
function scorePage(page: QueryableWikiPage, query: string): number {
const queryLower = normalizeLowercaseStringOrEmpty(query);
const queryTokens = buildQueryTokens(queryLower);
const titleLower = normalizeLowercaseStringOrEmpty(page.title);
const pathLower = normalizeLowercaseStringOrEmpty(page.relativePath);
const idLower = normalizeLowercaseStringOrEmpty(page.id);
const metadataLower = normalizeLowercaseStringOrEmpty(buildPageSearchText(page));
const rawLower = normalizeLowercaseStringOrEmpty(page.raw);
if (
!(
titleLower.includes(queryLower) ||
pathLower.includes(queryLower) ||
idLower.includes(queryLower) ||
metadataLower.includes(queryLower) ||
rawLower.includes(queryLower)
)
) {
const rawLower = normalizeLowercaseStringOrEmpty(stripGeneratedRelatedBlock(page.raw));
const combinedLower = [titleLower, pathLower, idLower, metadataLower, rawLower].join("\n");
const hasExactMatch =
titleLower.includes(queryLower) ||
pathLower.includes(queryLower) ||
idLower.includes(queryLower) ||
metadataLower.includes(queryLower) ||
rawLower.includes(queryLower);
const hasAllTokens =
queryTokens.length > 0 && queryTokens.every((token) => combinedLower.includes(token));
if (!hasExactMatch && !hasAllTokens) {
return 0;
}
@@ -440,6 +473,20 @@ function scorePage(page: QueryableWikiPage, query: string): number {
}
const bodyOccurrences = rawLower.split(queryLower).length - 1;
score += Math.min(10, bodyOccurrences);
for (const token of queryTokens) {
if (titleLower.includes(token)) {
score += 8;
}
if (pathLower.includes(token) || idLower.includes(token)) {
score += 6;
}
if (metadataLower.includes(token)) {
score += 4;
}
if (rawLower.includes(token)) {
score += 1;
}
}
return score;
}