improve memory fallback lexical ranking (#65395)

* improve memory fallback lexical ranking

* use neutral lexical fallback fixtures

* fix(memory-core): keep lexical boosts out of hybrid search

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
Anonymous Amit
2026-04-12 23:06:28 +05:30
committed by GitHub
parent 9259e593e6
commit 42590106ab
4 changed files with 118 additions and 3 deletions

View File

@@ -25,6 +25,7 @@ describe("searchKeyword trigram fallback", () => {
async function runSearch(params: {
rows: Array<{ id: string; path: string; text: string }>;
query: string;
boostFallbackRanking?: boolean;
}) {
const db = createTrigramDb();
try {
@@ -45,6 +46,7 @@ describe("searchKeyword trigram fallback", () => {
sourceFilter: { sql: "", params: [] },
buildFtsQuery,
bm25RankToScore,
boostFallbackRanking: params.boostFallbackRanking,
});
} finally {
db.close();
@@ -85,4 +87,64 @@ describe("searchKeyword trigram fallback", () => {
expect(results.map((row) => row.id)).toEqual(["match"]);
expect(results[0]?.textScore).toBeGreaterThan(0);
});
it("applies fallback lexical boosts without exceeding bounded scores", async () => {
  // Both searches run against the same two chunks; a factory hands each call its own array.
  const buildRows = () => [
    {
      id: "strong",
      path: "memory/project-memory-notes.md",
      text: "Project memory notes covering workspace context and retrieval behavior.",
    },
    {
      id: "weak",
      path: "memory/notes.md",
      text: "Project memory context.",
    },
  ];
  const query = "project memory context";
  const indexById = <T extends { id: string }>(hits: T[]) =>
    new Map(hits.map((hit) => [hit.id, hit]));

  const boostedHits = await runSearch({ rows: buildRows(), query, boostFallbackRanking: true });
  // The returned order is still weak-first even though the boosted score favors "strong".
  expect(boostedHits.map((hit) => hit.id)).toEqual(["weak", "strong"]);

  const plainHits = await runSearch({ rows: buildRows(), query, boostFallbackRanking: false });

  const boosted = indexById(boostedHits);
  const plain = indexById(plainHits);

  // Without the boost, "weak" has the higher text score.
  expect(plain.get("strong")?.textScore).toBeLessThan(plain.get("weak")?.textScore ?? 0);
  // With the boost, the combined score flips in favor of "strong".
  expect(boosted.get("strong")?.score).toBeGreaterThan(boosted.get("weak")?.score ?? 0);
  // Boosting must only affect `score`; `textScore` stays identical to the unboosted run.
  expect(boosted.get("strong")?.textScore).toBe(plain.get("strong")?.textScore);
  expect(boosted.get("weak")?.textScore).toBe(plain.get("weak")?.textScore);
  // Boosted scores remain bounded by 1.
  expect(boosted.get("strong")?.score).toBeLessThanOrEqual(1);
  expect(boosted.get("weak")?.score).toBeLessThanOrEqual(1);
});
it("does not overweight repeated query tokens in fallback scoring", async () => {
  // Runs a boosted search over one fixed chunk and returns the first hit's score.
  const boostedScoreFor = async (query: string) => {
    const hits = await runSearch({
      rows: [{ id: "1", path: "memory/project.md", text: "Project memory context." }],
      query,
      boostFallbackRanking: true,
    });
    return hits[0]?.score;
  };

  const uniqueScore = await boostedScoreFor("project memory context");
  const repeatedScore = await boostedScoreFor("project project project memory context");

  // Repeating a token in the query must not change the boosted score.
  expect(repeatedScore).toBe(uniqueScore);
});
});

View File

@@ -22,6 +22,42 @@ export type SearchRowResult = {
source: SearchSource;
};
/**
 * Extracts the substrings of `raw` matched by `FTS_QUERY_TOKEN_RE`, trimmed and
 * lower-cased, dropping any that end up empty.
 *
 * @param raw - Text to tokenize (a query or a chunk of indexed text).
 * @returns The normalized tokens in match order; an empty array when nothing matches.
 */
function normalizeSearchTokens(raw: string): string[] {
  const matches = raw.match(FTS_QUERY_TOKEN_RE);
  if (!matches) {
    return [];
  }
  const tokens: string[] = [];
  for (const match of matches) {
    const normalized = match.trim().toLowerCase();
    if (normalized) {
      tokens.push(normalized);
    }
  }
  return tokens;
}
/**
 * Combines an FTS-derived score with lexical heuristics for a keyword search hit.
 *
 * The boost is the sum of:
 * - query coverage: share of distinct query tokens found in the text, weighted 0.45;
 * - density: matched tokens relative to the number of distinct text tokens, weighted 0.2;
 * - path boost: 0.18 for every distinct query token that occurs as a substring of the
 *   lower-cased path;
 * - length boost: `text.length / 160`, saturating at 0.18.
 *
 * Query tokens are de-duplicated first, so repeating a word in the query does not
 * change the result.
 *
 * @param params.query - Raw search query.
 * @param params.path - Path of the matched row.
 * @param params.text - Text of the matched row.
 * @param params.ftsScore - Base score the boost is added to.
 * @returns `ftsScore` unchanged when the query yields no tokens; otherwise
 *   `ftsScore + boost`, capped at 1.
 */
function scoreFallbackKeywordResult(params: {
  query: string;
  path: string;
  text: string;
  ftsScore: number;
}): number {
  const PER_TOKEN_PATH_BOOST = 0.18;
  const MAX_LENGTH_BOOST = 0.18;

  const { query, path, text, ftsScore } = params;

  const distinctQueryTokens = Array.from(new Set(normalizeSearchTokens(query)));
  if (distinctQueryTokens.length === 0) {
    return ftsScore;
  }

  const textVocabulary = new Set(normalizeSearchTokens(text));
  const lowerCasedPath = path.toLowerCase();

  // One pass over the distinct query tokens gathers both the text overlap and the path boost.
  let matchedInText = 0;
  let pathBoost = 0;
  for (const token of distinctQueryTokens) {
    if (textVocabulary.has(token)) {
      matchedInText += 1;
    }
    if (lowerCasedPath.includes(token)) {
      pathBoost += PER_TOKEN_PATH_BOOST;
    }
  }

  const queryCoverage = matchedInText / Math.max(distinctQueryTokens.length, 1);
  const density = matchedInText / Math.max(textVocabulary.size, 1);
  const lengthBoost = Math.min(text.length / 160, MAX_LENGTH_BOOST);

  const lexicalBoost = queryCoverage * 0.45 + density * 0.2 + pathBoost + lengthBoost;
  const boostedScore = ftsScore + lexicalBoost;
  return Math.min(1, boostedScore);
}
/**
 * Prefixes every backslash, `%` and `_` in `term` with a backslash so the characters
 * are treated literally inside a SQL `LIKE` pattern.
 *
 * NOTE(review): presumably the consuming query declares backslash as its `ESCAPE`
 * character — confirm at the call site.
 *
 * @param term - Raw text to embed in a LIKE pattern.
 * @returns The term with `\`, `%` and `_` backslash-escaped.
 */
function escapeLikePattern(term: string): string {
  // A single pass gives the same result as escaping backslashes first and wildcards after.
  return term.replace(/[\\%_]/g, (special) => `\\${special}`);
}
@@ -198,6 +234,7 @@ export async function searchKeyword(params: {
sourceFilter: { sql: string; params: SearchSource[] };
buildFtsQuery: (raw: string) => string | null;
bm25RankToScore: (rank: number) => number;
boostFallbackRanking?: boolean;
}): Promise<Array<SearchRowResult & { textScore: number }>> {
if (params.limit <= 0) {
return [];
@@ -249,12 +286,20 @@ export async function searchKeyword(params: {
return rows.map((row) => {
const textScore = plan.matchQuery ? params.bm25RankToScore(row.rank) : 1;
const score = params.boostFallbackRanking
? scoreFallbackKeywordResult({
query: params.query,
path: row.path,
text: row.text,
ftsScore: textScore,
})
: textScore;
return {
id: row.id,
path: row.path,
startLine: row.start_line,
endLine: row.end_line,
score: textScore,
score,
textScore,
snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
source: row.source,

View File

@@ -345,7 +345,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
return [];
}
const fullQueryResults = await this.searchKeyword(cleaned, candidates).catch(() => []);
const fullQueryResults = await this.searchKeyword(cleaned, candidates, {
boostFallbackRanking: true,
}).catch(() => []);
const resultSets =
fullQueryResults.length > 0
? [fullQueryResults]
@@ -358,7 +360,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
});
const searchTerms = keywords.length > 0 ? keywords : [cleaned];
return searchTerms.map((term) =>
this.searchKeyword(term, candidates).catch(() => []),
this.searchKeyword(term, candidates, { boostFallbackRanking: true }).catch(
() => [],
),
);
})(),
);
@@ -495,6 +499,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
private async searchKeyword(
query: string,
limit: number,
options?: { boostFallbackRanking?: boolean },
): Promise<Array<MemorySearchResult & { id: string; textScore: number }>> {
if (!this.fts.enabled || !this.fts.available) {
return [];
@@ -513,6 +518,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
sourceFilter,
buildFtsQuery: (raw) => this.buildFtsQuery(raw),
bm25RankToScore,
boostFallbackRanking: options?.boostFallbackRanking,
});
return results.map((entry) => entry as MemorySearchResult & { id: string; textScore: number });
}