fix(memory): add LIKE fallback when FTS5 MATCH throws and log silent search errors (#74175)

* fix(memory): add LIKE fallback when FTS5 MATCH throws and log silent search errors

When the FTS5 MATCH query issued by searchKeyword throws (e.g. the
unicode61 tokenizer rejects certain query patterns), the search now falls
back to a LIKE-based query instead of silently returning zero results.
The four .catch(() => []) call sites in the search orchestrator now log
a warning before returning [], so failures are visible in diagnostics.

Fixes #74036

* fix(memory): split LIKE fallback into per-token clauses and log MATCH errors
This commit is contained in:
Logan Ye
2026-04-29 19:16:32 +08:00
committed by GitHub
parent ef7c528c8a
commit 2aa6abddbe
3 changed files with 331 additions and 26 deletions

View File

@@ -179,6 +179,247 @@ describe("searchKeyword trigram fallback", () => {
});
});
describe("searchKeyword FTS MATCH fallback", () => {
  const { DatabaseSync } = requireNodeSqlite();

  const FTS_TABLE = "chunks_fts";
  const MODEL = "mock-embed";
  const INSERT_CHUNK_SQL =
    "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)";

  type FtsDb = ReturnType<typeof createFtsDb>;
  type SearchKeywordParams = Parameters<typeof searchKeyword>[0];
  type ChunkRow = {
    text: string;
    id: string;
    path: string;
    source: string;
    startLine: number;
    endLine: number;
  };

  /** Applies the memory index schema with FTS enabled to `db` and returns the schema result. */
  function ensureFtsSchema(db: InstanceType<typeof DatabaseSync>) {
    return ensureMemoryIndexSchema({
      db,
      embeddingCacheTable: "embedding_cache",
      cacheEnabled: false,
      ftsTable: FTS_TABLE,
      ftsEnabled: true,
    });
  }

  /** Probes a throwaway in-memory database to learn whether the schema reports FTS as available. */
  function supportsFts(): boolean {
    const db = new DatabaseSync(":memory:");
    try {
      return ensureFtsSchema(db).ftsAvailable;
    } finally {
      db.close();
    }
  }

  /**
   * Opens an in-memory database with the FTS schema applied.
   * The caller owns the returned handle and must close it.
   * Throws when the schema reports FTS as unavailable.
   */
  function createFtsDb() {
    const db = new DatabaseSync(":memory:");
    try {
      const result = ensureFtsSchema(db);
      if (!result.ftsAvailable) {
        throw new Error(`FTS5 unavailable: ${result.ftsError ?? "unknown error"}`);
      }
      return db;
    } catch (err) {
      // Close the handle on every failure path (including a throwing schema
      // setup) so a failed fixture never leaks an open connection.
      db.close();
      throw err;
    }
  }

  /** Runs `body` against a fresh FTS database and always closes it afterwards. */
  async function withFtsDb(body: (db: FtsDb) => Promise<void>): Promise<void> {
    const db = createFtsDb();
    try {
      await body(db);
    } finally {
      db.close();
    }
  }

  /** Inserts the given rows into the FTS table, all tagged with the mock embedding model. */
  function seedChunks(db: FtsDb, rows: ChunkRow[]): void {
    const insert = db.prepare(INSERT_CHUNK_SQL);
    for (const row of rows) {
      insert.run(row.text, row.id, row.path, row.source, MODEL, row.startLine, row.endLine);
    }
  }

  /**
   * Calls searchKeyword with the defaults shared by every test in this suite.
   * `overrides` must supply `query` and may replace any other parameter
   * (e.g. `buildFtsQuery` to force a broken MATCH expression).
   */
  function runSearch(
    db: FtsDb,
    overrides: Partial<SearchKeywordParams> & Pick<SearchKeywordParams, "query">,
  ) {
    return searchKeyword({
      db,
      ftsTable: FTS_TABLE,
      providerModel: MODEL,
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: { sql: "", params: [] },
      buildFtsQuery,
      bm25RankToScore,
      ...overrides,
    });
  }

  const itWithFts = supportsFts() ? it : it.skip;

  itWithFts("falls back to LIKE search when FTS MATCH throws", async () => {
    await withFtsDb(async (db) => {
      seedChunks(db, [
        {
          text: "The Agent framework handles API calls and cron jobs",
          id: "1",
          path: "doc.md",
          source: "sessions",
          startLine: 1,
          endLine: 5,
        },
        {
          text: "Deploy the database cluster on Hetzner",
          id: "2",
          path: "ops.md",
          source: "sessions",
          startLine: 1,
          endLine: 3,
        },
      ]);
      // Simulate a buildFtsQuery that produces a broken MATCH expression
      const brokenBuildFtsQuery = () => "BROKEN_QUERY_SYNTAX <<<";
      const results = await runSearch(db, {
        query: "Agent",
        buildFtsQuery: brokenBuildFtsQuery,
      });
      // LIKE fallback should find "Agent" in the first row
      expect(results.length).toBeGreaterThan(0);
      expect(results[0]?.id).toBe("1");
      // Fallback results have textScore=1 (no BM25 ranking)
      expect(results[0]?.textScore).toBe(1);
    });
  });

  itWithFts("returns BM25-scored results when FTS MATCH succeeds", async () => {
    await withFtsDb(async (db) => {
      seedChunks(db, [
        {
          text: "The Transformer architecture powers modern LLMs",
          id: "1",
          path: "ml.md",
          source: "memory",
          startLine: 1,
          endLine: 3,
        },
      ]);
      const results = await runSearch(db, { query: "Transformer" });
      expect(results.length).toBe(1);
      expect(results[0]?.id).toBe("1");
      // BM25 score should be a real computed value, not the fallback default
      expect(results[0]?.textScore).toBeGreaterThan(0);
      expect(results[0]?.textScore).toBeLessThan(1);
    });
  });

  itWithFts("applies source filter in LIKE fallback", async () => {
    await withFtsDb(async (db) => {
      seedChunks(db, [
        {
          text: "Agent handles API calls",
          id: "1",
          path: "doc.md",
          source: "sessions",
          startLine: 1,
          endLine: 3,
        },
        {
          text: "Agent design patterns",
          id: "2",
          path: "notes.md",
          source: "memory",
          startLine: 1,
          endLine: 3,
        },
      ]);
      const brokenBuildFtsQuery = () => "BROKEN <<<";
      const results = await runSearch(db, {
        query: "Agent",
        sourceFilter: { sql: " AND source IN (?)", params: ["sessions"] },
        buildFtsQuery: brokenBuildFtsQuery,
      });
      expect(results.length).toBe(1);
      expect(results[0]?.id).toBe("1");
      expect(results[0]?.source).toBe("sessions");
    });
  });

  itWithFts("splits multi-word query into per-token LIKE clauses in fallback", async () => {
    await withFtsDb(async (db) => {
      seedChunks(db, [
        // "Agent" and "cron" appear in this row but not adjacent
        {
          text: "The Agent framework handles API calls and cron jobs",
          id: "1",
          path: "doc.md",
          source: "sessions",
          startLine: 1,
          endLine: 5,
        },
        // Only "Agent" appears in this row
        {
          text: "Agent design patterns for microservices",
          id: "2",
          path: "arch.md",
          source: "sessions",
          startLine: 1,
          endLine: 3,
        },
      ]);
      // A single-substring LIKE '%Agent cron%' would miss row 1 because
      // the words are not adjacent. Per-token LIKE should find it.
      const brokenBuildFtsQuery = () => "BROKEN <<<";
      const results = await runSearch(db, {
        query: "Agent cron",
        buildFtsQuery: brokenBuildFtsQuery,
      });
      // Per-token fallback: both "Agent" AND "cron" must match
      expect(results.length).toBe(1);
      expect(results[0]?.id).toBe("1");
    });
  });

  itWithFts("logs warning when MATCH fallback is used", async () => {
    await withFtsDb(async (db) => {
      // Silence and capture console.warn for the duration of this test only.
      const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
      try {
        seedChunks(db, [
          {
            text: "test content",
            id: "1",
            path: "doc.md",
            source: "sessions",
            startLine: 1,
            endLine: 1,
          },
        ]);
        await runSearch(db, {
          query: "test",
          buildFtsQuery: () => "BROKEN <<<",
        });
        expect(warnSpy).toHaveBeenCalledWith(
          expect.stringContaining("FTS5 MATCH failed, falling back to LIKE"),
        );
      } finally {
        warnSpy.mockRestore();
      }
    });
  });
});
describe("searchVector sqlite-vec KNN", () => {
const { DatabaseSync } = requireNodeSqlite();

View File

@@ -304,28 +304,8 @@ export async function searchKeyword(params: {
const modelParams = params.providerModel ? [params.providerModel] : [];
const substringClause = plan.substringTerms.map(() => " AND text LIKE ? ESCAPE '\\'").join("");
const substringParams = plan.substringTerms.map((term) => `%${escapeLikePattern(term)}%`);
const whereClause = plan.matchQuery
? `${params.ftsTable} MATCH ?${substringClause}${modelClause}${params.sourceFilter.sql}`
: `1=1${substringClause}${modelClause}${params.sourceFilter.sql}`;
const queryParams = [
...(plan.matchQuery ? [plan.matchQuery] : []),
...substringParams,
...modelParams,
...params.sourceFilter.params,
params.limit,
];
const rankExpression = plan.matchQuery ? `bm25(${params.ftsTable})` : "0";
const rows = params.db
.prepare(
`SELECT id, path, source, start_line, end_line, text,\n` +
` ${rankExpression} AS rank\n` +
` FROM ${params.ftsTable}\n` +
` WHERE ${whereClause}\n` +
` ORDER BY rank ASC\n` +
` LIMIT ?`,
)
.all(...queryParams) as Array<{
let rows: Array<{
id: string;
path: string;
source: SearchSource;
@@ -334,9 +314,75 @@ export async function searchKeyword(params: {
text: string;
rank: number;
}>;
let usedMatch = false;
if (plan.matchQuery) {
try {
rows = params.db
.prepare(
`SELECT id, path, source, start_line, end_line, text,\n` +
` bm25(${params.ftsTable}) AS rank\n` +
` FROM ${params.ftsTable}\n` +
` WHERE ${params.ftsTable} MATCH ?${substringClause}${modelClause}${params.sourceFilter.sql}\n` +
` ORDER BY rank ASC\n` +
` LIMIT ?`,
)
.all(
plan.matchQuery,
...substringParams,
...modelParams,
...params.sourceFilter.params,
params.limit,
) as typeof rows;
usedMatch = true;
} catch (matchErr) {
// FTS5 MATCH can fail on certain token patterns depending on the
// Node.js sqlite runtime and tokenizer (e.g. unicode61 vs trigram).
// Log the root cause, then fall back to per-token LIKE-based substring
// search so results are still returned instead of being silently dropped.
console.warn(`memory search: FTS5 MATCH failed, falling back to LIKE: ${String(matchErr)}`);
const queryTokens =
params.query
.match(FTS_QUERY_TOKEN_RE)
?.map((t) => t.trim())
.filter(Boolean) ?? [];
const allTerms = [...new Set([...queryTokens, ...plan.substringTerms])];
const fallbackLikeClause = allTerms.map(() => " AND text LIKE ? ESCAPE '\\'").join("");
const fallbackLikeParams = allTerms.map((term) => `%${escapeLikePattern(term)}%`);
rows = params.db
.prepare(
`SELECT id, path, source, start_line, end_line, text,\n` +
` 0 AS rank\n` +
` FROM ${params.ftsTable}\n` +
` WHERE 1=1${fallbackLikeClause}${modelClause}${params.sourceFilter.sql}\n` +
` LIMIT ?`,
)
.all(
...fallbackLikeParams,
...modelParams,
...params.sourceFilter.params,
params.limit,
) as typeof rows;
}
} else {
rows = params.db
.prepare(
`SELECT id, path, source, start_line, end_line, text,\n` +
` 0 AS rank\n` +
` FROM ${params.ftsTable}\n` +
` WHERE 1=1${substringClause}${modelClause}${params.sourceFilter.sql}\n` +
` LIMIT ?`,
)
.all(
...substringParams,
...modelParams,
...params.sourceFilter.params,
params.limit,
) as typeof rows;
}
return rows.map((row) => {
const textScore = plan.matchQuery ? params.bm25RankToScore(row.rank) : 1;
const textScore = usedMatch ? params.bm25RankToScore(row.rank) : 1;
const score = params.boostFallbackRanking
? scoreFallbackKeywordResult({
query: params.query,

View File

@@ -380,7 +380,10 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
boostFallbackRanking: true,
},
sourceFilterList,
).catch(() => []);
).catch((err) => {
log.warn(`memory search: FTS keyword query failed: ${formatErrorMessage(err)}`);
return [];
});
const resultSets =
fullQueryResults.length > 0
? [fullQueryResults]
@@ -398,7 +401,12 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
candidates,
{ boostFallbackRanking: true },
sourceFilterList,
).catch(() => []),
).catch((err) => {
log.warn(
`memory search: FTS per-keyword query failed for "${term}": ${formatErrorMessage(err)}`,
);
return [];
}),
);
})(),
);
@@ -427,13 +435,23 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
// If FTS isn't available, hybrid mode cannot use keyword search; degrade to vector-only.
const keywordResults =
hybrid.enabled && this.fts.enabled && this.fts.available
? await this.searchKeyword(cleaned, candidates, undefined, sourceFilterList).catch(() => [])
? await this.searchKeyword(cleaned, candidates, undefined, sourceFilterList).catch(
(err) => {
log.warn(
`memory search: FTS hybrid keyword query failed: ${formatErrorMessage(err)}`,
);
return [];
},
)
: [];
const queryVec = await this.embedQueryWithTimeout(cleaned);
const hasVector = queryVec.some((v) => v !== 0);
const vectorResults = hasVector
? await this.searchVector(queryVec, candidates, sourceFilterList).catch(() => [])
? await this.searchVector(queryVec, candidates, sourceFilterList).catch((err) => {
log.warn(`memory search: vector query failed: ${formatErrorMessage(err)}`);
return [];
})
: [];
if (!hybrid.enabled || !this.fts.enabled || !this.fts.available) {