From 2aa6abddbeb5d7aedea33ddb0cfb328ca5ec7291 Mon Sep 17 00:00:00 2001 From: Logan Ye Date: Wed, 29 Apr 2026 19:16:32 +0800 Subject: [PATCH] fix(memory): add LIKE fallback when FTS5 MATCH throws and log silent search errors (#74175) * fix(memory): add LIKE fallback when FTS5 MATCH throws and log silent search errors When searchKeyword FTS5 MATCH fails (e.g. unicode61 tokenizer rejects certain query patterns), the search now falls back to a LIKE-based query instead of silently returning zero results. The four .catch(() => []) sites in the search orchestrator now log warnings so failures are visible in diagnostics. Fixes #74036 * fix(memory): split LIKE fallback into per-token clauses and log MATCH errors --- .../src/memory/manager-search.test.ts | 241 ++++++++++++++++++ .../memory-core/src/memory/manager-search.ts | 90 +++++-- extensions/memory-core/src/memory/manager.ts | 26 +- 3 files changed, 331 insertions(+), 26 deletions(-) diff --git a/extensions/memory-core/src/memory/manager-search.test.ts b/extensions/memory-core/src/memory/manager-search.test.ts index b915848f2ea..f7086e107cf 100644 --- a/extensions/memory-core/src/memory/manager-search.test.ts +++ b/extensions/memory-core/src/memory/manager-search.test.ts @@ -179,6 +179,247 @@ describe("searchKeyword trigram fallback", () => { }); }); +describe("searchKeyword FTS MATCH fallback", () => { + const { DatabaseSync } = requireNodeSqlite(); + + function supportsFts(): boolean { + const db = new DatabaseSync(":memory:"); + try { + const result = ensureMemoryIndexSchema({ + db, + embeddingCacheTable: "embedding_cache", + cacheEnabled: false, + ftsTable: "chunks_fts", + ftsEnabled: true, + }); + return result.ftsAvailable; + } finally { + db.close(); + } + } + + function createFtsDb() { + const db = new DatabaseSync(":memory:"); + const result = ensureMemoryIndexSchema({ + db, + embeddingCacheTable: "embedding_cache", + cacheEnabled: false, + ftsTable: "chunks_fts", + ftsEnabled: true, + }); + if (!result.ftsAvailable) { + db.close(); + throw new Error(`FTS5 unavailable: ${result.ftsError ?? "unknown error"}`); + } + return db; + } + + const itWithFts = supportsFts() ? it : it.skip; + + itWithFts("falls back to LIKE search when FTS MATCH throws", async () => { + const db = createFtsDb(); + try { + const insert = db.prepare( + "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)", + ); + insert.run( + "The Agent framework handles API calls and cron jobs", + "1", + "doc.md", + "sessions", + "mock-embed", + 1, + 5, + ); + insert.run( + "Deploy the database cluster on Hetzner", + "2", + "ops.md", + "sessions", + "mock-embed", + 1, + 3, + ); + + // Simulate a buildFtsQuery that produces a broken MATCH expression + const brokenBuildFtsQuery = () => "BROKEN_QUERY_SYNTAX <<<"; + + const results = await searchKeyword({ + db, + ftsTable: "chunks_fts", + providerModel: "mock-embed", + query: "Agent", + ftsTokenizer: "unicode61", + limit: 10, + snippetMaxChars: 200, + sourceFilter: { sql: "", params: [] }, + buildFtsQuery: brokenBuildFtsQuery, + bm25RankToScore: bm25RankToScore, + }); + + // LIKE fallback should find "Agent" in the first row + expect(results.length).toBeGreaterThan(0); + expect(results[0]?.id).toBe("1"); + // Fallback results have textScore=1 (no BM25 ranking) + expect(results[0]?.textScore).toBe(1); + } finally { + db.close(); + } + }); + + itWithFts("returns BM25-scored results when FTS MATCH succeeds", async () => { + const db = createFtsDb(); + try { + const insert = db.prepare( + "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)", + ); + insert.run( + "The Transformer architecture powers modern LLMs", + "1", + "ml.md", + "memory", + "mock-embed", + 1, + 3, + ); + + const results = await searchKeyword({ + db, + ftsTable: "chunks_fts", + providerModel: "mock-embed", + query: "Transformer", + ftsTokenizer: "unicode61", + limit: 10, + snippetMaxChars: 200, + sourceFilter: { sql: "", params: [] }, + buildFtsQuery, + bm25RankToScore, + }); + + expect(results.length).toBe(1); + expect(results[0]?.id).toBe("1"); + // BM25 score should be a real computed value, not the fallback default + expect(results[0]?.textScore).toBeGreaterThan(0); + expect(results[0]?.textScore).toBeLessThan(1); + } finally { + db.close(); + } + }); + + itWithFts("applies source filter in LIKE fallback", async () => { + const db = createFtsDb(); + try { + const insert = db.prepare( + "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)", + ); + insert.run("Agent handles API calls", "1", "doc.md", "sessions", "mock-embed", 1, 3); + insert.run("Agent design patterns", "2", "notes.md", "memory", "mock-embed", 1, 3); + + const brokenBuildFtsQuery = () => "BROKEN <<<"; + const results = await searchKeyword({ + db, + ftsTable: "chunks_fts", + providerModel: "mock-embed", + query: "Agent", + ftsTokenizer: "unicode61", + limit: 10, + snippetMaxChars: 200, + sourceFilter: { sql: " AND source IN (?)", params: ["sessions"] }, + buildFtsQuery: brokenBuildFtsQuery, + bm25RankToScore, + }); + + expect(results.length).toBe(1); + expect(results[0]?.id).toBe("1"); + expect(results[0]?.source).toBe("sessions"); + } finally { + db.close(); + } + }); + + itWithFts("splits multi-word query into per-token LIKE clauses in fallback", async () => { + const db = createFtsDb(); + try { + const insert = db.prepare( + "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)", + ); + // "Agent" and "cron" appear in this row but not adjacent + insert.run( + "The Agent framework handles API calls and cron jobs", + "1", + "doc.md", + "sessions", + "mock-embed", + 1, + 5, + ); + // Only "Agent" appears in this row + insert.run( + "Agent design patterns for microservices", + "2", + "arch.md", + "sessions", + "mock-embed", + 1, + 3, + ); + + // A single-substring LIKE '%Agent cron%' would miss row 1 because + // the words are not adjacent. Per-token LIKE should find it. + const brokenBuildFtsQuery = () => "BROKEN <<<"; + const results = await searchKeyword({ + db, + ftsTable: "chunks_fts", + providerModel: "mock-embed", + query: "Agent cron", + ftsTokenizer: "unicode61", + limit: 10, + snippetMaxChars: 200, + sourceFilter: { sql: "", params: [] }, + buildFtsQuery: brokenBuildFtsQuery, + bm25RankToScore: bm25RankToScore, + }); + + // Per-token fallback: both "Agent" AND "cron" must match + expect(results.length).toBe(1); + expect(results[0]?.id).toBe("1"); + } finally { + db.close(); + } + }); + + itWithFts("logs warning when MATCH fallback is used", async () => { + const db = createFtsDb(); + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + try { + const insert = db.prepare( + "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)", + ); + insert.run("test content", "1", "doc.md", "sessions", "mock-embed", 1, 1); + + await searchKeyword({ + db, + ftsTable: "chunks_fts", + providerModel: "mock-embed", + query: "test", + ftsTokenizer: "unicode61", + limit: 10, + snippetMaxChars: 200, + sourceFilter: { sql: "", params: [] }, + buildFtsQuery: () => "BROKEN <<<", + bm25RankToScore: bm25RankToScore, + }); + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("FTS5 MATCH failed, falling back to LIKE"), + ); + } finally { + warnSpy.mockRestore(); + db.close(); + } + }); +}); + describe("searchVector sqlite-vec KNN", () => { const { DatabaseSync } = requireNodeSqlite(); diff --git a/extensions/memory-core/src/memory/manager-search.ts b/extensions/memory-core/src/memory/manager-search.ts index e30318a24e9..13faf9669d2 100644 --- a/extensions/memory-core/src/memory/manager-search.ts +++ b/extensions/memory-core/src/memory/manager-search.ts @@ -304,28 +304,8 @@ export async function searchKeyword(params: { const modelParams = params.providerModel ? [params.providerModel] : []; const substringClause = plan.substringTerms.map(() => " AND text LIKE ? ESCAPE '\\'").join(""); const substringParams = plan.substringTerms.map((term) => `%${escapeLikePattern(term)}%`); - const whereClause = plan.matchQuery - ? `${params.ftsTable} MATCH ?${substringClause}${modelClause}${params.sourceFilter.sql}` - : `1=1${substringClause}${modelClause}${params.sourceFilter.sql}`; - const queryParams = [ - ...(plan.matchQuery ? [plan.matchQuery] : []), - ...substringParams, - ...modelParams, - ...params.sourceFilter.params, - params.limit, - ]; - const rankExpression = plan.matchQuery ? `bm25(${params.ftsTable})` : "0"; - const rows = params.db - .prepare( - `SELECT id, path, source, start_line, end_line, text,\n` + - ` ${rankExpression} AS rank\n` + - ` FROM ${params.ftsTable}\n` + - ` WHERE ${whereClause}\n` + - ` ORDER BY rank ASC\n` + - ` LIMIT ?`, - ) - .all(...queryParams) as Array<{ + let rows: Array<{ id: string; path: string; source: SearchSource; @@ -334,9 +314,75 @@ export async function searchKeyword(params: { text: string; rank: number; }>; + let usedMatch = false; + + if (plan.matchQuery) { + try { + rows = params.db + .prepare( + `SELECT id, path, source, start_line, end_line, text,\n` + + ` bm25(${params.ftsTable}) AS rank\n` + + ` FROM ${params.ftsTable}\n` + + ` WHERE ${params.ftsTable} MATCH ?${substringClause}${modelClause}${params.sourceFilter.sql}\n` + + ` ORDER BY rank ASC\n` + + ` LIMIT ?`, + ) + .all( + plan.matchQuery, + ...substringParams, + ...modelParams, + ...params.sourceFilter.params, + params.limit, + ) as typeof rows; + usedMatch = true; + } catch (matchErr) { + // FTS5 MATCH can fail on certain token patterns depending on the + // Node.js sqlite runtime and tokenizer (e.g. unicode61 vs trigram). + // Log the root cause, then fall back to per-token LIKE-based substring + // search so results are still returned instead of being silently dropped. + console.warn(`memory search: FTS5 MATCH failed, falling back to LIKE: ${String(matchErr)}`); + const queryTokens = + params.query + .match(FTS_QUERY_TOKEN_RE) + ?.map((t) => t.trim()) + .filter(Boolean) ?? []; + const allTerms = [...new Set([...queryTokens, ...plan.substringTerms])]; + const fallbackLikeClause = allTerms.map(() => " AND text LIKE ? ESCAPE '\\'").join(""); + const fallbackLikeParams = allTerms.map((term) => `%${escapeLikePattern(term)}%`); + rows = params.db + .prepare( + `SELECT id, path, source, start_line, end_line, text,\n` + + ` 0 AS rank\n` + + ` FROM ${params.ftsTable}\n` + + ` WHERE 1=1${fallbackLikeClause}${modelClause}${params.sourceFilter.sql}\n` + + ` LIMIT ?`, + ) + .all( + ...fallbackLikeParams, + ...modelParams, + ...params.sourceFilter.params, + params.limit, + ) as typeof rows; + } + } else { + rows = params.db + .prepare( + `SELECT id, path, source, start_line, end_line, text,\n` + + ` 0 AS rank\n` + + ` FROM ${params.ftsTable}\n` + + ` WHERE 1=1${substringClause}${modelClause}${params.sourceFilter.sql}\n` + + ` LIMIT ?`, + ) + .all( + ...substringParams, + ...modelParams, + ...params.sourceFilter.params, + params.limit, + ) as typeof rows; + } return rows.map((row) => { - const textScore = plan.matchQuery ? params.bm25RankToScore(row.rank) : 1; + const textScore = usedMatch ? params.bm25RankToScore(row.rank) : 1; const score = params.boostFallbackRanking ? scoreFallbackKeywordResult({ query: params.query, diff --git a/extensions/memory-core/src/memory/manager.ts b/extensions/memory-core/src/memory/manager.ts index 33aca9ab9bf..ef554becdda 100644 --- a/extensions/memory-core/src/memory/manager.ts +++ b/extensions/memory-core/src/memory/manager.ts @@ -380,7 +380,10 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem boostFallbackRanking: true, }, sourceFilterList, - ).catch(() => []); + ).catch((err) => { + log.warn(`memory search: FTS keyword query failed: ${formatErrorMessage(err)}`); + return []; + }); const resultSets = fullQueryResults.length > 0 ? [fullQueryResults] @@ -398,7 +401,12 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem candidates, { boostFallbackRanking: true }, sourceFilterList, - ).catch(() => []), + ).catch((err) => { + log.warn( + `memory search: FTS per-keyword query failed for "${term}": ${formatErrorMessage(err)}`, + ); + return []; + }), ); })(), ); @@ -427,13 +435,23 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem // If FTS isn't available, hybrid mode cannot use keyword search; degrade to vector-only. const keywordResults = hybrid.enabled && this.fts.enabled && this.fts.available - ? await this.searchKeyword(cleaned, candidates, undefined, sourceFilterList).catch(() => []) + ? await this.searchKeyword(cleaned, candidates, undefined, sourceFilterList).catch( + (err) => { + log.warn( + `memory search: FTS hybrid keyword query failed: ${formatErrorMessage(err)}`, + ); + return []; + }, + ) : []; const queryVec = await this.embedQueryWithTimeout(cleaned); const hasVector = queryVec.some((v) => v !== 0); const vectorResults = hasVector - ? await this.searchVector(queryVec, candidates, sourceFilterList).catch(() => []) + ? await this.searchVector(queryVec, candidates, sourceFilterList).catch((err) => { + log.warn(`memory search: vector query failed: ${formatErrorMessage(err)}`); + return []; + }) : []; if (!hybrid.enabled || !this.fts.enabled || !this.fts.available) {