mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:40:44 +00:00
fix(memory): add LIKE fallback when FTS5 MATCH throws and log silent search errors (#74175)
* fix(memory): add LIKE fallback when FTS5 MATCH throws and log silent search errors

  When the FTS5 MATCH in searchKeyword fails (e.g. the unicode61 tokenizer rejects certain query patterns), the search now falls back to a LIKE-based query instead of silently returning zero results. The four .catch(() => []) sites in the search orchestrator now log warnings so failures are visible in diagnostics.

  Fixes #74036

* fix(memory): split LIKE fallback into per-token clauses and log MATCH errors
This commit is contained in:
@@ -179,6 +179,247 @@ describe("searchKeyword trigram fallback", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("searchKeyword FTS MATCH fallback", () => {
|
||||
const { DatabaseSync } = requireNodeSqlite();
|
||||
|
||||
/**
 * Probes whether the memory index schema can be created with FTS enabled.
 *
 * Opens a throwaway in-memory database, runs `ensureMemoryIndexSchema` against
 * it with `ftsEnabled: true`, and reports the `ftsAvailable` flag it returns.
 * The probe database is always closed, even if schema creation throws.
 *
 * @returns the `ftsAvailable` value reported by `ensureMemoryIndexSchema`.
 */
function supportsFts(): boolean {
  const probeDb = new DatabaseSync(":memory:");
  try {
    const { ftsAvailable } = ensureMemoryIndexSchema({
      db: probeDb,
      embeddingCacheTable: "embedding_cache",
      cacheEnabled: false,
      ftsTable: "chunks_fts",
      ftsEnabled: true,
    });
    return ftsAvailable;
  } finally {
    probeDb.close();
  }
}
|
||||
|
||||
/**
 * Creates an in-memory database with the memory index schema and an FTS table
 * named `chunks_fts`.
 *
 * The caller owns the returned handle and must close it. On every failure path
 * (FTS reported unavailable, or `ensureMemoryIndexSchema` itself throwing) the
 * database is closed before the error propagates, so a failing setup never
 * leaks an open handle.
 *
 * @returns an open `DatabaseSync` whose schema was created with FTS enabled.
 * @throws Error("FTS5 unavailable: ...") when the schema reports FTS is not
 *   available; rethrows any error raised by `ensureMemoryIndexSchema`.
 */
function createFtsDb() {
  const db = new DatabaseSync(":memory:");
  try {
    const result = ensureMemoryIndexSchema({
      db,
      embeddingCacheTable: "embedding_cache",
      cacheEnabled: false,
      ftsTable: "chunks_fts",
      ftsEnabled: true,
    });
    if (!result.ftsAvailable) {
      throw new Error(`FTS5 unavailable: ${result.ftsError ?? "unknown error"}`);
    }
    return db;
  } catch (err) {
    // Release the handle on any setup failure, then surface the original error.
    db.close();
    throw err;
  }
}
|
||||
|
||||
const itWithFts = supportsFts() ? it : it.skip;
|
||||
|
||||
// Verifies that a MATCH expression rejected by SQLite does not produce an empty
// result: searchKeyword should still return the row containing the query text.
itWithFts("falls back to LIKE search when FTS MATCH throws", async () => {
  const db = createFtsDb();
  try {
    // Column order: text, id, path, source, model, start_line, end_line.
    const seedRows: Array<Array<string | number>> = [
      ["The Agent framework handles API calls and cron jobs", "1", "doc.md", "sessions", "mock-embed", 1, 5],
      ["Deploy the database cluster on Hetzner", "2", "ops.md", "sessions", "mock-embed", 1, 3],
    ];
    const insertChunk = db.prepare(
      "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
    );
    for (const row of seedRows) {
      insertChunk.run(...row);
    }

    // Stand-in for buildFtsQuery that always yields an invalid MATCH expression.
    const invalidMatchBuilder = () => "BROKEN_QUERY_SYNTAX <<<";

    const hits = await searchKeyword({
      db,
      ftsTable: "chunks_fts",
      providerModel: "mock-embed",
      query: "Agent",
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: { sql: "", params: [] },
      buildFtsQuery: invalidMatchBuilder,
      bm25RankToScore,
    });
    const [topHit] = hits;

    // The LIKE fallback should locate "Agent" in the first seeded row.
    expect(hits.length).toBeGreaterThan(0);
    expect(topHit?.id).toBe("1");
    // Fallback rows carry textScore=1 because no BM25 rank is computed.
    expect(topHit?.textScore).toBe(1);
  } finally {
    db.close();
  }
});
|
||||
|
||||
// Verifies the normal path: with a working buildFtsQuery the single matching
// row is returned with a score strictly between 0 and 1.
itWithFts("returns BM25-scored results when FTS MATCH succeeds", async () => {
  const db = createFtsDb();
  try {
    const insertChunk = db.prepare(
      "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
    );
    insertChunk.run("The Transformer architecture powers modern LLMs", "1", "ml.md", "memory", "mock-embed", 1, 3);

    const hits = await searchKeyword({
      db,
      ftsTable: "chunks_fts",
      providerModel: "mock-embed",
      query: "Transformer",
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: { sql: "", params: [] },
      buildFtsQuery,
      bm25RankToScore,
    });
    const [onlyHit] = hits;

    expect(hits.length).toBe(1);
    expect(onlyHit?.id).toBe("1");
    // A genuine BM25-derived score is expected here, not the fallback value of 1.
    expect(onlyHit?.textScore).toBeGreaterThan(0);
    expect(onlyHit?.textScore).toBeLessThan(1);
  } finally {
    db.close();
  }
});
|
||||
|
||||
// Verifies that the caller-supplied source filter still restricts results when
// the MATCH query fails and the LIKE path is taken.
itWithFts("applies source filter in LIKE fallback", async () => {
  const db = createFtsDb();
  try {
    // Both rows contain "Agent"; only the first has source "sessions".
    const seedRows: Array<Array<string | number>> = [
      ["Agent handles API calls", "1", "doc.md", "sessions", "mock-embed", 1, 3],
      ["Agent design patterns", "2", "notes.md", "memory", "mock-embed", 1, 3],
    ];
    const insertChunk = db.prepare(
      "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
    );
    for (const row of seedRows) {
      insertChunk.run(...row);
    }

    const invalidMatchBuilder = () => "BROKEN <<<";
    const sessionsOnly = { sql: " AND source IN (?)", params: ["sessions"] };

    const hits = await searchKeyword({
      db,
      ftsTable: "chunks_fts",
      providerModel: "mock-embed",
      query: "Agent",
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: sessionsOnly,
      buildFtsQuery: invalidMatchBuilder,
      bm25RankToScore,
    });
    const [onlyHit] = hits;

    expect(hits.length).toBe(1);
    expect(onlyHit?.id).toBe("1");
    expect(onlyHit?.source).toBe("sessions");
  } finally {
    db.close();
  }
});
|
||||
|
||||
// Verifies that the fallback matches each query word independently rather than
// requiring the whole query to appear as one contiguous substring.
itWithFts("splits multi-word query into per-token LIKE clauses in fallback", async () => {
  const db = createFtsDb();
  try {
    const insertChunk = db.prepare(
      "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
    );
    // Row 1 contains both "Agent" and "cron", but the words are not adjacent.
    insertChunk.run("The Agent framework handles API calls and cron jobs", "1", "doc.md", "sessions", "mock-embed", 1, 5);
    // Row 2 contains "Agent" only.
    insertChunk.run("Agent design patterns for microservices", "2", "arch.md", "sessions", "mock-embed", 1, 3);

    // A single LIKE '%Agent cron%' pattern would miss row 1 since the words are
    // separated; one LIKE clause per token is expected to find it.
    const invalidMatchBuilder = () => "BROKEN <<<";

    const hits = await searchKeyword({
      db,
      ftsTable: "chunks_fts",
      providerModel: "mock-embed",
      query: "Agent cron",
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: { sql: "", params: [] },
      buildFtsQuery: invalidMatchBuilder,
      bm25RankToScore,
    });
    const [onlyHit] = hits;

    // Both "Agent" AND "cron" must be present, so only row 1 qualifies.
    expect(hits.length).toBe(1);
    expect(onlyHit?.id).toBe("1");
  } finally {
    db.close();
  }
});
|
||||
|
||||
// Verifies that taking the fallback path emits a console warning, so a failing
// MATCH query is visible rather than silent.
itWithFts("logs warning when MATCH fallback is used", async () => {
  const db = createFtsDb();
  // Silence console.warn for the duration of the test while recording calls.
  const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
  try {
    const insertChunk = db.prepare(
      "INSERT INTO chunks_fts (text, id, path, source, model, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
    );
    insertChunk.run("test content", "1", "doc.md", "sessions", "mock-embed", 1, 1);

    const invalidMatchBuilder = () => "BROKEN <<<";
    await searchKeyword({
      db,
      ftsTable: "chunks_fts",
      providerModel: "mock-embed",
      query: "test",
      ftsTokenizer: "unicode61",
      limit: 10,
      snippetMaxChars: 200,
      sourceFilter: { sql: "", params: [] },
      buildFtsQuery: invalidMatchBuilder,
      bm25RankToScore,
    });

    const expectedMessage = expect.stringContaining("FTS5 MATCH failed, falling back to LIKE");
    expect(warnSpy).toHaveBeenCalledWith(expectedMessage);
  } finally {
    // Restore the real console.warn before releasing the database.
    warnSpy.mockRestore();
    db.close();
  }
});
|
||||
});
|
||||
|
||||
describe("searchVector sqlite-vec KNN", () => {
|
||||
const { DatabaseSync } = requireNodeSqlite();
|
||||
|
||||
|
||||
@@ -304,28 +304,8 @@ export async function searchKeyword(params: {
|
||||
const modelParams = params.providerModel ? [params.providerModel] : [];
|
||||
const substringClause = plan.substringTerms.map(() => " AND text LIKE ? ESCAPE '\\'").join("");
|
||||
const substringParams = plan.substringTerms.map((term) => `%${escapeLikePattern(term)}%`);
|
||||
const whereClause = plan.matchQuery
|
||||
? `${params.ftsTable} MATCH ?${substringClause}${modelClause}${params.sourceFilter.sql}`
|
||||
: `1=1${substringClause}${modelClause}${params.sourceFilter.sql}`;
|
||||
const queryParams = [
|
||||
...(plan.matchQuery ? [plan.matchQuery] : []),
|
||||
...substringParams,
|
||||
...modelParams,
|
||||
...params.sourceFilter.params,
|
||||
params.limit,
|
||||
];
|
||||
const rankExpression = plan.matchQuery ? `bm25(${params.ftsTable})` : "0";
|
||||
|
||||
const rows = params.db
|
||||
.prepare(
|
||||
`SELECT id, path, source, start_line, end_line, text,\n` +
|
||||
` ${rankExpression} AS rank\n` +
|
||||
` FROM ${params.ftsTable}\n` +
|
||||
` WHERE ${whereClause}\n` +
|
||||
` ORDER BY rank ASC\n` +
|
||||
` LIMIT ?`,
|
||||
)
|
||||
.all(...queryParams) as Array<{
|
||||
let rows: Array<{
|
||||
id: string;
|
||||
path: string;
|
||||
source: SearchSource;
|
||||
@@ -334,9 +314,75 @@ export async function searchKeyword(params: {
|
||||
text: string;
|
||||
rank: number;
|
||||
}>;
|
||||
let usedMatch = false;
|
||||
|
||||
if (plan.matchQuery) {
|
||||
try {
|
||||
rows = params.db
|
||||
.prepare(
|
||||
`SELECT id, path, source, start_line, end_line, text,\n` +
|
||||
` bm25(${params.ftsTable}) AS rank\n` +
|
||||
` FROM ${params.ftsTable}\n` +
|
||||
` WHERE ${params.ftsTable} MATCH ?${substringClause}${modelClause}${params.sourceFilter.sql}\n` +
|
||||
` ORDER BY rank ASC\n` +
|
||||
` LIMIT ?`,
|
||||
)
|
||||
.all(
|
||||
plan.matchQuery,
|
||||
...substringParams,
|
||||
...modelParams,
|
||||
...params.sourceFilter.params,
|
||||
params.limit,
|
||||
) as typeof rows;
|
||||
usedMatch = true;
|
||||
} catch (matchErr) {
|
||||
// FTS5 MATCH can fail on certain token patterns depending on the
|
||||
// Node.js sqlite runtime and tokenizer (e.g. unicode61 vs trigram).
|
||||
// Log the root cause, then fall back to per-token LIKE-based substring
|
||||
// search so results are still returned instead of being silently dropped.
|
||||
console.warn(`memory search: FTS5 MATCH failed, falling back to LIKE: ${String(matchErr)}`);
|
||||
const queryTokens =
|
||||
params.query
|
||||
.match(FTS_QUERY_TOKEN_RE)
|
||||
?.map((t) => t.trim())
|
||||
.filter(Boolean) ?? [];
|
||||
const allTerms = [...new Set([...queryTokens, ...plan.substringTerms])];
|
||||
const fallbackLikeClause = allTerms.map(() => " AND text LIKE ? ESCAPE '\\'").join("");
|
||||
const fallbackLikeParams = allTerms.map((term) => `%${escapeLikePattern(term)}%`);
|
||||
rows = params.db
|
||||
.prepare(
|
||||
`SELECT id, path, source, start_line, end_line, text,\n` +
|
||||
` 0 AS rank\n` +
|
||||
` FROM ${params.ftsTable}\n` +
|
||||
` WHERE 1=1${fallbackLikeClause}${modelClause}${params.sourceFilter.sql}\n` +
|
||||
` LIMIT ?`,
|
||||
)
|
||||
.all(
|
||||
...fallbackLikeParams,
|
||||
...modelParams,
|
||||
...params.sourceFilter.params,
|
||||
params.limit,
|
||||
) as typeof rows;
|
||||
}
|
||||
} else {
|
||||
rows = params.db
|
||||
.prepare(
|
||||
`SELECT id, path, source, start_line, end_line, text,\n` +
|
||||
` 0 AS rank\n` +
|
||||
` FROM ${params.ftsTable}\n` +
|
||||
` WHERE 1=1${substringClause}${modelClause}${params.sourceFilter.sql}\n` +
|
||||
` LIMIT ?`,
|
||||
)
|
||||
.all(
|
||||
...substringParams,
|
||||
...modelParams,
|
||||
...params.sourceFilter.params,
|
||||
params.limit,
|
||||
) as typeof rows;
|
||||
}
|
||||
|
||||
return rows.map((row) => {
|
||||
const textScore = plan.matchQuery ? params.bm25RankToScore(row.rank) : 1;
|
||||
const textScore = usedMatch ? params.bm25RankToScore(row.rank) : 1;
|
||||
const score = params.boostFallbackRanking
|
||||
? scoreFallbackKeywordResult({
|
||||
query: params.query,
|
||||
|
||||
@@ -380,7 +380,10 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
boostFallbackRanking: true,
|
||||
},
|
||||
sourceFilterList,
|
||||
).catch(() => []);
|
||||
).catch((err) => {
|
||||
log.warn(`memory search: FTS keyword query failed: ${formatErrorMessage(err)}`);
|
||||
return [];
|
||||
});
|
||||
const resultSets =
|
||||
fullQueryResults.length > 0
|
||||
? [fullQueryResults]
|
||||
@@ -398,7 +401,12 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
candidates,
|
||||
{ boostFallbackRanking: true },
|
||||
sourceFilterList,
|
||||
).catch(() => []),
|
||||
).catch((err) => {
|
||||
log.warn(
|
||||
`memory search: FTS per-keyword query failed for "${term}": ${formatErrorMessage(err)}`,
|
||||
);
|
||||
return [];
|
||||
}),
|
||||
);
|
||||
})(),
|
||||
);
|
||||
@@ -427,13 +435,23 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
// If FTS isn't available, hybrid mode cannot use keyword search; degrade to vector-only.
|
||||
const keywordResults =
|
||||
hybrid.enabled && this.fts.enabled && this.fts.available
|
||||
? await this.searchKeyword(cleaned, candidates, undefined, sourceFilterList).catch(() => [])
|
||||
? await this.searchKeyword(cleaned, candidates, undefined, sourceFilterList).catch(
|
||||
(err) => {
|
||||
log.warn(
|
||||
`memory search: FTS hybrid keyword query failed: ${formatErrorMessage(err)}`,
|
||||
);
|
||||
return [];
|
||||
},
|
||||
)
|
||||
: [];
|
||||
|
||||
const queryVec = await this.embedQueryWithTimeout(cleaned);
|
||||
const hasVector = queryVec.some((v) => v !== 0);
|
||||
const vectorResults = hasVector
|
||||
? await this.searchVector(queryVec, candidates, sourceFilterList).catch(() => [])
|
||||
? await this.searchVector(queryVec, candidates, sourceFilterList).catch((err) => {
|
||||
log.warn(`memory search: vector query failed: ${formatErrorMessage(err)}`);
|
||||
return [];
|
||||
})
|
||||
: [];
|
||||
|
||||
if (!hybrid.enabled || !this.fts.enabled || !this.fts.available) {
|
||||
|
||||
Reference in New Issue
Block a user