From 9660cb705ba372a86a11a9252acaa9f0bc476a4c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 23 Apr 2026 03:58:53 +0100 Subject: [PATCH] fix(memory): preserve KNN filter limits (#69680) (thanks @aalekh-sarvam) --- CHANGELOG.md | 1 + .../src/memory/manager-search.test.ts | 73 ++++++++++++++- .../memory-core/src/memory/manager-search.ts | 89 +++++++++++++------ 3 files changed, 137 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca1de726e2e..d700facb534 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Agents/BTW: route `/btw` side questions through provider stream registration with the session workspace, so Ollama provider URL construction and workspace-scoped hooks apply correctly. Fixes #68336. (#70413) Thanks @suboss87. +- Memory search: use sqlite-vec KNN for vector recall while preserving full post-filter result limits in multi-model indexes. Fixes #69666. (#69680) Thanks @aalekh-sarvam. - Codex harness: route Codex-tagged MCP tool approval elicitations through OpenClaw plugin approvals, including current empty-schema app-server requests, while leaving generic user-input prompts fail-closed. (#68807) Thanks @kesslerio. - WhatsApp/outbound: hold an in-memory active-delivery claim while a live outbound send is in flight, so a concurrent reconnect drain no longer re-drives the same pending queue entry and duplicates cron sends 7-12x after the 30-minute inbound-silence watchdog fires mid-delivery. Crash-replay of fresh queue entries left behind by a dead process is preserved because the claim is intentionally process-local. Fixes #70386. (#70428) Thanks @neeravmakwana. - Providers/SDK retry: cap long `Retry-After` sleeps in Stainless-based Anthropic/OpenAI model SDKs so 60s+ retry windows surface immediately for OpenClaw failover instead of blocking the run. (#68474) Thanks @jetd1. diff --git a/extensions/memory-core/src/memory/manager-search.test.ts b/extensions/memory-core/src/memory/manager-search.test.ts index 284854bef68..24b12eeb4d3 100644 --- a/extensions/memory-core/src/memory/manager-search.test.ts +++ b/extensions/memory-core/src/memory/manager-search.test.ts @@ -1,10 +1,14 @@ import { ensureMemoryIndexSchema, + loadSqliteVecExtension, requireNodeSqlite, } from "openclaw/plugin-sdk/memory-core-host-engine-storage"; import { describe, expect, it } from "vitest"; import { bm25RankToScore, buildFtsQuery } from "./hybrid.js"; -import { searchKeyword } from "./manager-search.js"; +import { searchKeyword, searchVector } from "./manager-search.js"; + +const vectorToBlob = (embedding: number[]): Buffer => + Buffer.from(new Float32Array(embedding).buffer); describe("searchKeyword trigram fallback", () => { const { DatabaseSync } = requireNodeSqlite(); @@ -174,3 +178,70 @@ describe("searchKeyword trigram fallback", () => { expect(repeated[0]?.score).toBe(unique[0]?.score); }); }); + +describe("searchVector sqlite-vec KNN", () => { + const { DatabaseSync } = requireNodeSqlite(); + + it("fills the requested limit after model filters prune nearest KNN candidates", async () => { + const db = new DatabaseSync(":memory:", { allowExtension: true }); + try { + const loaded = await loadSqliteVecExtension({ db }); + expect(loaded.ok, loaded.error).toBe(true); + ensureMemoryIndexSchema({ + db, + embeddingCacheTable: "embedding_cache", + cacheEnabled: false, + ftsTable: "chunks_fts", + ftsEnabled: false, + }); + db.exec(` + CREATE VIRTUAL TABLE chunks_vec USING vec0( + id TEXT PRIMARY KEY, + embedding FLOAT[2] + ); + `); + + const insertChunk = db.prepare( + "INSERT INTO chunks (id, path, source, start_line, end_line, hash, model, text, embedding, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ); + const insertVector = db.prepare("INSERT INTO chunks_vec (id, embedding) VALUES (?, ?)"); + const addChunk = (params: { id: string; model: string; vector: [number, number] }) => { + insertChunk.run( + params.id, + `memory/${params.id}.md`, + "memory", + 1, + 1, + params.id, + params.model, + `chunk ${params.id}`, + JSON.stringify(params.vector), + 1, + ); + insertVector.run(params.id, vectorToBlob(params.vector)); + }; + + for (let i = 0; i < 20; i += 1) { + addChunk({ id: `other-${i}`, model: "other-model", vector: [1, i / 1000] }); + } + addChunk({ id: "target-1", model: "target-model", vector: [0.5, 0.5] }); + addChunk({ id: "target-2", model: "target-model", vector: [0.4, 0.6] }); + + const results = await searchVector({ + db, + vectorTable: "chunks_vec", + providerModel: "target-model", + queryVec: [1, 0], + limit: 2, + snippetMaxChars: 200, + ensureVectorReady: async () => true, + sourceFilterVec: { sql: "", params: [] }, + sourceFilterChunks: { sql: "", params: [] }, + }); + + expect(results.map((row) => row.id)).toEqual(["target-1", "target-2"]); + } finally { + db.close(); + } + }); +}); diff --git a/extensions/memory-core/src/memory/manager-search.ts b/extensions/memory-core/src/memory/manager-search.ts index d974fac78f9..e3e47f69d14 100644 --- a/extensions/memory-core/src/memory/manager-search.ts +++ b/extensions/memory-core/src/memory/manager-search.ts @@ -9,6 +9,7 @@ const vectorToBlob = (embedding: number[]): Buffer => Buffer.from(new Float32Array(embedding).buffer); const FTS_QUERY_TOKEN_RE = /[\p{L}\p{N}_]+/gu; const SHORT_CJK_TRIGRAM_RE = /[\u3040-\u30ff\u3400-\u9fff\uac00-\ud7af\u3131-\u3163]/u; +const VECTOR_KNN_OVERSAMPLE_FACTOR = 8; export type SearchSource = string; @@ -70,6 +71,16 @@ function buildMatchQueryFromTerms(terms: string[]): string | null { return quoted.join(" AND "); } +function readCount(row: { count?: number | bigint } | undefined): number { + if (typeof row?.count === "bigint") { + return Number(row.count); + } + if (typeof row?.count === "number") { + return row.count; + } + return 0; +} + function planKeywordSearch(params: { query: string; ftsTokenizer?: "unicode61" | "trigram"; @@ -130,31 +141,59 @@ export async function searchVector(params: { // sqlite-vec's default L2 distance, so v.distance cannot be used directly // for scoring.) const qBlob = vectorToBlob(params.queryVec); - const rows = params.db - .prepare( - `SELECT c.id, c.path, c.start_line, c.end_line, c.text,\n` + - ` c.source,\n` + - ` vec_distance_cosine(v.embedding, ?) AS dist\n` + - ` FROM ${params.vectorTable} v\n` + - ` JOIN chunks c ON c.id = v.id\n` + - ` WHERE v.embedding MATCH ? AND k = ? AND c.model = ?${params.sourceFilterVec.sql}\n` + - ` ORDER BY dist ASC`, - ) - .all( - qBlob, - qBlob, - params.limit, - params.providerModel, - ...params.sourceFilterVec.params, - ) as Array<{ - id: string; - path: string; - start_line: number; - end_line: number; - text: string; - source: SearchSource; - dist: number; - }>; + const runVectorQuery = (candidateLimit: number) => + params.db + .prepare( + `SELECT c.id, c.path, c.start_line, c.end_line, c.text,\n` + + ` c.source,\n` + + ` vec_distance_cosine(v.embedding, ?) AS dist\n` + + ` FROM ${params.vectorTable} v\n` + + ` JOIN chunks c ON c.id = v.id\n` + + ` WHERE v.embedding MATCH ? AND k = ? AND c.model = ?${params.sourceFilterVec.sql}\n` + + ` ORDER BY dist ASC\n` + + ` LIMIT ?`, + ) + .all( + qBlob, + qBlob, + candidateLimit, + params.providerModel, + ...params.sourceFilterVec.params, + params.limit, + ) as Array<{ + id: string; + path: string; + start_line: number; + end_line: number; + text: string; + source: SearchSource; + dist: number; + }>; + + const candidateLimit = params.limit * VECTOR_KNN_OVERSAMPLE_FACTOR; + let rows = runVectorQuery(candidateLimit); + if (rows.length < params.limit) { + const matchingChunkCount = readCount( + params.db + .prepare( + `SELECT COUNT(*) AS count FROM chunks c WHERE c.model = ?${params.sourceFilterVec.sql}`, + ) + .get(params.providerModel, ...params.sourceFilterVec.params) as + | { count?: number | bigint } + | undefined, + ); + if (matchingChunkCount > rows.length) { + const vectorCount = readCount( + params.db.prepare(`SELECT COUNT(*) AS count FROM ${params.vectorTable}`).get() as + | { count?: number | bigint } + | undefined, + ); + if (vectorCount > candidateLimit) { + rows = runVectorQuery(vectorCount); + } + } + } + return rows.map((row) => ({ id: row.id, path: row.path,