fix(memory): preserve KNN filter limits (#69680) (thanks @aalekh-sarvam)

This commit is contained in:
Peter Steinberger
2026-04-23 03:58:53 +01:00
parent 7cd051d7f7
commit 9660cb705b
3 changed files with 137 additions and 26 deletions

View File

@@ -35,6 +35,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Agents/BTW: route `/btw` side questions through provider stream registration with the session workspace, so Ollama provider URL construction and workspace-scoped hooks apply correctly. Fixes #68336. (#70413) Thanks @suboss87.
- Memory search: use sqlite-vec KNN for vector recall while preserving full post-filter result limits in multi-model indexes. Fixes #69666. (#69680) Thanks @aalekh-sarvam.
- Codex harness: route Codex-tagged MCP tool approval elicitations through OpenClaw plugin approvals, including current empty-schema app-server requests, while leaving generic user-input prompts fail-closed. (#68807) Thanks @kesslerio.
- WhatsApp/outbound: hold an in-memory active-delivery claim while a live outbound send is in flight, so a concurrent reconnect drain no longer re-drives the same pending queue entry and duplicates cron sends 7-12x after the 30-minute inbound-silence watchdog fires mid-delivery. Crash-replay of fresh queue entries left behind by a dead process is preserved because the claim is intentionally process-local. Fixes #70386. (#70428) Thanks @neeravmakwana.
- Providers/SDK retry: cap long `Retry-After` sleeps in Stainless-based Anthropic/OpenAI model SDKs so 60s+ retry windows surface immediately for OpenClaw failover instead of blocking the run. (#68474) Thanks @jetd1.

View File

@@ -1,10 +1,14 @@
import {
ensureMemoryIndexSchema,
loadSqliteVecExtension,
requireNodeSqlite,
} from "openclaw/plugin-sdk/memory-core-host-engine-storage";
import { describe, expect, it } from "vitest";
import { bm25RankToScore, buildFtsQuery } from "./hybrid.js";
import { searchKeyword } from "./manager-search.js";
import { searchKeyword, searchVector } from "./manager-search.js";
// Pack an embedding into its raw float32 byte representation, as stored in
// the vec0 virtual table's `embedding` column.
const vectorToBlob = (embedding: number[]): Buffer => {
  const floats = new Float32Array(embedding);
  return Buffer.from(floats.buffer);
};
describe("searchKeyword trigram fallback", () => {
const { DatabaseSync } = requireNodeSqlite();
@@ -174,3 +178,70 @@ describe("searchKeyword trigram fallback", () => {
expect(repeated[0]?.score).toBe(unique[0]?.score);
});
});
// Regression test for #69666 (fixed by #69680): when chunks from multiple
// embedding models share one vec0 table, a KNN query with k == limit can
// return only nearest rows belonging to OTHER models; the `c.model` filter
// then prunes them and the caller receives fewer than `limit` results.
// searchVector must oversample (and re-query if needed) to fill the limit.
describe("searchVector sqlite-vec KNN", () => {
const { DatabaseSync } = requireNodeSqlite();
it("fills the requested limit after model filters prune nearest KNN candidates", async () => {
// In-memory database; allowExtension is required so sqlite-vec can load.
const db = new DatabaseSync(":memory:", { allowExtension: true });
try {
const loaded = await loadSqliteVecExtension({ db });
// Surface the loader's error message in the assertion if loading fails.
expect(loaded.ok, loaded.error).toBe(true);
// Create the `chunks` schema; embedding cache and FTS are disabled since
// only vector recall is under test here.
ensureMemoryIndexSchema({
db,
embeddingCacheTable: "embedding_cache",
cacheEnabled: false,
ftsTable: "chunks_fts",
ftsEnabled: false,
});
// Two-dimensional vectors keep the distance ordering easy to reason about.
db.exec(`
CREATE VIRTUAL TABLE chunks_vec USING vec0(
id TEXT PRIMARY KEY,
embedding FLOAT[2]
);
`);
const insertChunk = db.prepare(
"INSERT INTO chunks (id, path, source, start_line, end_line, hash, model, text, embedding, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
);
const insertVector = db.prepare("INSERT INTO chunks_vec (id, embedding) VALUES (?, ?)");
// Inserts one row into both `chunks` (metadata, JSON embedding) and
// `chunks_vec` (packed float32 blob the KNN query runs against).
const addChunk = (params: { id: string; model: string; vector: [number, number] }) => {
insertChunk.run(
params.id,
`memory/${params.id}.md`,
"memory",
1,
1,
params.id,
params.model,
`chunk ${params.id}`,
JSON.stringify(params.vector),
1,
);
insertVector.run(params.id, vectorToBlob(params.vector));
};
// 20 decoys for a different model clustered near [1, 0]: they dominate the
// nearest neighbors of the query vector, so a naive KNN query with
// k == limit would surface only rows the model filter later removes.
for (let i = 0; i < 20; i += 1) {
addChunk({ id: `other-${i}`, model: "other-model", vector: [1, i / 1000] });
}
// The two target-model rows are farther from the query than every decoy.
addChunk({ id: "target-1", model: "target-model", vector: [0.5, 0.5] });
addChunk({ id: "target-2", model: "target-model", vector: [0.4, 0.6] });
const results = await searchVector({
db,
vectorTable: "chunks_vec",
providerModel: "target-model",
queryVec: [1, 0],
limit: 2,
snippetMaxChars: 200,
ensureVectorReady: async () => true,
// Empty source filters: only the model filter should prune candidates.
sourceFilterVec: { sql: "", params: [] },
sourceFilterChunks: { sql: "", params: [] },
});
// Both target rows must come back, nearest-first: [0.5, 0.5] has a smaller
// cosine distance to [1, 0] than [0.4, 0.6].
expect(results.map((row) => row.id)).toEqual(["target-1", "target-2"]);
} finally {
db.close();
}
});
});

View File

@@ -9,6 +9,7 @@ const vectorToBlob = (embedding: number[]): Buffer =>
Buffer.from(new Float32Array(embedding).buffer);
// Matches runs of Unicode letters, digits, and underscores — token candidates
// for building FTS match queries.
const FTS_QUERY_TOKEN_RE = /[\p{L}\p{N}_]+/gu;
// Detects CJK characters: hiragana/katakana (U+3040–30FF), CJK ideographs
// (U+3400–9FFF), hangul syllables (U+AC00–D7AF) and jamo (U+3131–3163).
// Presumably routes short CJK queries to trigram matching — the usage site
// is outside this hunk; confirm against planKeywordSearch.
const SHORT_CJK_TRIGRAM_RE = /[\u3040-\u30ff\u3400-\u9fff\uac00-\ud7af\u3131-\u3163]/u;
// Multiplier applied to the caller's requested limit when sizing the KNN
// candidate pool, so model/source post-filters can prune neighbors and still
// leave `limit` survivors.
const VECTOR_KNN_OVERSAMPLE_FACTOR = 8;
export type SearchSource = string;
@@ -70,6 +71,16 @@ function buildMatchQueryFromTerms(terms: string[]): string | null {
return quoted.join(" AND ");
}
// Normalize a COUNT(*) result row to a plain number. node:sqlite may surface
// integer results as bigint; missing/undefined rows count as zero.
function readCount(row: { count?: number | bigint } | undefined): number {
  const value = row?.count;
  switch (typeof value) {
    case "bigint":
      return Number(value);
    case "number":
      return value;
    default:
      return 0;
  }
}
function planKeywordSearch(params: {
query: string;
ftsTokenizer?: "unicode61" | "trigram";
@@ -130,31 +141,59 @@ export async function searchVector(params: {
// sqlite-vec's default L2 distance, so v.distance cannot be used directly
// for scoring.)
const qBlob = vectorToBlob(params.queryVec);
const rows = params.db
.prepare(
`SELECT c.id, c.path, c.start_line, c.end_line, c.text,\n` +
` c.source,\n` +
` vec_distance_cosine(v.embedding, ?) AS dist\n` +
` FROM ${params.vectorTable} v\n` +
` JOIN chunks c ON c.id = v.id\n` +
` WHERE v.embedding MATCH ? AND k = ? AND c.model = ?${params.sourceFilterVec.sql}\n` +
` ORDER BY dist ASC`,
)
.all(
qBlob,
qBlob,
params.limit,
params.providerModel,
...params.sourceFilterVec.params,
) as Array<{
id: string;
path: string;
start_line: number;
end_line: number;
text: string;
source: SearchSource;
dist: number;
}>;
const runVectorQuery = (candidateLimit: number) =>
params.db
.prepare(
`SELECT c.id, c.path, c.start_line, c.end_line, c.text,\n` +
` c.source,\n` +
` vec_distance_cosine(v.embedding, ?) AS dist\n` +
` FROM ${params.vectorTable} v\n` +
` JOIN chunks c ON c.id = v.id\n` +
` WHERE v.embedding MATCH ? AND k = ? AND c.model = ?${params.sourceFilterVec.sql}\n` +
` ORDER BY dist ASC\n` +
` LIMIT ?`,
)
.all(
qBlob,
qBlob,
candidateLimit,
params.providerModel,
...params.sourceFilterVec.params,
params.limit,
) as Array<{
id: string;
path: string;
start_line: number;
end_line: number;
text: string;
source: SearchSource;
dist: number;
}>;
const candidateLimit = params.limit * VECTOR_KNN_OVERSAMPLE_FACTOR;
let rows = runVectorQuery(candidateLimit);
if (rows.length < params.limit) {
const matchingChunkCount = readCount(
params.db
.prepare(
`SELECT COUNT(*) AS count FROM chunks c WHERE c.model = ?${params.sourceFilterVec.sql}`,
)
.get(params.providerModel, ...params.sourceFilterVec.params) as
| { count?: number | bigint }
| undefined,
);
if (matchingChunkCount > rows.length) {
const vectorCount = readCount(
params.db.prepare(`SELECT COUNT(*) AS count FROM ${params.vectorTable}`).get() as
| { count?: number | bigint }
| undefined,
);
if (vectorCount > candidateLimit) {
rows = runVectorQuery(vectorCount);
}
}
}
return rows.map((row) => ({
id: row.id,
path: row.path,