From 66ab70190b6ffab2bdcc8ff2df053645671d2153 Mon Sep 17 00:00:00 2001 From: Buns Enchantress Date: Tue, 3 Feb 2026 05:44:53 -0600 Subject: [PATCH] feat: enhance DocsStore with improved vector handling and similarity conversion - Added a constant for the distance metric used in vector searches, clarifying the assumption of L2 distance. - Updated the createTable method to ensure all chunk properties are correctly mapped during table creation. - Improved the similarity score calculation by providing a clear explanation of the conversion from L2 distance, ensuring accurate ranking of results. --- scripts/docs-chat/rag/store.ts | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/scripts/docs-chat/rag/store.ts b/scripts/docs-chat/rag/store.ts index f6760e6db62..1d2316ac8c3 100644 --- a/scripts/docs-chat/rag/store.ts +++ b/scripts/docs-chat/rag/store.ts @@ -5,6 +5,8 @@ import * as lancedb from "@lancedb/lancedb"; const TABLE_NAME = "docs_chunks"; +/** LanceDB uses L2 (Euclidean) distance by default; similarity conversion below assumes this. */ +const _DISTANCE_METRIC = "L2" as const; export interface DocsChunk { id: string; @@ -82,7 +84,14 @@ export class DocsStore { return; } - this.table = await this.db.createTable(TABLE_NAME, chunks); + this.table = await this.db.createTable(TABLE_NAME, chunks.map(chunk => ({ + id: chunk.id, + path: chunk.path, + title: chunk.title, + content: chunk.content, + url: chunk.url, + vector: chunk.vector, + }))); } /** @@ -97,10 +106,11 @@ export class DocsStore { const results = await this.table.vectorSearch(vector).limit(limit).toArray(); - // LanceDB uses L2 distance by default; convert to similarity score + // Convert L2 distance to similarity: sim = 1 / (1 + d), bounded [0, 1]. + // This assumes DISTANCE_METRIC is L2 (non-negative). If metric changes, + // update this conversion or use raw distance for ranking. return results.map((row) => { const distance = (row._distance as number) ?? 0; - // Inverse for 0-1 range: sim = 1 / (1 + d) const similarity = 1 / (1 + distance); return { chunk: {