Files
openclaw/scripts/docs-chat/rag/store.ts
Buns Enchantress 332d9a2ad1 feat: enhance docs chat with vector-based RAG pipeline
- Added vector index building and serving capabilities to the docs chat.
- Introduced new scripts for generating embeddings and serving the chat API using vector search.
- Updated package.json with new commands for vector index operations.
- Enhanced README with instructions for the new RAG pipeline and legacy keyword pipeline.
- Removed outdated Vercel configuration file.
2026-02-03 02:10:11 -06:00

131 lines
3.1 KiB
TypeScript

/**
* LanceDB storage layer for docs-chat RAG pipeline.
* Stores document chunks with vector embeddings for semantic search.
*/
import * as lancedb from "@lancedb/lancedb";
const TABLE_NAME = "docs_chunks";
export interface DocsChunk {
id: string;
path: string;
title: string;
content: string;
url: string;
vector: number[];
}
export interface SearchResult {
chunk: DocsChunk;
distance: number;
similarity: number;
}
export class DocsStore {
private db: lancedb.Connection | null = null;
private table: lancedb.Table | null = null;
private initPromise: Promise<void> | null = null;
constructor(
private readonly dbPath: string,
private readonly vectorDim: number,
) { }
private async ensureInitialized(): Promise<void> {
if (this.table) {
return;
}
if (this.initPromise) {
return this.initPromise;
}
this.initPromise = this.doInitialize();
return this.initPromise;
}
private async doInitialize(): Promise<void> {
this.db = await lancedb.connect(this.dbPath);
const tables = await this.db.tableNames();
if (tables.includes(TABLE_NAME)) {
this.table = await this.db.openTable(TABLE_NAME);
}
// Table will be created when first storing chunks
}
/**
* Drop existing table and create fresh with new chunks.
* Used during index rebuild.
*/
async replaceAll(chunks: DocsChunk[]): Promise<void> {
if (!this.db) {
this.db = await lancedb.connect(this.dbPath);
}
const tables = await this.db.tableNames();
if (tables.includes(TABLE_NAME)) {
await this.db.dropTable(TABLE_NAME);
}
if (chunks.length === 0) {
// Create empty table with schema
this.table = await this.db.createTable(TABLE_NAME, [
{
id: "__schema__",
path: "",
title: "",
content: "",
url: "",
vector: Array.from({ length: this.vectorDim }).fill(0),
},
]);
await this.table.delete('id = "__schema__"');
return;
}
this.table = await this.db.createTable(TABLE_NAME, chunks);
}
/**
* Search for similar chunks using vector similarity.
*/
async search(vector: number[], limit: number = 8): Promise<SearchResult[]> {
await this.ensureInitialized();
if (!this.table) {
return [];
}
const results = await this.table.vectorSearch(vector).limit(limit).toArray();
// LanceDB uses L2 distance by default; convert to similarity score
return results.map((row) => {
const distance = (row._distance as number) ?? 0;
// Inverse for 0-1 range: sim = 1 / (1 + d)
const similarity = 1 / (1 + distance);
return {
chunk: {
id: row.id as string,
path: row.path as string,
title: row.title as string,
content: row.content as string,
url: row.url as string,
vector: row.vector as number[],
},
distance,
similarity,
};
});
}
/**
* Get count of stored chunks.
*/
async count(): Promise<number> {
await this.ensureInitialized();
if (!this.table) {
return 0;
}
return this.table.countRows();
}
}