Files
openclaw/packages/memory-host-sdk/src/host/memory-schema.ts
Tak Hoffman 3ce48aff66 Memory: add configurable FTS5 tokenizer for CJK text support (openclaw#56707)
Verified:
- pnpm build
- pnpm check
- pnpm test -- extensions/memory-core/src/memory/manager-search.test.ts packages/memory-host-sdk/src/host/query-expansion.test.ts
- pnpm test -- extensions/memory-core/src/memory/index.test.ts -t "reindexes when extraPaths change"
- pnpm test -- src/config/schema.base.generated.test.ts
- pnpm test -- src/media-understanding/image.test.ts
- pnpm test

Co-authored-by: Mitsuyuki Osabe <24588751+carrotRakko@users.noreply.github.com>
2026-03-28 20:53:29 -05:00

103 lines
3.1 KiB
TypeScript

import type { DatabaseSync } from "node:sqlite";
/**
 * Creates (or upgrades in place) the SQLite schema backing the memory index.
 *
 * Statements are issued in this order:
 *  1. the `meta`, `files` and `chunks` tables (`CREATE TABLE IF NOT EXISTS`);
 *  2. when `cacheEnabled`, the embedding cache table plus its `updated_at` index;
 *  3. when `ftsEnabled`, the FTS5 virtual table — any error raised while
 *     creating it is captured and reported through the return value instead
 *     of being thrown;
 *  4. the `source` column on `files` and `chunks` if it is missing (via
 *     `ensureColumn`), followed by the `chunks` path/source indexes.
 *
 * Errors raised by steps 1, 2 and 4 are not caught here.
 *
 * @param params.db Database handle every statement is executed against.
 * @param params.embeddingCacheTable Name of the embedding cache table; it is
 *   interpolated into the SQL text as-is.
 * @param params.cacheEnabled Whether the embedding cache table is created.
 * @param params.ftsTable Name of the FTS5 virtual table; it is interpolated
 *   into the SQL text as-is.
 * @param params.ftsEnabled Whether creation of the FTS5 table is attempted.
 * @param params.ftsTokenizer Tokenizer selection, treated as `"unicode61"` when
 *   omitted. Only `"trigram"` adds a `tokenize=` clause to the statement.
 * @returns `ftsAvailable` is `true` only when the FTS5 statement ran without
 *   throwing. `ftsError` is present only when that statement failed with a
 *   non-empty message.
 */
export function ensureMemoryIndexSchema(params: {
  db: DatabaseSync;
  embeddingCacheTable: string;
  cacheEnabled: boolean;
  ftsTable: string;
  ftsEnabled: boolean;
  ftsTokenizer?: "unicode61" | "trigram";
}): { ftsAvailable: boolean; ftsError?: string } {
  const { db, embeddingCacheTable, cacheEnabled, ftsTable, ftsEnabled, ftsTokenizer } = params;

  // Core tables, created in a fixed order: meta, files, chunks.
  const coreTableStatements = [
    `
CREATE TABLE IF NOT EXISTS meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`,
    `
CREATE TABLE IF NOT EXISTS files (
path TEXT PRIMARY KEY,
source TEXT NOT NULL DEFAULT 'memory',
hash TEXT NOT NULL,
mtime INTEGER NOT NULL,
size INTEGER NOT NULL
);
`,
    `
CREATE TABLE IF NOT EXISTS chunks (
id TEXT PRIMARY KEY,
path TEXT NOT NULL,
source TEXT NOT NULL DEFAULT 'memory',
start_line INTEGER NOT NULL,
end_line INTEGER NOT NULL,
hash TEXT NOT NULL,
model TEXT NOT NULL,
text TEXT NOT NULL,
embedding TEXT NOT NULL,
updated_at INTEGER NOT NULL
);
`,
  ];
  for (const statement of coreTableStatements) {
    db.exec(statement);
  }

  if (cacheEnabled) {
    db.exec(`
CREATE TABLE IF NOT EXISTS ${embeddingCacheTable} (
provider TEXT NOT NULL,
model TEXT NOT NULL,
provider_key TEXT NOT NULL,
hash TEXT NOT NULL,
embedding TEXT NOT NULL,
dims INTEGER,
updated_at INTEGER NOT NULL,
PRIMARY KEY (provider, model, provider_key, hash)
);
`);
    db.exec(
      `CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at ON ${embeddingCacheTable}(updated_at);`,
    );
  }

  // Starts out as "FTS unavailable"; only a successful CREATE flips the flag.
  const outcome: { ftsAvailable: boolean; ftsError?: string } = { ftsAvailable: false };
  if (ftsEnabled) {
    try {
      const tokenizer = ftsTokenizer ?? "unicode61";
      // The default tokenizer gets no tokenize clause at all.
      let tokenizeClause = "";
      if (tokenizer === "trigram") {
        tokenizeClause = ", tokenize='trigram case_sensitive 0'";
      }
      // NOTE(review): with IF NOT EXISTS an already-present table is left
      // untouched, so a changed tokenizer presumably needs a rebuild elsewhere
      // — confirm against the caller.
      const createFtsSql = [
        `CREATE VIRTUAL TABLE IF NOT EXISTS ${ftsTable} USING fts5(\n`,
        " text,\n",
        " id UNINDEXED,\n",
        " path UNINDEXED,\n",
        " source UNINDEXED,\n",
        " model UNINDEXED,\n",
        " start_line UNINDEXED,\n",
        " end_line UNINDEXED\n",
        tokenizeClause + ");",
      ].join("");
      db.exec(createFtsSql);
      outcome.ftsAvailable = true;
    } catch (cause) {
      // Best-effort: report the failure to the caller instead of throwing.
      const reason = cause instanceof Error ? cause.message : String(cause);
      if (reason) {
        outcome.ftsError = reason;
      }
    }
  }

  // Add `source` to tables that already existed without it, before indexing on it.
  for (const table of ["files", "chunks"] as const) {
    ensureColumn(db, table, "source", "TEXT NOT NULL DEFAULT 'memory'");
  }
  db.exec("CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);");
  db.exec("CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source);");

  return outcome;
}
/**
 * Adds `column` to `table` unless the table already has a column by that name.
 *
 * Existing columns are read with `PRAGMA table_info`. When none matches, an
 * `ALTER TABLE … ADD COLUMN` statement is executed. The table name, column
 * name and definition are interpolated into the SQL text without quoting or
 * binding.
 *
 * @param db Database handle to inspect and, if needed, alter.
 * @param table Table to check.
 * @param column Column name, compared exactly against the existing names.
 * @param definition Column definition appended after the column name in the
 *   `ADD COLUMN` statement.
 */
function ensureColumn(
  db: DatabaseSync,
  table: "files" | "chunks",
  column: string,
  definition: string,
): void {
  const tableInfo = db.prepare(`PRAGMA table_info(${table})`);
  const existingColumns = tableInfo.all() as Array<{ name: string }>;
  for (const { name } of existingColumns) {
    if (name === column) {
      // Already present: nothing to alter.
      return;
    }
  }
  db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`);
}