Memory: extract embedding reindex execution

This commit is contained in:
Gustavo Madeira Santana
2026-03-15 20:32:49 +00:00
parent 29d67c06bc
commit d2a057ebb2
3 changed files with 285 additions and 72 deletions

View File

@@ -0,0 +1,112 @@
import { describe, expect, it, vi } from "vitest";
import {
resetExtensionHostEmbeddingIndexStore,
runExtensionHostEmbeddingReindexBody,
} from "./embedding-reindex-execution.js";
describe("embedding-reindex-execution", () => {
  // Parameter bag of the function under test. Deriving it here keeps metadata
  // fixtures contextually typed without pulling in any additional imports.
  type ReindexBodyParams = Parameters<typeof runExtensionHostEmbeddingReindexBody>[0];

  it("runs full reindex syncs, clears dirty flags, and writes metadata", async () => {
    const buildNextMeta: ReindexBodyParams["buildNextMeta"] = () => ({
      model: "model",
      provider: "openai",
      providerKey: "key",
      sources: ["memory", "sessions"],
      scopeHash: "scope",
      chunkTokens: 200,
      chunkOverlap: 20,
    });
    const mocks = {
      syncMemoryFiles: vi.fn(async () => {}),
      syncSessionFiles: vi.fn(async () => {}),
      setDirty: vi.fn(),
      setSessionsDirty: vi.fn(),
      clearAllSessionDirtyFiles: vi.fn(),
      writeMeta: vi.fn(),
      pruneEmbeddingCacheIfNeeded: vi.fn(),
    };

    const returnedMeta = await runExtensionHostEmbeddingReindexBody({
      shouldSyncMemory: true,
      shouldSyncSessions: true,
      hasDirtySessionFiles: true,
      vectorDims: 1536,
      buildNextMeta,
      ...mocks,
    });

    // Both sync callbacks must be driven in full-reindex mode; no progress
    // reporter was supplied, so `progress` is forwarded as undefined.
    const expectedSyncArgs = { needsFullReindex: true, progress: undefined };
    expect(mocks.syncMemoryFiles).toHaveBeenCalledWith(expectedSyncArgs);
    expect(mocks.syncSessionFiles).toHaveBeenCalledWith(expectedSyncArgs);

    // Dirty tracking is cleared for both sources after a successful sync.
    expect(mocks.setDirty).toHaveBeenCalledWith(false);
    expect(mocks.setSessionsDirty).toHaveBeenCalledWith(false);
    expect(mocks.clearAllSessionDirtyFiles).toHaveBeenCalled();

    // The persisted metadata is the built metadata plus the vector dimensions.
    expect(mocks.writeMeta).toHaveBeenCalledWith({ ...buildNextMeta(), vectorDims: 1536 });
    expect(mocks.pruneEmbeddingCacheIfNeeded).toHaveBeenCalled();
    expect(returnedMeta.vectorDims).toBe(1536);
  });

  it("preserves session dirty state when sessions are not reindexed", async () => {
    const buildNextMeta: ReindexBodyParams["buildNextMeta"] = () => ({
      model: "model",
      provider: "openai",
      chunkTokens: 200,
      chunkOverlap: 20,
    });
    const sessionsDirtySpy = vi.fn();

    await runExtensionHostEmbeddingReindexBody({
      shouldSyncMemory: false,
      shouldSyncSessions: false,
      hasDirtySessionFiles: true,
      syncMemoryFiles: vi.fn(async () => {}),
      syncSessionFiles: vi.fn(async () => {}),
      setDirty: vi.fn(),
      setSessionsDirty: sessionsDirtySpy,
      clearAllSessionDirtyFiles: vi.fn(),
      buildNextMeta,
      writeMeta: vi.fn(),
    });

    // Sessions were skipped, so the flag mirrors the pending dirty files.
    expect(sessionsDirtySpy).toHaveBeenCalledWith(true);
  });

  it("resets the index store and FTS rows when available", () => {
    const store = {
      execSql: vi.fn(),
      dropVectorTable: vi.fn(),
      clearVectorDims: vi.fn(),
      clearAllSessionDirtyFiles: vi.fn(),
    };

    resetExtensionHostEmbeddingIndexStore({
      ...store,
      ftsEnabled: true,
      ftsAvailable: true,
      ftsTable: "chunks_fts",
    });

    // Statements must be issued in this exact order.
    const expectedStatements = ["DELETE FROM files", "DELETE FROM chunks", "DELETE FROM chunks_fts"];
    expectedStatements.forEach((statement, index) => {
      expect(store.execSql).toHaveBeenNthCalledWith(index + 1, statement);
    });
    expect(store.dropVectorTable).toHaveBeenCalled();
    expect(store.clearVectorDims).toHaveBeenCalled();
    expect(store.clearAllSessionDirtyFiles).toHaveBeenCalled();
  });
});

View File

@@ -0,0 +1,80 @@
import type { EmbeddingIndexMeta } from "./embedding-sync-planning.js";
/** Default progress payload type; callers narrow it through the `TProgress` type parameter. */
type EmbeddingReindexProgress = unknown;

/** Argument object passed to each sync callback. */
type EmbeddingReindexSyncParams<TProgress> = {
  needsFullReindex: boolean;
  progress?: TProgress;
};

/** Callback that synchronizes memory files into the index. */
type EmbeddingReindexMemoryFiles<TProgress = EmbeddingReindexProgress> = (
  params: EmbeddingReindexSyncParams<TProgress>,
) => Promise<void>;

/** Callback that synchronizes session files into the index. */
type EmbeddingReindexSessionFiles<TProgress = EmbeddingReindexProgress> = (
  params: EmbeddingReindexSyncParams<TProgress>,
) => Promise<void>;
/**
 * Runs the shared body of a full embedding reindex.
 *
 * Steps, in order:
 * 1. When `shouldSyncMemory` is set, awaits `syncMemoryFiles` in full-reindex
 *    mode and then calls `setDirty(false)`.
 * 2. When `shouldSyncSessions` is set, awaits `syncSessionFiles` in
 *    full-reindex mode, then calls `setSessionsDirty(false)` and
 *    `clearAllSessionDirtyFiles()`. Otherwise the sessions-dirty flag is set to
 *    `hasDirtySessionFiles`.
 * 3. Builds the next metadata via `buildNextMeta`, adds `vectorDims` when a
 *    truthy value was supplied, passes the result to `writeMeta`, and finally
 *    invokes the optional `pruneEmbeddingCacheIfNeeded` hook.
 *
 * The object returned by `buildNextMeta` is never mutated: when `vectorDims`
 * applies, a shallow copy carrying the extra field is written and returned.
 *
 * Rejections from the sync callbacks and exceptions from the other callbacks
 * propagate to the caller; later steps are not executed in that case.
 *
 * @param params Flags, state setters, and callbacks supplied by the owning manager.
 * @returns The metadata object that was passed to `writeMeta`.
 */
export async function runExtensionHostEmbeddingReindexBody<
  TProgress = EmbeddingReindexProgress,
>(params: {
  shouldSyncMemory: boolean;
  shouldSyncSessions: boolean;
  hasDirtySessionFiles: boolean;
  progress?: TProgress;
  syncMemoryFiles: EmbeddingReindexMemoryFiles<TProgress>;
  syncSessionFiles: EmbeddingReindexSessionFiles<TProgress>;
  setDirty: (value: boolean) => void;
  setSessionsDirty: (value: boolean) => void;
  clearAllSessionDirtyFiles: () => void;
  buildNextMeta: () => EmbeddingIndexMeta;
  vectorDims?: number;
  writeMeta: (meta: EmbeddingIndexMeta) => void;
  pruneEmbeddingCacheIfNeeded?: () => void;
}): Promise<EmbeddingIndexMeta> {
  if (params.shouldSyncMemory) {
    await params.syncMemoryFiles({
      needsFullReindex: true,
      progress: params.progress,
    });
    params.setDirty(false);
  }

  if (params.shouldSyncSessions) {
    await params.syncSessionFiles({
      needsFullReindex: true,
      progress: params.progress,
    });
    params.setSessionsDirty(false);
    params.clearAllSessionDirtyFiles();
  } else {
    // Sessions were not reindexed: keep them flagged dirty only while files are still pending.
    params.setSessionsDirty(params.hasDirtySessionFiles);
  }

  const builtMeta = params.buildNextMeta();
  // Copy rather than assign onto the builder's object: it belongs to the caller
  // and may be shared or frozen. The truthiness check means an absent or zero
  // `vectorDims` leaves the built metadata untouched.
  const nextMeta: EmbeddingIndexMeta = params.vectorDims
    ? { ...builtMeta, vectorDims: params.vectorDims }
    : builtMeta;

  params.writeMeta(nextMeta);
  params.pruneEmbeddingCacheIfNeeded?.();
  return nextMeta;
}
/**
 * Empties the embedding index store through caller-supplied callbacks.
 *
 * Issues `DELETE FROM files` and `DELETE FROM chunks`, then, only when FTS is
 * both enabled and available, `DELETE FROM <ftsTable>`. Afterwards it drops the
 * vector table, clears the vector dimensions, and clears the dirty session
 * files, in that order.
 *
 * A failure of the FTS delete is ignored; an exception from any other callback
 * propagates and stops the remaining steps.
 *
 * @param params SQL executor, FTS flags and table name, and cleanup callbacks.
 */
export function resetExtensionHostEmbeddingIndexStore(params: {
  execSql: (sql: string) => void;
  ftsEnabled: boolean;
  ftsAvailable: boolean;
  ftsTable: string;
  dropVectorTable: () => void;
  clearVectorDims: () => void;
  clearAllSessionDirtyFiles: () => void;
}): void {
  for (const statement of ["DELETE FROM files", "DELETE FROM chunks"]) {
    params.execSql(statement);
  }

  const canClearFts = params.ftsEnabled && params.ftsAvailable;
  if (canClearFts) {
    try {
      params.execSql(`DELETE FROM ${params.ftsTable}`);
    } catch {
      // Best-effort: an FTS delete failure must not prevent the rest of the reset.
    }
  }

  params.dropVectorTable();
  params.clearVectorDims();
  params.clearAllSessionDirtyFiles();
}

View File

@@ -11,6 +11,10 @@ import {
activateEmbeddingManagerFallbackProvider,
resolveEmbeddingManagerBatchConfig,
} from "../extension-host/embedding-manager-runtime.js";
import {
resetExtensionHostEmbeddingIndexStore,
runExtensionHostEmbeddingReindexBody,
} from "../extension-host/embedding-reindex-execution.js";
import {
type EmbeddingProvider,
type EmbeddingProviderId,
@@ -1103,40 +1107,43 @@ export abstract class MemoryManagerSyncOps {
{ reason: params.reason, force: params.force },
true,
);
if (shouldSyncMemory) {
await this.syncMemoryFiles({ needsFullReindex: true, progress: params.progress });
this.dirty = false;
}
if (shouldSyncSessions) {
await this.syncSessionFiles({ needsFullReindex: true, progress: params.progress });
this.sessionsDirty = false;
this.sessionsDirtyFiles.clear();
} else if (this.sessionsDirtyFiles.size > 0) {
this.sessionsDirty = true;
} else {
this.sessionsDirty = false;
}
nextMeta = buildEmbeddingIndexMeta({
provider: this.provider,
providerKey: this.providerKey,
configuredSources: this.resolveConfiguredSourcesForMeta(),
configuredScopeHash: this.resolveConfiguredScopeHash(),
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
nextMeta = await runExtensionHostEmbeddingReindexBody({
shouldSyncMemory,
shouldSyncSessions,
hasDirtySessionFiles: this.sessionsDirtyFiles.size > 0,
progress: params.progress,
syncMemoryFiles: async (syncParams) => {
await this.syncMemoryFiles(syncParams);
},
syncSessionFiles: async (syncParams) => {
await this.syncSessionFiles(syncParams);
},
setDirty: (value) => {
this.dirty = value;
},
setSessionsDirty: (value) => {
this.sessionsDirty = value;
},
clearAllSessionDirtyFiles: () => {
this.sessionsDirtyFiles.clear();
},
buildNextMeta: () =>
buildEmbeddingIndexMeta({
provider: this.provider,
providerKey: this.providerKey,
configuredSources: this.resolveConfiguredSourcesForMeta(),
configuredScopeHash: this.resolveConfiguredScopeHash(),
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
}),
vectorDims: this.vector.available && this.vector.dims ? this.vector.dims : undefined,
writeMeta: (meta) => {
this.writeMeta(meta);
},
pruneEmbeddingCacheIfNeeded: () => {
this.pruneEmbeddingCacheIfNeeded?.();
},
});
if (!nextMeta) {
throw new Error("Failed to compute memory index metadata for reindexing.");
}
if (this.vector.available && this.vector.dims) {
nextMeta.vectorDims = this.vector.dims;
}
this.writeMeta(nextMeta);
this.pruneEmbeddingCacheIfNeeded?.();
this.db.close();
originalDb.close();
@@ -1174,49 +1181,63 @@ export abstract class MemoryManagerSyncOps {
{ reason: params.reason, force: params.force },
true,
);
if (shouldSyncMemory) {
await this.syncMemoryFiles({ needsFullReindex: true, progress: params.progress });
this.dirty = false;
}
if (shouldSyncSessions) {
await this.syncSessionFiles({ needsFullReindex: true, progress: params.progress });
this.sessionsDirty = false;
this.sessionsDirtyFiles.clear();
} else if (this.sessionsDirtyFiles.size > 0) {
this.sessionsDirty = true;
} else {
this.sessionsDirty = false;
}
const nextMeta = buildEmbeddingIndexMeta({
provider: this.provider,
providerKey: this.providerKey,
configuredSources: this.resolveConfiguredSourcesForMeta(),
configuredScopeHash: this.resolveConfiguredScopeHash(),
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
await runExtensionHostEmbeddingReindexBody({
shouldSyncMemory,
shouldSyncSessions,
hasDirtySessionFiles: this.sessionsDirtyFiles.size > 0,
progress: params.progress,
syncMemoryFiles: async (syncParams) => {
await this.syncMemoryFiles(syncParams);
},
syncSessionFiles: async (syncParams) => {
await this.syncSessionFiles(syncParams);
},
setDirty: (value) => {
this.dirty = value;
},
setSessionsDirty: (value) => {
this.sessionsDirty = value;
},
clearAllSessionDirtyFiles: () => {
this.sessionsDirtyFiles.clear();
},
buildNextMeta: () =>
buildEmbeddingIndexMeta({
provider: this.provider,
providerKey: this.providerKey,
configuredSources: this.resolveConfiguredSourcesForMeta(),
configuredScopeHash: this.resolveConfiguredScopeHash(),
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
}),
vectorDims: this.vector.available && this.vector.dims ? this.vector.dims : undefined,
writeMeta: (meta) => {
this.writeMeta(meta);
},
pruneEmbeddingCacheIfNeeded: () => {
this.pruneEmbeddingCacheIfNeeded?.();
},
});
if (this.vector.available && this.vector.dims) {
nextMeta.vectorDims = this.vector.dims;
}
this.writeMeta(nextMeta);
this.pruneEmbeddingCacheIfNeeded?.();
}
/**
 * Clears the index by delegating to `resetExtensionHostEmbeddingIndexStore`.
 *
 * The helper deletes all rows from `files` and `chunks`, deletes the FTS rows
 * when FTS is enabled and available (ignoring failures of that one statement),
 * drops the vector table, resets `this.vector.dims`, and clears
 * `this.sessionsDirtyFiles`. The reset is performed once; the former inline
 * statements duplicated exactly what the helper already does.
 */
private resetIndex() {
  resetExtensionHostEmbeddingIndexStore({
    execSql: (sql) => {
      this.db.exec(sql);
    },
    ftsEnabled: this.fts.enabled,
    ftsAvailable: this.fts.available,
    ftsTable: FTS_TABLE,
    dropVectorTable: () => {
      this.dropVectorTable();
    },
    clearVectorDims: () => {
      this.vector.dims = undefined;
    },
    clearAllSessionDirtyFiles: () => {
      this.sessionsDirtyFiles.clear();
    },
  });
}
protected readMeta(): EmbeddingIndexMeta | null {