From d2a057ebb2c9d4a569bf2f45ea7eb372ab64bc50 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Sun, 15 Mar 2026 20:32:49 +0000 Subject: [PATCH] Memory: extract embedding reindex execution --- .../embedding-reindex-execution.test.ts | 112 ++++++++++++ .../embedding-reindex-execution.ts | 80 +++++++++ src/memory/manager-sync-ops.ts | 165 ++++++++++-------- 3 files changed, 285 insertions(+), 72 deletions(-) create mode 100644 src/extension-host/embedding-reindex-execution.test.ts create mode 100644 src/extension-host/embedding-reindex-execution.ts diff --git a/src/extension-host/embedding-reindex-execution.test.ts b/src/extension-host/embedding-reindex-execution.test.ts new file mode 100644 index 00000000000..52ba9da9352 --- /dev/null +++ b/src/extension-host/embedding-reindex-execution.test.ts @@ -0,0 +1,112 @@ +import { describe, expect, it, vi } from "vitest"; +import { + resetExtensionHostEmbeddingIndexStore, + runExtensionHostEmbeddingReindexBody, +} from "./embedding-reindex-execution.js"; + +describe("embedding-reindex-execution", () => { + it("runs full reindex syncs, clears dirty flags, and writes metadata", async () => { + const syncMemoryFiles = vi.fn(async () => {}); + const syncSessionFiles = vi.fn(async () => {}); + const setDirty = vi.fn(); + const setSessionsDirty = vi.fn(); + const clearAllSessionDirtyFiles = vi.fn(); + const writeMeta = vi.fn(); + const pruneEmbeddingCacheIfNeeded = vi.fn(); + + const nextMeta = await runExtensionHostEmbeddingReindexBody({ + shouldSyncMemory: true, + shouldSyncSessions: true, + hasDirtySessionFiles: true, + syncMemoryFiles, + syncSessionFiles, + setDirty, + setSessionsDirty, + clearAllSessionDirtyFiles, + buildNextMeta: () => ({ + model: "model", + provider: "openai", + providerKey: "key", + sources: ["memory", "sessions"], + scopeHash: "scope", + chunkTokens: 200, + chunkOverlap: 20, + }), + vectorDims: 1536, + writeMeta, + pruneEmbeddingCacheIfNeeded, + }); + + 
expect(syncMemoryFiles).toHaveBeenCalledWith({ + needsFullReindex: true, + progress: undefined, + }); + expect(syncSessionFiles).toHaveBeenCalledWith({ + needsFullReindex: true, + progress: undefined, + }); + expect(setDirty).toHaveBeenCalledWith(false); + expect(setSessionsDirty).toHaveBeenCalledWith(false); + expect(clearAllSessionDirtyFiles).toHaveBeenCalled(); + expect(writeMeta).toHaveBeenCalledWith({ + model: "model", + provider: "openai", + providerKey: "key", + sources: ["memory", "sessions"], + scopeHash: "scope", + chunkTokens: 200, + chunkOverlap: 20, + vectorDims: 1536, + }); + expect(pruneEmbeddingCacheIfNeeded).toHaveBeenCalled(); + expect(nextMeta.vectorDims).toBe(1536); + }); + + it("preserves session dirty state when sessions are not reindexed", async () => { + const setSessionsDirty = vi.fn(); + + await runExtensionHostEmbeddingReindexBody({ + shouldSyncMemory: false, + shouldSyncSessions: false, + hasDirtySessionFiles: true, + syncMemoryFiles: vi.fn(async () => {}), + syncSessionFiles: vi.fn(async () => {}), + setDirty: vi.fn(), + setSessionsDirty, + clearAllSessionDirtyFiles: vi.fn(), + buildNextMeta: () => ({ + model: "model", + provider: "openai", + chunkTokens: 200, + chunkOverlap: 20, + }), + writeMeta: vi.fn(), + }); + + expect(setSessionsDirty).toHaveBeenCalledWith(true); + }); + + it("resets the index store and FTS rows when available", () => { + const execSql = vi.fn(); + const dropVectorTable = vi.fn(); + const clearVectorDims = vi.fn(); + const clearAllSessionDirtyFiles = vi.fn(); + + resetExtensionHostEmbeddingIndexStore({ + execSql, + ftsEnabled: true, + ftsAvailable: true, + ftsTable: "chunks_fts", + dropVectorTable, + clearVectorDims, + clearAllSessionDirtyFiles, + }); + + expect(execSql).toHaveBeenNthCalledWith(1, "DELETE FROM files"); + expect(execSql).toHaveBeenNthCalledWith(2, "DELETE FROM chunks"); + expect(execSql).toHaveBeenNthCalledWith(3, "DELETE FROM chunks_fts"); + expect(dropVectorTable).toHaveBeenCalled(); + 
+ expect(clearVectorDims).toHaveBeenCalled(); + expect(clearAllSessionDirtyFiles).toHaveBeenCalled(); + }); +}); diff --git a/src/extension-host/embedding-reindex-execution.ts b/src/extension-host/embedding-reindex-execution.ts new file mode 100644 index 00000000000..548d3397937 --- /dev/null +++ b/src/extension-host/embedding-reindex-execution.ts @@ -0,0 +1,80 @@ +import type { EmbeddingIndexMeta } from "./embedding-sync-planning.js"; + +type EmbeddingReindexProgress = unknown; + +type EmbeddingReindexMemoryFiles<TProgress> = (params: { + needsFullReindex: boolean; + progress?: TProgress; +}) => Promise<void>; + +type EmbeddingReindexSessionFiles<TProgress> = (params: { + needsFullReindex: boolean; + progress?: TProgress; +}) => Promise<void>; + +export async function runExtensionHostEmbeddingReindexBody< + TProgress = EmbeddingReindexProgress, +>(params: { + shouldSyncMemory: boolean; + shouldSyncSessions: boolean; + hasDirtySessionFiles: boolean; + progress?: TProgress; + syncMemoryFiles: EmbeddingReindexMemoryFiles<TProgress>; + syncSessionFiles: EmbeddingReindexSessionFiles<TProgress>; + setDirty: (value: boolean) => void; + setSessionsDirty: (value: boolean) => void; + clearAllSessionDirtyFiles: () => void; + buildNextMeta: () => EmbeddingIndexMeta; + vectorDims?: number; + writeMeta: (meta: EmbeddingIndexMeta) => void; + pruneEmbeddingCacheIfNeeded?: () => void; +}): Promise<EmbeddingIndexMeta> { + if (params.shouldSyncMemory) { + await params.syncMemoryFiles({ + needsFullReindex: true, + progress: params.progress, + }); + params.setDirty(false); + } + + if (params.shouldSyncSessions) { + await params.syncSessionFiles({ + needsFullReindex: true, + progress: params.progress, + }); + params.setSessionsDirty(false); + params.clearAllSessionDirtyFiles(); + } else { + params.setSessionsDirty(params.hasDirtySessionFiles); + } + + const nextMeta = params.buildNextMeta(); + if (params.vectorDims) { + nextMeta.vectorDims = params.vectorDims; + } + + params.writeMeta(nextMeta); + params.pruneEmbeddingCacheIfNeeded?.(); + return nextMeta; +} +
+export function resetExtensionHostEmbeddingIndexStore(params: { + execSql: (sql: string) => void; + ftsEnabled: boolean; + ftsAvailable: boolean; + ftsTable: string; + dropVectorTable: () => void; + clearVectorDims: () => void; + clearAllSessionDirtyFiles: () => void; +}): void { + params.execSql("DELETE FROM files"); + params.execSql("DELETE FROM chunks"); + if (params.ftsEnabled && params.ftsAvailable) { + try { + params.execSql(`DELETE FROM ${params.ftsTable}`); + } catch {} + } + params.dropVectorTable(); + params.clearVectorDims(); + params.clearAllSessionDirtyFiles(); +} diff --git a/src/memory/manager-sync-ops.ts b/src/memory/manager-sync-ops.ts index fb3ee63e53b..70ce34ddc62 100644 --- a/src/memory/manager-sync-ops.ts +++ b/src/memory/manager-sync-ops.ts @@ -11,6 +11,10 @@ import { activateEmbeddingManagerFallbackProvider, resolveEmbeddingManagerBatchConfig, } from "../extension-host/embedding-manager-runtime.js"; +import { + resetExtensionHostEmbeddingIndexStore, + runExtensionHostEmbeddingReindexBody, +} from "../extension-host/embedding-reindex-execution.js"; import { type EmbeddingProvider, type EmbeddingProviderId, @@ -1103,40 +1107,43 @@ export abstract class MemoryManagerSyncOps { { reason: params.reason, force: params.force }, true, ); - - if (shouldSyncMemory) { - await this.syncMemoryFiles({ needsFullReindex: true, progress: params.progress }); - this.dirty = false; - } - - if (shouldSyncSessions) { - await this.syncSessionFiles({ needsFullReindex: true, progress: params.progress }); - this.sessionsDirty = false; - this.sessionsDirtyFiles.clear(); - } else if (this.sessionsDirtyFiles.size > 0) { - this.sessionsDirty = true; - } else { - this.sessionsDirty = false; - } - - nextMeta = buildEmbeddingIndexMeta({ - provider: this.provider, - providerKey: this.providerKey, - configuredSources: this.resolveConfiguredSourcesForMeta(), - configuredScopeHash: this.resolveConfiguredScopeHash(), - chunkTokens: this.settings.chunking.tokens, - chunkOverlap: 
this.settings.chunking.overlap, + nextMeta = await runExtensionHostEmbeddingReindexBody({ + shouldSyncMemory, + shouldSyncSessions, + hasDirtySessionFiles: this.sessionsDirtyFiles.size > 0, + progress: params.progress, + syncMemoryFiles: async (syncParams) => { + await this.syncMemoryFiles(syncParams); + }, + syncSessionFiles: async (syncParams) => { + await this.syncSessionFiles(syncParams); + }, + setDirty: (value) => { + this.dirty = value; + }, + setSessionsDirty: (value) => { + this.sessionsDirty = value; + }, + clearAllSessionDirtyFiles: () => { + this.sessionsDirtyFiles.clear(); + }, + buildNextMeta: () => + buildEmbeddingIndexMeta({ + provider: this.provider, + providerKey: this.providerKey, + configuredSources: this.resolveConfiguredSourcesForMeta(), + configuredScopeHash: this.resolveConfiguredScopeHash(), + chunkTokens: this.settings.chunking.tokens, + chunkOverlap: this.settings.chunking.overlap, + }), + vectorDims: this.vector.available && this.vector.dims ? this.vector.dims : undefined, + writeMeta: (meta) => { + this.writeMeta(meta); + }, + pruneEmbeddingCacheIfNeeded: () => { + this.pruneEmbeddingCacheIfNeeded?.(); + }, }); - if (!nextMeta) { - throw new Error("Failed to compute memory index metadata for reindexing."); - } - - if (this.vector.available && this.vector.dims) { - nextMeta.vectorDims = this.vector.dims; - } - - this.writeMeta(nextMeta); - this.pruneEmbeddingCacheIfNeeded?.(); this.db.close(); originalDb.close(); @@ -1174,49 +1181,63 @@ export abstract class MemoryManagerSyncOps { { reason: params.reason, force: params.force }, true, ); - - if (shouldSyncMemory) { - await this.syncMemoryFiles({ needsFullReindex: true, progress: params.progress }); - this.dirty = false; - } - - if (shouldSyncSessions) { - await this.syncSessionFiles({ needsFullReindex: true, progress: params.progress }); - this.sessionsDirty = false; - this.sessionsDirtyFiles.clear(); - } else if (this.sessionsDirtyFiles.size > 0) { - this.sessionsDirty = true; - } else { 
- this.sessionsDirty = false; - } - - const nextMeta = buildEmbeddingIndexMeta({ - provider: this.provider, - providerKey: this.providerKey, - configuredSources: this.resolveConfiguredSourcesForMeta(), - configuredScopeHash: this.resolveConfiguredScopeHash(), - chunkTokens: this.settings.chunking.tokens, - chunkOverlap: this.settings.chunking.overlap, + await runExtensionHostEmbeddingReindexBody({ + shouldSyncMemory, + shouldSyncSessions, + hasDirtySessionFiles: this.sessionsDirtyFiles.size > 0, + progress: params.progress, + syncMemoryFiles: async (syncParams) => { + await this.syncMemoryFiles(syncParams); + }, + syncSessionFiles: async (syncParams) => { + await this.syncSessionFiles(syncParams); + }, + setDirty: (value) => { + this.dirty = value; + }, + setSessionsDirty: (value) => { + this.sessionsDirty = value; + }, + clearAllSessionDirtyFiles: () => { + this.sessionsDirtyFiles.clear(); + }, + buildNextMeta: () => + buildEmbeddingIndexMeta({ + provider: this.provider, + providerKey: this.providerKey, + configuredSources: this.resolveConfiguredSourcesForMeta(), + configuredScopeHash: this.resolveConfiguredScopeHash(), + chunkTokens: this.settings.chunking.tokens, + chunkOverlap: this.settings.chunking.overlap, + }), + vectorDims: this.vector.available && this.vector.dims ? 
this.vector.dims : undefined, + writeMeta: (meta) => { + this.writeMeta(meta); + }, + pruneEmbeddingCacheIfNeeded: () => { + this.pruneEmbeddingCacheIfNeeded?.(); + }, }); - if (this.vector.available && this.vector.dims) { - nextMeta.vectorDims = this.vector.dims; - } - - this.writeMeta(nextMeta); - this.pruneEmbeddingCacheIfNeeded?.(); } private resetIndex() { - this.db.exec(`DELETE FROM files`); - this.db.exec(`DELETE FROM chunks`); - if (this.fts.enabled && this.fts.available) { - try { - this.db.exec(`DELETE FROM ${FTS_TABLE}`); - } catch {} - } - this.dropVectorTable(); - this.vector.dims = undefined; - this.sessionsDirtyFiles.clear(); + resetExtensionHostEmbeddingIndexStore({ + execSql: (sql) => { + this.db.exec(sql); + }, + ftsEnabled: this.fts.enabled, + ftsAvailable: this.fts.available, + ftsTable: FTS_TABLE, + dropVectorTable: () => { + this.dropVectorTable(); + }, + clearVectorDims: () => { + this.vector.dims = undefined; + }, + clearAllSessionDirtyFiles: () => { + this.sessionsDirtyFiles.clear(); + }, + }); } protected readMeta(): EmbeddingIndexMeta | null {