Memory: keep FTS-only indexing on reindex (#42714)

This commit is contained in:
Vignesh Natarajan
2026-03-29 00:04:51 -07:00
parent 41c30f0c59
commit 598f539be5
4 changed files with 194 additions and 17 deletions

View File

@@ -168,6 +168,7 @@ Docs: https://docs.openclaw.ai
- Plugins/Matrix: encrypt E2EE image thumbnails with `thumbnail_file` while keeping unencrypted-room previews on `thumbnail_url`, so encrypted Matrix image events keep thumbnail metadata without leaking plaintext previews. (#54711) Thanks @frischeDaten.
- Telegram/forum topics: keep native `/new` and `/reset` routed to the active topic by preserving the topic target on forum-thread command context. (#35963)
- Status/port diagnostics: treat single-process dual-stack loopback gateway listeners as healthy in `openclaw status --all`, suppressing false “port already in use” conflict warnings. (#53398) Thanks @DanWebb1949.
- Memory/builtin: keep memory-file indexing active in FTS-only mode (no embedding provider) so forced reindexes no longer swap in an empty index and wipe existing memory chunks. (#42714) Thanks @asamimei.
## 2026.3.24

View File

@@ -664,9 +664,6 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
) {
// FTS-only mode: no embedding provider, but we can still build a FTS index
if (!this.provider) {
if (!this.fts.enabled || !this.fts.available) {
return;
}
// Multimodal files require an embedding provider; skip in FTS-only mode.
if ("kind" in entry && entry.kind === "multimodal") {
return;
@@ -685,6 +682,15 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
let chunks: MemoryChunk[];
let structuredInputBytes: number | undefined;
if ("kind" in entry && entry.kind === "multimodal") {
if (!this.provider) {
log.debug("Skipping multimodal indexing in FTS-only mode", {
path: entry.path,
source: options.source,
});
this.clearIndexedFileData(entry.path, options.source);
this.upsertFileRecord(entry, options.source);
return;
}
const multimodalChunk = await buildMultimodalChunkForIndexing(entry);
if (!multimodalChunk) {
this.clearIndexedFileData(entry.path, options.source);
@@ -695,17 +701,67 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
chunks = [multimodalChunk.chunk];
} else {
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
chunks = enforceEmbeddingMaxInputTokens(
this.provider,
chunkMarkdown(content, this.settings.chunking).filter(
(chunk) => chunk.text.trim().length > 0,
),
EMBEDDING_BATCH_MAX_TOKENS,
const baseChunks = chunkMarkdown(content, this.settings.chunking).filter(
(chunk) => chunk.text.trim().length > 0,
);
chunks = this.provider
? enforceEmbeddingMaxInputTokens(this.provider, baseChunks, EMBEDDING_BATCH_MAX_TOKENS)
: baseChunks;
if (options.source === "sessions" && "lineMap" in entry) {
remapChunkLines(chunks, entry.lineMap);
}
}
if (!this.provider) {
this.clearIndexedFileData(entry.path, options.source);
const now = Date.now();
for (const chunk of chunks) {
const id = hashText(
`${options.source}:${entry.path}:${chunk.startLine}:${chunk.endLine}:${chunk.hash}:${indexModel}`,
);
this.db
.prepare(
`INSERT INTO chunks (id, path, source, start_line, end_line, hash, model, text, embedding, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
hash=excluded.hash,
model=excluded.model,
text=excluded.text,
embedding=excluded.embedding,
updated_at=excluded.updated_at`,
)
.run(
id,
entry.path,
options.source,
chunk.startLine,
chunk.endLine,
chunk.hash,
indexModel,
chunk.text,
"[]",
now,
);
if (this.fts.enabled && this.fts.available) {
this.db
.prepare(
`INSERT INTO ${FTS_TABLE} (text, id, path, source, model, start_line, end_line)\n` +
` VALUES (?, ?, ?, ?, ?, ?, ?)`,
)
.run(
chunk.text,
id,
entry.path,
options.source,
indexModel,
chunk.startLine,
chunk.endLine,
);
}
}
this.upsertFileRecord(entry, options.source);
return;
}
let embeddings: number[][];
try {
embeddings = this.batch.enabled

View File

@@ -707,9 +707,9 @@ export abstract class MemoryManagerSyncOps {
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
: null;
const deleteFtsRowsByPathSourceAndModel =
const deleteFtsRowsByPathAndSource =
this.fts.enabled && this.fts.available
? this.db.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
? this.db.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ?`)
: null;
const files = await listMemoryFiles(
@@ -780,13 +780,9 @@ export abstract class MemoryManagerSyncOps {
} catch {}
}
deleteChunksByPathAndSource.run(stale.path, "memory");
if (deleteFtsRowsByPathSourceAndModel) {
if (deleteFtsRowsByPathAndSource) {
try {
deleteFtsRowsByPathSourceAndModel.run(
stale.path,
"memory",
this.provider?.model ?? "fts-only",
);
deleteFtsRowsByPathAndSource.run(stale.path, "memory");
} catch {}
}
}

View File

@@ -0,0 +1,124 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { DatabaseSync } from "node:sqlite";
import type { OpenClawConfig } from "openclaw/plugin-sdk/memory-core-host-engine-foundation";
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import type { MemoryIndexManager } from "./manager.js";
import "./test-runtime-mocks.js";
// Stub out the embeddings module so provider resolution always reports that
// no embedding provider exists (`provider: null`) and the fallback model name
// is "fts-only". Everything the factory needs is declared inside it, because
// vitest hoists `vi.mock` calls above the imports.
vi.mock("./embeddings.js", () => {
  const createEmbeddingProvider = async () => {
    return {
      requestedProvider: "auto",
      provider: null,
      providerUnavailableReason: "No embeddings provider available.",
    };
  };
  const resolveEmbeddingProviderFallbackModel = () => {
    return "fts-only";
  };
  return { createEmbeddingProvider, resolveEmbeddingProviderFallbackModel };
});

// Shape of the dynamically imported entry module; used to type the bindings
// that are re-imported before every test.
type MemoryIndexModule = typeof import("./index.js");
/**
 * Regression suite for the builtin memory manager when the embeddings module
 * is mocked to return no provider (FTS-only mode).
 *
 * Each test gets its own workspace directory containing a single `MEMORY.md`
 * file and a dedicated SQLite index path. Assertions read the `chunks` table
 * directly through a separate `DatabaseSync` connection.
 */
describe("memory manager FTS-only reindex", () => {
  // Temp root shared by the suite; every test case lives in a `case-N` sub-directory.
  let fixtureRoot = "";
  let caseId = 0;
  let workspaceDir = "";
  let indexPath = "";
  // Manager created by the current test (if any); closed in `afterEach`.
  let manager: MemoryIndexManager | null = null;
  // Re-bound before each test from a freshly imported `./index.js`.
  let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
  let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];

  beforeAll(async () => {
    fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-fts-only-"));
  });

  beforeEach(async () => {
    // Drop vitest's module cache so the entry module is re-evaluated per test.
    vi.resetModules();
    ({ getMemorySearchManager, closeAllMemorySearchManagers } = await import("./index.js"));
    workspaceDir = path.join(fixtureRoot, `case-${caseId++}`);
    await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
    await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Alpha topic\n\nKeep this note.");
    indexPath = path.join(workspaceDir, "index.sqlite");
  });

  afterEach(async () => {
    // Clear the shared reference first so a failing close() cannot leave a
    // stale manager behind for the next test to close again.
    const activeManager = manager;
    manager = null;
    try {
      await activeManager?.close();
    } finally {
      // Always release the module-level managers, even when close() rejects;
      // the original rejection still propagates and fails the hook.
      await closeAllMemorySearchManagers();
    }
  });

  afterAll(async () => {
    // Nothing to remove if `beforeAll` never created the temp root.
    if (!fixtureRoot) {
      return;
    }
    await fs.rm(fixtureRoot, { recursive: true, force: true });
  });

  /**
   * Builds a config pointing at the current test workspace and index path and
   * asks the entry module for the "main" agent's manager.
   *
   * @returns The manager, also stored in `manager` so `afterEach` can close it.
   * @throws Error when the lookup result carries no manager.
   */
  async function createManager(): Promise<MemoryIndexManager> {
    const cfg = {
      memory: {
        backend: "builtin",
      },
      agents: {
        defaults: {
          workspace: workspaceDir,
          memorySearch: {
            provider: "auto",
            model: "",
            store: { path: indexPath },
            cache: { enabled: false },
            sync: { watch: false, onSessionStart: false, onSearch: false },
          },
        },
        list: [{ id: "main", default: true }],
      },
    } as OpenClawConfig;
    const result = await getMemorySearchManager({ cfg, agentId: "main" });
    if (!result.manager) {
      throw new Error(result.error ?? "manager missing");
    }
    manager = result.manager as unknown as MemoryIndexManager;
    return manager;
  }

  /**
   * Counts rows in the `chunks` table whose text contains `term`.
   *
   * Opens a short-lived connection to the index file and always closes it.
   * `term` is bound as a LIKE pattern, so `%` and `_` inside it act as wildcards.
   */
  function countChunksContaining(term: string): number {
    const db = new DatabaseSync(indexPath);
    try {
      const row = db
        .prepare(`SELECT COUNT(*) as c FROM chunks WHERE text LIKE ?`)
        .get(`%${term}%`) as { c: number } | undefined;
      return row?.c ?? 0;
    } finally {
      db.close();
    }
  }

  it("preserves indexed chunks across forced reindex in FTS-only mode", async () => {
    const memoryManager = await createManager();

    await memoryManager.sync({ force: true });
    const firstStatus = memoryManager.status();
    expect(firstStatus.chunks).toBeGreaterThan(0);
    expect(countChunksContaining("Alpha topic")).toBeGreaterThan(0);

    // A second forced sync must not leave the index empty.
    await memoryManager.sync({ force: true });
    const secondStatus = memoryManager.status();
    expect(secondStatus.chunks).toBeGreaterThan(0);
    expect(countChunksContaining("Alpha topic")).toBeGreaterThan(0);
  });

  it("refreshes FTS-only indexed content after memory file updates", async () => {
    const memoryManager = await createManager();
    await memoryManager.sync({ force: true });

    await fs.writeFile(
      path.join(workspaceDir, "MEMORY.md"),
      "Beta refresh marker\n\nUpdated memory content.",
    );
    await memoryManager.sync({ force: true });

    // New text is indexed and the previous text is gone.
    expect(countChunksContaining("refresh marker")).toBeGreaterThan(0);
    expect(countChunksContaining("Alpha topic")).toBe(0);
  });
});