Memory: revalidate multimodal files before indexing

This commit is contained in:
Gustavo Madeira Santana
2026-03-11 22:50:32 +00:00
parent 453c8d7c1b
commit da6f97a3f6
2 changed files with 48 additions and 2 deletions

View File

@@ -214,6 +214,28 @@ describe("buildFileEntry", () => {
]);
expect(built?.structuredInputBytes).toBeGreaterThan(0);
});
it("skips lazy multimodal indexing when the file grows after discovery", async () => {
  // Build the entry while the file still holds its original three bytes.
  const workDir = getTmpDir();
  const imagePath = path.join(workDir, "diagram.png");
  await fs.writeFile(imagePath, Buffer.from("png"));
  const discovered = await buildFileEntry(imagePath, workDir, multimodal);

  // Overwrite the file with 32 more bytes than the entry recorded.
  const grownSize = discovered!.size + 32;
  await fs.writeFile(imagePath, Buffer.alloc(grownSize, 1));

  // Indexing from the now-stale entry is expected to resolve to null.
  const pendingChunk = buildMultimodalChunkForIndexing(discovered!);
  await expect(pendingChunk).resolves.toBeNull();
});
it("skips lazy multimodal indexing when file bytes change after discovery", async () => {
  // Build the entry from the file's initial contents.
  const workDir = getTmpDir();
  const imagePath = path.join(workDir, "diagram.png");
  await fs.writeFile(imagePath, Buffer.from("png"));
  const discovered = await buildFileEntry(imagePath, workDir, multimodal);

  // Replace the contents with different bytes of the same length ("png" -> "gif"),
  // so the byte count is unchanged and only the content differs.
  const replacement = Buffer.from("gif");
  await fs.writeFile(imagePath, replacement);

  // Indexing from the now-stale entry is expected to resolve to null.
  const pendingChunk = buildMultimodalChunkForIndexing(discovered!);
  await expect(pendingChunk).resolves.toBeNull();
});
});
describe("chunkMarkdown", () => {

View File

@@ -20,6 +20,7 @@ export type MemoryFileEntry = {
mtimeMs: number;
size: number;
hash: string;
dataHash?: string;
kind?: "markdown" | "multimodal";
contentText?: string;
modality?: MemoryMultimodalModality;
@@ -234,6 +235,7 @@ export async function buildFileEntry(
mtimeMs: stat.mtimeMs,
size: stat.size,
hash: chunkHash,
dataHash,
kind: "multimodal",
contentText,
modality,
@@ -261,11 +263,26 @@ export async function buildFileEntry(
}
async function loadMultimodalEmbeddingInput(
entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind">,
entry: Pick<
MemoryFileEntry,
"absPath" | "contentText" | "mimeType" | "kind" | "size" | "dataHash"
>,
): Promise<EmbeddingInput | null> {
if (entry.kind !== "multimodal" || !entry.contentText || !entry.mimeType) {
return null;
}
let stat;
try {
stat = await fs.stat(entry.absPath);
} catch (err) {
if (isFileMissingError(err)) {
return null;
}
throw err;
}
if (stat.size !== entry.size) {
return null;
}
let buffer: Buffer;
try {
buffer = await fs.readFile(entry.absPath);
@@ -275,6 +292,10 @@ async function loadMultimodalEmbeddingInput(
}
throw err;
}
const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
if (entry.dataHash && entry.dataHash !== dataHash) {
return null;
}
return {
text: entry.contentText,
parts: [
@@ -289,7 +310,10 @@ async function loadMultimodalEmbeddingInput(
}
export async function buildMultimodalChunkForIndexing(
entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind" | "hash">,
entry: Pick<
MemoryFileEntry,
"absPath" | "contentText" | "mimeType" | "kind" | "hash" | "size" | "dataHash"
>,
): Promise<MultimodalMemoryChunk | null> {
const embeddingInput = await loadMultimodalEmbeddingInput(entry);
if (!embeddingInput) {