From da6f97a3f6ff7c3829d121409fe2ec3e6b3906b3 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Wed, 11 Mar 2026 22:50:32 +0000 Subject: [PATCH] Memory: revalidate multimodal files before indexing --- src/memory/internal.test.ts | 22 ++++++++++++++++++++++ src/memory/internal.ts | 28 ++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/memory/internal.test.ts b/src/memory/internal.test.ts index ec0f75f143b..d18120b413a 100644 --- a/src/memory/internal.test.ts +++ b/src/memory/internal.test.ts @@ -214,6 +214,28 @@ describe("buildFileEntry", () => { ]); expect(built?.structuredInputBytes).toBeGreaterThan(0); }); + + it("skips lazy multimodal indexing when the file grows after discovery", async () => { + const tmpDir = getTmpDir(); + const target = path.join(tmpDir, "diagram.png"); + await fs.writeFile(target, Buffer.from("png")); + + const entry = await buildFileEntry(target, tmpDir, multimodal); + await fs.writeFile(target, Buffer.alloc(entry!.size + 32, 1)); + + await expect(buildMultimodalChunkForIndexing(entry!)).resolves.toBeNull(); + }); + + it("skips lazy multimodal indexing when file bytes change after discovery", async () => { + const tmpDir = getTmpDir(); + const target = path.join(tmpDir, "diagram.png"); + await fs.writeFile(target, Buffer.from("png")); + + const entry = await buildFileEntry(target, tmpDir, multimodal); + await fs.writeFile(target, Buffer.from("gif")); + + await expect(buildMultimodalChunkForIndexing(entry!)).resolves.toBeNull(); + }); }); describe("chunkMarkdown", () => { diff --git a/src/memory/internal.ts b/src/memory/internal.ts index 96ce0e918ad..d1d7e9c2e96 100644 --- a/src/memory/internal.ts +++ b/src/memory/internal.ts @@ -20,6 +20,7 @@ export type MemoryFileEntry = { mtimeMs: number; size: number; hash: string; + dataHash?: string; kind?: "markdown" | "multimodal"; contentText?: string; modality?: MemoryMultimodalModality; @@ -234,6 +235,7 @@ export async function buildFileEntry( mtimeMs: stat.mtimeMs, size: stat.size, hash: chunkHash, + dataHash, kind: "multimodal", contentText, modality, @@ -261,11 +263,26 @@ export async function buildFileEntry( } async function loadMultimodalEmbeddingInput( - entry: Pick, + entry: Pick< + MemoryFileEntry, + "absPath" | "contentText" | "mimeType" | "kind" | "size" | "dataHash" + >, ): Promise { if (entry.kind !== "multimodal" || !entry.contentText || !entry.mimeType) { return null; } + let stat; + try { + stat = await fs.stat(entry.absPath); + } catch (err) { + if (isFileMissingError(err)) { + return null; + } + throw err; + } + if (stat.size !== entry.size) { + return null; + } let buffer: Buffer; try { buffer = await fs.readFile(entry.absPath); @@ -275,6 +292,10 @@ async function loadMultimodalEmbeddingInput( } throw err; } + const dataHash = crypto.createHash("sha256").update(buffer).digest("hex"); + if (entry.dataHash && entry.dataHash !== dataHash) { + return null; + } return { text: entry.contentText, parts: [ @@ -289,7 +310,10 @@ async function loadMultimodalEmbeddingInput( } export async function buildMultimodalChunkForIndexing( - entry: Pick, + entry: Pick< + MemoryFileEntry, + "absPath" | "contentText" | "mimeType" | "kind" | "hash" | "size" | "dataHash" + >, ): Promise { const embeddingInput = await loadMultimodalEmbeddingInput(entry); if (!embeddingInput) {