mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-11 23:10:29 +00:00
Memory: revalidate multimodal files before indexing
This commit is contained in:
@@ -214,6 +214,28 @@ describe("buildFileEntry", () => {
|
||||
]);
|
||||
expect(built?.structuredInputBytes).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("skips lazy multimodal indexing when the file grows after discovery", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
const target = path.join(tmpDir, "diagram.png");
|
||||
await fs.writeFile(target, Buffer.from("png"));
|
||||
|
||||
const entry = await buildFileEntry(target, tmpDir, multimodal);
|
||||
await fs.writeFile(target, Buffer.alloc(entry!.size + 32, 1));
|
||||
|
||||
await expect(buildMultimodalChunkForIndexing(entry!)).resolves.toBeNull();
|
||||
});
|
||||
|
||||
it("skips lazy multimodal indexing when file bytes change after discovery", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
const target = path.join(tmpDir, "diagram.png");
|
||||
await fs.writeFile(target, Buffer.from("png"));
|
||||
|
||||
const entry = await buildFileEntry(target, tmpDir, multimodal);
|
||||
await fs.writeFile(target, Buffer.from("gif"));
|
||||
|
||||
await expect(buildMultimodalChunkForIndexing(entry!)).resolves.toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("chunkMarkdown", () => {
|
||||
|
||||
@@ -20,6 +20,7 @@ export type MemoryFileEntry = {
|
||||
mtimeMs: number;
|
||||
size: number;
|
||||
hash: string;
|
||||
dataHash?: string;
|
||||
kind?: "markdown" | "multimodal";
|
||||
contentText?: string;
|
||||
modality?: MemoryMultimodalModality;
|
||||
@@ -234,6 +235,7 @@ export async function buildFileEntry(
|
||||
mtimeMs: stat.mtimeMs,
|
||||
size: stat.size,
|
||||
hash: chunkHash,
|
||||
dataHash,
|
||||
kind: "multimodal",
|
||||
contentText,
|
||||
modality,
|
||||
@@ -261,11 +263,26 @@ export async function buildFileEntry(
|
||||
}
|
||||
|
||||
async function loadMultimodalEmbeddingInput(
|
||||
entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind">,
|
||||
entry: Pick<
|
||||
MemoryFileEntry,
|
||||
"absPath" | "contentText" | "mimeType" | "kind" | "size" | "dataHash"
|
||||
>,
|
||||
): Promise<EmbeddingInput | null> {
|
||||
if (entry.kind !== "multimodal" || !entry.contentText || !entry.mimeType) {
|
||||
return null;
|
||||
}
|
||||
let stat;
|
||||
try {
|
||||
stat = await fs.stat(entry.absPath);
|
||||
} catch (err) {
|
||||
if (isFileMissingError(err)) {
|
||||
return null;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
if (stat.size !== entry.size) {
|
||||
return null;
|
||||
}
|
||||
let buffer: Buffer;
|
||||
try {
|
||||
buffer = await fs.readFile(entry.absPath);
|
||||
@@ -275,6 +292,10 @@ async function loadMultimodalEmbeddingInput(
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
|
||||
if (entry.dataHash && entry.dataHash !== dataHash) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
text: entry.contentText,
|
||||
parts: [
|
||||
@@ -289,7 +310,10 @@ async function loadMultimodalEmbeddingInput(
|
||||
}
|
||||
|
||||
export async function buildMultimodalChunkForIndexing(
|
||||
entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind" | "hash">,
|
||||
entry: Pick<
|
||||
MemoryFileEntry,
|
||||
"absPath" | "contentText" | "mimeType" | "kind" | "hash" | "size" | "dataHash"
|
||||
>,
|
||||
): Promise<MultimodalMemoryChunk | null> {
|
||||
const embeddingInput = await loadMultimodalEmbeddingInput(entry);
|
||||
if (!embeddingInput) {
|
||||
|
||||
Reference in New Issue
Block a user