mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
Memory: revalidate multimodal files before indexing
This commit is contained in:
@@ -214,6 +214,28 @@ describe("buildFileEntry", () => {
|
|||||||
]);
|
]);
|
||||||
expect(built?.structuredInputBytes).toBeGreaterThan(0);
|
expect(built?.structuredInputBytes).toBeGreaterThan(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("skips lazy multimodal indexing when the file grows after discovery", async () => {
|
||||||
|
const tmpDir = getTmpDir();
|
||||||
|
const target = path.join(tmpDir, "diagram.png");
|
||||||
|
await fs.writeFile(target, Buffer.from("png"));
|
||||||
|
|
||||||
|
const entry = await buildFileEntry(target, tmpDir, multimodal);
|
||||||
|
await fs.writeFile(target, Buffer.alloc(entry!.size + 32, 1));
|
||||||
|
|
||||||
|
await expect(buildMultimodalChunkForIndexing(entry!)).resolves.toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("skips lazy multimodal indexing when file bytes change after discovery", async () => {
|
||||||
|
const tmpDir = getTmpDir();
|
||||||
|
const target = path.join(tmpDir, "diagram.png");
|
||||||
|
await fs.writeFile(target, Buffer.from("png"));
|
||||||
|
|
||||||
|
const entry = await buildFileEntry(target, tmpDir, multimodal);
|
||||||
|
await fs.writeFile(target, Buffer.from("gif"));
|
||||||
|
|
||||||
|
await expect(buildMultimodalChunkForIndexing(entry!)).resolves.toBeNull();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("chunkMarkdown", () => {
|
describe("chunkMarkdown", () => {
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ export type MemoryFileEntry = {
|
|||||||
mtimeMs: number;
|
mtimeMs: number;
|
||||||
size: number;
|
size: number;
|
||||||
hash: string;
|
hash: string;
|
||||||
|
dataHash?: string;
|
||||||
kind?: "markdown" | "multimodal";
|
kind?: "markdown" | "multimodal";
|
||||||
contentText?: string;
|
contentText?: string;
|
||||||
modality?: MemoryMultimodalModality;
|
modality?: MemoryMultimodalModality;
|
||||||
@@ -234,6 +235,7 @@ export async function buildFileEntry(
|
|||||||
mtimeMs: stat.mtimeMs,
|
mtimeMs: stat.mtimeMs,
|
||||||
size: stat.size,
|
size: stat.size,
|
||||||
hash: chunkHash,
|
hash: chunkHash,
|
||||||
|
dataHash,
|
||||||
kind: "multimodal",
|
kind: "multimodal",
|
||||||
contentText,
|
contentText,
|
||||||
modality,
|
modality,
|
||||||
@@ -261,11 +263,26 @@ export async function buildFileEntry(
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function loadMultimodalEmbeddingInput(
|
async function loadMultimodalEmbeddingInput(
|
||||||
entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind">,
|
entry: Pick<
|
||||||
|
MemoryFileEntry,
|
||||||
|
"absPath" | "contentText" | "mimeType" | "kind" | "size" | "dataHash"
|
||||||
|
>,
|
||||||
): Promise<EmbeddingInput | null> {
|
): Promise<EmbeddingInput | null> {
|
||||||
if (entry.kind !== "multimodal" || !entry.contentText || !entry.mimeType) {
|
if (entry.kind !== "multimodal" || !entry.contentText || !entry.mimeType) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
let stat;
|
||||||
|
try {
|
||||||
|
stat = await fs.stat(entry.absPath);
|
||||||
|
} catch (err) {
|
||||||
|
if (isFileMissingError(err)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
if (stat.size !== entry.size) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
let buffer: Buffer;
|
let buffer: Buffer;
|
||||||
try {
|
try {
|
||||||
buffer = await fs.readFile(entry.absPath);
|
buffer = await fs.readFile(entry.absPath);
|
||||||
@@ -275,6 +292,10 @@ async function loadMultimodalEmbeddingInput(
|
|||||||
}
|
}
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
|
const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
|
||||||
|
if (entry.dataHash && entry.dataHash !== dataHash) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
text: entry.contentText,
|
text: entry.contentText,
|
||||||
parts: [
|
parts: [
|
||||||
@@ -289,7 +310,10 @@ async function loadMultimodalEmbeddingInput(
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function buildMultimodalChunkForIndexing(
|
export async function buildMultimodalChunkForIndexing(
|
||||||
entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind" | "hash">,
|
entry: Pick<
|
||||||
|
MemoryFileEntry,
|
||||||
|
"absPath" | "contentText" | "mimeType" | "kind" | "hash" | "size" | "dataHash"
|
||||||
|
>,
|
||||||
): Promise<MultimodalMemoryChunk | null> {
|
): Promise<MultimodalMemoryChunk | null> {
|
||||||
const embeddingInput = await loadMultimodalEmbeddingInput(entry);
|
const embeddingInput = await loadMultimodalEmbeddingInput(entry);
|
||||||
if (!embeddingInput) {
|
if (!embeddingInput) {
|
||||||
|
|||||||
Reference in New Issue
Block a user