memory: fix multimodal reindex race

This commit is contained in:
Gustavo Madeira Santana
2026-03-11 22:21:13 +00:00
parent b980c086fb
commit 26c89ebff4
4 changed files with 93 additions and 0 deletions

View File

@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
- iOS/TestFlight: add a local beta release flow with Fastlane prepare/archive/upload support, canonical beta bundle IDs, and watch-app archive fixes. (#42991) Thanks @ngutman.
- macOS/onboarding: detect when remote gateways need a shared auth token, explain where to find it on the gateway host, and clarify when a successful check used paired-device auth instead. (#43100) Thanks @ngutman.
- Onboarding/Ollama: add first-class Ollama setup with Local or Cloud + Local modes, browser-based cloud sign-in, curated model suggestions, and cloud-model handling that skips unnecessary local pulls. (#41529) Thanks @BruceMacD.
- Memory: add opt-in multimodal image and audio indexing for `memorySearch.extraPaths` with Gemini `gemini-embedding-2-preview`, strict fallback gating, and scope-based reindexing. (#43460) Thanks @gumadeiras.
### Breaking

View File

@@ -311,6 +311,21 @@ describe("gemini-embedding-2-preview provider", () => {
expect(body.outputDimensionality).toBe(768);
});
it("sanitizes and normalizes embedQuery responses", async () => {
  // Raw values handed to the fetch mock; presumably they become the embedding
  // returned by the stubbed API. The NaN entry is the one that needs sanitizing.
  const rawValues = [3, 4, Number.NaN];
  const fetchStub = createGeminiFetchMock(rawValues);
  vi.stubGlobal("fetch", fetchStub);
  mockResolvedProviderKey();

  const created = await createGeminiEmbeddingProvider({
    config: {} as never,
    provider: "gemini",
    model: "gemini-embedding-2-preview",
    fallback: "none",
  });
  const embedding = await created.provider.embedQuery("test");

  // Expected: NaN replaced by 0, then [3, 4, 0] scaled to unit length (3/5, 4/5, 0).
  expect(embedding).toEqual([0.6, 0.8, 0]);
});
it("uses custom outputDimensionality for each embedBatch request", async () => {
const fetchMock = createGeminiBatchFetchMock(2);
vi.stubGlobal("fetch", fetchMock);
@@ -333,6 +348,31 @@ describe("gemini-embedding-2-preview provider", () => {
]);
});
it("sanitizes and normalizes structured batch responses", async () => {
  // Raw values handed to the batch fetch mock; the infinite entry is the one
  // that needs sanitizing before normalization.
  const rawValues = [0, Number.POSITIVE_INFINITY, 5];
  const fetchStub = createGeminiBatchFetchMock(1, rawValues);
  vi.stubGlobal("fetch", fetchStub);
  mockResolvedProviderKey();

  const created = await createGeminiEmbeddingProvider({
    config: {} as never,
    provider: "gemini",
    model: "gemini-embedding-2-preview",
    fallback: "none",
  });

  // A single structured input mixing a text part with inline image data.
  const imageInput = {
    text: "Image file: diagram.png",
    parts: [
      { type: "text", text: "Image file: diagram.png" },
      { type: "inline-data", mimeType: "image/png", data: "img" },
    ],
  };
  const embeddings = await created.provider.embedBatchInputs?.([imageInput]);

  // Expected: Infinity replaced by 0, then [0, 0, 5] scaled to unit length.
  expect(embeddings).toEqual([[0, 0, 1]]);
});
it("supports multimodal embedBatchInputs requests", async () => {
const fetchMock = createGeminiBatchFetchMock(2);
vi.stubGlobal("fetch", fetchMock);

View File

@@ -340,6 +340,53 @@ describe("memory index", () => {
await manager.close?.();
});
it("reindexes a multimodal file after a transient mid-sync disappearance", async () => {
  // Arrange: a one-image extra path indexed with multimodal (image) support enabled.
  const mediaDir = path.join(workspaceDir, "media-race");
  const imagePath = path.join(mediaDir, "diagram.png");
  await fs.mkdir(mediaDir, { recursive: true });
  await fs.writeFile(imagePath, Buffer.from("png"));
  const cfg = createCfg({
    storePath: path.join(workspaceDir, `index-race-${randomUUID()}.sqlite`),
    provider: "gemini",
    model: "gemini-embedding-2-preview",
    extraPaths: [mediaDir],
    multimodal: { enabled: true, modalities: ["image"] },
  });
  const manager = requireManager(await getMemorySearchManager({ cfg, agentId: "main" }));
  try {
    const realReadFile = fs.readFile.bind(fs);
    let imageReads = 0;
    // Fail only the second read of the image with ENOENT so the file appears to
    // vanish part-way through the first sync; every other read hits the real fs.
    const readSpy = vi.spyOn(fs, "readFile").mockImplementation(async (...args) => {
      const [targetPath] = args;
      if (typeof targetPath === "string" && targetPath === imagePath) {
        imageReads += 1;
        if (imageReads === 2) {
          const err = Object.assign(
            new Error(`ENOENT: no such file or directory, open '${imagePath}'`),
            {
              code: "ENOENT",
            },
          ) as NodeJS.ErrnoException;
          throw err;
        }
      }
      return await realReadFile(...args);
    });
    try {
      await manager.sync({ reason: "test" });
    } finally {
      // Always undo the spy: a failing sync must not leave fs.readFile patched
      // for whichever test runs next.
      readSpy.mockRestore();
    }

    const callsAfterFirstSync = embedBatchInputCalls;
    // `dirty` is not exposed on the manager's public type, hence the cast.
    // Presumably this keeps the second sync from being skipped as a no-op.
    (manager as unknown as { dirty: boolean }).dirty = true;
    await manager.sync({ reason: "test" });

    // The second sync must embed again and the image must be searchable.
    expect(embedBatchInputCalls).toBeGreaterThan(callsAfterFirstSync);
    const results = await manager.search("image");
    expect(results.some((result) => result.path.endsWith("diagram.png"))).toBe(true);
  } finally {
    // Close the manager even when a sync or an assertion above throws.
    await manager.close?.();
  }
});
it("keeps dirty false in status-only manager after prior indexing", async () => {
const cfg = createCfg({ storePath: indexStatusPath });

View File

@@ -791,6 +791,10 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
.run(entry.path, source, entry.hash, entry.mtimeMs, entry.size);
}
/**
 * Deletes the row(s) in the `files` table whose `path` and `source` columns
 * match the given values.
 *
 * @param pathname - Value bound to the `path` column.
 * @param source - Value bound to the `source` column.
 */
private deleteFileRecord(pathname: string, source: MemorySource): void {
  const removeFileRow = this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`);
  removeFileRow.run(pathname, source);
}
private isStructuredInputTooLargeError(message: string): boolean {
return /(413|payload too large|request too large|input too large|too many tokens|input limit|request size)/i.test(
message,
@@ -816,6 +820,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
const multimodalChunk = await buildMultimodalChunkForIndexing(entry);
if (!multimodalChunk) {
this.clearIndexedFileData(entry.path, options.source);
this.deleteFileRecord(entry.path, options.source);
return;
}
structuredInputBytes = multimodalChunk.structuredInputBytes;