mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
memory: fix multimodal reindex race
This commit is contained in:
@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
|
||||
- iOS/TestFlight: add a local beta release flow with Fastlane prepare/archive/upload support, canonical beta bundle IDs, and watch-app archive fixes. (#42991) Thanks @ngutman.
|
||||
- macOS/onboarding: detect when remote gateways need a shared auth token, explain where to find it on the gateway host, and clarify when a successful check used paired-device auth instead. (#43100) Thanks @ngutman.
|
||||
- Onboarding/Ollama: add first-class Ollama setup with Local or Cloud + Local modes, browser-based cloud sign-in, curated model suggestions, and cloud-model handling that skips unnecessary local pulls. (#41529) Thanks @BruceMacD.
|
||||
- Memory: add opt-in multimodal image and audio indexing for `memorySearch.extraPaths` with Gemini `gemini-embedding-2-preview`, strict fallback gating, and scope-based reindexing. (#43460) Thanks @gumadeiras.
|
||||
|
||||
### Breaking
|
||||
|
||||
|
||||
@@ -311,6 +311,21 @@ describe("gemini-embedding-2-preview provider", () => {
|
||||
expect(body.outputDimensionality).toBe(768);
|
||||
});
|
||||
|
||||
// The fetch mock is built from [3, 4, NaN] (presumably the raw embedding values
// returned by the API — confirm against createGeminiFetchMock). The provider is
// expected to hand back [0.6, 0.8, 0]: the NaN replaced by 0 and the remaining
// vector scaled to unit length.
it("sanitizes and normalizes embedQuery responses", async () => {
  const rawValues = [3, 4, Number.NaN];
  vi.stubGlobal("fetch", createGeminiFetchMock(rawValues));
  mockResolvedProviderKey();

  const created = await createGeminiEmbeddingProvider({
    config: {} as never,
    provider: "gemini",
    model: "gemini-embedding-2-preview",
    fallback: "none",
  });

  const pendingEmbedding = created.provider.embedQuery("test");
  await expect(pendingEmbedding).resolves.toEqual([0.6, 0.8, 0]);
});
|
||||
|
||||
it("uses custom outputDimensionality for each embedBatch request", async () => {
|
||||
const fetchMock = createGeminiBatchFetchMock(2);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
@@ -333,6 +348,31 @@ describe("gemini-embedding-2-preview provider", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
// The batch fetch mock is built with a count of 1 and the values
// [0, +Infinity, 5] (presumably one embedding with those raw values — confirm
// against createGeminiBatchFetchMock). The structured (text + inline image)
// request is expected to resolve to [[0, 0, 1]]: the non-finite component
// replaced by 0 and the vector scaled to unit length.
it("sanitizes and normalizes structured batch responses", async () => {
  const rawValues = [0, Number.POSITIVE_INFINITY, 5];
  vi.stubGlobal("fetch", createGeminiBatchFetchMock(1, rawValues));
  mockResolvedProviderKey();

  const created = await createGeminiEmbeddingProvider({
    config: {} as never,
    provider: "gemini",
    model: "gemini-embedding-2-preview",
    fallback: "none",
  });

  const imageLabel = "Image file: diagram.png";
  const pendingBatch = created.provider.embedBatchInputs?.([
    {
      text: imageLabel,
      parts: [
        { type: "text", text: imageLabel },
        { type: "inline-data", mimeType: "image/png", data: "img" },
      ],
    },
  ]);

  await expect(pendingBatch).resolves.toEqual([[0, 0, 1]]);
});
|
||||
|
||||
it("supports multimodal embedBatchInputs requests", async () => {
|
||||
const fetchMock = createGeminiBatchFetchMock(2);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
@@ -340,6 +340,53 @@ describe("memory index", () => {
|
||||
await manager.close?.();
|
||||
});
|
||||
|
||||
// Regression test for the multimodal reindex race: the image is readable on the
// first read during a sync, then the second read of the same path fails with
// ENOENT (simulating the file vanishing mid-sync). A later sync, once the file
// is readable again, must embed it again and make it searchable.
it("reindexes a multimodal file after a transient mid-sync disappearance", async () => {
  const mediaDir = path.join(workspaceDir, "media-race");
  const imagePath = path.join(mediaDir, "diagram.png");
  await fs.mkdir(mediaDir, { recursive: true });
  await fs.writeFile(imagePath, Buffer.from("png"));

  const cfg = createCfg({
    storePath: path.join(workspaceDir, `index-race-${randomUUID()}.sqlite`),
    provider: "gemini",
    model: "gemini-embedding-2-preview",
    extraPaths: [mediaDir],
    multimodal: { enabled: true, modalities: ["image"] },
  });
  const manager = requireManager(await getMemorySearchManager({ cfg, agentId: "main" }));

  // Close the manager even when a sync or assertion fails, so a failing run
  // does not leave it open for the tests that follow.
  try {
    const realReadFile = fs.readFile.bind(fs);
    let imageReads = 0;
    const readSpy = vi.spyOn(fs, "readFile").mockImplementation(async (...args) => {
      const [targetPath] = args;
      if (typeof targetPath === "string" && targetPath === imagePath) {
        imageReads += 1;
        // Only the second read of the image fails; every other read is real.
        if (imageReads === 2) {
          const err = Object.assign(
            new Error(`ENOENT: no such file or directory, open '${imagePath}'`),
            {
              code: "ENOENT",
            },
          ) as NodeJS.ErrnoException;
          throw err;
        }
      }
      return await realReadFile(...args);
    });

    // Always undo the fs.readFile patch, including when the first sync rejects;
    // otherwise the failing-read stub would leak into later tests.
    try {
      await manager.sync({ reason: "test" });
    } finally {
      readSpy.mockRestore();
    }

    const callsAfterFirstSync = embedBatchInputCalls;
    // Force the next sync to run by flipping the manager's internal dirty flag.
    (manager as unknown as { dirty: boolean }).dirty = true;
    await manager.sync({ reason: "test" });

    expect(embedBatchInputCalls).toBeGreaterThan(callsAfterFirstSync);
    const results = await manager.search("image");
    expect(results.some((result) => result.path.endsWith("diagram.png"))).toBe(true);
  } finally {
    await manager.close?.();
  }
});
|
||||
|
||||
it("keeps dirty false in status-only manager after prior indexing", async () => {
|
||||
const cfg = createCfg({ storePath: indexStatusPath });
|
||||
|
||||
|
||||
@@ -791,6 +791,10 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
.run(entry.path, source, entry.hash, entry.mtimeMs, entry.size);
|
||||
}
|
||||
|
||||
/**
 * Deletes the row(s) in the `files` table whose `path` and `source` columns
 * match the given values.
 *
 * @param pathname - Value matched against the `path` column.
 * @param source - Value matched against the `source` column.
 */
private deleteFileRecord(pathname: string, source: MemorySource): void {
  const removeFileRow = this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`);
  removeFileRow.run(pathname, source);
}
|
||||
|
||||
private isStructuredInputTooLargeError(message: string): boolean {
|
||||
return /(413|payload too large|request too large|input too large|too many tokens|input limit|request size)/i.test(
|
||||
message,
|
||||
@@ -816,6 +820,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
const multimodalChunk = await buildMultimodalChunkForIndexing(entry);
|
||||
if (!multimodalChunk) {
|
||||
this.clearIndexedFileData(entry.path, options.source);
|
||||
this.deleteFileRecord(entry.path, options.source);
|
||||
return;
|
||||
}
|
||||
structuredInputBytes = multimodalChunk.structuredInputBytes;
|
||||
|
||||
Reference in New Issue
Block a user