diff --git a/CHANGELOG.md b/CHANGELOG.md index e88bd0d4638..73fb9ac030a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai - iOS/TestFlight: add a local beta release flow with Fastlane prepare/archive/upload support, canonical beta bundle IDs, and watch-app archive fixes. (#42991) Thanks @ngutman. - macOS/onboarding: detect when remote gateways need a shared auth token, explain where to find it on the gateway host, and clarify when a successful check used paired-device auth instead. (#43100) Thanks @ngutman. - Onboarding/Ollama: add first-class Ollama setup with Local or Cloud + Local modes, browser-based cloud sign-in, curated model suggestions, and cloud-model handling that skips unnecessary local pulls. (#41529) Thanks @BruceMacD. +- Memory: add opt-in multimodal image and audio indexing for `memorySearch.extraPaths` with Gemini `gemini-embedding-2-preview`, strict fallback gating, and scope-based reindexing. (#43460) Thanks @gumadeiras. ### Breaking diff --git a/src/memory/embeddings-gemini.test.ts b/src/memory/embeddings-gemini.test.ts index 25d5f373431..f97cc6cb142 100644 --- a/src/memory/embeddings-gemini.test.ts +++ b/src/memory/embeddings-gemini.test.ts @@ -311,6 +311,21 @@ describe("gemini-embedding-2-preview provider", () => { expect(body.outputDimensionality).toBe(768); }); + it("sanitizes and normalizes embedQuery responses", async () => { + const fetchMock = createGeminiFetchMock([3, 4, Number.NaN]); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + await expect(provider.embedQuery("test")).resolves.toEqual([0.6, 0.8, 0]); + }); + it("uses custom outputDimensionality for each embedBatch request", async () => { const fetchMock = createGeminiBatchFetchMock(2); vi.stubGlobal("fetch", fetchMock); @@ -333,6 +348,31 @@ describe("gemini-embedding-2-preview provider", () => { ]); }); + it("sanitizes and normalizes structured batch responses", async () => { + const fetchMock = createGeminiBatchFetchMock(1, [0, Number.POSITIVE_INFINITY, 5]); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + await expect( + provider.embedBatchInputs?.([ + { + text: "Image file: diagram.png", + parts: [ + { type: "text", text: "Image file: diagram.png" }, + { type: "inline-data", mimeType: "image/png", data: "img" }, + ], + }, + ]), + ).resolves.toEqual([[0, 0, 1]]); + }); + it("supports multimodal embedBatchInputs requests", async () => { const fetchMock = createGeminiBatchFetchMock(2); vi.stubGlobal("fetch", fetchMock); diff --git a/src/memory/index.test.ts b/src/memory/index.test.ts index 1a0c6988121..23371056b18 100644 --- a/src/memory/index.test.ts +++ b/src/memory/index.test.ts @@ -340,6 +340,53 @@ describe("memory index", () => { await manager.close?.(); }); + it("reindexes a multimodal file after a transient mid-sync disappearance", async () => { + const mediaDir = path.join(workspaceDir, "media-race"); + const imagePath = path.join(mediaDir, "diagram.png"); + await fs.mkdir(mediaDir, { recursive: true }); + await fs.writeFile(imagePath, Buffer.from("png")); + + const cfg = createCfg({ + storePath: path.join(workspaceDir, `index-race-${randomUUID()}.sqlite`), + provider: "gemini", + model: "gemini-embedding-2-preview", + extraPaths: [mediaDir], + multimodal: { enabled: true, modalities: ["image"] }, + }); + const manager = requireManager(await getMemorySearchManager({ cfg, agentId: "main" })); + const realReadFile = fs.readFile.bind(fs); + let imageReads = 0; + const readSpy = vi.spyOn(fs, "readFile").mockImplementation(async (...args) => { + const [targetPath] = args; + if (typeof targetPath === "string" && targetPath === imagePath) { + imageReads += 1; + if (imageReads === 2) { + const err = Object.assign( + new Error(`ENOENT: no such file or directory, open '${imagePath}'`), + { + code: "ENOENT", + }, + ) as NodeJS.ErrnoException; + throw err; + } + } + return await realReadFile(...args); + }); + + await manager.sync({ reason: "test" }); + readSpy.mockRestore(); + + const callsAfterFirstSync = embedBatchInputCalls; + (manager as unknown as { dirty: boolean }).dirty = true; + await manager.sync({ reason: "test" }); + + expect(embedBatchInputCalls).toBeGreaterThan(callsAfterFirstSync); + const results = await manager.search("image"); + expect(results.some((result) => result.path.endsWith("diagram.png"))).toBe(true); + + await manager.close?.(); + }); + it("keeps dirty false in status-only manager after prior indexing", async () => { const cfg = createCfg({ storePath: indexStatusPath }); diff --git a/src/memory/manager-embedding-ops.ts b/src/memory/manager-embedding-ops.ts index 3f4521e12d1..49171d809cb 100644 --- a/src/memory/manager-embedding-ops.ts +++ b/src/memory/manager-embedding-ops.ts @@ -791,6 +791,10 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { .run(entry.path, source, entry.hash, entry.mtimeMs, entry.size); } + private deleteFileRecord(pathname: string, source: MemorySource): void { + this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`).run(pathname, source); + } + private isStructuredInputTooLargeError(message: string): boolean { return /(413|payload too large|request too large|input too large|too many tokens|input limit|request size)/i.test( message, @@ -816,6 +820,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { const multimodalChunk = await buildMultimodalChunkForIndexing(entry); if (!multimodalChunk) { this.clearIndexedFileData(entry.path, options.source); + this.deleteFileRecord(entry.path, options.source); return; } structuredInputBytes = multimodalChunk.structuredInputBytes;