From 87463eee46ab71be338e0b09db6b138d078dfe98 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Wed, 11 Mar 2026 21:46:56 +0000 Subject: [PATCH] memory: harden multimodal indexing failures --- docs/concepts/memory.md | 1 + src/config/schema.help.ts | 4 +- src/memory/index.test.ts | 37 ++++++++-- src/memory/manager-embedding-ops.ts | 101 +++++++++++++++++++--------- 4 files changed, 105 insertions(+), 38 deletions(-) diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md index 99519be871c..8ed755b394c 100644 --- a/docs/concepts/memory.md +++ b/docs/concepts/memory.md @@ -318,6 +318,7 @@ Notes: - Multimodal indexing applies only to files discovered through `memorySearch.extraPaths`. - Supported modalities in this phase: image and audio. - `memorySearch.fallback` must stay `"none"` while multimodal memory is enabled. +- Matching image/audio file bytes are uploaded to the configured Gemini embedding endpoint during indexing. - Supported image extensions: `.jpg`, `.jpeg`, `.png`, `.webp`, `.gif`, `.heic`, `.heif`. - Supported audio extensions: `.mp3`, `.wav`, `.ogg`, `.opus`, `.m4a`, `.aac`, `.flac`. - Search queries remain text, but Gemini can compare those text queries against indexed image/audio embeddings. diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 5370853589e..3db7f40fe73 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -780,9 +780,9 @@ export const FIELD_HELP: Record = { "agents.defaults.memorySearch.extraPaths": "Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; when multimodal memory is enabled, matching image/audio files under these paths are also eligible for indexing.", "agents.defaults.memorySearch.multimodal": - 'Optional multimodal memory settings for indexing image and audio files from configured extra paths. Keep this off unless your embedding model explicitly supports cross-modal embeddings, and set `memorySearch.fallback` to "none" while it is enabled.', + 'Optional multimodal memory settings for indexing image and audio files from configured extra paths. Keep this off unless your embedding model explicitly supports cross-modal embeddings, and set `memorySearch.fallback` to "none" while it is enabled. Matching files are uploaded to the configured remote embedding provider during indexing.', "agents.defaults.memorySearch.multimodal.enabled": - "Enables image/audio memory indexing from extraPaths. This currently requires Gemini embedding-2, keeps the default memory roots Markdown-only, and disables memory-search fallback providers.", + "Enables image/audio memory indexing from extraPaths. This currently requires Gemini embedding-2, keeps the default memory roots Markdown-only, disables memory-search fallback providers, and uploads matching binary content to the configured remote embedding provider.", "agents.defaults.memorySearch.multimodal.modalities": 'Selects which multimodal file types are indexed from extraPaths: "image", "audio", or "all". Keep this narrow to avoid indexing large binary corpora unintentionally.', "agents.defaults.memorySearch.multimodal.maxFileBytes": diff --git a/src/memory/index.test.ts b/src/memory/index.test.ts index c06eb703cbe..1a0c6988121 100644 --- a/src/memory/index.test.ts +++ b/src/memory/index.test.ts @@ -48,15 +48,19 @@ vi.mock("./embeddings.js", () => { inputs: Array<{ text: string; parts?: Array< - { type: "text"; text: string } | { type: "inline-data"; mimeType: string } + | { type: "text"; text: string } + | { type: "inline-data"; mimeType: string; data: string } >; }>, ) => { embedBatchInputCalls += 1; return inputs.map((input) => { - const mimeType = input.parts?.find( - (part) => part.type === "inline-data", - )?.mimeType; + const inlineData = input.parts?.find((part) => part.type === "inline-data"); + if (inlineData?.type === "inline-data" && inlineData.data.length > 9000) { + throw new Error("payload too large"); + } + const mimeType = + inlineData?.type === "inline-data" ? inlineData.mimeType : undefined; if (mimeType?.startsWith("image/")) { return [0, 0, 1, 0]; } @@ -311,6 +315,31 @@ describe("memory index", () => { expect(audioResults.some((result) => result.path.endsWith("meeting.wav"))).toBe(true); }); + it("skips oversized multimodal inputs without aborting sync", async () => { + const mediaDir = path.join(workspaceDir, "media-oversize"); + await fs.mkdir(mediaDir, { recursive: true }); + await fs.writeFile(path.join(mediaDir, "huge.png"), Buffer.alloc(7000, 1)); + + const cfg = createCfg({ + storePath: path.join(workspaceDir, `index-oversize-${randomUUID()}.sqlite`), + provider: "gemini", + model: "gemini-embedding-2-preview", + extraPaths: [mediaDir], + multimodal: { enabled: true, modalities: ["image"] }, + }); + const manager = requireManager(await getMemorySearchManager({ cfg, agentId: "main" })); + await manager.sync({ reason: "test" }); + + expect(embedBatchInputCalls).toBeGreaterThan(0); + const imageResults = await manager.search("image"); + expect(imageResults.some((result) => result.path.endsWith("huge.png"))).toBe(false); + + const alphaResults = await manager.search("alpha"); + expect(alphaResults.some((result) => result.path.endsWith("memory/2026-01-12.md"))).toBe(true); + + await manager.close?.(); + }); + it("keeps dirty false in status-only manager after prior indexing", async () => { const cfg = createCfg({ storePath: indexStatusPath }); diff --git a/src/memory/manager-embedding-ops.ts b/src/memory/manager-embedding-ops.ts index dadaadb513f..2487cfa973c 100644 --- a/src/memory/manager-embedding-ops.ts +++ b/src/memory/manager-embedding-ops.ts @@ -758,6 +758,45 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { return this.batch.enabled ? this.batch.concurrency : EMBEDDING_INDEX_CONCURRENCY; } + private clearIndexedFileData(pathname: string, source: MemorySource): void { + if (this.vector.enabled) { + try { + this.db + .prepare( + `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`, + ) + .run(pathname, source); + } catch {} + } + if (this.fts.enabled && this.fts.available && this.provider) { + try { + this.db + .prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`) + .run(pathname, source, this.provider.model); + } catch {} + } + this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(pathname, source); + } + + private upsertFileRecord(entry: MemoryFileEntry | SessionFileEntry, source: MemorySource): void { + this.db + .prepare( + `INSERT INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?) + ON CONFLICT(path) DO UPDATE SET + source=excluded.source, + hash=excluded.hash, + mtime=excluded.mtime, + size=excluded.size`, + ) + .run(entry.path, source, entry.hash, entry.mtimeMs, entry.size); + } + + private isStructuredInputTooLargeError(message: string): boolean { + return /(413|payload too large|request too large|input too large|too many tokens|input limit|request size)/i.test( + message, + ); + } + protected async indexFile( entry: MemoryFileEntry | SessionFileEntry, options: { source: MemorySource; content?: string }, @@ -772,11 +811,14 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { } let chunks: MemoryChunk[]; + let structuredInputBytes: number | undefined; if ("kind" in entry && entry.kind === "multimodal") { const embeddingInput = await loadMultimodalEmbeddingInput(entry); if (!embeddingInput) { + this.clearIndexedFileData(entry.path, options.source); return; } + structuredInputBytes = estimateStructuredEmbeddingInputBytes(embeddingInput); chunks = [ { startLine: 1, @@ -799,31 +841,35 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { remapChunkLines(chunks, entry.lineMap); } } - const embeddings = this.batch.enabled - ? await this.embedChunksWithBatch(chunks, entry, options.source) - : await this.embedChunksInBatches(chunks); + let embeddings: number[][]; + try { + embeddings = this.batch.enabled + ? await this.embedChunksWithBatch(chunks, entry, options.source) + : await this.embedChunksInBatches(chunks); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + if ( + "kind" in entry && + entry.kind === "multimodal" && + this.isStructuredInputTooLargeError(message) + ) { + log.warn("memory embeddings: skipping multimodal file rejected as too large", { + path: entry.path, + bytes: structuredInputBytes, + provider: this.provider.id, + model: this.provider.model, + error: message, + }); + this.clearIndexedFileData(entry.path, options.source); + this.upsertFileRecord(entry, options.source); + return; + } + throw err; + } const sample = embeddings.find((embedding) => embedding.length > 0); const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false; const now = Date.now(); - if (vectorReady) { - try { - this.db - .prepare( - `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`, - ) - .run(entry.path, options.source); - } catch {} - } - if (this.fts.enabled && this.fts.available) { - try { - this.db - .prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`) - .run(entry.path, options.source, this.provider.model); - } catch {} - } - this.db - .prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`) - .run(entry.path, options.source); + this.clearIndexedFileData(entry.path, options.source); for (let i = 0; i < chunks.length; i++) { const chunk = chunks[i]; const embedding = embeddings[i] ?? []; @@ -878,15 +924,6 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { ); } } - this.db - .prepare( - `INSERT INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?) - ON CONFLICT(path) DO UPDATE SET - source=excluded.source, - hash=excluded.hash, - mtime=excluded.mtime, - size=excluded.size`, - ) - .run(entry.path, options.source, entry.hash, entry.mtimeMs, entry.size); + this.upsertFileRecord(entry, options.source); } }