memory: harden multimodal indexing failures

This commit is contained in:
Gustavo Madeira Santana
2026-03-11 21:46:56 +00:00
parent 985efee0d8
commit 87463eee46
4 changed files with 105 additions and 38 deletions

View File

@@ -318,6 +318,7 @@ Notes:
- Multimodal indexing applies only to files discovered through `memorySearch.extraPaths`.
- Supported modalities in this phase: image and audio.
- `memorySearch.fallback` must stay `"none"` while multimodal memory is enabled.
- Matching image/audio file bytes are uploaded to the configured Gemini embedding endpoint during indexing.
- Supported image extensions: `.jpg`, `.jpeg`, `.png`, `.webp`, `.gif`, `.heic`, `.heif`.
- Supported audio extensions: `.mp3`, `.wav`, `.ogg`, `.opus`, `.m4a`, `.aac`, `.flac`.
- Search queries remain text, but Gemini can compare those text queries against indexed image/audio embeddings.

View File

@@ -780,9 +780,9 @@ export const FIELD_HELP: Record<string, string> = {
"agents.defaults.memorySearch.extraPaths":
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; when multimodal memory is enabled, matching image/audio files under these paths are also eligible for indexing.",
"agents.defaults.memorySearch.multimodal":
'Optional multimodal memory settings for indexing image and audio files from configured extra paths. Keep this off unless your embedding model explicitly supports cross-modal embeddings, and set `memorySearch.fallback` to "none" while it is enabled.',
'Optional multimodal memory settings for indexing image and audio files from configured extra paths. Keep this off unless your embedding model explicitly supports cross-modal embeddings, and set `memorySearch.fallback` to "none" while it is enabled. Matching files are uploaded to the configured remote embedding provider during indexing.',
"agents.defaults.memorySearch.multimodal.enabled":
"Enables image/audio memory indexing from extraPaths. This currently requires Gemini embedding-2, keeps the default memory roots Markdown-only, and disables memory-search fallback providers.",
"Enables image/audio memory indexing from extraPaths. This currently requires Gemini embedding-2, keeps the default memory roots Markdown-only, disables memory-search fallback providers, and uploads matching binary content to the configured remote embedding provider.",
"agents.defaults.memorySearch.multimodal.modalities":
'Selects which multimodal file types are indexed from extraPaths: "image", "audio", or "all". Keep this narrow to avoid indexing large binary corpora unintentionally.',
"agents.defaults.memorySearch.multimodal.maxFileBytes":

View File

@@ -48,15 +48,19 @@ vi.mock("./embeddings.js", () => {
inputs: Array<{
text: string;
parts?: Array<
{ type: "text"; text: string } | { type: "inline-data"; mimeType: string }
| { type: "text"; text: string }
| { type: "inline-data"; mimeType: string; data: string }
>;
}>,
) => {
embedBatchInputCalls += 1;
return inputs.map((input) => {
const mimeType = input.parts?.find(
(part) => part.type === "inline-data",
)?.mimeType;
const inlineData = input.parts?.find((part) => part.type === "inline-data");
if (inlineData?.type === "inline-data" && inlineData.data.length > 9000) {
throw new Error("payload too large");
}
const mimeType =
inlineData?.type === "inline-data" ? inlineData.mimeType : undefined;
if (mimeType?.startsWith("image/")) {
return [0, 0, 1, 0];
}
@@ -311,6 +315,31 @@ describe("memory index", () => {
expect(audioResults.some((result) => result.path.endsWith("meeting.wav"))).toBe(true);
});
it("skips oversized multimodal inputs without aborting sync", async () => {
  // Write a binary file large enough that the mocked embedder rejects its
  // inline payload as "payload too large" (assumes the 7000-byte buffer's
  // encoded inline data exceeds the mock's 9000-char limit — see mock above).
  const oversizeDir = path.join(workspaceDir, "media-oversize");
  await fs.mkdir(oversizeDir, { recursive: true });
  await fs.writeFile(path.join(oversizeDir, "huge.png"), Buffer.alloc(7000, 1));
  const cfg = createCfg({
    storePath: path.join(workspaceDir, `index-oversize-${randomUUID()}.sqlite`),
    provider: "gemini",
    model: "gemini-embedding-2-preview",
    extraPaths: [oversizeDir],
    multimodal: { enabled: true, modalities: ["image"] },
  });
  const manager = requireManager(await getMemorySearchManager({ cfg, agentId: "main" }));
  await manager.sync({ reason: "test" });
  // Embedding was attempted, but the rejected file must not appear in results…
  expect(embedBatchInputCalls).toBeGreaterThan(0);
  const imageHits = await manager.search("image");
  expect(imageHits.some((hit) => hit.path.endsWith("huge.png"))).toBe(false);
  // …while unrelated Markdown memory remains searchable, proving the sync
  // as a whole did not abort.
  const alphaHits = await manager.search("alpha");
  expect(alphaHits.some((hit) => hit.path.endsWith("memory/2026-01-12.md"))).toBe(true);
  await manager.close?.();
});
it("keeps dirty false in status-only manager after prior indexing", async () => {
const cfg = createCfg({ storePath: indexStatusPath });

View File

@@ -758,6 +758,45 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
return this.batch.enabled ? this.batch.concurrency : EMBEDDING_INDEX_CONCURRENCY;
}
/**
 * Deletes every stored artifact for one indexed file: its vector rows, its
 * full-text rows, and finally its `chunks` rows.
 *
 * The vector and FTS deletes are best-effort (errors swallowed) so a missing
 * or broken auxiliary table never blocks re-indexing; the trailing `chunks`
 * delete is allowed to throw.
 */
private clearIndexedFileData(pathname: string, source: MemorySource): void {
  if (this.vector.enabled) {
    const vectorDelete = `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`;
    try {
      this.db.prepare(vectorDelete).run(pathname, source);
    } catch {}
  }
  const provider = this.provider;
  if (this.fts.enabled && this.fts.available && provider) {
    try {
      this.db
        .prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
        .run(pathname, source, provider.model);
    } catch {}
  }
  this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(pathname, source);
}
/**
 * Inserts or refreshes the `files` row for an indexed entry so later syncs
 * can detect unchanged files by comparing hash/mtime/size.
 */
private upsertFileRecord(entry: MemoryFileEntry | SessionFileEntry, source: MemorySource): void {
  const upsert = this.db.prepare(
    `INSERT INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
source=excluded.source,
hash=excluded.hash,
mtime=excluded.mtime,
size=excluded.size`,
  );
  upsert.run(entry.path, source, entry.hash, entry.mtimeMs, entry.size);
}
/**
 * Heuristically recognizes embedding-provider rejections caused by an
 * oversized request — HTTP 413 or assorted "too large"/token-limit phrasings
 * — so the caller can skip the offending file instead of failing the sync.
 */
private isStructuredInputTooLargeError(message: string): boolean {
  const tooLargePattern =
    /(413|payload too large|request too large|input too large|too many tokens|input limit|request size)/i;
  return tooLargePattern.test(message);
}
protected async indexFile(
entry: MemoryFileEntry | SessionFileEntry,
options: { source: MemorySource; content?: string },
@@ -772,11 +811,14 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
}
let chunks: MemoryChunk[];
let structuredInputBytes: number | undefined;
if ("kind" in entry && entry.kind === "multimodal") {
const embeddingInput = await loadMultimodalEmbeddingInput(entry);
if (!embeddingInput) {
this.clearIndexedFileData(entry.path, options.source);
return;
}
structuredInputBytes = estimateStructuredEmbeddingInputBytes(embeddingInput);
chunks = [
{
startLine: 1,
@@ -799,31 +841,35 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
remapChunkLines(chunks, entry.lineMap);
}
}
const embeddings = this.batch.enabled
? await this.embedChunksWithBatch(chunks, entry, options.source)
: await this.embedChunksInBatches(chunks);
let embeddings: number[][];
try {
embeddings = this.batch.enabled
? await this.embedChunksWithBatch(chunks, entry, options.source)
: await this.embedChunksInBatches(chunks);
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
if (
"kind" in entry &&
entry.kind === "multimodal" &&
this.isStructuredInputTooLargeError(message)
) {
log.warn("memory embeddings: skipping multimodal file rejected as too large", {
path: entry.path,
bytes: structuredInputBytes,
provider: this.provider.id,
model: this.provider.model,
error: message,
});
this.clearIndexedFileData(entry.path, options.source);
this.upsertFileRecord(entry, options.source);
return;
}
throw err;
}
const sample = embeddings.find((embedding) => embedding.length > 0);
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;
const now = Date.now();
if (vectorReady) {
try {
this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(entry.path, options.source);
} catch {}
}
if (this.fts.enabled && this.fts.available) {
try {
this.db
.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
.run(entry.path, options.source, this.provider.model);
} catch {}
}
this.db
.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`)
.run(entry.path, options.source);
this.clearIndexedFileData(entry.path, options.source);
for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i];
const embedding = embeddings[i] ?? [];
@@ -878,15 +924,6 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
);
}
}
this.db
.prepare(
`INSERT INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
source=excluded.source,
hash=excluded.hash,
mtime=excluded.mtime,
size=excluded.size`,
)
.run(entry.path, options.source, entry.hash, entry.mtimeMs, entry.size);
this.upsertFileRecord(entry, options.source);
}
}