memory: defer multimodal payload loading

This commit is contained in:
Gustavo Madeira Santana
2026-03-11 21:14:21 +00:00
parent 73c9e141a4
commit 78d5dcfb77
6 changed files with 78 additions and 25 deletions

View File

@@ -155,6 +155,29 @@ describe("memory search config", () => {
});
});
// Regression test: a `modalities: []` supplied explicitly in config must come
// back from resolution as an empty list rather than being replaced by another
// set of modalities.
it("keeps an explicit empty multimodal modalities list empty", () => {
// Multimodal is enabled, but the modality list is deliberately left empty.
const cfg = asConfig({
agents: {
defaults: {
memorySearch: {
provider: "gemini",
model: "gemini-embedding-2-preview",
multimodal: {
enabled: true,
modalities: [],
},
},
},
},
});
const resolved = resolveMemorySearchConfig(cfg, "main");
// `maxFileBytes` is not set in the config above, so the value asserted here
// (10 MiB) is whatever the resolver fills in on its own.
expect(resolved?.multimodal).toEqual({
enabled: true,
modalities: [],
maxFileBytes: 10 * 1024 * 1024,
});
});
it("rejects multimodal memory on unsupported providers", () => {
const cfg = asConfig({
agents: {

View File

@@ -6,6 +6,7 @@ import {
buildFileEntry,
chunkMarkdown,
listMemoryFiles,
loadMultimodalEmbeddingInput,
normalizeExtraMemoryPaths,
remapChunkLines,
} from "./internal.js";
@@ -197,7 +198,18 @@ describe("buildFileEntry", () => {
mimeType: "image/png",
contentText: "Image file: diagram.png",
});
expect(entry?.embeddingInput?.parts).toEqual([
expect(entry?.embeddingInput).toBeUndefined();
});
it("loads multimodal embedding input lazily", async () => {
const tmpDir = getTmpDir();
const target = path.join(tmpDir, "diagram.png");
await fs.writeFile(target, Buffer.from("png"));
const entry = await buildFileEntry(target, tmpDir, multimodal);
const input = await loadMultimodalEmbeddingInput(entry!);
expect(input?.parts).toEqual([
{ type: "text", text: "Image file: diagram.png" },
expect.objectContaining({ type: "inline-data", mimeType: "image/png" }),
]);

View File

@@ -8,7 +8,6 @@ import { buildTextEmbeddingInput, type EmbeddingInput } from "./embedding-inputs
import { isFileMissingError } from "./fs-utils.js";
import {
classifyMemoryMultimodalPath,
isMemoryMultimodalEnabled,
type MemoryMultimodalModality,
type MemoryMultimodalSettings,
} from "./multimodal.js";
@@ -21,7 +20,6 @@ export type MemoryFileEntry = {
hash: string;
kind?: "markdown" | "multimodal";
contentText?: string;
embeddingInput?: EmbeddingInput;
modality?: MemoryMultimodalModality;
mimeType?: string;
};
@@ -197,9 +195,6 @@ export async function buildFileEntry(
const multimodalSettings = multimodal ?? DISABLED_MULTIMODAL_SETTINGS;
const modality = classifyMemoryMultimodalPath(absPath, multimodalSettings);
if (modality) {
if (!isMemoryMultimodalEnabled(multimodalSettings)) {
return null;
}
if (stat.size > multimodalSettings.maxFileBytes) {
return null;
}
@@ -217,17 +212,6 @@ export async function buildFileEntry(
return null;
}
const contentText = `${modality === "image" ? "Image" : "Audio"} file: ${normalizedPath}`;
const embeddingInput: EmbeddingInput = {
text: contentText,
parts: [
{ type: "text", text: contentText },
{
type: "inline-data",
mimeType,
data: buffer.toString("base64"),
},
],
};
const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
const chunkHash = hashText(
JSON.stringify({
@@ -245,7 +229,6 @@ export async function buildFileEntry(
hash: chunkHash,
kind: "multimodal",
contentText,
embeddingInput,
modality,
mimeType,
};
@@ -270,6 +253,34 @@ export async function buildFileEntry(
};
}
/**
 * Reads a multimodal entry's file on demand and assembles its embedding input.
 *
 * The result pairs the entry's `contentText` with the file bytes encoded as a
 * base64 `inline-data` part tagged with the entry's `mimeType`.
 *
 * @param entry - The entry fields needed to locate and describe the file.
 * @returns The embedding input, or `null` when the entry is not of kind
 *   `"multimodal"`, has no `contentText` or `mimeType`, or the read fails with
 *   an error that `isFileMissingError` recognises.
 * @throws Any other error raised while reading the file.
 */
export async function loadMultimodalEmbeddingInput(
  entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind">,
): Promise<EmbeddingInput | null> {
  const { absPath, contentText, kind, mimeType } = entry;

  // Only multimodal entries carry a binary payload.
  if (kind !== "multimodal") {
    return null;
  }
  // Both the descriptive text and the MIME type are required to build the parts.
  if (!contentText || !mimeType) {
    return null;
  }

  let fileBytes: Buffer;
  try {
    fileBytes = await fs.readFile(absPath);
  } catch (readError) {
    // Anything other than a missing file is passed on to the caller.
    if (!isFileMissingError(readError)) {
      throw readError;
    }
    return null;
  }

  const input: EmbeddingInput = {
    text: contentText,
    parts: [
      { type: "text", text: contentText },
      {
        type: "inline-data",
        mimeType,
        data: fileBytes.toString("base64"),
      },
    ],
  };
  return input;
}
export function chunkMarkdown(
content: string,
chunking: { tokens: number; overlap: number },

View File

@@ -17,6 +17,7 @@ import { buildGeminiEmbeddingRequest } from "./embeddings-gemini.js";
import {
chunkMarkdown,
hashText,
loadMultimodalEmbeddingInput,
parseEmbedding,
remapChunkLines,
type MemoryChunk,
@@ -771,14 +772,18 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
}
let chunks: MemoryChunk[];
if ("kind" in entry && entry.kind === "multimodal" && entry.embeddingInput) {
if ("kind" in entry && entry.kind === "multimodal") {
const embeddingInput = await loadMultimodalEmbeddingInput(entry);
if (!embeddingInput) {
return;
}
chunks = [
{
startLine: 1,
endLine: 1,
text: entry.contentText ?? entry.embeddingInput.text,
text: entry.contentText ?? embeddingInput.text,
hash: entry.hash,
embeddingInput: entry.embeddingInput,
embeddingInput,
},
];
} else {

View File

@@ -671,10 +671,12 @@ export abstract class MemoryManagerSyncOps {
this.settings.multimodal,
);
const fileEntries = (
await Promise.all(
files.map(async (file) =>
buildFileEntry(file, this.workspaceDir, this.settings.multimodal),
await runWithConcurrency(
files.map(
(file) => async () =>
await buildFileEntry(file, this.workspaceDir, this.settings.multimodal),
),
this.getIndexConcurrency(),
)
).filter((entry): entry is MemoryFileEntry => entry !== null);
log.debug("memory sync: indexing memory files", {

View File

@@ -17,7 +17,7 @@ const AUDIO_EXTENSIONS = new Set([".mp3", ".wav", ".ogg", ".opus", ".m4a", ".aac
export function normalizeMemoryMultimodalModalities(
raw: MemoryMultimodalSelection[] | undefined,
): MemoryMultimodalModality[] {
if (!raw?.length || raw.includes("all")) {
if (raw === undefined || raw.includes("all")) {
return [...MEMORY_MULTIMODAL_MODALITIES];
}
const normalized = new Set<MemoryMultimodalModality>();