Memory: add multimodal image and audio indexing (#43460)

Merged via squash.

Prepared head SHA: a994c07190
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
Gustavo Madeira Santana
2026-03-11 22:28:34 +00:00
committed by GitHub
parent 20d097ac2f
commit d79ca52960
23 changed files with 1295 additions and 178 deletions

View File

@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
- iOS/TestFlight: add a local beta release flow with Fastlane prepare/archive/upload support, canonical beta bundle IDs, and watch-app archive fixes. (#42991) Thanks @ngutman.
- macOS/onboarding: detect when remote gateways need a shared auth token, explain where to find it on the gateway host, and clarify when a successful check used paired-device auth instead. (#43100) Thanks @ngutman.
- Onboarding/Ollama: add first-class Ollama setup with Local or Cloud + Local modes, browser-based cloud sign-in, curated model suggestions, and cloud-model handling that skips unnecessary local pulls. (#41529) Thanks @BruceMacD.
- Memory: add opt-in multimodal image and audio indexing for `memorySearch.extraPaths` with Gemini `gemini-embedding-2-preview`, strict fallback gating, and scope-based reindexing. (#43460) Thanks @gumadeiras.
### Breaking

View File

@@ -284,9 +284,46 @@ Notes:
- Paths can be absolute or workspace-relative.
- Directories are scanned recursively for `.md` files.
- Only Markdown files are indexed.
- By default, only Markdown files are indexed.
- If `memorySearch.multimodal.enabled = true`, OpenClaw also indexes supported image/audio files under `extraPaths` only. Default memory roots (`MEMORY.md`, `memory.md`, `memory/**/*.md`) stay Markdown-only.
- Symlinks are ignored (files or directories).
### Multimodal memory files (Gemini image + audio)
OpenClaw can index image and audio files from `memorySearch.extraPaths` when using Gemini embedding 2:
```json5
agents: {
defaults: {
memorySearch: {
provider: "gemini",
model: "gemini-embedding-2-preview",
extraPaths: ["assets/reference", "voice-notes"],
multimodal: {
enabled: true,
modalities: ["image", "audio"], // or ["all"]
maxFileBytes: 10000000
},
remote: {
apiKey: "YOUR_GEMINI_API_KEY"
}
}
}
}
```
Notes:
- Multimodal memory is currently supported only for `gemini-embedding-2-preview`.
- Multimodal indexing applies only to files discovered through `memorySearch.extraPaths`.
- Supported modalities in this phase: image and audio.
- `memorySearch.fallback` must stay `"none"` while multimodal memory is enabled.
- Matching image/audio file bytes are uploaded to the configured Gemini embedding endpoint during indexing.
- Supported image extensions: `.jpg`, `.jpeg`, `.png`, `.webp`, `.gif`, `.heic`, `.heif`.
- Supported audio extensions: `.mp3`, `.wav`, `.ogg`, `.opus`, `.m4a`, `.aac`, `.flac`.
- Search queries remain text, but Gemini can compare those text queries against indexed image/audio embeddings.
- `memory_get` still reads Markdown only; binary files are searchable but not returned as raw file contents.
### Gemini embeddings (native)
Set the provider to `gemini` to use the Gemini embeddings API directly:

View File

@@ -131,6 +131,113 @@ describe("memory search config", () => {
expect(resolved?.extraPaths).toEqual(["/shared/notes", "docs", "../team-notes"]);
});
// Verifies that multimodal settings survive config resolution and that the
// "all" shorthand is normalized to the explicit ["image", "audio"] list
// while an explicit maxFileBytes override is preserved.
it("normalizes multimodal settings", () => {
const cfg = asConfig({
agents: {
defaults: {
memorySearch: {
provider: "gemini",
model: "gemini-embedding-2-preview",
multimodal: {
enabled: true,
modalities: ["all"],
maxFileBytes: 8192,
},
},
},
},
});
const resolved = resolveMemorySearchConfig(cfg, "main");
// "all" expands to both concrete modalities; maxFileBytes stays as given.
expect(resolved?.multimodal).toEqual({
enabled: true,
modalities: ["image", "audio"],
maxFileBytes: 8192,
});
});
// An explicit empty modalities list must be kept as-is (not re-defaulted),
// while an unset maxFileBytes falls back to the 10 MiB default.
it("keeps an explicit empty multimodal modalities list empty", () => {
const cfg = asConfig({
agents: {
defaults: {
memorySearch: {
provider: "gemini",
model: "gemini-embedding-2-preview",
multimodal: {
enabled: true,
modalities: [],
},
},
},
},
});
const resolved = resolveMemorySearchConfig(cfg, "main");
expect(resolved?.multimodal).toEqual({
enabled: true,
modalities: [],
maxFileBytes: 10 * 1024 * 1024,
});
expect(resolved?.provider).toBe("gemini");
});
// With enabled=true but modalities=[], multimodal indexing is effectively
// inactive, so the gemini-only provider check and the fallback="none"
// requirement are not enforced: openai + fallback resolves without throwing.
it("does not enforce multimodal provider validation when no modalities are active", () => {
const cfg = asConfig({
agents: {
defaults: {
memorySearch: {
provider: "openai",
model: "text-embedding-3-small",
fallback: "openai",
multimodal: {
enabled: true,
modalities: [],
},
},
},
},
});
const resolved = resolveMemorySearchConfig(cfg, "main");
expect(resolved?.multimodal).toEqual({
enabled: true,
modalities: [],
maxFileBytes: 10 * 1024 * 1024,
});
});
// Active multimodal modalities on a non-gemini provider must fail fast at
// config-resolution time with an actionable error message.
it("rejects multimodal memory on unsupported providers", () => {
const cfg = asConfig({
agents: {
defaults: {
memorySearch: {
provider: "openai",
model: "text-embedding-3-small",
multimodal: { enabled: true, modalities: ["image"] },
},
},
},
});
expect(() => resolveMemorySearchConfig(cfg, "main")).toThrow(
/memorySearch\.multimodal requires memorySearch\.provider = "gemini"/,
);
});
// Even with a supported gemini model, configuring a fallback provider while
// multimodal is active must be rejected (fallback must stay "none").
it("rejects multimodal memory when fallback is configured", () => {
const cfg = asConfig({
agents: {
defaults: {
memorySearch: {
provider: "gemini",
model: "gemini-embedding-2-preview",
fallback: "openai",
multimodal: { enabled: true, modalities: ["image"] },
},
},
},
});
expect(() => resolveMemorySearchConfig(cfg, "main")).toThrow(
/memorySearch\.multimodal does not support memorySearch\.fallback/,
);
});
it("includes batch defaults for openai without remote overrides", () => {
const cfg = configWithDefaultProvider("openai");
const resolved = resolveMemorySearchConfig(cfg, "main");

View File

@@ -3,6 +3,12 @@ import path from "node:path";
import type { OpenClawConfig, MemorySearchConfig } from "../config/config.js";
import { resolveStateDir } from "../config/paths.js";
import type { SecretInput } from "../config/types.secrets.js";
import {
isMemoryMultimodalEnabled,
normalizeMemoryMultimodalSettings,
supportsMemoryMultimodalEmbeddings,
type MemoryMultimodalSettings,
} from "../memory/multimodal.js";
import { clampInt, clampNumber, resolveUserPath } from "../utils.js";
import { resolveAgentConfig } from "./agent-scope.js";
@@ -10,6 +16,7 @@ export type ResolvedMemorySearchConfig = {
enabled: boolean;
sources: Array<"memory" | "sessions">;
extraPaths: string[];
multimodal: MemoryMultimodalSettings;
provider: "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama" | "auto";
remote?: {
baseUrl?: string;
@@ -204,6 +211,11 @@ function mergeConfig(
.map((value) => value.trim())
.filter(Boolean);
const extraPaths = Array.from(new Set(rawPaths));
const multimodal = normalizeMemoryMultimodalSettings({
enabled: overrides?.multimodal?.enabled ?? defaults?.multimodal?.enabled,
modalities: overrides?.multimodal?.modalities ?? defaults?.multimodal?.modalities,
maxFileBytes: overrides?.multimodal?.maxFileBytes ?? defaults?.multimodal?.maxFileBytes,
});
const vector = {
enabled: overrides?.store?.vector?.enabled ?? defaults?.store?.vector?.enabled ?? true,
extensionPath:
@@ -307,6 +319,7 @@ function mergeConfig(
enabled,
sources,
extraPaths,
multimodal,
provider,
remote,
experimental: {
@@ -365,5 +378,22 @@ export function resolveMemorySearchConfig(
if (!resolved.enabled) {
return null;
}
const multimodalActive = isMemoryMultimodalEnabled(resolved.multimodal);
if (
multimodalActive &&
!supportsMemoryMultimodalEmbeddings({
provider: resolved.provider,
model: resolved.model,
})
) {
throw new Error(
'agents.*.memorySearch.multimodal requires memorySearch.provider = "gemini" and model = "gemini-embedding-2-preview".',
);
}
if (multimodalActive && resolved.fallback !== "none") {
throw new Error(
'agents.*.memorySearch.multimodal does not support memorySearch.fallback. Set fallback to "none".',
);
}
return resolved;
}

View File

@@ -72,6 +72,10 @@ const TARGET_KEYS = [
"agents.defaults.memorySearch.fallback",
"agents.defaults.memorySearch.sources",
"agents.defaults.memorySearch.extraPaths",
"agents.defaults.memorySearch.multimodal",
"agents.defaults.memorySearch.multimodal.enabled",
"agents.defaults.memorySearch.multimodal.modalities",
"agents.defaults.memorySearch.multimodal.maxFileBytes",
"agents.defaults.memorySearch.experimental.sessionMemory",
"agents.defaults.memorySearch.remote.baseUrl",
"agents.defaults.memorySearch.remote.apiKey",

View File

@@ -778,7 +778,15 @@ export const FIELD_HELP: Record<string, string> = {
"agents.defaults.memorySearch.sources":
'Chooses which sources are indexed: "memory" reads MEMORY.md + memory files, and "sessions" includes transcript history. Keep ["memory"] unless you need recall from prior chat transcripts.',
"agents.defaults.memorySearch.extraPaths":
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; keep paths small and intentional to avoid noisy recall.",
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; when multimodal memory is enabled, matching image/audio files under these paths are also eligible for indexing.",
"agents.defaults.memorySearch.multimodal":
'Optional multimodal memory settings for indexing image and audio files from configured extra paths. Keep this off unless your embedding model explicitly supports cross-modal embeddings, and set `memorySearch.fallback` to "none" while it is enabled. Matching files are uploaded to the configured remote embedding provider during indexing.',
"agents.defaults.memorySearch.multimodal.enabled":
"Enables image/audio memory indexing from extraPaths. This currently requires Gemini embedding-2, keeps the default memory roots Markdown-only, disables memory-search fallback providers, and uploads matching binary content to the configured remote embedding provider.",
"agents.defaults.memorySearch.multimodal.modalities":
'Selects which multimodal file types are indexed from extraPaths: "image", "audio", or "all". Keep this narrow to avoid indexing large binary corpora unintentionally.',
"agents.defaults.memorySearch.multimodal.maxFileBytes":
"Sets the maximum bytes allowed per multimodal file before it is skipped during memory indexing. Use this to cap upload cost and indexing latency, or raise it for short high-quality audio clips.",
"agents.defaults.memorySearch.experimental.sessionMemory":
"Indexes session transcripts into memory search so responses can reference prior chat turns. Keep this off unless transcript recall is needed, because indexing cost and storage usage both increase.",
"agents.defaults.memorySearch.provider":

View File

@@ -319,6 +319,10 @@ export const FIELD_LABELS: Record<string, string> = {
"agents.defaults.memorySearch.enabled": "Enable Memory Search",
"agents.defaults.memorySearch.sources": "Memory Search Sources",
"agents.defaults.memorySearch.extraPaths": "Extra Memory Paths",
"agents.defaults.memorySearch.multimodal": "Memory Search Multimodal",
"agents.defaults.memorySearch.multimodal.enabled": "Enable Memory Search Multimodal",
"agents.defaults.memorySearch.multimodal.modalities": "Memory Search Multimodal Modalities",
"agents.defaults.memorySearch.multimodal.maxFileBytes": "Memory Search Multimodal Max File Bytes",
"agents.defaults.memorySearch.experimental.sessionMemory":
"Memory Search Session Index (Experimental)",
"agents.defaults.memorySearch.provider": "Memory Search Provider",

View File

@@ -319,6 +319,15 @@ export type MemorySearchConfig = {
sources?: Array<"memory" | "sessions">;
/** Extra paths to include in memory search (directories or .md files). */
extraPaths?: string[];
/** Optional multimodal file indexing for selected extra paths. */
multimodal?: {
/** Enable image/audio embeddings from extraPaths. */
enabled?: boolean;
/** Which non-text file types to index. */
modalities?: Array<"image" | "audio" | "all">;
/** Max bytes allowed per multimodal file before it is skipped. */
maxFileBytes?: number;
};
/** Experimental memory search settings. */
experimental?: {
/** Enable session transcript indexing (experimental, default: false). */

View File

@@ -553,6 +553,16 @@ export const MemorySearchSchema = z
enabled: z.boolean().optional(),
sources: z.array(z.union([z.literal("memory"), z.literal("sessions")])).optional(),
extraPaths: z.array(z.string()).optional(),
multimodal: z
.object({
enabled: z.boolean().optional(),
modalities: z
.array(z.union([z.literal("image"), z.literal("audio"), z.literal("all")]))
.optional(),
maxFileBytes: z.number().int().positive().optional(),
})
.strict()
.optional(),
experimental: z
.object({
sessionMemory: z.boolean().optional(),

View File

@@ -12,6 +12,10 @@ const EXT_BY_MIME: Record<string, string> = {
"image/gif": ".gif",
"audio/ogg": ".ogg",
"audio/mpeg": ".mp3",
"audio/wav": ".wav",
"audio/flac": ".flac",
"audio/aac": ".aac",
"audio/opus": ".opus",
"audio/x-m4a": ".m4a",
"audio/mp4": ".m4a",
"video/mp4": ".mp4",

View File

@@ -1,4 +1,5 @@
import { estimateUtf8Bytes, splitTextToUtf8ByteLimit } from "./embedding-input-limits.js";
import { hasNonTextEmbeddingParts } from "./embedding-inputs.js";
import { resolveEmbeddingMaxInputTokens } from "./embedding-model-limits.js";
import type { EmbeddingProvider } from "./embeddings.js";
import { hashText, type MemoryChunk } from "./internal.js";
@@ -16,6 +17,10 @@ export function enforceEmbeddingMaxInputTokens(
const out: MemoryChunk[] = [];
for (const chunk of chunks) {
if (hasNonTextEmbeddingParts(chunk.embeddingInput)) {
out.push(chunk);
continue;
}
if (estimateUtf8Bytes(chunk.text) <= maxInputTokens) {
out.push(chunk);
continue;
@@ -27,6 +32,7 @@ export function enforceEmbeddingMaxInputTokens(
endLine: chunk.endLine,
text,
hash: hashText(text),
embeddingInput: { text },
});
}
}

View File

@@ -1,3 +1,5 @@
import type { EmbeddingInput } from "./embedding-inputs.js";
// Helpers for enforcing embedding model input size limits.
//
// We use UTF-8 byte length as a conservative upper bound for tokenizer output.
@@ -11,6 +13,22 @@ export function estimateUtf8Bytes(text: string): number {
return Buffer.byteLength(text, "utf8");
}
/**
 * Estimates the UTF-8 byte footprint of a structured embedding input.
 *
 * When the input carries no structured parts, the estimate is simply the
 * byte length of its plain text. Otherwise every part contributes: text
 * parts count their text, inline-data parts count their mime type plus
 * their (base64) data payload.
 */
export function estimateStructuredEmbeddingInputBytes(input: EmbeddingInput): number {
const parts = input.parts;
if (!parts?.length) {
return estimateUtf8Bytes(input.text);
}
return parts.reduce(
(sum, part) =>
part.type === "text"
? sum + estimateUtf8Bytes(part.text)
: sum + estimateUtf8Bytes(part.mimeType) + estimateUtf8Bytes(part.data),
0,
);
}
export function splitTextToUtf8ByteLimit(text: string, maxUtf8Bytes: number): string[] {
if (maxUtf8Bytes <= 0) {
return [text];

View File

@@ -0,0 +1,34 @@
/** A plain text part of a structured embedding input. */
export type EmbeddingInputTextPart = {
type: "text";
text: string;
};
/** A base64-encoded binary part (e.g. image or audio bytes) of an embedding input. */
export type EmbeddingInputInlineDataPart = {
type: "inline-data";
mimeType: string;
data: string;
};
export type EmbeddingInputPart = EmbeddingInputTextPart | EmbeddingInputInlineDataPart;
/**
 * Structured embedding input: `text` always carries the textual form, while
 * `parts` optionally carries a multimodal breakdown (text + inline data).
 */
export type EmbeddingInput = {
text: string;
parts?: EmbeddingInputPart[];
};
/** Wraps plain text in the structured embedding-input shape (no parts). */
export function buildTextEmbeddingInput(text: string): EmbeddingInput {
return { text };
}
/** Type guard for inline-data (binary) embedding-input parts. */
export function isInlineDataEmbeddingInputPart(
part: EmbeddingInputPart,
): part is EmbeddingInputInlineDataPart {
return part.type === "inline-data";
}
/** True when the input carries at least one non-text (inline-data) part. */
export function hasNonTextEmbeddingParts(input: EmbeddingInput | undefined): boolean {
return input?.parts?.some(isInlineDataEmbeddingInputPart) ?? false;
}

View File

@@ -1,16 +1,13 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import * as authModule from "../agents/model-auth.js";
import {
buildFileDataPart,
buildGeminiParts,
buildGeminiEmbeddingRequest,
buildGeminiTextEmbeddingRequest,
buildInlineDataPart,
createGeminiEmbeddingProvider,
DEFAULT_GEMINI_EMBEDDING_MODEL,
GEMINI_EMBEDDING_2_MODELS,
isGeminiEmbedding2Model,
resolveGeminiOutputDimensionality,
type GeminiPart,
} from "./embeddings-gemini.js";
vi.mock("../agents/model-auth.js", async () => {
@@ -61,40 +58,6 @@ function mockResolvedProviderKey(apiKey = "test-key") {
});
}
// ---------- Helper function tests ----------
describe("buildGeminiParts", () => {
it("wraps a string into a single text part", () => {
expect(buildGeminiParts("hello")).toEqual([{ text: "hello" }]);
});
it("passes through an existing parts array", () => {
const parts: GeminiPart[] = [
{ text: "hello" },
{ inlineData: { mimeType: "image/png", data: "base64data" } },
];
expect(buildGeminiParts(parts)).toBe(parts);
});
});
describe("buildInlineDataPart", () => {
it("produces the correct shape", () => {
const part = buildInlineDataPart("image/jpeg", "abc123");
expect(part).toEqual({
inlineData: { mimeType: "image/jpeg", data: "abc123" },
});
});
});
describe("buildFileDataPart", () => {
it("produces the correct shape", () => {
const part = buildFileDataPart("application/pdf", "gs://bucket/file.pdf");
expect(part).toEqual({
fileData: { mimeType: "application/pdf", fileUri: "gs://bucket/file.pdf" },
});
});
});
describe("buildGeminiTextEmbeddingRequest", () => {
it("builds a text embedding request with optional model and dimensions", () => {
expect(
@@ -113,6 +76,35 @@ describe("buildGeminiTextEmbeddingRequest", () => {
});
});
// buildGeminiEmbeddingRequest maps structured EmbeddingInput parts onto the
// Gemini wire format: text parts become { text }, inline-data parts become
// { inlineData: { mimeType, data } }, with the model path, task type, and
// output dimensionality attached alongside the content.
describe("buildGeminiEmbeddingRequest", () => {
it("builds a multimodal request from structured input parts", () => {
expect(
buildGeminiEmbeddingRequest({
input: {
text: "Image file: diagram.png",
parts: [
{ type: "text", text: "Image file: diagram.png" },
{ type: "inline-data", mimeType: "image/png", data: "abc123" },
],
},
taskType: "RETRIEVAL_DOCUMENT",
modelPath: "models/gemini-embedding-2-preview",
outputDimensionality: 1536,
}),
).toEqual({
model: "models/gemini-embedding-2-preview",
content: {
parts: [
{ text: "Image file: diagram.png" },
{ inlineData: { mimeType: "image/png", data: "abc123" } },
],
},
taskType: "RETRIEVAL_DOCUMENT",
outputDimensionality: 1536,
});
});
});
// ---------- Model detection ----------
describe("isGeminiEmbedding2Model", () => {
@@ -319,6 +311,21 @@ describe("gemini-embedding-2-preview provider", () => {
expect(body.outputDimensionality).toBe(768);
});
// embedQuery responses are sanitized (non-finite values -> 0) and then
// L2-normalized: [3, 4, NaN] -> [3, 4, 0] -> [0.6, 0.8, 0].
it("sanitizes and normalizes embedQuery responses", async () => {
const fetchMock = createGeminiFetchMock([3, 4, Number.NaN]);
vi.stubGlobal("fetch", fetchMock);
mockResolvedProviderKey();
const { provider } = await createGeminiEmbeddingProvider({
config: {} as never,
provider: "gemini",
model: "gemini-embedding-2-preview",
fallback: "none",
});
await expect(provider.embedQuery("test")).resolves.toEqual([0.6, 0.8, 0]);
});
it("uses custom outputDimensionality for each embedBatch request", async () => {
const fetchMock = createGeminiBatchFetchMock(2);
vi.stubGlobal("fetch", fetchMock);
@@ -341,6 +348,88 @@ describe("gemini-embedding-2-preview provider", () => {
]);
});
// Structured (multimodal) batch responses go through the same sanitation and
// normalization path: [0, Infinity, 5] -> [0, 0, 5] -> [0, 0, 1].
it("sanitizes and normalizes structured batch responses", async () => {
const fetchMock = createGeminiBatchFetchMock(1, [0, Number.POSITIVE_INFINITY, 5]);
vi.stubGlobal("fetch", fetchMock);
mockResolvedProviderKey();
const { provider } = await createGeminiEmbeddingProvider({
config: {} as never,
provider: "gemini",
model: "gemini-embedding-2-preview",
fallback: "none",
});
await expect(
provider.embedBatchInputs?.([
{
text: "Image file: diagram.png",
parts: [
{ type: "text", text: "Image file: diagram.png" },
{ type: "inline-data", mimeType: "image/png", data: "img" },
],
},
]),
).resolves.toEqual([[0, 0, 1]]);
});
// Verifies the batch endpoint request body: each structured input becomes one
// request entry whose content.parts interleave text and inlineData, each
// tagged with the model path, the RETRIEVAL_DOCUMENT task type, and the
// default 3072 output dimensionality.
it("supports multimodal embedBatchInputs requests", async () => {
const fetchMock = createGeminiBatchFetchMock(2);
vi.stubGlobal("fetch", fetchMock);
mockResolvedProviderKey();
const { provider } = await createGeminiEmbeddingProvider({
config: {} as never,
provider: "gemini",
model: "gemini-embedding-2-preview",
fallback: "none",
});
expect(provider.embedBatchInputs).toBeDefined();
await provider.embedBatchInputs?.([
{
text: "Image file: diagram.png",
parts: [
{ type: "text", text: "Image file: diagram.png" },
{ type: "inline-data", mimeType: "image/png", data: "img" },
],
},
{
text: "Audio file: note.wav",
parts: [
{ type: "text", text: "Audio file: note.wav" },
{ type: "inline-data", mimeType: "audio/wav", data: "aud" },
],
},
]);
const body = parseFetchBody(fetchMock);
expect(body.requests).toEqual([
{
model: "models/gemini-embedding-2-preview",
content: {
parts: [
{ text: "Image file: diagram.png" },
{ inlineData: { mimeType: "image/png", data: "img" } },
],
},
taskType: "RETRIEVAL_DOCUMENT",
outputDimensionality: 3072,
},
{
model: "models/gemini-embedding-2-preview",
content: {
parts: [
{ text: "Audio file: note.wav" },
{ inlineData: { mimeType: "audio/wav", data: "aud" } },
],
},
taskType: "RETRIEVAL_DOCUMENT",
outputDimensionality: 3072,
},
]);
});
it("throws for invalid outputDimensionality", async () => {
mockResolvedProviderKey();

View File

@@ -5,6 +5,7 @@ import {
import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
import { parseGeminiAuth } from "../infra/gemini-auth.js";
import type { SsrFPolicy } from "../infra/net/ssrf.js";
import type { EmbeddingInput } from "./embedding-inputs.js";
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
import { debugEmbeddingsLog } from "./embeddings-debug.js";
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js";
@@ -50,34 +51,14 @@ export type GeminiTextPart = { text: string };
export type GeminiInlinePart = {
inlineData: { mimeType: string; data: string };
};
export type GeminiFilePart = {
fileData: { mimeType: string; fileUri: string };
};
export type GeminiPart = GeminiTextPart | GeminiInlinePart | GeminiFilePart;
export type GeminiTextEmbeddingRequest = {
content: { parts: GeminiTextPart[] };
export type GeminiPart = GeminiTextPart | GeminiInlinePart;
export type GeminiEmbeddingRequest = {
content: { parts: GeminiPart[] };
taskType: GeminiTaskType;
outputDimensionality?: number;
model?: string;
};
/** Convert a string or pre-built parts array into `GeminiPart[]`. */
export function buildGeminiParts(input: string | GeminiPart[]): GeminiPart[] {
if (typeof input === "string") {
return [{ text: input }];
}
return input;
}
/** Convenience: build an inline-data part for multimodal embeddings. */
export function buildInlineDataPart(mimeType: string, base64Data: string): GeminiInlinePart {
return { inlineData: { mimeType, data: base64Data } };
}
/** Convenience: build a file-data part for multimodal embeddings. */
export function buildFileDataPart(mimeType: string, fileUri: string): GeminiFilePart {
return { fileData: { mimeType, fileUri } };
}
export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest;
/** Builds the text-only Gemini embedding request shape used across direct and batch APIs. */
export function buildGeminiTextEmbeddingRequest(params: {
@@ -86,8 +67,30 @@ export function buildGeminiTextEmbeddingRequest(params: {
outputDimensionality?: number;
modelPath?: string;
}): GeminiTextEmbeddingRequest {
const request: GeminiTextEmbeddingRequest = {
content: { parts: [{ text: params.text }] },
return buildGeminiEmbeddingRequest({
input: { text: params.text },
taskType: params.taskType,
outputDimensionality: params.outputDimensionality,
modelPath: params.modelPath,
});
}
export function buildGeminiEmbeddingRequest(params: {
input: EmbeddingInput;
taskType: GeminiTaskType;
outputDimensionality?: number;
modelPath?: string;
}): GeminiEmbeddingRequest {
const request: GeminiEmbeddingRequest = {
content: {
parts: params.input.parts?.map((part) =>
part.type === "text"
? ({ text: part.text } satisfies GeminiTextPart)
: ({
inlineData: { mimeType: part.mimeType, data: part.data },
} satisfies GeminiInlinePart),
) ?? [{ text: params.input.text }],
},
taskType: params.taskType,
};
if (params.modelPath) {
@@ -143,7 +146,7 @@ function resolveRemoteApiKey(remoteApiKey: unknown): string | undefined {
return trimmed;
}
function normalizeGeminiModel(model: string): string {
export function normalizeGeminiModel(model: string): string {
const trimmed = model.trim();
if (!trimmed) {
return DEFAULT_GEMINI_EMBEDDING_MODEL;
@@ -158,6 +161,46 @@ function normalizeGeminiModel(model: string): string {
return withoutPrefix;
}
/**
 * POSTs a Gemini embedding request body to the given endpoint and returns the
 * parsed JSON payload.
 *
 * Shared by the single-embed and batch endpoints, hence the payload type
 * covers both the `embedding` and `embeddings` response shapes. The call is
 * wrapped in executeWithApiKeyRotation over the client's configured API keys
 * (presumably retrying across keys on auth failures — see that helper) and
 * routed through withRemoteHttpResponse with the client's SSRF policy.
 *
 * Throws on a non-OK response, embedding the status and response text in the
 * error message.
 */
async function fetchGeminiEmbeddingPayload(params: {
client: GeminiEmbeddingClient;
endpoint: string;
body: unknown;
}): Promise<{
embedding?: { values?: number[] };
embeddings?: Array<{ values?: number[] }>;
}> {
return await executeWithApiKeyRotation({
provider: "google",
apiKeys: params.client.apiKeys,
execute: async (apiKey) => {
const authHeaders = parseGeminiAuth(apiKey);
// Auth headers are spread first, so client-level headers win on key collisions.
const headers = {
...authHeaders.headers,
...params.client.headers,
};
return await withRemoteHttpResponse({
url: params.endpoint,
ssrfPolicy: params.client.ssrfPolicy,
init: {
method: "POST",
headers,
body: JSON.stringify(params.body),
},
onResponse: async (res) => {
if (!res.ok) {
const text = await res.text();
throw new Error(`gemini embeddings failed: ${res.status} ${text}`);
}
return (await res.json()) as {
embedding?: { values?: number[] };
embeddings?: Array<{ values?: number[] }>;
};
},
});
},
});
}
function normalizeGeminiBaseUrl(raw: string): string {
const trimmed = raw.replace(/\/+$/, "");
const openAiIndex = trimmed.indexOf("/openai");
@@ -181,71 +224,50 @@ export async function createGeminiEmbeddingProvider(
const isV2 = isGeminiEmbedding2Model(client.model);
const outputDimensionality = client.outputDimensionality;
const fetchWithGeminiAuth = async (apiKey: string, endpoint: string, body: unknown) => {
const authHeaders = parseGeminiAuth(apiKey);
const headers = {
...authHeaders.headers,
...client.headers,
};
const payload = await withRemoteHttpResponse({
url: endpoint,
ssrfPolicy: client.ssrfPolicy,
init: {
method: "POST",
headers,
body: JSON.stringify(body),
},
onResponse: async (res) => {
if (!res.ok) {
const text = await res.text();
throw new Error(`gemini embeddings failed: ${res.status} ${text}`);
}
return (await res.json()) as {
embedding?: { values?: number[] };
embeddings?: Array<{ values?: number[] }>;
};
},
});
return payload;
};
const embedQuery = async (text: string): Promise<number[]> => {
if (!text.trim()) {
return [];
}
const body = buildGeminiTextEmbeddingRequest({
text,
taskType: options.taskType ?? "RETRIEVAL_QUERY",
outputDimensionality: isV2 ? outputDimensionality : undefined,
});
const payload = await executeWithApiKeyRotation({
provider: "google",
apiKeys: client.apiKeys,
execute: (apiKey) => fetchWithGeminiAuth(apiKey, embedUrl, body),
const payload = await fetchGeminiEmbeddingPayload({
client,
endpoint: embedUrl,
body: buildGeminiTextEmbeddingRequest({
text,
taskType: options.taskType ?? "RETRIEVAL_QUERY",
outputDimensionality: isV2 ? outputDimensionality : undefined,
}),
});
return sanitizeAndNormalizeEmbedding(payload.embedding?.values ?? []);
};
const embedBatch = async (texts: string[]): Promise<number[][]> => {
if (texts.length === 0) {
const embedBatchInputs = async (inputs: EmbeddingInput[]): Promise<number[][]> => {
if (inputs.length === 0) {
return [];
}
const requests = texts.map((text) =>
buildGeminiTextEmbeddingRequest({
text,
modelPath: client.modelPath,
taskType: options.taskType ?? "RETRIEVAL_DOCUMENT",
outputDimensionality: isV2 ? outputDimensionality : undefined,
}),
);
const batchBody = { requests };
const payload = await executeWithApiKeyRotation({
provider: "google",
apiKeys: client.apiKeys,
execute: (apiKey) => fetchWithGeminiAuth(apiKey, batchUrl, batchBody),
const payload = await fetchGeminiEmbeddingPayload({
client,
endpoint: batchUrl,
body: {
requests: inputs.map((input) =>
buildGeminiEmbeddingRequest({
input,
modelPath: client.modelPath,
taskType: options.taskType ?? "RETRIEVAL_DOCUMENT",
outputDimensionality: isV2 ? outputDimensionality : undefined,
}),
),
},
});
const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : [];
return texts.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? []));
return inputs.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? []));
};
const embedBatch = async (texts: string[]): Promise<number[][]> => {
return await embedBatchInputs(
texts.map((text) => ({
text,
})),
);
};
return {
@@ -255,6 +277,7 @@ export async function createGeminiEmbeddingProvider(
maxInputTokens: GEMINI_MAX_INPUT_TOKENS[client.model],
embedQuery,
embedBatch,
embedBatchInputs,
},
client,
};

View File

@@ -4,6 +4,7 @@ import type { OpenClawConfig } from "../config/config.js";
import type { SecretInput } from "../config/types.secrets.js";
import { formatErrorMessage } from "../infra/errors.js";
import { resolveUserPath } from "../utils.js";
import type { EmbeddingInput } from "./embedding-inputs.js";
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
import {
createGeminiEmbeddingProvider,
@@ -31,6 +32,7 @@ export type EmbeddingProvider = {
maxInputTokens?: number;
embedQuery: (text: string) => Promise<number[]>;
embedBatch: (texts: string[]) => Promise<number[][]>;
embedBatchInputs?: (inputs: EmbeddingInput[]) => Promise<number[][]>;
};
export type EmbeddingProviderId = "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama";

View File

@@ -1,3 +1,4 @@
import { randomUUID } from "node:crypto";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
@@ -6,6 +7,7 @@ import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
import "./test-runtime-mocks.js";
let embedBatchCalls = 0;
let embedBatchInputCalls = 0;
let providerCalls: Array<{ provider?: string; model?: string; outputDimensionality?: number }> = [];
vi.mock("./embeddings.js", () => {
@@ -13,7 +15,9 @@ vi.mock("./embeddings.js", () => {
const lower = text.toLowerCase();
const alpha = lower.split("alpha").length - 1;
const beta = lower.split("beta").length - 1;
return [alpha, beta];
const image = lower.split("image").length - 1;
const audio = lower.split("audio").length - 1;
return [alpha, beta, image, audio];
};
return {
createEmbeddingProvider: async (options: {
@@ -38,6 +42,36 @@ vi.mock("./embeddings.js", () => {
embedBatchCalls += 1;
return texts.map(embedText);
},
...(providerId === "gemini"
? {
embedBatchInputs: async (
inputs: Array<{
text: string;
parts?: Array<
| { type: "text"; text: string }
| { type: "inline-data"; mimeType: string; data: string }
>;
}>,
) => {
embedBatchInputCalls += 1;
return inputs.map((input) => {
const inlineData = input.parts?.find((part) => part.type === "inline-data");
if (inlineData?.type === "inline-data" && inlineData.data.length > 9000) {
throw new Error("payload too large");
}
const mimeType =
inlineData?.type === "inline-data" ? inlineData.mimeType : undefined;
if (mimeType?.startsWith("image/")) {
return [0, 0, 1, 0];
}
if (mimeType?.startsWith("audio/")) {
return [0, 0, 0, 1];
}
return embedText(input.text);
});
},
}
: {}),
},
...(providerId === "gemini"
? {
@@ -64,6 +98,7 @@ describe("memory index", () => {
let indexVectorPath = "";
let indexMainPath = "";
let indexExtraPath = "";
let indexMultimodalPath = "";
let indexStatusPath = "";
let indexSourceChangePath = "";
let indexModelPath = "";
@@ -97,6 +132,7 @@ describe("memory index", () => {
indexMainPath = path.join(workspaceDir, "index-main.sqlite");
indexVectorPath = path.join(workspaceDir, "index-vector.sqlite");
indexExtraPath = path.join(workspaceDir, "index-extra.sqlite");
indexMultimodalPath = path.join(workspaceDir, "index-multimodal.sqlite");
indexStatusPath = path.join(workspaceDir, "index-status.sqlite");
indexSourceChangePath = path.join(workspaceDir, "index-source-change.sqlite");
indexModelPath = path.join(workspaceDir, "index-model-change.sqlite");
@@ -119,6 +155,7 @@ describe("memory index", () => {
// Keep atomic reindex tests on the safe path.
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "1");
embedBatchCalls = 0;
embedBatchInputCalls = 0;
providerCalls = [];
// Keep the workspace stable to allow manager reuse across tests.
@@ -149,6 +186,11 @@ describe("memory index", () => {
provider?: "openai" | "gemini";
model?: string;
outputDimensionality?: number;
multimodal?: {
enabled?: boolean;
modalities?: Array<"image" | "audio" | "all">;
maxFileBytes?: number;
};
vectorEnabled?: boolean;
cacheEnabled?: boolean;
minScore?: number;
@@ -172,6 +214,7 @@ describe("memory index", () => {
},
cache: params.cacheEnabled ? { enabled: true } : undefined,
extraPaths: params.extraPaths,
multimodal: params.multimodal,
sources: params.sources,
experimental: { sessionMemory: params.sessionMemory ?? false },
},
@@ -247,6 +290,103 @@ describe("memory index", () => {
);
});
// End-to-end happy path: image + audio files under an extraPaths directory are
// embedded through the Gemini structured-input path and become searchable.
it("indexes multimodal image and audio files from extra paths with Gemini structured inputs", async () => {
const mediaDir = path.join(workspaceDir, "media-memory");
await fs.mkdir(mediaDir, { recursive: true });
// File contents are placeholders; the mocked embedder keys off MIME type, not bytes.
await fs.writeFile(path.join(mediaDir, "diagram.png"), Buffer.from("png"));
await fs.writeFile(path.join(mediaDir, "meeting.wav"), Buffer.from("wav"));
const cfg = createCfg({
storePath: indexMultimodalPath,
provider: "gemini",
model: "gemini-embedding-2-preview",
extraPaths: [mediaDir],
multimodal: { enabled: true, modalities: ["image", "audio"] },
});
const manager = await getPersistentManager(cfg);
await manager.sync({ reason: "test" });
// The structured-input embedding API must have been used at least once.
expect(embedBatchInputCalls).toBeGreaterThan(0);
const imageResults = await manager.search("image");
expect(imageResults.some((result) => result.path.endsWith("diagram.png"))).toBe(true);
const audioResults = await manager.search("audio");
expect(audioResults.some((result) => result.path.endsWith("meeting.wav"))).toBe(true);
});
// The mocked Gemini embedder throws "payload too large" for inline data > 9000
// bytes (base64 of 7000 raw bytes exceeds that); the sync must skip the file
// rather than fail, and markdown indexing must proceed normally.
it("skips oversized multimodal inputs without aborting sync", async () => {
const mediaDir = path.join(workspaceDir, "media-oversize");
await fs.mkdir(mediaDir, { recursive: true });
await fs.writeFile(path.join(mediaDir, "huge.png"), Buffer.alloc(7000, 1));
const cfg = createCfg({
storePath: path.join(workspaceDir, `index-oversize-${randomUUID()}.sqlite`),
provider: "gemini",
model: "gemini-embedding-2-preview",
extraPaths: [mediaDir],
multimodal: { enabled: true, modalities: ["image"] },
});
const manager = requireManager(await getMemorySearchManager({ cfg, agentId: "main" }));
await manager.sync({ reason: "test" });
expect(embedBatchInputCalls).toBeGreaterThan(0);
// The oversized image must not appear in results...
const imageResults = await manager.search("image");
expect(imageResults.some((result) => result.path.endsWith("huge.png"))).toBe(false);
// ...while regular markdown memory files are still indexed.
const alphaResults = await manager.search("alpha");
expect(alphaResults.some((result) => result.path.endsWith("memory/2026-01-12.md"))).toBe(true);
await manager.close?.();
});
// Simulates a file that briefly disappears while being read mid-sync (ENOENT on
// the second read); a later sync must pick the file back up and index it.
it("reindexes a multimodal file after a transient mid-sync disappearance", async () => {
const mediaDir = path.join(workspaceDir, "media-race");
const imagePath = path.join(mediaDir, "diagram.png");
await fs.mkdir(mediaDir, { recursive: true });
await fs.writeFile(imagePath, Buffer.from("png"));
const cfg = createCfg({
storePath: path.join(workspaceDir, `index-race-${randomUUID()}.sqlite`),
provider: "gemini",
model: "gemini-embedding-2-preview",
extraPaths: [mediaDir],
multimodal: { enabled: true, modalities: ["image"] },
});
const manager = requireManager(await getMemorySearchManager({ cfg, agentId: "main" }));
const realReadFile = fs.readFile.bind(fs);
let imageReads = 0;
// Fail only the second read of the image to mimic a delete/replace race;
// all other reads pass through to the real fs implementation.
const readSpy = vi.spyOn(fs, "readFile").mockImplementation(async (...args) => {
const [targetPath] = args;
if (typeof targetPath === "string" && targetPath === imagePath) {
imageReads += 1;
if (imageReads === 2) {
const err = Object.assign(
new Error(`ENOENT: no such file or directory, open '${imagePath}'`),
{
code: "ENOENT",
},
) as NodeJS.ErrnoException;
throw err;
}
}
return await realReadFile(...args);
});
await manager.sync({ reason: "test" });
readSpy.mockRestore();
const callsAfterFirstSync = embedBatchInputCalls;
// Force another sync pass; the file should be re-embedded this time.
(manager as unknown as { dirty: boolean }).dirty = true;
await manager.sync({ reason: "test" });
expect(embedBatchInputCalls).toBeGreaterThan(callsAfterFirstSync);
const results = await manager.search("image");
expect(results.some((result) => result.path.endsWith("diagram.png"))).toBe(true);
await manager.close?.();
});
it("keeps dirty false in status-only manager after prior indexing", async () => {
const cfg = createCfg({ storePath: indexStatusPath });
@@ -433,6 +573,82 @@ describe("memory index", () => {
await secondManager.close?.();
});
// Scope-hash coverage: reopening the same store with a different extraPaths set
// must trigger a full reindex so stale entries from the old scope disappear.
it("reindexes when extraPaths change", async () => {
const storePath = path.join(workspaceDir, `index-scope-extra-${randomUUID()}.sqlite`);
const firstExtraDir = path.join(workspaceDir, "scope-extra-a");
const secondExtraDir = path.join(workspaceDir, "scope-extra-b");
await fs.rm(firstExtraDir, { recursive: true, force: true });
await fs.rm(secondExtraDir, { recursive: true, force: true });
await fs.mkdir(firstExtraDir, { recursive: true });
await fs.mkdir(secondExtraDir, { recursive: true });
await fs.writeFile(path.join(firstExtraDir, "a.md"), "alpha only");
await fs.writeFile(path.join(secondExtraDir, "b.md"), "beta only");
// First manager indexes scope-extra-a into the store.
const first = await getMemorySearchManager({
cfg: createCfg({
storePath,
extraPaths: [firstExtraDir],
}),
agentId: "main",
});
const firstManager = requireManager(first);
await firstManager.sync?.({ reason: "test" });
await firstManager.close?.();
// Second manager reuses the store but points at scope-extra-b only.
const second = await getMemorySearchManager({
cfg: createCfg({
storePath,
extraPaths: [secondExtraDir],
}),
agentId: "main",
});
const secondManager = requireManager(second);
await secondManager.sync?.({ reason: "test" });
const results = await secondManager.search("beta");
// New scope is present; the old scope's file must have been purged.
expect(results.some((result) => result.path.endsWith("scope-extra-b/b.md"))).toBe(true);
expect(results.some((result) => result.path.endsWith("scope-extra-a/a.md"))).toBe(false);
await secondManager.close?.();
});
// Enabling multimodal on an existing store changes the scope hash, so media
// files that were previously ignored must be picked up on the next sync.
it("reindexes when multimodal settings change", async () => {
const storePath = path.join(workspaceDir, `index-scope-multimodal-${randomUUID()}.sqlite`);
const mediaDir = path.join(workspaceDir, "scope-media");
await fs.rm(mediaDir, { recursive: true, force: true });
await fs.mkdir(mediaDir, { recursive: true });
await fs.writeFile(path.join(mediaDir, "diagram.png"), Buffer.from("png"));
// First pass: multimodal disabled, so the PNG is not indexed.
const first = await getMemorySearchManager({
cfg: createCfg({
storePath,
provider: "gemini",
model: "gemini-embedding-2-preview",
extraPaths: [mediaDir],
}),
agentId: "main",
});
const firstManager = requireManager(first);
await firstManager.sync?.({ reason: "test" });
const multimodalCallsAfterFirstSync = embedBatchInputCalls;
await firstManager.close?.();
// Second pass: same store, multimodal now enabled for images.
const second = await getMemorySearchManager({
cfg: createCfg({
storePath,
provider: "gemini",
model: "gemini-embedding-2-preview",
extraPaths: [mediaDir],
multimodal: { enabled: true, modalities: ["image"] },
}),
agentId: "main",
});
const secondManager = requireManager(second);
await secondManager.sync?.({ reason: "test" });
expect(embedBatchInputCalls).toBeGreaterThan(multimodalCallsAfterFirstSync);
const results = await secondManager.search("image");
expect(results.some((result) => result.path.endsWith("scope-media/diagram.png"))).toBe(true);
await secondManager.close?.();
});
it("reuses cached embeddings on forced reindex", async () => {
const cfg = createCfg({ storePath: indexMainPath, cacheEnabled: true });
const manager = await getPersistentManager(cfg);

View File

@@ -3,12 +3,17 @@ import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import {
buildMultimodalChunkForIndexing,
buildFileEntry,
chunkMarkdown,
listMemoryFiles,
normalizeExtraMemoryPaths,
remapChunkLines,
} from "./internal.js";
import {
DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
type MemoryMultimodalSettings,
} from "./multimodal.js";
function setupTempDirLifecycle(prefix: string): () => string {
let tmpDir = "";
@@ -38,6 +43,11 @@ describe("normalizeExtraMemoryPaths", () => {
describe("listMemoryFiles", () => {
const getTmpDir = setupTempDirLifecycle("memory-test-");
// Shared fixture: multimodal enabled for both modalities at the default size cap.
const multimodal: MemoryMultimodalSettings = {
enabled: true,
modalities: ["image", "audio"],
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
};
it("includes files from additional paths (directory)", async () => {
const tmpDir = getTmpDir();
@@ -131,10 +141,29 @@ describe("listMemoryFiles", () => {
const memoryMatches = files.filter((file) => file.endsWith("MEMORY.md"));
expect(memoryMatches).toHaveLength(1);
});
// Only recognized multimodal extensions are listed; unknown binaries are skipped.
it("includes image and audio files from extra paths when multimodal is enabled", async () => {
const tmpDir = getTmpDir();
const extraDir = path.join(tmpDir, "media");
await fs.mkdir(extraDir, { recursive: true });
await fs.writeFile(path.join(extraDir, "diagram.png"), Buffer.from("png"));
await fs.writeFile(path.join(extraDir, "note.wav"), Buffer.from("wav"));
// .bin is not a supported modality extension and must be excluded.
await fs.writeFile(path.join(extraDir, "ignore.bin"), Buffer.from("bin"));
const files = await listMemoryFiles(tmpDir, [extraDir], multimodal);
expect(files.some((file) => file.endsWith("diagram.png"))).toBe(true);
expect(files.some((file) => file.endsWith("note.wav"))).toBe(true);
expect(files.some((file) => file.endsWith("ignore.bin"))).toBe(false);
});
});
describe("buildFileEntry", () => {
const getTmpDir = setupTempDirLifecycle("memory-build-entry-");
// Shared fixture: multimodal enabled for both modalities at the default size cap.
const multimodal: MemoryMultimodalSettings = {
enabled: true,
modalities: ["image", "audio"],
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
};
it("returns null when the file disappears before reading", async () => {
const tmpDir = getTmpDir();
@@ -154,6 +183,37 @@ describe("buildFileEntry", () => {
expect(entry?.path).toBe("note.md");
expect(entry?.size).toBeGreaterThan(0);
});
// buildFileEntry should classify the PNG as multimodal and attach MIME + label
// metadata instead of treating it as markdown content.
it("returns multimodal metadata for eligible image files", async () => {
const tmpDir = getTmpDir();
const target = path.join(tmpDir, "diagram.png");
await fs.writeFile(target, Buffer.from("png"));
const entry = await buildFileEntry(target, tmpDir, multimodal);
expect(entry).toMatchObject({
path: "diagram.png",
kind: "multimodal",
modality: "image",
mimeType: "image/png",
contentText: "Image file: diagram.png",
});
});
// The chunk's embedding input must carry the text label followed by the base64
// inline-data part, and a positive structured-size estimate for batching.
it("builds a multimodal chunk lazily for indexing", async () => {
const tmpDir = getTmpDir();
const target = path.join(tmpDir, "diagram.png");
await fs.writeFile(target, Buffer.from("png"));
const entry = await buildFileEntry(target, tmpDir, multimodal);
const built = await buildMultimodalChunkForIndexing(entry!);
expect(built?.chunk.embeddingInput?.parts).toEqual([
{ type: "text", text: "Image file: diagram.png" },
expect.objectContaining({ type: "inline-data", mimeType: "image/png" }),
]);
expect(built?.structuredInputBytes).toBeGreaterThan(0);
});
});
describe("chunkMarkdown", () => {

View File

@@ -2,8 +2,17 @@ import crypto from "node:crypto";
import fsSync from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
import { detectMime } from "../media/mime.js";
import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
import { estimateStructuredEmbeddingInputBytes } from "./embedding-input-limits.js";
import { buildTextEmbeddingInput, type EmbeddingInput } from "./embedding-inputs.js";
import { isFileMissingError } from "./fs-utils.js";
import {
buildMemoryMultimodalLabel,
classifyMemoryMultimodalPath,
type MemoryMultimodalModality,
type MemoryMultimodalSettings,
} from "./multimodal.js";
export type MemoryFileEntry = {
path: string;
@@ -11,6 +20,10 @@ export type MemoryFileEntry = {
mtimeMs: number;
size: number;
hash: string;
kind?: "markdown" | "multimodal";
contentText?: string;
modality?: MemoryMultimodalModality;
mimeType?: string;
};
export type MemoryChunk = {
@@ -18,6 +31,18 @@ export type MemoryChunk = {
endLine: number;
text: string;
hash: string;
embeddingInput?: EmbeddingInput;
};
export type MultimodalMemoryChunk = {
chunk: MemoryChunk;
structuredInputBytes: number;
};
const DISABLED_MULTIMODAL_SETTINGS: MemoryMultimodalSettings = {
enabled: false,
modalities: [],
maxFileBytes: 0,
};
export function ensureDir(dir: string): string {
@@ -56,7 +81,16 @@ export function isMemoryPath(relPath: string): boolean {
return normalized.startsWith("memory/");
}
async function walkDir(dir: string, files: string[]) {
/**
 * Decides whether a path may be indexed as a memory file: Markdown always
 * qualifies; anything else qualifies only when it classifies as an enabled
 * multimodal modality (image/audio extension).
 *
 * NOTE(review): the ".md" check here is case-sensitive, while the watcher path
 * uses toLowerCase().endsWith(".md") — confirm whether "NOTES.MD" should index.
 */
function isAllowedMemoryFilePath(filePath: string, multimodal?: MemoryMultimodalSettings): boolean {
if (filePath.endsWith(".md")) {
return true;
}
// When no settings are supplied, use the disabled sentinel so classification fails.
return (
classifyMemoryMultimodalPath(filePath, multimodal ?? DISABLED_MULTIMODAL_SETTINGS) !== null
);
}
async function walkDir(dir: string, files: string[], multimodal?: MemoryMultimodalSettings) {
const entries = await fs.readdir(dir, { withFileTypes: true });
for (const entry of entries) {
const full = path.join(dir, entry.name);
@@ -64,13 +98,13 @@ async function walkDir(dir: string, files: string[]) {
continue;
}
if (entry.isDirectory()) {
await walkDir(full, files);
await walkDir(full, files, multimodal);
continue;
}
if (!entry.isFile()) {
continue;
}
if (!entry.name.endsWith(".md")) {
if (!isAllowedMemoryFilePath(full, multimodal)) {
continue;
}
files.push(full);
@@ -80,6 +114,7 @@ async function walkDir(dir: string, files: string[]) {
export async function listMemoryFiles(
workspaceDir: string,
extraPaths?: string[],
multimodal?: MemoryMultimodalSettings,
): Promise<string[]> {
const result: string[] = [];
const memoryFile = path.join(workspaceDir, "MEMORY.md");
@@ -117,10 +152,10 @@ export async function listMemoryFiles(
continue;
}
if (stat.isDirectory()) {
await walkDir(inputPath, result);
await walkDir(inputPath, result, multimodal);
continue;
}
if (stat.isFile() && inputPath.endsWith(".md")) {
if (stat.isFile() && isAllowedMemoryFilePath(inputPath, multimodal)) {
result.push(inputPath);
}
} catch {}
@@ -152,6 +187,7 @@ export function hashText(value: string): string {
export async function buildFileEntry(
absPath: string,
workspaceDir: string,
multimodal?: MemoryMultimodalSettings,
): Promise<MemoryFileEntry | null> {
let stat;
try {
@@ -162,6 +198,48 @@ export async function buildFileEntry(
}
throw err;
}
const normalizedPath = path.relative(workspaceDir, absPath).replace(/\\/g, "/");
const multimodalSettings = multimodal ?? DISABLED_MULTIMODAL_SETTINGS;
const modality = classifyMemoryMultimodalPath(absPath, multimodalSettings);
if (modality) {
if (stat.size > multimodalSettings.maxFileBytes) {
return null;
}
let buffer: Buffer;
try {
buffer = await fs.readFile(absPath);
} catch (err) {
if (isFileMissingError(err)) {
return null;
}
throw err;
}
const mimeType = await detectMime({ buffer: buffer.subarray(0, 512), filePath: absPath });
if (!mimeType || !mimeType.startsWith(`${modality}/`)) {
return null;
}
const contentText = buildMemoryMultimodalLabel(modality, normalizedPath);
const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
const chunkHash = hashText(
JSON.stringify({
path: normalizedPath,
contentText,
mimeType,
dataHash,
}),
);
return {
path: normalizedPath,
absPath,
mtimeMs: stat.mtimeMs,
size: stat.size,
hash: chunkHash,
kind: "multimodal",
contentText,
modality,
mimeType,
};
}
let content: string;
try {
content = await fs.readFile(absPath, "utf-8");
@@ -173,11 +251,59 @@ export async function buildFileEntry(
}
const hash = hashText(content);
return {
path: path.relative(workspaceDir, absPath).replace(/\\/g, "/"),
path: normalizedPath,
absPath,
mtimeMs: stat.mtimeMs,
size: stat.size,
hash,
kind: "markdown",
};
}
/**
 * Reads a multimodal entry's bytes from disk and packages them as a structured
 * embedding input: the text label first, then the base64-encoded inline data.
 *
 * Returns null when the entry is not multimodal, lacks label/MIME metadata, or
 * the underlying file vanished before the read (a benign race during sync).
 */
async function loadMultimodalEmbeddingInput(
  entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind">,
): Promise<EmbeddingInput | null> {
  const { absPath, contentText, mimeType, kind } = entry;
  if (kind !== "multimodal" || !contentText || !mimeType) {
    return null;
  }
  let raw: Buffer;
  try {
    raw = await fs.readFile(absPath);
  } catch (err) {
    // A file deleted mid-sync is expected; anything else is a real failure.
    if (isFileMissingError(err)) {
      return null;
    }
    throw err;
  }
  const parts: EmbeddingInput["parts"] = [
    { type: "text", text: contentText },
    {
      type: "inline-data",
      mimeType,
      data: raw.toString("base64"),
    },
  ];
  return { text: contentText, parts };
}
export async function buildMultimodalChunkForIndexing(
entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind" | "hash">,
): Promise<MultimodalMemoryChunk | null> {
const embeddingInput = await loadMultimodalEmbeddingInput(entry);
if (!embeddingInput) {
return null;
}
return {
chunk: {
startLine: 1,
endLine: 1,
text: entry.contentText ?? embeddingInput.text,
hash: entry.hash,
embeddingInput,
},
structuredInputBytes: estimateStructuredEmbeddingInputBytes(embeddingInput),
};
}
@@ -213,6 +339,7 @@ export function chunkMarkdown(
endLine,
text,
hash: hashText(text),
embeddingInput: buildTextEmbeddingInput(text),
});
};

View File

@@ -8,9 +8,14 @@ import {
} from "./batch-openai.js";
import { type VoyageBatchRequest, runVoyageEmbeddingBatches } from "./batch-voyage.js";
import { enforceEmbeddingMaxInputTokens } from "./embedding-chunk-limits.js";
import { estimateUtf8Bytes } from "./embedding-input-limits.js";
import { buildGeminiTextEmbeddingRequest } from "./embeddings-gemini.js";
import {
estimateStructuredEmbeddingInputBytes,
estimateUtf8Bytes,
} from "./embedding-input-limits.js";
import { type EmbeddingInput, hasNonTextEmbeddingParts } from "./embedding-inputs.js";
import { buildGeminiEmbeddingRequest } from "./embeddings-gemini.js";
import {
buildMultimodalChunkForIndexing,
chunkMarkdown,
hashText,
parseEmbedding,
@@ -53,7 +58,9 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
let currentTokens = 0;
for (const chunk of chunks) {
const estimate = estimateUtf8Bytes(chunk.text);
const estimate = chunk.embeddingInput
? estimateStructuredEmbeddingInputBytes(chunk.embeddingInput)
: estimateUtf8Bytes(chunk.text);
const wouldExceed =
current.length > 0 && currentTokens + estimate > EMBEDDING_BATCH_MAX_TOKENS;
if (wouldExceed) {
@@ -188,9 +195,22 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
const missingChunks = missing.map((m) => m.chunk);
const batches = this.buildEmbeddingBatches(missingChunks);
const toCache: Array<{ hash: string; embedding: number[] }> = [];
const provider = this.provider;
if (!provider) {
throw new Error("Cannot embed batch in FTS-only mode (no embedding provider)");
}
let cursor = 0;
for (const batch of batches) {
const batchEmbeddings = await this.embedBatchWithRetry(batch.map((chunk) => chunk.text));
const inputs = batch.map((chunk) => chunk.embeddingInput ?? { text: chunk.text });
const hasStructuredInputs = inputs.some((input) => hasNonTextEmbeddingParts(input));
if (hasStructuredInputs && !provider.embedBatchInputs) {
throw new Error(
`Embedding provider "${provider.id}" does not support multimodal memory inputs.`,
);
}
const batchEmbeddings = hasStructuredInputs
? await this.embedBatchInputsWithRetry(inputs)
: await this.embedBatchWithRetry(batch.map((chunk) => chunk.text));
for (let i = 0; i < batch.length; i += 1) {
const item = missing[cursor + i];
const embedding = batchEmbeddings[i] ?? [];
@@ -476,6 +496,9 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
source: MemorySource,
): Promise<number[][]> {
const gemini = this.gemini;
if (chunks.some((chunk) => hasNonTextEmbeddingParts(chunk.embeddingInput))) {
return await this.embedChunksInBatches(chunks);
}
return await this.embedChunksWithProviderBatch<GeminiBatchRequest>({
chunks,
entry,
@@ -483,9 +506,10 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
provider: "gemini",
enabled: Boolean(gemini),
buildRequest: (chunk) => ({
request: buildGeminiTextEmbeddingRequest({
text: chunk.text,
request: buildGeminiEmbeddingRequest({
input: chunk.embeddingInput ?? { text: chunk.text },
taskType: "RETRIEVAL_DOCUMENT",
modelPath: this.gemini?.modelPath,
outputDimensionality: this.gemini?.outputDimensionality,
}),
}),
@@ -536,6 +560,45 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
}
}
/**
 * Embeds a batch of structured (possibly multimodal) inputs with exponential
 * backoff on retryable provider errors.
 *
 * Falls back to the plain text batch path when the provider does not implement
 * embedBatchInputs. Non-retryable errors, or exhausting the retry budget,
 * rethrow to the caller.
 */
protected async embedBatchInputsWithRetry(inputs: EmbeddingInput[]): Promise<number[][]> {
if (inputs.length === 0) {
return [];
}
// Providers without structured-input support only receive the text labels.
if (!this.provider?.embedBatchInputs) {
return await this.embedBatchWithRetry(inputs.map((input) => input.text));
}
let attempt = 0;
let delayMs = EMBEDDING_RETRY_BASE_DELAY_MS;
while (true) {
try {
const timeoutMs = this.resolveEmbeddingTimeout("batch");
log.debug("memory embeddings: structured batch start", {
provider: this.provider.id,
items: inputs.length,
timeoutMs,
});
return await this.withTimeout(
this.provider.embedBatchInputs(inputs),
timeoutMs,
`memory embeddings batch timed out after ${Math.round(timeoutMs / 1000)}s`,
);
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
// Only rate-limit/transient failures are retried, up to the attempt cap.
if (!this.isRetryableEmbeddingError(message) || attempt >= EMBEDDING_RETRY_MAX_ATTEMPTS) {
throw err;
}
// Jittered exponential backoff (up to +20%), capped at the max delay.
const waitMs = Math.min(
EMBEDDING_RETRY_MAX_DELAY_MS,
Math.round(delayMs * (1 + Math.random() * 0.2)),
);
log.warn(`memory embeddings rate limited; retrying structured batch in ${waitMs}ms`);
await new Promise((resolve) => setTimeout(resolve, waitMs));
delayMs *= 2;
attempt += 1;
}
}
}
private isRetryableEmbeddingError(message: string): boolean {
return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare|tokens per day)/i.test(
message,
@@ -695,6 +758,49 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
return this.batch.enabled ? this.batch.concurrency : EMBEDDING_INDEX_CONCURRENCY;
}
/**
 * Deletes all indexed data (vector rows, FTS rows, chunk rows) for a single
 * file within one source scope. Vector/FTS deletes are best-effort: failures
 * are swallowed because those tables may be absent or partially initialized.
 */
private clearIndexedFileData(pathname: string, source: MemorySource): void {
if (this.vector.enabled) {
try {
// Vector rows are keyed by chunk id, so resolve ids via the chunks table.
this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(pathname, source);
} catch {}
}
if (this.fts.enabled && this.fts.available && this.provider) {
try {
// FTS rows are additionally scoped by embedding model.
this.db
.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
.run(pathname, source, this.provider.model);
} catch {}
}
// Chunk rows are removed last, after the id-dependent vector delete above.
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(pathname, source);
}
/**
 * Inserts or refreshes the files-table row tracking a file's hash/mtime/size.
 * The path is the conflict key, so a path that moves between sources is
 * reassigned to the new source on update.
 */
private upsertFileRecord(entry: MemoryFileEntry | SessionFileEntry, source: MemorySource): void {
this.db
.prepare(
`INSERT INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
source=excluded.source,
hash=excluded.hash,
mtime=excluded.mtime,
size=excluded.size`,
)
.run(entry.path, source, entry.hash, entry.mtimeMs, entry.size);
}
/** Removes the files-table row for a path/source pair (chunk data is cleared separately). */
private deleteFileRecord(pathname: string, source: MemorySource): void {
this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`).run(pathname, source);
}
/**
 * Heuristically detects provider errors meaning the structured (inline-data)
 * payload exceeded a size or token limit, so the file can be skipped instead
 * of failing the whole sync.
 */
private isStructuredInputTooLargeError(message: string): boolean {
  const oversizePattern =
    /(413|payload too large|request too large|input too large|too many tokens|input limit|request size)/i;
  return oversizePattern.test(message);
}
protected async indexFile(
entry: MemoryFileEntry | SessionFileEntry,
options: { source: MemorySource; content?: string },
@@ -708,42 +814,59 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
return;
}
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
const chunks = enforceEmbeddingMaxInputTokens(
this.provider,
chunkMarkdown(content, this.settings.chunking).filter(
(chunk) => chunk.text.trim().length > 0,
),
EMBEDDING_BATCH_MAX_TOKENS,
);
if (options.source === "sessions" && "lineMap" in entry) {
remapChunkLines(chunks, entry.lineMap);
let chunks: MemoryChunk[];
let structuredInputBytes: number | undefined;
if ("kind" in entry && entry.kind === "multimodal") {
const multimodalChunk = await buildMultimodalChunkForIndexing(entry);
if (!multimodalChunk) {
this.clearIndexedFileData(entry.path, options.source);
this.deleteFileRecord(entry.path, options.source);
return;
}
structuredInputBytes = multimodalChunk.structuredInputBytes;
chunks = [multimodalChunk.chunk];
} else {
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
chunks = enforceEmbeddingMaxInputTokens(
this.provider,
chunkMarkdown(content, this.settings.chunking).filter(
(chunk) => chunk.text.trim().length > 0,
),
EMBEDDING_BATCH_MAX_TOKENS,
);
if (options.source === "sessions" && "lineMap" in entry) {
remapChunkLines(chunks, entry.lineMap);
}
}
let embeddings: number[][];
try {
embeddings = this.batch.enabled
? await this.embedChunksWithBatch(chunks, entry, options.source)
: await this.embedChunksInBatches(chunks);
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
if (
"kind" in entry &&
entry.kind === "multimodal" &&
this.isStructuredInputTooLargeError(message)
) {
log.warn("memory embeddings: skipping multimodal file rejected as too large", {
path: entry.path,
bytes: structuredInputBytes,
provider: this.provider.id,
model: this.provider.model,
error: message,
});
this.clearIndexedFileData(entry.path, options.source);
this.upsertFileRecord(entry, options.source);
return;
}
throw err;
}
const embeddings = this.batch.enabled
? await this.embedChunksWithBatch(chunks, entry, options.source)
: await this.embedChunksInBatches(chunks);
const sample = embeddings.find((embedding) => embedding.length > 0);
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;
const now = Date.now();
if (vectorReady) {
try {
this.db
.prepare(
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
)
.run(entry.path, options.source);
} catch {}
}
if (this.fts.enabled && this.fts.available) {
try {
this.db
.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
.run(entry.path, options.source, this.provider.model);
} catch {}
}
this.db
.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`)
.run(entry.path, options.source);
this.clearIndexedFileData(entry.path, options.source);
for (let i = 0; i < chunks.length; i++) {
const chunk = chunks[i];
const embedding = embeddings[i] ?? [];
@@ -798,15 +921,6 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
);
}
}
this.db
.prepare(
`INSERT INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
source=excluded.source,
hash=excluded.hash,
mtime=excluded.mtime,
size=excluded.size`,
)
.run(entry.path, options.source, entry.hash, entry.mtimeMs, entry.size);
this.upsertFileRecord(entry, options.source);
}
}

View File

@@ -29,12 +29,18 @@ import { isFileMissingError } from "./fs-utils.js";
import {
buildFileEntry,
ensureDir,
hashText,
listMemoryFiles,
normalizeExtraMemoryPaths,
runWithConcurrency,
} from "./internal.js";
import { type MemoryFileEntry } from "./internal.js";
import { ensureMemoryIndexSchema } from "./memory-schema.js";
import {
buildCaseInsensitiveExtensionGlob,
classifyMemoryMultimodalPath,
getMemoryMultimodalExtensions,
} from "./multimodal.js";
import type { SessionFileEntry } from "./session-files.js";
import {
buildSessionEntry,
@@ -50,6 +56,7 @@ type MemoryIndexMeta = {
provider: string;
providerKey?: string;
sources?: MemorySource[];
scopeHash?: string;
chunkTokens: number;
chunkOverlap: number;
vectorDims?: number;
@@ -383,9 +390,22 @@ export abstract class MemoryManagerSyncOps {
}
if (stat.isDirectory()) {
watchPaths.add(path.join(entry, "**", "*.md"));
if (this.settings.multimodal.enabled) {
for (const modality of this.settings.multimodal.modalities) {
for (const extension of getMemoryMultimodalExtensions(modality)) {
watchPaths.add(
path.join(entry, "**", buildCaseInsensitiveExtensionGlob(extension)),
);
}
}
}
continue;
}
if (stat.isFile() && entry.toLowerCase().endsWith(".md")) {
if (
stat.isFile() &&
(entry.toLowerCase().endsWith(".md") ||
classifyMemoryMultimodalPath(entry, this.settings.multimodal) !== null)
) {
watchPaths.add(entry);
}
} catch {
@@ -649,9 +669,19 @@ export abstract class MemoryManagerSyncOps {
return;
}
const files = await listMemoryFiles(this.workspaceDir, this.settings.extraPaths);
const files = await listMemoryFiles(
this.workspaceDir,
this.settings.extraPaths,
this.settings.multimodal,
);
const fileEntries = (
await Promise.all(files.map(async (file) => buildFileEntry(file, this.workspaceDir)))
await runWithConcurrency(
files.map(
(file) => async () =>
await buildFileEntry(file, this.workspaceDir, this.settings.multimodal),
),
this.getIndexConcurrency(),
)
).filter((entry): entry is MemoryFileEntry => entry !== null);
log.debug("memory sync: indexing memory files", {
files: fileEntries.length,
@@ -868,6 +898,7 @@ export abstract class MemoryManagerSyncOps {
const vectorReady = await this.ensureVectorReady();
const meta = this.readMeta();
const configuredSources = this.resolveConfiguredSourcesForMeta();
const configuredScopeHash = this.resolveConfiguredScopeHash();
const needsFullReindex =
params?.force ||
!meta ||
@@ -875,6 +906,7 @@ export abstract class MemoryManagerSyncOps {
(this.provider && meta.provider !== this.provider.id) ||
meta.providerKey !== this.providerKey ||
this.metaSourcesDiffer(meta, configuredSources) ||
meta.scopeHash !== configuredScopeHash ||
meta.chunkTokens !== this.settings.chunking.tokens ||
meta.chunkOverlap !== this.settings.chunking.overlap ||
(vectorReady && !meta?.vectorDims);
@@ -1088,6 +1120,7 @@ export abstract class MemoryManagerSyncOps {
provider: this.provider?.id ?? "none",
providerKey: this.providerKey!,
sources: this.resolveConfiguredSourcesForMeta(),
scopeHash: this.resolveConfiguredScopeHash(),
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
};
@@ -1159,6 +1192,7 @@ export abstract class MemoryManagerSyncOps {
provider: this.provider?.id ?? "none",
providerKey: this.providerKey!,
sources: this.resolveConfiguredSourcesForMeta(),
scopeHash: this.resolveConfiguredScopeHash(),
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
};
@@ -1236,6 +1270,22 @@ export abstract class MemoryManagerSyncOps {
return normalized.length > 0 ? normalized : ["memory"];
}
/**
 * Computes a stable hash of the indexing scope (normalized extraPaths plus the
 * multimodal settings). Stored in index meta; any change forces a full
 * reindex. Inputs are sorted and slash-normalized so ordering and platform
 * path separators do not alter the hash.
 */
private resolveConfiguredScopeHash(): string {
const extraPaths = normalizeExtraMemoryPaths(this.workspaceDir, this.settings.extraPaths)
.map((value) => value.replace(/\\/g, "/"))
.toSorted();
return hashText(
JSON.stringify({
extraPaths,
multimodal: {
enabled: this.settings.multimodal.enabled,
modalities: [...this.settings.multimodal.modalities].toSorted(),
maxFileBytes: this.settings.multimodal.maxFileBytes,
},
}),
);
}
private metaSourcesDiffer(meta: MemoryIndexMeta, configuredSources: MemorySource[]): boolean {
const metaSources = this.normalizeMetaSources(meta);
if (metaSources.length !== configuredSources.length) {

View File

@@ -106,4 +106,50 @@ describe("memory watcher config", () => {
expect(ignored?.(path.join(workspaceDir, "memory", ".venv", "lib", "python.md"))).toBe(true);
expect(ignored?.(path.join(workspaceDir, "memory", "project", "notes.md"))).toBe(false);
});
// The file watcher should receive per-extension globs like "*.[pP][nN][gG]" so
// uppercase filenames (e.g. PHOTO.PNG) are matched on case-sensitive filesystems.
it("watches multimodal extensions with case-insensitive globs", async () => {
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-memory-watch-"));
extraDir = path.join(workspaceDir, "extra");
await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
await fs.mkdir(extraDir, { recursive: true });
await fs.writeFile(path.join(extraDir, "PHOTO.PNG"), "png");
const cfg = {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "gemini",
model: "gemini-embedding-2-preview",
fallback: "none",
store: { path: path.join(workspaceDir, "index.sqlite"), vector: { enabled: false } },
sync: { watch: true, watchDebounceMs: 25, onSessionStart: false, onSearch: false },
query: { minScore: 0, hybrid: { enabled: false } },
extraPaths: [extraDir],
multimodal: { enabled: true, modalities: ["image", "audio"] },
},
},
list: [{ id: "main", default: true }],
},
} as OpenClawConfig;
const result = await getMemorySearchManager({ cfg, agentId: "main" });
expect(result.manager).not.toBeNull();
if (!result.manager) {
throw new Error("manager missing");
}
manager = result.manager as unknown as MemoryIndexManager;
expect(watchMock).toHaveBeenCalledTimes(1);
// Inspect the paths handed to the (mocked) watcher on its single invocation.
const [watchedPaths] = watchMock.mock.calls[0] as unknown as [
string[],
Record<string, unknown>,
];
expect(watchedPaths).toEqual(
expect.arrayContaining([
path.join(extraDir, "**", "*.[pP][nN][gG]"),
path.join(extraDir, "**", "*.[wW][aA][vV]"),
]),
);
});
});

118
src/memory/multimodal.ts Normal file
View File

@@ -0,0 +1,118 @@
// Per-modality indexing spec: the label prefix used when describing an indexed
// file, and the file extensions (lowercase, dot-prefixed) treated as that
// modality. The modality union and helper functions below derive from this
// table, so adding a modality here is the single point of change.
const MEMORY_MULTIMODAL_SPECS = {
  image: {
    labelPrefix: "Image file",
    extensions: [".jpg", ".jpeg", ".png", ".webp", ".gif", ".heic", ".heif"],
  },
  audio: {
    labelPrefix: "Audio file",
    extensions: [".mp3", ".wav", ".ogg", ".opus", ".m4a", ".aac", ".flac"],
  },
} as const;
// Concrete modality identifier: "image" | "audio" (keys of the spec table).
export type MemoryMultimodalModality = keyof typeof MEMORY_MULTIMODAL_SPECS;
export const MEMORY_MULTIMODAL_MODALITIES = Object.keys(
  MEMORY_MULTIMODAL_SPECS,
) as MemoryMultimodalModality[];
// Configuration-facing selection value: a concrete modality or "all".
export type MemoryMultimodalSelection = MemoryMultimodalModality | "all";
// Normalized multimodal settings consumed by the memory indexer.
export type MemoryMultimodalSettings = {
  enabled: boolean;
  modalities: MemoryMultimodalModality[];
  maxFileBytes: number;
};
// Default per-file size cap for multimodal indexing: 10 MiB.
export const DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES = 10 * 1024 * 1024;
/**
 * Normalize a user-supplied modality selection to a deduplicated list of
 * concrete modalities.
 *
 * Omitting the selection or including "all" selects every known modality.
 * Unknown values are dropped; duplicates are removed while preserving the
 * first-occurrence order of the input.
 */
export function normalizeMemoryMultimodalModalities(
  raw: MemoryMultimodalSelection[] | undefined,
): MemoryMultimodalModality[] {
  if (raw === undefined || raw.includes("all")) {
    return [...MEMORY_MULTIMODAL_MODALITIES];
  }
  const normalized = new Set<MemoryMultimodalModality>();
  for (const value of raw) {
    // Validate against the spec-derived list instead of hard-coding
    // "image"/"audio", so modalities added to MEMORY_MULTIMODAL_SPECS are
    // accepted here automatically.
    if (value !== "all" && MEMORY_MULTIMODAL_MODALITIES.includes(value)) {
      normalized.add(value);
    }
  }
  return Array.from(normalized);
}
/**
 * Coerce raw (possibly partial) multimodal config into fully-populated
 * settings.
 *
 * The feature is on only for an explicit `enabled: true`; when off, the
 * modality list is emptied so downstream gates short-circuit. The size cap
 * falls back to the default for missing/non-finite values and is otherwise
 * floored and clamped to at least one byte.
 */
export function normalizeMemoryMultimodalSettings(raw: {
  enabled?: boolean;
  modalities?: MemoryMultimodalSelection[];
  maxFileBytes?: number;
}): MemoryMultimodalSettings {
  const enabled = raw.enabled === true;
  let maxFileBytes = DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES;
  if (typeof raw.maxFileBytes === "number" && Number.isFinite(raw.maxFileBytes)) {
    maxFileBytes = Math.max(1, Math.floor(raw.maxFileBytes));
  }
  const modalities = enabled ? normalizeMemoryMultimodalModalities(raw.modalities) : [];
  return { enabled, modalities, maxFileBytes };
}
/**
 * Whether multimodal indexing is effectively active: the flag must be on AND
 * at least one modality must be selected.
 */
export function isMemoryMultimodalEnabled(settings: MemoryMultimodalSettings): boolean {
  if (!settings.enabled) {
    return false;
  }
  return settings.modalities.length !== 0;
}
/**
 * Return the dot-prefixed, lowercase file extensions recognized for the given
 * modality, straight from the spec table.
 */
export function getMemoryMultimodalExtensions(
  modality: MemoryMultimodalModality,
): readonly string[] {
  const { extensions } = MEMORY_MULTIMODAL_SPECS[modality];
  return extensions;
}
/**
 * Build the human-readable label for an indexed multimodal file,
 * e.g. `"Image file: extra/photo.png"`.
 */
export function buildMemoryMultimodalLabel(
  modality: MemoryMultimodalModality,
  normalizedPath: string,
): string {
  const prefix = MEMORY_MULTIMODAL_SPECS[modality].labelPrefix;
  return [prefix, normalizedPath].join(": ");
}
/**
 * Build a glob that matches a file extension case-insensitively by expanding
 * each character into a `[xX]` bracket class, e.g. `".png"` -> `"*.[pP][nN][gG]"`.
 * A blank/empty extension yields the match-anything glob `"*"`.
 */
export function buildCaseInsensitiveExtensionGlob(extension: string): string {
  // Drop surrounding whitespace and an optional leading dot before expanding.
  const cleaned = extension.trim().replace(/^\./, "").toLowerCase();
  if (cleaned === "") {
    return "*";
  }
  let pattern = "*.";
  for (const char of cleaned) {
    // `cleaned` is already lowercase, so the class is [lower upper].
    pattern += `[${char}${char.toUpperCase()}]`;
  }
  return pattern;
}
/**
 * Classify a file path as one of the enabled modalities by extension, or
 * return null when multimodal indexing is off or no enabled modality matches.
 * Matching is case-insensitive and ignores surrounding whitespace in the path.
 */
export function classifyMemoryMultimodalPath(
  filePath: string,
  settings: MemoryMultimodalSettings,
): MemoryMultimodalModality | null {
  if (!isMemoryMultimodalEnabled(settings)) {
    return null;
  }
  const candidate = filePath.trim().toLowerCase();
  const matched = settings.modalities.find((modality) =>
    getMemoryMultimodalExtensions(modality).some((extension) => candidate.endsWith(extension)),
  );
  return matched ?? null;
}
/**
 * Canonicalize a Gemini embedding model identifier by trimming whitespace and
 * stripping an optional leading `models/` segment followed by an optional
 * `gemini/` or `google/` provider segment. Blank input yields `""`.
 */
export function normalizeGeminiEmbeddingModelForMemory(model: string): string {
  const trimmed = model.trim();
  if (trimmed === "") {
    return "";
  }
  // Order matters: "models/gemini/foo" peels to "gemini/foo", then to "foo".
  const withoutModelsPrefix = trimmed.replace(/^models\//, "");
  return withoutModelsPrefix.replace(/^(gemini|google)\//, "");
}
/**
 * Strict gate for multimodal memory embeddings: only the Gemini provider with
 * the (normalized) `gemini-embedding-2-preview` model qualifies.
 */
export function supportsMemoryMultimodalEmbeddings(params: {
  provider: string;
  model: string;
}): boolean {
  const { provider, model } = params;
  if (provider !== "gemini") {
    return false;
  }
  const canonical = normalizeGeminiEmbeddingModelForMemory(model);
  return canonical === "gemini-embedding-2-preview";
}