mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-11 23:10:29 +00:00
Memory: add multimodal image and audio indexing (#43460)
Merged via squash.
Prepared head SHA: a994c07190
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
committed by
GitHub
parent
20d097ac2f
commit
d79ca52960
@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
|
||||
- iOS/TestFlight: add a local beta release flow with Fastlane prepare/archive/upload support, canonical beta bundle IDs, and watch-app archive fixes. (#42991) Thanks @ngutman.
|
||||
- macOS/onboarding: detect when remote gateways need a shared auth token, explain where to find it on the gateway host, and clarify when a successful check used paired-device auth instead. (#43100) Thanks @ngutman.
|
||||
- Onboarding/Ollama: add first-class Ollama setup with Local or Cloud + Local modes, browser-based cloud sign-in, curated model suggestions, and cloud-model handling that skips unnecessary local pulls. (#41529) Thanks @BruceMacD.
|
||||
- Memory: add opt-in multimodal image and audio indexing for `memorySearch.extraPaths` with Gemini `gemini-embedding-2-preview`, strict fallback gating, and scope-based reindexing. (#43460) Thanks @gumadeiras.
|
||||
|
||||
### Breaking
|
||||
|
||||
|
||||
@@ -284,9 +284,46 @@ Notes:
|
||||
|
||||
- Paths can be absolute or workspace-relative.
|
||||
- Directories are scanned recursively for `.md` files.
|
||||
- Only Markdown files are indexed.
|
||||
- By default, only Markdown files are indexed.
|
||||
- If `memorySearch.multimodal.enabled = true`, OpenClaw also indexes supported image/audio files under `extraPaths` only. Default memory roots (`MEMORY.md`, `memory.md`, `memory/**/*.md`) stay Markdown-only.
|
||||
- Symlinks are ignored (files or directories).
|
||||
|
||||
### Multimodal memory files (Gemini image + audio)
|
||||
|
||||
OpenClaw can index image and audio files from `memorySearch.extraPaths` when using Gemini embedding 2:
|
||||
|
||||
```json5
|
||||
agents: {
|
||||
defaults: {
|
||||
memorySearch: {
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
extraPaths: ["assets/reference", "voice-notes"],
|
||||
multimodal: {
|
||||
enabled: true,
|
||||
modalities: ["image", "audio"], // or ["all"]
|
||||
maxFileBytes: 10000000
|
||||
},
|
||||
remote: {
|
||||
apiKey: "YOUR_GEMINI_API_KEY"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- Multimodal memory is currently supported only for `gemini-embedding-2-preview`.
|
||||
- Multimodal indexing applies only to files discovered through `memorySearch.extraPaths`.
|
||||
- Supported modalities in this phase: image and audio.
|
||||
- `memorySearch.fallback` must stay `"none"` while multimodal memory is enabled.
|
||||
- Matching image/audio file bytes are uploaded to the configured Gemini embedding endpoint during indexing.
|
||||
- Supported image extensions: `.jpg`, `.jpeg`, `.png`, `.webp`, `.gif`, `.heic`, `.heif`.
|
||||
- Supported audio extensions: `.mp3`, `.wav`, `.ogg`, `.opus`, `.m4a`, `.aac`, `.flac`.
|
||||
- Search queries remain text, but Gemini can compare those text queries against indexed image/audio embeddings.
|
||||
- `memory_get` still reads Markdown only; binary files are searchable but not returned as raw file contents.
|
||||
|
||||
### Gemini embeddings (native)
|
||||
|
||||
Set the provider to `gemini` to use the Gemini embeddings API directly:
|
||||
|
||||
@@ -131,6 +131,113 @@ describe("memory search config", () => {
|
||||
expect(resolved?.extraPaths).toEqual(["/shared/notes", "docs", "../team-notes"]);
|
||||
});
|
||||
|
||||
it("normalizes multimodal settings", () => {
|
||||
const cfg = asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
memorySearch: {
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
multimodal: {
|
||||
enabled: true,
|
||||
modalities: ["all"],
|
||||
maxFileBytes: 8192,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||
expect(resolved?.multimodal).toEqual({
|
||||
enabled: true,
|
||||
modalities: ["image", "audio"],
|
||||
maxFileBytes: 8192,
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps an explicit empty multimodal modalities list empty", () => {
|
||||
const cfg = asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
memorySearch: {
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
multimodal: {
|
||||
enabled: true,
|
||||
modalities: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||
expect(resolved?.multimodal).toEqual({
|
||||
enabled: true,
|
||||
modalities: [],
|
||||
maxFileBytes: 10 * 1024 * 1024,
|
||||
});
|
||||
expect(resolved?.provider).toBe("gemini");
|
||||
});
|
||||
|
||||
it("does not enforce multimodal provider validation when no modalities are active", () => {
|
||||
const cfg = asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "text-embedding-3-small",
|
||||
fallback: "openai",
|
||||
multimodal: {
|
||||
enabled: true,
|
||||
modalities: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||
expect(resolved?.multimodal).toEqual({
|
||||
enabled: true,
|
||||
modalities: [],
|
||||
maxFileBytes: 10 * 1024 * 1024,
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects multimodal memory on unsupported providers", () => {
|
||||
const cfg = asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "text-embedding-3-small",
|
||||
multimodal: { enabled: true, modalities: ["image"] },
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(() => resolveMemorySearchConfig(cfg, "main")).toThrow(
|
||||
/memorySearch\.multimodal requires memorySearch\.provider = "gemini"/,
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects multimodal memory when fallback is configured", () => {
|
||||
const cfg = asConfig({
|
||||
agents: {
|
||||
defaults: {
|
||||
memorySearch: {
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
fallback: "openai",
|
||||
multimodal: { enabled: true, modalities: ["image"] },
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
expect(() => resolveMemorySearchConfig(cfg, "main")).toThrow(
|
||||
/memorySearch\.multimodal does not support memorySearch\.fallback/,
|
||||
);
|
||||
});
|
||||
|
||||
it("includes batch defaults for openai without remote overrides", () => {
|
||||
const cfg = configWithDefaultProvider("openai");
|
||||
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||
|
||||
@@ -3,6 +3,12 @@ import path from "node:path";
|
||||
import type { OpenClawConfig, MemorySearchConfig } from "../config/config.js";
|
||||
import { resolveStateDir } from "../config/paths.js";
|
||||
import type { SecretInput } from "../config/types.secrets.js";
|
||||
import {
|
||||
isMemoryMultimodalEnabled,
|
||||
normalizeMemoryMultimodalSettings,
|
||||
supportsMemoryMultimodalEmbeddings,
|
||||
type MemoryMultimodalSettings,
|
||||
} from "../memory/multimodal.js";
|
||||
import { clampInt, clampNumber, resolveUserPath } from "../utils.js";
|
||||
import { resolveAgentConfig } from "./agent-scope.js";
|
||||
|
||||
@@ -10,6 +16,7 @@ export type ResolvedMemorySearchConfig = {
|
||||
enabled: boolean;
|
||||
sources: Array<"memory" | "sessions">;
|
||||
extraPaths: string[];
|
||||
multimodal: MemoryMultimodalSettings;
|
||||
provider: "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama" | "auto";
|
||||
remote?: {
|
||||
baseUrl?: string;
|
||||
@@ -204,6 +211,11 @@ function mergeConfig(
|
||||
.map((value) => value.trim())
|
||||
.filter(Boolean);
|
||||
const extraPaths = Array.from(new Set(rawPaths));
|
||||
const multimodal = normalizeMemoryMultimodalSettings({
|
||||
enabled: overrides?.multimodal?.enabled ?? defaults?.multimodal?.enabled,
|
||||
modalities: overrides?.multimodal?.modalities ?? defaults?.multimodal?.modalities,
|
||||
maxFileBytes: overrides?.multimodal?.maxFileBytes ?? defaults?.multimodal?.maxFileBytes,
|
||||
});
|
||||
const vector = {
|
||||
enabled: overrides?.store?.vector?.enabled ?? defaults?.store?.vector?.enabled ?? true,
|
||||
extensionPath:
|
||||
@@ -307,6 +319,7 @@ function mergeConfig(
|
||||
enabled,
|
||||
sources,
|
||||
extraPaths,
|
||||
multimodal,
|
||||
provider,
|
||||
remote,
|
||||
experimental: {
|
||||
@@ -365,5 +378,22 @@ export function resolveMemorySearchConfig(
|
||||
if (!resolved.enabled) {
|
||||
return null;
|
||||
}
|
||||
const multimodalActive = isMemoryMultimodalEnabled(resolved.multimodal);
|
||||
if (
|
||||
multimodalActive &&
|
||||
!supportsMemoryMultimodalEmbeddings({
|
||||
provider: resolved.provider,
|
||||
model: resolved.model,
|
||||
})
|
||||
) {
|
||||
throw new Error(
|
||||
'agents.*.memorySearch.multimodal requires memorySearch.provider = "gemini" and model = "gemini-embedding-2-preview".',
|
||||
);
|
||||
}
|
||||
if (multimodalActive && resolved.fallback !== "none") {
|
||||
throw new Error(
|
||||
'agents.*.memorySearch.multimodal does not support memorySearch.fallback. Set fallback to "none".',
|
||||
);
|
||||
}
|
||||
return resolved;
|
||||
}
|
||||
|
||||
@@ -72,6 +72,10 @@ const TARGET_KEYS = [
|
||||
"agents.defaults.memorySearch.fallback",
|
||||
"agents.defaults.memorySearch.sources",
|
||||
"agents.defaults.memorySearch.extraPaths",
|
||||
"agents.defaults.memorySearch.multimodal",
|
||||
"agents.defaults.memorySearch.multimodal.enabled",
|
||||
"agents.defaults.memorySearch.multimodal.modalities",
|
||||
"agents.defaults.memorySearch.multimodal.maxFileBytes",
|
||||
"agents.defaults.memorySearch.experimental.sessionMemory",
|
||||
"agents.defaults.memorySearch.remote.baseUrl",
|
||||
"agents.defaults.memorySearch.remote.apiKey",
|
||||
|
||||
@@ -778,7 +778,15 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"agents.defaults.memorySearch.sources":
|
||||
'Chooses which sources are indexed: "memory" reads MEMORY.md + memory files, and "sessions" includes transcript history. Keep ["memory"] unless you need recall from prior chat transcripts.',
|
||||
"agents.defaults.memorySearch.extraPaths":
|
||||
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; keep paths small and intentional to avoid noisy recall.",
|
||||
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; when multimodal memory is enabled, matching image/audio files under these paths are also eligible for indexing.",
|
||||
"agents.defaults.memorySearch.multimodal":
|
||||
'Optional multimodal memory settings for indexing image and audio files from configured extra paths. Keep this off unless your embedding model explicitly supports cross-modal embeddings, and set `memorySearch.fallback` to "none" while it is enabled. Matching files are uploaded to the configured remote embedding provider during indexing.',
|
||||
"agents.defaults.memorySearch.multimodal.enabled":
|
||||
"Enables image/audio memory indexing from extraPaths. This currently requires Gemini embedding-2, keeps the default memory roots Markdown-only, disables memory-search fallback providers, and uploads matching binary content to the configured remote embedding provider.",
|
||||
"agents.defaults.memorySearch.multimodal.modalities":
|
||||
'Selects which multimodal file types are indexed from extraPaths: "image", "audio", or "all". Keep this narrow to avoid indexing large binary corpora unintentionally.',
|
||||
"agents.defaults.memorySearch.multimodal.maxFileBytes":
|
||||
"Sets the maximum bytes allowed per multimodal file before it is skipped during memory indexing. Use this to cap upload cost and indexing latency, or raise it for short high-quality audio clips.",
|
||||
"agents.defaults.memorySearch.experimental.sessionMemory":
|
||||
"Indexes session transcripts into memory search so responses can reference prior chat turns. Keep this off unless transcript recall is needed, because indexing cost and storage usage both increase.",
|
||||
"agents.defaults.memorySearch.provider":
|
||||
|
||||
@@ -319,6 +319,10 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.memorySearch.enabled": "Enable Memory Search",
|
||||
"agents.defaults.memorySearch.sources": "Memory Search Sources",
|
||||
"agents.defaults.memorySearch.extraPaths": "Extra Memory Paths",
|
||||
"agents.defaults.memorySearch.multimodal": "Memory Search Multimodal",
|
||||
"agents.defaults.memorySearch.multimodal.enabled": "Enable Memory Search Multimodal",
|
||||
"agents.defaults.memorySearch.multimodal.modalities": "Memory Search Multimodal Modalities",
|
||||
"agents.defaults.memorySearch.multimodal.maxFileBytes": "Memory Search Multimodal Max File Bytes",
|
||||
"agents.defaults.memorySearch.experimental.sessionMemory":
|
||||
"Memory Search Session Index (Experimental)",
|
||||
"agents.defaults.memorySearch.provider": "Memory Search Provider",
|
||||
|
||||
@@ -319,6 +319,15 @@ export type MemorySearchConfig = {
|
||||
sources?: Array<"memory" | "sessions">;
|
||||
/** Extra paths to include in memory search (directories or .md files). */
|
||||
extraPaths?: string[];
|
||||
/** Optional multimodal file indexing for selected extra paths. */
|
||||
multimodal?: {
|
||||
/** Enable image/audio embeddings from extraPaths. */
|
||||
enabled?: boolean;
|
||||
/** Which non-text file types to index. */
|
||||
modalities?: Array<"image" | "audio" | "all">;
|
||||
/** Max bytes allowed per multimodal file before it is skipped. */
|
||||
maxFileBytes?: number;
|
||||
};
|
||||
/** Experimental memory search settings. */
|
||||
experimental?: {
|
||||
/** Enable session transcript indexing (experimental, default: false). */
|
||||
|
||||
@@ -553,6 +553,16 @@ export const MemorySearchSchema = z
|
||||
enabled: z.boolean().optional(),
|
||||
sources: z.array(z.union([z.literal("memory"), z.literal("sessions")])).optional(),
|
||||
extraPaths: z.array(z.string()).optional(),
|
||||
multimodal: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
modalities: z
|
||||
.array(z.union([z.literal("image"), z.literal("audio"), z.literal("all")]))
|
||||
.optional(),
|
||||
maxFileBytes: z.number().int().positive().optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
experimental: z
|
||||
.object({
|
||||
sessionMemory: z.boolean().optional(),
|
||||
|
||||
@@ -12,6 +12,10 @@ const EXT_BY_MIME: Record<string, string> = {
|
||||
"image/gif": ".gif",
|
||||
"audio/ogg": ".ogg",
|
||||
"audio/mpeg": ".mp3",
|
||||
"audio/wav": ".wav",
|
||||
"audio/flac": ".flac",
|
||||
"audio/aac": ".aac",
|
||||
"audio/opus": ".opus",
|
||||
"audio/x-m4a": ".m4a",
|
||||
"audio/mp4": ".m4a",
|
||||
"video/mp4": ".mp4",
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { estimateUtf8Bytes, splitTextToUtf8ByteLimit } from "./embedding-input-limits.js";
|
||||
import { hasNonTextEmbeddingParts } from "./embedding-inputs.js";
|
||||
import { resolveEmbeddingMaxInputTokens } from "./embedding-model-limits.js";
|
||||
import type { EmbeddingProvider } from "./embeddings.js";
|
||||
import { hashText, type MemoryChunk } from "./internal.js";
|
||||
@@ -16,6 +17,10 @@ export function enforceEmbeddingMaxInputTokens(
|
||||
const out: MemoryChunk[] = [];
|
||||
|
||||
for (const chunk of chunks) {
|
||||
if (hasNonTextEmbeddingParts(chunk.embeddingInput)) {
|
||||
out.push(chunk);
|
||||
continue;
|
||||
}
|
||||
if (estimateUtf8Bytes(chunk.text) <= maxInputTokens) {
|
||||
out.push(chunk);
|
||||
continue;
|
||||
@@ -27,6 +32,7 @@ export function enforceEmbeddingMaxInputTokens(
|
||||
endLine: chunk.endLine,
|
||||
text,
|
||||
hash: hashText(text),
|
||||
embeddingInput: { text },
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||
|
||||
// Helpers for enforcing embedding model input size limits.
|
||||
//
|
||||
// We use UTF-8 byte length as a conservative upper bound for tokenizer output.
|
||||
@@ -11,6 +13,22 @@ export function estimateUtf8Bytes(text: string): number {
|
||||
return Buffer.byteLength(text, "utf8");
|
||||
}
|
||||
|
||||
export function estimateStructuredEmbeddingInputBytes(input: EmbeddingInput): number {
|
||||
if (!input.parts?.length) {
|
||||
return estimateUtf8Bytes(input.text);
|
||||
}
|
||||
let total = 0;
|
||||
for (const part of input.parts) {
|
||||
if (part.type === "text") {
|
||||
total += estimateUtf8Bytes(part.text);
|
||||
continue;
|
||||
}
|
||||
total += estimateUtf8Bytes(part.mimeType);
|
||||
total += estimateUtf8Bytes(part.data);
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
export function splitTextToUtf8ByteLimit(text: string, maxUtf8Bytes: number): string[] {
|
||||
if (maxUtf8Bytes <= 0) {
|
||||
return [text];
|
||||
|
||||
34
src/memory/embedding-inputs.ts
Normal file
34
src/memory/embedding-inputs.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
export type EmbeddingInputTextPart = {
|
||||
type: "text";
|
||||
text: string;
|
||||
};
|
||||
|
||||
export type EmbeddingInputInlineDataPart = {
|
||||
type: "inline-data";
|
||||
mimeType: string;
|
||||
data: string;
|
||||
};
|
||||
|
||||
export type EmbeddingInputPart = EmbeddingInputTextPart | EmbeddingInputInlineDataPart;
|
||||
|
||||
export type EmbeddingInput = {
|
||||
text: string;
|
||||
parts?: EmbeddingInputPart[];
|
||||
};
|
||||
|
||||
export function buildTextEmbeddingInput(text: string): EmbeddingInput {
|
||||
return { text };
|
||||
}
|
||||
|
||||
export function isInlineDataEmbeddingInputPart(
|
||||
part: EmbeddingInputPart,
|
||||
): part is EmbeddingInputInlineDataPart {
|
||||
return part.type === "inline-data";
|
||||
}
|
||||
|
||||
export function hasNonTextEmbeddingParts(input: EmbeddingInput | undefined): boolean {
|
||||
if (!input?.parts?.length) {
|
||||
return false;
|
||||
}
|
||||
return input.parts.some((part) => isInlineDataEmbeddingInputPart(part));
|
||||
}
|
||||
@@ -1,16 +1,13 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import * as authModule from "../agents/model-auth.js";
|
||||
import {
|
||||
buildFileDataPart,
|
||||
buildGeminiParts,
|
||||
buildGeminiEmbeddingRequest,
|
||||
buildGeminiTextEmbeddingRequest,
|
||||
buildInlineDataPart,
|
||||
createGeminiEmbeddingProvider,
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
GEMINI_EMBEDDING_2_MODELS,
|
||||
isGeminiEmbedding2Model,
|
||||
resolveGeminiOutputDimensionality,
|
||||
type GeminiPart,
|
||||
} from "./embeddings-gemini.js";
|
||||
|
||||
vi.mock("../agents/model-auth.js", async () => {
|
||||
@@ -61,40 +58,6 @@ function mockResolvedProviderKey(apiKey = "test-key") {
|
||||
});
|
||||
}
|
||||
|
||||
// ---------- Helper function tests ----------
|
||||
|
||||
describe("buildGeminiParts", () => {
|
||||
it("wraps a string into a single text part", () => {
|
||||
expect(buildGeminiParts("hello")).toEqual([{ text: "hello" }]);
|
||||
});
|
||||
|
||||
it("passes through an existing parts array", () => {
|
||||
const parts: GeminiPart[] = [
|
||||
{ text: "hello" },
|
||||
{ inlineData: { mimeType: "image/png", data: "base64data" } },
|
||||
];
|
||||
expect(buildGeminiParts(parts)).toBe(parts);
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildInlineDataPart", () => {
|
||||
it("produces the correct shape", () => {
|
||||
const part = buildInlineDataPart("image/jpeg", "abc123");
|
||||
expect(part).toEqual({
|
||||
inlineData: { mimeType: "image/jpeg", data: "abc123" },
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildFileDataPart", () => {
|
||||
it("produces the correct shape", () => {
|
||||
const part = buildFileDataPart("application/pdf", "gs://bucket/file.pdf");
|
||||
expect(part).toEqual({
|
||||
fileData: { mimeType: "application/pdf", fileUri: "gs://bucket/file.pdf" },
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildGeminiTextEmbeddingRequest", () => {
|
||||
it("builds a text embedding request with optional model and dimensions", () => {
|
||||
expect(
|
||||
@@ -113,6 +76,35 @@ describe("buildGeminiTextEmbeddingRequest", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildGeminiEmbeddingRequest", () => {
|
||||
it("builds a multimodal request from structured input parts", () => {
|
||||
expect(
|
||||
buildGeminiEmbeddingRequest({
|
||||
input: {
|
||||
text: "Image file: diagram.png",
|
||||
parts: [
|
||||
{ type: "text", text: "Image file: diagram.png" },
|
||||
{ type: "inline-data", mimeType: "image/png", data: "abc123" },
|
||||
],
|
||||
},
|
||||
taskType: "RETRIEVAL_DOCUMENT",
|
||||
modelPath: "models/gemini-embedding-2-preview",
|
||||
outputDimensionality: 1536,
|
||||
}),
|
||||
).toEqual({
|
||||
model: "models/gemini-embedding-2-preview",
|
||||
content: {
|
||||
parts: [
|
||||
{ text: "Image file: diagram.png" },
|
||||
{ inlineData: { mimeType: "image/png", data: "abc123" } },
|
||||
],
|
||||
},
|
||||
taskType: "RETRIEVAL_DOCUMENT",
|
||||
outputDimensionality: 1536,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ---------- Model detection ----------
|
||||
|
||||
describe("isGeminiEmbedding2Model", () => {
|
||||
@@ -319,6 +311,21 @@ describe("gemini-embedding-2-preview provider", () => {
|
||||
expect(body.outputDimensionality).toBe(768);
|
||||
});
|
||||
|
||||
it("sanitizes and normalizes embedQuery responses", async () => {
|
||||
const fetchMock = createGeminiFetchMock([3, 4, Number.NaN]);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
mockResolvedProviderKey();
|
||||
|
||||
const { provider } = await createGeminiEmbeddingProvider({
|
||||
config: {} as never,
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
fallback: "none",
|
||||
});
|
||||
|
||||
await expect(provider.embedQuery("test")).resolves.toEqual([0.6, 0.8, 0]);
|
||||
});
|
||||
|
||||
it("uses custom outputDimensionality for each embedBatch request", async () => {
|
||||
const fetchMock = createGeminiBatchFetchMock(2);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
@@ -341,6 +348,88 @@ describe("gemini-embedding-2-preview provider", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
it("sanitizes and normalizes structured batch responses", async () => {
|
||||
const fetchMock = createGeminiBatchFetchMock(1, [0, Number.POSITIVE_INFINITY, 5]);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
mockResolvedProviderKey();
|
||||
|
||||
const { provider } = await createGeminiEmbeddingProvider({
|
||||
config: {} as never,
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
fallback: "none",
|
||||
});
|
||||
|
||||
await expect(
|
||||
provider.embedBatchInputs?.([
|
||||
{
|
||||
text: "Image file: diagram.png",
|
||||
parts: [
|
||||
{ type: "text", text: "Image file: diagram.png" },
|
||||
{ type: "inline-data", mimeType: "image/png", data: "img" },
|
||||
],
|
||||
},
|
||||
]),
|
||||
).resolves.toEqual([[0, 0, 1]]);
|
||||
});
|
||||
|
||||
it("supports multimodal embedBatchInputs requests", async () => {
|
||||
const fetchMock = createGeminiBatchFetchMock(2);
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
mockResolvedProviderKey();
|
||||
|
||||
const { provider } = await createGeminiEmbeddingProvider({
|
||||
config: {} as never,
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
fallback: "none",
|
||||
});
|
||||
|
||||
expect(provider.embedBatchInputs).toBeDefined();
|
||||
await provider.embedBatchInputs?.([
|
||||
{
|
||||
text: "Image file: diagram.png",
|
||||
parts: [
|
||||
{ type: "text", text: "Image file: diagram.png" },
|
||||
{ type: "inline-data", mimeType: "image/png", data: "img" },
|
||||
],
|
||||
},
|
||||
{
|
||||
text: "Audio file: note.wav",
|
||||
parts: [
|
||||
{ type: "text", text: "Audio file: note.wav" },
|
||||
{ type: "inline-data", mimeType: "audio/wav", data: "aud" },
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
const body = parseFetchBody(fetchMock);
|
||||
expect(body.requests).toEqual([
|
||||
{
|
||||
model: "models/gemini-embedding-2-preview",
|
||||
content: {
|
||||
parts: [
|
||||
{ text: "Image file: diagram.png" },
|
||||
{ inlineData: { mimeType: "image/png", data: "img" } },
|
||||
],
|
||||
},
|
||||
taskType: "RETRIEVAL_DOCUMENT",
|
||||
outputDimensionality: 3072,
|
||||
},
|
||||
{
|
||||
model: "models/gemini-embedding-2-preview",
|
||||
content: {
|
||||
parts: [
|
||||
{ text: "Audio file: note.wav" },
|
||||
{ inlineData: { mimeType: "audio/wav", data: "aud" } },
|
||||
],
|
||||
},
|
||||
taskType: "RETRIEVAL_DOCUMENT",
|
||||
outputDimensionality: 3072,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it("throws for invalid outputDimensionality", async () => {
|
||||
mockResolvedProviderKey();
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
|
||||
import { parseGeminiAuth } from "../infra/gemini-auth.js";
|
||||
import type { SsrFPolicy } from "../infra/net/ssrf.js";
|
||||
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
||||
import { debugEmbeddingsLog } from "./embeddings-debug.js";
|
||||
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js";
|
||||
@@ -50,34 +51,14 @@ export type GeminiTextPart = { text: string };
|
||||
export type GeminiInlinePart = {
|
||||
inlineData: { mimeType: string; data: string };
|
||||
};
|
||||
export type GeminiFilePart = {
|
||||
fileData: { mimeType: string; fileUri: string };
|
||||
};
|
||||
export type GeminiPart = GeminiTextPart | GeminiInlinePart | GeminiFilePart;
|
||||
export type GeminiTextEmbeddingRequest = {
|
||||
content: { parts: GeminiTextPart[] };
|
||||
export type GeminiPart = GeminiTextPart | GeminiInlinePart;
|
||||
export type GeminiEmbeddingRequest = {
|
||||
content: { parts: GeminiPart[] };
|
||||
taskType: GeminiTaskType;
|
||||
outputDimensionality?: number;
|
||||
model?: string;
|
||||
};
|
||||
|
||||
/** Convert a string or pre-built parts array into `GeminiPart[]`. */
|
||||
export function buildGeminiParts(input: string | GeminiPart[]): GeminiPart[] {
|
||||
if (typeof input === "string") {
|
||||
return [{ text: input }];
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
/** Convenience: build an inline-data part for multimodal embeddings. */
|
||||
export function buildInlineDataPart(mimeType: string, base64Data: string): GeminiInlinePart {
|
||||
return { inlineData: { mimeType, data: base64Data } };
|
||||
}
|
||||
|
||||
/** Convenience: build a file-data part for multimodal embeddings. */
|
||||
export function buildFileDataPart(mimeType: string, fileUri: string): GeminiFilePart {
|
||||
return { fileData: { mimeType, fileUri } };
|
||||
}
|
||||
export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest;
|
||||
|
||||
/** Builds the text-only Gemini embedding request shape used across direct and batch APIs. */
|
||||
export function buildGeminiTextEmbeddingRequest(params: {
|
||||
@@ -86,8 +67,30 @@ export function buildGeminiTextEmbeddingRequest(params: {
|
||||
outputDimensionality?: number;
|
||||
modelPath?: string;
|
||||
}): GeminiTextEmbeddingRequest {
|
||||
const request: GeminiTextEmbeddingRequest = {
|
||||
content: { parts: [{ text: params.text }] },
|
||||
return buildGeminiEmbeddingRequest({
|
||||
input: { text: params.text },
|
||||
taskType: params.taskType,
|
||||
outputDimensionality: params.outputDimensionality,
|
||||
modelPath: params.modelPath,
|
||||
});
|
||||
}
|
||||
|
||||
export function buildGeminiEmbeddingRequest(params: {
|
||||
input: EmbeddingInput;
|
||||
taskType: GeminiTaskType;
|
||||
outputDimensionality?: number;
|
||||
modelPath?: string;
|
||||
}): GeminiEmbeddingRequest {
|
||||
const request: GeminiEmbeddingRequest = {
|
||||
content: {
|
||||
parts: params.input.parts?.map((part) =>
|
||||
part.type === "text"
|
||||
? ({ text: part.text } satisfies GeminiTextPart)
|
||||
: ({
|
||||
inlineData: { mimeType: part.mimeType, data: part.data },
|
||||
} satisfies GeminiInlinePart),
|
||||
) ?? [{ text: params.input.text }],
|
||||
},
|
||||
taskType: params.taskType,
|
||||
};
|
||||
if (params.modelPath) {
|
||||
@@ -143,7 +146,7 @@ function resolveRemoteApiKey(remoteApiKey: unknown): string | undefined {
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
function normalizeGeminiModel(model: string): string {
|
||||
export function normalizeGeminiModel(model: string): string {
|
||||
const trimmed = model.trim();
|
||||
if (!trimmed) {
|
||||
return DEFAULT_GEMINI_EMBEDDING_MODEL;
|
||||
@@ -158,6 +161,46 @@ function normalizeGeminiModel(model: string): string {
|
||||
return withoutPrefix;
|
||||
}
|
||||
|
||||
async function fetchGeminiEmbeddingPayload(params: {
|
||||
client: GeminiEmbeddingClient;
|
||||
endpoint: string;
|
||||
body: unknown;
|
||||
}): Promise<{
|
||||
embedding?: { values?: number[] };
|
||||
embeddings?: Array<{ values?: number[] }>;
|
||||
}> {
|
||||
return await executeWithApiKeyRotation({
|
||||
provider: "google",
|
||||
apiKeys: params.client.apiKeys,
|
||||
execute: async (apiKey) => {
|
||||
const authHeaders = parseGeminiAuth(apiKey);
|
||||
const headers = {
|
||||
...authHeaders.headers,
|
||||
...params.client.headers,
|
||||
};
|
||||
return await withRemoteHttpResponse({
|
||||
url: params.endpoint,
|
||||
ssrfPolicy: params.client.ssrfPolicy,
|
||||
init: {
|
||||
method: "POST",
|
||||
headers,
|
||||
body: JSON.stringify(params.body),
|
||||
},
|
||||
onResponse: async (res) => {
|
||||
if (!res.ok) {
|
||||
const text = await res.text();
|
||||
throw new Error(`gemini embeddings failed: ${res.status} ${text}`);
|
||||
}
|
||||
return (await res.json()) as {
|
||||
embedding?: { values?: number[] };
|
||||
embeddings?: Array<{ values?: number[] }>;
|
||||
};
|
||||
},
|
||||
});
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
function normalizeGeminiBaseUrl(raw: string): string {
|
||||
const trimmed = raw.replace(/\/+$/, "");
|
||||
const openAiIndex = trimmed.indexOf("/openai");
|
||||
@@ -181,71 +224,50 @@ export async function createGeminiEmbeddingProvider(
|
||||
const isV2 = isGeminiEmbedding2Model(client.model);
|
||||
const outputDimensionality = client.outputDimensionality;
|
||||
|
||||
const fetchWithGeminiAuth = async (apiKey: string, endpoint: string, body: unknown) => {
|
||||
const authHeaders = parseGeminiAuth(apiKey);
|
||||
const headers = {
|
||||
...authHeaders.headers,
|
||||
...client.headers,
|
||||
};
|
||||
const payload = await withRemoteHttpResponse({
|
||||
url: endpoint,
|
||||
ssrfPolicy: client.ssrfPolicy,
|
||||
init: {
|
||||
method: "POST",
|
||||
headers,
|
||||
body: JSON.stringify(body),
|
||||
},
|
||||
onResponse: async (res) => {
|
||||
if (!res.ok) {
|
||||
const text = await res.text();
|
||||
throw new Error(`gemini embeddings failed: ${res.status} ${text}`);
|
||||
}
|
||||
return (await res.json()) as {
|
||||
embedding?: { values?: number[] };
|
||||
embeddings?: Array<{ values?: number[] }>;
|
||||
};
|
||||
},
|
||||
});
|
||||
return payload;
|
||||
};
|
||||
|
||||
const embedQuery = async (text: string): Promise<number[]> => {
|
||||
if (!text.trim()) {
|
||||
return [];
|
||||
}
|
||||
const body = buildGeminiTextEmbeddingRequest({
|
||||
text,
|
||||
taskType: options.taskType ?? "RETRIEVAL_QUERY",
|
||||
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
||||
});
|
||||
const payload = await executeWithApiKeyRotation({
|
||||
provider: "google",
|
||||
apiKeys: client.apiKeys,
|
||||
execute: (apiKey) => fetchWithGeminiAuth(apiKey, embedUrl, body),
|
||||
const payload = await fetchGeminiEmbeddingPayload({
|
||||
client,
|
||||
endpoint: embedUrl,
|
||||
body: buildGeminiTextEmbeddingRequest({
|
||||
text,
|
||||
taskType: options.taskType ?? "RETRIEVAL_QUERY",
|
||||
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
||||
}),
|
||||
});
|
||||
return sanitizeAndNormalizeEmbedding(payload.embedding?.values ?? []);
|
||||
};
|
||||
|
||||
const embedBatch = async (texts: string[]): Promise<number[][]> => {
|
||||
if (texts.length === 0) {
|
||||
const embedBatchInputs = async (inputs: EmbeddingInput[]): Promise<number[][]> => {
|
||||
if (inputs.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const requests = texts.map((text) =>
|
||||
buildGeminiTextEmbeddingRequest({
|
||||
text,
|
||||
modelPath: client.modelPath,
|
||||
taskType: options.taskType ?? "RETRIEVAL_DOCUMENT",
|
||||
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
||||
}),
|
||||
);
|
||||
const batchBody = { requests };
|
||||
const payload = await executeWithApiKeyRotation({
|
||||
provider: "google",
|
||||
apiKeys: client.apiKeys,
|
||||
execute: (apiKey) => fetchWithGeminiAuth(apiKey, batchUrl, batchBody),
|
||||
const payload = await fetchGeminiEmbeddingPayload({
|
||||
client,
|
||||
endpoint: batchUrl,
|
||||
body: {
|
||||
requests: inputs.map((input) =>
|
||||
buildGeminiEmbeddingRequest({
|
||||
input,
|
||||
modelPath: client.modelPath,
|
||||
taskType: options.taskType ?? "RETRIEVAL_DOCUMENT",
|
||||
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
||||
}),
|
||||
),
|
||||
},
|
||||
});
|
||||
const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : [];
|
||||
return texts.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? []));
|
||||
return inputs.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? []));
|
||||
};
|
||||
|
||||
const embedBatch = async (texts: string[]): Promise<number[][]> => {
|
||||
return await embedBatchInputs(
|
||||
texts.map((text) => ({
|
||||
text,
|
||||
})),
|
||||
);
|
||||
};
|
||||
|
||||
return {
|
||||
@@ -255,6 +277,7 @@ export async function createGeminiEmbeddingProvider(
|
||||
maxInputTokens: GEMINI_MAX_INPUT_TOKENS[client.model],
|
||||
embedQuery,
|
||||
embedBatch,
|
||||
embedBatchInputs,
|
||||
},
|
||||
client,
|
||||
};
|
||||
|
||||
@@ -4,6 +4,7 @@ import type { OpenClawConfig } from "../config/config.js";
|
||||
import type { SecretInput } from "../config/types.secrets.js";
|
||||
import { formatErrorMessage } from "../infra/errors.js";
|
||||
import { resolveUserPath } from "../utils.js";
|
||||
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
||||
import {
|
||||
createGeminiEmbeddingProvider,
|
||||
@@ -31,6 +32,7 @@ export type EmbeddingProvider = {
|
||||
maxInputTokens?: number;
|
||||
embedQuery: (text: string) => Promise<number[]>;
|
||||
embedBatch: (texts: string[]) => Promise<number[][]>;
|
||||
embedBatchInputs?: (inputs: EmbeddingInput[]) => Promise<number[][]>;
|
||||
};
|
||||
|
||||
export type EmbeddingProviderId = "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama";
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
@@ -6,6 +7,7 @@ import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
|
||||
import "./test-runtime-mocks.js";
|
||||
|
||||
let embedBatchCalls = 0;
|
||||
let embedBatchInputCalls = 0;
|
||||
let providerCalls: Array<{ provider?: string; model?: string; outputDimensionality?: number }> = [];
|
||||
|
||||
vi.mock("./embeddings.js", () => {
|
||||
@@ -13,7 +15,9 @@ vi.mock("./embeddings.js", () => {
|
||||
const lower = text.toLowerCase();
|
||||
const alpha = lower.split("alpha").length - 1;
|
||||
const beta = lower.split("beta").length - 1;
|
||||
return [alpha, beta];
|
||||
const image = lower.split("image").length - 1;
|
||||
const audio = lower.split("audio").length - 1;
|
||||
return [alpha, beta, image, audio];
|
||||
};
|
||||
return {
|
||||
createEmbeddingProvider: async (options: {
|
||||
@@ -38,6 +42,36 @@ vi.mock("./embeddings.js", () => {
|
||||
embedBatchCalls += 1;
|
||||
return texts.map(embedText);
|
||||
},
|
||||
...(providerId === "gemini"
|
||||
? {
|
||||
embedBatchInputs: async (
|
||||
inputs: Array<{
|
||||
text: string;
|
||||
parts?: Array<
|
||||
| { type: "text"; text: string }
|
||||
| { type: "inline-data"; mimeType: string; data: string }
|
||||
>;
|
||||
}>,
|
||||
) => {
|
||||
embedBatchInputCalls += 1;
|
||||
return inputs.map((input) => {
|
||||
const inlineData = input.parts?.find((part) => part.type === "inline-data");
|
||||
if (inlineData?.type === "inline-data" && inlineData.data.length > 9000) {
|
||||
throw new Error("payload too large");
|
||||
}
|
||||
const mimeType =
|
||||
inlineData?.type === "inline-data" ? inlineData.mimeType : undefined;
|
||||
if (mimeType?.startsWith("image/")) {
|
||||
return [0, 0, 1, 0];
|
||||
}
|
||||
if (mimeType?.startsWith("audio/")) {
|
||||
return [0, 0, 0, 1];
|
||||
}
|
||||
return embedText(input.text);
|
||||
});
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
},
|
||||
...(providerId === "gemini"
|
||||
? {
|
||||
@@ -64,6 +98,7 @@ describe("memory index", () => {
|
||||
let indexVectorPath = "";
|
||||
let indexMainPath = "";
|
||||
let indexExtraPath = "";
|
||||
let indexMultimodalPath = "";
|
||||
let indexStatusPath = "";
|
||||
let indexSourceChangePath = "";
|
||||
let indexModelPath = "";
|
||||
@@ -97,6 +132,7 @@ describe("memory index", () => {
|
||||
indexMainPath = path.join(workspaceDir, "index-main.sqlite");
|
||||
indexVectorPath = path.join(workspaceDir, "index-vector.sqlite");
|
||||
indexExtraPath = path.join(workspaceDir, "index-extra.sqlite");
|
||||
indexMultimodalPath = path.join(workspaceDir, "index-multimodal.sqlite");
|
||||
indexStatusPath = path.join(workspaceDir, "index-status.sqlite");
|
||||
indexSourceChangePath = path.join(workspaceDir, "index-source-change.sqlite");
|
||||
indexModelPath = path.join(workspaceDir, "index-model-change.sqlite");
|
||||
@@ -119,6 +155,7 @@ describe("memory index", () => {
|
||||
// Keep atomic reindex tests on the safe path.
|
||||
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "1");
|
||||
embedBatchCalls = 0;
|
||||
embedBatchInputCalls = 0;
|
||||
providerCalls = [];
|
||||
|
||||
// Keep the workspace stable to allow manager reuse across tests.
|
||||
@@ -149,6 +186,11 @@ describe("memory index", () => {
|
||||
provider?: "openai" | "gemini";
|
||||
model?: string;
|
||||
outputDimensionality?: number;
|
||||
multimodal?: {
|
||||
enabled?: boolean;
|
||||
modalities?: Array<"image" | "audio" | "all">;
|
||||
maxFileBytes?: number;
|
||||
};
|
||||
vectorEnabled?: boolean;
|
||||
cacheEnabled?: boolean;
|
||||
minScore?: number;
|
||||
@@ -172,6 +214,7 @@ describe("memory index", () => {
|
||||
},
|
||||
cache: params.cacheEnabled ? { enabled: true } : undefined,
|
||||
extraPaths: params.extraPaths,
|
||||
multimodal: params.multimodal,
|
||||
sources: params.sources,
|
||||
experimental: { sessionMemory: params.sessionMemory ?? false },
|
||||
},
|
||||
@@ -247,6 +290,103 @@ describe("memory index", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("indexes multimodal image and audio files from extra paths with Gemini structured inputs", async () => {
|
||||
const mediaDir = path.join(workspaceDir, "media-memory");
|
||||
await fs.mkdir(mediaDir, { recursive: true });
|
||||
await fs.writeFile(path.join(mediaDir, "diagram.png"), Buffer.from("png"));
|
||||
await fs.writeFile(path.join(mediaDir, "meeting.wav"), Buffer.from("wav"));
|
||||
|
||||
const cfg = createCfg({
|
||||
storePath: indexMultimodalPath,
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
extraPaths: [mediaDir],
|
||||
multimodal: { enabled: true, modalities: ["image", "audio"] },
|
||||
});
|
||||
const manager = await getPersistentManager(cfg);
|
||||
await manager.sync({ reason: "test" });
|
||||
|
||||
expect(embedBatchInputCalls).toBeGreaterThan(0);
|
||||
|
||||
const imageResults = await manager.search("image");
|
||||
expect(imageResults.some((result) => result.path.endsWith("diagram.png"))).toBe(true);
|
||||
|
||||
const audioResults = await manager.search("audio");
|
||||
expect(audioResults.some((result) => result.path.endsWith("meeting.wav"))).toBe(true);
|
||||
});
|
||||
|
||||
it("skips oversized multimodal inputs without aborting sync", async () => {
|
||||
const mediaDir = path.join(workspaceDir, "media-oversize");
|
||||
await fs.mkdir(mediaDir, { recursive: true });
|
||||
await fs.writeFile(path.join(mediaDir, "huge.png"), Buffer.alloc(7000, 1));
|
||||
|
||||
const cfg = createCfg({
|
||||
storePath: path.join(workspaceDir, `index-oversize-${randomUUID()}.sqlite`),
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
extraPaths: [mediaDir],
|
||||
multimodal: { enabled: true, modalities: ["image"] },
|
||||
});
|
||||
const manager = requireManager(await getMemorySearchManager({ cfg, agentId: "main" }));
|
||||
await manager.sync({ reason: "test" });
|
||||
|
||||
expect(embedBatchInputCalls).toBeGreaterThan(0);
|
||||
const imageResults = await manager.search("image");
|
||||
expect(imageResults.some((result) => result.path.endsWith("huge.png"))).toBe(false);
|
||||
|
||||
const alphaResults = await manager.search("alpha");
|
||||
expect(alphaResults.some((result) => result.path.endsWith("memory/2026-01-12.md"))).toBe(true);
|
||||
|
||||
await manager.close?.();
|
||||
});
|
||||
|
||||
it("reindexes a multimodal file after a transient mid-sync disappearance", async () => {
|
||||
const mediaDir = path.join(workspaceDir, "media-race");
|
||||
const imagePath = path.join(mediaDir, "diagram.png");
|
||||
await fs.mkdir(mediaDir, { recursive: true });
|
||||
await fs.writeFile(imagePath, Buffer.from("png"));
|
||||
|
||||
const cfg = createCfg({
|
||||
storePath: path.join(workspaceDir, `index-race-${randomUUID()}.sqlite`),
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
extraPaths: [mediaDir],
|
||||
multimodal: { enabled: true, modalities: ["image"] },
|
||||
});
|
||||
const manager = requireManager(await getMemorySearchManager({ cfg, agentId: "main" }));
|
||||
const realReadFile = fs.readFile.bind(fs);
|
||||
let imageReads = 0;
|
||||
const readSpy = vi.spyOn(fs, "readFile").mockImplementation(async (...args) => {
|
||||
const [targetPath] = args;
|
||||
if (typeof targetPath === "string" && targetPath === imagePath) {
|
||||
imageReads += 1;
|
||||
if (imageReads === 2) {
|
||||
const err = Object.assign(
|
||||
new Error(`ENOENT: no such file or directory, open '${imagePath}'`),
|
||||
{
|
||||
code: "ENOENT",
|
||||
},
|
||||
) as NodeJS.ErrnoException;
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
return await realReadFile(...args);
|
||||
});
|
||||
|
||||
await manager.sync({ reason: "test" });
|
||||
readSpy.mockRestore();
|
||||
|
||||
const callsAfterFirstSync = embedBatchInputCalls;
|
||||
(manager as unknown as { dirty: boolean }).dirty = true;
|
||||
await manager.sync({ reason: "test" });
|
||||
|
||||
expect(embedBatchInputCalls).toBeGreaterThan(callsAfterFirstSync);
|
||||
const results = await manager.search("image");
|
||||
expect(results.some((result) => result.path.endsWith("diagram.png"))).toBe(true);
|
||||
|
||||
await manager.close?.();
|
||||
});
|
||||
|
||||
it("keeps dirty false in status-only manager after prior indexing", async () => {
|
||||
const cfg = createCfg({ storePath: indexStatusPath });
|
||||
|
||||
@@ -433,6 +573,82 @@ describe("memory index", () => {
|
||||
await secondManager.close?.();
|
||||
});
|
||||
|
||||
it("reindexes when extraPaths change", async () => {
|
||||
const storePath = path.join(workspaceDir, `index-scope-extra-${randomUUID()}.sqlite`);
|
||||
const firstExtraDir = path.join(workspaceDir, "scope-extra-a");
|
||||
const secondExtraDir = path.join(workspaceDir, "scope-extra-b");
|
||||
await fs.rm(firstExtraDir, { recursive: true, force: true });
|
||||
await fs.rm(secondExtraDir, { recursive: true, force: true });
|
||||
await fs.mkdir(firstExtraDir, { recursive: true });
|
||||
await fs.mkdir(secondExtraDir, { recursive: true });
|
||||
await fs.writeFile(path.join(firstExtraDir, "a.md"), "alpha only");
|
||||
await fs.writeFile(path.join(secondExtraDir, "b.md"), "beta only");
|
||||
|
||||
const first = await getMemorySearchManager({
|
||||
cfg: createCfg({
|
||||
storePath,
|
||||
extraPaths: [firstExtraDir],
|
||||
}),
|
||||
agentId: "main",
|
||||
});
|
||||
const firstManager = requireManager(first);
|
||||
await firstManager.sync?.({ reason: "test" });
|
||||
await firstManager.close?.();
|
||||
|
||||
const second = await getMemorySearchManager({
|
||||
cfg: createCfg({
|
||||
storePath,
|
||||
extraPaths: [secondExtraDir],
|
||||
}),
|
||||
agentId: "main",
|
||||
});
|
||||
const secondManager = requireManager(second);
|
||||
await secondManager.sync?.({ reason: "test" });
|
||||
const results = await secondManager.search("beta");
|
||||
expect(results.some((result) => result.path.endsWith("scope-extra-b/b.md"))).toBe(true);
|
||||
expect(results.some((result) => result.path.endsWith("scope-extra-a/a.md"))).toBe(false);
|
||||
await secondManager.close?.();
|
||||
});
|
||||
|
||||
it("reindexes when multimodal settings change", async () => {
|
||||
const storePath = path.join(workspaceDir, `index-scope-multimodal-${randomUUID()}.sqlite`);
|
||||
const mediaDir = path.join(workspaceDir, "scope-media");
|
||||
await fs.rm(mediaDir, { recursive: true, force: true });
|
||||
await fs.mkdir(mediaDir, { recursive: true });
|
||||
await fs.writeFile(path.join(mediaDir, "diagram.png"), Buffer.from("png"));
|
||||
|
||||
const first = await getMemorySearchManager({
|
||||
cfg: createCfg({
|
||||
storePath,
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
extraPaths: [mediaDir],
|
||||
}),
|
||||
agentId: "main",
|
||||
});
|
||||
const firstManager = requireManager(first);
|
||||
await firstManager.sync?.({ reason: "test" });
|
||||
const multimodalCallsAfterFirstSync = embedBatchInputCalls;
|
||||
await firstManager.close?.();
|
||||
|
||||
const second = await getMemorySearchManager({
|
||||
cfg: createCfg({
|
||||
storePath,
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
extraPaths: [mediaDir],
|
||||
multimodal: { enabled: true, modalities: ["image"] },
|
||||
}),
|
||||
agentId: "main",
|
||||
});
|
||||
const secondManager = requireManager(second);
|
||||
await secondManager.sync?.({ reason: "test" });
|
||||
expect(embedBatchInputCalls).toBeGreaterThan(multimodalCallsAfterFirstSync);
|
||||
const results = await secondManager.search("image");
|
||||
expect(results.some((result) => result.path.endsWith("scope-media/diagram.png"))).toBe(true);
|
||||
await secondManager.close?.();
|
||||
});
|
||||
|
||||
it("reuses cached embeddings on forced reindex", async () => {
|
||||
const cfg = createCfg({ storePath: indexMainPath, cacheEnabled: true });
|
||||
const manager = await getPersistentManager(cfg);
|
||||
|
||||
@@ -3,12 +3,17 @@ import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
buildMultimodalChunkForIndexing,
|
||||
buildFileEntry,
|
||||
chunkMarkdown,
|
||||
listMemoryFiles,
|
||||
normalizeExtraMemoryPaths,
|
||||
remapChunkLines,
|
||||
} from "./internal.js";
|
||||
import {
|
||||
DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||
type MemoryMultimodalSettings,
|
||||
} from "./multimodal.js";
|
||||
|
||||
function setupTempDirLifecycle(prefix: string): () => string {
|
||||
let tmpDir = "";
|
||||
@@ -38,6 +43,11 @@ describe("normalizeExtraMemoryPaths", () => {
|
||||
|
||||
describe("listMemoryFiles", () => {
|
||||
const getTmpDir = setupTempDirLifecycle("memory-test-");
|
||||
const multimodal: MemoryMultimodalSettings = {
|
||||
enabled: true,
|
||||
modalities: ["image", "audio"],
|
||||
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||
};
|
||||
|
||||
it("includes files from additional paths (directory)", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
@@ -131,10 +141,29 @@ describe("listMemoryFiles", () => {
|
||||
const memoryMatches = files.filter((file) => file.endsWith("MEMORY.md"));
|
||||
expect(memoryMatches).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("includes image and audio files from extra paths when multimodal is enabled", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
const extraDir = path.join(tmpDir, "media");
|
||||
await fs.mkdir(extraDir, { recursive: true });
|
||||
await fs.writeFile(path.join(extraDir, "diagram.png"), Buffer.from("png"));
|
||||
await fs.writeFile(path.join(extraDir, "note.wav"), Buffer.from("wav"));
|
||||
await fs.writeFile(path.join(extraDir, "ignore.bin"), Buffer.from("bin"));
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, [extraDir], multimodal);
|
||||
expect(files.some((file) => file.endsWith("diagram.png"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("note.wav"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("ignore.bin"))).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildFileEntry", () => {
|
||||
const getTmpDir = setupTempDirLifecycle("memory-build-entry-");
|
||||
const multimodal: MemoryMultimodalSettings = {
|
||||
enabled: true,
|
||||
modalities: ["image", "audio"],
|
||||
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||
};
|
||||
|
||||
it("returns null when the file disappears before reading", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
@@ -154,6 +183,37 @@ describe("buildFileEntry", () => {
|
||||
expect(entry?.path).toBe("note.md");
|
||||
expect(entry?.size).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("returns multimodal metadata for eligible image files", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
const target = path.join(tmpDir, "diagram.png");
|
||||
await fs.writeFile(target, Buffer.from("png"));
|
||||
|
||||
const entry = await buildFileEntry(target, tmpDir, multimodal);
|
||||
|
||||
expect(entry).toMatchObject({
|
||||
path: "diagram.png",
|
||||
kind: "multimodal",
|
||||
modality: "image",
|
||||
mimeType: "image/png",
|
||||
contentText: "Image file: diagram.png",
|
||||
});
|
||||
});
|
||||
|
||||
it("builds a multimodal chunk lazily for indexing", async () => {
|
||||
const tmpDir = getTmpDir();
|
||||
const target = path.join(tmpDir, "diagram.png");
|
||||
await fs.writeFile(target, Buffer.from("png"));
|
||||
|
||||
const entry = await buildFileEntry(target, tmpDir, multimodal);
|
||||
const built = await buildMultimodalChunkForIndexing(entry!);
|
||||
|
||||
expect(built?.chunk.embeddingInput?.parts).toEqual([
|
||||
{ type: "text", text: "Image file: diagram.png" },
|
||||
expect.objectContaining({ type: "inline-data", mimeType: "image/png" }),
|
||||
]);
|
||||
expect(built?.structuredInputBytes).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("chunkMarkdown", () => {
|
||||
|
||||
@@ -2,8 +2,17 @@ import crypto from "node:crypto";
|
||||
import fsSync from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { detectMime } from "../media/mime.js";
|
||||
import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
|
||||
import { estimateStructuredEmbeddingInputBytes } from "./embedding-input-limits.js";
|
||||
import { buildTextEmbeddingInput, type EmbeddingInput } from "./embedding-inputs.js";
|
||||
import { isFileMissingError } from "./fs-utils.js";
|
||||
import {
|
||||
buildMemoryMultimodalLabel,
|
||||
classifyMemoryMultimodalPath,
|
||||
type MemoryMultimodalModality,
|
||||
type MemoryMultimodalSettings,
|
||||
} from "./multimodal.js";
|
||||
|
||||
export type MemoryFileEntry = {
|
||||
path: string;
|
||||
@@ -11,6 +20,10 @@ export type MemoryFileEntry = {
|
||||
mtimeMs: number;
|
||||
size: number;
|
||||
hash: string;
|
||||
kind?: "markdown" | "multimodal";
|
||||
contentText?: string;
|
||||
modality?: MemoryMultimodalModality;
|
||||
mimeType?: string;
|
||||
};
|
||||
|
||||
export type MemoryChunk = {
|
||||
@@ -18,6 +31,18 @@ export type MemoryChunk = {
|
||||
endLine: number;
|
||||
text: string;
|
||||
hash: string;
|
||||
embeddingInput?: EmbeddingInput;
|
||||
};
|
||||
|
||||
export type MultimodalMemoryChunk = {
|
||||
chunk: MemoryChunk;
|
||||
structuredInputBytes: number;
|
||||
};
|
||||
|
||||
const DISABLED_MULTIMODAL_SETTINGS: MemoryMultimodalSettings = {
|
||||
enabled: false,
|
||||
modalities: [],
|
||||
maxFileBytes: 0,
|
||||
};
|
||||
|
||||
export function ensureDir(dir: string): string {
|
||||
@@ -56,7 +81,16 @@ export function isMemoryPath(relPath: string): boolean {
|
||||
return normalized.startsWith("memory/");
|
||||
}
|
||||
|
||||
async function walkDir(dir: string, files: string[]) {
|
||||
function isAllowedMemoryFilePath(filePath: string, multimodal?: MemoryMultimodalSettings): boolean {
|
||||
if (filePath.endsWith(".md")) {
|
||||
return true;
|
||||
}
|
||||
return (
|
||||
classifyMemoryMultimodalPath(filePath, multimodal ?? DISABLED_MULTIMODAL_SETTINGS) !== null
|
||||
);
|
||||
}
|
||||
|
||||
async function walkDir(dir: string, files: string[], multimodal?: MemoryMultimodalSettings) {
|
||||
const entries = await fs.readdir(dir, { withFileTypes: true });
|
||||
for (const entry of entries) {
|
||||
const full = path.join(dir, entry.name);
|
||||
@@ -64,13 +98,13 @@ async function walkDir(dir: string, files: string[]) {
|
||||
continue;
|
||||
}
|
||||
if (entry.isDirectory()) {
|
||||
await walkDir(full, files);
|
||||
await walkDir(full, files, multimodal);
|
||||
continue;
|
||||
}
|
||||
if (!entry.isFile()) {
|
||||
continue;
|
||||
}
|
||||
if (!entry.name.endsWith(".md")) {
|
||||
if (!isAllowedMemoryFilePath(full, multimodal)) {
|
||||
continue;
|
||||
}
|
||||
files.push(full);
|
||||
@@ -80,6 +114,7 @@ async function walkDir(dir: string, files: string[]) {
|
||||
export async function listMemoryFiles(
|
||||
workspaceDir: string,
|
||||
extraPaths?: string[],
|
||||
multimodal?: MemoryMultimodalSettings,
|
||||
): Promise<string[]> {
|
||||
const result: string[] = [];
|
||||
const memoryFile = path.join(workspaceDir, "MEMORY.md");
|
||||
@@ -117,10 +152,10 @@ export async function listMemoryFiles(
|
||||
continue;
|
||||
}
|
||||
if (stat.isDirectory()) {
|
||||
await walkDir(inputPath, result);
|
||||
await walkDir(inputPath, result, multimodal);
|
||||
continue;
|
||||
}
|
||||
if (stat.isFile() && inputPath.endsWith(".md")) {
|
||||
if (stat.isFile() && isAllowedMemoryFilePath(inputPath, multimodal)) {
|
||||
result.push(inputPath);
|
||||
}
|
||||
} catch {}
|
||||
@@ -152,6 +187,7 @@ export function hashText(value: string): string {
|
||||
export async function buildFileEntry(
|
||||
absPath: string,
|
||||
workspaceDir: string,
|
||||
multimodal?: MemoryMultimodalSettings,
|
||||
): Promise<MemoryFileEntry | null> {
|
||||
let stat;
|
||||
try {
|
||||
@@ -162,6 +198,48 @@ export async function buildFileEntry(
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
const normalizedPath = path.relative(workspaceDir, absPath).replace(/\\/g, "/");
|
||||
const multimodalSettings = multimodal ?? DISABLED_MULTIMODAL_SETTINGS;
|
||||
const modality = classifyMemoryMultimodalPath(absPath, multimodalSettings);
|
||||
if (modality) {
|
||||
if (stat.size > multimodalSettings.maxFileBytes) {
|
||||
return null;
|
||||
}
|
||||
let buffer: Buffer;
|
||||
try {
|
||||
buffer = await fs.readFile(absPath);
|
||||
} catch (err) {
|
||||
if (isFileMissingError(err)) {
|
||||
return null;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
const mimeType = await detectMime({ buffer: buffer.subarray(0, 512), filePath: absPath });
|
||||
if (!mimeType || !mimeType.startsWith(`${modality}/`)) {
|
||||
return null;
|
||||
}
|
||||
const contentText = buildMemoryMultimodalLabel(modality, normalizedPath);
|
||||
const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
|
||||
const chunkHash = hashText(
|
||||
JSON.stringify({
|
||||
path: normalizedPath,
|
||||
contentText,
|
||||
mimeType,
|
||||
dataHash,
|
||||
}),
|
||||
);
|
||||
return {
|
||||
path: normalizedPath,
|
||||
absPath,
|
||||
mtimeMs: stat.mtimeMs,
|
||||
size: stat.size,
|
||||
hash: chunkHash,
|
||||
kind: "multimodal",
|
||||
contentText,
|
||||
modality,
|
||||
mimeType,
|
||||
};
|
||||
}
|
||||
let content: string;
|
||||
try {
|
||||
content = await fs.readFile(absPath, "utf-8");
|
||||
@@ -173,11 +251,59 @@ export async function buildFileEntry(
|
||||
}
|
||||
const hash = hashText(content);
|
||||
return {
|
||||
path: path.relative(workspaceDir, absPath).replace(/\\/g, "/"),
|
||||
path: normalizedPath,
|
||||
absPath,
|
||||
mtimeMs: stat.mtimeMs,
|
||||
size: stat.size,
|
||||
hash,
|
||||
kind: "markdown",
|
||||
};
|
||||
}
|
||||
|
||||
async function loadMultimodalEmbeddingInput(
|
||||
entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind">,
|
||||
): Promise<EmbeddingInput | null> {
|
||||
if (entry.kind !== "multimodal" || !entry.contentText || !entry.mimeType) {
|
||||
return null;
|
||||
}
|
||||
let buffer: Buffer;
|
||||
try {
|
||||
buffer = await fs.readFile(entry.absPath);
|
||||
} catch (err) {
|
||||
if (isFileMissingError(err)) {
|
||||
return null;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
return {
|
||||
text: entry.contentText,
|
||||
parts: [
|
||||
{ type: "text", text: entry.contentText },
|
||||
{
|
||||
type: "inline-data",
|
||||
mimeType: entry.mimeType,
|
||||
data: buffer.toString("base64"),
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
export async function buildMultimodalChunkForIndexing(
|
||||
entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind" | "hash">,
|
||||
): Promise<MultimodalMemoryChunk | null> {
|
||||
const embeddingInput = await loadMultimodalEmbeddingInput(entry);
|
||||
if (!embeddingInput) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
chunk: {
|
||||
startLine: 1,
|
||||
endLine: 1,
|
||||
text: entry.contentText ?? embeddingInput.text,
|
||||
hash: entry.hash,
|
||||
embeddingInput,
|
||||
},
|
||||
structuredInputBytes: estimateStructuredEmbeddingInputBytes(embeddingInput),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -213,6 +339,7 @@ export function chunkMarkdown(
|
||||
endLine,
|
||||
text,
|
||||
hash: hashText(text),
|
||||
embeddingInput: buildTextEmbeddingInput(text),
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
@@ -8,9 +8,14 @@ import {
|
||||
} from "./batch-openai.js";
|
||||
import { type VoyageBatchRequest, runVoyageEmbeddingBatches } from "./batch-voyage.js";
|
||||
import { enforceEmbeddingMaxInputTokens } from "./embedding-chunk-limits.js";
|
||||
import { estimateUtf8Bytes } from "./embedding-input-limits.js";
|
||||
import { buildGeminiTextEmbeddingRequest } from "./embeddings-gemini.js";
|
||||
import {
|
||||
estimateStructuredEmbeddingInputBytes,
|
||||
estimateUtf8Bytes,
|
||||
} from "./embedding-input-limits.js";
|
||||
import { type EmbeddingInput, hasNonTextEmbeddingParts } from "./embedding-inputs.js";
|
||||
import { buildGeminiEmbeddingRequest } from "./embeddings-gemini.js";
|
||||
import {
|
||||
buildMultimodalChunkForIndexing,
|
||||
chunkMarkdown,
|
||||
hashText,
|
||||
parseEmbedding,
|
||||
@@ -53,7 +58,9 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
let currentTokens = 0;
|
||||
|
||||
for (const chunk of chunks) {
|
||||
const estimate = estimateUtf8Bytes(chunk.text);
|
||||
const estimate = chunk.embeddingInput
|
||||
? estimateStructuredEmbeddingInputBytes(chunk.embeddingInput)
|
||||
: estimateUtf8Bytes(chunk.text);
|
||||
const wouldExceed =
|
||||
current.length > 0 && currentTokens + estimate > EMBEDDING_BATCH_MAX_TOKENS;
|
||||
if (wouldExceed) {
|
||||
@@ -188,9 +195,22 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
const missingChunks = missing.map((m) => m.chunk);
|
||||
const batches = this.buildEmbeddingBatches(missingChunks);
|
||||
const toCache: Array<{ hash: string; embedding: number[] }> = [];
|
||||
const provider = this.provider;
|
||||
if (!provider) {
|
||||
throw new Error("Cannot embed batch in FTS-only mode (no embedding provider)");
|
||||
}
|
||||
let cursor = 0;
|
||||
for (const batch of batches) {
|
||||
const batchEmbeddings = await this.embedBatchWithRetry(batch.map((chunk) => chunk.text));
|
||||
const inputs = batch.map((chunk) => chunk.embeddingInput ?? { text: chunk.text });
|
||||
const hasStructuredInputs = inputs.some((input) => hasNonTextEmbeddingParts(input));
|
||||
if (hasStructuredInputs && !provider.embedBatchInputs) {
|
||||
throw new Error(
|
||||
`Embedding provider "${provider.id}" does not support multimodal memory inputs.`,
|
||||
);
|
||||
}
|
||||
const batchEmbeddings = hasStructuredInputs
|
||||
? await this.embedBatchInputsWithRetry(inputs)
|
||||
: await this.embedBatchWithRetry(batch.map((chunk) => chunk.text));
|
||||
for (let i = 0; i < batch.length; i += 1) {
|
||||
const item = missing[cursor + i];
|
||||
const embedding = batchEmbeddings[i] ?? [];
|
||||
@@ -476,6 +496,9 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
source: MemorySource,
|
||||
): Promise<number[][]> {
|
||||
const gemini = this.gemini;
|
||||
if (chunks.some((chunk) => hasNonTextEmbeddingParts(chunk.embeddingInput))) {
|
||||
return await this.embedChunksInBatches(chunks);
|
||||
}
|
||||
return await this.embedChunksWithProviderBatch<GeminiBatchRequest>({
|
||||
chunks,
|
||||
entry,
|
||||
@@ -483,9 +506,10 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
provider: "gemini",
|
||||
enabled: Boolean(gemini),
|
||||
buildRequest: (chunk) => ({
|
||||
request: buildGeminiTextEmbeddingRequest({
|
||||
text: chunk.text,
|
||||
request: buildGeminiEmbeddingRequest({
|
||||
input: chunk.embeddingInput ?? { text: chunk.text },
|
||||
taskType: "RETRIEVAL_DOCUMENT",
|
||||
modelPath: this.gemini?.modelPath,
|
||||
outputDimensionality: this.gemini?.outputDimensionality,
|
||||
}),
|
||||
}),
|
||||
@@ -536,6 +560,45 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
}
|
||||
}
|
||||
|
||||
protected async embedBatchInputsWithRetry(inputs: EmbeddingInput[]): Promise<number[][]> {
|
||||
if (inputs.length === 0) {
|
||||
return [];
|
||||
}
|
||||
if (!this.provider?.embedBatchInputs) {
|
||||
return await this.embedBatchWithRetry(inputs.map((input) => input.text));
|
||||
}
|
||||
let attempt = 0;
|
||||
let delayMs = EMBEDDING_RETRY_BASE_DELAY_MS;
|
||||
while (true) {
|
||||
try {
|
||||
const timeoutMs = this.resolveEmbeddingTimeout("batch");
|
||||
log.debug("memory embeddings: structured batch start", {
|
||||
provider: this.provider.id,
|
||||
items: inputs.length,
|
||||
timeoutMs,
|
||||
});
|
||||
return await this.withTimeout(
|
||||
this.provider.embedBatchInputs(inputs),
|
||||
timeoutMs,
|
||||
`memory embeddings batch timed out after ${Math.round(timeoutMs / 1000)}s`,
|
||||
);
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
if (!this.isRetryableEmbeddingError(message) || attempt >= EMBEDDING_RETRY_MAX_ATTEMPTS) {
|
||||
throw err;
|
||||
}
|
||||
const waitMs = Math.min(
|
||||
EMBEDDING_RETRY_MAX_DELAY_MS,
|
||||
Math.round(delayMs * (1 + Math.random() * 0.2)),
|
||||
);
|
||||
log.warn(`memory embeddings rate limited; retrying structured batch in ${waitMs}ms`);
|
||||
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
||||
delayMs *= 2;
|
||||
attempt += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private isRetryableEmbeddingError(message: string): boolean {
|
||||
return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare|tokens per day)/i.test(
|
||||
message,
|
||||
@@ -695,6 +758,49 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
return this.batch.enabled ? this.batch.concurrency : EMBEDDING_INDEX_CONCURRENCY;
|
||||
}
|
||||
|
||||
/**
 * Removes every indexed artifact for one file: vector rows, FTS rows, and the
 * chunk rows themselves. Vector/FTS deletes are best-effort (wrapped in
 * try/catch) because those tables may be unavailable depending on config.
 */
private clearIndexedFileData(pathname: string, source: MemorySource): void {
  if (this.vector.enabled) {
    try {
      const vectorDelete = this.db.prepare(
        `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
      );
      vectorDelete.run(pathname, source);
    } catch {}
  }
  if (this.fts.enabled && this.fts.available && this.provider) {
    try {
      const ftsDelete = this.db.prepare(
        `DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`,
      );
      ftsDelete.run(pathname, source, this.provider.model);
    } catch {}
  }
  // Chunk rows are deleted unconditionally; they are the source of truth.
  this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(pathname, source);
}
|
||||
|
||||
private upsertFileRecord(entry: MemoryFileEntry | SessionFileEntry, source: MemorySource): void {
|
||||
this.db
|
||||
.prepare(
|
||||
`INSERT INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)
|
||||
ON CONFLICT(path) DO UPDATE SET
|
||||
source=excluded.source,
|
||||
hash=excluded.hash,
|
||||
mtime=excluded.mtime,
|
||||
size=excluded.size`,
|
||||
)
|
||||
.run(entry.path, source, entry.hash, entry.mtimeMs, entry.size);
|
||||
}
|
||||
|
||||
private deleteFileRecord(pathname: string, source: MemorySource): void {
|
||||
this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`).run(pathname, source);
|
||||
}
|
||||
|
||||
/**
 * Heuristically detects provider errors that mean the structured (multimodal)
 * payload exceeded a size or token limit, so the file can be skipped instead
 * of retried.
 */
private isStructuredInputTooLargeError(message: string): boolean {
  const tooLargePattern =
    /(413|payload too large|request too large|input too large|too many tokens|input limit|request size)/i;
  return tooLargePattern.test(message);
}
|
||||
|
||||
protected async indexFile(
|
||||
entry: MemoryFileEntry | SessionFileEntry,
|
||||
options: { source: MemorySource; content?: string },
|
||||
@@ -708,42 +814,59 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
return;
|
||||
}
|
||||
|
||||
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
|
||||
const chunks = enforceEmbeddingMaxInputTokens(
|
||||
this.provider,
|
||||
chunkMarkdown(content, this.settings.chunking).filter(
|
||||
(chunk) => chunk.text.trim().length > 0,
|
||||
),
|
||||
EMBEDDING_BATCH_MAX_TOKENS,
|
||||
);
|
||||
if (options.source === "sessions" && "lineMap" in entry) {
|
||||
remapChunkLines(chunks, entry.lineMap);
|
||||
let chunks: MemoryChunk[];
|
||||
let structuredInputBytes: number | undefined;
|
||||
if ("kind" in entry && entry.kind === "multimodal") {
|
||||
const multimodalChunk = await buildMultimodalChunkForIndexing(entry);
|
||||
if (!multimodalChunk) {
|
||||
this.clearIndexedFileData(entry.path, options.source);
|
||||
this.deleteFileRecord(entry.path, options.source);
|
||||
return;
|
||||
}
|
||||
structuredInputBytes = multimodalChunk.structuredInputBytes;
|
||||
chunks = [multimodalChunk.chunk];
|
||||
} else {
|
||||
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
|
||||
chunks = enforceEmbeddingMaxInputTokens(
|
||||
this.provider,
|
||||
chunkMarkdown(content, this.settings.chunking).filter(
|
||||
(chunk) => chunk.text.trim().length > 0,
|
||||
),
|
||||
EMBEDDING_BATCH_MAX_TOKENS,
|
||||
);
|
||||
if (options.source === "sessions" && "lineMap" in entry) {
|
||||
remapChunkLines(chunks, entry.lineMap);
|
||||
}
|
||||
}
|
||||
let embeddings: number[][];
|
||||
try {
|
||||
embeddings = this.batch.enabled
|
||||
? await this.embedChunksWithBatch(chunks, entry, options.source)
|
||||
: await this.embedChunksInBatches(chunks);
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
if (
|
||||
"kind" in entry &&
|
||||
entry.kind === "multimodal" &&
|
||||
this.isStructuredInputTooLargeError(message)
|
||||
) {
|
||||
log.warn("memory embeddings: skipping multimodal file rejected as too large", {
|
||||
path: entry.path,
|
||||
bytes: structuredInputBytes,
|
||||
provider: this.provider.id,
|
||||
model: this.provider.model,
|
||||
error: message,
|
||||
});
|
||||
this.clearIndexedFileData(entry.path, options.source);
|
||||
this.upsertFileRecord(entry, options.source);
|
||||
return;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
const embeddings = this.batch.enabled
|
||||
? await this.embedChunksWithBatch(chunks, entry, options.source)
|
||||
: await this.embedChunksInBatches(chunks);
|
||||
const sample = embeddings.find((embedding) => embedding.length > 0);
|
||||
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;
|
||||
const now = Date.now();
|
||||
if (vectorReady) {
|
||||
try {
|
||||
this.db
|
||||
.prepare(
|
||||
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
|
||||
)
|
||||
.run(entry.path, options.source);
|
||||
} catch {}
|
||||
}
|
||||
if (this.fts.enabled && this.fts.available) {
|
||||
try {
|
||||
this.db
|
||||
.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
|
||||
.run(entry.path, options.source, this.provider.model);
|
||||
} catch {}
|
||||
}
|
||||
this.db
|
||||
.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`)
|
||||
.run(entry.path, options.source);
|
||||
this.clearIndexedFileData(entry.path, options.source);
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
const chunk = chunks[i];
|
||||
const embedding = embeddings[i] ?? [];
|
||||
@@ -798,15 +921,6 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
);
|
||||
}
|
||||
}
|
||||
this.db
|
||||
.prepare(
|
||||
`INSERT INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)
|
||||
ON CONFLICT(path) DO UPDATE SET
|
||||
source=excluded.source,
|
||||
hash=excluded.hash,
|
||||
mtime=excluded.mtime,
|
||||
size=excluded.size`,
|
||||
)
|
||||
.run(entry.path, options.source, entry.hash, entry.mtimeMs, entry.size);
|
||||
this.upsertFileRecord(entry, options.source);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,12 +29,18 @@ import { isFileMissingError } from "./fs-utils.js";
|
||||
import {
|
||||
buildFileEntry,
|
||||
ensureDir,
|
||||
hashText,
|
||||
listMemoryFiles,
|
||||
normalizeExtraMemoryPaths,
|
||||
runWithConcurrency,
|
||||
} from "./internal.js";
|
||||
import { type MemoryFileEntry } from "./internal.js";
|
||||
import { ensureMemoryIndexSchema } from "./memory-schema.js";
|
||||
import {
|
||||
buildCaseInsensitiveExtensionGlob,
|
||||
classifyMemoryMultimodalPath,
|
||||
getMemoryMultimodalExtensions,
|
||||
} from "./multimodal.js";
|
||||
import type { SessionFileEntry } from "./session-files.js";
|
||||
import {
|
||||
buildSessionEntry,
|
||||
@@ -50,6 +56,7 @@ type MemoryIndexMeta = {
|
||||
provider: string;
|
||||
providerKey?: string;
|
||||
sources?: MemorySource[];
|
||||
scopeHash?: string;
|
||||
chunkTokens: number;
|
||||
chunkOverlap: number;
|
||||
vectorDims?: number;
|
||||
@@ -383,9 +390,22 @@ export abstract class MemoryManagerSyncOps {
|
||||
}
|
||||
if (stat.isDirectory()) {
|
||||
watchPaths.add(path.join(entry, "**", "*.md"));
|
||||
if (this.settings.multimodal.enabled) {
|
||||
for (const modality of this.settings.multimodal.modalities) {
|
||||
for (const extension of getMemoryMultimodalExtensions(modality)) {
|
||||
watchPaths.add(
|
||||
path.join(entry, "**", buildCaseInsensitiveExtensionGlob(extension)),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (stat.isFile() && entry.toLowerCase().endsWith(".md")) {
|
||||
if (
|
||||
stat.isFile() &&
|
||||
(entry.toLowerCase().endsWith(".md") ||
|
||||
classifyMemoryMultimodalPath(entry, this.settings.multimodal) !== null)
|
||||
) {
|
||||
watchPaths.add(entry);
|
||||
}
|
||||
} catch {
|
||||
@@ -649,9 +669,19 @@ export abstract class MemoryManagerSyncOps {
|
||||
return;
|
||||
}
|
||||
|
||||
const files = await listMemoryFiles(this.workspaceDir, this.settings.extraPaths);
|
||||
const files = await listMemoryFiles(
|
||||
this.workspaceDir,
|
||||
this.settings.extraPaths,
|
||||
this.settings.multimodal,
|
||||
);
|
||||
const fileEntries = (
|
||||
await Promise.all(files.map(async (file) => buildFileEntry(file, this.workspaceDir)))
|
||||
await runWithConcurrency(
|
||||
files.map(
|
||||
(file) => async () =>
|
||||
await buildFileEntry(file, this.workspaceDir, this.settings.multimodal),
|
||||
),
|
||||
this.getIndexConcurrency(),
|
||||
)
|
||||
).filter((entry): entry is MemoryFileEntry => entry !== null);
|
||||
log.debug("memory sync: indexing memory files", {
|
||||
files: fileEntries.length,
|
||||
@@ -868,6 +898,7 @@ export abstract class MemoryManagerSyncOps {
|
||||
const vectorReady = await this.ensureVectorReady();
|
||||
const meta = this.readMeta();
|
||||
const configuredSources = this.resolveConfiguredSourcesForMeta();
|
||||
const configuredScopeHash = this.resolveConfiguredScopeHash();
|
||||
const needsFullReindex =
|
||||
params?.force ||
|
||||
!meta ||
|
||||
@@ -875,6 +906,7 @@ export abstract class MemoryManagerSyncOps {
|
||||
(this.provider && meta.provider !== this.provider.id) ||
|
||||
meta.providerKey !== this.providerKey ||
|
||||
this.metaSourcesDiffer(meta, configuredSources) ||
|
||||
meta.scopeHash !== configuredScopeHash ||
|
||||
meta.chunkTokens !== this.settings.chunking.tokens ||
|
||||
meta.chunkOverlap !== this.settings.chunking.overlap ||
|
||||
(vectorReady && !meta?.vectorDims);
|
||||
@@ -1088,6 +1120,7 @@ export abstract class MemoryManagerSyncOps {
|
||||
provider: this.provider?.id ?? "none",
|
||||
providerKey: this.providerKey!,
|
||||
sources: this.resolveConfiguredSourcesForMeta(),
|
||||
scopeHash: this.resolveConfiguredScopeHash(),
|
||||
chunkTokens: this.settings.chunking.tokens,
|
||||
chunkOverlap: this.settings.chunking.overlap,
|
||||
};
|
||||
@@ -1159,6 +1192,7 @@ export abstract class MemoryManagerSyncOps {
|
||||
provider: this.provider?.id ?? "none",
|
||||
providerKey: this.providerKey!,
|
||||
sources: this.resolveConfiguredSourcesForMeta(),
|
||||
scopeHash: this.resolveConfiguredScopeHash(),
|
||||
chunkTokens: this.settings.chunking.tokens,
|
||||
chunkOverlap: this.settings.chunking.overlap,
|
||||
};
|
||||
@@ -1236,6 +1270,22 @@ export abstract class MemoryManagerSyncOps {
|
||||
return normalized.length > 0 ? normalized : ["memory"];
|
||||
}
|
||||
|
||||
private resolveConfiguredScopeHash(): string {
|
||||
const extraPaths = normalizeExtraMemoryPaths(this.workspaceDir, this.settings.extraPaths)
|
||||
.map((value) => value.replace(/\\/g, "/"))
|
||||
.toSorted();
|
||||
return hashText(
|
||||
JSON.stringify({
|
||||
extraPaths,
|
||||
multimodal: {
|
||||
enabled: this.settings.multimodal.enabled,
|
||||
modalities: [...this.settings.multimodal.modalities].toSorted(),
|
||||
maxFileBytes: this.settings.multimodal.maxFileBytes,
|
||||
},
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
private metaSourcesDiffer(meta: MemoryIndexMeta, configuredSources: MemorySource[]): boolean {
|
||||
const metaSources = this.normalizeMetaSources(meta);
|
||||
if (metaSources.length !== configuredSources.length) {
|
||||
|
||||
@@ -106,4 +106,50 @@ describe("memory watcher config", () => {
|
||||
expect(ignored?.(path.join(workspaceDir, "memory", ".venv", "lib", "python.md"))).toBe(true);
|
||||
expect(ignored?.(path.join(workspaceDir, "memory", "project", "notes.md"))).toBe(false);
|
||||
});
|
||||
|
||||
// Verifies that enabling multimodal indexing adds case-insensitive extension
// globs (e.g. "*.[pP][nN][gG]") for extraPaths directories to the watcher,
// alongside the default markdown patterns.
it("watches multimodal extensions with case-insensitive globs", async () => {
  // Fresh workspace with a default memory dir plus one extraPaths dir
  // containing an upper-cased image file.
  workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-memory-watch-"));
  extraDir = path.join(workspaceDir, "extra");
  await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
  await fs.mkdir(extraDir, { recursive: true });
  await fs.writeFile(path.join(extraDir, "PHOTO.PNG"), "png");

  // Minimal config: Gemini embedding model with multimodal image+audio
  // enabled, watch on, and vector store disabled to keep the test light.
  const cfg = {
    agents: {
      defaults: {
        workspace: workspaceDir,
        memorySearch: {
          provider: "gemini",
          model: "gemini-embedding-2-preview",
          fallback: "none",
          store: { path: path.join(workspaceDir, "index.sqlite"), vector: { enabled: false } },
          sync: { watch: true, watchDebounceMs: 25, onSessionStart: false, onSearch: false },
          query: { minScore: 0, hybrid: { enabled: false } },
          extraPaths: [extraDir],
          multimodal: { enabled: true, modalities: ["image", "audio"] },
        },
      },
      list: [{ id: "main", default: true }],
    },
  } as OpenClawConfig;

  const result = await getMemorySearchManager({ cfg, agentId: "main" });
  expect(result.manager).not.toBeNull();
  if (!result.manager) {
    throw new Error("manager missing");
  }
  // Keep a handle so the suite-level teardown can close the watcher.
  manager = result.manager as unknown as MemoryIndexManager;

  // The watcher should be created exactly once; inspect the paths it was
  // given rather than waiting for filesystem events.
  expect(watchMock).toHaveBeenCalledTimes(1);
  const [watchedPaths] = watchMock.mock.calls[0] as unknown as [
    string[],
    Record<string, unknown>,
  ];
  // Case-insensitive bracket globs are expected for each enabled modality's
  // extensions under the extraPaths directory.
  expect(watchedPaths).toEqual(
    expect.arrayContaining([
      path.join(extraDir, "**", "*.[pP][nN][gG]"),
      path.join(extraDir, "**", "*.[wW][aA][vV]"),
    ]),
  );
});
|
||||
});
|
||||
|
||||
118 src/memory/multimodal.ts (new file)
@@ -0,0 +1,118 @@
|
||||
const MEMORY_MULTIMODAL_SPECS = {
|
||||
image: {
|
||||
labelPrefix: "Image file",
|
||||
extensions: [".jpg", ".jpeg", ".png", ".webp", ".gif", ".heic", ".heif"],
|
||||
},
|
||||
audio: {
|
||||
labelPrefix: "Audio file",
|
||||
extensions: [".mp3", ".wav", ".ogg", ".opus", ".m4a", ".aac", ".flac"],
|
||||
},
|
||||
} as const;
|
||||
|
||||
export type MemoryMultimodalModality = keyof typeof MEMORY_MULTIMODAL_SPECS;
|
||||
export const MEMORY_MULTIMODAL_MODALITIES = Object.keys(
|
||||
MEMORY_MULTIMODAL_SPECS,
|
||||
) as MemoryMultimodalModality[];
|
||||
export type MemoryMultimodalSelection = MemoryMultimodalModality | "all";
|
||||
|
||||
export type MemoryMultimodalSettings = {
|
||||
enabled: boolean;
|
||||
modalities: MemoryMultimodalModality[];
|
||||
maxFileBytes: number;
|
||||
};
|
||||
|
||||
export const DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES = 10 * 1024 * 1024;
|
||||
|
||||
export function normalizeMemoryMultimodalModalities(
|
||||
raw: MemoryMultimodalSelection[] | undefined,
|
||||
): MemoryMultimodalModality[] {
|
||||
if (raw === undefined || raw.includes("all")) {
|
||||
return [...MEMORY_MULTIMODAL_MODALITIES];
|
||||
}
|
||||
const normalized = new Set<MemoryMultimodalModality>();
|
||||
for (const value of raw) {
|
||||
if (value === "image" || value === "audio") {
|
||||
normalized.add(value);
|
||||
}
|
||||
}
|
||||
return Array.from(normalized);
|
||||
}
|
||||
|
||||
export function normalizeMemoryMultimodalSettings(raw: {
|
||||
enabled?: boolean;
|
||||
modalities?: MemoryMultimodalSelection[];
|
||||
maxFileBytes?: number;
|
||||
}): MemoryMultimodalSettings {
|
||||
const enabled = raw.enabled === true;
|
||||
const maxFileBytes =
|
||||
typeof raw.maxFileBytes === "number" && Number.isFinite(raw.maxFileBytes)
|
||||
? Math.max(1, Math.floor(raw.maxFileBytes))
|
||||
: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES;
|
||||
return {
|
||||
enabled,
|
||||
modalities: enabled ? normalizeMemoryMultimodalModalities(raw.modalities) : [],
|
||||
maxFileBytes,
|
||||
};
|
||||
}
|
||||
|
||||
export function isMemoryMultimodalEnabled(settings: MemoryMultimodalSettings): boolean {
|
||||
return settings.enabled && settings.modalities.length > 0;
|
||||
}
|
||||
|
||||
export function getMemoryMultimodalExtensions(
|
||||
modality: MemoryMultimodalModality,
|
||||
): readonly string[] {
|
||||
return MEMORY_MULTIMODAL_SPECS[modality].extensions;
|
||||
}
|
||||
|
||||
export function buildMemoryMultimodalLabel(
|
||||
modality: MemoryMultimodalModality,
|
||||
normalizedPath: string,
|
||||
): string {
|
||||
return `${MEMORY_MULTIMODAL_SPECS[modality].labelPrefix}: ${normalizedPath}`;
|
||||
}
|
||||
|
||||
export function buildCaseInsensitiveExtensionGlob(extension: string): string {
|
||||
const normalized = extension.trim().replace(/^\./, "").toLowerCase();
|
||||
if (!normalized) {
|
||||
return "*";
|
||||
}
|
||||
const parts = Array.from(normalized, (char) => `[${char.toLowerCase()}${char.toUpperCase()}]`);
|
||||
return `*.${parts.join("")}`;
|
||||
}
|
||||
|
||||
export function classifyMemoryMultimodalPath(
|
||||
filePath: string,
|
||||
settings: MemoryMultimodalSettings,
|
||||
): MemoryMultimodalModality | null {
|
||||
if (!isMemoryMultimodalEnabled(settings)) {
|
||||
return null;
|
||||
}
|
||||
const lower = filePath.trim().toLowerCase();
|
||||
for (const modality of settings.modalities) {
|
||||
for (const extension of getMemoryMultimodalExtensions(modality)) {
|
||||
if (lower.endsWith(extension)) {
|
||||
return modality;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
export function normalizeGeminiEmbeddingModelForMemory(model: string): string {
|
||||
const trimmed = model.trim();
|
||||
if (!trimmed) {
|
||||
return "";
|
||||
}
|
||||
return trimmed.replace(/^models\//, "").replace(/^(gemini|google)\//, "");
|
||||
}
|
||||
|
||||
export function supportsMemoryMultimodalEmbeddings(params: {
|
||||
provider: string;
|
||||
model: string;
|
||||
}): boolean {
|
||||
if (params.provider !== "gemini") {
|
||||
return false;
|
||||
}
|
||||
return normalizeGeminiEmbeddingModelForMemory(params.model) === "gemini-embedding-2-preview";
|
||||
}
|
||||
Reference in New Issue
Block a user