mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
Memory: add multimodal image and audio indexing (#43460)
Merged via squash.
Prepared head SHA: a994c07190
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
committed by
GitHub
parent
20d097ac2f
commit
d79ca52960
@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- iOS/TestFlight: add a local beta release flow with Fastlane prepare/archive/upload support, canonical beta bundle IDs, and watch-app archive fixes. (#42991) Thanks @ngutman.
|
- iOS/TestFlight: add a local beta release flow with Fastlane prepare/archive/upload support, canonical beta bundle IDs, and watch-app archive fixes. (#42991) Thanks @ngutman.
|
||||||
- macOS/onboarding: detect when remote gateways need a shared auth token, explain where to find it on the gateway host, and clarify when a successful check used paired-device auth instead. (#43100) Thanks @ngutman.
|
- macOS/onboarding: detect when remote gateways need a shared auth token, explain where to find it on the gateway host, and clarify when a successful check used paired-device auth instead. (#43100) Thanks @ngutman.
|
||||||
- Onboarding/Ollama: add first-class Ollama setup with Local or Cloud + Local modes, browser-based cloud sign-in, curated model suggestions, and cloud-model handling that skips unnecessary local pulls. (#41529) Thanks @BruceMacD.
|
- Onboarding/Ollama: add first-class Ollama setup with Local or Cloud + Local modes, browser-based cloud sign-in, curated model suggestions, and cloud-model handling that skips unnecessary local pulls. (#41529) Thanks @BruceMacD.
|
||||||
|
- Memory: add opt-in multimodal image and audio indexing for `memorySearch.extraPaths` with Gemini `gemini-embedding-2-preview`, strict fallback gating, and scope-based reindexing. (#43460) Thanks @gumadeiras.
|
||||||
|
|
||||||
### Breaking
|
### Breaking
|
||||||
|
|
||||||
|
|||||||
@@ -284,9 +284,46 @@ Notes:
|
|||||||
|
|
||||||
- Paths can be absolute or workspace-relative.
|
- Paths can be absolute or workspace-relative.
|
||||||
- Directories are scanned recursively for `.md` files.
|
- Directories are scanned recursively for `.md` files.
|
||||||
- Only Markdown files are indexed.
|
- By default, only Markdown files are indexed.
|
||||||
|
- If `memorySearch.multimodal.enabled = true`, OpenClaw also indexes supported image/audio files, but only those found under `extraPaths`. Default memory roots (`MEMORY.md`, `memory.md`, `memory/**/*.md`) stay Markdown-only.
|
||||||
- Symlinks are ignored (files or directories).
|
- Symlinks are ignored (files or directories).
|
||||||
|
|
||||||
|
### Multimodal memory files (Gemini image + audio)
|
||||||
|
|
||||||
|
OpenClaw can index image and audio files from `memorySearch.extraPaths` when the embedding provider is `gemini` and the model is `gemini-embedding-2-preview`:
|
||||||
|
|
||||||
|
```json5
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: {
|
||||||
|
provider: "gemini",
|
||||||
|
model: "gemini-embedding-2-preview",
|
||||||
|
extraPaths: ["assets/reference", "voice-notes"],
|
||||||
|
multimodal: {
|
||||||
|
enabled: true,
|
||||||
|
modalities: ["image", "audio"], // or ["all"]
|
||||||
|
maxFileBytes: 10000000 // per-file size cap in bytes; larger files are skipped
|
||||||
|
},
|
||||||
|
remote: {
|
||||||
|
apiKey: "YOUR_GEMINI_API_KEY"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
|
||||||
|
- Multimodal memory is currently supported only for `gemini-embedding-2-preview`.
|
||||||
|
- Multimodal indexing applies only to files discovered through `memorySearch.extraPaths`.
|
||||||
|
- Supported modalities in this phase: image and audio.
|
||||||
|
- `memorySearch.fallback` must stay `"none"` while multimodal memory is enabled; any other value is rejected with a configuration error.
|
||||||
|
- Matching image/audio file bytes are uploaded to the configured Gemini embedding endpoint during indexing.
|
||||||
|
- Supported image extensions: `.jpg`, `.jpeg`, `.png`, `.webp`, `.gif`, `.heic`, `.heif`.
|
||||||
|
- Supported audio extensions: `.mp3`, `.wav`, `.ogg`, `.opus`, `.m4a`, `.aac`, `.flac`.
|
||||||
|
- Search queries remain text, but Gemini can compare those text queries against indexed image/audio embeddings.
|
||||||
|
- `memory_get` still reads Markdown only; binary files are searchable but not returned as raw file contents.
|
||||||
|
|
||||||
### Gemini embeddings (native)
|
### Gemini embeddings (native)
|
||||||
|
|
||||||
Set the provider to `gemini` to use the Gemini embeddings API directly:
|
Set the provider to `gemini` to use the Gemini embeddings API directly:
|
||||||
|
|||||||
@@ -131,6 +131,113 @@ describe("memory search config", () => {
|
|||||||
expect(resolved?.extraPaths).toEqual(["/shared/notes", "docs", "../team-notes"]);
|
expect(resolved?.extraPaths).toEqual(["/shared/notes", "docs", "../team-notes"]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("normalizes multimodal settings", () => {
|
||||||
|
const cfg = asConfig({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: {
|
||||||
|
provider: "gemini",
|
||||||
|
model: "gemini-embedding-2-preview",
|
||||||
|
multimodal: {
|
||||||
|
enabled: true,
|
||||||
|
modalities: ["all"],
|
||||||
|
maxFileBytes: 8192,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||||
|
expect(resolved?.multimodal).toEqual({
|
||||||
|
enabled: true,
|
||||||
|
modalities: ["image", "audio"],
|
||||||
|
maxFileBytes: 8192,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("keeps an explicit empty multimodal modalities list empty", () => {
|
||||||
|
const cfg = asConfig({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: {
|
||||||
|
provider: "gemini",
|
||||||
|
model: "gemini-embedding-2-preview",
|
||||||
|
multimodal: {
|
||||||
|
enabled: true,
|
||||||
|
modalities: [],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||||
|
expect(resolved?.multimodal).toEqual({
|
||||||
|
enabled: true,
|
||||||
|
modalities: [],
|
||||||
|
maxFileBytes: 10 * 1024 * 1024,
|
||||||
|
});
|
||||||
|
expect(resolved?.provider).toBe("gemini");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not enforce multimodal provider validation when no modalities are active", () => {
|
||||||
|
const cfg = asConfig({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: {
|
||||||
|
provider: "openai",
|
||||||
|
model: "text-embedding-3-small",
|
||||||
|
fallback: "openai",
|
||||||
|
multimodal: {
|
||||||
|
enabled: true,
|
||||||
|
modalities: [],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||||
|
expect(resolved?.multimodal).toEqual({
|
||||||
|
enabled: true,
|
||||||
|
modalities: [],
|
||||||
|
maxFileBytes: 10 * 1024 * 1024,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rejects multimodal memory on unsupported providers", () => {
|
||||||
|
const cfg = asConfig({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: {
|
||||||
|
provider: "openai",
|
||||||
|
model: "text-embedding-3-small",
|
||||||
|
multimodal: { enabled: true, modalities: ["image"] },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(() => resolveMemorySearchConfig(cfg, "main")).toThrow(
|
||||||
|
/memorySearch\.multimodal requires memorySearch\.provider = "gemini"/,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("rejects multimodal memory when fallback is configured", () => {
|
||||||
|
const cfg = asConfig({
|
||||||
|
agents: {
|
||||||
|
defaults: {
|
||||||
|
memorySearch: {
|
||||||
|
provider: "gemini",
|
||||||
|
model: "gemini-embedding-2-preview",
|
||||||
|
fallback: "openai",
|
||||||
|
multimodal: { enabled: true, modalities: ["image"] },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(() => resolveMemorySearchConfig(cfg, "main")).toThrow(
|
||||||
|
/memorySearch\.multimodal does not support memorySearch\.fallback/,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it("includes batch defaults for openai without remote overrides", () => {
|
it("includes batch defaults for openai without remote overrides", () => {
|
||||||
const cfg = configWithDefaultProvider("openai");
|
const cfg = configWithDefaultProvider("openai");
|
||||||
const resolved = resolveMemorySearchConfig(cfg, "main");
|
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||||
|
|||||||
@@ -3,6 +3,12 @@ import path from "node:path";
|
|||||||
import type { OpenClawConfig, MemorySearchConfig } from "../config/config.js";
|
import type { OpenClawConfig, MemorySearchConfig } from "../config/config.js";
|
||||||
import { resolveStateDir } from "../config/paths.js";
|
import { resolveStateDir } from "../config/paths.js";
|
||||||
import type { SecretInput } from "../config/types.secrets.js";
|
import type { SecretInput } from "../config/types.secrets.js";
|
||||||
|
import {
|
||||||
|
isMemoryMultimodalEnabled,
|
||||||
|
normalizeMemoryMultimodalSettings,
|
||||||
|
supportsMemoryMultimodalEmbeddings,
|
||||||
|
type MemoryMultimodalSettings,
|
||||||
|
} from "../memory/multimodal.js";
|
||||||
import { clampInt, clampNumber, resolveUserPath } from "../utils.js";
|
import { clampInt, clampNumber, resolveUserPath } from "../utils.js";
|
||||||
import { resolveAgentConfig } from "./agent-scope.js";
|
import { resolveAgentConfig } from "./agent-scope.js";
|
||||||
|
|
||||||
@@ -10,6 +16,7 @@ export type ResolvedMemorySearchConfig = {
|
|||||||
enabled: boolean;
|
enabled: boolean;
|
||||||
sources: Array<"memory" | "sessions">;
|
sources: Array<"memory" | "sessions">;
|
||||||
extraPaths: string[];
|
extraPaths: string[];
|
||||||
|
multimodal: MemoryMultimodalSettings;
|
||||||
provider: "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama" | "auto";
|
provider: "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama" | "auto";
|
||||||
remote?: {
|
remote?: {
|
||||||
baseUrl?: string;
|
baseUrl?: string;
|
||||||
@@ -204,6 +211,11 @@ function mergeConfig(
|
|||||||
.map((value) => value.trim())
|
.map((value) => value.trim())
|
||||||
.filter(Boolean);
|
.filter(Boolean);
|
||||||
const extraPaths = Array.from(new Set(rawPaths));
|
const extraPaths = Array.from(new Set(rawPaths));
|
||||||
|
const multimodal = normalizeMemoryMultimodalSettings({
|
||||||
|
enabled: overrides?.multimodal?.enabled ?? defaults?.multimodal?.enabled,
|
||||||
|
modalities: overrides?.multimodal?.modalities ?? defaults?.multimodal?.modalities,
|
||||||
|
maxFileBytes: overrides?.multimodal?.maxFileBytes ?? defaults?.multimodal?.maxFileBytes,
|
||||||
|
});
|
||||||
const vector = {
|
const vector = {
|
||||||
enabled: overrides?.store?.vector?.enabled ?? defaults?.store?.vector?.enabled ?? true,
|
enabled: overrides?.store?.vector?.enabled ?? defaults?.store?.vector?.enabled ?? true,
|
||||||
extensionPath:
|
extensionPath:
|
||||||
@@ -307,6 +319,7 @@ function mergeConfig(
|
|||||||
enabled,
|
enabled,
|
||||||
sources,
|
sources,
|
||||||
extraPaths,
|
extraPaths,
|
||||||
|
multimodal,
|
||||||
provider,
|
provider,
|
||||||
remote,
|
remote,
|
||||||
experimental: {
|
experimental: {
|
||||||
@@ -365,5 +378,22 @@ export function resolveMemorySearchConfig(
|
|||||||
if (!resolved.enabled) {
|
if (!resolved.enabled) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
const multimodalActive = isMemoryMultimodalEnabled(resolved.multimodal);
|
||||||
|
if (
|
||||||
|
multimodalActive &&
|
||||||
|
!supportsMemoryMultimodalEmbeddings({
|
||||||
|
provider: resolved.provider,
|
||||||
|
model: resolved.model,
|
||||||
|
})
|
||||||
|
) {
|
||||||
|
throw new Error(
|
||||||
|
'agents.*.memorySearch.multimodal requires memorySearch.provider = "gemini" and model = "gemini-embedding-2-preview".',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (multimodalActive && resolved.fallback !== "none") {
|
||||||
|
throw new Error(
|
||||||
|
'agents.*.memorySearch.multimodal does not support memorySearch.fallback. Set fallback to "none".',
|
||||||
|
);
|
||||||
|
}
|
||||||
return resolved;
|
return resolved;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,6 +72,10 @@ const TARGET_KEYS = [
|
|||||||
"agents.defaults.memorySearch.fallback",
|
"agents.defaults.memorySearch.fallback",
|
||||||
"agents.defaults.memorySearch.sources",
|
"agents.defaults.memorySearch.sources",
|
||||||
"agents.defaults.memorySearch.extraPaths",
|
"agents.defaults.memorySearch.extraPaths",
|
||||||
|
"agents.defaults.memorySearch.multimodal",
|
||||||
|
"agents.defaults.memorySearch.multimodal.enabled",
|
||||||
|
"agents.defaults.memorySearch.multimodal.modalities",
|
||||||
|
"agents.defaults.memorySearch.multimodal.maxFileBytes",
|
||||||
"agents.defaults.memorySearch.experimental.sessionMemory",
|
"agents.defaults.memorySearch.experimental.sessionMemory",
|
||||||
"agents.defaults.memorySearch.remote.baseUrl",
|
"agents.defaults.memorySearch.remote.baseUrl",
|
||||||
"agents.defaults.memorySearch.remote.apiKey",
|
"agents.defaults.memorySearch.remote.apiKey",
|
||||||
|
|||||||
@@ -778,7 +778,15 @@ export const FIELD_HELP: Record<string, string> = {
|
|||||||
"agents.defaults.memorySearch.sources":
|
"agents.defaults.memorySearch.sources":
|
||||||
'Chooses which sources are indexed: "memory" reads MEMORY.md + memory files, and "sessions" includes transcript history. Keep ["memory"] unless you need recall from prior chat transcripts.',
|
'Chooses which sources are indexed: "memory" reads MEMORY.md + memory files, and "sessions" includes transcript history. Keep ["memory"] unless you need recall from prior chat transcripts.',
|
||||||
"agents.defaults.memorySearch.extraPaths":
|
"agents.defaults.memorySearch.extraPaths":
|
||||||
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; keep paths small and intentional to avoid noisy recall.",
|
"Adds extra directories or .md files to the memory index beyond default memory files. Use this when key reference docs live elsewhere in your repo; when multimodal memory is enabled, matching image/audio files under these paths are also eligible for indexing.",
|
||||||
|
"agents.defaults.memorySearch.multimodal":
|
||||||
|
'Optional multimodal memory settings for indexing image and audio files from configured extra paths. Keep this off unless your embedding model explicitly supports cross-modal embeddings, and set `memorySearch.fallback` to "none" while it is enabled. Matching files are uploaded to the configured remote embedding provider during indexing.',
|
||||||
|
"agents.defaults.memorySearch.multimodal.enabled":
|
||||||
|
"Enables image/audio memory indexing from extraPaths. This currently requires Gemini embedding-2, keeps the default memory roots Markdown-only, disables memory-search fallback providers, and uploads matching binary content to the configured remote embedding provider.",
|
||||||
|
"agents.defaults.memorySearch.multimodal.modalities":
|
||||||
|
'Selects which multimodal file types are indexed from extraPaths: "image", "audio", or "all". Keep this narrow to avoid indexing large binary corpora unintentionally.',
|
||||||
|
"agents.defaults.memorySearch.multimodal.maxFileBytes":
|
||||||
|
"Sets the maximum bytes allowed per multimodal file before it is skipped during memory indexing. Use this to cap upload cost and indexing latency, or raise it for short high-quality audio clips.",
|
||||||
"agents.defaults.memorySearch.experimental.sessionMemory":
|
"agents.defaults.memorySearch.experimental.sessionMemory":
|
||||||
"Indexes session transcripts into memory search so responses can reference prior chat turns. Keep this off unless transcript recall is needed, because indexing cost and storage usage both increase.",
|
"Indexes session transcripts into memory search so responses can reference prior chat turns. Keep this off unless transcript recall is needed, because indexing cost and storage usage both increase.",
|
||||||
"agents.defaults.memorySearch.provider":
|
"agents.defaults.memorySearch.provider":
|
||||||
|
|||||||
@@ -319,6 +319,10 @@ export const FIELD_LABELS: Record<string, string> = {
|
|||||||
"agents.defaults.memorySearch.enabled": "Enable Memory Search",
|
"agents.defaults.memorySearch.enabled": "Enable Memory Search",
|
||||||
"agents.defaults.memorySearch.sources": "Memory Search Sources",
|
"agents.defaults.memorySearch.sources": "Memory Search Sources",
|
||||||
"agents.defaults.memorySearch.extraPaths": "Extra Memory Paths",
|
"agents.defaults.memorySearch.extraPaths": "Extra Memory Paths",
|
||||||
|
"agents.defaults.memorySearch.multimodal": "Memory Search Multimodal",
|
||||||
|
"agents.defaults.memorySearch.multimodal.enabled": "Enable Memory Search Multimodal",
|
||||||
|
"agents.defaults.memorySearch.multimodal.modalities": "Memory Search Multimodal Modalities",
|
||||||
|
"agents.defaults.memorySearch.multimodal.maxFileBytes": "Memory Search Multimodal Max File Bytes",
|
||||||
"agents.defaults.memorySearch.experimental.sessionMemory":
|
"agents.defaults.memorySearch.experimental.sessionMemory":
|
||||||
"Memory Search Session Index (Experimental)",
|
"Memory Search Session Index (Experimental)",
|
||||||
"agents.defaults.memorySearch.provider": "Memory Search Provider",
|
"agents.defaults.memorySearch.provider": "Memory Search Provider",
|
||||||
|
|||||||
@@ -319,6 +319,15 @@ export type MemorySearchConfig = {
|
|||||||
sources?: Array<"memory" | "sessions">;
|
sources?: Array<"memory" | "sessions">;
|
||||||
/** Extra paths to include in memory search (directories or .md files). */
|
/** Extra paths to include in memory search (directories or .md files). */
|
||||||
extraPaths?: string[];
|
extraPaths?: string[];
|
||||||
|
/** Optional multimodal file indexing for selected extra paths. */
|
||||||
|
multimodal?: {
|
||||||
|
/** Enable image/audio embeddings from extraPaths. */
|
||||||
|
enabled?: boolean;
|
||||||
|
/** Which non-text file types to index. */
|
||||||
|
modalities?: Array<"image" | "audio" | "all">;
|
||||||
|
/** Max bytes allowed per multimodal file before it is skipped. */
|
||||||
|
maxFileBytes?: number;
|
||||||
|
};
|
||||||
/** Experimental memory search settings. */
|
/** Experimental memory search settings. */
|
||||||
experimental?: {
|
experimental?: {
|
||||||
/** Enable session transcript indexing (experimental, default: false). */
|
/** Enable session transcript indexing (experimental, default: false). */
|
||||||
|
|||||||
@@ -553,6 +553,16 @@ export const MemorySearchSchema = z
|
|||||||
enabled: z.boolean().optional(),
|
enabled: z.boolean().optional(),
|
||||||
sources: z.array(z.union([z.literal("memory"), z.literal("sessions")])).optional(),
|
sources: z.array(z.union([z.literal("memory"), z.literal("sessions")])).optional(),
|
||||||
extraPaths: z.array(z.string()).optional(),
|
extraPaths: z.array(z.string()).optional(),
|
||||||
|
multimodal: z
|
||||||
|
.object({
|
||||||
|
enabled: z.boolean().optional(),
|
||||||
|
modalities: z
|
||||||
|
.array(z.union([z.literal("image"), z.literal("audio"), z.literal("all")]))
|
||||||
|
.optional(),
|
||||||
|
maxFileBytes: z.number().int().positive().optional(),
|
||||||
|
})
|
||||||
|
.strict()
|
||||||
|
.optional(),
|
||||||
experimental: z
|
experimental: z
|
||||||
.object({
|
.object({
|
||||||
sessionMemory: z.boolean().optional(),
|
sessionMemory: z.boolean().optional(),
|
||||||
|
|||||||
@@ -12,6 +12,10 @@ const EXT_BY_MIME: Record<string, string> = {
|
|||||||
"image/gif": ".gif",
|
"image/gif": ".gif",
|
||||||
"audio/ogg": ".ogg",
|
"audio/ogg": ".ogg",
|
||||||
"audio/mpeg": ".mp3",
|
"audio/mpeg": ".mp3",
|
||||||
|
"audio/wav": ".wav",
|
||||||
|
"audio/flac": ".flac",
|
||||||
|
"audio/aac": ".aac",
|
||||||
|
"audio/opus": ".opus",
|
||||||
"audio/x-m4a": ".m4a",
|
"audio/x-m4a": ".m4a",
|
||||||
"audio/mp4": ".m4a",
|
"audio/mp4": ".m4a",
|
||||||
"video/mp4": ".mp4",
|
"video/mp4": ".mp4",
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import { estimateUtf8Bytes, splitTextToUtf8ByteLimit } from "./embedding-input-limits.js";
|
import { estimateUtf8Bytes, splitTextToUtf8ByteLimit } from "./embedding-input-limits.js";
|
||||||
|
import { hasNonTextEmbeddingParts } from "./embedding-inputs.js";
|
||||||
import { resolveEmbeddingMaxInputTokens } from "./embedding-model-limits.js";
|
import { resolveEmbeddingMaxInputTokens } from "./embedding-model-limits.js";
|
||||||
import type { EmbeddingProvider } from "./embeddings.js";
|
import type { EmbeddingProvider } from "./embeddings.js";
|
||||||
import { hashText, type MemoryChunk } from "./internal.js";
|
import { hashText, type MemoryChunk } from "./internal.js";
|
||||||
@@ -16,6 +17,10 @@ export function enforceEmbeddingMaxInputTokens(
|
|||||||
const out: MemoryChunk[] = [];
|
const out: MemoryChunk[] = [];
|
||||||
|
|
||||||
for (const chunk of chunks) {
|
for (const chunk of chunks) {
|
||||||
|
if (hasNonTextEmbeddingParts(chunk.embeddingInput)) {
|
||||||
|
out.push(chunk);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (estimateUtf8Bytes(chunk.text) <= maxInputTokens) {
|
if (estimateUtf8Bytes(chunk.text) <= maxInputTokens) {
|
||||||
out.push(chunk);
|
out.push(chunk);
|
||||||
continue;
|
continue;
|
||||||
@@ -27,6 +32,7 @@ export function enforceEmbeddingMaxInputTokens(
|
|||||||
endLine: chunk.endLine,
|
endLine: chunk.endLine,
|
||||||
text,
|
text,
|
||||||
hash: hashText(text),
|
hash: hashText(text),
|
||||||
|
embeddingInput: { text },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||||
|
|
||||||
// Helpers for enforcing embedding model input size limits.
|
// Helpers for enforcing embedding model input size limits.
|
||||||
//
|
//
|
||||||
// We use UTF-8 byte length as a conservative upper bound for tokenizer output.
|
// We use UTF-8 byte length as a conservative upper bound for tokenizer output.
|
||||||
@@ -11,6 +13,22 @@ export function estimateUtf8Bytes(text: string): number {
|
|||||||
return Buffer.byteLength(text, "utf8");
|
return Buffer.byteLength(text, "utf8");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Estimates the size, in UTF-8 bytes, of a structured embedding input.
 *
 * - When `input.parts` is missing or empty, the estimate is the byte length of
 *   `input.text`.
 * - Otherwise `input.text` is not counted; the estimate is the sum over all parts:
 *   text parts contribute the byte length of their `text`, every other part
 *   contributes the byte length of its `mimeType` plus its `data` string.
 *
 * @param input - Structured embedding input to measure.
 * @returns Estimated size in UTF-8 bytes.
 */
export function estimateStructuredEmbeddingInputBytes(input: EmbeddingInput): number {
|
||||||
|
if (!input.parts?.length) {
|
||||||
|
return estimateUtf8Bytes(input.text);
|
||||||
|
}
|
||||||
|
let total = 0;
|
||||||
|
for (const part of input.parts) {
|
||||||
|
if (part.type === "text") {
|
||||||
|
total += estimateUtf8Bytes(part.text);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Non-text part: count both the MIME type string and the data string as sent.
|
||||||
|
total += estimateUtf8Bytes(part.mimeType);
|
||||||
|
// NOTE(review): `data` is presumably base64 text, so this measures the encoded
|
||||||
|
// payload length rather than the decoded binary size — confirm against callers.
|
||||||
|
total += estimateUtf8Bytes(part.data);
|
||||||
|
}
|
||||||
|
return total;
|
||||||
|
}
|
||||||
|
|
||||||
export function splitTextToUtf8ByteLimit(text: string, maxUtf8Bytes: number): string[] {
|
export function splitTextToUtf8ByteLimit(text: string, maxUtf8Bytes: number): string[] {
|
||||||
if (maxUtf8Bytes <= 0) {
|
if (maxUtf8Bytes <= 0) {
|
||||||
return [text];
|
return [text];
|
||||||
|
|||||||
34
src/memory/embedding-inputs.ts
Normal file
34
src/memory/embedding-inputs.ts
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
/** A plain-text segment of a structured embedding input, tagged with `type: "text"`. */
export type EmbeddingInputTextPart = {
|
||||||
|
type: "text";
|
||||||
|
text: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * A non-text segment of a structured embedding input, carried inline as a string
 * together with its MIME type and tagged with `type: "inline-data"`.
 */
export type EmbeddingInputInlineDataPart = {
|
||||||
|
type: "inline-data";
|
||||||
|
// e.g. "image/png" or "audio/wav" in the visible tests.
|
||||||
|
mimeType: string;
|
||||||
|
// NOTE(review): presumably the base64-encoded file bytes — confirm against the producer.
|
||||||
|
data: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Any single segment of a structured embedding input; discriminate on the `type` field. */
export type EmbeddingInputPart = EmbeddingInputTextPart | EmbeddingInputInlineDataPart;
|
||||||
|
|
||||||
|
/**
 * Input handed to an embedding provider.
 *
 * `text` is always present. `parts` is optional; when it is absent the input is
 * text-only.
 */
export type EmbeddingInput = {
|
||||||
|
text: string;
|
||||||
|
// Optional structured segments (text and/or inline data).
|
||||||
|
parts?: EmbeddingInputPart[];
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Wraps a plain string as a text-only embedding input (no `parts`).
 *
 * @param text - Text to embed.
 * @returns An `EmbeddingInput` whose only field is `text`.
 */
export function buildTextEmbeddingInput(text: string): EmbeddingInput {
|
||||||
|
return { text };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Type guard that narrows an embedding input part to the inline-data variant.
 *
 * @param part - Part to inspect.
 * @returns `true` when `part.type` is `"inline-data"`.
 */
export function isInlineDataEmbeddingInputPart(
|
||||||
|
part: EmbeddingInputPart,
|
||||||
|
): part is EmbeddingInputInlineDataPart {
|
||||||
|
return part.type === "inline-data";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether an embedding input carries at least one inline-data part.
 *
 * @param input - Embedding input to inspect; may be `undefined`.
 * @returns `false` when `input` is `undefined` or has no (or empty) `parts`;
 *   otherwise `true` if any part is an inline-data part.
 */
export function hasNonTextEmbeddingParts(input: EmbeddingInput | undefined): boolean {
|
||||||
|
// Covers an undefined input, missing `parts`, and an empty `parts` array.
|
||||||
|
if (!input?.parts?.length) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return input.parts.some((part) => isInlineDataEmbeddingInputPart(part));
|
||||||
|
}
|
||||||
@@ -1,16 +1,13 @@
|
|||||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||||
import * as authModule from "../agents/model-auth.js";
|
import * as authModule from "../agents/model-auth.js";
|
||||||
import {
|
import {
|
||||||
buildFileDataPart,
|
buildGeminiEmbeddingRequest,
|
||||||
buildGeminiParts,
|
|
||||||
buildGeminiTextEmbeddingRequest,
|
buildGeminiTextEmbeddingRequest,
|
||||||
buildInlineDataPart,
|
|
||||||
createGeminiEmbeddingProvider,
|
createGeminiEmbeddingProvider,
|
||||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||||
GEMINI_EMBEDDING_2_MODELS,
|
GEMINI_EMBEDDING_2_MODELS,
|
||||||
isGeminiEmbedding2Model,
|
isGeminiEmbedding2Model,
|
||||||
resolveGeminiOutputDimensionality,
|
resolveGeminiOutputDimensionality,
|
||||||
type GeminiPart,
|
|
||||||
} from "./embeddings-gemini.js";
|
} from "./embeddings-gemini.js";
|
||||||
|
|
||||||
vi.mock("../agents/model-auth.js", async () => {
|
vi.mock("../agents/model-auth.js", async () => {
|
||||||
@@ -61,40 +58,6 @@ function mockResolvedProviderKey(apiKey = "test-key") {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------- Helper function tests ----------
|
|
||||||
|
|
||||||
describe("buildGeminiParts", () => {
|
|
||||||
it("wraps a string into a single text part", () => {
|
|
||||||
expect(buildGeminiParts("hello")).toEqual([{ text: "hello" }]);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("passes through an existing parts array", () => {
|
|
||||||
const parts: GeminiPart[] = [
|
|
||||||
{ text: "hello" },
|
|
||||||
{ inlineData: { mimeType: "image/png", data: "base64data" } },
|
|
||||||
];
|
|
||||||
expect(buildGeminiParts(parts)).toBe(parts);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("buildInlineDataPart", () => {
|
|
||||||
it("produces the correct shape", () => {
|
|
||||||
const part = buildInlineDataPart("image/jpeg", "abc123");
|
|
||||||
expect(part).toEqual({
|
|
||||||
inlineData: { mimeType: "image/jpeg", data: "abc123" },
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("buildFileDataPart", () => {
|
|
||||||
it("produces the correct shape", () => {
|
|
||||||
const part = buildFileDataPart("application/pdf", "gs://bucket/file.pdf");
|
|
||||||
expect(part).toEqual({
|
|
||||||
fileData: { mimeType: "application/pdf", fileUri: "gs://bucket/file.pdf" },
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("buildGeminiTextEmbeddingRequest", () => {
|
describe("buildGeminiTextEmbeddingRequest", () => {
|
||||||
it("builds a text embedding request with optional model and dimensions", () => {
|
it("builds a text embedding request with optional model and dimensions", () => {
|
||||||
expect(
|
expect(
|
||||||
@@ -113,6 +76,35 @@ describe("buildGeminiTextEmbeddingRequest", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("buildGeminiEmbeddingRequest", () => {
|
||||||
|
it("builds a multimodal request from structured input parts", () => {
|
||||||
|
expect(
|
||||||
|
buildGeminiEmbeddingRequest({
|
||||||
|
input: {
|
||||||
|
text: "Image file: diagram.png",
|
||||||
|
parts: [
|
||||||
|
{ type: "text", text: "Image file: diagram.png" },
|
||||||
|
{ type: "inline-data", mimeType: "image/png", data: "abc123" },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
taskType: "RETRIEVAL_DOCUMENT",
|
||||||
|
modelPath: "models/gemini-embedding-2-preview",
|
||||||
|
outputDimensionality: 1536,
|
||||||
|
}),
|
||||||
|
).toEqual({
|
||||||
|
model: "models/gemini-embedding-2-preview",
|
||||||
|
content: {
|
||||||
|
parts: [
|
||||||
|
{ text: "Image file: diagram.png" },
|
||||||
|
{ inlineData: { mimeType: "image/png", data: "abc123" } },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
taskType: "RETRIEVAL_DOCUMENT",
|
||||||
|
outputDimensionality: 1536,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
// ---------- Model detection ----------
|
// ---------- Model detection ----------
|
||||||
|
|
||||||
describe("isGeminiEmbedding2Model", () => {
|
describe("isGeminiEmbedding2Model", () => {
|
||||||
@@ -319,6 +311,21 @@ describe("gemini-embedding-2-preview provider", () => {
|
|||||||
expect(body.outputDimensionality).toBe(768);
|
expect(body.outputDimensionality).toBe(768);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("sanitizes and normalizes embedQuery responses", async () => {
|
||||||
|
const fetchMock = createGeminiFetchMock([3, 4, Number.NaN]);
|
||||||
|
vi.stubGlobal("fetch", fetchMock);
|
||||||
|
mockResolvedProviderKey();
|
||||||
|
|
||||||
|
const { provider } = await createGeminiEmbeddingProvider({
|
||||||
|
config: {} as never,
|
||||||
|
provider: "gemini",
|
||||||
|
model: "gemini-embedding-2-preview",
|
||||||
|
fallback: "none",
|
||||||
|
});
|
||||||
|
|
||||||
|
await expect(provider.embedQuery("test")).resolves.toEqual([0.6, 0.8, 0]);
|
||||||
|
});
|
||||||
|
|
||||||
it("uses custom outputDimensionality for each embedBatch request", async () => {
|
it("uses custom outputDimensionality for each embedBatch request", async () => {
|
||||||
const fetchMock = createGeminiBatchFetchMock(2);
|
const fetchMock = createGeminiBatchFetchMock(2);
|
||||||
vi.stubGlobal("fetch", fetchMock);
|
vi.stubGlobal("fetch", fetchMock);
|
||||||
@@ -341,6 +348,88 @@ describe("gemini-embedding-2-preview provider", () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("sanitizes and normalizes structured batch responses", async () => {
|
||||||
|
const fetchMock = createGeminiBatchFetchMock(1, [0, Number.POSITIVE_INFINITY, 5]);
|
||||||
|
vi.stubGlobal("fetch", fetchMock);
|
||||||
|
mockResolvedProviderKey();
|
||||||
|
|
||||||
|
const { provider } = await createGeminiEmbeddingProvider({
|
||||||
|
config: {} as never,
|
||||||
|
provider: "gemini",
|
||||||
|
model: "gemini-embedding-2-preview",
|
||||||
|
fallback: "none",
|
||||||
|
});
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
provider.embedBatchInputs?.([
|
||||||
|
{
|
||||||
|
text: "Image file: diagram.png",
|
||||||
|
parts: [
|
||||||
|
{ type: "text", text: "Image file: diagram.png" },
|
||||||
|
{ type: "inline-data", mimeType: "image/png", data: "img" },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
).resolves.toEqual([[0, 0, 1]]);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Checks the wire format: each structured input becomes one batch request whose parts
// are a `{ text }` entry followed by an `{ inlineData }` entry.
it("supports multimodal embedBatchInputs requests", async () => {
  const batchFetch = createGeminiBatchFetchMock(2);
  vi.stubGlobal("fetch", batchFetch);
  mockResolvedProviderKey();

  const created = await createGeminiEmbeddingProvider({
    config: {} as never,
    provider: "gemini",
    model: "gemini-embedding-2-preview",
    fallback: "none",
  });
  const geminiProvider = created.provider;

  expect(geminiProvider.embedBatchInputs).toBeDefined();
  await geminiProvider.embedBatchInputs?.([
    {
      text: "Image file: diagram.png",
      parts: [
        { type: "text", text: "Image file: diagram.png" },
        { type: "inline-data", mimeType: "image/png", data: "img" },
      ],
    },
    {
      text: "Audio file: note.wav",
      parts: [
        { type: "text", text: "Audio file: note.wav" },
        { type: "inline-data", mimeType: "audio/wav", data: "aud" },
      ],
    },
  ]);

  // Builds the request this test expects for one labelled inline-data input.
  const expectedRequest = (label: string, mimeType: string, data: string) => ({
    model: "models/gemini-embedding-2-preview",
    content: {
      parts: [{ text: label }, { inlineData: { mimeType, data } }],
    },
    taskType: "RETRIEVAL_DOCUMENT",
    outputDimensionality: 3072,
  });

  const sentBody = parseFetchBody(batchFetch);
  expect(sentBody.requests).toEqual([
    expectedRequest("Image file: diagram.png", "image/png", "img"),
    expectedRequest("Audio file: note.wav", "audio/wav", "aud"),
  ]);
});
|
||||||
|
|
||||||
it("throws for invalid outputDimensionality", async () => {
|
it("throws for invalid outputDimensionality", async () => {
|
||||||
mockResolvedProviderKey();
|
mockResolvedProviderKey();
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import {
|
|||||||
import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
|
import { requireApiKey, resolveApiKeyForProvider } from "../agents/model-auth.js";
|
||||||
import { parseGeminiAuth } from "../infra/gemini-auth.js";
|
import { parseGeminiAuth } from "../infra/gemini-auth.js";
|
||||||
import type { SsrFPolicy } from "../infra/net/ssrf.js";
|
import type { SsrFPolicy } from "../infra/net/ssrf.js";
|
||||||
|
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||||
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
||||||
import { debugEmbeddingsLog } from "./embeddings-debug.js";
|
import { debugEmbeddingsLog } from "./embeddings-debug.js";
|
||||||
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js";
|
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js";
|
||||||
@@ -50,34 +51,14 @@ export type GeminiTextPart = { text: string };
|
|||||||
export type GeminiInlinePart = {
|
export type GeminiInlinePart = {
|
||||||
inlineData: { mimeType: string; data: string };
|
inlineData: { mimeType: string; data: string };
|
||||||
};
|
};
|
||||||
export type GeminiFilePart = {
|
export type GeminiPart = GeminiTextPart | GeminiInlinePart;
|
||||||
fileData: { mimeType: string; fileUri: string };
|
export type GeminiEmbeddingRequest = {
|
||||||
};
|
content: { parts: GeminiPart[] };
|
||||||
export type GeminiPart = GeminiTextPart | GeminiInlinePart | GeminiFilePart;
|
|
||||||
export type GeminiTextEmbeddingRequest = {
|
|
||||||
content: { parts: GeminiTextPart[] };
|
|
||||||
taskType: GeminiTaskType;
|
taskType: GeminiTaskType;
|
||||||
outputDimensionality?: number;
|
outputDimensionality?: number;
|
||||||
model?: string;
|
model?: string;
|
||||||
};
|
};
|
||||||
|
export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest;
|
||||||
/** Convert a string or pre-built parts array into `GeminiPart[]`. */
|
|
||||||
export function buildGeminiParts(input: string | GeminiPart[]): GeminiPart[] {
|
|
||||||
if (typeof input === "string") {
|
|
||||||
return [{ text: input }];
|
|
||||||
}
|
|
||||||
return input;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Convenience: build an inline-data part for multimodal embeddings. */
|
|
||||||
export function buildInlineDataPart(mimeType: string, base64Data: string): GeminiInlinePart {
|
|
||||||
return { inlineData: { mimeType, data: base64Data } };
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Convenience: build a file-data part for multimodal embeddings. */
|
|
||||||
export function buildFileDataPart(mimeType: string, fileUri: string): GeminiFilePart {
|
|
||||||
return { fileData: { mimeType, fileUri } };
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Builds the text-only Gemini embedding request shape used across direct and batch APIs. */
|
/** Builds the text-only Gemini embedding request shape used across direct and batch APIs. */
|
||||||
export function buildGeminiTextEmbeddingRequest(params: {
|
export function buildGeminiTextEmbeddingRequest(params: {
|
||||||
@@ -86,8 +67,30 @@ export function buildGeminiTextEmbeddingRequest(params: {
|
|||||||
outputDimensionality?: number;
|
outputDimensionality?: number;
|
||||||
modelPath?: string;
|
modelPath?: string;
|
||||||
}): GeminiTextEmbeddingRequest {
|
}): GeminiTextEmbeddingRequest {
|
||||||
const request: GeminiTextEmbeddingRequest = {
|
return buildGeminiEmbeddingRequest({
|
||||||
content: { parts: [{ text: params.text }] },
|
input: { text: params.text },
|
||||||
|
taskType: params.taskType,
|
||||||
|
outputDimensionality: params.outputDimensionality,
|
||||||
|
modelPath: params.modelPath,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export function buildGeminiEmbeddingRequest(params: {
|
||||||
|
input: EmbeddingInput;
|
||||||
|
taskType: GeminiTaskType;
|
||||||
|
outputDimensionality?: number;
|
||||||
|
modelPath?: string;
|
||||||
|
}): GeminiEmbeddingRequest {
|
||||||
|
const request: GeminiEmbeddingRequest = {
|
||||||
|
content: {
|
||||||
|
parts: params.input.parts?.map((part) =>
|
||||||
|
part.type === "text"
|
||||||
|
? ({ text: part.text } satisfies GeminiTextPart)
|
||||||
|
: ({
|
||||||
|
inlineData: { mimeType: part.mimeType, data: part.data },
|
||||||
|
} satisfies GeminiInlinePart),
|
||||||
|
) ?? [{ text: params.input.text }],
|
||||||
|
},
|
||||||
taskType: params.taskType,
|
taskType: params.taskType,
|
||||||
};
|
};
|
||||||
if (params.modelPath) {
|
if (params.modelPath) {
|
||||||
@@ -143,7 +146,7 @@ function resolveRemoteApiKey(remoteApiKey: unknown): string | undefined {
|
|||||||
return trimmed;
|
return trimmed;
|
||||||
}
|
}
|
||||||
|
|
||||||
function normalizeGeminiModel(model: string): string {
|
export function normalizeGeminiModel(model: string): string {
|
||||||
const trimmed = model.trim();
|
const trimmed = model.trim();
|
||||||
if (!trimmed) {
|
if (!trimmed) {
|
||||||
return DEFAULT_GEMINI_EMBEDDING_MODEL;
|
return DEFAULT_GEMINI_EMBEDDING_MODEL;
|
||||||
@@ -158,6 +161,46 @@ function normalizeGeminiModel(model: string): string {
|
|||||||
return withoutPrefix;
|
return withoutPrefix;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * POSTs `params.body` as JSON to `params.endpoint` and returns the parsed response.
 *
 * The request is run through `executeWithApiKeyRotation` with the client's API keys.
 * Headers are the auth headers derived from the current key, overlaid with the
 * client's own headers (client headers win on conflicts).
 *
 * @param params.client - Supplies `apiKeys`, `headers` and `ssrfPolicy` for the call.
 * @param params.endpoint - Absolute URL to POST to.
 * @param params.body - Request payload; serialized with `JSON.stringify`.
 * @returns The response JSON, typed as a single `embedding` and/or a list of `embeddings`.
 * @throws Error with message `gemini embeddings failed: <status> <body>` on a non-OK response.
 */
async function fetchGeminiEmbeddingPayload(params: {
  client: GeminiEmbeddingClient;
  endpoint: string;
  body: unknown;
}): Promise<{
  embedding?: { values?: number[] };
  embeddings?: Array<{ values?: number[] }>;
}> {
  type EmbeddingPayload = {
    embedding?: { values?: number[] };
    embeddings?: Array<{ values?: number[] }>;
  };
  const { client, endpoint } = params;

  // One attempt with a specific key; the rotation helper decides if and when to call it again.
  const postWithKey = async (apiKey: string) => {
    const { headers: authHeaders } = parseGeminiAuth(apiKey);
    return await withRemoteHttpResponse({
      url: endpoint,
      ssrfPolicy: client.ssrfPolicy,
      init: {
        method: "POST",
        headers: { ...authHeaders, ...client.headers },
        // Serialized inside the attempt, exactly where the request is issued.
        body: JSON.stringify(params.body),
      },
      onResponse: async (res) => {
        if (res.ok) {
          return (await res.json()) as EmbeddingPayload;
        }
        // Include the response body so API error details surface in the thrown message.
        const text = await res.text();
        throw new Error(`gemini embeddings failed: ${res.status} ${text}`);
      },
    });
  };

  return await executeWithApiKeyRotation({
    provider: "google",
    apiKeys: client.apiKeys,
    execute: postWithKey,
  });
}
|
||||||
|
|
||||||
function normalizeGeminiBaseUrl(raw: string): string {
|
function normalizeGeminiBaseUrl(raw: string): string {
|
||||||
const trimmed = raw.replace(/\/+$/, "");
|
const trimmed = raw.replace(/\/+$/, "");
|
||||||
const openAiIndex = trimmed.indexOf("/openai");
|
const openAiIndex = trimmed.indexOf("/openai");
|
||||||
@@ -181,71 +224,50 @@ export async function createGeminiEmbeddingProvider(
|
|||||||
const isV2 = isGeminiEmbedding2Model(client.model);
|
const isV2 = isGeminiEmbedding2Model(client.model);
|
||||||
const outputDimensionality = client.outputDimensionality;
|
const outputDimensionality = client.outputDimensionality;
|
||||||
|
|
||||||
const fetchWithGeminiAuth = async (apiKey: string, endpoint: string, body: unknown) => {
|
|
||||||
const authHeaders = parseGeminiAuth(apiKey);
|
|
||||||
const headers = {
|
|
||||||
...authHeaders.headers,
|
|
||||||
...client.headers,
|
|
||||||
};
|
|
||||||
const payload = await withRemoteHttpResponse({
|
|
||||||
url: endpoint,
|
|
||||||
ssrfPolicy: client.ssrfPolicy,
|
|
||||||
init: {
|
|
||||||
method: "POST",
|
|
||||||
headers,
|
|
||||||
body: JSON.stringify(body),
|
|
||||||
},
|
|
||||||
onResponse: async (res) => {
|
|
||||||
if (!res.ok) {
|
|
||||||
const text = await res.text();
|
|
||||||
throw new Error(`gemini embeddings failed: ${res.status} ${text}`);
|
|
||||||
}
|
|
||||||
return (await res.json()) as {
|
|
||||||
embedding?: { values?: number[] };
|
|
||||||
embeddings?: Array<{ values?: number[] }>;
|
|
||||||
};
|
|
||||||
},
|
|
||||||
});
|
|
||||||
return payload;
|
|
||||||
};
|
|
||||||
|
|
||||||
const embedQuery = async (text: string): Promise<number[]> => {
|
const embedQuery = async (text: string): Promise<number[]> => {
|
||||||
if (!text.trim()) {
|
if (!text.trim()) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const body = buildGeminiTextEmbeddingRequest({
|
const payload = await fetchGeminiEmbeddingPayload({
|
||||||
text,
|
client,
|
||||||
taskType: options.taskType ?? "RETRIEVAL_QUERY",
|
endpoint: embedUrl,
|
||||||
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
body: buildGeminiTextEmbeddingRequest({
|
||||||
});
|
text,
|
||||||
const payload = await executeWithApiKeyRotation({
|
taskType: options.taskType ?? "RETRIEVAL_QUERY",
|
||||||
provider: "google",
|
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
||||||
apiKeys: client.apiKeys,
|
}),
|
||||||
execute: (apiKey) => fetchWithGeminiAuth(apiKey, embedUrl, body),
|
|
||||||
});
|
});
|
||||||
return sanitizeAndNormalizeEmbedding(payload.embedding?.values ?? []);
|
return sanitizeAndNormalizeEmbedding(payload.embedding?.values ?? []);
|
||||||
};
|
};
|
||||||
|
|
||||||
const embedBatch = async (texts: string[]): Promise<number[][]> => {
|
const embedBatchInputs = async (inputs: EmbeddingInput[]): Promise<number[][]> => {
|
||||||
if (texts.length === 0) {
|
if (inputs.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
const requests = texts.map((text) =>
|
const payload = await fetchGeminiEmbeddingPayload({
|
||||||
buildGeminiTextEmbeddingRequest({
|
client,
|
||||||
text,
|
endpoint: batchUrl,
|
||||||
modelPath: client.modelPath,
|
body: {
|
||||||
taskType: options.taskType ?? "RETRIEVAL_DOCUMENT",
|
requests: inputs.map((input) =>
|
||||||
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
buildGeminiEmbeddingRequest({
|
||||||
}),
|
input,
|
||||||
);
|
modelPath: client.modelPath,
|
||||||
const batchBody = { requests };
|
taskType: options.taskType ?? "RETRIEVAL_DOCUMENT",
|
||||||
const payload = await executeWithApiKeyRotation({
|
outputDimensionality: isV2 ? outputDimensionality : undefined,
|
||||||
provider: "google",
|
}),
|
||||||
apiKeys: client.apiKeys,
|
),
|
||||||
execute: (apiKey) => fetchWithGeminiAuth(apiKey, batchUrl, batchBody),
|
},
|
||||||
});
|
});
|
||||||
const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : [];
|
const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : [];
|
||||||
return texts.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? []));
|
return inputs.map((_, index) => sanitizeAndNormalizeEmbedding(embeddings[index]?.values ?? []));
|
||||||
|
};
|
||||||
|
|
||||||
|
const embedBatch = async (texts: string[]): Promise<number[][]> => {
|
||||||
|
return await embedBatchInputs(
|
||||||
|
texts.map((text) => ({
|
||||||
|
text,
|
||||||
|
})),
|
||||||
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -255,6 +277,7 @@ export async function createGeminiEmbeddingProvider(
|
|||||||
maxInputTokens: GEMINI_MAX_INPUT_TOKENS[client.model],
|
maxInputTokens: GEMINI_MAX_INPUT_TOKENS[client.model],
|
||||||
embedQuery,
|
embedQuery,
|
||||||
embedBatch,
|
embedBatch,
|
||||||
|
embedBatchInputs,
|
||||||
},
|
},
|
||||||
client,
|
client,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import type { OpenClawConfig } from "../config/config.js";
|
|||||||
import type { SecretInput } from "../config/types.secrets.js";
|
import type { SecretInput } from "../config/types.secrets.js";
|
||||||
import { formatErrorMessage } from "../infra/errors.js";
|
import { formatErrorMessage } from "../infra/errors.js";
|
||||||
import { resolveUserPath } from "../utils.js";
|
import { resolveUserPath } from "../utils.js";
|
||||||
|
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||||
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
||||||
import {
|
import {
|
||||||
createGeminiEmbeddingProvider,
|
createGeminiEmbeddingProvider,
|
||||||
@@ -31,6 +32,7 @@ export type EmbeddingProvider = {
|
|||||||
maxInputTokens?: number;
|
maxInputTokens?: number;
|
||||||
embedQuery: (text: string) => Promise<number[]>;
|
embedQuery: (text: string) => Promise<number[]>;
|
||||||
embedBatch: (texts: string[]) => Promise<number[][]>;
|
embedBatch: (texts: string[]) => Promise<number[][]>;
|
||||||
|
embedBatchInputs?: (inputs: EmbeddingInput[]) => Promise<number[][]>;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type EmbeddingProviderId = "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama";
|
export type EmbeddingProviderId = "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama";
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import { randomUUID } from "node:crypto";
|
||||||
import fs from "node:fs/promises";
|
import fs from "node:fs/promises";
|
||||||
import os from "node:os";
|
import os from "node:os";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
@@ -6,6 +7,7 @@ import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
|
|||||||
import "./test-runtime-mocks.js";
|
import "./test-runtime-mocks.js";
|
||||||
|
|
||||||
let embedBatchCalls = 0;
|
let embedBatchCalls = 0;
|
||||||
|
let embedBatchInputCalls = 0;
|
||||||
let providerCalls: Array<{ provider?: string; model?: string; outputDimensionality?: number }> = [];
|
let providerCalls: Array<{ provider?: string; model?: string; outputDimensionality?: number }> = [];
|
||||||
|
|
||||||
vi.mock("./embeddings.js", () => {
|
vi.mock("./embeddings.js", () => {
|
||||||
@@ -13,7 +15,9 @@ vi.mock("./embeddings.js", () => {
|
|||||||
const lower = text.toLowerCase();
|
const lower = text.toLowerCase();
|
||||||
const alpha = lower.split("alpha").length - 1;
|
const alpha = lower.split("alpha").length - 1;
|
||||||
const beta = lower.split("beta").length - 1;
|
const beta = lower.split("beta").length - 1;
|
||||||
return [alpha, beta];
|
const image = lower.split("image").length - 1;
|
||||||
|
const audio = lower.split("audio").length - 1;
|
||||||
|
return [alpha, beta, image, audio];
|
||||||
};
|
};
|
||||||
return {
|
return {
|
||||||
createEmbeddingProvider: async (options: {
|
createEmbeddingProvider: async (options: {
|
||||||
@@ -38,6 +42,36 @@ vi.mock("./embeddings.js", () => {
|
|||||||
embedBatchCalls += 1;
|
embedBatchCalls += 1;
|
||||||
return texts.map(embedText);
|
return texts.map(embedText);
|
||||||
},
|
},
|
||||||
|
...(providerId === "gemini"
|
||||||
|
? {
|
||||||
|
embedBatchInputs: async (
|
||||||
|
inputs: Array<{
|
||||||
|
text: string;
|
||||||
|
parts?: Array<
|
||||||
|
| { type: "text"; text: string }
|
||||||
|
| { type: "inline-data"; mimeType: string; data: string }
|
||||||
|
>;
|
||||||
|
}>,
|
||||||
|
) => {
|
||||||
|
embedBatchInputCalls += 1;
|
||||||
|
return inputs.map((input) => {
|
||||||
|
const inlineData = input.parts?.find((part) => part.type === "inline-data");
|
||||||
|
if (inlineData?.type === "inline-data" && inlineData.data.length > 9000) {
|
||||||
|
throw new Error("payload too large");
|
||||||
|
}
|
||||||
|
const mimeType =
|
||||||
|
inlineData?.type === "inline-data" ? inlineData.mimeType : undefined;
|
||||||
|
if (mimeType?.startsWith("image/")) {
|
||||||
|
return [0, 0, 1, 0];
|
||||||
|
}
|
||||||
|
if (mimeType?.startsWith("audio/")) {
|
||||||
|
return [0, 0, 0, 1];
|
||||||
|
}
|
||||||
|
return embedText(input.text);
|
||||||
|
});
|
||||||
|
},
|
||||||
|
}
|
||||||
|
: {}),
|
||||||
},
|
},
|
||||||
...(providerId === "gemini"
|
...(providerId === "gemini"
|
||||||
? {
|
? {
|
||||||
@@ -64,6 +98,7 @@ describe("memory index", () => {
|
|||||||
let indexVectorPath = "";
|
let indexVectorPath = "";
|
||||||
let indexMainPath = "";
|
let indexMainPath = "";
|
||||||
let indexExtraPath = "";
|
let indexExtraPath = "";
|
||||||
|
let indexMultimodalPath = "";
|
||||||
let indexStatusPath = "";
|
let indexStatusPath = "";
|
||||||
let indexSourceChangePath = "";
|
let indexSourceChangePath = "";
|
||||||
let indexModelPath = "";
|
let indexModelPath = "";
|
||||||
@@ -97,6 +132,7 @@ describe("memory index", () => {
|
|||||||
indexMainPath = path.join(workspaceDir, "index-main.sqlite");
|
indexMainPath = path.join(workspaceDir, "index-main.sqlite");
|
||||||
indexVectorPath = path.join(workspaceDir, "index-vector.sqlite");
|
indexVectorPath = path.join(workspaceDir, "index-vector.sqlite");
|
||||||
indexExtraPath = path.join(workspaceDir, "index-extra.sqlite");
|
indexExtraPath = path.join(workspaceDir, "index-extra.sqlite");
|
||||||
|
indexMultimodalPath = path.join(workspaceDir, "index-multimodal.sqlite");
|
||||||
indexStatusPath = path.join(workspaceDir, "index-status.sqlite");
|
indexStatusPath = path.join(workspaceDir, "index-status.sqlite");
|
||||||
indexSourceChangePath = path.join(workspaceDir, "index-source-change.sqlite");
|
indexSourceChangePath = path.join(workspaceDir, "index-source-change.sqlite");
|
||||||
indexModelPath = path.join(workspaceDir, "index-model-change.sqlite");
|
indexModelPath = path.join(workspaceDir, "index-model-change.sqlite");
|
||||||
@@ -119,6 +155,7 @@ describe("memory index", () => {
|
|||||||
// Keep atomic reindex tests on the safe path.
|
// Keep atomic reindex tests on the safe path.
|
||||||
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "1");
|
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "1");
|
||||||
embedBatchCalls = 0;
|
embedBatchCalls = 0;
|
||||||
|
embedBatchInputCalls = 0;
|
||||||
providerCalls = [];
|
providerCalls = [];
|
||||||
|
|
||||||
// Keep the workspace stable to allow manager reuse across tests.
|
// Keep the workspace stable to allow manager reuse across tests.
|
||||||
@@ -149,6 +186,11 @@ describe("memory index", () => {
|
|||||||
provider?: "openai" | "gemini";
|
provider?: "openai" | "gemini";
|
||||||
model?: string;
|
model?: string;
|
||||||
outputDimensionality?: number;
|
outputDimensionality?: number;
|
||||||
|
multimodal?: {
|
||||||
|
enabled?: boolean;
|
||||||
|
modalities?: Array<"image" | "audio" | "all">;
|
||||||
|
maxFileBytes?: number;
|
||||||
|
};
|
||||||
vectorEnabled?: boolean;
|
vectorEnabled?: boolean;
|
||||||
cacheEnabled?: boolean;
|
cacheEnabled?: boolean;
|
||||||
minScore?: number;
|
minScore?: number;
|
||||||
@@ -172,6 +214,7 @@ describe("memory index", () => {
|
|||||||
},
|
},
|
||||||
cache: params.cacheEnabled ? { enabled: true } : undefined,
|
cache: params.cacheEnabled ? { enabled: true } : undefined,
|
||||||
extraPaths: params.extraPaths,
|
extraPaths: params.extraPaths,
|
||||||
|
multimodal: params.multimodal,
|
||||||
sources: params.sources,
|
sources: params.sources,
|
||||||
experimental: { sessionMemory: params.sessionMemory ?? false },
|
experimental: { sessionMemory: params.sessionMemory ?? false },
|
||||||
},
|
},
|
||||||
@@ -247,6 +290,103 @@ describe("memory index", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// End-to-end: media files under an extra path are embedded through the structured
// (embedBatchInputs) path and become searchable by modality keyword.
it("indexes multimodal image and audio files from extra paths with Gemini structured inputs", async () => {
  const mediaDir = path.join(workspaceDir, "media-memory");
  await fs.mkdir(mediaDir, { recursive: true });
  // Tiny placeholder payloads; only the file extensions matter to this test.
  for (const [fileName, payload] of [
    ["diagram.png", "png"],
    ["meeting.wav", "wav"],
  ]) {
    await fs.writeFile(path.join(mediaDir, fileName), Buffer.from(payload));
  }

  const manager = await getPersistentManager(
    createCfg({
      storePath: indexMultimodalPath,
      provider: "gemini",
      model: "gemini-embedding-2-preview",
      extraPaths: [mediaDir],
      multimodal: { enabled: true, modalities: ["image", "audio"] },
    }),
  );
  await manager.sync({ reason: "test" });

  // The structured embedding entry point must have been exercised at least once.
  expect(embedBatchInputCalls).toBeGreaterThan(0);

  const imageHits = await manager.search("image");
  const foundDiagram = imageHits.some((hit) => hit.path.endsWith("diagram.png"));
  expect(foundDiagram).toBe(true);

  const audioHits = await manager.search("audio");
  const foundMeeting = audioHits.some((hit) => hit.path.endsWith("meeting.wav"));
  expect(foundMeeting).toBe(true);
});
|
||||||
|
|
||||||
|
// A media file the provider rejects must be dropped from the index while the rest of
// the sync (regular markdown memory) still completes.
it("skips oversized multimodal inputs without aborting sync", async () => {
  const mediaDir = path.join(workspaceDir, "media-oversize");
  await fs.mkdir(mediaDir, { recursive: true });
  // NOTE(review): 7000 raw bytes presumably exceed the mocked provider's 9000-character
  // inline-data limit once encoded, triggering its "payload too large" error — confirm.
  const oversizedImage = Buffer.alloc(7000, 1);
  await fs.writeFile(path.join(mediaDir, "huge.png"), oversizedImage);

  // Unique store path so this test never reuses another test's index.
  const storePath = path.join(workspaceDir, `index-oversize-${randomUUID()}.sqlite`);
  const lookup = await getMemorySearchManager({
    cfg: createCfg({
      storePath,
      provider: "gemini",
      model: "gemini-embedding-2-preview",
      extraPaths: [mediaDir],
      multimodal: { enabled: true, modalities: ["image"] },
    }),
    agentId: "main",
  });
  const manager = requireManager(lookup);
  await manager.sync({ reason: "test" });

  // The structured path was attempted...
  expect(embedBatchInputCalls).toBeGreaterThan(0);
  // ...but the rejected image never made it into the index.
  const imageHits = await manager.search("image");
  const hugeIndexed = imageHits.some((hit) => hit.path.endsWith("huge.png"));
  expect(hugeIndexed).toBe(false);

  // Text memory is still indexed, proving the sync was not aborted.
  const alphaHits = await manager.search("alpha");
  const notesIndexed = alphaHits.some((hit) => hit.path.endsWith("memory/2026-01-12.md"));
  expect(notesIndexed).toBe(true);

  await manager.close?.();
});
|
||||||
|
|
||||||
|
// Simulates a media file that vanishes (ENOENT) on one read during a sync and verifies
// that a later sync embeds and indexes it again.
it("reindexes a multimodal file after a transient mid-sync disappearance", async () => {
  const mediaDir = path.join(workspaceDir, "media-race");
  const imagePath = path.join(mediaDir, "diagram.png");
  await fs.mkdir(mediaDir, { recursive: true });
  await fs.writeFile(imagePath, Buffer.from("png"));

  const cfg = createCfg({
    // Unique store path so the index starts empty for this test.
    storePath: path.join(workspaceDir, `index-race-${randomUUID()}.sqlite`),
    provider: "gemini",
    model: "gemini-embedding-2-preview",
    extraPaths: [mediaDir],
    multimodal: { enabled: true, modalities: ["image"] },
  });
  const manager = requireManager(await getMemorySearchManager({ cfg, agentId: "main" }));

  // Keep a handle on the real implementation so all other reads pass through untouched.
  const realReadFile = fs.readFile.bind(fs);
  let imageReads = 0;
  const readSpy = vi.spyOn(fs, "readFile").mockImplementation(async (...args) => {
    const [targetPath] = args;
    if (typeof targetPath === "string" && targetPath === imagePath) {
      imageReads += 1;
      // Only the second read of the image fails; which indexing step that corresponds
      // to depends on the indexer implementation (not visible from this test).
      if (imageReads === 2) {
        const err = Object.assign(
          new Error(`ENOENT: no such file or directory, open '${imagePath}'`),
          {
            code: "ENOENT",
          },
        ) as NodeJS.ErrnoException;
        throw err;
      }
    }
    return await realReadFile(...args);
  });

  try {
    await manager.sync({ reason: "test" });
  } finally {
    // Always undo the spy: if sync rejects, a leaked fs.readFile mock would poison
    // every test that runs afterwards.
    readSpy.mockRestore();
  }

  const callsAfterFirstSync = embedBatchInputCalls;
  // Force the manager to consider the index stale so the next sync rescans files.
  (manager as unknown as { dirty: boolean }).dirty = true;
  await manager.sync({ reason: "test" });

  // The second sync must have embedded the image again and made it searchable.
  expect(embedBatchInputCalls).toBeGreaterThan(callsAfterFirstSync);
  const results = await manager.search("image");
  expect(results.some((result) => result.path.endsWith("diagram.png"))).toBe(true);

  await manager.close?.();
});
|
||||||
|
|
||||||
it("keeps dirty false in status-only manager after prior indexing", async () => {
|
it("keeps dirty false in status-only manager after prior indexing", async () => {
|
||||||
const cfg = createCfg({ storePath: indexStatusPath });
|
const cfg = createCfg({ storePath: indexStatusPath });
|
||||||
|
|
||||||
@@ -433,6 +573,82 @@ describe("memory index", () => {
|
|||||||
await secondManager.close?.();
|
await secondManager.close?.();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Reopening the same store with a different extraPaths list must rebuild the index:
// content from the new directory is searchable, content from the old one is gone.
it("reindexes when extraPaths change", async () => {
  const storePath = path.join(workspaceDir, `index-scope-extra-${randomUUID()}.sqlite`);
  const firstExtraDir = path.join(workspaceDir, "scope-extra-a");
  const secondExtraDir = path.join(workspaceDir, "scope-extra-b");

  // Start each directory from a clean slate, then seed one distinctive note in it.
  const seedDir = async (dir: string, fileName: string, content: string) => {
    await fs.rm(dir, { recursive: true, force: true });
    await fs.mkdir(dir, { recursive: true });
    await fs.writeFile(path.join(dir, fileName), content);
  };
  await seedDir(firstExtraDir, "a.md", "alpha only");
  await seedDir(secondExtraDir, "b.md", "beta only");

  // Opens a manager on the shared store, scoped to a single extra directory.
  const openManager = async (extraDir: string) =>
    requireManager(
      await getMemorySearchManager({
        cfg: createCfg({ storePath, extraPaths: [extraDir] }),
        agentId: "main",
      }),
    );

  const firstManager = await openManager(firstExtraDir);
  await firstManager.sync?.({ reason: "test" });
  await firstManager.close?.();

  const secondManager = await openManager(secondExtraDir);
  await secondManager.sync?.({ reason: "test" });

  const betaHits = await secondManager.search("beta");
  const hitPaths = betaHits.map((hit) => hit.path);
  expect(hitPaths.some((hitPath) => hitPath.endsWith("scope-extra-b/b.md"))).toBe(true);
  expect(hitPaths.some((hitPath) => hitPath.endsWith("scope-extra-a/a.md"))).toBe(false);

  await secondManager.close?.();
});
|
||||||
|
|
||||||
|
// Turning multimodal indexing on for an existing store must trigger a reindex that
// embeds the media file through the structured-input path.
it("reindexes when multimodal settings change", async () => {
  const storePath = path.join(workspaceDir, `index-scope-multimodal-${randomUUID()}.sqlite`);
  const mediaDir = path.join(workspaceDir, "scope-media");
  await fs.rm(mediaDir, { recursive: true, force: true });
  await fs.mkdir(mediaDir, { recursive: true });
  await fs.writeFile(path.join(mediaDir, "diagram.png"), Buffer.from("png"));

  // First pass: same provider, model and extra path, but no multimodal settings.
  const textOnlyManager = requireManager(
    await getMemorySearchManager({
      cfg: createCfg({
        storePath,
        provider: "gemini",
        model: "gemini-embedding-2-preview",
        extraPaths: [mediaDir],
      }),
      agentId: "main",
    }),
  );
  await textOnlyManager.sync?.({ reason: "test" });
  // Baseline for the structured-embedding counter before multimodal is enabled.
  const multimodalCallsAfterFirstSync = embedBatchInputCalls;
  await textOnlyManager.close?.();

  // Second pass: identical store, now with image indexing enabled.
  const multimodalManager = requireManager(
    await getMemorySearchManager({
      cfg: createCfg({
        storePath,
        provider: "gemini",
        model: "gemini-embedding-2-preview",
        extraPaths: [mediaDir],
        multimodal: { enabled: true, modalities: ["image"] },
      }),
      agentId: "main",
    }),
  );
  await multimodalManager.sync?.({ reason: "test" });

  expect(embedBatchInputCalls).toBeGreaterThan(multimodalCallsAfterFirstSync);
  const imageHits = await multimodalManager.search("image");
  const diagramIndexed = imageHits.some((hit) => hit.path.endsWith("scope-media/diagram.png"));
  expect(diagramIndexed).toBe(true);

  await multimodalManager.close?.();
});
|
||||||
|
|
||||||
it("reuses cached embeddings on forced reindex", async () => {
|
it("reuses cached embeddings on forced reindex", async () => {
|
||||||
const cfg = createCfg({ storePath: indexMainPath, cacheEnabled: true });
|
const cfg = createCfg({ storePath: indexMainPath, cacheEnabled: true });
|
||||||
const manager = await getPersistentManager(cfg);
|
const manager = await getPersistentManager(cfg);
|
||||||
|
|||||||
@@ -3,12 +3,17 @@ import os from "node:os";
|
|||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||||
import {
|
import {
|
||||||
|
buildMultimodalChunkForIndexing,
|
||||||
buildFileEntry,
|
buildFileEntry,
|
||||||
chunkMarkdown,
|
chunkMarkdown,
|
||||||
listMemoryFiles,
|
listMemoryFiles,
|
||||||
normalizeExtraMemoryPaths,
|
normalizeExtraMemoryPaths,
|
||||||
remapChunkLines,
|
remapChunkLines,
|
||||||
} from "./internal.js";
|
} from "./internal.js";
|
||||||
|
import {
|
||||||
|
DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||||
|
type MemoryMultimodalSettings,
|
||||||
|
} from "./multimodal.js";
|
||||||
|
|
||||||
function setupTempDirLifecycle(prefix: string): () => string {
|
function setupTempDirLifecycle(prefix: string): () => string {
|
||||||
let tmpDir = "";
|
let tmpDir = "";
|
||||||
@@ -38,6 +43,11 @@ describe("normalizeExtraMemoryPaths", () => {
|
|||||||
|
|
||||||
describe("listMemoryFiles", () => {
|
describe("listMemoryFiles", () => {
|
||||||
const getTmpDir = setupTempDirLifecycle("memory-test-");
|
const getTmpDir = setupTempDirLifecycle("memory-test-");
|
||||||
|
const multimodal: MemoryMultimodalSettings = {
|
||||||
|
enabled: true,
|
||||||
|
modalities: ["image", "audio"],
|
||||||
|
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||||
|
};
|
||||||
|
|
||||||
it("includes files from additional paths (directory)", async () => {
|
it("includes files from additional paths (directory)", async () => {
|
||||||
const tmpDir = getTmpDir();
|
const tmpDir = getTmpDir();
|
||||||
@@ -131,10 +141,29 @@ describe("listMemoryFiles", () => {
|
|||||||
const memoryMatches = files.filter((file) => file.endsWith("MEMORY.md"));
|
const memoryMatches = files.filter((file) => file.endsWith("MEMORY.md"));
|
||||||
expect(memoryMatches).toHaveLength(1);
|
expect(memoryMatches).toHaveLength(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// With multimodal settings supplied, listing picks up image and audio files from
// extra paths but still ignores unrelated binary files.
it("includes image and audio files from extra paths when multimodal is enabled", async () => {
  const tmpDir = getTmpDir();
  const extraDir = path.join(tmpDir, "media");
  await fs.mkdir(extraDir, { recursive: true });
  // Two eligible media files plus one file type that must be ignored.
  for (const [fileName, payload] of [
    ["diagram.png", "png"],
    ["note.wav", "wav"],
    ["ignore.bin", "bin"],
  ]) {
    await fs.writeFile(path.join(extraDir, fileName), Buffer.from(payload));
  }

  const files = await listMemoryFiles(tmpDir, [extraDir], multimodal);
  const isListed = (fileName: string) => files.some((file) => file.endsWith(fileName));

  expect(isListed("diagram.png")).toBe(true);
  expect(isListed("note.wav")).toBe(true);
  expect(isListed("ignore.bin")).toBe(false);
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("buildFileEntry", () => {
|
describe("buildFileEntry", () => {
|
||||||
const getTmpDir = setupTempDirLifecycle("memory-build-entry-");
|
const getTmpDir = setupTempDirLifecycle("memory-build-entry-");
|
||||||
|
const multimodal: MemoryMultimodalSettings = {
|
||||||
|
enabled: true,
|
||||||
|
modalities: ["image", "audio"],
|
||||||
|
maxFileBytes: DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES,
|
||||||
|
};
|
||||||
|
|
||||||
it("returns null when the file disappears before reading", async () => {
|
it("returns null when the file disappears before reading", async () => {
|
||||||
const tmpDir = getTmpDir();
|
const tmpDir = getTmpDir();
|
||||||
@@ -154,6 +183,37 @@ describe("buildFileEntry", () => {
|
|||||||
expect(entry?.path).toBe("note.md");
|
expect(entry?.path).toBe("note.md");
|
||||||
expect(entry?.size).toBeGreaterThan(0);
|
expect(entry?.size).toBeGreaterThan(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// A .png file built with multimodal settings yields an entry tagged as a
// multimodal image, carrying its MIME type and a text label.
it("returns multimodal metadata for eligible image files", async () => {
  const workspace = getTmpDir();
  const imagePath = path.join(workspace, "diagram.png");
  await fs.writeFile(imagePath, Buffer.from("png"));

  const expected = {
    path: "diagram.png",
    kind: "multimodal",
    modality: "image",
    mimeType: "image/png",
    contentText: "Image file: diagram.png",
  };

  const fileEntry = await buildFileEntry(imagePath, workspace, multimodal);

  expect(fileEntry).toMatchObject(expected);
});
|
||||||
|
|
||||||
|
// The chunk built from a multimodal entry carries a two-part embedding input
// (text label, then inline data) and a positive structured-input byte estimate.
it("builds a multimodal chunk lazily for indexing", async () => {
  const workspace = getTmpDir();
  const imagePath = path.join(workspace, "diagram.png");
  await fs.writeFile(imagePath, Buffer.from("png"));

  const fileEntry = await buildFileEntry(imagePath, workspace, multimodal);
  const result = await buildMultimodalChunkForIndexing(fileEntry!);

  const parts = result?.chunk.embeddingInput?.parts;
  expect(parts).toEqual([
    { type: "text", text: "Image file: diagram.png" },
    expect.objectContaining({ type: "inline-data", mimeType: "image/png" }),
  ]);
  expect(result?.structuredInputBytes).toBeGreaterThan(0);
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("chunkMarkdown", () => {
|
describe("chunkMarkdown", () => {
|
||||||
|
|||||||
@@ -2,8 +2,17 @@ import crypto from "node:crypto";
|
|||||||
import fsSync from "node:fs";
|
import fsSync from "node:fs";
|
||||||
import fs from "node:fs/promises";
|
import fs from "node:fs/promises";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
|
import { detectMime } from "../media/mime.js";
|
||||||
import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
|
import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
|
||||||
|
import { estimateStructuredEmbeddingInputBytes } from "./embedding-input-limits.js";
|
||||||
|
import { buildTextEmbeddingInput, type EmbeddingInput } from "./embedding-inputs.js";
|
||||||
import { isFileMissingError } from "./fs-utils.js";
|
import { isFileMissingError } from "./fs-utils.js";
|
||||||
|
import {
|
||||||
|
buildMemoryMultimodalLabel,
|
||||||
|
classifyMemoryMultimodalPath,
|
||||||
|
type MemoryMultimodalModality,
|
||||||
|
type MemoryMultimodalSettings,
|
||||||
|
} from "./multimodal.js";
|
||||||
|
|
||||||
export type MemoryFileEntry = {
|
export type MemoryFileEntry = {
|
||||||
path: string;
|
path: string;
|
||||||
@@ -11,6 +20,10 @@ export type MemoryFileEntry = {
|
|||||||
mtimeMs: number;
|
mtimeMs: number;
|
||||||
size: number;
|
size: number;
|
||||||
hash: string;
|
hash: string;
|
||||||
|
kind?: "markdown" | "multimodal";
|
||||||
|
contentText?: string;
|
||||||
|
modality?: MemoryMultimodalModality;
|
||||||
|
mimeType?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type MemoryChunk = {
|
export type MemoryChunk = {
|
||||||
@@ -18,6 +31,18 @@ export type MemoryChunk = {
|
|||||||
endLine: number;
|
endLine: number;
|
||||||
text: string;
|
text: string;
|
||||||
hash: string;
|
hash: string;
|
||||||
|
embeddingInput?: EmbeddingInput;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Result of building the chunk for one multimodal file: the chunk itself plus
 * the estimated byte size of its structured embedding input.
 */
export type MultimodalMemoryChunk = {
|
||||||
|
chunk: MemoryChunk;
|
||||||
|
structuredInputBytes: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fallback settings used when a caller omits the optional `multimodal` argument
// (`multimodal ?? DISABLED_MULTIMODAL_SETTINGS`): disabled, no modalities, zero-byte size cap.
const DISABLED_MULTIMODAL_SETTINGS: MemoryMultimodalSettings = {
|
||||||
|
enabled: false,
|
||||||
|
modalities: [],
|
||||||
|
maxFileBytes: 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
export function ensureDir(dir: string): string {
|
export function ensureDir(dir: string): string {
|
||||||
@@ -56,7 +81,16 @@ export function isMemoryPath(relPath: string): boolean {
|
|||||||
return normalized.startsWith("memory/");
|
return normalized.startsWith("memory/");
|
||||||
}
|
}
|
||||||
|
|
||||||
async function walkDir(dir: string, files: string[]) {
|
/**
 * Decides whether a path may be listed as a memory file.
 *
 * A path ending in `.md` (case-sensitive suffix match) is always accepted.
 * Any other path is accepted only when `classifyMemoryMultimodalPath` returns
 * a non-null modality for it under the given settings.
 *
 * @param filePath - Path to test.
 * @param multimodal - Multimodal settings; the disabled defaults are used when omitted.
 * @returns `true` when the path should be included.
 */
function isAllowedMemoryFilePath(filePath: string, multimodal?: MemoryMultimodalSettings): boolean {
  const isMarkdown = filePath.endsWith(".md");
  if (isMarkdown) {
    return true;
  }
  const settings = multimodal ?? DISABLED_MULTIMODAL_SETTINGS;
  const modality = classifyMemoryMultimodalPath(filePath, settings);
  return modality !== null;
}
|
||||||
|
|
||||||
|
async function walkDir(dir: string, files: string[], multimodal?: MemoryMultimodalSettings) {
|
||||||
const entries = await fs.readdir(dir, { withFileTypes: true });
|
const entries = await fs.readdir(dir, { withFileTypes: true });
|
||||||
for (const entry of entries) {
|
for (const entry of entries) {
|
||||||
const full = path.join(dir, entry.name);
|
const full = path.join(dir, entry.name);
|
||||||
@@ -64,13 +98,13 @@ async function walkDir(dir: string, files: string[]) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (entry.isDirectory()) {
|
if (entry.isDirectory()) {
|
||||||
await walkDir(full, files);
|
await walkDir(full, files, multimodal);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!entry.isFile()) {
|
if (!entry.isFile()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!entry.name.endsWith(".md")) {
|
if (!isAllowedMemoryFilePath(full, multimodal)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
files.push(full);
|
files.push(full);
|
||||||
@@ -80,6 +114,7 @@ async function walkDir(dir: string, files: string[]) {
|
|||||||
export async function listMemoryFiles(
|
export async function listMemoryFiles(
|
||||||
workspaceDir: string,
|
workspaceDir: string,
|
||||||
extraPaths?: string[],
|
extraPaths?: string[],
|
||||||
|
multimodal?: MemoryMultimodalSettings,
|
||||||
): Promise<string[]> {
|
): Promise<string[]> {
|
||||||
const result: string[] = [];
|
const result: string[] = [];
|
||||||
const memoryFile = path.join(workspaceDir, "MEMORY.md");
|
const memoryFile = path.join(workspaceDir, "MEMORY.md");
|
||||||
@@ -117,10 +152,10 @@ export async function listMemoryFiles(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (stat.isDirectory()) {
|
if (stat.isDirectory()) {
|
||||||
await walkDir(inputPath, result);
|
await walkDir(inputPath, result, multimodal);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (stat.isFile() && inputPath.endsWith(".md")) {
|
if (stat.isFile() && isAllowedMemoryFilePath(inputPath, multimodal)) {
|
||||||
result.push(inputPath);
|
result.push(inputPath);
|
||||||
}
|
}
|
||||||
} catch {}
|
} catch {}
|
||||||
@@ -152,6 +187,7 @@ export function hashText(value: string): string {
|
|||||||
export async function buildFileEntry(
|
export async function buildFileEntry(
|
||||||
absPath: string,
|
absPath: string,
|
||||||
workspaceDir: string,
|
workspaceDir: string,
|
||||||
|
multimodal?: MemoryMultimodalSettings,
|
||||||
): Promise<MemoryFileEntry | null> {
|
): Promise<MemoryFileEntry | null> {
|
||||||
let stat;
|
let stat;
|
||||||
try {
|
try {
|
||||||
@@ -162,6 +198,48 @@ export async function buildFileEntry(
|
|||||||
}
|
}
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
|
const normalizedPath = path.relative(workspaceDir, absPath).replace(/\\/g, "/");
|
||||||
|
const multimodalSettings = multimodal ?? DISABLED_MULTIMODAL_SETTINGS;
|
||||||
|
const modality = classifyMemoryMultimodalPath(absPath, multimodalSettings);
|
||||||
|
if (modality) {
|
||||||
|
if (stat.size > multimodalSettings.maxFileBytes) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
let buffer: Buffer;
|
||||||
|
try {
|
||||||
|
buffer = await fs.readFile(absPath);
|
||||||
|
} catch (err) {
|
||||||
|
if (isFileMissingError(err)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
const mimeType = await detectMime({ buffer: buffer.subarray(0, 512), filePath: absPath });
|
||||||
|
if (!mimeType || !mimeType.startsWith(`${modality}/`)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
const contentText = buildMemoryMultimodalLabel(modality, normalizedPath);
|
||||||
|
const dataHash = crypto.createHash("sha256").update(buffer).digest("hex");
|
||||||
|
const chunkHash = hashText(
|
||||||
|
JSON.stringify({
|
||||||
|
path: normalizedPath,
|
||||||
|
contentText,
|
||||||
|
mimeType,
|
||||||
|
dataHash,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
return {
|
||||||
|
path: normalizedPath,
|
||||||
|
absPath,
|
||||||
|
mtimeMs: stat.mtimeMs,
|
||||||
|
size: stat.size,
|
||||||
|
hash: chunkHash,
|
||||||
|
kind: "multimodal",
|
||||||
|
contentText,
|
||||||
|
modality,
|
||||||
|
mimeType,
|
||||||
|
};
|
||||||
|
}
|
||||||
let content: string;
|
let content: string;
|
||||||
try {
|
try {
|
||||||
content = await fs.readFile(absPath, "utf-8");
|
content = await fs.readFile(absPath, "utf-8");
|
||||||
@@ -173,11 +251,59 @@ export async function buildFileEntry(
|
|||||||
}
|
}
|
||||||
const hash = hashText(content);
|
const hash = hashText(content);
|
||||||
return {
|
return {
|
||||||
path: path.relative(workspaceDir, absPath).replace(/\\/g, "/"),
|
path: normalizedPath,
|
||||||
absPath,
|
absPath,
|
||||||
mtimeMs: stat.mtimeMs,
|
mtimeMs: stat.mtimeMs,
|
||||||
size: stat.size,
|
size: stat.size,
|
||||||
hash,
|
hash,
|
||||||
|
kind: "markdown",
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reads a multimodal entry's file and assembles its embedding input: a text
 * part holding the entry label followed by an inline-data part holding the
 * file bytes encoded as base64.
 *
 * @param entry - File entry fields needed to locate and describe the file.
 * @returns The embedding input, or `null` when the entry is not of kind
 *   `"multimodal"`, has no label or MIME type, or the file is missing when read.
 * @throws Any read error that is not a file-missing error.
 */
async function loadMultimodalEmbeddingInput(
  entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind">,
): Promise<EmbeddingInput | null> {
  const { absPath, contentText, mimeType, kind } = entry;
  const isUsable = kind === "multimodal" && Boolean(contentText) && Boolean(mimeType);
  if (!isUsable || !contentText || !mimeType) {
    return null;
  }

  let fileBytes: Buffer;
  try {
    fileBytes = await fs.readFile(absPath);
  } catch (readError) {
    // The file may have been removed between listing and indexing; treat that as "nothing to embed".
    if (!isFileMissingError(readError)) {
      throw readError;
    }
    return null;
  }

  const encoded = fileBytes.toString("base64");
  return {
    text: contentText,
    parts: [
      { type: "text", text: contentText },
      { type: "inline-data", mimeType, data: encoded },
    ],
  };
}
|
||||||
|
|
||||||
|
/**
 * Builds the single chunk used to index a multimodal file, loading the file
 * contents at call time.
 *
 * The chunk spans lines 1..1, reuses the entry hash, and uses the entry label
 * as its text. The byte estimate is computed from the structured embedding input.
 *
 * @param entry - File entry fields required to load and describe the file.
 * @returns The chunk and its estimated structured-input size, or `null` when
 *   no embedding input could be loaded for the entry.
 */
export async function buildMultimodalChunkForIndexing(
  entry: Pick<MemoryFileEntry, "absPath" | "contentText" | "mimeType" | "kind" | "hash">,
): Promise<MultimodalMemoryChunk | null> {
  const input = await loadMultimodalEmbeddingInput(entry);
  if (input === null) {
    return null;
  }

  const chunk: MemoryChunk = {
    startLine: 1,
    endLine: 1,
    text: entry.contentText ?? input.text,
    hash: entry.hash,
    embeddingInput: input,
  };
  const structuredInputBytes = estimateStructuredEmbeddingInputBytes(input);
  return { chunk, structuredInputBytes };
}
|
||||||
|
|
||||||
@@ -213,6 +339,7 @@ export function chunkMarkdown(
|
|||||||
endLine,
|
endLine,
|
||||||
text,
|
text,
|
||||||
hash: hashText(text),
|
hash: hashText(text),
|
||||||
|
embeddingInput: buildTextEmbeddingInput(text),
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -8,9 +8,14 @@ import {
|
|||||||
} from "./batch-openai.js";
|
} from "./batch-openai.js";
|
||||||
import { type VoyageBatchRequest, runVoyageEmbeddingBatches } from "./batch-voyage.js";
|
import { type VoyageBatchRequest, runVoyageEmbeddingBatches } from "./batch-voyage.js";
|
||||||
import { enforceEmbeddingMaxInputTokens } from "./embedding-chunk-limits.js";
|
import { enforceEmbeddingMaxInputTokens } from "./embedding-chunk-limits.js";
|
||||||
import { estimateUtf8Bytes } from "./embedding-input-limits.js";
|
|
||||||
import { buildGeminiTextEmbeddingRequest } from "./embeddings-gemini.js";
|
|
||||||
import {
|
import {
|
||||||
|
estimateStructuredEmbeddingInputBytes,
|
||||||
|
estimateUtf8Bytes,
|
||||||
|
} from "./embedding-input-limits.js";
|
||||||
|
import { type EmbeddingInput, hasNonTextEmbeddingParts } from "./embedding-inputs.js";
|
||||||
|
import { buildGeminiEmbeddingRequest } from "./embeddings-gemini.js";
|
||||||
|
import {
|
||||||
|
buildMultimodalChunkForIndexing,
|
||||||
chunkMarkdown,
|
chunkMarkdown,
|
||||||
hashText,
|
hashText,
|
||||||
parseEmbedding,
|
parseEmbedding,
|
||||||
@@ -53,7 +58,9 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
let currentTokens = 0;
|
let currentTokens = 0;
|
||||||
|
|
||||||
for (const chunk of chunks) {
|
for (const chunk of chunks) {
|
||||||
const estimate = estimateUtf8Bytes(chunk.text);
|
const estimate = chunk.embeddingInput
|
||||||
|
? estimateStructuredEmbeddingInputBytes(chunk.embeddingInput)
|
||||||
|
: estimateUtf8Bytes(chunk.text);
|
||||||
const wouldExceed =
|
const wouldExceed =
|
||||||
current.length > 0 && currentTokens + estimate > EMBEDDING_BATCH_MAX_TOKENS;
|
current.length > 0 && currentTokens + estimate > EMBEDDING_BATCH_MAX_TOKENS;
|
||||||
if (wouldExceed) {
|
if (wouldExceed) {
|
||||||
@@ -188,9 +195,22 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
const missingChunks = missing.map((m) => m.chunk);
|
const missingChunks = missing.map((m) => m.chunk);
|
||||||
const batches = this.buildEmbeddingBatches(missingChunks);
|
const batches = this.buildEmbeddingBatches(missingChunks);
|
||||||
const toCache: Array<{ hash: string; embedding: number[] }> = [];
|
const toCache: Array<{ hash: string; embedding: number[] }> = [];
|
||||||
|
const provider = this.provider;
|
||||||
|
if (!provider) {
|
||||||
|
throw new Error("Cannot embed batch in FTS-only mode (no embedding provider)");
|
||||||
|
}
|
||||||
let cursor = 0;
|
let cursor = 0;
|
||||||
for (const batch of batches) {
|
for (const batch of batches) {
|
||||||
const batchEmbeddings = await this.embedBatchWithRetry(batch.map((chunk) => chunk.text));
|
const inputs = batch.map((chunk) => chunk.embeddingInput ?? { text: chunk.text });
|
||||||
|
const hasStructuredInputs = inputs.some((input) => hasNonTextEmbeddingParts(input));
|
||||||
|
if (hasStructuredInputs && !provider.embedBatchInputs) {
|
||||||
|
throw new Error(
|
||||||
|
`Embedding provider "${provider.id}" does not support multimodal memory inputs.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const batchEmbeddings = hasStructuredInputs
|
||||||
|
? await this.embedBatchInputsWithRetry(inputs)
|
||||||
|
: await this.embedBatchWithRetry(batch.map((chunk) => chunk.text));
|
||||||
for (let i = 0; i < batch.length; i += 1) {
|
for (let i = 0; i < batch.length; i += 1) {
|
||||||
const item = missing[cursor + i];
|
const item = missing[cursor + i];
|
||||||
const embedding = batchEmbeddings[i] ?? [];
|
const embedding = batchEmbeddings[i] ?? [];
|
||||||
@@ -476,6 +496,9 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
source: MemorySource,
|
source: MemorySource,
|
||||||
): Promise<number[][]> {
|
): Promise<number[][]> {
|
||||||
const gemini = this.gemini;
|
const gemini = this.gemini;
|
||||||
|
if (chunks.some((chunk) => hasNonTextEmbeddingParts(chunk.embeddingInput))) {
|
||||||
|
return await this.embedChunksInBatches(chunks);
|
||||||
|
}
|
||||||
return await this.embedChunksWithProviderBatch<GeminiBatchRequest>({
|
return await this.embedChunksWithProviderBatch<GeminiBatchRequest>({
|
||||||
chunks,
|
chunks,
|
||||||
entry,
|
entry,
|
||||||
@@ -483,9 +506,10 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
provider: "gemini",
|
provider: "gemini",
|
||||||
enabled: Boolean(gemini),
|
enabled: Boolean(gemini),
|
||||||
buildRequest: (chunk) => ({
|
buildRequest: (chunk) => ({
|
||||||
request: buildGeminiTextEmbeddingRequest({
|
request: buildGeminiEmbeddingRequest({
|
||||||
text: chunk.text,
|
input: chunk.embeddingInput ?? { text: chunk.text },
|
||||||
taskType: "RETRIEVAL_DOCUMENT",
|
taskType: "RETRIEVAL_DOCUMENT",
|
||||||
|
modelPath: this.gemini?.modelPath,
|
||||||
outputDimensionality: this.gemini?.outputDimensionality,
|
outputDimensionality: this.gemini?.outputDimensionality,
|
||||||
}),
|
}),
|
||||||
}),
|
}),
|
||||||
@@ -536,6 +560,45 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Embeds a batch of structured inputs through the provider's
 * `embedBatchInputs`, retrying retryable failures with jittered, doubling
 * backoff capped at `EMBEDDING_RETRY_MAX_DELAY_MS`.
 *
 * When the provider has no `embedBatchInputs`, each input's `text` is embedded
 * through `embedBatchWithRetry` instead.
 *
 * @param inputs - Structured embedding inputs; an empty list yields an empty result.
 * @returns One embedding vector per input.
 * @throws The last error when it is not retryable or the attempt budget is spent.
 */
protected async embedBatchInputsWithRetry(inputs: EmbeddingInput[]): Promise<number[][]> {
  if (inputs.length === 0) {
    return [];
  }
  if (!this.provider?.embedBatchInputs) {
    const texts = inputs.map((input) => input.text);
    return await this.embedBatchWithRetry(texts);
  }

  let backoffMs = EMBEDDING_RETRY_BASE_DELAY_MS;
  for (let attempt = 0; ; attempt += 1) {
    try {
      const timeoutMs = this.resolveEmbeddingTimeout("batch");
      log.debug("memory embeddings: structured batch start", {
        provider: this.provider.id,
        items: inputs.length,
        timeoutMs,
      });
      return await this.withTimeout(
        this.provider.embedBatchInputs(inputs),
        timeoutMs,
        `memory embeddings batch timed out after ${Math.round(timeoutMs / 1000)}s`,
      );
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      const mayRetry =
        this.isRetryableEmbeddingError(message) && attempt < EMBEDDING_RETRY_MAX_ATTEMPTS;
      if (!mayRetry) {
        throw err;
      }
      // Add up to 20% jitter to the current delay, then clamp to the configured ceiling.
      const jitteredMs = Math.round(backoffMs * (1 + Math.random() * 0.2));
      const waitMs = Math.min(EMBEDDING_RETRY_MAX_DELAY_MS, jitteredMs);
      log.warn(`memory embeddings rate limited; retrying structured batch in ${waitMs}ms`);
      await new Promise((resolve) => setTimeout(resolve, waitMs));
      backoffMs *= 2;
    }
  }
}
|
||||||
|
|
||||||
private isRetryableEmbeddingError(message: string): boolean {
|
private isRetryableEmbeddingError(message: string): boolean {
|
||||||
return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare|tokens per day)/i.test(
|
return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare|tokens per day)/i.test(
|
||||||
message,
|
message,
|
||||||
@@ -695,6 +758,49 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
return this.batch.enabled ? this.batch.concurrency : EMBEDDING_INDEX_CONCURRENCY;
|
return this.batch.enabled ? this.batch.concurrency : EMBEDDING_INDEX_CONCURRENCY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Removes everything indexed for one file and source: vector rows, full-text
 * rows, and chunk rows.
 *
 * Vector rows are deleted only when vector search is enabled. Full-text rows
 * are deleted only when FTS is enabled and available and a provider exists,
 * and only for that provider's model. Both deletes are best-effort: a failure
 * is logged at debug level and does not abort the cleanup. The final delete
 * from `chunks` is not guarded, so its failure propagates to the caller.
 *
 * @param pathname - Indexed path of the file.
 * @param source - Memory source the file belongs to.
 */
private clearIndexedFileData(pathname: string, source: MemorySource): void {
  if (this.vector.enabled) {
    try {
      // Must run before the chunks delete below: the subquery reads chunk ids.
      this.db
        .prepare(
          `DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
        )
        .run(pathname, source);
    } catch (err) {
      log.debug("memory index: failed to clear vector rows", {
        path: pathname,
        source,
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }
  if (this.fts.enabled && this.fts.available && this.provider) {
    try {
      this.db
        .prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
        .run(pathname, source, this.provider.model);
    } catch (err) {
      log.debug("memory index: failed to clear full-text rows", {
        path: pathname,
        source,
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }
  this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(pathname, source);
}
|
||||||
|
|
||||||
|
/**
 * Records a file's source, hash, mtime, and size in the `files` table,
 * inserting a new row or updating the existing row that conflicts on `path`.
 *
 * @param entry - File entry supplying path, hash, mtime, and size.
 * @param source - Memory source stored with the row.
 */
private upsertFileRecord(entry: MemoryFileEntry | SessionFileEntry, source: MemorySource): void {
  const upsert = this.db.prepare(
    `INSERT INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)
     ON CONFLICT(path) DO UPDATE SET
       source=excluded.source,
       hash=excluded.hash,
       mtime=excluded.mtime,
       size=excluded.size`,
  );
  upsert.run(entry.path, source, entry.hash, entry.mtimeMs, entry.size);
}
|
||||||
|
|
||||||
|
/**
 * Deletes the `files` row matching both the given path and source.
 *
 * @param pathname - Indexed path of the file.
 * @param source - Memory source the row belongs to.
 */
private deleteFileRecord(pathname: string, source: MemorySource): void {
  const removeRow = this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`);
  removeRow.run(pathname, source);
}
|
||||||
|
|
||||||
|
/**
 * Tells whether an embedding error message indicates that the request payload
 * was rejected for being too large.
 *
 * The status code `413` is matched only when it is not adjacent to other
 * digits, so longer numbers that merely contain those digits (request IDs,
 * byte counts, timestamps) do not count as a "too large" rejection.
 *
 * @param message - Error message text to inspect (matched case-insensitively).
 * @returns `true` when the message looks like a size-limit rejection.
 */
private isStructuredInputTooLargeError(message: string): boolean {
  return /((?<!\d)413(?!\d)|payload too large|request too large|input too large|too many tokens|input limit|request size)/i.test(
    message,
  );
}
|
||||||
|
|
||||||
protected async indexFile(
|
protected async indexFile(
|
||||||
entry: MemoryFileEntry | SessionFileEntry,
|
entry: MemoryFileEntry | SessionFileEntry,
|
||||||
options: { source: MemorySource; content?: string },
|
options: { source: MemorySource; content?: string },
|
||||||
@@ -708,42 +814,59 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
|
let chunks: MemoryChunk[];
|
||||||
const chunks = enforceEmbeddingMaxInputTokens(
|
let structuredInputBytes: number | undefined;
|
||||||
this.provider,
|
if ("kind" in entry && entry.kind === "multimodal") {
|
||||||
chunkMarkdown(content, this.settings.chunking).filter(
|
const multimodalChunk = await buildMultimodalChunkForIndexing(entry);
|
||||||
(chunk) => chunk.text.trim().length > 0,
|
if (!multimodalChunk) {
|
||||||
),
|
this.clearIndexedFileData(entry.path, options.source);
|
||||||
EMBEDDING_BATCH_MAX_TOKENS,
|
this.deleteFileRecord(entry.path, options.source);
|
||||||
);
|
return;
|
||||||
if (options.source === "sessions" && "lineMap" in entry) {
|
}
|
||||||
remapChunkLines(chunks, entry.lineMap);
|
structuredInputBytes = multimodalChunk.structuredInputBytes;
|
||||||
|
chunks = [multimodalChunk.chunk];
|
||||||
|
} else {
|
||||||
|
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
|
||||||
|
chunks = enforceEmbeddingMaxInputTokens(
|
||||||
|
this.provider,
|
||||||
|
chunkMarkdown(content, this.settings.chunking).filter(
|
||||||
|
(chunk) => chunk.text.trim().length > 0,
|
||||||
|
),
|
||||||
|
EMBEDDING_BATCH_MAX_TOKENS,
|
||||||
|
);
|
||||||
|
if (options.source === "sessions" && "lineMap" in entry) {
|
||||||
|
remapChunkLines(chunks, entry.lineMap);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let embeddings: number[][];
|
||||||
|
try {
|
||||||
|
embeddings = this.batch.enabled
|
||||||
|
? await this.embedChunksWithBatch(chunks, entry, options.source)
|
||||||
|
: await this.embedChunksInBatches(chunks);
|
||||||
|
} catch (err) {
|
||||||
|
const message = err instanceof Error ? err.message : String(err);
|
||||||
|
if (
|
||||||
|
"kind" in entry &&
|
||||||
|
entry.kind === "multimodal" &&
|
||||||
|
this.isStructuredInputTooLargeError(message)
|
||||||
|
) {
|
||||||
|
log.warn("memory embeddings: skipping multimodal file rejected as too large", {
|
||||||
|
path: entry.path,
|
||||||
|
bytes: structuredInputBytes,
|
||||||
|
provider: this.provider.id,
|
||||||
|
model: this.provider.model,
|
||||||
|
error: message,
|
||||||
|
});
|
||||||
|
this.clearIndexedFileData(entry.path, options.source);
|
||||||
|
this.upsertFileRecord(entry, options.source);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
}
|
}
|
||||||
const embeddings = this.batch.enabled
|
|
||||||
? await this.embedChunksWithBatch(chunks, entry, options.source)
|
|
||||||
: await this.embedChunksInBatches(chunks);
|
|
||||||
const sample = embeddings.find((embedding) => embedding.length > 0);
|
const sample = embeddings.find((embedding) => embedding.length > 0);
|
||||||
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;
|
const vectorReady = sample ? await this.ensureVectorReady(sample.length) : false;
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
if (vectorReady) {
|
this.clearIndexedFileData(entry.path, options.source);
|
||||||
try {
|
|
||||||
this.db
|
|
||||||
.prepare(
|
|
||||||
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`,
|
|
||||||
)
|
|
||||||
.run(entry.path, options.source);
|
|
||||||
} catch {}
|
|
||||||
}
|
|
||||||
if (this.fts.enabled && this.fts.available) {
|
|
||||||
try {
|
|
||||||
this.db
|
|
||||||
.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`)
|
|
||||||
.run(entry.path, options.source, this.provider.model);
|
|
||||||
} catch {}
|
|
||||||
}
|
|
||||||
this.db
|
|
||||||
.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`)
|
|
||||||
.run(entry.path, options.source);
|
|
||||||
for (let i = 0; i < chunks.length; i++) {
|
for (let i = 0; i < chunks.length; i++) {
|
||||||
const chunk = chunks[i];
|
const chunk = chunks[i];
|
||||||
const embedding = embeddings[i] ?? [];
|
const embedding = embeddings[i] ?? [];
|
||||||
@@ -798,15 +921,6 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.db
|
this.upsertFileRecord(entry, options.source);
|
||||||
.prepare(
|
|
||||||
`INSERT INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)
|
|
||||||
ON CONFLICT(path) DO UPDATE SET
|
|
||||||
source=excluded.source,
|
|
||||||
hash=excluded.hash,
|
|
||||||
mtime=excluded.mtime,
|
|
||||||
size=excluded.size`,
|
|
||||||
)
|
|
||||||
.run(entry.path, options.source, entry.hash, entry.mtimeMs, entry.size);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,12 +29,18 @@ import { isFileMissingError } from "./fs-utils.js";
|
|||||||
import {
|
import {
|
||||||
buildFileEntry,
|
buildFileEntry,
|
||||||
ensureDir,
|
ensureDir,
|
||||||
|
hashText,
|
||||||
listMemoryFiles,
|
listMemoryFiles,
|
||||||
normalizeExtraMemoryPaths,
|
normalizeExtraMemoryPaths,
|
||||||
runWithConcurrency,
|
runWithConcurrency,
|
||||||
} from "./internal.js";
|
} from "./internal.js";
|
||||||
import { type MemoryFileEntry } from "./internal.js";
|
import { type MemoryFileEntry } from "./internal.js";
|
||||||
import { ensureMemoryIndexSchema } from "./memory-schema.js";
|
import { ensureMemoryIndexSchema } from "./memory-schema.js";
|
||||||
|
import {
|
||||||
|
buildCaseInsensitiveExtensionGlob,
|
||||||
|
classifyMemoryMultimodalPath,
|
||||||
|
getMemoryMultimodalExtensions,
|
||||||
|
} from "./multimodal.js";
|
||||||
import type { SessionFileEntry } from "./session-files.js";
|
import type { SessionFileEntry } from "./session-files.js";
|
||||||
import {
|
import {
|
||||||
buildSessionEntry,
|
buildSessionEntry,
|
||||||
@@ -50,6 +56,7 @@ type MemoryIndexMeta = {
|
|||||||
provider: string;
|
provider: string;
|
||||||
providerKey?: string;
|
providerKey?: string;
|
||||||
sources?: MemorySource[];
|
sources?: MemorySource[];
|
||||||
|
scopeHash?: string;
|
||||||
chunkTokens: number;
|
chunkTokens: number;
|
||||||
chunkOverlap: number;
|
chunkOverlap: number;
|
||||||
vectorDims?: number;
|
vectorDims?: number;
|
||||||
@@ -383,9 +390,22 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
}
|
}
|
||||||
if (stat.isDirectory()) {
|
if (stat.isDirectory()) {
|
||||||
watchPaths.add(path.join(entry, "**", "*.md"));
|
watchPaths.add(path.join(entry, "**", "*.md"));
|
||||||
|
if (this.settings.multimodal.enabled) {
|
||||||
|
for (const modality of this.settings.multimodal.modalities) {
|
||||||
|
for (const extension of getMemoryMultimodalExtensions(modality)) {
|
||||||
|
watchPaths.add(
|
||||||
|
path.join(entry, "**", buildCaseInsensitiveExtensionGlob(extension)),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (stat.isFile() && entry.toLowerCase().endsWith(".md")) {
|
if (
|
||||||
|
stat.isFile() &&
|
||||||
|
(entry.toLowerCase().endsWith(".md") ||
|
||||||
|
classifyMemoryMultimodalPath(entry, this.settings.multimodal) !== null)
|
||||||
|
) {
|
||||||
watchPaths.add(entry);
|
watchPaths.add(entry);
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
@@ -649,9 +669,19 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const files = await listMemoryFiles(this.workspaceDir, this.settings.extraPaths);
|
const files = await listMemoryFiles(
|
||||||
|
this.workspaceDir,
|
||||||
|
this.settings.extraPaths,
|
||||||
|
this.settings.multimodal,
|
||||||
|
);
|
||||||
const fileEntries = (
|
const fileEntries = (
|
||||||
await Promise.all(files.map(async (file) => buildFileEntry(file, this.workspaceDir)))
|
await runWithConcurrency(
|
||||||
|
files.map(
|
||||||
|
(file) => async () =>
|
||||||
|
await buildFileEntry(file, this.workspaceDir, this.settings.multimodal),
|
||||||
|
),
|
||||||
|
this.getIndexConcurrency(),
|
||||||
|
)
|
||||||
).filter((entry): entry is MemoryFileEntry => entry !== null);
|
).filter((entry): entry is MemoryFileEntry => entry !== null);
|
||||||
log.debug("memory sync: indexing memory files", {
|
log.debug("memory sync: indexing memory files", {
|
||||||
files: fileEntries.length,
|
files: fileEntries.length,
|
||||||
@@ -868,6 +898,7 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
const vectorReady = await this.ensureVectorReady();
|
const vectorReady = await this.ensureVectorReady();
|
||||||
const meta = this.readMeta();
|
const meta = this.readMeta();
|
||||||
const configuredSources = this.resolveConfiguredSourcesForMeta();
|
const configuredSources = this.resolveConfiguredSourcesForMeta();
|
||||||
|
const configuredScopeHash = this.resolveConfiguredScopeHash();
|
||||||
const needsFullReindex =
|
const needsFullReindex =
|
||||||
params?.force ||
|
params?.force ||
|
||||||
!meta ||
|
!meta ||
|
||||||
@@ -875,6 +906,7 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
(this.provider && meta.provider !== this.provider.id) ||
|
(this.provider && meta.provider !== this.provider.id) ||
|
||||||
meta.providerKey !== this.providerKey ||
|
meta.providerKey !== this.providerKey ||
|
||||||
this.metaSourcesDiffer(meta, configuredSources) ||
|
this.metaSourcesDiffer(meta, configuredSources) ||
|
||||||
|
meta.scopeHash !== configuredScopeHash ||
|
||||||
meta.chunkTokens !== this.settings.chunking.tokens ||
|
meta.chunkTokens !== this.settings.chunking.tokens ||
|
||||||
meta.chunkOverlap !== this.settings.chunking.overlap ||
|
meta.chunkOverlap !== this.settings.chunking.overlap ||
|
||||||
(vectorReady && !meta?.vectorDims);
|
(vectorReady && !meta?.vectorDims);
|
||||||
@@ -1088,6 +1120,7 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
provider: this.provider?.id ?? "none",
|
provider: this.provider?.id ?? "none",
|
||||||
providerKey: this.providerKey!,
|
providerKey: this.providerKey!,
|
||||||
sources: this.resolveConfiguredSourcesForMeta(),
|
sources: this.resolveConfiguredSourcesForMeta(),
|
||||||
|
scopeHash: this.resolveConfiguredScopeHash(),
|
||||||
chunkTokens: this.settings.chunking.tokens,
|
chunkTokens: this.settings.chunking.tokens,
|
||||||
chunkOverlap: this.settings.chunking.overlap,
|
chunkOverlap: this.settings.chunking.overlap,
|
||||||
};
|
};
|
||||||
@@ -1159,6 +1192,7 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
provider: this.provider?.id ?? "none",
|
provider: this.provider?.id ?? "none",
|
||||||
providerKey: this.providerKey!,
|
providerKey: this.providerKey!,
|
||||||
sources: this.resolveConfiguredSourcesForMeta(),
|
sources: this.resolveConfiguredSourcesForMeta(),
|
||||||
|
scopeHash: this.resolveConfiguredScopeHash(),
|
||||||
chunkTokens: this.settings.chunking.tokens,
|
chunkTokens: this.settings.chunking.tokens,
|
||||||
chunkOverlap: this.settings.chunking.overlap,
|
chunkOverlap: this.settings.chunking.overlap,
|
||||||
};
|
};
|
||||||
@@ -1236,6 +1270,22 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
return normalized.length > 0 ? normalized : ["memory"];
|
return normalized.length > 0 ? normalized : ["memory"];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Hashes the configured indexing scope: the normalized extra paths plus the
 * multimodal settings (enabled flag, modalities, max file size).
 *
 * Paths are rewritten to forward slashes and sorted, and modalities are
 * sorted, so the hash does not depend on the order entries were configured in.
 *
 * @returns The `hashText` digest of the JSON-serialized scope descriptor.
 */
private resolveConfiguredScopeHash(): string {
  const normalizedPaths = normalizeExtraMemoryPaths(this.workspaceDir, this.settings.extraPaths);
  const extraPaths = normalizedPaths.map((entry) => entry.split("\\").join("/")).toSorted();
  const { enabled, modalities, maxFileBytes } = this.settings.multimodal;
  // Key order is part of the serialized form and therefore of the hash.
  const scope = {
    extraPaths,
    multimodal: {
      enabled,
      modalities: [...modalities].toSorted(),
      maxFileBytes,
    },
  };
  return hashText(JSON.stringify(scope));
}
|
||||||
|
|
||||||
private metaSourcesDiffer(meta: MemoryIndexMeta, configuredSources: MemorySource[]): boolean {
|
private metaSourcesDiffer(meta: MemoryIndexMeta, configuredSources: MemorySource[]): boolean {
|
||||||
const metaSources = this.normalizeMetaSources(meta);
|
const metaSources = this.normalizeMetaSources(meta);
|
||||||
if (metaSources.length !== configuredSources.length) {
|
if (metaSources.length !== configuredSources.length) {
|
||||||
|
|||||||
@@ -106,4 +106,50 @@ describe("memory watcher config", () => {
|
|||||||
expect(ignored?.(path.join(workspaceDir, "memory", ".venv", "lib", "python.md"))).toBe(true);
|
expect(ignored?.(path.join(workspaceDir, "memory", ".venv", "lib", "python.md"))).toBe(true);
|
||||||
expect(ignored?.(path.join(workspaceDir, "memory", "project", "notes.md"))).toBe(false);
|
expect(ignored?.(path.join(workspaceDir, "memory", "project", "notes.md"))).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("watches multimodal extensions with case-insensitive globs", async () => {
  // Workspace with a memory dir and an extra dir holding one upper-case image file.
  workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-memory-watch-"));
  extraDir = path.join(workspaceDir, "extra");
  await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
  await fs.mkdir(extraDir, { recursive: true });
  await fs.writeFile(path.join(extraDir, "PHOTO.PNG"), "png");

  const cfg = {
    agents: {
      defaults: {
        workspace: workspaceDir,
        memorySearch: {
          provider: "gemini",
          model: "gemini-embedding-2-preview",
          fallback: "none",
          store: { path: path.join(workspaceDir, "index.sqlite"), vector: { enabled: false } },
          sync: { watch: true, watchDebounceMs: 25, onSessionStart: false, onSearch: false },
          query: { minScore: 0, hybrid: { enabled: false } },
          extraPaths: [extraDir],
          multimodal: { enabled: true, modalities: ["image", "audio"] },
        },
      },
      list: [{ id: "main", default: true }],
    },
  } as OpenClawConfig;

  const { manager: createdManager } = await getMemorySearchManager({ cfg, agentId: "main" });
  expect(createdManager).not.toBeNull();
  if (!createdManager) {
    throw new Error("manager missing");
  }
  manager = createdManager as unknown as MemoryIndexManager;

  // The watcher must be started exactly once; its first argument is the path list.
  expect(watchMock).toHaveBeenCalledTimes(1);
  const firstWatchCall = watchMock.mock.calls[0] as unknown as [
    string[],
    Record<string, unknown>,
  ];
  const watchedPaths = firstWatchCall[0];

  const expectedGlobs = [
    path.join(extraDir, "**", "*.[pP][nN][gG]"),
    path.join(extraDir, "**", "*.[wW][aA][vV]"),
  ];
  expect(watchedPaths).toEqual(expect.arrayContaining(expectedGlobs));
});
|
||||||
});
|
});
|
||||||
|
|||||||
118
src/memory/multimodal.ts
Normal file
118
src/memory/multimodal.ts
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
/**
 * Per-modality metadata table, keyed by modality name.
 *
 * - `labelPrefix`: text placed before the path by `buildMemoryMultimodalLabel`.
 * - `extensions`: lower-case file extensions (with leading dot) returned by
 *   `getMemoryMultimodalExtensions` and matched by `classifyMemoryMultimodalPath`.
 */
const MEMORY_MULTIMODAL_SPECS = {
|
||||||
|
image: {
|
||||||
|
labelPrefix: "Image file",
|
||||||
|
extensions: [".jpg", ".jpeg", ".png", ".webp", ".gif", ".heic", ".heif"],
|
||||||
|
},
|
||||||
|
audio: {
|
||||||
|
labelPrefix: "Audio file",
|
||||||
|
extensions: [".mp3", ".wav", ".ogg", ".opus", ".m4a", ".aac", ".flac"],
|
||||||
|
},
|
||||||
|
} as const;
|
||||||
|
|
||||||
|
/** Union of the modality names, derived from the keys of `MEMORY_MULTIMODAL_SPECS`. */
export type MemoryMultimodalModality = keyof typeof MEMORY_MULTIMODAL_SPECS;
|
||||||
|
/** Runtime list of every modality name, read from the keys of `MEMORY_MULTIMODAL_SPECS`. */
export const MEMORY_MULTIMODAL_MODALITIES = Object.keys(
|
||||||
|
MEMORY_MULTIMODAL_SPECS,
|
||||||
|
) as MemoryMultimodalModality[];
|
||||||
|
/**
 * A value accepted in a raw modality selection: a concrete modality name, or
 * `"all"`, which `normalizeMemoryMultimodalModalities` expands to every modality.
 */
export type MemoryMultimodalSelection = MemoryMultimodalModality | "all";
|
||||||
|
|
||||||
|
/** Normalized multimodal settings, as produced by `normalizeMemoryMultimodalSettings`. */
export type MemoryMultimodalSettings = {
|
||||||
|
// Master switch; `isMemoryMultimodalEnabled` also requires a non-empty `modalities`.
enabled: boolean;
|
||||||
|
// Concrete modalities selected for classification.
modalities: MemoryMultimodalModality[];
|
||||||
|
// NOTE(review): presumably the per-file size cap in bytes; enforcement is not visible in this module.
maxFileBytes: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Fallback for `maxFileBytes` (10 * 1024 * 1024 = 10 MiB), used by
 * `normalizeMemoryMultimodalSettings` when no finite number is configured.
 */
export const DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES = 10 * 1024 * 1024;
|
||||||
|
|
||||||
|
/**
 * Turns a raw modality selection into a de-duplicated list of concrete modalities.
 *
 * @param raw - Configured selection. `undefined`, or any list containing
 *   `"all"`, selects every known modality.
 * @returns A fresh array. For explicit lists, only `"image"` and `"audio"`
 *   entries are kept, in first-occurrence order; an empty list stays empty.
 */
export function normalizeMemoryMultimodalModalities(
  raw: MemoryMultimodalSelection[] | undefined,
): MemoryMultimodalModality[] {
  if (raw === undefined || raw.includes("all")) {
    return [...MEMORY_MULTIMODAL_MODALITIES];
  }
  // Drop anything unrecognized, then let Set collapse repeats while keeping order.
  const recognized = raw.filter(
    (entry): entry is MemoryMultimodalModality => entry === "image" || entry === "audio",
  );
  return [...new Set(recognized)];
}
|
||||||
|
|
||||||
|
/**
 * Builds fully-populated multimodal settings from optional raw configuration.
 *
 * @param raw - Raw settings; every field may be omitted.
 * @returns Settings where:
 *   - `enabled` is true only when `raw.enabled` is exactly `true`;
 *   - `modalities` is empty when disabled, otherwise the normalized selection;
 *   - `maxFileBytes` is the floored value clamped to at least 1, or the
 *     default when the raw value is not a finite number.
 */
export function normalizeMemoryMultimodalSettings(raw: {
  enabled?: boolean;
  modalities?: MemoryMultimodalSelection[];
  maxFileBytes?: number;
}): MemoryMultimodalSettings {
  const requestedLimit = raw.maxFileBytes;
  let maxFileBytes: number;
  if (typeof requestedLimit === "number" && Number.isFinite(requestedLimit)) {
    maxFileBytes = Math.max(1, Math.floor(requestedLimit));
  } else {
    maxFileBytes = DEFAULT_MEMORY_MULTIMODAL_MAX_FILE_BYTES;
  }

  if (raw.enabled !== true) {
    // Disabled: the configured modalities are ignored entirely.
    return { enabled: false, modalities: [], maxFileBytes };
  }
  return {
    enabled: true,
    modalities: normalizeMemoryMultimodalModalities(raw.modalities),
    maxFileBytes,
  };
}
|
||||||
|
|
||||||
|
/**
 * Reports whether multimodal handling is effectively on.
 *
 * @param settings - Normalized multimodal settings.
 * @returns True only when the feature is enabled and at least one modality is selected.
 */
export function isMemoryMultimodalEnabled(settings: MemoryMultimodalSettings): boolean {
  const { enabled, modalities } = settings;
  return enabled && modalities.length > 0;
}
|
||||||
|
|
||||||
|
/**
 * Looks up the file extensions registered for a modality.
 *
 * @param modality - Modality to look up.
 * @returns The read-only extension list from the modality spec table
 *   (lower-case, with leading dot).
 */
export function getMemoryMultimodalExtensions(
  modality: MemoryMultimodalModality,
): readonly string[] {
  const { extensions } = MEMORY_MULTIMODAL_SPECS[modality];
  return extensions;
}
|
||||||
|
|
||||||
|
/**
 * Formats a label for a multimodal file.
 *
 * @param modality - Modality whose label prefix is used.
 * @param normalizedPath - Path text appended after the prefix.
 * @returns `"<labelPrefix>: <normalizedPath>"`, e.g. `"Image file: a/b.png"`.
 */
export function buildMemoryMultimodalLabel(
  modality: MemoryMultimodalModality,
  normalizedPath: string,
): string {
  const { labelPrefix } = MEMORY_MULTIMODAL_SPECS[modality];
  return `${labelPrefix}: ${normalizedPath}`;
}
|
||||||
|
|
||||||
|
/**
 * Builds a glob that matches a file extension regardless of letter case.
 *
 * The extension is trimmed, one leading dot is removed, and the rest is
 * lower-cased. Each remaining character becomes a two-entry character class
 * of its lower- and upper-case forms, so `".png"` yields `"*.[pP][nN][gG]"`.
 *
 * @param extension - Extension with or without a leading dot.
 * @returns The glob, or `"*"` when nothing remains after normalization.
 */
export function buildCaseInsensitiveExtensionGlob(extension: string): string {
  const bare = extension.trim().replace(/^\./, "").toLowerCase();
  if (bare.length === 0) {
    return "*";
  }
  let pattern = "*.";
  // Iterating the string walks it by code point, one class per character.
  for (const char of bare) {
    pattern += `[${char.toLowerCase()}${char.toUpperCase()}]`;
  }
  return pattern;
}
|
||||||
|
|
||||||
|
/**
 * Determines which enabled modality, if any, a file path belongs to.
 *
 * The path is trimmed and lower-cased, then compared by suffix against each
 * selected modality's extensions, in the order the modalities are listed.
 *
 * @param filePath - Path or file name to classify.
 * @param settings - Normalized multimodal settings.
 * @returns The first matching modality, or `null` when multimodal handling is
 *   not enabled or no extension matches.
 */
export function classifyMemoryMultimodalPath(
  filePath: string,
  settings: MemoryMultimodalSettings,
): MemoryMultimodalModality | null {
  if (!isMemoryMultimodalEnabled(settings)) {
    return null;
  }
  const candidate = filePath.trim().toLowerCase();
  const matched = settings.modalities.find((modality) =>
    getMemoryMultimodalExtensions(modality).some((extension) => candidate.endsWith(extension)),
  );
  return matched ?? null;
}
|
||||||
|
|
||||||
|
/**
 * Reduces a Gemini embedding model identifier to its bare model name.
 *
 * After trimming, a leading `models/` is removed, then one leading `gemini/`
 * or `google/` is removed. Matching is case-sensitive.
 *
 * @param model - Model identifier as configured.
 * @returns The stripped name, or `""` when the input is blank.
 */
export function normalizeGeminiEmbeddingModelForMemory(model: string): string {
  let name = model.trim();
  if (name.startsWith("models/")) {
    name = name.slice("models/".length);
  }
  // At most one vendor prefix is stripped, mirroring a single anchored replace.
  for (const vendorPrefix of ["gemini/", "google/"]) {
    if (name.startsWith(vendorPrefix)) {
      name = name.slice(vendorPrefix.length);
      break;
    }
  }
  return name;
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a provider/model pair can embed multimodal memory content.
 *
 * @param params - `provider` id and `model` identifier (prefixes such as
 *   `models/` or `gemini/` are tolerated on the model).
 * @returns True only for provider `"gemini"` with a model that normalizes to
 *   `"gemini-embedding-2-preview"`.
 */
export function supportsMemoryMultimodalEmbeddings(params: {
  provider: string;
  model: string;
}): boolean {
  const { provider, model } = params;
  return (
    provider === "gemini" &&
    normalizeGeminiEmbeddingModelForMemory(model) === "gemini-embedding-2-preview"
  );
}
|
||||||
Reference in New Issue
Block a user