refactor: move memory engine behind plugin adapters

Peter Steinberger
2026-03-27 00:40:45 +00:00
parent aed6283faa
commit dbf78de7c6
142 changed files with 1610 additions and 966 deletions
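The refactor replaces the per-provider client fields (openAi, gemini, voyage, mistral, ollama) on the memory manager with a generic adapter registry reached through registerMemoryEmbeddingProvider. A minimal sketch of a custom adapter against the types exercised in this diff follows; the import path, the "acme" provider, and the embed() helper are illustrative assumptions, not part of the change.

// Hedged sketch, not part of this commit: wiring a third-party embedding
// backend through the new adapter registry. Field names (id, defaultModel,
// create, formatSetupError, runtime.cacheKeyData) mirror the types used in
// this diff; the import path and the "acme" backend are assumptions.
import type {
  MemoryEmbeddingProviderAdapter,
  MemoryEmbeddingProviderCreateOptions,
} from "openclaw/plugin-sdk/memory-core-host-engine"; // assumed export surface

// Hypothetical stand-in for a real embeddings API call.
async function embed(texts: string[]): Promise<number[][]> {
  return texts.map(() => [0, 0, 0]);
}

const acmeAdapter: MemoryEmbeddingProviderAdapter = {
  id: "acme",
  defaultModel: "acme-embed-small",
  // Leaving autoSelectPriority unset keeps the adapter out of provider: "auto" selection.
  async create(options: MemoryEmbeddingProviderCreateOptions) {
    const model = options.model || "acme-embed-small";
    return {
      provider: {
        id: "acme",
        model,
        maxInputTokens: 8192,
        embedQuery: async (text: string) => (await embed([text]))[0] ?? [],
        embedBatch: (texts: string[]) => embed(texts),
      },
      // runtime.cacheKeyData feeds computeProviderKey(); an optional batchEmbed
      // here would opt the provider into remote batch indexing.
      runtime: { id: "acme", cacheKeyData: { provider: "acme", model } },
    };
  },
  formatSetupError: (err: unknown) => `acme embeddings unavailable: ${String(err)}`,
};

// Registration mirrors the built-in wiring added in this commit's plugin entry:
//   register(api) { api.registerMemoryEmbeddingProvider(acmeAdapter); ... }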

View File

@@ -59,12 +59,14 @@ describe("plugin registration", () => {
const registerMemoryPromptSection = vi.fn();
const registerMemoryFlushPlan = vi.fn();
const registerMemoryRuntime = vi.fn();
const registerMemoryEmbeddingProvider = vi.fn();
const registerCli = vi.fn();
const api = {
registerTool,
registerMemoryPromptSection,
registerMemoryFlushPlan,
registerMemoryRuntime,
registerMemoryEmbeddingProvider,
registerCli,
};
@@ -73,6 +75,7 @@ describe("plugin registration", () => {
expect(registerMemoryPromptSection).toHaveBeenCalledWith(buildPromptSection);
expect(registerMemoryFlushPlan).toHaveBeenCalledWith(buildMemoryFlushPlan);
expect(registerMemoryRuntime).toHaveBeenCalledWith(memoryRuntime);
expect(registerMemoryEmbeddingProvider).toHaveBeenCalledTimes(6);
expect(registerTool).toHaveBeenCalledTimes(2);
expect(registerTool.mock.calls[0]?.[1]).toEqual({ names: ["memory_search"] });
expect(registerTool.mock.calls[1]?.[1]).toEqual({ names: ["memory_get"] });

View File

@@ -6,6 +6,7 @@ import {
DEFAULT_MEMORY_FLUSH_PROMPT,
DEFAULT_MEMORY_FLUSH_SOFT_TOKENS,
} from "./src/flush-plan.js";
import { registerBuiltInMemoryEmbeddingProviders } from "./src/memory/provider-adapters.js";
import { buildPromptSection } from "./src/prompt-section.js";
import { memoryRuntime } from "./src/runtime-provider.js";
import { createMemoryGetTool, createMemorySearchTool } from "./src/tools.js";
@@ -23,6 +24,7 @@ export default definePluginEntry({
description: "File-backed memory search tools and CLI",
kind: "memory",
register(api) {
registerBuiltInMemoryEmbeddingProviders(api);
api.registerMemoryPromptSection(buildPromptSection);
api.registerMemoryFlushPlan(buildMemoryFlushPlan);
api.registerMemoryRuntime(memoryRuntime);

View File

@@ -1 +1 @@
export * from "openclaw/plugin-sdk/memory-core-host";
export * from "openclaw/plugin-sdk/memory-core-host-runtime";

View File

@@ -0,0 +1 @@
export * from "openclaw/plugin-sdk/memory-core-host-engine";

View File

@@ -0,0 +1 @@
export { resolveMemoryBackendConfig } from "../engine-host-api.js";

View File

@@ -0,0 +1,138 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterAll, beforeAll, beforeEach, expect, vi, type Mock } from "vitest";
import type { OpenClawConfig } from "../engine-host-api.js";
import type { MemoryIndexManager, MemorySearchManager } from "./index.js";
type EmbeddingTestMocksModule = typeof import("./embedding.test-mocks.js");
type MemoryIndexModule = typeof import("./index.js");
type MemorySearchManagerHandle = Awaited<
ReturnType<MemoryIndexModule["getMemorySearchManager"]>
>["manager"];
export function installEmbeddingManagerFixture(opts: {
fixturePrefix: string;
largeTokens: number;
smallTokens: number;
createCfg: (params: {
workspaceDir: string;
indexPath: string;
tokens: number;
}) => OpenClawConfig;
resetIndexEachTest?: boolean;
}) {
const resetIndexEachTest = opts.resetIndexEachTest ?? true;
let fixtureRoot: string | undefined;
let workspaceDir: string | undefined;
let memoryDir: string | undefined;
let managerLarge: MemoryIndexManager | undefined;
let managerSmall: MemoryIndexManager | undefined;
let embedBatch: Mock<(texts: string[]) => Promise<number[][]>> | undefined;
let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
let resetEmbeddingMocks: EmbeddingTestMocksModule["resetEmbeddingMocks"];
const resetManager = (manager: MemoryIndexManager) => {
(manager as unknown as { resetIndex: () => void }).resetIndex();
(manager as unknown as { dirty: boolean }).dirty = true;
};
const requireValue = <T>(value: T | undefined, name: string): T => {
if (!value) {
throw new Error(`${name} missing`);
}
return value;
};
const requireIndexManager = (
manager: MemorySearchManagerHandle,
name: string,
): MemoryIndexManager => {
if (!manager) {
throw new Error(`${name} missing`);
}
if (!("resetIndex" in manager) || typeof manager.resetIndex !== "function") {
throw new Error(`${name} is not a MemoryIndexManager`);
}
return manager as unknown as MemoryIndexManager;
};
beforeAll(async () => {
vi.resetModules();
await import("./embedding.test-mocks.js");
const embeddingMocks = await import("./embedding.test-mocks.js");
embedBatch = embeddingMocks.getEmbedBatchMock();
resetEmbeddingMocks = embeddingMocks.resetEmbeddingMocks;
({ getMemorySearchManager } = await import("./index.js"));
fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), opts.fixturePrefix));
workspaceDir = path.join(fixtureRoot, "workspace");
memoryDir = path.join(workspaceDir, "memory");
await fs.mkdir(memoryDir, { recursive: true });
const indexPathLarge = path.join(fixtureRoot, "index.large.sqlite");
const indexPathSmall = path.join(fixtureRoot, "index.small.sqlite");
const large = await getMemorySearchManager({
cfg: opts.createCfg({
workspaceDir,
indexPath: indexPathLarge,
tokens: opts.largeTokens,
}),
agentId: "main",
});
expect(large.manager).not.toBeNull();
managerLarge = requireIndexManager(large.manager, "managerLarge");
const small = await getMemorySearchManager({
cfg: opts.createCfg({
workspaceDir,
indexPath: indexPathSmall,
tokens: opts.smallTokens,
}),
agentId: "main",
});
expect(small.manager).not.toBeNull();
managerSmall = requireIndexManager(small.manager, "managerSmall");
});
afterAll(async () => {
if (managerLarge) {
await managerLarge.close();
managerLarge = undefined;
}
if (managerSmall) {
await managerSmall.close();
managerSmall = undefined;
}
if (fixtureRoot) {
await fs.rm(fixtureRoot, { recursive: true, force: true });
fixtureRoot = undefined;
}
});
beforeEach(async () => {
resetEmbeddingMocks();
const dir = requireValue(memoryDir, "memoryDir");
await fs.rm(dir, { recursive: true, force: true });
await fs.mkdir(dir, { recursive: true });
if (resetIndexEachTest) {
resetManager(requireValue(managerLarge, "managerLarge"));
resetManager(requireValue(managerSmall, "managerSmall"));
}
});
return {
get embedBatch() {
return requireValue(embedBatch, "embedBatch");
},
getFixtureRoot: () => requireValue(fixtureRoot, "fixtureRoot"),
getWorkspaceDir: () => requireValue(workspaceDir, "workspaceDir"),
getMemoryDir: () => requireValue(memoryDir, "memoryDir"),
getManagerLarge: () => requireValue(managerLarge, "managerLarge"),
getManagerSmall: () => requireValue(managerSmall, "managerSmall"),
resetManager,
};
}

View File

@@ -0,0 +1,39 @@
import { vi } from "vitest";
import "./test-runtime-mocks.js";
// Avoid exporting vitest mock types (TS2742 under pnpm + d.ts emit).
// oxlint-disable-next-line typescript/no-explicit-any
type AnyMock = any;
const hoisted = vi.hoisted(() => ({
embedBatch: vi.fn(async (texts: string[]) => texts.map(() => [0, 1, 0])),
embedQuery: vi.fn(async () => [0, 1, 0]),
}));
export function getEmbedBatchMock(): AnyMock {
return hoisted.embedBatch;
}
export function getEmbedQueryMock(): AnyMock {
return hoisted.embedQuery;
}
export function resetEmbeddingMocks(): void {
hoisted.embedBatch.mockReset();
hoisted.embedQuery.mockReset();
hoisted.embedBatch.mockImplementation(async (texts: string[]) => texts.map(() => [0, 1, 0]));
hoisted.embedQuery.mockImplementation(async () => [0, 1, 0]);
}
vi.mock("./embeddings.js", () => ({
createEmbeddingProvider: async () => ({
requestedProvider: "openai",
provider: {
id: "mock",
model: "mock-embed",
maxInputTokens: 8192,
embedQuery: hoisted.embedQuery,
embedBatch: hoisted.embedBatch,
},
}),
}));

View File

@@ -0,0 +1 @@
export { DEFAULT_OLLAMA_EMBEDDING_MODEL } from "./embeddings.js";

View File

@@ -0,0 +1,175 @@
import {
DEFAULT_GEMINI_EMBEDDING_MODEL,
DEFAULT_LOCAL_MODEL,
DEFAULT_MISTRAL_EMBEDDING_MODEL,
DEFAULT_OLLAMA_EMBEDDING_MODEL,
DEFAULT_OPENAI_EMBEDDING_MODEL,
DEFAULT_VOYAGE_EMBEDDING_MODEL,
getMemoryEmbeddingProvider,
listMemoryEmbeddingProviders,
type MemoryEmbeddingProvider,
type MemoryEmbeddingProviderAdapter,
type MemoryEmbeddingProviderCreateOptions,
type MemoryEmbeddingProviderRuntime,
} from "../engine-host-api.js";
import { canAutoSelectLocal } from "./provider-adapters.js";
export {
DEFAULT_GEMINI_EMBEDDING_MODEL,
DEFAULT_LOCAL_MODEL,
DEFAULT_MISTRAL_EMBEDDING_MODEL,
DEFAULT_OLLAMA_EMBEDDING_MODEL,
DEFAULT_OPENAI_EMBEDDING_MODEL,
DEFAULT_VOYAGE_EMBEDDING_MODEL,
} from "../engine-host-api.js";
export type EmbeddingProvider = MemoryEmbeddingProvider;
export type EmbeddingProviderId = string;
export type EmbeddingProviderRequest = string;
export type EmbeddingProviderFallback = string;
export type EmbeddingProviderRuntime = MemoryEmbeddingProviderRuntime;
export type EmbeddingProviderResult = {
provider: EmbeddingProvider | null;
requestedProvider: EmbeddingProviderRequest;
fallbackFrom?: string;
fallbackReason?: string;
providerUnavailableReason?: string;
runtime?: EmbeddingProviderRuntime;
};
type CreateEmbeddingProviderOptions = MemoryEmbeddingProviderCreateOptions & {
provider: EmbeddingProviderRequest;
fallback: EmbeddingProviderFallback;
};
function formatErrorMessage(err: unknown): string {
return err instanceof Error ? err.message : String(err);
}
function formatProviderError(adapter: MemoryEmbeddingProviderAdapter, err: unknown): string {
return adapter.formatSetupError?.(err) ?? formatErrorMessage(err);
}
function shouldContinueAutoSelection(
adapter: MemoryEmbeddingProviderAdapter,
err: unknown,
): boolean {
return adapter.shouldContinueAutoSelection?.(err) ?? false;
}
function getAdapter(id: string): MemoryEmbeddingProviderAdapter {
const adapter = getMemoryEmbeddingProvider(id);
if (!adapter) {
throw new Error(`Unknown memory embedding provider: ${id}`);
}
return adapter;
}
function listAutoSelectAdapters(
options: CreateEmbeddingProviderOptions,
): MemoryEmbeddingProviderAdapter[] {
return listMemoryEmbeddingProviders()
.filter((adapter) => typeof adapter.autoSelectPriority === "number")
.filter((adapter) =>
adapter.id === "local" ? canAutoSelectLocal(options.local?.modelPath) : true,
)
.toSorted(
(a, b) =>
(a.autoSelectPriority ?? Number.MAX_SAFE_INTEGER) -
(b.autoSelectPriority ?? Number.MAX_SAFE_INTEGER),
);
}
function resolveProviderModel(
adapter: MemoryEmbeddingProviderAdapter,
requestedModel: string,
): string {
const trimmed = requestedModel.trim();
if (trimmed) {
return trimmed;
}
return adapter.defaultModel ?? "";
}
async function createWithAdapter(
adapter: MemoryEmbeddingProviderAdapter,
options: CreateEmbeddingProviderOptions,
): Promise<EmbeddingProviderResult> {
const result = await adapter.create({
...options,
model: resolveProviderModel(adapter, options.model),
});
return {
provider: result.provider,
requestedProvider: options.provider,
runtime: result.runtime,
};
}
export async function createEmbeddingProvider(
options: CreateEmbeddingProviderOptions,
): Promise<EmbeddingProviderResult> {
if (options.provider === "auto") {
const reasons: string[] = [];
for (const adapter of listAutoSelectAdapters(options)) {
try {
const result = await createWithAdapter(adapter, {
...options,
provider: adapter.id,
});
return {
...result,
requestedProvider: "auto",
};
} catch (err) {
const message = formatProviderError(adapter, err);
if (shouldContinueAutoSelection(adapter, err)) {
reasons.push(message);
continue;
}
const wrapped = new Error(message) as Error & { cause?: unknown };
wrapped.cause = err;
throw wrapped;
}
}
return {
provider: null,
requestedProvider: "auto",
providerUnavailableReason:
reasons.length > 0 ? reasons.join("\n\n") : "No embeddings provider available.",
};
}
const primaryAdapter = getAdapter(options.provider);
try {
return await createWithAdapter(primaryAdapter, options);
} catch (primaryErr) {
const reason = formatProviderError(primaryAdapter, primaryErr);
if (options.fallback && options.fallback !== "none" && options.fallback !== options.provider) {
const fallbackAdapter = getAdapter(options.fallback);
try {
const fallbackResult = await createWithAdapter(fallbackAdapter, {
...options,
provider: options.fallback,
});
return {
...fallbackResult,
requestedProvider: options.provider,
fallbackFrom: options.provider,
fallbackReason: reason,
};
} catch (fallbackErr) {
const fallbackReason = formatProviderError(fallbackAdapter, fallbackErr);
const wrapped = new Error(
`${reason}\n\nFallback to ${options.fallback} failed: ${fallbackReason}`,
) as Error & { cause?: unknown };
wrapped.cause = primaryErr;
throw wrapped;
}
}
const wrapped = new Error(reason) as Error & { cause?: unknown };
wrapped.cause = primaryErr;
throw wrapped;
}
}

View File

@@ -0,0 +1,98 @@
import { describe, expect, it } from "vitest";
import { bm25RankToScore, buildFtsQuery, mergeHybridResults } from "./hybrid.js";
describe("memory hybrid helpers", () => {
it("buildFtsQuery tokenizes and AND-joins", () => {
expect(buildFtsQuery("hello world")).toBe('"hello" AND "world"');
expect(buildFtsQuery("FOO_bar baz-1")).toBe('"FOO_bar" AND "baz" AND "1"');
expect(buildFtsQuery("金银价格")).toBe('"金银价格"');
expect(buildFtsQuery("価格 2026年")).toBe('"価格" AND "2026年"');
expect(buildFtsQuery(" ")).toBeNull();
});
it("bm25RankToScore is monotonic and clamped", () => {
expect(bm25RankToScore(0)).toBeCloseTo(1);
expect(bm25RankToScore(1)).toBeCloseTo(0.5);
expect(bm25RankToScore(10)).toBeLessThan(bm25RankToScore(1));
expect(bm25RankToScore(-100)).toBeCloseTo(1, 1);
});
it("bm25RankToScore preserves FTS5 BM25 relevance ordering", () => {
const strongest = bm25RankToScore(-4.2);
const middle = bm25RankToScore(-2.1);
const weakest = bm25RankToScore(-0.5);
expect(strongest).toBeGreaterThan(middle);
expect(middle).toBeGreaterThan(weakest);
expect(strongest).not.toBe(middle);
expect(middle).not.toBe(weakest);
});
it("mergeHybridResults unions by id and combines weighted scores", async () => {
const merged = await mergeHybridResults({
vectorWeight: 0.7,
textWeight: 0.3,
vector: [
{
id: "a",
path: "memory/a.md",
startLine: 1,
endLine: 2,
source: "memory",
snippet: "vec-a",
vectorScore: 0.9,
},
],
keyword: [
{
id: "b",
path: "memory/b.md",
startLine: 3,
endLine: 4,
source: "memory",
snippet: "kw-b",
textScore: 1.0,
},
],
});
expect(merged).toHaveLength(2);
const a = merged.find((r) => r.path === "memory/a.md");
const b = merged.find((r) => r.path === "memory/b.md");
expect(a?.score).toBeCloseTo(0.7 * 0.9);
expect(b?.score).toBeCloseTo(0.3 * 1.0);
});
it("mergeHybridResults prefers keyword snippet when ids overlap", async () => {
const merged = await mergeHybridResults({
vectorWeight: 0.5,
textWeight: 0.5,
vector: [
{
id: "a",
path: "memory/a.md",
startLine: 1,
endLine: 2,
source: "memory",
snippet: "vec-a",
vectorScore: 0.2,
},
],
keyword: [
{
id: "a",
path: "memory/a.md",
startLine: 1,
endLine: 2,
source: "memory",
snippet: "kw-a",
textScore: 1.0,
},
],
});
expect(merged).toHaveLength(1);
expect(merged[0]?.snippet).toBe("kw-a");
expect(merged[0]?.score).toBeCloseTo(0.5 * 0.2 + 0.5 * 1.0);
});
});

File diff suppressed because it is too large

View File

@@ -3,7 +3,7 @@ export type {
MemoryEmbeddingProbeResult,
MemorySearchManager,
MemorySearchResult,
} from "../api.js";
} from "../engine-host-api.js";
export {
closeAllMemorySearchManagers,
getMemorySearchManager,

View File

@@ -0,0 +1 @@
export { buildFileEntry } from "../engine-host-api.js";

View File

@@ -1,6 +1,5 @@
import fs from "node:fs/promises";
import {
buildGeminiEmbeddingRequest,
buildMultimodalChunkForIndexing,
chunkMarkdown,
createSubsystemLogger,
@@ -11,19 +10,12 @@ import {
hashText,
parseEmbedding,
remapChunkLines,
runGeminiEmbeddingBatches,
runOpenAiEmbeddingBatches,
runVoyageEmbeddingBatches,
type EmbeddingInput,
type GeminiBatchRequest,
type MemoryChunk,
type MemoryFileEntry,
type MemorySource,
type OpenAiBatchRequest,
type SessionFileEntry,
type VoyageBatchRequest,
OPENAI_BATCH_ENDPOINT,
} from "../api.js";
} from "../engine-host-api.js";
import { MemoryManagerSyncOps } from "./manager-sync-ops.js";
const VECTOR_TABLE = "chunks_vec";
@@ -229,59 +221,67 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
if (!this.provider) {
return hashText(JSON.stringify({ provider: "none", model: "fts-only" }));
}
if (this.provider.id === "openai" && this.openAi) {
const entries = Object.entries(this.openAi.headers)
.filter(([key]) => key.toLowerCase() !== "authorization")
.toSorted(([a], [b]) => a.localeCompare(b))
.map(([key, value]) => [key, value]);
return hashText(
JSON.stringify({
provider: "openai",
baseUrl: this.openAi.baseUrl,
model: this.openAi.model,
headers: entries,
}),
);
}
if (this.provider.id === "gemini" && this.gemini) {
const entries = Object.entries(this.gemini.headers)
.filter(([key]) => {
const lower = key.toLowerCase();
return lower !== "authorization" && lower !== "x-goog-api-key";
})
.toSorted(([a], [b]) => a.localeCompare(b))
.map(([key, value]) => [key, value]);
return hashText(
JSON.stringify({
provider: "gemini",
baseUrl: this.gemini.baseUrl,
model: this.gemini.model,
outputDimensionality: this.gemini.outputDimensionality,
headers: entries,
}),
);
if (this.providerRuntime?.cacheKeyData) {
return hashText(JSON.stringify(this.providerRuntime.cacheKeyData));
}
return hashText(JSON.stringify({ provider: this.provider.id, model: this.provider.model }));
}
private buildBatchDebug(source: MemorySource, chunks: MemoryChunk[]) {
return (message: string, data?: Record<string, unknown>) =>
log.debug(
message,
data ? { ...data, source, chunks: chunks.length } : { source, chunks: chunks.length },
);
}
private async embedChunksWithBatch(
chunks: MemoryChunk[],
entry: MemoryFileEntry | SessionFileEntry,
_entry: MemoryFileEntry | SessionFileEntry,
source: MemorySource,
): Promise<number[][]> {
if (!this.provider) {
const batchEmbed = this.providerRuntime?.batchEmbed;
if (!this.provider || !batchEmbed) {
return this.embedChunksInBatches(chunks);
}
if (this.provider.id === "openai" && this.openAi) {
return this.embedChunksWithOpenAiBatch(chunks, entry, source);
if (chunks.length === 0) {
return [];
}
if (this.provider.id === "gemini" && this.gemini) {
return this.embedChunksWithGeminiBatch(chunks, entry, source);
const { embeddings, missing } = this.collectCachedEmbeddings(chunks);
if (missing.length === 0) {
return embeddings;
}
if (this.provider.id === "voyage" && this.voyage) {
return this.embedChunksWithVoyageBatch(chunks, entry, source);
const missingChunks = missing.map((item) => item.chunk);
const batchResult = await this.runBatchWithFallback({
provider: this.provider.id,
run: async () =>
await batchEmbed({
agentId: this.agentId,
chunks: missingChunks,
wait: this.batch.wait,
concurrency: this.batch.concurrency,
pollIntervalMs: this.batch.pollIntervalMs,
timeoutMs: this.batch.timeoutMs,
debug: this.buildBatchDebug(source, chunks),
}),
fallback: async () => await this.embedChunksInBatches(chunks),
});
if (!batchResult) {
return this.embedChunksInBatches(chunks);
}
return this.embedChunksInBatches(chunks);
const toCache: Array<{ hash: string; embedding: number[] }> = [];
for (let index = 0; index < missing.length; index += 1) {
const item = missing[index];
const embedding = batchResult[index] ?? [];
if (!item) {
continue;
}
embeddings[item.index] = embedding;
toCache.push({ hash: item.chunk.hash, embedding });
}
this.upsertEmbeddingCache(toCache);
return embeddings;
}
private collectCachedEmbeddings(chunks: MemoryChunk[]): {
@@ -305,221 +305,6 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
return { embeddings, missing };
}
private buildBatchCustomId(params: {
source: MemorySource;
entry: MemoryFileEntry | SessionFileEntry;
chunk: MemoryChunk;
index: number;
}): string {
return hashText(
`${params.source}:${params.entry.path}:${params.chunk.startLine}:${params.chunk.endLine}:${params.chunk.hash}:${params.index}`,
);
}
private buildBatchRequests<T extends { custom_id: string }>(params: {
missing: Array<{ index: number; chunk: MemoryChunk }>;
entry: MemoryFileEntry | SessionFileEntry;
source: MemorySource;
build: (chunk: MemoryChunk) => Omit<T, "custom_id">;
}): { requests: T[]; mapping: Map<string, { index: number; hash: string }> } {
const requests: T[] = [];
const mapping = new Map<string, { index: number; hash: string }>();
for (const item of params.missing) {
const chunk = item.chunk;
const customId = this.buildBatchCustomId({
source: params.source,
entry: params.entry,
chunk,
index: item.index,
});
mapping.set(customId, { index: item.index, hash: chunk.hash });
const built = params.build(chunk);
requests.push({ custom_id: customId, ...built } as T);
}
return { requests, mapping };
}
private applyBatchEmbeddings(params: {
byCustomId: Map<string, number[]>;
mapping: Map<string, { index: number; hash: string }>;
embeddings: number[][];
}): void {
const toCache: Array<{ hash: string; embedding: number[] }> = [];
for (const [customId, embedding] of params.byCustomId.entries()) {
const mapped = params.mapping.get(customId);
if (!mapped) {
continue;
}
params.embeddings[mapped.index] = embedding;
toCache.push({ hash: mapped.hash, embedding });
}
this.upsertEmbeddingCache(toCache);
}
private buildEmbeddingBatchRunnerOptions<TRequest>(params: {
requests: TRequest[];
chunks: MemoryChunk[];
source: MemorySource;
}): {
agentId: string;
requests: TRequest[];
wait: boolean;
concurrency: number;
pollIntervalMs: number;
timeoutMs: number;
debug: (message: string, data?: Record<string, unknown>) => void;
} {
const { requests, chunks, source } = params;
return {
agentId: this.agentId,
requests,
wait: this.batch.wait,
concurrency: this.batch.concurrency,
pollIntervalMs: this.batch.pollIntervalMs,
timeoutMs: this.batch.timeoutMs,
debug: (message, data) =>
log.debug(
message,
data ? { ...data, source, chunks: chunks.length } : { source, chunks: chunks.length },
),
};
}
private async embedChunksWithProviderBatch<TRequest extends { custom_id: string }>(params: {
chunks: MemoryChunk[];
entry: MemoryFileEntry | SessionFileEntry;
source: MemorySource;
provider: "voyage" | "openai" | "gemini";
enabled: boolean;
buildRequest: (chunk: MemoryChunk) => Omit<TRequest, "custom_id">;
runBatch: (runnerOptions: {
agentId: string;
requests: TRequest[];
wait: boolean;
concurrency: number;
pollIntervalMs: number;
timeoutMs: number;
debug: (message: string, data?: Record<string, unknown>) => void;
}) => Promise<Map<string, number[]> | number[][]>;
}): Promise<number[][]> {
if (!params.enabled) {
return this.embedChunksInBatches(params.chunks);
}
if (params.chunks.length === 0) {
return [];
}
const { embeddings, missing } = this.collectCachedEmbeddings(params.chunks);
if (missing.length === 0) {
return embeddings;
}
const { requests, mapping } = this.buildBatchRequests<TRequest>({
missing,
entry: params.entry,
source: params.source,
build: params.buildRequest,
});
const runnerOptions = this.buildEmbeddingBatchRunnerOptions({
requests,
chunks: params.chunks,
source: params.source,
});
const batchResult = await this.runBatchWithFallback({
provider: params.provider,
run: async () => await params.runBatch(runnerOptions),
fallback: async () => await this.embedChunksInBatches(params.chunks),
});
if (Array.isArray(batchResult)) {
return batchResult;
}
this.applyBatchEmbeddings({ byCustomId: batchResult, mapping, embeddings });
return embeddings;
}
private async embedChunksWithVoyageBatch(
chunks: MemoryChunk[],
entry: MemoryFileEntry | SessionFileEntry,
source: MemorySource,
): Promise<number[][]> {
const voyage = this.voyage;
return await this.embedChunksWithProviderBatch<VoyageBatchRequest>({
chunks,
entry,
source,
provider: "voyage",
enabled: Boolean(voyage),
buildRequest: (chunk) => ({
body: { input: chunk.text },
}),
runBatch: async (runnerOptions) =>
await runVoyageEmbeddingBatches({
client: voyage!,
...runnerOptions,
}),
});
}
private async embedChunksWithOpenAiBatch(
chunks: MemoryChunk[],
entry: MemoryFileEntry | SessionFileEntry,
source: MemorySource,
): Promise<number[][]> {
const openAi = this.openAi;
return await this.embedChunksWithProviderBatch<OpenAiBatchRequest>({
chunks,
entry,
source,
provider: "openai",
enabled: Boolean(openAi),
buildRequest: (chunk) => ({
method: "POST",
url: OPENAI_BATCH_ENDPOINT,
body: {
model: openAi?.model ?? this.provider?.model ?? "text-embedding-3-small",
input: chunk.text,
},
}),
runBatch: async (runnerOptions) =>
await runOpenAiEmbeddingBatches({
openAi: openAi!,
...runnerOptions,
}),
});
}
private async embedChunksWithGeminiBatch(
chunks: MemoryChunk[],
entry: MemoryFileEntry | SessionFileEntry,
source: MemorySource,
): Promise<number[][]> {
const gemini = this.gemini;
if (chunks.some((chunk) => hasNonTextEmbeddingParts(chunk.embeddingInput))) {
return await this.embedChunksInBatches(chunks);
}
return await this.embedChunksWithProviderBatch<GeminiBatchRequest>({
chunks,
entry,
source,
provider: "gemini",
enabled: Boolean(gemini),
buildRequest: (chunk) => ({
request: buildGeminiEmbeddingRequest({
input: chunk.embeddingInput ?? { text: chunk.text },
taskType: "RETRIEVAL_DOCUMENT",
modelPath: this.gemini?.modelPath,
outputDimensionality: this.gemini?.outputDimensionality,
}),
}),
runBatch: async (runnerOptions) =>
await runGeminiEmbeddingBatches({
gemini: gemini!,
...runnerOptions,
}),
});
}
protected async embedBatchWithRetry(texts: string[]): Promise<number[][]> {
if (texts.length === 0) {
return [];

View File

@@ -1,5 +1,5 @@
import type { DatabaseSync } from "node:sqlite";
import { cosineSimilarity, parseEmbedding, truncateUtf16Safe } from "../api.js";
import { cosineSimilarity, parseEmbedding, truncateUtf16Safe } from "../engine-host-api.js";
const vectorToBlob = (embedding: number[]): Buffer =>
Buffer.from(new Float32Array(embedding).buffer);

View File

@@ -5,15 +5,8 @@ import path from "node:path";
import type { DatabaseSync } from "node:sqlite";
import chokidar, { FSWatcher } from "chokidar";
import {
DEFAULT_GEMINI_EMBEDDING_MODEL,
DEFAULT_MISTRAL_EMBEDDING_MODEL,
DEFAULT_OLLAMA_EMBEDDING_MODEL,
DEFAULT_OPENAI_EMBEDDING_MODEL,
DEFAULT_VOYAGE_EMBEDDING_MODEL,
buildCaseInsensitiveExtensionGlob,
buildFileEntry,
classifyMemoryMultimodalPath,
createEmbeddingProvider,
createSubsystemLogger,
ensureDir,
ensureMemoryIndexSchema,
@@ -22,10 +15,8 @@ import {
isFileMissingError,
listMemoryFiles,
listSessionFilesForAgent,
loadSqliteVecExtension,
normalizeExtraMemoryPaths,
onSessionTranscriptUpdate,
requireNodeSqlite,
resolveAgentDir,
resolveSessionTranscriptsDirForAgent,
resolveUserPath,
@@ -37,14 +28,22 @@ import {
type OpenClawConfig,
type ResolvedMemorySearchConfig,
type SessionFileEntry,
type EmbeddingProvider,
type GeminiEmbeddingClient,
type MistralEmbeddingClient,
type OllamaEmbeddingClient,
type OpenAiEmbeddingClient,
type VoyageEmbeddingClient,
buildSessionEntry,
} from "../api.js";
} from "../engine-host-api.js";
import {
createEmbeddingProvider,
DEFAULT_GEMINI_EMBEDDING_MODEL,
DEFAULT_MISTRAL_EMBEDDING_MODEL,
DEFAULT_OLLAMA_EMBEDDING_MODEL,
DEFAULT_OPENAI_EMBEDDING_MODEL,
DEFAULT_VOYAGE_EMBEDDING_MODEL,
type EmbeddingProvider,
type EmbeddingProviderId,
type EmbeddingProviderRuntime,
} from "./embeddings.js";
import { buildFileEntry } from "./internal.js";
import { loadSqliteVecExtension } from "./sqlite-vec.js";
import { requireNodeSqlite } from "./sqlite.js";
type MemoryIndexMeta = {
model: string;
@@ -101,12 +100,8 @@ export abstract class MemoryManagerSyncOps {
protected abstract readonly workspaceDir: string;
protected abstract readonly settings: ResolvedMemorySearchConfig;
protected provider: EmbeddingProvider | null = null;
protected fallbackFrom?: "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama";
protected openAi?: OpenAiEmbeddingClient;
protected gemini?: GeminiEmbeddingClient;
protected voyage?: VoyageEmbeddingClient;
protected mistral?: MistralEmbeddingClient;
protected ollama?: OllamaEmbeddingClient;
protected fallbackFrom?: EmbeddingProviderId;
protected providerRuntime?: EmbeddingProviderRuntime;
protected abstract batch: {
enabled: boolean;
wait: boolean;
@@ -1104,13 +1099,7 @@ export abstract class MemoryManagerSyncOps {
timeoutMs: number;
} {
const batch = this.settings.remote?.batch;
const enabled = Boolean(
batch?.enabled &&
this.provider &&
((this.openAi && this.provider.id === "openai") ||
(this.gemini && this.provider.id === "gemini") ||
(this.voyage && this.provider.id === "voyage")),
);
const enabled = Boolean(batch?.enabled && this.provider && this.providerRuntime?.batchEmbed);
return {
enabled,
wait: batch?.wait ?? true,
@@ -1128,13 +1117,7 @@ export abstract class MemoryManagerSyncOps {
if (this.fallbackFrom) {
return false;
}
const fallbackFrom = this.provider.id as
| "openai"
| "gemini"
| "local"
| "voyage"
| "mistral"
| "ollama";
const fallbackFrom = this.provider.id as EmbeddingProviderId;
const fallbackModel =
fallback === "gemini"
@@ -1163,11 +1146,7 @@ export abstract class MemoryManagerSyncOps {
this.fallbackFrom = fallbackFrom;
this.fallbackReason = reason;
this.provider = fallbackResult.provider;
this.openAi = fallbackResult.openAi;
this.gemini = fallbackResult.gemini;
this.voyage = fallbackResult.voyage;
this.mistral = fallbackResult.mistral;
this.ollama = fallbackResult.ollama;
this.providerRuntime = fallbackResult.runtime;
this.providerKey = this.computeProviderKey();
this.batch = this.resolveBatchConfig();
log.warn(`memory embeddings: switched to fallback provider (${fallback})`, { reason });

View File

@@ -0,0 +1,126 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../engine-host-api.js";
import type { MemoryIndexManager } from "./index.js";
import { closeAllMemorySearchManagers } from "./index.js";
import { createOpenAIEmbeddingProviderMock } from "./test-embeddings-mock.js";
import { createMemoryManagerOrThrow } from "./test-manager.js";
const embedBatch = vi.fn(async (_input: string[]): Promise<number[][]> => []);
const embedQuery = vi.fn(async (_input: string): Promise<number[]> => [0.2, 0.2, 0.2]);
vi.mock("./embeddings.js", () => ({
createEmbeddingProvider: async (_options: unknown) =>
createOpenAIEmbeddingProviderMock({
embedQuery: embedQuery as unknown as (input: string) => Promise<number[]>,
embedBatch: embedBatch as unknown as (input: string[]) => Promise<number[][]>,
}),
}));
describe("memory search async sync", () => {
let workspaceDir: string;
let indexPath: string;
let manager: MemoryIndexManager | null = null;
const buildConfig = (): OpenClawConfig =>
({
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "text-embedding-3-small",
store: { path: indexPath },
sync: { watch: false, onSessionStart: false, onSearch: true },
query: { minScore: 0 },
remote: { batch: { enabled: false, wait: false } },
},
},
list: [{ id: "main", default: true }],
},
}) as OpenClawConfig;
beforeEach(async () => {
await closeAllMemorySearchManagers();
embedBatch.mockClear();
embedBatch.mockImplementation(async (input: string[]) => input.map(() => [0.2, 0.2, 0.2]));
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-async-"));
indexPath = path.join(workspaceDir, "index.sqlite");
await fs.mkdir(path.join(workspaceDir, "memory"));
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-07.md"), "hello\n");
});
afterEach(async () => {
vi.unstubAllGlobals();
if (manager) {
await manager.close();
manager = null;
}
await closeAllMemorySearchManagers();
await fs.rm(workspaceDir, { recursive: true, force: true });
});
it("does not await sync when searching", async () => {
const cfg = buildConfig();
manager = await createMemoryManagerOrThrow(cfg);
let releaseSync = () => {};
const pending = new Promise<void>((resolve) => {
releaseSync = () => resolve();
}).finally(() => {
(manager as unknown as { syncing: Promise<void> | null }).syncing = null;
});
const syncMock = vi.fn(async () => {
(manager as unknown as { syncing: Promise<void> | null }).syncing = pending;
return pending;
});
(manager as unknown as { sync: () => Promise<void> }).sync = syncMock;
const activeManager = manager;
if (!activeManager) {
throw new Error("manager missing");
}
await activeManager.search("hello");
expect(syncMock).toHaveBeenCalledTimes(1);
releaseSync();
await vi.waitFor(() => {
expect((manager as unknown as { syncing: Promise<void> | null }).syncing).toBeNull();
});
}, 300_000);
it("waits for in-flight search sync during close", async () => {
const cfg = buildConfig();
manager = await createMemoryManagerOrThrow(cfg);
let releaseSync = () => {};
const pendingSync = new Promise<void>((resolve) => {
releaseSync = () => resolve();
}).finally(() => {
(manager as unknown as { syncing: Promise<void> | null }).syncing = null;
});
const syncMock = vi.fn(async () => {
(manager as unknown as { syncing: Promise<void> | null }).syncing = pendingSync;
return pendingSync;
});
(manager as unknown as { dirty: boolean }).dirty = true;
(manager as unknown as { sync: () => Promise<void> }).sync = syncMock;
await manager.search("hello");
await vi.waitFor(() => {
expect((manager as unknown as { syncing: Promise<void> | null }).syncing).toBe(pendingSync);
});
let closed = false;
const closePromise = manager.close().then(() => {
closed = true;
});
await Promise.resolve();
expect(closed).toBe(false);
releaseSync();
await closePromise;
manager = null;
});
});

View File

@@ -0,0 +1,99 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../engine-host-api.js";
import type { MemoryIndexManager } from "./index.js";
let shouldFail = false;
type EmbeddingTestMocksModule = typeof import("./embedding.test-mocks.js");
type TestManagerHelpersModule = typeof import("./test-manager-helpers.js");
type MemoryIndexModule = typeof import("./index.js");
describe("memory manager atomic reindex", () => {
let fixtureRoot = "";
let caseId = 0;
let workspaceDir: string;
let indexPath: string;
let manager: MemoryIndexManager | null = null;
let embedBatch: ReturnType<EmbeddingTestMocksModule["getEmbedBatchMock"]>;
let resetEmbeddingMocks: EmbeddingTestMocksModule["resetEmbeddingMocks"];
let getRequiredMemoryIndexManager: TestManagerHelpersModule["getRequiredMemoryIndexManager"];
let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];
beforeAll(async () => {
fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-atomic-"));
});
beforeEach(async () => {
vi.resetModules();
const embeddingMocks = await import("./embedding.test-mocks.js");
embedBatch = embeddingMocks.getEmbedBatchMock();
resetEmbeddingMocks = embeddingMocks.resetEmbeddingMocks;
({ getRequiredMemoryIndexManager } = await import("./test-manager-helpers.js"));
({ closeAllMemorySearchManagers } = await import("./index.js"));
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "0");
resetEmbeddingMocks();
shouldFail = false;
embedBatch.mockImplementation(async (texts: string[]) => {
if (shouldFail) {
throw new Error("embedding failure");
}
return texts.map((_, index) => [index + 1, 0, 0]);
});
workspaceDir = path.join(fixtureRoot, `case-${caseId++}`);
await fs.mkdir(workspaceDir, { recursive: true });
indexPath = path.join(workspaceDir, "index.sqlite");
await fs.mkdir(path.join(workspaceDir, "memory"));
await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Hello memory.");
});
afterEach(async () => {
if (manager) {
await manager.close();
manager = null;
}
await closeAllMemorySearchManagers();
vi.unstubAllEnvs();
});
afterAll(async () => {
if (!fixtureRoot) {
return;
}
await fs.rm(fixtureRoot, { recursive: true, force: true });
});
it("keeps the prior index when a full reindex fails", async () => {
const cfg = {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: indexPath },
cache: { enabled: false },
// Perf: keep test indexes to a single chunk to reduce sqlite work.
chunking: { tokens: 4000, overlap: 0 },
sync: { watch: false, onSessionStart: false, onSearch: false },
},
},
list: [{ id: "main", default: true }],
},
} as OpenClawConfig;
manager = await getRequiredMemoryIndexManager({ cfg, agentId: "main" });
await manager.sync({ force: true });
const beforeStatus = manager.status();
expect(beforeStatus.chunks).toBeGreaterThan(0);
shouldFail = true;
await expect(manager.sync({ force: true })).rejects.toThrow("embedding failure");
const afterStatus = manager.status();
expect(afterStatus.chunks).toBeGreaterThan(0);
});
});

View File

@@ -0,0 +1,330 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import { useFastShortTimeouts } from "../../../../test/helpers/fast-short-timeouts.js";
import type { OpenClawConfig } from "../engine-host-api.js";
import { createOpenAIEmbeddingProviderMock } from "./test-embeddings-mock.js";
import { mockPublicPinnedHostname } from "./test-helpers/ssrf.js";
type MemoryIndexManager = import("./index.js").MemoryIndexManager;
type MemoryIndexModule = typeof import("./index.js");
const embedBatch = vi.fn(async (_texts: string[]) => [] as number[][]);
const embedQuery = vi.fn(async () => [0.5, 0.5, 0.5]);
let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
describe("memory indexing with OpenAI batches", () => {
let fixtureRoot: string;
let workspaceDir: string;
let memoryDir: string;
let indexPath: string;
let manager: MemoryIndexManager | null = null;
async function readOpenAIBatchUploadRequests(body: FormData) {
let uploadedRequests: Array<{ custom_id?: string }> = [];
const entries = body.entries() as IterableIterator<[string, FormDataEntryValue]>;
for (const [key, value] of entries) {
if (key !== "file") {
continue;
}
const text = typeof value === "string" ? value : await value.text();
uploadedRequests = text
.split("\n")
.filter(Boolean)
.map((line: string) => JSON.parse(line) as { custom_id?: string });
}
return uploadedRequests;
}
function createOpenAIBatchFetchMock(options?: {
onCreateBatch?: (ctx: { batchCreates: number }) => Response | Promise<Response>;
}) {
let uploadedRequests: Array<{ custom_id?: string }> = [];
const state = { batchCreates: 0 };
const fetchMock = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => {
const url =
typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
if (url.endsWith("/files")) {
const body = init?.body;
if (!(body instanceof FormData)) {
throw new Error("expected FormData upload");
}
uploadedRequests = await readOpenAIBatchUploadRequests(body);
return new Response(JSON.stringify({ id: "file_1" }), {
status: 200,
headers: { "Content-Type": "application/json" },
});
}
if (url.endsWith("/batches")) {
state.batchCreates += 1;
if (options?.onCreateBatch) {
return await options.onCreateBatch({ batchCreates: state.batchCreates });
}
return new Response(JSON.stringify({ id: "batch_1", status: "in_progress" }), {
status: 200,
headers: { "Content-Type": "application/json" },
});
}
if (url.endsWith("/batches/batch_1")) {
return new Response(
JSON.stringify({ id: "batch_1", status: "completed", output_file_id: "file_out" }),
{ status: 200, headers: { "Content-Type": "application/json" } },
);
}
if (url.endsWith("/files/file_out/content")) {
const lines = uploadedRequests.map((request, index) =>
JSON.stringify({
custom_id: request.custom_id,
response: {
status_code: 200,
body: { data: [{ embedding: [index + 1, 0, 0], index: 0 }] },
},
}),
);
return new Response(lines.join("\n"), {
status: 200,
headers: { "Content-Type": "application/jsonl" },
});
}
throw new Error(`unexpected fetch ${url}`);
});
return { fetchMock, state };
}
function createBatchCfg(): OpenClawConfig {
return {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "text-embedding-3-small",
store: { path: indexPath, vector: { enabled: false } },
sync: { watch: false, onSessionStart: false, onSearch: false },
query: { minScore: 0, hybrid: { enabled: false } },
remote: { batch: { enabled: true, wait: true, pollIntervalMs: 1 } },
},
},
list: [{ id: "main", default: true }],
},
} as OpenClawConfig;
}
beforeAll(async () => {
vi.resetModules();
vi.doMock("./embeddings.js", () => ({
createEmbeddingProvider: async () =>
createOpenAIEmbeddingProviderMock({
embedQuery,
embedBatch,
}),
}));
await import("./test-runtime-mocks.js");
({ getMemorySearchManager } = await import("./index.js"));
fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-batch-"));
workspaceDir = path.join(fixtureRoot, "workspace");
memoryDir = path.join(workspaceDir, "memory");
indexPath = path.join(fixtureRoot, "index.sqlite");
await fs.mkdir(memoryDir, { recursive: true });
const result = await getMemorySearchManager({ cfg: createBatchCfg(), agentId: "main" });
expect(result.manager).not.toBeNull();
if (!result.manager) {
throw new Error("manager missing");
}
manager = result.manager as unknown as MemoryIndexManager;
});
afterAll(async () => {
if (manager) {
await manager.close();
manager = null;
}
await fs.rm(fixtureRoot, { recursive: true, force: true });
});
beforeEach(async () => {
embedBatch.mockClear();
embedQuery.mockClear();
embedBatch.mockImplementation(async (texts: string[]) =>
texts.map((_text, index) => [index + 1, 0, 0]),
);
await fs.rm(memoryDir, { recursive: true, force: true });
await fs.mkdir(memoryDir, { recursive: true });
// Reuse one manager instance across tests; keep index state isolated.
if (!manager) {
throw new Error("manager missing");
}
(manager as unknown as { resetIndex: () => void }).resetIndex();
(manager as unknown as { dirty: boolean }).dirty = true;
(manager as unknown as { batchFailureCount: number }).batchFailureCount = 0;
(manager as unknown as { batchFailureLastError?: string }).batchFailureLastError = undefined;
(manager as unknown as { batchFailureLastProvider?: string }).batchFailureLastProvider =
undefined;
(manager as unknown as { batch: { enabled: boolean } }).batch.enabled = true;
});
afterEach(async () => {
vi.unstubAllGlobals();
});
it("uses OpenAI batch uploads when enabled", async () => {
const restoreTimeouts = useFastShortTimeouts();
const content = ["hello", "from", "batch"].join("\n\n");
await fs.writeFile(path.join(memoryDir, "2026-01-07.md"), content);
const { fetchMock } = createOpenAIBatchFetchMock();
vi.stubGlobal("fetch", fetchMock);
mockPublicPinnedHostname();
try {
if (!manager) {
throw new Error("manager missing");
}
const labels: string[] = [];
await manager.sync({
progress: (update) => {
if (update.label) {
labels.push(update.label);
}
},
});
const status = manager.status();
expect(status.chunks).toBeGreaterThan(0);
expect(embedBatch).not.toHaveBeenCalled();
expect(fetchMock).toHaveBeenCalled();
expect(labels.some((label) => label.toLowerCase().includes("batch"))).toBe(true);
} finally {
restoreTimeouts();
}
});
it("retries OpenAI batch create on transient failures", async () => {
const restoreTimeouts = useFastShortTimeouts();
const content = ["retry", "the", "batch"].join("\n\n");
await fs.writeFile(path.join(memoryDir, "2026-01-08.md"), content);
const { fetchMock, state } = createOpenAIBatchFetchMock({
onCreateBatch: ({ batchCreates }) => {
if (batchCreates === 1) {
return new Response("upstream connect error", { status: 503 });
}
return new Response(JSON.stringify({ id: "batch_1", status: "in_progress" }), {
status: 200,
headers: { "Content-Type": "application/json" },
});
},
});
vi.stubGlobal("fetch", fetchMock);
mockPublicPinnedHostname();
try {
if (!manager) {
throw new Error("manager missing");
}
await manager.sync({ reason: "test" });
const status = manager.status();
expect(status.chunks).toBeGreaterThan(0);
expect(state.batchCreates).toBe(2);
} finally {
restoreTimeouts();
}
});
it("tracks batch failures, resets on success, and disables after repeated failures", async () => {
const restoreTimeouts = useFastShortTimeouts();
const memoryFile = path.join(memoryDir, "2026-01-09.md");
await fs.writeFile(memoryFile, ["flaky", "batch"].join("\n\n"));
let mtimeMs = Date.now();
const touch = async () => {
mtimeMs += 1_000;
const date = new Date(mtimeMs);
await fs.utimes(memoryFile, date, date);
};
await touch();
let mode: "fail" | "ok" = "fail";
const { fetchMock } = createOpenAIBatchFetchMock({
onCreateBatch: () =>
mode === "fail"
? new Response("batch failed", { status: 400 })
: new Response(JSON.stringify({ id: "batch_1", status: "in_progress" }), {
status: 200,
headers: { "Content-Type": "application/json" },
}),
});
vi.stubGlobal("fetch", fetchMock);
mockPublicPinnedHostname();
try {
if (!manager) {
throw new Error("manager missing");
}
// First failure: fall back to regular embeddings and increment the failure count.
await manager.sync({ reason: "test" });
expect(embedBatch).toHaveBeenCalled();
let status = manager.status();
expect(status.batch?.enabled).toBe(true);
expect(status.batch?.failures).toBe(1);
// Success should reset failure count.
embedBatch.mockClear();
mode = "ok";
await fs.writeFile(memoryFile, ["flaky", "batch", "recovery"].join("\n\n"));
await touch();
(manager as unknown as { dirty: boolean }).dirty = true;
await manager.sync({ reason: "test" });
status = manager.status();
expect(status.batch?.enabled).toBe(true);
expect(status.batch?.failures).toBe(0);
expect(embedBatch).not.toHaveBeenCalled();
// Two more failures after reset should disable remote batching.
await (
manager as unknown as {
recordBatchFailure: (params: {
provider: string;
message: string;
attempts?: number;
forceDisable?: boolean;
}) => Promise<unknown>;
}
).recordBatchFailure({ provider: "openai", message: "batch failed", attempts: 1 });
await (
manager as unknown as {
recordBatchFailure: (params: {
provider: string;
message: string;
attempts?: number;
forceDisable?: boolean;
}) => Promise<unknown>;
}
).recordBatchFailure({ provider: "openai", message: "batch failed", attempts: 1 });
status = manager.status();
expect(status.batch?.enabled).toBe(false);
expect(status.batch?.failures).toBeGreaterThanOrEqual(2);
// Once disabled, batch endpoints are skipped and fallback embeddings run directly.
const fetchCalls = fetchMock.mock.calls.length;
embedBatch.mockClear();
await fs.writeFile(memoryFile, ["flaky", "batch", "fallback"].join("\n\n"));
await touch();
(manager as unknown as { dirty: boolean }).dirty = true;
await manager.sync({ reason: "test" });
expect(fetchMock.mock.calls.length).toBe(fetchCalls);
expect(embedBatch).toHaveBeenCalled();
} finally {
restoreTimeouts();
}
});
});

View File

@@ -0,0 +1,143 @@
import fs from "node:fs/promises";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { useFastShortTimeouts } from "../../../../test/helpers/fast-short-timeouts.js";
import { installEmbeddingManagerFixture } from "./embedding-manager.test-harness.js";
const fx = installEmbeddingManagerFixture({
fixturePrefix: "openclaw-mem-",
largeTokens: 4000,
smallTokens: 200,
createCfg: ({ workspaceDir, indexPath, tokens }) => ({
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: indexPath, vector: { enabled: false } },
chunking: { tokens, overlap: 0 },
sync: { watch: false, onSessionStart: false, onSearch: false },
query: { minScore: 0, hybrid: { enabled: false } },
},
},
list: [{ id: "main", default: true }],
},
}),
});
describe("memory embedding batches", () => {
async function expectSyncWithFastTimeouts(manager: {
sync: (params: { reason: string }) => Promise<void>;
}) {
const restoreFastTimeouts = useFastShortTimeouts();
try {
await manager.sync({ reason: "test" });
} finally {
restoreFastTimeouts();
}
}
it("splits large files across multiple embedding batches", async () => {
const memoryDir = fx.getMemoryDir();
const managerLarge = fx.getManagerLarge();
// Keep this small but above the embedding batch byte threshold (8k) so we
// exercise multi-batch behavior without generating lots of chunks/DB rows.
const line = "a".repeat(4200);
const content = [line, line].join("\n");
await fs.writeFile(path.join(memoryDir, "2026-01-03.md"), content);
const updates: Array<{ completed: number; total: number; label?: string }> = [];
await managerLarge.sync({
progress: (update) => {
updates.push(update);
},
});
const status = managerLarge.status();
const totalTexts = fx.embedBatch.mock.calls.reduce(
(sum: number, call: unknown[]) => sum + ((call[0] as string[] | undefined)?.length ?? 0),
0,
);
expect(totalTexts).toBe(status.chunks);
expect(fx.embedBatch.mock.calls.length).toBeGreaterThan(1);
const inputs: string[] = fx.embedBatch.mock.calls.flatMap(
(call: unknown[]) => (call[0] as string[] | undefined) ?? [],
);
expect(inputs.every((text) => Buffer.byteLength(text, "utf8") <= 8000)).toBe(true);
expect(updates.length).toBeGreaterThan(0);
expect(updates.some((update) => update.label?.includes("/"))).toBe(true);
const last = updates[updates.length - 1];
expect(last?.total).toBeGreaterThan(0);
expect(last?.completed).toBe(last?.total);
});
it("keeps small files in a single embedding batch", async () => {
const memoryDir = fx.getMemoryDir();
const managerSmall = fx.getManagerSmall();
const line = "b".repeat(120);
const content = Array.from({ length: 4 }, () => line).join("\n");
await fs.writeFile(path.join(memoryDir, "2026-01-04.md"), content);
await managerSmall.sync({ reason: "test" });
expect(fx.embedBatch.mock.calls.length).toBe(1);
});
it("retries embeddings on transient rate limit and 5xx errors", async () => {
const memoryDir = fx.getMemoryDir();
const managerSmall = fx.getManagerSmall();
const line = "d".repeat(120);
const content = Array.from({ length: 4 }, () => line).join("\n");
await fs.writeFile(path.join(memoryDir, "2026-01-06.md"), content);
const transientErrors = [
"openai embeddings failed: 429 rate limit",
"openai embeddings failed: 502 Bad Gateway (cloudflare)",
];
let calls = 0;
fx.embedBatch.mockImplementation(async (texts: string[]) => {
calls += 1;
const transient = transientErrors[calls - 1];
if (transient) {
throw new Error(transient);
}
return texts.map(() => [0, 1, 0]);
});
await expectSyncWithFastTimeouts(managerSmall);
expect(calls).toBe(3);
}, 10000);
it("retries embeddings on too-many-tokens-per-day rate limits", async () => {
const memoryDir = fx.getMemoryDir();
const managerSmall = fx.getManagerSmall();
const line = "e".repeat(120);
const content = Array.from({ length: 4 }, () => line).join("\n");
await fs.writeFile(path.join(memoryDir, "2026-01-08.md"), content);
let calls = 0;
fx.embedBatch.mockImplementation(async (texts: string[]) => {
calls += 1;
if (calls === 1) {
throw new Error("AWS Bedrock embeddings failed: Too many tokens per day");
}
return texts.map(() => [0, 1, 0]);
});
await expectSyncWithFastTimeouts(managerSmall);
expect(calls).toBe(2);
}, 10000);
it("skips empty chunks so embeddings input stays valid", async () => {
const memoryDir = fx.getMemoryDir();
const managerSmall = fx.getManagerSmall();
await fs.writeFile(path.join(memoryDir, "2026-01-07.md"), "\n\n\n");
await managerSmall.sync({ reason: "test" });
const inputs = fx.embedBatch.mock.calls.flatMap(
(call: unknown[]) => (call[0] as string[]) ?? [],
);
expect(inputs).not.toContain("");
});
});

View File

@@ -0,0 +1,137 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { setTimeout as sleep } from "node:timers/promises";
import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../engine-host-api.js";
import "./test-runtime-mocks.js";
import type { MemoryIndexManager } from "./index.js";
type MemoryIndexModule = typeof import("./index.js");
type ManagerModule = typeof import("./manager.js");
const hoisted = vi.hoisted(() => ({
providerCreateCalls: 0,
providerDelayMs: 0,
}));
vi.mock("./embeddings.js", () => ({
createEmbeddingProvider: async () => {
hoisted.providerCreateCalls += 1;
if (hoisted.providerDelayMs > 0) {
await sleep(hoisted.providerDelayMs);
}
return {
requestedProvider: "openai",
provider: {
id: "mock",
model: "mock-embed",
maxInputTokens: 8192,
embedQuery: async () => [0, 1, 0],
embedBatch: async (texts: string[]) => texts.map(() => [0, 1, 0]),
},
};
},
}));
let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];
let closeAllMemoryIndexManagers: ManagerModule["closeAllMemoryIndexManagers"];
let RawMemoryIndexManager: ManagerModule["MemoryIndexManager"];
describe("memory manager cache hydration", () => {
let workspaceDir = "";
beforeAll(async () => {
({ getMemorySearchManager, closeAllMemorySearchManagers } = await import("./index.js"));
({ closeAllMemoryIndexManagers, MemoryIndexManager: RawMemoryIndexManager } =
await import("./manager.js"));
});
beforeEach(async () => {
await closeAllMemoryIndexManagers();
vi.clearAllMocks();
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-concurrent-"));
await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Hello memory.");
hoisted.providerCreateCalls = 0;
hoisted.providerDelayMs = 50;
});
afterEach(async () => {
await closeAllMemorySearchManagers();
await fs.rm(workspaceDir, { recursive: true, force: true });
});
function createMemoryConcurrencyConfig(indexPath: string): OpenClawConfig {
return {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: indexPath, vector: { enabled: false } },
sync: { watch: false, onSessionStart: false, onSearch: false },
},
},
list: [{ id: "main", default: true }],
},
} as OpenClawConfig;
}
it("deduplicates concurrent manager creation for the same cache key", async () => {
const indexPath = path.join(workspaceDir, "index.sqlite");
const cfg = createMemoryConcurrencyConfig(indexPath);
const results = await Promise.all(
Array.from(
{ length: 12 },
async () => await getMemorySearchManager({ cfg, agentId: "main" }),
),
);
const managers = results
.map((result) => result.manager)
.filter((manager): manager is MemoryIndexManager => Boolean(manager));
expect(managers).toHaveLength(12);
expect(new Set(managers).size).toBe(1);
expect(hoisted.providerCreateCalls).toBe(0);
await managers[0].close();
});
it("evicts cached managers during global teardown", async () => {
const indexPath = path.join(workspaceDir, "index.sqlite");
const cfg = createMemoryConcurrencyConfig(indexPath);
const pendingResult = RawMemoryIndexManager.get({ cfg, agentId: "main" });
await closeAllMemoryIndexManagers();
const firstManager = await pendingResult;
const secondManager = await RawMemoryIndexManager.get({ cfg, agentId: "main" });
expect(firstManager).toBeTruthy();
expect(secondManager).toBeTruthy();
expect(Object.is(secondManager, firstManager)).toBe(false);
expect(hoisted.providerCreateCalls).toBe(0);
await secondManager?.close?.();
});
it("does not identity-cache status-only managers", async () => {
const indexPath = path.join(workspaceDir, "index.sqlite");
const cfg = createMemoryConcurrencyConfig(indexPath);
const first = await RawMemoryIndexManager.get({ cfg, agentId: "main", purpose: "status" });
const second = await RawMemoryIndexManager.get({ cfg, agentId: "main", purpose: "status" });
expect(first).toBeTruthy();
expect(second).toBeTruthy();
expect(Object.is(second, first)).toBe(false);
expect(hoisted.providerCreateCalls).toBe(0);
await first?.close?.();
await second?.close?.();
});
});

View File

@@ -0,0 +1,217 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../engine-host-api.js";
import { DEFAULT_OLLAMA_EMBEDDING_MODEL } from "./embeddings-ollama.js";
import type {
EmbeddingProvider,
EmbeddingProviderRuntime,
EmbeddingProviderResult,
} from "./embeddings.js";
import type { MemoryIndexManager } from "./index.js";
type MemoryIndexModule = typeof import("./index.js");
const { createEmbeddingProviderMock } = vi.hoisted(() => ({
createEmbeddingProviderMock: vi.fn(),
}));
vi.mock("./embeddings.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("./embeddings.js")>();
return {
...actual,
createEmbeddingProvider: createEmbeddingProviderMock,
};
});
vi.mock("./sqlite-vec.js", () => ({
loadSqliteVecExtension: async () => ({ ok: false, error: "sqlite-vec disabled in tests" }),
}));
let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];
async function ensureProviderInitialized(manager: MemoryIndexManager): Promise<void> {
await (
manager as unknown as {
ensureProviderInitialized: () => Promise<void>;
}
).ensureProviderInitialized();
}
function createProvider(id: string): EmbeddingProvider {
return {
id,
model: `${id}-model`,
embedQuery: async () => [0.1, 0.2, 0.3],
embedBatch: async (texts: string[]) => texts.map(() => [0.1, 0.2, 0.3]),
};
}
function buildConfig(params: {
workspaceDir: string;
indexPath: string;
provider: "openai" | "mistral";
fallback?: "none" | "mistral" | "ollama";
}): OpenClawConfig {
return {
agents: {
defaults: {
workspace: params.workspaceDir,
memorySearch: {
provider: params.provider,
model: params.provider === "mistral" ? "mistral/mistral-embed" : "text-embedding-3-small",
fallback: params.fallback ?? "none",
store: { path: params.indexPath, vector: { enabled: false } },
sync: { watch: false, onSessionStart: false, onSearch: false },
query: { minScore: 0, hybrid: { enabled: false } },
},
},
list: [{ id: "main", default: true }],
},
} as OpenClawConfig;
}
describe("memory manager mistral provider wiring", () => {
let workspaceDir = "";
let indexPath = "";
let manager: MemoryIndexManager | null = null;
beforeEach(async () => {
vi.resetModules();
({ getMemorySearchManager, closeAllMemorySearchManagers } = await import("./index.js"));
vi.clearAllMocks();
createEmbeddingProviderMock.mockReset();
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-memory-mistral-"));
indexPath = path.join(workspaceDir, "index.sqlite");
await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "test");
});
afterEach(async () => {
if (manager) {
await manager.close();
manager = null;
}
await closeAllMemorySearchManagers();
if (workspaceDir) {
await fs.rm(workspaceDir, { recursive: true, force: true });
workspaceDir = "";
indexPath = "";
}
});
it("stores mistral client when mistral provider is selected", async () => {
const mistralRuntime: EmbeddingProviderRuntime = {
id: "mistral",
cacheKeyData: { provider: "mistral", model: "mistral-embed" },
};
const providerResult: EmbeddingProviderResult = {
requestedProvider: "mistral",
provider: createProvider("mistral"),
runtime: mistralRuntime,
};
createEmbeddingProviderMock.mockResolvedValueOnce(providerResult);
const cfg = buildConfig({ workspaceDir, indexPath, provider: "mistral" });
const result = await getMemorySearchManager({ cfg, agentId: "main" });
if (!result.manager) {
throw new Error(`manager missing: ${result.error ?? "no error provided"}`);
}
manager = result.manager as unknown as MemoryIndexManager;
await ensureProviderInitialized(manager);
const internal = manager as unknown as {
ensureProviderInitialized: () => Promise<void>;
providerRuntime?: EmbeddingProviderRuntime;
};
await internal.ensureProviderInitialized();
expect(internal.providerRuntime).toBe(mistralRuntime);
});
it("stores mistral client after fallback activation", async () => {
const openAiRuntime: EmbeddingProviderRuntime = {
id: "openai",
cacheKeyData: { provider: "openai", model: "text-embedding-3-small" },
};
const mistralRuntime: EmbeddingProviderRuntime = {
id: "mistral",
cacheKeyData: { provider: "mistral", model: "mistral-embed" },
};
createEmbeddingProviderMock.mockResolvedValueOnce({
requestedProvider: "openai",
provider: createProvider("openai"),
runtime: openAiRuntime,
} as EmbeddingProviderResult);
createEmbeddingProviderMock.mockResolvedValueOnce({
requestedProvider: "mistral",
provider: createProvider("mistral"),
runtime: mistralRuntime,
} as EmbeddingProviderResult);
const cfg = buildConfig({ workspaceDir, indexPath, provider: "openai", fallback: "mistral" });
const result = await getMemorySearchManager({ cfg, agentId: "main" });
if (!result.manager) {
throw new Error(`manager missing: ${result.error ?? "no error provided"}`);
}
manager = result.manager as unknown as MemoryIndexManager;
await ensureProviderInitialized(manager);
const internal = manager as unknown as {
ensureProviderInitialized: () => Promise<void>;
activateFallbackProvider: (reason: string) => Promise<boolean>;
providerRuntime?: EmbeddingProviderRuntime;
};
await internal.ensureProviderInitialized();
expect(internal.providerRuntime?.id).toBe("openai");
const activated = await internal.activateFallbackProvider("forced test");
expect(activated).toBe(true);
expect(internal.providerRuntime).toBe(mistralRuntime);
});
it("uses default ollama model when activating ollama fallback", async () => {
const openAiRuntime: EmbeddingProviderRuntime = {
id: "openai",
cacheKeyData: { provider: "openai", model: "text-embedding-3-small" },
};
const ollamaRuntime: EmbeddingProviderRuntime = {
id: "ollama",
cacheKeyData: { provider: "ollama", model: DEFAULT_OLLAMA_EMBEDDING_MODEL },
};
createEmbeddingProviderMock.mockResolvedValueOnce({
requestedProvider: "openai",
provider: createProvider("openai"),
runtime: openAiRuntime,
} as EmbeddingProviderResult);
createEmbeddingProviderMock.mockResolvedValueOnce({
requestedProvider: "ollama",
provider: createProvider("ollama"),
runtime: ollamaRuntime,
} as EmbeddingProviderResult);
const cfg = buildConfig({ workspaceDir, indexPath, provider: "openai", fallback: "ollama" });
const result = await getMemorySearchManager({ cfg, agentId: "main" });
if (!result.manager) {
throw new Error(`manager missing: ${result.error ?? "no error provided"}`);
}
manager = result.manager as unknown as MemoryIndexManager;
await ensureProviderInitialized(manager);
const internal = manager as unknown as {
ensureProviderInitialized: () => Promise<void>;
activateFallbackProvider: (reason: string) => Promise<boolean>;
providerRuntime?: EmbeddingProviderRuntime;
};
await internal.ensureProviderInitialized();
expect(internal.providerRuntime?.id).toBe("openai");
const activated = await internal.activateFallbackProvider("forced ollama fallback");
expect(activated).toBe(true);
expect(internal.providerRuntime).toBe(ollamaRuntime);
const fallbackCall = createEmbeddingProviderMock.mock.calls[1]?.[0] as
| { provider?: string; model?: string }
| undefined;
expect(fallbackCall?.provider).toBe("ollama");
expect(fallbackCall?.model).toBe(DEFAULT_OLLAMA_EMBEDDING_MODEL);
});
});

View File

@@ -0,0 +1,122 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../engine-host-api.js";
import { resetEmbeddingMocks } from "./embedding.test-mocks.js";
import type { MemoryIndexManager } from "./index.js";
import { getRequiredMemoryIndexManager } from "./test-manager-helpers.js";
function createMemorySearchCfg(options: {
workspaceDir: string;
indexPath: string;
}): OpenClawConfig {
return {
agents: {
defaults: {
workspace: options.workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: options.indexPath, vector: { enabled: false } },
cache: { enabled: false },
query: { minScore: 0, hybrid: { enabled: false } },
sync: { watch: false, onSessionStart: false, onSearch: false },
},
},
list: [{ id: "main", default: true }],
},
} as OpenClawConfig;
}
describe("MemoryIndexManager.readFile", () => {
let workspaceDir: string;
let indexPath: string;
let memoryDir: string;
let manager: MemoryIndexManager | null = null;
beforeAll(async () => {
resetEmbeddingMocks();
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-read-"));
indexPath = path.join(workspaceDir, "index.sqlite");
memoryDir = path.join(workspaceDir, "memory");
await fs.mkdir(memoryDir, { recursive: true });
manager = await getRequiredMemoryIndexManager({
cfg: createMemorySearchCfg({ workspaceDir, indexPath }),
agentId: "main",
purpose: "status",
});
});
afterEach(async () => {
const entries = await fs.readdir(memoryDir).catch(() => []);
await Promise.all(
entries.map(async (entry) => {
await fs.rm(path.join(memoryDir, entry), { recursive: true, force: true });
}),
);
});
afterAll(async () => {
if (manager) {
await manager.close();
manager = null;
}
await fs.rm(workspaceDir, { recursive: true, force: true });
});
it("returns empty text when the requested file does not exist", async () => {
const relPath = "memory/2099-01-01.md";
const result = await manager!.readFile({ relPath });
expect(result).toEqual({ text: "", path: relPath });
});
it("returns content slices when the file exists", async () => {
const relPath = "memory/2026-02-20.md";
const absPath = path.join(workspaceDir, relPath);
await fs.mkdir(path.dirname(absPath), { recursive: true });
await fs.writeFile(absPath, ["line 1", "line 2", "line 3"].join("\n"), "utf-8");
const result = await manager!.readFile({ relPath, from: 2, lines: 1 });
expect(result).toEqual({ text: "line 2", path: relPath });
});
it("returns empty text when the requested slice is past EOF", async () => {
const relPath = "memory/window.md";
const absPath = path.join(workspaceDir, relPath);
await fs.mkdir(path.dirname(absPath), { recursive: true });
await fs.writeFile(absPath, ["alpha", "beta"].join("\n"), "utf-8");
const result = await manager!.readFile({ relPath, from: 10, lines: 5 });
expect(result).toEqual({ text: "", path: relPath });
});
it("returns empty text when the file disappears after stat", async () => {
const relPath = "memory/transient.md";
const absPath = path.join(workspaceDir, relPath);
await fs.mkdir(path.dirname(absPath), { recursive: true });
await fs.writeFile(absPath, "first\nsecond", "utf-8");
const realReadFile = fs.readFile;
let injected = false;
const readSpy = vi
.spyOn(fs, "readFile")
.mockImplementation(async (...args: Parameters<typeof realReadFile>) => {
const [target, options] = args;
if (!injected && typeof target === "string" && path.resolve(target) === absPath) {
injected = true;
const err = new Error("missing") as NodeJS.ErrnoException;
err.code = "ENOENT";
throw err;
}
return realReadFile(target, options);
});
try {
const result = await manager!.readFile({ relPath });
expect(result).toEqual({ text: "", path: relPath });
} finally {
readSpy.mockRestore();
}
});
});

View File

@@ -0,0 +1,198 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import type { DatabaseSync } from "node:sqlite";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../engine-host-api.js";
import { resetEmbeddingMocks } from "./embedding.test-mocks.js";
import { MemoryIndexManager } from "./manager.js";
import { getRequiredMemoryIndexManager } from "./test-manager-helpers.js";
type ReadonlyRecoveryHarness = {
closed: boolean;
syncing: Promise<void> | null;
queuedSessionFiles: Set<string>;
queuedSessionSync: Promise<void> | null;
db: DatabaseSync;
vectorReady: Promise<boolean> | null;
vector: {
enabled: boolean;
available: boolean | null;
loadError?: string;
dims?: number;
};
readonlyRecoveryAttempts: number;
readonlyRecoverySuccesses: number;
readonlyRecoveryFailures: number;
readonlyRecoveryLastError?: string;
ensureProviderInitialized: ReturnType<typeof vi.fn>;
enqueueTargetedSessionSync: ReturnType<typeof vi.fn>;
runSync: ReturnType<typeof vi.fn>;
openDatabase: ReturnType<typeof vi.fn>;
ensureSchema: ReturnType<typeof vi.fn>;
readMeta: ReturnType<typeof vi.fn>;
};
describe("memory manager readonly recovery", () => {
let workspaceDir = "";
let indexPath = "";
let manager: MemoryIndexManager | null = null;
function createMemoryConfig(): OpenClawConfig {
return {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: indexPath, vector: { enabled: false } },
cache: { enabled: false },
query: { minScore: 0, hybrid: { enabled: false } },
sync: { watch: false, onSessionStart: false, onSearch: false },
},
},
list: [{ id: "main", default: true }],
},
} as OpenClawConfig;
}
async function createRealManager() {
manager = await getRequiredMemoryIndexManager({
cfg: createMemoryConfig(),
agentId: "main",
purpose: "status",
});
return manager;
}
function createReadonlyRecoveryHarness() {
const reopenedClose = vi.fn();
const initialClose = vi.fn();
const reopenedDb = { close: reopenedClose } as unknown as DatabaseSync;
const initialDb = { close: initialClose } as unknown as DatabaseSync;
const harness: ReadonlyRecoveryHarness = {
closed: false,
syncing: null,
queuedSessionFiles: new Set<string>(),
queuedSessionSync: null,
db: initialDb,
vectorReady: null,
vector: {
enabled: false,
available: null,
loadError: "stale",
dims: 123,
},
readonlyRecoveryAttempts: 0,
readonlyRecoverySuccesses: 0,
readonlyRecoveryFailures: 0,
readonlyRecoveryLastError: undefined,
ensureProviderInitialized: vi.fn(async () => {}),
enqueueTargetedSessionSync: vi.fn(async () => {}),
runSync: vi.fn(),
openDatabase: vi.fn(() => reopenedDb),
ensureSchema: vi.fn(),
readMeta: vi.fn(() => undefined),
};
Object.setPrototypeOf(harness, MemoryIndexManager.prototype);
return {
harness,
initialDb,
initialClose,
reopenedDb,
reopenedClose,
};
}
function expectReadonlyRecoveryStatus(
instance: {
readonlyRecoveryAttempts: number;
readonlyRecoverySuccesses: number;
readonlyRecoveryFailures: number;
readonlyRecoveryLastError?: string;
},
lastError: string,
) {
expect({
attempts: instance.readonlyRecoveryAttempts,
successes: instance.readonlyRecoverySuccesses,
failures: instance.readonlyRecoveryFailures,
lastError: instance.readonlyRecoveryLastError,
}).toEqual({
attempts: 1,
successes: 1,
failures: 0,
lastError,
});
}
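  // Shared retry scenario: runSync rejects once with a readonly-style error, after which
  // sync is expected to close the stale connection, reopen the database, and retry exactly
  // once — hence one openDatabase call, one close of the initial handle, and a single
  // successful recovery recorded in the counters.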
async function expectReadonlyRetry(params: { firstError: unknown; expectedLastError: string }) {
const { harness, initialClose } = createReadonlyRecoveryHarness();
harness.runSync.mockRejectedValueOnce(params.firstError).mockResolvedValueOnce(undefined);
await MemoryIndexManager.prototype.sync.call(harness as unknown as MemoryIndexManager, {
reason: "test",
});
expect(harness.runSync).toHaveBeenCalledTimes(2);
expect(harness.openDatabase).toHaveBeenCalledTimes(1);
expect(initialClose).toHaveBeenCalledTimes(1);
expectReadonlyRecoveryStatus(harness, params.expectedLastError);
}
beforeEach(async () => {
resetEmbeddingMocks();
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-readonly-"));
indexPath = path.join(workspaceDir, "index.sqlite");
await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Hello memory.");
});
afterEach(async () => {
vi.restoreAllMocks();
if (manager) {
await manager.close();
manager = null;
}
await fs.rm(workspaceDir, { recursive: true, force: true });
});
it("reopens sqlite and retries once when sync hits SQLITE_READONLY", async () => {
await expectReadonlyRetry({
firstError: new Error("attempt to write a readonly database"),
expectedLastError: "attempt to write a readonly database",
});
});
it("reopens sqlite and retries when readonly appears in error code", async () => {
await expectReadonlyRetry({
firstError: { message: "write failed", code: "SQLITE_READONLY" },
expectedLastError: "write failed",
});
});
it("does not retry non-readonly sync errors", async () => {
const { harness, initialClose } = createReadonlyRecoveryHarness();
harness.runSync.mockRejectedValueOnce(new Error("embedding timeout"));
await expect(
MemoryIndexManager.prototype.sync.call(harness as unknown as MemoryIndexManager, {
reason: "test",
}),
).rejects.toThrow("embedding timeout");
expect(harness.runSync).toHaveBeenCalledTimes(1);
expect(harness.openDatabase).not.toHaveBeenCalled();
expect(initialClose).not.toHaveBeenCalled();
});
it("sets busy_timeout on memory sqlite connections", async () => {
const currentManager = await createRealManager();
const db = (currentManager as unknown as { db: DatabaseSync }).db;
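    // The PRAGMA result column may surface as either `busy_timeout` or `timeout`
    // depending on the sqlite build, so the lookup below accepts both keys.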
const row = db.prepare("PRAGMA busy_timeout").get() as
| { busy_timeout?: number; timeout?: number }
| undefined;
const busyTimeout = row?.busy_timeout ?? row?.timeout;
expect(busyTimeout).toBe(5000);
});
});

View File

@@ -0,0 +1,33 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { runDetachedMemorySync } from "./manager-sync-ops.js";
describe("memory manager sync failures", () => {
beforeEach(() => {
vi.useFakeTimers();
});
afterEach(async () => {
vi.useRealTimers();
});
it("does not raise unhandledRejection when watch-triggered sync fails", async () => {
const unhandled: unknown[] = [];
const handler = (reason: unknown) => {
unhandled.push(reason);
};
process.on("unhandledRejection", handler);
const syncSpy = vi
.fn()
.mockRejectedValueOnce(new Error("openai embeddings failed: 400 bad request"));
setTimeout(() => {
runDetachedMemorySync(syncSpy, "watch");
}, 1);
await vi.runOnlyPendingTimersAsync();
vi.useRealTimers();
await syncSpy.mock.results[0]?.value?.catch(() => undefined);
process.off("unhandledRejection", handler);
expect(unhandled).toHaveLength(0);
});
});

View File

@@ -1,15 +1,6 @@
import type { DatabaseSync } from "node:sqlite";
import { type FSWatcher } from "chokidar";
import {
createEmbeddingProvider,
type EmbeddingProvider,
type EmbeddingProviderRequest,
type EmbeddingProviderResult,
type GeminiEmbeddingClient,
type MistralEmbeddingClient,
type OllamaEmbeddingClient,
type OpenAiEmbeddingClient,
type VoyageEmbeddingClient,
extractKeywords,
readMemoryFile,
resolveAgentDir,
@@ -25,7 +16,15 @@ import {
type OpenClawConfig,
type ResolvedMemorySearchConfig,
createSubsystemLogger,
} from "../api.js";
} from "../engine-host-api.js";
import {
createEmbeddingProvider,
type EmbeddingProvider,
type EmbeddingProviderId,
type EmbeddingProviderRequest,
type EmbeddingProviderResult,
type EmbeddingProviderRuntime,
} from "./embeddings.js";
import { bm25RankToScore, buildFtsQuery, mergeHybridResults } from "./hybrid.js";
import { MemoryManagerEmbeddingOps } from "./manager-embedding-ops.js";
import { searchKeyword, searchVector } from "./manager-search.js";
@@ -83,14 +82,10 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
private readonly requestedProvider: EmbeddingProviderRequest;
private providerInitPromise: Promise<void> | null = null;
private providerInitialized = false;
protected fallbackFrom?: "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama";
protected fallbackFrom?: EmbeddingProviderId;
protected fallbackReason?: string;
private providerUnavailableReason?: string;
protected openAi?: OpenAiEmbeddingClient;
protected gemini?: GeminiEmbeddingClient;
protected voyage?: VoyageEmbeddingClient;
protected mistral?: MistralEmbeddingClient;
protected ollama?: OllamaEmbeddingClient;
protected providerRuntime?: EmbeddingProviderRuntime;
protected batch: {
enabled: boolean;
wait: boolean;
@@ -270,11 +265,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
this.fallbackFrom = providerResult.fallbackFrom;
this.fallbackReason = providerResult.fallbackReason;
this.providerUnavailableReason = providerResult.providerUnavailableReason;
this.openAi = providerResult.openAi;
this.gemini = providerResult.gemini;
this.voyage = providerResult.voyage;
this.mistral = providerResult.mistral;
this.ollama = providerResult.ollama;
this.providerRuntime = providerResult.runtime;
this.providerInitialized = true;
}

View File

@@ -0,0 +1,138 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../engine-host-api.js";
import type { MemoryIndexManager } from "./index.js";
vi.mock("./embeddings.js", () => {
return {
createEmbeddingProvider: async () => ({
requestedProvider: "openai",
provider: {
id: "mock",
model: "mock-embed",
embedQuery: async () => [0.1, 0.2, 0.3],
embedBatch: async (texts: string[]) => texts.map((_, index) => [index + 1, 0, 0]),
},
}),
};
});
type MemoryInternalModule = typeof import("./internal.js");
type TestManagerModule = typeof import("./test-manager.js");
type MemoryIndexModule = typeof import("./index.js");
let buildFileEntry: MemoryInternalModule["buildFileEntry"];
let createMemoryManagerOrThrow: TestManagerModule["createMemoryManagerOrThrow"];
let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];
async function ensureProviderInitialized(manager: MemoryIndexManager): Promise<void> {
await (
manager as unknown as {
ensureProviderInitialized: () => Promise<void>;
}
).ensureProviderInitialized();
}
describe("memory vector dedupe", () => {
let workspaceDir: string;
let indexPath: string;
let manager: MemoryIndexManager | null = null;
async function seedMemoryWorkspace(rootDir: string) {
await fs.mkdir(path.join(rootDir, "memory"));
await fs.writeFile(path.join(rootDir, "MEMORY.md"), "Hello memory.");
}
async function closeManagerIfOpen() {
if (!manager) {
return;
}
await manager.close();
manager = null;
}
beforeEach(async () => {
vi.resetModules();
({ buildFileEntry } = await import("./internal.js"));
({ createMemoryManagerOrThrow } = await import("./test-manager.js"));
({ closeAllMemorySearchManagers } = await import("./index.js"));
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-"));
indexPath = path.join(workspaceDir, "index.sqlite");
await seedMemoryWorkspace(workspaceDir);
});
afterEach(async () => {
await closeManagerIfOpen();
await closeAllMemorySearchManagers();
await fs.rm(workspaceDir, { recursive: true, force: true });
});
it("deletes existing vector rows before inserting replacements", async () => {
const cfg = {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: indexPath, vector: { enabled: true } },
sync: { watch: false, onSessionStart: false, onSearch: false },
cache: { enabled: false },
},
},
list: [{ id: "main", default: true }],
},
} as OpenClawConfig;
manager = await createMemoryManagerOrThrow(cfg);
await ensureProviderInitialized(manager);
const db = (
manager as unknown as {
db: { exec: (sql: string) => void; prepare: (sql: string) => unknown };
}
).db;
db.exec("CREATE TABLE IF NOT EXISTS chunks_vec (id TEXT PRIMARY KEY, embedding BLOB)");
(
manager as unknown as { ensureVectorReady: (dims?: number) => Promise<boolean> }
).ensureVectorReady = async () => true;
await (
manager as unknown as { ensureProviderInitialized: () => Promise<void> }
).ensureProviderInitialized();
const entry = await buildFileEntry(path.join(workspaceDir, "MEMORY.md"), workspaceDir);
if (!entry) {
throw new Error("entry missing");
}
await (
manager as unknown as {
indexFile: (entry: unknown, options: { source: "memory" }) => Promise<void>;
}
).indexFile(entry, { source: "memory" });
await (
manager as unknown as {
indexFile: (entry: unknown, options: { source: "memory" }) => Promise<void>;
}
).indexFile(entry, { source: "memory" });
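    // Guard trigger: makes the next indexFile call fail loudly if it inserts a chunks_vec
    // row whose id still exists, i.e. if stale vector rows were not deleted first.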
db.exec(`
CREATE TRIGGER IF NOT EXISTS fail_if_vector_row_not_deleted
BEFORE INSERT ON chunks_vec
WHEN EXISTS (SELECT 1 FROM chunks_vec WHERE id = NEW.id)
BEGIN
SELECT RAISE(FAIL, 'vector row not deleted before insert');
END;
`);
await expect(
(
manager as unknown as {
indexFile: (entry: unknown, options: { source: "memory" }) => Promise<void>;
}
).indexFile(entry, { source: "memory" }),
).resolves.toBeUndefined();
});
});

View File

@@ -0,0 +1,159 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../engine-host-api.js";
import type { MemorySearchConfig } from "../engine-host-api.js";
import type { MemoryIndexManager } from "./index.js";
const { watchMock } = vi.hoisted(() => ({
watchMock: vi.fn(() => ({
on: vi.fn(),
close: vi.fn(async () => undefined),
})),
}));
vi.mock("chokidar", () => ({
default: { watch: watchMock },
watch: watchMock,
}));
vi.mock("./sqlite-vec.js", () => ({
loadSqliteVecExtension: async () => ({ ok: false, error: "sqlite-vec disabled in tests" }),
}));
vi.mock("./embeddings.js", () => ({
createEmbeddingProvider: async () => ({
requestedProvider: "openai",
provider: {
id: "mock",
model: "mock-embed",
embedQuery: async () => [1, 0],
embedBatch: async (texts: string[]) => texts.map(() => [1, 0]),
},
}),
}));
type MemoryIndexModule = typeof import("./index.js");
let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];
describe("memory watcher config", () => {
let manager: MemoryIndexManager | null = null;
let workspaceDir = "";
let extraDir = "";
beforeEach(async () => {
vi.resetModules();
({ getMemorySearchManager, closeAllMemorySearchManagers } = await import("./index.js"));
vi.clearAllMocks();
});
afterEach(async () => {
watchMock.mockClear();
if (manager) {
await manager.close();
manager = null;
}
await closeAllMemorySearchManagers();
if (workspaceDir) {
await fs.rm(workspaceDir, { recursive: true, force: true });
workspaceDir = "";
extraDir = "";
}
});
async function setupWatcherWorkspace(seedFile: { name: string; contents: string }) {
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-memory-watch-"));
extraDir = path.join(workspaceDir, "extra");
await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
await fs.mkdir(extraDir, { recursive: true });
await fs.writeFile(path.join(extraDir, seedFile.name), seedFile.contents);
}
function createWatcherConfig(overrides?: Partial<MemorySearchConfig>): OpenClawConfig {
const defaults: NonNullable<NonNullable<OpenClawConfig["agents"]>["defaults"]> = {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { path: path.join(workspaceDir, "index.sqlite"), vector: { enabled: false } },
sync: { watch: true, watchDebounceMs: 25, onSessionStart: false, onSearch: false },
query: { minScore: 0, hybrid: { enabled: false } },
extraPaths: [extraDir],
...overrides,
},
};
return {
agents: {
defaults,
list: [{ id: "main", default: true }],
},
} as OpenClawConfig;
}
async function expectWatcherManager(cfg: OpenClawConfig) {
const result = await getMemorySearchManager({ cfg, agentId: "main" });
expect(result.manager).not.toBeNull();
if (!result.manager) {
throw new Error("manager missing");
}
manager = result.manager as unknown as MemoryIndexManager;
}
it("watches markdown globs and ignores dependency directories", async () => {
await setupWatcherWorkspace({ name: "notes.md", contents: "hello" });
const cfg = createWatcherConfig();
await expectWatcherManager(cfg);
expect(watchMock).toHaveBeenCalledTimes(1);
const [watchedPaths, options] = watchMock.mock.calls[0] as unknown as [
string[],
Record<string, unknown>,
];
expect(watchedPaths).toEqual(
expect.arrayContaining([
path.join(workspaceDir, "MEMORY.md"),
path.join(workspaceDir, "memory.md"),
path.join(workspaceDir, "memory", "**", "*.md"),
path.join(extraDir, "**", "*.md"),
]),
);
expect(options.ignoreInitial).toBe(true);
expect(options.awaitWriteFinish).toEqual({ stabilityThreshold: 25, pollInterval: 100 });
const ignored = options.ignored as ((watchPath: string) => boolean) | undefined;
expect(ignored).toBeTypeOf("function");
expect(ignored?.(path.join(workspaceDir, "memory", "node_modules", "pkg", "index.md"))).toBe(
true,
);
expect(ignored?.(path.join(workspaceDir, "memory", ".venv", "lib", "python.md"))).toBe(true);
expect(ignored?.(path.join(workspaceDir, "memory", "project", "notes.md"))).toBe(false);
});
it("watches multimodal extensions with case-insensitive globs", async () => {
await setupWatcherWorkspace({ name: "PHOTO.PNG", contents: "png" });
const cfg = createWatcherConfig({
provider: "gemini",
model: "gemini-embedding-2-preview",
fallback: "none",
multimodal: { enabled: true, modalities: ["image", "audio"] },
});
await expectWatcherManager(cfg);
expect(watchMock).toHaveBeenCalledTimes(1);
const [watchedPaths] = watchMock.mock.calls[0] as unknown as [
string[],
Record<string, unknown>,
];
expect(watchedPaths).toEqual(
expect.arrayContaining([
path.join(extraDir, "**", "*.[pP][nN][gG]"),
path.join(extraDir, "**", "*.[wW][aA][vV]"),
]),
);
});
});

View File

@@ -0,0 +1,390 @@
import { describe, it, expect } from "vitest";
import {
tokenize,
jaccardSimilarity,
textSimilarity,
computeMMRScore,
mmrRerank,
applyMMRToHybridResults,
DEFAULT_MMR_CONFIG,
type MMRItem,
} from "./mmr.js";
describe("tokenize", () => {
it("normalizes, filters, and deduplicates token sets", () => {
const cases = [
{
name: "alphanumeric lowercase",
input: "Hello World 123",
expected: ["hello", "world", "123"],
},
{ name: "empty string", input: "", expected: [] },
{ name: "special chars only", input: "!@#$%^&*()", expected: [] },
{
name: "underscores",
input: "hello_world test_case",
expected: ["hello_world", "test_case"],
},
{
name: "dedupe repeated tokens",
input: "hello hello world world",
expected: ["hello", "world"],
},
] as const;
for (const testCase of cases) {
expect(tokenize(testCase.input), testCase.name).toEqual(new Set(testCase.expected));
}
});
});
describe("jaccardSimilarity", () => {
it("computes expected scores for overlap edge cases", () => {
const cases = [
{
name: "identical sets",
left: new Set(["a", "b", "c"]),
right: new Set(["a", "b", "c"]),
expected: 1,
},
{ name: "disjoint sets", left: new Set(["a", "b"]), right: new Set(["c", "d"]), expected: 0 },
{ name: "two empty sets", left: new Set<string>(), right: new Set<string>(), expected: 1 },
{
name: "left non-empty right empty",
left: new Set(["a"]),
right: new Set<string>(),
expected: 0,
},
{
name: "left empty right non-empty",
left: new Set<string>(),
right: new Set(["a"]),
expected: 0,
},
{
name: "partial overlap",
left: new Set(["a", "b", "c"]),
right: new Set(["b", "c", "d"]),
expected: 0.5,
},
] as const;
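    // Expectations follow |A ∩ B| / |A ∪ B|: the partial-overlap case is
    // |{b,c}| / |{a,b,c,d}| = 2/4 = 0.5, and two empty sets count as identical (1) by convention here.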
for (const testCase of cases) {
expect(jaccardSimilarity(testCase.left, testCase.right), testCase.name).toBe(
testCase.expected,
);
}
});
it("is symmetric", () => {
const setA = new Set(["a", "b"]);
const setB = new Set(["b", "c"]);
expect(jaccardSimilarity(setA, setB)).toBe(jaccardSimilarity(setB, setA));
});
});
describe("textSimilarity", () => {
it("computes expected text-level similarity cases", () => {
const cases = [
{ name: "identical", left: "hello world", right: "hello world", expected: 1 },
{ name: "same words reordered", left: "hello world", right: "world hello", expected: 1 },
{ name: "different text", left: "hello world", right: "foo bar", expected: 0 },
{ name: "case insensitive", left: "Hello World", right: "hello world", expected: 1 },
] as const;
for (const testCase of cases) {
expect(textSimilarity(testCase.left, testCase.right), testCase.name).toBe(testCase.expected);
}
});
});
describe("computeMMRScore", () => {
it("balances relevance and diversity across lambda settings", () => {
const cases = [
{
name: "lambda=1 relevance only",
relevance: 0.8,
similarity: 0.5,
lambda: 1,
expected: 0.8,
},
{
name: "lambda=0 diversity only",
relevance: 0.8,
similarity: 0.5,
lambda: 0,
expected: -0.5,
},
{ name: "lambda=0.5 mixed", relevance: 0.8, similarity: 0.6, lambda: 0.5, expected: 0.1 },
{ name: "default lambda math", relevance: 1.0, similarity: 0.5, lambda: 0.7, expected: 0.55 },
] as const;
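    // These cases are consistent with the usual MMR form
    // score = lambda * relevance - (1 - lambda) * similarity, e.g. the default-lambda case:
    // 0.7 * 1.0 - 0.3 * 0.5 = 0.55.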
for (const testCase of cases) {
expect(
computeMMRScore(testCase.relevance, testCase.similarity, testCase.lambda),
testCase.name,
).toBeCloseTo(testCase.expected);
}
});
});
describe("empty input behavior", () => {
it("returns empty array for empty input", () => {
expect(mmrRerank([])).toEqual([]);
expect(applyMMRToHybridResults([])).toEqual([]);
});
});
describe("mmrRerank", () => {
describe("edge cases", () => {
it("returns single item unchanged", () => {
const items: MMRItem[] = [{ id: "1", score: 0.9, content: "hello" }];
expect(mmrRerank(items)).toEqual(items);
});
it("returns copy, not original array", () => {
const items: MMRItem[] = [{ id: "1", score: 0.9, content: "hello" }];
const result = mmrRerank(items);
expect(result).not.toBe(items);
});
it("returns items unchanged when disabled", () => {
const items: MMRItem[] = [
{ id: "1", score: 0.9, content: "hello" },
{ id: "2", score: 0.8, content: "hello" },
];
const result = mmrRerank(items, { enabled: false });
expect(result).toEqual(items);
});
});
describe("lambda edge cases", () => {
const diverseItems: MMRItem[] = [
{ id: "1", score: 1.0, content: "apple banana cherry" },
{ id: "2", score: 0.9, content: "apple banana date" },
{ id: "3", score: 0.8, content: "elderberry fig grape" },
];
it("lambda=1 returns pure relevance order", () => {
const result = mmrRerank(diverseItems, { lambda: 1 });
expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]);
});
it("lambda=0 maximizes diversity", () => {
const result = mmrRerank(diverseItems, { enabled: true, lambda: 0 });
// First item is still highest score (no penalty yet)
expect(result[0].id).toBe("1");
// Second should be most different from first
expect(result[1].id).toBe("3"); // elderberry... is most different
});
it("clamps lambda > 1 to 1", () => {
const result = mmrRerank(diverseItems, { lambda: 1.5 });
expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]);
});
it("clamps lambda < 0 to 0", () => {
const result = mmrRerank(diverseItems, { enabled: true, lambda: -0.5 });
expect(result[0].id).toBe("1");
expect(result[1].id).toBe("3");
});
});
describe("diversity behavior", () => {
it("promotes diverse results over similar high-scoring ones", () => {
const items: MMRItem[] = [
{ id: "1", score: 1.0, content: "machine learning neural networks" },
{ id: "2", score: 0.95, content: "machine learning deep learning" },
{ id: "3", score: 0.9, content: "database systems sql queries" },
{ id: "4", score: 0.85, content: "machine learning algorithms" },
];
const result = mmrRerank(items, { enabled: true, lambda: 0.5 });
// First is always highest score
expect(result[0].id).toBe("1");
// Second should be the diverse database item, not another ML item
expect(result[1].id).toBe("3");
});
it("handles items with identical content", () => {
const items: MMRItem[] = [
{ id: "1", score: 1.0, content: "identical content" },
{ id: "2", score: 0.9, content: "identical content" },
{ id: "3", score: 0.8, content: "different stuff" },
];
const result = mmrRerank(items, { enabled: true, lambda: 0.5 });
expect(result[0].id).toBe("1");
// Second should be different, not identical duplicate
expect(result[1].id).toBe("3");
});
it("handles all identical content gracefully", () => {
const items: MMRItem[] = [
{ id: "1", score: 1.0, content: "same" },
{ id: "2", score: 0.9, content: "same" },
{ id: "3", score: 0.8, content: "same" },
];
const result = mmrRerank(items, { lambda: 0.7 });
// Should still complete without error, order by score as tiebreaker
expect(result).toHaveLength(3);
});
});
describe("tie-breaking", () => {
it("uses original score as tiebreaker", () => {
const items: MMRItem[] = [
{ id: "1", score: 1.0, content: "unique content one" },
{ id: "2", score: 0.9, content: "unique content two" },
{ id: "3", score: 0.8, content: "unique content three" },
];
// With very different content and lambda=1, should be pure score order
const result = mmrRerank(items, { lambda: 1 });
expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]);
});
it("preserves all items even with same MMR scores", () => {
const items: MMRItem[] = [
{ id: "1", score: 0.5, content: "a" },
{ id: "2", score: 0.5, content: "b" },
{ id: "3", score: 0.5, content: "c" },
];
const result = mmrRerank(items, { lambda: 0.7 });
expect(result).toHaveLength(3);
expect(new Set(result.map((i) => i.id))).toEqual(new Set(["1", "2", "3"]));
});
});
describe("score normalization", () => {
it("handles items with same scores", () => {
const items: MMRItem[] = [
{ id: "1", score: 0.5, content: "hello world" },
{ id: "2", score: 0.5, content: "foo bar" },
];
const result = mmrRerank(items, { lambda: 0.7 });
expect(result).toHaveLength(2);
});
it("handles negative scores", () => {
const items: MMRItem[] = [
{ id: "1", score: -0.5, content: "hello world" },
{ id: "2", score: -1.0, content: "foo bar" },
];
const result = mmrRerank(items, { lambda: 0.7 });
expect(result).toHaveLength(2);
// Higher score (less negative) should come first
expect(result[0].id).toBe("1");
});
});
});
describe("applyMMRToHybridResults", () => {
type HybridResult = {
path: string;
startLine: number;
endLine: number;
score: number;
snippet: string;
source: string;
};
it("preserves all original fields", () => {
const results: HybridResult[] = [
{
path: "/test/file.ts",
startLine: 1,
endLine: 10,
score: 0.9,
snippet: "hello world",
source: "memory",
},
];
const reranked = applyMMRToHybridResults(results);
expect(reranked[0]).toEqual(results[0]);
});
it("creates unique IDs from path and startLine", () => {
const results: HybridResult[] = [
{
path: "/test/a.ts",
startLine: 1,
endLine: 10,
score: 0.9,
snippet: "same content here",
source: "memory",
},
{
path: "/test/a.ts",
startLine: 20,
endLine: 30,
score: 0.8,
snippet: "same content here",
source: "memory",
},
];
// Should work without ID collision
const reranked = applyMMRToHybridResults(results);
expect(reranked).toHaveLength(2);
});
it("re-ranks results for diversity", () => {
const results: HybridResult[] = [
{
path: "/a.ts",
startLine: 1,
endLine: 10,
score: 1.0,
snippet: "function add numbers together",
source: "memory",
},
{
path: "/b.ts",
startLine: 1,
endLine: 10,
score: 0.95,
snippet: "function add values together",
source: "memory",
},
{
path: "/c.ts",
startLine: 1,
endLine: 10,
score: 0.9,
snippet: "database connection pool",
source: "memory",
},
];
const reranked = applyMMRToHybridResults(results, { enabled: true, lambda: 0.5 });
// First stays the same (highest score)
expect(reranked[0].path).toBe("/a.ts");
// Second should be the diverse one
expect(reranked[1].path).toBe("/c.ts");
});
it("respects disabled config", () => {
const results: HybridResult[] = [
{ path: "/a.ts", startLine: 1, endLine: 10, score: 0.9, snippet: "test", source: "memory" },
{ path: "/b.ts", startLine: 1, endLine: 10, score: 0.8, snippet: "test", source: "memory" },
];
const reranked = applyMMRToHybridResults(results, { enabled: false });
expect(reranked).toEqual(results);
});
});
describe("DEFAULT_MMR_CONFIG", () => {
it("has expected default values", () => {
expect(DEFAULT_MMR_CONFIG.enabled).toBe(false);
expect(DEFAULT_MMR_CONFIG.lambda).toBe(0.7);
});
});

View File

@@ -0,0 +1,359 @@
import fsSync from "node:fs";
import {
DEFAULT_GEMINI_EMBEDDING_MODEL,
DEFAULT_LOCAL_MODEL,
DEFAULT_MISTRAL_EMBEDDING_MODEL,
DEFAULT_OLLAMA_EMBEDDING_MODEL,
DEFAULT_OPENAI_EMBEDDING_MODEL,
DEFAULT_VOYAGE_EMBEDDING_MODEL,
OPENAI_BATCH_ENDPOINT,
buildGeminiEmbeddingRequest,
createGeminiEmbeddingProvider,
createLocalEmbeddingProvider,
createMistralEmbeddingProvider,
createOllamaEmbeddingProvider,
createOpenAiEmbeddingProvider,
createVoyageEmbeddingProvider,
hasNonTextEmbeddingParts,
listMemoryEmbeddingProviders,
resolveUserPath,
runGeminiEmbeddingBatches,
runOpenAiEmbeddingBatches,
runVoyageEmbeddingBatches,
type MemoryEmbeddingProviderAdapter,
} from "../engine-host-api.js";
function formatErrorMessage(err: unknown): string {
return err instanceof Error ? err.message : String(err);
}
function isMissingApiKeyError(err: unknown): boolean {
return formatErrorMessage(err).includes("No API key found for provider");
}
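// Lowercase-filtered, sorted header entries keep the provider cache key stable regardless of
// header casing or ordering; callers pass the secret headers (e.g. authorization) to exclude.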
function sanitizeHeaders(
headers: Record<string, string>,
excludedHeaderNames: string[],
): Array<[string, string]> {
const excluded = new Set(excludedHeaderNames.map((name) => name.toLowerCase()));
return Object.entries(headers)
.filter(([key]) => !excluded.has(key.toLowerCase()))
.toSorted(([a], [b]) => a.localeCompare(b))
.map(([key, value]) => [key, value]);
}
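// Re-orders batch results by numeric custom_id so the output aligns with the input chunks;
// chunks with no batch result come back as empty arrays rather than throwing.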
function mapBatchEmbeddingsByIndex(byCustomId: Map<string, number[]>, count: number): number[][] {
const embeddings: number[][] = [];
for (let index = 0; index < count; index += 1) {
embeddings.push(byCustomId.get(String(index)) ?? []);
}
return embeddings;
}
function isNodeLlamaCppMissing(err: unknown): boolean {
if (!(err instanceof Error)) {
return false;
}
const code = (err as Error & { code?: unknown }).code;
return code === "ERR_MODULE_NOT_FOUND" && err.message.includes("node-llama-cpp");
}
function formatLocalSetupError(err: unknown): string {
const detail = formatErrorMessage(err);
const missing = isNodeLlamaCppMissing(err);
return [
"Local embeddings unavailable.",
missing
? "Reason: optional dependency node-llama-cpp is missing (or failed to install)."
: detail
? `Reason: ${detail}`
: undefined,
missing && detail ? `Detail: ${detail}` : null,
"To enable local embeddings:",
"1) Use Node 24 (recommended for installs/updates; Node 22 LTS, currently 22.14+, remains supported)",
missing
? "2) Reinstall OpenClaw (this should install node-llama-cpp): npm i -g openclaw@latest"
: null,
"3) If you use pnpm: pnpm approve-builds (select node-llama-cpp), then pnpm rebuild node-llama-cpp",
...["openai", "gemini", "voyage", "mistral"].map(
(provider) => `Or set agents.defaults.memorySearch.provider = "${provider}" (remote).`,
),
]
.filter(Boolean)
.join("\n");
}
function canAutoSelectLocal(modelPath?: string): boolean {
const trimmed = modelPath?.trim();
if (!trimmed) {
return false;
}
if (/^(hf:|https?:)/i.test(trimmed)) {
return false;
}
const resolved = resolveUserPath(trimmed);
try {
return fsSync.statSync(resolved).isFile();
} catch {
return false;
}
}
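// Accepts prefixed model ids as well: "models/gemini-embedding-2-preview" and
// "google/gemini-embedding-2-preview" both normalize to the supported model name.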
function supportsGeminiMultimodalEmbeddings(model: string): boolean {
const normalized = model
.trim()
.replace(/^models\//, "")
.replace(/^(gemini|google)\//, "");
return normalized === "gemini-embedding-2-preview";
}
const openAiAdapter: MemoryEmbeddingProviderAdapter = {
id: "openai",
defaultModel: DEFAULT_OPENAI_EMBEDDING_MODEL,
transport: "remote",
autoSelectPriority: 20,
allowExplicitWhenConfiguredAuto: true,
shouldContinueAutoSelection: isMissingApiKeyError,
create: async (options) => {
const { provider, client } = await createOpenAiEmbeddingProvider({
...options,
provider: "openai",
fallback: "none",
});
return {
provider,
runtime: {
id: "openai",
cacheKeyData: {
provider: "openai",
baseUrl: client.baseUrl,
model: client.model,
headers: sanitizeHeaders(client.headers, ["authorization"]),
},
batchEmbed: async (batch) => {
const byCustomId = await runOpenAiEmbeddingBatches({
openAi: client,
agentId: batch.agentId,
requests: batch.chunks.map((chunk, index) => ({
custom_id: String(index),
method: "POST",
url: OPENAI_BATCH_ENDPOINT,
body: {
model: client.model,
input: chunk.text,
},
})),
wait: batch.wait,
concurrency: batch.concurrency,
pollIntervalMs: batch.pollIntervalMs,
timeoutMs: batch.timeoutMs,
debug: batch.debug,
});
return mapBatchEmbeddingsByIndex(byCustomId, batch.chunks.length);
},
},
};
},
};
const geminiAdapter: MemoryEmbeddingProviderAdapter = {
id: "gemini",
defaultModel: DEFAULT_GEMINI_EMBEDDING_MODEL,
transport: "remote",
autoSelectPriority: 30,
allowExplicitWhenConfiguredAuto: true,
supportsMultimodalEmbeddings: ({ model }) => supportsGeminiMultimodalEmbeddings(model),
shouldContinueAutoSelection: isMissingApiKeyError,
create: async (options) => {
const { provider, client } = await createGeminiEmbeddingProvider({
...options,
provider: "gemini",
fallback: "none",
});
return {
provider,
runtime: {
id: "gemini",
cacheKeyData: {
provider: "gemini",
baseUrl: client.baseUrl,
model: client.model,
outputDimensionality: client.outputDimensionality,
headers: sanitizeHeaders(client.headers, ["authorization", "x-goog-api-key"]),
},
batchEmbed: async (batch) => {
if (batch.chunks.some((chunk) => hasNonTextEmbeddingParts(chunk.embeddingInput))) {
return null;
}
const byCustomId = await runGeminiEmbeddingBatches({
gemini: client,
agentId: batch.agentId,
requests: batch.chunks.map((chunk, index) => ({
custom_id: String(index),
request: buildGeminiEmbeddingRequest({
input: chunk.embeddingInput ?? { text: chunk.text },
taskType: "RETRIEVAL_DOCUMENT",
modelPath: client.modelPath,
outputDimensionality: client.outputDimensionality,
}),
})),
wait: batch.wait,
concurrency: batch.concurrency,
pollIntervalMs: batch.pollIntervalMs,
timeoutMs: batch.timeoutMs,
debug: batch.debug,
});
return mapBatchEmbeddingsByIndex(byCustomId, batch.chunks.length);
},
},
};
},
};
const voyageAdapter: MemoryEmbeddingProviderAdapter = {
id: "voyage",
defaultModel: DEFAULT_VOYAGE_EMBEDDING_MODEL,
transport: "remote",
autoSelectPriority: 40,
allowExplicitWhenConfiguredAuto: true,
shouldContinueAutoSelection: isMissingApiKeyError,
create: async (options) => {
const { provider, client } = await createVoyageEmbeddingProvider({
...options,
provider: "voyage",
fallback: "none",
});
return {
provider,
runtime: {
id: "voyage",
batchEmbed: async (batch) => {
const byCustomId = await runVoyageEmbeddingBatches({
client,
agentId: batch.agentId,
requests: batch.chunks.map((chunk, index) => ({
custom_id: String(index),
body: {
input: chunk.text,
},
})),
wait: batch.wait,
concurrency: batch.concurrency,
pollIntervalMs: batch.pollIntervalMs,
timeoutMs: batch.timeoutMs,
debug: batch.debug,
});
return mapBatchEmbeddingsByIndex(byCustomId, batch.chunks.length);
},
},
};
},
};
const mistralAdapter: MemoryEmbeddingProviderAdapter = {
id: "mistral",
defaultModel: DEFAULT_MISTRAL_EMBEDDING_MODEL,
transport: "remote",
autoSelectPriority: 50,
allowExplicitWhenConfiguredAuto: true,
shouldContinueAutoSelection: isMissingApiKeyError,
create: async (options) => {
const { provider, client } = await createMistralEmbeddingProvider({
...options,
provider: "mistral",
fallback: "none",
});
return {
provider,
runtime: {
id: "mistral",
cacheKeyData: {
provider: "mistral",
model: client.model,
},
},
};
},
};
const ollamaAdapter: MemoryEmbeddingProviderAdapter = {
id: "ollama",
defaultModel: DEFAULT_OLLAMA_EMBEDDING_MODEL,
transport: "remote",
create: async (options) => {
const { provider, client } = await createOllamaEmbeddingProvider({
...options,
provider: "ollama",
fallback: "none",
});
return {
provider,
runtime: {
id: "ollama",
cacheKeyData: {
provider: "ollama",
model: client.model,
},
},
};
},
};
const localAdapter: MemoryEmbeddingProviderAdapter = {
id: "local",
defaultModel: DEFAULT_LOCAL_MODEL,
transport: "local",
autoSelectPriority: 10,
formatSetupError: formatLocalSetupError,
shouldContinueAutoSelection: () => true,
create: async (options) => {
const provider = await createLocalEmbeddingProvider({
...options,
provider: "local",
fallback: "none",
});
return {
provider,
runtime: {
id: "local",
cacheKeyData: {
provider: "local",
model: provider.model,
},
},
};
},
};
export const builtinMemoryEmbeddingProviderAdapters = [
localAdapter,
openAiAdapter,
geminiAdapter,
voyageAdapter,
mistralAdapter,
ollamaAdapter,
] as const;
export function registerBuiltInMemoryEmbeddingProviders(register: {
registerMemoryEmbeddingProvider: (adapter: MemoryEmbeddingProviderAdapter) => void;
}): void {
const existingIds = new Set(listMemoryEmbeddingProviders().map((adapter) => adapter.id));
for (const adapter of builtinMemoryEmbeddingProviderAdapters) {
if (existingIds.has(adapter.id)) {
continue;
}
register.registerMemoryEmbeddingProvider(adapter);
}
}
export {
DEFAULT_GEMINI_EMBEDDING_MODEL,
DEFAULT_LOCAL_MODEL,
DEFAULT_MISTRAL_EMBEDDING_MODEL,
DEFAULT_OLLAMA_EMBEDDING_MODEL,
DEFAULT_OPENAI_EMBEDDING_MODEL,
DEFAULT_VOYAGE_EMBEDDING_MODEL,
canAutoSelectLocal,
formatLocalSetupError,
isMissingApiKeyError,
};

File diff suppressed because it is too large

View File

@@ -12,7 +12,6 @@ import {
isQmdScopeAllowed,
listSessionFilesForAgent,
parseQmdQueryJson,
requireNodeSqlite,
resolveAgentWorkspaceDir,
resolveCliSpawnInvocation,
resolveGlobalSingleton,
@@ -32,7 +31,8 @@ import {
type ResolvedQmdMcporterConfig,
type SessionFileEntry,
writeFileWithinRoot,
} from "../api.js";
} from "../engine-host-api.js";
import { requireNodeSqlite } from "./sqlite.js";
type SqliteDatabase = import("node:sqlite").DatabaseSync;

View File

@@ -0,0 +1,365 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../engine-host-api.js";
function createManagerStatus(params: {
backend: "qmd" | "builtin";
provider: string;
model: string;
requestedProvider: string;
withMemorySourceCounts?: boolean;
}) {
const base = {
backend: params.backend,
provider: params.provider,
model: params.model,
requestedProvider: params.requestedProvider,
files: 0,
chunks: 0,
dirty: false,
workspaceDir: "/tmp",
dbPath: "/tmp/index.sqlite",
};
if (!params.withMemorySourceCounts) {
return base;
}
return {
...base,
sources: ["memory" as const],
sourceCounts: [{ source: "memory" as const, files: 0, chunks: 0 }],
};
}
function createManagerMock(params: {
backend: "qmd" | "builtin";
provider: string;
model: string;
requestedProvider: string;
searchResults?: Array<{
path: string;
startLine: number;
endLine: number;
score: number;
snippet: string;
source: "memory";
}>;
withMemorySourceCounts?: boolean;
}) {
return {
search: vi.fn(async () => params.searchResults ?? []),
readFile: vi.fn(async () => ({ text: "", path: "MEMORY.md" })),
status: vi.fn(() =>
createManagerStatus({
backend: params.backend,
provider: params.provider,
model: params.model,
requestedProvider: params.requestedProvider,
withMemorySourceCounts: params.withMemorySourceCounts,
}),
),
sync: vi.fn(async () => {}),
probeEmbeddingAvailability: vi.fn(async () => ({ ok: true })),
probeVectorAvailability: vi.fn(async () => true),
close: vi.fn(async () => {}),
};
}
const mockPrimary = vi.hoisted(() => ({
...createManagerMock({
backend: "qmd",
provider: "qmd",
model: "qmd",
requestedProvider: "qmd",
withMemorySourceCounts: true,
}),
}));
const fallbackManager = vi.hoisted(() => ({
...createManagerMock({
backend: "builtin",
provider: "openai",
model: "text-embedding-3-small",
requestedProvider: "openai",
searchResults: [
{
path: "MEMORY.md",
startLine: 1,
endLine: 1,
score: 1,
snippet: "fallback",
source: "memory",
},
],
}),
}));
const fallbackSearch = fallbackManager.search;
const mockMemoryIndexGet = vi.hoisted(() => vi.fn(async () => fallbackManager));
const mockCloseAllMemoryIndexManagers = vi.hoisted(() => vi.fn(async () => {}));
vi.mock("./qmd-manager.js", () => ({
QmdMemoryManager: {
create: vi.fn(async () => mockPrimary),
},
}));
vi.mock("../../extensions/memory-core/src/memory/manager-runtime.js", () => ({
MemoryIndexManager: {
get: mockMemoryIndexGet,
},
closeAllMemoryIndexManagers: mockCloseAllMemoryIndexManagers,
}));
import { QmdMemoryManager } from "./qmd-manager.js";
import { closeAllMemorySearchManagers, getMemorySearchManager } from "./search-manager.js";
// eslint-disable-next-line @typescript-eslint/unbound-method -- mocked static function
const createQmdManagerMock = vi.mocked(QmdMemoryManager.create);
type SearchManagerResult = Awaited<ReturnType<typeof getMemorySearchManager>>;
type SearchManager = NonNullable<SearchManagerResult["manager"]>;
function createQmdCfg(agentId: string): OpenClawConfig {
return {
memory: { backend: "qmd", qmd: {} },
agents: { list: [{ id: agentId, default: true, workspace: "/tmp/workspace" }] },
};
}
function requireManager(result: SearchManagerResult): SearchManager {
expect(result.manager).toBeTruthy();
if (!result.manager) {
throw new Error("manager missing");
}
return result.manager;
}
async function createFailedQmdSearchHarness(params: { agentId: string; errorMessage: string }) {
const cfg = createQmdCfg(params.agentId);
mockPrimary.search.mockRejectedValueOnce(new Error(params.errorMessage));
const first = await getMemorySearchManager({ cfg, agentId: params.agentId });
return { cfg, manager: requireManager(first), firstResult: first };
}
beforeEach(async () => {
await closeAllMemorySearchManagers();
mockPrimary.search.mockClear();
mockPrimary.readFile.mockClear();
mockPrimary.status.mockClear();
mockPrimary.sync.mockClear();
mockPrimary.probeEmbeddingAvailability.mockClear();
mockPrimary.probeVectorAvailability.mockClear();
mockPrimary.close.mockClear();
fallbackSearch.mockClear();
fallbackManager.readFile.mockClear();
fallbackManager.status.mockClear();
fallbackManager.sync.mockClear();
fallbackManager.probeEmbeddingAvailability.mockClear();
fallbackManager.probeVectorAvailability.mockClear();
fallbackManager.close.mockClear();
mockCloseAllMemoryIndexManagers.mockClear();
mockMemoryIndexGet.mockClear();
mockMemoryIndexGet.mockResolvedValue(fallbackManager);
createQmdManagerMock.mockClear();
});
describe("getMemorySearchManager caching", () => {
it("reuses the same QMD manager instance for repeated calls", async () => {
const cfg = createQmdCfg("main");
const first = await getMemorySearchManager({ cfg, agentId: "main" });
const second = await getMemorySearchManager({ cfg, agentId: "main" });
expect(first.manager).toBe(second.manager);
// eslint-disable-next-line @typescript-eslint/unbound-method
expect(createQmdManagerMock).toHaveBeenCalledTimes(1);
});
it("evicts failed qmd wrapper so next call retries qmd", async () => {
const retryAgentId = "retry-agent";
const {
cfg,
manager: firstManager,
firstResult: first,
} = await createFailedQmdSearchHarness({
agentId: retryAgentId,
errorMessage: "qmd query failed",
});
const fallbackResults = await firstManager.search("hello");
expect(fallbackResults).toHaveLength(1);
expect(fallbackResults[0]?.path).toBe("MEMORY.md");
const second = await getMemorySearchManager({ cfg, agentId: retryAgentId });
requireManager(second);
expect(second.manager).not.toBe(first.manager);
// eslint-disable-next-line @typescript-eslint/unbound-method
expect(createQmdManagerMock).toHaveBeenCalledTimes(2);
});
it("does not cache qmd managers for status-only requests", async () => {
const agentId = "status-agent";
const cfg = createQmdCfg(agentId);
const first = await getMemorySearchManager({ cfg, agentId, purpose: "status" });
const second = await getMemorySearchManager({ cfg, agentId, purpose: "status" });
requireManager(first);
requireManager(second);
expect(first.manager?.status()).toMatchObject({
backend: "qmd",
provider: "qmd",
model: "qmd",
requestedProvider: "qmd",
});
// eslint-disable-next-line @typescript-eslint/unbound-method
expect(createQmdManagerMock).toHaveBeenCalledTimes(2);
expect(mockMemoryIndexGet).not.toHaveBeenCalled();
await first.manager?.close?.();
await second.manager?.close?.();
expect(mockPrimary.close).toHaveBeenCalledTimes(2);
});
it("reports real qmd index counts for status-only requests", async () => {
const agentId = "status-counts-agent";
const cfg = createQmdCfg(agentId);
mockPrimary.status.mockReturnValueOnce({
...createManagerStatus({
backend: "qmd",
provider: "qmd",
model: "qmd",
requestedProvider: "qmd",
withMemorySourceCounts: true,
}),
files: 10,
chunks: 42,
sourceCounts: [{ source: "memory" as const, files: 10, chunks: 42 }],
});
const result = await getMemorySearchManager({ cfg, agentId, purpose: "status" });
const manager = requireManager(result);
expect(manager.status()).toMatchObject({
backend: "qmd",
files: 10,
chunks: 42,
sourceCounts: [{ source: "memory", files: 10, chunks: 42 }],
});
// eslint-disable-next-line @typescript-eslint/unbound-method
expect(createQmdManagerMock).toHaveBeenCalledWith(
expect.objectContaining({ agentId, mode: "status" }),
);
});
it("reuses cached full qmd manager for status-only requests", async () => {
const agentId = "status-reuses-full-agent";
const cfg = createQmdCfg(agentId);
const full = await getMemorySearchManager({ cfg, agentId });
const status = await getMemorySearchManager({ cfg, agentId, purpose: "status" });
requireManager(full);
requireManager(status);
expect(status.manager).not.toBe(full.manager);
// eslint-disable-next-line @typescript-eslint/unbound-method
expect(createQmdManagerMock).toHaveBeenCalledTimes(1);
await status.manager?.close?.();
expect(mockPrimary.close).not.toHaveBeenCalled();
const fullAgain = await getMemorySearchManager({ cfg, agentId });
expect(fullAgain.manager).toBe(full.manager);
});
it("gets a fresh qmd manager for later status requests after close", async () => {
const agentId = "status-eviction-agent";
const cfg = createQmdCfg(agentId);
const first = await getMemorySearchManager({ cfg, agentId, purpose: "status" });
const firstManager = requireManager(first);
await firstManager.close?.();
const second = await getMemorySearchManager({ cfg, agentId, purpose: "status" });
requireManager(second);
// eslint-disable-next-line @typescript-eslint/unbound-method
expect(createQmdManagerMock).toHaveBeenCalledTimes(2);
expect(mockPrimary.close).toHaveBeenCalledTimes(1);
});
it("does not evict a newer cached wrapper when closing an older failed wrapper", async () => {
const retryAgentId = "retry-agent-close";
const {
cfg,
manager: firstManager,
firstResult: first,
} = await createFailedQmdSearchHarness({
agentId: retryAgentId,
errorMessage: "qmd query failed",
});
await firstManager.search("hello");
const second = await getMemorySearchManager({ cfg, agentId: retryAgentId });
const secondManager = requireManager(second);
expect(second.manager).not.toBe(first.manager);
await firstManager.close?.();
const third = await getMemorySearchManager({ cfg, agentId: retryAgentId });
expect(third.manager).toBe(secondManager);
// eslint-disable-next-line @typescript-eslint/unbound-method
expect(createQmdManagerMock).toHaveBeenCalledTimes(2);
});
it("falls back to builtin search when qmd fails with sqlite busy", async () => {
const retryAgentId = "retry-agent-busy";
const { manager: firstManager } = await createFailedQmdSearchHarness({
agentId: retryAgentId,
errorMessage: "qmd index busy while reading results: SQLITE_BUSY: database is locked",
});
const results = await firstManager.search("hello");
expect(results).toHaveLength(1);
expect(results[0]?.path).toBe("MEMORY.md");
expect(fallbackSearch).toHaveBeenCalledTimes(1);
});
it("keeps original qmd error when fallback manager initialization fails", async () => {
const retryAgentId = "retry-agent-no-fallback-auth";
const { manager: firstManager } = await createFailedQmdSearchHarness({
agentId: retryAgentId,
errorMessage: "qmd query failed",
});
mockMemoryIndexGet.mockRejectedValueOnce(new Error("No API key found for provider openai"));
await expect(firstManager.search("hello")).rejects.toThrow("qmd query failed");
});
it("closes cached managers on global teardown", async () => {
const cfg = createQmdCfg("teardown-agent");
const first = await getMemorySearchManager({ cfg, agentId: "teardown-agent" });
const firstManager = requireManager(first);
await closeAllMemorySearchManagers();
expect(mockPrimary.close).toHaveBeenCalledTimes(1);
expect(mockCloseAllMemoryIndexManagers).toHaveBeenCalledTimes(1);
const second = await getMemorySearchManager({ cfg, agentId: "teardown-agent" });
expect(second.manager).toBeTruthy();
expect(second.manager).not.toBe(firstManager);
// eslint-disable-next-line @typescript-eslint/unbound-method
expect(createQmdManagerMock).toHaveBeenCalledTimes(2);
});
it("closes builtin index managers on teardown after runtime is loaded", async () => {
const retryAgentId = "teardown-with-fallback";
const { manager } = await createFailedQmdSearchHarness({
agentId: retryAgentId,
errorMessage: "qmd query failed",
});
await manager.search("hello");
await closeAllMemorySearchManagers();
expect(mockCloseAllMemoryIndexManagers).toHaveBeenCalledTimes(1);
});
});

View File

@@ -7,7 +7,7 @@ import {
type MemorySyncProgressUpdate,
type OpenClawConfig,
type ResolvedQmdConfig,
} from "../api.js";
} from "../engine-host-api.js";
const MEMORY_SEARCH_MANAGER_CACHE_KEY = Symbol.for("openclaw.memorySearchManagerCache");
type MemorySearchManagerCacheStore = {

View File

@@ -0,0 +1 @@
export { loadSqliteVecExtension } from "../engine-host-api.js";

View File

@@ -0,0 +1 @@
export { requireNodeSqlite } from "../engine-host-api.js";

View File

@@ -0,0 +1,173 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import { mergeHybridResults } from "./hybrid.js";
import {
applyTemporalDecayToHybridResults,
applyTemporalDecayToScore,
calculateTemporalDecayMultiplier,
} from "./temporal-decay.js";
const DAY_MS = 24 * 60 * 60 * 1000;
const NOW_MS = Date.UTC(2026, 1, 10, 0, 0, 0);
const tempDirs: string[] = [];
async function makeTempDir(): Promise<string> {
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-temporal-decay-"));
tempDirs.push(dir);
return dir;
}
function createVectorMemoryEntry(params: {
id: string;
path: string;
snippet: string;
vectorScore: number;
}) {
return {
id: params.id,
path: params.path,
startLine: 1,
endLine: 1,
source: "memory" as const,
snippet: params.snippet,
vectorScore: params.vectorScore,
};
}
async function mergeVectorResultsWithTemporalDecay(
vector: Parameters<typeof mergeHybridResults>[0]["vector"],
) {
return mergeHybridResults({
vectorWeight: 1,
textWeight: 0,
temporalDecay: { enabled: true, halfLifeDays: 30 },
mmr: { enabled: false },
nowMs: NOW_MS,
vector,
keyword: [],
});
}
afterEach(async () => {
await Promise.all(
tempDirs.splice(0).map(async (dir) => {
await fs.rm(dir, { recursive: true, force: true });
}),
);
});
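// Decay model exercised below: multiplier = exp(-ln(2) * ageInDays / halfLifeDays),
// i.e. a score halves every halfLifeDays. Date-named memory notes derive their age from
// the filename, other sources fall back to file mtime, and evergreen files (MEMORY.md,
// non-dated topic files such as memory/projects.md) are left undecayed.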
describe("temporal decay", () => {
it("matches exponential decay formula", () => {
const halfLifeDays = 30;
const ageInDays = 10;
const lambda = Math.LN2 / halfLifeDays;
const expectedMultiplier = Math.exp(-lambda * ageInDays);
expect(calculateTemporalDecayMultiplier({ ageInDays, halfLifeDays })).toBeCloseTo(
expectedMultiplier,
);
expect(applyTemporalDecayToScore({ score: 0.8, ageInDays, halfLifeDays })).toBeCloseTo(
0.8 * expectedMultiplier,
);
});
it("is 0.5 exactly at half-life", () => {
expect(calculateTemporalDecayMultiplier({ ageInDays: 30, halfLifeDays: 30 })).toBeCloseTo(0.5);
});
it("does not decay evergreen memory files", async () => {
const dir = await makeTempDir();
const rootMemoryPath = path.join(dir, "MEMORY.md");
const topicPath = path.join(dir, "memory", "projects.md");
await fs.mkdir(path.dirname(topicPath), { recursive: true });
await fs.writeFile(rootMemoryPath, "evergreen");
await fs.writeFile(topicPath, "topic evergreen");
const veryOld = new Date(Date.UTC(2010, 0, 1));
await fs.utimes(rootMemoryPath, veryOld, veryOld);
await fs.utimes(topicPath, veryOld, veryOld);
const decayed = await applyTemporalDecayToHybridResults({
results: [
{ path: "MEMORY.md", score: 1, source: "memory" },
{ path: "memory/projects.md", score: 0.75, source: "memory" },
],
workspaceDir: dir,
temporalDecay: { enabled: true, halfLifeDays: 30 },
nowMs: NOW_MS,
});
expect(decayed[0]?.score).toBeCloseTo(1);
expect(decayed[1]?.score).toBeCloseTo(0.75);
});
it("applies decay in hybrid merging before ranking", async () => {
const merged = await mergeVectorResultsWithTemporalDecay([
createVectorMemoryEntry({
id: "old",
path: "memory/2025-01-01.md",
snippet: "old but high",
vectorScore: 0.95,
}),
createVectorMemoryEntry({
id: "new",
path: "memory/2026-02-10.md",
snippet: "new and relevant",
vectorScore: 0.8,
}),
]);
expect(merged[0]?.path).toBe("memory/2026-02-10.md");
expect(merged[0]?.score ?? 0).toBeGreaterThan(merged[1]?.score ?? 0);
});
it("handles future dates, zero age, and very old memories", async () => {
const merged = await mergeVectorResultsWithTemporalDecay([
createVectorMemoryEntry({
id: "future",
path: "memory/2099-01-01.md",
snippet: "future",
vectorScore: 0.9,
}),
createVectorMemoryEntry({
id: "today",
path: "memory/2026-02-10.md",
snippet: "today",
vectorScore: 0.8,
}),
createVectorMemoryEntry({
id: "very-old",
path: "memory/2000-01-01.md",
snippet: "ancient",
vectorScore: 1,
}),
]);
const byPath = new Map(merged.map((entry) => [entry.path, entry]));
expect(byPath.get("memory/2099-01-01.md")?.score).toBeCloseTo(0.9);
expect(byPath.get("memory/2026-02-10.md")?.score).toBeCloseTo(0.8);
expect(byPath.get("memory/2000-01-01.md")?.score ?? 1).toBeLessThan(0.001);
});
it("uses file mtime fallback for non-memory sources", async () => {
const dir = await makeTempDir();
const sessionPath = path.join(dir, "sessions", "thread.jsonl");
await fs.mkdir(path.dirname(sessionPath), { recursive: true });
await fs.writeFile(sessionPath, "{}\n");
const oldMtime = new Date(NOW_MS - 30 * DAY_MS);
await fs.utimes(sessionPath, oldMtime, oldMtime);
const decayed = await applyTemporalDecayToHybridResults({
results: [{ path: "sessions/thread.jsonl", score: 1, source: "sessions" }],
workspaceDir: dir,
temporalDecay: { enabled: true, halfLifeDays: 30 },
nowMs: NOW_MS,
});
expect(decayed[0]?.score).toBeCloseTo(0.5, 2);
});
});

View File

@@ -0,0 +1,64 @@
import {
OPENAI_BATCH_ENDPOINT,
runOpenAiEmbeddingBatches,
type MemoryChunk,
} from "../engine-host-api.js";
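// Test double for the OpenAI embedding provider: exposes embedQuery/embedBatch for direct
// embedding, plus a runtime whose batchEmbed maps each chunk to an OpenAI batch request
// keyed by its index and reads the vectors back out of the batch result by custom_id.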
export function createOpenAIEmbeddingProviderMock(params: {
embedQuery: (input: string) => Promise<number[]>;
embedBatch: (input: string[]) => Promise<number[][]>;
}) {
const openAiClient = {
baseUrl: "https://api.openai.com/v1",
headers: { Authorization: "Bearer test", "Content-Type": "application/json" },
model: "text-embedding-3-small",
};
return {
requestedProvider: "openai",
provider: {
id: "openai",
model: "text-embedding-3-small",
embedQuery: params.embedQuery,
embedBatch: params.embedBatch,
},
runtime: {
id: "openai",
cacheKeyData: {
provider: "openai",
baseUrl: openAiClient.baseUrl,
model: openAiClient.model,
},
batchEmbed: async (options: {
agentId: string;
chunks: MemoryChunk[];
wait: boolean;
concurrency: number;
pollIntervalMs: number;
timeoutMs: number;
debug: (message: string, data?: Record<string, unknown>) => void;
}) => {
const byCustomId = await runOpenAiEmbeddingBatches({
openAi: openAiClient,
agentId: options.agentId,
requests: options.chunks.map((chunk: MemoryChunk, index: number) => ({
custom_id: String(index),
method: "POST",
url: OPENAI_BATCH_ENDPOINT,
body: {
model: openAiClient.model,
input: chunk.text,
},
})),
wait: options.wait,
concurrency: options.concurrency,
pollIntervalMs: options.pollIntervalMs,
timeoutMs: options.timeoutMs,
debug: options.debug,
});
return options.chunks.map(
(_: MemoryChunk, index: number) => byCustomId.get(String(index)) ?? [],
);
},
},
};
}

View File

@@ -0,0 +1,34 @@
import * as ssrf from "openclaw/plugin-sdk/ssrf-runtime";
import { vi } from "vitest";
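// Stubs resolvePinnedHostnameWithPolicy so SSRF-guarded requests in tests resolve the
// pinned hostname to a fixed public address (93.184.216.34) instead of doing real DNS.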
export function mockPublicPinnedHostname() {
return vi.spyOn(ssrf, "resolvePinnedHostnameWithPolicy").mockImplementation(async (hostname) => {
const normalized = hostname.trim().toLowerCase().replace(/\.$/, "");
const addresses = ["93.184.216.34"];
const lookup = ((host: string, options?: unknown, callback?: unknown) => {
const cb =
typeof options === "function"
? (options as (err: NodeJS.ErrnoException | null, address: unknown) => void)
: (callback as (err: NodeJS.ErrnoException | null, address: unknown) => void);
if (!cb) {
return;
}
if (host.trim().toLowerCase().replace(/\.$/, "") !== normalized) {
cb(null, []);
return;
}
cb(
null,
addresses.map((address) => ({
address,
family: address.includes(":") ? 6 : 4,
})),
);
}) as never;
return {
hostname: normalized,
addresses,
lookup,
};
});
}

View File

@@ -0,0 +1,23 @@
import type { OpenClawConfig } from "../engine-host-api.js";
import type { MemoryIndexManager } from "./index.js";
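// Installs the embedding test mocks before the manager module is dynamically imported,
// then returns the resolved manager, failing loudly if it is missing or lacks sync().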
export async function getRequiredMemoryIndexManager(params: {
cfg: OpenClawConfig;
agentId?: string;
purpose?: "default" | "status";
}): Promise<MemoryIndexManager> {
await import("./embedding.test-mocks.js");
const { getMemorySearchManager } = await import("./index.js");
const result = await getMemorySearchManager({
cfg: params.cfg,
agentId: params.agentId ?? "main",
purpose: params.purpose,
});
if (!result.manager) {
throw new Error("manager missing");
}
if (!("sync" in result.manager) || typeof result.manager.sync !== "function") {
throw new Error("manager does not support sync");
}
return result.manager as unknown as MemoryIndexManager;
}

View File

@@ -0,0 +1,13 @@
import type { OpenClawConfig } from "../engine-host-api.js";
import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
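// Convenience wrapper for tests that just need a manager instance and prefer a hard
// failure over a null check when none can be created.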
export async function createMemoryManagerOrThrow(
cfg: OpenClawConfig,
agentId = "main",
): Promise<MemoryIndexManager> {
const result = await getMemorySearchManager({ cfg, agentId });
if (!result.manager) {
throw new Error("manager missing");
}
return result.manager as unknown as MemoryIndexManager;
}

View File

@@ -0,0 +1,13 @@
import { vi } from "vitest";
// Unit tests: avoid importing the real chokidar implementation (native fsevents, etc.).
vi.mock("chokidar", () => ({
default: {
watch: () => ({ on: () => {}, close: async () => {} }),
},
watch: () => ({ on: () => {}, close: async () => {} }),
}));
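// Report the sqlite-vec extension as unavailable so unit tests never attempt to load
// the native extension.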
vi.mock("./sqlite-vec.js", () => ({
loadSqliteVecExtension: async () => ({ ok: false, error: "sqlite-vec disabled in tests" }),
}));