mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-03 06:40:22 +00:00
refactor: move memory engine behind plugin adapters
This commit is contained in:
@@ -1 +1 @@
|
||||
export * from "openclaw/plugin-sdk/memory-core-host";
|
||||
export * from "openclaw/plugin-sdk/memory-core-host-runtime";
|
||||
|
||||
1
extensions/memory-core/src/engine-host-api.ts
Normal file
1
extensions/memory-core/src/engine-host-api.ts
Normal file
@@ -0,0 +1 @@
|
||||
export * from "openclaw/plugin-sdk/memory-core-host-engine";
|
||||
1
extensions/memory-core/src/memory/backend-config.ts
Normal file
1
extensions/memory-core/src/memory/backend-config.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { resolveMemoryBackendConfig } from "../engine-host-api.js";
|
||||
@@ -0,0 +1,138 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, beforeAll, beforeEach, expect, vi, type Mock } from "vitest";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import type { MemoryIndexManager, MemorySearchManager } from "./index.js";
|
||||
|
||||
type EmbeddingTestMocksModule = typeof import("./embedding.test-mocks.js");
|
||||
type MemoryIndexModule = typeof import("./index.js");
|
||||
type MemorySearchManagerHandle = Awaited<
|
||||
ReturnType<MemoryIndexModule["getMemorySearchManager"]>
|
||||
>["manager"];
|
||||
|
||||
export function installEmbeddingManagerFixture(opts: {
|
||||
fixturePrefix: string;
|
||||
largeTokens: number;
|
||||
smallTokens: number;
|
||||
createCfg: (params: {
|
||||
workspaceDir: string;
|
||||
indexPath: string;
|
||||
tokens: number;
|
||||
}) => OpenClawConfig;
|
||||
resetIndexEachTest?: boolean;
|
||||
}) {
|
||||
const resetIndexEachTest = opts.resetIndexEachTest ?? true;
|
||||
|
||||
let fixtureRoot: string | undefined;
|
||||
let workspaceDir: string | undefined;
|
||||
let memoryDir: string | undefined;
|
||||
let managerLarge: MemoryIndexManager | undefined;
|
||||
let managerSmall: MemoryIndexManager | undefined;
|
||||
let embedBatch: Mock<(texts: string[]) => Promise<number[][]>> | undefined;
|
||||
let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
|
||||
let resetEmbeddingMocks: EmbeddingTestMocksModule["resetEmbeddingMocks"];
|
||||
|
||||
const resetManager = (manager: MemoryIndexManager) => {
|
||||
(manager as unknown as { resetIndex: () => void }).resetIndex();
|
||||
(manager as unknown as { dirty: boolean }).dirty = true;
|
||||
};
|
||||
|
||||
const requireValue = <T>(value: T | undefined, name: string): T => {
|
||||
if (!value) {
|
||||
throw new Error(`${name} missing`);
|
||||
}
|
||||
return value;
|
||||
};
|
||||
|
||||
const requireIndexManager = (
|
||||
manager: MemorySearchManagerHandle,
|
||||
name: string,
|
||||
): MemoryIndexManager => {
|
||||
if (!manager) {
|
||||
throw new Error(`${name} missing`);
|
||||
}
|
||||
if (!("resetIndex" in manager) || typeof manager.resetIndex !== "function") {
|
||||
throw new Error(`${name} is not a MemoryIndexManager`);
|
||||
}
|
||||
return manager as unknown as MemoryIndexManager;
|
||||
};
|
||||
|
||||
beforeAll(async () => {
|
||||
vi.resetModules();
|
||||
await import("./embedding.test-mocks.js");
|
||||
const embeddingMocks = await import("./embedding.test-mocks.js");
|
||||
embedBatch = embeddingMocks.getEmbedBatchMock();
|
||||
resetEmbeddingMocks = embeddingMocks.resetEmbeddingMocks;
|
||||
({ getMemorySearchManager } = await import("./index.js"));
|
||||
fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), opts.fixturePrefix));
|
||||
workspaceDir = path.join(fixtureRoot, "workspace");
|
||||
memoryDir = path.join(workspaceDir, "memory");
|
||||
await fs.mkdir(memoryDir, { recursive: true });
|
||||
|
||||
const indexPathLarge = path.join(fixtureRoot, "index.large.sqlite");
|
||||
const indexPathSmall = path.join(fixtureRoot, "index.small.sqlite");
|
||||
|
||||
const large = await getMemorySearchManager({
|
||||
cfg: opts.createCfg({
|
||||
workspaceDir,
|
||||
indexPath: indexPathLarge,
|
||||
tokens: opts.largeTokens,
|
||||
}),
|
||||
agentId: "main",
|
||||
});
|
||||
expect(large.manager).not.toBeNull();
|
||||
managerLarge = requireIndexManager(large.manager, "managerLarge");
|
||||
|
||||
const small = await getMemorySearchManager({
|
||||
cfg: opts.createCfg({
|
||||
workspaceDir,
|
||||
indexPath: indexPathSmall,
|
||||
tokens: opts.smallTokens,
|
||||
}),
|
||||
agentId: "main",
|
||||
});
|
||||
expect(small.manager).not.toBeNull();
|
||||
managerSmall = requireIndexManager(small.manager, "managerSmall");
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
if (managerLarge) {
|
||||
await managerLarge.close();
|
||||
managerLarge = undefined;
|
||||
}
|
||||
if (managerSmall) {
|
||||
await managerSmall.close();
|
||||
managerSmall = undefined;
|
||||
}
|
||||
if (fixtureRoot) {
|
||||
await fs.rm(fixtureRoot, { recursive: true, force: true });
|
||||
fixtureRoot = undefined;
|
||||
}
|
||||
});
|
||||
|
||||
beforeEach(async () => {
|
||||
resetEmbeddingMocks();
|
||||
|
||||
const dir = requireValue(memoryDir, "memoryDir");
|
||||
await fs.rm(dir, { recursive: true, force: true });
|
||||
await fs.mkdir(dir, { recursive: true });
|
||||
|
||||
if (resetIndexEachTest) {
|
||||
resetManager(requireValue(managerLarge, "managerLarge"));
|
||||
resetManager(requireValue(managerSmall, "managerSmall"));
|
||||
}
|
||||
});
|
||||
|
||||
return {
|
||||
get embedBatch() {
|
||||
return requireValue(embedBatch, "embedBatch");
|
||||
},
|
||||
getFixtureRoot: () => requireValue(fixtureRoot, "fixtureRoot"),
|
||||
getWorkspaceDir: () => requireValue(workspaceDir, "workspaceDir"),
|
||||
getMemoryDir: () => requireValue(memoryDir, "memoryDir"),
|
||||
getManagerLarge: () => requireValue(managerLarge, "managerLarge"),
|
||||
getManagerSmall: () => requireValue(managerSmall, "managerSmall"),
|
||||
resetManager,
|
||||
};
|
||||
}
|
||||
39
extensions/memory-core/src/memory/embedding.test-mocks.ts
Normal file
39
extensions/memory-core/src/memory/embedding.test-mocks.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import { vi } from "vitest";
|
||||
import "./test-runtime-mocks.js";
|
||||
|
||||
// Avoid exporting vitest mock types (TS2742 under pnpm + d.ts emit).
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
type AnyMock = any;
|
||||
|
||||
const hoisted = vi.hoisted(() => ({
|
||||
embedBatch: vi.fn(async (texts: string[]) => texts.map(() => [0, 1, 0])),
|
||||
embedQuery: vi.fn(async () => [0, 1, 0]),
|
||||
}));
|
||||
|
||||
export function getEmbedBatchMock(): AnyMock {
|
||||
return hoisted.embedBatch;
|
||||
}
|
||||
|
||||
export function getEmbedQueryMock(): AnyMock {
|
||||
return hoisted.embedQuery;
|
||||
}
|
||||
|
||||
export function resetEmbeddingMocks(): void {
|
||||
hoisted.embedBatch.mockReset();
|
||||
hoisted.embedQuery.mockReset();
|
||||
hoisted.embedBatch.mockImplementation(async (texts: string[]) => texts.map(() => [0, 1, 0]));
|
||||
hoisted.embedQuery.mockImplementation(async () => [0, 1, 0]);
|
||||
}
|
||||
|
||||
vi.mock("./embeddings.js", () => ({
|
||||
createEmbeddingProvider: async () => ({
|
||||
requestedProvider: "openai",
|
||||
provider: {
|
||||
id: "mock",
|
||||
model: "mock-embed",
|
||||
maxInputTokens: 8192,
|
||||
embedQuery: hoisted.embedQuery,
|
||||
embedBatch: hoisted.embedBatch,
|
||||
},
|
||||
}),
|
||||
}));
|
||||
1
extensions/memory-core/src/memory/embeddings-ollama.ts
Normal file
1
extensions/memory-core/src/memory/embeddings-ollama.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { DEFAULT_OLLAMA_EMBEDDING_MODEL } from "./embeddings.js";
|
||||
175
extensions/memory-core/src/memory/embeddings.ts
Normal file
175
extensions/memory-core/src/memory/embeddings.ts
Normal file
@@ -0,0 +1,175 @@
|
||||
import {
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
DEFAULT_LOCAL_MODEL,
|
||||
DEFAULT_MISTRAL_EMBEDDING_MODEL,
|
||||
DEFAULT_OLLAMA_EMBEDDING_MODEL,
|
||||
DEFAULT_OPENAI_EMBEDDING_MODEL,
|
||||
DEFAULT_VOYAGE_EMBEDDING_MODEL,
|
||||
getMemoryEmbeddingProvider,
|
||||
listMemoryEmbeddingProviders,
|
||||
type MemoryEmbeddingProvider,
|
||||
type MemoryEmbeddingProviderAdapter,
|
||||
type MemoryEmbeddingProviderCreateOptions,
|
||||
type MemoryEmbeddingProviderRuntime,
|
||||
} from "../engine-host-api.js";
|
||||
import { canAutoSelectLocal } from "./provider-adapters.js";
|
||||
|
||||
export {
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
DEFAULT_LOCAL_MODEL,
|
||||
DEFAULT_MISTRAL_EMBEDDING_MODEL,
|
||||
DEFAULT_OLLAMA_EMBEDDING_MODEL,
|
||||
DEFAULT_OPENAI_EMBEDDING_MODEL,
|
||||
DEFAULT_VOYAGE_EMBEDDING_MODEL,
|
||||
} from "../engine-host-api.js";
|
||||
|
||||
export type EmbeddingProvider = MemoryEmbeddingProvider;
|
||||
export type EmbeddingProviderId = string;
|
||||
export type EmbeddingProviderRequest = string;
|
||||
export type EmbeddingProviderFallback = string;
|
||||
export type EmbeddingProviderRuntime = MemoryEmbeddingProviderRuntime;
|
||||
|
||||
export type EmbeddingProviderResult = {
|
||||
provider: EmbeddingProvider | null;
|
||||
requestedProvider: EmbeddingProviderRequest;
|
||||
fallbackFrom?: string;
|
||||
fallbackReason?: string;
|
||||
providerUnavailableReason?: string;
|
||||
runtime?: EmbeddingProviderRuntime;
|
||||
};
|
||||
|
||||
type CreateEmbeddingProviderOptions = MemoryEmbeddingProviderCreateOptions & {
|
||||
provider: EmbeddingProviderRequest;
|
||||
fallback: EmbeddingProviderFallback;
|
||||
};
|
||||
|
||||
function formatErrorMessage(err: unknown): string {
|
||||
return err instanceof Error ? err.message : String(err);
|
||||
}
|
||||
|
||||
function formatProviderError(adapter: MemoryEmbeddingProviderAdapter, err: unknown): string {
|
||||
return adapter.formatSetupError?.(err) ?? formatErrorMessage(err);
|
||||
}
|
||||
|
||||
function shouldContinueAutoSelection(
|
||||
adapter: MemoryEmbeddingProviderAdapter,
|
||||
err: unknown,
|
||||
): boolean {
|
||||
return adapter.shouldContinueAutoSelection?.(err) ?? false;
|
||||
}
|
||||
|
||||
function getAdapter(id: string): MemoryEmbeddingProviderAdapter {
|
||||
const adapter = getMemoryEmbeddingProvider(id);
|
||||
if (!adapter) {
|
||||
throw new Error(`Unknown memory embedding provider: ${id}`);
|
||||
}
|
||||
return adapter;
|
||||
}
|
||||
|
||||
function listAutoSelectAdapters(
|
||||
options: CreateEmbeddingProviderOptions,
|
||||
): MemoryEmbeddingProviderAdapter[] {
|
||||
return listMemoryEmbeddingProviders()
|
||||
.filter((adapter) => typeof adapter.autoSelectPriority === "number")
|
||||
.filter((adapter) =>
|
||||
adapter.id === "local" ? canAutoSelectLocal(options.local?.modelPath) : true,
|
||||
)
|
||||
.toSorted(
|
||||
(a, b) =>
|
||||
(a.autoSelectPriority ?? Number.MAX_SAFE_INTEGER) -
|
||||
(b.autoSelectPriority ?? Number.MAX_SAFE_INTEGER),
|
||||
);
|
||||
}
|
||||
|
||||
function resolveProviderModel(
|
||||
adapter: MemoryEmbeddingProviderAdapter,
|
||||
requestedModel: string,
|
||||
): string {
|
||||
const trimmed = requestedModel.trim();
|
||||
if (trimmed) {
|
||||
return trimmed;
|
||||
}
|
||||
return adapter.defaultModel ?? "";
|
||||
}
|
||||
|
||||
async function createWithAdapter(
|
||||
adapter: MemoryEmbeddingProviderAdapter,
|
||||
options: CreateEmbeddingProviderOptions,
|
||||
): Promise<EmbeddingProviderResult> {
|
||||
const result = await adapter.create({
|
||||
...options,
|
||||
model: resolveProviderModel(adapter, options.model),
|
||||
});
|
||||
return {
|
||||
provider: result.provider,
|
||||
requestedProvider: options.provider,
|
||||
runtime: result.runtime,
|
||||
};
|
||||
}
|
||||
|
||||
export async function createEmbeddingProvider(
|
||||
options: CreateEmbeddingProviderOptions,
|
||||
): Promise<EmbeddingProviderResult> {
|
||||
if (options.provider === "auto") {
|
||||
const reasons: string[] = [];
|
||||
for (const adapter of listAutoSelectAdapters(options)) {
|
||||
try {
|
||||
const result = await createWithAdapter(adapter, {
|
||||
...options,
|
||||
provider: adapter.id,
|
||||
});
|
||||
return {
|
||||
...result,
|
||||
requestedProvider: "auto",
|
||||
};
|
||||
} catch (err) {
|
||||
const message = formatProviderError(adapter, err);
|
||||
if (shouldContinueAutoSelection(adapter, err)) {
|
||||
reasons.push(message);
|
||||
continue;
|
||||
}
|
||||
const wrapped = new Error(message) as Error & { cause?: unknown };
|
||||
wrapped.cause = err;
|
||||
throw wrapped;
|
||||
}
|
||||
}
|
||||
return {
|
||||
provider: null,
|
||||
requestedProvider: "auto",
|
||||
providerUnavailableReason:
|
||||
reasons.length > 0 ? reasons.join("\n\n") : "No embeddings provider available.",
|
||||
};
|
||||
}
|
||||
|
||||
const primaryAdapter = getAdapter(options.provider);
|
||||
try {
|
||||
return await createWithAdapter(primaryAdapter, options);
|
||||
} catch (primaryErr) {
|
||||
const reason = formatProviderError(primaryAdapter, primaryErr);
|
||||
if (options.fallback && options.fallback !== "none" && options.fallback !== options.provider) {
|
||||
const fallbackAdapter = getAdapter(options.fallback);
|
||||
try {
|
||||
const fallbackResult = await createWithAdapter(fallbackAdapter, {
|
||||
...options,
|
||||
provider: options.fallback,
|
||||
});
|
||||
return {
|
||||
...fallbackResult,
|
||||
requestedProvider: options.provider,
|
||||
fallbackFrom: options.provider,
|
||||
fallbackReason: reason,
|
||||
};
|
||||
} catch (fallbackErr) {
|
||||
const fallbackReason = formatProviderError(fallbackAdapter, fallbackErr);
|
||||
const wrapped = new Error(
|
||||
`${reason}\n\nFallback to ${options.fallback} failed: ${fallbackReason}`,
|
||||
) as Error & { cause?: unknown };
|
||||
wrapped.cause = primaryErr;
|
||||
throw wrapped;
|
||||
}
|
||||
}
|
||||
const wrapped = new Error(reason) as Error & { cause?: unknown };
|
||||
wrapped.cause = primaryErr;
|
||||
throw wrapped;
|
||||
}
|
||||
}
|
||||
98
extensions/memory-core/src/memory/hybrid.test.ts
Normal file
98
extensions/memory-core/src/memory/hybrid.test.ts
Normal file
@@ -0,0 +1,98 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { bm25RankToScore, buildFtsQuery, mergeHybridResults } from "./hybrid.js";
|
||||
|
||||
describe("memory hybrid helpers", () => {
|
||||
it("buildFtsQuery tokenizes and AND-joins", () => {
|
||||
expect(buildFtsQuery("hello world")).toBe('"hello" AND "world"');
|
||||
expect(buildFtsQuery("FOO_bar baz-1")).toBe('"FOO_bar" AND "baz" AND "1"');
|
||||
expect(buildFtsQuery("金银价格")).toBe('"金银价格"');
|
||||
expect(buildFtsQuery("価格 2026年")).toBe('"価格" AND "2026年"');
|
||||
expect(buildFtsQuery(" ")).toBeNull();
|
||||
});
|
||||
|
||||
it("bm25RankToScore is monotonic and clamped", () => {
|
||||
expect(bm25RankToScore(0)).toBeCloseTo(1);
|
||||
expect(bm25RankToScore(1)).toBeCloseTo(0.5);
|
||||
expect(bm25RankToScore(10)).toBeLessThan(bm25RankToScore(1));
|
||||
expect(bm25RankToScore(-100)).toBeCloseTo(1, 1);
|
||||
});
|
||||
|
||||
it("bm25RankToScore preserves FTS5 BM25 relevance ordering", () => {
|
||||
const strongest = bm25RankToScore(-4.2);
|
||||
const middle = bm25RankToScore(-2.1);
|
||||
const weakest = bm25RankToScore(-0.5);
|
||||
|
||||
expect(strongest).toBeGreaterThan(middle);
|
||||
expect(middle).toBeGreaterThan(weakest);
|
||||
expect(strongest).not.toBe(middle);
|
||||
expect(middle).not.toBe(weakest);
|
||||
});
|
||||
|
||||
it("mergeHybridResults unions by id and combines weighted scores", async () => {
|
||||
const merged = await mergeHybridResults({
|
||||
vectorWeight: 0.7,
|
||||
textWeight: 0.3,
|
||||
vector: [
|
||||
{
|
||||
id: "a",
|
||||
path: "memory/a.md",
|
||||
startLine: 1,
|
||||
endLine: 2,
|
||||
source: "memory",
|
||||
snippet: "vec-a",
|
||||
vectorScore: 0.9,
|
||||
},
|
||||
],
|
||||
keyword: [
|
||||
{
|
||||
id: "b",
|
||||
path: "memory/b.md",
|
||||
startLine: 3,
|
||||
endLine: 4,
|
||||
source: "memory",
|
||||
snippet: "kw-b",
|
||||
textScore: 1.0,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(merged).toHaveLength(2);
|
||||
const a = merged.find((r) => r.path === "memory/a.md");
|
||||
const b = merged.find((r) => r.path === "memory/b.md");
|
||||
expect(a?.score).toBeCloseTo(0.7 * 0.9);
|
||||
expect(b?.score).toBeCloseTo(0.3 * 1.0);
|
||||
});
|
||||
|
||||
it("mergeHybridResults prefers keyword snippet when ids overlap", async () => {
|
||||
const merged = await mergeHybridResults({
|
||||
vectorWeight: 0.5,
|
||||
textWeight: 0.5,
|
||||
vector: [
|
||||
{
|
||||
id: "a",
|
||||
path: "memory/a.md",
|
||||
startLine: 1,
|
||||
endLine: 2,
|
||||
source: "memory",
|
||||
snippet: "vec-a",
|
||||
vectorScore: 0.2,
|
||||
},
|
||||
],
|
||||
keyword: [
|
||||
{
|
||||
id: "a",
|
||||
path: "memory/a.md",
|
||||
startLine: 1,
|
||||
endLine: 2,
|
||||
source: "memory",
|
||||
snippet: "kw-a",
|
||||
textScore: 1.0,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(merged).toHaveLength(1);
|
||||
expect(merged[0]?.snippet).toBe("kw-a");
|
||||
expect(merged[0]?.score).toBeCloseTo(0.5 * 0.2 + 0.5 * 1.0);
|
||||
});
|
||||
});
|
||||
1331
extensions/memory-core/src/memory/index.test.ts
Normal file
1331
extensions/memory-core/src/memory/index.test.ts
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,7 +3,7 @@ export type {
|
||||
MemoryEmbeddingProbeResult,
|
||||
MemorySearchManager,
|
||||
MemorySearchResult,
|
||||
} from "../api.js";
|
||||
} from "../engine-host-api.js";
|
||||
export {
|
||||
closeAllMemorySearchManagers,
|
||||
getMemorySearchManager,
|
||||
|
||||
1
extensions/memory-core/src/memory/internal.ts
Normal file
1
extensions/memory-core/src/memory/internal.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { buildFileEntry } from "../engine-host-api.js";
|
||||
@@ -1,6 +1,5 @@
|
||||
import fs from "node:fs/promises";
|
||||
import {
|
||||
buildGeminiEmbeddingRequest,
|
||||
buildMultimodalChunkForIndexing,
|
||||
chunkMarkdown,
|
||||
createSubsystemLogger,
|
||||
@@ -11,19 +10,12 @@ import {
|
||||
hashText,
|
||||
parseEmbedding,
|
||||
remapChunkLines,
|
||||
runGeminiEmbeddingBatches,
|
||||
runOpenAiEmbeddingBatches,
|
||||
runVoyageEmbeddingBatches,
|
||||
type EmbeddingInput,
|
||||
type GeminiBatchRequest,
|
||||
type MemoryChunk,
|
||||
type MemoryFileEntry,
|
||||
type MemorySource,
|
||||
type OpenAiBatchRequest,
|
||||
type SessionFileEntry,
|
||||
type VoyageBatchRequest,
|
||||
OPENAI_BATCH_ENDPOINT,
|
||||
} from "../api.js";
|
||||
} from "../engine-host-api.js";
|
||||
import { MemoryManagerSyncOps } from "./manager-sync-ops.js";
|
||||
|
||||
const VECTOR_TABLE = "chunks_vec";
|
||||
@@ -229,59 +221,67 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
if (!this.provider) {
|
||||
return hashText(JSON.stringify({ provider: "none", model: "fts-only" }));
|
||||
}
|
||||
if (this.provider.id === "openai" && this.openAi) {
|
||||
const entries = Object.entries(this.openAi.headers)
|
||||
.filter(([key]) => key.toLowerCase() !== "authorization")
|
||||
.toSorted(([a], [b]) => a.localeCompare(b))
|
||||
.map(([key, value]) => [key, value]);
|
||||
return hashText(
|
||||
JSON.stringify({
|
||||
provider: "openai",
|
||||
baseUrl: this.openAi.baseUrl,
|
||||
model: this.openAi.model,
|
||||
headers: entries,
|
||||
}),
|
||||
);
|
||||
}
|
||||
if (this.provider.id === "gemini" && this.gemini) {
|
||||
const entries = Object.entries(this.gemini.headers)
|
||||
.filter(([key]) => {
|
||||
const lower = key.toLowerCase();
|
||||
return lower !== "authorization" && lower !== "x-goog-api-key";
|
||||
})
|
||||
.toSorted(([a], [b]) => a.localeCompare(b))
|
||||
.map(([key, value]) => [key, value]);
|
||||
return hashText(
|
||||
JSON.stringify({
|
||||
provider: "gemini",
|
||||
baseUrl: this.gemini.baseUrl,
|
||||
model: this.gemini.model,
|
||||
outputDimensionality: this.gemini.outputDimensionality,
|
||||
headers: entries,
|
||||
}),
|
||||
);
|
||||
if (this.providerRuntime?.cacheKeyData) {
|
||||
return hashText(JSON.stringify(this.providerRuntime.cacheKeyData));
|
||||
}
|
||||
return hashText(JSON.stringify({ provider: this.provider.id, model: this.provider.model }));
|
||||
}
|
||||
|
||||
private buildBatchDebug(source: MemorySource, chunks: MemoryChunk[]) {
|
||||
return (message: string, data?: Record<string, unknown>) =>
|
||||
log.debug(
|
||||
message,
|
||||
data ? { ...data, source, chunks: chunks.length } : { source, chunks: chunks.length },
|
||||
);
|
||||
}
|
||||
|
||||
private async embedChunksWithBatch(
|
||||
chunks: MemoryChunk[],
|
||||
entry: MemoryFileEntry | SessionFileEntry,
|
||||
_entry: MemoryFileEntry | SessionFileEntry,
|
||||
source: MemorySource,
|
||||
): Promise<number[][]> {
|
||||
if (!this.provider) {
|
||||
const batchEmbed = this.providerRuntime?.batchEmbed;
|
||||
if (!this.provider || !batchEmbed) {
|
||||
return this.embedChunksInBatches(chunks);
|
||||
}
|
||||
if (this.provider.id === "openai" && this.openAi) {
|
||||
return this.embedChunksWithOpenAiBatch(chunks, entry, source);
|
||||
if (chunks.length === 0) {
|
||||
return [];
|
||||
}
|
||||
if (this.provider.id === "gemini" && this.gemini) {
|
||||
return this.embedChunksWithGeminiBatch(chunks, entry, source);
|
||||
const { embeddings, missing } = this.collectCachedEmbeddings(chunks);
|
||||
if (missing.length === 0) {
|
||||
return embeddings;
|
||||
}
|
||||
if (this.provider.id === "voyage" && this.voyage) {
|
||||
return this.embedChunksWithVoyageBatch(chunks, entry, source);
|
||||
|
||||
const missingChunks = missing.map((item) => item.chunk);
|
||||
const batchResult = await this.runBatchWithFallback({
|
||||
provider: this.provider.id,
|
||||
run: async () =>
|
||||
await batchEmbed({
|
||||
agentId: this.agentId,
|
||||
chunks: missingChunks,
|
||||
wait: this.batch.wait,
|
||||
concurrency: this.batch.concurrency,
|
||||
pollIntervalMs: this.batch.pollIntervalMs,
|
||||
timeoutMs: this.batch.timeoutMs,
|
||||
debug: this.buildBatchDebug(source, chunks),
|
||||
}),
|
||||
fallback: async () => await this.embedChunksInBatches(chunks),
|
||||
});
|
||||
if (!batchResult) {
|
||||
return this.embedChunksInBatches(chunks);
|
||||
}
|
||||
return this.embedChunksInBatches(chunks);
|
||||
const toCache: Array<{ hash: string; embedding: number[] }> = [];
|
||||
for (let index = 0; index < missing.length; index += 1) {
|
||||
const item = missing[index];
|
||||
const embedding = batchResult[index] ?? [];
|
||||
if (!item) {
|
||||
continue;
|
||||
}
|
||||
embeddings[item.index] = embedding;
|
||||
toCache.push({ hash: item.chunk.hash, embedding });
|
||||
}
|
||||
this.upsertEmbeddingCache(toCache);
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
private collectCachedEmbeddings(chunks: MemoryChunk[]): {
|
||||
@@ -305,221 +305,6 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
return { embeddings, missing };
|
||||
}
|
||||
|
||||
private buildBatchCustomId(params: {
|
||||
source: MemorySource;
|
||||
entry: MemoryFileEntry | SessionFileEntry;
|
||||
chunk: MemoryChunk;
|
||||
index: number;
|
||||
}): string {
|
||||
return hashText(
|
||||
`${params.source}:${params.entry.path}:${params.chunk.startLine}:${params.chunk.endLine}:${params.chunk.hash}:${params.index}`,
|
||||
);
|
||||
}
|
||||
|
||||
private buildBatchRequests<T extends { custom_id: string }>(params: {
|
||||
missing: Array<{ index: number; chunk: MemoryChunk }>;
|
||||
entry: MemoryFileEntry | SessionFileEntry;
|
||||
source: MemorySource;
|
||||
build: (chunk: MemoryChunk) => Omit<T, "custom_id">;
|
||||
}): { requests: T[]; mapping: Map<string, { index: number; hash: string }> } {
|
||||
const requests: T[] = [];
|
||||
const mapping = new Map<string, { index: number; hash: string }>();
|
||||
|
||||
for (const item of params.missing) {
|
||||
const chunk = item.chunk;
|
||||
const customId = this.buildBatchCustomId({
|
||||
source: params.source,
|
||||
entry: params.entry,
|
||||
chunk,
|
||||
index: item.index,
|
||||
});
|
||||
mapping.set(customId, { index: item.index, hash: chunk.hash });
|
||||
const built = params.build(chunk);
|
||||
requests.push({ custom_id: customId, ...built } as T);
|
||||
}
|
||||
|
||||
return { requests, mapping };
|
||||
}
|
||||
|
||||
private applyBatchEmbeddings(params: {
|
||||
byCustomId: Map<string, number[]>;
|
||||
mapping: Map<string, { index: number; hash: string }>;
|
||||
embeddings: number[][];
|
||||
}): void {
|
||||
const toCache: Array<{ hash: string; embedding: number[] }> = [];
|
||||
for (const [customId, embedding] of params.byCustomId.entries()) {
|
||||
const mapped = params.mapping.get(customId);
|
||||
if (!mapped) {
|
||||
continue;
|
||||
}
|
||||
params.embeddings[mapped.index] = embedding;
|
||||
toCache.push({ hash: mapped.hash, embedding });
|
||||
}
|
||||
this.upsertEmbeddingCache(toCache);
|
||||
}
|
||||
|
||||
private buildEmbeddingBatchRunnerOptions<TRequest>(params: {
|
||||
requests: TRequest[];
|
||||
chunks: MemoryChunk[];
|
||||
source: MemorySource;
|
||||
}): {
|
||||
agentId: string;
|
||||
requests: TRequest[];
|
||||
wait: boolean;
|
||||
concurrency: number;
|
||||
pollIntervalMs: number;
|
||||
timeoutMs: number;
|
||||
debug: (message: string, data?: Record<string, unknown>) => void;
|
||||
} {
|
||||
const { requests, chunks, source } = params;
|
||||
return {
|
||||
agentId: this.agentId,
|
||||
requests,
|
||||
wait: this.batch.wait,
|
||||
concurrency: this.batch.concurrency,
|
||||
pollIntervalMs: this.batch.pollIntervalMs,
|
||||
timeoutMs: this.batch.timeoutMs,
|
||||
debug: (message, data) =>
|
||||
log.debug(
|
||||
message,
|
||||
data ? { ...data, source, chunks: chunks.length } : { source, chunks: chunks.length },
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
private async embedChunksWithProviderBatch<TRequest extends { custom_id: string }>(params: {
|
||||
chunks: MemoryChunk[];
|
||||
entry: MemoryFileEntry | SessionFileEntry;
|
||||
source: MemorySource;
|
||||
provider: "voyage" | "openai" | "gemini";
|
||||
enabled: boolean;
|
||||
buildRequest: (chunk: MemoryChunk) => Omit<TRequest, "custom_id">;
|
||||
runBatch: (runnerOptions: {
|
||||
agentId: string;
|
||||
requests: TRequest[];
|
||||
wait: boolean;
|
||||
concurrency: number;
|
||||
pollIntervalMs: number;
|
||||
timeoutMs: number;
|
||||
debug: (message: string, data?: Record<string, unknown>) => void;
|
||||
}) => Promise<Map<string, number[]> | number[][]>;
|
||||
}): Promise<number[][]> {
|
||||
if (!params.enabled) {
|
||||
return this.embedChunksInBatches(params.chunks);
|
||||
}
|
||||
if (params.chunks.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const { embeddings, missing } = this.collectCachedEmbeddings(params.chunks);
|
||||
if (missing.length === 0) {
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
const { requests, mapping } = this.buildBatchRequests<TRequest>({
|
||||
missing,
|
||||
entry: params.entry,
|
||||
source: params.source,
|
||||
build: params.buildRequest,
|
||||
});
|
||||
const runnerOptions = this.buildEmbeddingBatchRunnerOptions({
|
||||
requests,
|
||||
chunks: params.chunks,
|
||||
source: params.source,
|
||||
});
|
||||
const batchResult = await this.runBatchWithFallback({
|
||||
provider: params.provider,
|
||||
run: async () => await params.runBatch(runnerOptions),
|
||||
fallback: async () => await this.embedChunksInBatches(params.chunks),
|
||||
});
|
||||
if (Array.isArray(batchResult)) {
|
||||
return batchResult;
|
||||
}
|
||||
this.applyBatchEmbeddings({ byCustomId: batchResult, mapping, embeddings });
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
private async embedChunksWithVoyageBatch(
|
||||
chunks: MemoryChunk[],
|
||||
entry: MemoryFileEntry | SessionFileEntry,
|
||||
source: MemorySource,
|
||||
): Promise<number[][]> {
|
||||
const voyage = this.voyage;
|
||||
return await this.embedChunksWithProviderBatch<VoyageBatchRequest>({
|
||||
chunks,
|
||||
entry,
|
||||
source,
|
||||
provider: "voyage",
|
||||
enabled: Boolean(voyage),
|
||||
buildRequest: (chunk) => ({
|
||||
body: { input: chunk.text },
|
||||
}),
|
||||
runBatch: async (runnerOptions) =>
|
||||
await runVoyageEmbeddingBatches({
|
||||
client: voyage!,
|
||||
...runnerOptions,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
private async embedChunksWithOpenAiBatch(
|
||||
chunks: MemoryChunk[],
|
||||
entry: MemoryFileEntry | SessionFileEntry,
|
||||
source: MemorySource,
|
||||
): Promise<number[][]> {
|
||||
const openAi = this.openAi;
|
||||
return await this.embedChunksWithProviderBatch<OpenAiBatchRequest>({
|
||||
chunks,
|
||||
entry,
|
||||
source,
|
||||
provider: "openai",
|
||||
enabled: Boolean(openAi),
|
||||
buildRequest: (chunk) => ({
|
||||
method: "POST",
|
||||
url: OPENAI_BATCH_ENDPOINT,
|
||||
body: {
|
||||
model: openAi?.model ?? this.provider?.model ?? "text-embedding-3-small",
|
||||
input: chunk.text,
|
||||
},
|
||||
}),
|
||||
runBatch: async (runnerOptions) =>
|
||||
await runOpenAiEmbeddingBatches({
|
||||
openAi: openAi!,
|
||||
...runnerOptions,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
private async embedChunksWithGeminiBatch(
|
||||
chunks: MemoryChunk[],
|
||||
entry: MemoryFileEntry | SessionFileEntry,
|
||||
source: MemorySource,
|
||||
): Promise<number[][]> {
|
||||
const gemini = this.gemini;
|
||||
if (chunks.some((chunk) => hasNonTextEmbeddingParts(chunk.embeddingInput))) {
|
||||
return await this.embedChunksInBatches(chunks);
|
||||
}
|
||||
return await this.embedChunksWithProviderBatch<GeminiBatchRequest>({
|
||||
chunks,
|
||||
entry,
|
||||
source,
|
||||
provider: "gemini",
|
||||
enabled: Boolean(gemini),
|
||||
buildRequest: (chunk) => ({
|
||||
request: buildGeminiEmbeddingRequest({
|
||||
input: chunk.embeddingInput ?? { text: chunk.text },
|
||||
taskType: "RETRIEVAL_DOCUMENT",
|
||||
modelPath: this.gemini?.modelPath,
|
||||
outputDimensionality: this.gemini?.outputDimensionality,
|
||||
}),
|
||||
}),
|
||||
runBatch: async (runnerOptions) =>
|
||||
await runGeminiEmbeddingBatches({
|
||||
gemini: gemini!,
|
||||
...runnerOptions,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
protected async embedBatchWithRetry(texts: string[]): Promise<number[][]> {
|
||||
if (texts.length === 0) {
|
||||
return [];
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { DatabaseSync } from "node:sqlite";
|
||||
import { cosineSimilarity, parseEmbedding, truncateUtf16Safe } from "../api.js";
|
||||
import { cosineSimilarity, parseEmbedding, truncateUtf16Safe } from "../engine-host-api.js";
|
||||
|
||||
const vectorToBlob = (embedding: number[]): Buffer =>
|
||||
Buffer.from(new Float32Array(embedding).buffer);
|
||||
|
||||
@@ -5,15 +5,8 @@ import path from "node:path";
|
||||
import type { DatabaseSync } from "node:sqlite";
|
||||
import chokidar, { FSWatcher } from "chokidar";
|
||||
import {
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
DEFAULT_MISTRAL_EMBEDDING_MODEL,
|
||||
DEFAULT_OLLAMA_EMBEDDING_MODEL,
|
||||
DEFAULT_OPENAI_EMBEDDING_MODEL,
|
||||
DEFAULT_VOYAGE_EMBEDDING_MODEL,
|
||||
buildCaseInsensitiveExtensionGlob,
|
||||
buildFileEntry,
|
||||
classifyMemoryMultimodalPath,
|
||||
createEmbeddingProvider,
|
||||
createSubsystemLogger,
|
||||
ensureDir,
|
||||
ensureMemoryIndexSchema,
|
||||
@@ -22,10 +15,8 @@ import {
|
||||
isFileMissingError,
|
||||
listMemoryFiles,
|
||||
listSessionFilesForAgent,
|
||||
loadSqliteVecExtension,
|
||||
normalizeExtraMemoryPaths,
|
||||
onSessionTranscriptUpdate,
|
||||
requireNodeSqlite,
|
||||
resolveAgentDir,
|
||||
resolveSessionTranscriptsDirForAgent,
|
||||
resolveUserPath,
|
||||
@@ -37,14 +28,22 @@ import {
|
||||
type OpenClawConfig,
|
||||
type ResolvedMemorySearchConfig,
|
||||
type SessionFileEntry,
|
||||
type EmbeddingProvider,
|
||||
type GeminiEmbeddingClient,
|
||||
type MistralEmbeddingClient,
|
||||
type OllamaEmbeddingClient,
|
||||
type OpenAiEmbeddingClient,
|
||||
type VoyageEmbeddingClient,
|
||||
buildSessionEntry,
|
||||
} from "../api.js";
|
||||
} from "../engine-host-api.js";
|
||||
import {
|
||||
createEmbeddingProvider,
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
DEFAULT_MISTRAL_EMBEDDING_MODEL,
|
||||
DEFAULT_OLLAMA_EMBEDDING_MODEL,
|
||||
DEFAULT_OPENAI_EMBEDDING_MODEL,
|
||||
DEFAULT_VOYAGE_EMBEDDING_MODEL,
|
||||
type EmbeddingProvider,
|
||||
type EmbeddingProviderId,
|
||||
type EmbeddingProviderRuntime,
|
||||
} from "./embeddings.js";
|
||||
import { buildFileEntry } from "./internal.js";
|
||||
import { loadSqliteVecExtension } from "./sqlite-vec.js";
|
||||
import { requireNodeSqlite } from "./sqlite.js";
|
||||
|
||||
type MemoryIndexMeta = {
|
||||
model: string;
|
||||
@@ -101,12 +100,8 @@ export abstract class MemoryManagerSyncOps {
|
||||
protected abstract readonly workspaceDir: string;
|
||||
protected abstract readonly settings: ResolvedMemorySearchConfig;
|
||||
protected provider: EmbeddingProvider | null = null;
|
||||
protected fallbackFrom?: "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama";
|
||||
protected openAi?: OpenAiEmbeddingClient;
|
||||
protected gemini?: GeminiEmbeddingClient;
|
||||
protected voyage?: VoyageEmbeddingClient;
|
||||
protected mistral?: MistralEmbeddingClient;
|
||||
protected ollama?: OllamaEmbeddingClient;
|
||||
protected fallbackFrom?: EmbeddingProviderId;
|
||||
protected providerRuntime?: EmbeddingProviderRuntime;
|
||||
protected abstract batch: {
|
||||
enabled: boolean;
|
||||
wait: boolean;
|
||||
@@ -1104,13 +1099,7 @@ export abstract class MemoryManagerSyncOps {
|
||||
timeoutMs: number;
|
||||
} {
|
||||
const batch = this.settings.remote?.batch;
|
||||
const enabled = Boolean(
|
||||
batch?.enabled &&
|
||||
this.provider &&
|
||||
((this.openAi && this.provider.id === "openai") ||
|
||||
(this.gemini && this.provider.id === "gemini") ||
|
||||
(this.voyage && this.provider.id === "voyage")),
|
||||
);
|
||||
const enabled = Boolean(batch?.enabled && this.provider && this.providerRuntime?.batchEmbed);
|
||||
return {
|
||||
enabled,
|
||||
wait: batch?.wait ?? true,
|
||||
@@ -1128,13 +1117,7 @@ export abstract class MemoryManagerSyncOps {
|
||||
if (this.fallbackFrom) {
|
||||
return false;
|
||||
}
|
||||
const fallbackFrom = this.provider.id as
|
||||
| "openai"
|
||||
| "gemini"
|
||||
| "local"
|
||||
| "voyage"
|
||||
| "mistral"
|
||||
| "ollama";
|
||||
const fallbackFrom = this.provider.id as EmbeddingProviderId;
|
||||
|
||||
const fallbackModel =
|
||||
fallback === "gemini"
|
||||
@@ -1163,11 +1146,7 @@ export abstract class MemoryManagerSyncOps {
|
||||
this.fallbackFrom = fallbackFrom;
|
||||
this.fallbackReason = reason;
|
||||
this.provider = fallbackResult.provider;
|
||||
this.openAi = fallbackResult.openAi;
|
||||
this.gemini = fallbackResult.gemini;
|
||||
this.voyage = fallbackResult.voyage;
|
||||
this.mistral = fallbackResult.mistral;
|
||||
this.ollama = fallbackResult.ollama;
|
||||
this.providerRuntime = fallbackResult.runtime;
|
||||
this.providerKey = this.computeProviderKey();
|
||||
this.batch = this.resolveBatchConfig();
|
||||
log.warn(`memory embeddings: switched to fallback provider (${fallback})`, { reason });
|
||||
|
||||
126
extensions/memory-core/src/memory/manager.async-search.test.ts
Normal file
126
extensions/memory-core/src/memory/manager.async-search.test.ts
Normal file
@@ -0,0 +1,126 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import type { MemoryIndexManager } from "./index.js";
|
||||
import { closeAllMemorySearchManagers } from "./index.js";
|
||||
import { createOpenAIEmbeddingProviderMock } from "./test-embeddings-mock.js";
|
||||
import { createMemoryManagerOrThrow } from "./test-manager.js";
|
||||
|
||||
const embedBatch = vi.fn(async (_input: string[]): Promise<number[][]> => []);
|
||||
const embedQuery = vi.fn(async (_input: string): Promise<number[]> => [0.2, 0.2, 0.2]);
|
||||
|
||||
vi.mock("./embeddings.js", () => ({
|
||||
createEmbeddingProvider: async (_options: unknown) =>
|
||||
createOpenAIEmbeddingProviderMock({
|
||||
embedQuery: embedQuery as unknown as (input: string) => Promise<number[]>,
|
||||
embedBatch: embedBatch as unknown as (input: string[]) => Promise<number[][]>,
|
||||
}),
|
||||
}));
|
||||
|
||||
describe("memory search async sync", () => {
|
||||
let workspaceDir: string;
|
||||
let indexPath: string;
|
||||
let manager: MemoryIndexManager | null = null;
|
||||
|
||||
const buildConfig = (): OpenClawConfig =>
|
||||
({
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "text-embedding-3-small",
|
||||
store: { path: indexPath },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: true },
|
||||
query: { minScore: 0 },
|
||||
remote: { batch: { enabled: false, wait: false } },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
}) as OpenClawConfig;
|
||||
|
||||
beforeEach(async () => {
|
||||
await closeAllMemorySearchManagers();
|
||||
embedBatch.mockClear();
|
||||
embedBatch.mockImplementation(async (input: string[]) => input.map(() => [0.2, 0.2, 0.2]));
|
||||
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-async-"));
|
||||
indexPath = path.join(workspaceDir, "index.sqlite");
|
||||
await fs.mkdir(path.join(workspaceDir, "memory"));
|
||||
await fs.writeFile(path.join(workspaceDir, "memory", "2026-01-07.md"), "hello\n");
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
vi.unstubAllGlobals();
|
||||
if (manager) {
|
||||
await manager.close();
|
||||
manager = null;
|
||||
}
|
||||
await closeAllMemorySearchManagers();
|
||||
await fs.rm(workspaceDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("does not await sync when searching", async () => {
|
||||
const cfg = buildConfig();
|
||||
manager = await createMemoryManagerOrThrow(cfg);
|
||||
|
||||
let releaseSync = () => {};
|
||||
const pending = new Promise<void>((resolve) => {
|
||||
releaseSync = () => resolve();
|
||||
}).finally(() => {
|
||||
(manager as unknown as { syncing: Promise<void> | null }).syncing = null;
|
||||
});
|
||||
const syncMock = vi.fn(async () => {
|
||||
(manager as unknown as { syncing: Promise<void> | null }).syncing = pending;
|
||||
return pending;
|
||||
});
|
||||
(manager as unknown as { sync: () => Promise<void> }).sync = syncMock;
|
||||
|
||||
const activeManager = manager;
|
||||
if (!activeManager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
await activeManager.search("hello");
|
||||
expect(syncMock).toHaveBeenCalledTimes(1);
|
||||
releaseSync();
|
||||
await vi.waitFor(() => {
|
||||
expect((manager as unknown as { syncing: Promise<void> | null }).syncing).toBeNull();
|
||||
});
|
||||
}, 300_000);
|
||||
|
||||
it("waits for in-flight search sync during close", async () => {
|
||||
const cfg = buildConfig();
|
||||
manager = await createMemoryManagerOrThrow(cfg);
|
||||
let releaseSync = () => {};
|
||||
const pendingSync = new Promise<void>((resolve) => {
|
||||
releaseSync = () => resolve();
|
||||
}).finally(() => {
|
||||
(manager as unknown as { syncing: Promise<void> | null }).syncing = null;
|
||||
});
|
||||
const syncMock = vi.fn(async () => {
|
||||
(manager as unknown as { syncing: Promise<void> | null }).syncing = pendingSync;
|
||||
return pendingSync;
|
||||
});
|
||||
(manager as unknown as { dirty: boolean }).dirty = true;
|
||||
(manager as unknown as { sync: () => Promise<void> }).sync = syncMock;
|
||||
|
||||
await manager.search("hello");
|
||||
await vi.waitFor(() => {
|
||||
expect((manager as unknown as { syncing: Promise<void> | null }).syncing).toBe(pendingSync);
|
||||
});
|
||||
|
||||
let closed = false;
|
||||
const closePromise = manager.close().then(() => {
|
||||
closed = true;
|
||||
});
|
||||
|
||||
await Promise.resolve();
|
||||
expect(closed).toBe(false);
|
||||
|
||||
releaseSync();
|
||||
await closePromise;
|
||||
manager = null;
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,99 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import type { MemoryIndexManager } from "./index.js";
|
||||
|
||||
let shouldFail = false;
|
||||
|
||||
type EmbeddingTestMocksModule = typeof import("./embedding.test-mocks.js");
|
||||
type TestManagerHelpersModule = typeof import("./test-manager-helpers.js");
|
||||
type MemoryIndexModule = typeof import("./index.js");
|
||||
|
||||
describe("memory manager atomic reindex", () => {
|
||||
let fixtureRoot = "";
|
||||
let caseId = 0;
|
||||
let workspaceDir: string;
|
||||
let indexPath: string;
|
||||
let manager: MemoryIndexManager | null = null;
|
||||
let embedBatch: ReturnType<EmbeddingTestMocksModule["getEmbedBatchMock"]>;
|
||||
let resetEmbeddingMocks: EmbeddingTestMocksModule["resetEmbeddingMocks"];
|
||||
let getRequiredMemoryIndexManager: TestManagerHelpersModule["getRequiredMemoryIndexManager"];
|
||||
let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];
|
||||
|
||||
beforeAll(async () => {
|
||||
fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-atomic-"));
|
||||
});
|
||||
|
||||
beforeEach(async () => {
|
||||
vi.resetModules();
|
||||
const embeddingMocks = await import("./embedding.test-mocks.js");
|
||||
embedBatch = embeddingMocks.getEmbedBatchMock();
|
||||
resetEmbeddingMocks = embeddingMocks.resetEmbeddingMocks;
|
||||
({ getRequiredMemoryIndexManager } = await import("./test-manager-helpers.js"));
|
||||
({ closeAllMemorySearchManagers } = await import("./index.js"));
|
||||
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "0");
|
||||
resetEmbeddingMocks();
|
||||
shouldFail = false;
|
||||
embedBatch.mockImplementation(async (texts: string[]) => {
|
||||
if (shouldFail) {
|
||||
throw new Error("embedding failure");
|
||||
}
|
||||
return texts.map((_, index) => [index + 1, 0, 0]);
|
||||
});
|
||||
workspaceDir = path.join(fixtureRoot, `case-${caseId++}`);
|
||||
await fs.mkdir(workspaceDir, { recursive: true });
|
||||
indexPath = path.join(workspaceDir, "index.sqlite");
|
||||
await fs.mkdir(path.join(workspaceDir, "memory"));
|
||||
await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Hello memory.");
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (manager) {
|
||||
await manager.close();
|
||||
manager = null;
|
||||
}
|
||||
await closeAllMemorySearchManagers();
|
||||
vi.unstubAllEnvs();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
if (!fixtureRoot) {
|
||||
return;
|
||||
}
|
||||
await fs.rm(fixtureRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("keeps the prior index when a full reindex fails", async () => {
|
||||
const cfg = {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: indexPath },
|
||||
cache: { enabled: false },
|
||||
// Perf: keep test indexes to a single chunk to reduce sqlite work.
|
||||
chunking: { tokens: 4000, overlap: 0 },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
manager = await getRequiredMemoryIndexManager({ cfg, agentId: "main" });
|
||||
|
||||
await manager.sync({ force: true });
|
||||
const beforeStatus = manager.status();
|
||||
expect(beforeStatus.chunks).toBeGreaterThan(0);
|
||||
|
||||
shouldFail = true;
|
||||
await expect(manager.sync({ force: true })).rejects.toThrow("embedding failure");
|
||||
|
||||
const afterStatus = manager.status();
|
||||
expect(afterStatus.chunks).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
330
extensions/memory-core/src/memory/manager.batch.test.ts
Normal file
330
extensions/memory-core/src/memory/manager.batch.test.ts
Normal file
@@ -0,0 +1,330 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { useFastShortTimeouts } from "../../../../test/helpers/fast-short-timeouts.js";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import { createOpenAIEmbeddingProviderMock } from "./test-embeddings-mock.js";
|
||||
import { mockPublicPinnedHostname } from "./test-helpers/ssrf.js";
|
||||
|
||||
type MemoryIndexManager = import("./index.js").MemoryIndexManager;
|
||||
type MemoryIndexModule = typeof import("./index.js");
|
||||
|
||||
const embedBatch = vi.fn(async (_texts: string[]) => [] as number[][]);
|
||||
const embedQuery = vi.fn(async () => [0.5, 0.5, 0.5]);
|
||||
let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
|
||||
|
||||
describe("memory indexing with OpenAI batches", () => {
|
||||
let fixtureRoot: string;
|
||||
let workspaceDir: string;
|
||||
let memoryDir: string;
|
||||
let indexPath: string;
|
||||
let manager: MemoryIndexManager | null = null;
|
||||
|
||||
async function readOpenAIBatchUploadRequests(body: FormData) {
|
||||
let uploadedRequests: Array<{ custom_id?: string }> = [];
|
||||
const entries = body.entries() as IterableIterator<[string, FormDataEntryValue]>;
|
||||
for (const [key, value] of entries) {
|
||||
if (key !== "file") {
|
||||
continue;
|
||||
}
|
||||
const text = typeof value === "string" ? value : await value.text();
|
||||
uploadedRequests = text
|
||||
.split("\n")
|
||||
.filter(Boolean)
|
||||
.map((line: string) => JSON.parse(line) as { custom_id?: string });
|
||||
}
|
||||
return uploadedRequests;
|
||||
}
|
||||
|
||||
function createOpenAIBatchFetchMock(options?: {
|
||||
onCreateBatch?: (ctx: { batchCreates: number }) => Response | Promise<Response>;
|
||||
}) {
|
||||
let uploadedRequests: Array<{ custom_id?: string }> = [];
|
||||
const state = { batchCreates: 0 };
|
||||
const fetchMock = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
const url =
|
||||
typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
|
||||
if (url.endsWith("/files")) {
|
||||
const body = init?.body;
|
||||
if (!(body instanceof FormData)) {
|
||||
throw new Error("expected FormData upload");
|
||||
}
|
||||
uploadedRequests = await readOpenAIBatchUploadRequests(body);
|
||||
return new Response(JSON.stringify({ id: "file_1" }), {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
});
|
||||
}
|
||||
if (url.endsWith("/batches")) {
|
||||
state.batchCreates += 1;
|
||||
if (options?.onCreateBatch) {
|
||||
return await options.onCreateBatch({ batchCreates: state.batchCreates });
|
||||
}
|
||||
return new Response(JSON.stringify({ id: "batch_1", status: "in_progress" }), {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
});
|
||||
}
|
||||
if (url.endsWith("/batches/batch_1")) {
|
||||
return new Response(
|
||||
JSON.stringify({ id: "batch_1", status: "completed", output_file_id: "file_out" }),
|
||||
{ status: 200, headers: { "Content-Type": "application/json" } },
|
||||
);
|
||||
}
|
||||
if (url.endsWith("/files/file_out/content")) {
|
||||
const lines = uploadedRequests.map((request, index) =>
|
||||
JSON.stringify({
|
||||
custom_id: request.custom_id,
|
||||
response: {
|
||||
status_code: 200,
|
||||
body: { data: [{ embedding: [index + 1, 0, 0], index: 0 }] },
|
||||
},
|
||||
}),
|
||||
);
|
||||
return new Response(lines.join("\n"), {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/jsonl" },
|
||||
});
|
||||
}
|
||||
throw new Error(`unexpected fetch ${url}`);
|
||||
});
|
||||
return { fetchMock, state };
|
||||
}
|
||||
|
||||
function createBatchCfg(): OpenClawConfig {
|
||||
return {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "text-embedding-3-small",
|
||||
store: { path: indexPath, vector: { enabled: false } },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
query: { minScore: 0, hybrid: { enabled: false } },
|
||||
remote: { batch: { enabled: true, wait: true, pollIntervalMs: 1 } },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
}
|
||||
|
||||
beforeAll(async () => {
|
||||
vi.resetModules();
|
||||
vi.doMock("./embeddings.js", () => ({
|
||||
createEmbeddingProvider: async () =>
|
||||
createOpenAIEmbeddingProviderMock({
|
||||
embedQuery,
|
||||
embedBatch,
|
||||
}),
|
||||
}));
|
||||
await import("./test-runtime-mocks.js");
|
||||
({ getMemorySearchManager } = await import("./index.js"));
|
||||
|
||||
fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-batch-"));
|
||||
workspaceDir = path.join(fixtureRoot, "workspace");
|
||||
memoryDir = path.join(workspaceDir, "memory");
|
||||
indexPath = path.join(fixtureRoot, "index.sqlite");
|
||||
await fs.mkdir(memoryDir, { recursive: true });
|
||||
|
||||
const result = await getMemorySearchManager({ cfg: createBatchCfg(), agentId: "main" });
|
||||
expect(result.manager).not.toBeNull();
|
||||
if (!result.manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
manager = result.manager as unknown as MemoryIndexManager;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
if (manager) {
|
||||
await manager.close();
|
||||
manager = null;
|
||||
}
|
||||
await fs.rm(fixtureRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
beforeEach(async () => {
|
||||
embedBatch.mockClear();
|
||||
embedQuery.mockClear();
|
||||
embedBatch.mockImplementation(async (texts: string[]) =>
|
||||
texts.map((_text, index) => [index + 1, 0, 0]),
|
||||
);
|
||||
|
||||
await fs.rm(memoryDir, { recursive: true, force: true });
|
||||
await fs.mkdir(memoryDir, { recursive: true });
|
||||
|
||||
// Reuse one manager instance across tests; keep index state isolated.
|
||||
if (!manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
(manager as unknown as { resetIndex: () => void }).resetIndex();
|
||||
(manager as unknown as { dirty: boolean }).dirty = true;
|
||||
(manager as unknown as { batchFailureCount: number }).batchFailureCount = 0;
|
||||
(manager as unknown as { batchFailureLastError?: string }).batchFailureLastError = undefined;
|
||||
(manager as unknown as { batchFailureLastProvider?: string }).batchFailureLastProvider =
|
||||
undefined;
|
||||
(manager as unknown as { batch: { enabled: boolean } }).batch.enabled = true;
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
it("uses OpenAI batch uploads when enabled", async () => {
|
||||
const restoreTimeouts = useFastShortTimeouts();
|
||||
const content = ["hello", "from", "batch"].join("\n\n");
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-07.md"), content);
|
||||
|
||||
const { fetchMock } = createOpenAIBatchFetchMock();
|
||||
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
mockPublicPinnedHostname();
|
||||
|
||||
try {
|
||||
if (!manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
const labels: string[] = [];
|
||||
await manager.sync({
|
||||
progress: (update) => {
|
||||
if (update.label) {
|
||||
labels.push(update.label);
|
||||
}
|
||||
},
|
||||
});
|
||||
|
||||
const status = manager.status();
|
||||
expect(status.chunks).toBeGreaterThan(0);
|
||||
expect(embedBatch).not.toHaveBeenCalled();
|
||||
expect(fetchMock).toHaveBeenCalled();
|
||||
expect(labels.some((label) => label.toLowerCase().includes("batch"))).toBe(true);
|
||||
} finally {
|
||||
restoreTimeouts();
|
||||
}
|
||||
});
|
||||
|
||||
it("retries OpenAI batch create on transient failures", async () => {
|
||||
const restoreTimeouts = useFastShortTimeouts();
|
||||
const content = ["retry", "the", "batch"].join("\n\n");
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-08.md"), content);
|
||||
|
||||
const { fetchMock, state } = createOpenAIBatchFetchMock({
|
||||
onCreateBatch: ({ batchCreates }) => {
|
||||
if (batchCreates === 1) {
|
||||
return new Response("upstream connect error", { status: 503 });
|
||||
}
|
||||
return new Response(JSON.stringify({ id: "batch_1", status: "in_progress" }), {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
});
|
||||
},
|
||||
});
|
||||
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
mockPublicPinnedHostname();
|
||||
|
||||
try {
|
||||
if (!manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
await manager.sync({ reason: "test" });
|
||||
|
||||
const status = manager.status();
|
||||
expect(status.chunks).toBeGreaterThan(0);
|
||||
expect(state.batchCreates).toBe(2);
|
||||
} finally {
|
||||
restoreTimeouts();
|
||||
}
|
||||
});
|
||||
|
||||
it("tracks batch failures, resets on success, and disables after repeated failures", async () => {
|
||||
const restoreTimeouts = useFastShortTimeouts();
|
||||
const memoryFile = path.join(memoryDir, "2026-01-09.md");
|
||||
await fs.writeFile(memoryFile, ["flaky", "batch"].join("\n\n"));
|
||||
let mtimeMs = Date.now();
|
||||
const touch = async () => {
|
||||
mtimeMs += 1_000;
|
||||
const date = new Date(mtimeMs);
|
||||
await fs.utimes(memoryFile, date, date);
|
||||
};
|
||||
await touch();
|
||||
|
||||
let mode: "fail" | "ok" = "fail";
|
||||
const { fetchMock } = createOpenAIBatchFetchMock({
|
||||
onCreateBatch: () =>
|
||||
mode === "fail"
|
||||
? new Response("batch failed", { status: 400 })
|
||||
: new Response(JSON.stringify({ id: "batch_1", status: "in_progress" }), {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
}),
|
||||
});
|
||||
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
mockPublicPinnedHostname();
|
||||
|
||||
try {
|
||||
if (!manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
|
||||
// First failure: fallback to regular embeddings and increment failure count.
|
||||
await manager.sync({ reason: "test" });
|
||||
expect(embedBatch).toHaveBeenCalled();
|
||||
let status = manager.status();
|
||||
expect(status.batch?.enabled).toBe(true);
|
||||
expect(status.batch?.failures).toBe(1);
|
||||
|
||||
// Success should reset failure count.
|
||||
embedBatch.mockClear();
|
||||
mode = "ok";
|
||||
await fs.writeFile(memoryFile, ["flaky", "batch", "recovery"].join("\n\n"));
|
||||
await touch();
|
||||
(manager as unknown as { dirty: boolean }).dirty = true;
|
||||
await manager.sync({ reason: "test" });
|
||||
status = manager.status();
|
||||
expect(status.batch?.enabled).toBe(true);
|
||||
expect(status.batch?.failures).toBe(0);
|
||||
expect(embedBatch).not.toHaveBeenCalled();
|
||||
|
||||
// Two more failures after reset should disable remote batching.
|
||||
await (
|
||||
manager as unknown as {
|
||||
recordBatchFailure: (params: {
|
||||
provider: string;
|
||||
message: string;
|
||||
attempts?: number;
|
||||
forceDisable?: boolean;
|
||||
}) => Promise<unknown>;
|
||||
}
|
||||
).recordBatchFailure({ provider: "openai", message: "batch failed", attempts: 1 });
|
||||
await (
|
||||
manager as unknown as {
|
||||
recordBatchFailure: (params: {
|
||||
provider: string;
|
||||
message: string;
|
||||
attempts?: number;
|
||||
forceDisable?: boolean;
|
||||
}) => Promise<unknown>;
|
||||
}
|
||||
).recordBatchFailure({ provider: "openai", message: "batch failed", attempts: 1 });
|
||||
status = manager.status();
|
||||
expect(status.batch?.enabled).toBe(false);
|
||||
expect(status.batch?.failures).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// Once disabled, batch endpoints are skipped and fallback embeddings run directly.
|
||||
const fetchCalls = fetchMock.mock.calls.length;
|
||||
embedBatch.mockClear();
|
||||
await fs.writeFile(memoryFile, ["flaky", "batch", "fallback"].join("\n\n"));
|
||||
await touch();
|
||||
(manager as unknown as { dirty: boolean }).dirty = true;
|
||||
await manager.sync({ reason: "test" });
|
||||
expect(fetchMock.mock.calls.length).toBe(fetchCalls);
|
||||
expect(embedBatch).toHaveBeenCalled();
|
||||
} finally {
|
||||
restoreTimeouts();
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,143 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { useFastShortTimeouts } from "../../../../test/helpers/fast-short-timeouts.js";
|
||||
import { installEmbeddingManagerFixture } from "./embedding-manager.test-harness.js";
|
||||
|
||||
const fx = installEmbeddingManagerFixture({
|
||||
fixturePrefix: "openclaw-mem-",
|
||||
largeTokens: 4000,
|
||||
smallTokens: 200,
|
||||
createCfg: ({ workspaceDir, indexPath, tokens }) => ({
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: indexPath, vector: { enabled: false } },
|
||||
chunking: { tokens, overlap: 0 },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
query: { minScore: 0, hybrid: { enabled: false } },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
}),
|
||||
});
|
||||
|
||||
describe("memory embedding batches", () => {
|
||||
async function expectSyncWithFastTimeouts(manager: {
|
||||
sync: (params: { reason: string }) => Promise<void>;
|
||||
}) {
|
||||
const restoreFastTimeouts = useFastShortTimeouts();
|
||||
try {
|
||||
await manager.sync({ reason: "test" });
|
||||
} finally {
|
||||
restoreFastTimeouts();
|
||||
}
|
||||
}
|
||||
|
||||
it("splits large files across multiple embedding batches", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerLarge = fx.getManagerLarge();
|
||||
// Keep this small but above the embedding batch byte threshold (8k) so we
|
||||
// exercise multi-batch behavior without generating lots of chunks/DB rows.
|
||||
const line = "a".repeat(4200);
|
||||
const content = [line, line].join("\n");
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-03.md"), content);
|
||||
const updates: Array<{ completed: number; total: number; label?: string }> = [];
|
||||
await managerLarge.sync({
|
||||
progress: (update) => {
|
||||
updates.push(update);
|
||||
},
|
||||
});
|
||||
|
||||
const status = managerLarge.status();
|
||||
const totalTexts = fx.embedBatch.mock.calls.reduce(
|
||||
(sum: number, call: unknown[]) => sum + ((call[0] as string[] | undefined)?.length ?? 0),
|
||||
0,
|
||||
);
|
||||
expect(totalTexts).toBe(status.chunks);
|
||||
expect(fx.embedBatch.mock.calls.length).toBeGreaterThan(1);
|
||||
const inputs: string[] = fx.embedBatch.mock.calls.flatMap(
|
||||
(call: unknown[]) => (call[0] as string[] | undefined) ?? [],
|
||||
);
|
||||
expect(inputs.every((text) => Buffer.byteLength(text, "utf8") <= 8000)).toBe(true);
|
||||
expect(updates.length).toBeGreaterThan(0);
|
||||
expect(updates.some((update) => update.label?.includes("/"))).toBe(true);
|
||||
const last = updates[updates.length - 1];
|
||||
expect(last?.total).toBeGreaterThan(0);
|
||||
expect(last?.completed).toBe(last?.total);
|
||||
});
|
||||
|
||||
it("keeps small files in a single embedding batch", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerSmall = fx.getManagerSmall();
|
||||
const line = "b".repeat(120);
|
||||
const content = Array.from({ length: 4 }, () => line).join("\n");
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-04.md"), content);
|
||||
await managerSmall.sync({ reason: "test" });
|
||||
|
||||
expect(fx.embedBatch.mock.calls.length).toBe(1);
|
||||
});
|
||||
|
||||
it("retries embeddings on transient rate limit and 5xx errors", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerSmall = fx.getManagerSmall();
|
||||
const line = "d".repeat(120);
|
||||
const content = Array.from({ length: 4 }, () => line).join("\n");
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-06.md"), content);
|
||||
|
||||
const transientErrors = [
|
||||
"openai embeddings failed: 429 rate limit",
|
||||
"openai embeddings failed: 502 Bad Gateway (cloudflare)",
|
||||
];
|
||||
let calls = 0;
|
||||
fx.embedBatch.mockImplementation(async (texts: string[]) => {
|
||||
calls += 1;
|
||||
const transient = transientErrors[calls - 1];
|
||||
if (transient) {
|
||||
throw new Error(transient);
|
||||
}
|
||||
return texts.map(() => [0, 1, 0]);
|
||||
});
|
||||
|
||||
await expectSyncWithFastTimeouts(managerSmall);
|
||||
|
||||
expect(calls).toBe(3);
|
||||
}, 10000);
|
||||
|
||||
it("retries embeddings on too-many-tokens-per-day rate limits", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerSmall = fx.getManagerSmall();
|
||||
const line = "e".repeat(120);
|
||||
const content = Array.from({ length: 4 }, () => line).join("\n");
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-08.md"), content);
|
||||
|
||||
let calls = 0;
|
||||
fx.embedBatch.mockImplementation(async (texts: string[]) => {
|
||||
calls += 1;
|
||||
if (calls === 1) {
|
||||
throw new Error("AWS Bedrock embeddings failed: Too many tokens per day");
|
||||
}
|
||||
return texts.map(() => [0, 1, 0]);
|
||||
});
|
||||
|
||||
await expectSyncWithFastTimeouts(managerSmall);
|
||||
|
||||
expect(calls).toBe(2);
|
||||
}, 10000);
|
||||
|
||||
it("skips empty chunks so embeddings input stays valid", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerSmall = fx.getManagerSmall();
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-07.md"), "\n\n\n");
|
||||
await managerSmall.sync({ reason: "test" });
|
||||
|
||||
const inputs = fx.embedBatch.mock.calls.flatMap(
|
||||
(call: unknown[]) => (call[0] as string[]) ?? [],
|
||||
);
|
||||
expect(inputs).not.toContain("");
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,137 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { setTimeout as sleep } from "node:timers/promises";
|
||||
import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import "./test-runtime-mocks.js";
|
||||
import type { MemoryIndexManager } from "./index.js";
|
||||
|
||||
type MemoryIndexModule = typeof import("./index.js");
|
||||
type ManagerModule = typeof import("./manager.js");
|
||||
|
||||
const hoisted = vi.hoisted(() => ({
|
||||
providerCreateCalls: 0,
|
||||
providerDelayMs: 0,
|
||||
}));
|
||||
|
||||
vi.mock("./embeddings.js", () => ({
|
||||
createEmbeddingProvider: async () => {
|
||||
hoisted.providerCreateCalls += 1;
|
||||
if (hoisted.providerDelayMs > 0) {
|
||||
await sleep(hoisted.providerDelayMs);
|
||||
}
|
||||
return {
|
||||
requestedProvider: "openai",
|
||||
provider: {
|
||||
id: "mock",
|
||||
model: "mock-embed",
|
||||
maxInputTokens: 8192,
|
||||
embedQuery: async () => [0, 1, 0],
|
||||
embedBatch: async (texts: string[]) => texts.map(() => [0, 1, 0]),
|
||||
},
|
||||
};
|
||||
},
|
||||
}));
|
||||
|
||||
let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
|
||||
let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];
|
||||
let closeAllMemoryIndexManagers: ManagerModule["closeAllMemoryIndexManagers"];
|
||||
let RawMemoryIndexManager: ManagerModule["MemoryIndexManager"];
|
||||
|
||||
describe("memory manager cache hydration", () => {
|
||||
let workspaceDir = "";
|
||||
|
||||
beforeAll(async () => {
|
||||
({ getMemorySearchManager, closeAllMemorySearchManagers } = await import("./index.js"));
|
||||
({ closeAllMemoryIndexManagers, MemoryIndexManager: RawMemoryIndexManager } =
|
||||
await import("./manager.js"));
|
||||
});
|
||||
|
||||
beforeEach(async () => {
|
||||
await closeAllMemoryIndexManagers();
|
||||
vi.clearAllMocks();
|
||||
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-concurrent-"));
|
||||
await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
|
||||
await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Hello memory.");
|
||||
hoisted.providerCreateCalls = 0;
|
||||
hoisted.providerDelayMs = 50;
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await closeAllMemorySearchManagers();
|
||||
await fs.rm(workspaceDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
function createMemoryConcurrencyConfig(indexPath: string): OpenClawConfig {
|
||||
return {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: indexPath, vector: { enabled: false } },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
}
|
||||
|
||||
it("deduplicates concurrent manager creation for the same cache key", async () => {
|
||||
const indexPath = path.join(workspaceDir, "index.sqlite");
|
||||
const cfg = createMemoryConcurrencyConfig(indexPath);
|
||||
|
||||
const results = await Promise.all(
|
||||
Array.from(
|
||||
{ length: 12 },
|
||||
async () => await getMemorySearchManager({ cfg, agentId: "main" }),
|
||||
),
|
||||
);
|
||||
const managers = results
|
||||
.map((result) => result.manager)
|
||||
.filter((manager): manager is MemoryIndexManager => Boolean(manager));
|
||||
|
||||
expect(managers).toHaveLength(12);
|
||||
expect(new Set(managers).size).toBe(1);
|
||||
expect(hoisted.providerCreateCalls).toBe(0);
|
||||
|
||||
await managers[0].close();
|
||||
});
|
||||
|
||||
it("evicts cached managers during global teardown", async () => {
|
||||
const indexPath = path.join(workspaceDir, "index.sqlite");
|
||||
const cfg = createMemoryConcurrencyConfig(indexPath);
|
||||
|
||||
const pendingResult = RawMemoryIndexManager.get({ cfg, agentId: "main" });
|
||||
await closeAllMemoryIndexManagers();
|
||||
const firstManager = await pendingResult;
|
||||
|
||||
const secondManager = await RawMemoryIndexManager.get({ cfg, agentId: "main" });
|
||||
|
||||
expect(firstManager).toBeTruthy();
|
||||
expect(secondManager).toBeTruthy();
|
||||
expect(Object.is(secondManager, firstManager)).toBe(false);
|
||||
expect(hoisted.providerCreateCalls).toBe(0);
|
||||
|
||||
await secondManager?.close?.();
|
||||
});
|
||||
|
||||
it("does not identity-cache status-only managers", async () => {
|
||||
const indexPath = path.join(workspaceDir, "index.sqlite");
|
||||
const cfg = createMemoryConcurrencyConfig(indexPath);
|
||||
|
||||
const first = await RawMemoryIndexManager.get({ cfg, agentId: "main", purpose: "status" });
|
||||
const second = await RawMemoryIndexManager.get({ cfg, agentId: "main", purpose: "status" });
|
||||
|
||||
expect(first).toBeTruthy();
|
||||
expect(second).toBeTruthy();
|
||||
expect(Object.is(second, first)).toBe(false);
|
||||
expect(hoisted.providerCreateCalls).toBe(0);
|
||||
|
||||
await first?.close?.();
|
||||
await second?.close?.();
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,217 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import { DEFAULT_OLLAMA_EMBEDDING_MODEL } from "./embeddings-ollama.js";
|
||||
import type {
|
||||
EmbeddingProvider,
|
||||
EmbeddingProviderRuntime,
|
||||
EmbeddingProviderResult,
|
||||
} from "./embeddings.js";
|
||||
import type { MemoryIndexManager } from "./index.js";
|
||||
type MemoryIndexModule = typeof import("./index.js");
|
||||
|
||||
const { createEmbeddingProviderMock } = vi.hoisted(() => ({
|
||||
createEmbeddingProviderMock: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock("./embeddings.js", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("./embeddings.js")>();
|
||||
return {
|
||||
...actual,
|
||||
createEmbeddingProvider: createEmbeddingProviderMock,
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("./sqlite-vec.js", () => ({
|
||||
loadSqliteVecExtension: async () => ({ ok: false, error: "sqlite-vec disabled in tests" }),
|
||||
}));
|
||||
|
||||
let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
|
||||
let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];
|
||||
|
||||
async function ensureProviderInitialized(manager: MemoryIndexManager): Promise<void> {
|
||||
await (
|
||||
manager as unknown as {
|
||||
ensureProviderInitialized: () => Promise<void>;
|
||||
}
|
||||
).ensureProviderInitialized();
|
||||
}
|
||||
|
||||
function createProvider(id: string): EmbeddingProvider {
|
||||
return {
|
||||
id,
|
||||
model: `${id}-model`,
|
||||
embedQuery: async () => [0.1, 0.2, 0.3],
|
||||
embedBatch: async (texts: string[]) => texts.map(() => [0.1, 0.2, 0.3]),
|
||||
};
|
||||
}
|
||||
|
||||
function buildConfig(params: {
|
||||
workspaceDir: string;
|
||||
indexPath: string;
|
||||
provider: "openai" | "mistral";
|
||||
fallback?: "none" | "mistral" | "ollama";
|
||||
}): OpenClawConfig {
|
||||
return {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: params.workspaceDir,
|
||||
memorySearch: {
|
||||
provider: params.provider,
|
||||
model: params.provider === "mistral" ? "mistral/mistral-embed" : "text-embedding-3-small",
|
||||
fallback: params.fallback ?? "none",
|
||||
store: { path: params.indexPath, vector: { enabled: false } },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
query: { minScore: 0, hybrid: { enabled: false } },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
}
|
||||
|
||||
describe("memory manager mistral provider wiring", () => {
|
||||
let workspaceDir = "";
|
||||
let indexPath = "";
|
||||
let manager: MemoryIndexManager | null = null;
|
||||
|
||||
beforeEach(async () => {
|
||||
vi.resetModules();
|
||||
({ getMemorySearchManager, closeAllMemorySearchManagers } = await import("./index.js"));
|
||||
vi.clearAllMocks();
|
||||
createEmbeddingProviderMock.mockReset();
|
||||
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-memory-mistral-"));
|
||||
indexPath = path.join(workspaceDir, "index.sqlite");
|
||||
await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
|
||||
await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "test");
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (manager) {
|
||||
await manager.close();
|
||||
manager = null;
|
||||
}
|
||||
await closeAllMemorySearchManagers();
|
||||
if (workspaceDir) {
|
||||
await fs.rm(workspaceDir, { recursive: true, force: true });
|
||||
workspaceDir = "";
|
||||
indexPath = "";
|
||||
}
|
||||
});
|
||||
|
||||
it("stores mistral client when mistral provider is selected", async () => {
|
||||
const mistralRuntime: EmbeddingProviderRuntime = {
|
||||
id: "mistral",
|
||||
cacheKeyData: { provider: "mistral", model: "mistral-embed" },
|
||||
};
|
||||
const providerResult: EmbeddingProviderResult = {
|
||||
requestedProvider: "mistral",
|
||||
provider: createProvider("mistral"),
|
||||
runtime: mistralRuntime,
|
||||
};
|
||||
createEmbeddingProviderMock.mockResolvedValueOnce(providerResult);
|
||||
|
||||
const cfg = buildConfig({ workspaceDir, indexPath, provider: "mistral" });
|
||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
||||
if (!result.manager) {
|
||||
throw new Error(`manager missing: ${result.error ?? "no error provided"}`);
|
||||
}
|
||||
manager = result.manager as unknown as MemoryIndexManager;
|
||||
await ensureProviderInitialized(manager);
|
||||
|
||||
const internal = manager as unknown as {
|
||||
ensureProviderInitialized: () => Promise<void>;
|
||||
providerRuntime?: EmbeddingProviderRuntime;
|
||||
};
|
||||
await internal.ensureProviderInitialized();
|
||||
expect(internal.providerRuntime).toBe(mistralRuntime);
|
||||
});
|
||||
|
||||
it("stores mistral client after fallback activation", async () => {
|
||||
const openAiRuntime: EmbeddingProviderRuntime = {
|
||||
id: "openai",
|
||||
cacheKeyData: { provider: "openai", model: "text-embedding-3-small" },
|
||||
};
|
||||
const mistralRuntime: EmbeddingProviderRuntime = {
|
||||
id: "mistral",
|
||||
cacheKeyData: { provider: "mistral", model: "mistral-embed" },
|
||||
};
|
||||
createEmbeddingProviderMock.mockResolvedValueOnce({
|
||||
requestedProvider: "openai",
|
||||
provider: createProvider("openai"),
|
||||
runtime: openAiRuntime,
|
||||
} as EmbeddingProviderResult);
|
||||
createEmbeddingProviderMock.mockResolvedValueOnce({
|
||||
requestedProvider: "mistral",
|
||||
provider: createProvider("mistral"),
|
||||
runtime: mistralRuntime,
|
||||
} as EmbeddingProviderResult);
|
||||
|
||||
const cfg = buildConfig({ workspaceDir, indexPath, provider: "openai", fallback: "mistral" });
|
||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
||||
if (!result.manager) {
|
||||
throw new Error(`manager missing: ${result.error ?? "no error provided"}`);
|
||||
}
|
||||
manager = result.manager as unknown as MemoryIndexManager;
|
||||
await ensureProviderInitialized(manager);
|
||||
const internal = manager as unknown as {
|
||||
ensureProviderInitialized: () => Promise<void>;
|
||||
activateFallbackProvider: (reason: string) => Promise<boolean>;
|
||||
providerRuntime?: EmbeddingProviderRuntime;
|
||||
};
|
||||
|
||||
await internal.ensureProviderInitialized();
|
||||
expect(internal.providerRuntime?.id).toBe("openai");
|
||||
const activated = await internal.activateFallbackProvider("forced test");
|
||||
expect(activated).toBe(true);
|
||||
expect(internal.providerRuntime).toBe(mistralRuntime);
|
||||
});
|
||||
|
||||
it("uses default ollama model when activating ollama fallback", async () => {
|
||||
const openAiRuntime: EmbeddingProviderRuntime = {
|
||||
id: "openai",
|
||||
cacheKeyData: { provider: "openai", model: "text-embedding-3-small" },
|
||||
};
|
||||
const ollamaRuntime: EmbeddingProviderRuntime = {
|
||||
id: "ollama",
|
||||
cacheKeyData: { provider: "ollama", model: DEFAULT_OLLAMA_EMBEDDING_MODEL },
|
||||
};
|
||||
createEmbeddingProviderMock.mockResolvedValueOnce({
|
||||
requestedProvider: "openai",
|
||||
provider: createProvider("openai"),
|
||||
runtime: openAiRuntime,
|
||||
} as EmbeddingProviderResult);
|
||||
createEmbeddingProviderMock.mockResolvedValueOnce({
|
||||
requestedProvider: "ollama",
|
||||
provider: createProvider("ollama"),
|
||||
runtime: ollamaRuntime,
|
||||
} as EmbeddingProviderResult);
|
||||
|
||||
const cfg = buildConfig({ workspaceDir, indexPath, provider: "openai", fallback: "ollama" });
|
||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
||||
if (!result.manager) {
|
||||
throw new Error(`manager missing: ${result.error ?? "no error provided"}`);
|
||||
}
|
||||
manager = result.manager as unknown as MemoryIndexManager;
|
||||
await ensureProviderInitialized(manager);
|
||||
const internal = manager as unknown as {
|
||||
ensureProviderInitialized: () => Promise<void>;
|
||||
activateFallbackProvider: (reason: string) => Promise<boolean>;
|
||||
providerRuntime?: EmbeddingProviderRuntime;
|
||||
};
|
||||
|
||||
await internal.ensureProviderInitialized();
|
||||
expect(internal.providerRuntime?.id).toBe("openai");
|
||||
const activated = await internal.activateFallbackProvider("forced ollama fallback");
|
||||
expect(activated).toBe(true);
|
||||
expect(internal.providerRuntime).toBe(ollamaRuntime);
|
||||
|
||||
const fallbackCall = createEmbeddingProviderMock.mock.calls[1]?.[0] as
|
||||
| { provider?: string; model?: string }
|
||||
| undefined;
|
||||
expect(fallbackCall?.provider).toBe("ollama");
|
||||
expect(fallbackCall?.model).toBe(DEFAULT_OLLAMA_EMBEDDING_MODEL);
|
||||
});
|
||||
});
|
||||
122
extensions/memory-core/src/memory/manager.read-file.test.ts
Normal file
122
extensions/memory-core/src/memory/manager.read-file.test.ts
Normal file
@@ -0,0 +1,122 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import { resetEmbeddingMocks } from "./embedding.test-mocks.js";
|
||||
import type { MemoryIndexManager } from "./index.js";
|
||||
import { getRequiredMemoryIndexManager } from "./test-manager-helpers.js";
|
||||
|
||||
function createMemorySearchCfg(options: {
|
||||
workspaceDir: string;
|
||||
indexPath: string;
|
||||
}): OpenClawConfig {
|
||||
return {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: options.workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: options.indexPath, vector: { enabled: false } },
|
||||
cache: { enabled: false },
|
||||
query: { minScore: 0, hybrid: { enabled: false } },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
}
|
||||
|
||||
describe("MemoryIndexManager.readFile", () => {
|
||||
let workspaceDir: string;
|
||||
let indexPath: string;
|
||||
let memoryDir: string;
|
||||
let manager: MemoryIndexManager | null = null;
|
||||
|
||||
beforeAll(async () => {
|
||||
resetEmbeddingMocks();
|
||||
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-read-"));
|
||||
indexPath = path.join(workspaceDir, "index.sqlite");
|
||||
memoryDir = path.join(workspaceDir, "memory");
|
||||
await fs.mkdir(memoryDir, { recursive: true });
|
||||
manager = await getRequiredMemoryIndexManager({
|
||||
cfg: createMemorySearchCfg({ workspaceDir, indexPath }),
|
||||
agentId: "main",
|
||||
purpose: "status",
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
const entries = await fs.readdir(memoryDir).catch(() => []);
|
||||
await Promise.all(
|
||||
entries.map(async (entry) => {
|
||||
await fs.rm(path.join(memoryDir, entry), { recursive: true, force: true });
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
if (manager) {
|
||||
await manager.close();
|
||||
manager = null;
|
||||
}
|
||||
await fs.rm(workspaceDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("returns empty text when the requested file does not exist", async () => {
|
||||
const relPath = "memory/2099-01-01.md";
|
||||
const result = await manager!.readFile({ relPath });
|
||||
expect(result).toEqual({ text: "", path: relPath });
|
||||
});
|
||||
|
||||
it("returns content slices when the file exists", async () => {
|
||||
const relPath = "memory/2026-02-20.md";
|
||||
const absPath = path.join(workspaceDir, relPath);
|
||||
await fs.mkdir(path.dirname(absPath), { recursive: true });
|
||||
await fs.writeFile(absPath, ["line 1", "line 2", "line 3"].join("\n"), "utf-8");
|
||||
|
||||
const result = await manager!.readFile({ relPath, from: 2, lines: 1 });
|
||||
expect(result).toEqual({ text: "line 2", path: relPath });
|
||||
});
|
||||
|
||||
it("returns empty text when the requested slice is past EOF", async () => {
|
||||
const relPath = "memory/window.md";
|
||||
const absPath = path.join(workspaceDir, relPath);
|
||||
await fs.mkdir(path.dirname(absPath), { recursive: true });
|
||||
await fs.writeFile(absPath, ["alpha", "beta"].join("\n"), "utf-8");
|
||||
|
||||
const result = await manager!.readFile({ relPath, from: 10, lines: 5 });
|
||||
expect(result).toEqual({ text: "", path: relPath });
|
||||
});
|
||||
|
||||
it("returns empty text when the file disappears after stat", async () => {
|
||||
const relPath = "memory/transient.md";
|
||||
const absPath = path.join(workspaceDir, relPath);
|
||||
await fs.mkdir(path.dirname(absPath), { recursive: true });
|
||||
await fs.writeFile(absPath, "first\nsecond", "utf-8");
|
||||
|
||||
const realReadFile = fs.readFile;
|
||||
let injected = false;
|
||||
const readSpy = vi
|
||||
.spyOn(fs, "readFile")
|
||||
.mockImplementation(async (...args: Parameters<typeof realReadFile>) => {
|
||||
const [target, options] = args;
|
||||
if (!injected && typeof target === "string" && path.resolve(target) === absPath) {
|
||||
injected = true;
|
||||
const err = new Error("missing") as NodeJS.ErrnoException;
|
||||
err.code = "ENOENT";
|
||||
throw err;
|
||||
}
|
||||
return realReadFile(target, options);
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await manager!.readFile({ relPath });
|
||||
expect(result).toEqual({ text: "", path: relPath });
|
||||
} finally {
|
||||
readSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,198 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import type { DatabaseSync } from "node:sqlite";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import { resetEmbeddingMocks } from "./embedding.test-mocks.js";
|
||||
import { MemoryIndexManager } from "./manager.js";
|
||||
import { getRequiredMemoryIndexManager } from "./test-manager-helpers.js";
|
||||
|
||||
type ReadonlyRecoveryHarness = {
|
||||
closed: boolean;
|
||||
syncing: Promise<void> | null;
|
||||
queuedSessionFiles: Set<string>;
|
||||
queuedSessionSync: Promise<void> | null;
|
||||
db: DatabaseSync;
|
||||
vectorReady: Promise<boolean> | null;
|
||||
vector: {
|
||||
enabled: boolean;
|
||||
available: boolean | null;
|
||||
loadError?: string;
|
||||
dims?: number;
|
||||
};
|
||||
readonlyRecoveryAttempts: number;
|
||||
readonlyRecoverySuccesses: number;
|
||||
readonlyRecoveryFailures: number;
|
||||
readonlyRecoveryLastError?: string;
|
||||
ensureProviderInitialized: ReturnType<typeof vi.fn>;
|
||||
enqueueTargetedSessionSync: ReturnType<typeof vi.fn>;
|
||||
runSync: ReturnType<typeof vi.fn>;
|
||||
openDatabase: ReturnType<typeof vi.fn>;
|
||||
ensureSchema: ReturnType<typeof vi.fn>;
|
||||
readMeta: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
|
||||
describe("memory manager readonly recovery", () => {
|
||||
let workspaceDir = "";
|
||||
let indexPath = "";
|
||||
let manager: MemoryIndexManager | null = null;
|
||||
|
||||
function createMemoryConfig(): OpenClawConfig {
|
||||
return {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: indexPath, vector: { enabled: false } },
|
||||
cache: { enabled: false },
|
||||
query: { minScore: 0, hybrid: { enabled: false } },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
}
|
||||
|
||||
async function createRealManager() {
|
||||
manager = await getRequiredMemoryIndexManager({
|
||||
cfg: createMemoryConfig(),
|
||||
agentId: "main",
|
||||
purpose: "status",
|
||||
});
|
||||
return manager;
|
||||
}
|
||||
|
||||
function createReadonlyRecoveryHarness() {
|
||||
const reopenedClose = vi.fn();
|
||||
const initialClose = vi.fn();
|
||||
const reopenedDb = { close: reopenedClose } as unknown as DatabaseSync;
|
||||
const initialDb = { close: initialClose } as unknown as DatabaseSync;
|
||||
const harness: ReadonlyRecoveryHarness = {
|
||||
closed: false,
|
||||
syncing: null,
|
||||
queuedSessionFiles: new Set<string>(),
|
||||
queuedSessionSync: null,
|
||||
db: initialDb,
|
||||
vectorReady: null,
|
||||
vector: {
|
||||
enabled: false,
|
||||
available: null,
|
||||
loadError: "stale",
|
||||
dims: 123,
|
||||
},
|
||||
readonlyRecoveryAttempts: 0,
|
||||
readonlyRecoverySuccesses: 0,
|
||||
readonlyRecoveryFailures: 0,
|
||||
readonlyRecoveryLastError: undefined,
|
||||
ensureProviderInitialized: vi.fn(async () => {}),
|
||||
enqueueTargetedSessionSync: vi.fn(async () => {}),
|
||||
runSync: vi.fn(),
|
||||
openDatabase: vi.fn(() => reopenedDb),
|
||||
ensureSchema: vi.fn(),
|
||||
readMeta: vi.fn(() => undefined),
|
||||
};
|
||||
Object.setPrototypeOf(harness, MemoryIndexManager.prototype);
|
||||
return {
|
||||
harness,
|
||||
initialDb,
|
||||
initialClose,
|
||||
reopenedDb,
|
||||
reopenedClose,
|
||||
};
|
||||
}
|
||||
|
||||
function expectReadonlyRecoveryStatus(
|
||||
instance: {
|
||||
readonlyRecoveryAttempts: number;
|
||||
readonlyRecoverySuccesses: number;
|
||||
readonlyRecoveryFailures: number;
|
||||
readonlyRecoveryLastError?: string;
|
||||
},
|
||||
lastError: string,
|
||||
) {
|
||||
expect({
|
||||
attempts: instance.readonlyRecoveryAttempts,
|
||||
successes: instance.readonlyRecoverySuccesses,
|
||||
failures: instance.readonlyRecoveryFailures,
|
||||
lastError: instance.readonlyRecoveryLastError,
|
||||
}).toEqual({
|
||||
attempts: 1,
|
||||
successes: 1,
|
||||
failures: 0,
|
||||
lastError,
|
||||
});
|
||||
}
|
||||
|
||||
async function expectReadonlyRetry(params: { firstError: unknown; expectedLastError: string }) {
|
||||
const { harness, initialClose } = createReadonlyRecoveryHarness();
|
||||
harness.runSync.mockRejectedValueOnce(params.firstError).mockResolvedValueOnce(undefined);
|
||||
|
||||
await MemoryIndexManager.prototype.sync.call(harness as unknown as MemoryIndexManager, {
|
||||
reason: "test",
|
||||
});
|
||||
|
||||
expect(harness.runSync).toHaveBeenCalledTimes(2);
|
||||
expect(harness.openDatabase).toHaveBeenCalledTimes(1);
|
||||
expect(initialClose).toHaveBeenCalledTimes(1);
|
||||
expectReadonlyRecoveryStatus(harness, params.expectedLastError);
|
||||
}
|
||||
|
||||
beforeEach(async () => {
|
||||
resetEmbeddingMocks();
|
||||
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-readonly-"));
|
||||
indexPath = path.join(workspaceDir, "index.sqlite");
|
||||
await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
|
||||
await fs.writeFile(path.join(workspaceDir, "MEMORY.md"), "Hello memory.");
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
vi.restoreAllMocks();
|
||||
if (manager) {
|
||||
await manager.close();
|
||||
manager = null;
|
||||
}
|
||||
await fs.rm(workspaceDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("reopens sqlite and retries once when sync hits SQLITE_READONLY", async () => {
|
||||
await expectReadonlyRetry({
|
||||
firstError: new Error("attempt to write a readonly database"),
|
||||
expectedLastError: "attempt to write a readonly database",
|
||||
});
|
||||
});
|
||||
|
||||
it("reopens sqlite and retries when readonly appears in error code", async () => {
|
||||
await expectReadonlyRetry({
|
||||
firstError: { message: "write failed", code: "SQLITE_READONLY" },
|
||||
expectedLastError: "write failed",
|
||||
});
|
||||
});
|
||||
|
||||
it("does not retry non-readonly sync errors", async () => {
|
||||
const { harness, initialClose } = createReadonlyRecoveryHarness();
|
||||
harness.runSync.mockRejectedValueOnce(new Error("embedding timeout"));
|
||||
|
||||
await expect(
|
||||
MemoryIndexManager.prototype.sync.call(harness as unknown as MemoryIndexManager, {
|
||||
reason: "test",
|
||||
}),
|
||||
).rejects.toThrow("embedding timeout");
|
||||
expect(harness.runSync).toHaveBeenCalledTimes(1);
|
||||
expect(harness.openDatabase).not.toHaveBeenCalled();
|
||||
expect(initialClose).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("sets busy_timeout on memory sqlite connections", async () => {
|
||||
const currentManager = await createRealManager();
|
||||
const db = (currentManager as unknown as { db: DatabaseSync }).db;
|
||||
const row = db.prepare("PRAGMA busy_timeout").get() as
|
||||
| { busy_timeout?: number; timeout?: number }
|
||||
| undefined;
|
||||
const busyTimeout = row?.busy_timeout ?? row?.timeout;
|
||||
expect(busyTimeout).toBe(5000);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,33 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { runDetachedMemorySync } from "./manager-sync-ops.js";
|
||||
|
||||
describe("memory manager sync failures", () => {
|
||||
beforeEach(() => {
|
||||
vi.useFakeTimers();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it("does not raise unhandledRejection when watch-triggered sync fails", async () => {
|
||||
const unhandled: unknown[] = [];
|
||||
const handler = (reason: unknown) => {
|
||||
unhandled.push(reason);
|
||||
};
|
||||
process.on("unhandledRejection", handler);
|
||||
const syncSpy = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(new Error("openai embeddings failed: 400 bad request"));
|
||||
setTimeout(() => {
|
||||
runDetachedMemorySync(syncSpy, "watch");
|
||||
}, 1);
|
||||
|
||||
await vi.runOnlyPendingTimersAsync();
|
||||
vi.useRealTimers();
|
||||
await syncSpy.mock.results[0]?.value?.catch(() => undefined);
|
||||
|
||||
process.off("unhandledRejection", handler);
|
||||
expect(unhandled).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
@@ -1,15 +1,6 @@
|
||||
import type { DatabaseSync } from "node:sqlite";
|
||||
import { type FSWatcher } from "chokidar";
|
||||
import {
|
||||
createEmbeddingProvider,
|
||||
type EmbeddingProvider,
|
||||
type EmbeddingProviderRequest,
|
||||
type EmbeddingProviderResult,
|
||||
type GeminiEmbeddingClient,
|
||||
type MistralEmbeddingClient,
|
||||
type OllamaEmbeddingClient,
|
||||
type OpenAiEmbeddingClient,
|
||||
type VoyageEmbeddingClient,
|
||||
extractKeywords,
|
||||
readMemoryFile,
|
||||
resolveAgentDir,
|
||||
@@ -25,7 +16,15 @@ import {
|
||||
type OpenClawConfig,
|
||||
type ResolvedMemorySearchConfig,
|
||||
createSubsystemLogger,
|
||||
} from "../api.js";
|
||||
} from "../engine-host-api.js";
|
||||
import {
|
||||
createEmbeddingProvider,
|
||||
type EmbeddingProvider,
|
||||
type EmbeddingProviderId,
|
||||
type EmbeddingProviderRequest,
|
||||
type EmbeddingProviderResult,
|
||||
type EmbeddingProviderRuntime,
|
||||
} from "./embeddings.js";
|
||||
import { bm25RankToScore, buildFtsQuery, mergeHybridResults } from "./hybrid.js";
|
||||
import { MemoryManagerEmbeddingOps } from "./manager-embedding-ops.js";
|
||||
import { searchKeyword, searchVector } from "./manager-search.js";
|
||||
@@ -83,14 +82,10 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
private readonly requestedProvider: EmbeddingProviderRequest;
|
||||
private providerInitPromise: Promise<void> | null = null;
|
||||
private providerInitialized = false;
|
||||
protected fallbackFrom?: "openai" | "local" | "gemini" | "voyage" | "mistral" | "ollama";
|
||||
protected fallbackFrom?: EmbeddingProviderId;
|
||||
protected fallbackReason?: string;
|
||||
private providerUnavailableReason?: string;
|
||||
protected openAi?: OpenAiEmbeddingClient;
|
||||
protected gemini?: GeminiEmbeddingClient;
|
||||
protected voyage?: VoyageEmbeddingClient;
|
||||
protected mistral?: MistralEmbeddingClient;
|
||||
protected ollama?: OllamaEmbeddingClient;
|
||||
protected providerRuntime?: EmbeddingProviderRuntime;
|
||||
protected batch: {
|
||||
enabled: boolean;
|
||||
wait: boolean;
|
||||
@@ -270,11 +265,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
this.fallbackFrom = providerResult.fallbackFrom;
|
||||
this.fallbackReason = providerResult.fallbackReason;
|
||||
this.providerUnavailableReason = providerResult.providerUnavailableReason;
|
||||
this.openAi = providerResult.openAi;
|
||||
this.gemini = providerResult.gemini;
|
||||
this.voyage = providerResult.voyage;
|
||||
this.mistral = providerResult.mistral;
|
||||
this.ollama = providerResult.ollama;
|
||||
this.providerRuntime = providerResult.runtime;
|
||||
this.providerInitialized = true;
|
||||
}
|
||||
|
||||
|
||||
138
extensions/memory-core/src/memory/manager.vector-dedupe.test.ts
Normal file
138
extensions/memory-core/src/memory/manager.vector-dedupe.test.ts
Normal file
@@ -0,0 +1,138 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import type { MemoryIndexManager } from "./index.js";
|
||||
|
||||
vi.mock("./embeddings.js", () => {
|
||||
return {
|
||||
createEmbeddingProvider: async () => ({
|
||||
requestedProvider: "openai",
|
||||
provider: {
|
||||
id: "mock",
|
||||
model: "mock-embed",
|
||||
embedQuery: async () => [0.1, 0.2, 0.3],
|
||||
embedBatch: async (texts: string[]) => texts.map((_, index) => [index + 1, 0, 0]),
|
||||
},
|
||||
}),
|
||||
};
|
||||
});
|
||||
|
||||
type MemoryInternalModule = typeof import("./internal.js");
|
||||
type TestManagerModule = typeof import("./test-manager.js");
|
||||
type MemoryIndexModule = typeof import("./index.js");
|
||||
|
||||
let buildFileEntry: MemoryInternalModule["buildFileEntry"];
|
||||
let createMemoryManagerOrThrow: TestManagerModule["createMemoryManagerOrThrow"];
|
||||
let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];
|
||||
|
||||
async function ensureProviderInitialized(manager: MemoryIndexManager): Promise<void> {
|
||||
await (
|
||||
manager as unknown as {
|
||||
ensureProviderInitialized: () => Promise<void>;
|
||||
}
|
||||
).ensureProviderInitialized();
|
||||
}
|
||||
|
||||
describe("memory vector dedupe", () => {
|
||||
let workspaceDir: string;
|
||||
let indexPath: string;
|
||||
let manager: MemoryIndexManager | null = null;
|
||||
|
||||
async function seedMemoryWorkspace(rootDir: string) {
|
||||
await fs.mkdir(path.join(rootDir, "memory"));
|
||||
await fs.writeFile(path.join(rootDir, "MEMORY.md"), "Hello memory.");
|
||||
}
|
||||
|
||||
async function closeManagerIfOpen() {
|
||||
if (!manager) {
|
||||
return;
|
||||
}
|
||||
await manager.close();
|
||||
manager = null;
|
||||
}
|
||||
|
||||
beforeEach(async () => {
|
||||
vi.resetModules();
|
||||
({ buildFileEntry } = await import("./internal.js"));
|
||||
({ createMemoryManagerOrThrow } = await import("./test-manager.js"));
|
||||
({ closeAllMemorySearchManagers } = await import("./index.js"));
|
||||
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-"));
|
||||
indexPath = path.join(workspaceDir, "index.sqlite");
|
||||
await seedMemoryWorkspace(workspaceDir);
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await closeManagerIfOpen();
|
||||
await closeAllMemorySearchManagers();
|
||||
await fs.rm(workspaceDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("deletes existing vector rows before inserting replacements", async () => {
|
||||
const cfg = {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: indexPath, vector: { enabled: true } },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
cache: { enabled: false },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
manager = await createMemoryManagerOrThrow(cfg);
|
||||
await ensureProviderInitialized(manager);
|
||||
|
||||
const db = (
|
||||
manager as unknown as {
|
||||
db: { exec: (sql: string) => void; prepare: (sql: string) => unknown };
|
||||
}
|
||||
).db;
|
||||
db.exec("CREATE TABLE IF NOT EXISTS chunks_vec (id TEXT PRIMARY KEY, embedding BLOB)");
|
||||
|
||||
(
|
||||
manager as unknown as { ensureVectorReady: (dims?: number) => Promise<boolean> }
|
||||
).ensureVectorReady = async () => true;
|
||||
await (
|
||||
manager as unknown as { ensureProviderInitialized: () => Promise<void> }
|
||||
).ensureProviderInitialized();
|
||||
|
||||
const entry = await buildFileEntry(path.join(workspaceDir, "MEMORY.md"), workspaceDir);
|
||||
if (!entry) {
|
||||
throw new Error("entry missing");
|
||||
}
|
||||
await (
|
||||
manager as unknown as {
|
||||
indexFile: (entry: unknown, options: { source: "memory" }) => Promise<void>;
|
||||
}
|
||||
).indexFile(entry, { source: "memory" });
|
||||
await (
|
||||
manager as unknown as {
|
||||
indexFile: (entry: unknown, options: { source: "memory" }) => Promise<void>;
|
||||
}
|
||||
).indexFile(entry, { source: "memory" });
|
||||
|
||||
db.exec(`
|
||||
CREATE TRIGGER IF NOT EXISTS fail_if_vector_row_not_deleted
|
||||
BEFORE INSERT ON chunks_vec
|
||||
WHEN EXISTS (SELECT 1 FROM chunks_vec WHERE id = NEW.id)
|
||||
BEGIN
|
||||
SELECT RAISE(FAIL, 'vector row not deleted before insert');
|
||||
END;
|
||||
`);
|
||||
|
||||
await expect(
|
||||
(
|
||||
manager as unknown as {
|
||||
indexFile: (entry: unknown, options: { source: "memory" }) => Promise<void>;
|
||||
}
|
||||
).indexFile(entry, { source: "memory" }),
|
||||
).resolves.toBeUndefined();
|
||||
});
|
||||
});
|
||||
159
extensions/memory-core/src/memory/manager.watcher-config.test.ts
Normal file
159
extensions/memory-core/src/memory/manager.watcher-config.test.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import type { MemorySearchConfig } from "../engine-host-api.js";
|
||||
import type { MemoryIndexManager } from "./index.js";
|
||||
|
||||
const { watchMock } = vi.hoisted(() => ({
|
||||
watchMock: vi.fn(() => ({
|
||||
on: vi.fn(),
|
||||
close: vi.fn(async () => undefined),
|
||||
})),
|
||||
}));
|
||||
|
||||
vi.mock("chokidar", () => ({
|
||||
default: { watch: watchMock },
|
||||
watch: watchMock,
|
||||
}));
|
||||
|
||||
vi.mock("./sqlite-vec.js", () => ({
|
||||
loadSqliteVecExtension: async () => ({ ok: false, error: "sqlite-vec disabled in tests" }),
|
||||
}));
|
||||
|
||||
vi.mock("./embeddings.js", () => ({
|
||||
createEmbeddingProvider: async () => ({
|
||||
requestedProvider: "openai",
|
||||
provider: {
|
||||
id: "mock",
|
||||
model: "mock-embed",
|
||||
embedQuery: async () => [1, 0],
|
||||
embedBatch: async (texts: string[]) => texts.map(() => [1, 0]),
|
||||
},
|
||||
}),
|
||||
}));
|
||||
|
||||
type MemoryIndexModule = typeof import("./index.js");
|
||||
|
||||
let getMemorySearchManager: MemoryIndexModule["getMemorySearchManager"];
|
||||
let closeAllMemorySearchManagers: MemoryIndexModule["closeAllMemorySearchManagers"];
|
||||
|
||||
describe("memory watcher config", () => {
|
||||
let manager: MemoryIndexManager | null = null;
|
||||
let workspaceDir = "";
|
||||
let extraDir = "";
|
||||
|
||||
beforeEach(async () => {
|
||||
vi.resetModules();
|
||||
({ getMemorySearchManager, closeAllMemorySearchManagers } = await import("./index.js"));
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
watchMock.mockClear();
|
||||
if (manager) {
|
||||
await manager.close();
|
||||
manager = null;
|
||||
}
|
||||
await closeAllMemorySearchManagers();
|
||||
if (workspaceDir) {
|
||||
await fs.rm(workspaceDir, { recursive: true, force: true });
|
||||
workspaceDir = "";
|
||||
extraDir = "";
|
||||
}
|
||||
});
|
||||
|
||||
async function setupWatcherWorkspace(seedFile: { name: string; contents: string }) {
|
||||
workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-memory-watch-"));
|
||||
extraDir = path.join(workspaceDir, "extra");
|
||||
await fs.mkdir(path.join(workspaceDir, "memory"), { recursive: true });
|
||||
await fs.mkdir(extraDir, { recursive: true });
|
||||
await fs.writeFile(path.join(extraDir, seedFile.name), seedFile.contents);
|
||||
}
|
||||
|
||||
function createWatcherConfig(overrides?: Partial<MemorySearchConfig>): OpenClawConfig {
|
||||
const defaults: NonNullable<NonNullable<OpenClawConfig["agents"]>["defaults"]> = {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: path.join(workspaceDir, "index.sqlite"), vector: { enabled: false } },
|
||||
sync: { watch: true, watchDebounceMs: 25, onSessionStart: false, onSearch: false },
|
||||
query: { minScore: 0, hybrid: { enabled: false } },
|
||||
extraPaths: [extraDir],
|
||||
...overrides,
|
||||
},
|
||||
};
|
||||
return {
|
||||
agents: {
|
||||
defaults,
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
}
|
||||
|
||||
async function expectWatcherManager(cfg: OpenClawConfig) {
|
||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
||||
expect(result.manager).not.toBeNull();
|
||||
if (!result.manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
manager = result.manager as unknown as MemoryIndexManager;
|
||||
}
|
||||
|
||||
it("watches markdown globs and ignores dependency directories", async () => {
|
||||
await setupWatcherWorkspace({ name: "notes.md", contents: "hello" });
|
||||
const cfg = createWatcherConfig();
|
||||
|
||||
await expectWatcherManager(cfg);
|
||||
|
||||
expect(watchMock).toHaveBeenCalledTimes(1);
|
||||
const [watchedPaths, options] = watchMock.mock.calls[0] as unknown as [
|
||||
string[],
|
||||
Record<string, unknown>,
|
||||
];
|
||||
expect(watchedPaths).toEqual(
|
||||
expect.arrayContaining([
|
||||
path.join(workspaceDir, "MEMORY.md"),
|
||||
path.join(workspaceDir, "memory.md"),
|
||||
path.join(workspaceDir, "memory", "**", "*.md"),
|
||||
path.join(extraDir, "**", "*.md"),
|
||||
]),
|
||||
);
|
||||
expect(options.ignoreInitial).toBe(true);
|
||||
expect(options.awaitWriteFinish).toEqual({ stabilityThreshold: 25, pollInterval: 100 });
|
||||
|
||||
const ignored = options.ignored as ((watchPath: string) => boolean) | undefined;
|
||||
expect(ignored).toBeTypeOf("function");
|
||||
expect(ignored?.(path.join(workspaceDir, "memory", "node_modules", "pkg", "index.md"))).toBe(
|
||||
true,
|
||||
);
|
||||
expect(ignored?.(path.join(workspaceDir, "memory", ".venv", "lib", "python.md"))).toBe(true);
|
||||
expect(ignored?.(path.join(workspaceDir, "memory", "project", "notes.md"))).toBe(false);
|
||||
});
|
||||
|
||||
it("watches multimodal extensions with case-insensitive globs", async () => {
|
||||
await setupWatcherWorkspace({ name: "PHOTO.PNG", contents: "png" });
|
||||
const cfg = createWatcherConfig({
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
fallback: "none",
|
||||
multimodal: { enabled: true, modalities: ["image", "audio"] },
|
||||
});
|
||||
|
||||
await expectWatcherManager(cfg);
|
||||
|
||||
expect(watchMock).toHaveBeenCalledTimes(1);
|
||||
const [watchedPaths] = watchMock.mock.calls[0] as unknown as [
|
||||
string[],
|
||||
Record<string, unknown>,
|
||||
];
|
||||
expect(watchedPaths).toEqual(
|
||||
expect.arrayContaining([
|
||||
path.join(extraDir, "**", "*.[pP][nN][gG]"),
|
||||
path.join(extraDir, "**", "*.[wW][aA][vV]"),
|
||||
]),
|
||||
);
|
||||
});
|
||||
});
|
||||
390
extensions/memory-core/src/memory/mmr.test.ts
Normal file
390
extensions/memory-core/src/memory/mmr.test.ts
Normal file
@@ -0,0 +1,390 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
tokenize,
|
||||
jaccardSimilarity,
|
||||
textSimilarity,
|
||||
computeMMRScore,
|
||||
mmrRerank,
|
||||
applyMMRToHybridResults,
|
||||
DEFAULT_MMR_CONFIG,
|
||||
type MMRItem,
|
||||
} from "./mmr.js";
|
||||
|
||||
describe("tokenize", () => {
|
||||
it("normalizes, filters, and deduplicates token sets", () => {
|
||||
const cases = [
|
||||
{
|
||||
name: "alphanumeric lowercase",
|
||||
input: "Hello World 123",
|
||||
expected: ["hello", "world", "123"],
|
||||
},
|
||||
{ name: "empty string", input: "", expected: [] },
|
||||
{ name: "special chars only", input: "!@#$%^&*()", expected: [] },
|
||||
{
|
||||
name: "underscores",
|
||||
input: "hello_world test_case",
|
||||
expected: ["hello_world", "test_case"],
|
||||
},
|
||||
{
|
||||
name: "dedupe repeated tokens",
|
||||
input: "hello hello world world",
|
||||
expected: ["hello", "world"],
|
||||
},
|
||||
] as const;
|
||||
|
||||
for (const testCase of cases) {
|
||||
expect(tokenize(testCase.input), testCase.name).toEqual(new Set(testCase.expected));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("jaccardSimilarity", () => {
|
||||
it("computes expected scores for overlap edge cases", () => {
|
||||
const cases = [
|
||||
{
|
||||
name: "identical sets",
|
||||
left: new Set(["a", "b", "c"]),
|
||||
right: new Set(["a", "b", "c"]),
|
||||
expected: 1,
|
||||
},
|
||||
{ name: "disjoint sets", left: new Set(["a", "b"]), right: new Set(["c", "d"]), expected: 0 },
|
||||
{ name: "two empty sets", left: new Set<string>(), right: new Set<string>(), expected: 1 },
|
||||
{
|
||||
name: "left non-empty right empty",
|
||||
left: new Set(["a"]),
|
||||
right: new Set<string>(),
|
||||
expected: 0,
|
||||
},
|
||||
{
|
||||
name: "left empty right non-empty",
|
||||
left: new Set<string>(),
|
||||
right: new Set(["a"]),
|
||||
expected: 0,
|
||||
},
|
||||
{
|
||||
name: "partial overlap",
|
||||
left: new Set(["a", "b", "c"]),
|
||||
right: new Set(["b", "c", "d"]),
|
||||
expected: 0.5,
|
||||
},
|
||||
] as const;
|
||||
|
||||
for (const testCase of cases) {
|
||||
expect(jaccardSimilarity(testCase.left, testCase.right), testCase.name).toBe(
|
||||
testCase.expected,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it("is symmetric", () => {
|
||||
const setA = new Set(["a", "b"]);
|
||||
const setB = new Set(["b", "c"]);
|
||||
expect(jaccardSimilarity(setA, setB)).toBe(jaccardSimilarity(setB, setA));
|
||||
});
|
||||
});
|
||||
|
||||
describe("textSimilarity", () => {
|
||||
it("computes expected text-level similarity cases", () => {
|
||||
const cases = [
|
||||
{ name: "identical", left: "hello world", right: "hello world", expected: 1 },
|
||||
{ name: "same words reordered", left: "hello world", right: "world hello", expected: 1 },
|
||||
{ name: "different text", left: "hello world", right: "foo bar", expected: 0 },
|
||||
{ name: "case insensitive", left: "Hello World", right: "hello world", expected: 1 },
|
||||
] as const;
|
||||
|
||||
for (const testCase of cases) {
|
||||
expect(textSimilarity(testCase.left, testCase.right), testCase.name).toBe(testCase.expected);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("computeMMRScore", () => {
|
||||
it("balances relevance and diversity across lambda settings", () => {
|
||||
const cases = [
|
||||
{
|
||||
name: "lambda=1 relevance only",
|
||||
relevance: 0.8,
|
||||
similarity: 0.5,
|
||||
lambda: 1,
|
||||
expected: 0.8,
|
||||
},
|
||||
{
|
||||
name: "lambda=0 diversity only",
|
||||
relevance: 0.8,
|
||||
similarity: 0.5,
|
||||
lambda: 0,
|
||||
expected: -0.5,
|
||||
},
|
||||
{ name: "lambda=0.5 mixed", relevance: 0.8, similarity: 0.6, lambda: 0.5, expected: 0.1 },
|
||||
{ name: "default lambda math", relevance: 1.0, similarity: 0.5, lambda: 0.7, expected: 0.55 },
|
||||
] as const;
|
||||
|
||||
for (const testCase of cases) {
|
||||
expect(
|
||||
computeMMRScore(testCase.relevance, testCase.similarity, testCase.lambda),
|
||||
testCase.name,
|
||||
).toBeCloseTo(testCase.expected);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("empty input behavior", () => {
|
||||
it("returns empty array for empty input", () => {
|
||||
expect(mmrRerank([])).toEqual([]);
|
||||
expect(applyMMRToHybridResults([])).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("mmrRerank", () => {
|
||||
describe("edge cases", () => {
|
||||
it("returns single item unchanged", () => {
|
||||
const items: MMRItem[] = [{ id: "1", score: 0.9, content: "hello" }];
|
||||
expect(mmrRerank(items)).toEqual(items);
|
||||
});
|
||||
|
||||
it("returns copy, not original array", () => {
|
||||
const items: MMRItem[] = [{ id: "1", score: 0.9, content: "hello" }];
|
||||
const result = mmrRerank(items);
|
||||
expect(result).not.toBe(items);
|
||||
});
|
||||
|
||||
it("returns items unchanged when disabled", () => {
|
||||
const items: MMRItem[] = [
|
||||
{ id: "1", score: 0.9, content: "hello" },
|
||||
{ id: "2", score: 0.8, content: "hello" },
|
||||
];
|
||||
const result = mmrRerank(items, { enabled: false });
|
||||
expect(result).toEqual(items);
|
||||
});
|
||||
});
|
||||
|
||||
describe("lambda edge cases", () => {
|
||||
const diverseItems: MMRItem[] = [
|
||||
{ id: "1", score: 1.0, content: "apple banana cherry" },
|
||||
{ id: "2", score: 0.9, content: "apple banana date" },
|
||||
{ id: "3", score: 0.8, content: "elderberry fig grape" },
|
||||
];
|
||||
|
||||
it("lambda=1 returns pure relevance order", () => {
|
||||
const result = mmrRerank(diverseItems, { lambda: 1 });
|
||||
expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]);
|
||||
});
|
||||
|
||||
it("lambda=0 maximizes diversity", () => {
|
||||
const result = mmrRerank(diverseItems, { enabled: true, lambda: 0 });
|
||||
// First item is still highest score (no penalty yet)
|
||||
expect(result[0].id).toBe("1");
|
||||
// Second should be most different from first
|
||||
expect(result[1].id).toBe("3"); // elderberry... is most different
|
||||
});
|
||||
|
||||
it("clamps lambda > 1 to 1", () => {
|
||||
const result = mmrRerank(diverseItems, { lambda: 1.5 });
|
||||
expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]);
|
||||
});
|
||||
|
||||
it("clamps lambda < 0 to 0", () => {
|
||||
const result = mmrRerank(diverseItems, { enabled: true, lambda: -0.5 });
|
||||
expect(result[0].id).toBe("1");
|
||||
expect(result[1].id).toBe("3");
|
||||
});
|
||||
});
|
||||
|
||||
describe("diversity behavior", () => {
|
||||
it("promotes diverse results over similar high-scoring ones", () => {
|
||||
const items: MMRItem[] = [
|
||||
{ id: "1", score: 1.0, content: "machine learning neural networks" },
|
||||
{ id: "2", score: 0.95, content: "machine learning deep learning" },
|
||||
{ id: "3", score: 0.9, content: "database systems sql queries" },
|
||||
{ id: "4", score: 0.85, content: "machine learning algorithms" },
|
||||
];
|
||||
|
||||
const result = mmrRerank(items, { enabled: true, lambda: 0.5 });
|
||||
|
||||
// First is always highest score
|
||||
expect(result[0].id).toBe("1");
|
||||
// Second should be the diverse database item, not another ML item
|
||||
expect(result[1].id).toBe("3");
|
||||
});
|
||||
|
||||
it("handles items with identical content", () => {
|
||||
const items: MMRItem[] = [
|
||||
{ id: "1", score: 1.0, content: "identical content" },
|
||||
{ id: "2", score: 0.9, content: "identical content" },
|
||||
{ id: "3", score: 0.8, content: "different stuff" },
|
||||
];
|
||||
|
||||
const result = mmrRerank(items, { enabled: true, lambda: 0.5 });
|
||||
expect(result[0].id).toBe("1");
|
||||
// Second should be different, not identical duplicate
|
||||
expect(result[1].id).toBe("3");
|
||||
});
|
||||
|
||||
it("handles all identical content gracefully", () => {
|
||||
const items: MMRItem[] = [
|
||||
{ id: "1", score: 1.0, content: "same" },
|
||||
{ id: "2", score: 0.9, content: "same" },
|
||||
{ id: "3", score: 0.8, content: "same" },
|
||||
];
|
||||
|
||||
const result = mmrRerank(items, { lambda: 0.7 });
|
||||
// Should still complete without error, order by score as tiebreaker
|
||||
expect(result).toHaveLength(3);
|
||||
});
|
||||
});
|
||||
|
||||
describe("tie-breaking", () => {
|
||||
it("uses original score as tiebreaker", () => {
|
||||
const items: MMRItem[] = [
|
||||
{ id: "1", score: 1.0, content: "unique content one" },
|
||||
{ id: "2", score: 0.9, content: "unique content two" },
|
||||
{ id: "3", score: 0.8, content: "unique content three" },
|
||||
];
|
||||
|
||||
// With very different content and lambda=1, should be pure score order
|
||||
const result = mmrRerank(items, { lambda: 1 });
|
||||
expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]);
|
||||
});
|
||||
|
||||
it("preserves all items even with same MMR scores", () => {
|
||||
const items: MMRItem[] = [
|
||||
{ id: "1", score: 0.5, content: "a" },
|
||||
{ id: "2", score: 0.5, content: "b" },
|
||||
{ id: "3", score: 0.5, content: "c" },
|
||||
];
|
||||
|
||||
const result = mmrRerank(items, { lambda: 0.7 });
|
||||
expect(result).toHaveLength(3);
|
||||
expect(new Set(result.map((i) => i.id))).toEqual(new Set(["1", "2", "3"]));
|
||||
});
|
||||
});
|
||||
|
||||
describe("score normalization", () => {
|
||||
it("handles items with same scores", () => {
|
||||
const items: MMRItem[] = [
|
||||
{ id: "1", score: 0.5, content: "hello world" },
|
||||
{ id: "2", score: 0.5, content: "foo bar" },
|
||||
];
|
||||
|
||||
const result = mmrRerank(items, { lambda: 0.7 });
|
||||
expect(result).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("handles negative scores", () => {
|
||||
const items: MMRItem[] = [
|
||||
{ id: "1", score: -0.5, content: "hello world" },
|
||||
{ id: "2", score: -1.0, content: "foo bar" },
|
||||
];
|
||||
|
||||
const result = mmrRerank(items, { lambda: 0.7 });
|
||||
expect(result).toHaveLength(2);
|
||||
// Higher score (less negative) should come first
|
||||
expect(result[0].id).toBe("1");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("applyMMRToHybridResults", () => {
|
||||
type HybridResult = {
|
||||
path: string;
|
||||
startLine: number;
|
||||
endLine: number;
|
||||
score: number;
|
||||
snippet: string;
|
||||
source: string;
|
||||
};
|
||||
|
||||
it("preserves all original fields", () => {
|
||||
const results: HybridResult[] = [
|
||||
{
|
||||
path: "/test/file.ts",
|
||||
startLine: 1,
|
||||
endLine: 10,
|
||||
score: 0.9,
|
||||
snippet: "hello world",
|
||||
source: "memory",
|
||||
},
|
||||
];
|
||||
|
||||
const reranked = applyMMRToHybridResults(results);
|
||||
expect(reranked[0]).toEqual(results[0]);
|
||||
});
|
||||
|
||||
it("creates unique IDs from path and startLine", () => {
|
||||
const results: HybridResult[] = [
|
||||
{
|
||||
path: "/test/a.ts",
|
||||
startLine: 1,
|
||||
endLine: 10,
|
||||
score: 0.9,
|
||||
snippet: "same content here",
|
||||
source: "memory",
|
||||
},
|
||||
{
|
||||
path: "/test/a.ts",
|
||||
startLine: 20,
|
||||
endLine: 30,
|
||||
score: 0.8,
|
||||
snippet: "same content here",
|
||||
source: "memory",
|
||||
},
|
||||
];
|
||||
|
||||
// Should work without ID collision
|
||||
const reranked = applyMMRToHybridResults(results);
|
||||
expect(reranked).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("re-ranks results for diversity", () => {
|
||||
const results: HybridResult[] = [
|
||||
{
|
||||
path: "/a.ts",
|
||||
startLine: 1,
|
||||
endLine: 10,
|
||||
score: 1.0,
|
||||
snippet: "function add numbers together",
|
||||
source: "memory",
|
||||
},
|
||||
{
|
||||
path: "/b.ts",
|
||||
startLine: 1,
|
||||
endLine: 10,
|
||||
score: 0.95,
|
||||
snippet: "function add values together",
|
||||
source: "memory",
|
||||
},
|
||||
{
|
||||
path: "/c.ts",
|
||||
startLine: 1,
|
||||
endLine: 10,
|
||||
score: 0.9,
|
||||
snippet: "database connection pool",
|
||||
source: "memory",
|
||||
},
|
||||
];
|
||||
|
||||
const reranked = applyMMRToHybridResults(results, { enabled: true, lambda: 0.5 });
|
||||
|
||||
// First stays the same (highest score)
|
||||
expect(reranked[0].path).toBe("/a.ts");
|
||||
// Second should be the diverse one
|
||||
expect(reranked[1].path).toBe("/c.ts");
|
||||
});
|
||||
|
||||
it("respects disabled config", () => {
|
||||
const results: HybridResult[] = [
|
||||
{ path: "/a.ts", startLine: 1, endLine: 10, score: 0.9, snippet: "test", source: "memory" },
|
||||
{ path: "/b.ts", startLine: 1, endLine: 10, score: 0.8, snippet: "test", source: "memory" },
|
||||
];
|
||||
|
||||
const reranked = applyMMRToHybridResults(results, { enabled: false });
|
||||
expect(reranked).toEqual(results);
|
||||
});
|
||||
});
|
||||
|
||||
describe("DEFAULT_MMR_CONFIG", () => {
|
||||
it("has expected default values", () => {
|
||||
expect(DEFAULT_MMR_CONFIG.enabled).toBe(false);
|
||||
expect(DEFAULT_MMR_CONFIG.lambda).toBe(0.7);
|
||||
});
|
||||
});
|
||||
359
extensions/memory-core/src/memory/provider-adapters.ts
Normal file
359
extensions/memory-core/src/memory/provider-adapters.ts
Normal file
@@ -0,0 +1,359 @@
|
||||
import fsSync from "node:fs";
|
||||
import {
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
DEFAULT_LOCAL_MODEL,
|
||||
DEFAULT_MISTRAL_EMBEDDING_MODEL,
|
||||
DEFAULT_OLLAMA_EMBEDDING_MODEL,
|
||||
DEFAULT_OPENAI_EMBEDDING_MODEL,
|
||||
DEFAULT_VOYAGE_EMBEDDING_MODEL,
|
||||
OPENAI_BATCH_ENDPOINT,
|
||||
buildGeminiEmbeddingRequest,
|
||||
createGeminiEmbeddingProvider,
|
||||
createLocalEmbeddingProvider,
|
||||
createMistralEmbeddingProvider,
|
||||
createOllamaEmbeddingProvider,
|
||||
createOpenAiEmbeddingProvider,
|
||||
createVoyageEmbeddingProvider,
|
||||
hasNonTextEmbeddingParts,
|
||||
listMemoryEmbeddingProviders,
|
||||
resolveUserPath,
|
||||
runGeminiEmbeddingBatches,
|
||||
runOpenAiEmbeddingBatches,
|
||||
runVoyageEmbeddingBatches,
|
||||
type MemoryEmbeddingProviderAdapter,
|
||||
} from "../engine-host-api.js";
|
||||
|
||||
function formatErrorMessage(err: unknown): string {
|
||||
return err instanceof Error ? err.message : String(err);
|
||||
}
|
||||
|
||||
function isMissingApiKeyError(err: unknown): boolean {
|
||||
return formatErrorMessage(err).includes("No API key found for provider");
|
||||
}
|
||||
|
||||
function sanitizeHeaders(
|
||||
headers: Record<string, string>,
|
||||
excludedHeaderNames: string[],
|
||||
): Array<[string, string]> {
|
||||
const excluded = new Set(excludedHeaderNames.map((name) => name.toLowerCase()));
|
||||
return Object.entries(headers)
|
||||
.filter(([key]) => !excluded.has(key.toLowerCase()))
|
||||
.toSorted(([a], [b]) => a.localeCompare(b))
|
||||
.map(([key, value]) => [key, value]);
|
||||
}
|
||||
|
||||
function mapBatchEmbeddingsByIndex(byCustomId: Map<string, number[]>, count: number): number[][] {
|
||||
const embeddings: number[][] = [];
|
||||
for (let index = 0; index < count; index += 1) {
|
||||
embeddings.push(byCustomId.get(String(index)) ?? []);
|
||||
}
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
function isNodeLlamaCppMissing(err: unknown): boolean {
|
||||
if (!(err instanceof Error)) {
|
||||
return false;
|
||||
}
|
||||
const code = (err as Error & { code?: unknown }).code;
|
||||
return code === "ERR_MODULE_NOT_FOUND" && err.message.includes("node-llama-cpp");
|
||||
}
|
||||
|
||||
function formatLocalSetupError(err: unknown): string {
|
||||
const detail = formatErrorMessage(err);
|
||||
const missing = isNodeLlamaCppMissing(err);
|
||||
return [
|
||||
"Local embeddings unavailable.",
|
||||
missing
|
||||
? "Reason: optional dependency node-llama-cpp is missing (or failed to install)."
|
||||
: detail
|
||||
? `Reason: ${detail}`
|
||||
: undefined,
|
||||
missing && detail ? `Detail: ${detail}` : null,
|
||||
"To enable local embeddings:",
|
||||
"1) Use Node 24 (recommended for installs/updates; Node 22 LTS, currently 22.14+, remains supported)",
|
||||
missing
|
||||
? "2) Reinstall OpenClaw (this should install node-llama-cpp): npm i -g openclaw@latest"
|
||||
: null,
|
||||
"3) If you use pnpm: pnpm approve-builds (select node-llama-cpp), then pnpm rebuild node-llama-cpp",
|
||||
...["openai", "gemini", "voyage", "mistral"].map(
|
||||
(provider) => `Or set agents.defaults.memorySearch.provider = "${provider}" (remote).`,
|
||||
),
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join("\n");
|
||||
}
|
||||
|
||||
function canAutoSelectLocal(modelPath?: string): boolean {
|
||||
const trimmed = modelPath?.trim();
|
||||
if (!trimmed) {
|
||||
return false;
|
||||
}
|
||||
if (/^(hf:|https?:)/i.test(trimmed)) {
|
||||
return false;
|
||||
}
|
||||
const resolved = resolveUserPath(trimmed);
|
||||
try {
|
||||
return fsSync.statSync(resolved).isFile();
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function supportsGeminiMultimodalEmbeddings(model: string): boolean {
|
||||
const normalized = model
|
||||
.trim()
|
||||
.replace(/^models\//, "")
|
||||
.replace(/^(gemini|google)\//, "");
|
||||
return normalized === "gemini-embedding-2-preview";
|
||||
}
|
||||
|
||||
const openAiAdapter: MemoryEmbeddingProviderAdapter = {
|
||||
id: "openai",
|
||||
defaultModel: DEFAULT_OPENAI_EMBEDDING_MODEL,
|
||||
transport: "remote",
|
||||
autoSelectPriority: 20,
|
||||
allowExplicitWhenConfiguredAuto: true,
|
||||
shouldContinueAutoSelection: isMissingApiKeyError,
|
||||
create: async (options) => {
|
||||
const { provider, client } = await createOpenAiEmbeddingProvider({
|
||||
...options,
|
||||
provider: "openai",
|
||||
fallback: "none",
|
||||
});
|
||||
return {
|
||||
provider,
|
||||
runtime: {
|
||||
id: "openai",
|
||||
cacheKeyData: {
|
||||
provider: "openai",
|
||||
baseUrl: client.baseUrl,
|
||||
model: client.model,
|
||||
headers: sanitizeHeaders(client.headers, ["authorization"]),
|
||||
},
|
||||
batchEmbed: async (batch) => {
|
||||
const byCustomId = await runOpenAiEmbeddingBatches({
|
||||
openAi: client,
|
||||
agentId: batch.agentId,
|
||||
requests: batch.chunks.map((chunk, index) => ({
|
||||
custom_id: String(index),
|
||||
method: "POST",
|
||||
url: OPENAI_BATCH_ENDPOINT,
|
||||
body: {
|
||||
model: client.model,
|
||||
input: chunk.text,
|
||||
},
|
||||
})),
|
||||
wait: batch.wait,
|
||||
concurrency: batch.concurrency,
|
||||
pollIntervalMs: batch.pollIntervalMs,
|
||||
timeoutMs: batch.timeoutMs,
|
||||
debug: batch.debug,
|
||||
});
|
||||
return mapBatchEmbeddingsByIndex(byCustomId, batch.chunks.length);
|
||||
},
|
||||
},
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
const geminiAdapter: MemoryEmbeddingProviderAdapter = {
|
||||
id: "gemini",
|
||||
defaultModel: DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
transport: "remote",
|
||||
autoSelectPriority: 30,
|
||||
allowExplicitWhenConfiguredAuto: true,
|
||||
supportsMultimodalEmbeddings: ({ model }) => supportsGeminiMultimodalEmbeddings(model),
|
||||
shouldContinueAutoSelection: isMissingApiKeyError,
|
||||
create: async (options) => {
|
||||
const { provider, client } = await createGeminiEmbeddingProvider({
|
||||
...options,
|
||||
provider: "gemini",
|
||||
fallback: "none",
|
||||
});
|
||||
return {
|
||||
provider,
|
||||
runtime: {
|
||||
id: "gemini",
|
||||
cacheKeyData: {
|
||||
provider: "gemini",
|
||||
baseUrl: client.baseUrl,
|
||||
model: client.model,
|
||||
outputDimensionality: client.outputDimensionality,
|
||||
headers: sanitizeHeaders(client.headers, ["authorization", "x-goog-api-key"]),
|
||||
},
|
||||
batchEmbed: async (batch) => {
|
||||
if (batch.chunks.some((chunk) => hasNonTextEmbeddingParts(chunk.embeddingInput))) {
|
||||
return null;
|
||||
}
|
||||
const byCustomId = await runGeminiEmbeddingBatches({
|
||||
gemini: client,
|
||||
agentId: batch.agentId,
|
||||
requests: batch.chunks.map((chunk, index) => ({
|
||||
custom_id: String(index),
|
||||
request: buildGeminiEmbeddingRequest({
|
||||
input: chunk.embeddingInput ?? { text: chunk.text },
|
||||
taskType: "RETRIEVAL_DOCUMENT",
|
||||
modelPath: client.modelPath,
|
||||
outputDimensionality: client.outputDimensionality,
|
||||
}),
|
||||
})),
|
||||
wait: batch.wait,
|
||||
concurrency: batch.concurrency,
|
||||
pollIntervalMs: batch.pollIntervalMs,
|
||||
timeoutMs: batch.timeoutMs,
|
||||
debug: batch.debug,
|
||||
});
|
||||
return mapBatchEmbeddingsByIndex(byCustomId, batch.chunks.length);
|
||||
},
|
||||
},
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
const voyageAdapter: MemoryEmbeddingProviderAdapter = {
|
||||
id: "voyage",
|
||||
defaultModel: DEFAULT_VOYAGE_EMBEDDING_MODEL,
|
||||
transport: "remote",
|
||||
autoSelectPriority: 40,
|
||||
allowExplicitWhenConfiguredAuto: true,
|
||||
shouldContinueAutoSelection: isMissingApiKeyError,
|
||||
create: async (options) => {
|
||||
const { provider, client } = await createVoyageEmbeddingProvider({
|
||||
...options,
|
||||
provider: "voyage",
|
||||
fallback: "none",
|
||||
});
|
||||
return {
|
||||
provider,
|
||||
runtime: {
|
||||
id: "voyage",
|
||||
batchEmbed: async (batch) => {
|
||||
const byCustomId = await runVoyageEmbeddingBatches({
|
||||
client,
|
||||
agentId: batch.agentId,
|
||||
requests: batch.chunks.map((chunk, index) => ({
|
||||
custom_id: String(index),
|
||||
body: {
|
||||
input: chunk.text,
|
||||
},
|
||||
})),
|
||||
wait: batch.wait,
|
||||
concurrency: batch.concurrency,
|
||||
pollIntervalMs: batch.pollIntervalMs,
|
||||
timeoutMs: batch.timeoutMs,
|
||||
debug: batch.debug,
|
||||
});
|
||||
return mapBatchEmbeddingsByIndex(byCustomId, batch.chunks.length);
|
||||
},
|
||||
},
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
const mistralAdapter: MemoryEmbeddingProviderAdapter = {
|
||||
id: "mistral",
|
||||
defaultModel: DEFAULT_MISTRAL_EMBEDDING_MODEL,
|
||||
transport: "remote",
|
||||
autoSelectPriority: 50,
|
||||
allowExplicitWhenConfiguredAuto: true,
|
||||
shouldContinueAutoSelection: isMissingApiKeyError,
|
||||
create: async (options) => {
|
||||
const { provider, client } = await createMistralEmbeddingProvider({
|
||||
...options,
|
||||
provider: "mistral",
|
||||
fallback: "none",
|
||||
});
|
||||
return {
|
||||
provider,
|
||||
runtime: {
|
||||
id: "mistral",
|
||||
cacheKeyData: {
|
||||
provider: "mistral",
|
||||
model: client.model,
|
||||
},
|
||||
},
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
const ollamaAdapter: MemoryEmbeddingProviderAdapter = {
|
||||
id: "ollama",
|
||||
defaultModel: DEFAULT_OLLAMA_EMBEDDING_MODEL,
|
||||
transport: "remote",
|
||||
create: async (options) => {
|
||||
const { provider, client } = await createOllamaEmbeddingProvider({
|
||||
...options,
|
||||
provider: "ollama",
|
||||
fallback: "none",
|
||||
});
|
||||
return {
|
||||
provider,
|
||||
runtime: {
|
||||
id: "ollama",
|
||||
cacheKeyData: {
|
||||
provider: "ollama",
|
||||
model: client.model,
|
||||
},
|
||||
},
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
const localAdapter: MemoryEmbeddingProviderAdapter = {
|
||||
id: "local",
|
||||
defaultModel: DEFAULT_LOCAL_MODEL,
|
||||
transport: "local",
|
||||
autoSelectPriority: 10,
|
||||
formatSetupError: formatLocalSetupError,
|
||||
shouldContinueAutoSelection: () => true,
|
||||
create: async (options) => {
|
||||
const provider = await createLocalEmbeddingProvider({
|
||||
...options,
|
||||
provider: "local",
|
||||
fallback: "none",
|
||||
});
|
||||
return {
|
||||
provider,
|
||||
runtime: {
|
||||
id: "local",
|
||||
cacheKeyData: {
|
||||
provider: "local",
|
||||
model: provider.model,
|
||||
},
|
||||
},
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
export const builtinMemoryEmbeddingProviderAdapters = [
|
||||
localAdapter,
|
||||
openAiAdapter,
|
||||
geminiAdapter,
|
||||
voyageAdapter,
|
||||
mistralAdapter,
|
||||
ollamaAdapter,
|
||||
] as const;
|
||||
|
||||
export function registerBuiltInMemoryEmbeddingProviders(register: {
|
||||
registerMemoryEmbeddingProvider: (adapter: MemoryEmbeddingProviderAdapter) => void;
|
||||
}): void {
|
||||
const existingIds = new Set(listMemoryEmbeddingProviders().map((adapter) => adapter.id));
|
||||
for (const adapter of builtinMemoryEmbeddingProviderAdapters) {
|
||||
if (existingIds.has(adapter.id)) {
|
||||
continue;
|
||||
}
|
||||
register.registerMemoryEmbeddingProvider(adapter);
|
||||
}
|
||||
}
|
||||
|
||||
// Re-export the per-provider default model ids plus the local-provider
// helpers so consumers can use them alongside the adapter registry above.
export {
  DEFAULT_GEMINI_EMBEDDING_MODEL,
  DEFAULT_LOCAL_MODEL,
  DEFAULT_MISTRAL_EMBEDDING_MODEL,
  DEFAULT_OLLAMA_EMBEDDING_MODEL,
  DEFAULT_OPENAI_EMBEDDING_MODEL,
  DEFAULT_VOYAGE_EMBEDDING_MODEL,
  canAutoSelectLocal,
  formatLocalSetupError,
  isMissingApiKeyError,
};
|
||||
2833
extensions/memory-core/src/memory/qmd-manager.test.ts
Normal file
2833
extensions/memory-core/src/memory/qmd-manager.test.ts
Normal file
File diff suppressed because it is too large
Load Diff
@@ -12,7 +12,6 @@ import {
|
||||
isQmdScopeAllowed,
|
||||
listSessionFilesForAgent,
|
||||
parseQmdQueryJson,
|
||||
requireNodeSqlite,
|
||||
resolveAgentWorkspaceDir,
|
||||
resolveCliSpawnInvocation,
|
||||
resolveGlobalSingleton,
|
||||
@@ -32,7 +31,8 @@ import {
|
||||
type ResolvedQmdMcporterConfig,
|
||||
type SessionFileEntry,
|
||||
writeFileWithinRoot,
|
||||
} from "../api.js";
|
||||
} from "../engine-host-api.js";
|
||||
import { requireNodeSqlite } from "./sqlite.js";
|
||||
|
||||
type SqliteDatabase = import("node:sqlite").DatabaseSync;
|
||||
|
||||
|
||||
365
extensions/memory-core/src/memory/search-manager.test.ts
Normal file
365
extensions/memory-core/src/memory/search-manager.test.ts
Normal file
@@ -0,0 +1,365 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
|
||||
function createManagerStatus(params: {
|
||||
backend: "qmd" | "builtin";
|
||||
provider: string;
|
||||
model: string;
|
||||
requestedProvider: string;
|
||||
withMemorySourceCounts?: boolean;
|
||||
}) {
|
||||
const base = {
|
||||
backend: params.backend,
|
||||
provider: params.provider,
|
||||
model: params.model,
|
||||
requestedProvider: params.requestedProvider,
|
||||
files: 0,
|
||||
chunks: 0,
|
||||
dirty: false,
|
||||
workspaceDir: "/tmp",
|
||||
dbPath: "/tmp/index.sqlite",
|
||||
};
|
||||
if (!params.withMemorySourceCounts) {
|
||||
return base;
|
||||
}
|
||||
return {
|
||||
...base,
|
||||
sources: ["memory" as const],
|
||||
sourceCounts: [{ source: "memory" as const, files: 0, chunks: 0 }],
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a search-manager-shaped object backed entirely by vi.fn stubs:
 * search resolves to params.searchResults (default: no hits), status reports
 * the given backend/provider identity via createManagerStatus, and both
 * availability probes report success.
 */
function createManagerMock(params: {
  backend: "qmd" | "builtin";
  provider: string;
  model: string;
  requestedProvider: string;
  searchResults?: Array<{
    path: string;
    startLine: number;
    endLine: number;
    score: number;
    snippet: string;
    source: "memory";
  }>;
  withMemorySourceCounts?: boolean;
}) {
  return {
    search: vi.fn(async () => params.searchResults ?? []),
    readFile: vi.fn(async () => ({ text: "", path: "MEMORY.md" })),
    status: vi.fn(() =>
      createManagerStatus({
        backend: params.backend,
        provider: params.provider,
        model: params.model,
        requestedProvider: params.requestedProvider,
        withMemorySourceCounts: params.withMemorySourceCounts,
      }),
    ),
    sync: vi.fn(async () => {}),
    // Probes always succeed so tests only fail for reasons they arrange.
    probeEmbeddingAvailability: vi.fn(async () => ({ ok: true })),
    probeVectorAvailability: vi.fn(async () => true),
    close: vi.fn(async () => {}),
  };
}
|
||||
|
||||
// Primary "qmd" backend mock. vi.hoisted so the vi.mock factories below can
// close over it before module evaluation.
const mockPrimary = vi.hoisted(() => ({
  ...createManagerMock({
    backend: "qmd",
    provider: "qmd",
    model: "qmd",
    requestedProvider: "qmd",
    withMemorySourceCounts: true,
  }),
}));

// Builtin (openai) fallback manager; returns one canned hit so tests can
// observe that a search was served by the fallback path.
const fallbackManager = vi.hoisted(() => ({
  ...createManagerMock({
    backend: "builtin",
    provider: "openai",
    model: "text-embedding-3-small",
    requestedProvider: "openai",
    searchResults: [
      {
        path: "MEMORY.md",
        startLine: 1,
        endLine: 1,
        score: 1,
        snippet: "fallback",
        source: "memory",
      },
    ],
  }),
}));

// Convenience handle for asserting on fallback search calls.
const fallbackSearch = fallbackManager.search;
// Stand-ins for MemoryIndexManager.get / closeAllMemoryIndexManagers,
// installed by the vi.mock call below.
const mockMemoryIndexGet = vi.hoisted(() => vi.fn(async () => fallbackManager));
const mockCloseAllMemoryIndexManagers = vi.hoisted(() => vi.fn(async () => {}));
|
||||
|
||||
// Replace the qmd backend with the hoisted primary mock.
vi.mock("./qmd-manager.js", () => ({
  QmdMemoryManager: {
    create: vi.fn(async () => mockPrimary),
  },
}));

// Replace the builtin index-manager runtime with the hoisted fallback mocks.
// NOTE(review): this specifier resolves relative to this test file and looks
// like it points outside the package (extensions/memory-core/extensions/...);
// confirm it matches the specifier search-manager.js actually imports,
// otherwise the mock will not be applied.
vi.mock("../../extensions/memory-core/src/memory/manager-runtime.js", () => ({
  MemoryIndexManager: {
    get: mockMemoryIndexGet,
  },
  closeAllMemoryIndexManagers: mockCloseAllMemoryIndexManagers,
}));

import { QmdMemoryManager } from "./qmd-manager.js";
import { closeAllMemorySearchManagers, getMemorySearchManager } from "./search-manager.js";
// eslint-disable-next-line @typescript-eslint/unbound-method -- mocked static function
const createQmdManagerMock = vi.mocked(QmdMemoryManager.create);
|
||||
|
||||
// Result and manager shapes as produced by the module under test.
type SearchManagerResult = Awaited<ReturnType<typeof getMemorySearchManager>>;
type SearchManager = NonNullable<SearchManagerResult["manager"]>;
|
||||
|
||||
function createQmdCfg(agentId: string): OpenClawConfig {
|
||||
return {
|
||||
memory: { backend: "qmd", qmd: {} },
|
||||
agents: { list: [{ id: agentId, default: true, workspace: "/tmp/workspace" }] },
|
||||
};
|
||||
}
|
||||
|
||||
function requireManager(result: SearchManagerResult): SearchManager {
|
||||
expect(result.manager).toBeTruthy();
|
||||
if (!result.manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
return result.manager;
|
||||
}
|
||||
|
||||
async function createFailedQmdSearchHarness(params: { agentId: string; errorMessage: string }) {
|
||||
const cfg = createQmdCfg(params.agentId);
|
||||
mockPrimary.search.mockRejectedValueOnce(new Error(params.errorMessage));
|
||||
const first = await getMemorySearchManager({ cfg, agentId: params.agentId });
|
||||
return { cfg, manager: requireManager(first), firstResult: first };
|
||||
}
|
||||
|
||||
// Reset shared state between tests: drop cached search managers, clear call
// history on every stub of both manager mocks, and re-arm the default
// fallback resolution that individual tests may override with *Once helpers.
beforeEach(async () => {
  await closeAllMemorySearchManagers();
  mockPrimary.search.mockClear();
  mockPrimary.readFile.mockClear();
  mockPrimary.status.mockClear();
  mockPrimary.sync.mockClear();
  mockPrimary.probeEmbeddingAvailability.mockClear();
  mockPrimary.probeVectorAvailability.mockClear();
  mockPrimary.close.mockClear();
  fallbackSearch.mockClear();
  fallbackManager.readFile.mockClear();
  fallbackManager.status.mockClear();
  fallbackManager.sync.mockClear();
  fallbackManager.probeEmbeddingAvailability.mockClear();
  fallbackManager.probeVectorAvailability.mockClear();
  fallbackManager.close.mockClear();
  mockCloseAllMemoryIndexManagers.mockClear();
  mockMemoryIndexGet.mockClear();
  // Default: the builtin index-manager lookup resolves to the fallback mock.
  mockMemoryIndexGet.mockResolvedValue(fallbackManager);
  createQmdManagerMock.mockClear();
});
|
||||
|
||||
// Caching/eviction behavior of getMemorySearchManager: full managers are
// cached per agent, status-only lookups are separate, failed qmd wrappers are
// evicted and fall back to the builtin index, and teardown closes everything.
describe("getMemorySearchManager caching", () => {
  it("reuses the same QMD manager instance for repeated calls", async () => {
    const cfg = createQmdCfg("main");

    const first = await getMemorySearchManager({ cfg, agentId: "main" });
    const second = await getMemorySearchManager({ cfg, agentId: "main" });

    expect(first.manager).toBe(second.manager);
    // eslint-disable-next-line @typescript-eslint/unbound-method
    expect(createQmdManagerMock).toHaveBeenCalledTimes(1);
  });

  it("evicts failed qmd wrapper so next call retries qmd", async () => {
    const retryAgentId = "retry-agent";
    const {
      cfg,
      manager: firstManager,
      firstResult: first,
    } = await createFailedQmdSearchHarness({
      agentId: retryAgentId,
      errorMessage: "qmd query failed",
    });

    // The failing search is served by the builtin fallback's canned hit.
    const fallbackResults = await firstManager.search("hello");
    expect(fallbackResults).toHaveLength(1);
    expect(fallbackResults[0]?.path).toBe("MEMORY.md");

    // Next lookup gets a fresh wrapper (qmd retried, create called again).
    const second = await getMemorySearchManager({ cfg, agentId: retryAgentId });
    requireManager(second);
    expect(second.manager).not.toBe(first.manager);
    // eslint-disable-next-line @typescript-eslint/unbound-method
    expect(createQmdManagerMock).toHaveBeenCalledTimes(2);
  });

  it("does not cache qmd managers for status-only requests", async () => {
    const agentId = "status-agent";
    const cfg = createQmdCfg(agentId);

    const first = await getMemorySearchManager({ cfg, agentId, purpose: "status" });
    const second = await getMemorySearchManager({ cfg, agentId, purpose: "status" });

    requireManager(first);
    requireManager(second);
    expect(first.manager?.status()).toMatchObject({
      backend: "qmd",
      provider: "qmd",
      model: "qmd",
      requestedProvider: "qmd",
    });
    // Two status requests -> two create calls (no caching for status purpose).
    // eslint-disable-next-line @typescript-eslint/unbound-method
    expect(createQmdManagerMock).toHaveBeenCalledTimes(2);
    expect(mockMemoryIndexGet).not.toHaveBeenCalled();

    // Each status wrapper owns its manager and closes it independently.
    await first.manager?.close?.();
    await second.manager?.close?.();
    expect(mockPrimary.close).toHaveBeenCalledTimes(2);
  });

  it("reports real qmd index counts for status-only requests", async () => {
    const agentId = "status-counts-agent";
    const cfg = createQmdCfg(agentId);
    mockPrimary.status.mockReturnValueOnce({
      ...createManagerStatus({
        backend: "qmd",
        provider: "qmd",
        model: "qmd",
        requestedProvider: "qmd",
        withMemorySourceCounts: true,
      }),
      files: 10,
      chunks: 42,
      sourceCounts: [{ source: "memory" as const, files: 10, chunks: 42 }],
    });

    const result = await getMemorySearchManager({ cfg, agentId, purpose: "status" });
    const manager = requireManager(result);

    expect(manager.status()).toMatchObject({
      backend: "qmd",
      files: 10,
      chunks: 42,
      sourceCounts: [{ source: "memory", files: 10, chunks: 42 }],
    });
    // The backend is told this is a status-mode instantiation.
    // eslint-disable-next-line @typescript-eslint/unbound-method
    expect(createQmdManagerMock).toHaveBeenCalledWith(
      expect.objectContaining({ agentId, mode: "status" }),
    );
  });

  it("reuses cached full qmd manager for status-only requests", async () => {
    const agentId = "status-reuses-full-agent";
    const cfg = createQmdCfg(agentId);

    const full = await getMemorySearchManager({ cfg, agentId });
    const status = await getMemorySearchManager({ cfg, agentId, purpose: "status" });

    requireManager(full);
    requireManager(status);
    // Distinct wrapper objects, but backed by the single cached qmd manager.
    expect(status.manager).not.toBe(full.manager);
    // eslint-disable-next-line @typescript-eslint/unbound-method
    expect(createQmdManagerMock).toHaveBeenCalledTimes(1);
    // Closing the status wrapper must not close the shared cached manager.
    await status.manager?.close?.();
    expect(mockPrimary.close).not.toHaveBeenCalled();

    const fullAgain = await getMemorySearchManager({ cfg, agentId });
    expect(fullAgain.manager).toBe(full.manager);
  });

  it("gets a fresh qmd manager for later status requests after close", async () => {
    const agentId = "status-eviction-agent";
    const cfg = createQmdCfg(agentId);

    const first = await getMemorySearchManager({ cfg, agentId, purpose: "status" });
    const firstManager = requireManager(first);
    await firstManager.close?.();

    const second = await getMemorySearchManager({ cfg, agentId, purpose: "status" });
    requireManager(second);

    // eslint-disable-next-line @typescript-eslint/unbound-method
    expect(createQmdManagerMock).toHaveBeenCalledTimes(2);
    expect(mockPrimary.close).toHaveBeenCalledTimes(1);
  });

  it("does not evict a newer cached wrapper when closing an older failed wrapper", async () => {
    const retryAgentId = "retry-agent-close";
    const {
      cfg,
      manager: firstManager,
      firstResult: first,
    } = await createFailedQmdSearchHarness({
      agentId: retryAgentId,
      errorMessage: "qmd query failed",
    });
    await firstManager.search("hello");

    const second = await getMemorySearchManager({ cfg, agentId: retryAgentId });
    const secondManager = requireManager(second);
    expect(second.manager).not.toBe(first.manager);

    // Closing the stale first wrapper must leave the newer cache entry intact.
    await firstManager.close?.();

    const third = await getMemorySearchManager({ cfg, agentId: retryAgentId });
    expect(third.manager).toBe(secondManager);
    // eslint-disable-next-line @typescript-eslint/unbound-method
    expect(createQmdManagerMock).toHaveBeenCalledTimes(2);
  });

  it("falls back to builtin search when qmd fails with sqlite busy", async () => {
    const retryAgentId = "retry-agent-busy";
    const { manager: firstManager } = await createFailedQmdSearchHarness({
      agentId: retryAgentId,
      errorMessage: "qmd index busy while reading results: SQLITE_BUSY: database is locked",
    });

    const results = await firstManager.search("hello");
    expect(results).toHaveLength(1);
    expect(results[0]?.path).toBe("MEMORY.md");
    expect(fallbackSearch).toHaveBeenCalledTimes(1);
  });

  it("keeps original qmd error when fallback manager initialization fails", async () => {
    const retryAgentId = "retry-agent-no-fallback-auth";
    const { manager: firstManager } = await createFailedQmdSearchHarness({
      agentId: retryAgentId,
      errorMessage: "qmd query failed",
    });
    // The fallback itself fails to initialize; the qmd error must surface.
    mockMemoryIndexGet.mockRejectedValueOnce(new Error("No API key found for provider openai"));

    await expect(firstManager.search("hello")).rejects.toThrow("qmd query failed");
  });

  it("closes cached managers on global teardown", async () => {
    const cfg = createQmdCfg("teardown-agent");
    const first = await getMemorySearchManager({ cfg, agentId: "teardown-agent" });
    const firstManager = requireManager(first);

    await closeAllMemorySearchManagers();

    expect(mockPrimary.close).toHaveBeenCalledTimes(1);
    expect(mockCloseAllMemoryIndexManagers).toHaveBeenCalledTimes(1);

    // After teardown the cache is empty, so a new manager is created.
    const second = await getMemorySearchManager({ cfg, agentId: "teardown-agent" });
    expect(second.manager).toBeTruthy();
    expect(second.manager).not.toBe(firstManager);
    // eslint-disable-next-line @typescript-eslint/unbound-method
    expect(createQmdManagerMock).toHaveBeenCalledTimes(2);
  });

  it("closes builtin index managers on teardown after runtime is loaded", async () => {
    const retryAgentId = "teardown-with-fallback";
    const { manager } = await createFailedQmdSearchHarness({
      agentId: retryAgentId,
      errorMessage: "qmd query failed",
    });
    // Trigger the fallback so the builtin runtime gets loaded lazily.
    await manager.search("hello");

    await closeAllMemorySearchManagers();

    expect(mockCloseAllMemoryIndexManagers).toHaveBeenCalledTimes(1);
  });
});
|
||||
@@ -7,7 +7,7 @@ import {
|
||||
type MemorySyncProgressUpdate,
|
||||
type OpenClawConfig,
|
||||
type ResolvedQmdConfig,
|
||||
} from "../api.js";
|
||||
} from "../engine-host-api.js";
|
||||
|
||||
const MEMORY_SEARCH_MANAGER_CACHE_KEY = Symbol.for("openclaw.memorySearchManagerCache");
|
||||
type MemorySearchManagerCacheStore = {
|
||||
|
||||
1
extensions/memory-core/src/memory/sqlite-vec.ts
Normal file
1
extensions/memory-core/src/memory/sqlite-vec.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { loadSqliteVecExtension } from "../engine-host-api.js";
|
||||
1
extensions/memory-core/src/memory/sqlite.ts
Normal file
1
extensions/memory-core/src/memory/sqlite.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { requireNodeSqlite } from "../engine-host-api.js";
|
||||
173
extensions/memory-core/src/memory/temporal-decay.test.ts
Normal file
173
extensions/memory-core/src/memory/temporal-decay.test.ts
Normal file
@@ -0,0 +1,173 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import { mergeHybridResults } from "./hybrid.js";
|
||||
import {
|
||||
applyTemporalDecayToHybridResults,
|
||||
applyTemporalDecayToScore,
|
||||
calculateTemporalDecayMultiplier,
|
||||
} from "./temporal-decay.js";
|
||||
|
||||
const DAY_MS = 24 * 60 * 60 * 1000;
|
||||
const NOW_MS = Date.UTC(2026, 1, 10, 0, 0, 0);
|
||||
|
||||
const tempDirs: string[] = [];
|
||||
|
||||
async function makeTempDir(): Promise<string> {
|
||||
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-temporal-decay-"));
|
||||
tempDirs.push(dir);
|
||||
return dir;
|
||||
}
|
||||
|
||||
function createVectorMemoryEntry(params: {
|
||||
id: string;
|
||||
path: string;
|
||||
snippet: string;
|
||||
vectorScore: number;
|
||||
}) {
|
||||
return {
|
||||
id: params.id,
|
||||
path: params.path,
|
||||
startLine: 1,
|
||||
endLine: 1,
|
||||
source: "memory" as const,
|
||||
snippet: params.snippet,
|
||||
vectorScore: params.vectorScore,
|
||||
};
|
||||
}
|
||||
|
||||
async function mergeVectorResultsWithTemporalDecay(
|
||||
vector: Parameters<typeof mergeHybridResults>[0]["vector"],
|
||||
) {
|
||||
return mergeHybridResults({
|
||||
vectorWeight: 1,
|
||||
textWeight: 0,
|
||||
temporalDecay: { enabled: true, halfLifeDays: 30 },
|
||||
mmr: { enabled: false },
|
||||
nowMs: NOW_MS,
|
||||
vector,
|
||||
keyword: [],
|
||||
});
|
||||
}
|
||||
|
||||
// Delete every temp dir created during the test; splice(0) empties the list
// so each dir is removed exactly once.
afterEach(async () => {
  await Promise.all(
    tempDirs.splice(0).map(async (dir) => {
      await fs.rm(dir, { recursive: true, force: true });
    }),
  );
});
|
||||
|
||||
// Temporal decay scoring: the multiplier follows exp(-ln2 * age/halfLife),
// evergreen memory files are exempt, decay participates in hybrid merging,
// and non-memory sources fall back to file mtime for their age.
describe("temporal decay", () => {
  it("matches exponential decay formula", () => {
    const halfLifeDays = 30;
    const ageInDays = 10;
    const lambda = Math.LN2 / halfLifeDays;
    const expectedMultiplier = Math.exp(-lambda * ageInDays);

    expect(calculateTemporalDecayMultiplier({ ageInDays, halfLifeDays })).toBeCloseTo(
      expectedMultiplier,
    );
    expect(applyTemporalDecayToScore({ score: 0.8, ageInDays, halfLifeDays })).toBeCloseTo(
      0.8 * expectedMultiplier,
    );
  });

  it("is 0.5 exactly at half-life", () => {
    expect(calculateTemporalDecayMultiplier({ ageInDays: 30, halfLifeDays: 30 })).toBeCloseTo(0.5);
  });

  it("does not decay evergreen memory files", async () => {
    const dir = await makeTempDir();

    // Undated memory files (MEMORY.md and topic files) are treated as
    // evergreen even when their mtime is ancient.
    const rootMemoryPath = path.join(dir, "MEMORY.md");
    const topicPath = path.join(dir, "memory", "projects.md");
    await fs.mkdir(path.dirname(topicPath), { recursive: true });
    await fs.writeFile(rootMemoryPath, "evergreen");
    await fs.writeFile(topicPath, "topic evergreen");

    const veryOld = new Date(Date.UTC(2010, 0, 1));
    await fs.utimes(rootMemoryPath, veryOld, veryOld);
    await fs.utimes(topicPath, veryOld, veryOld);

    const decayed = await applyTemporalDecayToHybridResults({
      results: [
        { path: "MEMORY.md", score: 1, source: "memory" },
        { path: "memory/projects.md", score: 0.75, source: "memory" },
      ],
      workspaceDir: dir,
      temporalDecay: { enabled: true, halfLifeDays: 30 },
      nowMs: NOW_MS,
    });

    expect(decayed[0]?.score).toBeCloseTo(1);
    expect(decayed[1]?.score).toBeCloseTo(0.75);
  });

  it("applies decay in hybrid merging before ranking", async () => {
    // Older entry starts with the higher raw score; decay should let the
    // fresher entry overtake it in the merged ranking.
    const merged = await mergeVectorResultsWithTemporalDecay([
      createVectorMemoryEntry({
        id: "old",
        path: "memory/2025-01-01.md",
        snippet: "old but high",
        vectorScore: 0.95,
      }),
      createVectorMemoryEntry({
        id: "new",
        path: "memory/2026-02-10.md",
        snippet: "new and relevant",
        vectorScore: 0.8,
      }),
    ]);

    expect(merged[0]?.path).toBe("memory/2026-02-10.md");
    expect(merged[0]?.score ?? 0).toBeGreaterThan(merged[1]?.score ?? 0);
  });

  it("handles future dates, zero age, and very old memories", async () => {
    const merged = await mergeVectorResultsWithTemporalDecay([
      createVectorMemoryEntry({
        id: "future",
        path: "memory/2099-01-01.md",
        snippet: "future",
        vectorScore: 0.9,
      }),
      createVectorMemoryEntry({
        id: "today",
        path: "memory/2026-02-10.md",
        snippet: "today",
        vectorScore: 0.8,
      }),
      createVectorMemoryEntry({
        id: "very-old",
        path: "memory/2000-01-01.md",
        snippet: "ancient",
        vectorScore: 1,
      }),
    ]);

    // Future and same-day entries keep their scores; ~26-year-old entries
    // decay to effectively zero at a 30-day half-life.
    const byPath = new Map(merged.map((entry) => [entry.path, entry]));
    expect(byPath.get("memory/2099-01-01.md")?.score).toBeCloseTo(0.9);
    expect(byPath.get("memory/2026-02-10.md")?.score).toBeCloseTo(0.8);
    expect(byPath.get("memory/2000-01-01.md")?.score ?? 1).toBeLessThan(0.001);
  });

  it("uses file mtime fallback for non-memory sources", async () => {
    const dir = await makeTempDir();
    const sessionPath = path.join(dir, "sessions", "thread.jsonl");
    await fs.mkdir(path.dirname(sessionPath), { recursive: true });
    await fs.writeFile(sessionPath, "{}\n");
    // mtime exactly one half-life in the past -> score should halve.
    const oldMtime = new Date(NOW_MS - 30 * DAY_MS);
    await fs.utimes(sessionPath, oldMtime, oldMtime);

    const decayed = await applyTemporalDecayToHybridResults({
      results: [{ path: "sessions/thread.jsonl", score: 1, source: "sessions" }],
      workspaceDir: dir,
      temporalDecay: { enabled: true, halfLifeDays: 30 },
      nowMs: NOW_MS,
    });

    expect(decayed[0]?.score).toBeCloseTo(0.5, 2);
  });
});
|
||||
64
extensions/memory-core/src/memory/test-embeddings-mock.ts
Normal file
64
extensions/memory-core/src/memory/test-embeddings-mock.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import {
|
||||
OPENAI_BATCH_ENDPOINT,
|
||||
runOpenAiEmbeddingBatches,
|
||||
type MemoryChunk,
|
||||
} from "../engine-host-api.js";
|
||||
|
||||
/**
 * Builds an openai embedding provider/runtime pair for tests. Query and
 * single-batch embedding are delegated to the caller-supplied callbacks;
 * batchEmbed goes through the real runOpenAiEmbeddingBatches pipeline using a
 * fixed fake client (so the batch request/response plumbing is exercised).
 */
export function createOpenAIEmbeddingProviderMock(params: {
  embedQuery: (input: string) => Promise<number[]>;
  embedBatch: (input: string[]) => Promise<number[][]>;
}) {
  // Fixed fake client identity; also feeds the runtime cache key below.
  const openAiClient = {
    baseUrl: "https://api.openai.com/v1",
    headers: { Authorization: "Bearer test", "Content-Type": "application/json" },
    model: "text-embedding-3-small",
  };
  return {
    requestedProvider: "openai",
    provider: {
      id: "openai",
      model: "text-embedding-3-small",
      embedQuery: params.embedQuery,
      embedBatch: params.embedBatch,
    },
    runtime: {
      id: "openai",
      cacheKeyData: {
        provider: "openai",
        baseUrl: openAiClient.baseUrl,
        model: openAiClient.model,
      },
      batchEmbed: async (options: {
        agentId: string;
        chunks: MemoryChunk[];
        wait: boolean;
        concurrency: number;
        pollIntervalMs: number;
        timeoutMs: number;
        debug: (message: string, data?: Record<string, unknown>) => void;
      }) => {
        // One batch request per chunk, keyed by its index as custom_id.
        const byCustomId = await runOpenAiEmbeddingBatches({
          openAi: openAiClient,
          agentId: options.agentId,
          requests: options.chunks.map((chunk: MemoryChunk, index: number) => ({
            custom_id: String(index),
            method: "POST",
            url: OPENAI_BATCH_ENDPOINT,
            body: {
              model: openAiClient.model,
              input: chunk.text,
            },
          })),
          wait: options.wait,
          concurrency: options.concurrency,
          pollIntervalMs: options.pollIntervalMs,
          timeoutMs: options.timeoutMs,
          debug: options.debug,
        });
        // Re-align results with the input order; missing ids become [].
        return options.chunks.map(
          (_: MemoryChunk, index: number) => byCustomId.get(String(index)) ?? [],
        );
      },
    },
  };
}
|
||||
34
extensions/memory-core/src/memory/test-helpers/ssrf.ts
Normal file
34
extensions/memory-core/src/memory/test-helpers/ssrf.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
import * as ssrf from "openclaw/plugin-sdk/ssrf-runtime";
|
||||
import { vi } from "vitest";
|
||||
|
||||
/**
 * Spies on resolvePinnedHostnameWithPolicy so any hostname resolves to a
 * single public IPv4 address (93.184.216.34), letting SSRF-guarded code run
 * in tests without real DNS. Returns the vitest spy for restoration.
 */
export function mockPublicPinnedHostname() {
  return vi.spyOn(ssrf, "resolvePinnedHostnameWithPolicy").mockImplementation(async (hostname) => {
    // Normalize the same way a resolver would: trim, lowercase, strip
    // a trailing dot (FQDN form).
    const normalized = hostname.trim().toLowerCase().replace(/\.$/, "");
    const addresses = ["93.184.216.34"];
    // Fake dns.lookup supporting both (host, cb) and (host, options, cb)
    // call signatures.
    const lookup = ((host: string, options?: unknown, callback?: unknown) => {
      const cb =
        typeof options === "function"
          ? (options as (err: NodeJS.ErrnoException | null, address: unknown) => void)
          : (callback as (err: NodeJS.ErrnoException | null, address: unknown) => void);
      if (!cb) {
        return;
      }
      // Any host other than the pinned one resolves to no addresses.
      if (host.trim().toLowerCase().replace(/\.$/, "") !== normalized) {
        cb(null, []);
        return;
      }
      cb(
        null,
        addresses.map((address) => ({
          address,
          family: address.includes(":") ? 6 : 4,
        })),
      );
    }) as never;
    return {
      hostname: normalized,
      addresses,
      lookup,
    };
  });
}
|
||||
23
extensions/memory-core/src/memory/test-manager-helpers.ts
Normal file
23
extensions/memory-core/src/memory/test-manager-helpers.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import type { MemoryIndexManager } from "./index.js";
|
||||
|
||||
export async function getRequiredMemoryIndexManager(params: {
|
||||
cfg: OpenClawConfig;
|
||||
agentId?: string;
|
||||
purpose?: "default" | "status";
|
||||
}): Promise<MemoryIndexManager> {
|
||||
await import("./embedding.test-mocks.js");
|
||||
const { getMemorySearchManager } = await import("./index.js");
|
||||
const result = await getMemorySearchManager({
|
||||
cfg: params.cfg,
|
||||
agentId: params.agentId ?? "main",
|
||||
purpose: params.purpose,
|
||||
});
|
||||
if (!result.manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
if (!("sync" in result.manager) || typeof result.manager.sync !== "function") {
|
||||
throw new Error("manager does not support sync");
|
||||
}
|
||||
return result.manager as unknown as MemoryIndexManager;
|
||||
}
|
||||
13
extensions/memory-core/src/memory/test-manager.ts
Normal file
13
extensions/memory-core/src/memory/test-manager.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import type { OpenClawConfig } from "../engine-host-api.js";
|
||||
import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
|
||||
|
||||
export async function createMemoryManagerOrThrow(
|
||||
cfg: OpenClawConfig,
|
||||
agentId = "main",
|
||||
): Promise<MemoryIndexManager> {
|
||||
const result = await getMemorySearchManager({ cfg, agentId });
|
||||
if (!result.manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
return result.manager as unknown as MemoryIndexManager;
|
||||
}
|
||||
13
extensions/memory-core/src/memory/test-runtime-mocks.ts
Normal file
13
extensions/memory-core/src/memory/test-runtime-mocks.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { vi } from "vitest";
|
||||
|
||||
// Unit tests: avoid importing the real chokidar implementation (native fsevents, etc.).
// Both default and named `watch` are stubbed with inert watchers.
vi.mock("chokidar", () => ({
  default: {
    watch: () => ({ on: () => {}, close: async () => {} }),
  },
  watch: () => ({ on: () => {}, close: async () => {} }),
}));

// Make sqlite-vec loading fail deterministically so tests exercise the
// non-vector code path instead of depending on a native extension.
vi.mock("./sqlite-vec.js", () => ({
  loadSqliteVecExtension: async () => ({ ok: false, error: "sqlite-vec disabled in tests" }),
}));
|
||||
Reference in New Issue
Block a user