From 32d76e2429ab341b4714c384d23492f8f21d3a28 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 28 Apr 2026 00:58:06 +0100 Subject: [PATCH] fix(memory): bound lancedb recall embedding queries --- CHANGELOG.md | 1 + docs/.i18n/glossary.zh-CN.json | 4 + docs/concepts/memory.md | 6 + docs/docs.json | 1 + docs/plugins/memory-lancedb.md | 262 ++++++++++++++++++ docs/tools/plugin.md | 4 + extensions/memory-lancedb/config.ts | 15 + extensions/memory-lancedb/index.test.ts | 43 ++- extensions/memory-lancedb/index.ts | 44 ++- .../memory-lancedb/openclaw.plugin.json | 11 + 10 files changed, 384 insertions(+), 7 deletions(-) create mode 100644 docs/plugins/memory-lancedb.md diff --git a/CHANGELOG.md b/CHANGELOG.md index bcce02b1e7e..b9c174ec110 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Memory/LanceDB: bound memory recall embedding queries with a new `recallMaxChars` setting, prefer the latest user message over channel prompt metadata during auto-recall, and document the knob so small Ollama embedding models avoid context-length failures. Fixes #56780. Thanks @rungmc357 and @zak-collaborator. - Plugin SDK: fall back from partial bundled plugin directory overrides to package source public surfaces while preserving `OPENCLAW_DISABLE_BUNDLED_PLUGINS` as a hard disable. (#72817) Thanks @serkonyc. - Agents/ACPX: stop forwarding Codex ACP timeout config controls that Codex rejects while preserving OpenClaw's run-timeout watchdog for ACP subagents. Fixes #73052. Thanks @pfrederiksen and @richa65. - Memory/Ollama: add `memorySearch.remote.nonBatchConcurrency` for inline embedding indexing, default Ollama non-batch indexing to one request at a time, and keep batch concurrency separate from non-batch concurrency so local embedding backfills avoid timeout storms on smaller hosts. Carries forward #57733. Thanks @itilys. diff --git a/docs/.i18n/glossary.zh-CN.json b/docs/.i18n/glossary.zh-CN.json index 068b06265e3..e930bb4bbbf 100644 --- a/docs/.i18n/glossary.zh-CN.json +++ b/docs/.i18n/glossary.zh-CN.json @@ -343,6 +343,10 @@ "source": "Ollama Web Search", "target": "Ollama Web 搜索" }, + { + "source": "Memory LanceDB", + "target": "Memory LanceDB" + }, { "source": "onboarding", "target": "新手引导" diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md index b02d8f1bdce..7affc9abe15 100644 --- a/docs/concepts/memory.md +++ b/docs/concepts/memory.md @@ -89,6 +89,10 @@ directories outside the workspace. AI-native cross-session memory with user modeling, semantic search, and multi-agent awareness. Plugin install. + +Bundled LanceDB-backed memory with OpenAI-compatible embeddings, auto-recall, +auto-capture, and local Ollama embedding support. + ## Knowledge wiki layer @@ -179,6 +183,7 @@ openclaw memory index --force # Rebuild the index - [Builtin memory engine](/concepts/memory-builtin): default SQLite backend. - [QMD memory engine](/concepts/memory-qmd): advanced local-first sidecar. - [Honcho memory](/concepts/memory-honcho): AI-native cross-session memory. +- [Memory LanceDB](/plugins/memory-lancedb): LanceDB-backed plugin with OpenAI-compatible embeddings. - [Memory Wiki](/plugins/memory-wiki): compiled knowledge vault and wiki-native tools. - [Memory search](/concepts/memory-search): search pipeline, providers, and tuning. - [Dreaming](/concepts/dreaming): background promotion from short-term recall to long-term memory. @@ -191,3 +196,4 @@ openclaw memory index --force # Rebuild the index - [Memory search](/concepts/memory-search) - [Builtin memory engine](/concepts/memory-builtin) - [Honcho memory](/concepts/memory-honcho) +- [Memory LanceDB](/plugins/memory-lancedb) diff --git a/docs/docs.json b/docs/docs.json index 18cd983517a..65ba9809b82 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1185,6 +1185,7 @@ "plugins/webhooks", "plugins/voice-call", "plugins/memory-wiki", + "plugins/memory-lancedb", "plugins/message-presentation", "plugins/skill-workshop", "plugins/zalouser", diff --git a/docs/plugins/memory-lancedb.md b/docs/plugins/memory-lancedb.md new file mode 100644 index 00000000000..7cac365ed04 --- /dev/null +++ b/docs/plugins/memory-lancedb.md @@ -0,0 +1,262 @@ +--- +summary: "Configure the bundled LanceDB memory plugin, including local Ollama-compatible embeddings" +read_when: + - You are configuring the bundled memory-lancedb plugin + - You want LanceDB-backed long-term memory with auto-recall or auto-capture + - You are using local OpenAI-compatible embeddings such as Ollama +title: "Memory LanceDB" +sidebarTitle: "Memory LanceDB" +--- + +`memory-lancedb` is a bundled memory plugin that stores long-term memory in +LanceDB and uses embeddings for recall. It can automatically recall relevant +memories before a model turn and capture important facts after a response. + +Use it when you want a local vector database for memory, need an +OpenAI-compatible embedding endpoint, or want to keep a memory database outside +the default built-in memory store. + + +`memory-lancedb` is an active memory plugin. Enable it by selecting the memory +slot with `plugins.slots.memory = "memory-lancedb"`. Companion plugins such as +`memory-wiki` can run beside it, but only one plugin owns the active memory slot. + + +## Quick start + +```json5 +{ + plugins: { + slots: { + memory: "memory-lancedb", + }, + entries: { + "memory-lancedb": { + enabled: true, + config: { + embedding: { + apiKey: "${OPENAI_API_KEY}", + model: "text-embedding-3-small", + }, + autoRecall: true, + autoCapture: false, + }, + }, + }, + }, +} +``` + +Restart the Gateway after changing plugin config: + +```bash +openclaw gateway restart +``` + +Then verify the plugin is loaded: + +```bash +openclaw plugins list +``` + +## Ollama embeddings + +`memory-lancedb` calls embeddings through an OpenAI-compatible embeddings API. +For Ollama embeddings, use the Ollama `/v1` compatibility endpoint here. This +is only for embeddings; the Ollama chat/model provider uses the native Ollama +API URL documented in [Ollama](/providers/ollama). + +```json5 +{ + plugins: { + slots: { + memory: "memory-lancedb", + }, + entries: { + "memory-lancedb": { + enabled: true, + config: { + embedding: { + apiKey: "ollama", + baseUrl: "http://127.0.0.1:11434/v1", + model: "mxbai-embed-large", + dimensions: 1024, + }, + recallMaxChars: 400, + autoRecall: true, + autoCapture: false, + }, + }, + }, + }, +} +``` + +Set `dimensions` for non-standard embedding models. OpenClaw knows the +dimensions for `text-embedding-3-small` and `text-embedding-3-large`; custom +models need the value in config so LanceDB can create the vector column. + +For small local embedding models, lower `recallMaxChars` if you see context +length errors from the local server. + +## Recall and capture limits + +`memory-lancedb` has two separate text limits: + +| Setting | Default | Range | Applies to | +| ----------------- | ------- | --------- | --------------------------------------------- | +| `recallMaxChars` | `1000` | 100-10000 | text sent to the embedding API for recall | +| `captureMaxChars` | `500` | 100-10000 | assistant message length eligible for capture | + +`recallMaxChars` controls auto-recall, the `memory_recall` tool, the +`memory_forget` query path, and `openclaw ltm search`. Auto-recall prefers the +latest user message from the turn and falls back to the full prompt only when no +user message is available. This keeps channel metadata and large prompt blocks +out of the embedding request. + +`captureMaxChars` controls whether a response is short enough to be considered +for automatic capture. It does not cap recall query embeddings. + +## Commands + +When `memory-lancedb` is the active memory plugin, it registers the `ltm` CLI +namespace: + +```bash +openclaw ltm list +openclaw ltm search "project preferences" +openclaw ltm stats +``` + +Agents also get LanceDB memory tools from the active memory plugin: + +- `memory_recall` for LanceDB-backed recall +- `memory_store` for saving important facts, preferences, decisions, and entities +- `memory_forget` for removing matching memories + +## Storage + +By default, LanceDB data lives under `~/.openclaw/memory/lancedb`. Override the +path with `dbPath`: + +```json5 +{ + plugins: { + entries: { + "memory-lancedb": { + enabled: true, + config: { + dbPath: "~/.openclaw/memory/lancedb", + embedding: { + apiKey: "${OPENAI_API_KEY}", + model: "text-embedding-3-small", + }, + }, + }, + }, + }, +} +``` + +`storageOptions` accepts string key/value pairs for LanceDB storage backends and +supports `${ENV_VAR}` expansion: + +```json5 +{ + plugins: { + entries: { + "memory-lancedb": { + enabled: true, + config: { + dbPath: "s3://memory-bucket/openclaw", + storageOptions: { + access_key: "${AWS_ACCESS_KEY_ID}", + secret_key: "${AWS_SECRET_ACCESS_KEY}", + endpoint: "${AWS_ENDPOINT_URL}", + }, + embedding: { + apiKey: "${OPENAI_API_KEY}", + model: "text-embedding-3-small", + }, + }, + }, + }, + }, +} +``` + +## Runtime dependencies + +`memory-lancedb` depends on the native `@lancedb/lancedb` package. Packaged +OpenClaw installs first try the bundled runtime dependency and can repair the +plugin runtime dependency under OpenClaw state when the bundled import is not +available. + +If an older install logs a missing `dist/package.json` or missing +`@lancedb/lancedb` error during plugin load, upgrade OpenClaw and restart the +Gateway. + +If the plugin logs that LanceDB is unavailable on `darwin-x64`, use the default +memory backend on that machine, move the Gateway to a supported platform, or +disable `memory-lancedb`. + +## Troubleshooting + +### Input length exceeds the context length + +This usually means the embedding model rejected the recall query: + +```text +memory-lancedb: recall failed: Error: 400 the input length exceeds the context length +``` + +Set a lower `recallMaxChars`, then restart the Gateway: + +```json5 +{ + plugins: { + entries: { + "memory-lancedb": { + config: { + recallMaxChars: 400, + }, + }, + }, + }, +} +``` + +For Ollama, also verify the embedding server is reachable from the Gateway host: + +```bash +curl http://127.0.0.1:11434/v1/embeddings \ + -H "Content-Type: application/json" \ + -d '{"model":"mxbai-embed-large","input":"hello"}' +``` + +### Unsupported embedding model + +Without `dimensions`, only the built-in OpenAI embedding dimensions are known. +For local or custom embedding models, set `embedding.dimensions` to the vector +size reported by that model. + +### Plugin loads but no memories appear + +Check that `plugins.slots.memory` points at `memory-lancedb`, then run: + +```bash +openclaw ltm stats +openclaw ltm search "recent preference" +``` + +If `autoCapture` is disabled, the plugin will recall existing memories but will +not automatically store new ones. Use the `memory_store` tool or enable +`autoCapture` if you want automatic capture. + +## Related + +- [Memory overview](/concepts/memory) +- [Active memory](/concepts/active-memory) +- [Memory search](/concepts/memory-search) +- [Memory Wiki](/plugins/memory-wiki) +- [Ollama](/providers/ollama) diff --git a/docs/tools/plugin.md b/docs/tools/plugin.md index 52a0df329d4..9c0fb729da2 100644 --- a/docs/tools/plugin.md +++ b/docs/tools/plugin.md @@ -155,6 +155,10 @@ plugin discovery rather than silently falling back to source paths. - `memory-core` — bundled memory search (default via `plugins.slots.memory`) - `memory-lancedb` — install-on-demand long-term memory with auto-recall/capture (set `plugins.slots.memory = "memory-lancedb"`) + + See [Memory LanceDB](/plugins/memory-lancedb) for OpenAI-compatible + embedding setup, Ollama examples, recall limits, and troubleshooting. + diff --git a/extensions/memory-lancedb/config.ts b/extensions/memory-lancedb/config.ts index 79716ae573b..27244d09c0d 100644 --- a/extensions/memory-lancedb/config.ts +++ b/extensions/memory-lancedb/config.ts @@ -15,6 +15,7 @@ export type MemoryConfig = { autoCapture?: boolean; autoRecall?: boolean; captureMaxChars?: number; + recallMaxChars?: number; storageOptions?: Record; }; @@ -23,6 +24,7 @@ export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number]; const DEFAULT_MODEL = "text-embedding-3-small"; export const DEFAULT_CAPTURE_MAX_CHARS = 500; +export const DEFAULT_RECALL_MAX_CHARS = 1000; const LEGACY_STATE_DIRS: string[] = []; function resolveDefaultDbPath(): string { @@ -106,6 +108,7 @@ export const memoryConfigSchema = { "autoCapture", "autoRecall", "captureMaxChars", + "recallMaxChars", "storageOptions", ], "memory config", @@ -121,12 +124,17 @@ export const memoryConfigSchema = { const captureMaxChars = typeof cfg.captureMaxChars === "number" ? Math.floor(cfg.captureMaxChars) : undefined; + const recallMaxChars = + typeof cfg.recallMaxChars === "number" ? Math.floor(cfg.recallMaxChars) : undefined; if ( typeof captureMaxChars === "number" && (captureMaxChars < 100 || captureMaxChars > 10_000) ) { throw new Error("captureMaxChars must be between 100 and 10000"); } + if (typeof recallMaxChars === "number" && (recallMaxChars < 100 || recallMaxChars > 10_000)) { + throw new Error("recallMaxChars must be between 100 and 10000"); + } const dreaming = cfg.dreaming === undefined @@ -168,6 +176,7 @@ export const memoryConfigSchema = { autoCapture: cfg.autoCapture === true, autoRecall: cfg.autoRecall !== false, captureMaxChars: captureMaxChars ?? DEFAULT_CAPTURE_MAX_CHARS, + recallMaxChars: recallMaxChars ?? DEFAULT_RECALL_MAX_CHARS, ...(storageOptions ? { storageOptions } : {}), }; }, @@ -215,6 +224,12 @@ export const memoryConfigSchema = { advanced: true, placeholder: String(DEFAULT_CAPTURE_MAX_CHARS), }, + recallMaxChars: { + label: "Recall Query Max Chars", + help: "Maximum prompt/query length embedded for memory recall. Lower for small local embedding models.", + advanced: true, + placeholder: String(DEFAULT_RECALL_MAX_CHARS), + }, storageOptions: { label: "Storage Options", sensitive: true, diff --git a/extensions/memory-lancedb/index.test.ts b/extensions/memory-lancedb/index.test.ts index a103fbe0222..a3d8bd3b633 100644 --- a/extensions/memory-lancedb/index.test.ts +++ b/extensions/memory-lancedb/index.test.ts @@ -13,6 +13,7 @@ import memoryPlugin, { detectCategory, formatRelevantMemoriesContext, looksLikePromptInjection, + normalizeRecallQuery, shouldCapture, } from "./index.js"; import { createLanceDbRuntimeLoader, type LanceDbRuntimeLogger } from "./lancedb-runtime.js"; @@ -27,6 +28,7 @@ type MemoryPluginTestConfig = { }; dbPath?: string; captureMaxChars?: number; + recallMaxChars?: number; autoCapture?: boolean; autoRecall?: boolean; storageOptions?: Record; @@ -117,6 +119,7 @@ describe("memory plugin e2e", () => { expect(config?.embedding?.apiKey).toBe(OPENAI_API_KEY); expect(config?.dbPath).toBe(getDbPath()); expect(config?.captureMaxChars).toBe(500); + expect(config?.recallMaxChars).toBe(1000); }); test("config schema resolves env vars", async () => { @@ -162,6 +165,24 @@ describe("memory plugin e2e", () => { expect(config?.captureMaxChars).toBe(1800); }); + test("config schema validates recallMaxChars range", async () => { + expect(() => { + memoryPlugin.configSchema?.parse?.({ + embedding: { apiKey: OPENAI_API_KEY }, + dbPath: getDbPath(), + recallMaxChars: 99, + }); + }).toThrow("recallMaxChars must be between 100 and 10000"); + }); + + test("config schema accepts recallMaxChars override", async () => { + const config = parseConfig({ + recallMaxChars: 1800, + }); + + expect(config?.recallMaxChars).toBe(1800); + }); + test("config schema keeps autoCapture disabled by default", async () => { const config = parseConfig(); @@ -359,6 +380,7 @@ describe("memory plugin e2e", () => { dbPath: getDbPath(), autoCapture: false, autoRecall: true, + recallMaxChars: 120, }, runtime: {}, logger, @@ -376,8 +398,17 @@ describe("memory plugin e2e", () => { )?.[1]; expect(beforePromptBuild).toBeTypeOf("function"); + const latestUserText = `what editor should i use? ${"with a very long channel metadata tail ".repeat(10)}`; + const expectedRecallQuery = normalizeRecallQuery(latestUserText, 120); const result = await beforePromptBuild?.( - { prompt: "what editor should i use?", messages: [] }, + { + prompt: `discord metadata ${"ignored ".repeat(100)}`, + messages: [ + { role: "user", content: "old preference question" }, + { role: "assistant", content: "old answer" }, + { role: "user", content: latestUserText }, + ], + }, {}, ); @@ -385,9 +416,10 @@ describe("memory plugin e2e", () => { expect(ensureGlobalUndiciEnvProxyDispatcher).toHaveBeenCalledOnce(); expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", - input: "what editor should i use?", + input: expectedRecallQuery, encoding_format: "float", }); + expect(expectedRecallQuery).toHaveLength(120); expect(vectorSearch).toHaveBeenCalledWith([0.1, 0.2, 0.3]); expect(limit).toHaveBeenCalledWith(3); expect(result).toMatchObject({ @@ -1794,6 +1826,13 @@ describe("memory plugin e2e", () => { expect(shouldCapture(customTooLong, { maxChars: 1500 })).toBe(false); }); + test("normalizeRecallQuery trims whitespace and bounds embedding input", async () => { + expect(normalizeRecallQuery(" remember the blue mug ", 100)).toBe( + "remember the blue mug", + ); + expect(normalizeRecallQuery(`look up ${"x".repeat(200)}`, 120)).toHaveLength(120); + }); + test("formatRelevantMemoriesContext escapes memory text and marks entries as untrusted", async () => { const context = formatRelevantMemoriesContext([ { diff --git a/extensions/memory-lancedb/index.ts b/extensions/memory-lancedb/index.ts index 45d12034afd..a7f71fa67dc 100644 --- a/extensions/memory-lancedb/index.ts +++ b/extensions/memory-lancedb/index.ts @@ -12,11 +12,15 @@ import OpenAI from "openai"; import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types"; import { resolveLivePluginConfigObject } from "openclaw/plugin-sdk/plugin-config-runtime"; import { ensureGlobalUndiciEnvProxyDispatcher } from "openclaw/plugin-sdk/runtime-env"; -import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; +import { + normalizeLowercaseStringOrEmpty, + truncateUtf16Safe, +} from "openclaw/plugin-sdk/text-runtime"; import { Type } from "typebox"; import { definePluginEntry, type OpenClawPluginApi } from "./api.js"; import { DEFAULT_CAPTURE_MAX_CHARS, + DEFAULT_RECALL_MAX_CHARS, MEMORY_CATEGORIES, type MemoryCategory, memoryConfigSchema, @@ -78,6 +82,25 @@ function extractUserTextContent(message: unknown): string[] { return texts; } +function extractLatestUserText(messages: unknown[]): string | undefined { + for (let index = messages.length - 1; index >= 0; index--) { + const text = extractUserTextContent(messages[index]).join("\n").trim(); + if (text) { + return text; + } + } + return undefined; +} + +export function normalizeRecallQuery( + text: string, + maxChars: number = DEFAULT_RECALL_MAX_CHARS, +): string { + const normalized = text.replace(/\s+/g, " ").trim(); + const limit = Math.max(0, Math.floor(maxChars)); + return normalized.length > limit ? truncateUtf16Safe(normalized, limit).trimEnd() : normalized; +} + function messageFingerprint(message: unknown): string { const msgObj = asRecord(message); if (!msgObj) { @@ -430,7 +453,10 @@ export default definePluginEntry({ async execute(_toolCallId, params) { const { query, limit = 5 } = params as { query: string; limit?: number }; - const vector = await embeddings.embed(query); + const currentCfg = resolveCurrentHookConfig(); + const vector = await embeddings.embed( + normalizeRecallQuery(query, currentCfg.recallMaxChars), + ); const results = await db.search(vector, limit, 0.1); if (results.length === 0) { @@ -549,7 +575,10 @@ export default definePluginEntry({ } if (query) { - const vector = await embeddings.embed(query); + const currentCfg = resolveCurrentHookConfig(); + const vector = await embeddings.embed( + normalizeRecallQuery(query, currentCfg.recallMaxChars), + ); const results = await db.search(vector, 5, 0.7); if (results.length === 0) { @@ -621,7 +650,7 @@ export default definePluginEntry({ .argument("", "Search query") .option("--limit ", "Max results", "5") .action(async (query, opts) => { - const vector = await embeddings.embed(query); + const vector = await embeddings.embed(normalizeRecallQuery(query, cfg.recallMaxChars)); const results = await db.search(vector, Number.parseInt(opts.limit, 10), 0.3); // Strip vectors for output const output = results.map((r) => ({ @@ -660,7 +689,12 @@ export default definePluginEntry({ } try { - const vector = await embeddings.embed(event.prompt); + const recallQuery = normalizeRecallQuery( + extractLatestUserText(Array.isArray(event.messages) ? event.messages : []) ?? + event.prompt, + currentCfg.recallMaxChars, + ); + const vector = await embeddings.embed(recallQuery); const results = await db.search(vector, 3, 0.3); if (results.length === 0) { diff --git a/extensions/memory-lancedb/openclaw.plugin.json b/extensions/memory-lancedb/openclaw.plugin.json index 7a4e38ccea9..e924bef2429 100644 --- a/extensions/memory-lancedb/openclaw.plugin.json +++ b/extensions/memory-lancedb/openclaw.plugin.json @@ -48,6 +48,12 @@ "advanced": true, "placeholder": "500" }, + "recallMaxChars": { + "label": "Recall Query Max Chars", + "help": "Maximum prompt/query length embedded for memory recall. Lower for small local embedding models.", + "advanced": true, + "placeholder": "1000" + }, "storageOptions": { "label": "Storage Options", "advanced": true, @@ -94,6 +100,11 @@ "minimum": 100, "maximum": 10000 }, + "recallMaxChars": { + "type": "number", + "minimum": 100, + "maximum": 10000 + }, "storageOptions": { "type": "object", "additionalProperties": {