mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 10:40:43 +00:00
fix(memory): bound lancedb recall embedding queries
This commit is contained in:
@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Memory/LanceDB: bound memory recall embedding queries with a new `recallMaxChars` setting, prefer the latest user message over channel prompt metadata during auto-recall, and document the knob so small Ollama embedding models avoid context-length failures. Fixes #56780. Thanks @rungmc357 and @zak-collaborator.
|
||||
- Plugin SDK: fall back from partial bundled plugin directory overrides to package source public surfaces while preserving `OPENCLAW_DISABLE_BUNDLED_PLUGINS` as a hard disable. (#72817) Thanks @serkonyc.
|
||||
- Agents/ACPX: stop forwarding Codex ACP timeout config controls that Codex rejects while preserving OpenClaw's run-timeout watchdog for ACP subagents. Fixes #73052. Thanks @pfrederiksen and @richa65.
|
||||
- Memory/Ollama: add `memorySearch.remote.nonBatchConcurrency` for inline embedding indexing, default Ollama non-batch indexing to one request at a time, and keep batch concurrency separate from non-batch concurrency so local embedding backfills avoid timeout storms on smaller hosts. Carries forward #57733. Thanks @itilys.
|
||||
|
||||
@@ -343,6 +343,10 @@
|
||||
"source": "Ollama Web Search",
|
||||
"target": "Ollama Web 搜索"
|
||||
},
|
||||
{
|
||||
"source": "Memory LanceDB",
|
||||
"target": "Memory LanceDB"
|
||||
},
|
||||
{
|
||||
"source": "onboarding",
|
||||
"target": "新手引导"
|
||||
|
||||
@@ -89,6 +89,10 @@ directories outside the workspace.
|
||||
AI-native cross-session memory with user modeling, semantic search, and
|
||||
multi-agent awareness. Plugin install.
|
||||
</Card>
|
||||
<Card title="LanceDB" icon="layers" href="/plugins/memory-lancedb">
|
||||
Bundled LanceDB-backed memory with OpenAI-compatible embeddings, auto-recall,
|
||||
auto-capture, and local Ollama embedding support.
|
||||
</Card>
|
||||
</CardGroup>
|
||||
|
||||
## Knowledge wiki layer
|
||||
@@ -179,6 +183,7 @@ openclaw memory index --force # Rebuild the index
|
||||
- [Builtin memory engine](/concepts/memory-builtin): default SQLite backend.
|
||||
- [QMD memory engine](/concepts/memory-qmd): advanced local-first sidecar.
|
||||
- [Honcho memory](/concepts/memory-honcho): AI-native cross-session memory.
|
||||
- [Memory LanceDB](/plugins/memory-lancedb): LanceDB-backed plugin with OpenAI-compatible embeddings.
|
||||
- [Memory Wiki](/plugins/memory-wiki): compiled knowledge vault and wiki-native tools.
|
||||
- [Memory search](/concepts/memory-search): search pipeline, providers, and tuning.
|
||||
- [Dreaming](/concepts/dreaming): background promotion from short-term recall to long-term memory.
|
||||
@@ -191,3 +196,4 @@ openclaw memory index --force # Rebuild the index
|
||||
- [Memory search](/concepts/memory-search)
|
||||
- [Builtin memory engine](/concepts/memory-builtin)
|
||||
- [Honcho memory](/concepts/memory-honcho)
|
||||
- [Memory LanceDB](/plugins/memory-lancedb)
|
||||
|
||||
@@ -1185,6 +1185,7 @@
|
||||
"plugins/webhooks",
|
||||
"plugins/voice-call",
|
||||
"plugins/memory-wiki",
|
||||
"plugins/memory-lancedb",
|
||||
"plugins/message-presentation",
|
||||
"plugins/skill-workshop",
|
||||
"plugins/zalouser",
|
||||
|
||||
262
docs/plugins/memory-lancedb.md
Normal file
262
docs/plugins/memory-lancedb.md
Normal file
@@ -0,0 +1,262 @@
|
||||
---
|
||||
summary: "Configure the bundled LanceDB memory plugin, including local Ollama-compatible embeddings"
|
||||
read_when:
|
||||
- You are configuring the bundled memory-lancedb plugin
|
||||
- You want LanceDB-backed long-term memory with auto-recall or auto-capture
|
||||
- You are using local OpenAI-compatible embeddings such as Ollama
|
||||
title: "Memory LanceDB"
|
||||
sidebarTitle: "Memory LanceDB"
|
||||
---
|
||||
|
||||
`memory-lancedb` is a bundled memory plugin that stores long-term memory in
|
||||
LanceDB and uses embeddings for recall. It can automatically recall relevant
|
||||
memories before a model turn and capture important facts after a response.
|
||||
|
||||
Use it when you want a local vector database for memory, need an
|
||||
OpenAI-compatible embedding endpoint, or want to keep a memory database outside
|
||||
the default built-in memory store.
|
||||
|
||||
<Note>
|
||||
`memory-lancedb` is an active memory plugin. Enable it by selecting the memory
|
||||
slot with `plugins.slots.memory = "memory-lancedb"`. Companion plugins such as
|
||||
`memory-wiki` can run beside it, but only one plugin owns the active memory slot.
|
||||
</Note>
|
||||
|
||||
## Quick start
|
||||
|
||||
```json5
|
||||
{
|
||||
plugins: {
|
||||
slots: {
|
||||
memory: "memory-lancedb",
|
||||
},
|
||||
entries: {
|
||||
"memory-lancedb": {
|
||||
enabled: true,
|
||||
config: {
|
||||
embedding: {
|
||||
apiKey: "${OPENAI_API_KEY}",
|
||||
model: "text-embedding-3-small",
|
||||
},
|
||||
autoRecall: true,
|
||||
autoCapture: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Restart the Gateway after changing plugin config:
|
||||
|
||||
```bash
|
||||
openclaw gateway restart
|
||||
```
|
||||
|
||||
Then verify the plugin is loaded:
|
||||
|
||||
```bash
|
||||
openclaw plugins list
|
||||
```
|
||||
|
||||
## Ollama embeddings
|
||||
|
||||
`memory-lancedb` calls embeddings through an OpenAI-compatible embeddings API.
|
||||
For Ollama embeddings, use the Ollama `/v1` compatibility endpoint here. This
|
||||
is only for embeddings; the Ollama chat/model provider uses the native Ollama
|
||||
API URL documented in [Ollama](/providers/ollama).
|
||||
|
||||
```json5
|
||||
{
|
||||
plugins: {
|
||||
slots: {
|
||||
memory: "memory-lancedb",
|
||||
},
|
||||
entries: {
|
||||
"memory-lancedb": {
|
||||
enabled: true,
|
||||
config: {
|
||||
embedding: {
|
||||
apiKey: "ollama",
|
||||
baseUrl: "http://127.0.0.1:11434/v1",
|
||||
model: "mxbai-embed-large",
|
||||
dimensions: 1024,
|
||||
},
|
||||
recallMaxChars: 400,
|
||||
autoRecall: true,
|
||||
autoCapture: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Set `dimensions` for non-standard embedding models. OpenClaw knows the
|
||||
dimensions for `text-embedding-3-small` and `text-embedding-3-large`; custom
|
||||
models need the value in config so LanceDB can create the vector column.
|
||||
|
||||
For small local embedding models, lower `recallMaxChars` if you see
|
||||
context-length errors from the local server.
|
||||
|
||||
## Recall and capture limits
|
||||
|
||||
`memory-lancedb` has two separate text limits:
|
||||
|
||||
| Setting | Default | Range | Applies to |
|
||||
| ----------------- | ------- | --------- | --------------------------------------------- |
|
||||
| `recallMaxChars` | `1000` | 100-10000 | text sent to the embedding API for recall |
|
||||
| `captureMaxChars` | `500` | 100-10000 | assistant message length eligible for capture |
|
||||
|
||||
`recallMaxChars` controls auto-recall, the `memory_recall` tool, the
|
||||
`memory_forget` query path, and `openclaw ltm search`. Auto-recall prefers the
|
||||
latest user message from the turn and falls back to the full prompt only when no
|
||||
user message is available. This keeps channel metadata and large prompt blocks
|
||||
out of the embedding request.
|
||||
|
||||
`captureMaxChars` controls whether a response is short enough to be considered
|
||||
for automatic capture. It does not cap recall query embeddings.
|
||||
|
||||
## Commands
|
||||
|
||||
When `memory-lancedb` is the active memory plugin, it registers the `ltm` CLI
|
||||
namespace:
|
||||
|
||||
```bash
|
||||
openclaw ltm list
|
||||
openclaw ltm search "project preferences"
|
||||
openclaw ltm stats
|
||||
```
|
||||
|
||||
Agents also get LanceDB memory tools from the active memory plugin:
|
||||
|
||||
- `memory_recall` for LanceDB-backed recall
|
||||
- `memory_store` for saving important facts, preferences, decisions, and entities
|
||||
- `memory_forget` for removing matching memories
|
||||
|
||||
## Storage
|
||||
|
||||
By default, LanceDB data lives under `~/.openclaw/memory/lancedb`. Override the
|
||||
path with `dbPath`:
|
||||
|
||||
```json5
|
||||
{
|
||||
plugins: {
|
||||
entries: {
|
||||
"memory-lancedb": {
|
||||
enabled: true,
|
||||
config: {
|
||||
dbPath: "~/.openclaw/memory/lancedb",
|
||||
embedding: {
|
||||
apiKey: "${OPENAI_API_KEY}",
|
||||
model: "text-embedding-3-small",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
`storageOptions` accepts string key/value pairs for LanceDB storage backends and
|
||||
supports `${ENV_VAR}` expansion:
|
||||
|
||||
```json5
|
||||
{
|
||||
plugins: {
|
||||
entries: {
|
||||
"memory-lancedb": {
|
||||
enabled: true,
|
||||
config: {
|
||||
dbPath: "s3://memory-bucket/openclaw",
|
||||
storageOptions: {
|
||||
access_key: "${AWS_ACCESS_KEY_ID}",
|
||||
secret_key: "${AWS_SECRET_ACCESS_KEY}",
|
||||
endpoint: "${AWS_ENDPOINT_URL}",
|
||||
},
|
||||
embedding: {
|
||||
apiKey: "${OPENAI_API_KEY}",
|
||||
model: "text-embedding-3-small",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
## Runtime dependencies
|
||||
|
||||
`memory-lancedb` depends on the native `@lancedb/lancedb` package. Packaged
|
||||
OpenClaw installs first try the bundled runtime dependency and can repair the
|
||||
plugin runtime dependency under OpenClaw state when the bundled import is not
|
||||
available.
|
||||
|
||||
If an older install logs a missing `dist/package.json` or missing
|
||||
`@lancedb/lancedb` error during plugin load, upgrade OpenClaw and restart the
|
||||
Gateway.
|
||||
|
||||
If the plugin logs that LanceDB is unavailable on `darwin-x64`, use the default
|
||||
memory backend on that machine, move the Gateway to a supported platform, or
|
||||
disable `memory-lancedb`.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Input length exceeds the context length
|
||||
|
||||
This usually means the embedding model rejected the recall query:
|
||||
|
||||
```text
|
||||
memory-lancedb: recall failed: Error: 400 the input length exceeds the context length
|
||||
```
|
||||
|
||||
Set a lower `recallMaxChars`, then restart the Gateway:
|
||||
|
||||
```json5
|
||||
{
|
||||
plugins: {
|
||||
entries: {
|
||||
"memory-lancedb": {
|
||||
config: {
|
||||
recallMaxChars: 400,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
For Ollama, also verify the embedding server is reachable from the Gateway host:
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:11434/v1/embeddings \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"mxbai-embed-large","input":"hello"}'
|
||||
```
|
||||
|
||||
### Unsupported embedding model
|
||||
|
||||
Without `embedding.dimensions`, only the built-in OpenAI embedding dimensions are known.
|
||||
For local or custom embedding models, set `embedding.dimensions` to the vector
|
||||
size reported by that model.
|
||||
|
||||
### Plugin loads but no memories appear
|
||||
|
||||
Check that `plugins.slots.memory` points at `memory-lancedb`, then run:
|
||||
|
||||
```bash
|
||||
openclaw ltm stats
|
||||
openclaw ltm search "recent preference"
|
||||
```
|
||||
|
||||
If `autoCapture` is disabled, the plugin will recall existing memories but will
|
||||
not automatically store new ones. Use the `memory_store` tool or enable
|
||||
`autoCapture` if you want automatic capture.
|
||||
|
||||
## Related
|
||||
|
||||
- [Memory overview](/concepts/memory)
|
||||
- [Active memory](/concepts/active-memory)
|
||||
- [Memory search](/concepts/memory-search)
|
||||
- [Memory Wiki](/plugins/memory-wiki)
|
||||
- [Ollama](/providers/ollama)
|
||||
@@ -155,6 +155,10 @@ plugin discovery rather than silently falling back to source paths.
|
||||
<Accordion title="Memory plugins">
|
||||
- `memory-core` — bundled memory search (default via `plugins.slots.memory`)
|
||||
- `memory-lancedb` — install-on-demand long-term memory with auto-recall/capture (set `plugins.slots.memory = "memory-lancedb"`)
|
||||
|
||||
See [Memory LanceDB](/plugins/memory-lancedb) for OpenAI-compatible
|
||||
embedding setup, Ollama examples, recall limits, and troubleshooting.
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Speech providers (enabled by default)">
|
||||
|
||||
@@ -15,6 +15,7 @@ export type MemoryConfig = {
|
||||
autoCapture?: boolean;
|
||||
autoRecall?: boolean;
|
||||
captureMaxChars?: number;
|
||||
recallMaxChars?: number;
|
||||
storageOptions?: Record<string, string>;
|
||||
};
|
||||
|
||||
@@ -23,6 +24,7 @@ export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number];
|
||||
|
||||
const DEFAULT_MODEL = "text-embedding-3-small";
|
||||
export const DEFAULT_CAPTURE_MAX_CHARS = 500;
|
||||
export const DEFAULT_RECALL_MAX_CHARS = 1000;
|
||||
const LEGACY_STATE_DIRS: string[] = [];
|
||||
|
||||
function resolveDefaultDbPath(): string {
|
||||
@@ -106,6 +108,7 @@ export const memoryConfigSchema = {
|
||||
"autoCapture",
|
||||
"autoRecall",
|
||||
"captureMaxChars",
|
||||
"recallMaxChars",
|
||||
"storageOptions",
|
||||
],
|
||||
"memory config",
|
||||
@@ -121,12 +124,17 @@ export const memoryConfigSchema = {
|
||||
|
||||
const captureMaxChars =
|
||||
typeof cfg.captureMaxChars === "number" ? Math.floor(cfg.captureMaxChars) : undefined;
|
||||
const recallMaxChars =
|
||||
typeof cfg.recallMaxChars === "number" ? Math.floor(cfg.recallMaxChars) : undefined;
|
||||
if (
|
||||
typeof captureMaxChars === "number" &&
|
||||
(captureMaxChars < 100 || captureMaxChars > 10_000)
|
||||
) {
|
||||
throw new Error("captureMaxChars must be between 100 and 10000");
|
||||
}
|
||||
if (typeof recallMaxChars === "number" && (recallMaxChars < 100 || recallMaxChars > 10_000)) {
|
||||
throw new Error("recallMaxChars must be between 100 and 10000");
|
||||
}
|
||||
|
||||
const dreaming =
|
||||
cfg.dreaming === undefined
|
||||
@@ -168,6 +176,7 @@ export const memoryConfigSchema = {
|
||||
autoCapture: cfg.autoCapture === true,
|
||||
autoRecall: cfg.autoRecall !== false,
|
||||
captureMaxChars: captureMaxChars ?? DEFAULT_CAPTURE_MAX_CHARS,
|
||||
recallMaxChars: recallMaxChars ?? DEFAULT_RECALL_MAX_CHARS,
|
||||
...(storageOptions ? { storageOptions } : {}),
|
||||
};
|
||||
},
|
||||
@@ -215,6 +224,12 @@ export const memoryConfigSchema = {
|
||||
advanced: true,
|
||||
placeholder: String(DEFAULT_CAPTURE_MAX_CHARS),
|
||||
},
|
||||
recallMaxChars: {
|
||||
label: "Recall Query Max Chars",
|
||||
help: "Maximum prompt/query length embedded for memory recall. Lower for small local embedding models.",
|
||||
advanced: true,
|
||||
placeholder: String(DEFAULT_RECALL_MAX_CHARS),
|
||||
},
|
||||
storageOptions: {
|
||||
label: "Storage Options",
|
||||
sensitive: true,
|
||||
|
||||
@@ -13,6 +13,7 @@ import memoryPlugin, {
|
||||
detectCategory,
|
||||
formatRelevantMemoriesContext,
|
||||
looksLikePromptInjection,
|
||||
normalizeRecallQuery,
|
||||
shouldCapture,
|
||||
} from "./index.js";
|
||||
import { createLanceDbRuntimeLoader, type LanceDbRuntimeLogger } from "./lancedb-runtime.js";
|
||||
@@ -27,6 +28,7 @@ type MemoryPluginTestConfig = {
|
||||
};
|
||||
dbPath?: string;
|
||||
captureMaxChars?: number;
|
||||
recallMaxChars?: number;
|
||||
autoCapture?: boolean;
|
||||
autoRecall?: boolean;
|
||||
storageOptions?: Record<string, string>;
|
||||
@@ -117,6 +119,7 @@ describe("memory plugin e2e", () => {
|
||||
expect(config?.embedding?.apiKey).toBe(OPENAI_API_KEY);
|
||||
expect(config?.dbPath).toBe(getDbPath());
|
||||
expect(config?.captureMaxChars).toBe(500);
|
||||
expect(config?.recallMaxChars).toBe(1000);
|
||||
});
|
||||
|
||||
test("config schema resolves env vars", async () => {
|
||||
@@ -162,6 +165,24 @@ describe("memory plugin e2e", () => {
|
||||
expect(config?.captureMaxChars).toBe(1800);
|
||||
});
|
||||
|
||||
test("config schema validates recallMaxChars range", async () => {
|
||||
expect(() => {
|
||||
memoryPlugin.configSchema?.parse?.({
|
||||
embedding: { apiKey: OPENAI_API_KEY },
|
||||
dbPath: getDbPath(),
|
||||
recallMaxChars: 99,
|
||||
});
|
||||
}).toThrow("recallMaxChars must be between 100 and 10000");
|
||||
});
|
||||
|
||||
test("config schema accepts recallMaxChars override", async () => {
|
||||
const config = parseConfig({
|
||||
recallMaxChars: 1800,
|
||||
});
|
||||
|
||||
expect(config?.recallMaxChars).toBe(1800);
|
||||
});
|
||||
|
||||
test("config schema keeps autoCapture disabled by default", async () => {
|
||||
const config = parseConfig();
|
||||
|
||||
@@ -359,6 +380,7 @@ describe("memory plugin e2e", () => {
|
||||
dbPath: getDbPath(),
|
||||
autoCapture: false,
|
||||
autoRecall: true,
|
||||
recallMaxChars: 120,
|
||||
},
|
||||
runtime: {},
|
||||
logger,
|
||||
@@ -376,8 +398,17 @@ describe("memory plugin e2e", () => {
|
||||
)?.[1];
|
||||
expect(beforePromptBuild).toBeTypeOf("function");
|
||||
|
||||
const latestUserText = `what editor should i use? ${"with a very long channel metadata tail ".repeat(10)}`;
|
||||
const expectedRecallQuery = normalizeRecallQuery(latestUserText, 120);
|
||||
const result = await beforePromptBuild?.(
|
||||
{ prompt: "what editor should i use?", messages: [] },
|
||||
{
|
||||
prompt: `discord metadata ${"ignored ".repeat(100)}`,
|
||||
messages: [
|
||||
{ role: "user", content: "old preference question" },
|
||||
{ role: "assistant", content: "old answer" },
|
||||
{ role: "user", content: latestUserText },
|
||||
],
|
||||
},
|
||||
{},
|
||||
);
|
||||
|
||||
@@ -385,9 +416,10 @@ describe("memory plugin e2e", () => {
|
||||
expect(ensureGlobalUndiciEnvProxyDispatcher).toHaveBeenCalledOnce();
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: "what editor should i use?",
|
||||
input: expectedRecallQuery,
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(expectedRecallQuery).toHaveLength(120);
|
||||
expect(vectorSearch).toHaveBeenCalledWith([0.1, 0.2, 0.3]);
|
||||
expect(limit).toHaveBeenCalledWith(3);
|
||||
expect(result).toMatchObject({
|
||||
@@ -1794,6 +1826,13 @@ describe("memory plugin e2e", () => {
|
||||
expect(shouldCapture(customTooLong, { maxChars: 1500 })).toBe(false);
|
||||
});
|
||||
|
||||
test("normalizeRecallQuery trims whitespace and bounds embedding input", async () => {
|
||||
expect(normalizeRecallQuery(" remember the blue mug ", 100)).toBe(
|
||||
"remember the blue mug",
|
||||
);
|
||||
expect(normalizeRecallQuery(`look up ${"x".repeat(200)}`, 120)).toHaveLength(120);
|
||||
});
|
||||
|
||||
test("formatRelevantMemoriesContext escapes memory text and marks entries as untrusted", async () => {
|
||||
const context = formatRelevantMemoriesContext([
|
||||
{
|
||||
|
||||
@@ -12,11 +12,15 @@ import OpenAI from "openai";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
|
||||
import { resolveLivePluginConfigObject } from "openclaw/plugin-sdk/plugin-config-runtime";
|
||||
import { ensureGlobalUndiciEnvProxyDispatcher } from "openclaw/plugin-sdk/runtime-env";
|
||||
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
|
||||
import {
|
||||
normalizeLowercaseStringOrEmpty,
|
||||
truncateUtf16Safe,
|
||||
} from "openclaw/plugin-sdk/text-runtime";
|
||||
import { Type } from "typebox";
|
||||
import { definePluginEntry, type OpenClawPluginApi } from "./api.js";
|
||||
import {
|
||||
DEFAULT_CAPTURE_MAX_CHARS,
|
||||
DEFAULT_RECALL_MAX_CHARS,
|
||||
MEMORY_CATEGORIES,
|
||||
type MemoryCategory,
|
||||
memoryConfigSchema,
|
||||
@@ -78,6 +82,25 @@ function extractUserTextContent(message: unknown): string[] {
|
||||
return texts;
|
||||
}
|
||||
|
||||
/**
 * Find the most recent message that yields non-empty user text.
 *
 * Walks `messages` from the end toward the start. For each entry, the strings
 * returned by `extractUserTextContent` are joined with newlines and trimmed;
 * the first non-empty result wins.
 * NOTE(review): presumably `extractUserTextContent` returns no text for
 * non-user messages, which is what makes this "latest user text" — confirm.
 *
 * @param messages - Conversation messages, oldest first.
 * @returns The trimmed text of the last message with any extracted text, or
 *   `undefined` when no message produces text.
 */
function extractLatestUserText(messages: unknown[]): string | undefined {
  let cursor = messages.length;
  while (cursor-- > 0) {
    const pieces = extractUserTextContent(messages[cursor]);
    const joined = pieces.join("\n").trim();
    if (joined.length > 0) {
      return joined;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Prepare free-form text for use as a memory-recall embedding query.
 *
 * Every run of whitespace is collapsed to a single space and both ends are
 * trimmed. If the result is longer than the limit it is cut with
 * `truncateUtf16Safe` and any trailing whitespace exposed by the cut is
 * removed, so the returned string can be slightly shorter than the limit.
 * NOTE(review): `truncateUtf16Safe` presumably avoids splitting a surrogate
 * pair at the cut point — confirm against the plugin SDK.
 *
 * @param text - Raw prompt or query text.
 * @param maxChars - Upper bound on the returned length (compared against
 *   `String.prototype.length`). Fractions are floored and negative values
 *   clamp to 0. `NaN` falls back to `DEFAULT_RECALL_MAX_CHARS`.
 * @returns The normalized, length-bounded query string.
 */
export function normalizeRecallQuery(
  text: string,
  maxChars: number = DEFAULT_RECALL_MAX_CHARS,
): string {
  const normalized = text.replace(/\s+/g, " ").trim();
  // NaN compares false against every number, so `length > NaN` would skip
  // truncation entirely and send an unbounded query to the embedding API.
  const requested = Number.isNaN(maxChars) ? DEFAULT_RECALL_MAX_CHARS : maxChars;
  const limit = Math.max(0, Math.floor(requested));
  return normalized.length > limit ? truncateUtf16Safe(normalized, limit).trimEnd() : normalized;
}
|
||||
|
||||
function messageFingerprint(message: unknown): string {
|
||||
const msgObj = asRecord(message);
|
||||
if (!msgObj) {
|
||||
@@ -430,7 +453,10 @@ export default definePluginEntry({
|
||||
async execute(_toolCallId, params) {
|
||||
const { query, limit = 5 } = params as { query: string; limit?: number };
|
||||
|
||||
const vector = await embeddings.embed(query);
|
||||
const currentCfg = resolveCurrentHookConfig();
|
||||
const vector = await embeddings.embed(
|
||||
normalizeRecallQuery(query, currentCfg.recallMaxChars),
|
||||
);
|
||||
const results = await db.search(vector, limit, 0.1);
|
||||
|
||||
if (results.length === 0) {
|
||||
@@ -549,7 +575,10 @@ export default definePluginEntry({
|
||||
}
|
||||
|
||||
if (query) {
|
||||
const vector = await embeddings.embed(query);
|
||||
const currentCfg = resolveCurrentHookConfig();
|
||||
const vector = await embeddings.embed(
|
||||
normalizeRecallQuery(query, currentCfg.recallMaxChars),
|
||||
);
|
||||
const results = await db.search(vector, 5, 0.7);
|
||||
|
||||
if (results.length === 0) {
|
||||
@@ -621,7 +650,7 @@ export default definePluginEntry({
|
||||
.argument("<query>", "Search query")
|
||||
.option("--limit <n>", "Max results", "5")
|
||||
.action(async (query, opts) => {
|
||||
const vector = await embeddings.embed(query);
|
||||
const vector = await embeddings.embed(normalizeRecallQuery(query, cfg.recallMaxChars));
|
||||
const results = await db.search(vector, Number.parseInt(opts.limit, 10), 0.3);
|
||||
// Strip vectors for output
|
||||
const output = results.map((r) => ({
|
||||
@@ -660,7 +689,12 @@ export default definePluginEntry({
|
||||
}
|
||||
|
||||
try {
|
||||
const vector = await embeddings.embed(event.prompt);
|
||||
const recallQuery = normalizeRecallQuery(
|
||||
extractLatestUserText(Array.isArray(event.messages) ? event.messages : []) ??
|
||||
event.prompt,
|
||||
currentCfg.recallMaxChars,
|
||||
);
|
||||
const vector = await embeddings.embed(recallQuery);
|
||||
const results = await db.search(vector, 3, 0.3);
|
||||
|
||||
if (results.length === 0) {
|
||||
|
||||
@@ -48,6 +48,12 @@
|
||||
"advanced": true,
|
||||
"placeholder": "500"
|
||||
},
|
||||
"recallMaxChars": {
|
||||
"label": "Recall Query Max Chars",
|
||||
"help": "Maximum prompt/query length embedded for memory recall. Lower for small local embedding models.",
|
||||
"advanced": true,
|
||||
"placeholder": "1000"
|
||||
},
|
||||
"storageOptions": {
|
||||
"label": "Storage Options",
|
||||
"advanced": true,
|
||||
@@ -94,6 +100,11 @@
|
||||
"minimum": 100,
|
||||
"maximum": 10000
|
||||
},
|
||||
"recallMaxChars": {
|
||||
"type": "number",
|
||||
"minimum": 100,
|
||||
"maximum": 10000
|
||||
},
|
||||
"storageOptions": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
|
||||
Reference in New Issue
Block a user