mirror of https://github.com/openclaw/openclaw.git
synced 2026-05-06 11:30:43 +00:00
fix(memory): skip qmd vectors in lexical mode
This commit is contained in:
@@ -55,6 +55,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Ollama/WSL2: warn when GPU-backed WSL2 installs combine CUDA visibility with an autostarting `ollama.service` using `Restart=always`, and document the systemd, `.wslconfig`, and keep-alive mitigation for crash loops. Carries forward #61022; fixes #61185. Thanks @yhyatt.
|
||||
- Ollama/onboarding: de-dupe suggested bare local models against installed `:latest` tags and skip redundant pulls, so setup shows the installed model once and no longer says it is downloading an already available model. Fixes #68952. Thanks @tleyden.
|
||||
- Memory-core/doctor: keep `doctor.memory.status` on the cached path by default and only run live embedding pings for explicit deep probes, preventing slow local embedding backends from blocking Gateway status checks. Fixes #71568. Thanks @apex-system.
|
||||
- Memory/QMD: skip QMD vector status probes and embedding maintenance in lexical `searchMode: "search"`, so BM25-only QMD setups on ARM do not trigger llama.cpp/Vulkan builds during status checks or embed cycles. Fixes #59234 and #67113. Thanks @PrinceOfEgypt, @Vksh07, @Snipe76, @NomLom, @t4r3e2q1-commits, and @dmak.
|
||||
- Compaction: skip oversized pre-compaction checkpoint snapshots and prune duplicate long user turns from compaction input and rotated successor transcripts, preventing retry storms from being preserved across checkpoint cycles. Fixes #72780. Thanks @SweetSophia.
|
||||
- Control UI/Cron: render cron job prompts and run summaries as sanitized markdown in the dashboard, with full-width block content, safer link clicks, and no duplicate error text when a failed run has no summary. Supersedes #48504. Thanks @garethdaine.
|
||||
- Control UI/Gateway: preserve WebChat client version labels across localhost, 127.0.0.1, and IPv6 loopback aliases on the same port, avoiding misleading `vcontrol-ui` connection logs while investigating duplicate-message reports. Refs #72753 and #72742. Thanks @LumenFromTheFuture and @allesgutefy.
|
||||
|
||||
@@ -51,7 +51,7 @@ openclaw memory index --agent main --verbose
|
||||
|
||||
`memory status`:
|
||||
|
||||
- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping.
|
||||
- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping. QMD lexical `searchMode: "search"` skips semantic vector probes and embedding maintenance even with `--deep`.
|
||||
- `--index`: run a reindex if the store is dirty (implies `--deep`).
|
||||
- `--fix`: repair stale recall locks and normalize promotion metadata.
|
||||
- `--json`: print JSON output.
|
||||
|
||||
@@ -51,13 +51,15 @@ present.
|
||||
## How the sidecar works
|
||||
|
||||
- OpenClaw creates collections from your workspace memory files and any
|
||||
configured `memory.qmd.paths`, then runs `qmd update` + `qmd embed` on boot
|
||||
and periodically (default every 5 minutes).
|
||||
configured `memory.qmd.paths`, then runs `qmd update` on boot and
|
||||
periodically (default every 5 minutes). Semantic modes also run `qmd embed`.
|
||||
- The default workspace collection tracks `MEMORY.md` plus the `memory/`
|
||||
tree. Lowercase `memory.md` is not indexed as a root memory file.
|
||||
- Boot refresh runs in the background so chat startup is not blocked.
|
||||
- Searches use the configured `searchMode` (default: `search`; also supports
|
||||
`vsearch` and `query`). If a mode fails, OpenClaw retries with `qmd query`.
|
||||
`vsearch` and `query`). `search` is BM25-only, so OpenClaw skips semantic
|
||||
vector readiness probes and embedding maintenance in that mode. If a mode
|
||||
fails, OpenClaw retries with `qmd query`.
|
||||
- If QMD fails entirely, OpenClaw falls back to the builtin SQLite engine.
|
||||
|
||||
<Info>
|
||||
@@ -164,6 +166,11 @@ runs as a service, create a symlink:
|
||||
**First search very slow?** QMD downloads GGUF models on first use. Pre-warm
|
||||
with `qmd query "test"` using the same XDG dirs OpenClaw uses.
|
||||
|
||||
**BM25-only QMD still trying to build llama.cpp?** Set
|
||||
`memory.qmd.searchMode = "search"`. OpenClaw treats that mode as lexical-only,
|
||||
does not run QMD vector status probes or embedding maintenance, and leaves
|
||||
semantic readiness checks to `vsearch` or `query` setups.
|
||||
|
||||
**Search times out?** Increase `memory.qmd.limits.timeoutMs` (default: 4000ms).
|
||||
Set to `120000` for slower hardware.
|
||||
|
||||
|
||||
@@ -449,6 +449,8 @@ Set `memory.backend = "qmd"` to enable. All QMD settings live under `memory.qmd`
|
||||
| `sessions.retentionDays` | `number` | -- | Transcript retention |
|
||||
| `sessions.exportDir` | `string` | -- | Export directory |
|
||||
|
||||
`searchMode: "search"` is lexical/BM25-only. OpenClaw does not run semantic vector readiness probes or QMD embedding maintenance for that mode, including during `memory status --deep`; `vsearch` and `query` continue to require QMD vector readiness and embeddings.
|
||||
|
||||
OpenClaw prefers the current QMD collection and MCP query shapes, but keeps older QMD releases working by falling back to legacy `--mask` collection flags and older MCP tool names when needed.
|
||||
|
||||
<Note>
|
||||
|
||||
@@ -288,16 +288,16 @@ describe("QmdMemoryManager", () => {
|
||||
const baselineCalls = spawnMock.mock.calls.length;
|
||||
|
||||
await manager.sync({ reason: "manual" });
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1);
|
||||
|
||||
await manager.sync({ reason: "manual-again" });
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1);
|
||||
|
||||
(manager as unknown as { lastUpdateAt: number | null }).lastUpdateAt =
|
||||
Date.now() - (resolved.qmd?.update.debounceMs ?? 0) - 10;
|
||||
|
||||
await manager.sync({ reason: "after-wait" });
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 3);
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
|
||||
|
||||
await manager.close();
|
||||
});
|
||||
@@ -1975,6 +1975,7 @@ describe("QmdMemoryManager", () => {
|
||||
backend: "qmd",
|
||||
qmd: {
|
||||
includeDefaultMemory: false,
|
||||
searchMode: "query",
|
||||
update: {
|
||||
interval: "0s",
|
||||
debounceMs: 0,
|
||||
@@ -3418,7 +3419,7 @@ describe("QmdMemoryManager", () => {
|
||||
await manager.close();
|
||||
});
|
||||
|
||||
it("arms periodic embed maintenance in search mode", async () => {
|
||||
it("skips periodic embed maintenance in lexical search mode", async () => {
|
||||
vi.useFakeTimers();
|
||||
cfg = {
|
||||
...cfg,
|
||||
@@ -3445,7 +3446,7 @@ describe("QmdMemoryManager", () => {
|
||||
const commandCalls = spawnMock.mock.calls
|
||||
.map((call: unknown[]) => call[1] as string[])
|
||||
.filter((args: string[]) => args[0] === "update" || args[0] === "embed");
|
||||
expect(commandCalls).toEqual([["update"], ["embed"]]);
|
||||
expect(commandCalls).toEqual([]);
|
||||
|
||||
await manager.close();
|
||||
});
|
||||
@@ -3498,6 +3499,18 @@ describe("QmdMemoryManager", () => {
|
||||
|
||||
it("serializes qmd embeds within a process before taking the shared file lock", async () => {
|
||||
vi.useFakeTimers();
|
||||
cfg = {
|
||||
...cfg,
|
||||
memory: {
|
||||
backend: "qmd",
|
||||
qmd: {
|
||||
includeDefaultMemory: false,
|
||||
searchMode: "query",
|
||||
update: { interval: "0s", debounceMs: 0, onBoot: false },
|
||||
paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }],
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
const embedChildren: MockChild[] = [];
|
||||
spawnMock.mockImplementation((_cmd: string, args: string[]) => {
|
||||
if (args[0] === "embed") {
|
||||
@@ -3682,7 +3695,7 @@ describe("QmdMemoryManager", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("runs qmd embed in search mode for forced sync", async () => {
|
||||
it("skips qmd embed in lexical search mode for forced sync", async () => {
|
||||
cfg = {
|
||||
...cfg,
|
||||
memory: {
|
||||
@@ -3702,7 +3715,7 @@ describe("QmdMemoryManager", () => {
|
||||
const commandCalls = spawnMock.mock.calls
|
||||
.map((call: unknown[]) => call[1] as string[])
|
||||
.filter((args: string[]) => args[0] === "update" || args[0] === "embed");
|
||||
expect(commandCalls).toEqual([["update"], ["embed"]]);
|
||||
expect(commandCalls).toEqual([["update"]]);
|
||||
await manager.close();
|
||||
});
|
||||
|
||||
@@ -4617,7 +4630,15 @@ describe("QmdMemoryManager", () => {
|
||||
return createMockChild();
|
||||
});
|
||||
|
||||
const { manager } = await createManager();
|
||||
const { manager } = await createManager({
|
||||
cfg: {
|
||||
...cfg,
|
||||
memory: {
|
||||
...cfg.memory,
|
||||
qmd: { ...cfg.memory?.qmd, searchMode: "query" },
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
});
|
||||
|
||||
await expect(manager.probeVectorAvailability()).resolves.toBe(false);
|
||||
await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({
|
||||
@@ -4642,7 +4663,15 @@ describe("QmdMemoryManager", () => {
|
||||
return createMockChild();
|
||||
});
|
||||
|
||||
const { manager } = await createManager();
|
||||
const { manager } = await createManager({
|
||||
cfg: {
|
||||
...cfg,
|
||||
memory: {
|
||||
...cfg.memory,
|
||||
qmd: { ...cfg.memory?.qmd, searchMode: "query" },
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
});
|
||||
|
||||
await expect(manager.probeVectorAvailability()).resolves.toBe(true);
|
||||
await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({
|
||||
@@ -4657,6 +4686,32 @@ describe("QmdMemoryManager", () => {
|
||||
await manager.close();
|
||||
});
|
||||
|
||||
it("skips qmd status vector probes for lexical search mode", async () => {
|
||||
const { manager } = await createManager({
|
||||
cfg: {
|
||||
...cfg,
|
||||
memory: {
|
||||
...cfg.memory,
|
||||
qmd: { ...cfg.memory?.qmd, searchMode: "search" },
|
||||
},
|
||||
} as OpenClawConfig,
|
||||
});
|
||||
const baselineCalls = spawnMock.mock.calls.length;
|
||||
|
||||
await expect(manager.probeVectorAvailability()).resolves.toBe(false);
|
||||
await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({
|
||||
ok: false,
|
||||
error: "QMD semantic vectors are unavailable",
|
||||
});
|
||||
expect(spawnMock.mock.calls.length).toBe(baselineCalls);
|
||||
expect(manager.status().vector).toEqual({
|
||||
enabled: false,
|
||||
available: false,
|
||||
loadError: undefined,
|
||||
});
|
||||
await manager.close();
|
||||
});
|
||||
|
||||
describe("model cache symlink", () => {
|
||||
let defaultModelsDir: string;
|
||||
let customModelsDir: string;
|
||||
|
||||
@@ -87,6 +87,10 @@ const IGNORED_MEMORY_WATCH_DIR_NAMES = new Set([
|
||||
"__pycache__",
|
||||
]);
|
||||
|
||||
function qmdUsesVectors(searchMode: ResolvedQmdConfig["searchMode"]): boolean {
|
||||
return searchMode !== "search";
|
||||
}
|
||||
|
||||
function isDefaultMemoryPath(relPath: string): boolean {
|
||||
const normalized = relPath.trim().replace(/^\.\//, "").replace(/\\/g, "/");
|
||||
if (!normalized) {
|
||||
@@ -1326,7 +1330,7 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
sources: Array.from(this.sources),
|
||||
sourceCounts: counts.sourceCounts,
|
||||
vector: {
|
||||
enabled: true,
|
||||
enabled: qmdUsesVectors(this.qmd.searchMode),
|
||||
available: this.vectorAvailable ?? undefined,
|
||||
loadError: this.vectorStatusDetail ?? undefined,
|
||||
},
|
||||
@@ -1357,6 +1361,11 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
}
|
||||
|
||||
async probeVectorAvailability(): Promise<boolean> {
|
||||
if (!qmdUsesVectors(this.qmd.searchMode)) {
|
||||
this.vectorAvailable = false;
|
||||
this.vectorStatusDetail = null;
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
const result = await this.runQmd(["status"], {
|
||||
timeoutMs: Math.min(this.qmd.limits.timeoutMs, 5_000),
|
||||
@@ -1597,9 +1606,9 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
}
|
||||
|
||||
private shouldRunEmbed(force?: boolean): boolean {
|
||||
// Keep embeddings current regardless of the active retrieval mode.
|
||||
// Search-mode indexing still needs vectors so later mode switches and
|
||||
// hybrid flows do not inherit an incomplete QMD index.
|
||||
if (!qmdUsesVectors(this.qmd.searchMode)) {
|
||||
return false;
|
||||
}
|
||||
const now = Date.now();
|
||||
if (this.embedBackoffUntil !== null && now < this.embedBackoffUntil) {
|
||||
return false;
|
||||
@@ -1613,6 +1622,9 @@ export class QmdMemoryManager implements MemorySearchManager {
|
||||
}
|
||||
|
||||
private shouldScheduleEmbedTimer(): boolean {
|
||||
if (!qmdUsesVectors(this.qmd.searchMode)) {
|
||||
return false;
|
||||
}
|
||||
const embedIntervalMs = this.qmd.update.embedIntervalMs;
|
||||
if (embedIntervalMs <= 0) {
|
||||
return false;
|
||||
|
||||
Reference in New Issue
Block a user