diff --git a/CHANGELOG.md b/CHANGELOG.md index ba50079a58d..c2c6148060d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ Docs: https://docs.openclaw.ai - Ollama/WSL2: warn when GPU-backed WSL2 installs combine CUDA visibility with an autostarting `ollama.service` using `Restart=always`, and document the systemd, `.wslconfig`, and keep-alive mitigation for crash loops. Carries forward #61022; fixes #61185. Thanks @yhyatt. - Ollama/onboarding: de-dupe suggested bare local models against installed `:latest` tags and skip redundant pulls, so setup shows the installed model once and no longer says it is downloading an already available model. Fixes #68952. Thanks @tleyden. - Memory-core/doctor: keep `doctor.memory.status` on the cached path by default and only run live embedding pings for explicit deep probes, preventing slow local embedding backends from blocking Gateway status checks. Fixes #71568. Thanks @apex-system. +- Memory/QMD: skip QMD vector status probes and embedding maintenance in lexical `searchMode: "search"`, so BM25-only QMD setups on ARM do not trigger llama.cpp/Vulkan builds during status checks or embed cycles. Fixes #59234 and #67113. Thanks @PrinceOfEgypt, @Vksh07, @Snipe76, @NomLom, @t4r3e2q1-commits, and @dmak. - Compaction: skip oversized pre-compaction checkpoint snapshots and prune duplicate long user turns from compaction input and rotated successor transcripts, preventing retry storms from being preserved across checkpoint cycles. Fixes #72780. Thanks @SweetSophia. - Control UI/Cron: render cron job prompts and run summaries as sanitized markdown in the dashboard, with full-width block content, safer link clicks, and no duplicate error text when a failed run has no summary. Supersedes #48504. Thanks @garethdaine. 
- Control UI/Gateway: preserve WebChat client version labels across localhost, 127.0.0.1, and IPv6 loopback aliases on the same port, avoiding misleading `vcontrol-ui` connection logs while investigating duplicate-message reports. Refs #72753 and #72742. Thanks @LumenFromTheFuture and @allesgutefy. diff --git a/docs/cli/memory.md b/docs/cli/memory.md index 7f0ad65ae28..3dba66822d9 100644 --- a/docs/cli/memory.md +++ b/docs/cli/memory.md @@ -51,7 +51,7 @@ openclaw memory index --agent main --verbose `memory status`: -- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping. +- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping. QMD lexical `searchMode: "search"` skips semantic vector probes and embedding maintenance even with `--deep`. - `--index`: run a reindex if the store is dirty (implies `--deep`). - `--fix`: repair stale recall locks and normalize promotion metadata. - `--json`: print JSON output. diff --git a/docs/concepts/memory-qmd.md b/docs/concepts/memory-qmd.md index 7fef71b0585..10f740de4c7 100644 --- a/docs/concepts/memory-qmd.md +++ b/docs/concepts/memory-qmd.md @@ -51,13 +51,15 @@ present. ## How the sidecar works - OpenClaw creates collections from your workspace memory files and any - configured `memory.qmd.paths`, then runs `qmd update` + `qmd embed` on boot - and periodically (default every 5 minutes). + configured `memory.qmd.paths`, then runs `qmd update` on boot and + periodically (default every 5 minutes). Semantic modes also run `qmd embed`. - The default workspace collection tracks `MEMORY.md` plus the `memory/` tree. Lowercase `memory.md` is not indexed as a root memory file. - Boot refresh runs in the background so chat startup is not blocked. - Searches use the configured `searchMode` (default: `search`; also supports - `vsearch` and `query`). If a mode fails, OpenClaw retries with `qmd query`. 
+ `vsearch` and `query`). `search` is BM25-only, so OpenClaw skips semantic + vector readiness probes and embedding maintenance in that mode. If a mode + fails, OpenClaw retries with `qmd query`. - If QMD fails entirely, OpenClaw falls back to the builtin SQLite engine. @@ -164,6 +166,11 @@ runs as a service, create a symlink: **First search very slow?** QMD downloads GGUF models on first use. Pre-warm with `qmd query "test"` using the same XDG dirs OpenClaw uses. +**BM25-only QMD still trying to build llama.cpp?** Set +`memory.qmd.searchMode = "search"`. OpenClaw treats that mode as lexical-only, +does not run QMD vector status probes or embedding maintenance, and leaves +semantic readiness checks to `vsearch` or `query` setups. + **Search times out?** Increase `memory.qmd.limits.timeoutMs` (default: 4000ms). Set to `120000` for slower hardware. diff --git a/docs/reference/memory-config.md b/docs/reference/memory-config.md index 88ee5f5c09b..ecdba3a7af1 100644 --- a/docs/reference/memory-config.md +++ b/docs/reference/memory-config.md @@ -449,6 +449,8 @@ Set `memory.backend = "qmd"` to enable. All QMD settings live under `memory.qmd` | `sessions.retentionDays` | `number` | -- | Transcript retention | | `sessions.exportDir` | `string` | -- | Export directory | +`searchMode: "search"` is lexical/BM25-only. OpenClaw does not run semantic vector readiness probes or QMD embedding maintenance for that mode, including during `memory status --deep`; `vsearch` and `query` continue to require QMD vector readiness and embeddings. + OpenClaw prefers the current QMD collection and MCP query shapes, but keeps older QMD releases working by falling back to legacy `--mask` collection flags and older MCP tool names when needed. 
diff --git a/extensions/memory-core/src/memory/qmd-manager.test.ts b/extensions/memory-core/src/memory/qmd-manager.test.ts index 0d3335f10c6..dbf13ae5c21 100644 --- a/extensions/memory-core/src/memory/qmd-manager.test.ts +++ b/extensions/memory-core/src/memory/qmd-manager.test.ts @@ -288,16 +288,16 @@ describe("QmdMemoryManager", () => { const baselineCalls = spawnMock.mock.calls.length; await manager.sync({ reason: "manual" }); - expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2); + expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1); await manager.sync({ reason: "manual-again" }); - expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2); + expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1); (manager as unknown as { lastUpdateAt: number | null }).lastUpdateAt = Date.now() - (resolved.qmd?.update.debounceMs ?? 0) - 10; await manager.sync({ reason: "after-wait" }); - expect(spawnMock.mock.calls.length).toBe(baselineCalls + 3); + expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2); await manager.close(); }); @@ -1975,6 +1975,7 @@ describe("QmdMemoryManager", () => { backend: "qmd", qmd: { includeDefaultMemory: false, + searchMode: "query", update: { interval: "0s", debounceMs: 0, @@ -3418,7 +3419,7 @@ describe("QmdMemoryManager", () => { await manager.close(); }); - it("arms periodic embed maintenance in search mode", async () => { + it("skips periodic embed maintenance in lexical search mode", async () => { vi.useFakeTimers(); cfg = { ...cfg, @@ -3445,7 +3446,7 @@ describe("QmdMemoryManager", () => { const commandCalls = spawnMock.mock.calls .map((call: unknown[]) => call[1] as string[]) .filter((args: string[]) => args[0] === "update" || args[0] === "embed"); - expect(commandCalls).toEqual([["update"], ["embed"]]); + expect(commandCalls).toEqual([]); await manager.close(); }); @@ -3498,6 +3499,18 @@ describe("QmdMemoryManager", () => { it("serializes qmd embeds within a process before taking the shared file lock", async () => 
{ vi.useFakeTimers(); + cfg = { + ...cfg, + memory: { + backend: "qmd", + qmd: { + includeDefaultMemory: false, + searchMode: "query", + update: { interval: "0s", debounceMs: 0, onBoot: false }, + paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }], + }, + }, + } as OpenClawConfig; const embedChildren: MockChild[] = []; spawnMock.mockImplementation((_cmd: string, args: string[]) => { if (args[0] === "embed") { @@ -3682,7 +3695,7 @@ describe("QmdMemoryManager", () => { } }); - it("runs qmd embed in search mode for forced sync", async () => { + it("skips qmd embed in lexical search mode for forced sync", async () => { cfg = { ...cfg, memory: { @@ -3702,7 +3715,7 @@ describe("QmdMemoryManager", () => { const commandCalls = spawnMock.mock.calls .map((call: unknown[]) => call[1] as string[]) .filter((args: string[]) => args[0] === "update" || args[0] === "embed"); - expect(commandCalls).toEqual([["update"], ["embed"]]); + expect(commandCalls).toEqual([["update"]]); await manager.close(); }); @@ -4617,7 +4630,15 @@ describe("QmdMemoryManager", () => { return createMockChild(); }); - const { manager } = await createManager(); + const { manager } = await createManager({ + cfg: { + ...cfg, + memory: { + ...cfg.memory, + qmd: { ...cfg.memory?.qmd, searchMode: "query" }, + }, + } as OpenClawConfig, + }); await expect(manager.probeVectorAvailability()).resolves.toBe(false); await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({ @@ -4642,7 +4663,15 @@ describe("QmdMemoryManager", () => { return createMockChild(); }); - const { manager } = await createManager(); + const { manager } = await createManager({ + cfg: { + ...cfg, + memory: { + ...cfg.memory, + qmd: { ...cfg.memory?.qmd, searchMode: "query" }, + }, + } as OpenClawConfig, + }); await expect(manager.probeVectorAvailability()).resolves.toBe(true); await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({ @@ -4657,6 +4686,32 @@ describe("QmdMemoryManager", () => { await 
manager.close(); }); + it("skips qmd status vector probes for lexical search mode", async () => { + const { manager } = await createManager({ + cfg: { + ...cfg, + memory: { + ...cfg.memory, + qmd: { ...cfg.memory?.qmd, searchMode: "search" }, + }, + } as OpenClawConfig, + }); + const baselineCalls = spawnMock.mock.calls.length; + + await expect(manager.probeVectorAvailability()).resolves.toBe(false); + await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({ + ok: false, + error: "QMD semantic vectors are unavailable", + }); + expect(spawnMock.mock.calls.length).toBe(baselineCalls); + expect(manager.status().vector).toEqual({ + enabled: false, + available: false, + loadError: undefined, + }); + await manager.close(); + }); + describe("model cache symlink", () => { let defaultModelsDir: string; let customModelsDir: string; diff --git a/extensions/memory-core/src/memory/qmd-manager.ts b/extensions/memory-core/src/memory/qmd-manager.ts index d525373225d..d0d10173003 100644 --- a/extensions/memory-core/src/memory/qmd-manager.ts +++ b/extensions/memory-core/src/memory/qmd-manager.ts @@ -87,6 +87,10 @@ const IGNORED_MEMORY_WATCH_DIR_NAMES = new Set([ "__pycache__", ]); +function qmdUsesVectors(searchMode: ResolvedQmdConfig["searchMode"]): boolean { + return searchMode !== "search"; +} + function isDefaultMemoryPath(relPath: string): boolean { const normalized = relPath.trim().replace(/^\.\//, "").replace(/\\/g, "/"); if (!normalized) { @@ -1326,7 +1330,7 @@ export class QmdMemoryManager implements MemorySearchManager { sources: Array.from(this.sources), sourceCounts: counts.sourceCounts, vector: { - enabled: true, + enabled: qmdUsesVectors(this.qmd.searchMode), available: this.vectorAvailable ?? undefined, loadError: this.vectorStatusDetail ?? 
undefined, }, @@ -1357,6 +1361,11 @@ export class QmdMemoryManager implements MemorySearchManager { } async probeVectorAvailability(): Promise<boolean> { + if (!qmdUsesVectors(this.qmd.searchMode)) { + this.vectorAvailable = false; + this.vectorStatusDetail = null; + return false; + } try { const result = await this.runQmd(["status"], { timeoutMs: Math.min(this.qmd.limits.timeoutMs, 5_000), @@ -1597,9 +1606,9 @@ } private shouldRunEmbed(force?: boolean): boolean { - // Keep embeddings current regardless of the active retrieval mode. - // Search-mode indexing still needs vectors so later mode switches and - // hybrid flows do not inherit an incomplete QMD index. + if (!qmdUsesVectors(this.qmd.searchMode)) { + return false; + } const now = Date.now(); if (this.embedBackoffUntil !== null && now < this.embedBackoffUntil) { return false; @@ -1613,6 +1622,9 @@ } private shouldScheduleEmbedTimer(): boolean { + if (!qmdUsesVectors(this.qmd.searchMode)) { + return false; + } const embedIntervalMs = this.qmd.update.embedIntervalMs; if (embedIntervalMs <= 0) { return false;