fix(memory): skip qmd vectors in lexical mode

2026-05-06 11:30:43 +00:00 · 2026-04-27 14:09:32 +01:00
parent 6a0dc3a9bc
commit b181930c23
6 changed files with 94 additions and 17 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -55,6 +55,7 @@ Docs: https://docs.openclaw.ai
 - Ollama/WSL2: warn when GPU-backed WSL2 installs combine CUDA visibility with an autostarting `ollama.service` using `Restart=always`, and document the systemd, `.wslconfig`, and keep-alive mitigation for crash loops. Carries forward #61022; fixes #61185. Thanks @yhyatt.
 - Ollama/onboarding: de-dupe suggested bare local models against installed `:latest` tags and skip redundant pulls, so setup shows the installed model once and no longer says it is downloading an already available model. Fixes #68952. Thanks @tleyden.
 - Memory-core/doctor: keep `doctor.memory.status` on the cached path by default and only run live embedding pings for explicit deep probes, preventing slow local embedding backends from blocking Gateway status checks. Fixes #71568. Thanks @apex-system.
+- Memory/QMD: skip QMD vector status probes and embedding maintenance in lexical `searchMode: "search"`, so BM25-only QMD setups on ARM do not trigger llama.cpp/Vulkan builds during status checks or embed cycles. Fixes #59234 and #67113. Thanks @PrinceOfEgypt, @Vksh07, @Snipe76, @NomLom, @t4r3e2q1-commits, and @dmak.
 - Compaction: skip oversized pre-compaction checkpoint snapshots and prune duplicate long user turns from compaction input and rotated successor transcripts, preventing retry storms from being preserved across checkpoint cycles. Fixes #72780. Thanks @SweetSophia.
 - Control UI/Cron: render cron job prompts and run summaries as sanitized markdown in the dashboard, with full-width block content, safer link clicks, and no duplicate error text when a failed run has no summary. Supersedes #48504. Thanks @garethdaine.
 - Control UI/Gateway: preserve WebChat client version labels across localhost, 127.0.0.1, and IPv6 loopback aliases on the same port, avoiding misleading `vcontrol-ui` connection logs while investigating duplicate-message reports. Refs #72753 and #72742. Thanks @LumenFromTheFuture and @allesgutefy.
--- a/docs/cli/memory.md
+++ b/docs/cli/memory.md
@@ -51,7 +51,7 @@ openclaw memory index --agent main --verbose

 `memory status`:

- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping.
+- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping. When QMD uses the lexical `searchMode: "search"`, semantic vector probes and embedding maintenance are skipped even with `--deep`.
 - `--index`: run a reindex if the store is dirty (implies `--deep`).
 - `--fix`: repair stale recall locks and normalize promotion metadata.
 - `--json`: print JSON output.
--- a/docs/concepts/memory-qmd.md
+++ b/docs/concepts/memory-qmd.md
@@ -51,13 +51,15 @@ present.
 ## How the sidecar works

 - OpenClaw creates collections from your workspace memory files and any
-  configured `memory.qmd.paths`, then runs `qmd update` + `qmd embed` on boot
-  and periodically (default every 5 minutes).
+  configured `memory.qmd.paths`, then runs `qmd update` on boot and
+  periodically (default every 5 minutes). Semantic modes also run `qmd embed`.
 - The default workspace collection tracks `MEMORY.md` plus the `memory/`
  tree. Lowercase `memory.md` is not indexed as a root memory file.
 - Boot refresh runs in the background so chat startup is not blocked.
 - Searches use the configured `searchMode` (default: `search`; also supports
-  `vsearch` and `query`). If a mode fails, OpenClaw retries with `qmd query`.
+  `vsearch` and `query`). `search` is BM25-only, so OpenClaw skips semantic
+  vector readiness probes and embedding maintenance in that mode. If a mode
+  fails, OpenClaw retries with `qmd query`.
 - If QMD fails entirely, OpenClaw falls back to the builtin SQLite engine.

 <Info>
@@ -164,6 +166,11 @@ runs as a service, create a symlink:
 **First search very slow?** QMD downloads GGUF models on first use. Pre-warm
 with `qmd query "test"` using the same XDG dirs OpenClaw uses.

+**BM25-only QMD still trying to build llama.cpp?** Set
+`memory.qmd.searchMode = "search"`. OpenClaw treats that mode as lexical-only,
+does not run QMD vector status probes or embedding maintenance, and leaves
+semantic readiness checks to `vsearch` or `query` setups.
+
 **Search times out?** Increase `memory.qmd.limits.timeoutMs` (default: 4000ms).
 Set to `120000` for slower hardware.

--- a/docs/reference/memory-config.md
+++ b/docs/reference/memory-config.md
@@ -449,6 +449,8 @@ Set `memory.backend = "qmd"` to enable. All QMD settings live under `memory.qmd`
 | `sessions.retentionDays` | `number`  | --       | Transcript retention                         |
 | `sessions.exportDir`     | `string`  | --       | Export directory                             |

+`searchMode: "search"` is lexical/BM25-only. OpenClaw does not run semantic vector readiness probes or QMD embedding maintenance for that mode, including during `memory status --deep`; `vsearch` and `query` continue to require QMD vector readiness and embeddings.
+
 OpenClaw prefers the current QMD collection and MCP query shapes, but keeps older QMD releases working by falling back to legacy `--mask` collection flags and older MCP tool names when needed.

 <Note>
--- a/extensions/memory-core/src/memory/qmd-manager.test.ts
+++ b/extensions/memory-core/src/memory/qmd-manager.test.ts
@@ -288,16 +288,16 @@ describe("QmdMemoryManager", () => {
    const baselineCalls = spawnMock.mock.calls.length;

    await manager.sync({ reason: "manual" });
-    expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
+    expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1);

    await manager.sync({ reason: "manual-again" });
-    expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
+    expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1);

    (manager as unknown as { lastUpdateAt: number | null }).lastUpdateAt =
      Date.now() - (resolved.qmd?.update.debounceMs ?? 0) - 10;

    await manager.sync({ reason: "after-wait" });
-    expect(spawnMock.mock.calls.length).toBe(baselineCalls + 3);
+    expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);

    await manager.close();
  });
@@ -1975,6 +1975,7 @@ describe("QmdMemoryManager", () => {
        backend: "qmd",
        qmd: {
          includeDefaultMemory: false,
+          searchMode: "query",
          update: {
            interval: "0s",
            debounceMs: 0,
@@ -3418,7 +3419,7 @@ describe("QmdMemoryManager", () => {
    await manager.close();
  });

-  it("arms periodic embed maintenance in search mode", async () => {
+  it("skips periodic embed maintenance in lexical search mode", async () => {
    vi.useFakeTimers();
    cfg = {
      ...cfg,
@@ -3445,7 +3446,7 @@ describe("QmdMemoryManager", () => {
    const commandCalls = spawnMock.mock.calls
      .map((call: unknown[]) => call[1] as string[])
      .filter((args: string[]) => args[0] === "update" || args[0] === "embed");
-    expect(commandCalls).toEqual([["update"], ["embed"]]);
+    expect(commandCalls).toEqual([]);

    await manager.close();
  });
@@ -3498,6 +3499,18 @@ describe("QmdMemoryManager", () => {

  it("serializes qmd embeds within a process before taking the shared file lock", async () => {
    vi.useFakeTimers();
+    cfg = {
+      ...cfg,
+      memory: {
+        backend: "qmd",
+        qmd: {
+          includeDefaultMemory: false,
+          searchMode: "query",
+          update: { interval: "0s", debounceMs: 0, onBoot: false },
+          paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }],
+        },
+      },
+    } as OpenClawConfig;
    const embedChildren: MockChild[] = [];
    spawnMock.mockImplementation((_cmd: string, args: string[]) => {
      if (args[0] === "embed") {
@@ -3682,7 +3695,7 @@ describe("QmdMemoryManager", () => {
    }
  });

-  it("runs qmd embed in search mode for forced sync", async () => {
+  it("skips qmd embed in lexical search mode for forced sync", async () => {
    cfg = {
      ...cfg,
      memory: {
@@ -3702,7 +3715,7 @@ describe("QmdMemoryManager", () => {
    const commandCalls = spawnMock.mock.calls
      .map((call: unknown[]) => call[1] as string[])
      .filter((args: string[]) => args[0] === "update" || args[0] === "embed");
-    expect(commandCalls).toEqual([["update"], ["embed"]]);
+    expect(commandCalls).toEqual([["update"]]);
    await manager.close();
  });

@@ -4617,7 +4630,15 @@ describe("QmdMemoryManager", () => {
      return createMockChild();
    });

-    const { manager } = await createManager();
+    const { manager } = await createManager({
+      cfg: {
+        ...cfg,
+        memory: {
+          ...cfg.memory,
+          qmd: { ...cfg.memory?.qmd, searchMode: "query" },
+        },
+      } as OpenClawConfig,
+    });

    await expect(manager.probeVectorAvailability()).resolves.toBe(false);
    await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({
@@ -4642,7 +4663,15 @@ describe("QmdMemoryManager", () => {
      return createMockChild();
    });

-    const { manager } = await createManager();
+    const { manager } = await createManager({
+      cfg: {
+        ...cfg,
+        memory: {
+          ...cfg.memory,
+          qmd: { ...cfg.memory?.qmd, searchMode: "query" },
+        },
+      } as OpenClawConfig,
+    });

    await expect(manager.probeVectorAvailability()).resolves.toBe(true);
    await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({
@@ -4657,6 +4686,32 @@ describe("QmdMemoryManager", () => {
    await manager.close();
  });

+  it("skips qmd status vector probes for lexical search mode", async () => {
+    const { manager } = await createManager({
+      cfg: {
+        ...cfg,
+        memory: {
+          ...cfg.memory,
+          qmd: { ...cfg.memory?.qmd, searchMode: "search" },
+        },
+      } as OpenClawConfig,
+    });
+    const baselineCalls = spawnMock.mock.calls.length;
+
+    await expect(manager.probeVectorAvailability()).resolves.toBe(false);
+    await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({
+      ok: false,
+      error: "QMD semantic vectors are unavailable",
+    });
+    expect(spawnMock.mock.calls.length).toBe(baselineCalls);
+    expect(manager.status().vector).toEqual({
+      enabled: false,
+      available: false,
+      loadError: undefined,
+    });
+    await manager.close();
+  });
+
  describe("model cache symlink", () => {
    let defaultModelsDir: string;
    let customModelsDir: string;
--- a/extensions/memory-core/src/memory/qmd-manager.ts
+++ b/extensions/memory-core/src/memory/qmd-manager.ts
@@ -87,6 +87,10 @@ const IGNORED_MEMORY_WATCH_DIR_NAMES = new Set([
  "__pycache__",
 ]);

+function qmdUsesVectors(searchMode: ResolvedQmdConfig["searchMode"]): boolean {
+  return searchMode !== "search"; // "search" is lexical/BM25-only; vsearch and query need vectors.
+}
+
 function isDefaultMemoryPath(relPath: string): boolean {
  const normalized = relPath.trim().replace(/^\.\//, "").replace(/\\/g, "/");
  if (!normalized) {
@@ -1326,7 +1330,7 @@ export class QmdMemoryManager implements MemorySearchManager {
      sources: Array.from(this.sources),
      sourceCounts: counts.sourceCounts,
      vector: {
-        enabled: true,
+        enabled: qmdUsesVectors(this.qmd.searchMode),
        available: this.vectorAvailable ?? undefined,
        loadError: this.vectorStatusDetail ?? undefined,
      },
@@ -1357,6 +1361,11 @@ export class QmdMemoryManager implements MemorySearchManager {
  }

  async probeVectorAvailability(): Promise<boolean> {
+    if (!qmdUsesVectors(this.qmd.searchMode)) {
+      this.vectorAvailable = false;
+      this.vectorStatusDetail = null;
+      return false;
+    }
    try {
      const result = await this.runQmd(["status"], {
        timeoutMs: Math.min(this.qmd.limits.timeoutMs, 5_000),
@@ -1597,9 +1606,9 @@ export class QmdMemoryManager implements MemorySearchManager {
  }

  private shouldRunEmbed(force?: boolean): boolean {
-    // Keep embeddings current regardless of the active retrieval mode.
-    // Search-mode indexing still needs vectors so later mode switches and
-    // hybrid flows do not inherit an incomplete QMD index.
+    if (!qmdUsesVectors(this.qmd.searchMode)) {
+      return false;
+    }
    const now = Date.now();
    if (this.embedBackoffUntil !== null && now < this.embedBackoffUntil) {
      return false;
@@ -1613,6 +1622,9 @@ export class QmdMemoryManager implements MemorySearchManager {
  }

  private shouldScheduleEmbedTimer(): boolean {
+    if (!qmdUsesVectors(this.qmd.searchMode)) {
+      return false;
+    }
    const embedIntervalMs = this.qmd.update.embedIntervalMs;
    if (embedIntervalMs <= 0) {
      return false;