diff --git a/CHANGELOG.md b/CHANGELOG.md index ba50079a58d..c2c6148060d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ Docs: https://docs.openclaw.ai - Ollama/WSL2: warn when GPU-backed WSL2 installs combine CUDA visibility with an autostarting `ollama.service` using `Restart=always`, and document the systemd, `.wslconfig`, and keep-alive mitigation for crash loops. Carries forward #61022; fixes #61185. Thanks @yhyatt. - Ollama/onboarding: de-dupe suggested bare local models against installed `:latest` tags and skip redundant pulls, so setup shows the installed model once and no longer says it is downloading an already available model. Fixes #68952. Thanks @tleyden. - Memory-core/doctor: keep `doctor.memory.status` on the cached path by default and only run live embedding pings for explicit deep probes, preventing slow local embedding backends from blocking Gateway status checks. Fixes #71568. Thanks @apex-system. +- Memory/QMD: skip QMD vector status probes and embedding maintenance in lexical `searchMode: "search"`, so BM25-only QMD setups on ARM do not trigger llama.cpp/Vulkan builds during status checks or embed cycles. Fixes #59234 and #67113. Thanks @PrinceOfEgypt, @Vksh07, @Snipe76, @NomLom, @t4r3e2q1-commits, and @dmak. - Compaction: skip oversized pre-compaction checkpoint snapshots and prune duplicate long user turns from compaction input and rotated successor transcripts, preventing retry storms from being preserved across checkpoint cycles. Fixes #72780. Thanks @SweetSophia. - Control UI/Cron: render cron job prompts and run summaries as sanitized markdown in the dashboard, with full-width block content, safer link clicks, and no duplicate error text when a failed run has no summary. Supersedes #48504. Thanks @garethdaine. 
- Control UI/Gateway: preserve WebChat client version labels across localhost, 127.0.0.1, and IPv6 loopback aliases on the same port, avoiding misleading `vcontrol-ui` connection logs while investigating duplicate-message reports. Refs #72753 and #72742. Thanks @LumenFromTheFuture and @allesgutefy. diff --git a/docs/cli/memory.md b/docs/cli/memory.md index 7f0ad65ae28..3dba66822d9 100644 --- a/docs/cli/memory.md +++ b/docs/cli/memory.md @@ -51,7 +51,7 @@ openclaw memory index --agent main --verbose `memory status`: -- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping. +- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping. QMD lexical `searchMode: "search"` skips semantic vector probes and embedding maintenance even with `--deep`. - `--index`: run a reindex if the store is dirty (implies `--deep`). - `--fix`: repair stale recall locks and normalize promotion metadata. - `--json`: print JSON output. diff --git a/docs/concepts/memory-qmd.md b/docs/concepts/memory-qmd.md index 7fef71b0585..10f740de4c7 100644 --- a/docs/concepts/memory-qmd.md +++ b/docs/concepts/memory-qmd.md @@ -51,13 +51,15 @@ present. ## How the sidecar works - OpenClaw creates collections from your workspace memory files and any - configured `memory.qmd.paths`, then runs `qmd update` + `qmd embed` on boot - and periodically (default every 5 minutes). + configured `memory.qmd.paths`, then runs `qmd update` on boot and + periodically (default every 5 minutes). Semantic modes also run `qmd embed`. - The default workspace collection tracks `MEMORY.md` plus the `memory/` tree. Lowercase `memory.md` is not indexed as a root memory file. - Boot refresh runs in the background so chat startup is not blocked. - Searches use the configured `searchMode` (default: `search`; also supports - `vsearch` and `query`). If a mode fails, OpenClaw retries with `qmd query`. 
+ `vsearch` and `query`). `search` is BM25-only, so OpenClaw skips semantic + vector readiness probes and embedding maintenance in that mode. If a mode + fails, OpenClaw retries with `qmd query`. - If QMD fails entirely, OpenClaw falls back to the builtin SQLite engine. @@ -164,6 +166,11 @@ runs as a service, create a symlink: **First search very slow?** QMD downloads GGUF models on first use. Pre-warm with `qmd query "test"` using the same XDG dirs OpenClaw uses. +**BM25-only QMD still trying to build llama.cpp?** Set +`memory.qmd.searchMode = "search"`. OpenClaw treats that mode as lexical-only, +does not run QMD vector status probes or embedding maintenance, and leaves +semantic readiness checks to `vsearch` or `query` setups. + **Search times out?** Increase `memory.qmd.limits.timeoutMs` (default: 4000ms). Set to `120000` for slower hardware. diff --git a/docs/reference/memory-config.md b/docs/reference/memory-config.md index 88ee5f5c09b..ecdba3a7af1 100644 --- a/docs/reference/memory-config.md +++ b/docs/reference/memory-config.md @@ -449,6 +449,8 @@ Set `memory.backend = "qmd"` to enable. All QMD settings live under `memory.qmd` | `sessions.retentionDays` | `number` | -- | Transcript retention | | `sessions.exportDir` | `string` | -- | Export directory | +`searchMode: "search"` is lexical/BM25-only. OpenClaw does not run semantic vector readiness probes or QMD embedding maintenance for that mode, including during `memory status --deep`; `vsearch` and `query` continue to require QMD vector readiness and embeddings. + OpenClaw prefers the current QMD collection and MCP query shapes, but keeps older QMD releases working by falling back to legacy `--mask` collection flags and older MCP tool names when needed. 
diff --git a/extensions/memory-core/src/memory/qmd-manager.test.ts b/extensions/memory-core/src/memory/qmd-manager.test.ts index 0d3335f10c6..dbf13ae5c21 100644 --- a/extensions/memory-core/src/memory/qmd-manager.test.ts +++ b/extensions/memory-core/src/memory/qmd-manager.test.ts @@ -288,16 +288,16 @@ describe("QmdMemoryManager", () => { const baselineCalls = spawnMock.mock.calls.length; await manager.sync({ reason: "manual" }); - expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2); + expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1); await manager.sync({ reason: "manual-again" }); - expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2); + expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1); (manager as unknown as { lastUpdateAt: number | null }).lastUpdateAt = Date.now() - (resolved.qmd?.update.debounceMs ?? 0) - 10; await manager.sync({ reason: "after-wait" }); - expect(spawnMock.mock.calls.length).toBe(baselineCalls + 3); + expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2); await manager.close(); }); @@ -1975,6 +1975,7 @@ describe("QmdMemoryManager", () => { backend: "qmd", qmd: { includeDefaultMemory: false, + searchMode: "query", update: { interval: "0s", debounceMs: 0, @@ -3418,7 +3419,7 @@ describe("QmdMemoryManager", () => { await manager.close(); }); - it("arms periodic embed maintenance in search mode", async () => { + it("skips periodic embed maintenance in lexical search mode", async () => { vi.useFakeTimers(); cfg = { ...cfg, @@ -3445,7 +3446,7 @@ describe("QmdMemoryManager", () => { const commandCalls = spawnMock.mock.calls .map((call: unknown[]) => call[1] as string[]) .filter((args: string[]) => args[0] === "update" || args[0] === "embed"); - expect(commandCalls).toEqual([["update"], ["embed"]]); + expect(commandCalls).toEqual([]); await manager.close(); }); @@ -3498,6 +3499,18 @@ describe("QmdMemoryManager", () => { it("serializes qmd embeds within a process before taking the shared file lock", async () => 
{ vi.useFakeTimers(); + cfg = { + ...cfg, + memory: { + backend: "qmd", + qmd: { + includeDefaultMemory: false, + searchMode: "query", + update: { interval: "0s", debounceMs: 0, onBoot: false }, + paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }], + }, + }, + } as OpenClawConfig; const embedChildren: MockChild[] = []; spawnMock.mockImplementation((_cmd: string, args: string[]) => { if (args[0] === "embed") { @@ -3682,7 +3695,7 @@ describe("QmdMemoryManager", () => { } }); - it("runs qmd embed in search mode for forced sync", async () => { + it("skips qmd embed in lexical search mode for forced sync", async () => { cfg = { ...cfg, memory: { @@ -3702,7 +3715,7 @@ describe("QmdMemoryManager", () => { const commandCalls = spawnMock.mock.calls .map((call: unknown[]) => call[1] as string[]) .filter((args: string[]) => args[0] === "update" || args[0] === "embed"); - expect(commandCalls).toEqual([["update"], ["embed"]]); + expect(commandCalls).toEqual([["update"]]); await manager.close(); }); @@ -4617,7 +4630,15 @@ describe("QmdMemoryManager", () => { return createMockChild(); }); - const { manager } = await createManager(); + const { manager } = await createManager({ + cfg: { + ...cfg, + memory: { + ...cfg.memory, + qmd: { ...cfg.memory?.qmd, searchMode: "query" }, + }, + } as OpenClawConfig, + }); await expect(manager.probeVectorAvailability()).resolves.toBe(false); await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({ @@ -4642,7 +4663,15 @@ describe("QmdMemoryManager", () => { return createMockChild(); }); - const { manager } = await createManager(); + const { manager } = await createManager({ + cfg: { + ...cfg, + memory: { + ...cfg.memory, + qmd: { ...cfg.memory?.qmd, searchMode: "query" }, + }, + } as OpenClawConfig, + }); await expect(manager.probeVectorAvailability()).resolves.toBe(true); await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({ @@ -4657,6 +4686,32 @@ describe("QmdMemoryManager", () => { await 
manager.close(); }); + it("skips qmd status vector probes for lexical search mode", async () => { + const { manager } = await createManager({ + cfg: { + ...cfg, + memory: { + ...cfg.memory, + qmd: { ...cfg.memory?.qmd, searchMode: "search" }, + }, + } as OpenClawConfig, + }); + const baselineCalls = spawnMock.mock.calls.length; + + await expect(manager.probeVectorAvailability()).resolves.toBe(false); + await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({ + ok: false, + error: "QMD semantic vectors are unavailable", + }); + expect(spawnMock.mock.calls.length).toBe(baselineCalls); + expect(manager.status().vector).toEqual({ + enabled: false, + available: false, + loadError: undefined, + }); + await manager.close(); + }); + describe("model cache symlink", () => { let defaultModelsDir: string; let customModelsDir: string; diff --git a/extensions/memory-core/src/memory/qmd-manager.ts b/extensions/memory-core/src/memory/qmd-manager.ts index d525373225d..d0d10173003 100644 --- a/extensions/memory-core/src/memory/qmd-manager.ts +++ b/extensions/memory-core/src/memory/qmd-manager.ts @@ -87,6 +87,10 @@ const IGNORED_MEMORY_WATCH_DIR_NAMES = new Set([ "__pycache__", ]); +function qmdUsesVectors(searchMode: ResolvedQmdConfig["searchMode"]): boolean { + return searchMode !== "search"; +} + function isDefaultMemoryPath(relPath: string): boolean { const normalized = relPath.trim().replace(/^\.\//, "").replace(/\\/g, "/"); if (!normalized) { @@ -1326,7 +1330,7 @@ export class QmdMemoryManager implements MemorySearchManager { sources: Array.from(this.sources), sourceCounts: counts.sourceCounts, vector: { - enabled: true, + enabled: qmdUsesVectors(this.qmd.searchMode), available: this.vectorAvailable ?? undefined, loadError: this.vectorStatusDetail ?? 
undefined, }, @@ -1357,6 +1361,11 @@ export class QmdMemoryManager implements MemorySearchManager { } async probeVectorAvailability(): Promise<boolean> { + if (!qmdUsesVectors(this.qmd.searchMode)) { + this.vectorAvailable = false; + this.vectorStatusDetail = null; + return false; + } try { const result = await this.runQmd(["status"], { timeoutMs: Math.min(this.qmd.limits.timeoutMs, 5_000), @@ -1597,9 +1606,9 @@ } private shouldRunEmbed(force?: boolean): boolean { - // Keep embeddings current regardless of the active retrieval mode. - // Search-mode indexing still needs vectors so later mode switches and - // hybrid flows do not inherit an incomplete QMD index. + if (!qmdUsesVectors(this.qmd.searchMode)) { + return false; + } const now = Date.now(); if (this.embedBackoffUntil !== null && now < this.embedBackoffUntil) { return false; @@ -1613,6 +1622,9 @@ } private shouldScheduleEmbedTimer(): boolean { + if (!qmdUsesVectors(this.qmd.searchMode)) { + return false; + } const embedIntervalMs = this.qmd.update.embedIntervalMs; if (embedIntervalMs <= 0) { return false;