fix(memory): skip qmd vectors in lexical mode

This commit is contained in:
Peter Steinberger
2026-04-27 14:09:32 +01:00
parent 6a0dc3a9bc
commit b181930c23
6 changed files with 94 additions and 17 deletions

View File

@@ -55,6 +55,7 @@ Docs: https://docs.openclaw.ai
- Ollama/WSL2: warn when GPU-backed WSL2 installs combine CUDA visibility with an autostarting `ollama.service` using `Restart=always`, and document the systemd, `.wslconfig`, and keep-alive mitigation for crash loops. Carries forward #61022; fixes #61185. Thanks @yhyatt.
- Ollama/onboarding: de-dupe suggested bare local models against installed `:latest` tags and skip redundant pulls, so setup shows the installed model once and no longer says it is downloading an already available model. Fixes #68952. Thanks @tleyden.
- Memory-core/doctor: keep `doctor.memory.status` on the cached path by default and only run live embedding pings for explicit deep probes, preventing slow local embedding backends from blocking Gateway status checks. Fixes #71568. Thanks @apex-system.
- Memory/QMD: skip QMD vector status probes and embedding maintenance in lexical `searchMode: "search"`, so BM25-only QMD setups on ARM do not trigger llama.cpp/Vulkan builds during status checks or embed cycles. Fixes #59234 and #67113. Thanks @PrinceOfEgypt, @Vksh07, @Snipe76, @NomLom, @t4r3e2q1-commits, and @dmak.
- Compaction: skip oversized pre-compaction checkpoint snapshots and prune duplicate long user turns from compaction input and rotated successor transcripts, preventing retry storms from being preserved across checkpoint cycles. Fixes #72780. Thanks @SweetSophia.
- Control UI/Cron: render cron job prompts and run summaries as sanitized markdown in the dashboard, with full-width block content, safer link clicks, and no duplicate error text when a failed run has no summary. Supersedes #48504. Thanks @garethdaine.
- Control UI/Gateway: preserve WebChat client version labels across localhost, 127.0.0.1, and IPv6 loopback aliases on the same port, avoiding misleading `vcontrol-ui` connection logs while investigating duplicate-message reports. Refs #72753 and #72742. Thanks @LumenFromTheFuture and @allesgutefy.

View File

@@ -51,7 +51,7 @@ openclaw memory index --agent main --verbose
`memory status`:
- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping.
- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping. QMD lexical `searchMode: "search"` skips semantic vector probes and embedding maintenance even with `--deep`.
- `--index`: run a reindex if the store is dirty (implies `--deep`).
- `--fix`: repair stale recall locks and normalize promotion metadata.
- `--json`: print JSON output.

View File

@@ -51,13 +51,15 @@ present.
## How the sidecar works
- OpenClaw creates collections from your workspace memory files and any
configured `memory.qmd.paths`, then runs `qmd update` + `qmd embed` on boot
and periodically (default every 5 minutes).
configured `memory.qmd.paths`, then runs `qmd update` on boot and
periodically (default every 5 minutes). Semantic modes also run `qmd embed`.
- The default workspace collection tracks `MEMORY.md` plus the `memory/`
tree. Lowercase `memory.md` is not indexed as a root memory file.
- Boot refresh runs in the background so chat startup is not blocked.
- Searches use the configured `searchMode` (default: `search`; also supports
`vsearch` and `query`). If a mode fails, OpenClaw retries with `qmd query`.
`vsearch` and `query`). `search` is BM25-only, so OpenClaw skips semantic
vector readiness probes and embedding maintenance in that mode. If a mode
fails, OpenClaw retries with `qmd query`.
- If QMD fails entirely, OpenClaw falls back to the builtin SQLite engine.
<Info>
@@ -164,6 +166,11 @@ runs as a service, create a symlink:
**First search very slow?** QMD downloads GGUF models on first use. Pre-warm
with `qmd query "test"` using the same XDG dirs OpenClaw uses.
**BM25-only QMD still trying to build llama.cpp?** Set
`memory.qmd.searchMode = "search"`. OpenClaw treats that mode as lexical-only,
does not run QMD vector status probes or embedding maintenance, and leaves
semantic readiness checks to `vsearch` or `query` setups.
**Search times out?** Increase `memory.qmd.limits.timeoutMs` (default: 4000ms).
Set to `120000` for slower hardware.

View File

@@ -449,6 +449,8 @@ Set `memory.backend = "qmd"` to enable. All QMD settings live under `memory.qmd`
| `sessions.retentionDays` | `number` | -- | Transcript retention |
| `sessions.exportDir` | `string` | -- | Export directory |
`searchMode: "search"` is lexical/BM25-only. OpenClaw does not run semantic vector readiness probes or QMD embedding maintenance for that mode, including during `memory status --deep`; `vsearch` and `query` continue to require QMD vector readiness and embeddings.
OpenClaw prefers the current QMD collection and MCP query shapes, but keeps older QMD releases working by falling back to legacy `--mask` collection flags and older MCP tool names when needed.
<Note>

View File

@@ -288,16 +288,16 @@ describe("QmdMemoryManager", () => {
const baselineCalls = spawnMock.mock.calls.length;
await manager.sync({ reason: "manual" });
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1);
await manager.sync({ reason: "manual-again" });
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 1);
(manager as unknown as { lastUpdateAt: number | null }).lastUpdateAt =
Date.now() - (resolved.qmd?.update.debounceMs ?? 0) - 10;
await manager.sync({ reason: "after-wait" });
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 3);
expect(spawnMock.mock.calls.length).toBe(baselineCalls + 2);
await manager.close();
});
@@ -1975,6 +1975,7 @@ describe("QmdMemoryManager", () => {
backend: "qmd",
qmd: {
includeDefaultMemory: false,
searchMode: "query",
update: {
interval: "0s",
debounceMs: 0,
@@ -3418,7 +3419,7 @@ describe("QmdMemoryManager", () => {
await manager.close();
});
it("arms periodic embed maintenance in search mode", async () => {
it("skips periodic embed maintenance in lexical search mode", async () => {
vi.useFakeTimers();
cfg = {
...cfg,
@@ -3445,7 +3446,7 @@ describe("QmdMemoryManager", () => {
const commandCalls = spawnMock.mock.calls
.map((call: unknown[]) => call[1] as string[])
.filter((args: string[]) => args[0] === "update" || args[0] === "embed");
expect(commandCalls).toEqual([["update"], ["embed"]]);
expect(commandCalls).toEqual([]);
await manager.close();
});
@@ -3498,6 +3499,18 @@ describe("QmdMemoryManager", () => {
it("serializes qmd embeds within a process before taking the shared file lock", async () => {
vi.useFakeTimers();
cfg = {
...cfg,
memory: {
backend: "qmd",
qmd: {
includeDefaultMemory: false,
searchMode: "query",
update: { interval: "0s", debounceMs: 0, onBoot: false },
paths: [{ path: workspaceDir, pattern: "**/*.md", name: "workspace" }],
},
},
} as OpenClawConfig;
const embedChildren: MockChild[] = [];
spawnMock.mockImplementation((_cmd: string, args: string[]) => {
if (args[0] === "embed") {
@@ -3682,7 +3695,7 @@ describe("QmdMemoryManager", () => {
}
});
it("runs qmd embed in search mode for forced sync", async () => {
it("skips qmd embed in lexical search mode for forced sync", async () => {
cfg = {
...cfg,
memory: {
@@ -3702,7 +3715,7 @@ describe("QmdMemoryManager", () => {
const commandCalls = spawnMock.mock.calls
.map((call: unknown[]) => call[1] as string[])
.filter((args: string[]) => args[0] === "update" || args[0] === "embed");
expect(commandCalls).toEqual([["update"], ["embed"]]);
expect(commandCalls).toEqual([["update"]]);
await manager.close();
});
@@ -4617,7 +4630,15 @@ describe("QmdMemoryManager", () => {
return createMockChild();
});
const { manager } = await createManager();
const { manager } = await createManager({
cfg: {
...cfg,
memory: {
...cfg.memory,
qmd: { ...cfg.memory?.qmd, searchMode: "query" },
},
} as OpenClawConfig,
});
await expect(manager.probeVectorAvailability()).resolves.toBe(false);
await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({
@@ -4642,7 +4663,15 @@ describe("QmdMemoryManager", () => {
return createMockChild();
});
const { manager } = await createManager();
const { manager } = await createManager({
cfg: {
...cfg,
memory: {
...cfg.memory,
qmd: { ...cfg.memory?.qmd, searchMode: "query" },
},
} as OpenClawConfig,
});
await expect(manager.probeVectorAvailability()).resolves.toBe(true);
await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({
@@ -4657,6 +4686,32 @@ describe("QmdMemoryManager", () => {
await manager.close();
});
// Verifies that with `searchMode: "search"` both availability probes report
// "unavailable" without spawning anything, and that status() marks vectors as disabled.
it("skips qmd status vector probes for lexical search mode", async () => {
// Override only the QMD search mode; the rest of the shared test config is kept.
const { manager } = await createManager({
cfg: {
...cfg,
memory: {
...cfg.memory,
qmd: { ...cfg.memory?.qmd, searchMode: "search" },
},
} as OpenClawConfig,
});
// Snapshot the spawn count after manager creation so only calls made by the
// probes below are measured (spawnMock presumably stands in for QMD subprocess spawns).
const baselineCalls = spawnMock.mock.calls.length;
await expect(manager.probeVectorAvailability()).resolves.toBe(false);
await expect(manager.probeEmbeddingAvailability()).resolves.toEqual({
ok: false,
error: "QMD semantic vectors are unavailable",
});
// Neither probe may have added a spawn call.
expect(spawnMock.mock.calls.length).toBe(baselineCalls);
// Status reports vectors as disabled and unavailable, with no load error.
expect(manager.status().vector).toEqual({
enabled: false,
available: false,
loadError: undefined,
});
await manager.close();
});
describe("model cache symlink", () => {
let defaultModelsDir: string;
let customModelsDir: string;

View File

@@ -87,6 +87,10 @@ const IGNORED_MEMORY_WATCH_DIR_NAMES = new Set([
"__pycache__",
]);
/**
 * Reports whether the given QMD search mode relies on semantic vectors.
 *
 * @param searchMode - The resolved QMD search mode.
 * @returns `false` only for the `"search"` mode; `true` for every other value.
 */
function qmdUsesVectors(searchMode: ResolvedQmdConfig["searchMode"]): boolean {
  const isLexicalOnly = searchMode === "search";
  return !isLexicalOnly;
}
function isDefaultMemoryPath(relPath: string): boolean {
const normalized = relPath.trim().replace(/^\.\//, "").replace(/\\/g, "/");
if (!normalized) {
@@ -1326,7 +1330,7 @@ export class QmdMemoryManager implements MemorySearchManager {
sources: Array.from(this.sources),
sourceCounts: counts.sourceCounts,
vector: {
enabled: true,
enabled: qmdUsesVectors(this.qmd.searchMode),
available: this.vectorAvailable ?? undefined,
loadError: this.vectorStatusDetail ?? undefined,
},
@@ -1357,6 +1361,11 @@ export class QmdMemoryManager implements MemorySearchManager {
}
async probeVectorAvailability(): Promise<boolean> {
if (!qmdUsesVectors(this.qmd.searchMode)) {
this.vectorAvailable = false;
this.vectorStatusDetail = null;
return false;
}
try {
const result = await this.runQmd(["status"], {
timeoutMs: Math.min(this.qmd.limits.timeoutMs, 5_000),
@@ -1597,9 +1606,9 @@ export class QmdMemoryManager implements MemorySearchManager {
}
private shouldRunEmbed(force?: boolean): boolean {
// Keep embeddings current regardless of the active retrieval mode.
// Search-mode indexing still needs vectors so later mode switches and
// hybrid flows do not inherit an incomplete QMD index.
if (!qmdUsesVectors(this.qmd.searchMode)) {
return false;
}
const now = Date.now();
if (this.embedBackoffUntil !== null && now < this.embedBackoffUntil) {
return false;
@@ -1613,6 +1622,9 @@ export class QmdMemoryManager implements MemorySearchManager {
}
private shouldScheduleEmbedTimer(): boolean {
if (!qmdUsesVectors(this.qmd.searchMode)) {
return false;
}
const embedIntervalMs = this.qmd.update.embedIntervalMs;
if (embedIntervalMs <= 0) {
return false;