fix(memory): avoid live embedding probes in status

This commit is contained in:
Peter Steinberger
2026-04-27 13:55:51 +01:00
parent dc495e6d62
commit df65a75f92
13 changed files with 216 additions and 16 deletions

View File

@@ -51,6 +51,7 @@ Docs: https://docs.openclaw.ai
- Media-understanding/audio: migrate deprecated `{input}` placeholders in legacy `audio.transcription.command` configs to `{{MediaPath}}`, so custom audio transcribers no longer receive the literal placeholder after doctor repair. Fixes #72760. Thanks @krisfanue3-hash.
- Ollama/WSL2: warn when GPU-backed WSL2 installs combine CUDA visibility with an autostarting `ollama.service` using `Restart=always`, and document the systemd, `.wslconfig`, and keep-alive mitigation for crash loops. Carries forward #61022; fixes #61185. Thanks @yhyatt.
- Ollama/onboarding: de-dupe suggested bare local models against installed `:latest` tags and skip redundant pulls, so setup shows the installed model once and no longer says it is downloading an already available model. Fixes #68952. Thanks @tleyden.
- Memory-core/doctor: keep `doctor.memory.status` on the cached path by default and only run live embedding pings for explicit deep probes, preventing slow local embedding backends from blocking Gateway status checks. Fixes #71568. Thanks @apex-system.
- Compaction: skip oversized pre-compaction checkpoint snapshots and prune duplicate long user turns from compaction input and rotated successor transcripts, preventing retry storms from being preserved across checkpoint cycles. Fixes #72780. Thanks @SweetSophia.
- Control UI/Cron: render cron job prompts and run summaries as sanitized markdown in the dashboard, with full-width block content, safer link clicks, and no duplicate error text when a failed run has no summary. Supersedes #48504. Thanks @garethdaine.
- Control UI/Gateway: preserve WebChat client version labels across localhost, 127.0.0.1, and IPv6 loopback aliases on the same port, avoiding misleading `vcontrol-ui` connection logs while investigating duplicate-message reports. Refs #72753 and #72742. Thanks @LumenFromTheFuture and @allesgutefy.

View File

@@ -51,7 +51,7 @@ openclaw memory index --agent main --verbose
`memory status`:
- `--deep`: probe vector + embedding availability.
- `--deep`: probe vector + embedding availability. Plain `memory status` stays fast and does not run a live embedding ping.
- `--index`: run a reindex if the store is dirty (implies `--deep`).
- `--fix`: repair stale recall locks and normalize promotion metadata.
- `--json`: print JSON output.

View File

@@ -419,7 +419,7 @@ That stages grounded durable candidates into the short-term dreaming store while
- **Explicit remote provider** (`openai`, `voyage`, etc.): verifies an API key is present in the environment or auth store. Prints actionable fix hints if missing.
- **Auto provider**: checks local model availability first, then tries each remote provider in auto-selection order.
When a gateway probe result is available (gateway was healthy at the time of the check), doctor cross-references its result with the CLI-visible config and notes any discrepancy.
When a cached gateway probe result is available (gateway was healthy at the time of the check), doctor cross-references its result with the CLI-visible config and notes any discrepancy. Doctor does not start a fresh embedding ping on the default path; use the deep memory status command when you want a live provider check.
Use `openclaw memory status --deep` to verify embedding readiness at runtime.

View File

@@ -291,7 +291,7 @@ enumeration of `src/gateway/server-methods/*.ts`.
- `models.list` returns the runtime-allowed model catalog.
- `usage.status` returns provider usage windows/remaining quota summaries.
- `usage.cost` returns aggregated cost usage summaries for a date range.
- `doctor.memory.status` returns vector-memory / embedding readiness for the active default agent workspace.
- `doctor.memory.status` returns vector-memory / cached embedding readiness for the active default agent workspace. Pass `{ "probe": true }` or `{ "deep": true }` only when the caller explicitly wants a live embedding provider ping.
- `sessions.usage` returns per-session usage summaries.
- `sessions.usage.timeseries` returns timeseries usage for one session.
- `sessions.usage.logs` returns usage log entries for one session.

View File

@@ -12,6 +12,7 @@ import {
import "./test-runtime-mocks.js";
import type { MemoryIndexManager } from "./index.js";
import { closeAllMemorySearchManagers, getMemorySearchManager } from "./index.js";
import { EMBEDDING_PROBE_CACHE_TTL_MS } from "./manager.js";
import {
DEFAULT_LOCAL_MODEL,
registerBuiltInMemoryEmbeddingProviders,
@@ -384,6 +385,42 @@ describe("memory index", () => {
expect(status.vector?.available).toBe(available);
});
// Regression test for cached probe reuse: a second short-lived "status"
// manager over the same store must see the first manager's probe result
// instead of issuing another live embedding call.
it("caches embedding probe readiness across transient status managers", async () => {
  const cfg = createCfg({ storePath: path.join(workspaceDir, "index-probe-cache.sqlite") });
  const first = requireManager(
    await getMemorySearchManager({ cfg, agentId: "main", purpose: "status" }),
  );
  managersForCleanup.add(first);
  // First probe goes live exactly once.
  await expect(first.probeEmbeddingAvailability()).resolves.toEqual({ ok: true });
  expect(embedBatchCalls).toBe(1);
  await first.close();
  const second = requireManager(
    await getMemorySearchManager({ cfg, agentId: "main", purpose: "status" }),
  );
  managersForCleanup.add(second);
  // A fresh manager sees the cached result, annotated with the
  // checked/cached flags and both timestamps.
  expect(second.getCachedEmbeddingAvailability?.()).toEqual(
    expect.objectContaining({
      ok: true,
      checked: true,
      cached: true,
      checkedAtMs: expect.any(Number),
      cacheExpiresAtMs: expect.any(Number),
    }),
  );
  // Probing again is served from cache: still only one live embed call.
  await expect(second.probeEmbeddingAvailability()).resolves.toEqual(
    expect.objectContaining({ ok: true, cached: true }),
  );
  expect(embedBatchCalls).toBe(1);
  // Expiry must be exactly one TTL after the recorded check time.
  const cached = second.getCachedEmbeddingAvailability?.();
  expect((cached?.cacheExpiresAtMs ?? 0) - (cached?.checkedAtMs ?? 0)).toBe(
    EMBEDDING_PROBE_CACHE_TTL_MS,
  );
});
it("builds FTS index and returns search results when no embedding provider is available", async () => {
forceNoProvider = true;

View File

@@ -62,12 +62,23 @@ const VECTOR_TABLE = "chunks_vec";
const FTS_TABLE = "chunks_fts";
const EMBEDDING_CACHE_TABLE = "embedding_cache";
const MEMORY_INDEX_MANAGER_CACHE_KEY = Symbol.for("openclaw.memoryIndexManagerCache");
export const EMBEDDING_PROBE_CACHE_TTL_MS = 30_000;
const log = createSubsystemLogger("memory");
type MemoryIndexManagerPurpose = "default" | "status" | "cli";
const { cache: INDEX_CACHE, pending: INDEX_CACHE_PENDING } =
resolveSingletonManagedCache<MemoryIndexManager>(MEMORY_INDEX_MANAGER_CACHE_KEY);
// One cached embedding-probe outcome.
type EmbeddingProbeCacheEntry = {
  result: MemoryEmbeddingProbeResult;
  // Epoch millis when the live probe ran.
  checkedAtMs: number;
  // checkedAtMs + EMBEDDING_PROBE_CACHE_TTL_MS; entry is stale at/after this.
  expireAtMs: number;
};
// Module-level cache, keyed by manager cache key, so transient status
// managers can reuse a recent probe result instead of pinging the embedding
// provider again.
const EMBEDDING_PROBE_CACHE = new Map<string, EmbeddingProbeCacheEntry>();
export async function closeAllMemoryIndexManagers(): Promise<void> {
EMBEDDING_PROBE_CACHE.clear();
await closeManagedCacheEntries({
cache: INDEX_CACHE,
pending: INDEX_CACHE_PENDING,
@@ -818,21 +829,54 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
return this.ensureVectorReady();
}
/**
 * Records a probe outcome in the module-level cache under this manager's
 * cache key, stamping it with the probe time and a TTL-based expiry.
 * Returns the result unchanged so callers can tail-call this method.
 */
private cacheProbeResult(result: MemoryEmbeddingProbeResult): MemoryEmbeddingProbeResult {
  const now = Date.now();
  const entry: EmbeddingProbeCacheEntry = {
    result,
    checkedAtMs: now,
    expireAtMs: now + EMBEDDING_PROBE_CACHE_TTL_MS,
  };
  EMBEDDING_PROBE_CACHE.set(this.cacheKey, entry);
  return result;
}
/**
 * Returns the cached embedding probe result for this manager's cache key,
 * or null when nothing is cached or the entry has expired. Expired entries
 * are evicted on read. Cached hits are annotated with checked/cached flags
 * plus the original probe time and the cache expiry time.
 */
getCachedEmbeddingAvailability(): MemoryEmbeddingProbeResult | null {
  const entry = EMBEDDING_PROBE_CACHE.get(this.cacheKey);
  if (entry === undefined) {
    return null;
  }
  if (Date.now() >= entry.expireAtMs) {
    // Evict lazily so stale results never leak to callers.
    EMBEDDING_PROBE_CACHE.delete(this.cacheKey);
    return null;
  }
  const { result, checkedAtMs, expireAtMs } = entry;
  return {
    ...result,
    checked: true,
    cached: true,
    checkedAtMs,
    cacheExpiresAtMs: expireAtMs,
  };
}
async probeEmbeddingAvailability(): Promise<MemoryEmbeddingProbeResult> {
const cached = this.getCachedEmbeddingAvailability();
if (cached) {
return cached;
}
await this.ensureProviderInitialized();
// FTS-only mode: embeddings not available but search still works
if (!this.provider) {
return {
return this.cacheProbeResult({
ok: false,
error: this.providerUnavailableReason ?? "No embedding provider available (FTS-only mode)",
};
});
}
try {
await this.embedBatchWithRetry(["ping"]);
return { ok: true };
return this.cacheProbeResult({ ok: true });
} catch (err) {
const message = formatErrorMessage(err);
return { ok: false, error: message };
return this.cacheProbeResult({ ok: false, error: message });
}
}

View File

@@ -290,6 +290,10 @@ class BorrowedMemoryManager implements MemorySearchManager {
return await this.inner.probeEmbeddingAvailability();
}
/**
 * Delegates the cached-probe lookup to the wrapped manager; returns null
 * when the inner manager does not implement it or has nothing cached.
 */
getCachedEmbeddingAvailability(): MemoryEmbeddingProbeResult | null {
  const lookup = this.inner.getCachedEmbeddingAvailability;
  if (typeof lookup !== "function") {
    return null;
  }
  return lookup.call(this.inner) ?? null;
}
async probeVectorAvailability() {
return await this.inner.probeVectorAvailability();
}
@@ -432,6 +436,14 @@ class FallbackMemoryManager implements MemorySearchManager {
return { ok: false, error: this.lastError ?? "memory embeddings unavailable" };
}
/**
 * Reads the cached probe result from whichever manager is currently active:
 * the primary until it has failed, the fallback afterwards. Never triggers
 * a live probe. Throws (via ensureOpen) if this wrapper is already closed.
 */
getCachedEmbeddingAvailability(): MemoryEmbeddingProbeResult | null {
  this.ensureOpen();
  const active = this.primaryFailed ? this.fallback : this.deps.primary;
  return active?.getCachedEmbeddingAvailability?.() ?? null;
}
async probeVectorAvailability() {
this.ensureOpen();
if (!this.primaryFailed) {

View File

@@ -15,6 +15,10 @@ export type MemorySearchResult = {
// Outcome of an embedding readiness probe (live, cached, or skipped).
export type MemoryEmbeddingProbeResult = {
  ok: boolean;
  // Human-readable failure reason; set when ok is false.
  error?: string;
  // True when a probe result actually backs this payload; false means the
  // probe was skipped entirely.
  checked?: boolean;
  // True when this result was served from the probe cache, not a live ping.
  cached?: boolean;
  // Epoch millis of the underlying live probe (cached results only).
  checkedAtMs?: number;
  // Epoch millis when the cached result expires (cached results only).
  cacheExpiresAtMs?: number;
};
export type MemorySyncProgressUpdate = {
@@ -82,6 +86,7 @@ export interface MemorySearchManager {
sessionFiles?: string[];
progress?: (update: MemorySyncProgressUpdate) => void;
}): Promise<void>;
getCachedEmbeddingAvailability?(): MemoryEmbeddingProbeResult | null;
probeEmbeddingAvailability(): Promise<MemoryEmbeddingProbeResult>;
probeVectorAvailability(): Promise<boolean>;
close?(): Promise<void>;

View File

@@ -23,6 +23,23 @@ describe("probeGatewayMemoryStatus", () => {
callGateway.mockReset();
});
// The doctor-side gateway probe must request cached status (probe: false)
// so a slow embedding backend cannot stall the health check.
it("requests cached memory status without a live embedding probe", async () => {
  callGateway.mockResolvedValue({ embedding: { ok: true } });
  await expect(probeGatewayMemoryStatus({ cfg, timeoutMs: 1234 })).resolves.toEqual({
    checked: true,
    ready: true,
    error: undefined,
  });
  // Verify the gateway call is made with the live probe explicitly disabled.
  expect(callGateway).toHaveBeenCalledWith({
    method: "doctor.memory.status",
    params: { probe: false },
    timeoutMs: 1234,
    config: cfg,
  });
});
it("treats outer gateway timeouts as inconclusive", async () => {
callGateway.mockRejectedValue(
new Error("gateway timeout after 8000ms\nGateway target: ws://127.0.0.1:18789"),

View File

@@ -78,6 +78,7 @@ export async function probeGatewayMemoryStatus(params: {
try {
const payload = await callGateway<DoctorMemoryStatusPayload>({
method: "doctor.memory.status",
params: { probe: false },
timeoutMs,
config: params.cfg,
});

View File

@@ -54,16 +54,16 @@ const makeRuntimeContext = () => ({ getRuntimeConfig: () => getRuntimeConfig() }
const invokeDoctorMemoryStatus = async (
respond: ReturnType<typeof vi.fn>,
context?: { cron?: { list?: ReturnType<typeof vi.fn> } },
options?: { cron?: { list?: ReturnType<typeof vi.fn> }; params?: unknown },
) => {
const cronList =
context?.cron?.list ??
options?.cron?.list ??
vi.fn(async () => {
return [];
});
await doctorHandlers["doctor.memory.status"]({
req: {} as never,
params: {} as never,
params: (options?.params ?? {}) as never,
respond: respond as never,
context: {
...makeRuntimeContext(),
@@ -182,7 +182,7 @@ describe("doctor.memory.status", () => {
});
const respond = vi.fn();
await invokeDoctorMemoryStatus(respond);
await invokeDoctorMemoryStatus(respond, { params: { probe: true } });
expect(getMemorySearchManager).toHaveBeenCalledWith({
cfg: expect.any(Object),
@@ -217,6 +217,63 @@ describe("doctor.memory.status", () => {
expect(close).toHaveBeenCalled();
});
// Without probe/deep params the handler must not ping the embedding
// provider; it reports ok:false with checked:false instead.
it("does not live-probe embedding readiness by default", async () => {
  const close = vi.fn().mockResolvedValue(undefined);
  const probeEmbeddingAvailability = vi.fn().mockResolvedValue({ ok: true });
  getMemorySearchManager.mockResolvedValue({
    manager: {
      status: () => ({ provider: "gemini" }),
      probeEmbeddingAvailability,
      close,
    },
  });
  const respond = vi.fn();
  await invokeDoctorMemoryStatus(respond);
  // The live probe is skipped entirely on the default path.
  expect(probeEmbeddingAvailability).not.toHaveBeenCalled();
  expect(respond).toHaveBeenCalledWith(
    true,
    expect.objectContaining({
      embedding: expect.objectContaining({ ok: false, checked: false }),
    }),
    undefined,
  );
  // The transient status manager must always be closed.
  expect(close).toHaveBeenCalled();
});
// When the manager holds a cached probe result, the handler serves it
// verbatim on the default path and still avoids a live probe.
it("returns cached embedding readiness without a live probe", async () => {
  const close = vi.fn().mockResolvedValue(undefined);
  const probeEmbeddingAvailability = vi.fn().mockResolvedValue({ ok: false });
  getMemorySearchManager.mockResolvedValue({
    manager: {
      status: () => ({ provider: "gemini" }),
      getCachedEmbeddingAvailability: vi.fn(() => ({
        ok: true,
        checked: true,
        cached: true,
        checkedAtMs: 123,
        cacheExpiresAtMs: 456,
      })),
      probeEmbeddingAvailability,
      close,
    },
  });
  const respond = vi.fn();
  await invokeDoctorMemoryStatus(respond);
  expect(probeEmbeddingAvailability).not.toHaveBeenCalled();
  // The cached result (ok:true) wins over the would-be live result (ok:false).
  expect(respond).toHaveBeenCalledWith(
    true,
    expect.objectContaining({
      embedding: expect.objectContaining({ ok: true, checked: true, cached: true }),
    }),
    undefined,
  );
  expect(close).toHaveBeenCalled();
});
it("returns unavailable when memory manager is missing", async () => {
getMemorySearchManager.mockResolvedValue({
manager: null,
@@ -224,7 +281,7 @@ describe("doctor.memory.status", () => {
});
const respond = vi.fn();
await invokeDoctorMemoryStatus(respond);
await invokeDoctorMemoryStatus(respond, { params: { probe: true } });
expectEmbeddingErrorResponse(respond, "memory search unavailable");
});
@@ -240,7 +297,7 @@ describe("doctor.memory.status", () => {
});
const respond = vi.fn();
await invokeDoctorMemoryStatus(respond);
await invokeDoctorMemoryStatus(respond, { params: { probe: true } });
expectEmbeddingErrorResponse(respond, "gateway memory probe failed: timeout");
expect(close).toHaveBeenCalled();
@@ -460,7 +517,7 @@ describe("doctor.memory.status", () => {
expect.objectContaining({
agentId: "main",
provider: "gemini",
embedding: { ok: true },
embedding: expect.objectContaining({ ok: false, checked: false }),
dreaming: expect.objectContaining({
enabled: true,
timezone: "America/Los_Angeles",

View File

@@ -112,6 +112,10 @@ export type DoctorMemoryStatusPayload = {
embedding: {
ok: boolean;
error?: string;
checked?: boolean;
cached?: boolean;
checkedAtMs?: number;
cacheExpiresAtMs?: number;
};
dreaming?: DoctorMemoryDreamingPayload;
};
@@ -780,8 +784,22 @@ async function readDreamDiary(
};
}
/**
 * Decides whether a doctor.memory.status caller asked for a live embedding
 * probe. Only an explicit `probe: true` or `deep: true` on an object params
 * payload opts in; anything else (missing, non-object, or non-true flags)
 * keeps the fast cached path.
 */
function shouldProbeMemoryEmbeddings(params: unknown): boolean {
  if (typeof params !== "object" || params === null) {
    return false;
  }
  const { probe, deep } = params as Record<string, unknown>;
  return probe === true || deep === true;
}
// Sentinel embedding payload used when the caller did not request a live
// probe and no cached result is available: readiness is reported as
// unchecked (checked: false) rather than broken, and the error text points
// at the deep status command for an on-demand live check.
const SKIPPED_MEMORY_EMBEDDING_PROBE = {
  ok: false,
  checked: false,
  error: "memory embedding readiness not checked; run `openclaw memory status --deep` to probe",
} as const;
export const doctorHandlers: GatewayRequestHandlers = {
"doctor.memory.status": async ({ respond, context }) => {
"doctor.memory.status": async ({ respond, context, params }) => {
const cfg = context.getRuntimeConfig();
const agentId = resolveDefaultAgentId(cfg);
const { manager, error } = await getActiveMemorySearchManager({
@@ -803,7 +821,10 @@ export const doctorHandlers: GatewayRequestHandlers = {
try {
const status = manager.status();
let embedding = await manager.probeEmbeddingAvailability();
const shouldProbe = shouldProbeMemoryEmbeddings(params);
let embedding = shouldProbe
? await manager.probeEmbeddingAvailability()
: (manager.getCachedEmbeddingAvailability?.() ?? SKIPPED_MEMORY_EMBEDDING_PROBE);
if (!embedding.ok && !embedding.error) {
embedding = { ok: false, error: "memory embeddings unavailable" };
}

View File

@@ -15,6 +15,10 @@ export type MemorySearchResult = {
// Outcome of an embedding readiness probe (live, cached, or skipped).
export type MemoryEmbeddingProbeResult = {
  ok: boolean;
  // Human-readable failure reason; set when ok is false.
  error?: string;
  // True when a probe result actually backs this payload; false means the
  // probe was skipped entirely.
  checked?: boolean;
  // True when this result was served from the probe cache, not a live ping.
  cached?: boolean;
  // Epoch millis of the underlying live probe (cached results only).
  checkedAtMs?: number;
  // Epoch millis when the cached result expires (cached results only).
  cacheExpiresAtMs?: number;
};
export type MemorySyncProgressUpdate = {
@@ -96,6 +100,7 @@ export interface MemorySearchManager {
sessionFiles?: string[];
progress?: (update: MemorySyncProgressUpdate) => void;
}): Promise<void>;
getCachedEmbeddingAvailability?(): MemoryEmbeddingProbeResult | null;
probeEmbeddingAvailability(): Promise<MemoryEmbeddingProbeResult>;
probeVectorAvailability(): Promise<boolean>;
close?(): Promise<void>;