From ee63b9ee49ef594bb62821f4a8472996d64d853d Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Wed, 22 Apr 2026 21:20:03 +0100
Subject: [PATCH] fix(memory-lancedb): retry failed runtime initialization

---
 CHANGELOG.md                                  |   1 +
 extensions/memory-lancedb/index.test.ts       | 115 ++++++++++++++++++
 extensions/memory-lancedb/index.ts            |   5 +-
 extensions/memory-lancedb/lancedb-runtime.ts  |  31 +++++
 .../bundled-channel-runtime-deps-docker.sh    |  88 ++++++++++++--
 5 files changed, 230 insertions(+), 10 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9fc58b37dd8..49a189af856 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
 - ACP/sessions_spawn: honor explicit `model` overrides for ACP child sessions instead of silently falling back to the target agent default model. (#70210) Thanks @felix-miao.
 - Agents/subagents: drop bare `NO_REPLY` from the parent turn when the session still has pending spawned children, so direct-conversation surfaces such as Telegram DMs no longer rewrite the sentinel into visible fallback chatter while waiting for the child completion event. (#69942) Thanks @neeravmakwana.
 - Plugins/install: keep bundled plugin dependencies off npm install while repairing them when plugins activate from a packaged install, including Feishu/Lark, Browser, and direct bundled channel setup-entry loads.
+- Memory/LanceDB: retry initialization after a failed LanceDB load instead of caching the failure, and report the unsupported Intel macOS native runtime clearly instead of repeatedly attempting an install that cannot work.
 - CLI/Claude: hash only static extra system prompt parts when deciding whether to reuse a CLI session, so per-message inbound metadata no longer resets Claude CLI conversations on every turn. (#70122) Thanks @zijunl.
 - Hooks/Slack: standardize shared message hook routing fields (`threadId` / `replyToId`) and stop Slack outbound delivery from re-running `message_sending` inside the channel adapter, so plugins like thread-ownership make one outbound routing decision per reply. Thanks @vincentkoc.
 - Auto-reply/media: share one run-scoped reply media context between streamed block delivery and final payload filtering, so a local `MEDIA:` attachment is staged once and duplicate media sends are suppressed reliably. (#68111) Thanks @ayeshakhalid192007-dev.
diff --git a/extensions/memory-lancedb/index.test.ts b/extensions/memory-lancedb/index.test.ts
index 3aeefff3aaf..31a965db28e 100644
--- a/extensions/memory-lancedb/index.test.ts
+++ b/extensions/memory-lancedb/index.test.ts
@@ -60,6 +60,8 @@ function createRuntimeLoader(
     env?: NodeJS.ProcessEnv;
     importBundled?: () => Promise<LanceDbModule>;
     importResolved?: (resolvedPath: string) => Promise<LanceDbModule>;
+    platform?: NodeJS.Platform;
+    arch?: NodeJS.Architecture;
     resolveRuntimeEntry?: (params: {
       runtimeDir: string;
       manifest: RuntimeManifest;
@@ -74,6 +76,8 @@ function createRuntimeLoader(
 ) {
   return createLanceDbRuntimeLoader({
     env: overrides.env ?? ({} as NodeJS.ProcessEnv),
+    platform: overrides.platform,
+    arch: overrides.arch,
     resolveStateDir: () => "/tmp/openclaw-state",
     runtimeManifest: TEST_RUNTIME_MANIFEST,
     importBundled:
@@ -832,6 +836,100 @@ describe("memory plugin e2e", () => {
     }
   });
 
+  test("clears failed database initialization so later tool calls can retry", async () => {
+    const embeddingsCreate = vi.fn(async () => ({
+      data: [{ embedding: [0.1, 0.2, 0.3] }],
+    }));
+    const ensureGlobalUndiciEnvProxyDispatcher = vi.fn();
+    const toArray = vi.fn(async () => []);
+    const limit = vi.fn(() => ({ toArray }));
+    const vectorSearch = vi.fn(() => ({ limit }));
+    const loadLanceDbModule = vi
+      .fn()
+      .mockRejectedValueOnce(new Error("temporary LanceDB install failure")) // 1st load: rejects
+      .mockResolvedValueOnce({ // 2nd load: resolves to a stub LanceDB module
+        connect: vi.fn(async () => ({
+          tableNames: vi.fn(async () => ["memories"]),
+          openTable: vi.fn(async () => ({
+            vectorSearch,
+            countRows: vi.fn(async () => 0),
+            add: vi.fn(async () => undefined),
+            delete: vi.fn(async () => undefined),
+          })),
+        })),
+      });
+
+    vi.resetModules();
+    vi.doMock("openclaw/plugin-sdk/runtime-env", () => ({
+      ensureGlobalUndiciEnvProxyDispatcher,
+    }));
+    vi.doMock("openai", () => ({
+      default: class MockOpenAI {
+        embeddings = { create: embeddingsCreate };
+      },
+    }));
+    vi.doMock("./lancedb-runtime.js", () => ({
+      loadLanceDbModule,
+    }));
+
+    try {
+      const { default: dynamicMemoryPlugin } = await import("./index.js"); // after the doMock calls
+      const registeredTools: any[] = [];
+      const mockApi = {
+        id: "memory-lancedb",
+        name: "Memory (LanceDB)",
+        source: "test",
+        config: {},
+        pluginConfig: {
+          embedding: {
+            apiKey: OPENAI_API_KEY,
+            model: "text-embedding-3-small",
+          },
+          dbPath: getDbPath(),
+          autoCapture: false,
+          autoRecall: false,
+        },
+        runtime: {},
+        logger: {
+          info: vi.fn(),
+          warn: vi.fn(),
+          error: vi.fn(),
+          debug: vi.fn(),
+        },
+        registerTool: (tool: any, opts: any) => {
+          registeredTools.push({ tool, opts }); // captured so the recall tool can be looked up
+        },
+        registerCli: vi.fn(),
+        registerService: vi.fn(),
+        on: vi.fn(),
+        resolvePath: (p: string) => p,
+      };
+
+      dynamicMemoryPlugin.register(mockApi as any);
+      const recallTool = registeredTools.find((t) => t.opts?.name === "memory_recall")?.tool;
+      if (!recallTool) {
+        throw new Error("memory_recall tool was not registered");
+      }
+
+      await expect(recallTool.execute("test-call-retry-1", { query: "hello" })).rejects.toThrow(
+        "temporary LanceDB install failure", // first call surfaces the loader rejection
+      );
+      await expect(
+        recallTool.execute("test-call-retry-2", { query: "hello again" }),
+      ).resolves.toMatchObject({
+        details: { count: 0 }, // second call must succeed rather than replay the failure
+      });
+
+      expect(loadLanceDbModule).toHaveBeenCalledTimes(2); // failed load + one retry
+      expect(embeddingsCreate).toHaveBeenCalledTimes(2); // one embedding request per execute call
+    } finally { // always undo the module mocks registered above
+      vi.doUnmock("openclaw/plugin-sdk/runtime-env");
+      vi.doUnmock("openai");
+      vi.doUnmock("./lancedb-runtime.js");
+      vi.resetModules();
+    }
+  });
+
   test("config schema accepts storageOptions with string values", async () => {
     const { default: memoryPlugin } = await import("./index.js");
 
@@ -1067,6 +1165,23 @@ describe("lancedb runtime loader", () => {
     expect(installRuntime).not.toHaveBeenCalled();
   });
 
+  test("fails clearly on Intel macOS instead of attempting an unsupported native install", async () => {
+    const installRuntime = vi.fn( // install spy; asserted below to stay uncalled
+      async ({ runtimeDir }: { runtimeDir: string }) =>
+        `${runtimeDir}/node_modules/@lancedb/lancedb/index.js`,
+    );
+    const loader = createRuntimeLoader({
+      platform: "darwin", // darwin + x64 is the pair the loader treats as unsupported
+      arch: "x64",
+      installRuntime,
+    });
+
+    await expect(loader.load()).rejects.toThrow(
+      "memory-lancedb: LanceDB runtime is unavailable on darwin-x64.", // names the platform-arch pair
+    );
+    expect(installRuntime).not.toHaveBeenCalled();
+  });
+
   test("clears the cached failure so later calls can retry the install", async () => {
     const runtimeModule = createMockModule();
     const installRuntime = vi
diff --git a/extensions/memory-lancedb/index.ts b/extensions/memory-lancedb/index.ts
index 911d1d46eeb..b69668cafcf 100644
--- a/extensions/memory-lancedb/index.ts
+++ b/extensions/memory-lancedb/index.ts
@@ -71,7 +71,10 @@ class MemoryDB {
       return this.initPromise;
     }
 
-    this.initPromise = this.doInitialize();
+    this.initPromise = this.doInitialize().catch((error) => {
+      this.initPromise = null;
+      throw error;
+    });
     return this.initPromise;
   }
 
diff --git a/extensions/memory-lancedb/lancedb-runtime.ts b/extensions/memory-lancedb/lancedb-runtime.ts
index 369936e213e..15900d3c53d 100644
--- a/extensions/memory-lancedb/lancedb-runtime.ts
+++ b/extensions/memory-lancedb/lancedb-runtime.ts
@@ -28,6 +28,8 @@ type ReadPackageJson = (manifestPath: string) => PackageJsonWithDependencies | n
 
 type LanceDbRuntimeLoaderDeps = {
   env: NodeJS.ProcessEnv;
+  platform: NodeJS.Platform;
+  arch: NodeJS.Architecture;
   resolveStateDir: (env?: NodeJS.ProcessEnv, homedir?: () => string) => string;
   runtimeManifest: RuntimeManifest;
   importBundled: () => Promise<LanceDbModule>;
@@ -218,11 +220,31 @@ function buildLoadFailureMessage(prefix: string, error: unknown): string {
   return `memory-lancedb: ${prefix}. ${String(error)}`;
 }
 
+function isUnsupportedNativePlatform(params: {
+  platform: NodeJS.Platform;
+  arch: NodeJS.Architecture;
+}): boolean {
+  return params.platform === "darwin" && params.arch === "x64"; // true only for Intel macOS
+}
+
+function buildUnsupportedNativePlatformMessage(params: {
+  platform: NodeJS.Platform;
+  arch: NodeJS.Architecture;
+}): string {
+  return [
+    `memory-lancedb: LanceDB runtime is unavailable on ${params.platform}-${params.arch}.`,
+    "The bundled @lancedb/lancedb dependency does not publish a native package for this platform.",
+    "Disable memory-lancedb or switch to a supported memory backend/platform.",
+  ].join(" "); // three sentences joined by single spaces into a one-line message
+}
+
 export function createLanceDbRuntimeLoader(overrides: Partial<LanceDbRuntimeLoaderDeps> = {}): {
   load: (logger?: LanceDbRuntimeLogger) => Promise<LanceDbModule>;
 } {
   const deps: LanceDbRuntimeLoaderDeps = {
     env: overrides.env ?? process.env,
+    platform: overrides.platform ?? process.platform,
+    arch: overrides.arch ?? process.arch,
     resolveStateDir: overrides.resolveStateDir ?? resolveStateDir,
     runtimeManifest: overrides.runtimeManifest ?? MEMORY_LANCEDB_RUNTIME_MANIFEST,
     importBundled: overrides.importBundled ?? (() => import("@lancedb/lancedb")),
@@ -240,6 +262,15 @@ export function createLanceDbRuntimeLoader(overrides: Partial<LanceDbRuntimeLoad
           try {
             return await deps.importBundled();
           } catch (bundledError) {
+            if (isUnsupportedNativePlatform({ platform: deps.platform, arch: deps.arch })) {
+              throw new Error(
+                buildUnsupportedNativePlatformMessage({
+                  platform: deps.platform,
+                  arch: deps.arch,
+                }),
+                { cause: bundledError },
+              );
+            }
             const runtimeDir = resolveRuntimeDir(
               deps.resolveStateDir(deps.env, () =>
                 deps.env.HOME?.trim() ? deps.env.HOME : os.homedir(),
diff --git a/scripts/e2e/bundled-channel-runtime-deps-docker.sh b/scripts/e2e/bundled-channel-runtime-deps-docker.sh
index 4f4946075d8..3cea4300147 100644
--- a/scripts/e2e/bundled-channel-runtime-deps-docker.sh
+++ b/scripts/e2e/bundled-channel-runtime-deps-docker.sh
@@ -65,6 +65,7 @@ test -d "$package_root/dist/extensions/telegram"
 test -d "$package_root/dist/extensions/discord"
 test -d "$package_root/dist/extensions/slack"
 test -d "$package_root/dist/extensions/feishu"
+test -d "$package_root/dist/extensions/memory-lancedb"
 
 if [ -d "$package_root/dist/extensions/$CHANNEL/node_modules" ]; then
   echo "$CHANNEL runtime deps should not be preinstalled in package" >&2
@@ -156,6 +157,35 @@ if (mode === "feishu") {
     },
   };
 }
+if (mode === "memory-lancedb") {
+  config.plugins = {
+    ...(config.plugins || {}),
+    enabled: true,
+    allow: [...new Set([...(config.plugins?.allow || []), "memory-lancedb"])],
+    slots: {
+      ...(config.plugins?.slots || {}),
+      memory: "memory-lancedb",
+    },
+    entries: {
+      ...(config.plugins?.entries || {}),
+      "memory-lancedb": {
+        ...(config.plugins?.entries?.["memory-lancedb"] || {}),
+        enabled: true,
+        config: {
+          ...(config.plugins?.entries?.["memory-lancedb"]?.config || {}),
+          embedding: {
+            ...(config.plugins?.entries?.["memory-lancedb"]?.config?.embedding || {}),
+            apiKey: process.env.OPENAI_API_KEY,
+            model: "text-embedding-3-small",
+          },
+          dbPath: "~/.openclaw/memory/lancedb-e2e",
+          autoCapture: false,
+          autoRecall: false,
+        },
+      },
+    },
+  };
+}
 
 fs.mkdirSync(path.dirname(configPath), { recursive: true });
 fs.writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8");
@@ -206,6 +236,10 @@ wait_for_gateway_health() {
 
 assert_channel_status() {
   local channel="$1"
+  if [ "$channel" = "memory-lancedb" ]; then
+    echo "memory-lancedb plugin activation verified by dependency sentinel"
+    return 0
+  fi
   local out="/tmp/openclaw-channel-status-$channel.json"
   openclaw gateway call channels.status \
     --url "ws://127.0.0.1:$PORT" \
@@ -635,7 +669,6 @@ export OPENAI_API_KEY="sk-openclaw-bundled-channel-update-e2e"
 export OPENCLAW_NO_ONBOARD=1
 export OPENCLAW_UPDATE_PACKAGE_SPEC=""
 
-BASELINE_VERSION="${OPENCLAW_BUNDLED_CHANNEL_UPDATE_BASELINE_VERSION:?missing baseline version}"
 TOKEN="bundled-channel-update-token"
 PORT="18790"
 
@@ -736,6 +769,35 @@ config.channels = {
     enabled: mode === "feishu",
   },
 };
+if (mode === "memory-lancedb") {
+  config.plugins = {
+    ...(config.plugins || {}),
+    enabled: true,
+    allow: [...new Set([...(config.plugins?.allow || []), "memory-lancedb"])],
+    slots: {
+      ...(config.plugins?.slots || {}),
+      memory: "memory-lancedb",
+    },
+    entries: {
+      ...(config.plugins?.entries || {}),
+      "memory-lancedb": {
+        ...(config.plugins?.entries?.["memory-lancedb"] || {}),
+        enabled: true,
+        config: {
+          ...(config.plugins?.entries?.["memory-lancedb"]?.config || {}),
+          embedding: {
+            ...(config.plugins?.entries?.["memory-lancedb"]?.config?.embedding || {}),
+            apiKey: process.env.OPENAI_API_KEY,
+            model: "text-embedding-3-small",
+          },
+          dbPath: "~/.openclaw/memory/lancedb-update-e2e",
+          autoCapture: false,
+          autoRecall: false,
+        },
+      },
+    },
+  };
+}
 
 fs.mkdirSync(path.dirname(configPath), { recursive: true });
 fs.writeFileSync(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8");
@@ -857,8 +919,8 @@ run_update_and_capture() {
   fi
 }
 
-echo "Installing known-bad baseline $BASELINE_VERSION..."
-npm install -g "openclaw@$BASELINE_VERSION" --omit=optional --no-fund --no-audit >/tmp/openclaw-update-baseline-install.log 2>&1
+echo "Installing current candidate as update baseline..."
+npm install -g "$package_tgz" --no-fund --no-audit >/tmp/openclaw-update-baseline-install.log 2>&1
 command -v openclaw >/dev/null
 baseline_root="$(package_root)"
 test -d "$baseline_root/dist/extensions/telegram"
@@ -871,16 +933,14 @@ set +e
 openclaw doctor --non-interactive >/tmp/openclaw-baseline-doctor.log 2>&1
 baseline_doctor_status=$?
 set -e
-if [ "$baseline_doctor_status" -eq 0 ] || ! grep -Eq "grammy|ERR_MODULE_NOT_FOUND|Cannot find module" /tmp/openclaw-baseline-doctor.log; then
-  echo "expected baseline doctor to fail on missing Telegram runtime deps" >&2
-  cat /tmp/openclaw-baseline-doctor.log >&2
-  exit 1
-fi
+echo "baseline doctor exited with $baseline_doctor_status"
+remove_runtime_dep telegram grammy
+assert_no_dep_available telegram grammy
 
 echo "Updating from baseline to current candidate; candidate doctor must repair Telegram deps..."
 run_update_and_capture telegram /tmp/openclaw-update-telegram.json
 cat /tmp/openclaw-update-telegram.json
-assert_update_ok /tmp/openclaw-update-telegram.json "$BASELINE_VERSION"
+assert_update_ok /tmp/openclaw-update-telegram.json "$candidate_version"
 assert_dep_available telegram grammy
 
 echo "Mutating installed package: remove Telegram deps, then update-mode doctor repairs them..."
@@ -920,6 +980,15 @@ cat /tmp/openclaw-update-feishu.json
 assert_update_ok /tmp/openclaw-update-feishu.json "$candidate_version"
 assert_dep_available feishu @larksuiteoapi/node-sdk
 
+echo "Mutating config to memory-lancedb and rerunning same-version update path..."
+write_config memory-lancedb
+remove_runtime_dep memory-lancedb @lancedb/lancedb
+assert_no_dep_available memory-lancedb @lancedb/lancedb
+run_update_and_capture memory-lancedb /tmp/openclaw-update-memory-lancedb.json
+cat /tmp/openclaw-update-memory-lancedb.json
+assert_update_ok /tmp/openclaw-update-memory-lancedb.json "$candidate_version"
+assert_dep_available memory-lancedb @lancedb/lancedb
+
 echo "bundled channel runtime deps Docker update E2E passed"
 EOF
   then
@@ -937,6 +1006,7 @@ if [ "$RUN_CHANNEL_SCENARIOS" != "0" ]; then
   run_channel_scenario discord discord-api-types
   run_channel_scenario slack @slack/web-api
   run_channel_scenario feishu @larksuiteoapi/node-sdk
+  run_channel_scenario memory-lancedb @lancedb/lancedb
 fi
 if [ "$RUN_UPDATE_SCENARIO" != "0" ]; then
   run_update_scenario