fix(plugins): bound prompt memory recall latency

This commit is contained in:
Vincent Koc
2026-04-28 03:45:54 -07:00
parent 5de284c2e3
commit d55c7ea997
4 changed files with 213 additions and 10 deletions

View File

@@ -602,6 +602,102 @@ describe("memory plugin e2e", () => {
}
});
// Regression test: a hung embeddings request must not stall prompt building.
// Auto-recall is raced against a 15s timeout; on expiry the hook warns and
// returns undefined instead of blocking agent startup.
test("bounds auto-recall latency during prompt build", async () => {
vi.useFakeTimers();
// Embeddings transport that never settles — simulates a hung /embeddings call.
const post = vi.fn(() => new Promise(() => undefined));
const ensureGlobalUndiciEnvProxyDispatcher = vi.fn();
// LanceDB mock: should remain untouched, since the embed step times out first.
const loadLanceDbModule = vi.fn(async () => ({
connect: vi.fn(async () => ({
tableNames: vi.fn(async () => ["memories"]),
openTable: vi.fn(async () => ({
vectorSearch: vi.fn(() => ({ limit: vi.fn(() => ({ toArray: vi.fn(async () => []) })) })),
countRows: vi.fn(async () => 0),
add: vi.fn(async () => undefined),
delete: vi.fn(async () => undefined),
})),
})),
}));
// Module mocks must be registered before the dynamic import below so the
// plugin picks them up; resetModules clears any previously cached instances.
vi.resetModules();
vi.doMock("openclaw/plugin-sdk/runtime-env", () => ({
ensureGlobalUndiciEnvProxyDispatcher,
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
post = post;
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
loadLanceDbModule,
}));
try {
const { default: dynamicMemoryPlugin } = await import("./index.js");
const on = vi.fn();
const logger = {
info: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
debug: vi.fn(),
};
// Minimal plugin API surface: capture disabled, recall enabled, so the
// before_prompt_build hook is the only async path exercised.
const mockApi = {
id: "memory-lancedb",
name: "Memory (LanceDB)",
source: "test",
config: {},
pluginConfig: {
embedding: {
apiKey: OPENAI_API_KEY,
model: "text-embedding-3-small",
},
dbPath: getDbPath(),
autoCapture: false,
autoRecall: true,
},
runtime: {},
logger,
registerTool: vi.fn(),
registerCli: vi.fn(),
registerService: vi.fn(),
on,
resolvePath: (p: string) => p,
};
dynamicMemoryPlugin.register(mockApi as any);
// Pull the hook handler the plugin registered via api.on(...).
const beforePromptBuild = on.mock.calls.find(
([hookName]) => hookName === "before_prompt_build",
)?.[1];
expect(beforePromptBuild).toBeTypeOf("function");
// Start the hook without awaiting, then advance fake time past the 15s
// recall budget so the internal timeout fires while post() is still pending.
const resultPromise = beforePromptBuild?.(
{ prompt: "what editor should i use?", messages: [] },
{},
);
await vi.advanceTimersByTimeAsync(15_000);
// Timeout path: hook resolves to undefined (no memory injection).
await expect(resultPromise).resolves.toBeUndefined();
expect(ensureGlobalUndiciEnvProxyDispatcher).toHaveBeenCalledOnce();
// The embed call itself must carry the bounded timeout and disable retries.
expect(post).toHaveBeenCalledWith(
"/embeddings",
expect.objectContaining({
maxRetries: 0,
timeout: 15_000,
}),
);
// Embedding never completed, so the vector DB was never even loaded.
expect(loadLanceDbModule).not.toHaveBeenCalled();
expect(logger.warn).toHaveBeenCalledWith(
"memory-lancedb: auto-recall timed out after 15000ms; skipping memory injection to avoid stalling agent startup",
);
} finally {
// Always restore module registry and real timers for subsequent tests.
vi.doUnmock("openclaw/plugin-sdk/runtime-env");
vi.doUnmock("openai");
vi.doUnmock("./lancedb-runtime.js");
vi.resetModules();
vi.useRealTimers();
}
});
test("uses live runtime config to enable auto-recall after startup disable", async () => {
const embeddingsCreate = vi.fn(async () => ({
data: [{ embedding: [0.1, 0.2, 0.3] }],

View File

@@ -149,6 +149,7 @@ function resolveAutoCaptureStartIndex(
// ============================================================================
const TABLE_NAME = "memories";
const DEFAULT_AUTO_RECALL_TIMEOUT_MS = 15_000;
class MemoryDB {
private db: LanceDB.Connection | null = null;
@@ -262,7 +263,7 @@ class MemoryDB {
// ============================================================================
type Embeddings = {
embed(text: string): Promise<number[]>;
embed(text: string, options?: { timeoutMs?: number }): Promise<number[]>;
};
class OpenAiCompatibleEmbeddings implements Embeddings {
@@ -277,7 +278,7 @@ class OpenAiCompatibleEmbeddings implements Embeddings {
this.client = new OpenAI({ apiKey, baseURL: baseUrl });
}
async embed(text: string): Promise<number[]> {
async embed(text: string, options?: { timeoutMs?: number }): Promise<number[]> {
const params: OpenAI.EmbeddingCreateParams = {
model: this.model,
input: text,
@@ -292,6 +293,7 @@ class OpenAiCompatibleEmbeddings implements Embeddings {
// transport and normalize the response ourselves.
const response = await this.client.post<EmbeddingCreateResponse>("/embeddings", {
body: params,
...(options?.timeoutMs ? { timeout: options.timeoutMs, maxRetries: 0 } : {}),
});
return normalizeEmbeddingVector(response.data?.[0]?.embedding);
}
@@ -353,6 +355,32 @@ class ProviderAdapterEmbeddings implements Embeddings {
}
}
/**
 * Runs `task` but gives up after `timeoutMs`, returning a discriminated
 * result instead of racing the caller into an indefinite await.
 *
 * - Resolves `{ status: "ok", value }` when the task settles first.
 * - Resolves `{ status: "timeout" }` when the deadline fires first.
 * - Rejects with the task's error if the task rejects before the deadline.
 *
 * The timer is unref'd (when the runtime supports it) so a pending deadline
 * never keeps the process alive, and it is always cleared on exit. A late
 * rejection from an abandoned (timed-out) task is swallowed so it cannot
 * surface as an unhandled rejection.
 */
async function runWithTimeout<T>(params: {
  timeoutMs: number;
  task: () => Promise<T>;
}): Promise<{ status: "ok"; value: T } | { status: "timeout" }> {
  const pending = params.task();
  // Detach a no-op handler: if the deadline wins, the task may still reject
  // later and must not trigger an unhandled-rejection crash.
  pending.catch(() => undefined);
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<{ status: "timeout" }>((resolve) => {
    timer = setTimeout(() => resolve({ status: "timeout" }), params.timeoutMs);
    timer.unref?.();
  });
  try {
    return await Promise.race([
      pending.then((value) => ({ status: "ok" as const, value })),
      deadline,
    ]);
  } finally {
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  }
}
function createEmbeddings(api: OpenClawPluginApi, cfg: MemoryConfig): Embeddings {
const { provider, model, dimensions, apiKey, baseUrl } = cfg.embedding;
if (provider === "openai" && apiKey) {
@@ -818,8 +846,22 @@ export default definePluginEntry({
event.prompt,
currentCfg.recallMaxChars,
);
const vector = await embeddings.embed(recallQuery);
const results = await db.search(vector, 3, 0.3);
const recall = await runWithTimeout({
timeoutMs: DEFAULT_AUTO_RECALL_TIMEOUT_MS,
task: async () => {
const vector = await embeddings.embed(recallQuery, {
timeoutMs: DEFAULT_AUTO_RECALL_TIMEOUT_MS,
});
return await db.search(vector, 3, 0.3);
},
});
if (recall.status === "timeout") {
api.logger.warn?.(
`memory-lancedb: auto-recall timed out after ${DEFAULT_AUTO_RECALL_TIMEOUT_MS}ms; skipping memory injection to avoid stalling agent startup`,
);
return undefined;
}
const results = recall.value;
if (results.length === 0) {
return undefined;