From 1fde7dbc0eef35711c1e2bb43e216ad746f9c141 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 28 Apr 2026 01:12:27 +0100 Subject: [PATCH] fix(memory): support embedding providers without encoding format --- CHANGELOG.md | 1 + docs/plugins/memory-lancedb.md | 30 +++++++++ extensions/memory-lancedb/index.test.ts | 83 +++++++++++++++++++------ extensions/memory-lancedb/index.ts | 42 ++++++++++++- 4 files changed, 134 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b83f9dabd4..32fb0d7cce4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Memory/LanceDB: call OpenAI-compatible embedding endpoints through the raw SDK transport without sending `encoding_format`, then normalize float-array or base64 responses so providers such as ZhiPu and DashScope no longer fail recall with wrong vector dimensions or rejected parameters. Fixes #63655. Thanks @kinthaiofficial. - Memory/LanceDB: bound memory recall embedding queries with a new `recallMaxChars` setting, prefer the latest user message over channel prompt metadata during auto-recall, and document the knob so small Ollama embedding models avoid context-length failures. Fixes #56780. Thanks @rungmc357 and @zak-collaborator. - CLI/skills: resolve workspace-backed skills commands from `--agent`, then the current agent workspace, before falling back to the default agent, so multi-agent ClawHub installs, updates, and status checks stay scoped to the active workspace. Fixes #56161; carries forward #72726. Thanks @langbowang and @luyao618. - Plugin SDK: fall back from partial bundled plugin directory overrides to package source public surfaces while preserving `OPENCLAW_DISABLE_BUNDLED_PLUGINS` as a hard disable. (#72817) Thanks @serkonyc. 
diff --git a/docs/plugins/memory-lancedb.md b/docs/plugins/memory-lancedb.md index 7cac365ed04..6e1933fc207 100644 --- a/docs/plugins/memory-lancedb.md +++ b/docs/plugins/memory-lancedb.md @@ -99,6 +99,36 @@ models need the value in config so LanceDB can create the vector column. For small local embedding models, lower `recallMaxChars` if you see context length errors from the local server. +## OpenAI-compatible providers + +Some OpenAI-compatible embedding providers reject the `encoding_format` +parameter, while others ignore it and always return `number[]` vectors. +`memory-lancedb` therefore omits `encoding_format` on embedding requests and +accepts either float-array responses or base64-encoded float32 responses. + +Set `embedding.dimensions` for providers whose model dimensions are not built +in. For example, ZhiPu `embedding-3` uses `2048` dimensions: + +```json5 +{ + plugins: { + entries: { + "memory-lancedb": { + enabled: true, + config: { + embedding: { + apiKey: "${ZHIPU_API_KEY}", + baseUrl: "https://open.bigmodel.cn/api/paas/v4", + model: "embedding-3", + dimensions: 2048, + }, + }, + }, + }, + }, +} + ``` + ## Recall and capture limits `memory-lancedb` has two separate text limits: diff --git a/extensions/memory-lancedb/index.test.ts b/extensions/memory-lancedb/index.test.ts index a3d8bd3b633..e78b8f919b0 100644 --- a/extensions/memory-lancedb/index.test.ts +++ b/extensions/memory-lancedb/index.test.ts @@ -8,11 +8,13 @@ * - Auto-capture filtering */ +import { Buffer } from "node:buffer"; import { describe, test, expect, vi } from "vitest"; import memoryPlugin, { detectCategory, formatRelevantMemoriesContext, looksLikePromptInjection, + normalizeEmbeddingVector, normalizeRecallQuery, shouldCapture, } from "./index.js"; @@ -57,6 +59,10 @@ function createMockModule(): LanceDbModule { } as unknown as LanceDbModule; } +function invokeEmbeddingCreate(mock: ReturnType<typeof vi.fn>, body: unknown) { + return (mock as unknown as (body: unknown) => unknown)(body); +} + 
function createRuntimeLoader( overrides: { env?: NodeJS.ProcessEnv; @@ -351,7 +357,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -417,7 +425,6 @@ describe("memory plugin e2e", () => { expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", input: expectedRecallQuery, - encoding_format: "float", }); expect(expectedRecallQuery).toHaveLength(120); expect(vectorSearch).toHaveBeenCalledWith([0.1, 0.2, 0.3]); @@ -491,7 +498,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -568,7 +577,6 @@ describe("memory plugin e2e", () => { expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", input: "what editor should i use?", - encoding_format: "float", }); expect(result).toMatchObject({ prependContext: expect.stringContaining("I prefer Helix for editing code."), @@ -622,7 +630,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -745,7 +755,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); 
vi.doMock("./lancedb-runtime.js", () => ({ @@ -844,7 +856,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -905,7 +919,6 @@ describe("memory plugin e2e", () => { expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", input: "I prefer Helix for editing code every day.", - encoding_format: "float", }); expect(vectorSearch).toHaveBeenCalledTimes(1); expect(add).toHaveBeenCalledTimes(1); @@ -970,7 +983,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -1047,7 +1062,6 @@ describe("memory plugin e2e", () => { expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", input: "I prefer Helix for editing code every day.", - encoding_format: "float", }); expect(add).toHaveBeenCalledWith([ expect.objectContaining({ @@ -1106,7 +1120,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -1231,7 +1247,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -1336,7 
+1354,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -1425,12 +1445,10 @@ describe("memory plugin e2e", () => { expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(1, { model: "text-embedding-3-small", input: "I prefer Helix for editing code every day.", - encoding_format: "float", }); expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(2, { model: "text-embedding-3-small", input: "I prefer Fish for shell commands every day.", - encoding_format: "float", }); expect(harness.add).toHaveBeenCalledTimes(2); } finally { @@ -1493,7 +1511,6 @@ describe("memory plugin e2e", () => { expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(3, { model: "text-embedding-3-small", input: "I prefer Deno for small scripts every day.", - encoding_format: "float", }); expect(harness.add).toHaveBeenCalledTimes(3); } finally { @@ -1555,7 +1572,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -1611,7 +1630,6 @@ describe("memory plugin e2e", () => { expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", input: "hello dimensions", - encoding_format: "float", dimensions: 1024, }); } finally { @@ -1651,7 +1669,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", 
() => ({ @@ -1833,6 +1853,31 @@ describe("memory plugin e2e", () => { expect(normalizeRecallQuery(`look up ${"x".repeat(200)}`, 120)).toHaveLength(120); }); + test("normalizeEmbeddingVector accepts float arrays and base64 float32 responses", async () => { + expect(normalizeEmbeddingVector([0.1, 0.2, 0.3])).toEqual([0.1, 0.2, 0.3]); + + const bytes = Buffer.alloc(2 * Float32Array.BYTES_PER_ELEMENT); + const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); + view.setFloat32(0, 1.25, true); + view.setFloat32(Float32Array.BYTES_PER_ELEMENT, -2.5, true); + + const decoded = normalizeEmbeddingVector(bytes.toString("base64")); + expect(decoded[0]).toBeCloseTo(1.25); + expect(decoded[1]).toBeCloseTo(-2.5); + }); + + test("normalizeEmbeddingVector rejects malformed embedding payloads", async () => { + expect(() => normalizeEmbeddingVector([0.1, Number.NaN])).toThrow( + "Embedding response contains non-numeric values", + ); + expect(() => normalizeEmbeddingVector("abc")).toThrow( + "Base64 embedding response has invalid byte length", + ); + expect(() => normalizeEmbeddingVector(undefined)).toThrow( + "Embedding response is missing a vector", + ); + }); + test("formatRelevantMemoriesContext escapes memory text and marks entries as untrusted", async () => { const context = formatRelevantMemoriesContext([ { diff --git a/extensions/memory-lancedb/index.ts b/extensions/memory-lancedb/index.ts index a7f71fa67dc..c527595f12c 100644 --- a/extensions/memory-lancedb/index.ts +++ b/extensions/memory-lancedb/index.ts @@ -6,6 +6,7 @@ * Provides seamless auto-recall and auto-capture via lifecycle hooks. 
*/ +import { Buffer } from "node:buffer"; import { randomUUID } from "node:crypto"; import type * as LanceDB from "@lancedb/lancedb"; import OpenAI from "openai"; @@ -270,17 +271,52 @@ class Embeddings { const params: OpenAI.EmbeddingCreateParams = { model: this.model, input: text, - encoding_format: "float", }; if (this.dimensions) { params.dimensions = this.dimensions; } ensureGlobalUndiciEnvProxyDispatcher(); - const response = await this.client.embeddings.create(params); - return response.data[0].embedding; + // The OpenAI SDK's embeddings helper injects encoding_format=base64 when + // omitted, then decodes the response. Several compatible providers either + // reject encoding_format or always return float arrays, so use the generic + // transport and normalize the response ourselves. + const response = await this.client.post<EmbeddingCreateResponse>("/embeddings", { + body: params, + }); + return normalizeEmbeddingVector(response.data?.[0]?.embedding); } } +type EmbeddingCreateResponse = { + data?: Array<{ + embedding?: unknown; + }>; +}; + +export function normalizeEmbeddingVector(value: unknown): number[] { + if (Array.isArray(value)) { + if (!value.every((item) => typeof item === "number" && Number.isFinite(item))) { + throw new Error("Embedding response contains non-numeric values"); + } + return value; + } + + if (typeof value === "string") { + const bytes = Buffer.from(value, "base64"); + if (bytes.byteLength % Float32Array.BYTES_PER_ELEMENT !== 0) { + throw new Error("Base64 embedding response has invalid byte length"); + } + const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); + const floats: number[] = []; + for (let offset = 0; offset < bytes.byteLength; offset += Float32Array.BYTES_PER_ELEMENT) { + floats.push(view.getFloat32(offset, true)); + } + return floats; + } + + throw new Error("Embedding response is missing a vector"); +} + // ============================================================================ // Rule-based capture filter // 
============================================================================