mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 16:01:01 +00:00
fix(memory): support embedding providers without encoding format
This commit is contained in:
@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Memory/LanceDB: call OpenAI-compatible embedding endpoints through the raw SDK transport without sending `encoding_format`, then normalize float-array or base64 responses so providers such as ZhiPu and DashScope no longer fail recall with wrong vector dimensions or rejected parameters. Fixes #63655. Thanks @kinthaiofficial.
|
||||
- Memory/LanceDB: bound memory recall embedding queries with a new `recallMaxChars` setting, prefer the latest user message over channel prompt metadata during auto-recall, and document the knob so small Ollama embedding models avoid context-length failures. Fixes #56780. Thanks @rungmc357 and @zak-collaborator.
|
||||
- CLI/skills: resolve workspace-backed skills commands from `--agent`, then the current agent workspace, before falling back to the default agent, so multi-agent ClawHub installs, updates, and status checks stay scoped to the active workspace. Fixes #56161; carries forward #72726. Thanks @langbowang and @luyao618.
|
||||
- Plugin SDK: fall back from partial bundled plugin directory overrides to package source public surfaces while preserving `OPENCLAW_DISABLE_BUNDLED_PLUGINS` as a hard disable. (#72817) Thanks @serkonyc.
|
||||
|
||||
@@ -99,6 +99,36 @@ models need the value in config so LanceDB can create the vector column.
|
||||
For small local embedding models, lower `recallMaxChars` if you see context
|
||||
length errors from the local server.
|
||||
|
||||
## OpenAI-compatible providers

Some OpenAI-compatible embedding providers reject the `encoding_format`
parameter, while others ignore it and always return `number[]` vectors.
`memory-lancedb` therefore omits `encoding_format` on embedding requests and
accepts either float-array responses or base64-encoded float32 responses.

Set `embedding.dimensions` for providers whose model dimensions are not built
in. For example, ZhiPu `embedding-3` uses `2048` dimensions:
|
||||
|
||||
```json5
{
  plugins: {
    entries: {
      "memory-lancedb": {
        enabled: true,
        config: {
          embedding: {
            apiKey: "${ZHIPU_API_KEY}",
            baseUrl: "https://open.bigmodel.cn/api/paas/v4",
            model: "embedding-3",
            dimensions: 2048,
          },
        },
      },
    },
  },
}
```
|
||||
|
||||
## Recall and capture limits
|
||||
|
||||
`memory-lancedb` has two separate text limits:
|
||||
|
||||
@@ -8,11 +8,13 @@
|
||||
* - Auto-capture filtering
|
||||
*/
|
||||
|
||||
import { Buffer } from "node:buffer";
|
||||
import { describe, test, expect, vi } from "vitest";
|
||||
import memoryPlugin, {
|
||||
detectCategory,
|
||||
formatRelevantMemoriesContext,
|
||||
looksLikePromptInjection,
|
||||
normalizeEmbeddingVector,
|
||||
normalizeRecallQuery,
|
||||
shouldCapture,
|
||||
} from "./index.js";
|
||||
@@ -57,6 +59,10 @@ function createMockModule(): LanceDbModule {
|
||||
} as unknown as LanceDbModule;
|
||||
}
|
||||
|
||||
function invokeEmbeddingCreate(mock: ReturnType<typeof vi.fn>, body: unknown) {
|
||||
return (mock as unknown as (body: unknown) => unknown)(body);
|
||||
}
|
||||
|
||||
function createRuntimeLoader(
|
||||
overrides: {
|
||||
env?: NodeJS.ProcessEnv;
|
||||
@@ -351,7 +357,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -417,7 +425,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: expectedRecallQuery,
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(expectedRecallQuery).toHaveLength(120);
|
||||
expect(vectorSearch).toHaveBeenCalledWith([0.1, 0.2, 0.3]);
|
||||
@@ -491,7 +498,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -568,7 +577,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: "what editor should i use?",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(result).toMatchObject({
|
||||
prependContext: expect.stringContaining("I prefer Helix for editing code."),
|
||||
@@ -622,7 +630,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -745,7 +755,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -844,7 +856,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -905,7 +919,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: "I prefer Helix for editing code every day.",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(vectorSearch).toHaveBeenCalledTimes(1);
|
||||
expect(add).toHaveBeenCalledTimes(1);
|
||||
@@ -970,7 +983,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1047,7 +1062,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: "I prefer Helix for editing code every day.",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(add).toHaveBeenCalledWith([
|
||||
expect.objectContaining({
|
||||
@@ -1106,7 +1120,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1231,7 +1247,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1336,7 +1354,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1425,12 +1445,10 @@ describe("memory plugin e2e", () => {
|
||||
expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(1, {
|
||||
model: "text-embedding-3-small",
|
||||
input: "I prefer Helix for editing code every day.",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(2, {
|
||||
model: "text-embedding-3-small",
|
||||
input: "I prefer Fish for shell commands every day.",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(harness.add).toHaveBeenCalledTimes(2);
|
||||
} finally {
|
||||
@@ -1493,7 +1511,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(3, {
|
||||
model: "text-embedding-3-small",
|
||||
input: "I prefer Deno for small scripts every day.",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(harness.add).toHaveBeenCalledTimes(3);
|
||||
} finally {
|
||||
@@ -1555,7 +1572,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1611,7 +1630,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: "hello dimensions",
|
||||
encoding_format: "float",
|
||||
dimensions: 1024,
|
||||
});
|
||||
} finally {
|
||||
@@ -1651,7 +1669,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1833,6 +1853,31 @@ describe("memory plugin e2e", () => {
|
||||
expect(normalizeRecallQuery(`look up ${"x".repeat(200)}`, 120)).toHaveLength(120);
|
||||
});
|
||||
|
||||
test("normalizeEmbeddingVector accepts float arrays and base64 float32 responses", async () => {
|
||||
expect(normalizeEmbeddingVector([0.1, 0.2, 0.3])).toEqual([0.1, 0.2, 0.3]);
|
||||
|
||||
const bytes = Buffer.alloc(2 * Float32Array.BYTES_PER_ELEMENT);
|
||||
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
||||
view.setFloat32(0, 1.25, true);
|
||||
view.setFloat32(Float32Array.BYTES_PER_ELEMENT, -2.5, true);
|
||||
|
||||
const decoded = normalizeEmbeddingVector(bytes.toString("base64"));
|
||||
expect(decoded[0]).toBeCloseTo(1.25);
|
||||
expect(decoded[1]).toBeCloseTo(-2.5);
|
||||
});
|
||||
|
||||
test("normalizeEmbeddingVector rejects malformed embedding payloads", async () => {
|
||||
expect(() => normalizeEmbeddingVector([0.1, Number.NaN])).toThrow(
|
||||
"Embedding response contains non-numeric values",
|
||||
);
|
||||
expect(() => normalizeEmbeddingVector("abc")).toThrow(
|
||||
"Base64 embedding response has invalid byte length",
|
||||
);
|
||||
expect(() => normalizeEmbeddingVector(undefined)).toThrow(
|
||||
"Embedding response is missing a vector",
|
||||
);
|
||||
});
|
||||
|
||||
test("formatRelevantMemoriesContext escapes memory text and marks entries as untrusted", async () => {
|
||||
const context = formatRelevantMemoriesContext([
|
||||
{
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* Provides seamless auto-recall and auto-capture via lifecycle hooks.
|
||||
*/
|
||||
|
||||
import { Buffer } from "node:buffer";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import type * as LanceDB from "@lancedb/lancedb";
|
||||
import OpenAI from "openai";
|
||||
@@ -270,17 +271,52 @@ class Embeddings {
|
||||
const params: OpenAI.EmbeddingCreateParams = {
|
||||
model: this.model,
|
||||
input: text,
|
||||
encoding_format: "float",
|
||||
};
|
||||
if (this.dimensions) {
|
||||
params.dimensions = this.dimensions;
|
||||
}
|
||||
ensureGlobalUndiciEnvProxyDispatcher();
|
||||
const response = await this.client.embeddings.create(params);
|
||||
return response.data[0].embedding;
|
||||
// The OpenAI SDK's embeddings helper injects encoding_format=base64 when
|
||||
// omitted, then decodes the response. Several compatible providers either
|
||||
// reject encoding_format or always return float arrays, so use the generic
|
||||
// transport and normalize the response ourselves.
|
||||
const response = await this.client.post<EmbeddingCreateResponse>("/embeddings", {
|
||||
body: params,
|
||||
});
|
||||
return normalizeEmbeddingVector(response.data?.[0]?.embedding);
|
||||
}
|
||||
}
|
||||
|
||||
type EmbeddingCreateResponse = {
|
||||
data?: Array<{
|
||||
embedding?: unknown;
|
||||
}>;
|
||||
};
|
||||
|
||||
export function normalizeEmbeddingVector(value: unknown): number[] {
|
||||
if (Array.isArray(value)) {
|
||||
if (!value.every((item) => typeof item === "number" && Number.isFinite(item))) {
|
||||
throw new Error("Embedding response contains non-numeric values");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
if (typeof value === "string") {
|
||||
const bytes = Buffer.from(value, "base64");
|
||||
if (bytes.byteLength % Float32Array.BYTES_PER_ELEMENT !== 0) {
|
||||
throw new Error("Base64 embedding response has invalid byte length");
|
||||
}
|
||||
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
||||
const floats: number[] = [];
|
||||
for (let offset = 0; offset < bytes.byteLength; offset += Float32Array.BYTES_PER_ELEMENT) {
|
||||
floats.push(view.getFloat32(offset, true));
|
||||
}
|
||||
return floats;
|
||||
}
|
||||
|
||||
throw new Error("Embedding response is missing a vector");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Rule-based capture filter
|
||||
// ============================================================================
|
||||
|
||||
Reference in New Issue
Block a user