fix(memory): support embedding providers without encoding format

This commit is contained in:
Peter Steinberger
2026-04-28 01:12:27 +01:00
parent 100c595fbc
commit 1fde7dbc0e
4 changed files with 134 additions and 22 deletions

View File

@@ -8,11 +8,13 @@
* - Auto-capture filtering
*/
import { Buffer } from "node:buffer";
import { describe, test, expect, vi } from "vitest";
import memoryPlugin, {
detectCategory,
formatRelevantMemoriesContext,
looksLikePromptInjection,
normalizeEmbeddingVector,
normalizeRecallQuery,
shouldCapture,
} from "./index.js";
@@ -57,6 +59,10 @@ function createMockModule(): LanceDbModule {
} as unknown as LanceDbModule;
}
/**
 * Invokes a `vi.fn()` embeddings spy as a plain one-argument function.
 * The double cast sidesteps the mock's typed signature so tests can forward
 * an arbitrary request body straight through to the spy.
 */
function invokeEmbeddingCreate(mock: ReturnType<typeof vi.fn>, body: unknown) {
  const callable = mock as unknown as (body: unknown) => unknown;
  return callable(body);
}
function createRuntimeLoader(
overrides: {
env?: NodeJS.ProcessEnv;
@@ -351,7 +357,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -417,7 +425,6 @@ describe("memory plugin e2e", () => {
expect(embeddingsCreate).toHaveBeenCalledWith({
model: "text-embedding-3-small",
input: expectedRecallQuery,
encoding_format: "float",
});
expect(expectedRecallQuery).toHaveLength(120);
expect(vectorSearch).toHaveBeenCalledWith([0.1, 0.2, 0.3]);
@@ -491,7 +498,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -568,7 +577,6 @@ describe("memory plugin e2e", () => {
expect(embeddingsCreate).toHaveBeenCalledWith({
model: "text-embedding-3-small",
input: "what editor should i use?",
encoding_format: "float",
});
expect(result).toMatchObject({
prependContext: expect.stringContaining("I prefer Helix for editing code."),
@@ -622,7 +630,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -745,7 +755,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -844,7 +856,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -905,7 +919,6 @@ describe("memory plugin e2e", () => {
expect(embeddingsCreate).toHaveBeenCalledWith({
model: "text-embedding-3-small",
input: "I prefer Helix for editing code every day.",
encoding_format: "float",
});
expect(vectorSearch).toHaveBeenCalledTimes(1);
expect(add).toHaveBeenCalledTimes(1);
@@ -970,7 +983,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -1047,7 +1062,6 @@ describe("memory plugin e2e", () => {
expect(embeddingsCreate).toHaveBeenCalledWith({
model: "text-embedding-3-small",
input: "I prefer Helix for editing code every day.",
encoding_format: "float",
});
expect(add).toHaveBeenCalledWith([
expect.objectContaining({
@@ -1106,7 +1120,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -1231,7 +1247,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -1336,7 +1354,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -1425,12 +1445,10 @@ describe("memory plugin e2e", () => {
expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(1, {
model: "text-embedding-3-small",
input: "I prefer Helix for editing code every day.",
encoding_format: "float",
});
expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(2, {
model: "text-embedding-3-small",
input: "I prefer Fish for shell commands every day.",
encoding_format: "float",
});
expect(harness.add).toHaveBeenCalledTimes(2);
} finally {
@@ -1493,7 +1511,6 @@ describe("memory plugin e2e", () => {
expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(3, {
model: "text-embedding-3-small",
input: "I prefer Deno for small scripts every day.",
encoding_format: "float",
});
expect(harness.add).toHaveBeenCalledTimes(3);
} finally {
@@ -1555,7 +1572,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -1611,7 +1630,6 @@ describe("memory plugin e2e", () => {
expect(embeddingsCreate).toHaveBeenCalledWith({
model: "text-embedding-3-small",
input: "hello dimensions",
encoding_format: "float",
dimensions: 1024,
});
} finally {
@@ -1651,7 +1669,9 @@ describe("memory plugin e2e", () => {
}));
vi.doMock("openai", () => ({
default: class MockOpenAI {
embeddings = { create: embeddingsCreate };
post = vi.fn((_path: string, opts: { body?: unknown }) =>
invokeEmbeddingCreate(embeddingsCreate, opts.body),
);
},
}));
vi.doMock("./lancedb-runtime.js", () => ({
@@ -1833,6 +1853,31 @@ describe("memory plugin e2e", () => {
expect(normalizeRecallQuery(`look up ${"x".repeat(200)}`, 120)).toHaveLength(120);
});
// Covers both wire formats the normalizer supports: plain JSON float arrays
// and base64-packed little-endian float32 payloads.
test("normalizeEmbeddingVector accepts float arrays and base64 float32 responses", async () => {
  expect(normalizeEmbeddingVector([0.1, 0.2, 0.3])).toEqual([0.1, 0.2, 0.3]);
  // Hand-pack two known float32 values (little-endian) to simulate a base64 response.
  const bytes = Buffer.alloc(2 * Float32Array.BYTES_PER_ELEMENT);
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  view.setFloat32(0, 1.25, true);
  view.setFloat32(Float32Array.BYTES_PER_ELEMENT, -2.5, true);
  const decoded = normalizeEmbeddingVector(bytes.toString("base64"));
  // toBeCloseTo: float32 round-trips may not be bit-exact against float64 literals.
  expect(decoded[0]).toBeCloseTo(1.25);
  expect(decoded[1]).toBeCloseTo(-2.5);
});
// Each malformed payload shape must surface a distinct, descriptive error.
test("normalizeEmbeddingVector rejects malformed embedding payloads", async () => {
  // Non-finite entries in a float array are rejected.
  expect(() => normalizeEmbeddingVector([0.1, Number.NaN])).toThrow(
    "Embedding response contains non-numeric values",
  );
  // "abc" decodes to a byte length that is not a multiple of 4 (float32 width).
  expect(() => normalizeEmbeddingVector("abc")).toThrow(
    "Base64 embedding response has invalid byte length",
  );
  // A missing vector (e.g. empty response body) is rejected outright.
  expect(() => normalizeEmbeddingVector(undefined)).toThrow(
    "Embedding response is missing a vector",
  );
});
test("formatRelevantMemoriesContext escapes memory text and marks entries as untrusted", async () => {
const context = formatRelevantMemoriesContext([
{

View File

@@ -6,6 +6,7 @@
* Provides seamless auto-recall and auto-capture via lifecycle hooks.
*/
import { Buffer } from "node:buffer";
import { randomUUID } from "node:crypto";
import type * as LanceDB from "@lancedb/lancedb";
import OpenAI from "openai";
@@ -270,17 +271,52 @@ class Embeddings {
const params: OpenAI.EmbeddingCreateParams = {
model: this.model,
input: text,
encoding_format: "float",
};
if (this.dimensions) {
params.dimensions = this.dimensions;
}
ensureGlobalUndiciEnvProxyDispatcher();
const response = await this.client.embeddings.create(params);
return response.data[0].embedding;
// The OpenAI SDK's embeddings helper injects encoding_format=base64 when
// omitted, then decodes the response. Several compatible providers either
// reject encoding_format or always return float arrays, so use the generic
// transport and normalize the response ourselves.
const response = await this.client.post<EmbeddingCreateResponse>("/embeddings", {
body: params,
});
return normalizeEmbeddingVector(response.data?.[0]?.embedding);
}
}
/**
 * Minimal shape of an OpenAI-compatible `/embeddings` response body.
 * All fields are optional because compatible providers vary; the embedding
 * itself is `unknown` (float array or base64 string) and is validated by
 * `normalizeEmbeddingVector`.
 */
type EmbeddingCreateResponse = {
  data?: Array<{
    embedding?: unknown;
  }>;
};
/**
 * Normalizes an embedding payload into a plain `number[]`.
 *
 * Accepts either a JSON float array (validated for finiteness) or a base64
 * string packing little-endian float32 values, which some providers return
 * when `encoding_format` is omitted.
 *
 * @param value - Raw `embedding` field from an embeddings response.
 * @returns The embedding as finite float64 numbers.
 * @throws Error when the payload is absent, non-numeric, or has a byte
 *   length that is not a whole number of float32 values.
 */
export function normalizeEmbeddingVector(value: unknown): number[] {
  if (Array.isArray(value)) {
    for (const entry of value) {
      if (typeof entry !== "number" || !Number.isFinite(entry)) {
        throw new Error("Embedding response contains non-numeric values");
      }
    }
    return value;
  }
  if (typeof value === "string") {
    const raw = Buffer.from(value, "base64");
    const width = Float32Array.BYTES_PER_ELEMENT;
    if (raw.byteLength % width !== 0) {
      throw new Error("Base64 embedding response has invalid byte length");
    }
    // DataView tolerates unaligned byteOffsets, unlike a Float32Array view.
    const reader = new DataView(raw.buffer, raw.byteOffset, raw.byteLength);
    return Array.from({ length: raw.byteLength / width }, (_, index) =>
      reader.getFloat32(index * width, true),
    );
  }
  throw new Error("Embedding response is missing a vector");
}
// ============================================================================
// Rule-based capture filter
// ============================================================================