From 1fde7dbc0eef35711c1e2bb43e216ad746f9c141 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 28 Apr 2026 01:12:27 +0100 Subject: [PATCH] fix(memory): support embedding providers without encoding format --- CHANGELOG.md | 1 + docs/plugins/memory-lancedb.md | 30 +++++++++ extensions/memory-lancedb/index.test.ts | 83 +++++++++++++++++++------ extensions/memory-lancedb/index.ts | 42 ++++++++++++- 4 files changed, 134 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b83f9dabd4..32fb0d7cce4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Memory/LanceDB: call OpenAI-compatible embedding endpoints through the raw SDK transport without sending `encoding_format`, then normalize float-array or base64 responses so providers such as ZhiPu and DashScope no longer fail recall with wrong vector dimensions or rejected parameters. Fixes #63655. Thanks @kinthaiofficial. - Memory/LanceDB: bound memory recall embedding queries with a new `recallMaxChars` setting, prefer the latest user message over channel prompt metadata during auto-recall, and document the knob so small Ollama embedding models avoid context-length failures. Fixes #56780. Thanks @rungmc357 and @zak-collaborator. - CLI/skills: resolve workspace-backed skills commands from `--agent`, then the current agent workspace, before falling back to the default agent, so multi-agent ClawHub installs, updates, and status checks stay scoped to the active workspace. Fixes #56161; carries forward #72726. Thanks @langbowang and @luyao618. - Plugin SDK: fall back from partial bundled plugin directory overrides to package source public surfaces while preserving `OPENCLAW_DISABLE_BUNDLED_PLUGINS` as a hard disable. (#72817) Thanks @serkonyc. 
diff --git a/docs/plugins/memory-lancedb.md b/docs/plugins/memory-lancedb.md index 7cac365ed04..6e1933fc207 100644 --- a/docs/plugins/memory-lancedb.md +++ b/docs/plugins/memory-lancedb.md @@ -99,6 +99,36 @@ models need the value in config so LanceDB can create the vector column. For small local embedding models, lower `recallMaxChars` if you see context length errors from the local server. +## OpenAI-compatible providers + +Some OpenAI-compatible embedding providers reject the `encoding_format` +parameter, while others ignore it and always return `number[]` vectors. +`memory-lancedb` therefore omits `encoding_format` on embedding requests and +accepts either float-array responses or base64-encoded float32 responses. + +Set `embedding.dimensions` for providers whose model dimensions are not built +in. For example, ZhiPu `embedding-3` uses `2048` dimensions: + +```json5 +{ + plugins: { + entries: { + "memory-lancedb": { + enabled: true, + config: { + embedding: { + apiKey: "${ZHIPU_API_KEY}", + baseUrl: "https://open.bigmodel.cn/api/paas/v4", + model: "embedding-3", + dimensions: 2048, + }, + }, + }, + }, + }, +} + ``` + ## Recall and capture limits `memory-lancedb` has two separate text limits: diff --git a/extensions/memory-lancedb/index.test.ts b/extensions/memory-lancedb/index.test.ts index a3d8bd3b633..e78b8f919b0 100644 --- a/extensions/memory-lancedb/index.test.ts +++ b/extensions/memory-lancedb/index.test.ts @@ -8,11 +8,13 @@ * - Auto-capture filtering */ +import { Buffer } from "node:buffer"; import { describe, test, expect, vi } from "vitest"; import memoryPlugin, { detectCategory, formatRelevantMemoriesContext, looksLikePromptInjection, + normalizeEmbeddingVector, normalizeRecallQuery, shouldCapture, } from "./index.js"; @@ -57,6 +59,10 @@ function createMockModule(): LanceDbModule { } as unknown as LanceDbModule; } +function invokeEmbeddingCreate(mock: ReturnType<typeof vi.fn>, body: unknown) { + return (mock as unknown as (body: unknown) => unknown)(body); +} + 
function createRuntimeLoader( overrides: { env?: NodeJS.ProcessEnv; @@ -351,7 +357,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -417,7 +425,6 @@ describe("memory plugin e2e", () => { expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", input: expectedRecallQuery, - encoding_format: "float", }); expect(expectedRecallQuery).toHaveLength(120); expect(vectorSearch).toHaveBeenCalledWith([0.1, 0.2, 0.3]); @@ -491,7 +498,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -568,7 +577,6 @@ describe("memory plugin e2e", () => { expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", input: "what editor should i use?", - encoding_format: "float", }); expect(result).toMatchObject({ prependContext: expect.stringContaining("I prefer Helix for editing code."), @@ -622,7 +630,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -745,7 +755,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); 
vi.doMock("./lancedb-runtime.js", () => ({ @@ -844,7 +856,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -905,7 +919,6 @@ describe("memory plugin e2e", () => { expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", input: "I prefer Helix for editing code every day.", - encoding_format: "float", }); expect(vectorSearch).toHaveBeenCalledTimes(1); expect(add).toHaveBeenCalledTimes(1); @@ -970,7 +983,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -1047,7 +1062,6 @@ describe("memory plugin e2e", () => { expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", input: "I prefer Helix for editing code every day.", - encoding_format: "float", }); expect(add).toHaveBeenCalledWith([ expect.objectContaining({ @@ -1106,7 +1120,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -1231,7 +1247,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -1336,7 
+1354,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -1425,12 +1445,10 @@ describe("memory plugin e2e", () => { expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(1, { model: "text-embedding-3-small", input: "I prefer Helix for editing code every day.", - encoding_format: "float", }); expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(2, { model: "text-embedding-3-small", input: "I prefer Fish for shell commands every day.", - encoding_format: "float", }); expect(harness.add).toHaveBeenCalledTimes(2); } finally { @@ -1493,7 +1511,6 @@ describe("memory plugin e2e", () => { expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(3, { model: "text-embedding-3-small", input: "I prefer Deno for small scripts every day.", - encoding_format: "float", }); expect(harness.add).toHaveBeenCalledTimes(3); } finally { @@ -1555,7 +1572,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", () => ({ @@ -1611,7 +1630,6 @@ describe("memory plugin e2e", () => { expect(embeddingsCreate).toHaveBeenCalledWith({ model: "text-embedding-3-small", input: "hello dimensions", - encoding_format: "float", dimensions: 1024, }); } finally { @@ -1651,7 +1669,9 @@ describe("memory plugin e2e", () => { })); vi.doMock("openai", () => ({ default: class MockOpenAI { - embeddings = { create: embeddingsCreate }; + post = vi.fn((_path: string, opts: { body?: unknown }) => + invokeEmbeddingCreate(embeddingsCreate, opts.body), + ); }, })); vi.doMock("./lancedb-runtime.js", 
() => ({ @@ -1833,6 +1853,31 @@ describe("memory plugin e2e", () => { expect(normalizeRecallQuery(`look up ${"x".repeat(200)}`, 120)).toHaveLength(120); }); + test("normalizeEmbeddingVector accepts float arrays and base64 float32 responses", async () => { + expect(normalizeEmbeddingVector([0.1, 0.2, 0.3])).toEqual([0.1, 0.2, 0.3]); + + const bytes = Buffer.alloc(2 * Float32Array.BYTES_PER_ELEMENT); + const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); + view.setFloat32(0, 1.25, true); + view.setFloat32(Float32Array.BYTES_PER_ELEMENT, -2.5, true); + + const decoded = normalizeEmbeddingVector(bytes.toString("base64")); + expect(decoded[0]).toBeCloseTo(1.25); + expect(decoded[1]).toBeCloseTo(-2.5); + }); + + test("normalizeEmbeddingVector rejects malformed embedding payloads", async () => { + expect(() => normalizeEmbeddingVector([0.1, Number.NaN])).toThrow( + "Embedding response contains non-numeric values", + ); + expect(() => normalizeEmbeddingVector("abc")).toThrow( + "Base64 embedding response has invalid byte length", + ); + expect(() => normalizeEmbeddingVector(undefined)).toThrow( + "Embedding response is missing a vector", + ); + }); + test("formatRelevantMemoriesContext escapes memory text and marks entries as untrusted", async () => { const context = formatRelevantMemoriesContext([ { diff --git a/extensions/memory-lancedb/index.ts b/extensions/memory-lancedb/index.ts index a7f71fa67dc..c527595f12c 100644 --- a/extensions/memory-lancedb/index.ts +++ b/extensions/memory-lancedb/index.ts @@ -6,6 +6,7 @@ * Provides seamless auto-recall and auto-capture via lifecycle hooks. 
*/ +import { Buffer } from "node:buffer"; import { randomUUID } from "node:crypto"; import type * as LanceDB from "@lancedb/lancedb"; import OpenAI from "openai"; @@ -270,17 +271,52 @@ class Embeddings { const params: OpenAI.EmbeddingCreateParams = { model: this.model, input: text, - encoding_format: "float", }; if (this.dimensions) { params.dimensions = this.dimensions; } ensureGlobalUndiciEnvProxyDispatcher(); - const response = await this.client.embeddings.create(params); - return response.data[0].embedding; + // The OpenAI SDK's embeddings helper injects encoding_format=base64 when + // omitted, then decodes the response. Several compatible providers either + // reject encoding_format or always return float arrays, so use the generic + // transport and normalize the response ourselves. + const response = await this.client.post<EmbeddingCreateResponse>("/embeddings", { + body: params, + }); + return normalizeEmbeddingVector(response.data?.[0]?.embedding); } } +type EmbeddingCreateResponse = { + data?: Array<{ + embedding?: unknown; + }>; +}; + +export function normalizeEmbeddingVector(value: unknown): number[] { + if (Array.isArray(value)) { + if (!value.every((item) => typeof item === "number" && Number.isFinite(item))) { + throw new Error("Embedding response contains non-numeric values"); + } + return value; + } + + if (typeof value === "string") { + const bytes = Buffer.from(value, "base64"); + if (bytes.byteLength % Float32Array.BYTES_PER_ELEMENT !== 0) { + throw new Error("Base64 embedding response has invalid byte length"); + } + const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); + const floats: number[] = []; + for (let offset = 0; offset < bytes.byteLength; offset += Float32Array.BYTES_PER_ELEMENT) { + floats.push(view.getFloat32(offset, true)); + } + return floats; + } + + throw new Error("Embedding response is missing a vector"); +} + // ============================================================================ // Rule-based capture filter // 
============================================================================