mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 16:01:01 +00:00
fix(memory): support embedding providers without encoding format
This commit is contained in:
@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Memory/LanceDB: call OpenAI-compatible embedding endpoints through the raw SDK transport without sending `encoding_format`, then normalize float-array or base64 responses so providers such as ZhiPu and DashScope no longer fail recall with wrong vector dimensions or rejected parameters. Fixes #63655. Thanks @kinthaiofficial.
|
||||
- Memory/LanceDB: bound memory recall embedding queries with a new `recallMaxChars` setting, prefer the latest user message over channel prompt metadata during auto-recall, and document the knob so small Ollama embedding models avoid context-length failures. Fixes #56780. Thanks @rungmc357 and @zak-collaborator.
|
||||
- CLI/skills: resolve workspace-backed skills commands from `--agent`, then the current agent workspace, before falling back to the default agent, so multi-agent ClawHub installs, updates, and status checks stay scoped to the active workspace. Fixes #56161; carries forward #72726. Thanks @langbowang and @luyao618.
|
||||
- Plugin SDK: fall back from partial bundled plugin directory overrides to package source public surfaces while preserving `OPENCLAW_DISABLE_BUNDLED_PLUGINS` as a hard disable. (#72817) Thanks @serkonyc.
|
||||
|
||||
@@ -99,6 +99,36 @@ models need the value in config so LanceDB can create the vector column.
|
||||
For small local embedding models, lower `recallMaxChars` if you see context
|
||||
length errors from the local server.
|
||||
|
||||
## OpenAI-compatible providers

Some OpenAI-compatible embedding providers reject the `encoding_format`
parameter, while others ignore it and always return `number[]` vectors.
`memory-lancedb` therefore omits `encoding_format` on embedding requests and
accepts either float-array responses or base64-encoded float32 responses.

Set `embedding.dimensions` for providers whose model dimensions are not built
in. For example, ZhiPu `embedding-3` uses `2048` dimensions:
|
||||
|
||||
```json5
{
  plugins: {
    entries: {
      "memory-lancedb": {
        enabled: true,
        config: {
          embedding: {
            apiKey: "${ZHIPU_API_KEY}",
            baseUrl: "https://open.bigmodel.cn/api/paas/v4",
            model: "embedding-3",
            dimensions: 2048,
          },
        },
      },
    },
  },
}
```
|
||||
|
||||
## Recall and capture limits
|
||||
|
||||
`memory-lancedb` has two separate text limits:
|
||||
|
||||
@@ -8,11 +8,13 @@
|
||||
* - Auto-capture filtering
|
||||
*/
|
||||
|
||||
import { Buffer } from "node:buffer";
|
||||
import { describe, test, expect, vi } from "vitest";
|
||||
import memoryPlugin, {
|
||||
detectCategory,
|
||||
formatRelevantMemoriesContext,
|
||||
looksLikePromptInjection,
|
||||
normalizeEmbeddingVector,
|
||||
normalizeRecallQuery,
|
||||
shouldCapture,
|
||||
} from "./index.js";
|
||||
@@ -57,6 +59,10 @@ function createMockModule(): LanceDbModule {
|
||||
} as unknown as LanceDbModule;
|
||||
}
|
||||
|
||||
function invokeEmbeddingCreate(mock: ReturnType<typeof vi.fn>, body: unknown) {
|
||||
return (mock as unknown as (body: unknown) => unknown)(body);
|
||||
}
|
||||
|
||||
function createRuntimeLoader(
|
||||
overrides: {
|
||||
env?: NodeJS.ProcessEnv;
|
||||
@@ -351,7 +357,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -417,7 +425,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: expectedRecallQuery,
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(expectedRecallQuery).toHaveLength(120);
|
||||
expect(vectorSearch).toHaveBeenCalledWith([0.1, 0.2, 0.3]);
|
||||
@@ -491,7 +498,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -568,7 +577,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: "what editor should i use?",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(result).toMatchObject({
|
||||
prependContext: expect.stringContaining("I prefer Helix for editing code."),
|
||||
@@ -622,7 +630,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -745,7 +755,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -844,7 +856,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -905,7 +919,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: "I prefer Helix for editing code every day.",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(vectorSearch).toHaveBeenCalledTimes(1);
|
||||
expect(add).toHaveBeenCalledTimes(1);
|
||||
@@ -970,7 +983,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1047,7 +1062,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: "I prefer Helix for editing code every day.",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(add).toHaveBeenCalledWith([
|
||||
expect.objectContaining({
|
||||
@@ -1106,7 +1120,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1231,7 +1247,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1336,7 +1354,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1425,12 +1445,10 @@ describe("memory plugin e2e", () => {
|
||||
expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(1, {
|
||||
model: "text-embedding-3-small",
|
||||
input: "I prefer Helix for editing code every day.",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(2, {
|
||||
model: "text-embedding-3-small",
|
||||
input: "I prefer Fish for shell commands every day.",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(harness.add).toHaveBeenCalledTimes(2);
|
||||
} finally {
|
||||
@@ -1493,7 +1511,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(harness.embeddingsCreate).toHaveBeenNthCalledWith(3, {
|
||||
model: "text-embedding-3-small",
|
||||
input: "I prefer Deno for small scripts every day.",
|
||||
encoding_format: "float",
|
||||
});
|
||||
expect(harness.add).toHaveBeenCalledTimes(3);
|
||||
} finally {
|
||||
@@ -1555,7 +1572,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1611,7 +1630,6 @@ describe("memory plugin e2e", () => {
|
||||
expect(embeddingsCreate).toHaveBeenCalledWith({
|
||||
model: "text-embedding-3-small",
|
||||
input: "hello dimensions",
|
||||
encoding_format: "float",
|
||||
dimensions: 1024,
|
||||
});
|
||||
} finally {
|
||||
@@ -1651,7 +1669,9 @@ describe("memory plugin e2e", () => {
|
||||
}));
|
||||
vi.doMock("openai", () => ({
|
||||
default: class MockOpenAI {
|
||||
embeddings = { create: embeddingsCreate };
|
||||
post = vi.fn((_path: string, opts: { body?: unknown }) =>
|
||||
invokeEmbeddingCreate(embeddingsCreate, opts.body),
|
||||
);
|
||||
},
|
||||
}));
|
||||
vi.doMock("./lancedb-runtime.js", () => ({
|
||||
@@ -1833,6 +1853,31 @@ describe("memory plugin e2e", () => {
|
||||
expect(normalizeRecallQuery(`look up ${"x".repeat(200)}`, 120)).toHaveLength(120);
|
||||
});
|
||||
|
||||
test("normalizeEmbeddingVector accepts float arrays and base64 float32 responses", async () => {
|
||||
expect(normalizeEmbeddingVector([0.1, 0.2, 0.3])).toEqual([0.1, 0.2, 0.3]);
|
||||
|
||||
const bytes = Buffer.alloc(2 * Float32Array.BYTES_PER_ELEMENT);
|
||||
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
||||
view.setFloat32(0, 1.25, true);
|
||||
view.setFloat32(Float32Array.BYTES_PER_ELEMENT, -2.5, true);
|
||||
|
||||
const decoded = normalizeEmbeddingVector(bytes.toString("base64"));
|
||||
expect(decoded[0]).toBeCloseTo(1.25);
|
||||
expect(decoded[1]).toBeCloseTo(-2.5);
|
||||
});
|
||||
|
||||
test("normalizeEmbeddingVector rejects malformed embedding payloads", async () => {
|
||||
expect(() => normalizeEmbeddingVector([0.1, Number.NaN])).toThrow(
|
||||
"Embedding response contains non-numeric values",
|
||||
);
|
||||
expect(() => normalizeEmbeddingVector("abc")).toThrow(
|
||||
"Base64 embedding response has invalid byte length",
|
||||
);
|
||||
expect(() => normalizeEmbeddingVector(undefined)).toThrow(
|
||||
"Embedding response is missing a vector",
|
||||
);
|
||||
});
|
||||
|
||||
test("formatRelevantMemoriesContext escapes memory text and marks entries as untrusted", async () => {
|
||||
const context = formatRelevantMemoriesContext([
|
||||
{
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* Provides seamless auto-recall and auto-capture via lifecycle hooks.
|
||||
*/
|
||||
|
||||
import { Buffer } from "node:buffer";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import type * as LanceDB from "@lancedb/lancedb";
|
||||
import OpenAI from "openai";
|
||||
@@ -270,17 +271,52 @@ class Embeddings {
|
||||
const params: OpenAI.EmbeddingCreateParams = {
|
||||
model: this.model,
|
||||
input: text,
|
||||
encoding_format: "float",
|
||||
};
|
||||
if (this.dimensions) {
|
||||
params.dimensions = this.dimensions;
|
||||
}
|
||||
ensureGlobalUndiciEnvProxyDispatcher();
|
||||
const response = await this.client.embeddings.create(params);
|
||||
return response.data[0].embedding;
|
||||
// The OpenAI SDK's embeddings helper injects encoding_format=base64 when
|
||||
// omitted, then decodes the response. Several compatible providers either
|
||||
// reject encoding_format or always return float arrays, so use the generic
|
||||
// transport and normalize the response ourselves.
|
||||
const response = await this.client.post<EmbeddingCreateResponse>("/embeddings", {
|
||||
body: params,
|
||||
});
|
||||
return normalizeEmbeddingVector(response.data?.[0]?.embedding);
|
||||
}
|
||||
}
|
||||
|
||||
type EmbeddingCreateResponse = {
|
||||
data?: Array<{
|
||||
embedding?: unknown;
|
||||
}>;
|
||||
};
|
||||
|
||||
export function normalizeEmbeddingVector(value: unknown): number[] {
|
||||
if (Array.isArray(value)) {
|
||||
if (!value.every((item) => typeof item === "number" && Number.isFinite(item))) {
|
||||
throw new Error("Embedding response contains non-numeric values");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
if (typeof value === "string") {
|
||||
const bytes = Buffer.from(value, "base64");
|
||||
if (bytes.byteLength % Float32Array.BYTES_PER_ELEMENT !== 0) {
|
||||
throw new Error("Base64 embedding response has invalid byte length");
|
||||
}
|
||||
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
||||
const floats: number[] = [];
|
||||
for (let offset = 0; offset < bytes.byteLength; offset += Float32Array.BYTES_PER_ELEMENT) {
|
||||
floats.push(view.getFloat32(offset, true));
|
||||
}
|
||||
return floats;
|
||||
}
|
||||
|
||||
throw new Error("Embedding response is missing a vector");
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Rule-based capture filter
|
||||
// ============================================================================
|
||||
|
||||
Reference in New Issue
Block a user