Files
openclaw/extensions/llama-cpp/index.test.ts
liuhao1024 94e6255666 feat(memory): apply outputDimensionality truncation to local GGUF embeddings (fixes #58765) (#93758)
* feat(memory): apply outputDimensionality truncation to local GGUF embeddings

The outputDimensionality config field was passed through to the local
embedding provider but never applied. Local GGUF models (e.g.
Qwen3-Embedding-0.6B) always returned their full dimension vector.

Apply slice(0, N) after normalization so MRL-capable models can benefit
from dimension truncation — matching the behavior already supported by
Gemini embedding-2 and OpenAI providers.

Fixes #58765

* fix(memory): preserve local embedding dimensions through worker

---------

Co-authored-by: Vincent Koc <25068+vincentkoc@users.noreply.github.com>
2026-06-17 05:05:49 +08:00

445 lines
13 KiB
TypeScript

import os from "node:os";
import path from "node:path";
import {
createPluginRegistryFixture,
registerVirtualTestPlugin,
} from "openclaw/plugin-sdk/plugin-test-contracts";
import {
clearEmbeddingProviders,
clearMemoryEmbeddingProviders,
getRegisteredEmbeddingProvider,
} from "openclaw/plugin-sdk/plugin-test-runtime";
import { afterEach, describe, expect, it, vi } from "vitest";
// Shared mock handle for the host engine's local embedding factory.
// Created through `vi.hoisted` so it already exists when the `vi.mock` factory below
// (which Vitest lifts above the imports) reads it.
const memoryHostEmbeddingMocks = vi.hoisted(() => ({
createLocalEmbeddingProvider: vi.fn(),
}));
// Swap the host embeddings module for a stub that exposes only the mocked factory, so the
// tests in this file never reach the real implementation behind `createLocalEmbeddingProvider`.
vi.mock("openclaw/plugin-sdk/memory-core-host-engine-embeddings", () => ({
createLocalEmbeddingProvider: memoryHostEmbeddingMocks.createLocalEmbeddingProvider,
}));
import llamaCppPlugin from "./index.js";
import {
DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
createLlamaCppEmbeddingProvider,
createLlamaCppMemoryEmbeddingProvider,
formatLlamaCppSetupError,
llamaCppEmbeddingProviderAdapter,
} from "./src/embedding-provider.js";
// Per-test teardown: call both registry clear helpers and reset the host factory mock
// so configured return values and recorded calls do not carry over between tests.
afterEach((): void => {
  const hostFactoryMock = memoryHostEmbeddingMocks.createLocalEmbeddingProvider;
  clearEmbeddingProviders();
  clearMemoryEmbeddingProviders();
  hostFactoryMock.mockReset();
});
describe("llama.cpp provider plugin", () => {
it("registers the local embedding provider through the generic SDK contract", () => {
  // Register the plugin against a fresh fixture, declaring the "local" embedding contract.
  const fixture = createPluginRegistryFixture();
  registerVirtualTestPlugin({
    registry: fixture.registry,
    config: fixture.config,
    id: "llama-cpp",
    name: "llama.cpp Provider",
    contracts: { embeddingProviders: ["local"] },
    register: llamaCppPlugin.register,
  });

  // The registered entry must be owned by this plugin and expose the expected adapter fields.
  const registered = getRegisteredEmbeddingProvider("local");
  const expectedAdapterFields = {
    id: "local",
    defaultModel: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
    transport: "local",
  };
  expect(registered?.ownerPluginId).toBe("llama-cpp");
  expect(registered?.adapter).toMatchObject(expectedAdapterFields);
});
it("adapts the worker-backed local embedding provider", async () => {
  // Fresh options object per use, so call arguments and expectations never share a reference.
  const importOptions = () => ({ nodeLlamaCppImportUrl: "file:///plugin/node-llama-cpp.js" });
  const hostFactory = memoryHostEmbeddingMocks.createLocalEmbeddingProvider;

  // Each worker method returns a distinct vector so the assertions below reveal which
  // worker method served each adapter call.
  const closeSpy = vi.fn();
  hostFactory.mockResolvedValue({
    id: "local",
    model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
    maxInputTokens: 2048,
    embedQuery: vi.fn(async () => [0.6, 0.8]),
    embedBatchInputs: vi.fn(async () => [[0.3, 0.4]]),
    embedBatch: vi.fn(async () => [[1, 0]]),
    close: closeSpy,
  });
  const aborter = new AbortController();

  const adapted = await createLlamaCppEmbeddingProvider(
    { config: {}, provider: "local", model: "text-embedding-3-small" },
    importOptions(),
  );

  const queryVector = adapted.embed("hello");
  await expect(queryVector).resolves.toEqual([0.6, 0.8]);
  const batchVectors = adapted.embedBatch([{ text: "doc" }], { signal: aborter.signal });
  await expect(batchVectors).resolves.toEqual([[0.3, 0.4]]);
  await adapted.close?.();

  expect(adapted.model).toBe(DEFAULT_LLAMA_CPP_EMBEDDING_MODEL);
  expect(adapted.maxInputTokens).toBe(2048);
  expect(closeSpy).toHaveBeenCalledTimes(1);

  // The host factory is expected to receive the default local model rather than the
  // "text-embedding-3-small" name that was requested above.
  expect(hostFactory).toHaveBeenCalledWith(
    {
      config: {},
      provider: "local",
      fallback: "none",
      model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
      local: { modelPath: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL },
    },
    importOptions(),
  );

  // Batch inputs and the abort signal must be forwarded unchanged to the worker provider.
  const worker = await hostFactory.mock.results[0].value;
  expect(worker.embedBatchInputs).toHaveBeenCalledWith([{ text: "doc" }], {
    signal: aborter.signal,
  });
});
it("includes output dimensionality in local cache and index identities", async () => {
  const dimensionality = 512;
  // Matcher for an alias list holding at least one entry whose cache key carries the dimension.
  const aliasesCarryingDimensionality = () =>
    expect.arrayContaining([
      expect.objectContaining({
        cacheKeyData: expect.objectContaining({ outputDimensionality: dimensionality }),
      }),
    ]);

  memoryHostEmbeddingMocks.createLocalEmbeddingProvider.mockResolvedValue({
    id: "local",
    model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
    embedQuery: vi.fn(),
    embedBatch: vi.fn(),
  });

  const created = await createLlamaCppMemoryEmbeddingProvider(
    {
      config: {},
      provider: "local",
      fallback: "none",
      model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
      outputDimensionality: dimensionality,
    },
    { nodeLlamaCppImportUrl: "file:///plugin/node-llama-cpp.js" },
  );
  const identity = llamaCppEmbeddingProviderAdapter.resolveIndexIdentity?.({
    config: {},
    provider: "local",
    model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
    dimensions: dimensionality,
  });

  // Runtime metadata produced while creating the provider.
  expect(created.runtime?.cacheKeyData).toMatchObject({ outputDimensionality: dimensionality });
  expect(created.runtime?.indexIdentityAliases).toEqual(aliasesCarryingDimensionality());

  // Identity resolved directly through the adapter.
  expect(identity?.cacheKeyData).toMatchObject({ outputDimensionality: dimensionality });
  expect(identity?.aliases).toEqual(aliasesCarryingDimensionality());
});
it("keeps the default model identity when configured with its exact cache artifact path", async () => {
  const artifactName = "hf_ggml-org_embeddinggemma-300m-qat-Q8_0.gguf";
  const modelPath = path.join(os.homedir(), ".node-llama-cpp", "models", artifactName);
  // Builds the { model, cacheKeyData } shape used for both canonical identities and aliases.
  const identityOf = (model: string) => ({
    model,
    cacheKeyData: { provider: "local", model },
  });
  // Expected aliases, in order: the absolute artifact path, then the bare artifact file name.
  const expectedAliases = () => [identityOf(modelPath), identityOf(artifactName)];

  memoryHostEmbeddingMocks.createLocalEmbeddingProvider.mockResolvedValue({
    id: "local",
    model: modelPath,
    embedQuery: vi.fn(),
    embedBatch: vi.fn(),
  });

  const created = await createLlamaCppMemoryEmbeddingProvider(
    {
      config: {},
      provider: "local",
      fallback: "none",
      model: modelPath,
      local: { modelPath },
    },
    { nodeLlamaCppImportUrl: "file:///plugin/node-llama-cpp.js" },
  );

  // The provider reports the default model even though the worker was given the file path.
  expect(created.provider?.model).toBe(DEFAULT_LLAMA_CPP_EMBEDDING_MODEL);
  expect(created.runtime?.cacheKeyData).toEqual({
    provider: "local",
    model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
  });
  expect(created.runtime?.indexIdentityAliases).toEqual(expectedAliases());

  // Resolving the identity through the adapter must give the same canonical model and aliases.
  const resolved = llamaCppEmbeddingProviderAdapter.resolveIndexIdentity?.({
    config: {},
    provider: "local",
    model: modelPath,
    local: { modelPath },
  });
  expect(resolved).toEqual({
    ...identityOf(DEFAULT_LLAMA_CPP_EMBEDDING_MODEL),
    aliases: expectedAliases(),
  });

  // The host factory still receives the configured path unchanged.
  expect(memoryHostEmbeddingMocks.createLocalEmbeddingProvider).toHaveBeenCalledWith(
    expect.objectContaining({
      model: modelPath,
      local: { modelPath },
    }),
    { nodeLlamaCppImportUrl: "file:///plugin/node-llama-cpp.js" },
  );
});
it("keeps an arbitrary same-basename model path as a distinct identity", async () => {
  // A file named like the last segment of the default model reference, in an unrelated directory.
  const defaultBasename = DEFAULT_LLAMA_CPP_EMBEDDING_MODEL.split("/").at(-1)!;
  const modelPath = path.join(os.tmpdir(), "custom-models", defaultBasename);

  memoryHostEmbeddingMocks.createLocalEmbeddingProvider.mockResolvedValue({
    id: "local",
    model: modelPath,
    embedQuery: vi.fn(),
    embedBatch: vi.fn(),
  });

  const created = await createLlamaCppMemoryEmbeddingProvider(
    {
      config: {},
      provider: "local",
      fallback: "none",
      model: modelPath,
      local: { modelPath },
    },
    { nodeLlamaCppImportUrl: "file:///plugin/node-llama-cpp.js" },
  );

  // The path stays its own identity: no mapping to the default model and no aliases.
  const expectedCacheKey = { provider: "local", model: modelPath };
  expect(created.provider?.model).toBe(modelPath);
  expect(created.runtime?.cacheKeyData).toEqual(expectedCacheKey);
  expect(created.runtime).not.toHaveProperty("indexIdentityAliases");
});
it("keeps a bare same-basename file in the default cache as a distinct identity", async () => {
  // Same directory as the default cache, but the file name is only the last segment of the
  // default model reference rather than the "hf_…" artifact name used by the other tests.
  const defaultBasename = DEFAULT_LLAMA_CPP_EMBEDDING_MODEL.split("/").at(-1)!;
  const defaultCacheDir = path.join(os.homedir(), ".node-llama-cpp", "models");
  const modelPath = path.join(defaultCacheDir, defaultBasename);

  memoryHostEmbeddingMocks.createLocalEmbeddingProvider.mockResolvedValue({
    id: "local",
    model: modelPath,
    embedQuery: vi.fn(),
    embedBatch: vi.fn(),
  });

  const created = await createLlamaCppMemoryEmbeddingProvider(
    {
      config: {},
      provider: "local",
      fallback: "none",
      model: modelPath,
      local: { modelPath },
    },
    { nodeLlamaCppImportUrl: "file:///plugin/node-llama-cpp.js" },
  );

  expect(created.provider?.model).toBe(modelPath);
  expect(created.runtime).not.toHaveProperty("indexIdentityAliases");
});
it("keeps the default model identity with a custom cache directory", async () => {
  const artifactName = "hf_ggml-org_embeddinggemma-300m-qat-Q8_0.gguf";
  const modelCacheDir = path.join(os.tmpdir(), "llama-cpp-model-cache");
  const modelPath = path.join(modelCacheDir, artifactName);
  // Builds the { model, cacheKeyData } shape expected for each alias entry.
  const identityOf = (model: string) => ({
    model,
    cacheKeyData: { provider: "local", model },
  });

  // The worker reports the artifact path inside the custom cache directory.
  memoryHostEmbeddingMocks.createLocalEmbeddingProvider.mockResolvedValue({
    id: "local",
    model: modelPath,
    embedQuery: vi.fn(),
    embedBatch: vi.fn(),
  });

  const created = await createLlamaCppMemoryEmbeddingProvider(
    {
      config: {},
      provider: "local",
      fallback: "none",
      model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
      local: { modelPath: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL, modelCacheDir },
    },
    { nodeLlamaCppImportUrl: "file:///plugin/node-llama-cpp.js" },
  );

  expect(created.provider?.model).toBe(DEFAULT_LLAMA_CPP_EMBEDDING_MODEL);
  expect(created.runtime?.cacheKeyData).toEqual({
    provider: "local",
    model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
  });
  // Aliases, in order: the artifact path under the custom directory, then the bare file name.
  expect(created.runtime?.indexIdentityAliases).toEqual([
    identityOf(modelPath),
    identityOf(artifactName),
  ]);
});
it.each([
  {
    direction: "default URI to exact relative cache artifact",
    modelPath: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
  },
  {
    direction: "exact relative cache artifact to default URI",
    modelPath: "hf_ggml-org_embeddinggemma-300m-qat-Q8_0.gguf",
  },
])("keeps $direction compatible", ({ modelPath }) => {
  // Both spellings of the model must resolve to the same canonical identity and alias list.
  const relativeModelPath = "hf_ggml-org_embeddinggemma-300m-qat-Q8_0.gguf";
  const modelCacheDir = path.join(os.tmpdir(), "llama-cpp-relative-model-cache");
  const resolvedModelPath = path.join(modelCacheDir, relativeModelPath);
  // Builds the { model, cacheKeyData } shape used for the canonical identity and each alias.
  const identityOf = (model: string) => ({
    model,
    cacheKeyData: { provider: "local", model },
  });

  const resolved = llamaCppEmbeddingProviderAdapter.resolveIndexIdentity?.({
    config: {},
    provider: "local",
    model: modelPath,
    local: { modelPath, modelCacheDir },
  });

  expect(resolved).toEqual({
    ...identityOf(DEFAULT_LLAMA_CPP_EMBEDDING_MODEL),
    aliases: [identityOf(resolvedModelPath), identityOf(relativeModelPath)],
  });
});
it("keeps the default model identity for its exact relative cache artifact", async () => {
  // The model is configured as a bare artifact file name plus a custom cache directory.
  const modelPath = "hf_ggml-org_embeddinggemma-300m-qat-Q8_0.gguf";
  const modelCacheDir = path.join(os.tmpdir(), "llama-cpp-relative-model-cache");
  const resolvedModelPath = path.join(modelCacheDir, modelPath);
  // Builds the { model, cacheKeyData } shape expected for each alias entry.
  const identityOf = (model: string) => ({
    model,
    cacheKeyData: { provider: "local", model },
  });

  memoryHostEmbeddingMocks.createLocalEmbeddingProvider.mockResolvedValue({
    id: "local",
    model: modelPath,
    embedQuery: vi.fn(),
    embedBatch: vi.fn(),
  });

  const created = await createLlamaCppMemoryEmbeddingProvider(
    {
      config: {},
      provider: "local",
      fallback: "none",
      model: modelPath,
      local: { modelPath, modelCacheDir },
    },
    { nodeLlamaCppImportUrl: "file:///plugin/node-llama-cpp.js" },
  );

  expect(created.provider?.model).toBe(DEFAULT_LLAMA_CPP_EMBEDDING_MODEL);
  // Aliases, in order: the path joined onto the cache directory, then the relative name as given.
  expect(created.runtime?.indexIdentityAliases).toEqual([
    identityOf(resolvedModelPath),
    identityOf(modelPath),
  ]);
});
it("formats missing runtime errors with the plugin install command", () => {
  // Build an error shaped like Node's module-resolution failure for the missing runtime package.
  const missingRuntime: Error & { code?: string } = new Error(
    "Cannot find package 'node-llama-cpp'",
  );
  missingRuntime.code = "ERR_MODULE_NOT_FOUND";

  const message = formatLlamaCppSetupError(missingRuntime);

  expect(message).toContain("openclaw plugins install @openclaw/llama-cpp-provider");
});
});