Files
openclaw/extensions/llama-cpp/src/embedding-provider.ts
mushuiyu_xydt 44e6caff54 fix(memory): accept local default model path migration (#92954)
* fix(memory): accept local default model path migration

Treat the official local default embedding model's hf URI and downloaded GGUF path identities as equivalent so upgraded local memory indexes do not pause solely on path-format changes.

* fix(memory): satisfy local identity lint

Avoid filtered array tail access in the local model filename helper while preserving the same compatibility behavior.

* fix(memory): preserve local embedding identity aliases

---------

Co-authored-by: Vincent Koc <25068+vincentkoc@users.noreply.github.com>
2026-06-15 09:29:42 +08:00

269 lines
9.1 KiB
TypeScript

import { createRequire } from "node:module";
import os from "node:os";
import path from "node:path";
import { pathToFileURL } from "node:url";
import type {
EmbeddingInput,
EmbeddingProvider,
EmbeddingProviderAdapter,
EmbeddingProviderCreateOptions,
EmbeddingProviderCreateResult,
} from "openclaw/plugin-sdk/embedding-providers";
import {
createLocalEmbeddingProvider,
type EmbeddingInput as MemoryEmbeddingInput,
type MemoryEmbeddingProvider,
type MemoryEmbeddingProviderCreateOptions,
type MemoryEmbeddingProviderCreateResult,
} from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
/**
 * Options read from the `local` section of the provider create options.
 * Every field is optional; unknown extra fields are carried along untouched
 * when the section is spread into the memory create options.
 */
type LlamaCppLocalOptions = {
// Model path or URI. When unset or blank, the default embedding model is used.
modelPath?: string;
// Model cache directory. When unset or blank, identity resolution falls back
// to `<home>/.node-llama-cpp/models`.
modelCacheDir?: string;
// Not interpreted in this file; it is passed through unchanged as part of the
// `local` section handed to the local embedding provider factory.
contextSize?: number | "auto";
};
/** Runtime overrides accepted by the provider factory functions in this module. */
export type LlamaCppEmbeddingProviderRuntimeOptions = {
// Import URL for node-llama-cpp handed to the local embedding provider
// factory. When omitted, it is resolved from this plugin's own dependencies.
nodeLlamaCppImportUrl?: string;
};
/** Provider id used by this adapter, its runtime metadata, and its cache key data. */
export const LLAMA_CPP_EMBEDDING_PROVIDER_ID = "local";
/** Model URI used whenever no `local.modelPath` is configured. */
export const DEFAULT_LLAMA_CPP_EMBEDDING_MODEL =
"hf:ggml-org/embeddinggemma-300m-qat-q8_0-GGUF/embeddinggemma-300m-qat-Q8_0.gguf";
// File name, relative to the model cache directory, that is treated as the
// downloaded copy of the default model when resolving index identities.
const DEFAULT_LLAMA_CPP_EMBEDDING_MODEL_CACHE_FILE_NAME =
"hf_ggml-org_embeddinggemma-300m-qat-Q8_0.gguf";
/**
 * Index identity of a local embedding model: the canonical model name and
 * cache key data, plus any alternative identities that should be treated as
 * equivalent to it.
 */
type LlamaCppModelIdentity = {
// Canonical model name reported for the provider.
model: string;
// Cache key data built from the provider id and the canonical model name.
cacheKeyData: Record<string, unknown>;
// Equivalent identities (same shape as above); empty when the model has no
// known aliases.
aliases: Array<{
model: string;
cacheKeyData: Record<string, unknown>;
}>;
};
/**
 * Returns the trimmed form of `value` when it is a string with visible
 * content; returns `undefined` for non-strings and for empty or
 * whitespace-only strings.
 */
function normalizeOptionalString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
/**
 * Extracts the `local` section of provider options as llama.cpp local options.
 *
 * `local` arrives typed as `unknown`, so it is only trusted when it is a
 * non-null, non-array object; in that case the same object reference is
 * returned. Anything else (missing, `null`, a string, a number, an array, ...)
 * yields a fresh empty options object. Without this guard, a caller that
 * spreads the result would copy string characters or array indices into its
 * output.
 *
 * @param options - Any options bag that may carry a `local` section.
 * @returns The local options object, or `{}` when none is usable.
 */
function readLocalOptions(options: { local?: unknown }): LlamaCppLocalOptions {
  const { local } = options;
  if (typeof local !== "object" || local === null || Array.isArray(local)) {
    return {};
  }
  return local as LlamaCppLocalOptions;
}
/**
 * Builds the cache key data for a model name: the provider id paired with the
 * given model string.
 */
function createLlamaCppCacheKeyData(model: string): Record<string, unknown> {
  const keyData: Record<string, unknown> = {
    provider: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
    model,
  };
  return keyData;
}
/**
 * Resolves the index identity for a configured local model.
 *
 * The default model can be referred to in two ways: by its `hf:` URI, or by
 * the cache file that URI is downloaded to (given absolutely, or relative to
 * the model cache directory). Both forms collapse to the default URI as the
 * canonical identity, with the path-based spellings listed as aliases. Every
 * other model path or URI keeps its own identity with no aliases.
 *
 * @param local - Local options; only `modelCacheDir` is read here.
 * @param modelPath - Configured model path or URI.
 * @returns The canonical identity plus any equivalent aliases.
 */
function resolveLlamaCppModelIdentity(
  local: LlamaCppLocalOptions,
  modelPath: string,
): LlamaCppModelIdentity {
  const cacheDir =
    normalizeOptionalString(local.modelCacheDir) ??
    path.join(os.homedir(), ".node-llama-cpp", "models");
  const defaultCachedPath = path.resolve(
    cacheDir,
    DEFAULT_LLAMA_CPP_EMBEDDING_MODEL_CACHE_FILE_NAME,
  );

  const isDefaultUri = modelPath === DEFAULT_LLAMA_CPP_EMBEDDING_MODEL;
  // URIs are never resolved against the cache directory, so a URI can only
  // match the default model by being the default URI itself.
  const looksLikeUri = /^(?:hf:|https?:\/\/)/i.test(modelPath);
  const isDefaultCachedFile =
    !looksLikeUri && path.resolve(cacheDir, modelPath) === defaultCachedPath;

  // node-llama-cpp resolves the default HF URI to this exact cache target and
  // accepts its URI-derived file name relative to any configured cache
  // directory. Only that exact historical key is folded into the default
  // identity; arbitrary file names and paths stay distinct.
  if (!isDefaultUri && !isDefaultCachedFile) {
    return {
      model: modelPath,
      cacheKeyData: createLlamaCppCacheKeyData(modelPath),
      aliases: [],
    };
  }

  // A Set keeps insertion order and drops the configured path when it is
  // already one of the two well-known spellings.
  const aliasModels = new Set<string>([
    defaultCachedPath,
    DEFAULT_LLAMA_CPP_EMBEDDING_MODEL_CACHE_FILE_NAME,
  ]);
  if (!isDefaultUri) {
    aliasModels.add(modelPath);
  }
  const aliases = Array.from(aliasModels).map((aliasModel) => ({
    model: aliasModel,
    cacheKeyData: createLlamaCppCacheKeyData(aliasModel),
  }));

  return {
    model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
    cacheKeyData: createLlamaCppCacheKeyData(DEFAULT_LLAMA_CPP_EMBEDDING_MODEL),
    aliases,
  };
}
/** Returns the text of an embedding input: the string itself, or its `text` field. */
function textFromEmbeddingInput(input: EmbeddingInput): string {
  if (typeof input === "string") {
    return input;
  }
  return input.text;
}
/**
 * Converts an embedding input to the memory-engine input shape: bare strings
 * are wrapped as `{ text }`, anything else is returned as the same object.
 */
function toMemoryEmbeddingInput(input: EmbeddingInput): MemoryEmbeddingInput {
  if (typeof input !== "string") {
    return input;
  }
  return { text: input };
}
/**
 * Reports whether an error means the `node-llama-cpp` package could not be
 * found.
 *
 * Two Node.js error codes are accepted because the package can fail to
 * resolve on two paths: `ERR_MODULE_NOT_FOUND` from the ES module loader
 * (dynamic `import()`), and `MODULE_NOT_FOUND` from the CommonJS resolver,
 * which is what `require.resolve()` throws.
 *
 * @param err - Any thrown value.
 * @returns `true` only for an `Error` carrying one of those codes whose
 *   message mentions `node-llama-cpp`.
 */
function isNodeLlamaCppMissing(err: unknown): boolean {
  if (!(err instanceof Error)) {
    return false;
  }
  const code = (err as Error & { code?: unknown }).code;
  const isModuleResolutionFailure =
    code === "ERR_MODULE_NOT_FOUND" || code === "MODULE_NOT_FOUND";
  return isModuleResolutionFailure && err.message.includes("node-llama-cpp");
}
/** Returns the message of an `Error`, or the string form of any other thrown value. */
function formatErrorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}
/**
 * Builds the multi-line, user-facing message shown when local llama.cpp
 * embeddings cannot be set up.
 *
 * The message starts with a headline, then a reason line (a fixed
 * "missing" explanation when node-llama-cpp could not be found, otherwise the
 * error's own message when it has one), an optional detail line carrying the
 * raw message in the "missing" case, and finally fixed setup instructions.
 *
 * @param err - The value thrown while setting up the provider.
 * @returns The lines joined with `\n`.
 */
export function formatLlamaCppSetupError(err: unknown): string {
  const detail = formatErrorMessage(err);
  const missing = isNodeLlamaCppMissing(err);

  const lines: string[] = ["Local llama.cpp embeddings unavailable."];
  if (missing) {
    lines.push("Reason: node-llama-cpp is missing or failed to install.");
    if (detail) {
      lines.push(`Detail: ${detail}`);
    }
  } else if (detail) {
    lines.push(`Reason: ${detail}`);
  }
  lines.push(
    "To enable local GGUF embeddings:",
    "1) Install the official provider plugin: openclaw plugins install @openclaw/llama-cpp-provider",
    "2) Use Node 24 for native installs/updates.",
    "3) If you use pnpm from source: pnpm approve-builds, then pnpm rebuild node-llama-cpp.",
    'Or set agents.defaults.memorySearch.provider to a remote embedding provider such as "openai", "ollama", "lmstudio", or "voyage".',
  );
  return lines.join("\n");
}
// A CommonJS-style `require` anchored at this module, so that package lookups
// start from the plugin's own location.
const requireFromPlugin = createRequire(import.meta.url);

/**
 * Resolves the `node-llama-cpp` package as seen from this plugin and returns
 * the resolved file as a `file:` URL string.
 *
 * @throws Whatever `require.resolve` throws when the package cannot be found.
 */
export function resolveNodeLlamaCppImportUrl(): string {
  const resolvedEntryPath = requireFromPlugin.resolve("node-llama-cpp");
  const entryUrl = pathToFileURL(resolvedEntryPath);
  return entryUrl.href;
}
/**
 * Wraps a memory-engine embedding provider in the plugin SDK's
 * `EmbeddingProvider` shape.
 *
 * `model` and `maxInputTokens` are copied at wrap time and `close` is passed
 * through as-is. Single inputs go to `embedQuery` as plain text. Batches go to
 * `embedBatchInputs` when the provider has it, otherwise to `embedBatch` as
 * plain text. The caller's abort signal is forwarded on every call.
 */
function adaptMemoryEmbeddingProvider(provider: MemoryEmbeddingProvider): EmbeddingProvider {
  const adapted: EmbeddingProvider = {
    id: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
    model: provider.model,
    maxInputTokens: provider.maxInputTokens,
    embed: async (input, callOptions) => {
      return await provider.embedQuery(textFromEmbeddingInput(input), {
        signal: callOptions?.signal,
      });
    },
    embedBatch: async (inputs, callOptions) => {
      // Checked on every call, not once at wrap time.
      if (!provider.embedBatchInputs) {
        return await provider.embedBatch(inputs.map(textFromEmbeddingInput), {
          signal: callOptions?.signal,
        });
      }
      return await provider.embedBatchInputs(inputs.map(toMemoryEmbeddingInput), {
        signal: callOptions?.signal,
      });
    },
    close: provider.close,
  };
  return adapted;
}
/**
 * Creates the llama.cpp embedding provider and returns it directly.
 *
 * @param options - Provider create options.
 * @param runtimeOptions - Optional runtime overrides; defaults to none.
 * @throws Error when creation completes without yielding a provider.
 */
export async function createLlamaCppEmbeddingProvider(
  options: EmbeddingProviderCreateOptions,
  runtimeOptions: LlamaCppEmbeddingProviderRuntimeOptions = {},
): Promise<EmbeddingProvider> {
  const { provider } = await createLlamaCppEmbeddingProviderResult(options, runtimeOptions);
  if (provider) {
    return provider;
  }
  throw new Error("llama.cpp local embedding provider was unavailable");
}
/**
 * Creates the memory-engine flavour of the llama.cpp embedding provider,
 * together with its runtime metadata.
 *
 * The create options are normalised first, then the local provider is built.
 * The provider's reported model is mapped to its index identity, and when the
 * canonical model name differs, a copy of the provider reporting the canonical
 * name is returned instead.
 *
 * @param options - Memory provider create options.
 * @param runtimeOptions - Optional runtime overrides; defaults to none.
 */
export async function createLlamaCppMemoryEmbeddingProvider(
  options: MemoryEmbeddingProviderCreateOptions,
  runtimeOptions: LlamaCppEmbeddingProviderRuntimeOptions = {},
): Promise<MemoryEmbeddingProviderCreateResult> {
  const memoryOptions = buildMemoryCreateOptions(options, options.outputDimensionality);
  const localOptions = readLocalOptions(memoryOptions);

  // Only fall back to resolving the package when no explicit URL was supplied.
  const nodeLlamaCppImportUrl =
    runtimeOptions.nodeLlamaCppImportUrl ?? resolveNodeLlamaCppImportUrl();
  const baseProvider = await createLocalEmbeddingProvider(memoryOptions, {
    nodeLlamaCppImportUrl,
  });

  const identity = resolveLlamaCppModelIdentity(localOptions, baseProvider.model);
  const needsRename = identity.model !== baseProvider.model;
  return {
    provider: needsRename ? { ...baseProvider, model: identity.model } : baseProvider,
    runtime: createLlamaCppEmbeddingProviderRuntime(identity),
  };
}
/**
 * Creates the provider through the memory-engine path and adapts the result
 * to the plugin SDK's create-result shape. A missing provider is reported as
 * `null`; the runtime metadata is passed through unchanged.
 */
async function createLlamaCppEmbeddingProviderResult(
  options: EmbeddingProviderCreateOptions,
  runtimeOptions: LlamaCppEmbeddingProviderRuntimeOptions = {},
): Promise<EmbeddingProviderCreateResult> {
  const memoryOptions = buildMemoryCreateOptions(options, options.dimensions);
  const { provider, runtime } = await createLlamaCppMemoryEmbeddingProvider(
    memoryOptions,
    runtimeOptions,
  );
  const adapted = provider ? adaptMemoryEmbeddingProvider(provider) : null;
  return { provider: adapted, runtime };
}
/**
 * Normalises either flavour of create options into memory provider create
 * options for the local llama.cpp provider.
 *
 * The model path comes from `local.modelPath` when it is a non-blank string
 * and otherwise defaults to the default embedding model. The same value is
 * written to both `model` and `local.modelPath`. The provider id is fixed to
 * this provider and fallback is disabled.
 *
 * @param options - Incoming create options of either flavour.
 * @param outputDimensionality - Output dimensionality to request, if any.
 */
function buildMemoryCreateOptions(
  options: MemoryEmbeddingProviderCreateOptions | EmbeddingProviderCreateOptions,
  outputDimensionality: number | undefined,
): MemoryEmbeddingProviderCreateOptions {
  const local = readLocalOptions(options);
  // The normaliser never returns an empty string, so `??` is sufficient here.
  const modelPath = normalizeOptionalString(local.modelPath) ?? DEFAULT_LLAMA_CPP_EMBEDDING_MODEL;
  const localWithModelPath = { ...local, modelPath };
  const { config, agentDir, remote, inputType, queryInputType, documentInputType } = options;

  return {
    config,
    agentDir,
    provider: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
    fallback: "none",
    remote,
    model: modelPath,
    inputType,
    queryInputType,
    documentInputType,
    local: localWithModelPath,
    outputDimensionality,
  };
}
/**
 * Builds the runtime metadata for a resolved model identity: the provider id,
 * inline query and batch timeouts (5 and 10 minutes, in milliseconds), and
 * the identity's cache key data. `indexIdentityAliases` is included only when
 * the identity has at least one alias.
 */
function createLlamaCppEmbeddingProviderRuntime(identity: LlamaCppModelIdentity) {
  const { cacheKeyData, aliases } = identity;
  const aliasFields = aliases.length > 0 ? { indexIdentityAliases: aliases } : {};
  return {
    id: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
    inlineQueryTimeoutMs: 5 * 60_000,
    inlineBatchTimeoutMs: 10 * 60_000,
    cacheKeyData,
    ...aliasFields,
  };
}
/**
 * Adapter object exposing the local llama.cpp embedding provider: its id,
 * default model, transport kind, setup-error formatter, index identity
 * resolver, and factory.
 */
export const llamaCppEmbeddingProviderAdapter: EmbeddingProviderAdapter = {
  id: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
  defaultModel: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
  transport: "local",
  formatSetupError: formatLlamaCppSetupError,
  // Computes the index identity from configuration alone, without creating a
  // provider.
  resolveIndexIdentity: (options) => {
    const local = readLocalOptions(buildMemoryCreateOptions(options, options.dimensions));
    const modelPath =
      normalizeOptionalString(local.modelPath) ?? DEFAULT_LLAMA_CPP_EMBEDDING_MODEL;
    return resolveLlamaCppModelIdentity(local, modelPath);
  },
  create: async (options) => {
    return await createLlamaCppEmbeddingProviderResult(options);
  },
};