mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:20:43 +00:00
feat(ollama): prefix memory embedding queries
This commit is contained in:
@@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Changes
|
||||
|
||||
- Providers: add Cerebras as a bundled plugin with onboarding, static model catalog, docs, and manifest-owned endpoint metadata. Thanks @codex.
|
||||
- Ollama/memory: add model-specific retrieval query prefixes for `nomic-embed-text`, `qwen3-embedding`, and `mxbai-embed-large` memory-search queries while leaving document batches unchanged. Carries forward #45013. Thanks @laolin5564.
|
||||
- Plugins/providers: move pre-runtime model-id normalization, provider endpoint host metadata, and OpenAI-compatible request-family hints into plugin manifests so core no longer carries bundled-provider routing tables. Thanks @codex.
|
||||
- Plugins/install: allow `OPENCLAW_PLUGIN_STAGE_DIR` to contain layered runtime-dependency roots, resolving read-only preinstalled deps before installing missing deps into the final writable root. Fixes #72396. Thanks @liorb-mountapps.
|
||||
- Control UI: add a raw config pending-changes diff panel that parses JSON5, redacts sensitive values until reveal, and avoids fake raw-edit callbacks when opening the panel. Refs #39831; supersedes #48621 and #46654. Thanks @JiajunBernoulli and @BunsDev.
|
||||
|
||||
@@ -847,6 +847,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
|
||||
| Default model | `nomic-embed-text` |
|
||||
| Auto-pull | Yes — the embedding model is pulled automatically if not present locally |
|
||||
|
||||
Query-time embeddings use retrieval prefixes for models that require or recommend them, including `nomic-embed-text`, `qwen3-embedding`, and `mxbai-embed-large`. Memory document batches stay raw so existing indexes do not need a format migration.
|
||||
|
||||
To select Ollama as the memory search embedding provider:
|
||||
|
||||
```json5
|
||||
|
||||
@@ -46,6 +46,19 @@ function mockEmbeddingFetch(embedding: number[]) {
|
||||
return fetchMock;
|
||||
}
|
||||
|
||||
function readEmbeddingRequestBody(init: RequestInit | undefined): { input?: unknown } {
|
||||
if (typeof init?.body !== "string") {
|
||||
throw new Error("expected JSON string request body");
|
||||
}
|
||||
return JSON.parse(init.body) as { input?: unknown };
|
||||
}
|
||||
|
||||
function readFirstEmbeddingInput(fetchMock: ReturnType<typeof mockEmbeddingFetch>): unknown {
|
||||
const [, init] = (fetchMock.mock.calls[0] ?? []) as unknown as [string, RequestInit | undefined];
|
||||
const body = readEmbeddingRequestBody(init);
|
||||
return body.input;
|
||||
}
|
||||
|
||||
describe("ollama embedding provider", () => {
|
||||
it("calls /api/embed and returns normalized vectors", async () => {
|
||||
const fetchMock = mockEmbeddingFetch([3, 4]);
|
||||
@@ -53,7 +66,7 @@ describe("ollama embedding provider", () => {
|
||||
const { provider } = await createOllamaEmbeddingProvider({
|
||||
config: {} as OpenClawConfig,
|
||||
provider: "ollama",
|
||||
model: "nomic-embed-text",
|
||||
model: "unknown-embedder",
|
||||
fallback: "none",
|
||||
remote: { baseUrl: "http://127.0.0.1:11434" },
|
||||
});
|
||||
@@ -65,7 +78,7 @@ describe("ollama embedding provider", () => {
|
||||
"http://127.0.0.1:11434/api/embed",
|
||||
expect.objectContaining({
|
||||
method: "POST",
|
||||
body: JSON.stringify({ model: "nomic-embed-text", input: "hi" }),
|
||||
body: JSON.stringify({ model: "unknown-embedder", input: "hi" }),
|
||||
}),
|
||||
);
|
||||
expect(vector[0]).toBeCloseTo(0.6, 5);
|
||||
@@ -224,6 +237,90 @@ describe("ollama embedding provider", () => {
|
||||
expect(inputs).toEqual([["a", "bb", "ccc"]]);
|
||||
});
|
||||
|
||||
it("uses a retrieval query prefix for qwen3 embedding queries", async () => {
|
||||
const fetchMock = mockEmbeddingFetch([1, 0]);
|
||||
|
||||
const { provider } = await createOllamaEmbeddingProvider({
|
||||
config: {} as OpenClawConfig,
|
||||
provider: "ollama",
|
||||
model: "qwen3-embedding:0.6b",
|
||||
fallback: "none",
|
||||
remote: { baseUrl: "http://127.0.0.1:11434" },
|
||||
});
|
||||
|
||||
await provider.embedQuery("怀孕");
|
||||
|
||||
expect(readFirstEmbeddingInput(fetchMock)).toBe(
|
||||
"Instruct: Given a user query, retrieve relevant memory notes and documents\nQuery:怀孕",
|
||||
);
|
||||
});
|
||||
|
||||
it("uses the nomic search_query prefix for query embeddings", async () => {
|
||||
const fetchMock = mockEmbeddingFetch([1, 0]);
|
||||
|
||||
const { provider } = await createOllamaEmbeddingProvider({
|
||||
config: {} as OpenClawConfig,
|
||||
provider: "ollama",
|
||||
model: "nomic-embed-text",
|
||||
fallback: "none",
|
||||
remote: { baseUrl: "http://127.0.0.1:11434" },
|
||||
});
|
||||
|
||||
await provider.embedQuery("What does $& mean?");
|
||||
|
||||
expect(readFirstEmbeddingInput(fetchMock)).toBe("search_query: What does $& mean?");
|
||||
});
|
||||
|
||||
it("uses the mixedbread retrieval prompt for query embeddings", async () => {
|
||||
const fetchMock = mockEmbeddingFetch([1, 0]);
|
||||
|
||||
const { provider } = await createOllamaEmbeddingProvider({
|
||||
config: {} as OpenClawConfig,
|
||||
provider: "ollama",
|
||||
model: "mxbai-embed-large:latest",
|
||||
fallback: "none",
|
||||
remote: { baseUrl: "http://127.0.0.1:11434" },
|
||||
});
|
||||
|
||||
await provider.embedQuery("capital of Australia");
|
||||
|
||||
expect(readFirstEmbeddingInput(fetchMock)).toBe(
|
||||
"Represent this sentence for searching relevant passages: capital of Australia",
|
||||
);
|
||||
});
|
||||
|
||||
// Even for a model with a query prefix (qwen3-embedding), document BATCH
// embeddings must be sent exactly as given — no prefix — so existing indexes
// built from raw documents remain compatible.
it("keeps document batch embeddings raw", async () => {
  // Capture every request's `input` payload; mockEmbeddingFetch is not used
  // here because the batch call needs TWO embeddings in the response.
  const inputs: unknown[] = [];
  const fetchMock = vi.fn(async (_url: string, init?: RequestInit) => {
    const body = readEmbeddingRequestBody(init);
    inputs.push(body.input);
    return new Response(
      JSON.stringify({
        embeddings: [
          [1, 0],
          [1, 0],
        ],
      }),
      {
        status: 200,
        headers: { "content-type": "application/json" },
      },
    );
  });
  // Stub must be installed before the provider is created, in case creation
  // itself touches the endpoint.
  vi.stubGlobal("fetch", fetchMock);

  const { provider } = await createOllamaEmbeddingProvider({
    config: {} as OpenClawConfig,
    provider: "ollama",
    model: "qwen3-embedding:0.6b",
    fallback: "none",
    remote: { baseUrl: "http://127.0.0.1:11434" },
  });

  await expect(provider.embedBatch(["doc one", "doc two"])).resolves.toHaveLength(2);
  // One request, carrying the batch as a single raw array — unprefixed.
  expect(inputs).toEqual([["doc one", "doc two"]]);
});
|
||||
|
||||
it("uses custom Ollama provider config and strips that provider prefix", async () => {
|
||||
const fetchMock = mockEmbeddingFetch([1, 0]);
|
||||
|
||||
|
||||
@@ -57,6 +57,22 @@ type OllamaEmbeddingClientConfig = Omit<OllamaEmbeddingClient, "embedBatch">;
|
||||
|
||||
// Embedding model used when the config does not name one.
export const DEFAULT_OLLAMA_EMBEDDING_MODEL = "nomic-embed-text";

// Query-side instruction templates, matched by model-name prefix against the
// trimmed, lowercased model id (see applyQueryInstructionTemplate). "{query}"
// is replaced with the raw query text. Only QUERY embeddings are prefixed;
// document batches are sent raw so existing indexes need no migration.
const QUERY_INSTRUCTION_TEMPLATES = [
  {
    // Qwen3 embedding models use an "Instruct: …\nQuery:…" retrieval format.
    prefix: "qwen3-embedding",
    template:
      "Instruct: Given a user query, retrieve relevant memory notes and documents\nQuery:{query}",
  },
  {
    // nomic-embed-text requires task prefixes; queries use "search_query: ".
    prefix: "nomic-embed-text",
    template: "search_query: {query}",
  },
  {
    // mixedbread's recommended retrieval prompt for mxbai-embed-large.
    prefix: "mxbai-embed-large",
    template: "Represent this sentence for searching relevant passages: {query}",
  },
] as const;
|
||||
|
||||
function sanitizeAndNormalizeEmbedding(vec: number[]): number[] {
|
||||
const sanitized = vec.map((value) => (Number.isFinite(value) ? value : 0));
|
||||
const magnitude = Math.sqrt(sanitized.reduce((sum, value) => sum + value * value, 0));
|
||||
@@ -93,6 +109,14 @@ function normalizeEmbeddingModel(model: string, providerId?: string): string {
|
||||
return normalizeOllamaWireModelId(trimmed, providerId);
|
||||
}
|
||||
|
||||
function applyQueryInstructionTemplate(model: string, queryText: string): string {
|
||||
const normalizedModel = model.trim().toLowerCase();
|
||||
const match = QUERY_INSTRUCTION_TEMPLATES.find(({ prefix }) =>
|
||||
normalizedModel.startsWith(prefix),
|
||||
);
|
||||
return match ? match.template.replace("{query}", () => queryText) : queryText;
|
||||
}
|
||||
|
||||
function resolveConfiguredProvider(options: OllamaEmbeddingOptions) {
|
||||
const providers = options.config.models?.providers;
|
||||
if (!providers) {
|
||||
@@ -319,10 +343,13 @@ export async function createOllamaEmbeddingProvider(
|
||||
return embedding;
|
||||
};
|
||||
|
||||
const embedQuery = async (text: string): Promise<number[]> =>
|
||||
await embedOne(applyQueryInstructionTemplate(client.model, text));
|
||||
|
||||
const provider: OllamaEmbeddingProvider = {
|
||||
id: "ollama",
|
||||
model: client.model,
|
||||
embedQuery: embedOne,
|
||||
embedQuery,
|
||||
embedBatch: async (texts) => (texts.length === 0 ? [] : await embedMany(texts)),
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user