feat(ollama): prefix memory embedding queries

Peter Steinberger
2026-04-27 11:06:06 +01:00
parent 92100efa04
commit 9d52b615ad
4 changed files with 130 additions and 3 deletions

View File

@@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai
### Changes
- Providers: add Cerebras as a bundled plugin with onboarding, static model catalog, docs, and manifest-owned endpoint metadata. Thanks @codex.
- Ollama/memory: add model-specific retrieval query prefixes for `nomic-embed-text`, `qwen3-embedding`, and `mxbai-embed-large` memory-search queries while leaving document batches unchanged. Carries forward #45013. Thanks @laolin5564.
- Plugins/providers: move pre-runtime model-id normalization, provider endpoint host metadata, and OpenAI-compatible request-family hints into plugin manifests so core no longer carries bundled-provider routing tables. Thanks @codex.
- Plugins/install: allow `OPENCLAW_PLUGIN_STAGE_DIR` to contain layered runtime-dependency roots, resolving read-only preinstalled deps before installing missing deps into the final writable root. Fixes #72396. Thanks @liorb-mountapps.
- Control UI: add a raw config pending-changes diff panel that parses JSON5, redacts sensitive values until reveal, and avoids fake raw-edit callbacks when opening the panel. Refs #39831; supersedes #48621 and #46654. Thanks @JiajunBernoulli and @BunsDev.

View File

@@ -847,6 +847,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
| Default model | `nomic-embed-text` |
| Auto-pull | Yes — the embedding model is pulled automatically if not present locally |
Query-time embeddings use retrieval prefixes for models that require or recommend them, including `nomic-embed-text`, `qwen3-embedding`, and `mxbai-embed-large`. Memory document batches stay raw so existing indexes do not need a format migration.
To select Ollama as the memory search embedding provider:
```json5
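// Hypothetical sketch: the key names below are assumptions, not the exact
// schema; see the memory configuration reference for the authoritative shape.
{
  memory: {
    embeddings: {
      provider: "ollama",
      model: "nomic-embed-text", // default; auto-pulled if not present locally
      remote: { baseUrl: "http://127.0.0.1:11434" },
    },
  },
}
```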

View File

@@ -46,6 +46,19 @@ function mockEmbeddingFetch(embedding: number[]) {
  return fetchMock;
}
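
// Helpers for asserting on the JSON body the provider POSTs to /api/embed.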
function readEmbeddingRequestBody(init: RequestInit | undefined): { input?: unknown } {
  if (typeof init?.body !== "string") {
    throw new Error("expected JSON string request body");
  }
  return JSON.parse(init.body) as { input?: unknown };
}

function readFirstEmbeddingInput(fetchMock: ReturnType<typeof mockEmbeddingFetch>): unknown {
  const [, init] = (fetchMock.mock.calls[0] ?? []) as unknown as [string, RequestInit | undefined];
  const body = readEmbeddingRequestBody(init);
  return body.input;
}
describe("ollama embedding provider", () => {
it("calls /api/embed and returns normalized vectors", async () => {
const fetchMock = mockEmbeddingFetch([3, 4]);
@@ -53,7 +66,7 @@ describe("ollama embedding provider", () => {
    const { provider } = await createOllamaEmbeddingProvider({
      config: {} as OpenClawConfig,
      provider: "ollama",
      model: "nomic-embed-text",
      model: "unknown-embedder",
      fallback: "none",
      remote: { baseUrl: "http://127.0.0.1:11434" },
    });
@@ -65,7 +78,7 @@ describe("ollama embedding provider", () => {
"http://127.0.0.1:11434/api/embed",
expect.objectContaining({
method: "POST",
body: JSON.stringify({ model: "nomic-embed-text", input: "hi" }),
body: JSON.stringify({ model: "unknown-embedder", input: "hi" }),
}),
);
expect(vector[0]).toBeCloseTo(0.6, 5);
@@ -224,6 +237,90 @@ describe("ollama embedding provider", () => {
expect(inputs).toEqual([["a", "bb", "ccc"]]);
});
it("uses a retrieval query prefix for qwen3 embedding queries", async () => {
const fetchMock = mockEmbeddingFetch([1, 0]);
const { provider } = await createOllamaEmbeddingProvider({
config: {} as OpenClawConfig,
provider: "ollama",
model: "qwen3-embedding:0.6b",
fallback: "none",
remote: { baseUrl: "http://127.0.0.1:11434" },
});
await provider.embedQuery("怀孕");
expect(readFirstEmbeddingInput(fetchMock)).toBe(
"Instruct: Given a user query, retrieve relevant memory notes and documents\nQuery:怀孕",
);
});
it("uses the nomic search_query prefix for query embeddings", async () => {
const fetchMock = mockEmbeddingFetch([1, 0]);
const { provider } = await createOllamaEmbeddingProvider({
config: {} as OpenClawConfig,
provider: "ollama",
model: "nomic-embed-text",
fallback: "none",
remote: { baseUrl: "http://127.0.0.1:11434" },
});
await provider.embedQuery("What does $& mean?");
expect(readFirstEmbeddingInput(fetchMock)).toBe("search_query: What does $& mean?");
});
it("uses the mixedbread retrieval prompt for query embeddings", async () => {
const fetchMock = mockEmbeddingFetch([1, 0]);
const { provider } = await createOllamaEmbeddingProvider({
config: {} as OpenClawConfig,
provider: "ollama",
model: "mxbai-embed-large:latest",
fallback: "none",
remote: { baseUrl: "http://127.0.0.1:11434" },
});
await provider.embedQuery("capital of Australia");
expect(readFirstEmbeddingInput(fetchMock)).toBe(
"Represent this sentence for searching relevant passages: capital of Australia",
);
});
it("keeps document batch embeddings raw", async () => {
const inputs: unknown[] = [];
const fetchMock = vi.fn(async (_url: string, init?: RequestInit) => {
const body = readEmbeddingRequestBody(init);
inputs.push(body.input);
return new Response(
JSON.stringify({
embeddings: [
[1, 0],
[1, 0],
],
}),
{
status: 200,
headers: { "content-type": "application/json" },
},
);
});
vi.stubGlobal("fetch", fetchMock);
const { provider } = await createOllamaEmbeddingProvider({
config: {} as OpenClawConfig,
provider: "ollama",
model: "qwen3-embedding:0.6b",
fallback: "none",
remote: { baseUrl: "http://127.0.0.1:11434" },
});
await expect(provider.embedBatch(["doc one", "doc two"])).resolves.toHaveLength(2);
expect(inputs).toEqual([["doc one", "doc two"]]);
});
it("uses custom Ollama provider config and strips that provider prefix", async () => {
const fetchMock = mockEmbeddingFetch([1, 0]);

View File

@@ -57,6 +57,22 @@ type OllamaEmbeddingClientConfig = Omit<OllamaEmbeddingClient, "embedBatch">;
export const DEFAULT_OLLAMA_EMBEDDING_MODEL = "nomic-embed-text";
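
// Query-time instruction templates, matched by normalized model-name prefix so
// tagged variants such as "qwen3-embedding:0.6b" match too. Document batches
// are embedded without a prefix so existing indexes keep working.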
const QUERY_INSTRUCTION_TEMPLATES = [
  {
    prefix: "qwen3-embedding",
    template:
      "Instruct: Given a user query, retrieve relevant memory notes and documents\nQuery:{query}",
  },
  {
    prefix: "nomic-embed-text",
    template: "search_query: {query}",
  },
  {
    prefix: "mxbai-embed-large",
    template: "Represent this sentence for searching relevant passages: {query}",
  },
] as const;

function sanitizeAndNormalizeEmbedding(vec: number[]): number[] {
  const sanitized = vec.map((value) => (Number.isFinite(value) ? value : 0));
  const magnitude = Math.sqrt(sanitized.reduce((sum, value) => sum + value * value, 0));
@@ -93,6 +109,14 @@ function normalizeEmbeddingModel(model: string, providerId?: string): string {
  return normalizeOllamaWireModelId(trimmed, providerId);
}
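
// Prefix a memory-search query with the model's retrieval instruction, if any.
// The replacement uses a function so substrings like "$&" in the user's query
// are inserted literally rather than interpreted as replacement patterns.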
function applyQueryInstructionTemplate(model: string, queryText: string): string {
  const normalizedModel = model.trim().toLowerCase();
  const match = QUERY_INSTRUCTION_TEMPLATES.find(({ prefix }) =>
    normalizedModel.startsWith(prefix),
  );
  return match ? match.template.replace("{query}", () => queryText) : queryText;
}

function resolveConfiguredProvider(options: OllamaEmbeddingOptions) {
  const providers = options.config.models?.providers;
  if (!providers) {
@@ -319,10 +343,13 @@ export async function createOllamaEmbeddingProvider(
    return embedding;
  };
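
  // Only single queries get the instruction prefix; document batches stay raw
  // so existing memory indexes do not need re-embedding.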
  const embedQuery = async (text: string): Promise<number[]> =>
    await embedOne(applyQueryInstructionTemplate(client.model, text));

  const provider: OllamaEmbeddingProvider = {
    id: "ollama",
    model: client.model,
    embedQuery: embedOne,
    embedQuery,
    embedBatch: async (texts) => (texts.length === 0 ? [] : await embedMany(texts)),
  };