diff --git a/CHANGELOG.md b/CHANGELOG.md
index 378952b9dd1..30bfa6bd4d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai
 ### Changes
 
 - Providers: add Cerebras as a bundled plugin with onboarding, static model catalog, docs, and manifest-owned endpoint metadata. Thanks @codex.
+- Ollama/memory: add model-specific retrieval query prefixes for `nomic-embed-text`, `qwen3-embedding`, and `mxbai-embed-large` memory-search queries while leaving document batches unchanged. Carries forward #45013. Thanks @laolin5564.
 - Plugins/providers: move pre-runtime model-id normalization, provider endpoint host metadata, and OpenAI-compatible request-family hints into plugin manifests so core no longer carries bundled-provider routing tables. Thanks @codex.
 - Plugins/install: allow `OPENCLAW_PLUGIN_STAGE_DIR` to contain layered runtime-dependency roots, resolving read-only preinstalled deps before installing missing deps into the final writable root. Fixes #72396. Thanks @liorb-mountapps.
 - Control UI: add a raw config pending-changes diff panel that parses JSON5, redacts sensitive values until reveal, and avoids fake raw-edit callbacks when opening the panel. Refs #39831; supersedes #48621 and #46654. Thanks @JiajunBernoulli and @BunsDev.
diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md
index 5e4c88e65c4..b7d97b141d3 100644
--- a/docs/providers/ollama.md
+++ b/docs/providers/ollama.md
@@ -847,6 +847,8 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
 | Default model | `nomic-embed-text` |
 | Auto-pull | Yes — the embedding model is pulled automatically if not present locally |
 
+Query-time embeddings use retrieval prefixes for models that require or recommend them, including `nomic-embed-text`, `qwen3-embedding`, and `mxbai-embed-large`. Memory document batches stay raw so existing indexes do not need a format migration.
+
 To select Ollama as the memory search embedding provider:
 
 ```json5
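Before the code changes below, a minimal sketch of the wire traffic this behavior implies, assuming a local Ollama at `http://127.0.0.1:11434`. The `POST /api/embed` URL and the `{ model, input }` body shape are taken from the tests that follow; the `content-type` header is an assumption, as the tests only assert method and body.

```ts
// Sketch only: query-time embeds get the model's retrieval prefix.
// URL and body shape mirror the test expectations; the header is assumed.
const query = "capital of Australia";
const prefixed = `Represent this sentence for searching relevant passages: ${query}`;

await fetch("http://127.0.0.1:11434/api/embed", {
  method: "POST",
  headers: { "content-type": "application/json" }, // assumption
  body: JSON.stringify({ model: "mxbai-embed-large", input: prefixed }),
});

// Document batches are still sent raw, so existing indexes keep working.
await fetch("http://127.0.0.1:11434/api/embed", {
  method: "POST",
  headers: { "content-type": "application/json" }, // assumption
  body: JSON.stringify({ model: "mxbai-embed-large", input: ["doc one", "doc two"] }),
});
```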
diff --git a/extensions/ollama/src/embedding-provider.test.ts b/extensions/ollama/src/embedding-provider.test.ts
index d006cee55bf..9a032d7455f 100644
--- a/extensions/ollama/src/embedding-provider.test.ts
+++ b/extensions/ollama/src/embedding-provider.test.ts
@@ -46,6 +46,19 @@ function mockEmbeddingFetch(embedding: number[]) {
   return fetchMock;
 }
 
+function readEmbeddingRequestBody(init: RequestInit | undefined): { input?: unknown } {
+  if (typeof init?.body !== "string") {
+    throw new Error("expected JSON string request body");
+  }
+  return JSON.parse(init.body) as { input?: unknown };
+}
+
+function readFirstEmbeddingInput(fetchMock: ReturnType<typeof mockEmbeddingFetch>): unknown {
+  const [, init] = (fetchMock.mock.calls[0] ?? []) as unknown as [string, RequestInit | undefined];
+  const body = readEmbeddingRequestBody(init);
+  return body.input;
+}
+
 describe("ollama embedding provider", () => {
   it("calls /api/embed and returns normalized vectors", async () => {
     const fetchMock = mockEmbeddingFetch([3, 4]);
@@ -53,7 +66,7 @@ describe("ollama embedding provider", () => {
     const { provider } = await createOllamaEmbeddingProvider({
       config: {} as OpenClawConfig,
       provider: "ollama",
-      model: "nomic-embed-text",
+      model: "unknown-embedder",
       fallback: "none",
       remote: { baseUrl: "http://127.0.0.1:11434" },
     });
@@ -65,7 +78,7 @@ describe("ollama embedding provider", () => {
       "http://127.0.0.1:11434/api/embed",
       expect.objectContaining({
         method: "POST",
-        body: JSON.stringify({ model: "nomic-embed-text", input: "hi" }),
+        body: JSON.stringify({ model: "unknown-embedder", input: "hi" }),
       }),
     );
     expect(vector[0]).toBeCloseTo(0.6, 5);
@@ -224,6 +237,90 @@ describe("ollama embedding provider", () => {
     expect(inputs).toEqual([["a", "bb", "ccc"]]);
   });
 
+  it("uses a retrieval query prefix for qwen3 embedding queries", async () => {
+    const fetchMock = mockEmbeddingFetch([1, 0]);
+
+    const { provider } = await createOllamaEmbeddingProvider({
+      config: {} as OpenClawConfig,
+      provider: "ollama",
+      model: "qwen3-embedding:0.6b",
+      fallback: "none",
+      remote: { baseUrl: "http://127.0.0.1:11434" },
+    });
+
+    await provider.embedQuery("怀孕");
+
+    expect(readFirstEmbeddingInput(fetchMock)).toBe(
+      "Instruct: Given a user query, retrieve relevant memory notes and documents\nQuery:怀孕",
+    );
+  });
+
+  it("uses the nomic search_query prefix for query embeddings", async () => {
+    const fetchMock = mockEmbeddingFetch([1, 0]);
+
+    const { provider } = await createOllamaEmbeddingProvider({
+      config: {} as OpenClawConfig,
+      provider: "ollama",
+      model: "nomic-embed-text",
+      fallback: "none",
+      remote: { baseUrl: "http://127.0.0.1:11434" },
+    });
+
+    await provider.embedQuery("What does $& mean?");
+
+    expect(readFirstEmbeddingInput(fetchMock)).toBe("search_query: What does $& mean?");
+  });
+
+  it("uses the mixedbread retrieval prompt for query embeddings", async () => {
+    const fetchMock = mockEmbeddingFetch([1, 0]);
+
+    const { provider } = await createOllamaEmbeddingProvider({
+      config: {} as OpenClawConfig,
+      provider: "ollama",
+      model: "mxbai-embed-large:latest",
+      fallback: "none",
+      remote: { baseUrl: "http://127.0.0.1:11434" },
+    });
+
+    await provider.embedQuery("capital of Australia");
+
+    expect(readFirstEmbeddingInput(fetchMock)).toBe(
+      "Represent this sentence for searching relevant passages: capital of Australia",
+    );
+  });
+
+  it("keeps document batch embeddings raw", async () => {
+    const inputs: unknown[] = [];
+    const fetchMock = vi.fn(async (_url: string, init?: RequestInit) => {
+      const body = readEmbeddingRequestBody(init);
+      inputs.push(body.input);
+      return new Response(
+        JSON.stringify({
+          embeddings: [
+            [1, 0],
+            [1, 0],
+          ],
+        }),
+        {
+          status: 200,
+          headers: { "content-type": "application/json" },
+        },
+      );
+    });
+    vi.stubGlobal("fetch", fetchMock);
+
+    const { provider } = await createOllamaEmbeddingProvider({
+      config: {} as OpenClawConfig,
+      provider: "ollama",
+      model: "qwen3-embedding:0.6b",
+      fallback: "none",
+      remote: { baseUrl: "http://127.0.0.1:11434" },
+    });
+
+    await expect(provider.embedBatch(["doc one", "doc two"])).resolves.toHaveLength(2);
+    expect(inputs).toEqual([["doc one", "doc two"]]);
+  });
+
   it("uses custom Ollama provider config and strips that provider prefix", async () => {
     const fetchMock = mockEmbeddingFetch([1, 0]);
 
diff --git a/extensions/ollama/src/embedding-provider.ts b/extensions/ollama/src/embedding-provider.ts
index 1909ffb58c9..675855770dd 100644
--- a/extensions/ollama/src/embedding-provider.ts
+++ b/extensions/ollama/src/embedding-provider.ts
@@ -57,6 +57,22 @@ type OllamaEmbeddingClientConfig = Omit;
 
 export const DEFAULT_OLLAMA_EMBEDDING_MODEL = "nomic-embed-text";
 
+const QUERY_INSTRUCTION_TEMPLATES = [
+  {
+    prefix: "qwen3-embedding",
+    template:
+      "Instruct: Given a user query, retrieve relevant memory notes and documents\nQuery:{query}",
+  },
+  {
+    prefix: "nomic-embed-text",
+    template: "search_query: {query}",
+  },
+  {
+    prefix: "mxbai-embed-large",
+    template: "Represent this sentence for searching relevant passages: {query}",
+  },
+] as const;
+
 function sanitizeAndNormalizeEmbedding(vec: number[]): number[] {
   const sanitized = vec.map((value) => (Number.isFinite(value) ? value : 0));
   const magnitude = Math.sqrt(sanitized.reduce((sum, value) => sum + value * value, 0));
@@ -93,6 +109,14 @@ function normalizeEmbeddingModel(model: string, providerId?: string): string {
   return normalizeOllamaWireModelId(trimmed, providerId);
 }
 
+function applyQueryInstructionTemplate(model: string, queryText: string): string {
+  const normalizedModel = model.trim().toLowerCase();
+  const match = QUERY_INSTRUCTION_TEMPLATES.find(({ prefix }) =>
+    normalizedModel.startsWith(prefix),
+  );
+  return match ? match.template.replace("{query}", () => queryText) : queryText;
+}
+
 function resolveConfiguredProvider(options: OllamaEmbeddingOptions) {
   const providers = options.config.models?.providers;
   if (!providers) {
@@ -319,10 +343,13 @@ export async function createOllamaEmbeddingProvider(
     return embedding;
   };
 
+  const embedQuery = async (text: string): Promise<number[]> =>
+    await embedOne(applyQueryInstructionTemplate(client.model, text));
+
   const provider: OllamaEmbeddingProvider = {
     id: "ollama",
     model: client.model,
-    embedQuery: embedOne,
+    embedQuery,
     embedBatch: async (texts) => (texts.length === 0 ? [] : await embedMany(texts)),
   };
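To try the prefix behavior in isolation, the lookup above can be lifted into a self-contained sketch. `QUERY_INSTRUCTION_TEMPLATES` and `applyQueryInstructionTemplate` are copied from the diff; the `console.log` driver at the end is illustrative, with inputs borrowed from the tests.

```ts
// Self-contained copy of the prefix lookup added in embedding-provider.ts;
// only the console.log driver at the bottom is illustrative.
const QUERY_INSTRUCTION_TEMPLATES = [
  {
    prefix: "qwen3-embedding",
    template:
      "Instruct: Given a user query, retrieve relevant memory notes and documents\nQuery:{query}",
  },
  {
    prefix: "nomic-embed-text",
    template: "search_query: {query}",
  },
  {
    prefix: "mxbai-embed-large",
    template: "Represent this sentence for searching relevant passages: {query}",
  },
] as const;

function applyQueryInstructionTemplate(model: string, queryText: string): string {
  // Prefix matching tolerates tags, so "qwen3-embedding:0.6b" still matches.
  const normalizedModel = model.trim().toLowerCase();
  const match = QUERY_INSTRUCTION_TEMPLATES.find(({ prefix }) =>
    normalizedModel.startsWith(prefix),
  );
  // The replacer callback keeps "$&"-style substitution patterns in the query
  // text literal instead of letting String.replace expand them.
  return match ? match.template.replace("{query}", () => queryText) : queryText;
}

console.log(applyQueryInstructionTemplate("nomic-embed-text", "What does $& mean?"));
// → search_query: What does $& mean?
console.log(applyQueryInstructionTemplate("unknown-embedder", "hi"));
// → hi (models without a template pass through unprefixed)
```

Matching on model-id prefixes rather than exact names lets tagged pulls such as `qwen3-embedding:0.6b` and `mxbai-embed-large:latest` resolve to the same template, while unknown models fall back to the raw query, which is what the `unknown-embedder` test exercises.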