diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b435ce2a80..f7e55cf2faa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai - Exec/child commands: mark child command environments with `OPENCLAW_CLI` so subprocesses can detect when they were launched from the OpenClaw CLI. (#41411) Thanks @vincentkoc. - iOS/Home canvas: add a bundled welcome screen with a live agent overview that refreshes on connect, reconnect, and foreground return, and move the compact connection pill off the top-left canvas overlay. (#42456) Thanks @ngutman. - iOS/Home canvas: replace floating controls with a docked toolbar, make the bundled home scaffold adapt to smaller phones, and open chat in the resolved main session instead of a synthetic `ios` session. (#42456) Thanks @ngutman. +- Memory/Gemini: add `gemini-embedding-2-preview` memory-search support with configurable output dimensions and automatic reindexing when the configured dimensions change. (#42501) Thanks @BillChirico. ### Breaking diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md index f14105a70ef..35c51f6b523 100644 --- a/docs/concepts/memory.md +++ b/docs/concepts/memory.md @@ -310,7 +310,7 @@ Notes: - `remote.baseUrl` is optional (defaults to the Gemini API base URL). - `remote.headers` lets you add extra headers if needed. - Default model: `gemini-embedding-001`. -- `gemini-embedding-2-preview` is also supported: multimodal inputs, 8192 token limit, configurable dimensions (768 / 1536 / 3072, default 3072). +- `gemini-embedding-2-preview` is also supported: 8192 token limit and configurable dimensions (768 / 1536 / 3072, default 3072). #### Gemini Embedding 2 (preview) @@ -330,8 +330,9 @@ agents: { ``` > **⚠️ Re-index required:** Switching from `gemini-embedding-001` (768 dimensions) -> to `gemini-embedding-2-preview` (3072 dimensions) changes the vector size. -> OpenClaw will automatically reindex when it detects the model change. 
+> to `gemini-embedding-2-preview` (3072 dimensions) changes the vector size. The same is true if you +> change `outputDimensionality` between 768, 1536, and 3072. +> OpenClaw will automatically reindex when it detects a model or dimension change. If you want to use a **custom OpenAI-compatible endpoint** (OpenRouter, vLLM, or a proxy), you can use the `remote` configuration with the OpenAI provider: diff --git a/src/agents/memory-search.ts b/src/agents/memory-search.ts index e14fd5a0b3b..6bcacfec2db 100644 --- a/src/agents/memory-search.ts +++ b/src/agents/memory-search.ts @@ -28,6 +28,7 @@ export type ResolvedMemorySearchConfig = { }; fallback: "openai" | "gemini" | "local" | "voyage" | "mistral" | "ollama" | "none"; model: string; + outputDimensionality?: number; local: { modelPath?: string; modelCacheDir?: string; @@ -193,6 +194,7 @@ function mergeConfig( ? DEFAULT_OLLAMA_MODEL : undefined; const model = overrides?.model ?? defaults?.model ?? modelDefault ?? ""; + const outputDimensionality = overrides?.outputDimensionality ?? defaults?.outputDimensionality; const local = { modelPath: overrides?.local?.modelPath ?? defaults?.local?.modelPath, modelCacheDir: overrides?.local?.modelCacheDir ?? 
defaults?.local?.modelCacheDir, @@ -312,6 +314,7 @@ function mergeConfig( }, fallback, model, + outputDimensionality, local, store, chunking: { tokens: Math.max(1, chunking.tokens), overlap }, diff --git a/src/config/schema.help.quality.test.ts b/src/config/schema.help.quality.test.ts index 04d5200bfbb..730dd397831 100644 --- a/src/config/schema.help.quality.test.ts +++ b/src/config/schema.help.quality.test.ts @@ -83,6 +83,7 @@ const TARGET_KEYS = [ "agents.defaults.memorySearch.remote.batch.timeoutMinutes", "agents.defaults.memorySearch.local.modelPath", "agents.defaults.memorySearch.store.path", + "agents.defaults.memorySearch.outputDimensionality", "agents.defaults.memorySearch.store.vector.enabled", "agents.defaults.memorySearch.store.vector.extensionPath", "agents.defaults.memorySearch.query.hybrid.enabled", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 908829cbf33..bd93f711d91 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -785,6 +785,8 @@ export const FIELD_HELP: Record = { 'Selects the embedding backend used to build/query memory vectors: "openai", "gemini", "voyage", "mistral", "ollama", or "local". Keep your most reliable provider here and configure fallback for resilience.', "agents.defaults.memorySearch.model": "Embedding model override used by the selected memory provider when a non-default model is required. Set this only when you need explicit recall quality/cost tuning beyond provider defaults.", + "agents.defaults.memorySearch.outputDimensionality": + "Gemini embedding-2 only: chooses the output vector size for memory embeddings. Use 768, 1536, or 3072 (default), and expect a full reindex when you change it because stored vector dimensions must stay consistent.", "agents.defaults.memorySearch.remote.baseUrl": "Overrides the embedding API endpoint, such as an OpenAI-compatible proxy or custom Gemini base URL. 
Use this only when routing through your own gateway or vendor endpoint; keep provider defaults otherwise.", "agents.defaults.memorySearch.remote.apiKey": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index c643cf91cd9..b7477b4798a 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -331,6 +331,7 @@ export const FIELD_LABELS: Record = { "agents.defaults.memorySearch.remote.batch.pollIntervalMs": "Remote Batch Poll Interval (ms)", "agents.defaults.memorySearch.remote.batch.timeoutMinutes": "Remote Batch Timeout (min)", "agents.defaults.memorySearch.model": "Memory Search Model", + "agents.defaults.memorySearch.outputDimensionality": "Memory Search Output Dimensionality", "agents.defaults.memorySearch.fallback": "Memory Search Fallback", "agents.defaults.memorySearch.local.modelPath": "Local Embedding Model Path", "agents.defaults.memorySearch.store.path": "Memory Search Index Path", diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index e352f858c39..5de1b4cafa5 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -347,6 +347,11 @@ export type MemorySearchConfig = { fallback?: "openai" | "gemini" | "local" | "voyage" | "mistral" | "ollama" | "none"; /** Embedding model id (remote) or alias (local). */ model?: string; + /** + * Gemini embedding-2 models only: output vector dimensions. + * Supported values today are 768, 1536, and 3072. + */ + outputDimensionality?: number; /** Local embedding settings (node-llama-cpp). */ local?: { /** GGUF model path or hf: URI. 
*/ diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index 3ede7218b80..a240eba5d43 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -599,6 +599,7 @@ export const MemorySearchSchema = z ]) .optional(), model: z.string().optional(), + outputDimensionality: z.number().int().positive().optional(), local: z .object({ modelPath: z.string().optional(), diff --git a/src/memory/batch-gemini.test.ts b/src/memory/batch-gemini.test.ts new file mode 100644 index 00000000000..67d90a5a78b --- /dev/null +++ b/src/memory/batch-gemini.test.ts @@ -0,0 +1,92 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from "vitest"; +import type { GeminiEmbeddingClient } from "./embeddings-gemini.js"; + +describe("runGeminiEmbeddingBatches", () => { + let runGeminiEmbeddingBatches: typeof import("./batch-gemini.js").runGeminiEmbeddingBatches; + + beforeAll(async () => { + ({ runGeminiEmbeddingBatches } = await import("./batch-gemini.js")); + }); + + afterEach(() => { + vi.resetAllMocks(); + vi.unstubAllGlobals(); + }); + + const mockClient: GeminiEmbeddingClient = { + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + headers: {}, + model: "gemini-embedding-2-preview", + modelPath: "models/gemini-embedding-2-preview", + apiKeys: ["test-key"], + outputDimensionality: 1536, + }; + + it("includes outputDimensionality in batch upload requests", async () => { + const fetchMock = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => { + const url = + typeof input === "string" ? input : input instanceof URL ? 
input.toString() : input.url; + if (url.includes("/upload/v1beta/files?uploadType=multipart")) { + const body = init?.body; + if (!(body instanceof Blob)) { + throw new Error("expected multipart blob body"); + } + const text = await body.text(); + expect(text).toContain('"taskType":"RETRIEVAL_DOCUMENT"'); + expect(text).toContain('"outputDimensionality":1536'); + return new Response(JSON.stringify({ name: "files/file-123" }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + } + if (url.endsWith(":asyncBatchEmbedContent")) { + return new Response( + JSON.stringify({ + name: "batches/batch-1", + state: "COMPLETED", + outputConfig: { file: "files/output-1" }, + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ); + } + if (url.endsWith("/files/output-1:download")) { + return new Response( + JSON.stringify({ + key: "req-1", + response: { embedding: { values: [0.1, 0.2, 0.3] } }, + }), + { + status: 200, + headers: { "Content-Type": "application/jsonl" }, + }, + ); + } + throw new Error(`unexpected fetch ${url}`); + }); + + vi.stubGlobal("fetch", fetchMock); + + const results = await runGeminiEmbeddingBatches({ + gemini: mockClient, + agentId: "main", + requests: [ + { + custom_id: "req-1", + content: { parts: [{ text: "hello world" }] }, + taskType: "RETRIEVAL_DOCUMENT", + outputDimensionality: 1536, + }, + ], + wait: true, + pollIntervalMs: 1, + timeoutMs: 1000, + concurrency: 1, + }); + + expect(results.get("req-1")).toEqual([0.1, 0.2, 0.3]); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); +}); diff --git a/src/memory/batch-gemini.ts b/src/memory/batch-gemini.ts index 998f283b676..111570a998c 100644 --- a/src/memory/batch-gemini.ts +++ b/src/memory/batch-gemini.ts @@ -13,6 +13,7 @@ export type GeminiBatchRequest = { custom_id: string; content: { parts: Array<{ text: string }> }; taskType: "RETRIEVAL_DOCUMENT" | "RETRIEVAL_QUERY"; + outputDimensionality?: number; }; export type GeminiBatchStatus = { @@ 
-84,7 +85,10 @@ async function submitGeminiBatch(params: { key: request.custom_id, request: { content: request.content, - task_type: request.taskType, + taskType: request.taskType, + ...(typeof request.outputDimensionality === "number" + ? { outputDimensionality: request.outputDimensionality } + : {}), }, }), ) diff --git a/src/memory/embeddings-gemini.ts b/src/memory/embeddings-gemini.ts index f9b87997519..ae7da66b7c0 100644 --- a/src/memory/embeddings-gemini.ts +++ b/src/memory/embeddings-gemini.ts @@ -17,6 +17,7 @@ export type GeminiEmbeddingClient = { model: string; modelPath: string; apiKeys: string[]; + outputDimensionality?: number; }; const DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"; @@ -151,10 +152,7 @@ export async function createGeminiEmbeddingProvider( const embedUrl = `${baseUrl}/${client.modelPath}:embedContent`; const batchUrl = `${baseUrl}/${client.modelPath}:batchEmbedContents`; const isV2 = isGeminiEmbedding2Model(client.model); - const outputDimensionality = resolveGeminiOutputDimensionality( - client.model, - options.outputDimensionality, - ); + const outputDimensionality = client.outputDimensionality; const fetchWithGeminiAuth = async (apiKey: string, endpoint: string, body: unknown) => { const authHeaders = parseGeminiAuth(apiKey); @@ -272,13 +270,18 @@ export async function resolveGeminiEmbeddingClient( }); const model = normalizeGeminiModel(options.model); const modelPath = buildGeminiModelPath(model); + const outputDimensionality = resolveGeminiOutputDimensionality( + model, + options.outputDimensionality, + ); debugEmbeddingsLog("memory embeddings: gemini client", { rawBaseUrl, baseUrl, model, modelPath, + outputDimensionality, embedEndpoint: `${baseUrl}/${modelPath}:embedContent`, batchEndpoint: `${baseUrl}/${modelPath}:batchEmbedContents`, }); - return { baseUrl, headers, ssrfPolicy, model, modelPath, apiKeys }; + return { baseUrl, headers, ssrfPolicy, model, modelPath, apiKeys, outputDimensionality }; } 
diff --git a/src/memory/index.test.ts b/src/memory/index.test.ts index 43ebcca58c2..8010c419494 100644 --- a/src/memory/index.test.ts +++ b/src/memory/index.test.ts @@ -6,6 +6,7 @@ import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; import "./test-runtime-mocks.js"; let embedBatchCalls = 0; +let providerCalls: Array<{ provider?: string; model?: string; outputDimensionality?: number }> = []; vi.mock("./embeddings.js", () => { const embedText = (text: string) => { @@ -15,18 +16,43 @@ vi.mock("./embeddings.js", () => { return [alpha, beta]; }; return { - createEmbeddingProvider: async (options: { model?: string }) => ({ - requestedProvider: "openai", - provider: { - id: "mock", - model: options.model ?? "mock-embed", - embedQuery: async (text: string) => embedText(text), - embedBatch: async (texts: string[]) => { - embedBatchCalls += 1; - return texts.map(embedText); + createEmbeddingProvider: async (options: { + provider?: string; + model?: string; + outputDimensionality?: number; + }) => { + providerCalls.push({ + provider: options.provider, + model: options.model, + outputDimensionality: options.outputDimensionality, + }); + const providerId = options.provider === "gemini" ? "gemini" : "mock"; + const model = options.model ?? "mock-embed"; + return { + requestedProvider: options.provider ?? "openai", + provider: { + id: providerId, + model, + embedQuery: async (text: string) => embedText(text), + embedBatch: async (texts: string[]) => { + embedBatchCalls += 1; + return texts.map(embedText); + }, }, - }, - }), + ...(providerId === "gemini" + ? { + gemini: { + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + headers: {}, + model, + modelPath: `models/${model}`, + apiKeys: ["test-key"], + outputDimensionality: options.outputDimensionality, + }, + } + : {}), + }; + }, }; }); @@ -93,6 +119,7 @@ describe("memory index", () => { // Keep atomic reindex tests on the safe path. 
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "1"); embedBatchCalls = 0; + providerCalls = []; // Keep the workspace stable to allow manager reuse across tests. await fs.mkdir(memoryDir, { recursive: true }); @@ -119,7 +146,9 @@ describe("memory index", () => { extraPaths?: string[]; sources?: Array<"memory" | "sessions">; sessionMemory?: boolean; + provider?: "openai" | "gemini"; model?: string; + outputDimensionality?: number; vectorEnabled?: boolean; cacheEnabled?: boolean; minScore?: number; @@ -130,8 +159,9 @@ describe("memory index", () => { defaults: { workspace: workspaceDir, memorySearch: { - provider: "openai", + provider: params.provider ?? "openai", model: params.model ?? "mock-embed", + outputDimensionality: params.outputDimensionality, store: { path: params.storePath, vector: { enabled: params.vectorEnabled ?? false } }, // Perf: keep test indexes to a single chunk to reduce sqlite work. chunking: { tokens: 4000, overlap: 0 }, @@ -342,6 +372,67 @@ describe("memory index", () => { await secondManager.close?.(); }); + it("passes Gemini outputDimensionality from config into the provider", async () => { + const cfg = createCfg({ + storePath: indexMainPath, + provider: "gemini", + model: "gemini-embedding-2-preview", + outputDimensionality: 1536, + }); + + const result = await getMemorySearchManager({ cfg, agentId: "main" }); + const manager = requireManager(result); + + expect( + providerCalls.some( + (call) => + call.provider === "gemini" && + call.model === "gemini-embedding-2-preview" && + call.outputDimensionality === 1536, + ), + ).toBe(true); + await manager.close?.(); + }); + + it("reindexes when Gemini outputDimensionality changes", async () => { + const base = createCfg({ + storePath: indexModelPath, + provider: "gemini", + model: "gemini-embedding-2-preview", + outputDimensionality: 3072, + }); + const baseAgents = base.agents!; + const baseDefaults = baseAgents.defaults!; + const baseMemorySearch = baseDefaults.memorySearch!; + + const 
first = await getMemorySearchManager({ cfg: base, agentId: "main" }); + const firstManager = requireManager(first); + await firstManager.sync?.({ reason: "test" }); + const callsAfterFirstSync = embedBatchCalls; + await firstManager.close?.(); + + const second = await getMemorySearchManager({ + cfg: { + ...base, + agents: { + ...baseAgents, + defaults: { + ...baseDefaults, + memorySearch: { + ...baseMemorySearch, + outputDimensionality: 768, + }, + }, + }, + }, + agentId: "main", + }); + const secondManager = requireManager(second); + await secondManager.sync?.({ reason: "test" }); + expect(embedBatchCalls).toBeGreaterThan(callsAfterFirstSync); + await secondManager.close?.(); + }); + it("reuses cached embeddings on forced reindex", async () => { const cfg = createCfg({ storePath: indexMainPath, cacheEnabled: true }); const manager = await getPersistentManager(cfg); diff --git a/src/memory/manager-embedding-ops.ts b/src/memory/manager-embedding-ops.ts index 965058c8a3b..97a26dcc315 100644 --- a/src/memory/manager-embedding-ops.ts +++ b/src/memory/manager-embedding-ops.ts @@ -236,6 +236,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { provider: "gemini", baseUrl: this.gemini.baseUrl, model: this.gemini.model, + outputDimensionality: this.gemini.outputDimensionality, headers: entries, }), ); @@ -483,6 +484,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { buildRequest: (chunk) => ({ content: { parts: [{ text: chunk.text }] }, taskType: "RETRIEVAL_DOCUMENT", + outputDimensionality: this.gemini?.outputDimensionality, }), runBatch: async (runnerOptions) => await runGeminiEmbeddingBatches({ diff --git a/src/memory/manager-sync-ops.ts b/src/memory/manager-sync-ops.ts index 1fe91599b34..7bdf8fcdd2e 100644 --- a/src/memory/manager-sync-ops.ts +++ b/src/memory/manager-sync-ops.ts @@ -996,6 +996,7 @@ export abstract class MemoryManagerSyncOps { provider: fallback, remote: this.settings.remote, model: 
fallbackModel, + outputDimensionality: this.settings.outputDimensionality, fallback: "none", local: this.settings.local, }); diff --git a/src/memory/manager.ts b/src/memory/manager.ts index 9b1ff74e54c..e79f83c570a 100644 --- a/src/memory/manager.ts +++ b/src/memory/manager.ts @@ -157,6 +157,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem provider: settings.provider, remote: settings.remote, model: settings.model, + outputDimensionality: settings.outputDimensionality, fallback: settings.fallback, local: settings.local, });