From 60aed953468b3383f749e65beabeb33d1b481962 Mon Sep 17 00:00:00 2001 From: Bill Chirico Date: Wed, 11 Mar 2026 14:28:53 -0400 Subject: [PATCH] feat(memory): add gemini-embedding-2-preview support (#42501) Merged via squash. Prepared head SHA: c57b1f8ba2ca65f4946afe94a9137ee8c05c8c64 Co-authored-by: BillChirico <13951316+BillChirico@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras --- CHANGELOG.md | 1 + docs/concepts/memory.md | 23 ++ src/agents/memory-search.ts | 3 + src/config/schema.help.quality.test.ts | 1 + src/config/schema.help.ts | 2 + src/config/schema.labels.ts | 1 + src/config/types.tools.ts | 5 + src/config/zod-schema.agent-runtime.ts | 1 + src/memory/batch-gemini.test.ts | 94 +++++ src/memory/batch-gemini.ts | 10 +- src/memory/embedding-model-limits.ts | 2 + src/memory/embeddings-gemini.test.ts | 453 +++++++++++++++++++++++++ src/memory/embeddings-gemini.ts | 143 +++++++- src/memory/embeddings.ts | 10 +- src/memory/index.test.ts | 115 ++++++- src/memory/manager-embedding-ops.ts | 9 +- src/memory/manager-sync-ops.ts | 1 + src/memory/manager.ts | 1 + 18 files changed, 838 insertions(+), 37 deletions(-) create mode 100644 src/memory/batch-gemini.test.ts create mode 100644 src/memory/embeddings-gemini.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f851f7e073..e6a3f5ec78a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai - Exec/child commands: mark child command environments with `OPENCLAW_CLI` so subprocesses can detect when they were launched from the OpenClaw CLI. (#41411) Thanks @vincentkoc. - iOS/Home canvas: add a bundled welcome screen with a live agent overview that refreshes on connect, reconnect, and foreground return, and move the compact connection pill off the top-left canvas overlay. (#42456) Thanks @ngutman. 
- iOS/Home canvas: replace floating controls with a docked toolbar, make the bundled home scaffold adapt to smaller phones, and open chat in the resolved main session instead of a synthetic `ios` session. (#42456) Thanks @ngutman. +- Memory/Gemini: add `gemini-embedding-2-preview` memory-search support with configurable output dimensions and automatic reindexing when the configured dimensions change. (#42501) Thanks @BillChirico. - Discord/auto threads: add `autoArchiveDuration` channel config for auto-created threads so Discord thread archiving can stay at 1 hour, 1 day, 3 days, or 1 week instead of always using the 1-hour default. (#35065) Thanks @davidguttman. - OpenCode/onboarding: add new OpenCode Go provider, treat Zen and Go as one OpenCode setup in the wizard/docs while keeping the runtime providers split, store one shared OpenCode key for both profiles, and stop overriding the built-in `opencode-go` catalog routing. (#42313) Thanks @ImLukeF and @vincentkoc. - macOS/chat UI: add a chat model picker, persist explicit thinking-level selections across relaunch, and harden provider-aware session model sync for the shared chat composer. (#42314) Thanks @ImLukeF. diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md index b3940945249..35c51f6b523 100644 --- a/docs/concepts/memory.md +++ b/docs/concepts/memory.md @@ -310,6 +310,29 @@ Notes: - `remote.baseUrl` is optional (defaults to the Gemini API base URL). - `remote.headers` lets you add extra headers if needed. - Default model: `gemini-embedding-001`. +- `gemini-embedding-2-preview` is also supported: 8192 token limit and configurable dimensions (768 / 1536 / 3072, default 3072). 
+ +#### Gemini Embedding 2 (preview) + +```json5 +agents: { + defaults: { + memorySearch: { + provider: "gemini", + model: "gemini-embedding-2-preview", + outputDimensionality: 3072, // optional: 768, 1536, or 3072 (default) + remote: { + apiKey: "YOUR_GEMINI_API_KEY" + } + } + } +} +``` + +> **⚠️ Re-index required:** Switching from `gemini-embedding-001` (768 dimensions) +> to `gemini-embedding-2-preview` (3072 dimensions) changes the vector size. The same is true if you +> change `outputDimensionality` between 768, 1536, and 3072. +> OpenClaw will automatically reindex when it detects a model or dimension change. If you want to use a **custom OpenAI-compatible endpoint** (OpenRouter, vLLM, or a proxy), you can use the `remote` configuration with the OpenAI provider: diff --git a/src/agents/memory-search.ts b/src/agents/memory-search.ts index e14fd5a0b3b..6bcacfec2db 100644 --- a/src/agents/memory-search.ts +++ b/src/agents/memory-search.ts @@ -28,6 +28,7 @@ export type ResolvedMemorySearchConfig = { }; fallback: "openai" | "gemini" | "local" | "voyage" | "mistral" | "ollama" | "none"; model: string; + outputDimensionality?: number; local: { modelPath?: string; modelCacheDir?: string; @@ -193,6 +194,7 @@ function mergeConfig( ? DEFAULT_OLLAMA_MODEL : undefined; const model = overrides?.model ?? defaults?.model ?? modelDefault ?? ""; + const outputDimensionality = overrides?.outputDimensionality ?? defaults?.outputDimensionality; const local = { modelPath: overrides?.local?.modelPath ?? defaults?.local?.modelPath, modelCacheDir: overrides?.local?.modelCacheDir ?? 
defaults?.local?.modelCacheDir, @@ -312,6 +314,7 @@ function mergeConfig( }, fallback, model, + outputDimensionality, local, store, chunking: { tokens: Math.max(1, chunking.tokens), overlap }, diff --git a/src/config/schema.help.quality.test.ts b/src/config/schema.help.quality.test.ts index 04d5200bfbb..730dd397831 100644 --- a/src/config/schema.help.quality.test.ts +++ b/src/config/schema.help.quality.test.ts @@ -83,6 +83,7 @@ const TARGET_KEYS = [ "agents.defaults.memorySearch.remote.batch.timeoutMinutes", "agents.defaults.memorySearch.local.modelPath", "agents.defaults.memorySearch.store.path", + "agents.defaults.memorySearch.outputDimensionality", "agents.defaults.memorySearch.store.vector.enabled", "agents.defaults.memorySearch.store.vector.extensionPath", "agents.defaults.memorySearch.query.hybrid.enabled", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 908829cbf33..bd93f711d91 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -785,6 +785,8 @@ export const FIELD_HELP: Record = { 'Selects the embedding backend used to build/query memory vectors: "openai", "gemini", "voyage", "mistral", "ollama", or "local". Keep your most reliable provider here and configure fallback for resilience.', "agents.defaults.memorySearch.model": "Embedding model override used by the selected memory provider when a non-default model is required. Set this only when you need explicit recall quality/cost tuning beyond provider defaults.", + "agents.defaults.memorySearch.outputDimensionality": + "Gemini embedding-2 only: chooses the output vector size for memory embeddings. Use 768, 1536, or 3072 (default), and expect a full reindex when you change it because stored vector dimensions must stay consistent.", "agents.defaults.memorySearch.remote.baseUrl": "Overrides the embedding API endpoint, such as an OpenAI-compatible proxy or custom Gemini base URL. 
Use this only when routing through your own gateway or vendor endpoint; keep provider defaults otherwise.", "agents.defaults.memorySearch.remote.apiKey": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index c643cf91cd9..b7477b4798a 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -331,6 +331,7 @@ export const FIELD_LABELS: Record = { "agents.defaults.memorySearch.remote.batch.pollIntervalMs": "Remote Batch Poll Interval (ms)", "agents.defaults.memorySearch.remote.batch.timeoutMinutes": "Remote Batch Timeout (min)", "agents.defaults.memorySearch.model": "Memory Search Model", + "agents.defaults.memorySearch.outputDimensionality": "Memory Search Output Dimensionality", "agents.defaults.memorySearch.fallback": "Memory Search Fallback", "agents.defaults.memorySearch.local.modelPath": "Local Embedding Model Path", "agents.defaults.memorySearch.store.path": "Memory Search Index Path", diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index e352f858c39..5de1b4cafa5 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -347,6 +347,11 @@ export type MemorySearchConfig = { fallback?: "openai" | "gemini" | "local" | "voyage" | "mistral" | "ollama" | "none"; /** Embedding model id (remote) or alias (local). */ model?: string; + /** + * Gemini embedding-2 models only: output vector dimensions. + * Supported values today are 768, 1536, and 3072. + */ + outputDimensionality?: number; /** Local embedding settings (node-llama-cpp). */ local?: { /** GGUF model path or hf: URI. 
*/ diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index 3ede7218b80..a240eba5d43 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -599,6 +599,7 @@ export const MemorySearchSchema = z ]) .optional(), model: z.string().optional(), + outputDimensionality: z.number().int().positive().optional(), local: z .object({ modelPath: z.string().optional(), diff --git a/src/memory/batch-gemini.test.ts b/src/memory/batch-gemini.test.ts new file mode 100644 index 00000000000..57bc71291b9 --- /dev/null +++ b/src/memory/batch-gemini.test.ts @@ -0,0 +1,94 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from "vitest"; +import type { GeminiEmbeddingClient } from "./embeddings-gemini.js"; + +describe("runGeminiEmbeddingBatches", () => { + let runGeminiEmbeddingBatches: typeof import("./batch-gemini.js").runGeminiEmbeddingBatches; + + beforeAll(async () => { + ({ runGeminiEmbeddingBatches } = await import("./batch-gemini.js")); + }); + + afterEach(() => { + vi.resetAllMocks(); + vi.unstubAllGlobals(); + }); + + const mockClient: GeminiEmbeddingClient = { + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + headers: {}, + model: "gemini-embedding-2-preview", + modelPath: "models/gemini-embedding-2-preview", + apiKeys: ["test-key"], + outputDimensionality: 1536, + }; + + it("includes outputDimensionality in batch upload requests", async () => { + const fetchMock = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => { + const url = + typeof input === "string" ? input : input instanceof URL ? 
input.toString() : input.url; + if (url.includes("/upload/v1beta/files?uploadType=multipart")) { + const body = init?.body; + if (!(body instanceof Blob)) { + throw new Error("expected multipart blob body"); + } + const text = await body.text(); + expect(text).toContain('"taskType":"RETRIEVAL_DOCUMENT"'); + expect(text).toContain('"outputDimensionality":1536'); + return new Response(JSON.stringify({ name: "files/file-123" }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + } + if (url.endsWith(":asyncBatchEmbedContent")) { + return new Response( + JSON.stringify({ + name: "batches/batch-1", + state: "COMPLETED", + outputConfig: { file: "files/output-1" }, + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ); + } + if (url.endsWith("/files/output-1:download")) { + return new Response( + JSON.stringify({ + key: "req-1", + response: { embedding: { values: [0.1, 0.2, 0.3] } }, + }), + { + status: 200, + headers: { "Content-Type": "application/jsonl" }, + }, + ); + } + throw new Error(`unexpected fetch ${url}`); + }); + + vi.stubGlobal("fetch", fetchMock); + + const results = await runGeminiEmbeddingBatches({ + gemini: mockClient, + agentId: "main", + requests: [ + { + custom_id: "req-1", + request: { + content: { parts: [{ text: "hello world" }] }, + taskType: "RETRIEVAL_DOCUMENT", + outputDimensionality: 1536, + }, + }, + ], + wait: true, + pollIntervalMs: 1, + timeoutMs: 1000, + concurrency: 1, + }); + + expect(results.get("req-1")).toEqual([0.1, 0.2, 0.3]); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); +}); diff --git a/src/memory/batch-gemini.ts b/src/memory/batch-gemini.ts index 998f283b676..3afb5121ff7 100644 --- a/src/memory/batch-gemini.ts +++ b/src/memory/batch-gemini.ts @@ -5,14 +5,13 @@ import { } from "./batch-runner.js"; import { buildBatchHeaders, normalizeBatchBaseUrl } from "./batch-utils.js"; import { debugEmbeddingsLog } from "./embeddings-debug.js"; -import type { GeminiEmbeddingClient 
} from "./embeddings-gemini.js"; +import type { GeminiEmbeddingClient, GeminiTextEmbeddingRequest } from "./embeddings-gemini.js"; import { hashText } from "./internal.js"; import { withRemoteHttpResponse } from "./remote-http.js"; export type GeminiBatchRequest = { custom_id: string; - content: { parts: Array<{ text: string }> }; - taskType: "RETRIEVAL_DOCUMENT" | "RETRIEVAL_QUERY"; + request: GeminiTextEmbeddingRequest; }; export type GeminiBatchStatus = { @@ -82,10 +81,7 @@ async function submitGeminiBatch(params: { .map((request) => JSON.stringify({ key: request.custom_id, - request: { - content: request.content, - task_type: request.taskType, - }, + request: request.request, }), ) .join("\n"); diff --git a/src/memory/embedding-model-limits.ts b/src/memory/embedding-model-limits.ts index b9960009606..0819686b905 100644 --- a/src/memory/embedding-model-limits.ts +++ b/src/memory/embedding-model-limits.ts @@ -8,6 +8,8 @@ const KNOWN_EMBEDDING_MAX_INPUT_TOKENS: Record = { "openai:text-embedding-3-large": 8192, "openai:text-embedding-ada-002": 8191, "gemini:text-embedding-004": 2048, + "gemini:gemini-embedding-001": 2048, + "gemini:gemini-embedding-2-preview": 8192, "voyage:voyage-3": 32000, "voyage:voyage-3-lite": 16000, "voyage:voyage-code-3": 32000, diff --git a/src/memory/embeddings-gemini.test.ts b/src/memory/embeddings-gemini.test.ts new file mode 100644 index 00000000000..36cb6bfd111 --- /dev/null +++ b/src/memory/embeddings-gemini.test.ts @@ -0,0 +1,453 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import * as authModule from "../agents/model-auth.js"; +import { + buildFileDataPart, + buildGeminiParts, + buildGeminiTextEmbeddingRequest, + buildInlineDataPart, + createGeminiEmbeddingProvider, + DEFAULT_GEMINI_EMBEDDING_MODEL, + GEMINI_EMBEDDING_2_MODELS, + isGeminiEmbedding2Model, + resolveGeminiOutputDimensionality, + type GeminiPart, +} from "./embeddings-gemini.js"; + +vi.mock("../agents/model-auth.js", async () => { + const { 
createModelAuthMockModule } = await import("../test-utils/model-auth-mock.js"); + return createModelAuthMockModule(); +}); + +const createGeminiFetchMock = (embeddingValues = [1, 2, 3]) => + vi.fn(async (_input?: unknown, _init?: unknown) => ({ + ok: true, + status: 200, + json: async () => ({ embedding: { values: embeddingValues } }), + })); + +const createGeminiBatchFetchMock = (count: number, embeddingValues = [1, 2, 3]) => + vi.fn(async (_input?: unknown, _init?: unknown) => ({ + ok: true, + status: 200, + json: async () => ({ + embeddings: Array.from({ length: count }, () => ({ values: embeddingValues })), + }), + })); + +function readFirstFetchRequest(fetchMock: { mock: { calls: unknown[][] } }) { + const [url, init] = fetchMock.mock.calls[0] ?? []; + return { url, init: init as RequestInit | undefined }; +} + +function parseFetchBody(fetchMock: { mock: { calls: unknown[][] } }, callIndex = 0) { + const init = fetchMock.mock.calls[callIndex]?.[1] as RequestInit | undefined; + return JSON.parse((init?.body as string) ?? 
"{}") as Record; +} + +afterEach(() => { + vi.resetAllMocks(); + vi.unstubAllGlobals(); +}); + +function mockResolvedProviderKey(apiKey = "test-key") { + vi.mocked(authModule.resolveApiKeyForProvider).mockResolvedValue({ + apiKey, + mode: "api-key", + source: "test", + }); +} + +// ---------- Helper function tests ---------- + +describe("buildGeminiParts", () => { + it("wraps a string into a single text part", () => { + expect(buildGeminiParts("hello")).toEqual([{ text: "hello" }]); + }); + + it("passes through an existing parts array", () => { + const parts: GeminiPart[] = [ + { text: "hello" }, + { inlineData: { mimeType: "image/png", data: "base64data" } }, + ]; + expect(buildGeminiParts(parts)).toBe(parts); + }); +}); + +describe("buildInlineDataPart", () => { + it("produces the correct shape", () => { + const part = buildInlineDataPart("image/jpeg", "abc123"); + expect(part).toEqual({ + inlineData: { mimeType: "image/jpeg", data: "abc123" }, + }); + }); +}); + +describe("buildFileDataPart", () => { + it("produces the correct shape", () => { + const part = buildFileDataPart("application/pdf", "gs://bucket/file.pdf"); + expect(part).toEqual({ + fileData: { mimeType: "application/pdf", fileUri: "gs://bucket/file.pdf" }, + }); + }); +}); + +describe("buildGeminiTextEmbeddingRequest", () => { + it("builds a text embedding request with optional model and dimensions", () => { + expect( + buildGeminiTextEmbeddingRequest({ + text: "hello", + taskType: "RETRIEVAL_DOCUMENT", + modelPath: "models/gemini-embedding-2-preview", + outputDimensionality: 1536, + }), + ).toEqual({ + model: "models/gemini-embedding-2-preview", + content: { parts: [{ text: "hello" }] }, + taskType: "RETRIEVAL_DOCUMENT", + outputDimensionality: 1536, + }); + }); +}); + +// ---------- Model detection ---------- + +describe("isGeminiEmbedding2Model", () => { + it("returns true for gemini-embedding-2-preview", () => { + expect(isGeminiEmbedding2Model("gemini-embedding-2-preview")).toBe(true); + }); + 
+ it("returns false for gemini-embedding-001", () => { + expect(isGeminiEmbedding2Model("gemini-embedding-001")).toBe(false); + }); + + it("returns false for text-embedding-004", () => { + expect(isGeminiEmbedding2Model("text-embedding-004")).toBe(false); + }); +}); + +describe("GEMINI_EMBEDDING_2_MODELS", () => { + it("contains gemini-embedding-2-preview", () => { + expect(GEMINI_EMBEDDING_2_MODELS.has("gemini-embedding-2-preview")).toBe(true); + }); +}); + +// ---------- Dimension resolution ---------- + +describe("resolveGeminiOutputDimensionality", () => { + it("returns undefined for non-v2 models", () => { + expect(resolveGeminiOutputDimensionality("gemini-embedding-001")).toBeUndefined(); + expect(resolveGeminiOutputDimensionality("text-embedding-004")).toBeUndefined(); + }); + + it("returns 3072 by default for v2 models", () => { + expect(resolveGeminiOutputDimensionality("gemini-embedding-2-preview")).toBe(3072); + }); + + it("accepts valid dimension values", () => { + expect(resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 768)).toBe(768); + expect(resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 1536)).toBe(1536); + expect(resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 3072)).toBe(3072); + }); + + it("throws for invalid dimension values", () => { + expect(() => resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 512)).toThrow( + /Invalid outputDimensionality 512/, + ); + expect(() => resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 1024)).toThrow( + /Valid values: 768, 1536, 3072/, + ); + }); +}); + +// ---------- Provider: gemini-embedding-001 (backward compat) ---------- + +describe("gemini-embedding-001 provider (backward compat)", () => { + it("does NOT include outputDimensionality in embedQuery", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await 
createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-001", + fallback: "none", + }); + + await provider.embedQuery("test query"); + + const body = parseFetchBody(fetchMock); + expect(body).not.toHaveProperty("outputDimensionality"); + expect(body.taskType).toBe("RETRIEVAL_QUERY"); + expect(body.content).toEqual({ parts: [{ text: "test query" }] }); + }); + + it("does NOT include outputDimensionality in embedBatch", async () => { + const fetchMock = createGeminiBatchFetchMock(2); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-001", + fallback: "none", + }); + + await provider.embedBatch(["text1", "text2"]); + + const body = parseFetchBody(fetchMock); + expect(body).not.toHaveProperty("outputDimensionality"); + }); +}); + +// ---------- Provider: gemini-embedding-2-preview ---------- + +describe("gemini-embedding-2-preview provider", () => { + it("includes outputDimensionality in embedQuery request", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedQuery("test query"); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(3072); + expect(body.taskType).toBe("RETRIEVAL_QUERY"); + expect(body.content).toEqual({ parts: [{ text: "test query" }] }); + }); + + it("includes outputDimensionality in embedBatch request", async () => { + const fetchMock = createGeminiBatchFetchMock(2); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: 
"gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedBatch(["text1", "text2"]); + + const body = parseFetchBody(fetchMock); + expect(body.requests).toEqual([ + { + model: "models/gemini-embedding-2-preview", + content: { parts: [{ text: "text1" }] }, + taskType: "RETRIEVAL_DOCUMENT", + outputDimensionality: 3072, + }, + { + model: "models/gemini-embedding-2-preview", + content: { parts: [{ text: "text2" }] }, + taskType: "RETRIEVAL_DOCUMENT", + outputDimensionality: 3072, + }, + ]); + }); + + it("respects custom outputDimensionality", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + outputDimensionality: 768, + }); + + await provider.embedQuery("test"); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(768); + }); + + it("uses custom outputDimensionality for each embedBatch request", async () => { + const fetchMock = createGeminiBatchFetchMock(2); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + outputDimensionality: 768, + }); + + await provider.embedBatch(["text1", "text2"]); + + const body = parseFetchBody(fetchMock); + expect(body.requests).toEqual([ + expect.objectContaining({ outputDimensionality: 768 }), + expect.objectContaining({ outputDimensionality: 768 }), + ]); + }); + + it("throws for invalid outputDimensionality", async () => { + mockResolvedProviderKey(); + + await expect( + createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + outputDimensionality: 512, + }), + 
).rejects.toThrow(/Invalid outputDimensionality 512/); + }); + + it("uses correct endpoint URL", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedQuery("test"); + + const { url } = readFirstFetchRequest(fetchMock); + expect(url).toBe( + "https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-2-preview:embedContent", + ); + }); + + it("allows taskType override via options", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + taskType: "SEMANTIC_SIMILARITY", + }); + + await provider.embedQuery("test"); + + const body = parseFetchBody(fetchMock); + expect(body.taskType).toBe("SEMANTIC_SIMILARITY"); + }); +}); + +// ---------- Model normalization ---------- + +describe("gemini model normalization", () => { + it("handles models/ prefix for v2 model", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "models/gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedQuery("test"); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(3072); + }); + + it("handles gemini/ prefix for v2 model", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: 
"gemini", + model: "gemini/gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedQuery("test"); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(3072); + }); + + it("handles google/ prefix for v2 model", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "google/gemini-embedding-2-preview", + fallback: "none", + }); + + await provider.embedQuery("test"); + + const body = parseFetchBody(fetchMock); + expect(body.outputDimensionality).toBe(3072); + }); + + it("defaults to gemini-embedding-001 when model is empty", async () => { + const fetchMock = createGeminiFetchMock(); + vi.stubGlobal("fetch", fetchMock); + mockResolvedProviderKey(); + + const { provider, client } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "", + fallback: "none", + }); + + expect(client.model).toBe(DEFAULT_GEMINI_EMBEDDING_MODEL); + expect(provider.model).toBe(DEFAULT_GEMINI_EMBEDDING_MODEL); + }); + + it("returns empty array for blank query text", async () => { + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + const result = await provider.embedQuery(" "); + expect(result).toEqual([]); + }); + + it("returns empty array for empty batch", async () => { + mockResolvedProviderKey(); + + const { provider } = await createGeminiEmbeddingProvider({ + config: {} as never, + provider: "gemini", + model: "gemini-embedding-2-preview", + fallback: "none", + }); + + const result = await provider.embedBatch([]); + expect(result).toEqual([]); + }); +}); diff --git a/src/memory/embeddings-gemini.ts b/src/memory/embeddings-gemini.ts index 1d5cc5876ea..f8c3d3f4a06 
100644 --- a/src/memory/embeddings-gemini.ts +++ b/src/memory/embeddings-gemini.ts @@ -17,6 +17,7 @@ export type GeminiEmbeddingClient = { model: string; modelPath: string; apiKeys: string[]; + outputDimensionality?: number; }; const DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"; @@ -24,6 +25,109 @@ export const DEFAULT_GEMINI_EMBEDDING_MODEL = "gemini-embedding-001"; const GEMINI_MAX_INPUT_TOKENS: Record = { "text-embedding-004": 2048, }; + +// --- gemini-embedding-2-preview support --- + +export const GEMINI_EMBEDDING_2_MODELS = new Set([ + "gemini-embedding-2-preview", + // Add the GA model name here once released. +]); + +const GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS = 3072; +const GEMINI_EMBEDDING_2_VALID_DIMENSIONS = [768, 1536, 3072] as const; + +export type GeminiTaskType = + | "RETRIEVAL_QUERY" + | "RETRIEVAL_DOCUMENT" + | "SEMANTIC_SIMILARITY" + | "CLASSIFICATION" + | "CLUSTERING" + | "QUESTION_ANSWERING" + | "FACT_VERIFICATION"; + +export type GeminiTextPart = { text: string }; +export type GeminiInlinePart = { + inlineData: { mimeType: string; data: string }; +}; +export type GeminiFilePart = { + fileData: { mimeType: string; fileUri: string }; +}; +export type GeminiPart = GeminiTextPart | GeminiInlinePart | GeminiFilePart; +export type GeminiTextEmbeddingRequest = { + content: { parts: GeminiTextPart[] }; + taskType: GeminiTaskType; + outputDimensionality?: number; + model?: string; +}; + +/** Convert a string or pre-built parts array into `GeminiPart[]`. */ +export function buildGeminiParts(input: string | GeminiPart[]): GeminiPart[] { + if (typeof input === "string") { + return [{ text: input }]; + } + return input; +} + +/** Convenience: build an inline-data part for multimodal embeddings. */ +export function buildInlineDataPart(mimeType: string, base64Data: string): GeminiInlinePart { + return { inlineData: { mimeType, data: base64Data } }; +} + +/** Convenience: build a file-data part for multimodal embeddings. 
*/ +export function buildFileDataPart(mimeType: string, fileUri: string): GeminiFilePart { + return { fileData: { mimeType, fileUri } }; +} + +/** Builds the text-only Gemini embedding request shape used across direct and batch APIs. */ +export function buildGeminiTextEmbeddingRequest(params: { + text: string; + taskType: GeminiTaskType; + outputDimensionality?: number; + modelPath?: string; +}): GeminiTextEmbeddingRequest { + const request: GeminiTextEmbeddingRequest = { + content: { parts: [{ text: params.text }] }, + taskType: params.taskType, + }; + if (params.modelPath) { + request.model = params.modelPath; + } + if (params.outputDimensionality != null) { + request.outputDimensionality = params.outputDimensionality; + } + return request; +} + +/** + * Returns true if the given model name is a gemini-embedding-2 variant that + * supports `outputDimensionality` and extended task types. + */ +export function isGeminiEmbedding2Model(model: string): boolean { + return GEMINI_EMBEDDING_2_MODELS.has(model); +} + +/** + * Validate and return the `outputDimensionality` for gemini-embedding-2 models. + * Returns `undefined` for older models (they don't support the param). + */ +export function resolveGeminiOutputDimensionality( + model: string, + requested?: number, +): number | undefined { + if (!isGeminiEmbedding2Model(model)) { + return undefined; + } + if (requested == null) { + return GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS; + } + const valid: readonly number[] = GEMINI_EMBEDDING_2_VALID_DIMENSIONS; + if (!valid.includes(requested)) { + throw new Error( + `Invalid outputDimensionality ${requested} for ${model}. 
Valid values: ${valid.join(", ")}`, + ); + } + return requested; +} function resolveRemoteApiKey(remoteApiKey: unknown): string | undefined { const trimmed = resolveMemorySecretInputString({ value: remoteApiKey, @@ -73,6 +177,8 @@ export async function createGeminiEmbeddingProvider( const baseUrl = client.baseUrl.replace(/\/$/, ""); const embedUrl = `${baseUrl}/${client.modelPath}:embedContent`; const batchUrl = `${baseUrl}/${client.modelPath}:batchEmbedContents`; + const isV2 = isGeminiEmbedding2Model(client.model); + const outputDimensionality = client.outputDimensionality; const fetchWithGeminiAuth = async (apiKey: string, endpoint: string, body: unknown) => { const authHeaders = parseGeminiAuth(apiKey); @@ -106,14 +212,15 @@ export async function createGeminiEmbeddingProvider( if (!text.trim()) { return []; } + const body = buildGeminiTextEmbeddingRequest({ + text, + taskType: options.taskType ?? "RETRIEVAL_QUERY", + outputDimensionality: isV2 ? outputDimensionality : undefined, + }); const payload = await executeWithApiKeyRotation({ provider: "google", apiKeys: client.apiKeys, - execute: (apiKey) => - fetchWithGeminiAuth(apiKey, embedUrl, { - content: { parts: [{ text }] }, - taskType: "RETRIEVAL_QUERY", - }), + execute: (apiKey) => fetchWithGeminiAuth(apiKey, embedUrl, body), }); return payload.embedding?.values ?? []; }; @@ -122,18 +229,19 @@ export async function createGeminiEmbeddingProvider( if (texts.length === 0) { return []; } - const requests = texts.map((text) => ({ - model: client.modelPath, - content: { parts: [{ text }] }, - taskType: "RETRIEVAL_DOCUMENT", - })); + const requests = texts.map((text) => + buildGeminiTextEmbeddingRequest({ + text, + modelPath: client.modelPath, + taskType: options.taskType ?? "RETRIEVAL_DOCUMENT", + outputDimensionality: isV2 ? 
outputDimensionality : undefined, + }), + ); + const batchBody = { requests }; const payload = await executeWithApiKeyRotation({ provider: "google", apiKeys: client.apiKeys, - execute: (apiKey) => - fetchWithGeminiAuth(apiKey, batchUrl, { - requests, - }), + execute: (apiKey) => fetchWithGeminiAuth(apiKey, batchUrl, batchBody), }); const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : []; return texts.map((_, index) => embeddings[index]?.values ?? []); @@ -183,13 +291,18 @@ export async function resolveGeminiEmbeddingClient( }); const model = normalizeGeminiModel(options.model); const modelPath = buildGeminiModelPath(model); + const outputDimensionality = resolveGeminiOutputDimensionality( + model, + options.outputDimensionality, + ); debugEmbeddingsLog("memory embeddings: gemini client", { rawBaseUrl, baseUrl, model, modelPath, + outputDimensionality, embedEndpoint: `${baseUrl}/${modelPath}:embedContent`, batchEndpoint: `${baseUrl}/${modelPath}:batchEmbedContents`, }); - return { baseUrl, headers, ssrfPolicy, model, modelPath, apiKeys }; + return { baseUrl, headers, ssrfPolicy, model, modelPath, apiKeys, outputDimensionality }; } diff --git a/src/memory/embeddings.ts b/src/memory/embeddings.ts index ca6b4046e2c..d91807c54c8 100644 --- a/src/memory/embeddings.ts +++ b/src/memory/embeddings.ts @@ -4,7 +4,11 @@ import type { OpenClawConfig } from "../config/config.js"; import type { SecretInput } from "../config/types.secrets.js"; import { formatErrorMessage } from "../infra/errors.js"; import { resolveUserPath } from "../utils.js"; -import { createGeminiEmbeddingProvider, type GeminiEmbeddingClient } from "./embeddings-gemini.js"; +import { + createGeminiEmbeddingProvider, + type GeminiEmbeddingClient, + type GeminiTaskType, +} from "./embeddings-gemini.js"; import { createMistralEmbeddingProvider, type MistralEmbeddingClient, @@ -74,6 +78,10 @@ export type EmbeddingProviderOptions = { modelPath?: string; modelCacheDir?: string; }; + /** Gemini 
embedding-2: output vector dimensions (768, 1536, or 3072). */ + outputDimensionality?: number; + /** Gemini: override the default task type sent with embedding requests. */ + taskType?: GeminiTaskType; }; export const DEFAULT_LOCAL_MODEL = diff --git a/src/memory/index.test.ts b/src/memory/index.test.ts index 43ebcca58c2..8010c419494 100644 --- a/src/memory/index.test.ts +++ b/src/memory/index.test.ts @@ -6,6 +6,7 @@ import { getMemorySearchManager, type MemoryIndexManager } from "./index.js"; import "./test-runtime-mocks.js"; let embedBatchCalls = 0; +let providerCalls: Array<{ provider?: string; model?: string; outputDimensionality?: number }> = []; vi.mock("./embeddings.js", () => { const embedText = (text: string) => { @@ -15,18 +16,43 @@ vi.mock("./embeddings.js", () => { return [alpha, beta]; }; return { - createEmbeddingProvider: async (options: { model?: string }) => ({ - requestedProvider: "openai", - provider: { - id: "mock", - model: options.model ?? "mock-embed", - embedQuery: async (text: string) => embedText(text), - embedBatch: async (texts: string[]) => { - embedBatchCalls += 1; - return texts.map(embedText); + createEmbeddingProvider: async (options: { + provider?: string; + model?: string; + outputDimensionality?: number; + }) => { + providerCalls.push({ + provider: options.provider, + model: options.model, + outputDimensionality: options.outputDimensionality, + }); + const providerId = options.provider === "gemini" ? "gemini" : "mock"; + const model = options.model ?? "mock-embed"; + return { + requestedProvider: options.provider ?? "openai", + provider: { + id: providerId, + model, + embedQuery: async (text: string) => embedText(text), + embedBatch: async (texts: string[]) => { + embedBatchCalls += 1; + return texts.map(embedText); + }, }, - }, - }), + ...(providerId === "gemini" + ? 
{ + gemini: { + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + headers: {}, + model, + modelPath: `models/${model}`, + apiKeys: ["test-key"], + outputDimensionality: options.outputDimensionality, + }, + } + : {}), + }; + }, }; }); @@ -93,6 +119,7 @@ describe("memory index", () => { // Keep atomic reindex tests on the safe path. vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "1"); embedBatchCalls = 0; + providerCalls = []; // Keep the workspace stable to allow manager reuse across tests. await fs.mkdir(memoryDir, { recursive: true }); @@ -119,7 +146,9 @@ describe("memory index", () => { extraPaths?: string[]; sources?: Array<"memory" | "sessions">; sessionMemory?: boolean; + provider?: "openai" | "gemini"; model?: string; + outputDimensionality?: number; vectorEnabled?: boolean; cacheEnabled?: boolean; minScore?: number; @@ -130,8 +159,9 @@ describe("memory index", () => { defaults: { workspace: workspaceDir, memorySearch: { - provider: "openai", + provider: params.provider ?? "openai", model: params.model ?? "mock-embed", + outputDimensionality: params.outputDimensionality, store: { path: params.storePath, vector: { enabled: params.vectorEnabled ?? false } }, // Perf: keep test indexes to a single chunk to reduce sqlite work. 
chunking: { tokens: 4000, overlap: 0 }, @@ -342,6 +372,67 @@ describe("memory index", () => { await secondManager.close?.(); }); + it("passes Gemini outputDimensionality from config into the provider", async () => { + const cfg = createCfg({ + storePath: indexMainPath, + provider: "gemini", + model: "gemini-embedding-2-preview", + outputDimensionality: 1536, + }); + + const result = await getMemorySearchManager({ cfg, agentId: "main" }); + const manager = requireManager(result); + + expect( + providerCalls.some( + (call) => + call.provider === "gemini" && + call.model === "gemini-embedding-2-preview" && + call.outputDimensionality === 1536, + ), + ).toBe(true); + await manager.close?.(); + }); + + it("reindexes when Gemini outputDimensionality changes", async () => { + const base = createCfg({ + storePath: indexModelPath, + provider: "gemini", + model: "gemini-embedding-2-preview", + outputDimensionality: 3072, + }); + const baseAgents = base.agents!; + const baseDefaults = baseAgents.defaults!; + const baseMemorySearch = baseDefaults.memorySearch!; + + const first = await getMemorySearchManager({ cfg: base, agentId: "main" }); + const firstManager = requireManager(first); + await firstManager.sync?.({ reason: "test" }); + const callsAfterFirstSync = embedBatchCalls; + await firstManager.close?.(); + + const second = await getMemorySearchManager({ + cfg: { + ...base, + agents: { + ...baseAgents, + defaults: { + ...baseDefaults, + memorySearch: { + ...baseMemorySearch, + outputDimensionality: 768, + }, + }, + }, + }, + agentId: "main", + }); + const secondManager = requireManager(second); + await secondManager.sync?.({ reason: "test" }); + expect(embedBatchCalls).toBeGreaterThan(callsAfterFirstSync); + await secondManager.close?.(); + }); + it("reuses cached embeddings on forced reindex", async () => { const cfg = createCfg({ storePath: indexMainPath, cacheEnabled: true }); const manager = await getPersistentManager(cfg); diff --git 
a/src/memory/manager-embedding-ops.ts b/src/memory/manager-embedding-ops.ts index 965058c8a3b..bcc653fda7a 100644 --- a/src/memory/manager-embedding-ops.ts +++ b/src/memory/manager-embedding-ops.ts @@ -9,6 +9,7 @@ import { import { type VoyageBatchRequest, runVoyageEmbeddingBatches } from "./batch-voyage.js"; import { enforceEmbeddingMaxInputTokens } from "./embedding-chunk-limits.js"; import { estimateUtf8Bytes } from "./embedding-input-limits.js"; +import { buildGeminiTextEmbeddingRequest } from "./embeddings-gemini.js"; import { chunkMarkdown, hashText, @@ -236,6 +237,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { provider: "gemini", baseUrl: this.gemini.baseUrl, model: this.gemini.model, + outputDimensionality: this.gemini.outputDimensionality, headers: entries, }), ); @@ -481,8 +483,11 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { provider: "gemini", enabled: Boolean(gemini), buildRequest: (chunk) => ({ - content: { parts: [{ text: chunk.text }] }, - taskType: "RETRIEVAL_DOCUMENT", + request: buildGeminiTextEmbeddingRequest({ + text: chunk.text, + taskType: "RETRIEVAL_DOCUMENT", + outputDimensionality: this.gemini?.outputDimensionality, + }), }), runBatch: async (runnerOptions) => await runGeminiEmbeddingBatches({ diff --git a/src/memory/manager-sync-ops.ts b/src/memory/manager-sync-ops.ts index 1fe91599b34..7bdf8fcdd2e 100644 --- a/src/memory/manager-sync-ops.ts +++ b/src/memory/manager-sync-ops.ts @@ -996,6 +996,7 @@ export abstract class MemoryManagerSyncOps { provider: fallback, remote: this.settings.remote, model: fallbackModel, + outputDimensionality: this.settings.outputDimensionality, fallback: "none", local: this.settings.local, }); diff --git a/src/memory/manager.ts b/src/memory/manager.ts index 9b1ff74e54c..e79f83c570a 100644 --- a/src/memory/manager.ts +++ b/src/memory/manager.ts @@ -157,6 +157,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements 
Mem provider: settings.provider, remote: settings.remote, model: settings.model, + outputDimensionality: settings.outputDimensionality, fallback: settings.fallback, local: settings.local, });