mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
memory: wire Gemini embedding dimensions through config
This commit is contained in:
@@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Exec/child commands: mark child command environments with `OPENCLAW_CLI` so subprocesses can detect when they were launched from the OpenClaw CLI. (#41411) Thanks @vincentkoc.
|
||||
- iOS/Home canvas: add a bundled welcome screen with a live agent overview that refreshes on connect, reconnect, and foreground return, and move the compact connection pill off the top-left canvas overlay. (#42456) Thanks @ngutman.
|
||||
- iOS/Home canvas: replace floating controls with a docked toolbar, make the bundled home scaffold adapt to smaller phones, and open chat in the resolved main session instead of a synthetic `ios` session. (#42456) Thanks @ngutman.
|
||||
- Memory/Gemini: add `gemini-embedding-2-preview` memory-search support with configurable output dimensions and automatic reindexing when the configured dimensions change. (#42501) Thanks @BillChirico.
|
||||
|
||||
### Breaking
|
||||
|
||||
|
||||
@@ -310,7 +310,7 @@ Notes:
|
||||
- `remote.baseUrl` is optional (defaults to the Gemini API base URL).
|
||||
- `remote.headers` lets you add extra headers if needed.
|
||||
- Default model: `gemini-embedding-001`.
|
||||
- `gemini-embedding-2-preview` is also supported: multimodal inputs, 8192 token limit, configurable dimensions (768 / 1536 / 3072, default 3072).
|
||||
- `gemini-embedding-2-preview` is also supported: 8192 token limit and configurable dimensions (768 / 1536 / 3072, default 3072).
|
||||
|
||||
#### Gemini Embedding 2 (preview)
|
||||
|
||||
@@ -330,8 +330,9 @@ agents: {
|
||||
```
|
||||
|
||||
> **⚠️ Re-index required:** Switching from `gemini-embedding-001` (768 dimensions)
|
||||
> to `gemini-embedding-2-preview` (3072 dimensions) changes the vector size.
|
||||
> OpenClaw will automatically reindex when it detects the model change.
|
||||
> to `gemini-embedding-2-preview` (3072 dimensions) changes the vector size. The same is true if you
|
||||
> change `outputDimensionality` between 768, 1536, and 3072.
|
||||
> OpenClaw will automatically reindex when it detects a model or dimension change.
|
||||
|
||||
If you want to use a **custom OpenAI-compatible endpoint** (OpenRouter, vLLM, or a proxy),
|
||||
you can use the `remote` configuration with the OpenAI provider:
|
||||
|
||||
@@ -28,6 +28,7 @@ export type ResolvedMemorySearchConfig = {
|
||||
};
|
||||
fallback: "openai" | "gemini" | "local" | "voyage" | "mistral" | "ollama" | "none";
|
||||
model: string;
|
||||
outputDimensionality?: number;
|
||||
local: {
|
||||
modelPath?: string;
|
||||
modelCacheDir?: string;
|
||||
@@ -193,6 +194,7 @@ function mergeConfig(
|
||||
? DEFAULT_OLLAMA_MODEL
|
||||
: undefined;
|
||||
const model = overrides?.model ?? defaults?.model ?? modelDefault ?? "";
|
||||
const outputDimensionality = overrides?.outputDimensionality ?? defaults?.outputDimensionality;
|
||||
const local = {
|
||||
modelPath: overrides?.local?.modelPath ?? defaults?.local?.modelPath,
|
||||
modelCacheDir: overrides?.local?.modelCacheDir ?? defaults?.local?.modelCacheDir,
|
||||
@@ -312,6 +314,7 @@ function mergeConfig(
|
||||
},
|
||||
fallback,
|
||||
model,
|
||||
outputDimensionality,
|
||||
local,
|
||||
store,
|
||||
chunking: { tokens: Math.max(1, chunking.tokens), overlap },
|
||||
|
||||
@@ -83,6 +83,7 @@ const TARGET_KEYS = [
|
||||
"agents.defaults.memorySearch.remote.batch.timeoutMinutes",
|
||||
"agents.defaults.memorySearch.local.modelPath",
|
||||
"agents.defaults.memorySearch.store.path",
|
||||
"agents.defaults.memorySearch.outputDimensionality",
|
||||
"agents.defaults.memorySearch.store.vector.enabled",
|
||||
"agents.defaults.memorySearch.store.vector.extensionPath",
|
||||
"agents.defaults.memorySearch.query.hybrid.enabled",
|
||||
|
||||
@@ -785,6 +785,8 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
'Selects the embedding backend used to build/query memory vectors: "openai", "gemini", "voyage", "mistral", "ollama", or "local". Keep your most reliable provider here and configure fallback for resilience.',
|
||||
"agents.defaults.memorySearch.model":
|
||||
"Embedding model override used by the selected memory provider when a non-default model is required. Set this only when you need explicit recall quality/cost tuning beyond provider defaults.",
|
||||
"agents.defaults.memorySearch.outputDimensionality":
|
||||
"Gemini embedding-2 only: chooses the output vector size for memory embeddings. Use 768, 1536, or 3072 (default), and expect a full reindex when you change it because stored vector dimensions must stay consistent.",
|
||||
"agents.defaults.memorySearch.remote.baseUrl":
|
||||
"Overrides the embedding API endpoint, such as an OpenAI-compatible proxy or custom Gemini base URL. Use this only when routing through your own gateway or vendor endpoint; keep provider defaults otherwise.",
|
||||
"agents.defaults.memorySearch.remote.apiKey":
|
||||
|
||||
@@ -331,6 +331,7 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.memorySearch.remote.batch.pollIntervalMs": "Remote Batch Poll Interval (ms)",
|
||||
"agents.defaults.memorySearch.remote.batch.timeoutMinutes": "Remote Batch Timeout (min)",
|
||||
"agents.defaults.memorySearch.model": "Memory Search Model",
|
||||
"agents.defaults.memorySearch.outputDimensionality": "Memory Search Output Dimensionality",
|
||||
"agents.defaults.memorySearch.fallback": "Memory Search Fallback",
|
||||
"agents.defaults.memorySearch.local.modelPath": "Local Embedding Model Path",
|
||||
"agents.defaults.memorySearch.store.path": "Memory Search Index Path",
|
||||
|
||||
@@ -347,6 +347,11 @@ export type MemorySearchConfig = {
|
||||
fallback?: "openai" | "gemini" | "local" | "voyage" | "mistral" | "ollama" | "none";
|
||||
/** Embedding model id (remote) or alias (local). */
|
||||
model?: string;
|
||||
/**
|
||||
* Gemini embedding-2 models only: output vector dimensions.
|
||||
* Supported values today are 768, 1536, and 3072.
|
||||
*/
|
||||
outputDimensionality?: number;
|
||||
/** Local embedding settings (node-llama-cpp). */
|
||||
local?: {
|
||||
/** GGUF model path or hf: URI. */
|
||||
|
||||
@@ -599,6 +599,7 @@ export const MemorySearchSchema = z
|
||||
])
|
||||
.optional(),
|
||||
model: z.string().optional(),
|
||||
outputDimensionality: z.number().int().positive().optional(),
|
||||
local: z
|
||||
.object({
|
||||
modelPath: z.string().optional(),
|
||||
|
||||
92
src/memory/batch-gemini.test.ts
Normal file
92
src/memory/batch-gemini.test.ts
Normal file
@@ -0,0 +1,92 @@
|
||||
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import type { GeminiEmbeddingClient } from "./embeddings-gemini.js";
|
||||
|
||||
describe("runGeminiEmbeddingBatches", () => {
|
||||
let runGeminiEmbeddingBatches: typeof import("./batch-gemini.js").runGeminiEmbeddingBatches;
|
||||
|
||||
beforeAll(async () => {
|
||||
({ runGeminiEmbeddingBatches } = await import("./batch-gemini.js"));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.resetAllMocks();
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
const mockClient: GeminiEmbeddingClient = {
|
||||
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
|
||||
headers: {},
|
||||
model: "gemini-embedding-2-preview",
|
||||
modelPath: "models/gemini-embedding-2-preview",
|
||||
apiKeys: ["test-key"],
|
||||
outputDimensionality: 1536,
|
||||
};
|
||||
|
||||
it("includes outputDimensionality in batch upload requests", async () => {
|
||||
const fetchMock = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
const url =
|
||||
typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
|
||||
if (url.includes("/upload/v1beta/files?uploadType=multipart")) {
|
||||
const body = init?.body;
|
||||
if (!(body instanceof Blob)) {
|
||||
throw new Error("expected multipart blob body");
|
||||
}
|
||||
const text = await body.text();
|
||||
expect(text).toContain('"taskType":"RETRIEVAL_DOCUMENT"');
|
||||
expect(text).toContain('"outputDimensionality":1536');
|
||||
return new Response(JSON.stringify({ name: "files/file-123" }), {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
});
|
||||
}
|
||||
if (url.endsWith(":asyncBatchEmbedContent")) {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
name: "batches/batch-1",
|
||||
state: "COMPLETED",
|
||||
outputConfig: { file: "files/output-1" },
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
},
|
||||
);
|
||||
}
|
||||
if (url.endsWith("/files/output-1:download")) {
|
||||
return new Response(
|
||||
JSON.stringify({
|
||||
key: "req-1",
|
||||
response: { embedding: { values: [0.1, 0.2, 0.3] } },
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/jsonl" },
|
||||
},
|
||||
);
|
||||
}
|
||||
throw new Error(`unexpected fetch ${url}`);
|
||||
});
|
||||
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
|
||||
const results = await runGeminiEmbeddingBatches({
|
||||
gemini: mockClient,
|
||||
agentId: "main",
|
||||
requests: [
|
||||
{
|
||||
custom_id: "req-1",
|
||||
content: { parts: [{ text: "hello world" }] },
|
||||
taskType: "RETRIEVAL_DOCUMENT",
|
||||
outputDimensionality: 1536,
|
||||
},
|
||||
],
|
||||
wait: true,
|
||||
pollIntervalMs: 1,
|
||||
timeoutMs: 1000,
|
||||
concurrency: 1,
|
||||
});
|
||||
|
||||
expect(results.get("req-1")).toEqual([0.1, 0.2, 0.3]);
|
||||
expect(fetchMock).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
});
|
||||
@@ -13,6 +13,7 @@ export type GeminiBatchRequest = {
|
||||
custom_id: string;
|
||||
content: { parts: Array<{ text: string }> };
|
||||
taskType: "RETRIEVAL_DOCUMENT" | "RETRIEVAL_QUERY";
|
||||
outputDimensionality?: number;
|
||||
};
|
||||
|
||||
export type GeminiBatchStatus = {
|
||||
@@ -84,7 +85,10 @@ async function submitGeminiBatch(params: {
|
||||
key: request.custom_id,
|
||||
request: {
|
||||
content: request.content,
|
||||
task_type: request.taskType,
|
||||
taskType: request.taskType,
|
||||
...(typeof request.outputDimensionality === "number"
|
||||
? { outputDimensionality: request.outputDimensionality }
|
||||
: {}),
|
||||
},
|
||||
}),
|
||||
)
|
||||
|
||||
@@ -17,6 +17,7 @@ export type GeminiEmbeddingClient = {
|
||||
model: string;
|
||||
modelPath: string;
|
||||
apiKeys: string[];
|
||||
outputDimensionality?: number;
|
||||
};
|
||||
|
||||
const DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta";
|
||||
@@ -151,10 +152,7 @@ export async function createGeminiEmbeddingProvider(
|
||||
const embedUrl = `${baseUrl}/${client.modelPath}:embedContent`;
|
||||
const batchUrl = `${baseUrl}/${client.modelPath}:batchEmbedContents`;
|
||||
const isV2 = isGeminiEmbedding2Model(client.model);
|
||||
const outputDimensionality = resolveGeminiOutputDimensionality(
|
||||
client.model,
|
||||
options.outputDimensionality,
|
||||
);
|
||||
const outputDimensionality = client.outputDimensionality;
|
||||
|
||||
const fetchWithGeminiAuth = async (apiKey: string, endpoint: string, body: unknown) => {
|
||||
const authHeaders = parseGeminiAuth(apiKey);
|
||||
@@ -272,13 +270,18 @@ export async function resolveGeminiEmbeddingClient(
|
||||
});
|
||||
const model = normalizeGeminiModel(options.model);
|
||||
const modelPath = buildGeminiModelPath(model);
|
||||
const outputDimensionality = resolveGeminiOutputDimensionality(
|
||||
model,
|
||||
options.outputDimensionality,
|
||||
);
|
||||
debugEmbeddingsLog("memory embeddings: gemini client", {
|
||||
rawBaseUrl,
|
||||
baseUrl,
|
||||
model,
|
||||
modelPath,
|
||||
outputDimensionality,
|
||||
embedEndpoint: `${baseUrl}/${modelPath}:embedContent`,
|
||||
batchEndpoint: `${baseUrl}/${modelPath}:batchEmbedContents`,
|
||||
});
|
||||
return { baseUrl, headers, ssrfPolicy, model, modelPath, apiKeys };
|
||||
return { baseUrl, headers, ssrfPolicy, model, modelPath, apiKeys, outputDimensionality };
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
|
||||
import "./test-runtime-mocks.js";
|
||||
|
||||
let embedBatchCalls = 0;
|
||||
let providerCalls: Array<{ provider?: string; model?: string; outputDimensionality?: number }> = [];
|
||||
|
||||
vi.mock("./embeddings.js", () => {
|
||||
const embedText = (text: string) => {
|
||||
@@ -15,18 +16,43 @@ vi.mock("./embeddings.js", () => {
|
||||
return [alpha, beta];
|
||||
};
|
||||
return {
|
||||
createEmbeddingProvider: async (options: { model?: string }) => ({
|
||||
requestedProvider: "openai",
|
||||
provider: {
|
||||
id: "mock",
|
||||
model: options.model ?? "mock-embed",
|
||||
embedQuery: async (text: string) => embedText(text),
|
||||
embedBatch: async (texts: string[]) => {
|
||||
embedBatchCalls += 1;
|
||||
return texts.map(embedText);
|
||||
createEmbeddingProvider: async (options: {
|
||||
provider?: string;
|
||||
model?: string;
|
||||
outputDimensionality?: number;
|
||||
}) => {
|
||||
providerCalls.push({
|
||||
provider: options.provider,
|
||||
model: options.model,
|
||||
outputDimensionality: options.outputDimensionality,
|
||||
});
|
||||
const providerId = options.provider === "gemini" ? "gemini" : "mock";
|
||||
const model = options.model ?? "mock-embed";
|
||||
return {
|
||||
requestedProvider: options.provider ?? "openai",
|
||||
provider: {
|
||||
id: providerId,
|
||||
model,
|
||||
embedQuery: async (text: string) => embedText(text),
|
||||
embedBatch: async (texts: string[]) => {
|
||||
embedBatchCalls += 1;
|
||||
return texts.map(embedText);
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
...(providerId === "gemini"
|
||||
? {
|
||||
gemini: {
|
||||
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
|
||||
headers: {},
|
||||
model,
|
||||
modelPath: `models/${model}`,
|
||||
apiKeys: ["test-key"],
|
||||
outputDimensionality: options.outputDimensionality,
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
};
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
@@ -93,6 +119,7 @@ describe("memory index", () => {
|
||||
// Keep atomic reindex tests on the safe path.
|
||||
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "1");
|
||||
embedBatchCalls = 0;
|
||||
providerCalls = [];
|
||||
|
||||
// Keep the workspace stable to allow manager reuse across tests.
|
||||
await fs.mkdir(memoryDir, { recursive: true });
|
||||
@@ -119,7 +146,9 @@ describe("memory index", () => {
|
||||
extraPaths?: string[];
|
||||
sources?: Array<"memory" | "sessions">;
|
||||
sessionMemory?: boolean;
|
||||
provider?: "openai" | "gemini";
|
||||
model?: string;
|
||||
outputDimensionality?: number;
|
||||
vectorEnabled?: boolean;
|
||||
cacheEnabled?: boolean;
|
||||
minScore?: number;
|
||||
@@ -130,8 +159,9 @@ describe("memory index", () => {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
provider: params.provider ?? "openai",
|
||||
model: params.model ?? "mock-embed",
|
||||
outputDimensionality: params.outputDimensionality,
|
||||
store: { path: params.storePath, vector: { enabled: params.vectorEnabled ?? false } },
|
||||
// Perf: keep test indexes to a single chunk to reduce sqlite work.
|
||||
chunking: { tokens: 4000, overlap: 0 },
|
||||
@@ -342,6 +372,67 @@ describe("memory index", () => {
|
||||
await secondManager.close?.();
|
||||
});
|
||||
|
||||
it("passes Gemini outputDimensionality from config into the provider", async () => {
|
||||
const cfg = createCfg({
|
||||
storePath: indexMainPath,
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
outputDimensionality: 1536,
|
||||
});
|
||||
|
||||
const result = await getMemorySearchManager({ cfg, agentId: "main" });
|
||||
const manager = requireManager(result);
|
||||
|
||||
expect(
|
||||
providerCalls.some(
|
||||
(call) =>
|
||||
call.provider === "gemini" &&
|
||||
call.model === "gemini-embedding-2-preview" &&
|
||||
call.outputDimensionality === 1536,
|
||||
),
|
||||
).toBe(true);
|
||||
await manager.close?.();
|
||||
});
|
||||
|
||||
it("reindexes when Gemini outputDimensionality changes", async () => {
|
||||
const base = createCfg({
|
||||
storePath: indexModelPath,
|
||||
provider: "gemini",
|
||||
model: "gemini-embedding-2-preview",
|
||||
outputDimensionality: 3072,
|
||||
});
|
||||
const baseAgents = base.agents!;
|
||||
const baseDefaults = baseAgents.defaults!;
|
||||
const baseMemorySearch = baseDefaults.memorySearch!;
|
||||
|
||||
const first = await getMemorySearchManager({ cfg: base, agentId: "main" });
|
||||
const firstManager = requireManager(first);
|
||||
await firstManager.sync?.({ reason: "test" });
|
||||
const callsAfterFirstSync = embedBatchCalls;
|
||||
await firstManager.close?.();
|
||||
|
||||
const second = await getMemorySearchManager({
|
||||
cfg: {
|
||||
...base,
|
||||
agents: {
|
||||
...baseAgents,
|
||||
defaults: {
|
||||
...baseDefaults,
|
||||
memorySearch: {
|
||||
...baseMemorySearch,
|
||||
outputDimensionality: 768,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
agentId: "main",
|
||||
});
|
||||
const secondManager = requireManager(second);
|
||||
await secondManager.sync?.({ reason: "test" });
|
||||
expect(embedBatchCalls).toBeGreaterThan(callsAfterFirstSync);
|
||||
await secondManager.close?.();
|
||||
});
|
||||
|
||||
it("reuses cached embeddings on forced reindex", async () => {
|
||||
const cfg = createCfg({ storePath: indexMainPath, cacheEnabled: true });
|
||||
const manager = await getPersistentManager(cfg);
|
||||
|
||||
@@ -236,6 +236,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
provider: "gemini",
|
||||
baseUrl: this.gemini.baseUrl,
|
||||
model: this.gemini.model,
|
||||
outputDimensionality: this.gemini.outputDimensionality,
|
||||
headers: entries,
|
||||
}),
|
||||
);
|
||||
@@ -483,6 +484,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
buildRequest: (chunk) => ({
|
||||
content: { parts: [{ text: chunk.text }] },
|
||||
taskType: "RETRIEVAL_DOCUMENT",
|
||||
outputDimensionality: this.gemini?.outputDimensionality,
|
||||
}),
|
||||
runBatch: async (runnerOptions) =>
|
||||
await runGeminiEmbeddingBatches({
|
||||
|
||||
@@ -996,6 +996,7 @@ export abstract class MemoryManagerSyncOps {
|
||||
provider: fallback,
|
||||
remote: this.settings.remote,
|
||||
model: fallbackModel,
|
||||
outputDimensionality: this.settings.outputDimensionality,
|
||||
fallback: "none",
|
||||
local: this.settings.local,
|
||||
});
|
||||
|
||||
@@ -157,6 +157,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
provider: settings.provider,
|
||||
remote: settings.remote,
|
||||
model: settings.model,
|
||||
outputDimensionality: settings.outputDimensionality,
|
||||
fallback: settings.fallback,
|
||||
local: settings.local,
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user