mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 11:40:42 +00:00
test: isolate gemini embedding request helpers
This commit is contained in:
121
packages/memory-host-sdk/src/host/embeddings-gemini-request.ts
Normal file
121
packages/memory-host-sdk/src/host/embeddings-gemini-request.ts
Normal file
@@ -0,0 +1,121 @@
|
||||
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||
|
||||
/** Model name that `normalizeGeminiModel` falls back to when given a blank string. */
export const DEFAULT_GEMINI_EMBEDDING_MODEL = "gemini-embedding-001";

/**
 * Exact model names that `isGeminiEmbedding2Model` treats as gemini-embedding-2
 * variants. Membership is an exact string match.
 */
export const GEMINI_EMBEDDING_2_MODELS = new Set([
  "gemini-embedding-2-preview",
  // Add the GA model name here once released.
]);

/** Dimensionality used for gemini-embedding-2 models when the caller requests none. */
const GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS = 3072;

/** The only `outputDimensionality` values accepted for gemini-embedding-2 models. */
const GEMINI_EMBEDDING_2_VALID_DIMENSIONS = [768, 1536, 3072] as const;
|
||||
|
||||
/** Task-type hint carried in the `taskType` field of a Gemini embedding request. */
export type GeminiTaskType =
  | "RETRIEVAL_QUERY"
  | "RETRIEVAL_DOCUMENT"
  | "SEMANTIC_SIMILARITY"
  | "CLASSIFICATION"
  | "CLUSTERING"
  | "QUESTION_ANSWERING"
  | "FACT_VERIFICATION";

/** Plain-text content part. */
export type GeminiTextPart = { text: string };

/**
 * Inline (non-text) content part: a MIME type plus its payload string.
 * NOTE(review): `data` is presumably base64-encoded — confirm against callers.
 */
export type GeminiInlinePart = {
  inlineData: { mimeType: string; data: string };
};

/** Any content part that may appear in a request's `content.parts`. */
export type GeminiPart = GeminiTextPart | GeminiInlinePart;

/** Request body produced by `buildGeminiEmbeddingRequest`. */
export type GeminiEmbeddingRequest = {
  content: { parts: GeminiPart[] };
  taskType: GeminiTaskType;
  /** Present only when a dimensionality was supplied to the builder. */
  outputDimensionality?: number;
  /** Present only when a model path was supplied to the builder. */
  model?: string;
};

/** Alias kept for text-only call sites; structurally identical to `GeminiEmbeddingRequest`. */
export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest;
|
||||
|
||||
/**
 * Builds the text-only Gemini embedding request shape used across direct and batch APIs.
 *
 * Wraps `text` as `{ text }` and delegates to `buildGeminiEmbeddingRequest`,
 * forwarding the remaining options unchanged.
 *
 * @param params.text - Text to embed.
 * @param params.taskType - Task-type hint copied into the request.
 * @param params.outputDimensionality - Optional dimensionality, forwarded as-is.
 * @param params.modelPath - Optional model path, forwarded as-is.
 * @returns The request object produced by `buildGeminiEmbeddingRequest`.
 */
export function buildGeminiTextEmbeddingRequest(params: {
  text: string;
  taskType: GeminiTaskType;
  outputDimensionality?: number;
  modelPath?: string;
}): GeminiTextEmbeddingRequest {
  const { text, taskType, outputDimensionality, modelPath } = params;
  return buildGeminiEmbeddingRequest({
    input: { text },
    taskType,
    outputDimensionality,
    modelPath,
  });
}
|
||||
|
||||
/**
 * Builds a Gemini embedding request from a text or multi-part input.
 *
 * Each entry of `input.parts` becomes one request part: entries whose `type`
 * is `"text"` map to `{ text }`, every other entry maps to
 * `{ inlineData: { mimeType, data } }`. When `input.parts` is absent or empty
 * the request carries a single text part built from `input.text`, so the
 * request never goes out with zero parts.
 *
 * @param params.input - Embedding input; `parts` takes precedence over `text` when non-empty.
 * @param params.taskType - Task-type hint copied into the request.
 * @param params.outputDimensionality - Copied into the request unless null/undefined.
 * @param params.modelPath - Copied into the request as `model` when truthy.
 * @returns A new request object.
 */
export function buildGeminiEmbeddingRequest(params: {
  input: EmbeddingInput;
  taskType: GeminiTaskType;
  outputDimensionality?: number;
  modelPath?: string;
}): GeminiEmbeddingRequest {
  const { input, taskType, outputDimensionality, modelPath } = params;
  const inputParts = input.parts;

  const parts: GeminiEmbeddingRequest["content"]["parts"] =
    inputParts != null && inputParts.length > 0
      ? inputParts.map((part) =>
          part.type === "text"
            ? ({ text: part.text } satisfies GeminiTextPart)
            : ({
                inlineData: { mimeType: part.mimeType, data: part.data },
              } satisfies GeminiInlinePart),
        )
      : [{ text: input.text }];

  const request: GeminiEmbeddingRequest = {
    content: { parts },
    taskType,
  };
  // Optional fields are attached only when supplied so they stay absent from
  // the serialized body otherwise.
  if (modelPath) {
    request.model = modelPath;
  }
  if (outputDimensionality != null) {
    request.outputDimensionality = outputDimensionality;
  }
  return request;
}
|
||||
|
||||
/**
 * Returns true if the given model name is a gemini-embedding-2 variant that
 * supports `outputDimensionality` and extended task types.
 *
 * The check is an exact membership test against `GEMINI_EMBEDDING_2_MODELS`;
 * the name is not trimmed or stripped of prefixes here.
 *
 * @param model - Model name to test.
 */
export function isGeminiEmbedding2Model(model: string): boolean {
  return GEMINI_EMBEDDING_2_MODELS.has(model);
}
|
||||
|
||||
/**
 * Validate and return the `outputDimensionality` for gemini-embedding-2 models.
 * Returns `undefined` for older models (they don't support the param).
 *
 * @param model - Model name, matched via `isGeminiEmbedding2Model`.
 * @param requested - Requested dimensionality; null/undefined selects the default.
 * @returns `undefined` for non-v2 models, the default when nothing was
 *   requested, otherwise the validated `requested` value.
 * @throws Error when `requested` is not one of the valid dimensions.
 */
export function resolveGeminiOutputDimensionality(
  model: string,
  requested?: number,
): number | undefined {
  if (!isGeminiEmbedding2Model(model)) {
    // A requested value is ignored (not validated) for models without the param.
    return undefined;
  }
  if (requested == null) {
    return GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS;
  }
  // Widen the literal tuple so `includes` accepts an arbitrary number.
  const valid: readonly number[] = GEMINI_EMBEDDING_2_VALID_DIMENSIONS;
  if (!valid.includes(requested)) {
    throw new Error(
      `Invalid outputDimensionality ${requested} for ${model}. Valid values: ${valid.join(", ")}`,
    );
  }
  return requested;
}
|
||||
|
||||
/**
 * Normalizes a configured Gemini model identifier to a bare model name.
 *
 * Surrounding whitespace is trimmed, a leading `models/` is removed, and then
 * a single leading `gemini/` or `google/` provider prefix is removed. If
 * nothing is left — blank input, or input consisting only of prefixes such as
 * `"models/"` — the default embedding model is returned instead of an empty
 * name.
 *
 * @param model - Raw model identifier.
 * @returns The bare model name, or `DEFAULT_GEMINI_EMBEDDING_MODEL`.
 */
export function normalizeGeminiModel(model: string): string {
  const withoutModelsPrefix = model.trim().replace(/^models\//, "");
  // Only one provider prefix is stripped; "gemini/" is checked before "google/".
  const providerPrefix = ["gemini/", "google/"].find((prefix) =>
    withoutModelsPrefix.startsWith(prefix),
  );
  const bareName =
    providerPrefix === undefined
      ? withoutModelsPrefix
      : withoutModelsPrefix.slice(providerPrefix.length);
  return bareName || DEFAULT_GEMINI_EMBEDDING_MODEL;
}
|
||||
@@ -1,133 +1,13 @@
|
||||
import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import * as authModule from "../../../../src/agents/model-auth.js";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
buildGeminiEmbeddingRequest,
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
normalizeGeminiModel,
|
||||
resolveGeminiOutputDimensionality,
|
||||
} from "./embeddings-gemini-request.js";
|
||||
|
||||
vi.mock("../../../../src/infra/net/fetch-guard.js", () => ({
|
||||
fetchWithSsrFGuard: async (params: {
|
||||
url: string;
|
||||
init?: RequestInit;
|
||||
fetchImpl?: typeof fetch;
|
||||
}) => {
|
||||
const fetchImpl = params.fetchImpl ?? globalThis.fetch;
|
||||
if (!fetchImpl) {
|
||||
throw new Error("fetch is not available");
|
||||
}
|
||||
const response = await fetchImpl(params.url, params.init);
|
||||
return {
|
||||
response,
|
||||
finalUrl: params.url,
|
||||
release: async () => {},
|
||||
};
|
||||
},
|
||||
}));
|
||||
|
||||
const { resolveApiKeyForProviderMock } = vi.hoisted(() => ({
|
||||
resolveApiKeyForProviderMock: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock("../../../../src/agents/model-auth.js", () => {
|
||||
return {
|
||||
resolveApiKeyForProvider: resolveApiKeyForProviderMock,
|
||||
requireApiKey: (auth: { apiKey?: string; mode?: string }, provider: string) => {
|
||||
if (auth.apiKey) {
|
||||
return auth.apiKey;
|
||||
}
|
||||
throw new Error(`No API key resolved for provider "${provider}" (auth mode: ${auth.mode}).`);
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
const createGeminiFetchMock = (embeddingValues = [1, 2, 3]) =>
|
||||
vi.fn(async (_input?: unknown, _init?: unknown) => ({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({ embedding: { values: embeddingValues } }),
|
||||
}));
|
||||
|
||||
const createGeminiBatchFetchMock = (count: number, embeddingValues = [1, 2, 3]) =>
|
||||
vi.fn(async (_input?: unknown, _init?: unknown) => ({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({
|
||||
embeddings: Array.from({ length: count }, () => ({ values: embeddingValues })),
|
||||
}),
|
||||
}));
|
||||
|
||||
function installFetchMock(fetchMock: typeof globalThis.fetch) {
|
||||
vi.stubGlobal("fetch", fetchMock);
|
||||
}
|
||||
|
||||
function parseFetchBody(fetchMock: { mock: { calls: unknown[][] } }, callIndex = 0) {
|
||||
const init = fetchMock.mock.calls[callIndex]?.[1] as RequestInit | undefined;
|
||||
return JSON.parse((init?.body as string) ?? "{}") as Record<string, unknown>;
|
||||
}
|
||||
|
||||
function magnitude(values: number[]) {
|
||||
return Math.sqrt(values.reduce((sum, value) => sum + value * value, 0));
|
||||
}
|
||||
|
||||
let buildGeminiEmbeddingRequest: typeof import("./embeddings-gemini.js").buildGeminiEmbeddingRequest;
|
||||
let createGeminiEmbeddingProvider: typeof import("./embeddings-gemini.js").createGeminiEmbeddingProvider;
|
||||
let DEFAULT_GEMINI_EMBEDDING_MODEL: typeof import("./embeddings-gemini.js").DEFAULT_GEMINI_EMBEDDING_MODEL;
|
||||
let normalizeGeminiModel: typeof import("./embeddings-gemini.js").normalizeGeminiModel;
|
||||
let resolveGeminiOutputDimensionality: typeof import("./embeddings-gemini.js").resolveGeminiOutputDimensionality;
|
||||
|
||||
beforeAll(async () => {
|
||||
vi.doUnmock("undici");
|
||||
({
|
||||
buildGeminiEmbeddingRequest,
|
||||
createGeminiEmbeddingProvider,
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
normalizeGeminiModel,
|
||||
resolveGeminiOutputDimensionality,
|
||||
} = await import("./embeddings-gemini.js"));
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
vi.useRealTimers();
|
||||
vi.doUnmock("undici");
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.doUnmock("undici");
|
||||
vi.resetAllMocks();
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
function mockResolvedProviderKey(apiKey = "test-key") {
|
||||
vi.mocked(authModule.resolveApiKeyForProvider).mockResolvedValue({
|
||||
apiKey,
|
||||
mode: "api-key",
|
||||
source: "test",
|
||||
});
|
||||
}
|
||||
|
||||
type GeminiFetchMock =
|
||||
| ReturnType<typeof createGeminiFetchMock>
|
||||
| ReturnType<typeof createGeminiBatchFetchMock>;
|
||||
|
||||
async function createProviderWithFetch(
|
||||
fetchMock: GeminiFetchMock,
|
||||
options: Partial<Parameters<typeof createGeminiEmbeddingProvider>[0]> & { model: string },
|
||||
) {
|
||||
installFetchMock(fetchMock as unknown as typeof globalThis.fetch);
|
||||
mockResolvedProviderKey();
|
||||
const { provider } = await createGeminiEmbeddingProvider({
|
||||
config: {} as never,
|
||||
provider: "gemini",
|
||||
fallback: "none",
|
||||
...options,
|
||||
});
|
||||
return provider;
|
||||
}
|
||||
|
||||
function expectNormalizedThreeFourVector(embedding: number[]) {
|
||||
expect(embedding[0]).toBeCloseTo(0.6, 5);
|
||||
expect(embedding[1]).toBeCloseTo(0.8, 5);
|
||||
expect(magnitude(embedding)).toBeCloseTo(1, 5);
|
||||
}
|
||||
|
||||
describe("package Gemini embedding provider smoke", () => {
|
||||
it("builds multimodal v2 requests and resolves dimensions", () => {
|
||||
describe("package Gemini embedding request helpers", () => {
|
||||
it("builds multimodal v2 requests and resolves model settings", () => {
|
||||
expect(
|
||||
buildGeminiEmbeddingRequest({
|
||||
input: {
|
||||
@@ -158,57 +38,6 @@ describe("package Gemini embedding provider smoke", () => {
|
||||
expect(() => resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 512)).toThrow(
|
||||
/Invalid outputDimensionality 512/,
|
||||
);
|
||||
});
|
||||
|
||||
it("handles legacy and v2 request/response behavior", async () => {
|
||||
const legacyFetch = createGeminiBatchFetchMock(2);
|
||||
const legacyProvider = await createProviderWithFetch(legacyFetch, {
|
||||
model: "gemini-embedding-001",
|
||||
});
|
||||
|
||||
await legacyProvider.embedQuery("test query");
|
||||
await legacyProvider.embedBatch(["text1", "text2"]);
|
||||
|
||||
expect(parseFetchBody(legacyFetch, 0)).toMatchObject({
|
||||
taskType: "RETRIEVAL_QUERY",
|
||||
content: { parts: [{ text: "test query" }] },
|
||||
});
|
||||
expect(parseFetchBody(legacyFetch, 0)).not.toHaveProperty("outputDimensionality");
|
||||
expect(parseFetchBody(legacyFetch, 1)).not.toHaveProperty("outputDimensionality");
|
||||
|
||||
const v2QueryFetch = createGeminiFetchMock([3, 4]);
|
||||
const v2QueryProvider = await createProviderWithFetch(v2QueryFetch, {
|
||||
model: "gemini-embedding-2-preview",
|
||||
outputDimensionality: 768,
|
||||
taskType: "SEMANTIC_SIMILARITY",
|
||||
});
|
||||
await expect(v2QueryProvider.embedQuery(" ")).resolves.toEqual([]);
|
||||
await expect(v2QueryProvider.embedBatch([])).resolves.toEqual([]);
|
||||
expectNormalizedThreeFourVector(await v2QueryProvider.embedQuery("test query"));
|
||||
|
||||
const v2BatchFetch = createGeminiBatchFetchMock(2, [3, 4]);
|
||||
const v2BatchProvider = await createProviderWithFetch(v2BatchFetch, {
|
||||
model: "gemini-embedding-2-preview",
|
||||
outputDimensionality: 768,
|
||||
taskType: "SEMANTIC_SIMILARITY",
|
||||
});
|
||||
const batch = await v2BatchProvider.embedBatch(["text1", "text2"]);
|
||||
expect(batch).toHaveLength(2);
|
||||
for (const embedding of batch) {
|
||||
expectNormalizedThreeFourVector(embedding);
|
||||
}
|
||||
|
||||
expect(parseFetchBody(v2QueryFetch)).toMatchObject({
|
||||
outputDimensionality: 768,
|
||||
taskType: "SEMANTIC_SIMILARITY",
|
||||
});
|
||||
expect(parseFetchBody(v2BatchFetch).requests).toEqual([
|
||||
expect.objectContaining({ outputDimensionality: 768 }),
|
||||
expect.objectContaining({ outputDimensionality: 768 }),
|
||||
]);
|
||||
});
|
||||
|
||||
it("normalizes known model prefixes and the default model", () => {
|
||||
expect(normalizeGeminiModel("models/gemini-embedding-2-preview")).toBe(
|
||||
"gemini-embedding-2-preview",
|
||||
);
|
||||
|
||||
@@ -12,10 +12,33 @@ import type { SsrFPolicy } from "../../../../src/infra/net/ssrf.js";
|
||||
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
||||
import { debugEmbeddingsLog } from "./embeddings-debug.js";
|
||||
import {
|
||||
buildGeminiEmbeddingRequest,
|
||||
buildGeminiTextEmbeddingRequest,
|
||||
isGeminiEmbedding2Model,
|
||||
normalizeGeminiModel,
|
||||
resolveGeminiOutputDimensionality,
|
||||
} from "./embeddings-gemini-request.js";
|
||||
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.js";
|
||||
import { buildRemoteBaseUrlPolicy, withRemoteHttpResponse } from "./remote-http.js";
|
||||
import { resolveMemorySecretInputString } from "./secret-input.js";
|
||||
|
||||
export {
|
||||
buildGeminiEmbeddingRequest,
|
||||
buildGeminiTextEmbeddingRequest,
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
GEMINI_EMBEDDING_2_MODELS,
|
||||
isGeminiEmbedding2Model,
|
||||
normalizeGeminiModel,
|
||||
resolveGeminiOutputDimensionality,
|
||||
type GeminiEmbeddingRequest,
|
||||
type GeminiInlinePart,
|
||||
type GeminiPart,
|
||||
type GeminiTaskType,
|
||||
type GeminiTextEmbeddingRequest,
|
||||
type GeminiTextPart,
|
||||
} from "./embeddings-gemini-request.js";
|
||||
|
||||
export type GeminiEmbeddingClient = {
|
||||
baseUrl: string;
|
||||
headers: Record<string, string>;
|
||||
@@ -26,115 +49,9 @@ export type GeminiEmbeddingClient = {
|
||||
outputDimensionality?: number;
|
||||
};
|
||||
|
||||
export const DEFAULT_GEMINI_EMBEDDING_MODEL = "gemini-embedding-001";
|
||||
const GEMINI_MAX_INPUT_TOKENS: Record<string, number> = {
|
||||
"text-embedding-004": 2048,
|
||||
};
|
||||
|
||||
// --- gemini-embedding-2-preview support ---
|
||||
|
||||
export const GEMINI_EMBEDDING_2_MODELS = new Set([
|
||||
"gemini-embedding-2-preview",
|
||||
// Add the GA model name here once released.
|
||||
]);
|
||||
|
||||
const GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS = 3072;
|
||||
const GEMINI_EMBEDDING_2_VALID_DIMENSIONS = [768, 1536, 3072] as const;
|
||||
|
||||
export type GeminiTaskType =
|
||||
| "RETRIEVAL_QUERY"
|
||||
| "RETRIEVAL_DOCUMENT"
|
||||
| "SEMANTIC_SIMILARITY"
|
||||
| "CLASSIFICATION"
|
||||
| "CLUSTERING"
|
||||
| "QUESTION_ANSWERING"
|
||||
| "FACT_VERIFICATION";
|
||||
|
||||
export type GeminiTextPart = { text: string };
|
||||
export type GeminiInlinePart = {
|
||||
inlineData: { mimeType: string; data: string };
|
||||
};
|
||||
export type GeminiPart = GeminiTextPart | GeminiInlinePart;
|
||||
export type GeminiEmbeddingRequest = {
|
||||
content: { parts: GeminiPart[] };
|
||||
taskType: GeminiTaskType;
|
||||
outputDimensionality?: number;
|
||||
model?: string;
|
||||
};
|
||||
export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest;
|
||||
|
||||
/** Builds the text-only Gemini embedding request shape used across direct and batch APIs. */
|
||||
export function buildGeminiTextEmbeddingRequest(params: {
|
||||
text: string;
|
||||
taskType: GeminiTaskType;
|
||||
outputDimensionality?: number;
|
||||
modelPath?: string;
|
||||
}): GeminiTextEmbeddingRequest {
|
||||
return buildGeminiEmbeddingRequest({
|
||||
input: { text: params.text },
|
||||
taskType: params.taskType,
|
||||
outputDimensionality: params.outputDimensionality,
|
||||
modelPath: params.modelPath,
|
||||
});
|
||||
}
|
||||
|
||||
export function buildGeminiEmbeddingRequest(params: {
|
||||
input: EmbeddingInput;
|
||||
taskType: GeminiTaskType;
|
||||
outputDimensionality?: number;
|
||||
modelPath?: string;
|
||||
}): GeminiEmbeddingRequest {
|
||||
const request: GeminiEmbeddingRequest = {
|
||||
content: {
|
||||
parts: params.input.parts?.map((part) =>
|
||||
part.type === "text"
|
||||
? ({ text: part.text } satisfies GeminiTextPart)
|
||||
: ({
|
||||
inlineData: { mimeType: part.mimeType, data: part.data },
|
||||
} satisfies GeminiInlinePart),
|
||||
) ?? [{ text: params.input.text }],
|
||||
},
|
||||
taskType: params.taskType,
|
||||
};
|
||||
if (params.modelPath) {
|
||||
request.model = params.modelPath;
|
||||
}
|
||||
if (params.outputDimensionality != null) {
|
||||
request.outputDimensionality = params.outputDimensionality;
|
||||
}
|
||||
return request;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the given model name is a gemini-embedding-2 variant that
|
||||
* supports `outputDimensionality` and extended task types.
|
||||
*/
|
||||
export function isGeminiEmbedding2Model(model: string): boolean {
|
||||
return GEMINI_EMBEDDING_2_MODELS.has(model);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate and return the `outputDimensionality` for gemini-embedding-2 models.
|
||||
* Returns `undefined` for older models (they don't support the param).
|
||||
*/
|
||||
export function resolveGeminiOutputDimensionality(
|
||||
model: string,
|
||||
requested?: number,
|
||||
): number | undefined {
|
||||
if (!isGeminiEmbedding2Model(model)) {
|
||||
return undefined;
|
||||
}
|
||||
if (requested == null) {
|
||||
return GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS;
|
||||
}
|
||||
const valid: readonly number[] = GEMINI_EMBEDDING_2_VALID_DIMENSIONS;
|
||||
if (!valid.includes(requested)) {
|
||||
throw new Error(
|
||||
`Invalid outputDimensionality ${requested} for ${model}. Valid values: ${valid.join(", ")}`,
|
||||
);
|
||||
}
|
||||
return requested;
|
||||
}
|
||||
function resolveRemoteApiKey(remoteApiKey: unknown): string | undefined {
|
||||
const trimmed = resolveMemorySecretInputString({
|
||||
value: remoteApiKey,
|
||||
@@ -149,21 +66,6 @@ function resolveRemoteApiKey(remoteApiKey: unknown): string | undefined {
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
export function normalizeGeminiModel(model: string): string {
|
||||
const trimmed = model.trim();
|
||||
if (!trimmed) {
|
||||
return DEFAULT_GEMINI_EMBEDDING_MODEL;
|
||||
}
|
||||
const withoutPrefix = trimmed.replace(/^models\//, "");
|
||||
if (withoutPrefix.startsWith("gemini/")) {
|
||||
return withoutPrefix.slice("gemini/".length);
|
||||
}
|
||||
if (withoutPrefix.startsWith("google/")) {
|
||||
return withoutPrefix.slice("google/".length);
|
||||
}
|
||||
return withoutPrefix;
|
||||
}
|
||||
|
||||
async function fetchGeminiEmbeddingPayload(params: {
|
||||
client: GeminiEmbeddingClient;
|
||||
endpoint: string;
|
||||
|
||||
115
src/memory-host-sdk/host/embeddings-gemini-request.ts
Normal file
115
src/memory-host-sdk/host/embeddings-gemini-request.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||
import type { GeminiTaskType } from "./embeddings.types.js";
|
||||
|
||||
/** Model name that `normalizeGeminiModel` falls back to when given a blank string. */
export const DEFAULT_GEMINI_EMBEDDING_MODEL = "gemini-embedding-001";

/**
 * Exact model names that `isGeminiEmbedding2Model` treats as gemini-embedding-2
 * variants. Membership is an exact string match.
 */
export const GEMINI_EMBEDDING_2_MODELS = new Set([
  "gemini-embedding-2-preview",
  // Add the GA model name here once released.
]);

/** Dimensionality used for gemini-embedding-2 models when the caller requests none. */
const GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS = 3072;

/** The only `outputDimensionality` values accepted for gemini-embedding-2 models. */
const GEMINI_EMBEDDING_2_VALID_DIMENSIONS = [768, 1536, 3072] as const;
|
||||
|
||||
export type { GeminiTaskType } from "./embeddings.types.js";
|
||||
|
||||
/** Plain-text content part. */
export type GeminiTextPart = { text: string };

/**
 * Inline (non-text) content part: a MIME type plus its payload string.
 * NOTE(review): `data` is presumably base64-encoded — confirm against callers.
 */
export type GeminiInlinePart = {
  inlineData: { mimeType: string; data: string };
};

/** Any content part that may appear in a request's `content.parts`. */
export type GeminiPart = GeminiTextPart | GeminiInlinePart;

/** Request body produced by `buildGeminiEmbeddingRequest`. */
export type GeminiEmbeddingRequest = {
  content: { parts: GeminiPart[] };
  taskType: GeminiTaskType;
  /** Present only when a dimensionality was supplied to the builder. */
  outputDimensionality?: number;
  /** Present only when a model path was supplied to the builder. */
  model?: string;
};

/** Alias kept for text-only call sites; structurally identical to `GeminiEmbeddingRequest`. */
export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest;
|
||||
|
||||
/**
 * Builds the text-only Gemini embedding request shape used across direct and batch APIs.
 *
 * Wraps `text` as `{ text }` and delegates to `buildGeminiEmbeddingRequest`,
 * forwarding the remaining options unchanged.
 *
 * @param params.text - Text to embed.
 * @param params.taskType - Task-type hint copied into the request.
 * @param params.outputDimensionality - Optional dimensionality, forwarded as-is.
 * @param params.modelPath - Optional model path, forwarded as-is.
 * @returns The request object produced by `buildGeminiEmbeddingRequest`.
 */
export function buildGeminiTextEmbeddingRequest(params: {
  text: string;
  taskType: GeminiTaskType;
  outputDimensionality?: number;
  modelPath?: string;
}): GeminiTextEmbeddingRequest {
  const { text, taskType, outputDimensionality, modelPath } = params;
  return buildGeminiEmbeddingRequest({
    input: { text },
    taskType,
    outputDimensionality,
    modelPath,
  });
}
|
||||
|
||||
/**
 * Builds a Gemini embedding request from a text or multi-part input.
 *
 * Each entry of `input.parts` becomes one request part: entries whose `type`
 * is `"text"` map to `{ text }`, every other entry maps to
 * `{ inlineData: { mimeType, data } }`. When `input.parts` is absent or empty
 * the request carries a single text part built from `input.text`, so the
 * request never goes out with zero parts.
 *
 * @param params.input - Embedding input; `parts` takes precedence over `text` when non-empty.
 * @param params.taskType - Task-type hint copied into the request.
 * @param params.outputDimensionality - Copied into the request unless null/undefined.
 * @param params.modelPath - Copied into the request as `model` when truthy.
 * @returns A new request object.
 */
export function buildGeminiEmbeddingRequest(params: {
  input: EmbeddingInput;
  taskType: GeminiTaskType;
  outputDimensionality?: number;
  modelPath?: string;
}): GeminiEmbeddingRequest {
  const { input, taskType, outputDimensionality, modelPath } = params;
  const inputParts = input.parts;

  const parts: GeminiEmbeddingRequest["content"]["parts"] =
    inputParts != null && inputParts.length > 0
      ? inputParts.map((part) =>
          part.type === "text"
            ? ({ text: part.text } satisfies GeminiTextPart)
            : ({
                inlineData: { mimeType: part.mimeType, data: part.data },
              } satisfies GeminiInlinePart),
        )
      : [{ text: input.text }];

  const request: GeminiEmbeddingRequest = {
    content: { parts },
    taskType,
  };
  // Optional fields are attached only when supplied so they stay absent from
  // the serialized body otherwise.
  if (modelPath) {
    request.model = modelPath;
  }
  if (outputDimensionality != null) {
    request.outputDimensionality = outputDimensionality;
  }
  return request;
}
|
||||
|
||||
/**
 * Returns true if the given model name is a gemini-embedding-2 variant that
 * supports `outputDimensionality` and extended task types.
 *
 * The check is an exact membership test against `GEMINI_EMBEDDING_2_MODELS`;
 * the name is not trimmed or stripped of prefixes here.
 *
 * @param model - Model name to test.
 */
export function isGeminiEmbedding2Model(model: string): boolean {
  return GEMINI_EMBEDDING_2_MODELS.has(model);
}
|
||||
|
||||
/**
 * Validate and return the `outputDimensionality` for gemini-embedding-2 models.
 * Returns `undefined` for older models (they don't support the param).
 *
 * @param model - Model name, matched via `isGeminiEmbedding2Model`.
 * @param requested - Requested dimensionality; null/undefined selects the default.
 * @returns `undefined` for non-v2 models, the default when nothing was
 *   requested, otherwise the validated `requested` value.
 * @throws Error when `requested` is not one of the valid dimensions.
 */
export function resolveGeminiOutputDimensionality(
  model: string,
  requested?: number,
): number | undefined {
  if (!isGeminiEmbedding2Model(model)) {
    // A requested value is ignored (not validated) for models without the param.
    return undefined;
  }
  if (requested == null) {
    return GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS;
  }
  // Widen the literal tuple so `includes` accepts an arbitrary number.
  const valid: readonly number[] = GEMINI_EMBEDDING_2_VALID_DIMENSIONS;
  if (!valid.includes(requested)) {
    throw new Error(
      `Invalid outputDimensionality ${requested} for ${model}. Valid values: ${valid.join(", ")}`,
    );
  }
  return requested;
}
|
||||
|
||||
/**
 * Normalizes a configured Gemini model identifier to a bare model name.
 *
 * Surrounding whitespace is trimmed, a leading `models/` is removed, and then
 * a single leading `gemini/` or `google/` provider prefix is removed. If
 * nothing is left — blank input, or input consisting only of prefixes such as
 * `"models/"` — the default embedding model is returned instead of an empty
 * name.
 *
 * @param model - Raw model identifier.
 * @returns The bare model name, or `DEFAULT_GEMINI_EMBEDDING_MODEL`.
 */
export function normalizeGeminiModel(model: string): string {
  const withoutModelsPrefix = model.trim().replace(/^models\//, "");
  // Only one provider prefix is stripped; "gemini/" is checked before "google/".
  const providerPrefix = ["gemini/", "google/"].find((prefix) =>
    withoutModelsPrefix.startsWith(prefix),
  );
  const bareName =
    providerPrefix === undefined
      ? withoutModelsPrefix
      : withoutModelsPrefix.slice(providerPrefix.length);
  return bareName || DEFAULT_GEMINI_EMBEDDING_MODEL;
}
|
||||
@@ -3,13 +3,12 @@ import * as authModule from "../../agents/model-auth.js";
|
||||
import {
|
||||
buildGeminiEmbeddingRequest,
|
||||
buildGeminiTextEmbeddingRequest,
|
||||
createGeminiEmbeddingProvider,
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
GEMINI_EMBEDDING_2_MODELS,
|
||||
isGeminiEmbedding2Model,
|
||||
normalizeGeminiModel,
|
||||
resolveGeminiOutputDimensionality,
|
||||
} from "./embeddings-gemini.js";
|
||||
} from "./embeddings-gemini-request.js";
|
||||
import {
|
||||
createGeminiBatchFetchMock,
|
||||
createJsonResponseFetchMock,
|
||||
@@ -47,12 +46,17 @@ afterEach(() => {
|
||||
vi.unstubAllGlobals();
|
||||
});
|
||||
|
||||
type GeminiProviderOptions = Parameters<
|
||||
typeof import("./embeddings-gemini.js").createGeminiEmbeddingProvider
|
||||
>[0];
|
||||
|
||||
async function createProviderWithFetch(
|
||||
fetchMock: JsonFetchMock,
|
||||
options: Partial<Parameters<typeof createGeminiEmbeddingProvider>[0]> & { model: string },
|
||||
options: Partial<GeminiProviderOptions> & { model: string },
|
||||
) {
|
||||
installFetchMock(fetchMock as unknown as typeof globalThis.fetch);
|
||||
mockResolvedProviderKey(authModule.resolveApiKeyForProvider);
|
||||
const { createGeminiEmbeddingProvider } = await import("./embeddings-gemini.js");
|
||||
const { provider } = await createGeminiEmbeddingProvider({
|
||||
config: {} as never,
|
||||
provider: "gemini",
|
||||
@@ -63,7 +67,7 @@ async function createProviderWithFetch(
|
||||
}
|
||||
|
||||
describe("Gemini embedding request helpers", () => {
|
||||
it("builds text and multimodal requests", () => {
|
||||
it("builds requests and resolves model settings", () => {
|
||||
expect(
|
||||
buildGeminiTextEmbeddingRequest({
|
||||
text: "hello",
|
||||
@@ -101,16 +105,10 @@ describe("Gemini embedding request helpers", () => {
|
||||
taskType: "RETRIEVAL_DOCUMENT",
|
||||
outputDimensionality: 1536,
|
||||
});
|
||||
});
|
||||
|
||||
it("detects v2 model names", () => {
|
||||
expect(GEMINI_EMBEDDING_2_MODELS.has("gemini-embedding-2-preview")).toBe(true);
|
||||
expect(isGeminiEmbedding2Model("gemini-embedding-2-preview")).toBe(true);
|
||||
expect(isGeminiEmbedding2Model("gemini-embedding-001")).toBe(false);
|
||||
expect(isGeminiEmbedding2Model("text-embedding-004")).toBe(false);
|
||||
});
|
||||
|
||||
it("resolves v2 dimensions and rejects invalid values", () => {
|
||||
expect(resolveGeminiOutputDimensionality("gemini-embedding-001")).toBeUndefined();
|
||||
expect(resolveGeminiOutputDimensionality("text-embedding-004")).toBeUndefined();
|
||||
expect(resolveGeminiOutputDimensionality("gemini-embedding-2-preview")).toBe(3072);
|
||||
@@ -123,9 +121,6 @@ describe("Gemini embedding request helpers", () => {
|
||||
expect(() => resolveGeminiOutputDimensionality("gemini-embedding-2-preview", 1024)).toThrow(
|
||||
/Valid values: 768, 1536, 3072/,
|
||||
);
|
||||
});
|
||||
|
||||
it("normalizes known model prefixes and default model", () => {
|
||||
expect(normalizeGeminiModel("models/gemini-embedding-2-preview")).toBe(
|
||||
"gemini-embedding-2-preview",
|
||||
);
|
||||
|
||||
@@ -13,14 +13,33 @@ import { normalizeOptionalString } from "../../shared/string-coerce.js";
|
||||
import type { EmbeddingInput } from "./embedding-inputs.js";
|
||||
import { sanitizeAndNormalizeEmbedding } from "./embedding-vectors.js";
|
||||
import { debugEmbeddingsLog } from "./embeddings-debug.js";
|
||||
import type {
|
||||
EmbeddingProvider,
|
||||
EmbeddingProviderOptions,
|
||||
GeminiTaskType,
|
||||
} from "./embeddings.types.js";
|
||||
import {
|
||||
buildGeminiEmbeddingRequest,
|
||||
buildGeminiTextEmbeddingRequest,
|
||||
isGeminiEmbedding2Model,
|
||||
normalizeGeminiModel,
|
||||
resolveGeminiOutputDimensionality,
|
||||
} from "./embeddings-gemini-request.js";
|
||||
import type { EmbeddingProvider, EmbeddingProviderOptions } from "./embeddings.types.js";
|
||||
import { buildRemoteBaseUrlPolicy, withRemoteHttpResponse } from "./remote-http.js";
|
||||
import { resolveMemorySecretInputString } from "./secret-input.js";
|
||||
|
||||
export {
|
||||
buildGeminiEmbeddingRequest,
|
||||
buildGeminiTextEmbeddingRequest,
|
||||
DEFAULT_GEMINI_EMBEDDING_MODEL,
|
||||
GEMINI_EMBEDDING_2_MODELS,
|
||||
isGeminiEmbedding2Model,
|
||||
normalizeGeminiModel,
|
||||
resolveGeminiOutputDimensionality,
|
||||
type GeminiEmbeddingRequest,
|
||||
type GeminiInlinePart,
|
||||
type GeminiPart,
|
||||
type GeminiTaskType,
|
||||
type GeminiTextEmbeddingRequest,
|
||||
type GeminiTextPart,
|
||||
} from "./embeddings-gemini-request.js";
|
||||
|
||||
export type GeminiEmbeddingClient = {
|
||||
baseUrl: string;
|
||||
headers: Record<string, string>;
|
||||
@@ -31,108 +50,9 @@ export type GeminiEmbeddingClient = {
|
||||
outputDimensionality?: number;
|
||||
};
|
||||
|
||||
export const DEFAULT_GEMINI_EMBEDDING_MODEL = "gemini-embedding-001";
|
||||
const GEMINI_MAX_INPUT_TOKENS: Record<string, number> = {
|
||||
"text-embedding-004": 2048,
|
||||
};
|
||||
|
||||
// --- gemini-embedding-2-preview support ---
|
||||
|
||||
export const GEMINI_EMBEDDING_2_MODELS = new Set([
|
||||
"gemini-embedding-2-preview",
|
||||
// Add the GA model name here once released.
|
||||
]);
|
||||
|
||||
const GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS = 3072;
|
||||
const GEMINI_EMBEDDING_2_VALID_DIMENSIONS = [768, 1536, 3072] as const;
|
||||
|
||||
export type { GeminiTaskType } from "./embeddings.types.js";
|
||||
|
||||
export type GeminiTextPart = { text: string };
|
||||
export type GeminiInlinePart = {
|
||||
inlineData: { mimeType: string; data: string };
|
||||
};
|
||||
export type GeminiPart = GeminiTextPart | GeminiInlinePart;
|
||||
export type GeminiEmbeddingRequest = {
|
||||
content: { parts: GeminiPart[] };
|
||||
taskType: GeminiTaskType;
|
||||
outputDimensionality?: number;
|
||||
model?: string;
|
||||
};
|
||||
export type GeminiTextEmbeddingRequest = GeminiEmbeddingRequest;
|
||||
|
||||
/** Builds the text-only Gemini embedding request shape used across direct and batch APIs. */
|
||||
export function buildGeminiTextEmbeddingRequest(params: {
|
||||
text: string;
|
||||
taskType: GeminiTaskType;
|
||||
outputDimensionality?: number;
|
||||
modelPath?: string;
|
||||
}): GeminiTextEmbeddingRequest {
|
||||
return buildGeminiEmbeddingRequest({
|
||||
input: { text: params.text },
|
||||
taskType: params.taskType,
|
||||
outputDimensionality: params.outputDimensionality,
|
||||
modelPath: params.modelPath,
|
||||
});
|
||||
}
|
||||
|
||||
export function buildGeminiEmbeddingRequest(params: {
|
||||
input: EmbeddingInput;
|
||||
taskType: GeminiTaskType;
|
||||
outputDimensionality?: number;
|
||||
modelPath?: string;
|
||||
}): GeminiEmbeddingRequest {
|
||||
const request: GeminiEmbeddingRequest = {
|
||||
content: {
|
||||
parts: params.input.parts?.map((part) =>
|
||||
part.type === "text"
|
||||
? ({ text: part.text } satisfies GeminiTextPart)
|
||||
: ({
|
||||
inlineData: { mimeType: part.mimeType, data: part.data },
|
||||
} satisfies GeminiInlinePart),
|
||||
) ?? [{ text: params.input.text }],
|
||||
},
|
||||
taskType: params.taskType,
|
||||
};
|
||||
if (params.modelPath) {
|
||||
request.model = params.modelPath;
|
||||
}
|
||||
if (params.outputDimensionality != null) {
|
||||
request.outputDimensionality = params.outputDimensionality;
|
||||
}
|
||||
return request;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the given model name is a gemini-embedding-2 variant that
|
||||
* supports `outputDimensionality` and extended task types.
|
||||
*/
|
||||
export function isGeminiEmbedding2Model(model: string): boolean {
|
||||
return GEMINI_EMBEDDING_2_MODELS.has(model);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate and return the `outputDimensionality` for gemini-embedding-2 models.
|
||||
* Returns `undefined` for older models (they don't support the param).
|
||||
*/
|
||||
export function resolveGeminiOutputDimensionality(
|
||||
model: string,
|
||||
requested?: number,
|
||||
): number | undefined {
|
||||
if (!isGeminiEmbedding2Model(model)) {
|
||||
return undefined;
|
||||
}
|
||||
if (requested == null) {
|
||||
return GEMINI_EMBEDDING_2_DEFAULT_DIMENSIONS;
|
||||
}
|
||||
const valid: readonly number[] = GEMINI_EMBEDDING_2_VALID_DIMENSIONS;
|
||||
if (!valid.includes(requested)) {
|
||||
throw new Error(
|
||||
`Invalid outputDimensionality ${requested} for ${model}. Valid values: ${valid.join(", ")}`,
|
||||
);
|
||||
}
|
||||
return requested;
|
||||
}
|
||||
function resolveRemoteApiKey(remoteApiKey: unknown): string | undefined {
|
||||
const trimmed = resolveMemorySecretInputString({
|
||||
value: remoteApiKey,
|
||||
@@ -147,21 +67,6 @@ function resolveRemoteApiKey(remoteApiKey: unknown): string | undefined {
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
export function normalizeGeminiModel(model: string): string {
|
||||
const trimmed = model.trim();
|
||||
if (!trimmed) {
|
||||
return DEFAULT_GEMINI_EMBEDDING_MODEL;
|
||||
}
|
||||
const withoutPrefix = trimmed.replace(/^models\//, "");
|
||||
if (withoutPrefix.startsWith("gemini/")) {
|
||||
return withoutPrefix.slice("gemini/".length);
|
||||
}
|
||||
if (withoutPrefix.startsWith("google/")) {
|
||||
return withoutPrefix.slice("google/".length);
|
||||
}
|
||||
return withoutPrefix;
|
||||
}
|
||||
|
||||
async function fetchGeminiEmbeddingPayload(params: {
|
||||
client: GeminiEmbeddingClient;
|
||||
endpoint: string;
|
||||
|
||||
Reference in New Issue
Block a user