mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-13 11:00:50 +00:00
Merged via squash.
Prepared head SHA: c57b1f8ba2
Co-authored-by: BillChirico <13951316+BillChirico@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
42 lines
1.4 KiB
TypeScript
42 lines
1.4 KiB
TypeScript
import type { EmbeddingProvider } from "./embeddings.js";
|
|
|
|
const DEFAULT_EMBEDDING_MAX_INPUT_TOKENS = 8192;
|
|
const DEFAULT_LOCAL_EMBEDDING_MAX_INPUT_TOKENS = 2048;
|
|
|
|
const KNOWN_EMBEDDING_MAX_INPUT_TOKENS: Record<string, number> = {
|
|
"openai:text-embedding-3-small": 8192,
|
|
"openai:text-embedding-3-large": 8192,
|
|
"openai:text-embedding-ada-002": 8191,
|
|
"gemini:text-embedding-004": 2048,
|
|
"gemini:gemini-embedding-001": 2048,
|
|
"gemini:gemini-embedding-2-preview": 8192,
|
|
"voyage:voyage-3": 32000,
|
|
"voyage:voyage-3-lite": 16000,
|
|
"voyage:voyage-code-3": 32000,
|
|
};
|
|
|
|
export function resolveEmbeddingMaxInputTokens(provider: EmbeddingProvider): number {
|
|
if (typeof provider.maxInputTokens === "number") {
|
|
return provider.maxInputTokens;
|
|
}
|
|
|
|
// Provider/model mapping is best-effort; different providers use different
|
|
// limits and we prefer to be conservative when we don't know.
|
|
const key = `${provider.id}:${provider.model}`.toLowerCase();
|
|
const known = KNOWN_EMBEDDING_MAX_INPUT_TOKENS[key];
|
|
if (typeof known === "number") {
|
|
return known;
|
|
}
|
|
|
|
// Provider-specific conservative fallbacks. This prevents us from accidentally
|
|
// using the OpenAI default for providers with much smaller limits.
|
|
if (provider.id.toLowerCase() === "gemini") {
|
|
return 2048;
|
|
}
|
|
if (provider.id.toLowerCase() === "local") {
|
|
return DEFAULT_LOCAL_EMBEDDING_MAX_INPUT_TOKENS;
|
|
}
|
|
|
|
return DEFAULT_EMBEDDING_MAX_INPUT_TOKENS;
|
|
}
|