mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-13 19:10:39 +00:00
36 lines
1.1 KiB
TypeScript
36 lines
1.1 KiB
TypeScript
import { estimateUtf8Bytes, splitTextToUtf8ByteLimit } from "./embedding-input-limits.js";
|
|
import { resolveEmbeddingMaxInputTokens } from "./embedding-model-limits.js";
|
|
import type { EmbeddingProvider } from "./embeddings.js";
|
|
import { hashText, type MemoryChunk } from "./internal.js";
|
|
|
|
/**
 * Produces a chunk list in which oversized chunks have been split to fit the
 * embedding input limit.
 *
 * The effective limit is the value `resolveEmbeddingMaxInputTokens` returns for
 * `provider`, lowered to `hardMaxInputTokens` when that argument is a positive
 * number and smaller than the provider limit.
 *
 * @param provider - Embedding provider whose input limit is looked up.
 * @param chunks - Chunks to check; the array itself is not modified.
 * @param hardMaxInputTokens - Optional extra cap. Ignored unless it is a number
 *   greater than zero.
 * @returns A new array. Chunks within the limit are included as-is (same object
 *   reference); each larger chunk is replaced, at its position, by one entry per
 *   piece returned from `splitTextToUtf8ByteLimit`. Every piece keeps the parent
 *   chunk's `startLine`/`endLine` and gets a hash of its own text.
 */
export function enforceEmbeddingMaxInputTokens(
  provider: EmbeddingProvider,
  chunks: MemoryChunk[],
  hardMaxInputTokens?: number,
): MemoryChunk[] {
  let limit = resolveEmbeddingMaxInputTokens(provider);
  if (typeof hardMaxInputTokens === "number" && hardMaxInputTokens > 0) {
    // A caller-supplied cap can only tighten the provider limit, never raise it.
    limit = Math.min(limit, hardMaxInputTokens);
  }

  const result: MemoryChunk[] = [];
  for (const chunk of chunks) {
    // NOTE(review): the size estimate is compared directly against a token limit;
    // presumably this treats one byte as at most one token — confirm.
    const fits = estimateUtf8Bytes(chunk.text) <= limit;
    if (fits) {
      result.push(chunk);
    } else {
      for (const piece of splitTextToUtf8ByteLimit(chunk.text, limit)) {
        // Pieces inherit the parent's line range; only text and hash are new.
        result.push({
          startLine: chunk.startLine,
          endLine: chunk.endLine,
          text: piece,
          hash: hashText(piece),
        });
      }
    }
  }
  return result;
}
|