diff --git a/CHANGELOG.md b/CHANGELOG.md index 97ee25c8baa..5d3b5d367eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -122,6 +122,7 @@ Docs: https://docs.openclaw.ai - Memory/Embeddings: apply configured remote-base host pinning (`allowedHostnames`) across OpenAI/Voyage/Gemini embedding requests to keep private/self-hosted endpoints working without cross-host drift. (#18198) Thanks @ianpcook. - Memory/Batch: route OpenAI/Voyage/Gemini batch upload/create/status/download requests through the same guarded HTTP path for consistent SSRF policy enforcement. - Memory/Index: detect memory source-set changes (for example enabling `sessions` after an existing memory-only index) and trigger a full reindex so existing session transcripts are indexed without requiring `--force`. (#17576) Thanks @TarsAI-Agent. +- Memory/Embeddings: enforce a per-input 8k safety cap before embedding batching and apply a conservative 2k fallback limit for local providers without declared input limits, preventing oversized session/memory chunks from triggering provider context-size failures during sync/indexing. (#6016) Thanks @batumilove. - Memory/QMD: on Windows, resolve bare `qmd`/`mcporter` command names to npm shim executables (`.cmd`) before spawning, so qmd boot updates and mcporter-backed searches no longer fail with `spawn ... ENOENT` on default npm installs. (#23899) Thanks @arcbuilder-ai. - Memory/QMD: parse plain-text `qmd collection list --json` output when older qmd builds ignore JSON mode, and retry memory searches once after re-ensuring managed collections when qmd returns `Collection not found ...`. (#23613) Thanks @leozhucn. - Signal/RPC: guard malformed Signal RPC JSON responses with a clear status-scoped error and add regression coverage for invalid JSON responses. (#22995) Thanks @adhitShet. 
diff --git a/src/memory/embedding-chunk-limits.test.ts b/src/memory/embedding-chunk-limits.test.ts index 83c4a26d341..733f98fe7b2 100644 --- a/src/memory/embedding-chunk-limits.test.ts +++ b/src/memory/embedding-chunk-limits.test.ts @@ -13,6 +13,18 @@ function createProvider(maxInputTokens: number): EmbeddingProvider { }; } +function createProviderWithoutMaxInputTokens(params: { + id: string; + model: string; +}): EmbeddingProvider { + return { + id: params.id, + model: params.model, + embedQuery: async () => [0], + embedBatch: async () => [[0]], + }; +} + describe("embedding chunk limits", () => { it("splits oversized chunks so each embedding input stays <= maxInputTokens bytes", () => { const provider = createProvider(8192); @@ -49,4 +61,42 @@ describe("embedding chunk limits", () => { // If we split inside surrogate pairs we'd likely end up with replacement chars. expect(out.map((chunk) => chunk.text).join("")).not.toContain("\uFFFD"); }); + + it("uses conservative fallback limits for local providers without declared maxInputTokens", () => { + const provider = createProviderWithoutMaxInputTokens({ + id: "local", + model: "unknown-local-embedding", + }); + + const out = enforceEmbeddingMaxInputTokens(provider, [ + { + startLine: 1, + endLine: 1, + text: "x".repeat(3000), + hash: "ignored", + }, + ]); + + expect(out.length).toBeGreaterThan(1); + expect(out.every((chunk) => estimateUtf8Bytes(chunk.text) <= 2048)).toBe(true); + }); + + it("honors hard safety caps lower than provider maxInputTokens", () => { + const provider = createProvider(8192); + const out = enforceEmbeddingMaxInputTokens( + provider, + [ + { + startLine: 1, + endLine: 1, + text: "x".repeat(8100), + hash: "ignored", + }, + ], + 8000, + ); + + expect(out.length).toBeGreaterThan(1); + expect(out.every((chunk) => estimateUtf8Bytes(chunk.text) <= 8000)).toBe(true); + }); }); diff --git a/src/memory/embedding-chunk-limits.ts b/src/memory/embedding-chunk-limits.ts index 3f832855300..033b30a84a3 100644 
--- a/src/memory/embedding-chunk-limits.ts +++ b/src/memory/embedding-chunk-limits.ts @@ -6,8 +6,13 @@ import { hashText, type MemoryChunk } from "./internal.js"; export function enforceEmbeddingMaxInputTokens( provider: EmbeddingProvider, chunks: MemoryChunk[], + hardMaxInputTokens?: number, ): MemoryChunk[] { - const maxInputTokens = resolveEmbeddingMaxInputTokens(provider); + const providerMaxInputTokens = resolveEmbeddingMaxInputTokens(provider); + const maxInputTokens = + typeof hardMaxInputTokens === "number" && hardMaxInputTokens > 0 + ? Math.min(providerMaxInputTokens, hardMaxInputTokens) + : providerMaxInputTokens; const out: MemoryChunk[] = []; for (const chunk of chunks) { diff --git a/src/memory/embedding-model-limits.ts b/src/memory/embedding-model-limits.ts index 0f6dad821eb..b9960009606 100644 --- a/src/memory/embedding-model-limits.ts +++ b/src/memory/embedding-model-limits.ts @@ -1,6 +1,7 @@ import type { EmbeddingProvider } from "./embeddings.js"; const DEFAULT_EMBEDDING_MAX_INPUT_TOKENS = 8192; +const DEFAULT_LOCAL_EMBEDDING_MAX_INPUT_TOKENS = 2048; const KNOWN_EMBEDDING_MAX_INPUT_TOKENS: Record<string, number> = { "openai:text-embedding-3-small": 8192, @@ -30,6 +31,9 @@ export function resolveEmbeddingMaxInputTokens(provider: EmbeddingProvider): num if (provider.id.toLowerCase() === "gemini") { return 2048; } + if (provider.id.toLowerCase() === "local") { + return DEFAULT_LOCAL_EMBEDDING_MAX_INPUT_TOKENS; + } return DEFAULT_EMBEDDING_MAX_INPUT_TOKENS; } diff --git a/src/memory/manager-embedding-ops.ts b/src/memory/manager-embedding-ops.ts index 51e95b136ba..6da8b7ffa3b 100644 --- a/src/memory/manager-embedding-ops.ts +++ b/src/memory/manager-embedding-ops.ts @@ -709,6 +709,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { chunkMarkdown(content, this.settings.chunking).filter( (chunk) => chunk.text.trim().length > 0, ), + EMBEDDING_BATCH_MAX_TOKENS, ); if (options.source === "sessions" && "lineMap" in entry) { 
remapChunkLines(chunks, entry.lineMap); diff --git a/src/memory/manager.embedding-batches.test.ts b/src/memory/manager.embedding-batches.test.ts index 602f9120714..1326eca71eb 100644 --- a/src/memory/manager.embedding-batches.test.ts +++ b/src/memory/manager.embedding-batches.test.ts @@ -6,7 +6,7 @@ import { installEmbeddingManagerFixture } from "./embedding-manager.test-harness const fx = installEmbeddingManagerFixture({ fixturePrefix: "openclaw-mem-", - largeTokens: 1250, + largeTokens: 4000, smallTokens: 200, createCfg: ({ workspaceDir, indexPath, tokens }) => ({ agents: { @@ -50,6 +50,10 @@ describe("memory embedding batches", () => { ); expect(totalTexts).toBe(status.chunks); expect(embedBatch.mock.calls.length).toBeGreaterThan(1); + const inputs: string[] = embedBatch.mock.calls.flatMap( + (call: unknown[]) => (call[0] as string[] | undefined) ?? [], + ); + expect(inputs.every((text) => Buffer.byteLength(text, "utf8") <= 8000)).toBe(true); expect(updates.length).toBeGreaterThan(0); expect(updates.some((update) => update.label?.includes("/"))).toBe(true); const last = updates[updates.length - 1];