diff --git a/CHANGELOG.md b/CHANGELOG.md index a3ba0a7711c..512aba182b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Agents/ACPX: stop forwarding Codex ACP timeout config controls that Codex rejects while preserving OpenClaw's run-timeout watchdog for ACP subagents. Fixes #73052. Thanks @pfrederiksen and @richa65. +- Memory/Ollama: add `memorySearch.remote.nonBatchConcurrency` for inline embedding indexing, default Ollama non-batch indexing to one request at a time, and keep batch concurrency separate from non-batch concurrency so local embedding backfills avoid timeout storms on smaller hosts. Carries forward #57733. Thanks @itilys. - Docs/tools: clarify that `tools.profile: "messaging"` is intentionally narrow and that `tools.profile: "full"` is the unrestricted baseline for broader command/control access. Carries forward #39954. Thanks @posigit. - Control UI/Agents: redact tool-call args, partial/final results, derived exec output, and configured custom secret patterns before streaming tool events to the Control UI, so tool output cannot expose provider or channel credentials. Fixes #72283. (#72319) Thanks @volcano303 and @BunsDev. - Agents/sessions: keep `sessions_history` recall redaction enabled even when general log redaction is disabled, and clarify that safety-boundary UI/tool/diagnostic payloads still redact independently of `logging.redactSensitive`. Carries forward #72319. Thanks @volcano303 and @BunsDev. diff --git a/docs/.generated/config-baseline.sha256 b/docs/.generated/config-baseline.sha256 index 446aed311f4..67cb932ea7a 100644 --- a/docs/.generated/config-baseline.sha256 +++ b/docs/.generated/config-baseline.sha256 @@ -1,4 +1,4 @@ -5ffabe5ff76d8e4a0d121e89f74f84917b919447e63bf12e0e5b0e4c0211d451 config-baseline.json -7dcb21e47ddd5de98e2af1ecbc41e11ac0c5742819c359e6d851fbc39c0226e9 config-baseline.core.json +0f57fb6d20b9d300c4325b227e49f17f04349b0f3c27dd218397fe7a3b5001dc config-baseline.json +9d1815981dc3f89d1dfdc72f0a4723d4fd5efca8e5b8a1a1cbf6a053c50c937d config-baseline.core.json c4f07c228d4f07e7afafa5b600b4a80f5b26aaed7267c7287a64d04a527be8e8 config-baseline.channel.json 6938050627f0d120109d2045b4300aa8b508b35132542db434033ed0fe3e2b3a config-baseline.plugin.json diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md index 0176c7d1001..1611abae507 100644 --- a/docs/providers/ollama.md +++ b/docs/providers/ollama.md @@ -885,7 +885,13 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s { agents: { defaults: { - memorySearch: { provider: "ollama" }, + memorySearch: { + provider: "ollama", + remote: { + // Default for Ollama. Raise on larger hosts if reindexing is too slow. + nonBatchConcurrency: 1, + }, + }, }, }, } @@ -899,10 +905,11 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s defaults: { memorySearch: { provider: "ollama", + model: "nomic-embed-text", remote: { baseUrl: "http://gpu-box.local:11434", - model: "nomic-embed-text", apiKey: "ollama-local", + nonBatchConcurrency: 2, }, }, }, diff --git a/docs/reference/memory-config.md b/docs/reference/memory-config.md index fc07ef55f2f..dda402699e9 100644 --- a/docs/reference/memory-config.md +++ b/docs/reference/memory-config.md @@ -386,6 +386,7 @@ Prevents re-embedding unchanged text during reindex or transcript updates. | Key | Type | Default | Description | | ----------------------------- | --------- | ------- | -------------------------- | +| `remote.nonBatchConcurrency` | `number` | `4` | Parallel inline embeddings | | `remote.batch.enabled` | `boolean` | `false` | Enable batch embedding API | | `remote.batch.concurrency` | `number` | `2` | Parallel batch jobs | | `remote.batch.wait` | `boolean` | `true` | Wait for batch completion | @@ -394,7 +395,9 @@ Prevents re-embedding unchanged text during reindex or transcript updates. Available for `openai`, `gemini`, and `voyage`. OpenAI batch is typically fastest and cheapest for large backfills. -This is separate from `sync.embeddingBatchTimeoutSeconds`, which controls inline embedding calls used by local/self-hosted providers and hosted providers when provider batch APIs are not active. +`remote.nonBatchConcurrency` controls inline embedding calls used by local/self-hosted providers and hosted providers when provider batch APIs are not active. Ollama defaults to `1` for non-batch indexing to avoid overwhelming smaller local hosts; set a higher value on larger machines. + +This is separate from `sync.embeddingBatchTimeoutSeconds`, which controls the timeout for inline embedding calls. --- diff --git a/extensions/memory-core/src/memory/manager-embedding-ops.ts b/extensions/memory-core/src/memory/manager-embedding-ops.ts index 3382dab930f..ef783f95024 100644 --- a/extensions/memory-core/src/memory/manager-embedding-ops.ts +++ b/extensions/memory-core/src/memory/manager-embedding-ops.ts @@ -89,11 +89,17 @@ export function resolveEmbeddingTimeoutMs(params: { export function resolveMemoryIndexConcurrency(params: { batch: { enabled: boolean; concurrency: number }; - configuredConcurrency?: number; + configuredNonBatchConcurrency?: number; + providerId?: string; }): number { - return params.configuredConcurrency != null || params.batch.enabled - ? params.batch.concurrency - : EMBEDDING_INDEX_CONCURRENCY; + if (params.batch.enabled) { + return params.batch.concurrency; + } + const configured = params.configuredNonBatchConcurrency; + if (typeof configured === "number" && Number.isFinite(configured)) { + return Math.max(1, Math.floor(configured)); + } + return params.providerId === "ollama" ? 1 : EMBEDDING_INDEX_CONCURRENCY; } export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { @@ -509,7 +515,8 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { protected getIndexConcurrency(): number { return resolveMemoryIndexConcurrency({ batch: this.batch, - configuredConcurrency: this.settings.remote?.batch?.concurrency, + configuredNonBatchConcurrency: this.settings.remote?.nonBatchConcurrency, + providerId: this.provider?.id, }); } diff --git a/extensions/memory-core/src/memory/manager-embedding-timeout.test.ts b/extensions/memory-core/src/memory/manager-embedding-timeout.test.ts index bf67130f0c1..0ea78542b36 100644 --- a/extensions/memory-core/src/memory/manager-embedding-timeout.test.ts +++ b/extensions/memory-core/src/memory/manager-embedding-timeout.test.ts @@ -46,11 +46,35 @@ describe("memory index concurrency resolution", () => { ).toBe(4); }); - it("respects configured concurrency even when batch mode is disabled", () => { + it("respects configured non-batch concurrency when batch mode is disabled", () => { expect( resolveMemoryIndexConcurrency({ batch: { enabled: false, concurrency: 1 }, - configuredConcurrency: 1, + configuredNonBatchConcurrency: 1, + }), + ).toBe(1); + }); + + it("clamps configured non-batch concurrency to a positive integer", () => { + expect( + resolveMemoryIndexConcurrency({ + batch: { enabled: false, concurrency: 2 }, + configuredNonBatchConcurrency: 2.8, + }), + ).toBe(2); + expect( + resolveMemoryIndexConcurrency({ + batch: { enabled: false, concurrency: 2 }, + configuredNonBatchConcurrency: 0, + }), + ).toBe(1); + }); + + it("uses conservative non-batch concurrency for Ollama by default", () => { + expect( + resolveMemoryIndexConcurrency({ + batch: { enabled: false, concurrency: 2 }, + providerId: "ollama", }), ).toBe(1); }); diff --git a/src/agents/memory-search.test.ts b/src/agents/memory-search.test.ts index 992724337f5..ede692d8fc9 100644 --- a/src/agents/memory-search.test.ts +++ b/src/agents/memory-search.test.ts @@ -127,11 +127,15 @@ describe("memory search config", () => { function expectMergedRemoteConfig( resolved: ReturnType, apiKey: unknown, + extras?: { nonBatchConcurrency?: number }, ) { expect(resolved?.remote).toEqual({ baseUrl: "https://agent.example/v1", apiKey, headers: { "X-Default": "on" }, + ...(typeof extras?.nonBatchConcurrency === "number" + ? { nonBatchConcurrency: extras.nonBatchConcurrency } + : {}), batch: { enabled: false, wait: true, @@ -535,6 +539,18 @@ describe("memory search config", () => { expectMergedRemoteConfig(resolved, "default-key"); // pragma: allowlist secret }); + it("merges remote non-batch concurrency from defaults with agent overrides", () => { + const cfg = configWithRemoteDefaults({ + apiKey: "default-key", // pragma: allowlist secret + headers: { "X-Default": "on" }, + nonBatchConcurrency: 1, + }); + + const resolved = resolveMemorySearchConfig(cfg, "main"); + + expectMergedRemoteConfig(resolved, "default-key", { nonBatchConcurrency: 1 }); // pragma: allowlist secret + }); + it("preserves SecretRef remote apiKey when merging defaults with agent overrides", () => { const cfg = configWithRemoteDefaults({ apiKey: { source: "env", provider: "default", id: "OPENAI_API_KEY" }, // pragma: allowlist secret diff --git a/src/agents/memory-search.ts b/src/agents/memory-search.ts index aaf430db5fc..d3faed881ee 100644 --- a/src/agents/memory-search.ts +++ b/src/agents/memory-search.ts @@ -22,6 +22,7 @@ export type ResolvedMemorySearchConfig = { baseUrl?: string; apiKey?: SecretInput; headers?: Record; + nonBatchConcurrency?: number; batch?: { enabled: boolean; wait: boolean; @@ -165,9 +166,11 @@ function mergeConfig( overrideRemote?.baseUrl || overrideRemote?.apiKey || overrideRemote?.headers || + overrideRemote?.nonBatchConcurrency != null || defaultRemote?.baseUrl || defaultRemote?.apiKey || - defaultRemote?.headers, + defaultRemote?.headers || + defaultRemote?.nonBatchConcurrency != null, ); const includeRemote = hasRemoteConfig || @@ -191,6 +194,8 @@ function mergeConfig( baseUrl: overrideRemote?.baseUrl ?? defaultRemote?.baseUrl, apiKey: overrideRemote?.apiKey ?? defaultRemote?.apiKey, headers: overrideRemote?.headers ?? defaultRemote?.headers, + nonBatchConcurrency: + overrideRemote?.nonBatchConcurrency ?? defaultRemote?.nonBatchConcurrency, batch, } : undefined; diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index dcfdeb9d3f3..05dff57ae29 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -4368,6 +4368,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { description: "Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.", }, + nonBatchConcurrency: { + type: "integer", + exclusiveMinimum: 0, + maximum: 9007199254740991, + title: "Remote Non-Batch Embedding Concurrency", + description: + "Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.", + }, batch: { type: "object", properties: { @@ -6358,6 +6366,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { type: "string", }, }, + nonBatchConcurrency: { + type: "integer", + exclusiveMinimum: 0, + maximum: 9007199254740991, + }, batch: { type: "object", properties: { @@ -26081,6 +26094,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { help: "Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.", tags: ["advanced"], }, + "agents.defaults.memorySearch.remote.nonBatchConcurrency": { + label: "Remote Non-Batch Embedding Concurrency", + help: "Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.", + tags: ["performance"], + }, "agents.defaults.memorySearch.remote.batch.enabled": { label: "Remote Batch Embedding Enabled", help: "Enables provider batch APIs for embedding jobs when supported (OpenAI/Gemini), improving throughput on larger index runs. Keep this enabled unless debugging provider batch failures or running very small workloads.", diff --git a/src/config/schema.help.quality.test.ts b/src/config/schema.help.quality.test.ts index 34c3354435b..bcacd0c492b 100644 --- a/src/config/schema.help.quality.test.ts +++ b/src/config/schema.help.quality.test.ts @@ -86,6 +86,7 @@ const TARGET_KEYS = [ "agents.defaults.memorySearch.remote.baseUrl", "agents.defaults.memorySearch.remote.apiKey", "agents.defaults.memorySearch.remote.headers", + "agents.defaults.memorySearch.remote.nonBatchConcurrency", "agents.defaults.memorySearch.remote.batch.enabled", "agents.defaults.memorySearch.remote.batch.wait", "agents.defaults.memorySearch.remote.batch.concurrency", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 472e80d4823..aec27cde2a4 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -1014,6 +1014,8 @@ export const FIELD_HELP: Record = { "Supplies a dedicated API key for remote embedding calls used by memory indexing and query-time embeddings. Use this when memory embeddings should use different credentials than global defaults or environment variables.", "agents.defaults.memorySearch.remote.headers": "Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.", + "agents.defaults.memorySearch.remote.nonBatchConcurrency": + "Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.", "agents.defaults.memorySearch.remote.batch.enabled": "Enables provider batch APIs for embedding jobs when supported (OpenAI/Gemini), improving throughput on larger index runs. Keep this enabled unless debugging provider batch failures or running very small workloads.", "agents.defaults.memorySearch.remote.batch.wait": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 5927605ceb5..8737a6d2533 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -417,6 +417,8 @@ export const FIELD_LABELS: Record = { "agents.defaults.memorySearch.remote.baseUrl": "Remote Embedding Base URL", "agents.defaults.memorySearch.remote.apiKey": "Remote Embedding API Key", "agents.defaults.memorySearch.remote.headers": "Remote Embedding Headers", + "agents.defaults.memorySearch.remote.nonBatchConcurrency": + "Remote Non-Batch Embedding Concurrency", "agents.defaults.memorySearch.remote.batch.enabled": "Remote Batch Embedding Enabled", "agents.defaults.memorySearch.remote.batch.wait": "Remote Batch Wait for Completion", "agents.defaults.memorySearch.remote.batch.concurrency": "Remote Batch Concurrency", diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index 0a54f94ce37..8c8e0d11051 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -365,6 +365,8 @@ export type MemorySearchConfig = { baseUrl?: string; apiKey?: SecretInput; headers?: Record; + /** Max concurrent non-batch embedding tasks during indexing. Useful for slower local providers such as Ollama. */ + nonBatchConcurrency?: number; batch?: { /** Enable batch API for embedding indexing (OpenAI/Gemini; default: true). */ enabled?: boolean; diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index 6a14022a2a0..2d0c8aca7b1 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -665,6 +665,7 @@ export const MemorySearchSchema = z baseUrl: z.string().optional(), apiKey: SecretInputSchema.optional().register(sensitive), headers: z.record(z.string(), z.string()).optional(), + nonBatchConcurrency: z.number().int().positive().optional(), batch: z .object({ enabled: z.boolean().optional(), diff --git a/src/test-utils/plugin-runtime-env.ts b/src/test-utils/plugin-runtime-env.ts index ec26a18bde8..deb65dc696f 100644 --- a/src/test-utils/plugin-runtime-env.ts +++ b/src/test-utils/plugin-runtime-env.ts @@ -27,7 +27,7 @@ export function createRuntimeEnv(options?: RuntimeEnvOptions): OutputRuntimeEnv export function createTypedRuntimeEnv( options?: TypedRuntimeEnvOptions, ): TRuntime { - return createRuntimeEnv(options) as TRuntime; + return createRuntimeEnv(options) as unknown as TRuntime; } export function createNonExitingRuntimeEnv(): OutputRuntimeEnv {