fix(memory): cap ollama non-batch embedding concurrency

This commit is contained in:
Peter Steinberger
2026-04-28 00:33:53 +01:00
parent 5de3196a60
commit 802f13ac15
15 changed files with 103 additions and 14 deletions

View File

@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Agents/ACPX: stop forwarding Codex ACP timeout config controls that Codex rejects while preserving OpenClaw's run-timeout watchdog for ACP subagents. Fixes #73052. Thanks @pfrederiksen and @richa65.
- Memory/Ollama: add `memorySearch.remote.nonBatchConcurrency` for inline embedding indexing, default Ollama non-batch indexing to one request at a time, and keep batch concurrency separate from non-batch concurrency so local embedding backfills avoid timeout storms on smaller hosts. Carries forward #57733. Thanks @itilys.
- Docs/tools: clarify that `tools.profile: "messaging"` is intentionally narrow and that `tools.profile: "full"` is the unrestricted baseline for broader command/control access. Carries forward #39954. Thanks @posigit.
- Control UI/Agents: redact tool-call args, partial/final results, derived exec output, and configured custom secret patterns before streaming tool events to the Control UI, so tool output cannot expose provider or channel credentials. Fixes #72283. (#72319) Thanks @volcano303 and @BunsDev.
- Agents/sessions: keep `sessions_history` recall redaction enabled even when general log redaction is disabled, and clarify that safety-boundary UI/tool/diagnostic payloads still redact independently of `logging.redactSensitive`. Carries forward #72319. Thanks @volcano303 and @BunsDev.

View File

@@ -1,4 +1,4 @@
5ffabe5ff76d8e4a0d121e89f74f84917b919447e63bf12e0e5b0e4c0211d451 config-baseline.json
7dcb21e47ddd5de98e2af1ecbc41e11ac0c5742819c359e6d851fbc39c0226e9 config-baseline.core.json
0f57fb6d20b9d300c4325b227e49f17f04349b0f3c27dd218397fe7a3b5001dc config-baseline.json
9d1815981dc3f89d1dfdc72f0a4723d4fd5efca8e5b8a1a1cbf6a053c50c937d config-baseline.core.json
c4f07c228d4f07e7afafa5b600b4a80f5b26aaed7267c7287a64d04a527be8e8 config-baseline.channel.json
6938050627f0d120109d2045b4300aa8b508b35132542db434033ed0fe3e2b3a config-baseline.plugin.json

View File

@@ -885,7 +885,13 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
{
agents: {
defaults: {
memorySearch: { provider: "ollama" },
memorySearch: {
provider: "ollama",
remote: {
// Default for Ollama. Raise on larger hosts if reindexing is too slow.
nonBatchConcurrency: 1,
},
},
},
},
}
@@ -899,10 +905,11 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
defaults: {
memorySearch: {
provider: "ollama",
model: "nomic-embed-text",
remote: {
baseUrl: "http://gpu-box.local:11434",
model: "nomic-embed-text",
apiKey: "ollama-local",
nonBatchConcurrency: 2,
},
},
},

View File

@@ -386,6 +386,7 @@ Prevents re-embedding unchanged text during reindex or transcript updates.
| Key | Type | Default | Description |
| ----------------------------- | --------- | ------- | -------------------------- |
| `remote.nonBatchConcurrency` | `number` | `4` | Parallel inline embeddings |
| `remote.batch.enabled` | `boolean` | `false` | Enable batch embedding API |
| `remote.batch.concurrency` | `number` | `2` | Parallel batch jobs |
| `remote.batch.wait` | `boolean` | `true` | Wait for batch completion |
@@ -394,7 +395,9 @@ Prevents re-embedding unchanged text during reindex or transcript updates.
Available for `openai`, `gemini`, and `voyage`. OpenAI batch is typically fastest and cheapest for large backfills.
This is separate from `sync.embeddingBatchTimeoutSeconds`, which controls inline embedding calls used by local/self-hosted providers and hosted providers when provider batch APIs are not active.
`remote.nonBatchConcurrency` controls inline embedding calls used by local/self-hosted providers and hosted providers when provider batch APIs are not active. Ollama defaults to `1` for non-batch indexing to avoid overwhelming smaller local hosts; set a higher value on larger machines.
This is separate from `sync.embeddingBatchTimeoutSeconds`, which controls the timeout for inline embedding calls.
---

View File

@@ -89,11 +89,17 @@ export function resolveEmbeddingTimeoutMs(params: {
/**
 * Resolves how many embedding requests may run in parallel during memory
 * indexing.
 *
 * Precedence:
 *   1. When batch embedding is enabled, the batch concurrency setting wins.
 *   2. Otherwise an explicitly configured non-batch concurrency is used,
 *      clamped to a positive integer so fractional or non-positive config
 *      values cannot stall indexing.
 *   3. Otherwise Ollama falls back to 1 to avoid timeout storms on small
 *      local hosts; all other providers use EMBEDDING_INDEX_CONCURRENCY.
 *
 * @param params.batch - Batch embedding mode flag and its own concurrency.
 * @param params.configuredConcurrency - Legacy knob, retained for interface
 *   compatibility; no longer consulted (batch concurrency is separate).
 * @param params.configuredNonBatchConcurrency - User-configured limit for
 *   inline (non-batch) embedding requests.
 * @param params.providerId - Embedding provider id (e.g. "ollama").
 * @returns The number of concurrent embedding requests to allow (>= 1).
 */
export function resolveMemoryIndexConcurrency(params: {
  batch: { enabled: boolean; concurrency: number };
  configuredConcurrency?: number;
  configuredNonBatchConcurrency?: number;
  providerId?: string;
}): number {
  // Batch mode carries its own concurrency knob and takes precedence.
  if (params.batch.enabled) {
    return params.batch.concurrency;
  }
  const configured = params.configuredNonBatchConcurrency;
  if (typeof configured === "number" && Number.isFinite(configured)) {
    // Clamp to a positive integer: 2.8 -> 2, 0 -> 1.
    return Math.max(1, Math.floor(configured));
  }
  // Conservative default for local Ollama hosts; generic default otherwise.
  return params.providerId === "ollama" ? 1 : EMBEDDING_INDEX_CONCURRENCY;
}
export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
@@ -509,7 +515,8 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
/**
 * Returns the concurrency limit for inline (non-batch) embedding indexing.
 *
 * Reads `remote.nonBatchConcurrency` — deliberately NOT the batch
 * concurrency, which is a separate knob under `remote.batch` — and passes
 * the provider id so Ollama can fall back to a conservative default of 1.
 */
protected getIndexConcurrency(): number {
  return resolveMemoryIndexConcurrency({
    batch: this.batch,
    configuredNonBatchConcurrency: this.settings.remote?.nonBatchConcurrency,
    providerId: this.provider?.id,
  });
}

View File

@@ -46,11 +46,35 @@ describe("memory index concurrency resolution", () => {
).toBe(4);
});
it("respects configured concurrency even when batch mode is disabled", () => {
it("respects configured non-batch concurrency when batch mode is disabled", () => {
expect(
resolveMemoryIndexConcurrency({
batch: { enabled: false, concurrency: 1 },
configuredConcurrency: 1,
configuredNonBatchConcurrency: 1,
}),
).toBe(1);
});
it("clamps configured non-batch concurrency to a positive integer", () => {
expect(
resolveMemoryIndexConcurrency({
batch: { enabled: false, concurrency: 2 },
configuredNonBatchConcurrency: 2.8,
}),
).toBe(2);
expect(
resolveMemoryIndexConcurrency({
batch: { enabled: false, concurrency: 2 },
configuredNonBatchConcurrency: 0,
}),
).toBe(1);
});
it("uses conservative non-batch concurrency for Ollama by default", () => {
expect(
resolveMemoryIndexConcurrency({
batch: { enabled: false, concurrency: 2 },
providerId: "ollama",
}),
).toBe(1);
});

View File

@@ -127,11 +127,15 @@ describe("memory search config", () => {
function expectMergedRemoteConfig(
resolved: ReturnType<typeof resolveMemorySearchConfig>,
apiKey: unknown,
extras?: { nonBatchConcurrency?: number },
) {
expect(resolved?.remote).toEqual({
baseUrl: "https://agent.example/v1",
apiKey,
headers: { "X-Default": "on" },
...(typeof extras?.nonBatchConcurrency === "number"
? { nonBatchConcurrency: extras.nonBatchConcurrency }
: {}),
batch: {
enabled: false,
wait: true,
@@ -535,6 +539,18 @@ describe("memory search config", () => {
expectMergedRemoteConfig(resolved, "default-key"); // pragma: allowlist secret
});
it("merges remote non-batch concurrency from defaults with agent overrides", () => {
const cfg = configWithRemoteDefaults({
apiKey: "default-key", // pragma: allowlist secret
headers: { "X-Default": "on" },
nonBatchConcurrency: 1,
});
const resolved = resolveMemorySearchConfig(cfg, "main");
expectMergedRemoteConfig(resolved, "default-key", { nonBatchConcurrency: 1 }); // pragma: allowlist secret
});
it("preserves SecretRef remote apiKey when merging defaults with agent overrides", () => {
const cfg = configWithRemoteDefaults({
apiKey: { source: "env", provider: "default", id: "OPENAI_API_KEY" }, // pragma: allowlist secret

View File

@@ -22,6 +22,7 @@ export type ResolvedMemorySearchConfig = {
baseUrl?: string;
apiKey?: SecretInput;
headers?: Record<string, string>;
nonBatchConcurrency?: number;
batch?: {
enabled: boolean;
wait: boolean;
@@ -165,9 +166,11 @@ function mergeConfig(
overrideRemote?.baseUrl ||
overrideRemote?.apiKey ||
overrideRemote?.headers ||
overrideRemote?.nonBatchConcurrency != null ||
defaultRemote?.baseUrl ||
defaultRemote?.apiKey ||
defaultRemote?.headers,
defaultRemote?.headers ||
defaultRemote?.nonBatchConcurrency != null,
);
const includeRemote =
hasRemoteConfig ||
@@ -191,6 +194,8 @@ function mergeConfig(
baseUrl: overrideRemote?.baseUrl ?? defaultRemote?.baseUrl,
apiKey: overrideRemote?.apiKey ?? defaultRemote?.apiKey,
headers: overrideRemote?.headers ?? defaultRemote?.headers,
nonBatchConcurrency:
overrideRemote?.nonBatchConcurrency ?? defaultRemote?.nonBatchConcurrency,
batch,
}
: undefined;

View File

@@ -4368,6 +4368,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
"Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.",
},
nonBatchConcurrency: {
type: "integer",
exclusiveMinimum: 0,
maximum: 9007199254740991,
title: "Remote Non-Batch Embedding Concurrency",
description:
"Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.",
},
batch: {
type: "object",
properties: {
@@ -6358,6 +6366,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
type: "string",
},
},
nonBatchConcurrency: {
type: "integer",
exclusiveMinimum: 0,
maximum: 9007199254740991,
},
batch: {
type: "object",
properties: {
@@ -26081,6 +26094,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.",
tags: ["advanced"],
},
"agents.defaults.memorySearch.remote.nonBatchConcurrency": {
label: "Remote Non-Batch Embedding Concurrency",
help: "Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.",
tags: ["performance"],
},
"agents.defaults.memorySearch.remote.batch.enabled": {
label: "Remote Batch Embedding Enabled",
help: "Enables provider batch APIs for embedding jobs when supported (OpenAI/Gemini), improving throughput on larger index runs. Keep this enabled unless debugging provider batch failures or running very small workloads.",

View File

@@ -86,6 +86,7 @@ const TARGET_KEYS = [
"agents.defaults.memorySearch.remote.baseUrl",
"agents.defaults.memorySearch.remote.apiKey",
"agents.defaults.memorySearch.remote.headers",
"agents.defaults.memorySearch.remote.nonBatchConcurrency",
"agents.defaults.memorySearch.remote.batch.enabled",
"agents.defaults.memorySearch.remote.batch.wait",
"agents.defaults.memorySearch.remote.batch.concurrency",

View File

@@ -1014,6 +1014,8 @@ export const FIELD_HELP: Record<string, string> = {
"Supplies a dedicated API key for remote embedding calls used by memory indexing and query-time embeddings. Use this when memory embeddings should use different credentials than global defaults or environment variables.",
"agents.defaults.memorySearch.remote.headers":
"Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.",
"agents.defaults.memorySearch.remote.nonBatchConcurrency":
"Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.",
"agents.defaults.memorySearch.remote.batch.enabled":
"Enables provider batch APIs for embedding jobs when supported (OpenAI/Gemini), improving throughput on larger index runs. Keep this enabled unless debugging provider batch failures or running very small workloads.",
"agents.defaults.memorySearch.remote.batch.wait":

View File

@@ -417,6 +417,8 @@ export const FIELD_LABELS: Record<string, string> = {
"agents.defaults.memorySearch.remote.baseUrl": "Remote Embedding Base URL",
"agents.defaults.memorySearch.remote.apiKey": "Remote Embedding API Key",
"agents.defaults.memorySearch.remote.headers": "Remote Embedding Headers",
"agents.defaults.memorySearch.remote.nonBatchConcurrency":
"Remote Non-Batch Embedding Concurrency",
"agents.defaults.memorySearch.remote.batch.enabled": "Remote Batch Embedding Enabled",
"agents.defaults.memorySearch.remote.batch.wait": "Remote Batch Wait for Completion",
"agents.defaults.memorySearch.remote.batch.concurrency": "Remote Batch Concurrency",

View File

@@ -365,6 +365,8 @@ export type MemorySearchConfig = {
baseUrl?: string;
apiKey?: SecretInput;
headers?: Record<string, string>;
/** Max concurrent non-batch embedding tasks during indexing. Useful for slower local providers such as Ollama. */
nonBatchConcurrency?: number;
batch?: {
/** Enable batch API for embedding indexing (OpenAI/Gemini; default: true). */
enabled?: boolean;

View File

@@ -665,6 +665,7 @@ export const MemorySearchSchema = z
baseUrl: z.string().optional(),
apiKey: SecretInputSchema.optional().register(sensitive),
headers: z.record(z.string(), z.string()).optional(),
nonBatchConcurrency: z.number().int().positive().optional(),
batch: z
.object({
enabled: z.boolean().optional(),

View File

@@ -27,7 +27,7 @@ export function createRuntimeEnv(options?: RuntimeEnvOptions): OutputRuntimeEnv
/**
 * Creates a runtime environment and casts it to the caller's runtime type.
 *
 * The double assertion through `unknown` is required because the value
 * returned by `createRuntimeEnv` (`OutputRuntimeEnv`) has no structural
 * overlap with an arbitrary `TRuntime` that the compiler can verify;
 * callers own the correctness of the cast.
 *
 * @param options - Optional typed runtime environment configuration.
 * @returns The runtime environment viewed as `TRuntime`.
 */
export function createTypedRuntimeEnv<TRuntime>(
  options?: TypedRuntimeEnvOptions<TRuntime>,
): TRuntime {
  return createRuntimeEnv(options) as unknown as TRuntime;
}
export function createNonExitingRuntimeEnv(): OutputRuntimeEnv {