fix(memory): cap ollama non-batch embedding concurrency

This commit is contained in:
Peter Steinberger
2026-04-28 00:33:53 +01:00
parent 5de3196a60
commit 802f13ac15
15 changed files with 103 additions and 14 deletions

View File

@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Agents/ACPX: stop forwarding Codex ACP timeout config controls that Codex rejects while preserving OpenClaw's run-timeout watchdog for ACP subagents. Fixes #73052. Thanks @pfrederiksen and @richa65.
- Memory/Ollama: add `memorySearch.remote.nonBatchConcurrency` for inline embedding indexing, default Ollama non-batch indexing to one request at a time, and keep batch concurrency separate from non-batch concurrency so local embedding backfills avoid timeout storms on smaller hosts. Carries forward #57733. Thanks @itilys.
- Docs/tools: clarify that `tools.profile: "messaging"` is intentionally narrow and that `tools.profile: "full"` is the unrestricted baseline for broader command/control access. Carries forward #39954. Thanks @posigit.
- Control UI/Agents: redact tool-call args, partial/final results, derived exec output, and configured custom secret patterns before streaming tool events to the Control UI, so tool output cannot expose provider or channel credentials. Fixes #72283. (#72319) Thanks @volcano303 and @BunsDev.
- Agents/sessions: keep `sessions_history` recall redaction enabled even when general log redaction is disabled, and clarify that safety-boundary UI/tool/diagnostic payloads still redact independently of `logging.redactSensitive`. Carries forward #72319. Thanks @volcano303 and @BunsDev.

View File

@@ -1,4 +1,4 @@
5ffabe5ff76d8e4a0d121e89f74f84917b919447e63bf12e0e5b0e4c0211d451 config-baseline.json
7dcb21e47ddd5de98e2af1ecbc41e11ac0c5742819c359e6d851fbc39c0226e9 config-baseline.core.json
0f57fb6d20b9d300c4325b227e49f17f04349b0f3c27dd218397fe7a3b5001dc config-baseline.json
9d1815981dc3f89d1dfdc72f0a4723d4fd5efca8e5b8a1a1cbf6a053c50c937d config-baseline.core.json
c4f07c228d4f07e7afafa5b600b4a80f5b26aaed7267c7287a64d04a527be8e8 config-baseline.channel.json
6938050627f0d120109d2045b4300aa8b508b35132542db434033ed0fe3e2b3a config-baseline.plugin.json

View File

@@ -885,7 +885,13 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
{
agents: {
defaults: {
memorySearch: { provider: "ollama" },
memorySearch: {
provider: "ollama",
remote: {
// Default for Ollama. Raise on larger hosts if reindexing is too slow.
nonBatchConcurrency: 1,
},
},
},
},
}
@@ -899,10 +905,11 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
defaults: {
memorySearch: {
provider: "ollama",
model: "nomic-embed-text",
remote: {
baseUrl: "http://gpu-box.local:11434",
model: "nomic-embed-text",
apiKey: "ollama-local",
nonBatchConcurrency: 2,
},
},
},

View File

@@ -386,6 +386,7 @@ Prevents re-embedding unchanged text during reindex or transcript updates.
| Key | Type | Default | Description |
| ----------------------------- | --------- | ------- | -------------------------- |
| `remote.nonBatchConcurrency` | `number` | `4` | Parallel inline embeddings |
| `remote.batch.enabled` | `boolean` | `false` | Enable batch embedding API |
| `remote.batch.concurrency` | `number` | `2` | Parallel batch jobs |
| `remote.batch.wait` | `boolean` | `true` | Wait for batch completion |
@@ -394,7 +395,9 @@ Prevents re-embedding unchanged text during reindex or transcript updates.
Available for `openai`, `gemini`, and `voyage`. OpenAI batch is typically fastest and cheapest for large backfills.
This is separate from `sync.embeddingBatchTimeoutSeconds`, which controls inline embedding calls used by local/self-hosted providers and hosted providers when provider batch APIs are not active.
`remote.nonBatchConcurrency` controls inline embedding calls used by local/self-hosted providers and hosted providers when provider batch APIs are not active. Ollama defaults to `1` for non-batch indexing to avoid overwhelming smaller local hosts; set a higher value on larger machines.
This is separate from `sync.embeddingBatchTimeoutSeconds`, which controls the timeout for inline embedding calls.
---

View File

@@ -89,11 +89,17 @@ export function resolveEmbeddingTimeoutMs(params: {
/**
 * Resolves how many embedding requests may run in parallel during memory
 * indexing.
 *
 * Precedence:
 *   1. When batch embedding is enabled, the batch concurrency setting wins.
 *   2. Otherwise an explicitly configured non-batch concurrency is used,
 *      clamped to a positive integer so fractional or non-positive config
 *      values cannot stall indexing.
 *   3. Otherwise Ollama falls back to 1 to avoid timeout storms on small
 *      local hosts; all other providers use EMBEDDING_INDEX_CONCURRENCY.
 *
 * @param params.batch - Batch embedding mode flag and its own concurrency.
 * @param params.configuredConcurrency - Legacy knob, retained for interface
 *   compatibility; no longer consulted (batch concurrency is separate).
 * @param params.configuredNonBatchConcurrency - User-configured limit for
 *   inline (non-batch) embedding requests.
 * @param params.providerId - Embedding provider id (e.g. "ollama").
 * @returns The number of concurrent embedding requests to allow (>= 1).
 */
export function resolveMemoryIndexConcurrency(params: {
  batch: { enabled: boolean; concurrency: number };
  configuredConcurrency?: number;
  configuredNonBatchConcurrency?: number;
  providerId?: string;
}): number {
  // Batch mode carries its own concurrency knob and takes precedence.
  if (params.batch.enabled) {
    return params.batch.concurrency;
  }
  const configured = params.configuredNonBatchConcurrency;
  if (typeof configured === "number" && Number.isFinite(configured)) {
    // Clamp to a positive integer: 2.8 -> 2, 0 -> 1.
    return Math.max(1, Math.floor(configured));
  }
  // Conservative default for local Ollama hosts; generic default otherwise.
  return params.providerId === "ollama" ? 1 : EMBEDDING_INDEX_CONCURRENCY;
}
export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
@@ -509,7 +515,8 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
/**
 * Returns the concurrency limit for inline (non-batch) embedding indexing.
 *
 * Reads `remote.nonBatchConcurrency` — deliberately NOT the batch
 * concurrency, which is a separate knob under `remote.batch` — and passes
 * the provider id so Ollama can fall back to a conservative default of 1.
 */
protected getIndexConcurrency(): number {
  return resolveMemoryIndexConcurrency({
    batch: this.batch,
    configuredNonBatchConcurrency: this.settings.remote?.nonBatchConcurrency,
    providerId: this.provider?.id,
  });
}

View File

@@ -46,11 +46,35 @@ describe("memory index concurrency resolution", () => {
).toBe(4);
});
it("respects configured concurrency even when batch mode is disabled", () => {
it("respects configured non-batch concurrency when batch mode is disabled", () => {
expect(
resolveMemoryIndexConcurrency({
batch: { enabled: false, concurrency: 1 },
configuredConcurrency: 1,
configuredNonBatchConcurrency: 1,
}),
).toBe(1);
});
it("clamps configured non-batch concurrency to a positive integer", () => {
expect(
resolveMemoryIndexConcurrency({
batch: { enabled: false, concurrency: 2 },
configuredNonBatchConcurrency: 2.8,
}),
).toBe(2);
expect(
resolveMemoryIndexConcurrency({
batch: { enabled: false, concurrency: 2 },
configuredNonBatchConcurrency: 0,
}),
).toBe(1);
});
it("uses conservative non-batch concurrency for Ollama by default", () => {
expect(
resolveMemoryIndexConcurrency({
batch: { enabled: false, concurrency: 2 },
providerId: "ollama",
}),
).toBe(1);
});

View File

@@ -127,11 +127,15 @@ describe("memory search config", () => {
function expectMergedRemoteConfig(
resolved: ReturnType<typeof resolveMemorySearchConfig>,
apiKey: unknown,
extras?: { nonBatchConcurrency?: number },
) {
expect(resolved?.remote).toEqual({
baseUrl: "https://agent.example/v1",
apiKey,
headers: { "X-Default": "on" },
...(typeof extras?.nonBatchConcurrency === "number"
? { nonBatchConcurrency: extras.nonBatchConcurrency }
: {}),
batch: {
enabled: false,
wait: true,
@@ -535,6 +539,18 @@ describe("memory search config", () => {
expectMergedRemoteConfig(resolved, "default-key"); // pragma: allowlist secret
});
it("merges remote non-batch concurrency from defaults with agent overrides", () => {
const cfg = configWithRemoteDefaults({
apiKey: "default-key", // pragma: allowlist secret
headers: { "X-Default": "on" },
nonBatchConcurrency: 1,
});
const resolved = resolveMemorySearchConfig(cfg, "main");
expectMergedRemoteConfig(resolved, "default-key", { nonBatchConcurrency: 1 }); // pragma: allowlist secret
});
it("preserves SecretRef remote apiKey when merging defaults with agent overrides", () => {
const cfg = configWithRemoteDefaults({
apiKey: { source: "env", provider: "default", id: "OPENAI_API_KEY" }, // pragma: allowlist secret

View File

@@ -22,6 +22,7 @@ export type ResolvedMemorySearchConfig = {
baseUrl?: string;
apiKey?: SecretInput;
headers?: Record<string, string>;
nonBatchConcurrency?: number;
batch?: {
enabled: boolean;
wait: boolean;
@@ -165,9 +166,11 @@ function mergeConfig(
overrideRemote?.baseUrl ||
overrideRemote?.apiKey ||
overrideRemote?.headers ||
overrideRemote?.nonBatchConcurrency != null ||
defaultRemote?.baseUrl ||
defaultRemote?.apiKey ||
defaultRemote?.headers,
defaultRemote?.headers ||
defaultRemote?.nonBatchConcurrency != null,
);
const includeRemote =
hasRemoteConfig ||
@@ -191,6 +194,8 @@ function mergeConfig(
baseUrl: overrideRemote?.baseUrl ?? defaultRemote?.baseUrl,
apiKey: overrideRemote?.apiKey ?? defaultRemote?.apiKey,
headers: overrideRemote?.headers ?? defaultRemote?.headers,
nonBatchConcurrency:
overrideRemote?.nonBatchConcurrency ?? defaultRemote?.nonBatchConcurrency,
batch,
}
: undefined;

View File

@@ -4368,6 +4368,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
"Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.",
},
nonBatchConcurrency: {
type: "integer",
exclusiveMinimum: 0,
maximum: 9007199254740991,
title: "Remote Non-Batch Embedding Concurrency",
description:
"Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.",
},
batch: {
type: "object",
properties: {
@@ -6358,6 +6366,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
type: "string",
},
},
nonBatchConcurrency: {
type: "integer",
exclusiveMinimum: 0,
maximum: 9007199254740991,
},
batch: {
type: "object",
properties: {
@@ -26081,6 +26094,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.",
tags: ["advanced"],
},
"agents.defaults.memorySearch.remote.nonBatchConcurrency": {
label: "Remote Non-Batch Embedding Concurrency",
help: "Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.",
tags: ["performance"],
},
"agents.defaults.memorySearch.remote.batch.enabled": {
label: "Remote Batch Embedding Enabled",
help: "Enables provider batch APIs for embedding jobs when supported (OpenAI/Gemini), improving throughput on larger index runs. Keep this enabled unless debugging provider batch failures or running very small workloads.",

View File

@@ -86,6 +86,7 @@ const TARGET_KEYS = [
"agents.defaults.memorySearch.remote.baseUrl",
"agents.defaults.memorySearch.remote.apiKey",
"agents.defaults.memorySearch.remote.headers",
"agents.defaults.memorySearch.remote.nonBatchConcurrency",
"agents.defaults.memorySearch.remote.batch.enabled",
"agents.defaults.memorySearch.remote.batch.wait",
"agents.defaults.memorySearch.remote.batch.concurrency",

View File

@@ -1014,6 +1014,8 @@ export const FIELD_HELP: Record<string, string> = {
"Supplies a dedicated API key for remote embedding calls used by memory indexing and query-time embeddings. Use this when memory embeddings should use different credentials than global defaults or environment variables.",
"agents.defaults.memorySearch.remote.headers":
"Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.",
"agents.defaults.memorySearch.remote.nonBatchConcurrency":
"Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.",
"agents.defaults.memorySearch.remote.batch.enabled":
"Enables provider batch APIs for embedding jobs when supported (OpenAI/Gemini), improving throughput on larger index runs. Keep this enabled unless debugging provider batch failures or running very small workloads.",
"agents.defaults.memorySearch.remote.batch.wait":

View File

@@ -417,6 +417,8 @@ export const FIELD_LABELS: Record<string, string> = {
"agents.defaults.memorySearch.remote.baseUrl": "Remote Embedding Base URL",
"agents.defaults.memorySearch.remote.apiKey": "Remote Embedding API Key",
"agents.defaults.memorySearch.remote.headers": "Remote Embedding Headers",
"agents.defaults.memorySearch.remote.nonBatchConcurrency":
"Remote Non-Batch Embedding Concurrency",
"agents.defaults.memorySearch.remote.batch.enabled": "Remote Batch Embedding Enabled",
"agents.defaults.memorySearch.remote.batch.wait": "Remote Batch Wait for Completion",
"agents.defaults.memorySearch.remote.batch.concurrency": "Remote Batch Concurrency",

View File

@@ -365,6 +365,8 @@ export type MemorySearchConfig = {
baseUrl?: string;
apiKey?: SecretInput;
headers?: Record<string, string>;
/** Max concurrent non-batch embedding tasks during indexing. Useful for slower local providers such as Ollama. */
nonBatchConcurrency?: number;
batch?: {
/** Enable batch API for embedding indexing (OpenAI/Gemini; default: true). */
enabled?: boolean;

View File

@@ -665,6 +665,7 @@ export const MemorySearchSchema = z
baseUrl: z.string().optional(),
apiKey: SecretInputSchema.optional().register(sensitive),
headers: z.record(z.string(), z.string()).optional(),
nonBatchConcurrency: z.number().int().positive().optional(),
batch: z
.object({
enabled: z.boolean().optional(),

View File

@@ -27,7 +27,7 @@ export function createRuntimeEnv(options?: RuntimeEnvOptions): OutputRuntimeEnv
/**
 * Creates a runtime environment and casts it to the caller's runtime type.
 *
 * The double assertion through `unknown` is required because the value
 * returned by `createRuntimeEnv` (`OutputRuntimeEnv`) has no structural
 * overlap with an arbitrary `TRuntime` that the compiler can verify;
 * callers own the correctness of the cast.
 *
 * @param options - Optional typed runtime environment configuration.
 * @returns The runtime environment viewed as `TRuntime`.
 */
export function createTypedRuntimeEnv<TRuntime>(
  options?: TypedRuntimeEnvOptions<TRuntime>,
): TRuntime {
  return createRuntimeEnv(options) as unknown as TRuntime;
}
export function createNonExitingRuntimeEnv(): OutputRuntimeEnv {