mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 10:50:44 +00:00
fix(memory): cap ollama non-batch embedding concurrency
This commit is contained in:
@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
|
||||
### Fixes
|
||||
|
||||
- Agents/ACPX: stop forwarding Codex ACP timeout config controls that Codex rejects while preserving OpenClaw's run-timeout watchdog for ACP subagents. Fixes #73052. Thanks @pfrederiksen and @richa65.
|
||||
- Memory/Ollama: add `memorySearch.remote.nonBatchConcurrency` for inline embedding indexing, default Ollama non-batch indexing to one request at a time, and keep batch concurrency separate from non-batch concurrency so local embedding backfills avoid timeout storms on smaller hosts. Carries forward #57733. Thanks @itilys.
|
||||
- Docs/tools: clarify that `tools.profile: "messaging"` is intentionally narrow and that `tools.profile: "full"` is the unrestricted baseline for broader command/control access. Carries forward #39954. Thanks @posigit.
|
||||
- Control UI/Agents: redact tool-call args, partial/final results, derived exec output, and configured custom secret patterns before streaming tool events to the Control UI, so tool output cannot expose provider or channel credentials. Fixes #72283. (#72319) Thanks @volcano303 and @BunsDev.
|
||||
- Agents/sessions: keep `sessions_history` recall redaction enabled even when general log redaction is disabled, and clarify that safety-boundary UI/tool/diagnostic payloads still redact independently of `logging.redactSensitive`. Carries forward #72319. Thanks @volcano303 and @BunsDev.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
5ffabe5ff76d8e4a0d121e89f74f84917b919447e63bf12e0e5b0e4c0211d451 config-baseline.json
|
||||
7dcb21e47ddd5de98e2af1ecbc41e11ac0c5742819c359e6d851fbc39c0226e9 config-baseline.core.json
|
||||
0f57fb6d20b9d300c4325b227e49f17f04349b0f3c27dd218397fe7a3b5001dc config-baseline.json
|
||||
9d1815981dc3f89d1dfdc72f0a4723d4fd5efca8e5b8a1a1cbf6a053c50c937d config-baseline.core.json
|
||||
c4f07c228d4f07e7afafa5b600b4a80f5b26aaed7267c7287a64d04a527be8e8 config-baseline.channel.json
|
||||
6938050627f0d120109d2045b4300aa8b508b35132542db434033ed0fe3e2b3a config-baseline.plugin.json
|
||||
|
||||
@@ -885,7 +885,13 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
|
||||
{
|
||||
agents: {
|
||||
defaults: {
|
||||
memorySearch: {
provider: "ollama",
|
||||
remote: {
|
||||
// Default for Ollama. Raise on larger hosts if reindexing is too slow.
|
||||
nonBatchConcurrency: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -899,10 +905,11 @@ For the full setup and behavior details, see [Ollama Web Search](/tools/ollama-s
|
||||
defaults: {
|
||||
memorySearch: {
|
||||
provider: "ollama",
|
||||
model: "nomic-embed-text",
|
||||
remote: {
|
||||
baseUrl: "http://gpu-box.local:11434",
|
||||
model: "nomic-embed-text",
|
||||
apiKey: "ollama-local",
|
||||
nonBatchConcurrency: 2,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -386,6 +386,7 @@ Prevents re-embedding unchanged text during reindex or transcript updates.
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
| ----------------------------- | --------- | ------- | -------------------------- |
|
||||
| `remote.nonBatchConcurrency` | `number` | `4` | Parallel inline embeddings |
|
||||
| `remote.batch.enabled` | `boolean` | `false` | Enable batch embedding API |
|
||||
| `remote.batch.concurrency` | `number` | `2` | Parallel batch jobs |
|
||||
| `remote.batch.wait` | `boolean` | `true` | Wait for batch completion |
|
||||
@@ -394,7 +395,9 @@ Prevents re-embedding unchanged text during reindex or transcript updates.
|
||||
|
||||
Available for `openai`, `gemini`, and `voyage`. OpenAI batch is typically fastest and cheapest for large backfills.
|
||||
|
||||
`remote.nonBatchConcurrency` controls inline embedding calls used by local/self-hosted providers and hosted providers when provider batch APIs are not active. Ollama defaults to `1` for non-batch indexing to avoid overwhelming smaller local hosts; set a higher value on larger machines.

This is separate from `sync.embeddingBatchTimeoutSeconds`, which controls the timeout for inline embedding calls.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -89,11 +89,17 @@ export function resolveEmbeddingTimeoutMs(params: {
|
||||
|
||||
export function resolveMemoryIndexConcurrency(params: {
|
||||
batch: { enabled: boolean; concurrency: number };
|
||||
configuredConcurrency?: number;
|
||||
configuredNonBatchConcurrency?: number;
|
||||
providerId?: string;
|
||||
}): number {
|
||||
return params.configuredConcurrency != null || params.batch.enabled
|
||||
? params.batch.concurrency
|
||||
: EMBEDDING_INDEX_CONCURRENCY;
|
||||
if (params.batch.enabled) {
|
||||
return params.batch.concurrency;
|
||||
}
|
||||
const configured = params.configuredNonBatchConcurrency;
|
||||
if (typeof configured === "number" && Number.isFinite(configured)) {
|
||||
return Math.max(1, Math.floor(configured));
|
||||
}
|
||||
return params.providerId === "ollama" ? 1 : EMBEDDING_INDEX_CONCURRENCY;
|
||||
}
|
||||
|
||||
export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
@@ -509,7 +515,8 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
/**
 * Resolves how many embedding requests may run in parallel while indexing
 * this manager's memory entries.
 *
 * Batch concurrency is carried inside `this.batch` and applied by the
 * resolver when batch mode is enabled; only the non-batch (inline) limit
 * and the provider id are forwarded so Ollama can fall back to its
 * conservative default of 1.
 */
protected getIndexConcurrency(): number {
  return resolveMemoryIndexConcurrency({
    batch: this.batch,
    // NOTE(review): remote.batch.concurrency is no longer forwarded here;
    // the resolver reads batch concurrency from `batch` itself, keeping
    // batch and non-batch limits independent.
    configuredNonBatchConcurrency: this.settings.remote?.nonBatchConcurrency,
    providerId: this.provider?.id,
  });
}
|
||||
|
||||
|
||||
@@ -46,11 +46,35 @@ describe("memory index concurrency resolution", () => {
|
||||
).toBe(4);
|
||||
});
|
||||
|
||||
it("respects configured concurrency even when batch mode is disabled", () => {
|
||||
it("respects configured non-batch concurrency when batch mode is disabled", () => {
|
||||
expect(
|
||||
resolveMemoryIndexConcurrency({
|
||||
batch: { enabled: false, concurrency: 1 },
|
||||
configuredConcurrency: 1,
|
||||
configuredNonBatchConcurrency: 1,
|
||||
}),
|
||||
).toBe(1);
|
||||
});
|
||||
|
||||
it("clamps configured non-batch concurrency to a positive integer", () => {
|
||||
expect(
|
||||
resolveMemoryIndexConcurrency({
|
||||
batch: { enabled: false, concurrency: 2 },
|
||||
configuredNonBatchConcurrency: 2.8,
|
||||
}),
|
||||
).toBe(2);
|
||||
expect(
|
||||
resolveMemoryIndexConcurrency({
|
||||
batch: { enabled: false, concurrency: 2 },
|
||||
configuredNonBatchConcurrency: 0,
|
||||
}),
|
||||
).toBe(1);
|
||||
});
|
||||
|
||||
it("uses conservative non-batch concurrency for Ollama by default", () => {
|
||||
expect(
|
||||
resolveMemoryIndexConcurrency({
|
||||
batch: { enabled: false, concurrency: 2 },
|
||||
providerId: "ollama",
|
||||
}),
|
||||
).toBe(1);
|
||||
});
|
||||
|
||||
@@ -127,11 +127,15 @@ describe("memory search config", () => {
|
||||
function expectMergedRemoteConfig(
|
||||
resolved: ReturnType<typeof resolveMemorySearchConfig>,
|
||||
apiKey: unknown,
|
||||
extras?: { nonBatchConcurrency?: number },
|
||||
) {
|
||||
expect(resolved?.remote).toEqual({
|
||||
baseUrl: "https://agent.example/v1",
|
||||
apiKey,
|
||||
headers: { "X-Default": "on" },
|
||||
...(typeof extras?.nonBatchConcurrency === "number"
|
||||
? { nonBatchConcurrency: extras.nonBatchConcurrency }
|
||||
: {}),
|
||||
batch: {
|
||||
enabled: false,
|
||||
wait: true,
|
||||
@@ -535,6 +539,18 @@ describe("memory search config", () => {
|
||||
expectMergedRemoteConfig(resolved, "default-key"); // pragma: allowlist secret
|
||||
});
|
||||
|
||||
it("merges remote non-batch concurrency from defaults with agent overrides", () => {
|
||||
const cfg = configWithRemoteDefaults({
|
||||
apiKey: "default-key", // pragma: allowlist secret
|
||||
headers: { "X-Default": "on" },
|
||||
nonBatchConcurrency: 1,
|
||||
});
|
||||
|
||||
const resolved = resolveMemorySearchConfig(cfg, "main");
|
||||
|
||||
expectMergedRemoteConfig(resolved, "default-key", { nonBatchConcurrency: 1 }); // pragma: allowlist secret
|
||||
});
|
||||
|
||||
it("preserves SecretRef remote apiKey when merging defaults with agent overrides", () => {
|
||||
const cfg = configWithRemoteDefaults({
|
||||
apiKey: { source: "env", provider: "default", id: "OPENAI_API_KEY" }, // pragma: allowlist secret
|
||||
|
||||
@@ -22,6 +22,7 @@ export type ResolvedMemorySearchConfig = {
|
||||
baseUrl?: string;
|
||||
apiKey?: SecretInput;
|
||||
headers?: Record<string, string>;
|
||||
nonBatchConcurrency?: number;
|
||||
batch?: {
|
||||
enabled: boolean;
|
||||
wait: boolean;
|
||||
@@ -165,9 +166,11 @@ function mergeConfig(
|
||||
overrideRemote?.baseUrl ||
|
||||
overrideRemote?.apiKey ||
|
||||
overrideRemote?.headers ||
|
||||
overrideRemote?.nonBatchConcurrency != null ||
|
||||
defaultRemote?.baseUrl ||
|
||||
defaultRemote?.apiKey ||
|
||||
defaultRemote?.headers,
|
||||
defaultRemote?.headers ||
|
||||
defaultRemote?.nonBatchConcurrency != null,
|
||||
);
|
||||
const includeRemote =
|
||||
hasRemoteConfig ||
|
||||
@@ -191,6 +194,8 @@ function mergeConfig(
|
||||
baseUrl: overrideRemote?.baseUrl ?? defaultRemote?.baseUrl,
|
||||
apiKey: overrideRemote?.apiKey ?? defaultRemote?.apiKey,
|
||||
headers: overrideRemote?.headers ?? defaultRemote?.headers,
|
||||
nonBatchConcurrency:
|
||||
overrideRemote?.nonBatchConcurrency ?? defaultRemote?.nonBatchConcurrency,
|
||||
batch,
|
||||
}
|
||||
: undefined;
|
||||
|
||||
@@ -4368,6 +4368,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
description:
|
||||
"Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.",
|
||||
},
|
||||
nonBatchConcurrency: {
|
||||
type: "integer",
|
||||
exclusiveMinimum: 0,
|
||||
maximum: 9007199254740991,
|
||||
title: "Remote Non-Batch Embedding Concurrency",
|
||||
description:
|
||||
"Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.",
|
||||
},
|
||||
batch: {
|
||||
type: "object",
|
||||
properties: {
|
||||
@@ -6358,6 +6366,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
type: "string",
|
||||
},
|
||||
},
|
||||
nonBatchConcurrency: {
|
||||
type: "integer",
|
||||
exclusiveMinimum: 0,
|
||||
maximum: 9007199254740991,
|
||||
},
|
||||
batch: {
|
||||
type: "object",
|
||||
properties: {
|
||||
@@ -26081,6 +26094,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
help: "Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.",
|
||||
tags: ["advanced"],
|
||||
},
|
||||
"agents.defaults.memorySearch.remote.nonBatchConcurrency": {
|
||||
label: "Remote Non-Batch Embedding Concurrency",
|
||||
help: "Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.",
|
||||
tags: ["performance"],
|
||||
},
|
||||
"agents.defaults.memorySearch.remote.batch.enabled": {
|
||||
label: "Remote Batch Embedding Enabled",
|
||||
help: "Enables provider batch APIs for embedding jobs when supported (OpenAI/Gemini), improving throughput on larger index runs. Keep this enabled unless debugging provider batch failures or running very small workloads.",
|
||||
|
||||
@@ -86,6 +86,7 @@ const TARGET_KEYS = [
|
||||
"agents.defaults.memorySearch.remote.baseUrl",
|
||||
"agents.defaults.memorySearch.remote.apiKey",
|
||||
"agents.defaults.memorySearch.remote.headers",
|
||||
"agents.defaults.memorySearch.remote.nonBatchConcurrency",
|
||||
"agents.defaults.memorySearch.remote.batch.enabled",
|
||||
"agents.defaults.memorySearch.remote.batch.wait",
|
||||
"agents.defaults.memorySearch.remote.batch.concurrency",
|
||||
|
||||
@@ -1014,6 +1014,8 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"Supplies a dedicated API key for remote embedding calls used by memory indexing and query-time embeddings. Use this when memory embeddings should use different credentials than global defaults or environment variables.",
|
||||
"agents.defaults.memorySearch.remote.headers":
|
||||
"Adds custom HTTP headers to remote embedding requests, merged with provider defaults. Use this for proxy auth and tenant routing headers, and keep values minimal to avoid leaking sensitive metadata.",
|
||||
"agents.defaults.memorySearch.remote.nonBatchConcurrency":
|
||||
"Limits concurrent inline embedding requests during non-batch memory indexing. Lower this for local or small self-hosted providers such as Ollama; batch embedding concurrency is configured separately under remote.batch.",
|
||||
"agents.defaults.memorySearch.remote.batch.enabled":
|
||||
"Enables provider batch APIs for embedding jobs when supported (OpenAI/Gemini), improving throughput on larger index runs. Keep this enabled unless debugging provider batch failures or running very small workloads.",
|
||||
"agents.defaults.memorySearch.remote.batch.wait":
|
||||
|
||||
@@ -417,6 +417,8 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.memorySearch.remote.baseUrl": "Remote Embedding Base URL",
|
||||
"agents.defaults.memorySearch.remote.apiKey": "Remote Embedding API Key",
|
||||
"agents.defaults.memorySearch.remote.headers": "Remote Embedding Headers",
|
||||
"agents.defaults.memorySearch.remote.nonBatchConcurrency":
|
||||
"Remote Non-Batch Embedding Concurrency",
|
||||
"agents.defaults.memorySearch.remote.batch.enabled": "Remote Batch Embedding Enabled",
|
||||
"agents.defaults.memorySearch.remote.batch.wait": "Remote Batch Wait for Completion",
|
||||
"agents.defaults.memorySearch.remote.batch.concurrency": "Remote Batch Concurrency",
|
||||
|
||||
@@ -365,6 +365,8 @@ export type MemorySearchConfig = {
|
||||
baseUrl?: string;
|
||||
apiKey?: SecretInput;
|
||||
headers?: Record<string, string>;
|
||||
/** Max concurrent non-batch embedding tasks during indexing. Useful for slower local providers such as Ollama. */
|
||||
nonBatchConcurrency?: number;
|
||||
batch?: {
|
||||
/** Enable batch API for embedding indexing (OpenAI/Gemini; default: true). */
|
||||
enabled?: boolean;
|
||||
|
||||
@@ -665,6 +665,7 @@ export const MemorySearchSchema = z
|
||||
baseUrl: z.string().optional(),
|
||||
apiKey: SecretInputSchema.optional().register(sensitive),
|
||||
headers: z.record(z.string(), z.string()).optional(),
|
||||
nonBatchConcurrency: z.number().int().positive().optional(),
|
||||
batch: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
|
||||
@@ -27,7 +27,7 @@ export function createRuntimeEnv(options?: RuntimeEnvOptions): OutputRuntimeEnv
|
||||
export function createTypedRuntimeEnv<TRuntime>(
|
||||
options?: TypedRuntimeEnvOptions<TRuntime>,
|
||||
): TRuntime {
|
||||
return createRuntimeEnv(options) as TRuntime;
|
||||
return createRuntimeEnv(options) as unknown as TRuntime;
|
||||
}
|
||||
|
||||
export function createNonExitingRuntimeEnv(): OutputRuntimeEnv {
|
||||
|
||||
Reference in New Issue
Block a user