feat(agents): make run loop retry limits configurable in openclaw.json (#80661)

Merged via squash.

Prepared head SHA: d595d51a5a
Co-authored-by: medns <1575008+medns@users.noreply.github.com>
Co-authored-by: odysseus0 <8635094+odysseus0@users.noreply.github.com>
Reviewed-by: @odysseus0
2026-05-18 19:44:44 +00:00 · 2026-05-12 19:43:12 +08:00
parent 8a051395b7
commit a92d5fe77d
13 changed files with 183 additions and 6 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -95,6 +95,7 @@ Docs: https://docs.openclaw.ai
 - Exec approvals: add `tools.exec.commandHighlighting` so parser-derived command highlighting in approval prompts can be enabled globally or per agent. (#79348) Thanks @jesse-merhi.
 - Codex app-server: mirror native Codex subagent spawn lifecycle events into Task Registry so app-server child agents appear in task/status surfaces without relying on transcript text. (#79512) Thanks @mbelinky.
 - Gateway: expose optional `isHeartbeat` metadata on agent event payloads so clients can distinguish scheduled heartbeat runs from ordinary chat runs. (#80610) Thanks @medns.
+- Agents: add `agents.defaults.runRetries` and `agents.list[].runRetries` config for embedded Pi runner retry loop limits. (#80661) Thanks @medns.

 ### Fixes

--- a/docs/gateway/config-agents.md
+++ b/docs/gateway/config-agents.md
@@ -616,6 +616,36 @@ Periodic heartbeat runs.
 - `notifyUser`: when `true`, sends brief notices to the user when compaction starts and when it completes (for example, "Compacting context..." and "Compaction complete"). Disabled by default to keep compaction silent.
 - `memoryFlush`: silent agentic turn before auto-compaction to store durable memories. Set `model` to an exact provider/model such as `ollama/qwen3:8b` when this housekeeping turn should stay on a local model; the override does not inherit the active session fallback chain. Skipped when workspace is read-only.

+### `agents.defaults.runRetries`
+
+Bounds on how many times the embedded Pi runner's outer run loop may iterate while recovering from failures, so a run cannot retry forever. The effective limit is `base + max(1, profileCandidates) * perProfile`, clamped to the range `[min, max]`. This setting currently applies only to the embedded agent runtime, not to the ACP or CLI runtimes.
+
+```json5
+{
+  agents: {
+    defaults: {
+      runRetries: {
+        base: 24,
+        perProfile: 8,
+        min: 32,
+        max: 160,
+      },
+    },
+    list: [
+      {
+        id: "main",
+        runRetries: { max: 50 }, // optional per-agent override; unset fields inherit from defaults
+      },
+    ],
+  },
+}
+```
+
+- `base`: base number of run retry iterations for the outer run loop, before per-profile scaling. Default: `24`.
+- `perProfile`: additional run retry iterations granted per fallback profile candidate (at least one candidate is always counted). Default: `8`.
+- `min`: lower bound on the computed limit; the effective limit is never less than this. Default: `32`.
+- `max`: upper bound on the computed limit, to prevent runaway execution. Must be greater than or equal to `min` when both are set. Default: `160`.
+
 ### `agents.defaults.contextPruning`

 Prunes **old tool results** from in-memory context before sending to the LLM. Does **not** modify session history on disk.
--- a/src/agents/agent-scope-config.ts
+++ b/src/agents/agent-scope-config.ts
@@ -31,6 +31,7 @@ export type ResolvedAgentConfig = {
  identity?: AgentEntry["identity"];
  groupChat?: AgentEntry["groupChat"];
  subagents?: AgentEntry["subagents"];
+  runRetries?: AgentEntry["runRetries"];
  embeddedPi?: AgentEntry["embeddedPi"];
  sandbox?: AgentEntry["sandbox"];
  tools?: AgentEntry["tools"];
@@ -133,6 +134,10 @@ export function resolveAgentConfig(
    identity: entry.identity,
    groupChat: entry.groupChat,
    subagents: typeof entry.subagents === "object" && entry.subagents ? entry.subagents : undefined,
+    runRetries:
+      typeof entry.runRetries === "object" && entry.runRetries
+        ? { ...agentDefaults?.runRetries, ...entry.runRetries }
+        : agentDefaults?.runRetries,
    embeddedPi:
      typeof entry.embeddedPi === "object" && entry.embeddedPi ? entry.embeddedPi : undefined,
    sandbox: entry.sandbox,
--- a/src/agents/agent-scope.test.ts
+++ b/src/agents/agent-scope.test.ts
@@ -120,6 +120,36 @@ describe("resolveAgentConfig", () => {
    });
  });

+  it("merges runRetries from defaults with per-agent overrides", () => {
+    const cfg: OpenClawConfig = {
+      agents: {
+        defaults: {
+          runRetries: {
+            base: 24,
+            perProfile: 8,
+            min: 32,
+            max: 160,
+          },
+        },
+        list: [
+          {
+            id: "main",
+            runRetries: {
+              max: 50, // the only field this agent overrides
+            },
+          },
+        ],
+      },
+    };
+    // Fields absent from the agent entry are expected to come from agents.defaults.
+    expect(resolveAgentConfig(cfg, "main")?.runRetries).toEqual({
+      base: 24,
+      perProfile: 8,
+      min: 32,
+      max: 50,
+    });
+  });
+
  it("resolves explicit and effective model primary separately", () => {
    const cfgWithStringDefault = {
      agents: {
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -894,7 +894,11 @@ export async function runEmbeddedPiAgent(

      const MAX_TIMEOUT_COMPACTION_ATTEMPTS = 2;
      const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
-      const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length);
+      const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(
+        profileCandidates.length,
+        params.config,
+        sessionAgentId,
+      );
      let overflowCompactionAttempts = 0;
      let toolResultTruncationAttempted = false;
      let bootstrapPromptWarningSignaturesSeen =
--- a/src/agents/pi-embedded-runner/run/helpers.ts
+++ b/src/agents/pi-embedded-runner/run/helpers.ts
@@ -2,6 +2,7 @@ import type { AssistantMessage } from "@earendil-works/pi-ai";
 import type { OpenClawConfig } from "../../../config/types.openclaw.js";
 import { generateSecureToken } from "../../../infra/secure-random.js";
 import { extractAssistantTextForPhase } from "../../../shared/chat-message-content.js";
+import { resolveAgentConfig } from "../../agent-scope-config.js";
 import { extractAssistantVisibleText } from "../../pi-embedded-utils.js";
 import { derivePromptTokens, normalizeUsage } from "../../usage.js";
 import type { EmbeddedPiAgentMeta } from "../types.js";
@@ -71,11 +72,22 @@ const MIN_RUN_RETRY_ITERATIONS = 32;
 const MAX_RUN_RETRY_ITERATIONS = 160;

 // Defensive guard for the outer run loop across all retry branches.
-export function resolveMaxRunRetryIterations(profileCandidateCount: number): number {
-  const scaled =
-    BASE_RUN_RETRY_ITERATIONS +
-    Math.max(1, profileCandidateCount) * RUN_RETRY_ITERATIONS_PER_PROFILE;
-  return Math.min(MAX_RUN_RETRY_ITERATIONS, Math.max(MIN_RUN_RETRY_ITERATIONS, scaled));
+export function resolveMaxRunRetryIterations(
+  profileCandidateCount: number,
+  cfg?: OpenClawConfig, // optional; when omitted the built-in constants are used
+  agentId?: string, // optional; selects the per-agent runRetries via resolveAgentConfig
+): number {
+  const configRetries =
+    (cfg && agentId ? resolveAgentConfig(cfg, agentId)?.runRetries : undefined) ??
+    cfg?.agents?.defaults?.runRetries; // fall back to agents.defaults when no per-agent value resolves
+  // Sanitize each knob: base >= 1, perProfile >= 0, min >= 1, and max is raised to min if set lower.
+  const base = Math.max(1, configRetries?.base ?? BASE_RUN_RETRY_ITERATIONS);
+  const perProfile = Math.max(0, configRetries?.perProfile ?? RUN_RETRY_ITERATIONS_PER_PROFILE);
+  const minLimit = Math.max(1, configRetries?.min ?? MIN_RUN_RETRY_ITERATIONS);
+  const maxLimit = Math.max(minLimit, configRetries?.max ?? MAX_RUN_RETRY_ITERATIONS);
+  // Scale with the candidate count (at least one is counted), then clamp into [minLimit, maxLimit].
+  const scaled = base + Math.max(1, profileCandidateCount) * perProfile;
+  return Math.min(maxLimit, Math.max(minLimit, scaled));
+}

 export function resolveActiveErrorContext(params: {
--- a/src/config/schema.help.ts
+++ b/src/config/schema.help.ts
@@ -1436,6 +1436,23 @@ export const FIELD_HELP: Record<string, string> = {
    "User-prompt template used for the pre-compaction memory flush turn when generating memory candidates. Use this only when you need custom extraction instructions beyond the default memory flush behavior.",
  "agents.defaults.compaction.memoryFlush.systemPrompt":
    "System-prompt override for the pre-compaction memory flush turn to control extraction style and safety constraints. Use carefully so custom instructions do not reduce memory quality or leak sensitive context.",
+  "agents.defaults.runRetries":
+    "Outer run loop retry iteration boundaries for the embedded Pi runner to prevent infinite execution loops during failure recovery.",
+  "agents.defaults.runRetries.base":
+    "Base number of run retry iterations for the embedded Pi runner's outer run loop (default: 24).",
+  "agents.defaults.runRetries.perProfile":
+    "Additional run retry iterations granted per fallback profile candidate (default: 8).",
+  "agents.defaults.runRetries.min":
+    "Minimum absolute limit for run retry iterations (default: 32).",
+  "agents.defaults.runRetries.max":
+    "Maximum absolute limit for run retry iterations to prevent runaway execution (default: 160).",
+  "agents.list[].runRetries":
+    "Optional per-agent override for the embedded Pi runner's outer run loop retry iteration boundaries.",
+  "agents.list[].runRetries.base": "Base number of run retry iterations for this agent.",
+  "agents.list[].runRetries.perProfile":
+    "Additional run retry iterations granted per fallback profile candidate for this agent.",
+  "agents.list[].runRetries.min": "Minimum absolute limit for run retry iterations for this agent.",
+  "agents.list[].runRetries.max": "Maximum absolute limit for run retry iterations for this agent.",
  "agents.defaults.embeddedPi":
    "Embedded Pi runner hardening controls for how workspace-local Pi settings are trusted and applied in OpenClaw sessions.",
  "agents.defaults.embeddedPi.projectSettingsPolicy":
--- a/src/config/schema.labels.ts
+++ b/src/config/schema.labels.ts
@@ -664,6 +664,16 @@ export const FIELD_LABELS: Record<string, string> = {
    "Compaction Memory Flush Transcript Size Threshold",
  "agents.defaults.compaction.memoryFlush.prompt": "Compaction Memory Flush Prompt",
  "agents.defaults.compaction.memoryFlush.systemPrompt": "Compaction Memory Flush System Prompt",
+  "agents.defaults.runRetries": "Run Retries",
+  "agents.defaults.runRetries.base": "Run Retries Base",
+  "agents.defaults.runRetries.perProfile": "Run Retries Per Profile",
+  "agents.defaults.runRetries.min": "Run Retries Minimum",
+  "agents.defaults.runRetries.max": "Run Retries Maximum",
+  "agents.list[].runRetries": "Agent Run Retries",
+  "agents.list[].runRetries.base": "Agent Run Retries Base",
+  "agents.list[].runRetries.perProfile": "Agent Run Retries Per Profile",
+  "agents.list[].runRetries.min": "Agent Run Retries Minimum",
+  "agents.list[].runRetries.max": "Agent Run Retries Maximum",
  "agents.defaults.embeddedPi": "Embedded Pi",
  "agents.defaults.embeddedPi.projectSettingsPolicy": "Embedded Pi Project Settings Policy",
  "agents.defaults.embeddedPi.executionContract": "Embedded Pi Execution Contract",
--- a/src/config/types.agent-defaults.ts
+++ b/src/config/types.agent-defaults.ts
@@ -95,6 +95,17 @@ export type AgentContextLimitsConfig = {
  postCompactionMaxChars?: number;
 };

+export type AgentRunRetriesConfig = {
+  /** Base number of run retry iterations, before per-profile scaling (default: 24). */
+  base?: number;
+  /** Additional run retry iterations per fallback profile candidate (default: 8). */
+  perProfile?: number;
+  /** Lower bound applied to the computed run retry iteration limit (default: 32). */
+  min?: number;
+  /** Upper bound applied to the computed run retry iteration limit (default: 160). */
+  max?: number;
+};
+
 export type CliBackendConfig = {
  /** CLI command to execute (absolute path or on PATH). */
  command: string;
@@ -299,6 +310,8 @@ export type AgentDefaultsConfig = {
  contextPruning?: AgentContextPruningConfig;
  /** Compaction tuning and pre-compaction memory flush behavior. */
  compaction?: AgentCompactionConfig;
+  /** Outer run loop retry iteration boundaries. */
+  runRetries?: AgentRunRetriesConfig;
  /** Embedded Pi runner hardening and compatibility controls. */
  embeddedPi?: {
    /**
--- a/src/config/types.agents.ts
+++ b/src/config/types.agents.ts
@@ -126,6 +126,8 @@ export type AgentConfig = {
    /** Require explicit agentId in sessions_spawn (no default same-as-caller). */
    requireAgentId?: boolean;
  };
+  /** Optional per-agent run loop retry boundaries, merged over agents.defaults.runRetries. */
+  runRetries?: AgentDefaultsConfig["runRetries"];
  /** Optional per-agent embedded Pi overrides. */
  embeddedPi?: {
    /** Optional per-agent execution contract override. */
--- a/src/config/zod-schema.agent-defaults.test.ts
+++ b/src/config/zod-schema.agent-defaults.test.ts
@@ -167,6 +167,38 @@ describe("agent defaults schema", () => {
    expect(result.embeddedPi?.executionContract).toBe("strict-agentic");
  });

+  it("accepts runRetries configuration on defaults and agent entries", () => {
+    const result = AgentDefaultsSchema.parse({
+      runRetries: {
+        base: 24,
+        max: 160,
+      },
+    });
+    expect(result?.runRetries?.base).toBe(24);
+    expect(result?.runRetries?.max).toBe(160);
+    // Per-agent entries accept a partial runRetries object as well.
+    const agentResult = AgentEntrySchema.parse({
+      id: "test",
+      runRetries: {
+        min: 10,
+        max: 50,
+      },
+    });
+    expect(agentResult?.runRetries?.min).toBe(10);
+    expect(agentResult?.runRetries?.max).toBe(50);
+  });
+
+  it("rejects runRetries with max < min", () => {
+    expectSchemaFailurePath(
+      AgentDefaultsSchema.safeParse({ runRetries: { min: 100, max: 50 } }),
+      "runRetries.max", // the schema's refine() attaches the violation to `max`
+    );
+    expectSchemaFailurePath(
+      AgentEntrySchema.safeParse({ id: "test", runRetries: { min: 100, max: 50 } }),
+      "runRetries.max", // same rule applies to per-agent entries
+    );
+  });
+
  it("accepts compaction.truncateAfterCompaction", () => {
    const result = AgentDefaultsSchema.parse({
      compaction: {
--- a/src/config/zod-schema.agent-defaults.ts
+++ b/src/config/zod-schema.agent-defaults.ts
@@ -8,6 +8,7 @@ import {
  AgentRuntimePolicySchema,
  AgentModelSchema,
  MemorySearchSchema,
+  AgentRunRetriesConfigSchema,
 } from "./zod-schema.agent-runtime.js";
 import {
  BlockStreamingChunkSchema,
@@ -218,6 +219,7 @@ export const AgentDefaultsSchema = z
      })
      .strict()
      .optional(),
+    runRetries: AgentRunRetriesConfigSchema.optional(),
    embeddedPi: z
      .object({
        projectSettingsPolicy: z
--- a/src/config/zod-schema.agent-runtime.ts
+++ b/src/config/zod-schema.agent-runtime.ts
@@ -19,6 +19,24 @@ import {
 } from "./zod-schema.core.js";
 import { sensitive } from "./zod-schema.sensitive.js";

+export const AgentRunRetriesConfigSchema = z
+  .object({
+    base: z.number().int().positive().optional(),
+    perProfile: z.number().int().nonnegative().optional(), // 0 is allowed here, unlike the other fields
+    min: z.number().int().positive().optional(),
+    max: z.number().int().positive().optional(),
+  })
+  .strict() // unknown keys are rejected
+  .refine(
+    (data) => {
+      if (data.min !== undefined && data.max !== undefined) { // cross-check only when both are set
+        return data.max >= data.min;
+      }
+      return true; // a lone min or max is not compared against anything in this schema
+    },
+    { message: "max must be greater than or equal to min", path: ["max"] },
+  );
+
 export const HeartbeatSchema = z
  .object({
    every: z.string().optional(),
@@ -951,6 +969,7 @@ export const AgentEntrySchema = z
      })
      .strict()
      .optional(),
+    runRetries: AgentRunRetriesConfigSchema.optional(),
    embeddedPi: z
      .object({
        executionContract: z.union([z.literal("default"), z.literal("strict-agentic")]).optional(),