fix(agents): stabilize exec loop outcome hashing

2026-05-06 14:20:44 +00:00 · 2026-04-27 12:33:37 +01:00
parent 35335214b3
commit e9bce3f81c
4 changed files with 146 additions and 0 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes

 - CLI/doctor: run bundled plugin runtime-dependency repairs through the async npm installer with spinner/line progress and heartbeat updates, so long `openclaw doctor --fix` installs no longer look hung in TTY or piped output. Fixes #72775. Thanks @dfpalhano.
+- Agents/tools: ignore volatile `exec` runtime metadata when comparing tool-loop outcomes, so enabled loop detection can stop repeated identical shell-command results instead of resetting on duration, PID, session, or cwd changes. Fixes #34574; supersedes #41502. Thanks @gucasbrg and @Zcg2021.
 - Agents/fallback: classify internal live-session model switch conflicts as unknown fallback failures instead of provider overloads, preventing local vLLM endpoints from receiving misleading overloaded cooldowns. Refs #63229. Thanks @clawdia-lobster.
 - Control UI: keep session-specific assistant identity loads authoritative after WebSocket connect, so non-main agent chat sessions do not show the main agent name in the header after bootstrap refreshes. Fixes #72776. Thanks @rockytian-top.
 - Agents/Qwen: preserve exact custom `modelstudio` provider configs with foreign `api` owners so explicit OpenAI-compatible Model Studio endpoints no longer get normalized into the bundled Qwen plugin path. Fixes #64483. Thanks @FiredMosquito831.
--- a/docs/tools/loop-detection.md
+++ b/docs/tools/loop-detection.md
@@ -73,6 +73,8 @@ Per-agent override (optional):
 - `detectors.knownPollNoProgress`: detects known polling-like patterns with no state change.
 - `detectors.pingPong`: detects alternating ping-pong patterns.

+For `exec`, no-progress checks compare stable command outcomes and ignore volatile runtime metadata such as duration, PID, session ID, and working directory.
+
 ## Recommended setup

 - Start with `enabled: true`, defaults unchanged.
--- a/src/agents/tool-loop-detection.test.ts
+++ b/src/agents/tool-loop-detection.test.ts
@@ -462,6 +462,104 @@ describe("tool-loop-detection", () => {
      }
    });

+    it("blocks repeated completed exec calls despite volatile runtime details", () => {
+      const state = createState();
+      const params = { command: "grafana-api.sh datasources" };
+
+      for (let index = 0; index < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; index += 1) { // One recorded call per iteration, same params each time.
+        recordSuccessfulCall(
+          state,
+          "exec",
+          params,
+          {
+            content: [{ type: "text", text: "Loki\nPrometheus" }],
+            details: {
+              status: "completed",
+              exitCode: 0,
+              durationMs: 100 + index, // Differs on every iteration.
+              cwd: `/tmp/run-${index}`, // Differs on every iteration.
+              aggregated: "Loki\nPrometheus", // Identical on every iteration.
+            },
+          },
+          index,
+        );
+      }
+
+      const loopResult = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) { // NOTE(review): presumably narrows the result type before reading level/detector - confirm.
+        expect(loopResult.level).toBe("critical");
+        expect(loopResult.detector).toBe("global_circuit_breaker");
+      }
+    });
+
+    it("blocks repeated running exec calls despite volatile session details and text", () => {
+      const state = createState();
+      const params = { command: "tail -f /var/log/app.log", yieldMs: 1000 };
+
+      for (let index = 0; index < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; index += 1) { // One recorded call per iteration, same params each time.
+        recordSuccessfulCall(
+          state,
+          "exec",
+          params,
+          {
+            content: [
+              {
+                type: "text",
+                text: `Command still running (session sess-${index}, pid ${1000 + index})`, // Text itself changes per iteration.
+              },
+            ],
+            details: {
+              status: "running",
+              sessionId: `sess-${index}`, // Differs on every iteration.
+              pid: 1000 + index, // Differs on every iteration.
+              startedAt: Date.now() + index, // Differs on every iteration.
+              cwd: `/tmp/run-${index}`, // Differs on every iteration.
+              tail: "(no new output)", // Identical on every iteration.
+            },
+          },
+          index,
+        );
+      }
+
+      const loopResult = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
+      expect(loopResult.stuck).toBe(true);
+      if (loopResult.stuck) { // NOTE(review): presumably narrows the result type before reading level/detector - confirm.
+        expect(loopResult.level).toBe("critical");
+        expect(loopResult.detector).toBe("global_circuit_breaker");
+      }
+    });
+
+    it("keeps changing exec output below the global no-progress breaker", () => {
+      const state = createState();
+      const params = { command: "date" };
+
+      for (let index = 0; index < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; index += 1) { // Same call count as the breaker tests, but output varies.
+        recordSuccessfulCall(
+          state,
+          "exec",
+          params,
+          {
+            content: [{ type: "text", text: `tick ${index}` }], // Differs on every iteration.
+            details: {
+              status: "completed",
+              exitCode: 0,
+              durationMs: 100 + index,
+              aggregated: `tick ${index}`, // Differs on every iteration.
+            },
+          },
+          index,
+        );
+      }
+
+      const loopResult = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
+      expect(loopResult.stuck).toBe(true); // Still reported as stuck, but only at warning level (asserted below).
+      if (loopResult.stuck) {
+        expect(loopResult.level).toBe("warning");
+        expect(loopResult.detector).toBe("generic_repeat"); // Not the global_circuit_breaker detector.
+      }
+    });
+
    it("does not block repeated unknown-tool failures before the unknown-tool threshold", () => {
      const state = createState();
      const toolName = "exec";
--- a/src/agents/tool-loop-detection.ts
+++ b/src/agents/tool-loop-detection.ts
@@ -202,6 +202,45 @@ function extractUnknownToolName(error: unknown): string | undefined {
  return toolName ? toolName.toLowerCase() : undefined;
 }

+function stringField(value: unknown): string | null { // Narrows an unknown value to a string.
+  return typeof value === "string" ? value : null; // Every non-string value, including undefined, becomes null.
+}
+
+function hashExecToolOutcome(details: Record<string, unknown>, text: string): string | undefined { // Digests only the fields listed per status below; undefined when no branch applies.
+  const status = stringField(details.status);
+  if (!status) { // Missing, non-string, or empty status: no exec-specific hash.
+    return undefined;
+  }
+
+  if (status === "running") { // Only status and tail are digested; `text` is not read in this branch.
+    return digestStable({
+      status,
+      tail: stringField(details.tail) ?? "", // Non-string tail is treated as "".
+    });
+  }
+
+  if (status === "completed" || status === "failed") {
+    return digestStable({
+      status,
+      exitCode: typeof details.exitCode === "number" ? details.exitCode : null, // Non-numeric exit codes digest as null.
+      timedOut: details.timedOut === true, // Anything other than literal true digests as false.
+      output: stringField(details.aggregated) ?? text, // Falls back to `text` only when aggregated is not a string.
+    });
+  }
+
+  if (status === "approval-pending" || status === "approval-unavailable") {
+    return digestStable({
+      status,
+      reason: stringField(details.reason), // Stays null when not a string (no "" default, unlike command/warningText).
+      host: stringField(details.host), // Stays null when not a string.
+      command: stringField(details.command) ?? "",
+      warningText: stringField(details.warningText) ?? "",
+    });
+  }
+
+  return undefined; // Any other status value: no exec-specific hash.
+}
+
 function hashToolOutcome(
  toolName: string,
  params: unknown,
@@ -221,6 +260,12 @@ function hashToolOutcome(

  const details = isPlainObject(result.details) ? result.details : {};
  const text = extractTextContent(result);
+  if (toolName === "exec") {
+    const execHash = hashExecToolOutcome(details, text);
+    if (execHash) {
+      return { resultHash: execHash };
+    }
+  }
  if (isKnownPollToolCall(toolName, params) && toolName === "process" && isPlainObject(params)) {
    const action = params.action;
    if (action === "poll") {