fix(agents): scope loop detection to runs

2026-05-06 06:20:43 +00:00 · 2026-04-27 12:59:54 +01:00
parent d73e2ee774
commit 6a55a00da4
7 changed files with 162 additions and 5 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai

 ### Fixes

+- Agents/tools: scope tool-loop detection history to the active run when available, so scheduled heartbeat cycles no longer inherit stale repeated-call counts from previous runs. Fixes #40144. Thanks @mattbrown319.
 - Agents/reasoning: recover fully wrapped unclosed `<think>` replies that would otherwise sanitize to empty text while keeping strict stripping for closed reasoning blocks and unclosed tails after visible text. Fixes #37696; supersedes #51915. Thanks @druide67 and @okuyam2y.
 - Control UI/Gateway: bind WebChat handshakes to their active socket and reject post-close server registrations, so aborted connects no longer leave zombie clients or misleading duplicate WebSocket connection logs. Fixes #72753. Thanks @LumenFromTheFuture.
 - Plugins/Windows: normalize Windows absolute paths before handing bundled plugin modules to Jiti, so Feishu/Lark message sending no longer fails with unsupported `c:` ESM loader URLs. Fixes #72783. Thanks @jackychen-png.
--- a/docs/tools/loop-detection.md
+++ b/docs/tools/loop-detection.md
@@ -74,6 +74,7 @@ Per-agent override (optional):
 - `detectors.pingPong`: detects alternating ping-pong patterns.

 For `exec`, no-progress checks compare stable command outcomes and ignore volatile runtime metadata such as duration, PID, session ID, and working directory.
+When a run id is available, recent tool-call history is evaluated only within that run so scheduled heartbeat cycles and fresh runs do not inherit stale loop counts from earlier runs.

 ## Recommended setup

--- a/src/agents/pi-tools.before-tool-call.e2e.test.ts
+++ b/src/agents/pi-tools.before-tool-call.e2e.test.ts
@@ -245,6 +245,32 @@ describe("before_tool_call loop detection behavior", () => {
    ).rejects.toThrow("global circuit breaker");
  });

+  it("does not carry loop history across run ids", async () => {
+    const execute = vi.fn().mockResolvedValue({
+      content: [{ type: "text", text: "same output" }],
+      details: { ok: true },
+    });
+    const params = { path: "/tmp/file" };
+    const firstRunTool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
+      ...enabledLoopDetectionContext,
+      runId: "heartbeat-1",
+    });
+    const secondRunTool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
+      ...enabledLoopDetectionContext,
+      runId: "heartbeat-2",
+    });
+
+    for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
+      await expect(
+        firstRunTool.execute(`old-run-${i}`, params, undefined, undefined),
+      ).resolves.toBeDefined();
+    }
+
+    await expect(
+      secondRunTool.execute("new-run-0", params, undefined, undefined),
+    ).resolves.toBeDefined();
+  });
+
  it("coalesces repeated generic warning events into threshold buckets", async () => {
    await withToolLoopEvents(
      async (emitted) => {
--- a/src/agents/pi-tools.before-tool-call.ts
+++ b/src/agents/pi-tools.before-tool-call.ts
@@ -166,6 +166,7 @@ async function recordLoopOutcome(args: {
      result: args.result,
      error: args.error,
      config: args.ctx.loopDetection,
+      ...(args.ctx.runId && { runId: args.ctx.runId }),
    });
  } catch (err) {
    log.warn(`tool loop outcome tracking failed: tool=${args.toolName} error=${String(err)}`);
@@ -190,7 +191,14 @@ export async function runBeforeToolCallHook(args: {
      sessionId: args.ctx?.agentId,
    });

-    const loopResult = detectToolCallLoop(sessionState, toolName, params, args.ctx.loopDetection);
+    const loopScope = args.ctx.runId ? { runId: args.ctx.runId } : undefined;
+    const loopResult = detectToolCallLoop(
+      sessionState,
+      toolName,
+      params,
+      args.ctx.loopDetection,
+      loopScope,
+    );

    if (loopResult.stuck) {
      if (loopResult.level === "critical") {
@@ -211,7 +219,8 @@ export async function runBeforeToolCallHook(args: {
          reason: loopResult.message,
        };
      }
-      const warningKey = loopResult.warningKey ?? `${loopResult.detector}:${toolName}`;
+      const baseWarningKey = loopResult.warningKey ?? `${loopResult.detector}:${toolName}`;
+      const warningKey = args.ctx.runId ? `${args.ctx.runId}:${baseWarningKey}` : baseWarningKey;
      if (shouldEmitLoopWarning(sessionState, warningKey, loopResult.count)) {
        log.warn(`Loop warning for ${toolName}: ${loopResult.message}`);
        logToolLoopAction({
@@ -228,7 +237,14 @@ export async function runBeforeToolCallHook(args: {
      }
    }

-    recordToolCall(sessionState, toolName, params, args.toolCallId, args.ctx.loopDetection);
+    recordToolCall(
+      sessionState,
+      toolName,
+      params,
+      args.toolCallId,
+      args.ctx.loopDetection,
+      loopScope,
+    );
  }

  const hookRunner = getGlobalHookRunner();
--- a/src/agents/tool-loop-detection.test.ts
+++ b/src/agents/tool-loop-detection.test.ts
@@ -254,6 +254,16 @@ describe("tool-loop-detection", () => {
      expect(timestamp).toBeLessThanOrEqual(after);
    });

+    it("records run id when provided", () => {
+      const state = createState();
+
+      recordToolCall(state, "tool", { arg: 1 }, "call-run", enabledLoopDetectionConfig, {
+        runId: "run-1",
+      });
+
+      expect(state.toolCallHistory?.[0]?.runId).toBe("run-1");
+    });
+
    it("respects configured historySize", () => {
      const state = createState();

@@ -294,6 +304,59 @@ describe("tool-loop-detection", () => {
      expect(result.stuck).toBe(false);
    });

+    it("ignores repeated history from other runs", () => {
+      const state = createState();
+      const params = { path: "/same.txt" };
+
+      for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
+        recordToolCall(state, "read", params, `old-run-${i}`, enabledLoopDetectionConfig, {
+          runId: "heartbeat-1",
+        });
+      }
+
+      const result = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig, {
+        runId: "heartbeat-2",
+      });
+
+      expect(result.stuck).toBe(false);
+    });
+
+    it("detects repeated history within the same run", () => {
+      const state = createState();
+      const params = { path: "/same.txt" };
+
+      for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
+        recordToolCall(state, "read", params, `same-run-${i}`, enabledLoopDetectionConfig, {
+          runId: "run-1",
+        });
+      }
+
+      const result = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig, {
+        runId: "run-1",
+      });
+
+      expect(result.stuck).toBe(true);
+      if (result.stuck) {
+        expect(result.detector).toBe("generic_repeat");
+        expect(result.count).toBe(WARNING_THRESHOLD);
+      }
+    });
+
+    it("keeps scoped and unscoped history isolated", () => {
+      const state = createState();
+      const params = { path: "/same.txt" };
+
+      for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
+        recordToolCall(state, "read", params, `scoped-${i}`, enabledLoopDetectionConfig, {
+          runId: "run-1",
+        });
+      }
+
+      const result = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig);
+
+      expect(result.stuck).toBe(false);
+    });
+
    it("warns on generic repeated tool+args calls", () => {
      const state = createState();
      for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
@@ -748,6 +811,28 @@ describe("tool-loop-detection", () => {
      expect(entry?.resultHash?.length).toBe(64);
    });

+    it("does not attach outcomes to matching calls from other runs", () => {
+      const state = createState();
+      const params = { path: "/same.txt" };
+      recordToolCall(state, "read", params, "call-1", enabledLoopDetectionConfig, {
+        runId: "run-1",
+      });
+
+      recordToolCallOutcome(state, {
+        toolName: "read",
+        toolParams: params,
+        toolCallId: "call-1",
+        result: { content: [{ type: "text", text: "same output" }] },
+        config: enabledLoopDetectionConfig,
+        runId: "run-2",
+      });
+
+      expect(state.toolCallHistory).toHaveLength(2);
+      expect(state.toolCallHistory?.[0]?.resultHash).toBeUndefined();
+      expect(state.toolCallHistory?.[1]?.runId).toBe("run-2");
+      expect(state.toolCallHistory?.[1]?.resultHash).toBeTypeOf("string");
+    });
+
    it("handles empty history", () => {
      const state = createState();

--- a/src/agents/tool-loop-detection.ts
+++ b/src/agents/tool-loop-detection.ts
@@ -1,6 +1,6 @@
 import { createHash } from "node:crypto";
 import type { ToolLoopDetectionConfig } from "../config/types.tools.js";
-import type { SessionState } from "../logging/diagnostic-session-state.js";
+import type { SessionState, ToolCallRecord } from "../logging/diagnostic-session-state.js";
 import { createSubsystemLogger } from "../logging/subsystem.js";
 import { isPlainObject } from "../utils.js";

@@ -58,6 +58,23 @@ type ResolvedLoopDetectionConfig = {
  };
 };

+export type ToolLoopDetectionScope = {
+  runId?: string;
+};
+
+function normalizeRunId(runId?: string): string | undefined {
+  const trimmed = runId?.trim();
+  return trimmed ? trimmed : undefined;
+}
+
+function selectHistoryForScope(
+  history: readonly ToolCallRecord[],
+  scope?: ToolLoopDetectionScope,
+): ToolCallRecord[] {
+  const runId = normalizeRunId(scope?.runId);
+  return history.filter((record) => normalizeRunId(record.runId) === runId);
+}
+
 function asPositiveInt(value: number | undefined, fallback: number): number {
  if (typeof value !== "number" || !Number.isInteger(value) || value <= 0) {
    return fallback;
@@ -483,12 +500,13 @@ export function detectToolCallLoop(
  toolName: string,
  params: unknown,
  config?: ToolLoopDetectionConfig,
+  scope?: ToolLoopDetectionScope,
 ): LoopDetectionResult {
  const resolvedConfig = resolveLoopDetectionConfig(config);
  if (!resolvedConfig.enabled) {
    return { stuck: false };
  }
-  const history = state.toolCallHistory ?? [];
+  const history = selectHistoryForScope(state.toolCallHistory ?? [], scope);
  const currentHash = hashToolCall(toolName, params);
  const unknownToolStreak = getUnknownToolRepeatStreak(history, toolName);
  const noProgress = getNoProgressStreak(history, toolName, currentHash);
@@ -625,8 +643,10 @@ export function recordToolCall(
  params: unknown,
  toolCallId?: string,
  config?: ToolLoopDetectionConfig,
+  scope?: ToolLoopDetectionScope,
 ): void {
  const resolvedConfig = resolveLoopDetectionConfig(config);
+  const runId = normalizeRunId(scope?.runId);
  if (!state.toolCallHistory) {
    state.toolCallHistory = [];
  }
@@ -635,6 +655,7 @@ export function recordToolCall(
    toolName,
    argsHash: hashToolCall(toolName, params),
    toolCallId,
+    ...(runId && { runId }),
    timestamp: Date.now(),
  });

@@ -655,9 +676,11 @@ export function recordToolCallOutcome(
    result?: unknown;
    error?: unknown;
    config?: ToolLoopDetectionConfig;
+    runId?: string;
  },
 ): void {
  const resolvedConfig = resolveLoopDetectionConfig(params.config);
+  const runId = normalizeRunId(params.runId);
  const outcome = hashToolOutcome(params.toolName, params.toolParams, params.result, params.error);
  const resultHash = outcome.resultHash;
  if (!resultHash) {
@@ -675,6 +698,9 @@ export function recordToolCallOutcome(
    if (!call) {
      continue;
    }
+    if (normalizeRunId(call.runId) !== runId) {
+      continue;
+    }
    if (params.toolCallId && call.toolCallId !== params.toolCallId) {
      continue;
    }
@@ -695,6 +721,7 @@ export function recordToolCallOutcome(
      toolName: params.toolName,
      argsHash,
      toolCallId: params.toolCallId,
+      ...(runId && { runId }),
      resultHash,
      unknownToolName: outcome.unknownToolName,
      timestamp: Date.now(),
--- a/src/logging/diagnostic-session-state.ts
+++ b/src/logging/diagnostic-session-state.ts
@@ -15,6 +15,7 @@ export type ToolCallRecord = {
  toolName: string;
  argsHash: string;
  toolCallId?: string;
+  runId?: string;
  resultHash?: string;
  unknownToolName?: string;
  timestamp: number;