diff --git a/CHANGELOG.md b/CHANGELOG.md index 638ef5f17d9..36aa419d861 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Agents/tools: scope tool-loop detection history to the active run when available, so scheduled heartbeat cycles no longer inherit stale repeated-call counts from previous runs. Fixes #40144. Thanks @mattbrown319. - Agents/reasoning: recover fully wrapped unclosed `` replies that would otherwise sanitize to empty text while keeping strict stripping for closed reasoning blocks and unclosed tails after visible text. Fixes #37696; supersedes #51915. Thanks @druide67 and @okuyam2y. - Control UI/Gateway: bind WebChat handshakes to their active socket and reject post-close server registrations, so aborted connects no longer leave zombie clients or misleading duplicate WebSocket connection logs. Fixes #72753. Thanks @LumenFromTheFuture. - Plugins/Windows: normalize Windows absolute paths before handing bundled plugin modules to Jiti, so Feishu/Lark message sending no longer fails with unsupported `c:` ESM loader URLs. Fixes #72783. Thanks @jackychen-png. diff --git a/docs/tools/loop-detection.md b/docs/tools/loop-detection.md index b8ee0a7c105..7e3a88726a4 100644 --- a/docs/tools/loop-detection.md +++ b/docs/tools/loop-detection.md @@ -74,6 +74,7 @@ Per-agent override (optional): - `detectors.pingPong`: detects alternating ping-pong patterns. For `exec`, no-progress checks compare stable command outcomes and ignore volatile runtime metadata such as duration, PID, session ID, and working directory. +When a run id is available, recent tool-call history is evaluated only within that run so scheduled heartbeat cycles and fresh runs do not inherit stale loop counts from earlier runs. ## Recommended setup diff --git a/src/agents/pi-tools.before-tool-call.e2e.test.ts b/src/agents/pi-tools.before-tool-call.e2e.test.ts index 9b95e23998a..b9fc5d3659d 100644 --- a/src/agents/pi-tools.before-tool-call.e2e.test.ts +++ b/src/agents/pi-tools.before-tool-call.e2e.test.ts @@ -245,6 +245,32 @@ describe("before_tool_call loop detection behavior", () => { ).rejects.toThrow("global circuit breaker"); }); + it("does not carry loop history across run ids", async () => { + const execute = vi.fn().mockResolvedValue({ + content: [{ type: "text", text: "same output" }], + details: { ok: true }, + }); + const params = { path: "/tmp/file" }; + const firstRunTool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, { + ...enabledLoopDetectionContext, + runId: "heartbeat-1", + }); + const secondRunTool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, { + ...enabledLoopDetectionContext, + runId: "heartbeat-2", + }); + + for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) { + await expect( + firstRunTool.execute(`old-run-${i}`, params, undefined, undefined), + ).resolves.toBeDefined(); + } + + await expect( + secondRunTool.execute("new-run-0", params, undefined, undefined), + ).resolves.toBeDefined(); + }); + it("coalesces repeated generic warning events into threshold buckets", async () => { await withToolLoopEvents( async (emitted) => { diff --git a/src/agents/pi-tools.before-tool-call.ts b/src/agents/pi-tools.before-tool-call.ts index 6d3c8f3213c..37faa2c2e5a 100644 --- a/src/agents/pi-tools.before-tool-call.ts +++ b/src/agents/pi-tools.before-tool-call.ts @@ -166,6 +166,7 @@ async function recordLoopOutcome(args: { result: args.result, error: args.error, config: args.ctx.loopDetection, + ...(args.ctx.runId && { runId: args.ctx.runId }), }); } catch (err) { log.warn(`tool loop outcome tracking failed: tool=${args.toolName} error=${String(err)}`); @@ -190,7 +191,14 @@ export async function runBeforeToolCallHook(args: { sessionId: args.ctx?.agentId, }); - const loopResult = detectToolCallLoop(sessionState, toolName, params, args.ctx.loopDetection); + const loopScope = args.ctx.runId ? { runId: args.ctx.runId } : undefined; + const loopResult = detectToolCallLoop( + sessionState, + toolName, + params, + args.ctx.loopDetection, + loopScope, + ); if (loopResult.stuck) { if (loopResult.level === "critical") { @@ -211,7 +219,8 @@ export async function runBeforeToolCallHook(args: { reason: loopResult.message, }; } - const warningKey = loopResult.warningKey ?? `${loopResult.detector}:${toolName}`; + const baseWarningKey = loopResult.warningKey ?? `${loopResult.detector}:${toolName}`; + const warningKey = args.ctx.runId ? `${args.ctx.runId}:${baseWarningKey}` : baseWarningKey; if (shouldEmitLoopWarning(sessionState, warningKey, loopResult.count)) { log.warn(`Loop warning for ${toolName}: ${loopResult.message}`); logToolLoopAction({ @@ -228,7 +237,14 @@ export async function runBeforeToolCallHook(args: { } } - recordToolCall(sessionState, toolName, params, args.toolCallId, args.ctx.loopDetection); + recordToolCall( + sessionState, + toolName, + params, + args.toolCallId, + args.ctx.loopDetection, + loopScope, + ); } const hookRunner = getGlobalHookRunner(); diff --git a/src/agents/tool-loop-detection.test.ts b/src/agents/tool-loop-detection.test.ts index e6dc3d1018e..9828441ff32 100644 --- a/src/agents/tool-loop-detection.test.ts +++ b/src/agents/tool-loop-detection.test.ts @@ -254,6 +254,16 @@ describe("tool-loop-detection", () => { expect(timestamp).toBeLessThanOrEqual(after); }); + it("records run id when provided", () => { + const state = createState(); + + recordToolCall(state, "tool", { arg: 1 }, "call-run", enabledLoopDetectionConfig, { + runId: "run-1", + }); + + expect(state.toolCallHistory?.[0]?.runId).toBe("run-1"); + }); + it("respects configured historySize", () => { const state = createState(); @@ -294,6 +304,59 @@ describe("tool-loop-detection", () => { expect(result.stuck).toBe(false); }); + it("ignores repeated history from other runs", () => { + const state = createState(); + const params = { path: "/same.txt" }; + + for (let i = 0; i < WARNING_THRESHOLD; i += 1) { + recordToolCall(state, "read", params, `old-run-${i}`, enabledLoopDetectionConfig, { + runId: "heartbeat-1", + }); + } + + const result = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig, { + runId: "heartbeat-2", + }); + + expect(result.stuck).toBe(false); + }); + + it("detects repeated history within the same run", () => { + const state = createState(); + const params = { path: "/same.txt" }; + + for (let i = 0; i < WARNING_THRESHOLD; i += 1) { + recordToolCall(state, "read", params, `same-run-${i}`, enabledLoopDetectionConfig, { + runId: "run-1", + }); + } + + const result = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig, { + runId: "run-1", + }); + + expect(result.stuck).toBe(true); + if (result.stuck) { + expect(result.detector).toBe("generic_repeat"); + expect(result.count).toBe(WARNING_THRESHOLD); + } + }); + + it("keeps scoped and unscoped history isolated", () => { + const state = createState(); + const params = { path: "/same.txt" }; + + for (let i = 0; i < WARNING_THRESHOLD; i += 1) { + recordToolCall(state, "read", params, `scoped-${i}`, enabledLoopDetectionConfig, { + runId: "run-1", + }); + } + + const result = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig); + + expect(result.stuck).toBe(false); + }); + it("warns on generic repeated tool+args calls", () => { const state = createState(); for (let i = 0; i < WARNING_THRESHOLD; i += 1) { @@ -748,6 +811,28 @@ describe("tool-loop-detection", () => { expect(entry?.resultHash?.length).toBe(64); }); + it("does not attach outcomes to matching calls from other runs", () => { + const state = createState(); + const params = { path: "/same.txt" }; + recordToolCall(state, "read", params, "call-1", enabledLoopDetectionConfig, { + runId: "run-1", + }); + + recordToolCallOutcome(state, { + toolName: "read", + toolParams: params, + toolCallId: "call-1", + result: { content: [{ type: "text", text: "same output" }] }, + config: enabledLoopDetectionConfig, + runId: "run-2", + }); + + expect(state.toolCallHistory).toHaveLength(2); + expect(state.toolCallHistory?.[0]?.resultHash).toBeUndefined(); + expect(state.toolCallHistory?.[1]?.runId).toBe("run-2"); + expect(state.toolCallHistory?.[1]?.resultHash).toBeTypeOf("string"); + }); + it("handles empty history", () => { const state = createState(); diff --git a/src/agents/tool-loop-detection.ts b/src/agents/tool-loop-detection.ts index f692676bc37..972f5bb8d43 100644 --- a/src/agents/tool-loop-detection.ts +++ b/src/agents/tool-loop-detection.ts @@ -1,6 +1,6 @@ import { createHash } from "node:crypto"; import type { ToolLoopDetectionConfig } from "../config/types.tools.js"; -import type { SessionState } from "../logging/diagnostic-session-state.js"; +import type { SessionState, ToolCallRecord } from "../logging/diagnostic-session-state.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { isPlainObject } from "../utils.js"; @@ -58,6 +58,23 @@ type ResolvedLoopDetectionConfig = { }; }; +export type ToolLoopDetectionScope = { + runId?: string; +}; + +function normalizeRunId(runId?: string): string | undefined { + const trimmed = runId?.trim(); + return trimmed ? trimmed : undefined; +} + +function selectHistoryForScope( + history: readonly ToolCallRecord[], + scope?: ToolLoopDetectionScope, +): ToolCallRecord[] { + const runId = normalizeRunId(scope?.runId); + return history.filter((record) => normalizeRunId(record.runId) === runId); +} + function asPositiveInt(value: number | undefined, fallback: number): number { if (typeof value !== "number" || !Number.isInteger(value) || value <= 0) { return fallback; @@ -483,12 +500,13 @@ export function detectToolCallLoop( toolName: string, params: unknown, config?: ToolLoopDetectionConfig, + scope?: ToolLoopDetectionScope, ): LoopDetectionResult { const resolvedConfig = resolveLoopDetectionConfig(config); if (!resolvedConfig.enabled) { return { stuck: false }; } - const history = state.toolCallHistory ?? []; + const history = selectHistoryForScope(state.toolCallHistory ?? [], scope); const currentHash = hashToolCall(toolName, params); const unknownToolStreak = getUnknownToolRepeatStreak(history, toolName); const noProgress = getNoProgressStreak(history, toolName, currentHash); @@ -625,8 +643,10 @@ export function recordToolCall( params: unknown, toolCallId?: string, config?: ToolLoopDetectionConfig, + scope?: ToolLoopDetectionScope, ): void { const resolvedConfig = resolveLoopDetectionConfig(config); + const runId = normalizeRunId(scope?.runId); if (!state.toolCallHistory) { state.toolCallHistory = []; } @@ -635,6 +655,7 @@ export function recordToolCall( toolName, argsHash: hashToolCall(toolName, params), toolCallId, + ...(runId && { runId }), timestamp: Date.now(), }); @@ -655,9 +676,11 @@ export function recordToolCallOutcome( result?: unknown; error?: unknown; config?: ToolLoopDetectionConfig; + runId?: string; }, ): void { const resolvedConfig = resolveLoopDetectionConfig(params.config); + const runId = normalizeRunId(params.runId); const outcome = hashToolOutcome(params.toolName, params.toolParams, params.result, params.error); const resultHash = outcome.resultHash; if (!resultHash) { @@ -675,6 +698,9 @@ export function recordToolCallOutcome( if (!call) { continue; } + if (normalizeRunId(call.runId) !== runId) { + continue; + } if (params.toolCallId && call.toolCallId !== params.toolCallId) { continue; } @@ -695,6 +721,7 @@ export function recordToolCallOutcome( toolName: params.toolName, argsHash, toolCallId: params.toolCallId, + ...(runId && { runId }), resultHash, unknownToolName: outcome.unknownToolName, timestamp: Date.now(), diff --git a/src/logging/diagnostic-session-state.ts b/src/logging/diagnostic-session-state.ts index 7c647ca17a1..965aa9f1f8d 100644 --- a/src/logging/diagnostic-session-state.ts +++ b/src/logging/diagnostic-session-state.ts @@ -15,6 +15,7 @@ export type ToolCallRecord = { toolName: string; argsHash: string; toolCallId?: string; + runId?: string; resultHash?: string; unknownToolName?: string; timestamp: number;