From eebabf679b983e5a660fb3cef371e1303f11f615 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Mon, 16 Feb 2026 14:46:25 -0500 Subject: [PATCH] fix: gate ping-pong blocking on no-progress --- CHANGELOG.md | 2 +- src/agents/pi-tools.before-tool-call.test.ts | 69 +++++++++++++++++- src/agents/tool-loop-detection.test.ts | 50 ++++++++++++- src/agents/tool-loop-detection.ts | 75 +++++++++++++++++--- 4 files changed, 182 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38713fb80be..c2539747ac8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ Docs: https://docs.openclaw.ai - Gateway/Config: prevent `config.patch` object-array merges from falling back to full-array replacement when some patch entries lack `id`, so partial `agents.list` updates no longer drop unrelated agents. (#17989) Thanks @stakeswky. - Config/Discord: require string IDs in Discord allowlists, keep onboarding inputs string-only, and add doctor repair for numeric entries. (#18220) Thanks @thewilloftheshadow. - Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost. -- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, warning on generic identical-call repeats, warning + blocking ping-pong alternation loops (10/20), coalescing repeated warning spam into threshold buckets, adding a global circuit breaker at 30 no-progress repeats, and emitting structured diagnostic `tool.loop` warning/error events for loop actions. (#16808) Thanks @akramcodez and @beca-oc. +- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, warning on generic identical-call repeats, warning + no-progress-blocking ping-pong alternation loops (10/20), coalescing repeated warning spam into threshold buckets (including canonical ping-pong pairs), adding a global circuit breaker at 30 no-progress repeats, and emitting structured diagnostic `tool.loop` warning/error events for loop actions. (#16808) Thanks @akramcodez and @beca-oc. - Agents/Tools: scope the `message` tool schema to the active channel so Telegram uses `buttons` and Discord uses `components`. (#18215) Thanks @obviyus. - Discord: optimize reaction notification handling to skip unnecessary message fetches in `off`/`all`/`allowlist` modes, streamline reaction routing, and improve reaction emoji formatting. (#18248) Thanks @thewilloftheshadow and @victorGPT. - Telegram: keep draft-stream preview replies attached to the user message for `replyToMode: "all"` in groups and DMs, preserving threaded reply context from preview through finalization. (#17880) Thanks @yinghaosang. diff --git a/src/agents/pi-tools.before-tool-call.test.ts b/src/agents/pi-tools.before-tool-call.test.ts index 8d3cb51a691..34ae63ffc64 100644 --- a/src/agents/pi-tools.before-tool-call.test.ts +++ b/src/agents/pi-tools.before-tool-call.test.ts @@ -182,8 +182,14 @@ describe("before_tool_call loop detection behavior", () => { } await listTool.execute("list-9", { dir: "/workspace" }, undefined, undefined); + await readTool.execute("read-10", { path: "/a.txt" }, undefined, undefined); + await listTool.execute("list-11", { dir: "/workspace" }, undefined, undefined); - const loopEvent = emitted.at(-1); + const pingPongWarns = emitted.filter( + (evt) => evt.level === "warning" && evt.detector === "ping_pong", + ); + expect(pingPongWarns).toHaveLength(1); + const loopEvent = pingPongWarns[0]; expect(loopEvent?.type).toBe("tool.loop"); expect(loopEvent?.level).toBe("warning"); expect(loopEvent?.action).toBe("warn"); @@ -255,6 +261,67 @@ describe("before_tool_call loop detection behavior", () => { } }); + it("does not block ping-pong at critical threshold when outcomes are progressing", async () => { + const emitted: DiagnosticToolLoopEvent[] = []; + const stop = onDiagnosticEvent((evt) => { + if (evt.type === "tool.loop") { + emitted.push(evt); + } + }); + try { + const readExecute = vi.fn().mockImplementation(async (toolCallId: string) => ({ + content: [{ type: "text", text: `read ${toolCallId}` }], + details: { ok: true }, + })); + const listExecute = vi.fn().mockImplementation(async (toolCallId: string) => ({ + content: [{ type: "text", text: `list ${toolCallId}` }], + details: { ok: true }, + })); + const readTool = wrapToolWithBeforeToolCallHook( + { name: "read", execute: readExecute } as unknown as AnyAgentTool, + { + agentId: "main", + sessionKey: "main", + }, + ); + const listTool = wrapToolWithBeforeToolCallHook( + { name: "list", execute: listExecute } as unknown as AnyAgentTool, + { + agentId: "main", + sessionKey: "main", + }, + ); + + for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) { + if (i % 2 === 0) { + await readTool.execute(`read-${i}`, { path: "/a.txt" }, undefined, undefined); + } else { + await listTool.execute(`list-${i}`, { dir: "/workspace" }, undefined, undefined); + } + } + + await expect( + listTool.execute( + `list-${CRITICAL_THRESHOLD - 1}`, + { dir: "/workspace" }, + undefined, + undefined, + ), + ).resolves.toBeDefined(); + + const criticalPingPong = emitted.find( + (evt) => evt.level === "critical" && evt.detector === "ping_pong", + ); + expect(criticalPingPong).toBeUndefined(); + const warningPingPong = emitted.find( + (evt) => evt.level === "warning" && evt.detector === "ping_pong", + ); + expect(warningPingPong).toBeTruthy(); + } finally { + stop(); + } + }); + it("emits structured critical diagnostic events when blocking loops", async () => { const emitted: DiagnosticToolLoopEvent[] = []; const stop = onDiagnosticEvent((evt) => { diff --git a/src/agents/tool-loop-detection.test.ts b/src/agents/tool-loop-detection.test.ts index 3a98a8b968c..1955c6634f2 100644 --- a/src/agents/tool-loop-detection.test.ts +++ b/src/agents/tool-loop-detection.test.ts @@ -271,9 +271,21 @@ describe("tool-loop-detection", () => { for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) { if (i % 2 === 0) { - recordToolCall(state, "read", readParams, `read-${i}`); + recordSuccessfulCall( + state, + "read", + readParams, + { content: [{ type: "text", text: "read stable" }], details: { ok: true } }, + i, + ); } else { - recordToolCall(state, "list", listParams, `list-${i}`); + recordSuccessfulCall( + state, + "list", + listParams, + { content: [{ type: "text", text: "list stable" }], details: { ok: true } }, + i, + ); } } @@ -288,6 +300,40 @@ describe("tool-loop-detection", () => { } }); + it("does not block ping-pong at critical threshold when outcomes are progressing", () => { + const state = createState(); + const readParams = { path: "/a.txt" }; + const listParams = { dir: "/workspace" }; + + for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) { + if (i % 2 === 0) { + recordSuccessfulCall( + state, + "read", + readParams, + { content: [{ type: "text", text: `read ${i}` }], details: { ok: true } }, + i, + ); + } else { + recordSuccessfulCall( + state, + "list", + listParams, + { content: [{ type: "text", text: `list ${i}` }], details: { ok: true } }, + i, + ); + } + } + + const loopResult = detectToolCallLoop(state, "list", listParams); + expect(loopResult.stuck).toBe(true); + if (loopResult.stuck) { + expect(loopResult.level).toBe("warning"); + expect(loopResult.detector).toBe("ping_pong"); + expect(loopResult.count).toBe(CRITICAL_THRESHOLD); + } + }); + it("does not flag ping-pong when alternation is broken", () => { const state = createState(); recordToolCall(state, "read", { path: "/a.txt" }, "a1"); diff --git a/src/agents/tool-loop-detection.ts b/src/agents/tool-loop-detection.ts index 22d1bcd515a..b32ad4cb2f7 100644 --- a/src/agents/tool-loop-detection.ts +++ b/src/agents/tool-loop-detection.ts @@ -189,12 +189,17 @@ function getNoProgressStreak( } function getPingPongStreak( - history: Array<{ toolName: string; argsHash: string }>, + history: Array<{ toolName: string; argsHash: string; resultHash?: string }>, currentSignature: string, -): { count: number; pairedToolName?: string } { +): { + count: number; + pairedToolName?: string; + pairedSignature?: string; + noProgressEvidence: boolean; +} { const last = history.at(-1); if (!last) { - return { count: 0 }; + return { count: 0, noProgressEvidence: false }; } let otherSignature: string | undefined; @@ -212,7 +217,7 @@ function getPingPongStreak( } if (!otherSignature || !otherToolName) { - return { count: 0 }; + return { count: 0, noProgressEvidence: false }; } let alternatingTailCount = 0; @@ -229,20 +234,66 @@ function getPingPongStreak( } if (alternatingTailCount < 2) { - return { count: 0 }; + return { count: 0, noProgressEvidence: false }; } const expectedCurrentSignature = otherSignature; if (currentSignature !== expectedCurrentSignature) { - return { count: 0 }; + return { count: 0, noProgressEvidence: false }; + } + + const tailStart = Math.max(0, history.length - alternatingTailCount); + let firstHashA: string | undefined; + let firstHashB: string | undefined; + let noProgressEvidence = true; + for (let i = tailStart; i < history.length; i += 1) { + const call = history[i]; + if (!call) { + continue; + } + if (!call.resultHash) { + noProgressEvidence = false; + break; + } + if (call.argsHash === last.argsHash) { + if (!firstHashA) { + firstHashA = call.resultHash; + } else if (firstHashA !== call.resultHash) { + noProgressEvidence = false; + break; + } + continue; + } + if (call.argsHash === otherSignature) { + if (!firstHashB) { + firstHashB = call.resultHash; + } else if (firstHashB !== call.resultHash) { + noProgressEvidence = false; + break; + } + continue; + } + noProgressEvidence = false; + break; + } + + // Need repeated stable outcomes on both sides before treating ping-pong as no-progress. + if (!firstHashA || !firstHashB) { + noProgressEvidence = false; } return { count: alternatingTailCount + 1, pairedToolName: last.toolName, + pairedSignature: last.argsHash, + noProgressEvidence, }; } +function canonicalPairKey(signatureA: string, signatureB: string): string { + return [signatureA, signatureB].toSorted().join("|"); +} + /** * Detect if an agent is stuck in a repetitive tool call loop. * Checks if the same tool+params combination has been called excessively. @@ -297,7 +348,11 @@ export function detectToolCallLoop( }; } - if (pingPong.count >= CRITICAL_THRESHOLD) { + const pingPongWarningKey = pingPong.pairedSignature + ? `pingpong:${canonicalPairKey(currentHash, pingPong.pairedSignature)}` + : `pingpong:${toolName}:${currentHash}`; + + if (pingPong.count >= CRITICAL_THRESHOLD && pingPong.noProgressEvidence) { log.error( `Critical ping-pong loop detected: alternating calls count=${pingPong.count} currentTool=${toolName}`, ); @@ -306,9 +361,9 @@ export function detectToolCallLoop( level: "critical", detector: "ping_pong", count: pingPong.count, - message: `CRITICAL: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls). This appears to be a stuck ping-pong loop. Session execution blocked to prevent resource waste.`, + message: `CRITICAL: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls) with no progress. This appears to be a stuck ping-pong loop. Session execution blocked to prevent resource waste.`, pairedToolName: pingPong.pairedToolName, - warningKey: `pingpong:${toolName}:${currentHash}:${pingPong.pairedToolName ?? "unknown"}`, + warningKey: pingPongWarningKey, }; } @@ -323,7 +378,7 @@ export function detectToolCallLoop( count: pingPong.count, message: `WARNING: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls). This looks like a ping-pong loop; stop retrying and report the task as failed.`, pairedToolName: pingPong.pairedToolName, - warningKey: `pingpong:${toolName}:${currentHash}:${pingPong.pairedToolName ?? "unknown"}`, + warningKey: pingPongWarningKey, }; }