mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-16 04:20:44 +00:00
fix: gate ping-pong blocking on no-progress
This commit is contained in:
@@ -20,7 +20,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway/Config: prevent `config.patch` object-array merges from falling back to full-array replacement when some patch entries lack `id`, so partial `agents.list` updates no longer drop unrelated agents. (#17989) Thanks @stakeswky.
|
||||
- Config/Discord: require string IDs in Discord allowlists, keep onboarding inputs string-only, and add doctor repair for numeric entries. (#18220) Thanks @thewilloftheshadow.
|
||||
- Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost.
|
||||
- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, warning on generic identical-call repeats, warning + blocking ping-pong alternation loops (10/20), coalescing repeated warning spam into threshold buckets, adding a global circuit breaker at 30 no-progress repeats, and emitting structured diagnostic `tool.loop` warning/error events for loop actions. (#16808) Thanks @akramcodez and @beca-oc.
|
||||
- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, warning on generic identical-call repeats, warning + no-progress-blocking ping-pong alternation loops (10/20), coalescing repeated warning spam into threshold buckets (including canonical ping-pong pairs), adding a global circuit breaker at 30 no-progress repeats, and emitting structured diagnostic `tool.loop` warning/error events for loop actions. (#16808) Thanks @akramcodez and @beca-oc.
|
||||
- Agents/Tools: scope the `message` tool schema to the active channel so Telegram uses `buttons` and Discord uses `components`. (#18215) Thanks @obviyus.
|
||||
- Discord: optimize reaction notification handling to skip unnecessary message fetches in `off`/`all`/`allowlist` modes, streamline reaction routing, and improve reaction emoji formatting. (#18248) Thanks @thewilloftheshadow and @victorGPT.
|
||||
- Telegram: keep draft-stream preview replies attached to the user message for `replyToMode: "all"` in groups and DMs, preserving threaded reply context from preview through finalization. (#17880) Thanks @yinghaosang.
|
||||
|
||||
@@ -182,8 +182,14 @@ describe("before_tool_call loop detection behavior", () => {
|
||||
}
|
||||
|
||||
await listTool.execute("list-9", { dir: "/workspace" }, undefined, undefined);
|
||||
await readTool.execute("read-10", { path: "/a.txt" }, undefined, undefined);
|
||||
await listTool.execute("list-11", { dir: "/workspace" }, undefined, undefined);
|
||||
|
||||
const loopEvent = emitted.at(-1);
|
||||
const pingPongWarns = emitted.filter(
|
||||
(evt) => evt.level === "warning" && evt.detector === "ping_pong",
|
||||
);
|
||||
expect(pingPongWarns).toHaveLength(1);
|
||||
const loopEvent = pingPongWarns[0];
|
||||
expect(loopEvent?.type).toBe("tool.loop");
|
||||
expect(loopEvent?.level).toBe("warning");
|
||||
expect(loopEvent?.action).toBe("warn");
|
||||
@@ -255,6 +261,67 @@ describe("before_tool_call loop detection behavior", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("does not block ping-pong at critical threshold when outcomes are progressing", async () => {
|
||||
const emitted: DiagnosticToolLoopEvent[] = [];
|
||||
const stop = onDiagnosticEvent((evt) => {
|
||||
if (evt.type === "tool.loop") {
|
||||
emitted.push(evt);
|
||||
}
|
||||
});
|
||||
try {
|
||||
const readExecute = vi.fn().mockImplementation(async (toolCallId: string) => ({
|
||||
content: [{ type: "text", text: `read ${toolCallId}` }],
|
||||
details: { ok: true },
|
||||
}));
|
||||
const listExecute = vi.fn().mockImplementation(async (toolCallId: string) => ({
|
||||
content: [{ type: "text", text: `list ${toolCallId}` }],
|
||||
details: { ok: true },
|
||||
}));
|
||||
const readTool = wrapToolWithBeforeToolCallHook(
|
||||
{ name: "read", execute: readExecute } as unknown as AnyAgentTool,
|
||||
{
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
},
|
||||
);
|
||||
const listTool = wrapToolWithBeforeToolCallHook(
|
||||
{ name: "list", execute: listExecute } as unknown as AnyAgentTool,
|
||||
{
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
},
|
||||
);
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
await readTool.execute(`read-${i}`, { path: "/a.txt" }, undefined, undefined);
|
||||
} else {
|
||||
await listTool.execute(`list-${i}`, { dir: "/workspace" }, undefined, undefined);
|
||||
}
|
||||
}
|
||||
|
||||
await expect(
|
||||
listTool.execute(
|
||||
`list-${CRITICAL_THRESHOLD - 1}`,
|
||||
{ dir: "/workspace" },
|
||||
undefined,
|
||||
undefined,
|
||||
),
|
||||
).resolves.toBeDefined();
|
||||
|
||||
const criticalPingPong = emitted.find(
|
||||
(evt) => evt.level === "critical" && evt.detector === "ping_pong",
|
||||
);
|
||||
expect(criticalPingPong).toBeUndefined();
|
||||
const warningPingPong = emitted.find(
|
||||
(evt) => evt.level === "warning" && evt.detector === "ping_pong",
|
||||
);
|
||||
expect(warningPingPong).toBeTruthy();
|
||||
} finally {
|
||||
stop();
|
||||
}
|
||||
});
|
||||
|
||||
it("emits structured critical diagnostic events when blocking loops", async () => {
|
||||
const emitted: DiagnosticToolLoopEvent[] = [];
|
||||
const stop = onDiagnosticEvent((evt) => {
|
||||
|
||||
@@ -271,9 +271,21 @@ describe("tool-loop-detection", () => {
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
recordToolCall(state, "read", readParams, `read-${i}`);
|
||||
recordSuccessfulCall(
|
||||
state,
|
||||
"read",
|
||||
readParams,
|
||||
{ content: [{ type: "text", text: "read stable" }], details: { ok: true } },
|
||||
i,
|
||||
);
|
||||
} else {
|
||||
recordToolCall(state, "list", listParams, `list-${i}`);
|
||||
recordSuccessfulCall(
|
||||
state,
|
||||
"list",
|
||||
listParams,
|
||||
{ content: [{ type: "text", text: "list stable" }], details: { ok: true } },
|
||||
i,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -288,6 +300,40 @@ describe("tool-loop-detection", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("does not block ping-pong at critical threshold when outcomes are progressing", () => {
|
||||
const state = createState();
|
||||
const readParams = { path: "/a.txt" };
|
||||
const listParams = { dir: "/workspace" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
recordSuccessfulCall(
|
||||
state,
|
||||
"read",
|
||||
readParams,
|
||||
{ content: [{ type: "text", text: `read ${i}` }], details: { ok: true } },
|
||||
i,
|
||||
);
|
||||
} else {
|
||||
recordSuccessfulCall(
|
||||
state,
|
||||
"list",
|
||||
listParams,
|
||||
{ content: [{ type: "text", text: `list ${i}` }], details: { ok: true } },
|
||||
i,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "list", listParams);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
expect(loopResult.detector).toBe("ping_pong");
|
||||
expect(loopResult.count).toBe(CRITICAL_THRESHOLD);
|
||||
}
|
||||
});
|
||||
|
||||
it("does not flag ping-pong when alternation is broken", () => {
|
||||
const state = createState();
|
||||
recordToolCall(state, "read", { path: "/a.txt" }, "a1");
|
||||
|
||||
@@ -189,12 +189,17 @@ function getNoProgressStreak(
|
||||
}
|
||||
|
||||
function getPingPongStreak(
|
||||
history: Array<{ toolName: string; argsHash: string }>,
|
||||
history: Array<{ toolName: string; argsHash: string; resultHash?: string }>,
|
||||
currentSignature: string,
|
||||
): { count: number; pairedToolName?: string } {
|
||||
): {
|
||||
count: number;
|
||||
pairedToolName?: string;
|
||||
pairedSignature?: string;
|
||||
noProgressEvidence: boolean;
|
||||
} {
|
||||
const last = history.at(-1);
|
||||
if (!last) {
|
||||
return { count: 0 };
|
||||
return { count: 0, noProgressEvidence: false };
|
||||
}
|
||||
|
||||
let otherSignature: string | undefined;
|
||||
@@ -212,7 +217,7 @@ function getPingPongStreak(
|
||||
}
|
||||
|
||||
if (!otherSignature || !otherToolName) {
|
||||
return { count: 0 };
|
||||
return { count: 0, noProgressEvidence: false };
|
||||
}
|
||||
|
||||
let alternatingTailCount = 0;
|
||||
@@ -229,20 +234,66 @@ function getPingPongStreak(
|
||||
}
|
||||
|
||||
if (alternatingTailCount < 2) {
|
||||
return { count: 0 };
|
||||
return { count: 0, noProgressEvidence: false };
|
||||
}
|
||||
|
||||
const expectedCurrentSignature = otherSignature;
|
||||
if (currentSignature !== expectedCurrentSignature) {
|
||||
return { count: 0 };
|
||||
return { count: 0, noProgressEvidence: false };
|
||||
}
|
||||
|
||||
const tailStart = Math.max(0, history.length - alternatingTailCount);
|
||||
let firstHashA: string | undefined;
|
||||
let firstHashB: string | undefined;
|
||||
let noProgressEvidence = true;
|
||||
for (let i = tailStart; i < history.length; i += 1) {
|
||||
const call = history[i];
|
||||
if (!call) {
|
||||
continue;
|
||||
}
|
||||
if (!call.resultHash) {
|
||||
noProgressEvidence = false;
|
||||
break;
|
||||
}
|
||||
if (call.argsHash === last.argsHash) {
|
||||
if (!firstHashA) {
|
||||
firstHashA = call.resultHash;
|
||||
} else if (firstHashA !== call.resultHash) {
|
||||
noProgressEvidence = false;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (call.argsHash === otherSignature) {
|
||||
if (!firstHashB) {
|
||||
firstHashB = call.resultHash;
|
||||
} else if (firstHashB !== call.resultHash) {
|
||||
noProgressEvidence = false;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
noProgressEvidence = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// Need repeated stable outcomes on both sides before treating ping-pong as no-progress.
|
||||
if (!firstHashA || !firstHashB) {
|
||||
noProgressEvidence = false;
|
||||
}
|
||||
|
||||
return {
|
||||
count: alternatingTailCount + 1,
|
||||
pairedToolName: last.toolName,
|
||||
pairedSignature: last.argsHash,
|
||||
noProgressEvidence,
|
||||
};
|
||||
}
|
||||
|
||||
function canonicalPairKey(signatureA: string, signatureB: string): string {
|
||||
return [signatureA, signatureB].toSorted().join("|");
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if an agent is stuck in a repetitive tool call loop.
|
||||
* Checks if the same tool+params combination has been called excessively.
|
||||
@@ -297,7 +348,11 @@ export function detectToolCallLoop(
|
||||
};
|
||||
}
|
||||
|
||||
if (pingPong.count >= CRITICAL_THRESHOLD) {
|
||||
const pingPongWarningKey = pingPong.pairedSignature
|
||||
? `pingpong:${canonicalPairKey(currentHash, pingPong.pairedSignature)}`
|
||||
: `pingpong:${toolName}:${currentHash}`;
|
||||
|
||||
if (pingPong.count >= CRITICAL_THRESHOLD && pingPong.noProgressEvidence) {
|
||||
log.error(
|
||||
`Critical ping-pong loop detected: alternating calls count=${pingPong.count} currentTool=${toolName}`,
|
||||
);
|
||||
@@ -306,9 +361,9 @@ export function detectToolCallLoop(
|
||||
level: "critical",
|
||||
detector: "ping_pong",
|
||||
count: pingPong.count,
|
||||
message: `CRITICAL: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls). This appears to be a stuck ping-pong loop. Session execution blocked to prevent resource waste.`,
|
||||
message: `CRITICAL: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls) with no progress. This appears to be a stuck ping-pong loop. Session execution blocked to prevent resource waste.`,
|
||||
pairedToolName: pingPong.pairedToolName,
|
||||
warningKey: `pingpong:${toolName}:${currentHash}:${pingPong.pairedToolName ?? "unknown"}`,
|
||||
warningKey: pingPongWarningKey,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -323,7 +378,7 @@ export function detectToolCallLoop(
|
||||
count: pingPong.count,
|
||||
message: `WARNING: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls). This looks like a ping-pong loop; stop retrying and report the task as failed.`,
|
||||
pairedToolName: pingPong.pairedToolName,
|
||||
warningKey: `pingpong:${toolName}:${currentHash}:${pingPong.pairedToolName ?? "unknown"}`,
|
||||
warningKey: pingPongWarningKey,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user