From e9bce3f81c3ab8abd897aab4793c713379147fe4 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 27 Apr 2026 12:33:37 +0100 Subject: [PATCH] fix(agents): stabilize exec loop outcome hashing --- CHANGELOG.md | 1 + docs/tools/loop-detection.md | 2 + src/agents/tool-loop-detection.test.ts | 98 ++++++++++++++++++++++++++ src/agents/tool-loop-detection.ts | 45 ++++++++++++ 4 files changed, 146 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 645a700eaf9..4621e45b14d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai ### Fixes - CLI/doctor: run bundled plugin runtime-dependency repairs through the async npm installer with spinner/line progress and heartbeat updates, so long `openclaw doctor --fix` installs no longer look hung in TTY or piped output. Fixes #72775. Thanks @dfpalhano. +- Agents/tools: ignore volatile `exec` runtime metadata when comparing tool-loop outcomes, so enabled loop detection can stop repeated identical shell-command results instead of resetting on duration, PID, session, or cwd changes. Fixes #34574; supersedes #41502. Thanks @gucasbrg and @Zcg2021. - Agents/fallback: classify internal live-session model switch conflicts as unknown fallback failures instead of provider overloads, preventing local vLLM endpoints from receiving misleading overloaded cooldowns. Refs #63229. Thanks @clawdia-lobster. - Control UI: keep session-specific assistant identity loads authoritative after WebSocket connect, so non-main agent chat sessions do not show the main agent name in the header after bootstrap refreshes. Fixes #72776. Thanks @rockytian-top. - Agents/Qwen: preserve exact custom `modelstudio` provider configs with foreign `api` owners so explicit OpenAI-compatible Model Studio endpoints no longer get normalized into the bundled Qwen plugin path. Fixes #64483. Thanks @FiredMosquito831. 
diff --git a/docs/tools/loop-detection.md b/docs/tools/loop-detection.md index 18617c04826..b8ee0a7c105 100644 --- a/docs/tools/loop-detection.md +++ b/docs/tools/loop-detection.md @@ -73,6 +73,8 @@ Per-agent override (optional): - `detectors.knownPollNoProgress`: detects known polling-like patterns with no state change. - `detectors.pingPong`: detects alternating ping-pong patterns. +For `exec`, no-progress checks compare stable command outcomes and ignore volatile runtime metadata such as duration, PID, session ID, and working directory. + ## Recommended setup - Start with `enabled: true`, defaults unchanged. diff --git a/src/agents/tool-loop-detection.test.ts b/src/agents/tool-loop-detection.test.ts index 0608e4b5752..03f2385ae69 100644 --- a/src/agents/tool-loop-detection.test.ts +++ b/src/agents/tool-loop-detection.test.ts @@ -462,6 +462,104 @@ describe("tool-loop-detection", () => { } }); + it("blocks repeated completed exec calls despite volatile runtime details", () => { + const state = createState(); + const params = { command: "grafana-api.sh datasources" }; + + for (let index = 0; index < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; index += 1) { + recordSuccessfulCall( + state, + "exec", + params, + { + content: [{ type: "text", text: "Loki\nPrometheus" }], + details: { + status: "completed", + exitCode: 0, + durationMs: 100 + index, + cwd: `/tmp/run-${index}`, + aggregated: "Loki\nPrometheus", + }, + }, + index, + ); + } + + const loopResult = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig); + expect(loopResult.stuck).toBe(true); + if (loopResult.stuck) { + expect(loopResult.level).toBe("critical"); + expect(loopResult.detector).toBe("global_circuit_breaker"); + } + }); + + it("blocks repeated running exec calls despite volatile session details and text", () => { + const state = createState(); + const params = { command: "tail -f /var/log/app.log", yieldMs: 1000 }; + + for (let index = 0; index < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; index += 1) 
{ + recordSuccessfulCall( + state, + "exec", + params, + { + content: [ + { + type: "text", + text: `Command still running (session sess-${index}, pid ${1000 + index})`, + }, + ], + details: { + status: "running", + sessionId: `sess-${index}`, + pid: 1000 + index, + startedAt: Date.now() + index, + cwd: `/tmp/run-${index}`, + tail: "(no new output)", + }, + }, + index, + ); + } + + const loopResult = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig); + expect(loopResult.stuck).toBe(true); + if (loopResult.stuck) { + expect(loopResult.level).toBe("critical"); + expect(loopResult.detector).toBe("global_circuit_breaker"); + } + }); + + it("keeps changing exec output below the global no-progress breaker", () => { + const state = createState(); + const params = { command: "date" }; + + for (let index = 0; index < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; index += 1) { + recordSuccessfulCall( + state, + "exec", + params, + { + content: [{ type: "text", text: `tick ${index}` }], + details: { + status: "completed", + exitCode: 0, + durationMs: 100 + index, + aggregated: `tick ${index}`, + }, + }, + index, + ); + } + + const loopResult = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig); + expect(loopResult.stuck).toBe(true); + if (loopResult.stuck) { + expect(loopResult.level).toBe("warning"); + expect(loopResult.detector).toBe("generic_repeat"); + } + }); + it("does not block repeated unknown-tool failures before the unknown-tool threshold", () => { const state = createState(); const toolName = "exec"; diff --git a/src/agents/tool-loop-detection.ts b/src/agents/tool-loop-detection.ts index a239ff6bb62..81b239b27c5 100644 --- a/src/agents/tool-loop-detection.ts +++ b/src/agents/tool-loop-detection.ts @@ -202,6 +202,45 @@ function extractUnknownToolName(error: unknown): string | undefined { return toolName ? toolName.toLowerCase() : undefined; } +function stringField(value: unknown): string | null { + return typeof value === "string" ? 
value : null; +} + +function hashExecToolOutcome(details: Record<string, unknown>, text: string): string | undefined { + const status = stringField(details.status); + if (!status) { + return undefined; + } + + if (status === "running") { + return digestStable({ + status, + tail: stringField(details.tail) ?? "", + }); + } + + if (status === "completed" || status === "failed") { + return digestStable({ + status, + exitCode: typeof details.exitCode === "number" ? details.exitCode : null, + timedOut: details.timedOut === true, + output: stringField(details.aggregated) ?? text, + }); + } + + if (status === "approval-pending" || status === "approval-unavailable") { + return digestStable({ + status, + reason: stringField(details.reason), + host: stringField(details.host), + command: stringField(details.command) ?? "", + warningText: stringField(details.warningText) ?? "", + }); + } + + return undefined; +} + function hashToolOutcome( toolName: string, params: unknown, @@ -221,6 +260,12 @@ function hashToolOutcome( const details = isPlainObject(result.details) ? result.details : {}; const text = extractTextContent(result); + if (toolName === "exec") { + const execHash = hashExecToolOutcome(details, text); + if (execHash) { + return { resultHash: execHash }; + } + } if (isKnownPollToolCall(toolName, params) && toolName === "process" && isPlainObject(params)) { const action = params.action; if (action === "poll") {