fix(agents): stabilize exec loop outcome hashing

This commit is contained in:
Peter Steinberger
2026-04-27 12:33:37 +01:00
parent 35335214b3
commit e9bce3f81c
4 changed files with 146 additions and 0 deletions

View File

@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- CLI/doctor: run bundled plugin runtime-dependency repairs through the async npm installer with spinner/line progress and heartbeat updates, so long `openclaw doctor --fix` installs no longer look hung in TTY or piped output. Fixes #72775. Thanks @dfpalhano.
- Agents/tools: ignore volatile `exec` runtime metadata when comparing tool-loop outcomes, so enabled loop detection can stop repeated identical shell-command results instead of resetting on duration, PID, session, or cwd changes. Fixes #34574; supersedes #41502. Thanks @gucasbrg and @Zcg2021.
- Agents/fallback: classify internal live-session model switch conflicts as unknown fallback failures instead of provider overloads, preventing local vLLM endpoints from receiving misleading overloaded cooldowns. Refs #63229. Thanks @clawdia-lobster.
- Control UI: keep session-specific assistant identity loads authoritative after WebSocket connect, so non-main agent chat sessions do not show the main agent name in the header after bootstrap refreshes. Fixes #72776. Thanks @rockytian-top.
- Agents/Qwen: preserve exact custom `modelstudio` provider configs with foreign `api` owners so explicit OpenAI-compatible Model Studio endpoints no longer get normalized into the bundled Qwen plugin path. Fixes #64483. Thanks @FiredMosquito831.

View File

@@ -73,6 +73,8 @@ Per-agent override (optional):
- `detectors.knownPollNoProgress`: detects known polling-like patterns with no state change.
- `detectors.pingPong`: detects alternating ping-pong patterns.
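For `exec`, no-progress checks compare only the stable parts of a command outcome (status, exit code, timeout flag, and output, or the output tail while the command is still running) and ignore volatile runtime metadata such as duration, PID, session ID, and working directory.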
## Recommended setup
- Start with `enabled: true`, defaults unchanged.

View File

@@ -462,6 +462,104 @@ describe("tool-loop-detection", () => {
}
});
it("blocks repeated completed exec calls despite volatile runtime details", () => {
  // Every recorded call carries the same text/aggregated output and exit code;
  // only `durationMs` and `cwd` differ from one call to the next.
  const state = createState();
  const params = { command: "grafana-api.sh datasources" };

  let attempt = 0;
  while (attempt < GLOBAL_CIRCUIT_BREAKER_THRESHOLD) {
    recordSuccessfulCall(
      state,
      "exec",
      params,
      {
        content: [{ type: "text", text: "Loki\nPrometheus" }],
        details: {
          status: "completed",
          exitCode: 0,
          durationMs: 100 + attempt,
          cwd: `/tmp/run-${attempt}`,
          aggregated: "Loki\nPrometheus",
        },
      },
      attempt,
    );
    attempt += 1;
  }

  const verdict = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (!verdict.stuck) {
    // The expectation above has already failed in this case; this guard only narrows the type.
    return;
  }
  expect(verdict.level).toBe("critical");
  expect(verdict.detector).toBe("global_circuit_breaker");
});
it("blocks repeated running exec calls despite volatile session details and text", () => {
  // Each recorded call reports a still-running command with the same `tail`;
  // the session id, pid, start time, cwd, and the text content differ every time.
  const state = createState();
  const params = { command: "tail -f /var/log/app.log", yieldMs: 1000 };

  let attempt = 0;
  while (attempt < GLOBAL_CIRCUIT_BREAKER_THRESHOLD) {
    const sessionId = `sess-${attempt}`;
    const pid = 1000 + attempt;
    recordSuccessfulCall(
      state,
      "exec",
      params,
      {
        content: [
          {
            type: "text",
            text: `Command still running (session ${sessionId}, pid ${pid})`,
          },
        ],
        details: {
          status: "running",
          sessionId,
          pid,
          startedAt: Date.now() + attempt,
          cwd: `/tmp/run-${attempt}`,
          tail: "(no new output)",
        },
      },
      attempt,
    );
    attempt += 1;
  }

  const verdict = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (!verdict.stuck) {
    // The expectation above has already failed in this case; this guard only narrows the type.
    return;
  }
  expect(verdict.level).toBe("critical");
  expect(verdict.detector).toBe("global_circuit_breaker");
});
it("keeps changing exec output below the global no-progress breaker", () => {
  // The command is identical on every call, but its text/aggregated output changes
  // each time, so the expected detector is the generic repeat warning.
  const state = createState();
  const params = { command: "date" };

  let attempt = 0;
  while (attempt < GLOBAL_CIRCUIT_BREAKER_THRESHOLD) {
    const tick = `tick ${attempt}`;
    recordSuccessfulCall(
      state,
      "exec",
      params,
      {
        content: [{ type: "text", text: tick }],
        details: {
          status: "completed",
          exitCode: 0,
          durationMs: 100 + attempt,
          aggregated: tick,
        },
      },
      attempt,
    );
    attempt += 1;
  }

  const verdict = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (!verdict.stuck) {
    // The expectation above has already failed in this case; this guard only narrows the type.
    return;
  }
  expect(verdict.level).toBe("warning");
  expect(verdict.detector).toBe("generic_repeat");
});
it("does not block repeated unknown-tool failures before the unknown-tool threshold", () => {
const state = createState();
const toolName = "exec";

View File

@@ -202,6 +202,45 @@ function extractUnknownToolName(error: unknown): string | undefined {
return toolName ? toolName.toLowerCase() : undefined;
}
/**
 * Narrows an arbitrary value to a string.
 *
 * @param value - Any value, typically a field read from an untyped record.
 * @returns `value` itself when it is a string primitive (including `""`), otherwise `null`.
 */
function stringField(value: unknown): string | null {
  if (typeof value !== "string") {
    return null;
  }
  return value;
}
/**
 * Digests an `exec` tool result using only a status-specific subset of its details.
 *
 * Fields hashed per status:
 * - `running`: `status` and `tail` (`""` when `tail` is not a string).
 * - `completed` / `failed`: `status`, `exitCode` (`null` unless a number), `timedOut`
 *   (`true` only when the field is exactly `true`), and `output`, which is
 *   `details.aggregated` when that is a string and `text` otherwise.
 * - `approval-pending` / `approval-unavailable`: `status`, `reason`, `host`, `command`,
 *   and `warningText`.
 *
 * Other detail fields are never read, so they cannot influence the digest.
 *
 * @param details - The `details` record of the tool result.
 * @param text - Text content of the result, used as the output fallback.
 * @returns The value produced by `digestStable` for the selected fields, or `undefined`
 *   when `details.status` is not a string or is not one of the statuses listed above.
 */
function hashExecToolOutcome(details: Record<string, unknown>, text: string): string | undefined {
  const status = stringField(details.status);

  switch (status) {
    case "running":
      return digestStable({
        status,
        tail: stringField(details.tail) ?? "",
      });

    case "completed":
    case "failed": {
      const exitCode = typeof details.exitCode === "number" ? details.exitCode : null;
      const timedOut = details.timedOut === true;
      const output = stringField(details.aggregated) ?? text;
      return digestStable({ status, exitCode, timedOut, output });
    }

    case "approval-pending":
    case "approval-unavailable": {
      const reason = stringField(details.reason);
      const host = stringField(details.host);
      const command = stringField(details.command) ?? "";
      const warningText = stringField(details.warningText) ?? "";
      return digestStable({ status, reason, host, command, warningText });
    }

    default:
      // Covers a missing/non-string status, the empty string, and any unrecognized status.
      return undefined;
  }
}
function hashToolOutcome(
toolName: string,
params: unknown,
@@ -221,6 +260,12 @@ function hashToolOutcome(
const details = isPlainObject(result.details) ? result.details : {};
const text = extractTextContent(result);
if (toolName === "exec") {
const execHash = hashExecToolOutcome(details, text);
if (execHash) {
return { resultHash: execHash };
}
}
if (isKnownPollToolCall(toolName, params) && toolName === "process" && isPlainObject(params)) {
const action = params.action;
if (action === "poll") {