mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-26 18:09:31 +00:00
Fixes #89090.
Release note: prevent repeated visible message sends from bypassing loop blocking when delivery results include fresh message, file, poll, receipt, run, idempotency, or timestamp fields. Normalizes send-like result hashing for the core message tool, sessions_send, and provider-docked messaging tools while preserving stable routing and outcome facts.
Verification:
- node scripts/run-vitest.mjs src/agents/tool-loop-detection.test.ts src/agents/tools/message-tool.test.ts
- .agents/skills/autoreview/scripts/autoreview --mode local --parallel-tests "node scripts/run-vitest.mjs src/agents/tool-loop-detection.test.ts src/agents/tools/message-tool.test.ts"
- .agents/skills/autoreview/scripts/autoreview --mode branch --base origin/main --parallel-tests "node scripts/run-vitest.mjs src/agents/tool-loop-detection.test.ts src/agents/tools/message-tool.test.ts"
- gh pr checks 89109 --watch --interval 30
Co-authored-by: openperf <16864032@qq.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
1304 lines
43 KiB
TypeScript
1304 lines
43 KiB
TypeScript
// Tool loop detection tests cover repeated-call hashing, ping-pong detection,
|
|
// unknown-tool thresholds, and circuit-breaker escalation.
|
|
import { describe, expect, it, vi } from "vitest";
|
|
import type { ToolLoopDetectionConfig } from "../config/types.tools.js";
|
|
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
|
|
|
// Stand-in for the provider-docked send-tool detector. It treats only the tool
// named "telegram" as a send tool, so the volatility strip applies to it without
// pulling in the channel-plugin registry. The real detector is covered by
// embedded-agent-messaging's own tests.
const isMessagingToolSendActionMock = vi.hoisted(() => {
  const isTelegramTool = (toolName: string): boolean => toolName === "telegram";
  return vi.fn(isTelegramTool);
});
|
|
// Swap the messaging module for a stub that exposes only the hoisted mock above.
vi.mock("./embedded-agent-messaging.js", () => {
  return { isMessagingToolSendAction: isMessagingToolSendActionMock };
});
|
|
import {
|
|
CRITICAL_THRESHOLD,
|
|
GLOBAL_CIRCUIT_BREAKER_THRESHOLD,
|
|
TOOL_CALL_HISTORY_SIZE,
|
|
UNKNOWN_TOOL_THRESHOLD,
|
|
WARNING_THRESHOLD,
|
|
detectToolCallLoop,
|
|
getToolCallStats,
|
|
hashToolCall,
|
|
recordToolCall,
|
|
recordToolCallOutcome,
|
|
} from "./tool-loop-detection.js";
|
|
|
|
/**
 * Builds a fresh session state with no tool-call history: the activity stamp is
 * "now", the session is "processing", and the queue is empty.
 */
function createState(): SessionState {
  const freshState: SessionState = {
    lastActivity: Date.now(),
    state: "processing",
    queueDepth: 0,
  };
  return freshState;
}
|
|
|
|
// Minimal config that switches loop detection on and leaves everything else unset.
const enabledLoopDetectionConfig: ToolLoopDetectionConfig = {
  enabled: true,
};

// Same as the enabled config, but with a 4-entry history window.
const shortHistoryLoopConfig: ToolLoopDetectionConfig = {
  ...enabledLoopDetectionConfig,
  historySize: 4,
};
|
|
|
|
/**
 * Records one tool call and then attaches a successful `result` to it.
 * The call id is `<toolName>-<index>`.
 */
function recordSuccessfulCall(
  state: SessionState,
  toolName: string,
  params: unknown,
  result: unknown,
  index: number,
): void {
  const callId = toolName + "-" + String(index);
  recordToolCall(state, toolName, params, callId);
  recordToolCallOutcome(state, { toolName, toolParams: params, toolCallId: callId, result });
}
|
|
|
|
/**
 * Records one tool call and then attaches a failure (`error`) outcome to it.
 * The call id is `<toolName>-error-<index>`.
 */
function recordFailedCall(
  state: SessionState,
  toolName: string,
  params: unknown,
  error: unknown,
  index: number,
): void {
  const callId = toolName + "-error-" + String(index);
  recordToolCall(state, toolName, params, callId);
  recordToolCallOutcome(state, { toolName, toolParams: params, toolCallId: callId, error });
}
|
|
|
|
/**
 * Records `count` successful calls that share the same tool, params, and result.
 * Call indices run from `startIndex` (default 0) upward, so ids stay unique when
 * the helper is invoked more than once on the same state.
 */
function recordRepeatedSuccessfulCalls(params: {
  state: SessionState;
  toolName: string;
  toolParams: unknown;
  result: unknown;
  count: number;
  startIndex?: number;
}) {
  const firstIndex = params.startIndex ?? 0;
  const endIndex = firstIndex + params.count;
  for (let callIndex = firstIndex; callIndex < endIndex; callIndex += 1) {
    recordSuccessfulCall(
      params.state,
      params.toolName,
      params.toolParams,
      params.result,
      callIndex,
    );
  }
}
|
|
|
|
/**
 * Returns poll params for `sessionId` paired with a "still running, no new
 * output" result, for tests that repeat the same poll with an unchanged outcome.
 */
function createNoProgressPollFixture(sessionId: string) {
  const params = { action: "poll", sessionId };
  const result = {
    content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
    details: { status: "running", aggregated: "steady" },
  };
  return { params, result };
}
|
|
|
|
/**
 * Returns a `read` call on "/same.txt" paired with a fixed "same output" result,
 * for tests that repeat an identical call with an identical outcome.
 */
function createReadNoProgressFixture() {
  const fixture = {
    toolName: "read",
    params: { path: "/same.txt" },
    result: {
      content: [{ type: "text", text: "same output" }],
      details: { ok: true },
    },
  } as const;
  return fixture;
}
|
|
|
|
/**
 * Returns a fresh state plus the `read` and `list` params used by the
 * alternating (ping-pong) tests.
 */
function createPingPongFixture() {
  const state = createState();
  const readParams = { path: "/a.txt" };
  const listParams = { dir: "/workspace" };
  return { state, readParams, listParams };
}
|
|
|
|
/**
 * Records `count` identical successful calls on a fresh state, then runs loop
 * detection for one more call with the same tool and params. Falls back to
 * `enabledLoopDetectionConfig` when no config is supplied.
 */
function detectLoopAfterRepeatedCalls(params: {
  toolName: string;
  toolParams: unknown;
  result: unknown;
  count: number;
  config?: ToolLoopDetectionConfig;
}) {
  const { toolName, toolParams, result, count } = params;
  const config = params.config ?? enabledLoopDetectionConfig;
  const state = createState();
  recordRepeatedSuccessfulCalls({ state, toolName, toolParams, result, count });
  return detectToolCallLoop(state, toolName, toolParams, config);
}
|
|
|
|
/**
 * Records `count` successful calls that alternate read, list, read, ...
 * (even indices are `read`, odd indices are `list`). The text of each result
 * comes from `textAtIndex`, so callers decide whether outcomes stay stable or
 * change; this exercises the ping-pong detector independently from same-tool
 * repetition.
 */
function recordSuccessfulPingPongCalls(params: {
  state: SessionState;
  readParams: { path: string };
  listParams: { dir: string };
  count: number;
  textAtIndex: (toolName: "read" | "list", index: number) => string;
}) {
  for (let step = 0; step < params.count; step += 1) {
    const isReadTurn = step % 2 === 0;
    const toolName = isReadTurn ? "read" : "list";
    const toolParams = isReadTurn ? params.readParams : params.listParams;
    const outcome = {
      content: [{ type: "text", text: params.textAtIndex(toolName, step) }],
      details: { ok: true },
    };
    recordSuccessfulCall(params.state, toolName, toolParams, outcome, step);
  }
}
|
|
|
|
/**
 * Asserts that `loopResult` is a stuck result from the "ping_pong" detector with
 * the expected level and count. When `expectCriticalText` is set, the message
 * must also contain "CRITICAL".
 */
function expectPingPongLoop(
  loopResult: ReturnType<typeof detectToolCallLoop>,
  expected: { level: "warning" | "critical"; count: number; expectCriticalText?: boolean },
) {
  expect(loopResult.stuck).toBe(true);
  // The guard narrows the result type; the assertion above already fails otherwise.
  if (loopResult.stuck) {
    const { level, detector, count, message } = loopResult;
    expect(level).toBe(expected.level);
    expect(detector).toBe("ping_pong");
    expect(count).toBe(expected.count);
    if (expected.expectCriticalText) {
      expect(message).toContain("CRITICAL");
    }
  }
}
|
|
|
|
describe("tool-loop-detection", () => {
|
|
describe("hashToolCall", () => {
  // These tests expect digests of the form `<toolName>:` + 64 lowercase hex chars.
  it("creates consistent hash for same tool and params", () => {
    const first = hashToolCall("read", { path: "/file.txt" });
    const second = hashToolCall("read", { path: "/file.txt" });
    expect(first).toBe(second);
  });

  it("creates different hashes for different params", () => {
    const hashForPath = (path: string) => hashToolCall("read", { path });
    expect(hashForPath("/file1.txt")).not.toBe(hashForPath("/file2.txt"));
  });

  it("creates different hashes for different tools", () => {
    const readHash = hashToolCall("read", { path: "/file.txt" });
    const writeHash = hashToolCall("write", { path: "/file.txt" });
    expect(readHash).not.toBe(writeHash);
  });

  it("hashes non-object params with the same digest shape", () => {
    const prefix = "tool:";
    const hashes = (["string-param", 123, null] as const).map((input) =>
      hashToolCall("tool", input),
    );
    expect(hashes).toHaveLength(3);
    hashes.forEach((hash) => {
      expect(hash.startsWith(prefix)).toBe(true);
      expect(hash.length).toBe(prefix.length + 64);
      expect(/^[a-f0-9]+$/.test(hash.slice(prefix.length))).toBe(true);
    });
  });

  it("produces deterministic hashes regardless of key order", () => {
    const ascending = hashToolCall("tool", { a: 1, b: 2 });
    const descending = hashToolCall("tool", { b: 2, a: 1 });
    expect(ascending).toBe(descending);
  });

  it("keeps hashes fixed-size even for large params", () => {
    const prefix = "read:";
    const hash = hashToolCall("read", { data: "x".repeat(20_000) });
    expect(hash.startsWith(prefix)).toBe(true);
    expect(hash.length).toBe(prefix.length + 64);
  });

  it("hashes circular params without collapsing repeated references", () => {
    // Two structurally equal payloads. Each contains a shared sub-object
    // referenced twice plus a self-reference, with keys inserted in a
    // different order.
    const sharedA = { id: "shared" };
    const payload: Record<string, unknown> = { first: sharedA, second: sharedA };
    payload.self = payload;

    const sharedB = { id: "shared" };
    const equivalentPayload: Record<string, unknown> = { second: sharedB, first: sharedB };
    equivalentPayload.self = equivalentPayload;

    expect(hashToolCall("tool", payload)).toBe(hashToolCall("tool", equivalentPayload));
    expect(hashToolCall("tool", payload)).toEqual(expect.stringMatching(/^tool:[a-f0-9]{64}$/));
  });
});
|
|
|
|
describe("recordToolCall", () => {
  it("adds tool call to empty history", () => {
    const state = createState();
    recordToolCall(state, "read", { path: "/file.txt" }, "call-1");

    const history = state.toolCallHistory;
    expect(history).toHaveLength(1);
    expect(history?.[0]?.toolName).toBe("read");
    expect(history?.[0]?.toolCallId).toBe("call-1");
  });

  it("maintains sliding window of last N calls", () => {
    const state = createState();
    const overflow = 10;
    const totalCalls = TOOL_CALL_HISTORY_SIZE + overflow;

    for (let iteration = 0; iteration < totalCalls; iteration += 1) {
      recordToolCall(state, "tool", { iteration }, `call-${iteration}`);
    }

    expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE);

    // The first ten calls have been evicted, so the oldest survivor is iteration 10.
    const oldestCall = state.toolCallHistory?.[0];
    expect(oldestCall?.argsHash).toBe(hashToolCall("tool", { iteration: 10 }));
  });

  it("records timestamp for each call", () => {
    const state = createState();

    const before = Date.now();
    recordToolCall(state, "tool", { arg: 1 }, "call-ts");
    const after = Date.now();

    const recordedAt = state.toolCallHistory?.[0]?.timestamp ?? 0;
    expect(recordedAt).toBeGreaterThanOrEqual(before);
    expect(recordedAt).toBeLessThanOrEqual(after);
  });

  it("records run id when provided", () => {
    const state = createState();
    const scope = { runId: "run-1" };

    recordToolCall(state, "tool", { arg: 1 }, "call-run", enabledLoopDetectionConfig, scope);

    expect(state.toolCallHistory?.[0]?.runId).toBe("run-1");
  });

  it("respects configured historySize", () => {
    const state = createState();

    for (let iteration = 0; iteration < 10; iteration += 1) {
      recordToolCall(state, "tool", { iteration }, `call-${iteration}`, shortHistoryLoopConfig);
    }

    // With a window of 4, the ten calls leave iterations 6..9 in history.
    expect(state.toolCallHistory).toHaveLength(4);
    expect(state.toolCallHistory?.[0]?.argsHash).toBe(hashToolCall("tool", { iteration: 6 }));
  });
});
|
|
|
|
describe("detectToolCallLoop", () => {
|
|
it("is disabled by default", () => {
  const state = createState();

  let recorded = 0;
  while (recorded < 20) {
    recordToolCall(state, "read", { path: "/same.txt" }, `default-${recorded}`);
    recorded += 1;
  }

  // No config is passed, so detection must stay silent despite 20 repeats.
  const loopResult = detectToolCallLoop(state, "read", { path: "/same.txt" });
  expect(loopResult.stuck).toBe(false);
});
|
|
|
|
it("does not flag unique tool calls", () => {
  const state = createState();

  // Fifteen reads, each of a different file.
  Array.from({ length: 15 }, (_, i) => i).forEach((i) => {
    recordToolCall(state, "read", { path: `/file${i}.txt` }, `call-${i}`);
  });

  const nextParams = { path: "/new-file.txt" };
  const result = detectToolCallLoop(state, "read", nextParams, enabledLoopDetectionConfig);
  expect(result.stuck).toBe(false);
});
|
|
|
|
it("ignores repeated history from other runs", () => {
  const state = createState();
  const params = { path: "/same.txt" };
  const previousRun = { runId: "heartbeat-1" };
  const currentRun = { runId: "heartbeat-2" };

  // Fill history with WARNING_THRESHOLD identical calls, all from the previous run.
  for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
    recordToolCall(state, "read", params, `old-run-${i}`, enabledLoopDetectionConfig, previousRun);
  }

  const result = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig, currentRun);

  expect(result.stuck).toBe(false);
});
|
|
|
|
it("detects repeated history within the same run", () => {
  const state = createState();
  const params = { path: "/same.txt" };
  const scope = { runId: "run-1" };

  for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
    recordToolCall(state, "read", params, `same-run-${i}`, enabledLoopDetectionConfig, scope);
  }

  const result = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig, scope);

  expect(result.stuck).toBe(true);
  if (!result.stuck) {
    return;
  }
  expect(result.detector).toBe("generic_repeat");
  expect(result.count).toBe(WARNING_THRESHOLD);
});
|
|
|
|
it("keeps scoped and unscoped history isolated", () => {
  const state = createState();
  const params = { path: "/same.txt" };
  const scope = { runId: "run-1" };

  for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
    recordToolCall(state, "read", params, `scoped-${i}`, enabledLoopDetectionConfig, scope);
  }

  // Detection without a run scope must not count the run-scoped history above.
  const result = detectToolCallLoop(state, "read", params, enabledLoopDetectionConfig);

  expect(result.stuck).toBe(false);
});
|
|
|
|
it("warns on generic repeated tool+args calls", () => {
  const state = createState();
  let recorded = 0;
  while (recorded < WARNING_THRESHOLD) {
    recordToolCall(state, "read", { path: "/same.txt" }, `warn-${recorded}`);
    recorded += 1;
  }

  const result = detectToolCallLoop(
    state,
    "read",
    { path: "/same.txt" },
    enabledLoopDetectionConfig,
  );

  expect(result.stuck).toBe(true);
  if (!result.stuck) {
    return;
  }
  expect(result.level).toBe("warning");
  expect(result.detector).toBe("generic_repeat");
  expect(result.count).toBe(WARNING_THRESHOLD);
  expect(result.message).toContain("WARNING");
  expect(result.message).toContain(`${WARNING_THRESHOLD} times`);
});
|
|
|
|
it("blocks generic no-progress loops at critical threshold", () => {
  const { toolName, params: toolParams, result } = createReadNoProgressFixture();
  const loopResult = detectLoopAfterRepeatedCalls({
    toolName,
    toolParams,
    result,
    count: CRITICAL_THRESHOLD,
  });

  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("critical");
  expect(loopResult.detector).toBe("generic_repeat");
  expect(loopResult.message).toContain("identical outcomes");
});
|
|
|
|
it("applies custom thresholds when detection is enabled", () => {
  // Only the known-poll detector is active, with lowered thresholds (2 and 4).
  const config: ToolLoopDetectionConfig = {
    enabled: true,
    warningThreshold: 2,
    criticalThreshold: 4,
    detectors: {
      genericRepeat: false,
      knownPollNoProgress: true,
      pingPong: false,
    },
  };
  const state = createState();
  const { params, result } = createNoProgressPollFixture("sess-custom");
  const recordTwoPolls = (startIndex: number): void =>
    recordRepeatedSuccessfulCalls({
      state,
      toolName: "process",
      toolParams: params,
      result,
      count: 2,
      startIndex,
    });

  // Two identical polls reach the custom warning threshold.
  recordTwoPolls(0);
  const warningResult = detectToolCallLoop(state, "process", params, config);
  expect(warningResult.stuck).toBe(true);
  if (warningResult.stuck) {
    expect(warningResult.level).toBe("warning");
  }

  // Two more (four total) reach the custom critical threshold.
  recordTwoPolls(2);
  const criticalResult = detectToolCallLoop(state, "process", params, config);
  expect(criticalResult.stuck).toBe(true);
  if (criticalResult.stuck) {
    expect(criticalResult.level).toBe("critical");
    expect(criticalResult.detector).toBe("known_poll_no_progress");
  }
});
|
|
|
|
it("can disable specific detectors", () => {
  // Detection is enabled, but every detector toggled here is switched off.
  const allDetectorsOff: ToolLoopDetectionConfig = {
    enabled: true,
    detectors: {
      genericRepeat: false,
      knownPollNoProgress: false,
      pingPong: false,
    },
  };
  const state = createState();
  const fixture = createNoProgressPollFixture("sess-no-detectors");

  recordRepeatedSuccessfulCalls({
    state,
    toolName: "process",
    toolParams: fixture.params,
    result: fixture.result,
    count: CRITICAL_THRESHOLD,
  });

  const loopResult = detectToolCallLoop(state, "process", fixture.params, allDetectorsOff);
  expect(loopResult.stuck).toBe(false);
});
|
|
|
|
it("warns for known polling no-progress loops", () => {
  const fixture = createNoProgressPollFixture("sess-1");
  const loopResult = detectLoopAfterRepeatedCalls({
    toolName: "process",
    toolParams: fixture.params,
    result: fixture.result,
    count: WARNING_THRESHOLD,
  });

  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("warning");
  expect(loopResult.detector).toBe("known_poll_no_progress");
  expect(loopResult.message).toContain("no progress");
});
|
|
|
|
it("blocks known polling no-progress loops at critical threshold", () => {
  const fixture = createNoProgressPollFixture("sess-1");
  const loopResult = detectLoopAfterRepeatedCalls({
    toolName: "process",
    toolParams: fixture.params,
    result: fixture.result,
    count: CRITICAL_THRESHOLD,
  });

  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("critical");
  expect(loopResult.detector).toBe("known_poll_no_progress");
  expect(loopResult.message).toContain("CRITICAL");
});
|
|
|
|
it("does not block known polling when output progresses", () => {
  const state = createState();
  const params = { action: "poll", sessionId: "sess-1" };
  // Each poll reports a new line, so the outcome changes on every call.
  const progressingResult = (step: number) => {
    const line = `line ${step}`;
    return {
      content: [{ type: "text", text: line }],
      details: { status: "running", aggregated: line },
    };
  };

  const totalPolls = CRITICAL_THRESHOLD + 5;
  for (let step = 0; step < totalPolls; step += 1) {
    recordSuccessfulCall(state, "process", params, progressingResult(step), step);
  }

  const loopResult = detectToolCallLoop(state, "process", params, enabledLoopDetectionConfig);
  expect(loopResult.stuck).toBe(false);
});
|
|
|
|
it("blocks any tool with global no-progress breaker at 30", () => {
  // The generic-repeat detector is off, and the test expects the global
  // circuit breaker to report the loop instead.
  const config: ToolLoopDetectionConfig = {
    enabled: true,
    detectors: { genericRepeat: false, knownPollNoProgress: true, pingPong: true },
  };
  const { toolName, params: toolParams, result } = createReadNoProgressFixture();

  const loopResult = detectLoopAfterRepeatedCalls({
    toolName,
    toolParams,
    result,
    count: GLOBAL_CIRCUIT_BREAKER_THRESHOLD,
    config,
  });

  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("critical");
  expect(loopResult.detector).toBe("global_circuit_breaker");
  expect(loopResult.message).toContain("global circuit breaker");
});
|
|
|
|
it("blocks repeated completed exec calls despite volatile runtime details", () => {
  const state = createState();
  const params = { command: "grafana-api.sh datasources" };
  // Output and exit code are stable; only durationMs and cwd vary per call.
  const completedResult = (index: number) => ({
    content: [{ type: "text", text: "Loki\nPrometheus" }],
    details: {
      status: "completed",
      exitCode: 0,
      durationMs: 100 + index,
      cwd: `/tmp/run-${index}`,
      aggregated: "Loki\nPrometheus",
    },
  });

  for (let index = 0; index < CRITICAL_THRESHOLD; index += 1) {
    recordSuccessfulCall(state, "exec", params, completedResult(index), index);
  }

  const loopResult = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("critical");
  expect(loopResult.detector).toBe("generic_repeat");
});
|
|
|
|
it("blocks repeated running exec calls despite volatile session details and text", () => {
  const state = createState();
  const params = { command: "tail -f /var/log/app.log", yieldMs: 1000 };
  // Session id, pid, start time, cwd, and the status text all change per
  // call; the status and tail stay the same.
  const runningResult = (index: number) => {
    const sessionId = `sess-${index}`;
    const pid = 1000 + index;
    return {
      content: [
        {
          type: "text",
          text: `Command still running (session ${sessionId}, pid ${pid})`,
        },
      ],
      details: {
        status: "running",
        sessionId,
        pid,
        startedAt: Date.now() + index,
        cwd: `/tmp/run-${index}`,
        tail: "(no new output)",
      },
    };
  };

  for (let index = 0; index < CRITICAL_THRESHOLD; index += 1) {
    recordSuccessfulCall(state, "exec", params, runningResult(index), index);
  }

  const loopResult = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("critical");
  expect(loopResult.detector).toBe("generic_repeat");
});
|
|
|
|
it("keeps changing exec output below the global no-progress breaker", () => {
  const state = createState();
  const params = { command: "date" };
  // The command output itself differs on every call.
  const tickResult = (index: number) => {
    const output = `tick ${index}`;
    return {
      content: [{ type: "text", text: output }],
      details: {
        status: "completed",
        exitCode: 0,
        durationMs: 100 + index,
        aggregated: output,
      },
    };
  };

  for (let index = 0; index < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; index += 1) {
    recordSuccessfulCall(state, "exec", params, tickResult(index), index);
  }

  // Repeated args are still reported, but only as a warning.
  const loopResult = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("warning");
  expect(loopResult.detector).toBe("generic_repeat");
});
|
|
|
|
it("keeps changing empty-output exec failures below the global no-progress breaker", () => {
  const state = createState();
  const params = { command: "openclaw flaky-helper" };
  // The aggregated output is always empty, but the failure text changes per attempt.
  const failedResult = (index: number) => ({
    content: [{ type: "text", text: `Runtime failed before spawn: attempt ${index}` }],
    details: {
      status: "failed",
      exitCode: null,
      durationMs: 100 + index,
      aggregated: "",
    },
  });

  for (let index = 0; index < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; index += 1) {
    recordSuccessfulCall(state, "exec", params, failedResult(index), index);
  }

  const loopResult = detectToolCallLoop(state, "exec", params, enabledLoopDetectionConfig);
  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.level).toBe("warning");
  expect(loopResult.detector).toBe("generic_repeat");
});
|
|
|
|
it("does not block repeated unknown-tool failures before the unknown-tool threshold", () => {
  const state = createState();
  const toolName = "exec";
  const unknownToolError = new Error("Tool exec not found");

  // Stop one failure short of the threshold.
  const failuresToRecord = UNKNOWN_TOOL_THRESHOLD - 1;
  for (let index = 0; index < failuresToRecord; index += 1) {
    recordFailedCall(state, toolName, { command: `echo ${index}` }, unknownToolError, index);
  }

  const loopResult = detectToolCallLoop(
    state,
    toolName,
    { command: "echo still allowed" },
    enabledLoopDetectionConfig,
  );

  expect(loopResult.stuck).toBe(false);
});
|
|
|
|
it("blocks repeated unknown-tool failures even when the args keep changing", () => {
  const state = createState();
  const toolName = "exec";
  const unknownToolError = new Error("Tool exec not found");

  // Twenty attempts with different argument shapes and values, all failing
  // with the same "not found" error.
  const attempts = [
    { command: "ls" },
    { command: "pwd" },
    { input: "whoami" },
    { cmd: "env" },
    { shell: "bash -lc ls" },
    { command: "printf ok" },
    { cwd: "/tmp", command: "ls" },
    { args: ["ls", "/tmp"] },
    { command: "find . -maxdepth 1" },
    { text: "run ls" },
    { command: "uname -a" },
    { command: "id" },
    { command: "date" },
    { command: "ps" },
    { command: "df -h" },
    { command: "free -m" },
    { command: "ls /tmp" },
    { command: "ls -la" },
    { command: "cat /etc/hostname" },
    { command: "echo done" },
  ];

  attempts.forEach((params, index) => {
    recordFailedCall(state, toolName, params, unknownToolError, index);
  });

  const loopResult = detectToolCallLoop(
    state,
    toolName,
    { command: "echo still looping" },
    enabledLoopDetectionConfig,
  );

  expect(loopResult.stuck).toBe(true);
  if (!loopResult.stuck) {
    return;
  }
  expect(loopResult.detector).toBe("unknown_tool_repeat");
  expect(loopResult.level).toBe("critical");
});
|
|
|
|
it("warns on ping-pong alternating patterns", () => {
  const state = createState();
  const readParams = { path: "/a.txt" };
  const listParams = { dir: "/workspace" };

  // Record WARNING_THRESHOLD - 1 alternating calls (read on even steps, list
  // on odd steps); the detection call below is the next `list`.
  const recordedCalls = WARNING_THRESHOLD - 1;
  for (let step = 0; step < recordedCalls; step += 1) {
    const isReadTurn = step % 2 === 0;
    if (isReadTurn) {
      recordToolCall(state, "read", readParams, `read-${step}`);
    } else {
      recordToolCall(state, "list", listParams, `list-${step}`);
    }
  }

  const loopResult = detectToolCallLoop(state, "list", listParams, enabledLoopDetectionConfig);
  expectPingPongLoop(loopResult, { level: "warning", count: WARNING_THRESHOLD });
  if (loopResult.stuck) {
    expect(loopResult.message).toContain("ping-pong loop");
  }
});
|
|
|
|
it("blocks ping-pong alternating patterns at critical threshold", () => {
  const fixture = createPingPongFixture();
  // Each tool returns the same text on every call.
  const stableText = (toolName: "read" | "list"): string =>
    toolName === "read" ? "read stable" : "list stable";

  recordSuccessfulPingPongCalls({
    state: fixture.state,
    readParams: fixture.readParams,
    listParams: fixture.listParams,
    count: CRITICAL_THRESHOLD - 1,
    textAtIndex: stableText,
  });

  const loopResult = detectToolCallLoop(
    fixture.state,
    "list",
    fixture.listParams,
    enabledLoopDetectionConfig,
  );
  expectPingPongLoop(loopResult, {
    level: "critical",
    count: CRITICAL_THRESHOLD,
    expectCriticalText: true,
  });
  if (loopResult.stuck) {
    expect(loopResult.message).toContain("ping-pong loop");
  }
});
|
|
|
|
it("does not block ping-pong at critical threshold when outcomes are progressing", () => {
  const fixture = createPingPongFixture();
  // The result text includes the call index, so every outcome is different.
  const progressingText = (toolName: "read" | "list", index: number): string =>
    `${toolName} ${index}`;

  recordSuccessfulPingPongCalls({
    state: fixture.state,
    readParams: fixture.readParams,
    listParams: fixture.listParams,
    count: CRITICAL_THRESHOLD - 1,
    textAtIndex: progressingText,
  });

  const loopResult = detectToolCallLoop(
    fixture.state,
    "list",
    fixture.listParams,
    enabledLoopDetectionConfig,
  );
  // The alternation is still reported, but only at warning level.
  expectPingPongLoop(loopResult, { level: "warning", count: CRITICAL_THRESHOLD });
});
|
|
|
|
it("does not flag ping-pong when alternation is broken", () => {
  const state = createState();
  const readParams = { path: "/a.txt" };
  const listParams = { dir: "/workspace" };

  recordToolCall(state, "read", readParams, "a1");
  recordToolCall(state, "list", listParams, "b1");
  recordToolCall(state, "read", { path: "/a.txt" }, "a2");
  // A third, unrelated tool interrupts the read/list alternation.
  recordToolCall(state, "write", { path: "/tmp/out.txt" }, "c1");

  const loopResult = detectToolCallLoop(
    state,
    "list",
    { dir: "/workspace" },
    enabledLoopDetectionConfig,
  );
  expect(loopResult.stuck).toBe(false);
});
|
|
|
|
it("records fixed-size result hashes for large tool outputs", () => {
  const state = createState();
  const toolCallId = "log-big";
  const params = { action: "log", sessionId: "sess-big" };
  // A 40,000-character text payload.
  const largeResult = {
    content: [{ type: "text", text: "y".repeat(40_000) }],
    details: { status: "running", totalLines: 1, totalChars: 40_000 },
  };

  recordToolCall(state, "process", params, toolCallId);
  recordToolCallOutcome(state, {
    toolName: "process",
    toolParams: params,
    toolCallId,
    result: largeResult,
  });

  const entry = state.toolCallHistory?.find((call) => call.toolCallId === toolCallId);
  expect(typeof entry?.resultHash).toBe("string");
  expect(entry?.resultHash?.length).toBe(64);
});
|
|
|
|
it("returns the recorded call when a pre-recorded tool call receives its result", () => {
  const state = createState();
  const toolCallId = "call-1";
  const params = { action: "lookup", path: "cron.maxConcurrentRuns" };

  recordToolCall(state, "gateway", params, toolCallId);

  const recorded = recordToolCallOutcome(state, {
    toolName: "gateway",
    toolParams: params,
    toolCallId: "call-1",
    result: { content: [{ type: "text", text: "same schema" }] },
  });

  // The outcome is attached to the existing entry rather than appended as a new one.
  expect(recorded?.toolCallId).toBe("call-1");
  expect(state.toolCallHistory).toHaveLength(1);
  expect(state.toolCallHistory?.[0]?.resultHash).toBeTypeOf("string");
});
|
|
|
|
it("returns the recorded call while trimming production call/outcome records", () => {
  const state = createState();
  const totalCalls = TOOL_CALL_HISTORY_SIZE + 3;
  let lastRecordedToolCallId: string | undefined;

  // Record three more call/outcome pairs than the history window holds.
  for (let i = 0; i < totalCalls; i += 1) {
    const toolCallId = `call-${i}`;
    const params = { action: "lookup", path: `config.${i}` };
    recordToolCall(state, "gateway", params, toolCallId);
    const recorded = recordToolCallOutcome(state, {
      toolName: "gateway",
      toolParams: params,
      toolCallId,
      result: { content: [{ type: "text", text: `schema-${i}` }] },
    });
    lastRecordedToolCallId = recorded?.toolCallId;
  }

  expect(lastRecordedToolCallId).toBe(`call-${TOOL_CALL_HISTORY_SIZE + 2}`);
  expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE);
  // The three oldest entries (call-0..call-2) have been trimmed.
  expect(state.toolCallHistory?.[0]?.toolCallId).toBe("call-3");
});
|
|
|
|
it("does not attach outcomes to matching calls from other runs", () => {
  const state = createState();
  const params = { path: "/same.txt" };
  const recordingScope = { runId: "run-1" };

  recordToolCall(state, "read", params, "call-1", enabledLoopDetectionConfig, recordingScope);

  // Same tool, params, and call id, but the outcome is reported under another run.
  recordToolCallOutcome(state, {
    toolName: "read",
    toolParams: params,
    toolCallId: "call-1",
    result: { content: [{ type: "text", text: "same output" }] },
    config: enabledLoopDetectionConfig,
    runId: "run-2",
  });

  // The run-1 entry stays without a result; a separate run-2 entry carries it.
  const history = state.toolCallHistory;
  expect(history).toHaveLength(2);
  expect(history?.[0]?.resultHash).toBeUndefined();
  expect(history?.[1]?.runId).toBe("run-2");
  expect(history?.[1]?.resultHash).toBeTypeOf("string");
});
|
|
|
|
it("handles empty history", () => {
  // Nothing has been recorded on this state before detection runs.
  const emptyState = createState();
  const outcome = detectToolCallLoop(emptyState, "tool", { arg: 1 }, enabledLoopDetectionConfig);
  expect(outcome.stuck).toBe(false);
});
|
|
});
|
|
|
|
describe("message send loop detection (#89090)", () => {
|
|
// Builds a tool result in the jsonResult(payload) shape: `content` holds the
// pretty-printed JSON of the payload (so any volatile id appears in the text as
// well) and `details` is the payload object itself.
function sendResult(payload: Record<string, unknown>) {
  const text = JSON.stringify(payload, null, 2);
  return { content: [{ type: "text", text }], details: payload };
}
|
|
|
|
// Records one send-like tool call and its result (wrapped by sendResult) with
// loop detection enabled. The call id is `<toolName>-<index>`.
function recordSend(
  state: SessionState,
  toolName: string,
  params: unknown,
  payload: Record<string, unknown>,
  index: number,
): void {
  const config = enabledLoopDetectionConfig;
  const callId = toolName + "-" + String(index);
  recordToolCall(state, toolName, params, callId, config);
  recordToolCallOutcome(state, {
    toolName,
    toolParams: params,
    toolCallId: callId,
    result: sendResult(payload),
    config,
  });
}
|
|
|
|
// Builds a send payload where ok/channel/chatId are fixed and runId, messageId,
// and the receipt's platformMessageId all vary with `index`.
function sendPayload(index: number): Record<string, unknown> {
  const stableFields = { ok: true, channel: "feishu", chatId: "oc_chat" };
  const perCallFields = {
    runId: `run_${index}`,
    messageId: `om_${index}`,
    receipt: { platformMessageId: `p_${index}` },
  };
  return { ...stableFields, ...perCallFields };
}
|
|
|
|
it("gives duplicate sends a stable result hash despite per-call ids in details and text", () => {
  const state = createState();
  const params = { action: "send", target: "feishu:oc_chat", text: "ping" };

  // Two sends with identical args whose results differ only in per-call ids.
  [0, 1].forEach((index) => recordSend(state, "message", params, sendPayload(index), index));

  const [firstHash, secondHash] =
    state.toolCallHistory
      ?.filter((call) => call.toolName === "message")
      .map((call) => call.resultHash) ?? [];
  expect(firstHash).toBeTypeOf("string");
  expect(firstHash).toBe(secondHash);
});
|
|
|
|
it("strips nested ids so broadcast results with per-call ids share a result hash", () => {
  const state = createState();
  const params = { action: "broadcast", text: "ping" };
  // The per-call ids sit two levels down, inside results[].result.
  const broadcastPayload = (index: number) => {
    const perChannel = {
      channel: "feishu",
      ok: true,
      result: { messageId: `om_${index}`, receipt: index },
    };
    return { results: [perChannel] };
  };

  recordSend(state, "message", params, broadcastPayload(0), 0);
  recordSend(state, "message", params, broadcastPayload(1), 1);

  const [firstHash, secondHash] =
    state.toolCallHistory
      ?.filter((call) => call.toolName === "message")
      .map((call) => call.resultHash) ?? [];
  expect(firstHash).toBe(secondHash);
});
|
|
|
|
it("escalates identical-arg send loops to critical even though every id differs", () => {
  const session = createState();
  const sendArgs = { action: "send", target: "feishu:oc_chat", text: "ping" };
  // Record the same send CRITICAL_THRESHOLD times; every result carries index-derived ids.
  let attempt = 0;
  while (attempt < CRITICAL_THRESHOLD) {
    recordSend(session, "message", sendArgs, sendPayload(attempt), attempt);
    attempt += 1;
  }
  const verdict = detectToolCallLoop(session, "message", sendArgs, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (verdict.stuck) {
    expect(verdict.level).toBe("critical");
  }
});
|
|
|
|
it("also blocks sessions_send loops whose result carries a fresh runId", () => {
  const session = createState();
  const toolName = "sessions_send";
  const sendArgs = { sessionKey: "agent:main:peer", text: "ping" };
  for (let round = 0; round < CRITICAL_THRESHOLD; round += 1) {
    // Only the runId changes from one result to the next.
    const payload = { ok: true, runId: `run_${round}` };
    recordSend(session, toolName, sendArgs, payload, round);
  }
  const verdict = detectToolCallLoop(session, toolName, sendArgs, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (verdict.stuck) {
    expect(verdict.level).toBe("critical");
  }
});
|
|
|
|
it("escalates provider-docked send-tool loops (e.g. telegram) whose result carries fresh ids", () => {
  const session = createState();
  const dockedTool = "telegram";
  const sendArgs = { to: "telegram:123", text: "ping" };
  // Same arguments on every call; the payload ids are derived from the call index.
  let attempt = 0;
  while (attempt < CRITICAL_THRESHOLD) {
    recordSend(session, dockedTool, sendArgs, sendPayload(attempt), attempt);
    attempt += 1;
  }
  const verdict = detectToolCallLoop(session, dockedTool, sendArgs, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (verdict.stuck) {
    expect(verdict.level).toBe("critical");
  }
});
|
|
|
|
it("escalates sibling delivery actions (reply) the same as send", () => {
  const session = createState();
  // Same shape as the send case, but with the "reply" action.
  const replyArgs = { action: "reply", target: "feishu:oc_chat", text: "ping" };
  for (let attempt = 0; attempt < CRITICAL_THRESHOLD; attempt += 1) {
    const payload = sendPayload(attempt);
    recordSend(session, "message", replyArgs, payload, attempt);
  }
  const verdict = detectToolCallLoop(session, "message", replyArgs, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (verdict.stuck) {
    expect(verdict.level).toBe("critical");
  }
});
|
|
|
|
it("blocks upload-file loops whose result carries a fresh file id", () => {
  const session = createState();
  const uploadArgs = { action: "upload-file", target: "feishu:oc_chat", media: "img.png" };
  // Both fileId and messageId change with every upload result.
  const uploadResult = (n: number) => ({
    ok: true,
    channel: "feishu",
    fileId: `f_${n}`,
    messageId: `om_${n}`,
  });
  for (let attempt = 0; attempt < CRITICAL_THRESHOLD; attempt += 1) {
    recordSend(session, "message", uploadArgs, uploadResult(attempt), attempt);
  }
  const verdict = detectToolCallLoop(session, "message", uploadArgs, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (verdict.stuck) {
    expect(verdict.level).toBe("critical");
  }
});
|
|
|
|
it("blocks poll loops whose result carries a fresh poll id", () => {
  const session = createState();
  const pollArgs = { action: "poll", target: "tg:chat-1", question: "?" };
  // messageId and pollId change per result; channel and chatId stay fixed.
  const pollResult = (n: number) => ({
    ok: true,
    channel: "telegram",
    messageId: `om_${n}`,
    chatId: "c1",
    pollId: `pl_${n}`,
  });
  let attempt = 0;
  while (attempt < CRITICAL_THRESHOLD) {
    recordSend(session, "message", pollArgs, pollResult(attempt), attempt);
    attempt += 1;
  }
  const verdict = detectToolCallLoop(session, "message", pollArgs, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (verdict.stuck) {
    expect(verdict.level).toBe("critical");
  }
});
|
|
|
|
it("does not flag distinct messages (different args) as a no-progress loop", () => {
  const session = createState();
  // Every call sends a different text, so no two calls share the same arguments.
  const argsFor = (text: string) => ({ action: "send", target: "feishu:oc_chat", text });
  for (let n = 0; n < CRITICAL_THRESHOLD; n += 1) {
    recordSend(session, "message", argsFor(`line ${n}`), sendPayload(n), n);
  }
  const verdict = detectToolCallLoop(
    session,
    "message",
    argsFor("line final"),
    enabledLoopDetectionConfig,
  );
  expect(verdict.stuck).toBe(false);
});
|
|
|
|
it("keeps full result hashing for non-send message actions so id/timestamp progress is not masked", () => {
  const session = createState();
  const readArgs = { action: "read", target: "feishu:oc_chat" };
  // The two results differ only in messageId and ts. For a non-send action that
  // difference must count as progress (distinct hashes), unlike a send where
  // those fields are stripped as volatile.
  const readResults = [
    { ok: true, messageId: "m_0", ts: 1000 },
    { ok: true, messageId: "m_1", ts: 2000 },
  ];
  readResults.forEach((payload, index) => {
    recordSend(session, "message", readArgs, payload, index);
  });
  const messageCalls = session.toolCallHistory?.filter((entry) => entry.toolName === "message");
  const firstHash = messageCalls?.[0]?.resultHash;
  const secondHash = messageCalls?.[1]?.resultHash;
  expect(firstHash).toBeTypeOf("string");
  expect(firstHash).not.toBe(secondHash);
});
|
|
|
|
it("preserves stable nested route ids so distinct routes stay distinguishable", () => {
  const session = createState();
  const sendArgs = { action: "send", target: "feishu:oc_chat", text: "ping" };
  // messageId is volatile (stripped), but the nested route id is a stable fact that
  // must survive, so sends resolving to different routes keep distinct hashes.
  ["conv-A", "conv-B"].forEach((routeId, index) => {
    const payload = { ok: true, messageId: `om_${index}`, route: { id: routeId } };
    recordSend(session, "message", sendArgs, payload, index);
  });
  const messageCalls = session.toolCallHistory?.filter((entry) => entry.toolName === "message");
  const firstHash = messageCalls?.[0]?.resultHash;
  const secondHash = messageCalls?.[1]?.resultHash;
  expect(firstHash).not.toBe(secondHash);
});
|
|
|
|
it("keeps a critical send block sticky after the veto result is recorded", () => {
  const session = createState();
  const sendArgs = { action: "send", target: "feishu:oc_chat", text: "ping" };
  const detect = () =>
    detectToolCallLoop(session, "message", sendArgs, enabledLoopDetectionConfig);

  for (let attempt = 0; attempt < CRITICAL_THRESHOLD; attempt += 1) {
    recordSend(session, "message", sendArgs, sendPayload(attempt), attempt);
  }
  expect(detect().stuck).toBe(true);

  // The loop veto records a blocked result (buildBlockedToolResult, deniedReason
  // "tool-loop"). Recording it must not reset the no-progress streak, so the next
  // identical send has to be blocked as well.
  const vetoCallId = "message-veto";
  const vetoResult = {
    content: [{ type: "text", text: "blocked" }],
    details: { status: "blocked", deniedReason: "tool-loop" },
  };
  recordToolCall(session, "message", sendArgs, vetoCallId, enabledLoopDetectionConfig);
  recordToolCallOutcome(session, {
    toolName: "message",
    toolParams: sendArgs,
    toolCallId: vetoCallId,
    result: vetoResult,
    config: enabledLoopDetectionConfig,
  });

  const after = detect();
  expect(after.stuck).toBe(true);
  if (after.stuck) {
    expect(after.level).toBe("critical");
  }
});
|
|
|
|
it("still escalates repeated plugin/approval vetoes to a critical loop", () => {
  const session = createState();
  const readArgs = { action: "read", target: "feishu:oc_chat" };
  // A non-loop veto (plugin/approval) keeps a stable result hash, so repeated
  // identical denials still build up a no-progress streak and reach a critical block.
  let attempt = 0;
  while (attempt < CRITICAL_THRESHOLD) {
    const vetoCallId = `veto-${attempt}`;
    recordToolCall(session, "message", readArgs, vetoCallId, enabledLoopDetectionConfig);
    recordToolCallOutcome(session, {
      toolName: "message",
      toolParams: readArgs,
      toolCallId: vetoCallId,
      result: {
        content: [{ type: "text", text: "blocked" }],
        details: { status: "blocked", deniedReason: "plugin-before-tool-call" },
      },
      config: enabledLoopDetectionConfig,
    });
    attempt += 1;
  }
  const verdict = detectToolCallLoop(session, "message", readArgs, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (verdict.stuck) {
    expect(verdict.level).toBe("critical");
  }
});
|
|
|
|
it("blocks plugin-shaped send results whose message object carries a bare per-send id", () => {
  const session = createState();
  const sendArgs = { action: "send", to: "feishu:chat-1", content: "hello" };
  // Only message.id and message.timestamp vary per send; conversation.id is the
  // same "loop-room" every time.
  const pluginResult = (n: number) => ({
    message: {
      id: `qa_${n}`,
      accountId: "default",
      direction: "outbound",
      conversation: { id: "loop-room", chatType: "channel" },
      senderId: "openclaw",
      text: "hello",
      timestamp: 1_800_000_000_000 + n,
    },
  });
  for (let attempt = 0; attempt < CRITICAL_THRESHOLD; attempt += 1) {
    recordSend(session, "message", sendArgs, pluginResult(attempt), attempt);
  }
  const verdict = detectToolCallLoop(session, "message", sendArgs, enabledLoopDetectionConfig);
  expect(verdict.stuck).toBe(true);
  if (verdict.stuck) {
    expect(verdict.level).toBe("critical");
  }
});
|
|
|
|
it("does not escalate when a stable conversation id changes between sends", () => {
  const session = createState();
  const sendArgs = { action: "send", to: "feishu:chat-1", content: "hello" };
  // Here conversation.id itself differs on every send, in addition to message.id.
  const pluginResult = (n: number) => ({
    message: {
      id: `qa_${n}`,
      direction: "outbound",
      conversation: { id: `loop-room-${n}`, chatType: "channel" },
      text: "hello",
    },
  });
  let attempt = 0;
  while (attempt < CRITICAL_THRESHOLD) {
    recordSend(session, "message", sendArgs, pluginResult(attempt), attempt);
    attempt += 1;
  }
  const verdict = detectToolCallLoop(session, "message", sendArgs, enabledLoopDetectionConfig);
  // Either not stuck at all (false), or stuck at a level other than "critical".
  const stuckLevel = verdict.stuck ? verdict.level : false;
  expect(stuckLevel).not.toBe("critical");
});
|
|
});
|
|
|
|
describe("getToolCallStats", () => {
  // Records `count` calls with the same tool name and params, giving each call
  // its own `<idPrefix>-<n>` call id.
  function recordRepeated(
    state: SessionState,
    toolName: string,
    params: unknown,
    idPrefix: string,
    count: number,
  ): void {
    for (let n = 0; n < count; n += 1) {
      recordToolCall(state, toolName, params, `${idPrefix}-${n}`);
    }
  }

  it("returns zero stats for empty history", () => {
    // Nothing has been recorded on a freshly created state.
    const { totalCalls, uniquePatterns, mostFrequent } = getToolCallStats(createState());

    expect(totalCalls).toBe(0);
    expect(uniquePatterns).toBe(0);
    expect(mostFrequent).toBeNull();
  });

  it("counts total calls and unique patterns", () => {
    const session = createState();

    // Five identical reads, followed by three calls that each differ in tool or params.
    recordRepeated(session, "read", { path: "/file.txt" }, "same", 5);
    recordToolCall(session, "write", { path: "/output.txt" }, "write-1");
    recordToolCall(session, "list", { dir: "/home" }, "list-1");
    recordToolCall(session, "read", { path: "/other.txt" }, "read-other");

    const { totalCalls, uniquePatterns } = getToolCallStats(session);
    expect(totalCalls).toBe(8);
    expect(uniquePatterns).toBe(4);
  });

  it("identifies most frequent pattern", () => {
    const session = createState();

    // Three patterns recorded 3, 7 and 2 times; the 7-call read is the one expected to win.
    recordRepeated(session, "read", { path: "/file1.txt" }, "p1", 3);
    recordRepeated(session, "read", { path: "/file2.txt" }, "p2", 7);
    recordRepeated(session, "write", { path: "/output.txt" }, "p3", 2);

    const { mostFrequent } = getToolCallStats(session);
    expect(mostFrequent?.toolName).toBe("read");
    expect(mostFrequent?.count).toBe(7);
  });
});
|
|
});
|