mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 09:20:43 +00:00
fix: stop repeated unknown-tool loops (#65922)
Merged via squash.
Prepared head SHA: f352a270a6
Reviewed-by: @osolmaz
This commit is contained in:
@@ -123,6 +123,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Telegram/sessions: keep topic-scoped session initialization on the canonical topic transcript path when inbound turns omit `MessageThreadId`, so one topic session no longer alternates between bare and topic-qualified transcript files. (#64869) Thanks @jalehman.
|
||||
- Agents/failover: scope assistant-side fallback classification and surfaced provider errors to the current attempt instead of stale session history, so cross-provider fallback runs stop inheriting the previous provider's failure. (#62907) Thanks @stainlu.
|
||||
- MiniMax/OAuth: write `api: "anthropic-messages"` and `authHeader: true` into the `minimax-portal` config patch during `openclaw configure`, so re-authenticated portal setups keep Bearer auth routing working. (#64964) Thanks @ryanlee666.
|
||||
- Agents/tools: stop repeated unavailable-tool retries from escaping loop detection when the model changes arguments, and rewrite over-threshold unknown tool calls into plain assistant text before dispatch. (#65922) Thanks @dutifulbob.
|
||||
|
||||
## 2026.4.10
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
resolveEmbeddedAgentBaseStreamFn,
|
||||
resolveAttemptFsWorkspaceOnly,
|
||||
resolveEmbeddedAgentStreamFn,
|
||||
resolveUnknownToolGuardThreshold,
|
||||
resolvePromptBuildHookResult,
|
||||
resolvePromptModeForSession,
|
||||
shouldWarnOnOrphanedUserRepair,
|
||||
@@ -421,13 +422,33 @@ describe("resolveAttemptFsWorkspaceOnly", () => {
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// Pins the three outcomes of resolveUnknownToolGuardThreshold: guard off,
// guard on with the built-in default, and guard on with an explicit override.
describe("resolveUnknownToolGuardThreshold", () => {
  it("returns undefined when loop detection is disabled", () => {
    const disabledWithOverride = resolveUnknownToolGuardThreshold({
      enabled: false,
      unknownToolThreshold: 4,
    });
    expect(disabledWithOverride).toBe(undefined);

    const missingConfig = resolveUnknownToolGuardThreshold(undefined);
    expect(missingConfig).toBe(undefined);
  });

  it("uses the default threshold when loop detection is enabled without an override", () => {
    const resolved = resolveUnknownToolGuardThreshold({ enabled: true });
    expect(resolved).toBe(10);
  });

  it("uses the configured threshold override when provided", () => {
    const resolved = resolveUnknownToolGuardThreshold({ enabled: true, unknownToolThreshold: 4 });
    expect(resolved).toBe(4);
  });
});
|
||||
|
||||
describe("wrapStreamFnTrimToolCallNames", () => {
|
||||
/**
 * Test helper: wraps `baseFn` with `wrapStreamFnTrimToolCallNames` and runs it
 * through `invokeWrappedTestStream`.
 *
 * @param baseFn - Fake stream function under test.
 * @param allowedToolNames - Optional allowlist forwarded to the wrapper.
 * @param guardOptions - Optional unknown-tool guard settings forwarded to the wrapper.
 * @returns Whatever `invokeWrappedTestStream` resolves to for the wrapped function.
 */
async function invokeWrappedStream(
  baseFn: (...args: never[]) => unknown,
  allowedToolNames?: Set<string>,
  guardOptions?: { unknownToolThreshold?: number },
) {
  // A single wrapper callback that forwards guardOptions. The superseded
  // two-argument callback has been dropped so the guard settings actually
  // reach the wrapper.
  return await invokeWrappedTestStream(
    (innerBaseFn) =>
      wrapStreamFnTrimToolCallNames(innerBaseFn as never, allowedToolNames, guardOptions),
    baseFn,
  );
}
|
||||
@@ -574,6 +595,133 @@ describe("wrapStreamFnTrimToolCallNames", () => {
|
||||
expect(result).toBe(finalMessage);
|
||||
});
|
||||
|
||||
it("rewrites repeated unavailable tool calls into plain assistant text after the threshold", async () => {
  // Each attempt produces a brand-new assistant message asking for " exec ",
  // which is not part of the allowlist below.
  const makeUnavailableToolStream = () =>
    createFakeStream({
      events: [],
      resultMessage: {
        role: "assistant",
        content: [{ type: "toolCall", name: " exec ", arguments: { command: "echo eleven" } }],
      },
    });
  const baseFn = vi.fn(makeUnavailableToolStream);
  const guardedFn = wrapStreamFnTrimToolCallNames(baseFn as never, new Set(["read"]), {
    unknownToolThreshold: 10,
  });
  const runAttempt = async () => {
    const attemptStream = await Promise.resolve(guardedFn({} as never, {} as never, {} as never));
    return await attemptStream.result();
  };

  // The first ten attempts only get their tool name trimmed.
  let passedThrough = 0;
  while (passedThrough < 10) {
    const result = await runAttempt();
    expect(result).toMatchObject({
      role: "assistant",
      content: [{ type: "toolCall", name: "exec" }],
    });
    passedThrough += 1;
  }

  // The eleventh attempt exceeds the threshold and is rewritten to text.
  const blockedResult = (await runAttempt()) as {
    role: string;
    content: Array<{ type: string; text?: string }>;
  };

  expect(blockedResult.role).toBe("assistant");
  expect(blockedResult.content).toEqual([
    expect.objectContaining({
      type: "text",
      text: expect.stringContaining('"exec"'),
    }),
  ]);
});
|
||||
|
||||
it("leaves repeated unavailable tool calls alone when the unknown-tool guard is disabled", async () => {
  // Fresh message per attempt; no guard options are passed to the wrapper.
  const makeUnavailableToolStream = () =>
    createFakeStream({
      events: [],
      resultMessage: {
        role: "assistant",
        content: [{ type: "toolCall", name: " exec ", arguments: { command: "echo eleven" } }],
      },
    });
  const baseFn = vi.fn(makeUnavailableToolStream);
  const unguardedFn = wrapStreamFnTrimToolCallNames(baseFn as never, new Set(["read"]));

  // Eleven attempts in a row must all keep their (trimmed) tool call.
  let attempt = 0;
  while (attempt < 11) {
    const attemptStream = await Promise.resolve(
      unguardedFn({} as never, {} as never, {} as never),
    );
    const result = await attemptStream.result();
    expect(result).toMatchObject({
      role: "assistant",
      content: [{ type: "toolCall", name: "exec" }],
    });
    attempt += 1;
  }
});
|
||||
|
||||
it("does not count partial tool-call deltas as separate unavailable-tool retries", async () => {
  // Three distinct block objects so each can be inspected after the run.
  const makeUntrimmedCall = () => ({ type: "toolCall", name: " exec " });
  const partialToolCall = makeUntrimmedCall();
  const messageToolCall = makeUntrimmedCall();
  const finalToolCall = makeUntrimmedCall();

  const event = {
    type: "toolcall_delta",
    partial: { role: "assistant", content: [partialToolCall] },
    message: { role: "assistant", content: [messageToolCall] },
  };
  const { baseFn } = createEventStream({ event, finalToolCall });

  // With a threshold of 1 the call would be rewritten if the delta and the
  // final result were counted as two separate attempts.
  const guardOptions = { unknownToolThreshold: 1 };
  const stream = await invokeWrappedStream(baseFn, new Set(["read"]), guardOptions);

  for await (const _item of stream) {
    // drain
  }
  const result = (await stream.result()) as {
    content: Array<{ type: string; text?: string; name?: string }>;
  };

  expect(partialToolCall.name).toBe("exec");
  expect(messageToolCall.name).toBe("exec");
  expect(result.content).toEqual([expect.objectContaining({ type: "toolCall", name: "exec" })]);
});
|
||||
|
||||
it("does not reset the unavailable-tool streak on partial-only stream chunks", async () => {
  // Each stream emits one delta that carries only `partial` (no `message`)
  // before resolving to a message that requests the unavailable tool.
  const makePartialOnlyStream = () =>
    createFakeStream({
      events: [
        {
          type: "toolcall_delta",
          partial: { role: "assistant", content: [{ type: "toolCall", name: " exec " }] },
        },
      ],
      resultMessage: {
        role: "assistant",
        content: [{ type: "toolCall", name: " exec ", arguments: { command: "echo retry" } }],
      },
    });
  const baseFn = vi.fn(makePartialOnlyStream);
  const guardedFn = wrapStreamFnTrimToolCallNames(baseFn as never, new Set(["read"]), {
    unknownToolThreshold: 1,
  });
  const openStream = async () =>
    await Promise.resolve(guardedFn({} as never, {} as never, {} as never));

  // First attempt: only the final result is consumed.
  const firstStream = await openStream();
  await firstStream.result();

  // Second attempt: drain the events first, then read the result.
  const secondStream = await openStream();
  for await (const _item of secondStream) {
    // drain
  }
  const secondResult = (await secondStream.result()) as {
    role: string;
    content: Array<{ type: string; text?: string; name?: string }>;
  };

  expect(secondResult.role).toBe("assistant");
  expect(secondResult.content).toEqual([
    expect.objectContaining({
      type: "text",
      text: expect.stringContaining('"exec"'),
    }),
  ]);
});
|
||||
|
||||
it("infers tool names from malformed toolCallId variants when allowlist is present", async () => {
|
||||
const partialToolCall = { type: "toolCall", id: "functions.read:0", name: "" };
|
||||
const finalToolCallA = { type: "toolCall", id: "functionsread3", name: "" };
|
||||
|
||||
@@ -11,6 +11,12 @@ import { normalizeToolName } from "../../tool-policy.js";
|
||||
import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js";
|
||||
import type { TranscriptPolicy } from "../../transcript-policy.js";
|
||||
|
||||
/** Mutable bookkeeping for the unknown-tool loop guard. */
type UnknownToolLoopGuardState = {
  /** Number of consecutive counted attempts that asked for `lastUnknownToolName`. */
  count: number;
  /** Message objects that have already been counted, so the same object is never counted twice. */
  countedMessages: WeakSet<object>;
  /** Normalized name of the unavailable tool in the current streak, if any. */
  lastUnknownToolName?: string;
};
|
||||
|
||||
function resolveCaseInsensitiveAllowedToolName(
|
||||
rawName: string,
|
||||
allowedToolNames?: Set<string>,
|
||||
@@ -630,14 +636,128 @@ function trimWhitespaceFromToolCallNamesInMessage(
|
||||
normalizeToolCallIdsInMessage(message);
|
||||
}
|
||||
|
||||
/**
 * Finds the single unavailable tool that a message is asking for.
 *
 * A name is returned only when every tool-call block in `message.content`
 * names the same tool (after `normalizeToolName`) and none of the blocks
 * resolves through `resolveExactAllowedToolName`. Any other shape yields
 * `undefined`: no allowlist, no content array, no tool calls, a blank name,
 * an allowed name, or a mix of different unknown names.
 *
 * @param message - Candidate assistant message; non-objects are ignored.
 * @param allowedToolNames - Allowlist; when missing or empty nothing is reported.
 * @returns The normalized unknown tool name, or `undefined`.
 */
function collectUnknownToolNameFromMessage(
  message: unknown,
  allowedToolNames?: Set<string>,
): string | undefined {
  if (!allowedToolNames || allowedToolNames.size === 0) {
    return undefined;
  }
  if (!message || typeof message !== "object") {
    return undefined;
  }
  const blocks = (message as { content?: unknown }).content;
  if (!Array.isArray(blocks)) {
    return undefined;
  }

  // Stays undefined until the first tool-call block has been accepted, so it
  // also covers the "no tool call seen" case.
  let candidate: string | undefined;
  for (const entry of blocks) {
    if (!entry || typeof entry !== "object") {
      continue;
    }
    const toolBlock = entry as { type?: unknown; name?: unknown };
    if (!isToolCallBlockType(toolBlock.type)) {
      continue;
    }

    const trimmedName = typeof toolBlock.name === "string" ? toolBlock.name.trim() : "";
    if (!trimmedName) {
      return undefined;
    }
    if (resolveExactAllowedToolName(trimmedName, allowedToolNames)) {
      return undefined;
    }

    const normalized = normalizeToolName(trimmedName);
    if (candidate && candidate !== normalized) {
      // Two different unavailable tools in one message: not a single-tool loop.
      return undefined;
    }
    if (!candidate) {
      candidate = normalized;
    }
  }

  return candidate;
}
|
||||
|
||||
/**
 * Replaces a message's `content` in place with a single text block saying that
 * `toolName` is unavailable. Values that are not objects are left untouched.
 *
 * @param message - Message to mutate.
 * @param toolName - Tool name quoted in the replacement text.
 */
function rewriteUnknownToolLoopMessage(message: unknown, toolName: string): void {
  if (typeof message !== "object" || message === null) {
    return;
  }
  const notice = {
    type: "text",
    text: `I can't use the tool "${toolName}" here because it isn't available. I need to stop retrying it and answer without that tool.`,
  };
  (message as { content?: unknown }).content = [notice];
}
|
||||
|
||||
/**
 * Tracks consecutive requests for the same unavailable tool and rewrites the
 * message to plain text once the streak exceeds the threshold.
 *
 * @param message - Message to inspect and possibly rewrite in place.
 * @param state - Streak bookkeeping shared between calls.
 * @param params - `allowedToolNames` is the allowlist; `threshold` is the number
 *   of tolerated attempts (the guard does nothing when it is undefined or
 *   `<= 0`); `countAttempt` says whether this message may advance or reset the
 *   streak, or may only be rewritten against the current streak.
 */
function guardUnknownToolLoopInMessage(
  message: unknown,
  state: UnknownToolLoopGuardState,
  params: { allowedToolNames?: Set<string>; threshold?: number; countAttempt: boolean },
): void {
  const { threshold, countAttempt, allowedToolNames } = params;
  if (threshold === undefined || threshold <= 0) {
    return;
  }

  const unknownToolName = collectUnknownToolNameFromMessage(message, allowedToolNames);
  if (!unknownToolName) {
    // Only a counted attempt may end the streak.
    if (countAttempt) {
      state.lastUnknownToolName = undefined;
      state.count = 0;
    }
    return;
  }

  // Rewrites only while the streak belongs to this tool and is over the limit.
  const rewriteIfOverThreshold = (): void => {
    const sameTool = state.lastUnknownToolName === unknownToolName;
    if (sameTool && state.count > threshold) {
      rewriteUnknownToolLoopMessage(message, unknownToolName);
    }
  };

  const isObjectMessage = typeof message === "object" && message !== null;
  const alreadyCounted = isObjectMessage && state.countedMessages.has(message as object);
  if (!countAttempt || alreadyCounted) {
    // Never count here: non-counting views of an attempt, and messages seen
    // before, are only checked against the current streak.
    rewriteIfOverThreshold();
    return;
  }
  if (isObjectMessage) {
    state.countedMessages.add(message as object);
  }

  if (state.lastUnknownToolName === unknownToolName) {
    state.count += 1;
  } else {
    state.lastUnknownToolName = unknownToolName;
    state.count = 1;
  }

  rewriteIfOverThreshold();
}
|
||||
|
||||
function wrapStreamTrimToolCallNames(
|
||||
stream: ReturnType<typeof streamSimple>,
|
||||
allowedToolNames?: Set<string>,
|
||||
options?: { unknownToolThreshold?: number; state?: UnknownToolLoopGuardState },
|
||||
): ReturnType<typeof streamSimple> {
|
||||
const unknownToolGuardState = options?.state ?? {
|
||||
count: 0,
|
||||
countedMessages: new WeakSet<object>(),
|
||||
};
|
||||
let streamAttemptAlreadyCounted = false;
|
||||
const originalResult = stream.result.bind(stream);
|
||||
stream.result = async () => {
|
||||
const message = await originalResult();
|
||||
trimWhitespaceFromToolCallNamesInMessage(message, allowedToolNames);
|
||||
guardUnknownToolLoopInMessage(message, unknownToolGuardState, {
|
||||
allowedToolNames,
|
||||
threshold: options?.unknownToolThreshold,
|
||||
countAttempt: !streamAttemptAlreadyCounted,
|
||||
});
|
||||
return message;
|
||||
};
|
||||
|
||||
@@ -655,6 +775,19 @@ function wrapStreamTrimToolCallNames(
|
||||
};
|
||||
trimWhitespaceFromToolCallNamesInMessage(event.partial, allowedToolNames);
|
||||
trimWhitespaceFromToolCallNamesInMessage(event.message, allowedToolNames);
|
||||
if (event.message && typeof event.message === "object") {
|
||||
guardUnknownToolLoopInMessage(event.message, unknownToolGuardState, {
|
||||
allowedToolNames,
|
||||
threshold: options?.unknownToolThreshold,
|
||||
countAttempt: true,
|
||||
});
|
||||
streamAttemptAlreadyCounted = true;
|
||||
}
|
||||
guardUnknownToolLoopInMessage(event.partial, unknownToolGuardState, {
|
||||
allowedToolNames,
|
||||
threshold: options?.unknownToolThreshold,
|
||||
countAttempt: false,
|
||||
});
|
||||
}
|
||||
return result;
|
||||
},
|
||||
@@ -673,15 +806,26 @@ function wrapStreamTrimToolCallNames(
|
||||
/**
 * Wraps a stream function so every stream it produces goes through
 * `wrapStreamTrimToolCallNames`, with one unknown-tool guard state shared by
 * all calls to the returned function.
 *
 * @param baseFn - Stream function to wrap; may return a stream or a thenable of one.
 * @param allowedToolNames - Optional allowlist forwarded to the stream wrapper.
 * @param guardOptions - Optional guard settings; `unknownToolThreshold` is forwarded as-is.
 * @returns A stream function with the same call signature as `baseFn`.
 */
export function wrapStreamFnTrimToolCallNames(
  baseFn: StreamFn,
  allowedToolNames?: Set<string>,
  guardOptions?: { unknownToolThreshold?: number },
): StreamFn {
  // Created once per wrapper, not per call, so the streak survives across
  // successive invocations of the returned function.
  const unknownToolGuardState: UnknownToolLoopGuardState = {
    count: 0,
    countedMessages: new WeakSet<object>(),
  };
  return (model, context, streamOptions) => {
    const maybeStream = baseFn(model, context, streamOptions);
    if (maybeStream && typeof maybeStream === "object" && "then" in maybeStream) {
      // Thenable result: wrap the stream once it resolves.
      return Promise.resolve(maybeStream).then((stream) =>
        wrapStreamTrimToolCallNames(stream, allowedToolNames, {
          unknownToolThreshold: guardOptions?.unknownToolThreshold,
          state: unknownToolGuardState,
        }),
      );
    }
    return wrapStreamTrimToolCallNames(maybeStream, allowedToolNames, {
      unknownToolThreshold: guardOptions?.unknownToolThreshold,
      state: unknownToolGuardState,
    });
  };
}
|
||||
|
||||
|
||||
@@ -111,6 +111,7 @@ import { buildSystemPromptParams } from "../../system-prompt-params.js";
|
||||
import { buildSystemPromptReport } from "../../system-prompt-report.js";
|
||||
import { resolveAgentTimeoutMs } from "../../timeout.js";
|
||||
import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js";
|
||||
import { UNKNOWN_TOOL_THRESHOLD } from "../../tool-loop-detection.js";
|
||||
import {
|
||||
resolveTranscriptPolicy,
|
||||
shouldAllowProviderOwnedThinkingReplay,
|
||||
@@ -342,6 +343,16 @@ function summarizeSessionContext(messages: AgentMessage[]): {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Resolves the threshold used by the stream-level unknown-tool guard.
 *
 * @param loopDetection - Loop-detection settings; only `enabled` and
 *   `unknownToolThreshold` are read.
 * @returns `undefined` unless `enabled` is exactly `true`; otherwise the
 *   configured override when it is a positive integer, else
 *   `UNKNOWN_TOOL_THRESHOLD`.
 */
export function resolveUnknownToolGuardThreshold(loopDetection?: {
  enabled?: boolean;
  unknownToolThreshold?: number;
}): number | undefined {
  if (loopDetection?.enabled !== true) {
    return undefined;
  }
  const configured = loopDetection.unknownToolThreshold;
  // The config schema only admits positive integers for this field. Anything
  // else (0, negatives, NaN, fractions) falls back to the default rather than
  // silently disabling or skewing the guard.
  // NOTE(review): intended to mirror the detector's asPositiveInt fallback; confirm its exact semantics.
  if (typeof configured === "number" && Number.isInteger(configured) && configured > 0) {
    return configured;
  }
  return UNKNOWN_TOOL_THRESHOLD;
}
|
||||
|
||||
export async function runEmbeddedAttempt(
|
||||
params: EmbeddedRunAttemptParams,
|
||||
): Promise<EmbeddedRunAttemptResult> {
|
||||
@@ -1236,6 +1247,9 @@ export async function runEmbeddedAttempt(
|
||||
activeSession.agent.streamFn = wrapStreamFnTrimToolCallNames(
|
||||
activeSession.agent.streamFn,
|
||||
allowedToolNames,
|
||||
{
|
||||
unknownToolThreshold: resolveUnknownToolGuardThreshold(clientToolLoopDetection),
|
||||
},
|
||||
);
|
||||
|
||||
if (
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
CRITICAL_THRESHOLD,
|
||||
GLOBAL_CIRCUIT_BREAKER_THRESHOLD,
|
||||
TOOL_CALL_HISTORY_SIZE,
|
||||
UNKNOWN_TOOL_THRESHOLD,
|
||||
WARNING_THRESHOLD,
|
||||
detectToolCallLoop,
|
||||
getToolCallStats,
|
||||
@@ -45,6 +46,23 @@ function recordSuccessfulCall(
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Test helper: records one tool call followed by its failed outcome.
 *
 * @param state - Session state that receives the history entries.
 * @param toolName - Tool being called.
 * @param params - Arguments of the call.
 * @param error - Error reported as the call's outcome.
 * @param index - Sequence number used to build the tool-call id.
 */
function recordFailedCall(
  state: SessionState,
  toolName: string,
  params: unknown,
  error: unknown,
  index: number,
): void {
  const failedCallId = `${toolName}-error-${index}`;
  const failedOutcome = {
    toolName,
    toolParams: params,
    toolCallId: failedCallId,
    error,
  };
  // The call must be registered before its outcome is attached.
  recordToolCall(state, toolName, params, failedCallId);
  recordToolCallOutcome(state, failedOutcome);
}
|
||||
|
||||
function recordRepeatedSuccessfulCalls(params: {
|
||||
state: SessionState;
|
||||
toolName: string;
|
||||
@@ -444,6 +462,71 @@ describe("tool-loop-detection", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("does not block repeated unknown-tool failures before the unknown-tool threshold", () => {
  const state = createState();
  const toolName = "exec";
  const unknownToolError = new Error("Tool exec not found");

  // Record one failure fewer than the threshold, each with different args.
  const failuresToRecord = UNKNOWN_TOOL_THRESHOLD - 1;
  Array.from({ length: failuresToRecord }, (_unused, position) => position).forEach((position) => {
    recordFailedCall(state, toolName, { command: `echo ${position}` }, unknownToolError, position);
  });

  const nextCallParams = { command: "echo still allowed" };
  const loopResult = detectToolCallLoop(state, toolName, nextCallParams, enabledLoopDetectionConfig);

  expect(loopResult.stuck).toBe(false);
});
|
||||
|
||||
it("blocks repeated unknown-tool failures even when the args keep changing", () => {
  const state = createState();
  const toolName = "exec";
  const unknownToolError = new Error("Tool exec not found");

  // Twenty attempts with different argument shapes and values.
  const attempts = [
    { command: "ls" },
    { command: "pwd" },
    { input: "whoami" },
    { cmd: "env" },
    { shell: "bash -lc ls" },
    { command: "printf ok" },
    { cwd: "/tmp", command: "ls" },
    { args: ["ls", "/tmp"] },
    { command: "find . -maxdepth 1" },
    { text: "run ls" },
    { command: "uname -a" },
    { command: "id" },
    { command: "date" },
    { command: "ps" },
    { command: "df -h" },
    { command: "free -m" },
    { command: "ls /tmp" },
    { command: "ls -la" },
    { command: "cat /etc/hostname" },
    { command: "echo done" },
  ];
  attempts.forEach((attemptParams, position) => {
    recordFailedCall(state, toolName, attemptParams, unknownToolError, position);
  });

  const nextCallParams = { command: "echo still looping" };
  const loopResult = detectToolCallLoop(state, toolName, nextCallParams, enabledLoopDetectionConfig);

  expect(loopResult.stuck).toBe(true);
  // The guard narrows the result type before the detector fields are read.
  if (loopResult.stuck) {
    expect(loopResult.detector).toBe("unknown_tool_repeat");
    expect(loopResult.level).toBe("critical");
  }
});
|
||||
|
||||
it("warns on ping-pong alternating patterns", () => {
|
||||
const state = createState();
|
||||
const readParams = { path: "/a.txt" };
|
||||
|
||||
@@ -8,6 +8,7 @@ const log = createSubsystemLogger("agents/loop-detection");
|
||||
|
||||
export type LoopDetectorKind =
|
||||
| "generic_repeat"
|
||||
| "unknown_tool_repeat"
|
||||
| "known_poll_no_progress"
|
||||
| "global_circuit_breaker"
|
||||
| "ping_pong";
|
||||
@@ -26,12 +27,14 @@ export type LoopDetectionResult =
|
||||
|
||||
export const TOOL_CALL_HISTORY_SIZE = 30;
|
||||
export const WARNING_THRESHOLD = 10;
|
||||
export const UNKNOWN_TOOL_THRESHOLD = 10;
|
||||
export const CRITICAL_THRESHOLD = 20;
|
||||
export const GLOBAL_CIRCUIT_BREAKER_THRESHOLD = 30;
|
||||
const DEFAULT_LOOP_DETECTION_CONFIG = {
|
||||
enabled: false,
|
||||
historySize: TOOL_CALL_HISTORY_SIZE,
|
||||
warningThreshold: WARNING_THRESHOLD,
|
||||
unknownToolThreshold: UNKNOWN_TOOL_THRESHOLD,
|
||||
criticalThreshold: CRITICAL_THRESHOLD,
|
||||
globalCircuitBreakerThreshold: GLOBAL_CIRCUIT_BREAKER_THRESHOLD,
|
||||
detectors: {
|
||||
@@ -45,6 +48,7 @@ type ResolvedLoopDetectionConfig = {
|
||||
enabled: boolean;
|
||||
historySize: number;
|
||||
warningThreshold: number;
|
||||
unknownToolThreshold: number;
|
||||
criticalThreshold: number;
|
||||
globalCircuitBreakerThreshold: number;
|
||||
detectors: {
|
||||
@@ -86,6 +90,10 @@ function resolveLoopDetectionConfig(config?: ToolLoopDetectionConfig): ResolvedL
|
||||
enabled: config?.enabled ?? DEFAULT_LOOP_DETECTION_CONFIG.enabled,
|
||||
historySize: asPositiveInt(config?.historySize, DEFAULT_LOOP_DETECTION_CONFIG.historySize),
|
||||
warningThreshold,
|
||||
unknownToolThreshold: asPositiveInt(
|
||||
config?.unknownToolThreshold,
|
||||
DEFAULT_LOOP_DETECTION_CONFIG.unknownToolThreshold,
|
||||
),
|
||||
criticalThreshold,
|
||||
globalCircuitBreakerThreshold,
|
||||
detectors: {
|
||||
@@ -182,17 +190,33 @@ function formatErrorForHash(error: unknown): string {
|
||||
return stableStringify(error);
|
||||
}
|
||||
|
||||
/**
 * Pulls the name of a missing tool out of an error, if the formatted error
 * text matches one of the recognised phrasings ("unknown tool X",
 * "tool X not found", "tool X is not available").
 *
 * @param error - Error value; it is rendered with `formatErrorForHash` first.
 * @returns The lower-cased tool name, or `undefined` when nothing matches.
 */
function extractUnknownToolName(error: unknown): string | undefined {
  const text = formatErrorForHash(error).trim();
  if (text.length === 0) {
    return undefined;
  }

  // Tried in order; the first pattern that matches decides the outcome.
  const phrasings = [
    /unknown tool[:\s]+["']?([a-z0-9_.-]+)["']?/i,
    /tool\s+["']?([a-z0-9_.-]+)["']?\s+(?:not found|is not available)/i,
  ];
  for (const phrasing of phrasings) {
    const found = phrasing.exec(text);
    if (found === null) {
      continue;
    }
    const captured = found[1]?.trim();
    return captured ? captured.toLowerCase() : undefined;
  }
  return undefined;
}
|
||||
|
||||
function hashToolOutcome(
|
||||
toolName: string,
|
||||
params: unknown,
|
||||
result: unknown,
|
||||
error: unknown,
|
||||
): string | undefined {
|
||||
): { resultHash?: string; unknownToolName?: string } {
|
||||
if (error !== undefined) {
|
||||
return `error:${digestStable(formatErrorForHash(error))}`;
|
||||
const unknownToolName = extractUnknownToolName(error);
|
||||
return {
|
||||
resultHash: `error:${digestStable(formatErrorForHash(error))}`,
|
||||
unknownToolName,
|
||||
};
|
||||
}
|
||||
if (!isPlainObject(result)) {
|
||||
return result === undefined ? undefined : digestStable(result);
|
||||
return { resultHash: result === undefined ? undefined : digestStable(result) };
|
||||
}
|
||||
|
||||
const details = isPlainObject(result.details) ? result.details : {};
|
||||
@@ -200,33 +224,65 @@ function hashToolOutcome(
|
||||
if (isKnownPollToolCall(toolName, params) && toolName === "process" && isPlainObject(params)) {
|
||||
const action = params.action;
|
||||
if (action === "poll") {
|
||||
return digestStable({
|
||||
action,
|
||||
status: details.status,
|
||||
exitCode: details.exitCode ?? null,
|
||||
exitSignal: details.exitSignal ?? null,
|
||||
aggregated: details.aggregated ?? null,
|
||||
text,
|
||||
});
|
||||
return {
|
||||
resultHash: digestStable({
|
||||
action,
|
||||
status: details.status,
|
||||
exitCode: details.exitCode ?? null,
|
||||
exitSignal: details.exitSignal ?? null,
|
||||
aggregated: details.aggregated ?? null,
|
||||
text,
|
||||
}),
|
||||
};
|
||||
}
|
||||
if (action === "log") {
|
||||
return digestStable({
|
||||
action,
|
||||
status: details.status,
|
||||
totalLines: details.totalLines ?? null,
|
||||
totalChars: details.totalChars ?? null,
|
||||
truncated: details.truncated ?? null,
|
||||
exitCode: details.exitCode ?? null,
|
||||
exitSignal: details.exitSignal ?? null,
|
||||
text,
|
||||
});
|
||||
return {
|
||||
resultHash: digestStable({
|
||||
action,
|
||||
status: details.status,
|
||||
totalLines: details.totalLines ?? null,
|
||||
totalChars: details.totalChars ?? null,
|
||||
truncated: details.truncated ?? null,
|
||||
exitCode: details.exitCode ?? null,
|
||||
exitSignal: details.exitSignal ?? null,
|
||||
text,
|
||||
}),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return digestStable({
|
||||
details,
|
||||
text,
|
||||
});
|
||||
return {
|
||||
resultHash: digestStable({
|
||||
details,
|
||||
text,
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Counts how many of the most recent history records, walking backwards from
 * the newest, are calls to `toolName` that were flagged with the same
 * unknown-tool name.
 *
 * @param history - Call records, oldest first.
 * @param toolName - Tool whose trailing streak is measured.
 * @returns The streak length and the unknown-tool name it refers to (`undefined` for an empty streak).
 */
function getUnknownToolRepeatStreak(
  history: Array<{ toolName: string; unknownToolName?: string }>,
  toolName: string,
): { count: number; unknownToolName?: string } {
  let count = 0;
  let streakName: string | undefined;

  let cursor = history.length;
  while (cursor > 0) {
    cursor -= 1;
    const entry = history[cursor];
    // A record only extends the streak when it belongs to this tool and was
    // flagged as an unknown-tool failure.
    const flaggedName =
      entry && entry.toolName === toolName ? entry.unknownToolName : undefined;
    if (!flaggedName) {
      break;
    }
    if (streakName && flaggedName !== streakName) {
      break;
    }
    if (!streakName) {
      streakName = flaggedName;
    }
    count += 1;
  }

  return { count, unknownToolName: streakName };
}
|
||||
|
||||
function getNoProgressStreak(
|
||||
@@ -381,11 +437,23 @@ export function detectToolCallLoop(
|
||||
}
|
||||
const history = state.toolCallHistory ?? [];
|
||||
const currentHash = hashToolCall(toolName, params);
|
||||
const unknownToolStreak = getUnknownToolRepeatStreak(history, toolName);
|
||||
const noProgress = getNoProgressStreak(history, toolName, currentHash);
|
||||
const noProgressStreak = noProgress.count;
|
||||
const knownPollTool = isKnownPollToolCall(toolName, params);
|
||||
const pingPong = getPingPongStreak(history, currentHash);
|
||||
|
||||
if (unknownToolStreak.count >= resolvedConfig.unknownToolThreshold) {
|
||||
return {
|
||||
stuck: true,
|
||||
level: "critical",
|
||||
detector: "unknown_tool_repeat",
|
||||
count: unknownToolStreak.count,
|
||||
message: `CRITICAL: attempted unavailable tool ${unknownToolStreak.unknownToolName ?? toolName} ${unknownToolStreak.count} times. Stop retrying that missing tool and answer without it.`,
|
||||
warningKey: `unknown-tool:${toolName}:${unknownToolStreak.unknownToolName ?? "unknown"}`,
|
||||
};
|
||||
}
|
||||
|
||||
if (noProgressStreak >= resolvedConfig.globalCircuitBreakerThreshold) {
|
||||
log.error(
|
||||
`Global circuit breaker triggered: ${toolName} repeated ${noProgressStreak} times with no progress`,
|
||||
@@ -537,12 +605,8 @@ export function recordToolCallOutcome(
|
||||
},
|
||||
): void {
|
||||
const resolvedConfig = resolveLoopDetectionConfig(params.config);
|
||||
const resultHash = hashToolOutcome(
|
||||
params.toolName,
|
||||
params.toolParams,
|
||||
params.result,
|
||||
params.error,
|
||||
);
|
||||
const outcome = hashToolOutcome(params.toolName, params.toolParams, params.result, params.error);
|
||||
const resultHash = outcome.resultHash;
|
||||
if (!resultHash) {
|
||||
return;
|
||||
}
|
||||
@@ -568,6 +632,7 @@ export function recordToolCallOutcome(
|
||||
continue;
|
||||
}
|
||||
call.resultHash = resultHash;
|
||||
call.unknownToolName = outcome.unknownToolName;
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
@@ -578,6 +643,7 @@ export function recordToolCallOutcome(
|
||||
argsHash,
|
||||
toolCallId: params.toolCallId,
|
||||
resultHash,
|
||||
unknownToolName: outcome.unknownToolName,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -7054,6 +7054,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
exclusiveMinimum: 0,
|
||||
maximum: 9007199254740991,
|
||||
},
|
||||
unknownToolThreshold: {
|
||||
type: "integer",
|
||||
exclusiveMinimum: 0,
|
||||
maximum: 9007199254740991,
|
||||
},
|
||||
criticalThreshold: {
|
||||
type: "integer",
|
||||
exclusiveMinimum: 0,
|
||||
@@ -16945,6 +16950,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
description:
|
||||
"Warning threshold for repetitive patterns when detector is enabled (default: 10).",
|
||||
},
|
||||
unknownToolThreshold: {
|
||||
type: "integer",
|
||||
exclusiveMinimum: 0,
|
||||
maximum: 9007199254740991,
|
||||
title: "Unknown-tool Loop Threshold",
|
||||
description:
|
||||
"Block repeated calls to the same unavailable tool after this many misses (default: 10).",
|
||||
},
|
||||
criticalThreshold: {
|
||||
type: "integer",
|
||||
exclusiveMinimum: 0,
|
||||
@@ -23726,6 +23739,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
help: "Warning threshold for repetitive patterns when detector is enabled (default: 10).",
|
||||
tags: ["tools"],
|
||||
},
|
||||
"tools.loopDetection.unknownToolThreshold": {
|
||||
label: "Unknown-tool Loop Threshold",
|
||||
help: "Block repeated calls to the same unavailable tool after this many misses (default: 10).",
|
||||
tags: ["tools"],
|
||||
},
|
||||
"tools.loopDetection.criticalThreshold": {
|
||||
label: "Tool-loop Critical Threshold",
|
||||
help: "Critical threshold for repetitive patterns when detector is enabled (default: 20).",
|
||||
|
||||
@@ -543,6 +543,8 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"tools.loopDetection.historySize": "Tool history window size for loop detection (default: 30).",
|
||||
"tools.loopDetection.warningThreshold":
|
||||
"Warning threshold for repetitive patterns when detector is enabled (default: 10).",
|
||||
"tools.loopDetection.unknownToolThreshold":
|
||||
"Block repeated calls to the same unavailable tool after this many misses (default: 10).",
|
||||
"tools.loopDetection.criticalThreshold":
|
||||
"Critical threshold for repetitive patterns when detector is enabled (default: 20).",
|
||||
"tools.loopDetection.globalCircuitBreakerThreshold":
|
||||
|
||||
@@ -177,6 +177,7 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"tools.loopDetection.enabled": "Tool-loop Detection",
|
||||
"tools.loopDetection.historySize": "Tool-loop History Size",
|
||||
"tools.loopDetection.warningThreshold": "Tool-loop Warning Threshold",
|
||||
"tools.loopDetection.unknownToolThreshold": "Unknown-tool Loop Threshold",
|
||||
"tools.loopDetection.criticalThreshold": "Tool-loop Critical Threshold",
|
||||
"tools.loopDetection.globalCircuitBreakerThreshold": "Tool-loop Global Circuit Breaker Threshold",
|
||||
"tools.loopDetection.detectors.genericRepeat": "Tool-loop Generic Repeat Detection",
|
||||
|
||||
@@ -169,6 +169,8 @@ export type ToolLoopDetectionConfig = {
|
||||
historySize?: number;
|
||||
/** Warning threshold before a warning-only loop classification (default: 10). */
|
||||
warningThreshold?: number;
|
||||
/** Block repeated calls to the same unavailable tool after this many misses (default: 10). */
|
||||
unknownToolThreshold?: number;
|
||||
/** Critical threshold for blocking repetitive loops (default: 20). */
|
||||
criticalThreshold?: number;
|
||||
/** Global no-progress breaker threshold (default: 30). */
|
||||
|
||||
@@ -485,6 +485,7 @@ const ToolLoopDetectionSchema = z
|
||||
enabled: z.boolean().optional(),
|
||||
historySize: z.number().int().positive().optional(),
|
||||
warningThreshold: z.number().int().positive().optional(),
|
||||
unknownToolThreshold: z.number().int().positive().optional(),
|
||||
criticalThreshold: z.number().int().positive().optional(),
|
||||
globalCircuitBreakerThreshold: z.number().int().positive().optional(),
|
||||
detectors: ToolLoopDetectionDetectorSchema,
|
||||
|
||||
@@ -141,7 +141,12 @@ export type DiagnosticToolLoopEvent = DiagnosticBaseEvent & {
|
||||
toolName: string;
|
||||
level: "warning" | "critical";
|
||||
action: "warn" | "block";
|
||||
detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong";
|
||||
detector:
|
||||
| "generic_repeat"
|
||||
| "unknown_tool_repeat"
|
||||
| "known_poll_no_progress"
|
||||
| "global_circuit_breaker"
|
||||
| "ping_pong";
|
||||
count: number;
|
||||
message: string;
|
||||
pairedToolName?: string;
|
||||
|
||||
@@ -16,6 +16,7 @@ export type ToolCallRecord = {
|
||||
argsHash: string;
|
||||
toolCallId?: string;
|
||||
resultHash?: string;
|
||||
unknownToolName?: string;
|
||||
timestamp: number;
|
||||
};
|
||||
|
||||
|
||||
@@ -264,7 +264,12 @@ export function logToolLoopAction(
|
||||
toolName: string;
|
||||
level: "warning" | "critical";
|
||||
action: "warn" | "block";
|
||||
detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong";
|
||||
detector:
|
||||
| "generic_repeat"
|
||||
| "unknown_tool_repeat"
|
||||
| "known_poll_no_progress"
|
||||
| "global_circuit_breaker"
|
||||
| "ping_pong";
|
||||
count: number;
|
||||
message: string;
|
||||
pairedToolName?: string;
|
||||
|
||||
Reference in New Issue
Block a user