From 74f2c4a56b4f4a17c086e11f3ceedd9ec8b5ab7a Mon Sep 17 00:00:00 2001 From: Bob Date: Mon, 13 Apr 2026 16:42:11 +0100 Subject: [PATCH] fix: stop repeated unknown-tool loops (#65922) Merged via squash. Prepared head SHA: f352a270a6c0f36888223314ee279c42cff05408 Reviewed-by: @osolmaz --- CHANGELOG.md | 1 + .../pi-embedded-runner/run/attempt.test.ts | 150 ++++++++++++++++- .../run/attempt.tool-call-normalization.ts | 152 +++++++++++++++++- src/agents/pi-embedded-runner/run/attempt.ts | 14 ++ src/agents/tool-loop-detection.test.ts | 83 ++++++++++ src/agents/tool-loop-detection.ts | 128 +++++++++++---- src/config/schema.base.generated.ts | 18 +++ src/config/schema.help.ts | 2 + src/config/schema.labels.ts | 1 + src/config/types.tools.ts | 2 + src/config/zod-schema.agent-runtime.ts | 1 + src/infra/diagnostic-events.ts | 7 +- src/logging/diagnostic-session-state.ts | 1 + src/logging/diagnostic.ts | 7 +- 14 files changed, 529 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e5f103e557..6aa8df3b654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -123,6 +123,7 @@ Docs: https://docs.openclaw.ai - Telegram/sessions: keep topic-scoped session initialization on the canonical topic transcript path when inbound turns omit `MessageThreadId`, so one topic session no longer alternates between bare and topic-qualified transcript files. (#64869) Thanks @jalehman. - Agents/failover: scope assistant-side fallback classification and surfaced provider errors to the current attempt instead of stale session history, so cross-provider fallback runs stop inheriting the previous provider's failure. (#62907) Thanks @stainlu. - MiniMax/OAuth: write `api: "anthropic-messages"` and `authHeader: true` into the `minimax-portal` config patch during `openclaw configure`, so re-authenticated portal setups keep Bearer auth routing working. (#64964) Thanks @ryanlee666. +- Agents/tools: stop repeated unavailable-tool retries from escaping loop detection when the model changes arguments, and rewrite over-threshold unknown tool calls into plain assistant text before dispatch. (#65922) Thanks @dutifulbob. ## 2026.4.10 diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index c073e102096..67e182bad5f 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -14,6 +14,7 @@ import { resolveEmbeddedAgentBaseStreamFn, resolveAttemptFsWorkspaceOnly, resolveEmbeddedAgentStreamFn, + resolveUnknownToolGuardThreshold, resolvePromptBuildHookResult, resolvePromptModeForSession, shouldWarnOnOrphanedUserRepair, @@ -421,13 +422,33 @@ describe("resolveAttemptFsWorkspaceOnly", () => { ).toBe(false); }); }); + +describe("resolveUnknownToolGuardThreshold", () => { + it("returns undefined when loop detection is disabled", () => { + expect(resolveUnknownToolGuardThreshold({ enabled: false, unknownToolThreshold: 4 })).toBe( + undefined, + ); + expect(resolveUnknownToolGuardThreshold(undefined)).toBe(undefined); + }); + + it("uses the default threshold when loop detection is enabled without an override", () => { + expect(resolveUnknownToolGuardThreshold({ enabled: true })).toBe(10); + }); + + it("uses the configured threshold override when provided", () => { + expect(resolveUnknownToolGuardThreshold({ enabled: true, unknownToolThreshold: 4 })).toBe(4); + }); +}); + describe("wrapStreamFnTrimToolCallNames", () => { async function invokeWrappedStream( baseFn: (...args: never[]) => unknown, allowedToolNames?: Set, + guardOptions?: { unknownToolThreshold?: number }, ) { return await invokeWrappedTestStream( - (innerBaseFn) => wrapStreamFnTrimToolCallNames(innerBaseFn as never, allowedToolNames), + (innerBaseFn) => + wrapStreamFnTrimToolCallNames(innerBaseFn as never, allowedToolNames, guardOptions), baseFn, ); } @@ -574,6 +595,133 @@ describe("wrapStreamFnTrimToolCallNames", () => { expect(result).toBe(finalMessage); }); + it("rewrites repeated unavailable tool calls into plain assistant text after the threshold", async () => { + const baseFn = vi.fn(() => + createFakeStream({ + events: [], + resultMessage: { + role: "assistant", + content: [{ type: "toolCall", name: " exec ", arguments: { command: "echo eleven" } }], + }, + }), + ); + const wrappedFn = wrapStreamFnTrimToolCallNames(baseFn as never, new Set(["read"]), { + unknownToolThreshold: 10, + }); + + for (let i = 0; i < 10; i += 1) { + const stream = await Promise.resolve(wrappedFn({} as never, {} as never, {} as never)); + const result = await stream.result(); + expect(result).toMatchObject({ + role: "assistant", + content: [{ type: "toolCall", name: "exec" }], + }); + } + + const blockedStream = await Promise.resolve(wrappedFn({} as never, {} as never, {} as never)); + const blockedResult = (await blockedStream.result()) as { + role: string; + content: Array<{ type: string; text?: string }>; + }; + + expect(blockedResult.role).toBe("assistant"); + expect(blockedResult.content).toEqual([ + expect.objectContaining({ + type: "text", + text: expect.stringContaining('"exec"'), + }), + ]); + }); + + it("leaves repeated unavailable tool calls alone when the unknown-tool guard is disabled", async () => { + const baseFn = vi.fn(() => + createFakeStream({ + events: [], + resultMessage: { + role: "assistant", + content: [{ type: "toolCall", name: " exec ", arguments: { command: "echo eleven" } }], + }, + }), + ); + const wrappedFn = wrapStreamFnTrimToolCallNames(baseFn as never, new Set(["read"])); + + for (let i = 0; i < 11; i += 1) { + const stream = await Promise.resolve(wrappedFn({} as never, {} as never, {} as never)); + const result = await stream.result(); + expect(result).toMatchObject({ + role: "assistant", + content: [{ type: "toolCall", name: "exec" }], + }); + } + }); + + it("does not count partial tool-call deltas as separate unavailable-tool retries", async () => { + const partialToolCall = { type: "toolCall", name: " exec " }; + const messageToolCall = { type: "toolCall", name: " exec " }; + const finalToolCall = { type: "toolCall", name: " exec " }; + const event = { + type: "toolcall_delta", + partial: { role: "assistant", content: [partialToolCall] }, + message: { role: "assistant", content: [messageToolCall] }, + }; + const { baseFn } = createEventStream({ event, finalToolCall }); + + const stream = await invokeWrappedStream(baseFn, new Set(["read"]), { + unknownToolThreshold: 1, + }); + + for await (const _item of stream) { + // drain + } + const result = (await stream.result()) as { + content: Array<{ type: string; text?: string; name?: string }>; + }; + + expect(partialToolCall.name).toBe("exec"); + expect(messageToolCall.name).toBe("exec"); + expect(result.content).toEqual([expect.objectContaining({ type: "toolCall", name: "exec" })]); + }); + + it("does not reset the unavailable-tool streak on partial-only stream chunks", async () => { + const baseFn = vi.fn(() => + createFakeStream({ + events: [ + { + type: "toolcall_delta", + partial: { role: "assistant", content: [{ type: "toolCall", name: " exec " }] }, + }, + ], + resultMessage: { + role: "assistant", + content: [{ type: "toolCall", name: " exec ", arguments: { command: "echo retry" } }], + }, + }), + ); + const wrappedFn = wrapStreamFnTrimToolCallNames(baseFn as never, new Set(["read"]), { + unknownToolThreshold: 1, + }); + + const firstStream = await Promise.resolve(wrappedFn({} as never, {} as never, {} as never)); + await firstStream.result(); + + const secondStream = await Promise.resolve(wrappedFn({} as never, {} as never, {} as never)); + for await (const _item of secondStream) { + // drain + } + const secondResult = (await secondStream.result()) as { + role: string; + content: Array<{ type: string; text?: string; name?: string }>; + }; + + expect(secondResult.role).toBe("assistant"); + expect(secondResult.content).toEqual([ + expect.objectContaining({ + type: "text", + text: expect.stringContaining('"exec"'), + }), + ]); + }); + it("infers tool names from malformed toolCallId variants when allowlist is present", async () => { const partialToolCall = { type: "toolCall", id: "functions.read:0", name: "" }; const finalToolCallA = { type: "toolCall", id: "functionsread3", name: "" }; diff --git a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts index 7caa8d69463..e4cda7a091c 100644 --- a/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts +++ b/src/agents/pi-embedded-runner/run/attempt.tool-call-normalization.ts @@ -11,6 +11,12 @@ import { normalizeToolName } from "../../tool-policy.js"; import { shouldAllowProviderOwnedThinkingReplay } from "../../transcript-policy.js"; import type { TranscriptPolicy } from "../../transcript-policy.js"; +type UnknownToolLoopGuardState = { + lastUnknownToolName?: string; + count: number; + countedMessages: WeakSet; +}; + function resolveCaseInsensitiveAllowedToolName( rawName: string, allowedToolNames?: Set, @@ -630,14 +636,128 @@ function trimWhitespaceFromToolCallNamesInMessage( normalizeToolCallIdsInMessage(message); } +function collectUnknownToolNameFromMessage( + message: unknown, + allowedToolNames?: Set, +): string | undefined { + if (!message || typeof message !== "object" || !allowedToolNames || allowedToolNames.size === 0) { + return undefined; + } + const content = (message as { content?: unknown }).content; + if (!Array.isArray(content)) { + return undefined; + } + + let unknownToolName: string | undefined; + let sawToolCall = false; + for (const block of content) { + if (!block || typeof block !== "object") { + continue; + } + const typedBlock = block as { type?: unknown; name?: unknown }; + if (!isToolCallBlockType(typedBlock.type)) { + continue; + } + sawToolCall = true; + const rawName = typeof typedBlock.name === "string" ? typedBlock.name.trim() : ""; + if (!rawName) { + return undefined; + } + if (resolveExactAllowedToolName(rawName, allowedToolNames)) { + return undefined; + } + const normalizedUnknownToolName = normalizeToolName(rawName); + if (!unknownToolName) { + unknownToolName = normalizedUnknownToolName; + continue; + } + if (unknownToolName !== normalizedUnknownToolName) { + return undefined; + } + } + + return sawToolCall ? unknownToolName : undefined; +} + +function rewriteUnknownToolLoopMessage(message: unknown, toolName: string): void { + if (!message || typeof message !== "object") { + return; + } + (message as { content?: unknown }).content = [ + { + type: "text", + text: `I can't use the tool "${toolName}" here because it isn't available. I need to stop retrying it and answer without that tool.`, + }, + ]; +} + +function guardUnknownToolLoopInMessage( + message: unknown, + state: UnknownToolLoopGuardState, + params: { allowedToolNames?: Set; threshold?: number; countAttempt: boolean }, +): void { + const threshold = params.threshold; + if (threshold === undefined || threshold <= 0) { + return; + } + + const unknownToolName = collectUnknownToolNameFromMessage(message, params.allowedToolNames); + if (!unknownToolName) { + if (params.countAttempt) { + state.lastUnknownToolName = undefined; + state.count = 0; + } + return; + } + + if (!params.countAttempt) { + if (state.lastUnknownToolName === unknownToolName && state.count > threshold) { + rewriteUnknownToolLoopMessage(message, unknownToolName); + } + return; + } + + if (message && typeof message === "object") { + if (state.countedMessages.has(message)) { + if (state.lastUnknownToolName === unknownToolName && state.count > threshold) { + rewriteUnknownToolLoopMessage(message, unknownToolName); + } + return; + } + state.countedMessages.add(message); + } + + if (state.lastUnknownToolName === unknownToolName) { + state.count += 1; + } else { + state.lastUnknownToolName = unknownToolName; + state.count = 1; + } + + if (state.count > threshold) { + rewriteUnknownToolLoopMessage(message, unknownToolName); + } +} + function wrapStreamTrimToolCallNames( stream: ReturnType, allowedToolNames?: Set, + options?: { unknownToolThreshold?: number; state?: UnknownToolLoopGuardState }, ): ReturnType { + const unknownToolGuardState = options?.state ?? { + count: 0, + countedMessages: new WeakSet(), + }; + let streamAttemptAlreadyCounted = false; const originalResult = stream.result.bind(stream); stream.result = async () => { const message = await originalResult(); trimWhitespaceFromToolCallNamesInMessage(message, allowedToolNames); + guardUnknownToolLoopInMessage(message, unknownToolGuardState, { + allowedToolNames, + threshold: options?.unknownToolThreshold, + countAttempt: !streamAttemptAlreadyCounted, + }); return message; }; @@ -655,6 +775,19 @@ function wrapStreamTrimToolCallNames( }; trimWhitespaceFromToolCallNamesInMessage(event.partial, allowedToolNames); trimWhitespaceFromToolCallNamesInMessage(event.message, allowedToolNames); + if (event.message && typeof event.message === "object") { + guardUnknownToolLoopInMessage(event.message, unknownToolGuardState, { + allowedToolNames, + threshold: options?.unknownToolThreshold, + countAttempt: true, + }); + streamAttemptAlreadyCounted = true; + } + guardUnknownToolLoopInMessage(event.partial, unknownToolGuardState, { + allowedToolNames, + threshold: options?.unknownToolThreshold, + countAttempt: false, + }); } return result; }, @@ -673,15 +806,26 @@ function wrapStreamTrimToolCallNames( export function wrapStreamFnTrimToolCallNames( baseFn: StreamFn, allowedToolNames?: Set, + guardOptions?: { unknownToolThreshold?: number }, ): StreamFn { - return (model, context, options) => { - const maybeStream = baseFn(model, context, options); + const unknownToolGuardState: UnknownToolLoopGuardState = { + count: 0, + countedMessages: new WeakSet(), + }; + return (model, context, streamOptions) => { + const maybeStream = baseFn(model, context, streamOptions); if (maybeStream && typeof maybeStream === "object" && "then" in maybeStream) { return Promise.resolve(maybeStream).then((stream) => - wrapStreamTrimToolCallNames(stream, allowedToolNames), + wrapStreamTrimToolCallNames(stream, allowedToolNames, { + unknownToolThreshold: guardOptions?.unknownToolThreshold, + state: unknownToolGuardState, + }), ); } - return wrapStreamTrimToolCallNames(maybeStream, allowedToolNames); + return wrapStreamTrimToolCallNames(maybeStream, allowedToolNames, { + unknownToolThreshold: guardOptions?.unknownToolThreshold, + state: unknownToolGuardState, + }); }; } diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 6ce4251ce76..3c3fd2d8d88 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -111,6 +111,7 @@ import { buildSystemPromptParams } from "../../system-prompt-params.js"; import { buildSystemPromptReport } from "../../system-prompt-report.js"; import { resolveAgentTimeoutMs } from "../../timeout.js"; import { sanitizeToolCallIdsForCloudCodeAssist } from "../../tool-call-id.js"; +import { UNKNOWN_TOOL_THRESHOLD } from "../../tool-loop-detection.js"; import { resolveTranscriptPolicy, shouldAllowProviderOwnedThinkingReplay, @@ -342,6 +343,16 @@ function summarizeSessionContext(messages: AgentMessage[]): { }; } +export function resolveUnknownToolGuardThreshold(loopDetection?: { + enabled?: boolean; + unknownToolThreshold?: number; +}): number | undefined { + if (loopDetection?.enabled !== true) { + return undefined; + } + return loopDetection.unknownToolThreshold ?? UNKNOWN_TOOL_THRESHOLD; +} + export async function runEmbeddedAttempt( params: EmbeddedRunAttemptParams, ): Promise { @@ -1236,6 +1247,9 @@ export async function runEmbeddedAttempt( activeSession.agent.streamFn = wrapStreamFnTrimToolCallNames( activeSession.agent.streamFn, allowedToolNames, + { + unknownToolThreshold: resolveUnknownToolGuardThreshold(clientToolLoopDetection), + }, ); if ( diff --git a/src/agents/tool-loop-detection.test.ts b/src/agents/tool-loop-detection.test.ts index 056c5286cbb..0608e4b5752 100644 --- a/src/agents/tool-loop-detection.test.ts +++ b/src/agents/tool-loop-detection.test.ts @@ -5,6 +5,7 @@ import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD, TOOL_CALL_HISTORY_SIZE, + UNKNOWN_TOOL_THRESHOLD, WARNING_THRESHOLD, detectToolCallLoop, getToolCallStats, @@ -45,6 +46,23 @@ function recordSuccessfulCall( }); } +function recordFailedCall( + state: SessionState, + toolName: string, + params: unknown, + error: unknown, + index: number, +): void { + const toolCallId = `${toolName}-error-${index}`; + recordToolCall(state, toolName, params, toolCallId); + recordToolCallOutcome(state, { + toolName, + toolParams: params, + toolCallId, + error, + }); +} + function recordRepeatedSuccessfulCalls(params: { state: SessionState; toolName: string; @@ -444,6 +462,71 @@ describe("tool-loop-detection", () => { } }); + it("does not block repeated unknown-tool failures before the unknown-tool threshold", () => { + const state = createState(); + const toolName = "exec"; + const unknownToolError = new Error("Tool exec not found"); + + for (let index = 0; index < UNKNOWN_TOOL_THRESHOLD - 1; index += 1) { + recordFailedCall(state, toolName, { command: `echo ${index}` }, unknownToolError, index); + } + + const loopResult = detectToolCallLoop( + state, + toolName, + { command: "echo still allowed" }, + enabledLoopDetectionConfig, + ); + + expect(loopResult.stuck).toBe(false); + }); + + it("blocks repeated unknown-tool failures even when the args keep changing", () => { + const state = createState(); + const toolName = "exec"; + const unknownToolError = new Error("Tool exec not found"); + + const attempts = [ + { command: "ls" }, + { command: "pwd" }, + { input: "whoami" }, + { cmd: "env" }, + { shell: "bash -lc ls" }, + { command: "printf ok" }, + { cwd: "/tmp", command: "ls" }, + { args: ["ls", "/tmp"] }, + { command: "find . -maxdepth 1" }, + { text: "run ls" }, + { command: "uname -a" }, + { command: "id" }, + { command: "date" }, + { command: "ps" }, + { command: "df -h" }, + { command: "free -m" }, + { command: "ls /tmp" }, + { command: "ls -la" }, + { command: "cat /etc/hostname" }, + { command: "echo done" }, + ]; + + for (const [index, params] of attempts.entries()) { + recordFailedCall(state, toolName, params, unknownToolError, index); + } + + const loopResult = detectToolCallLoop( + state, + toolName, + { command: "echo still looping" }, + enabledLoopDetectionConfig, + ); + + expect(loopResult.stuck).toBe(true); + if (loopResult.stuck) { + expect(loopResult.detector).toBe("unknown_tool_repeat"); + expect(loopResult.level).toBe("critical"); + } + }); + it("warns on ping-pong alternating patterns", () => { const state = createState(); const readParams = { path: "/a.txt" }; diff --git a/src/agents/tool-loop-detection.ts b/src/agents/tool-loop-detection.ts index 1576e7ace9b..a239ff6bb62 100644 --- a/src/agents/tool-loop-detection.ts +++ b/src/agents/tool-loop-detection.ts @@ -8,6 +8,7 @@ const log = createSubsystemLogger("agents/loop-detection"); export type LoopDetectorKind = | "generic_repeat" + | "unknown_tool_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong"; @@ -26,12 +27,14 @@ export type LoopDetectionResult = export const TOOL_CALL_HISTORY_SIZE = 30; export const WARNING_THRESHOLD = 10; +export const UNKNOWN_TOOL_THRESHOLD = 10; export const CRITICAL_THRESHOLD = 20; export const GLOBAL_CIRCUIT_BREAKER_THRESHOLD = 30; const DEFAULT_LOOP_DETECTION_CONFIG = { enabled: false, historySize: TOOL_CALL_HISTORY_SIZE, warningThreshold: WARNING_THRESHOLD, + unknownToolThreshold: UNKNOWN_TOOL_THRESHOLD, criticalThreshold: CRITICAL_THRESHOLD, globalCircuitBreakerThreshold: GLOBAL_CIRCUIT_BREAKER_THRESHOLD, detectors: { @@ -45,6 +48,7 @@ type ResolvedLoopDetectionConfig = { enabled: boolean; historySize: number; warningThreshold: number; + unknownToolThreshold: number; criticalThreshold: number; globalCircuitBreakerThreshold: number; detectors: { @@ -86,6 +90,10 @@ function resolveLoopDetectionConfig(config?: ToolLoopDetectionConfig): ResolvedL enabled: config?.enabled ?? DEFAULT_LOOP_DETECTION_CONFIG.enabled, historySize: asPositiveInt(config?.historySize, DEFAULT_LOOP_DETECTION_CONFIG.historySize), warningThreshold, + unknownToolThreshold: asPositiveInt( + config?.unknownToolThreshold, + DEFAULT_LOOP_DETECTION_CONFIG.unknownToolThreshold, + ), criticalThreshold, globalCircuitBreakerThreshold, detectors: { @@ -182,17 +190,33 @@ function formatErrorForHash(error: unknown): string { return stableStringify(error); } +function extractUnknownToolName(error: unknown): string | undefined { + const raw = formatErrorForHash(error).trim(); + if (!raw) { + return undefined; + } + const match = + raw.match(/unknown tool[:\s]+["']?([a-z0-9_.-]+)["']?/i) ?? + raw.match(/tool\s+["']?([a-z0-9_.-]+)["']?\s+(?:not found|is not available)/i); + const toolName = match?.[1]?.trim(); + return toolName ? toolName.toLowerCase() : undefined; +} + function hashToolOutcome( toolName: string, params: unknown, result: unknown, error: unknown, -): string | undefined { +): { resultHash?: string; unknownToolName?: string } { if (error !== undefined) { - return `error:${digestStable(formatErrorForHash(error))}`; + const unknownToolName = extractUnknownToolName(error); + return { + resultHash: `error:${digestStable(formatErrorForHash(error))}`, + unknownToolName, + }; } if (!isPlainObject(result)) { - return result === undefined ? undefined : digestStable(result); + return { resultHash: result === undefined ? undefined : digestStable(result) }; } const details = isPlainObject(result.details) ? result.details : {}; @@ -200,33 +224,65 @@ function hashToolOutcome( if (isKnownPollToolCall(toolName, params) && toolName === "process" && isPlainObject(params)) { const action = params.action; if (action === "poll") { - return digestStable({ - action, - status: details.status, - exitCode: details.exitCode ?? null, - exitSignal: details.exitSignal ?? null, - aggregated: details.aggregated ?? null, - text, - }); + return { + resultHash: digestStable({ + action, + status: details.status, + exitCode: details.exitCode ?? null, + exitSignal: details.exitSignal ?? null, + aggregated: details.aggregated ?? null, + text, + }), + }; } if (action === "log") { - return digestStable({ - action, - status: details.status, - totalLines: details.totalLines ?? null, - totalChars: details.totalChars ?? null, - truncated: details.truncated ?? null, - exitCode: details.exitCode ?? null, - exitSignal: details.exitSignal ?? null, - text, - }); + return { + resultHash: digestStable({ + action, + status: details.status, + totalLines: details.totalLines ?? null, + totalChars: details.totalChars ?? null, + truncated: details.truncated ?? null, + exitCode: details.exitCode ?? null, + exitSignal: details.exitSignal ?? null, + text, + }), + }; } } - return digestStable({ - details, - text, - }); + return { + resultHash: digestStable({ + details, + text, + }), + }; +} + +function getUnknownToolRepeatStreak( + history: Array<{ toolName: string; unknownToolName?: string }>, + toolName: string, +): { count: number; unknownToolName?: string } { + let streak = 0; + let repeatedUnknownToolName: string | undefined; + + for (let i = history.length - 1; i >= 0; i -= 1) { + const record = history[i]; + if (!record || record.toolName !== toolName || !record.unknownToolName) { + break; + } + if (!repeatedUnknownToolName) { + repeatedUnknownToolName = record.unknownToolName; + streak = 1; + continue; + } + if (record.unknownToolName !== repeatedUnknownToolName) { + break; + } + streak += 1; + } + + return { count: streak, unknownToolName: repeatedUnknownToolName }; } function getNoProgressStreak( @@ -381,11 +437,23 @@ export function detectToolCallLoop( } const history = state.toolCallHistory ?? []; const currentHash = hashToolCall(toolName, params); + const unknownToolStreak = getUnknownToolRepeatStreak(history, toolName); const noProgress = getNoProgressStreak(history, toolName, currentHash); const noProgressStreak = noProgress.count; const knownPollTool = isKnownPollToolCall(toolName, params); const pingPong = getPingPongStreak(history, currentHash); + if (unknownToolStreak.count >= resolvedConfig.unknownToolThreshold) { + return { + stuck: true, + level: "critical", + detector: "unknown_tool_repeat", + count: unknownToolStreak.count, + message: `CRITICAL: attempted unavailable tool ${unknownToolStreak.unknownToolName ?? toolName} ${unknownToolStreak.count} times. Stop retrying that missing tool and answer without it.`, + warningKey: `unknown-tool:${toolName}:${unknownToolStreak.unknownToolName ?? "unknown"}`, + }; + } + if (noProgressStreak >= resolvedConfig.globalCircuitBreakerThreshold) { log.error( `Global circuit breaker triggered: ${toolName} repeated ${noProgressStreak} times with no progress`, @@ -537,12 +605,8 @@ export function recordToolCallOutcome( }, ): void { const resolvedConfig = resolveLoopDetectionConfig(params.config); - const resultHash = hashToolOutcome( - params.toolName, - params.toolParams, - params.result, - params.error, - ); + const outcome = hashToolOutcome(params.toolName, params.toolParams, params.result, params.error); + const resultHash = outcome.resultHash; if (!resultHash) { return; } @@ -568,6 +632,7 @@ export function recordToolCallOutcome( continue; } call.resultHash = resultHash; + call.unknownToolName = outcome.unknownToolName; matched = true; break; } @@ -578,6 +643,7 @@ export function recordToolCallOutcome( argsHash, toolCallId: params.toolCallId, resultHash, + unknownToolName: outcome.unknownToolName, timestamp: Date.now(), }); } diff --git a/src/config/schema.base.generated.ts b/src/config/schema.base.generated.ts index c18b7180ba6..1cb13991876 100644 --- a/src/config/schema.base.generated.ts +++ b/src/config/schema.base.generated.ts @@ -7054,6 +7054,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { exclusiveMinimum: 0, maximum: 9007199254740991, }, + unknownToolThreshold: { + type: "integer", + exclusiveMinimum: 0, + maximum: 9007199254740991, + }, criticalThreshold: { type: "integer", exclusiveMinimum: 0, @@ -16945,6 +16950,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { description: "Warning threshold for repetitive patterns when detector is enabled (default: 10).", }, + unknownToolThreshold: { + type: "integer", + exclusiveMinimum: 0, + maximum: 9007199254740991, + title: "Unknown-tool Loop Threshold", + description: + "Block repeated calls to the same unavailable tool after this many misses (default: 10).", + }, criticalThreshold: { type: "integer", exclusiveMinimum: 0, @@ -23726,6 +23739,11 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = { help: "Warning threshold for repetitive patterns when detector is enabled (default: 10).", tags: ["tools"], }, + "tools.loopDetection.unknownToolThreshold": { + label: "Unknown-tool Loop Threshold", + help: "Block repeated calls to the same unavailable tool after this many misses (default: 10).", + tags: ["tools"], + }, "tools.loopDetection.criticalThreshold": { label: "Tool-loop Critical Threshold", help: "Critical threshold for repetitive patterns when detector is enabled (default: 20).", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 02a44845458..3fbd679e5df 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -543,6 +543,8 @@ export const FIELD_HELP: Record = { "tools.loopDetection.historySize": "Tool history window size for loop detection (default: 30).", "tools.loopDetection.warningThreshold": "Warning threshold for repetitive patterns when detector is enabled (default: 10).", + "tools.loopDetection.unknownToolThreshold": + "Block repeated calls to the same unavailable tool after this many misses (default: 10).", "tools.loopDetection.criticalThreshold": "Critical threshold for repetitive patterns when detector is enabled (default: 20).", "tools.loopDetection.globalCircuitBreakerThreshold": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 8eff1e4b748..a87a995b6af 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -177,6 +177,7 @@ export const FIELD_LABELS: Record = { "tools.loopDetection.enabled": "Tool-loop Detection", "tools.loopDetection.historySize": "Tool-loop History Size", "tools.loopDetection.warningThreshold": "Tool-loop Warning Threshold", + "tools.loopDetection.unknownToolThreshold": "Unknown-tool Loop Threshold", "tools.loopDetection.criticalThreshold": "Tool-loop Critical Threshold", "tools.loopDetection.globalCircuitBreakerThreshold": "Tool-loop Global Circuit Breaker Threshold", "tools.loopDetection.detectors.genericRepeat": "Tool-loop Generic Repeat Detection", diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts index f3c5b551272..66bb686d6b5 100644 --- a/src/config/types.tools.ts +++ b/src/config/types.tools.ts @@ -169,6 +169,8 @@ export type ToolLoopDetectionConfig = { historySize?: number; /** Warning threshold before a warning-only loop classification (default: 10). */ warningThreshold?: number; + /** Block repeated calls to the same unavailable tool after this many misses (default: 10). */ + unknownToolThreshold?: number; /** Critical threshold for blocking repetitive loops (default: 20). */ criticalThreshold?: number; /** Global no-progress breaker threshold (default: 30). */ diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index eeae09a4daa..2ee7cea1eb3 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -485,6 +485,7 @@ const ToolLoopDetectionSchema = z enabled: z.boolean().optional(), historySize: z.number().int().positive().optional(), warningThreshold: z.number().int().positive().optional(), + unknownToolThreshold: z.number().int().positive().optional(), criticalThreshold: z.number().int().positive().optional(), globalCircuitBreakerThreshold: z.number().int().positive().optional(), detectors: ToolLoopDetectionDetectorSchema, diff --git a/src/infra/diagnostic-events.ts b/src/infra/diagnostic-events.ts index 55a2f1aab7a..b603ea1eef5 100644 --- a/src/infra/diagnostic-events.ts +++ b/src/infra/diagnostic-events.ts @@ -141,7 +141,12 @@ export type DiagnosticToolLoopEvent = DiagnosticBaseEvent & { toolName: string; level: "warning" | "critical"; action: "warn" | "block"; - detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong"; + detector: + | "generic_repeat" + | "unknown_tool_repeat" + | "known_poll_no_progress" + | "global_circuit_breaker" + | "ping_pong"; count: number; message: string; pairedToolName?: string; diff --git a/src/logging/diagnostic-session-state.ts b/src/logging/diagnostic-session-state.ts index 30ea1249aa5..7c647ca17a1 100644 --- a/src/logging/diagnostic-session-state.ts +++ b/src/logging/diagnostic-session-state.ts @@ -16,6 +16,7 @@ export type ToolCallRecord = { argsHash: string; toolCallId?: string; resultHash?: string; + unknownToolName?: string; timestamp: number; }; diff --git a/src/logging/diagnostic.ts b/src/logging/diagnostic.ts index ad2f4c3050c..578b2a24ec8 100644 --- a/src/logging/diagnostic.ts +++ b/src/logging/diagnostic.ts @@ -264,7 +264,12 @@ export function logToolLoopAction( toolName: string; level: "warning" | "critical"; action: "warn" | "block"; - detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong"; + detector: + | "generic_repeat" + | "unknown_tool_repeat" + | "known_poll_no_progress" + | "global_circuit_breaker" + | "ping_pong"; count: number; message: string; pairedToolName?: string;