Agents/tool-loop: enable unknown-tool stream guard by default

This commit is contained in:
Xan Torres
2026-04-15 23:09:49 +02:00
committed by Ayaan Zaidi
parent b23d59a522
commit 36ed36768c
3 changed files with 36 additions and 11 deletions

View File

@@ -424,20 +424,34 @@ describe("resolveAttemptFsWorkspaceOnly", () => {
});
// Contract tests for resolveUnknownToolGuardThreshold: the guard always yields
// a numeric threshold, whether or not loop detection is enabled.
describe("resolveUnknownToolGuardThreshold", () => {
  it("returns the default threshold when no loop-detection config is provided", () => {
    expect(resolveUnknownToolGuardThreshold(undefined)).toBe(10);
    expect(resolveUnknownToolGuardThreshold({})).toBe(10);
  });

  it("stays on even when tools.loopDetection.enabled is false (safety net)", () => {
    // The unknown-tool guard has no false-positive surface — the tool is
    // objectively not registered — so it is always on regardless of the
    // opt-in genericRepeat/pingPong/pollNoProgress detectors.
    expect(resolveUnknownToolGuardThreshold({ enabled: false })).toBe(10);
    expect(resolveUnknownToolGuardThreshold({ enabled: false, unknownToolThreshold: 3 })).toBe(3);
  });

  it("uses the configured threshold override when provided", () => {
    expect(resolveUnknownToolGuardThreshold({ enabled: true, unknownToolThreshold: 4 })).toBe(4);
  });

  it("falls back to the default threshold when the override is non-positive", () => {
    // Zero, negative, and NaN overrides are all rejected in favour of the default.
    expect(resolveUnknownToolGuardThreshold({ unknownToolThreshold: 0 })).toBe(10);
    expect(resolveUnknownToolGuardThreshold({ unknownToolThreshold: -5 })).toBe(10);
    expect(
      resolveUnknownToolGuardThreshold({ unknownToolThreshold: Number.NaN }),
    ).toBe(10);
  });

  it("floors fractional overrides", () => {
    expect(resolveUnknownToolGuardThreshold({ unknownToolThreshold: 3.7 })).toBe(3);
  });
});
describe("wrapStreamFnTrimToolCallNames", () => {

View File

@@ -294,11 +294,21 @@ const MAX_BTW_SNAPSHOT_MESSAGES = 100;
/**
 * Resolves how many consecutive unknown-tool attempts are tolerated before the
 * unknown-tool stream guard steps in.
 *
 * @param loopDetection - Optional loop-detection settings. Only
 *   `unknownToolThreshold` is read; `enabled` is accepted for shape
 *   compatibility but deliberately ignored.
 * @returns The configured override floored to an integer when it is a finite
 *   number whose floor is at least 1; otherwise `UNKNOWN_TOOL_THRESHOLD`.
 */
export function resolveUnknownToolGuardThreshold(loopDetection?: {
  enabled?: boolean;
  unknownToolThreshold?: number;
}): number {
  // The unknown-tool guard is a safety net against the model hallucinating a
  // tool name or calling a tool that has since been removed from the allowlist
  // (for example after a `skills.allowBundled` config change). After `threshold`
  // consecutive unknown-tool attempts the stream wrapper rewrites the assistant
  // message content to tell the model to stop, which breaks otherwise-infinite
  // Tool-not-found loops against the provider. Unlike the genericRepeat /
  // pingPong / pollNoProgress detectors this guard has no false-positive
  // surface because the tool is objectively not registered in this run, so it
  // stays on regardless of `tools.loopDetection.enabled`.
  const raw = loopDetection?.unknownToolThreshold;
  if (typeof raw === "number" && Number.isFinite(raw)) {
    // Floor before validating: an override in (0, 1) would otherwise pass a
    // `raw > 0` check and then floor to 0, yielding a non-positive threshold.
    const floored = Math.floor(raw);
    if (floored >= 1) {
      return floored;
    }
  }
  return UNKNOWN_TOOL_THRESHOLD;
}
function summarizeMessagePayload(msg: AgentMessage): { textChars: number; imageBlocks: number } {