diff --git a/src/agents/pi-embedded-runner/post-compaction-loop-guard.test.ts b/src/agents/pi-embedded-runner/post-compaction-loop-guard.test.ts
new file mode 100644
index 00000000000..a665c964665
--- /dev/null
+++ b/src/agents/pi-embedded-runner/post-compaction-loop-guard.test.ts
@@ -0,0 +1,124 @@
+import { describe, expect, it } from "vitest";
+import {
+  createPostCompactionLoopGuard,
+  type PostCompactionLoopGuard,
+} from "./post-compaction-loop-guard.js";
+
+/** Builds an observation: args hash is the JSON of `args`, result hash is `result` verbatim. */
+function callOutcome(toolName: string, args: unknown, result: string) {
+  return { toolName, argsHash: JSON.stringify(args), resultHash: result };
+}
+
+describe("createPostCompactionLoopGuard", () => {
+  it("is dormant when never armed", () => {
+    const guard = createPostCompactionLoopGuard();
+    const verdict = guard.observe(callOutcome("read", { path: "/x" }, "r1"));
+    expect(verdict.shouldAbort).toBe(false);
+    expect(verdict.armed).toBe(false);
+  });
+
+  it("arms for the configured window after compaction", () => {
+    const guard = createPostCompactionLoopGuard({ windowSize: 3 });
+    guard.armPostCompaction();
+    expect(guard.snapshot().armed).toBe(true);
+    expect(guard.snapshot().remainingAttempts).toBe(3);
+  });
+
+  it("decrements remainingAttempts on each observation", () => {
+    const guard = createPostCompactionLoopGuard({ windowSize: 3 });
+    guard.armPostCompaction();
+    guard.observe(callOutcome("read", { path: "/x" }, "r1"));
+    expect(guard.snapshot().remainingAttempts).toBe(2);
+    guard.observe(callOutcome("read", { path: "/y" }, "r2"));
+    expect(guard.snapshot().remainingAttempts).toBe(1);
+    guard.observe(callOutcome("read", { path: "/z" }, "r3"));
+    expect(guard.snapshot().remainingAttempts).toBe(0);
+    expect(guard.snapshot().armed).toBe(false);
+  });
+
+  it("aborts on the windowSize-th identical (tool,args,result) call within the window", () => {
+    const guard = createPostCompactionLoopGuard({ windowSize: 3 });
+    guard.armPostCompaction();
+    expect(
+      guard.observe(callOutcome("gateway", { action: "lookup", path: "x" }, "r1")).shouldAbort,
+    ).toBe(false);
+    expect(
+      guard.observe(callOutcome("gateway", { action: "lookup", path: "x" }, "r1")).shouldAbort,
+    ).toBe(false);
+    const third = guard.observe(callOutcome("gateway", { action: "lookup", path: "x" }, "r1"));
+    expect(third.shouldAbort).toBe(true);
+    if (third.shouldAbort) {
+      expect(third.detector).toBe("compaction_loop_persisted");
+      expect(third.count).toBe(3);
+      expect(third.toolName).toBe("gateway");
+    }
+  });
+
+  it("does NOT abort when the result hash changes (progress was made)", () => {
+    const guard = createPostCompactionLoopGuard({ windowSize: 3 });
+    guard.armPostCompaction();
+    guard.observe(callOutcome("read", { path: "/x" }, "r1"));
+    guard.observe(callOutcome("read", { path: "/x" }, "r2"));
+    const third = guard.observe(callOutcome("read", { path: "/x" }, "r3"));
+    expect(third.shouldAbort).toBe(false);
+  });
+
+  it("does NOT abort when the args hash changes", () => {
+    const guard = createPostCompactionLoopGuard({ windowSize: 3 });
+    guard.armPostCompaction();
+    guard.observe(callOutcome("read", { path: "/a" }, "r1"));
+    guard.observe(callOutcome("read", { path: "/b" }, "r1"));
+    const third = guard.observe(callOutcome("read", { path: "/c" }, "r1"));
+    expect(third.shouldAbort).toBe(false);
+  });
+
+  it("does NOT abort outside the window", () => {
+    const guard = createPostCompactionLoopGuard({ windowSize: 2 });
+    guard.armPostCompaction();
+    guard.observe(callOutcome("read", { path: "/x" }, "r1"));
+    guard.observe(callOutcome("read", { path: "/x" }, "r1"));
+    expect(guard.snapshot().armed).toBe(false);
+    const after = guard.observe(callOutcome("read", { path: "/x" }, "r1"));
+    expect(after.shouldAbort).toBe(false);
+  });
+
+  it("re-arms when armPostCompaction is called again (multiple compactions per run)", () => {
+    const guard = createPostCompactionLoopGuard({ windowSize: 2 });
+    guard.armPostCompaction();
+    guard.observe(callOutcome("read", { path: "/x" }, "r1"));
+    guard.observe(callOutcome("read", { path: "/x" }, "r1"));
+    expect(guard.snapshot().armed).toBe(false);
+    guard.armPostCompaction();
+    expect(guard.snapshot().armed).toBe(true);
+    expect(guard.snapshot().remainingAttempts).toBe(2);
+  });
+
+  it("respects enabled: false (always returns shouldAbort: false even when armed)", () => {
+    const guard = createPostCompactionLoopGuard({ enabled: false, windowSize: 3 });
+    guard.armPostCompaction();
+    guard.observe(callOutcome("gateway", { x: 1 }, "r1"));
+    guard.observe(callOutcome("gateway", { x: 1 }, "r1"));
+    const third = guard.observe(callOutcome("gateway", { x: 1 }, "r1"));
+    expect(third.shouldAbort).toBe(false);
+  });
+
+  it("disarms after observing windowSize calls regardless of verdict", () => {
+    const guard = createPostCompactionLoopGuard({ windowSize: 3 });
+    guard.armPostCompaction();
+    guard.observe(callOutcome("read", { path: "/a" }, "r1"));
+    guard.observe(callOutcome("write", { path: "/b" }, "r2"));
+    guard.observe(callOutcome("exec", { cmd: "ls" }, "r3"));
+    expect(guard.snapshot().armed).toBe(false);
+    expect(guard.snapshot().remainingAttempts).toBe(0);
+  });
+
+  it("raises a windowSize of 1 to 2 so a single call can never trip the guard", () => {
+    const guard: PostCompactionLoopGuard = createPostCompactionLoopGuard({ windowSize: 1 });
+    guard.armPostCompaction();
+    expect(guard.snapshot().remainingAttempts).toBe(2);
+    const first = guard.observe(callOutcome("read", { path: "/x" }, "r1"));
+    expect(first.shouldAbort).toBe(false);
+    const second = guard.observe(callOutcome("read", { path: "/x" }, "r1"));
+    expect(second.shouldAbort).toBe(true);
+  });
+});
diff --git a/src/agents/pi-embedded-runner/post-compaction-loop-guard.ts b/src/agents/pi-embedded-runner/post-compaction-loop-guard.ts
new file mode 100644
index 00000000000..ef8b5cc6120
--- /dev/null
+++ b/src/agents/pi-embedded-runner/post-compaction-loop-guard.ts
@@ -0,0 +1,162 @@
+import type { ToolLoopPostCompactionGuardConfig } from "../../config/types.tools.js";
+import { createSubsystemLogger } from "../../logging/subsystem.js";
+
+const log = createSubsystemLogger("agents/post-compaction-guard");
+
+/** Window used when the configured `windowSize` is missing or not a positive integer. */
+const DEFAULT_WINDOW_SIZE = 3;
+
+/**
+ * Smallest usable window. The abort check counts the current call as a match, so a
+ * window of 1 would abort on the first post-compaction tool call even without a repeat.
+ */
+const MIN_WINDOW_SIZE = 2;
+
+/** One tool call, reduced to the three fields the guard compares for equality. */
+export type PostCompactionGuardObservation = {
+  toolName: string;
+  argsHash: string;
+  resultHash: string;
+};
+
+/**
+ * Result of `observe`. `armed` and `remainingAttempts` describe the guard after the
+ * observation was recorded; the abort variant also identifies the repeated tool.
+ */
+export type PostCompactionGuardVerdict =
+  | { shouldAbort: false; armed: boolean; remainingAttempts: number }
+  | {
+      shouldAbort: true;
+      armed: boolean;
+      remainingAttempts: number;
+      detector: "compaction_loop_persisted";
+      count: number;
+      toolName: string;
+      message: string;
+    };
+
+/** Handle returned by `createPostCompactionLoopGuard`. */
+export type PostCompactionLoopGuard = {
+  /** Starts (or restarts) the observation window and clears recorded history. */
+  armPostCompaction: () => void;
+  /** Records one tool call and reports whether the run should be aborted. */
+  observe: (call: PostCompactionGuardObservation) => PostCompactionGuardVerdict;
+  /** Reads the current armed state without consuming an attempt. */
+  snapshot: () => { armed: boolean; remainingAttempts: number };
+};
+
+type GuardState = {
+  enabled: boolean;
+  windowSize: number;
+  remainingAttempts: number;
+  history: PostCompactionGuardObservation[];
+};
+
+/** Returns `value` when it is a positive integer, otherwise `fallback`. */
+function asPositiveInt(value: number | undefined, fallback: number): number {
+  if (typeof value !== "number" || !Number.isInteger(value) || value <= 0) {
+    return fallback;
+  }
+  return value;
+}
+
+/** Resolves the effective window: default when invalid, never below `MIN_WINDOW_SIZE`. */
+function resolveWindowSize(value: number | undefined): number {
+  return Math.max(MIN_WINDOW_SIZE, asPositiveInt(value, DEFAULT_WINDOW_SIZE));
+}
+
+/**
+ * Creates a guard that watches the first `windowSize` tool calls after it is armed and
+ * signals an abort once the same (tool, args hash, result hash) triple has been seen
+ * `windowSize` times inside that window.
+ *
+ * @param config Optional settings. `enabled` defaults to true; `windowSize` defaults to
+ *   3 and is raised to 2 when a smaller positive integer is supplied.
+ * @returns The guard handle. It stays dormant until `armPostCompaction` is called.
+ */
+export function createPostCompactionLoopGuard(
+  config?: ToolLoopPostCompactionGuardConfig,
+): PostCompactionLoopGuard {
+  const state: GuardState = {
+    enabled: config?.enabled ?? true,
+    windowSize: resolveWindowSize(config?.windowSize),
+    remainingAttempts: 0,
+    history: [],
+  };
+
+  const armPostCompaction = (): void => {
+    state.remainingAttempts = state.windowSize;
+    state.history = [];
+    if (state.enabled) {
+      log.info(`post-compaction guard armed for ${state.windowSize} attempts`);
+    }
+  };
+
+  const observe = (call: PostCompactionGuardObservation): PostCompactionGuardVerdict => {
+    if (state.remainingAttempts <= 0) {
+      return { shouldAbort: false, armed: false, remainingAttempts: 0 };
+    }
+    state.remainingAttempts -= 1;
+    state.history.push(call);
+    const armedAfter = state.remainingAttempts > 0;
+
+    // A disabled guard still consumes attempts and records history, but never aborts.
+    if (!state.enabled) {
+      return { shouldAbort: false, armed: armedAfter, remainingAttempts: state.remainingAttempts };
+    }
+
+    // The call just pushed is part of history, so it always counts as one match.
+    const matches = state.history.filter(
+      (entry) =>
+        entry.toolName === call.toolName &&
+        entry.argsHash === call.argsHash &&
+        entry.resultHash === call.resultHash,
+    );
+
+    if (matches.length >= state.windowSize) {
+      log.error(
+        `post-compaction loop persisted: tool=${call.toolName} repeated ${matches.length} times with identical args+result post-compaction`,
+      );
+      return {
+        shouldAbort: true,
+        armed: armedAfter,
+        remainingAttempts: state.remainingAttempts,
+        detector: "compaction_loop_persisted",
+        count: matches.length,
+        toolName: call.toolName,
+        message: `CRITICAL: tool ${call.toolName} repeated ${matches.length} times with identical arguments and identical results within ${state.windowSize} attempts after auto-compaction. The compaction did not break the loop. Aborting to prevent runaway resource use.`,
+      };
+    }
+
+    return { shouldAbort: false, armed: armedAfter, remainingAttempts: state.remainingAttempts };
+  };
+
+  const snapshot = () => ({
+    armed: state.remainingAttempts > 0,
+    remainingAttempts: state.remainingAttempts,
+  });
+
+  return { armPostCompaction, observe, snapshot };
+}
+
+/** Error type carrying the abort verdict's `detector`, `count` and `toolName` fields. */
+export class PostCompactionLoopPersistedError extends Error {
+  readonly detector: "compaction_loop_persisted";
+  readonly count: number;
+  readonly toolName: string;
+
+  constructor(
+    message: string,
+    details: {
+      detector: "compaction_loop_persisted";
+      count: number;
+      toolName: string;
+    },
+  ) {
+    super(message);
+    this.name = "PostCompactionLoopPersistedError";
+    this.detector = details.detector;
+    this.count = details.count;
+    this.toolName = details.toolName;
+  }
+}
diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts
index 620684fa27a..5606dc7303d 100644
--- a/src/config/schema.help.ts
+++ b/src/config/schema.help.ts
@@ -658,6 +658,10 @@ export const FIELD_HELP: Record = {
   "tools.loopDetection.detectors.knownPollNoProgress":
     "Enable known poll tool no-progress loop detection (default: true).",
   "tools.loopDetection.detectors.pingPong": "Enable ping-pong loop detection (default: true).",
+  "tools.loopDetection.postCompactionGuard.enabled":
+    "Enable the post-compaction loop guard (default: true). When the runner has just retried a prompt after auto-compaction, this guard aborts the run if the agent emits the same (tool, args, result) windowSize times. Targets the failure mode where context-overflow + compaction does not break a tool-call loop.",
+  "tools.loopDetection.postCompactionGuard.windowSize":
+    "Number of post-compaction attempts during which the guard stays armed, and the number of identical (tool, args, result) calls that triggers the abort (default: 3, minimum: 2). Lower values are stricter; higher values give the agent more attempts before abort.",
   "tools.exec.notifyOnExit":
     "When true (default), backgrounded exec sessions on exit and node exec lifecycle events enqueue a system event and request a heartbeat.",
   "tools.exec.notifyOnExitEmptySuccess":
diff --git a/src/config/types.tools.ts b/src/config/types.tools.ts
index ae4dd596283..1e1a96ccacb 100644
--- a/src/config/types.tools.ts
+++ b/src/config/types.tools.ts
@@ -166,6 +166,13 @@ export type ToolLoopDetectionDetectorConfig = {
   pingPong?: boolean;
 };
 
+export type ToolLoopPostCompactionGuardConfig = {
+  /** Enable a strict guard that aborts when the agent re-enters the same tool-call loop immediately after a successful auto-compaction-retry (default: true). */
+  enabled?: boolean;
+  /** How many attempts post-compaction the guard remains armed (default: 3; values below 2 are raised to 2). */
+  windowSize?: number;
+};
+
 export type ToolLoopDetectionConfig = {
   /** Enable tool-loop protection (default: false). */
   enabled?: boolean;
@@ -181,6 +188,8 @@ export type ToolLoopDetectionConfig = {
   globalCircuitBreakerThreshold?: number;
   /** Detector toggles. */
   detectors?: ToolLoopDetectionDetectorConfig;
+  /** Post-compaction loop guard: aborts when the agent repeats the same (tool, args, result) immediately after auto-compaction-retry. */
+  postCompactionGuard?: ToolLoopPostCompactionGuardConfig;
 };
 
 export type SessionsToolsVisibility = "self" | "tree" | "agent" | "all";