From 8a43223014cb473d304b7ff3c08d2ec0dbd5e234 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 5 Apr 2026 21:51:20 +0100 Subject: [PATCH] fix(agents): preserve tool output during context guarding --- .../tool-result-context-guard.test.ts | 126 ++++++++----- .../tool-result-context-guard.ts | 177 +++++++++++++++++- 2 files changed, 246 insertions(+), 57 deletions(-) diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts index 6fed7c61197..61107230289 100644 --- a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts +++ b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts @@ -4,6 +4,7 @@ import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js"; import { CONTEXT_LIMIT_TRUNCATION_NOTICE, PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE, + PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER, installToolResultContextGuard, } from "./tool-result-context-guard.js"; @@ -93,29 +94,37 @@ async function applyGuardToContext( return await agent.transformContext?.(contextForNextCall, new AbortController().signal); } -function expectCompactedToolResultsWithoutContextNotice( - contextForNextCall: AgentMessage[], - oldIndex: number, - newIndex: number, -) { - const oldResultText = getToolResultText(contextForNextCall[oldIndex]); - const newResultText = getToolResultText(contextForNextCall[newIndex]); - expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); - expect(newResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); - expect(newResultText).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE); +function expectReadableCompaction(text: string, prefix: string) { + expect(text.includes(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE)).toBe(true); + expect(text).toContain(prefix.repeat(64)); + expect(text).not.toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + expect(text).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE); +} + +function expectReadableToolSlice(text: string, prefix: string) { + expect(text).toContain(prefix.repeat(64)); + expect(text).not.toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + expect( + text.includes(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE) || + text.includes(CONTEXT_LIMIT_TRUNCATION_NOTICE), + ).toBe(true); } describe("installToolResultContextGuard", () => { - it("compacts newest-first when total context overflows, even if each result fits individually", async () => { + it("returns a cloned guarded context so original tool output stays visible", async () => { const agent = makeGuardableAgent(); const contextForNextCall = makeTwoToolResultOverflowContext(); const transformed = await applyGuardToContext(agent, contextForNextCall); - expect(transformed).toBe(contextForNextCall); - expectCompactedToolResultsWithoutContextNotice(contextForNextCall, 1, 2); + expect(transformed).not.toBe(contextForNextCall); + const transformedMessages = transformed as AgentMessage[]; + expectReadableCompaction(getToolResultText(transformedMessages[1]), "x"); + expectReadableCompaction(getToolResultText(transformedMessages[2]), "y"); + expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(1_000)); + expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(1_000)); }); - it("keeps compacting newest-first until context is back under budget", async () => { + it("keeps readable slices of overflowing tool results before using a placeholder", async () => { const agent = makeGuardableAgent(); installToolResultContextGuard({ @@ -130,14 +139,17 @@ describe("installToolResultContextGuard", () => { makeToolResult("call_3", "c".repeat(800)), ]; - await agent.transformContext?.(contextForNextCall, new AbortController().signal); + const transformed = (await agent.transformContext?.( + contextForNextCall, + new AbortController().signal, + )) as AgentMessage[]; - const first = getToolResultText(contextForNextCall[1]); - const second = getToolResultText(contextForNextCall[2]); - const third = getToolResultText(contextForNextCall[3]); + const first = getToolResultText(transformed[1]); + const second = getToolResultText(transformed[2]); + const third = getToolResultText(transformed[3]); - expect(first).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); - expect(second).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + expectReadableCompaction(first, "a"); + expectReadableCompaction(second, "b"); expect(third).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); }); @@ -150,20 +162,24 @@ describe("installToolResultContextGuard", () => { }); const contextForNextCall: AgentMessage[] = [makeUser("stress")]; + let transformed: AgentMessage[] | undefined; for (let i = 1; i <= 4; i++) { contextForNextCall.push(makeToolResult(`call_${i}`, String(i).repeat(95_000))); - await agent.transformContext?.(contextForNextCall, new AbortController().signal); + transformed = (await agent.transformContext?.( + contextForNextCall, + new AbortController().signal, + )) as AgentMessage[]; } - const toolResultTexts = contextForNextCall + const toolResultTexts = (transformed ?? []) .filter((msg) => msg.role === "toolResult") .map((msg) => getToolResultText(msg as AgentMessage)); - // Newest-first compaction: oldest results stay intact to preserve the - // cached prefix; the newest overflowing result is compacted. - expect(toolResultTexts[0]?.length).toBe(95_000); - expect(toolResultTexts[3]).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); - expect(toolResultTexts.join("\n")).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE); + // Large outputs are capped per-tool before aggregate compaction kicks in. + expect(toolResultTexts[0]?.length).toBe(50_000); + expect(toolResultTexts[0]).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE); + expectReadableCompaction(toolResultTexts[3] ?? "", "4"); + expect(toolResultTexts[3]).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE); }); it("truncates an individually oversized tool result with a context-limit notice", async () => { @@ -176,9 +192,12 @@ describe("installToolResultContextGuard", () => { const contextForNextCall = [makeToolResult("call_big", "z".repeat(5_000))]; - await agent.transformContext?.(contextForNextCall, new AbortController().signal); + const transformed = (await agent.transformContext?.( + contextForNextCall, + new AbortController().signal, + )) as AgentMessage[]; - const newResultText = getToolResultText(contextForNextCall[0]); + const newResultText = getToolResultText(transformed[0]); expect(newResultText.length).toBeLessThan(5_000); expect(newResultText).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE); }); @@ -197,8 +216,12 @@ describe("installToolResultContextGuard", () => { makeToolResult("call_new", "y".repeat(1_000)), ]; - await agent.transformContext?.(contextForNextCall, new AbortController().signal); - expectCompactedToolResultsWithoutContextNotice(contextForNextCall, 1, 2); + const transformed = (await agent.transformContext?.( + contextForNextCall, + new AbortController().signal, + )) as AgentMessage[]; + expectReadableCompaction(getToolResultText(transformed[1]), "x"); + expect(getToolResultText(transformed[2])).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); }); it("wraps an existing transformContext and guards the transformed output", async () => { @@ -215,7 +238,7 @@ describe("installToolResultContextGuard", () => { expect(transformed).not.toBe(contextForNextCall); const transformedMessages = transformed as AgentMessage[]; const oldResultText = getToolResultText(transformedMessages[1]); - expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + expectReadableCompaction(oldResultText, "x"); }); it("handles legacy role=tool string outputs when enforcing context budget", async () => { @@ -232,13 +255,18 @@ describe("installToolResultContextGuard", () => { makeLegacyToolResult("call_new", "y".repeat(1_000)), ]; - await agent.transformContext?.(contextForNextCall, new AbortController().signal); + const transformed = (await agent.transformContext?.( + contextForNextCall, + new AbortController().signal, + )) as AgentMessage[]; - const oldResultText = (contextForNextCall[1] as { content?: unknown }).content; - const newResultText = (contextForNextCall[2] as { content?: unknown }).content; + const oldResultText = (transformed[1] as { content?: unknown }).content; + const newResultText = (transformed[2] as { content?: unknown }).content; - expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); - expect(newResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + expect(typeof oldResultText).toBe("string"); + expect(typeof newResultText).toBe("string"); + expect(oldResultText).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE); + expect(newResultText).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE); }); it("drops oversized read-tool details payloads when compacting tool results", async () => { @@ -255,19 +283,22 @@ describe("installToolResultContextGuard", () => { makeToolResultWithDetails("call_new", "y".repeat(900), "d".repeat(8_000)), ]; - await agent.transformContext?.(contextForNextCall, new AbortController().signal); + const transformed = (await agent.transformContext?.( + contextForNextCall, + new AbortController().signal, + )) as AgentMessage[]; - const oldResult = contextForNextCall[1] as { + const oldResult = transformed[1] as { details?: unknown; }; - const newResult = contextForNextCall[2] as { + const newResult = transformed[2] as { details?: unknown; }; - const oldResultText = getToolResultText(contextForNextCall[1]); - const newResultText = getToolResultText(contextForNextCall[2]); + const oldResultText = getToolResultText(transformed[1]); + const newResultText = getToolResultText(transformed[2]); - expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); - expect(newResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + expectReadableToolSlice(oldResultText, "x"); + expectReadableToolSlice(newResultText, "y"); expect(oldResult.details).toBeUndefined(); expect(newResult.details).toBeUndefined(); }); @@ -322,12 +353,11 @@ describe("installToolResultContextGuard", () => { makeToolResult("call_old", "x".repeat(2_000)), ]; - await expect( - agent.transformContext?.(contextForNextCall, new AbortController().signal), - ).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE); + const guarded = agent.transformContext?.(contextForNextCall, new AbortController().signal); + await expect(guarded).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE); // Tool result should have been compacted before the overflow check. const toolResultText = getToolResultText(contextForNextCall[1]); - expect(toolResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + expect(toolResultText).toBe("x".repeat(2_000)); }); }); diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.ts index 94359d02178..03c30912653 100644 --- a/src/agents/pi-embedded-runner/tool-result-context-guard.ts +++ b/src/agents/pi-embedded-runner/tool-result-context-guard.ts @@ -10,6 +10,7 @@ import { invalidateMessageCharsCacheEntry, isToolResultMessage, } from "./tool-result-char-estimator.js"; +import { truncateToolResultText } from "./tool-result-truncation.js"; // Keep a conservative input budget to absorb tokenizer variance and provider framing overhead. const CONTEXT_INPUT_HEADROOM_RATIO = 0.75; @@ -23,10 +24,20 @@ const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`; export const PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER = "[compacted: tool output removed to free context]"; +export const PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE = + "[compacted: tool output trimmed to free context]"; export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE = "Preemptive context overflow: estimated context size exceeds safe threshold during tool loop"; +const PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX = `\n${PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE}`; +const MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS = 96; +const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO = + CHARS_PER_TOKEN_ESTIMATE / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE; +const MIN_COMPACTED_TOOL_RESULT_ESTIMATE_CHARS = Math.ceil( + MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS * TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO, +); + type GuardableTransformContext = ( messages: AgentMessage[], signal: AbortSignal, @@ -74,7 +85,11 @@ function replaceToolResultText(msg: AgentMessage, text: string): AgentMessage { } as AgentMessage; } -function truncateToolResultToChars( +function estimateBudgetToTextBudget(maxChars: number): number { + return Math.max(0, Math.floor(maxChars / TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO)); +} + +function compactToolResultToEstimateBudget( msg: AgentMessage, maxChars: number, cache: MessageCharEstimateCache, @@ -90,14 +105,36 @@ function truncateToolResultToChars( const rawText = getToolResultText(msg); if (!rawText) { - return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE); + return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); } - const truncatedText = truncateTextToBudget(rawText, maxChars); - return replaceToolResultText(msg, truncatedText); + const textBudget = estimateBudgetToTextBudget(maxChars); + if (textBudget <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) { + return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + } + + const maxCompactedTextChars = Math.max(MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS, textBudget); + if (maxCompactedTextChars <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) { + return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + } + + const minKeepChars = Math.max( + 96, + Math.min( + MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS, + maxCompactedTextChars - PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX.length - 1, + ), + ); + + const compactedText = truncateToolResultText(rawText, maxCompactedTextChars, { + suffix: PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX, + minKeepChars, + }); + + return replaceToolResultText(msg, compactedText); } -function compactExistingToolResultsInPlace(params: { +function compactToPlaceholderInPlace(params: { messages: AgentMessage[]; charsNeeded: number; cache: MessageCharEstimateCache; @@ -108,9 +145,6 @@ function compactExistingToolResultsInPlace(params: { } let reduced = 0; - // Compact newest-first so more of the cached prefix survives: rewriting - // messages[k] for small k invalidates the provider prompt cache from that point onward. - // Tradeoff: the model loses recent tool output instead of old. for (let i = messages.length - 1; i >= 0; i--) { const msg = messages[i]; if (!isToolResultMessage(msg)) { @@ -138,6 +172,124 @@ function compactExistingToolResultsInPlace(params: { return reduced; } +function truncateToolResultToChars( + msg: AgentMessage, + maxChars: number, + cache: MessageCharEstimateCache, +): AgentMessage { + if (!isToolResultMessage(msg)) { + return msg; + } + + const estimatedChars = estimateMessageCharsCached(msg, cache); + if (estimatedChars <= maxChars) { + return msg; + } + + const rawText = getToolResultText(msg); + if (!rawText) { + return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE); + } + + const textBudget = estimateBudgetToTextBudget(maxChars); + if (textBudget <= 0) { + return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE); + } + + if (rawText.length <= textBudget) { + return replaceToolResultText(msg, rawText); + } + + const truncatedText = truncateTextToBudget(rawText, textBudget); + return replaceToolResultText(msg, truncatedText); +} + +function compactExistingToolResultsInPlace(params: { + messages: AgentMessage[]; + charsNeeded: number; + cache: MessageCharEstimateCache; +}): number { + const { messages, charsNeeded, cache } = params; + if (charsNeeded <= 0) { + return 0; + } + + let reduced = 0; + // Compact newest-first so more of the cached prefix survives: rewriting + // messages[k] for small k invalidates the provider prompt cache from that point onward. + // Keep a truncated slice of newer tool output before falling back to a + // full placeholder so recent, user-visible results remain readable when possible. + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (!isToolResultMessage(msg)) { + continue; + } + + const before = estimateMessageCharsCached(msg, cache); + if (before <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) { + continue; + } + + const targetAfter = Math.max( + MIN_COMPACTED_TOOL_RESULT_ESTIMATE_CHARS, + before - (charsNeeded - reduced), + ); + + let compacted = compactToolResultToEstimateBudget(msg, targetAfter, cache); + let after = estimateMessageCharsCached(compacted, cache); + if (after >= before) { + compacted = replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + after = estimateMessageCharsCached(compacted, cache); + } + + applyMessageMutationInPlace(msg, compacted, cache); + if (after >= before) { + continue; + } + + reduced += before - after; + if (reduced >= charsNeeded) { + break; + } + } + + if (reduced < charsNeeded) { + reduced += compactToPlaceholderInPlace({ + messages, + charsNeeded: charsNeeded - reduced, + cache, + }); + } + + return reduced; +} + +function cloneMessagesForGuard(messages: AgentMessage[]): AgentMessage[] { + return messages.map( + (msg) => ({ ...(msg as unknown as Record) }) as unknown as AgentMessage, + ); +} + +function contextNeedsToolResultCompaction(params: { + messages: AgentMessage[]; + contextBudgetChars: number; + maxSingleToolResultChars: number; +}): boolean { + const { messages, contextBudgetChars, maxSingleToolResultChars } = params; + const estimateCache = createMessageCharEstimateCache(); + let sawToolResult = false; + for (const message of messages) { + if (!isToolResultMessage(message)) { + continue; + } + sawToolResult = true; + if (estimateMessageCharsCached(message, estimateCache) > maxSingleToolResultChars) { + return true; + } + } + return sawToolResult && estimateContextChars(messages, estimateCache) > contextBudgetChars; +} + function applyMessageMutationInPlace( target: AgentMessage, source: AgentMessage, @@ -221,7 +373,14 @@ export function installToolResultContextGuard(params: { ? await originalTransformContext.call(mutableAgent, messages, signal) : messages; - const contextMessages = Array.isArray(transformed) ? transformed : messages; + const sourceMessages = Array.isArray(transformed) ? transformed : messages; + const contextMessages = contextNeedsToolResultCompaction({ + messages: sourceMessages, + contextBudgetChars, + maxSingleToolResultChars, + }) + ? cloneMessagesForGuard(sourceMessages) + : sourceMessages; enforceToolResultContextBudgetInPlace({ messages: contextMessages, contextBudgetChars,