From 8a43223014cb473d304b7ff3c08d2ec0dbd5e234 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sun, 5 Apr 2026 21:51:20 +0100
Subject: [PATCH] fix(agents): preserve tool output during context guarding

---
 .../tool-result-context-guard.test.ts         | 126 ++++++++-----
 .../tool-result-context-guard.ts              | 177 +++++++++++++++++-
 2 files changed, 246 insertions(+), 57 deletions(-)

diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
index 6fed7c61197..61107230289 100644
--- a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
+++ b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
@@ -4,6 +4,7 @@ import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
 import {
   CONTEXT_LIMIT_TRUNCATION_NOTICE,
   PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
+  PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE,
   PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
   installToolResultContextGuard,
 } from "./tool-result-context-guard.js";
@@ -93,29 +94,37 @@ async function applyGuardToContext(
   return await agent.transformContext?.(contextForNextCall, new AbortController().signal);
 }
 
-function expectCompactedToolResultsWithoutContextNotice(
-  contextForNextCall: AgentMessage[],
-  oldIndex: number,
-  newIndex: number,
-) {
-  const oldResultText = getToolResultText(contextForNextCall[oldIndex]);
-  const newResultText = getToolResultText(contextForNextCall[newIndex]);
-  expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
-  expect(newResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
-  expect(newResultText).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
+function expectReadableCompaction(text: string, prefix: string) {
+  expect(text).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE);
+  expect(text).toContain(prefix.repeat(64));
+  expect(text).not.toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+  expect(text).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
+}
+
+function expectReadableToolSlice(text: string, prefix: string) {
+  const carriesTrimNotice =
+    text.includes(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE) ||
+    text.includes(CONTEXT_LIMIT_TRUNCATION_NOTICE);
+  expect(text).toContain(prefix.repeat(64));
+  expect(text).not.toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+  expect(carriesTrimNotice).toBe(true);
 }
 
 describe("installToolResultContextGuard", () => {
-  it("compacts newest-first when total context overflows, even if each result fits individually", async () => {
+  it("returns a cloned guarded context so original tool output stays visible", async () => {
     const agent = makeGuardableAgent();
     const contextForNextCall = makeTwoToolResultOverflowContext();
     const transformed = await applyGuardToContext(agent, contextForNextCall);
 
-    expect(transformed).toBe(contextForNextCall);
-    expectCompactedToolResultsWithoutContextNotice(contextForNextCall, 1, 2);
+    expect(transformed).not.toBe(contextForNextCall);
+    const transformedMessages = transformed as AgentMessage[];
+    expectReadableCompaction(getToolResultText(transformedMessages[1]), "x");
+    expectReadableCompaction(getToolResultText(transformedMessages[2]), "y");
+    expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(1_000));
+    expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(1_000));
   });
 
-  it("keeps compacting newest-first until context is back under budget", async () => {
+  it("keeps readable slices of overflowing tool results before using a placeholder", async () => {
     const agent = makeGuardableAgent();
 
     installToolResultContextGuard({
@@ -130,14 +139,17 @@ describe("installToolResultContextGuard", () => {
       makeToolResult("call_3", "c".repeat(800)),
     ];
 
-    await agent.transformContext?.(contextForNextCall, new AbortController().signal);
+    const transformed = (await agent.transformContext?.(
+      contextForNextCall,
+      new AbortController().signal,
+    )) as AgentMessage[];
 
-    const first = getToolResultText(contextForNextCall[1]);
-    const second = getToolResultText(contextForNextCall[2]);
-    const third = getToolResultText(contextForNextCall[3]);
+    const first = getToolResultText(transformed[1]);
+    const second = getToolResultText(transformed[2]);
+    const third = getToolResultText(transformed[3]);
 
-    expect(first).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
-    expect(second).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+    expectReadableCompaction(first, "a");
+    expectReadableCompaction(second, "b");
     expect(third).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
   });
 
@@ -150,20 +162,24 @@ describe("installToolResultContextGuard", () => {
     });
 
     const contextForNextCall: AgentMessage[] = [makeUser("stress")];
+    let transformed: AgentMessage[] | undefined;
     for (let i = 1; i <= 4; i++) {
       contextForNextCall.push(makeToolResult(`call_${i}`, String(i).repeat(95_000)));
-      await agent.transformContext?.(contextForNextCall, new AbortController().signal);
+      transformed = (await agent.transformContext?.(
+        contextForNextCall,
+        new AbortController().signal,
+      )) as AgentMessage[];
     }
 
-    const toolResultTexts = contextForNextCall
+    const toolResultTexts = (transformed ?? [])
       .filter((msg) => msg.role === "toolResult")
       .map((msg) => getToolResultText(msg as AgentMessage));
 
-    // Newest-first compaction: oldest results stay intact to preserve the
-    // cached prefix; the newest overflowing result is compacted.
-    expect(toolResultTexts[0]?.length).toBe(95_000);
-    expect(toolResultTexts[3]).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
-    expect(toolResultTexts.join("\n")).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
+    // Large outputs are capped per-tool before aggregate compaction kicks in.
+    expect(toolResultTexts[0]?.length).toBe(50_000);
+    expect(toolResultTexts[0]).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
+    expectReadableCompaction(toolResultTexts[3] ?? "", "4");
+    expect(toolResultTexts[3]).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
   });
 
   it("truncates an individually oversized tool result with a context-limit notice", async () => {
@@ -176,9 +192,12 @@ describe("installToolResultContextGuard", () => {
 
     const contextForNextCall = [makeToolResult("call_big", "z".repeat(5_000))];
 
-    await agent.transformContext?.(contextForNextCall, new AbortController().signal);
+    const transformed = (await agent.transformContext?.(
+      contextForNextCall,
+      new AbortController().signal,
+    )) as AgentMessage[];
 
-    const newResultText = getToolResultText(contextForNextCall[0]);
+    const newResultText = getToolResultText(transformed[0]);
     expect(newResultText.length).toBeLessThan(5_000);
     expect(newResultText).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
   });
@@ -197,8 +216,12 @@ describe("installToolResultContextGuard", () => {
       makeToolResult("call_new", "y".repeat(1_000)),
     ];
 
-    await agent.transformContext?.(contextForNextCall, new AbortController().signal);
-    expectCompactedToolResultsWithoutContextNotice(contextForNextCall, 1, 2);
+    const transformed = (await agent.transformContext?.(
+      contextForNextCall,
+      new AbortController().signal,
+    )) as AgentMessage[];
+    expectReadableCompaction(getToolResultText(transformed[1]), "x");
+    expect(getToolResultText(transformed[2])).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
   });
 
   it("wraps an existing transformContext and guards the transformed output", async () => {
@@ -215,7 +238,7 @@ describe("installToolResultContextGuard", () => {
     expect(transformed).not.toBe(contextForNextCall);
     const transformedMessages = transformed as AgentMessage[];
     const oldResultText = getToolResultText(transformedMessages[1]);
-    expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+    expectReadableCompaction(oldResultText, "x");
   });
 
   it("handles legacy role=tool string outputs when enforcing context budget", async () => {
@@ -232,13 +255,18 @@ describe("installToolResultContextGuard", () => {
       makeLegacyToolResult("call_new", "y".repeat(1_000)),
     ];
 
-    await agent.transformContext?.(contextForNextCall, new AbortController().signal);
+    const transformed = (await agent.transformContext?.(
+      contextForNextCall,
+      new AbortController().signal,
+    )) as AgentMessage[];
 
-    const oldResultText = (contextForNextCall[1] as { content?: unknown }).content;
-    const newResultText = (contextForNextCall[2] as { content?: unknown }).content;
+    const oldResultText = (transformed[1] as { content?: unknown }).content;
+    const newResultText = (transformed[2] as { content?: unknown }).content;
 
-    expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
-    expect(newResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+    expect(typeof oldResultText).toBe("string");
+    expect(typeof newResultText).toBe("string");
+    expect(oldResultText).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE);
+    expect(newResultText).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE);
   });
 
   it("drops oversized read-tool details payloads when compacting tool results", async () => {
@@ -255,19 +283,22 @@ describe("installToolResultContextGuard", () => {
       makeToolResultWithDetails("call_new", "y".repeat(900), "d".repeat(8_000)),
     ];
 
-    await agent.transformContext?.(contextForNextCall, new AbortController().signal);
+    const transformed = (await agent.transformContext?.(
+      contextForNextCall,
+      new AbortController().signal,
+    )) as AgentMessage[];
 
-    const oldResult = contextForNextCall[1] as {
+    const oldResult = transformed[1] as {
       details?: unknown;
     };
-    const newResult = contextForNextCall[2] as {
+    const newResult = transformed[2] as {
       details?: unknown;
     };
-    const oldResultText = getToolResultText(contextForNextCall[1]);
-    const newResultText = getToolResultText(contextForNextCall[2]);
+    const oldResultText = getToolResultText(transformed[1]);
+    const newResultText = getToolResultText(transformed[2]);
 
-    expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
-    expect(newResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+    expectReadableToolSlice(oldResultText, "x");
+    expectReadableToolSlice(newResultText, "y");
     expect(oldResult.details).toBeUndefined();
     expect(newResult.details).toBeUndefined();
   });
@@ -322,12 +353,11 @@ describe("installToolResultContextGuard", () => {
       makeToolResult("call_old", "x".repeat(2_000)),
     ];
 
-    await expect(
-      agent.transformContext?.(contextForNextCall, new AbortController().signal),
-    ).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
+    const guarded = agent.transformContext?.(contextForNextCall, new AbortController().signal);
+    await expect(guarded).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
 
-    // Tool result should have been compacted before the overflow check.
+    // The guard compacts a clone, so the caller's tool result must stay untouched.
     const toolResultText = getToolResultText(contextForNextCall[1]);
-    expect(toolResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+    expect(toolResultText).toBe("x".repeat(2_000));
   });
 });
diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.ts
index 94359d02178..03c30912653 100644
--- a/src/agents/pi-embedded-runner/tool-result-context-guard.ts
+++ b/src/agents/pi-embedded-runner/tool-result-context-guard.ts
@@ -10,6 +10,7 @@ import {
   invalidateMessageCharsCacheEntry,
   isToolResultMessage,
 } from "./tool-result-char-estimator.js";
+import { truncateToolResultText } from "./tool-result-truncation.js";
 
 // Keep a conservative input budget to absorb tokenizer variance and provider framing overhead.
 const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
@@ -23,10 +24,20 @@ const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`;
 
 export const PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER =
   "[compacted: tool output removed to free context]";
+export const PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE =
+  "[compacted: tool output trimmed to free context]";
 
 export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE =
   "Preemptive context overflow: estimated context size exceeds safe threshold during tool loop";
 
+const PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX = `\n${PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE}`;
+const MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS = 96;
+const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO =
+  CHARS_PER_TOKEN_ESTIMATE / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE;
+const MIN_COMPACTED_TOOL_RESULT_ESTIMATE_CHARS = Math.ceil(
+  MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS * TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO,
+);
+
 type GuardableTransformContext = (
   messages: AgentMessage[],
   signal: AbortSignal,
@@ -74,7 +85,11 @@ function replaceToolResultText(msg: AgentMessage, text: string): AgentMessage {
   } as AgentMessage;
 }
 
-function truncateToolResultToChars(
+function estimateBudgetToTextBudget(maxChars: number): number {
+  return Math.max(0, Math.floor(maxChars / TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO));
+}
+
+function compactToolResultToEstimateBudget(
   msg: AgentMessage,
   maxChars: number,
   cache: MessageCharEstimateCache,
@@ -90,14 +105,36 @@ function truncateToolResultToChars(
 
   const rawText = getToolResultText(msg);
   if (!rawText) {
-    return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE);
+    return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
   }
 
-  const truncatedText = truncateTextToBudget(rawText, maxChars);
-  return replaceToolResultText(msg, truncatedText);
+  const textBudget = estimateBudgetToTextBudget(maxChars);
+  if (textBudget <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
+    return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+  }
+
+  const maxCompactedTextChars = Math.max(MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS, textBudget);
+  if (maxCompactedTextChars <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
+    return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+  }
+
+  const minKeepChars = Math.max(
+    96,
+    Math.min(
+      MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS,
+      maxCompactedTextChars - PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX.length - 1,
+    ),
+  );
+
+  const compactedText = truncateToolResultText(rawText, maxCompactedTextChars, {
+    suffix: PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX,
+    minKeepChars,
+  });
+
+  return replaceToolResultText(msg, compactedText);
 }
 
-function compactExistingToolResultsInPlace(params: {
+function compactToPlaceholderInPlace(params: {
   messages: AgentMessage[];
   charsNeeded: number;
   cache: MessageCharEstimateCache;
@@ -108,9 +145,6 @@ function compactExistingToolResultsInPlace(params: {
   }
 
   let reduced = 0;
-  // Compact newest-first so more of the cached prefix survives: rewriting
-  // messages[k] for small k invalidates the provider prompt cache from that point onward.
-  // Tradeoff: the model loses recent tool output instead of old.
   for (let i = messages.length - 1; i >= 0; i--) {
     const msg = messages[i];
     if (!isToolResultMessage(msg)) {
@@ -138,6 +172,124 @@ function compactExistingToolResultsInPlace(params: {
   return reduced;
 }
 
+function truncateToolResultToChars(
+  msg: AgentMessage,
+  maxChars: number,
+  cache: MessageCharEstimateCache,
+): AgentMessage {
+  if (!isToolResultMessage(msg)) {
+    return msg;
+  }
+  // Results whose estimated size already fits the cap are returned as-is.
+  const estimatedChars = estimateMessageCharsCached(msg, cache);
+  if (estimatedChars <= maxChars) {
+    return msg;
+  }
+  // Over the cap with no text to keep: only the truncation notice remains.
+  const rawText = getToolResultText(msg);
+  if (!rawText) {
+    return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE);
+  }
+  // maxChars is in estimate units; convert it to a raw text-length budget.
+  const textBudget = estimateBudgetToTextBudget(maxChars);
+  if (textBudget <= 0) {
+    return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE);
+  }
+  // Text already fits: rebuild around it (presumably sheds non-text payload).
+  if (rawText.length <= textBudget) {
+    return replaceToolResultText(msg, rawText);
+  }
+  // Otherwise cut the text down to the budget via truncateTextToBudget.
+  const truncatedText = truncateTextToBudget(rawText, textBudget);
+  return replaceToolResultText(msg, truncatedText);
+}
+
+function compactExistingToolResultsInPlace(params: {
+  messages: AgentMessage[];
+  charsNeeded: number;
+  cache: MessageCharEstimateCache;
+}): number {
+  const { messages, charsNeeded, cache } = params;
+  if (charsNeeded <= 0) {
+    return 0;
+  }
+
+  let reduced = 0;
+  // Compact newest-first so more of the cached prefix survives: rewriting
+  // messages[k] for small k invalidates the provider prompt cache from that point onward.
+  // Keep a truncated slice of newer tool output before falling back to a
+  // full placeholder so recent, user-visible results remain readable when possible.
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (!isToolResultMessage(msg)) {
+      continue;
+    }
+    // Results already no larger than the placeholder cannot shrink further.
+    const before = estimateMessageCharsCached(msg, cache);
+    if (before <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
+      continue;
+    }
+    // Trim only as much as is still needed, but never below the readable floor.
+    const targetAfter = Math.max(
+      MIN_COMPACTED_TOOL_RESULT_ESTIMATE_CHARS,
+      before - (charsNeeded - reduced),
+    );
+    // Prefer a trimmed slice; fall back to the placeholder if that saves nothing.
+    let compacted = compactToolResultToEstimateBudget(msg, targetAfter, cache);
+    let after = estimateMessageCharsCached(compacted, cache);
+    if (after >= before) {
+      compacted = replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+      after = estimateMessageCharsCached(compacted, cache);
+    }
+    // Leave the message untouched when neither form is smaller than the original.
+    if (after >= before) {
+      continue;
+    }
+
+    applyMessageMutationInPlace(msg, compacted, cache);
+    reduced += before - after;
+    if (reduced >= charsNeeded) {
+      break;
+    }
+  }
+  // Trimmed slices were not enough: let the placeholder pass free the remainder.
+  if (reduced < charsNeeded) {
+    reduced += compactToPlaceholderInPlace({
+      messages,
+      charsNeeded: charsNeeded - reduced,
+      cache,
+    });
+  }
+
+  return reduced;
+}
+
+function cloneMessagesForGuard(messages: AgentMessage[]): AgentMessage[] {
+  return messages.map(
+    (msg) => ({ ...(msg as unknown as Record<string, unknown>) }) as unknown as AgentMessage,
+  );
+}
+
+function contextNeedsToolResultCompaction(params: {
+  messages: AgentMessage[];
+  contextBudgetChars: number;
+  maxSingleToolResultChars: number;
+}): boolean {
+  // True when any single tool result is oversized, or when tool results exist
+  // and the estimated context as a whole exceeds the budget.
+  const estimateCache = createMessageCharEstimateCache();
+  const toolResults = params.messages.filter((message) => isToolResultMessage(message));
+  const exceedsSingleCap = (message: AgentMessage): boolean =>
+    estimateMessageCharsCached(message, estimateCache) > params.maxSingleToolResultChars;
+  if (toolResults.some(exceedsSingleCap)) {
+    return true;
+  }
+  if (toolResults.length === 0) {
+    return false;
+  }
+  return estimateContextChars(params.messages, estimateCache) > params.contextBudgetChars;
+}
+
 function applyMessageMutationInPlace(
   target: AgentMessage,
   source: AgentMessage,
@@ -221,7 +373,14 @@ export function installToolResultContextGuard(params: {
       ? await originalTransformContext.call(mutableAgent, messages, signal)
       : messages;
 
-    const contextMessages = Array.isArray(transformed) ? transformed : messages;
+    const sourceMessages = Array.isArray(transformed) ? transformed : messages;
+    const contextMessages = contextNeedsToolResultCompaction({
+      messages: sourceMessages,
+      contextBudgetChars,
+      maxSingleToolResultChars,
+    })
+      ? cloneMessagesForGuard(sourceMessages)
+      : sourceMessages;
     enforceToolResultContextBudgetInPlace({
       messages: contextMessages,
       contextBudgetChars,