align tool-result truncation with pi semantics

This commit is contained in:
Tak Hoffman
2026-04-05 22:11:09 -05:00
committed by Peter Steinberger
parent 5c1b1eb169
commit 7fc1a74ee9
9 changed files with 514 additions and 622 deletions

View File

@@ -1,4 +1,4 @@
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import {
makeAttemptResult,
makeCompactionSuccess,
@@ -10,9 +10,11 @@ import {
loadRunOverflowCompactionHarness,
mockedContextEngine,
mockedCompactDirect,
mockedEvaluateContextWindowGuard,
mockedIsCompactionFailureError,
mockedIsLikelyContextOverflowError,
mockedLog,
mockedResolveContextWindowInfo,
mockedRunEmbeddedAttempt,
mockedSessionLikelyHasOversizedToolResults,
mockedTruncateOversizedToolResultsInSession,
@@ -172,6 +174,86 @@ describe("overflow compaction in run loop", () => {
expect(result.meta.error).toBeUndefined();
});
it("falls back to tool-result truncation and retries when real aggregate tool-result detection trips", async () => {
const { sessionLikelyHasOversizedToolResults } = await vi.importActual<
typeof import("./tool-result-truncation.js")
>("./tool-result-truncation.js");
mockedResolveContextWindowInfo.mockReturnValue({
tokens: 10_000,
source: "model",
});
mockedEvaluateContextWindowGuard.mockReturnValue({
shouldWarn: false,
shouldBlock: false,
tokens: 10_000,
source: "model",
});
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(
makeAttemptResult({
promptError: makeOverflowError(),
messagesSnapshot: [
{
role: "user",
content: "u".repeat(20_000),
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
{
role: "toolResult",
toolCallId: "call_a",
toolName: "read",
content: [{ type: "text", text: "a".repeat(10_000) }],
isError: false,
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
{
role: "toolResult",
toolCallId: "call_b",
toolName: "read",
content: [{ type: "text", text: "b".repeat(10_000) }],
isError: false,
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
{
role: "toolResult",
toolCallId: "call_c",
toolName: "read",
content: [{ type: "text", text: "c".repeat(10_000) }],
isError: false,
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
],
}),
)
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
mockedCompactDirect.mockResolvedValueOnce({
ok: false,
compacted: false,
reason: "nothing to compact",
});
mockedSessionLikelyHasOversizedToolResults.mockImplementation(
((params: Parameters<typeof sessionLikelyHasOversizedToolResults>[0]) =>
sessionLikelyHasOversizedToolResults(params)) as never,
);
mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({
truncated: true,
truncatedCount: 2,
});
const result = await runEmbeddedPiAgent(baseParams);
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
expect(mockedSessionLikelyHasOversizedToolResults).toHaveBeenCalledWith(
expect.objectContaining({ contextWindowTokens: 10_000 }),
);
expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledWith(
expect.objectContaining({ sessionFile: "/tmp/session.json" }),
);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(mockedLog.info).toHaveBeenCalledWith(
expect.stringContaining("Truncated 2 tool result(s)"),
);
expect(result.meta.error).toBeUndefined();
});
it("retries compaction up to 3 times before giving up", async () => {
const overflowError = makeOverflowError();

View File

@@ -4,8 +4,7 @@ import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
import {
CONTEXT_LIMIT_TRUNCATION_NOTICE,
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE,
PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
formatContextLimitTruncationNotice,
installToolResultContextGuard,
} from "./tool-result-context-guard.js";
@@ -61,6 +60,9 @@ function makeToolResultWithDetails(id: string, text: string, detailText: string)
function getToolResultText(msg: AgentMessage): string {
const content = (msg as { content?: unknown }).content;
if (typeof content === "string") {
return content;
}
if (!Array.isArray(content)) {
return "";
}
@@ -79,342 +81,164 @@ function makeGuardableAgent(
return { transformContext };
}
function makeTwoToolResultOverflowContext(): AgentMessage[] {
return [
makeUser("u".repeat(2_000)),
makeToolResult("call_old", "x".repeat(1_000)),
makeToolResult("call_new", "y".repeat(1_000)),
];
}
async function applyGuardToContext(
agent: { transformContext?: (messages: AgentMessage[], signal: AbortSignal) => unknown },
contextForNextCall: AgentMessage[],
contextWindowTokens = 1_000,
) {
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
contextWindowTokens,
});
return await agent.transformContext?.(contextForNextCall, new AbortController().signal);
}
function expectReadableCompaction(text: string, prefix: string) {
expect(text.includes(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE)).toBe(true);
expect(text).toContain(prefix.repeat(64));
expect(text).not.toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(text).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
function expectPiStyleTruncation(text: string): void {
expect(text).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
expect(text).toMatch(/\[\.\.\. \d+ more characters truncated\]$/);
expect(text).not.toContain("[compacted: tool output removed to free context]");
expect(text).not.toContain("[compacted: tool output trimmed to free context]");
expect(text).not.toContain("[truncated: output exceeded context limit]");
}
function expectReadableToolSlice(text: string, prefix: string) {
expect(text).toContain(prefix.repeat(64));
expect(text).not.toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(
text.includes(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE) ||
text.includes(CONTEXT_LIMIT_TRUNCATION_NOTICE),
).toBe(true);
}
function expectCompactedOrPlaceholder(text: string, prefix: string) {
if (text === PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER) {
return;
}
expectReadableCompaction(text, prefix);
}
describe("formatContextLimitTruncationNotice", () => {
it("formats pi-style truncation wording with a count", () => {
expect(formatContextLimitTruncationNotice(123)).toBe("[... 123 more characters truncated]");
});
});
describe("installToolResultContextGuard", () => {
it("returns a cloned guarded context so original tool output stays visible", async () => {
it("passes through unchanged context when under the per-tool and total budget", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = makeTwoToolResultOverflowContext();
const contextForNextCall = [makeUser("hello"), makeToolResult("call_ok", "small output")];
const transformed = await applyGuardToContext(agent, contextForNextCall);
expect(transformed).not.toBe(contextForNextCall);
const transformedMessages = transformed as AgentMessage[];
expectReadableCompaction(getToolResultText(transformedMessages[1]), "x");
expectReadableCompaction(getToolResultText(transformedMessages[2]), "y");
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(1_000));
expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(1_000));
expect(transformed).toBe(contextForNextCall);
});
it("keeps at least one readable older slice before falling back to a placeholder", async () => {
it("does not preemptively overflow large non-tool context that is still under the high-water mark", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [makeUser("u".repeat(3_200))];
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const transformed = await applyGuardToContext(agent, contextForNextCall);
const contextForNextCall = [
makeUser("u".repeat(2_200)),
makeToolResult("call_1", "a".repeat(800)),
makeToolResult("call_2", "b".repeat(800)),
makeToolResult("call_3", "c".repeat(800)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
const first = getToolResultText(transformed[1]);
const second = getToolResultText(transformed[2]);
const third = getToolResultText(transformed[3]);
expectReadableCompaction(first, "a");
expectReadableCompaction(third, "c");
expect(
second === "b".repeat(800) || second === PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
).toBe(true);
expect(transformed).toBe(contextForNextCall);
});
it("keeps the newest large tool result visible when an older one can absorb overflow", async () => {
it("returns a cloned guarded context so original oversized tool output stays visible", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 100_000,
});
const contextForNextCall: AgentMessage[] = [makeUser("stress")];
let transformed: AgentMessage[] | undefined;
for (let i = 1; i <= 4; i++) {
contextForNextCall.push(makeToolResult(`call_${i}`, String(i).repeat(95_000)));
transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
}
const toolResultTexts = (transformed ?? [])
.filter((msg) => msg.role === "toolResult")
.map((msg) => getToolResultText(msg as AgentMessage));
expect(toolResultTexts[0]).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
expectReadableCompaction(toolResultTexts[1] ?? "", "2");
expectReadableCompaction(toolResultTexts[2] ?? "", "3");
expectReadableToolSlice(toolResultTexts[3] ?? "", "4");
});
it("truncates an individually oversized tool result with a context-limit notice", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [makeToolResult("call_big", "z".repeat(5_000))];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
const transformed = (await applyGuardToContext(agent, contextForNextCall)) as AgentMessage[];
expect(transformed).not.toBe(contextForNextCall);
const newResultText = getToolResultText(transformed[0]);
expect(newResultText.length).toBeLessThan(5_000);
expect(newResultText).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
});
it("falls back to compacting the newest tool result when older ones are insufficient", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [
makeUser("u".repeat(2_600)),
makeToolResult("call_old", "x".repeat(700)),
makeToolResult("call_new", "y".repeat(1_000)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
expectCompactedOrPlaceholder(getToolResultText(transformed[1]), "x");
expectCompactedOrPlaceholder(getToolResultText(transformed[2]), "y");
expectPiStyleTruncation(newResultText);
expect(getToolResultText(contextForNextCall[0])).toBe("z".repeat(5_000));
});
it("wraps an existing transformContext and guards the transformed output", async () => {
const agent = makeGuardableAgent((messages) => {
return messages.map((msg) =>
const agent = makeGuardableAgent((messages) =>
messages.map((msg) =>
castAgentMessage({
...(msg as unknown as Record<string, unknown>),
}),
);
});
const contextForNextCall = makeTwoToolResultOverflowContext();
const transformed = await applyGuardToContext(agent, contextForNextCall);
),
);
const contextForNextCall = [makeToolResult("call_big", "x".repeat(5_000))];
const transformed = (await applyGuardToContext(agent, contextForNextCall)) as AgentMessage[];
expect(transformed).not.toBe(contextForNextCall);
const transformedMessages = transformed as AgentMessage[];
const oldResultText = getToolResultText(transformedMessages[1]);
expectReadableCompaction(oldResultText, "x");
expectPiStyleTruncation(getToolResultText(transformed[0]));
});
it("handles legacy role=tool string outputs when enforcing context budget", async () => {
it("handles legacy role=tool string outputs with pi-style truncation wording", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [makeLegacyToolResult("call_big", "y".repeat(5_000))];
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const transformed = (await applyGuardToContext(agent, contextForNextCall)) as AgentMessage[];
const newResultText = getToolResultText(transformed[0]);
expect(typeof (transformed[0] as { content?: unknown }).content).toBe("string");
expectPiStyleTruncation(newResultText);
});
it("drops oversized tool-result details when truncating once", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [
makeUser("u".repeat(2_000)),
makeLegacyToolResult("call_old", "x".repeat(1_000)),
makeLegacyToolResult("call_new", "y".repeat(1_000)),
makeToolResultWithDetails("call_big", "x".repeat(900), "d".repeat(8_000)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
const transformed = (await applyGuardToContext(agent, contextForNextCall)) as AgentMessage[];
const result = transformed[0] as { details?: unknown };
const newResultText = getToolResultText(transformed[0]);
const oldResultText = (transformed[1] as { content?: unknown }).content;
const newResultText = (transformed[2] as { content?: unknown }).content;
expect(typeof oldResultText).toBe("string");
expect(typeof newResultText).toBe("string");
expect(oldResultText).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE);
expect(newResultText).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE);
expectPiStyleTruncation(newResultText);
expect(result.details).toBeUndefined();
expect((contextForNextCall[0] as { details?: unknown }).details).toBeDefined();
});
it("drops oversized read-tool details payloads when compacting tool results", async () => {
it("throws overflow when total context exceeds the budget after one-time truncation", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [
makeUser("u".repeat(1_600)),
makeToolResultWithDetails("call_old", "x".repeat(900), "d".repeat(8_000)),
makeToolResultWithDetails("call_new", "y".repeat(900), "d".repeat(8_000)),
makeUser("u".repeat(2_800)),
makeToolResult("call_ok", "x".repeat(500)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
await expect(applyGuardToContext(agent, contextForNextCall)).rejects.toThrow(
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
);
const oldResult = transformed[1] as {
details?: unknown;
};
const newResult = transformed[2] as {
details?: unknown;
};
const oldResultText = getToolResultText(transformed[1]);
const newResultText = getToolResultText(transformed[2]);
expectReadableToolSlice(oldResultText, "x");
expectReadableToolSlice(newResultText, "y");
expect(oldResult.details).toBeUndefined();
expect(newResult.details).toBeUndefined();
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(500));
});
it("throws overflow instead of compacting the latest read result during aggregate compaction", async () => {
it("throws overflow instead of historically rewriting older tool results", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [
makeUser("u".repeat(2_600)),
makeToolResult("call_old", "x".repeat(300)),
makeUser("u".repeat(2_200)),
makeToolResult("call_1", "a".repeat(500)),
makeToolResult("call_2", "b".repeat(500)),
makeToolResult("call_3", "c".repeat(500)),
];
await expect(applyGuardToContext(agent, contextForNextCall)).rejects.toThrow(
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
);
expect(getToolResultText(contextForNextCall[1])).toBe("a".repeat(500));
expect(getToolResultText(contextForNextCall[2])).toBe("b".repeat(500));
expect(getToolResultText(contextForNextCall[3])).toBe("c".repeat(500));
});
it("throws overflow instead of special-casing the latest read result", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [
makeUser("u".repeat(2_900)),
makeToolResult("call_old", "x".repeat(400)),
makeReadToolResult("call_new", "y".repeat(500)),
];
await expect(
agent.transformContext?.(contextForNextCall, new AbortController().signal),
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
await expect(applyGuardToContext(agent, contextForNextCall)).rejects.toThrow(
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
);
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(300));
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(400));
expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(500));
});
it("keeps the latest read result when older outputs absorb the aggregate overflow", async () => {
it("supports model-window-specific truncation for large but otherwise valid tool results", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [makeToolResult("call_big", "q".repeat(95_000))];
installToolResultContextGuard({
const transformed = (await applyGuardToContext(
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [
makeUser("u".repeat(1_400)),
makeToolResult("call_old_1", "a".repeat(350)),
makeToolResult("call_old_2", "b".repeat(350)),
makeReadToolResult("call_new", "c".repeat(500)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
100_000,
)) as AgentMessage[];
expect(getToolResultText(transformed[3])).toBe("c".repeat(500));
});
it("throws preemptive context overflow when context exceeds 90% after tool-result compaction", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
// contextBudgetChars = 1000 * 4 * 0.75 = 3000
// preemptiveOverflowChars = 1000 * 4 * 0.9 = 3600
contextWindowTokens: 1_000,
});
// Large user message (non-compactable) pushes context past 90% threshold.
const contextForNextCall = [makeUser("u".repeat(3_700)), makeToolResult("call_1", "small")];
await expect(
agent.transformContext?.(contextForNextCall, new AbortController().signal),
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
});
it("does not throw when context is under 90% after tool-result compaction", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
// Context well under the 3600-char preemptive threshold.
const contextForNextCall = [makeUser("u".repeat(1_000)), makeToolResult("call_1", "small")];
await expect(
agent.transformContext?.(contextForNextCall, new AbortController().signal),
).resolves.not.toThrow();
});
it("compacts tool results before checking the preemptive overflow threshold", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
// Large user message + large tool result. The guard should compact the tool
// result first, then check the overflow threshold. Even after compaction the
// user content alone pushes past 90%, so the overflow error fires.
const contextForNextCall = [
makeUser("u".repeat(3_700)),
makeToolResult("call_old", "x".repeat(2_000)),
];
const guarded = agent.transformContext?.(contextForNextCall, new AbortController().signal);
await expect(guarded).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
// Tool result should have been compacted before the overflow check.
const toolResultText = getToolResultText(contextForNextCall[1]);
expect(toolResultText).toBe("x".repeat(2_000));
expectPiStyleTruncation(getToolResultText(transformed[0]));
});
});

View File

@@ -10,33 +10,18 @@ import {
invalidateMessageCharsCacheEntry,
isToolResultMessage,
} from "./tool-result-char-estimator.js";
import { truncateToolResultText } from "./tool-result-truncation.js";
// Keep a conservative input budget to absorb tokenizer variance and provider framing overhead.
const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
const SINGLE_TOOL_RESULT_CONTEXT_SHARE = 0.5;
// High-water mark: if context exceeds this ratio after tool-result compaction,
// trigger full session compaction via the existing overflow recovery cascade.
const PREEMPTIVE_OVERFLOW_RATIO = 0.9;
export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "[truncated: output exceeded context limit]";
const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`;
export const PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER =
"[compacted: tool output removed to free context]";
export const PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE =
"[compacted: tool output trimmed to free context]";
export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "more characters truncated";
export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE =
"Preemptive context overflow: estimated context size exceeds safe threshold during tool loop";
const PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX = `\n${PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE}`;
const MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS = 96;
const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO =
CHARS_PER_TOKEN_ESTIMATE / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE;
const MIN_COMPACTED_TOOL_RESULT_ESTIMATE_CHARS = Math.ceil(
MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS * TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO,
);
type GuardableTransformContext = (
messages: AgentMessage[],
@@ -49,19 +34,8 @@ type GuardableAgentRecord = {
transformContext?: GuardableTransformContext;
};
/**
 * Resolves the tool name carried by a tool-result message, accepting both the
 * modern `toolName` field and the legacy snake_case `tool_name` field.
 * Returns undefined when neither field holds a non-blank string.
 */
function getToolResultName(msg: AgentMessage): string | undefined {
  const record = msg as { toolName?: unknown; tool_name?: unknown };
  for (const candidate of [record.toolName, record.tool_name]) {
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      return candidate;
    }
  }
  return undefined;
}
function isReadToolResultMessage(msg: AgentMessage): boolean {
return isToolResultMessage(msg) && getToolResultName(msg) === "read";
/**
 * Builds the pi-style truncation marker, e.g. "[... 123 more characters truncated]".
 * The count is floored to an integer and clamped to at least 1 so the notice
 * never reads "0 more characters".
 */
export function formatContextLimitTruncationNotice(truncatedChars: number): string {
  const count = Math.max(1, Math.floor(truncatedChars));
  return `[... ${count} ${CONTEXT_LIMIT_TRUNCATION_NOTICE}]`;
}
function truncateTextToBudget(text: string, maxChars: number): string {
@@ -70,21 +44,25 @@ function truncateTextToBudget(text: string, maxChars: number): string {
}
if (maxChars <= 0) {
return CONTEXT_LIMIT_TRUNCATION_NOTICE;
return formatContextLimitTruncationNotice(text.length);
}
const bodyBudget = Math.max(0, maxChars - CONTEXT_LIMIT_TRUNCATION_SUFFIX.length);
if (bodyBudget <= 0) {
return CONTEXT_LIMIT_TRUNCATION_NOTICE;
let bodyBudget = maxChars;
for (let i = 0; i < 4; i += 1) {
const estimatedSuffix = formatContextLimitTruncationNotice(
Math.max(1, text.length - bodyBudget),
);
bodyBudget = Math.max(0, maxChars - estimatedSuffix.length);
}
let cutPoint = bodyBudget;
const newline = text.lastIndexOf("\n", bodyBudget);
const newline = text.lastIndexOf("\n", cutPoint);
if (newline > bodyBudget * 0.7) {
cutPoint = newline;
}
return text.slice(0, cutPoint) + CONTEXT_LIMIT_TRUNCATION_SUFFIX;
const omittedChars = text.length - cutPoint;
return text.slice(0, cutPoint) + formatContextLimitTruncationNotice(omittedChars);
}
function replaceToolResultText(msg: AgentMessage, text: string): AgentMessage {
@@ -104,89 +82,6 @@ function estimateBudgetToTextBudget(maxChars: number): number {
return Math.max(0, Math.floor(maxChars / TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO));
}
/**
 * Shrinks a single tool-result message so its estimated char footprint fits
 * within `maxChars`, keeping a readable slice of the original text where the
 * budget allows and degrading to a short placeholder when it does not.
 * Non-tool-result messages and messages already within budget are returned
 * untouched.
 */
function compactToolResultToEstimateBudget(
  msg: AgentMessage,
  maxChars: number,
  cache: MessageCharEstimateCache,
): AgentMessage {
  if (!isToolResultMessage(msg)) {
    return msg;
  }
  const estimatedChars = estimateMessageCharsCached(msg, cache);
  if (estimatedChars <= maxChars) {
    return msg;
  }
  const rawText = getToolResultText(msg);
  if (!rawText) {
    // No extractable text to slice: replace the whole result.
    return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
  }
  // Convert the estimate-space budget into a raw-text budget.
  const textBudget = estimateBudgetToTextBudget(maxChars);
  if (textBudget <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
    // A kept slice would be no larger than the placeholder itself.
    return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
  }
  const maxCompactedTextChars = Math.max(MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS, textBudget);
  if (maxCompactedTextChars <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
    return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
  }
  // Keep at least a minimal readable prefix, but never let the floor exceed
  // what fits alongside the compaction suffix.
  const minKeepChars = Math.max(
    96,
    Math.min(
      MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS,
      maxCompactedTextChars - PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX.length - 1,
    ),
  );
  const compactedText = truncateToolResultText(rawText, maxCompactedTextChars, {
    suffix: PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX,
    minKeepChars,
  });
  return replaceToolResultText(msg, compactedText);
}
/**
 * Last-resort reducer: rewrites tool results to the bare placeholder string,
 * in compaction order, until at least `charsNeeded` estimated chars have been
 * freed. Mutates `messages` in place and returns the chars actually reduced
 * (which may be less than requested).
 */
function compactToPlaceholderInPlace(params: {
  messages: AgentMessage[];
  charsNeeded: number;
  cache: MessageCharEstimateCache;
}): number {
  const { messages, charsNeeded, cache } = params;
  if (charsNeeded <= 0) {
    return 0;
  }
  let reduced = 0;
  for (const i of resolveToolResultCompactionOrder(messages)) {
    const msg = messages[i];
    if (!isToolResultMessage(msg)) {
      continue;
    }
    const before = estimateMessageCharsCached(msg, cache);
    if (before <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
      // Already at or below placeholder size; replacing it cannot free chars.
      continue;
    }
    const compacted = replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
    applyMessageMutationInPlace(msg, compacted, cache);
    const after = estimateMessageCharsCached(msg, cache);
    if (after >= before) {
      // Mutation did not shrink the estimate; don't count it as progress.
      continue;
    }
    reduced += before - after;
    if (reduced >= charsNeeded) {
      break;
    }
  }
  return reduced;
}
function truncateToolResultToChars(
msg: AgentMessage,
maxChars: number,
@@ -203,12 +98,16 @@ function truncateToolResultToChars(
const rawText = getToolResultText(msg);
if (!rawText) {
return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE);
const omittedChars = Math.max(
1,
estimateBudgetToTextBudget(Math.max(estimatedChars - maxChars, 1)),
);
return replaceToolResultText(msg, formatContextLimitTruncationNotice(omittedChars));
}
const textBudget = estimateBudgetToTextBudget(maxChars);
if (textBudget <= 0) {
return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE);
return replaceToolResultText(msg, formatContextLimitTruncationNotice(rawText.length));
}
if (rawText.length <= textBudget) {
@@ -219,163 +118,27 @@ function truncateToolResultToChars(
return replaceToolResultText(msg, truncatedText);
}
/**
 * Frees roughly `charsNeeded` estimated chars by shrinking tool results in
 * place. First trims each candidate toward a per-message target budget; if
 * that is still insufficient, falls back to replacing remaining tool results
 * with the bare placeholder. Returns the total estimated chars reclaimed.
 */
function compactExistingToolResultsInPlace(params: {
  messages: AgentMessage[];
  charsNeeded: number;
  cache: MessageCharEstimateCache;
}): number {
  const { messages, charsNeeded, cache } = params;
  if (charsNeeded <= 0) {
    return 0;
  }
  let reduced = 0;
  // Keep the most recent tool result visible as long as older tool outputs can
  // absorb the overflow. Among older tool results, compact newest-first so we
  // still preserve as much of the cached prefix as possible.
  for (const i of resolveToolResultCompactionOrder(messages)) {
    const msg = messages[i];
    if (!isToolResultMessage(msg)) {
      continue;
    }
    const before = estimateMessageCharsCached(msg, cache);
    if (before <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
      continue;
    }
    // Aim to shrink this message just enough to cover the remaining deficit,
    // but never below the minimum readable compacted estimate.
    const targetAfter = Math.max(
      MIN_COMPACTED_TOOL_RESULT_ESTIMATE_CHARS,
      before - (charsNeeded - reduced),
    );
    let compacted = compactToolResultToEstimateBudget(msg, targetAfter, cache);
    let after = estimateMessageCharsCached(compacted, cache);
    if (after >= before) {
      // Trimming didn't help; degrade straight to the placeholder.
      compacted = replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
      after = estimateMessageCharsCached(compacted, cache);
    }
    applyMessageMutationInPlace(msg, compacted, cache);
    if (after >= before) {
      continue;
    }
    reduced += before - after;
    if (reduced >= charsNeeded) {
      break;
    }
  }
  if (reduced < charsNeeded) {
    // Trimming alone was not enough; free the rest via placeholder replacement.
    reduced += compactToPlaceholderInPlace({
      messages,
      charsNeeded: charsNeeded - reduced,
      cache,
    });
  }
  return reduced;
}
/**
 * Orders tool-result message indexes for compaction: every older tool result
 * newest-first, with the most recent tool result deliberately placed last so
 * it is only touched after all older outputs have been considered.
 */
function resolveToolResultCompactionOrder(messages: AgentMessage[]): number[] {
  const indexes = messages
    .map((msg, i) => (isToolResultMessage(msg) ? i : -1))
    .filter((i) => i >= 0);
  if (indexes.length <= 1) {
    return indexes;
  }
  const newest = indexes[indexes.length - 1];
  return [...indexes.slice(0, -1).reverse(), newest];
}
function getNewestToolResultIndex(messages: AgentMessage[]): number | undefined {
for (let i = messages.length - 1; i >= 0; i -= 1) {
if (isToolResultMessage(messages[i])) {
return i;
}
}
return undefined;
}
/**
 * Decides whether a preemptive context-overflow error should be thrown rather
 * than compacting away the newest tool result, when that newest result came
 * from the `read` tool and already fits the per-tool limit. Runs the
 * compaction pipeline against a cloned context and reports true only if the
 * simulation would have rewritten the newest read result's text.
 */
function shouldPreferOverflowForLatestRead(params: {
  messages: AgentMessage[];
  contextBudgetChars: number;
  maxSingleToolResultChars: number;
}): boolean {
  const newestToolResultIndex = getNewestToolResultIndex(params.messages);
  if (newestToolResultIndex === undefined) {
    return false;
  }
  const newestToolResult = params.messages[newestToolResultIndex];
  if (!isReadToolResultMessage(newestToolResult)) {
    return false;
  }
  const initialCache = createMessageCharEstimateCache();
  if (
    estimateMessageCharsCached(newestToolResult, initialCache) > params.maxSingleToolResultChars
  ) {
    // An individually oversized read result is handled by per-tool truncation.
    return false;
  }
  // Simulate on a clone so the real context is never mutated here.
  const simulatedMessages = cloneMessagesForGuard(params.messages);
  const estimateCache = createMessageCharEstimateCache();
  for (const message of simulatedMessages) {
    if (!isToolResultMessage(message)) {
      continue;
    }
    const truncated = truncateToolResultToChars(
      message,
      params.maxSingleToolResultChars,
      estimateCache,
    );
    applyMessageMutationInPlace(message, truncated, estimateCache);
  }
  const currentChars = estimateContextChars(simulatedMessages, estimateCache);
  if (currentChars <= params.contextBudgetChars) {
    // Per-tool truncation alone keeps the context under budget.
    return false;
  }
  const newestToolResultAfterPerToolLimit = simulatedMessages[newestToolResultIndex];
  const newestToolResultTextBefore = getToolResultText(newestToolResultAfterPerToolLimit);
  compactExistingToolResultsInPlace({
    messages: simulatedMessages,
    charsNeeded: currentChars - params.contextBudgetChars,
    cache: estimateCache,
  });
  // If aggregate compaction had to touch the newest read result, prefer overflow.
  return getToolResultText(simulatedMessages[newestToolResultIndex]) !== newestToolResultTextBefore;
}
/**
 * Produces a guard-local copy of the context: a new array whose elements are
 * shallow clones of each message, so in-place mutation of the clones never
 * leaks back into the caller's messages.
 */
function cloneMessagesForGuard(messages: AgentMessage[]): AgentMessage[] {
  const clones: AgentMessage[] = [];
  for (const msg of messages) {
    const copy = { ...(msg as unknown as Record<string, unknown>) };
    clones.push(copy as unknown as AgentMessage);
  }
  return clones;
}
function contextNeedsToolResultCompaction(params: {
function toolResultsNeedTruncation(params: {
messages: AgentMessage[];
contextBudgetChars: number;
maxSingleToolResultChars: number;
}): boolean {
const { messages, contextBudgetChars, maxSingleToolResultChars } = params;
const { messages, maxSingleToolResultChars } = params;
const estimateCache = createMessageCharEstimateCache();
let sawToolResult = false;
for (const message of messages) {
if (!isToolResultMessage(message)) {
continue;
}
sawToolResult = true;
if (estimateMessageCharsCached(message, estimateCache) > maxSingleToolResultChars) {
return true;
}
}
return sawToolResult && estimateContextChars(messages, estimateCache) > contextBudgetChars;
return false;
}
function applyMessageMutationInPlace(
@@ -400,15 +163,13 @@ function applyMessageMutationInPlace(
}
}
function enforceToolResultContextBudgetInPlace(params: {
function enforceToolResultLimitInPlace(params: {
messages: AgentMessage[];
contextBudgetChars: number;
maxSingleToolResultChars: number;
}): void {
const { messages, contextBudgetChars, maxSingleToolResultChars } = params;
const { messages, maxSingleToolResultChars } = params;
const estimateCache = createMessageCharEstimateCache();
// Ensure each tool result has an upper bound before considering total context usage.
for (const message of messages) {
if (!isToolResultMessage(message)) {
continue;
@@ -416,19 +177,6 @@ function enforceToolResultContextBudgetInPlace(params: {
const truncated = truncateToolResultToChars(message, maxSingleToolResultChars, estimateCache);
applyMessageMutationInPlace(message, truncated, estimateCache);
}
let currentChars = estimateContextChars(messages, estimateCache);
if (currentChars <= contextBudgetChars) {
return;
}
// Prefer compacting older tool outputs before sacrificing the newest one;
// stop once the context is back under budget.
compactExistingToolResultsInPlace({
messages,
charsNeeded: currentChars - contextBudgetChars,
cache: estimateCache,
});
}
export function installToolResultContextGuard(params: {
@@ -440,16 +188,16 @@ export function installToolResultContextGuard(params: {
1_024,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * CONTEXT_INPUT_HEADROOM_RATIO),
);
const preemptiveOverflowChars = Math.max(
contextBudgetChars,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO),
);
const maxSingleToolResultChars = Math.max(
1_024,
Math.floor(
contextWindowTokens * TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE * SINGLE_TOOL_RESULT_CONTEXT_SHARE,
),
);
const preemptiveOverflowChars = Math.max(
contextBudgetChars,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO),
);
// Agent.transformContext is private in pi-coding-agent, so access it via a
// narrow runtime view to keep callsites type-safe while preserving behavior.
@@ -462,32 +210,19 @@ export function installToolResultContextGuard(params: {
: messages;
const sourceMessages = Array.isArray(transformed) ? transformed : messages;
if (
shouldPreferOverflowForLatestRead({
messages: sourceMessages,
contextBudgetChars,
maxSingleToolResultChars,
})
) {
throw new Error(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
}
const contextMessages = contextNeedsToolResultCompaction({
const contextMessages = toolResultsNeedTruncation({
messages: sourceMessages,
contextBudgetChars,
maxSingleToolResultChars,
})
? cloneMessagesForGuard(sourceMessages)
: sourceMessages;
enforceToolResultContextBudgetInPlace({
messages: contextMessages,
contextBudgetChars,
maxSingleToolResultChars,
});
if (contextMessages !== sourceMessages) {
enforceToolResultLimitInPlace({
messages: contextMessages,
maxSingleToolResultChars,
});
}
// After tool-result compaction, check if context still exceeds the high-water mark.
// If it does, non-tool-result content dominates and only full LLM-based session
// compaction can reduce context size. Throwing a context overflow error triggers
// the existing overflow recovery cascade in run.ts.
const postEnforcementChars = estimateContextChars(
contextMessages,
createMessageCharEstimateCache(),

View File

@@ -342,6 +342,112 @@ describe("truncateOversizedToolResultsInSession", () => {
openSpy.mockRestore();
}
});
it("rewrites aggregate medium tool results when their combined size still overflows the session", async () => {
const sessionFile = "/tmp/tool-result-truncation-aggregate-session.jsonl";
const sessionManager = SessionManager.inMemory();
sessionManager.appendMessage(makeUserMessage("u".repeat(20_000)));
sessionManager.appendMessage(makeAssistantMessage("reading files"));
sessionManager.appendMessage(makeToolResult("a".repeat(10_000)));
sessionManager.appendMessage(makeToolResult("b".repeat(10_000)));
sessionManager.appendMessage(makeToolResult("c".repeat(10_000)));
const openSpy = vi
.spyOn(SessionManager, "open")
.mockReturnValue(sessionManager as unknown as ReturnType<typeof SessionManager.open>);
try {
const result = await truncateOversizedToolResultsInSession({
sessionFile,
contextWindowTokens: 10_000,
sessionKey: "agent:main:aggregate-test",
});
expect(result.truncated).toBe(true);
expect(result.truncatedCount).toBeGreaterThan(0);
const branch = sessionManager.getBranch();
const toolTexts = branch
.filter((entry) => entry.type === "message" && entry.message.role === "toolResult")
.map((entry) =>
entry.type === "message" && entry.message.role === "toolResult"
? getFirstToolResultText(entry.message)
: "",
);
expect(toolTexts.some((text) => text.includes("truncated"))).toBe(true);
expect(toolTexts.some((text) => text.length < 10_000)).toBe(true);
} finally {
openSpy.mockRestore();
}
});
it("lets a retry pass the real guard after aggregate session rewrite", async () => {
const { PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE, installToolResultContextGuard } =
await import("./tool-result-context-guard.js");
const sessionFile = "/tmp/tool-result-truncation-seam-session.jsonl";
const contextWindowTokens = 10_000;
const originalMessages = [
makeUserMessage("u".repeat(20_000)),
makeAssistantMessage("reading files"),
makeToolResult("a".repeat(10_000), "call_a"),
makeToolResult("b".repeat(10_000), "call_b"),
makeToolResult("c".repeat(10_000), "call_c"),
];
const guardAgent = {};
installToolResultContextGuard({ agent: guardAgent, contextWindowTokens });
await expect(
(
guardAgent as {
transformContext?: (messages: AgentMessage[], signal: AbortSignal) => unknown;
}
).transformContext?.(originalMessages, new AbortController().signal),
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
expect(
sessionLikelyHasOversizedToolResults({
messages: originalMessages,
contextWindowTokens,
}),
).toBe(true);
const sessionManager = SessionManager.inMemory();
for (const message of originalMessages) {
sessionManager.appendMessage(message);
}
const openSpy = vi
.spyOn(SessionManager, "open")
.mockReturnValue(sessionManager as unknown as ReturnType<typeof SessionManager.open>);
try {
const rewriteResult = await truncateOversizedToolResultsInSession({
sessionFile,
contextWindowTokens,
sessionKey: "agent:main:seam-test",
});
expect(rewriteResult.truncated).toBe(true);
expect(rewriteResult.truncatedCount).toBeGreaterThan(0);
const rewrittenMessages = sessionManager
.getBranch()
.filter((entry) => entry.type === "message")
.map((entry) => (entry.type === "message" ? entry.message : null))
.filter((message): message is AgentMessage => message !== null);
const retryAgent = {};
installToolResultContextGuard({ agent: retryAgent, contextWindowTokens });
await expect(
(
retryAgent as {
transformContext?: (messages: AgentMessage[], signal: AbortSignal) => unknown;
}
).transformContext?.(rewrittenMessages, new AbortController().signal),
).resolves.toBeDefined();
} finally {
openSpy.mockRestore();
}
});
});
describe("sessionLikelyHasOversizedToolResults", () => {
@@ -365,6 +471,21 @@ describe("sessionLikelyHasOversizedToolResults", () => {
).toBe(true);
});
it("returns true when several medium tool results exceed the aggregate overflow budget", () => {
const messages = [
makeUserMessage("u".repeat(20_000)),
makeToolResult("a".repeat(10_000)),
makeToolResult("b".repeat(10_000)),
makeToolResult("c".repeat(10_000)),
];
expect(
sessionLikelyHasOversizedToolResults({
messages,
contextWindowTokens: 10_000,
}),
).toBe(true);
});
it("returns false for empty messages", () => {
expect(
sessionLikelyHasOversizedToolResults({

View File

@@ -4,6 +4,12 @@ import { SessionManager } from "@mariozechner/pi-coding-agent";
import { emitSessionTranscriptUpdate } from "../../sessions/transcript-events.js";
import { acquireSessionWriteLock } from "../session-write-lock.js";
import { log } from "./logger.js";
import {
CHARS_PER_TOKEN_ESTIMATE,
TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE,
createMessageCharEstimateCache,
estimateContextChars,
} from "./tool-result-char-estimator.js";
import { rewriteTranscriptEntriesInSessionManager } from "./transcript-rewrite.js";
/**
@@ -12,6 +18,11 @@ import { rewriteTranscriptEntriesInSessionManager } from "./transcript-rewrite.j
* consume more than 30% of the context window even without other messages.
*/
const MAX_TOOL_RESULT_CONTEXT_SHARE = 0.3;
const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
const PREEMPTIVE_OVERFLOW_RATIO = 0.9;
const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO =
CHARS_PER_TOKEN_ESTIMATE / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE;
const AGGREGATE_TRUNCATION_MIN_KEEP_CHARS = 256;
/**
* Default hard cap for a single live tool result text block.
@@ -43,10 +54,60 @@ const TRUNCATION_SUFFIX =
"offset/limit parameters to read smaller chunks.]";
type ToolResultTruncationOptions = {
suffix?: string;
suffix?: string | ((truncatedChars: number) => string);
minKeepChars?: number;
};
type ToolResultRewriteCandidate = {
entryId: string;
entryIndex: number;
message: AgentMessage;
textLength: number;
};
function calculateContextBudgetChars(contextWindowTokens: number): number {
return Math.max(
1_024,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * CONTEXT_INPUT_HEADROOM_RATIO),
);
}
function calculatePreemptiveOverflowChars(contextWindowTokens: number): number {
return Math.max(
calculateContextBudgetChars(contextWindowTokens),
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO),
);
}
function estimateToolResultCharsFromTextLength(textLength: number): number {
return Math.ceil(textLength * TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO);
}
function collectToolResultRewriteCandidates(branch: ReturnType<SessionManager["getBranch"]>): {
candidates: ToolResultRewriteCandidate[];
messages: AgentMessage[];
} {
const candidates: ToolResultRewriteCandidate[] = [];
const messages: AgentMessage[] = [];
for (let i = 0; i < branch.length; i++) {
const entry = branch[i];
if (entry.type !== "message") {
continue;
}
messages.push(entry.message);
if ((entry.message as { role?: string }).role !== "toolResult") {
continue;
}
candidates.push({
entryId: entry.id,
entryIndex: i,
message: entry.message,
textLength: getToolResultTextLength(entry.message),
});
}
return { candidates, messages };
}
/**
* Marker inserted between head and tail when using head+tail truncation.
*/
@@ -82,12 +143,16 @@ export function truncateToolResultText(
maxChars: number,
options: ToolResultTruncationOptions = {},
): string {
const suffix = options.suffix ?? TRUNCATION_SUFFIX;
const suffixFactory: (truncatedChars: number) => string =
typeof options.suffix === "function"
? options.suffix
: () => (options.suffix ?? TRUNCATION_SUFFIX);
const minKeepChars = options.minKeepChars ?? MIN_KEEP_CHARS;
if (text.length <= maxChars) {
return text;
}
const budget = Math.max(minKeepChars, maxChars - suffix.length);
const defaultSuffix = suffixFactory(Math.max(1, text.length - maxChars));
const budget = Math.max(minKeepChars, maxChars - defaultSuffix.length);
// If tail looks important, split budget between head and tail
if (hasImportantTail(text) && budget > minKeepChars * 2) {
@@ -108,7 +173,9 @@ export function truncateToolResultText(
tailStart = tailNewline + 1;
}
return text.slice(0, headCut) + MIDDLE_OMISSION_MARKER + text.slice(tailStart) + suffix;
const keptText = text.slice(0, headCut) + MIDDLE_OMISSION_MARKER + text.slice(tailStart);
const suffix = suffixFactory(Math.max(1, text.length - keptText.length));
return keptText + suffix;
}
}
@@ -118,7 +185,9 @@ export function truncateToolResultText(
if (lastNewline > budget * 0.8) {
cutPoint = lastNewline;
}
return text.slice(0, cutPoint) + suffix;
const keptText = text.slice(0, cutPoint);
const suffix = suffixFactory(Math.max(1, text.length - keptText.length));
return keptText + suffix;
}
/**
@@ -167,7 +236,10 @@ export function truncateToolResultMessage(
maxChars: number,
options: ToolResultTruncationOptions = {},
): AgentMessage {
const suffix = options.suffix ?? TRUNCATION_SUFFIX;
const suffixFactory: (truncatedChars: number) => string =
typeof options.suffix === "function"
? options.suffix
: () => (options.suffix ?? TRUNCATION_SUFFIX);
const minKeepChars = options.minKeepChars ?? MIN_KEEP_CHARS;
const content = (msg as { content?: unknown }).content;
if (!Array.isArray(content)) {
@@ -191,10 +263,19 @@ export function truncateToolResultMessage(
}
// Proportional budget for this block
const blockShare = textBlock.text.length / totalTextChars;
const blockBudget = Math.max(minKeepChars + suffix.length, Math.floor(maxChars * blockShare));
const defaultSuffix = suffixFactory(
Math.max(1, textBlock.text.length - Math.floor(maxChars * blockShare)),
);
const blockBudget = Math.max(
minKeepChars + defaultSuffix.length,
Math.floor(maxChars * blockShare),
);
return {
...textBlock,
text: truncateToolResultText(textBlock.text, blockBudget, { suffix, minKeepChars }),
text: truncateToolResultText(textBlock.text, blockBudget, {
suffix: suffixFactory,
minKeepChars,
}),
};
});
@@ -231,47 +312,84 @@ export async function truncateOversizedToolResultsInSession(params: {
return { truncated: false, truncatedCount: 0, reason: "empty session" };
}
// Find oversized tool result entries and their indices in the branch
const oversizedIndices: number[] = [];
for (let i = 0; i < branch.length; i++) {
const entry = branch[i];
if (entry.type !== "message") {
continue;
}
const msg = entry.message;
if ((msg as { role?: string }).role !== "toolResult") {
continue;
}
const textLength = getToolResultTextLength(msg);
if (textLength > maxChars) {
oversizedIndices.push(i);
log.info(
`[tool-result-truncation] Found oversized tool result: ` +
`entry=${entry.id} chars=${textLength} maxChars=${maxChars} ` +
`sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
);
}
}
if (oversizedIndices.length === 0) {
return { truncated: false, truncatedCount: 0, reason: "no oversized tool results" };
}
const replacements = oversizedIndices.flatMap((index) => {
const entry = branch[index];
if (!entry || entry.type !== "message") {
return [];
}
const message = truncateToolResultMessage(entry.message, maxChars);
const newLength = getToolResultTextLength(message);
const { candidates, messages } = collectToolResultRewriteCandidates(branch);
const oversizedCandidates = candidates.filter((candidate) => candidate.textLength > maxChars);
for (const candidate of oversizedCandidates) {
log.info(
`[tool-result-truncation] Truncated tool result: ` +
`originalEntry=${entry.id} newChars=${newLength} ` +
`[tool-result-truncation] Found oversized tool result: ` +
`entry=${candidate.entryId} chars=${candidate.textLength} maxChars=${maxChars} ` +
`sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
);
return [{ entryId: entry.id, message }];
}
const currentContextChars = estimateContextChars(messages, createMessageCharEstimateCache());
const overflowThresholdChars = calculatePreemptiveOverflowChars(contextWindowTokens);
const aggregateCharsNeeded = Math.max(0, currentContextChars - overflowThresholdChars);
if (oversizedCandidates.length === 0 && aggregateCharsNeeded <= 0) {
return { truncated: false, truncatedCount: 0, reason: "no tool result truncation needed" };
}
let remainingAggregateCharsNeeded = aggregateCharsNeeded;
const candidatesByRecency = [...candidates].toSorted((a, b) => b.entryIndex - a.entryIndex);
const replacements = candidatesByRecency.flatMap((candidate) => {
const aggregateEligible =
remainingAggregateCharsNeeded > 0 &&
candidate.textLength > AGGREGATE_TRUNCATION_MIN_KEEP_CHARS;
const targetChars =
candidate.textLength > maxChars
? maxChars
: aggregateEligible
? Math.max(
AGGREGATE_TRUNCATION_MIN_KEEP_CHARS,
candidate.textLength -
Math.ceil(remainingAggregateCharsNeeded / TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO),
)
: candidate.textLength;
if (targetChars >= candidate.textLength) {
return [];
}
const minKeepChars =
candidate.textLength > maxChars ? undefined : AGGREGATE_TRUNCATION_MIN_KEEP_CHARS;
const message = truncateToolResultMessage(
candidate.message,
targetChars,
minKeepChars === undefined ? {} : { minKeepChars },
);
const newLength = getToolResultTextLength(message);
if (newLength >= candidate.textLength) {
return [];
}
const reducedEstimateChars = estimateToolResultCharsFromTextLength(
candidate.textLength - newLength,
);
remainingAggregateCharsNeeded = Math.max(
0,
remainingAggregateCharsNeeded - reducedEstimateChars,
);
log.info(
`[tool-result-truncation] Truncated tool result: ` +
`originalEntry=${candidate.entryId} newChars=${newLength} ` +
`sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
);
return [{ entryId: candidate.entryId, message }];
});
if (replacements.length === 0) {
return {
truncated: false,
truncatedCount: 0,
reason:
oversizedCandidates.length > 0
? "oversized tool results could not be reduced"
: "aggregate tool result overflow could not be reduced",
};
}
const rewriteResult = rewriteTranscriptEntriesInSessionManager({
sessionManager,
replacements,
@@ -351,16 +469,21 @@ export function sessionLikelyHasOversizedToolResults(params: {
}): boolean {
const { messages, contextWindowTokens } = params;
const maxChars = calculateMaxToolResultChars(contextWindowTokens);
const contextBudgetChars = calculatePreemptiveOverflowChars(contextWindowTokens);
let sawToolResult = false;
let aggregateToolResultChars = 0;
for (const msg of messages) {
if ((msg as { role?: string }).role !== "toolResult") {
continue;
}
sawToolResult = true;
const textLength = getToolResultTextLength(msg);
aggregateToolResultChars += estimateToolResultCharsFromTextLength(textLength);
if (textLength > maxChars) {
return true;
}
}
return false;
return sawToolResult && aggregateToolResultChars > contextBudgetChars;
}

View File

@@ -158,6 +158,17 @@ describe("installSessionToolResultGuard", () => {
expectPersistedRoles(sm, ["assistant", "toolResult"]);
});
it("applies pi-style count-based truncation wording when persisting oversized tool results", () => {
const sm = SessionManager.inMemory();
installSessionToolResultGuard(sm);
appendToolResultText(sm, "x".repeat(80_000));
const text = getToolResultText(getPersistedMessages(sm));
expect(text).toContain("more characters truncated");
expect(text).toMatch(/\[\.\.\. \d+ more characters truncated\]$/);
});
it("backfills blank toolResult names from pending tool calls", () => {
const sm = SessionManager.inMemory();
installSessionToolResultGuard(sm);

View File

@@ -5,6 +5,7 @@ import type {
PluginHookBeforeMessageWriteResult,
} from "../plugins/types.js";
import { emitSessionTranscriptUpdate } from "../sessions/transcript-events.js";
import { formatContextLimitTruncationNotice } from "./pi-embedded-runner/tool-result-context-guard.js";
import {
DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS,
truncateToolResultMessage,
@@ -12,10 +13,6 @@ import {
import { createPendingToolCallState } from "./session-tool-result-state.js";
import { makeMissingToolResult, sanitizeToolCallInputs } from "./session-transcript-repair.js";
import { extractToolCallsFromAssistant, extractToolResultId } from "./tool-call-id.js";
const GUARD_TRUNCATION_SUFFIX =
"\n\n⚠ [Content truncated during persistence — original exceeded size limit. " +
"Use offset/limit parameters or request specific sections for large content.]";
const RAW_APPEND_MESSAGE = Symbol("openclaw.session.rawAppendMessage");
type SessionManagerWithRawAppend = SessionManager & {
@@ -32,7 +29,7 @@ function capToolResultSize(msg: AgentMessage): AgentMessage {
return msg;
}
return truncateToolResultMessage(msg, DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS, {
suffix: GUARD_TRUNCATION_SUFFIX,
suffix: (truncatedChars) => formatContextLimitTruncationNotice(truncatedChars),
minKeepChars: 2_000,
});
}

View File

@@ -102,7 +102,7 @@ export function buildSubagentSystemPrompt(params: {
"3. **Don't initiate** - No heartbeats, no proactive actions, no side quests",
"4. **Be ephemeral** - You may be terminated after task completion. That's fine.",
"5. **Trust push-based completion** - Descendant results are auto-announced back to you; do not busy-poll for status.",
"6. **Recover from compacted/truncated tool output** - If you see `[compacted: tool output removed to free context]` or `[truncated: output exceeded context limit]`, assume prior output was reduced. Re-read only what you need using smaller chunks (`read` with offset/limit, or targeted `rg`/`head`/`tail`) instead of full-file `cat`.",
"6. **Recover from truncated tool output** - If you see a notice like `[..., N more characters truncated]`, assume prior output was reduced. Re-read only what you need using smaller chunks (`read` with offset/limit, or targeted `rg`/`head`/`tail`) instead of full-file `cat`.",
"",
"## Output Format",
"When complete, your final response should include:",

View File

@@ -993,8 +993,7 @@ describe("buildSubagentSystemPrompt", () => {
expect(prompt).toContain("Avoid polling loops");
expect(prompt).toContain("spawned by the main agent");
expect(prompt).toContain("reported to the main agent");
expect(prompt).toContain("[compacted: tool output removed to free context]");
expect(prompt).toContain("[truncated: output exceeded context limit]");
expect(prompt).toContain("[..., N more characters truncated]");
expect(prompt).toContain("offset/limit");
expect(prompt).toContain("instead of full-file `cat`");
});