From 3aa4199ef0987b5b09d076104573fecc7eadeedc Mon Sep 17 00:00:00 2001
From: Keshav Rao <keshavrao250@gmail.com>
Date: Mon, 16 Mar 2026 19:04:00 -0700
Subject: [PATCH] agent: preemptive context overflow detection during tool
 loops (#29371)

Merged via squash.

Prepared head SHA: 19661b8fb1e3aea20e438b28e8323d7f42fe01d6
Co-authored-by: keshav55 <3821985+keshav55@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman
---
 CHANGELOG.md                                  |  2 +
 extensions/telegram/src/bot/helpers.test.ts   | 54 ++++++++++++++++-
 .../tool-result-context-guard.test.ts         | 60 +++++++++++++++++++
 .../tool-result-context-guard.ts              | 22 +++++++
 4 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4192bba536a..d948e2b59ee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -105,6 +105,8 @@ Docs: https://docs.openclaw.ai
 - Agents/usage tracking: stop forcing `supportsUsageInStreaming: false` on non-native OpenAI-completions providers so compatible backends report token usage and cost again instead of showing all zeros. (#46500) Fixes #46142. Thanks @ademczuk.
 - Plugins/subagents: preserve gateway-owned plugin subagent access across runtime, tool, and embedded-runner load paths so gateway plugin tools and context engines can still spawn and manage subagents after the loader cache split. (#46648) Thanks @jalehman.
 - Control UI/overview: keep the language dropdown aligned with the persisted locale during dashboard startup so refreshing the page does not fall back to English before locale hydration completes. (#48019) Thanks @git-jxj.
+- Agents/compaction: rerun transcript repair after `session.compact()` so orphaned `tool_result` blocks cannot survive compaction and break later Anthropic requests. (#16095) Thanks @claw-sylphx.
+- Agents/compaction: trigger overflow recovery from the tool-result guard when the estimated context still exceeds the safe threshold after tool-result compaction, so long tool loops compact before the next model call hard-fails. (#29371) Thanks @keshav55.
 
 ## 2026.3.13
 
diff --git a/extensions/telegram/src/bot/helpers.test.ts b/extensions/telegram/src/bot/helpers.test.ts
index fe30465b40c..5777216f2ac 100644
--- a/extensions/telegram/src/bot/helpers.test.ts
+++ b/extensions/telegram/src/bot/helpers.test.ts
@@ -1,3 +1,4 @@
+import type { Message } from "grammy/types";
 import { describe, expect, it } from "vitest";
 import {
   buildTelegramThreadParams,
@@ -404,8 +405,59 @@ describe("hasBotMention", () => {
       ),
     ).toBe(true);
   });
-});
 
+  it("matches mention followed by punctuation", () => {
+    expect(
+      hasBotMention(
+        {
+          text: "@gaian, what's up?",
+          chat: { id: 1, type: "supergroup" },
+          // oxlint-disable-next-line typescript/no-explicit-any
+        } as any,
+        "gaian",
+      ),
+    ).toBe(true);
+  });
+
+  it("matches mention followed by space", () => {
+    expect(
+      hasBotMention(
+        {
+          text: "@gaian how are you",
+          chat: { id: 1, type: "supergroup" },
+          // oxlint-disable-next-line typescript/no-explicit-any
+        } as any,
+        "gaian",
+      ),
+    ).toBe(true);
+  });
+
+  it("does not match substring of a longer username", () => {
+    expect(
+      hasBotMention(
+        {
+          text: "@gaianchat_bot hello",
+          chat: { id: 1, type: "supergroup" },
+          // oxlint-disable-next-line typescript/no-explicit-any
+        } as any,
+        "gaian",
+      ),
+    ).toBe(false);
+  });
+
+  it("does not match when mention is a prefix of another word", () => {
+    expect(
+      hasBotMention(
+        {
+          text: "@gaianbot do something",
+          chat: { id: 1, type: "supergroup" },
+          // oxlint-disable-next-line typescript/no-explicit-any
+        } as any,
+        "gaian",
+      ),
+    ).toBe(false);
+  });
+});
 describe("expandTextLinks", () => {
   it("returns text unchanged when no entities are provided", () => {
     expect(expandTextLinks("Hello world")).toBe("Hello world");
diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
index df50558e951..9f265d3b56e 100644
--- a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
+++ b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts
@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
 import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
 import {
   CONTEXT_LIMIT_TRUNCATION_NOTICE,
+  PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
   PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
   installToolResultContextGuard,
 } from "./tool-result-context-guard.js";
@@ -268,4 +269,63 @@ describe("installToolResultContextGuard", () => {
     expect(oldResult.details).toBeUndefined();
     expect(newResult.details).toBeUndefined();
   });
+
+  it("throws preemptive context overflow when context exceeds 90% after tool-result compaction", async () => {
+    const agent = makeGuardableAgent();
+
+    installToolResultContextGuard({
+      agent,
+      // contextBudgetChars = 1000 * 4 * 0.75 = 3000
+      // preemptiveOverflowChars = 1000 * 4 * 0.9 = 3600
+      contextWindowTokens: 1_000,
+    });
+
+    // Large user message (non-compactable) pushes context past 90% threshold.
+    const contextForNextCall = [makeUser("u".repeat(3_700)), makeToolResult("call_1", "small")];
+
+    await expect(
+      agent.transformContext?.(contextForNextCall, new AbortController().signal),
+    ).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
+  });
+
+  it("does not throw when context is under 90% after tool-result compaction", async () => {
+    const agent = makeGuardableAgent();
+
+    installToolResultContextGuard({
+      agent,
+      contextWindowTokens: 1_000,
+    });
+
+    // Context well under the 3600-char preemptive threshold.
+    const contextForNextCall = [makeUser("u".repeat(1_000)), makeToolResult("call_1", "small")];
+
+    await expect(
+      agent.transformContext?.(contextForNextCall, new AbortController().signal),
+    ).resolves.not.toThrow();
+  });
+
+  it("compacts tool results before checking the preemptive overflow threshold", async () => {
+    const agent = makeGuardableAgent();
+
+    installToolResultContextGuard({
+      agent,
+      contextWindowTokens: 1_000,
+    });
+
+    // Large user message + large tool result. The guard should compact the tool
+    // result first, then check the overflow threshold. Even after compaction the
+    // user content alone pushes past 90%, so the overflow error fires.
+    const contextForNextCall = [
+      makeUser("u".repeat(3_700)),
+      makeToolResult("call_old", "x".repeat(2_000)),
+    ];
+
+    await expect(
+      agent.transformContext?.(contextForNextCall, new AbortController().signal),
+    ).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
+
+    // Tool result should have been compacted before the overflow check.
+    const toolResultText = getToolResultText(contextForNextCall[1]);
+    expect(toolResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
+  });
 });
diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.ts
index 4a3d3482421..1ab23ede3cf 100644
--- a/src/agents/pi-embedded-runner/tool-result-context-guard.ts
+++ b/src/agents/pi-embedded-runner/tool-result-context-guard.ts
@@ -14,6 +14,9 @@ import {
 // Keep a conservative input budget to absorb tokenizer variance and provider framing overhead.
 const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
 const SINGLE_TOOL_RESULT_CONTEXT_SHARE = 0.5;
+// High-water mark: if context exceeds this ratio after tool-result compaction,
+// trigger full session compaction via the existing overflow recovery cascade.
+const PREEMPTIVE_OVERFLOW_RATIO = 0.9;
 
 export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "[truncated: output exceeded context limit]";
 const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`;
@@ -21,6 +24,9 @@ const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`;
 export const PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER =
   "[compacted: tool output removed to free context]";
 
+export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE =
+  "Preemptive context overflow: estimated context size exceeds safe threshold during tool loop";
+
 type GuardableTransformContext = (
   messages: AgentMessage[],
   signal: AbortSignal,
@@ -196,6 +202,10 @@ export function installToolResultContextGuard(params: {
       contextWindowTokens * TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE * SINGLE_TOOL_RESULT_CONTEXT_SHARE,
     ),
   );
+  const preemptiveOverflowChars = Math.max(
+    contextBudgetChars,
+    Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO),
+  );
 
   // Agent.transformContext is private in pi-coding-agent, so access it via a
   // narrow runtime view to keep callsites type-safe while preserving behavior.
@@ -214,6 +224,18 @@ export function installToolResultContextGuard(params: {
       maxSingleToolResultChars,
     });
 
+    // After tool-result compaction, check if context still exceeds the high-water mark.
+    // If it does, non-tool-result content dominates and only full LLM-based session
+    // compaction can reduce context size. Throwing a context overflow error triggers
+    // the existing overflow recovery cascade in run.ts.
+    const postEnforcementChars = estimateContextChars(
+      contextMessages,
+      createMessageCharEstimateCache(),
+    );
+    if (postEnforcementChars > preemptiveOverflowChars) {
+      throw new Error(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
+    }
+
     return contextMessages;
   }) as GuardableTransformContext;