From 3aa4199ef0987b5b09d076104573fecc7eadeedc Mon Sep 17 00:00:00 2001 From: Keshav Rao Date: Mon, 16 Mar 2026 19:04:00 -0700 Subject: [PATCH] agent: preemptive context overflow detection during tool loops (#29371) Merged via squash. Prepared head SHA: 19661b8fb1e3aea20e438b28e8323d7f42fe01d6 Co-authored-by: keshav55 <3821985+keshav55@users.noreply.github.com> Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com> Reviewed-by: @jalehman --- CHANGELOG.md | 2 + extensions/telegram/src/bot/helpers.test.ts | 54 ++++++++++++++++- .../tool-result-context-guard.test.ts | 60 +++++++++++++++++++ .../tool-result-context-guard.ts | 22 +++++++ 4 files changed, 137 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4192bba536a..d948e2b59ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -105,6 +105,8 @@ Docs: https://docs.openclaw.ai - Agents/usage tracking: stop forcing `supportsUsageInStreaming: false` on non-native OpenAI-completions providers so compatible backends report token usage and cost again instead of showing all zeros. (#46500) Fixes #46142. Thanks @ademczuk. - Plugins/subagents: preserve gateway-owned plugin subagent access across runtime, tool, and embedded-runner load paths so gateway plugin tools and context engines can still spawn and manage subagents after the loader cache split. (#46648) Thanks @jalehman. - Control UI/overview: keep the language dropdown aligned with the persisted locale during dashboard startup so refreshing the page does not fall back to English before locale hydration completes. (#48019) Thanks @git-jxj. +- Agents/compaction: rerun transcript repair after `session.compact()` so orphaned `tool_result` blocks cannot survive compaction and break later Anthropic requests. (#16095) Thanks @claw-sylphx. +- Agents/compaction: trigger overflow recovery from the tool-result guard when context still exceeds the safe threshold after tool-result compaction, so long tool loops compact before the next model call hard-fails. 
(#29371) Thanks @keshav55. ## 2026.3.13 diff --git a/extensions/telegram/src/bot/helpers.test.ts b/extensions/telegram/src/bot/helpers.test.ts index fe30465b40c..5777216f2ac 100644 --- a/extensions/telegram/src/bot/helpers.test.ts +++ b/extensions/telegram/src/bot/helpers.test.ts @@ -1,3 +1,4 @@ +import type { Message } from "grammy/types"; import { describe, expect, it } from "vitest"; import { buildTelegramThreadParams, @@ -404,8 +405,59 @@ describe("hasBotMention", () => { ), ).toBe(true); }); -}); + it("matches mention followed by punctuation", () => { + expect( + hasBotMention( + { + text: "@gaian, what's up?", + chat: { id: 1, type: "supergroup" }, + // oxlint-disable-next-line typescript/no-explicit-any + } as any, + "gaian", + ), + ).toBe(true); + }); + + it("matches mention followed by space", () => { + expect( + hasBotMention( + { + text: "@gaian how are you", + chat: { id: 1, type: "supergroup" }, + // oxlint-disable-next-line typescript/no-explicit-any + } as any, + "gaian", + ), + ).toBe(true); + }); + + it("does not match substring of a longer username", () => { + expect( + hasBotMention( + { + text: "@gaianchat_bot hello", + chat: { id: 1, type: "supergroup" }, + // oxlint-disable-next-line typescript/no-explicit-any + } as any, + "gaian", + ), + ).toBe(false); + }); + + it("does not match when mention is a prefix of another word", () => { + expect( + hasBotMention( + { + text: "@gaianbot do something", + chat: { id: 1, type: "supergroup" }, + // oxlint-disable-next-line typescript/no-explicit-any + } as any, + "gaian", + ), + ).toBe(false); + }); +}); describe("expandTextLinks", () => { it("returns text unchanged when no entities are provided", () => { expect(expandTextLinks("Hello world")).toBe("Hello world"); diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts index df50558e951..9f265d3b56e 100644 --- 
a/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts +++ b/src/agents/pi-embedded-runner/tool-result-context-guard.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest"; import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js"; import { CONTEXT_LIMIT_TRUNCATION_NOTICE, + PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER, installToolResultContextGuard, } from "./tool-result-context-guard.js"; @@ -268,4 +269,63 @@ describe("installToolResultContextGuard", () => { expect(oldResult.details).toBeUndefined(); expect(newResult.details).toBeUndefined(); }); + + it("throws preemptive context overflow when context exceeds 90% after tool-result compaction", async () => { + const agent = makeGuardableAgent(); + + installToolResultContextGuard({ + agent, + // contextBudgetChars = 1000 * 4 * 0.75 = 3000 + // preemptiveOverflowChars = 1000 * 4 * 0.9 = 3600 + contextWindowTokens: 1_000, + }); + + // Large user message (non-compactable) pushes context past 90% threshold. + const contextForNextCall = [makeUser("u".repeat(3_700)), makeToolResult("call_1", "small")]; + + await expect( + agent.transformContext?.(contextForNextCall, new AbortController().signal), + ).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE); + }); + + it("does not throw when context is under 90% after tool-result compaction", async () => { + const agent = makeGuardableAgent(); + + installToolResultContextGuard({ + agent, + contextWindowTokens: 1_000, + }); + + // Context well under the 3600-char preemptive threshold. 
+ const contextForNextCall = [makeUser("u".repeat(1_000)), makeToolResult("call_1", "small")]; + + await expect( + agent.transformContext?.(contextForNextCall, new AbortController().signal), + ).resolves.not.toThrow(); + }); + + it("compacts tool results before checking the preemptive overflow threshold", async () => { + const agent = makeGuardableAgent(); + + installToolResultContextGuard({ + agent, + contextWindowTokens: 1_000, + }); + + // Large user message + large tool result. The guard should compact the tool + // result first, then check the overflow threshold. Even after compaction the + // user content alone pushes past 90%, so the overflow error fires. + const contextForNextCall = [ + makeUser("u".repeat(3_700)), + makeToolResult("call_old", "x".repeat(2_000)), + ]; + + await expect( + agent.transformContext?.(contextForNextCall, new AbortController().signal), + ).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE); + + // Tool result should have been compacted before the overflow check. + const toolResultText = getToolResultText(contextForNextCall[1]); + expect(toolResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER); + }); }); diff --git a/src/agents/pi-embedded-runner/tool-result-context-guard.ts b/src/agents/pi-embedded-runner/tool-result-context-guard.ts index 4a3d3482421..1ab23ede3cf 100644 --- a/src/agents/pi-embedded-runner/tool-result-context-guard.ts +++ b/src/agents/pi-embedded-runner/tool-result-context-guard.ts @@ -14,6 +14,9 @@ import { // Keep a conservative input budget to absorb tokenizer variance and provider framing overhead. const CONTEXT_INPUT_HEADROOM_RATIO = 0.75; const SINGLE_TOOL_RESULT_CONTEXT_SHARE = 0.5; +// High-water mark: if context exceeds this ratio after tool-result compaction, +// trigger full session compaction via the existing overflow recovery cascade. 
+const PREEMPTIVE_OVERFLOW_RATIO = 0.9; export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "[truncated: output exceeded context limit]"; const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`; @@ -21,6 +24,9 @@ const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`; export const PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER = "[compacted: tool output removed to free context]"; +export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE = + "Preemptive context overflow: estimated context size exceeds safe threshold during tool loop"; + type GuardableTransformContext = ( messages: AgentMessage[], signal: AbortSignal, @@ -196,6 +202,10 @@ export function installToolResultContextGuard(params: { contextWindowTokens * TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE * SINGLE_TOOL_RESULT_CONTEXT_SHARE, ), ); + const preemptiveOverflowChars = Math.max( + contextBudgetChars, + Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO), + ); // Agent.transformContext is private in pi-coding-agent, so access it via a // narrow runtime view to keep callsites type-safe while preserving behavior. @@ -214,6 +224,18 @@ export function installToolResultContextGuard(params: { maxSingleToolResultChars, }); + // After tool-result compaction, check if context still exceeds the high-water mark. + // If it does, non-tool-result content dominates and only full LLM-based session + // compaction can reduce context size. Throwing a context overflow error triggers + // the existing overflow recovery cascade in run.ts. + const postEnforcementChars = estimateContextChars( + contextMessages, + createMessageCharEstimateCache(), + ); + if (postEnforcementChars > preemptiveOverflowChars) { + throw new Error(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE); + } + return contextMessages; }) as GuardableTransformContext;