fix(agents): preserve tool output during context guarding

This commit is contained in:
Peter Steinberger
2026-04-05 21:51:20 +01:00
parent 9b7002ee59
commit 8a43223014
2 changed files with 246 additions and 57 deletions

View File

@@ -4,6 +4,7 @@ import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
import {
CONTEXT_LIMIT_TRUNCATION_NOTICE,
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE,
PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
installToolResultContextGuard,
} from "./tool-result-context-guard.js";
@@ -93,29 +94,37 @@ async function applyGuardToContext(
return await agent.transformContext?.(contextForNextCall, new AbortController().signal);
}
function expectCompactedToolResultsWithoutContextNotice(
contextForNextCall: AgentMessage[],
oldIndex: number,
newIndex: number,
) {
const oldResultText = getToolResultText(contextForNextCall[oldIndex]);
const newResultText = getToolResultText(contextForNextCall[newIndex]);
expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(newResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(newResultText).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
/**
 * Asserts that `text` is a trimmed-but-still-readable tool result:
 * it carries the compaction notice, still contains 64 back-to-back
 * repetitions of `prefix`, is not the bare compaction placeholder, and is not
 * tagged with the context-limit truncation notice.
 */
function expectReadableCompaction(text: string, prefix: string) {
  const carriesCompactionNotice = text.includes(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE);
  expect(carriesCompactionNotice).toBe(true);

  const readableRun = prefix.repeat(64);
  expect(text).toContain(readableRun);

  expect(text).not.toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
  expect(text).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
}
/**
 * Asserts that `text` still exposes a readable slice of the tool output
 * (64 back-to-back repetitions of `prefix`), is not the bare compaction
 * placeholder, and is marked with either the compaction notice or the
 * context-limit truncation notice.
 */
function expectReadableToolSlice(text: string, prefix: string) {
  const readableRun = prefix.repeat(64);
  expect(text).toContain(readableRun);
  expect(text).not.toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);

  // Either marker is acceptable; which one appears depends on the guard path taken.
  const acceptedNotices = [
    PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE,
    CONTEXT_LIMIT_TRUNCATION_NOTICE,
  ];
  const carriesAnyNotice = acceptedNotices.some((notice) => text.includes(notice));
  expect(carriesAnyNotice).toBe(true);
}
describe("installToolResultContextGuard", () => {
it("compacts newest-first when total context overflows, even if each result fits individually", async () => {
it("returns a cloned guarded context so original tool output stays visible", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = makeTwoToolResultOverflowContext();
const transformed = await applyGuardToContext(agent, contextForNextCall);
expect(transformed).toBe(contextForNextCall);
expectCompactedToolResultsWithoutContextNotice(contextForNextCall, 1, 2);
expect(transformed).not.toBe(contextForNextCall);
const transformedMessages = transformed as AgentMessage[];
expectReadableCompaction(getToolResultText(transformedMessages[1]), "x");
expectReadableCompaction(getToolResultText(transformedMessages[2]), "y");
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(1_000));
expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(1_000));
});
it("keeps compacting newest-first until context is back under budget", async () => {
it("keeps readable slices of overflowing tool results before using a placeholder", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
@@ -130,14 +139,17 @@ describe("installToolResultContextGuard", () => {
makeToolResult("call_3", "c".repeat(800)),
];
await agent.transformContext?.(contextForNextCall, new AbortController().signal);
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
const first = getToolResultText(contextForNextCall[1]);
const second = getToolResultText(contextForNextCall[2]);
const third = getToolResultText(contextForNextCall[3]);
const first = getToolResultText(transformed[1]);
const second = getToolResultText(transformed[2]);
const third = getToolResultText(transformed[3]);
expect(first).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(second).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expectReadableCompaction(first, "a");
expectReadableCompaction(second, "b");
expect(third).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
});
@@ -150,20 +162,24 @@ describe("installToolResultContextGuard", () => {
});
const contextForNextCall: AgentMessage[] = [makeUser("stress")];
let transformed: AgentMessage[] | undefined;
for (let i = 1; i <= 4; i++) {
contextForNextCall.push(makeToolResult(`call_${i}`, String(i).repeat(95_000)));
await agent.transformContext?.(contextForNextCall, new AbortController().signal);
transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
}
const toolResultTexts = contextForNextCall
const toolResultTexts = (transformed ?? [])
.filter((msg) => msg.role === "toolResult")
.map((msg) => getToolResultText(msg as AgentMessage));
// Newest-first compaction: oldest results stay intact to preserve the
// cached prefix; the newest overflowing result is compacted.
expect(toolResultTexts[0]?.length).toBe(95_000);
expect(toolResultTexts[3]).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(toolResultTexts.join("\n")).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
// Large outputs are capped per-tool before aggregate compaction kicks in.
expect(toolResultTexts[0]?.length).toBe(50_000);
expect(toolResultTexts[0]).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
expectReadableCompaction(toolResultTexts[3] ?? "", "4");
expect(toolResultTexts[3]).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
});
it("truncates an individually oversized tool result with a context-limit notice", async () => {
@@ -176,9 +192,12 @@ describe("installToolResultContextGuard", () => {
const contextForNextCall = [makeToolResult("call_big", "z".repeat(5_000))];
await agent.transformContext?.(contextForNextCall, new AbortController().signal);
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
const newResultText = getToolResultText(contextForNextCall[0]);
const newResultText = getToolResultText(transformed[0]);
expect(newResultText.length).toBeLessThan(5_000);
expect(newResultText).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
});
@@ -197,8 +216,12 @@ describe("installToolResultContextGuard", () => {
makeToolResult("call_new", "y".repeat(1_000)),
];
await agent.transformContext?.(contextForNextCall, new AbortController().signal);
expectCompactedToolResultsWithoutContextNotice(contextForNextCall, 1, 2);
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
expectReadableCompaction(getToolResultText(transformed[1]), "x");
expect(getToolResultText(transformed[2])).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
});
it("wraps an existing transformContext and guards the transformed output", async () => {
@@ -215,7 +238,7 @@ describe("installToolResultContextGuard", () => {
expect(transformed).not.toBe(contextForNextCall);
const transformedMessages = transformed as AgentMessage[];
const oldResultText = getToolResultText(transformedMessages[1]);
expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expectReadableCompaction(oldResultText, "x");
});
it("handles legacy role=tool string outputs when enforcing context budget", async () => {
@@ -232,13 +255,18 @@ describe("installToolResultContextGuard", () => {
makeLegacyToolResult("call_new", "y".repeat(1_000)),
];
await agent.transformContext?.(contextForNextCall, new AbortController().signal);
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
const oldResultText = (contextForNextCall[1] as { content?: unknown }).content;
const newResultText = (contextForNextCall[2] as { content?: unknown }).content;
const oldResultText = (transformed[1] as { content?: unknown }).content;
const newResultText = (transformed[2] as { content?: unknown }).content;
expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(newResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(typeof oldResultText).toBe("string");
expect(typeof newResultText).toBe("string");
expect(oldResultText).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE);
expect(newResultText).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE);
});
it("drops oversized read-tool details payloads when compacting tool results", async () => {
@@ -255,19 +283,22 @@ describe("installToolResultContextGuard", () => {
makeToolResultWithDetails("call_new", "y".repeat(900), "d".repeat(8_000)),
];
await agent.transformContext?.(contextForNextCall, new AbortController().signal);
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
const oldResult = contextForNextCall[1] as {
const oldResult = transformed[1] as {
details?: unknown;
};
const newResult = contextForNextCall[2] as {
const newResult = transformed[2] as {
details?: unknown;
};
const oldResultText = getToolResultText(contextForNextCall[1]);
const newResultText = getToolResultText(contextForNextCall[2]);
const oldResultText = getToolResultText(transformed[1]);
const newResultText = getToolResultText(transformed[2]);
expect(oldResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(newResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expectReadableToolSlice(oldResultText, "x");
expectReadableToolSlice(newResultText, "y");
expect(oldResult.details).toBeUndefined();
expect(newResult.details).toBeUndefined();
});
@@ -322,12 +353,11 @@ describe("installToolResultContextGuard", () => {
makeToolResult("call_old", "x".repeat(2_000)),
];
await expect(
agent.transformContext?.(contextForNextCall, new AbortController().signal),
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
const guarded = agent.transformContext?.(contextForNextCall, new AbortController().signal);
await expect(guarded).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
// The guard works on a cloned context, so the caller's original tool result must stay untouched even when the overflow error is thrown.
const toolResultText = getToolResultText(contextForNextCall[1]);
expect(toolResultText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(toolResultText).toBe("x".repeat(2_000));
});
});

View File

@@ -10,6 +10,7 @@ import {
invalidateMessageCharsCacheEntry,
isToolResultMessage,
} from "./tool-result-char-estimator.js";
import { truncateToolResultText } from "./tool-result-truncation.js";
// Keep a conservative input budget to absorb tokenizer variance and provider framing overhead.
const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
@@ -23,10 +24,20 @@ const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`;
// Text that stands in for a tool result whose output was dropped entirely.
export const PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER =
"[compacted: tool output removed to free context]";
// Marker for a tool result that was trimmed but still carries part of its
// output; it is passed (via the suffix below) to the truncation helper.
export const PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE =
"[compacted: tool output trimmed to free context]";
// Error message the spec expects when the guarded transform rejects.
// NOTE(review): presumably thrown when the context is still over budget after
// compaction — the throw site is not visible here; confirm.
export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE =
"Preemptive context overflow: estimated context size exceeds safe threshold during tool loop";
// Compaction notice on its own line, in the form used as the truncation suffix.
const PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX = `\n${PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE}`;
// Lower bound, in raw text characters, for the text budget of a trimmed tool result.
const MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS = 96;
// Conversion factor between the two per-token estimates: estimate budgets are
// divided by it to get text characters, and text characters are multiplied by
// it to get estimate characters.
const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO =
CHARS_PER_TOKEN_ESTIMATE / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE;
// The text-character minimum above expressed in estimate characters (rounded
// up); used as the floor for the per-message compaction target.
const MIN_COMPACTED_TOOL_RESULT_ESTIMATE_CHARS = Math.ceil(
MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS * TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO,
);
type GuardableTransformContext = (
messages: AgentMessage[],
signal: AbortSignal,
@@ -74,7 +85,11 @@ function replaceToolResultText(msg: AgentMessage, text: string): AgentMessage {
} as AgentMessage;
}
function truncateToolResultToChars(
/**
 * Converts a budget expressed in estimate characters into a budget of raw
 * tool-result text characters.
 *
 * @param maxChars - Budget in estimate characters.
 * @returns The budget divided by `TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO`, rounded
 *   down and clamped so it is never negative.
 */
function estimateBudgetToTextBudget(maxChars: number): number {
  const textChars = Math.floor(maxChars / TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO);
  return Math.max(0, textChars);
}
function compactToolResultToEstimateBudget(
msg: AgentMessage,
maxChars: number,
cache: MessageCharEstimateCache,
@@ -90,14 +105,36 @@ function truncateToolResultToChars(
const rawText = getToolResultText(msg);
if (!rawText) {
return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE);
return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
}
const truncatedText = truncateTextToBudget(rawText, maxChars);
return replaceToolResultText(msg, truncatedText);
const textBudget = estimateBudgetToTextBudget(maxChars);
if (textBudget <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
}
const maxCompactedTextChars = Math.max(MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS, textBudget);
if (maxCompactedTextChars <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
}
const minKeepChars = Math.max(
96,
Math.min(
MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS,
maxCompactedTextChars - PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX.length - 1,
),
);
const compactedText = truncateToolResultText(rawText, maxCompactedTextChars, {
suffix: PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX,
minKeepChars,
});
return replaceToolResultText(msg, compactedText);
}
function compactExistingToolResultsInPlace(params: {
function compactToPlaceholderInPlace(params: {
messages: AgentMessage[];
charsNeeded: number;
cache: MessageCharEstimateCache;
@@ -108,9 +145,6 @@ function compactExistingToolResultsInPlace(params: {
}
let reduced = 0;
// Compact newest-first so more of the cached prefix survives: rewriting
// messages[k] for small k invalidates the provider prompt cache from that point onward.
// Tradeoff: the model loses recent tool output instead of old.
for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i];
if (!isToolResultMessage(msg)) {
@@ -138,6 +172,124 @@ function compactExistingToolResultsInPlace(params: {
return reduced;
}
/**
 * Caps a single tool result so that it fits an estimate budget of `maxChars`.
 *
 * @param msg - Message to inspect; anything that is not a tool result is
 *   returned as-is.
 * @param maxChars - Budget in estimate characters for this one message.
 * @param cache - Estimate cache used when sizing `msg`.
 * @returns `msg` itself when no change is needed, otherwise the message
 *   produced by `replaceToolResultText` with either the capped text or, when
 *   there is no text / no text budget left, the context-limit notice alone.
 */
function truncateToolResultToChars(
  msg: AgentMessage,
  maxChars: number,
  cache: MessageCharEstimateCache,
): AgentMessage {
  // Only tool results whose estimate exceeds the cap are rewritten.
  if (!isToolResultMessage(msg) || estimateMessageCharsCached(msg, cache) <= maxChars) {
    return msg;
  }

  // Over budget but without any extractable text: only the notice is kept.
  const originalText = getToolResultText(msg);
  if (!originalText) {
    return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE);
  }

  // The cap is in estimate characters; convert it before comparing to text length.
  const allowedTextChars = estimateBudgetToTextBudget(maxChars);
  if (allowedTextChars <= 0) {
    return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE);
  }

  // The text alone may already fit even though the message estimate did not;
  // in that case it is re-applied unchanged through replaceToolResultText.
  const cappedText =
    originalText.length <= allowedTextChars
      ? originalText
      : truncateTextToBudget(originalText, allowedTextChars);
  return replaceToolResultText(msg, cappedText);
}
/**
 * Frees estimated context characters by compacting tool results, newest first,
 * mutating the entries of `messages` in place.
 *
 * Each candidate is first trimmed to a readable slice; if that does not shrink
 * it, the full placeholder is tried instead. A message is only rewritten when
 * the rewrite actually lowers its estimate, so small results that cannot be
 * shrunk keep their original output during this pass. If the readable pass
 * does not free enough, the remainder is delegated to
 * `compactToPlaceholderInPlace`.
 *
 * @param params.messages - Context messages; tool results may be mutated.
 * @param params.charsNeeded - Estimate characters that must be freed.
 * @param params.cache - Estimate cache shared with the caller.
 * @returns Total estimate characters freed (0 when `charsNeeded <= 0`).
 */
function compactExistingToolResultsInPlace(params: {
  messages: AgentMessage[];
  charsNeeded: number;
  cache: MessageCharEstimateCache;
}): number {
  const { messages, charsNeeded, cache } = params;
  if (charsNeeded <= 0) {
    return 0;
  }

  let reduced = 0;
  // Compact newest-first so more of the cached prefix survives: rewriting
  // messages[k] for small k invalidates the provider prompt cache from that point onward.
  // Keep a truncated slice of newer tool output before falling back to a
  // full placeholder so recent, user-visible results remain readable when possible.
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i];
    if (!isToolResultMessage(msg)) {
      continue;
    }

    const before = estimateMessageCharsCached(msg, cache);
    if (before <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
      continue;
    }

    // Aim to free exactly what is still missing, but never go below the
    // minimum readable size for a trimmed result.
    const targetAfter = Math.max(
      MIN_COMPACTED_TOOL_RESULT_ESTIMATE_CHARS,
      before - (charsNeeded - reduced),
    );

    let compacted = compactToolResultToEstimateBudget(msg, targetAfter, cache);
    let after = estimateMessageCharsCached(compacted, cache);
    if (after >= before) {
      // Trimming did not help; try the bare placeholder instead.
      compacted = replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
      after = estimateMessageCharsCached(compacted, cache);
    }

    // Leave the message untouched when neither form is smaller: overwriting it
    // would discard tool output without freeing any context.
    if (after >= before) {
      continue;
    }

    applyMessageMutationInPlace(msg, compacted, cache);
    reduced += before - after;
    if (reduced >= charsNeeded) {
      break;
    }
  }

  // Readable slices were not enough: fall back to full placeholders.
  if (reduced < charsNeeded) {
    reduced += compactToPlaceholderInPlace({
      messages,
      charsNeeded: charsNeeded - reduced,
      cache,
    });
  }

  return reduced;
}
/**
 * Builds a new array holding a shallow copy of every message, so that
 * top-level fields can be reassigned on the copies without touching the
 * caller's message objects. Nested values (for example content arrays) are
 * still shared with the originals.
 */
function cloneMessagesForGuard(messages: AgentMessage[]): AgentMessage[] {
  const copies: AgentMessage[] = [];
  for (const original of messages) {
    const fields = original as unknown as Record<string, unknown>;
    copies.push({ ...fields } as unknown as AgentMessage);
  }
  return copies;
}
/**
 * Decides whether the guard has any tool-result work to do for this context.
 *
 * @param params.messages - Context messages to inspect (not modified).
 * @param params.contextBudgetChars - Budget for the whole context, in estimate characters.
 * @param params.maxSingleToolResultChars - Budget for one tool result, in estimate characters.
 * @returns `true` as soon as one tool result exceeds the single-result budget,
 *   or when at least one tool result exists and the whole context exceeds the
 *   context budget; `false` otherwise (including when there are no tool results).
 */
function contextNeedsToolResultCompaction(params: {
  messages: AgentMessage[];
  contextBudgetChars: number;
  maxSingleToolResultChars: number;
}): boolean {
  const {
    messages: candidates,
    contextBudgetChars: totalBudget,
    maxSingleToolResultChars: singleBudget,
  } = params;
  // Local cache: sizes computed in the loop are reused by the total estimate below.
  const sizeCache = createMessageCharEstimateCache();

  let toolResultCount = 0;
  for (const candidate of candidates) {
    if (isToolResultMessage(candidate)) {
      toolResultCount += 1;
      const size = estimateMessageCharsCached(candidate, sizeCache);
      if (size > singleBudget) {
        return true;
      }
    }
  }

  // Without any tool result there is nothing this guard could compact.
  if (toolResultCount === 0) {
    return false;
  }
  return estimateContextChars(candidates, sizeCache) > totalBudget;
}
function applyMessageMutationInPlace(
target: AgentMessage,
source: AgentMessage,
@@ -221,7 +373,14 @@ export function installToolResultContextGuard(params: {
? await originalTransformContext.call(mutableAgent, messages, signal)
: messages;
const contextMessages = Array.isArray(transformed) ? transformed : messages;
const sourceMessages = Array.isArray(transformed) ? transformed : messages;
const contextMessages = contextNeedsToolResultCompaction({
messages: sourceMessages,
contextBudgetChars,
maxSingleToolResultChars,
})
? cloneMessagesForGuard(sourceMessages)
: sourceMessages;
enforceToolResultContextBudgetInPlace({
messages: contextMessages,
contextBudgetChars,