align tool-result truncation with pi semantics

This commit is contained in:
Tak Hoffman
2026-04-05 22:11:09 -05:00
committed by Peter Steinberger
parent 5c1b1eb169
commit 7fc1a74ee9
9 changed files with 514 additions and 622 deletions

View File

@@ -1,4 +1,4 @@
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import {
makeAttemptResult,
makeCompactionSuccess,
@@ -10,9 +10,11 @@ import {
loadRunOverflowCompactionHarness,
mockedContextEngine,
mockedCompactDirect,
mockedEvaluateContextWindowGuard,
mockedIsCompactionFailureError,
mockedIsLikelyContextOverflowError,
mockedLog,
mockedResolveContextWindowInfo,
mockedRunEmbeddedAttempt,
mockedSessionLikelyHasOversizedToolResults,
mockedTruncateOversizedToolResultsInSession,
@@ -172,6 +174,86 @@ describe("overflow compaction in run loop", () => {
expect(result.meta.error).toBeUndefined();
});
it("falls back to tool-result truncation and retries when real aggregate tool-result detection trips", async () => {
const { sessionLikelyHasOversizedToolResults } = await vi.importActual<
typeof import("./tool-result-truncation.js")
>("./tool-result-truncation.js");
mockedResolveContextWindowInfo.mockReturnValue({
tokens: 10_000,
source: "model",
});
mockedEvaluateContextWindowGuard.mockReturnValue({
shouldWarn: false,
shouldBlock: false,
tokens: 10_000,
source: "model",
});
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(
makeAttemptResult({
promptError: makeOverflowError(),
messagesSnapshot: [
{
role: "user",
content: "u".repeat(20_000),
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
{
role: "toolResult",
toolCallId: "call_a",
toolName: "read",
content: [{ type: "text", text: "a".repeat(10_000) }],
isError: false,
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
{
role: "toolResult",
toolCallId: "call_b",
toolName: "read",
content: [{ type: "text", text: "b".repeat(10_000) }],
isError: false,
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
{
role: "toolResult",
toolCallId: "call_c",
toolName: "read",
content: [{ type: "text", text: "c".repeat(10_000) }],
isError: false,
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
],
}),
)
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
mockedCompactDirect.mockResolvedValueOnce({
ok: false,
compacted: false,
reason: "nothing to compact",
});
mockedSessionLikelyHasOversizedToolResults.mockImplementation(
((params: Parameters<typeof sessionLikelyHasOversizedToolResults>[0]) =>
sessionLikelyHasOversizedToolResults(params)) as never,
);
mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({
truncated: true,
truncatedCount: 2,
});
const result = await runEmbeddedPiAgent(baseParams);
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
expect(mockedSessionLikelyHasOversizedToolResults).toHaveBeenCalledWith(
expect.objectContaining({ contextWindowTokens: 10_000 }),
);
expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledWith(
expect.objectContaining({ sessionFile: "/tmp/session.json" }),
);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(mockedLog.info).toHaveBeenCalledWith(
expect.stringContaining("Truncated 2 tool result(s)"),
);
expect(result.meta.error).toBeUndefined();
});
it("retries compaction up to 3 times before giving up", async () => {
const overflowError = makeOverflowError();

View File

@@ -4,8 +4,7 @@ import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
import {
CONTEXT_LIMIT_TRUNCATION_NOTICE,
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE,
PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
formatContextLimitTruncationNotice,
installToolResultContextGuard,
} from "./tool-result-context-guard.js";
@@ -61,6 +60,9 @@ function makeToolResultWithDetails(id: string, text: string, detailText: string)
function getToolResultText(msg: AgentMessage): string {
const content = (msg as { content?: unknown }).content;
if (typeof content === "string") {
return content;
}
if (!Array.isArray(content)) {
return "";
}
@@ -79,342 +81,164 @@ function makeGuardableAgent(
return { transformContext };
}
function makeTwoToolResultOverflowContext(): AgentMessage[] {
return [
makeUser("u".repeat(2_000)),
makeToolResult("call_old", "x".repeat(1_000)),
makeToolResult("call_new", "y".repeat(1_000)),
];
}
async function applyGuardToContext(
agent: { transformContext?: (messages: AgentMessage[], signal: AbortSignal) => unknown },
contextForNextCall: AgentMessage[],
contextWindowTokens = 1_000,
) {
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
contextWindowTokens,
});
return await agent.transformContext?.(contextForNextCall, new AbortController().signal);
}
function expectReadableCompaction(text: string, prefix: string) {
expect(text.includes(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE)).toBe(true);
expect(text).toContain(prefix.repeat(64));
expect(text).not.toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(text).not.toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
function expectPiStyleTruncation(text: string): void {
expect(text).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
expect(text).toMatch(/\[\.\.\. \d+ more characters truncated\]$/);
expect(text).not.toContain("[compacted: tool output removed to free context]");
expect(text).not.toContain("[compacted: tool output trimmed to free context]");
expect(text).not.toContain("[truncated: output exceeded context limit]");
}
function expectReadableToolSlice(text: string, prefix: string) {
expect(text).toContain(prefix.repeat(64));
expect(text).not.toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
expect(
text.includes(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE) ||
text.includes(CONTEXT_LIMIT_TRUNCATION_NOTICE),
).toBe(true);
}
function expectCompactedOrPlaceholder(text: string, prefix: string) {
if (text === PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER) {
return;
}
expectReadableCompaction(text, prefix);
}
describe("formatContextLimitTruncationNotice", () => {
it("formats pi-style truncation wording with a count", () => {
expect(formatContextLimitTruncationNotice(123)).toBe("[... 123 more characters truncated]");
});
});
describe("installToolResultContextGuard", () => {
it("returns a cloned guarded context so original tool output stays visible", async () => {
it("passes through unchanged context when under the per-tool and total budget", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = makeTwoToolResultOverflowContext();
const contextForNextCall = [makeUser("hello"), makeToolResult("call_ok", "small output")];
const transformed = await applyGuardToContext(agent, contextForNextCall);
expect(transformed).not.toBe(contextForNextCall);
const transformedMessages = transformed as AgentMessage[];
expectReadableCompaction(getToolResultText(transformedMessages[1]), "x");
expectReadableCompaction(getToolResultText(transformedMessages[2]), "y");
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(1_000));
expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(1_000));
expect(transformed).toBe(contextForNextCall);
});
it("keeps at least one readable older slice before falling back to a placeholder", async () => {
it("does not preemptively overflow large non-tool context that is still under the high-water mark", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [makeUser("u".repeat(3_200))];
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const transformed = await applyGuardToContext(agent, contextForNextCall);
const contextForNextCall = [
makeUser("u".repeat(2_200)),
makeToolResult("call_1", "a".repeat(800)),
makeToolResult("call_2", "b".repeat(800)),
makeToolResult("call_3", "c".repeat(800)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
const first = getToolResultText(transformed[1]);
const second = getToolResultText(transformed[2]);
const third = getToolResultText(transformed[3]);
expectReadableCompaction(first, "a");
expectReadableCompaction(third, "c");
expect(
second === "b".repeat(800) || second === PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
).toBe(true);
expect(transformed).toBe(contextForNextCall);
});
it("keeps the newest large tool result visible when an older one can absorb overflow", async () => {
it("returns a cloned guarded context so original oversized tool output stays visible", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 100_000,
});
const contextForNextCall: AgentMessage[] = [makeUser("stress")];
let transformed: AgentMessage[] | undefined;
for (let i = 1; i <= 4; i++) {
contextForNextCall.push(makeToolResult(`call_${i}`, String(i).repeat(95_000)));
transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
}
const toolResultTexts = (transformed ?? [])
.filter((msg) => msg.role === "toolResult")
.map((msg) => getToolResultText(msg as AgentMessage));
expect(toolResultTexts[0]).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
expectReadableCompaction(toolResultTexts[1] ?? "", "2");
expectReadableCompaction(toolResultTexts[2] ?? "", "3");
expectReadableToolSlice(toolResultTexts[3] ?? "", "4");
});
it("truncates an individually oversized tool result with a context-limit notice", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [makeToolResult("call_big", "z".repeat(5_000))];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
const transformed = (await applyGuardToContext(agent, contextForNextCall)) as AgentMessage[];
expect(transformed).not.toBe(contextForNextCall);
const newResultText = getToolResultText(transformed[0]);
expect(newResultText.length).toBeLessThan(5_000);
expect(newResultText).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
});
it("falls back to compacting the newest tool result when older ones are insufficient", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [
makeUser("u".repeat(2_600)),
makeToolResult("call_old", "x".repeat(700)),
makeToolResult("call_new", "y".repeat(1_000)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
expectCompactedOrPlaceholder(getToolResultText(transformed[1]), "x");
expectCompactedOrPlaceholder(getToolResultText(transformed[2]), "y");
expectPiStyleTruncation(newResultText);
expect(getToolResultText(contextForNextCall[0])).toBe("z".repeat(5_000));
});
it("wraps an existing transformContext and guards the transformed output", async () => {
const agent = makeGuardableAgent((messages) => {
return messages.map((msg) =>
const agent = makeGuardableAgent((messages) =>
messages.map((msg) =>
castAgentMessage({
...(msg as unknown as Record<string, unknown>),
}),
);
});
const contextForNextCall = makeTwoToolResultOverflowContext();
const transformed = await applyGuardToContext(agent, contextForNextCall);
),
);
const contextForNextCall = [makeToolResult("call_big", "x".repeat(5_000))];
const transformed = (await applyGuardToContext(agent, contextForNextCall)) as AgentMessage[];
expect(transformed).not.toBe(contextForNextCall);
const transformedMessages = transformed as AgentMessage[];
const oldResultText = getToolResultText(transformedMessages[1]);
expectReadableCompaction(oldResultText, "x");
expectPiStyleTruncation(getToolResultText(transformed[0]));
});
it("handles legacy role=tool string outputs when enforcing context budget", async () => {
it("handles legacy role=tool string outputs with pi-style truncation wording", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [makeLegacyToolResult("call_big", "y".repeat(5_000))];
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const transformed = (await applyGuardToContext(agent, contextForNextCall)) as AgentMessage[];
const newResultText = getToolResultText(transformed[0]);
expect(typeof (transformed[0] as { content?: unknown }).content).toBe("string");
expectPiStyleTruncation(newResultText);
});
it("drops oversized tool-result details when truncating once", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [
makeUser("u".repeat(2_000)),
makeLegacyToolResult("call_old", "x".repeat(1_000)),
makeLegacyToolResult("call_new", "y".repeat(1_000)),
makeToolResultWithDetails("call_big", "x".repeat(900), "d".repeat(8_000)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
const transformed = (await applyGuardToContext(agent, contextForNextCall)) as AgentMessage[];
const result = transformed[0] as { details?: unknown };
const newResultText = getToolResultText(transformed[0]);
const oldResultText = (transformed[1] as { content?: unknown }).content;
const newResultText = (transformed[2] as { content?: unknown }).content;
expect(typeof oldResultText).toBe("string");
expect(typeof newResultText).toBe("string");
expect(oldResultText).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE);
expect(newResultText).toContain(PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE);
expectPiStyleTruncation(newResultText);
expect(result.details).toBeUndefined();
expect((contextForNextCall[0] as { details?: unknown }).details).toBeDefined();
});
it("drops oversized read-tool details payloads when compacting tool results", async () => {
it("throws overflow when total context exceeds the budget after one-time truncation", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [
makeUser("u".repeat(1_600)),
makeToolResultWithDetails("call_old", "x".repeat(900), "d".repeat(8_000)),
makeToolResultWithDetails("call_new", "y".repeat(900), "d".repeat(8_000)),
makeUser("u".repeat(2_800)),
makeToolResult("call_ok", "x".repeat(500)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
)) as AgentMessage[];
await expect(applyGuardToContext(agent, contextForNextCall)).rejects.toThrow(
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
);
const oldResult = transformed[1] as {
details?: unknown;
};
const newResult = transformed[2] as {
details?: unknown;
};
const oldResultText = getToolResultText(transformed[1]);
const newResultText = getToolResultText(transformed[2]);
expectReadableToolSlice(oldResultText, "x");
expectReadableToolSlice(newResultText, "y");
expect(oldResult.details).toBeUndefined();
expect(newResult.details).toBeUndefined();
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(500));
});
it("throws overflow instead of compacting the latest read result during aggregate compaction", async () => {
it("throws overflow instead of historically rewriting older tool results", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [
makeUser("u".repeat(2_600)),
makeToolResult("call_old", "x".repeat(300)),
makeUser("u".repeat(2_200)),
makeToolResult("call_1", "a".repeat(500)),
makeToolResult("call_2", "b".repeat(500)),
makeToolResult("call_3", "c".repeat(500)),
];
await expect(applyGuardToContext(agent, contextForNextCall)).rejects.toThrow(
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
);
expect(getToolResultText(contextForNextCall[1])).toBe("a".repeat(500));
expect(getToolResultText(contextForNextCall[2])).toBe("b".repeat(500));
expect(getToolResultText(contextForNextCall[3])).toBe("c".repeat(500));
});
it("throws overflow instead of special-casing the latest read result", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [
makeUser("u".repeat(2_900)),
makeToolResult("call_old", "x".repeat(400)),
makeReadToolResult("call_new", "y".repeat(500)),
];
await expect(
agent.transformContext?.(contextForNextCall, new AbortController().signal),
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
await expect(applyGuardToContext(agent, contextForNextCall)).rejects.toThrow(
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
);
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(300));
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(400));
expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(500));
});
it("keeps the latest read result when older outputs absorb the aggregate overflow", async () => {
it("supports model-window-specific truncation for large but otherwise valid tool results", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [makeToolResult("call_big", "q".repeat(95_000))];
installToolResultContextGuard({
const transformed = (await applyGuardToContext(
agent,
contextWindowTokens: 1_000,
});
const contextForNextCall = [
makeUser("u".repeat(1_400)),
makeToolResult("call_old_1", "a".repeat(350)),
makeToolResult("call_old_2", "b".repeat(350)),
makeReadToolResult("call_new", "c".repeat(500)),
];
const transformed = (await agent.transformContext?.(
contextForNextCall,
new AbortController().signal,
100_000,
)) as AgentMessage[];
expect(getToolResultText(transformed[3])).toBe("c".repeat(500));
});
it("throws preemptive context overflow when context exceeds 90% after tool-result compaction", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
// contextBudgetChars = 1000 * 4 * 0.75 = 3000
// preemptiveOverflowChars = 1000 * 4 * 0.9 = 3600
contextWindowTokens: 1_000,
});
// Large user message (non-compactable) pushes context past 90% threshold.
const contextForNextCall = [makeUser("u".repeat(3_700)), makeToolResult("call_1", "small")];
await expect(
agent.transformContext?.(contextForNextCall, new AbortController().signal),
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
});
it("does not throw when context is under 90% after tool-result compaction", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
// Context well under the 3600-char preemptive threshold.
const contextForNextCall = [makeUser("u".repeat(1_000)), makeToolResult("call_1", "small")];
await expect(
agent.transformContext?.(contextForNextCall, new AbortController().signal),
).resolves.not.toThrow();
});
it("compacts tool results before checking the preemptive overflow threshold", async () => {
const agent = makeGuardableAgent();
installToolResultContextGuard({
agent,
contextWindowTokens: 1_000,
});
// Large user message + large tool result. The guard should compact the tool
// result first, then check the overflow threshold. Even after compaction the
// user content alone pushes past 90%, so the overflow error fires.
const contextForNextCall = [
makeUser("u".repeat(3_700)),
makeToolResult("call_old", "x".repeat(2_000)),
];
const guarded = agent.transformContext?.(contextForNextCall, new AbortController().signal);
await expect(guarded).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
// Tool result should have been compacted before the overflow check.
const toolResultText = getToolResultText(contextForNextCall[1]);
expect(toolResultText).toBe("x".repeat(2_000));
expectPiStyleTruncation(getToolResultText(transformed[0]));
});
});

View File

@@ -10,33 +10,18 @@ import {
invalidateMessageCharsCacheEntry,
isToolResultMessage,
} from "./tool-result-char-estimator.js";
import { truncateToolResultText } from "./tool-result-truncation.js";
// Keep a conservative input budget to absorb tokenizer variance and provider framing overhead.
const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
const SINGLE_TOOL_RESULT_CONTEXT_SHARE = 0.5;
// High-water mark: if context exceeds this ratio after tool-result compaction,
// trigger full session compaction via the existing overflow recovery cascade.
const PREEMPTIVE_OVERFLOW_RATIO = 0.9;
export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "[truncated: output exceeded context limit]";
const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`;
export const PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER =
"[compacted: tool output removed to free context]";
export const PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE =
"[compacted: tool output trimmed to free context]";
export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "more characters truncated";
export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE =
"Preemptive context overflow: estimated context size exceeds safe threshold during tool loop";
const PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX = `\n${PREEMPTIVE_TOOL_RESULT_COMPACTION_NOTICE}`;
const MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS = 96;
const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO =
CHARS_PER_TOKEN_ESTIMATE / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE;
const MIN_COMPACTED_TOOL_RESULT_ESTIMATE_CHARS = Math.ceil(
MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS * TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO,
);
type GuardableTransformContext = (
messages: AgentMessage[],
@@ -49,19 +34,8 @@ type GuardableAgentRecord = {
transformContext?: GuardableTransformContext;
};
/**
 * Resolves the tool name carried by a tool-result message, accepting both the
 * modern `toolName` field and the legacy snake_case `tool_name` field.
 * Returns undefined when neither field holds a non-blank string.
 */
function getToolResultName(msg: AgentMessage): string | undefined {
  const record = msg as { toolName?: unknown; tool_name?: unknown };
  for (const candidate of [record.toolName, record.tool_name]) {
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      return candidate;
    }
  }
  return undefined;
}
function isReadToolResultMessage(msg: AgentMessage): boolean {
return isToolResultMessage(msg) && getToolResultName(msg) === "read";
/**
 * Builds the pi-style truncation marker, e.g. "[... 123 more characters truncated]".
 * The count is floored to an integer and clamped to at least 1 so the notice
 * never reads "0 more characters".
 */
export function formatContextLimitTruncationNotice(truncatedChars: number): string {
  const count = Math.max(1, Math.floor(truncatedChars));
  return `[... ${count} ${CONTEXT_LIMIT_TRUNCATION_NOTICE}]`;
}
function truncateTextToBudget(text: string, maxChars: number): string {
@@ -70,21 +44,25 @@ function truncateTextToBudget(text: string, maxChars: number): string {
}
if (maxChars <= 0) {
return CONTEXT_LIMIT_TRUNCATION_NOTICE;
return formatContextLimitTruncationNotice(text.length);
}
const bodyBudget = Math.max(0, maxChars - CONTEXT_LIMIT_TRUNCATION_SUFFIX.length);
if (bodyBudget <= 0) {
return CONTEXT_LIMIT_TRUNCATION_NOTICE;
let bodyBudget = maxChars;
for (let i = 0; i < 4; i += 1) {
const estimatedSuffix = formatContextLimitTruncationNotice(
Math.max(1, text.length - bodyBudget),
);
bodyBudget = Math.max(0, maxChars - estimatedSuffix.length);
}
let cutPoint = bodyBudget;
const newline = text.lastIndexOf("\n", bodyBudget);
const newline = text.lastIndexOf("\n", cutPoint);
if (newline > bodyBudget * 0.7) {
cutPoint = newline;
}
return text.slice(0, cutPoint) + CONTEXT_LIMIT_TRUNCATION_SUFFIX;
const omittedChars = text.length - cutPoint;
return text.slice(0, cutPoint) + formatContextLimitTruncationNotice(omittedChars);
}
function replaceToolResultText(msg: AgentMessage, text: string): AgentMessage {
@@ -104,89 +82,6 @@ function estimateBudgetToTextBudget(maxChars: number): number {
return Math.max(0, Math.floor(maxChars / TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO));
}
/**
 * Shrinks a single tool-result message so its estimated char footprint fits
 * within `maxChars`, keeping a readable slice of the original text where the
 * budget allows and degrading to a short placeholder when it does not.
 * Non-tool-result messages and messages already within budget are returned
 * untouched.
 */
function compactToolResultToEstimateBudget(
  msg: AgentMessage,
  maxChars: number,
  cache: MessageCharEstimateCache,
): AgentMessage {
  if (!isToolResultMessage(msg)) {
    return msg;
  }
  const estimatedChars = estimateMessageCharsCached(msg, cache);
  if (estimatedChars <= maxChars) {
    return msg;
  }
  const rawText = getToolResultText(msg);
  if (!rawText) {
    // No extractable text to slice: replace the whole result.
    return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
  }
  // Convert the estimate-space budget into a raw-text budget.
  const textBudget = estimateBudgetToTextBudget(maxChars);
  if (textBudget <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
    // A kept slice would be no larger than the placeholder itself.
    return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
  }
  const maxCompactedTextChars = Math.max(MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS, textBudget);
  if (maxCompactedTextChars <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
    return replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
  }
  // Keep at least a minimal readable prefix, but never let the floor exceed
  // what fits alongside the compaction suffix.
  const minKeepChars = Math.max(
    96,
    Math.min(
      MIN_COMPACTED_TOOL_RESULT_TEXT_CHARS,
      maxCompactedTextChars - PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX.length - 1,
    ),
  );
  const compactedText = truncateToolResultText(rawText, maxCompactedTextChars, {
    suffix: PREEMPTIVE_TOOL_RESULT_COMPACTION_SUFFIX,
    minKeepChars,
  });
  return replaceToolResultText(msg, compactedText);
}
/**
 * Last-resort reducer: rewrites tool results to the bare placeholder string,
 * in compaction order, until at least `charsNeeded` estimated chars have been
 * freed. Mutates `messages` in place and returns the chars actually reduced
 * (which may be less than requested).
 */
function compactToPlaceholderInPlace(params: {
  messages: AgentMessage[];
  charsNeeded: number;
  cache: MessageCharEstimateCache;
}): number {
  const { messages, charsNeeded, cache } = params;
  if (charsNeeded <= 0) {
    return 0;
  }
  let reduced = 0;
  for (const i of resolveToolResultCompactionOrder(messages)) {
    const msg = messages[i];
    if (!isToolResultMessage(msg)) {
      continue;
    }
    const before = estimateMessageCharsCached(msg, cache);
    if (before <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
      // Already at or below placeholder size; replacing it cannot free chars.
      continue;
    }
    const compacted = replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
    applyMessageMutationInPlace(msg, compacted, cache);
    const after = estimateMessageCharsCached(msg, cache);
    if (after >= before) {
      // Mutation did not shrink the estimate; don't count it as progress.
      continue;
    }
    reduced += before - after;
    if (reduced >= charsNeeded) {
      break;
    }
  }
  return reduced;
}
function truncateToolResultToChars(
msg: AgentMessage,
maxChars: number,
@@ -203,12 +98,16 @@ function truncateToolResultToChars(
const rawText = getToolResultText(msg);
if (!rawText) {
return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE);
const omittedChars = Math.max(
1,
estimateBudgetToTextBudget(Math.max(estimatedChars - maxChars, 1)),
);
return replaceToolResultText(msg, formatContextLimitTruncationNotice(omittedChars));
}
const textBudget = estimateBudgetToTextBudget(maxChars);
if (textBudget <= 0) {
return replaceToolResultText(msg, CONTEXT_LIMIT_TRUNCATION_NOTICE);
return replaceToolResultText(msg, formatContextLimitTruncationNotice(rawText.length));
}
if (rawText.length <= textBudget) {
@@ -219,163 +118,27 @@ function truncateToolResultToChars(
return replaceToolResultText(msg, truncatedText);
}
/**
 * Frees roughly `charsNeeded` estimated chars by shrinking tool results in
 * place. First trims each candidate toward a per-message target budget; if
 * that is still insufficient, falls back to replacing remaining tool results
 * with the bare placeholder. Returns the total estimated chars reclaimed.
 */
function compactExistingToolResultsInPlace(params: {
  messages: AgentMessage[];
  charsNeeded: number;
  cache: MessageCharEstimateCache;
}): number {
  const { messages, charsNeeded, cache } = params;
  if (charsNeeded <= 0) {
    return 0;
  }
  let reduced = 0;
  // Keep the most recent tool result visible as long as older tool outputs can
  // absorb the overflow. Among older tool results, compact newest-first so we
  // still preserve as much of the cached prefix as possible.
  for (const i of resolveToolResultCompactionOrder(messages)) {
    const msg = messages[i];
    if (!isToolResultMessage(msg)) {
      continue;
    }
    const before = estimateMessageCharsCached(msg, cache);
    if (before <= PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER.length) {
      continue;
    }
    // Aim to shrink this message just enough to cover the remaining deficit,
    // but never below the minimum readable compacted estimate.
    const targetAfter = Math.max(
      MIN_COMPACTED_TOOL_RESULT_ESTIMATE_CHARS,
      before - (charsNeeded - reduced),
    );
    let compacted = compactToolResultToEstimateBudget(msg, targetAfter, cache);
    let after = estimateMessageCharsCached(compacted, cache);
    if (after >= before) {
      // Trimming didn't help; degrade straight to the placeholder.
      compacted = replaceToolResultText(msg, PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
      after = estimateMessageCharsCached(compacted, cache);
    }
    applyMessageMutationInPlace(msg, compacted, cache);
    if (after >= before) {
      continue;
    }
    reduced += before - after;
    if (reduced >= charsNeeded) {
      break;
    }
  }
  if (reduced < charsNeeded) {
    // Trimming alone was not enough; free the rest via placeholder replacement.
    reduced += compactToPlaceholderInPlace({
      messages,
      charsNeeded: charsNeeded - reduced,
      cache,
    });
  }
  return reduced;
}
/**
 * Orders tool-result message indexes for compaction: every older tool result
 * newest-first, with the most recent tool result deliberately placed last so
 * it is only touched after all older outputs have been considered.
 */
function resolveToolResultCompactionOrder(messages: AgentMessage[]): number[] {
  const indexes = messages
    .map((msg, i) => (isToolResultMessage(msg) ? i : -1))
    .filter((i) => i >= 0);
  if (indexes.length <= 1) {
    return indexes;
  }
  const newest = indexes[indexes.length - 1];
  return [...indexes.slice(0, -1).reverse(), newest];
}
function getNewestToolResultIndex(messages: AgentMessage[]): number | undefined {
for (let i = messages.length - 1; i >= 0; i -= 1) {
if (isToolResultMessage(messages[i])) {
return i;
}
}
return undefined;
}
/**
 * Decides whether a preemptive context-overflow error should be thrown rather
 * than compacting away the newest tool result, when that newest result came
 * from the `read` tool and already fits the per-tool limit. Runs the
 * compaction pipeline against a cloned context and reports true only if the
 * simulation would have rewritten the newest read result's text.
 */
function shouldPreferOverflowForLatestRead(params: {
  messages: AgentMessage[];
  contextBudgetChars: number;
  maxSingleToolResultChars: number;
}): boolean {
  const newestToolResultIndex = getNewestToolResultIndex(params.messages);
  if (newestToolResultIndex === undefined) {
    return false;
  }
  const newestToolResult = params.messages[newestToolResultIndex];
  if (!isReadToolResultMessage(newestToolResult)) {
    return false;
  }
  const initialCache = createMessageCharEstimateCache();
  if (
    estimateMessageCharsCached(newestToolResult, initialCache) > params.maxSingleToolResultChars
  ) {
    // An individually oversized read result is handled by per-tool truncation.
    return false;
  }
  // Simulate on a clone so the real context is never mutated here.
  const simulatedMessages = cloneMessagesForGuard(params.messages);
  const estimateCache = createMessageCharEstimateCache();
  for (const message of simulatedMessages) {
    if (!isToolResultMessage(message)) {
      continue;
    }
    const truncated = truncateToolResultToChars(
      message,
      params.maxSingleToolResultChars,
      estimateCache,
    );
    applyMessageMutationInPlace(message, truncated, estimateCache);
  }
  const currentChars = estimateContextChars(simulatedMessages, estimateCache);
  if (currentChars <= params.contextBudgetChars) {
    // Per-tool truncation alone keeps the context under budget.
    return false;
  }
  const newestToolResultAfterPerToolLimit = simulatedMessages[newestToolResultIndex];
  const newestToolResultTextBefore = getToolResultText(newestToolResultAfterPerToolLimit);
  compactExistingToolResultsInPlace({
    messages: simulatedMessages,
    charsNeeded: currentChars - params.contextBudgetChars,
    cache: estimateCache,
  });
  // If aggregate compaction had to touch the newest read result, prefer overflow.
  return getToolResultText(simulatedMessages[newestToolResultIndex]) !== newestToolResultTextBefore;
}
/**
 * Produces a guard-local copy of the context: a new array whose elements are
 * shallow clones of each message, so in-place mutation of the clones never
 * leaks back into the caller's messages.
 */
function cloneMessagesForGuard(messages: AgentMessage[]): AgentMessage[] {
  const clones: AgentMessage[] = [];
  for (const msg of messages) {
    const copy = { ...(msg as unknown as Record<string, unknown>) };
    clones.push(copy as unknown as AgentMessage);
  }
  return clones;
}
function contextNeedsToolResultCompaction(params: {
function toolResultsNeedTruncation(params: {
messages: AgentMessage[];
contextBudgetChars: number;
maxSingleToolResultChars: number;
}): boolean {
const { messages, contextBudgetChars, maxSingleToolResultChars } = params;
const { messages, maxSingleToolResultChars } = params;
const estimateCache = createMessageCharEstimateCache();
let sawToolResult = false;
for (const message of messages) {
if (!isToolResultMessage(message)) {
continue;
}
sawToolResult = true;
if (estimateMessageCharsCached(message, estimateCache) > maxSingleToolResultChars) {
return true;
}
}
return sawToolResult && estimateContextChars(messages, estimateCache) > contextBudgetChars;
return false;
}
function applyMessageMutationInPlace(
@@ -400,15 +163,13 @@ function applyMessageMutationInPlace(
}
}
function enforceToolResultContextBudgetInPlace(params: {
function enforceToolResultLimitInPlace(params: {
messages: AgentMessage[];
contextBudgetChars: number;
maxSingleToolResultChars: number;
}): void {
const { messages, contextBudgetChars, maxSingleToolResultChars } = params;
const { messages, maxSingleToolResultChars } = params;
const estimateCache = createMessageCharEstimateCache();
// Ensure each tool result has an upper bound before considering total context usage.
for (const message of messages) {
if (!isToolResultMessage(message)) {
continue;
@@ -416,19 +177,6 @@ function enforceToolResultContextBudgetInPlace(params: {
const truncated = truncateToolResultToChars(message, maxSingleToolResultChars, estimateCache);
applyMessageMutationInPlace(message, truncated, estimateCache);
}
let currentChars = estimateContextChars(messages, estimateCache);
if (currentChars <= contextBudgetChars) {
return;
}
// Prefer compacting older tool outputs before sacrificing the newest one;
// stop once the context is back under budget.
compactExistingToolResultsInPlace({
messages,
charsNeeded: currentChars - contextBudgetChars,
cache: estimateCache,
});
}
export function installToolResultContextGuard(params: {
@@ -440,16 +188,16 @@ export function installToolResultContextGuard(params: {
1_024,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * CONTEXT_INPUT_HEADROOM_RATIO),
);
const preemptiveOverflowChars = Math.max(
contextBudgetChars,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO),
);
const maxSingleToolResultChars = Math.max(
1_024,
Math.floor(
contextWindowTokens * TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE * SINGLE_TOOL_RESULT_CONTEXT_SHARE,
),
);
const preemptiveOverflowChars = Math.max(
contextBudgetChars,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO),
);
// Agent.transformContext is private in pi-coding-agent, so access it via a
// narrow runtime view to keep callsites type-safe while preserving behavior.
@@ -462,32 +210,19 @@ export function installToolResultContextGuard(params: {
: messages;
const sourceMessages = Array.isArray(transformed) ? transformed : messages;
if (
shouldPreferOverflowForLatestRead({
messages: sourceMessages,
contextBudgetChars,
maxSingleToolResultChars,
})
) {
throw new Error(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
}
const contextMessages = contextNeedsToolResultCompaction({
const contextMessages = toolResultsNeedTruncation({
messages: sourceMessages,
contextBudgetChars,
maxSingleToolResultChars,
})
? cloneMessagesForGuard(sourceMessages)
: sourceMessages;
enforceToolResultContextBudgetInPlace({
messages: contextMessages,
contextBudgetChars,
maxSingleToolResultChars,
});
if (contextMessages !== sourceMessages) {
enforceToolResultLimitInPlace({
messages: contextMessages,
maxSingleToolResultChars,
});
}
// After tool-result compaction, check if context still exceeds the high-water mark.
// If it does, non-tool-result content dominates and only full LLM-based session
// compaction can reduce context size. Throwing a context overflow error triggers
// the existing overflow recovery cascade in run.ts.
const postEnforcementChars = estimateContextChars(
contextMessages,
createMessageCharEstimateCache(),

View File

@@ -342,6 +342,112 @@ describe("truncateOversizedToolResultsInSession", () => {
openSpy.mockRestore();
}
});
it("rewrites aggregate medium tool results when their combined size still overflows the session", async () => {
const sessionFile = "/tmp/tool-result-truncation-aggregate-session.jsonl";
const sessionManager = SessionManager.inMemory();
sessionManager.appendMessage(makeUserMessage("u".repeat(20_000)));
sessionManager.appendMessage(makeAssistantMessage("reading files"));
sessionManager.appendMessage(makeToolResult("a".repeat(10_000)));
sessionManager.appendMessage(makeToolResult("b".repeat(10_000)));
sessionManager.appendMessage(makeToolResult("c".repeat(10_000)));
const openSpy = vi
.spyOn(SessionManager, "open")
.mockReturnValue(sessionManager as unknown as ReturnType<typeof SessionManager.open>);
try {
const result = await truncateOversizedToolResultsInSession({
sessionFile,
contextWindowTokens: 10_000,
sessionKey: "agent:main:aggregate-test",
});
expect(result.truncated).toBe(true);
expect(result.truncatedCount).toBeGreaterThan(0);
const branch = sessionManager.getBranch();
const toolTexts = branch
.filter((entry) => entry.type === "message" && entry.message.role === "toolResult")
.map((entry) =>
entry.type === "message" && entry.message.role === "toolResult"
? getFirstToolResultText(entry.message)
: "",
);
expect(toolTexts.some((text) => text.includes("truncated"))).toBe(true);
expect(toolTexts.some((text) => text.length < 10_000)).toBe(true);
} finally {
openSpy.mockRestore();
}
});
it("lets a retry pass the real guard after aggregate session rewrite", async () => {
const { PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE, installToolResultContextGuard } =
await import("./tool-result-context-guard.js");
const sessionFile = "/tmp/tool-result-truncation-seam-session.jsonl";
const contextWindowTokens = 10_000;
const originalMessages = [
makeUserMessage("u".repeat(20_000)),
makeAssistantMessage("reading files"),
makeToolResult("a".repeat(10_000), "call_a"),
makeToolResult("b".repeat(10_000), "call_b"),
makeToolResult("c".repeat(10_000), "call_c"),
];
const guardAgent = {};
installToolResultContextGuard({ agent: guardAgent, contextWindowTokens });
await expect(
(
guardAgent as {
transformContext?: (messages: AgentMessage[], signal: AbortSignal) => unknown;
}
).transformContext?.(originalMessages, new AbortController().signal),
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
expect(
sessionLikelyHasOversizedToolResults({
messages: originalMessages,
contextWindowTokens,
}),
).toBe(true);
const sessionManager = SessionManager.inMemory();
for (const message of originalMessages) {
sessionManager.appendMessage(message);
}
const openSpy = vi
.spyOn(SessionManager, "open")
.mockReturnValue(sessionManager as unknown as ReturnType<typeof SessionManager.open>);
try {
const rewriteResult = await truncateOversizedToolResultsInSession({
sessionFile,
contextWindowTokens,
sessionKey: "agent:main:seam-test",
});
expect(rewriteResult.truncated).toBe(true);
expect(rewriteResult.truncatedCount).toBeGreaterThan(0);
const rewrittenMessages = sessionManager
.getBranch()
.filter((entry) => entry.type === "message")
.map((entry) => (entry.type === "message" ? entry.message : null))
.filter((message): message is AgentMessage => message !== null);
const retryAgent = {};
installToolResultContextGuard({ agent: retryAgent, contextWindowTokens });
await expect(
(
retryAgent as {
transformContext?: (messages: AgentMessage[], signal: AbortSignal) => unknown;
}
).transformContext?.(rewrittenMessages, new AbortController().signal),
).resolves.toBeDefined();
} finally {
openSpy.mockRestore();
}
});
});
describe("sessionLikelyHasOversizedToolResults", () => {
@@ -365,6 +471,21 @@ describe("sessionLikelyHasOversizedToolResults", () => {
).toBe(true);
});
it("returns true when several medium tool results exceed the aggregate overflow budget", () => {
const messages = [
makeUserMessage("u".repeat(20_000)),
makeToolResult("a".repeat(10_000)),
makeToolResult("b".repeat(10_000)),
makeToolResult("c".repeat(10_000)),
];
expect(
sessionLikelyHasOversizedToolResults({
messages,
contextWindowTokens: 10_000,
}),
).toBe(true);
});
it("returns false for empty messages", () => {
expect(
sessionLikelyHasOversizedToolResults({

View File

@@ -4,6 +4,12 @@ import { SessionManager } from "@mariozechner/pi-coding-agent";
import { emitSessionTranscriptUpdate } from "../../sessions/transcript-events.js";
import { acquireSessionWriteLock } from "../session-write-lock.js";
import { log } from "./logger.js";
import {
CHARS_PER_TOKEN_ESTIMATE,
TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE,
createMessageCharEstimateCache,
estimateContextChars,
} from "./tool-result-char-estimator.js";
import { rewriteTranscriptEntriesInSessionManager } from "./transcript-rewrite.js";
/**
@@ -12,6 +18,11 @@ import { rewriteTranscriptEntriesInSessionManager } from "./transcript-rewrite.j
* consume more than 30% of the context window even without other messages.
*/
const MAX_TOOL_RESULT_CONTEXT_SHARE = 0.3;
const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
const PREEMPTIVE_OVERFLOW_RATIO = 0.9;
const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO =
CHARS_PER_TOKEN_ESTIMATE / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE;
const AGGREGATE_TRUNCATION_MIN_KEEP_CHARS = 256;
/**
* Default hard cap for a single live tool result text block.
@@ -43,10 +54,60 @@ const TRUNCATION_SUFFIX =
"offset/limit parameters to read smaller chunks.]";
type ToolResultTruncationOptions = {
suffix?: string;
suffix?: string | ((truncatedChars: number) => string);
minKeepChars?: number;
};
type ToolResultRewriteCandidate = {
entryId: string;
entryIndex: number;
message: AgentMessage;
textLength: number;
};
function calculateContextBudgetChars(contextWindowTokens: number): number {
return Math.max(
1_024,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * CONTEXT_INPUT_HEADROOM_RATIO),
);
}
function calculatePreemptiveOverflowChars(contextWindowTokens: number): number {
return Math.max(
calculateContextBudgetChars(contextWindowTokens),
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO),
);
}
function estimateToolResultCharsFromTextLength(textLength: number): number {
return Math.ceil(textLength * TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO);
}
function collectToolResultRewriteCandidates(branch: ReturnType<SessionManager["getBranch"]>): {
candidates: ToolResultRewriteCandidate[];
messages: AgentMessage[];
} {
const candidates: ToolResultRewriteCandidate[] = [];
const messages: AgentMessage[] = [];
for (let i = 0; i < branch.length; i++) {
const entry = branch[i];
if (entry.type !== "message") {
continue;
}
messages.push(entry.message);
if ((entry.message as { role?: string }).role !== "toolResult") {
continue;
}
candidates.push({
entryId: entry.id,
entryIndex: i,
message: entry.message,
textLength: getToolResultTextLength(entry.message),
});
}
return { candidates, messages };
}
/**
* Marker inserted between head and tail when using head+tail truncation.
*/
@@ -82,12 +143,16 @@ export function truncateToolResultText(
maxChars: number,
options: ToolResultTruncationOptions = {},
): string {
const suffix = options.suffix ?? TRUNCATION_SUFFIX;
const suffixFactory: (truncatedChars: number) => string =
typeof options.suffix === "function"
? options.suffix
: () => (options.suffix ?? TRUNCATION_SUFFIX);
const minKeepChars = options.minKeepChars ?? MIN_KEEP_CHARS;
if (text.length <= maxChars) {
return text;
}
const budget = Math.max(minKeepChars, maxChars - suffix.length);
const defaultSuffix = suffixFactory(Math.max(1, text.length - maxChars));
const budget = Math.max(minKeepChars, maxChars - defaultSuffix.length);
// If tail looks important, split budget between head and tail
if (hasImportantTail(text) && budget > minKeepChars * 2) {
@@ -108,7 +173,9 @@ export function truncateToolResultText(
tailStart = tailNewline + 1;
}
return text.slice(0, headCut) + MIDDLE_OMISSION_MARKER + text.slice(tailStart) + suffix;
const keptText = text.slice(0, headCut) + MIDDLE_OMISSION_MARKER + text.slice(tailStart);
const suffix = suffixFactory(Math.max(1, text.length - keptText.length));
return keptText + suffix;
}
}
@@ -118,7 +185,9 @@ export function truncateToolResultText(
if (lastNewline > budget * 0.8) {
cutPoint = lastNewline;
}
return text.slice(0, cutPoint) + suffix;
const keptText = text.slice(0, cutPoint);
const suffix = suffixFactory(Math.max(1, text.length - keptText.length));
return keptText + suffix;
}
/**
@@ -167,7 +236,10 @@ export function truncateToolResultMessage(
maxChars: number,
options: ToolResultTruncationOptions = {},
): AgentMessage {
const suffix = options.suffix ?? TRUNCATION_SUFFIX;
const suffixFactory: (truncatedChars: number) => string =
typeof options.suffix === "function"
? options.suffix
: () => (options.suffix ?? TRUNCATION_SUFFIX);
const minKeepChars = options.minKeepChars ?? MIN_KEEP_CHARS;
const content = (msg as { content?: unknown }).content;
if (!Array.isArray(content)) {
@@ -191,10 +263,19 @@ export function truncateToolResultMessage(
}
// Proportional budget for this block
const blockShare = textBlock.text.length / totalTextChars;
const blockBudget = Math.max(minKeepChars + suffix.length, Math.floor(maxChars * blockShare));
const defaultSuffix = suffixFactory(
Math.max(1, textBlock.text.length - Math.floor(maxChars * blockShare)),
);
const blockBudget = Math.max(
minKeepChars + defaultSuffix.length,
Math.floor(maxChars * blockShare),
);
return {
...textBlock,
text: truncateToolResultText(textBlock.text, blockBudget, { suffix, minKeepChars }),
text: truncateToolResultText(textBlock.text, blockBudget, {
suffix: suffixFactory,
minKeepChars,
}),
};
});
@@ -231,47 +312,84 @@ export async function truncateOversizedToolResultsInSession(params: {
return { truncated: false, truncatedCount: 0, reason: "empty session" };
}
// Find oversized tool result entries and their indices in the branch
const oversizedIndices: number[] = [];
for (let i = 0; i < branch.length; i++) {
const entry = branch[i];
if (entry.type !== "message") {
continue;
}
const msg = entry.message;
if ((msg as { role?: string }).role !== "toolResult") {
continue;
}
const textLength = getToolResultTextLength(msg);
if (textLength > maxChars) {
oversizedIndices.push(i);
log.info(
`[tool-result-truncation] Found oversized tool result: ` +
`entry=${entry.id} chars=${textLength} maxChars=${maxChars} ` +
`sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
);
}
}
if (oversizedIndices.length === 0) {
return { truncated: false, truncatedCount: 0, reason: "no oversized tool results" };
}
const replacements = oversizedIndices.flatMap((index) => {
const entry = branch[index];
if (!entry || entry.type !== "message") {
return [];
}
const message = truncateToolResultMessage(entry.message, maxChars);
const newLength = getToolResultTextLength(message);
const { candidates, messages } = collectToolResultRewriteCandidates(branch);
const oversizedCandidates = candidates.filter((candidate) => candidate.textLength > maxChars);
for (const candidate of oversizedCandidates) {
log.info(
`[tool-result-truncation] Truncated tool result: ` +
`originalEntry=${entry.id} newChars=${newLength} ` +
`[tool-result-truncation] Found oversized tool result: ` +
`entry=${candidate.entryId} chars=${candidate.textLength} maxChars=${maxChars} ` +
`sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
);
return [{ entryId: entry.id, message }];
}
const currentContextChars = estimateContextChars(messages, createMessageCharEstimateCache());
const overflowThresholdChars = calculatePreemptiveOverflowChars(contextWindowTokens);
const aggregateCharsNeeded = Math.max(0, currentContextChars - overflowThresholdChars);
if (oversizedCandidates.length === 0 && aggregateCharsNeeded <= 0) {
return { truncated: false, truncatedCount: 0, reason: "no tool result truncation needed" };
}
let remainingAggregateCharsNeeded = aggregateCharsNeeded;
const candidatesByRecency = [...candidates].toSorted((a, b) => b.entryIndex - a.entryIndex);
const replacements = candidatesByRecency.flatMap((candidate) => {
const aggregateEligible =
remainingAggregateCharsNeeded > 0 &&
candidate.textLength > AGGREGATE_TRUNCATION_MIN_KEEP_CHARS;
const targetChars =
candidate.textLength > maxChars
? maxChars
: aggregateEligible
? Math.max(
AGGREGATE_TRUNCATION_MIN_KEEP_CHARS,
candidate.textLength -
Math.ceil(remainingAggregateCharsNeeded / TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO),
)
: candidate.textLength;
if (targetChars >= candidate.textLength) {
return [];
}
const minKeepChars =
candidate.textLength > maxChars ? undefined : AGGREGATE_TRUNCATION_MIN_KEEP_CHARS;
const message = truncateToolResultMessage(
candidate.message,
targetChars,
minKeepChars === undefined ? {} : { minKeepChars },
);
const newLength = getToolResultTextLength(message);
if (newLength >= candidate.textLength) {
return [];
}
const reducedEstimateChars = estimateToolResultCharsFromTextLength(
candidate.textLength - newLength,
);
remainingAggregateCharsNeeded = Math.max(
0,
remainingAggregateCharsNeeded - reducedEstimateChars,
);
log.info(
`[tool-result-truncation] Truncated tool result: ` +
`originalEntry=${candidate.entryId} newChars=${newLength} ` +
`sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
);
return [{ entryId: candidate.entryId, message }];
});
if (replacements.length === 0) {
return {
truncated: false,
truncatedCount: 0,
reason:
oversizedCandidates.length > 0
? "oversized tool results could not be reduced"
: "aggregate tool result overflow could not be reduced",
};
}
const rewriteResult = rewriteTranscriptEntriesInSessionManager({
sessionManager,
replacements,
@@ -351,16 +469,21 @@ export function sessionLikelyHasOversizedToolResults(params: {
}): boolean {
const { messages, contextWindowTokens } = params;
const maxChars = calculateMaxToolResultChars(contextWindowTokens);
const contextBudgetChars = calculatePreemptiveOverflowChars(contextWindowTokens);
let sawToolResult = false;
let aggregateToolResultChars = 0;
for (const msg of messages) {
if ((msg as { role?: string }).role !== "toolResult") {
continue;
}
sawToolResult = true;
const textLength = getToolResultTextLength(msg);
aggregateToolResultChars += estimateToolResultCharsFromTextLength(textLength);
if (textLength > maxChars) {
return true;
}
}
return false;
return sawToolResult && aggregateToolResultChars > contextBudgetChars;
}

View File

@@ -158,6 +158,17 @@ describe("installSessionToolResultGuard", () => {
expectPersistedRoles(sm, ["assistant", "toolResult"]);
});
it("applies pi-style count-based truncation wording when persisting oversized tool results", () => {
const sm = SessionManager.inMemory();
installSessionToolResultGuard(sm);
appendToolResultText(sm, "x".repeat(80_000));
const text = getToolResultText(getPersistedMessages(sm));
expect(text).toContain("more characters truncated");
expect(text).toMatch(/\[\.\.\. \d+ more characters truncated\]$/);
});
it("backfills blank toolResult names from pending tool calls", () => {
const sm = SessionManager.inMemory();
installSessionToolResultGuard(sm);

View File

@@ -5,6 +5,7 @@ import type {
PluginHookBeforeMessageWriteResult,
} from "../plugins/types.js";
import { emitSessionTranscriptUpdate } from "../sessions/transcript-events.js";
import { formatContextLimitTruncationNotice } from "./pi-embedded-runner/tool-result-context-guard.js";
import {
DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS,
truncateToolResultMessage,
@@ -12,10 +13,6 @@ import {
import { createPendingToolCallState } from "./session-tool-result-state.js";
import { makeMissingToolResult, sanitizeToolCallInputs } from "./session-transcript-repair.js";
import { extractToolCallsFromAssistant, extractToolResultId } from "./tool-call-id.js";
const GUARD_TRUNCATION_SUFFIX =
"\n\n⚠ [Content truncated during persistence — original exceeded size limit. " +
"Use offset/limit parameters or request specific sections for large content.]";
const RAW_APPEND_MESSAGE = Symbol("openclaw.session.rawAppendMessage");
type SessionManagerWithRawAppend = SessionManager & {
@@ -32,7 +29,7 @@ function capToolResultSize(msg: AgentMessage): AgentMessage {
return msg;
}
return truncateToolResultMessage(msg, DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS, {
suffix: GUARD_TRUNCATION_SUFFIX,
suffix: (truncatedChars) => formatContextLimitTruncationNotice(truncatedChars),
minKeepChars: 2_000,
});
}

View File

@@ -102,7 +102,7 @@ export function buildSubagentSystemPrompt(params: {
"3. **Don't initiate** - No heartbeats, no proactive actions, no side quests",
"4. **Be ephemeral** - You may be terminated after task completion. That's fine.",
"5. **Trust push-based completion** - Descendant results are auto-announced back to you; do not busy-poll for status.",
"6. **Recover from compacted/truncated tool output** - If you see `[compacted: tool output removed to free context]` or `[truncated: output exceeded context limit]`, assume prior output was reduced. Re-read only what you need using smaller chunks (`read` with offset/limit, or targeted `rg`/`head`/`tail`) instead of full-file `cat`.",
"6. **Recover from truncated tool output** - If you see a notice like `[..., N more characters truncated]`, assume prior output was reduced. Re-read only what you need using smaller chunks (`read` with offset/limit, or targeted `rg`/`head`/`tail`) instead of full-file `cat`.",
"",
"## Output Format",
"When complete, your final response should include:",

View File

@@ -993,8 +993,7 @@ describe("buildSubagentSystemPrompt", () => {
expect(prompt).toContain("Avoid polling loops");
expect(prompt).toContain("spawned by the main agent");
expect(prompt).toContain("reported to the main agent");
expect(prompt).toContain("[compacted: tool output removed to free context]");
expect(prompt).toContain("[truncated: output exceeded context limit]");
expect(prompt).toContain("[..., N more characters truncated]");
expect(prompt).toContain("offset/limit");
expect(prompt).toContain("instead of full-file `cat`");
});