remove openclaw-only tool overflow compatibility layer

This commit is contained in:
Tak Hoffman
2026-04-05 22:33:02 -05:00
committed by Peter Steinberger
parent 7fc1a74ee9
commit cbc2945117
7 changed files with 17 additions and 715 deletions

View File

@@ -69,14 +69,6 @@ export const mockedPrepareProviderRuntimeAuth = vi.fn(async () => undefined);
export const mockedRunEmbeddedAttempt =
vi.fn<(params: unknown) => Promise<EmbeddedRunAttemptResult>>();
export const mockedRunContextEngineMaintenance = vi.fn(async () => undefined);
// Harness default: the oversized-tool-result detector stub reports `false`.
export const mockedSessionLikelyHasOversizedToolResults = vi.fn(() => false);
// Harness default: the session truncation stub resolves with a "nothing truncated" result.
export const mockedTruncateOversizedToolResultsInSession = vi.fn<
() => Promise<MockTruncateOversizedToolResultsResult>
>(async () => ({
truncated: false,
truncatedCount: 0,
reason: "no oversized tool results",
}));
type MockFailoverErrorDescription = {
message: string;
@@ -91,12 +83,6 @@ type MockCoerceToFailoverError = (
) => unknown;
type MockDescribeFailoverError = (err: unknown) => MockFailoverErrorDescription;
type MockResolveFailoverStatus = (reason: string) => number | undefined;
// Result shape resolved by the mocked `truncateOversizedToolResultsInSession` stub.
type MockTruncateOversizedToolResultsResult = {
// Whether any tool result was rewritten.
truncated: boolean;
// Number of tool results that were rewritten.
truncatedCount: number;
// Optional explanation; the harness default sets it when nothing was truncated.
reason?: string;
};
export class MockedFailoverError extends Error {
constructor(message: string) {
super(message);
@@ -217,14 +203,6 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
mockedRunEmbeddedAttempt.mockReset();
mockedRunContextEngineMaintenance.mockReset();
mockedRunContextEngineMaintenance.mockResolvedValue(undefined);
mockedSessionLikelyHasOversizedToolResults.mockReset();
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false);
mockedTruncateOversizedToolResultsInSession.mockReset();
mockedTruncateOversizedToolResultsInSession.mockResolvedValue({
truncated: false,
truncatedCount: 0,
reason: "no oversized tool results",
});
mockedCoerceToFailoverError.mockReset();
mockedCoerceToFailoverError.mockReturnValue(null);
@@ -475,11 +453,6 @@ export async function loadRunOverflowCompactionHarness(): Promise<{
buildEmbeddedRunPayloads: vi.fn(() => []),
}));
vi.doMock("./tool-result-truncation.js", () => ({
truncateOversizedToolResultsInSession: mockedTruncateOversizedToolResultsInSession,
sessionLikelyHasOversizedToolResults: mockedSessionLikelyHasOversizedToolResults,
}));
vi.doMock("./compact.js", () => ({
runPostCompactionSideEffects: mockedRunPostCompactionSideEffects,
}));

View File

@@ -1,26 +1,20 @@
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
import {
makeAttemptResult,
makeCompactionSuccess,
makeOverflowError,
mockOverflowRetrySuccess,
queueOverflowAttemptWithOversizedToolOutput,
} from "./run.overflow-compaction.fixture.js";
import {
loadRunOverflowCompactionHarness,
mockedContextEngine,
mockedCompactDirect,
mockedEvaluateContextWindowGuard,
mockedIsCompactionFailureError,
mockedIsLikelyContextOverflowError,
mockedLog,
mockedResolveContextWindowInfo,
mockedRunEmbeddedAttempt,
mockedSessionLikelyHasOversizedToolResults,
mockedTruncateOversizedToolResultsInSession,
overflowBaseRunParams as baseParams,
} from "./run.overflow-compaction.harness.js";
import type { EmbeddedRunAttemptResult } from "./run/types.js";
let runEmbeddedPiAgent: typeof import("./run.js").runEmbeddedPiAgent;
@@ -32,8 +26,6 @@ describe("overflow compaction in run loop", () => {
beforeEach(() => {
mockedRunEmbeddedAttempt.mockReset();
mockedCompactDirect.mockReset();
mockedSessionLikelyHasOversizedToolResults.mockReset();
mockedTruncateOversizedToolResultsInSession.mockReset();
mockedContextEngine.info.ownsCompaction = false;
mockedLog.debug.mockReset();
mockedLog.info.mockReset();
@@ -65,12 +57,6 @@ describe("overflow compaction in run loop", () => {
compacted: false,
reason: "nothing to compact",
});
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false);
mockedTruncateOversizedToolResultsInSession.mockResolvedValue({
truncated: false,
truncatedCount: 0,
reason: "no oversized tool results",
});
});
it("retries after successful compaction on context overflow promptError", async () => {
@@ -143,117 +129,6 @@ describe("overflow compaction in run loop", () => {
expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("auto-compaction failed"));
});
// Scenario: the first attempt overflows, compaction reports "nothing to compact",
// the (mocked) detector says oversized tool results exist, truncation rewrites one
// result, and the second attempt succeeds without a surfaced error.
it("falls back to tool-result truncation and retries when oversized results are detected", async () => {
// First attempt is queued through the fixture helper with an overflow error;
// the second attempt resolves cleanly.
queueOverflowAttemptWithOversizedToolOutput(mockedRunEmbeddedAttempt, makeOverflowError());
mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
// Compaction cannot help, which is what pushes the run loop to the truncation fallback.
mockedCompactDirect.mockResolvedValueOnce({
ok: false,
compacted: false,
reason: "nothing to compact",
});
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(true);
mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({
truncated: true,
truncatedCount: 1,
});
const result = await runEmbeddedPiAgent(baseParams);
// One compaction try, one detection call, one truncation call, then exactly one retry.
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
expect(mockedSessionLikelyHasOversizedToolResults).toHaveBeenCalledWith(
expect.objectContaining({ contextWindowTokens: 200000 }),
);
expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledWith(
expect.objectContaining({ sessionFile: "/tmp/session.json" }),
);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(mockedLog.info).toHaveBeenCalledWith(
expect.stringContaining("Truncated 1 tool result(s)"),
);
expect(result.meta.error).toBeUndefined();
});
// Scenario: same fallback path as the mocked-detector test, but the detector mock
// delegates to the real `sessionLikelyHasOversizedToolResults` so the aggregate
// (several medium tool results) heuristic is what trips the fallback.
it("falls back to tool-result truncation and retries when real aggregate tool-result detection trips", async () => {
// Load the unmocked implementation so the detector mock can forward to it.
const { sessionLikelyHasOversizedToolResults } = await vi.importActual<
typeof import("./tool-result-truncation.js")
>("./tool-result-truncation.js");
// Use a small 10k-token window for this test.
mockedResolveContextWindowInfo.mockReturnValue({
tokens: 10_000,
source: "model",
});
mockedEvaluateContextWindowGuard.mockReturnValue({
shouldWarn: false,
shouldBlock: false,
tokens: 10_000,
source: "model",
});
// First attempt: overflow error plus a snapshot holding one 20k-char user message
// and three 10k-char tool results. Second attempt: success.
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(
makeAttemptResult({
promptError: makeOverflowError(),
messagesSnapshot: [
{
role: "user",
content: "u".repeat(20_000),
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
{
role: "toolResult",
toolCallId: "call_a",
toolName: "read",
content: [{ type: "text", text: "a".repeat(10_000) }],
isError: false,
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
{
role: "toolResult",
toolCallId: "call_b",
toolName: "read",
content: [{ type: "text", text: "b".repeat(10_000) }],
isError: false,
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
{
role: "toolResult",
toolCallId: "call_c",
toolName: "read",
content: [{ type: "text", text: "c".repeat(10_000) }],
isError: false,
} as EmbeddedRunAttemptResult["messagesSnapshot"][number],
],
}),
)
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
mockedCompactDirect.mockResolvedValueOnce({
ok: false,
compacted: false,
reason: "nothing to compact",
});
// Forward detector calls to the real implementation; the `as never` cast only
// bridges the mock's narrower declared signature.
mockedSessionLikelyHasOversizedToolResults.mockImplementation(
((params: Parameters<typeof sessionLikelyHasOversizedToolResults>[0]) =>
sessionLikelyHasOversizedToolResults(params)) as never,
);
mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({
truncated: true,
truncatedCount: 2,
});
const result = await runEmbeddedPiAgent(baseParams);
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
expect(mockedSessionLikelyHasOversizedToolResults).toHaveBeenCalledWith(
expect.objectContaining({ contextWindowTokens: 10_000 }),
);
expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledWith(
expect.objectContaining({ sessionFile: "/tmp/session.json" }),
);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(mockedLog.info).toHaveBeenCalledWith(
expect.stringContaining("Truncated 2 tool result(s)"),
);
expect(result.meta.error).toBeUndefined();
});
it("retries compaction up to 3 times before giving up", async () => {
const overflowError = makeOverflowError();

View File

@@ -91,10 +91,6 @@ import type { RunEmbeddedPiAgentParams } from "./run/params.js";
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
import { handleRetryLimitExhaustion } from "./run/retry-limit.js";
import { resolveEffectiveRuntimeModel, resolveHookModelSelection } from "./run/setup.js";
import {
sessionLikelyHasOversizedToolResults,
truncateOversizedToolResultsInSession,
} from "./tool-result-truncation.js";
import type { EmbeddedPiAgentMeta, EmbeddedPiRunResult } from "./types.js";
import { createUsageAccumulator, mergeUsageIntoAccumulator } from "./usage-accumulator.js";
import { describeUnknownError } from "./utils.js";
@@ -320,7 +316,6 @@ export async function runEmbeddedPiAgent(
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length);
let overflowCompactionAttempts = 0;
let toolResultTruncationAttempted = false;
let bootstrapPromptWarningSignaturesSeen =
params.bootstrapPromptWarningSignaturesSeen ??
(params.bootstrapPromptWarningSignature ? [params.bootstrapPromptWarningSignature] : []);
@@ -927,60 +922,8 @@ export async function runEmbeddedPiAgent(
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
);
}
// Fallback: try truncating oversized tool results in the session.
// This handles the case where a single tool result exceeds the
// context window and compaction cannot reduce it further.
if (!toolResultTruncationAttempted) {
const contextWindowTokens = ctxInfo.tokens;
const hasOversized = attempt.messagesSnapshot
? sessionLikelyHasOversizedToolResults({
messages: attempt.messagesSnapshot,
contextWindowTokens,
})
: false;
if (hasOversized) {
if (log.isEnabled("debug")) {
log.debug(
`[compaction-diag] decision diagId=${overflowDiagId} branch=truncate_tool_results ` +
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
`attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
);
}
toolResultTruncationAttempted = true;
log.warn(
`[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
`(contextWindow=${contextWindowTokens} tokens)`,
);
const truncResult = await truncateOversizedToolResultsInSession({
sessionFile: params.sessionFile,
contextWindowTokens,
sessionId: params.sessionId,
sessionKey: params.sessionKey,
});
if (truncResult.truncated) {
log.info(
`[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
);
// Do NOT reset overflowCompactionAttempts here — the global cap must remain
// enforced across all iterations to prevent unbounded compaction cycles (OC-65).
continue;
}
log.warn(
`[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
);
} else if (log.isEnabled("debug")) {
log.debug(
`[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
`attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
);
}
}
if (
(isCompactionFailure ||
overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS ||
toolResultTruncationAttempted) &&
(isCompactionFailure || overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS) &&
log.isEnabled("debug")
) {
log.debug(

View File

@@ -3,7 +3,6 @@ import { describe, expect, it } from "vitest";
import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
import {
CONTEXT_LIMIT_TRUNCATION_NOTICE,
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
formatContextLimitTruncationNotice,
installToolResultContextGuard,
} from "./tool-result-context-guard.js";
@@ -119,7 +118,7 @@ describe("installToolResultContextGuard", () => {
it("does not preemptively overflow large non-tool context that is still under the high-water mark", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [makeUser("u".repeat(3_200))];
const contextForNextCall = [makeUser("u".repeat(50_000))];
const transformed = await applyGuardToContext(agent, contextForNextCall);
@@ -181,50 +180,46 @@ describe("installToolResultContextGuard", () => {
expect((contextForNextCall[0] as { details?: unknown }).details).toBeDefined();
});
it("throws overflow when total context exceeds the budget after one-time truncation", async () => {
it("does not preemptively overflow when total context remains large after one-time truncation", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [
makeUser("u".repeat(2_800)),
makeUser("u".repeat(50_000)),
makeToolResult("call_ok", "x".repeat(500)),
];
await expect(applyGuardToContext(agent, contextForNextCall)).rejects.toThrow(
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
);
const transformed = await applyGuardToContext(agent, contextForNextCall);
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(500));
expect(transformed).toBe(contextForNextCall);
});
it("throws overflow instead of historically rewriting older tool results", async () => {
it("does not rewrite older tool results under aggregate pressure", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [
makeUser("u".repeat(2_200)),
makeUser("u".repeat(50_000)),
makeToolResult("call_1", "a".repeat(500)),
makeToolResult("call_2", "b".repeat(500)),
makeToolResult("call_3", "c".repeat(500)),
];
await expect(applyGuardToContext(agent, contextForNextCall)).rejects.toThrow(
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
);
const transformed = await applyGuardToContext(agent, contextForNextCall);
expect(transformed).toBe(contextForNextCall);
expect(getToolResultText(contextForNextCall[1])).toBe("a".repeat(500));
expect(getToolResultText(contextForNextCall[2])).toBe("b".repeat(500));
expect(getToolResultText(contextForNextCall[3])).toBe("c".repeat(500));
});
it("throws overflow instead of special-casing the latest read result", async () => {
it("does not special-case the latest read result under aggregate pressure", async () => {
const agent = makeGuardableAgent();
const contextForNextCall = [
makeUser("u".repeat(2_900)),
makeUser("u".repeat(50_000)),
makeToolResult("call_old", "x".repeat(400)),
makeReadToolResult("call_new", "y".repeat(500)),
];
await expect(applyGuardToContext(agent, contextForNextCall)).rejects.toThrow(
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
);
const transformed = await applyGuardToContext(agent, contextForNextCall);
expect(transformed).toBe(contextForNextCall);
expect(getToolResultText(contextForNextCall[1])).toBe("x".repeat(400));
expect(getToolResultText(contextForNextCall[2])).toBe("y".repeat(500));
});

View File

@@ -1,27 +1,19 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import {
CHARS_PER_TOKEN_ESTIMATE,
TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE,
type MessageCharEstimateCache,
createMessageCharEstimateCache,
estimateContextChars,
estimateMessageCharsCached,
getToolResultText,
invalidateMessageCharsCacheEntry,
isToolResultMessage,
} from "./tool-result-char-estimator.js";
// Keep a conservative input budget to absorb tokenizer variance and provider framing overhead.
const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
const SINGLE_TOOL_RESULT_CONTEXT_SHARE = 0.5;
const PREEMPTIVE_OVERFLOW_RATIO = 0.9;
export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "more characters truncated";
export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE =
"Preemptive context overflow: estimated context size exceeds safe threshold during tool loop";
const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO =
CHARS_PER_TOKEN_ESTIMATE / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE;
4 / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE;
type GuardableTransformContext = (
messages: AgentMessage[],
@@ -184,14 +176,6 @@ export function installToolResultContextGuard(params: {
contextWindowTokens: number;
}): () => void {
const contextWindowTokens = Math.max(1, Math.floor(params.contextWindowTokens));
const contextBudgetChars = Math.max(
1_024,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * CONTEXT_INPUT_HEADROOM_RATIO),
);
const preemptiveOverflowChars = Math.max(
contextBudgetChars,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO),
);
const maxSingleToolResultChars = Math.max(
1_024,
Math.floor(
@@ -223,14 +207,6 @@ export function installToolResultContextGuard(params: {
});
}
const postEnforcementChars = estimateContextChars(
contextMessages,
createMessageCharEstimateCache(),
);
if (postEnforcementChars > preemptiveOverflowChars) {
throw new Error(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
}
return contextMessages;
}) as GuardableTransformContext;

View File

@@ -1,51 +1,25 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, ToolResultMessage, UserMessage } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import { beforeEach, describe, expect, it, vi } from "vitest";
import {
buildSessionWriteLockModuleMock,
resetModulesWithSessionWriteLockDoMock,
} from "../../test-utils/session-write-lock-module-mock.js";
import { beforeEach, describe, expect, it } from "vitest";
import { makeAgentAssistantMessage } from "../test-helpers/agent-message-fixtures.js";
// Hoisted so the mocks exist before the `vi.mock` factory below runs.
// `release` stub returned by the fake lock; tests assert how often it was called.
const acquireSessionWriteLockReleaseMock = vi.hoisted(() => vi.fn(async () => {}));
// Fake lock acquisition: resolves to an object exposing the `release` stub above.
const acquireSessionWriteLockMock = vi.hoisted(() =>
vi.fn(async (_params?: unknown) => ({ release: acquireSessionWriteLockReleaseMock })),
);
// Replace the session-write-lock module with a mock built from the real module
// (loaded via `vi.importActual`) and the fake acquire function.
vi.mock("../session-write-lock.js", () =>
buildSessionWriteLockModuleMock(
() => vi.importActual<typeof import("../session-write-lock.js")>("../session-write-lock.js"),
(params) => acquireSessionWriteLockMock(params),
),
);
let truncateToolResultText: typeof import("./tool-result-truncation.js").truncateToolResultText;
let truncateToolResultMessage: typeof import("./tool-result-truncation.js").truncateToolResultMessage;
let calculateMaxToolResultChars: typeof import("./tool-result-truncation.js").calculateMaxToolResultChars;
let getToolResultTextLength: typeof import("./tool-result-truncation.js").getToolResultTextLength;
let truncateOversizedToolResultsInMessages: typeof import("./tool-result-truncation.js").truncateOversizedToolResultsInMessages;
let truncateOversizedToolResultsInSession: typeof import("./tool-result-truncation.js").truncateOversizedToolResultsInSession;
let isOversizedToolResult: typeof import("./tool-result-truncation.js").isOversizedToolResult;
let sessionLikelyHasOversizedToolResults: typeof import("./tool-result-truncation.js").sessionLikelyHasOversizedToolResults;
let DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS: typeof import("./tool-result-truncation.js").DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS;
let HARD_MAX_TOOL_RESULT_CHARS: typeof import("./tool-result-truncation.js").HARD_MAX_TOOL_RESULT_CHARS;
let onSessionTranscriptUpdate: typeof import("../../sessions/transcript-events.js").onSessionTranscriptUpdate;
async function loadFreshToolResultTruncationModuleForTest() {
resetModulesWithSessionWriteLockDoMock("../session-write-lock.js", (params) =>
acquireSessionWriteLockMock(params),
);
({ onSessionTranscriptUpdate } = await import("../../sessions/transcript-events.js"));
({
truncateToolResultText,
truncateToolResultMessage,
calculateMaxToolResultChars,
getToolResultTextLength,
truncateOversizedToolResultsInMessages,
truncateOversizedToolResultsInSession,
isOversizedToolResult,
sessionLikelyHasOversizedToolResults,
DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS,
HARD_MAX_TOOL_RESULT_CHARS,
} = await import("./tool-result-truncation.js"));
@@ -56,8 +30,6 @@ const nextTimestamp = () => testTimestamp++;
beforeEach(async () => {
testTimestamp = 1;
acquireSessionWriteLockMock.mockClear();
acquireSessionWriteLockReleaseMock.mockClear();
await loadFreshToolResultTruncationModuleForTest();
});
@@ -296,206 +268,6 @@ describe("truncateOversizedToolResultsInMessages", () => {
});
});
// Session-level truncation tests. Each case swaps `SessionManager.open` for an
// in-memory manager so no session file is read or written.
describe("truncateOversizedToolResultsInSession", () => {
// A single 500k-char tool result: verifies lock acquire/release, the transcript
// update event, and that the stored tool result was shortened and marked truncated.
it("acquires the session write lock before rewriting oversized tool results", async () => {
const sessionFile = "/tmp/tool-result-truncation-session.jsonl";
const sessionManager = SessionManager.inMemory();
sessionManager.appendMessage(makeUserMessage("hello"));
sessionManager.appendMessage(makeAssistantMessage("reading file"));
sessionManager.appendMessage(makeToolResult("x".repeat(500_000)));
const openSpy = vi
.spyOn(SessionManager, "open")
.mockReturnValue(sessionManager as unknown as ReturnType<typeof SessionManager.open>);
const listener = vi.fn();
const cleanup = onSessionTranscriptUpdate(listener);
try {
const result = await truncateOversizedToolResultsInSession({
sessionFile,
contextWindowTokens: 128_000,
sessionKey: "agent:main:test",
});
expect(result.truncated).toBe(true);
expect(result.truncatedCount).toBe(1);
expect(acquireSessionWriteLockMock).toHaveBeenCalledWith({ sessionFile });
expect(acquireSessionWriteLockReleaseMock).toHaveBeenCalledTimes(1);
expect(listener).toHaveBeenCalledWith({ sessionFile });
const branch = sessionManager.getBranch();
const rewrittenToolResult = branch.find(
(entry) => entry.type === "message" && entry.message.role === "toolResult",
);
expect(rewrittenToolResult?.type).toBe("message");
// Narrow the entry type for the compiler before reading the message content.
if (
rewrittenToolResult?.type !== "message" ||
rewrittenToolResult.message.role !== "toolResult"
) {
throw new Error("expected rewritten tool result");
}
const rewrittenText = getFirstToolResultText(rewrittenToolResult.message);
expect(rewrittenText.length).toBeLessThan(500_000);
expect(rewrittenText).toContain("truncated");
} finally {
// Always detach the listener and restore the spy so later tests are unaffected.
cleanup();
openSpy.mockRestore();
}
});
// Three 10k-char tool results plus a 20k-char user message against a 10k-token
// window: expects at least one result to be shortened.
it("rewrites aggregate medium tool results when their combined size still overflows the session", async () => {
const sessionFile = "/tmp/tool-result-truncation-aggregate-session.jsonl";
const sessionManager = SessionManager.inMemory();
sessionManager.appendMessage(makeUserMessage("u".repeat(20_000)));
sessionManager.appendMessage(makeAssistantMessage("reading files"));
sessionManager.appendMessage(makeToolResult("a".repeat(10_000)));
sessionManager.appendMessage(makeToolResult("b".repeat(10_000)));
sessionManager.appendMessage(makeToolResult("c".repeat(10_000)));
const openSpy = vi
.spyOn(SessionManager, "open")
.mockReturnValue(sessionManager as unknown as ReturnType<typeof SessionManager.open>);
try {
const result = await truncateOversizedToolResultsInSession({
sessionFile,
contextWindowTokens: 10_000,
sessionKey: "agent:main:aggregate-test",
});
expect(result.truncated).toBe(true);
expect(result.truncatedCount).toBeGreaterThan(0);
const branch = sessionManager.getBranch();
const toolTexts = branch
.filter((entry) => entry.type === "message" && entry.message.role === "toolResult")
.map((entry) =>
entry.type === "message" && entry.message.role === "toolResult"
? getFirstToolResultText(entry.message)
: "",
);
expect(toolTexts.some((text) => text.includes("truncated"))).toBe(true);
expect(toolTexts.some((text) => text.length < 10_000)).toBe(true);
} finally {
openSpy.mockRestore();
}
});
// End-to-end seam check: the context guard rejects the original messages, the
// session rewrite runs, and the guard then accepts the rewritten messages.
it("lets a retry pass the real guard after aggregate session rewrite", async () => {
const { PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE, installToolResultContextGuard } =
await import("./tool-result-context-guard.js");
const sessionFile = "/tmp/tool-result-truncation-seam-session.jsonl";
const contextWindowTokens = 10_000;
const originalMessages = [
makeUserMessage("u".repeat(20_000)),
makeAssistantMessage("reading files"),
makeToolResult("a".repeat(10_000), "call_a"),
makeToolResult("b".repeat(10_000), "call_b"),
makeToolResult("c".repeat(10_000), "call_c"),
];
// Precondition 1: the guard installed on a bare agent object throws the preemptive overflow.
const guardAgent = {};
installToolResultContextGuard({ agent: guardAgent, contextWindowTokens });
await expect(
(
guardAgent as {
transformContext?: (messages: AgentMessage[], signal: AbortSignal) => unknown;
}
).transformContext?.(originalMessages, new AbortController().signal),
).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
// Precondition 2: the detector agrees that truncation is worth attempting.
expect(
sessionLikelyHasOversizedToolResults({
messages: originalMessages,
contextWindowTokens,
}),
).toBe(true);
const sessionManager = SessionManager.inMemory();
for (const message of originalMessages) {
sessionManager.appendMessage(message);
}
const openSpy = vi
.spyOn(SessionManager, "open")
.mockReturnValue(sessionManager as unknown as ReturnType<typeof SessionManager.open>);
try {
const rewriteResult = await truncateOversizedToolResultsInSession({
sessionFile,
contextWindowTokens,
sessionKey: "agent:main:seam-test",
});
expect(rewriteResult.truncated).toBe(true);
expect(rewriteResult.truncatedCount).toBeGreaterThan(0);
// Rebuild the message list from the rewritten branch and run it through a fresh guard.
const rewrittenMessages = sessionManager
.getBranch()
.filter((entry) => entry.type === "message")
.map((entry) => (entry.type === "message" ? entry.message : null))
.filter((message): message is AgentMessage => message !== null);
const retryAgent = {};
installToolResultContextGuard({ agent: retryAgent, contextWindowTokens });
await expect(
(
retryAgent as {
transformContext?: (messages: AgentMessage[], signal: AbortSignal) => unknown;
}
).transformContext?.(rewrittenMessages, new AbortController().signal),
).resolves.toBeDefined();
} finally {
openSpy.mockRestore();
}
});
});
// Unit tests for the overflow heuristic: single oversized result, aggregate
// pressure from several medium results, and the empty/small negative cases.
describe("sessionLikelyHasOversizedToolResults", () => {
// Small tool result with a large window: nothing to truncate.
it("returns false when no tool results are oversized", () => {
const messages = [makeUserMessage("hello"), makeToolResult("small result")];
expect(
sessionLikelyHasOversizedToolResults({
messages,
contextWindowTokens: 200_000,
}),
).toBe(false);
});
// One 500k-char result is reported as oversized for a 128k-token window.
it("returns true when a tool result is oversized", () => {
const messages = [makeUserMessage("hello"), makeToolResult("x".repeat(500_000))];
expect(
sessionLikelyHasOversizedToolResults({
messages,
contextWindowTokens: 128_000,
}),
).toBe(true);
});
// Three 10k-char results against a 10k-token window are expected to trip the
// aggregate check.
it("returns true when several medium tool results exceed the aggregate overflow budget", () => {
const messages = [
makeUserMessage("u".repeat(20_000)),
makeToolResult("a".repeat(10_000)),
makeToolResult("b".repeat(10_000)),
makeToolResult("c".repeat(10_000)),
];
expect(
sessionLikelyHasOversizedToolResults({
messages,
contextWindowTokens: 10_000,
}),
).toBe(true);
});
// No messages means no tool results were seen, so the heuristic must be false.
it("returns false for empty messages", () => {
expect(
sessionLikelyHasOversizedToolResults({
messages: [],
contextWindowTokens: 200_000,
}),
).toBe(false);
});
});
describe("truncateToolResultText head+tail strategy", () => {
it("preserves error content at the tail when present", () => {
const head = "Line 1\n".repeat(500);

View File

@@ -1,16 +1,5 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { TextContent } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import { emitSessionTranscriptUpdate } from "../../sessions/transcript-events.js";
import { acquireSessionWriteLock } from "../session-write-lock.js";
import { log } from "./logger.js";
import {
CHARS_PER_TOKEN_ESTIMATE,
TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE,
createMessageCharEstimateCache,
estimateContextChars,
} from "./tool-result-char-estimator.js";
import { rewriteTranscriptEntriesInSessionManager } from "./transcript-rewrite.js";
/**
* Maximum share of the context window a single tool result should occupy.
@@ -18,11 +7,6 @@ import { rewriteTranscriptEntriesInSessionManager } from "./transcript-rewrite.j
* consume more than 30% of the context window even without other messages.
*/
const MAX_TOOL_RESULT_CONTEXT_SHARE = 0.3;
const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
const PREEMPTIVE_OVERFLOW_RATIO = 0.9;
const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO =
CHARS_PER_TOKEN_ESTIMATE / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE;
const AGGREGATE_TRUNCATION_MIN_KEEP_CHARS = 256;
/**
* Default hard cap for a single live tool result text block.
@@ -58,56 +42,6 @@ type ToolResultTruncationOptions = {
minKeepChars?: number;
};
// A tool-result transcript entry that may be rewritten with truncated content.
type ToolResultRewriteCandidate = {
// `id` of the branch entry that holds the tool result.
entryId: string;
// Position of the entry within the branch array; used to order candidates by recency.
entryIndex: number;
// The original tool-result message.
message: AgentMessage;
// Text length of the message as reported by `getToolResultTextLength`.
textLength: number;
};
/**
 * Estimate the character budget available for model input.
 *
 * The token window is converted to characters with `CHARS_PER_TOKEN_ESTIMATE`,
 * scaled by `CONTEXT_INPUT_HEADROOM_RATIO`, floored, and clamped to a minimum
 * of 1,024 characters.
 *
 * @param contextWindowTokens Size of the context window, in tokens.
 * @returns The budget in characters (never below 1,024).
 */
function calculateContextBudgetChars(contextWindowTokens: number): number {
  const minimumBudgetChars = 1_024;
  const headroomAdjustedChars = Math.floor(
    contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * CONTEXT_INPUT_HEADROOM_RATIO,
  );
  return Math.max(minimumBudgetChars, headroomAdjustedChars);
}
/**
 * Estimate the character count above which the context is treated as
 * preemptively overflowing.
 *
 * This is the window converted to characters and scaled by
 * `PREEMPTIVE_OVERFLOW_RATIO` (floored), but never lower than the regular
 * context budget from `calculateContextBudgetChars`.
 *
 * @param contextWindowTokens Size of the context window, in tokens.
 * @returns The overflow threshold in characters.
 */
function calculatePreemptiveOverflowChars(contextWindowTokens: number): number {
  const budgetFloorChars = calculateContextBudgetChars(contextWindowTokens);
  const highWaterChars = Math.floor(
    contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO,
  );
  return Math.max(budgetFloorChars, highWaterChars);
}
/**
 * Convert a raw tool-result text length into estimated context characters by
 * applying `TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO` and rounding up.
 *
 * @param textLength Length of the tool-result text.
 * @returns The estimated character cost, rounded up to an integer.
 */
function estimateToolResultCharsFromTextLength(textLength: number): number {
  const weightedChars = textLength * TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO;
  return Math.ceil(weightedChars);
}
/**
 * Walk a session branch once and split it into the full message list and the
 * subset of tool-result entries that could be rewritten.
 *
 * Non-message entries are ignored. Every message entry is added to `messages`;
 * entries whose message role is `"toolResult"` are also recorded as
 * candidates with their entry id, branch index, and text length.
 *
 * @param branch Entries of the current session branch, in branch order.
 * @returns The rewrite candidates and all messages, both in branch order.
 */
function collectToolResultRewriteCandidates(branch: ReturnType<SessionManager["getBranch"]>): {
  candidates: ToolResultRewriteCandidate[];
  messages: AgentMessage[];
} {
  const messages: AgentMessage[] = [];
  const candidates: ToolResultRewriteCandidate[] = [];

  for (const [entryIndex, entry] of branch.entries()) {
    if (entry.type !== "message") {
      continue;
    }
    const { message } = entry;
    messages.push(message);

    const role = (message as { role?: string }).role;
    if (role === "toolResult") {
      candidates.push({
        entryId: entry.id,
        entryIndex,
        message,
        textLength: getToolResultTextLength(message),
      });
    }
  }

  return { candidates, messages };
}
/**
* Marker inserted between head and tail when using head+tail truncation.
*/
@@ -282,142 +216,6 @@ export function truncateToolResultMessage(
return { ...msg, content: newContent } as AgentMessage;
}
/**
 * Truncate tool results stored in a session transcript so that a retried
 * prompt is more likely to fit the context window.
 *
 * Flow, as implemented below:
 * 1. Acquire the session write lock and open the session with `SessionManager.open`.
 * 2. Walk the current branch and collect every tool-result message as a candidate.
 * 3. Visit candidates newest-first. Results longer than
 *    `calculateMaxToolResultChars(contextWindowTokens)` are capped to that size;
 *    other results are shrunk only while the estimated context still exceeds the
 *    preemptive overflow threshold, and are never targeted below
 *    `AGGREGATE_TRUNCATION_MIN_KEEP_CHARS`.
 * 4. Pass the replacements to `rewriteTranscriptEntriesInSessionManager` and emit
 *    a transcript-update event when it reports a change.
 *
 * Errors raised inside the `try` block are not rethrown: they are logged at warn
 * level and reported through `reason` with `truncated: false`. The lock, if it
 * was acquired, is released in `finally`.
 *
 * @param params.sessionFile Path of the session file to open and lock.
 * @param params.contextWindowTokens Context window size used to derive the limits.
 * @param params.sessionId Used only in log lines, as a fallback for `sessionKey`.
 * @param params.sessionKey Used only in log lines.
 * @returns Whether anything was rewritten, how many entries were rewritten, and
 *   an optional reason string.
 */
export async function truncateOversizedToolResultsInSession(params: {
sessionFile: string;
contextWindowTokens: number;
sessionId?: string;
sessionKey?: string;
}): Promise<{ truncated: boolean; truncatedCount: number; reason?: string }> {
const { sessionFile, contextWindowTokens } = params;
// Per-result cap; anything longer is considered individually oversized.
const maxChars = calculateMaxToolResultChars(contextWindowTokens);
let sessionLock: Awaited<ReturnType<typeof acquireSessionWriteLock>> | undefined;
try {
// Hold the write lock for the whole read-modify-write cycle.
sessionLock = await acquireSessionWriteLock({ sessionFile });
const sessionManager = SessionManager.open(sessionFile);
const branch = sessionManager.getBranch();
if (branch.length === 0) {
return { truncated: false, truncatedCount: 0, reason: "empty session" };
}
const { candidates, messages } = collectToolResultRewriteCandidates(branch);
const oversizedCandidates = candidates.filter((candidate) => candidate.textLength > maxChars);
for (const candidate of oversizedCandidates) {
log.info(
`[tool-result-truncation] Found oversized tool result: ` +
`entry=${candidate.entryId} chars=${candidate.textLength} maxChars=${maxChars} ` +
`sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
);
}
// Estimated characters the whole branch is over the preemptive overflow threshold (0 if under).
const currentContextChars = estimateContextChars(messages, createMessageCharEstimateCache());
const overflowThresholdChars = calculatePreemptiveOverflowChars(contextWindowTokens);
const aggregateCharsNeeded = Math.max(0, currentContextChars - overflowThresholdChars);
if (oversizedCandidates.length === 0 && aggregateCharsNeeded <= 0) {
return { truncated: false, truncatedCount: 0, reason: "no tool result truncation needed" };
}
// Running deficit, decremented inside the flatMap callback as results are shrunk.
let remainingAggregateCharsNeeded = aggregateCharsNeeded;
// Newest entries first (sorted copy; `candidates` itself is left untouched).
const candidatesByRecency = [...candidates].toSorted((a, b) => b.entryIndex - a.entryIndex);
const replacements = candidatesByRecency.flatMap((candidate) => {
// A result may absorb part of the aggregate deficit only if it is larger than the minimum keep size.
const aggregateEligible =
remainingAggregateCharsNeeded > 0 &&
candidate.textLength > AGGREGATE_TRUNCATION_MIN_KEEP_CHARS;
// Oversized results are capped at maxChars; eligible results are reduced by the
// remaining deficit (converted from estimate chars to text chars); others are left alone.
const targetChars =
candidate.textLength > maxChars
? maxChars
: aggregateEligible
? Math.max(
AGGREGATE_TRUNCATION_MIN_KEEP_CHARS,
candidate.textLength -
Math.ceil(remainingAggregateCharsNeeded / TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO),
)
: candidate.textLength;
if (targetChars >= candidate.textLength) {
return [];
}
// Only the aggregate path overrides the keep minimum; the oversized path passes no options.
const minKeepChars =
candidate.textLength > maxChars ? undefined : AGGREGATE_TRUNCATION_MIN_KEEP_CHARS;
const message = truncateToolResultMessage(
candidate.message,
targetChars,
minKeepChars === undefined ? {} : { minKeepChars },
);
const newLength = getToolResultTextLength(message);
// Skip replacements that did not actually make the result shorter.
if (newLength >= candidate.textLength) {
return [];
}
// Credit the saved characters (in estimate units) against the remaining deficit.
const reducedEstimateChars = estimateToolResultCharsFromTextLength(
candidate.textLength - newLength,
);
remainingAggregateCharsNeeded = Math.max(
0,
remainingAggregateCharsNeeded - reducedEstimateChars,
);
log.info(
`[tool-result-truncation] Truncated tool result: ` +
`originalEntry=${candidate.entryId} newChars=${newLength} ` +
`sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
);
return [{ entryId: candidate.entryId, message }];
});
if (replacements.length === 0) {
return {
truncated: false,
truncatedCount: 0,
reason:
oversizedCandidates.length > 0
? "oversized tool results could not be reduced"
: "aggregate tool result overflow could not be reduced",
};
}
const rewriteResult = rewriteTranscriptEntriesInSessionManager({
sessionManager,
replacements,
});
// Notify transcript listeners only when the rewrite reports a change.
if (rewriteResult.changed) {
emitSessionTranscriptUpdate(sessionFile);
}
// NOTE(review): this summary is logged whether or not `rewriteResult.changed` is true.
log.info(
`[tool-result-truncation] Truncated ${rewriteResult.rewrittenEntries} tool result(s) in session ` +
`(contextWindow=${contextWindowTokens} maxChars=${maxChars}) ` +
`sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
);
return {
truncated: rewriteResult.changed,
truncatedCount: rewriteResult.rewrittenEntries,
reason: rewriteResult.reason,
};
} catch (err) {
// Best-effort recovery step: report the failure instead of propagating it.
const errMsg = err instanceof Error ? err.message : String(err);
log.warn(`[tool-result-truncation] Failed to truncate: ${errMsg}`);
return { truncated: false, truncatedCount: 0, reason: errMsg };
} finally {
// `sessionLock` is undefined when acquisition itself failed.
await sessionLock?.release();
}
}
/**
* Truncate oversized tool results in an array of messages (in-memory).
* Returns a new array with truncated messages.
@@ -457,33 +255,3 @@ export function isOversizedToolResult(msg: AgentMessage, contextWindowTokens: nu
const maxChars = calculateMaxToolResultChars(contextWindowTokens);
return getToolResultTextLength(msg) > maxChars;
}
/**
 * Heuristic used after a context overflow to decide whether truncating tool
 * results is worth attempting.
 *
 * Returns `true` as soon as any single tool result is longer than
 * `calculateMaxToolResultChars(contextWindowTokens)`. Otherwise returns `true`
 * only when at least one tool result exists and the summed estimated size of
 * all tool results exceeds the preemptive overflow threshold.
 *
 * @param params.messages Messages to inspect; only `toolResult` roles are counted.
 * @param params.contextWindowTokens Context window size used to derive both limits.
 * @returns Whether tool results are a likely cause of the overflow.
 */
export function sessionLikelyHasOversizedToolResults(params: {
  messages: AgentMessage[];
  contextWindowTokens: number;
}): boolean {
  const { messages, contextWindowTokens } = params;
  const singleResultLimit = calculateMaxToolResultChars(contextWindowTokens);
  const aggregateLimit = calculatePreemptiveOverflowChars(contextWindowTokens);

  let toolResultCount = 0;
  let estimatedToolResultChars = 0;

  for (const message of messages) {
    const isToolResult = (message as { role?: string }).role === "toolResult";
    if (isToolResult) {
      toolResultCount += 1;
      const resultLength = getToolResultTextLength(message);
      estimatedToolResultChars += estimateToolResultCharsFromTextLength(resultLength);
      // A single oversized result is enough to answer yes.
      if (resultLength > singleResultLimit) {
        return true;
      }
    }
  }

  if (toolResultCount === 0) {
    return false;
  }
  return estimatedToolResultChars > aggregateLimit;
}