Restore readable tool-result overflow fallback

This commit is contained in:
Tak Hoffman
2026-04-05 23:37:40 -05:00
committed by Peter Steinberger
parent 3e2a05f425
commit 09b7c00dab
5 changed files with 205 additions and 2 deletions

View File

@@ -112,8 +112,8 @@ export function queueOverflowAttemptWithOversizedToolOutput(
promptError: overflowError,
messagesSnapshot: [
{
role: "assistant",
content: "big tool output",
role: "toolResult",
content: [{ type: "text", text: "x".repeat(80_000) }],
} as unknown as EmbeddedRunAttemptResult["messagesSnapshot"][number],
],
}),

View File

@@ -69,6 +69,19 @@ export const mockedPrepareProviderRuntimeAuth = vi.fn(async () => undefined);
export const mockedRunEmbeddedAttempt =
vi.fn<(params: unknown) => Promise<EmbeddedRunAttemptResult>>();
export const mockedRunContextEngineMaintenance = vi.fn(async () => undefined);
// Stand-in for `sessionLikelyHasOversizedToolResults`; wired in via the
// `vi.doMock("./tool-result-truncation.js", ...)` call in this harness.
// Defaults to "nothing oversized" so the truncation fallback stays inactive
// unless a test opts in.
export const mockedSessionLikelyHasOversizedToolResults = vi.fn(() => false);
// Local copy of the result shape the truncation mock resolves with.
// NOTE(review): presumably mirrors the real function's return type; keep in sync.
type MockTruncateOversizedToolResultsResult = {
truncated: boolean;
truncatedCount: number;
reason?: string;
};
// Stand-in for `truncateOversizedToolResultsInSession`. The default
// implementation resolves to a "nothing was truncated" outcome.
export const mockedTruncateOversizedToolResultsInSession = vi.fn<
() => Promise<MockTruncateOversizedToolResultsResult>
>(async () => ({
truncated: false,
truncatedCount: 0,
reason: "no oversized tool results",
}));
type MockFailoverErrorDescription = {
message: string;
@@ -203,6 +216,14 @@ export function resetRunOverflowCompactionHarnessMocks(): void {
mockedRunEmbeddedAttempt.mockReset();
mockedRunContextEngineMaintenance.mockReset();
mockedRunContextEngineMaintenance.mockResolvedValue(undefined);
mockedSessionLikelyHasOversizedToolResults.mockReset();
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false);
mockedTruncateOversizedToolResultsInSession.mockReset();
mockedTruncateOversizedToolResultsInSession.mockResolvedValue({
truncated: false,
truncatedCount: 0,
reason: "no oversized tool results",
});
mockedCoerceToFailoverError.mockReset();
mockedCoerceToFailoverError.mockReturnValue(null);
@@ -375,6 +396,11 @@ export async function loadRunOverflowCompactionHarness(): Promise<{
runEmbeddedAttempt: mockedRunEmbeddedAttempt,
}));
vi.doMock("./tool-result-truncation.js", () => ({
sessionLikelyHasOversizedToolResults: mockedSessionLikelyHasOversizedToolResults,
truncateOversizedToolResultsInSession: mockedTruncateOversizedToolResultsInSession,
}));
vi.doMock("./context-engine-maintenance.js", () => ({
runContextEngineMaintenance: mockedRunContextEngineMaintenance,
}));

View File

@@ -4,6 +4,7 @@ import {
makeCompactionSuccess,
makeOverflowError,
mockOverflowRetrySuccess,
queueOverflowAttemptWithOversizedToolOutput,
} from "./run.overflow-compaction.fixture.js";
import {
loadRunOverflowCompactionHarness,
@@ -13,6 +14,8 @@ import {
mockedIsLikelyContextOverflowError,
mockedLog,
mockedRunEmbeddedAttempt,
mockedSessionLikelyHasOversizedToolResults,
mockedTruncateOversizedToolResultsInSession,
overflowBaseRunParams as baseParams,
} from "./run.overflow-compaction.harness.js";
@@ -26,6 +29,8 @@ describe("overflow compaction in run loop", () => {
beforeEach(() => {
mockedRunEmbeddedAttempt.mockReset();
mockedCompactDirect.mockReset();
mockedSessionLikelyHasOversizedToolResults.mockReset();
mockedTruncateOversizedToolResultsInSession.mockReset();
mockedContextEngine.info.ownsCompaction = false;
mockedLog.debug.mockReset();
mockedLog.info.mockReset();
@@ -57,6 +62,12 @@ describe("overflow compaction in run loop", () => {
compacted: false,
reason: "nothing to compact",
});
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false);
mockedTruncateOversizedToolResultsInSession.mockResolvedValue({
truncated: false,
truncatedCount: 0,
reason: "no oversized tool results",
});
});
it("retries after successful compaction on context overflow promptError", async () => {
@@ -129,6 +140,37 @@ describe("overflow compaction in run loop", () => {
expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("auto-compaction failed"));
});
// Scenario: the first attempt overflows, compaction has nothing to compact,
// the truncation fallback rewrites one tool result, and the retry succeeds.
it("falls back to tool-result truncation and retries when oversized results are detected", async () => {
// Attempt #1: queued by the fixture with the overflow error as its promptError.
queueOverflowAttemptWithOversizedToolOutput(mockedRunEmbeddedAttempt, makeOverflowError());
// Attempt #2 (the retry after truncation): completes without a prompt error.
mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
// Compaction reports that it could not reduce the session.
mockedCompactDirect.mockResolvedValueOnce({
ok: false,
compacted: false,
reason: "nothing to compact",
});
// Detection says the snapshot holds an oversized tool result, and the
// truncation pass reports one rewritten entry.
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(true);
mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({
truncated: true,
truncatedCount: 1,
});
const result = await runEmbeddedPiAgent(baseParams);
// Compaction was tried exactly once before the fallback took over.
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
// NOTE(review): 200000 is presumably the context window the harness resolves
// for the mocked model — confirm against the harness setup.
expect(mockedSessionLikelyHasOversizedToolResults).toHaveBeenCalledWith(
expect.objectContaining({ contextWindowTokens: 200000 }),
);
// NOTE(review): "/tmp/session.json" is presumably `baseParams.sessionFile` — confirm.
expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledWith(
expect.objectContaining({ sessionFile: "/tmp/session.json" }),
);
// One failed attempt plus one retry.
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(mockedLog.info).toHaveBeenCalledWith(
expect.stringContaining("Truncated 1 tool result(s)"),
);
// The run ends without a surfaced error.
expect(result.meta.error).toBeUndefined();
});
it("retries compaction up to 3 times before giving up", async () => {
const overflowError = makeOverflowError();

View File

@@ -64,6 +64,10 @@ import { runContextEngineMaintenance } from "./context-engine-maintenance.js";
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
import { log } from "./logger.js";
import { resolveModelAsync } from "./model.js";
import {
sessionLikelyHasOversizedToolResults,
truncateOversizedToolResultsInSession,
} from "./tool-result-truncation.js";
import { handleAssistantFailover } from "./run/assistant-failover.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
import { createEmbeddedRunAuthController } from "./run/auth-controller.js";
@@ -316,6 +320,7 @@ export async function runEmbeddedPiAgent(
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length);
let overflowCompactionAttempts = 0;
let toolResultTruncationAttempted = false;
let bootstrapPromptWarningSignaturesSeen =
params.bootstrapPromptWarningSignaturesSeen ??
(params.bootstrapPromptWarningSignature ? [params.bootstrapPromptWarningSignature] : []);
@@ -922,6 +927,38 @@ export async function runEmbeddedPiAgent(
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
);
}
// Overflow fallback: when the failed attempt's message snapshot contains a
// tool result that is too large for the context window, truncate the
// oversized tool results in the session file and retry.
// `toolResultTruncationAttempted` ensures truncation itself runs at most once
// per run; the cheap in-memory detection may run again on later overflows
// until it first reports an oversized result.
if (!toolResultTruncationAttempted) {
const contextWindowTokens = ctxInfo.tokens;
// Without a snapshot there is nothing to inspect, so treat it as "not oversized".
const hasOversized = attempt.messagesSnapshot
? sessionLikelyHasOversizedToolResults({
messages: attempt.messagesSnapshot,
contextWindowTokens,
})
: false;
if (hasOversized) {
// Set the flag before awaiting so the fallback is never re-entered,
// whether or not the truncation succeeds.
toolResultTruncationAttempted = true;
log.warn(
`[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
`(contextWindow=${contextWindowTokens} tokens)`,
);
const truncResult = await truncateOversizedToolResultsInSession({
sessionFile: params.sessionFile,
contextWindowTokens,
sessionId: params.sessionId,
sessionKey: params.sessionKey,
});
if (truncResult.truncated) {
log.info(
`[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
);
// Start the next loop iteration so the prompt is re-run against the
// rewritten session.
continue;
}
// Nothing was rewritten: fall through to the handling below.
log.warn(
`[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
);
}
}
if (
(isCompactionFailure || overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS) &&
log.isEnabled("debug")

View File

@@ -1,5 +1,10 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { TextContent } from "@mariozechner/pi-ai";
import { SessionManager } from "@mariozechner/pi-coding-agent";
import { emitSessionTranscriptUpdate } from "../../sessions/transcript-events.js";
import { acquireSessionWriteLock } from "../session-write-lock.js";
import { log } from "./logger.js";
import { rewriteTranscriptEntriesInSessionManager } from "./transcript-rewrite.js";
/**
* Maximum share of the context window a single tool result should occupy.
@@ -245,6 +250,80 @@ export function truncateOversizedToolResultsInMessages(
return { messages: result, truncatedCount };
}
/**
 * Truncate oversized tool results stored in a session transcript.
 *
 * Opens `sessionFile` while holding the session write lock, finds every
 * `toolResult` message on the current branch whose text length exceeds
 * `calculateMaxToolResultChars(contextWindowTokens)`, and rewrites those
 * entries with truncated copies. A transcript-update event is emitted only
 * when the rewrite actually changed the session.
 *
 * Failures while locking, reading, rewriting, or releasing the lock are
 * logged and reported through `reason` instead of being thrown, so callers
 * can treat this as a best-effort recovery step.
 *
 * @param params.sessionFile - Path of the session transcript to rewrite.
 * @param params.contextWindowTokens - Context window size used to derive the
 *   per-result character limit.
 * @param params.sessionId - Used only for log output when `sessionKey` is absent.
 * @param params.sessionKey - Used only for log output.
 * @returns `truncated` is true when the transcript was changed,
 *   `truncatedCount` is the number of rewritten entries, and `reason`
 *   explains why nothing was truncated (or carries the error message).
 */
export async function truncateOversizedToolResultsInSession(params: {
  sessionFile: string;
  contextWindowTokens: number;
  sessionId?: string;
  sessionKey?: string;
}): Promise<{ truncated: boolean; truncatedCount: number; reason?: string }> {
  const { sessionFile, contextWindowTokens } = params;
  const maxChars = calculateMaxToolResultChars(contextWindowTokens);
  let sessionLock: Awaited<ReturnType<typeof acquireSessionWriteLock>> | undefined;

  try {
    sessionLock = await acquireSessionWriteLock({ sessionFile });
    const sessionManager = SessionManager.open(sessionFile);
    const branch = sessionManager.getBranch();
    if (branch.length === 0) {
      return { truncated: false, truncatedCount: 0, reason: "empty session" };
    }

    // Single pass over the branch: build a replacement for every oversized
    // tool-result message and skip everything else.
    const replacements = branch.flatMap((entry) => {
      if (entry.type !== "message") {
        return [];
      }
      const msg = entry.message;
      const oversized =
        (msg as { role?: string }).role === "toolResult" &&
        getToolResultTextLength(msg) > maxChars;
      return oversized
        ? [{ entryId: entry.id, message: truncateToolResultMessage(msg, maxChars) }]
        : [];
    });
    if (replacements.length === 0) {
      return { truncated: false, truncatedCount: 0, reason: "no oversized tool results" };
    }

    const rewriteResult = rewriteTranscriptEntriesInSessionManager({
      sessionManager,
      replacements,
    });
    if (rewriteResult.changed) {
      emitSessionTranscriptUpdate(sessionFile);
      // Only claim a truncation in the log when the transcript really changed;
      // the unchanged case is reported to the caller through `reason`.
      log.info(
        `[tool-result-truncation] Truncated ${rewriteResult.rewrittenEntries} tool result(s) in session ` +
          `(contextWindow=${contextWindowTokens} maxChars=${maxChars}) ` +
          `sessionKey=${params.sessionKey ?? params.sessionId ?? "unknown"}`,
      );
    }

    return {
      truncated: rewriteResult.changed,
      truncatedCount: rewriteResult.rewrittenEntries,
      reason: rewriteResult.reason,
    };
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err);
    log.warn(`[tool-result-truncation] Failed to truncate: ${errMsg}`);
    return { truncated: false, truncatedCount: 0, reason: errMsg };
  } finally {
    // A rejection thrown from `finally` would replace the value returned
    // above and escape to the caller, so a failed release is logged instead.
    try {
      await sessionLock?.release();
    } catch (releaseErr) {
      const releaseMsg = releaseErr instanceof Error ? releaseErr.message : String(releaseErr);
      log.warn(`[tool-result-truncation] Failed to release session write lock: ${releaseMsg}`);
    }
  }
}
/**
* Check if a tool result message exceeds the size limit for a given context window.
*/
@@ -255,3 +334,22 @@ export function isOversizedToolResult(msg: AgentMessage, contextWindowTokens: nu
const maxChars = calculateMaxToolResultChars(contextWindowTokens);
return getToolResultTextLength(msg) > maxChars;
}
/**
 * Cheap in-memory check for whether any tool result in `messages` is larger
 * than the per-result character limit derived from `contextWindowTokens`.
 *
 * Only messages whose `role` is `"toolResult"` are measured; the scan stops
 * at the first oversized one.
 *
 * @param params.messages - Messages to inspect.
 * @param params.contextWindowTokens - Context window size used to derive the limit.
 * @returns `true` if at least one tool result exceeds the limit, otherwise `false`.
 */
export function sessionLikelyHasOversizedToolResults(params: {
  messages: AgentMessage[];
  contextWindowTokens: number;
}): boolean {
  const charLimit = calculateMaxToolResultChars(params.contextWindowTokens);
  return params.messages.some(
    (candidate) =>
      (candidate as { role?: string }).role === "toolResult" &&
      getToolResultTextLength(candidate) > charLimit,
  );
}