diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts index ce8e80e030a..6f4dea234e7 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.fixture.ts @@ -112,8 +112,8 @@ export function queueOverflowAttemptWithOversizedToolOutput( promptError: overflowError, messagesSnapshot: [ { - role: "assistant", - content: "big tool output", + role: "toolResult", + content: [{ type: "text", text: "x".repeat(80_000) }], } as unknown as EmbeddedRunAttemptResult["messagesSnapshot"][number], ], }), diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts index 88ae95f34ed..3f32edeed85 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.harness.ts @@ -69,6 +69,19 @@ export const mockedPrepareProviderRuntimeAuth = vi.fn(async () => undefined); export const mockedRunEmbeddedAttempt = vi.fn<(params: unknown) => Promise>(); export const mockedRunContextEngineMaintenance = vi.fn(async () => undefined); +export const mockedSessionLikelyHasOversizedToolResults = vi.fn(() => false); +type MockTruncateOversizedToolResultsResult = { + truncated: boolean; + truncatedCount: number; + reason?: string; +}; +export const mockedTruncateOversizedToolResultsInSession = vi.fn< + () => Promise<MockTruncateOversizedToolResultsResult> +>(async () => ({ + truncated: false, + truncatedCount: 0, + reason: "no oversized tool results", +})); type MockFailoverErrorDescription = { message: string; @@ -203,6 +216,14 @@ export function resetRunOverflowCompactionHarnessMocks(): void { mockedRunEmbeddedAttempt.mockReset(); mockedRunContextEngineMaintenance.mockReset(); mockedRunContextEngineMaintenance.mockResolvedValue(undefined); + mockedSessionLikelyHasOversizedToolResults.mockReset(); + 
mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false); + mockedTruncateOversizedToolResultsInSession.mockReset(); + mockedTruncateOversizedToolResultsInSession.mockResolvedValue({ + truncated: false, + truncatedCount: 0, + reason: "no oversized tool results", + }); mockedCoerceToFailoverError.mockReset(); mockedCoerceToFailoverError.mockReturnValue(null); @@ -375,6 +396,11 @@ export async function loadRunOverflowCompactionHarness(): Promise<{ runEmbeddedAttempt: mockedRunEmbeddedAttempt, })); + vi.doMock("./tool-result-truncation.js", () => ({ + sessionLikelyHasOversizedToolResults: mockedSessionLikelyHasOversizedToolResults, + truncateOversizedToolResultsInSession: mockedTruncateOversizedToolResultsInSession, + })); + vi.doMock("./context-engine-maintenance.js", () => ({ runContextEngineMaintenance: mockedRunContextEngineMaintenance, })); diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.loop.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.loop.test.ts index a476f76eb82..6c645359b6d 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.loop.test.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.loop.test.ts @@ -4,6 +4,7 @@ import { makeCompactionSuccess, makeOverflowError, mockOverflowRetrySuccess, + queueOverflowAttemptWithOversizedToolOutput, } from "./run.overflow-compaction.fixture.js"; import { loadRunOverflowCompactionHarness, @@ -13,6 +14,8 @@ import { mockedIsLikelyContextOverflowError, mockedLog, mockedRunEmbeddedAttempt, + mockedSessionLikelyHasOversizedToolResults, + mockedTruncateOversizedToolResultsInSession, overflowBaseRunParams as baseParams, } from "./run.overflow-compaction.harness.js"; @@ -26,6 +29,8 @@ describe("overflow compaction in run loop", () => { beforeEach(() => { mockedRunEmbeddedAttempt.mockReset(); mockedCompactDirect.mockReset(); + mockedSessionLikelyHasOversizedToolResults.mockReset(); + mockedTruncateOversizedToolResultsInSession.mockReset(); 
mockedContextEngine.info.ownsCompaction = false; mockedLog.debug.mockReset(); mockedLog.info.mockReset(); @@ -57,6 +62,12 @@ describe("overflow compaction in run loop", () => { compacted: false, reason: "nothing to compact", }); + mockedSessionLikelyHasOversizedToolResults.mockReturnValue(false); + mockedTruncateOversizedToolResultsInSession.mockResolvedValue({ + truncated: false, + truncatedCount: 0, + reason: "no oversized tool results", + }); }); it("retries after successful compaction on context overflow promptError", async () => { @@ -129,6 +140,37 @@ describe("overflow compaction in run loop", () => { expect(mockedLog.warn).toHaveBeenCalledWith(expect.stringContaining("auto-compaction failed")); }); + it("falls back to tool-result truncation and retries when oversized results are detected", async () => { + queueOverflowAttemptWithOversizedToolOutput(mockedRunEmbeddedAttempt, makeOverflowError()); + mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: null })); + + mockedCompactDirect.mockResolvedValueOnce({ + ok: false, + compacted: false, + reason: "nothing to compact", + }); + mockedSessionLikelyHasOversizedToolResults.mockReturnValue(true); + mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({ + truncated: true, + truncatedCount: 1, + }); + + const result = await runEmbeddedPiAgent(baseParams); + + expect(mockedCompactDirect).toHaveBeenCalledTimes(1); + expect(mockedSessionLikelyHasOversizedToolResults).toHaveBeenCalledWith( + expect.objectContaining({ contextWindowTokens: 200000 }), + ); + expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledWith( + expect.objectContaining({ sessionFile: "/tmp/session.json" }), + ); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); + expect(mockedLog.info).toHaveBeenCalledWith( + expect.stringContaining("Truncated 1 tool result(s)"), + ); + expect(result.meta.error).toBeUndefined(); + }); + it("retries compaction up to 3 times before giving up", async () 
=> { const overflowError = makeOverflowError(); diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index f4099086e70..1598cbc9758 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -64,6 +64,10 @@ import { runContextEngineMaintenance } from "./context-engine-maintenance.js"; import { resolveGlobalLane, resolveSessionLane } from "./lanes.js"; import { log } from "./logger.js"; import { resolveModelAsync } from "./model.js"; +import { + sessionLikelyHasOversizedToolResults, + truncateOversizedToolResultsInSession, +} from "./tool-result-truncation.js"; import { handleAssistantFailover } from "./run/assistant-failover.js"; import { runEmbeddedAttempt } from "./run/attempt.js"; import { createEmbeddedRunAuthController } from "./run/auth-controller.js"; @@ -316,6 +320,7 @@ export async function runEmbeddedPiAgent( const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3; const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length); let overflowCompactionAttempts = 0; + let toolResultTruncationAttempted = false; let bootstrapPromptWarningSignaturesSeen = params.bootstrapPromptWarningSignaturesSeen ?? (params.bootstrapPromptWarningSignature ? [params.bootstrapPromptWarningSignature] : []); @@ -922,6 +927,38 @@ export async function runEmbeddedPiAgent( `auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`, ); } + if (!toolResultTruncationAttempted) { + const contextWindowTokens = ctxInfo.tokens; + const hasOversized = attempt.messagesSnapshot + ? 
sessionLikelyHasOversizedToolResults({ + messages: attempt.messagesSnapshot, + contextWindowTokens, + }) + : false; + + if (hasOversized) { + toolResultTruncationAttempted = true; + log.warn( + `[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` + + `(contextWindow=${contextWindowTokens} tokens)`, + ); + const truncResult = await truncateOversizedToolResultsInSession({ + sessionFile: params.sessionFile, + contextWindowTokens, + sessionId: params.sessionId, + sessionKey: params.sessionKey, + }); + if (truncResult.truncated) { + log.info( + `[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`, + ); + continue; + } + log.warn( + `[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`, + ); + } + } if ( (isCompactionFailure || overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS) && log.isEnabled("debug") diff --git a/src/agents/pi-embedded-runner/tool-result-truncation.ts b/src/agents/pi-embedded-runner/tool-result-truncation.ts index f5a6bc10d68..c8fcb78a851 100644 --- a/src/agents/pi-embedded-runner/tool-result-truncation.ts +++ b/src/agents/pi-embedded-runner/tool-result-truncation.ts @@ -1,5 +1,10 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { TextContent } from "@mariozechner/pi-ai"; +import { SessionManager } from "@mariozechner/pi-coding-agent"; +import { emitSessionTranscriptUpdate } from "../../sessions/transcript-events.js"; +import { acquireSessionWriteLock } from "../session-write-lock.js"; +import { log } from "./logger.js"; +import { rewriteTranscriptEntriesInSessionManager } from "./transcript-rewrite.js"; /** * Maximum share of the context window a single tool result should occupy. 
@@ -245,6 +250,80 @@ export function truncateOversizedToolResultsInMessages( return { messages: result, truncatedCount }; } +export async function truncateOversizedToolResultsInSession(params: { + sessionFile: string; + contextWindowTokens: number; + sessionId?: string; + sessionKey?: string; +}): Promise<{ truncated: boolean; truncatedCount: number; reason?: string }> { + const { sessionFile, contextWindowTokens } = params; + const maxChars = calculateMaxToolResultChars(contextWindowTokens); + let sessionLock: Awaited<ReturnType<typeof acquireSessionWriteLock>> | undefined; + + try { + sessionLock = await acquireSessionWriteLock({ sessionFile }); + const sessionManager = SessionManager.open(sessionFile); + const branch = sessionManager.getBranch(); + + if (branch.length === 0) { + return { truncated: false, truncatedCount: 0, reason: "empty session" }; + } + + const oversizedIndices: number[] = []; + for (let i = 0; i < branch.length; i += 1) { + const entry = branch[i]; + if (entry.type !== "message") { + continue; + } + const msg = entry.message; + if ((msg as { role?: string }).role !== "toolResult") { + continue; + } + if (getToolResultTextLength(msg) > maxChars) { + oversizedIndices.push(i); + } + } + + if (oversizedIndices.length === 0) { + return { truncated: false, truncatedCount: 0, reason: "no oversized tool results" }; + } + + const replacements = oversizedIndices.flatMap((index) => { + const entry = branch[index]; + if (!entry || entry.type !== "message") { + return []; + } + return [{ entryId: entry.id, message: truncateToolResultMessage(entry.message, maxChars) }]; + }); + + const rewriteResult = rewriteTranscriptEntriesInSessionManager({ + sessionManager, + replacements, + }); + if (rewriteResult.changed) { + emitSessionTranscriptUpdate(sessionFile); + } + + log.info( + `[tool-result-truncation] Truncated ${rewriteResult.rewrittenEntries} tool result(s) in session ` + + `(contextWindow=${contextWindowTokens} maxChars=${maxChars}) ` + + `sessionKey=${params.sessionKey ?? params.sessionId ?? 
"unknown"}`, + ); + + return { + truncated: rewriteResult.changed, + truncatedCount: rewriteResult.rewrittenEntries, + reason: rewriteResult.reason, + }; + } catch (err) { + const errMsg = err instanceof Error ? err.message : String(err); + log.warn(`[tool-result-truncation] Failed to truncate: ${errMsg}`); + return { truncated: false, truncatedCount: 0, reason: errMsg }; + } finally { + await sessionLock?.release(); + } +} + /** * Check if a tool result message exceeds the size limit for a given context window. */ @@ -255,3 +334,22 @@ export function isOversizedToolResult(msg: AgentMessage, contextWindowTokens: nu const maxChars = calculateMaxToolResultChars(contextWindowTokens); return getToolResultTextLength(msg) > maxChars; } + +export function sessionLikelyHasOversizedToolResults(params: { + messages: AgentMessage[]; + contextWindowTokens: number; +}): boolean { + const { messages, contextWindowTokens } = params; + const maxChars = calculateMaxToolResultChars(contextWindowTokens); + + for (const msg of messages) { + if ((msg as { role?: string }).role !== "toolResult") { + continue; + } + if (getToolResultTextLength(msg) > maxChars) { + return true; + } + } + + return false; +}