diff --git a/src/agents/pi-embedded-runner/run/history-image-prune.test.ts b/src/agents/pi-embedded-runner/run/history-image-prune.test.ts index bf4b27f5beb..dbed0335435 100644 --- a/src/agents/pi-embedded-runner/run/history-image-prune.test.ts +++ b/src/agents/pi-embedded-runner/run/history-image-prune.test.ts @@ -49,6 +49,30 @@ describe("pruneProcessedHistoryImages", () => { expect(first.content[1]).toMatchObject({ type: "image", data: "abc" }); }); + it("prunes image blocks from toolResult messages that already have assistant replies", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "toolResult", + toolName: "read", + content: [{ type: "text", text: "screenshot bytes" }, { ...image }], + }), + castAgentMessage({ + role: "assistant", + content: "ack", + }), + ]; + + const didMutate = pruneProcessedHistoryImages(messages); + + expect(didMutate).toBe(true); + const firstTool = messages[0] as Extract | undefined; + if (!firstTool || !Array.isArray(firstTool.content)) { + throw new Error("expected toolResult array content"); + } + expect(firstTool.content).toHaveLength(2); + expect(firstTool.content[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER }); + }); + it("does not change messages when no assistant turn exists", () => { const messages: AgentMessage[] = [ castAgentMessage({ diff --git a/src/agents/pi-embedded-runner/run/history-image-prune.ts b/src/agents/pi-embedded-runner/run/history-image-prune.ts index d7dbea5de38..4e92bb08f01 100644 --- a/src/agents/pi-embedded-runner/run/history-image-prune.ts +++ b/src/agents/pi-embedded-runner/run/history-image-prune.ts @@ -21,7 +21,11 @@ export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean { let didMutate = false; for (let i = 0; i < lastAssistantIndex; i++) { const message = messages[i]; - if (!message || message.role !== "user" || !Array.isArray(message.content)) { + if ( + !message || + (message.role !== "user" && message.role !== "toolResult") || + !Array.isArray(message.content) + ) { continue; } for (let j = 0; j < message.content.length; j++) { diff --git a/src/agents/pi-extensions/context-pruning/pruner.test.ts b/src/agents/pi-extensions/context-pruning/pruner.test.ts index 3985bb2feb1..57a5c9f50f7 100644 --- a/src/agents/pi-extensions/context-pruning/pruner.test.ts +++ b/src/agents/pi-extensions/context-pruning/pruner.test.ts @@ -45,6 +45,19 @@ function makeAssistant(content: AssistantMessage["content"]): AgentMessage { }; } +function makeToolResult( + content: Array< + { type: "text"; text: string } | { type: "image"; data: string; mimeType: string } + >, +): AgentMessage { + return { + role: "toolResult", + toolName: "read", + content, + timestamp: Date.now(), + } as AgentMessage; +} + describe("pruneContextMessages", () => { it("does not crash on assistant message with malformed thinking block (missing thinking string)", () => { const messages: AgentMessage[] = [ @@ -109,4 +122,84 @@ describe("pruneContextMessages", () => { }); expect(result).toHaveLength(2); }); + + it("soft-trims image-containing tool results by replacing image blocks with placeholders", () => { + const messages: AgentMessage[] = [ + makeUser("summarize this"), + makeToolResult([ + { type: "text", text: "A".repeat(120) }, + { type: "image", data: "img", mimeType: "image/png" }, + { type: "text", text: "B".repeat(120) }, + ]), + makeAssistant([{ type: "text", text: "done" }]), + ]; + + const result = pruneContextMessages({ + messages, + settings: { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 1, + softTrimRatio: 0, + hardClear: { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS.hardClear, + enabled: false, + }, + softTrim: { + maxChars: 200, + headChars: 170, + tailChars: 30, + }, + }, + ctx: CONTEXT_WINDOW_1M, + isToolPrunable: () => true, + contextWindowTokensOverride: 16, + }); + + const toolResult = result[1] as Extract; + expect(toolResult.content).toHaveLength(1); + expect(toolResult.content[0]).toMatchObject({ type: "text" }); + const textBlock = toolResult.content[0] as { type: "text"; text: string }; + expect(textBlock.text).toContain("[image removed during context pruning]"); + expect(textBlock.text).toContain( + "[Tool result trimmed: kept first 170 chars and last 30 chars", + ); + }); + + it("hard-clears image-containing tool results once ratios require clearing", () => { + const messages: AgentMessage[] = [ + makeUser("summarize this"), + makeToolResult([ + { type: "text", text: "small text" }, + { type: "image", data: "img", mimeType: "image/png" }, + ]), + makeAssistant([{ type: "text", text: "done" }]), + ]; + + const placeholder = "[hard cleared test placeholder]"; + const result = pruneContextMessages({ + messages, + settings: { + ...DEFAULT_CONTEXT_PRUNING_SETTINGS, + keepLastAssistants: 1, + softTrimRatio: 0, + hardClearRatio: 0, + minPrunableToolChars: 1, + softTrim: { + maxChars: 5_000, + headChars: 2_000, + tailChars: 2_000, + }, + hardClear: { + enabled: true, + placeholder, + }, + }, + ctx: CONTEXT_WINDOW_1M, + isToolPrunable: () => true, + contextWindowTokensOverride: 8, + }); + + const toolResult = result[1] as Extract; + expect(toolResult.content).toEqual([{ type: "text", text: placeholder }]); + }); }); diff --git a/src/agents/pi-extensions/context-pruning/pruner.ts b/src/agents/pi-extensions/context-pruning/pruner.ts index c195fa79e09..0bb24b5b2a7 100644 --- a/src/agents/pi-extensions/context-pruning/pruner.ts +++ b/src/agents/pi-extensions/context-pruning/pruner.ts @@ -5,9 +5,8 @@ import type { EffectiveContextPruningSettings } from "./settings.js"; import { makeToolPrunablePredicate } from "./tools.js"; const CHARS_PER_TOKEN_ESTIMATE = 4; -// We currently skip pruning tool results that contain images. Still, we count them (approx.) so -// we start trimming prunable tool results earlier when image-heavy context is consuming the window. const IMAGE_CHAR_ESTIMATE = 8_000; +const PRUNED_CONTEXT_IMAGE_MARKER = "[image removed during context pruning]"; function asText(text: string): TextContent { return { type: "text", text }; @@ -23,6 +22,22 @@ function collectTextSegments(content: ReadonlyArray) return parts; } +function collectPrunableToolResultSegments( + content: ReadonlyArray, +): string[] { + const parts: string[] = []; + for (const block of content) { + if (block.type === "text") { + parts.push(block.text); + continue; + } + if (block.type === "image") { + parts.push(PRUNED_CONTEXT_IMAGE_MARKER); + } + } + return parts; +} + function estimateJoinedTextLength(parts: string[]): number { if (parts.length === 0) { return 0; @@ -190,12 +205,9 @@ function softTrimToolResultMessage(params: { settings: EffectiveContextPruningSettings; }): ToolResultMessage | null { const { msg, settings } = params; - // Ignore image tool results for now: these are often directly relevant and hard to partially prune safely. - if (hasImageBlocks(msg.content)) { - return null; - } - - const parts = collectTextSegments(msg.content); + const parts = hasImageBlocks(msg.content) + ? collectPrunableToolResultSegments(msg.content) + : collectTextSegments(msg.content); const rawLen = estimateJoinedTextLength(parts); if (rawLen <= settings.softTrim.maxChars) { return null; @@ -274,9 +286,6 @@ export function pruneContextMessages(params: { if (!isToolPrunable(msg.toolName)) { continue; } - if (hasImageBlocks(msg.content)) { - continue; - } prunableToolIndexes.push(i); const updated = softTrimToolResultMessage({