fix(cache): delay history image pruning to preserve prompt cache prefix (#58038)

pruneProcessedHistoryImages was stripping image blocks from every
already-answered user turn on each run. Turn N sends image bytes → provider
caches the prefix. Turn N+1 replaces image with text marker → bytes diverge
at that message → cache miss from there onward.

Now only prune images older than 3 assistant turns. Recent history stays
byte-identical so the cached prefix survives, while legacy sessions with
persisted image payloads still get cleaned up.
This commit is contained in:
Boris Cherny
2026-04-03 17:22:58 -07:00
committed by GitHub
parent 300fb36879
commit af81c437fa
3 changed files with 79 additions and 17 deletions

View File

@@ -1531,7 +1531,8 @@ export async function runEmbeddedAttempt(
try {
// Idempotent cleanup for legacy sessions with persisted image payloads.
// Called each run; only mutates already-answered user turns that still carry image blocks.
// Only mutates user turns older than a few assistant replies so recent
// history stays byte-identical for prompt-cache prefix matching.
const didPruneImages = pruneProcessedHistoryImages(activeSession.messages);
if (didPruneImages) {
activeSession.agent.replaceMessages(activeSession.messages);

View File

@@ -28,23 +28,44 @@ function expectPrunedImageMessage(
describe("pruneProcessedHistoryImages", () => {
const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };
const assistantTurn = () => castAgentMessage({ role: "assistant", content: "ack" });
const userText = () => castAgentMessage({ role: "user", content: "more" });
it("prunes image blocks from user messages that already have assistant replies", () => {
it("prunes image blocks from user messages older than 3 assistant turns", () => {
const messages: AgentMessage[] = [
castAgentMessage({
role: "user",
content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }],
}),
castAgentMessage({
role: "assistant",
content: "got it",
}),
assistantTurn(),
userText(),
assistantTurn(),
userText(),
assistantTurn(),
];
const content = expectPrunedImageMessage(messages, "expected user array content");
expect(content[0]?.type).toBe("text");
});
it("keeps image blocks within the last 3 assistant turns to preserve prompt cache", () => {
const messages: AgentMessage[] = [
castAgentMessage({
role: "user",
content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }],
}),
assistantTurn(),
userText(),
assistantTurn(),
];
const didMutate = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(false);
const content = expectArrayMessageContent(messages[0], "expected user array content");
expect(content[1]).toMatchObject({ type: "image", data: "abc" });
});
it("does not prune latest user message when no assistant response exists yet", () => {
const messages: AgentMessage[] = [
castAgentMessage({
@@ -61,22 +82,49 @@ describe("pruneProcessedHistoryImages", () => {
expect(content[1]).toMatchObject({ type: "image", data: "abc" });
});
it("prunes image blocks from toolResult messages that already have assistant replies", () => {
it("prunes image blocks from toolResult messages older than 3 assistant turns", () => {
const messages: AgentMessage[] = [
castAgentMessage({
role: "toolResult",
toolName: "read",
content: [{ type: "text", text: "screenshot bytes" }, { ...image }],
}),
castAgentMessage({
role: "assistant",
content: "ack",
}),
assistantTurn(),
userText(),
assistantTurn(),
userText(),
assistantTurn(),
];
expectPrunedImageMessage(messages, "expected toolResult array content");
});
it("prunes only old images while preserving recent ones", () => {
const messages: AgentMessage[] = [
castAgentMessage({
role: "user",
content: [{ type: "text", text: "old" }, { ...image }],
}),
assistantTurn(),
userText(),
assistantTurn(),
castAgentMessage({
role: "user",
content: [{ type: "text", text: "recent" }, { ...image }],
}),
assistantTurn(),
];
const didMutate = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(true);
const oldContent = expectArrayMessageContent(messages[0], "expected old user content");
expect(oldContent[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER });
const recentContent = expectArrayMessageContent(messages[4], "expected recent user content");
expect(recentContent[1]).toMatchObject({ type: "image", data: "abc" });
});
it("does not change messages when no assistant turn exists", () => {
const messages: AgentMessage[] = [
castAgentMessage({

View File

@@ -2,24 +2,37 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]";
/**
* Size of the protected recent-history window, counted in assistant messages from
* the end of the transcript. The pruning loop only visits messages located before
* the Nth most-recent assistant message (N = this value); that assistant message and
* everything after it are left untouched. Rewriting them would diverge the request
* bytes from what the provider cached on the previous turn, invalidating the
* prompt-cache prefix.
*
* Note: the user/toolResult message that immediately precedes the Nth most-recent
* assistant message sits before the boundary and is therefore eligible for pruning.
* When fewer than N assistant messages exist, nothing is pruned.
*/
const PRESERVE_RECENT_ASSISTANT_TURNS = 3;
/**
* Idempotent cleanup for legacy sessions that persisted image blocks in history.
* Called each run; mutates only user turns that already have an assistant reply.
* Called each run; mutates only user turns that are older than
* {@link PRESERVE_RECENT_ASSISTANT_TURNS} assistant replies so recent turns remain
* byte-identical for prompt caching.
*/
export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
let lastAssistantIndex = -1;
let assistantSeen = 0;
let pruneBeforeIndex = -1;
for (let i = messages.length - 1; i >= 0; i--) {
if (messages[i]?.role === "assistant") {
lastAssistantIndex = i;
break;
assistantSeen++;
if (assistantSeen >= PRESERVE_RECENT_ASSISTANT_TURNS) {
pruneBeforeIndex = i;
break;
}
}
}
if (lastAssistantIndex < 0) {
if (pruneBeforeIndex < 0) {
return false;
}
let didMutate = false;
for (let i = 0; i < lastAssistantIndex; i++) {
for (let i = 0; i < pruneBeforeIndex; i++) {
const message = messages[i];
if (
!message ||