mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-20 05:31:30 +00:00
fix(cache): delay history image pruning to preserve prompt cache prefix (#58038)
pruneProcessedHistoryImages was stripping image blocks from every already-answered user turn on each run, which defeated prompt caching: turn N sends the image bytes and the provider caches that prefix; turn N+1 then replaces the image with a text marker, so the request bytes diverge at that message and everything from there onward is a cache miss. The function now only prunes images older than 3 assistant turns. Recent history stays byte-identical, so the cached prefix survives, while legacy sessions with persisted image payloads still get cleaned up.
This commit is contained in:
@@ -1531,7 +1531,8 @@ export async function runEmbeddedAttempt(
|
||||
|
||||
try {
|
||||
// Idempotent cleanup for legacy sessions with persisted image payloads.
|
||||
// Called each run; only mutates already-answered user turns that still carry image blocks.
|
||||
// Only mutates user turns older than a few assistant replies so recent
|
||||
// history stays byte-identical for prompt-cache prefix matching.
|
||||
const didPruneImages = pruneProcessedHistoryImages(activeSession.messages);
|
||||
if (didPruneImages) {
|
||||
activeSession.agent.replaceMessages(activeSession.messages);
|
||||
|
||||
@@ -28,23 +28,44 @@ function expectPrunedImageMessage(
|
||||
|
||||
describe("pruneProcessedHistoryImages", () => {
|
||||
const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };
|
||||
const assistantTurn = () => castAgentMessage({ role: "assistant", content: "ack" });
|
||||
const userText = () => castAgentMessage({ role: "user", content: "more" });
|
||||
|
||||
it("prunes image blocks from user messages that already have assistant replies", () => {
|
||||
it("prunes image blocks from user messages older than 3 assistant turns", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({
|
||||
role: "user",
|
||||
content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }],
|
||||
}),
|
||||
castAgentMessage({
|
||||
role: "assistant",
|
||||
content: "got it",
|
||||
}),
|
||||
assistantTurn(),
|
||||
userText(),
|
||||
assistantTurn(),
|
||||
userText(),
|
||||
assistantTurn(),
|
||||
];
|
||||
|
||||
const content = expectPrunedImageMessage(messages, "expected user array content");
|
||||
expect(content[0]?.type).toBe("text");
|
||||
});
|
||||
|
||||
it("keeps image blocks within the last 3 assistant turns to preserve prompt cache", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({
|
||||
role: "user",
|
||||
content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }],
|
||||
}),
|
||||
assistantTurn(),
|
||||
userText(),
|
||||
assistantTurn(),
|
||||
];
|
||||
|
||||
const didMutate = pruneProcessedHistoryImages(messages);
|
||||
|
||||
expect(didMutate).toBe(false);
|
||||
const content = expectArrayMessageContent(messages[0], "expected user array content");
|
||||
expect(content[1]).toMatchObject({ type: "image", data: "abc" });
|
||||
});
|
||||
|
||||
it("does not prune latest user message when no assistant response exists yet", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({
|
||||
@@ -61,22 +82,49 @@ describe("pruneProcessedHistoryImages", () => {
|
||||
expect(content[1]).toMatchObject({ type: "image", data: "abc" });
|
||||
});
|
||||
|
||||
it("prunes image blocks from toolResult messages that already have assistant replies", () => {
|
||||
it("prunes image blocks from toolResult messages older than 3 assistant turns", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({
|
||||
role: "toolResult",
|
||||
toolName: "read",
|
||||
content: [{ type: "text", text: "screenshot bytes" }, { ...image }],
|
||||
}),
|
||||
castAgentMessage({
|
||||
role: "assistant",
|
||||
content: "ack",
|
||||
}),
|
||||
assistantTurn(),
|
||||
userText(),
|
||||
assistantTurn(),
|
||||
userText(),
|
||||
assistantTurn(),
|
||||
];
|
||||
|
||||
expectPrunedImageMessage(messages, "expected toolResult array content");
|
||||
});
|
||||
|
||||
it("prunes only old images while preserving recent ones", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({
|
||||
role: "user",
|
||||
content: [{ type: "text", text: "old" }, { ...image }],
|
||||
}),
|
||||
assistantTurn(),
|
||||
userText(),
|
||||
assistantTurn(),
|
||||
castAgentMessage({
|
||||
role: "user",
|
||||
content: [{ type: "text", text: "recent" }, { ...image }],
|
||||
}),
|
||||
assistantTurn(),
|
||||
];
|
||||
|
||||
const didMutate = pruneProcessedHistoryImages(messages);
|
||||
expect(didMutate).toBe(true);
|
||||
|
||||
const oldContent = expectArrayMessageContent(messages[0], "expected old user content");
|
||||
expect(oldContent[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER });
|
||||
|
||||
const recentContent = expectArrayMessageContent(messages[4], "expected recent user content");
|
||||
expect(recentContent[1]).toMatchObject({ type: "image", data: "abc" });
|
||||
});
|
||||
|
||||
it("does not change messages when no assistant turn exists", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
castAgentMessage({
|
||||
|
||||
@@ -2,24 +2,37 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
|
||||
export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]";
|
||||
|
||||
/**
|
||||
* Number of most-recent assistant turns whose preceding user/toolResult image blocks are
|
||||
* kept intact. Pruning these would diverge the request bytes from what the provider
|
||||
* cached on the previous turn, invalidating the prompt-cache prefix.
|
||||
*/
|
||||
const PRESERVE_RECENT_ASSISTANT_TURNS = 3;
|
||||
|
||||
/**
|
||||
* Idempotent cleanup for legacy sessions that persisted image blocks in history.
|
||||
* Called each run; mutates only user turns that already have an assistant reply.
|
||||
* Called each run; mutates only user turns that are older than
|
||||
* {@link PRESERVE_RECENT_ASSISTANT_TURNS} assistant replies so recent turns remain
|
||||
* byte-identical for prompt caching.
|
||||
*/
|
||||
export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
|
||||
let lastAssistantIndex = -1;
|
||||
let assistantSeen = 0;
|
||||
let pruneBeforeIndex = -1;
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
if (messages[i]?.role === "assistant") {
|
||||
lastAssistantIndex = i;
|
||||
break;
|
||||
assistantSeen++;
|
||||
if (assistantSeen >= PRESERVE_RECENT_ASSISTANT_TURNS) {
|
||||
pruneBeforeIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (lastAssistantIndex < 0) {
|
||||
if (pruneBeforeIndex < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let didMutate = false;
|
||||
for (let i = 0; i < lastAssistantIndex; i++) {
|
||||
for (let i = 0; i < pruneBeforeIndex; i++) {
|
||||
const message = messages[i];
|
||||
if (
|
||||
!message ||
|
||||
|
||||
Reference in New Issue
Block a user