From 2d9a0d9cf00d572a36bce0154a61eb383d802272 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 26 Apr 2026 03:10:10 +0100 Subject: [PATCH] fix: preserve image history while pruning replay context --- CHANGELOG.md | 7 +- docs/concepts/session-pruning.md | 18 +- .../attempt.spawn-workspace.test-support.ts | 3 +- src/agents/pi-embedded-runner/run/attempt.ts | 25 +- .../run/history-image-prune.test.ts | 220 ++++++++++++++++-- .../run/history-image-prune.ts | 108 +++++++-- 6 files changed, 334 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27548868f08..2149a303e78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,7 +69,12 @@ Docs: https://docs.openclaw.ai - ACP: wait for the configured runtime backend to become healthy before startup identity reconciliation, avoiding transient acpx warnings during Gateway boot. Fixes #40566. - Channels/ACP bindings: time out configured binding readiness checks instead of letting Discord preflight hang forever when an ACP target never settles. Fixes #68776. - Control UI: hide the chat loading skeleton during background history reloads when existing messages or active stream content are already visible, avoiding reload flashes on high-latency local gateways. Fixes #71844. Thanks @WolvenRA. -- CLI/status: label the OpenClaw Serve/Funnel setting as `Tailscale exposure` and show daemon state separately when available, so `gateway.tailscale.mode: +- Agents/images: scrub old `[media attached: ...]`, `[Image: source: ...]`, + and `media://inbound/...` markers from pruned model replay context so stale + media refs are not rehydrated as fresh prompt images. Fixes #71868. Thanks + @jmeadlock. +- CLI/status: label the OpenClaw Serve/Funnel setting as `Tailscale exposure` + and show daemon state separately when available, so `gateway.tailscale.mode: "off"` no longer reads like the Tailscale daemon is stopped. Fixes #71790. Thanks @pesvobodak. 
- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables itself for the current Gateway process after repeated failed restarts while the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux, @FiredMosquito831, and @spikefcz. diff --git a/docs/concepts/session-pruning.md b/docs/concepts/session-pruning.md index 21029799292..6232a867222 100644 --- a/docs/concepts/session-pruning.md +++ b/docs/concepts/session-pruning.md @@ -35,15 +35,23 @@ cache-write size, directly lowering cost. ## Legacy image cleanup -OpenClaw also runs a separate idempotent cleanup for older legacy sessions that -persisted raw image blocks in history. +OpenClaw also builds a separate idempotent replay view for sessions that +persist raw image blocks or prompt-hydration media markers in history. - It preserves the **3 most recent completed turns** byte-for-byte so prompt cache prefixes for recent follow-ups stay stable. -- Older already-processed image blocks in `user` or `toolResult` history can be - replaced with `[image data removed - already processed by model]`. +- In the replay view, older already-processed image blocks from `user` or + `toolResult` history can be replaced with + `[image data removed - already processed by model]`. +- Older textual media references such as `[media attached: ...]`, + `[Image: source: ...]`, and `media://inbound/...` can be replaced with + `[media reference removed - already processed by model]`. Current-turn + attachment markers stay intact so vision models can still hydrate fresh + images. +- The raw session transcript is not rewritten, so history viewers can still + render the original message entries and their images. - This is separate from normal cache-TTL pruning. It exists to stop repeated - image payloads from busting prompt caches on later turns. + image payloads or stale media refs from busting prompt caches on later turns. 
## Smart defaults diff --git a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts index 2f56d8b97fb..3ddfaaacfea 100644 --- a/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts +++ b/src/agents/pi-embedded-runner/run/attempt.spawn-workspace.test-support.ts @@ -658,7 +658,8 @@ vi.mock("./compaction-timeout.js", () => ({ })); vi.mock("./history-image-prune.js", () => ({ - pruneProcessedHistoryImages: (messages: T) => messages, + installHistoryImagePruneContextTransform: () => () => {}, + pruneProcessedHistoryImages: () => null, })); export type MutableSession = { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index d58a5b7d1c6..572ff820070 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -297,7 +297,10 @@ import { selectCompactionTimeoutSnapshot, shouldFlagCompactionTimeout, } from "./compaction-timeout.js"; -import { pruneProcessedHistoryImages } from "./history-image-prune.js"; +import { + installHistoryImagePruneContextTransform, + pruneProcessedHistoryImages, +} from "./history-image-prune.js"; import { detectAndLoadPromptImages } from "./images.js"; import { buildAttemptReplayMetadata } from "./incomplete-turn.js"; import { resolveLlmIdleTimeoutMs, streamWithIdleTimeout } from "./llm-idle-timeout.js"; @@ -1450,6 +1453,14 @@ export async function runEmbeddedAttempt( }), }); } + const removeLoopContextGuard = removeToolResultContextGuard; + const removeHistoryImagePruneContextTransform = installHistoryImagePruneContextTransform( + activeSession.agent, + ); + removeToolResultContextGuard = () => { + removeHistoryImagePruneContextTransform(); + removeLoopContextGuard?.(); + }; const cacheTrace = createCacheTrace({ cfg: params.config, env: process.env, @@ -2216,9 +2227,11 @@ export async function runEmbeddedAttempt( trigger: 
params.trigger, channelId: params.messageChannel ?? params.messageProvider ?? undefined, }; + const promptBuildMessages = + pruneProcessedHistoryImages(activeSession.messages) ?? activeSession.messages; const hookResult = await resolvePromptBuildHookResult({ prompt: params.prompt, - messages: activeSession.messages, + messages: promptBuildMessages, hookCtx, hookRunner, legacyBeforeAgentStartResult: params.legacyBeforeAgentStartResult, @@ -2362,14 +2375,6 @@ export async function runEmbeddedAttempt( : undefined; try { - // Idempotent cleanup: prune old image blocks to limit context - // growth. Only mutates turns older than a few assistant replies; - // the delay also reduces prompt-cache churn. - const didPruneImages = pruneProcessedHistoryImages(activeSession.messages); - if (didPruneImages) { - activeSession.agent.state.messages = activeSession.messages; - } - const filteredMessages = filterHeartbeatPairs( activeSession.messages, heartbeatSummary?.ackMaxChars, diff --git a/src/agents/pi-embedded-runner/run/history-image-prune.test.ts b/src/agents/pi-embedded-runner/run/history-image-prune.test.ts index 25489866208..820e0c482c9 100644 --- a/src/agents/pi-embedded-runner/run/history-image-prune.test.ts +++ b/src/agents/pi-embedded-runner/run/history-image-prune.test.ts @@ -2,7 +2,12 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { ImageContent } from "@mariozechner/pi-ai"; import { describe, expect, it } from "vitest"; import { castAgentMessage } from "../../test-helpers/agent-message-fixtures.js"; -import { PRUNED_HISTORY_IMAGE_MARKER, pruneProcessedHistoryImages } from "./history-image-prune.js"; +import { + PRUNED_HISTORY_IMAGE_MARKER, + PRUNED_HISTORY_MEDIA_REFERENCE_MARKER, + installHistoryImagePruneContextTransform, + pruneProcessedHistoryImages, +} from "./history-image-prune.js"; function expectArrayMessageContent( message: AgentMessage | undefined, @@ -18,22 +23,37 @@ function expectPrunedImageMessage( messages: 
AgentMessage[], errorMessage: string, ): Array<{ type: string; text?: string; data?: string }> { - const didMutate = pruneProcessedHistoryImages(messages); - expect(didMutate).toBe(true); - const content = expectArrayMessageContent(messages[0], errorMessage); + const pruned = pruneProcessedHistoryImages(messages); + expect(pruned).not.toBeNull(); + expect(pruned).not.toBe(messages); + const content = expectArrayMessageContent(pruned?.[0], errorMessage); expect(content).toHaveLength(2); expect(content[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER }); return content; } function expectImageMessagePreserved(messages: AgentMessage[], errorMessage: string) { - const didMutate = pruneProcessedHistoryImages(messages); + const pruned = pruneProcessedHistoryImages(messages); - expect(didMutate).toBe(false); + expect(pruned).toBeNull(); const content = expectArrayMessageContent(messages[0], errorMessage); expect(content[1]).toMatchObject({ type: "image", data: "abc" }); } +function oldEnoughTail(): AgentMessage[] { + const assistantTurn = () => castAgentMessage({ role: "assistant", content: "ack" }); + const userText = () => castAgentMessage({ role: "user", content: "more" }); + return [ + assistantTurn(), + userText(), + assistantTurn(), + userText(), + assistantTurn(), + userText(), + assistantTurn(), + ]; +} + describe("pruneProcessedHistoryImages", () => { const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" }; const assistantTurn = () => castAgentMessage({ role: "assistant", content: "ack" }); @@ -58,6 +78,90 @@ describe("pruneProcessedHistoryImages", () => { expect(content[0]?.type).toBe("text"); }); + it("scrubs old media attachment markers from text blocks", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "user", + content: [ + { + type: "text", + text: [ + "old image", + "[media attached: media://inbound/old.png]", + "[media attached 1/2: /tmp/old photo.jpeg (image/jpeg) | 
https://example.test/img]", + "[Image: source: /Users/me/Pictures/old.jpg]", + ].join("\n"), + }, + { ...image }, + ], + }), + ...oldEnoughTail(), + ]; + + const pruned = pruneProcessedHistoryImages(messages); + + expect(pruned).not.toBeNull(); + const content = expectArrayMessageContent(pruned?.[0], "expected user array content"); + expect(content[0]?.text).toBe( + [ + "old image", + PRUNED_HISTORY_MEDIA_REFERENCE_MARKER, + PRUNED_HISTORY_MEDIA_REFERENCE_MARKER, + PRUNED_HISTORY_MEDIA_REFERENCE_MARKER, + ].join("\n"), + ); + expect(content[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER }); + const originalContent = expectArrayMessageContent( + messages[0], + "expected original user content", + ); + expect(originalContent[0]?.text).toContain("[media attached: media://inbound/old.png]"); + expect(originalContent[1]).toMatchObject({ type: "image", data: "abc" }); + }); + + it("scrubs old media attachment markers from string content without image blocks", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "user", + content: "please remember [media attached: media://inbound/stale-image.png]", + }), + ...oldEnoughTail(), + ]; + + const pruned = pruneProcessedHistoryImages(messages); + + expect(pruned).not.toBeNull(); + const firstUser = pruned?.[0] as Extract | undefined; + expect(firstUser?.content).toBe(`please remember ${PRUNED_HISTORY_MEDIA_REFERENCE_MARKER}`); + const originalUser = messages[0] as Extract | undefined; + expect(originalUser?.content).toBe( + "please remember [media attached: media://inbound/stale-image.png]", + ); + }); + + it("scrubs bare old inbound media URIs from tool results", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "toolResult", + toolName: "memory_search", + content: "previous media://inbound/stale-screenshot.png result", + }), + ...oldEnoughTail(), + ]; + + const pruned = pruneProcessedHistoryImages(messages); + + expect(pruned).not.toBeNull(); + const toolResult = 
pruned?.[0] as Extract | undefined; + expect(toolResult?.content).toBe(`previous ${PRUNED_HISTORY_MEDIA_REFERENCE_MARKER} result`); + const originalToolResult = messages[0] as + | Extract + | undefined; + expect(originalToolResult?.content).toBe( + "previous media://inbound/stale-screenshot.png result", + ); + }); + it("keeps image blocks that belong to the third-most-recent assistant turn", () => { const messages: AgentMessage[] = [ castAgentMessage({ @@ -74,6 +178,33 @@ describe("pruneProcessedHistoryImages", () => { expectImageMessagePreserved(messages, "expected user array content"); }); + it("preserves recent media attachment markers", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ + role: "user", + content: [ + { + type: "text", + text: "recent [media attached: media://inbound/current.png]", + }, + { ...image }, + ], + }), + assistantTurn(), + userText(), + assistantTurn(), + userText(), + assistantTurn(), + ]; + + const pruned = pruneProcessedHistoryImages(messages); + + expect(pruned).toBeNull(); + const content = expectArrayMessageContent(messages[0], "expected user array content"); + expect(content[0]?.text).toBe("recent [media attached: media://inbound/current.png]"); + expect(content[1]).toMatchObject({ type: "image", data: "abc" }); + }); + it("does not count multiple assistant messages from one tool loop as separate turns", () => { const messages: AgentMessage[] = [ castAgentMessage({ @@ -108,9 +239,9 @@ describe("pruneProcessedHistoryImages", () => { }), ]; - const didMutate = pruneProcessedHistoryImages(messages); + const pruned = pruneProcessedHistoryImages(messages); - expect(didMutate).toBe(false); + expect(pruned).toBeNull(); const content = expectArrayMessageContent(messages[0], "expected user array content"); expect(content).toHaveLength(2); expect(content[1]).toMatchObject({ type: "image", data: "abc" }); @@ -153,14 +284,20 @@ describe("pruneProcessedHistoryImages", () => { assistantTurn(), ]; - const didMutate = 
pruneProcessedHistoryImages(messages); - expect(didMutate).toBe(true); + const pruned = pruneProcessedHistoryImages(messages); + expect(pruned).not.toBeNull(); - const oldContent = expectArrayMessageContent(messages[0], "expected old user content"); + const oldContent = expectArrayMessageContent(pruned?.[0], "expected old user content"); expect(oldContent[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER }); - const recentContent = expectArrayMessageContent(messages[6], "expected recent user content"); + const recentContent = expectArrayMessageContent(pruned?.[6], "expected recent user content"); expect(recentContent[1]).toMatchObject({ type: "image", data: "abc" }); + + const originalOldContent = expectArrayMessageContent( + messages[0], + "expected original old user content", + ); + expect(originalOldContent[1]).toMatchObject({ type: "image", data: "abc" }); }); it("does not change messages when no assistant turn exists", () => { @@ -171,10 +308,65 @@ describe("pruneProcessedHistoryImages", () => { }), ]; - const didMutate = pruneProcessedHistoryImages(messages); + const pruned = pruneProcessedHistoryImages(messages); - expect(didMutate).toBe(false); + expect(pruned).toBeNull(); const firstUser = messages[0] as Extract | undefined; expect(firstUser?.content).toBe("noop"); }); }); + +describe("installHistoryImagePruneContextTransform", () => { + const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" }; + + it("prunes the provider replay view after an existing context transform", async () => { + const messages: AgentMessage[] = [ + castAgentMessage({ role: "user", content: "fresh prompt" }), + ...oldEnoughTail(), + ]; + const transformedMessages: AgentMessage[] = [ + castAgentMessage({ + role: "user", + content: [ + { + type: "text", + text: "stale [media attached: media://inbound/old.png]", + }, + { ...image }, + ], + }), + ...oldEnoughTail(), + ]; + const originalTransformContext = async ( + inputMessages: 
AgentMessage[], + _signal?: AbortSignal, + ) => { + expect(inputMessages).toBe(messages); + return transformedMessages; + }; + const agent = { transformContext: originalTransformContext }; + + const restore = installHistoryImagePruneContextTransform(agent); + const replayMessages = await agent.transformContext(messages, new AbortController().signal); + + expect(replayMessages).not.toBe(transformedMessages); + const replayContent = expectArrayMessageContent( + replayMessages[0], + "expected replay user array content", + ); + expect(replayContent[0]?.text).toBe(`stale ${PRUNED_HISTORY_MEDIA_REFERENCE_MARKER}`); + expect(replayContent[1]).toMatchObject({ + type: "text", + text: PRUNED_HISTORY_IMAGE_MARKER, + }); + const originalContent = expectArrayMessageContent( + transformedMessages[0], + "expected original transformed content", + ); + expect(originalContent[0]?.text).toContain("media://inbound/old.png"); + expect(originalContent[1]).toMatchObject({ type: "image", data: "abc" }); + + restore(); + expect(agent.transformContext).toBe(originalTransformContext); + }); +}); diff --git a/src/agents/pi-embedded-runner/run/history-image-prune.ts b/src/agents/pi-embedded-runner/run/history-image-prune.ts index 1a509b793e0..20cc6fbf9da 100644 --- a/src/agents/pi-embedded-runner/run/history-image-prune.ts +++ b/src/agents/pi-embedded-runner/run/history-image-prune.ts @@ -1,6 +1,19 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]"; +export const PRUNED_HISTORY_MEDIA_REFERENCE_MARKER = + "[media reference removed - already processed by model]"; + +const MEDIA_ATTACHED_HISTORY_REF_PATTERN = /\[media attached(?:\s+\d+\/\d+)?:\s*[^\]]+\]/gi; +const MESSAGE_IMAGE_HISTORY_REF_PATTERN = /\[Image:\s*source:\s*[^\]]+\]/gi; +const INBOUND_MEDIA_URI_HISTORY_REF_PATTERN = /\bmedia:\/\/inbound\/[^\]\s/\\]+/g; + +type PrunableContextAgent = { + transformContext?: ( + messages: 
AgentMessage[], + signal?: AbortSignal, + ) => AgentMessage[] | Promise<AgentMessage[]>; +}; /** * Number of most-recent completed turns whose preceding user/toolResult image @@ -45,43 +58,106 @@ function resolvePruneBeforeIndex(messages: AgentMessage[]): number { return completedTurnStarts[completedTurnStarts.length - PRESERVE_RECENT_COMPLETED_TURNS]; } +function pruneHistoryMediaReferenceText(text: string): string { + return text + .replace(MEDIA_ATTACHED_HISTORY_REF_PATTERN, PRUNED_HISTORY_MEDIA_REFERENCE_MARKER) + .replace(MESSAGE_IMAGE_HISTORY_REF_PATTERN, PRUNED_HISTORY_MEDIA_REFERENCE_MARKER) + .replace(INBOUND_MEDIA_URI_HISTORY_REF_PATTERN, PRUNED_HISTORY_MEDIA_REFERENCE_MARKER); +} + +function cloneMessageWithContent( + message: Extract<AgentMessage, { role: "user" | "toolResult" }>, + content: typeof message.content, +): AgentMessage { + return { ...message, content } as AgentMessage; +} + /** * Idempotent cleanup: prune persisted image blocks from completed turns older * than {@link PRESERVE_RECENT_COMPLETED_TURNS}. The delay also reduces * prompt-cache churn, though prefix stability additionally depends on the - * replay sanitizer being idempotent. + * replay sanitizer being idempotent. Textual media markers are scrubbed on the + * same boundary because detectAndLoadPromptImages treats them as fresh prompt + * image references when old history is replayed into a later prompt. 
*/ -export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean { +export function pruneProcessedHistoryImages(messages: AgentMessage[]): AgentMessage[] | null { const pruneBeforeIndex = resolvePruneBeforeIndex(messages); if (pruneBeforeIndex < 0) { - return false; + return null; } - let didMutate = false; + let prunedMessages: AgentMessage[] | null = null; for (let i = 0; i < pruneBeforeIndex; i++) { const message = messages[i]; - if ( - !message || - (message.role !== "user" && message.role !== "toolResult") || - !Array.isArray(message.content) - ) { + if (!message || (message.role !== "user" && message.role !== "toolResult")) { continue; } + + if (typeof message.content === "string") { + const prunedText = pruneHistoryMediaReferenceText(message.content); + if (prunedText !== message.content) { + prunedMessages ??= messages.slice(); + prunedMessages[i] = cloneMessageWithContent(message, prunedText); + } + continue; + } + + if (!Array.isArray(message.content)) { + continue; + } + for (let j = 0; j < message.content.length; j++) { const block = message.content[j]; if (!block || typeof block !== "object") { continue; } - if ((block as { type?: string }).type !== "image") { + const blockType = (block as { type?: string }).type; + if (blockType === "text" && typeof (block as { text?: unknown }).text === "string") { + const text = (block as { text: string }).text; + const prunedText = pruneHistoryMediaReferenceText(text); + if (prunedText !== text) { + prunedMessages ??= messages.slice(); + const baseMessage = prunedMessages[i]; + const baseContent = + baseMessage && "content" in baseMessage && Array.isArray(baseMessage.content) + ? 
baseMessage.content + : message.content; + const nextContent = baseContent.slice() as typeof message.content; + nextContent[j] = { ...block, text: prunedText } as (typeof message.content)[number]; + prunedMessages[i] = cloneMessageWithContent(message, nextContent); + } continue; } - message.content[j] = { - type: "text", - text: PRUNED_HISTORY_IMAGE_MARKER, - } as (typeof message.content)[number]; - didMutate = true; + if (blockType === "image") { + prunedMessages ??= messages.slice(); + const baseMessage = prunedMessages[i]; + const baseContent = + baseMessage && "content" in baseMessage && Array.isArray(baseMessage.content) + ? baseMessage.content + : message.content; + const nextContent = baseContent.slice() as typeof message.content; + nextContent[j] = { + type: "text", + text: PRUNED_HISTORY_IMAGE_MARKER, + } as (typeof message.content)[number]; + prunedMessages[i] = cloneMessageWithContent(message, nextContent); + } } } - return didMutate; + return prunedMessages; +} + +export function installHistoryImagePruneContextTransform(agent: PrunableContextAgent): () => void { + const originalTransformContext = agent.transformContext; + agent.transformContext = async (messages: AgentMessage[], signal?: AbortSignal) => { + const transformed = originalTransformContext + ? await originalTransformContext.call(agent, messages, signal) + : messages; + const sourceMessages = Array.isArray(transformed) ? transformed : messages; + return pruneProcessedHistoryImages(sourceMessages) ?? sourceMessages; + }; + return () => { + agent.transformContext = originalTransformContext; + }; }