fix: preserve image history while pruning replay context

This commit is contained in:
Peter Steinberger
2026-04-26 03:10:10 +01:00
parent 69e7e499b1
commit 2d9a0d9cf0
6 changed files with 334 additions and 47 deletions

View File

@@ -69,7 +69,12 @@ Docs: https://docs.openclaw.ai
- ACP: wait for the configured runtime backend to become healthy before startup identity reconciliation, avoiding transient acpx warnings during Gateway boot. Fixes #40566.
- Channels/ACP bindings: time out configured binding readiness checks instead of letting Discord preflight hang forever when an ACP target never settles. Fixes #68776.
- Control UI: hide the chat loading skeleton during background history reloads when existing messages or active stream content are already visible, avoiding reload flashes on high-latency local gateways. Fixes #71844. Thanks @WolvenRA.
- Agents/images: scrub old `[media attached: ...]`, `[Image: source: ...]`,
and `media://inbound/...` markers from pruned model replay context so stale
media refs are not rehydrated as fresh prompt images. Fixes #71868. Thanks
@jmeadlock.
- CLI/status: label the OpenClaw Serve/Funnel setting as `Tailscale exposure`
and show daemon state separately when available, so `gateway.tailscale.mode:
"off"` no longer reads like the Tailscale daemon is stopped. Fixes #71790.
Thanks @pesvobodak.
- Plugins/Bonjour: stop ciao mDNS watchdog failures from looping forever when the advertiser stays stuck in `probing` or `announcing`; Bonjour now disables itself for the current Gateway process after repeated failed restarts while the Gateway keeps running. Fixes #69011. Thanks @siddharthaagarwalofficial-ux, @FiredMosquito831, and @spikefcz.

View File

@@ -35,15 +35,23 @@ cache-write size, directly lowering cost.
## Legacy image cleanup
OpenClaw also builds a separate idempotent replay view for sessions that
persist raw image blocks or prompt-hydration media markers in history.
- It preserves the **3 most recent completed turns** byte-for-byte so prompt
cache prefixes for recent follow-ups stay stable.
- In the replay view, older already-processed image blocks from `user` or
`toolResult` history can be replaced with
`[image data removed - already processed by model]`.
- Older textual media references such as `[media attached: ...]`,
`[Image: source: ...]`, and `media://inbound/...` can be replaced with
`[media reference removed - already processed by model]`. Current-turn
attachment markers stay intact so vision models can still hydrate fresh
images.
- The raw session transcript is not rewritten, so history viewers can still
render the original message entries and their images.
- This is separate from normal cache-TTL pruning. It exists to stop repeated
image payloads or stale media refs from busting prompt caches on later turns.
## Smart defaults

View File

@@ -658,7 +658,8 @@ vi.mock("./compaction-timeout.js", () => ({
}));
// Stub the history-image pruning module: the transform installer is a no-op
// (returns a no-op uninstaller) and pruning reports "nothing to prune" so
// attempt tests exercise the loop without the replay-view transform.
vi.mock("./history-image-prune.js", () => ({
  installHistoryImagePruneContextTransform: () => () => {},
  pruneProcessedHistoryImages: () => null,
}));
export type MutableSession = {

View File

@@ -297,7 +297,10 @@ import {
selectCompactionTimeoutSnapshot,
shouldFlagCompactionTimeout,
} from "./compaction-timeout.js";
import {
  installHistoryImagePruneContextTransform,
  pruneProcessedHistoryImages,
} from "./history-image-prune.js";
import { detectAndLoadPromptImages } from "./images.js";
import { buildAttemptReplayMetadata } from "./incomplete-turn.js";
import { resolveLlmIdleTimeoutMs, streamWithIdleTimeout } from "./llm-idle-timeout.js";
@@ -1450,6 +1453,14 @@ export async function runEmbeddedAttempt(
}),
});
}
const removeLoopContextGuard = removeToolResultContextGuard;
const removeHistoryImagePruneContextTransform = installHistoryImagePruneContextTransform(
activeSession.agent,
);
removeToolResultContextGuard = () => {
removeHistoryImagePruneContextTransform();
removeLoopContextGuard?.();
};
const cacheTrace = createCacheTrace({
cfg: params.config,
env: process.env,
@@ -2216,9 +2227,11 @@ export async function runEmbeddedAttempt(
trigger: params.trigger,
channelId: params.messageChannel ?? params.messageProvider ?? undefined,
};
const promptBuildMessages =
pruneProcessedHistoryImages(activeSession.messages) ?? activeSession.messages;
const hookResult = await resolvePromptBuildHookResult({
prompt: params.prompt,
messages: activeSession.messages,
messages: promptBuildMessages,
hookCtx,
hookRunner,
legacyBeforeAgentStartResult: params.legacyBeforeAgentStartResult,
@@ -2362,14 +2375,6 @@ export async function runEmbeddedAttempt(
: undefined;
try {
// Idempotent cleanup: prune old image blocks to limit context
// growth. Only mutates turns older than a few assistant replies;
// the delay also reduces prompt-cache churn.
const didPruneImages = pruneProcessedHistoryImages(activeSession.messages);
if (didPruneImages) {
activeSession.agent.state.messages = activeSession.messages;
}
const filteredMessages = filterHeartbeatPairs(
activeSession.messages,
heartbeatSummary?.ackMaxChars,

View File

@@ -2,7 +2,12 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ImageContent } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { castAgentMessage } from "../../test-helpers/agent-message-fixtures.js";
import {
  PRUNED_HISTORY_IMAGE_MARKER,
  PRUNED_HISTORY_MEDIA_REFERENCE_MARKER,
  installHistoryImagePruneContextTransform,
  pruneProcessedHistoryImages,
} from "./history-image-prune.js";
function expectArrayMessageContent(
message: AgentMessage | undefined,
@@ -18,22 +23,37 @@ function expectPrunedImageMessage(
messages: AgentMessage[],
errorMessage: string,
): Array<{ type: string; text?: string; data?: string }> {
const didMutate = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(true);
const content = expectArrayMessageContent(messages[0], errorMessage);
const pruned = pruneProcessedHistoryImages(messages);
expect(pruned).not.toBeNull();
expect(pruned).not.toBe(messages);
const content = expectArrayMessageContent(pruned?.[0], errorMessage);
expect(content).toHaveLength(2);
expect(content[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER });
return content;
}
// Asserts the prune pass left `messages` alone: a null result (no pruned view
// was needed) and the original image block still present on the first message.
function expectImageMessagePreserved(messages: AgentMessage[], errorMessage: string) {
  const pruned = pruneProcessedHistoryImages(messages);
  expect(pruned).toBeNull();
  const content = expectArrayMessageContent(messages[0], errorMessage);
  expect(content[1]).toMatchObject({ type: "image", data: "abc" });
}
// Builds four completed assistant turns (separated by user text) so any
// message placed before this tail falls outside the preserved-recent window.
function oldEnoughTail(): AgentMessage[] {
  const tail: AgentMessage[] = [];
  for (let turn = 0; turn < 4; turn++) {
    tail.push(castAgentMessage({ role: "assistant", content: "ack" }));
    if (turn < 3) {
      tail.push(castAgentMessage({ role: "user", content: "more" }));
    }
  }
  return tail;
}
describe("pruneProcessedHistoryImages", () => {
const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };
const assistantTurn = () => castAgentMessage({ role: "assistant", content: "ack" });
@@ -58,6 +78,90 @@ describe("pruneProcessedHistoryImages", () => {
expect(content[0]?.type).toBe("text");
});
it("scrubs old media attachment markers from text blocks", () => {
// One old user message mixing all three textual marker styles in a single
// text block, plus a raw image block; the tail pushes it past the boundary.
const messages: AgentMessage[] = [
castAgentMessage({
role: "user",
content: [
{
type: "text",
text: [
"old image",
"[media attached: media://inbound/old.png]",
"[media attached 1/2: /tmp/old photo.jpeg (image/jpeg) | https://example.test/img]",
"[Image: source: /Users/me/Pictures/old.jpg]",
].join("\n"),
},
{ ...image },
],
}),
...oldEnoughTail(),
];
const pruned = pruneProcessedHistoryImages(messages);
expect(pruned).not.toBeNull();
// In the pruned view every marker collapses to the redaction marker and the
// image block becomes the image-removed marker; other text survives.
const content = expectArrayMessageContent(pruned?.[0], "expected user array content");
expect(content[0]?.text).toBe(
[
"old image",
PRUNED_HISTORY_MEDIA_REFERENCE_MARKER,
PRUNED_HISTORY_MEDIA_REFERENCE_MARKER,
PRUNED_HISTORY_MEDIA_REFERENCE_MARKER,
].join("\n"),
);
expect(content[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER });
// Copy-on-write contract: the input array keeps its raw marker and image.
const originalContent = expectArrayMessageContent(
messages[0],
"expected original user content",
);
expect(originalContent[0]?.text).toContain("[media attached: media://inbound/old.png]");
expect(originalContent[1]).toMatchObject({ type: "image", data: "abc" });
});
it("scrubs old media attachment markers from string content without image blocks", () => {
// Plain-string (non-array) content can also carry a stale media marker.
const messages: AgentMessage[] = [
castAgentMessage({
role: "user",
content: "please remember [media attached: media://inbound/stale-image.png]",
}),
...oldEnoughTail(),
];
const pruned = pruneProcessedHistoryImages(messages);
expect(pruned).not.toBeNull();
// Pruned view: the marker is replaced inside the string content.
const firstUser = pruned?.[0] as Extract<AgentMessage, { role: "user" }> | undefined;
expect(firstUser?.content).toBe(`please remember ${PRUNED_HISTORY_MEDIA_REFERENCE_MARKER}`);
// The original message text is preserved verbatim.
const originalUser = messages[0] as Extract<AgentMessage, { role: "user" }> | undefined;
expect(originalUser?.content).toBe(
"please remember [media attached: media://inbound/stale-image.png]",
);
});
it("scrubs bare old inbound media URIs from tool results", () => {
// A bare media://inbound/... URI (no bracket marker) inside an old
// toolResult must also be redacted in the replay view.
const messages: AgentMessage[] = [
castAgentMessage({
role: "toolResult",
toolName: "memory_search",
content: "previous media://inbound/stale-screenshot.png result",
}),
...oldEnoughTail(),
];
const pruned = pruneProcessedHistoryImages(messages);
expect(pruned).not.toBeNull();
const toolResult = pruned?.[0] as Extract<AgentMessage, { role: "toolResult" }> | undefined;
expect(toolResult?.content).toBe(`previous ${PRUNED_HISTORY_MEDIA_REFERENCE_MARKER} result`);
// The stored transcript keeps the raw URI untouched.
const originalToolResult = messages[0] as
| Extract<AgentMessage, { role: "toolResult" }>
| undefined;
expect(originalToolResult?.content).toBe(
"previous media://inbound/stale-screenshot.png result",
);
});
it("keeps image blocks that belong to the third-most-recent assistant turn", () => {
const messages: AgentMessage[] = [
castAgentMessage({
@@ -74,6 +178,33 @@ describe("pruneProcessedHistoryImages", () => {
expectImageMessagePreserved(messages, "expected user array content");
});
it("preserves recent media attachment markers", () => {
// Only three completed turns follow, so the message stays inside the
// preserved-recent window and must not be touched.
const messages: AgentMessage[] = [
castAgentMessage({
role: "user",
content: [
{
type: "text",
text: "recent [media attached: media://inbound/current.png]",
},
{ ...image },
],
}),
assistantTurn(),
userText(),
assistantTurn(),
userText(),
assistantTurn(),
];
const pruned = pruneProcessedHistoryImages(messages);
// null signals "no pruned view needed" — callers use the original as-is.
expect(pruned).toBeNull();
const content = expectArrayMessageContent(messages[0], "expected user array content");
expect(content[0]?.text).toBe("recent [media attached: media://inbound/current.png]");
expect(content[1]).toMatchObject({ type: "image", data: "abc" });
});
it("does not count multiple assistant messages from one tool loop as separate turns", () => {
const messages: AgentMessage[] = [
castAgentMessage({
@@ -108,9 +239,9 @@ describe("pruneProcessedHistoryImages", () => {
}),
];
const didMutate = pruneProcessedHistoryImages(messages);
const pruned = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(false);
expect(pruned).toBeNull();
const content = expectArrayMessageContent(messages[0], "expected user array content");
expect(content).toHaveLength(2);
expect(content[1]).toMatchObject({ type: "image", data: "abc" });
@@ -153,14 +284,20 @@ describe("pruneProcessedHistoryImages", () => {
assistantTurn(),
];
const didMutate = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(true);
const pruned = pruneProcessedHistoryImages(messages);
expect(pruned).not.toBeNull();
const oldContent = expectArrayMessageContent(messages[0], "expected old user content");
const oldContent = expectArrayMessageContent(pruned?.[0], "expected old user content");
expect(oldContent[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER });
const recentContent = expectArrayMessageContent(messages[6], "expected recent user content");
const recentContent = expectArrayMessageContent(pruned?.[6], "expected recent user content");
expect(recentContent[1]).toMatchObject({ type: "image", data: "abc" });
const originalOldContent = expectArrayMessageContent(
messages[0],
"expected original old user content",
);
expect(originalOldContent[1]).toMatchObject({ type: "image", data: "abc" });
});
it("does not change messages when no assistant turn exists", () => {
@@ -171,10 +308,65 @@ describe("pruneProcessedHistoryImages", () => {
}),
];
const didMutate = pruneProcessedHistoryImages(messages);
const pruned = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(false);
expect(pruned).toBeNull();
const firstUser = messages[0] as Extract<AgentMessage, { role: "user" }> | undefined;
expect(firstUser?.content).toBe("noop");
});
});
describe("installHistoryImagePruneContextTransform", () => {
const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };
it("prunes the provider replay view after an existing context transform", async () => {
const messages: AgentMessage[] = [
castAgentMessage({ role: "user", content: "fresh prompt" }),
...oldEnoughTail(),
];
// What the pre-existing transform hands back: an old message carrying a
// stale marker and a raw image block, past the prune boundary.
const transformedMessages: AgentMessage[] = [
castAgentMessage({
role: "user",
content: [
{
type: "text",
text: "stale [media attached: media://inbound/old.png]",
},
{ ...image },
],
}),
...oldEnoughTail(),
];
// The wrapped transform must still receive the original input array.
const originalTransformContext = async (
inputMessages: AgentMessage[],
_signal?: AbortSignal,
) => {
expect(inputMessages).toBe(messages);
return transformedMessages;
};
const agent = { transformContext: originalTransformContext };
const restore = installHistoryImagePruneContextTransform(agent);
const replayMessages = await agent.transformContext(messages, new AbortController().signal);
// Pruning is copy-on-write: the replay view is a new array.
expect(replayMessages).not.toBe(transformedMessages);
const replayContent = expectArrayMessageContent(
replayMessages[0],
"expected replay user array content",
);
expect(replayContent[0]?.text).toBe(`stale ${PRUNED_HISTORY_MEDIA_REFERENCE_MARKER}`);
expect(replayContent[1]).toMatchObject({
type: "text",
text: PRUNED_HISTORY_IMAGE_MARKER,
});
// The transform's own output array keeps its raw marker and image.
const originalContent = expectArrayMessageContent(
transformedMessages[0],
"expected original transformed content",
);
expect(originalContent[0]?.text).toContain("media://inbound/old.png");
expect(originalContent[1]).toMatchObject({ type: "image", data: "abc" });
// restore() must reinstate the exact previous transform function.
restore();
expect(agent.transformContext).toBe(originalTransformContext);
});
});

View File

@@ -1,6 +1,19 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
// Replacement text spliced in where a pruned raw image block used to be.
export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]";
// Replacement text spliced in where a pruned textual media reference used to be.
export const PRUNED_HISTORY_MEDIA_REFERENCE_MARKER =
"[media reference removed - already processed by model]";
// Matches "[media attached: ...]" markers, including indexed "[media attached 1/2: ...]".
const MEDIA_ATTACHED_HISTORY_REF_PATTERN = /\[media attached(?:\s+\d+\/\d+)?:\s*[^\]]+\]/gi;
// Matches "[Image: source: ...]" markers.
const MESSAGE_IMAGE_HISTORY_REF_PATTERN = /\[Image:\s*source:\s*[^\]]+\]/gi;
// Matches bare media://inbound/... URIs; the char class stops the match at
// whitespace, "]", "/" or "\", i.e. only one path segment after inbound/.
const INBOUND_MEDIA_URI_HISTORY_REF_PATTERN = /\bmedia:\/\/inbound\/[^\]\s/\\]+/g;
// Minimal structural view of an agent whose context transform can be wrapped.
type PrunableContextAgent = {
transformContext?: (
messages: AgentMessage[],
signal?: AbortSignal,
) => AgentMessage[] | Promise<AgentMessage[]>;
};
/**
* Number of most-recent completed turns whose preceding user/toolResult image
@@ -45,43 +58,106 @@ function resolvePruneBeforeIndex(messages: AgentMessage[]): number {
return completedTurnStarts[completedTurnStarts.length - PRESERVE_RECENT_COMPLETED_TURNS];
}
// Collapses every historical media-reference form in `text` to the shared
// redaction marker. Returns the input unchanged (same string) when nothing
// matches, which callers rely on for cheap change detection.
function pruneHistoryMediaReferenceText(text: string): string {
  const referencePatterns = [
    MEDIA_ATTACHED_HISTORY_REF_PATTERN,
    MESSAGE_IMAGE_HISTORY_REF_PATTERN,
    INBOUND_MEDIA_URI_HISTORY_REF_PATTERN,
  ];
  let scrubbed = text;
  for (const pattern of referencePatterns) {
    scrubbed = scrubbed.replace(pattern, PRUNED_HISTORY_MEDIA_REFERENCE_MARKER);
  }
  return scrubbed;
}
// Shallow-copies `message`, swapping in the replacement `content` while
// leaving the original object untouched.
function cloneMessageWithContent(
  message: Extract<AgentMessage, { role: "user" | "toolResult" }>,
  content: typeof message.content,
): AgentMessage {
  const clone = { ...message };
  clone.content = content;
  return clone as AgentMessage;
}
/**
* Idempotent cleanup: prune persisted image blocks from completed turns older
* than {@link PRESERVE_RECENT_COMPLETED_TURNS}. The delay also reduces
* prompt-cache churn, though prefix stability additionally depends on the
* replay sanitizer being idempotent.
* replay sanitizer being idempotent. Textual media markers are scrubbed on the
* same boundary because detectAndLoadPromptImages treats them as fresh prompt
* image references when old history is replayed into a later prompt.
*/
export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
export function pruneProcessedHistoryImages(messages: AgentMessage[]): AgentMessage[] | null {
const pruneBeforeIndex = resolvePruneBeforeIndex(messages);
if (pruneBeforeIndex < 0) {
return false;
return null;
}
let didMutate = false;
let prunedMessages: AgentMessage[] | null = null;
for (let i = 0; i < pruneBeforeIndex; i++) {
const message = messages[i];
if (
!message ||
(message.role !== "user" && message.role !== "toolResult") ||
!Array.isArray(message.content)
) {
if (!message || (message.role !== "user" && message.role !== "toolResult")) {
continue;
}
if (typeof message.content === "string") {
const prunedText = pruneHistoryMediaReferenceText(message.content);
if (prunedText !== message.content) {
prunedMessages ??= messages.slice();
prunedMessages[i] = cloneMessageWithContent(message, prunedText);
}
continue;
}
if (!Array.isArray(message.content)) {
continue;
}
for (let j = 0; j < message.content.length; j++) {
const block = message.content[j];
if (!block || typeof block !== "object") {
continue;
}
if ((block as { type?: string }).type !== "image") {
const blockType = (block as { type?: string }).type;
if (blockType === "text" && typeof (block as { text?: unknown }).text === "string") {
const text = (block as { text: string }).text;
const prunedText = pruneHistoryMediaReferenceText(text);
if (prunedText !== text) {
prunedMessages ??= messages.slice();
const baseMessage = prunedMessages[i];
const baseContent =
baseMessage && "content" in baseMessage && Array.isArray(baseMessage.content)
? baseMessage.content
: message.content;
const nextContent = baseContent.slice() as typeof message.content;
nextContent[j] = { ...block, text: prunedText } as (typeof message.content)[number];
prunedMessages[i] = cloneMessageWithContent(message, nextContent);
}
continue;
}
message.content[j] = {
type: "text",
text: PRUNED_HISTORY_IMAGE_MARKER,
} as (typeof message.content)[number];
didMutate = true;
if (blockType === "image") {
prunedMessages ??= messages.slice();
const baseMessage = prunedMessages[i];
const baseContent =
baseMessage && "content" in baseMessage && Array.isArray(baseMessage.content)
? baseMessage.content
: message.content;
const nextContent = baseContent.slice() as typeof message.content;
nextContent[j] = {
type: "text",
text: PRUNED_HISTORY_IMAGE_MARKER,
} as (typeof message.content)[number];
prunedMessages[i] = cloneMessageWithContent(message, nextContent);
}
}
}
return didMutate;
return prunedMessages;
}
/**
 * Wraps the agent's optional `transformContext` hook so the provider replay
 * view is pruned (via pruneProcessedHistoryImages) after any pre-existing
 * transform has produced its output.
 *
 * @param agent Agent whose `transformContext` is monkeypatched in place.
 * @returns A restore function that reinstates the previous transform.
 */
export function installHistoryImagePruneContextTransform(agent: PrunableContextAgent): () => void {
  const previousTransform = agent.transformContext;
  agent.transformContext = async (messages: AgentMessage[], signal?: AbortSignal) => {
    let replaySource = messages;
    if (previousTransform) {
      const upstream = await previousTransform.call(agent, messages, signal);
      // Defensive: fall back to the raw input if the upstream transform
      // returned something other than an array.
      if (Array.isArray(upstream)) {
        replaySource = upstream;
      }
    }
    return pruneProcessedHistoryImages(replaySource) ?? replaySource;
  };
  return () => {
    agent.transformContext = previousTransform;
  };
}