refactor(pi): extract history image prune helpers

This commit is contained in:
Peter Steinberger
2026-02-26 16:44:47 +01:00
parent 57334cd7d8
commit 75ed72e807
5 changed files with 149 additions and 132 deletions

View File

@@ -1,76 +1,11 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ImageContent } from "@mariozechner/pi-ai";
import { describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../../config/config.js";
import {
PRUNED_HISTORY_IMAGE_MARKER,
pruneProcessedHistoryImages,
resolveAttemptFsWorkspaceOnly,
resolvePromptBuildHookResult,
resolvePromptModeForSession,
} from "./attempt.js";
// Unit tests for pruneProcessedHistoryImages (imported from ./attempt.js):
// pruning replaces persisted image blocks in already-answered user turns with
// a text marker and reports whether anything was mutated.
describe("pruneProcessedHistoryImages", () => {
// Shared fixture: a minimal image content block; spread-copied per test so each
// test mutates its own instance.
const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };
// A user turn followed by an assistant reply is "answered": its image block
// must be swapped for the marker text while the text block is kept.
it("prunes image blocks from user messages that already have assistant replies", () => {
const messages: AgentMessage[] = [
{
role: "user",
content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }],
} as AgentMessage,
{
role: "assistant",
content: "got it",
} as unknown as AgentMessage,
];
const didMutate = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(true);
const firstUser = messages[0] as Extract<AgentMessage, { role: "user" }> | undefined;
expect(Array.isArray(firstUser?.content)).toBe(true);
const content = firstUser?.content as Array<{ type: string; text?: string; data?: string }>;
expect(content).toHaveLength(2);
expect(content[0]?.type).toBe("text");
expect(content[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER });
});
// The trailing (not-yet-answered) user turn must keep its image data intact.
it("does not prune latest user message when no assistant response exists yet", () => {
const messages: AgentMessage[] = [
{
role: "user",
content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }],
} as AgentMessage,
];
const didMutate = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(false);
const first = messages[0] as Extract<AgentMessage, { role: "user" }> | undefined;
if (!first || !Array.isArray(first.content)) {
throw new Error("expected array content");
}
expect(first.content).toHaveLength(2);
expect(first.content[1]).toMatchObject({ type: "image", data: "abc" });
});
// With no assistant turn at all, string content is left untouched and the
// function reports no mutation.
it("does not change messages when no assistant turn exists", () => {
const messages: AgentMessage[] = [
{
role: "user",
content: "noop",
} as AgentMessage,
];
const didMutate = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(false);
const firstUser = messages[0] as Extract<AgentMessage, { role: "user" }> | undefined;
expect(firstUser?.content).toBe("noop");
});
});
describe("resolvePromptBuildHookResult", () => {
function createLegacyOnlyHookRunner() {
return {

View File

@@ -112,6 +112,7 @@ import {
selectCompactionTimeoutSnapshot,
shouldFlagCompactionTimeout,
} from "./compaction-timeout.js";
import { pruneProcessedHistoryImages } from "./history-image-prune.js";
import { detectAndLoadPromptImages } from "./images.js";
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
@@ -127,48 +128,6 @@ type PromptBuildHookRunner = {
) => Promise<PluginHookBeforeAgentStartResult | undefined>;
};
export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]";
/**
 * Prunes image blocks from user messages that already have an assistant response after them.
 * This is a one-way cleanup for previously persisted history image data.
 *
 * @param messages - Session history; mutated in place.
 * @returns true when at least one image block was replaced with the marker text.
 */
export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
// Find the most recent assistant message; turns after it are still awaiting a reply.
let lastAssistantIndex = -1;
for (let i = messages.length - 1; i >= 0; i--) {
if (messages[i]?.role === "assistant") {
lastAssistantIndex = i;
break;
}
}
// No assistant reply at all: nothing has been processed by the model yet.
if (lastAssistantIndex < 0) {
return false;
}
let didMutate = false;
// Only user messages strictly before the last assistant turn are eligible.
for (let i = 0; i < lastAssistantIndex; i++) {
const message = messages[i];
if (!message || message.role !== "user" || !Array.isArray(message.content)) {
continue;
}
for (let j = 0; j < message.content.length; j++) {
const block = message.content[j];
if (!block || typeof block !== "object") {
continue;
}
if ((block as { type?: string }).type !== "image") {
continue;
}
// Swap the image payload for a small text marker in place (irreversible).
message.content[j] = {
type: "text",
text: PRUNED_HISTORY_IMAGE_MARKER,
} as (typeof message.content)[number];
didMutate = true;
}
}
return didMutate;
}
export async function resolvePromptBuildHookResult(params: {
prompt: string;
messages: unknown[];
@@ -1085,8 +1044,8 @@ export async function runEmbeddedAttempt(
}
try {
// One-time migration: prune image blocks from already-processed user turns.
// This prevents old persisted base64 payloads from bloating subsequent prompts.
// Idempotent cleanup for legacy sessions with persisted image payloads.
// Called each run; only mutates already-answered user turns that still carry image blocks.
const didPruneImages = pruneProcessedHistoryImages(activeSession.messages);
if (didPruneImages) {
activeSession.agent.replaceMessages(activeSession.messages);

View File

@@ -0,0 +1,65 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ImageContent } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { PRUNED_HISTORY_IMAGE_MARKER, pruneProcessedHistoryImages } from "./history-image-prune.js";
// Behavior of pruneProcessedHistoryImages: image blocks in already-answered
// user turns are replaced by a text marker; unanswered turns are untouched.
describe("pruneProcessedHistoryImages", () => {
  // Minimal image block fixture; spread-copied so every test owns its instance.
  const sampleImage: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };

  it("prunes image blocks from user messages that already have assistant replies", () => {
    const history: AgentMessage[] = [
      {
        role: "user",
        content: [{ type: "text", text: "See /tmp/photo.png" }, { ...sampleImage }],
      } as AgentMessage,
      {
        role: "assistant",
        content: "got it",
      } as unknown as AgentMessage,
    ];

    expect(pruneProcessedHistoryImages(history)).toBe(true);

    const userTurn = history[0] as Extract<AgentMessage, { role: "user" }> | undefined;
    expect(Array.isArray(userTurn?.content)).toBe(true);
    const blocks = userTurn?.content as Array<{ type: string; text?: string; data?: string }>;
    expect(blocks).toHaveLength(2);
    expect(blocks[0]?.type).toBe("text");
    expect(blocks[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER });
  });

  it("does not prune latest user message when no assistant response exists yet", () => {
    const history: AgentMessage[] = [
      {
        role: "user",
        content: [{ type: "text", text: "See /tmp/photo.png" }, { ...sampleImage }],
      } as AgentMessage,
    ];

    expect(pruneProcessedHistoryImages(history)).toBe(false);

    const userTurn = history[0] as Extract<AgentMessage, { role: "user" }> | undefined;
    if (!userTurn || !Array.isArray(userTurn.content)) {
      throw new Error("expected array content");
    }
    expect(userTurn.content).toHaveLength(2);
    expect(userTurn.content[1]).toMatchObject({ type: "image", data: "abc" });
  });

  it("does not change messages when no assistant turn exists", () => {
    const history: AgentMessage[] = [
      {
        role: "user",
        content: "noop",
      } as AgentMessage,
    ];

    expect(pruneProcessedHistoryImages(history)).toBe(false);

    const userTurn = history[0] as Extract<AgentMessage, { role: "user" }> | undefined;
    expect(userTurn?.content).toBe("noop");
  });
});

View File

@@ -0,0 +1,44 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]";

/**
 * Idempotent cleanup for legacy sessions that persisted image blocks in history.
 *
 * Replaces every `image` block inside user turns that precede the most recent
 * assistant reply with a short text marker, so stale payloads stop inflating
 * subsequent prompts. User turns at or after the newest assistant message are
 * left untouched.
 *
 * @param messages - Session history; mutated in place.
 * @returns true when at least one image block was rewritten.
 */
export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
  // Walk backwards to the newest assistant turn; everything before it is "answered".
  let cutoff = messages.length - 1;
  while (cutoff >= 0 && messages[cutoff]?.role !== "assistant") {
    cutoff -= 1;
  }
  if (cutoff < 0) {
    // No assistant reply yet — nothing has been processed by the model.
    return false;
  }

  let didMutate = false;
  // slice() keeps the same message object references, so in-place edits stick.
  for (const message of messages.slice(0, cutoff)) {
    if (message?.role !== "user" || !Array.isArray(message.content)) {
      continue;
    }
    const blocks = message.content;
    for (let index = 0; index < blocks.length; index++) {
      const block = blocks[index];
      if (typeof block !== "object" || block === null) {
        continue;
      }
      if ((block as { type?: string }).type !== "image") {
        continue;
      }
      // Swap the payload for the marker; the original data is dropped for good.
      blocks[index] = {
        type: "text",
        text: PRUNED_HISTORY_IMAGE_MARKER,
      } as (typeof blocks)[number];
      didMutate = true;
    }
  }
  return didMutate;
}

View File

@@ -13,18 +13,36 @@ import { log } from "../logger.js";
/**
 * Common image file extensions for detection.
 * Entries are lowercase and dot-prefixed (e.g. ".png").
 */
const IMAGE_EXTENSIONS = new Set([
".png",
".jpg",
".jpeg",
".gif",
".webp",
".bmp",
".tiff",
".tif",
".heic",
".heif",
]);
// Bare extension names (no dot) — the single source of truth for both the
// lookup set and the regex alternation below.
const IMAGE_EXTENSION_NAMES = [
"png",
"jpg",
"jpeg",
"gif",
"webp",
"bmp",
"tiff",
"tif",
"heic",
"heif",
] as const;
// Dot-prefixed lookup set (e.g. ".png") derived from the names above.
const IMAGE_EXTENSIONS = new Set(IMAGE_EXTENSION_NAMES.map((ext) => `.${ext}`));
// Regex alternation fragment ("png|jpg|...") embedded in the patterns below.
const IMAGE_EXTENSION_PATTERN = IMAGE_EXTENSION_NAMES.join("|");
// Leading path in "path (type) | url" media-attachment lines; non-greedy so it
// stops at the first image extension. Capture 1 = path (may contain spaces).
const MEDIA_ATTACHED_PATH_PATTERN = new RegExp(
`^\\s*(.+?\\.(?:${IMAGE_EXTENSION_PATTERN}))\\s*(?:\\(|$|\\|)`,
"i",
);
// "[Image: source: /path/file.png]" markers from messaging systems. Capture 1 = path.
// NOTE: the 'g' flag makes this stateful; callers must reset lastIndex before scanning.
const MESSAGE_IMAGE_PATTERN = new RegExp(
`\\[Image:\\s*source:\\s*([^\\]]+\\.(?:${IMAGE_EXTENSION_PATTERN}))\\]`,
"gi",
);
// file:// URLs ending in an image extension; the whole match is the URL.
// Stateful ('g' flag) — callers reset lastIndex before scanning.
const FILE_URL_PATTERN = new RegExp(
`file://[^\\s<>"'\\\`\\]]+\\.(?:${IMAGE_EXTENSION_PATTERN})`,
"gi",
);
// Absolute, ./, ../ and ~/ filesystem paths ending in an image extension.
// Capture 1 = path without the leading whitespace/quote delimiter.
// Stateful ('g' flag) — callers reset lastIndex before scanning.
const PATH_PATTERN = new RegExp(
`(?:^|\\s|["'\\\`(])((\\.\\.?/|[~/])[^\\s"'\\\`()\\[\\]]*\\.(?:${IMAGE_EXTENSION_PATTERN}))`,
"gi",
);
/**
* Result of detecting an image reference in text.
@@ -113,18 +131,15 @@ export function detectImageReferences(prompt: string): DetectedImageRef[] {
// Format is: path (type) | url OR just: path (type)
// Path may contain spaces (e.g., "ChatGPT Image Apr 21.png")
// Use non-greedy .+? to stop at first image extension
const pathMatch = content.match(
/^\s*(.+?\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))\s*(?:\(|$|\|)/i,
);
const pathMatch = content.match(MEDIA_ATTACHED_PATH_PATTERN);
if (pathMatch?.[1]) {
addPathRef(pathMatch[1].trim());
}
}
// Pattern for [Image: source: /path/...] format from messaging systems
const messageImagePattern =
/\[Image:\s*source:\s*([^\]]+\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))\]/gi;
while ((match = messageImagePattern.exec(prompt)) !== null) {
MESSAGE_IMAGE_PATTERN.lastIndex = 0;
while ((match = MESSAGE_IMAGE_PATTERN.exec(prompt)) !== null) {
const raw = match[1]?.trim();
if (raw) {
addPathRef(raw);
@@ -134,8 +149,8 @@ export function detectImageReferences(prompt: string): DetectedImageRef[] {
// Remote HTTP(S) URLs are intentionally ignored. Native image injection is local-only.
// Pattern for file:// URLs - treat as paths since loadWebMedia handles them
const fileUrlPattern = /file:\/\/[^\s<>"'`\]]+\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif)/gi;
while ((match = fileUrlPattern.exec(prompt)) !== null) {
FILE_URL_PATTERN.lastIndex = 0;
while ((match = FILE_URL_PATTERN.exec(prompt)) !== null) {
const raw = match[0];
if (seen.has(raw.toLowerCase())) {
continue;
@@ -156,9 +171,8 @@ export function detectImageReferences(prompt: string): DetectedImageRef[] {
// - ./relative/path.ext
// - ../parent/path.ext
// - ~/home/path.ext
const pathPattern =
/(?:^|\s|["'`(])((\.\.?\/|[~/])[^\s"'`()[\]]*\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))/gi;
while ((match = pathPattern.exec(prompt)) !== null) {
PATH_PATTERN.lastIndex = 0;
while ((match = PATH_PATTERN.exec(prompt)) !== null) {
// Use capture group 1 (the path without delimiter prefix); skip if undefined
if (match[1]) {
addPathRef(match[1]);