From 75ed72e807421ff794a861d9101ac0a64a56bb0e Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 26 Feb 2026 16:44:47 +0100 Subject: [PATCH] refactor(pi): extract history image prune helpers --- .../pi-embedded-runner/run/attempt.test.ts | 65 ------------------- src/agents/pi-embedded-runner/run/attempt.ts | 47 +------------- .../run/history-image-prune.test.ts | 65 +++++++++++++++++++ .../run/history-image-prune.ts | 44 +++++++++++++ src/agents/pi-embedded-runner/run/images.ts | 60 ++++++++++------- 5 files changed, 149 insertions(+), 132 deletions(-) create mode 100644 src/agents/pi-embedded-runner/run/history-image-prune.test.ts create mode 100644 src/agents/pi-embedded-runner/run/history-image-prune.ts diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index 022c7f989d8..f314353513b 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -1,76 +1,11 @@ -import type { AgentMessage } from "@mariozechner/pi-agent-core"; -import type { ImageContent } from "@mariozechner/pi-ai"; import { describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../../../config/config.js"; import { - PRUNED_HISTORY_IMAGE_MARKER, - pruneProcessedHistoryImages, resolveAttemptFsWorkspaceOnly, resolvePromptBuildHookResult, resolvePromptModeForSession, } from "./attempt.js"; -describe("pruneProcessedHistoryImages", () => { - const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" }; - - it("prunes image blocks from user messages that already have assistant replies", () => { - const messages: AgentMessage[] = [ - { - role: "user", - content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }], - } as AgentMessage, - { - role: "assistant", - content: "got it", - } as unknown as AgentMessage, - ]; - - const didMutate = pruneProcessedHistoryImages(messages); - - expect(didMutate).toBe(true); - const firstUser = messages[0] as Extract | undefined; - expect(Array.isArray(firstUser?.content)).toBe(true); - const content = firstUser?.content as Array<{ type: string; text?: string; data?: string }>; - expect(content).toHaveLength(2); - expect(content[0]?.type).toBe("text"); - expect(content[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER }); - }); - - it("does not prune latest user message when no assistant response exists yet", () => { - const messages: AgentMessage[] = [ - { - role: "user", - content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }], - } as AgentMessage, - ]; - - const didMutate = pruneProcessedHistoryImages(messages); - - expect(didMutate).toBe(false); - const first = messages[0] as Extract | undefined; - if (!first || !Array.isArray(first.content)) { - throw new Error("expected array content"); - } - expect(first.content).toHaveLength(2); - expect(first.content[1]).toMatchObject({ type: "image", data: "abc" }); - }); - - it("does not change messages when no assistant turn exists", () => { - const messages: AgentMessage[] = [ - { - role: "user", - content: "noop", - } as AgentMessage, - ]; - - const didMutate = pruneProcessedHistoryImages(messages); - - expect(didMutate).toBe(false); - const firstUser = messages[0] as Extract | undefined; - expect(firstUser?.content).toBe("noop"); - }); -}); - describe("resolvePromptBuildHookResult", () => { function createLegacyOnlyHookRunner() { return { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index b3590a530c7..a0f4519a4f1 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -112,6 +112,7 @@ import { selectCompactionTimeoutSnapshot, shouldFlagCompactionTimeout, } from "./compaction-timeout.js"; +import { pruneProcessedHistoryImages } from "./history-image-prune.js"; import { detectAndLoadPromptImages } from "./images.js"; import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js"; @@ -127,48 +128,6 @@ type PromptBuildHookRunner = { ) => Promise; }; -export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]"; - -/** - * Prunes image blocks from user messages that already have an assistant response after them. - * This is a one-way cleanup for previously persisted history image data. - */ -export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean { - let lastAssistantIndex = -1; - for (let i = messages.length - 1; i >= 0; i--) { - if (messages[i]?.role === "assistant") { - lastAssistantIndex = i; - break; - } - } - if (lastAssistantIndex < 0) { - return false; - } - - let didMutate = false; - for (let i = 0; i < lastAssistantIndex; i++) { - const message = messages[i]; - if (!message || message.role !== "user" || !Array.isArray(message.content)) { - continue; - } - for (let j = 0; j < message.content.length; j++) { - const block = message.content[j]; - if (!block || typeof block !== "object") { - continue; - } - if ((block as { type?: string }).type !== "image") { - continue; - } - message.content[j] = { - type: "text", - text: PRUNED_HISTORY_IMAGE_MARKER, - } as (typeof message.content)[number]; - didMutate = true; - } - } - return didMutate; -} - export async function resolvePromptBuildHookResult(params: { prompt: string; messages: unknown[]; @@ -1085,8 +1044,8 @@ export async function runEmbeddedAttempt( } try { - // One-time migration: prune image blocks from already-processed user turns. - // This prevents old persisted base64 payloads from bloating subsequent prompts. + // Idempotent cleanup for legacy sessions with persisted image payloads. + // Called each run; only mutates already-answered user turns that still carry image blocks. const didPruneImages = pruneProcessedHistoryImages(activeSession.messages); if (didPruneImages) { activeSession.agent.replaceMessages(activeSession.messages); diff --git a/src/agents/pi-embedded-runner/run/history-image-prune.test.ts b/src/agents/pi-embedded-runner/run/history-image-prune.test.ts new file mode 100644 index 00000000000..0e171352e58 --- /dev/null +++ b/src/agents/pi-embedded-runner/run/history-image-prune.test.ts @@ -0,0 +1,65 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { ImageContent } from "@mariozechner/pi-ai"; +import { describe, expect, it } from "vitest"; +import { PRUNED_HISTORY_IMAGE_MARKER, pruneProcessedHistoryImages } from "./history-image-prune.js"; + +describe("pruneProcessedHistoryImages", () => { + const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" }; + + it("prunes image blocks from user messages that already have assistant replies", () => { + const messages: AgentMessage[] = [ + { + role: "user", + content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }], + } as AgentMessage, + { + role: "assistant", + content: "got it", + } as unknown as AgentMessage, + ]; + + const didMutate = pruneProcessedHistoryImages(messages); + + expect(didMutate).toBe(true); + const firstUser = messages[0] as Extract | undefined; + expect(Array.isArray(firstUser?.content)).toBe(true); + const content = firstUser?.content as Array<{ type: string; text?: string; data?: string }>; + expect(content).toHaveLength(2); + expect(content[0]?.type).toBe("text"); + expect(content[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER }); + }); + + it("does not prune latest user message when no assistant response exists yet", () => { + const messages: AgentMessage[] = [ + { + role: "user", + content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }], + } as AgentMessage, + ]; + + const didMutate = pruneProcessedHistoryImages(messages); + + expect(didMutate).toBe(false); + const first = messages[0] as Extract | undefined; + if (!first || !Array.isArray(first.content)) { + throw new Error("expected array content"); + } + expect(first.content).toHaveLength(2); + expect(first.content[1]).toMatchObject({ type: "image", data: "abc" }); + }); + + it("does not change messages when no assistant turn exists", () => { + const messages: AgentMessage[] = [ + { + role: "user", + content: "noop", + } as AgentMessage, + ]; + + const didMutate = pruneProcessedHistoryImages(messages); + + expect(didMutate).toBe(false); + const firstUser = messages[0] as Extract | undefined; + expect(firstUser?.content).toBe("noop"); + }); +}); diff --git a/src/agents/pi-embedded-runner/run/history-image-prune.ts b/src/agents/pi-embedded-runner/run/history-image-prune.ts new file mode 100644 index 00000000000..d7dbea5de38 --- /dev/null +++ b/src/agents/pi-embedded-runner/run/history-image-prune.ts @@ -0,0 +1,44 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; + +export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]"; + +/** + * Idempotent cleanup for legacy sessions that persisted image blocks in history. + * Called each run; mutates only user turns that already have an assistant reply. + */ +export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean { + let lastAssistantIndex = -1; + for (let i = messages.length - 1; i >= 0; i--) { + if (messages[i]?.role === "assistant") { + lastAssistantIndex = i; + break; + } + } + if (lastAssistantIndex < 0) { + return false; + } + + let didMutate = false; + for (let i = 0; i < lastAssistantIndex; i++) { + const message = messages[i]; + if (!message || message.role !== "user" || !Array.isArray(message.content)) { + continue; + } + for (let j = 0; j < message.content.length; j++) { + const block = message.content[j]; + if (!block || typeof block !== "object") { + continue; + } + if ((block as { type?: string }).type !== "image") { + continue; + } + message.content[j] = { + type: "text", + text: PRUNED_HISTORY_IMAGE_MARKER, + } as (typeof message.content)[number]; + didMutate = true; + } + } + + return didMutate; +} diff --git a/src/agents/pi-embedded-runner/run/images.ts b/src/agents/pi-embedded-runner/run/images.ts index 71b7aeb2273..7b24dae94c0 100644 --- a/src/agents/pi-embedded-runner/run/images.ts +++ b/src/agents/pi-embedded-runner/run/images.ts @@ -13,18 +13,36 @@ import { log } from "../logger.js"; /** * Common image file extensions for detection. */ -const IMAGE_EXTENSIONS = new Set([ - ".png", - ".jpg", - ".jpeg", - ".gif", - ".webp", - ".bmp", - ".tiff", - ".tif", - ".heic", - ".heif", -]); +const IMAGE_EXTENSION_NAMES = [ + "png", + "jpg", + "jpeg", + "gif", + "webp", + "bmp", + "tiff", + "tif", + "heic", + "heif", +] as const; +const IMAGE_EXTENSIONS = new Set(IMAGE_EXTENSION_NAMES.map((ext) => `.${ext}`)); +const IMAGE_EXTENSION_PATTERN = IMAGE_EXTENSION_NAMES.join("|"); +const MEDIA_ATTACHED_PATH_PATTERN = new RegExp( + `^\\s*(.+?\\.(?:${IMAGE_EXTENSION_PATTERN}))\\s*(?:\\(|$|\\|)`, + "i", +); +const MESSAGE_IMAGE_PATTERN = new RegExp( + `\\[Image:\\s*source:\\s*([^\\]]+\\.(?:${IMAGE_EXTENSION_PATTERN}))\\]`, + "gi", +); +const FILE_URL_PATTERN = new RegExp( + `file://[^\\s<>"'\\\`\\]]+\\.(?:${IMAGE_EXTENSION_PATTERN})`, + "gi", +); +const PATH_PATTERN = new RegExp( + `(?:^|\\s|["'\\\`(])((\\.\\.?/|[~/])[^\\s"'\\\`()\\[\\]]*\\.(?:${IMAGE_EXTENSION_PATTERN}))`, + "gi", +); /** * Result of detecting an image reference in text. @@ -113,18 +131,15 @@ export function detectImageReferences(prompt: string): DetectedImageRef[] { // Format is: path (type) | url OR just: path (type) // Path may contain spaces (e.g., "ChatGPT Image Apr 21.png") // Use non-greedy .+? to stop at first image extension - const pathMatch = content.match( - /^\s*(.+?\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))\s*(?:\(|$|\|)/i, - ); + const pathMatch = content.match(MEDIA_ATTACHED_PATH_PATTERN); if (pathMatch?.[1]) { addPathRef(pathMatch[1].trim()); } } // Pattern for [Image: source: /path/...] format from messaging systems - const messageImagePattern = - /\[Image:\s*source:\s*([^\]]+\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))\]/gi; - while ((match = messageImagePattern.exec(prompt)) !== null) { + MESSAGE_IMAGE_PATTERN.lastIndex = 0; + while ((match = MESSAGE_IMAGE_PATTERN.exec(prompt)) !== null) { const raw = match[1]?.trim(); if (raw) { addPathRef(raw); @@ -134,8 +149,8 @@ export function detectImageReferences(prompt: string): DetectedImageRef[] { // Remote HTTP(S) URLs are intentionally ignored. Native image injection is local-only. // Pattern for file:// URLs - treat as paths since loadWebMedia handles them - const fileUrlPattern = /file:\/\/[^\s<>"'`\]]+\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif)/gi; - while ((match = fileUrlPattern.exec(prompt)) !== null) { + FILE_URL_PATTERN.lastIndex = 0; + while ((match = FILE_URL_PATTERN.exec(prompt)) !== null) { const raw = match[0]; if (seen.has(raw.toLowerCase())) { continue; @@ -156,9 +171,8 @@ export function detectImageReferences(prompt: string): DetectedImageRef[] { // - ./relative/path.ext // - ../parent/path.ext // - ~/home/path.ext - const pathPattern = - /(?:^|\s|["'`(])((\.\.?\/|[~/])[^\s"'`()[\]]*\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))/gi; - while ((match = pathPattern.exec(prompt)) !== null) { + PATH_PATTERN.lastIndex = 0; + while ((match = PATH_PATTERN.exec(prompt)) !== null) { // Use capture group 1 (the path without delimiter prefix); skip if undefined if (match[1]) { addPathRef(match[1]);