refactor(pi): extract history image prune helpers

This commit is contained in:
Peter Steinberger
2026-02-26 16:44:47 +01:00
parent 57334cd7d8
commit 75ed72e807
5 changed files with 149 additions and 132 deletions

View File

@@ -1,76 +1,11 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ImageContent } from "@mariozechner/pi-ai";
import { describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../../config/config.js";
import {
PRUNED_HISTORY_IMAGE_MARKER,
pruneProcessedHistoryImages,
resolveAttemptFsWorkspaceOnly,
resolvePromptBuildHookResult,
resolvePromptModeForSession,
} from "./attempt.js";
// Unit tests for pruneProcessedHistoryImages (imported from ./attempt.js):
// pruning replaces persisted image blocks in already-answered user turns with
// a text marker and reports whether anything was mutated.
describe("pruneProcessedHistoryImages", () => {
// Shared fixture: a minimal image content block; spread-copied per test so each
// test mutates its own instance.
const image: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };
// A user turn followed by an assistant reply is "answered": its image block
// must be swapped for the marker text while the text block is kept.
it("prunes image blocks from user messages that already have assistant replies", () => {
const messages: AgentMessage[] = [
{
role: "user",
content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }],
} as AgentMessage,
{
role: "assistant",
content: "got it",
} as unknown as AgentMessage,
];
const didMutate = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(true);
const firstUser = messages[0] as Extract<AgentMessage, { role: "user" }> | undefined;
expect(Array.isArray(firstUser?.content)).toBe(true);
const content = firstUser?.content as Array<{ type: string; text?: string; data?: string }>;
expect(content).toHaveLength(2);
expect(content[0]?.type).toBe("text");
expect(content[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER });
});
// The trailing (not-yet-answered) user turn must keep its image data intact.
it("does not prune latest user message when no assistant response exists yet", () => {
const messages: AgentMessage[] = [
{
role: "user",
content: [{ type: "text", text: "See /tmp/photo.png" }, { ...image }],
} as AgentMessage,
];
const didMutate = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(false);
const first = messages[0] as Extract<AgentMessage, { role: "user" }> | undefined;
if (!first || !Array.isArray(first.content)) {
throw new Error("expected array content");
}
expect(first.content).toHaveLength(2);
expect(first.content[1]).toMatchObject({ type: "image", data: "abc" });
});
// With no assistant turn at all, string content is left untouched and the
// function reports no mutation.
it("does not change messages when no assistant turn exists", () => {
const messages: AgentMessage[] = [
{
role: "user",
content: "noop",
} as AgentMessage,
];
const didMutate = pruneProcessedHistoryImages(messages);
expect(didMutate).toBe(false);
const firstUser = messages[0] as Extract<AgentMessage, { role: "user" }> | undefined;
expect(firstUser?.content).toBe("noop");
});
});
describe("resolvePromptBuildHookResult", () => {
function createLegacyOnlyHookRunner() {
return {

View File

@@ -112,6 +112,7 @@ import {
selectCompactionTimeoutSnapshot,
shouldFlagCompactionTimeout,
} from "./compaction-timeout.js";
import { pruneProcessedHistoryImages } from "./history-image-prune.js";
import { detectAndLoadPromptImages } from "./images.js";
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
@@ -127,48 +128,6 @@ type PromptBuildHookRunner = {
) => Promise<PluginHookBeforeAgentStartResult | undefined>;
};
export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]";
/**
 * Prunes image blocks from user messages that already have an assistant response after them.
 * This is a one-way cleanup for previously persisted history image data.
 *
 * @param messages - Session history; mutated in place.
 * @returns true when at least one image block was replaced with the marker text.
 */
export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
// Find the most recent assistant message; turns after it are still awaiting a reply.
let lastAssistantIndex = -1;
for (let i = messages.length - 1; i >= 0; i--) {
if (messages[i]?.role === "assistant") {
lastAssistantIndex = i;
break;
}
}
// No assistant reply at all: nothing has been processed by the model yet.
if (lastAssistantIndex < 0) {
return false;
}
let didMutate = false;
// Only user messages strictly before the last assistant turn are eligible.
for (let i = 0; i < lastAssistantIndex; i++) {
const message = messages[i];
if (!message || message.role !== "user" || !Array.isArray(message.content)) {
continue;
}
for (let j = 0; j < message.content.length; j++) {
const block = message.content[j];
if (!block || typeof block !== "object") {
continue;
}
if ((block as { type?: string }).type !== "image") {
continue;
}
// Swap the image payload for a small text marker in place (irreversible).
message.content[j] = {
type: "text",
text: PRUNED_HISTORY_IMAGE_MARKER,
} as (typeof message.content)[number];
didMutate = true;
}
}
return didMutate;
}
export async function resolvePromptBuildHookResult(params: {
prompt: string;
messages: unknown[];
@@ -1085,8 +1044,8 @@ export async function runEmbeddedAttempt(
}
try {
// One-time migration: prune image blocks from already-processed user turns.
// This prevents old persisted base64 payloads from bloating subsequent prompts.
// Idempotent cleanup for legacy sessions with persisted image payloads.
// Called each run; only mutates already-answered user turns that still carry image blocks.
const didPruneImages = pruneProcessedHistoryImages(activeSession.messages);
if (didPruneImages) {
activeSession.agent.replaceMessages(activeSession.messages);

View File

@@ -0,0 +1,65 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ImageContent } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { PRUNED_HISTORY_IMAGE_MARKER, pruneProcessedHistoryImages } from "./history-image-prune.js";
// Behavior of pruneProcessedHistoryImages: image blocks in already-answered
// user turns are replaced by a text marker; unanswered turns are untouched.
describe("pruneProcessedHistoryImages", () => {
  // Minimal image block fixture; spread-copied so every test owns its instance.
  const sampleImage: ImageContent = { type: "image", data: "abc", mimeType: "image/png" };

  it("prunes image blocks from user messages that already have assistant replies", () => {
    const history: AgentMessage[] = [
      {
        role: "user",
        content: [{ type: "text", text: "See /tmp/photo.png" }, { ...sampleImage }],
      } as AgentMessage,
      {
        role: "assistant",
        content: "got it",
      } as unknown as AgentMessage,
    ];

    expect(pruneProcessedHistoryImages(history)).toBe(true);

    const userTurn = history[0] as Extract<AgentMessage, { role: "user" }> | undefined;
    expect(Array.isArray(userTurn?.content)).toBe(true);
    const blocks = userTurn?.content as Array<{ type: string; text?: string; data?: string }>;
    expect(blocks).toHaveLength(2);
    expect(blocks[0]?.type).toBe("text");
    expect(blocks[1]).toMatchObject({ type: "text", text: PRUNED_HISTORY_IMAGE_MARKER });
  });

  it("does not prune latest user message when no assistant response exists yet", () => {
    const history: AgentMessage[] = [
      {
        role: "user",
        content: [{ type: "text", text: "See /tmp/photo.png" }, { ...sampleImage }],
      } as AgentMessage,
    ];

    expect(pruneProcessedHistoryImages(history)).toBe(false);

    const userTurn = history[0] as Extract<AgentMessage, { role: "user" }> | undefined;
    if (!userTurn || !Array.isArray(userTurn.content)) {
      throw new Error("expected array content");
    }
    expect(userTurn.content).toHaveLength(2);
    expect(userTurn.content[1]).toMatchObject({ type: "image", data: "abc" });
  });

  it("does not change messages when no assistant turn exists", () => {
    const history: AgentMessage[] = [
      {
        role: "user",
        content: "noop",
      } as AgentMessage,
    ];

    expect(pruneProcessedHistoryImages(history)).toBe(false);

    const userTurn = history[0] as Extract<AgentMessage, { role: "user" }> | undefined;
    expect(userTurn?.content).toBe("noop");
  });
});

View File

@@ -0,0 +1,44 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]";

/**
 * Idempotent cleanup for legacy sessions that persisted image blocks in history.
 *
 * Replaces every `image` block inside user turns that precede the most recent
 * assistant reply with a short text marker, so stale payloads stop inflating
 * subsequent prompts. User turns at or after the newest assistant message are
 * left untouched.
 *
 * @param messages - Session history; mutated in place.
 * @returns true when at least one image block was rewritten.
 */
export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
  // Walk backwards to the newest assistant turn; everything before it is "answered".
  let cutoff = messages.length - 1;
  while (cutoff >= 0 && messages[cutoff]?.role !== "assistant") {
    cutoff -= 1;
  }
  if (cutoff < 0) {
    // No assistant reply yet — nothing has been processed by the model.
    return false;
  }

  let didMutate = false;
  // slice() keeps the same message object references, so in-place edits stick.
  for (const message of messages.slice(0, cutoff)) {
    if (message?.role !== "user" || !Array.isArray(message.content)) {
      continue;
    }
    const blocks = message.content;
    for (let index = 0; index < blocks.length; index++) {
      const block = blocks[index];
      if (typeof block !== "object" || block === null) {
        continue;
      }
      if ((block as { type?: string }).type !== "image") {
        continue;
      }
      // Swap the payload for the marker; the original data is dropped for good.
      blocks[index] = {
        type: "text",
        text: PRUNED_HISTORY_IMAGE_MARKER,
      } as (typeof blocks)[number];
      didMutate = true;
    }
  }
  return didMutate;
}

View File

@@ -13,18 +13,36 @@ import { log } from "../logger.js";
/**
 * Common image file extensions for detection.
 * Entries are lowercase and dot-prefixed (e.g. ".png").
 */
const IMAGE_EXTENSIONS = new Set([
".png",
".jpg",
".jpeg",
".gif",
".webp",
".bmp",
".tiff",
".tif",
".heic",
".heif",
]);
// Bare extension names (no dot) — the single source of truth for both the
// lookup set and the regex alternation below.
const IMAGE_EXTENSION_NAMES = [
"png",
"jpg",
"jpeg",
"gif",
"webp",
"bmp",
"tiff",
"tif",
"heic",
"heif",
] as const;
// Dot-prefixed lookup set (e.g. ".png") derived from the names above.
const IMAGE_EXTENSIONS = new Set(IMAGE_EXTENSION_NAMES.map((ext) => `.${ext}`));
// Regex alternation fragment ("png|jpg|...") embedded in the patterns below.
const IMAGE_EXTENSION_PATTERN = IMAGE_EXTENSION_NAMES.join("|");
// Leading path in "path (type) | url" media-attachment lines; non-greedy so it
// stops at the first image extension. Capture 1 = path (may contain spaces).
const MEDIA_ATTACHED_PATH_PATTERN = new RegExp(
`^\\s*(.+?\\.(?:${IMAGE_EXTENSION_PATTERN}))\\s*(?:\\(|$|\\|)`,
"i",
);
// "[Image: source: /path/file.png]" markers from messaging systems. Capture 1 = path.
// NOTE: the 'g' flag makes this stateful; callers must reset lastIndex before scanning.
const MESSAGE_IMAGE_PATTERN = new RegExp(
`\\[Image:\\s*source:\\s*([^\\]]+\\.(?:${IMAGE_EXTENSION_PATTERN}))\\]`,
"gi",
);
// file:// URLs ending in an image extension; the whole match is the URL.
// Stateful ('g' flag) — callers reset lastIndex before scanning.
const FILE_URL_PATTERN = new RegExp(
`file://[^\\s<>"'\\\`\\]]+\\.(?:${IMAGE_EXTENSION_PATTERN})`,
"gi",
);
// Absolute, ./, ../ and ~/ filesystem paths ending in an image extension.
// Capture 1 = path without the leading whitespace/quote delimiter.
// Stateful ('g' flag) — callers reset lastIndex before scanning.
const PATH_PATTERN = new RegExp(
`(?:^|\\s|["'\\\`(])((\\.\\.?/|[~/])[^\\s"'\\\`()\\[\\]]*\\.(?:${IMAGE_EXTENSION_PATTERN}))`,
"gi",
);
/**
* Result of detecting an image reference in text.
@@ -113,18 +131,15 @@ export function detectImageReferences(prompt: string): DetectedImageRef[] {
// Format is: path (type) | url OR just: path (type)
// Path may contain spaces (e.g., "ChatGPT Image Apr 21.png")
// Use non-greedy .+? to stop at first image extension
const pathMatch = content.match(
/^\s*(.+?\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))\s*(?:\(|$|\|)/i,
);
const pathMatch = content.match(MEDIA_ATTACHED_PATH_PATTERN);
if (pathMatch?.[1]) {
addPathRef(pathMatch[1].trim());
}
}
// Pattern for [Image: source: /path/...] format from messaging systems
const messageImagePattern =
/\[Image:\s*source:\s*([^\]]+\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))\]/gi;
while ((match = messageImagePattern.exec(prompt)) !== null) {
MESSAGE_IMAGE_PATTERN.lastIndex = 0;
while ((match = MESSAGE_IMAGE_PATTERN.exec(prompt)) !== null) {
const raw = match[1]?.trim();
if (raw) {
addPathRef(raw);
@@ -134,8 +149,8 @@ export function detectImageReferences(prompt: string): DetectedImageRef[] {
// Remote HTTP(S) URLs are intentionally ignored. Native image injection is local-only.
// Pattern for file:// URLs - treat as paths since loadWebMedia handles them
const fileUrlPattern = /file:\/\/[^\s<>"'`\]]+\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif)/gi;
while ((match = fileUrlPattern.exec(prompt)) !== null) {
FILE_URL_PATTERN.lastIndex = 0;
while ((match = FILE_URL_PATTERN.exec(prompt)) !== null) {
const raw = match[0];
if (seen.has(raw.toLowerCase())) {
continue;
@@ -156,9 +171,8 @@ export function detectImageReferences(prompt: string): DetectedImageRef[] {
// - ./relative/path.ext
// - ../parent/path.ext
// - ~/home/path.ext
const pathPattern =
/(?:^|\s|["'`(])((\.\.?\/|[~/])[^\s"'`()[\]]*\.(?:png|jpe?g|gif|webp|bmp|tiff?|heic|heif))/gi;
while ((match = pathPattern.exec(prompt)) !== null) {
PATH_PATTERN.lastIndex = 0;
while ((match = PATH_PATTERN.exec(prompt)) !== null) {
// Use capture group 1 (the path without delimiter prefix); skip if undefined
if (match[1]) {
addPathRef(match[1]);