fix: preserve OpenAI-compatible image parts

2026-05-06 07:40:44 +00:00 · 2026-04-24 00:43:57 +01:00
parent 178a314a4c
commit cd8822cc5f
3 changed files with 124 additions and 5 deletions
--- a/src/agents/openai-ws-message-conversion.ts
+++ b/src/agents/openai-ws-message-conversion.ts
@@ -23,7 +23,7 @@ import { normalizeUsage } from "./usage.js";

 type AnyMessage = Message & { role: string; content: unknown };
 type AssistantMessageWithPhase = AssistantMessage & { phase?: OpenAIResponsesAssistantPhase };
-export type ReplayModelInfo = { input?: ReadonlyArray<string> };
+export type ReplayModelInfo = { input?: ReadonlyArray<string>; api?: string }; // api selects the image part shape
 type ReplayableReasoningItem = Extract<InputItem, { type: "reasoning" }>;
 type ReplayableReasoningSignature = {
  type: "reasoning" | `reasoning.${string}`;
@@ -48,6 +48,25 @@ function supportsImageInput(modelOverride?: ReplayModelInfo): boolean {
  return !Array.isArray(modelOverride?.input) || modelOverride.input.includes("image");
 }

+/**
+ * True when the model's `api` is "openai-completions"; images are then emitted as
+ * `image_url` parts instead of `input_image` parts.
+ */
+function usesOpenAICompletionsImageParts(modelOverride?: ReplayModelInfo): boolean {
+  return modelOverride?.api === "openai-completions";
+}
+
+/**
+ * Builds a base64 `data:` URL for an image.
+ *
+ * A missing or blank media type falls back to "image/jpeg": an empty one would yield
+ * `data:;base64,...`, which RFC 2397 reads as text/plain rather than as an image.
+ */
+function toImageUrlFromBase64(params: { mediaType?: string; data: string }): string {
+  const mediaType = params.mediaType?.trim() || "image/jpeg";
+  return `data:${mediaType};base64,${params.data}`;
+}
+
 function contentToText(content: unknown): string {
  if (typeof content === "string") {
    return content;
@@ -77,6 +85,7 @@ function contentToOpenAIParts(content: unknown, modelOverride?: ReplayModelInfo)
  }

  const includeImages = supportsImageInput(modelOverride);
+  const useImageUrl = usesOpenAICompletionsImageParts(modelOverride); // image_url instead of input_image
  const parts: ContentPart[] = [];
  for (const part of content as Array<{
    type?: string;
@@ -98,6 +107,12 @@ function contentToOpenAIParts(content: unknown, modelOverride?: ReplayModelInfo)
    }

    if (part.type === "image" && typeof part.data === "string") {
+      if (useImageUrl) {
+        // Inline the bytes as a data: URL and skip the input_image push below.
+        const url = toImageUrlFromBase64({ mediaType: part.mimeType, data: part.data });
+        parts.push({ type: "image_url", image_url: { url } });
+        continue;
+      }
      parts.push({
        type: "input_image",
        source: {
@@ -115,11 +133,30 @@ function contentToOpenAIParts(content: unknown, modelOverride?: ReplayModelInfo)
      typeof part.source === "object" &&
      typeof (part.source as { type?: unknown }).type === "string"
    ) {
+      const source = part.source as
+        | { type: "url"; url: string }
+        | { type: "base64"; media_type: string; data: string };
+      if (useImageUrl) {
+        // The guard above only proves that `source.type` is a string, so check the payload
+        // fields too: a malformed part is dropped rather than sent as an "undefined" URL.
+        const fields: { url?: unknown; media_type?: unknown; data?: unknown } = source;
+        let url: string | undefined;
+        if (source.type === "url") {
+          url = typeof fields.url === "string" ? fields.url : undefined;
+        } else if (typeof fields.data === "string") {
+          url = toImageUrlFromBase64({
+            mediaType: typeof fields.media_type === "string" ? fields.media_type : undefined,
+            data: fields.data,
+          });
+        }
+        if (url !== undefined) {
+          parts.push({ type: "image_url", image_url: { url } });
+        }
+        continue;
+      }
      parts.push({
        type: "input_image",
-        source: part.source as
-          | { type: "url"; url: string }
-          | { type: "base64"; media_type: string; data: string },
+        source,
      });
    }
  }
@@ -441,7 +472,9 @@ export function convertMessagesToInputItems(
    }
    const parts = Array.isArray(m.content) ? contentToOpenAIParts(m.content, modelOverride) : [];
    const textOutput = contentToText(m.content);
-    const imageParts = parts.filter((part) => part.type === "input_image");
+    const imageParts = parts.filter(
+      (part) => part.type === "input_image" || part.type === "image_url", // either image part shape
+    );
    items.push({
      type: "function_call_output",
      call_id: replayId.callId,
--- a/src/agents/openai-ws-stream.test.ts
+++ b/src/agents/openai-ws-stream.test.ts
@@ -575,6 +575,48 @@ describe("convertMessagesToInputItems", () => {
    expect(items[0]).toMatchObject({ type: "message", role: "user", content: "Hello!" });
  });

+  it("uses image_url parts for OpenAI-compatible user images", () => {
+    // Text plus a base64 PNG, converted for an "openai-completions" model.
+    const userMessage: FakeMessage = {
+      role: "user",
+      content: [
+        { type: "text", text: "describe this" },
+        { type: "image", mimeType: "image/png", data: "AAAA" },
+      ],
+      timestamp: 0,
+    };
+    const expectedContent = [
+      { type: "input_text", text: "describe this" },
+      { type: "image_url", image_url: { url: "data:image/png;base64,AAAA" } },
+    ];
+
+    const converted = convertMessagesToInputItems(
+      [userMessage] as Parameters<typeof convertMessagesToInputItems>[0],
+      { api: "openai-completions", input: ["text", "image"] },
+    );
+
+    expect(converted).toEqual([{ type: "message", role: "user", content: expectedContent }]);
+  });
+
+  it("keeps input_image parts for Responses user images", () => {
+    // The same kind of image for an "openai-responses" model keeps the base64 `source` shape.
+    const userMessage: FakeMessage = {
+      role: "user",
+      content: [{ type: "image", mimeType: "image/png", data: "AAAA" }],
+      timestamp: 0,
+    };
+    const expectedContent = [
+      { type: "input_image", source: { type: "base64", media_type: "image/png", data: "AAAA" } },
+    ];
+
+    const converted = convertMessagesToInputItems(
+      [userMessage] as Parameters<typeof convertMessagesToInputItems>[0],
+      { api: "openai-responses", input: ["text", "image"] },
+    );
+
+    expect(converted).toEqual([{ type: "message", role: "user", content: expectedContent }]);
+  });
+
  it("converts an assistant text-only message", () => {
    const items = convertMessagesToInputItems([assistantMsg(["Hi there."])] as Parameters<
      typeof convertMessagesToInputItems
@@ -855,6 +906,38 @@ describe("convertMessagesToInputItems", () => {
    });
  });

+  it("preserves OpenAI-compatible tool-result images as follow-up image_url parts", () => {
+    // An image-only tool result becomes a placeholder output plus a follow-up user message.
+    const toolResult: FakeMessage = {
+      role: "toolResult",
+      toolCallId: "call_1",
+      toolName: "read",
+      content: [{ type: "image", mimeType: "image/png", data: "AAAA" }],
+      isError: false,
+      timestamp: 0,
+    };
+    const expectedOutput = {
+      type: "function_call_output",
+      call_id: "call_1",
+      output: "(see attached image)",
+    };
+    const expectedFollowUp = {
+      type: "message",
+      role: "user",
+      content: [
+        { type: "input_text", text: "Attached image(s) from tool result:" },
+        { type: "image_url", image_url: { url: "data:image/png;base64,AAAA" } },
+      ],
+    };
+
+    const converted = convertMessagesToInputItems(
+      [toolResult] as Parameters<typeof convertMessagesToInputItems>[0],
+      { api: "openai-completions", input: ["text", "image"] },
+    );
+
+    expect(converted).toEqual([expectedOutput, expectedFollowUp]);
+  });
+
  it("drops tool result messages with empty tool call id", () => {
    const msg = {
      role: "toolResult" as const,
--- a/src/agents/openai-ws-types.ts
+++ b/src/agents/openai-ws-types.ts
@@ -6,6 +6,10 @@ export type ContentPart =
  | {
      type: "input_image";
      source: { type: "url"; url: string } | { type: "base64"; media_type: string; data: string };
+    }
+  | {
+      type: "image_url"; // alternative image part shape that carries the image as a URL
+      image_url: { url: string }; // may be a base64 data: URL
    };

 export type InputItem =