diff --git a/src/agents/openai-ws-message-conversion.ts b/src/agents/openai-ws-message-conversion.ts index e617932a35f..badf1a257ae 100644 --- a/src/agents/openai-ws-message-conversion.ts +++ b/src/agents/openai-ws-message-conversion.ts @@ -23,7 +23,7 @@ import { normalizeUsage } from "./usage.js"; type AnyMessage = Message & { role: string; content: unknown }; type AssistantMessageWithPhase = AssistantMessage & { phase?: OpenAIResponsesAssistantPhase }; -export type ReplayModelInfo = { input?: ReadonlyArray }; +export type ReplayModelInfo = { input?: ReadonlyArray; api?: string }; type ReplayableReasoningItem = Extract; type ReplayableReasoningSignature = { type: "reasoning" | `reasoning.${string}`; @@ -48,6 +48,14 @@ function supportsImageInput(modelOverride?: ReplayModelInfo): boolean { return !Array.isArray(modelOverride?.input) || modelOverride.input.includes("image"); } +function usesOpenAICompletionsImageParts(modelOverride?: ReplayModelInfo): boolean { + return modelOverride?.api === "openai-completions"; +} + +function toImageUrlFromBase64(params: { mediaType?: string; data: string }): string { + return `data:${params.mediaType ?? "image/jpeg"};base64,${params.data}`; +} + function contentToText(content: unknown): string { if (typeof content === "string") { return content; @@ -77,6 +85,7 @@ function contentToOpenAIParts(content: unknown, modelOverride?: ReplayModelInfo) } const includeImages = supportsImageInput(modelOverride); + const useImageUrl = usesOpenAICompletionsImageParts(modelOverride); const parts: ContentPart[] = []; for (const part of content as Array<{ type?: string; @@ -98,6 +107,15 @@ function contentToOpenAIParts(content: unknown, modelOverride?: ReplayModelInfo) } if (part.type === "image" && typeof part.data === "string") { + if (useImageUrl) { + parts.push({ + type: "image_url", + image_url: { + url: toImageUrlFromBase64({ mediaType: part.mimeType, data: part.data }), + }, + }); + continue; + } parts.push({ type: "input_image", source: { @@ -115,11 +133,24 @@ function contentToOpenAIParts(content: unknown, modelOverride?: ReplayModelInfo) typeof part.source === "object" && typeof (part.source as { type?: unknown }).type === "string" ) { + const source = part.source as + | { type: "url"; url: string } + | { type: "base64"; media_type: string; data: string }; + if (useImageUrl) { + parts.push({ + type: "image_url", + image_url: { + url: + source.type === "url" + ? source.url + : toImageUrlFromBase64({ mediaType: source.media_type, data: source.data }), + }, + }); + continue; + } parts.push({ type: "input_image", - source: part.source as - | { type: "url"; url: string } - | { type: "base64"; media_type: string; data: string }, + source, }); } } @@ -441,7 +472,9 @@ export function convertMessagesToInputItems( } const parts = Array.isArray(m.content) ? contentToOpenAIParts(m.content, modelOverride) : []; const textOutput = contentToText(m.content); - const imageParts = parts.filter((part) => part.type === "input_image"); + const imageParts = parts.filter( + (part) => part.type === "input_image" || part.type === "image_url", + ); items.push({ type: "function_call_output", call_id: replayId.callId, diff --git a/src/agents/openai-ws-stream.test.ts b/src/agents/openai-ws-stream.test.ts index 59d8494efd0..9fc8651d9e5 100644 --- a/src/agents/openai-ws-stream.test.ts +++ b/src/agents/openai-ws-stream.test.ts @@ -575,6 +575,57 @@ describe("convertMessagesToInputItems", () => { expect(items[0]).toMatchObject({ type: "message", role: "user", content: "Hello!" }); }); + it("uses image_url parts for OpenAI-compatible user images", () => { + const msg: FakeMessage = { + role: "user", + content: [ + { type: "text", text: "describe this" }, + { type: "image", mimeType: "image/png", data: "AAAA" }, + ], + timestamp: 0, + }; + const items = convertMessagesToInputItems( + [msg] as Parameters[0], + { api: "openai-completions", input: ["text", "image"] }, + ); + + expect(items).toEqual([ + { + type: "message", + role: "user", + content: [ + { type: "input_text", text: "describe this" }, + { type: "image_url", image_url: { url: "data:image/png;base64,AAAA" } }, + ], + }, + ]); + }); + + it("keeps input_image parts for Responses user images", () => { + const msg: FakeMessage = { + role: "user", + content: [{ type: "image", mimeType: "image/png", data: "AAAA" }], + timestamp: 0, + }; + const items = convertMessagesToInputItems( + [msg] as Parameters[0], + { api: "openai-responses", input: ["text", "image"] }, + ); + + expect(items).toEqual([ + { + type: "message", + role: "user", + content: [ + { + type: "input_image", + source: { type: "base64", media_type: "image/png", data: "AAAA" }, + }, + ], + }, + ]); + }); + it("converts an assistant text-only message", () => { const items = convertMessagesToInputItems([assistantMsg(["Hi there."])] as Parameters< typeof convertMessagesToInputItems @@ -855,6 +906,37 @@ describe("convertMessagesToInputItems", () => { }); }); + it("preserves OpenAI-compatible tool-result images as follow-up image_url parts", () => { + const msg: FakeMessage = { + role: "toolResult", + toolCallId: "call_1", + toolName: "read", + content: [{ type: "image", mimeType: "image/png", data: "AAAA" }], + isError: false, + timestamp: 0, + }; + const items = convertMessagesToInputItems( + [msg] as Parameters[0], + { api: "openai-completions", input: ["text", "image"] }, + ); + + expect(items).toEqual([ + { + type: "function_call_output", + call_id: "call_1", + output: "(see attached image)", + }, + { + type: "message", + role: "user", + content: [ + { type: "input_text", text: "Attached image(s) from tool result:" }, + { type: "image_url", image_url: { url: "data:image/png;base64,AAAA" } }, + ], + }, + ]); + }); + it("drops tool result messages with empty tool call id", () => { const msg = { role: "toolResult" as const, diff --git a/src/agents/openai-ws-types.ts b/src/agents/openai-ws-types.ts index 7914e4d2237..115b54efea7 100644 --- a/src/agents/openai-ws-types.ts +++ b/src/agents/openai-ws-types.ts @@ -6,6 +6,10 @@ export type ContentPart = | { type: "input_image"; source: { type: "url"; url: string } | { type: "base64"; media_type: string; data: string }; + } + | { + type: "image_url"; + image_url: { url: string }; }; export type InputItem =