mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:40:44 +00:00
fix: preserve OpenAI-compatible image parts
This commit is contained in:
@@ -23,7 +23,7 @@ import { normalizeUsage } from "./usage.js";
|
||||
|
||||
type AnyMessage = Message & { role: string; content: unknown };
|
||||
type AssistantMessageWithPhase = AssistantMessage & { phase?: OpenAIResponsesAssistantPhase };
|
||||
export type ReplayModelInfo = { input?: ReadonlyArray<string> };
|
||||
export type ReplayModelInfo = { input?: ReadonlyArray<string>; api?: string };
|
||||
type ReplayableReasoningItem = Extract<InputItem, { type: "reasoning" }>;
|
||||
type ReplayableReasoningSignature = {
|
||||
type: "reasoning" | `reasoning.${string}`;
|
||||
@@ -48,6 +48,14 @@ function supportsImageInput(modelOverride?: ReplayModelInfo): boolean {
|
||||
return !Array.isArray(modelOverride?.input) || modelOverride.input.includes("image");
|
||||
}
|
||||
|
||||
/**
 * Reports whether the given model override targets the "openai-completions" API.
 *
 * @param modelOverride - Optional model info; only its `api` field is read.
 * @returns `true` only when `api` is exactly "openai-completions"; `false` for
 *   any other value or when no override (or no `api`) is supplied.
 */
function usesOpenAICompletionsImageParts(modelOverride?: ReplayModelInfo): boolean {
  const api = modelOverride?.api;
  return api === "openai-completions";
}
|
||||
|
||||
/**
 * Builds a base64 `data:` URL for an image payload.
 *
 * @param params - Image payload description.
 *   `mediaType` is the MIME type to embed; when it is missing, empty, or
 *   whitespace-only, "image/jpeg" is used instead.
 *   `data` is the base64 payload, inserted verbatim.
 * @returns A string of the form `data:<mediaType>;base64,<data>`.
 */
function toImageUrlFromBase64(params: { mediaType?: string; data: string }): string {
  // `||` (not `??`) is deliberate: a blank media type would otherwise produce
  // `data:;base64,...`, which RFC 2397 interprets as text/plain rather than an image.
  const mediaType = params.mediaType?.trim() || "image/jpeg";
  return `data:${mediaType};base64,${params.data}`;
}
|
||||
|
||||
function contentToText(content: unknown): string {
|
||||
if (typeof content === "string") {
|
||||
return content;
|
||||
@@ -77,6 +85,7 @@ function contentToOpenAIParts(content: unknown, modelOverride?: ReplayModelInfo)
|
||||
}
|
||||
|
||||
const includeImages = supportsImageInput(modelOverride);
|
||||
const useImageUrl = usesOpenAICompletionsImageParts(modelOverride);
|
||||
const parts: ContentPart[] = [];
|
||||
for (const part of content as Array<{
|
||||
type?: string;
|
||||
@@ -98,6 +107,15 @@ function contentToOpenAIParts(content: unknown, modelOverride?: ReplayModelInfo)
|
||||
}
|
||||
|
||||
if (part.type === "image" && typeof part.data === "string") {
|
||||
if (useImageUrl) {
|
||||
parts.push({
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: toImageUrlFromBase64({ mediaType: part.mimeType, data: part.data }),
|
||||
},
|
||||
});
|
||||
continue;
|
||||
}
|
||||
parts.push({
|
||||
type: "input_image",
|
||||
source: {
|
||||
@@ -115,11 +133,24 @@ function contentToOpenAIParts(content: unknown, modelOverride?: ReplayModelInfo)
|
||||
typeof part.source === "object" &&
|
||||
typeof (part.source as { type?: unknown }).type === "string"
|
||||
) {
|
||||
const source = part.source as
|
||||
| { type: "url"; url: string }
|
||||
| { type: "base64"; media_type: string; data: string };
|
||||
if (useImageUrl) {
|
||||
parts.push({
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url:
|
||||
source.type === "url"
|
||||
? source.url
|
||||
: toImageUrlFromBase64({ mediaType: source.media_type, data: source.data }),
|
||||
},
|
||||
});
|
||||
continue;
|
||||
}
|
||||
parts.push({
|
||||
type: "input_image",
|
||||
source: part.source as
|
||||
| { type: "url"; url: string }
|
||||
| { type: "base64"; media_type: string; data: string },
|
||||
source,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -441,7 +472,9 @@ export function convertMessagesToInputItems(
|
||||
}
|
||||
const parts = Array.isArray(m.content) ? contentToOpenAIParts(m.content, modelOverride) : [];
|
||||
const textOutput = contentToText(m.content);
|
||||
const imageParts = parts.filter((part) => part.type === "input_image");
|
||||
const imageParts = parts.filter(
|
||||
(part) => part.type === "input_image" || part.type === "image_url",
|
||||
);
|
||||
items.push({
|
||||
type: "function_call_output",
|
||||
call_id: replayId.callId,
|
||||
|
||||
@@ -575,6 +575,57 @@ describe("convertMessagesToInputItems", () => {
|
||||
expect(items[0]).toMatchObject({ type: "message", role: "user", content: "Hello!" });
|
||||
});
|
||||
|
||||
it("uses image_url parts for OpenAI-compatible user images", () => {
  // A user turn mixing a text part with an inline base64 PNG.
  const userMessage: FakeMessage = {
    role: "user",
    content: [
      { type: "text", text: "describe this" },
      { type: "image", mimeType: "image/png", data: "AAAA" },
    ],
    timestamp: 0,
  };
  // Model info selecting the "openai-completions" API with image input enabled.
  const completionsModel = { api: "openai-completions", input: ["text", "image"] };

  const converted = convertMessagesToInputItems(
    [userMessage] as Parameters<typeof convertMessagesToInputItems>[0],
    completionsModel,
  );

  // The image is expected as an `image_url` part carrying a data URL.
  const expectedItems = [
    {
      type: "message",
      role: "user",
      content: [
        { type: "input_text", text: "describe this" },
        { type: "image_url", image_url: { url: "data:image/png;base64,AAAA" } },
      ],
    },
  ];
  expect(converted).toEqual(expectedItems);
});
|
||||
|
||||
it("keeps input_image parts for Responses user images", () => {
  // A user turn containing only an inline base64 PNG.
  const userMessage: FakeMessage = {
    role: "user",
    content: [{ type: "image", mimeType: "image/png", data: "AAAA" }],
    timestamp: 0,
  };
  // Model info selecting the "openai-responses" API with image input enabled.
  const responsesModel = { api: "openai-responses", input: ["text", "image"] };

  const converted = convertMessagesToInputItems(
    [userMessage] as Parameters<typeof convertMessagesToInputItems>[0],
    responsesModel,
  );

  // The image is expected as an `input_image` part with a base64 source.
  const expectedItems = [
    {
      type: "message",
      role: "user",
      content: [
        {
          type: "input_image",
          source: { type: "base64", media_type: "image/png", data: "AAAA" },
        },
      ],
    },
  ];
  expect(converted).toEqual(expectedItems);
});
|
||||
|
||||
it("converts an assistant text-only message", () => {
|
||||
const items = convertMessagesToInputItems([assistantMsg(["Hi there."])] as Parameters<
|
||||
typeof convertMessagesToInputItems
|
||||
@@ -855,6 +906,37 @@ describe("convertMessagesToInputItems", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("preserves OpenAI-compatible tool-result images as follow-up image_url parts", () => {
  // A successful tool result whose only content is an inline base64 PNG.
  const toolResult: FakeMessage = {
    role: "toolResult",
    toolCallId: "call_1",
    toolName: "read",
    content: [{ type: "image", mimeType: "image/png", data: "AAAA" }],
    isError: false,
    timestamp: 0,
  };
  // Model info selecting the "openai-completions" API with image input enabled.
  const completionsModel = { api: "openai-completions", input: ["text", "image"] };

  const converted = convertMessagesToInputItems(
    [toolResult] as Parameters<typeof convertMessagesToInputItems>[0],
    completionsModel,
  );

  // Expected: the function output placeholder, then a user message carrying the image.
  const expectedToolOutput = {
    type: "function_call_output",
    call_id: "call_1",
    output: "(see attached image)",
  };
  const expectedImageFollowUp = {
    type: "message",
    role: "user",
    content: [
      { type: "input_text", text: "Attached image(s) from tool result:" },
      { type: "image_url", image_url: { url: "data:image/png;base64,AAAA" } },
    ],
  };
  expect(converted).toEqual([expectedToolOutput, expectedImageFollowUp]);
});
|
||||
|
||||
it("drops tool result messages with empty tool call id", () => {
|
||||
const msg = {
|
||||
role: "toolResult" as const,
|
||||
|
||||
@@ -6,6 +6,10 @@ export type ContentPart =
|
||||
| {
|
||||
type: "input_image";
|
||||
source: { type: "url"; url: string } | { type: "base64"; media_type: string; data: string };
|
||||
}
|
||||
| {
|
||||
type: "image_url";
|
||||
image_url: { url: string };
|
||||
};
|
||||
|
||||
export type InputItem =
|
||||
|
||||
Reference in New Issue
Block a user