Gateway: follow up HEIC input image handling (#38146)

* Media: scope HEIC MIME sniffing

* Media: hermeticize HEIC input tests

* Gateway: fix HEIC image budget accounting

* Gateway: add HEIC image budget regression test

* Changelog: note HEIC follow-up fix
This commit is contained in:
Vincent Koc
2026-03-06 11:53:59 -05:00
committed by GitHub
parent f9d86b9256
commit 9521e61a22
5 changed files with 116 additions and 10 deletions

View File

@@ -138,6 +138,7 @@ Docs: https://docs.openclaw.ai
- Discord/voice decoder fallback: drop the native Opus dependency and use opusscript for voice decoding to avoid native-opus installs. Thanks @thewilloftheshadow.
- Discord/auto presence health signal: add runtime availability-driven presence updates plus connected-state reporting to improve health monitoring and operator visibility. (#33277) Thanks @thewilloftheshadow.
- HEIC image inputs: accept HEIC/HEIF `input_image` sources in Gateway HTTP APIs, normalize them to JPEG before provider delivery, and document the expanded default MIME allowlist. Thanks @vincentkoc.
- Gateway/HEIC input follow-up: keep non-HEIC `input_image` MIME handling unchanged, make HEIC tests hermetic, and enforce chat-completions `maxTotalImageBytes` against post-normalization image payload size. Thanks @vincentkoc.
- Telegram/draft-stream boundary stability: materialize DM draft previews at assistant-message/tool boundaries, serialize lane-boundary callbacks before final delivery, and scope preview cleanup to the active preview so multi-step Telegram streams no longer lose, overwrite, or leave stale preview bubbles. (#33842) Thanks @ngutman.
- Telegram/DM draft finalization reliability: require verified final-text draft emission before treating preview finalization as delivered, and fall back to normal payload send when final draft delivery is not confirmed (preventing missing final responses and preserving media/button delivery). (#32118) Thanks @OpenCils.
- Telegram/DM draft final delivery: materialize text-only `sendMessageDraft` previews into one permanent final message and skip duplicate final payload sends, while preserving fallback behavior when materialization fails. (#34318) Thanks @Brotherinlaw-13.

View File

@@ -0,0 +1,68 @@
// Regression tests for Gateway chat-completions image budget accounting
// (follow-up to HEIC input normalization): `maxTotalImageBytes` must be
// enforced against the post-normalization payload size, and an unchanged
// payload must not be counted twice (once as source, once as output).
import { beforeEach, describe, expect, it, vi } from "vitest";

// Declared before vi.mock so the hoisted factory below can close over it.
const extractImageContentFromSourceMock = vi.fn();

// Partial mock: keep the real module (via importOriginal) but intercept
// extractImageContentFromSource so each test controls the "normalized"
// image returned for a given source.
vi.mock("../media/input-files.js", async (importOriginal) => {
  const actual = await importOriginal<typeof import("../media/input-files.js")>();
  return {
    ...actual,
    extractImageContentFromSource: (...args: unknown[]) =>
      extractImageContentFromSourceMock(...args),
  };
});

// Imported after vi.mock so the SUT resolves the mocked module.
import { __testOnlyOpenAiHttp } from "./openai-http.js";

describe("openai image budget accounting", () => {
  beforeEach(() => {
    // Reset call history and any queued mockResolvedValueOnce results
    // so one test's mock setup cannot leak into the next.
    vi.clearAllMocks();
  });

  it("counts normalized base64 image bytes against maxTotalImageBytes", async () => {
    // Normalization inflates the image to 10 decoded bytes, while the raw
    // base64 source ("QUJD" -> "ABC", 3 bytes) fits under the 5-byte limit.
    extractImageContentFromSourceMock.mockResolvedValueOnce({
      type: "image",
      data: Buffer.alloc(10, 1).toString("base64"),
      mimeType: "image/jpeg",
    });
    const limits = __testOnlyOpenAiHttp.resolveOpenAiChatCompletionsLimits({
      maxTotalImageBytes: 5,
    });
    // The post-normalization size (10 > 5) must trip the budget even though
    // the declared HEIC source itself is within budget.
    await expect(
      __testOnlyOpenAiHttp.resolveImagesForRequest(
        {
          urls: ["data:image/heic;base64,QUJD"],
        },
        limits,
      ),
    ).rejects.toThrow(/Total image payload too large/);
  });

  it("does not double-count unchanged base64 image payloads", async () => {
    // Normalization returns the payload unchanged: "QUJDRA==" decodes to
    // exactly 4 bytes ("ABCD"), matching the 4-byte budget below.
    extractImageContentFromSourceMock.mockResolvedValueOnce({
      type: "image",
      data: "QUJDRA==",
      mimeType: "image/jpeg",
    });
    const limits = __testOnlyOpenAiHttp.resolveOpenAiChatCompletionsLimits({
      maxTotalImageBytes: 4,
    });
    // If source bytes and normalized bytes were both charged (4 + 4 = 8),
    // this would exceed the limit; counting the payload once must succeed.
    await expect(
      __testOnlyOpenAiHttp.resolveImagesForRequest(
        {
          urls: ["data:image/jpeg;base64,QUJDRA=="],
        },
        limits,
      ),
    ).resolves.toEqual([
      {
        type: "image",
        data: "QUJDRA==",
        mimeType: "image/jpeg",
      },
    ]);
  });
});

View File

@@ -300,18 +300,16 @@ async function resolveImagesForRequest(
for (const url of urls) {
const source = parseImageUrlToSource(url);
if (source.type === "base64") {
totalBytes += estimateBase64DecodedBytes(source.data);
if (totalBytes > limits.maxTotalImageBytes) {
const sourceBytes = estimateBase64DecodedBytes(source.data);
if (totalBytes + sourceBytes > limits.maxTotalImageBytes) {
throw new Error(
`Total image payload too large (${totalBytes}; limit ${limits.maxTotalImageBytes})`,
`Total image payload too large (${totalBytes + sourceBytes}; limit ${limits.maxTotalImageBytes})`,
);
}
}
const image = await extractImageContentFromSource(source, limits.images);
if (source.type !== "base64") {
totalBytes += estimateBase64DecodedBytes(image.data);
}
totalBytes += estimateBase64DecodedBytes(image.data);
if (totalBytes > limits.maxTotalImageBytes) {
throw new Error(
`Total image payload too large (${totalBytes}; limit ${limits.maxTotalImageBytes})`,
@@ -322,6 +320,11 @@ async function resolveImagesForRequest(
return images;
}
export const __testOnlyOpenAiHttp = {
resolveImagesForRequest,
resolveOpenAiChatCompletionsLimits,
};
function buildAgentPrompt(
messagesUnknown: unknown,
activeUserMessageIndex: number,

View File

@@ -2,6 +2,7 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
const fetchWithSsrFGuardMock = vi.fn();
const convertHeicToJpegMock = vi.fn();
const detectMimeMock = vi.fn();
vi.mock("../infra/net/fetch-guard.js", () => ({
fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args),
@@ -11,6 +12,10 @@ vi.mock("./image-ops.js", () => ({
convertHeicToJpeg: (...args: unknown[]) => convertHeicToJpegMock(...args),
}));
vi.mock("./mime.js", () => ({
detectMime: (...args: unknown[]) => detectMimeMock(...args),
}));
async function waitForMicrotaskTurn(): Promise<void> {
await new Promise<void>((resolve) => queueMicrotask(resolve));
}
@@ -31,6 +36,7 @@ beforeEach(() => {
describe("HEIC input image normalization", () => {
it("converts base64 HEIC images to JPEG before returning them", async () => {
const normalized = Buffer.from("jpeg-normalized");
detectMimeMock.mockResolvedValueOnce("image/heic");
convertHeicToJpegMock.mockResolvedValueOnce(normalized);
const image = await extractImageContentFromSource(
@@ -67,6 +73,7 @@ describe("HEIC input image normalization", () => {
finalUrl: "https://example.com/photo.heic",
});
const normalized = Buffer.from("jpeg-url-normalized");
detectMimeMock.mockResolvedValueOnce("image/heic");
convertHeicToJpegMock.mockResolvedValueOnce(normalized);
const image = await extractImageContentFromSource(
@@ -91,6 +98,31 @@ describe("HEIC input image normalization", () => {
});
expect(release).toHaveBeenCalledTimes(1);
});
it("keeps declared MIME for non-HEIC images without sniffing", async () => {
const image = await extractImageContentFromSource(
{
type: "base64",
data: Buffer.from("png-like").toString("base64"),
mediaType: "image/png",
},
{
allowUrl: false,
allowedMimes: new Set(["image/png"]),
maxBytes: 1024 * 1024,
maxRedirects: 0,
timeoutMs: 1,
},
);
expect(detectMimeMock).not.toHaveBeenCalled();
expect(convertHeicToJpegMock).not.toHaveBeenCalled();
expect(image).toEqual({
type: "image",
data: Buffer.from("png-like").toString("base64"),
mimeType: "image/png",
});
});
});
describe("fetchWithGuard", () => {

View File

@@ -234,10 +234,12 @@ async function normalizeInputImage(params: {
mimeType?: string;
limits: InputImageLimits;
}): Promise<InputImageContent> {
const sourceMime =
normalizeMimeType(await detectMime({ buffer: params.buffer, headerMime: params.mimeType })) ??
normalizeMimeType(params.mimeType) ??
"application/octet-stream";
const declaredMime = normalizeMimeType(params.mimeType) ?? "application/octet-stream";
const sourceMime = HEIC_INPUT_IMAGE_MIMES.has(declaredMime)
? (normalizeMimeType(
await detectMime({ buffer: params.buffer, headerMime: params.mimeType }),
) ?? declaredMime)
: declaredMime;
if (!params.limits.allowedMimes.has(sourceMime)) {
throw new Error(`Unsupported image MIME type: ${sourceMime}`);
}