diff --git a/CHANGELOG.md b/CHANGELOG.md index 53bc622fc81..a063b910640 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -138,6 +138,7 @@ Docs: https://docs.openclaw.ai - Discord/voice decoder fallback: drop the native Opus dependency and use opusscript for voice decoding to avoid native-opus installs. Thanks @thewilloftheshadow. - Discord/auto presence health signal: add runtime availability-driven presence updates plus connected-state reporting to improve health monitoring and operator visibility. (#33277) Thanks @thewilloftheshadow. - HEIC image inputs: accept HEIC/HEIF `input_image` sources in Gateway HTTP APIs, normalize them to JPEG before provider delivery, and document the expanded default MIME allowlist. Thanks @vincentkoc. +- Gateway/HEIC input follow-up: keep non-HEIC `input_image` MIME handling unchanged, make HEIC tests hermetic, and enforce chat-completions `maxTotalImageBytes` against post-normalization image payload size. Thanks @vincentkoc. - Telegram/draft-stream boundary stability: materialize DM draft previews at assistant-message/tool boundaries, serialize lane-boundary callbacks before final delivery, and scope preview cleanup to the active preview so multi-step Telegram streams no longer lose, overwrite, or leave stale preview bubbles. (#33842) Thanks @ngutman. - Telegram/DM draft finalization reliability: require verified final-text draft emission before treating preview finalization as delivered, and fall back to normal payload send when final draft delivery is not confirmed (preventing missing final responses and preserving media/button delivery). (#32118) Thanks @OpenCils. - Telegram/DM draft final delivery: materialize text-only `sendMessageDraft` previews into one permanent final message and skip duplicate final payload sends, while preserving fallback behavior when materialization fails. (#34318) Thanks @Brotherinlaw-13. 
diff --git a/src/gateway/openai-http.image-budget.test.ts b/src/gateway/openai-http.image-budget.test.ts new file mode 100644 index 00000000000..fcc7e2049ae --- /dev/null +++ b/src/gateway/openai-http.image-budget.test.ts @@ -0,0 +1,68 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +const extractImageContentFromSourceMock = vi.fn(); + +vi.mock("../media/input-files.js", async (importOriginal) => { + const actual = await importOriginal<typeof import("../media/input-files.js")>(); + return { + ...actual, + extractImageContentFromSource: (...args: unknown[]) => + extractImageContentFromSourceMock(...args), + }; +}); + +import { __testOnlyOpenAiHttp } from "./openai-http.js"; + +describe("openai image budget accounting", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("counts normalized base64 image bytes against maxTotalImageBytes", async () => { + extractImageContentFromSourceMock.mockResolvedValueOnce({ + type: "image", + data: Buffer.alloc(10, 1).toString("base64"), + mimeType: "image/jpeg", + }); + + const limits = __testOnlyOpenAiHttp.resolveOpenAiChatCompletionsLimits({ + maxTotalImageBytes: 5, + }); + + await expect( + __testOnlyOpenAiHttp.resolveImagesForRequest( + { + urls: ["data:image/heic;base64,QUJD"], + }, + limits, + ), + ).rejects.toThrow(/Total image payload too large/); + }); + + it("does not double-count unchanged base64 image payloads", async () => { + extractImageContentFromSourceMock.mockResolvedValueOnce({ + type: "image", + data: "QUJDRA==", + mimeType: "image/jpeg", + }); + + const limits = __testOnlyOpenAiHttp.resolveOpenAiChatCompletionsLimits({ + maxTotalImageBytes: 4, + }); + + await expect( + __testOnlyOpenAiHttp.resolveImagesForRequest( + { + urls: ["data:image/jpeg;base64,QUJDRA=="], + }, + limits, + ), + ).resolves.toEqual([ + { + type: "image", + data: "QUJDRA==", + mimeType: "image/jpeg", + }, + ]); + }); +}); diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts index d23fc64bf96..01564f17b34 ---
a/src/gateway/openai-http.ts +++ b/src/gateway/openai-http.ts @@ -300,18 +300,16 @@ async function resolveImagesForRequest( for (const url of urls) { const source = parseImageUrlToSource(url); if (source.type === "base64") { - totalBytes += estimateBase64DecodedBytes(source.data); - if (totalBytes > limits.maxTotalImageBytes) { + const sourceBytes = estimateBase64DecodedBytes(source.data); + if (totalBytes + sourceBytes > limits.maxTotalImageBytes) { throw new Error( - `Total image payload too large (${totalBytes}; limit ${limits.maxTotalImageBytes})`, + `Total image payload too large (${totalBytes + sourceBytes}; limit ${limits.maxTotalImageBytes})`, ); } } const image = await extractImageContentFromSource(source, limits.images); - if (source.type !== "base64") { - totalBytes += estimateBase64DecodedBytes(image.data); - } + totalBytes += estimateBase64DecodedBytes(image.data); if (totalBytes > limits.maxTotalImageBytes) { throw new Error( `Total image payload too large (${totalBytes}; limit ${limits.maxTotalImageBytes})`, @@ -322,6 +320,11 @@ async function resolveImagesForRequest( return images; } +export const __testOnlyOpenAiHttp = { + resolveImagesForRequest, + resolveOpenAiChatCompletionsLimits, +}; + function buildAgentPrompt( messagesUnknown: unknown, activeUserMessageIndex: number, diff --git a/src/media/input-files.fetch-guard.test.ts b/src/media/input-files.fetch-guard.test.ts index 6d83738c73a..05d59d37e76 100644 --- a/src/media/input-files.fetch-guard.test.ts +++ b/src/media/input-files.fetch-guard.test.ts @@ -2,6 +2,7 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; const fetchWithSsrFGuardMock = vi.fn(); const convertHeicToJpegMock = vi.fn(); +const detectMimeMock = vi.fn(); vi.mock("../infra/net/fetch-guard.js", () => ({ fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args), @@ -11,6 +12,10 @@ vi.mock("./image-ops.js", () => ({ convertHeicToJpeg: (...args: unknown[]) => 
convertHeicToJpegMock(...args), })); +vi.mock("./mime.js", () => ({ + detectMime: (...args: unknown[]) => detectMimeMock(...args), +})); + async function waitForMicrotaskTurn(): Promise<void> { await new Promise((resolve) => queueMicrotask(resolve)); } @@ -31,6 +36,7 @@ beforeEach(() => { describe("HEIC input image normalization", () => { it("converts base64 HEIC images to JPEG before returning them", async () => { const normalized = Buffer.from("jpeg-normalized"); + detectMimeMock.mockResolvedValueOnce("image/heic"); convertHeicToJpegMock.mockResolvedValueOnce(normalized); const image = await extractImageContentFromSource( @@ -67,6 +73,7 @@ describe("HEIC input image normalization", () => { finalUrl: "https://example.com/photo.heic", }); const normalized = Buffer.from("jpeg-url-normalized"); + detectMimeMock.mockResolvedValueOnce("image/heic"); convertHeicToJpegMock.mockResolvedValueOnce(normalized); const image = await extractImageContentFromSource( @@ -91,6 +98,31 @@ describe("HEIC input image normalization", () => { }); expect(release).toHaveBeenCalledTimes(1); }); + + it("keeps declared MIME for non-HEIC images without sniffing", async () => { + const image = await extractImageContentFromSource( + { + type: "base64", + data: Buffer.from("png-like").toString("base64"), + mediaType: "image/png", + }, + { + allowUrl: false, + allowedMimes: new Set(["image/png"]), + maxBytes: 1024 * 1024, + maxRedirects: 0, + timeoutMs: 1, + }, + ); + + expect(detectMimeMock).not.toHaveBeenCalled(); + expect(convertHeicToJpegMock).not.toHaveBeenCalled(); + expect(image).toEqual({ + type: "image", + data: Buffer.from("png-like").toString("base64"), + mimeType: "image/png", + }); + }); }); describe("fetchWithGuard", () => { diff --git a/src/media/input-files.ts b/src/media/input-files.ts index dcf3cd5872d..b894c6d13b2 100644 --- a/src/media/input-files.ts +++ b/src/media/input-files.ts @@ -234,10 +234,12 @@ async function normalizeInputImage(params: { mimeType?: string; limits:
InputImageLimits; }): Promise { - const sourceMime = - normalizeMimeType(await detectMime({ buffer: params.buffer, headerMime: params.mimeType })) ?? - normalizeMimeType(params.mimeType) ?? - "application/octet-stream"; + const declaredMime = normalizeMimeType(params.mimeType) ?? "application/octet-stream"; + const sourceMime = HEIC_INPUT_IMAGE_MIMES.has(declaredMime) + ? (normalizeMimeType( + await detectMime({ buffer: params.buffer, headerMime: params.mimeType }), + ) ?? declaredMime) + : declaredMime; if (!params.limits.allowedMimes.has(sourceMime)) { throw new Error(`Unsupported image MIME type: ${sourceMime}`); }