mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
Gateway: follow up HEIC input image handling (#38146)
* Media: scope HEIC MIME sniffing * Media: hermeticize HEIC input tests * Gateway: fix HEIC image budget accounting * Gateway: add HEIC image budget regression test * Changelog: note HEIC follow-up fix
This commit is contained in:
@@ -138,6 +138,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Discord/voice decoder fallback: drop the native Opus dependency and use opusscript for voice decoding to avoid native-opus installs. Thanks @thewilloftheshadow.
|
||||
- Discord/auto presence health signal: add runtime availability-driven presence updates plus connected-state reporting to improve health monitoring and operator visibility. (#33277) Thanks @thewilloftheshadow.
|
||||
- HEIC image inputs: accept HEIC/HEIF `input_image` sources in Gateway HTTP APIs, normalize them to JPEG before provider delivery, and document the expanded default MIME allowlist. Thanks @vincentkoc.
|
||||
- Gateway/HEIC input follow-up: keep non-HEIC `input_image` MIME handling unchanged, make HEIC tests hermetic, and enforce chat-completions `maxTotalImageBytes` against post-normalization image payload size. Thanks @vincentkoc.
|
||||
- Telegram/draft-stream boundary stability: materialize DM draft previews at assistant-message/tool boundaries, serialize lane-boundary callbacks before final delivery, and scope preview cleanup to the active preview so multi-step Telegram streams no longer lose, overwrite, or leave stale preview bubbles. (#33842) Thanks @ngutman.
|
||||
- Telegram/DM draft finalization reliability: require verified final-text draft emission before treating preview finalization as delivered, and fall back to normal payload send when final draft delivery is not confirmed (preventing missing final responses and preserving media/button delivery). (#32118) Thanks @OpenCils.
|
||||
- Telegram/DM draft final delivery: materialize text-only `sendMessageDraft` previews into one permanent final message and skip duplicate final payload sends, while preserving fallback behavior when materialization fails. (#34318) Thanks @Brotherinlaw-13.
|
||||
|
||||
68
src/gateway/openai-http.image-budget.test.ts
Normal file
68
src/gateway/openai-http.image-budget.test.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Spy that stands in for the real image extractor in this test file.
const extractImageContentFromSourceMock = vi.fn();

// Keep every real export of input-files.js and override only the extractor.
// The override forwards to the spy at call time, so the spy is looked up when
// the extractor is invoked rather than when this factory runs.
vi.mock("../media/input-files.js", async (importOriginal) => {
  const realModule = await importOriginal<typeof import("../media/input-files.js")>();
  const extractImageContentFromSource = (...callArgs: unknown[]) =>
    extractImageContentFromSourceMock(...callArgs);
  return { ...realModule, extractImageContentFromSource };
});
|
||||
|
||||
import { __testOnlyOpenAiHttp } from "./openai-http.js";
|
||||
|
||||
/**
 * Regression tests for how `resolveImagesForRequest` charges image payloads
 * against `maxTotalImageBytes`. The image extractor is mocked, so each test
 * controls exactly what payload is reported back after extraction.
 */
describe("openai image budget accounting", () => {
  beforeEach(() => {
    // resetAllMocks (unlike clearAllMocks) also discards queued
    // mockResolvedValueOnce results, so a test that fails before consuming its
    // queued payload cannot leak that payload into the next test.
    vi.resetAllMocks();
  });

  it("counts normalized base64 image bytes against maxTotalImageBytes", async () => {
    // The extractor reports a 10-byte payload, while the source data URL only
    // carries 3 bytes ("QUJD") and the budget is 5 bytes.
    extractImageContentFromSourceMock.mockResolvedValueOnce({
      type: "image",
      data: Buffer.alloc(10, 1).toString("base64"),
      mimeType: "image/jpeg",
    });

    const limits = __testOnlyOpenAiHttp.resolveOpenAiChatCompletionsLimits({
      maxTotalImageBytes: 5,
    });

    await expect(
      __testOnlyOpenAiHttp.resolveImagesForRequest(
        {
          urls: ["data:image/heic;base64,QUJD"],
        },
        limits,
      ),
    ).rejects.toThrow(/Total image payload too large/);

    // Prove the rejection came after extraction ran, not from a check on the
    // (small) source payload.
    expect(extractImageContentFromSourceMock).toHaveBeenCalledTimes(1);
  });

  it("does not double-count unchanged base64 image payloads", async () => {
    // Source and extracted payload are the same 4 bytes, exactly at the
    // budget; counting them twice would exceed the limit.
    extractImageContentFromSourceMock.mockResolvedValueOnce({
      type: "image",
      data: "QUJDRA==",
      mimeType: "image/jpeg",
    });

    const limits = __testOnlyOpenAiHttp.resolveOpenAiChatCompletionsLimits({
      maxTotalImageBytes: 4,
    });

    await expect(
      __testOnlyOpenAiHttp.resolveImagesForRequest(
        {
          urls: ["data:image/jpeg;base64,QUJDRA=="],
        },
        limits,
      ),
    ).resolves.toEqual([
      {
        type: "image",
        data: "QUJDRA==",
        mimeType: "image/jpeg",
      },
    ]);

    expect(extractImageContentFromSourceMock).toHaveBeenCalledTimes(1);
  });
});
|
||||
@@ -300,18 +300,16 @@ async function resolveImagesForRequest(
|
||||
for (const url of urls) {
|
||||
const source = parseImageUrlToSource(url);
|
||||
if (source.type === "base64") {
|
||||
totalBytes += estimateBase64DecodedBytes(source.data);
|
||||
if (totalBytes > limits.maxTotalImageBytes) {
|
||||
const sourceBytes = estimateBase64DecodedBytes(source.data);
|
||||
if (totalBytes + sourceBytes > limits.maxTotalImageBytes) {
|
||||
throw new Error(
|
||||
`Total image payload too large (${totalBytes}; limit ${limits.maxTotalImageBytes})`,
|
||||
`Total image payload too large (${totalBytes + sourceBytes}; limit ${limits.maxTotalImageBytes})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const image = await extractImageContentFromSource(source, limits.images);
|
||||
if (source.type !== "base64") {
|
||||
totalBytes += estimateBase64DecodedBytes(image.data);
|
||||
}
|
||||
totalBytes += estimateBase64DecodedBytes(image.data);
|
||||
if (totalBytes > limits.maxTotalImageBytes) {
|
||||
throw new Error(
|
||||
`Total image payload too large (${totalBytes}; limit ${limits.maxTotalImageBytes})`,
|
||||
@@ -322,6 +320,11 @@ async function resolveImagesForRequest(
|
||||
return images;
|
||||
}
|
||||
|
||||
/**
 * Bundles two helpers from this module under a single export. The name marks
 * it as a handle for tests rather than an API for production callers.
 */
export const __testOnlyOpenAiHttp = {
  resolveImagesForRequest: resolveImagesForRequest,
  resolveOpenAiChatCompletionsLimits: resolveOpenAiChatCompletionsLimits,
};
|
||||
|
||||
function buildAgentPrompt(
|
||||
messagesUnknown: unknown,
|
||||
activeUserMessageIndex: number,
|
||||
|
||||
@@ -2,6 +2,7 @@ import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const fetchWithSsrFGuardMock = vi.fn();
|
||||
const convertHeicToJpegMock = vi.fn();
|
||||
const detectMimeMock = vi.fn();
|
||||
|
||||
vi.mock("../infra/net/fetch-guard.js", () => ({
|
||||
fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args),
|
||||
@@ -11,6 +12,10 @@ vi.mock("./image-ops.js", () => ({
|
||||
convertHeicToJpeg: (...args: unknown[]) => convertHeicToJpegMock(...args),
|
||||
}));
|
||||
|
||||
// Replace the MIME module with a single forwarding `detectMime`, so tests
// decide what detection returns and can assert whether it was called at all.
vi.mock("./mime.js", () => {
  const detectMime = (...callArgs: unknown[]) => detectMimeMock(...callArgs);
  return { detectMime };
});
|
||||
|
||||
/**
 * Resolves once a callback queued with `queueMicrotask` has run, letting
 * already-pending microtasks make progress before the caller continues.
 *
 * @returns A promise that fulfils with no value.
 */
async function waitForMicrotaskTurn(): Promise<void> {
  const nextTurn = new Promise<void>((settle) => {
    queueMicrotask(settle);
  });
  await nextTurn;
}
|
||||
@@ -31,6 +36,7 @@ beforeEach(() => {
|
||||
describe("HEIC input image normalization", () => {
|
||||
it("converts base64 HEIC images to JPEG before returning them", async () => {
|
||||
const normalized = Buffer.from("jpeg-normalized");
|
||||
detectMimeMock.mockResolvedValueOnce("image/heic");
|
||||
convertHeicToJpegMock.mockResolvedValueOnce(normalized);
|
||||
|
||||
const image = await extractImageContentFromSource(
|
||||
@@ -67,6 +73,7 @@ describe("HEIC input image normalization", () => {
|
||||
finalUrl: "https://example.com/photo.heic",
|
||||
});
|
||||
const normalized = Buffer.from("jpeg-url-normalized");
|
||||
detectMimeMock.mockResolvedValueOnce("image/heic");
|
||||
convertHeicToJpegMock.mockResolvedValueOnce(normalized);
|
||||
|
||||
const image = await extractImageContentFromSource(
|
||||
@@ -91,6 +98,31 @@ describe("HEIC input image normalization", () => {
|
||||
});
|
||||
expect(release).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// A source declared as PNG must pass through untouched: no MIME detection,
// no HEIC conversion, and the declared MIME type on the result.
it("keeps declared MIME for non-HEIC images without sniffing", async () => {
  const encodedPayload = Buffer.from("png-like").toString("base64");
  const limits = {
    allowUrl: false,
    allowedMimes: new Set(["image/png"]),
    maxBytes: 1024 * 1024,
    maxRedirects: 0,
    timeoutMs: 1,
  };

  const image = await extractImageContentFromSource(
    {
      type: "base64",
      data: encodedPayload,
      mediaType: "image/png",
    },
    limits,
  );

  expect(detectMimeMock).not.toHaveBeenCalled();
  expect(convertHeicToJpegMock).not.toHaveBeenCalled();
  expect(image).toEqual({
    type: "image",
    data: encodedPayload,
    mimeType: "image/png",
  });
});
|
||||
});
|
||||
|
||||
describe("fetchWithGuard", () => {
|
||||
|
||||
@@ -234,10 +234,12 @@ async function normalizeInputImage(params: {
|
||||
mimeType?: string;
|
||||
limits: InputImageLimits;
|
||||
}): Promise<InputImageContent> {
|
||||
const sourceMime =
|
||||
normalizeMimeType(await detectMime({ buffer: params.buffer, headerMime: params.mimeType })) ??
|
||||
normalizeMimeType(params.mimeType) ??
|
||||
"application/octet-stream";
|
||||
const declaredMime = normalizeMimeType(params.mimeType) ?? "application/octet-stream";
|
||||
const sourceMime = HEIC_INPUT_IMAGE_MIMES.has(declaredMime)
|
||||
? (normalizeMimeType(
|
||||
await detectMime({ buffer: params.buffer, headerMime: params.mimeType }),
|
||||
) ?? declaredMime)
|
||||
: declaredMime;
|
||||
if (!params.limits.allowedMimes.has(sourceMime)) {
|
||||
throw new Error(`Unsupported image MIME type: ${sourceMime}`);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user