diff --git a/CHANGELOG.md b/CHANGELOG.md index 49ff69551c1..2b6a2b6be9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -179,6 +179,7 @@ Docs: https://docs.openclaw.ai - Memory host SDK: report malformed remote JSON with caller-scoped errors for POST and batch file upload responses instead of leaking raw parser failures. - Media providers: report malformed operation-poll and audio-transcription JSON with provider-owned errors instead of leaking raw parser failures. - MiniMax, Gemini, Kimi, and Ollama web search: report malformed API JSON with provider-owned errors instead of leaking raw parser failures. +- Image and video generation: reject malformed base64 payloads from OpenAI-compatible image responses, DeepInfra video data URLs, and MiniMax image responses instead of accepting Node's lenient decoder output. - Web search: mark the managed `web_search` `query` argument as required in the advertised tool schema, so schema-following local models stop emitting `queries` payloads that fail at execution. Fixes #82097. Thanks @SpidFightFR. - Twilio voice-call: report malformed successful API JSON responses with provider-owned errors instead of leaking raw parser failures. - Voice-call provider APIs: report malformed successful guarded JSON responses with provider-prefixed errors instead of leaking raw parser failures. diff --git a/extensions/deepinfra/video-generation-provider.test.ts b/extensions/deepinfra/video-generation-provider.test.ts index fca18b2c2c6..2dbb5a89148 100644 --- a/extensions/deepinfra/video-generation-provider.test.ts +++ b/extensions/deepinfra/video-generation-provider.test.ts @@ -166,4 +166,29 @@ describe("deepinfra video generation provider", () => { fileName: "video-1.webm", }); }); + + it("rejects malformed base64 data URL video outputs", async () => { + const release = vi.fn(async () => undefined); + postJsonRequestMock.mockResolvedValue({ + response: { + json: async () => ({ + video_url: "data:video/webm;base64,not-base64!", + request_id: "req_bad_base64", + inference_status: { status: "succeeded" }, + }), + }, + release, + }); + + const provider = buildDeepInfraVideoGenerationProvider(); + await expect( + provider.generateVideo({ + provider: "deepinfra", + model: "deepinfra/Pixverse/Pixverse-T2V", + prompt: "A malformed WebM data URL", + cfg: {}, + }), + ).rejects.toThrow("DeepInfra video response returned malformed data URL base64"); + expect(release).toHaveBeenCalledOnce(); + }); }); diff --git a/extensions/deepinfra/video-generation-provider.ts b/extensions/deepinfra/video-generation-provider.ts index 34d9f478039..6435f97a837 100644 --- a/extensions/deepinfra/video-generation-provider.ts +++ b/extensions/deepinfra/video-generation-provider.ts @@ -1,4 +1,5 @@ import { extensionForMime } from "openclaw/plugin-sdk/media-mime"; +import { canonicalizeBase64 } from "openclaw/plugin-sdk/media-runtime"; import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; import { @@ -67,8 +68,12 @@ function parseVideoDataUrl(url: string): GeneratedVideoAsset | undefined { } const mimeType = match[1] ?? "video/mp4"; const ext = extensionForMime(mimeType)?.slice(1) ?? "mp4"; + const canonicalBase64 = canonicalizeBase64(match[2] ?? ""); + if (!canonicalBase64) { + throw new Error("DeepInfra video response returned malformed data URL base64"); + } return { - buffer: Buffer.from(match[2] ?? "", "base64"), + buffer: Buffer.from(canonicalBase64, "base64"), mimeType, fileName: `video-1.${ext}`, }; diff --git a/extensions/minimax/image-generation-provider.test.ts b/extensions/minimax/image-generation-provider.test.ts index 02e3b27c62c..85c9804c6c5 100644 --- a/extensions/minimax/image-generation-provider.test.ts +++ b/extensions/minimax/image-generation-provider.test.ts @@ -109,6 +109,37 @@ describe("minimax image-generation provider", () => { }); }); + it("rejects malformed base64 image payloads", async () => { + mockMinimaxApiKey(); + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue( + new Response( + JSON.stringify({ + data: { + image_base64: ["not-base64!"], + }, + base_resp: { status_code: 0 }, + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ), + ); + + const provider = buildMinimaxImageGenerationProvider(); + await expect( + provider.generateImage({ + provider: "minimax", + model: "image-01", + prompt: "draw a cat", + cfg: {}, + }), + ).rejects.toThrow("MiniMax image generation returned malformed image base64"); + }); + it("passes request SSRF policy to the provider HTTP helper", async () => { mockMinimaxApiKey(); const postJsonRequest = vi.spyOn(providerHttp, "postJsonRequest").mockResolvedValue({ diff --git a/extensions/minimax/image-generation-provider.ts b/extensions/minimax/image-generation-provider.ts index fe47fed2805..f94eb6a0d72 100644 --- a/extensions/minimax/image-generation-provider.ts +++ b/extensions/minimax/image-generation-provider.ts @@ -1,4 +1,5 @@ import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation"; +import { canonicalizeBase64 } from "openclaw/plugin-sdk/media-runtime"; import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth"; import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime"; import { @@ -183,8 +184,12 @@ function buildMinimaxImageProvider(providerId: string): ImageGenerationProvider if (!b64) { return null; } + const canonicalBase64 = canonicalizeBase64(b64); + if (!canonicalBase64) { + throw new Error("MiniMax image generation returned malformed image base64"); + } return { - buffer: Buffer.from(b64, "base64"), + buffer: Buffer.from(canonicalBase64, "base64"), mimeType: DEFAULT_OUTPUT_MIME, fileName: `image-${index + 1}.png`, }; diff --git a/src/image-generation/image-assets.test.ts b/src/image-generation/image-assets.test.ts index 2ec77b39393..7e663431b21 100644 --- a/src/image-generation/image-assets.test.ts +++ b/src/image-generation/image-assets.test.ts @@ -28,6 +28,16 @@ describe("image asset helpers", () => { expect(asset.fileName).toBe("image-2.png"); }); + it("rejects malformed base64 image data URLs", () => { + expect(parseImageDataUrl("data:image/png;base64,not-base64!")).toBeUndefined(); + expect( + generatedImageAssetFromDataUrl({ + dataUrl: "data:image/png;base64,not-base64!", + index: 0, + }), + ).toBeUndefined(); + }); + it("normalizes image file extensions", () => { expect(imageFileExtensionForMimeType("image/jpeg")).toBe("jpg"); expect(imageFileExtensionForMimeType("image/webp")).toBe("webp"); @@ -71,6 +81,17 @@ describe("image asset helpers", () => { ]); }); + it("skips malformed OpenAI-compatible base64 image responses", () => { + expect( + parseOpenAiCompatibleImageResponse( + { + data: [{ b64_json: "not-base64!" }], + }, + { defaultMimeType: "image/png" }, + ), + ).toEqual([]); + }); + it("resolves source upload filenames from explicit names or MIME types", () => { expect( imageSourceUploadFileName({ diff --git a/src/image-generation/image-assets.ts b/src/image-generation/image-assets.ts index 71735c88851..c18b76c2a37 100644 --- a/src/image-generation/image-assets.ts +++ b/src/image-generation/image-assets.ts @@ -1,3 +1,4 @@ +import { canonicalizeBase64 } from "../media/base64.js"; import { normalizeOptionalLowercaseString, normalizeOptionalString, @@ -93,7 +94,11 @@ export function parseImageDataUrl( if (!mimeType || !base64) { return undefined; } - return { mimeType, base64 }; + const canonicalBase64 = canonicalizeBase64(base64); + if (!canonicalBase64) { + return undefined; + } + return { mimeType, base64: canonicalBase64 }; } export function generatedImageAssetFromBase64(params: { @@ -106,10 +111,11 @@ export function generatedImageAssetFromBase64(params: { sniffMimeType?: boolean; }): GeneratedImageAsset | undefined { const base64 = normalizeOptionalString(params.base64); - if (!base64) { + const canonicalBase64 = base64 ? canonicalizeBase64(base64) : undefined; + if (!canonicalBase64) { return undefined; } - const buffer = Buffer.from(base64, "base64"); + const buffer = Buffer.from(canonicalBase64, "base64"); const explicitMimeType = normalizeOptionalString(params.mimeType); const defaultMimeType = normalizeOptionalString(params.defaultMimeType) ?? DEFAULT_IMAGE_MIME_TYPE;