fix(media): reject malformed generated base64

This commit is contained in:
Vincent Koc
2026-05-15 23:20:24 +08:00
parent 06ec35452f
commit 1b62168a3a
7 changed files with 99 additions and 5 deletions

View File

@@ -179,6 +179,7 @@ Docs: https://docs.openclaw.ai
- Memory host SDK: report malformed remote JSON with caller-scoped errors for POST and batch file upload responses instead of leaking raw parser failures.
- Media providers: report malformed operation-poll and audio-transcription JSON with provider-owned errors instead of leaking raw parser failures.
- MiniMax, Gemini, Kimi, and Ollama web search: report malformed API JSON with provider-owned errors instead of leaking raw parser failures.
- Image and video generation: reject malformed base64 payloads from OpenAI-compatible image responses, DeepInfra video data URLs, and MiniMax image responses instead of accepting Node's lenient decoder output.
- Web search: mark the managed `web_search` `query` argument as required in the advertised tool schema, so schema-following local models stop emitting `queries` payloads that fail at execution. Fixes #82097. Thanks @SpidFightFR.
- Twilio voice-call: report malformed successful API JSON responses with provider-owned errors instead of leaking raw parser failures.
- Voice-call provider APIs: report malformed successful guarded JSON responses with provider-prefixed errors instead of leaking raw parser failures.

View File

@@ -166,4 +166,29 @@ describe("deepinfra video generation provider", () => {
fileName: "video-1.webm",
});
});
// The provider must reject when the returned `video_url` is a data URL whose payload is not
// valid base64, and the `release` callback handed back with the response must run exactly once.
it("rejects malformed base64 data URL video outputs", async () => {
  const release = vi.fn(async () => undefined);
  // Build a fresh body on every `json()` call, mirroring a real response parse.
  const malformedBody = () => ({
    video_url: "data:video/webm;base64,not-base64!",
    request_id: "req_bad_base64",
    inference_status: { status: "succeeded" },
  });
  postJsonRequestMock.mockResolvedValue({
    response: { json: async () => malformedBody() },
    release,
  });

  const generation = buildDeepInfraVideoGenerationProvider().generateVideo({
    provider: "deepinfra",
    model: "deepinfra/Pixverse/Pixverse-T2V",
    prompt: "A malformed WebM data URL",
    cfg: {},
  });

  await expect(generation).rejects.toThrow(
    "DeepInfra video response returned malformed data URL base64",
  );
  expect(release).toHaveBeenCalledOnce();
});
});

View File

@@ -1,4 +1,5 @@
import { extensionForMime } from "openclaw/plugin-sdk/media-mime";
import { canonicalizeBase64 } from "openclaw/plugin-sdk/media-runtime";
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
import {
@@ -67,8 +68,12 @@ function parseVideoDataUrl(url: string): GeneratedVideoAsset | undefined {
}
const mimeType = match[1] ?? "video/mp4";
const ext = extensionForMime(mimeType)?.slice(1) ?? "mp4";
const canonicalBase64 = canonicalizeBase64(match[2] ?? "");
if (!canonicalBase64) {
throw new Error("DeepInfra video response returned malformed data URL base64");
}
return {
buffer: Buffer.from(match[2] ?? "", "base64"),
buffer: Buffer.from(canonicalBase64, "base64"),
mimeType,
fileName: `video-1.${ext}`,
};

View File

@@ -109,6 +109,37 @@ describe("minimax image-generation provider", () => {
});
});
// A 200 response whose `image_base64` entry is not valid base64 must make `generateImage`
// reject with the MiniMax-specific error message.
it("rejects malformed base64 image payloads", async () => {
  mockMinimaxApiKey();
  const malformedResponse = new Response(
    JSON.stringify({
      data: { image_base64: ["not-base64!"] },
      base_resp: { status_code: 0 },
    }),
    { status: 200, headers: { "Content-Type": "application/json" } },
  );
  vi.stubGlobal("fetch", vi.fn().mockResolvedValue(malformedResponse));

  const generation = buildMinimaxImageGenerationProvider().generateImage({
    provider: "minimax",
    model: "image-01",
    prompt: "draw a cat",
    cfg: {},
  });

  await expect(generation).rejects.toThrow(
    "MiniMax image generation returned malformed image base64",
  );
});
it("passes request SSRF policy to the provider HTTP helper", async () => {
mockMinimaxApiKey();
const postJsonRequest = vi.spyOn(providerHttp, "postJsonRequest").mockResolvedValue({

View File

@@ -1,4 +1,5 @@
import type { ImageGenerationProvider } from "openclaw/plugin-sdk/image-generation";
import { canonicalizeBase64 } from "openclaw/plugin-sdk/media-runtime";
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
import {
@@ -183,8 +184,12 @@ function buildMinimaxImageProvider(providerId: string): ImageGenerationProvider
if (!b64) {
return null;
}
const canonicalBase64 = canonicalizeBase64(b64);
if (!canonicalBase64) {
throw new Error("MiniMax image generation returned malformed image base64");
}
return {
buffer: Buffer.from(b64, "base64"),
buffer: Buffer.from(canonicalBase64, "base64"),
mimeType: DEFAULT_OUTPUT_MIME,
fileName: `image-${index + 1}.png`,
};

View File

@@ -28,6 +28,16 @@ describe("image asset helpers", () => {
expect(asset.fileName).toBe("image-2.png");
});
// Both the low-level data URL parser and the asset builder built on top of it are expected to
// return `undefined` for a data URL whose payload is not valid base64.
it("rejects malformed base64 image data URLs", () => {
  const malformedDataUrl = "data:image/png;base64,not-base64!";

  const parsed = parseImageDataUrl(malformedDataUrl);
  expect(parsed).toBeUndefined();

  const asset = generatedImageAssetFromDataUrl({ dataUrl: malformedDataUrl, index: 0 });
  expect(asset).toBeUndefined();
});
it("normalizes image file extensions", () => {
expect(imageFileExtensionForMimeType("image/jpeg")).toBe("jpg");
expect(imageFileExtensionForMimeType("image/webp")).toBe("webp");
@@ -71,6 +81,17 @@ describe("image asset helpers", () => {
]);
});
// An OpenAI-compatible response whose only entry carries an invalid `b64_json` payload is
// expected to yield no assets rather than a decoded-garbage image.
it("skips malformed OpenAI-compatible base64 image responses", () => {
  const responseBody = { data: [{ b64_json: "not-base64!" }] };

  const assets = parseOpenAiCompatibleImageResponse(responseBody, {
    defaultMimeType: "image/png",
  });

  expect(assets).toEqual([]);
});
it("resolves source upload filenames from explicit names or MIME types", () => {
expect(
imageSourceUploadFileName({

View File

@@ -1,3 +1,4 @@
import { canonicalizeBase64 } from "../media/base64.js";
import {
normalizeOptionalLowercaseString,
normalizeOptionalString,
@@ -93,7 +94,11 @@ export function parseImageDataUrl(
if (!mimeType || !base64) {
return undefined;
}
return { mimeType, base64 };
const canonicalBase64 = canonicalizeBase64(base64);
if (!canonicalBase64) {
return undefined;
}
return { mimeType, base64: canonicalBase64 };
}
export function generatedImageAssetFromBase64(params: {
@@ -106,10 +111,11 @@ export function generatedImageAssetFromBase64(params: {
sniffMimeType?: boolean;
}): GeneratedImageAsset | undefined {
const base64 = normalizeOptionalString(params.base64);
if (!base64) {
const canonicalBase64 = base64 ? canonicalizeBase64(base64) : undefined;
if (!canonicalBase64) {
return undefined;
}
const buffer = Buffer.from(base64, "base64");
const buffer = Buffer.from(canonicalBase64, "base64");
const explicitMimeType = normalizeOptionalString(params.mimeType);
const defaultMimeType =
normalizeOptionalString(params.defaultMimeType) ?? DEFAULT_IMAGE_MIME_TYPE;