fix(gateway): harden image-named attachment sniffing

This commit is contained in:
Vincent Koc
2026-05-16 09:54:21 +08:00
parent 7206811b80
commit 3b663ad1c1
4 changed files with 124 additions and 16 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Gateway/media: prevent image filenames from overriding generic non-image byte sniffing, so zip/octet-stream payloads mislabeled as images are offloaded or rejected before they become inline image attachments.
- MS Teams/media: sniff inline `data:image/*` attachment bytes before staging them, skipping payloads that are not actually images.
- Update: let package-swap `doctor --fix` persist core config repairs while plugin schemas are still converging, preventing update failures on externalized channel configs.
- Telegram: let authorized text `/stop` commands use the fast-abort path before queued agent work, so active turns stop immediately instead of processing the abort after the turn finishes. Fixes #82162. Thanks @civiltox.

View File

@@ -295,6 +295,23 @@ describe("parseMessageWithAttachments", () => {
expect(parsed.offloadedRefs[0]?.label).toBe("bundle.zip");
expect(parsed.offloadedRefs[0]?.mimeType).toBe("application/zip");
});
// Regression test: an attachment declared as an image (type, MIME type, and
// file name all say PNG) whose bytes sniff as a zip archive must not end up
// as an inline image.
it("does not let image filenames override generic non-image byte sniffing", async () => {
// "PK\u0003\u0004" is the ZIP local-file-header signature, so the payload is
// expected to sniff as application/zip despite the image hints below.
const zip = Buffer.from("PK\u0003\u0004zip-archive-bytes").toString("base64");
const { parsed, logs } = await parseWithWarnings("x", [
{
type: "image",
mimeType: "image/png",
fileName: "fake.png",
content: zip,
},
]);
// No inline image is produced; the payload is offloaded once, as a zip.
expect(parsed.images).toHaveLength(0);
expect(parsed.offloadedRefs).toHaveLength(1);
expect(parsed.offloadedRefs[0]?.mimeType).toBe("application/zip");
// The MIME recorded by the save helper must also be the sniffed type.
expect(savedMime()).toBe("application/zip");
// The first captured warning must mention the MIME mismatch.
expect(logs[0]).toMatch(/mime mismatch/i);
});
});
describe("parseMessageWithAttachments validation errors", () => {
@@ -349,6 +366,23 @@ describe("parseMessageWithAttachments validation errors", () => {
expect(saveMediaBufferMock).not.toHaveBeenCalled();
});
// With acceptNonImage disabled, a zip payload carrying an image MIME type and
// an image file name must be rejected, and nothing may be saved.
it("rejects generic-container payloads with image mime and image filename when acceptNonImage is false", async () => {
// Payload begins with the ZIP local-file-header signature "PK\u0003\u0004".
const zip = Buffer.from("PK\u0003\u0004zip-archive-bytes").toString("base64");
// Capture the thrown value so both its class and its reason can be asserted.
let caught: unknown;
try {
await parseMessageWithAttachments(
"x",
[{ type: "image", mimeType: "image/png", fileName: "fake.png", content: zip }],
{ log: { warn: () => {} }, acceptNonImage: false },
);
} catch (err) {
caught = err;
}
expect(caught).toBeInstanceOf(UnsupportedAttachmentError);
expect((caught as UnsupportedAttachmentError).reason).toBe("unsupported-non-image");
// Rejection must happen before any media buffer is saved.
expect(saveMediaBufferMock).not.toHaveBeenCalled();
});
it("throws UnsupportedAttachmentError on image when supportsInlineImages is false", async () => {
let caught: unknown;
try {

View File

@@ -107,11 +107,40 @@ function isGenericContainerMime(mime?: string): boolean {
return mime === "application/zip" || mime === "application/octet-stream";
}
function shouldIgnoreProvidedImageMime(params: {
/**
 * Decides whether an image MIME hint should be discarded.
 *
 * @param params.sniffedMime - MIME type detected from the payload bytes.
 * @param params.hintedMime - MIME type suggested by a secondary signal.
 * @returns `true` only when the sniffed type is a generic container (per
 *   `isGenericContainerMime`) and the hint is an image type (per `isImageMime`).
 */
function shouldIgnoreImageMimeHint(params: { sniffedMime?: string; hintedMime?: string }): boolean {
  const { sniffedMime, hintedMime } = params;
  // A non-generic sniff result never invalidates the hint.
  if (!isGenericContainerMime(sniffedMime)) {
    return false;
  }
  return isImageMime(hintedMime);
}
/**
 * Reports whether a MIME value is present and is not one of the generic
 * container types recognised by `isGenericContainerMime`.
 *
 * @param mime - Candidate MIME type; may be absent or empty.
 * @returns `false` for a missing/empty value or a generic container type,
 *   `true` otherwise.
 */
function isSpecificMime(mime?: string): boolean {
  if (!mime) {
    return false;
  }
  return !isGenericContainerMime(mime);
}
/**
 * Chooses the final MIME type for an attachment from three signals: the type
 * sniffed from the bytes, the type provided by the caller, and the type derived
 * from the attachment label.
 *
 * Specific (non-generic-container) values win first, in the order sniffed,
 * provided, label; the same order is then retried accepting generic values;
 * "application/octet-stream" is the last resort. Provided and label hints that
 * claim an image type are dropped when the sniffed type is a generic container.
 *
 * @returns The selected MIME type string.
 */
function resolveAttachmentMime(params: {
sniffedMime?: string;
providedMime?: string;
// NOTE(review): the next two lines look like removed lines carried over from
// the diff view (the tail of the old shouldIgnoreProvidedImageMime helper),
// not part of this signature — confirm against the committed file.
}): boolean {
return isGenericContainerMime(params.sniffedMime) && isImageMime(params.providedMime);
labelMime?: string;
}): string {
// Discard the provided MIME when it claims "image" but the bytes sniffed as a
// generic container.
const trustedProvidedMime = shouldIgnoreImageMimeHint({
sniffedMime: params.sniffedMime,
hintedMime: params.providedMime,
})
? undefined
: params.providedMime;
// Apply the same rule to the MIME derived from the label.
const trustedLabelMime = shouldIgnoreImageMimeHint({
sniffedMime: params.sniffedMime,
hintedMime: params.labelMime,
})
? undefined
: params.labelMime;
// First pass: only specific types. Second pass: any remaining value,
// including generic container types. Final fallback: octet-stream.
return (
(isSpecificMime(params.sniffedMime) && params.sniffedMime) ||
(isSpecificMime(trustedProvidedMime) && trustedProvidedMime) ||
(isSpecificMime(trustedLabelMime) && trustedLabelMime) ||
params.sniffedMime ||
trustedProvidedMime ||
trustedLabelMime ||
"application/octet-stream"
);
}
function isValidBase64(value: string): boolean {
@@ -264,24 +293,12 @@ export async function parseMessageWithAttachments(
const providedMime = normalizeMime(mime);
const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64));
const labelMime = normalizeMime(mimeTypeFromFilePath(label));
const trustedProvidedMime = shouldIgnoreProvidedImageMime({ sniffedMime, providedMime })
? undefined
: providedMime;
// Prefer specific MIME signals over generic container types. OOXML
// documents (docx/xlsx/pptx) sniff as application/zip; without this
// priority the agent would receive a `.zip` instead of the specific
// Office document the caller declared.
const finalMime =
(sniffedMime && !isGenericContainerMime(sniffedMime) && sniffedMime) ||
(trustedProvidedMime &&
!isGenericContainerMime(trustedProvidedMime) &&
trustedProvidedMime) ||
(labelMime && !isGenericContainerMime(labelMime) && labelMime) ||
sniffedMime ||
trustedProvidedMime ||
labelMime ||
"application/octet-stream";
const finalMime = resolveAttachmentMime({ sniffedMime, providedMime, labelMime });
if (
sniffedMime &&

View File

@@ -2905,6 +2905,62 @@ describe("chat directive tag stripping for non-streaming final payloads", () =>
expect(mockState.lastDispatchCtx?.MediaStaged).toBe(true);
});
// End-to-end check for chat.send: an attachment labelled as a PNG whose bytes
// are a zip archive must be dispatched as a staged non-image media path, not
// as an inline image.
it("routes image-named generic container bytes as non-image media paths for chat.send", async () => {
createTranscriptFixture("openclaw-chat-send-spoofed-image-container-");
mockState.finalText = "ok";
// The session model advertises image input, so the attachment is not being
// diverted merely because the model lacks image support.
mockState.sessionEntry = {
modelProvider: "test-provider",
model: "vision-model",
};
mockState.modelCatalog = [
{
provider: "test-provider",
id: "vision-model",
name: "Vision model",
input: ["text", "image"],
},
];
// Result the mocked media save returns for the offloaded payload.
mockState.savedMediaResults = [
{ path: "/home/user/.openclaw/media/inbound/fake.zip", contentType: "application/zip" },
];
const respond = vi.fn();
const context = createChatContext();
// Payload begins with the ZIP local-file-header signature "PK\u0003\u0004".
const zip = Buffer.from("PK\u0003\u0004zip-archive-bytes").toString("base64");
await runNonStreamingChatSend({
context,
respond,
idempotencyKey: "idem-spoofed-image-container",
message: "inspect this",
requestParams: {
attachments: [
{
type: "image",
mimeType: "image/png",
fileName: "fake.png",
content: zip,
},
],
},
expectBroadcast: false,
});
// Exactly one save, typed as zip; the size is echoed back from the recorded
// call so only contentType and subdir are effectively asserted.
expect(mockState.savedMediaCalls).toEqual([
{
contentType: "application/zip",
subdir: "inbound",
size: mockState.savedMediaCalls[0]?.size ?? 0,
},
]);
// The dispatch context carries the saved path and zip type.
expect(mockState.lastDispatchCtx?.MediaPaths).toEqual([
"/home/user/.openclaw/media/inbound/fake.zip",
]);
expect(mockState.lastDispatchCtx?.MediaTypes).toEqual(["application/zip"]);
// No inline images and no media:// reference in the body; media is staged.
expect(mockState.lastDispatchImages).toBeUndefined();
expect(mockState.lastDispatchCtx?.Body).not.toContain("media://");
expect(mockState.lastDispatchCtx?.MediaStaged).toBe(true);
});
it("preserves sandbox-relative MediaPaths and stores workspace context for media-understanding", async () => {
createTranscriptFixture("openclaw-chat-send-non-image-absolutize-");
mockState.finalText = "ok";