diff --git a/CHANGELOG.md b/CHANGELOG.md index 87bc08720b0..7343d47ddc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Gateway/media: prevent image filenames from overriding generic non-image byte sniffing, so zip/octet-stream payloads mislabeled as images are offloaded or rejected before they become inline image attachments. - MS Teams/media: sniff inline `data:image/*` attachment bytes before staging them, skipping payloads that are not actually images. - Update: let package-swap `doctor --fix` persist core config repairs while plugin schemas are still converging, preventing update failures on externalized channel configs. - Telegram: let authorized text `/stop` commands use the fast-abort path before queued agent work, so active turns stop immediately instead of processing the abort after the turn finishes. Fixes #82162. Thanks @civiltox. diff --git a/src/gateway/chat-attachments.test.ts b/src/gateway/chat-attachments.test.ts index 5afa86caf11..47485efebbe 100644 --- a/src/gateway/chat-attachments.test.ts +++ b/src/gateway/chat-attachments.test.ts @@ -295,6 +295,23 @@ describe("parseMessageWithAttachments", () => { expect(parsed.offloadedRefs[0]?.label).toBe("bundle.zip"); expect(parsed.offloadedRefs[0]?.mimeType).toBe("application/zip"); }); + + it("does not let image filenames override generic non-image byte sniffing", async () => { + const zip = Buffer.from("PK\u0003\u0004zip-archive-bytes").toString("base64"); /* payload starts with the ZIP signature even though it is declared as image/png and named fake.png */ + const { parsed, logs } = await parseWithWarnings("x", [ + { + type: "image", + mimeType: "image/png", + fileName: "fake.png", + content: zip, + }, + ]); + expect(parsed.images).toHaveLength(0); /* the spoofed attachment must be offloaded with the zip MIME, never inlined as an image */ + expect(parsed.offloadedRefs).toHaveLength(1); + expect(parsed.offloadedRefs[0]?.mimeType).toBe("application/zip"); + expect(savedMime()).toBe("application/zip"); + expect(logs[0]).toMatch(/mime mismatch/i); + }); }); describe("parseMessageWithAttachments validation errors", () => { @@ -349,6 +366,23 
@@ describe("parseMessageWithAttachments validation errors", () => { expect(saveMediaBufferMock).not.toHaveBeenCalled(); }); + it("rejects generic-container payloads with image mime and image filename when acceptNonImage is false", async () => { + const zip = Buffer.from("PK\u0003\u0004zip-archive-bytes").toString("base64"); /* zip-signature bytes presented with an image MIME and an image filename */ + let caught: unknown; + try { + await parseMessageWithAttachments( + "x", + [{ type: "image", mimeType: "image/png", fileName: "fake.png", content: zip }], + { log: { warn: () => {} }, acceptNonImage: false }, + ); + } catch (err) { + caught = err; + } + expect(caught).toBeInstanceOf(UnsupportedAttachmentError); /* rejected as a non-image, and nothing may have been saved */ + expect((caught as UnsupportedAttachmentError).reason).toBe("unsupported-non-image"); + expect(saveMediaBufferMock).not.toHaveBeenCalled(); + }); + it("throws UnsupportedAttachmentError on image when supportsInlineImages is false", async () => { let caught: unknown; try { diff --git a/src/gateway/chat-attachments.ts b/src/gateway/chat-attachments.ts index a2bb1304649..4c3cf0d02cd 100644 --- a/src/gateway/chat-attachments.ts +++ b/src/gateway/chat-attachments.ts @@ -107,11 +107,40 @@ function isGenericContainerMime(mime?: string): boolean { return mime === "application/zip" || mime === "application/octet-stream"; } -function shouldIgnoreProvidedImageMime(params: { +/** True when the sniffed MIME is a generic container type (per isGenericContainerMime) while the hint is an image MIME (per isImageMime); the caller then discards that hint. */ function shouldIgnoreImageMimeHint(params: { sniffedMime?: string; hintedMime?: string }): boolean { + return isGenericContainerMime(params.sniffedMime) && isImageMime(params.hintedMime); +} + +/** True for a non-empty MIME that is not a generic container type. */ function isSpecificMime(mime?: string): boolean { + return Boolean(mime && !isGenericContainerMime(mime)); +} + +/** Chooses the attachment MIME from the sniffed, provided and label candidates. Provided and label image hints are dropped when the sniffed type is a generic container. Falls back to "application/octet-stream" when no candidate is usable. */ function resolveAttachmentMime(params: { sniffedMime?: string; providedMime?: string; -}): boolean { - return isGenericContainerMime(params.sniffedMime) && isImageMime(params.providedMime); + labelMime?: string; +}): string { + const trustedProvidedMime = shouldIgnoreImageMimeHint({ + sniffedMime: params.sniffedMime, + hintedMime: params.providedMime, + }) + ? 
undefined + : params.providedMime; + const trustedLabelMime = shouldIgnoreImageMimeHint({ + sniffedMime: params.sniffedMime, + hintedMime: params.labelMime, + }) + ? undefined + : params.labelMime; + return ( /* first specific candidate in the order sniffed, provided, label; otherwise the first candidate present in that same order; otherwise the octet-stream default */ + (isSpecificMime(params.sniffedMime) && params.sniffedMime) || + (isSpecificMime(trustedProvidedMime) && trustedProvidedMime) || + (isSpecificMime(trustedLabelMime) && trustedLabelMime) || + params.sniffedMime || + trustedProvidedMime || + trustedLabelMime || + "application/octet-stream" + ); } function isValidBase64(value: string): boolean { @@ -264,24 +293,12 @@ export async function parseMessageWithAttachments( const providedMime = normalizeMime(mime); const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64)); const labelMime = normalizeMime(mimeTypeFromFilePath(label)); - const trustedProvidedMime = shouldIgnoreProvidedImageMime({ sniffedMime, providedMime }) - ? undefined - : providedMime; // Prefer specific MIME signals over generic container types. OOXML // documents (docx/xlsx/pptx) sniff as application/zip; without this // priority the agent would receive a `.zip` instead of the specific // Office document the caller declared. 
- const finalMime = - (sniffedMime && !isGenericContainerMime(sniffedMime) && sniffedMime) || - (trustedProvidedMime && - !isGenericContainerMime(trustedProvidedMime) && - trustedProvidedMime) || - (labelMime && !isGenericContainerMime(labelMime) && labelMime) || - sniffedMime || - trustedProvidedMime || - labelMime || - "application/octet-stream"; + const finalMime = resolveAttachmentMime({ sniffedMime, providedMime, labelMime }); /* the image-hint filtering now covers the label-derived MIME as well as the provided one */ if ( sniffedMime && diff --git a/src/gateway/server-methods/chat.directive-tags.test.ts b/src/gateway/server-methods/chat.directive-tags.test.ts index d9a1fa11841..0fa317033bd 100644 --- a/src/gateway/server-methods/chat.directive-tags.test.ts +++ b/src/gateway/server-methods/chat.directive-tags.test.ts @@ -2905,6 +2905,62 @@ describe("chat directive tag stripping for non-streaming final payloads", () => expect(mockState.lastDispatchCtx?.MediaStaged).toBe(true); }); + it("routes image-named generic container bytes as non-image media paths for chat.send", async () => { + createTranscriptFixture("openclaw-chat-send-spoofed-image-container-"); + mockState.finalText = "ok"; + mockState.sessionEntry = { + modelProvider: "test-provider", + model: "vision-model", + }; + mockState.modelCatalog = [ + { + provider: "test-provider", + id: "vision-model", + name: "Vision model", + input: ["text", "image"], + }, + ]; + mockState.savedMediaResults = [ /* presumably the result handed back by the mocked media save; verify against the mock setup */ + { path: "/home/user/.openclaw/media/inbound/fake.zip", contentType: "application/zip" }, + ]; + const respond = vi.fn(); + const context = createChatContext(); + const zip = Buffer.from("PK\u0003\u0004zip-archive-bytes").toString("base64"); /* zip-signature bytes sent below as an image/png attachment named fake.png */ + + await runNonStreamingChatSend({ + context, + respond, + idempotencyKey: "idem-spoofed-image-container", + message: "inspect this", + requestParams: { + attachments: [ + { + type: "image", + mimeType: "image/png", + fileName: "fake.png", + content: zip, + }, + ], + }, + expectBroadcast: false, + }); + + expect(mockState.savedMediaCalls).toEqual([ + { + 
contentType: "application/zip", + subdir: "inbound", + size: mockState.savedMediaCalls[0]?.size ?? 0, /* NOTE(review): size is read back from the recorded call, so only contentType and subdir are effectively asserted here */ + }, + ]); + expect(mockState.lastDispatchCtx?.MediaPaths).toEqual([ + "/home/user/.openclaw/media/inbound/fake.zip", + ]); + expect(mockState.lastDispatchCtx?.MediaTypes).toEqual(["application/zip"]); + expect(mockState.lastDispatchImages).toBeUndefined(); /* the spoofed attachment must not be dispatched as an inline image */ + expect(mockState.lastDispatchCtx?.Body).not.toContain("media://"); + expect(mockState.lastDispatchCtx?.MediaStaged).toBe(true); + }); + it("preserves sandbox-relative MediaPaths and stores workspace context for media-understanding", async () => { createTranscriptFixture("openclaw-chat-send-non-image-absolutize-"); mockState.finalText = "ok";