From 734bb9c2e73b18777175ae05953f51b4be034d33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Martin?= Date: Wed, 15 Apr 2026 02:53:00 +0200 Subject: [PATCH] Telegram/documents: sanitize binary payloads to prevent prompt input inflation (#66877) Merged via squash. Prepared head SHA: 09a87c184f36ca51a8a98da3376c9f13ffd3663b Co-authored-by: martinfrancois <14319020+martinfrancois@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras --- CHANGELOG.md | 1 + .../src/bot-message-context.session.ts | 21 +++- extensions/telegram/src/bot.test.ts | 33 +++++ extensions/telegram/src/bot/body-helpers.ts | 8 +- extensions/telegram/src/bot/helpers.test.ts | 60 ++++++++- extensions/telegram/src/bot/helpers.ts | 34 +++-- src/media-understanding/apply.test.ts | 119 ++++++++++++++++++ src/media-understanding/apply.ts | 20 +++ 8 files changed, 278 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c89b72d7eba..90e77e2dde1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai - BlueBubbles/inbound: add a persistent file-backed GUID dedupe so MessagePoller webhook replays after BB Server restart or reconnect no longer cause the agent to re-reply to already-handled messages. (#19176, #12053, #66816) Thanks @omarshahine. - Secrets/plugins/status: align SecretRef inspect-vs-strict handling across plugin preload, read-only status/agents surfaces, and runtime auth paths so unresolved refs no longer crash read-only CLI flows while runtime-required non-env refs stay strict. (#66818) Thanks @joshavant. - Memory/dreaming: stop ordinary transcripts that merely quote the dream-diary prompt from being classified as internal dreaming runs and silently dropped from session recall ingestion. (#66852) Thanks @gumadeiras. +- Telegram/documents: sanitize binary reply context and ZIP-like archive extraction so `.epub` and `.mobi` uploads can no longer leak raw binary into prompt context through reply metadata or archive-to-`text/plain` coercion. (#66877) Thanks @martinfrancois. ## 2026.4.14 diff --git a/extensions/telegram/src/bot-message-context.session.ts b/extensions/telegram/src/bot-message-context.session.ts index 23491d253ab..58c1791ed9b 100644 --- a/extensions/telegram/src/bot-message-context.session.ts +++ b/extensions/telegram/src/bot-message-context.session.ts @@ -221,14 +221,29 @@ export async function buildTelegramInboundContextPayload(params: { : "" }]\n` : ""; + const buildReplySupplementalLines = (params: { body?: string }) => { + const lines: string[] = []; + const forwardAnnotation = replyForwardAnnotation.trimEnd(); + if (forwardAnnotation) { + lines.push(forwardAnnotation); + } + if (params.body) { + lines.push(params.body); + } + return lines.length > 0 ? `\n${lines.join("\n")}` : ""; + }; const replySuffix = visibleReplyTarget ? visibleReplyTarget.kind === "quote" ? `\n\n[Quoting ${visibleReplyTarget.sender}${ visibleReplyTarget.id ? ` id:${visibleReplyTarget.id}` : "" - }]\n${replyForwardAnnotation}"${visibleReplyTarget.body}"\n[/Quoting]` + }]${buildReplySupplementalLines({ + body: visibleReplyTarget.body ? `"${visibleReplyTarget.body}"` : undefined, + })}\n[/Quoting]` : `\n\n[Replying to ${visibleReplyTarget.sender}${ visibleReplyTarget.id ? ` id:${visibleReplyTarget.id}` : "" - }]\n${replyForwardAnnotation}${visibleReplyTarget.body}\n[/Replying]` + }]${buildReplySupplementalLines({ + body: visibleReplyTarget.body, + })}\n[/Replying]` : ""; const forwardPrefix = visibleForwardOrigin ? `[Forwarded from ${visibleForwardOrigin.from}${ @@ -427,7 +442,7 @@ export async function buildTelegramInboundContextPayload(params: { }); if (visibleReplyTarget && shouldLogVerbose()) { - const preview = visibleReplyTarget.body.replace(/\s+/g, " ").slice(0, 120); + const preview = (visibleReplyTarget.body ?? "").replace(/\s+/g, " ").slice(0, 120); logVerbose( `telegram reply-context: replyToId=${visibleReplyTarget.id} replyToSender=${visibleReplyTarget.sender} replyToBody="${preview}"`, ); diff --git a/extensions/telegram/src/bot.test.ts b/extensions/telegram/src/bot.test.ts index abcf38d8ab4..86a06f3a861 100644 --- a/extensions/telegram/src/bot.test.ts +++ b/extensions/telegram/src/bot.test.ts @@ -1280,6 +1280,39 @@ describe("createTelegramBot", () => { expect(payload.ReplyToSender).toBe("Ada"); }); + it("keeps reply linkage while omitting filtered binary reply captions", async () => { + onSpy.mockClear(); + sendMessageSpy.mockClear(); + replySpy.mockClear(); + + createTelegramBot({ token: "tok" }); + const handler = getOnHandler("message") as (ctx: Record) => Promise; + + await handler({ + message: { + chat: { id: 7, type: "private" }, + text: "Sure, see below", + date: 1736380800, + reply_to_message: { + message_id: 9001, + caption: "PK\x00\x03\x04binary", + from: { first_name: "Ada" }, + }, + }, + me: { username: "openclaw_bot" }, + getFile: async () => ({ download: async () => new Uint8Array() }), + }); + + expect(replySpy).toHaveBeenCalledTimes(1); + const payload = replySpy.mock.calls[0][0]; + expect(payload.Body).toContain("[Replying to Ada id:9001]"); + expect(payload.Body).not.toContain("PK"); + expect(payload.Body).not.toContain("unsafe reply text omitted"); + expect(payload.ReplyToBody).toBeUndefined(); + expect(payload.ReplyToId).toBe("9001"); + expect(payload.ReplyToSender).toBe("Ada"); + }); + it("includes replied image media in inbound context for text replies", async () => { onSpy.mockClear(); replySpy.mockClear(); diff --git a/extensions/telegram/src/bot/body-helpers.ts b/extensions/telegram/src/bot/body-helpers.ts index 064e1371645..0e7baf45ed0 100644 --- a/extensions/telegram/src/bot/body-helpers.ts +++ b/extensions/telegram/src/bot/body-helpers.ts @@ -102,14 +102,18 @@ export function isBinaryContent(text: string): boolean { return false; } +export function resolveTelegramTextContent(text: unknown, caption?: unknown): string { + const raw = typeof text === "string" ? text : typeof caption === "string" ? caption : ""; + return isBinaryContent(raw) ? "" : raw; +} + export function getTelegramTextParts( msg: Pick, ): { text: string; entities: TelegramTextEntity[]; } { - const raw = msg.text ?? msg.caption ?? ""; - const text = isBinaryContent(raw) ? "" : raw; + const text = resolveTelegramTextContent(msg.text, msg.caption); const entities = text ? (msg.entities ?? msg.caption_entities ?? []) : []; return { text, entities }; } diff --git a/extensions/telegram/src/bot/helpers.test.ts b/extensions/telegram/src/bot/helpers.test.ts index a22607a9b07..3c8f8aefb72 100644 --- a/extensions/telegram/src/bot/helpers.test.ts +++ b/extensions/telegram/src/bot/helpers.test.ts @@ -325,7 +325,6 @@ describe("describeReplyTarget", () => { from: { id: 42, first_name: "Alice", is_bot: false }, }, } as any); - // Should not throw when reply text is malformed; return null instead. expect(result).toBeNull(); }); @@ -347,6 +346,65 @@ describe("describeReplyTarget", () => { expect(result?.kind).toBe("reply"); }); + it("drops binary reply captions with no safe fallback", () => { + const result = describeReplyTarget({ + message_id: 2, + date: 1000, + chat: { id: 1, type: "private" }, + reply_to_message: { + message_id: 1, + date: 900, + chat: { id: 1, type: "private" }, + caption: "PK\x00\x03\x04binary", + from: { id: 42, first_name: "Alice", is_bot: false }, + }, + } as any); + expect(result?.id).toBe("1"); + expect(result?.sender).toBe("Alice"); + expect(result?.body).toBeUndefined(); + }); + + it("falls back to reply text when quote text is binary", () => { + const result = describeReplyTarget({ + message_id: 2, + date: 1000, + chat: { id: 1, type: "private" }, + quote: { + text: "\x00\x01\x02binary quote", + }, + reply_to_message: { + message_id: 1, + date: 900, + chat: { id: 1, type: "private" }, + text: "Original message", + from: { id: 42, first_name: "Alice", is_bot: false }, + }, + } as any); + expect(result?.body).toBe("Original message"); + expect(result?.kind).toBe("reply"); + }); + + it("falls back to external reply text when external quote text is binary", () => { + const result = describeReplyTarget({ + message_id: 5, + date: 1300, + chat: { id: 1, type: "private" }, + text: "Comment on forwarded message", + external_reply: { + message_id: 4, + date: 1200, + chat: { id: 1, type: "private" }, + text: "Forwarded from elsewhere", + quote: { + text: "PK\x00\x03\x04binary quote", + }, + from: { id: 123, first_name: "Eve", is_bot: false }, + }, + } as any); + expect(result?.body).toBe("Forwarded from elsewhere"); + expect(result?.kind).toBe("reply"); + }); + it("extracts forwarded context from reply_to_message (issue #9619)", () => { // When user forwards a message with a comment, the comment message has // reply_to_message pointing to the forwarded message. We should extract diff --git a/extensions/telegram/src/bot/helpers.ts b/extensions/telegram/src/bot/helpers.ts index 4251d97229b..70da79d41b3 100644 --- a/extensions/telegram/src/bot/helpers.ts +++ b/extensions/telegram/src/bot/helpers.ts @@ -20,6 +20,7 @@ import { hasBotMention, isBinaryContent, normalizeForwardedContext, + resolveTelegramTextContent, resolveTelegramMediaPlaceholder, type TelegramForwardedContext, } from "./body-helpers.js"; @@ -40,6 +41,10 @@ export { const TELEGRAM_GENERAL_TOPIC_ID = 1; +function hadUnsafeTelegramText(raw: unknown, sanitized: string): boolean { + return typeof raw === "string" && raw.trim().length > 0 && sanitized.trim().length === 0; +} + export type TelegramThreadSpec = { id?: number; scope: "dm" | "forum" | "none"; @@ -330,7 +335,7 @@ export type TelegramReplyTarget = { sender: string; senderId?: string; senderUsername?: string; - body: string; + body?: string; kind: "reply" | "quote"; /** Forward context if the reply target was itself a forwarded message (issue #9619). */ forwardedFrom?: TelegramForwardedContext; @@ -339,28 +344,30 @@ export type TelegramReplyTarget = { export function describeReplyTarget(msg: Message): TelegramReplyTarget | null { const reply = msg.reply_to_message; const externalReply = (msg as Message & { external_reply?: Message }).external_reply; - const quoteText = + const rawQuoteText = msg.quote?.text ?? (externalReply as (Message & { quote?: { text?: string } }) | undefined)?.quote?.text; + const quoteText = resolveTelegramTextContent(rawQuoteText); let body = ""; let kind: TelegramReplyTarget["kind"] = "reply"; + const filteredQuoteText = hadUnsafeTelegramText(rawQuoteText, quoteText); - if (typeof quoteText === "string") { - body = quoteText.trim(); - if (body) { - kind = "quote"; - } + body = quoteText.trim(); + if (body) { + kind = "quote"; } const replyLike = reply ?? externalReply; + let filteredReplyText = false; if (!body && replyLike) { - const replyBody = ( + const rawReplyText = typeof replyLike.text === "string" ? replyLike.text : typeof replyLike.caption === "string" ? replyLike.caption - : "" - ).trim(); + : undefined; + const replyBody = resolveTelegramTextContent(rawReplyText).trim(); + filteredReplyText = hadUnsafeTelegramText(rawReplyText, replyBody); body = replyBody; if (!body) { body = resolveTelegramMediaPlaceholder(replyLike) ?? ""; @@ -372,7 +379,10 @@ export function describeReplyTarget(msg: Message): TelegramReplyTarget | null { } } } - if (!body) { + if (!body && !replyLike) { + return null; + } + if (!body && !filteredQuoteText && !filteredReplyText) { return null; } const sender = replyLike ? buildSenderName(replyLike) : undefined; @@ -386,7 +396,7 @@ export function describeReplyTarget(msg: Message): TelegramReplyTarget | null { sender: senderLabel, senderId: replyLike?.from?.id != null ? String(replyLike.from.id) : undefined, senderUsername: replyLike?.from?.username ?? undefined, - body, + body: body || undefined, kind, forwardedFrom, }; diff --git a/src/media-understanding/apply.test.ts b/src/media-understanding/apply.test.ts index 95ec7ccfe31..330a89d148e 100644 --- a/src/media-understanding/apply.test.ts +++ b/src/media-understanding/apply.test.ts @@ -1092,6 +1092,125 @@ describe("applyMediaUnderstanding", () => { expectFileNotApplied({ ctx, result, body: "" }); }); + it("skips archive container attachments with +zip MIME types", async () => { + const pseudoEpub = Buffer.from( + "PK\u0003\u0004mimetypeapplication/epub+zipMETA-INF/container", + "utf8", + ); + const filePath = await createTempMediaFile({ + fileName: "book.epub", + content: pseudoEpub, + }); + + const { ctx, result } = await applyWithDisabledMedia({ + body: "", + mediaPath: filePath, + mediaType: "application/epub+zip", + }); + + expectFileNotApplied({ ctx, result, body: "" }); + }); + + it("does not coerce binary control-byte payloads into text/plain", async () => { + const pseudoZip = Buffer.from("PK\u0003\u0004mimetypeapplication/epub+zipcontent.opf", "utf8"); + const filePath = await createTempMediaFile({ + fileName: "payload.bin", + content: pseudoZip, + }); + + const { ctx, result } = await applyWithDisabledMedia({ + body: "", + mediaPath: filePath, + }); + + expectFileNotApplied({ ctx, result, body: "" }); + }); + + it("does not trust text file extensions when the buffer starts with a ZIP signature", async () => { + const spoofedZip = Buffer.from("PK\u0003\u0004mimetypeapplication/epub+zipcontent.opf", "utf8"); + const filePath = await createTempMediaFile({ + fileName: "payload.txt", + content: spoofedZip, + }); + + const { ctx, result } = await applyWithDisabledMedia({ + body: "", + mediaPath: filePath, + }); + + expectFileNotApplied({ ctx, result, body: "" }); + }); + + it("does not coerce real ZIP local headers into text/plain when UTF-16 guessing misfires", async () => { + const zipLikeHeader = Buffer.from([ + 0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x08, 0x29, 0xb9, 0x5a, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x66, 0x6f, + 0x6f, 0x2e, 0x74, 0x78, 0x74, + ]); + const filePath = await createTempMediaFile({ + fileName: "archive.bin", + content: zipLikeHeader, + }); + + const { ctx, result } = await applyWithDisabledMedia({ + body: "", + mediaPath: filePath, + }); + + expectFileNotApplied({ ctx, result, body: "" }); + }); + + it("does not coerce ZIP central-directory headers into text/plain", async () => { + const zipCentralDirectory = Buffer.from([ + 0x50, 0x4b, 0x01, 0x02, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x08, 0x29, 0xb9, + 0x5a, 0x00, 0x00, 0x00, 0x00, + ]); + const filePath = await createTempMediaFile({ + fileName: "central-directory.bin", + content: zipCentralDirectory, + }); + + const { ctx, result } = await applyWithDisabledMedia({ + body: "", + mediaPath: filePath, + }); + + expectFileNotApplied({ ctx, result, body: "" }); + }); + + it("does not coerce empty ZIP end-of-central-directory headers into text/plain", async () => { + const emptyZip = Buffer.from([ + 0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]); + const filePath = await createTempMediaFile({ + fileName: "empty-archive.bin", + content: emptyZip, + }); + + const { ctx, result } = await applyWithDisabledMedia({ + body: "", + mediaPath: filePath, + }); + + expectFileNotApplied({ ctx, result, body: "" }); + }); + + it("keeps utf16 text attachments eligible for extraction", async () => { + const utf16Text = Buffer.from("hello from utf16 text", "utf16le"); + const filePath = await createTempMediaFile({ + fileName: "notes.bin", + content: utf16Text, + }); + + const { ctx, result } = await applyWithDisabledMedia({ + body: "", + mediaPath: filePath, + }); + + expect(result.appliedFile).toBe(true); + expect(ctx.Body).toContain("hello from utf16 text"); + }); + it("does not reclassify PDF attachments as text/plain", async () => { const pseudoPdf = Buffer.from("%PDF-1.7\n1 0 obj\n<< /Type /Catalog >>\nendobj\n", "utf8"); const filePath = await createTempMediaFile({ diff --git a/src/media-understanding/apply.ts b/src/media-understanding/apply.ts index 66ec7b64de5..c95d765155e 100644 --- a/src/media-understanding/apply.ts +++ b/src/media-understanding/apply.ts @@ -248,6 +248,20 @@ function looksLikeUtf8Text(buffer?: Buffer): boolean { } } +function hasSuspiciousBinarySignal(buffer?: Buffer): boolean { + if (!buffer || buffer.length === 0) { + return false; + } + const sample = buffer.subarray(0, Math.min(buffer.length, 4096)); + if (sample.length < 4 || sample[0] !== 0x50 || sample[1] !== 0x4b) { + return false; + } + const signature = (sample[2] << 8) | sample[3]; + // Cover the ZIP local-header, central-directory, and empty-archive markers + // so archive payloads cannot slip past text coercion when MIME detection is weak. + return signature === 0x0304 || signature === 0x0102 || signature === 0x0506; +} + function decodeTextSample(buffer?: Buffer): string { if (!buffer || buffer.length === 0) { return ""; @@ -312,6 +326,9 @@ function isBinaryMediaMime(mime?: string): boolean { ) { return true; } + if (mime.endsWith("+zip")) { + return true; + } if (mime.startsWith("application/vnd.")) { // Keep vendor +json/+xml payloads eligible for text extraction while // treating the common binary vendor family (Office, archives, etc.) as binary. @@ -372,6 +389,9 @@ async function extractFileBlocks(params: { if (!forcedTextMimeResolved && isBinaryMediaMime(normalizedRawMime)) { continue; } + if (hasSuspiciousBinarySignal(bufferResult?.buffer)) { + continue; + } const utf16Charset = resolveUtf16Charset(bufferResult?.buffer); const textSample = decodeTextSample(bufferResult?.buffer); // Do not coerce real PDFs into text/plain via printable-byte heuristics.