Telegram/documents: sanitize binary payloads to prevent prompt input inflation (#66877)

Merged via squash. Prepared head SHA: 09a87c184f Co-authored-by: martinfrancois <14319020+martinfrancois@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras
2026-05-06 07:10:43 +00:00 · 2026-04-15 02:53:00 +02:00
parent 0c4e0d7030
commit 734bb9c2e7
8 changed files with 278 additions and 18 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai
 - BlueBubbles/inbound: add a persistent file-backed GUID dedupe so MessagePoller webhook replays after BB Server restart or reconnect no longer cause the agent to re-reply to already-handled messages. (#19176, #12053, #66816) Thanks @omarshahine.
 - Secrets/plugins/status: align SecretRef inspect-vs-strict handling across plugin preload, read-only status/agents surfaces, and runtime auth paths so unresolved refs no longer crash read-only CLI flows while runtime-required non-env refs stay strict. (#66818) Thanks @joshavant.
 - Memory/dreaming: stop ordinary transcripts that merely quote the dream-diary prompt from being classified as internal dreaming runs and silently dropped from session recall ingestion. (#66852) Thanks @gumadeiras.
+- Telegram/documents: sanitize binary reply context and ZIP-like archive extraction so `.epub` and `.mobi` uploads can no longer leak raw binary into prompt context through reply metadata or archive-to-`text/plain` coercion. (#66877) Thanks @martinfrancois.

 ## 2026.4.14

--- a/extensions/telegram/src/bot-message-context.session.ts
+++ b/extensions/telegram/src/bot-message-context.session.ts
@@ -221,14 +221,29 @@ export async function buildTelegramInboundContextPayload(params: {
          : ""
      }]\n`
    : "";
+  const buildReplySupplementalLines = (params: { body?: string }) => { // Renders the optional lines placed between a reply/quote header and its closing tag. NOTE(review): `params` shadows the enclosing function's `params`.
+    const lines: string[] = [];
+    const forwardAnnotation = replyForwardAnnotation.trimEnd(); // strip trailing whitespace so the join below controls line breaks
+    if (forwardAnnotation) {
+      lines.push(forwardAnnotation);
+    }
+    if (params.body) {
+      lines.push(params.body); // an undefined or empty body contributes no line at all
+    }
+    return lines.length > 0 ? `\n${lines.join("\n")}` : ""; // leading newline only when at least one line exists
+  };
  const replySuffix = visibleReplyTarget
    ? visibleReplyTarget.kind === "quote"
      ? `\n\n[Quoting ${visibleReplyTarget.sender}${
          visibleReplyTarget.id ? ` id:${visibleReplyTarget.id}` : ""
-        }]\n${replyForwardAnnotation}"${visibleReplyTarget.body}"\n[/Quoting]`
+        }]${buildReplySupplementalLines({
+          body: visibleReplyTarget.body ? `"${visibleReplyTarget.body}"` : undefined,
+        })}\n[/Quoting]`
      : `\n\n[Replying to ${visibleReplyTarget.sender}${
          visibleReplyTarget.id ? ` id:${visibleReplyTarget.id}` : ""
-        }]\n${replyForwardAnnotation}${visibleReplyTarget.body}\n[/Replying]`
+        }]${buildReplySupplementalLines({
+          body: visibleReplyTarget.body,
+        })}\n[/Replying]`
    : "";
  const forwardPrefix = visibleForwardOrigin
    ? `[Forwarded from ${visibleForwardOrigin.from}${
@@ -427,7 +442,7 @@ export async function buildTelegramInboundContextPayload(params: {
  });

  if (visibleReplyTarget && shouldLogVerbose()) {
-    const preview = visibleReplyTarget.body.replace(/\s+/g, " ").slice(0, 120);
+    const preview = (visibleReplyTarget.body ?? "").replace(/\s+/g, " ").slice(0, 120);
    logVerbose(
      `telegram reply-context: replyToId=${visibleReplyTarget.id} replyToSender=${visibleReplyTarget.sender} replyToBody="${preview}"`,
    );
--- a/extensions/telegram/src/bot.test.ts
+++ b/extensions/telegram/src/bot.test.ts
@@ -1280,6 +1280,39 @@ describe("createTelegramBot", () => {
    expect(payload.ReplyToSender).toBe("Ada");
  });

+  it("keeps reply linkage while omitting filtered binary reply captions", async () => {
+    onSpy.mockClear();
+    sendMessageSpy.mockClear();
+    replySpy.mockClear();
+
+    createTelegramBot({ token: "tok" });
+    const handler = getOnHandler("message") as (ctx: Record<string, unknown>) => Promise<void>;
+
+    await handler({
+      message: {
+        chat: { id: 7, type: "private" },
+        text: "Sure, see below",
+        date: 1736380800,
+        reply_to_message: {
+          message_id: 9001,
+          caption: "PK\x00\x03\x04binary", // "PK" prefix plus control bytes: the caption this case expects to be filtered
+          from: { first_name: "Ada" },
+        },
+      },
+      me: { username: "openclaw_bot" },
+      getFile: async () => ({ download: async () => new Uint8Array() }),
+    });
+
+    expect(replySpy).toHaveBeenCalledTimes(1);
+    const payload = replySpy.mock.calls[0][0];
+    expect(payload.Body).toContain("[Replying to Ada id:9001]"); // the reply header is still rendered
+    expect(payload.Body).not.toContain("PK"); // none of the raw caption reaches the body
+    expect(payload.Body).not.toContain("unsafe reply text omitted"); // and no placeholder text is substituted for it
+    expect(payload.ReplyToBody).toBeUndefined();
+    expect(payload.ReplyToId).toBe("9001"); // linkage metadata survives even though the body is dropped
+    expect(payload.ReplyToSender).toBe("Ada");
+  });
+
  it("includes replied image media in inbound context for text replies", async () => {
    onSpy.mockClear();
    replySpy.mockClear();
--- a/extensions/telegram/src/bot/body-helpers.ts
+++ b/extensions/telegram/src/bot/body-helpers.ts
@@ -102,14 +102,18 @@ export function isBinaryContent(text: string): boolean {
  return false;
 }

+export function resolveTelegramTextContent(text: unknown, caption?: unknown): string { // Picks `text`, else `caption`, and blanks the result when it is flagged as binary.
+  const raw = typeof text === "string" ? text : typeof caption === "string" ? caption : ""; // a string `text` (even "") wins over `caption`; non-strings fall through to ""
+  return isBinaryContent(raw) ? "" : raw; // content flagged by isBinaryContent is replaced with "" rather than partially cleaned
+}
+
 export function getTelegramTextParts(
  msg: Pick<Message, "text" | "caption" | "entities" | "caption_entities">,
 ): {
  text: string;
  entities: TelegramTextEntity[];
 } {
-  const raw = msg.text ?? msg.caption ?? "";
-  const text = isBinaryContent(raw) ? "" : raw;
+  const text = resolveTelegramTextContent(msg.text, msg.caption);
  const entities = text ? (msg.entities ?? msg.caption_entities ?? []) : [];
  return { text, entities };
 }
--- a/extensions/telegram/src/bot/helpers.test.ts
+++ b/extensions/telegram/src/bot/helpers.test.ts
@@ -325,7 +325,6 @@ describe("describeReplyTarget", () => {
        from: { id: 42, first_name: "Alice", is_bot: false },
      },
    } as any);
-    // Should not throw when reply text is malformed; return null instead.
    expect(result).toBeNull();
  });

@@ -347,6 +346,65 @@ describe("describeReplyTarget", () => {
    expect(result?.kind).toBe("reply");
  });

+  it("drops binary reply captions with no safe fallback", () => {
+    const result = describeReplyTarget({
+      message_id: 2,
+      date: 1000,
+      chat: { id: 1, type: "private" },
+      reply_to_message: {
+        message_id: 1,
+        date: 900,
+        chat: { id: 1, type: "private" },
+        caption: "PK\x00\x03\x04binary", // the only text on the reply target, and it contains control bytes
+        from: { id: 42, first_name: "Alice", is_bot: false },
+      },
+    } as any);
+    expect(result?.id).toBe("1"); // a target is still returned, not null
+    expect(result?.sender).toBe("Alice");
+    expect(result?.body).toBeUndefined(); // the body is omitted instead of carrying the caption
+  });
+
+  it("falls back to reply text when quote text is binary", () => {
+    const result = describeReplyTarget({
+      message_id: 2,
+      date: 1000,
+      chat: { id: 1, type: "private" },
+      quote: {
+        text: "\x00\x01\x02binary quote", // quote text that starts with control bytes
+      },
+      reply_to_message: {
+        message_id: 1,
+        date: 900,
+        chat: { id: 1, type: "private" },
+        text: "Original message", // clean text on the replied-to message
+        from: { id: 42, first_name: "Alice", is_bot: false },
+      },
+    } as any);
+    expect(result?.body).toBe("Original message");
+    expect(result?.kind).toBe("reply"); // reported as a plain reply, not a quote, once the quote text is discarded
+  });
+
+  it("falls back to external reply text when external quote text is binary", () => {
+    const result = describeReplyTarget({
+      message_id: 5,
+      date: 1300,
+      chat: { id: 1, type: "private" },
+      text: "Comment on forwarded message",
+      external_reply: {
+        message_id: 4,
+        date: 1200,
+        chat: { id: 1, type: "private" },
+        text: "Forwarded from elsewhere", // clean text on the external reply itself
+        quote: {
+          text: "PK\x00\x03\x04binary quote", // quote nested under external_reply, containing control bytes
+        },
+        from: { id: 123, first_name: "Eve", is_bot: false },
+      },
+    } as any);
+    expect(result?.body).toBe("Forwarded from elsewhere");
+    expect(result?.kind).toBe("reply"); // reported as a plain reply, not a quote
+  });
+
  it("extracts forwarded context from reply_to_message (issue #9619)", () => {
    // When user forwards a message with a comment, the comment message has
    // reply_to_message pointing to the forwarded message. We should extract
--- a/extensions/telegram/src/bot/helpers.ts
+++ b/extensions/telegram/src/bot/helpers.ts
@@ -20,6 +20,7 @@ import {
  hasBotMention,
  isBinaryContent,
  normalizeForwardedContext,
+  resolveTelegramTextContent,
  resolveTelegramMediaPlaceholder,
  type TelegramForwardedContext,
 } from "./body-helpers.js";
@@ -40,6 +41,10 @@ export {

 const TELEGRAM_GENERAL_TOPIC_ID = 1;

+function hadUnsafeTelegramText(raw: unknown, sanitized: string): boolean { // True when `raw` was a non-blank string but its sanitized form is blank.
+  return typeof raw === "string" && raw.trim().length > 0 && sanitized.trim().length === 0; // whitespace-only input never counts as "unsafe"
+}
+
 export type TelegramThreadSpec = {
  id?: number;
  scope: "dm" | "forum" | "none";
@@ -330,7 +335,7 @@ export type TelegramReplyTarget = {
  sender: string;
  senderId?: string;
  senderUsername?: string;
-  body: string;
+  body?: string;
  kind: "reply" | "quote";
  /** Forward context if the reply target was itself a forwarded message (issue #9619). */
  forwardedFrom?: TelegramForwardedContext;
@@ -339,28 +344,30 @@ export type TelegramReplyTarget = {
 export function describeReplyTarget(msg: Message): TelegramReplyTarget | null {
  const reply = msg.reply_to_message;
  const externalReply = (msg as Message & { external_reply?: Message }).external_reply;
-  const quoteText =
+  const rawQuoteText =
    msg.quote?.text ??
    (externalReply as (Message & { quote?: { text?: string } }) | undefined)?.quote?.text;
+  const quoteText = resolveTelegramTextContent(rawQuoteText);
  let body = "";
  let kind: TelegramReplyTarget["kind"] = "reply";
+  const filteredQuoteText = hadUnsafeTelegramText(rawQuoteText, quoteText);

-  if (typeof quoteText === "string") {
-    body = quoteText.trim();
-    if (body) {
-      kind = "quote";
-    }
+  body = quoteText.trim();
+  if (body) {
+    kind = "quote";
  }

  const replyLike = reply ?? externalReply;
+  let filteredReplyText = false;
  if (!body && replyLike) {
-    const replyBody = (
+    const rawReplyText =
      typeof replyLike.text === "string"
        ? replyLike.text
        : typeof replyLike.caption === "string"
          ? replyLike.caption
-          : ""
-    ).trim();
+          : undefined;
+    const replyBody = resolveTelegramTextContent(rawReplyText).trim();
+    filteredReplyText = hadUnsafeTelegramText(rawReplyText, replyBody);
    body = replyBody;
    if (!body) {
      body = resolveTelegramMediaPlaceholder(replyLike) ?? "";
@@ -372,7 +379,10 @@ export function describeReplyTarget(msg: Message): TelegramReplyTarget | null {
      }
    }
  }
-  if (!body) {
+  if (!body && !replyLike) {
+    return null;
+  }
+  if (!body && !filteredQuoteText && !filteredReplyText) {
    return null;
  }
  const sender = replyLike ? buildSenderName(replyLike) : undefined;
@@ -386,7 +396,7 @@ export function describeReplyTarget(msg: Message): TelegramReplyTarget | null {
    sender: senderLabel,
    senderId: replyLike?.from?.id != null ? String(replyLike.from.id) : undefined,
    senderUsername: replyLike?.from?.username ?? undefined,
-    body,
+    body: body || undefined,
    kind,
    forwardedFrom,
  };
--- a/src/media-understanding/apply.test.ts
+++ b/src/media-understanding/apply.test.ts
@@ -1092,6 +1092,125 @@ describe("applyMediaUnderstanding", () => {
    expectFileNotApplied({ ctx, result, body: "<media:audio>" });
  });

+  it("skips archive container attachments with +zip MIME types", async () => {
+    const pseudoEpub = Buffer.from(
+      "PK\u0003\u0004mimetypeapplication/epub+zipMETA-INF/container", // starts with the bytes "PK" 0x03 0x04
+      "utf8",
+    );
+    const filePath = await createTempMediaFile({
+      fileName: "book.epub",
+      content: pseudoEpub,
+    });
+
+    const { ctx, result } = await applyWithDisabledMedia({
+      body: "<media:file>",
+      mediaPath: filePath,
+      mediaType: "application/epub+zip", // the "+zip" suffix named in the test title
+    });
+
+    expectFileNotApplied({ ctx, result, body: "<media:file>" });
+  });
+
+  it("does not coerce binary control-byte payloads into text/plain", async () => {
+    const pseudoZip = Buffer.from("PK\u0003\u0004mimetypeapplication/epub+zipcontent.opf", "utf8"); // mostly printable ASCII after the "PK" 0x03 0x04 prefix
+    const filePath = await createTempMediaFile({
+      fileName: "payload.bin",
+      content: pseudoZip,
+    });
+
+    const { ctx, result } = await applyWithDisabledMedia({
+      body: "<media:file>",
+      mediaPath: filePath, // no mediaType is passed in this case
+    });
+
+    expectFileNotApplied({ ctx, result, body: "<media:file>" });
+  });
+
+  it("does not trust text file extensions when the buffer starts with a ZIP signature", async () => {
+    const spoofedZip = Buffer.from("PK\u0003\u0004mimetypeapplication/epub+zipcontent.opf", "utf8"); // same bytes as the "payload.bin" case above
+    const filePath = await createTempMediaFile({
+      fileName: "payload.txt", // text-looking extension on content that opens with "PK" 0x03 0x04
+      content: spoofedZip,
+    });
+
+    const { ctx, result } = await applyWithDisabledMedia({
+      body: "<media:file>",
+      mediaPath: filePath, // no mediaType is passed in this case
+    });
+
+    expectFileNotApplied({ ctx, result, body: "<media:file>" });
+  });
+
+  it("does not coerce real ZIP local headers into text/plain when UTF-16 guessing misfires", async () => {
+    const zipLikeHeader = Buffer.from([
+      0x50, 0x4b, 0x03, 0x04, 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x08, 0x29, 0xb9, 0x5a, 0x00, // opens with "PK" 0x03 0x04
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x66, 0x6f, // long run of 0x00 bytes
+      0x6f, 0x2e, 0x74, 0x78, 0x74, // the final seven bytes (from 0x66 above) are ASCII "foo.txt"
+    ]);
+    const filePath = await createTempMediaFile({
+      fileName: "archive.bin",
+      content: zipLikeHeader,
+    });
+
+    const { ctx, result } = await applyWithDisabledMedia({
+      body: "<media:file>",
+      mediaPath: filePath, // no mediaType is passed in this case
+    });
+
+    expectFileNotApplied({ ctx, result, body: "<media:file>" });
+  });
+
+  it("does not coerce ZIP central-directory headers into text/plain", async () => {
+    const zipCentralDirectory = Buffer.from([
+      0x50, 0x4b, 0x01, 0x02, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x08, 0x29, 0xb9, // opens with "PK" 0x01 0x02
+      0x5a, 0x00, 0x00, 0x00, 0x00,
+    ]);
+    const filePath = await createTempMediaFile({
+      fileName: "central-directory.bin",
+      content: zipCentralDirectory,
+    });
+
+    const { ctx, result } = await applyWithDisabledMedia({
+      body: "<media:file>",
+      mediaPath: filePath, // no mediaType is passed in this case
+    });
+
+    expectFileNotApplied({ ctx, result, body: "<media:file>" });
+  });
+
+  it("does not coerce empty ZIP end-of-central-directory headers into text/plain", async () => {
+    const emptyZip = Buffer.from([
+      0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // "PK" 0x05 0x06 followed by eight zero bytes
+    ]);
+    const filePath = await createTempMediaFile({
+      fileName: "empty-archive.bin",
+      content: emptyZip,
+    });
+
+    const { ctx, result } = await applyWithDisabledMedia({
+      body: "<media:file>",
+      mediaPath: filePath, // no mediaType is passed in this case
+    });
+
+    expectFileNotApplied({ ctx, result, body: "<media:file>" });
+  });
+
+  it("keeps utf16 text attachments eligible for extraction", async () => {
+    const utf16Text = Buffer.from("hello from utf16 text", "utf16le"); // plain ASCII text encoded as UTF-16LE, no "PK" prefix
+    const filePath = await createTempMediaFile({
+      fileName: "notes.bin",
+      content: utf16Text,
+    });
+
+    const { ctx, result } = await applyWithDisabledMedia({
+      body: "<media:file>",
+      mediaPath: filePath, // no mediaType is passed in this case
+    });
+
+    expect(result.appliedFile).toBe(true); // counter-case to the archive tests: this payload must still be applied
+    expect(ctx.Body).toContain("hello from utf16 text");
+  });
+
  it("does not reclassify PDF attachments as text/plain", async () => {
    const pseudoPdf = Buffer.from("%PDF-1.7\n1 0 obj\n<< /Type /Catalog >>\nendobj\n", "utf8");
    const filePath = await createTempMediaFile({
--- a/src/media-understanding/apply.ts
+++ b/src/media-understanding/apply.ts
@@ -248,6 +248,20 @@ function looksLikeUtf8Text(buffer?: Buffer): boolean {
  }
 }

+function hasSuspiciousBinarySignal(buffer?: Buffer): boolean { // True when the buffer opens with one of three ZIP record signatures.
+  if (!buffer || buffer.length === 0) {
+    return false;
+  }
+  const sample = buffer.subarray(0, Math.min(buffer.length, 4096)); // NOTE(review): only sample[0..3] are read below
+  if (sample.length < 4 || sample[0] !== 0x50 || sample[1] !== 0x4b) { // 0x50 0x4b is ASCII "PK"
+    return false;
+  }
+  const signature = (sample[2] << 8) | sample[3]; // bytes 2 and 3 packed high-byte-first into one value
+  // Match the ZIP local-header (03 04), central-directory (01 02), and end-of-central-directory
+  // (05 06) markers so archive payloads cannot slip past text coercion when MIME detection is weak.
+  return signature === 0x0304 || signature === 0x0102 || signature === 0x0506;
+}
+
 function decodeTextSample(buffer?: Buffer): string {
  if (!buffer || buffer.length === 0) {
    return "";
@@ -312,6 +326,9 @@ function isBinaryMediaMime(mime?: string): boolean {
  ) {
    return true;
  }
+  if (mime.endsWith("+zip")) {
+    return true;
+  }
  if (mime.startsWith("application/vnd.")) {
    // Keep vendor +json/+xml payloads eligible for text extraction while
    // treating the common binary vendor family (Office, archives, etc.) as binary.
@@ -372,6 +389,9 @@ async function extractFileBlocks(params: {
    if (!forcedTextMimeResolved && isBinaryMediaMime(normalizedRawMime)) {
      continue;
    }
+    if (hasSuspiciousBinarySignal(bufferResult?.buffer)) {
+      continue;
+    }
    const utf16Charset = resolveUtf16Charset(bufferResult?.buffer);
    const textSample = decodeTextSample(bufferResult?.buffer);
    // Do not coerce real PDFs into text/plain via printable-byte heuristics.