diff --git a/CHANGELOG.md b/CHANGELOG.md index 33dc3964efd..a2212c281c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai - Cron/agents: forward embedded-run tool policy and internal event params into the attempt layer so `--tools` allowlists, cron-owned message-tool suppression, explicit message targeting, and command-path internal events all take effect at runtime again. (#62675) Thanks @hexsprite. - Setup/providers: guard preferred-provider lookup during setup so malformed plugin metadata with a missing provider id no longer crashes the wizard with `Cannot read properties of undefined (reading 'trim')`. (#66649) Thanks @Tianworld. - Matrix/security: normalize sandboxed profile avatar params, preserve `mxc://` avatar URLs, and surface gmail watcher stop failures during reload. (#64701) Thanks @slepybear. +- Telegram/documents: drop leaked binary caption bytes from inbound Telegram text handling so document uploads like `.mobi` or `.epub` no longer explode prompt token counts. (#66663) Thanks @joelnishanth. ## 2026.4.14 diff --git a/extensions/telegram/src/bot-handlers.runtime.ts b/extensions/telegram/src/bot-handlers.runtime.ts index cf2e81f0174..7447b696164 100644 --- a/extensions/telegram/src/bot-handlers.runtime.ts +++ b/extensions/telegram/src/bot-handlers.runtime.ts @@ -212,7 +212,7 @@ export const registerTelegramHandlers = ({ entry.debounceLane === "forward" ? FORWARD_BURST_DEBOUNCE_MS : debounceMs, buildKey: (entry) => entry.debounceKey, shouldDebounce: (entry) => { - const text = entry.msg.text ?? entry.msg.caption ?? ""; + const text = getTelegramTextParts(entry.msg).text; const hasDebounceableText = shouldDebounceTextInbound({ text, cfg, @@ -248,7 +248,7 @@ export const registerTelegramHandlers = ({ return; } const combinedText = entries - .map((entry) => entry.msg.text ?? entry.msg.caption ?? 
/**
 * Reports whether `text` looks like leaked binary data rather than
 * human-written message text.
 *
 * The string is treated as binary as soon as it contains a single control
 * character other than tab (U+0009), line feed (U+000A), or carriage return
 * (U+000D) — that is, any other C0 control (U+0000–U+001F) or DEL (U+007F).
 *
 * C1 controls (U+0080–U+009F) are intentionally not flagged: they occur in
 * ordinary pasted mojibake, and flagging them would discard real user text.
 *
 * @param text - Candidate message text or caption.
 * @returns `true` when a disallowed control character is present; `false`
 *   otherwise, including for the empty string.
 */
export function isBinaryContent(text: string): boolean {
  for (let index = 0; index < text.length; index++) {
    const code = text.charCodeAt(index);
    // Tab, LF and CR are normal in multi-line chat messages.
    if (code === 0x09 || code === 0x0a || code === 0x0d) {
      continue;
    }
    // Remaining C0 controls plus DEL never appear in legitimate text.
    if (code <= 0x1f || code === 0x7f) {
      return true;
    }
  }
  return false;
}
[]) : []; return { text, entities }; } diff --git a/extensions/telegram/src/bot/helpers.test.ts b/extensions/telegram/src/bot/helpers.test.ts index 348d44cdf40..a22607a9b07 100644 --- a/extensions/telegram/src/bot/helpers.test.ts +++ b/extensions/telegram/src/bot/helpers.test.ts @@ -7,6 +7,7 @@ import { expandTextLinks, getTelegramTextParts, hasBotMention, + isBinaryContent, normalizeForwardedContext, resolveTelegramDirectPeerId, resolveTelegramForumFlag, @@ -444,6 +445,67 @@ describe("describeReplyTarget", () => { }); }); +describe("isBinaryContent", () => { + it("returns false for normal user text", () => { + expect(isBinaryContent("Hello, world!")).toBe(false); + }); + + it("returns false for text with common whitespace (tabs, newlines)", () => { + expect(isBinaryContent("line one\nline two\ttab")).toBe(false); + }); + + it("returns true for string containing null bytes", () => { + expect(isBinaryContent("PK\x00\x03\x04")).toBe(true); + }); + + it("returns true for typical binary file header bytes", () => { + const mobiBinarySnippet = "\x00\x00\x00\x01BOOKMOBI\x00\x00\x02\x0E"; + expect(isBinaryContent(mobiBinarySnippet)).toBe(true); + }); + + it("returns false for empty string", () => { + expect(isBinaryContent("")).toBe(false); + }); +}); + +describe("getTelegramTextParts — binary caption filtering (#66647)", () => { + it("strips binary caption content to prevent token explosion", () => { + const binaryCaption = "PK\x03\x04\x14\x00\x08binary-ebook-data"; + const result = getTelegramTextParts({ + caption: binaryCaption, + caption_entities: [{ type: "mention", offset: 0, length: 5 }], + chat: { id: 1, type: "private" }, + date: 1, + message_id: 1, + } as any); + expect(result.text).toBe(""); + expect(result.entities).toEqual([]); + }); + + it("preserves normal caption text", () => { + const result = getTelegramTextParts({ + caption: "Here is my document", + caption_entities: [], + chat: { id: 1, type: "private" }, + date: 1, + message_id: 1, + } as any); + 
expect(result.text).toBe("Here is my document"); + }); + + it("strips binary content in msg.text as well", () => { + const result = getTelegramTextParts({ + text: "\x00\x01\x02 binary junk", + entities: [{ type: "bold", offset: 0, length: 3 }], + chat: { id: 1, type: "private" }, + date: 1, + message_id: 1, + } as any); + expect(result.text).toBe(""); + expect(result.entities).toEqual([]); + }); +}); + describe("hasBotMention", () => { it("prefers caption text and caption entities when message text is absent", () => { expect( diff --git a/extensions/telegram/src/bot/helpers.ts b/extensions/telegram/src/bot/helpers.ts index 4d176f30a28..4251d97229b 100644 --- a/extensions/telegram/src/bot/helpers.ts +++ b/extensions/telegram/src/bot/helpers.ts @@ -18,6 +18,7 @@ import { extractTelegramLocation, getTelegramTextParts, hasBotMention, + isBinaryContent, normalizeForwardedContext, resolveTelegramMediaPlaceholder, type TelegramForwardedContext, @@ -32,6 +33,7 @@ export { extractTelegramLocation, getTelegramTextParts, hasBotMention, + isBinaryContent, normalizeForwardedContext, resolveTelegramMediaPlaceholder, };