From 3dfff496291097b2d9e70796a4753e4216b7c22b Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Tue, 10 Mar 2026 20:58:58 +0530 Subject: [PATCH] fix: preserve telegram html fallback --- src/telegram/format.test.ts | 6 +++ src/telegram/format.ts | 48 +++++++++++++++++++++ src/telegram/send.test.ts | 30 +++++++++++++ src/telegram/send.ts | 85 +++++++++++++++++++++++++++++++------ 4 files changed, 155 insertions(+), 14 deletions(-) diff --git a/src/telegram/format.test.ts b/src/telegram/format.test.ts index 1fa6ebbffe0..2fcd06663e0 100644 --- a/src/telegram/format.test.ts +++ b/src/telegram/format.test.ts @@ -125,6 +125,12 @@ describe("markdownToTelegramHtml", () => { expect(() => splitTelegramHtmlChunks(`A&${"B".repeat(20)}`, 4)).toThrow(/leading entity/i); }); + it("treats malformed leading ampersands as plain text when chunking html", () => { + const chunks = splitTelegramHtmlChunks(`&${"A".repeat(5000)}`, 4000); + expect(chunks.length).toBeGreaterThan(1); + expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true); + }); + it("fails loudly when tag overhead leaves no room for text", () => { expect(() => splitTelegramHtmlChunks("x", 10)).toThrow(/tag overhead/i); }); diff --git a/src/telegram/format.ts b/src/telegram/format.ts index 101da515f93..ed1f6c822f8 100644 --- a/src/telegram/format.ts +++ b/src/telegram/format.ts @@ -265,6 +265,50 @@ function buildTelegramHtmlCloseSuffixLength(tags: TelegramHtmlTag[]): number { return tags.reduce((total, tag) => total + tag.closeTag.length, 0); } +function findTelegramHtmlEntityEnd(text: string, start: number): number { + if (text[start] !== "&") { + return -1; + } + let index = start + 1; + if (index >= text.length) { + return -1; + } + if (text[index] === "#") { + index += 1; + if (index >= text.length) { + return -1; + } + const isHex = text[index] === "x" || text[index] === "X"; + if (isHex) { + index += 1; + const hexStart = index; + while (/[0-9A-Fa-f]/.test(text[index] ?? 
"")) { + index += 1; + } + if (index === hexStart) { + return -1; + } + } else { + const digitStart = index; + while (/[0-9]/.test(text[index] ?? "")) { + index += 1; + } + if (index === digitStart) { + return -1; + } + } + } else { + const nameStart = index; + while (/[A-Za-z0-9]/.test(text[index] ?? "")) { + index += 1; + } + if (index === nameStart) { + return -1; + } + } + return text[index] === ";" ? index : -1; +} + function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number { if (text.length <= maxLength) { return text.length; @@ -278,6 +322,10 @@ function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number if (lastAmpersand < lastSemicolon) { return normalizedMaxLength; } + const entityEnd = findTelegramHtmlEntityEnd(text, lastAmpersand); + if (entityEnd === -1 || entityEnd < normalizedMaxLength) { + return normalizedMaxLength; + } return lastAmpersand; } diff --git a/src/telegram/send.test.ts b/src/telegram/send.test.ts index 4bfbfe95b9d..cc6fd80f649 100644 --- a/src/telegram/send.test.ts +++ b/src/telegram/send.test.ts @@ -1317,6 +1317,36 @@ describe("sendMessageTelegram", () => { expect(plainFallbackCalls.every((call) => !String(call?.[1] ?? 
"").includes("<"))).toBe(true); expect(res.messageId).toBe("91"); }); + + it("keeps malformed leading ampersands on the chunked plain-text fallback path", async () => { + const chatId = "123"; + const htmlText = `&${"A".repeat(5000)}`; + const plainText = "fallback!!"; + const parseErr = new Error( + "400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 0", + ); + const sendMessage = vi + .fn() + .mockRejectedValueOnce(parseErr) + .mockResolvedValueOnce({ message_id: 92, chat: { id: chatId } }) + .mockRejectedValueOnce(parseErr) + .mockResolvedValueOnce({ message_id: 93, chat: { id: chatId } }); + const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage }; + + const res = await sendMessageTelegram(chatId, htmlText, { + token: "tok", + api, + textMode: "html", + plainText, + }); + + expect(sendMessage).toHaveBeenCalledTimes(4); + expect(String(sendMessage.mock.calls[0]?.[1] ?? "")).toMatch(/^&/); + const plainFallbackCalls = [sendMessage.mock.calls[1], sendMessage.mock.calls[3]]; + expect(plainFallbackCalls.map((call) => String(call?.[1] ?? "")).join("")).toBe(plainText); + expect(plainFallbackCalls.every((call) => String(call?.[1] ?? 
"").length > 0)).toBe(true); + expect(res.messageId).toBe("93"); + }); }); describe("reactMessageTelegram", () => { diff --git a/src/telegram/send.ts b/src/telegram/send.ts index 7cb71ed2576..55499c9c308 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -108,24 +108,30 @@ function resolveTelegramMessageIdOrThrow( throw new Error(`Telegram ${context} returned no message_id`); } +function splitTelegramPlainTextChunks(text: string, limit: number): string[] { + if (!text) { + return []; + } + const normalizedLimit = Math.max(1, Math.floor(limit)); + const chunks: string[] = []; + for (let start = 0; start < text.length; start += normalizedLimit) { + chunks.push(text.slice(start, start + normalizedLimit)); + } + return chunks; +} + function splitTelegramPlainTextFallback(text: string, chunkCount: number, limit: number): string[] { if (!text) { return []; } const normalizedLimit = Math.max(1, Math.floor(limit)); - if (chunkCount <= 1 || text.length <= normalizedLimit) { - return [text]; - } - if (text.length > chunkCount * normalizedLimit) { - const chunks: string[] = []; - for (let start = 0; start < text.length; start += normalizedLimit) { - chunks.push(text.slice(start, start + normalizedLimit)); - } - return chunks; + const fixedChunks = splitTelegramPlainTextChunks(text, normalizedLimit); + if (chunkCount <= 1 || fixedChunks.length >= chunkCount) { + return fixedChunks; } const chunks: string[] = []; let offset = 0; - for (let index = 0; index < chunkCount && offset < text.length; index += 1) { + for (let index = 0; index < chunkCount; index += 1) { const remainingChars = text.length - offset; const remainingChunks = chunkCount - index; const nextChunkLength = @@ -686,14 +692,65 @@ export async function sendMessageTelegram( } : undefined; + const sendPlainChunkedText = async ( + plainText: string, + context: string, + ): Promise<{ messageId: string; chatId: string }> => { + const chunks = splitTelegramPlainTextChunks(plainText, 4000); + let 
lastMessageId = ""; + let lastChatId = chatId; + for (let index = 0; index < chunks.length; index += 1) { + const chunk = chunks[index]; + if (!chunk) { + continue; + } + const res = await withTelegramThreadFallback( + buildTextParams(index === chunks.length - 1), + "message", + opts.verbose, + async (effectiveParams, label) => { + const params = effectiveParams ? { ...effectiveParams } : {}; + if (linkPreviewOptions) { + params.link_preview_options = linkPreviewOptions; + } + const hasParams = Object.keys(params).length > 0; + return await requestWithChatNotFound( + () => + hasParams + ? api.sendMessage(chatId, chunk, params as Parameters<typeof api.sendMessage>[2]) + : api.sendMessage(chatId, chunk), + label, + ); + }, + ); + const messageId = resolveTelegramMessageIdOrThrow(res, context); + recordSentMessage(chatId, messageId); + lastMessageId = String(messageId); + lastChatId = String(res?.chat?.id ?? chatId); + } + return { messageId: lastMessageId, chatId: lastChatId }; + }; + const sendChunkedText = async ( rawText: string, context: string, ): Promise<{ messageId: string; chatId: string }> => { - const htmlChunks = splitTelegramHtmlChunks(rawText, 4000); - const plainTextChunks = opts.plainText - ? splitTelegramPlainTextFallback(opts.plainText, htmlChunks.length, 4000) - : []; + let htmlChunks: string[]; + try { + htmlChunks = splitTelegramHtmlChunks(rawText, 4000); + } catch (error) { + logVerbose( + `telegram ${context} failed HTML chunk planning, retrying as plain text: ${formatErrorMessage( + error, + )}`, + ); + return await sendPlainChunkedText(opts.plainText ?? rawText, context); + } + const plainTextChunks = splitTelegramPlainTextFallback( + opts.plainText ?? rawText, + htmlChunks.length, + 4000, + ); const chunks = htmlChunks.map((chunk, index) => ({ rawText: chunk, htmlText: chunk,