diff --git a/src/telegram/format.test.ts b/src/telegram/format.test.ts index ac4163b96f0..304b89f5c65 100644 --- a/src/telegram/format.test.ts +++ b/src/telegram/format.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { markdownToTelegramHtml } from "./format.js"; +import { markdownToTelegramHtml, splitTelegramHtmlChunks } from "./format.js"; describe("markdownToTelegramHtml", () => { it("handles core markdown-to-telegram conversions", () => { @@ -112,4 +112,12 @@ describe("markdownToTelegramHtml", () => { expect(res).toContain("secret"); expect(res).toContain("trailing ||"); }); + + it("splits long html text without breaking balanced tags", () => { + const chunks = splitTelegramHtmlChunks(`${"A".repeat(5000)}`, 4000); + expect(chunks.length).toBeGreaterThan(1); + expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true); + expect(chunks[0]).toMatch(/^.*<\/b>$/); + expect(chunks[1]).toMatch(/^.*<\/b>$/); + }); }); diff --git a/src/telegram/format.ts b/src/telegram/format.ts index f74b508b42d..b0b5851f101 100644 --- a/src/telegram/format.ts +++ b/src/telegram/format.ts @@ -241,6 +241,158 @@ export function renderTelegramHtmlText( return markdownToTelegramHtml(text, { tableMode: options.tableMode }); } +type TelegramHtmlTag = { + name: string; + openTag: string; + closeTag: string; +}; + +const TELEGRAM_SELF_CLOSING_HTML_TAGS = new Set(["br"]); + +function buildTelegramHtmlOpenPrefix(tags: TelegramHtmlTag[]): string { + return tags.map((tag) => tag.openTag).join(""); +} + +function buildTelegramHtmlCloseSuffix(tags: TelegramHtmlTag[]): string { + return tags + .slice() + .toReversed() + .map((tag) => tag.closeTag) + .join(""); +} + +function buildTelegramHtmlCloseSuffixLength(tags: TelegramHtmlTag[]): number { + return tags.reduce((total, tag) => total + tag.closeTag.length, 0); +} + +function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number { + if (text.length <= maxLength) { + return text.length; + } + const normalizedMaxLength = Math.max(1, Math.floor(maxLength)); + let splitAt = normalizedMaxLength; + const lastAmpersand = text.lastIndexOf("&", normalizedMaxLength - 1); + const lastSemicolon = text.lastIndexOf(";", normalizedMaxLength - 1); + if (lastAmpersand > lastSemicolon) { + splitAt = lastAmpersand; + } + return splitAt > 0 ? splitAt : normalizedMaxLength; +} + +function popTelegramHtmlTag(tags: TelegramHtmlTag[], name: string): void { + for (let index = tags.length - 1; index >= 0; index -= 1) { + if (tags[index]?.name === name) { + tags.splice(index, 1); + return; + } + } +} + +export function splitTelegramHtmlChunks(html: string, limit: number): string[] { + if (!html) { + return []; + } + const normalizedLimit = Math.max(1, Math.floor(limit)); + if (html.length <= normalizedLimit) { + return [html]; + } + + const chunks: string[] = []; + const openTags: TelegramHtmlTag[] = []; + let current = ""; + let chunkHasContent = false; + + const resetCurrent = () => { + current = buildTelegramHtmlOpenPrefix(openTags); + chunkHasContent = false; + }; + + const flushCurrent = () => { + if (!chunkHasContent) { + return; + } + chunks.push(`${current}${buildTelegramHtmlCloseSuffix(openTags)}`); + resetCurrent(); + }; + + const appendText = (segment: string) => { + let remaining = segment; + while (remaining.length > 0) { + const available = + normalizedLimit - current.length - buildTelegramHtmlCloseSuffixLength(openTags); + if (available <= 0) { + const prefix = buildTelegramHtmlOpenPrefix(openTags); + if (!chunkHasContent && current === prefix) { + current += remaining; + chunkHasContent = true; + remaining = ""; + break; + } + flushCurrent(); + continue; + } + if (remaining.length <= available) { + current += remaining; + chunkHasContent = true; + break; + } + const splitAt = findTelegramHtmlSafeSplitIndex(remaining, available); + current += remaining.slice(0, splitAt); + chunkHasContent = true; + remaining = remaining.slice(splitAt); + flushCurrent(); + } + }; + + resetCurrent(); + HTML_TAG_PATTERN.lastIndex = 0; + let lastIndex = 0; + let match: RegExpExecArray | null; + while ((match = HTML_TAG_PATTERN.exec(html)) !== null) { + const tagStart = match.index; + const tagEnd = HTML_TAG_PATTERN.lastIndex; + appendText(html.slice(lastIndex, tagStart)); + + const rawTag = match[0]; + const isClosing = match[1] === "")); + + if (!isClosing) { + const nextCloseLength = isSelfClosing ? 0 : ``.length; + if ( + chunkHasContent && + current.length + + rawTag.length + + buildTelegramHtmlCloseSuffixLength(openTags) + + nextCloseLength > + normalizedLimit + ) { + flushCurrent(); + } + } + + current += rawTag; + chunkHasContent = true; + if (isClosing) { + popTelegramHtmlTag(openTags, tagName); + } else if (!isSelfClosing) { + openTags.push({ + name: tagName, + openTag: rawTag, + closeTag: ``, + }); + } + lastIndex = tagEnd; + } + + appendText(html.slice(lastIndex)); + flushCurrent(); + return chunks.length > 0 ? chunks : [html]; +} + function splitTelegramChunkByHtmlLimit( chunk: MarkdownIR, htmlLimit: number, diff --git a/src/telegram/send.test.ts b/src/telegram/send.test.ts index a34f27d196f..2905b066b97 100644 --- a/src/telegram/send.test.ts +++ b/src/telegram/send.test.ts @@ -1257,6 +1257,37 @@ describe("sendMessageTelegram", () => { expect.objectContaining({ maxBytes: 42 * 1024 * 1024 }), ); }); + + it("chunks long html-mode text and keeps buttons on the last chunk only", async () => { + const chatId = "123"; + const htmlText = `${"A".repeat(5000)}`; + + const sendMessage = vi + .fn() + .mockResolvedValueOnce({ message_id: 90, chat: { id: chatId } }) + .mockResolvedValueOnce({ message_id: 91, chat: { id: chatId } }); + const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage }; + + const res = await sendMessageTelegram(chatId, htmlText, { + token: "tok", + api, + textMode: "html", + buttons: [[{ text: "OK", callback_data: "ok" }]], + }); + + expect(sendMessage).toHaveBeenCalledTimes(2); + const firstCall = sendMessage.mock.calls[0]; + const secondCall = sendMessage.mock.calls[1]; + expect(firstCall).toBeDefined(); + expect(secondCall).toBeDefined(); + expect((firstCall[1] as string).length).toBeLessThanOrEqual(4000); + expect((secondCall[1] as string).length).toBeLessThanOrEqual(4000); + expect(firstCall[2]?.reply_markup).toBeUndefined(); + expect(secondCall[2]?.reply_markup).toEqual({ + inline_keyboard: [[{ text: "OK", callback_data: "ok" }]], + }); + expect(res.messageId).toBe("91"); + }); }); describe("reactMessageTelegram", () => { diff --git a/src/telegram/send.ts b/src/telegram/send.ts index 313abf361e8..ff1226ab818 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -26,7 +26,7 @@ import { buildTelegramThreadParams, buildTypingThreadParams } from "./bot/helper import type { TelegramInlineButtons } from "./button-types.js"; import { splitTelegramCaption } from "./caption.js"; import { resolveTelegramFetch } from "./fetch.js"; -import { renderTelegramHtmlText } from "./format.js"; +import { renderTelegramHtmlText, splitTelegramHtmlChunks } from "./format.js"; import { isRecoverableTelegramNetworkError, isSafeToRetrySendError } from "./network-errors.js"; import { makeProxyFetch } from "./proxy.js"; import { recordSentMessage } from "./sent-message-cache.js"; @@ -600,13 +600,14 @@ export async function sendMessageTelegram( rawText: string, params?: Record, fallbackText?: string, + preRenderedHtml?: string, ) => { return await withTelegramThreadFallback( params, "message", opts.verbose, async (effectiveParams, label) => { - const htmlText = renderHtmlText(rawText); + const htmlText = preRenderedHtml ?? renderHtmlText(rawText); const baseParams = effectiveParams ? { ...effectiveParams } : {}; if (linkPreviewOptions) { baseParams.link_preview_options = linkPreviewOptions; @@ -647,6 +648,47 @@ export async function sendMessageTelegram( ); }; + const buildTextParams = (isLastChunk: boolean) => + hasThreadParams || (isLastChunk && replyMarkup) + ? { + ...threadParams, + ...(isLastChunk && replyMarkup ? { reply_markup: replyMarkup } : {}), + } + : undefined; + + const sendChunkedText = async ( + rawText: string, + context: string, + ): Promise<{ messageId: string; chatId: string }> => { + const chunks = splitTelegramHtmlChunks(rawText, 4000).map((chunk) => ({ + rawText: chunk, + htmlText: chunk, + plainText: chunk, + })); + + let lastMessageId = ""; + let lastChatId = chatId; + for (let index = 0; index < chunks.length; index += 1) { + const chunk = chunks[index]; + if (!chunk) { + continue; + } + const isLastChunk = index === chunks.length - 1; + const res = await sendTelegramText( + chunk.rawText, + buildTextParams(isLastChunk), + chunk.plainText, + chunk.htmlText, + ); + const messageId = resolveTelegramMessageIdOrThrow(res, context); + recordSentMessage(chatId, messageId); + lastMessageId = String(messageId); + lastChatId = String(res?.chat?.id ?? chatId); + } + + return { messageId: lastMessageId, chatId: lastChatId }; + }; + if (mediaUrl) { const media = await loadWebMedia( mediaUrl, @@ -801,21 +843,15 @@ export async function sendMessageTelegram( // If text was too long for a caption, send it as a separate follow-up message. // Use HTML conversion so markdown renders like captions. if (needsSeparateText && followUpText) { - const textParams = - hasThreadParams || replyMarkup - ? { - ...threadParams, - ...(replyMarkup ? { reply_markup: replyMarkup } : {}), - } - : undefined; + if (textMode === "html") { + const textResult = await sendChunkedText(followUpText, "text follow-up send"); + return { messageId: textResult.messageId, chatId: resolvedChatId }; + } + const textParams = buildTextParams(true); const textRes = await sendTelegramText(followUpText, textParams); - // Return the text message ID as the "main" message (it's the actual content). const textMessageId = resolveTelegramMessageIdOrThrow(textRes, "text follow-up send"); recordSentMessage(chatId, textMessageId); - return { - messageId: String(textMessageId), - chatId: resolvedChatId, - }; + return { messageId: String(textMessageId), chatId: resolvedChatId }; } return { messageId: String(mediaMessageId), chatId: resolvedChatId }; @@ -824,22 +860,22 @@ export async function sendMessageTelegram( if (!text || !text.trim()) { throw new Error("Message must be non-empty for Telegram sends"); } - const textParams = - hasThreadParams || replyMarkup - ? { - ...threadParams, - ...(replyMarkup ? { reply_markup: replyMarkup } : {}), - } - : undefined; - const res = await sendTelegramText(text, textParams, opts.plainText); - const messageId = resolveTelegramMessageIdOrThrow(res, "text send"); - recordSentMessage(chatId, messageId); + let textResult: { messageId: string; chatId: string }; + if (textMode === "html") { + textResult = await sendChunkedText(text, "text send"); + } else { + const textParams = buildTextParams(true); + const res = await sendTelegramText(text, textParams, opts.plainText); + const messageId = resolveTelegramMessageIdOrThrow(res, "text send"); + recordSentMessage(chatId, messageId); + textResult = { messageId: String(messageId), chatId: String(res?.chat?.id ?? chatId) }; + } recordChannelActivity({ channel: "telegram", accountId: account.accountId, direction: "outbound", }); - return { messageId: String(messageId), chatId: String(res?.chat?.id ?? chatId) }; + return textResult; } export async function sendTypingTelegram(