fix: chunk long html telegram sends

This commit is contained in:
Ayaan Zaidi
2026-03-10 20:13:29 +05:30
parent 8bf64f219a
commit cce7953d8d
4 changed files with 253 additions and 26 deletions

View File

@@ -1,5 +1,5 @@
import { describe, expect, it } from "vitest";
import { markdownToTelegramHtml } from "./format.js";
import { markdownToTelegramHtml, splitTelegramHtmlChunks } from "./format.js";
describe("markdownToTelegramHtml", () => {
it("handles core markdown-to-telegram conversions", () => {
@@ -112,4 +112,12 @@ describe("markdownToTelegramHtml", () => {
expect(res).toContain("<tg-spoiler>secret</tg-spoiler>");
expect(res).toContain("trailing ||");
});
it("splits long html text without breaking balanced tags", () => {
  const payload = "A".repeat(5000);
  const chunks = splitTelegramHtmlChunks(`<b>${payload}</b>`, 4000);
  expect(chunks.length).toBeGreaterThan(1);
  expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true);
  expect(chunks[0]).toMatch(/^<b>.*<\/b>$/);
  expect(chunks[1]).toMatch(/^<b>.*<\/b>$/);
  // Balanced tags alone do not prove correctness: removing the re-added <b>
  // wrappers must give back exactly the original text, with nothing dropped
  // or duplicated at the chunk boundaries.
  expect(chunks.map((chunk) => chunk.replace(/<\/?b>/g, "")).join("")).toBe(payload);
});
});

View File

@@ -241,6 +241,158 @@ export function renderTelegramHtmlText(
return markdownToTelegramHtml(text, { tableMode: options.tableMode });
}
/** One HTML tag that is currently open while splitting HTML into chunks. */
type TelegramHtmlTag = {
/** Lower-cased tag name, used to match a later closing tag. */
name: string;
/** The opening tag exactly as it appeared in the input (attributes included). */
openTag: string;
/** The closing tag synthesized from the tag name, e.g. `</b>`. */
closeTag: string;
};
/** Tag names treated as self-closing by the chunk splitter: they are never tracked as open tags. */
const TELEGRAM_SELF_CLOSING_HTML_TAGS = new Set(["br"]);
/**
 * Concatenates the opening markup of the given tags in array order.
 *
 * @param tags - Tags whose `openTag` strings should be joined.
 * @returns The joined opening tags, or an empty string for an empty list.
 */
function buildTelegramHtmlOpenPrefix(tags: TelegramHtmlTag[]): string {
  return tags.reduce((prefix, tag) => prefix + tag.openTag, "");
}
/**
 * Builds the closing markup for the given open tags, innermost tag first
 * (i.e. in reverse array order), so the result properly closes them.
 *
 * The input array is not modified.
 *
 * @param tags - Open tags in the order they were opened.
 * @returns The joined closing tags, or an empty string for an empty list.
 */
function buildTelegramHtmlCloseSuffix(tags: TelegramHtmlTag[]): string {
  // A single right-to-left fold replaces the copy + reversed copy + map + join
  // chain: no intermediate arrays, and no reliance on ES2023 `toReversed`.
  return tags.reduceRight((suffix, tag) => suffix + tag.closeTag, "");
}
/**
 * Computes how many characters the closing tags for `tags` occupy, without
 * building the suffix string itself.
 *
 * @param tags - Open tags whose `closeTag` lengths are summed.
 * @returns The combined length of all closing tags (0 for an empty list).
 */
function buildTelegramHtmlCloseSuffixLength(tags: TelegramHtmlTag[]): number {
  let suffixLength = 0;
  for (const tag of tags) {
    suffixLength += tag.closeTag.length;
  }
  return suffixLength;
}
/**
 * Picks the index at which `text` should be cut so that the head is at most
 * `maxLength` UTF-16 code units and the cut does not land in the middle of an
 * HTML entity (such as `&amp;`) or between the halves of a surrogate pair.
 *
 * The returned index is always at least 1 when the text has to be cut, so
 * callers make forward progress. When no safe cut point exists (an entity or
 * surrogate pair starts at index 0 and does not fit), the raw limit is used
 * even though that cut is not clean.
 *
 * @param text - Text (without tags) to cut.
 * @param maxLength - Maximum head length; floored and clamped to at least 1.
 * @returns `text.length` when the text already fits, otherwise the cut index.
 */
function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number {
  if (text.length <= maxLength) {
    return text.length;
  }
  const normalizedMaxLength = Math.max(1, Math.floor(maxLength));

  // An "&" that has no ";" after it inside the window means the window ends
  // inside an entity; cut just before that "&" instead. An "&" at index 0
  // cannot be used (the head would be empty), so fall back to the raw limit.
  const lastAmpersand = text.lastIndexOf("&", normalizedMaxLength - 1);
  const lastSemicolon = text.lastIndexOf(";", normalizedMaxLength - 1);
  let splitAt =
    lastAmpersand > lastSemicolon && lastAmpersand > 0 ? lastAmpersand : normalizedMaxLength;

  // Cutting between a high and a low surrogate would leave a lone surrogate
  // in both pieces (a broken emoji / astral character). Back off by one code
  // unit, but only when that still leaves a non-empty head.
  const isHighSurrogate = (text.charCodeAt(splitAt - 1) & 0xfc00) === 0xd800;
  const isLowSurrogate = (text.charCodeAt(splitAt) & 0xfc00) === 0xdc00;
  if (splitAt > 1 && isHighSurrogate && isLowSurrogate) {
    splitAt -= 1;
  }
  return splitAt;
}
/**
 * Removes, in place, the most recently added tag whose name equals `name`.
 * Does nothing when no tag with that name is present.
 *
 * @param tags - Open-tag list to mutate.
 * @param name - Tag name to look for.
 */
function popTelegramHtmlTag(tags: TelegramHtmlTag[], name: string): void {
  // Walk backwards until a matching entry is found or the list is exhausted.
  let cursor = tags.length - 1;
  while (cursor >= 0 && tags[cursor]?.name !== name) {
    cursor -= 1;
  }
  if (cursor >= 0) {
    tags.splice(cursor, 1);
  }
}
/**
 * Splits HTML into chunks that target at most `limit` characters each (measured
 * with `String.length`) while keeping tags balanced: tags still open when a
 * chunk ends are closed at its end and reopened at the start of the next chunk.
 *
 * Tags are always appended whole, and when the open/close tag overhead alone
 * leaves no room for text the pending text is appended anyway, so a chunk can
 * exceed `limit` in those cases.
 *
 * @param html - HTML to split. A falsy value (empty string) yields `[]`.
 * @param limit - Target maximum chunk length; floored and clamped to at least 1.
 * @returns The chunks in order; input that already fits is returned as `[html]`.
 */
export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
if (!html) {
return [];
}
const normalizedLimit = Math.max(1, Math.floor(limit));
if (html.length <= normalizedLimit) {
return [html];
}
const chunks: string[] = [];
// Tags opened but not yet closed at the current scan position, in opening order.
const openTags: TelegramHtmlTag[] = [];
// Chunk under construction; every new chunk starts with the reopened tags.
let current = "";
// False while `current` holds only the reopened prefix; set once text or a tag is appended.
let chunkHasContent = false;
// Starts a fresh chunk that reopens every tag that is still open.
const resetCurrent = () => {
current = buildTelegramHtmlOpenPrefix(openTags);
chunkHasContent = false;
};
// Emits the current chunk with its closing suffix and starts the next one.
// A chunk without content (prefix only) is never emitted.
const flushCurrent = () => {
if (!chunkHasContent) {
return;
}
chunks.push(`${current}${buildTelegramHtmlCloseSuffix(openTags)}`);
resetCurrent();
};
// Appends text found between tags, spilling into new chunks when the budget runs out.
const appendText = (segment: string) => {
let remaining = segment;
while (remaining.length > 0) {
// Room left for text after reserving space for the closing tags this chunk will need.
const available =
normalizedLimit - current.length - buildTelegramHtmlCloseSuffixLength(openTags);
if (available <= 0) {
const prefix = buildTelegramHtmlOpenPrefix(openTags);
// The chunk is still just the prefix, so flushing would be a no-op and the loop
// would never terminate; accept an oversized chunk instead.
if (!chunkHasContent && current === prefix) {
current += remaining;
chunkHasContent = true;
remaining = "";
break;
}
flushCurrent();
continue;
}
if (remaining.length <= available) {
current += remaining;
chunkHasContent = true;
break;
}
// Text does not fit: take as much as allowed (cut index chosen by
// findTelegramHtmlSafeSplitIndex), emit the chunk, and loop with the rest.
const splitAt = findTelegramHtmlSafeSplitIndex(remaining, available);
current += remaining.slice(0, splitAt);
chunkHasContent = true;
remaining = remaining.slice(splitAt);
flushCurrent();
}
};
resetCurrent();
// HTML_TAG_PATTERN is defined outside this block. Resetting lastIndex presumably
// guards against state left on a shared global/sticky regex — TODO confirm.
HTML_TAG_PATTERN.lastIndex = 0;
let lastIndex = 0;
let match: RegExpExecArray | null;
while ((match = HTML_TAG_PATTERN.exec(html)) !== null) {
const tagStart = match.index;
const tagEnd = HTML_TAG_PATTERN.lastIndex;
// Text between the previous tag (or the start of the input) and this tag.
appendText(html.slice(lastIndex, tagStart));
const rawTag = match[0];
// Capture group 1 is compared against "</" to detect a closing tag;
// capture group 2 is used as the tag name.
const isClosing = match[1] === "</";
const tagName = match[2].toLowerCase();
const isSelfClosing =
!isClosing &&
(TELEGRAM_SELF_CLOSING_HTML_TAGS.has(tagName) || rawTag.trimEnd().endsWith("/>"));
if (!isClosing) {
// Before adding an opening tag, check that the tag plus every closing tag that
// must follow it still fits; if not, finish the current chunk first.
const nextCloseLength = isSelfClosing ? 0 : `</${tagName}>`.length;
if (
chunkHasContent &&
current.length +
rawTag.length +
buildTelegramHtmlCloseSuffixLength(openTags) +
nextCloseLength >
normalizedLimit
) {
flushCurrent();
}
}
// Tags are appended whole and are never split across chunks.
current += rawTag;
chunkHasContent = true;
// Keep the open-tag list in sync with what has been written so far.
if (isClosing) {
popTelegramHtmlTag(openTags, tagName);
} else if (!isSelfClosing) {
openTags.push({
name: tagName,
openTag: rawTag,
closeTag: `</${tagName}>`,
});
}
lastIndex = tagEnd;
}
// Trailing text after the last tag, then emit whatever is still pending.
appendText(html.slice(lastIndex));
flushCurrent();
// Fall back to the unsplit input if nothing was emitted.
return chunks.length > 0 ? chunks : [html];
}
function splitTelegramChunkByHtmlLimit(
chunk: MarkdownIR,
htmlLimit: number,

View File

@@ -1257,6 +1257,37 @@ describe("sendMessageTelegram", () => {
expect.objectContaining({ maxBytes: 42 * 1024 * 1024 }),
);
});
it("chunks long html-mode text and keeps buttons on the last chunk only", async () => {
  const chatId = "123";
  const oversizedHtml = `<b>${"A".repeat(5000)}</b>`;
  // Two sends are expected, so exactly two successful responses are queued.
  const sendMessage = vi
    .fn()
    .mockResolvedValueOnce({ message_id: 90, chat: { id: chatId } })
    .mockResolvedValueOnce({ message_id: 91, chat: { id: chatId } });
  const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage };

  const outcome = await sendMessageTelegram(chatId, oversizedHtml, {
    token: "tok",
    api,
    textMode: "html",
    buttons: [[{ text: "OK", callback_data: "ok" }]],
  });

  expect(sendMessage).toHaveBeenCalledTimes(2);
  const leadingCall = sendMessage.mock.calls[0];
  const trailingCall = sendMessage.mock.calls[1];
  expect(leadingCall).toBeDefined();
  expect(trailingCall).toBeDefined();
  // Positional argument 1 is read as the sent text; every chunk must fit the limit.
  expect((leadingCall[1] as string).length).toBeLessThanOrEqual(4000);
  expect((trailingCall[1] as string).length).toBeLessThanOrEqual(4000);
  // Positional argument 2 is read as the params object; only the final chunk carries the keyboard.
  expect(leadingCall[2]?.reply_markup).toBeUndefined();
  expect(trailingCall[2]?.reply_markup).toEqual({
    inline_keyboard: [[{ text: "OK", callback_data: "ok" }]],
  });
  // The reported message id is the one of the last chunk sent.
  expect(outcome.messageId).toBe("91");
});
});
describe("reactMessageTelegram", () => {

View File

@@ -26,7 +26,7 @@ import { buildTelegramThreadParams, buildTypingThreadParams } from "./bot/helper
import type { TelegramInlineButtons } from "./button-types.js";
import { splitTelegramCaption } from "./caption.js";
import { resolveTelegramFetch } from "./fetch.js";
import { renderTelegramHtmlText } from "./format.js";
import { renderTelegramHtmlText, splitTelegramHtmlChunks } from "./format.js";
import { isRecoverableTelegramNetworkError, isSafeToRetrySendError } from "./network-errors.js";
import { makeProxyFetch } from "./proxy.js";
import { recordSentMessage } from "./sent-message-cache.js";
@@ -600,13 +600,14 @@ export async function sendMessageTelegram(
rawText: string,
params?: Record<string, unknown>,
fallbackText?: string,
preRenderedHtml?: string,
) => {
return await withTelegramThreadFallback(
params,
"message",
opts.verbose,
async (effectiveParams, label) => {
const htmlText = renderHtmlText(rawText);
const htmlText = preRenderedHtml ?? renderHtmlText(rawText);
const baseParams = effectiveParams ? { ...effectiveParams } : {};
if (linkPreviewOptions) {
baseParams.link_preview_options = linkPreviewOptions;
@@ -647,6 +648,47 @@ export async function sendMessageTelegram(
);
};
// Builds the extra send params for one text message: thread params (when
// present) plus the reply markup, which is attached to the last chunk only.
// Returns undefined when there is nothing to add.
const buildTextParams = (isLastChunk: boolean) => {
  if (isLastChunk && replyMarkup) {
    return { ...threadParams, reply_markup: replyMarkup };
  }
  return hasThreadParams ? { ...threadParams } : undefined;
};
// Sends already-rendered HTML as one or more messages, splitting it with
// splitTelegramHtmlChunks (4000-character target per chunk). Chunks are sent
// sequentially; the ids of the last message sent are returned.
const sendChunkedText = async (
  rawText: string,
  context: string,
): Promise<{ messageId: string; chatId: string }> => {
  const htmlChunks = splitTelegramHtmlChunks(rawText, 4000);
  const finalPosition = htmlChunks.length - 1;
  let lastMessageId = "";
  let lastChatId = chatId;
  for (const [position, htmlChunk] of htmlChunks.entries()) {
    // The chunk is passed as raw text, as fallback text and as pre-rendered
    // HTML alike, so sendTelegramText does not render it again.
    const sent = await sendTelegramText(
      htmlChunk,
      buildTextParams(position === finalPosition),
      htmlChunk,
      htmlChunk,
    );
    const sentMessageId = resolveTelegramMessageIdOrThrow(sent, context);
    recordSentMessage(chatId, sentMessageId);
    lastMessageId = String(sentMessageId);
    lastChatId = String(sent?.chat?.id ?? chatId);
  }
  return { messageId: lastMessageId, chatId: lastChatId };
};
if (mediaUrl) {
const media = await loadWebMedia(
mediaUrl,
@@ -801,21 +843,15 @@ export async function sendMessageTelegram(
// If text was too long for a caption, send it as a separate follow-up message.
// Use HTML conversion so markdown renders like captions.
if (needsSeparateText && followUpText) {
const textParams =
hasThreadParams || replyMarkup
? {
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
if (textMode === "html") {
const textResult = await sendChunkedText(followUpText, "text follow-up send");
return { messageId: textResult.messageId, chatId: resolvedChatId };
}
const textParams = buildTextParams(true);
const textRes = await sendTelegramText(followUpText, textParams);
// Return the text message ID as the "main" message (it's the actual content).
const textMessageId = resolveTelegramMessageIdOrThrow(textRes, "text follow-up send");
recordSentMessage(chatId, textMessageId);
return {
messageId: String(textMessageId),
chatId: resolvedChatId,
};
return { messageId: String(textMessageId), chatId: resolvedChatId };
}
return { messageId: String(mediaMessageId), chatId: resolvedChatId };
@@ -824,22 +860,22 @@ export async function sendMessageTelegram(
if (!text || !text.trim()) {
throw new Error("Message must be non-empty for Telegram sends");
}
const textParams =
hasThreadParams || replyMarkup
? {
...threadParams,
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
const res = await sendTelegramText(text, textParams, opts.plainText);
const messageId = resolveTelegramMessageIdOrThrow(res, "text send");
recordSentMessage(chatId, messageId);
let textResult: { messageId: string; chatId: string };
if (textMode === "html") {
textResult = await sendChunkedText(text, "text send");
} else {
const textParams = buildTextParams(true);
const res = await sendTelegramText(text, textParams, opts.plainText);
const messageId = resolveTelegramMessageIdOrThrow(res, "text send");
recordSentMessage(chatId, messageId);
textResult = { messageId: String(messageId), chatId: String(res?.chat?.id ?? chatId) };
}
recordChannelActivity({
channel: "telegram",
accountId: account.accountId,
direction: "outbound",
});
return { messageId: String(messageId), chatId: String(res?.chat?.id ?? chatId) };
return textResult;
}
export async function sendTypingTelegram(