diff --git a/src/telegram/format.test.ts b/src/telegram/format.test.ts index 304b89f5c65..1fa6ebbffe0 100644 --- a/src/telegram/format.test.ts +++ b/src/telegram/format.test.ts @@ -113,11 +113,19 @@ describe("markdownToTelegramHtml", () => { expect(res).toContain("trailing ||"); }); - it("splits long html text without breaking balanced tags", () => { - const chunks = splitTelegramHtmlChunks(`${"A".repeat(5000)}`, 4000); + it("splits long multiline html text without breaking balanced tags", () => { + const chunks = splitTelegramHtmlChunks(`${"A\n".repeat(2500)}`, 4000); expect(chunks.length).toBeGreaterThan(1); expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true); - expect(chunks[0]).toMatch(/^.*<\/b>$/); - expect(chunks[1]).toMatch(/^.*<\/b>$/); + expect(chunks[0]).toMatch(/^[\s\S]*<\/b>$/); + expect(chunks[1]).toMatch(/^[\s\S]*<\/b>$/); + }); + + it("fails loudly when a leading entity cannot fit inside a chunk", () => { + expect(() => splitTelegramHtmlChunks(`A&${"B".repeat(20)}`, 4)).toThrow(/leading entity/i); + }); + + it("fails loudly when tag overhead leaves no room for text", () => { + expect(() => splitTelegramHtmlChunks("x", 10)).toThrow(/tag overhead/i); }); }); diff --git a/src/telegram/format.ts b/src/telegram/format.ts index b0b5851f101..101da515f93 100644 --- a/src/telegram/format.ts +++ b/src/telegram/format.ts @@ -270,13 +270,15 @@ function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number return text.length; } const normalizedMaxLength = Math.max(1, Math.floor(maxLength)); - let splitAt = normalizedMaxLength; const lastAmpersand = text.lastIndexOf("&", normalizedMaxLength - 1); - const lastSemicolon = text.lastIndexOf(";", normalizedMaxLength - 1); - if (lastAmpersand > lastSemicolon) { - splitAt = lastAmpersand; + if (lastAmpersand === -1) { + return normalizedMaxLength; } - return splitAt > 0 ? splitAt : normalizedMaxLength; + const lastSemicolon = text.lastIndexOf(";", normalizedMaxLength - 1); + if (lastAmpersand < lastSemicolon) { + return normalizedMaxLength; + } + return lastAmpersand; } function popTelegramHtmlTag(tags: TelegramHtmlTag[], name: string): void { @@ -300,15 +302,15 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] { const chunks: string[] = []; const openTags: TelegramHtmlTag[] = []; let current = ""; - let chunkHasContent = false; + let chunkHasPayload = false; const resetCurrent = () => { current = buildTelegramHtmlOpenPrefix(openTags); - chunkHasContent = false; + chunkHasPayload = false; }; const flushCurrent = () => { - if (!chunkHasContent) { + if (!chunkHasPayload) { return; } chunks.push(`${current}${buildTelegramHtmlCloseSuffix(openTags)}`); @@ -321,24 +323,31 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] { const available = normalizedLimit - current.length - buildTelegramHtmlCloseSuffixLength(openTags); if (available <= 0) { - const prefix = buildTelegramHtmlOpenPrefix(openTags); - if (!chunkHasContent && current === prefix) { - current += remaining; - chunkHasContent = true; - remaining = ""; - break; + if (!chunkHasPayload) { + throw new Error( + `Telegram HTML chunk limit exceeded by tag overhead (limit=${normalizedLimit})`, + ); } flushCurrent(); continue; } if (remaining.length <= available) { current += remaining; - chunkHasContent = true; + chunkHasPayload = true; break; } const splitAt = findTelegramHtmlSafeSplitIndex(remaining, available); + if (splitAt <= 0) { + if (!chunkHasPayload) { + throw new Error( + `Telegram HTML chunk limit exceeded by leading entity (limit=${normalizedLimit})`, + ); + } + flushCurrent(); + continue; + } current += remaining.slice(0, splitAt); - chunkHasContent = true; + chunkHasPayload = true; remaining = remaining.slice(splitAt); flushCurrent(); } @@ -363,7 +372,7 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] { if (!isClosing) { const nextCloseLength = isSelfClosing ? 0 : ``.length; if ( - chunkHasContent && + chunkHasPayload && current.length + rawTag.length + buildTelegramHtmlCloseSuffixLength(openTags) + @@ -375,7 +384,9 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] { } current += rawTag; - chunkHasContent = true; + if (isSelfClosing) { + chunkHasPayload = true; + } if (isClosing) { popTelegramHtmlTag(openTags, tagName); } else if (!isSelfClosing) { diff --git a/src/telegram/send.test.ts b/src/telegram/send.test.ts index 2905b066b97..4bfbfe95b9d 100644 --- a/src/telegram/send.test.ts +++ b/src/telegram/send.test.ts @@ -1288,6 +1288,35 @@ describe("sendMessageTelegram", () => { }); expect(res.messageId).toBe("91"); }); + + it("preserves caller plain-text fallback across chunked html parse retries", async () => { + const chatId = "123"; + const htmlText = `${"A".repeat(5000)}`; + const plainText = `${"P".repeat(2500)}${"Q".repeat(2500)}`; + const parseErr = new Error( + "400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9", + ); + const sendMessage = vi + .fn() + .mockRejectedValueOnce(parseErr) + .mockResolvedValueOnce({ message_id: 90, chat: { id: chatId } }) + .mockRejectedValueOnce(parseErr) + .mockResolvedValueOnce({ message_id: 91, chat: { id: chatId } }); + const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage }; + + const res = await sendMessageTelegram(chatId, htmlText, { + token: "tok", + api, + textMode: "html", + plainText, + }); + + expect(sendMessage).toHaveBeenCalledTimes(4); + const plainFallbackCalls = [sendMessage.mock.calls[1], sendMessage.mock.calls[3]]; + expect(plainFallbackCalls.map((call) => String(call?.[1] ?? "")).join("")).toBe(plainText); + expect(plainFallbackCalls.every((call) => !String(call?.[1] ?? "").includes("<"))).toBe(true); + expect(res.messageId).toBe("91"); + }); }); describe("reactMessageTelegram", () => { diff --git a/src/telegram/send.ts b/src/telegram/send.ts index ff1226ab818..7cb71ed2576 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -108,6 +108,36 @@ function resolveTelegramMessageIdOrThrow( throw new Error(`Telegram ${context} returned no message_id`); } +function splitTelegramPlainTextFallback(text: string, chunkCount: number, limit: number): string[] { + if (!text) { + return []; + } + const normalizedLimit = Math.max(1, Math.floor(limit)); + if (chunkCount <= 1 || text.length <= normalizedLimit) { + return [text]; + } + if (text.length > chunkCount * normalizedLimit) { + const chunks: string[] = []; + for (let start = 0; start < text.length; start += normalizedLimit) { + chunks.push(text.slice(start, start + normalizedLimit)); + } + return chunks; + } + const chunks: string[] = []; + let offset = 0; + for (let index = 0; index < chunkCount && offset < text.length; index += 1) { + const remainingChars = text.length - offset; + const remainingChunks = chunkCount - index; + const nextChunkLength = + remainingChunks === 1 + ? remainingChars + : Math.min(normalizedLimit, Math.ceil(remainingChars / remainingChunks)); + chunks.push(text.slice(offset, offset + nextChunkLength)); + offset += nextChunkLength; + } + return chunks; +} + const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i; const THREAD_NOT_FOUND_RE = /400:\s*Bad Request:\s*message thread not found/i; const MESSAGE_NOT_MODIFIED_RE = @@ -660,10 +690,14 @@ export async function sendMessageTelegram( rawText: string, context: string, ): Promise<{ messageId: string; chatId: string }> => { - const chunks = splitTelegramHtmlChunks(rawText, 4000).map((chunk) => ({ + const htmlChunks = splitTelegramHtmlChunks(rawText, 4000); + const plainTextChunks = opts.plainText + ? splitTelegramPlainTextFallback(opts.plainText, htmlChunks.length, 4000) + : []; + const chunks = htmlChunks.map((chunk, index) => ({ rawText: chunk, htmlText: chunk, - plainText: chunk, + plainText: plainTextChunks[index], })); let lastMessageId = "";