diff --git a/src/telegram/format.test.ts b/src/telegram/format.test.ts
index 304b89f5c65..1fa6ebbffe0 100644
--- a/src/telegram/format.test.ts
+++ b/src/telegram/format.test.ts
@@ -113,11 +113,19 @@ describe("markdownToTelegramHtml", () => {
expect(res).toContain("trailing ||");
});
- it("splits long html text without breaking balanced tags", () => {
- const chunks = splitTelegramHtmlChunks(`${"A".repeat(5000)}`, 4000);
+ it("splits long multiline html text without breaking balanced tags", () => { // Multiline payload: each chunk must respect the limit and end on a closing tag.
+ const chunks = splitTelegramHtmlChunks(`${"A\n".repeat(2500)}`, 4000); // 2500 x "A\n" = 5000 chars, above the 4000 limit. NOTE(review): no opening bold tag is visible in this literal although a closing one is asserted below; confirm the markup was not lost.
expect(chunks.length).toBeGreaterThan(1);
expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true);
- expect(chunks[0]).toMatch(/^.*<\/b>$/);
- expect(chunks[1]).toMatch(/^.*<\/b>$/);
+ expect(chunks[0]).toMatch(/^[\s\S]*<\/b>$/); // [\s\S] instead of "." because the chunk text now contains newlines, which "." does not match.
+ expect(chunks[1]).toMatch(/^[\s\S]*<\/b>$/); // Same check for the second chunk.
+ });
+
+ it("fails loudly when a leading entity cannot fit inside a chunk", () => { // Error path: the splitter must throw rather than cut through an "&..." run.
+ expect(() => splitTelegramHtmlChunks(`A&${"B".repeat(20)}`, 4)).toThrow(/leading entity/i); // Limit 4: after "A" the remainder starts with "&" and has no ";" within reach, so the safe split index is 0 on an empty chunk.
+ });
+
+ it("fails loudly when tag overhead leaves no room for text", () => { // Error path: open/close tag lengths alone reach the limit before any text is added.
+ expect(() => splitTelegramHtmlChunks("x", 10)).toThrow(/tag overhead/i); // NOTE(review): a bare "x" carries no tags, so no overhead can use up the limit of 10; presumably nested markup was lost from this literal. Confirm against the real file.
});
});
diff --git a/src/telegram/format.ts b/src/telegram/format.ts
index b0b5851f101..101da515f93 100644
--- a/src/telegram/format.ts
+++ b/src/telegram/format.ts
@@ -270,13 +270,15 @@ function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number
return text.length;
}
const normalizedMaxLength = Math.max(1, Math.floor(maxLength));
- let splitAt = normalizedMaxLength;
const lastAmpersand = text.lastIndexOf("&", normalizedMaxLength - 1);
- const lastSemicolon = text.lastIndexOf(";", normalizedMaxLength - 1);
- if (lastAmpersand > lastSemicolon) {
- splitAt = lastAmpersand;
+ if (lastAmpersand === -1) {
+ return normalizedMaxLength;
}
- return splitAt > 0 ? splitAt : normalizedMaxLength;
+ const lastSemicolon = text.lastIndexOf(";", normalizedMaxLength - 1);
+ if (lastAmpersand < lastSemicolon) {
+ return normalizedMaxLength;
+ }
+ return lastAmpersand;
}
function popTelegramHtmlTag(tags: TelegramHtmlTag[], name: string): void {
@@ -300,15 +302,15 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
const chunks: string[] = [];
const openTags: TelegramHtmlTag[] = [];
let current = "";
- let chunkHasContent = false;
+ let chunkHasPayload = false;
const resetCurrent = () => {
current = buildTelegramHtmlOpenPrefix(openTags);
- chunkHasContent = false;
+ chunkHasPayload = false;
};
const flushCurrent = () => {
- if (!chunkHasContent) {
+ if (!chunkHasPayload) {
return;
}
chunks.push(`${current}${buildTelegramHtmlCloseSuffix(openTags)}`);
@@ -321,24 +323,31 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
const available =
normalizedLimit - current.length - buildTelegramHtmlCloseSuffixLength(openTags);
if (available <= 0) {
- const prefix = buildTelegramHtmlOpenPrefix(openTags);
- if (!chunkHasContent && current === prefix) {
- current += remaining;
- chunkHasContent = true;
- remaining = "";
- break;
+ if (!chunkHasPayload) {
+ throw new Error(
+ `Telegram HTML chunk limit exceeded by tag overhead (limit=${normalizedLimit})`,
+ );
}
flushCurrent();
continue;
}
if (remaining.length <= available) {
current += remaining;
- chunkHasContent = true;
+ chunkHasPayload = true;
break;
}
const splitAt = findTelegramHtmlSafeSplitIndex(remaining, available);
+ if (splitAt <= 0) {
+ if (!chunkHasPayload) {
+ throw new Error(
+ `Telegram HTML chunk limit exceeded by leading entity (limit=${normalizedLimit})`,
+ );
+ }
+ flushCurrent();
+ continue;
+ }
current += remaining.slice(0, splitAt);
- chunkHasContent = true;
+ chunkHasPayload = true;
remaining = remaining.slice(splitAt);
flushCurrent();
}
@@ -363,7 +372,7 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
if (!isClosing) {
const nextCloseLength = isSelfClosing ? 0 : `${tagName}>`.length;
if (
- chunkHasContent &&
+ chunkHasPayload &&
current.length +
rawTag.length +
buildTelegramHtmlCloseSuffixLength(openTags) +
@@ -375,7 +384,9 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
}
current += rawTag;
- chunkHasContent = true;
+ if (isSelfClosing) {
+ chunkHasPayload = true;
+ }
if (isClosing) {
popTelegramHtmlTag(openTags, tagName);
} else if (!isSelfClosing) {
diff --git a/src/telegram/send.test.ts b/src/telegram/send.test.ts
index 2905b066b97..4bfbfe95b9d 100644
--- a/src/telegram/send.test.ts
+++ b/src/telegram/send.test.ts
@@ -1288,6 +1288,35 @@ describe("sendMessageTelegram", () => {
});
expect(res.messageId).toBe("91");
});
+
+ it("preserves caller plain-text fallback across chunked html parse retries", async () => { // Regression: the retry after an HTML parse error must send the caller's plainText, not the HTML chunk.
+ const chatId = "123";
+ const htmlText = `${"A".repeat(5000)}`; // 5000 chars: longer than a single chunk, and the mock sequence below expects exactly two chunks.
+ const plainText = `${"P".repeat(2500)}${"Q".repeat(2500)}`; // Distinct halves so the joined fallback text can be compared exactly.
+ const parseErr = new Error( // Message shaped like a Telegram entity-parse failure.
+ "400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9",
+ );
+ const sendMessage = vi
+ .fn()
+ .mockRejectedValueOnce(parseErr) // call 0: first chunk, HTML attempt fails
+ .mockResolvedValueOnce({ message_id: 90, chat: { id: chatId } }) // call 1: first chunk, retry succeeds
+ .mockRejectedValueOnce(parseErr) // call 2: second chunk, HTML attempt fails
+ .mockResolvedValueOnce({ message_id: 91, chat: { id: chatId } }); // call 3: second chunk, retry succeeds
+ const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage }; // Minimal stub exposing only sendMessage.
+
+ const res = await sendMessageTelegram(chatId, htmlText, {
+ token: "tok",
+ api,
+ textMode: "html",
+ plainText, // Caller-supplied fallback under test.
+ });
+
+ expect(sendMessage).toHaveBeenCalledTimes(4); // Two chunks x (failed HTML attempt + retry).
+ const plainFallbackCalls = [sendMessage.mock.calls[1], sendMessage.mock.calls[3]]; // The odd-numbered calls are the retries.
+ expect(plainFallbackCalls.map((call) => String(call?.[1] ?? "")).join("")).toBe(plainText); // Assumes argument index 1 is the message text; joined retries must reproduce plainText exactly.
+ expect(plainFallbackCalls.every((call) => !String(call?.[1] ?? "").includes("<"))).toBe(true); // No markup may leak into the retries.
+ expect(res.messageId).toBe("91"); // Result reports the id of the last message sent.
+ });
});
describe("reactMessageTelegram", () => {
diff --git a/src/telegram/send.ts b/src/telegram/send.ts
index ff1226ab818..7cb71ed2576 100644
--- a/src/telegram/send.ts
+++ b/src/telegram/send.ts
@@ -108,6 +108,36 @@ function resolveTelegramMessageIdOrThrow(
throw new Error(`Telegram ${context} returned no message_id`);
}
+ function splitTelegramPlainTextFallback(text: string, chunkCount: number, limit: number): string[] { // Splits plain-text fallback `text` into pieces of at most `limit` chars, aiming for `chunkCount` pieces.
+ if (!text) { // No fallback text: nothing to split.
+ return [];
+ }
+ const normalizedLimit = Math.max(1, Math.floor(limit)); // Integer limit, at least 1, so each slice advances.
+ if (chunkCount <= 1 || text.length <= normalizedLimit) { // Single target chunk, or the text already fits: keep it whole.
+ return [text]; // NOTE(review): with chunkCount <= 1 this can be longer than the limit; confirm the caller tolerates that.
+ }
+ if (text.length > chunkCount * normalizedLimit) { // Too long for chunkCount pieces: fall back to fixed-size slices.
+ const chunks: string[] = [];
+ for (let start = 0; start < text.length; start += normalizedLimit) {
+ chunks.push(text.slice(start, start + normalizedLimit)); // NOTE(review): slice() counts UTF-16 code units, so a cut may land inside a surrogate pair.
+ }
+ return chunks; // Holds more than chunkCount entries in this branch.
+ }
+ const chunks: string[] = []; // Otherwise spread the text evenly over at most chunkCount pieces.
+ let offset = 0;
+ for (let index = 0; index < chunkCount && offset < text.length; index += 1) { // Stops early once the text is used up.
+ const remainingChars = text.length - offset;
+ const remainingChunks = chunkCount - index;
+ const nextChunkLength =
+ remainingChunks === 1
+ ? remainingChars // The last piece takes whatever is left.
+ : Math.min(normalizedLimit, Math.ceil(remainingChars / remainingChunks)); // Even share, never above the limit.
+ chunks.push(text.slice(offset, offset + nextChunkLength)); // NOTE(review): same UTF-16 boundary caveat as the fixed-size branch.
+ offset += nextChunkLength;
+ }
+ return chunks;
+ }
+
const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i;
const THREAD_NOT_FOUND_RE = /400:\s*Bad Request:\s*message thread not found/i;
const MESSAGE_NOT_MODIFIED_RE =
@@ -660,10 +690,14 @@ export async function sendMessageTelegram(
rawText: string,
context: string,
): Promise<{ messageId: string; chatId: string }> => {
- const chunks = splitTelegramHtmlChunks(rawText, 4000).map((chunk) => ({
+ const htmlChunks = splitTelegramHtmlChunks(rawText, 4000);
+ const plainTextChunks = opts.plainText
+ ? splitTelegramPlainTextFallback(opts.plainText, htmlChunks.length, 4000)
+ : [];
+ const chunks = htmlChunks.map((chunk, index) => ({
rawText: chunk,
htmlText: chunk,
- plainText: chunk,
+ plainText: plainTextChunks[index],
}));
let lastMessageId = "";