fix: preserve telegram html fallback

This commit is contained in:
Ayaan Zaidi
2026-03-10 20:58:58 +05:30
parent 1381705a14
commit 3dfff49629
4 changed files with 155 additions and 14 deletions

View File

@@ -125,6 +125,12 @@ describe("markdownToTelegramHtml", () => {
expect(() => splitTelegramHtmlChunks(`A&${"B".repeat(20)}`, 4)).toThrow(/leading entity/i);
});
it("treats malformed leading ampersands as plain text when chunking html", () => {
  // Input is a bare "&" followed by 5000 letters, with no ";" anywhere.
  const limit = 4000;
  const input = "&" + "A".repeat(5000);
  const pieces = splitTelegramHtmlChunks(input, limit);
  // The splitter must produce several chunks, none longer than the limit.
  expect(pieces.length).toBeGreaterThan(1);
  const allWithinLimit = pieces.every((piece) => piece.length <= limit);
  expect(allWithinLimit).toBe(true);
});
it("fails loudly when tag overhead leaves no room for text", () => {
  // Splitting this triple-nested markup with a limit of 10 is expected to throw
  // an error whose message mentions "tag overhead".
  const splitNestedMarkup = () => splitTelegramHtmlChunks("<b><i><u>x</u></i></b>", 10);
  expect(splitNestedMarkup).toThrow(/tag overhead/i);
});

View File

@@ -265,6 +265,50 @@ function buildTelegramHtmlCloseSuffixLength(tags: TelegramHtmlTag[]): number {
return tags.reduce((total, tag) => total + tag.closeTag.length, 0);
}
/**
 * Finds the closing `;` of an HTML character reference that begins at `start`.
 *
 * Three shapes are accepted, each requiring at least one body character:
 *   - named:       `&` + ASCII letters/digits + `;`
 *   - decimal:     `&#` + decimal digits + `;`
 *   - hexadecimal: `&#x` or `&#X` + hex digits + `;`
 *
 * Only the syntactic shape is checked; names are not validated against any
 * list of known entities.
 *
 * @param text  The string to inspect.
 * @param start Index at which the reference is expected to begin (the `&`).
 * @returns The index of the terminating `;`, or -1 when `text[start]` is not
 *          `&` or no well-formed reference starts there.
 */
function findTelegramHtmlEntityEnd(text: string, start: number): number {
  if (text[start] !== "&") {
    return -1;
  }

  // Returns the first index at or after `from` whose character is not accepted
  // by `allowed`. `charAt` yields "" past the end, which never matches.
  const skipWhile = (from: number, allowed: RegExp): number => {
    let cursor = from;
    while (allowed.test(text.charAt(cursor))) {
      cursor += 1;
    }
    return cursor;
  };

  const afterAmpersand = start + 1;
  let bodyStart: number;
  let bodyChars: RegExp;
  if (text.charAt(afterAmpersand) !== "#") {
    // Named reference: the body starts right after the ampersand.
    bodyStart = afterAmpersand;
    bodyChars = /[A-Za-z0-9]/;
  } else {
    const radixMarker = text.charAt(afterAmpersand + 1);
    if (radixMarker === "x" || radixMarker === "X") {
      // Hexadecimal numeric reference: skip "#" and the radix marker.
      bodyStart = afterAmpersand + 2;
      bodyChars = /[0-9A-Fa-f]/;
    } else {
      // Decimal numeric reference: skip only "#".
      bodyStart = afterAmpersand + 1;
      bodyChars = /[0-9]/;
    }
  }

  const bodyEnd = skipWhile(bodyStart, bodyChars);
  if (bodyEnd === bodyStart) {
    // An empty body ("&;", "&#;", "&#x;", or a truncated string) is malformed.
    return -1;
  }
  return text.charAt(bodyEnd) === ";" ? bodyEnd : -1;
}
function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number {
if (text.length <= maxLength) {
return text.length;
@@ -278,6 +322,10 @@ function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number
if (lastAmpersand < lastSemicolon) {
return normalizedMaxLength;
}
const entityEnd = findTelegramHtmlEntityEnd(text, lastAmpersand);
if (entityEnd === -1 || entityEnd < normalizedMaxLength) {
return normalizedMaxLength;
}
return lastAmpersand;
}

View File

@@ -1317,6 +1317,36 @@ describe("sendMessageTelegram", () => {
expect(plainFallbackCalls.every((call) => !String(call?.[1] ?? "").includes("<"))).toBe(true);
expect(res.messageId).toBe("91");
});
// Exercises sendMessageTelegram in HTML mode with text that starts with a bare
// "&" and a separate plain-text alternative supplied via `plainText`.
it("keeps malformed leading ampersands on the chunked plain-text fallback path", async () => {
const chatId = "123";
// 5001 characters: one "&" followed by 5000 "A"s, with no terminating ";".
const htmlText = `&${"A".repeat(5000)}`;
const plainText = "fallback!!";
const parseErr = new Error(
"400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 0",
);
// The mock alternates: calls 0 and 2 reject with the parse error, calls 1 and 3
// resolve with message ids 92 and 93. The order of these one-shot results is
// what the assertions below rely on.
const sendMessage = vi
.fn()
.mockRejectedValueOnce(parseErr)
.mockResolvedValueOnce({ message_id: 92, chat: { id: chatId } })
.mockRejectedValueOnce(parseErr)
.mockResolvedValueOnce({ message_id: 93, chat: { id: chatId } });
const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage };
const res = await sendMessageTelegram(chatId, htmlText, {
token: "tok",
api,
textMode: "html",
plainText,
});
// Exactly four sends are expected: two rejected attempts, each followed by a
// successful retry.
expect(sendMessage).toHaveBeenCalledTimes(4);
// The first attempt must still carry the leading "&" from the HTML text.
expect(String(sendMessage.mock.calls[0]?.[1] ?? "")).toMatch(/^&/);
// Calls 1 and 3 are the successful retries; concatenated, their text must
// reproduce `plainText` exactly, and neither retry may be empty.
const plainFallbackCalls = [sendMessage.mock.calls[1], sendMessage.mock.calls[3]];
expect(plainFallbackCalls.map((call) => String(call?.[1] ?? "")).join("")).toBe(plainText);
expect(plainFallbackCalls.every((call) => String(call?.[1] ?? "").length > 0)).toBe(true);
// The reported message id is the one from the last successful send.
expect(res.messageId).toBe("93");
});
});
describe("reactMessageTelegram", () => {

View File

@@ -108,24 +108,30 @@ function resolveTelegramMessageIdOrThrow(
throw new Error(`Telegram ${context} returned no message_id`);
}
/**
 * Splits plain text into consecutive chunks of at most `limit` UTF-16 code
 * units. Concatenating the chunks always reproduces `text`.
 *
 * A chunk boundary is never placed between the two halves of a surrogate pair
 * (for example an emoji) when the chunk has room to end one unit earlier;
 * cutting there would leave a lone surrogate at the end of one chunk and the
 * start of the next. With an effective limit of 1 a pair cannot fit in any
 * chunk, so it is still split in order to honour the limit.
 *
 * @param text  Text to split. An empty string yields no chunks.
 * @param limit Maximum chunk length in UTF-16 code units. Fractions are
 *              floored and values below 1 are treated as 1.
 * @returns The chunks in order; every chunk is non-empty for a finite limit.
 */
function splitTelegramPlainTextChunks(text: string, limit: number): string[] {
  if (!text) {
    return [];
  }
  const normalizedLimit = Math.max(1, Math.floor(limit));
  const chunks: string[] = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(text.length, start + normalizedLimit);
    // Only back off when the chunk would stay non-empty afterwards.
    if (end < text.length && end - start > 1) {
      const lastUnit = text.charCodeAt(end - 1);
      const nextUnit = text.charCodeAt(end);
      const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
      const nextIsLowSurrogate = nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
      if (endsOnHighSurrogate && nextIsLowSurrogate) {
        // Move the whole pair into the next chunk instead of cutting it.
        end -= 1;
      }
    }
    chunks.push(text.slice(start, end));
    start = end;
  }
  return chunks;
}
function splitTelegramPlainTextFallback(text: string, chunkCount: number, limit: number): string[] {
if (!text) {
return [];
}
const normalizedLimit = Math.max(1, Math.floor(limit));
if (chunkCount <= 1 || text.length <= normalizedLimit) {
return [text];
}
if (text.length > chunkCount * normalizedLimit) {
const chunks: string[] = [];
for (let start = 0; start < text.length; start += normalizedLimit) {
chunks.push(text.slice(start, start + normalizedLimit));
}
return chunks;
const fixedChunks = splitTelegramPlainTextChunks(text, normalizedLimit);
if (chunkCount <= 1 || fixedChunks.length >= chunkCount) {
return fixedChunks;
}
const chunks: string[] = [];
let offset = 0;
for (let index = 0; index < chunkCount && offset < text.length; index += 1) {
for (let index = 0; index < chunkCount; index += 1) {
const remainingChars = text.length - offset;
const remainingChunks = chunkCount - index;
const nextChunkLength =
@@ -686,14 +692,65 @@ export async function sendMessageTelegram(
}
: undefined;
/**
 * Sends `plainText` as a series of messages of at most 4000 characters each,
 * one after another, and records every sent message id.
 *
 * @param plainText Text to deliver; it is split with splitTelegramPlainTextChunks.
 * @param context   Label passed to resolveTelegramMessageIdOrThrow for its error message.
 * @returns The message id and chat id of the last message sent. The message id
 *          is "" when no chunk was sent.
 */
const sendPlainChunkedText = async (
  plainText: string,
  context: string,
): Promise<{ messageId: string; chatId: string }> => {
  const pieces = splitTelegramPlainTextChunks(plainText, 4000);

  // Sends one chunk through the thread-fallback and chat-not-found wrappers.
  const deliverPiece = async (piece: string, isFinalPiece: boolean) =>
    await withTelegramThreadFallback(
      buildTextParams(isFinalPiece),
      "message",
      opts.verbose,
      async (effectiveParams, label) => {
        const requestParams = effectiveParams ? { ...effectiveParams } : {};
        if (linkPreviewOptions) {
          requestParams.link_preview_options = linkPreviewOptions;
        }
        // Omit the third argument entirely when there are no parameters to send.
        const sendRequest =
          Object.keys(requestParams).length > 0
            ? () =>
                api.sendMessage(
                  chatId,
                  piece,
                  requestParams as Parameters<typeof api.sendMessage>[2],
                )
            : () => api.sendMessage(chatId, piece);
        return await requestWithChatNotFound(sendRequest, label);
      },
    );

  let latestMessageId = "";
  let latestChatId = chatId;
  for (let position = 0; position < pieces.length; position += 1) {
    const piece = pieces[position];
    if (!piece) {
      continue;
    }
    // buildTextParams receives true only for the final chunk.
    const response = await deliverPiece(piece, position === pieces.length - 1);
    const sentMessageId = resolveTelegramMessageIdOrThrow(response, context);
    recordSentMessage(chatId, sentMessageId);
    latestMessageId = String(sentMessageId);
    latestChatId = String(response?.chat?.id ?? chatId);
  }
  return { messageId: latestMessageId, chatId: latestChatId };
};
const sendChunkedText = async (
rawText: string,
context: string,
): Promise<{ messageId: string; chatId: string }> => {
const htmlChunks = splitTelegramHtmlChunks(rawText, 4000);
const plainTextChunks = opts.plainText
? splitTelegramPlainTextFallback(opts.plainText, htmlChunks.length, 4000)
: [];
let htmlChunks: string[];
try {
htmlChunks = splitTelegramHtmlChunks(rawText, 4000);
} catch (error) {
logVerbose(
`telegram ${context} failed HTML chunk planning, retrying as plain text: ${formatErrorMessage(
error,
)}`,
);
return await sendPlainChunkedText(opts.plainText ?? rawText, context);
}
const plainTextChunks = splitTelegramPlainTextFallback(
opts.plainText ?? rawText,
htmlChunks.length,
4000,
);
const chunks = htmlChunks.map((chunk, index) => ({
rawText: chunk,
htmlText: chunk,