mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 16:01:01 +00:00
fix: tighten telegram html chunking
This commit is contained in:
@@ -113,11 +113,19 @@ describe("markdownToTelegramHtml", () => {
|
||||
expect(res).toContain("trailing ||");
|
||||
});
|
||||
|
||||
it("splits long html text without breaking balanced tags", () => {
|
||||
const chunks = splitTelegramHtmlChunks(`<b>${"A".repeat(5000)}</b>`, 4000);
|
||||
it("splits long multiline html text without breaking balanced tags", () => {
|
||||
const chunks = splitTelegramHtmlChunks(`<b>${"A\n".repeat(2500)}</b>`, 4000);
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true);
|
||||
expect(chunks[0]).toMatch(/^<b>.*<\/b>$/);
|
||||
expect(chunks[1]).toMatch(/^<b>.*<\/b>$/);
|
||||
expect(chunks[0]).toMatch(/^<b>[\s\S]*<\/b>$/);
|
||||
expect(chunks[1]).toMatch(/^<b>[\s\S]*<\/b>$/);
|
||||
});
|
||||
|
||||
// A limit of 4 leaves no room for the text that starts at "&" once the
// preceding "A" has been consumed; the splitter is expected to throw
// rather than emit an over-long chunk.
it("fails loudly when a leading entity cannot fit inside a chunk", () => {
  const htmlWithOversizedEntity = `A&${"B".repeat(20)}`;
  const splitWithTinyLimit = () => splitTelegramHtmlChunks(htmlWithOversizedEntity, 4);

  expect(splitWithTinyLimit).toThrow(/leading entity/i);
});
|
||||
|
||||
// Three nested tags around a single character, split with a limit of 10:
// the expected outcome is a "tag overhead" error, not a chunk.
it("fails loudly when tag overhead leaves no room for text", () => {
  const deeplyNestedHtml = "<b><i><u>x</u></i></b>";
  const splitWithTinyLimit = () => splitTelegramHtmlChunks(deeplyNestedHtml, 10);

  expect(splitWithTinyLimit).toThrow(/tag overhead/i);
});
|
||||
});
|
||||
|
||||
@@ -270,13 +270,15 @@ function findTelegramHtmlSafeSplitIndex(text: string, maxLength: number): number
|
||||
return text.length;
|
||||
}
|
||||
const normalizedMaxLength = Math.max(1, Math.floor(maxLength));
|
||||
let splitAt = normalizedMaxLength;
|
||||
const lastAmpersand = text.lastIndexOf("&", normalizedMaxLength - 1);
|
||||
const lastSemicolon = text.lastIndexOf(";", normalizedMaxLength - 1);
|
||||
if (lastAmpersand > lastSemicolon) {
|
||||
splitAt = lastAmpersand;
|
||||
if (lastAmpersand === -1) {
|
||||
return normalizedMaxLength;
|
||||
}
|
||||
return splitAt > 0 ? splitAt : normalizedMaxLength;
|
||||
const lastSemicolon = text.lastIndexOf(";", normalizedMaxLength - 1);
|
||||
if (lastAmpersand < lastSemicolon) {
|
||||
return normalizedMaxLength;
|
||||
}
|
||||
return lastAmpersand;
|
||||
}
|
||||
|
||||
function popTelegramHtmlTag(tags: TelegramHtmlTag[], name: string): void {
|
||||
@@ -300,15 +302,15 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
|
||||
const chunks: string[] = [];
|
||||
const openTags: TelegramHtmlTag[] = [];
|
||||
let current = "";
|
||||
let chunkHasContent = false;
|
||||
let chunkHasPayload = false;
|
||||
|
||||
const resetCurrent = () => {
|
||||
current = buildTelegramHtmlOpenPrefix(openTags);
|
||||
chunkHasContent = false;
|
||||
chunkHasPayload = false;
|
||||
};
|
||||
|
||||
const flushCurrent = () => {
|
||||
if (!chunkHasContent) {
|
||||
if (!chunkHasPayload) {
|
||||
return;
|
||||
}
|
||||
chunks.push(`${current}${buildTelegramHtmlCloseSuffix(openTags)}`);
|
||||
@@ -321,24 +323,31 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
|
||||
const available =
|
||||
normalizedLimit - current.length - buildTelegramHtmlCloseSuffixLength(openTags);
|
||||
if (available <= 0) {
|
||||
const prefix = buildTelegramHtmlOpenPrefix(openTags);
|
||||
if (!chunkHasContent && current === prefix) {
|
||||
current += remaining;
|
||||
chunkHasContent = true;
|
||||
remaining = "";
|
||||
break;
|
||||
if (!chunkHasPayload) {
|
||||
throw new Error(
|
||||
`Telegram HTML chunk limit exceeded by tag overhead (limit=${normalizedLimit})`,
|
||||
);
|
||||
}
|
||||
flushCurrent();
|
||||
continue;
|
||||
}
|
||||
if (remaining.length <= available) {
|
||||
current += remaining;
|
||||
chunkHasContent = true;
|
||||
chunkHasPayload = true;
|
||||
break;
|
||||
}
|
||||
const splitAt = findTelegramHtmlSafeSplitIndex(remaining, available);
|
||||
if (splitAt <= 0) {
|
||||
if (!chunkHasPayload) {
|
||||
throw new Error(
|
||||
`Telegram HTML chunk limit exceeded by leading entity (limit=${normalizedLimit})`,
|
||||
);
|
||||
}
|
||||
flushCurrent();
|
||||
continue;
|
||||
}
|
||||
current += remaining.slice(0, splitAt);
|
||||
chunkHasContent = true;
|
||||
chunkHasPayload = true;
|
||||
remaining = remaining.slice(splitAt);
|
||||
flushCurrent();
|
||||
}
|
||||
@@ -363,7 +372,7 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
|
||||
if (!isClosing) {
|
||||
const nextCloseLength = isSelfClosing ? 0 : `</${tagName}>`.length;
|
||||
if (
|
||||
chunkHasContent &&
|
||||
chunkHasPayload &&
|
||||
current.length +
|
||||
rawTag.length +
|
||||
buildTelegramHtmlCloseSuffixLength(openTags) +
|
||||
@@ -375,7 +384,9 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
|
||||
}
|
||||
|
||||
current += rawTag;
|
||||
chunkHasContent = true;
|
||||
if (isSelfClosing) {
|
||||
chunkHasPayload = true;
|
||||
}
|
||||
if (isClosing) {
|
||||
popTelegramHtmlTag(openTags, tagName);
|
||||
} else if (!isSelfClosing) {
|
||||
|
||||
@@ -1288,6 +1288,35 @@ describe("sendMessageTelegram", () => {
|
||||
});
|
||||
expect(res.messageId).toBe("91");
|
||||
});
|
||||
|
||||
// The mocked sendMessage rejects calls 1 and 3 with a parse-entities error and
// resolves calls 2 and 4. The test then checks that the text passed to calls
// 2 and 4 is the caller's plain text (split in two, tag-free) rather than the
// HTML chunk.
it("preserves caller plain-text fallback across chunked html parse retries", async () => {
  const chatId = "123";
  const longHtml = `<b>${"A".repeat(5000)}</b>`;
  const callerPlainText = "P".repeat(2500) + "Q".repeat(2500);
  const entityParseFailure = new Error(
    "400: Bad Request: can't parse entities: Can't find end of the entity starting at byte offset 9",
  );

  const sendMessage = vi.fn();
  sendMessage
    .mockRejectedValueOnce(entityParseFailure)
    .mockResolvedValueOnce({ message_id: 90, chat: { id: chatId } })
    .mockRejectedValueOnce(entityParseFailure)
    .mockResolvedValueOnce({ message_id: 91, chat: { id: chatId } });
  const api = { sendMessage } as unknown as { sendMessage: typeof sendMessage };

  const result = await sendMessageTelegram(chatId, longHtml, {
    token: "tok",
    api,
    textMode: "html",
    plainText: callerPlainText,
  });

  expect(sendMessage).toHaveBeenCalledTimes(4);

  // Second positional argument of a recorded call, coerced to a string.
  const sentText = (call: unknown[] | undefined): string => String(call?.[1] ?? "");
  const fallbackTexts = [sendMessage.mock.calls[1], sendMessage.mock.calls[3]].map(sentText);

  expect(fallbackTexts.join("")).toBe(callerPlainText);
  expect(fallbackTexts.every((sent) => !sent.includes("<"))).toBe(true);
  expect(result.messageId).toBe("91");
});
|
||||
});
|
||||
|
||||
describe("reactMessageTelegram", () => {
|
||||
|
||||
@@ -108,6 +108,36 @@ function resolveTelegramMessageIdOrThrow(
|
||||
throw new Error(`Telegram ${context} returned no message_id`);
|
||||
}
|
||||
|
||||
/**
 * Splits a plain-text fallback into pieces intended to pair up, index by
 * index, with `chunkCount` already-computed chunks of the same message.
 *
 * Behavior:
 * - Empty `text` yields `[]`.
 * - When at most one chunk is requested (or `chunkCount` is not a number
 *   greater than 1), or `text` already fits in `limit`, `text` is returned
 *   whole as a single chunk.
 * - Otherwise `text` is spread as evenly as possible over `chunkCount` chunks,
 *   each at most `limit` UTF-16 code units long. If `text` does not fit in
 *   `chunkCount * limit`, extra limit-sized chunks are appended so that no
 *   text is dropped; the result can then be longer than `chunkCount`.
 *
 * A chunk boundary is never placed between the two halves of a UTF-16
 * surrogate pair (for example inside an emoji); the boundary is moved one
 * code unit earlier instead. The only exception is when that would leave an
 * empty chunk (a `limit` of 1), where the limit takes priority.
 *
 * Concatenating the returned chunks always reproduces `text`.
 *
 * @param text Plain text to split.
 * @param chunkCount Number of chunks to aim for.
 * @param limit Maximum chunk length in UTF-16 code units; floored and clamped
 *   to at least 1.
 * @returns The chunks, in order.
 */
function splitTelegramPlainTextFallback(text: string, chunkCount: number, limit: number): string[] {
  if (!text) {
    return [];
  }
  const normalizedLimit = Math.max(1, Math.floor(limit));
  // `!(chunkCount > 1)` also covers NaN, which would otherwise lose the text.
  if (!(chunkCount > 1) || text.length <= normalizedLimit) {
    return [text];
  }

  // True when cutting `text` at `index` would separate a high surrogate from
  // the low surrogate that follows it.
  const splitsSurrogatePair = (index: number): boolean => {
    if (index <= 0 || index >= text.length) {
      return false;
    }
    const before = text.charCodeAt(index - 1);
    const after = text.charCodeAt(index);
    return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  };

  const chunks: string[] = [];
  let offset = 0;
  for (let index = 0; offset < text.length; index += 1) {
    const remainingChars = text.length - offset;
    // Once the requested chunk count is used up, keep emitting limit-sized
    // chunks so overflow text is preserved rather than dropped.
    const remainingChunks = Math.max(1, chunkCount - index);
    // Even share of what is left, capped by the limit; at least 1 so the loop
    // always advances.
    const targetLength = Math.max(
      1,
      Math.min(normalizedLimit, Math.ceil(remainingChars / remainingChunks)),
    );
    let end = offset + targetLength;
    // Back off by one code unit instead of cutting a surrogate pair in half,
    // unless that would produce an empty chunk.
    if (end - 1 > offset && splitsSurrogatePair(end)) {
      end -= 1;
    }
    chunks.push(text.slice(offset, end));
    offset = end;
  }
  return chunks;
}
|
||||
|
||||
const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i;
|
||||
const THREAD_NOT_FOUND_RE = /400:\s*Bad Request:\s*message thread not found/i;
|
||||
const MESSAGE_NOT_MODIFIED_RE =
|
||||
@@ -660,10 +690,14 @@ export async function sendMessageTelegram(
|
||||
rawText: string,
|
||||
context: string,
|
||||
): Promise<{ messageId: string; chatId: string }> => {
|
||||
const chunks = splitTelegramHtmlChunks(rawText, 4000).map((chunk) => ({
|
||||
const htmlChunks = splitTelegramHtmlChunks(rawText, 4000);
|
||||
const plainTextChunks = opts.plainText
|
||||
? splitTelegramPlainTextFallback(opts.plainText, htmlChunks.length, 4000)
|
||||
: [];
|
||||
const chunks = htmlChunks.map((chunk, index) => ({
|
||||
rawText: chunk,
|
||||
htmlText: chunk,
|
||||
plainText: chunk,
|
||||
plainText: plainTextChunks[index],
|
||||
}));
|
||||
|
||||
let lastMessageId = "";
|
||||
|
||||
Reference in New Issue
Block a user