fix(telegram): split long messages at word boundaries instead of mid-word (#56595)

Replace proportional text estimate with an exhaustive descending search
for the largest text prefix whose rendered Telegram HTML fits the
character limit (prefix HTML length is not monotonic, so a binary search
is not safe), then split at the last whitespace boundary within that
verified prefix.

Single words longer than the limit still hard-split (unavoidable).
Markdown formatting stays balanced across split points.

Fixes #36644
This commit is contained in:
Robin Waslander
2026-03-28 21:24:59 +01:00
committed by GitHub
parent 865160e572
commit ab2ef7bbfc
2 changed files with 73 additions and 16 deletions

View File

@@ -433,28 +433,21 @@ export function splitTelegramHtmlChunks(html: string, limit: number): string[] {
return chunks.length > 0 ? chunks : [html];
}
// NOTE(review): this span is a rendered diff hunk; removed and added lines are interleaved
// without +/- markers. The three-parameter signature and the proportional-estimate lines are
// presumably the removed version, and the two-parameter signature plus the search-based lines
// the added version (consistent with the commit message and the updated call site).
//
// Purpose: split one MarkdownIR chunk into pieces so that the first piece's rendered Telegram
// HTML fits within htmlLimit. Returns [chunk] unchanged when no split is attempted.
function splitTelegramChunkByHtmlLimit(
chunk: MarkdownIR,
htmlLimit: number,
renderedHtmlLength: number,
): MarkdownIR[] {
// Two-parameter signature: the rendered HTML length is no longer passed in by the caller.
function splitTelegramChunkByHtmlLimit(chunk: MarkdownIR, htmlLimit: number): MarkdownIR[] {
const currentTextLength = chunk.text.length;
// Text of length 0 or 1 is returned as a single chunk without attempting a split.
if (currentTextLength <= 1) {
return [chunk];
}
// Proportional estimate: scale the text length by htmlLimit / renderedHtmlLength, cap it at
// currentTextLength - 1, and fall back to half the text length when the estimate is not a
// finite positive number.
const proportionalLimit = Math.floor(
(currentTextLength * htmlLimit) / Math.max(renderedHtmlLength, 1),
);
const candidateLimit = Math.min(currentTextLength - 1, proportionalLimit);
const splitLimit =
Number.isFinite(candidateLimit) && candidateLimit > 0
? candidateLimit
: Math.max(1, Math.floor(currentTextLength / 2));
// Search-based limit: the largest prefix length whose rendered HTML was checked against
// htmlLimit. A result of 0 means no prefix fits, so the chunk is returned unsplit.
const splitLimit = findLargestTelegramChunkTextLengthWithinHtmlLimit(chunk, htmlLimit);
if (splitLimit <= 0) {
return [chunk];
}
// Split using the whitespace-preserving splitter with splitLimit as the size bound.
const split = splitMarkdownIRPreserveWhitespace(chunk, splitLimit);
if (split.length > 1) {
const firstChunk = split[0];
// Accept the split only when its first piece, once rendered, is within htmlLimit.
if (firstChunk && renderTelegramChunkHtml(firstChunk).length <= htmlLimit) {
return split;
}
// Fallback that re-splits at half the text length.
return splitMarkdownIRPreserveWhitespace(chunk, Math.max(1, Math.floor(currentTextLength / 2)));
// Fallback that hard-slices at splitLimit: [0, splitLimit) and [splitLimit, end).
return [sliceMarkdownIR(chunk, 0, splitLimit), sliceMarkdownIR(chunk, splitLimit, currentTextLength)];
}
function sliceStyleSpans(
@@ -554,6 +547,26 @@ function renderTelegramChunkHtml(ir: MarkdownIR): string {
return wrapFileReferencesInHtml(renderTelegramHtml(ir));
}
/**
 * Finds the longest proper text prefix of `chunk` whose rendered Telegram HTML
 * is no longer than `htmlLimit`.
 *
 * @param chunk - IR chunk whose text prefixes are tried, longest first.
 * @param htmlLimit - Maximum allowed length of the rendered HTML string.
 * @returns The fitting prefix length in `[1, chunk.text.length - 1]`, or `0`
 *   when no such prefix fits. Text of length 0 or 1 returns its own length
 *   without rendering anything.
 */
function findLargestTelegramChunkTextLengthWithinHtmlLimit(
  chunk: MarkdownIR,
  htmlLimit: number,
): number {
  const totalLength = chunk.text.length;
  if (totalLength <= 1) {
    return totalLength;
  }

  // Rendered prefix length does not grow monotonically with text length: a
  // sliced auto-link can render as a long <a ...> fragment, whereas a longer,
  // completed file ref de-linkifies to a shorter <code>...</code> wrapper.
  // Every candidate is therefore checked exactly, from longest to shortest.
  let prefixLength = totalLength - 1;
  while (prefixLength >= 1) {
    const renderedPrefix = renderTelegramChunkHtml(sliceMarkdownIR(chunk, 0, prefixLength));
    if (renderedPrefix.length <= htmlLimit) {
      return prefixLength;
    }
    prefixLength -= 1;
  }

  return 0;
}
function findMarkdownIRPreservedSplitIndex(text: string, start: number, limit: number): number {
const maxEnd = Math.min(text.length, start + limit);
if (maxEnd >= text.length) {
@@ -735,7 +748,7 @@ function renderTelegramChunksWithinHtmlLimit(
finalized.push(chunk);
continue;
}
const split = splitTelegramChunkByHtmlLimit(chunk, normalizedLimit, html.length);
const split = splitTelegramChunkByHtmlLimit(chunk, normalizedLimit);
if (split.length <= 1) {
// Worst-case safety: avoid retry loops, deliver the chunk as-is.
finalized.push(chunk);