// Mirror of https://github.com/openclaw/openclaw.git (synced 2026-06-29 07:13:40 +00:00).
// 604 lines, 25 KiB, TypeScript.
// Telegram tests cover format plugin behavior.
import { describe, expect, it } from "vitest";
import {
  markdownToTelegramChunks,
  markdownToTelegramHtml,
  markdownToTelegramRichHtml,
  materializeTelegramRichHtmlLineBreaks,
  renderTelegramHtmlText,
  sanitizeTelegramRichHtml,
  splitTelegramHtmlChunks,
  telegramHtmlToPlainTextFallback,
} from "./format.js";

// NOTE(review): this copy appears to have passed through an HTML renderer: several
// expectations below equal their inputs even though the test names say the markup is
// escaped, some Markdown image inputs are empty strings, and "indented code" inputs carry
// a single leading space. String literals are kept exactly as found; confirm them against
// the upstream file before relying on these expectations.
describe("markdownToTelegramHtml", () => {
  it("handles core markdown-to-telegram conversions", () => {
    // Each case is [assertion label, Markdown input, expected Telegram HTML].
    const cases = [
      [
        "renders basic inline formatting",
        "hi _there_ **boss** `code`",
        "hi <i>there</i> <b>boss</b> <code>code</code>",
      ],
      [
        "renders links as Telegram-safe HTML",
        "see [docs](https://example.com)",
        'see <a href="https://example.com">docs</a>',
      ],
      ["preserves Telegram HTML", "<b>yes</b>", "<b>yes</b>"],
      // NOTE(review): the next two "escapes ..." cases expect output identical to the
      // input; the expected strings presumably held entity escapes — verify upstream.
      [
        "escapes unsupported raw HTML",
        "<script>nope</script>",
        "<script>nope</script>",
      ],
      ["escapes unsafe characters", "a & b < c", "a & b < c"],
      ["renders paragraphs with blank lines", "first\n\nsecond", "first\n\nsecond"],
      ["renders lists without block HTML", "- one\n- two", "• one\n• two"],
      ["renders ordered lists with numbering", "2. two\n3. three", "2. two\n3. three"],
      ["flattens headings", "# Title", "Title"],
    ] as const;
    for (const [name, input, expected] of cases) {
      expect(markdownToTelegramHtml(input), name).toBe(expected);
    }
  });

  it("preserves supported Telegram HTML in stream markdown rendering", () => {
    const input = [
      "✉️ <b>Morning Email Rollup</b>",
      "",
      "<blockquote>✅ No important emails in the last 24 hours.</blockquote>",
      "",
      "<pre><code>oauth2: invalid_grant</code></pre>",
    ].join("\n");

    expect(markdownToTelegramHtml(input)).toBe(input);
    expect(
      markdownToTelegramChunks(input, 4096)
        .map((chunk) => chunk.html)
        .join(""),
    ).toBe(input);
  });

  it("preserves Telegram expandable blockquote HTML", () => {
    const input = "<blockquote expandable>hidden details</blockquote>";

    expect(markdownToTelegramHtml(input)).toBe(input);
    expect(renderTelegramHtmlText(input, { textMode: "html" })).toBe(input);
  });

  it("does not promote Telegram HTML tags inside code", () => {
    // NOTE(review): as written the inner tags are expected verbatim inside <code>;
    // the expectations presumably held escaped tags — verify upstream.
    expect(markdownToTelegramHtml("`<b>literal</b>`")).toBe(
      "<code><b>literal</b></code>",
    );
    expect(markdownToTelegramHtml("```\n<blockquote>literal</blockquote>\n```")).toBe(
      "<pre><code><blockquote>literal</blockquote>\n</code></pre>",
    );
  });

  it("keeps unsupported Telegram HTML variants escaped", () => {
    // NOTE(review): every expectation here equals its input, which contradicts
    // "escaped" in the test name — verify the expected strings upstream.
    expect(markdownToTelegramHtml('<b class="x">bad</b>')).toBe('<b class="x">bad</b>');
    expect(markdownToTelegramHtml('<blockquote cite="x">bad</blockquote>')).toBe(
      '<blockquote cite="x">bad</blockquote>',
    );
    expect(markdownToTelegramHtml("<sup>1</sup>")).toBe("<sup>1</sup>");
    expect(renderTelegramHtmlText('<b class="x">bad</b>', { textMode: "html" })).toBe(
      '<b class="x">bad</b>',
    );
  });

  it("preserves rich-only Telegram HTML tags on the rich path", () => {
    expect(markdownToTelegramRichHtml("<sup>1</sup>")).toBe("<sup>1</sup>");
  });

  it("materializes inline and paragraph newlines as <br> for rich messages", () => {
    // The exact reported symptom: literal "• " bullets (not Markdown list markers)
    // joined by soft breaks, which Bot API 10.1 rich messages collapse without <br>.
    expect(
      materializeTelegramRichHtmlLineBreaks(
        "Start here:\n\n• Florist - Red Bird\n• Tomberlin - Seventeen",
      ),
    ).toBe("Start here:<br><br>• Florist - Red Bird<br>• Tomberlin - Seventeen");
    expect(materializeTelegramRichHtmlLineBreaks("Line one\nLine two")).toBe(
      "Line one<br>Line two",
    );
    // Soft breaks inside an inline-styled block (blockquote) also collapse.
    expect(materializeTelegramRichHtmlLineBreaks("<blockquote>one\ntwo</blockquote>")).toBe(
      "<blockquote>one<br>two</blockquote>",
    );
    expect(
      materializeTelegramRichHtmlLineBreaks('<b>one</b>\n<a href="https://example.com">two</a>'),
    ).toBe('<b>one</b><br><a href="https://example.com">two</a>');
  });

  it("keeps newlines literal inside code, pre, and math", () => {
    expect(materializeTelegramRichHtmlLineBreaks("<pre><code>first\nsecond\n</code></pre>")).toBe(
      "<pre><code>first\nsecond\n</code></pre>",
    );
    expect(materializeTelegramRichHtmlLineBreaks("<code>a\nb</code>")).toBe("<code>a\nb</code>");
    expect(materializeTelegramRichHtmlLineBreaks("<tg-math-block>x\ny</tg-math-block>")).toBe(
      "<tg-math-block>x\ny</tg-math-block>",
    );
  });

  it("preserves structural newlines that only separate block tags", () => {
    // Block tags already break; a stray <br> would add a blank line or land as an
    // invalid container child. Mixed text hugging a block keeps its boundary \n too.
    const blocks = "<h2>Plan</h2>\n<table><tbody><tr><td>A</td></tr></tbody></table>";
    expect(materializeTelegramRichHtmlLineBreaks(blocks)).toBe(blocks);
    expect(
      materializeTelegramRichHtmlLineBreaks(
        'A\n\n<figure><img src="https://x/a.jpg"/></figure>\n\nB',
      ),
    ).toBe('A\n\n<figure><img src="https://x/a.jpg"/></figure>\n\nB');
  });

  it("does not let a self-closing literal tag swallow later line breaks", () => {
    expect(materializeTelegramRichHtmlLineBreaks("<tg-math/>\na\nb")).toBe("<tg-math/><br>a<br>b");
  });

  it("does not inject <br> into pretty-printed rich containers", () => {
    // Explicit rich HTML can arrive pretty-printed; newlines between or inside
    // table/figure/details container children are layout, not prose, and the
    // block-counting set omits thead/tbody/td/th/caption/figcaption/summary.
    const table =
      "<table>\n<thead>\n<tr><th>H</th></tr>\n</thead>\n<tbody>\n<tr><td>A</td></tr>\n</tbody>\n</table>";
    expect(materializeTelegramRichHtmlLineBreaks(table)).toBe(table);
    const figure =
      '<figure>\n<img src="https://x/a.jpg"/>\n<figcaption>\nCap\n</figcaption>\n</figure>';
    expect(materializeTelegramRichHtmlLineBreaks(figure)).toBe(figure);
    const details = "<details>\n<summary>\nMore\n</summary>\nBody\n</details>";
    expect(materializeTelegramRichHtmlLineBreaks(details)).toBe(details);
  });

  it("keeps existing <br> tags intact without doubling adjacent newlines", () => {
    expect(materializeTelegramRichHtmlLineBreaks("a<br>b\nc")).toBe("a<br>b<br>c");
    // A newline hugging an existing <br> stays literal — the break already exists.
    expect(materializeTelegramRichHtmlLineBreaks("line1<br>\nline2")).toBe("line1<br>\nline2");
  });

  it("preserves rich table, details, quote, checklist, anchor, and math HTML", () => {
    const input = [
      '<a name="top"></a>',
      "<h2>Plan</h2>",
      '<table bordered striped><caption>Scores</caption><thead><tr><th align="left">Name</th><th align="right" colspan="2">Total</th></tr></thead><tbody><tr><td>A</td><td align="right">1</td><td>2</td></tr></tbody></table>',
      "<details><summary>More</summary><p>Hidden</p></details>",
      "<aside>Pull quote<cite>Source</cite></aside>",
      '<ul><li><input type="checkbox" checked/>Done</li><li><input type="checkbox"/>Todo</li></ul>',
      '<p><a href="#top">Back</a> H<sub>2</sub>O E=mc<sup>2</sup> <mark>note</mark> <tg-spoiler>secret</tg-spoiler> <tg-math>E=mc^2</tg-math></p>',
      "<tg-math-block>\\int_0^1 x^2 dx</tg-math-block>",
    ].join("\n");

    expect(markdownToTelegramRichHtml(input)).toBe(input);
  });

  it("converts raw HTML tables to code fallbacks in legacy HTML mode", () => {
    const input = [
      "<table>",
      "<thead><tr><th>Name</th><th>Age</th></tr></thead>",
      "<tbody><tr><td>Ada</td><td>37</td></tr></tbody>",
      "</table>",
    ].join("");

    const html = renderTelegramHtmlText(input, { textMode: "html" });

    expect(html).toBe("<pre><code>| Name | Age |\n| Ada | 37 |</code></pre>\n\n");
    expect(html).not.toContain("<table");
  });

  it("keeps raw HTML tables escaped inside legacy HTML code blocks", () => {
    // NOTE(review): input and expectation both show raw <table> markup although the
    // test name says "escaped"; presumably both held entity escapes — verify upstream.
    expect(
      renderTelegramHtmlText("<pre><code><table><tr><td>A</td></tr></table></code></pre>", {
        textMode: "html",
      }),
    ).toBe(
      "<pre><code><table><tr><td>A</td></tr></table></code></pre>",
    );
  });

  it("preserves supported raw rich HTML tables during sanitization", () => {
    const input =
      '<table bordered><caption>Scores</caption><tbody><tr><td>A</td><td align="right">1</td></tr></tbody></table>';

    expect(sanitizeTelegramRichHtml(input)).toBe(input);
  });

  it("isolates rich media tags as blocks", () => {
    const html = markdownToTelegramRichHtml(
      'One <img src="https://example.com/a.jpg" alt="A"> two https://example.com/page',
    );

    expect(html).toContain(
      '\n\n<figure><img src="https://example.com/a.jpg" alt="A"/></figure>\n\n',
    );
    expect(html).toContain('<a href="https://example.com/page">https://example.com/page</a>');
    // NOTE(review): as written this contradicts the <figure><img .../> expectation
    // above; the needle was presumably an escaped "<img" — verify upstream.
    expect(html).not.toContain("<img");
    expect(html).not.toContain('<a href="https://example.com/a.jpg">');
  });

  it("escapes rich media tags without supported http sources", () => {
    // NOTE(review): the first two expectations equal their inputs despite "escapes"
    // in the test name — verify the expected strings upstream.
    expect(markdownToTelegramRichHtml('<img src="logo.png" alt="Logo">')).toBe(
      '<img src="logo.png" alt="Logo">',
    );
    expect(markdownToTelegramRichHtml('<audio src="data:audio/wav;base64,x"></audio>')).toBe(
      '<audio src="data:audio/wav;base64,x"></audio>',
    );
    expect(markdownToTelegramRichHtml('<video src="https://example.com/a.mp4"></video>')).toBe(
      '<figure><video src="https://example.com/a.mp4"></video></figure>',
    );
  });

  it("renders Markdown media blocks on the rich HTML fallback path", () => {
    // NOTE(review): the inputs below are empty (or nearly empty) strings while the
    // expectations describe images; the Markdown image syntax appears to be missing
    // from this copy, and the alt/src attributes look entity-decoded — verify upstream.
    expect(markdownToTelegramRichHtml('')).toBe(
      '<figure><img src="https://example.com/a.jpg" alt="Diagram"/><figcaption>Caption</figcaption></figure>',
    );
    expect(
      markdownToTelegramRichHtml(''),
    ).toBe(
      '<figure><img src="https://cdn.example/img.png?token=a&expires=b" alt="A "quote""/></figure>',
    );
    expect(markdownToTelegramRichHtml("")).toBe(
      '<figure><img src="https://example.com/a.png" alt="A > B"/></figure>',
    );
    expect(markdownToTelegramRichHtml("See .")).toBe(
      'See <a href="https://example.com/a.jpg">Diagram</a>.',
    );
    expect(markdownToTelegramRichHtml("```\n\n```")).toBe(
      "<pre><code>\n</code></pre>",
    );
  });

  it("renders rich tables and falls back when they exceed Telegram's column limit", () => {
    // Builds a three-row Markdown table (header, separator, one data row) with the
    // requested number of columns.
    const table = (columns: number) =>
      [
        `| ${Array.from({ length: columns }, (_, index) => `H${index + 1}`).join(" | ")} |`,
        `| ${Array.from({ length: columns }, () => "---").join(" | ")} |`,
        `| ${Array.from({ length: columns }, (_, index) => String(index + 1)).join(" | ")} |`,
      ].join("\n");

    expect(markdownToTelegramRichHtml(table(20))).toContain("<table bordered striped>");
    expect(markdownToTelegramRichHtml(table(21))).toContain("<pre><code>");

    const codeModeHtml = markdownToTelegramRichHtml(table(2), { tableMode: "code" });
    expect(codeModeHtml).toContain("<pre><code>");
    // Match the tag prefix: rich tables open with attributes (see the
    // "<table bordered striped>" assertion above), so "<table>" could never match.
    expect(codeModeHtml).not.toContain("<table");
  });

  it("falls back over-wide raw rich HTML tables", () => {
    const cells = Array.from({ length: 21 }, (_, index) => `<td>C${index + 1}</td>`).join("");
    const html = `<table><caption>Wide</caption><tbody><tr>${cells}</tr></tbody></table>`;
    const sanitized = sanitizeTelegramRichHtml(html);

    expect(sanitized).toContain("<pre><code>Wide");
    expect(sanitized).toContain("C21");
    expect(sanitized).not.toContain("<table>");
  });

  it("clamps raw rich HTML table colspans before fallback", () => {
    const html = '<table><tbody><tr><td colspan="1000000000">x</td></tr></tbody></table>';
    const sanitized = sanitizeTelegramRichHtml(html);

    expect(sanitized).toContain("<pre><code>");
    // A bounded length shows the huge colspan was not expanded into that many cells.
    expect(sanitized.length).toBeLessThan(300);
  });

  it("renders block-mode tables as code in legacy Telegram HTML", () => {
    const table = "| A | B |\n| --- | --- |\n| 1 | 2 |";

    expect(markdownToTelegramHtml(table, { tableMode: "block" })).toBe(
      "<pre><code>| A | B |\n| --- | --- |\n| 1 | 2 |\n</code></pre>",
    );
  });

  it("preserves inline markdown inside rich table cells", () => {
    const html = markdownToTelegramRichHtml(
      "| Name | Link |\n| --- | --- |\n| **API** | [docs](https://example.com) |",
    );

    expect(html).toContain("<td><b>API</b></td>");
    expect(html).toContain('<td><a href="https://example.com">docs</a></td>');
  });

  it("preserves markdown table column alignment in rich tables", () => {
    const html = markdownToTelegramRichHtml(
      "| Feature | Status | Count |\n| :--- | :---: | ---: |\n| Rich tables | Fixed | 2 |",
    );

    expect(html).toContain('<th align="left">Feature</th>');
    expect(html).toContain('<th align="center">Status</th>');
    expect(html).toContain('<th align="right">Count</th>');
    expect(html).toContain('<td align="left">Rich tables</td>');
    expect(html).toContain('<td align="center">Fixed</td>');
    expect(html).toContain('<td align="right">2</td>');
  });

  it("does not auto-linkify bare URLs when entity detection is skipped", () => {
    expect(markdownToTelegramRichHtml("https://example.com", { skipEntityDetection: true })).toBe(
      "https://example.com",
    );
    expect(
      markdownToTelegramRichHtml("[docs](https://example.com)", { skipEntityDetection: true }),
    ).toBe('<a href="https://example.com">docs</a>');
  });

  it("keeps unsupported markdown link hrefs as visible text in rich HTML", () => {
    expect(
      markdownToTelegramRichHtml(
        "[scripts/yougile.py](/home/dankar/.openclaw/workspace-yougile/scripts/yougile.py#L41)",
      ),
    ).toBe("<code>scripts/yougile.py</code>");
    expect(markdownToTelegramRichHtml("[config](./openclaw.json)")).toBe("config");
    expect(markdownToTelegramRichHtml("[docs](https://example.com/docs)")).toBe(
      '<a href="https://example.com/docs">docs</a>',
    );
    expect(markdownToTelegramRichHtml("[user](tg://user?id=123)")).toBe(
      '<a href="tg://user?id=123">user</a>',
    );
    expect(markdownToTelegramRichHtml("[support](mailto:user@example.com)")).toBe(
      '<a href="mailto:user@example.com">support</a>',
    );
    expect(markdownToTelegramRichHtml("[call](tel:+123456789)")).toBe(
      '<a href="tel:+123456789">call</a>',
    );
    expect(markdownToTelegramRichHtml("[back](#top)")).toBe('<a href="#top">back</a>');
  });

  it("preserves Markdown heading levels in rich HTML", () => {
    expect(markdownToTelegramRichHtml("# Title\n\n### Detail")).toBe(
      "<h1>Title</h1>\n\n<h3>Detail</h3>",
    );
  });

  it("normalizes raw code language HTML without leaking tags", () => {
    const commandBlock = '<code class="language-text">/queue followup debounce:0\n</code>';

    expect(markdownToTelegramHtml(commandBlock)).toBe("<code>/queue followup debounce:0\n</code>");
    expect(
      markdownToTelegramHtml('<pre><code class="language-python">print(1)\n</code></pre>'),
    ).toBe('<pre><code class="language-python">print(1)\n</code></pre>');
  });

  it("renders blockquotes as native Telegram blockquote tags", () => {
    const res = markdownToTelegramHtml("> Quote");
    expect(res).toContain("<blockquote>");
    expect(res).toContain("Quote");
    expect(res).toContain("</blockquote>");
  });

  it("renders blockquotes with inline formatting", () => {
    const res = markdownToTelegramHtml("> **bold** quote");
    expect(res).toContain("<blockquote>");
    expect(res).toContain("<b>bold</b>");
    expect(res).toContain("</blockquote>");
  });

  it("renders multiline blockquotes as a single Telegram blockquote", () => {
    const res = markdownToTelegramHtml("> first\n> second");
    expect(res).toBe("<blockquote>first\nsecond</blockquote>");
  });

  it("renders separated quoted paragraphs as distinct blockquotes", () => {
    const res = markdownToTelegramHtml("> first\n\n> second");
    expect(res).toContain("<blockquote>first");
    expect(res).toContain("<blockquote>second</blockquote>");
    expect(res.match(/<blockquote>/g)).toHaveLength(2);
  });

  it("renders fenced code block languages for Telegram native copy buttons", () => {
    const res = markdownToTelegramHtml('```bash\necho "hello"\n```');
    expect(res).toBe('<pre><code class="language-bash">echo "hello"\n</code></pre>');
  });

  it("properly nests overlapping bold and autolink (#4071)", () => {
    const res = markdownToTelegramHtml("**start https://example.com** end");
    expect(res).toMatch(
      /<b>start <a href="https:\/\/example\.com">https:\/\/example\.com<\/a><\/b> end/,
    );
  });

  it("properly nests link inside bold", () => {
    const res = markdownToTelegramHtml("**bold [link](https://example.com) text**");
    expect(res).toBe('<b>bold <a href="https://example.com">link</a> text</b>');
  });

  it("properly nests bold wrapping a link with trailing text", () => {
    const res = markdownToTelegramHtml("**[link](https://example.com) rest**");
    expect(res).toBe('<b><a href="https://example.com">link</a> rest</b>');
  });

  it("properly nests bold inside a link", () => {
    const res = markdownToTelegramHtml("[**bold**](https://example.com)");
    expect(res).toBe('<a href="https://example.com"><b>bold</b></a>');
  });

  it("wraps punctuated file references in code tags", () => {
    const res = markdownToTelegramHtml("See README.md. Also (backup.sh).");
    expect(res).toContain("<code>README.md</code>.");
    expect(res).toContain("(<code>backup.sh</code>).");
  });

  it("renders spoiler tags", () => {
    const res = markdownToTelegramHtml("the answer is ||42||");
    expect(res).toBe("the answer is <tg-spoiler>42</tg-spoiler>");
  });

  it("renders spoiler with nested formatting", () => {
    const res = markdownToTelegramHtml("||**secret** text||");
    expect(res).toBe("<tg-spoiler><b>secret</b> text</tg-spoiler>");
  });

  it("preserves spacing between Telegram bullet blocks and following numbered sections", () => {
    const input = [
      "2. Main invariants:",
      "",
      " • Raw Log is source of truth.",
      " • Autonomy starts only with report/draft.",
      "3. Cognee is a candidate:",
      "",
      " • bake-off first;",
      " • decide keep/adopt/hybrid later.",
      "4. Project Flow slices:",
    ].join("\n");

    const res = markdownToTelegramHtml(input, { wrapFileRefs: false });

    expect(res).toContain("report/draft.\n\n3. Cognee");
    expect(res).toContain("keep/adopt/hybrid later.\n\n4. Project");
  });

  it("preserves Telegram list boundary spacing in chunked rendering", () => {
    const input = [
      "2. Main invariants:",
      "",
      " • Raw Log is source of truth.",
      " • Autonomy starts only with report/draft.",
      "3. Cognee is a candidate:",
    ].join("\n");

    const res = markdownToTelegramChunks(input, 4096)
      .map((chunk) => chunk.html)
      .join("");

    expect(res).toContain("report/draft.\n\n3. Cognee");
  });

  it("does not insert Telegram list boundary spacing inside fenced code", () => {
    const input = ["```", " • literal bullet", "3. literal number", "```"].join("\n");

    const res = markdownToTelegramHtml(input, { wrapFileRefs: false });

    expect(res).toBe("<pre><code> • literal bullet\n3. literal number\n</code></pre>");
  });

  it("does not insert Telegram list boundary spacing inside indented code", () => {
    // NOTE(review): both lines carry a single leading space here, yet the test targets
    // indented code; the original indentation was presumably wider — verify upstream.
    const input = [" • literal bullet", " 3. literal number"].join("\n");

    const res = markdownToTelegramHtml(input, { wrapFileRefs: false });
    const chunks = markdownToTelegramChunks(input, 4096)
      .map((chunk) => chunk.html)
      .join("");

    expect(res).toBe("<pre><code>• literal bullet\n3. literal number\n</code></pre>");
    expect(chunks).toBe(res);
  });

  it("does not treat single pipe as spoiler", () => {
    const res = markdownToTelegramHtml("( ̄_ ̄|) face");
    expect(res).not.toContain("tg-spoiler");
    expect(res).toContain("|");
  });

  it("does not treat unpaired || as spoiler", () => {
    const res = markdownToTelegramHtml("before || after");
    expect(res).not.toContain("tg-spoiler");
    expect(res).toContain("||");
  });

  it("keeps valid spoiler pairs when a trailing || is unmatched", () => {
    const res = markdownToTelegramHtml("||secret|| trailing ||");
    expect(res).toContain("<tg-spoiler>secret</tg-spoiler>");
    expect(res).toContain("trailing ||");
  });

  it("splits long multiline html text without breaking balanced tags", () => {
    const chunks = splitTelegramHtmlChunks(`<b>${"A\n".repeat(2500)}</b>`, 4000);
    expect(chunks.length).toBeGreaterThan(1);
    expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true);
    // Each chunk must reopen and close the <b> wrapper on its own.
    expect(chunks[0]).toMatch(/^<b>[\s\S]*<\/b>$/);
    expect(chunks[1]).toMatch(/^<b>[\s\S]*<\/b>$/);
  });

  it("does not synthesize closing tags for rich void tags when chunking html", () => {
    const chunks = splitTelegramHtmlChunks(
      `<figure><img src="https://example.com/a.jpg"></figure><ul><li><input type="checkbox" checked>${"A".repeat(80)}</li></ul>`,
      64,
    );

    expect(chunks.join("")).not.toContain("</img>");
    expect(chunks.join("")).not.toContain("</input>");
  });

  it("fails loudly when a leading entity cannot fit inside a chunk", () => {
    // NOTE(review): the input shows a bare "&" with no terminated entity, which the
    // next test treats as plain text; the literal was presumably an entity — verify upstream.
    expect(() => splitTelegramHtmlChunks(`A&${"B".repeat(20)}`, 4)).toThrow(/leading entity/i);
  });

  it("treats malformed leading ampersands as plain text when chunking html", () => {
    const chunks = splitTelegramHtmlChunks(`&${"A".repeat(5000)}`, 4000);
    expect(chunks.length).toBeGreaterThan(1);
    expect(chunks.every((chunk) => chunk.length <= 4000)).toBe(true);
  });

  it("derives readable plain text from Telegram HTML fallback markup", () => {
    const html = [
      'Created: <a href="https://example.com/a?x=1&y=2">Task & One</a>',
      "<code>file.md</code>",
      "<br>",
      '<a href="https://example.com/same">https://example.com/same</a>',
      "<b>done</b>",
    ].join(" ");

    expect(telegramHtmlToPlainTextFallback(html)).toBe(
      "Created: Task & One (https://example.com/a?x=1&y=2) file.md \n https://example.com/same done",
    );
  });

  it("preserves escaped angle-bracket text in Telegram HTML fallback links", () => {
    // NOTE(review): the link text shows a raw "<id>" although the test name says
    // "escaped"; the input presumably used entity escapes — verify upstream.
    expect(
      telegramHtmlToPlainTextFallback(
        '<a href="https://example.com/task?id=1&kind=bug">Task <id></a>',
      ),
    ).toBe("Task <id> (https://example.com/task?id=1&kind=bug)");
  });

  it("preserves table cell boundaries in Telegram HTML fallback text", () => {
    expect(
      telegramHtmlToPlainTextFallback(
        "<table><thead><tr><th>Name</th><th>Age</th></tr></thead><tbody><tr><td>Alice</td><td>30</td></tr></tbody></table>",
      ),
    ).toBe("Name | Age\nAlice | 30");
  });

  it("fails loudly when tag overhead leaves no room for text", () => {
    expect(() => splitTelegramHtmlChunks("<b><i><u>x</u></i></b>", 10)).toThrow(/tag overhead/i);
  });

  it("does not split an astral char across the chunk boundary", () => {
    // Emoji surrogate pair straddles index 10 (limit): high at 9, low at 10.
    const input = `${"A".repeat(9)}😀${"B".repeat(20)}`;
    const chunks = splitTelegramHtmlChunks(input, 10);
    expect(chunks.length).toBeGreaterThan(1);
    expect(chunks.join("")).toBe(input);
    for (const chunk of chunks) {
      expect(containsLoneSurrogate(chunk)).toBe(false);
    }
  });

  it("keeps an astral char whole when a positive limit starts on its pair", () => {
    expect(splitTelegramHtmlChunks("A😀B", 1)).toEqual(["A", "😀", "B"]);
  });

  it("keeps astral chars whole in rendered Markdown chunks", () => {
    const chunks = markdownToTelegramChunks("A😀B", 1);

    expect(chunks.map((chunk) => chunk.text)).toEqual(["A", "😀", "B"]);
    for (const chunk of chunks) {
      expect(containsLoneSurrogate(chunk.html)).toBe(false);
      expect(containsLoneSurrogate(chunk.text)).toBe(false);
    }
  });
});

/**
 * Reports whether `text` contains a UTF-16 surrogate code unit that is not part of a
 * well-formed pair (a high surrogate immediately followed by a low surrogate).
 *
 * @param text - String to scan, one UTF-16 code unit at a time.
 * @returns `true` if a high surrogate lacks a following low surrogate, or a low
 *   surrogate appears without a preceding high surrogate; otherwise `false`.
 */
function containsLoneSurrogate(text: string): boolean {
  const isLowSurrogate = (unit: number): boolean => unit >= 0xdc00 && unit <= 0xdfff;

  for (let index = 0; index < text.length; index += 1) {
    const code = text.charCodeAt(index);
    const isHigh = code >= 0xd800 && code <= 0xdbff;
    if (isHigh) {
      // charCodeAt past the end yields NaN, which fails the range check, so a
      // trailing high surrogate is reported as lone.
      if (!isLowSurrogate(text.charCodeAt(index + 1))) {
        return true;
      }
      // Skip the low half of the pair that was just validated.
      index += 1;
    } else if (isLowSurrogate(code)) {
      // A low surrogate reached here was not consumed by a preceding high one.
      return true;
    }
  }
  return false;
}