From 7253bb298e7c2b0e52323fbb7853eaa5f4e8d03e Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Wed, 24 Jun 2026 23:28:45 +0800 Subject: [PATCH] fix(telegram): clip progress text on a code-point boundary --- .../telegram/src/bot-message-dispatch.ts | 15 ++---- extensions/telegram/src/truncate.test.ts | 48 +++++++++++++++++++ extensions/telegram/src/truncate.ts | 20 ++++++++ 3 files changed, 72 insertions(+), 11 deletions(-) create mode 100644 extensions/telegram/src/truncate.test.ts create mode 100644 extensions/telegram/src/truncate.ts diff --git a/extensions/telegram/src/bot-message-dispatch.ts b/extensions/telegram/src/bot-message-dispatch.ts index a4d64940af6..7cd7218139f 100644 --- a/extensions/telegram/src/bot-message-dispatch.ts +++ b/extensions/telegram/src/bot-message-dispatch.ts @@ -69,6 +69,7 @@ import { resolveDefaultModelForAgent, } from "./bot-message-dispatch.agent.runtime.js"; import { deduplicateBlockSentMedia } from "./bot-message-dispatch.media-dedup.js"; +import { clipTelegramProgressText } from "./truncate.js"; import { generateTopicLabel, getAgentScopedMediaLocalRoots, @@ -364,22 +365,14 @@ async function mirrorTelegramAssistantReplyToTranscript(params: { } } -const MAX_PROGRESS_MARKDOWN_TEXT_CHARS = 300; const TELEGRAM_GENERAL_TOPIC_ID = 1; -function clipProgressMarkdownText(text: string): string { - if (text.length <= MAX_PROGRESS_MARKDOWN_TEXT_CHARS) { - return text; - } - return `${text.slice(0, MAX_PROGRESS_MARKDOWN_TEXT_CHARS - 1).trimEnd()}…`; -} - function sanitizeProgressMarkdownText(text: string): string { return text.replaceAll("`", "'"); } function formatProgressAsMarkdownCode(text: string): string { - const clipped = clipProgressMarkdownText(text); + const clipped = clipTelegramProgressText(text); return `\`${sanitizeProgressMarkdownText(clipped)}\``; } @@ -399,7 +392,7 @@ function escapeTelegramProgressHtml(text: string): string { } function renderTelegramProgressStringLine(text: string): string { - const clipped =
clipProgressMarkdownText(text.trim()); + const clipped = clipTelegramProgressText(text.trim()); const italic = clipped.match(/^_(.*)_$/u); if (italic) { return `${escapeTelegramProgressHtml(italic[1] ?? "")}`; @@ -418,7 +411,7 @@ function renderTelegramProgressLine(line: ChannelProgressDraftCompositorLine): s const parts = [`${escapeTelegramProgressHtml(label)}`]; const detail = line.detail && line.detail !== line.label ? line.detail : undefined; if (detail) { - parts.push(`${escapeTelegramProgressHtml(clipProgressMarkdownText(detail))}`); + parts.push(`${escapeTelegramProgressHtml(clipTelegramProgressText(detail))}`); } else { const text = line.text.trim(); if (text && text !== label) { diff --git a/extensions/telegram/src/truncate.test.ts b/extensions/telegram/src/truncate.test.ts new file mode 100644 index 00000000000..2eea1cdea96 --- /dev/null +++ b/extensions/telegram/src/truncate.test.ts @@ -0,0 +1,48 @@ +// Telegram tests cover progress text clipping behavior. +import { describe, expect, it } from "vitest"; +import { clipTelegramProgressText, TELEGRAM_PROGRESS_MAX_CHARS } from "./truncate.js"; + +describe("clipTelegramProgressText", () => { + it("drops a surrogate-pair emoji whole when it straddles the limit", () => { + // 😀 is U+1F600, encoded as two UTF-16 code units (high \uD83D + low \uDE00). + // Placing the emoji at positions [MAX-2, MAX-1] (0-indexed) puts its high + // surrogate right on the .slice(0, MAX-1) cut edge. A raw .slice keeps only + // \uD83D — an unpaired high surrogate — which is invalid in a Telegram payload. + const base = "a".repeat(TELEGRAM_PROGRESS_MAX_CHARS - 2); // 298 'a's + const out = clipTelegramProgressText(`${base}😀tail`); + expect(out).toBe(`${base}…`); + // No dangling high surrogate (high not followed by a low surrogate).
+ expect(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])/.test(out)).toBe(false); + }); + + it("keeps an emoji that fits entirely before the cut", () => { + // 296 'a's + '😀' (2 units) + 'xyz' (3 units) = 301 total > 300. + // The emoji sits at [296, 297] — entirely before the cut at 299 — so it stays. + const base = "a".repeat(TELEGRAM_PROGRESS_MAX_CHARS - 4); // 296 'a's + const out = clipTelegramProgressText(`${base}😀xyz`); + expect(out).toBe(`${base}😀x…`); + expect(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])/.test(out)).toBe(false); + }); + + it("returns text unchanged when it is within the limit", () => { + const short = "hello 😀 world"; + expect(clipTelegramProgressText(short)).toBe(short); + }); + + it("trims trailing whitespace before the ellipsis", () => { + // The sliced portion may end in spaces when trailing spaces straddle the cut. + const text = `${"a".repeat(TELEGRAM_PROGRESS_MAX_CHARS - 2)} rest`; + const out = clipTelegramProgressText(text); + expect(out).not.toContain(" …"); + expect(out.endsWith("…")).toBe(true); + }); + + it("handles plain ASCII that fills exactly to the limit", () => { + const exact = "x".repeat(TELEGRAM_PROGRESS_MAX_CHARS); + expect(clipTelegramProgressText(exact)).toBe(exact); + const oneOver = `${"x".repeat(TELEGRAM_PROGRESS_MAX_CHARS)}y`; + const out = clipTelegramProgressText(oneOver); + expect(out.length).toBeLessThanOrEqual(TELEGRAM_PROGRESS_MAX_CHARS); + expect(out.endsWith("…")).toBe(true); + }); +}); diff --git a/extensions/telegram/src/truncate.ts b/extensions/telegram/src/truncate.ts new file mode 100644 index 00000000000..18090e813e8 --- /dev/null +++ b/extensions/telegram/src/truncate.ts @@ -0,0 +1,20 @@ +// Telegram progress text clipping with surrogate-safe truncation.
+import { sliceUtf16Safe } from "openclaw/plugin-sdk/text-utility-runtime"; + +export const TELEGRAM_PROGRESS_MAX_CHARS = 300; + +/** + * Clips Telegram progress text to at most {@link TELEGRAM_PROGRESS_MAX_CHARS} UTF-16 code units, + * slicing on a code-point boundary so a surrogate pair straddling the limit is + * dropped whole rather than leaving a lone high surrogate in the payload. + */ +export function clipTelegramProgressText(text: string): string { + if (text.length <= TELEGRAM_PROGRESS_MAX_CHARS) { + return text; + } + // Slice on a code-point boundary so an emoji (or any astral character) that + // straddles the limit is dropped whole instead of leaving a lone \uD83D-style + // high surrogate before the ellipsis, which serializes to an invalid character + // in the Telegram Bot API payload. + return `${sliceUtf16Safe(text, 0, TELEGRAM_PROGRESS_MAX_CHARS - 1).trimEnd()}…`; +}