import type { MarkdownTableMode } from "openclaw/plugin-sdk/config-runtime";
import {
  chunkMarkdownIR,
  FILE_REF_EXTENSIONS_WITH_TLD,
  isAutoLinkedFileRef,
  markdownToIR,
  type MarkdownLinkSpan,
  type MarkdownIR,
} from "openclaw/plugin-sdk/text-runtime";
import { renderMarkdownWithMarkers } from "openclaw/plugin-sdk/text-runtime";

/** One rendered message chunk: the Telegram HTML plus the plain text it was rendered from. */
export type TelegramFormattedChunk = {
  html: string;
  text: string;
};

/**
 * Escapes the three characters that are significant in HTML text content.
 * `&` is replaced first so the entities produced for `<` and `>` are not double-escaped.
 */
function escapeHtml(text: string): string {
  return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
}

/** Escapes text for use inside a double-quoted HTML attribute value. */
function escapeHtmlAttr(text: string): string {
  return escapeHtml(text).replace(/"/g, "&quot;");
}

/*
 * Note on FILE_REF_EXTENSIONS_WITH_TLD (imported above):
 *
 * File extensions that share TLDs and commonly appear in code/documentation.
 * These are wrapped in <code> tags to prevent Telegram from generating
 * spurious domain registrar previews.
 *
 * Only includes extensions that are:
 * 1. Commonly used as file extensions in code/docs
 * 2. Rarely used as intentional domain references
 *
 * Excluded: .ai, .io, .tv, .fm (popular domain TLDs like x.ai, vercel.io, github.io)
 */

/**
 * Builds the open/close anchor markers for one link span.
 *
 * @param link - Link span (href plus start/end offsets into `text`).
 * @param text - The plain text the span offsets refer to.
 * @returns Marker descriptor for the renderer, or `null` when the link should
 *   be rendered as plain text (blank href, empty span, or an auto-linkified
 *   file reference).
 */
function buildTelegramLink(link: MarkdownLinkSpan, text: string) {
  const href = link.href.trim();
  if (!href) {
    return null;
  }
  if (link.start === link.end) {
    return null;
  }
  // Suppress auto-linkified file references (e.g. README.md → http://README.md)
  const label = text.slice(link.start, link.end);
  if (isAutoLinkedFileRef(href, label)) {
    return null;
  }
  const safeHref = escapeHtmlAttr(href);
  return {
    start: link.start,
    end: link.end,
    open: `<a href="${safeHref}">`,
    close: "</a>",
  };
}

/** Renders a markdown IR to Telegram-flavoured HTML using Telegram's supported tag set. */
function renderTelegramHtml(ir: MarkdownIR): string {
  return renderMarkdownWithMarkers(ir, {
    styleMarkers: {
      bold: { open: "<b>", close: "</b>" },
      italic: { open: "<i>", close: "</i>" },
      strikethrough: { open: "<s>", close: "</s>" },
      code: { open: "<code>", close: "</code>" },
      code_block: { open: "<pre><code>", close: "</code></pre>" },
      spoiler: { open: "<tg-spoiler>", close: "</tg-spoiler>" },
      blockquote: { open: "<blockquote>", close: "</blockquote>" },
    },
    escapeText: escapeHtml,
    buildLink: buildTelegramLink,
  });
}

/**
 * Converts markdown to a single Telegram HTML string.
 *
 * @param markdown - Markdown source; a nullish value is treated as the empty string.
 * @param options - `tableMode` is forwarded to the IR builder. File references
 *   are wrapped unless `wrapFileRefs` is explicitly `false`.
 * @returns The rendered HTML.
 */
export function markdownToTelegramHtml(
  markdown: string,
  options: { tableMode?: MarkdownTableMode; wrapFileRefs?: boolean } = {},
): string {
  const ir = markdownToIR(markdown ?? "", {
    linkify: true,
    enableSpoilers: true,
    headingStyle: "none",
    blockquotePrefix: "",
    tableMode: options.tableMode,
  });
  const html = renderTelegramHtml(ir);
  // Wrapping is the default; callers opt out with `wrapFileRefs: false`
  // (e.g. chunked rendering that applies the wrapping itself).
  if (options.wrapFileRefs !== false) {
    return wrapFileReferencesInHtml(html);
  }
  return html;
}

/**
 * Wraps standalone file references (with TLD extensions) in <code> tags.
 * This prevents Telegram from treating them as URLs and generating
 * irrelevant domain registrar previews.
 *
 * Runs AFTER markdown→HTML conversion to avoid modifying HTML attributes.
 * Skips content inside <code>, <pre>, and <a> tags to avoid nesting issues.
 */
/**
 * Backslash-escapes every regular-expression metacharacter in `str` so the
 * result can be embedded in a `RegExp` source and match the input literally.
 */
function escapeRegex(str: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched character after the added backslash.
  return str.replace(metacharacters, "\\$&");
}

/**
 * Matches an anchor whose visible label is identical to its href minus the
 * `http://` / `https://` scheme, e.g. `<a href="http://README.md">README.md</a>`.
 * Group 1 captures that shared label; the `\1` backreference enforces the
 * equality, so ordinary links with a different label never match.
 */
const AUTO_LINKED_ANCHOR_PATTERN = /<a\s+href="https?:\/\/([^"]+)"[^>]*>\1<\/a>/gi;
/** Matches any opening or closing HTML tag; group 1 is `<` or `</`, group 2 is the tag name. */
const HTML_TAG_PATTERN = /(<\/?)([a-zA-Z][a-zA-Z0-9-]*)\b[^>]*?>/gi;
/** Lazily-built cache for the standalone file-reference pattern. */
let fileReferencePattern: RegExp | undefined;
/** Lazily-built cache for the orphaned single-letter TLD pattern. */
let orphanedTldPattern: RegExp | undefined;

/**
 * Returns the shared, lazily-compiled pattern for standalone file references.
 *
 * Group 1 is the prefix (start of input or one character that is not a
 * letter, digit, `_`, `-` or `/`); group 2 is the path/filename ending in one
 * of the FILE_REF_EXTENSIONS_WITH_TLD extensions. A lookahead requires the
 * reference to be followed by end of input or a non-name character.
 * The pattern is global and case-insensitive.
 */
function getFileReferencePattern(): RegExp {
  const cached = fileReferencePattern;
  if (cached) {
    return cached;
  }
  const extensionAlternation = Array.from(FILE_REF_EXTENSIONS_WITH_TLD)
    .map(escapeRegex)
    .join("|");
  const compiled = new RegExp(
    `(^|[^a-zA-Z0-9_\\-/])([a-zA-Z0-9_.\\-./]+\\.(?:${extensionAlternation}))(?=$|[^a-zA-Z0-9_\\-/])`,
    "gi",
  );
  fileReferencePattern = compiled;
  return compiled;
}

/**
 * Returns the shared, lazily-compiled pattern for "orphaned" references: a
 * single ASCII letter followed by `.` and one of the
 * FILE_REF_EXTENSIONS_WITH_TLD extensions.
 *
 * Group 1 is the prefix (start of input or one non-alphanumeric character);
 * group 2 is the letter-dot-extension text. The lookahead requires end of
 * input or a character that is neither alphanumeric nor `/`.
 * The pattern is global and, unlike the standalone pattern, case-sensitive.
 */
function getOrphanedTldPattern(): RegExp {
  const cached = orphanedTldPattern;
  if (cached) {
    return cached;
  }
  const extensionAlternation = Array.from(FILE_REF_EXTENSIONS_WITH_TLD)
    .map(escapeRegex)
    .join("|");
  const compiled = new RegExp(
    `([^a-zA-Z0-9]|^)([A-Za-z]\\.(?:${extensionAlternation}))(?=[^a-zA-Z0-9/]|$)`,
    "g",
  );
  orphanedTldPattern = compiled;
  return compiled;
}

/**
 * `String.prototype.replace` callback for the standalone file-reference pattern.
 *
 * @param match - The full matched text (prefix + filename).
 * @param prefix - Capture group 1: the text immediately before the reference.
 * @param filename - Capture group 2: the file reference itself.
 * @returns `match` unchanged when the reference is really part of a URL,
 *   otherwise the prefix followed by the filename wrapped in `<code>` tags so
 *   Telegram does not auto-link it.
 */
function wrapStandaloneFileRef(match: string, prefix: string, filename: string): string {
  // A leading "//" means the "filename" is the authority part of a URL
  // (e.g. the tail of "http://host/file.md"); leave it alone.
  if (filename.startsWith("//")) {
    return match;
  }
  // Defensive: never wrap something directly preceded by an http(s) scheme.
  if (/https?:\/\/$/i.test(prefix)) {
    return match;
  }
  return `${prefix}<code>${escapeHtml(filename)}</code>`;
}

/**
 * Wraps file references found in one text segment (the text between two HTML
 * tags) in `<code>` tags.
 *
 * @param text - Text segment to scan.
 * @param codeDepth - Current nesting depth of the code-tag counter.
 * @param preDepth - Current nesting depth of the pre-tag counter.
 * @param anchorDepth - Current nesting depth of the anchor-tag counter.
 * @returns `text` unchanged when it is empty or any depth counter is positive;
 *   otherwise the text with standalone and orphaned references wrapped.
 */
function wrapSegmentFileRefs(
  text: string,
  codeDepth: number,
  preDepth: number,
  anchorDepth: number,
): string {
  if (!text || codeDepth > 0 || preDepth > 0 || anchorDepth > 0) {
    return text;
  }
  const wrappedStandalone = text.replace(getFileReferencePattern(), wrapStandaloneFileRef);
  return wrappedStandalone.replace(getOrphanedTldPattern(), (match, prefix: string, tld: string) =>
    // A ">" prefix means the text sits right after a tag emitted by the first
    // pass (e.g. "<code>R.md"); wrapping again would nest <code> inside <code>.
    prefix === ">" ? match : `${prefix}<code>${escapeHtml(tld)}</code>`,
  );
}

export function wrapFileReferencesInHtml(html: string): string {
  // Safety-net: de-linkify auto-generated anchors where href="http:// fragment, while a longer completed file ref de-linkifies to
  // a shorter ... wrapper. Search exact candidates instead.
  for (let candidateLength = currentTextLength - 1; candidateLength >= 1; candidateLength -= 1) {
    if (renderTelegramChunkHtml(sliceMarkdownIR(chunk, 0, candidateLength)).length <= htmlLimit) {
      return candidateLength;
    }
  }
  return 0;
}

/**
 * Picks the index at which to cut `text` so that the piece starting at `start`
 * is at most `limit` characters long and, where possible, ends on a natural
 * boundary.
 *
 * Candidate boundaries inside the window `[start, start + limit)` are tried in
 * this order:
 *   1. after the last newline outside parentheses,
 *   2. at the last non-newline whitespace outside parentheses,
 *   3. after the last newline anywhere,
 *   4. at the last non-newline whitespace anywhere,
 *   5. the hard window end.
 * Whitespace that precedes the first non-whitespace character of the window is
 * never used as a boundary.
 *
 * @param text - Full text being split.
 * @param start - Index where the current piece begins.
 * @param limit - Maximum piece length in characters.
 * @returns The exclusive end index of the piece; `text.length` when the rest
 *   of the text fits in the window.
 */
function findMarkdownIRPreservedSplitIndex(text: string, start: number, limit: number): number {
  const windowEnd = Math.min(text.length, start + limit);
  if (windowEnd >= text.length) {
    return text.length;
  }

  const isWhitespace = (value: string | undefined): boolean => /\s/.test(value ?? "");

  // Last break candidates seen so far: anywhere in the window, and only at
  // parenthesis depth zero. -1 means "none seen".
  const anywhere = { newlineBreak: -1, spaceBreak: -1, spaceRunStart: -1 };
  const topLevel = { newlineBreak: -1, spaceBreak: -1, spaceRunStart: -1 };
  let depth = 0;
  let contentSeen = false;

  for (let pos = start; pos < windowEnd; pos += 1) {
    const ch = text[pos];
    const opensParen = ch === "(";
    // An unmatched ")" is treated as ordinary content and leaves depth alone.
    const closesParen = ch === ")" && depth > 0;

    if (opensParen || closesParen || !isWhitespace(ch)) {
      contentSeen = true;
      if (opensParen) {
        depth += 1;
      } else if (closesParen) {
        depth -= 1;
      }
    } else if (contentSeen) {
      const breakAfter = pos + 1;
      if (ch === "\n") {
        anywhere.newlineBreak = breakAfter;
        if (depth === 0) {
          topLevel.newlineBreak = breakAfter;
        }
      } else {
        // Remember where the current whitespace run began so the whole run can
        // be pushed to the next piece if it spills past the window.
        const runStart =
          pos === start || !isWhitespace(text[pos - 1]) ? pos : anywhere.spaceRunStart;
        anywhere.spaceBreak = breakAfter;
        anywhere.spaceRunStart = runStart;
        if (depth === 0) {
          topLevel.spaceBreak = breakAfter;
          topLevel.spaceRunStart = runStart;
        }
      }
    }
  }

  // If the whitespace run continues beyond the break, cut at the start of the
  // run instead so the run is not divided between two pieces.
  const settleSpaceBreak = (breakAt: number, runStart: number): number =>
    breakAt > start && runStart > start && isWhitespace(text[breakAt]) ? runStart : breakAt;

  if (topLevel.newlineBreak > start) {
    return topLevel.newlineBreak;
  }
  if (topLevel.spaceBreak > start) {
    return settleSpaceBreak(topLevel.spaceBreak, topLevel.spaceRunStart);
  }
  if (anywhere.newlineBreak > start) {
    return anywhere.newlineBreak;
  }
  if (anywhere.spaceBreak > start) {
    return settleSpaceBreak(anywhere.spaceBreak, anywhere.spaceRunStart);
  }
  return windowEnd;
}

/**
 * Splits a markdown IR into consecutive pieces of at most `limit` characters
 * without trimming anything: concatenating the pieces' `text` yields the
 * original text. Style and link spans are sliced to each piece's range.
 *
 * @param ir - The IR to split.
 * @param limit - Maximum text length per piece; floored and clamped to at least 1.
 * @returns `[]` for empty text, `[ir]` when the text already fits, otherwise
 *   the list of pieces in order.
 */
function splitMarkdownIRPreserveWhitespace(ir: MarkdownIR, limit: number): MarkdownIR[] {
  const { text, styles, links } = ir;
  if (!text) {
    return [];
  }
  const maxPieceLength = Math.max(1, Math.floor(limit));
  if (text.length <= maxPieceLength) {
    return [ir];
  }

  const pieces: MarkdownIR[] = [];
  for (let from = 0; from < text.length; ) {
    const to = findMarkdownIRPreservedSplitIndex(text, from, maxPieceLength);
    pieces.push({
      text: text.slice(from, to),
      styles: sliceStyleSpans(styles, from, to),
      links: sliceLinkSpans(links, from, to),
    });
    from = to;
  }
  return pieces;
}

/**
 * Folds whitespace-only chunks into their neighbours so no chunk consists of
 * whitespace alone.
 *
 * For each whitespace-only chunk, in order of preference:
 *   1. append it to the previous kept chunk, if the merged HTML fits `limit`;
 *   2. prepend it to the next chunk, if the merged HTML fits `limit`;
 *   3. when both neighbours exist, split it so the head goes to the previous
 *      chunk and the tail to the next one, using the longest head that fits;
 *   4. otherwise the whitespace-only chunk is dropped.
 *
 * The input array is not modified.
 *
 * @param chunks - Chunks in document order.
 * @param limit - Maximum rendered HTML length of any merged chunk.
 * @returns A new array of chunks with whitespace-only entries coalesced.
 */
function coalesceWhitespaceOnlyMarkdownIRChunks(chunks: MarkdownIR[], limit: number): MarkdownIR[] {
  // Forward merges rewrite the *next* slot, so work on a private copy instead
  // of mutating the caller's array.
  const queue = chunks.slice();
  const coalesced: MarkdownIR[] = [];
  const fitsLimit = (candidate: MarkdownIR): boolean =>
    renderTelegramChunkHtml(candidate).length <= limit;

  for (let index = 0; index < queue.length; index += 1) {
    const chunk = queue[index];
    if (!chunk) {
      continue;
    }
    if (chunk.text.trim().length > 0) {
      coalesced.push(chunk);
      continue;
    }

    const prev = coalesced.at(-1);
    const next = queue[index + 1];

    if (prev) {
      const mergedPrev = mergeMarkdownIRChunks(prev, chunk);
      if (fitsLimit(mergedPrev)) {
        coalesced[coalesced.length - 1] = mergedPrev;
        continue;
      }
    }

    if (next) {
      const mergedNext = mergeMarkdownIRChunks(chunk, next);
      if (fitsLimit(mergedNext)) {
        queue[index + 1] = mergedNext;
        continue;
      }
    }

    if (prev && next) {
      const chunkLength = chunk.text.length;
      // Whole-chunk merges were already rejected above, so only proper splits
      // (1 .. length-1) remain; prefer giving as much as possible to `prev`.
      for (let prefixLength = chunkLength - 1; prefixLength >= 1; prefixLength -= 1) {
        const prefix = sliceMarkdownIR(chunk, 0, prefixLength);
        const suffix = sliceMarkdownIR(chunk, prefixLength, chunkLength);
        const mergedPrev = mergeMarkdownIRChunks(prev, prefix);
        const mergedNext = mergeMarkdownIRChunks(suffix, next);
        if (fitsLimit(mergedPrev) && fitsLimit(mergedNext)) {
          coalesced[coalesced.length - 1] = mergedPrev;
          queue[index + 1] = mergedNext;
          break;
        }
      }
    }
    // Reaching here without a merge means the whitespace-only chunk is dropped.
  }

  return coalesced;
}

/**
 * Chunks an IR so that each chunk's rendered Telegram HTML fits within `limit`.
 *
 * The IR is first chunked by text length; any chunk whose HTML is still too
 * long is split further and its pieces are re-examined before the remaining
 * chunks. Whitespace-only chunks are coalesced at the end.
 *
 * @param ir - The IR to render.
 * @param limit - Maximum HTML length per chunk; floored and clamped to at least 1.
 * @returns Rendered chunks (HTML plus source text) in document order.
 */
function renderTelegramChunksWithinHtmlLimit(
  ir: MarkdownIR,
  limit: number,
): TelegramFormattedChunk[] {
  const htmlLimit = Math.max(1, Math.floor(limit));
  const queue = chunkMarkdownIR(ir, htmlLimit);
  const accepted: MarkdownIR[] = [];

  while (queue.length > 0) {
    const current = queue.shift();
    if (current) {
      // A chunk of at most one character cannot be split any further.
      const fits =
        renderTelegramChunkHtml(current).length <= htmlLimit || current.text.length <= 1;
      if (fits) {
        accepted.push(current);
      } else {
        const pieces = splitTelegramChunkByHtmlLimit(current, htmlLimit);
        if (pieces.length > 1) {
          // Re-check the pieces first so output order is preserved.
          queue.unshift(...pieces);
        } else {
          // Worst-case safety: avoid retry loops, deliver the chunk as-is.
          accepted.push(current);
        }
      }
    }
  }

  const coalesced = coalesceWhitespaceOnlyMarkdownIRChunks(accepted, htmlLimit);
  return coalesced.map((piece) => ({
    html: renderTelegramChunkHtml(piece),
    text: piece.text,
  }));
}

/**
 * Converts markdown into Telegram-ready chunks whose rendered HTML each fits
 * within `limit`.
 *
 * @param markdown - Markdown source; a nullish value is treated as the empty string.
 * @param limit - Maximum HTML length per chunk.
 * @param options - `tableMode` is forwarded to the IR builder.
 * @returns Chunks carrying both the rendered HTML and the source text.
 */
export function markdownToTelegramChunks(
  markdown: string,
  limit: number,
  options: { tableMode?: MarkdownTableMode } = {},
): TelegramFormattedChunk[] {
  const { tableMode } = options;
  const parsed = markdownToIR(markdown ?? "", {
    linkify: true,
    enableSpoilers: true,
    headingStyle: "none",
    blockquotePrefix: "",
    tableMode,
  });
  return renderTelegramChunksWithinHtmlLimit(parsed, limit);
}

/**
 * Convenience wrapper around `markdownToTelegramChunks` that returns only the
 * rendered HTML of each chunk.
 *
 * @param markdown - Markdown source.
 * @param limit - Maximum HTML length per chunk.
 * @param options - Optional; `tableMode` is forwarded to
 *   `markdownToTelegramChunks`. Omitting it keeps the previous behaviour.
 * @returns The HTML strings in document order.
 */
export function markdownToTelegramHtmlChunks(
  markdown: string,
  limit: number,
  options: { tableMode?: MarkdownTableMode } = {},
): string[] {
  return markdownToTelegramChunks(markdown, limit, options).map(({ html }) => html);
}