Files
openclaw/extensions/imessage/src/markdown-format.ts
Omar Shahine e259751ec9 feat(imessage): private-API support via imsg JSON-RPC [AI-assisted] (#78317)
Merged via squash.

Prepared head SHA: b7d336b296
Co-authored-by: omarshahine <10343873+omarshahine@users.noreply.github.com>
Co-authored-by: omarshahine <10343873+omarshahine@users.noreply.github.com>
Reviewed-by: @omarshahine
2026-05-07 19:20:18 -07:00

155 lines
5.7 KiB
TypeScript

/**
* Convert markdown bold/italic/underline/strikethrough markers in agent text
* into typed-run formatting ranges that the imsg bridge's `sendMessage`
* action understands. Returns the marker-stripped text plus an array of
* ranges keyed by their start in the OUTPUT string.
*
* macOS 15+ recipients render typed runs natively; macOS 14 falls back to
* client-side markdown rendering, so passing both raw markdown and ranges
* would double up — callers should send the stripped `text` only.
*
* Supported markers:
* - `**bold**`
* - `*italic*` / `_italic_` (single-underscore enforces word boundaries)
* - `__underline__` (double-underscore also enforces word boundaries, so a
* dunder name with code directly next to it, as in `def __init__(self)`,
* is left alone; a whitespace-delimited `__init__` still becomes underline)
* - `~~strikethrough~~`
*
* Nesting:
* - `***bold-italic***` is matched as the single compound marker `***`,
* yielding two ranges over the same span (one bold, one italic).
* - Other nested combinations (`**bold _underline_**`, etc.) are
* similarly parsed by recursing into the inner text of every marker
* pair we consume.
*
* Out of scope: escaped markers (`\*literal\*`), code spans (` `code` `),
* and combining-character edge cases. The receiver's iMessage style
* vocabulary covers only bold/italic/underline/strikethrough — there is
* nowhere to render anything fancier, and over-eager parsing would mangle
* plain-text emoji/punctuation that happens to look like markdown.
*/
/**
* One of the four text styles this module can attach to a span of text.
*/
export type IMessageFormatStyle = "bold" | "italic" | "underline" | "strikethrough";
/**
* A styled span of the marker-stripped output text.
*
* Offsets and lengths are measured with JavaScript string indices
* (UTF-16 code units), because they are derived from `String.length`.
*/
export type IMessageFormatRange = {
/** Index of the span's first character in the marker-stripped output text. */
start: number;
/** Number of UTF-16 code units covered by the span. */
length: number;
/** Styles applied to the span; the parser in this file emits one style per range. */
styles: IMessageFormatStyle[];
};
/**
* Describes one markdown delimiter the parser recognises. The same string is
* used as both the opening and the closing delimiter.
*/
type Marker = {
/** Literal delimiter text, e.g. `**` or `_`. */
marker: string;
/** Styles emitted for the span enclosed by a matched pair of this marker. */
styles: IMessageFormatStyle[];
/**
* When true, the marker only counts when the character just outside each
* end is whitespace or the edge of the string. Single-underscore italics
* need this so `snake_case_var` is left literal, and double-underscore
* underline needs it so a dunder name with code directly next to it (as in
* `def __init__(self)`) is not turned into underline.
*/
requireWordBoundary: boolean;
};
// Delimiter table, scanned top to bottom at every input position, so the
// longest delimiters must come first:
// - `***` yields bold + italic, `___` yields underline + italic;
// - `~~`, `**` and `__` each yield their single style;
// - the one-character italics (`*`, `_`) are tried last.
// Underscore-based delimiters additionally require a word boundary on the
// outside of both ends; asterisk and tilde delimiters do not.
const MARKERS: readonly Marker[] = (() => {
  // Delimiter that may appear anywhere, including inside a word.
  const anywhere = (marker: string, ...styles: IMessageFormatStyle[]): Marker => ({
    marker,
    styles,
    requireWordBoundary: false,
  });
  // Delimiter that must be bounded by whitespace or the string edge.
  const wordBounded = (marker: string, ...styles: IMessageFormatStyle[]): Marker => ({
    marker,
    styles,
    requireWordBoundary: true,
  });
  return [
    anywhere("***", "bold", "italic"),
    wordBounded("___", "underline", "italic"),
    anywhere("~~", "strikethrough"),
    anywhere("**", "bold"),
    wordBounded("__", "underline"),
    anywhere("*", "italic"),
    wordBounded("_", "italic"),
  ];
})();
/**
 * Try to read `m` as an opening marker at index `i` of `input` and pair it
 * with the next occurrence of the same marker.
 *
 * The match is rejected (returns `null`) when any of the following holds:
 * - `input` does not contain `m.marker` at `i`;
 * - the marker is only the leading part of a longer run of the same
 *   character (e.g. `*` at the start of `**bold**`);
 * - `m.requireWordBoundary` is set and the character just outside either
 *   marker is neither whitespace nor the edge of the string;
 * - there is no closing marker, or nothing sits between the two markers;
 * - the enclosed text starts or ends with whitespace.
 *
 * Only the first candidate closing marker is examined; if it is rejected
 * the whole match is rejected rather than searching further.
 *
 * @param input - Text being scanned.
 * @param i - Index at which the opening marker is expected.
 * @param m - Marker definition to test.
 * @returns The index of the closing marker and the raw text between the two
 *   markers, or `null` when `m` does not form a valid pair at `i`.
 */
function tryConsumeMarker(
  input: string,
  i: number,
  m: Marker,
): { close: number; inner: string } | null {
  if (!input.startsWith(m.marker, i)) {
    return null;
  }
  // For single-char markers, reject when the next char is the same so we
  // don't consume the leading half of a longer marker (e.g. `*` matching
  // the first asterisk of `**bold**`).
  if (m.marker.length === 1 && input[i + 1] === m.marker) {
    return null;
  }
  // For 2-char markers, reject when there's a third repeat — that's the
  // longer compound marker (`***`, `___`) which should match first.
  if (m.marker.length === 2 && input[i + 2] === m.marker[0]) {
    return null;
  }
  const isWhitespace = (ch: string | undefined): boolean => ch !== undefined && /\s/.test(ch);
  const isAtBoundary = (ch: string | undefined): boolean => ch === undefined || isWhitespace(ch);
  // For underscore markers we use a stricter rule than CommonMark: the
  // OUTSIDE of each marker must be whitespace, start-of-string, or
  // end-of-string. That keeps `def __init__(self)` literal (`(` after the
  // close is neither whitespace nor end-of-string) while `__under__ and`
  // still parses cleanly.
  if (m.requireWordBoundary && i > 0 && !isAtBoundary(input[i - 1])) {
    return null;
  }
  const startInner = i + m.marker.length;
  // The INSIDE of each marker must hug the text it styles. Without this,
  // free-standing punctuation such as `2 * 3 * 4` or a `* item` bullet list
  // would be consumed as an italic pair and lose its asterisks.
  if (isWhitespace(input[startInner])) {
    return null;
  }
  const close = input.indexOf(m.marker, startInner);
  if (close === -1 || close === startInner) {
    return null;
  }
  if (isWhitespace(input[close - 1])) {
    return null;
  }
  if (m.requireWordBoundary && !isAtBoundary(input[close + m.marker.length])) {
    return null;
  }
  // `close > startInner`, and neither end is whitespace, so the inner text
  // is guaranteed to be non-empty and not whitespace-only.
  return { close, inner: input.slice(startInner, close) };
}
/**
 * Strip recognised markdown markers from `input` and record a formatting
 * range for every marker pair that was consumed.
 *
 * @param input - Text to scan; on recursive calls this is the text found
 *   between a matched pair of markers.
 * @param baseOffset - Position in the final output string at which the
 *   stripped form of `input` will begin.
 * @param sink - Array that receives the ranges. Ranges for nested markers
 *   are appended before the ranges of the marker that encloses them.
 * @returns `input` with every consumed marker removed.
 */
function parseInternal(input: string, baseOffset: number, sink: IMessageFormatRange[]): string {
  const pieces: string[] = [];
  // Total length of everything in `pieces`, i.e. how much stripped output
  // this call has produced so far.
  let strippedLength = 0;
  let cursor = 0;

  scan: while (cursor < input.length) {
    // First marker in table order that matches at `cursor` wins.
    for (const candidate of MARKERS) {
      const match = tryConsumeMarker(input, cursor, candidate);
      if (match === null) {
        continue;
      }
      // Parse the enclosed text first so nested markers compose; their
      // ranges are positioned relative to where this span starts in the
      // final output.
      const spanStart = baseOffset + strippedLength;
      const spanText = parseInternal(match.inner, spanStart, sink);
      // One range per style, in the order the marker lists them (so `***`
      // reports bold first, then italic), all covering the same span.
      candidate.styles.forEach((style) => {
        sink.push({
          start: spanStart,
          length: spanText.length,
          styles: [style],
        });
      });
      pieces.push(spanText);
      strippedLength += spanText.length;
      // Resume scanning just past the closing marker.
      cursor = match.close + candidate.marker.length;
      continue scan;
    }
    // No marker starts here: copy the character through unchanged.
    pieces.push(input.charAt(cursor));
    strippedLength += 1;
    cursor += 1;
  }
  return pieces.join("");
}
/**
 * Public entry point: strip markdown style markers from `input` and report
 * the spans they covered.
 *
 * @param input - Text that may contain markdown style markers.
 * @returns `text`, the input with consumed markers removed, and `ranges`,
 *   the formatting ranges whose `start` values index into `text`.
 */
export function extractMarkdownFormatRuns(input: string): {
  text: string;
  ranges: IMessageFormatRange[];
} {
  const collected: IMessageFormatRange[] = [];
  // Offset 0: the top-level call writes to the very start of the output.
  const stripped = parseInternal(input, 0, collected);
  return { text: stripped, ranges: collected };
}