mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-10 00:30:43 +00:00
Merged via squash.
Prepared head SHA: b7d336b296
Co-authored-by: omarshahine <10343873+omarshahine@users.noreply.github.com>
Co-authored-by: omarshahine <10343873+omarshahine@users.noreply.github.com>
Reviewed-by: @omarshahine
155 lines
5.7 KiB
TypeScript
155 lines
5.7 KiB
TypeScript
/**
|
|
* Convert markdown bold/italic/underline/strikethrough markers in agent text
|
|
* into typed-run formatting ranges that the imsg bridge's `sendMessage`
|
|
* action understands. Returns the marker-stripped text plus an array of
|
|
* ranges keyed by their start in the OUTPUT string.
|
|
*
|
|
* macOS 15+ recipients render typed runs natively; macOS 14 falls back to
|
|
* client-side markdown rendering, so passing both raw markdown and ranges
|
|
* would double up — callers should send the stripped `text` only.
|
|
*
|
|
* Supported markers:
|
|
* - `**bold**`
|
|
* - `*italic*` / `_italic_` (single-underscore enforces word boundaries)
|
|
* - `__underline__` (double-underscore also enforces word boundaries so
|
|
* Python identifiers like `__init__` are not mangled)
|
|
* - `~~strikethrough~~`
|
|
*
|
|
* Nesting:
|
|
* - `***bold-italic***` is matched by the dedicated `***` compound marker,
* which yields two ranges over the same span (one bold, one italic).
|
|
* - Other nested combinations (`**bold _underline_**`, etc.) are
|
|
* similarly parsed by recursing into the inner text of every marker
|
|
* pair we consume.
|
|
*
|
|
* Out of scope: escaped markers (`\*literal\*`), code spans (` `code` `),
|
|
* and combining-character edge cases. The receiver's iMessage style
|
|
* vocabulary covers only bold/italic/underline/strikethrough — there is
|
|
* nowhere to render anything fancier, and over-eager parsing would mangle
|
|
* plain-text emoji/punctuation that happens to look like markdown.
|
|
*/
|
|
|
|
/** The text styles a formatting range can carry. */
export type IMessageFormatStyle =
  | "bold"
  | "italic"
  | "underline"
  | "strikethrough";
|
|
|
|
/** One formatting run over the marker-stripped output text. */
export type IMessageFormatRange = {
  /** Offset of the run's first character in the stripped output string. */
  start: number;
  /** Length of the run, measured on the stripped output string. */
  length: number;
  /** Styles applied across the whole run. */
  styles: IMessageFormatStyle[];
};
|
|
|
|
/** Describes one markdown delimiter and the style(s) it maps to. */
type Marker = {
  /** Literal delimiter text; the same string opens and closes a span. */
  marker: string;
  /** Styles recorded for the text enclosed by a matched delimiter pair. */
  styles: IMessageFormatStyle[];
  /**
   * When true, the marker only counts when both ends sit on a word
   * boundary. Single-underscore italics need this so `snake_case_var` is
   * left literal, and double-underscore underline needs it so Python
   * dunder names like `__init__` are not turned into underline.
   */
  requireWordBoundary: boolean;
};
|
|
|
|
// Order matters: longer/compound markers are tried first.
//   - `***...***` is bold+italic over the inner span.
//   - `___...___` is underline+italic.
//   - `~~`, `**`, `__` cover their own styles.
//   - `*` / `_` italic match last.
// Every underscore-based marker enforces word boundaries; asterisk and tilde
// markers do not.
const MARKERS: readonly Marker[] = [
  { marker: "***", styles: ["bold", "italic"], requireWordBoundary: false },
  { marker: "___", styles: ["underline", "italic"], requireWordBoundary: true },
  { marker: "~~", styles: ["strikethrough"], requireWordBoundary: false },
  { marker: "**", styles: ["bold"], requireWordBoundary: false },
  { marker: "__", styles: ["underline"], requireWordBoundary: true },
  { marker: "*", styles: ["italic"], requireWordBoundary: false },
  { marker: "_", styles: ["italic"], requireWordBoundary: true },
];
|
|
|
|
/** Matches a single whitespace character; hoisted so it is built only once. */
const WHITESPACE_CHAR = /\s/;

/**
 * Try to match `m` as an opening delimiter at position `i` of `input` and
 * locate its closing delimiter.
 *
 * Only the first occurrence of the marker after the opener is considered as
 * the closer; if that occurrence is not acceptable the match fails rather
 * than searching further.
 *
 * @param input - Text being scanned.
 * @param i - Index at which the opening marker must start.
 * @param m - Marker definition to test.
 * @returns The index of the closing marker and the raw text between the two
 *   markers, or `null` when no acceptable pair starts at `i`.
 */
function tryConsumeMarker(
  input: string,
  i: number,
  m: Marker,
): { close: number; inner: string } | null {
  if (!input.startsWith(m.marker, i)) {
    return null;
  }
  const width = m.marker.length;
  // For single-char markers, reject when the next char is the same so we
  // don't consume the leading half of a longer marker (e.g. `*` matching
  // the first asterisk of `**bold**`).
  if (width === 1 && input[i + 1] === m.marker) {
    return null;
  }
  // For 2-char markers, reject when there's a third repeat — that's the
  // longer compound marker (`***`, `___`) which should match first.
  if (width === 2 && input[i + 2] === m.marker[0]) {
    return null;
  }

  const isAtBoundary = (ch: string | undefined): boolean =>
    ch === undefined || WHITESPACE_CHAR.test(ch);

  // For underscore markers we use a stricter rule than CommonMark: the
  // OUTSIDE of each marker must be whitespace, start-of-string, or
  // end-of-string. That keeps `def __init__(self)` literal (`(` after the
  // close is neither whitespace nor end-of-string) while `__under__ and`
  // still parses cleanly.
  if (m.requireWordBoundary && i > 0 && !isAtBoundary(input[i - 1])) {
    return null;
  }

  const startInner = i + width;
  // The INSIDE of the opener must be a visible character. A marker followed
  // by whitespace (or by nothing) is a bullet (`* item`) or an operator
  // (`2 * 3 * 4`), not emphasis, and must stay literal.
  if (isAtBoundary(input[startInner])) {
    return null;
  }

  const close = input.indexOf(m.marker, startInner);
  if (close === -1 || close === startInner) {
    return null;
  }
  // Likewise the INSIDE of the closer: a marker preceded by whitespace does
  // not close a span.
  if (isAtBoundary(input[close - 1])) {
    return null;
  }
  if (m.requireWordBoundary && !isAtBoundary(input[close + width])) {
    return null;
  }

  // No blank-inner check is needed here: the inner text is non-empty and
  // starts with a non-whitespace character.
  return { close, inner: input.slice(startInner, close) };
}
|
|
|
|
/**
 * Strip recognised markers from `input`, recording one formatting range per
 * style for every marker pair consumed.
 *
 * @param input - Text to scan; on recursive calls this is the text between
 *   an already-matched marker pair.
 * @param baseOffset - Position in the final output string at which this
 *   call's output begins; it is added to every range start recorded here.
 * @param sink - Accumulator that ranges are appended to. Ranges for nested
 *   markers are appended before the ranges of the pair that encloses them.
 * @returns `input` with every consumed marker removed.
 */
function parseInternal(input: string, baseOffset: number, sink: IMessageFormatRange[]): string {
  let out = "";
  let i = 0;
  while (i < input.length) {
    let consumed = false;
    // MARKERS is ordered longest-first, so the first hit is the one to take.
    for (const m of MARKERS) {
      const hit = tryConsumeMarker(input, i, m);
      if (!hit) {
        continue;
      }
      // Recurse on the inner span so nested markers compose. The inner
      // ranges are emitted with offsets relative to the new base.
      const innerOffset = baseOffset + out.length;
      const innerStripped = parseInternal(hit.inner, innerOffset, sink);
      // Compound markers (`***`, `___`) emit multiple styles over the same
      // span — one single-style range each, pushed in the order the marker
      // declares them (e.g. bold before italic for `***`).
      for (const style of m.styles) {
        sink.push({
          start: innerOffset,
          length: innerStripped.length,
          styles: [style],
        });
      }
      out += innerStripped;
      // Resume scanning just past the closing marker.
      i = hit.close + m.marker.length;
      consumed = true;
      break;
    }
    if (!consumed) {
      // No marker opens here: copy the character through unchanged.
      out += input[i];
      i += 1;
    }
  }
  return out;
}
|
|
|
|
/**
 * Split markdown-flavoured text into plain text plus formatting ranges.
 *
 * @param input - Text that may contain markdown emphasis markers.
 * @returns `text`, the input with every recognised marker removed, and
 *   `ranges`, whose `start`/`length` index into `text` (not into `input`).
 *
 * @example
 * extractMarkdownFormatRuns("**hi**");
 * // => { text: "hi", ranges: [{ start: 0, length: 2, styles: ["bold"] }] }
 */
export function extractMarkdownFormatRuns(input: string): {
  text: string;
  ranges: IMessageFormatRange[];
} {
  const ranges: IMessageFormatRange[] = [];
  const text = parseInternal(input, 0, ranges);
  return { text, ranges };
}
|