to backtick wrapping", () => {
+ expect(sanitizeForPlainText("foo()")).toBe("`foo()`");
+ });
+
+ // --- block elements -----------------------------------------------------
+
+ it("converts and
to newlines", () => {
+ expect(sanitizeForPlainText("paragraph
")).toBe("\nparagraph\n");
+ });
+
+ it("converts headings to bold text with newlines", () => {
+ expect(sanitizeForPlainText("Title
")).toBe("\n*Title*\n");
+ expect(sanitizeForPlainText("Section
")).toBe("\n*Section*\n");
+ });
+
+ it("converts to bullet points", () => {
+ expect(sanitizeForPlainText(" item one item two ")).toBe(
+ "• item one\n• item two\n",
+ );
+ });
+
+ // --- tag stripping ------------------------------------------------------
+
+ it("strips unknown/remaining tags", () => {
+ expect(sanitizeForPlainText('text')).toBe("text");
+ expect(sanitizeForPlainText('link')).toBe("link");
+ });
+
+ // --- passthrough --------------------------------------------------------
+
+ it("passes through clean text unchanged", () => {
+ expect(sanitizeForPlainText("hello world")).toBe("hello world");
+ });
+
+ it("does not corrupt angle brackets in prose", () => {
+ // `a < b` does not match the `<tag>` pattern because there is no closing `>`
+ // immediately after a tag-like sequence.
+ expect(sanitizeForPlainText("a < b && c > d")).toBe("a < b && c > d");
+ });
+
+ // --- mixed content ------------------------------------------------------
+
+ it("handles mixed HTML content", () => {
+ const input = "Hello
world this is nice";
+ expect(sanitizeForPlainText(input)).toBe("Hello\n*world* this is _nice_");
+ });
+
+ it("collapses excessive newlines", () => {
+ expect(sanitizeForPlainText("a
b")).toBe("a\n\nb");
+ });
+});
diff --git a/src/infra/outbound/sanitize-text.ts b/src/infra/outbound/sanitize-text.ts
new file mode 100644
index 00000000000..fb6b6abbbc1
--- /dev/null
+++ b/src/infra/outbound/sanitize-text.ts
@@ -0,0 +1,62 @@
+/**
+ * Sanitize model output for plain-text messaging surfaces.
+ *
+ * LLMs occasionally produce HTML tags (`<br>`, `<b>`, `<i>`, etc.) that render
+ * correctly on web but appear as literal text on WhatsApp, Signal, SMS, and IRC.
+ *
+ * Converts common inline HTML to lightweight-markup equivalents used by
+ * WhatsApp/Signal/Telegram and strips any remaining tags.
+ *
+ * @see https://github.com/openclaw/openclaw/issues/31884
+ * @see https://github.com/openclaw/openclaw/issues/18558
+ */
+
/**
 * Lower-case identifiers of the channels whose outgoing text should have
 * HTML tags converted or stripped.
 */
const PLAIN_TEXT_SURFACES = new Set<string>(
  ["whatsapp", "signal", "sms", "irc", "telegram", "imessage", "googlechat"],
);
+
/**
 * Reports whether a channel is one of the plain-text surfaces.
 *
 * The lookup is case-insensitive: the id is lower-cased before it is checked
 * against `PLAIN_TEXT_SURFACES`.
 *
 * @param channelId - Channel identifier, in any letter case.
 * @returns `true` when the channel cannot render raw HTML.
 */
export function isPlainTextSurface(channelId: string): boolean {
  const normalizedId = channelId.toLowerCase();
  return PLAIN_TEXT_SURFACES.has(normalizedId);
}
+
/**
 * Convert common HTML tags to their plain-text/lightweight-markup equivalents
 * and strip anything that remains.
 *
 * Replacements are applied in order:
 * 1. `<br>`, `<br/>`, `<br />` become a newline.
 * 2. Bare `<p>`, `</p>`, `<div>`, `</div>` become a newline.
 * 3. `<b>`/`<strong>` become `*bold*`, `<i>`/`<em>` become `_italic_`,
 *    `<s>`/`<strike>`/`<del>` become `~strike~`, `<code>` becomes backticks.
 * 4. `<h1>`–`<h6>` become `*bold*` on their own line.
 * 5. `<li>` becomes a `• ` bullet followed by a newline.
 * 6. Every remaining tag-shaped sequence is removed.
 * 7. Runs of three or more newlines collapse to two.
 *
 * The function is intentionally conservative — it only targets tags that models
 * are known to produce and avoids false positives on angle brackets in normal
 * prose (e.g. `a < b`). Paired-tag patterns use `.`, which does not match a
 * newline, so a pair whose content spans several lines is not converted; its
 * tags are simply removed by the final strip step.
 *
 * @param text - Model output that may contain HTML tags.
 * @returns The text with HTML converted to lightweight markup or removed.
 */
export function sanitizeForPlainText(text: string): string {
  return (
    text
      // Line breaks: <br>, <br/>, <br />
      .replace(/<br\s*\/?>/gi, "\n")
      // Block elements → newlines
      .replace(/<\/?(p|div)>/gi, "\n")
      // Bold → WhatsApp/Signal bold
      .replace(/<(b|strong)>(.*?)<\/\1>/gi, "*$2*")
      // Italic → WhatsApp/Signal italic
      .replace(/<(i|em)>(.*?)<\/\1>/gi, "_$2_")
      // Strikethrough → WhatsApp/Signal strikethrough
      .replace(/<(s|strike|del)>(.*?)<\/\1>/gi, "~$2~")
      // Inline code (the opening <code> must be part of the match, otherwise
      // everything preceding </code> would be wrapped in backticks)
      .replace(/<code>(.*?)<\/code>/gi, "`$1`")
      // Headings → bold text with newline (attributes on the tag are allowed)
      .replace(/<h[1-6][^>]*>(.*?)<\/h[1-6]>/gi, "\n*$1*\n")
      // List items → bullet points (attributes on the tag are allowed)
      .replace(/<li[^>]*>(.*?)<\/li>/gi, "• $1\n")
      // Strip remaining HTML tags (require tag-like structure: <word...>), so
      // `a < b` is left alone because `<` is not directly followed by a letter
      .replace(/<\/?[a-z][a-z0-9]*\b[^>]*>/gi, "")
      // Collapse 3+ consecutive newlines into 2
      .replace(/\n{3,}/g, "\n\n")
  );
}