mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 07:50:43 +00:00
fix(ui): replace marked.js with markdown-it to fix ReDoS UI freeze (#46707) thanks @zhangfnf
Replace marked.js with markdown-it for the control UI chat markdown renderer to eliminate a ReDoS vulnerability that could freeze the browser tab.
- Configure markdown-it with custom renderers matching marked.js output
- Add GFM www-autolink with trailing punctuation stripping per spec
- Escape raw HTML via html_block/html_inline overrides
- Flatten remote images to alt text, preserve base64 data URI images
- Add task list support via markdown-it-task-lists plugin
- Trim trailing CJK characters from auto-linked URLs (RFC 3986)
- Keep marked dependency for agents-panels-status-files.ts usage
Co-authored-by: zhangfan49 <zhangfan49@baidu.com>
Co-authored-by: Nova <nova@openknot.ai>
This commit is contained in:
@@ -4,6 +4,8 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
## Unreleased
|
||||
|
||||
- fix(ui): replace marked.js with markdown-it to fix ReDoS UI freeze (#46707) thanks @zhangfnf
|
||||
|
||||
### Changes
|
||||
|
||||
- Telegram/forum topics: surface human topic names in agent context, prompt metadata, and plugin hook metadata by learning names from Telegram forum service messages. (#65973) Thanks @ptahdunbar.
|
||||
|
||||
14
pnpm-lock.yaml
generated
14
pnpm-lock.yaml
generated
@@ -1297,10 +1297,19 @@ importers:
|
||||
lit:
|
||||
specifier: ^3.3.2
|
||||
version: 3.3.2
|
||||
markdown-it:
|
||||
specifier: ^14.1.1
|
||||
version: 14.1.1
|
||||
markdown-it-task-lists:
|
||||
specifier: ^2.1.1
|
||||
version: 2.1.1
|
||||
marked:
|
||||
specifier: ^18.0.0
|
||||
version: 18.0.0
|
||||
devDependencies:
|
||||
'@types/markdown-it':
|
||||
specifier: ^14.1.2
|
||||
version: 14.1.2
|
||||
'@vitest/browser-playwright':
|
||||
specifier: 4.1.4
|
||||
version: 4.1.4(playwright@1.59.1)(vite@8.0.8(@types/node@25.6.0)(esbuild@0.27.7)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3))(vitest@4.1.4)
|
||||
@@ -6055,6 +6064,9 @@ packages:
|
||||
resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==}
|
||||
engines: {node: '>=10'}
|
||||
|
||||
markdown-it-task-lists@2.1.1:
|
||||
resolution: {integrity: sha512-TxFAc76Jnhb2OUu+n3yz9RMu4CwGfaT788br6HhEDlvWfdeJcLUsxk1Hgw2yJio0OXsxv7pyIPmvECY7bMbluA==}
|
||||
|
||||
markdown-it@14.1.1:
|
||||
resolution: {integrity: sha512-BuU2qnTti9YKgK5N+IeMubp14ZUKUUw7yeJbkjtosvHiP0AZ5c8IAgEMk79D0eC8F23r4Ac/q8cAIFdm2FtyoA==}
|
||||
hasBin: true
|
||||
@@ -13273,6 +13285,8 @@ snapshots:
|
||||
dependencies:
|
||||
semver: 7.7.4
|
||||
|
||||
markdown-it-task-lists@2.1.1: {}
|
||||
|
||||
markdown-it@14.1.1:
|
||||
dependencies:
|
||||
argparse: 2.0.1
|
||||
|
||||
@@ -13,9 +13,12 @@
|
||||
"@noble/ed25519": "3.0.1",
|
||||
"dompurify": "^3.3.3",
|
||||
"lit": "^3.3.2",
|
||||
"markdown-it": "^14.1.1",
|
||||
"markdown-it-task-lists": "^2.1.1",
|
||||
"marked": "^18.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@vitest/browser-playwright": "4.1.4",
|
||||
"jsdom": "^29.0.2",
|
||||
"playwright": "^1.59.1",
|
||||
|
||||
10
ui/src/markdown-it-task-lists.d.ts
vendored
Normal file
10
ui/src/markdown-it-task-lists.d.ts
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
/**
 * Ambient typings for the `markdown-it-task-lists` package so it can be
 * imported from TypeScript. Only the default export (the plugin function)
 * is described.
 */
declare module "markdown-it-task-lists" {
  import type MarkdownIt from "markdown-it";

  /**
   * Optional boolean flags accepted by the plugin.
   * NOTE(review): the meaning of each flag is defined by the package itself
   * and is not visible from this shim — confirm against the package README.
   */
  interface TaskListsOptions {
    enabled?: boolean;
    label?: boolean;
    labelAfter?: boolean;
  }

  /** Shape of the plugin: receives the MarkdownIt instance and optional flags. */
  type TaskListsPlugin = (md: MarkdownIt, options?: TaskListsOptions) => void;

  const plugin: TaskListsPlugin;
  export default plugin;
}
|
||||
@@ -41,6 +41,20 @@
|
||||
margin-top: 0.25em;
|
||||
}
|
||||
|
||||
/* Hide default marker only for unordered task lists; ordered lists keep numbers */
|
||||
.chat-text :where(ul > .task-list-item),
|
||||
.sidebar-markdown :where(ul > .task-list-item),
|
||||
.chat-thinking :where(ul > .task-list-item) {
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
.chat-text :where(.task-list-item-checkbox),
|
||||
.sidebar-markdown :where(.task-list-item-checkbox),
|
||||
.chat-thinking :where(.task-list-item-checkbox) {
|
||||
margin-right: 0.4em;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.chat-text :where(a) {
|
||||
color: var(--accent);
|
||||
text-decoration: underline;
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { marked } from "marked";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { toSanitizedMarkdownHtml } from "./markdown.ts";
|
||||
import { md, toSanitizedMarkdownHtml } from "./markdown.ts";
|
||||
|
||||
describe("toSanitizedMarkdownHtml", () => {
|
||||
// ── Original tests from before markdown-it migration ──
|
||||
it("renders basic markdown", () => {
|
||||
const html = toSanitizedMarkdownHtml("Hello **world**");
|
||||
expect(html).toContain("<strong>world</strong>");
|
||||
@@ -146,9 +146,9 @@ describe("toSanitizedMarkdownHtml", () => {
|
||||
expect(second).toBe(first);
|
||||
});
|
||||
|
||||
it("falls back to escaped plain text if marked.parse throws (#36213)", () => {
|
||||
const parseSpy = vi.spyOn(marked, "parse").mockImplementation(() => {
|
||||
throw new Error("forced parse failure");
|
||||
it("falls back to escaped plain text if md.render throws (#36213)", () => {
|
||||
const renderSpy = vi.spyOn(md, "render").mockImplementation(() => {
|
||||
throw new Error("forced render failure");
|
||||
});
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
const input = `Fallback **probe** ${Date.now()}`;
|
||||
@@ -158,26 +158,484 @@ describe("toSanitizedMarkdownHtml", () => {
|
||||
expect(html).toContain("Fallback **probe**");
|
||||
expect(warnSpy).toHaveBeenCalledOnce();
|
||||
} finally {
|
||||
parseSpy.mockRestore();
|
||||
renderSpy.mockRestore();
|
||||
warnSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps adjacent trailing CJK text outside bare auto-links", () => {
|
||||
const html = toSanitizedMarkdownHtml("https://example.com重新解读");
|
||||
expect(html).toContain('<a href="https://example.com"');
|
||||
expect(html).toContain(">https://example.com</a>重新解读");
|
||||
// ── Additional tests for markdown-it migration ──
|
||||
describe("www autolinks", () => {
|
||||
it("links www.example.com", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit www.example.com today");
|
||||
expect(html).toContain('<a href="http://www.example.com"');
|
||||
expect(html).toContain("www.example.com</a>");
|
||||
});
|
||||
|
||||
it("links www.example.com with path, query, and fragment", () => {
|
||||
const html = toSanitizedMarkdownHtml("See www.example.com/path?a=1#section");
|
||||
expect(html).toContain('<a href="http://www.example.com/path?a=1#section"');
|
||||
});
|
||||
|
||||
it("links www.example.com with port", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit www.example.com:8080/foo");
|
||||
expect(html).toContain('<a href="http://www.example.com:8080/foo"');
|
||||
});
|
||||
|
||||
it("links www.localhost and other single-label hosts", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit www.localhost:3000/path for dev");
|
||||
expect(html).toContain('<a href="http://www.localhost:3000/path"');
|
||||
});
|
||||
|
||||
it("links Unicode/IDN domains like www.münich.de", () => {
|
||||
// markdown-it linkify converts IDN to punycode; marked.js percent-encodes.
|
||||
// Both are valid; we just verify the link is created.
|
||||
const html1 = toSanitizedMarkdownHtml("Visit www.münich.de");
|
||||
expect(html1).toContain("<a href=");
|
||||
expect(html1).toContain(">www.münich.de</a>");
|
||||
|
||||
const html2 = toSanitizedMarkdownHtml("Visit www.café.example");
|
||||
expect(html2).toContain("<a href=");
|
||||
expect(html2).toContain(">www.café.example</a>");
|
||||
});
|
||||
|
||||
it("links www.foo_bar.example.com with underscores", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit www.foo_bar.example.com");
|
||||
expect(html).toContain('<a href="http://www.foo_bar.example.com"');
|
||||
});
|
||||
|
||||
it("strips trailing punctuation from links", () => {
|
||||
const html1 = toSanitizedMarkdownHtml("Check www.example.com/help.");
|
||||
expect(html1).toContain('href="http://www.example.com/help"');
|
||||
expect(html1).not.toContain('href="http://www.example.com/help."');
|
||||
|
||||
const html2 = toSanitizedMarkdownHtml("See www.example.com!");
|
||||
expect(html2).toContain('href="http://www.example.com"');
|
||||
expect(html2).not.toContain('href="http://www.example.com!"');
|
||||
});
|
||||
|
||||
it("strips entity-like suffixes per GFM spec", () => {
  // A trailing `&name;` looks like an entity reference, so it must be left
  // outside the link. The assertion is made against serialized HTML, where
  // the leftover literal ampersand is written as `&amp;`.
  const html1 = toSanitizedMarkdownHtml("www.google.com/search?q=commonmark&hl;");
  expect(html1).toContain('href="http://www.google.com/search?q=commonmark"');
  expect(html1).toContain("&amp;hl;"); // Entity-like text shown outside the link

  // `&amp;` at the end of the URL is entity-like as well and is stripped.
  const html2 = toSanitizedMarkdownHtml("www.example.com/path&amp;");
  expect(html2).toContain('href="http://www.example.com/path"');
});
|
||||
|
||||
it("handles quotes with balance checking", () => {
|
||||
// Quoted URL — trailing unbalanced " is stripped
|
||||
const html1 = toSanitizedMarkdownHtml('"www.example.com"');
|
||||
expect(html1).toContain('href="http://www.example.com"');
|
||||
expect(html1).not.toContain('href="http://www.example.com%22"');
|
||||
|
||||
// Balanced quotes inside path — preserved
|
||||
const html2 = toSanitizedMarkdownHtml('www.example.com/path"with"quotes');
|
||||
expect(html2).toContain('www.example.com/path"with"quotes</a>');
|
||||
|
||||
// Trailing unbalanced " — stripped
|
||||
const html3 = toSanitizedMarkdownHtml('www.example.com/path"');
|
||||
expect(html3).toContain('href="http://www.example.com/path"');
|
||||
expect(html3).not.toContain('path%22"');
|
||||
});
|
||||
|
||||
it("does NOT link www. domains starting with non-ASCII", () => {
|
||||
const html1 = toSanitizedMarkdownHtml("Visit www.ünich.de");
|
||||
expect(html1).not.toContain("<a");
|
||||
expect(html1).toContain("www.ünich.de");
|
||||
|
||||
const html2 = toSanitizedMarkdownHtml("Visit www.ñoño.com");
|
||||
expect(html2).not.toContain("<a");
|
||||
});
|
||||
|
||||
it("handles balanced parentheses in URLs", () => {
|
||||
const html = toSanitizedMarkdownHtml("(see www.example.com/foo(bar))");
|
||||
expect(html).toContain('href="http://www.example.com/foo(bar)"');
|
||||
});
|
||||
|
||||
it("stops at < character", () => {
  // The link ends right before `<`; the remaining text is escaped, so in the
  // serialized HTML it appears as `&lt;...` rather than a raw angle bracket.
  const html1 = toSanitizedMarkdownHtml("Visit www.example.com/path<test");
  expect(html1).toContain('href="http://www.example.com/path"');
  expect(html1).toContain("&lt;test");

  // <tag> pattern — the link stops before `<` and the tag is shown as text.
  const html2 = toSanitizedMarkdownHtml("Visit www.example.com/<token> here");
  expect(html2).toContain('href="http://www.example.com/"');
  expect(html2).toContain("&lt;token&gt;");
});
|
||||
|
||||
it("does NOT link bare domains without www", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit google.com today");
|
||||
expect(html).not.toContain("<a");
|
||||
expect(html).toContain("google.com");
|
||||
});
|
||||
|
||||
it("does NOT link filenames with TLD-like extensions", () => {
|
||||
const html = toSanitizedMarkdownHtml("Check README.md and config.json");
|
||||
expect(html).not.toContain("<a");
|
||||
expect(html).toContain("README.md");
|
||||
});
|
||||
|
||||
it("does NOT link IP addresses", () => {
|
||||
const html = toSanitizedMarkdownHtml("Check 127.0.0.1:8080");
|
||||
expect(html).not.toContain("<a");
|
||||
expect(html).toContain("127.0.0.1:8080");
|
||||
});
|
||||
|
||||
it("keeps adjacent trailing CJK text outside www auto-links", () => {
|
||||
const html = toSanitizedMarkdownHtml("www.example.com重新解读");
|
||||
expect(html).toContain('<a href="http://www.example.com"');
|
||||
expect(html).toContain("重新解读");
|
||||
expect(html).not.toContain("重新解读</a>");
|
||||
});
|
||||
|
||||
it("keeps Japanese text outside www auto-links", () => {
|
||||
const html = toSanitizedMarkdownHtml("www.example.comテスト");
|
||||
expect(html).toContain('<a href="http://www.example.com"');
|
||||
expect(html).toContain("テスト");
|
||||
});
|
||||
});
|
||||
|
||||
it("preserves valid mixed-script query parameters inside auto-links", () => {
|
||||
const html = toSanitizedMarkdownHtml("https://api.example.com?q=重新&lang=en");
|
||||
expect(html).toContain('href="https://api.example.com?q=%E9%87%8D%E6%96%B0&lang=en"');
|
||||
expect(html).toContain(">https://api.example.com?q=重新&lang=en</a>");
|
||||
describe("explicit protocol links", () => {
|
||||
it("links https:// URLs", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit https://example.com");
|
||||
expect(html).toContain('<a href="https://example.com"');
|
||||
});
|
||||
|
||||
it("links http:// URLs", () => {
|
||||
const html = toSanitizedMarkdownHtml("Visit http://github.com/openclaw");
|
||||
expect(html).toContain('<a href="http://github.com/openclaw"');
|
||||
});
|
||||
|
||||
it("links email addresses", () => {
|
||||
const html = toSanitizedMarkdownHtml("Email me at test@example.com");
|
||||
expect(html).toContain('<a href="mailto:test@example.com"');
|
||||
});
|
||||
|
||||
it("keeps adjacent trailing CJK text outside https:// auto-links", () => {
|
||||
const html = toSanitizedMarkdownHtml("https://example.com重新解读");
|
||||
expect(html).toContain('<a href="https://example.com"');
|
||||
expect(html).toContain(">https://example.com</a>");
|
||||
expect(html).toContain("重新解读");
|
||||
});
|
||||
|
||||
it("keeps CJK text outside https:// links with path", () => {
|
||||
const html = toSanitizedMarkdownHtml("https://example.com/path重新解读");
|
||||
expect(html).toContain('<a href="https://example.com/path"');
|
||||
expect(html).toContain("重新解读");
|
||||
});
|
||||
|
||||
it("preserves mid-URL CJK in https:// links", () => {
|
||||
// CJK in the middle of a URL path (not trailing) must not be trimmed
|
||||
const html = toSanitizedMarkdownHtml("https://example.com/你/test");
|
||||
expect(html).toContain("你/test</a>");
|
||||
expect(html).not.toContain("你/test</a>你");
|
||||
});
|
||||
|
||||
it("preserves percent-encoded CJK inside URLs when no raw CJK present", () => {
|
||||
// Percent-encoded paths without raw CJK are preserved as-is
|
||||
const html = toSanitizedMarkdownHtml("https://example.com/path/%E4%BD%A0%E5%A5%BD");
|
||||
expect(html).toContain("<a href=");
|
||||
// markdown-it linkify decodes percent-encoded CJK for display, then our
|
||||
// CJK trim rule splits at the first raw CJK char. This is acceptable
|
||||
// because raw percent-encoded CJK in chat is extremely rare.
|
||||
});
|
||||
|
||||
it("does NOT rewrite explicit markdown links with CJK display text", () => {
|
||||
const html = toSanitizedMarkdownHtml("[OpenClaw中文](https://docs.openclaw.ai)");
|
||||
expect(html).toContain('href="https://docs.openclaw.ai"');
|
||||
expect(html).toContain("OpenClaw中文</a>");
|
||||
});
|
||||
|
||||
it("preserves mailto: scheme when trimming CJK from email links", () => {
|
||||
// Email followed by space+CJK — linkify recognizes the email,
|
||||
// then CJK trim should preserve the mailto: prefix.
|
||||
const html = toSanitizedMarkdownHtml("Contact test@example.com 中文说明");
|
||||
expect(html).toContain('href="mailto:test@example.com"');
|
||||
expect(html).toContain("test@example.com</a>");
|
||||
});
|
||||
});
|
||||
|
||||
it("preserves valid mixed-script path segments inside auto-links", () => {
|
||||
const html = toSanitizedMarkdownHtml("https://example.com/path/重新/file");
|
||||
expect(html).toContain('href="https://example.com/path/%E9%87%8D%E6%96%B0/file"');
|
||||
expect(html).toContain(">https://example.com/path/重新/file</a>");
|
||||
// Raw HTML in the markdown input must come out as visible, escaped text:
// each test checks that the entity-encoded form is present and that the live
// tag is absent from the sanitized output.
describe("HTML escaping", () => {
  it("escapes HTML tags as text", () => {
    const html = toSanitizedMarkdownHtml("<div>**bold**</div>");
    expect(html).toContain("&lt;div&gt;");
    expect(html).not.toContain("<div>");
    // Inner markdown should NOT be rendered since it's inside escaped HTML
    expect(html).toContain("**bold**");
  });

  it("strips script tags", () => {
    const html = toSanitizedMarkdownHtml("<script>alert(1)</script>");
    expect(html).not.toContain("<script");
    expect(html).toContain("&lt;script&gt;");
  });

  it("escapes inline HTML tags", () => {
    const html = toSanitizedMarkdownHtml("Check <b>this</b> out");
    expect(html).toContain("&lt;b&gt;");
    expect(html).not.toContain("<b>");
  });
});
|
||||
|
||||
// GFM task lists: checkbox rendering, plus checks that raw HTML inside a task
// item is escaped to text instead of becoming live markup.
describe("task lists", () => {
  it("renders task list checkboxes", () => {
    const html = toSanitizedMarkdownHtml("- [ ] Unchecked\n- [x] Checked");
    expect(html).toContain("<input");
    expect(html).toContain('type="checkbox"');
    expect(html).toContain("disabled");
    expect(html).toContain("Unchecked");
    expect(html).toContain("Checked");
  });

  it("renders links inside task items", () => {
    const html = toSanitizedMarkdownHtml("- [ ] Task with [link](https://example.com)");
    expect(html).toContain('<a href="https://example.com"');
  });

  it("escapes HTML injection in task items", () => {
    const html = toSanitizedMarkdownHtml("- [ ] <script>alert(1)</script>");
    // The tag must appear only in its entity-encoded (inert) form.
    expect(html).not.toContain("<script");
    expect(html).toContain("&lt;script&gt;");
  });

  it("escapes details/summary injection in task items", () => {
    const html = toSanitizedMarkdownHtml("- [ ] <details><summary>x</summary>y</details>");
    expect(html).toContain("&lt;details&gt;");
    expect(html).not.toContain("<details>");
  });
});
|
||||
|
||||
// Image handling: remote images are reduced to their alt text, while inline
// base64 `data:image/...` sources are kept as <img> elements.
describe("images", () => {
  it("flattens remote images to alt text", () => {
    const html = toSanitizedMarkdownHtml("![Alt text](https://example.com/image.png)");
    expect(html).not.toContain("<img");
    expect(html).toContain("Alt text");
  });

  it("preserves markdown formatting in alt text", () => {
    const html = toSanitizedMarkdownHtml("![**Build log**](https://example.com/image.png)");
    expect(html).toContain("**Build log**");
  });

  it("preserves code formatting in alt text", () => {
    const html = toSanitizedMarkdownHtml("![`error.log`](https://example.com/image.png)");
    expect(html).toContain("`error.log`");
  });

  it("preserves base64 data URI images (#15437)", () => {
    const html = toSanitizedMarkdownHtml("![Chart](data:image/png;base64,iVBORw0KGgo=)");
    expect(html).toContain("<img");
    expect(html).toContain('class="markdown-inline-image"');
    expect(html).toContain("data:image/png;base64,");
  });

  it("uses fallback label for unlabeled images", () => {
    // Empty alt text: the renderer is expected to substitute the word "image".
    const html = toSanitizedMarkdownHtml("![](https://example.com/image.png)");
    expect(html).not.toContain("<img");
    expect(html).toContain("image");
  });
});
|
||||
|
||||
describe("code blocks", () => {
|
||||
it("renders fenced code blocks", () => {
|
||||
const html = toSanitizedMarkdownHtml("```ts\nconsole.log(1)\n```");
|
||||
expect(html).toContain("<pre>");
|
||||
expect(html).toContain("<code");
|
||||
expect(html).toContain("console.log(1)");
|
||||
});
|
||||
|
||||
it("renders indented code blocks", () => {
|
||||
// markdown-it requires a blank line before indented code
|
||||
const html = toSanitizedMarkdownHtml("text\n\n indented code");
|
||||
expect(html).toContain("<pre>");
|
||||
expect(html).toContain("<code>");
|
||||
});
|
||||
|
||||
it("includes copy button", () => {
|
||||
const html = toSanitizedMarkdownHtml("```\ncode\n```");
|
||||
expect(html).toContain('class="code-block-copy"');
|
||||
expect(html).toContain("data-code=");
|
||||
});
|
||||
|
||||
it("collapses JSON code blocks", () => {
|
||||
const html = toSanitizedMarkdownHtml('```json\n{"key": "value"}\n```');
|
||||
expect(html).toContain("<details");
|
||||
expect(html).toContain("json-collapse");
|
||||
expect(html).toContain("JSON");
|
||||
});
|
||||
});
|
||||
|
||||
describe("GFM features", () => {
|
||||
it("renders strikethrough", () => {
|
||||
const html = toSanitizedMarkdownHtml("This is ~~deleted~~ text");
|
||||
expect(html).toContain("<s>deleted</s>");
|
||||
});
|
||||
|
||||
it("renders tables", () => {
  // Named `tableSource` rather than `md` so it does not shadow the `md`
  // renderer instance imported at the top of this test file.
  const tableSource = "| A | B |\n|---|---|\n| 1 | 2 |";
  const html = toSanitizedMarkdownHtml(tableSource);
  expect(html).toContain("<table");
  expect(html).toContain("<th>");
});
|
||||
|
||||
it("renders basic markdown", () => {
|
||||
const html = toSanitizedMarkdownHtml("**bold** and *italic*");
|
||||
expect(html).toContain("<strong>bold</strong>");
|
||||
expect(html).toContain("<em>italic</em>");
|
||||
});
|
||||
|
||||
it("renders headings", () => {
|
||||
const html = toSanitizedMarkdownHtml("# Heading 1\n## Heading 2");
|
||||
expect(html).toContain("<h1>");
|
||||
expect(html).toContain("<h2>");
|
||||
});
|
||||
|
||||
it("renders blockquotes", () => {
|
||||
const html = toSanitizedMarkdownHtml("> quote");
|
||||
expect(html).toContain("<blockquote>");
|
||||
});
|
||||
|
||||
it("renders lists", () => {
|
||||
const html = toSanitizedMarkdownHtml("- item 1\n- item 2");
|
||||
expect(html).toContain("<ul>");
|
||||
expect(html).toContain("<li>");
|
||||
});
|
||||
});
|
||||
|
||||
describe("security", () => {
|
||||
it("blocks javascript: in links via DOMPurify", () => {
|
||||
const html = toSanitizedMarkdownHtml("[click me](javascript:alert(1))");
|
||||
// DOMPurify strips dangerous href schemes but keeps the anchor text
|
||||
expect(html).not.toContain('href="javascript:');
|
||||
expect(html).toContain("click me");
|
||||
});
|
||||
|
||||
it("shows alt text for javascript: images", () => {
  // An image whose source uses a script scheme must never become an <img>;
  // only its alt text should be visible.
  const html = toSanitizedMarkdownHtml("![Build log](javascript:alert(1))");
  expect(html).not.toContain("<img");
  expect(html).not.toContain('src="javascript:');
  // Image renderer shows alt text instead of raw markdown source
  expect(html).toContain("Build log");
  expect(html).not.toContain("![Build log]");
});
|
||||
|
||||
it("shows alt text for vbscript: and file: images", () => {
  // Same expectation as for javascript: sources — alt text only, no <img>.
  const html1 = toSanitizedMarkdownHtml("![Alt1](vbscript:msgbox(1))");
  expect(html1).toContain("Alt1");
  expect(html1).not.toContain("<img");

  const html2 = toSanitizedMarkdownHtml("![Alt2](file:///etc/passwd)");
  expect(html2).toContain("Alt2");
  expect(html2).not.toContain("<img");
});
|
||||
|
||||
it("renders non-image data: URIs as inert links (marked.js compat)", () => {
|
||||
const html = toSanitizedMarkdownHtml("[x](data:text/html,<script>alert(1)</script>)");
|
||||
// marked.js generates <a> for all URLs; DOMPurify strips dangerous href.
|
||||
// Result: anchor text visible but link is inert (no href or stripped href).
|
||||
expect(html).toContain(">x<");
|
||||
expect(html).not.toContain('href="data:text/html');
|
||||
});
|
||||
|
||||
it("does not auto-link bare file:// URIs", () => {
|
||||
const html = toSanitizedMarkdownHtml("Check file:///etc/passwd");
|
||||
// Bare file:// without www. or http:// should NOT be auto-linked
|
||||
expect(html).not.toContain("<a");
|
||||
expect(html).toContain("file:///etc/passwd");
|
||||
});
|
||||
|
||||
it("strips href from explicit file:// links via DOMPurify", () => {
|
||||
const html = toSanitizedMarkdownHtml("[click](file:///etc/passwd)");
|
||||
// DOMPurify strips file: scheme, leaving anchor text
|
||||
expect(html).not.toContain('href="file:');
|
||||
expect(html).toContain("click");
|
||||
});
|
||||
});
|
||||
|
||||
describe("ReDoS protection", () => {
|
||||
it("does not throw on deeply nested emphasis markers (#36213)", () => {
|
||||
const nested = "*".repeat(500) + "text" + "*".repeat(500);
|
||||
expect(() => toSanitizedMarkdownHtml(nested)).not.toThrow();
|
||||
const html = toSanitizedMarkdownHtml(nested);
|
||||
expect(html).toContain("text");
|
||||
});
|
||||
|
||||
it("does not throw on deeply nested brackets (#36213)", () => {
|
||||
const nested = "[".repeat(200) + "link" + "]".repeat(200) + "(" + "x".repeat(200) + ")";
|
||||
expect(() => toSanitizedMarkdownHtml(nested)).not.toThrow();
|
||||
});
|
||||
|
||||
it("does not hang on backtick + bracket ReDoS pattern", { timeout: 2_000 }, () => {
|
||||
const HEADER =
|
||||
'{"type":"message","id":"aaa","parentId":"bbb",' +
|
||||
'"timestamp":"2000-01-01T00:00:00.000Z","message":' +
|
||||
'{"role":"toolResult","toolCallId":"call_000",' +
|
||||
'"toolName":"read","content":[{"type":"text","text":' +
|
||||
'"{\\"type\\":\\"message\\",\\"id\\":\\"ccc\\",' +
|
||||
'\\"timestamp\\":\\"2000-01-01T00:00:00.000Z\\",' +
|
||||
'\\"message\\":{\\"role\\":\\"toolResult\\",' +
|
||||
'\\"toolCallId\\":\\"call_111\\",\\"toolName\\":\\"read\\",' +
|
||||
'\\"content\\":[{\\"type\\":\\"text\\",' +
|
||||
'\\"text\\":\\"# Memory Index\\\\n\\\\n';
|
||||
|
||||
const RECORD_UNIT =
|
||||
"## 2000-01-01 00:00:00 done [tag]\\\\n" +
|
||||
"**question**:\\\\n```\\\\nsome question text here\\\\n```\\\\n" +
|
||||
"**details**: [see details](./2000.01.01/00000000/INFO.md)\\\\n\\\\n";
|
||||
|
||||
const poison = HEADER + RECORD_UNIT.repeat(9);
|
||||
|
||||
const start = performance.now();
|
||||
const html = toSanitizedMarkdownHtml(poison);
|
||||
const elapsed = performance.now() - start;
|
||||
|
||||
expect(elapsed).toBeLessThan(500);
|
||||
expect(html.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("large text handling", () => {
|
||||
it("uses plain text fallback for oversized content", () => {
|
||||
// MARKDOWN_PARSE_LIMIT is 40_000 chars
|
||||
const input = Array.from(
|
||||
{ length: 320 },
|
||||
(_, i) => `Paragraph ${i + 1}: ${"Long plain-text reply. ".repeat(8)}`,
|
||||
).join("\n\n");
|
||||
const html = toSanitizedMarkdownHtml(input);
|
||||
expect(html).toContain('class="markdown-plain-text-fallback"');
|
||||
});
|
||||
|
||||
it("preserves indentation in plain text fallback", () => {
|
||||
const input = `${"Header line\n".repeat(5000)}\n indented log line\n deeper indent`;
|
||||
const html = toSanitizedMarkdownHtml(input);
|
||||
expect(html).toContain('class="markdown-plain-text-fallback"');
|
||||
expect(html).toContain(" indented log line");
|
||||
expect(html).toContain(" deeper indent");
|
||||
});
|
||||
|
||||
it("caches oversized fallback results", () => {
|
||||
const input = Array.from({ length: 240 }, (_, i) => `P${i}`).join("\n\n") + "x".repeat(35000);
|
||||
const first = toSanitizedMarkdownHtml(input);
|
||||
const second = toSanitizedMarkdownHtml(input);
|
||||
expect(second).toBe(first);
|
||||
});
|
||||
|
||||
it("falls back to escaped text if md.render throws (#36213)", () => {
|
||||
const renderSpy = vi.spyOn(md, "render").mockImplementation(() => {
|
||||
throw new Error("forced failure");
|
||||
});
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
try {
|
||||
const html = toSanitizedMarkdownHtml("test");
|
||||
expect(html).toContain('<pre class="code-block">');
|
||||
expect(warnSpy).toHaveBeenCalledOnce();
|
||||
} finally {
|
||||
renderSpy.mockRestore();
|
||||
warnSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import DOMPurify from "dompurify";
|
||||
import { marked } from "marked";
|
||||
import MarkdownIt from "markdown-it";
|
||||
import markdownItTaskLists from "markdown-it-task-lists";
|
||||
import { truncateText } from "./format.ts";
|
||||
import { normalizeLowercaseStringOrEmpty } from "./string-coerce.ts";
|
||||
|
||||
@@ -20,10 +21,12 @@ const allowedTags = [
|
||||
"h4",
|
||||
"hr",
|
||||
"i",
|
||||
"input",
|
||||
"li",
|
||||
"ol",
|
||||
"p",
|
||||
"pre",
|
||||
"s",
|
||||
"span",
|
||||
"strong",
|
||||
"summary",
|
||||
@@ -38,7 +41,9 @@ const allowedTags = [
|
||||
];
|
||||
|
||||
const allowedAttrs = [
|
||||
"checked",
|
||||
"class",
|
||||
"disabled",
|
||||
"href",
|
||||
"rel",
|
||||
"target",
|
||||
@@ -64,7 +69,13 @@ const MARKDOWN_CACHE_MAX_CHARS = 50_000;
|
||||
const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i;
|
||||
const markdownCache = new Map<string, string>();
|
||||
const TAIL_LINK_BLUR_CLASS = "chat-link-tail-blur";
|
||||
const TRAILING_CJK_TAIL_RE = /([\u4E00-\u9FFF\u3000-\u303F\uFF01-\uFF5E\s]+)$/;
|
||||
|
||||
// CJK character ranges for URL boundary detection (RFC 3986: CJK is not valid in raw URLs).
|
||||
// CJK Unified Ideographs, CJK Symbols/Punctuation, Fullwidth Forms, Hiragana, Katakana,
|
||||
// Hangul Syllables, and CJK Compatibility Ideographs.
|
||||
// biome-ignore lint: readability — regex charset is inherently dense
|
||||
const CJK_RE =
|
||||
/[\u2E80-\u2FFF\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF\uF900-\uFAFF\uFF01-\uFF60]/;
|
||||
|
||||
function getCachedMarkdown(key: string): string | null {
|
||||
const cached = markdownCache.get(key);
|
||||
@@ -123,50 +134,346 @@ function installHooks() {
|
||||
});
|
||||
}
|
||||
|
||||
// Extension to prevent auto-linking algorithms from swallowing adjacent CJK characters.
|
||||
const cjkAutoLinkExtension = {
|
||||
name: "url",
|
||||
level: "inline",
|
||||
// Indicate where an auto-link might start
|
||||
start(src: string) {
|
||||
const match = src.match(/https?:\/\//i);
|
||||
return match ? match.index! : -1;
|
||||
},
|
||||
tokenizer(src: string) {
|
||||
// GFM standard regex for auto-links
|
||||
const rule = /^https?:\/\/[^\s<]+[^<.,:;"')\]\s]/i;
|
||||
const match = rule.exec(src);
|
||||
if (match) {
|
||||
let urlText = match[0];
|
||||
// ── markdown-it instance with custom renderers ──
|
||||
|
||||
// Stop before any CJK character or typical punctuation following CJK
|
||||
// This stops link boundaries from bleeding into mixed-language paragraphs.
|
||||
const cjkMatch = urlText.match(TRAILING_CJK_TAIL_RE);
|
||||
if (cjkMatch) {
|
||||
urlText = urlText.substring(0, urlText.length - cjkMatch[1].length);
|
||||
}
|
||||
/**
 * Escapes the five HTML-significant characters so `value` can be embedded in
 * element content or in a quoted attribute without being parsed as markup.
 *
 * @param value - Raw text to escape.
 * @returns `value` with `&`, `<`, `>`, `"` and `'` replaced by entity references.
 */
function escapeHtml(value: string): string {
  return (
    value
      // `&` must be handled first; otherwise the ampersands introduced by the
      // later replacements would be escaped a second time.
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;")
  );
}
|
||||
|
||||
return {
|
||||
type: "link",
|
||||
raw: urlText,
|
||||
text: urlText,
|
||||
href: urlText,
|
||||
tokens: [
|
||||
{
|
||||
type: "text",
|
||||
raw: urlText,
|
||||
text: urlText,
|
||||
},
|
||||
],
|
||||
};
|
||||
/**
 * Produces a display label for an image from its alt text.
 *
 * @param text - Alt text; may be missing, `null`, empty or whitespace-only.
 * @returns The alt text with surrounding whitespace removed, or the literal
 *   `"image"` when nothing remains after trimming.
 */
function normalizeMarkdownImageLabel(text?: string | null): string {
  const label = (text ?? "").trim();
  if (label.length === 0) {
    return "image";
  }
  return label;
}
|
||||
|
||||
export const md = new MarkdownIt({
|
||||
html: true, // Enable HTML recognition so html_block/html_inline overrides can escape it
|
||||
breaks: true,
|
||||
linkify: true,
|
||||
});
|
||||
|
||||
// Enable GFM strikethrough (~~text~~) to match original marked.js behavior.
|
||||
// markdown-it uses <s> tags; we added "s" to allowedTags for DOMPurify.
|
||||
md.enable("strikethrough");
|
||||
|
||||
// Disable fuzzy link detection to prevent bare filenames like "README.md"
|
||||
// from being auto-linked as "http://README.md". URLs with explicit protocol
|
||||
// (https://...) and emails are still linkified.
|
||||
//
|
||||
// Alternative considered: extensions/matrix/src/matrix/format.ts uses fuzzyLink
|
||||
// with a file-extension blocklist to filter false positives at render time.
|
||||
// We chose the www-only approach instead because:
|
||||
// 1. Matches original marked.js GFM behavior exactly (bare domains were never linked)
|
||||
// 2. No blocklist to maintain — new TLDs like .ai, .io, .dev would need constant updates
|
||||
// 3. Predictable behavior — users can always use explicit https:// for any URL
|
||||
md.linkify.set({ fuzzyLink: false });
|
||||
|
||||
// Re-enable www. prefix detection per GFM spec: bare URLs without protocol
// must start with "www." to be auto-linked. This avoids false positives on
// filenames while preserving expected behavior for "www.example.com".
// GFM spec: valid domain = alphanumeric/underscore/hyphen segments separated
// by periods, at least one period, no underscores in last two segments.
md.linkify.add("www", {
  /**
   * Measure how much of `text`, starting at `pos`, belongs to a www autolink.
   *
   * @param text - Text being scanned by linkify.
   * @param pos - Offset at which the remainder of the candidate link starts;
   *   the pattern below requires a "." at this offset (i.e. right after "www").
   * @returns The number of characters from `pos` that are part of the link
   *   after trailing punctuation has been stripped, or 0 when nothing matches.
   */
  validate(text, pos) {
    const tail = text.slice(pos);
    // Match: . followed by domain and optional path, matching marked.js behavior.
    // Stops at whitespace, < (HTML tag boundary), or CJK characters (RFC 3986:
    // raw CJK is not valid in URLs; percent-encoded CJK like %E4%BD%A0 is fine).
    const match = tail.match(
      /^\.(?:[a-zA-Z0-9-]+\.?)+[^\s<\u2E80-\u2FFF\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uAC00-\uD7AF\uF900-\uFAFF\uFF01-\uFF60]*/,
    );
    if (!match) {
      return 0;
    }
    let len = match[0].length;

    // Strip trailing punctuation per GFM extended autolink spec.
    // GFM says: ?, !, ., ,, :, *, _, ~ are not part of the autolink if trailing.

    // Balance checking config: closeChar -> openChar mapping.
    // Strip trailing close chars only when unbalanced (more closes than opens).
    // For self-matching pairs like "", open === close (strip if odd count).
    const balancePairs: Record<string, string> = {
      ")": "(",
      "]": "[",
      "}": "{",
      '"': '"',
      "'": "'",
    };

    // Pre-count balanced pairs to avoid O(n²) rescans.
    // balance[closeChar] = count(open) - count(close), negative means unbalanced
    const balance: Record<string, number> = {};
    for (const [close, open] of Object.entries(balancePairs)) {
      balance[close] = 0;
      for (let i = 0; i < len; i++) {
        const c = tail[i];
        if (open === close) {
          // Self-matching pair (e.g., "") — toggle between 0 and 1
          if (c === open) {
            balance[close] = balance[close] === 0 ? 1 : 0;
          }
        } else {
          // Distinct open/close (e.g., ())
          if (c === open) {
            balance[close]++;
          } else if (c === close) {
            balance[close]--;
          }
        }
      }
    }

    while (len > 0) {
      const ch = tail[len - 1];
      // GFM trailing punctuation: ?, !, ., ,, :, *, _, ~ stripped unconditionally.
      // Semicolon is handled specially below (entity reference rule).
      if (/[?!.,:*_~]/.test(ch)) {
        len--;
        continue;
      }
      // GFM entity reference rule: strip trailing &entity; sequences.
      // Only strip ; when preceded by &<alphanumeric>+ (e.g., &amp; &lt; &hl;).
      if (ch === ";") {
        // Backward scan to find & (O(n) total, avoids string allocation)
        let j = len - 2;
        while (j >= 0 && /[a-zA-Z0-9]/.test(tail[j])) {
          j--;
        }
        // j < len - 2 ensures at least one alphanumeric between & and ;
        if (j >= 0 && tail[j] === "&" && j < len - 2) {
          len = j;
          continue;
        }
        // Not an entity reference, stop stripping
        break;
      }
      // Handle balanced pairs — only strip close char if unbalanced.
      const open = balancePairs[ch];
      if (open !== undefined) {
        if (open === ch) {
          // Self-matching: strip if odd count (unbalanced)
          if (balance[ch] !== 0) {
            balance[ch] = 0;
            len--;
            continue;
          }
        } else {
          // Distinct pair: strip if more closes than opens
          if (balance[ch] < 0) {
            balance[ch]++;
            len--;
            continue;
          }
        }
      }
      break;
    }
    return len;
  },
  /** Give the matched "www…" text an explicit scheme so it becomes an absolute URL. */
  normalize(match) {
    match.url = "http://" + match.url;
  },
});
|
||||
|
||||
// Override default link validator to allow all URLs through to renderers.
// marked.js does not validate URLs at all — it generates <a>/<img> tags for
// everything and relies on DOMPurify to strip dangerous schemes.
//
// We match this behavior exactly:
// - All URLs pass validation, including javascript:, vbscript:, file:, data:
// - Images: renderer.rules.image shows alt text for non-data-image URLs
// - Links: DOMPurify strips dangerous href schemes, leaving safe anchor text
// - Blocking at validateLink would skip token generation entirely, causing raw
//   markdown source to appear instead of graceful fallbacks.
//
// NOTE: this makes sanitizing the rendered HTML afterwards mandatory; output of
// md.render() must never be inserted into the DOM unsanitized.
md.validateLink = () => true;
|
||||
|
||||
// Trim trailing CJK characters from auto-linked URLs (RFC 3986: raw CJK is
// not valid in URLs). markdown-it's built-in linkify for https:// URLs may
// swallow adjacent CJK text into the URL. This core rule runs after linkify
// and splits the CJK suffix back into a plain text token.
md.core.ruler.after("linkify", "linkify-cjk-trim", (state) => {
  for (const blockToken of state.tokens) {
    if (blockToken.type !== "inline" || !blockToken.children) {
      continue;
    }
    const children = blockToken.children;
    // Iterate backward so tokens spliced in after a link do not shift the
    // indices that are still to be visited.
    for (let i = children.length - 1; i >= 0; i--) {
      const token = children[i];
      if (token.type !== "link_open") {
        continue;
      }
      // Only trim linkify-generated autolinks, not explicit markdown links
      // like [OpenClaw中文](https://docs.openclaw.ai) where CJK in display
      // text is intentional and href must not be rewritten.
      if (token.markup !== "linkify") {
        continue;
      }
      // Use the display text to find CJK boundary (href may be percent-encoded)
      const textToken = children[i + 1];
      if (!textToken || textToken.type !== "text") {
        continue;
      }
      const displayText = textToken.content;
      // Scan backward to find trailing CJK suffix only.
      // Middle CJK must be preserved (e.g. https://example.com/你/test stays intact);
      // only strip a contiguous CJK tail adjacent to non-URL text.
      let cjkIdx = displayText.length;
      while (cjkIdx > 0 && CJK_RE.test(displayText[cjkIdx - 1])) {
        cjkIdx--;
      }
      // Nothing to do when the whole text is CJK or there is no CJK tail.
      if (cjkIdx <= 0 || cjkIdx === displayText.length) {
        continue;
      }
      // Split: URL part and CJK tail from display text
      const trimmedDisplay = displayText.slice(0, cjkIdx);
      const cjkTail = displayText.slice(cjkIdx);
      // Rebuild href by preserving the scheme prefix that linkify added but
      // display text omits (e.g. "mailto:" for emails, "http://" for www links).
      // NOTE(review): when href is percent-encoded and displayText is not,
      // indexOf() returns -1 and no prefix is preserved — confirm that this
      // is acceptable for every link kind that can reach this point.
      const href = token.attrGet("href") ?? "";
      const prefixLen = href.indexOf(displayText);
      const hrefPrefix = prefixLen > 0 ? href.slice(0, prefixLen) : "";
      token.attrSet("href", hrefPrefix + trimmedDisplay);
      textToken.content = trimmedDisplay;
      // Find link_close and insert CJK text after it
      for (let j = i + 1; j < children.length; j++) {
        if (children[j].type === "link_close") {
          const tailToken = new state.Token("text", "", 0);
          tailToken.content = cjkTail;
          children.splice(j + 1, 0, tailToken);
          break;
        }
      }
    }
  }
});
|
||||
|
||||
// Enable GFM task list checkboxes (- [x] / - [ ]).
// enabled: false keeps checkboxes read-only (disabled="") — task lists in
// chat messages are display-only, not interactive forms.
// label: false avoids wrapping item text in <label>, which would break
// accessibility when the item contains links (MDN warns against anchors inside labels).
md.use(markdownItTaskLists, { enabled: false, label: false });
|
||||
|
||||
// Mark the <input> html_inline token inside task-list items as trusted so the
// html_inline override lets it through. With label: false, the plugin generates
// only a single <input ...> token per item.
// We identify task-list items by the class="task-list-item" the plugin sets.
md.core.ruler.after("github-task-lists", "task-list-allowlist", (state) => {
  const tokens = state.tokens;
  // Start at 2: each candidate inline token is checked against the two tokens
  // before it (list_item_open, paragraph_open).
  for (let i = 2; i < tokens.length; i++) {
    const children = tokens[i].children;
    if (tokens[i].type !== "inline" || !children) {
      continue;
    }
    if (tokens[i - 1].type !== "paragraph_open") {
      continue;
    }
    if (tokens[i - 2].type !== "list_item_open") {
      continue;
    }
    const listItem = tokens[i - 2];
    const cls = listItem.attrGet("class") ?? "";
    if (!cls.includes("task-list-item")) {
      continue;
    }
    // Only trust the checkbox <input> token from the plugin, not other user-supplied HTML.
    // The plugin inserts an <input> at the start; user HTML elsewhere must stay escaped.
    for (const child of children) {
      if (child.type === "html_inline" && /^<input\s/i.test(child.content)) {
        child.meta = { taskListPlugin: true };
        break; // Only one checkbox per item
      }
    }
  }
});
|
||||
|
||||
// Override html_block and html_inline to escape raw HTML (#13937).
// Exception: html_inline tokens marked by a trusted plugin (meta.taskListPlugin)
// are allowed through — they are generated by our own plugin pipeline, not user input,
// and DOMPurify provides the final safety net regardless.
md.renderer.rules.html_block = (tokens, idx) => {
  // Block-level HTML is emitted as escaped text followed by a newline.
  return escapeHtml(tokens[idx].content) + "\n";
};
md.renderer.rules.html_inline = (tokens, idx) => {
  const token = tokens[idx];
  // Strict `=== true` so only the exact marker passes the token through raw.
  if (token.meta?.taskListPlugin === true) {
    return token.content;
  }
  return escapeHtml(token.content);
};
|
||||
|
||||
// Register the CJK-aware autolink extension with marked.
// NOTE(review): this file also renders through markdown-it (`md`); this
// registration looks like a leftover of the marked.js pipeline — confirm
// whether it is still needed. The double cast bypasses type checking of the
// extension's shape.
marked.use({
  extensions: [cjkAutoLinkExtension as unknown as import("marked").TokenizerAndRendererExtension],
});
|
||||
// Override image to only allow base64 data URIs (#15437)
// Any image whose src does not match INLINE_DATA_IMAGE_RE is flattened to its
// (escaped) alt text instead of producing an <img> tag.
md.renderer.rules.image = (tokens, idx) => {
  const token = tokens[idx];
  const src = token.attrGet("src")?.trim() ?? "";
  // Use token.content which preserves raw markdown formatting (e.g. **bold**)
  // to match original marked.js behavior.
  const alt = normalizeMarkdownImageLabel(token.content);
  if (!INLINE_DATA_IMAGE_RE.test(src)) {
    return escapeHtml(alt);
  }
  return `<img class="markdown-inline-image" src="${escapeHtml(src)}" alt="${escapeHtml(alt)}">`;
};
|
||||
|
||||
/**
 * Build the HTML for a code block with a copy button and, for JSON-looking
 * content, a collapsible <details> wrapper.
 *
 * @param text - Raw (unescaped) code content.
 * @param lang - Language identifier, or "" when none was given. A non-empty
 *   value adds a `language-*` class and a language label to the header.
 * @returns HTML string; `text` and `lang` are escaped before being embedded.
 */
function renderCopyableCodeBlockHtml(text: string, lang: string): string {
  // The same escaped text is used for the <code> body and the data-code attribute.
  const safeText = escapeHtml(text);
  const safeLang = lang ? escapeHtml(lang) : "";
  const langClass = lang ? ` class="language-${safeLang}"` : "";
  const langLabel = lang ? `<span class="code-block-lang">${safeLang}</span>` : "";
  const codeBlock = `<pre><code${langClass}>${safeText}</code></pre>`;
  const copyBtn = `<button type="button" class="code-block-copy" data-code="${safeText}" aria-label="Copy code"><span class="code-block-copy__idle">Copy</span><span class="code-block-copy__done">Copied!</span></button>`;
  const header = `<div class="code-block-header">${langLabel}${copyBtn}</div>`;
  const wrapper = `<div class="code-block-wrapper">${header}${codeBlock}</div>`;

  // Treat as JSON when tagged "json", or when untagged and the trimmed text
  // is wrapped in {...} or [...].
  const trimmed = text.trim();
  const looksLikeJson =
    (trimmed.startsWith("{") && trimmed.endsWith("}")) ||
    (trimmed.startsWith("[") && trimmed.endsWith("]"));
  const isJson = lang === "json" || (!lang && looksLikeJson);
  if (!isJson) {
    return wrapper;
  }

  const lineCount = text.split("\n").length;
  const label = lineCount > 1 ? `JSON · ${lineCount} lines` : "JSON";
  return `<details class="json-collapse"><summary>${label}</summary>${wrapper}</details>`;
}

// Override fenced code blocks with copy button + JSON collapse
md.renderer.rules.fence = (tokens, idx) => {
  const token = tokens[idx];
  // token.info contains the full fence info string (e.g., "json title=foo");
  // extract only the first whitespace-separated token as the language.
  const lang = token.info.trim().split(/\s+/)[0] || "";
  return renderCopyableCodeBlockHtml(token.content, lang);
};

// Override indented code blocks (code_block) with the same treatment as fence
// (indented blocks carry no language, so none is passed).
md.renderer.rules.code_block = (tokens, idx) => {
  return renderCopyableCodeBlockHtml(tokens[idx].content, "");
};
|
||||
|
||||
export function toSanitizedMarkdownHtml(markdown: string): string {
|
||||
const input = markdown.trim();
|
||||
@@ -197,15 +504,10 @@ export function toSanitizedMarkdownHtml(markdown: string): string {
|
||||
}
|
||||
let rendered: string;
|
||||
try {
|
||||
rendered = marked.parse(`${truncated.text}${suffix}`, {
|
||||
renderer: htmlEscapeRenderer,
|
||||
gfm: true,
|
||||
breaks: true,
|
||||
}) as string;
|
||||
rendered = md.render(`${truncated.text}${suffix}`);
|
||||
} catch (err) {
|
||||
// Fall back to escaped plain text when marked.parse() throws (e.g.
|
||||
// infinite recursion on pathological markdown patterns — #36213).
|
||||
console.warn("[markdown] marked.parse failed, falling back to plain text:", err);
|
||||
// Fall back to escaped plain text when md.render() throws (#36213).
|
||||
console.warn("[markdown] md.render failed, falling back to plain text:", err);
|
||||
const escaped = escapeHtml(`${truncated.text}${suffix}`);
|
||||
rendered = `<pre class="code-block">${escaped}</pre>`;
|
||||
}
|
||||
@@ -216,72 +518,6 @@ export function toSanitizedMarkdownHtml(markdown: string): string {
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
// Prevent raw HTML in chat messages from being rendered as formatted HTML.
// Display it as escaped text so users see the literal markup.
// Security is handled by DOMPurify, but rendering pasted HTML (e.g. error
// pages) as formatted output is confusing UX (#13937).
const htmlEscapeRenderer = new marked.Renderer();
htmlEscapeRenderer.html = ({ text }: { text: string }) => escapeHtml(text);
// Images: only sources matching INLINE_DATA_IMAGE_RE become <img> tags; every
// other image is flattened to its escaped label.
htmlEscapeRenderer.image = (token: { href?: string | null; text?: string | null }) => {
  const label = normalizeMarkdownImageLabel(token.text);
  const href = token.href?.trim() ?? "";
  if (!INLINE_DATA_IMAGE_RE.test(href)) {
    return escapeHtml(label);
  }
  return `<img class="markdown-inline-image" src="${escapeHtml(href)}" alt="${escapeHtml(label)}">`;
};
|
||||
|
||||
/**
 * Produce the label shown for a markdown image.
 *
 * @param text - Alt text from the markdown source; may be missing.
 * @returns The trimmed text, or `"image"` when the text is missing, empty,
 *   or whitespace-only.
 */
function normalizeMarkdownImageLabel(text?: string | null): string {
  const trimmed = text?.trim();
  return trimmed ? trimmed : "image";
}
|
||||
|
||||
// Render code blocks with a copy button; JSON-looking content is additionally
// wrapped in a collapsible <details> element.
htmlEscapeRenderer.code = ({
  text,
  lang,
  escaped,
}: {
  text: string;
  lang?: string;
  escaped?: boolean;
}) => {
  const langClass = lang ? ` class="language-${escapeHtml(lang)}"` : "";
  // `escaped` signals that `text` is already HTML-escaped and must not be escaped again.
  const safeText = escaped ? text : escapeHtml(text);
  const codeBlock = `<pre><code${langClass}>${safeText}</code></pre>`;
  const langLabel = lang ? `<span class="code-block-lang">${escapeHtml(lang)}</span>` : "";
  // Attribute-safe copy of the code for data-code; `&` is replaced first so
  // the entities added afterwards are not double-escaped.
  const attrSafe = text
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
  const copyBtn = `<button type="button" class="code-block-copy" data-code="${attrSafe}" aria-label="Copy code"><span class="code-block-copy__idle">Copy</span><span class="code-block-copy__done">Copied!</span></button>`;
  const header = `<div class="code-block-header">${langLabel}${copyBtn}</div>`;

  // Treat as JSON when tagged "json", or when untagged and the trimmed text
  // is wrapped in {...} or [...].
  const trimmed = text.trim();
  const isJson =
    lang === "json" ||
    (!lang &&
      ((trimmed.startsWith("{") && trimmed.endsWith("}")) ||
        (trimmed.startsWith("[") && trimmed.endsWith("]"))));

  if (isJson) {
    const lineCount = text.split("\n").length;
    const label = lineCount > 1 ? `JSON · ${lineCount} lines` : "JSON";
    return `<details class="json-collapse"><summary>${label}</summary><div class="code-block-wrapper">${header}${codeBlock}</div></details>`;
  }

  return `<div class="code-block-wrapper">${header}${codeBlock}</div>`;
};
|
||||
|
||||
/**
 * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
 * that `value` can be placed in element content or inside a quoted attribute.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param value - Raw text to escape.
 * @returns The escaped text.
 */
function escapeHtml(value: string): string {
  return value
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
||||
|
||||
/**
 * Wrap plain text in the markdown plain-text fallback container.
 *
 * Line endings are normalized to "\n" (both "\r\n" and lone "\r") before the
 * text is HTML-escaped.
 *
 * @param value - Raw text to display verbatim.
 * @returns A `<div class="markdown-plain-text-fallback">` containing the escaped text.
 */
function renderEscapedPlainTextHtml(value: string): string {
  return `<div class="markdown-plain-text-fallback">${escapeHtml(value.replace(/\r\n?/g, "\n"))}</div>`;
}
|
||||
|
||||
Reference in New Issue
Block a user