diff --git a/CHANGELOG.md b/CHANGELOG.md
index 941a6d6e0ec..4cdf71abbd5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- Harden exported markdown link rendering [AI]. (#80902) Thanks @pgondhi987.
 - fix(gateway): honor minimal discovery mode for wide-area DNS-SD [AI]. (#80903) Thanks @pgondhi987.
 - slack: enforce reaction notification policy [AI]. (#80907) Thanks @pgondhi987.
 - Enforce gateway command scopes by caller context [AI]. (#80891) Thanks @pgondhi987.
diff --git a/src/auto-reply/reply/export-html/template.js b/src/auto-reply/reply/export-html/template.js
index da12d2625cf..40303cb5198 100644
--- a/src/auto-reply/reply/export-html/template.js
+++ b/src/auto-reply/reply/export-html/template.js
@@ -1732,6 +1732,95 @@
         return `${escapeHtmlAttr(label)}`;
       }
 
+      // Schemes exported markdown links may keep; any other explicit scheme is flattened to text.
+      const SAFE_MARKDOWN_LINK_PROTOCOLS = new Set(["http:", "https:", "mailto:", "tel:", "ftp:"]);
+
+      /**
+       * Convert the digits of a numeric character reference into the character it names.
+       * Out-of-range values and lone surrogates decode to "" so they cannot split a scheme.
+       */
+      function decodeMarkdownHrefCodePoint(value, radix) {
+        const codePoint = Number.parseInt(value, radix);
+        if (
+          !Number.isFinite(codePoint) ||
+          codePoint < 0 ||
+          codePoint > 0x10ffff ||
+          (codePoint >= 0xd800 && codePoint <= 0xdfff)
+        ) {
+          return "";
+        }
+        return String.fromCodePoint(codePoint);
+      }
+
+      /**
+       * Decode the character references that can disguise a URL scheme: numeric references
+       * plus the named colon, tab and newline entities (matched case-insensitively).
+       */
+      function decodeMarkdownHrefEntities(text) {
+        return text.replace(
+          // Numeric references may omit the trailing ";" because HTML parsers still decode them.
+          /&(?:#(\d+);?|#x([\da-f]+);?|(colon|tab|newline);)/gi,
+          (_match, decimal, hex, named) => {
+            if (decimal) {
+              return decodeMarkdownHrefCodePoint(decimal, 10);
+            }
+            if (hex) {
+              return decodeMarkdownHrefCodePoint(hex, 16);
+            }
+            if (named?.toLowerCase() === "tab") {
+              return "\t";
+            }
+            if (named?.toLowerCase() === "newline") {
+              return "\n";
+            }
+            return ":";
+          },
+        );
+      }
+
+      /**
+       * Return the lower-cased scheme of `href` (including the ":"), or null when it has none.
+       * Entities are decoded and control, zero-width and whitespace characters are removed first.
+       */
+      function getMarkdownHrefProtocol(href) {
+        const normalized = decodeMarkdownHrefEntities(href)
+          .replace(/[\u0000-\u001f\u007f\u200b-\u200f\u2028\u2029\ufeff\s]+/g, "")
+          .trim();
+        const match = /^([a-z][a-z0-9+.-]*):/i.exec(normalized);
+        return match ? `${match[1].toLowerCase()}:` : null;
+      }
+
+      /**
+       * Empty and scheme-less (relative, fragment) hrefs are safe; an explicit scheme must be
+       * listed in SAFE_MARKDOWN_LINK_PROTOCOLS.
+       */
+      function isSafeMarkdownLinkHref(href) {
+        const trimmed = typeof href === "string" ? href.trim() : "";
+        if (!trimmed) {
+          return true;
+        }
+        const protocol = getMarkdownHrefProtocol(trimmed);
+        return protocol === null || SAFE_MARKDOWN_LINK_PROTOCOLS.has(protocol);
+      }
+
+      /**
+       * marked `link` renderer: emits an anchor for safe hrefs and only the link text otherwise.
+       * Must be invoked with the marked renderer as `this` (it reads `this.parser`).
+       */
+      function renderMarkdownLink(token) {
+        const text = this.parser.parseInline(token.tokens);
+        const href = typeof token?.href === "string" ? token.href.trim() : "";
+        if (!isSafeMarkdownLinkHref(href)) {
+          return text;
+        }
+
+        let html = `<a href="${escapeHtmlAttr(href)}"`;
+        if (token.title) {
+          html += ` title="${escapeHtmlAttr(token.title)}"`;
+        }
+        return `${html}>${text}</a>`;
+      }
+
       // Configure marked with syntax highlighting and HTML escaping for text
       marked.use({
         breaks: true,
@@ -1773,6 +1862,9 @@
           image(token) {
             return renderMarkdownImage(token);
           },
+          link(token) {
+            return renderMarkdownLink.call(this, token);
+          },
         },
       });
 
diff --git a/src/auto-reply/reply/export-html/template.security.test.ts b/src/auto-reply/reply/export-html/template.security.test.ts
index ec6b3cc123f..d9ae6badce7 100644
--- a/src/auto-reply/reply/export-html/template.security.test.ts
+++ b/src/auto-reply/reply/export-html/template.security.test.ts
@@ -414,10 +414,91 @@ describe("export html security hardening", () => {
     requireElement(messages.querySelector(`img[src="${dataImage}"]`), "data markdown image missing");
   });
 
+  it("flattens unsafe markdown links while preserving safe links", async () => {
+    const session: SessionData = {
+      header: { id: "session-5", timestamp: now() },
+      entries: [
+        {
+          id: "1",
+          parentId: null,
+          timestamp: now(),
+          type: "message",
+          message: {
+            role: "user",
+            content: [
+              // Each unsafe payload disguises "javascript:" differently; the safe link must survive.
+              "[script](javascript:alert(1))",
+              "[encoded](&#106;avascript:alert(2))",
+              "[split](java&Tab;script:alert(3))",
+              "[zero-width](java\u200bscript:alert(4))",
+              "[surrogate](java&#xD800;script:alert(5))",
+              '[safe](https://example.com/report "report")',
+            ].join("\n"),
+          },
+        },
+        {
+          id: "2",
+          parentId: "1",
+          timestamp: now(),
+          type: "message",
+          message: {
+            role: "assistant",
+            content: [
+              {
+                type: "text",
+                text: "[data](data:text/html;base64,PGgxPnBvYzwvaDE+) [mail](mailto:test@example.com)",
+              },
+            ],
+          },
+        },
+        {
+          id: "3",
+          parentId: "2",
+          timestamp: now(),
+          type: "branch_summary",
+          summary: "[relative](./notes.md)",
+        },
+        {
+          id: "4",
+          parentId: "3",
+          timestamp: now(),
+          type: "custom_message",
+          customType: "x",
+          display: true,
+          content: "[hash](#entry-1)",
+        },
+      ],
+      leafId: "4",
+      systemPrompt: "",
+      tools: [],
+    };
+
+    const { document } = await renderTemplate(session);
+    const messages = requireElement(document.getElementById("messages"), "messages root missing");
+    const hrefs = Array.from(messages.querySelectorAll("a"), (link) => link.getAttribute("href"));
+
+    expect(hrefs).toEqual([
+      "https://example.com/report",
+      "mailto:test@example.com",
+      "./notes.md",
+      "#entry-1",
+    ]);
+    expect(messages.querySelector("a")?.getAttribute("title")).toBe("report");
+    expect(messages.textContent).toContain("script");
+    expect(messages.textContent).toContain("encoded");
+    expect(messages.textContent).toContain("split");
+    expect(messages.textContent).toContain("zero-width");
+    expect(messages.textContent).toContain("surrogate");
+    expect(messages.textContent).toContain("data");
+    expect(hrefs.some((href) => href?.startsWith("javascript:") || href?.startsWith("data:"))).toBe(
+      false,
+    );
+  });
+
   it("escapes markdown data-image attributes", async () => {
     const dataImage = "data:image/png;base64,AAAA";
     const session: SessionData = {
-      header: { id: "session-5", timestamp: now() },
+      header: { id: "session-6", timestamp: now() },
       entries: [
         {
           id: "1",