mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
fix: flatten remote markdown images
This commit is contained in:
@@ -13,6 +13,8 @@ enum ChatMarkdownPreprocessor {
|
|||||||
"Chat history since last reply (untrusted, for context):",
|
"Chat history since last reply (untrusted, for context):",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
private static let markdownImagePattern = #"!\[([^\]]*)\]\(([^)]+)\)"#
|
||||||
|
|
||||||
struct InlineImage: Identifiable {
|
struct InlineImage: Identifiable {
|
||||||
let id = UUID()
|
let id = UUID()
|
||||||
let label: String
|
let label: String
|
||||||
@@ -27,8 +29,7 @@ enum ChatMarkdownPreprocessor {
|
|||||||
static func preprocess(markdown raw: String) -> Result {
|
static func preprocess(markdown raw: String) -> Result {
|
||||||
let withoutContextBlocks = self.stripInboundContextBlocks(raw)
|
let withoutContextBlocks = self.stripInboundContextBlocks(raw)
|
||||||
let withoutTimestamps = self.stripPrefixedTimestamps(withoutContextBlocks)
|
let withoutTimestamps = self.stripPrefixedTimestamps(withoutContextBlocks)
|
||||||
let pattern = #"!\[([^\]]*)\]\((data:image\/[^;]+;base64,[^)]+)\)"#
|
guard let re = try? NSRegularExpression(pattern: self.markdownImagePattern) else {
|
||||||
guard let re = try? NSRegularExpression(pattern: pattern) else {
|
|
||||||
return Result(cleaned: self.normalize(withoutTimestamps), images: [])
|
return Result(cleaned: self.normalize(withoutTimestamps), images: [])
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -44,24 +45,41 @@ enum ChatMarkdownPreprocessor {
|
|||||||
for match in matches.reversed() {
|
for match in matches.reversed() {
|
||||||
guard match.numberOfRanges >= 3 else { continue }
|
guard match.numberOfRanges >= 3 else { continue }
|
||||||
let label = ns.substring(with: match.range(at: 1))
|
let label = ns.substring(with: match.range(at: 1))
|
||||||
let dataURL = ns.substring(with: match.range(at: 2))
|
let source = ns.substring(with: match.range(at: 2))
|
||||||
|
|
||||||
let image: OpenClawPlatformImage? = {
|
|
||||||
guard let comma = dataURL.firstIndex(of: ",") else { return nil }
|
|
||||||
let b64 = String(dataURL[dataURL.index(after: comma)...])
|
|
||||||
guard let data = Data(base64Encoded: b64) else { return nil }
|
|
||||||
return OpenClawPlatformImage(data: data)
|
|
||||||
}()
|
|
||||||
images.append(InlineImage(label: label, image: image))
|
|
||||||
|
|
||||||
let start = cleaned.index(cleaned.startIndex, offsetBy: match.range.location)
|
let start = cleaned.index(cleaned.startIndex, offsetBy: match.range.location)
|
||||||
let end = cleaned.index(start, offsetBy: match.range.length)
|
let end = cleaned.index(start, offsetBy: match.range.length)
|
||||||
cleaned.replaceSubrange(start..<end, with: "")
|
if let inlineImage = self.inlineImage(label: label, source: source) {
|
||||||
|
images.append(inlineImage)
|
||||||
|
cleaned.replaceSubrange(start..<end, with: "")
|
||||||
|
} else {
|
||||||
|
cleaned.replaceSubrange(start..<end, with: self.fallbackImageLabel(label))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Result(cleaned: self.normalize(cleaned), images: images.reversed())
|
return Result(cleaned: self.normalize(cleaned), images: images.reversed())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static func inlineImage(label: String, source: String) -> InlineImage? {
|
||||||
|
let trimmed = source.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
|
guard let comma = trimmed.firstIndex(of: ","),
|
||||||
|
trimmed[..<comma].range(
|
||||||
|
of: #"^data:image\/[^;]+;base64$"#,
|
||||||
|
options: [.regularExpression, .caseInsensitive]) != nil
|
||||||
|
else {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
let b64 = String(trimmed[trimmed.index(after: comma)...])
|
||||||
|
let image = Data(base64Encoded: b64).flatMap(OpenClawPlatformImage.init(data:))
|
||||||
|
return InlineImage(label: label, image: image)
|
||||||
|
}
|
||||||
|
|
||||||
|
private static func fallbackImageLabel(_ label: String) -> String {
|
||||||
|
let trimmed = label.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
|
return trimmed.isEmpty ? "image" : trimmed
|
||||||
|
}
|
||||||
|
|
||||||
private static func stripInboundContextBlocks(_ raw: String) -> String {
|
private static func stripInboundContextBlocks(_ raw: String) -> String {
|
||||||
guard self.inboundContextHeaders.contains(where: raw.contains) else {
|
guard self.inboundContextHeaders.contains(where: raw.contains) else {
|
||||||
return raw
|
return raw
|
||||||
|
|||||||
@@ -18,6 +18,30 @@ struct ChatMarkdownPreprocessorTests {
|
|||||||
#expect(result.images.first?.image != nil)
|
#expect(result.images.first?.image != nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test func flattensRemoteMarkdownImagesIntoText() {
|
||||||
|
let base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVQIHWP4////GQAJ+wP/2hN8NwAAAABJRU5ErkJggg=="
|
||||||
|
let markdown = """
|
||||||
|

|
||||||
|
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
let result = ChatMarkdownPreprocessor.preprocess(markdown: markdown)
|
||||||
|
|
||||||
|
#expect(result.cleaned == "Leak")
|
||||||
|
#expect(result.images.count == 1)
|
||||||
|
#expect(result.images.first?.image != nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test func usesFallbackTextForUnlabeledRemoteMarkdownImages() {
|
||||||
|
let markdown = ""
|
||||||
|
|
||||||
|
let result = ChatMarkdownPreprocessor.preprocess(markdown: markdown)
|
||||||
|
|
||||||
|
#expect(result.cleaned == "image")
|
||||||
|
#expect(result.images.isEmpty)
|
||||||
|
}
|
||||||
|
|
||||||
@Test func stripsInboundUntrustedContextBlocks() {
|
@Test func stripsInboundUntrustedContextBlocks() {
|
||||||
let markdown = """
|
let markdown = """
|
||||||
Conversation info (untrusted metadata):
|
Conversation info (untrusted metadata):
|
||||||
|
|||||||
@@ -1712,6 +1712,22 @@
|
|||||||
return text.replace(/<(?=[a-zA-Z/])/g, "<");
|
return text.replace(/<(?=[a-zA-Z/])/g, "<");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i;
|
||||||
|
|
||||||
|
function normalizeMarkdownImageLabel(text) {
|
||||||
|
const trimmed = typeof text === "string" ? text.trim() : "";
|
||||||
|
return trimmed || "image";
|
||||||
|
}
|
||||||
|
|
||||||
|
function renderMarkdownImage(token) {
|
||||||
|
const label = normalizeMarkdownImageLabel(token?.text);
|
||||||
|
const href = typeof token?.href === "string" ? token.href.trim() : "";
|
||||||
|
if (!INLINE_DATA_IMAGE_RE.test(href)) {
|
||||||
|
return escapeHtml(label);
|
||||||
|
}
|
||||||
|
return `<img src="${escapeHtml(href)}" alt="${escapeHtml(label)}">`;
|
||||||
|
}
|
||||||
|
|
||||||
// Configure marked with syntax highlighting and HTML escaping for text
|
// Configure marked with syntax highlighting and HTML escaping for text
|
||||||
marked.use({
|
marked.use({
|
||||||
breaks: true,
|
breaks: true,
|
||||||
@@ -1750,6 +1766,9 @@
|
|||||||
html(token) {
|
html(token) {
|
||||||
return escapeHtml(token.text);
|
return escapeHtml(token.text);
|
||||||
},
|
},
|
||||||
|
image(token) {
|
||||||
|
return renderMarkdownImage(token);
|
||||||
|
},
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -250,4 +250,38 @@ describe("export html security hardening", () => {
|
|||||||
expect(img?.getAttribute("onerror")).toBeNull();
|
expect(img?.getAttribute("onerror")).toBeNull();
|
||||||
expect(img?.getAttribute("src")).toBe("data:application/octet-stream;base64,AAAA");
|
expect(img?.getAttribute("src")).toBe("data:application/octet-stream;base64,AAAA");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("flattens remote markdown images but keeps data-image markdown", () => {
|
||||||
|
const dataImage = "data:image/png;base64,AAAA";
|
||||||
|
const session: SessionData = {
|
||||||
|
header: { id: "session-4", timestamp: now() },
|
||||||
|
entries: [
|
||||||
|
{
|
||||||
|
id: "1",
|
||||||
|
parentId: null,
|
||||||
|
timestamp: now(),
|
||||||
|
type: "message",
|
||||||
|
message: {
|
||||||
|
role: "assistant",
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: "text",
|
||||||
|
text: `Leak:\n\n\n\n`,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
leafId: "1",
|
||||||
|
systemPrompt: "",
|
||||||
|
tools: [],
|
||||||
|
};
|
||||||
|
|
||||||
|
const { document } = renderTemplate(session);
|
||||||
|
const messages = document.getElementById("messages");
|
||||||
|
expect(messages).toBeTruthy();
|
||||||
|
expect(messages?.querySelector('img[src^="https://"]')).toBeNull();
|
||||||
|
expect(messages?.textContent).toContain("exfil");
|
||||||
|
expect(messages?.querySelector(`img[src="${dataImage}"]`)).toBeTruthy();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -30,11 +30,10 @@ describe("toSanitizedMarkdownHtml", () => {
|
|||||||
expect(html).toContain("console.log(1)");
|
expect(html).toContain("console.log(1)");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("preserves img tags with src and alt from markdown images (#15437)", () => {
|
it("flattens remote markdown images into alt text", () => {
|
||||||
const html = toSanitizedMarkdownHtml("");
|
const html = toSanitizedMarkdownHtml("");
|
||||||
expect(html).toContain("<img");
|
expect(html).not.toContain("<img");
|
||||||
expect(html).toContain('src="https://example.com/image.png"');
|
expect(html).toContain("Alt text");
|
||||||
expect(html).toContain('alt="Alt text"');
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("preserves base64 data URI images (#15437)", () => {
|
it("preserves base64 data URI images (#15437)", () => {
|
||||||
@@ -43,11 +42,17 @@ describe("toSanitizedMarkdownHtml", () => {
|
|||||||
expect(html).toContain("data:image/png;base64,");
|
expect(html).toContain("data:image/png;base64,");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("strips javascript image urls", () => {
|
it("flattens non-data markdown image urls", () => {
|
||||||
const html = toSanitizedMarkdownHtml(")");
|
const html = toSanitizedMarkdownHtml(")");
|
||||||
expect(html).toContain("<img");
|
expect(html).not.toContain("<img");
|
||||||
expect(html).not.toContain("javascript:");
|
expect(html).not.toContain("javascript:");
|
||||||
expect(html).not.toContain("src=");
|
expect(html).toContain("X");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("uses a plain fallback label for unlabeled markdown images", () => {
|
||||||
|
const html = toSanitizedMarkdownHtml("");
|
||||||
|
expect(html).not.toContain("<img");
|
||||||
|
expect(html).toContain("image");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("renders GFM markdown tables (#20410)", () => {
|
it("renders GFM markdown tables (#20410)", () => {
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ const MARKDOWN_CHAR_LIMIT = 140_000;
|
|||||||
const MARKDOWN_PARSE_LIMIT = 40_000;
|
const MARKDOWN_PARSE_LIMIT = 40_000;
|
||||||
const MARKDOWN_CACHE_LIMIT = 200;
|
const MARKDOWN_CACHE_LIMIT = 200;
|
||||||
const MARKDOWN_CACHE_MAX_CHARS = 50_000;
|
const MARKDOWN_CACHE_MAX_CHARS = 50_000;
|
||||||
|
const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i;
|
||||||
const markdownCache = new Map<string, string>();
|
const markdownCache = new Map<string, string>();
|
||||||
|
|
||||||
function getCachedMarkdown(key: string): string | null {
|
function getCachedMarkdown(key: string): string | null {
|
||||||
@@ -137,6 +138,19 @@ export function toSanitizedMarkdownHtml(markdown: string): string {
|
|||||||
// pages) as formatted output is confusing UX (#13937).
|
// pages) as formatted output is confusing UX (#13937).
|
||||||
const htmlEscapeRenderer = new marked.Renderer();
|
const htmlEscapeRenderer = new marked.Renderer();
|
||||||
htmlEscapeRenderer.html = ({ text }: { text: string }) => escapeHtml(text);
|
htmlEscapeRenderer.html = ({ text }: { text: string }) => escapeHtml(text);
|
||||||
|
htmlEscapeRenderer.image = (token: { href?: string | null; text?: string | null }) => {
|
||||||
|
const label = normalizeMarkdownImageLabel(token.text);
|
||||||
|
const href = token.href?.trim() ?? "";
|
||||||
|
if (!INLINE_DATA_IMAGE_RE.test(href)) {
|
||||||
|
return escapeHtml(label);
|
||||||
|
}
|
||||||
|
return `<img src="${escapeHtml(href)}" alt="${escapeHtml(label)}">`;
|
||||||
|
};
|
||||||
|
|
||||||
|
function normalizeMarkdownImageLabel(text?: string | null): string {
|
||||||
|
const trimmed = text?.trim();
|
||||||
|
return trimmed ? trimmed : "image";
|
||||||
|
}
|
||||||
|
|
||||||
function escapeHtml(value: string): string {
|
function escapeHtml(value: string): string {
|
||||||
return value
|
return value
|
||||||
|
|||||||
Reference in New Issue
Block a user