fix: flatten remote markdown images

This commit is contained in:
Ayaan Zaidi
2026-03-07 19:16:11 +05:30
committed by Ayaan Zaidi
parent 53a7e3b6e5
commit 4bf902de58
6 changed files with 133 additions and 19 deletions

View File

@@ -13,6 +13,8 @@ enum ChatMarkdownPreprocessor {
"Chat history since last reply (untrusted, for context):",
]
private static let markdownImagePattern = #"!\[([^\]]*)\]\(([^)]+)\)"#
struct InlineImage: Identifiable {
let id = UUID()
let label: String
@@ -27,8 +29,7 @@ enum ChatMarkdownPreprocessor {
static func preprocess(markdown raw: String) -> Result {
let withoutContextBlocks = self.stripInboundContextBlocks(raw)
let withoutTimestamps = self.stripPrefixedTimestamps(withoutContextBlocks)
let pattern = #"!\[([^\]]*)\]\((data:image\/[^;]+;base64,[^)]+)\)"#
guard let re = try? NSRegularExpression(pattern: pattern) else {
guard let re = try? NSRegularExpression(pattern: self.markdownImagePattern) else {
return Result(cleaned: self.normalize(withoutTimestamps), images: [])
}
@@ -44,24 +45,41 @@ enum ChatMarkdownPreprocessor {
for match in matches.reversed() {
guard match.numberOfRanges >= 3 else { continue }
let label = ns.substring(with: match.range(at: 1))
let dataURL = ns.substring(with: match.range(at: 2))
let image: OpenClawPlatformImage? = {
guard let comma = dataURL.firstIndex(of: ",") else { return nil }
let b64 = String(dataURL[dataURL.index(after: comma)...])
guard let data = Data(base64Encoded: b64) else { return nil }
return OpenClawPlatformImage(data: data)
}()
images.append(InlineImage(label: label, image: image))
let source = ns.substring(with: match.range(at: 2))
let start = cleaned.index(cleaned.startIndex, offsetBy: match.range.location)
let end = cleaned.index(start, offsetBy: match.range.length)
cleaned.replaceSubrange(start..<end, with: "")
if let inlineImage = self.inlineImage(label: label, source: source) {
images.append(inlineImage)
cleaned.replaceSubrange(start..<end, with: "")
} else {
cleaned.replaceSubrange(start..<end, with: self.fallbackImageLabel(label))
}
}
return Result(cleaned: self.normalize(cleaned), images: images.reversed())
}
/// Builds an inline image from a markdown image source when that source is a
/// base64 `data:image/...` URL.
///
/// - Parameters:
///   - label: Alt text captured from the markdown image syntax.
///   - source: Raw text captured between the parentheses of the markdown image.
/// - Returns: `nil` when `source` is not a base64 image data URL, so the caller can
///   substitute plain text; otherwise an `InlineImage` whose `image` is `nil` if the
///   payload does not decode into an image.
private static func inlineImage(label: String, source: String) -> InlineImage? {
    let trimmed = source.trimmingCharacters(in: .whitespacesAndNewlines)
    guard let comma = trimmed.firstIndex(of: ",") else { return nil }

    // Everything before the first comma must be exactly the
    // `data:image/<subtype>;base64` header (matched case-insensitively).
    let header = trimmed[..<comma]
    guard header.range(
        of: #"^data:image\/[^;]+;base64$"#,
        options: [.regularExpression, .caseInsensitive]) != nil
    else {
        return nil
    }

    let payload = String(trimmed[trimmed.index(after: comma)...])
    // Strict base64 decoding rejects payloads that contain line breaks or spaces
    // (e.g. a wrapped data URL), so skip characters outside the base64 alphabet
    // rather than losing the image.
    let image = Data(base64Encoded: payload, options: .ignoreUnknownCharacters)
        .flatMap(OpenClawPlatformImage.init(data:))
    return InlineImage(label: label, image: image)
}
/// Produces the plain text that stands in for a markdown image that is not inlined.
///
/// - Parameter label: Alt text taken from the markdown image syntax.
/// - Returns: `label` with surrounding whitespace and newlines removed, or `"image"`
///   when nothing remains after trimming.
private static func fallbackImageLabel(_ label: String) -> String {
    let text = label.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !text.isEmpty else { return "image" }
    return text
}
private static func stripInboundContextBlocks(_ raw: String) -> String {
guard self.inboundContextHeaders.contains(where: raw.contains) else {
return raw

View File

@@ -18,6 +18,30 @@ struct ChatMarkdownPreprocessorTests {
#expect(result.images.first?.image != nil)
}
/// A remote image collapses to its alt text, while a base64 data-URL image in the
/// same input is still extracted as an inline image.
@Test
func flattensRemoteMarkdownImagesIntoText() {
    let base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVQIHWP4////GQAJ+wP/2hN8NwAAAABJRU5ErkJggg=="
    let input = """
    ![Leak](https://example.com/collect?x=1)
    ![Pixel](data:image/png;base64,\(base64))
    """

    let output = ChatMarkdownPreprocessor.preprocess(markdown: input)

    // Only the remote image's label survives as text; the data URL becomes the sole image.
    #expect(output.cleaned == "Leak")
    #expect(output.images.count == 1)
    #expect(output.images.first?.image != nil)
}
/// A remote image with empty alt text is replaced by the generic word "image"
/// and produces no inline images.
@Test
func usesFallbackTextForUnlabeledRemoteMarkdownImages() {
    let input = "![](https://example.com/image.png)"

    let output = ChatMarkdownPreprocessor.preprocess(markdown: input)

    #expect(output.cleaned == "image")
    #expect(output.images.isEmpty)
}
@Test func stripsInboundUntrustedContextBlocks() {
let markdown = """
Conversation info (untrusted metadata):