fix(chat/ios): downscale image attachments before send

Resize iOS chat PhotosPicker image attachments through the shared JPEG transcoder before staging/sending. Cap the long edge and payload bytes, strip source metadata, generate previews from the processed data, and add focused processor/view-model regression tests.

Fixes #68524.
Supersedes #73710.
This commit is contained in:
Val Alexander
2026-05-13 21:44:05 -05:00
committed by GitHub
parent 61ae9b7193
commit faa443a452
6 changed files with 423 additions and 11 deletions

View File

@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- iOS/chat: resize PhotosPicker image attachments to capped JPEGs before staging and sending, stripping source metadata and keeping oversized camera photos under the chat upload budget. Fixes #68524. Thanks @BunsDev.
- Codex startup: treat selectable configured OpenAI agent models as Codex runtime requirements during plugin auto-enable, startup planning, and doctor install repair, so Anthropic-primary configs can still switch to OpenAI/Codex cleanly.
- Agents: preserve source-reply delivery metadata when merging tool-returned media into the final reply, keeping message-tool-only replies deliverable and mirrored. Thanks @pashpashpash and @vincentkoc.
- macOS/companion: require system TLS trust before pinning a first-use direct `wss://` gateway certificate and honor `gateway.remote.tlsFingerprint` as the explicit pin for remote node-mode sessions, so fresh endpoints fail closed when macOS cannot trust the certificate unless configured out of band. Fixes #50642. Thanks @BunsDev.

View File

@@ -17,6 +17,7 @@ private let chatUILogger = Logger(subsystem: "ai.openclaw", category: "OpenClawC
// swiftlint:disable:next type_body_length
public final class OpenClawChatViewModel {
public static let defaultModelSelectionID = "__default__"
private static let maxAttachmentBytes = 5_000_000
public private(set) var messages: [OpenClawChatMessage] = []
public var input: String = ""
@@ -1298,11 +1299,6 @@ public final class OpenClawChatViewModel {
}
private func addImageAttachment(url: URL?, data: Data, fileName: String, mimeType: String) async {
if data.count > 5_000_000 {
self.errorText = "Attachment \(fileName) exceeds 5 MB limit"
return
}
let uti: UTType = {
if let url {
return UTType(filenameExtension: url.pathExtension) ?? .data
@@ -1314,13 +1310,33 @@ public final class OpenClawChatViewModel {
return
}
let preview = Self.previewImage(data: data)
let processed: Data
do {
processed = try await Task.detached(priority: .userInitiated) {
try ChatImageProcessor.processForUpload(data: data)
}.value
} catch {
self.errorText = "Could not process \(fileName): \(error.localizedDescription)"
return
}
if processed.count > Self.maxAttachmentBytes {
self.errorText = "Attachment \(fileName) exceeds 5 MB limit after resizing"
return
}
let outputFileName: String = {
let baseName = (fileName as NSString).deletingPathExtension
return baseName.isEmpty ? "image.jpg" : "\(baseName).jpg"
}()
let preview = Self.previewImage(data: processed)
self.attachments.append(
OpenClawPendingAttachment(
url: url,
data: data,
fileName: fileName,
mimeType: mimeType,
data: processed,
fileName: outputFileName,
mimeType: "image/jpeg",
preview: preview))
}

View File

@@ -0,0 +1,44 @@
import Foundation
/// Upload policy for chat image attachments, layered on top of the shared `JPEGTranscoder`.
///
/// The limits below are forwarded unchanged to `JPEGTranscoder.transcodeToJPEG`; this type only chooses the
/// numbers and translates transcoder failures into chat-facing errors.
public enum ChatImageProcessor {
    /// Value forwarded to the transcoder as `maxLongEdgePx`.
    public static let maxLongEdgePx = 1600

    /// Value forwarded to the transcoder as `quality`.
    public static let jpegQuality = 0.8

    /// Value forwarded to the transcoder as `maxBytes`.
    public static let maxPayloadBytes = 3_500_000

    /// Errors thrown by `processForUpload(data:)`.
    public enum ProcessError: Error, LocalizedError, Sendable {
        case notAnImage
        case decodeFailed
        case encodeFailed

        /// User-presentable explanation of the failure.
        public var errorDescription: String? {
            switch self {
            case .notAnImage:
                return "The data is not a recognizable image."
            case .decodeFailed:
                return "The image could not be decoded."
            case .encodeFailed:
                return "The image could not be resized to fit the chat upload limit."
            }
        }
    }

    /// Transcodes `data` to JPEG using the chat upload limits and returns the encoded bytes.
    ///
    /// - Parameter data: Encoded source image bytes.
    /// - Returns: The `data` member of the transcoder's result.
    /// - Throws: `ProcessError.notAnImage` when the transcoder reports `decodeFailed`,
    ///   `ProcessError.decodeFailed` when it reports `propertiesMissing`, and `ProcessError.encodeFailed` for
    ///   `sizeLimitExceeded` or any other error.
    public static func processForUpload(data: Data) throws -> Data {
        do {
            return try JPEGTranscoder.transcodeToJPEG(
                imageData: data,
                maxLongEdgePx: self.maxLongEdgePx,
                quality: self.jpegQuality,
                maxBytes: self.maxPayloadBytes).data
        } catch {
            // Translate transcoder-level failures into the chat-facing error vocabulary.
            switch error {
            case JPEGTranscodeError.decodeFailed:
                throw ProcessError.notAnImage
            case JPEGTranscodeError.propertiesMissing:
                throw ProcessError.decodeFailed
            case JPEGTranscodeError.sizeLimitExceeded:
                throw ProcessError.encodeFailed
            default:
                throw ProcessError.encodeFailed
            }
        }
    }
}

View File

@@ -37,6 +37,26 @@ public struct JPEGTranscoder: Sendable {
maxWidthPx: Int?,
quality: Double,
maxBytes: Int? = nil) throws -> (data: Data, widthPx: Int, heightPx: Int)
{
try self.transcodeToJPEG(
imageData: imageData,
maxWidthPx: maxWidthPx,
maxLongEdgePx: nil,
quality: quality,
maxBytes: maxBytes)
}
/// Re-encodes image data to JPEG, optionally downscaling so the *oriented* longest edge is <= `maxLongEdgePx`.
///
/// When `maxLongEdgePx` is provided it takes precedence over `maxWidthPx`.
/// - Important: This normalizes EXIF orientation (the output pixels are rotated if needed; orientation tag is not
/// relied on).
public static func transcodeToJPEG(
imageData: Data,
maxWidthPx: Int? = nil,
maxLongEdgePx: Int?,
quality: Double,
maxBytes: Int? = nil) throws -> (data: Data, widthPx: Int, heightPx: Int)
{
guard let src = CGImageSourceCreateWithData(imageData as CFData, nil) else {
throw JPEGTranscodeError.decodeFailed
@@ -63,6 +83,10 @@ public struct JPEGTranscoder: Sendable {
let maxDim = max(orientedWidth, orientedHeight)
var targetMaxPixelSize: Int = {
if let maxLongEdgePx, maxLongEdgePx > 0 {
guard maxDim > maxLongEdgePx else { return maxDim } // never upscale
return maxLongEdgePx
}
guard let maxWidthPx, maxWidthPx > 0 else { return maxDim }
guard orientedWidth > maxWidthPx else { return maxDim } // never upscale
@@ -81,6 +105,7 @@ public struct JPEGTranscoder: Sendable {
guard let img = CGImageSourceCreateThumbnailAtIndex(src, 0, thumbOpts as CFDictionary) else {
throw JPEGTranscodeError.decodeFailed
}
let opaqueImage = Self.flattenAlphaIfNeeded(img)
let out = NSMutableData()
guard let dest = CGImageDestinationCreateWithData(out, UTType.jpeg.identifier as CFString, 1, nil) else {
@@ -88,12 +113,12 @@ public struct JPEGTranscoder: Sendable {
}
let q = self.clampQuality(quality)
let encodeProps = [kCGImageDestinationLossyCompressionQuality: q] as CFDictionary
CGImageDestinationAddImage(dest, img, encodeProps)
CGImageDestinationAddImage(dest, opaqueImage, encodeProps)
guard CGImageDestinationFinalize(dest) else {
throw JPEGTranscodeError.encodeFailed
}
return (out as Data, img.width, img.height)
return (out as Data, opaqueImage.width, opaqueImage.height)
}
guard let maxBytes, maxBytes > 0 else {
@@ -132,4 +157,34 @@ public struct JPEGTranscoder: Sendable {
return best
}
/// Returns an image that is safe to hand to a JPEG encoder, which has no alpha channel.
///
/// Images whose `alphaInfo` already reports no alpha are returned untouched. Anything else is drawn over a
/// solid white background so ImageIO does not composite transparent pixels onto black by default. If the
/// bitmap context or the flattened image cannot be created, the original image is returned as a best effort.
private static func flattenAlphaIfNeeded(_ image: CGImage) -> CGImage {
    let opaqueLayouts: [CGImageAlphaInfo] = [.none, .noneSkipFirst, .noneSkipLast]
    if opaqueLayouts.contains(image.alphaInfo) {
        return image
    }

    let pixelsWide = image.width
    let pixelsHigh = image.height
    guard
        let canvas = CGContext(
            data: nil,
            width: pixelsWide,
            height: pixelsHigh,
            bitsPerComponent: 8,
            bytesPerRow: 0,
            space: CGColorSpaceCreateDeviceRGB(),
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue)
    else {
        return image
    }

    // Paint the white backdrop first, then composite the source image on top of it.
    let bounds = CGRect(x: 0, y: 0, width: pixelsWide, height: pixelsHigh)
    canvas.setFillColor(CGColor(red: 1, green: 1, blue: 1, alpha: 1))
    canvas.fill(bounds)
    canvas.draw(image, in: bounds)

    guard let flattened = canvas.makeImage() else {
        return image
    }
    return flattened
}
}

View File

@@ -0,0 +1,187 @@
import CoreGraphics
import Foundation
import ImageIO
import Testing
import UniformTypeIdentifiers
@testable import OpenClawKit
/// Behavioral tests for `ChatImageProcessor.processForUpload(data:)`, driven by synthetic in-memory images.
struct ChatImageProcessorTests {
    // MARK: - Fixtures

    /// Builds the error thrown when a fixture step fails; `code` identifies the failing step.
    private func fixtureError(_ code: Int) -> NSError {
        NSError(domain: "ChatImageProcessorTests", code: code)
    }

    /// Renders a two-tone bitmap and encodes it as a JPEG (quality 0.95) that carries EXIF, GPS, and TIFF
    /// metadata, so tests can check what survives processing.
    private func syntheticJPEG(width: Int, height: Int) throws -> Data {
        guard
            let canvas = CGContext(
                data: nil,
                width: width,
                height: height,
                bitsPerComponent: 8,
                bytesPerRow: width * 4,
                space: CGColorSpaceCreateDeviceRGB(),
                bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)
        else {
            throw self.fixtureError(1)
        }

        // Full-size background, then a quarter-size rectangle at the origin so the image is not one flat color.
        canvas.setFillColor(CGColor(red: 0.8, green: 0.2, blue: 0.4, alpha: 1))
        canvas.fill(CGRect(x: 0, y: 0, width: width, height: height))
        canvas.setFillColor(CGColor(red: 0.1, green: 0.7, blue: 0.3, alpha: 1))
        canvas.fill(CGRect(x: 0, y: 0, width: width / 2, height: height / 2))

        guard let bitmap = canvas.makeImage() else {
            throw self.fixtureError(2)
        }

        let encoded = NSMutableData()
        guard let encoder = CGImageDestinationCreateWithData(encoded, UTType.jpeg.identifier as CFString, 1, nil)
        else {
            throw self.fixtureError(3)
        }

        // The string values below are the needles searched for by the metadata-stripping test.
        let metadata: [CFString: Any] = [
            kCGImageDestinationLossyCompressionQuality: 0.95,
            kCGImagePropertyExifDictionary: [
                kCGImagePropertyExifDateTimeOriginal: "2026:04:20 16:30:00",
                kCGImagePropertyExifLensModel: "Leaky Lens 50mm f/1.4",
            ] as CFDictionary,
            kCGImagePropertyGPSDictionary: [
                kCGImagePropertyGPSLatitude: 60.02,
                kCGImagePropertyGPSLatitudeRef: "N",
                kCGImagePropertyGPSLongitude: 10.95,
                kCGImagePropertyGPSLongitudeRef: "E",
            ] as CFDictionary,
            kCGImagePropertyTIFFDictionary: [
                kCGImagePropertyTIFFMake: "LeakCorp",
                kCGImagePropertyTIFFModel: "Privacy-Leaker-1",
            ] as CFDictionary,
        ]
        CGImageDestinationAddImage(encoder, bitmap, metadata as CFDictionary)
        guard CGImageDestinationFinalize(encoder) else {
            throw self.fixtureError(4)
        }
        return encoded as Data
    }

    /// Renders a cleared (fully transparent) canvas with an opaque red rectangle in the middle and encodes it
    /// as PNG.
    private func syntheticPNGWithAlpha(width: Int, height: Int) throws -> Data {
        guard
            let canvas = CGContext(
                data: nil,
                width: width,
                height: height,
                bitsPerComponent: 8,
                bytesPerRow: width * 4,
                space: CGColorSpaceCreateDeviceRGB(),
                bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)
        else {
            throw self.fixtureError(5)
        }

        canvas.clear(CGRect(x: 0, y: 0, width: width, height: height))
        canvas.setFillColor(CGColor(red: 1, green: 0, blue: 0, alpha: 1))
        canvas.fill(CGRect(x: width / 4, y: height / 4, width: width / 2, height: height / 2))

        guard let bitmap = canvas.makeImage() else {
            throw self.fixtureError(6)
        }

        let encoded = NSMutableData()
        guard let encoder = CGImageDestinationCreateWithData(encoded, UTType.png.identifier as CFString, 1, nil)
        else {
            throw self.fixtureError(7)
        }
        CGImageDestinationAddImage(encoder, bitmap, nil)
        guard CGImageDestinationFinalize(encoder) else {
            throw self.fixtureError(8)
        }
        return encoded as Data
    }

    // MARK: - Inspection helpers

    /// Returns the ImageIO property dictionary of the first image in `data`, or an empty dictionary when the
    /// data cannot be read.
    private func properties(for data: Data) -> [CFString: Any] {
        guard let source = CGImageSourceCreateWithData(data as CFData, nil) else {
            return [:]
        }
        let copied = CGImageSourceCopyPropertiesAtIndex(source, 0, nil) as? [CFString: Any]
        return copied ?? [:]
    }

    /// Returns the pixel width and height reported by ImageIO for `data`, or `nil` when either is missing.
    private func dimensions(for data: Data) -> (width: Int, height: Int)? {
        let metadata = self.properties(for: data)
        guard
            let width = metadata[kCGImagePropertyPixelWidth] as? NSNumber,
            let height = metadata[kCGImagePropertyPixelHeight] as? NSNumber
        else {
            return nil
        }
        return (width.intValue, height.intValue)
    }

    // MARK: - Resizing

    @Test func `resizes landscape long edge to upload limit`() throws {
        let input = try self.syntheticJPEG(width: 4000, height: 3000)
        let processed = try ChatImageProcessor.processForUpload(data: input)
        let size = try #require(self.dimensions(for: processed))
        let aspect = Double(size.width) / Double(size.height)

        #expect(max(size.width, size.height) <= ChatImageProcessor.maxLongEdgePx)
        #expect(abs(aspect - (4000.0 / 3000.0)) <= 0.02)
    }

    @Test func `resizes portrait long edge to upload limit`() throws {
        let input = try self.syntheticJPEG(width: 3000, height: 4000)
        let processed = try ChatImageProcessor.processForUpload(data: input)
        let size = try #require(self.dimensions(for: processed))
        let aspect = Double(size.width) / Double(size.height)

        #expect(max(size.width, size.height) <= ChatImageProcessor.maxLongEdgePx)
        #expect(abs(aspect - (3000.0 / 4000.0)) <= 0.02)
    }

    @Test func `resizes narrow tall long edge to upload limit`() throws {
        let input = try self.syntheticJPEG(width: 1080, height: 2400)
        let processed = try ChatImageProcessor.processForUpload(data: input)
        let size = try #require(self.dimensions(for: processed))
        let aspect = Double(size.width) / Double(size.height)

        #expect(max(size.width, size.height) <= ChatImageProcessor.maxLongEdgePx)
        #expect(abs(aspect - (1080.0 / 2400.0)) <= 0.02)
    }

    @Test func `small image is not upscaled`() throws {
        let input = try self.syntheticJPEG(width: 400, height: 300)
        let processed = try ChatImageProcessor.processForUpload(data: input)
        let size = try #require(self.dimensions(for: processed))

        #expect(max(size.width, size.height) <= 400)
    }

    // MARK: - Payload and error handling

    @Test func `output fits payload budget`() throws {
        let input = try self.syntheticJPEG(width: 4000, height: 3000)
        let processed = try ChatImageProcessor.processForUpload(data: input)

        #expect(processed.count <= ChatImageProcessor.maxPayloadBytes)
    }

    @Test func `rejects non image data`() {
        let garbage = Data("not an image".utf8)

        #expect(throws: ChatImageProcessor.ProcessError.self) {
            _ = try ChatImageProcessor.processForUpload(data: garbage)
        }
    }

    // MARK: - Privacy and alpha

    @Test func `strips source metadata from output`() throws {
        let input = try self.syntheticJPEG(width: 3000, height: 2000)
        let processed = try ChatImageProcessor.processForUpload(data: input)

        let metadata = self.properties(for: processed)
        let gps = metadata[kCGImagePropertyGPSDictionary] as? [CFString: Any] ?? [:]
        #expect(gps.isEmpty)

        // Scan the raw bytes as well, so a string that ImageIO does not surface as a property is still caught.
        for needle in ["Leaky Lens", "LeakCorp", "Privacy-Leaker", "2026:04:20"] {
            #expect(processed.range(of: Data(needle.utf8)) == nil)
        }
    }

    @Test func `flattens transparent sources to opaque JPEG`() throws {
        let input = try self.syntheticPNGWithAlpha(width: 800, height: 600)
        let processed = try ChatImageProcessor.processForUpload(data: input)

        let decoder = try #require(CGImageSourceCreateWithData(processed as CFData, nil))
        let decoded = try #require(CGImageSourceCreateImageAtIndex(decoder, 0, nil))
        let opaqueLayouts: [CGImageAlphaInfo] = [.none, .noneSkipFirst, .noneSkipLast]
        #expect(opaqueLayouts.contains(decoded.alphaInfo))
    }
}

View File

@@ -0,0 +1,109 @@
import CoreGraphics
import Foundation
import ImageIO
import OpenClawKit
import UniformTypeIdentifiers
import XCTest
@testable import OpenClawChatUI
/// Minimal `OpenClawChatTransport` stub for attachment tests: history and send always throw, health always
/// succeeds, and the event stream never yields.
private struct AttachmentProcessingTransport: OpenClawChatTransport {
    /// Always throws; these tests do not load history.
    func requestHistory(sessionKey _: String) async throws -> OpenClawChatHistoryPayload {
        let failure = NSError(domain: "ChatViewModelAttachmentTests", code: 1)
        throw failure
    }

    /// Always throws; these tests stop at staging and never send.
    func sendMessage(
        sessionKey _: String,
        message _: String,
        thinking _: String,
        idempotencyKey _: String,
        attachments _: [OpenClawChatAttachmentPayload]) async throws -> OpenClawChatSendResponse
    {
        let failure = NSError(domain: "ChatViewModelAttachmentTests", code: 2)
        throw failure
    }

    /// Reports a healthy transport regardless of the timeout.
    func requestHealth(timeoutMs _: Int) async throws -> Bool {
        return true
    }

    /// Returns a stream whose builder closure yields nothing.
    func events() -> AsyncStream<OpenClawChatTransportEvent> {
        return AsyncStream { _ in }
    }
}
/// Builds an in-memory JPEG fixture of the requested pixel size.
///
/// The bitmap is a solid background with a quarter-size rectangle of a second color at the origin, encoded at
/// lossy quality 0.95.
///
/// - Throws: An `NSError` in the `ChatViewModelAttachmentTests` domain whose code (3–6) identifies the step
///   that failed: context creation, image creation, destination creation, or finalization.
private func makeChatAttachmentJPEG(width: Int, height: Int) throws -> Data {
    let errorDomain = "ChatViewModelAttachmentTests"

    guard
        let canvas = CGContext(
            data: nil,
            width: width,
            height: height,
            bitsPerComponent: 8,
            bytesPerRow: width * 4,
            space: CGColorSpaceCreateDeviceRGB(),
            bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue)
    else {
        throw NSError(domain: errorDomain, code: 3)
    }

    let fullFrame = CGRect(x: 0, y: 0, width: width, height: height)
    let quarterFrame = CGRect(x: 0, y: 0, width: width / 2, height: height / 2)
    canvas.setFillColor(CGColor(red: 0.2, green: 0.4, blue: 0.8, alpha: 1))
    canvas.fill(fullFrame)
    canvas.setFillColor(CGColor(red: 0.9, green: 0.5, blue: 0.1, alpha: 1))
    canvas.fill(quarterFrame)

    guard let bitmap = canvas.makeImage() else {
        throw NSError(domain: errorDomain, code: 4)
    }

    let encoded = NSMutableData()
    guard let encoder = CGImageDestinationCreateWithData(encoded, UTType.jpeg.identifier as CFString, 1, nil) else {
        throw NSError(domain: errorDomain, code: 5)
    }

    let encodeOptions = [kCGImageDestinationLossyCompressionQuality: 0.95] as CFDictionary
    CGImageDestinationAddImage(encoder, bitmap, encodeOptions)
    guard CGImageDestinationFinalize(encoder) else {
        throw NSError(domain: errorDomain, code: 6)
    }
    return encoded as Data
}
/// Reads the pixel dimensions ImageIO reports for the first image in `data`.
///
/// - Returns: `(width, height)` in pixels, or `nil` when an image source cannot be created, its properties
///   cannot be copied, or either dimension is missing.
private func chatAttachmentDimensions(for data: Data) -> (width: Int, height: Int)? {
    guard let imageSource = CGImageSourceCreateWithData(data as CFData, nil) else {
        return nil
    }
    guard let metadata = CGImageSourceCopyPropertiesAtIndex(imageSource, 0, nil) as? [CFString: Any] else {
        return nil
    }
    guard
        let pixelWidth = metadata[kCGImagePropertyPixelWidth] as? NSNumber,
        let pixelHeight = metadata[kCGImagePropertyPixelHeight] as? NSNumber
    else {
        return nil
    }
    return (pixelWidth.intValue, pixelHeight.intValue)
}
/// Regression tests for the image-attachment staging path of `OpenClawChatViewModel`.
final class ChatViewModelAttachmentTests: XCTestCase {
    /// Stages a 3000x4000 JPEG under a `.heic` file name and checks that the staged attachment is renamed to
    /// `camera.jpg`, typed `image/jpeg`, and within the `ChatImageProcessor` byte and long-edge limits, with no
    /// `errorText` set on the view model.
    func testImageAttachmentsAreProcessedBeforeStaging() async throws {
        let imageData = try makeChatAttachmentJPEG(width: 3000, height: 4000)
        let viewModel = await MainActor.run {
            OpenClawChatViewModel(sessionKey: "main", transport: AttachmentProcessingTransport())
        }

        await MainActor.run {
            viewModel.addImageAttachment(data: imageData, fileName: "camera.heic", mimeType: "image/jpeg")
        }

        // Staging is asynchronous; wait until it either succeeds or reports an error.
        try await waitUntil("attachment processed") {
            await MainActor.run { !viewModel.attachments.isEmpty || viewModel.errorText != nil }
        }

        // Capture the staged attachment and the error text in one main-actor hop, so a failed staging reports
        // the view model's own error message instead of an opaque placeholder error.
        let snapshot = await MainActor.run {
            (
                staged: viewModel.attachments.first.map { ($0.fileName, $0.mimeType, $0.data) },
                errorText: viewModel.errorText)
        }
        let attachment = try XCTUnwrap(
            snapshot.staged,
            "No attachment was staged; errorText: \(snapshot.errorText ?? "nil")")

        let dimensions = try XCTUnwrap(chatAttachmentDimensions(for: attachment.2))
        XCTAssertEqual(attachment.0, "camera.jpg")
        XCTAssertEqual(attachment.1, "image/jpeg")
        XCTAssertLessThanOrEqual(attachment.2.count, ChatImageProcessor.maxPayloadBytes)
        XCTAssertLessThanOrEqual(max(dimensions.width, dimensions.height), ChatImageProcessor.maxLongEdgePx)
        XCTAssertNil(snapshot.errorText)
    }
}