diff --git a/CHANGELOG.md b/CHANGELOG.md index 3435be75229..3cd429f34ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- iOS/chat: resize PhotosPicker image attachments to capped JPEGs before staging and sending, stripping source metadata and keeping oversized camera photos under the chat upload budget. Fixes #68524. Thanks @BunsDev. - Codex startup: treat selectable configured OpenAI agent models as Codex runtime requirements during plugin auto-enable, startup planning, and doctor install repair, so Anthropic-primary configs can still switch to OpenAI/Codex cleanly. - Agents: preserve source-reply delivery metadata when merging tool-returned media into the final reply, keeping message-tool-only replies deliverable and mirrored. Thanks @pashpashpash and @vincentkoc. - macOS/companion: require system TLS trust before pinning a first-use direct `wss://` gateway certificate and honor `gateway.remote.tlsFingerprint` as the explicit pin for remote node-mode sessions, so fresh endpoints fail closed when macOS cannot trust the certificate unless configured out of band. Fixes #50642. Thanks @BunsDev. diff --git a/apps/shared/OpenClawKit/Sources/OpenClawChatUI/ChatViewModel.swift b/apps/shared/OpenClawKit/Sources/OpenClawChatUI/ChatViewModel.swift index d5601c86415..ac8fbb2d2ab 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawChatUI/ChatViewModel.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawChatUI/ChatViewModel.swift @@ -17,6 +17,7 @@ private let chatUILogger = Logger(subsystem: "ai.openclaw", category: "OpenClawC // swiftlint:disable:next type_body_length public final class OpenClawChatViewModel { public static let defaultModelSelectionID = "__default__" + private static let maxAttachmentBytes = 5_000_000 public private(set) var messages: [OpenClawChatMessage] = [] public var input: String = "" @@ -1298,11 +1299,6 @@ public final class OpenClawChatViewModel { } private func addImageAttachment(url: URL?, data: Data, fileName: String, mimeType: String) async { - if data.count > 5_000_000 { - self.errorText = "Attachment \(fileName) exceeds 5 MB limit" - return - } - let uti: UTType = { if let url { return UTType(filenameExtension: url.pathExtension) ?? .data @@ -1314,13 +1310,33 @@ public final class OpenClawChatViewModel { return } - let preview = Self.previewImage(data: data) + let processed: Data + do { + processed = try await Task.detached(priority: .userInitiated) { + try ChatImageProcessor.processForUpload(data: data) + }.value + } catch { + self.errorText = "Could not process \(fileName): \(error.localizedDescription)" + return + } + + if processed.count > Self.maxAttachmentBytes { + self.errorText = "Attachment \(fileName) exceeds 5 MB limit after resizing" + return + } + + let outputFileName: String = { + let baseName = (fileName as NSString).deletingPathExtension + return baseName.isEmpty ? "image.jpg" : "\(baseName).jpg" + }() + + let preview = Self.previewImage(data: processed) self.attachments.append( OpenClawPendingAttachment( url: url, - data: data, - fileName: fileName, - mimeType: mimeType, + data: processed, + fileName: outputFileName, + mimeType: "image/jpeg", preview: preview)) } diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/ChatImageProcessor.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/ChatImageProcessor.swift new file mode 100644 index 00000000000..96db5a8c02e --- /dev/null +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/ChatImageProcessor.swift @@ -0,0 +1,44 @@ +import Foundation + +/// Chat-specific image upload policy built on the shared JPEG transcoder. +public enum ChatImageProcessor { + public static let maxLongEdgePx = 1600 + public static let jpegQuality = 0.8 + public static let maxPayloadBytes = 3_500_000 + + public enum ProcessError: Error, LocalizedError, Sendable { + case notAnImage + case decodeFailed + case encodeFailed + + public var errorDescription: String? { + switch self { + case .notAnImage: + "The data is not a recognizable image." + case .decodeFailed: + "The image could not be decoded." + case .encodeFailed: + "The image could not be resized to fit the chat upload limit." + } + } + } + + public static func processForUpload(data: Data) throws -> Data { + do { + let result = try JPEGTranscoder.transcodeToJPEG( + imageData: data, + maxLongEdgePx: self.maxLongEdgePx, + quality: self.jpegQuality, + maxBytes: self.maxPayloadBytes) + return result.data + } catch JPEGTranscodeError.decodeFailed { + throw ProcessError.notAnImage + } catch JPEGTranscodeError.propertiesMissing { + throw ProcessError.decodeFailed + } catch JPEGTranscodeError.sizeLimitExceeded { + throw ProcessError.encodeFailed + } catch { + throw ProcessError.encodeFailed + } + } +} diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/JPEGTranscoder.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/JPEGTranscoder.swift index f4b1cb95125..1eafcbcc6e8 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawKit/JPEGTranscoder.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/JPEGTranscoder.swift @@ -37,6 +37,26 @@ public struct JPEGTranscoder: Sendable { maxWidthPx: Int?, quality: Double, maxBytes: Int? = nil) throws -> (data: Data, widthPx: Int, heightPx: Int) + { + try self.transcodeToJPEG( + imageData: imageData, + maxWidthPx: maxWidthPx, + maxLongEdgePx: nil, + quality: quality, + maxBytes: maxBytes) + } + + /// Re-encodes image data to JPEG, optionally downscaling so the *oriented* longest edge is <= `maxLongEdgePx`. + /// + /// When `maxLongEdgePx` is provided it takes precedence over `maxWidthPx`. + /// - Important: This normalizes EXIF orientation (the output pixels are rotated if needed; orientation tag is not + /// relied on). + public static func transcodeToJPEG( + imageData: Data, + maxWidthPx: Int? = nil, + maxLongEdgePx: Int?, + quality: Double, + maxBytes: Int? = nil) throws -> (data: Data, widthPx: Int, heightPx: Int) { guard let src = CGImageSourceCreateWithData(imageData as CFData, nil) else { throw JPEGTranscodeError.decodeFailed @@ -63,6 +83,10 @@ public struct JPEGTranscoder: Sendable { let maxDim = max(orientedWidth, orientedHeight) var targetMaxPixelSize: Int = { + if let maxLongEdgePx, maxLongEdgePx > 0 { + guard maxDim > maxLongEdgePx else { return maxDim } // never upscale + return maxLongEdgePx + } guard let maxWidthPx, maxWidthPx > 0 else { return maxDim } guard orientedWidth > maxWidthPx else { return maxDim } // never upscale @@ -81,6 +105,7 @@ public struct JPEGTranscoder: Sendable { guard let img = CGImageSourceCreateThumbnailAtIndex(src, 0, thumbOpts as CFDictionary) else { throw JPEGTranscodeError.decodeFailed } + let opaqueImage = Self.flattenAlphaIfNeeded(img) let out = NSMutableData() guard let dest = CGImageDestinationCreateWithData(out, UTType.jpeg.identifier as CFString, 1, nil) else { @@ -88,12 +113,12 @@ public struct JPEGTranscoder: Sendable { } let q = self.clampQuality(quality) let encodeProps = [kCGImageDestinationLossyCompressionQuality: q] as CFDictionary - CGImageDestinationAddImage(dest, img, encodeProps) + CGImageDestinationAddImage(dest, opaqueImage, encodeProps) guard CGImageDestinationFinalize(dest) else { throw JPEGTranscodeError.encodeFailed } - return (out as Data, img.width, img.height) + return (out as Data, opaqueImage.width, opaqueImage.height) } guard let maxBytes, maxBytes > 0 else { @@ -132,4 +157,34 @@ public struct JPEGTranscoder: Sendable { return best } + + /// JPEG cannot store alpha. Flatten transparent sources over white before encoding so ImageIO does not composite + /// transparent pixels onto black by default. + private static func flattenAlphaIfNeeded(_ image: CGImage) -> CGImage { + switch image.alphaInfo { + case .none, .noneSkipFirst, .noneSkipLast: + return image + default: + break + } + + guard + let context = CGContext( + data: nil, + width: image.width, + height: image.height, + bitsPerComponent: 8, + bytesPerRow: 0, + space: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue) + else { + return image + } + + let rect = CGRect(x: 0, y: 0, width: image.width, height: image.height) + context.setFillColor(CGColor(red: 1, green: 1, blue: 1, alpha: 1)) + context.fill(rect) + context.draw(image, in: rect) + return context.makeImage() ?? image + } } diff --git a/apps/shared/OpenClawKit/Tests/OpenClawKitTests/ChatImageProcessorTests.swift b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/ChatImageProcessorTests.swift new file mode 100644 index 00000000000..c30d0f4e715 --- /dev/null +++ b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/ChatImageProcessorTests.swift @@ -0,0 +1,187 @@ +import CoreGraphics +import Foundation +import ImageIO +import Testing +import UniformTypeIdentifiers +@testable import OpenClawKit + +struct ChatImageProcessorTests { + private func syntheticJPEG(width: Int, height: Int) throws -> Data { + guard + let context = CGContext( + data: nil, + width: width, + height: height, + bitsPerComponent: 8, + bytesPerRow: width * 4, + space: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue) + else { + throw NSError(domain: "ChatImageProcessorTests", code: 1) + } + + context.setFillColor(CGColor(red: 0.8, green: 0.2, blue: 0.4, alpha: 1)) + context.fill(CGRect(x: 0, y: 0, width: width, height: height)) + context.setFillColor(CGColor(red: 0.1, green: 0.7, blue: 0.3, alpha: 1)) + context.fill(CGRect(x: 0, y: 0, width: width / 2, height: height / 2)) + + guard let image = context.makeImage() else { + throw NSError(domain: "ChatImageProcessorTests", code: 2) + } + + let data = NSMutableData() + guard let destination = CGImageDestinationCreateWithData(data, UTType.jpeg.identifier as CFString, 1, nil) + else { + throw NSError(domain: "ChatImageProcessorTests", code: 3) + } + + let properties: [CFString: Any] = [ + kCGImageDestinationLossyCompressionQuality: 0.95, + kCGImagePropertyExifDictionary: [ + kCGImagePropertyExifDateTimeOriginal: "2026:04:20 16:30:00", + kCGImagePropertyExifLensModel: "Leaky Lens 50mm f/1.4", + ] as CFDictionary, + kCGImagePropertyGPSDictionary: [ + kCGImagePropertyGPSLatitude: 60.02, + kCGImagePropertyGPSLatitudeRef: "N", + kCGImagePropertyGPSLongitude: 10.95, + kCGImagePropertyGPSLongitudeRef: "E", + ] as CFDictionary, + kCGImagePropertyTIFFDictionary: [ + kCGImagePropertyTIFFMake: "LeakCorp", + kCGImagePropertyTIFFModel: "Privacy-Leaker-1", + ] as CFDictionary, + ] + CGImageDestinationAddImage(destination, image, properties as CFDictionary) + guard CGImageDestinationFinalize(destination) else { + throw NSError(domain: "ChatImageProcessorTests", code: 4) + } + return data as Data + } + + private func syntheticPNGWithAlpha(width: Int, height: Int) throws -> Data { + guard + let context = CGContext( + data: nil, + width: width, + height: height, + bitsPerComponent: 8, + bytesPerRow: width * 4, + space: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue) + else { + throw NSError(domain: "ChatImageProcessorTests", code: 5) + } + + context.clear(CGRect(x: 0, y: 0, width: width, height: height)) + context.setFillColor(CGColor(red: 1, green: 0, blue: 0, alpha: 1)) + context.fill(CGRect(x: width / 4, y: height / 4, width: width / 2, height: height / 2)) + + guard let image = context.makeImage() else { + throw NSError(domain: "ChatImageProcessorTests", code: 6) + } + + let data = NSMutableData() + guard let destination = CGImageDestinationCreateWithData(data, UTType.png.identifier as CFString, 1, nil) + else { + throw NSError(domain: "ChatImageProcessorTests", code: 7) + } + CGImageDestinationAddImage(destination, image, nil) + guard CGImageDestinationFinalize(destination) else { + throw NSError(domain: "ChatImageProcessorTests", code: 8) + } + return data as Data + } + + private func properties(for data: Data) -> [CFString: Any] { + guard + let source = CGImageSourceCreateWithData(data as CFData, nil), + let properties = CGImageSourceCopyPropertiesAtIndex(source, 0, nil) as? [CFString: Any] + else { + return [:] + } + return properties + } + + private func dimensions(for data: Data) -> (width: Int, height: Int)? { + let properties = self.properties(for: data) + guard + let width = properties[kCGImagePropertyPixelWidth] as? NSNumber, + let height = properties[kCGImagePropertyPixelHeight] as? NSNumber + else { + return nil + } + return (width.intValue, height.intValue) + } + + @Test func `resizes landscape long edge to upload limit`() throws { + let source = try self.syntheticJPEG(width: 4000, height: 3000) + let output = try ChatImageProcessor.processForUpload(data: source) + let dimensions = try #require(self.dimensions(for: output)) + + #expect(max(dimensions.width, dimensions.height) <= ChatImageProcessor.maxLongEdgePx) + #expect(abs((Double(dimensions.width) / Double(dimensions.height)) - (4000.0 / 3000.0)) <= 0.02) + } + + @Test func `resizes portrait long edge to upload limit`() throws { + let source = try self.syntheticJPEG(width: 3000, height: 4000) + let output = try ChatImageProcessor.processForUpload(data: source) + let dimensions = try #require(self.dimensions(for: output)) + + #expect(max(dimensions.width, dimensions.height) <= ChatImageProcessor.maxLongEdgePx) + #expect(abs((Double(dimensions.width) / Double(dimensions.height)) - (3000.0 / 4000.0)) <= 0.02) + } + + @Test func `resizes narrow tall long edge to upload limit`() throws { + let source = try self.syntheticJPEG(width: 1080, height: 2400) + let output = try ChatImageProcessor.processForUpload(data: source) + let dimensions = try #require(self.dimensions(for: output)) + + #expect(max(dimensions.width, dimensions.height) <= ChatImageProcessor.maxLongEdgePx) + #expect(abs((Double(dimensions.width) / Double(dimensions.height)) - (1080.0 / 2400.0)) <= 0.02) + } + + @Test func `small image is not upscaled`() throws { + let source = try self.syntheticJPEG(width: 400, height: 300) + let output = try ChatImageProcessor.processForUpload(data: source) + let dimensions = try #require(self.dimensions(for: output)) + + #expect(max(dimensions.width, dimensions.height) <= 400) + } + + @Test func `output fits payload budget`() throws { + let source = try self.syntheticJPEG(width: 4000, height: 3000) + let output = try ChatImageProcessor.processForUpload(data: source) + + #expect(output.count <= ChatImageProcessor.maxPayloadBytes) + } + + @Test func `rejects non image data`() { + let garbage = Data("not an image".utf8) + + #expect(throws: ChatImageProcessor.ProcessError.self) { + _ = try ChatImageProcessor.processForUpload(data: garbage) + } + } + + @Test func `strips source metadata from output`() throws { + let source = try self.syntheticJPEG(width: 3000, height: 2000) + let output = try ChatImageProcessor.processForUpload(data: source) + let properties = self.properties(for: output) + let gps = properties[kCGImagePropertyGPSDictionary] as? [CFString: Any] ?? [:] + + #expect(gps.isEmpty) + for needle in ["Leaky Lens", "LeakCorp", "Privacy-Leaker", "2026:04:20"] { + #expect(output.range(of: Data(needle.utf8)) == nil) + } + } + + @Test func `flattens transparent sources to opaque JPEG`() throws { + let source = try self.syntheticPNGWithAlpha(width: 800, height: 600) + let output = try ChatImageProcessor.processForUpload(data: source) + let imageSource = try #require(CGImageSourceCreateWithData(output as CFData, nil)) + let image = try #require(CGImageSourceCreateImageAtIndex(imageSource, 0, nil)) + + #expect([.none, .noneSkipFirst, .noneSkipLast].contains(image.alphaInfo)) + } +} diff --git a/apps/shared/OpenClawKit/Tests/OpenClawKitTests/ChatViewModelAttachmentTests.swift b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/ChatViewModelAttachmentTests.swift new file mode 100644 index 00000000000..74f1c902282 --- /dev/null +++ b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/ChatViewModelAttachmentTests.swift @@ -0,0 +1,109 @@ +import CoreGraphics +import Foundation +import ImageIO +import OpenClawKit +import UniformTypeIdentifiers +import XCTest +@testable import OpenClawChatUI + +private struct AttachmentProcessingTransport: OpenClawChatTransport { + func requestHistory(sessionKey _: String) async throws -> OpenClawChatHistoryPayload { + throw NSError(domain: "ChatViewModelAttachmentTests", code: 1) + } + + func sendMessage( + sessionKey _: String, + message _: String, + thinking _: String, + idempotencyKey _: String, + attachments _: [OpenClawChatAttachmentPayload]) async throws -> OpenClawChatSendResponse + { + throw NSError(domain: "ChatViewModelAttachmentTests", code: 2) + } + + func requestHealth(timeoutMs _: Int) async throws -> Bool { + true + } + + func events() -> AsyncStream { + AsyncStream { _ in } + } +} + +private func makeChatAttachmentJPEG(width: Int, height: Int) throws -> Data { + guard + let context = CGContext( + data: nil, + width: width, + height: height, + bitsPerComponent: 8, + bytesPerRow: width * 4, + space: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue) + else { + throw NSError(domain: "ChatViewModelAttachmentTests", code: 3) + } + + context.setFillColor(CGColor(red: 0.2, green: 0.4, blue: 0.8, alpha: 1)) + context.fill(CGRect(x: 0, y: 0, width: width, height: height)) + context.setFillColor(CGColor(red: 0.9, green: 0.5, blue: 0.1, alpha: 1)) + context.fill(CGRect(x: 0, y: 0, width: width / 2, height: height / 2)) + + guard let image = context.makeImage() else { + throw NSError(domain: "ChatViewModelAttachmentTests", code: 4) + } + + let data = NSMutableData() + guard let destination = CGImageDestinationCreateWithData(data, UTType.jpeg.identifier as CFString, 1, nil) else { + throw NSError(domain: "ChatViewModelAttachmentTests", code: 5) + } + CGImageDestinationAddImage(destination, image, [kCGImageDestinationLossyCompressionQuality: 0.95] as CFDictionary) + guard CGImageDestinationFinalize(destination) else { + throw NSError(domain: "ChatViewModelAttachmentTests", code: 6) + } + return data as Data +} + +private func chatAttachmentDimensions(for data: Data) -> (width: Int, height: Int)? { + guard + let source = CGImageSourceCreateWithData(data as CFData, nil), + let properties = CGImageSourceCopyPropertiesAtIndex(source, 0, nil) as? [CFString: Any], + let width = properties[kCGImagePropertyPixelWidth] as? NSNumber, + let height = properties[kCGImagePropertyPixelHeight] as? NSNumber + else { + return nil + } + return (width.intValue, height.intValue) +} + +final class ChatViewModelAttachmentTests: XCTestCase { + func testImageAttachmentsAreProcessedBeforeStaging() async throws { + let imageData = try makeChatAttachmentJPEG(width: 3000, height: 4000) + let viewModel = await MainActor.run { + OpenClawChatViewModel(sessionKey: "main", transport: AttachmentProcessingTransport()) + } + + await MainActor.run { + viewModel.addImageAttachment(data: imageData, fileName: "camera.heic", mimeType: "image/jpeg") + } + + try await waitUntil("attachment processed") { + await MainActor.run { !viewModel.attachments.isEmpty || viewModel.errorText != nil } + } + + let attachment = try await MainActor.run { + guard let attachment = viewModel.attachments.first else { + throw NSError(domain: "ChatViewModelAttachmentTests", code: 7) + } + return (attachment.fileName, attachment.mimeType, attachment.data) + } + let dimensions = try XCTUnwrap(chatAttachmentDimensions(for: attachment.2)) + + XCTAssertEqual(attachment.0, "camera.jpg") + XCTAssertEqual(attachment.1, "image/jpeg") + XCTAssertLessThanOrEqual(attachment.2.count, ChatImageProcessor.maxPayloadBytes) + XCTAssertLessThanOrEqual(max(dimensions.width, dimensions.height), ChatImageProcessor.maxLongEdgePx) + let errorText = await MainActor.run { viewModel.errorText } + XCTAssertNil(errorText) + } +}