import AVFoundation import CoreGraphics import Foundation import OpenClawIPC import OpenClawKit import OSLog actor CameraCaptureService { struct CameraDeviceInfo: Encodable { let id: String let name: String let position: String let deviceType: String } enum CameraError: LocalizedError { case cameraUnavailable case microphoneUnavailable case permissionDenied(kind: String) case captureFailed(String) case exportFailed(String) var errorDescription: String? { switch self { case .cameraUnavailable: "Camera unavailable" case .microphoneUnavailable: "Microphone unavailable" case let .permissionDenied(kind): "\(kind) permission denied" case let .captureFailed(msg): msg case let .exportFailed(msg): msg } } } private let logger = Logger(subsystem: "ai.openclaw", category: "camera") func listDevices() -> [CameraDeviceInfo] { Self.availableCameras().map { device in CameraDeviceInfo( id: device.uniqueID, name: device.localizedName, position: Self.positionLabel(device.position), deviceType: device.deviceType.rawValue) } } func snap( facing: CameraFacing?, maxWidth: Int?, quality: Double?, deviceId: String?, delayMs: Int) async throws -> (data: Data, size: CGSize) { let facing = facing ?? .front let normalized = Self.normalizeSnap(maxWidth: maxWidth, quality: quality) let maxWidth = normalized.maxWidth let quality = normalized.quality let delayMs = max(0, delayMs) let deviceId = deviceId?.trimmingCharacters(in: .whitespacesAndNewlines) try await self.ensureAccess(for: .video) let prepared = try CameraCapturePipelineSupport.preparePhotoSession( preferFrontCamera: facing == .front, deviceId: deviceId, pickCamera: { preferFrontCamera, deviceId in Self.pickCamera(facing: preferFrontCamera ? .front : .back, deviceId: deviceId) }, cameraUnavailableError: CameraError.cameraUnavailable, mapSetupError: { setupError in CameraError.captureFailed(setupError.localizedDescription) }) let session = prepared.session let device = prepared.device let output = prepared.output session.startRunning() defer { session.stopRunning() } await CameraCapturePipelineSupport.warmUpCaptureSession() await self.waitForExposureAndWhiteBalance(device: device) await self.sleepDelayMs(delayMs) var delegate: PhotoCaptureDelegate? let rawData: Data = try await withCheckedThrowingContinuation { continuation in let captureDelegate = PhotoCaptureDelegate(continuation) delegate = captureDelegate output.capturePhoto( with: CameraCapturePipelineSupport.makePhotoSettings(output: output), delegate: captureDelegate) } withExtendedLifetime(delegate) {} let res: (data: Data, widthPx: Int, heightPx: Int) do { res = try PhotoCapture.transcodeJPEGForGateway( rawData: rawData, maxWidthPx: maxWidth, quality: quality) } catch { throw CameraError.captureFailed(error.localizedDescription) } return (data: res.data, size: CGSize(width: res.widthPx, height: res.heightPx)) } func clip( facing: CameraFacing?, durationMs: Int?, includeAudio: Bool, deviceId: String?, outPath: String?) async throws -> (path: String, durationMs: Int, hasAudio: Bool) { let facing = facing ?? .front let durationMs = Self.clampDurationMs(durationMs) let deviceId = deviceId?.trimmingCharacters(in: .whitespacesAndNewlines) try await self.ensureAccess(for: .video) if includeAudio { try await self.ensureAccess(for: .audio) } let prepared = try await CameraCapturePipelineSupport.prepareWarmMovieSession( preferFrontCamera: facing == .front, deviceId: deviceId, includeAudio: includeAudio, durationMs: durationMs, pickCamera: { preferFrontCamera, deviceId in Self.pickCamera(facing: preferFrontCamera ? .front : .back, deviceId: deviceId) }, cameraUnavailableError: CameraError.cameraUnavailable, mapSetupError: Self.mapMovieSetupError) let session = prepared.session let output = prepared.output defer { session.stopRunning() } let tmpMovURL = FileManager().temporaryDirectory .appendingPathComponent("openclaw-camera-\(UUID().uuidString).mov") defer { try? FileManager().removeItem(at: tmpMovURL) } let outputURL: URL = { if let outPath, !outPath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { return URL(fileURLWithPath: outPath) } return FileManager().temporaryDirectory .appendingPathComponent("openclaw-camera-\(UUID().uuidString).mp4") }() // Ensure we don't fail exporting due to an existing file. try? FileManager().removeItem(at: outputURL) let logger = self.logger var delegate: MovieFileDelegate? let recordedURL: URL = try await withCheckedThrowingContinuation { cont in let d = MovieFileDelegate(cont, logger: logger) delegate = d output.startRecording(to: tmpMovURL, recordingDelegate: d) } withExtendedLifetime(delegate) {} try await Self.exportToMP4(inputURL: recordedURL, outputURL: outputURL) return (path: outputURL.path, durationMs: durationMs, hasAudio: includeAudio) } private func ensureAccess(for mediaType: AVMediaType) async throws { if await !(CameraAuthorization.isAuthorized(for: mediaType)) { throw CameraError.permissionDenied(kind: mediaType == .video ? "Camera" : "Microphone") } } private nonisolated static func availableCameras() -> [AVCaptureDevice] { var types: [AVCaptureDevice.DeviceType] = [ .builtInWideAngleCamera, .continuityCamera, ] if let external = externalDeviceType() { types.append(external) } let session = AVCaptureDevice.DiscoverySession( deviceTypes: types, mediaType: .video, position: .unspecified) return session.devices } private nonisolated static func externalDeviceType() -> AVCaptureDevice.DeviceType? { if #available(macOS 14.0, *) { return .external } // Use raw value to avoid deprecated symbol in the SDK. return AVCaptureDevice.DeviceType(rawValue: "AVCaptureDeviceTypeExternalUnknown") } private nonisolated static func pickCamera( facing: CameraFacing, deviceId: String?) -> AVCaptureDevice? { if let deviceId, !deviceId.isEmpty { if let match = availableCameras().first(where: { $0.uniqueID == deviceId }) { return match } } let position: AVCaptureDevice.Position = (facing == .front) ? .front : .back if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: position) { return device } // Many macOS cameras report `unspecified` position; fall back to any default. return AVCaptureDevice.default(for: .video) } private nonisolated static func clampQuality(_ quality: Double?) -> Double { let q = quality ?? 0.9 return min(1.0, max(0.05, q)) } nonisolated static func normalizeSnap(maxWidth: Int?, quality: Double?) -> (maxWidth: Int, quality: Double) { // Default to a reasonable max width to keep downstream payload sizes manageable. // If you need full-res, explicitly request a larger maxWidth. let maxWidth = maxWidth.flatMap { $0 > 0 ? $0 : nil } ?? 1600 let quality = Self.clampQuality(quality) return (maxWidth: maxWidth, quality: quality) } private nonisolated static func clampDurationMs(_ ms: Int?) -> Int { let v = ms ?? 3000 return min(60000, max(250, v)) } private nonisolated static func mapMovieSetupError(_ setupError: CameraSessionConfigurationError) -> CameraError { CameraCapturePipelineSupport.mapMovieSetupError( setupError, microphoneUnavailableError: .microphoneUnavailable, captureFailed: { .captureFailed($0) }) } private nonisolated static func exportToMP4(inputURL: URL, outputURL: URL) async throws { let asset = AVURLAsset(url: inputURL) guard let export = AVAssetExportSession(asset: asset, presetName: AVAssetExportPresetMediumQuality) else { throw CameraError.exportFailed("Failed to create export session") } export.shouldOptimizeForNetworkUse = true if #available(macOS 15.0, *) { do { try await export.export(to: outputURL, as: .mp4) return } catch { throw CameraError.exportFailed(error.localizedDescription) } } else { export.outputURL = outputURL export.outputFileType = .mp4 try await withCheckedThrowingContinuation(isolation: nil) { (cont: CheckedContinuation) in export.exportAsynchronously { cont.resume(returning: ()) } } switch export.status { case .completed: return case .failed: throw CameraError.exportFailed(export.error?.localizedDescription ?? "export failed") case .cancelled: throw CameraError.exportFailed("export cancelled") default: throw CameraError.exportFailed("export did not complete (\(export.status.rawValue))") } } } private func waitForExposureAndWhiteBalance(device: AVCaptureDevice) async { let stepNs: UInt64 = 50_000_000 let maxSteps = 30 // ~1.5s for _ in 0.. 0 else { return } let ns = UInt64(min(delayMs, 10000)) * 1_000_000 try? await Task.sleep(nanoseconds: ns) } private nonisolated static func positionLabel(_ position: AVCaptureDevice.Position) -> String { CameraCapturePipelineSupport.positionLabel(position) } } private final class PhotoCaptureDelegate: NSObject, AVCapturePhotoCaptureDelegate { private var cont: CheckedContinuation? private var didResume = false init(_ cont: CheckedContinuation) { self.cont = cont } func photoOutput( _ output: AVCapturePhotoOutput, didFinishProcessingPhoto photo: AVCapturePhoto, error: Error?) { guard !self.didResume, let cont else { return } self.didResume = true self.cont = nil if let error { cont.resume(throwing: error) return } guard let data = photo.fileDataRepresentation() else { cont.resume(throwing: CameraCaptureService.CameraError.captureFailed("No photo data")) return } if data.isEmpty { cont.resume(throwing: CameraCaptureService.CameraError.captureFailed("Photo data empty")) return } cont.resume(returning: data) } func photoOutput( _ output: AVCapturePhotoOutput, didFinishCaptureFor resolvedSettings: AVCaptureResolvedPhotoSettings, error: Error?) { guard let error else { return } guard !self.didResume, let cont else { return } self.didResume = true self.cont = nil cont.resume(throwing: error) } } private final class MovieFileDelegate: NSObject, AVCaptureFileOutputRecordingDelegate { private var cont: CheckedContinuation? private let logger: Logger init(_ cont: CheckedContinuation, logger: Logger) { self.cont = cont self.logger = logger } func fileOutput( _ output: AVCaptureFileOutput, didFinishRecordingTo outputFileURL: URL, from connections: [AVCaptureConnection], error: Error?) { guard let cont else { return } self.cont = nil if let error { let ns = error as NSError if ns.domain == AVFoundationErrorDomain, ns.code == AVError.maximumDurationReached.rawValue { cont.resume(returning: outputFileURL) return } self.logger.error("camera record failed: \(error.localizedDescription, privacy: .public)") cont.resume(throwing: error) return } cont.resume(returning: outputFileURL) } }