diff --git a/apps/android/app/src/main/java/ai/openclaw/android/node/CameraCaptureManager.kt b/apps/android/app/src/main/java/ai/openclaw/android/node/CameraCaptureManager.kt
index c4d60cd17fd..0dc6f2b6955 100644
--- a/apps/android/app/src/main/java/ai/openclaw/android/node/CameraCaptureManager.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/android/node/CameraCaptureManager.kt
@@ -1,13 +1,16 @@
 package ai.openclaw.android.node
 
 import android.Manifest
-import android.content.Context
 import android.annotation.SuppressLint
+import android.content.Context
 import android.graphics.Bitmap
 import android.graphics.BitmapFactory
 import android.graphics.Matrix
-import android.util.Base64
 import android.content.pm.PackageManager
+import android.hardware.camera2.CameraCharacteristics
+import android.util.Base64
+import androidx.camera.camera2.interop.Camera2CameraInfo
+import androidx.camera.core.CameraInfo
 import androidx.exifinterface.media.ExifInterface
 import androidx.lifecycle.LifecycleOwner
 import androidx.camera.core.CameraSelector
@@ -44,6 +47,13 @@ import kotlin.coroutines.resumeWithException
 class CameraCaptureManager(private val context: Context) {
   data class Payload(val payloadJson: String)
   data class FilePayload(val file: File, val durationMs: Long, val hasAudio: Boolean)
+  /** One camera as returned by [listDevices]: its id plus display name, position and device type. */
+  data class CameraDeviceInfo(
+    val id: String,
+    val name: String,
+    val position: String,
+    val deviceType: String,
+  )
 
   @Volatile private var lifecycleOwner: LifecycleOwner? = null
   @Volatile private var permissionRequester: PermissionRequester? = null
@@ -56,6 +66,20 @@ class CameraCaptureManager(private val context: Context) {
     permissionRequester = requester
   }
 
+  /**
+   * Lists the cameras CameraX reports as available, ordered by camera id.
+   *
+   * Numeric ids are compared by value (so "2" precedes "10"); non-numeric ids
+   * follow them in lexicographic order.
+   */
+  suspend fun listDevices(): List<CameraDeviceInfo> =
+    withContext(Dispatchers.Main) {
+      val provider = context.cameraProvider()
+      provider.availableCameraInfos
+        .mapNotNull { info -> cameraDeviceInfoOrNull(info) }
+        .sortedWith(compareBy<CameraDeviceInfo>({ it.id.toIntOrNull() ?: Int.MAX_VALUE }, { it.id }))
+    }
+
   private suspend fun ensureCameraPermission() {
     val granted = checkSelfPermission(context, Manifest.permission.CAMERA) == PackageManager.PERMISSION_GRANTED
     if (granted) return
@@ -88,11 +112,11 @@ class CameraCaptureManager(private val context: Context) {
       val facing = parseFacing(params) ?: "front"
       val quality = (parseQuality(params) ?: 0.95).coerceIn(0.1, 1.0)
       val maxWidth = parseMaxWidth(params) ?: 1600
+      val deviceId = parseDeviceId(params)
 
       val provider = context.cameraProvider()
       val capture = ImageCapture.Builder().build()
-      val selector =
-        if (facing == "front") CameraSelector.DEFAULT_FRONT_CAMERA else CameraSelector.DEFAULT_BACK_CAMERA
+      val selector = resolveCameraSelector(provider, facing, deviceId)
 
       provider.unbindAll()
       provider.bindToLifecycle(owner, selector, capture)
@@ -154,9 +178,10 @@ class CameraCaptureManager(private val context: Context) {
       val facing = parseFacing(params) ?: "front"
       val durationMs = (parseDurationMs(params) ?: 3_000).coerceIn(200, 60_000)
       val includeAudio = parseIncludeAudio(params) ?: true
+      val deviceId = parseDeviceId(params)
       if (includeAudio) ensureMicPermission()
 
-      android.util.Log.w("CameraCaptureManager", "clip: start facing=$facing duration=$durationMs audio=$includeAudio")
+      android.util.Log.w("CameraCaptureManager", "clip: start facing=$facing duration=$durationMs audio=$includeAudio deviceId=${deviceId ?: "-"}")
 
       val provider = context.cameraProvider()
       android.util.Log.w("CameraCaptureManager", "clip: got camera provider")
@@ -168,8 +193,7 @@ class CameraCaptureManager(private val context: Context) {
           )
           .build()
       val videoCapture = VideoCapture.withOutput(recorder)
-      val selector =
-        if (facing == "front") CameraSelector.DEFAULT_FRONT_CAMERA else CameraSelector.DEFAULT_BACK_CAMERA
+      val selector = resolveCameraSelector(provider, facing, deviceId)
 
       // CameraX requires a Preview use case for the camera to start producing frames;
       // without it, the encoder may get no data (ERROR_NO_VALID_DATA).
@@ -308,6 +332,13 @@ class CameraCaptureManager(private val context: Context) {
   private fun parseDurationMs(params: JsonObject?): Int? =
     readPrimitive(params, "durationMs")?.contentOrNull?.toIntOrNull()
 
+  /** Reads the "deviceId" parameter, trimmed; yields null when no non-blank value is present. */
+  private fun parseDeviceId(params: JsonObject?): String? =
+    readPrimitive(params, "deviceId")
+      ?.contentOrNull
+      ?.trim()
+      ?.takeIf { it.isNotEmpty() }
+
   private fun parseIncludeAudio(params: JsonObject?): Boolean? {
     val value = readPrimitive(params, "includeAudio")?.contentOrNull?.trim()?.lowercase()
     return when (value) {
@@ -318,6 +349,64 @@ class CameraCaptureManager(private val context: Context) {
   }
 
   private fun Context.mainExecutor(): Executor = ContextCompat.getMainExecutor(this)
+
+  /**
+   * Chooses the camera to bind: the default selector for [facing] when no
+   * [deviceId] is given, otherwise a selector matching only that camera id.
+   *
+   * @throws IllegalStateException if [deviceId] matches no available camera.
+   */
+  private fun resolveCameraSelector(
+    provider: ProcessCameraProvider,
+    facing: String,
+    deviceId: String?,
+  ): CameraSelector {
+    if (deviceId.isNullOrEmpty()) {
+      return if (facing == "front") CameraSelector.DEFAULT_FRONT_CAMERA else CameraSelector.DEFAULT_BACK_CAMERA
+    }
+    val availableIds = provider.availableCameraInfos.mapNotNull { cameraIdOrNull(it) }.toSet()
+    if (!availableIds.contains(deviceId)) {
+      throw IllegalStateException("INVALID_REQUEST: unknown camera deviceId '$deviceId'")
+    }
+    return CameraSelector.Builder()
+      .addCameraFilter { infos -> infos.filter { cameraIdOrNull(it) == deviceId } }
+      .build()
+  }
+
+  /** Maps a CameraX [CameraInfo] to a [CameraDeviceInfo], or null when its camera id cannot be read. */
+  private fun cameraDeviceInfoOrNull(info: CameraInfo): CameraDeviceInfo? {
+    val cameraId = cameraIdOrNull(info) ?: return null
+    val lensFacing =
+      runCatching {
+        Camera2CameraInfo.from(info).getCameraCharacteristic(CameraCharacteristics.LENS_FACING)
+      }.getOrNull()
+    val position =
+      when (lensFacing) {
+        CameraCharacteristics.LENS_FACING_FRONT -> "front"
+        CameraCharacteristics.LENS_FACING_BACK -> "back"
+        CameraCharacteristics.LENS_FACING_EXTERNAL -> "external"
+        else -> "unspecified"
+      }
+    val deviceType =
+      if (lensFacing == CameraCharacteristics.LENS_FACING_EXTERNAL) "external" else "builtIn"
+    val name =
+      when (position) {
+        "front" -> "Front Camera"
+        "back" -> "Back Camera"
+        "external" -> "External Camera"
+        else -> "Camera $cameraId"
+      }
+    return CameraDeviceInfo(
+      id = cameraId,
+      name = name,
+      position = position,
+      deviceType = deviceType,
+    )
+  }
+
+  /** Returns the Camera2 id behind [info], or null if the interop lookup throws. */
+  private fun cameraIdOrNull(info: CameraInfo): String? =
+    runCatching { Camera2CameraInfo.from(info).cameraId }.getOrNull()
 }
 
 private suspend fun Context.cameraProvider(): ProcessCameraProvider =
diff --git a/apps/android/app/src/main/java/ai/openclaw/android/node/CameraHandler.kt b/apps/android/app/src/main/java/ai/openclaw/android/node/CameraHandler.kt
index ff1b8468cd6..0ee22849a62 100644
--- a/apps/android/app/src/main/java/ai/openclaw/android/node/CameraHandler.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/android/node/CameraHandler.kt
@@ -9,7 +9,10 @@ import kotlinx.coroutines.flow.MutableStateFlow
 import kotlinx.coroutines.withContext
 import kotlinx.serialization.json.Json
 import kotlinx.serialization.json.JsonPrimitive
+import kotlinx.serialization.json.buildJsonArray
+import kotlinx.serialization.json.buildJsonObject
 import kotlinx.serialization.json.contentOrNull
+import kotlinx.serialization.json.put
 
 internal const val CAMERA_CLIP_MAX_RAW_BYTES: Long = 18L * 1024L * 1024L
 
@@ -24,6 +27,40 @@ class CameraHandler(
   private val triggerCameraFlash: () -> Unit,
   private val invokeErrorFromThrowable: (err: Throwable) -> Pair<String, String>,
 ) {
+  /**
+   * Handles `camera.list`: returns `{"devices": [...]}` carrying the id, name,
+   * position and deviceType of each camera, or an error result on failure.
+   */
+  suspend fun handleList(_paramsJson: String?): GatewaySession.InvokeResult {
+    return try {
+      val devices = camera.listDevices()
+      val payload =
+        buildJsonObject {
+          put(
+            "devices",
+            buildJsonArray {
+              devices.forEach { device ->
+                add(
+                  buildJsonObject {
+                    put("id", device.id)
+                    put("name", device.name)
+                    put("position", device.position)
+                    put("deviceType", device.deviceType)
+                  },
+                )
+              }
+            },
+          )
+        }.toString()
+      GatewaySession.InvokeResult.ok(payload)
+    } catch (err: kotlinx.coroutines.CancellationException) {
+      // Cancellation is not a camera failure; let it propagate to the caller.
+      throw err
+    } catch (err: Throwable) {
+      val (code, message) = invokeErrorFromThrowable(err)
+      GatewaySession.InvokeResult.error(code = code, message = message)
+    }
+  }
 
   suspend fun handleSnap(paramsJson: String?): GatewaySession.InvokeResult {
     val logFile = if (BuildConfig.DEBUG) java.io.File(appContext.cacheDir, "camera_debug.log") else null
diff --git a/apps/android/app/src/main/java/ai/openclaw/android/node/InvokeCommandRegistry.kt b/apps/android/app/src/main/java/ai/openclaw/android/node/InvokeCommandRegistry.kt
index 8d37794df4c..823d312a212 100644
--- a/apps/android/app/src/main/java/ai/openclaw/android/node/InvokeCommandRegistry.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/android/node/InvokeCommandRegistry.kt
@@ -62,6 +62,11 @@ object InvokeCommandRegistry {
         name = OpenClawScreenCommand.Record.rawValue,
         requiresForeground = true,
       ),
+      InvokeCommandSpec(
+        name = OpenClawCameraCommand.List.rawValue,
+        requiresForeground = true,
+        availability = InvokeCommandAvailability.CameraEnabled,
+      ),
       InvokeCommandSpec(
         name = OpenClawCameraCommand.Snap.rawValue,
         requiresForeground = true,
diff --git a/apps/android/app/src/main/java/ai/openclaw/android/node/InvokeDispatcher.kt b/apps/android/app/src/main/java/ai/openclaw/android/node/InvokeDispatcher.kt
index fb88aef03a8..8ef070f25a3 100644
--- a/apps/android/app/src/main/java/ai/openclaw/android/node/InvokeDispatcher.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/android/node/InvokeDispatcher.kt
@@ -112,6 +112,7 @@ class InvokeDispatcher(
       }
 
       // Camera commands
+      OpenClawCameraCommand.List.rawValue -> cameraHandler.handleList(paramsJson)
       OpenClawCameraCommand.Snap.rawValue -> cameraHandler.handleSnap(paramsJson)
       OpenClawCameraCommand.Clip.rawValue -> cameraHandler.handleClip(paramsJson)
 
diff --git a/apps/android/app/src/main/java/ai/openclaw/android/protocol/OpenClawProtocolConstants.kt b/apps/android/app/src/main/java/ai/openclaw/android/protocol/OpenClawProtocolConstants.kt
index 7dd48941331..1a97a546a42 100644
--- a/apps/android/app/src/main/java/ai/openclaw/android/protocol/OpenClawProtocolConstants.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/android/protocol/OpenClawProtocolConstants.kt
@@ -35,6 +35,7 @@ enum class OpenClawCanvasA2UICommand(val rawValue: String) {
 }
 
 enum class OpenClawCameraCommand(val rawValue: String) {
+  List("camera.list"),
   Snap("camera.snap"),
   Clip("camera.clip"),
   ;
diff --git a/apps/android/app/src/test/java/ai/openclaw/android/node/InvokeCommandRegistryTest.kt b/apps/android/app/src/test/java/ai/openclaw/android/node/InvokeCommandRegistryTest.kt
index 148d3866346..353f0e8c7aa 100644
--- a/apps/android/app/src/test/java/ai/openclaw/android/node/InvokeCommandRegistryTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/android/node/InvokeCommandRegistryTest.kt
@@ -22,6 +22,7 @@ class InvokeCommandRegistryTest {
 
     assertFalse(commands.contains(OpenClawCameraCommand.Snap.rawValue))
     assertFalse(commands.contains(OpenClawCameraCommand.Clip.rawValue))
+    assertFalse(commands.contains(OpenClawCameraCommand.List.rawValue))
     assertFalse(commands.contains(OpenClawLocationCommand.Get.rawValue))
     assertTrue(commands.contains(OpenClawDeviceCommand.Status.rawValue))
     assertTrue(commands.contains(OpenClawDeviceCommand.Info.rawValue))
@@ -44,6 +45,7 @@ class InvokeCommandRegistryTest {
 
     assertTrue(commands.contains(OpenClawCameraCommand.Snap.rawValue))
     assertTrue(commands.contains(OpenClawCameraCommand.Clip.rawValue))
+    assertTrue(commands.contains(OpenClawCameraCommand.List.rawValue))
     assertTrue(commands.contains(OpenClawLocationCommand.Get.rawValue))
     assertTrue(commands.contains(OpenClawDeviceCommand.Status.rawValue))
     assertTrue(commands.contains(OpenClawDeviceCommand.Info.rawValue))
diff --git a/apps/android/app/src/test/java/ai/openclaw/android/protocol/OpenClawProtocolConstantsTest.kt b/apps/android/app/src/test/java/ai/openclaw/android/protocol/OpenClawProtocolConstantsTest.kt
index 41a9a7514e8..2268246a87d 100644
--- a/apps/android/app/src/test/java/ai/openclaw/android/protocol/OpenClawProtocolConstantsTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/android/protocol/OpenClawProtocolConstantsTest.kt
@@ -31,6 +31,13 @@ class OpenClawProtocolConstantsTest {
     assertEquals("device", OpenClawCapability.Device.rawValue)
   }
 
+  @Test
+  fun cameraCommandsUseStableStrings() {
+    assertEquals("camera.list", OpenClawCameraCommand.List.rawValue)
+    assertEquals("camera.snap", OpenClawCameraCommand.Snap.rawValue)
+    assertEquals("camera.clip", OpenClawCameraCommand.Clip.rawValue)
+  }
+
   @Test
   fun screenCommandsUseStableStrings() {
     assertEquals("screen.record", OpenClawScreenCommand.Record.rawValue)