feat(android): add camera list and device selection

This commit is contained in:
Ayaan Zaidi
2026-02-27 09:24:57 +05:30
committed by Ayaan Zaidi
parent c838a4dde0
commit 1f7b3c613d
7 changed files with 126 additions and 7 deletions

View File

@@ -1,13 +1,16 @@
package ai.openclaw.android.node
import android.Manifest
import android.content.Context
import android.annotation.SuppressLint
import android.content.Context
import android.graphics.Bitmap
import android.graphics.BitmapFactory
import android.graphics.Matrix
import android.util.Base64
import android.content.pm.PackageManager
import android.hardware.camera2.CameraCharacteristics
import android.util.Base64
import androidx.camera.camera2.interop.Camera2CameraInfo
import androidx.camera.core.CameraInfo
import androidx.exifinterface.media.ExifInterface
import androidx.lifecycle.LifecycleOwner
import androidx.camera.core.CameraSelector
@@ -44,6 +47,12 @@ import kotlin.coroutines.resumeWithException
class CameraCaptureManager(private val context: Context) {
data class Payload(val payloadJson: String)
data class FilePayload(val file: File, val durationMs: Long, val hasAudio: Boolean)
data class CameraDeviceInfo(
val id: String,
val name: String,
val position: String,
val deviceType: String,
)
@Volatile private var lifecycleOwner: LifecycleOwner? = null
@Volatile private var permissionRequester: PermissionRequester? = null
@@ -56,6 +65,14 @@ class CameraCaptureManager(private val context: Context) {
permissionRequester = requester
}
suspend fun listDevices(): List<CameraDeviceInfo> =
withContext(Dispatchers.Main) {
val provider = context.cameraProvider()
provider.availableCameraInfos
.mapNotNull { info -> cameraDeviceInfoOrNull(info) }
.sortedBy { it.id }
}
private suspend fun ensureCameraPermission() {
val granted = checkSelfPermission(context, Manifest.permission.CAMERA) == PackageManager.PERMISSION_GRANTED
if (granted) return
@@ -88,11 +105,11 @@ class CameraCaptureManager(private val context: Context) {
val facing = parseFacing(params) ?: "front"
val quality = (parseQuality(params) ?: 0.95).coerceIn(0.1, 1.0)
val maxWidth = parseMaxWidth(params) ?: 1600
val deviceId = parseDeviceId(params)
val provider = context.cameraProvider()
val capture = ImageCapture.Builder().build()
val selector =
if (facing == "front") CameraSelector.DEFAULT_FRONT_CAMERA else CameraSelector.DEFAULT_BACK_CAMERA
val selector = resolveCameraSelector(provider, facing, deviceId)
provider.unbindAll()
provider.bindToLifecycle(owner, selector, capture)
@@ -154,9 +171,10 @@ class CameraCaptureManager(private val context: Context) {
val facing = parseFacing(params) ?: "front"
val durationMs = (parseDurationMs(params) ?: 3_000).coerceIn(200, 60_000)
val includeAudio = parseIncludeAudio(params) ?: true
val deviceId = parseDeviceId(params)
if (includeAudio) ensureMicPermission()
android.util.Log.w("CameraCaptureManager", "clip: start facing=$facing duration=$durationMs audio=$includeAudio")
android.util.Log.w("CameraCaptureManager", "clip: start facing=$facing duration=$durationMs audio=$includeAudio deviceId=${deviceId ?: "-"}")
val provider = context.cameraProvider()
android.util.Log.w("CameraCaptureManager", "clip: got camera provider")
@@ -168,8 +186,7 @@ class CameraCaptureManager(private val context: Context) {
)
.build()
val videoCapture = VideoCapture.withOutput(recorder)
val selector =
if (facing == "front") CameraSelector.DEFAULT_FRONT_CAMERA else CameraSelector.DEFAULT_BACK_CAMERA
val selector = resolveCameraSelector(provider, facing, deviceId)
// CameraX requires a Preview use case for the camera to start producing frames;
// without it, the encoder may get no data (ERROR_NO_VALID_DATA).
@@ -308,6 +325,12 @@ class CameraCaptureManager(private val context: Context) {
private fun parseDurationMs(params: JsonObject?): Int? =
readPrimitive(params, "durationMs")?.contentOrNull?.toIntOrNull()
private fun parseDeviceId(params: JsonObject?): String? =
readPrimitive(params, "deviceId")
?.contentOrNull
?.trim()
?.takeIf { it.isNotEmpty() }
private fun parseIncludeAudio(params: JsonObject?): Boolean? {
val value = readPrimitive(params, "includeAudio")?.contentOrNull?.trim()?.lowercase()
return when (value) {
@@ -318,6 +341,56 @@ class CameraCaptureManager(private val context: Context) {
}
private fun Context.mainExecutor(): Executor = ContextCompat.getMainExecutor(this)
private fun resolveCameraSelector(
provider: ProcessCameraProvider,
facing: String,
deviceId: String?,
): CameraSelector {
if (deviceId.isNullOrEmpty()) {
return if (facing == "front") CameraSelector.DEFAULT_FRONT_CAMERA else CameraSelector.DEFAULT_BACK_CAMERA
}
val availableIds = provider.availableCameraInfos.mapNotNull { cameraIdOrNull(it) }.toSet()
if (!availableIds.contains(deviceId)) {
throw IllegalStateException("INVALID_REQUEST: unknown camera deviceId '$deviceId'")
}
return CameraSelector.Builder()
.addCameraFilter { infos -> infos.filter { cameraIdOrNull(it) == deviceId } }
.build()
}
private fun cameraDeviceInfoOrNull(info: CameraInfo): CameraDeviceInfo? {
val cameraId = cameraIdOrNull(info) ?: return null
val lensFacing =
runCatching {
Camera2CameraInfo.from(info).getCameraCharacteristic(CameraCharacteristics.LENS_FACING)
}.getOrNull()
val position =
when (lensFacing) {
CameraCharacteristics.LENS_FACING_FRONT -> "front"
CameraCharacteristics.LENS_FACING_BACK -> "back"
CameraCharacteristics.LENS_FACING_EXTERNAL -> "external"
else -> "unspecified"
}
val deviceType =
if (lensFacing == CameraCharacteristics.LENS_FACING_EXTERNAL) "external" else "builtIn"
val name =
when (position) {
"front" -> "Front Camera"
"back" -> "Back Camera"
"external" -> "External Camera"
else -> "Camera $cameraId"
}
return CameraDeviceInfo(
id = cameraId,
name = name,
position = position,
deviceType = deviceType,
)
}
private fun cameraIdOrNull(info: CameraInfo): String? =
runCatching { Camera2CameraInfo.from(info).cameraId }.getOrNull()
}
private suspend fun Context.cameraProvider(): ProcessCameraProvider =

View File

@@ -9,7 +9,10 @@ import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.withContext
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonPrimitive
import kotlinx.serialization.json.buildJsonArray
import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.contentOrNull
import kotlinx.serialization.json.put
internal const val CAMERA_CLIP_MAX_RAW_BYTES: Long = 18L * 1024L * 1024L
@@ -24,6 +27,33 @@ class CameraHandler(
private val triggerCameraFlash: () -> Unit,
private val invokeErrorFromThrowable: (err: Throwable) -> Pair<String, String>,
) {
suspend fun handleList(_paramsJson: String?): GatewaySession.InvokeResult {
return try {
val devices = camera.listDevices()
val payload =
buildJsonObject {
put(
"devices",
buildJsonArray {
devices.forEach { device ->
add(
buildJsonObject {
put("id", JsonPrimitive(device.id))
put("name", JsonPrimitive(device.name))
put("position", JsonPrimitive(device.position))
put("deviceType", JsonPrimitive(device.deviceType))
},
)
}
},
)
}.toString()
GatewaySession.InvokeResult.ok(payload)
} catch (err: Throwable) {
val (code, message) = invokeErrorFromThrowable(err)
GatewaySession.InvokeResult.error(code = code, message = message)
}
}
suspend fun handleSnap(paramsJson: String?): GatewaySession.InvokeResult {
val logFile = if (BuildConfig.DEBUG) java.io.File(appContext.cacheDir, "camera_debug.log") else null

View File

@@ -62,6 +62,11 @@ object InvokeCommandRegistry {
name = OpenClawScreenCommand.Record.rawValue,
requiresForeground = true,
),
InvokeCommandSpec(
name = OpenClawCameraCommand.List.rawValue,
requiresForeground = true,
availability = InvokeCommandAvailability.CameraEnabled,
),
InvokeCommandSpec(
name = OpenClawCameraCommand.Snap.rawValue,
requiresForeground = true,

View File

@@ -112,6 +112,7 @@ class InvokeDispatcher(
}
// Camera commands
OpenClawCameraCommand.List.rawValue -> cameraHandler.handleList(paramsJson)
OpenClawCameraCommand.Snap.rawValue -> cameraHandler.handleSnap(paramsJson)
OpenClawCameraCommand.Clip.rawValue -> cameraHandler.handleClip(paramsJson)

View File

@@ -35,6 +35,7 @@ enum class OpenClawCanvasA2UICommand(val rawValue: String) {
}
enum class OpenClawCameraCommand(val rawValue: String) {
List("camera.list"),
Snap("camera.snap"),
Clip("camera.clip"),
;

View File

@@ -22,6 +22,7 @@ class InvokeCommandRegistryTest {
assertFalse(commands.contains(OpenClawCameraCommand.Snap.rawValue))
assertFalse(commands.contains(OpenClawCameraCommand.Clip.rawValue))
assertFalse(commands.contains(OpenClawCameraCommand.List.rawValue))
assertFalse(commands.contains(OpenClawLocationCommand.Get.rawValue))
assertTrue(commands.contains(OpenClawDeviceCommand.Status.rawValue))
assertTrue(commands.contains(OpenClawDeviceCommand.Info.rawValue))
@@ -44,6 +45,7 @@ class InvokeCommandRegistryTest {
assertTrue(commands.contains(OpenClawCameraCommand.Snap.rawValue))
assertTrue(commands.contains(OpenClawCameraCommand.Clip.rawValue))
assertTrue(commands.contains(OpenClawCameraCommand.List.rawValue))
assertTrue(commands.contains(OpenClawLocationCommand.Get.rawValue))
assertTrue(commands.contains(OpenClawDeviceCommand.Status.rawValue))
assertTrue(commands.contains(OpenClawDeviceCommand.Info.rawValue))

View File

@@ -31,6 +31,13 @@ class OpenClawProtocolConstantsTest {
assertEquals("device", OpenClawCapability.Device.rawValue)
}
@Test
fun cameraCommandsUseStableStrings() {
assertEquals("camera.list", OpenClawCameraCommand.List.rawValue)
assertEquals("camera.snap", OpenClawCameraCommand.Snap.rawValue)
assertEquals("camera.clip", OpenClawCameraCommand.Clip.rawValue)
}
@Test
fun screenCommandsUseStableStrings() {
assertEquals("screen.record", OpenClawScreenCommand.Record.rawValue)