From 86d897cfaa6a12b734bb8040ecd55ab2dabe749b Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 20:11:20 +0100 Subject: [PATCH] feat(android): expose talk mode Co-authored-by: alex-latitude <213670856+alex-latitude@users.noreply.github.com> --- CHANGELOG.md | 1 + apps/android/app/src/main/AndroidManifest.xml | 3 +- .../java/ai/openclaw/app/MainViewModel.kt | 11 +- .../ai/openclaw/app/NodeForegroundService.kt | 172 +++++++++++++++--- .../main/java/ai/openclaw/app/NodeRuntime.kt | 109 ++++++++--- .../main/java/ai/openclaw/app/SecurePrefs.kt | 11 +- .../java/ai/openclaw/app/VoiceCaptureMode.kt | 7 + .../java/ai/openclaw/app/ui/VoiceTabScreen.kt | 79 ++++++-- .../openclaw/app/NodeForegroundServiceTest.kt | 30 +++ .../java/ai/openclaw/app/SecurePrefsTest.kt | 28 +++ docs/nodes/talk.md | 7 + docs/platforms/android.md | 6 +- 12 files changed, 391 insertions(+), 73 deletions(-) create mode 100644 apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt diff --git a/CHANGELOG.md b/CHANGELOG.md index 0641c111846..7602314fc5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ Docs: https://docs.openclaw.ai - Providers/Xiaomi: add MiMo TTS as a bundled speech provider with MP3/WAV output and voice-note Opus transcoding. Fixes #52376. (#55614) Thanks @zoujiejun. - Providers/ElevenLabs: include `eleven_v3` in the bundled TTS model catalog so model selection surfaces can offer ElevenLabs v3. (#68321) Thanks @itsuzef. - Providers/Local CLI TTS: add a bundled local command speech provider with file/stdout input, voice-note Opus conversion, and telephony PCM output. (#56239) Thanks @solar2ain. +- Android/Talk Mode: expose Talk Mode in the Voice tab with runtime-owned voice capture modes and microphone foreground-service escalation. Thanks @alex-latitude. - Providers/LiteLLM: register `litellm` as an image-generation provider so `image_generate model=litellm/...` calls and `agents.defaults.imageGenerationModel.fallbacks` entries resolve through the LiteLLM proxy. Thanks @zqchris. ### Fixes diff --git a/apps/android/app/src/main/AndroidManifest.xml b/apps/android/app/src/main/AndroidManifest.xml index 310cf0e26bc..6a9eed63d7c 100644 --- a/apps/android/app/src/main/AndroidManifest.xml +++ b/apps/android/app/src/main/AndroidManifest.xml @@ -3,6 +3,7 @@ + + android:foregroundServiceType="dataSync|microphone" /> = prefs.onboardingCompleted val canvasDebugStatusEnabled: StateFlow = prefs.canvasDebugStatusEnabled val speakerEnabled: StateFlow = prefs.speakerEnabled - val micEnabled: StateFlow = prefs.talkEnabled + val voiceCaptureMode: StateFlow = runtimeState(initial = VoiceCaptureMode.Off) { it.voiceCaptureMode } + val micEnabled: StateFlow = runtimeState(initial = false) { it.micEnabled } val micCooldown: StateFlow = runtimeState(initial = false) { it.micCooldown } val micStatusText: StateFlow = runtimeState(initial = "Mic off") { it.micStatusText } @@ -111,6 +112,10 @@ class MainViewModel(app: Application) : AndroidViewModel(app) { val micConversation: StateFlow> = runtimeState(initial = emptyList()) { it.micConversation } val micInputLevel: StateFlow = runtimeState(initial = 0f) { it.micInputLevel } val micIsSending: StateFlow = runtimeState(initial = false) { it.micIsSending } + val talkModeEnabled: StateFlow = runtimeState(initial = false) { it.talkModeEnabled } + val talkModeListening: StateFlow = runtimeState(initial = false) { it.talkModeListening } + val talkModeSpeaking: StateFlow = runtimeState(initial = false) { it.talkModeSpeaking } + val talkModeStatusText: StateFlow = runtimeState(initial = "Off") { it.talkModeStatusText } val chatSessionKey: StateFlow = runtimeState(initial = "main") { it.chatSessionKey } val chatSessionId: StateFlow = runtimeState(initial = null) { it.chatSessionId } @@ -283,6 +288,10 @@ class MainViewModel(app: Application) : AndroidViewModel(app) { ensureRuntime().setMicEnabled(enabled) } + fun setTalkModeEnabled(enabled: Boolean) { + ensureRuntime().setTalkModeEnabled(enabled) + } + fun setSpeakerEnabled(enabled: Boolean) { ensureRuntime().setSpeakerEnabled(enabled) } diff --git a/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt b/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt index 4c7ccdd56e5..c57e378b0ed 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt @@ -3,12 +3,14 @@ package ai.openclaw.app import android.app.Notification import android.app.NotificationChannel import android.app.NotificationManager -import android.app.Service import android.app.PendingIntent +import android.app.Service import android.content.Context import android.content.Intent import android.content.pm.ServiceInfo import androidx.core.app.NotificationCompat +import androidx.core.app.ServiceCompat +import androidx.core.content.ContextCompat import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Job @@ -21,6 +23,7 @@ class NodeForegroundService : Service() { private val scope: CoroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Main) private var notificationJob: Job? = null private var didStartForeground = false + private var voiceCaptureMode = VoiceCaptureMode.Off override fun onCreate() { super.onCreate() @@ -36,22 +39,51 @@ class NodeForegroundService : Service() { notificationJob = scope.launch { combine( - runtime.statusText, - runtime.serverName, - runtime.isConnected, - runtime.micEnabled, - runtime.micIsListening, - ) { status, server, connected, micEnabled, micListening -> - Quint(status, server, connected, micEnabled, micListening) - }.collect { (status, server, connected, micEnabled, micListening) -> - val title = if (connected) "OpenClaw Node · Connected" else "OpenClaw Node" - val micSuffix = - if (micEnabled) { - if (micListening) " · Mic: Listening" else " · Mic: Pending" - } else { - "" + combine( + runtime.statusText, + runtime.serverName, + runtime.isConnected, + runtime.voiceCaptureMode, + ) { status, server, connected, mode -> + VoiceNotificationBase( + status = status, + server = server, + connected = connected, + mode = mode, + ) + }, + combine( + runtime.micEnabled, + runtime.micIsListening, + runtime.talkModeListening, + runtime.talkModeSpeaking, + ) { micEnabled, micListening, talkListening, talkSpeaking -> + VoiceNotificationCapture( + micEnabled = micEnabled, + micListening = micListening, + talkListening = talkListening, + talkSpeaking = talkSpeaking, + ) + }, + ) { base, capture -> + VoiceNotificationState(base = base, capture = capture) + }.collect { state -> + voiceCaptureMode = state.mode + val title = + when { + state.connected && state.mode == VoiceCaptureMode.TalkMode -> "OpenClaw Node · Talk" + state.connected -> "OpenClaw Node · Connected" + else -> "OpenClaw Node" } - val text = (server?.let { "$status · $it" } ?: status) + micSuffix + val text = + (state.server?.let { "${state.status} · $it" } ?: state.status) + + voiceNotificationSuffix( + mode = state.mode, + manualMicEnabled = state.capture.micEnabled, + manualMicListening = state.capture.micListening, + talkListening = state.capture.talkListening, + talkSpeaking = state.capture.talkSpeaking, + ) startForegroundWithTypes( notification = buildNotification(title = title, text = text), @@ -60,13 +92,27 @@ class NodeForegroundService : Service() { } } - override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { + override fun onStartCommand( + intent: Intent?, + flags: Int, + startId: Int, + ): Int { when (intent?.action) { ACTION_STOP -> { (application as NodeApp).peekRuntime()?.disconnect() stopSelf() return START_NOT_STICKY } + ACTION_SET_VOICE_CAPTURE_MODE -> { + voiceCaptureMode = intent.getStringExtra(EXTRA_VOICE_CAPTURE_MODE).toVoiceCaptureMode() + startForegroundWithTypes( + notification = + buildNotification( + title = "OpenClaw Node", + text = if (voiceCaptureMode == VoiceCaptureMode.TalkMode) "Talk mode active" else "Connected", + ), + ) + } } // Keep running; connection is managed by NodeRuntime (auto-reconnect + manual). return START_STICKY @@ -127,17 +173,13 @@ class NodeForegroundService : Service() { .build() } - private fun updateNotification(notification: Notification) { - val mgr = getSystemService(Context.NOTIFICATION_SERVICE) as NotificationManager - mgr.notify(NOTIFICATION_ID, notification) - } - private fun startForegroundWithTypes(notification: Notification) { + val serviceTypes = foregroundServiceTypesForVoiceMode(voiceCaptureMode) if (didStartForeground) { - updateNotification(notification) + ServiceCompat.startForeground(this, NOTIFICATION_ID, notification, serviceTypes) return } - startForeground(NOTIFICATION_ID, notification, ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC) + ServiceCompat.startForeground(this, NOTIFICATION_ID, notification, serviceTypes) didStartForeground = true } @@ -146,6 +188,8 @@ class NodeForegroundService : Service() { private const val NOTIFICATION_ID = 1 private const val ACTION_STOP = "ai.openclaw.app.action.STOP" + private const val ACTION_SET_VOICE_CAPTURE_MODE = "ai.openclaw.app.action.SET_VOICE_CAPTURE_MODE" + private const val EXTRA_VOICE_CAPTURE_MODE = "ai.openclaw.app.extra.VOICE_CAPTURE_MODE" fun start(context: Context) { val intent = Intent(context, NodeForegroundService::class.java) @@ -156,7 +200,85 @@ class NodeForegroundService : Service() { val intent = Intent(context, NodeForegroundService::class.java).setAction(ACTION_STOP) context.startService(intent) } + + fun setVoiceCaptureMode( + context: Context, + mode: VoiceCaptureMode, + ) { + val intent = + Intent(context, NodeForegroundService::class.java) + .setAction(ACTION_SET_VOICE_CAPTURE_MODE) + .putExtra(EXTRA_VOICE_CAPTURE_MODE, mode.name) + if (mode == VoiceCaptureMode.TalkMode) { + ContextCompat.startForegroundService(context, intent) + } else { + context.startService(intent) + } + } } } -private data class Quint(val first: A, val second: B, val third: C, val fourth: D, val fifth: E) +internal fun foregroundServiceTypesForVoiceMode(mode: VoiceCaptureMode): Int { + val base = ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC + return if (mode == VoiceCaptureMode.TalkMode) { + base or ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE + } else { + base + } +} + +internal fun voiceNotificationSuffix( + mode: VoiceCaptureMode, + manualMicEnabled: Boolean, + manualMicListening: Boolean, + talkListening: Boolean, + talkSpeaking: Boolean, +): String { + return when (mode) { + VoiceCaptureMode.TalkMode -> + when { + talkSpeaking -> " · Talk: Speaking" + talkListening -> " · Talk: Listening" + else -> " · Talk: On" + } + VoiceCaptureMode.ManualMic -> + if (manualMicEnabled) { + if (manualMicListening) " · Mic: Listening" else " · Mic: Pending" + } else { + "" + } + VoiceCaptureMode.Off -> "" + } +} + +private fun String?.toVoiceCaptureMode(): VoiceCaptureMode { + return VoiceCaptureMode.entries.firstOrNull { it.name == this } ?: VoiceCaptureMode.Off +} + +private data class VoiceNotificationBase( + val status: String, + val server: String?, + val connected: Boolean, + val mode: VoiceCaptureMode, +) + +private data class VoiceNotificationCapture( + val micEnabled: Boolean, + val micListening: Boolean, + val talkListening: Boolean, + val talkSpeaking: Boolean, +) + +private data class VoiceNotificationState( + val base: VoiceNotificationBase, + val capture: VoiceNotificationCapture, +) { + val status: String + get() = base.status + val server: String? + get() = base.server + val connected: Boolean + get() = base.connected + val mode: VoiceCaptureMode + get() = base.mode +} diff --git a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt index 7572a9f41be..760fab44b0b 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt @@ -64,6 +64,8 @@ class NodeRuntime( private val json = Json { ignoreUnknownKeys = true } private val externalAudioCaptureActive = MutableStateFlow(false) + private val _voiceCaptureMode = MutableStateFlow(VoiceCaptureMode.Off) + val voiceCaptureMode: StateFlow = _voiceCaptureMode.asStateFlow() private val discovery = GatewayDiscovery(appContext, scope = scope) val gateways: StateFlow> = discovery.gateways @@ -428,6 +430,18 @@ class NodeRuntime( ) } + val talkModeEnabled: StateFlow + get() = talkMode.isEnabled + + val talkModeListening: StateFlow + get() = talkMode.isListening + + val talkModeSpeaking: StateFlow + get() = talkMode.isSpeaking + + val talkModeStatusText: StateFlow + get() = talkMode.statusText + private fun syncMainSessionKey(agentId: String?) { val resolvedKey = resolveNodeMainSessionKey(agentId) // Always push the resolved session key into TalkMode, even when the @@ -599,17 +613,8 @@ class NodeRuntime( prefs.loadGatewayToken() } - scope.launch { - prefs.talkEnabled.collect { enabled -> - // MicCaptureManager handles STT + send to gateway, while the dedicated - // reply speaker handles TTS for assistant replies in the voice tab. - micCapture.setMicEnabled(enabled) - if (enabled) { - talkMode.ttsOnAllResponses = false - scope.launch { talkMode.ensureChatSubscribed() } - } - externalAudioCaptureActive.value = enabled - } + if (prefs.voiceMicEnabled.value) { + setVoiceCaptureMode(VoiceCaptureMode.ManualMic, persistManualMic = false) } scope.launch(Dispatchers.Default) { @@ -643,7 +648,7 @@ class NodeRuntime( if (value) { reconnectPreferredGatewayOnForeground() } else { - stopActiveVoiceSession() + stopManualVoiceSession() } } @@ -757,21 +762,17 @@ class NodeRuntime( fun setVoiceScreenActive(active: Boolean) { if (!active) { - stopActiveVoiceSession() + stopManualVoiceSession() } // Don't re-enable on active=true; mic toggle drives that } fun setMicEnabled(value: Boolean) { - prefs.setTalkEnabled(value) - if (value) { - // Tapping mic on interrupts any active TTS (barge-in) - stopVoicePlayback() - talkMode.ttsOnAllResponses = false - scope.launch { talkMode.ensureChatSubscribed() } - } - micCapture.setMicEnabled(value) - externalAudioCaptureActive.value = value + setVoiceCaptureMode(if (value) VoiceCaptureMode.ManualMic else VoiceCaptureMode.Off) + } + + fun setTalkModeEnabled(value: Boolean) { + setVoiceCaptureMode(if (value) VoiceCaptureMode.TalkMode else VoiceCaptureMode.Off) } val speakerEnabled: StateFlow @@ -786,11 +787,72 @@ class NodeRuntime( talkMode.setPlaybackEnabled(value) } + private fun setVoiceCaptureMode( + mode: VoiceCaptureMode, + persistManualMic: Boolean = true, + ) { + if (mode == VoiceCaptureMode.TalkMode && !hasRecordAudioPermission()) { + _voiceCaptureMode.value = VoiceCaptureMode.Off + externalAudioCaptureActive.value = false + return + } + if (_voiceCaptureMode.value == mode) return + _voiceCaptureMode.value = mode + when (mode) { + VoiceCaptureMode.Off -> { + talkMode.ttsOnAllResponses = false + talkMode.setEnabled(false) + stopVoicePlayback() + micCapture.setMicEnabled(false) + if (persistManualMic) { + prefs.setVoiceMicEnabled(false) + } + NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off) + externalAudioCaptureActive.value = false + } + + VoiceCaptureMode.ManualMic -> { + talkMode.ttsOnAllResponses = false + talkMode.setEnabled(false) + NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.ManualMic) + if (persistManualMic) { + prefs.setVoiceMicEnabled(true) + } + // Tapping mic on interrupts any active TTS (barge-in). + stopVoicePlayback() + scope.launch { talkMode.ensureChatSubscribed() } + micCapture.setMicEnabled(true) + externalAudioCaptureActive.value = true + } + + VoiceCaptureMode.TalkMode -> { + if (persistManualMic) { + prefs.setVoiceMicEnabled(false) + } + micCapture.setMicEnabled(false) + NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.TalkMode) + talkMode.ttsOnAllResponses = true + talkMode.setPlaybackEnabled(speakerEnabled.value) + scope.launch { talkMode.ensureChatSubscribed() } + talkMode.setEnabled(true) + externalAudioCaptureActive.value = true + } + } + } + + private fun stopManualVoiceSession() { + if (_voiceCaptureMode.value != VoiceCaptureMode.ManualMic) return + setVoiceCaptureMode(VoiceCaptureMode.Off) + } + private fun stopActiveVoiceSession() { talkMode.ttsOnAllResponses = false + talkMode.setEnabled(false) stopVoicePlayback() micCapture.setMicEnabled(false) - prefs.setTalkEnabled(false) + prefs.setVoiceMicEnabled(false) + NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off) + _voiceCaptureMode.value = VoiceCaptureMode.Off externalAudioCaptureActive.value = false } @@ -970,6 +1032,7 @@ class NodeRuntime( } fun disconnect() { + stopActiveVoiceSession() connectedEndpoint = null activeGatewayAuth = null _pendingGatewayTrust.value = null diff --git a/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt b/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt index e8a6acd841b..97be3090c5f 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt @@ -37,6 +37,7 @@ class SecurePrefs( private const val notificationsForwardingMaxEventsPerMinuteKey = "notifications.forwarding.maxEventsPerMinute" private const val notificationsForwardingSessionKeyKey = "notifications.forwarding.sessionKey" + private const val voiceMicEnabledKey = "voice.micEnabled" } private val appContext = context.applicationContext @@ -162,8 +163,8 @@ class SecurePrefs( private val _voiceWakeMode = MutableStateFlow(loadVoiceWakeMode()) val voiceWakeMode: StateFlow = _voiceWakeMode - private val _talkEnabled = MutableStateFlow(plainPrefs.getBoolean("talk.enabled", false)) - val talkEnabled: StateFlow = _talkEnabled + private val _voiceMicEnabled = MutableStateFlow(plainPrefs.getBoolean(voiceMicEnabledKey, false)) + val voiceMicEnabled: StateFlow = _voiceMicEnabled private val _speakerEnabled = MutableStateFlow(plainPrefs.getBoolean("voice.speakerEnabled", true)) val speakerEnabled: StateFlow = _speakerEnabled @@ -478,9 +479,9 @@ class SecurePrefs( _voiceWakeMode.value = mode } - fun setTalkEnabled(value: Boolean) { - plainPrefs.edit { putBoolean("talk.enabled", value) } - _talkEnabled.value = value + fun setVoiceMicEnabled(value: Boolean) { + plainPrefs.edit { putBoolean(voiceMicEnabledKey, value) } + _voiceMicEnabled.value = value } fun setSpeakerEnabled(value: Boolean) { diff --git a/apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt b/apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt new file mode 100644 index 00000000000..071d0df1970 --- /dev/null +++ b/apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt @@ -0,0 +1,7 @@ +package ai.openclaw.app + +enum class VoiceCaptureMode { + Off, + ManualMic, + TalkMode, +} diff --git a/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt b/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt index 76fc2c4f0c9..1b9277afcca 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt @@ -35,10 +35,11 @@ import androidx.compose.foundation.lazy.rememberLazyListState import androidx.compose.foundation.shape.CircleShape import androidx.compose.foundation.shape.RoundedCornerShape import androidx.compose.material.icons.Icons -import androidx.compose.material.icons.filled.Mic -import androidx.compose.material.icons.filled.MicOff import androidx.compose.material.icons.automirrored.filled.VolumeOff import androidx.compose.material.icons.automirrored.filled.VolumeUp +import androidx.compose.material.icons.filled.Mic +import androidx.compose.material.icons.filled.MicOff +import androidx.compose.material.icons.filled.RecordVoiceOver import androidx.compose.material3.Button import androidx.compose.material3.ButtonDefaults import androidx.compose.material3.Icon @@ -69,6 +70,7 @@ import androidx.lifecycle.Lifecycle import androidx.lifecycle.LifecycleEventObserver import androidx.lifecycle.compose.LocalLifecycleOwner import ai.openclaw.app.MainViewModel +import ai.openclaw.app.VoiceCaptureMode import ai.openclaw.app.voice.VoiceConversationEntry import ai.openclaw.app.voice.VoiceConversationRole import kotlin.math.max @@ -81,6 +83,7 @@ fun VoiceTabScreen(viewModel: MainViewModel) { val listState = rememberLazyListState() val gatewayStatus by viewModel.statusText.collectAsState() + val voiceCaptureMode by viewModel.voiceCaptureMode.collectAsState() val micEnabled by viewModel.micEnabled.collectAsState() val micCooldown by viewModel.micCooldown.collectAsState() val speakerEnabled by viewModel.speakerEnabled.collectAsState() @@ -90,12 +93,15 @@ fun VoiceTabScreen(viewModel: MainViewModel) { val micConversation by viewModel.micConversation.collectAsState() val micInputLevel by viewModel.micInputLevel.collectAsState() val micIsSending by viewModel.micIsSending.collectAsState() + val talkModeEnabled by viewModel.talkModeEnabled.collectAsState() + val talkModeListening by viewModel.talkModeListening.collectAsState() + val talkModeSpeaking by viewModel.talkModeSpeaking.collectAsState() val hasStreamingAssistant = micConversation.any { it.role == VoiceConversationRole.Assistant && it.isStreaming } val showThinkingBubble = micIsSending && !hasStreamingAssistant var hasMicPermission by remember { mutableStateOf(context.hasRecordAudioPermission()) } - var pendingMicEnable by remember { mutableStateOf(false) } + var pendingVoicePermissionAction by remember { mutableStateOf(null) } DisposableEffect(lifecycleOwner, context) { val observer = @@ -107,7 +113,7 @@ fun VoiceTabScreen(viewModel: MainViewModel) { lifecycleOwner.lifecycle.addObserver(observer) onDispose { lifecycleOwner.lifecycle.removeObserver(observer) - // Stop TTS when leaving the voice screen + // Manual mic is tied to the Voice tab; Talk Mode is explicit and can continue. viewModel.setVoiceScreenActive(false) } } @@ -115,10 +121,14 @@ fun VoiceTabScreen(viewModel: MainViewModel) { val requestMicPermission = rememberLauncherForActivityResult(ActivityResultContracts.RequestPermission()) { granted -> hasMicPermission = granted - if (granted && pendingMicEnable) { - viewModel.setMicEnabled(true) + if (granted) { + when (pendingVoicePermissionAction) { + PendingVoicePermissionAction.ManualMic -> viewModel.setMicEnabled(true) + PendingVoicePermissionAction.TalkMode -> viewModel.setTalkModeEnabled(true) + null -> Unit + } } - pendingMicEnable = false + pendingVoicePermissionAction = null } LaunchedEffect(micConversation.size, showThinkingBubble) { @@ -161,12 +171,12 @@ fun VoiceTabScreen(viewModel: MainViewModel) { tint = mobileTextTertiary, ) Text( - "Tap the mic to start", + "Tap mic or Talk", style = mobileHeadline, color = mobileTextSecondary, ) Text( - "Each pause sends a turn automatically.", + "Mic sends turns; Talk keeps the conversation open.", style = mobileCallout, color = mobileTextTertiary, ) @@ -263,7 +273,7 @@ fun VoiceTabScreen(viewModel: MainViewModel) { if (hasMicPermission) { viewModel.setMicEnabled(true) } else { - pendingMicEnable = true + pendingVoicePermissionAction = PendingVoicePermissionAction.ManualMic requestMicPermission.launch(Manifest.permission.RECORD_AUDIO) } }, @@ -287,11 +297,39 @@ fun VoiceTabScreen(viewModel: MainViewModel) { } } - // Invisible spacer to balance the row (matches speaker column width) - Column(horizontalAlignment = Alignment.CenterHorizontally) { - Box(modifier = Modifier.size(48.dp)) + Column(horizontalAlignment = Alignment.CenterHorizontally, verticalArrangement = Arrangement.spacedBy(4.dp)) { + IconButton( + onClick = { + if (talkModeEnabled) { + viewModel.setTalkModeEnabled(false) + return@IconButton + } + if (hasMicPermission) { + viewModel.setTalkModeEnabled(true) + } else { + pendingVoicePermissionAction = PendingVoicePermissionAction.TalkMode + requestMicPermission.launch(Manifest.permission.RECORD_AUDIO) + } + }, + modifier = Modifier.size(48.dp), + colors = + IconButtonDefaults.iconButtonColors( + containerColor = if (talkModeEnabled) mobileSuccessSoft else mobileSurface, + ), + ) { + Icon( + imageVector = Icons.Default.RecordVoiceOver, + contentDescription = if (talkModeEnabled) "Turn Talk Mode off" else "Turn Talk Mode on", + modifier = Modifier.size(22.dp), + tint = if (talkModeEnabled) mobileSuccess else mobileTextSecondary, + ) + } Spacer(modifier = Modifier.height(4.dp)) - Text("", style = mobileCaption2) + Text( + if (talkModeEnabled) "Talk on" else "Talk", + style = mobileCaption2, + color = if (talkModeEnabled) mobileSuccess else mobileTextTertiary, + ) } } @@ -299,6 +337,9 @@ fun VoiceTabScreen(viewModel: MainViewModel) { val queueCount = micQueuedMessages.size val stateText = when { + voiceCaptureMode == VoiceCaptureMode.TalkMode && talkModeSpeaking -> "Talk speaking" + voiceCaptureMode == VoiceCaptureMode.TalkMode && talkModeListening -> "Talk listening" + voiceCaptureMode == VoiceCaptureMode.TalkMode -> "Talk on" queueCount > 0 -> "$queueCount queued" micIsSending -> "Sending" micCooldown -> "Cooldown" @@ -307,14 +348,15 @@ fun VoiceTabScreen(viewModel: MainViewModel) { } val stateColor = when { + voiceCaptureMode == VoiceCaptureMode.TalkMode -> mobileSuccess micEnabled -> mobileSuccess micIsSending -> mobileAccent else -> mobileTextSecondary } Surface( shape = RoundedCornerShape(999.dp), - color = if (micEnabled) mobileSuccessSoft else mobileSurface, - border = BorderStroke(1.dp, if (micEnabled) mobileSuccess.copy(alpha = 0.3f) else mobileBorder), + color = if (micEnabled || talkModeEnabled) mobileSuccessSoft else mobileSurface, + border = BorderStroke(1.dp, if (micEnabled || talkModeEnabled) mobileSuccess.copy(alpha = 0.3f) else mobileBorder), ) { Text( "$gatewayStatus · $stateText", @@ -353,6 +395,11 @@ fun VoiceTabScreen(viewModel: MainViewModel) { } } +private enum class PendingVoicePermissionAction { + ManualMic, + TalkMode, +} + @Composable private fun VoiceTurnBubble(entry: VoiceConversationEntry) { val isUser = entry.role == VoiceConversationRole.User diff --git a/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt index fddc347f487..9c76eeb3219 100644 --- a/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt +++ b/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt @@ -2,6 +2,7 @@ package ai.openclaw.app import android.app.Notification import android.content.Intent +import android.content.pm.ServiceInfo import org.junit.Assert.assertEquals import org.junit.Assert.assertNotNull import org.junit.Test @@ -30,6 +31,35 @@ class NodeForegroundServiceTest { assertEquals(expectedFlags, savedIntent.flags and expectedFlags) } + @Test + fun foregroundServiceTypesForVoiceMode_addsMicrophoneOnlyForTalkMode() { + assertEquals( + ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC, + foregroundServiceTypesForVoiceMode(VoiceCaptureMode.Off), + ) + assertEquals( + ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC, + foregroundServiceTypesForVoiceMode(VoiceCaptureMode.ManualMic), + ) + assertEquals( + ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC or ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE, + foregroundServiceTypesForVoiceMode(VoiceCaptureMode.TalkMode), + ) + } + + @Test + fun voiceNotificationSuffixReflectsActiveCaptureMode() { + assertEquals("", voiceNotificationSuffix(VoiceCaptureMode.Off, false, false, false, false)) + assertEquals( + " · Mic: Listening", + voiceNotificationSuffix(VoiceCaptureMode.ManualMic, true, true, false, false), + ) + assertEquals( + " · Talk: Speaking", + voiceNotificationSuffix(VoiceCaptureMode.TalkMode, false, false, true, true), + ) + } + private fun buildNotification(service: NodeForegroundService): Notification { val method = NodeForegroundService::class.java.getDeclaredMethod( diff --git a/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt index 811eed0c900..d209f52a2f5 100644 --- a/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt +++ b/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt @@ -2,7 +2,9 @@ package ai.openclaw.app import android.content.Context import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse import org.junit.Assert.assertNull +import org.junit.Assert.assertTrue import org.junit.Test import org.junit.runner.RunWith import org.robolectric.RobolectricTestRunner @@ -22,6 +24,32 @@ class SecurePrefsTest { assertEquals("whileUsing", plainPrefs.getString("location.enabledMode", null)) } + @Test + fun voiceMicEnabled_ignoresOldTalkEnabledKey() { + val context = RuntimeEnvironment.getApplication() + val plainPrefs = context.getSharedPreferences("openclaw.node", Context.MODE_PRIVATE) + plainPrefs.edit().clear().putBoolean("talk.enabled", true).commit() + + val prefs = SecurePrefs(context) + + assertFalse(prefs.voiceMicEnabled.value) + assertFalse(plainPrefs.contains("voice.micEnabled")) + } + + @Test + fun setVoiceMicEnabled_persistsNewKeyOnly() { + val context = RuntimeEnvironment.getApplication() + val plainPrefs = context.getSharedPreferences("openclaw.node", Context.MODE_PRIVATE) + plainPrefs.edit().clear().putBoolean("talk.enabled", false).commit() + val prefs = SecurePrefs(context) + + prefs.setVoiceMicEnabled(true) + + assertTrue(prefs.voiceMicEnabled.value) + assertTrue(plainPrefs.getBoolean("voice.micEnabled", false)) + assertFalse(plainPrefs.getBoolean("talk.enabled", false)) + } + @Test fun saveGatewayBootstrapToken_persistsSeparatelyFromSharedToken() { val context = RuntimeEnvironment.getApplication() diff --git a/docs/nodes/talk.md b/docs/nodes/talk.md index fe65b915aca..c8d40b08957 100644 --- a/docs/nodes/talk.md +++ b/docs/nodes/talk.md @@ -91,6 +91,13 @@ Defaults: - Click cloud: stop speaking - Click X: exit Talk mode +## Android UI + +- Voice tab toggle: **Talk** +- Manual **Mic** and **Talk** are mutually exclusive runtime capture modes. +- Manual Mic stops when the app leaves the foreground or the user leaves the Voice tab. +- Talk Mode keeps running until toggled off or the Android node disconnects, and uses Android's microphone foreground-service type while active. + ## Notes - Requires Speech + Microphone permissions. diff --git a/docs/platforms/android.md b/docs/platforms/android.md index 0ed03f8ba33..c42b2f28a57 100644 --- a/docs/platforms/android.md +++ b/docs/platforms/android.md @@ -199,8 +199,10 @@ See [Camera node](/nodes/camera) for parameters and CLI helpers. ### 8) Voice + expanded Android command surface -- Voice: Android uses a single mic on/off flow in the Voice tab with transcript capture and `talk.speak` playback. Local system TTS is used only when `talk.speak` is unavailable. Voice stops when the app leaves the foreground. -- Voice wake/talk-mode toggles are currently removed from Android UX/runtime. +- Voice tab: Android has two explicit capture modes. **Mic** is a manual Voice-tab session that sends each pause as a chat turn and stops when the app leaves the foreground or the user leaves the Voice tab. **Talk** is continuous Talk Mode and keeps listening until toggled off or the node disconnects. +- Talk Mode promotes the existing foreground service from `dataSync` to `dataSync|microphone` before capture starts, then demotes it when Talk Mode stops. Android 14+ requires the `FOREGROUND_SERVICE_MICROPHONE` declaration, the `RECORD_AUDIO` runtime grant, and the microphone service type at runtime. +- Spoken replies use `talk.speak` through the configured gateway Talk provider. Local system TTS is used only when `talk.speak` is unavailable. +- Voice wake remains disabled in the Android UX/runtime. - Additional Android command families (availability depends on device + permissions): - `device.status`, `device.info`, `device.permissions`, `device.health` - `notifications.list`, `notifications.actions` (see [Notification forwarding](#notification-forwarding) below)