diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0641c111846..7602314fc5e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -46,6 +46,7 @@ Docs: https://docs.openclaw.ai
- Providers/Xiaomi: add MiMo TTS as a bundled speech provider with MP3/WAV output and voice-note Opus transcoding. Fixes #52376. (#55614) Thanks @zoujiejun.
- Providers/ElevenLabs: include `eleven_v3` in the bundled TTS model catalog so model selection surfaces can offer ElevenLabs v3. (#68321) Thanks @itsuzef.
- Providers/Local CLI TTS: add a bundled local command speech provider with file/stdout input, voice-note Opus conversion, and telephony PCM output. (#56239) Thanks @solar2ain.
+- Android/Talk Mode: expose Talk Mode in the Voice tab with runtime-owned voice capture modes and microphone foreground-service escalation. Thanks @alex-latitude.
- Providers/LiteLLM: register `litellm` as an image-generation provider so `image_generate model=litellm/...` calls and `agents.defaults.imageGenerationModel.fallbacks` entries resolve through the LiteLLM proxy. Thanks @zqchris.
### Fixes
diff --git a/apps/android/app/src/main/AndroidManifest.xml b/apps/android/app/src/main/AndroidManifest.xml
index 310cf0e26bc..6a9eed63d7c 100644
--- a/apps/android/app/src/main/AndroidManifest.xml
+++ b/apps/android/app/src/main/AndroidManifest.xml
@@ -3,6 +3,7 @@
+
+ android:foregroundServiceType="dataSync|microphone" />
= prefs.onboardingCompleted
val canvasDebugStatusEnabled: StateFlow<Boolean> = prefs.canvasDebugStatusEnabled
val speakerEnabled: StateFlow<Boolean> = prefs.speakerEnabled
- val micEnabled: StateFlow<Boolean> = prefs.talkEnabled
+ val voiceCaptureMode: StateFlow<VoiceCaptureMode> = runtimeState(initial = VoiceCaptureMode.Off) { it.voiceCaptureMode }
+ val micEnabled: StateFlow<Boolean> = runtimeState(initial = false) { it.micEnabled }
val micCooldown: StateFlow<Boolean> = runtimeState(initial = false) { it.micCooldown }
val micStatusText: StateFlow<String> = runtimeState(initial = "Mic off") { it.micStatusText }
@@ -111,6 +112,10 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
val micConversation: StateFlow<List<VoiceConversationEntry>> = runtimeState(initial = emptyList()) { it.micConversation }
val micInputLevel: StateFlow<Float> = runtimeState(initial = 0f) { it.micInputLevel }
val micIsSending: StateFlow<Boolean> = runtimeState(initial = false) { it.micIsSending }
+ val talkModeEnabled: StateFlow<Boolean> = runtimeState(initial = false) { it.talkModeEnabled }
+ val talkModeListening: StateFlow<Boolean> = runtimeState(initial = false) { it.talkModeListening }
+ val talkModeSpeaking: StateFlow<Boolean> = runtimeState(initial = false) { it.talkModeSpeaking }
+ val talkModeStatusText: StateFlow<String> = runtimeState(initial = "Off") { it.talkModeStatusText }
val chatSessionKey: StateFlow<String> = runtimeState(initial = "main") { it.chatSessionKey }
val chatSessionId: StateFlow<String?> = runtimeState(initial = null) { it.chatSessionId }
@@ -283,6 +288,10 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
ensureRuntime().setMicEnabled(enabled)
}
+ fun setTalkModeEnabled(enabled: Boolean) {
+ ensureRuntime().setTalkModeEnabled(enabled)
+ }
+
fun setSpeakerEnabled(enabled: Boolean) {
ensureRuntime().setSpeakerEnabled(enabled)
}
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt b/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt
index 4c7ccdd56e5..c57e378b0ed 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt
@@ -3,12 +3,14 @@ package ai.openclaw.app
import android.app.Notification
import android.app.NotificationChannel
import android.app.NotificationManager
-import android.app.Service
import android.app.PendingIntent
+import android.app.Service
import android.content.Context
import android.content.Intent
import android.content.pm.ServiceInfo
import androidx.core.app.NotificationCompat
+import androidx.core.app.ServiceCompat
+import androidx.core.content.ContextCompat
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
@@ -21,6 +23,7 @@ class NodeForegroundService : Service() {
private val scope: CoroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Main)
private var notificationJob: Job? = null
private var didStartForeground = false
+ private var voiceCaptureMode = VoiceCaptureMode.Off
override fun onCreate() {
super.onCreate()
@@ -36,22 +39,51 @@ class NodeForegroundService : Service() {
notificationJob =
scope.launch {
combine(
- runtime.statusText,
- runtime.serverName,
- runtime.isConnected,
- runtime.micEnabled,
- runtime.micIsListening,
- ) { status, server, connected, micEnabled, micListening ->
- Quint(status, server, connected, micEnabled, micListening)
- }.collect { (status, server, connected, micEnabled, micListening) ->
- val title = if (connected) "OpenClaw Node · Connected" else "OpenClaw Node"
- val micSuffix =
- if (micEnabled) {
- if (micListening) " · Mic: Listening" else " · Mic: Pending"
- } else {
- ""
+ combine(
+ runtime.statusText,
+ runtime.serverName,
+ runtime.isConnected,
+ runtime.voiceCaptureMode,
+ ) { status, server, connected, mode ->
+ VoiceNotificationBase(
+ status = status,
+ server = server,
+ connected = connected,
+ mode = mode,
+ )
+ },
+ combine(
+ runtime.micEnabled,
+ runtime.micIsListening,
+ runtime.talkModeListening,
+ runtime.talkModeSpeaking,
+ ) { micEnabled, micListening, talkListening, talkSpeaking ->
+ VoiceNotificationCapture(
+ micEnabled = micEnabled,
+ micListening = micListening,
+ talkListening = talkListening,
+ talkSpeaking = talkSpeaking,
+ )
+ },
+ ) { base, capture ->
+ VoiceNotificationState(base = base, capture = capture)
+ }.collect { state ->
+ voiceCaptureMode = state.mode
+ val title =
+ when {
+ state.connected && state.mode == VoiceCaptureMode.TalkMode -> "OpenClaw Node · Talk"
+ state.connected -> "OpenClaw Node · Connected"
+ else -> "OpenClaw Node"
}
- val text = (server?.let { "$status · $it" } ?: status) + micSuffix
+ val text =
+ (state.server?.let { "${state.status} · $it" } ?: state.status) +
+ voiceNotificationSuffix(
+ mode = state.mode,
+ manualMicEnabled = state.capture.micEnabled,
+ manualMicListening = state.capture.micListening,
+ talkListening = state.capture.talkListening,
+ talkSpeaking = state.capture.talkSpeaking,
+ )
startForegroundWithTypes(
notification = buildNotification(title = title, text = text),
@@ -60,13 +92,27 @@ class NodeForegroundService : Service() {
}
}
- override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int {
+ override fun onStartCommand(
+ intent: Intent?,
+ flags: Int,
+ startId: Int,
+ ): Int {
when (intent?.action) {
ACTION_STOP -> {
(application as NodeApp).peekRuntime()?.disconnect()
stopSelf()
return START_NOT_STICKY
}
+ ACTION_SET_VOICE_CAPTURE_MODE -> {
+ voiceCaptureMode = intent.getStringExtra(EXTRA_VOICE_CAPTURE_MODE).toVoiceCaptureMode()
+ startForegroundWithTypes(
+ notification =
+ buildNotification(
+ title = "OpenClaw Node",
+ text = if (voiceCaptureMode == VoiceCaptureMode.TalkMode) "Talk mode active" else "Connected",
+ ),
+ )
+ }
}
// Keep running; connection is managed by NodeRuntime (auto-reconnect + manual).
return START_STICKY
@@ -127,17 +173,13 @@ class NodeForegroundService : Service() {
.build()
}
- private fun updateNotification(notification: Notification) {
- val mgr = getSystemService(Context.NOTIFICATION_SERVICE) as NotificationManager
- mgr.notify(NOTIFICATION_ID, notification)
- }
-
private fun startForegroundWithTypes(notification: Notification) {
+ val serviceTypes = foregroundServiceTypesForVoiceMode(voiceCaptureMode)
if (didStartForeground) {
- updateNotification(notification)
+ ServiceCompat.startForeground(this, NOTIFICATION_ID, notification, serviceTypes)
return
}
- startForeground(NOTIFICATION_ID, notification, ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC)
+ ServiceCompat.startForeground(this, NOTIFICATION_ID, notification, serviceTypes)
didStartForeground = true
}
@@ -146,6 +188,8 @@ class NodeForegroundService : Service() {
private const val NOTIFICATION_ID = 1
private const val ACTION_STOP = "ai.openclaw.app.action.STOP"
+ private const val ACTION_SET_VOICE_CAPTURE_MODE = "ai.openclaw.app.action.SET_VOICE_CAPTURE_MODE"
+ private const val EXTRA_VOICE_CAPTURE_MODE = "ai.openclaw.app.extra.VOICE_CAPTURE_MODE"
fun start(context: Context) {
val intent = Intent(context, NodeForegroundService::class.java)
@@ -156,7 +200,85 @@ class NodeForegroundService : Service() {
val intent = Intent(context, NodeForegroundService::class.java).setAction(ACTION_STOP)
context.startService(intent)
}
+
+ fun setVoiceCaptureMode(
+ context: Context,
+ mode: VoiceCaptureMode,
+ ) {
+ val intent =
+ Intent(context, NodeForegroundService::class.java)
+ .setAction(ACTION_SET_VOICE_CAPTURE_MODE)
+ .putExtra(EXTRA_VOICE_CAPTURE_MODE, mode.name)
+ if (mode == VoiceCaptureMode.TalkMode) {
+ ContextCompat.startForegroundService(context, intent)
+ } else {
+ context.startService(intent)
+ }
+ }
}
}
-private data class Quint<A, B, C, D, E>(val first: A, val second: B, val third: C, val fourth: D, val fifth: E)
+internal fun foregroundServiceTypesForVoiceMode(mode: VoiceCaptureMode): Int {
+ val base = ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC
+ return if (mode == VoiceCaptureMode.TalkMode) {
+ base or ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE
+ } else {
+ base
+ }
+}
+
+internal fun voiceNotificationSuffix(
+ mode: VoiceCaptureMode,
+ manualMicEnabled: Boolean,
+ manualMicListening: Boolean,
+ talkListening: Boolean,
+ talkSpeaking: Boolean,
+): String {
+ return when (mode) {
+ VoiceCaptureMode.TalkMode ->
+ when {
+ talkSpeaking -> " · Talk: Speaking"
+ talkListening -> " · Talk: Listening"
+ else -> " · Talk: On"
+ }
+ VoiceCaptureMode.ManualMic ->
+ if (manualMicEnabled) {
+ if (manualMicListening) " · Mic: Listening" else " · Mic: Pending"
+ } else {
+ ""
+ }
+ VoiceCaptureMode.Off -> ""
+ }
+}
+
+private fun String?.toVoiceCaptureMode(): VoiceCaptureMode {
+ return VoiceCaptureMode.entries.firstOrNull { it.name == this } ?: VoiceCaptureMode.Off
+}
+
+private data class VoiceNotificationBase(
+ val status: String,
+ val server: String?,
+ val connected: Boolean,
+ val mode: VoiceCaptureMode,
+)
+
+private data class VoiceNotificationCapture(
+ val micEnabled: Boolean,
+ val micListening: Boolean,
+ val talkListening: Boolean,
+ val talkSpeaking: Boolean,
+)
+
+private data class VoiceNotificationState(
+ val base: VoiceNotificationBase,
+ val capture: VoiceNotificationCapture,
+) {
+ val status: String
+ get() = base.status
+ val server: String?
+ get() = base.server
+ val connected: Boolean
+ get() = base.connected
+ val mode: VoiceCaptureMode
+ get() = base.mode
+}
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
index 7572a9f41be..760fab44b0b 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
@@ -64,6 +64,8 @@ class NodeRuntime(
private val json = Json { ignoreUnknownKeys = true }
private val externalAudioCaptureActive = MutableStateFlow(false)
+ private val _voiceCaptureMode = MutableStateFlow(VoiceCaptureMode.Off)
+ val voiceCaptureMode: StateFlow<VoiceCaptureMode> = _voiceCaptureMode.asStateFlow()
private val discovery = GatewayDiscovery(appContext, scope = scope)
val gateways: StateFlow> = discovery.gateways
@@ -428,6 +430,18 @@ class NodeRuntime(
)
}
+ val talkModeEnabled: StateFlow<Boolean>
+ get() = talkMode.isEnabled
+
+ val talkModeListening: StateFlow<Boolean>
+ get() = talkMode.isListening
+
+ val talkModeSpeaking: StateFlow<Boolean>
+ get() = talkMode.isSpeaking
+
+ val talkModeStatusText: StateFlow<String>
+ get() = talkMode.statusText
+
private fun syncMainSessionKey(agentId: String?) {
val resolvedKey = resolveNodeMainSessionKey(agentId)
// Always push the resolved session key into TalkMode, even when the
@@ -599,17 +613,8 @@ class NodeRuntime(
prefs.loadGatewayToken()
}
- scope.launch {
- prefs.talkEnabled.collect { enabled ->
- // MicCaptureManager handles STT + send to gateway, while the dedicated
- // reply speaker handles TTS for assistant replies in the voice tab.
- micCapture.setMicEnabled(enabled)
- if (enabled) {
- talkMode.ttsOnAllResponses = false
- scope.launch { talkMode.ensureChatSubscribed() }
- }
- externalAudioCaptureActive.value = enabled
- }
+ if (prefs.voiceMicEnabled.value) {
+ setVoiceCaptureMode(VoiceCaptureMode.ManualMic, persistManualMic = false)
}
scope.launch(Dispatchers.Default) {
@@ -643,7 +648,7 @@ class NodeRuntime(
if (value) {
reconnectPreferredGatewayOnForeground()
} else {
- stopActiveVoiceSession()
+ stopManualVoiceSession()
}
}
@@ -757,21 +762,17 @@ class NodeRuntime(
fun setVoiceScreenActive(active: Boolean) {
if (!active) {
- stopActiveVoiceSession()
+ stopManualVoiceSession()
}
// Don't re-enable on active=true; mic toggle drives that
}
fun setMicEnabled(value: Boolean) {
- prefs.setTalkEnabled(value)
- if (value) {
- // Tapping mic on interrupts any active TTS (barge-in)
- stopVoicePlayback()
- talkMode.ttsOnAllResponses = false
- scope.launch { talkMode.ensureChatSubscribed() }
- }
- micCapture.setMicEnabled(value)
- externalAudioCaptureActive.value = value
+ setVoiceCaptureMode(if (value) VoiceCaptureMode.ManualMic else VoiceCaptureMode.Off)
+ }
+
+ fun setTalkModeEnabled(value: Boolean) {
+ setVoiceCaptureMode(if (value) VoiceCaptureMode.TalkMode else VoiceCaptureMode.Off)
}
val speakerEnabled: StateFlow
@@ -786,11 +787,72 @@ class NodeRuntime(
talkMode.setPlaybackEnabled(value)
}
+ private fun setVoiceCaptureMode(
+ mode: VoiceCaptureMode,
+ persistManualMic: Boolean = true,
+ ) {
+ if (mode == VoiceCaptureMode.TalkMode && !hasRecordAudioPermission()) {
+ _voiceCaptureMode.value = VoiceCaptureMode.Off
+ externalAudioCaptureActive.value = false
+ return
+ }
+ if (_voiceCaptureMode.value == mode) return
+ _voiceCaptureMode.value = mode
+ when (mode) {
+ VoiceCaptureMode.Off -> {
+ talkMode.ttsOnAllResponses = false
+ talkMode.setEnabled(false)
+ stopVoicePlayback()
+ micCapture.setMicEnabled(false)
+ if (persistManualMic) {
+ prefs.setVoiceMicEnabled(false)
+ }
+ NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off)
+ externalAudioCaptureActive.value = false
+ }
+
+ VoiceCaptureMode.ManualMic -> {
+ talkMode.ttsOnAllResponses = false
+ talkMode.setEnabled(false)
+ NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.ManualMic)
+ if (persistManualMic) {
+ prefs.setVoiceMicEnabled(true)
+ }
+ // Tapping mic on interrupts any active TTS (barge-in).
+ stopVoicePlayback()
+ scope.launch { talkMode.ensureChatSubscribed() }
+ micCapture.setMicEnabled(true)
+ externalAudioCaptureActive.value = true
+ }
+
+ VoiceCaptureMode.TalkMode -> {
+ if (persistManualMic) {
+ prefs.setVoiceMicEnabled(false)
+ }
+ micCapture.setMicEnabled(false)
+ NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.TalkMode)
+ talkMode.ttsOnAllResponses = true
+ talkMode.setPlaybackEnabled(speakerEnabled.value)
+ scope.launch { talkMode.ensureChatSubscribed() }
+ talkMode.setEnabled(true)
+ externalAudioCaptureActive.value = true
+ }
+ }
+ }
+
+ private fun stopManualVoiceSession() {
+ if (_voiceCaptureMode.value != VoiceCaptureMode.ManualMic) return
+ setVoiceCaptureMode(VoiceCaptureMode.Off)
+ }
+
private fun stopActiveVoiceSession() {
talkMode.ttsOnAllResponses = false
+ talkMode.setEnabled(false)
stopVoicePlayback()
micCapture.setMicEnabled(false)
- prefs.setTalkEnabled(false)
+ prefs.setVoiceMicEnabled(false)
+ NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off)
+ _voiceCaptureMode.value = VoiceCaptureMode.Off
externalAudioCaptureActive.value = false
}
@@ -970,6 +1032,7 @@ class NodeRuntime(
}
fun disconnect() {
+ stopActiveVoiceSession()
connectedEndpoint = null
activeGatewayAuth = null
_pendingGatewayTrust.value = null
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt b/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt
index e8a6acd841b..97be3090c5f 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt
@@ -37,6 +37,7 @@ class SecurePrefs(
private const val notificationsForwardingMaxEventsPerMinuteKey =
"notifications.forwarding.maxEventsPerMinute"
private const val notificationsForwardingSessionKeyKey = "notifications.forwarding.sessionKey"
+ private const val voiceMicEnabledKey = "voice.micEnabled"
}
private val appContext = context.applicationContext
@@ -162,8 +163,8 @@ class SecurePrefs(
private val _voiceWakeMode = MutableStateFlow(loadVoiceWakeMode())
val voiceWakeMode: StateFlow = _voiceWakeMode
- private val _talkEnabled = MutableStateFlow(plainPrefs.getBoolean("talk.enabled", false))
- val talkEnabled: StateFlow<Boolean> = _talkEnabled
+ private val _voiceMicEnabled = MutableStateFlow(plainPrefs.getBoolean(voiceMicEnabledKey, false))
+ val voiceMicEnabled: StateFlow<Boolean> = _voiceMicEnabled
private val _speakerEnabled = MutableStateFlow(plainPrefs.getBoolean("voice.speakerEnabled", true))
val speakerEnabled: StateFlow<Boolean> = _speakerEnabled
@@ -478,9 +479,9 @@ class SecurePrefs(
_voiceWakeMode.value = mode
}
- fun setTalkEnabled(value: Boolean) {
- plainPrefs.edit { putBoolean("talk.enabled", value) }
- _talkEnabled.value = value
+ fun setVoiceMicEnabled(value: Boolean) {
+ plainPrefs.edit { putBoolean(voiceMicEnabledKey, value) }
+ _voiceMicEnabled.value = value
}
fun setSpeakerEnabled(value: Boolean) {
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt b/apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt
new file mode 100644
index 00000000000..071d0df1970
--- /dev/null
+++ b/apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt
@@ -0,0 +1,7 @@
+package ai.openclaw.app
+
+enum class VoiceCaptureMode {
+ Off,
+ ManualMic,
+ TalkMode,
+}
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt b/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt
index 76fc2c4f0c9..1b9277afcca 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt
@@ -35,10 +35,11 @@ import androidx.compose.foundation.lazy.rememberLazyListState
import androidx.compose.foundation.shape.CircleShape
import androidx.compose.foundation.shape.RoundedCornerShape
import androidx.compose.material.icons.Icons
-import androidx.compose.material.icons.filled.Mic
-import androidx.compose.material.icons.filled.MicOff
import androidx.compose.material.icons.automirrored.filled.VolumeOff
import androidx.compose.material.icons.automirrored.filled.VolumeUp
+import androidx.compose.material.icons.filled.Mic
+import androidx.compose.material.icons.filled.MicOff
+import androidx.compose.material.icons.filled.RecordVoiceOver
import androidx.compose.material3.Button
import androidx.compose.material3.ButtonDefaults
import androidx.compose.material3.Icon
@@ -69,6 +70,7 @@ import androidx.lifecycle.Lifecycle
import androidx.lifecycle.LifecycleEventObserver
import androidx.lifecycle.compose.LocalLifecycleOwner
import ai.openclaw.app.MainViewModel
+import ai.openclaw.app.VoiceCaptureMode
import ai.openclaw.app.voice.VoiceConversationEntry
import ai.openclaw.app.voice.VoiceConversationRole
import kotlin.math.max
@@ -81,6 +83,7 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
val listState = rememberLazyListState()
val gatewayStatus by viewModel.statusText.collectAsState()
+ val voiceCaptureMode by viewModel.voiceCaptureMode.collectAsState()
val micEnabled by viewModel.micEnabled.collectAsState()
val micCooldown by viewModel.micCooldown.collectAsState()
val speakerEnabled by viewModel.speakerEnabled.collectAsState()
@@ -90,12 +93,15 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
val micConversation by viewModel.micConversation.collectAsState()
val micInputLevel by viewModel.micInputLevel.collectAsState()
val micIsSending by viewModel.micIsSending.collectAsState()
+ val talkModeEnabled by viewModel.talkModeEnabled.collectAsState()
+ val talkModeListening by viewModel.talkModeListening.collectAsState()
+ val talkModeSpeaking by viewModel.talkModeSpeaking.collectAsState()
val hasStreamingAssistant = micConversation.any { it.role == VoiceConversationRole.Assistant && it.isStreaming }
val showThinkingBubble = micIsSending && !hasStreamingAssistant
var hasMicPermission by remember { mutableStateOf(context.hasRecordAudioPermission()) }
- var pendingMicEnable by remember { mutableStateOf(false) }
+ var pendingVoicePermissionAction by remember { mutableStateOf<PendingVoicePermissionAction?>(null) }
DisposableEffect(lifecycleOwner, context) {
val observer =
@@ -107,7 +113,7 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
lifecycleOwner.lifecycle.addObserver(observer)
onDispose {
lifecycleOwner.lifecycle.removeObserver(observer)
- // Stop TTS when leaving the voice screen
+ // Manual mic is tied to the Voice tab; Talk Mode is explicit and can continue.
viewModel.setVoiceScreenActive(false)
}
}
@@ -115,10 +121,14 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
val requestMicPermission =
rememberLauncherForActivityResult(ActivityResultContracts.RequestPermission()) { granted ->
hasMicPermission = granted
- if (granted && pendingMicEnable) {
- viewModel.setMicEnabled(true)
+ if (granted) {
+ when (pendingVoicePermissionAction) {
+ PendingVoicePermissionAction.ManualMic -> viewModel.setMicEnabled(true)
+ PendingVoicePermissionAction.TalkMode -> viewModel.setTalkModeEnabled(true)
+ null -> Unit
+ }
}
- pendingMicEnable = false
+ pendingVoicePermissionAction = null
}
LaunchedEffect(micConversation.size, showThinkingBubble) {
@@ -161,12 +171,12 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
tint = mobileTextTertiary,
)
Text(
- "Tap the mic to start",
+ "Tap mic or Talk",
style = mobileHeadline,
color = mobileTextSecondary,
)
Text(
- "Each pause sends a turn automatically.",
+ "Mic sends turns; Talk keeps the conversation open.",
style = mobileCallout,
color = mobileTextTertiary,
)
@@ -263,7 +273,7 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
if (hasMicPermission) {
viewModel.setMicEnabled(true)
} else {
- pendingMicEnable = true
+ pendingVoicePermissionAction = PendingVoicePermissionAction.ManualMic
requestMicPermission.launch(Manifest.permission.RECORD_AUDIO)
}
},
@@ -287,11 +297,39 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
}
}
- // Invisible spacer to balance the row (matches speaker column width)
- Column(horizontalAlignment = Alignment.CenterHorizontally) {
- Box(modifier = Modifier.size(48.dp))
+ Column(horizontalAlignment = Alignment.CenterHorizontally, verticalArrangement = Arrangement.spacedBy(4.dp)) {
+ IconButton(
+ onClick = {
+ if (talkModeEnabled) {
+ viewModel.setTalkModeEnabled(false)
+ return@IconButton
+ }
+ if (hasMicPermission) {
+ viewModel.setTalkModeEnabled(true)
+ } else {
+ pendingVoicePermissionAction = PendingVoicePermissionAction.TalkMode
+ requestMicPermission.launch(Manifest.permission.RECORD_AUDIO)
+ }
+ },
+ modifier = Modifier.size(48.dp),
+ colors =
+ IconButtonDefaults.iconButtonColors(
+ containerColor = if (talkModeEnabled) mobileSuccessSoft else mobileSurface,
+ ),
+ ) {
+ Icon(
+ imageVector = Icons.Default.RecordVoiceOver,
+ contentDescription = if (talkModeEnabled) "Turn Talk Mode off" else "Turn Talk Mode on",
+ modifier = Modifier.size(22.dp),
+ tint = if (talkModeEnabled) mobileSuccess else mobileTextSecondary,
+ )
+ }
Spacer(modifier = Modifier.height(4.dp))
- Text("", style = mobileCaption2)
+ Text(
+ if (talkModeEnabled) "Talk on" else "Talk",
+ style = mobileCaption2,
+ color = if (talkModeEnabled) mobileSuccess else mobileTextTertiary,
+ )
}
}
@@ -299,6 +337,9 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
val queueCount = micQueuedMessages.size
val stateText =
when {
+ voiceCaptureMode == VoiceCaptureMode.TalkMode && talkModeSpeaking -> "Talk speaking"
+ voiceCaptureMode == VoiceCaptureMode.TalkMode && talkModeListening -> "Talk listening"
+ voiceCaptureMode == VoiceCaptureMode.TalkMode -> "Talk on"
queueCount > 0 -> "$queueCount queued"
micIsSending -> "Sending"
micCooldown -> "Cooldown"
@@ -307,14 +348,15 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
}
val stateColor =
when {
+ voiceCaptureMode == VoiceCaptureMode.TalkMode -> mobileSuccess
micEnabled -> mobileSuccess
micIsSending -> mobileAccent
else -> mobileTextSecondary
}
Surface(
shape = RoundedCornerShape(999.dp),
- color = if (micEnabled) mobileSuccessSoft else mobileSurface,
- border = BorderStroke(1.dp, if (micEnabled) mobileSuccess.copy(alpha = 0.3f) else mobileBorder),
+ color = if (micEnabled || talkModeEnabled) mobileSuccessSoft else mobileSurface,
+ border = BorderStroke(1.dp, if (micEnabled || talkModeEnabled) mobileSuccess.copy(alpha = 0.3f) else mobileBorder),
) {
Text(
"$gatewayStatus · $stateText",
@@ -353,6 +395,11 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
}
}
+private enum class PendingVoicePermissionAction {
+ ManualMic,
+ TalkMode,
+}
+
@Composable
private fun VoiceTurnBubble(entry: VoiceConversationEntry) {
val isUser = entry.role == VoiceConversationRole.User
diff --git a/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt
index fddc347f487..9c76eeb3219 100644
--- a/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt
@@ -2,6 +2,7 @@ package ai.openclaw.app
import android.app.Notification
import android.content.Intent
+import android.content.pm.ServiceInfo
import org.junit.Assert.assertEquals
import org.junit.Assert.assertNotNull
import org.junit.Test
@@ -30,6 +31,35 @@ class NodeForegroundServiceTest {
assertEquals(expectedFlags, savedIntent.flags and expectedFlags)
}
+ @Test
+ fun foregroundServiceTypesForVoiceMode_addsMicrophoneOnlyForTalkMode() {
+ assertEquals(
+ ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC,
+ foregroundServiceTypesForVoiceMode(VoiceCaptureMode.Off),
+ )
+ assertEquals(
+ ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC,
+ foregroundServiceTypesForVoiceMode(VoiceCaptureMode.ManualMic),
+ )
+ assertEquals(
+ ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC or ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE,
+ foregroundServiceTypesForVoiceMode(VoiceCaptureMode.TalkMode),
+ )
+ }
+
+ @Test
+ fun voiceNotificationSuffixReflectsActiveCaptureMode() {
+ assertEquals("", voiceNotificationSuffix(VoiceCaptureMode.Off, false, false, false, false))
+ assertEquals(
+ " · Mic: Listening",
+ voiceNotificationSuffix(VoiceCaptureMode.ManualMic, true, true, false, false),
+ )
+ assertEquals(
+ " · Talk: Speaking",
+ voiceNotificationSuffix(VoiceCaptureMode.TalkMode, false, false, true, true),
+ )
+ }
+
private fun buildNotification(service: NodeForegroundService): Notification {
val method =
NodeForegroundService::class.java.getDeclaredMethod(
diff --git a/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt
index 811eed0c900..d209f52a2f5 100644
--- a/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt
@@ -2,7 +2,9 @@ package ai.openclaw.app
import android.content.Context
import org.junit.Assert.assertEquals
+import org.junit.Assert.assertFalse
import org.junit.Assert.assertNull
+import org.junit.Assert.assertTrue
import org.junit.Test
import org.junit.runner.RunWith
import org.robolectric.RobolectricTestRunner
@@ -22,6 +24,32 @@ class SecurePrefsTest {
assertEquals("whileUsing", plainPrefs.getString("location.enabledMode", null))
}
+ @Test
+ fun voiceMicEnabled_ignoresOldTalkEnabledKey() {
+ val context = RuntimeEnvironment.getApplication()
+ val plainPrefs = context.getSharedPreferences("openclaw.node", Context.MODE_PRIVATE)
+ plainPrefs.edit().clear().putBoolean("talk.enabled", true).commit()
+
+ val prefs = SecurePrefs(context)
+
+ assertFalse(prefs.voiceMicEnabled.value)
+ assertFalse(plainPrefs.contains("voice.micEnabled"))
+ }
+
+ @Test
+ fun setVoiceMicEnabled_persistsNewKeyOnly() {
+ val context = RuntimeEnvironment.getApplication()
+ val plainPrefs = context.getSharedPreferences("openclaw.node", Context.MODE_PRIVATE)
+ plainPrefs.edit().clear().putBoolean("talk.enabled", false).commit()
+ val prefs = SecurePrefs(context)
+
+ prefs.setVoiceMicEnabled(true)
+
+ assertTrue(prefs.voiceMicEnabled.value)
+ assertTrue(plainPrefs.getBoolean("voice.micEnabled", false))
+ assertFalse(plainPrefs.getBoolean("talk.enabled", false))
+ }
+
@Test
fun saveGatewayBootstrapToken_persistsSeparatelyFromSharedToken() {
val context = RuntimeEnvironment.getApplication()
diff --git a/docs/nodes/talk.md b/docs/nodes/talk.md
index fe65b915aca..c8d40b08957 100644
--- a/docs/nodes/talk.md
+++ b/docs/nodes/talk.md
@@ -91,6 +91,13 @@ Defaults:
- Click cloud: stop speaking
- Click X: exit Talk mode
+## Android UI
+
+- Voice tab toggle: **Talk**
+- Manual **Mic** and **Talk** are mutually exclusive runtime capture modes.
+- Manual Mic stops when the app leaves the foreground or the user leaves the Voice tab.
+- Talk Mode keeps running until toggled off or the Android node disconnects, and uses Android's microphone foreground-service type while active.
+
## Notes
- Requires Speech + Microphone permissions.
diff --git a/docs/platforms/android.md b/docs/platforms/android.md
index 0ed03f8ba33..c42b2f28a57 100644
--- a/docs/platforms/android.md
+++ b/docs/platforms/android.md
@@ -199,8 +199,10 @@ See [Camera node](/nodes/camera) for parameters and CLI helpers.
### 8) Voice + expanded Android command surface
-- Voice: Android uses a single mic on/off flow in the Voice tab with transcript capture and `talk.speak` playback. Local system TTS is used only when `talk.speak` is unavailable. Voice stops when the app leaves the foreground.
-- Voice wake/talk-mode toggles are currently removed from Android UX/runtime.
+- Voice tab: Android has two explicit capture modes. **Mic** is a manual Voice-tab session that sends each pause as a chat turn and stops when the app leaves the foreground or the user leaves the Voice tab. **Talk** is continuous Talk Mode and keeps listening until toggled off or the node disconnects.
+- Talk Mode promotes the existing foreground service from `dataSync` to `dataSync|microphone` before capture starts, then demotes it when Talk Mode stops. Android 14+ requires the `FOREGROUND_SERVICE_MICROPHONE` declaration, the `RECORD_AUDIO` runtime grant, and the microphone service type at runtime.
+- Spoken replies use `talk.speak` through the configured gateway Talk provider. Local system TTS is used only when `talk.speak` is unavailable.
+- Voice wake remains disabled in the Android UX/runtime.
- Additional Android command families (availability depends on device + permissions):
- `device.status`, `device.info`, `device.permissions`, `device.health`
- `notifications.list`, `notifications.actions` (see [Notification forwarding](#notification-forwarding) below)