From 86d897cfaa6a12b734bb8040ecd55ab2dabe749b Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sat, 25 Apr 2026 20:11:20 +0100
Subject: [PATCH] feat(android): expose talk mode

Co-authored-by: alex-latitude <213670856+alex-latitude@users.noreply.github.com>
---
 CHANGELOG.md                                  |   1 +
 apps/android/app/src/main/AndroidManifest.xml |   3 +-
 .../java/ai/openclaw/app/MainViewModel.kt     |  11 +-
 .../ai/openclaw/app/NodeForegroundService.kt  | 172 +++++++++++++++---
 .../main/java/ai/openclaw/app/NodeRuntime.kt  | 109 ++++++++---
 .../main/java/ai/openclaw/app/SecurePrefs.kt  |  11 +-
 .../java/ai/openclaw/app/VoiceCaptureMode.kt  |   7 +
 .../java/ai/openclaw/app/ui/VoiceTabScreen.kt |  79 ++++++--
 .../openclaw/app/NodeForegroundServiceTest.kt |  30 +++
 .../java/ai/openclaw/app/SecurePrefsTest.kt   |  28 +++
 docs/nodes/talk.md                            |   7 +
 docs/platforms/android.md                     |   6 +-
 12 files changed, 391 insertions(+), 73 deletions(-)
 create mode 100644 apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0641c111846..7602314fc5e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -46,6 +46,7 @@ Docs: https://docs.openclaw.ai
 - Providers/Xiaomi: add MiMo TTS as a bundled speech provider with MP3/WAV output and voice-note Opus transcoding. Fixes #52376. (#55614) Thanks @zoujiejun.
 - Providers/ElevenLabs: include `eleven_v3` in the bundled TTS model catalog so model selection surfaces can offer ElevenLabs v3. (#68321) Thanks @itsuzef.
 - Providers/Local CLI TTS: add a bundled local command speech provider with file/stdout input, voice-note Opus conversion, and telephony PCM output. (#56239) Thanks @solar2ain.
+- Android/Talk Mode: expose Talk Mode in the Voice tab with runtime-owned voice capture modes and microphone foreground-service escalation. Thanks @alex-latitude.
 - Providers/LiteLLM: register `litellm` as an image-generation provider so `image_generate model=litellm/...` calls and `agents.defaults.imageGenerationModel.fallbacks` entries resolve through the LiteLLM proxy. Thanks @zqchris.
 
 ### Fixes
diff --git a/apps/android/app/src/main/AndroidManifest.xml b/apps/android/app/src/main/AndroidManifest.xml
index 310cf0e26bc..6a9eed63d7c 100644
--- a/apps/android/app/src/main/AndroidManifest.xml
+++ b/apps/android/app/src/main/AndroidManifest.xml
@@ -3,6 +3,7 @@
     <uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
     <uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
     <uses-permission android:name="android.permission.FOREGROUND_SERVICE_DATA_SYNC" />
+    <uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
     <uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
     <uses-permission
         android:name="android.permission.NEARBY_WIFI_DEVICES"
@@ -52,7 +53,7 @@
         <service
             android:name=".NodeForegroundService"
             android:exported="false"
-            android:foregroundServiceType="dataSync" />
+            android:foregroundServiceType="dataSync|microphone" />
         <service
             android:name=".node.DeviceNotificationListenerService"
             android:label="@string/app_name"
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt b/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt
index 6edaf13926a..65e110dea98 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt
@@ -101,7 +101,8 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
   val onboardingCompleted: StateFlow<Boolean> = prefs.onboardingCompleted
   val canvasDebugStatusEnabled: StateFlow<Boolean> = prefs.canvasDebugStatusEnabled
   val speakerEnabled: StateFlow<Boolean> = prefs.speakerEnabled
-  val micEnabled: StateFlow<Boolean> = prefs.talkEnabled
+  val voiceCaptureMode: StateFlow<VoiceCaptureMode> = runtimeState(initial = VoiceCaptureMode.Off) { it.voiceCaptureMode }
+  val micEnabled: StateFlow<Boolean> = runtimeState(initial = false) { it.micEnabled }
 
   val micCooldown: StateFlow<Boolean> = runtimeState(initial = false) { it.micCooldown }
   val micStatusText: StateFlow<String> = runtimeState(initial = "Mic off") { it.micStatusText }
@@ -111,6 +112,10 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
   val micConversation: StateFlow<List<VoiceConversationEntry>> = runtimeState(initial = emptyList()) { it.micConversation }
   val micInputLevel: StateFlow<Float> = runtimeState(initial = 0f) { it.micInputLevel }
   val micIsSending: StateFlow<Boolean> = runtimeState(initial = false) { it.micIsSending }
+  val talkModeEnabled: StateFlow<Boolean> = runtimeState(initial = false) { it.talkModeEnabled }
+  val talkModeListening: StateFlow<Boolean> = runtimeState(initial = false) { it.talkModeListening }
+  val talkModeSpeaking: StateFlow<Boolean> = runtimeState(initial = false) { it.talkModeSpeaking }
+  val talkModeStatusText: StateFlow<String> = runtimeState(initial = "Off") { it.talkModeStatusText }
 
   val chatSessionKey: StateFlow<String> = runtimeState(initial = "main") { it.chatSessionKey }
   val chatSessionId: StateFlow<String?> = runtimeState(initial = null) { it.chatSessionId }
@@ -283,6 +288,10 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
     ensureRuntime().setMicEnabled(enabled)
   }
 
+  /** Forwards the Talk Mode toggle to the runtime obtained from ensureRuntime(). */
+  fun setTalkModeEnabled(enabled: Boolean): Unit =
+    ensureRuntime().setTalkModeEnabled(enabled)
+
   fun setSpeakerEnabled(enabled: Boolean) {
     ensureRuntime().setSpeakerEnabled(enabled)
   }
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt b/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt
index 4c7ccdd56e5..c57e378b0ed 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt
@@ -3,12 +3,14 @@ package ai.openclaw.app
 import android.app.Notification
 import android.app.NotificationChannel
 import android.app.NotificationManager
-import android.app.Service
 import android.app.PendingIntent
+import android.app.Service
 import android.content.Context
 import android.content.Intent
 import android.content.pm.ServiceInfo
 import androidx.core.app.NotificationCompat
+import androidx.core.app.ServiceCompat
+import androidx.core.content.ContextCompat
 import kotlinx.coroutines.CoroutineScope
 import kotlinx.coroutines.Dispatchers
 import kotlinx.coroutines.Job
@@ -21,6 +23,7 @@ class NodeForegroundService : Service() {
   private val scope: CoroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Main)
   private var notificationJob: Job? = null
   private var didStartForeground = false
+  private var voiceCaptureMode = VoiceCaptureMode.Off
 
   override fun onCreate() {
     super.onCreate()
@@ -36,22 +39,51 @@ class NodeForegroundService : Service() {
     notificationJob =
       scope.launch {
         combine(
-          runtime.statusText,
-          runtime.serverName,
-          runtime.isConnected,
-          runtime.micEnabled,
-          runtime.micIsListening,
-        ) { status, server, connected, micEnabled, micListening ->
-          Quint(status, server, connected, micEnabled, micListening)
-        }.collect { (status, server, connected, micEnabled, micListening) ->
-          val title = if (connected) "OpenClaw Node · Connected" else "OpenClaw Node"
-          val micSuffix =
-            if (micEnabled) {
-              if (micListening) " · Mic: Listening" else " · Mic: Pending"
-            } else {
-              ""
+          combine(
+            runtime.statusText,
+            runtime.serverName,
+            runtime.isConnected,
+            runtime.voiceCaptureMode,
+          ) { status, server, connected, mode ->
+            VoiceNotificationBase(
+              status = status,
+              server = server,
+              connected = connected,
+              mode = mode,
+            )
+          },
+          combine(
+            runtime.micEnabled,
+            runtime.micIsListening,
+            runtime.talkModeListening,
+            runtime.talkModeSpeaking,
+          ) { micEnabled, micListening, talkListening, talkSpeaking ->
+            VoiceNotificationCapture(
+              micEnabled = micEnabled,
+              micListening = micListening,
+              talkListening = talkListening,
+              talkSpeaking = talkSpeaking,
+            )
+          },
+        ) { base, capture ->
+          VoiceNotificationState(base = base, capture = capture)
+        }.collect { state ->
+          voiceCaptureMode = state.mode
+          val title =
+            when {
+              state.connected && state.mode == VoiceCaptureMode.TalkMode -> "OpenClaw Node · Talk"
+              state.connected -> "OpenClaw Node · Connected"
+              else -> "OpenClaw Node"
             }
-          val text = (server?.let { "$status · $it" } ?: status) + micSuffix
+          val text =
+            (state.server?.let { "${state.status} · $it" } ?: state.status) +
+              voiceNotificationSuffix(
+                mode = state.mode,
+                manualMicEnabled = state.capture.micEnabled,
+                manualMicListening = state.capture.micListening,
+                talkListening = state.capture.talkListening,
+                talkSpeaking = state.capture.talkSpeaking,
+              )
 
           startForegroundWithTypes(
             notification = buildNotification(title = title, text = text),
@@ -60,13 +92,27 @@ class NodeForegroundService : Service() {
       }
   }
 
-  override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int {
+  override fun onStartCommand(
+    intent: Intent?,
+    flags: Int,
+    startId: Int,
+  ): Int {
     when (intent?.action) {
       ACTION_STOP -> {
         (application as NodeApp).peekRuntime()?.disconnect()
         stopSelf()
         return START_NOT_STICKY
       }
+      ACTION_SET_VOICE_CAPTURE_MODE -> {
+        voiceCaptureMode = intent.getStringExtra(EXTRA_VOICE_CAPTURE_MODE).toVoiceCaptureMode()
+        startForegroundWithTypes(
+          notification =
+            buildNotification(
+              title = "OpenClaw Node",
+              text = if (voiceCaptureMode == VoiceCaptureMode.TalkMode) "Talk mode active" else "Connected",
+            ),
+        )
+      }
     }
     // Keep running; connection is managed by NodeRuntime (auto-reconnect + manual).
     return START_STICKY
@@ -127,17 +173,13 @@ class NodeForegroundService : Service() {
       .build()
   }
 
-  private fun updateNotification(notification: Notification) {
-    val mgr = getSystemService(Context.NOTIFICATION_SERVICE) as NotificationManager
-    mgr.notify(NOTIFICATION_ID, notification)
-  }
-
   private fun startForegroundWithTypes(notification: Notification) {
+    val serviceTypes = foregroundServiceTypesForVoiceMode(voiceCaptureMode)
     if (didStartForeground) {
-      updateNotification(notification)
+      ServiceCompat.startForeground(this, NOTIFICATION_ID, notification, serviceTypes)
       return
     }
-    startForeground(NOTIFICATION_ID, notification, ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC)
+    ServiceCompat.startForeground(this, NOTIFICATION_ID, notification, serviceTypes)
     didStartForeground = true
   }
 
@@ -146,6 +188,8 @@ class NodeForegroundService : Service() {
     private const val NOTIFICATION_ID = 1
 
     private const val ACTION_STOP = "ai.openclaw.app.action.STOP"
+    private const val ACTION_SET_VOICE_CAPTURE_MODE = "ai.openclaw.app.action.SET_VOICE_CAPTURE_MODE"
+    private const val EXTRA_VOICE_CAPTURE_MODE = "ai.openclaw.app.extra.VOICE_CAPTURE_MODE"
 
     fun start(context: Context) {
       val intent = Intent(context, NodeForegroundService::class.java)
@@ -156,7 +200,85 @@ class NodeForegroundService : Service() {
       val intent = Intent(context, NodeForegroundService::class.java).setAction(ACTION_STOP)
       context.startService(intent)
     }
+
+    /** Delivers [mode] to the service; only Talk Mode goes through a foreground-service start. */
+    fun setVoiceCaptureMode(
+      context: Context,
+      mode: VoiceCaptureMode,
+    ) {
+      val request = Intent(context, NodeForegroundService::class.java)
+      request.action = ACTION_SET_VOICE_CAPTURE_MODE
+      request.putExtra(EXTRA_VOICE_CAPTURE_MODE, mode.name)
+      when (mode) {
+        VoiceCaptureMode.TalkMode -> ContextCompat.startForegroundService(context, request)
+        VoiceCaptureMode.Off,
+        VoiceCaptureMode.ManualMic -> context.startService(request)
+      }
+    }
   }
 }
 
-private data class Quint<A, B, C, D, E>(val first: A, val second: B, val third: C, val fourth: D, val fifth: E)
+/** Foreground-service type bitmask for [mode]: data sync always, plus microphone in Talk Mode. */
+internal fun foregroundServiceTypesForVoiceMode(mode: VoiceCaptureMode): Int =
+  when (mode) {
+    VoiceCaptureMode.TalkMode ->
+      ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC or ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE
+    VoiceCaptureMode.Off,
+    VoiceCaptureMode.ManualMic -> ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC
+  }
+
+/**
+ * Notification-text suffix for the current voice capture state.
+ * Talk Mode prefers "Speaking" over "Listening"; manual mic reports only while enabled;
+ * [VoiceCaptureMode.Off] yields an empty suffix.
+ */
+internal fun voiceNotificationSuffix(
+  mode: VoiceCaptureMode,
+  manualMicEnabled: Boolean,
+  manualMicListening: Boolean,
+  talkListening: Boolean,
+  talkSpeaking: Boolean,
+): String =
+  when (mode) {
+    VoiceCaptureMode.Off -> ""
+    VoiceCaptureMode.ManualMic ->
+      when {
+        !manualMicEnabled -> ""
+        manualMicListening -> " · Mic: Listening"
+        else -> " · Mic: Pending"
+      }
+    VoiceCaptureMode.TalkMode ->
+      if (talkSpeaking) " · Talk: Speaking" else if (talkListening) " · Talk: Listening" else " · Talk: On"
+  }
+
+/** Parses an enum constant name; null or unrecognized values map to [VoiceCaptureMode.Off]. */
+private fun String?.toVoiceCaptureMode(): VoiceCaptureMode =
+  VoiceCaptureMode.entries.find { candidate -> candidate.name == this } ?: VoiceCaptureMode.Off
+
+/** Connection-level inputs for the foreground notification. */
+private data class VoiceNotificationBase(
+  val status: String,
+  val server: String?,
+  val connected: Boolean,
+  val mode: VoiceCaptureMode,
+)
+
+/** Manual-mic and Talk Mode activity flags for the foreground notification. */
+private data class VoiceNotificationCapture(
+  val micEnabled: Boolean,
+  val micListening: Boolean,
+  val talkListening: Boolean,
+  val talkSpeaking: Boolean,
+)
+
+/** Pairs [base] with [capture] so the notification collector receives a single value. */
+private data class VoiceNotificationState(
+  val base: VoiceNotificationBase,
+  val capture: VoiceNotificationCapture,
+) {
+  // Shorthand accessors that read straight through to [base].
+  val status: String get() = base.status
+  val server: String? get() = base.server
+  val connected: Boolean get() = base.connected
+  val mode: VoiceCaptureMode get() = base.mode
+}
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
index 7572a9f41be..760fab44b0b 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
@@ -64,6 +64,8 @@ class NodeRuntime(
   private val json = Json { ignoreUnknownKeys = true }
 
   private val externalAudioCaptureActive = MutableStateFlow(false)
+  private val _voiceCaptureMode = MutableStateFlow(VoiceCaptureMode.Off)
+  val voiceCaptureMode: StateFlow<VoiceCaptureMode> = _voiceCaptureMode.asStateFlow()
 
   private val discovery = GatewayDiscovery(appContext, scope = scope)
   val gateways: StateFlow<List<GatewayEndpoint>> = discovery.gateways
@@ -428,6 +430,18 @@ class NodeRuntime(
     )
   }
 
+  val talkModeEnabled: StateFlow<Boolean>
+    get() = talkMode.isEnabled
+
+  val talkModeListening: StateFlow<Boolean>
+    get() = talkMode.isListening
+
+  val talkModeSpeaking: StateFlow<Boolean>
+    get() = talkMode.isSpeaking
+
+  val talkModeStatusText: StateFlow<String>
+    get() = talkMode.statusText
+
   private fun syncMainSessionKey(agentId: String?) {
     val resolvedKey = resolveNodeMainSessionKey(agentId)
     // Always push the resolved session key into TalkMode, even when the
@@ -599,17 +613,8 @@ class NodeRuntime(
       prefs.loadGatewayToken()
     }
 
-    scope.launch {
-      prefs.talkEnabled.collect { enabled ->
-        // MicCaptureManager handles STT + send to gateway, while the dedicated
-        // reply speaker handles TTS for assistant replies in the voice tab.
-        micCapture.setMicEnabled(enabled)
-        if (enabled) {
-          talkMode.ttsOnAllResponses = false
-          scope.launch { talkMode.ensureChatSubscribed() }
-        }
-        externalAudioCaptureActive.value = enabled
-      }
+    if (prefs.voiceMicEnabled.value) {
+      setVoiceCaptureMode(VoiceCaptureMode.ManualMic, persistManualMic = false)
     }
 
     scope.launch(Dispatchers.Default) {
@@ -643,7 +648,7 @@ class NodeRuntime(
     if (value) {
       reconnectPreferredGatewayOnForeground()
     } else {
-      stopActiveVoiceSession()
+      stopManualVoiceSession()
     }
   }
 
@@ -757,21 +762,17 @@ class NodeRuntime(
 
   fun setVoiceScreenActive(active: Boolean) {
     if (!active) {
-      stopActiveVoiceSession()
+      stopManualVoiceSession()
     }
     // Don't re-enable on active=true; mic toggle drives that
   }
 
   fun setMicEnabled(value: Boolean) {
-    prefs.setTalkEnabled(value)
-    if (value) {
-      // Tapping mic on interrupts any active TTS (barge-in)
-      stopVoicePlayback()
-      talkMode.ttsOnAllResponses = false
-      scope.launch { talkMode.ensureChatSubscribed() }
-    }
-    micCapture.setMicEnabled(value)
-    externalAudioCaptureActive.value = value
+    setVoiceCaptureMode(if (value) VoiceCaptureMode.ManualMic else VoiceCaptureMode.Off)
+  }
+
+  /** Enters Talk Mode when [value] is true; otherwise switches voice capture off. */
+  fun setTalkModeEnabled(value: Boolean): Unit =
+    setVoiceCaptureMode(if (value) VoiceCaptureMode.TalkMode else VoiceCaptureMode.Off)
 
   val speakerEnabled: StateFlow<Boolean>
@@ -786,11 +787,72 @@ class NodeRuntime(
     talkMode.setPlaybackEnabled(value)
   }
 
+  /**
+   * Switches the runtime to [mode]; [persistManualMic] controls whether the manual-mic
+   * preference is written back. Talk Mode without the record-audio permission falls back
+   * to [VoiceCaptureMode.Off].
+   */
+  private fun setVoiceCaptureMode(
+    mode: VoiceCaptureMode,
+    persistManualMic: Boolean = true,
+  ) {
+    if (mode == VoiceCaptureMode.TalkMode && !hasRecordAudioPermission()) {
+      // Use the real Off transition so an active manual-mic session is torn down too.
+      setVoiceCaptureMode(VoiceCaptureMode.Off, persistManualMic)
+      externalAudioCaptureActive.value = false
+      return
+    }
+    if (_voiceCaptureMode.value == mode) return
+    _voiceCaptureMode.value = mode
+    when (mode) {
+      VoiceCaptureMode.Off -> {
+        talkMode.ttsOnAllResponses = false
+        talkMode.setEnabled(false)
+        stopVoicePlayback()
+        micCapture.setMicEnabled(false)
+        if (persistManualMic) prefs.setVoiceMicEnabled(false)
+        NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off)
+        externalAudioCaptureActive.value = false
+      }
+
+      VoiceCaptureMode.ManualMic -> {
+        talkMode.ttsOnAllResponses = false
+        talkMode.setEnabled(false)
+        NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.ManualMic)
+        if (persistManualMic) prefs.setVoiceMicEnabled(true)
+        // Tapping mic on interrupts any active TTS (barge-in).
+        stopVoicePlayback()
+        scope.launch { talkMode.ensureChatSubscribed() }
+        micCapture.setMicEnabled(true)
+        externalAudioCaptureActive.value = true
+      }
+
+      VoiceCaptureMode.TalkMode -> {
+        if (persistManualMic) prefs.setVoiceMicEnabled(false)
+        micCapture.setMicEnabled(false)
+        NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.TalkMode)
+        talkMode.ttsOnAllResponses = true
+        talkMode.setPlaybackEnabled(speakerEnabled.value)
+        scope.launch { talkMode.ensureChatSubscribed() }
+        talkMode.setEnabled(true)
+        externalAudioCaptureActive.value = true
+      }
+    }
+  }
+
+  /** Ends the voice session only when manual mic is the active mode; other modes are left alone. */
+  private fun stopManualVoiceSession() {
+    if (_voiceCaptureMode.value == VoiceCaptureMode.ManualMic) setVoiceCaptureMode(VoiceCaptureMode.Off)
+  }
+
   private fun stopActiveVoiceSession() {
     talkMode.ttsOnAllResponses = false
+    talkMode.setEnabled(false)
     stopVoicePlayback()
     micCapture.setMicEnabled(false)
-    prefs.setTalkEnabled(false)
+    prefs.setVoiceMicEnabled(false)
+    NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off)
+    _voiceCaptureMode.value = VoiceCaptureMode.Off
     externalAudioCaptureActive.value = false
   }
 
@@ -970,6 +1032,7 @@ class NodeRuntime(
   }
 
   fun disconnect() {
+    stopActiveVoiceSession()
     connectedEndpoint = null
     activeGatewayAuth = null
     _pendingGatewayTrust.value = null
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt b/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt
index e8a6acd841b..97be3090c5f 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt
@@ -37,6 +37,7 @@ class SecurePrefs(
     private const val notificationsForwardingMaxEventsPerMinuteKey =
       "notifications.forwarding.maxEventsPerMinute"
     private const val notificationsForwardingSessionKeyKey = "notifications.forwarding.sessionKey"
+    private const val voiceMicEnabledKey = "voice.micEnabled"
   }
 
   private val appContext = context.applicationContext
@@ -162,8 +163,8 @@ class SecurePrefs(
   private val _voiceWakeMode = MutableStateFlow(loadVoiceWakeMode())
   val voiceWakeMode: StateFlow<VoiceWakeMode> = _voiceWakeMode
 
-  private val _talkEnabled = MutableStateFlow(plainPrefs.getBoolean("talk.enabled", false))
-  val talkEnabled: StateFlow<Boolean> = _talkEnabled
+  private val _voiceMicEnabled = MutableStateFlow(plainPrefs.getBoolean(voiceMicEnabledKey, false))
+  val voiceMicEnabled: StateFlow<Boolean> = _voiceMicEnabled
 
   private val _speakerEnabled = MutableStateFlow(plainPrefs.getBoolean("voice.speakerEnabled", true))
   val speakerEnabled: StateFlow<Boolean> = _speakerEnabled
@@ -478,9 +479,9 @@ class SecurePrefs(
     _voiceWakeMode.value = mode
   }
 
-  fun setTalkEnabled(value: Boolean) {
-    plainPrefs.edit { putBoolean("talk.enabled", value) }
-    _talkEnabled.value = value
+  fun setVoiceMicEnabled(value: Boolean) {
+    plainPrefs.edit { putBoolean(voiceMicEnabledKey, value) }
+    _voiceMicEnabled.value = value
   }
 
   fun setSpeakerEnabled(value: Boolean) {
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt b/apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt
new file mode 100644
index 00000000000..071d0df1970
--- /dev/null
+++ b/apps/android/app/src/main/java/ai/openclaw/app/VoiceCaptureMode.kt
@@ -0,0 +1,7 @@
+package ai.openclaw.app
+
+enum class VoiceCaptureMode {
+  Off, // Neither manual mic capture nor Talk Mode is running.
+  ManualMic, // Manual mic capture is enabled and Talk Mode is disabled.
+  TalkMode, // Talk Mode is enabled and manual mic capture is disabled.
+}
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt b/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt
index 76fc2c4f0c9..1b9277afcca 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceTabScreen.kt
@@ -35,10 +35,11 @@ import androidx.compose.foundation.lazy.rememberLazyListState
 import androidx.compose.foundation.shape.CircleShape
 import androidx.compose.foundation.shape.RoundedCornerShape
 import androidx.compose.material.icons.Icons
-import androidx.compose.material.icons.filled.Mic
-import androidx.compose.material.icons.filled.MicOff
 import androidx.compose.material.icons.automirrored.filled.VolumeOff
 import androidx.compose.material.icons.automirrored.filled.VolumeUp
+import androidx.compose.material.icons.filled.Mic
+import androidx.compose.material.icons.filled.MicOff
+import androidx.compose.material.icons.filled.RecordVoiceOver
 import androidx.compose.material3.Button
 import androidx.compose.material3.ButtonDefaults
 import androidx.compose.material3.Icon
@@ -69,6 +70,7 @@ import androidx.lifecycle.Lifecycle
 import androidx.lifecycle.LifecycleEventObserver
 import androidx.lifecycle.compose.LocalLifecycleOwner
 import ai.openclaw.app.MainViewModel
+import ai.openclaw.app.VoiceCaptureMode
 import ai.openclaw.app.voice.VoiceConversationEntry
 import ai.openclaw.app.voice.VoiceConversationRole
 import kotlin.math.max
@@ -81,6 +83,7 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
   val listState = rememberLazyListState()
 
   val gatewayStatus by viewModel.statusText.collectAsState()
+  val voiceCaptureMode by viewModel.voiceCaptureMode.collectAsState()
   val micEnabled by viewModel.micEnabled.collectAsState()
   val micCooldown by viewModel.micCooldown.collectAsState()
   val speakerEnabled by viewModel.speakerEnabled.collectAsState()
@@ -90,12 +93,15 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
   val micConversation by viewModel.micConversation.collectAsState()
   val micInputLevel by viewModel.micInputLevel.collectAsState()
   val micIsSending by viewModel.micIsSending.collectAsState()
+  val talkModeEnabled by viewModel.talkModeEnabled.collectAsState()
+  val talkModeListening by viewModel.talkModeListening.collectAsState()
+  val talkModeSpeaking by viewModel.talkModeSpeaking.collectAsState()
 
   val hasStreamingAssistant = micConversation.any { it.role == VoiceConversationRole.Assistant && it.isStreaming }
   val showThinkingBubble = micIsSending && !hasStreamingAssistant
 
   var hasMicPermission by remember { mutableStateOf(context.hasRecordAudioPermission()) }
-  var pendingMicEnable by remember { mutableStateOf(false) }
+  var pendingVoicePermissionAction by remember { mutableStateOf<PendingVoicePermissionAction?>(null) }
 
   DisposableEffect(lifecycleOwner, context) {
     val observer =
@@ -107,7 +113,7 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
     lifecycleOwner.lifecycle.addObserver(observer)
     onDispose {
       lifecycleOwner.lifecycle.removeObserver(observer)
-      // Stop TTS when leaving the voice screen
+      // Manual mic is tied to the Voice tab; Talk Mode is explicit and can continue.
       viewModel.setVoiceScreenActive(false)
     }
   }
@@ -115,10 +121,14 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
   val requestMicPermission =
     rememberLauncherForActivityResult(ActivityResultContracts.RequestPermission()) { granted ->
       hasMicPermission = granted
-      if (granted && pendingMicEnable) {
-        viewModel.setMicEnabled(true)
+      if (granted) {
+        when (pendingVoicePermissionAction) {
+          PendingVoicePermissionAction.ManualMic -> viewModel.setMicEnabled(true)
+          PendingVoicePermissionAction.TalkMode -> viewModel.setTalkModeEnabled(true)
+          null -> Unit
+        }
       }
-      pendingMicEnable = false
+      pendingVoicePermissionAction = null
     }
 
   LaunchedEffect(micConversation.size, showThinkingBubble) {
@@ -161,12 +171,12 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
                 tint = mobileTextTertiary,
               )
               Text(
-                "Tap the mic to start",
+                "Tap mic or Talk",
                 style = mobileHeadline,
                 color = mobileTextSecondary,
               )
               Text(
-                "Each pause sends a turn automatically.",
+                "Mic sends turns; Talk keeps the conversation open.",
                 style = mobileCallout,
                 color = mobileTextTertiary,
               )
@@ -263,7 +273,7 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
               if (hasMicPermission) {
                 viewModel.setMicEnabled(true)
               } else {
-                pendingMicEnable = true
+                pendingVoicePermissionAction = PendingVoicePermissionAction.ManualMic
                 requestMicPermission.launch(Manifest.permission.RECORD_AUDIO)
               }
             },
@@ -287,11 +297,39 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
           }
         }
 
-        // Invisible spacer to balance the row (matches speaker column width)
-        Column(horizontalAlignment = Alignment.CenterHorizontally) {
-          Box(modifier = Modifier.size(48.dp))
+        Column(horizontalAlignment = Alignment.CenterHorizontally, verticalArrangement = Arrangement.spacedBy(4.dp)) {
+          IconButton(
+            onClick = {
+              if (talkModeEnabled) {
+                viewModel.setTalkModeEnabled(false)
+                return@IconButton
+              }
+              if (hasMicPermission) {
+                viewModel.setTalkModeEnabled(true)
+              } else {
+                pendingVoicePermissionAction = PendingVoicePermissionAction.TalkMode
+                requestMicPermission.launch(Manifest.permission.RECORD_AUDIO)
+              }
+            },
+            modifier = Modifier.size(48.dp),
+            colors =
+              IconButtonDefaults.iconButtonColors(
+                containerColor = if (talkModeEnabled) mobileSuccessSoft else mobileSurface,
+              ),
+          ) {
+            Icon(
+              imageVector = Icons.Default.RecordVoiceOver,
+              contentDescription = if (talkModeEnabled) "Turn Talk Mode off" else "Turn Talk Mode on",
+              modifier = Modifier.size(22.dp),
+              tint = if (talkModeEnabled) mobileSuccess else mobileTextSecondary,
+            )
+          }
           Spacer(modifier = Modifier.height(4.dp))
-          Text("", style = mobileCaption2)
+          Text(
+            if (talkModeEnabled) "Talk on" else "Talk",
+            style = mobileCaption2,
+            color = if (talkModeEnabled) mobileSuccess else mobileTextTertiary,
+          )
         }
       }
 
@@ -299,6 +337,9 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
       val queueCount = micQueuedMessages.size
       val stateText =
         when {
+          voiceCaptureMode == VoiceCaptureMode.TalkMode && talkModeSpeaking -> "Talk speaking"
+          voiceCaptureMode == VoiceCaptureMode.TalkMode && talkModeListening -> "Talk listening"
+          voiceCaptureMode == VoiceCaptureMode.TalkMode -> "Talk on"
           queueCount > 0 -> "$queueCount queued"
           micIsSending -> "Sending"
           micCooldown -> "Cooldown"
@@ -307,14 +348,15 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
         }
       val stateColor =
         when {
+          voiceCaptureMode == VoiceCaptureMode.TalkMode -> mobileSuccess
           micEnabled -> mobileSuccess
           micIsSending -> mobileAccent
           else -> mobileTextSecondary
         }
       Surface(
         shape = RoundedCornerShape(999.dp),
-        color = if (micEnabled) mobileSuccessSoft else mobileSurface,
-        border = BorderStroke(1.dp, if (micEnabled) mobileSuccess.copy(alpha = 0.3f) else mobileBorder),
+        color = if (micEnabled || talkModeEnabled) mobileSuccessSoft else mobileSurface,
+        border = BorderStroke(1.dp, if (micEnabled || talkModeEnabled) mobileSuccess.copy(alpha = 0.3f) else mobileBorder),
       ) {
         Text(
           "$gatewayStatus · $stateText",
@@ -353,6 +395,11 @@ fun VoiceTabScreen(viewModel: MainViewModel) {
   }
 }
 
+private enum class PendingVoicePermissionAction {
+  ManualMic, // Enable the manual mic once the RECORD_AUDIO request is granted.
+  TalkMode, // Enable Talk Mode once the RECORD_AUDIO request is granted.
+}
+
 @Composable
 private fun VoiceTurnBubble(entry: VoiceConversationEntry) {
   val isUser = entry.role == VoiceConversationRole.User
diff --git a/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt
index fddc347f487..9c76eeb3219 100644
--- a/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt
@@ -2,6 +2,7 @@ package ai.openclaw.app
 
 import android.app.Notification
 import android.content.Intent
+import android.content.pm.ServiceInfo
 import org.junit.Assert.assertEquals
 import org.junit.Assert.assertNotNull
 import org.junit.Test
@@ -30,6 +31,35 @@ class NodeForegroundServiceTest {
     assertEquals(expectedFlags, savedIntent.flags and expectedFlags)
   }
 
+  @Test
+  fun foregroundServiceTypesForVoiceMode_addsMicrophoneOnlyForTalkMode() {
+    assertEquals(
+      ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC,
+      foregroundServiceTypesForVoiceMode(VoiceCaptureMode.Off),
+    )
+    assertEquals(
+      ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC,
+      foregroundServiceTypesForVoiceMode(VoiceCaptureMode.ManualMic),
+    )
+    assertEquals(
+      ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC or ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE,
+      foregroundServiceTypesForVoiceMode(VoiceCaptureMode.TalkMode),
+    )
+  }
+
+  @Test
+  fun voiceNotificationSuffixReflectsActiveCaptureMode() {
+    assertEquals("", voiceNotificationSuffix(VoiceCaptureMode.Off, false, false, false, false))
+    assertEquals(
+      " · Mic: Listening",
+      voiceNotificationSuffix(VoiceCaptureMode.ManualMic, true, true, false, false),
+    )
+    assertEquals(
+      " · Talk: Speaking",
+      voiceNotificationSuffix(VoiceCaptureMode.TalkMode, false, false, true, true),
+    )
+  }
+
   private fun buildNotification(service: NodeForegroundService): Notification {
     val method =
       NodeForegroundService::class.java.getDeclaredMethod(
diff --git a/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt
index 811eed0c900..d209f52a2f5 100644
--- a/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt
@@ -2,7 +2,9 @@ package ai.openclaw.app
 
 import android.content.Context
 import org.junit.Assert.assertEquals
+import org.junit.Assert.assertFalse
 import org.junit.Assert.assertNull
+import org.junit.Assert.assertTrue
 import org.junit.Test
 import org.junit.runner.RunWith
 import org.robolectric.RobolectricTestRunner
@@ -22,6 +24,32 @@ class SecurePrefsTest {
     assertEquals("whileUsing", plainPrefs.getString("location.enabledMode", null))
   }
 
+  @Test
+  fun voiceMicEnabled_ignoresOldTalkEnabledKey() {
+    val context = RuntimeEnvironment.getApplication()
+    val plainPrefs = context.getSharedPreferences("openclaw.node", Context.MODE_PRIVATE)
+    plainPrefs.edit().clear().putBoolean("talk.enabled", true).commit()
+
+    val prefs = SecurePrefs(context)
+
+    assertFalse(prefs.voiceMicEnabled.value)
+    assertFalse(plainPrefs.contains("voice.micEnabled"))
+  }
+
+  @Test
+  fun setVoiceMicEnabled_persistsNewKeyOnly() {
+    val context = RuntimeEnvironment.getApplication()
+    val plainPrefs = context.getSharedPreferences("openclaw.node", Context.MODE_PRIVATE)
+    plainPrefs.edit().clear().putBoolean("talk.enabled", false).commit()
+    val prefs = SecurePrefs(context)
+
+    prefs.setVoiceMicEnabled(true)
+
+    assertTrue(prefs.voiceMicEnabled.value)
+    assertTrue(plainPrefs.getBoolean("voice.micEnabled", false))
+    assertFalse(plainPrefs.getBoolean("talk.enabled", false))
+  }
+
   @Test
   fun saveGatewayBootstrapToken_persistsSeparatelyFromSharedToken() {
     val context = RuntimeEnvironment.getApplication()
diff --git a/docs/nodes/talk.md b/docs/nodes/talk.md
index fe65b915aca..c8d40b08957 100644
--- a/docs/nodes/talk.md
+++ b/docs/nodes/talk.md
@@ -91,6 +91,13 @@ Defaults:
   - Click cloud: stop speaking
   - Click X: exit Talk mode
 
+## Android UI
+
+- Voice tab toggle: **Talk**
+- Manual **Mic** and **Talk** are mutually exclusive runtime capture modes.
+- Manual Mic stops when the app leaves the foreground or the user leaves the Voice tab.
+- Talk Mode keeps running until toggled off or the Android node disconnects, and uses Android's microphone foreground-service type while active.
+
 ## Notes
 
 - Requires Speech + Microphone permissions.
diff --git a/docs/platforms/android.md b/docs/platforms/android.md
index 0ed03f8ba33..c42b2f28a57 100644
--- a/docs/platforms/android.md
+++ b/docs/platforms/android.md
@@ -199,8 +199,10 @@ See [Camera node](/nodes/camera) for parameters and CLI helpers.
 
 ### 8) Voice + expanded Android command surface
 
-- Voice: Android uses a single mic on/off flow in the Voice tab with transcript capture and `talk.speak` playback. Local system TTS is used only when `talk.speak` is unavailable. Voice stops when the app leaves the foreground.
-- Voice wake/talk-mode toggles are currently removed from Android UX/runtime.
+- Voice tab: Android has two explicit capture modes. **Mic** is a manual Voice-tab session that sends each utterance as a chat turn once the speaker pauses, and stops when the app leaves the foreground or the user leaves the Voice tab. **Talk** is continuous Talk Mode and keeps listening until toggled off or the node disconnects.
+- Talk Mode promotes the existing foreground service from `dataSync` to `dataSync|microphone` before capture starts, then demotes it back to `dataSync` when Talk Mode stops. Android 14+ requires the `FOREGROUND_SERVICE_MICROPHONE` permission declared in the manifest, the `RECORD_AUDIO` runtime grant, and the microphone foreground-service type at runtime.
+- Spoken replies use `talk.speak` through the configured gateway Talk provider. Local system TTS is used only when `talk.speak` is unavailable.
+- Voice wake remains disabled in the Android UX/runtime.
 - Additional Android command families (availability depends on device + permissions):
   - `device.status`, `device.info`, `device.permissions`, `device.health`
   - `notifications.list`, `notifications.actions` (see [Notification forwarding](#notification-forwarding) below)