mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
feat(android): add single-path mic capture runtime manager
This commit is contained in:
@@ -45,14 +45,13 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
|
||||
val locationMode: StateFlow<LocationMode> = runtime.locationMode
|
||||
val locationPreciseEnabled: StateFlow<Boolean> = runtime.locationPreciseEnabled
|
||||
val preventSleep: StateFlow<Boolean> = runtime.preventSleep
|
||||
val wakeWords: StateFlow<List<String>> = runtime.wakeWords
|
||||
val voiceWakeMode: StateFlow<VoiceWakeMode> = runtime.voiceWakeMode
|
||||
val voiceWakeStatusText: StateFlow<String> = runtime.voiceWakeStatusText
|
||||
val voiceWakeIsListening: StateFlow<Boolean> = runtime.voiceWakeIsListening
|
||||
val talkEnabled: StateFlow<Boolean> = runtime.talkEnabled
|
||||
val talkStatusText: StateFlow<String> = runtime.talkStatusText
|
||||
val talkIsListening: StateFlow<Boolean> = runtime.talkIsListening
|
||||
val talkIsSpeaking: StateFlow<Boolean> = runtime.talkIsSpeaking
|
||||
val micEnabled: StateFlow<Boolean> = runtime.micEnabled
|
||||
val micStatusText: StateFlow<String> = runtime.micStatusText
|
||||
val micLiveTranscript: StateFlow<String?> = runtime.micLiveTranscript
|
||||
val micIsListening: StateFlow<Boolean> = runtime.micIsListening
|
||||
val micQueuedMessages: StateFlow<List<String>> = runtime.micQueuedMessages
|
||||
val micInputLevel: StateFlow<Float> = runtime.micInputLevel
|
||||
val micIsSending: StateFlow<Boolean> = runtime.micIsSending
|
||||
val manualEnabled: StateFlow<Boolean> = runtime.manualEnabled
|
||||
val manualHost: StateFlow<String> = runtime.manualHost
|
||||
val manualPort: StateFlow<Int> = runtime.manualPort
|
||||
@@ -128,20 +127,8 @@ class MainViewModel(app: Application) : AndroidViewModel(app) {
|
||||
runtime.setCanvasDebugStatusEnabled(value)
|
||||
}
|
||||
|
||||
fun setWakeWords(words: List<String>) {
|
||||
runtime.setWakeWords(words)
|
||||
}
|
||||
|
||||
fun resetWakeWordsDefaults() {
|
||||
runtime.resetWakeWordsDefaults()
|
||||
}
|
||||
|
||||
fun setVoiceWakeMode(mode: VoiceWakeMode) {
|
||||
runtime.setVoiceWakeMode(mode)
|
||||
}
|
||||
|
||||
fun setTalkEnabled(enabled: Boolean) {
|
||||
runtime.setTalkEnabled(enabled)
|
||||
fun setMicEnabled(enabled: Boolean) {
|
||||
runtime.setMicEnabled(enabled)
|
||||
}
|
||||
|
||||
fun refreshGatewayConnection() {
|
||||
|
||||
@@ -39,22 +39,22 @@ class NodeForegroundService : Service() {
|
||||
runtime.statusText,
|
||||
runtime.serverName,
|
||||
runtime.isConnected,
|
||||
runtime.voiceWakeMode,
|
||||
runtime.voiceWakeIsListening,
|
||||
) { status, server, connected, voiceMode, voiceListening ->
|
||||
Quint(status, server, connected, voiceMode, voiceListening)
|
||||
}.collect { (status, server, connected, voiceMode, voiceListening) ->
|
||||
runtime.micEnabled,
|
||||
runtime.micIsListening,
|
||||
) { status, server, connected, micEnabled, micListening ->
|
||||
Quint(status, server, connected, micEnabled, micListening)
|
||||
}.collect { (status, server, connected, micEnabled, micListening) ->
|
||||
val title = if (connected) "OpenClaw Node · Connected" else "OpenClaw Node"
|
||||
val voiceSuffix =
|
||||
if (voiceMode == VoiceWakeMode.Always) {
|
||||
if (voiceListening) " · Voice Wake: Listening" else " · Voice Wake: Paused"
|
||||
val micSuffix =
|
||||
if (micEnabled) {
|
||||
if (micListening) " · Mic: Listening" else " · Mic: Pending"
|
||||
} else {
|
||||
""
|
||||
}
|
||||
val text = (server?.let { "$status · $it" } ?: status) + voiceSuffix
|
||||
val text = (server?.let { "$status · $it" } ?: status) + micSuffix
|
||||
|
||||
val requiresMic =
|
||||
voiceMode == VoiceWakeMode.Always && hasRecordAudioPermission()
|
||||
micEnabled && hasRecordAudioPermission()
|
||||
startForegroundWithTypes(
|
||||
notification = buildNotification(title = title, text = text),
|
||||
requiresMic = requiresMic,
|
||||
|
||||
@@ -19,8 +19,7 @@ import ai.openclaw.android.gateway.GatewaySession
|
||||
import ai.openclaw.android.gateway.probeGatewayTlsFingerprint
|
||||
import ai.openclaw.android.node.*
|
||||
import ai.openclaw.android.protocol.OpenClawCanvasA2UIAction
|
||||
import ai.openclaw.android.voice.TalkModeManager
|
||||
import ai.openclaw.android.voice.VoiceWakeManager
|
||||
import ai.openclaw.android.voice.MicCaptureManager
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.Job
|
||||
@@ -37,6 +36,7 @@ import kotlinx.serialization.json.JsonArray
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
import kotlinx.serialization.json.buildJsonObject
|
||||
import java.util.UUID
|
||||
import java.util.concurrent.atomic.AtomicLong
|
||||
|
||||
class NodeRuntime(context: Context) {
|
||||
@@ -54,40 +54,6 @@ class NodeRuntime(context: Context) {
|
||||
|
||||
private val externalAudioCaptureActive = MutableStateFlow(false)
|
||||
|
||||
private val voiceWake: VoiceWakeManager by lazy {
|
||||
VoiceWakeManager(
|
||||
context = appContext,
|
||||
scope = scope,
|
||||
onCommand = { command ->
|
||||
nodeSession.sendNodeEvent(
|
||||
event = "agent.request",
|
||||
payloadJson =
|
||||
buildJsonObject {
|
||||
put("message", JsonPrimitive(command))
|
||||
put("sessionKey", JsonPrimitive(resolveMainSessionKey()))
|
||||
put("thinking", JsonPrimitive(chatThinkingLevel.value))
|
||||
put("deliver", JsonPrimitive(false))
|
||||
}.toString(),
|
||||
)
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
val voiceWakeIsListening: StateFlow<Boolean>
|
||||
get() = voiceWake.isListening
|
||||
|
||||
val voiceWakeStatusText: StateFlow<String>
|
||||
get() = voiceWake.statusText
|
||||
|
||||
val talkStatusText: StateFlow<String>
|
||||
get() = talkMode.statusText
|
||||
|
||||
val talkIsListening: StateFlow<Boolean>
|
||||
get() = talkMode.isListening
|
||||
|
||||
val talkIsSpeaking: StateFlow<Boolean>
|
||||
get() = talkMode.isSpeaking
|
||||
|
||||
private val discovery = GatewayDiscovery(appContext, scope = scope)
|
||||
val gateways: StateFlow<List<GatewayEndpoint>> = discovery.gateways
|
||||
val discoveryStatusText: StateFlow<String> = discovery.statusText
|
||||
@@ -146,7 +112,7 @@ class NodeRuntime(context: Context) {
|
||||
prefs = prefs,
|
||||
cameraEnabled = { cameraEnabled.value },
|
||||
locationMode = { locationMode.value },
|
||||
voiceWakeMode = { voiceWakeMode.value },
|
||||
voiceWakeMode = { VoiceWakeMode.Off },
|
||||
smsAvailable = { sms.canSendSms() },
|
||||
hasRecordAudioPermission = { hasRecordAudioPermission() },
|
||||
manualTls = { manualTls.value },
|
||||
@@ -172,8 +138,6 @@ class NodeRuntime(context: Context) {
|
||||
onCanvasA2uiReset = { _canvasA2uiHydrated.value = false },
|
||||
)
|
||||
|
||||
private lateinit var gatewayEventHandler: GatewayEventHandler
|
||||
|
||||
data class GatewayTrustPrompt(
|
||||
val endpoint: GatewayEndpoint,
|
||||
val fingerprintSha256: String,
|
||||
@@ -242,8 +206,8 @@ class NodeRuntime(context: Context) {
|
||||
_seamColorArgb.value = DEFAULT_SEAM_COLOR_ARGB
|
||||
applyMainSessionKey(mainSessionKey)
|
||||
updateStatus()
|
||||
micCapture.onGatewayConnectionChanged(true)
|
||||
scope.launch { refreshBrandingFromGateway() }
|
||||
scope.launch { gatewayEventHandler.refreshWakeWordsFromGateway() }
|
||||
},
|
||||
onDisconnected = { message ->
|
||||
operatorConnected = false
|
||||
@@ -254,11 +218,10 @@ class NodeRuntime(context: Context) {
|
||||
if (!isCanonicalMainSessionKey(_mainSessionKey.value)) {
|
||||
_mainSessionKey.value = "main"
|
||||
}
|
||||
val mainKey = resolveMainSessionKey()
|
||||
talkMode.setMainSessionKey(mainKey)
|
||||
chat.applyMainSessionKey(mainKey)
|
||||
chat.applyMainSessionKey(resolveMainSessionKey())
|
||||
chat.onDisconnected(message)
|
||||
updateStatus()
|
||||
micCapture.onGatewayConnectionChanged(false)
|
||||
},
|
||||
onEvent = { event, payloadJson ->
|
||||
handleGatewayEvent(event, payloadJson)
|
||||
@@ -307,22 +270,52 @@ class NodeRuntime(context: Context) {
|
||||
json = json,
|
||||
supportsChatSubscribe = false,
|
||||
)
|
||||
private val talkMode: TalkModeManager by lazy {
|
||||
TalkModeManager(
|
||||
private val micCapture: MicCaptureManager by lazy {
|
||||
MicCaptureManager(
|
||||
context = appContext,
|
||||
scope = scope,
|
||||
session = operatorSession,
|
||||
supportsChatSubscribe = false,
|
||||
isConnected = { operatorConnected },
|
||||
sendToGateway = { message ->
|
||||
val idempotencyKey = UUID.randomUUID().toString()
|
||||
val params =
|
||||
buildJsonObject {
|
||||
put("sessionKey", JsonPrimitive(resolveMainSessionKey()))
|
||||
put("message", JsonPrimitive(message))
|
||||
put("thinking", JsonPrimitive(chatThinkingLevel.value))
|
||||
put("timeoutMs", JsonPrimitive(30_000))
|
||||
put("idempotencyKey", JsonPrimitive(idempotencyKey))
|
||||
}
|
||||
val response = operatorSession.request("chat.send", params.toString())
|
||||
parseChatSendRunId(response) ?: idempotencyKey
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
val micStatusText: StateFlow<String>
|
||||
get() = micCapture.statusText
|
||||
|
||||
val micLiveTranscript: StateFlow<String?>
|
||||
get() = micCapture.liveTranscript
|
||||
|
||||
val micIsListening: StateFlow<Boolean>
|
||||
get() = micCapture.isListening
|
||||
|
||||
val micEnabled: StateFlow<Boolean>
|
||||
get() = micCapture.micEnabled
|
||||
|
||||
val micQueuedMessages: StateFlow<List<String>>
|
||||
get() = micCapture.queuedMessages
|
||||
|
||||
val micInputLevel: StateFlow<Float>
|
||||
get() = micCapture.inputLevel
|
||||
|
||||
val micIsSending: StateFlow<Boolean>
|
||||
get() = micCapture.isSending
|
||||
|
||||
private fun applyMainSessionKey(candidate: String?) {
|
||||
val trimmed = normalizeMainKey(candidate) ?: return
|
||||
if (isCanonicalMainSessionKey(_mainSessionKey.value)) return
|
||||
if (_mainSessionKey.value == trimmed) return
|
||||
_mainSessionKey.value = trimmed
|
||||
talkMode.setMainSessionKey(trimmed)
|
||||
chat.applyMainSessionKey(trimmed)
|
||||
}
|
||||
|
||||
@@ -424,9 +417,6 @@ class NodeRuntime(context: Context) {
|
||||
val locationMode: StateFlow<LocationMode> = prefs.locationMode
|
||||
val locationPreciseEnabled: StateFlow<Boolean> = prefs.locationPreciseEnabled
|
||||
val preventSleep: StateFlow<Boolean> = prefs.preventSleep
|
||||
val wakeWords: StateFlow<List<String>> = prefs.wakeWords
|
||||
val voiceWakeMode: StateFlow<VoiceWakeMode> = prefs.voiceWakeMode
|
||||
val talkEnabled: StateFlow<Boolean> = prefs.talkEnabled
|
||||
val manualEnabled: StateFlow<Boolean> = prefs.manualEnabled
|
||||
val manualHost: StateFlow<String> = prefs.manualHost
|
||||
val manualPort: StateFlow<Int> = prefs.manualPort
|
||||
@@ -453,50 +443,13 @@ class NodeRuntime(context: Context) {
|
||||
val pendingRunCount: StateFlow<Int> = chat.pendingRunCount
|
||||
|
||||
init {
|
||||
gatewayEventHandler = GatewayEventHandler(
|
||||
scope = scope,
|
||||
prefs = prefs,
|
||||
json = json,
|
||||
operatorSession = operatorSession,
|
||||
isConnected = { _isConnected.value },
|
||||
)
|
||||
|
||||
scope.launch {
|
||||
combine(
|
||||
voiceWakeMode,
|
||||
isForeground,
|
||||
externalAudioCaptureActive,
|
||||
wakeWords,
|
||||
) { mode, foreground, externalAudio, words ->
|
||||
Quad(mode, foreground, externalAudio, words)
|
||||
}.distinctUntilChanged()
|
||||
.collect { (mode, foreground, externalAudio, words) ->
|
||||
voiceWake.setTriggerWords(words)
|
||||
|
||||
val shouldListen =
|
||||
when (mode) {
|
||||
VoiceWakeMode.Off -> false
|
||||
VoiceWakeMode.Foreground -> foreground
|
||||
VoiceWakeMode.Always -> true
|
||||
} && !externalAudio
|
||||
|
||||
if (!shouldListen) {
|
||||
voiceWake.stop(statusText = if (mode == VoiceWakeMode.Off) "Off" else "Paused")
|
||||
return@collect
|
||||
}
|
||||
|
||||
if (!hasRecordAudioPermission()) {
|
||||
voiceWake.stop(statusText = "Microphone permission required")
|
||||
return@collect
|
||||
}
|
||||
|
||||
voiceWake.start()
|
||||
}
|
||||
if (prefs.voiceWakeMode.value != VoiceWakeMode.Off) {
|
||||
prefs.setVoiceWakeMode(VoiceWakeMode.Off)
|
||||
}
|
||||
|
||||
scope.launch {
|
||||
talkEnabled.collect { enabled ->
|
||||
talkMode.setEnabled(enabled)
|
||||
prefs.talkEnabled.collect { enabled ->
|
||||
micCapture.setMicEnabled(enabled)
|
||||
externalAudioCaptureActive.value = enabled
|
||||
}
|
||||
}
|
||||
@@ -604,20 +557,7 @@ class NodeRuntime(context: Context) {
|
||||
prefs.setCanvasDebugStatusEnabled(value)
|
||||
}
|
||||
|
||||
fun setWakeWords(words: List<String>) {
|
||||
prefs.setWakeWords(words)
|
||||
gatewayEventHandler.scheduleWakeWordsSyncIfNeeded()
|
||||
}
|
||||
|
||||
fun resetWakeWordsDefaults() {
|
||||
setWakeWords(SecurePrefs.defaultWakeWords)
|
||||
}
|
||||
|
||||
fun setVoiceWakeMode(mode: VoiceWakeMode) {
|
||||
prefs.setVoiceWakeMode(mode)
|
||||
}
|
||||
|
||||
fun setTalkEnabled(value: Boolean) {
|
||||
fun setMicEnabled(value: Boolean) {
|
||||
prefs.setTalkEnabled(value)
|
||||
}
|
||||
|
||||
@@ -801,15 +741,19 @@ class NodeRuntime(context: Context) {
|
||||
}
|
||||
|
||||
private fun handleGatewayEvent(event: String, payloadJson: String?) {
|
||||
if (event == "voicewake.changed") {
|
||||
gatewayEventHandler.handleVoiceWakeChangedEvent(payloadJson)
|
||||
return
|
||||
}
|
||||
|
||||
talkMode.handleGatewayEvent(event, payloadJson)
|
||||
micCapture.handleGatewayEvent(event, payloadJson)
|
||||
chat.handleGatewayEvent(event, payloadJson)
|
||||
}
|
||||
|
||||
private fun parseChatSendRunId(response: String): String? {
|
||||
return try {
|
||||
val root = json.parseToJsonElement(response).asObjectOrNull() ?: return null
|
||||
root["runId"].asStringOrNull()
|
||||
} catch (_: Throwable) {
|
||||
null
|
||||
}
|
||||
}
|
||||
|
||||
private suspend fun refreshBrandingFromGateway() {
|
||||
if (!_isConnected.value) return
|
||||
try {
|
||||
|
||||
@@ -0,0 +1,362 @@
|
||||
package ai.openclaw.android.voice
|
||||
|
||||
import android.Manifest
|
||||
import android.content.Context
|
||||
import android.content.Intent
|
||||
import android.content.pm.PackageManager
|
||||
import android.os.Bundle
|
||||
import android.os.Handler
|
||||
import android.os.Looper
|
||||
import android.speech.RecognitionListener
|
||||
import android.speech.RecognizerIntent
|
||||
import android.speech.SpeechRecognizer
|
||||
import androidx.core.content.ContextCompat
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Job
|
||||
import kotlinx.coroutines.delay
|
||||
import kotlinx.coroutines.flow.MutableStateFlow
|
||||
import kotlinx.coroutines.flow.StateFlow
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
|
||||
class MicCaptureManager(
|
||||
private val context: Context,
|
||||
private val scope: CoroutineScope,
|
||||
private val sendToGateway: suspend (String) -> String?,
|
||||
) {
|
||||
companion object {
|
||||
private const val speechMinSessionMs = 30_000L
|
||||
private const val speechCompleteSilenceMs = 1_500L
|
||||
private const val speechPossibleSilenceMs = 900L
|
||||
}
|
||||
|
||||
private val mainHandler = Handler(Looper.getMainLooper())
|
||||
private val json = Json { ignoreUnknownKeys = true }
|
||||
|
||||
private val _micEnabled = MutableStateFlow(false)
|
||||
val micEnabled: StateFlow<Boolean> = _micEnabled
|
||||
|
||||
private val _isListening = MutableStateFlow(false)
|
||||
val isListening: StateFlow<Boolean> = _isListening
|
||||
|
||||
private val _statusText = MutableStateFlow("Mic off")
|
||||
val statusText: StateFlow<String> = _statusText
|
||||
|
||||
private val _liveTranscript = MutableStateFlow<String?>(null)
|
||||
val liveTranscript: StateFlow<String?> = _liveTranscript
|
||||
|
||||
private val _queuedMessages = MutableStateFlow<List<String>>(emptyList())
|
||||
val queuedMessages: StateFlow<List<String>> = _queuedMessages
|
||||
|
||||
private val _inputLevel = MutableStateFlow(0f)
|
||||
val inputLevel: StateFlow<Float> = _inputLevel
|
||||
|
||||
private val _isSending = MutableStateFlow(false)
|
||||
val isSending: StateFlow<Boolean> = _isSending
|
||||
|
||||
private val messageQueue = ArrayDeque<String>()
|
||||
private val sessionSegments = mutableListOf<String>()
|
||||
private var lastFinalSegment: String? = null
|
||||
private var pendingRunId: String? = null
|
||||
private var gatewayConnected = false
|
||||
|
||||
private var recognizer: SpeechRecognizer? = null
|
||||
private var restartJob: Job? = null
|
||||
private var stopRequested = false
|
||||
|
||||
fun setMicEnabled(enabled: Boolean) {
|
||||
if (_micEnabled.value == enabled) return
|
||||
_micEnabled.value = enabled
|
||||
if (enabled) {
|
||||
start()
|
||||
} else {
|
||||
stop()
|
||||
flushSessionToQueue()
|
||||
sendQueuedIfIdle()
|
||||
}
|
||||
}
|
||||
|
||||
fun onGatewayConnectionChanged(connected: Boolean) {
|
||||
gatewayConnected = connected
|
||||
if (connected) {
|
||||
if (!_micEnabled.value) {
|
||||
sendQueuedIfIdle()
|
||||
}
|
||||
return
|
||||
}
|
||||
if (!_micEnabled.value && messageQueue.isNotEmpty()) {
|
||||
_statusText.value = "Queued ${messageQueue.size} message(s) · waiting for gateway"
|
||||
}
|
||||
}
|
||||
|
||||
fun handleGatewayEvent(event: String, payloadJson: String?) {
|
||||
if (event != "chat") return
|
||||
val runId = pendingRunId ?: return
|
||||
if (payloadJson.isNullOrBlank()) return
|
||||
val obj =
|
||||
try {
|
||||
json.parseToJsonElement(payloadJson).asObjectOrNull()
|
||||
} catch (_: Throwable) {
|
||||
null
|
||||
} ?: return
|
||||
val eventRunId = obj["runId"].asStringOrNull() ?: return
|
||||
if (eventRunId != runId) return
|
||||
val state = obj["state"].asStringOrNull() ?: return
|
||||
if (state != "final") return
|
||||
|
||||
if (messageQueue.isNotEmpty()) {
|
||||
messageQueue.removeFirst()
|
||||
publishQueue()
|
||||
}
|
||||
pendingRunId = null
|
||||
_isSending.value = false
|
||||
sendQueuedIfIdle()
|
||||
}
|
||||
|
||||
private fun start() {
|
||||
stopRequested = false
|
||||
if (!SpeechRecognizer.isRecognitionAvailable(context)) {
|
||||
_statusText.value = "Speech recognizer unavailable"
|
||||
_micEnabled.value = false
|
||||
return
|
||||
}
|
||||
if (!hasMicPermission()) {
|
||||
_statusText.value = "Microphone permission required"
|
||||
_micEnabled.value = false
|
||||
return
|
||||
}
|
||||
|
||||
mainHandler.post {
|
||||
try {
|
||||
if (recognizer == null) {
|
||||
recognizer = SpeechRecognizer.createSpeechRecognizer(context).also { it.setRecognitionListener(listener) }
|
||||
}
|
||||
startListeningSession()
|
||||
} catch (err: Throwable) {
|
||||
_statusText.value = "Start failed: ${err.message ?: err::class.simpleName}"
|
||||
_micEnabled.value = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun stop() {
|
||||
stopRequested = true
|
||||
restartJob?.cancel()
|
||||
restartJob = null
|
||||
_isListening.value = false
|
||||
_statusText.value = if (_isSending.value) "Mic off · sending queued messages…" else "Mic off"
|
||||
_inputLevel.value = 0f
|
||||
mainHandler.post {
|
||||
recognizer?.cancel()
|
||||
recognizer?.destroy()
|
||||
recognizer = null
|
||||
}
|
||||
}
|
||||
|
||||
private fun startListeningSession() {
|
||||
val r = recognizer ?: return
|
||||
val intent =
|
||||
Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
|
||||
putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
|
||||
putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
|
||||
putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3)
|
||||
putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, context.packageName)
|
||||
putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, speechMinSessionMs)
|
||||
putExtra(RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, speechCompleteSilenceMs)
|
||||
putExtra(
|
||||
RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS,
|
||||
speechPossibleSilenceMs,
|
||||
)
|
||||
}
|
||||
_statusText.value = if (_isSending.value) "Listening · queueing while gateway replies" else "Listening"
|
||||
_isListening.value = true
|
||||
r.startListening(intent)
|
||||
}
|
||||
|
||||
private fun scheduleRestart(delayMs: Long = 350L) {
|
||||
if (stopRequested) return
|
||||
if (!_micEnabled.value) return
|
||||
restartJob?.cancel()
|
||||
restartJob =
|
||||
scope.launch {
|
||||
delay(delayMs)
|
||||
mainHandler.post {
|
||||
if (stopRequested || !_micEnabled.value) return@post
|
||||
try {
|
||||
startListeningSession()
|
||||
} catch (_: Throwable) {
|
||||
// onError will retry
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun publishQueue() {
|
||||
_queuedMessages.value = messageQueue.toList()
|
||||
}
|
||||
|
||||
private fun flushSessionToQueue() {
|
||||
val message = sessionSegments.joinToString("\n").trim()
|
||||
sessionSegments.clear()
|
||||
_liveTranscript.value = null
|
||||
lastFinalSegment = null
|
||||
if (message.isEmpty()) return
|
||||
messageQueue.addLast(message)
|
||||
publishQueue()
|
||||
}
|
||||
|
||||
private fun sendQueuedIfIdle() {
|
||||
if (_micEnabled.value) return
|
||||
if (_isSending.value) return
|
||||
if (messageQueue.isEmpty()) {
|
||||
_statusText.value = "Mic off"
|
||||
return
|
||||
}
|
||||
if (!gatewayConnected) {
|
||||
_statusText.value = "Queued ${messageQueue.size} message(s) · waiting for gateway"
|
||||
return
|
||||
}
|
||||
val next = messageQueue.first()
|
||||
_isSending.value = true
|
||||
_statusText.value = "Sending ${messageQueue.size} queued message(s)…"
|
||||
scope.launch {
|
||||
try {
|
||||
val runId = sendToGateway(next)
|
||||
pendingRunId = runId
|
||||
if (runId == null) {
|
||||
if (messageQueue.isNotEmpty()) {
|
||||
messageQueue.removeFirst()
|
||||
publishQueue()
|
||||
}
|
||||
_isSending.value = false
|
||||
sendQueuedIfIdle()
|
||||
}
|
||||
} catch (err: Throwable) {
|
||||
_isSending.value = false
|
||||
_statusText.value =
|
||||
if (!gatewayConnected) {
|
||||
"Queued ${messageQueue.size} message(s) · waiting for gateway"
|
||||
} else {
|
||||
"Send failed: ${err.message ?: err::class.simpleName}"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun disableMic(status: String) {
|
||||
stopRequested = true
|
||||
restartJob?.cancel()
|
||||
restartJob = null
|
||||
_micEnabled.value = false
|
||||
_isListening.value = false
|
||||
_inputLevel.value = 0f
|
||||
_statusText.value = status
|
||||
mainHandler.post {
|
||||
recognizer?.cancel()
|
||||
recognizer?.destroy()
|
||||
recognizer = null
|
||||
}
|
||||
}
|
||||
|
||||
private fun onFinalTranscript(text: String) {
|
||||
val trimmed = text.trim()
|
||||
if (trimmed.isEmpty()) return
|
||||
_liveTranscript.value = trimmed
|
||||
if (lastFinalSegment == trimmed) return
|
||||
lastFinalSegment = trimmed
|
||||
sessionSegments.add(trimmed)
|
||||
}
|
||||
|
||||
private fun hasMicPermission(): Boolean {
|
||||
return (
|
||||
ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO) ==
|
||||
PackageManager.PERMISSION_GRANTED
|
||||
)
|
||||
}
|
||||
|
||||
private val listener =
|
||||
object : RecognitionListener {
|
||||
override fun onReadyForSpeech(params: Bundle?) {
|
||||
_isListening.value = true
|
||||
}
|
||||
|
||||
override fun onBeginningOfSpeech() {}
|
||||
|
||||
override fun onRmsChanged(rmsdB: Float) {
|
||||
val level = ((rmsdB + 2f) / 12f).coerceIn(0f, 1f)
|
||||
_inputLevel.value = level
|
||||
}
|
||||
|
||||
override fun onBufferReceived(buffer: ByteArray?) {}
|
||||
|
||||
override fun onEndOfSpeech() {
|
||||
_inputLevel.value = 0f
|
||||
scheduleRestart()
|
||||
}
|
||||
|
||||
override fun onError(error: Int) {
|
||||
if (stopRequested) return
|
||||
_isListening.value = false
|
||||
_inputLevel.value = 0f
|
||||
val status =
|
||||
when (error) {
|
||||
SpeechRecognizer.ERROR_AUDIO -> "Audio error"
|
||||
SpeechRecognizer.ERROR_CLIENT -> "Client error"
|
||||
SpeechRecognizer.ERROR_NETWORK -> "Network error"
|
||||
SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout"
|
||||
SpeechRecognizer.ERROR_NO_MATCH -> "Listening"
|
||||
SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Recognizer busy"
|
||||
SpeechRecognizer.ERROR_SERVER -> "Server error"
|
||||
SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "Listening"
|
||||
SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Microphone permission required"
|
||||
SpeechRecognizer.ERROR_LANGUAGE_NOT_SUPPORTED -> "Language not supported on this device"
|
||||
SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE -> "Language unavailable on this device"
|
||||
SpeechRecognizer.ERROR_SERVER_DISCONNECTED -> "Speech service disconnected"
|
||||
SpeechRecognizer.ERROR_TOO_MANY_REQUESTS -> "Speech requests limited; retrying"
|
||||
else -> "Speech error ($error)"
|
||||
}
|
||||
_statusText.value = status
|
||||
|
||||
if (
|
||||
error == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS ||
|
||||
error == SpeechRecognizer.ERROR_LANGUAGE_NOT_SUPPORTED ||
|
||||
error == SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE
|
||||
) {
|
||||
disableMic(status)
|
||||
return
|
||||
}
|
||||
val restartDelayMs =
|
||||
when (error) {
|
||||
SpeechRecognizer.ERROR_NO_MATCH,
|
||||
SpeechRecognizer.ERROR_SPEECH_TIMEOUT,
|
||||
-> 1_500L
|
||||
SpeechRecognizer.ERROR_TOO_MANY_REQUESTS -> 2_500L
|
||||
else -> 600L
|
||||
}
|
||||
scheduleRestart(delayMs = restartDelayMs)
|
||||
}
|
||||
|
||||
override fun onResults(results: Bundle?) {
|
||||
val text = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION).orEmpty().firstOrNull()
|
||||
if (!text.isNullOrBlank()) onFinalTranscript(text)
|
||||
scheduleRestart()
|
||||
}
|
||||
|
||||
override fun onPartialResults(partialResults: Bundle?) {
|
||||
val text = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION).orEmpty().firstOrNull()
|
||||
if (!text.isNullOrBlank()) {
|
||||
_liveTranscript.value = text.trim()
|
||||
}
|
||||
}
|
||||
|
||||
override fun onEvent(eventType: Int, params: Bundle?) {}
|
||||
}
|
||||
}
|
||||
|
||||
private fun kotlinx.serialization.json.JsonElement?.asObjectOrNull(): JsonObject? =
|
||||
this as? JsonObject
|
||||
|
||||
private fun kotlinx.serialization.json.JsonElement?.asStringOrNull(): String? =
|
||||
(this as? JsonPrimitive)?.takeIf { it.isString }?.content
|
||||
Reference in New Issue
Block a user