mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-05 08:30:22 +00:00
fix(android): speak final voice replies in mic capture flow
This commit is contained in:
@@ -20,6 +20,7 @@ import ai.openclaw.android.gateway.probeGatewayTlsFingerprint
|
|||||||
import ai.openclaw.android.node.*
|
import ai.openclaw.android.node.*
|
||||||
import ai.openclaw.android.protocol.OpenClawCanvasA2UIAction
|
import ai.openclaw.android.protocol.OpenClawCanvasA2UIAction
|
||||||
import ai.openclaw.android.voice.MicCaptureManager
|
import ai.openclaw.android.voice.MicCaptureManager
|
||||||
|
import ai.openclaw.android.voice.TalkModeManager
|
||||||
import ai.openclaw.android.voice.VoiceConversationEntry
|
import ai.openclaw.android.voice.VoiceConversationEntry
|
||||||
import kotlinx.coroutines.CoroutineScope
|
import kotlinx.coroutines.CoroutineScope
|
||||||
import kotlinx.coroutines.Dispatchers
|
import kotlinx.coroutines.Dispatchers
|
||||||
@@ -318,6 +319,18 @@ class NodeRuntime(context: Context) {
|
|||||||
json = json,
|
json = json,
|
||||||
supportsChatSubscribe = false,
|
supportsChatSubscribe = false,
|
||||||
)
|
)
|
||||||
|
/**
 * Lazily created [TalkModeManager] that the mic-capture flow uses to speak assistant replies
 * (it is the receiver of `speakAssistantReply(text)` in the `micCapture` wiring).
 *
 * It is built on the same `appContext`, `scope` and `operatorSession` as the rest of this
 * runtime, with `supportsChatSubscribe = false`, and reports connectivity by reading
 * `operatorConnected` each time `isConnected` is invoked.
 */
private val voiceReplySpeaker: TalkModeManager by lazy {
|
||||||
|
// Reuse the existing TalkMode speech engine (ElevenLabs + deterministic system-TTS fallback)
|
||||||
|
// without enabling the legacy talk capture loop.
|
||||||
|
TalkModeManager(
|
||||||
|
context = appContext,
|
||||||
|
scope = scope,
|
||||||
|
session = operatorSession,
|
||||||
|
supportsChatSubscribe = false,
|
||||||
|
isConnected = { operatorConnected },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
private val micCapture: MicCaptureManager by lazy {
|
private val micCapture: MicCaptureManager by lazy {
|
||||||
MicCaptureManager(
|
MicCaptureManager(
|
||||||
context = appContext,
|
context = appContext,
|
||||||
@@ -335,6 +348,9 @@ class NodeRuntime(context: Context) {
|
|||||||
val response = operatorSession.request("chat.send", params.toString())
|
val response = operatorSession.request("chat.send", params.toString())
|
||||||
parseChatSendRunId(response) ?: idempotencyKey
|
parseChatSendRunId(response) ?: idempotencyKey
|
||||||
},
|
},
|
||||||
|
speakAssistantReply = { text ->
|
||||||
|
voiceReplySpeaker.speakAssistantReply(text)
|
||||||
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import android.os.Looper
|
|||||||
import android.speech.RecognitionListener
|
import android.speech.RecognitionListener
|
||||||
import android.speech.RecognizerIntent
|
import android.speech.RecognizerIntent
|
||||||
import android.speech.SpeechRecognizer
|
import android.speech.SpeechRecognizer
|
||||||
|
import android.util.Log
|
||||||
import androidx.core.content.ContextCompat
|
import androidx.core.content.ContextCompat
|
||||||
import java.util.UUID
|
import java.util.UUID
|
||||||
import kotlinx.coroutines.CoroutineScope
|
import kotlinx.coroutines.CoroutineScope
|
||||||
@@ -39,8 +40,10 @@ class MicCaptureManager(
|
|||||||
private val context: Context,
|
private val context: Context,
|
||||||
private val scope: CoroutineScope,
|
private val scope: CoroutineScope,
|
||||||
private val sendToGateway: suspend (String) -> String?,
|
private val sendToGateway: suspend (String) -> String?,
|
||||||
|
private val speakAssistantReply: suspend (String) -> Unit = {},
|
||||||
) {
|
) {
|
||||||
companion object {
|
companion object {
|
||||||
|
private const val tag = "MicCapture"
|
||||||
private const val speechMinSessionMs = 30_000L
|
private const val speechMinSessionMs = 30_000L
|
||||||
private const val speechCompleteSilenceMs = 1_500L
|
private const val speechCompleteSilenceMs = 1_500L
|
||||||
private const val speechPossibleSilenceMs = 900L
|
private const val speechPossibleSilenceMs = 900L
|
||||||
@@ -140,6 +143,7 @@ class MicCaptureManager(
|
|||||||
val finalText = parseAssistantText(payload)?.trim().orEmpty()
|
val finalText = parseAssistantText(payload)?.trim().orEmpty()
|
||||||
if (finalText.isNotEmpty()) {
|
if (finalText.isNotEmpty()) {
|
||||||
upsertPendingAssistant(text = finalText, isStreaming = false)
|
upsertPendingAssistant(text = finalText, isStreaming = false)
|
||||||
|
playAssistantReplyAsync(finalText)
|
||||||
} else if (pendingAssistantEntryId != null) {
|
} else if (pendingAssistantEntryId != null) {
|
||||||
updateConversationEntry(pendingAssistantEntryId!!, text = null, isStreaming = false)
|
updateConversationEntry(pendingAssistantEntryId!!, text = null, isStreaming = false)
|
||||||
}
|
}
|
||||||
@@ -386,6 +390,18 @@ class MicCaptureManager(
|
|||||||
updateConversationEntry(id = currentId, text = text, isStreaming = isStreaming)
|
updateConversationEntry(id = currentId, text = text, isStreaming = isStreaming)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Speaks an assistant reply without blocking the caller.
 *
 * The text is trimmed and blank input is ignored. Playback runs in a coroutine launched on
 * [scope] and is delegated to [speakAssistantReply]. Playback failures are logged as warnings
 * and dropped, so a speech error never propagates out of the launched coroutine. Coroutine
 * cancellation is rethrown so it is not misreported as a speech failure and the coroutine
 * completes as cancelled.
 *
 * @param text assistant reply text to speak; surrounding whitespace is removed first.
 */
private fun playAssistantReplyAsync(text: String) {
  val spoken = text.trim()
  if (spoken.isEmpty()) return
  scope.launch {
    try {
      speakAssistantReply(spoken)
    } catch (err: kotlin.coroutines.cancellation.CancellationException) {
      // Cancellation is control flow, not a playback error: let it propagate.
      throw err
    } catch (err: Throwable) {
      // Best-effort playback: log and continue rather than failing the capture flow.
      Log.w(tag, "assistant speech failed: ${err.message ?: err::class.simpleName}")
    }
  }
}
|
||||||
|
|
||||||
private fun onFinalTranscript(text: String) {
|
private fun onFinalTranscript(text: String) {
|
||||||
val trimmed = text.trim()
|
val trimmed = text.trim()
|
||||||
if (trimmed.isEmpty()) return
|
if (trimmed.isEmpty()) return
|
||||||
|
|||||||
Reference in New Issue
Block a user