mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:20:43 +00:00
feat: wire talk handoff into native nodes
This commit is contained in:
@@ -36,6 +36,7 @@ import ai.openclaw.app.node.Quad
|
||||
import ai.openclaw.app.node.SmsHandler
|
||||
import ai.openclaw.app.node.SmsManager
|
||||
import ai.openclaw.app.node.SystemHandler
|
||||
import ai.openclaw.app.node.TalkHandler
|
||||
import ai.openclaw.app.node.asObjectOrNull
|
||||
import ai.openclaw.app.node.asStringOrNull
|
||||
import ai.openclaw.app.node.invokeErrorFromThrowable
|
||||
@@ -205,6 +206,16 @@ class NodeRuntime(
|
||||
deviceHandler = deviceHandler,
|
||||
notificationsHandler = notificationsHandler,
|
||||
systemHandler = systemHandler,
|
||||
talkHandler =
|
||||
object : TalkHandler {
|
||||
override suspend fun handlePttStart(paramsJson: String?): GatewaySession.InvokeResult = handleTalkPttStart()
|
||||
|
||||
override suspend fun handlePttStop(paramsJson: String?): GatewaySession.InvokeResult = handleTalkPttStop()
|
||||
|
||||
override suspend fun handlePttCancel(paramsJson: String?): GatewaySession.InvokeResult = handleTalkPttCancel()
|
||||
|
||||
override suspend fun handlePttOnce(paramsJson: String?): GatewaySession.InvokeResult = handleTalkPttOnce()
|
||||
},
|
||||
photosHandler = photosHandler,
|
||||
contactsHandler = contactsHandler,
|
||||
calendarHandler = calendarHandler,
|
||||
@@ -881,6 +892,80 @@ class NodeRuntime(
|
||||
setVoiceCaptureMode(if (value) VoiceCaptureMode.TalkMode else VoiceCaptureMode.Off)
|
||||
}
|
||||
|
||||
private suspend fun handleTalkPttStart(): GatewaySession.InvokeResult =
|
||||
runPreparedTalkPttCommand {
|
||||
val payload = talkMode.beginPushToTalk()
|
||||
GatewaySession.InvokeResult.ok(payload.toJson())
|
||||
}
|
||||
|
||||
private suspend fun handleTalkPttStop(): GatewaySession.InvokeResult =
|
||||
runTalkPttCommand {
|
||||
val payload = talkMode.endPushToTalk()
|
||||
finishTalkCaptureIfIdle()
|
||||
GatewaySession.InvokeResult.ok(payload.toJson())
|
||||
}
|
||||
|
||||
private suspend fun handleTalkPttCancel(): GatewaySession.InvokeResult =
|
||||
runTalkPttCommand {
|
||||
val payload = talkMode.cancelPushToTalk()
|
||||
finishTalkCaptureIfIdle()
|
||||
GatewaySession.InvokeResult.ok(payload.toJson())
|
||||
}
|
||||
|
||||
private suspend fun handleTalkPttOnce(): GatewaySession.InvokeResult =
|
||||
runPreparedTalkPttCommand {
|
||||
val payload = talkMode.runPushToTalkOnce()
|
||||
finishTalkCaptureIfIdle()
|
||||
GatewaySession.InvokeResult.ok(payload.toJson())
|
||||
}
|
||||
|
||||
private suspend fun runPreparedTalkPttCommand(block: suspend () -> GatewaySession.InvokeResult): GatewaySession.InvokeResult =
|
||||
runTalkPttCommand {
|
||||
prepareTalkCapture()
|
||||
try {
|
||||
block()
|
||||
} catch (err: Throwable) {
|
||||
cleanupFailedTalkCapture()
|
||||
throw err
|
||||
}
|
||||
}
|
||||
|
||||
private suspend fun runTalkPttCommand(block: suspend () -> GatewaySession.InvokeResult): GatewaySession.InvokeResult =
|
||||
try {
|
||||
block()
|
||||
} catch (err: Throwable) {
|
||||
val (code, message) = invokeErrorFromThrowable(err)
|
||||
GatewaySession.InvokeResult.error(code = code, message = message)
|
||||
}
|
||||
|
||||
private suspend fun prepareTalkCapture() {
|
||||
if (!hasRecordAudioPermission()) {
|
||||
throw IllegalStateException("MIC_PERMISSION_REQUIRED: grant Microphone permission")
|
||||
}
|
||||
micCapture.setMicEnabled(false)
|
||||
stopVoicePlayback()
|
||||
NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.TalkMode)
|
||||
talkMode.ttsOnAllResponses = true
|
||||
talkMode.setPlaybackEnabled(speakerEnabled.value)
|
||||
talkMode.ensureChatSubscribed()
|
||||
externalAudioCaptureActive.value = true
|
||||
}
|
||||
|
||||
private suspend fun cleanupFailedTalkCapture() {
|
||||
runCatching { talkMode.cancelPushToTalk() }
|
||||
talkMode.ttsOnAllResponses = false
|
||||
NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off)
|
||||
externalAudioCaptureActive.value = false
|
||||
}
|
||||
|
||||
private fun finishTalkCaptureIfIdle() {
|
||||
if (!talkMode.isEnabled.value && !talkMode.isListening.value && !talkMode.isSpeaking.value) {
|
||||
talkMode.ttsOnAllResponses = false
|
||||
NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off)
|
||||
externalAudioCaptureActive.value = false
|
||||
}
|
||||
}
|
||||
|
||||
val speakerEnabled: StateFlow<Boolean>
|
||||
get() = prefs.speakerEnabled
|
||||
|
||||
|
||||
@@ -278,14 +278,13 @@ class GatewayDiscovery(
|
||||
return legacyHostAddress(resolved)
|
||||
}
|
||||
|
||||
private fun legacyHostAddress(resolved: NsdServiceInfo): String? {
|
||||
return try {
|
||||
private fun legacyHostAddress(resolved: NsdServiceInfo): String? =
|
||||
try {
|
||||
val host = NsdServiceInfo::class.java.getMethod("getHost").invoke(resolved) as? InetAddress
|
||||
host?.hostAddress
|
||||
} catch (_: Throwable) {
|
||||
null
|
||||
}
|
||||
}
|
||||
|
||||
private fun publish() {
|
||||
_gateways.value =
|
||||
@@ -529,20 +528,20 @@ class GatewayDiscovery(
|
||||
val cm = connectivity ?: return null
|
||||
|
||||
// Prefer VPN (Tailscale) when present; otherwise use the active network.
|
||||
trackedNetworks(cm).firstOrNull { n ->
|
||||
val caps = cm.getNetworkCapabilities(n) ?: return@firstOrNull false
|
||||
caps.hasTransport(NetworkCapabilities.TRANSPORT_VPN)
|
||||
}?.let { return it }
|
||||
trackedNetworks(cm)
|
||||
.firstOrNull { n ->
|
||||
val caps = cm.getNetworkCapabilities(n) ?: return@firstOrNull false
|
||||
caps.hasTransport(NetworkCapabilities.TRANSPORT_VPN)
|
||||
}?.let { return it }
|
||||
|
||||
return cm.activeNetwork
|
||||
}
|
||||
|
||||
private fun trackedNetworks(cm: ConnectivityManager): List<Network> {
|
||||
return buildList {
|
||||
private fun trackedNetworks(cm: ConnectivityManager): List<Network> =
|
||||
buildList {
|
||||
cm.activeNetwork?.let(::add)
|
||||
addAll(availableNetworks)
|
||||
}.distinct()
|
||||
}
|
||||
|
||||
private fun createDirectResolver(): Resolver? {
|
||||
val cm = connectivity ?: return null
|
||||
|
||||
@@ -14,6 +14,7 @@ import ai.openclaw.app.protocol.OpenClawNotificationsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawPhotosCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSmsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSystemCommand
|
||||
import ai.openclaw.app.protocol.OpenClawTalkCommand
|
||||
|
||||
data class NodeRuntimeFlags(
|
||||
val cameraEnabled: Boolean,
|
||||
@@ -81,6 +82,7 @@ object InvokeCommandRegistry {
|
||||
name = OpenClawCapability.VoiceWake.rawValue,
|
||||
availability = NodeCapabilityAvailability.VoiceWakeEnabled,
|
||||
),
|
||||
NodeCapabilitySpec(name = OpenClawCapability.Talk.rawValue),
|
||||
NodeCapabilitySpec(
|
||||
name = OpenClawCapability.Location.rawValue,
|
||||
availability = NodeCapabilityAvailability.LocationEnabled,
|
||||
@@ -135,6 +137,18 @@ object InvokeCommandRegistry {
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawSystemCommand.Notify.rawValue,
|
||||
),
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawTalkCommand.PttStart.rawValue,
|
||||
),
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawTalkCommand.PttStop.rawValue,
|
||||
),
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawTalkCommand.PttCancel.rawValue,
|
||||
),
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawTalkCommand.PttOnce.rawValue,
|
||||
),
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawCameraCommand.List.rawValue,
|
||||
requiresForeground = true,
|
||||
|
||||
@@ -13,6 +13,7 @@ import ai.openclaw.app.protocol.OpenClawMotionCommand
|
||||
import ai.openclaw.app.protocol.OpenClawNotificationsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSmsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSystemCommand
|
||||
import ai.openclaw.app.protocol.OpenClawTalkCommand
|
||||
|
||||
internal enum class SmsSearchAvailabilityReason {
|
||||
Available,
|
||||
@@ -59,6 +60,7 @@ class InvokeDispatcher(
|
||||
private val deviceHandler: DeviceHandler,
|
||||
private val notificationsHandler: NotificationsHandler,
|
||||
private val systemHandler: SystemHandler,
|
||||
private val talkHandler: TalkHandler,
|
||||
private val photosHandler: PhotosHandler,
|
||||
private val contactsHandler: ContactsHandler,
|
||||
private val calendarHandler: CalendarHandler,
|
||||
@@ -188,6 +190,12 @@ class InvokeDispatcher(
|
||||
// System command
|
||||
OpenClawSystemCommand.Notify.rawValue -> systemHandler.handleSystemNotify(paramsJson)
|
||||
|
||||
// Talk commands
|
||||
OpenClawTalkCommand.PttStart.rawValue -> talkHandler.handlePttStart(paramsJson)
|
||||
OpenClawTalkCommand.PttStop.rawValue -> talkHandler.handlePttStop(paramsJson)
|
||||
OpenClawTalkCommand.PttCancel.rawValue -> talkHandler.handlePttCancel(paramsJson)
|
||||
OpenClawTalkCommand.PttOnce.rawValue -> talkHandler.handlePttOnce(paramsJson)
|
||||
|
||||
// Photos command
|
||||
ai.openclaw.app.protocol.OpenClawPhotosCommand.Latest.rawValue ->
|
||||
photosHandler.handlePhotosLatest(
|
||||
@@ -336,3 +344,13 @@ class InvokeDispatcher(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
interface TalkHandler {
|
||||
suspend fun handlePttStart(paramsJson: String?): GatewaySession.InvokeResult
|
||||
|
||||
suspend fun handlePttStop(paramsJson: String?): GatewaySession.InvokeResult
|
||||
|
||||
suspend fun handlePttCancel(paramsJson: String?): GatewaySession.InvokeResult
|
||||
|
||||
suspend fun handlePttOnce(paramsJson: String?): GatewaySession.InvokeResult
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ enum class OpenClawCapability(
|
||||
Camera("camera"),
|
||||
Sms("sms"),
|
||||
VoiceWake("voiceWake"),
|
||||
Talk("talk"),
|
||||
Location("location"),
|
||||
Device("device"),
|
||||
Notifications("notifications"),
|
||||
@@ -71,6 +72,20 @@ enum class OpenClawSmsCommand(
|
||||
}
|
||||
}
|
||||
|
||||
enum class OpenClawTalkCommand(
|
||||
val rawValue: String,
|
||||
) {
|
||||
PttStart("talk.ptt.start"),
|
||||
PttStop("talk.ptt.stop"),
|
||||
PttCancel("talk.ptt.cancel"),
|
||||
PttOnce("talk.ptt.once"),
|
||||
;
|
||||
|
||||
companion object {
|
||||
const val NamespacePrefix: String = "talk."
|
||||
}
|
||||
}
|
||||
|
||||
enum class OpenClawLocationCommand(
|
||||
val rawValue: String,
|
||||
) {
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
package ai.openclaw.app.voice
|
||||
|
||||
import kotlinx.serialization.json.JsonArray
|
||||
import kotlinx.serialization.json.JsonElement
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
|
||||
internal object ChatEventText {
|
||||
fun assistantTextFromPayload(payload: JsonObject): String? = assistantTextFromMessage(payload["message"])
|
||||
|
||||
fun assistantTextFromMessage(messageEl: JsonElement?): String? {
|
||||
val message = messageEl.asObjectOrNull() ?: return null
|
||||
val role = message["role"].asStringOrNull()
|
||||
if (role != null && role != "assistant") return null
|
||||
return textFromContent(message["content"])
|
||||
}
|
||||
|
||||
private fun textFromContent(content: JsonElement?): String? =
|
||||
when (content) {
|
||||
is JsonPrimitive -> content.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
is JsonArray ->
|
||||
content
|
||||
.mapNotNull(::textFromContentPart)
|
||||
.filter { it.isNotEmpty() }
|
||||
.joinToString("\n")
|
||||
.takeIf { it.isNotBlank() }
|
||||
else -> null
|
||||
}
|
||||
|
||||
private fun textFromContentPart(part: JsonElement): String? {
|
||||
part
|
||||
.asStringOrNull()
|
||||
?.trim()
|
||||
?.takeIf { it.isNotEmpty() }
|
||||
?.let { return it }
|
||||
val obj = part.asObjectOrNull() ?: return null
|
||||
val type = obj["type"].asStringOrNull()
|
||||
if (type != null && type != "text") return null
|
||||
return obj["text"].asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
}
|
||||
}
|
||||
|
||||
private fun JsonElement?.asObjectOrNull(): JsonObject? = this as? JsonObject
|
||||
|
||||
private fun JsonElement?.asStringOrNull(): String? = (this as? JsonPrimitive)?.takeIf { it.isString }?.content
|
||||
@@ -21,7 +21,6 @@ import kotlinx.coroutines.flow.StateFlow
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.withContext
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.JsonArray
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
import java.util.UUID
|
||||
@@ -596,20 +595,7 @@ class MicCaptureManager(
|
||||
PackageManager.PERMISSION_GRANTED
|
||||
)
|
||||
|
||||
private fun parseAssistantText(payload: JsonObject): String? {
|
||||
val message = payload["message"].asObjectOrNull() ?: return null
|
||||
if (message["role"].asStringOrNull() != "assistant") return null
|
||||
val content = message["content"] as? JsonArray ?: return null
|
||||
|
||||
val parts =
|
||||
content.mapNotNull { item ->
|
||||
val obj = item.asObjectOrNull() ?: return@mapNotNull null
|
||||
if (obj["type"].asStringOrNull() != "text") return@mapNotNull null
|
||||
obj["text"].asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
}
|
||||
if (parts.isEmpty()) return null
|
||||
return parts.joinToString("\n")
|
||||
}
|
||||
private fun parseAssistantText(payload: JsonObject): String? = ChatEventText.assistantTextFromPayload(payload)
|
||||
|
||||
private val listener =
|
||||
object : RecognitionListener {
|
||||
|
||||
@@ -12,20 +12,26 @@ import kotlinx.coroutines.delay
|
||||
import kotlinx.coroutines.withContext
|
||||
import java.io.File
|
||||
|
||||
internal interface TalkAudioPlaying {
|
||||
suspend fun play(audio: TalkSpeakAudio)
|
||||
|
||||
fun stop()
|
||||
}
|
||||
|
||||
internal class TalkAudioPlayer(
|
||||
private val context: Context,
|
||||
) {
|
||||
) : TalkAudioPlaying {
|
||||
private val lock = Any()
|
||||
private var active: ActivePlayback? = null
|
||||
|
||||
suspend fun play(audio: TalkSpeakAudio) {
|
||||
override suspend fun play(audio: TalkSpeakAudio) {
|
||||
when (val mode = resolvePlaybackMode(audio)) {
|
||||
is TalkPlaybackMode.Pcm -> playPcm(audio.bytes, mode.sampleRate)
|
||||
is TalkPlaybackMode.Compressed -> playCompressed(audio.bytes, mode.fileExtension)
|
||||
}
|
||||
}
|
||||
|
||||
fun stop() {
|
||||
override fun stop() {
|
||||
synchronized(lock) {
|
||||
active?.cancel()
|
||||
active = null
|
||||
|
||||
@@ -41,7 +41,28 @@ import java.util.UUID
|
||||
import java.util.concurrent.atomic.AtomicLong
|
||||
import kotlin.coroutines.coroutineContext
|
||||
|
||||
class TalkModeManager(
|
||||
data class TalkPttStartPayload(
|
||||
val captureId: String,
|
||||
) {
|
||||
fun toJson(): String = """{"captureId":"$captureId"}"""
|
||||
}
|
||||
|
||||
data class TalkPttStopPayload(
|
||||
val captureId: String,
|
||||
val transcript: String?,
|
||||
val status: String,
|
||||
) {
|
||||
fun toJson(): String =
|
||||
buildJsonObject {
|
||||
put("captureId", JsonPrimitive(captureId))
|
||||
if (transcript != null) {
|
||||
put("transcript", JsonPrimitive(transcript))
|
||||
}
|
||||
put("status", JsonPrimitive(status))
|
||||
}.toString()
|
||||
}
|
||||
|
||||
class TalkModeManager internal constructor(
|
||||
private val context: Context,
|
||||
private val scope: CoroutineScope,
|
||||
private val session: GatewaySession,
|
||||
@@ -49,6 +70,8 @@ class TalkModeManager(
|
||||
private val isConnected: () -> Boolean,
|
||||
private val onBeforeSpeak: suspend () -> Unit = {},
|
||||
private val onAfterSpeak: suspend () -> Unit = {},
|
||||
private val talkSpeakClient: TalkSpeechSynthesizing = TalkSpeakClient(session = session),
|
||||
private val talkAudioPlayer: TalkAudioPlaying = TalkAudioPlayer(context),
|
||||
) {
|
||||
companion object {
|
||||
private const val tag = "TalkMode"
|
||||
@@ -60,9 +83,6 @@ class TalkModeManager(
|
||||
|
||||
private val mainHandler = Handler(Looper.getMainLooper())
|
||||
private val json = Json { ignoreUnknownKeys = true }
|
||||
private val talkSpeakClient = TalkSpeakClient(session = session, json = json)
|
||||
private val talkAudioPlayer = TalkAudioPlayer(context)
|
||||
|
||||
private val _isEnabled = MutableStateFlow(false)
|
||||
val isEnabled: StateFlow<Boolean> = _isEnabled
|
||||
|
||||
@@ -82,6 +102,10 @@ class TalkModeManager(
|
||||
private var restartJob: Job? = null
|
||||
private var stopRequested = false
|
||||
private var listeningMode = false
|
||||
private var activePttCaptureId: String? = null
|
||||
private var pttAutoStopEnabled = false
|
||||
private var pttTimeoutJob: Job? = null
|
||||
private var pttCompletion: CompletableDeferred<TalkPttStopPayload>? = null
|
||||
|
||||
private var silenceJob: Job? = null
|
||||
private var silenceWindowMs = TalkDefaults.defaultSilenceTimeoutMs
|
||||
@@ -156,6 +180,127 @@ class TalkModeManager(
|
||||
}
|
||||
}
|
||||
|
||||
suspend fun beginPushToTalk(): TalkPttStartPayload {
|
||||
if (!isConnected()) {
|
||||
_statusText.value = "Gateway not connected"
|
||||
throw IllegalStateException("UNAVAILABLE: Gateway not connected")
|
||||
}
|
||||
activePttCaptureId?.let { return TalkPttStartPayload(captureId = it) }
|
||||
|
||||
stopSpeaking(resetInterrupt = false)
|
||||
pttTimeoutJob?.cancel()
|
||||
pttTimeoutJob = null
|
||||
pttAutoStopEnabled = false
|
||||
pttCompletion = null
|
||||
silenceJob?.cancel()
|
||||
silenceJob = null
|
||||
listeningMode = false
|
||||
finalizeInFlight = false
|
||||
stopRequested = false
|
||||
lastTranscript = ""
|
||||
lastHeardAtMs = null
|
||||
|
||||
val micOk =
|
||||
ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO) ==
|
||||
PackageManager.PERMISSION_GRANTED
|
||||
if (!micOk) {
|
||||
_statusText.value = "Microphone permission required"
|
||||
throw IllegalStateException("MIC_PERMISSION_REQUIRED: grant Microphone permission")
|
||||
}
|
||||
if (!SpeechRecognizer.isRecognitionAvailable(context)) {
|
||||
_statusText.value = "Speech recognizer unavailable"
|
||||
throw IllegalStateException("UNAVAILABLE: Speech recognizer unavailable")
|
||||
}
|
||||
|
||||
val captureId = UUID.randomUUID().toString()
|
||||
activePttCaptureId = captureId
|
||||
withContext(Dispatchers.Main) {
|
||||
recognizer?.cancel()
|
||||
recognizer?.destroy()
|
||||
recognizer = SpeechRecognizer.createSpeechRecognizer(context).also { it.setRecognitionListener(listener) }
|
||||
startListeningInternal(markListening = true)
|
||||
}
|
||||
_statusText.value = "Listening (PTT)"
|
||||
return TalkPttStartPayload(captureId = captureId)
|
||||
}
|
||||
|
||||
suspend fun endPushToTalk(): TalkPttStopPayload {
|
||||
val captureId = activePttCaptureId ?: UUID.randomUUID().toString()
|
||||
if (activePttCaptureId == null) {
|
||||
return finishPushToTalk(TalkPttStopPayload(captureId = captureId, transcript = null, status = "idle"))
|
||||
}
|
||||
|
||||
clearPushToTalkRecognition()
|
||||
val transcript = lastTranscript.trim()
|
||||
lastTranscript = ""
|
||||
lastHeardAtMs = null
|
||||
|
||||
if (transcript.isEmpty()) {
|
||||
_statusText.value = if (_isEnabled.value) "Listening" else "Ready"
|
||||
if (_isEnabled.value) {
|
||||
start()
|
||||
}
|
||||
return finishPushToTalk(TalkPttStopPayload(captureId = captureId, transcript = null, status = "empty"))
|
||||
}
|
||||
|
||||
if (!isConnected()) {
|
||||
_statusText.value = "Gateway not connected"
|
||||
if (_isEnabled.value) {
|
||||
start()
|
||||
}
|
||||
return finishPushToTalk(TalkPttStopPayload(captureId = captureId, transcript = transcript, status = "offline"))
|
||||
}
|
||||
|
||||
_statusText.value = "Thinking…"
|
||||
scope.launch {
|
||||
finalizeTranscript(transcript)
|
||||
}
|
||||
return finishPushToTalk(TalkPttStopPayload(captureId = captureId, transcript = transcript, status = "queued"))
|
||||
}
|
||||
|
||||
suspend fun cancelPushToTalk(): TalkPttStopPayload {
|
||||
val captureId = activePttCaptureId ?: UUID.randomUUID().toString()
|
||||
if (activePttCaptureId == null) {
|
||||
return finishPushToTalk(TalkPttStopPayload(captureId = captureId, transcript = null, status = "idle"))
|
||||
}
|
||||
|
||||
clearPushToTalkRecognition()
|
||||
lastTranscript = ""
|
||||
lastHeardAtMs = null
|
||||
_statusText.value = if (_isEnabled.value) "Listening" else "Ready"
|
||||
if (_isEnabled.value) {
|
||||
start()
|
||||
}
|
||||
return finishPushToTalk(TalkPttStopPayload(captureId = captureId, transcript = null, status = "cancelled"))
|
||||
}
|
||||
|
||||
suspend fun runPushToTalkOnce(maxDurationMs: Long = 12_000L): TalkPttStopPayload {
|
||||
if (pttCompletion != null) {
|
||||
cancelPushToTalk()
|
||||
}
|
||||
if (activePttCaptureId != null) {
|
||||
return TalkPttStopPayload(
|
||||
captureId = activePttCaptureId ?: UUID.randomUUID().toString(),
|
||||
transcript = null,
|
||||
status = "busy",
|
||||
)
|
||||
}
|
||||
|
||||
beginPushToTalk()
|
||||
val completion = CompletableDeferred<TalkPttStopPayload>()
|
||||
pttCompletion = completion
|
||||
pttAutoStopEnabled = true
|
||||
startSilenceMonitor()
|
||||
pttTimeoutJob =
|
||||
scope.launch {
|
||||
delay(maxDurationMs)
|
||||
if (pttAutoStopEnabled && activePttCaptureId != null) {
|
||||
endPushToTalk()
|
||||
}
|
||||
}
|
||||
return completion.await()
|
||||
}
|
||||
|
||||
/**
|
||||
* Speak a wake-word command through TalkMode's full pipeline:
|
||||
* chat.send → wait for final → read assistant text → TTS.
|
||||
@@ -335,6 +480,12 @@ class TalkModeManager(
|
||||
stopRequested = true
|
||||
finalizeInFlight = false
|
||||
listeningMode = false
|
||||
activePttCaptureId = null
|
||||
pttAutoStopEnabled = false
|
||||
pttCompletion?.cancel()
|
||||
pttCompletion = null
|
||||
pttTimeoutJob?.cancel()
|
||||
pttTimeoutJob = null
|
||||
restartJob?.cancel()
|
||||
restartJob = null
|
||||
silenceJob?.cancel()
|
||||
@@ -434,7 +585,7 @@ class TalkModeManager(
|
||||
silenceJob?.cancel()
|
||||
silenceJob =
|
||||
scope.launch {
|
||||
while (_isEnabled.value) {
|
||||
while (_isEnabled.value || pttAutoStopEnabled) {
|
||||
delay(200)
|
||||
checkSilence()
|
||||
}
|
||||
@@ -448,6 +599,12 @@ class TalkModeManager(
|
||||
val lastHeard = lastHeardAtMs ?: return
|
||||
val elapsed = SystemClock.elapsedRealtime() - lastHeard
|
||||
if (elapsed < silenceWindowMs) return
|
||||
if (activePttCaptureId != null) {
|
||||
if (pttAutoStopEnabled) {
|
||||
scope.launch { endPushToTalk() }
|
||||
}
|
||||
return
|
||||
}
|
||||
if (finalizeInFlight) return
|
||||
finalizeInFlight = true
|
||||
scope.launch {
|
||||
@@ -525,6 +682,27 @@ class TalkModeManager(
|
||||
}
|
||||
}
|
||||
|
||||
private suspend fun clearPushToTalkRecognition() {
|
||||
pttTimeoutJob?.cancel()
|
||||
pttTimeoutJob = null
|
||||
pttAutoStopEnabled = false
|
||||
activePttCaptureId = null
|
||||
_isListening.value = false
|
||||
listeningMode = false
|
||||
clearListenWatchdog()
|
||||
withContext(Dispatchers.Main) {
|
||||
recognizer?.cancel()
|
||||
recognizer?.destroy()
|
||||
recognizer = null
|
||||
}
|
||||
}
|
||||
|
||||
private fun finishPushToTalk(payload: TalkPttStopPayload): TalkPttStopPayload {
|
||||
pttCompletion?.complete(payload)
|
||||
pttCompletion = null
|
||||
return payload
|
||||
}
|
||||
|
||||
private suspend fun subscribeChatIfNeeded(
|
||||
session: GatewaySession,
|
||||
sessionKey: String,
|
||||
@@ -656,20 +834,7 @@ class TalkModeManager(
|
||||
}
|
||||
}
|
||||
|
||||
private fun extractTextFromChatEventMessage(messageEl: JsonElement?): String? {
|
||||
val msg = messageEl?.asObjectOrNull() ?: return null
|
||||
val content = msg["content"] as? JsonArray ?: return null
|
||||
return content
|
||||
.mapNotNull { entry ->
|
||||
entry
|
||||
.asObjectOrNull()
|
||||
?.get("text")
|
||||
?.asStringOrNull()
|
||||
?.trim()
|
||||
}.filter { it.isNotEmpty() }
|
||||
.joinToString("\n")
|
||||
.takeIf { it.isNotBlank() }
|
||||
}
|
||||
private fun extractTextFromChatEventMessage(messageEl: JsonElement?): String? = ChatEventText.assistantTextFromMessage(messageEl)
|
||||
|
||||
private suspend fun waitForAssistantText(
|
||||
session: GatewaySession,
|
||||
@@ -729,17 +894,16 @@ class TalkModeManager(
|
||||
_lastAssistantText.value = cleaned
|
||||
ensurePlaybackActive(playbackToken)
|
||||
|
||||
_statusText.value = "Speaking…"
|
||||
_isSpeaking.value = true
|
||||
_statusText.value = "Generating voice…"
|
||||
_isSpeaking.value = false
|
||||
lastSpokenText = cleaned
|
||||
ensureInterruptListener()
|
||||
requestAudioFocusForTts()
|
||||
|
||||
try {
|
||||
val started = SystemClock.elapsedRealtime()
|
||||
when (val result = talkSpeakClient.synthesize(text = cleaned, directive = directive)) {
|
||||
is TalkSpeakResult.Success -> {
|
||||
ensurePlaybackActive(playbackToken)
|
||||
markAudioPlaybackStarting(playbackToken)
|
||||
talkAudioPlayer.play(result.audio)
|
||||
ensurePlaybackActive(playbackToken)
|
||||
Log.d(tag, "talk.speak ok durMs=${SystemClock.elapsedRealtime() - started}")
|
||||
@@ -789,8 +953,6 @@ class TalkModeManager(
|
||||
shouldResumeAfterSpeak = true
|
||||
onBeforeSpeak()
|
||||
ensurePlaybackActive(playbackToken)
|
||||
_isSpeaking.value = true
|
||||
_statusText.value = "Speaking…"
|
||||
block()
|
||||
} finally {
|
||||
synchronized(ttsJobLock) {
|
||||
@@ -888,6 +1050,7 @@ class TalkModeManager(
|
||||
}
|
||||
},
|
||||
)
|
||||
markAudioPlaybackStarting(playbackToken)
|
||||
val result = engine.speak(text, TextToSpeech.QUEUE_FLUSH, null, utteranceId)
|
||||
if (result != TextToSpeech.SUCCESS) {
|
||||
throw IllegalStateException("TextToSpeech start failed")
|
||||
@@ -905,6 +1068,14 @@ class TalkModeManager(
|
||||
}
|
||||
}
|
||||
|
||||
private fun markAudioPlaybackStarting(playbackToken: Long) {
|
||||
ensurePlaybackActive(playbackToken)
|
||||
_statusText.value = "Speaking…"
|
||||
_isSpeaking.value = true
|
||||
ensureInterruptListener()
|
||||
requestAudioFocusForTts()
|
||||
}
|
||||
|
||||
fun stopTts() {
|
||||
stopSpeaking(resetInterrupt = true)
|
||||
_isSpeaking.value = false
|
||||
|
||||
@@ -28,12 +28,19 @@ internal sealed interface TalkSpeakResult {
|
||||
) : TalkSpeakResult
|
||||
}
|
||||
|
||||
internal interface TalkSpeechSynthesizing {
|
||||
suspend fun synthesize(
|
||||
text: String,
|
||||
directive: TalkDirective?,
|
||||
): TalkSpeakResult
|
||||
}
|
||||
|
||||
internal class TalkSpeakClient(
|
||||
private val session: GatewaySession? = null,
|
||||
private val json: Json = Json { ignoreUnknownKeys = true },
|
||||
private val requestDetailed: (suspend (String, String, Long) -> GatewaySession.RpcResult)? = null,
|
||||
) {
|
||||
suspend fun synthesize(
|
||||
) : TalkSpeechSynthesizing {
|
||||
override suspend fun synthesize(
|
||||
text: String,
|
||||
directive: TalkDirective?,
|
||||
): TalkSpeakResult {
|
||||
|
||||
@@ -6,6 +6,11 @@ import ai.openclaw.app.gateway.GatewayEndpoint
|
||||
import ai.openclaw.app.gateway.GatewaySession
|
||||
import ai.openclaw.app.gateway.GatewayTlsProbeFailure
|
||||
import ai.openclaw.app.gateway.GatewayTlsProbeResult
|
||||
import ai.openclaw.app.node.InvokeDispatcher
|
||||
import ai.openclaw.app.protocol.OpenClawTalkCommand
|
||||
import ai.openclaw.app.voice.TalkModeManager
|
||||
import android.Manifest
|
||||
import kotlinx.coroutines.flow.MutableStateFlow
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertFalse
|
||||
@@ -15,6 +20,7 @@ import org.junit.Test
|
||||
import org.junit.runner.RunWith
|
||||
import org.robolectric.RobolectricTestRunner
|
||||
import org.robolectric.RuntimeEnvironment
|
||||
import org.robolectric.Shadows.shadowOf
|
||||
import org.robolectric.annotation.Config
|
||||
import java.lang.reflect.Field
|
||||
import java.util.UUID
|
||||
@@ -221,6 +227,23 @@ class GatewayBootstrapAuthTest {
|
||||
assertNull(authStore.loadToken(deviceId, "operator"))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun talkPttStart_cleansPreparedCaptureWhenBeginFails() =
|
||||
runBlocking {
|
||||
val app = RuntimeEnvironment.getApplication()
|
||||
shadowOf(app).grantPermissions(Manifest.permission.RECORD_AUDIO)
|
||||
val runtime = NodeRuntime(app)
|
||||
val dispatcher = readField<InvokeDispatcher>(runtime, "invokeDispatcher")
|
||||
|
||||
val result = dispatcher.handleInvoke(OpenClawTalkCommand.PttStart.rawValue, null)
|
||||
|
||||
assertEquals("UNAVAILABLE", result.error?.code)
|
||||
assertEquals(VoiceCaptureMode.Off, runtime.voiceCaptureMode.value)
|
||||
assertFalse(readField<MutableStateFlow<Boolean>>(runtime, "externalAudioCaptureActive").value)
|
||||
val talkMode = readField<Lazy<TalkModeManager>>(runtime, "talkMode\$delegate").value
|
||||
assertFalse(talkMode.ttsOnAllResponses)
|
||||
}
|
||||
|
||||
private fun waitForGatewayTrustPrompt(runtime: NodeRuntime): NodeRuntime.GatewayTrustPrompt {
|
||||
repeat(50) {
|
||||
runtime.pendingGatewayTrust.value?.let { return it }
|
||||
|
||||
@@ -12,6 +12,7 @@ import ai.openclaw.app.protocol.OpenClawNotificationsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawPhotosCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSmsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSystemCommand
|
||||
import ai.openclaw.app.protocol.OpenClawTalkCommand
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertFalse
|
||||
import org.junit.Assert.assertNotNull
|
||||
@@ -26,6 +27,7 @@ class InvokeCommandRegistryTest {
|
||||
OpenClawCapability.Device.rawValue,
|
||||
OpenClawCapability.Notifications.rawValue,
|
||||
OpenClawCapability.System.rawValue,
|
||||
OpenClawCapability.Talk.rawValue,
|
||||
OpenClawCapability.Photos.rawValue,
|
||||
OpenClawCapability.Contacts.rawValue,
|
||||
OpenClawCapability.Calendar.rawValue,
|
||||
@@ -50,6 +52,10 @@ class InvokeCommandRegistryTest {
|
||||
OpenClawNotificationsCommand.List.rawValue,
|
||||
OpenClawNotificationsCommand.Actions.rawValue,
|
||||
OpenClawSystemCommand.Notify.rawValue,
|
||||
OpenClawTalkCommand.PttStart.rawValue,
|
||||
OpenClawTalkCommand.PttStop.rawValue,
|
||||
OpenClawTalkCommand.PttCancel.rawValue,
|
||||
OpenClawTalkCommand.PttOnce.rawValue,
|
||||
OpenClawPhotosCommand.Latest.rawValue,
|
||||
OpenClawContactsCommand.Search.rawValue,
|
||||
OpenClawContactsCommand.Add.rawValue,
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
package ai.openclaw.app.node
|
||||
|
||||
import ai.openclaw.app.gateway.DeviceIdentityStore
|
||||
import ai.openclaw.app.gateway.GatewaySession
|
||||
import ai.openclaw.app.protocol.OpenClawCallLogCommand
|
||||
import ai.openclaw.app.protocol.OpenClawCameraCommand
|
||||
import ai.openclaw.app.protocol.OpenClawLocationCommand
|
||||
import ai.openclaw.app.protocol.OpenClawMotionCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSmsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawTalkCommand
|
||||
import android.content.Context
|
||||
import android.content.pm.PackageManager
|
||||
import kotlinx.coroutines.flow.MutableStateFlow
|
||||
@@ -208,6 +210,27 @@ class InvokeDispatcherTest {
|
||||
assertEquals("INVALID_REQUEST: unknown command", result.error?.message)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun handleInvoke_routesTalkPttCommands() =
|
||||
runTest {
|
||||
val talk = InvokeDispatcherFakeTalkHandler()
|
||||
val dispatcher = newDispatcher(talkHandler = talk)
|
||||
|
||||
val start = dispatcher.handleInvoke(OpenClawTalkCommand.PttStart.rawValue, null)
|
||||
val stop = dispatcher.handleInvoke(OpenClawTalkCommand.PttStop.rawValue, null)
|
||||
val cancel = dispatcher.handleInvoke(OpenClawTalkCommand.PttCancel.rawValue, null)
|
||||
val once = dispatcher.handleInvoke(OpenClawTalkCommand.PttOnce.rawValue, null)
|
||||
|
||||
assertEquals("""{"captureId":"start"}""", start.payloadJson)
|
||||
assertEquals("""{"status":"stop"}""", stop.payloadJson)
|
||||
assertEquals("""{"status":"cancel"}""", cancel.payloadJson)
|
||||
assertEquals("""{"status":"once"}""", once.payloadJson)
|
||||
assertEquals(
|
||||
listOf("start", "stop", "cancel", "once"),
|
||||
talk.calls,
|
||||
)
|
||||
}
|
||||
|
||||
private fun newDispatcher(
|
||||
cameraEnabled: Boolean = false,
|
||||
locationEnabled: Boolean = false,
|
||||
@@ -219,6 +242,7 @@ class InvokeDispatcherTest {
|
||||
debugBuild: Boolean = false,
|
||||
motionActivityAvailable: Boolean = false,
|
||||
motionPedometerAvailable: Boolean = false,
|
||||
talkHandler: TalkHandler = InvokeDispatcherFakeTalkHandler(),
|
||||
): InvokeDispatcher {
|
||||
val appContext = RuntimeEnvironment.getApplication()
|
||||
shadowOf(appContext.packageManager).setSystemFeature(PackageManager.FEATURE_TELEPHONY, smsTelephonyAvailable)
|
||||
@@ -238,6 +262,7 @@ class InvokeDispatcherTest {
|
||||
stateProvider = InvokeDispatcherFakeNotificationsStateProvider(),
|
||||
),
|
||||
systemHandler = SystemHandler.forTesting(InvokeDispatcherFakeSystemNotificationPoster()),
|
||||
talkHandler = talkHandler,
|
||||
photosHandler = PhotosHandler.forTesting(appContext, InvokeDispatcherFakePhotosDataSource()),
|
||||
contactsHandler = ContactsHandler.forTesting(appContext, InvokeDispatcherFakeContactsDataSource()),
|
||||
calendarHandler = CalendarHandler.forTesting(appContext, InvokeDispatcherFakeCalendarDataSource()),
|
||||
@@ -312,6 +337,30 @@ private class InvokeDispatcherFakeSystemNotificationPoster : SystemNotificationP
|
||||
override fun post(request: SystemNotifyRequest) = Unit
|
||||
}
|
||||
|
||||
private class InvokeDispatcherFakeTalkHandler : TalkHandler {
|
||||
val calls = mutableListOf<String>()
|
||||
|
||||
override suspend fun handlePttStart(paramsJson: String?): GatewaySession.InvokeResult {
|
||||
calls.add("start")
|
||||
return GatewaySession.InvokeResult.ok("""{"captureId":"start"}""")
|
||||
}
|
||||
|
||||
override suspend fun handlePttStop(paramsJson: String?): GatewaySession.InvokeResult {
|
||||
calls.add("stop")
|
||||
return GatewaySession.InvokeResult.ok("""{"status":"stop"}""")
|
||||
}
|
||||
|
||||
override suspend fun handlePttCancel(paramsJson: String?): GatewaySession.InvokeResult {
|
||||
calls.add("cancel")
|
||||
return GatewaySession.InvokeResult.ok("""{"status":"cancel"}""")
|
||||
}
|
||||
|
||||
override suspend fun handlePttOnce(paramsJson: String?): GatewaySession.InvokeResult {
|
||||
calls.add("once")
|
||||
return GatewaySession.InvokeResult.ok("""{"status":"once"}""")
|
||||
}
|
||||
}
|
||||
|
||||
private class InvokeDispatcherFakePhotosDataSource : PhotosDataSource {
|
||||
override fun hasPermission(context: Context): Boolean = true
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ class OpenClawProtocolConstantsTest {
|
||||
assertEquals("canvas", OpenClawCapability.Canvas.rawValue)
|
||||
assertEquals("camera", OpenClawCapability.Camera.rawValue)
|
||||
assertEquals("voiceWake", OpenClawCapability.VoiceWake.rawValue)
|
||||
assertEquals("talk", OpenClawCapability.Talk.rawValue)
|
||||
assertEquals("location", OpenClawCapability.Location.rawValue)
|
||||
assertEquals("sms", OpenClawCapability.Sms.rawValue)
|
||||
assertEquals("device", OpenClawCapability.Device.rawValue)
|
||||
@@ -92,6 +93,14 @@ class OpenClawProtocolConstantsTest {
|
||||
assertEquals("sms.search", OpenClawSmsCommand.Search.rawValue)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun talkCommandsUseStableStrings() {
|
||||
assertEquals("talk.ptt.start", OpenClawTalkCommand.PttStart.rawValue)
|
||||
assertEquals("talk.ptt.stop", OpenClawTalkCommand.PttStop.rawValue)
|
||||
assertEquals("talk.ptt.cancel", OpenClawTalkCommand.PttCancel.rawValue)
|
||||
assertEquals("talk.ptt.once", OpenClawTalkCommand.PttOnce.rawValue)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun callLogCommandsUseStableStrings() {
|
||||
assertEquals("callLog.search", OpenClawCallLogCommand.Search.rawValue)
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
package ai.openclaw.app.voice
|
||||
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertNull
|
||||
import org.junit.Test
|
||||
|
||||
class ChatEventTextTest {
|
||||
private val json = Json { ignoreUnknownKeys = true }
|
||||
|
||||
@Test
|
||||
fun extractsAssistantTextParts() {
|
||||
val payload =
|
||||
payload(
|
||||
"""
|
||||
{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{ "type": "text", "text": "hello" },
|
||||
{ "type": "text", "text": "world" }
|
||||
]
|
||||
}
|
||||
}
|
||||
""",
|
||||
)
|
||||
|
||||
assertEquals("hello\nworld", ChatEventText.assistantTextFromPayload(payload))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun extractsPlainStringContent() {
|
||||
val payload =
|
||||
payload(
|
||||
"""
|
||||
{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "plain reply"
|
||||
}
|
||||
}
|
||||
""",
|
||||
)
|
||||
|
||||
assertEquals("plain reply", ChatEventText.assistantTextFromPayload(payload))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun ignoresUserMessages() {
|
||||
val payload =
|
||||
payload(
|
||||
"""
|
||||
{
|
||||
"message": {
|
||||
"role": "user",
|
||||
"content": [
|
||||
{ "type": "text", "text": "do not speak" }
|
||||
]
|
||||
}
|
||||
}
|
||||
""",
|
||||
)
|
||||
|
||||
assertNull(ChatEventText.assistantTextFromPayload(payload))
|
||||
}
|
||||
|
||||
private fun payload(source: String): JsonObject = json.parseToJsonElement(source.trimIndent()) as JsonObject
|
||||
}
|
||||
@@ -9,7 +9,10 @@ import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.Job
|
||||
import kotlinx.coroutines.SupervisorJob
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.test.runTest
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertFalse
|
||||
import org.junit.Assert.assertTrue
|
||||
import org.junit.Test
|
||||
import org.junit.runner.RunWith
|
||||
@@ -78,7 +81,54 @@ class TalkModeManagerTest {
|
||||
assertEquals(1L, playbackGeneration(manager).get())
|
||||
}
|
||||
|
||||
private fun createManager(): TalkModeManager {
|
||||
@Test
|
||||
fun nonPendingUserFinalDoesNotUseAllResponseTts() {
|
||||
val manager = createManager()
|
||||
|
||||
manager.ttsOnAllResponses = true
|
||||
manager.handleGatewayEvent("chat", chatFinalPayload(runId = "run-user", text = "do not speak", role = "user"))
|
||||
|
||||
assertEquals(0L, playbackGeneration(manager).get())
|
||||
}
|
||||
|
||||
@Test
|
||||
fun textReadyDoesNotEnterSpeakingUntilAudioPlaybackStarts() =
|
||||
runTest {
|
||||
val talkSpeakClient = FakeTalkSpeechSynthesizer()
|
||||
val talkAudioPlayer = FakeTalkAudioPlayer()
|
||||
val manager = createManager(talkSpeakClient = talkSpeakClient, talkAudioPlayer = talkAudioPlayer)
|
||||
|
||||
val job = launch { manager.speakAssistantReply("hello") }
|
||||
talkSpeakClient.requested.await()
|
||||
|
||||
assertEquals("Generating voice…", manager.statusText.value)
|
||||
assertFalse(manager.isSpeaking.value)
|
||||
|
||||
talkSpeakClient.result.complete(
|
||||
TalkSpeakResult.Success(
|
||||
TalkSpeakAudio(
|
||||
bytes = byteArrayOf(1, 2, 3),
|
||||
provider = "test",
|
||||
outputFormat = "mp3_44100_128",
|
||||
voiceCompatible = true,
|
||||
mimeType = "audio/mpeg",
|
||||
fileExtension = ".mp3",
|
||||
),
|
||||
),
|
||||
)
|
||||
talkAudioPlayer.started.await()
|
||||
|
||||
assertEquals("Speaking…", manager.statusText.value)
|
||||
assertTrue(manager.isSpeaking.value)
|
||||
|
||||
talkAudioPlayer.finished.complete(Unit)
|
||||
job.join()
|
||||
}
|
||||
|
||||
private fun createManager(
|
||||
talkSpeakClient: TalkSpeechSynthesizing = TalkSpeakClient(),
|
||||
talkAudioPlayer: TalkAudioPlaying? = null,
|
||||
): TalkModeManager {
|
||||
val app = RuntimeEnvironment.getApplication()
|
||||
val sessionJob = SupervisorJob()
|
||||
val session =
|
||||
@@ -96,6 +146,8 @@ class TalkModeManagerTest {
|
||||
session = session,
|
||||
supportsChatSubscribe = false,
|
||||
isConnected = { true },
|
||||
talkSpeakClient = talkSpeakClient,
|
||||
talkAudioPlayer = talkAudioPlayer ?: TalkAudioPlayer(app),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -124,6 +176,7 @@ class TalkModeManagerTest {
|
||||
private fun chatFinalPayload(
|
||||
runId: String,
|
||||
text: String,
|
||||
role: String = "assistant",
|
||||
): String =
|
||||
"""
|
||||
{
|
||||
@@ -131,7 +184,7 @@ class TalkModeManagerTest {
|
||||
"sessionKey": "main",
|
||||
"state": "final",
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"role": "$role",
|
||||
"content": [
|
||||
{ "type": "text", "text": "$text" }
|
||||
]
|
||||
@@ -140,6 +193,34 @@ class TalkModeManagerTest {
|
||||
""".trimIndent()
|
||||
}
|
||||
|
||||
private class FakeTalkSpeechSynthesizer : TalkSpeechSynthesizing {
|
||||
val requested = CompletableDeferred<Unit>()
|
||||
val result = CompletableDeferred<TalkSpeakResult>()
|
||||
|
||||
override suspend fun synthesize(
|
||||
text: String,
|
||||
directive: TalkDirective?,
|
||||
): TalkSpeakResult {
|
||||
requested.complete(Unit)
|
||||
return result.await()
|
||||
}
|
||||
}
|
||||
|
||||
private class FakeTalkAudioPlayer : TalkAudioPlaying {
|
||||
val started = CompletableDeferred<Unit>()
|
||||
val finished = CompletableDeferred<Unit>()
|
||||
var stopped = false
|
||||
|
||||
override suspend fun play(audio: TalkSpeakAudio) {
|
||||
started.complete(Unit)
|
||||
finished.await()
|
||||
}
|
||||
|
||||
override fun stop() {
|
||||
stopped = true
|
||||
}
|
||||
}
|
||||
|
||||
private class InMemoryDeviceAuthStore : DeviceAuthTokenStore {
|
||||
override fun loadEntry(
|
||||
deviceId: String,
|
||||
|
||||
Reference in New Issue
Block a user