mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:10:44 +00:00
feat: wire talk handoff into native nodes
This commit is contained in:
@@ -36,6 +36,7 @@ import ai.openclaw.app.node.Quad
|
||||
import ai.openclaw.app.node.SmsHandler
|
||||
import ai.openclaw.app.node.SmsManager
|
||||
import ai.openclaw.app.node.SystemHandler
|
||||
import ai.openclaw.app.node.TalkHandler
|
||||
import ai.openclaw.app.node.asObjectOrNull
|
||||
import ai.openclaw.app.node.asStringOrNull
|
||||
import ai.openclaw.app.node.invokeErrorFromThrowable
|
||||
@@ -205,6 +206,16 @@ class NodeRuntime(
|
||||
deviceHandler = deviceHandler,
|
||||
notificationsHandler = notificationsHandler,
|
||||
systemHandler = systemHandler,
|
||||
talkHandler =
    // Adapter that routes the dispatcher's talk commands to this runtime's private PTT handlers.
    // The incoming params JSON is not used by any of the four commands.
    object : TalkHandler {
        override suspend fun handlePttStart(paramsJson: String?): GatewaySession.InvokeResult {
            return handleTalkPttStart()
        }

        override suspend fun handlePttStop(paramsJson: String?): GatewaySession.InvokeResult {
            return handleTalkPttStop()
        }

        override suspend fun handlePttCancel(paramsJson: String?): GatewaySession.InvokeResult {
            return handleTalkPttCancel()
        }

        override suspend fun handlePttOnce(paramsJson: String?): GatewaySession.InvokeResult {
            return handleTalkPttOnce()
        }
    },
|
||||
photosHandler = photosHandler,
|
||||
contactsHandler = contactsHandler,
|
||||
calendarHandler = calendarHandler,
|
||||
@@ -881,6 +892,80 @@ class NodeRuntime(
|
||||
setVoiceCaptureMode(if (value) VoiceCaptureMode.TalkMode else VoiceCaptureMode.Off)
|
||||
}
|
||||
|
||||
/**
 * Prepares talk capture, begins a push-to-talk capture and returns the start payload as JSON.
 * Failures are converted into an error result by [runPreparedTalkPttCommand].
 */
private suspend fun handleTalkPttStart(): GatewaySession.InvokeResult {
    return runPreparedTalkPttCommand {
        val started = talkMode.beginPushToTalk()
        GatewaySession.InvokeResult.ok(started.toJson())
    }
}
|
||||
|
||||
/**
 * Ends the current push-to-talk capture, releases capture state if talk mode is idle,
 * and returns the stop payload as JSON.
 */
private suspend fun handleTalkPttStop(): GatewaySession.InvokeResult {
    return runTalkPttCommand {
        val stopped = talkMode.endPushToTalk()
        finishTalkCaptureIfIdle()
        GatewaySession.InvokeResult.ok(stopped.toJson())
    }
}
|
||||
|
||||
/**
 * Cancels the current push-to-talk capture, releases capture state if talk mode is idle,
 * and returns the resulting payload as JSON.
 */
private suspend fun handleTalkPttCancel(): GatewaySession.InvokeResult {
    return runTalkPttCommand {
        val cancelled = talkMode.cancelPushToTalk()
        finishTalkCaptureIfIdle()
        GatewaySession.InvokeResult.ok(cancelled.toJson())
    }
}
|
||||
|
||||
/**
 * Prepares talk capture, runs one complete push-to-talk capture, releases capture state
 * if talk mode is idle, and returns the final payload as JSON.
 */
private suspend fun handleTalkPttOnce(): GatewaySession.InvokeResult {
    return runPreparedTalkPttCommand {
        val finished = talkMode.runPushToTalkOnce()
        finishTalkCaptureIfIdle()
        GatewaySession.InvokeResult.ok(finished.toJson())
    }
}
|
||||
|
||||
/**
 * Runs [block] after [prepareTalkCapture], inside [runTalkPttCommand].
 *
 * If [block] throws, the prepared capture state is rolled back with
 * [cleanupFailedTalkCapture] and the failure is rethrown so the outer wrapper
 * can turn it into an error result. A failure in [prepareTalkCapture] itself
 * does not trigger the rollback.
 */
private suspend fun runPreparedTalkPttCommand(block: suspend () -> GatewaySession.InvokeResult): GatewaySession.InvokeResult {
    return runTalkPttCommand {
        prepareTalkCapture()
        try {
            block()
        } catch (failure: Throwable) {
            // Undo the capture setup before reporting the failure.
            cleanupFailedTalkCapture()
            throw failure
        }
    }
}
|
||||
|
||||
/**
 * Runs [block] and converts any failure into an error [GatewaySession.InvokeResult]
 * using [invokeErrorFromThrowable].
 *
 * A `CancellationException` is rethrown when the calling coroutine itself has been
 * cancelled, so cancellation is not swallowed. A `CancellationException` raised while
 * the caller is still active (for example from awaiting something that was cancelled
 * elsewhere) is still reported as an error result, as before.
 */
private suspend fun runTalkPttCommand(block: suspend () -> GatewaySession.InvokeResult): GatewaySession.InvokeResult =
    try {
        block()
    } catch (err: Throwable) {
        // Fully-qualified names keep this block independent of the file's import list.
        val callerCancelled =
            err is kotlin.coroutines.cancellation.CancellationException &&
                kotlin.coroutines.coroutineContext[kotlinx.coroutines.Job]?.isActive == false
        if (callerCancelled) throw err
        val (code, message) = invokeErrorFromThrowable(err)
        GatewaySession.InvokeResult.error(code = code, message = message)
    }
|
||||
|
||||
/**
 * Switches the runtime into talk capture before a push-to-talk command runs.
 *
 * Disables mic capture, stops voice playback, sets the foreground service capture mode
 * to TalkMode, configures talk mode for spoken replies and marks external audio capture
 * as active.
 *
 * @throws IllegalStateException when the record-audio permission has not been granted.
 */
private suspend fun prepareTalkCapture() {
    check(hasRecordAudioPermission()) { "MIC_PERMISSION_REQUIRED: grant Microphone permission" }

    micCapture.setMicEnabled(false)
    stopVoicePlayback()
    NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.TalkMode)

    talkMode.ttsOnAllResponses = true
    talkMode.setPlaybackEnabled(speakerEnabled.value)
    talkMode.ensureChatSubscribed()

    externalAudioCaptureActive.value = true
}
|
||||
|
||||
/**
 * Rolls back the state set up by [prepareTalkCapture] after a failed push-to-talk command.
 * Cancelling the capture is best-effort: any failure from it is ignored so the remaining
 * resets always run.
 */
private suspend fun cleanupFailedTalkCapture() {
    try {
        talkMode.cancelPushToTalk()
    } catch (ignored: Throwable) {
        // Best-effort cancel; continue with the rollback regardless.
    }
    talkMode.ttsOnAllResponses = false
    NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off)
    externalAudioCaptureActive.value = false
}
|
||||
|
||||
/**
 * Releases talk capture state once talk mode is neither enabled, listening nor speaking.
 * Does nothing while any of those three flags is still set.
 */
private fun finishTalkCaptureIfIdle() {
    val stillBusy = talkMode.isEnabled.value || talkMode.isListening.value || talkMode.isSpeaking.value
    if (stillBusy) return

    talkMode.ttsOnAllResponses = false
    NodeForegroundService.setVoiceCaptureMode(appContext, VoiceCaptureMode.Off)
    externalAudioCaptureActive.value = false
}
|
||||
|
||||
/** Speaker setting, read directly from [prefs]. */
val speakerEnabled: StateFlow<Boolean> get() = prefs.speakerEnabled
|
||||
|
||||
|
||||
@@ -278,14 +278,13 @@ class GatewayDiscovery(
|
||||
return legacyHostAddress(resolved)
|
||||
}
|
||||
|
||||
private fun legacyHostAddress(resolved: NsdServiceInfo): String? {
|
||||
return try {
|
||||
private fun legacyHostAddress(resolved: NsdServiceInfo): String? =
|
||||
try {
|
||||
val host = NsdServiceInfo::class.java.getMethod("getHost").invoke(resolved) as? InetAddress
|
||||
host?.hostAddress
|
||||
} catch (_: Throwable) {
|
||||
null
|
||||
}
|
||||
}
|
||||
|
||||
private fun publish() {
|
||||
_gateways.value =
|
||||
@@ -529,20 +528,20 @@ class GatewayDiscovery(
|
||||
val cm = connectivity ?: return null
|
||||
|
||||
// Prefer VPN (Tailscale) when present; otherwise use the active network.
|
||||
trackedNetworks(cm).firstOrNull { n ->
|
||||
val caps = cm.getNetworkCapabilities(n) ?: return@firstOrNull false
|
||||
caps.hasTransport(NetworkCapabilities.TRANSPORT_VPN)
|
||||
}?.let { return it }
|
||||
trackedNetworks(cm)
|
||||
.firstOrNull { n ->
|
||||
val caps = cm.getNetworkCapabilities(n) ?: return@firstOrNull false
|
||||
caps.hasTransport(NetworkCapabilities.TRANSPORT_VPN)
|
||||
}?.let { return it }
|
||||
|
||||
return cm.activeNetwork
|
||||
}
|
||||
|
||||
private fun trackedNetworks(cm: ConnectivityManager): List<Network> {
|
||||
return buildList {
|
||||
private fun trackedNetworks(cm: ConnectivityManager): List<Network> =
|
||||
buildList {
|
||||
cm.activeNetwork?.let(::add)
|
||||
addAll(availableNetworks)
|
||||
}.distinct()
|
||||
}
|
||||
|
||||
private fun createDirectResolver(): Resolver? {
|
||||
val cm = connectivity ?: return null
|
||||
|
||||
@@ -14,6 +14,7 @@ import ai.openclaw.app.protocol.OpenClawNotificationsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawPhotosCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSmsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSystemCommand
|
||||
import ai.openclaw.app.protocol.OpenClawTalkCommand
|
||||
|
||||
data class NodeRuntimeFlags(
|
||||
val cameraEnabled: Boolean,
|
||||
@@ -81,6 +82,7 @@ object InvokeCommandRegistry {
|
||||
name = OpenClawCapability.VoiceWake.rawValue,
|
||||
availability = NodeCapabilityAvailability.VoiceWakeEnabled,
|
||||
),
|
||||
NodeCapabilitySpec(name = OpenClawCapability.Talk.rawValue),
|
||||
NodeCapabilitySpec(
|
||||
name = OpenClawCapability.Location.rawValue,
|
||||
availability = NodeCapabilityAvailability.LocationEnabled,
|
||||
@@ -135,6 +137,18 @@ object InvokeCommandRegistry {
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawSystemCommand.Notify.rawValue,
|
||||
),
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawTalkCommand.PttStart.rawValue,
|
||||
),
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawTalkCommand.PttStop.rawValue,
|
||||
),
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawTalkCommand.PttCancel.rawValue,
|
||||
),
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawTalkCommand.PttOnce.rawValue,
|
||||
),
|
||||
InvokeCommandSpec(
|
||||
name = OpenClawCameraCommand.List.rawValue,
|
||||
requiresForeground = true,
|
||||
|
||||
@@ -13,6 +13,7 @@ import ai.openclaw.app.protocol.OpenClawMotionCommand
|
||||
import ai.openclaw.app.protocol.OpenClawNotificationsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSmsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSystemCommand
|
||||
import ai.openclaw.app.protocol.OpenClawTalkCommand
|
||||
|
||||
internal enum class SmsSearchAvailabilityReason {
|
||||
Available,
|
||||
@@ -59,6 +60,7 @@ class InvokeDispatcher(
|
||||
private val deviceHandler: DeviceHandler,
|
||||
private val notificationsHandler: NotificationsHandler,
|
||||
private val systemHandler: SystemHandler,
|
||||
private val talkHandler: TalkHandler,
|
||||
private val photosHandler: PhotosHandler,
|
||||
private val contactsHandler: ContactsHandler,
|
||||
private val calendarHandler: CalendarHandler,
|
||||
@@ -188,6 +190,12 @@ class InvokeDispatcher(
|
||||
// System command
|
||||
OpenClawSystemCommand.Notify.rawValue -> systemHandler.handleSystemNotify(paramsJson)
|
||||
|
||||
// Talk commands
|
||||
OpenClawTalkCommand.PttStart.rawValue -> talkHandler.handlePttStart(paramsJson)
|
||||
OpenClawTalkCommand.PttStop.rawValue -> talkHandler.handlePttStop(paramsJson)
|
||||
OpenClawTalkCommand.PttCancel.rawValue -> talkHandler.handlePttCancel(paramsJson)
|
||||
OpenClawTalkCommand.PttOnce.rawValue -> talkHandler.handlePttOnce(paramsJson)
|
||||
|
||||
// Photos command
|
||||
ai.openclaw.app.protocol.OpenClawPhotosCommand.Latest.rawValue ->
|
||||
photosHandler.handlePhotosLatest(
|
||||
@@ -336,3 +344,13 @@ class InvokeDispatcher(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Handlers for the push-to-talk invoke commands routed by [InvokeDispatcher].
 * Each method receives the raw params JSON of the invoke (may be null) and
 * returns the result to send back.
 */
interface TalkHandler {
    /** Handles the PTT start command. */
    suspend fun handlePttStart(paramsJson: String?): GatewaySession.InvokeResult

    /** Handles the PTT stop command. */
    suspend fun handlePttStop(paramsJson: String?): GatewaySession.InvokeResult

    /** Handles the PTT cancel command. */
    suspend fun handlePttCancel(paramsJson: String?): GatewaySession.InvokeResult

    /** Handles the one-shot PTT command. */
    suspend fun handlePttOnce(paramsJson: String?): GatewaySession.InvokeResult
}
|
||||
|
||||
@@ -7,6 +7,7 @@ enum class OpenClawCapability(
|
||||
Camera("camera"),
|
||||
Sms("sms"),
|
||||
VoiceWake("voiceWake"),
|
||||
Talk("talk"),
|
||||
Location("location"),
|
||||
Device("device"),
|
||||
Notifications("notifications"),
|
||||
@@ -71,6 +72,20 @@ enum class OpenClawSmsCommand(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Push-to-talk invoke commands. [rawValue] is the command name string
 * the dispatcher matches on.
 */
enum class OpenClawTalkCommand(val rawValue: String) {
    PttStart(rawValue = "talk.ptt.start"),
    PttStop(rawValue = "talk.ptt.stop"),
    PttCancel(rawValue = "talk.ptt.cancel"),
    PttOnce(rawValue = "talk.ptt.once"),
    ;

    companion object {
        /** Common prefix of every talk command name. */
        const val NamespacePrefix: String = "talk."
    }
}
|
||||
|
||||
enum class OpenClawLocationCommand(
|
||||
val rawValue: String,
|
||||
) {
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
package ai.openclaw.app.voice
|
||||
|
||||
import kotlinx.serialization.json.JsonArray
|
||||
import kotlinx.serialization.json.JsonElement
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
|
||||
/**
 * Extracts assistant-visible text from chat event JSON.
 *
 * A message is accepted when its `role` is absent or equals `"assistant"`. Its `content`
 * may be a plain string or an array whose entries are strings or objects with a `text`
 * field (and a `type` that is absent or `"text"`). Text parts are trimmed, empty parts
 * are dropped, and array parts are joined with newlines.
 */
internal object ChatEventText {
    /** Reads the `message` field of [payload] and extracts its assistant text, or null. */
    fun assistantTextFromPayload(payload: JsonObject): String? {
        return assistantTextFromMessage(payload["message"])
    }

    /** Extracts assistant text from a message element; null if not an object, wrong role, or no text. */
    fun assistantTextFromMessage(messageEl: JsonElement?): String? {
        val messageObj = messageEl.asObjectOrNull() ?: return null
        val role = messageObj["role"].asStringOrNull()
        val roleAccepted = role == null || role == "assistant"
        return if (roleAccepted) textFromContent(messageObj["content"]) else null
    }

    /** Resolves `content` that is either a string primitive or an array of parts. */
    private fun textFromContent(content: JsonElement?): String? {
        if (content is JsonPrimitive) {
            return trimmedOrNull(content.asStringOrNull())
        }
        if (content is JsonArray) {
            // Parts are already trimmed and non-empty when they come back non-null.
            val parts = content.mapNotNull { textFromContentPart(it) }
            return parts.joinToString(separator = "\n").takeIf { it.isNotBlank() }
        }
        return null
    }

    /** Resolves one array entry: a bare string, or an object carrying a `text` field. */
    private fun textFromContentPart(part: JsonElement): String? {
        val bare = trimmedOrNull(part.asStringOrNull())
        if (bare != null) return bare

        val partObj = part.asObjectOrNull() ?: return null
        val type = partObj["type"].asStringOrNull()
        if (type != null && type != "text") return null
        return trimmedOrNull(partObj["text"].asStringOrNull())
    }

    /** Trims [raw] and maps null/empty results to null. */
    private fun trimmedOrNull(raw: String?): String? = raw?.trim()?.takeIf { it.isNotEmpty() }
}
|
||||
|
||||
/** Returns the receiver as a [JsonObject], or null when it is null or any other element kind. */
private fun JsonElement?.asObjectOrNull(): JsonObject? {
    return if (this is JsonObject) this else null
}
|
||||
|
||||
/** Returns the content of a string-typed [JsonPrimitive]; null for anything else. */
private fun JsonElement?.asStringOrNull(): String? {
    val primitive = this as? JsonPrimitive ?: return null
    return if (primitive.isString) primitive.content else null
}
|
||||
@@ -21,7 +21,6 @@ import kotlinx.coroutines.flow.StateFlow
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.withContext
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.JsonArray
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
import java.util.UUID
|
||||
@@ -596,20 +595,7 @@ class MicCaptureManager(
|
||||
PackageManager.PERMISSION_GRANTED
|
||||
)
|
||||
|
||||
private fun parseAssistantText(payload: JsonObject): String? {
|
||||
val message = payload["message"].asObjectOrNull() ?: return null
|
||||
if (message["role"].asStringOrNull() != "assistant") return null
|
||||
val content = message["content"] as? JsonArray ?: return null
|
||||
|
||||
val parts =
|
||||
content.mapNotNull { item ->
|
||||
val obj = item.asObjectOrNull() ?: return@mapNotNull null
|
||||
if (obj["type"].asStringOrNull() != "text") return@mapNotNull null
|
||||
obj["text"].asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
}
|
||||
if (parts.isEmpty()) return null
|
||||
return parts.joinToString("\n")
|
||||
}
|
||||
/** Extracts the assistant text from a chat event payload via [ChatEventText]. */
private fun parseAssistantText(payload: JsonObject): String? {
    return ChatEventText.assistantTextFromPayload(payload)
}
|
||||
|
||||
private val listener =
|
||||
object : RecognitionListener {
|
||||
|
||||
@@ -12,20 +12,26 @@ import kotlinx.coroutines.delay
|
||||
import kotlinx.coroutines.withContext
|
||||
import java.io.File
|
||||
|
||||
/** Playback abstraction for synthesized talk audio; implemented by [TalkAudioPlayer]. */
internal interface TalkAudioPlaying {
    /** Plays [audio]. */
    suspend fun play(audio: TalkSpeakAudio)

    /** Stops playback. */
    fun stop()
}
|
||||
|
||||
internal class TalkAudioPlayer(
|
||||
private val context: Context,
|
||||
) {
|
||||
) : TalkAudioPlaying {
|
||||
private val lock = Any()
|
||||
private var active: ActivePlayback? = null
|
||||
|
||||
suspend fun play(audio: TalkSpeakAudio) {
|
||||
override suspend fun play(audio: TalkSpeakAudio) {
|
||||
when (val mode = resolvePlaybackMode(audio)) {
|
||||
is TalkPlaybackMode.Pcm -> playPcm(audio.bytes, mode.sampleRate)
|
||||
is TalkPlaybackMode.Compressed -> playCompressed(audio.bytes, mode.fileExtension)
|
||||
}
|
||||
}
|
||||
|
||||
fun stop() {
|
||||
override fun stop() {
|
||||
synchronized(lock) {
|
||||
active?.cancel()
|
||||
active = null
|
||||
|
||||
@@ -41,7 +41,28 @@ import java.util.UUID
|
||||
import java.util.concurrent.atomic.AtomicLong
|
||||
import kotlin.coroutines.coroutineContext
|
||||
|
||||
class TalkModeManager(
|
||||
/**
 * Result of starting a push-to-talk capture.
 *
 * @property captureId identifier of the capture that is now active.
 */
data class TalkPttStartPayload(
    val captureId: String,
) {
    /**
     * Serializes to `{"captureId":"…"}`.
     *
     * Built with the JSON builder (as [TalkPttStopPayload] does) so the id is always
     * escaped correctly instead of being interpolated raw into the string.
     */
    fun toJson(): String =
        buildJsonObject {
            put("captureId", JsonPrimitive(captureId))
        }.toString()
}
|
||||
|
||||
/**
 * Result of ending, cancelling or completing a push-to-talk capture.
 *
 * @property captureId identifier of the capture this result refers to.
 * @property transcript captured transcript, or null when there is none to report.
 * @property status outcome label for the capture.
 */
data class TalkPttStopPayload(
    val captureId: String,
    val transcript: String?,
    val status: String,
) {
    /** Serializes to a JSON object; the `transcript` key is omitted when [transcript] is null. */
    fun toJson(): String {
        val fields =
            buildJsonObject {
                put("captureId", JsonPrimitive(captureId))
                transcript?.let { put("transcript", JsonPrimitive(it)) }
                put("status", JsonPrimitive(status))
            }
        return fields.toString()
    }
}
|
||||
|
||||
class TalkModeManager internal constructor(
|
||||
private val context: Context,
|
||||
private val scope: CoroutineScope,
|
||||
private val session: GatewaySession,
|
||||
@@ -49,6 +70,8 @@ class TalkModeManager(
|
||||
private val isConnected: () -> Boolean,
|
||||
private val onBeforeSpeak: suspend () -> Unit = {},
|
||||
private val onAfterSpeak: suspend () -> Unit = {},
|
||||
private val talkSpeakClient: TalkSpeechSynthesizing = TalkSpeakClient(session = session),
|
||||
private val talkAudioPlayer: TalkAudioPlaying = TalkAudioPlayer(context),
|
||||
) {
|
||||
companion object {
|
||||
private const val tag = "TalkMode"
|
||||
@@ -60,9 +83,6 @@ class TalkModeManager(
|
||||
|
||||
private val mainHandler = Handler(Looper.getMainLooper())
|
||||
private val json = Json { ignoreUnknownKeys = true }
|
||||
private val talkSpeakClient = TalkSpeakClient(session = session, json = json)
|
||||
private val talkAudioPlayer = TalkAudioPlayer(context)
|
||||
|
||||
private val _isEnabled = MutableStateFlow(false)
|
||||
val isEnabled: StateFlow<Boolean> = _isEnabled
|
||||
|
||||
@@ -82,6 +102,10 @@ class TalkModeManager(
|
||||
private var restartJob: Job? = null
|
||||
private var stopRequested = false
|
||||
private var listeningMode = false
|
||||
private var activePttCaptureId: String? = null
|
||||
private var pttAutoStopEnabled = false
|
||||
private var pttTimeoutJob: Job? = null
|
||||
private var pttCompletion: CompletableDeferred<TalkPttStopPayload>? = null
|
||||
|
||||
private var silenceJob: Job? = null
|
||||
private var silenceWindowMs = TalkDefaults.defaultSilenceTimeoutMs
|
||||
@@ -156,6 +180,127 @@ class TalkModeManager(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Starts a push-to-talk capture and returns its capture id.
 *
 * If a capture is already active its existing id is returned and nothing is restarted.
 * Otherwise speech output is stopped, PTT/silence bookkeeping is reset, and a fresh
 * speech recognizer is created and started on the main dispatcher.
 *
 * @throws IllegalStateException when the gateway is not connected, the microphone
 *   permission is missing, or speech recognition is unavailable.
 */
suspend fun beginPushToTalk(): TalkPttStartPayload {
    if (!isConnected()) {
        _statusText.value = "Gateway not connected"
        throw IllegalStateException("UNAVAILABLE: Gateway not connected")
    }
    val alreadyActive = activePttCaptureId
    if (alreadyActive != null) {
        return TalkPttStartPayload(captureId = alreadyActive)
    }

    // Reset speech output and any leftover PTT / silence-monitor state.
    stopSpeaking(resetInterrupt = false)
    pttTimeoutJob?.cancel()
    pttTimeoutJob = null
    pttAutoStopEnabled = false
    pttCompletion = null
    silenceJob?.cancel()
    silenceJob = null
    listeningMode = false
    finalizeInFlight = false
    stopRequested = false
    lastTranscript = ""
    lastHeardAtMs = null

    val permissionState = ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO)
    if (permissionState != PackageManager.PERMISSION_GRANTED) {
        _statusText.value = "Microphone permission required"
        throw IllegalStateException("MIC_PERMISSION_REQUIRED: grant Microphone permission")
    }
    if (!SpeechRecognizer.isRecognitionAvailable(context)) {
        _statusText.value = "Speech recognizer unavailable"
        throw IllegalStateException("UNAVAILABLE: Speech recognizer unavailable")
    }

    val newCaptureId = UUID.randomUUID().toString()
    activePttCaptureId = newCaptureId
    withContext(Dispatchers.Main) {
        // Replace any previous recognizer with a fresh one before listening.
        recognizer?.run {
            cancel()
            destroy()
        }
        val fresh = SpeechRecognizer.createSpeechRecognizer(context)
        fresh.setRecognitionListener(listener)
        recognizer = fresh
        startListeningInternal(markListening = true)
    }
    _statusText.value = "Listening (PTT)"
    return TalkPttStartPayload(captureId = newCaptureId)
}
|
||||
|
||||
/**
 * Ends the active push-to-talk capture and reports what happened.
 *
 * Status values: `"idle"` when no capture was active, `"empty"` when nothing was
 * transcribed, `"offline"` when the gateway is not connected, and `"queued"` when the
 * transcript was handed to [finalizeTranscript] in a new coroutine. For `"empty"` and
 * `"offline"`, [start] is called again if talk mode is enabled.
 */
suspend fun endPushToTalk(): TalkPttStopPayload {
    val activeId =
        activePttCaptureId
            ?: return finishPushToTalk(
                TalkPttStopPayload(captureId = UUID.randomUUID().toString(), transcript = null, status = "idle"),
            )

    clearPushToTalkRecognition()
    val heard = lastTranscript.trim()
    lastTranscript = ""
    lastHeardAtMs = null

    return when {
        heard.isEmpty() -> {
            _statusText.value = if (_isEnabled.value) "Listening" else "Ready"
            if (_isEnabled.value) {
                start()
            }
            finishPushToTalk(TalkPttStopPayload(captureId = activeId, transcript = null, status = "empty"))
        }

        !isConnected() -> {
            _statusText.value = "Gateway not connected"
            if (_isEnabled.value) {
                start()
            }
            finishPushToTalk(TalkPttStopPayload(captureId = activeId, transcript = heard, status = "offline"))
        }

        else -> {
            _statusText.value = "Thinking…"
            // Finalization runs in its own coroutine; the caller gets "queued" immediately.
            scope.launch {
                finalizeTranscript(heard)
            }
            finishPushToTalk(TalkPttStopPayload(captureId = activeId, transcript = heard, status = "queued"))
        }
    }
}
|
||||
|
||||
/**
 * Cancels the active push-to-talk capture and discards its transcript.
 *
 * Returns status `"idle"` when no capture was active, otherwise `"cancelled"`.
 * [start] is called again if talk mode is enabled.
 */
suspend fun cancelPushToTalk(): TalkPttStopPayload {
    val activeId =
        activePttCaptureId
            ?: return finishPushToTalk(
                TalkPttStopPayload(captureId = UUID.randomUUID().toString(), transcript = null, status = "idle"),
            )

    clearPushToTalkRecognition()
    lastTranscript = ""
    lastHeardAtMs = null

    val talkModeEnabled = _isEnabled.value
    _statusText.value = if (talkModeEnabled) "Listening" else "Ready"
    if (_isEnabled.value) {
        start()
    }
    return finishPushToTalk(TalkPttStopPayload(captureId = activeId, transcript = null, status = "cancelled"))
}
|
||||
|
||||
/**
 * Runs one complete push-to-talk capture and suspends until it finishes.
 *
 * A pending one-shot capture is cancelled first. If a capture is still active after
 * that, a `"busy"` payload is returned without starting anything. Otherwise a capture is
 * started with auto-stop enabled: it ends when the silence monitor fires or after
 * [maxDurationMs], whichever comes first.
 *
 * @param maxDurationMs upper bound on the capture duration in milliseconds.
 * @return the payload produced when the capture is ended or cancelled.
 */
suspend fun runPushToTalkOnce(maxDurationMs: Long = 12_000L): TalkPttStopPayload {
    if (pttCompletion != null) {
        cancelPushToTalk()
    }
    activePttCaptureId?.let { busyId ->
        return TalkPttStopPayload(captureId = busyId, transcript = null, status = "busy")
    }

    beginPushToTalk()
    val completion = CompletableDeferred<TalkPttStopPayload>()
    pttCompletion = completion
    pttAutoStopEnabled = true
    startSilenceMonitor()
    pttTimeoutJob =
        scope.launch {
            delay(maxDurationMs)
            if (pttAutoStopEnabled && activePttCaptureId != null) {
                // endPushToTalk() cancels pttTimeoutJob, which is this coroutine. Running it
                // here would cancel it mid-flight: the following withContext would throw and
                // the completion would never be completed. Run it in a separate coroutine,
                // as checkSilence() does.
                scope.launch { endPushToTalk() }
            }
        }
    return completion.await()
}
|
||||
|
||||
/**
|
||||
* Speak a wake-word command through TalkMode's full pipeline:
|
||||
* chat.send → wait for final → read assistant text → TTS.
|
||||
@@ -335,6 +480,12 @@ class TalkModeManager(
|
||||
stopRequested = true
|
||||
finalizeInFlight = false
|
||||
listeningMode = false
|
||||
activePttCaptureId = null
|
||||
pttAutoStopEnabled = false
|
||||
pttCompletion?.cancel()
|
||||
pttCompletion = null
|
||||
pttTimeoutJob?.cancel()
|
||||
pttTimeoutJob = null
|
||||
restartJob?.cancel()
|
||||
restartJob = null
|
||||
silenceJob?.cancel()
|
||||
@@ -434,7 +585,7 @@ class TalkModeManager(
|
||||
silenceJob?.cancel()
|
||||
silenceJob =
|
||||
scope.launch {
|
||||
while (_isEnabled.value) {
|
||||
while (_isEnabled.value || pttAutoStopEnabled) {
|
||||
delay(200)
|
||||
checkSilence()
|
||||
}
|
||||
@@ -448,6 +599,12 @@ class TalkModeManager(
|
||||
val lastHeard = lastHeardAtMs ?: return
|
||||
val elapsed = SystemClock.elapsedRealtime() - lastHeard
|
||||
if (elapsed < silenceWindowMs) return
|
||||
if (activePttCaptureId != null) {
|
||||
if (pttAutoStopEnabled) {
|
||||
scope.launch { endPushToTalk() }
|
||||
}
|
||||
return
|
||||
}
|
||||
if (finalizeInFlight) return
|
||||
finalizeInFlight = true
|
||||
scope.launch {
|
||||
@@ -525,6 +682,27 @@ class TalkModeManager(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tears down the active push-to-talk capture: cancels the timeout job, clears the PTT
 * and listening flags, clears the listen watchdog, and destroys the recognizer on the
 * main dispatcher.
 */
private suspend fun clearPushToTalkRecognition() {
    pttTimeoutJob?.cancel()
    pttTimeoutJob = null
    pttAutoStopEnabled = false
    activePttCaptureId = null

    _isListening.value = false
    listeningMode = false
    clearListenWatchdog()

    withContext(Dispatchers.Main) {
        recognizer?.apply {
            cancel()
            destroy()
        }
        recognizer = null
    }
}
|
||||
|
||||
/**
 * Completes the pending one-shot completion (if any) with [payload], clears it,
 * and returns [payload] unchanged.
 */
private fun finishPushToTalk(payload: TalkPttStopPayload): TalkPttStopPayload =
    payload.also { result ->
        pttCompletion?.complete(result)
        pttCompletion = null
    }
|
||||
|
||||
private suspend fun subscribeChatIfNeeded(
|
||||
session: GatewaySession,
|
||||
sessionKey: String,
|
||||
@@ -656,20 +834,7 @@ class TalkModeManager(
|
||||
}
|
||||
}
|
||||
|
||||
private fun extractTextFromChatEventMessage(messageEl: JsonElement?): String? {
|
||||
val msg = messageEl?.asObjectOrNull() ?: return null
|
||||
val content = msg["content"] as? JsonArray ?: return null
|
||||
return content
|
||||
.mapNotNull { entry ->
|
||||
entry
|
||||
.asObjectOrNull()
|
||||
?.get("text")
|
||||
?.asStringOrNull()
|
||||
?.trim()
|
||||
}.filter { it.isNotEmpty() }
|
||||
.joinToString("\n")
|
||||
.takeIf { it.isNotBlank() }
|
||||
}
|
||||
/** Extracts the assistant text from a chat event message element via [ChatEventText]. */
private fun extractTextFromChatEventMessage(messageEl: JsonElement?): String? {
    return ChatEventText.assistantTextFromMessage(messageEl)
}
|
||||
|
||||
private suspend fun waitForAssistantText(
|
||||
session: GatewaySession,
|
||||
@@ -729,17 +894,16 @@ class TalkModeManager(
|
||||
_lastAssistantText.value = cleaned
|
||||
ensurePlaybackActive(playbackToken)
|
||||
|
||||
_statusText.value = "Speaking…"
|
||||
_isSpeaking.value = true
|
||||
_statusText.value = "Generating voice…"
|
||||
_isSpeaking.value = false
|
||||
lastSpokenText = cleaned
|
||||
ensureInterruptListener()
|
||||
requestAudioFocusForTts()
|
||||
|
||||
try {
|
||||
val started = SystemClock.elapsedRealtime()
|
||||
when (val result = talkSpeakClient.synthesize(text = cleaned, directive = directive)) {
|
||||
is TalkSpeakResult.Success -> {
|
||||
ensurePlaybackActive(playbackToken)
|
||||
markAudioPlaybackStarting(playbackToken)
|
||||
talkAudioPlayer.play(result.audio)
|
||||
ensurePlaybackActive(playbackToken)
|
||||
Log.d(tag, "talk.speak ok durMs=${SystemClock.elapsedRealtime() - started}")
|
||||
@@ -789,8 +953,6 @@ class TalkModeManager(
|
||||
shouldResumeAfterSpeak = true
|
||||
onBeforeSpeak()
|
||||
ensurePlaybackActive(playbackToken)
|
||||
_isSpeaking.value = true
|
||||
_statusText.value = "Speaking…"
|
||||
block()
|
||||
} finally {
|
||||
synchronized(ttsJobLock) {
|
||||
@@ -888,6 +1050,7 @@ class TalkModeManager(
|
||||
}
|
||||
},
|
||||
)
|
||||
markAudioPlaybackStarting(playbackToken)
|
||||
val result = engine.speak(text, TextToSpeech.QUEUE_FLUSH, null, utteranceId)
|
||||
if (result != TextToSpeech.SUCCESS) {
|
||||
throw IllegalStateException("TextToSpeech start failed")
|
||||
@@ -905,6 +1068,14 @@ class TalkModeManager(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Marks the moment audio output is about to start: verifies the playback token via
 * [ensurePlaybackActive], then sets the "Speaking…" status and speaking flag, and
 * makes sure the interrupt listener and TTS audio focus are in place.
 */
private fun markAudioPlaybackStarting(playbackToken: Long) {
    // Runs first, before any state is touched.
    ensurePlaybackActive(playbackToken)

    _statusText.value = "Speaking…"
    _isSpeaking.value = true

    ensureInterruptListener()
    requestAudioFocusForTts()
}
|
||||
|
||||
fun stopTts() {
|
||||
stopSpeaking(resetInterrupt = true)
|
||||
_isSpeaking.value = false
|
||||
|
||||
@@ -28,12 +28,19 @@ internal sealed interface TalkSpeakResult {
|
||||
) : TalkSpeakResult
|
||||
}
|
||||
|
||||
/** Speech synthesis abstraction; implemented by [TalkSpeakClient]. */
internal interface TalkSpeechSynthesizing {
    /**
     * Synthesizes speech for [text].
     *
     * @param text text to speak.
     * @param directive optional directive passed along with the request.
     * @return the synthesis outcome.
     */
    suspend fun synthesize(text: String, directive: TalkDirective?): TalkSpeakResult
}
|
||||
|
||||
internal class TalkSpeakClient(
|
||||
private val session: GatewaySession? = null,
|
||||
private val json: Json = Json { ignoreUnknownKeys = true },
|
||||
private val requestDetailed: (suspend (String, String, Long) -> GatewaySession.RpcResult)? = null,
|
||||
) {
|
||||
suspend fun synthesize(
|
||||
) : TalkSpeechSynthesizing {
|
||||
override suspend fun synthesize(
|
||||
text: String,
|
||||
directive: TalkDirective?,
|
||||
): TalkSpeakResult {
|
||||
|
||||
@@ -6,6 +6,11 @@ import ai.openclaw.app.gateway.GatewayEndpoint
|
||||
import ai.openclaw.app.gateway.GatewaySession
|
||||
import ai.openclaw.app.gateway.GatewayTlsProbeFailure
|
||||
import ai.openclaw.app.gateway.GatewayTlsProbeResult
|
||||
import ai.openclaw.app.node.InvokeDispatcher
|
||||
import ai.openclaw.app.protocol.OpenClawTalkCommand
|
||||
import ai.openclaw.app.voice.TalkModeManager
|
||||
import android.Manifest
|
||||
import kotlinx.coroutines.flow.MutableStateFlow
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertFalse
|
||||
@@ -15,6 +20,7 @@ import org.junit.Test
|
||||
import org.junit.runner.RunWith
|
||||
import org.robolectric.RobolectricTestRunner
|
||||
import org.robolectric.RuntimeEnvironment
|
||||
import org.robolectric.Shadows.shadowOf
|
||||
import org.robolectric.annotation.Config
|
||||
import java.lang.reflect.Field
|
||||
import java.util.UUID
|
||||
@@ -221,6 +227,23 @@ class GatewayBootstrapAuthTest {
|
||||
assertNull(authStore.loadToken(deviceId, "operator"))
|
||||
}
|
||||
|
||||
/**
 * With mic permission granted, a PTT start on a fresh runtime is expected to fail
 * with UNAVAILABLE, and the capture state set up beforehand must be rolled back.
 */
@Test
fun talkPttStart_cleansPreparedCaptureWhenBeginFails() {
    runBlocking {
        val application = RuntimeEnvironment.getApplication()
        shadowOf(application).grantPermissions(Manifest.permission.RECORD_AUDIO)
        val runtime = NodeRuntime(application)
        val invokeDispatcher = readField<InvokeDispatcher>(runtime, "invokeDispatcher")

        val outcome = invokeDispatcher.handleInvoke(OpenClawTalkCommand.PttStart.rawValue, null)

        assertEquals("UNAVAILABLE", outcome.error?.code)
        // Everything prepared before the failed start must be back to its idle value.
        assertEquals(VoiceCaptureMode.Off, runtime.voiceCaptureMode.value)
        val externalCapture = readField<MutableStateFlow<Boolean>>(runtime, "externalAudioCaptureActive")
        assertFalse(externalCapture.value)
        val talkModeManager = readField<Lazy<TalkModeManager>>(runtime, "talkMode\$delegate").value
        assertFalse(talkModeManager.ttsOnAllResponses)
    }
}
|
||||
|
||||
private fun waitForGatewayTrustPrompt(runtime: NodeRuntime): NodeRuntime.GatewayTrustPrompt {
|
||||
repeat(50) {
|
||||
runtime.pendingGatewayTrust.value?.let { return it }
|
||||
|
||||
@@ -12,6 +12,7 @@ import ai.openclaw.app.protocol.OpenClawNotificationsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawPhotosCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSmsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSystemCommand
|
||||
import ai.openclaw.app.protocol.OpenClawTalkCommand
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertFalse
|
||||
import org.junit.Assert.assertNotNull
|
||||
@@ -26,6 +27,7 @@ class InvokeCommandRegistryTest {
|
||||
OpenClawCapability.Device.rawValue,
|
||||
OpenClawCapability.Notifications.rawValue,
|
||||
OpenClawCapability.System.rawValue,
|
||||
OpenClawCapability.Talk.rawValue,
|
||||
OpenClawCapability.Photos.rawValue,
|
||||
OpenClawCapability.Contacts.rawValue,
|
||||
OpenClawCapability.Calendar.rawValue,
|
||||
@@ -50,6 +52,10 @@ class InvokeCommandRegistryTest {
|
||||
OpenClawNotificationsCommand.List.rawValue,
|
||||
OpenClawNotificationsCommand.Actions.rawValue,
|
||||
OpenClawSystemCommand.Notify.rawValue,
|
||||
OpenClawTalkCommand.PttStart.rawValue,
|
||||
OpenClawTalkCommand.PttStop.rawValue,
|
||||
OpenClawTalkCommand.PttCancel.rawValue,
|
||||
OpenClawTalkCommand.PttOnce.rawValue,
|
||||
OpenClawPhotosCommand.Latest.rawValue,
|
||||
OpenClawContactsCommand.Search.rawValue,
|
||||
OpenClawContactsCommand.Add.rawValue,
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
package ai.openclaw.app.node
|
||||
|
||||
import ai.openclaw.app.gateway.DeviceIdentityStore
|
||||
import ai.openclaw.app.gateway.GatewaySession
|
||||
import ai.openclaw.app.protocol.OpenClawCallLogCommand
|
||||
import ai.openclaw.app.protocol.OpenClawCameraCommand
|
||||
import ai.openclaw.app.protocol.OpenClawLocationCommand
|
||||
import ai.openclaw.app.protocol.OpenClawMotionCommand
|
||||
import ai.openclaw.app.protocol.OpenClawSmsCommand
|
||||
import ai.openclaw.app.protocol.OpenClawTalkCommand
|
||||
import android.content.Context
|
||||
import android.content.pm.PackageManager
|
||||
import kotlinx.coroutines.flow.MutableStateFlow
|
||||
@@ -208,6 +210,27 @@ class InvokeDispatcherTest {
|
||||
assertEquals("INVALID_REQUEST: unknown command", result.error?.message)
|
||||
}
|
||||
|
||||
/** Each talk PTT command must reach its matching [TalkHandler] method, in call order. */
@Test
fun handleInvoke_routesTalkPttCommands() =
    runTest {
        val fakeTalk = InvokeDispatcherFakeTalkHandler()
        val dispatcher = newDispatcher(talkHandler = fakeTalk)

        val startResult = dispatcher.handleInvoke(OpenClawTalkCommand.PttStart.rawValue, null)
        val stopResult = dispatcher.handleInvoke(OpenClawTalkCommand.PttStop.rawValue, null)
        val cancelResult = dispatcher.handleInvoke(OpenClawTalkCommand.PttCancel.rawValue, null)
        val onceResult = dispatcher.handleInvoke(OpenClawTalkCommand.PttOnce.rawValue, null)

        // Payloads come straight from the fake handler.
        assertEquals("""{"captureId":"start"}""", startResult.payloadJson)
        assertEquals("""{"status":"stop"}""", stopResult.payloadJson)
        assertEquals("""{"status":"cancel"}""", cancelResult.payloadJson)
        assertEquals("""{"status":"once"}""", onceResult.payloadJson)

        val expectedCalls = listOf("start", "stop", "cancel", "once")
        assertEquals(expectedCalls, fakeTalk.calls)
    }
|
||||
|
||||
private fun newDispatcher(
|
||||
cameraEnabled: Boolean = false,
|
||||
locationEnabled: Boolean = false,
|
||||
@@ -219,6 +242,7 @@ class InvokeDispatcherTest {
|
||||
debugBuild: Boolean = false,
|
||||
motionActivityAvailable: Boolean = false,
|
||||
motionPedometerAvailable: Boolean = false,
|
||||
talkHandler: TalkHandler = InvokeDispatcherFakeTalkHandler(),
|
||||
): InvokeDispatcher {
|
||||
val appContext = RuntimeEnvironment.getApplication()
|
||||
shadowOf(appContext.packageManager).setSystemFeature(PackageManager.FEATURE_TELEPHONY, smsTelephonyAvailable)
|
||||
@@ -238,6 +262,7 @@ class InvokeDispatcherTest {
|
||||
stateProvider = InvokeDispatcherFakeNotificationsStateProvider(),
|
||||
),
|
||||
systemHandler = SystemHandler.forTesting(InvokeDispatcherFakeSystemNotificationPoster()),
|
||||
talkHandler = talkHandler,
|
||||
photosHandler = PhotosHandler.forTesting(appContext, InvokeDispatcherFakePhotosDataSource()),
|
||||
contactsHandler = ContactsHandler.forTesting(appContext, InvokeDispatcherFakeContactsDataSource()),
|
||||
calendarHandler = CalendarHandler.forTesting(appContext, InvokeDispatcherFakeCalendarDataSource()),
|
||||
@@ -312,6 +337,30 @@ private class InvokeDispatcherFakeSystemNotificationPoster : SystemNotificationP
|
||||
override fun post(request: SystemNotifyRequest) = Unit
|
||||
}
|
||||
|
||||
private class InvokeDispatcherFakeTalkHandler : TalkHandler {
|
||||
val calls = mutableListOf<String>()
|
||||
|
||||
override suspend fun handlePttStart(paramsJson: String?): GatewaySession.InvokeResult {
|
||||
calls.add("start")
|
||||
return GatewaySession.InvokeResult.ok("""{"captureId":"start"}""")
|
||||
}
|
||||
|
||||
override suspend fun handlePttStop(paramsJson: String?): GatewaySession.InvokeResult {
|
||||
calls.add("stop")
|
||||
return GatewaySession.InvokeResult.ok("""{"status":"stop"}""")
|
||||
}
|
||||
|
||||
override suspend fun handlePttCancel(paramsJson: String?): GatewaySession.InvokeResult {
|
||||
calls.add("cancel")
|
||||
return GatewaySession.InvokeResult.ok("""{"status":"cancel"}""")
|
||||
}
|
||||
|
||||
override suspend fun handlePttOnce(paramsJson: String?): GatewaySession.InvokeResult {
|
||||
calls.add("once")
|
||||
return GatewaySession.InvokeResult.ok("""{"status":"once"}""")
|
||||
}
|
||||
}
|
||||
|
||||
private class InvokeDispatcherFakePhotosDataSource : PhotosDataSource {
|
||||
override fun hasPermission(context: Context): Boolean = true
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ class OpenClawProtocolConstantsTest {
|
||||
assertEquals("canvas", OpenClawCapability.Canvas.rawValue)
|
||||
assertEquals("camera", OpenClawCapability.Camera.rawValue)
|
||||
assertEquals("voiceWake", OpenClawCapability.VoiceWake.rawValue)
|
||||
assertEquals("talk", OpenClawCapability.Talk.rawValue)
|
||||
assertEquals("location", OpenClawCapability.Location.rawValue)
|
||||
assertEquals("sms", OpenClawCapability.Sms.rawValue)
|
||||
assertEquals("device", OpenClawCapability.Device.rawValue)
|
||||
@@ -92,6 +93,14 @@ class OpenClawProtocolConstantsTest {
|
||||
assertEquals("sms.search", OpenClawSmsCommand.Search.rawValue)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun talkCommandsUseStableStrings() {
|
||||
assertEquals("talk.ptt.start", OpenClawTalkCommand.PttStart.rawValue)
|
||||
assertEquals("talk.ptt.stop", OpenClawTalkCommand.PttStop.rawValue)
|
||||
assertEquals("talk.ptt.cancel", OpenClawTalkCommand.PttCancel.rawValue)
|
||||
assertEquals("talk.ptt.once", OpenClawTalkCommand.PttOnce.rawValue)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun callLogCommandsUseStableStrings() {
|
||||
assertEquals("callLog.search", OpenClawCallLogCommand.Search.rawValue)
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
package ai.openclaw.app.voice
|
||||
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertNull
|
||||
import org.junit.Test
|
||||
|
||||
class ChatEventTextTest {
|
||||
private val json = Json { ignoreUnknownKeys = true }
|
||||
|
||||
@Test
|
||||
fun extractsAssistantTextParts() {
|
||||
val payload =
|
||||
payload(
|
||||
"""
|
||||
{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{ "type": "text", "text": "hello" },
|
||||
{ "type": "text", "text": "world" }
|
||||
]
|
||||
}
|
||||
}
|
||||
""",
|
||||
)
|
||||
|
||||
assertEquals("hello\nworld", ChatEventText.assistantTextFromPayload(payload))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun extractsPlainStringContent() {
|
||||
val payload =
|
||||
payload(
|
||||
"""
|
||||
{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "plain reply"
|
||||
}
|
||||
}
|
||||
""",
|
||||
)
|
||||
|
||||
assertEquals("plain reply", ChatEventText.assistantTextFromPayload(payload))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun ignoresUserMessages() {
|
||||
val payload =
|
||||
payload(
|
||||
"""
|
||||
{
|
||||
"message": {
|
||||
"role": "user",
|
||||
"content": [
|
||||
{ "type": "text", "text": "do not speak" }
|
||||
]
|
||||
}
|
||||
}
|
||||
""",
|
||||
)
|
||||
|
||||
assertNull(ChatEventText.assistantTextFromPayload(payload))
|
||||
}
|
||||
|
||||
private fun payload(source: String): JsonObject = json.parseToJsonElement(source.trimIndent()) as JsonObject
|
||||
}
|
||||
@@ -9,7 +9,10 @@ import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.Job
|
||||
import kotlinx.coroutines.SupervisorJob
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.test.runTest
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertFalse
|
||||
import org.junit.Assert.assertTrue
|
||||
import org.junit.Test
|
||||
import org.junit.runner.RunWith
|
||||
@@ -78,7 +81,54 @@ class TalkModeManagerTest {
|
||||
assertEquals(1L, playbackGeneration(manager).get())
|
||||
}
|
||||
|
||||
private fun createManager(): TalkModeManager {
|
||||
@Test
|
||||
fun nonPendingUserFinalDoesNotUseAllResponseTts() {
|
||||
val manager = createManager()
|
||||
|
||||
manager.ttsOnAllResponses = true
|
||||
manager.handleGatewayEvent("chat", chatFinalPayload(runId = "run-user", text = "do not speak", role = "user"))
|
||||
|
||||
assertEquals(0L, playbackGeneration(manager).get())
|
||||
}
|
||||
|
||||
@Test
|
||||
fun textReadyDoesNotEnterSpeakingUntilAudioPlaybackStarts() =
|
||||
runTest {
|
||||
val talkSpeakClient = FakeTalkSpeechSynthesizer()
|
||||
val talkAudioPlayer = FakeTalkAudioPlayer()
|
||||
val manager = createManager(talkSpeakClient = talkSpeakClient, talkAudioPlayer = talkAudioPlayer)
|
||||
|
||||
val job = launch { manager.speakAssistantReply("hello") }
|
||||
talkSpeakClient.requested.await()
|
||||
|
||||
assertEquals("Generating voice…", manager.statusText.value)
|
||||
assertFalse(manager.isSpeaking.value)
|
||||
|
||||
talkSpeakClient.result.complete(
|
||||
TalkSpeakResult.Success(
|
||||
TalkSpeakAudio(
|
||||
bytes = byteArrayOf(1, 2, 3),
|
||||
provider = "test",
|
||||
outputFormat = "mp3_44100_128",
|
||||
voiceCompatible = true,
|
||||
mimeType = "audio/mpeg",
|
||||
fileExtension = ".mp3",
|
||||
),
|
||||
),
|
||||
)
|
||||
talkAudioPlayer.started.await()
|
||||
|
||||
assertEquals("Speaking…", manager.statusText.value)
|
||||
assertTrue(manager.isSpeaking.value)
|
||||
|
||||
talkAudioPlayer.finished.complete(Unit)
|
||||
job.join()
|
||||
}
|
||||
|
||||
private fun createManager(
|
||||
talkSpeakClient: TalkSpeechSynthesizing = TalkSpeakClient(),
|
||||
talkAudioPlayer: TalkAudioPlaying? = null,
|
||||
): TalkModeManager {
|
||||
val app = RuntimeEnvironment.getApplication()
|
||||
val sessionJob = SupervisorJob()
|
||||
val session =
|
||||
@@ -96,6 +146,8 @@ class TalkModeManagerTest {
|
||||
session = session,
|
||||
supportsChatSubscribe = false,
|
||||
isConnected = { true },
|
||||
talkSpeakClient = talkSpeakClient,
|
||||
talkAudioPlayer = talkAudioPlayer ?: TalkAudioPlayer(app),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -124,6 +176,7 @@ class TalkModeManagerTest {
|
||||
private fun chatFinalPayload(
|
||||
runId: String,
|
||||
text: String,
|
||||
role: String = "assistant",
|
||||
): String =
|
||||
"""
|
||||
{
|
||||
@@ -131,7 +184,7 @@ class TalkModeManagerTest {
|
||||
"sessionKey": "main",
|
||||
"state": "final",
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"role": "$role",
|
||||
"content": [
|
||||
{ "type": "text", "text": "$text" }
|
||||
]
|
||||
@@ -140,6 +193,34 @@ class TalkModeManagerTest {
|
||||
""".trimIndent()
|
||||
}
|
||||
|
||||
private class FakeTalkSpeechSynthesizer : TalkSpeechSynthesizing {
|
||||
val requested = CompletableDeferred<Unit>()
|
||||
val result = CompletableDeferred<TalkSpeakResult>()
|
||||
|
||||
override suspend fun synthesize(
|
||||
text: String,
|
||||
directive: TalkDirective?,
|
||||
): TalkSpeakResult {
|
||||
requested.complete(Unit)
|
||||
return result.await()
|
||||
}
|
||||
}
|
||||
|
||||
private class FakeTalkAudioPlayer : TalkAudioPlaying {
|
||||
val started = CompletableDeferred<Unit>()
|
||||
val finished = CompletableDeferred<Unit>()
|
||||
var stopped = false
|
||||
|
||||
override suspend fun play(audio: TalkSpeakAudio) {
|
||||
started.complete(Unit)
|
||||
finished.await()
|
||||
}
|
||||
|
||||
override fun stop() {
|
||||
stopped = true
|
||||
}
|
||||
}
|
||||
|
||||
private class InMemoryDeviceAuthStore : DeviceAuthTokenStore {
|
||||
override fun loadEntry(
|
||||
deviceId: String,
|
||||
|
||||
@@ -821,6 +821,7 @@ final class GatewayConnectionController {
|
||||
if locationMode != .off { caps.append(OpenClawCapability.location.rawValue) }
|
||||
|
||||
caps.append(OpenClawCapability.device.rawValue)
|
||||
caps.append(OpenClawCapability.talk.rawValue)
|
||||
if WatchMessagingService.isSupportedOnDevice() {
|
||||
caps.append(OpenClawCapability.watch.rawValue)
|
||||
}
|
||||
|
||||
@@ -800,11 +800,11 @@ final class TalkModeManager: NSObject {
|
||||
}
|
||||
}
|
||||
let completion = await self.waitForChatCompletion(runId: runId, gateway: gateway, timeoutSeconds: 120)
|
||||
if completion == .timeout {
|
||||
if completion.state == .timeout {
|
||||
self.logger.warning(
|
||||
"chat completion timeout runId=\(runId, privacy: .public); attempting history fallback")
|
||||
GatewayDiagnostics.log("talk: chat completion timeout runId=\(runId)")
|
||||
} else if completion == .aborted {
|
||||
} else if completion.state == .aborted {
|
||||
self.statusText = "Aborted"
|
||||
self.logger.warning("chat completion aborted runId=\(runId, privacy: .public)")
|
||||
GatewayDiagnostics.log("talk: chat completion aborted runId=\(runId)")
|
||||
@@ -812,7 +812,7 @@ final class TalkModeManager: NSObject {
|
||||
await self.finishIncrementalSpeech()
|
||||
await self.start()
|
||||
return
|
||||
} else if completion == .error {
|
||||
} else if completion.state == .error {
|
||||
self.statusText = "Chat error"
|
||||
self.logger.warning("chat completion error runId=\(runId, privacy: .public)")
|
||||
GatewayDiagnostics.log("talk: chat completion error runId=\(runId)")
|
||||
@@ -822,16 +822,19 @@ final class TalkModeManager: NSObject {
|
||||
return
|
||||
}
|
||||
|
||||
var assistantText = try await self.waitForAssistantText(
|
||||
gateway: gateway,
|
||||
since: startedAt,
|
||||
timeoutSeconds: completion == .final ? 12 : 25)
|
||||
var assistantText = completion.assistantText
|
||||
if assistantText == nil, shouldIncremental {
|
||||
let fallback = self.incrementalSpeechBuffer.latestText
|
||||
if !fallback.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
|
||||
assistantText = fallback
|
||||
}
|
||||
}
|
||||
if assistantText == nil {
|
||||
assistantText = try await self.waitForAssistantTextFromHistory(
|
||||
gateway: gateway,
|
||||
since: startedAt,
|
||||
timeoutSeconds: completion.state == .final ? 12 : 25)
|
||||
}
|
||||
guard let assistantText else {
|
||||
self.statusText = "No reply"
|
||||
self.logger.warning("assistant text timeout runId=\(runId, privacy: .public)")
|
||||
@@ -898,6 +901,11 @@ final class TalkModeManager: NSObject {
|
||||
}
|
||||
}
|
||||
|
||||
private struct ChatCompletionResult {
|
||||
var state: ChatCompletionState
|
||||
var assistantText: String?
|
||||
}
|
||||
|
||||
private func sendChat(_ message: String, gateway: GatewayNodeSession) async throws -> String {
|
||||
struct SendResponse: Decodable { let runId: String }
|
||||
let payload: [String: Any] = [
|
||||
@@ -922,40 +930,51 @@ final class TalkModeManager: NSObject {
|
||||
private func waitForChatCompletion(
|
||||
runId: String,
|
||||
gateway: GatewayNodeSession,
|
||||
timeoutSeconds: Int = 120) async -> ChatCompletionState
|
||||
timeoutSeconds: Int = 120) async -> ChatCompletionResult
|
||||
{
|
||||
let stream = await gateway.subscribeServerEvents(bufferingNewest: 200)
|
||||
return await withTaskGroup(of: ChatCompletionState.self) { group in
|
||||
return await withTaskGroup(of: ChatCompletionResult.self) { group in
|
||||
group.addTask { [runId] in
|
||||
var latestAssistantText: String?
|
||||
for await evt in stream {
|
||||
if Task.isCancelled { return .timeout }
|
||||
if Task.isCancelled {
|
||||
return ChatCompletionResult(state: .timeout, assistantText: latestAssistantText)
|
||||
}
|
||||
guard evt.event == "chat", let payload = evt.payload else { continue }
|
||||
guard let chatEvent = try? GatewayPayloadDecoding.decode(payload, as: ChatEvent.self) else {
|
||||
guard let chatEvent = try? GatewayPayloadDecoding.decode(
|
||||
payload,
|
||||
as: OpenClawChatEventPayload.self)
|
||||
else {
|
||||
continue
|
||||
}
|
||||
guard chatEvent.runid == runId else { continue }
|
||||
if let state = chatEvent.state.value as? String {
|
||||
switch state {
|
||||
case "final": return .final
|
||||
case "aborted": return .aborted
|
||||
case "error": return .error
|
||||
default: break
|
||||
}
|
||||
guard chatEvent.runId == runId else { continue }
|
||||
if let text = OpenClawChatEventText.assistantText(from: chatEvent) {
|
||||
latestAssistantText = text
|
||||
}
|
||||
switch chatEvent.state {
|
||||
case "final":
|
||||
return ChatCompletionResult(state: .final, assistantText: latestAssistantText)
|
||||
case "aborted":
|
||||
return ChatCompletionResult(state: .aborted, assistantText: nil)
|
||||
case "error":
|
||||
return ChatCompletionResult(state: .error, assistantText: nil)
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
return .timeout
|
||||
return ChatCompletionResult(state: .timeout, assistantText: latestAssistantText)
|
||||
}
|
||||
group.addTask {
|
||||
try? await Task.sleep(nanoseconds: UInt64(timeoutSeconds) * 1_000_000_000)
|
||||
return .timeout
|
||||
return ChatCompletionResult(state: .timeout, assistantText: nil)
|
||||
}
|
||||
let result = await group.next() ?? .timeout
|
||||
let result = await group.next() ?? ChatCompletionResult(state: .timeout, assistantText: nil)
|
||||
group.cancelAll()
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
private func waitForAssistantText(
|
||||
private func waitForAssistantTextFromHistory(
|
||||
gateway: GatewayNodeSession,
|
||||
since: Double,
|
||||
timeoutSeconds: Int) async throws -> String?
|
||||
|
||||
@@ -36,6 +36,7 @@ import UIKit
|
||||
#expect(caps.contains(OpenClawCapability.camera.rawValue))
|
||||
#expect(caps.contains(OpenClawCapability.location.rawValue))
|
||||
#expect(caps.contains(OpenClawCapability.voiceWake.rawValue))
|
||||
#expect(caps.contains(OpenClawCapability.talk.rawValue))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -395,10 +395,18 @@ actor TalkModeRuntime {
|
||||
"talk chat.send ok runId=\(response.runId, privacy: .public) " +
|
||||
"session=\(sessionKey, privacy: .public)")
|
||||
|
||||
guard let assistantText = await self.waitForAssistantText(
|
||||
var assistantText = await self.waitForAssistantEventText(
|
||||
sessionKey: sessionKey,
|
||||
since: startedAt,
|
||||
runId: response.runId,
|
||||
timeoutSeconds: 45)
|
||||
if assistantText == nil {
|
||||
self.logger.warning("talk assistant event text missing; using history fallback")
|
||||
assistantText = await self.waitForAssistantTextFromHistory(
|
||||
sessionKey: sessionKey,
|
||||
since: startedAt,
|
||||
timeoutSeconds: 12)
|
||||
}
|
||||
guard let assistantText
|
||||
else {
|
||||
self.logger.warning("talk assistant text missing after timeout")
|
||||
await self.startListening()
|
||||
@@ -439,7 +447,67 @@ actor TalkModeRuntime {
|
||||
return TalkPromptBuilder.build(transcript: transcript, interruptedAtSeconds: interrupted)
|
||||
}
|
||||
|
||||
private func waitForAssistantText(
|
||||
private func waitForAssistantEventText(
|
||||
sessionKey: String,
|
||||
runId: String,
|
||||
timeoutSeconds: Int) async -> String?
|
||||
{
|
||||
let stream = await GatewayConnection.shared.subscribe(bufferingNewest: 200)
|
||||
return await withTaskGroup(of: String?.self) { group in
|
||||
group.addTask { [runId, sessionKey] in
|
||||
var latestText: String?
|
||||
for await push in stream {
|
||||
if Task.isCancelled { return latestText }
|
||||
guard case let .event(evt) = push else { continue }
|
||||
guard evt.event == "chat", let payload = evt.payload else { continue }
|
||||
guard let chatEvent = try? GatewayPayloadDecoding.decode(
|
||||
payload,
|
||||
as: OpenClawChatEventPayload.self)
|
||||
else {
|
||||
continue
|
||||
}
|
||||
guard chatEvent.runId == runId else { continue }
|
||||
if let eventSessionKey = chatEvent.sessionKey,
|
||||
!Self.matchesSessionKey(eventSessionKey, sessionKey)
|
||||
{
|
||||
continue
|
||||
}
|
||||
if let text = OpenClawChatEventText.assistantText(from: chatEvent) {
|
||||
latestText = text
|
||||
}
|
||||
switch chatEvent.state {
|
||||
case "final":
|
||||
return latestText
|
||||
case "aborted", "error":
|
||||
return nil
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
return latestText
|
||||
}
|
||||
group.addTask {
|
||||
try? await Task.sleep(nanoseconds: UInt64(timeoutSeconds) * 1_000_000_000)
|
||||
return nil
|
||||
}
|
||||
guard let result = await group.next() else {
|
||||
group.cancelAll()
|
||||
return nil
|
||||
}
|
||||
group.cancelAll()
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
private static func matchesSessionKey(_ incoming: String, _ current: String) -> Bool {
|
||||
let incoming = incoming.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
|
||||
let current = current.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
|
||||
if incoming == current { return true }
|
||||
return (incoming == "agent:main:main" && current == "main") ||
|
||||
(incoming == "main" && current == "agent:main:main")
|
||||
}
|
||||
|
||||
private func waitForAssistantTextFromHistory(
|
||||
sessionKey: String,
|
||||
since: Double,
|
||||
timeoutSeconds: Int) async -> String?
|
||||
@@ -1111,7 +1179,10 @@ extension TalkModeRuntime {
|
||||
} else {
|
||||
self.ttsLogger
|
||||
.info(
|
||||
"talk provider \(parsed.activeProvider, privacy: .public) uses gateway talk.speak with system voice fallback")
|
||||
"""
|
||||
talk provider \(parsed.activeProvider, privacy: .public) uses gateway talk.speak \
|
||||
with system voice fallback
|
||||
""")
|
||||
}
|
||||
return parsed
|
||||
} catch {
|
||||
|
||||
@@ -2630,6 +2630,116 @@ public struct TalkModeParams: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkEvent: Codable, Sendable {
|
||||
public let id: String
|
||||
public let type: AnyCodable
|
||||
public let sessionid: String
|
||||
public let turnid: String?
|
||||
public let captureid: String?
|
||||
public let seq: Int
|
||||
public let timestamp: String
|
||||
public let mode: AnyCodable
|
||||
public let transport: AnyCodable
|
||||
public let brain: AnyCodable
|
||||
public let provider: String?
|
||||
public let final: Bool?
|
||||
public let callid: String?
|
||||
public let itemid: String?
|
||||
public let parentid: String?
|
||||
public let payload: AnyCodable
|
||||
|
||||
public init(
|
||||
id: String,
|
||||
type: AnyCodable,
|
||||
sessionid: String,
|
||||
turnid: String?,
|
||||
captureid: String?,
|
||||
seq: Int,
|
||||
timestamp: String,
|
||||
mode: AnyCodable,
|
||||
transport: AnyCodable,
|
||||
brain: AnyCodable,
|
||||
provider: String?,
|
||||
final: Bool?,
|
||||
callid: String?,
|
||||
itemid: String?,
|
||||
parentid: String?,
|
||||
payload: AnyCodable)
|
||||
{
|
||||
self.id = id
|
||||
self.type = type
|
||||
self.sessionid = sessionid
|
||||
self.turnid = turnid
|
||||
self.captureid = captureid
|
||||
self.seq = seq
|
||||
self.timestamp = timestamp
|
||||
self.mode = mode
|
||||
self.transport = transport
|
||||
self.brain = brain
|
||||
self.provider = provider
|
||||
self.final = final
|
||||
self.callid = callid
|
||||
self.itemid = itemid
|
||||
self.parentid = parentid
|
||||
self.payload = payload
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case id
|
||||
case type
|
||||
case sessionid = "sessionId"
|
||||
case turnid = "turnId"
|
||||
case captureid = "captureId"
|
||||
case seq
|
||||
case timestamp
|
||||
case mode
|
||||
case transport
|
||||
case brain
|
||||
case provider
|
||||
case final
|
||||
case callid = "callId"
|
||||
case itemid = "itemId"
|
||||
case parentid = "parentId"
|
||||
case payload
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkCatalogParams: Codable, Sendable {}
|
||||
|
||||
public struct TalkCatalogResult: Codable, Sendable {
|
||||
public let modes: [AnyCodable]
|
||||
public let transports: [AnyCodable]
|
||||
public let brains: [AnyCodable]
|
||||
public let speech: [String: AnyCodable]
|
||||
public let transcription: [String: AnyCodable]
|
||||
public let realtime: [String: AnyCodable]
|
||||
|
||||
public init(
|
||||
modes: [AnyCodable],
|
||||
transports: [AnyCodable],
|
||||
brains: [AnyCodable],
|
||||
speech: [String: AnyCodable],
|
||||
transcription: [String: AnyCodable],
|
||||
realtime: [String: AnyCodable])
|
||||
{
|
||||
self.modes = modes
|
||||
self.transports = transports
|
||||
self.brains = brains
|
||||
self.speech = speech
|
||||
self.transcription = transcription
|
||||
self.realtime = realtime
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case modes
|
||||
case transports
|
||||
case brains
|
||||
case speech
|
||||
case transcription
|
||||
case realtime
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkConfigParams: Codable, Sendable {
|
||||
public let includesecrets: Bool?
|
||||
|
||||
@@ -2658,22 +2768,383 @@ public struct TalkConfigResult: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkHandoffCreateParams: Codable, Sendable {
|
||||
public let sessionkey: String
|
||||
public let sessionid: String?
|
||||
public let channel: String?
|
||||
public let target: String?
|
||||
public let provider: String?
|
||||
public let model: String?
|
||||
public let voice: String?
|
||||
public let mode: AnyCodable?
|
||||
public let transport: AnyCodable?
|
||||
public let brain: AnyCodable?
|
||||
public let ttlms: Int?
|
||||
|
||||
public init(
|
||||
sessionkey: String,
|
||||
sessionid: String?,
|
||||
channel: String?,
|
||||
target: String?,
|
||||
provider: String?,
|
||||
model: String?,
|
||||
voice: String?,
|
||||
mode: AnyCodable?,
|
||||
transport: AnyCodable?,
|
||||
brain: AnyCodable?,
|
||||
ttlms: Int?)
|
||||
{
|
||||
self.sessionkey = sessionkey
|
||||
self.sessionid = sessionid
|
||||
self.channel = channel
|
||||
self.target = target
|
||||
self.provider = provider
|
||||
self.model = model
|
||||
self.voice = voice
|
||||
self.mode = mode
|
||||
self.transport = transport
|
||||
self.brain = brain
|
||||
self.ttlms = ttlms
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case sessionkey = "sessionKey"
|
||||
case sessionid = "sessionId"
|
||||
case channel
|
||||
case target
|
||||
case provider
|
||||
case model
|
||||
case voice
|
||||
case mode
|
||||
case transport
|
||||
case brain
|
||||
case ttlms = "ttlMs"
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkHandoffCreateResult: Codable, Sendable {
|
||||
public let id: String
|
||||
public let roomid: String
|
||||
public let roomurl: String
|
||||
public let token: String
|
||||
public let sessionkey: String
|
||||
public let sessionid: String?
|
||||
public let channel: String?
|
||||
public let target: String?
|
||||
public let provider: String?
|
||||
public let model: String?
|
||||
public let voice: String?
|
||||
public let mode: AnyCodable
|
||||
public let transport: AnyCodable
|
||||
public let brain: AnyCodable
|
||||
public let createdat: Double
|
||||
public let expiresat: Double
|
||||
public let room: [String: AnyCodable]
|
||||
|
||||
public init(
|
||||
id: String,
|
||||
roomid: String,
|
||||
roomurl: String,
|
||||
token: String,
|
||||
sessionkey: String,
|
||||
sessionid: String?,
|
||||
channel: String?,
|
||||
target: String?,
|
||||
provider: String?,
|
||||
model: String?,
|
||||
voice: String?,
|
||||
mode: AnyCodable,
|
||||
transport: AnyCodable,
|
||||
brain: AnyCodable,
|
||||
createdat: Double,
|
||||
expiresat: Double,
|
||||
room: [String: AnyCodable])
|
||||
{
|
||||
self.id = id
|
||||
self.roomid = roomid
|
||||
self.roomurl = roomurl
|
||||
self.token = token
|
||||
self.sessionkey = sessionkey
|
||||
self.sessionid = sessionid
|
||||
self.channel = channel
|
||||
self.target = target
|
||||
self.provider = provider
|
||||
self.model = model
|
||||
self.voice = voice
|
||||
self.mode = mode
|
||||
self.transport = transport
|
||||
self.brain = brain
|
||||
self.createdat = createdat
|
||||
self.expiresat = expiresat
|
||||
self.room = room
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case id
|
||||
case roomid = "roomId"
|
||||
case roomurl = "roomUrl"
|
||||
case token
|
||||
case sessionkey = "sessionKey"
|
||||
case sessionid = "sessionId"
|
||||
case channel
|
||||
case target
|
||||
case provider
|
||||
case model
|
||||
case voice
|
||||
case mode
|
||||
case transport
|
||||
case brain
|
||||
case createdat = "createdAt"
|
||||
case expiresat = "expiresAt"
|
||||
case room
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkHandoffJoinParams: Codable, Sendable {
|
||||
public let id: String
|
||||
public let token: String
|
||||
|
||||
public init(
|
||||
id: String,
|
||||
token: String)
|
||||
{
|
||||
self.id = id
|
||||
self.token = token
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case id
|
||||
case token
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkHandoffJoinResult: Codable, Sendable {
|
||||
public let id: String
|
||||
public let roomid: String
|
||||
public let roomurl: String
|
||||
public let sessionkey: String
|
||||
public let sessionid: String?
|
||||
public let channel: String?
|
||||
public let target: String?
|
||||
public let provider: String?
|
||||
public let model: String?
|
||||
public let voice: String?
|
||||
public let mode: AnyCodable
|
||||
public let transport: AnyCodable
|
||||
public let brain: AnyCodable
|
||||
public let createdat: Double
|
||||
public let expiresat: Double
|
||||
public let room: [String: AnyCodable]
|
||||
|
||||
public init(
|
||||
id: String,
|
||||
roomid: String,
|
||||
roomurl: String,
|
||||
sessionkey: String,
|
||||
sessionid: String?,
|
||||
channel: String?,
|
||||
target: String?,
|
||||
provider: String?,
|
||||
model: String?,
|
||||
voice: String?,
|
||||
mode: AnyCodable,
|
||||
transport: AnyCodable,
|
||||
brain: AnyCodable,
|
||||
createdat: Double,
|
||||
expiresat: Double,
|
||||
room: [String: AnyCodable])
|
||||
{
|
||||
self.id = id
|
||||
self.roomid = roomid
|
||||
self.roomurl = roomurl
|
||||
self.sessionkey = sessionkey
|
||||
self.sessionid = sessionid
|
||||
self.channel = channel
|
||||
self.target = target
|
||||
self.provider = provider
|
||||
self.model = model
|
||||
self.voice = voice
|
||||
self.mode = mode
|
||||
self.transport = transport
|
||||
self.brain = brain
|
||||
self.createdat = createdat
|
||||
self.expiresat = expiresat
|
||||
self.room = room
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case id
|
||||
case roomid = "roomId"
|
||||
case roomurl = "roomUrl"
|
||||
case sessionkey = "sessionKey"
|
||||
case sessionid = "sessionId"
|
||||
case channel
|
||||
case target
|
||||
case provider
|
||||
case model
|
||||
case voice
|
||||
case mode
|
||||
case transport
|
||||
case brain
|
||||
case createdat = "createdAt"
|
||||
case expiresat = "expiresAt"
|
||||
case room
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkHandoffRevokeParams: Codable, Sendable {
|
||||
public let id: String
|
||||
|
||||
public init(
|
||||
id: String)
|
||||
{
|
||||
self.id = id
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case id
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkHandoffRevokeResult: Codable, Sendable {
|
||||
public let ok: Bool
|
||||
public let revoked: Bool
|
||||
|
||||
public init(
|
||||
ok: Bool,
|
||||
revoked: Bool)
|
||||
{
|
||||
self.ok = ok
|
||||
self.revoked = revoked
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case ok
|
||||
case revoked
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkHandoffTurnStartParams: Codable, Sendable {
|
||||
public let id: String
|
||||
public let token: String
|
||||
public let turnid: String?
|
||||
|
||||
public init(
|
||||
id: String,
|
||||
token: String,
|
||||
turnid: String?)
|
||||
{
|
||||
self.id = id
|
||||
self.token = token
|
||||
self.turnid = turnid
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case id
|
||||
case token
|
||||
case turnid = "turnId"
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkHandoffTurnEndParams: Codable, Sendable {
|
||||
public let id: String
|
||||
public let token: String
|
||||
public let turnid: String?
|
||||
|
||||
public init(
|
||||
id: String,
|
||||
token: String,
|
||||
turnid: String?)
|
||||
{
|
||||
self.id = id
|
||||
self.token = token
|
||||
self.turnid = turnid
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case id
|
||||
case token
|
||||
case turnid = "turnId"
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkHandoffTurnCancelParams: Codable, Sendable {
|
||||
public let id: String
|
||||
public let token: String
|
||||
public let turnid: String?
|
||||
public let reason: String?
|
||||
|
||||
public init(
|
||||
id: String,
|
||||
token: String,
|
||||
turnid: String?,
|
||||
reason: String?)
|
||||
{
|
||||
self.id = id
|
||||
self.token = token
|
||||
self.turnid = turnid
|
||||
self.reason = reason
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case id
|
||||
case token
|
||||
case turnid = "turnId"
|
||||
case reason
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkHandoffTurnResult: Codable, Sendable {
|
||||
public let ok: Bool
|
||||
public let record: TalkHandoffJoinResult
|
||||
public let turnid: String
|
||||
public let events: [TalkEvent]
|
||||
|
||||
public init(
|
||||
ok: Bool,
|
||||
record: TalkHandoffJoinResult,
|
||||
turnid: String,
|
||||
events: [TalkEvent])
|
||||
{
|
||||
self.ok = ok
|
||||
self.record = record
|
||||
self.turnid = turnid
|
||||
self.events = events
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
case ok
|
||||
case record
|
||||
case turnid = "turnId"
|
||||
case events
|
||||
}
|
||||
}
|
||||
|
||||
public struct TalkRealtimeSessionParams: Codable, Sendable {
|
||||
public let sessionkey: String?
|
||||
public let provider: String?
|
||||
public let model: String?
|
||||
public let voice: String?
|
||||
public let mode: AnyCodable?
|
||||
public let transport: AnyCodable?
|
||||
public let brain: AnyCodable?
|
||||
|
||||
public init(
|
||||
sessionkey: String?,
|
||||
provider: String?,
|
||||
model: String?,
|
||||
voice: String?)
|
||||
voice: String?,
|
||||
mode: AnyCodable?,
|
||||
transport: AnyCodable?,
|
||||
brain: AnyCodable?)
|
||||
{
|
||||
self.sessionkey = sessionkey
|
||||
self.provider = provider
|
||||
self.model = model
|
||||
self.voice = voice
|
||||
self.mode = mode
|
||||
self.transport = transport
|
||||
self.brain = brain
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
@@ -2681,6 +3152,9 @@ public struct TalkRealtimeSessionParams: Codable, Sendable {
|
||||
case provider
|
||||
case model
|
||||
case voice
|
||||
case mode
|
||||
case transport
|
||||
case brain
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2706,6 +3180,24 @@ public struct TalkRealtimeRelayAudioParams: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
/// Codable, Sendable payload with a required relay session id (JSON key `relaySessionId`)
/// and an optional `reason` string.
public struct TalkRealtimeRelayCancelParams: Codable, Sendable {
    public let relaysessionid: String
    public let reason: String?

    private enum CodingKeys: String, CodingKey {
        case relaysessionid = "relaySessionId"
        case reason
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(relaysessionid: String, reason: String?) {
        self.relaysessionid = relaysessionid
        self.reason = reason
    }
}
|
||||
|
||||
public struct TalkRealtimeRelayMarkParams: Codable, Sendable {
|
||||
public let relaysessionid: String
|
||||
public let markname: String?
|
||||
@@ -2774,6 +3266,166 @@ public struct TalkRealtimeRelayOkResult: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
/// Codable, Sendable payload with five stored fields.
/// JSON keys `sessionKey`, `callId` and `relaySessionId` map to the lowercase Swift properties.
public struct TalkRealtimeToolCallParams: Codable, Sendable {
    public let sessionkey: String
    public let callid: String
    public let name: String
    public let args: AnyCodable?
    public let relaysessionid: String?

    private enum CodingKeys: String, CodingKey {
        case sessionkey = "sessionKey"
        case callid = "callId"
        case name, args
        case relaysessionid = "relaySessionId"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(
        sessionkey: String,
        callid: String,
        name: String,
        args: AnyCodable?,
        relaysessionid: String?)
    {
        self.sessionkey = sessionkey
        self.callid = callid
        self.name = name
        self.args = args
        self.relaysessionid = relaysessionid
    }
}
|
||||
|
||||
/// Codable, Sendable result with two required strings, coded as `runId` and `idempotencyKey`.
public struct TalkRealtimeToolCallResult: Codable, Sendable {
    public let runid: String
    public let idempotencykey: String

    private enum CodingKeys: String, CodingKey {
        case runid = "runId"
        case idempotencykey = "idempotencyKey"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(runid: String, idempotencykey: String) {
        self.runid = runid
        self.idempotencykey = idempotencykey
    }
}
|
||||
|
||||
/// Codable, Sendable payload holding a single optional `provider` string.
public struct TalkTranscriptionSessionParams: Codable, Sendable {
    public let provider: String?

    private enum CodingKeys: String, CodingKey {
        case provider
    }

    /// Memberwise initializer; the argument is stored unchanged.
    public init(provider: String?) {
        self.provider = provider
    }
}
|
||||
|
||||
/// Codable, Sendable result with six required fields.
/// `transcriptionsessionid` and `expiresat` are coded as `transcriptionSessionId` and `expiresAt`.
public struct TalkTranscriptionSessionResult: Codable, Sendable {
    public let provider: String
    public let mode: String
    public let transport: String
    public let transcriptionsessionid: String
    public let audio: [String: AnyCodable]
    public let expiresat: Double

    private enum CodingKeys: String, CodingKey {
        case provider, mode, transport
        case transcriptionsessionid = "transcriptionSessionId"
        case audio
        case expiresat = "expiresAt"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(
        provider: String,
        mode: String,
        transport: String,
        transcriptionsessionid: String,
        audio: [String: AnyCodable],
        expiresat: Double)
    {
        self.provider = provider
        self.mode = mode
        self.transport = transport
        self.transcriptionsessionid = transcriptionsessionid
        self.audio = audio
        self.expiresat = expiresat
    }
}
|
||||
|
||||
/// Codable, Sendable payload with two required strings, coded as
/// `transcriptionSessionId` and `audioBase64`.
public struct TalkTranscriptionRelayAudioParams: Codable, Sendable {
    public let transcriptionsessionid: String
    public let audiobase64: String

    private enum CodingKeys: String, CodingKey {
        case transcriptionsessionid = "transcriptionSessionId"
        case audiobase64 = "audioBase64"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(transcriptionsessionid: String, audiobase64: String) {
        self.transcriptionsessionid = transcriptionsessionid
        self.audiobase64 = audiobase64
    }
}
|
||||
|
||||
/// Codable, Sendable payload with a required session id (JSON key `transcriptionSessionId`)
/// and an optional `reason` string.
public struct TalkTranscriptionRelayCancelParams: Codable, Sendable {
    public let transcriptionsessionid: String
    public let reason: String?

    private enum CodingKeys: String, CodingKey {
        case transcriptionsessionid = "transcriptionSessionId"
        case reason
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(transcriptionsessionid: String, reason: String?) {
        self.transcriptionsessionid = transcriptionsessionid
        self.reason = reason
    }
}
|
||||
|
||||
/// Codable, Sendable payload holding one required string coded as `transcriptionSessionId`.
public struct TalkTranscriptionRelayStopParams: Codable, Sendable {
    public let transcriptionsessionid: String

    private enum CodingKeys: String, CodingKey {
        case transcriptionsessionid = "transcriptionSessionId"
    }

    /// Memberwise initializer; the argument is stored unchanged.
    public init(transcriptionsessionid: String) {
        self.transcriptionsessionid = transcriptionsessionid
    }
}
|
||||
|
||||
/// Codable, Sendable result holding a single `ok` flag.
public struct TalkTranscriptionRelayOkResult: Codable, Sendable {
    public let ok: Bool

    private enum CodingKeys: String, CodingKey {
        case ok
    }

    /// Memberwise initializer; the argument is stored unchanged.
    public init(ok: Bool) {
        self.ok = ok
    }
}
|
||||
|
||||
public struct TalkSpeakParams: Codable, Sendable {
|
||||
public let text: String
|
||||
public let voiceid: String?
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
import OpenClawKit
|
||||
|
||||
/// Namespace of helpers that pull trimmed text out of a chat event message value.
///
/// A message may be a bare string, or a dictionary with an optional `role` and a `content`
/// entry. Dictionaries whose role is present, non-blank and not "assistant" (compared
/// case-insensitively) yield `nil`.
public enum OpenClawChatEventText {
    /// Extracts text from `event.message`; returns `nil` when nothing non-blank is found.
    public static func assistantText(from event: OpenClawChatEventPayload) -> String? {
        return Self.assistantText(fromMessage: event.message)
    }

    /// Extracts text from an optional wrapped message; a `nil` message yields `nil`.
    public static func assistantText(fromMessage message: AnyCodable?) -> String? {
        guard let wrapped = message else { return nil }
        return Self.assistantText(fromValue: wrapped.value)
    }

    /// Handles the unwrapped message: a bare string is trimmed directly, otherwise the value
    /// must be a dictionary that passes the role check and has a `content` entry.
    private static func assistantText(fromValue value: Any) -> String? {
        if let plain = value as? String {
            return Self.trimmed(plain)
        }
        guard let fields = Self.dictionary(from: value) else { return nil }

        // A missing or blank role is accepted; only an explicit non-assistant role is rejected.
        let role = Self.stringValue(fields["role"])?
            .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        if !role.isEmpty, role.lowercased() != "assistant" {
            return nil
        }

        guard let content = fields["content"] else { return nil }
        return Self.textContent(from: content)
    }

    /// Flattens `content` (a string, an array of parts, or a single part) into one string,
    /// joining the non-empty parts with newlines.
    private static func textContent(from value: Any) -> String? {
        if let plain = value as? String {
            return Self.trimmed(plain)
        }

        var pieces: [String] = []
        if let wrappedItems = value as? [AnyCodable] {
            pieces = wrappedItems.compactMap { Self.textContentPart(from: $0.value) }
        } else if let items = value as? [Any] {
            pieces = items.compactMap { Self.textContentPart(from: $0) }
        } else if let single = Self.textContentPart(from: value) {
            pieces = [single]
        }
        return Self.trimmed(pieces.joined(separator: "\n"))
    }

    /// Resolves one content part: either a bare string or a dictionary with a string `text` entry.
    private static func textContentPart(from value: Any) -> String? {
        if let plain = value as? String {
            return Self.trimmed(plain)
        }
        guard let fields = Self.dictionary(from: value),
              let text = Self.stringValue(fields["text"])
        else {
            return nil
        }
        return Self.trimmed(text)
    }

    /// Casts `value` to a string-keyed dictionary, unwrapping `AnyCodable` values when present.
    private static func dictionary(from value: Any) -> [String: Any]? {
        switch value {
        case let wrapped as [String: AnyCodable]:
            return wrapped.mapValues { $0.value }
        case let raw as [String: Any]:
            return raw
        default:
            return nil
        }
    }

    /// Returns `value` as a string, looking through any `AnyCodable` wrappers.
    private static func stringValue(_ value: Any?) -> String? {
        if let wrapped = value as? AnyCodable {
            return Self.stringValue(wrapped.value)
        }
        return value as? String
    }

    /// Trims surrounding whitespace and newlines; blank results become `nil`.
    private static func trimmed(_ text: String) -> String? {
        let stripped = text.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !stripped.isEmpty else { return nil }
        return stripped
    }
}
|
||||
@@ -6,6 +6,7 @@ public enum OpenClawCapability: String, Codable, Sendable {
|
||||
case camera
|
||||
case screen
|
||||
case voiceWake
|
||||
case talk
|
||||
case location
|
||||
case device
|
||||
case watch
|
||||
|
||||
@@ -2630,6 +2630,116 @@ public struct TalkModeParams: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
/// Codable, Sendable event model with sixteen stored fields.
/// Identifier properties are lowercase in Swift and camelCase on the wire
/// (`sessionId`, `turnId`, `captureId`, `callId`, `itemId`, `parentId`).
public struct TalkEvent: Codable, Sendable {
    public let id: String
    public let type: AnyCodable
    public let sessionid: String
    public let turnid: String?
    public let captureid: String?
    public let seq: Int
    public let timestamp: String
    public let mode: AnyCodable
    public let transport: AnyCodable
    public let brain: AnyCodable
    public let provider: String?
    public let final: Bool?
    public let callid: String?
    public let itemid: String?
    public let parentid: String?
    public let payload: AnyCodable

    private enum CodingKeys: String, CodingKey {
        case id, type
        case sessionid = "sessionId"
        case turnid = "turnId"
        case captureid = "captureId"
        case seq, timestamp, mode, transport, brain, provider, final
        case callid = "callId"
        case itemid = "itemId"
        case parentid = "parentId"
        case payload
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(
        id: String,
        type: AnyCodable,
        sessionid: String,
        turnid: String?,
        captureid: String?,
        seq: Int,
        timestamp: String,
        mode: AnyCodable,
        transport: AnyCodable,
        brain: AnyCodable,
        provider: String?,
        final: Bool?,
        callid: String?,
        itemid: String?,
        parentid: String?,
        payload: AnyCodable)
    {
        self.id = id
        self.type = type
        self.sessionid = sessionid
        self.turnid = turnid
        self.captureid = captureid
        self.seq = seq
        self.timestamp = timestamp
        self.mode = mode
        self.transport = transport
        self.brain = brain
        self.provider = provider
        self.final = final
        self.callid = callid
        self.itemid = itemid
        self.parentid = parentid
        self.payload = payload
    }
}
|
||||
|
||||
/// Empty Codable, Sendable payload: the struct declares no stored properties.
public struct TalkCatalogParams: Codable, Sendable {}
|
||||
|
||||
/// Codable, Sendable result with three `AnyCodable` arrays and three string-keyed
/// `AnyCodable` dictionaries. All JSON keys match their property names.
public struct TalkCatalogResult: Codable, Sendable {
    public let modes: [AnyCodable]
    public let transports: [AnyCodable]
    public let brains: [AnyCodable]
    public let speech: [String: AnyCodable]
    public let transcription: [String: AnyCodable]
    public let realtime: [String: AnyCodable]

    private enum CodingKeys: String, CodingKey {
        case modes, transports, brains, speech, transcription, realtime
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(
        modes: [AnyCodable],
        transports: [AnyCodable],
        brains: [AnyCodable],
        speech: [String: AnyCodable],
        transcription: [String: AnyCodable],
        realtime: [String: AnyCodable])
    {
        self.modes = modes
        self.transports = transports
        self.brains = brains
        self.speech = speech
        self.transcription = transcription
        self.realtime = realtime
    }
}
|
||||
|
||||
public struct TalkConfigParams: Codable, Sendable {
|
||||
public let includesecrets: Bool?
|
||||
|
||||
@@ -2658,22 +2768,383 @@ public struct TalkConfigResult: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
/// Codable, Sendable payload with one required field (`sessionkey`) and ten optional ones.
/// `sessionkey`, `sessionid` and `ttlms` are coded as `sessionKey`, `sessionId` and `ttlMs`.
public struct TalkHandoffCreateParams: Codable, Sendable {
    public let sessionkey: String
    public let sessionid: String?
    public let channel: String?
    public let target: String?
    public let provider: String?
    public let model: String?
    public let voice: String?
    public let mode: AnyCodable?
    public let transport: AnyCodable?
    public let brain: AnyCodable?
    public let ttlms: Int?

    private enum CodingKeys: String, CodingKey {
        case sessionkey = "sessionKey"
        case sessionid = "sessionId"
        case channel, target, provider, model, voice, mode, transport, brain
        case ttlms = "ttlMs"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(
        sessionkey: String,
        sessionid: String?,
        channel: String?,
        target: String?,
        provider: String?,
        model: String?,
        voice: String?,
        mode: AnyCodable?,
        transport: AnyCodable?,
        brain: AnyCodable?,
        ttlms: Int?)
    {
        self.sessionkey = sessionkey
        self.sessionid = sessionid
        self.channel = channel
        self.target = target
        self.provider = provider
        self.model = model
        self.voice = voice
        self.mode = mode
        self.transport = transport
        self.brain = brain
        self.ttlms = ttlms
    }
}
|
||||
|
||||
/// Codable, Sendable result with seventeen stored fields, including a `token` string.
/// camelCase JSON keys (`roomId`, `roomUrl`, `sessionKey`, `sessionId`, `createdAt`,
/// `expiresAt`) map to lowercase Swift properties.
public struct TalkHandoffCreateResult: Codable, Sendable {
    public let id: String
    public let roomid: String
    public let roomurl: String
    public let token: String
    public let sessionkey: String
    public let sessionid: String?
    public let channel: String?
    public let target: String?
    public let provider: String?
    public let model: String?
    public let voice: String?
    public let mode: AnyCodable
    public let transport: AnyCodable
    public let brain: AnyCodable
    public let createdat: Double
    public let expiresat: Double
    public let room: [String: AnyCodable]

    private enum CodingKeys: String, CodingKey {
        case id
        case roomid = "roomId"
        case roomurl = "roomUrl"
        case token
        case sessionkey = "sessionKey"
        case sessionid = "sessionId"
        case channel, target, provider, model, voice, mode, transport, brain
        case createdat = "createdAt"
        case expiresat = "expiresAt"
        case room
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(
        id: String,
        roomid: String,
        roomurl: String,
        token: String,
        sessionkey: String,
        sessionid: String?,
        channel: String?,
        target: String?,
        provider: String?,
        model: String?,
        voice: String?,
        mode: AnyCodable,
        transport: AnyCodable,
        brain: AnyCodable,
        createdat: Double,
        expiresat: Double,
        room: [String: AnyCodable])
    {
        self.id = id
        self.roomid = roomid
        self.roomurl = roomurl
        self.token = token
        self.sessionkey = sessionkey
        self.sessionid = sessionid
        self.channel = channel
        self.target = target
        self.provider = provider
        self.model = model
        self.voice = voice
        self.mode = mode
        self.transport = transport
        self.brain = brain
        self.createdat = createdat
        self.expiresat = expiresat
        self.room = room
    }
}
|
||||
|
||||
/// Codable, Sendable payload with two required strings, `id` and `token`.
public struct TalkHandoffJoinParams: Codable, Sendable {
    public let id: String
    public let token: String

    private enum CodingKeys: String, CodingKey {
        case id, token
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(id: String, token: String) {
        self.id = id
        self.token = token
    }
}
|
||||
|
||||
/// Codable, Sendable result with sixteen stored fields.
/// camelCase JSON keys (`roomId`, `roomUrl`, `sessionKey`, `sessionId`, `createdAt`,
/// `expiresAt`) map to lowercase Swift properties.
public struct TalkHandoffJoinResult: Codable, Sendable {
    public let id: String
    public let roomid: String
    public let roomurl: String
    public let sessionkey: String
    public let sessionid: String?
    public let channel: String?
    public let target: String?
    public let provider: String?
    public let model: String?
    public let voice: String?
    public let mode: AnyCodable
    public let transport: AnyCodable
    public let brain: AnyCodable
    public let createdat: Double
    public let expiresat: Double
    public let room: [String: AnyCodable]

    private enum CodingKeys: String, CodingKey {
        case id
        case roomid = "roomId"
        case roomurl = "roomUrl"
        case sessionkey = "sessionKey"
        case sessionid = "sessionId"
        case channel, target, provider, model, voice, mode, transport, brain
        case createdat = "createdAt"
        case expiresat = "expiresAt"
        case room
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(
        id: String,
        roomid: String,
        roomurl: String,
        sessionkey: String,
        sessionid: String?,
        channel: String?,
        target: String?,
        provider: String?,
        model: String?,
        voice: String?,
        mode: AnyCodable,
        transport: AnyCodable,
        brain: AnyCodable,
        createdat: Double,
        expiresat: Double,
        room: [String: AnyCodable])
    {
        self.id = id
        self.roomid = roomid
        self.roomurl = roomurl
        self.sessionkey = sessionkey
        self.sessionid = sessionid
        self.channel = channel
        self.target = target
        self.provider = provider
        self.model = model
        self.voice = voice
        self.mode = mode
        self.transport = transport
        self.brain = brain
        self.createdat = createdat
        self.expiresat = expiresat
        self.room = room
    }
}
|
||||
|
||||
/// Codable, Sendable payload holding a single required `id` string.
public struct TalkHandoffRevokeParams: Codable, Sendable {
    public let id: String

    private enum CodingKeys: String, CodingKey {
        case id
    }

    /// Memberwise initializer; the argument is stored unchanged.
    public init(id: String) {
        self.id = id
    }
}
|
||||
|
||||
/// Codable, Sendable result with two required flags, `ok` and `revoked`.
public struct TalkHandoffRevokeResult: Codable, Sendable {
    public let ok: Bool
    public let revoked: Bool

    private enum CodingKeys: String, CodingKey {
        case ok, revoked
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(ok: Bool, revoked: Bool) {
        self.ok = ok
        self.revoked = revoked
    }
}
|
||||
|
||||
/// Codable, Sendable payload with required `id` and `token` strings and an optional
/// turn identifier coded under the JSON key `turnId`.
public struct TalkHandoffTurnStartParams: Codable, Sendable {
    public let id: String
    public let token: String
    public let turnid: String?

    private enum CodingKeys: String, CodingKey {
        case id, token
        case turnid = "turnId"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(id: String, token: String, turnid: String?) {
        self.id = id
        self.token = token
        self.turnid = turnid
    }
}
|
||||
|
||||
/// Codable, Sendable payload with required `id` and `token` strings and an optional
/// turn identifier coded under the JSON key `turnId`.
public struct TalkHandoffTurnEndParams: Codable, Sendable {
    public let id: String
    public let token: String
    public let turnid: String?

    private enum CodingKeys: String, CodingKey {
        case id, token
        case turnid = "turnId"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(id: String, token: String, turnid: String?) {
        self.id = id
        self.token = token
        self.turnid = turnid
    }
}
|
||||
|
||||
/// Codable, Sendable payload with four stored fields.
/// The `turnid` property is coded under the JSON key `turnId`; other keys match their property names.
public struct TalkHandoffTurnCancelParams: Codable, Sendable {
    public let id: String
    public let token: String
    public let turnid: String?
    public let reason: String?

    private enum CodingKeys: String, CodingKey {
        case id, token
        case turnid = "turnId"
        case reason
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(id: String, token: String, turnid: String?, reason: String?) {
        self.id = id
        self.token = token
        self.turnid = turnid
        self.reason = reason
    }
}
|
||||
|
||||
/// Codable, Sendable result carrying a status flag, a `TalkHandoffJoinResult` record,
/// a turn identifier (JSON key `turnId`) and a list of `TalkEvent` values.
public struct TalkHandoffTurnResult: Codable, Sendable {
    public let ok: Bool
    public let record: TalkHandoffJoinResult
    public let turnid: String
    public let events: [TalkEvent]

    private enum CodingKeys: String, CodingKey {
        case ok, record
        case turnid = "turnId"
        case events
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(ok: Bool, record: TalkHandoffJoinResult, turnid: String, events: [TalkEvent]) {
        self.ok = ok
        self.record = record
        self.turnid = turnid
        self.events = events
    }
}
|
||||
|
||||
public struct TalkRealtimeSessionParams: Codable, Sendable {
|
||||
public let sessionkey: String?
|
||||
public let provider: String?
|
||||
public let model: String?
|
||||
public let voice: String?
|
||||
public let mode: AnyCodable?
|
||||
public let transport: AnyCodable?
|
||||
public let brain: AnyCodable?
|
||||
|
||||
public init(
|
||||
sessionkey: String?,
|
||||
provider: String?,
|
||||
model: String?,
|
||||
voice: String?)
|
||||
voice: String?,
|
||||
mode: AnyCodable?,
|
||||
transport: AnyCodable?,
|
||||
brain: AnyCodable?)
|
||||
{
|
||||
self.sessionkey = sessionkey
|
||||
self.provider = provider
|
||||
self.model = model
|
||||
self.voice = voice
|
||||
self.mode = mode
|
||||
self.transport = transport
|
||||
self.brain = brain
|
||||
}
|
||||
|
||||
private enum CodingKeys: String, CodingKey {
|
||||
@@ -2681,6 +3152,9 @@ public struct TalkRealtimeSessionParams: Codable, Sendable {
|
||||
case provider
|
||||
case model
|
||||
case voice
|
||||
case mode
|
||||
case transport
|
||||
case brain
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2706,6 +3180,24 @@ public struct TalkRealtimeRelayAudioParams: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
/// Codable, Sendable payload with a required relay session id (JSON key `relaySessionId`)
/// and an optional `reason` string.
public struct TalkRealtimeRelayCancelParams: Codable, Sendable {
    public let relaysessionid: String
    public let reason: String?

    private enum CodingKeys: String, CodingKey {
        case relaysessionid = "relaySessionId"
        case reason
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(relaysessionid: String, reason: String?) {
        self.relaysessionid = relaysessionid
        self.reason = reason
    }
}
|
||||
|
||||
public struct TalkRealtimeRelayMarkParams: Codable, Sendable {
|
||||
public let relaysessionid: String
|
||||
public let markname: String?
|
||||
@@ -2774,6 +3266,166 @@ public struct TalkRealtimeRelayOkResult: Codable, Sendable {
|
||||
}
|
||||
}
|
||||
|
||||
/// Codable, Sendable payload with five stored fields.
/// JSON keys `sessionKey`, `callId` and `relaySessionId` map to the lowercase Swift properties.
public struct TalkRealtimeToolCallParams: Codable, Sendable {
    public let sessionkey: String
    public let callid: String
    public let name: String
    public let args: AnyCodable?
    public let relaysessionid: String?

    private enum CodingKeys: String, CodingKey {
        case sessionkey = "sessionKey"
        case callid = "callId"
        case name, args
        case relaysessionid = "relaySessionId"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(
        sessionkey: String,
        callid: String,
        name: String,
        args: AnyCodable?,
        relaysessionid: String?)
    {
        self.sessionkey = sessionkey
        self.callid = callid
        self.name = name
        self.args = args
        self.relaysessionid = relaysessionid
    }
}
|
||||
|
||||
/// Codable, Sendable result with two required strings, coded as `runId` and `idempotencyKey`.
public struct TalkRealtimeToolCallResult: Codable, Sendable {
    public let runid: String
    public let idempotencykey: String

    private enum CodingKeys: String, CodingKey {
        case runid = "runId"
        case idempotencykey = "idempotencyKey"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(runid: String, idempotencykey: String) {
        self.runid = runid
        self.idempotencykey = idempotencykey
    }
}
|
||||
|
||||
/// Codable, Sendable payload holding a single optional `provider` string.
public struct TalkTranscriptionSessionParams: Codable, Sendable {
    public let provider: String?

    private enum CodingKeys: String, CodingKey {
        case provider
    }

    /// Memberwise initializer; the argument is stored unchanged.
    public init(provider: String?) {
        self.provider = provider
    }
}
|
||||
|
||||
/// Codable, Sendable result with six required fields.
/// `transcriptionsessionid` and `expiresat` are coded as `transcriptionSessionId` and `expiresAt`.
public struct TalkTranscriptionSessionResult: Codable, Sendable {
    public let provider: String
    public let mode: String
    public let transport: String
    public let transcriptionsessionid: String
    public let audio: [String: AnyCodable]
    public let expiresat: Double

    private enum CodingKeys: String, CodingKey {
        case provider, mode, transport
        case transcriptionsessionid = "transcriptionSessionId"
        case audio
        case expiresat = "expiresAt"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(
        provider: String,
        mode: String,
        transport: String,
        transcriptionsessionid: String,
        audio: [String: AnyCodable],
        expiresat: Double)
    {
        self.provider = provider
        self.mode = mode
        self.transport = transport
        self.transcriptionsessionid = transcriptionsessionid
        self.audio = audio
        self.expiresat = expiresat
    }
}
|
||||
|
||||
/// Codable, Sendable payload with two required strings, coded as
/// `transcriptionSessionId` and `audioBase64`.
public struct TalkTranscriptionRelayAudioParams: Codable, Sendable {
    public let transcriptionsessionid: String
    public let audiobase64: String

    private enum CodingKeys: String, CodingKey {
        case transcriptionsessionid = "transcriptionSessionId"
        case audiobase64 = "audioBase64"
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(transcriptionsessionid: String, audiobase64: String) {
        self.transcriptionsessionid = transcriptionsessionid
        self.audiobase64 = audiobase64
    }
}
|
||||
|
||||
/// Codable, Sendable payload with a required session id (JSON key `transcriptionSessionId`)
/// and an optional `reason` string.
public struct TalkTranscriptionRelayCancelParams: Codable, Sendable {
    public let transcriptionsessionid: String
    public let reason: String?

    private enum CodingKeys: String, CodingKey {
        case transcriptionsessionid = "transcriptionSessionId"
        case reason
    }

    /// Memberwise initializer; each argument is stored unchanged.
    public init(transcriptionsessionid: String, reason: String?) {
        self.transcriptionsessionid = transcriptionsessionid
        self.reason = reason
    }
}
|
||||
|
||||
/// Codable, Sendable payload holding one required string coded as `transcriptionSessionId`.
public struct TalkTranscriptionRelayStopParams: Codable, Sendable {
    public let transcriptionsessionid: String

    private enum CodingKeys: String, CodingKey {
        case transcriptionsessionid = "transcriptionSessionId"
    }

    /// Memberwise initializer; the argument is stored unchanged.
    public init(transcriptionsessionid: String) {
        self.transcriptionsessionid = transcriptionsessionid
    }
}
|
||||
|
||||
/// Codable, Sendable result holding a single `ok` flag.
public struct TalkTranscriptionRelayOkResult: Codable, Sendable {
    public let ok: Bool

    private enum CodingKeys: String, CodingKey {
        case ok
    }

    /// Memberwise initializer; the argument is stored unchanged.
    public init(ok: Bool) {
        self.ok = ok
    }
}
|
||||
|
||||
public struct TalkSpeakParams: Codable, Sendable {
|
||||
public let text: String
|
||||
public let voiceid: String?
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
import OpenClawKit
|
||||
import Testing
|
||||
@testable import OpenClawChatUI
|
||||
|
||||
/// Checks `OpenClawChatEventText.assistantText(from:)` against structured content parts,
/// a non-assistant role, and plain string content.
struct ChatEventTextTests {
    /// Two text parts under an assistant role are joined with a newline.
    @Test func `extracts assistant text from final chat event message`() {
        let payload = OpenClawChatEventPayload(
            runId: "run-1",
            sessionKey: "main",
            state: "final",
            message: AnyCodable([
                "role": "assistant",
                "content": [
                    ["type": "text", "text": "hello"],
                    ["type": "text", "text": "world"],
                ],
            ]),
            errorMessage: nil)

        let extracted = OpenClawChatEventText.assistantText(from: payload)
        #expect(extracted == "hello\nworld")
    }

    /// A message whose role is "user" produces no text.
    @Test func `ignores user messages`() {
        let payload = OpenClawChatEventPayload(
            runId: "run-1",
            sessionKey: "main",
            state: "delta",
            message: AnyCodable([
                "role": "user",
                "content": [["type": "text", "text": "ignore me"]],
            ]),
            errorMessage: nil)

        let extracted = OpenClawChatEventText.assistantText(from: payload)
        #expect(extracted == nil)
    }

    /// String-valued content is returned as-is (after trimming).
    @Test func `extracts plain string content`() {
        let payload = OpenClawChatEventPayload(
            runId: "run-1",
            sessionKey: "main",
            state: "final",
            message: AnyCodable([
                "role": "assistant",
                "content": "plain reply",
            ]),
            errorMessage: nil)

        let extracted = OpenClawChatEventText.assistantText(from: payload)
        #expect(extracted == "plain reply")
    }
}
|
||||
@@ -534,6 +534,7 @@ describeLive("android node capability integration (preconditioned)", () => {
|
||||
const allowlist = resolveNodeCommandAllowlist(cfg, {
|
||||
platform: target.platform,
|
||||
deviceFamily: target.deviceFamily,
|
||||
commands,
|
||||
});
|
||||
|
||||
commandsToRun = commands.filter(
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { normalizeDeclaredNodeCommands } from "./node-command-policy.js";
|
||||
import type { OpenClawConfig } from "../config/types.openclaw.js";
|
||||
import {
|
||||
isNodeCommandAllowed,
|
||||
normalizeDeclaredNodeCommands,
|
||||
resolveNodeCommandAllowlist,
|
||||
} from "./node-command-policy.js";
|
||||
|
||||
describe("gateway/node-command-policy", () => {
|
||||
it("normalizes declared node commands against the allowlist", () => {
|
||||
@@ -11,4 +16,43 @@ describe("gateway/node-command-policy", () => {
|
||||
}),
|
||||
).toEqual(["canvas.snapshot", "system.run"]);
|
||||
});
|
||||
|
||||
// For each platform label, a node advertising the "talk" capability gets all four
// talk.ptt.* commands in its allowlist, and a declared talk.ptt.start is accepted.
it("allows declared push-to-talk commands on trusted talk-capable nodes", () => {
  const cfg = {} as OpenClawConfig;
  const pttCommands = ["talk.ptt.start", "talk.ptt.stop", "talk.ptt.cancel", "talk.ptt.once"];
  ["ios", "android", "macos", "other"].forEach((platform) => {
    const allowlist = resolveNodeCommandAllowlist(cfg, { platform, caps: ["talk"] });
    for (const command of pttCommands) {
      expect(allowlist.has(command)).toBe(true);
    }
    const verdict = isNodeCommandAllowed({
      command: "talk.ptt.start",
      declaredCommands: ["talk.ptt.start"],
      allowlist,
    });
    expect(verdict).toEqual({ ok: true });
  });
});
|
||||
|
||||
// An android node with only the "device" capability and no declared commands
// must not receive talk.ptt.start.
it("does not allow push-to-talk commands from platform label alone", () => {
  const cfg = {} as OpenClawConfig;
  const node = { platform: "android", caps: ["device"], commands: [] };
  const allowlist = resolveNodeCommandAllowlist(cfg, node);

  const allowsPttStart = allowlist.has("talk.ptt.start");
  expect(allowsPttStart).toBe(false);
});
|
||||
|
||||
// Declaring a talk.* command (without a "talk" capability) is enough to allow talk.ptt.start.
it("allows push-to-talk commands when the node declares talk command support", () => {
  const cfg = {} as OpenClawConfig;
  const node = { platform: "custom", commands: ["talk.ptt.start"] };
  const allowlist = resolveNodeCommandAllowlist(cfg, node);

  const allowsPttStart = allowlist.has("talk.ptt.start");
  expect(allowsPttStart).toBe(true);
});
|
||||
});
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
NODE_SYSTEM_RUN_COMMANDS,
|
||||
} from "../infra/node-commands.js";
|
||||
import { getActiveRuntimePluginRegistry } from "../plugins/active-runtime-registry.js";
|
||||
import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
|
||||
import { normalizeDeviceMetadataForPolicy } from "./device-metadata-normalization.js";
|
||||
import type { NodeSession } from "./node-registry.js";
|
||||
|
||||
@@ -49,6 +50,8 @@ const MOTION_COMMANDS = ["motion.activity", "motion.pedometer"];
|
||||
|
||||
const SMS_DANGEROUS_COMMANDS = ["sms.send", "sms.search"];
|
||||
|
||||
// Push-to-talk node commands; resolveNodeCommandAllowlist adds them to the allowlist
// only when hasTalkSurface(node) is true.
const TALK_PTT_COMMANDS = ["talk.ptt.start", "talk.ptt.stop", "talk.ptt.cancel", "talk.ptt.once"];
|
||||
|
||||
// iOS nodes don't implement system.run/which, but they do support notifications.
|
||||
const IOS_SYSTEM_COMMANDS = [NODE_SYSTEM_NOTIFY_COMMAND];
|
||||
|
||||
@@ -197,17 +200,35 @@ export function listDangerousPluginNodeCommands(): string[] {
|
||||
return [...new Set(commands.map((command) => command.trim()).filter(Boolean))];
|
||||
}
|
||||
|
||||
type NodeCommandPolicyNode = Pick<NodeSession, "platform" | "deviceFamily"> &
|
||||
Partial<Pick<NodeSession, "caps" | "commands">>;
|
||||
|
||||
/**
 * Reports whether a node advertises Talk support.
 *
 * A node counts as Talk-capable when either of these holds after lowercase normalization:
 * - one of its `caps` equals "talk", or
 * - one of its declared `commands` starts with "talk.".
 *
 * A missing node, or missing `caps` / `commands`, is treated as "no Talk surface".
 */
function hasTalkSurface(node?: NodeCommandPolicyNode): boolean {
  if (!node) {
    return false;
  }

  const declaredCaps = node.caps ?? [];
  const advertisesTalkCap = declaredCaps.some(
    (cap) => normalizeOptionalLowercaseString(cap) === "talk",
  );
  if (advertisesTalkCap) {
    return true;
  }

  const declaredCommands = node.commands ?? [];
  return declaredCommands.some((cmd) =>
    normalizeOptionalLowercaseString(cmd)?.startsWith("talk."),
  );
}
|
||||
|
||||
export function resolveNodeCommandAllowlist(
|
||||
cfg: OpenClawConfig,
|
||||
node?: Pick<NodeSession, "platform" | "deviceFamily">,
|
||||
node?: NodeCommandPolicyNode,
|
||||
): Set<string> {
|
||||
const platformId = normalizePlatformId(node?.platform, node?.deviceFamily);
|
||||
const base = PLATFORM_DEFAULTS[platformId] ?? PLATFORM_DEFAULTS.unknown;
|
||||
const talkCommands = hasTalkSurface(node) ? TALK_PTT_COMMANDS : [];
|
||||
const extra = cfg.gateway?.nodes?.allowCommands ?? [];
|
||||
const deny = new Set(cfg.gateway?.nodes?.denyCommands ?? []);
|
||||
const dangerousPluginCommands = new Set(listDangerousPluginNodeCommands());
|
||||
const allow = new Set(
|
||||
[...base, ...extra]
|
||||
[...base, ...talkCommands, ...extra]
|
||||
.map((cmd) => cmd.trim())
|
||||
.filter((cmd) => cmd && !dangerousPluginCommands.has(cmd)),
|
||||
);
|
||||
|
||||
@@ -62,6 +62,8 @@ export async function reconcileNodePairingOnConnect(params: {
|
||||
const allowlist = resolveNodeCommandAllowlist(params.cfg, {
|
||||
platform: params.connectParams.client.platform,
|
||||
deviceFamily: params.connectParams.client.deviceFamily,
|
||||
caps: params.connectParams.caps,
|
||||
commands: params.connectParams.commands,
|
||||
});
|
||||
const declared = normalizeDeclaredNodeCommands({
|
||||
declaredCommands: Array.isArray(params.connectParams.commands)
|
||||
|
||||
@@ -405,6 +405,66 @@ describe("node.invoke APNs wake path", () => {
|
||||
expect(call?.[1]).toMatchObject({ ok: true, nodeId: "ios-node-reconnect" });
|
||||
});
|
||||
|
||||
it("broadcasts canonical Talk capture events for successful PTT node commands", async () => {
  const nodeId = "android-talk-node";
  const command = "talk.ptt.start";

  const respond = vi.fn();
  const broadcast = vi.fn();
  // Registry stub: a connected node that declares the PTT start command and whose
  // invoke resolves successfully with a capture id in its JSON payload.
  const nodeRegistry = {
    get: vi.fn(() => ({
      nodeId,
      commands: [command],
      capabilities: ["talk"],
      platform: "android",
    })),
    invoke: vi.fn().mockResolvedValue({
      ok: true,
      payloadJSON: '{"captureId":"capture-1"}',
    }),
  };
  const context = {
    nodeRegistry,
    execApprovalManager: undefined,
    logGateway: { info: vi.fn(), warn: vi.fn() },
    getRuntimeConfig: () => mocks.getRuntimeConfig(),
    broadcast,
  };

  await nodeHandlers["node.invoke"]({
    params: {
      nodeId,
      command,
      idempotencyKey: "idem-talk-ptt-start",
    },
    respond: respond as never,
    context: context as never,
    client: null,
    req: { type: "req", id: "req-talk-ptt", method: "node.invoke" },
    isWebchatConnect: () => false,
  });

  // The request itself must succeed...
  expect(respond.mock.calls[0]?.[0]).toBe(true);

  // ...and a "capture.started" Talk event must be broadcast for the capture id
  // that the node returned.
  const expectedTalkEvent = expect.objectContaining({
    type: "capture.started",
    sessionId: "node:android-talk-node:talk:capture-1",
    captureId: "capture-1",
    seq: expect.any(Number),
    mode: "stt-tts",
    transport: "managed-room",
    brain: "agent-consult",
    final: false,
    payload: expect.objectContaining({
      nodeId: "android-talk-node",
      command: "talk.ptt.start",
    }),
  });
  expect(broadcast).toHaveBeenCalledWith(
    "talk.event",
    expect.objectContaining({
      nodeId: "android-talk-node",
      command: "talk.ptt.start",
      talkEvent: expectedTalkEvent,
    }),
    { dropIfSlow: true },
  );
});
|
||||
|
||||
it("clears stale registrations after an invalid device token wake failure", async () => {
|
||||
const registration = directRegistration("ios-node-stale");
|
||||
mocks.loadApnsRegistration.mockResolvedValue(registration);
|
||||
|
||||
@@ -66,6 +66,7 @@ import {
|
||||
respondUnavailableOnThrow,
|
||||
safeParseJson,
|
||||
} from "./nodes.helpers.js";
|
||||
import type { GatewayRequestContext } from "./shared-types.js";
|
||||
import type { GatewayRequestHandlers } from "./types.js";
|
||||
|
||||
export {
|
||||
@@ -78,6 +79,13 @@ const NODE_WAKE_THROTTLE_MS = 15_000;
|
||||
const NODE_WAKE_NUDGE_THROTTLE_MS = 10 * 60_000;
|
||||
const NODE_PENDING_ACTION_TTL_MS = 10 * 60_000;
|
||||
const NODE_PENDING_ACTION_MAX_PER_NODE = 64;
|
||||
const TALK_PTT_COMMANDS = new Set([
|
||||
"talk.ptt.start",
|
||||
"talk.ptt.stop",
|
||||
"talk.ptt.cancel",
|
||||
"talk.ptt.once",
|
||||
]);
|
||||
const talkPttEventSeqBySessionId = new Map<string, number>();
|
||||
|
||||
type NodeWakeNudgeAttempt = {
|
||||
sent: boolean;
|
||||
@@ -259,6 +267,8 @@ function resolveAllowedPendingNodeActions(params: {
|
||||
const allowlist = resolveNodeCommandAllowlist(params.cfg, {
|
||||
platform: connect?.client?.platform,
|
||||
deviceFamily: connect?.client?.deviceFamily,
|
||||
caps: connect?.caps,
|
||||
commands: declaredCommands,
|
||||
});
|
||||
const allowed = pending.filter((entry) => {
|
||||
const result = isNodeCommandAllowed({
|
||||
@@ -304,6 +314,69 @@ function toPendingParamsJSON(params: unknown): string | undefined {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Broadcasts a `talk.event` describing a push-to-talk command that a node completed.
 *
 * Commands that are not in `TALK_PTT_COMMANDS` are ignored. The capture id is read from
 * `payload.captureId`; when the payload carries none, a random UUID is used so the event
 * still gets its own session id. Events that share a session id are numbered with a
 * 1-based `seq` tracked in `talkPttEventSeqBySessionId`, which is capped at 2048 entries.
 *
 * @param params.context Gateway request context; only `broadcast` is used.
 * @param params.nodeId Id of the node the command was invoked on.
 * @param params.command Node command name (for example `talk.ptt.start`).
 * @param params.payload Parsed command result; anything that is not a non-null object is
 *   treated as an empty payload.
 */
function emitTalkPttNodeEvent(params: {
  context: Pick<GatewayRequestContext, "broadcast">;
  nodeId: string;
  command: string;
  payload: unknown;
}): void {
  if (!TALK_PTT_COMMANDS.has(params.command)) {
    return;
  }
  const payloadObj =
    typeof params.payload === "object" && params.payload !== null
      ? (params.payload as Record<string, unknown>)
      : {};
  const captureId = normalizeOptionalString(payloadObj.captureId) ?? randomUUID();
  const sessionId = `node:${params.nodeId}:talk:${captureId}`;
  const seq = (talkPttEventSeqBySessionId.get(sessionId) ?? 0) + 1;
  // Map#set on an existing key keeps the key's original insertion position. Delete first so
  // the session moves to the back of the iteration order; the size cap below then evicts the
  // least recently used session instead of one that may still be emitting events (which
  // would restart its seq at 1 and reuse an earlier event id).
  talkPttEventSeqBySessionId.delete(sessionId);
  talkPttEventSeqBySessionId.set(sessionId, seq);
  while (talkPttEventSeqBySessionId.size > 2048) {
    const oldest = talkPttEventSeqBySessionId.keys().next().value;
    if (oldest === undefined) {
      break;
    }
    talkPttEventSeqBySessionId.delete(oldest);
  }

  // Any PTT command other than start/cancel/once is reported as "capture.stopped".
  const type =
    params.command === "talk.ptt.start"
      ? "capture.started"
      : params.command === "talk.ptt.cancel"
        ? "capture.cancelled"
        : params.command === "talk.ptt.once"
          ? "capture.once"
          : "capture.stopped";
  // Only the start event is non-final; every other PTT command is marked final.
  const final = params.command !== "talk.ptt.start";
  const talkEvent = {
    id: `${sessionId}:${seq}`,
    type,
    sessionId,
    captureId,
    seq,
    timestamp: new Date().toISOString(),
    mode: "stt-tts",
    transport: "managed-room",
    brain: "agent-consult",
    final,
    payload: {
      nodeId: params.nodeId,
      command: params.command,
      status: normalizeOptionalString(payloadObj.status) ?? undefined,
      transcript: normalizeOptionalString(payloadObj.transcript) ?? undefined,
    },
  };
  params.context.broadcast(
    "talk.event",
    {
      nodeId: params.nodeId,
      command: params.command,
      talkEvent,
    },
    { dropIfSlow: true },
  );
}
|
||||
|
||||
export async function maybeWakeNodeWithApns(
|
||||
nodeId: string,
|
||||
opts?: { force?: boolean; wakeReason?: string; cfg?: OpenClawConfig },
|
||||
@@ -1078,6 +1151,15 @@ export const nodeHandlers: GatewayRequestHandlers = {
|
||||
);
|
||||
return;
|
||||
}
|
||||
const payload = policyResult.payloadJSON
|
||||
? safeParseJson(policyResult.payloadJSON)
|
||||
: policyResult.payload;
|
||||
emitTalkPttNodeEvent({
|
||||
context,
|
||||
nodeId,
|
||||
command,
|
||||
payload,
|
||||
});
|
||||
respond(
|
||||
true,
|
||||
{
|
||||
@@ -1151,6 +1233,12 @@ export const nodeHandlers: GatewayRequestHandlers = {
|
||||
return;
|
||||
}
|
||||
const payload = res.payloadJSON ? safeParseJson(res.payloadJSON) : res.payload;
|
||||
emitTalkPttNodeEvent({
|
||||
context,
|
||||
nodeId,
|
||||
command,
|
||||
payload,
|
||||
});
|
||||
respond(
|
||||
true,
|
||||
{
|
||||
@@ -1228,6 +1316,9 @@ function buildNodeCommandRejectionHint(
|
||||
return `node command not allowed: the node (platform: ${platform}) does not support "${command}"`;
|
||||
}
|
||||
if (reason === "command not allowlisted") {
|
||||
if (command.startsWith("talk.")) {
|
||||
return `node command not allowed: "${command}" requires a trusted Talk-capable node`;
|
||||
}
|
||||
return `node command not allowed: "${command}" is not in the allowlist for platform "${platform}"`;
|
||||
}
|
||||
if (reason === "node did not declare commands") {
|
||||
|
||||
32
src/gateway/server-talk-nodes.test.ts
Normal file
32
src/gateway/server-talk-nodes.test.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { NodeRegistry, NodeSession } from "./node-registry.js";
|
||||
import { hasConnectedTalkNode } from "./server-talk-nodes.js";
|
||||
|
||||
/**
 * Builds a minimal `NodeRegistry` stand-in whose `listConnected()` returns the given
 * partial sessions, each filled in with default ids, empty `caps` / `commands` and a zero
 * connect time. Fields supplied by the caller override the defaults.
 */
function registryWith(nodes: Array<Partial<NodeSession>>): NodeRegistry {
  const listConnected = () =>
    nodes.map((overrides, position) => ({
      nodeId: `node-${position}`,
      connId: `conn-${position}`,
      caps: [],
      commands: [],
      connectedAtMs: 0,
      ...overrides,
    }));
  return { listConnected } as NodeRegistry;
}
|
||||
|
||||
describe("hasConnectedTalkNode", () => {
  it("uses explicit talk capability instead of platform names", () => {
    // Android by name only: no "talk" capability and no talk.* commands.
    const androidWithoutTalk = registryWith([
      { platform: "android", caps: ["device"], commands: [] },
    ]);
    expect(hasConnectedTalkNode(androidWithoutTalk)).toBe(false);

    // Any platform qualifies once it advertises the "talk" capability.
    const linuxWithTalkCap = registryWith([{ platform: "linux", caps: ["talk"] }]);
    expect(hasConnectedTalkNode(linuxWithTalkCap)).toBe(true);
  });

  it("accepts nodes that declare talk command support", () => {
    const customWithTalkCommand = registryWith([
      { platform: "custom", commands: ["talk.ptt.start"] },
    ]);
    expect(hasConnectedTalkNode(customWithTalkCommand)).toBe(true);
  });
});
|
||||
20
src/gateway/server-talk-nodes.ts
Normal file
20
src/gateway/server-talk-nodes.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { normalizeOptionalLowercaseString } from "../shared/string-coerce.js";
|
||||
import type { NodeRegistry, NodeSession } from "./node-registry.js";
|
||||
|
||||
const TALK_CAPABILITY = "talk";
|
||||
const TALK_COMMAND_PREFIX = "talk.";
|
||||
|
||||
/**
 * Returns true when at least one node currently listed by `registry.listConnected()`
 * is Talk-capable according to `isTalkCapableNode`.
 */
export function hasConnectedTalkNode(registry: NodeRegistry): boolean {
  const connected = registry.listConnected();
  return connected.some((session) => isTalkCapableNode(session));
}
|
||||
|
||||
/**
 * Decides whether a connected node supports Talk: either one of its capabilities
 * normalizes to `TALK_CAPABILITY`, or one of its declared commands normalizes to a
 * string starting with `TALK_COMMAND_PREFIX`.
 */
function isTalkCapableNode(node: NodeSession): boolean {
  const advertisesTalkCap = node.caps.some(
    (cap) => normalizeOptionalLowercaseString(cap) === TALK_CAPABILITY,
  );
  if (advertisesTalkCap) {
    return true;
  }
  return node.commands.some((cmd) =>
    normalizeOptionalLowercaseString(cmd)?.startsWith(TALK_COMMAND_PREFIX),
  );
}
|
||||
@@ -133,6 +133,12 @@ function listKnownNodeCommands(cfg: OpenClawConfig): Set<string> {
|
||||
}
|
||||
}
|
||||
}
|
||||
for (const cmd of resolveNodeCommandAllowlist(baseCfg, { caps: ["talk"] })) {
|
||||
const normalized = normalizeNodeCommand(cmd);
|
||||
if (normalized) {
|
||||
out.add(normalized);
|
||||
}
|
||||
}
|
||||
for (const cmd of DEFAULT_DANGEROUS_NODE_COMMANDS) {
|
||||
const normalized = normalizeNodeCommand(cmd);
|
||||
if (normalized) {
|
||||
|
||||
Reference in New Issue
Block a user