mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
refactor: split android talk voice resolution
This commit is contained in:
@@ -813,7 +813,7 @@ class TalkModeManager(
|
|||||||
_lastAssistantText.value = cleaned
|
_lastAssistantText.value = cleaned
|
||||||
|
|
||||||
val requestedVoice = directive?.voiceId?.trim()?.takeIf { it.isNotEmpty() }
|
val requestedVoice = directive?.voiceId?.trim()?.takeIf { it.isNotEmpty() }
|
||||||
val resolvedVoice = resolveVoiceAlias(requestedVoice)
|
val resolvedVoice = TalkModeVoiceResolver.resolveVoiceAlias(requestedVoice, voiceAliases)
|
||||||
if (requestedVoice != null && resolvedVoice == null) {
|
if (requestedVoice != null && resolvedVoice == null) {
|
||||||
Log.w(tag, "unknown voice alias: $requestedVoice")
|
Log.w(tag, "unknown voice alias: $requestedVoice")
|
||||||
}
|
}
|
||||||
@@ -836,12 +836,35 @@ class TalkModeManager(
|
|||||||
apiKey?.trim()?.takeIf { it.isNotEmpty() }
|
apiKey?.trim()?.takeIf { it.isNotEmpty() }
|
||||||
?: System.getenv("ELEVENLABS_API_KEY")?.trim()
|
?: System.getenv("ELEVENLABS_API_KEY")?.trim()
|
||||||
val preferredVoice = resolvedVoice ?: currentVoiceId ?: defaultVoiceId
|
val preferredVoice = resolvedVoice ?: currentVoiceId ?: defaultVoiceId
|
||||||
val voiceId =
|
val resolvedPlaybackVoice =
|
||||||
if (!apiKey.isNullOrEmpty()) {
|
if (!apiKey.isNullOrEmpty()) {
|
||||||
resolveVoiceId(preferredVoice, apiKey)
|
try {
|
||||||
|
TalkModeVoiceResolver.resolveVoiceId(
|
||||||
|
preferred = preferredVoice,
|
||||||
|
fallbackVoiceId = fallbackVoiceId,
|
||||||
|
defaultVoiceId = defaultVoiceId,
|
||||||
|
currentVoiceId = currentVoiceId,
|
||||||
|
voiceOverrideActive = voiceOverrideActive,
|
||||||
|
listVoices = { TalkModeVoiceResolver.listVoices(apiKey, json) },
|
||||||
|
)
|
||||||
|
} catch (err: Throwable) {
|
||||||
|
Log.w(tag, "list voices failed: ${err.message ?: err::class.simpleName}")
|
||||||
|
null
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
null
|
null
|
||||||
}
|
}
|
||||||
|
resolvedPlaybackVoice?.let { resolved ->
|
||||||
|
fallbackVoiceId = resolved.fallbackVoiceId
|
||||||
|
defaultVoiceId = resolved.defaultVoiceId
|
||||||
|
currentVoiceId = resolved.currentVoiceId
|
||||||
|
resolved.selectedVoiceName?.let { name ->
|
||||||
|
resolved.voiceId?.let { voiceId ->
|
||||||
|
Log.d(tag, "default voice selected $name ($voiceId)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
val voiceId = resolvedPlaybackVoice?.voiceId
|
||||||
|
|
||||||
_statusText.value = "Speaking…"
|
_statusText.value = "Speaking…"
|
||||||
_isSpeaking.value = true
|
_isSpeaking.value = true
|
||||||
@@ -1703,82 +1726,6 @@ class TalkModeManager(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Translates a requested voice name into a voice ID using `voiceAliases`.
 *
 * Checks are tried in order:
 * 1. the normalized (trimmed, lowercased) value is an alias key -> its mapped target;
 * 2. the trimmed value equals one of the alias targets, ignoring case -> the trimmed value;
 * 3. the trimmed value passes [isLikelyVoiceId] -> the trimmed value.
 *
 * @param value requested alias or voice ID; may be null or blank.
 * @return the resolved voice ID, or null when [value] is blank or none of the checks match.
 */
private fun resolveVoiceAlias(value: String?): String? {
  val candidate = value?.trim()
  if (candidate.isNullOrEmpty()) return null

  val aliasTarget = voiceAliases[normalizeAliasKey(candidate)]
  return when {
    aliasTarget != null -> aliasTarget
    voiceAliases.values.any { target -> target.equals(candidate, ignoreCase = true) } -> candidate
    isLikelyVoiceId(candidate) -> candidate
    else -> null
  }
}
|
|
||||||
|
|
||||||
/**
 * Chooses the voice ID to use for playback.
 *
 * Resolution order:
 * 1. A non-blank [preferred] value: the alias target when [resolveVoiceAlias] finds one,
 *    otherwise the trimmed value itself (treated as a direct voice ID).
 * 2. The cached `fallbackVoiceId`, when set.
 * 3. The first voice returned by [listVoices]. That voice is cached in `fallbackVoiceId`,
 *    seeds `defaultVoiceId` when it is blank, and becomes `currentVoiceId` unless
 *    `voiceOverrideActive` is set.
 *
 * @param preferred requested voice alias or ID; may be null or blank.
 * @param apiKey API key passed through to [listVoices].
 * @return the chosen voice ID, or null when the catalog is empty or the lookup fails.
 */
private suspend fun resolveVoiceId(preferred: String?, apiKey: String): String? {
  val trimmed = preferred?.trim().orEmpty()
  if (trimmed.isNotEmpty()) {
    // If it resolves as an alias, use the alias target.
    // Otherwise treat it as a direct voice ID (e.g. "21m00Tcm4TlvDq8ikWAM").
    return resolveVoiceAlias(trimmed) ?: trimmed
  }
  fallbackVoiceId?.let { return it }

  return try {
    val first = listVoices(apiKey).firstOrNull() ?: return null
    fallbackVoiceId = first.voiceId
    if (defaultVoiceId.isNullOrBlank()) {
      defaultVoiceId = first.voiceId
    }
    if (!voiceOverrideActive) {
      currentVoiceId = first.voiceId
    }
    val name = first.name ?: "unknown"
    Log.d(tag, "default voice selected $name (${first.voiceId})")
    first.voiceId
  } catch (err: kotlin.coroutines.cancellation.CancellationException) {
    // Cancellation is not a lookup failure: let it propagate so the enclosing coroutine
    // actually stops instead of continuing with a null voice.
    throw err
  } catch (err: Throwable) {
    Log.w(tag, "list voices failed: ${err.message ?: err::class.simpleName}")
    null
  }
}
|
|
||||||
|
|
||||||
/**
 * Fetches the voice catalog from `https://api.elevenlabs.io/v1/voices`.
 *
 * Runs on [Dispatchers.IO]. Entries that are not JSON objects or that lack a string
 * `voice_id` are skipped; a missing or non-array `voices` field yields an empty list.
 *
 * @param apiKey value sent in the `xi-api-key` request header.
 * @return the parsed voices, in response order.
 * @throws IllegalStateException when the HTTP status is 400 or above; the message carries
 *   the status code and the error body (empty when the server sent none).
 */
private suspend fun listVoices(apiKey: String): List<ElevenLabsVoice> {
  return withContext(Dispatchers.IO) {
    val conn = URL("https://api.elevenlabs.io/v1/voices").openConnection() as HttpURLConnection
    try {
      conn.requestMethod = "GET"
      conn.connectTimeout = 15_000
      conn.readTimeout = 15_000
      conn.setRequestProperty("xi-api-key", apiKey)

      val code = conn.responseCode
      val failed = code >= 400
      // errorStream is null when the server sent no error body; treat that as an empty payload
      // instead of crashing on readBytes().
      val stream = if (failed) conn.errorStream else conn.inputStream
      val body = stream?.use { it.readBytes() }?.toString(Charsets.UTF_8).orEmpty()
      if (failed) {
        throw IllegalStateException("ElevenLabs voices failed: $code $body")
      }

      val root = json.parseToJsonElement(body).asObjectOrNull()
      val voices = (root?.get("voices") as? JsonArray) ?: JsonArray(emptyList())
      voices.mapNotNull { entry ->
        val obj = entry.asObjectOrNull() ?: return@mapNotNull null
        val voiceId = obj["voice_id"].asStringOrNull() ?: return@mapNotNull null
        val name = obj["name"].asStringOrNull()
        ElevenLabsVoice(voiceId, name)
      }
    } finally {
      // Release the connection on every path, including HTTP and parse failures.
      conn.disconnect()
    }
  }
}
|
|
||||||
|
|
||||||
/**
 * Heuristic check for a raw voice ID: at least 10 characters long and made up only of
 * letters, digits, `-` and `_`.
 */
private fun isLikelyVoiceId(value: String): Boolean =
  value.length >= 10 && value.none { ch -> !ch.isLetterOrDigit() && ch != '-' && ch != '_' }
|
|
||||||
|
|
||||||
/** Canonical form used for alias lookups: surrounding whitespace removed, then lowercased. */
private fun normalizeAliasKey(value: String): String {
  val stripped = value.trim()
  return stripped.lowercase()
}
|
|
||||||
|
|
||||||
/**
 * One entry of the voice catalog.
 *
 * @property voiceId the entry's `voice_id` string.
 * @property name the entry's `name` string, or null when it is absent or not a string.
 */
private data class ElevenLabsVoice(
  val voiceId: String,
  val name: String?,
)
|
|
||||||
|
|
||||||
private val listener =
|
private val listener =
|
||||||
object : RecognitionListener {
|
object : RecognitionListener {
|
||||||
override fun onReadyForSpeech(params: Bundle?) {
|
override fun onReadyForSpeech(params: Bundle?) {
|
||||||
|
|||||||
@@ -0,0 +1,118 @@
|
|||||||
|
package ai.openclaw.app.voice
|
||||||
|
|
||||||
|
import java.net.HttpURLConnection
|
||||||
|
import java.net.URL
|
||||||
|
import kotlinx.coroutines.Dispatchers
|
||||||
|
import kotlinx.coroutines.withContext
|
||||||
|
import kotlinx.serialization.json.Json
|
||||||
|
import kotlinx.serialization.json.JsonArray
|
||||||
|
import kotlinx.serialization.json.JsonElement
|
||||||
|
import kotlinx.serialization.json.JsonObject
|
||||||
|
import kotlinx.serialization.json.JsonPrimitive
|
||||||
|
|
||||||
|
/**
 * One entry of the ElevenLabs voice catalog.
 *
 * @property voiceId the entry's `voice_id` string.
 * @property name the entry's `name` string, or null when it is absent or not a string.
 */
internal data class ElevenLabsVoice(
  val voiceId: String,
  val name: String?,
)
|
||||||
|
|
||||||
|
/**
 * Result of a voice resolution: the voice chosen for playback together with the
 * fallback/default/current voice IDs as they stand after the resolution.
 *
 * @property voiceId the voice to speak with, or null when none could be chosen.
 * @property fallbackVoiceId the fallback voice ID after resolution.
 * @property defaultVoiceId the default voice ID after resolution.
 * @property currentVoiceId the current voice ID after resolution.
 * @property selectedVoiceName name of the voice when it was picked from the catalog;
 *   null otherwise (and also when the catalog entry carried no name).
 */
internal data class TalkModeResolvedVoice(
  val voiceId: String?,
  val fallbackVoiceId: String?,
  val defaultVoiceId: String?,
  val currentVoiceId: String?,
  val selectedVoiceName: String? = null,
)
|
||||||
|
|
||||||
|
/**
 * Stateless helpers for choosing the ElevenLabs voice used by talk mode.
 *
 * Nothing is stored here: callers pass their current alias map and voice IDs in and receive
 * the updated values back in a [TalkModeResolvedVoice].
 */
internal object TalkModeVoiceResolver {
  /**
   * Translates a requested voice name into a voice ID.
   *
   * Checks are tried in order:
   * 1. the normalized (trimmed, lowercased) value is a key of [voiceAliases] -> its target;
   * 2. the trimmed value equals one of the alias targets, ignoring case -> the trimmed value;
   * 3. the trimmed value looks like a raw voice ID -> the trimmed value.
   *
   * @param value requested alias or voice ID; may be null or blank.
   * @param voiceAliases alias map, looked up with the normalized form of [value].
   * @return the resolved voice ID, or null when [value] is blank or nothing matches.
   */
  fun resolveVoiceAlias(value: String?, voiceAliases: Map<String, String>): String? {
    val trimmed = value?.trim().orEmpty()
    if (trimmed.isEmpty()) return null
    val normalized = normalizeAliasKey(trimmed)
    voiceAliases[normalized]?.let { return it }
    if (voiceAliases.values.any { it.equals(trimmed, ignoreCase = true) }) return trimmed
    return if (isLikelyVoiceId(trimmed)) trimmed else null
  }

  /**
   * Chooses the voice to use for playback.
   *
   * Resolution order:
   * 1. a non-blank [preferred] value, used as-is after trimming;
   * 2. a non-blank [fallbackVoiceId];
   * 3. the first voice returned by [listVoices].
   *
   * Only the third case changes the returned IDs: the catalog voice becomes the fallback,
   * seeds the default when [defaultVoiceId] is blank, and becomes the current voice unless
   * [voiceOverrideActive] is set. [listVoices] is invoked only in that third case, and any
   * exception it throws propagates to the caller.
   *
   * @return the chosen voice and resulting IDs; `voiceId` is null when the catalog is empty.
   */
  suspend fun resolveVoiceId(
    preferred: String?,
    fallbackVoiceId: String?,
    defaultVoiceId: String?,
    currentVoiceId: String?,
    voiceOverrideActive: Boolean,
    listVoices: suspend () -> List<ElevenLabsVoice>,
  ): TalkModeResolvedVoice {
    // The caller's IDs with no voice chosen yet; every non-catalog outcome is a copy of this.
    val unchanged =
      TalkModeResolvedVoice(
        voiceId = null,
        fallbackVoiceId = fallbackVoiceId,
        defaultVoiceId = defaultVoiceId,
        currentVoiceId = currentVoiceId,
      )

    val trimmed = preferred?.trim().orEmpty()
    if (trimmed.isNotEmpty()) return unchanged.copy(voiceId = trimmed)
    if (!fallbackVoiceId.isNullOrBlank()) return unchanged.copy(voiceId = fallbackVoiceId)

    val first = listVoices().firstOrNull() ?: return unchanged
    return TalkModeResolvedVoice(
      voiceId = first.voiceId,
      fallbackVoiceId = first.voiceId,
      defaultVoiceId = if (defaultVoiceId.isNullOrBlank()) first.voiceId else defaultVoiceId,
      currentVoiceId = if (voiceOverrideActive) currentVoiceId else first.voiceId,
      selectedVoiceName = first.name,
    )
  }

  /**
   * Fetches the voice catalog from `https://api.elevenlabs.io/v1/voices`.
   *
   * Runs on [Dispatchers.IO]. Entries that are not JSON objects or that lack a string
   * `voice_id` are skipped; a missing or non-array `voices` field yields an empty list.
   *
   * @param apiKey value sent in the `xi-api-key` request header.
   * @param json parser used to decode the response body.
   * @return the parsed voices, in response order.
   * @throws IllegalStateException when the HTTP status is 400 or above; the message carries
   *   the status code and the error body (empty when the server sent none).
   */
  suspend fun listVoices(apiKey: String, json: Json): List<ElevenLabsVoice> {
    return withContext(Dispatchers.IO) {
      val conn = URL("https://api.elevenlabs.io/v1/voices").openConnection() as HttpURLConnection
      try {
        conn.requestMethod = "GET"
        conn.connectTimeout = 15_000
        conn.readTimeout = 15_000
        conn.setRequestProperty("xi-api-key", apiKey)

        val code = conn.responseCode
        val failed = code >= 400
        // errorStream is null when the server sent no error body; treat that as an empty
        // payload instead of crashing on readBytes().
        val stream = if (failed) conn.errorStream else conn.inputStream
        val body = stream?.use { it.readBytes() }?.toString(Charsets.UTF_8).orEmpty()
        if (failed) {
          throw IllegalStateException("ElevenLabs voices failed: $code $body")
        }

        val root = json.parseToJsonElement(body).asObjectOrNull()
        val voices = (root?.get("voices") as? JsonArray) ?: JsonArray(emptyList())
        voices.mapNotNull { entry ->
          val obj = entry.asObjectOrNull() ?: return@mapNotNull null
          val voiceId = obj["voice_id"].asStringOrNull() ?: return@mapNotNull null
          val name = obj["name"].asStringOrNull()
          ElevenLabsVoice(voiceId, name)
        }
      } finally {
        // Release the connection on every path, including HTTP and parse failures.
        conn.disconnect()
      }
    }
  }

  /** Heuristic for a raw voice ID: 10+ characters, only letters, digits, `-` and `_`. */
  private fun isLikelyVoiceId(value: String): Boolean {
    if (value.length < 10) return false
    return value.all { it.isLetterOrDigit() || it == '-' || it == '_' }
  }

  /** Canonical alias-lookup form: surrounding whitespace removed, then lowercased. */
  private fun normalizeAliasKey(value: String): String =
    value.trim().lowercase()
}
|
||||||
|
|
||||||
|
/** Returns the receiver when it is a [JsonObject]; null for any other element or for null. */
private fun JsonElement?.asObjectOrNull(): JsonObject? = if (this is JsonObject) this else null
|
||||||
|
|
||||||
|
/**
 * Returns the content of the receiver when it is a JSON string primitive; null for
 * non-string primitives, non-primitive elements, or a null receiver.
 */
private fun JsonElement?.asStringOrNull(): String? {
  val primitive = this as? JsonPrimitive ?: return null
  return if (primitive.isString) primitive.content else null
}
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
package ai.openclaw.app.voice
|
||||||
|
|
||||||
|
import kotlinx.coroutines.runBlocking
|
||||||
|
import org.junit.Assert.assertEquals
|
||||||
|
import org.junit.Assert.assertNull
|
||||||
|
import org.junit.Test
|
||||||
|
|
||||||
|
/**
 * Unit tests for [TalkModeVoiceResolver]: alias resolution and the preferred -> cached
 * fallback -> catalog order of voice selection.
 */
class TalkModeVoiceResolverTest {
  @Test
  fun resolvesVoiceAliasCaseInsensitively() {
    val resolved =
      TalkModeVoiceResolver.resolveVoiceAlias(
        " Clawd ",
        mapOf("clawd" to "voice-123"),
      )

    assertEquals("voice-123", resolved)
  }

  @Test
  fun acceptsDirectVoiceIds() {
    val resolved = TalkModeVoiceResolver.resolveVoiceAlias("21m00Tcm4TlvDq8ikWAM", emptyMap())

    assertEquals("21m00Tcm4TlvDq8ikWAM", resolved)
  }

  @Test
  fun rejectsUnknownAliases() {
    val resolved = TalkModeVoiceResolver.resolveVoiceAlias("nickname", emptyMap())

    assertNull(resolved)
  }

  @Test
  fun usesPreferredVoiceWithoutFetchingCatalog() =
    runBlocking {
      var fetchCount = 0

      val resolved =
        TalkModeVoiceResolver.resolveVoiceId(
          preferred = "  preferred-voice  ",
          fallbackVoiceId = "cached-voice",
          defaultVoiceId = "default-voice",
          currentVoiceId = "current-voice",
          voiceOverrideActive = false,
          listVoices = {
            fetchCount += 1
            emptyList()
          },
        )

      // The preferred value is trimmed and wins over the cached fallback; nothing else changes.
      assertEquals("preferred-voice", resolved.voiceId)
      assertEquals("cached-voice", resolved.fallbackVoiceId)
      assertEquals("default-voice", resolved.defaultVoiceId)
      assertEquals("current-voice", resolved.currentVoiceId)
      assertNull(resolved.selectedVoiceName)
      assertEquals(0, fetchCount)
    }

  @Test
  fun reusesCachedFallbackVoiceBeforeFetchingCatalog() =
    runBlocking {
      var fetchCount = 0

      val resolved =
        TalkModeVoiceResolver.resolveVoiceId(
          preferred = null,
          fallbackVoiceId = "cached-voice",
          defaultVoiceId = null,
          currentVoiceId = null,
          voiceOverrideActive = false,
          listVoices = {
            fetchCount += 1
            emptyList()
          },
        )

      assertEquals("cached-voice", resolved.voiceId)
      assertEquals(0, fetchCount)
    }

  @Test
  fun seedsDefaultVoiceFromCatalogWhenNeeded() =
    runBlocking {
      val resolved =
        TalkModeVoiceResolver.resolveVoiceId(
          preferred = null,
          fallbackVoiceId = null,
          defaultVoiceId = null,
          currentVoiceId = null,
          voiceOverrideActive = false,
          listVoices = { listOf(ElevenLabsVoice("voice-1", "First")) },
        )

      assertEquals("voice-1", resolved.voiceId)
      assertEquals("voice-1", resolved.fallbackVoiceId)
      assertEquals("voice-1", resolved.defaultVoiceId)
      assertEquals("voice-1", resolved.currentVoiceId)
      assertEquals("First", resolved.selectedVoiceName)
    }

  @Test
  fun preservesCurrentVoiceWhenOverrideIsActive() =
    runBlocking {
      val resolved =
        TalkModeVoiceResolver.resolveVoiceId(
          preferred = null,
          fallbackVoiceId = null,
          defaultVoiceId = null,
          // A non-null value is needed to tell "preserved" apart from "cleared".
          currentVoiceId = "override-voice",
          voiceOverrideActive = true,
          listVoices = { listOf(ElevenLabsVoice("voice-1", "First")) },
        )

      assertEquals("voice-1", resolved.voiceId)
      assertEquals("override-voice", resolved.currentVoiceId)
    }

  @Test
  fun returnsNoVoiceWhenCatalogIsEmpty() =
    runBlocking {
      val resolved =
        TalkModeVoiceResolver.resolveVoiceId(
          preferred = null,
          fallbackVoiceId = null,
          defaultVoiceId = "default-voice",
          currentVoiceId = "current-voice",
          voiceOverrideActive = false,
          listVoices = { emptyList() },
        )

      // With nothing to choose from, the caller's IDs come back untouched.
      assertNull(resolved.voiceId)
      assertNull(resolved.fallbackVoiceId)
      assertEquals("default-voice", resolved.defaultVoiceId)
      assertEquals("current-voice", resolved.currentVoiceId)
      assertNull(resolved.selectedVoiceName)
    }
}
|
||||||
Reference in New Issue
Block a user