refactor: split android talk voice resolution

This commit is contained in:
Peter Steinberger
2026-03-08 16:30:58 +00:00
parent 371c53b282
commit fa580e33c1
3 changed files with 236 additions and 79 deletions

View File

@@ -813,7 +813,7 @@ class TalkModeManager(
_lastAssistantText.value = cleaned
val requestedVoice = directive?.voiceId?.trim()?.takeIf { it.isNotEmpty() }
val resolvedVoice = resolveVoiceAlias(requestedVoice)
val resolvedVoice = TalkModeVoiceResolver.resolveVoiceAlias(requestedVoice, voiceAliases)
if (requestedVoice != null && resolvedVoice == null) {
Log.w(tag, "unknown voice alias: $requestedVoice")
}
@@ -836,12 +836,35 @@ class TalkModeManager(
apiKey?.trim()?.takeIf { it.isNotEmpty() }
?: System.getenv("ELEVENLABS_API_KEY")?.trim()
val preferredVoice = resolvedVoice ?: currentVoiceId ?: defaultVoiceId
val voiceId =
val resolvedPlaybackVoice =
if (!apiKey.isNullOrEmpty()) {
resolveVoiceId(preferredVoice, apiKey)
try {
TalkModeVoiceResolver.resolveVoiceId(
preferred = preferredVoice,
fallbackVoiceId = fallbackVoiceId,
defaultVoiceId = defaultVoiceId,
currentVoiceId = currentVoiceId,
voiceOverrideActive = voiceOverrideActive,
listVoices = { TalkModeVoiceResolver.listVoices(apiKey, json) },
)
} catch (err: Throwable) {
Log.w(tag, "list voices failed: ${err.message ?: err::class.simpleName}")
null
}
} else {
null
}
resolvedPlaybackVoice?.let { resolved ->
fallbackVoiceId = resolved.fallbackVoiceId
defaultVoiceId = resolved.defaultVoiceId
currentVoiceId = resolved.currentVoiceId
resolved.selectedVoiceName?.let { name ->
resolved.voiceId?.let { voiceId ->
Log.d(tag, "default voice selected $name ($voiceId)")
}
}
}
val voiceId = resolvedPlaybackVoice?.voiceId
_statusText.value = "Speaking…"
_isSpeaking.value = true
@@ -1703,82 +1726,6 @@ class TalkModeManager(
}
}
private fun resolveVoiceAlias(value: String?): String? {
val trimmed = value?.trim().orEmpty()
if (trimmed.isEmpty()) return null
val normalized = normalizeAliasKey(trimmed)
voiceAliases[normalized]?.let { return it }
if (voiceAliases.values.any { it.equals(trimmed, ignoreCase = true) }) return trimmed
return if (isLikelyVoiceId(trimmed)) trimmed else null
}
private suspend fun resolveVoiceId(preferred: String?, apiKey: String): String? {
val trimmed = preferred?.trim().orEmpty()
if (trimmed.isNotEmpty()) {
val resolved = resolveVoiceAlias(trimmed)
// If it resolves as an alias, use the alias target.
// Otherwise treat it as a direct voice ID (e.g. "21m00Tcm4TlvDq8ikWAM").
return resolved ?: trimmed
}
fallbackVoiceId?.let { return it }
return try {
val voices = listVoices(apiKey)
val first = voices.firstOrNull() ?: return null
fallbackVoiceId = first.voiceId
if (defaultVoiceId.isNullOrBlank()) {
defaultVoiceId = first.voiceId
}
if (!voiceOverrideActive) {
currentVoiceId = first.voiceId
}
val name = first.name ?: "unknown"
Log.d(tag, "default voice selected $name (${first.voiceId})")
first.voiceId
} catch (err: Throwable) {
Log.w(tag, "list voices failed: ${err.message ?: err::class.simpleName}")
null
}
}
private suspend fun listVoices(apiKey: String): List<ElevenLabsVoice> {
return withContext(Dispatchers.IO) {
val url = URL("https://api.elevenlabs.io/v1/voices")
val conn = url.openConnection() as HttpURLConnection
conn.requestMethod = "GET"
conn.connectTimeout = 15_000
conn.readTimeout = 15_000
conn.setRequestProperty("xi-api-key", apiKey)
val code = conn.responseCode
val stream = if (code >= 400) conn.errorStream else conn.inputStream
val data = stream.readBytes()
if (code >= 400) {
val message = data.toString(Charsets.UTF_8)
throw IllegalStateException("ElevenLabs voices failed: $code $message")
}
val root = json.parseToJsonElement(data.toString(Charsets.UTF_8)).asObjectOrNull()
val voices = (root?.get("voices") as? JsonArray) ?: JsonArray(emptyList())
voices.mapNotNull { entry ->
val obj = entry.asObjectOrNull() ?: return@mapNotNull null
val voiceId = obj["voice_id"].asStringOrNull() ?: return@mapNotNull null
val name = obj["name"].asStringOrNull()
ElevenLabsVoice(voiceId, name)
}
}
}
private fun isLikelyVoiceId(value: String): Boolean {
if (value.length < 10) return false
return value.all { it.isLetterOrDigit() || it == '-' || it == '_' }
}
private fun normalizeAliasKey(value: String): String =
value.trim().lowercase()
private data class ElevenLabsVoice(val voiceId: String, val name: String?)
private val listener =
object : RecognitionListener {
override fun onReadyForSpeech(params: Bundle?) {

View File

@@ -0,0 +1,118 @@
package ai.openclaw.app.voice
import java.net.HttpURLConnection
import java.net.URL
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonArray
import kotlinx.serialization.json.JsonElement
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.JsonPrimitive
internal data class ElevenLabsVoice(val voiceId: String, val name: String?)
internal data class TalkModeResolvedVoice(
val voiceId: String?,
val fallbackVoiceId: String?,
val defaultVoiceId: String?,
val currentVoiceId: String?,
val selectedVoiceName: String? = null,
)
internal object TalkModeVoiceResolver {
fun resolveVoiceAlias(value: String?, voiceAliases: Map<String, String>): String? {
val trimmed = value?.trim().orEmpty()
if (trimmed.isEmpty()) return null
val normalized = normalizeAliasKey(trimmed)
voiceAliases[normalized]?.let { return it }
if (voiceAliases.values.any { it.equals(trimmed, ignoreCase = true) }) return trimmed
return if (isLikelyVoiceId(trimmed)) trimmed else null
}
suspend fun resolveVoiceId(
preferred: String?,
fallbackVoiceId: String?,
defaultVoiceId: String?,
currentVoiceId: String?,
voiceOverrideActive: Boolean,
listVoices: suspend () -> List<ElevenLabsVoice>,
): TalkModeResolvedVoice {
val trimmed = preferred?.trim().orEmpty()
if (trimmed.isNotEmpty()) {
return TalkModeResolvedVoice(
voiceId = trimmed,
fallbackVoiceId = fallbackVoiceId,
defaultVoiceId = defaultVoiceId,
currentVoiceId = currentVoiceId,
)
}
if (!fallbackVoiceId.isNullOrBlank()) {
return TalkModeResolvedVoice(
voiceId = fallbackVoiceId,
fallbackVoiceId = fallbackVoiceId,
defaultVoiceId = defaultVoiceId,
currentVoiceId = currentVoiceId,
)
}
val first = listVoices().firstOrNull()
if (first == null) {
return TalkModeResolvedVoice(
voiceId = null,
fallbackVoiceId = fallbackVoiceId,
defaultVoiceId = defaultVoiceId,
currentVoiceId = currentVoiceId,
)
}
return TalkModeResolvedVoice(
voiceId = first.voiceId,
fallbackVoiceId = first.voiceId,
defaultVoiceId = if (defaultVoiceId.isNullOrBlank()) first.voiceId else defaultVoiceId,
currentVoiceId = if (voiceOverrideActive) currentVoiceId else first.voiceId,
selectedVoiceName = first.name,
)
}
suspend fun listVoices(apiKey: String, json: Json): List<ElevenLabsVoice> {
return withContext(Dispatchers.IO) {
val url = URL("https://api.elevenlabs.io/v1/voices")
val conn = url.openConnection() as HttpURLConnection
conn.requestMethod = "GET"
conn.connectTimeout = 15_000
conn.readTimeout = 15_000
conn.setRequestProperty("xi-api-key", apiKey)
val code = conn.responseCode
val stream = if (code >= 400) conn.errorStream else conn.inputStream
val data = stream.readBytes()
if (code >= 400) {
val message = data.toString(Charsets.UTF_8)
throw IllegalStateException("ElevenLabs voices failed: $code $message")
}
val root = json.parseToJsonElement(data.toString(Charsets.UTF_8)).asObjectOrNull()
val voices = (root?.get("voices") as? JsonArray) ?: JsonArray(emptyList())
voices.mapNotNull { entry ->
val obj = entry.asObjectOrNull() ?: return@mapNotNull null
val voiceId = obj["voice_id"].asStringOrNull() ?: return@mapNotNull null
val name = obj["name"].asStringOrNull()
ElevenLabsVoice(voiceId, name)
}
}
}
private fun isLikelyVoiceId(value: String): Boolean {
if (value.length < 10) return false
return value.all { it.isLetterOrDigit() || it == '-' || it == '_' }
}
private fun normalizeAliasKey(value: String): String =
value.trim().lowercase()
}
private fun JsonElement?.asObjectOrNull(): JsonObject? = this as? JsonObject
private fun JsonElement?.asStringOrNull(): String? =
(this as? JsonPrimitive)?.takeIf { it.isString }?.content

View File

@@ -0,0 +1,92 @@
package ai.openclaw.app.voice
import kotlinx.coroutines.runBlocking
import org.junit.Assert.assertEquals
import org.junit.Assert.assertNull
import org.junit.Test
class TalkModeVoiceResolverTest {
@Test
fun resolvesVoiceAliasCaseInsensitively() {
val resolved =
TalkModeVoiceResolver.resolveVoiceAlias(
" Clawd ",
mapOf("clawd" to "voice-123"),
)
assertEquals("voice-123", resolved)
}
@Test
fun acceptsDirectVoiceIds() {
val resolved = TalkModeVoiceResolver.resolveVoiceAlias("21m00Tcm4TlvDq8ikWAM", emptyMap())
assertEquals("21m00Tcm4TlvDq8ikWAM", resolved)
}
@Test
fun rejectsUnknownAliases() {
val resolved = TalkModeVoiceResolver.resolveVoiceAlias("nickname", emptyMap())
assertNull(resolved)
}
@Test
fun reusesCachedFallbackVoiceBeforeFetchingCatalog() =
runBlocking {
var fetchCount = 0
val resolved =
TalkModeVoiceResolver.resolveVoiceId(
preferred = null,
fallbackVoiceId = "cached-voice",
defaultVoiceId = null,
currentVoiceId = null,
voiceOverrideActive = false,
listVoices = {
fetchCount += 1
emptyList()
},
)
assertEquals("cached-voice", resolved.voiceId)
assertEquals(0, fetchCount)
}
@Test
fun seedsDefaultVoiceFromCatalogWhenNeeded() =
runBlocking {
val resolved =
TalkModeVoiceResolver.resolveVoiceId(
preferred = null,
fallbackVoiceId = null,
defaultVoiceId = null,
currentVoiceId = null,
voiceOverrideActive = false,
listVoices = { listOf(ElevenLabsVoice("voice-1", "First")) },
)
assertEquals("voice-1", resolved.voiceId)
assertEquals("voice-1", resolved.fallbackVoiceId)
assertEquals("voice-1", resolved.defaultVoiceId)
assertEquals("voice-1", resolved.currentVoiceId)
assertEquals("First", resolved.selectedVoiceName)
}
@Test
fun preservesCurrentVoiceWhenOverrideIsActive() =
runBlocking {
val resolved =
TalkModeVoiceResolver.resolveVoiceId(
preferred = null,
fallbackVoiceId = null,
defaultVoiceId = null,
currentVoiceId = null,
voiceOverrideActive = true,
listVoices = { listOf(ElevenLabsVoice("voice-1", "First")) },
)
assertEquals("voice-1", resolved.voiceId)
assertNull(resolved.currentVoiceId)
}
}