diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt
index 07bd8a346f0..70b6113fc35 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt
@@ -813,7 +813,7 @@ class TalkModeManager(
     _lastAssistantText.value = cleaned
 
     val requestedVoice = directive?.voiceId?.trim()?.takeIf { it.isNotEmpty() }
-    val resolvedVoice = resolveVoiceAlias(requestedVoice)
+    val resolvedVoice = TalkModeVoiceResolver.resolveVoiceAlias(requestedVoice, voiceAliases)
     if (requestedVoice != null && resolvedVoice == null) {
       Log.w(tag, "unknown voice alias: $requestedVoice")
     }
@@ -836,12 +836,37 @@ class TalkModeManager(
       apiKey?.trim()?.takeIf { it.isNotEmpty() }
         ?: System.getenv("ELEVENLABS_API_KEY")?.trim()
     val preferredVoice = resolvedVoice ?: currentVoiceId ?: defaultVoiceId
-    val voiceId =
+    val resolvedPlaybackVoice =
       if (!apiKey.isNullOrEmpty()) {
-        resolveVoiceId(preferredVoice, apiKey)
+        try {
+          TalkModeVoiceResolver.resolveVoiceId(
+            preferred = preferredVoice,
+            fallbackVoiceId = fallbackVoiceId,
+            defaultVoiceId = defaultVoiceId,
+            currentVoiceId = currentVoiceId,
+            voiceOverrideActive = voiceOverrideActive,
+            listVoices = { TalkModeVoiceResolver.listVoices(apiKey, json) },
+          )
+        } catch (err: Throwable) {
+          // Cancellation must propagate; only real failures fall back to a null voice.
+          if (err is kotlin.coroutines.cancellation.CancellationException) throw err
+          Log.w(tag, "list voices failed: ${err.message ?: err::class.simpleName}")
+          null
+        }
       } else {
         null
       }
+    resolvedPlaybackVoice?.let { resolved ->
+      fallbackVoiceId = resolved.fallbackVoiceId
+      defaultVoiceId = resolved.defaultVoiceId
+      currentVoiceId = resolved.currentVoiceId
+      resolved.selectedVoiceName?.let { name ->
+        resolved.voiceId?.let { voiceId ->
+          Log.d(tag, "default voice selected $name ($voiceId)")
+        }
+      }
+    }
+    val voiceId = resolvedPlaybackVoice?.voiceId
 
     _statusText.value = "Speaking…"
     _isSpeaking.value = true
@@ -1703,82 +1728,6 @@ class TalkModeManager(
     }
   }
 
-  private fun resolveVoiceAlias(value: String?): String? {
-    val trimmed = value?.trim().orEmpty()
-    if (trimmed.isEmpty()) return null
-    val normalized = normalizeAliasKey(trimmed)
-    voiceAliases[normalized]?.let { return it }
-    if (voiceAliases.values.any { it.equals(trimmed, ignoreCase = true) }) return trimmed
-    return if (isLikelyVoiceId(trimmed)) trimmed else null
-  }
-
-  private suspend fun resolveVoiceId(preferred: String?, apiKey: String): String? {
-    val trimmed = preferred?.trim().orEmpty()
-    if (trimmed.isNotEmpty()) {
-      val resolved = resolveVoiceAlias(trimmed)
-      // If it resolves as an alias, use the alias target.
-      // Otherwise treat it as a direct voice ID (e.g. "21m00Tcm4TlvDq8ikWAM").
-      return resolved ?: trimmed
-    }
-    fallbackVoiceId?.let { return it }
-
-    return try {
-      val voices = listVoices(apiKey)
-      val first = voices.firstOrNull() ?: return null
-      fallbackVoiceId = first.voiceId
-      if (defaultVoiceId.isNullOrBlank()) {
-        defaultVoiceId = first.voiceId
-      }
-      if (!voiceOverrideActive) {
-        currentVoiceId = first.voiceId
-      }
-      val name = first.name ?: "unknown"
-      Log.d(tag, "default voice selected $name (${first.voiceId})")
-      first.voiceId
-    } catch (err: Throwable) {
-      Log.w(tag, "list voices failed: ${err.message ?: err::class.simpleName}")
-      null
-    }
-  }
-
-  private suspend fun listVoices(apiKey: String): List<ElevenLabsVoice> {
-    return withContext(Dispatchers.IO) {
-      val url = URL("https://api.elevenlabs.io/v1/voices")
-      val conn = url.openConnection() as HttpURLConnection
-      conn.requestMethod = "GET"
-      conn.connectTimeout = 15_000
-      conn.readTimeout = 15_000
-      conn.setRequestProperty("xi-api-key", apiKey)
-
-      val code = conn.responseCode
-      val stream = if (code >= 400) conn.errorStream else conn.inputStream
-      val data = stream.readBytes()
-      if (code >= 400) {
-        val message = data.toString(Charsets.UTF_8)
-        throw IllegalStateException("ElevenLabs voices failed: $code $message")
-      }
-
-      val root = json.parseToJsonElement(data.toString(Charsets.UTF_8)).asObjectOrNull()
-      val voices = (root?.get("voices") as? JsonArray) ?: JsonArray(emptyList())
-      voices.mapNotNull { entry ->
-        val obj = entry.asObjectOrNull() ?: return@mapNotNull null
-        val voiceId = obj["voice_id"].asStringOrNull() ?: return@mapNotNull null
-        val name = obj["name"].asStringOrNull()
-        ElevenLabsVoice(voiceId, name)
-      }
-    }
-  }
-
-  private fun isLikelyVoiceId(value: String): Boolean {
-    if (value.length < 10) return false
-    return value.all { it.isLetterOrDigit() || it == '-' || it == '_' }
-  }
-
-  private fun normalizeAliasKey(value: String): String =
-    value.trim().lowercase()
-
-  private data class ElevenLabsVoice(val voiceId: String, val name: String?)
-
   private val listener =
     object : RecognitionListener {
       override fun onReadyForSpeech(params: Bundle?) {
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeVoiceResolver.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeVoiceResolver.kt
new file mode 100644
index 00000000000..eff52017624
--- /dev/null
+++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeVoiceResolver.kt
@@ -0,0 +1,146 @@
+package ai.openclaw.app.voice
+
+import java.net.HttpURLConnection
+import java.net.URL
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.withContext
+import kotlinx.serialization.json.Json
+import kotlinx.serialization.json.JsonArray
+import kotlinx.serialization.json.JsonElement
+import kotlinx.serialization.json.JsonObject
+import kotlinx.serialization.json.JsonPrimitive
+
+/** A voice entry parsed from the ElevenLabs voices listing. */
+internal data class ElevenLabsVoice(val voiceId: String, val name: String?)
+
+/**
+ * Result of [TalkModeVoiceResolver.resolveVoiceId]: the voice id to speak with plus the
+ * fallback/default/current voice ids the caller should store afterwards.
+ */
+internal data class TalkModeResolvedVoice(
+  val voiceId: String?,
+  val fallbackVoiceId: String?,
+  val defaultVoiceId: String?,
+  val currentVoiceId: String?,
+  val selectedVoiceName: String? = null,
+)
+
+internal object TalkModeVoiceResolver {
+  /**
+   * Maps [value] to a voice id: an alias key match (trimmed, lowercased) returns its target;
+   * a value equal to an alias target (ignoring case) or shaped like a voice id is returned
+   * trimmed; anything else, including blank input, yields null.
+   */
+  fun resolveVoiceAlias(value: String?, voiceAliases: Map<String, String>): String? {
+    val trimmed = value?.trim().orEmpty()
+    if (trimmed.isEmpty()) return null
+    val normalized = normalizeAliasKey(trimmed)
+    voiceAliases[normalized]?.let { return it }
+    if (voiceAliases.values.any { it.equals(trimmed, ignoreCase = true) }) return trimmed
+    return if (isLikelyVoiceId(trimmed)) trimmed else null
+  }
+
+  /**
+   * Picks the voice id to use: a non-blank [preferred] wins, then a cached [fallbackVoiceId],
+   * and only then the first entry returned by [listVoices]. The state fields are passed
+   * through unchanged unless the catalog was consulted and returned at least one voice.
+   */
+  suspend fun resolveVoiceId(
+    preferred: String?,
+    fallbackVoiceId: String?,
+    defaultVoiceId: String?,
+    currentVoiceId: String?,
+    voiceOverrideActive: Boolean,
+    listVoices: suspend () -> List<ElevenLabsVoice>,
+  ): TalkModeResolvedVoice {
+    val trimmed = preferred?.trim().orEmpty()
+    if (trimmed.isNotEmpty()) {
+      return TalkModeResolvedVoice(
+        voiceId = trimmed,
+        fallbackVoiceId = fallbackVoiceId,
+        defaultVoiceId = defaultVoiceId,
+        currentVoiceId = currentVoiceId,
+      )
+    }
+    if (!fallbackVoiceId.isNullOrBlank()) {
+      return TalkModeResolvedVoice(
+        voiceId = fallbackVoiceId,
+        fallbackVoiceId = fallbackVoiceId,
+        defaultVoiceId = defaultVoiceId,
+        currentVoiceId = currentVoiceId,
+      )
+    }
+
+    val first = listVoices().firstOrNull()
+    if (first == null) {
+      return TalkModeResolvedVoice(
+        voiceId = null,
+        fallbackVoiceId = fallbackVoiceId,
+        defaultVoiceId = defaultVoiceId,
+        currentVoiceId = currentVoiceId,
+      )
+    }
+
+    return TalkModeResolvedVoice(
+      voiceId = first.voiceId,
+      fallbackVoiceId = first.voiceId,
+      defaultVoiceId = if (defaultVoiceId.isNullOrBlank()) first.voiceId else defaultVoiceId,
+      currentVoiceId = if (voiceOverrideActive) currentVoiceId else first.voiceId,
+      selectedVoiceName = first.name,
+    )
+  }
+
+  /**
+   * Fetches the ElevenLabs voice list on [Dispatchers.IO] and keeps the entries that carry a
+   * string `voice_id`.
+   *
+   * @throws IllegalStateException when the HTTP status is 400 or above.
+   */
+  suspend fun listVoices(apiKey: String, json: Json): List<ElevenLabsVoice> {
+    return withContext(Dispatchers.IO) {
+      val url = URL("https://api.elevenlabs.io/v1/voices")
+      val conn = url.openConnection() as HttpURLConnection
+      try {
+        conn.requestMethod = "GET"
+        conn.connectTimeout = 15_000
+        conn.readTimeout = 15_000
+        conn.setRequestProperty("xi-api-key", apiKey)
+
+        val code = conn.responseCode
+        val stream = if (code >= 400) conn.errorStream else conn.inputStream
+        // errorStream is null when the server sent no error body; treat that as empty.
+        val data = stream?.use { it.readBytes() } ?: ByteArray(0)
+        if (code >= 400) {
+          val message = data.toString(Charsets.UTF_8)
+          throw IllegalStateException("ElevenLabs voices failed: $code $message")
+        }
+
+        val root = json.parseToJsonElement(data.toString(Charsets.UTF_8)).asObjectOrNull()
+        val voices = (root?.get("voices") as? JsonArray) ?: JsonArray(emptyList())
+        voices.mapNotNull { entry ->
+          val obj = entry.asObjectOrNull() ?: return@mapNotNull null
+          val voiceId = obj["voice_id"].asStringOrNull() ?: return@mapNotNull null
+          val name = obj["name"].asStringOrNull()
+          ElevenLabsVoice(voiceId, name)
+        }
+      } finally {
+        // Release the connection on both the success and the failure path.
+        conn.disconnect()
+      }
+    }
+  }
+
+  /** Heuristic: at least 10 characters, all of them letters, digits, '-' or '_'. */
+  private fun isLikelyVoiceId(value: String): Boolean {
+    if (value.length < 10) return false
+    return value.all { it.isLetterOrDigit() || it == '-' || it == '_' }
+  }
+
+  private fun normalizeAliasKey(value: String): String =
+    value.trim().lowercase()
+}
+
+private fun JsonElement?.asObjectOrNull(): JsonObject? = this as? JsonObject
+
+private fun JsonElement?.asStringOrNull(): String? =
+  (this as? JsonPrimitive)?.takeIf { it.isString }?.content
diff --git a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeVoiceResolverTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeVoiceResolverTest.kt
new file mode 100644
index 00000000000..5cd46895d42
--- /dev/null
+++ b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeVoiceResolverTest.kt
@@ -0,0 +1,92 @@
+package ai.openclaw.app.voice
+
+import kotlinx.coroutines.runBlocking
+import org.junit.Assert.assertEquals
+import org.junit.Assert.assertNull
+import org.junit.Test
+
+class TalkModeVoiceResolverTest {
+  @Test
+  fun resolvesVoiceAliasCaseInsensitively() {
+    val resolved =
+      TalkModeVoiceResolver.resolveVoiceAlias(
+        " Clawd ",
+        mapOf("clawd" to "voice-123"),
+      )
+
+    assertEquals("voice-123", resolved)
+  }
+
+  @Test
+  fun acceptsDirectVoiceIds() {
+    val resolved = TalkModeVoiceResolver.resolveVoiceAlias("21m00Tcm4TlvDq8ikWAM", emptyMap())
+
+    assertEquals("21m00Tcm4TlvDq8ikWAM", resolved)
+  }
+
+  @Test
+  fun rejectsUnknownAliases() {
+    val resolved = TalkModeVoiceResolver.resolveVoiceAlias("nickname", emptyMap())
+
+    assertNull(resolved)
+  }
+
+  @Test
+  fun reusesCachedFallbackVoiceBeforeFetchingCatalog() =
+    runBlocking {
+      var fetchCount = 0
+
+      val resolved =
+        TalkModeVoiceResolver.resolveVoiceId(
+          preferred = null,
+          fallbackVoiceId = "cached-voice",
+          defaultVoiceId = null,
+          currentVoiceId = null,
+          voiceOverrideActive = false,
+          listVoices = {
+            fetchCount += 1
+            emptyList()
+          },
+        )
+
+      assertEquals("cached-voice", resolved.voiceId)
+      assertEquals(0, fetchCount)
+    }
+
+  @Test
+  fun seedsDefaultVoiceFromCatalogWhenNeeded() =
+    runBlocking {
+      val resolved =
+        TalkModeVoiceResolver.resolveVoiceId(
+          preferred = null,
+          fallbackVoiceId = null,
+          defaultVoiceId = null,
+          currentVoiceId = null,
+          voiceOverrideActive = false,
+          listVoices = { listOf(ElevenLabsVoice("voice-1", "First")) },
+        )
+
+      assertEquals("voice-1", resolved.voiceId)
+      assertEquals("voice-1", resolved.fallbackVoiceId)
+      assertEquals("voice-1", resolved.defaultVoiceId)
+      assertEquals("voice-1", resolved.currentVoiceId)
+      assertEquals("First", resolved.selectedVoiceName)
+    }
+
+  @Test
+  fun preservesCurrentVoiceWhenOverrideIsActive() =
+    runBlocking {
+      val resolved =
+        TalkModeVoiceResolver.resolveVoiceId(
+          preferred = null,
+          fallbackVoiceId = null,
+          defaultVoiceId = null,
+          currentVoiceId = null,
+          voiceOverrideActive = true,
+          listVoices = { listOf(ElevenLabsVoice("voice-1", "First")) },
+        )
+
+      assertEquals("voice-1", resolved.voiceId)
+      assertNull(resolved.currentVoiceId)
+    }
+}