mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
refactor: split android talk voice resolution
This commit is contained in:
@@ -813,7 +813,7 @@ class TalkModeManager(
|
||||
_lastAssistantText.value = cleaned
|
||||
|
||||
val requestedVoice = directive?.voiceId?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val resolvedVoice = resolveVoiceAlias(requestedVoice)
|
||||
val resolvedVoice = TalkModeVoiceResolver.resolveVoiceAlias(requestedVoice, voiceAliases)
|
||||
if (requestedVoice != null && resolvedVoice == null) {
|
||||
Log.w(tag, "unknown voice alias: $requestedVoice")
|
||||
}
|
||||
@@ -836,12 +836,35 @@ class TalkModeManager(
|
||||
apiKey?.trim()?.takeIf { it.isNotEmpty() }
|
||||
?: System.getenv("ELEVENLABS_API_KEY")?.trim()
|
||||
val preferredVoice = resolvedVoice ?: currentVoiceId ?: defaultVoiceId
|
||||
val voiceId =
|
||||
val resolvedPlaybackVoice =
|
||||
if (!apiKey.isNullOrEmpty()) {
|
||||
resolveVoiceId(preferredVoice, apiKey)
|
||||
try {
|
||||
TalkModeVoiceResolver.resolveVoiceId(
|
||||
preferred = preferredVoice,
|
||||
fallbackVoiceId = fallbackVoiceId,
|
||||
defaultVoiceId = defaultVoiceId,
|
||||
currentVoiceId = currentVoiceId,
|
||||
voiceOverrideActive = voiceOverrideActive,
|
||||
listVoices = { TalkModeVoiceResolver.listVoices(apiKey, json) },
|
||||
)
|
||||
} catch (err: Throwable) {
|
||||
Log.w(tag, "list voices failed: ${err.message ?: err::class.simpleName}")
|
||||
null
|
||||
}
|
||||
} else {
|
||||
null
|
||||
}
|
||||
resolvedPlaybackVoice?.let { resolved ->
|
||||
fallbackVoiceId = resolved.fallbackVoiceId
|
||||
defaultVoiceId = resolved.defaultVoiceId
|
||||
currentVoiceId = resolved.currentVoiceId
|
||||
resolved.selectedVoiceName?.let { name ->
|
||||
resolved.voiceId?.let { voiceId ->
|
||||
Log.d(tag, "default voice selected $name ($voiceId)")
|
||||
}
|
||||
}
|
||||
}
|
||||
val voiceId = resolvedPlaybackVoice?.voiceId
|
||||
|
||||
_statusText.value = "Speaking…"
|
||||
_isSpeaking.value = true
|
||||
@@ -1703,82 +1726,6 @@ class TalkModeManager(
|
||||
}
|
||||
}
|
||||
|
||||
private fun resolveVoiceAlias(value: String?): String? {
|
||||
val trimmed = value?.trim().orEmpty()
|
||||
if (trimmed.isEmpty()) return null
|
||||
val normalized = normalizeAliasKey(trimmed)
|
||||
voiceAliases[normalized]?.let { return it }
|
||||
if (voiceAliases.values.any { it.equals(trimmed, ignoreCase = true) }) return trimmed
|
||||
return if (isLikelyVoiceId(trimmed)) trimmed else null
|
||||
}
|
||||
|
||||
private suspend fun resolveVoiceId(preferred: String?, apiKey: String): String? {
|
||||
val trimmed = preferred?.trim().orEmpty()
|
||||
if (trimmed.isNotEmpty()) {
|
||||
val resolved = resolveVoiceAlias(trimmed)
|
||||
// If it resolves as an alias, use the alias target.
|
||||
// Otherwise treat it as a direct voice ID (e.g. "21m00Tcm4TlvDq8ikWAM").
|
||||
return resolved ?: trimmed
|
||||
}
|
||||
fallbackVoiceId?.let { return it }
|
||||
|
||||
return try {
|
||||
val voices = listVoices(apiKey)
|
||||
val first = voices.firstOrNull() ?: return null
|
||||
fallbackVoiceId = first.voiceId
|
||||
if (defaultVoiceId.isNullOrBlank()) {
|
||||
defaultVoiceId = first.voiceId
|
||||
}
|
||||
if (!voiceOverrideActive) {
|
||||
currentVoiceId = first.voiceId
|
||||
}
|
||||
val name = first.name ?: "unknown"
|
||||
Log.d(tag, "default voice selected $name (${first.voiceId})")
|
||||
first.voiceId
|
||||
} catch (err: Throwable) {
|
||||
Log.w(tag, "list voices failed: ${err.message ?: err::class.simpleName}")
|
||||
null
|
||||
}
|
||||
}
|
||||
|
||||
private suspend fun listVoices(apiKey: String): List<ElevenLabsVoice> {
|
||||
return withContext(Dispatchers.IO) {
|
||||
val url = URL("https://api.elevenlabs.io/v1/voices")
|
||||
val conn = url.openConnection() as HttpURLConnection
|
||||
conn.requestMethod = "GET"
|
||||
conn.connectTimeout = 15_000
|
||||
conn.readTimeout = 15_000
|
||||
conn.setRequestProperty("xi-api-key", apiKey)
|
||||
|
||||
val code = conn.responseCode
|
||||
val stream = if (code >= 400) conn.errorStream else conn.inputStream
|
||||
val data = stream.readBytes()
|
||||
if (code >= 400) {
|
||||
val message = data.toString(Charsets.UTF_8)
|
||||
throw IllegalStateException("ElevenLabs voices failed: $code $message")
|
||||
}
|
||||
|
||||
val root = json.parseToJsonElement(data.toString(Charsets.UTF_8)).asObjectOrNull()
|
||||
val voices = (root?.get("voices") as? JsonArray) ?: JsonArray(emptyList())
|
||||
voices.mapNotNull { entry ->
|
||||
val obj = entry.asObjectOrNull() ?: return@mapNotNull null
|
||||
val voiceId = obj["voice_id"].asStringOrNull() ?: return@mapNotNull null
|
||||
val name = obj["name"].asStringOrNull()
|
||||
ElevenLabsVoice(voiceId, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun isLikelyVoiceId(value: String): Boolean {
|
||||
if (value.length < 10) return false
|
||||
return value.all { it.isLetterOrDigit() || it == '-' || it == '_' }
|
||||
}
|
||||
|
||||
private fun normalizeAliasKey(value: String): String =
|
||||
value.trim().lowercase()
|
||||
|
||||
private data class ElevenLabsVoice(val voiceId: String, val name: String?)
|
||||
|
||||
private val listener =
|
||||
object : RecognitionListener {
|
||||
override fun onReadyForSpeech(params: Bundle?) {
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
package ai.openclaw.app.voice
|
||||
|
||||
import java.net.HttpURLConnection
|
||||
import java.net.URL
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.withContext
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.JsonArray
|
||||
import kotlinx.serialization.json.JsonElement
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
|
||||
internal data class ElevenLabsVoice(val voiceId: String, val name: String?)
|
||||
|
||||
internal data class TalkModeResolvedVoice(
|
||||
val voiceId: String?,
|
||||
val fallbackVoiceId: String?,
|
||||
val defaultVoiceId: String?,
|
||||
val currentVoiceId: String?,
|
||||
val selectedVoiceName: String? = null,
|
||||
)
|
||||
|
||||
internal object TalkModeVoiceResolver {
|
||||
fun resolveVoiceAlias(value: String?, voiceAliases: Map<String, String>): String? {
|
||||
val trimmed = value?.trim().orEmpty()
|
||||
if (trimmed.isEmpty()) return null
|
||||
val normalized = normalizeAliasKey(trimmed)
|
||||
voiceAliases[normalized]?.let { return it }
|
||||
if (voiceAliases.values.any { it.equals(trimmed, ignoreCase = true) }) return trimmed
|
||||
return if (isLikelyVoiceId(trimmed)) trimmed else null
|
||||
}
|
||||
|
||||
suspend fun resolveVoiceId(
|
||||
preferred: String?,
|
||||
fallbackVoiceId: String?,
|
||||
defaultVoiceId: String?,
|
||||
currentVoiceId: String?,
|
||||
voiceOverrideActive: Boolean,
|
||||
listVoices: suspend () -> List<ElevenLabsVoice>,
|
||||
): TalkModeResolvedVoice {
|
||||
val trimmed = preferred?.trim().orEmpty()
|
||||
if (trimmed.isNotEmpty()) {
|
||||
return TalkModeResolvedVoice(
|
||||
voiceId = trimmed,
|
||||
fallbackVoiceId = fallbackVoiceId,
|
||||
defaultVoiceId = defaultVoiceId,
|
||||
currentVoiceId = currentVoiceId,
|
||||
)
|
||||
}
|
||||
if (!fallbackVoiceId.isNullOrBlank()) {
|
||||
return TalkModeResolvedVoice(
|
||||
voiceId = fallbackVoiceId,
|
||||
fallbackVoiceId = fallbackVoiceId,
|
||||
defaultVoiceId = defaultVoiceId,
|
||||
currentVoiceId = currentVoiceId,
|
||||
)
|
||||
}
|
||||
|
||||
val first = listVoices().firstOrNull()
|
||||
if (first == null) {
|
||||
return TalkModeResolvedVoice(
|
||||
voiceId = null,
|
||||
fallbackVoiceId = fallbackVoiceId,
|
||||
defaultVoiceId = defaultVoiceId,
|
||||
currentVoiceId = currentVoiceId,
|
||||
)
|
||||
}
|
||||
|
||||
return TalkModeResolvedVoice(
|
||||
voiceId = first.voiceId,
|
||||
fallbackVoiceId = first.voiceId,
|
||||
defaultVoiceId = if (defaultVoiceId.isNullOrBlank()) first.voiceId else defaultVoiceId,
|
||||
currentVoiceId = if (voiceOverrideActive) currentVoiceId else first.voiceId,
|
||||
selectedVoiceName = first.name,
|
||||
)
|
||||
}
|
||||
|
||||
suspend fun listVoices(apiKey: String, json: Json): List<ElevenLabsVoice> {
|
||||
return withContext(Dispatchers.IO) {
|
||||
val url = URL("https://api.elevenlabs.io/v1/voices")
|
||||
val conn = url.openConnection() as HttpURLConnection
|
||||
conn.requestMethod = "GET"
|
||||
conn.connectTimeout = 15_000
|
||||
conn.readTimeout = 15_000
|
||||
conn.setRequestProperty("xi-api-key", apiKey)
|
||||
|
||||
val code = conn.responseCode
|
||||
val stream = if (code >= 400) conn.errorStream else conn.inputStream
|
||||
val data = stream.readBytes()
|
||||
if (code >= 400) {
|
||||
val message = data.toString(Charsets.UTF_8)
|
||||
throw IllegalStateException("ElevenLabs voices failed: $code $message")
|
||||
}
|
||||
|
||||
val root = json.parseToJsonElement(data.toString(Charsets.UTF_8)).asObjectOrNull()
|
||||
val voices = (root?.get("voices") as? JsonArray) ?: JsonArray(emptyList())
|
||||
voices.mapNotNull { entry ->
|
||||
val obj = entry.asObjectOrNull() ?: return@mapNotNull null
|
||||
val voiceId = obj["voice_id"].asStringOrNull() ?: return@mapNotNull null
|
||||
val name = obj["name"].asStringOrNull()
|
||||
ElevenLabsVoice(voiceId, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun isLikelyVoiceId(value: String): Boolean {
|
||||
if (value.length < 10) return false
|
||||
return value.all { it.isLetterOrDigit() || it == '-' || it == '_' }
|
||||
}
|
||||
|
||||
private fun normalizeAliasKey(value: String): String =
|
||||
value.trim().lowercase()
|
||||
}
|
||||
|
||||
private fun JsonElement?.asObjectOrNull(): JsonObject? = this as? JsonObject
|
||||
|
||||
private fun JsonElement?.asStringOrNull(): String? =
|
||||
(this as? JsonPrimitive)?.takeIf { it.isString }?.content
|
||||
@@ -0,0 +1,92 @@
|
||||
package ai.openclaw.app.voice
|
||||
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertNull
|
||||
import org.junit.Test
|
||||
|
||||
class TalkModeVoiceResolverTest {
|
||||
@Test
|
||||
fun resolvesVoiceAliasCaseInsensitively() {
|
||||
val resolved =
|
||||
TalkModeVoiceResolver.resolveVoiceAlias(
|
||||
" Clawd ",
|
||||
mapOf("clawd" to "voice-123"),
|
||||
)
|
||||
|
||||
assertEquals("voice-123", resolved)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun acceptsDirectVoiceIds() {
|
||||
val resolved = TalkModeVoiceResolver.resolveVoiceAlias("21m00Tcm4TlvDq8ikWAM", emptyMap())
|
||||
|
||||
assertEquals("21m00Tcm4TlvDq8ikWAM", resolved)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun rejectsUnknownAliases() {
|
||||
val resolved = TalkModeVoiceResolver.resolveVoiceAlias("nickname", emptyMap())
|
||||
|
||||
assertNull(resolved)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun reusesCachedFallbackVoiceBeforeFetchingCatalog() =
|
||||
runBlocking {
|
||||
var fetchCount = 0
|
||||
|
||||
val resolved =
|
||||
TalkModeVoiceResolver.resolveVoiceId(
|
||||
preferred = null,
|
||||
fallbackVoiceId = "cached-voice",
|
||||
defaultVoiceId = null,
|
||||
currentVoiceId = null,
|
||||
voiceOverrideActive = false,
|
||||
listVoices = {
|
||||
fetchCount += 1
|
||||
emptyList()
|
||||
},
|
||||
)
|
||||
|
||||
assertEquals("cached-voice", resolved.voiceId)
|
||||
assertEquals(0, fetchCount)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun seedsDefaultVoiceFromCatalogWhenNeeded() =
|
||||
runBlocking {
|
||||
val resolved =
|
||||
TalkModeVoiceResolver.resolveVoiceId(
|
||||
preferred = null,
|
||||
fallbackVoiceId = null,
|
||||
defaultVoiceId = null,
|
||||
currentVoiceId = null,
|
||||
voiceOverrideActive = false,
|
||||
listVoices = { listOf(ElevenLabsVoice("voice-1", "First")) },
|
||||
)
|
||||
|
||||
assertEquals("voice-1", resolved.voiceId)
|
||||
assertEquals("voice-1", resolved.fallbackVoiceId)
|
||||
assertEquals("voice-1", resolved.defaultVoiceId)
|
||||
assertEquals("voice-1", resolved.currentVoiceId)
|
||||
assertEquals("First", resolved.selectedVoiceName)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun preservesCurrentVoiceWhenOverrideIsActive() =
|
||||
runBlocking {
|
||||
val resolved =
|
||||
TalkModeVoiceResolver.resolveVoiceId(
|
||||
preferred = null,
|
||||
fallbackVoiceId = null,
|
||||
defaultVoiceId = null,
|
||||
currentVoiceId = null,
|
||||
voiceOverrideActive = true,
|
||||
listVoices = { listOf(ElevenLabsVoice("voice-1", "First")) },
|
||||
)
|
||||
|
||||
assertEquals("voice-1", resolved.voiceId)
|
||||
assertNull(resolved.currentVoiceId)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user