talk: add configurable silence timeout

This commit is contained in:
dano does design
2026-03-08 17:58:15 +11:00
committed by Peter Steinberger
parent 097c588a6b
commit 6ff7e8f42e
18 changed files with 162 additions and 9 deletions

View File

@@ -59,8 +59,8 @@ class TalkModeManager(
private const val tag = "TalkMode"
private const val defaultModelIdFallback = "eleven_v3"
private const val defaultOutputFormatFallback = "pcm_24000"
private const val defaultTalkProvider = "elevenlabs"
private const val silenceWindowMs = 500L
private const val defaultTalkProvider = "elevenlabs"
private const val defaultSilenceTimeoutMs = 700L
private const val listenWatchdogMs = 12_000L
private const val chatFinalWaitWithSubscribeMs = 45_000L
private const val chatFinalWaitWithoutSubscribeMs = 6_000L
@@ -105,6 +105,14 @@ private const val defaultTalkProvider = "elevenlabs"
normalizedPayload = false,
)
}
/**
 * Resolves the talk-mode silence timeout, in milliseconds, from the `talk` config object.
 *
 * Looks up the `silenceTimeoutMs` entry and accepts it only when it is a positive whole
 * number that converts to [Long] without being clamped. Falls back to
 * [defaultSilenceTimeoutMs] when `asDoubleOrNull()` yields `null` for the entry
 * (presumably for a missing [talk] object, a missing key, or a non-numeric value —
 * confirm against that helper) or when the value is rejected by the checks below.
 *
 * @param talk the parsed `talk` configuration section, or `null` when absent.
 * @return the configured timeout, or [defaultSilenceTimeoutMs] as the fallback.
 */
internal fun resolvedSilenceTimeoutMs(talk: JsonObject?): Long {
    val timeout = talk?.get("silenceTimeoutMs").asDoubleOrNull() ?: return defaultSilenceTimeoutMs
    // NaN fails `> 0`; infinities fail the remainder check because `Inf % 1.0` is NaN.
    val isPositiveWholeNumber = timeout > 0 && timeout % 1.0 == 0.0
    // Long.MAX_VALUE.toDouble() rounds up to 2^63, which does not fit in a Long. The bound is
    // therefore exclusive, so 2^63 falls back to the default instead of being silently
    // clamped to Long.MAX_VALUE by toLong().
    val fitsInLong = timeout < Long.MAX_VALUE.toDouble()
    return if (isPositiveWholeNumber && fitsInLong) timeout.toLong() else defaultSilenceTimeoutMs
}
}
private val mainHandler = Handler(Looper.getMainLooper())
@@ -134,7 +142,7 @@ private const val defaultTalkProvider = "elevenlabs"
private var listeningMode = false
private var silenceJob: Job? = null
private val silenceWindowMs = 700L
private var silenceWindowMs = defaultSilenceTimeoutMs
private var lastTranscript: String = ""
private var lastHeardAtMs: Long? = null
private var lastSpokenText: String? = null
@@ -1411,6 +1419,7 @@ private const val defaultTalkProvider = "elevenlabs"
activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull()
val silenceTimeoutMs = resolvedSilenceTimeoutMs(talk)
if (!isCanonicalMainSessionKey(mainSessionKey)) {
mainSessionKey = mainKey
@@ -1427,7 +1436,11 @@ private const val defaultTalkProvider = "elevenlabs"
if (!modelOverrideActive) currentModelId = defaultModelId
defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback
apiKey = key ?: envKey?.takeIf { it.isNotEmpty() }
Log.d(tag, "reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId")
silenceWindowMs = silenceTimeoutMs
Log.d(
tag,
"reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId silenceTimeoutMs=$silenceTimeoutMs",
)
if (interrupt != null) interruptOnSpeech = interrupt
activeProviderIsElevenLabs = activeProvider == defaultTalkProvider
if (!activeProviderIsElevenLabs) {
@@ -1441,6 +1454,7 @@ private const val defaultTalkProvider = "elevenlabs"
}
configLoaded = true
} catch (_: Throwable) {
silenceWindowMs = defaultSilenceTimeoutMs
defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
defaultModelId = defaultModelIdFallback
if (!modelOverrideActive) currentModelId = defaultModelId

View File

@@ -54,4 +54,23 @@ class TalkModeConfigParsingTest {
assertEquals("voice-legacy", selection?.config?.get("voiceId")?.jsonPrimitive?.content)
assertEquals("legacy-key", selection?.config?.get("apiKey")?.jsonPrimitive?.content)
}
@Test
fun readsConfiguredSilenceTimeoutMs() {
    // A positive whole-number entry is expected to come back unchanged.
    val config = buildJsonObject { put("silenceTimeoutMs", 1500) }
    val resolved = TalkModeManager.resolvedSilenceTimeoutMs(config)
    assertEquals(1500L, resolved)
}
@Test
fun defaultsSilenceTimeoutMsWhenMissing() {
    // With no talk config at all, the resolver is expected to return 700 ms.
    val resolved = TalkModeManager.resolvedSilenceTimeoutMs(null)
    assertEquals(700L, resolved)
}
@Test
fun defaultsSilenceTimeoutMsWhenInvalid() {
    // Zero is not a positive timeout, so the resolver is expected to return 700 ms.
    val config = buildJsonObject { put("silenceTimeoutMs", 0) }
    val resolved = TalkModeManager.resolvedSilenceTimeoutMs(config)
    assertEquals(700L, resolved)
}
}