mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
talk: add configurable silence timeout
This commit is contained in:
committed by
Peter Steinberger
parent
097c588a6b
commit
6ff7e8f42e
@@ -59,8 +59,8 @@ class TalkModeManager(
|
||||
private const val tag = "TalkMode"
|
||||
private const val defaultModelIdFallback = "eleven_v3"
|
||||
private const val defaultOutputFormatFallback = "pcm_24000"
|
||||
private const val defaultTalkProvider = "elevenlabs"
|
||||
private const val silenceWindowMs = 500L
|
||||
private const val defaultTalkProvider = "elevenlabs"
|
||||
private const val defaultSilenceTimeoutMs = 700L
|
||||
private const val listenWatchdogMs = 12_000L
|
||||
private const val chatFinalWaitWithSubscribeMs = 45_000L
|
||||
private const val chatFinalWaitWithoutSubscribeMs = 6_000L
|
||||
@@ -105,6 +105,14 @@ private const val defaultTalkProvider = "elevenlabs"
|
||||
normalizedPayload = false,
|
||||
)
|
||||
}
|
||||
|
||||
internal fun resolvedSilenceTimeoutMs(talk: JsonObject?): Long {
|
||||
val timeout = talk?.get("silenceTimeoutMs").asDoubleOrNull() ?: return defaultSilenceTimeoutMs
|
||||
if (timeout <= 0 || timeout % 1.0 != 0.0 || timeout > Long.MAX_VALUE.toDouble()) {
|
||||
return defaultSilenceTimeoutMs
|
||||
}
|
||||
return timeout.toLong()
|
||||
}
|
||||
}
|
||||
|
||||
private val mainHandler = Handler(Looper.getMainLooper())
|
||||
@@ -134,7 +142,7 @@ private const val defaultTalkProvider = "elevenlabs"
|
||||
private var listeningMode = false
|
||||
|
||||
private var silenceJob: Job? = null
|
||||
private val silenceWindowMs = 700L
|
||||
private var silenceWindowMs = defaultSilenceTimeoutMs
|
||||
private var lastTranscript: String = ""
|
||||
private var lastHeardAtMs: Long? = null
|
||||
private var lastSpokenText: String? = null
|
||||
@@ -1411,6 +1419,7 @@ private const val defaultTalkProvider = "elevenlabs"
|
||||
activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull()
|
||||
val silenceTimeoutMs = resolvedSilenceTimeoutMs(talk)
|
||||
|
||||
if (!isCanonicalMainSessionKey(mainSessionKey)) {
|
||||
mainSessionKey = mainKey
|
||||
@@ -1427,7 +1436,11 @@ private const val defaultTalkProvider = "elevenlabs"
|
||||
if (!modelOverrideActive) currentModelId = defaultModelId
|
||||
defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback
|
||||
apiKey = key ?: envKey?.takeIf { it.isNotEmpty() }
|
||||
Log.d(tag, "reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId")
|
||||
silenceWindowMs = silenceTimeoutMs
|
||||
Log.d(
|
||||
tag,
|
||||
"reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId silenceTimeoutMs=$silenceTimeoutMs",
|
||||
)
|
||||
if (interrupt != null) interruptOnSpeech = interrupt
|
||||
activeProviderIsElevenLabs = activeProvider == defaultTalkProvider
|
||||
if (!activeProviderIsElevenLabs) {
|
||||
@@ -1441,6 +1454,7 @@ private const val defaultTalkProvider = "elevenlabs"
|
||||
}
|
||||
configLoaded = true
|
||||
} catch (_: Throwable) {
|
||||
silenceWindowMs = defaultSilenceTimeoutMs
|
||||
defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
|
||||
defaultModelId = defaultModelIdFallback
|
||||
if (!modelOverrideActive) currentModelId = defaultModelId
|
||||
|
||||
@@ -54,4 +54,23 @@ class TalkModeConfigParsingTest {
|
||||
assertEquals("voice-legacy", selection?.config?.get("voiceId")?.jsonPrimitive?.content)
|
||||
assertEquals("legacy-key", selection?.config?.get("apiKey")?.jsonPrimitive?.content)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun readsConfiguredSilenceTimeoutMs() {
|
||||
val talk = buildJsonObject { put("silenceTimeoutMs", 1500) }
|
||||
|
||||
assertEquals(1500L, TalkModeManager.resolvedSilenceTimeoutMs(talk))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun defaultsSilenceTimeoutMsWhenMissing() {
|
||||
assertEquals(700L, TalkModeManager.resolvedSilenceTimeoutMs(null))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun defaultsSilenceTimeoutMsWhenInvalid() {
|
||||
val talk = buildJsonObject { put("silenceTimeoutMs", 0) }
|
||||
|
||||
assertEquals(700L, TalkModeManager.resolvedSilenceTimeoutMs(talk))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ final class TalkModeManager: NSObject {
|
||||
private typealias SpeechRequest = SFSpeechAudioBufferRecognitionRequest
|
||||
private static let defaultModelIdFallback = "eleven_v3"
|
||||
private static let defaultTalkProvider = "elevenlabs"
|
||||
private static let defaultSilenceTimeoutMs = 900
|
||||
private static let redactedConfigSentinel = "__OPENCLAW_REDACTED__"
|
||||
var isEnabled: Bool = false
|
||||
var isListening: Bool = false
|
||||
@@ -97,7 +98,7 @@ final class TalkModeManager: NSObject {
|
||||
|
||||
private var gateway: GatewayNodeSession?
|
||||
private var gatewayConnected = false
|
||||
private let silenceWindow: TimeInterval = 0.9
|
||||
private var silenceWindow: TimeInterval = TimeInterval(Self.defaultSilenceTimeoutMs) / 1000
|
||||
private var lastAudioActivity: Date?
|
||||
private var noiseFloorSamples: [Double] = []
|
||||
private var noiseFloor: Double?
|
||||
@@ -2001,6 +2002,24 @@ extension TalkModeManager {
|
||||
config: normalizedProviders[providerID] ?? [:])
|
||||
}
|
||||
|
||||
static func resolvedSilenceTimeoutMs(_ talk: [String: Any]?) -> Int {
|
||||
switch talk?["silenceTimeoutMs"] {
|
||||
case let timeout as Int where timeout > 0:
|
||||
return timeout
|
||||
case let timeout as Double
|
||||
where timeout > 0 && timeout.rounded(.towardZero) == timeout && timeout <= Double(Int.max):
|
||||
return Int(timeout)
|
||||
case let timeout as NSNumber:
|
||||
let value = timeout.doubleValue
|
||||
if value > 0 && value.rounded(.towardZero) == value && value <= Double(Int.max) {
|
||||
return Int(value)
|
||||
}
|
||||
return Self.defaultSilenceTimeoutMs
|
||||
default:
|
||||
return Self.defaultSilenceTimeoutMs
|
||||
}
|
||||
}
|
||||
|
||||
func reloadConfig() async {
|
||||
guard let gateway else { return }
|
||||
self.pcmFormatUnavailable = false
|
||||
@@ -2020,6 +2039,7 @@ extension TalkModeManager {
|
||||
}
|
||||
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
|
||||
let activeConfig = selection?.config
|
||||
let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk)
|
||||
self.defaultVoiceId = (activeConfig?["voiceId"] as? String)?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
if let aliases = activeConfig?["voiceAliases"] as? [String: Any] {
|
||||
@@ -2067,8 +2087,9 @@ extension TalkModeManager {
|
||||
if let interrupt = talk?["interruptOnSpeech"] as? Bool {
|
||||
self.interruptOnSpeech = interrupt
|
||||
}
|
||||
self.silenceWindow = TimeInterval(silenceTimeoutMs) / 1000
|
||||
if selection != nil {
|
||||
GatewayDiagnostics.log("talk config provider=\(activeProvider)")
|
||||
GatewayDiagnostics.log("talk config provider=\(activeProvider) silenceTimeoutMs=\(silenceTimeoutMs)")
|
||||
}
|
||||
} catch {
|
||||
self.defaultModelId = Self.defaultModelIdFallback
|
||||
@@ -2079,6 +2100,7 @@ extension TalkModeManager {
|
||||
self.gatewayTalkDefaultModelId = nil
|
||||
self.gatewayTalkApiKeyConfigured = false
|
||||
self.gatewayTalkConfigLoaded = false
|
||||
self.silenceWindow = TimeInterval(Self.defaultSilenceTimeoutMs) / 1000
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -47,4 +47,24 @@ import Testing
|
||||
userInfo: [NSLocalizedDescriptionKey: "queue enqueue failed"])
|
||||
#expect(TalkModeManager._test_isPCMFormatRejectedByAPI(error) == false)
|
||||
}
|
||||
|
||||
@Test func readsConfiguredSilenceTimeoutMs() {
|
||||
let talk: [String: Any] = [
|
||||
"silenceTimeoutMs": 1500,
|
||||
]
|
||||
|
||||
#expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 1500)
|
||||
}
|
||||
|
||||
@Test func defaultsSilenceTimeoutMsWhenMissing() {
|
||||
#expect(TalkModeManager.resolvedSilenceTimeoutMs(nil) == 900)
|
||||
}
|
||||
|
||||
@Test func defaultsSilenceTimeoutMsWhenInvalid() {
|
||||
let talk: [String: Any] = [
|
||||
"silenceTimeoutMs": 0,
|
||||
]
|
||||
|
||||
#expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 900)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ actor TalkModeRuntime {
|
||||
private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts")
|
||||
private static let defaultModelIdFallback = "eleven_v3"
|
||||
private static let defaultTalkProvider = "elevenlabs"
|
||||
private static let defaultSilenceTimeoutMs = 700
|
||||
|
||||
private final class RMSMeter: @unchecked Sendable {
|
||||
private let lock = NSLock()
|
||||
@@ -66,7 +67,7 @@ actor TalkModeRuntime {
|
||||
private var fallbackVoiceId: String?
|
||||
private var lastPlaybackWasPCM: Bool = false
|
||||
|
||||
private let silenceWindow: TimeInterval = 0.7
|
||||
private var silenceWindow: TimeInterval = TimeInterval(TalkModeRuntime.defaultSilenceTimeoutMs) / 1000
|
||||
private let minSpeechRMS: Double = 1e-3
|
||||
private let speechBoostFactor: Double = 6.0
|
||||
|
||||
@@ -783,6 +784,7 @@ extension TalkModeRuntime {
|
||||
}
|
||||
self.defaultOutputFormat = cfg.outputFormat
|
||||
self.interruptOnSpeech = cfg.interruptOnSpeech
|
||||
self.silenceWindow = TimeInterval(cfg.silenceTimeoutMs) / 1000
|
||||
self.apiKey = cfg.apiKey
|
||||
let hasApiKey = (cfg.apiKey?.isEmpty == false)
|
||||
let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none"
|
||||
@@ -792,7 +794,8 @@ extension TalkModeRuntime {
|
||||
"talk config voiceId=\(voiceLabel, privacy: .public) " +
|
||||
"modelId=\(modelLabel, privacy: .public) " +
|
||||
"apiKey=\(hasApiKey, privacy: .public) " +
|
||||
"interrupt=\(cfg.interruptOnSpeech, privacy: .public)")
|
||||
"interrupt=\(cfg.interruptOnSpeech, privacy: .public) " +
|
||||
"silenceTimeoutMs=\(cfg.silenceTimeoutMs, privacy: .public)")
|
||||
}
|
||||
|
||||
private struct TalkRuntimeConfig {
|
||||
@@ -801,6 +804,7 @@ extension TalkModeRuntime {
|
||||
let modelId: String?
|
||||
let outputFormat: String?
|
||||
let interruptOnSpeech: Bool
|
||||
let silenceTimeoutMs: Int
|
||||
let apiKey: String?
|
||||
}
|
||||
|
||||
@@ -880,6 +884,21 @@ extension TalkModeRuntime {
|
||||
normalizedPayload: false)
|
||||
}
|
||||
|
||||
static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?) -> Int {
|
||||
if let timeout = talk?["silenceTimeoutMs"]?.intValue, timeout > 0 {
|
||||
return timeout
|
||||
}
|
||||
if
|
||||
let timeout = talk?["silenceTimeoutMs"]?.doubleValue,
|
||||
timeout > 0,
|
||||
timeout.rounded(.towardZero) == timeout,
|
||||
timeout <= Double(Int.max)
|
||||
{
|
||||
return Int(timeout)
|
||||
}
|
||||
return Self.defaultSilenceTimeoutMs
|
||||
}
|
||||
|
||||
private func fetchTalkConfig() async -> TalkRuntimeConfig {
|
||||
let env = ProcessInfo.processInfo.environment
|
||||
let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
@@ -895,6 +914,7 @@ extension TalkModeRuntime {
|
||||
let selection = Self.selectTalkProviderConfig(talk)
|
||||
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
|
||||
let activeConfig = selection?.config
|
||||
let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk)
|
||||
let ui = snap.config?["ui"]?.dictionaryValue
|
||||
let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
|
||||
await MainActor.run {
|
||||
@@ -939,6 +959,7 @@ extension TalkModeRuntime {
|
||||
modelId: resolvedModel,
|
||||
outputFormat: outputFormat,
|
||||
interruptOnSpeech: interrupt ?? true,
|
||||
silenceTimeoutMs: silenceTimeoutMs,
|
||||
apiKey: resolvedApiKey)
|
||||
} catch {
|
||||
let resolvedVoice =
|
||||
@@ -951,6 +972,7 @@ extension TalkModeRuntime {
|
||||
modelId: Self.defaultModelIdFallback,
|
||||
outputFormat: nil,
|
||||
interruptOnSpeech: true,
|
||||
silenceTimeoutMs: Self.defaultSilenceTimeoutMs,
|
||||
apiKey: resolvedApiKey)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,4 +32,24 @@ struct TalkModeConfigParsingTests {
|
||||
#expect(selection?.config["voiceId"]?.stringValue == "voice-legacy")
|
||||
#expect(selection?.config["apiKey"]?.stringValue == "legacy-key")
|
||||
}
|
||||
|
||||
@Test func readsConfiguredSilenceTimeoutMs() {
|
||||
let talk: [String: AnyCodable] = [
|
||||
"silenceTimeoutMs": AnyCodable(1500),
|
||||
]
|
||||
|
||||
#expect(TalkModeRuntime.resolvedSilenceTimeoutMs(talk) == 1500)
|
||||
}
|
||||
|
||||
@Test func defaultsSilenceTimeoutMsWhenMissing() {
|
||||
#expect(TalkModeRuntime.resolvedSilenceTimeoutMs(nil) == 700)
|
||||
}
|
||||
|
||||
@Test func defaultsSilenceTimeoutMsWhenInvalid() {
|
||||
let talk: [String: AnyCodable] = [
|
||||
"silenceTimeoutMs": AnyCodable(0),
|
||||
]
|
||||
|
||||
#expect(TalkModeRuntime.resolvedSilenceTimeoutMs(talk) == 700)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user