talk: add configurable silence timeout

This commit is contained in:
dano does design
2026-03-08 17:58:15 +11:00
committed by Peter Steinberger
parent 097c588a6b
commit 6ff7e8f42e
18 changed files with 162 additions and 9 deletions

View File

@@ -12,6 +12,7 @@ actor TalkModeRuntime {
private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts")
private static let defaultModelIdFallback = "eleven_v3"
private static let defaultTalkProvider = "elevenlabs"
private static let defaultSilenceTimeoutMs = 700
private final class RMSMeter: @unchecked Sendable {
private let lock = NSLock()
@@ -66,7 +67,7 @@ actor TalkModeRuntime {
private var fallbackVoiceId: String?
private var lastPlaybackWasPCM: Bool = false
private let silenceWindow: TimeInterval = 0.7
private var silenceWindow: TimeInterval = TimeInterval(TalkModeRuntime.defaultSilenceTimeoutMs) / 1000
private let minSpeechRMS: Double = 1e-3
private let speechBoostFactor: Double = 6.0
@@ -783,6 +784,7 @@ extension TalkModeRuntime {
}
self.defaultOutputFormat = cfg.outputFormat
self.interruptOnSpeech = cfg.interruptOnSpeech
self.silenceWindow = TimeInterval(cfg.silenceTimeoutMs) / 1000
self.apiKey = cfg.apiKey
let hasApiKey = (cfg.apiKey?.isEmpty == false)
let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none"
@@ -792,7 +794,8 @@ extension TalkModeRuntime {
"talk config voiceId=\(voiceLabel, privacy: .public) " +
"modelId=\(modelLabel, privacy: .public) " +
"apiKey=\(hasApiKey, privacy: .public) " +
"interrupt=\(cfg.interruptOnSpeech, privacy: .public)")
"interrupt=\(cfg.interruptOnSpeech, privacy: .public) " +
"silenceTimeoutMs=\(cfg.silenceTimeoutMs, privacy: .public)")
}
private struct TalkRuntimeConfig {
@@ -801,6 +804,7 @@ extension TalkModeRuntime {
let modelId: String?
let outputFormat: String?
let interruptOnSpeech: Bool
let silenceTimeoutMs: Int
let apiKey: String?
}
@@ -880,6 +884,21 @@ extension TalkModeRuntime {
normalizedPayload: false)
}
/// Resolves the talk-mode silence timeout, in milliseconds, from the `talk` config section.
///
/// - Parameter talk: The `talk` configuration dictionary, or `nil` when it is absent.
/// - Returns: The value stored under `silenceTimeoutMs` when it is a positive whole number
///   that fits in `Int`; otherwise `defaultSilenceTimeoutMs`.
static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?) -> Int {
    let rawValue = talk?["silenceTimeoutMs"]

    // Preferred path: the value is already exposed as an integer.
    if let timeout = rawValue?.intValue, timeout > 0 {
        return timeout
    }

    // Fallback path: the value is exposed as a double (e.g. `1500.0`).
    // `Int(exactly:)` yields nil for fractional, non-finite, and out-of-range inputs.
    // A manual `<= Double(Int.max)` bound is not safe here: `Double(Int.max)` rounds up
    // to 2^63, which is one past `Int.max`, so that boundary value would pass the check
    // and then trap inside `Int(_:)`.
    if
        let value = rawValue?.doubleValue,
        let timeout = Int(exactly: value),
        timeout > 0
    {
        return timeout
    }

    return Self.defaultSilenceTimeoutMs
}
private func fetchTalkConfig() async -> TalkRuntimeConfig {
let env = ProcessInfo.processInfo.environment
let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines)
@@ -895,6 +914,7 @@ extension TalkModeRuntime {
let selection = Self.selectTalkProviderConfig(talk)
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
let activeConfig = selection?.config
let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk)
let ui = snap.config?["ui"]?.dictionaryValue
let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
await MainActor.run {
@@ -939,6 +959,7 @@ extension TalkModeRuntime {
modelId: resolvedModel,
outputFormat: outputFormat,
interruptOnSpeech: interrupt ?? true,
silenceTimeoutMs: silenceTimeoutMs,
apiKey: resolvedApiKey)
} catch {
let resolvedVoice =
@@ -951,6 +972,7 @@ extension TalkModeRuntime {
modelId: Self.defaultModelIdFallback,
outputFormat: nil,
interruptOnSpeech: true,
silenceTimeoutMs: Self.defaultSilenceTimeoutMs,
apiKey: resolvedApiKey)
}
}

View File

@@ -32,4 +32,24 @@ struct TalkModeConfigParsingTests {
#expect(selection?.config["voiceId"]?.stringValue == "voice-legacy")
#expect(selection?.config["apiKey"]?.stringValue == "legacy-key")
}
/// A positive integer `silenceTimeoutMs` in the talk config is returned unchanged.
@Test func readsConfiguredSilenceTimeoutMs() {
    let configured: [String: AnyCodable] = ["silenceTimeoutMs": AnyCodable(1500)]
    let resolved = TalkModeRuntime.resolvedSilenceTimeoutMs(configured)
    #expect(resolved == 1500)
}
/// With no talk config at all, the resolver yields 700 ms.
@Test func defaultsSilenceTimeoutMsWhenMissing() {
    let resolved = TalkModeRuntime.resolvedSilenceTimeoutMs(nil)
    #expect(resolved == 700)
}
/// A non-positive `silenceTimeoutMs` (here `0`) is rejected and the resolver yields 700 ms.
@Test func defaultsSilenceTimeoutMsWhenInvalid() {
    let invalid: [String: AnyCodable] = ["silenceTimeoutMs": AnyCodable(0)]
    let resolved = TalkModeRuntime.resolvedSilenceTimeoutMs(invalid)
    #expect(resolved == 700)
}
}