mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
talk: add configurable silence timeout
This commit is contained in:
committed by
Peter Steinberger
parent
097c588a6b
commit
6ff7e8f42e
@@ -12,6 +12,7 @@ actor TalkModeRuntime {
|
||||
private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts")
|
||||
private static let defaultModelIdFallback = "eleven_v3"
|
||||
private static let defaultTalkProvider = "elevenlabs"
|
||||
/// Fallback silence timeout in milliseconds, used when the talk config provides no valid `silenceTimeoutMs`.
private static let defaultSilenceTimeoutMs = 700
|
||||
|
||||
private final class RMSMeter: @unchecked Sendable {
|
||||
private let lock = NSLock()
|
||||
@@ -66,7 +67,7 @@ actor TalkModeRuntime {
|
||||
private var fallbackVoiceId: String?
|
||||
private var lastPlaybackWasPCM: Bool = false
|
||||
|
||||
private let silenceWindow: TimeInterval = 0.7
|
||||
/// Silence window in seconds. Starts at the default timeout (milliseconds converted to seconds)
/// and is reassigned from `cfg.silenceTimeoutMs` when a talk config is applied.
private var silenceWindow: TimeInterval = TimeInterval(TalkModeRuntime.defaultSilenceTimeoutMs) / 1000
|
||||
private let minSpeechRMS: Double = 1e-3
|
||||
private let speechBoostFactor: Double = 6.0
|
||||
|
||||
@@ -783,6 +784,7 @@ extension TalkModeRuntime {
|
||||
}
|
||||
self.defaultOutputFormat = cfg.outputFormat
|
||||
self.interruptOnSpeech = cfg.interruptOnSpeech
|
||||
self.silenceWindow = TimeInterval(cfg.silenceTimeoutMs) / 1000
|
||||
self.apiKey = cfg.apiKey
|
||||
let hasApiKey = (cfg.apiKey?.isEmpty == false)
|
||||
let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none"
|
||||
@@ -792,7 +794,8 @@ extension TalkModeRuntime {
|
||||
"talk config voiceId=\(voiceLabel, privacy: .public) " +
|
||||
"modelId=\(modelLabel, privacy: .public) " +
|
||||
"apiKey=\(hasApiKey, privacy: .public) " +
|
||||
"interrupt=\(cfg.interruptOnSpeech, privacy: .public)")
|
||||
"interrupt=\(cfg.interruptOnSpeech, privacy: .public) " +
|
||||
"silenceTimeoutMs=\(cfg.silenceTimeoutMs, privacy: .public)")
|
||||
}
|
||||
|
||||
private struct TalkRuntimeConfig {
|
||||
@@ -801,6 +804,7 @@ extension TalkModeRuntime {
|
||||
let modelId: String?
|
||||
let outputFormat: String?
|
||||
let interruptOnSpeech: Bool
|
||||
let silenceTimeoutMs: Int
|
||||
let apiKey: String?
|
||||
}
|
||||
|
||||
@@ -880,6 +884,21 @@ extension TalkModeRuntime {
|
||||
normalizedPayload: false)
|
||||
}
|
||||
|
||||
/// Resolves the silence timeout, in milliseconds, from the `talk` config section.
///
/// Reads `talk["silenceTimeoutMs"]` and accepts it only when it is a positive whole
/// number that fits in `Int`, whether it is exposed as an integer or as a double.
/// Anything else (missing section, missing key, zero, negative, fractional,
/// non-finite, or out-of-range values) yields `defaultSilenceTimeoutMs`.
///
/// - Parameter talk: The `talk` configuration dictionary, or `nil` when absent.
/// - Returns: The configured timeout, or `defaultSilenceTimeoutMs` as a fallback.
static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?) -> Int {
    let raw = talk?["silenceTimeoutMs"]

    if let timeout = raw?.intValue, timeout > 0 {
        return timeout
    }

    // `Int(exactly:)` rejects fractional, non-finite, and out-of-range doubles.
    // A plain `timeout <= Double(Int.max)` guard is not enough: `Double(Int.max)`
    // rounds up to 2^63, so a value of exactly 2^63 would pass the comparison and
    // then trap inside `Int(timeout)`.
    if let timeout = raw?.doubleValue, timeout > 0, let exact = Int(exactly: timeout) {
        return exact
    }

    return Self.defaultSilenceTimeoutMs
}
|
||||
|
||||
private func fetchTalkConfig() async -> TalkRuntimeConfig {
|
||||
let env = ProcessInfo.processInfo.environment
|
||||
let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
@@ -895,6 +914,7 @@ extension TalkModeRuntime {
|
||||
let selection = Self.selectTalkProviderConfig(talk)
|
||||
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
|
||||
let activeConfig = selection?.config
|
||||
let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk)
|
||||
let ui = snap.config?["ui"]?.dictionaryValue
|
||||
let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
|
||||
await MainActor.run {
|
||||
@@ -939,6 +959,7 @@ extension TalkModeRuntime {
|
||||
modelId: resolvedModel,
|
||||
outputFormat: outputFormat,
|
||||
interruptOnSpeech: interrupt ?? true,
|
||||
silenceTimeoutMs: silenceTimeoutMs,
|
||||
apiKey: resolvedApiKey)
|
||||
} catch {
|
||||
let resolvedVoice =
|
||||
@@ -951,6 +972,7 @@ extension TalkModeRuntime {
|
||||
modelId: Self.defaultModelIdFallback,
|
||||
outputFormat: nil,
|
||||
interruptOnSpeech: true,
|
||||
silenceTimeoutMs: Self.defaultSilenceTimeoutMs,
|
||||
apiKey: resolvedApiKey)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,4 +32,24 @@ struct TalkModeConfigParsingTests {
|
||||
#expect(selection?.config["voiceId"]?.stringValue == "voice-legacy")
|
||||
#expect(selection?.config["apiKey"]?.stringValue == "legacy-key")
|
||||
}
|
||||
|
||||
/// A positive integer stored under `silenceTimeoutMs` is returned unchanged.
@Test func readsConfiguredSilenceTimeoutMs() {
    var talk: [String: AnyCodable] = [:]
    talk["silenceTimeoutMs"] = AnyCodable(1500)

    let resolved = TalkModeRuntime.resolvedSilenceTimeoutMs(talk)

    #expect(resolved == 1500)
}
|
||||
|
||||
/// With no `talk` section at all, the resolver falls back to 700 ms.
@Test func defaultsSilenceTimeoutMsWhenMissing() {
    let resolved = TalkModeRuntime.resolvedSilenceTimeoutMs(nil)

    #expect(resolved == 700)
}
|
||||
|
||||
/// A non-positive `silenceTimeoutMs` (here `0`) is rejected in favour of the 700 ms fallback.
@Test func defaultsSilenceTimeoutMsWhenInvalid() {
    var talk: [String: AnyCodable] = [:]
    talk["silenceTimeoutMs"] = AnyCodable(0)

    let resolved = TalkModeRuntime.resolvedSilenceTimeoutMs(talk)

    #expect(resolved == 700)
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user