mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
talk: add configurable silence timeout
This commit is contained in:
committed by
Peter Steinberger
parent
097c588a6b
commit
6ff7e8f42e
@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
|
|
||||||
- TUI: infer the active agent from the current workspace when launched inside a configured agent workspace, while preserving explicit `agent:` session targets. (#39591) Thanks @arceus77-7.
|
- TUI: infer the active agent from the current workspace when launched inside a configured agent workspace, while preserving explicit `agent:` session targets. (#39591) Thanks @arceus77-7.
|
||||||
- Tools/Brave web search: add opt-in `tools.web.search.brave.mode: "llm-context"` so `web_search` can call Brave's LLM Context endpoint and return extracted grounding snippets with source metadata, plus config/docs/test coverage. (#33383) Thanks @thirumaleshp.
|
- Tools/Brave web search: add opt-in `tools.web.search.brave.mode: "llm-context"` so `web_search` can call Brave's LLM Context endpoint and return extracted grounding snippets with source metadata, plus config/docs/test coverage. (#33383) Thanks @thirumaleshp.
|
||||||
|
- Talk mode: add top-level `talk.silenceTimeoutMs` config so Talk waits a configurable amount of silence before auto-sending the current transcript, while keeping each platform's existing default pause window when unset. (#39607) Thanks @danodoesdesign. Fixes #17147.
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
|
|||||||
@@ -59,8 +59,8 @@ class TalkModeManager(
|
|||||||
private const val tag = "TalkMode"
|
private const val tag = "TalkMode"
|
||||||
private const val defaultModelIdFallback = "eleven_v3"
|
private const val defaultModelIdFallback = "eleven_v3"
|
||||||
private const val defaultOutputFormatFallback = "pcm_24000"
|
private const val defaultOutputFormatFallback = "pcm_24000"
|
||||||
private const val defaultTalkProvider = "elevenlabs"
|
private const val defaultTalkProvider = "elevenlabs"
|
||||||
private const val silenceWindowMs = 500L
|
private const val defaultSilenceTimeoutMs = 700L
|
||||||
private const val listenWatchdogMs = 12_000L
|
private const val listenWatchdogMs = 12_000L
|
||||||
private const val chatFinalWaitWithSubscribeMs = 45_000L
|
private const val chatFinalWaitWithSubscribeMs = 45_000L
|
||||||
private const val chatFinalWaitWithoutSubscribeMs = 6_000L
|
private const val chatFinalWaitWithoutSubscribeMs = 6_000L
|
||||||
@@ -105,6 +105,14 @@ private const val defaultTalkProvider = "elevenlabs"
|
|||||||
normalizedPayload = false,
|
normalizedPayload = false,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolves the Talk silence timeout, in milliseconds, from the `talk` config object.
 *
 * Returns [defaultSilenceTimeoutMs] when `talk` is null, when `silenceTimeoutMs` does not
 * yield a value from `asDoubleOrNull()`, or when the value is non-positive, has a fractional
 * part, or exceeds [Long.MAX_VALUE]. Otherwise returns the value converted to [Long].
 */
internal fun resolvedSilenceTimeoutMs(talk: JsonObject?): Long {
|
||||||
|
val timeout = talk?.get("silenceTimeoutMs").asDoubleOrNull() ?: return defaultSilenceTimeoutMs
|
||||||
|
// NaN and +Infinity are rejected here too: `% 1.0` yields NaN for them and `NaN != 0.0` is true.
if (timeout <= 0 || timeout % 1.0 != 0.0 || timeout > Long.MAX_VALUE.toDouble()) {
|
||||||
|
return defaultSilenceTimeoutMs
|
||||||
|
}
|
||||||
|
return timeout.toLong()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private val mainHandler = Handler(Looper.getMainLooper())
|
private val mainHandler = Handler(Looper.getMainLooper())
|
||||||
@@ -134,7 +142,7 @@ private const val defaultTalkProvider = "elevenlabs"
|
|||||||
private var listeningMode = false
|
private var listeningMode = false
|
||||||
|
|
||||||
private var silenceJob: Job? = null
|
private var silenceJob: Job? = null
|
||||||
private val silenceWindowMs = 700L
|
private var silenceWindowMs = defaultSilenceTimeoutMs
|
||||||
private var lastTranscript: String = ""
|
private var lastTranscript: String = ""
|
||||||
private var lastHeardAtMs: Long? = null
|
private var lastHeardAtMs: Long? = null
|
||||||
private var lastSpokenText: String? = null
|
private var lastSpokenText: String? = null
|
||||||
@@ -1411,6 +1419,7 @@ private const val defaultTalkProvider = "elevenlabs"
|
|||||||
activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||||
val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||||
val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull()
|
val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull()
|
||||||
|
val silenceTimeoutMs = resolvedSilenceTimeoutMs(talk)
|
||||||
|
|
||||||
if (!isCanonicalMainSessionKey(mainSessionKey)) {
|
if (!isCanonicalMainSessionKey(mainSessionKey)) {
|
||||||
mainSessionKey = mainKey
|
mainSessionKey = mainKey
|
||||||
@@ -1427,7 +1436,11 @@ private const val defaultTalkProvider = "elevenlabs"
|
|||||||
if (!modelOverrideActive) currentModelId = defaultModelId
|
if (!modelOverrideActive) currentModelId = defaultModelId
|
||||||
defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback
|
defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback
|
||||||
apiKey = key ?: envKey?.takeIf { it.isNotEmpty() }
|
apiKey = key ?: envKey?.takeIf { it.isNotEmpty() }
|
||||||
Log.d(tag, "reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId")
|
silenceWindowMs = silenceTimeoutMs
|
||||||
|
Log.d(
|
||||||
|
tag,
|
||||||
|
"reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId silenceTimeoutMs=$silenceTimeoutMs",
|
||||||
|
)
|
||||||
if (interrupt != null) interruptOnSpeech = interrupt
|
if (interrupt != null) interruptOnSpeech = interrupt
|
||||||
activeProviderIsElevenLabs = activeProvider == defaultTalkProvider
|
activeProviderIsElevenLabs = activeProvider == defaultTalkProvider
|
||||||
if (!activeProviderIsElevenLabs) {
|
if (!activeProviderIsElevenLabs) {
|
||||||
@@ -1441,6 +1454,7 @@ private const val defaultTalkProvider = "elevenlabs"
|
|||||||
}
|
}
|
||||||
configLoaded = true
|
configLoaded = true
|
||||||
} catch (_: Throwable) {
|
} catch (_: Throwable) {
|
||||||
|
silenceWindowMs = defaultSilenceTimeoutMs
|
||||||
defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
|
defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
|
||||||
defaultModelId = defaultModelIdFallback
|
defaultModelId = defaultModelIdFallback
|
||||||
if (!modelOverrideActive) currentModelId = defaultModelId
|
if (!modelOverrideActive) currentModelId = defaultModelId
|
||||||
|
|||||||
@@ -54,4 +54,23 @@ class TalkModeConfigParsingTest {
|
|||||||
assertEquals("voice-legacy", selection?.config?.get("voiceId")?.jsonPrimitive?.content)
|
assertEquals("voice-legacy", selection?.config?.get("voiceId")?.jsonPrimitive?.content)
|
||||||
assertEquals("legacy-key", selection?.config?.get("apiKey")?.jsonPrimitive?.content)
|
assertEquals("legacy-key", selection?.config?.get("apiKey")?.jsonPrimitive?.content)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A positive whole-number `silenceTimeoutMs` in the talk object is returned unchanged as a Long.
@Test
|
||||||
|
fun readsConfiguredSilenceTimeoutMs() {
|
||||||
|
val talk = buildJsonObject { put("silenceTimeoutMs", 1500) }
|
||||||
|
|
||||||
|
assertEquals(1500L, TalkModeManager.resolvedSilenceTimeoutMs(talk))
|
||||||
|
}
|
||||||
|
|
||||||
|
// A null talk object resolves to the 700 ms default.
@Test
|
||||||
|
fun defaultsSilenceTimeoutMsWhenMissing() {
|
||||||
|
assertEquals(700L, TalkModeManager.resolvedSilenceTimeoutMs(null))
|
||||||
|
}
|
||||||
|
|
||||||
|
// A non-positive value (0) is treated as invalid and resolves to the 700 ms default.
@Test
|
||||||
|
fun defaultsSilenceTimeoutMsWhenInvalid() {
|
||||||
|
val talk = buildJsonObject { put("silenceTimeoutMs", 0) }
|
||||||
|
|
||||||
|
assertEquals(700L, TalkModeManager.resolvedSilenceTimeoutMs(talk))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ final class TalkModeManager: NSObject {
|
|||||||
private typealias SpeechRequest = SFSpeechAudioBufferRecognitionRequest
|
private typealias SpeechRequest = SFSpeechAudioBufferRecognitionRequest
|
||||||
private static let defaultModelIdFallback = "eleven_v3"
|
private static let defaultModelIdFallback = "eleven_v3"
|
||||||
private static let defaultTalkProvider = "elevenlabs"
|
private static let defaultTalkProvider = "elevenlabs"
|
||||||
|
private static let defaultSilenceTimeoutMs = 900
|
||||||
private static let redactedConfigSentinel = "__OPENCLAW_REDACTED__"
|
private static let redactedConfigSentinel = "__OPENCLAW_REDACTED__"
|
||||||
var isEnabled: Bool = false
|
var isEnabled: Bool = false
|
||||||
var isListening: Bool = false
|
var isListening: Bool = false
|
||||||
@@ -97,7 +98,7 @@ final class TalkModeManager: NSObject {
|
|||||||
|
|
||||||
private var gateway: GatewayNodeSession?
|
private var gateway: GatewayNodeSession?
|
||||||
private var gatewayConnected = false
|
private var gatewayConnected = false
|
||||||
private let silenceWindow: TimeInterval = 0.9
|
private var silenceWindow: TimeInterval = TimeInterval(Self.defaultSilenceTimeoutMs) / 1000
|
||||||
private var lastAudioActivity: Date?
|
private var lastAudioActivity: Date?
|
||||||
private var noiseFloorSamples: [Double] = []
|
private var noiseFloorSamples: [Double] = []
|
||||||
private var noiseFloor: Double?
|
private var noiseFloor: Double?
|
||||||
@@ -2001,6 +2002,24 @@ extension TalkModeManager {
|
|||||||
config: normalizedProviders[providerID] ?? [:])
|
config: normalizedProviders[providerID] ?? [:])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolves the Talk silence timeout, in milliseconds, from the `talk` config dictionary.
///
/// Accepts `silenceTimeoutMs` as a positive `Int`, as a positive whole-number `Double` no larger
/// than `Int.max`, or as an `NSNumber` whose `doubleValue` passes the same checks.
/// Any other value, a missing key, or a nil dictionary yields `defaultSilenceTimeoutMs`.
static func resolvedSilenceTimeoutMs(_ talk: [String: Any]?) -> Int {
|
||||||
|
switch talk?["silenceTimeoutMs"] {
|
||||||
|
case let timeout as Int where timeout > 0:
|
||||||
|
return timeout
|
||||||
|
case let timeout as Double
|
||||||
|
where timeout > 0 && timeout.rounded(.towardZero) == timeout && timeout <= Double(Int.max):
|
||||||
|
return Int(timeout)
|
||||||
|
case let timeout as NSNumber:
|
||||||
|
// Validate via the Double representation: positive, no fractional part, within Int range.
let value = timeout.doubleValue
|
||||||
|
if value > 0 && value.rounded(.towardZero) == value && value <= Double(Int.max) {
|
||||||
|
return Int(value)
|
||||||
|
}
|
||||||
|
return Self.defaultSilenceTimeoutMs
|
||||||
|
default:
|
||||||
|
return Self.defaultSilenceTimeoutMs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func reloadConfig() async {
|
func reloadConfig() async {
|
||||||
guard let gateway else { return }
|
guard let gateway else { return }
|
||||||
self.pcmFormatUnavailable = false
|
self.pcmFormatUnavailable = false
|
||||||
@@ -2020,6 +2039,7 @@ extension TalkModeManager {
|
|||||||
}
|
}
|
||||||
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
|
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
|
||||||
let activeConfig = selection?.config
|
let activeConfig = selection?.config
|
||||||
|
let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk)
|
||||||
self.defaultVoiceId = (activeConfig?["voiceId"] as? String)?
|
self.defaultVoiceId = (activeConfig?["voiceId"] as? String)?
|
||||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
if let aliases = activeConfig?["voiceAliases"] as? [String: Any] {
|
if let aliases = activeConfig?["voiceAliases"] as? [String: Any] {
|
||||||
@@ -2067,8 +2087,9 @@ extension TalkModeManager {
|
|||||||
if let interrupt = talk?["interruptOnSpeech"] as? Bool {
|
if let interrupt = talk?["interruptOnSpeech"] as? Bool {
|
||||||
self.interruptOnSpeech = interrupt
|
self.interruptOnSpeech = interrupt
|
||||||
}
|
}
|
||||||
|
self.silenceWindow = TimeInterval(silenceTimeoutMs) / 1000
|
||||||
if selection != nil {
|
if selection != nil {
|
||||||
GatewayDiagnostics.log("talk config provider=\(activeProvider)")
|
GatewayDiagnostics.log("talk config provider=\(activeProvider) silenceTimeoutMs=\(silenceTimeoutMs)")
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
self.defaultModelId = Self.defaultModelIdFallback
|
self.defaultModelId = Self.defaultModelIdFallback
|
||||||
@@ -2079,6 +2100,7 @@ extension TalkModeManager {
|
|||||||
self.gatewayTalkDefaultModelId = nil
|
self.gatewayTalkDefaultModelId = nil
|
||||||
self.gatewayTalkApiKeyConfigured = false
|
self.gatewayTalkApiKeyConfigured = false
|
||||||
self.gatewayTalkConfigLoaded = false
|
self.gatewayTalkConfigLoaded = false
|
||||||
|
self.silenceWindow = TimeInterval(Self.defaultSilenceTimeoutMs) / 1000
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -47,4 +47,24 @@ import Testing
|
|||||||
userInfo: [NSLocalizedDescriptionKey: "queue enqueue failed"])
|
userInfo: [NSLocalizedDescriptionKey: "queue enqueue failed"])
|
||||||
#expect(TalkModeManager._test_isPCMFormatRejectedByAPI(error) == false)
|
#expect(TalkModeManager._test_isPCMFormatRejectedByAPI(error) == false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A positive integer `silenceTimeoutMs` in the talk dictionary is returned unchanged.
@Test func readsConfiguredSilenceTimeoutMs() {
|
||||||
|
let talk: [String: Any] = [
|
||||||
|
"silenceTimeoutMs": 1500,
|
||||||
|
]
|
||||||
|
|
||||||
|
#expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 1500)
|
||||||
|
}
|
||||||
|
|
||||||
|
// A nil talk dictionary resolves to the 900 ms default.
@Test func defaultsSilenceTimeoutMsWhenMissing() {
|
||||||
|
#expect(TalkModeManager.resolvedSilenceTimeoutMs(nil) == 900)
|
||||||
|
}
|
||||||
|
|
||||||
|
// A non-positive value (0) is treated as invalid and resolves to the 900 ms default.
@Test func defaultsSilenceTimeoutMsWhenInvalid() {
|
||||||
|
let talk: [String: Any] = [
|
||||||
|
"silenceTimeoutMs": 0,
|
||||||
|
]
|
||||||
|
|
||||||
|
#expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 900)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ actor TalkModeRuntime {
|
|||||||
private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts")
|
private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts")
|
||||||
private static let defaultModelIdFallback = "eleven_v3"
|
private static let defaultModelIdFallback = "eleven_v3"
|
||||||
private static let defaultTalkProvider = "elevenlabs"
|
private static let defaultTalkProvider = "elevenlabs"
|
||||||
|
private static let defaultSilenceTimeoutMs = 700
|
||||||
|
|
||||||
private final class RMSMeter: @unchecked Sendable {
|
private final class RMSMeter: @unchecked Sendable {
|
||||||
private let lock = NSLock()
|
private let lock = NSLock()
|
||||||
@@ -66,7 +67,7 @@ actor TalkModeRuntime {
|
|||||||
private var fallbackVoiceId: String?
|
private var fallbackVoiceId: String?
|
||||||
private var lastPlaybackWasPCM: Bool = false
|
private var lastPlaybackWasPCM: Bool = false
|
||||||
|
|
||||||
private let silenceWindow: TimeInterval = 0.7
|
private var silenceWindow: TimeInterval = TimeInterval(TalkModeRuntime.defaultSilenceTimeoutMs) / 1000
|
||||||
private let minSpeechRMS: Double = 1e-3
|
private let minSpeechRMS: Double = 1e-3
|
||||||
private let speechBoostFactor: Double = 6.0
|
private let speechBoostFactor: Double = 6.0
|
||||||
|
|
||||||
@@ -783,6 +784,7 @@ extension TalkModeRuntime {
|
|||||||
}
|
}
|
||||||
self.defaultOutputFormat = cfg.outputFormat
|
self.defaultOutputFormat = cfg.outputFormat
|
||||||
self.interruptOnSpeech = cfg.interruptOnSpeech
|
self.interruptOnSpeech = cfg.interruptOnSpeech
|
||||||
|
self.silenceWindow = TimeInterval(cfg.silenceTimeoutMs) / 1000
|
||||||
self.apiKey = cfg.apiKey
|
self.apiKey = cfg.apiKey
|
||||||
let hasApiKey = (cfg.apiKey?.isEmpty == false)
|
let hasApiKey = (cfg.apiKey?.isEmpty == false)
|
||||||
let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none"
|
let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none"
|
||||||
@@ -792,7 +794,8 @@ extension TalkModeRuntime {
|
|||||||
"talk config voiceId=\(voiceLabel, privacy: .public) " +
|
"talk config voiceId=\(voiceLabel, privacy: .public) " +
|
||||||
"modelId=\(modelLabel, privacy: .public) " +
|
"modelId=\(modelLabel, privacy: .public) " +
|
||||||
"apiKey=\(hasApiKey, privacy: .public) " +
|
"apiKey=\(hasApiKey, privacy: .public) " +
|
||||||
"interrupt=\(cfg.interruptOnSpeech, privacy: .public)")
|
"interrupt=\(cfg.interruptOnSpeech, privacy: .public) " +
|
||||||
|
"silenceTimeoutMs=\(cfg.silenceTimeoutMs, privacy: .public)")
|
||||||
}
|
}
|
||||||
|
|
||||||
private struct TalkRuntimeConfig {
|
private struct TalkRuntimeConfig {
|
||||||
@@ -801,6 +804,7 @@ extension TalkModeRuntime {
|
|||||||
let modelId: String?
|
let modelId: String?
|
||||||
let outputFormat: String?
|
let outputFormat: String?
|
||||||
let interruptOnSpeech: Bool
|
let interruptOnSpeech: Bool
|
||||||
|
let silenceTimeoutMs: Int
|
||||||
let apiKey: String?
|
let apiKey: String?
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -880,6 +884,21 @@ extension TalkModeRuntime {
|
|||||||
normalizedPayload: false)
|
normalizedPayload: false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolves the Talk silence timeout, in milliseconds, from the `talk` config dictionary.
///
/// Uses the `intValue` of `silenceTimeoutMs` when it is present and positive; otherwise uses the
/// `doubleValue` when it is positive, has no fractional part, and is no larger than `Int.max`.
/// Returns `defaultSilenceTimeoutMs` when neither check passes or `talk` is nil.
static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?) -> Int {
|
||||||
|
if let timeout = talk?["silenceTimeoutMs"]?.intValue, timeout > 0 {
|
||||||
|
return timeout
|
||||||
|
}
|
||||||
|
// Fallback path: accept a Double only when it represents a positive whole number within Int range.
if
|
||||||
|
let timeout = talk?["silenceTimeoutMs"]?.doubleValue,
|
||||||
|
timeout > 0,
|
||||||
|
timeout.rounded(.towardZero) == timeout,
|
||||||
|
timeout <= Double(Int.max)
|
||||||
|
{
|
||||||
|
return Int(timeout)
|
||||||
|
}
|
||||||
|
return Self.defaultSilenceTimeoutMs
|
||||||
|
}
|
||||||
|
|
||||||
private func fetchTalkConfig() async -> TalkRuntimeConfig {
|
private func fetchTalkConfig() async -> TalkRuntimeConfig {
|
||||||
let env = ProcessInfo.processInfo.environment
|
let env = ProcessInfo.processInfo.environment
|
||||||
let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines)
|
let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||||
@@ -895,6 +914,7 @@ extension TalkModeRuntime {
|
|||||||
let selection = Self.selectTalkProviderConfig(talk)
|
let selection = Self.selectTalkProviderConfig(talk)
|
||||||
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
|
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
|
||||||
let activeConfig = selection?.config
|
let activeConfig = selection?.config
|
||||||
|
let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk)
|
||||||
let ui = snap.config?["ui"]?.dictionaryValue
|
let ui = snap.config?["ui"]?.dictionaryValue
|
||||||
let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
|
let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
|
||||||
await MainActor.run {
|
await MainActor.run {
|
||||||
@@ -939,6 +959,7 @@ extension TalkModeRuntime {
|
|||||||
modelId: resolvedModel,
|
modelId: resolvedModel,
|
||||||
outputFormat: outputFormat,
|
outputFormat: outputFormat,
|
||||||
interruptOnSpeech: interrupt ?? true,
|
interruptOnSpeech: interrupt ?? true,
|
||||||
|
silenceTimeoutMs: silenceTimeoutMs,
|
||||||
apiKey: resolvedApiKey)
|
apiKey: resolvedApiKey)
|
||||||
} catch {
|
} catch {
|
||||||
let resolvedVoice =
|
let resolvedVoice =
|
||||||
@@ -951,6 +972,7 @@ extension TalkModeRuntime {
|
|||||||
modelId: Self.defaultModelIdFallback,
|
modelId: Self.defaultModelIdFallback,
|
||||||
outputFormat: nil,
|
outputFormat: nil,
|
||||||
interruptOnSpeech: true,
|
interruptOnSpeech: true,
|
||||||
|
silenceTimeoutMs: Self.defaultSilenceTimeoutMs,
|
||||||
apiKey: resolvedApiKey)
|
apiKey: resolvedApiKey)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,4 +32,24 @@ struct TalkModeConfigParsingTests {
|
|||||||
#expect(selection?.config["voiceId"]?.stringValue == "voice-legacy")
|
#expect(selection?.config["voiceId"]?.stringValue == "voice-legacy")
|
||||||
#expect(selection?.config["apiKey"]?.stringValue == "legacy-key")
|
#expect(selection?.config["apiKey"]?.stringValue == "legacy-key")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A positive integer `silenceTimeoutMs` wrapped in AnyCodable is returned unchanged.
@Test func readsConfiguredSilenceTimeoutMs() {
|
||||||
|
let talk: [String: AnyCodable] = [
|
||||||
|
"silenceTimeoutMs": AnyCodable(1500),
|
||||||
|
]
|
||||||
|
|
||||||
|
#expect(TalkModeRuntime.resolvedSilenceTimeoutMs(talk) == 1500)
|
||||||
|
}
|
||||||
|
|
||||||
|
// A nil talk dictionary resolves to the 700 ms default.
@Test func defaultsSilenceTimeoutMsWhenMissing() {
|
||||||
|
#expect(TalkModeRuntime.resolvedSilenceTimeoutMs(nil) == 700)
|
||||||
|
}
|
||||||
|
|
||||||
|
// A non-positive value (0) is treated as invalid and resolves to the 700 ms default.
@Test func defaultsSilenceTimeoutMsWhenInvalid() {
|
||||||
|
let talk: [String: AnyCodable] = [
|
||||||
|
"silenceTimeoutMs": AnyCodable(0),
|
||||||
|
]
|
||||||
|
|
||||||
|
#expect(TalkModeRuntime.resolvedSilenceTimeoutMs(talk) == 700)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1659,6 +1659,7 @@ Defaults for Talk mode (macOS/iOS/Android).
|
|||||||
modelId: "eleven_v3",
|
modelId: "eleven_v3",
|
||||||
outputFormat: "mp3_44100_128",
|
outputFormat: "mp3_44100_128",
|
||||||
apiKey: "elevenlabs_api_key",
|
apiKey: "elevenlabs_api_key",
|
||||||
|
silenceTimeoutMs: 1500,
|
||||||
interruptOnSpeech: true,
|
interruptOnSpeech: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -1668,6 +1669,7 @@ Defaults for Talk mode (macOS/iOS/Android).
|
|||||||
- `apiKey` and `providers.*.apiKey` accept plaintext strings or SecretRef objects.
|
- `apiKey` and `providers.*.apiKey` accept plaintext strings or SecretRef objects.
|
||||||
- `ELEVENLABS_API_KEY` fallback applies only when no Talk API key is configured.
|
- `ELEVENLABS_API_KEY` fallback applies only when no Talk API key is configured.
|
||||||
- `voiceAliases` lets Talk directives use friendly names.
|
- `voiceAliases` lets Talk directives use friendly names.
|
||||||
|
- `silenceTimeoutMs` controls how long Talk mode waits after user silence before it sends the transcript. Unset keeps the platform default pause window (`700` ms on macOS and Android, `900` ms on iOS).
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ Supported keys:
|
|||||||
modelId: "eleven_v3",
|
modelId: "eleven_v3",
|
||||||
outputFormat: "mp3_44100_128",
|
outputFormat: "mp3_44100_128",
|
||||||
apiKey: "elevenlabs_api_key",
|
apiKey: "elevenlabs_api_key",
|
||||||
|
silenceTimeoutMs: 1500,
|
||||||
interruptOnSpeech: true,
|
interruptOnSpeech: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -64,6 +65,7 @@ Supported keys:
|
|||||||
Defaults:
|
Defaults:
|
||||||
|
|
||||||
- `interruptOnSpeech`: true
|
- `interruptOnSpeech`: true
|
||||||
|
- `silenceTimeoutMs`: when unset, Talk keeps the platform default pause window before sending the transcript (`700` ms on macOS and Android, `900` ms on iOS)
|
||||||
- `voiceId`: falls back to `ELEVENLABS_VOICE_ID` / `SAG_VOICE_ID` (or first ElevenLabs voice when API key is available)
|
- `voiceId`: falls back to `ELEVENLABS_VOICE_ID` / `SAG_VOICE_ID` (or first ElevenLabs voice when API key is available)
|
||||||
- `modelId`: defaults to `eleven_v3` when unset
|
- `modelId`: defaults to `eleven_v3` when unset
|
||||||
- `apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available)
|
- `apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available)
|
||||||
|
|||||||
@@ -305,6 +305,7 @@ const TARGET_KEYS = [
|
|||||||
"talk.modelId",
|
"talk.modelId",
|
||||||
"talk.outputFormat",
|
"talk.outputFormat",
|
||||||
"talk.interruptOnSpeech",
|
"talk.interruptOnSpeech",
|
||||||
|
"talk.silenceTimeoutMs",
|
||||||
"meta",
|
"meta",
|
||||||
"env",
|
"env",
|
||||||
"env.shellEnv",
|
"env.shellEnv",
|
||||||
|
|||||||
@@ -163,6 +163,8 @@ export const FIELD_HELP: Record<string, string> = {
|
|||||||
"Use this legacy ElevenLabs API key for Talk mode only during migration, and keep secrets in env-backed storage. Prefer talk.providers.elevenlabs.apiKey (fallback: ELEVENLABS_API_KEY).",
|
"Use this legacy ElevenLabs API key for Talk mode only during migration, and keep secrets in env-backed storage. Prefer talk.providers.elevenlabs.apiKey (fallback: ELEVENLABS_API_KEY).",
|
||||||
"talk.interruptOnSpeech":
|
"talk.interruptOnSpeech":
|
||||||
"If true (default), stop assistant speech when the user starts speaking in Talk mode. Keep enabled for conversational turn-taking.",
|
"If true (default), stop assistant speech when the user starts speaking in Talk mode. Keep enabled for conversational turn-taking.",
|
||||||
|
"talk.silenceTimeoutMs":
|
||||||
|
"Milliseconds of user silence before Talk mode finalizes and sends the current transcript. Leave unset to keep the platform default pause window (700 ms on macOS and Android, 900 ms on iOS).",
|
||||||
acp: "ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.",
|
acp: "ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.",
|
||||||
"acp.enabled":
|
"acp.enabled":
|
||||||
"Global ACP feature gate. Keep disabled unless ACP runtime + policy are configured.",
|
"Global ACP feature gate. Keep disabled unless ACP runtime + policy are configured.",
|
||||||
|
|||||||
@@ -651,6 +651,7 @@ export const FIELD_LABELS: Record<string, string> = {
|
|||||||
"talk.modelId": "Talk Model ID",
|
"talk.modelId": "Talk Model ID",
|
||||||
"talk.outputFormat": "Talk Output Format",
|
"talk.outputFormat": "Talk Output Format",
|
||||||
"talk.interruptOnSpeech": "Talk Interrupt on Speech",
|
"talk.interruptOnSpeech": "Talk Interrupt on Speech",
|
||||||
|
"talk.silenceTimeoutMs": "Talk Silence Timeout (ms)",
|
||||||
messages: "Messages",
|
messages: "Messages",
|
||||||
"messages.messagePrefix": "Inbound Message Prefix",
|
"messages.messagePrefix": "Inbound Message Prefix",
|
||||||
"messages.responsePrefix": "Outbound Response Prefix",
|
"messages.responsePrefix": "Outbound Response Prefix",
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ describe("talk normalization", () => {
|
|||||||
outputFormat: "pcm_44100",
|
outputFormat: "pcm_44100",
|
||||||
apiKey: "secret-key", // pragma: allowlist secret
|
apiKey: "secret-key", // pragma: allowlist secret
|
||||||
interruptOnSpeech: false,
|
interruptOnSpeech: false,
|
||||||
|
silenceTimeoutMs: 1500,
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(normalized).toEqual({
|
expect(normalized).toEqual({
|
||||||
@@ -51,6 +52,7 @@ describe("talk normalization", () => {
|
|||||||
outputFormat: "pcm_44100",
|
outputFormat: "pcm_44100",
|
||||||
apiKey: "secret-key", // pragma: allowlist secret
|
apiKey: "secret-key", // pragma: allowlist secret
|
||||||
interruptOnSpeech: false,
|
interruptOnSpeech: false,
|
||||||
|
silenceTimeoutMs: 1500,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -47,6 +47,13 @@ function normalizeTalkSecretInput(value: unknown): TalkProviderConfig["apiKey"]
|
|||||||
return coerceSecretRef(value) ?? undefined;
|
return coerceSecretRef(value) ?? undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Normalizes a raw `silenceTimeoutMs` value.
 *
 * @param value - Untyped input to validate.
 * @returns The input when it is a positive integer number; `undefined` for any other value
 *   (non-number, NaN/Infinity, fractional, zero, or negative).
 */
function normalizeSilenceTimeoutMs(value: unknown): number | undefined {
|
||||||
|
if (typeof value !== "number" || !Number.isInteger(value) || value <= 0) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
function normalizeTalkProviderConfig(value: unknown): TalkProviderConfig | undefined {
|
function normalizeTalkProviderConfig(value: unknown): TalkProviderConfig | undefined {
|
||||||
if (!isPlainObject(value)) {
|
if (!isPlainObject(value)) {
|
||||||
return undefined;
|
return undefined;
|
||||||
@@ -125,6 +132,10 @@ function normalizedLegacyTalkFields(source: Record<string, unknown>): Partial<Ta
|
|||||||
if (apiKey !== undefined) {
|
if (apiKey !== undefined) {
|
||||||
legacy.apiKey = apiKey;
|
legacy.apiKey = apiKey;
|
||||||
}
|
}
|
||||||
|
const silenceTimeoutMs = normalizeSilenceTimeoutMs(source.silenceTimeoutMs);
|
||||||
|
if (silenceTimeoutMs !== undefined) {
|
||||||
|
legacy.silenceTimeoutMs = silenceTimeoutMs;
|
||||||
|
}
|
||||||
return legacy;
|
return legacy;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -267,6 +278,9 @@ export function buildTalkConfigResponse(value: unknown): TalkConfig | undefined
|
|||||||
if (typeof normalized.interruptOnSpeech === "boolean") {
|
if (typeof normalized.interruptOnSpeech === "boolean") {
|
||||||
payload.interruptOnSpeech = normalized.interruptOnSpeech;
|
payload.interruptOnSpeech = normalized.interruptOnSpeech;
|
||||||
}
|
}
|
||||||
|
if (typeof normalized.silenceTimeoutMs === "number") {
|
||||||
|
payload.silenceTimeoutMs = normalized.silenceTimeoutMs;
|
||||||
|
}
|
||||||
if (normalized.providers && Object.keys(normalized.providers).length > 0) {
|
if (normalized.providers && Object.keys(normalized.providers).length > 0) {
|
||||||
payload.providers = normalized.providers;
|
payload.providers = normalized.providers;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -70,6 +70,8 @@ export type TalkConfig = {
|
|||||||
providers?: Record<string, TalkProviderConfig>;
|
providers?: Record<string, TalkProviderConfig>;
|
||||||
/** Stop speaking when user starts talking (default: true). */
|
/** Stop speaking when user starts talking (default: true). */
|
||||||
interruptOnSpeech?: boolean;
|
interruptOnSpeech?: boolean;
|
||||||
|
/** Milliseconds of user silence before Talk mode sends the transcript after a pause. */
|
||||||
|
silenceTimeoutMs?: number;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Legacy ElevenLabs compatibility fields.
|
* Legacy ElevenLabs compatibility fields.
|
||||||
|
|||||||
@@ -595,6 +595,7 @@ export const OpenClawSchema = z
|
|||||||
outputFormat: z.string().optional(),
|
outputFormat: z.string().optional(),
|
||||||
apiKey: SecretInputSchema.optional().register(sensitive),
|
apiKey: SecretInputSchema.optional().register(sensitive),
|
||||||
interruptOnSpeech: z.boolean().optional(),
|
interruptOnSpeech: z.boolean().optional(),
|
||||||
|
silenceTimeoutMs: z.number().int().positive().optional(),
|
||||||
})
|
})
|
||||||
.strict()
|
.strict()
|
||||||
.optional(),
|
.optional(),
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ export const TalkConfigResultSchema = Type.Object(
|
|||||||
outputFormat: Type.Optional(Type.String()),
|
outputFormat: Type.Optional(Type.String()),
|
||||||
apiKey: Type.Optional(Type.String()),
|
apiKey: Type.Optional(Type.String()),
|
||||||
interruptOnSpeech: Type.Optional(Type.Boolean()),
|
interruptOnSpeech: Type.Optional(Type.Boolean()),
|
||||||
|
silenceTimeoutMs: Type.Optional(Type.Integer({ minimum: 1 })),
|
||||||
},
|
},
|
||||||
{ additionalProperties: false },
|
{ additionalProperties: false },
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -56,7 +56,11 @@ async function connectOperator(ws: GatewaySocket, scopes: string[]) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function writeTalkConfig(config: { apiKey?: string; voiceId?: string }) {
|
async function writeTalkConfig(config: {
|
||||||
|
apiKey?: string;
|
||||||
|
voiceId?: string;
|
||||||
|
silenceTimeoutMs?: number;
|
||||||
|
}) {
|
||||||
const { writeConfigFile } = await import("../config/config.js");
|
const { writeConfigFile } = await import("../config/config.js");
|
||||||
await writeConfigFile({ talk: config });
|
await writeConfigFile({ talk: config });
|
||||||
}
|
}
|
||||||
@@ -68,6 +72,7 @@ describe("gateway talk.config", () => {
|
|||||||
talk: {
|
talk: {
|
||||||
voiceId: "voice-123",
|
voiceId: "voice-123",
|
||||||
apiKey: "secret-key-abc", // pragma: allowlist secret
|
apiKey: "secret-key-abc", // pragma: allowlist secret
|
||||||
|
silenceTimeoutMs: 1500,
|
||||||
},
|
},
|
||||||
session: {
|
session: {
|
||||||
mainKey: "main-test",
|
mainKey: "main-test",
|
||||||
@@ -88,6 +93,7 @@ describe("gateway talk.config", () => {
|
|||||||
};
|
};
|
||||||
apiKey?: string;
|
apiKey?: string;
|
||||||
voiceId?: string;
|
voiceId?: string;
|
||||||
|
silenceTimeoutMs?: number;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
}>(ws, "talk.config", {});
|
}>(ws, "talk.config", {});
|
||||||
@@ -99,6 +105,7 @@ describe("gateway talk.config", () => {
|
|||||||
);
|
);
|
||||||
expect(res.payload?.config?.talk?.voiceId).toBe("voice-123");
|
expect(res.payload?.config?.talk?.voiceId).toBe("voice-123");
|
||||||
expect(res.payload?.config?.talk?.apiKey).toBe("__OPENCLAW_REDACTED__");
|
expect(res.payload?.config?.talk?.apiKey).toBe("__OPENCLAW_REDACTED__");
|
||||||
|
expect(res.payload?.config?.talk?.silenceTimeoutMs).toBe(1500);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user