talk: add configurable silence timeout

This commit is contained in:
dano does design
2026-03-08 17:58:15 +11:00
committed by Peter Steinberger
parent 097c588a6b
commit 6ff7e8f42e
18 changed files with 162 additions and 9 deletions

View File

@@ -8,6 +8,7 @@ Docs: https://docs.openclaw.ai
- TUI: infer the active agent from the current workspace when launched inside a configured agent workspace, while preserving explicit `agent:` session targets. (#39591) thanks @arceus77-7. - TUI: infer the active agent from the current workspace when launched inside a configured agent workspace, while preserving explicit `agent:` session targets. (#39591) thanks @arceus77-7.
- Tools/Brave web search: add opt-in `tools.web.search.brave.mode: "llm-context"` so `web_search` can call Brave's LLM Context endpoint and return extracted grounding snippets with source metadata, plus config/docs/test coverage. (#33383) Thanks @thirumaleshp. - Tools/Brave web search: add opt-in `tools.web.search.brave.mode: "llm-context"` so `web_search` can call Brave's LLM Context endpoint and return extracted grounding snippets with source metadata, plus config/docs/test coverage. (#33383) Thanks @thirumaleshp.
- Talk mode: add top-level `talk.silenceTimeoutMs` config so Talk waits a configurable amount of silence before auto-sending the current transcript, while keeping each platform's existing default pause window when unset. (#39607) Thanks @danodoesdesign. Fixes #17147.
### Fixes ### Fixes

View File

@@ -59,8 +59,8 @@ class TalkModeManager(
private const val tag = "TalkMode" private const val tag = "TalkMode"
private const val defaultModelIdFallback = "eleven_v3" private const val defaultModelIdFallback = "eleven_v3"
private const val defaultOutputFormatFallback = "pcm_24000" private const val defaultOutputFormatFallback = "pcm_24000"
private const val defaultTalkProvider = "elevenlabs" private const val defaultTalkProvider = "elevenlabs"
private const val silenceWindowMs = 500L private const val defaultSilenceTimeoutMs = 700L
private const val listenWatchdogMs = 12_000L private const val listenWatchdogMs = 12_000L
private const val chatFinalWaitWithSubscribeMs = 45_000L private const val chatFinalWaitWithSubscribeMs = 45_000L
private const val chatFinalWaitWithoutSubscribeMs = 6_000L private const val chatFinalWaitWithoutSubscribeMs = 6_000L
@@ -105,6 +105,14 @@ private const val defaultTalkProvider = "elevenlabs"
normalizedPayload = false, normalizedPayload = false,
) )
} }
/**
 * Resolves the Talk silence timeout, in milliseconds, from the `talk` config object.
 *
 * Returns [defaultSilenceTimeoutMs] when [talk] is null, when `asDoubleOrNull()` yields no
 * value for the `silenceTimeoutMs` entry, or when the value is not a positive whole number
 * that fits in a [Long].
 *
 * @param talk the `talk` config section, or null when none is available.
 * @return the configured timeout, or the platform default when it is unset or invalid.
 */
internal fun resolvedSilenceTimeoutMs(talk: JsonObject?): Long {
  val timeout = talk?.get("silenceTimeoutMs").asDoubleOrNull() ?: return defaultSilenceTimeoutMs
  // NaN and infinities fail these comparisons and therefore fall back to the default.
  val isPositiveWholeNumber = timeout > 0 && timeout % 1.0 == 0.0
  // Long.MAX_VALUE.toDouble() rounds up to 2^63, one past the largest Long, so the bound has to
  // be exclusive; otherwise toLong() would silently saturate to a value that was not configured.
  if (!isPositiveWholeNumber || timeout >= Long.MAX_VALUE.toDouble()) {
    return defaultSilenceTimeoutMs
  }
  return timeout.toLong()
}
} }
private val mainHandler = Handler(Looper.getMainLooper()) private val mainHandler = Handler(Looper.getMainLooper())
@@ -134,7 +142,7 @@ private const val defaultTalkProvider = "elevenlabs"
private var listeningMode = false private var listeningMode = false
private var silenceJob: Job? = null private var silenceJob: Job? = null
private val silenceWindowMs = 700L private var silenceWindowMs = defaultSilenceTimeoutMs
private var lastTranscript: String = "" private var lastTranscript: String = ""
private var lastHeardAtMs: Long? = null private var lastHeardAtMs: Long? = null
private var lastSpokenText: String? = null private var lastSpokenText: String? = null
@@ -1411,6 +1419,7 @@ private const val defaultTalkProvider = "elevenlabs"
activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull() val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull()
val silenceTimeoutMs = resolvedSilenceTimeoutMs(talk)
if (!isCanonicalMainSessionKey(mainSessionKey)) { if (!isCanonicalMainSessionKey(mainSessionKey)) {
mainSessionKey = mainKey mainSessionKey = mainKey
@@ -1427,7 +1436,11 @@ private const val defaultTalkProvider = "elevenlabs"
if (!modelOverrideActive) currentModelId = defaultModelId if (!modelOverrideActive) currentModelId = defaultModelId
defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback
apiKey = key ?: envKey?.takeIf { it.isNotEmpty() } apiKey = key ?: envKey?.takeIf { it.isNotEmpty() }
Log.d(tag, "reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId") silenceWindowMs = silenceTimeoutMs
Log.d(
tag,
"reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId silenceTimeoutMs=$silenceTimeoutMs",
)
if (interrupt != null) interruptOnSpeech = interrupt if (interrupt != null) interruptOnSpeech = interrupt
activeProviderIsElevenLabs = activeProvider == defaultTalkProvider activeProviderIsElevenLabs = activeProvider == defaultTalkProvider
if (!activeProviderIsElevenLabs) { if (!activeProviderIsElevenLabs) {
@@ -1441,6 +1454,7 @@ private const val defaultTalkProvider = "elevenlabs"
} }
configLoaded = true configLoaded = true
} catch (_: Throwable) { } catch (_: Throwable) {
silenceWindowMs = defaultSilenceTimeoutMs
defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() } defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
defaultModelId = defaultModelIdFallback defaultModelId = defaultModelIdFallback
if (!modelOverrideActive) currentModelId = defaultModelId if (!modelOverrideActive) currentModelId = defaultModelId

View File

@@ -54,4 +54,23 @@ class TalkModeConfigParsingTest {
assertEquals("voice-legacy", selection?.config?.get("voiceId")?.jsonPrimitive?.content) assertEquals("voice-legacy", selection?.config?.get("voiceId")?.jsonPrimitive?.content)
assertEquals("legacy-key", selection?.config?.get("apiKey")?.jsonPrimitive?.content) assertEquals("legacy-key", selection?.config?.get("apiKey")?.jsonPrimitive?.content)
} }
// A positive integer `silenceTimeoutMs` is returned unchanged.
@Test
fun readsConfiguredSilenceTimeoutMs() {
  val resolved =
    TalkModeManager.resolvedSilenceTimeoutMs(
      buildJsonObject { put("silenceTimeoutMs", 1500) },
    )
  assertEquals(1500L, resolved)
}
// With no talk config at all, the Android default of 700 ms applies.
@Test
fun defaultsSilenceTimeoutMsWhenMissing() {
  val resolved = TalkModeManager.resolvedSilenceTimeoutMs(null)
  assertEquals(700L, resolved)
}
// Zero, negative, and fractional values are all rejected in favour of the 700 ms default.
@Test
fun defaultsSilenceTimeoutMsWhenInvalid() {
  val invalidValues: List<Number> = listOf(0, -250, 1.5)
  for (value in invalidValues) {
    val talk = buildJsonObject { put("silenceTimeoutMs", value) }
    assertEquals(700L, TalkModeManager.resolvedSilenceTimeoutMs(talk))
  }
}
} }

View File

@@ -34,6 +34,7 @@ final class TalkModeManager: NSObject {
private typealias SpeechRequest = SFSpeechAudioBufferRecognitionRequest private typealias SpeechRequest = SFSpeechAudioBufferRecognitionRequest
private static let defaultModelIdFallback = "eleven_v3" private static let defaultModelIdFallback = "eleven_v3"
private static let defaultTalkProvider = "elevenlabs" private static let defaultTalkProvider = "elevenlabs"
private static let defaultSilenceTimeoutMs = 900
private static let redactedConfigSentinel = "__OPENCLAW_REDACTED__" private static let redactedConfigSentinel = "__OPENCLAW_REDACTED__"
var isEnabled: Bool = false var isEnabled: Bool = false
var isListening: Bool = false var isListening: Bool = false
@@ -97,7 +98,7 @@ final class TalkModeManager: NSObject {
private var gateway: GatewayNodeSession? private var gateway: GatewayNodeSession?
private var gatewayConnected = false private var gatewayConnected = false
private let silenceWindow: TimeInterval = 0.9 private var silenceWindow: TimeInterval = TimeInterval(Self.defaultSilenceTimeoutMs) / 1000
private var lastAudioActivity: Date? private var lastAudioActivity: Date?
private var noiseFloorSamples: [Double] = [] private var noiseFloorSamples: [Double] = []
private var noiseFloor: Double? private var noiseFloor: Double?
@@ -2001,6 +2002,24 @@ extension TalkModeManager {
config: normalizedProviders[providerID] ?? [:]) config: normalizedProviders[providerID] ?? [:])
} }
/// Resolves the Talk silence timeout, in milliseconds, from the `talk` config dictionary.
///
/// The value may be stored as an `Int`, a `Double`, or an `NSNumber`. It is accepted only
/// when it is a positive whole number representable as `Int`; anything else (missing key,
/// wrong type, zero, negative, fractional, NaN, infinite, or out of range) yields
/// `defaultSilenceTimeoutMs`.
///
/// - Parameter talk: The `talk` config section, or `nil` when none is available.
/// - Returns: The configured timeout, or the platform default when unset or invalid.
static func resolvedSilenceTimeoutMs(_ talk: [String: Any]?) -> Int {
    let candidate: Int?
    switch talk?["silenceTimeoutMs"] {
    case let timeout as Int:
        candidate = timeout
    case let timeout as Double:
        // `Int(exactly:)` returns nil for fractional, non-finite, and out-of-range values.
        // A manual `timeout <= Double(Int.max)` check is not safe: `Double(Int.max)` rounds
        // up to 2^63, and `Int(2^63)` traps at runtime.
        candidate = Int(exactly: timeout)
    case let timeout as NSNumber:
        candidate = Int(exactly: timeout.doubleValue)
    default:
        candidate = nil
    }
    guard let candidate, candidate > 0 else {
        return Self.defaultSilenceTimeoutMs
    }
    return candidate
}
func reloadConfig() async { func reloadConfig() async {
guard let gateway else { return } guard let gateway else { return }
self.pcmFormatUnavailable = false self.pcmFormatUnavailable = false
@@ -2020,6 +2039,7 @@ extension TalkModeManager {
} }
let activeProvider = selection?.provider ?? Self.defaultTalkProvider let activeProvider = selection?.provider ?? Self.defaultTalkProvider
let activeConfig = selection?.config let activeConfig = selection?.config
let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk)
self.defaultVoiceId = (activeConfig?["voiceId"] as? String)? self.defaultVoiceId = (activeConfig?["voiceId"] as? String)?
.trimmingCharacters(in: .whitespacesAndNewlines) .trimmingCharacters(in: .whitespacesAndNewlines)
if let aliases = activeConfig?["voiceAliases"] as? [String: Any] { if let aliases = activeConfig?["voiceAliases"] as? [String: Any] {
@@ -2067,8 +2087,9 @@ extension TalkModeManager {
if let interrupt = talk?["interruptOnSpeech"] as? Bool { if let interrupt = talk?["interruptOnSpeech"] as? Bool {
self.interruptOnSpeech = interrupt self.interruptOnSpeech = interrupt
} }
self.silenceWindow = TimeInterval(silenceTimeoutMs) / 1000
if selection != nil { if selection != nil {
GatewayDiagnostics.log("talk config provider=\(activeProvider)") GatewayDiagnostics.log("talk config provider=\(activeProvider) silenceTimeoutMs=\(silenceTimeoutMs)")
} }
} catch { } catch {
self.defaultModelId = Self.defaultModelIdFallback self.defaultModelId = Self.defaultModelIdFallback
@@ -2079,6 +2100,7 @@ extension TalkModeManager {
self.gatewayTalkDefaultModelId = nil self.gatewayTalkDefaultModelId = nil
self.gatewayTalkApiKeyConfigured = false self.gatewayTalkApiKeyConfigured = false
self.gatewayTalkConfigLoaded = false self.gatewayTalkConfigLoaded = false
self.silenceWindow = TimeInterval(Self.defaultSilenceTimeoutMs) / 1000
} }
} }

View File

@@ -47,4 +47,24 @@ import Testing
userInfo: [NSLocalizedDescriptionKey: "queue enqueue failed"]) userInfo: [NSLocalizedDescriptionKey: "queue enqueue failed"])
#expect(TalkModeManager._test_isPCMFormatRejectedByAPI(error) == false) #expect(TalkModeManager._test_isPCMFormatRejectedByAPI(error) == false)
} }
// A positive integer `silenceTimeoutMs` is returned unchanged.
@Test func readsConfiguredSilenceTimeoutMs() {
    let configuredMs = 1500
    let resolved = TalkModeManager.resolvedSilenceTimeoutMs(["silenceTimeoutMs": configuredMs])
    #expect(resolved == 1500)
}
// With no talk config at all, the iOS default of 900 ms applies.
@Test func defaultsSilenceTimeoutMsWhenMissing() {
    let resolved = TalkModeManager.resolvedSilenceTimeoutMs(nil)
    #expect(resolved == 900)
}
// Zero, negative, fractional, and non-numeric values all fall back to the 900 ms default.
@Test func defaultsSilenceTimeoutMsWhenInvalid() {
    let invalidValues: [Any] = [0, -250, 1.5, "1500"]
    for value in invalidValues {
        let talk: [String: Any] = [
            "silenceTimeoutMs": value,
        ]
        #expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 900)
    }
}
} }

View File

@@ -12,6 +12,7 @@ actor TalkModeRuntime {
private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts") private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts")
private static let defaultModelIdFallback = "eleven_v3" private static let defaultModelIdFallback = "eleven_v3"
private static let defaultTalkProvider = "elevenlabs" private static let defaultTalkProvider = "elevenlabs"
private static let defaultSilenceTimeoutMs = 700
private final class RMSMeter: @unchecked Sendable { private final class RMSMeter: @unchecked Sendable {
private let lock = NSLock() private let lock = NSLock()
@@ -66,7 +67,7 @@ actor TalkModeRuntime {
private var fallbackVoiceId: String? private var fallbackVoiceId: String?
private var lastPlaybackWasPCM: Bool = false private var lastPlaybackWasPCM: Bool = false
private let silenceWindow: TimeInterval = 0.7 private var silenceWindow: TimeInterval = TimeInterval(TalkModeRuntime.defaultSilenceTimeoutMs) / 1000
private let minSpeechRMS: Double = 1e-3 private let minSpeechRMS: Double = 1e-3
private let speechBoostFactor: Double = 6.0 private let speechBoostFactor: Double = 6.0
@@ -783,6 +784,7 @@ extension TalkModeRuntime {
} }
self.defaultOutputFormat = cfg.outputFormat self.defaultOutputFormat = cfg.outputFormat
self.interruptOnSpeech = cfg.interruptOnSpeech self.interruptOnSpeech = cfg.interruptOnSpeech
self.silenceWindow = TimeInterval(cfg.silenceTimeoutMs) / 1000
self.apiKey = cfg.apiKey self.apiKey = cfg.apiKey
let hasApiKey = (cfg.apiKey?.isEmpty == false) let hasApiKey = (cfg.apiKey?.isEmpty == false)
let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none" let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none"
@@ -792,7 +794,8 @@ extension TalkModeRuntime {
"talk config voiceId=\(voiceLabel, privacy: .public) " + "talk config voiceId=\(voiceLabel, privacy: .public) " +
"modelId=\(modelLabel, privacy: .public) " + "modelId=\(modelLabel, privacy: .public) " +
"apiKey=\(hasApiKey, privacy: .public) " + "apiKey=\(hasApiKey, privacy: .public) " +
"interrupt=\(cfg.interruptOnSpeech, privacy: .public)") "interrupt=\(cfg.interruptOnSpeech, privacy: .public) " +
"silenceTimeoutMs=\(cfg.silenceTimeoutMs, privacy: .public)")
} }
private struct TalkRuntimeConfig { private struct TalkRuntimeConfig {
@@ -801,6 +804,7 @@ extension TalkModeRuntime {
let modelId: String? let modelId: String?
let outputFormat: String? let outputFormat: String?
let interruptOnSpeech: Bool let interruptOnSpeech: Bool
let silenceTimeoutMs: Int
let apiKey: String? let apiKey: String?
} }
@@ -880,6 +884,21 @@ extension TalkModeRuntime {
normalizedPayload: false) normalizedPayload: false)
} }
/// Resolves the Talk silence timeout, in milliseconds, from the `talk` config dictionary.
///
/// The entry's `intValue` is preferred; otherwise its `doubleValue` is accepted when it is a
/// whole number representable as `Int`. Missing, zero, negative, fractional, non-finite, or
/// out-of-range values yield `defaultSilenceTimeoutMs`.
///
/// - Parameter talk: The `talk` config section, or `nil` when none is available.
/// - Returns: The configured timeout, or the platform default when unset or invalid.
static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?) -> Int {
    let entry = talk?["silenceTimeoutMs"]
    if let timeout = entry?.intValue, timeout > 0 {
        return timeout
    }
    // `Int(exactly:)` returns nil for fractional, non-finite, and out-of-range doubles. A manual
    // `timeout <= Double(Int.max)` check is not safe: `Double(Int.max)` rounds up to 2^63, and
    // `Int(2^63)` traps at runtime.
    if
        let timeout = entry?.doubleValue,
        let wholeMs = Int(exactly: timeout),
        wholeMs > 0
    {
        return wholeMs
    }
    return Self.defaultSilenceTimeoutMs
}
private func fetchTalkConfig() async -> TalkRuntimeConfig { private func fetchTalkConfig() async -> TalkRuntimeConfig {
let env = ProcessInfo.processInfo.environment let env = ProcessInfo.processInfo.environment
let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines) let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines)
@@ -895,6 +914,7 @@ extension TalkModeRuntime {
let selection = Self.selectTalkProviderConfig(talk) let selection = Self.selectTalkProviderConfig(talk)
let activeProvider = selection?.provider ?? Self.defaultTalkProvider let activeProvider = selection?.provider ?? Self.defaultTalkProvider
let activeConfig = selection?.config let activeConfig = selection?.config
let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk)
let ui = snap.config?["ui"]?.dictionaryValue let ui = snap.config?["ui"]?.dictionaryValue
let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
await MainActor.run { await MainActor.run {
@@ -939,6 +959,7 @@ extension TalkModeRuntime {
modelId: resolvedModel, modelId: resolvedModel,
outputFormat: outputFormat, outputFormat: outputFormat,
interruptOnSpeech: interrupt ?? true, interruptOnSpeech: interrupt ?? true,
silenceTimeoutMs: silenceTimeoutMs,
apiKey: resolvedApiKey) apiKey: resolvedApiKey)
} catch { } catch {
let resolvedVoice = let resolvedVoice =
@@ -951,6 +972,7 @@ extension TalkModeRuntime {
modelId: Self.defaultModelIdFallback, modelId: Self.defaultModelIdFallback,
outputFormat: nil, outputFormat: nil,
interruptOnSpeech: true, interruptOnSpeech: true,
silenceTimeoutMs: Self.defaultSilenceTimeoutMs,
apiKey: resolvedApiKey) apiKey: resolvedApiKey)
} }
} }

View File

@@ -32,4 +32,24 @@ struct TalkModeConfigParsingTests {
#expect(selection?.config["voiceId"]?.stringValue == "voice-legacy") #expect(selection?.config["voiceId"]?.stringValue == "voice-legacy")
#expect(selection?.config["apiKey"]?.stringValue == "legacy-key") #expect(selection?.config["apiKey"]?.stringValue == "legacy-key")
} }
// A positive integer `silenceTimeoutMs` is returned unchanged.
@Test func readsConfiguredSilenceTimeoutMs() {
    let resolved = TalkModeRuntime.resolvedSilenceTimeoutMs(["silenceTimeoutMs": AnyCodable(1500)])
    #expect(resolved == 1500)
}
// With no talk config at all, the macOS default of 700 ms applies.
@Test func defaultsSilenceTimeoutMsWhenMissing() {
    let resolved = TalkModeRuntime.resolvedSilenceTimeoutMs(nil)
    #expect(resolved == 700)
}
// Zero, negative, and fractional values all fall back to the 700 ms default.
@Test func defaultsSilenceTimeoutMsWhenInvalid() {
    let invalidValues: [AnyCodable] = [AnyCodable(0), AnyCodable(-250), AnyCodable(1.5)]
    for value in invalidValues {
        let talk: [String: AnyCodable] = [
            "silenceTimeoutMs": value,
        ]
        #expect(TalkModeRuntime.resolvedSilenceTimeoutMs(talk) == 700)
    }
}
} }

View File

@@ -1659,6 +1659,7 @@ Defaults for Talk mode (macOS/iOS/Android).
modelId: "eleven_v3", modelId: "eleven_v3",
outputFormat: "mp3_44100_128", outputFormat: "mp3_44100_128",
apiKey: "elevenlabs_api_key", apiKey: "elevenlabs_api_key",
silenceTimeoutMs: 1500,
interruptOnSpeech: true, interruptOnSpeech: true,
}, },
} }
@@ -1668,6 +1669,7 @@ Defaults for Talk mode (macOS/iOS/Android).
- `apiKey` and `providers.*.apiKey` accept plaintext strings or SecretRef objects. - `apiKey` and `providers.*.apiKey` accept plaintext strings or SecretRef objects.
- `ELEVENLABS_API_KEY` fallback applies only when no Talk API key is configured. - `ELEVENLABS_API_KEY` fallback applies only when no Talk API key is configured.
- `voiceAliases` lets Talk directives use friendly names. - `voiceAliases` lets Talk directives use friendly names.
- `silenceTimeoutMs` controls how long Talk mode waits after user silence before it sends the transcript. Unset keeps the platform default pause window (`700` ms on macOS and Android, `900` ms on iOS).
--- ---

View File

@@ -56,6 +56,7 @@ Supported keys:
modelId: "eleven_v3", modelId: "eleven_v3",
outputFormat: "mp3_44100_128", outputFormat: "mp3_44100_128",
apiKey: "elevenlabs_api_key", apiKey: "elevenlabs_api_key",
silenceTimeoutMs: 1500,
interruptOnSpeech: true, interruptOnSpeech: true,
}, },
} }
@@ -64,6 +65,7 @@ Supported keys:
Defaults: Defaults:
- `interruptOnSpeech`: true - `interruptOnSpeech`: true
- `silenceTimeoutMs`: when unset, Talk keeps the platform default pause window before sending the transcript (`700` ms on macOS and Android, `900` ms on iOS)
- `voiceId`: falls back to `ELEVENLABS_VOICE_ID` / `SAG_VOICE_ID` (or first ElevenLabs voice when API key is available) - `voiceId`: falls back to `ELEVENLABS_VOICE_ID` / `SAG_VOICE_ID` (or first ElevenLabs voice when API key is available)
- `modelId`: defaults to `eleven_v3` when unset - `modelId`: defaults to `eleven_v3` when unset
- `apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available) - `apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available)

View File

@@ -305,6 +305,7 @@ const TARGET_KEYS = [
"talk.modelId", "talk.modelId",
"talk.outputFormat", "talk.outputFormat",
"talk.interruptOnSpeech", "talk.interruptOnSpeech",
"talk.silenceTimeoutMs",
"meta", "meta",
"env", "env",
"env.shellEnv", "env.shellEnv",

View File

@@ -163,6 +163,8 @@ export const FIELD_HELP: Record<string, string> = {
"Use this legacy ElevenLabs API key for Talk mode only during migration, and keep secrets in env-backed storage. Prefer talk.providers.elevenlabs.apiKey (fallback: ELEVENLABS_API_KEY).", "Use this legacy ElevenLabs API key for Talk mode only during migration, and keep secrets in env-backed storage. Prefer talk.providers.elevenlabs.apiKey (fallback: ELEVENLABS_API_KEY).",
"talk.interruptOnSpeech": "talk.interruptOnSpeech":
"If true (default), stop assistant speech when the user starts speaking in Talk mode. Keep enabled for conversational turn-taking.", "If true (default), stop assistant speech when the user starts speaking in Talk mode. Keep enabled for conversational turn-taking.",
"talk.silenceTimeoutMs":
"Milliseconds of user silence before Talk mode finalizes and sends the current transcript. Leave unset to keep the platform default pause window (700 ms on macOS and Android, 900 ms on iOS).",
acp: "ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.", acp: "ACP runtime controls for enabling dispatch, selecting backends, constraining allowed agent targets, and tuning streamed turn projection behavior.",
"acp.enabled": "acp.enabled":
"Global ACP feature gate. Keep disabled unless ACP runtime + policy are configured.", "Global ACP feature gate. Keep disabled unless ACP runtime + policy are configured.",

View File

@@ -651,6 +651,7 @@ export const FIELD_LABELS: Record<string, string> = {
"talk.modelId": "Talk Model ID", "talk.modelId": "Talk Model ID",
"talk.outputFormat": "Talk Output Format", "talk.outputFormat": "Talk Output Format",
"talk.interruptOnSpeech": "Talk Interrupt on Speech", "talk.interruptOnSpeech": "Talk Interrupt on Speech",
"talk.silenceTimeoutMs": "Talk Silence Timeout (ms)",
messages: "Messages", messages: "Messages",
"messages.messagePrefix": "Inbound Message Prefix", "messages.messagePrefix": "Inbound Message Prefix",
"messages.responsePrefix": "Outbound Response Prefix", "messages.responsePrefix": "Outbound Response Prefix",

View File

@@ -32,6 +32,7 @@ describe("talk normalization", () => {
outputFormat: "pcm_44100", outputFormat: "pcm_44100",
apiKey: "secret-key", // pragma: allowlist secret apiKey: "secret-key", // pragma: allowlist secret
interruptOnSpeech: false, interruptOnSpeech: false,
silenceTimeoutMs: 1500,
}); });
expect(normalized).toEqual({ expect(normalized).toEqual({
@@ -51,6 +52,7 @@ describe("talk normalization", () => {
outputFormat: "pcm_44100", outputFormat: "pcm_44100",
apiKey: "secret-key", // pragma: allowlist secret apiKey: "secret-key", // pragma: allowlist secret
interruptOnSpeech: false, interruptOnSpeech: false,
silenceTimeoutMs: 1500,
}); });
}); });

View File

@@ -47,6 +47,13 @@ function normalizeTalkSecretInput(value: unknown): TalkProviderConfig["apiKey"]
return coerceSecretRef(value) ?? undefined; return coerceSecretRef(value) ?? undefined;
} }
/**
 * Returns `value` when it is a positive integer number, otherwise `undefined`.
 *
 * Non-numbers, NaN, infinities, fractional values, zero, and negatives are all dropped.
 */
function normalizeSilenceTimeoutMs(value: unknown): number | undefined {
  if (typeof value === "number" && Number.isInteger(value) && value > 0) {
    return value;
  }
  return undefined;
}
function normalizeTalkProviderConfig(value: unknown): TalkProviderConfig | undefined { function normalizeTalkProviderConfig(value: unknown): TalkProviderConfig | undefined {
if (!isPlainObject(value)) { if (!isPlainObject(value)) {
return undefined; return undefined;
@@ -125,6 +132,10 @@ function normalizedLegacyTalkFields(source: Record<string, unknown>): Partial<Ta
if (apiKey !== undefined) { if (apiKey !== undefined) {
legacy.apiKey = apiKey; legacy.apiKey = apiKey;
} }
const silenceTimeoutMs = normalizeSilenceTimeoutMs(source.silenceTimeoutMs);
if (silenceTimeoutMs !== undefined) {
legacy.silenceTimeoutMs = silenceTimeoutMs;
}
return legacy; return legacy;
} }
@@ -267,6 +278,9 @@ export function buildTalkConfigResponse(value: unknown): TalkConfig | undefined
if (typeof normalized.interruptOnSpeech === "boolean") { if (typeof normalized.interruptOnSpeech === "boolean") {
payload.interruptOnSpeech = normalized.interruptOnSpeech; payload.interruptOnSpeech = normalized.interruptOnSpeech;
} }
if (typeof normalized.silenceTimeoutMs === "number") {
payload.silenceTimeoutMs = normalized.silenceTimeoutMs;
}
if (normalized.providers && Object.keys(normalized.providers).length > 0) { if (normalized.providers && Object.keys(normalized.providers).length > 0) {
payload.providers = normalized.providers; payload.providers = normalized.providers;
} }

View File

@@ -70,6 +70,8 @@ export type TalkConfig = {
providers?: Record<string, TalkProviderConfig>; providers?: Record<string, TalkProviderConfig>;
/** Stop speaking when user starts talking (default: true). */ /** Stop speaking when user starts talking (default: true). */
interruptOnSpeech?: boolean; interruptOnSpeech?: boolean;
/** Milliseconds of user silence before Talk mode sends the transcript after a pause. */
silenceTimeoutMs?: number;
/** /**
* Legacy ElevenLabs compatibility fields. * Legacy ElevenLabs compatibility fields.

View File

@@ -595,6 +595,7 @@ export const OpenClawSchema = z
outputFormat: z.string().optional(), outputFormat: z.string().optional(),
apiKey: SecretInputSchema.optional().register(sensitive), apiKey: SecretInputSchema.optional().register(sensitive),
interruptOnSpeech: z.boolean().optional(), interruptOnSpeech: z.boolean().optional(),
silenceTimeoutMs: z.number().int().positive().optional(),
}) })
.strict() .strict()
.optional(), .optional(),

View File

@@ -42,6 +42,7 @@ export const TalkConfigResultSchema = Type.Object(
outputFormat: Type.Optional(Type.String()), outputFormat: Type.Optional(Type.String()),
apiKey: Type.Optional(Type.String()), apiKey: Type.Optional(Type.String()),
interruptOnSpeech: Type.Optional(Type.Boolean()), interruptOnSpeech: Type.Optional(Type.Boolean()),
silenceTimeoutMs: Type.Optional(Type.Integer({ minimum: 1 })),
}, },
{ additionalProperties: false }, { additionalProperties: false },
), ),

View File

@@ -56,7 +56,11 @@ async function connectOperator(ws: GatewaySocket, scopes: string[]) {
}); });
} }
async function writeTalkConfig(config: { apiKey?: string; voiceId?: string }) { async function writeTalkConfig(config: {
apiKey?: string;
voiceId?: string;
silenceTimeoutMs?: number;
}) {
const { writeConfigFile } = await import("../config/config.js"); const { writeConfigFile } = await import("../config/config.js");
await writeConfigFile({ talk: config }); await writeConfigFile({ talk: config });
} }
@@ -68,6 +72,7 @@ describe("gateway talk.config", () => {
talk: { talk: {
voiceId: "voice-123", voiceId: "voice-123",
apiKey: "secret-key-abc", // pragma: allowlist secret apiKey: "secret-key-abc", // pragma: allowlist secret
silenceTimeoutMs: 1500,
}, },
session: { session: {
mainKey: "main-test", mainKey: "main-test",
@@ -88,6 +93,7 @@ describe("gateway talk.config", () => {
}; };
apiKey?: string; apiKey?: string;
voiceId?: string; voiceId?: string;
silenceTimeoutMs?: number;
}; };
}; };
}>(ws, "talk.config", {}); }>(ws, "talk.config", {});
@@ -99,6 +105,7 @@ describe("gateway talk.config", () => {
); );
expect(res.payload?.config?.talk?.voiceId).toBe("voice-123"); expect(res.payload?.config?.talk?.voiceId).toBe("voice-123");
expect(res.payload?.config?.talk?.apiKey).toBe("__OPENCLAW_REDACTED__"); expect(res.payload?.config?.talk?.apiKey).toBe("__OPENCLAW_REDACTED__");
expect(res.payload?.config?.talk?.silenceTimeoutMs).toBe(1500);
}); });
}); });