From 16a5f0b006919f9b6150332474e8ceca08320513 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 8 Mar 2026 16:15:46 +0000 Subject: [PATCH] refactor: split talk gateway config loaders --- .../app/voice/TalkModeGatewayConfig.kt | 110 ++++++++++++++++++ .../ai/openclaw/app/voice/TalkModeManager.kt | 81 ++++++------- .../Sources/Voice/TalkModeGatewayConfig.swift | 69 +++++++++++ apps/ios/Sources/Voice/TalkModeManager.swift | 58 +++------ .../OpenClaw/TalkModeGatewayConfig.swift | 104 +++++++++++++++++ .../Sources/OpenClaw/TalkModeRuntime.swift | 91 ++++----------- 6 files changed, 359 insertions(+), 154 deletions(-) create mode 100644 apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeGatewayConfig.kt create mode 100644 apps/ios/Sources/Voice/TalkModeGatewayConfig.swift create mode 100644 apps/macos/Sources/OpenClaw/TalkModeGatewayConfig.swift diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeGatewayConfig.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeGatewayConfig.kt new file mode 100644 index 00000000000..4293c113896 --- /dev/null +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeGatewayConfig.kt @@ -0,0 +1,110 @@ +package ai.openclaw.app.voice + +import ai.openclaw.app.normalizeMainKey +import kotlinx.serialization.json.JsonElement +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.booleanOrNull +import kotlinx.serialization.json.contentOrNull + +internal data class TalkModeGatewayConfigState( + val activeProvider: String, + val normalizedPayload: Boolean, + val missingResolvedPayload: Boolean, + val mainSessionKey: String, + val defaultVoiceId: String?, + val voiceAliases: Map<String, String>, + val defaultModelId: String, + val defaultOutputFormat: String, + val apiKey: String?, + val interruptOnSpeech: Boolean?, + val silenceTimeoutMs: Long, +) + +internal object TalkModeGatewayConfigParser { + fun parse( + config: 
JsonObject?, + defaultProvider: String, + defaultModelIdFallback: String, + defaultOutputFormatFallback: String, + envVoice: String?, + sagVoice: String?, + envKey: String?, + ): TalkModeGatewayConfigState { + val talk = config?.get("talk").asObjectOrNull() + val selection = TalkModeManager.selectTalkProviderConfig(talk) + val activeProvider = selection?.provider ?: defaultProvider + val activeConfig = selection?.config + val sessionCfg = config?.get("session").asObjectOrNull() + val mainKey = normalizeMainKey(sessionCfg?.get("mainKey").asStringOrNull()) + val voice = activeConfig?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } + val aliases = + activeConfig?.get("voiceAliases").asObjectOrNull()?.entries?.mapNotNull { (key, value) -> + val id = value.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } ?: return@mapNotNull null + normalizeTalkAliasKey(key).takeIf { it.isNotEmpty() }?.let { it to id } + }?.toMap().orEmpty() + val model = activeConfig?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } + val outputFormat = + activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } + val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } + val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull() + val silenceTimeoutMs = TalkModeManager.resolvedSilenceTimeoutMs(talk) + + return TalkModeGatewayConfigState( + activeProvider = activeProvider, + normalizedPayload = selection?.normalizedPayload == true, + missingResolvedPayload = talk != null && selection == null, + mainSessionKey = mainKey, + defaultVoiceId = + if (activeProvider == defaultProvider) { + voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() } + } else { + voice + }, + voiceAliases = aliases, + defaultModelId = model ?: defaultModelIdFallback, + defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback, + apiKey = key ?: envKey?.takeIf { it.isNotEmpty() }, 
+ interruptOnSpeech = interrupt, + silenceTimeoutMs = silenceTimeoutMs, + ) + } + + fun fallback( + defaultProvider: String, + defaultModelIdFallback: String, + defaultOutputFormatFallback: String, + envVoice: String?, + sagVoice: String?, + envKey: String?, + ): TalkModeGatewayConfigState = + TalkModeGatewayConfigState( + activeProvider = defaultProvider, + normalizedPayload = false, + missingResolvedPayload = false, + mainSessionKey = "main", + defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }, + voiceAliases = emptyMap(), + defaultModelId = defaultModelIdFallback, + defaultOutputFormat = defaultOutputFormatFallback, + apiKey = envKey?.takeIf { it.isNotEmpty() }, + interruptOnSpeech = null, + silenceTimeoutMs = TalkDefaults.defaultSilenceTimeoutMs, + ) +} + +private fun normalizeTalkAliasKey(value: String): String = + value.trim().lowercase() + +private fun JsonElement?.asStringOrNull(): String? = + this?.let { element -> + element as? JsonPrimitive + }?.contentOrNull + +private fun JsonElement?.asBooleanOrNull(): Boolean? { + val primitive = this as? JsonPrimitive ?: return null + return primitive.booleanOrNull +} + +private fun JsonElement?.asObjectOrNull(): JsonObject? = + this as? 
JsonObject diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt index 3bd52a3a0a0..8a3b6fd948d 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt @@ -1400,69 +1400,64 @@ class TalkModeManager( try { val res = session.request("talk.config", """{"includeSecrets":true}""") val root = json.parseToJsonElement(res).asObjectOrNull() - val config = root?.get("config").asObjectOrNull() - val talk = config?.get("talk").asObjectOrNull() - val selection = selectTalkProviderConfig(talk) - if (talk != null && selection == null) { + val parsed = + TalkModeGatewayConfigParser.parse( + config = root?.get("config").asObjectOrNull(), + defaultProvider = defaultTalkProvider, + defaultModelIdFallback = defaultModelIdFallback, + defaultOutputFormatFallback = defaultOutputFormatFallback, + envVoice = envVoice, + sagVoice = sagVoice, + envKey = envKey, + ) + if (parsed.missingResolvedPayload) { Log.w(tag, "talk config ignored: normalized payload missing talk.resolved") } - val activeProvider = selection?.provider ?: defaultTalkProvider - val activeConfig = selection?.config - val sessionCfg = config?.get("session").asObjectOrNull() - val mainKey = normalizeMainKey(sessionCfg?.get("mainKey").asStringOrNull()) - val voice = activeConfig?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } - val aliases = - activeConfig?.get("voiceAliases").asObjectOrNull()?.entries?.mapNotNull { (key, value) -> - val id = value.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } ?: return@mapNotNull null - normalizeAliasKey(key).takeIf { it.isNotEmpty() }?.let { it to id } - }?.toMap().orEmpty() - val model = activeConfig?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } - val outputFormat = - 
activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } - val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } - val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull() - val silenceTimeoutMs = resolvedSilenceTimeoutMs(talk) if (!isCanonicalMainSessionKey(mainSessionKey)) { - mainSessionKey = mainKey + mainSessionKey = parsed.mainSessionKey } - defaultVoiceId = - if (activeProvider == defaultTalkProvider) { - voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() } - } else { - voice - } - voiceAliases = aliases + defaultVoiceId = parsed.defaultVoiceId + voiceAliases = parsed.voiceAliases if (!voiceOverrideActive) currentVoiceId = defaultVoiceId - defaultModelId = model ?: defaultModelIdFallback + defaultModelId = parsed.defaultModelId if (!modelOverrideActive) currentModelId = defaultModelId - defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback - apiKey = key ?: envKey?.takeIf { it.isNotEmpty() } - silenceWindowMs = silenceTimeoutMs + defaultOutputFormat = parsed.defaultOutputFormat + apiKey = parsed.apiKey + silenceWindowMs = parsed.silenceTimeoutMs Log.d( tag, - "reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId silenceTimeoutMs=$silenceTimeoutMs", + "reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId silenceTimeoutMs=${parsed.silenceTimeoutMs}", ) - if (interrupt != null) interruptOnSpeech = interrupt - activeProviderIsElevenLabs = activeProvider == defaultTalkProvider + if (parsed.interruptOnSpeech != null) interruptOnSpeech = parsed.interruptOnSpeech + activeProviderIsElevenLabs = parsed.activeProvider == defaultTalkProvider if (!activeProviderIsElevenLabs) { // Clear ElevenLabs credentials so playAssistant won't attempt ElevenLabs calls apiKey = null defaultVoiceId = null if (!voiceOverrideActive) currentVoiceId = null - Log.w(tag, "talk provider $activeProvider 
unsupported; using system voice fallback") - } else if (selection?.normalizedPayload == true) { + Log.w(tag, "talk provider ${parsed.activeProvider} unsupported; using system voice fallback") + } else if (parsed.normalizedPayload) { Log.d(tag, "talk config provider=elevenlabs") } configLoaded = true } catch (_: Throwable) { - silenceWindowMs = TalkDefaults.defaultSilenceTimeoutMs - defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() } - defaultModelId = defaultModelIdFallback + val fallback = + TalkModeGatewayConfigParser.fallback( + defaultProvider = defaultTalkProvider, + defaultModelIdFallback = defaultModelIdFallback, + defaultOutputFormatFallback = defaultOutputFormatFallback, + envVoice = envVoice, + sagVoice = sagVoice, + envKey = envKey, + ) + silenceWindowMs = fallback.silenceTimeoutMs + defaultVoiceId = fallback.defaultVoiceId + defaultModelId = fallback.defaultModelId if (!modelOverrideActive) currentModelId = defaultModelId - apiKey = envKey?.takeIf { it.isNotEmpty() } - voiceAliases = emptyMap() - defaultOutputFormat = defaultOutputFormatFallback + apiKey = fallback.apiKey + voiceAliases = fallback.voiceAliases + defaultOutputFormat = fallback.defaultOutputFormat // Keep config load retryable after transient fetch failures. configLoaded = false } diff --git a/apps/ios/Sources/Voice/TalkModeGatewayConfig.swift b/apps/ios/Sources/Voice/TalkModeGatewayConfig.swift new file mode 100644 index 00000000000..7215bc7d1af --- /dev/null +++ b/apps/ios/Sources/Voice/TalkModeGatewayConfig.swift @@ -0,0 +1,69 @@ +import Foundation +import OpenClawKit + +struct TalkModeGatewayConfigState { + let activeProvider: String + let normalizedPayload: Bool + let missingResolvedPayload: Bool + let defaultVoiceId: String? + let voiceAliases: [String: String] + let defaultModelId: String + let defaultOutputFormat: String? + let rawConfigApiKey: String? + let interruptOnSpeech: Bool? 
+ let silenceTimeoutMs: Int +} + +enum TalkModeGatewayConfigParser { + static func parse( + config: [String: Any], + defaultProvider: String, + defaultModelIdFallback: String, + defaultSilenceTimeoutMs: Int + ) -> TalkModeGatewayConfigState { + let talk = TalkConfigParsing.bridgeFoundationDictionary(config["talk"] as? [String: Any]) + let selection = TalkConfigParsing.selectProviderConfig( + talk, + defaultProvider: defaultProvider, + allowLegacyFallback: false) + let activeProvider = selection?.provider ?? defaultProvider + let activeConfig = selection?.config + let defaultVoiceId = activeConfig?["voiceId"]?.stringValue? + .trimmingCharacters(in: .whitespacesAndNewlines) + let voiceAliases: [String: String] + if let aliases = activeConfig?["voiceAliases"]?.dictionaryValue { + var resolved: [String: String] = [:] + for (key, value) in aliases { + guard let id = value.stringValue else { continue } + let normalizedKey = key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + let trimmedId = id.trimmingCharacters(in: .whitespacesAndNewlines) + guard !normalizedKey.isEmpty, !trimmedId.isEmpty else { continue } + resolved[normalizedKey] = trimmedId + } + voiceAliases = resolved + } else { + voiceAliases = [:] + } + let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) + let defaultModelId = (model?.isEmpty == false) ? model! : defaultModelIdFallback + let defaultOutputFormat = activeConfig?["outputFormat"]?.stringValue? 
+ .trimmingCharacters(in: .whitespacesAndNewlines) + let rawConfigApiKey = activeConfig?["apiKey"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) + let interruptOnSpeech = talk?["interruptOnSpeech"]?.boolValue + let silenceTimeoutMs = TalkConfigParsing.resolvedSilenceTimeoutMs( + talk, + fallback: defaultSilenceTimeoutMs) + + return TalkModeGatewayConfigState( + activeProvider: activeProvider, + normalizedPayload: selection?.normalizedPayload == true, + missingResolvedPayload: talk != nil && selection == nil, + defaultVoiceId: defaultVoiceId, + voiceAliases: voiceAliases, + defaultModelId: defaultModelId, + defaultOutputFormat: defaultOutputFormat, + rawConfigApiKey: rawConfigApiKey, + interruptOnSpeech: interruptOnSpeech, + silenceTimeoutMs: silenceTimeoutMs) + } +} diff --git a/apps/ios/Sources/Voice/TalkModeManager.swift b/apps/ios/Sources/Voice/TalkModeManager.swift index 9e963c17a16..fd3a65ca562 100644 --- a/apps/ios/Sources/Voice/TalkModeManager.swift +++ b/apps/ios/Sources/Voice/TalkModeManager.swift @@ -1970,17 +1970,6 @@ extension TalkModeManager { return trimmed } - static func selectTalkProviderConfig(_ talk: [String: AnyCodable]?) -> TalkProviderConfigSelection? { - TalkConfigParsing.selectProviderConfig( - talk, - defaultProvider: Self.defaultTalkProvider, - allowLegacyFallback: false) - } - - static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?) -> Int { - TalkConfigParsing.resolvedSilenceTimeoutMs(talk, fallback: Self.defaultSilenceTimeoutMs) - } - func reloadConfig() async { guard let gateway else { return } self.pcmFormatUnavailable = false @@ -1992,41 +1981,27 @@ extension TalkModeManager { ) guard let json = try JSONSerialization.jsonObject(with: res) as? [String: Any] else { return } guard let config = json["config"] as? [String: Any] else { return } - let talk = TalkConfigParsing.bridgeFoundationDictionary(config["talk"] as? 
[String: Any]) - let selection = Self.selectTalkProviderConfig(talk) - if talk != nil, selection == nil { + let parsed = TalkModeGatewayConfigParser.parse( + config: config, + defaultProvider: Self.defaultTalkProvider, + defaultModelIdFallback: Self.defaultModelIdFallback, + defaultSilenceTimeoutMs: Self.defaultSilenceTimeoutMs) + if parsed.missingResolvedPayload { GatewayDiagnostics.log( "talk config ignored: normalized payload missing talk.resolved") } - let activeProvider = selection?.provider ?? Self.defaultTalkProvider - let activeConfig = selection?.config - let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk) - self.defaultVoiceId = activeConfig?["voiceId"]?.stringValue? - .trimmingCharacters(in: .whitespacesAndNewlines) - if let aliases = activeConfig?["voiceAliases"]?.dictionaryValue { - var resolved: [String: String] = [:] - for (key, value) in aliases { - guard let id = value.stringValue else { continue } - let normalizedKey = key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() - let trimmedId = id.trimmingCharacters(in: .whitespacesAndNewlines) - guard !normalizedKey.isEmpty, !trimmedId.isEmpty else { continue } - resolved[normalizedKey] = trimmedId - } - self.voiceAliases = resolved - } else { - self.voiceAliases = [:] - } + let activeProvider = parsed.activeProvider + self.defaultVoiceId = parsed.defaultVoiceId + self.voiceAliases = parsed.voiceAliases if !self.voiceOverrideActive { self.currentVoiceId = self.defaultVoiceId } - let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) - self.defaultModelId = (model?.isEmpty == false) ? model : Self.defaultModelIdFallback + self.defaultModelId = parsed.defaultModelId if !self.modelOverrideActive { self.currentModelId = self.defaultModelId } - self.defaultOutputFormat = activeConfig?["outputFormat"]?.stringValue? 
- .trimmingCharacters(in: .whitespacesAndNewlines) - let rawConfigApiKey = activeConfig?["apiKey"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) + self.defaultOutputFormat = parsed.defaultOutputFormat + let rawConfigApiKey = parsed.rawConfigApiKey let configApiKey = Self.normalizedTalkApiKey(rawConfigApiKey) let localApiKey = Self.normalizedTalkApiKey( GatewaySettingsStore.loadTalkProviderApiKey(provider: activeProvider)) @@ -2045,12 +2020,13 @@ extension TalkModeManager { self.gatewayTalkDefaultModelId = self.defaultModelId self.gatewayTalkApiKeyConfigured = (self.apiKey?.isEmpty == false) self.gatewayTalkConfigLoaded = true - if let interrupt = talk?["interruptOnSpeech"]?.boolValue { + if let interrupt = parsed.interruptOnSpeech { self.interruptOnSpeech = interrupt } - self.silenceWindow = TimeInterval(silenceTimeoutMs) / 1000 - if selection != nil { - GatewayDiagnostics.log("talk config provider=\(activeProvider) silenceTimeoutMs=\(silenceTimeoutMs)") + self.silenceWindow = TimeInterval(parsed.silenceTimeoutMs) / 1000 + if parsed.normalizedPayload || parsed.defaultVoiceId != nil || parsed.rawConfigApiKey != nil { + GatewayDiagnostics.log( + "talk config provider=\(activeProvider) silenceTimeoutMs=\(parsed.silenceTimeoutMs)") } } catch { self.defaultModelId = Self.defaultModelIdFallback diff --git a/apps/macos/Sources/OpenClaw/TalkModeGatewayConfig.swift b/apps/macos/Sources/OpenClaw/TalkModeGatewayConfig.swift new file mode 100644 index 00000000000..15600b5ea0e --- /dev/null +++ b/apps/macos/Sources/OpenClaw/TalkModeGatewayConfig.swift @@ -0,0 +1,104 @@ +import Foundation +import OpenClawKit + +struct TalkModeGatewayConfigState { + let activeProvider: String + let normalizedPayload: Bool + let missingResolvedPayload: Bool + let voiceId: String? + let voiceAliases: [String: String] + let modelId: String? + let outputFormat: String? + let interruptOnSpeech: Bool + let silenceTimeoutMs: Int + let apiKey: String? + let seamColorHex: String? 
+} + +enum TalkModeGatewayConfigParser { + static func parse( + snapshot: ConfigSnapshot, + defaultProvider: String, + defaultModelIdFallback: String, + defaultSilenceTimeoutMs: Int, + envVoice: String?, + sagVoice: String?, + envApiKey: String? + ) -> TalkModeGatewayConfigState { + let talk = snapshot.config?["talk"]?.dictionaryValue + let selection = TalkConfigParsing.selectProviderConfig(talk, defaultProvider: defaultProvider) + let activeProvider = selection?.provider ?? defaultProvider + let activeConfig = selection?.config + let silenceTimeoutMs = TalkConfigParsing.resolvedSilenceTimeoutMs( + talk, + fallback: defaultSilenceTimeoutMs) + let ui = snapshot.config?["ui"]?.dictionaryValue + let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + let voice = activeConfig?["voiceId"]?.stringValue + let rawAliases = activeConfig?["voiceAliases"]?.dictionaryValue + let resolvedAliases: [String: String] = + rawAliases?.reduce(into: [:]) { acc, entry in + let key = entry.key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + let value = entry.value.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + guard !key.isEmpty, !value.isEmpty else { return } + acc[key] = value + } ?? [:] + let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedModel = (model?.isEmpty == false) ? model! : defaultModelIdFallback + let outputFormat = activeConfig?["outputFormat"]?.stringValue + let interrupt = talk?["interruptOnSpeech"]?.boolValue + let apiKey = activeConfig?["apiKey"]?.stringValue + let resolvedVoice: String? = if activeProvider == defaultProvider { + (voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? voice : nil) ?? + (envVoice?.isEmpty == false ? envVoice : nil) ?? + (sagVoice?.isEmpty == false ? sagVoice : nil) + } else { + (voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? 
voice : nil) + } + let resolvedApiKey: String? = if activeProvider == defaultProvider { + (envApiKey?.isEmpty == false ? envApiKey : nil) ?? + (apiKey?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? apiKey : nil) + } else { + nil + } + + return TalkModeGatewayConfigState( + activeProvider: activeProvider, + normalizedPayload: selection?.normalizedPayload == true, + missingResolvedPayload: talk != nil && selection == nil, + voiceId: resolvedVoice, + voiceAliases: resolvedAliases, + modelId: resolvedModel, + outputFormat: outputFormat, + interruptOnSpeech: interrupt ?? true, + silenceTimeoutMs: silenceTimeoutMs, + apiKey: resolvedApiKey, + seamColorHex: rawSeam.isEmpty ? nil : rawSeam) + } + + static func fallback( + defaultModelIdFallback: String, + defaultSilenceTimeoutMs: Int, + envVoice: String?, + sagVoice: String?, + envApiKey: String? + ) -> TalkModeGatewayConfigState { + let resolvedVoice = + (envVoice?.isEmpty == false ? envVoice : nil) ?? + (sagVoice?.isEmpty == false ? sagVoice : nil) + let resolvedApiKey = envApiKey?.isEmpty == false ? envApiKey : nil + + return TalkModeGatewayConfigState( + activeProvider: "elevenlabs", + normalizedPayload: false, + missingResolvedPayload: false, + voiceId: resolvedVoice, + voiceAliases: [:], + modelId: defaultModelIdFallback, + outputFormat: nil, + interruptOnSpeech: true, + silenceTimeoutMs: defaultSilenceTimeoutMs, + apiKey: resolvedApiKey, + seamColorHex: nil) + } +} diff --git a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift index 8013135b330..1565c8a8152 100644 --- a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift +++ b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift @@ -798,16 +798,6 @@ extension TalkModeRuntime { "silenceTimeoutMs=\(cfg.silenceTimeoutMs, privacy: .public)") } - private struct TalkRuntimeConfig { - let voiceId: String? - let voiceAliases: [String: String] - let modelId: String? - let outputFormat: String? 
- let interruptOnSpeech: Bool - let silenceTimeoutMs: Int - let apiKey: String? - } - static func selectTalkProviderConfig( _ talk: [String: AnyCodable]?) -> TalkProviderConfigSelection? { @@ -818,7 +808,7 @@ extension TalkModeRuntime { TalkConfigParsing.resolvedSilenceTimeoutMs(talk, fallback: self.defaultSilenceTimeoutMs) } - private func fetchTalkConfig() async -> TalkRuntimeConfig { + private func fetchTalkConfig() async -> TalkModeGatewayConfigState { let env = ProcessInfo.processInfo.environment let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines) let sagVoice = env["SAG_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines) @@ -829,73 +819,34 @@ extension TalkModeRuntime { method: .talkConfig, params: ["includeSecrets": AnyCodable(true)], timeoutMs: 8000) - let talk = snap.config?["talk"]?.dictionaryValue - let selection = Self.selectTalkProviderConfig(talk) - if talk != nil, selection == nil { + let parsed = TalkModeGatewayConfigParser.parse( + snapshot: snap, + defaultProvider: Self.defaultTalkProvider, + defaultModelIdFallback: Self.defaultModelIdFallback, + defaultSilenceTimeoutMs: Self.defaultSilenceTimeoutMs, + envVoice: envVoice, + sagVoice: sagVoice, + envApiKey: envApiKey) + if parsed.missingResolvedPayload { self.ttsLogger.info("talk config ignored: normalized payload missing talk.resolved") } - let activeProvider = selection?.provider ?? Self.defaultTalkProvider - let activeConfig = selection?.config - let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk) - let ui = snap.config?["ui"]?.dictionaryValue - let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" await MainActor.run { - AppStateStore.shared.seamColorHex = rawSeam.isEmpty ? 
nil : rawSeam + AppStateStore.shared.seamColorHex = parsed.seamColorHex } - let voice = activeConfig?["voiceId"]?.stringValue - let rawAliases = activeConfig?["voiceAliases"]?.dictionaryValue - let resolvedAliases: [String: String] = - rawAliases?.reduce(into: [:]) { acc, entry in - let key = entry.key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() - let value = entry.value.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - guard !key.isEmpty, !value.isEmpty else { return } - acc[key] = value - } ?? [:] - let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) - let resolvedModel = (model?.isEmpty == false) ? model! : Self.defaultModelIdFallback - let outputFormat = activeConfig?["outputFormat"]?.stringValue - let interrupt = talk?["interruptOnSpeech"]?.boolValue - let apiKey = activeConfig?["apiKey"]?.stringValue - let resolvedVoice: String? = if activeProvider == Self.defaultTalkProvider { - (voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? voice : nil) ?? - (envVoice?.isEmpty == false ? envVoice : nil) ?? - (sagVoice?.isEmpty == false ? sagVoice : nil) - } else { - (voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? voice : nil) - } - let resolvedApiKey: String? = if activeProvider == Self.defaultTalkProvider { - (envApiKey?.isEmpty == false ? envApiKey : nil) ?? - (apiKey?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? 
apiKey : nil) - } else { - nil - } - if activeProvider != Self.defaultTalkProvider { + if parsed.activeProvider != Self.defaultTalkProvider { self.ttsLogger - .info("talk provider \(activeProvider, privacy: .public) unsupported; using system voice") - } else if selection?.normalizedPayload == true { + .info("talk provider \(parsed.activeProvider, privacy: .public) unsupported; using system voice") + } else if parsed.normalizedPayload { self.ttsLogger.info("talk config provider from talk.resolved") } - return TalkRuntimeConfig( - voiceId: resolvedVoice, - voiceAliases: resolvedAliases, - modelId: resolvedModel, - outputFormat: outputFormat, - interruptOnSpeech: interrupt ?? true, - silenceTimeoutMs: silenceTimeoutMs, - apiKey: resolvedApiKey) + return parsed } catch { - let resolvedVoice = - (envVoice?.isEmpty == false ? envVoice : nil) ?? - (sagVoice?.isEmpty == false ? sagVoice : nil) - let resolvedApiKey = envApiKey?.isEmpty == false ? envApiKey : nil - return TalkRuntimeConfig( - voiceId: resolvedVoice, - voiceAliases: [:], - modelId: Self.defaultModelIdFallback, - outputFormat: nil, - interruptOnSpeech: true, - silenceTimeoutMs: Self.defaultSilenceTimeoutMs, - apiKey: resolvedApiKey) + return TalkModeGatewayConfigParser.fallback( + defaultModelIdFallback: Self.defaultModelIdFallback, + defaultSilenceTimeoutMs: Self.defaultSilenceTimeoutMs, + envVoice: envVoice, + sagVoice: sagVoice, + envApiKey: envApiKey) } }