diff --git a/apps/ios/Sources/Voice/TalkModeManager.swift b/apps/ios/Sources/Voice/TalkModeManager.swift
index 04172c14bfa..f59d33dec6a 100644
--- a/apps/ios/Sources/Voice/TalkModeManager.swift
+++ b/apps/ios/Sources/Voice/TalkModeManager.swift
@@ -1970,57 +1970,15 @@ extension TalkModeManager {
         return trimmed
     }
 
-    struct TalkProviderConfigSelection {
-        let provider: String
-        let config: [String: Any]
+    static func selectTalkProviderConfig(_ talk: [String: AnyCodable]?) -> TalkProviderConfigSelection? {
+        TalkConfigParsing.selectProviderConfig(
+            talk,
+            defaultProvider: Self.defaultTalkProvider,
+            allowLegacyFallback: false)
     }
 
-    private static func normalizedTalkProviderID(_ raw: String?) -> String? {
-        let trimmed = (raw ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
-        return trimmed.isEmpty ? nil : trimmed
-    }
-
-    static func selectTalkProviderConfig(_ talk: [String: Any]?) -> TalkProviderConfigSelection? {
-        guard let talk else { return nil }
-        let rawProvider = talk["provider"] as? String
-        let rawProviders = talk["providers"] as? [String: Any]
-        guard rawProvider != nil || rawProviders != nil else { return nil }
-        let providers = rawProviders ?? [:]
-        let normalizedProviders = providers.reduce(into: [String: [String: Any]]()) { acc, entry in
-            guard
-                let providerID = Self.normalizedTalkProviderID(entry.key),
-                let config = entry.value as? [String: Any]
-            else { return }
-            acc[providerID] = config
-        }
-        let providerID =
-            Self.normalizedTalkProviderID(rawProvider) ??
-            normalizedProviders.keys.min() ??
-            Self.defaultTalkProvider
-        return TalkProviderConfigSelection(
-            provider: providerID,
-            config: normalizedProviders[providerID] ?? [:])
-    }
-
-    static func resolvedSilenceTimeoutMs(_ talk: [String: Any]?) -> Int {
-        switch talk?["silenceTimeoutMs"] {
-        case let timeout as Int where timeout > 0:
-            return timeout
-        case let timeout as Double
-            where timeout > 0 && timeout.rounded(.towardZero) == timeout && timeout <= Double(Int.max):
-            return Int(timeout)
-        case let timeout as NSNumber:
-            if CFGetTypeID(timeout) == CFBooleanGetTypeID() {
-                return Self.defaultSilenceTimeoutMs
-            }
-            let value = timeout.doubleValue
-            if value > 0 && value.rounded(.towardZero) == value && value <= Double(Int.max) {
-                return Int(value)
-            }
-            return Self.defaultSilenceTimeoutMs
-        default:
-            return Self.defaultSilenceTimeoutMs
-        }
+    static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?) -> Int {
+        TalkConfigParsing.resolvedSilenceTimeoutMs(talk, fallback: Self.defaultSilenceTimeoutMs)
     }
 
     func reloadConfig() async {
@@ -2034,7 +1992,7 @@ extension TalkModeManager {
             )
             guard let json = try JSONSerialization.jsonObject(with: res) as? [String: Any] else { return }
             guard let config = json["config"] as? [String: Any] else { return }
-            let talk = config["talk"] as? [String: Any]
+            let talk = TalkConfigParsing.bridgeFoundationDictionary(config["talk"] as? [String: Any])
             let selection = Self.selectTalkProviderConfig(talk)
             if talk != nil, selection == nil {
                 GatewayDiagnostics.log(
@@ -2043,12 +2001,12 @@ extension TalkModeManager {
             let activeProvider = selection?.provider ?? Self.defaultTalkProvider
             let activeConfig = selection?.config
             let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk)
-            self.defaultVoiceId = (activeConfig?["voiceId"] as? String)?
+            self.defaultVoiceId = activeConfig?["voiceId"]?.stringValue?
                 .trimmingCharacters(in: .whitespacesAndNewlines)
-            if let aliases = activeConfig?["voiceAliases"] as? [String: Any] {
+            if let aliases = activeConfig?["voiceAliases"]?.dictionaryValue {
                 var resolved: [String: String] = [:]
                 for (key, value) in aliases {
-                    guard let id = value as? String else { continue }
+                    guard let id = value.stringValue else { continue }
                     let normalizedKey = key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
                     let trimmedId = id.trimmingCharacters(in: .whitespacesAndNewlines)
                     guard !normalizedKey.isEmpty, !trimmedId.isEmpty else { continue }
@@ -2061,14 +2019,14 @@ extension TalkModeManager {
             if !self.voiceOverrideActive {
                 self.currentVoiceId = self.defaultVoiceId
             }
-            let model = (activeConfig?["modelId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
+            let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
             self.defaultModelId = (model?.isEmpty == false) ? model : Self.defaultModelIdFallback
             if !self.modelOverrideActive {
                 self.currentModelId = self.defaultModelId
             }
-            self.defaultOutputFormat = (activeConfig?["outputFormat"] as? String)?
+            self.defaultOutputFormat = activeConfig?["outputFormat"]?.stringValue?
                 .trimmingCharacters(in: .whitespacesAndNewlines)
-            let rawConfigApiKey = (activeConfig?["apiKey"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
+            let rawConfigApiKey = activeConfig?["apiKey"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
             let configApiKey = Self.normalizedTalkApiKey(rawConfigApiKey)
             let localApiKey = Self.normalizedTalkApiKey(
                 GatewaySettingsStore.loadTalkProviderApiKey(provider: activeProvider))
@@ -2087,7 +2045,7 @@ extension TalkModeManager {
             self.gatewayTalkDefaultModelId = self.defaultModelId
             self.gatewayTalkApiKeyConfigured = (self.apiKey?.isEmpty == false)
             self.gatewayTalkConfigLoaded = true
-            if let interrupt = talk?["interruptOnSpeech"] as? Bool {
+            if let interrupt = talk?["interruptOnSpeech"]?.boolValue {
                 self.interruptOnSpeech = interrupt
             }
             self.silenceWindow = TimeInterval(silenceTimeoutMs) / 1000
diff --git a/apps/ios/Tests/TalkModeConfigParsingTests.swift b/apps/ios/Tests/TalkModeConfigParsingTests.swift
index 6bf9bbef108..bf600385c35 100644
--- a/apps/ios/Tests/TalkModeConfigParsingTests.swift
+++ b/apps/ios/Tests/TalkModeConfigParsingTests.swift
@@ -1,4 +1,5 @@
 import Foundation
+import OpenClawKit
 import Testing
 @testable import OpenClaw
 
@@ -15,9 +16,10 @@ import Testing
             "voiceId": "voice-legacy",
         ]
 
-        let selection = TalkModeManager.selectTalkProviderConfig(talk)
+        let selection = TalkModeManager.selectTalkProviderConfig(
+            TalkConfigParsing.bridgeFoundationDictionary(talk))
         #expect(selection?.provider == "elevenlabs")
-        #expect(selection?.config["voiceId"] as? String == "voice-normalized")
+        #expect(selection?.config["voiceId"]?.stringValue == "voice-normalized")
     }
 
     @Test func ignoresLegacyTalkFieldsWhenNormalizedPayloadMissing() {
@@ -26,7 +28,8 @@ import Testing
             "apiKey": "legacy-key", // pragma: allowlist secret
         ]
 
-        let selection = TalkModeManager.selectTalkProviderConfig(talk)
+        let selection = TalkModeManager.selectTalkProviderConfig(
+            TalkConfigParsing.bridgeFoundationDictionary(talk))
         #expect(selection == nil)
     }
 
@@ -53,7 +56,7 @@ import Testing
             "silenceTimeoutMs": 1500,
         ]
 
-        #expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 1500)
+        #expect(TalkModeManager.resolvedSilenceTimeoutMs(TalkConfigParsing.bridgeFoundationDictionary(talk)) == 1500)
     }
 
     @Test func defaultsSilenceTimeoutMsWhenMissing() {
@@ -65,7 +68,7 @@ import Testing
             "silenceTimeoutMs": 0,
         ]
 
-        #expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 900)
+        #expect(TalkModeManager.resolvedSilenceTimeoutMs(TalkConfigParsing.bridgeFoundationDictionary(talk)) == 900)
     }
 
     @Test func defaultsSilenceTimeoutMsWhenBool() {
@@ -73,6 +76,6 @@ import Testing
             "silenceTimeoutMs": true,
         ]
 
-        #expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 900)
+        #expect(TalkModeManager.resolvedSilenceTimeoutMs(TalkConfigParsing.bridgeFoundationDictionary(talk)) == 900)
     }
 }
diff --git a/apps/macos/Sources/OpenClaw/AnyCodable+Helpers.swift b/apps/macos/Sources/OpenClaw/AnyCodable+Helpers.swift
index 3cb8f54e396..47420afb7f6 100644
--- a/apps/macos/Sources/OpenClaw/AnyCodable+Helpers.swift
+++ b/apps/macos/Sources/OpenClaw/AnyCodable+Helpers.swift
@@ -4,40 +4,3 @@ import OpenClawKit
 // Prefer the OpenClawKit wrapper to keep gateway request payloads consistent.
 typealias AnyCodable = OpenClawKit.AnyCodable
 typealias InstanceIdentity = OpenClawKit.InstanceIdentity
-
-extension AnyCodable {
-    var stringValue: String? {
-        self.value as? String
-    }
-
-    var boolValue: Bool? {
-        self.value as? Bool
-    }
-
-    var intValue: Int? {
-        self.value as? Int
-    }
-
-    var doubleValue: Double? {
-        self.value as? Double
-    }
-
-    var dictionaryValue: [String: AnyCodable]? {
-        self.value as? [String: AnyCodable]
-    }
-
-    var arrayValue: [AnyCodable]? {
-        self.value as? [AnyCodable]
-    }
-
-    var foundationValue: Any {
-        switch self.value {
-        case let dict as [String: AnyCodable]:
-            dict.mapValues { $0.foundationValue }
-        case let array as [AnyCodable]:
-            array.map(\.foundationValue)
-        default:
-            self.value
-        }
-    }
-}
diff --git a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift
index 4ae5b4cc105..1adc3cb0b78 100644
--- a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift
+++ b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift
@@ -67,7 +67,7 @@ actor TalkModeRuntime {
     private var fallbackVoiceId: String?
     private var lastPlaybackWasPCM: Bool = false
 
-    private var silenceWindow: TimeInterval = TimeInterval(TalkModeRuntime.defaultSilenceTimeoutMs) / 1000
+    private var silenceWindow: TimeInterval = .init(TalkModeRuntime.defaultSilenceTimeoutMs) / 1000
     private let minSpeechRMS: Double = 1e-3
     private let speechBoostFactor: Double = 6.0
 
@@ -808,95 +808,14 @@ extension TalkModeRuntime {
         let apiKey: String?
     }
 
-    struct TalkProviderConfigSelection {
-        let provider: String
-        let config: [String: AnyCodable]
-        let normalizedPayload: Bool
-    }
-
-    private static func normalizedTalkProviderID(_ raw: String?) -> String? {
-        let trimmed = raw?.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() ?? ""
-        return trimmed.isEmpty ? nil : trimmed
-    }
-
-    private static func normalizedTalkProviderConfig(_ value: AnyCodable) -> [String: AnyCodable]? {
-        if let typed = value.value as? [String: AnyCodable] {
-            return typed
-        }
-        if let foundation = value.value as? [String: Any] {
-            return foundation.mapValues(AnyCodable.init)
-        }
-        if let nsDict = value.value as? NSDictionary {
-            var converted: [String: AnyCodable] = [:]
-            for case let (key as String, raw) in nsDict {
-                converted[key] = AnyCodable(raw)
-            }
-            return converted
-        }
-        return nil
-    }
-
-    private static func normalizedTalkProviders(_ raw: AnyCodable?) -> [String: [String: AnyCodable]] {
-        guard let raw else { return [:] }
-        var providerMap: [String: AnyCodable] = [:]
-        if let typed = raw.value as? [String: AnyCodable] {
-            providerMap = typed
-        } else if let foundation = raw.value as? [String: Any] {
-            providerMap = foundation.mapValues(AnyCodable.init)
-        } else if let nsDict = raw.value as? NSDictionary {
-            for case let (key as String, value) in nsDict {
-                providerMap[key] = AnyCodable(value)
-            }
-        } else {
-            return [:]
-        }
-
-        return providerMap.reduce(into: [String: [String: AnyCodable]]()) { acc, entry in
-            guard
-                let providerID = Self.normalizedTalkProviderID(entry.key),
-                let providerConfig = Self.normalizedTalkProviderConfig(entry.value)
-            else { return }
-            acc[providerID] = providerConfig
-        }
-    }
-
     static func selectTalkProviderConfig(
         _ talk: [String: AnyCodable]?) -> TalkProviderConfigSelection?
     {
-        guard let talk else { return nil }
-        let rawProvider = talk["provider"]?.stringValue
-        let rawProviders = talk["providers"]
-        let hasNormalizedPayload = rawProvider != nil || rawProviders != nil
-        if hasNormalizedPayload {
-            let normalizedProviders = Self.normalizedTalkProviders(rawProviders)
-            let providerID =
-                Self.normalizedTalkProviderID(rawProvider) ??
-                normalizedProviders.keys.min() ??
-                Self.defaultTalkProvider
-            return TalkProviderConfigSelection(
-                provider: providerID,
-                config: normalizedProviders[providerID] ?? [:],
-                normalizedPayload: true)
-        }
-        return TalkProviderConfigSelection(
-            provider: Self.defaultTalkProvider,
-            config: talk,
-            normalizedPayload: false)
+        TalkConfigParsing.selectProviderConfig(talk, defaultProvider: self.defaultTalkProvider)
     }
 
     static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?) -> Int {
-        if let timeout = talk?["silenceTimeoutMs"]?.intValue, timeout > 0 {
-            return timeout
-        }
-        if
-            let timeout = talk?["silenceTimeoutMs"]?.doubleValue,
-            timeout > 0,
-            timeout.rounded(.towardZero) == timeout,
-            timeout <= Double(Int.max)
-        {
-            return Int(timeout)
-        }
-        return Self.defaultSilenceTimeoutMs
+        TalkConfigParsing.resolvedSilenceTimeoutMs(talk, fallback: self.defaultSilenceTimeoutMs)
     }
 
     private func fetchTalkConfig() async -> TalkRuntimeConfig {
diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/AnyCodable+Helpers.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/AnyCodable+Helpers.swift
new file mode 100644
index 00000000000..ee0d9c78769
--- /dev/null
+++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/AnyCodable+Helpers.swift
@@ -0,0 +1,108 @@
+import Foundation
+
+/// Typed accessors over the untyped `value` wrapped by `AnyCodable`.
+public extension AnyCodable {
+    /// The wrapped value as a `String`, or `nil` when it is not a string.
+    var stringValue: String? {
+        self.value as? String
+    }
+
+    /// The wrapped value as a `Bool`, or `nil` when it is not a boolean.
+    var boolValue: Bool? {
+        if let value = self.value as? Bool {
+            return value
+        }
+        if let number = self.value as? NSNumber, CFGetTypeID(number) == CFBooleanGetTypeID() {
+            return number.boolValue
+        }
+        return nil
+    }
+
+    /// The wrapped value as an `Int`, or `nil` when it is not an integral number.
+    ///
+    /// Booleans are never reported as integers. Fractional, non-finite, and out-of-range
+    /// numbers yield `nil`; zero and negative integers are returned unchanged.
+    var intValue: Int? {
+        // Check for a boolean first: a boolean `NSNumber` can otherwise bridge to 0 or 1.
+        if let number = self.value as? NSNumber, CFGetTypeID(number) == CFBooleanGetTypeID() {
+            return nil
+        }
+        if let value = self.value as? Int {
+            return value
+        }
+        if let number = self.value as? NSNumber {
+            // `Int(exactly:)` returns nil where `Int(_:)` would trap, e.g. for 2^63.
+            return Int(exactly: number.doubleValue)
+        }
+        return nil
+    }
+
+    /// The wrapped value as a `Double`, or `nil` when it is not numeric.
+    ///
+    /// Booleans are never reported as numbers.
+    var doubleValue: Double? {
+        // Check for a boolean first: a boolean `NSNumber` can otherwise bridge to 0.0 or 1.0.
+        if let number = self.value as? NSNumber, CFGetTypeID(number) == CFBooleanGetTypeID() {
+            return nil
+        }
+        if let value = self.value as? Double {
+            return value
+        }
+        if let value = self.value as? Int {
+            return Double(value)
+        }
+        if let number = self.value as? NSNumber {
+            return number.doubleValue
+        }
+        return nil
+    }
+
+    /// The wrapped value as a string-keyed dictionary of `AnyCodable`, or `nil` when it is
+    /// not a dictionary. Foundation dictionaries are re-wrapped; non-string keys are dropped.
+    var dictionaryValue: [String: AnyCodable]? {
+        if let value = self.value as? [String: AnyCodable] {
+            return value
+        }
+        if let value = self.value as? [String: Any] {
+            return value.mapValues(AnyCodable.init)
+        }
+        if let value = self.value as? NSDictionary {
+            var converted: [String: AnyCodable] = [:]
+            for case let (key as String, raw) in value {
+                converted[key] = AnyCodable(raw)
+            }
+            return converted
+        }
+        return nil
+    }
+
+    /// The wrapped value as an array of `AnyCodable`, or `nil` when it is not an array.
+    var arrayValue: [AnyCodable]? {
+        if let value = self.value as? [AnyCodable] {
+            return value
+        }
+        if let value = self.value as? [Any] {
+            return value.map(AnyCodable.init)
+        }
+        if let value = self.value as? NSArray {
+            return value.map(AnyCodable.init)
+        }
+        return nil
+    }
+
+    /// The wrapped value with nested `AnyCodable` wrappers recursively removed.
+    var foundationValue: Any {
+        switch self.value {
+        case let dict as [String: AnyCodable]:
+            dict.mapValues(\.foundationValue)
+        case let array as [AnyCodable]:
+            array.map(\.foundationValue)
+        case let dict as [String: Any]:
+            dict.mapValues { AnyCodable($0).foundationValue }
+        case let array as [Any]:
+            array.map { AnyCodable($0).foundationValue }
+        default:
+            self.value
+        }
+    }
+}
diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkConfigParsing.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkConfigParsing.swift
new file mode 100644
index 00000000000..f25eba5aa6f
--- /dev/null
+++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkConfigParsing.swift
@@ -0,0 +1,107 @@
+import Foundation
+
+/// The talk provider chosen from a `talk` config payload, with that provider's settings.
+public struct TalkProviderConfigSelection: Sendable {
+    /// Provider ID: the payload's trimmed, lowercased ID, or the caller-supplied default.
+    public let provider: String
+    /// Settings for `provider`; the whole `talk` dictionary when the legacy layout is used.
+    public let config: [String: AnyCodable]
+    /// `true` when the payload carried a string `provider` or a `providers` value.
+    public let normalizedPayload: Bool
+
+    public init(provider: String, config: [String: AnyCodable], normalizedPayload: Bool) {
+        self.provider = provider
+        self.config = config
+        self.normalizedPayload = normalizedPayload
+    }
+}
+
+/// Shared parsing helpers for the `talk` config section.
+public enum TalkConfigParsing {
+    /// Wraps each top-level value of a Foundation dictionary in `AnyCodable`.
+    public static func bridgeFoundationDictionary(_ raw: [String: Any]?) -> [String: AnyCodable]? {
+        raw?.mapValues(AnyCodable.init)
+    }
+
+    /// Selects the active provider and its settings from a `talk` payload.
+    ///
+    /// A payload is "normalized" when it has a string `provider` or any `providers` value. The
+    /// provider is then `provider`, else the smallest `providers` key, else
+    /// `defaultProvider`.
+    ///
+    /// - Parameters:
+    ///   - talk: The `talk` config dictionary.
+    ///   - defaultProvider: Provider to use when the payload names none.
+    ///   - allowLegacyFallback: When `true`, a payload without `provider`/`providers` is
+    ///     returned whole as the default provider's config; when `false`, it yields `nil`.
+    /// - Returns: The selection, or `nil` when `talk` is `nil` or the fallback is disabled.
+    public static func selectProviderConfig(
+        _ talk: [String: AnyCodable]?,
+        defaultProvider: String,
+        allowLegacyFallback: Bool = true,
+    ) -> TalkProviderConfigSelection? {
+        guard let talk else { return nil }
+        let rawProvider = talk["provider"]?.stringValue
+        let rawProviders = talk["providers"]
+        let hasNormalizedPayload = rawProvider != nil || rawProviders != nil
+        if hasNormalizedPayload {
+            let normalizedProviders = self.normalizedTalkProviders(rawProviders)
+            let providerID =
+                self.normalizedTalkProviderID(rawProvider) ??
+                normalizedProviders.keys.min() ??
+                defaultProvider
+            return TalkProviderConfigSelection(
+                provider: providerID,
+                config: normalizedProviders[providerID] ?? [:],
+                normalizedPayload: true)
+        }
+        guard allowLegacyFallback else { return nil }
+        return TalkProviderConfigSelection(
+            provider: defaultProvider,
+            config: talk,
+            normalizedPayload: false)
+    }
+
+    /// Returns `value` as a positive whole number, or `fallback` otherwise.
+    ///
+    /// Booleans, strings, fractional or non-positive numbers, and numbers too large for `Int`
+    /// all resolve to `fallback`.
+    public static func resolvedPositiveInt(_ value: AnyCodable?, fallback: Int) -> Int {
+        if let timeout = value?.intValue, timeout > 0 {
+            return timeout
+        }
+        // `Int(exactly:)` rejects fractions and out-of-range values (e.g. 2^63) that
+        // `Int(_:)` would truncate or trap on.
+        if
+            let timeout = value?.doubleValue,
+            timeout > 0,
+            let exact = Int(exactly: timeout)
+        {
+            return exact
+        }
+        return fallback
+    }
+
+    /// Returns `talk["silenceTimeoutMs"]` as a positive whole number, or `fallback`.
+    public static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?, fallback: Int) -> Int {
+        self.resolvedPositiveInt(talk?["silenceTimeoutMs"], fallback: fallback)
+    }
+
+    /// Trims and lowercases a provider ID; `nil` when nothing remains.
+    private static func normalizedTalkProviderID(_ raw: String?) -> String? {
+        let trimmed = (raw ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
+        return trimmed.isEmpty ? nil : trimmed
+    }
+
+    /// Maps normalized provider IDs to their configs, skipping entries that are not dictionaries.
+    private static func normalizedTalkProviders(_ raw: AnyCodable?) -> [String: [String: AnyCodable]] {
+        guard let providerMap = raw?.dictionaryValue else { return [:] }
+        return providerMap.reduce(into: [String: [String: AnyCodable]]()) { acc, entry in
+            guard
+                let providerID = self.normalizedTalkProviderID(entry.key),
+                let providerConfig = entry.value.dictionaryValue
+            else { return }
+            acc[providerID] = providerConfig
+        }
+    }
+}
diff --git a/apps/shared/OpenClawKit/Tests/OpenClawKitTests/TalkConfigParsingTests.swift b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/TalkConfigParsingTests.swift
new file mode 100644
index 00000000000..aa2f8081d34
--- /dev/null
+++ b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/TalkConfigParsingTests.swift
@@ -0,0 +1,75 @@
+import OpenClawKit
+import Testing
+
+struct TalkConfigParsingTests {
+    @Test func prefersNormalizedTalkProviderPayload() {
+        let talk: [String: AnyCodable] = [
+            "provider": AnyCodable("elevenlabs"),
+            "providers": AnyCodable([
+                "elevenlabs": [
+                    "voiceId": "voice-normalized",
+                ],
+            ]),
+            "voiceId": AnyCodable("voice-legacy"),
+        ]
+
+        let selection = TalkConfigParsing.selectProviderConfig(talk, defaultProvider: "elevenlabs")
+        #expect(selection?.provider == "elevenlabs")
+        #expect(selection?.normalizedPayload == true)
+        #expect(selection?.config["voiceId"]?.stringValue == "voice-normalized")
+    }
+
+    @Test func fallsBackToLegacyTalkFieldsWhenNormalizedPayloadMissing() {
+        let talk: [String: AnyCodable] = [
+            "voiceId": AnyCodable("voice-legacy"),
+            "apiKey": AnyCodable("legacy-key"),
+        ]
+
+        let selection = TalkConfigParsing.selectProviderConfig(talk, defaultProvider: "elevenlabs")
+        #expect(selection?.provider == "elevenlabs")
+        #expect(selection?.normalizedPayload == false)
+        #expect(selection?.config["voiceId"]?.stringValue == "voice-legacy")
+        #expect(selection?.config["apiKey"]?.stringValue == "legacy-key")
+    }
+
+    @Test func canDisableLegacyFallback() {
+        let talk: [String: AnyCodable] = [
+            "voiceId": AnyCodable("voice-legacy"),
+        ]
+
+        let selection = TalkConfigParsing.selectProviderConfig(
+            talk,
+            defaultProvider: "elevenlabs",
+            allowLegacyFallback: false)
+        #expect(selection == nil)
+    }
+
+    @Test func bridgesFoundationDictionary() {
+        let raw: [String: Any] = [
+            "provider": "elevenlabs",
+            "providers": [
+                "elevenlabs": [
+                    "voiceId": "voice-normalized",
+                ],
+            ],
+        ]
+
+        let bridged = TalkConfigParsing.bridgeFoundationDictionary(raw)
+        #expect(bridged?["provider"]?.stringValue == "elevenlabs")
+        let nested = bridged?["providers"]?.dictionaryValue?["elevenlabs"]?.dictionaryValue
+        #expect(nested?["voiceId"]?.stringValue == "voice-normalized")
+    }
+
+    @Test func resolvesPositiveIntegerTimeout() {
+        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable(1500), fallback: 700) == 1500)
+        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable(0), fallback: 700) == 700)
+        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable(true), fallback: 700) == 700)
+        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable("1500"), fallback: 700) == 700)
+    }
+
+    @Test func rejectsFractionalNegativeAndOutOfRangeTimeout() {
+        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable(1500.5), fallback: 700) == 700)
+        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable(-1500), fallback: 700) == 700)
+        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable(Double(Int.max)), fallback: 700) == 700)
+    }
+}