refactor: share Apple talk config parsing

This commit is contained in:
Peter Steinberger
2026-03-08 14:43:55 +00:00
parent eba9dcc67a
commit 4f482d2a2b
7 changed files with 265 additions and 184 deletions

View File

@@ -1970,57 +1970,15 @@ extension TalkModeManager {
return trimmed
}
struct TalkProviderConfigSelection {
let provider: String
let config: [String: Any]
static func selectTalkProviderConfig(_ talk: [String: AnyCodable]?) -> TalkProviderConfigSelection? {
TalkConfigParsing.selectProviderConfig(
talk,
defaultProvider: Self.defaultTalkProvider,
allowLegacyFallback: false)
}
private static func normalizedTalkProviderID(_ raw: String?) -> String? {
let trimmed = (raw ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
return trimmed.isEmpty ? nil : trimmed
}
static func selectTalkProviderConfig(_ talk: [String: Any]?) -> TalkProviderConfigSelection? {
guard let talk else { return nil }
let rawProvider = talk["provider"] as? String
let rawProviders = talk["providers"] as? [String: Any]
guard rawProvider != nil || rawProviders != nil else { return nil }
let providers = rawProviders ?? [:]
let normalizedProviders = providers.reduce(into: [String: [String: Any]]()) { acc, entry in
guard
let providerID = Self.normalizedTalkProviderID(entry.key),
let config = entry.value as? [String: Any]
else { return }
acc[providerID] = config
}
let providerID =
Self.normalizedTalkProviderID(rawProvider) ??
normalizedProviders.keys.min() ??
Self.defaultTalkProvider
return TalkProviderConfigSelection(
provider: providerID,
config: normalizedProviders[providerID] ?? [:])
}
static func resolvedSilenceTimeoutMs(_ talk: [String: Any]?) -> Int {
switch talk?["silenceTimeoutMs"] {
case let timeout as Int where timeout > 0:
return timeout
case let timeout as Double
where timeout > 0 && timeout.rounded(.towardZero) == timeout && timeout <= Double(Int.max):
return Int(timeout)
case let timeout as NSNumber:
if CFGetTypeID(timeout) == CFBooleanGetTypeID() {
return Self.defaultSilenceTimeoutMs
}
let value = timeout.doubleValue
if value > 0 && value.rounded(.towardZero) == value && value <= Double(Int.max) {
return Int(value)
}
return Self.defaultSilenceTimeoutMs
default:
return Self.defaultSilenceTimeoutMs
}
static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?) -> Int {
TalkConfigParsing.resolvedSilenceTimeoutMs(talk, fallback: Self.defaultSilenceTimeoutMs)
}
func reloadConfig() async {
@@ -2034,7 +1992,7 @@ extension TalkModeManager {
)
guard let json = try JSONSerialization.jsonObject(with: res) as? [String: Any] else { return }
guard let config = json["config"] as? [String: Any] else { return }
let talk = config["talk"] as? [String: Any]
let talk = TalkConfigParsing.bridgeFoundationDictionary(config["talk"] as? [String: Any])
let selection = Self.selectTalkProviderConfig(talk)
if talk != nil, selection == nil {
GatewayDiagnostics.log(
@@ -2043,12 +2001,12 @@ extension TalkModeManager {
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
let activeConfig = selection?.config
let silenceTimeoutMs = Self.resolvedSilenceTimeoutMs(talk)
self.defaultVoiceId = (activeConfig?["voiceId"] as? String)?
self.defaultVoiceId = activeConfig?["voiceId"]?.stringValue?
.trimmingCharacters(in: .whitespacesAndNewlines)
if let aliases = activeConfig?["voiceAliases"] as? [String: Any] {
if let aliases = activeConfig?["voiceAliases"]?.dictionaryValue {
var resolved: [String: String] = [:]
for (key, value) in aliases {
guard let id = value as? String else { continue }
guard let id = value.stringValue else { continue }
let normalizedKey = key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
let trimmedId = id.trimmingCharacters(in: .whitespacesAndNewlines)
guard !normalizedKey.isEmpty, !trimmedId.isEmpty else { continue }
@@ -2061,14 +2019,14 @@ extension TalkModeManager {
if !self.voiceOverrideActive {
self.currentVoiceId = self.defaultVoiceId
}
let model = (activeConfig?["modelId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
self.defaultModelId = (model?.isEmpty == false) ? model : Self.defaultModelIdFallback
if !self.modelOverrideActive {
self.currentModelId = self.defaultModelId
}
self.defaultOutputFormat = (activeConfig?["outputFormat"] as? String)?
self.defaultOutputFormat = activeConfig?["outputFormat"]?.stringValue?
.trimmingCharacters(in: .whitespacesAndNewlines)
let rawConfigApiKey = (activeConfig?["apiKey"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
let rawConfigApiKey = activeConfig?["apiKey"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
let configApiKey = Self.normalizedTalkApiKey(rawConfigApiKey)
let localApiKey = Self.normalizedTalkApiKey(
GatewaySettingsStore.loadTalkProviderApiKey(provider: activeProvider))
@@ -2087,7 +2045,7 @@ extension TalkModeManager {
self.gatewayTalkDefaultModelId = self.defaultModelId
self.gatewayTalkApiKeyConfigured = (self.apiKey?.isEmpty == false)
self.gatewayTalkConfigLoaded = true
if let interrupt = talk?["interruptOnSpeech"] as? Bool {
if let interrupt = talk?["interruptOnSpeech"]?.boolValue {
self.interruptOnSpeech = interrupt
}
self.silenceWindow = TimeInterval(silenceTimeoutMs) / 1000

View File

@@ -1,4 +1,5 @@
import Foundation
import OpenClawKit
import Testing
@testable import OpenClaw
@@ -15,9 +16,10 @@ import Testing
"voiceId": "voice-legacy",
]
let selection = TalkModeManager.selectTalkProviderConfig(talk)
let selection = TalkModeManager.selectTalkProviderConfig(
TalkConfigParsing.bridgeFoundationDictionary(talk))
#expect(selection?.provider == "elevenlabs")
#expect(selection?.config["voiceId"] as? String == "voice-normalized")
#expect(selection?.config["voiceId"]?.stringValue == "voice-normalized")
}
@Test func ignoresLegacyTalkFieldsWhenNormalizedPayloadMissing() {
@@ -26,7 +28,8 @@ import Testing
"apiKey": "legacy-key", // pragma: allowlist secret
]
let selection = TalkModeManager.selectTalkProviderConfig(talk)
let selection = TalkModeManager.selectTalkProviderConfig(
TalkConfigParsing.bridgeFoundationDictionary(talk))
#expect(selection == nil)
}
@@ -53,7 +56,7 @@ import Testing
"silenceTimeoutMs": 1500,
]
#expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 1500)
#expect(TalkModeManager.resolvedSilenceTimeoutMs(TalkConfigParsing.bridgeFoundationDictionary(talk)) == 1500)
}
@Test func defaultsSilenceTimeoutMsWhenMissing() {
@@ -65,7 +68,7 @@ import Testing
"silenceTimeoutMs": 0,
]
#expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 900)
#expect(TalkModeManager.resolvedSilenceTimeoutMs(TalkConfigParsing.bridgeFoundationDictionary(talk)) == 900)
}
@Test func defaultsSilenceTimeoutMsWhenBool() {
@@ -73,6 +76,6 @@ import Testing
"silenceTimeoutMs": true,
]
#expect(TalkModeManager.resolvedSilenceTimeoutMs(talk) == 900)
#expect(TalkModeManager.resolvedSilenceTimeoutMs(TalkConfigParsing.bridgeFoundationDictionary(talk)) == 900)
}
}

View File

@@ -4,40 +4,3 @@ import OpenClawKit
// Prefer the OpenClawKit wrapper to keep gateway request payloads consistent.
typealias AnyCodable = OpenClawKit.AnyCodable
typealias InstanceIdentity = OpenClawKit.InstanceIdentity
extension AnyCodable {
var stringValue: String? {
self.value as? String
}
var boolValue: Bool? {
self.value as? Bool
}
var intValue: Int? {
self.value as? Int
}
var doubleValue: Double? {
self.value as? Double
}
var dictionaryValue: [String: AnyCodable]? {
self.value as? [String: AnyCodable]
}
var arrayValue: [AnyCodable]? {
self.value as? [AnyCodable]
}
var foundationValue: Any {
switch self.value {
case let dict as [String: AnyCodable]:
dict.mapValues { $0.foundationValue }
case let array as [AnyCodable]:
array.map(\.foundationValue)
default:
self.value
}
}
}

View File

@@ -67,7 +67,7 @@ actor TalkModeRuntime {
private var fallbackVoiceId: String?
private var lastPlaybackWasPCM: Bool = false
private var silenceWindow: TimeInterval = TimeInterval(TalkModeRuntime.defaultSilenceTimeoutMs) / 1000
private var silenceWindow: TimeInterval = .init(TalkModeRuntime.defaultSilenceTimeoutMs) / 1000
private let minSpeechRMS: Double = 1e-3
private let speechBoostFactor: Double = 6.0
@@ -808,95 +808,14 @@ extension TalkModeRuntime {
let apiKey: String?
}
struct TalkProviderConfigSelection {
let provider: String
let config: [String: AnyCodable]
let normalizedPayload: Bool
}
private static func normalizedTalkProviderID(_ raw: String?) -> String? {
let trimmed = raw?.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() ?? ""
return trimmed.isEmpty ? nil : trimmed
}
private static func normalizedTalkProviderConfig(_ value: AnyCodable) -> [String: AnyCodable]? {
if let typed = value.value as? [String: AnyCodable] {
return typed
}
if let foundation = value.value as? [String: Any] {
return foundation.mapValues(AnyCodable.init)
}
if let nsDict = value.value as? NSDictionary {
var converted: [String: AnyCodable] = [:]
for case let (key as String, raw) in nsDict {
converted[key] = AnyCodable(raw)
}
return converted
}
return nil
}
private static func normalizedTalkProviders(_ raw: AnyCodable?) -> [String: [String: AnyCodable]] {
guard let raw else { return [:] }
var providerMap: [String: AnyCodable] = [:]
if let typed = raw.value as? [String: AnyCodable] {
providerMap = typed
} else if let foundation = raw.value as? [String: Any] {
providerMap = foundation.mapValues(AnyCodable.init)
} else if let nsDict = raw.value as? NSDictionary {
for case let (key as String, value) in nsDict {
providerMap[key] = AnyCodable(value)
}
} else {
return [:]
}
return providerMap.reduce(into: [String: [String: AnyCodable]]()) { acc, entry in
guard
let providerID = Self.normalizedTalkProviderID(entry.key),
let providerConfig = Self.normalizedTalkProviderConfig(entry.value)
else { return }
acc[providerID] = providerConfig
}
}
static func selectTalkProviderConfig(
_ talk: [String: AnyCodable]?) -> TalkProviderConfigSelection?
{
guard let talk else { return nil }
let rawProvider = talk["provider"]?.stringValue
let rawProviders = talk["providers"]
let hasNormalizedPayload = rawProvider != nil || rawProviders != nil
if hasNormalizedPayload {
let normalizedProviders = Self.normalizedTalkProviders(rawProviders)
let providerID =
Self.normalizedTalkProviderID(rawProvider) ??
normalizedProviders.keys.min() ??
Self.defaultTalkProvider
return TalkProviderConfigSelection(
provider: providerID,
config: normalizedProviders[providerID] ?? [:],
normalizedPayload: true)
}
return TalkProviderConfigSelection(
provider: Self.defaultTalkProvider,
config: talk,
normalizedPayload: false)
TalkConfigParsing.selectProviderConfig(talk, defaultProvider: self.defaultTalkProvider)
}
static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?) -> Int {
if let timeout = talk?["silenceTimeoutMs"]?.intValue, timeout > 0 {
return timeout
}
if
let timeout = talk?["silenceTimeoutMs"]?.doubleValue,
timeout > 0,
timeout.rounded(.towardZero) == timeout,
timeout <= Double(Int.max)
{
return Int(timeout)
}
return Self.defaultSilenceTimeoutMs
TalkConfigParsing.resolvedSilenceTimeoutMs(talk, fallback: self.defaultSilenceTimeoutMs)
}
private func fetchTalkConfig() async -> TalkRuntimeConfig {

View File

@@ -0,0 +1,88 @@
import Foundation
/// Typed accessors over the type-erased `value` carried by `AnyCodable`.
///
/// Each accessor returns `nil` when the wrapped value cannot be read as the
/// requested type. Foundation containers and numbers (`NSDictionary`,
/// `NSArray`, `NSNumber`) are accepted alongside native Swift values.
public extension AnyCodable {
    /// The wrapped value as a `String`, or `nil` when it is not a string.
    var stringValue: String? {
        self.value as? String
    }

    /// The wrapped value as a `Bool`.
    ///
    /// Accepts a value castable to `Bool`, or an `NSNumber` whose Core
    /// Foundation type is `CFBoolean`. Returns `nil` otherwise.
    var boolValue: Bool? {
        if let value = self.value as? Bool {
            return value
        }
        if let number = self.value as? NSNumber, CFGetTypeID(number) == CFBooleanGetTypeID() {
            return number.boolValue
        }
        return nil
    }

    /// The wrapped value as an `Int`.
    ///
    /// Accepts a value castable to `Int`. A non-boolean `NSNumber` that did not
    /// cast directly is accepted only when it is positive, integral and exactly
    /// representable as `Int`. Returns `nil` otherwise.
    var intValue: Int? {
        if let value = self.value as? Int {
            return value
        }
        if let number = self.value as? NSNumber, CFGetTypeID(number) != CFBooleanGetTypeID() {
            let value = number.doubleValue
            // `Int(exactly:)` yields nil for fractional, non-finite and out-of-range
            // inputs. A manual `value <= Double(Int.max)` guard is not sufficient:
            // `Double(Int.max)` rounds up to 2^63, so 2^63 itself would pass the
            // guard and `Int(value)` would trap.
            if value > 0, let exact = Int(exactly: value) {
                return exact
            }
        }
        return nil
    }

    /// The wrapped value as a `Double`.
    ///
    /// Accepts a `Double`, an `Int` (converted), or a non-boolean `NSNumber`.
    /// Returns `nil` otherwise.
    var doubleValue: Double? {
        if let value = self.value as? Double {
            return value
        }
        if let value = self.value as? Int {
            return Double(value)
        }
        if let number = self.value as? NSNumber, CFGetTypeID(number) != CFBooleanGetTypeID() {
            return number.doubleValue
        }
        return nil
    }

    /// The wrapped value as a string-keyed dictionary of `AnyCodable`.
    ///
    /// Accepts `[String: AnyCodable]` as-is, wraps the values of a
    /// `[String: Any]`, or converts an `NSDictionary`. Entries of an
    /// `NSDictionary` whose key is not a `String` are dropped.
    var dictionaryValue: [String: AnyCodable]? {
        if let value = self.value as? [String: AnyCodable] {
            return value
        }
        if let value = self.value as? [String: Any] {
            return value.mapValues(AnyCodable.init)
        }
        if let value = self.value as? NSDictionary {
            var converted: [String: AnyCodable] = [:]
            // The pattern only matches entries with `String` keys.
            for case let (key as String, raw) in value {
                converted[key] = AnyCodable(raw)
            }
            return converted
        }
        return nil
    }

    /// The wrapped value as an array of `AnyCodable`.
    ///
    /// Accepts `[AnyCodable]` as-is, or wraps each element of an `[Any]` or
    /// `NSArray`. Returns `nil` otherwise.
    var arrayValue: [AnyCodable]? {
        if let value = self.value as? [AnyCodable] {
            return value
        }
        if let value = self.value as? [Any] {
            return value.map(AnyCodable.init)
        }
        if let value = self.value as? NSArray {
            return value.map(AnyCodable.init)
        }
        return nil
    }

    /// The wrapped value with nested `AnyCodable` wrappers removed.
    ///
    /// Dictionaries and arrays are unwrapped recursively; any other value is
    /// returned unchanged.
    var foundationValue: Any {
        switch self.value {
        case let dict as [String: AnyCodable]:
            dict.mapValues(\.foundationValue)
        case let array as [AnyCodable]:
            array.map(\.foundationValue)
        case let dict as [String: Any]:
            // Re-wrap each element so wrappers nested deeper are unwrapped too.
            dict.mapValues { AnyCodable($0).foundationValue }
        case let array as [Any]:
            array.map { AnyCodable($0).foundationValue }
        default:
            self.value
        }
    }
}

View File

@@ -0,0 +1,81 @@
import Foundation
/// The talk provider chosen from a talk configuration, together with the
/// settings that apply to it.
public struct TalkProviderConfigSelection: Sendable {
/// Identifier of the selected provider.
public let provider: String
/// Settings for the selected provider; may be empty.
public let config: [String: AnyCodable]
/// Whether the selection came from a payload carrying a `provider` or
/// `providers` entry (`true`) rather than from the legacy fallback (`false`).
public let normalizedPayload: Bool
/// Creates a selection from its three stored values.
public init(provider: String, config: [String: AnyCodable], normalizedPayload: Bool) {
self.provider = provider
self.config = config
self.normalizedPayload = normalizedPayload
}
}
/// Shared parsing helpers for the `talk` section of a gateway configuration.
public enum TalkConfigParsing {
    /// Wraps every value of a Foundation dictionary in `AnyCodable`.
    ///
    /// - Parameter raw: A dictionary such as one produced by `JSONSerialization`.
    /// - Returns: The wrapped dictionary, or `nil` when `raw` is `nil`.
    public static func bridgeFoundationDictionary(_ raw: [String: Any]?) -> [String: AnyCodable]? {
        raw?.mapValues(AnyCodable.init)
    }

    /// Selects the active talk provider and its settings from a talk payload.
    ///
    /// A payload is treated as normalized when it has a string `provider` entry
    /// or any `providers` entry. In that case the provider is, in order of
    /// preference: the trimmed, lowercased `provider` value; the smallest
    /// normalized key in `providers`; or `defaultProvider`. The returned config
    /// is that provider's entry in `providers`, or empty when it has none.
    ///
    /// - Parameters:
    ///   - talk: The talk payload, or `nil`.
    ///   - defaultProvider: Provider to use when the payload does not name one.
    ///   - allowLegacyFallback: When `true`, a payload that is not normalized is
    ///     returned whole as the config of `defaultProvider`.
    /// - Returns: The selection, or `nil` when `talk` is `nil` or when the
    ///   payload is not normalized and `allowLegacyFallback` is `false`.
    public static func selectProviderConfig(
        _ talk: [String: AnyCodable]?,
        defaultProvider: String,
        allowLegacyFallback: Bool = true,
    ) -> TalkProviderConfigSelection? {
        guard let talk else { return nil }
        let rawProvider = talk["provider"]?.stringValue
        let rawProviders = talk["providers"]
        let hasNormalizedPayload = rawProvider != nil || rawProviders != nil
        if hasNormalizedPayload {
            let normalizedProviders = self.normalizedTalkProviders(rawProviders)
            let providerID =
                self.normalizedTalkProviderID(rawProvider) ??
                normalizedProviders.keys.min() ??
                defaultProvider
            return TalkProviderConfigSelection(
                provider: providerID,
                config: normalizedProviders[providerID] ?? [:],
                normalizedPayload: true)
        }
        guard allowLegacyFallback else { return nil }
        return TalkProviderConfigSelection(
            provider: defaultProvider,
            config: talk,
            normalizedPayload: false)
    }

    /// Reads a strictly positive integer from `value`.
    ///
    /// - Parameters:
    ///   - value: The candidate value, or `nil`.
    ///   - fallback: Returned when `value` is missing, not numeric, not
    ///     positive, fractional, or outside the range of `Int`.
    /// - Returns: The positive integer, or `fallback`.
    public static func resolvedPositiveInt(_ value: AnyCodable?, fallback: Int) -> Int {
        if let timeout = value?.intValue, timeout > 0 {
            return timeout
        }
        // `Int(exactly:)` rejects fractional, non-finite and out-of-range doubles.
        // Comparing against `Double(Int.max)` is not a safe range check because
        // that constant rounds up to 2^63, which `Int(_:)` cannot convert and
        // traps on.
        if
            let timeout = value?.doubleValue,
            timeout > 0,
            let exact = Int(exactly: timeout)
        {
            return exact
        }
        return fallback
    }

    /// Reads `silenceTimeoutMs` from a talk payload as a positive integer.
    ///
    /// - Parameters:
    ///   - talk: The talk payload, or `nil`.
    ///   - fallback: Returned when the entry is missing or not a positive integer.
    public static func resolvedSilenceTimeoutMs(_ talk: [String: AnyCodable]?, fallback: Int) -> Int {
        self.resolvedPositiveInt(talk?["silenceTimeoutMs"], fallback: fallback)
    }

    /// Trims and lowercases a provider ID; returns `nil` when nothing remains.
    private static func normalizedTalkProviderID(_ raw: String?) -> String? {
        let trimmed = (raw ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
        return trimmed.isEmpty ? nil : trimmed
    }

    /// Builds a map from normalized provider ID to that provider's settings.
    ///
    /// Entries whose key normalizes to nothing, or whose value is not a
    /// dictionary, are skipped.
    private static func normalizedTalkProviders(_ raw: AnyCodable?) -> [String: [String: AnyCodable]] {
        guard let providerMap = raw?.dictionaryValue else { return [:] }
        // Fold in sorted-key order: when two raw keys normalize to the same ID,
        // the later write wins, and dictionary iteration order is unspecified,
        // so an unsorted fold could pick a different entry from run to run.
        let orderedEntries = providerMap.sorted { $0.key < $1.key }
        return orderedEntries.reduce(into: [String: [String: AnyCodable]]()) { acc, entry in
            guard
                let providerID = self.normalizedTalkProviderID(entry.key),
                let providerConfig = entry.value.dictionaryValue
            else { return }
            acc[providerID] = providerConfig
        }
    }
}

View File

@@ -0,0 +1,69 @@
import OpenClawKit
import Testing
/// Exercises provider selection, Foundation bridging and positive-integer
/// resolution in `TalkConfigParsing`.
struct TalkConfigParsingTests {
    /// A payload with `provider`/`providers` wins over top-level legacy fields.
    @Test func prefersNormalizedTalkProviderPayload() {
        let payload: [String: AnyCodable] = [
            "provider": AnyCodable("elevenlabs"),
            "providers": AnyCodable([
                "elevenlabs": [
                    "voiceId": "voice-normalized",
                ],
            ]),
            "voiceId": AnyCodable("voice-legacy"),
        ]

        let picked = TalkConfigParsing.selectProviderConfig(payload, defaultProvider: "elevenlabs")

        #expect(picked?.provider == "elevenlabs")
        #expect(picked?.normalizedPayload == true)
        #expect(picked?.config["voiceId"]?.stringValue == "voice-normalized")
    }

    /// Without `provider`/`providers`, the whole payload is used as the config.
    @Test func fallsBackToLegacyTalkFieldsWhenNormalizedPayloadMissing() {
        let payload: [String: AnyCodable] = [
            "voiceId": AnyCodable("voice-legacy"),
            "apiKey": AnyCodable("legacy-key"),
        ]

        let picked = TalkConfigParsing.selectProviderConfig(payload, defaultProvider: "elevenlabs")

        #expect(picked?.provider == "elevenlabs")
        #expect(picked?.normalizedPayload == false)
        #expect(picked?.config["voiceId"]?.stringValue == "voice-legacy")
        #expect(picked?.config["apiKey"]?.stringValue == "legacy-key")
    }

    /// With the legacy fallback disabled, a legacy-only payload selects nothing.
    @Test func canDisableLegacyFallback() {
        let payload: [String: AnyCodable] = [
            "voiceId": AnyCodable("voice-legacy"),
        ]

        let picked = TalkConfigParsing.selectProviderConfig(
            payload,
            defaultProvider: "elevenlabs",
            allowLegacyFallback: false)

        #expect(picked == nil)
    }

    /// Nested Foundation dictionaries stay readable after bridging.
    @Test func bridgesFoundationDictionary() {
        let foundation: [String: Any] = [
            "provider": "elevenlabs",
            "providers": [
                "elevenlabs": [
                    "voiceId": "voice-normalized",
                ],
            ],
        ]

        let converted = TalkConfigParsing.bridgeFoundationDictionary(foundation)
        let elevenLabsConfig = converted?["providers"]?.dictionaryValue?["elevenlabs"]?.dictionaryValue

        #expect(converted?["provider"]?.stringValue == "elevenlabs")
        #expect(elevenLabsConfig?["voiceId"]?.stringValue == "voice-normalized")
    }

    /// Only positive integers are accepted; zero, booleans and strings fall back.
    @Test func resolvesPositiveIntegerTimeout() {
        let fallback = 700

        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable(1500), fallback: fallback) == 1500)
        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable(0), fallback: fallback) == 700)
        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable(true), fallback: fallback) == 700)
        #expect(TalkConfigParsing.resolvedPositiveInt(AnyCodable("1500"), fallback: fallback) == 700)
    }
}