macOS: add MLX Talk provider MVP (#63539)

Merged via squash.

Prepared head SHA: da43563513
Co-authored-by: ImLukeF <92253590+ImLukeF@users.noreply.github.com>
Reviewed-by: @ImLukeF
This commit is contained in:
Luke
2026-04-09 17:13:34 +10:00
committed by GitHub
parent 2729c91ad5
commit 7c72b694f1
8 changed files with 509 additions and 22 deletions

View File

@@ -6,6 +6,8 @@ Docs: https://docs.openclaw.ai
### Changes
- macOS/Talk: add an experimental local MLX speech provider for Talk Mode, with explicit provider selection, local utterance playback, interruption handling, and system-voice fallback. (#63539) Thanks @ImLukeF.
### Fixes
- fix(browser): auto-generate browser control auth token for none/trusted-proxy modes [AI]. (#63280) Thanks @pgondhi987.

View File

@@ -1,5 +1,5 @@
{
"originHash" : "fb90e7b1977f43661ac91681d16da11f9ddd85630407ef170eaada0a6ee39972",
"originHash" : "31972864afdac74537794e1a3b7bd22484c09ec1be8e3624fb9ea582e9222ad9",
"pins" : [
{
"identity" : "axorcist",
@@ -28,6 +28,15 @@
"version" : "0.1.0"
}
},
{
"identity" : "eventsource",
"kind" : "remoteSourceControl",
"location" : "https://github.com/mattt/EventSource.git",
"state" : {
"revision" : "a3a85a85214caf642abaa96ae664e4c772a59f6e",
"version" : "1.4.1"
}
},
{
"identity" : "menubarextraaccess",
"kind" : "remoteSourceControl",
@@ -37,6 +46,33 @@
"version" : "1.2.2"
}
},
{
"identity" : "mlx-audio-swift",
"kind" : "remoteSourceControl",
"location" : "https://github.com/Blaizzy/mlx-audio-swift",
"state" : {
"revision" : "fcbd04daa1bfebe881932f630af2ba6ce9af3274",
"version" : "0.1.2"
}
},
{
"identity" : "mlx-swift",
"kind" : "remoteSourceControl",
"location" : "https://github.com/ml-explore/mlx-swift.git",
"state" : {
"revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896",
"version" : "0.31.3"
}
},
{
"identity" : "mlx-swift-lm",
"kind" : "remoteSourceControl",
"location" : "https://github.com/ml-explore/mlx-swift-lm.git",
"state" : {
"revision" : "25b00d4e22e61ec9c41efda47990cd2084ec87ff",
"version" : "2.31.3"
}
},
{
"identity" : "peekaboo",
"kind" : "remoteSourceControl",
@@ -64,6 +100,33 @@
"version" : "1.2.1"
}
},
{
"identity" : "swift-asn1",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-asn1.git",
"state" : {
"revision" : "9f542610331815e29cc3821d3b6f488db8715517",
"version" : "1.6.0"
}
},
{
"identity" : "swift-atomics",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-atomics.git",
"state" : {
"revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7",
"version" : "1.3.0"
}
},
{
"identity" : "swift-collections",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-collections.git",
"state" : {
"revision" : "6675bc0ff86e61436e615df6fc5174e043e57924",
"version" : "1.4.1"
}
},
{
"identity" : "swift-concurrency-extras",
"kind" : "remoteSourceControl",
@@ -73,6 +136,33 @@
"version" : "1.3.2"
}
},
{
"identity" : "swift-crypto",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-crypto.git",
"state" : {
"revision" : "bb4ba815dab96d4edc1e0b86d7b9acf9ff973a84",
"version" : "4.3.1"
}
},
{
"identity" : "swift-huggingface",
"kind" : "remoteSourceControl",
"location" : "https://github.com/huggingface/swift-huggingface.git",
"state" : {
"revision" : "b721959445b617d0bf03910b2b4aced345fd93bf",
"version" : "0.9.0"
}
},
{
"identity" : "swift-jinja",
"kind" : "remoteSourceControl",
"location" : "https://github.com/huggingface/swift-jinja.git",
"state" : {
"revision" : "0aeefadec459ce8e11a333769950fb86183aca43",
"version" : "2.3.5"
}
},
{
"identity" : "swift-log",
"kind" : "remoteSourceControl",
@@ -82,6 +172,15 @@
"version" : "1.10.1"
}
},
{
"identity" : "swift-nio",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-nio.git",
"state" : {
"revision" : "558f24a4647193b5a0e2104031b71c55d31ff83a",
"version" : "2.97.1"
}
},
{
"identity" : "swift-numerics",
"kind" : "remoteSourceControl",
@@ -109,6 +208,15 @@
"version" : "1.6.4"
}
},
{
"identity" : "swift-transformers",
"kind" : "remoteSourceControl",
"location" : "https://github.com/huggingface/swift-transformers.git",
"state" : {
"revision" : "58c4bc11963a140358d791f678a60a2745a23146",
"version" : "1.2.1"
}
},
{
"identity" : "swiftui-math",
"kind" : "remoteSourceControl",
@@ -126,6 +234,15 @@
"revision" : "5b06b811c0f5313b6b84bbef98c635a630638c38",
"version" : "0.3.1"
}
},
{
"identity" : "yyjson",
"kind" : "remoteSourceControl",
"location" : "https://github.com/ibireme/yyjson.git",
"state" : {
"revision" : "8b4a38dc994a110abaec8a400615567bd996105f",
"version" : "0.12.0"
}
}
],
"version" : 3

View File

@@ -20,6 +20,7 @@ let package = Package(
.package(url: "https://github.com/apple/swift-log.git", from: "1.10.1"),
.package(url: "https://github.com/sparkle-project/Sparkle", from: "2.9.0"),
.package(url: "https://github.com/steipete/Peekaboo.git", branch: "main"),
.package(url: "https://github.com/Blaizzy/mlx-audio-swift", exact: "0.1.2"),
.package(path: "../shared/OpenClawKit"),
.package(path: "../../Swabble"),
],
@@ -54,6 +55,7 @@ let package = Package(
.product(name: "Sparkle", package: "Sparkle"),
.product(name: "PeekabooBridge", package: "Peekaboo"),
.product(name: "PeekabooAutomationKit", package: "Peekaboo"),
.product(name: "MLXAudioTTS", package: "mlx-audio-swift"),
],
exclude: [
"Resources/Info.plist",

View File

@@ -0,0 +1,178 @@
import Foundation
import MLXAudioTTS
import OSLog
// swiftformat:disable wrap wrapMultilineStatementBraces trailingCommas redundantSelf extensionAccessControl
/// Runtime access stays serialized through `TalkModeRuntime` actor helper methods.
///
/// Generates speech audio locally with an MLX text-to-speech model for Talk Mode:
/// loads (and caches) a `SpeechGenerationModel` from a model repo, renders the
/// synthesized float samples into a 16-bit mono WAV container, and supports
/// cooperative cancellation via a generation token rotated by `stop()`.
///
/// NOTE(review): the type holds mutable state (`currentToken`, `model`,
/// `modelRepo`) without locks; `@unchecked Sendable` below is only sound while
/// the serialized-access contract above holds.
final class TalkMLXSpeechSynthesizer {
    /// Failure modes surfaced to the Talk Mode runtime.
    enum SynthesizeError: Error {
        case canceled // superseded by a newer request or an explicit `stop()`
        case modelLoadFailed(String) // associated value is the model repo that failed to load
        case audioGenerationFailed
        case audioPlaybackFailed
        case timedOut
    }
    /// Process-wide instance; all access must stay serialized (see type doc).
    static let shared = TalkMLXSpeechSynthesizer()
    /// Fallback model repo used when the caller supplies none (or a blank one).
    static let defaultModelRepo = "mlx-community/Soprano-80M-bf16"
    private let logger = Logger(subsystem: "ai.openclaw", category: "talk.mlx")
    // Generation token: any in-flight synthesis whose captured token no longer
    // matches this value is treated as canceled at its next checkpoint.
    private var currentToken = UUID()
    // Repo the cached `model` below was loaded from; used as the cache key.
    private var modelRepo: String?
    // Cached loaded model, reused across utterances for the same repo.
    private var model: (any SpeechGenerationModel)?
    private init() {}
    /// Invalidates any in-flight synthesis by rotating the generation token.
    func stop() {
        self.currentToken = UUID()
    }
    /// Synthesizes `text` into 16-bit mono WAV data.
    ///
    /// Returns empty `Data` for whitespace-only input. Starting a new synthesis
    /// implicitly cancels any previous one (`stop()` is called first).
    ///
    /// - Parameters:
    ///   - text: Utterance to speak; trimmed before synthesis.
    ///   - modelRepo: Optional model repo override; blank/nil falls back to
    ///     `defaultModelRepo`.
    ///   - language: Optional language hint passed through to the model.
    ///   - voicePreset: Optional voice preset passed through to the model.
    /// - Throws: `SynthesizeError.canceled` when superseded,
    ///   `.modelLoadFailed` when the model cannot be loaded, or
    ///   `.audioGenerationFailed` when generation itself fails.
    func synthesize(
        text: String,
        modelRepo: String?,
        language: String?,
        voicePreset: String?) async throws -> Data {
        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !trimmed.isEmpty else { return Data() }
        // Invalidate any previous request, then claim a fresh token for this one.
        self.stop()
        let token = UUID()
        self.currentToken = token
        let resolvedRepo = Self.resolvedModelRepo(modelRepo)
        let rawModel = try await self.loadModel(
            modelRepo: resolvedRepo,
            token: token)
        // Wrap the (non-Sendable) model so it can cross the await boundary.
        let model = UncheckedSpeechModel(raw: rawModel)
        // Cancellation checkpoint: another request may have started during load.
        guard self.currentToken == token else {
            throw SynthesizeError.canceled
        }
        let audioData: Data
        do {
            let audio = try await model.generateAudio(
                text: trimmed,
                voice: voicePreset,
                language: language)
            audioData = Self.makeWavData(
                samples: audio,
                sampleRate: Double(model.sampleRateValue()))
        } catch {
            self.logger.error(
                "talk mlx generation failed: \(error.localizedDescription, privacy: .public)")
            throw SynthesizeError.audioGenerationFailed
        }
        // Final checkpoint before handing audio back: drop stale results.
        guard self.currentToken == token else {
            throw SynthesizeError.canceled
        }
        return audioData
    }
    /// Returns the cached model when `modelRepo` matches; otherwise loads it
    /// via `TTS.loadModel` and caches the result.
    ///
    /// - Throws: `SynthesizeError.canceled` when the request was superseded
    ///   during the load (or the load itself was cancelled), or
    ///   `.modelLoadFailed` wrapping the repo name on any other failure.
    private func loadModel(
        modelRepo: String,
        token: UUID) async throws -> any SpeechGenerationModel {
        if let model = self.model, self.modelRepo == modelRepo {
            return model
        }
        self.logger.info("talk mlx loading modelRepo=\(modelRepo, privacy: .public)")
        do {
            let model = try await TTS.loadModel(modelRepo: modelRepo)
            guard self.currentToken == token else {
                throw SynthesizeError.canceled
            }
            self.model = model
            self.modelRepo = modelRepo
            return model
        } catch is CancellationError {
            throw SynthesizeError.canceled
        } catch {
            self.logger.error(
                "talk mlx load failed: \(error.localizedDescription, privacy: .public)")
            throw SynthesizeError.modelLoadFailed(modelRepo)
        }
    }
    /// Normalizes a caller-supplied repo: trims whitespace and falls back to
    /// `defaultModelRepo` for nil/blank input.
    private static func resolvedModelRepo(_ modelRepo: String?) -> String {
        let trimmed = modelRepo?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        return trimmed.isEmpty ? Self.defaultModelRepo : trimmed
    }
    /// Packs float samples in [-1, 1] into a canonical 44-byte-header RIFF/WAVE
    /// file: mono, 16-bit signed PCM, little-endian.
    private static func makeWavData(samples: [Float], sampleRate: Double) -> Data {
        let channels: UInt16 = 1
        let bitsPerSample: UInt16 = 16
        let blockAlign = channels * (bitsPerSample / 8) // bytes per frame
        let sampleRateInt = UInt32(sampleRate.rounded())
        let byteRate = sampleRateInt * UInt32(blockAlign)
        let dataSize = UInt32(samples.count) * UInt32(blockAlign)
        var data = Data(capacity: Int(44 + dataSize))
        // RIFF header: chunk size is file size minus the 8-byte RIFF preamble.
        data.append(contentsOf: [0x52, 0x49, 0x46, 0x46]) // RIFF
        data.appendLEUInt32(36 + dataSize)
        data.append(contentsOf: [0x57, 0x41, 0x56, 0x45]) // WAVE
        // "fmt " chunk: 16-byte PCM format descriptor.
        data.append(contentsOf: [0x66, 0x6D, 0x74, 0x20]) // fmt
        data.appendLEUInt32(16)
        data.appendLEUInt16(1) // audio format 1 = linear PCM
        data.appendLEUInt16(channels)
        data.appendLEUInt32(sampleRateInt)
        data.appendLEUInt32(byteRate)
        data.appendLEUInt16(blockAlign)
        data.appendLEUInt16(bitsPerSample)
        // "data" chunk: clamped samples scaled to the Int16 range.
        data.append(contentsOf: [0x64, 0x61, 0x74, 0x61]) // data
        data.appendLEUInt32(dataSize)
        for sample in samples {
            let clamped = max(-1.0, min(1.0, sample))
            let scaled = Int16((clamped * Float(Int16.max)).rounded())
            data.appendLEInt16(scaled)
        }
        return data
    }
}
// Safety relies on the serialized-access contract documented on the type.
extension TalkMLXSpeechSynthesizer: @unchecked Sendable {}
/// Thin wrapper that carries a (non-Sendable) `SpeechGenerationModel` across
/// task boundaries; safety is provided by the caller's serialized access, not
/// by this type.
private struct UncheckedSpeechModel {
    let raw: any SpeechGenerationModel

    /// Native output sample rate of the wrapped model, in Hz.
    func sampleRateValue() -> Int { self.raw.sampleRate }

    /// Synthesizes `text` and returns the generated mono float samples.
    /// Reference-audio conditioning is unused by Talk Mode, so `refAudio`
    /// and `refText` are always nil.
    func generateAudio(
        text: String,
        voice: String?,
        language: String?) async throws -> [Float]
    {
        try await self.raw
            .generate(
                text: text,
                voice: voice,
                refAudio: nil,
                refText: nil,
                language: language)
            .asArray(Float.self)
    }
}

// Promise audited against the serialized-access contract of the synthesizer.
extension UncheckedSpeechModel: @unchecked Sendable {}
/// Little-endian binary append helpers used when assembling the WAV header
/// and sample payload.
extension Data {
    /// Appends `value` as two little-endian bytes.
    fileprivate mutating func appendLEUInt16(_ value: UInt16) {
        // The value-based `withUnsafeBytes(of:)` overload avoids the mutable
        // local copy the `inout` overload required.
        Swift.withUnsafeBytes(of: value.littleEndian) { append(contentsOf: $0) }
    }
    /// Appends `value` as four little-endian bytes.
    fileprivate mutating func appendLEUInt32(_ value: UInt32) {
        Swift.withUnsafeBytes(of: value.littleEndian) { append(contentsOf: $0) }
    }
    /// Appends `value` as two little-endian bytes (two's complement).
    fileprivate mutating func appendLEInt16(_ value: Int16) {
        Swift.withUnsafeBytes(of: value.littleEndian) { append(contentsOf: $0) }
    }
}
// swiftformat:enable wrap wrapMultilineStatementBraces trailingCommas redundantSelf extensionAccessControl

View File

@@ -44,7 +44,13 @@ enum TalkModeGatewayConfigParser {
acc[key] = value
} ?? [:]
let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
let resolvedModel = (model?.isEmpty == false) ? model! : defaultModelIdFallback
let resolvedModel: String? = if model?.isEmpty == false {
model!
} else if activeProvider == defaultProvider {
defaultModelIdFallback
} else {
nil
}
let outputFormat = activeConfig?["outputFormat"]?.stringValue
let interrupt = talk?["interruptOnSpeech"]?.boolValue
let apiKey = activeConfig?["apiKey"]?.stringValue

View File

@@ -10,6 +10,7 @@ actor TalkModeRuntime {
enum PlaybackPlan: Equatable {
case elevenLabsThenSystemVoice(apiKey: String, voiceId: String)
case mlxThenSystemVoice
case systemVoiceOnly
}
@@ -17,6 +18,8 @@ actor TalkModeRuntime {
private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts")
private static let defaultModelIdFallback = "eleven_v3"
private static let defaultTalkProvider = "elevenlabs"
private static let mlxTalkProvider = "mlx"
private static let systemTalkProvider = "system"
private static let defaultSilenceTimeoutMs = TalkDefaults.silenceTimeoutMs
private final class RMSMeter: @unchecked Sendable {
@@ -65,6 +68,7 @@ actor TalkModeRuntime {
private var modelOverrideActive = false
private var defaultOutputFormat: String?
private var interruptOnSpeech: Bool = true
private var activeTalkProvider = TalkModeRuntime.defaultTalkProvider
private var lastInterruptedAtSeconds: Double?
private var voiceAliases: [String: String] = [:]
private var lastSpokenText: String?
@@ -462,7 +466,7 @@ actor TalkModeRuntime {
private func playAssistant(text: String) async {
guard let input = await self.preparePlaybackInput(text: text) else { return }
switch Self.playbackPlan(apiKey: input.apiKey, voiceId: input.voiceId) {
switch Self.playbackPlan(provider: input.provider, apiKey: input.apiKey, voiceId: input.voiceId) {
case let .elevenLabsThenSystemVoice(apiKey, voiceId):
do {
try await self.playElevenLabs(input: input, apiKey: apiKey, voiceId: voiceId)
@@ -477,6 +481,23 @@ actor TalkModeRuntime {
self.ttsLogger.error("talk system voice failed: \(error.localizedDescription, privacy: .public)")
}
}
case .mlxThenSystemVoice:
do {
try await self.playMLX(input: input)
} catch TalkMLXSpeechSynthesizer.SynthesizeError.canceled {
self.ttsLogger.info("talk mlx canceled")
return
} catch {
self.ttsLogger
.error(
"talk MLX failed: \(error.localizedDescription, privacy: .public); " +
"falling back to system voice")
do {
try await self.playSystemVoice(input: input)
} catch {
self.ttsLogger.error("talk system voice failed: \(error.localizedDescription, privacy: .public)")
}
}
case .systemVoiceOnly:
do {
try await self.playSystemVoice(input: input)
@@ -491,19 +512,30 @@ actor TalkModeRuntime {
}
}
static func playbackPlan(apiKey: String?, voiceId: String?) -> PlaybackPlan {
guard let apiKey, !apiKey.isEmpty, let voiceId else {
static func playbackPlan(provider: String, apiKey: String?, voiceId: String?) -> PlaybackPlan {
switch provider {
case self.defaultTalkProvider:
guard let apiKey, !apiKey.isEmpty, let voiceId else {
return .systemVoiceOnly
}
return .elevenLabsThenSystemVoice(apiKey: apiKey, voiceId: voiceId)
case self.mlxTalkProvider:
return .mlxThenSystemVoice
case self.systemTalkProvider:
return .systemVoiceOnly
default:
return .systemVoiceOnly
}
return .elevenLabsThenSystemVoice(apiKey: apiKey, voiceId: voiceId)
}
private struct TalkPlaybackInput {
let generation: Int
let provider: String
let cleanedText: String
let directive: TalkDirective?
let apiKey: String?
let voiceId: String?
let voicePreset: String?
let language: String?
let synthTimeoutSeconds: Double
}
@@ -552,18 +584,20 @@ actor TalkModeRuntime {
resolvedVoice ??
self.currentVoiceId ??
self.defaultVoiceId
let voicePreset = preferredVoice
let provider = self.activeTalkProvider
let language = ElevenLabsTTSClient.validatedLanguage(directive?.language)
let voiceId: String? = if let apiKey, !apiKey.isEmpty {
let voiceId: String? = if provider == Self.defaultTalkProvider, let apiKey, !apiKey.isEmpty {
await self.resolveVoiceId(preferred: preferredVoice, apiKey: apiKey)
} else {
nil
}
if apiKey?.isEmpty != false {
if provider == Self.defaultTalkProvider, apiKey?.isEmpty != false {
self.ttsLogger.warning("talk missing ELEVENLABS_API_KEY; falling back to system voice")
} else if voiceId == nil {
} else if provider == Self.defaultTalkProvider, voiceId == nil {
self.ttsLogger.warning("talk missing voiceId; falling back to system voice")
} else if let voiceId {
self.ttsLogger
@@ -579,15 +613,21 @@ actor TalkModeRuntime {
return TalkPlaybackInput(
generation: gen,
provider: provider,
cleanedText: cleaned,
directive: directive,
apiKey: apiKey,
voiceId: voiceId,
voicePreset: voicePreset,
language: language,
synthTimeoutSeconds: synthTimeoutSeconds)
}
private func playElevenLabs(input: TalkPlaybackInput, apiKey: String, voiceId: String) async throws {
private func playElevenLabs(
input: TalkPlaybackInput,
apiKey: String,
voiceId: String) async throws
{
let desiredOutputFormat = input.directive?.outputFormat ?? self.defaultOutputFormat ?? "pcm_44100"
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(desiredOutputFormat)
if outputFormat == nil, !desiredOutputFormat.isEmpty {
@@ -696,6 +736,39 @@ actor TalkModeRuntime {
self.ttsLogger.info("talk system voice done")
}
private func playMLX(input: TalkPlaybackInput) async throws {
self.ttsLogger.info("talk mlx start chars=\(input.cleanedText.count, privacy: .public)")
if self.interruptOnSpeech {
guard await self.prepareForPlayback(generation: input.generation) else { return }
}
await MainActor.run { TalkModeController.shared.updatePhase(.speaking) }
self.phase = .speaking
let modelRepo = input.directive?.modelId ?? self.currentModelId
let audioData: Data
do {
audioData = try await AsyncTimeout.withTimeout(
seconds: input.synthTimeoutSeconds,
onTimeout: {
TalkMLXSpeechSynthesizer.SynthesizeError.timedOut
},
operation: { [self] in
try await self.synthesizeMLXVoice(
text: input.cleanedText,
modelRepo: modelRepo,
language: input.language,
voicePreset: input.voicePreset)
})
} catch TalkMLXSpeechSynthesizer.SynthesizeError.timedOut {
self.stopMLXVoice()
throw TalkMLXSpeechSynthesizer.SynthesizeError.timedOut
}
let result = await self.playTalkAudio(data: audioData)
if !result.finished, result.interruptedAt == nil {
throw TalkMLXSpeechSynthesizer.SynthesizeError.audioPlaybackFailed
}
self.ttsLogger.info("talk mlx done")
}
private func prepareForPlayback(generation: Int) async -> Bool {
await self.startRecognition()
return self.isCurrent(generation)
@@ -750,10 +823,13 @@ actor TalkModeRuntime {
func stopSpeaking(reason: TalkStopReason) async {
let usePCM = self.lastPlaybackWasPCM
let interruptedAt = usePCM ? await self.stopPCM() : await self.stopMP3()
let remoteInterruptedAt = usePCM ? await self.stopPCM() : await self.stopMP3()
_ = usePCM ? await self.stopMP3() : await self.stopPCM()
let localInterruptedAt = await self.stopTalkAudio()
await TalkSystemSpeechSynthesizer.shared.stop()
self.stopMLXVoice()
guard self.phase == .speaking else { return }
let interruptedAt = remoteInterruptedAt ?? localInterruptedAt
if reason == .speech, let interruptedAt {
self.lastInterruptedAtSeconds = interruptedAt
}
@@ -795,6 +871,33 @@ extension TalkModeRuntime {
StreamingAudioPlayer.shared.stop()
}
@MainActor
private func playTalkAudio(data: Data) async -> TalkPlaybackResult {
await TalkAudioPlayer.shared.play(data: data)
}
@MainActor
private func stopTalkAudio() -> Double? {
TalkAudioPlayer.shared.stop()
}
private func synthesizeMLXVoice(
text: String,
modelRepo: String?,
language: String?,
voicePreset: String?) async throws -> Data
{
try await TalkMLXSpeechSynthesizer.shared.synthesize(
text: text,
modelRepo: modelRepo,
language: language,
voicePreset: voicePreset)
}
private func stopMLXVoice() {
TalkMLXSpeechSynthesizer.shared.stop()
}
// MARK: - Config
private func reloadConfig() async {
@@ -810,6 +913,7 @@ extension TalkModeRuntime {
}
self.defaultOutputFormat = cfg.outputFormat
self.interruptOnSpeech = cfg.interruptOnSpeech
self.activeTalkProvider = cfg.activeProvider
self.silenceWindow = TimeInterval(cfg.silenceTimeoutMs) / 1000
self.apiKey = cfg.apiKey
let hasApiKey = (cfg.apiKey?.isEmpty == false)
@@ -817,7 +921,8 @@ extension TalkModeRuntime {
let modelLabel = (cfg.modelId?.isEmpty == false) ? cfg.modelId! : "none"
self.logger
.info(
"talk config voiceId=\(voiceLabel, privacy: .public) " +
"talk config provider=\(cfg.activeProvider, privacy: .public) " +
"talk config voiceId=\(voiceLabel, privacy: .public) " +
"modelId=\(modelLabel, privacy: .public) " +
"apiKey=\(hasApiKey, privacy: .public) " +
"interrupt=\(cfg.interruptOnSpeech, privacy: .public) " +
@@ -859,11 +964,17 @@ extension TalkModeRuntime {
await MainActor.run {
AppStateStore.shared.seamColorHex = parsed.seamColorHex
}
if parsed.activeProvider != Self.defaultTalkProvider {
self.ttsLogger
.info("talk provider \(parsed.activeProvider, privacy: .public) unsupported; using system voice")
} else if parsed.normalizedPayload {
if parsed.activeProvider == Self.defaultTalkProvider {
self.ttsLogger.info("talk config provider from talk.resolved")
} else if parsed.activeProvider == Self.mlxTalkProvider ||
parsed.activeProvider == Self.systemTalkProvider
{
self.ttsLogger.info(
"talk provider \(parsed.activeProvider, privacy: .public) active")
} else {
self.ttsLogger
.info(
"talk provider \(parsed.activeProvider, privacy: .public) unsupported; using system voice")
}
return parsed
} catch {

View File

@@ -0,0 +1,48 @@
import OpenClawProtocol
import Testing
@testable import OpenClaw
/// Regression coverage for provider-aware Talk config parsing: selecting a
/// non-default provider must not inherit ElevenLabs-specific fallbacks.
struct TalkModeGatewayConfigTests {
    @Test func `mlx provider does not inherit elevenlabs defaults`() {
        // A snapshot whose active provider is "mlx" with only a voiceId set —
        // no modelId and no API key anywhere in the config.
        let mlxSnapshot = ConfigSnapshot(
            path: nil,
            exists: true,
            raw: nil,
            hash: nil,
            parsed: nil,
            valid: true,
            config: [
                "talk": AnyCodable([
                    "provider": "mlx",
                    "providers": [
                        "mlx": [
                            "voiceId": "unused-voice",
                        ],
                    ],
                    "resolved": [
                        "provider": "mlx",
                        "config": [
                            "voiceId": "unused-voice",
                        ],
                    ],
                ]),
            ],
            issues: nil
        )

        let result = TalkModeGatewayConfigParser.parse(
            snapshot: mlxSnapshot,
            defaultProvider: "elevenlabs",
            defaultModelIdFallback: "eleven_v3",
            defaultSilenceTimeoutMs: TalkDefaults.silenceTimeoutMs,
            envVoice: "env-voice",
            sagVoice: "sag-voice",
            envApiKey: "env-key"
        )

        // The ElevenLabs model fallback and env API key must not leak through;
        // the provider's own voiceId is still honored.
        #expect(result.activeProvider == "mlx")
        #expect(result.modelId == nil)
        #expect(result.apiKey == nil)
        #expect(result.voiceId == "unused-voice")
    }
}

View File

@@ -13,11 +13,34 @@ struct TalkModeRuntimeSpeechTests {
}
@Test func `playback plan falls back only from elevenlabs`() {
#expect(
TalkModeRuntime.playbackPlan(apiKey: "key", voiceId: "voice")
== .elevenLabsThenSystemVoice(apiKey: "key", voiceId: "voice"))
#expect(TalkModeRuntime.playbackPlan(apiKey: nil, voiceId: "voice") == .systemVoiceOnly)
#expect(TalkModeRuntime.playbackPlan(apiKey: "key", voiceId: nil) == .systemVoiceOnly)
#expect(TalkModeRuntime.playbackPlan(apiKey: "", voiceId: "voice") == .systemVoiceOnly)
let elevenLabsPlan = TalkModeRuntime.playbackPlan(
provider: "elevenlabs",
apiKey: "key",
voiceId: "voice"
)
let missingKeyPlan = TalkModeRuntime.playbackPlan(
provider: "elevenlabs",
apiKey: nil,
voiceId: "voice"
)
let missingVoicePlan = TalkModeRuntime.playbackPlan(
provider: "elevenlabs",
apiKey: "key",
voiceId: nil
)
let blankKeyPlan = TalkModeRuntime.playbackPlan(
provider: "elevenlabs",
apiKey: "",
voiceId: "voice"
)
let mlxPlan = TalkModeRuntime.playbackPlan(provider: "mlx", apiKey: nil, voiceId: nil)
let systemPlan = TalkModeRuntime.playbackPlan(provider: "system", apiKey: nil, voiceId: nil)
#expect(elevenLabsPlan == .elevenLabsThenSystemVoice(apiKey: "key", voiceId: "voice"))
#expect(missingKeyPlan == .systemVoiceOnly)
#expect(missingVoicePlan == .systemVoiceOnly)
#expect(blankKeyPlan == .systemVoiceOnly)
#expect(mlxPlan == .mlxThenSystemVoice)
#expect(systemPlan == .systemVoiceOnly)
}
}