From e836b5b6d766d5004bfe0dde61e5bf81f5f0d5d2 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 22 Apr 2026 02:12:07 +0100 Subject: [PATCH] ci: isolate mlx from macos swift checks --- .github/workflows/ci.yml | 170 --------------- .gitignore | 2 + apps/macos-mlx-tts/Package.resolved | 141 ++++++++++++ apps/macos-mlx-tts/Package.swift | 27 +++ .../Sources/OpenClawMLXTTSHelper/main.swift | 182 ++++++++++++++++ apps/macos/Package.resolved | 119 +---------- apps/macos/Package.swift | 2 - .../OpenClaw/TalkMLXSpeechSynthesizer.swift | 202 ++++++++---------- scripts/codesign-mac-app.sh | 6 + scripts/package-mac-app.sh | 24 +++ 10 files changed, 474 insertions(+), 401 deletions(-) create mode 100644 apps/macos-mlx-tts/Package.resolved create mode 100644 apps/macos-mlx-tts/Package.swift create mode 100644 apps/macos-mlx-tts/Sources/OpenClawMLXTTSHelper/main.swift diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e8b1387829a..b1d664e4c8c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2084,89 +2084,6 @@ jobs: name: canvas-a2ui-bundle path: src/canvas-host/a2ui/ - - name: Patch mlx-audio-swift manifest - # macOS node tests do not build the Swift package; keep SwiftPM resolution - # out of this lane so it stays focused on platform-specific TS tests. - if: ${{ false }} - run: | - set -euo pipefail - swift package resolve --package-path apps/macos >/dev/null - chmod u+w apps/macos/.build/checkouts/mlx-audio-swift/Package.swift - python <<'PY' - from pathlib import Path - - path = Path("apps/macos/.build/checkouts/mlx-audio-swift/Package.swift") - text = path.read_text() - excludes = { - "Sources/MLXAudioTTS": [ - "Models/Llama/README.md", - "Models/Marvis/README.md", - "Models/PocketTTS/README.md", - "Models/Qwen3/README.md", - "Models/Soprano/README.md", - ], - "Sources/MLXAudioSTT": [ - "Models/GLMASR/README.md", - "Models/VoxtralRealtime/README.md", - "Models/Qwen3ASR/README.md", - "Models/GraniteSpeech/README.md", - "Models/Parakeet/README.md", - ], - "Sources/MLXAudioVAD": [ - "Models/SmartTurn/README.md", - "Models/Sortformer/README.md", - ], - "Sources/MLXAudioLID": [ - "README.md", - ], - "Sources/MLXAudioSTS": [ - "Models/SAMAudio/README.md", - "Models/LFMAudio/README.md", - ], - "Sources/Tools/mlx-audio-swift-tts": [ - "README.md", - ], - "Sources/Tools/mlx-audio-swift-codec": [ - "README.md", - ], - "Sources/Tools/mlx-audio-swift-sts": [ - "README.md", - ], - "Sources/Tools/mlx-audio-swift-stt": [ - "README.md", - ], - "Sources/Tools/mlx-audio-swift-lid": [ - "README.md", - ], - } - - updated = False - for target_path, files in excludes.items(): - already_patched = f'path: "{target_path}",\n exclude: [' in text - if already_patched: - continue - - needle = f' path: "{target_path}"\n' - replacement = ( - f' path: "{target_path}",\n' - " exclude: [\n" - + "".join(f' "{file}",\n' for file in files) - + " ]\n" - ) - - if needle not in text: - raise SystemExit(f"Could not find {target_path} target path in mlx-audio-swift Package.swift") - - text = text.replace(needle, replacement, 1) - updated = True - - if updated: - path.write_text(text) - print(f"Patched {path}") - else: - print("mlx-audio-swift README excludes already present") - PY - - name: TS tests (macOS) env: NODE_OPTIONS: --max-old-space-size=4096 @@ -2245,93 +2162,6 @@ jobs: apps/shared/OpenClawKit/Package.swift \ Swabble/Package.swift - - name: Patch mlx-audio-swift manifest - if: steps.swift-build-cache.outputs.cache-hit != 'true' - run: | - set -euo pipefail - if [ ! -f apps/macos/.build/checkouts/mlx-audio-swift/Package.swift ]; then - swift package resolve --package-path apps/macos >/dev/null - fi - if [ ! -f apps/macos/.build/checkouts/mlx-audio-swift/Package.swift ]; then - echo "mlx-audio-swift checkout missing after swift package resolve" >&2 - exit 1 - fi - chmod u+w apps/macos/.build/checkouts/mlx-audio-swift/Package.swift - python <<'PY' - from pathlib import Path - - path = Path("apps/macos/.build/checkouts/mlx-audio-swift/Package.swift") - text = path.read_text() - excludes = { - "Sources/MLXAudioTTS": [ - "Models/Llama/README.md", - "Models/Marvis/README.md", - "Models/PocketTTS/README.md", - "Models/Qwen3/README.md", - "Models/Soprano/README.md", - ], - "Sources/MLXAudioSTT": [ - "Models/GLMASR/README.md", - "Models/VoxtralRealtime/README.md", - "Models/Qwen3ASR/README.md", - "Models/GraniteSpeech/README.md", - "Models/Parakeet/README.md", - ], - "Sources/MLXAudioVAD": [ - "Models/SmartTurn/README.md", - "Models/Sortformer/README.md", - ], - "Sources/MLXAudioLID": [ - "README.md", - ], - "Sources/MLXAudioSTS": [ - "Models/SAMAudio/README.md", - "Models/LFMAudio/README.md", - ], - "Sources/Tools/mlx-audio-swift-tts": [ - "README.md", - ], - "Sources/Tools/mlx-audio-swift-codec": [ - "README.md", - ], - "Sources/Tools/mlx-audio-swift-sts": [ - "README.md", - ], - "Sources/Tools/mlx-audio-swift-stt": [ - "README.md", - ], - "Sources/Tools/mlx-audio-swift-lid": [ - "README.md", - ], - } - - updated = False - for target_path, files in excludes.items(): - already_patched = f'path: "{target_path}",\n exclude: [' in text - if already_patched: - continue - - needle = f' path: "{target_path}"\n' - replacement = ( - f' path: "{target_path}",\n' - " exclude: [\n" - + "".join(f' "{file}",\n' for file in files) - + " ]\n" - ) - - if needle not in text: - raise SystemExit(f"Could not find {target_path} target path in mlx-audio-swift Package.swift") - - text = text.replace(needle, replacement, 1) - updated = True - - if updated: - path.write_text(text) - print(f"Patched {path}") - else: - print("mlx-audio-swift README excludes already present") - PY - - name: Show toolchain run: | sw_vers diff --git a/.gitignore b/.gitignore index 5901f051b32..7b882fc2402 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ apps/android/benchmark/results/ # Bun build artifacts *.bun-build apps/macos/.build/ +apps/macos-mlx-tts/.build/ apps/shared/MoltbotKit/.build/ apps/shared/OpenClawKit/.build/ apps/shared/OpenClawKit/Package.resolved @@ -57,6 +58,7 @@ vendor/ apps/ios/Clawdbot.xcodeproj/ apps/ios/Clawdbot.xcodeproj/** apps/macos/.build/** +apps/macos-mlx-tts/.build/** **/*.bun-build apps/ios/*.xcfilelist diff --git a/apps/macos-mlx-tts/Package.resolved b/apps/macos-mlx-tts/Package.resolved new file mode 100644 index 00000000000..d859b22ea65 --- /dev/null +++ b/apps/macos-mlx-tts/Package.resolved @@ -0,0 +1,141 @@ +{ + "originHash" : "6b8aa02e612c43e309033a83de5f83b88d9c4267f124d1e062f66385dbbaa7ec", + "pins" : [ + { + "identity" : "eventsource", + "kind" : "remoteSourceControl", + "location" : "https://github.com/mattt/EventSource.git", + "state" : { + "revision" : "a3a85a85214caf642abaa96ae664e4c772a59f6e", + "version" : "1.4.1" + } + }, + { + "identity" : "mlx-audio-swift", + "kind" : "remoteSourceControl", + "location" : "https://github.com/Blaizzy/mlx-audio-swift", + "state" : { + "revision" : "fcbd04daa1bfebe881932f630af2ba6ce9af3274", + "version" : "0.1.2" + } + }, + { + "identity" : "mlx-swift", + "kind" : "remoteSourceControl", + "location" : "https://github.com/ml-explore/mlx-swift.git", + "state" : { + "revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896", + "version" : "0.31.3" + } + }, + { + "identity" : "mlx-swift-lm", + "kind" : "remoteSourceControl", + "location" : "https://github.com/ml-explore/mlx-swift-lm.git", + "state" : { + "revision" : "25b00d4e22e61ec9c41efda47990cd2084ec87ff", + "version" : "2.31.3" + } + }, + { + "identity" : "swift-asn1", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-asn1.git", + "state" : { + "revision" : "eb50cbd14606a9161cbc5d452f18797c90ef0bab", + "version" : "1.7.0" + } + }, + { + "identity" : "swift-atomics", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-atomics.git", + "state" : { + "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", + "version" : "1.3.0" + } + }, + { + "identity" : "swift-collections", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-collections.git", + "state" : { + "revision" : "6675bc0ff86e61436e615df6fc5174e043e57924", + "version" : "1.4.1" + } + }, + { + "identity" : "swift-crypto", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-crypto.git", + "state" : { + "revision" : "476538ccb827f2dd18efc5de754cc87d77127a47", + "version" : "4.4.0" + } + }, + { + "identity" : "swift-huggingface", + "kind" : "remoteSourceControl", + "location" : "https://github.com/huggingface/swift-huggingface.git", + "state" : { + "revision" : "b721959445b617d0bf03910b2b4aced345fd93bf", + "version" : "0.9.0" + } + }, + { + "identity" : "swift-jinja", + "kind" : "remoteSourceControl", + "location" : "https://github.com/huggingface/swift-jinja.git", + "state" : { + "revision" : "0aeefadec459ce8e11a333769950fb86183aca43", + "version" : "2.3.5" + } + }, + { + "identity" : "swift-nio", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-nio.git", + "state" : { + "revision" : "cd6710454f25733900e133c6caf5188952763c36", + "version" : "2.98.0" + } + }, + { + "identity" : "swift-numerics", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-numerics", + "state" : { + "revision" : "0c0290ff6b24942dadb83a929ffaaa1481df04a2", + "version" : "1.1.1" + } + }, + { + "identity" : "swift-system", + "kind" : "remoteSourceControl", + "location" : "https://github.com/apple/swift-system.git", + "state" : { + "revision" : "7c6ad0fc39d0763e0b699210e4124afd5041c5df", + "version" : "1.6.4" + } + }, + { + "identity" : "swift-transformers", + "kind" : "remoteSourceControl", + "location" : "https://github.com/huggingface/swift-transformers.git", + "state" : { + "revision" : "58c4bc11963a140358d791f678a60a2745a23146", + "version" : "1.2.1" + } + }, + { + "identity" : "yyjson", + "kind" : "remoteSourceControl", + "location" : "https://github.com/ibireme/yyjson.git", + "state" : { + "revision" : "8b4a38dc994a110abaec8a400615567bd996105f", + "version" : "0.12.0" + } + } + ], + "version" : 3 +} diff --git a/apps/macos-mlx-tts/Package.swift b/apps/macos-mlx-tts/Package.swift new file mode 100644 index 00000000000..96dffe3c40e --- /dev/null +++ b/apps/macos-mlx-tts/Package.swift @@ -0,0 +1,27 @@ +// swift-tools-version: 6.2 +// Isolated MLX TTS helper package. Keep this out of apps/macos/Package.swift so +// normal macOS app tests do not compile the full MLX audio stack. + +import PackageDescription + +let package = Package( + name: "OpenClawMLXTTS", + platforms: [ + .macOS(.v15), + ], + products: [ + .executable(name: "openclaw-mlx-tts", targets: ["OpenClawMLXTTSHelper"]), + ], + dependencies: [ + .package(url: "https://github.com/Blaizzy/mlx-audio-swift", exact: "0.1.2"), + ], + targets: [ + .executableTarget( + name: "OpenClawMLXTTSHelper", + dependencies: [ + .product(name: "MLXAudioTTS", package: "mlx-audio-swift"), + ], + swiftSettings: [ + .enableUpcomingFeature("StrictConcurrency"), + ]), + ]) diff --git a/apps/macos-mlx-tts/Sources/OpenClawMLXTTSHelper/main.swift b/apps/macos-mlx-tts/Sources/OpenClawMLXTTSHelper/main.swift new file mode 100644 index 00000000000..98537347b1e --- /dev/null +++ b/apps/macos-mlx-tts/Sources/OpenClawMLXTTSHelper/main.swift @@ -0,0 +1,182 @@ +import Foundation +import MLXAudioTTS + +// swiftformat:disable wrap wrapMultilineStatementBraces trailingCommas redundantSelf extensionAccessControl +@main +enum OpenClawMLXTTSHelper { + static func main() async { + do { + let options = try Options.parse(CommandLine.arguments.dropFirst()) + let data = try await synthesize(options) + try data.write(to: options.outputURL, options: [.atomic]) + } catch { + FileHandle.standardError.write(Data("openclaw-mlx-tts: \(error)\n".utf8)) + exit(1) + } + } + + private static func synthesize(_ options: Options) async throws -> Data { + let model = try await TTS.loadModel(modelRepo: options.modelRepo) + let audio = try await UncheckedSpeechModel(raw: model).generateAudio( + text: options.text, + voice: options.voice, + language: options.language) + return makeWavData(samples: audio, sampleRate: Double(model.sampleRate)) + } + + private struct Options { + let text: String + let modelRepo: String + let outputURL: URL + let language: String? + let voice: String? + + static func parse(_ rawArguments: ArraySlice) throws -> Options { + var text: String? + var modelRepo = "mlx-community/Soprano-80M-bf16" + var outputPath: String? + var language: String? + var voice: String? + var iterator = rawArguments.makeIterator() + + while let argument = iterator.next() { + switch argument { + case "--text", "-t": + text = try nextValue(&iterator, argument) + case "--model": + modelRepo = try nextValue(&iterator, argument) + case "--output", "-o": + outputPath = try nextValue(&iterator, argument) + case "--language": + language = try nextValue(&iterator, argument) + case "--voice", "-v": + voice = try nextValue(&iterator, argument) + case "--help", "-h": + throw Usage.requested + default: + if text == nil, !argument.hasPrefix("-") { + text = argument + } else { + throw Usage.invalid("unknown option \(argument)") + } + } + } + + guard let text = text?.trimmingCharacters(in: .whitespacesAndNewlines), !text.isEmpty else { + throw Usage.invalid("missing --text") + } + guard let outputPath, !outputPath.isEmpty else { + throw Usage.invalid("missing --output") + } + + return Options( + text: text, + modelRepo: modelRepo, + outputURL: URL(fileURLWithPath: outputPath), + language: language?.nilIfBlank, + voice: voice?.nilIfBlank) + } + + private static func nextValue( + _ iterator: inout ArraySlice.Iterator, + _ option: String) throws -> String + { + guard let value = iterator.next(), !value.isEmpty else { + throw Usage.invalid("missing value for \(option)") + } + return value + } + } + + private enum Usage: Error, CustomStringConvertible { + case requested + case invalid(String) + + var description: String { + switch self { + case .requested: + "usage: openclaw-mlx-tts --text --output [--model ] [--language ] [--voice ]" + case let .invalid(message): + "\(message)\nusage: openclaw-mlx-tts --text --output [--model ] [--language ] [--voice ]" + } + } + } + + private static func makeWavData(samples: [Float], sampleRate: Double) -> Data { + let channels: UInt16 = 1 + let bitsPerSample: UInt16 = 16 + let blockAlign = channels * (bitsPerSample / 8) + let sampleRateInt = UInt32(sampleRate.rounded()) + let byteRate = sampleRateInt * UInt32(blockAlign) + let dataSize = UInt32(samples.count) * UInt32(blockAlign) + + var data = Data(capacity: Int(44 + dataSize)) + data.append(contentsOf: [0x52, 0x49, 0x46, 0x46]) // RIFF + data.appendLEUInt32(36 + dataSize) + data.append(contentsOf: [0x57, 0x41, 0x56, 0x45]) // WAVE + + data.append(contentsOf: [0x66, 0x6D, 0x74, 0x20]) // fmt + data.appendLEUInt32(16) + data.appendLEUInt16(1) + data.appendLEUInt16(channels) + data.appendLEUInt32(sampleRateInt) + data.appendLEUInt32(byteRate) + data.appendLEUInt16(blockAlign) + data.appendLEUInt16(bitsPerSample) + + data.append(contentsOf: [0x64, 0x61, 0x74, 0x61]) // data + data.appendLEUInt32(dataSize) + + for sample in samples { + let clamped = max(-1.0, min(1.0, sample)) + let scaled = Int16((clamped * Float(Int16.max)).rounded()) + data.appendLEInt16(scaled) + } + return data + } +} + +private struct UncheckedSpeechModel { + let raw: any SpeechGenerationModel + + func generateAudio( + text: String, + voice: String?, + language: String?) async throws -> [Float] { + let generatedAudio = try await raw.generate( + text: text, + voice: voice, + refAudio: nil, + refText: nil, + language: language) + return generatedAudio.asArray(Float.self) + } +} + +extension UncheckedSpeechModel: @unchecked Sendable {} + +private extension String { + var nilIfBlank: String? { + let trimmed = self.trimmingCharacters(in: .whitespacesAndNewlines) + return trimmed.isEmpty ? nil : trimmed + } +} + +private extension Data { + mutating func appendLEUInt16(_ value: UInt16) { + var littleEndian = value.littleEndian + Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) } + } + + mutating func appendLEUInt32(_ value: UInt32) { + var littleEndian = value.littleEndian + Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) } + } + + mutating func appendLEInt16(_ value: Int16) { + var littleEndian = value.littleEndian + Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) } + } +} + +// swiftformat:enable wrap wrapMultilineStatementBraces trailingCommas redundantSelf extensionAccessControl diff --git a/apps/macos/Package.resolved b/apps/macos/Package.resolved index ad308102411..04764565f73 100644 --- a/apps/macos/Package.resolved +++ b/apps/macos/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "31972864afdac74537794e1a3b7bd22484c09ec1be8e3624fb9ea582e9222ad9", + "originHash" : "fb90e7b1977f43661ac91681d16da11f9ddd85630407ef170eaada0a6ee39972", "pins" : [ { "identity" : "axorcist", @@ -28,15 +28,6 @@ "version" : "0.1.0" } }, - { - "identity" : "eventsource", - "kind" : "remoteSourceControl", - "location" : "https://github.com/mattt/EventSource.git", - "state" : { - "revision" : "a3a85a85214caf642abaa96ae664e4c772a59f6e", - "version" : "1.4.1" - } - }, { "identity" : "menubarextraaccess", "kind" : "remoteSourceControl", @@ -46,33 +37,6 @@ "version" : "1.2.2" } }, - { - "identity" : "mlx-audio-swift", - "kind" : "remoteSourceControl", - "location" : "https://github.com/Blaizzy/mlx-audio-swift", - "state" : { - "revision" : "fcbd04daa1bfebe881932f630af2ba6ce9af3274", - "version" : "0.1.2" - } - }, - { - "identity" : "mlx-swift", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ml-explore/mlx-swift.git", - "state" : { - "revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896", - "version" : "0.31.3" - } - }, - { - "identity" : "mlx-swift-lm", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ml-explore/mlx-swift-lm.git", - "state" : { - "revision" : "25b00d4e22e61ec9c41efda47990cd2084ec87ff", - "version" : "2.31.3" - } - }, { "identity" : "peekaboo", "kind" : "remoteSourceControl", @@ -100,33 +64,6 @@ "version" : "1.2.1" } }, - { - "identity" : "swift-asn1", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-asn1.git", - "state" : { - "revision" : "9f542610331815e29cc3821d3b6f488db8715517", - "version" : "1.6.0" - } - }, - { - "identity" : "swift-atomics", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-atomics.git", - "state" : { - "revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7", - "version" : "1.3.0" - } - }, - { - "identity" : "swift-collections", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-collections.git", - "state" : { - "revision" : "6675bc0ff86e61436e615df6fc5174e043e57924", - "version" : "1.4.1" - } - }, { "identity" : "swift-concurrency-extras", "kind" : "remoteSourceControl", @@ -136,33 +73,6 @@ "version" : "1.3.2" } }, - { - "identity" : "swift-crypto", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-crypto.git", - "state" : { - "revision" : "bb4ba815dab96d4edc1e0b86d7b9acf9ff973a84", - "version" : "4.3.1" - } - }, - { - "identity" : "swift-huggingface", - "kind" : "remoteSourceControl", - "location" : "https://github.com/huggingface/swift-huggingface.git", - "state" : { - "revision" : "b721959445b617d0bf03910b2b4aced345fd93bf", - "version" : "0.9.0" - } - }, - { - "identity" : "swift-jinja", - "kind" : "remoteSourceControl", - "location" : "https://github.com/huggingface/swift-jinja.git", - "state" : { - "revision" : "0aeefadec459ce8e11a333769950fb86183aca43", - "version" : "2.3.5" - } - }, { "identity" : "swift-log", "kind" : "remoteSourceControl", @@ -172,15 +82,6 @@ "version" : "1.10.1" } }, - { - "identity" : "swift-nio", - "kind" : "remoteSourceControl", - "location" : "https://github.com/apple/swift-nio.git", - "state" : { - "revision" : "558f24a4647193b5a0e2104031b71c55d31ff83a", - "version" : "2.97.1" - } - }, { "identity" : "swift-numerics", "kind" : "remoteSourceControl", @@ -208,15 +109,6 @@ "version" : "1.6.4" } }, - { - "identity" : "swift-transformers", - "kind" : "remoteSourceControl", - "location" : "https://github.com/huggingface/swift-transformers.git", - "state" : { - "revision" : "58c4bc11963a140358d791f678a60a2745a23146", - "version" : "1.2.1" - } - }, { "identity" : "swiftui-math", "kind" : "remoteSourceControl", @@ -234,15 +126,6 @@ "revision" : "5b06b811c0f5313b6b84bbef98c635a630638c38", "version" : "0.3.1" } - }, - { - "identity" : "yyjson", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ibireme/yyjson.git", - "state" : { - "revision" : "8b4a38dc994a110abaec8a400615567bd996105f", - "version" : "0.12.0" - } } ], "version" : 3 diff --git a/apps/macos/Package.swift b/apps/macos/Package.swift index 688bb94b169..5c9f849d792 100644 --- a/apps/macos/Package.swift +++ b/apps/macos/Package.swift @@ -20,7 +20,6 @@ let package = Package( .package(url: "https://github.com/apple/swift-log.git", from: "1.10.1"), .package(url: "https://github.com/sparkle-project/Sparkle", from: "2.9.0"), .package(url: "https://github.com/steipete/Peekaboo.git", branch: "main"), - .package(url: "https://github.com/Blaizzy/mlx-audio-swift", exact: "0.1.2"), .package(path: "../shared/OpenClawKit"), .package(path: "../../Swabble"), ], @@ -55,7 +54,6 @@ let package = Package( .product(name: "Sparkle", package: "Sparkle"), .product(name: "PeekabooBridge", package: "Peekaboo"), .product(name: "PeekabooAutomationKit", package: "Peekaboo"), - .product(name: "MLXAudioTTS", package: "mlx-audio-swift"), ], exclude: [ "Resources/Info.plist", diff --git a/apps/macos/Sources/OpenClaw/TalkMLXSpeechSynthesizer.swift b/apps/macos/Sources/OpenClaw/TalkMLXSpeechSynthesizer.swift index 419e9ff22ef..751226ea79f 100644 --- a/apps/macos/Sources/OpenClaw/TalkMLXSpeechSynthesizer.swift +++ b/apps/macos/Sources/OpenClaw/TalkMLXSpeechSynthesizer.swift @@ -1,5 +1,4 @@ import Foundation -import MLXAudioTTS import OSLog // swiftformat:disable wrap wrapMultilineStatementBraces trailingCommas redundantSelf extensionAccessControl @@ -18,13 +17,14 @@ final class TalkMLXSpeechSynthesizer { private let logger = Logger(subsystem: "ai.openclaw", category: "talk.mlx") private var currentToken = UUID() - private var modelRepo: String? - private var model: (any SpeechGenerationModel)? + private var currentProcess: Process? private init() {} func stop() { self.currentToken = UUID() + self.currentProcess?.terminate() + self.currentProcess = nil } func synthesize( @@ -39,59 +39,93 @@ final class TalkMLXSpeechSynthesizer { let token = UUID() self.currentToken = token + let tempDir = FileManager.default.temporaryDirectory + .appendingPathComponent("openclaw-mlx-tts-\(token.uuidString)", isDirectory: true) + try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true) + defer { try? FileManager.default.removeItem(at: tempDir) } + + let outputURL = tempDir.appendingPathComponent("speech.wav") + let invocation = Self.helperInvocation() let resolvedRepo = Self.resolvedModelRepo(modelRepo) - let rawModel = try await self.loadModel( - modelRepo: resolvedRepo, - token: token) - let model = UncheckedSpeechModel(raw: rawModel) + var arguments = invocation.argumentPrefix + arguments += [ + "--text", trimmed, + "--model", resolvedRepo, + "--output", outputURL.path, + ] + if let language = language?.trimmingCharacters(in: .whitespacesAndNewlines), !language.isEmpty { + arguments += ["--language", language] + } + if let voicePreset = voicePreset?.trimmingCharacters(in: .whitespacesAndNewlines), !voicePreset.isEmpty { + arguments += ["--voice", voicePreset] + } + + self.logger.info("talk mlx helper start modelRepo=\(resolvedRepo, privacy: .public)") + let process = Process() + process.executableURL = invocation.executableURL + process.arguments = arguments + let stderr = Pipe() + process.standardError = stderr + process.standardOutput = Pipe() + self.currentProcess = process + + let status: Int32 + do { + status = try await Self.run(process) + } catch { + self.currentProcess = nil + self.logger.error("talk mlx helper launch failed: \(error.localizedDescription, privacy: .public)") + throw SynthesizeError.modelLoadFailed(invocation.displayName) + } + self.currentProcess = nil + guard self.currentToken == token else { throw SynthesizeError.canceled } - - let audioData: Data - do { - let audio = try await model.generateAudio( - text: trimmed, - voice: voicePreset, - language: language) - audioData = Self.makeWavData( - samples: audio, - sampleRate: Double(model.sampleRateValue())) - } catch { + guard status == 0 else { + let errorText = Self.readPipe(stderr) self.logger.error( - "talk mlx generation failed: \(error.localizedDescription, privacy: .public)") + "talk mlx helper failed status=\(status, privacy: .public): \(errorText, privacy: .public)") throw SynthesizeError.audioGenerationFailed } - guard self.currentToken == token else { - throw SynthesizeError.canceled + do { + return try Data(contentsOf: outputURL) + } catch { + self.logger.error("talk mlx helper output missing: \(error.localizedDescription, privacy: .public)") + throw SynthesizeError.audioGenerationFailed } - return audioData } - private func loadModel( - modelRepo: String, - token: UUID) async throws -> any SpeechGenerationModel { - if let model = self.model, self.modelRepo == modelRepo { - return model + private struct HelperInvocation { + let executableURL: URL + let argumentPrefix: [String] + let displayName: String + } + + private static func helperInvocation() -> HelperInvocation { + let fileManager = FileManager.default + if let override = ProcessInfo.processInfo.environment["OPENCLAW_MLX_TTS_BIN"], !override.isEmpty { + return HelperInvocation( + executableURL: URL(fileURLWithPath: override), + argumentPrefix: [], + displayName: override) } - self.logger.info("talk mlx loading modelRepo=\(modelRepo, privacy: .public)") - do { - let model = try await TTS.loadModel(modelRepo: modelRepo) - guard self.currentToken == token else { - throw SynthesizeError.canceled + if let executableDir = Bundle.main.executableURL?.deletingLastPathComponent() { + let bundled = executableDir.appendingPathComponent("openclaw-mlx-tts") + if fileManager.isExecutableFile(atPath: bundled.path) { + return HelperInvocation( + executableURL: bundled, + argumentPrefix: [], + displayName: bundled.path) } - self.model = model - self.modelRepo = modelRepo - return model - } catch is CancellationError { - throw SynthesizeError.canceled - } catch { - self.logger.error( - "talk mlx load failed: \(error.localizedDescription, privacy: .public)") - throw SynthesizeError.modelLoadFailed(modelRepo) } + + return HelperInvocation( + executableURL: URL(fileURLWithPath: "/usr/bin/env"), + argumentPrefix: ["openclaw-mlx-tts"], + displayName: "openclaw-mlx-tts") } private static func resolvedModelRepo(_ modelRepo: String?) -> String { @@ -99,80 +133,26 @@ final class TalkMLXSpeechSynthesizer { return trimmed.isEmpty ? Self.defaultModelRepo : trimmed } - private static func makeWavData(samples: [Float], sampleRate: Double) -> Data { - let channels: UInt16 = 1 - let bitsPerSample: UInt16 = 16 - let blockAlign = channels * (bitsPerSample / 8) - let sampleRateInt = UInt32(sampleRate.rounded()) - let byteRate = sampleRateInt * UInt32(blockAlign) - let dataSize = UInt32(samples.count) * UInt32(blockAlign) - - var data = Data(capacity: Int(44 + dataSize)) - data.append(contentsOf: [0x52, 0x49, 0x46, 0x46]) // RIFF - data.appendLEUInt32(36 + dataSize) - data.append(contentsOf: [0x57, 0x41, 0x56, 0x45]) // WAVE - - data.append(contentsOf: [0x66, 0x6D, 0x74, 0x20]) // fmt - data.appendLEUInt32(16) - data.appendLEUInt16(1) - data.appendLEUInt16(channels) - data.appendLEUInt32(sampleRateInt) - data.appendLEUInt32(byteRate) - data.appendLEUInt16(blockAlign) - data.appendLEUInt16(bitsPerSample) - - data.append(contentsOf: [0x64, 0x61, 0x74, 0x61]) // data - data.appendLEUInt32(dataSize) - - for sample in samples { - let clamped = max(-1.0, min(1.0, sample)) - let scaled = Int16((clamped * Float(Int16.max)).rounded()) - data.appendLEInt16(scaled) + private static func run(_ process: Process) async throws -> Int32 { + try await withCheckedThrowingContinuation { continuation in + process.terminationHandler = { process in + continuation.resume(returning: process.terminationStatus) + } + do { + try process.run() + } catch { + continuation.resume(throwing: error) + } } - return data + } + + private static func readPipe(_ pipe: Pipe) -> String { + let data = (try? pipe.fileHandleForReading.readToEnd()) ?? Data() + let text = String(data: data, encoding: .utf8) ?? "" + return text.trimmingCharacters(in: .whitespacesAndNewlines) } } extension TalkMLXSpeechSynthesizer: @unchecked Sendable {} -private struct UncheckedSpeechModel { - let raw: any SpeechGenerationModel - - func sampleRateValue() -> Int { - raw.sampleRate - } - - func generateAudio( - text: String, - voice: String?, - language: String?) async throws -> [Float] { - let generatedAudio = try await raw.generate( - text: text, - voice: voice, - refAudio: nil, - refText: nil, - language: language) - return generatedAudio.asArray(Float.self) - } -} - -extension UncheckedSpeechModel: @unchecked Sendable {} - -extension Data { - fileprivate mutating func appendLEUInt16(_ value: UInt16) { - var littleEndian = value.littleEndian - Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) } - } - - fileprivate mutating func appendLEUInt32(_ value: UInt32) { - var littleEndian = value.littleEndian - Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) } - } - - fileprivate mutating func appendLEInt16(_ value: Int16) { - var littleEndian = value.littleEndian - Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) } - } -} - // swiftformat:enable wrap wrapMultilineStatementBraces trailingCommas redundantSelf extensionAccessControl diff --git a/scripts/codesign-mac-app.sh b/scripts/codesign-mac-app.sh index d43987c7a28..840dc4baf06 100755 --- a/scripts/codesign-mac-app.sh +++ b/scripts/codesign-mac-app.sh @@ -252,6 +252,12 @@ if [ -f "$APP_BUNDLE/Contents/MacOS/OpenClaw" ]; then echo "Signing main binary"; sign_item "$APP_BUNDLE/Contents/MacOS/OpenClaw" "$APP_ENTITLEMENTS" fi +# Sign bundled helper binaries before signing the app bundle. +MLX_TTS_HELPER="$APP_BUNDLE/Contents/MacOS/openclaw-mlx-tts" +if [ -f "$MLX_TTS_HELPER" ]; then + echo "Signing MLX TTS helper"; sign_item "$MLX_TTS_HELPER" "$APP_ENTITLEMENTS" +fi + # Sign Sparkle deeply if present SPARKLE="$APP_BUNDLE/Contents/Frameworks/Sparkle.framework" if [ -d "$SPARKLE" ]; then diff --git a/scripts/package-mac-app.sh b/scripts/package-mac-app.sh index 5c53d776217..39265d25517 100755 --- a/scripts/package-mac-app.sh +++ b/scripts/package-mac-app.sh @@ -8,6 +8,9 @@ ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" APP_ROOT="$ROOT_DIR/dist/OpenClaw.app" BUILD_ROOT="$ROOT_DIR/apps/macos/.build" PRODUCT="OpenClaw" +MLX_TTS_HELPER_PRODUCT="openclaw-mlx-tts" +MLX_TTS_HELPER_ROOT="$ROOT_DIR/apps/macos-mlx-tts" +MLX_TTS_HELPER_BUILD_ROOT="$MLX_TTS_HELPER_ROOT/.build" BUNDLE_ID="${BUNDLE_ID:-ai.openclaw.mac.debug}" PKG_VERSION="$(cd "$ROOT_DIR" && node -p "require('./package.json').version" 2>/dev/null || echo "0.0.0")" BUILD_TS=$(date -u +"%Y-%m-%dT%H:%M:%SZ") @@ -49,6 +52,14 @@ bin_for_arch() { echo "$(build_path_for_arch "$1")/$BUILD_CONFIG/$PRODUCT" } +helper_build_path_for_arch() { + echo "$MLX_TTS_HELPER_BUILD_ROOT/$1" +} + +helper_bin_for_arch() { + echo "$(helper_build_path_for_arch "$1")/$BUILD_CONFIG/$MLX_TTS_HELPER_PRODUCT" +} + sparkle_framework_for_arch() { echo "$(build_path_for_arch "$1")/$BUILD_CONFIG/Sparkle.framework" } @@ -159,6 +170,7 @@ echo "🔨 Building $PRODUCT ($BUILD_CONFIG) [${BUILD_ARCHS[*]}]" for arch in "${BUILD_ARCHS[@]}"; do BUILD_PATH="$(build_path_for_arch "$arch")" swift build -c "$BUILD_CONFIG" --product "$PRODUCT" --build-path "$BUILD_PATH" --arch "$arch" -Xlinker -rpath -Xlinker @executable_path/../Frameworks + swift build --package-path "$MLX_TTS_HELPER_ROOT" -c "$BUILD_CONFIG" --product "$MLX_TTS_HELPER_PRODUCT" --build-path "$(helper_build_path_for_arch "$arch")" --arch "$arch" done BIN_PRIMARY="$(bin_for_arch "$PRIMARY_ARCH")" @@ -204,6 +216,18 @@ chmod +x "$APP_ROOT/Contents/MacOS/OpenClaw" # SwiftPM outputs ad-hoc signed binaries; strip the signature before install_name_tool to avoid warnings. /usr/bin/codesign --remove-signature "$APP_ROOT/Contents/MacOS/OpenClaw" 2>/dev/null || true +echo "🚚 Copying MLX TTS helper" +cp "$(helper_bin_for_arch "$PRIMARY_ARCH")" "$APP_ROOT/Contents/MacOS/$MLX_TTS_HELPER_PRODUCT" +if [[ "${#BUILD_ARCHS[@]}" -gt 1 ]]; then + HELPER_BIN_INPUTS=() + for arch in "${BUILD_ARCHS[@]}"; do + HELPER_BIN_INPUTS+=("$(helper_bin_for_arch "$arch")") + done + /usr/bin/lipo -create "${HELPER_BIN_INPUTS[@]}" -output "$APP_ROOT/Contents/MacOS/$MLX_TTS_HELPER_PRODUCT" +fi +chmod +x "$APP_ROOT/Contents/MacOS/$MLX_TTS_HELPER_PRODUCT" +/usr/bin/codesign --remove-signature "$APP_ROOT/Contents/MacOS/$MLX_TTS_HELPER_PRODUCT" 2>/dev/null || true + SPARKLE_FRAMEWORK_PRIMARY="$(sparkle_framework_for_arch "$PRIMARY_ARCH")" if [ -d "$SPARKLE_FRAMEWORK_PRIMARY" ]; then echo "✨ Embedding Sparkle.framework"