mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:20:43 +00:00
ci: isolate mlx from macos swift checks
This commit is contained in:
170
.github/workflows/ci.yml
vendored
170
.github/workflows/ci.yml
vendored
@@ -2084,89 +2084,6 @@ jobs:
|
||||
name: canvas-a2ui-bundle
|
||||
path: src/canvas-host/a2ui/
|
||||
|
||||
- name: Patch mlx-audio-swift manifest
|
||||
# macOS node tests do not build the Swift package; keep SwiftPM resolution
|
||||
# out of this lane so it stays focused on platform-specific TS tests.
|
||||
if: ${{ false }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
swift package resolve --package-path apps/macos >/dev/null
|
||||
chmod u+w apps/macos/.build/checkouts/mlx-audio-swift/Package.swift
|
||||
python <<'PY'
|
||||
from pathlib import Path
|
||||
|
||||
path = Path("apps/macos/.build/checkouts/mlx-audio-swift/Package.swift")
|
||||
text = path.read_text()
|
||||
excludes = {
|
||||
"Sources/MLXAudioTTS": [
|
||||
"Models/Llama/README.md",
|
||||
"Models/Marvis/README.md",
|
||||
"Models/PocketTTS/README.md",
|
||||
"Models/Qwen3/README.md",
|
||||
"Models/Soprano/README.md",
|
||||
],
|
||||
"Sources/MLXAudioSTT": [
|
||||
"Models/GLMASR/README.md",
|
||||
"Models/VoxtralRealtime/README.md",
|
||||
"Models/Qwen3ASR/README.md",
|
||||
"Models/GraniteSpeech/README.md",
|
||||
"Models/Parakeet/README.md",
|
||||
],
|
||||
"Sources/MLXAudioVAD": [
|
||||
"Models/SmartTurn/README.md",
|
||||
"Models/Sortformer/README.md",
|
||||
],
|
||||
"Sources/MLXAudioLID": [
|
||||
"README.md",
|
||||
],
|
||||
"Sources/MLXAudioSTS": [
|
||||
"Models/SAMAudio/README.md",
|
||||
"Models/LFMAudio/README.md",
|
||||
],
|
||||
"Sources/Tools/mlx-audio-swift-tts": [
|
||||
"README.md",
|
||||
],
|
||||
"Sources/Tools/mlx-audio-swift-codec": [
|
||||
"README.md",
|
||||
],
|
||||
"Sources/Tools/mlx-audio-swift-sts": [
|
||||
"README.md",
|
||||
],
|
||||
"Sources/Tools/mlx-audio-swift-stt": [
|
||||
"README.md",
|
||||
],
|
||||
"Sources/Tools/mlx-audio-swift-lid": [
|
||||
"README.md",
|
||||
],
|
||||
}
|
||||
|
||||
updated = False
|
||||
for target_path, files in excludes.items():
|
||||
already_patched = f'path: "{target_path}",\n exclude: [' in text
|
||||
if already_patched:
|
||||
continue
|
||||
|
||||
needle = f' path: "{target_path}"\n'
|
||||
replacement = (
|
||||
f' path: "{target_path}",\n'
|
||||
" exclude: [\n"
|
||||
+ "".join(f' "{file}",\n' for file in files)
|
||||
+ " ]\n"
|
||||
)
|
||||
|
||||
if needle not in text:
|
||||
raise SystemExit(f"Could not find {target_path} target path in mlx-audio-swift Package.swift")
|
||||
|
||||
text = text.replace(needle, replacement, 1)
|
||||
updated = True
|
||||
|
||||
if updated:
|
||||
path.write_text(text)
|
||||
print(f"Patched {path}")
|
||||
else:
|
||||
print("mlx-audio-swift README excludes already present")
|
||||
PY
|
||||
|
||||
- name: TS tests (macOS)
|
||||
env:
|
||||
NODE_OPTIONS: --max-old-space-size=4096
|
||||
@@ -2245,93 +2162,6 @@ jobs:
|
||||
apps/shared/OpenClawKit/Package.swift \
|
||||
Swabble/Package.swift
|
||||
|
||||
- name: Patch mlx-audio-swift manifest
|
||||
if: steps.swift-build-cache.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ ! -f apps/macos/.build/checkouts/mlx-audio-swift/Package.swift ]; then
|
||||
swift package resolve --package-path apps/macos >/dev/null
|
||||
fi
|
||||
if [ ! -f apps/macos/.build/checkouts/mlx-audio-swift/Package.swift ]; then
|
||||
echo "mlx-audio-swift checkout missing after swift package resolve" >&2
|
||||
exit 1
|
||||
fi
|
||||
chmod u+w apps/macos/.build/checkouts/mlx-audio-swift/Package.swift
|
||||
python <<'PY'
|
||||
from pathlib import Path
|
||||
|
||||
path = Path("apps/macos/.build/checkouts/mlx-audio-swift/Package.swift")
|
||||
text = path.read_text()
|
||||
excludes = {
|
||||
"Sources/MLXAudioTTS": [
|
||||
"Models/Llama/README.md",
|
||||
"Models/Marvis/README.md",
|
||||
"Models/PocketTTS/README.md",
|
||||
"Models/Qwen3/README.md",
|
||||
"Models/Soprano/README.md",
|
||||
],
|
||||
"Sources/MLXAudioSTT": [
|
||||
"Models/GLMASR/README.md",
|
||||
"Models/VoxtralRealtime/README.md",
|
||||
"Models/Qwen3ASR/README.md",
|
||||
"Models/GraniteSpeech/README.md",
|
||||
"Models/Parakeet/README.md",
|
||||
],
|
||||
"Sources/MLXAudioVAD": [
|
||||
"Models/SmartTurn/README.md",
|
||||
"Models/Sortformer/README.md",
|
||||
],
|
||||
"Sources/MLXAudioLID": [
|
||||
"README.md",
|
||||
],
|
||||
"Sources/MLXAudioSTS": [
|
||||
"Models/SAMAudio/README.md",
|
||||
"Models/LFMAudio/README.md",
|
||||
],
|
||||
"Sources/Tools/mlx-audio-swift-tts": [
|
||||
"README.md",
|
||||
],
|
||||
"Sources/Tools/mlx-audio-swift-codec": [
|
||||
"README.md",
|
||||
],
|
||||
"Sources/Tools/mlx-audio-swift-sts": [
|
||||
"README.md",
|
||||
],
|
||||
"Sources/Tools/mlx-audio-swift-stt": [
|
||||
"README.md",
|
||||
],
|
||||
"Sources/Tools/mlx-audio-swift-lid": [
|
||||
"README.md",
|
||||
],
|
||||
}
|
||||
|
||||
updated = False
|
||||
for target_path, files in excludes.items():
|
||||
already_patched = f'path: "{target_path}",\n exclude: [' in text
|
||||
if already_patched:
|
||||
continue
|
||||
|
||||
needle = f' path: "{target_path}"\n'
|
||||
replacement = (
|
||||
f' path: "{target_path}",\n'
|
||||
" exclude: [\n"
|
||||
+ "".join(f' "{file}",\n' for file in files)
|
||||
+ " ]\n"
|
||||
)
|
||||
|
||||
if needle not in text:
|
||||
raise SystemExit(f"Could not find {target_path} target path in mlx-audio-swift Package.swift")
|
||||
|
||||
text = text.replace(needle, replacement, 1)
|
||||
updated = True
|
||||
|
||||
if updated:
|
||||
path.write_text(text)
|
||||
print(f"Patched {path}")
|
||||
else:
|
||||
print("mlx-audio-swift README excludes already present")
|
||||
PY
|
||||
|
||||
- name: Show toolchain
|
||||
run: |
|
||||
sw_vers
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -36,6 +36,7 @@ apps/android/benchmark/results/
|
||||
# Bun build artifacts
|
||||
*.bun-build
|
||||
apps/macos/.build/
|
||||
apps/macos-mlx-tts/.build/
|
||||
apps/shared/MoltbotKit/.build/
|
||||
apps/shared/OpenClawKit/.build/
|
||||
apps/shared/OpenClawKit/Package.resolved
|
||||
@@ -57,6 +58,7 @@ vendor/
|
||||
apps/ios/Clawdbot.xcodeproj/
|
||||
apps/ios/Clawdbot.xcodeproj/**
|
||||
apps/macos/.build/**
|
||||
apps/macos-mlx-tts/.build/**
|
||||
**/*.bun-build
|
||||
apps/ios/*.xcfilelist
|
||||
|
||||
|
||||
141
apps/macos-mlx-tts/Package.resolved
Normal file
141
apps/macos-mlx-tts/Package.resolved
Normal file
@@ -0,0 +1,141 @@
|
||||
{
|
||||
"originHash" : "6b8aa02e612c43e309033a83de5f83b88d9c4267f124d1e062f66385dbbaa7ec",
|
||||
"pins" : [
|
||||
{
|
||||
"identity" : "eventsource",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/mattt/EventSource.git",
|
||||
"state" : {
|
||||
"revision" : "a3a85a85214caf642abaa96ae664e4c772a59f6e",
|
||||
"version" : "1.4.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "mlx-audio-swift",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/Blaizzy/mlx-audio-swift",
|
||||
"state" : {
|
||||
"revision" : "fcbd04daa1bfebe881932f630af2ba6ce9af3274",
|
||||
"version" : "0.1.2"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "mlx-swift",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/ml-explore/mlx-swift.git",
|
||||
"state" : {
|
||||
"revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896",
|
||||
"version" : "0.31.3"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "mlx-swift-lm",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/ml-explore/mlx-swift-lm.git",
|
||||
"state" : {
|
||||
"revision" : "25b00d4e22e61ec9c41efda47990cd2084ec87ff",
|
||||
"version" : "2.31.3"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-asn1",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-asn1.git",
|
||||
"state" : {
|
||||
"revision" : "eb50cbd14606a9161cbc5d452f18797c90ef0bab",
|
||||
"version" : "1.7.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-atomics",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-atomics.git",
|
||||
"state" : {
|
||||
"revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7",
|
||||
"version" : "1.3.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-collections",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-collections.git",
|
||||
"state" : {
|
||||
"revision" : "6675bc0ff86e61436e615df6fc5174e043e57924",
|
||||
"version" : "1.4.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-crypto",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-crypto.git",
|
||||
"state" : {
|
||||
"revision" : "476538ccb827f2dd18efc5de754cc87d77127a47",
|
||||
"version" : "4.4.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-huggingface",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/huggingface/swift-huggingface.git",
|
||||
"state" : {
|
||||
"revision" : "b721959445b617d0bf03910b2b4aced345fd93bf",
|
||||
"version" : "0.9.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-jinja",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/huggingface/swift-jinja.git",
|
||||
"state" : {
|
||||
"revision" : "0aeefadec459ce8e11a333769950fb86183aca43",
|
||||
"version" : "2.3.5"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-nio",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-nio.git",
|
||||
"state" : {
|
||||
"revision" : "cd6710454f25733900e133c6caf5188952763c36",
|
||||
"version" : "2.98.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-numerics",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-numerics",
|
||||
"state" : {
|
||||
"revision" : "0c0290ff6b24942dadb83a929ffaaa1481df04a2",
|
||||
"version" : "1.1.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-system",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-system.git",
|
||||
"state" : {
|
||||
"revision" : "7c6ad0fc39d0763e0b699210e4124afd5041c5df",
|
||||
"version" : "1.6.4"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-transformers",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/huggingface/swift-transformers.git",
|
||||
"state" : {
|
||||
"revision" : "58c4bc11963a140358d791f678a60a2745a23146",
|
||||
"version" : "1.2.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "yyjson",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/ibireme/yyjson.git",
|
||||
"state" : {
|
||||
"revision" : "8b4a38dc994a110abaec8a400615567bd996105f",
|
||||
"version" : "0.12.0"
|
||||
}
|
||||
}
|
||||
],
|
||||
"version" : 3
|
||||
}
|
||||
27
apps/macos-mlx-tts/Package.swift
Normal file
27
apps/macos-mlx-tts/Package.swift
Normal file
@@ -0,0 +1,27 @@
|
||||
// swift-tools-version: 6.2
|
||||
// Isolated MLX TTS helper package. Keep this out of apps/macos/Package.swift so
|
||||
// normal macOS app tests do not compile the full MLX audio stack.
|
||||
|
||||
import PackageDescription
|
||||
|
||||
let package = Package(
|
||||
name: "OpenClawMLXTTS",
|
||||
platforms: [
|
||||
.macOS(.v15),
|
||||
],
|
||||
products: [
|
||||
.executable(name: "openclaw-mlx-tts", targets: ["OpenClawMLXTTSHelper"]),
|
||||
],
|
||||
dependencies: [
|
||||
.package(url: "https://github.com/Blaizzy/mlx-audio-swift", exact: "0.1.2"),
|
||||
],
|
||||
targets: [
|
||||
.executableTarget(
|
||||
name: "OpenClawMLXTTSHelper",
|
||||
dependencies: [
|
||||
.product(name: "MLXAudioTTS", package: "mlx-audio-swift"),
|
||||
],
|
||||
swiftSettings: [
|
||||
.enableUpcomingFeature("StrictConcurrency"),
|
||||
]),
|
||||
])
|
||||
182
apps/macos-mlx-tts/Sources/OpenClawMLXTTSHelper/main.swift
Normal file
182
apps/macos-mlx-tts/Sources/OpenClawMLXTTSHelper/main.swift
Normal file
@@ -0,0 +1,182 @@
|
||||
import Foundation
|
||||
import MLXAudioTTS
|
||||
|
||||
// swiftformat:disable wrap wrapMultilineStatementBraces trailingCommas redundantSelf extensionAccessControl
|
||||
@main
|
||||
enum OpenClawMLXTTSHelper {
|
||||
static func main() async {
|
||||
do {
|
||||
let options = try Options.parse(CommandLine.arguments.dropFirst())
|
||||
let data = try await synthesize(options)
|
||||
try data.write(to: options.outputURL, options: [.atomic])
|
||||
} catch {
|
||||
FileHandle.standardError.write(Data("openclaw-mlx-tts: \(error)\n".utf8))
|
||||
exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
private static func synthesize(_ options: Options) async throws -> Data {
|
||||
let model = try await TTS.loadModel(modelRepo: options.modelRepo)
|
||||
let audio = try await UncheckedSpeechModel(raw: model).generateAudio(
|
||||
text: options.text,
|
||||
voice: options.voice,
|
||||
language: options.language)
|
||||
return makeWavData(samples: audio, sampleRate: Double(model.sampleRate))
|
||||
}
|
||||
|
||||
private struct Options {
|
||||
let text: String
|
||||
let modelRepo: String
|
||||
let outputURL: URL
|
||||
let language: String?
|
||||
let voice: String?
|
||||
|
||||
static func parse(_ rawArguments: ArraySlice<String>) throws -> Options {
|
||||
var text: String?
|
||||
var modelRepo = "mlx-community/Soprano-80M-bf16"
|
||||
var outputPath: String?
|
||||
var language: String?
|
||||
var voice: String?
|
||||
var iterator = rawArguments.makeIterator()
|
||||
|
||||
while let argument = iterator.next() {
|
||||
switch argument {
|
||||
case "--text", "-t":
|
||||
text = try nextValue(&iterator, argument)
|
||||
case "--model":
|
||||
modelRepo = try nextValue(&iterator, argument)
|
||||
case "--output", "-o":
|
||||
outputPath = try nextValue(&iterator, argument)
|
||||
case "--language":
|
||||
language = try nextValue(&iterator, argument)
|
||||
case "--voice", "-v":
|
||||
voice = try nextValue(&iterator, argument)
|
||||
case "--help", "-h":
|
||||
throw Usage.requested
|
||||
default:
|
||||
if text == nil, !argument.hasPrefix("-") {
|
||||
text = argument
|
||||
} else {
|
||||
throw Usage.invalid("unknown option \(argument)")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
guard let text = text?.trimmingCharacters(in: .whitespacesAndNewlines), !text.isEmpty else {
|
||||
throw Usage.invalid("missing --text")
|
||||
}
|
||||
guard let outputPath, !outputPath.isEmpty else {
|
||||
throw Usage.invalid("missing --output")
|
||||
}
|
||||
|
||||
return Options(
|
||||
text: text,
|
||||
modelRepo: modelRepo,
|
||||
outputURL: URL(fileURLWithPath: outputPath),
|
||||
language: language?.nilIfBlank,
|
||||
voice: voice?.nilIfBlank)
|
||||
}
|
||||
|
||||
private static func nextValue(
|
||||
_ iterator: inout ArraySlice<String>.Iterator,
|
||||
_ option: String) throws -> String
|
||||
{
|
||||
guard let value = iterator.next(), !value.isEmpty else {
|
||||
throw Usage.invalid("missing value for \(option)")
|
||||
}
|
||||
return value
|
||||
}
|
||||
}
|
||||
|
||||
private enum Usage: Error, CustomStringConvertible {
|
||||
case requested
|
||||
case invalid(String)
|
||||
|
||||
var description: String {
|
||||
switch self {
|
||||
case .requested:
|
||||
"usage: openclaw-mlx-tts --text <text> --output <wav> [--model <hf-repo>] [--language <id>] [--voice <name>]"
|
||||
case let .invalid(message):
|
||||
"\(message)\nusage: openclaw-mlx-tts --text <text> --output <wav> [--model <hf-repo>] [--language <id>] [--voice <name>]"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static func makeWavData(samples: [Float], sampleRate: Double) -> Data {
|
||||
let channels: UInt16 = 1
|
||||
let bitsPerSample: UInt16 = 16
|
||||
let blockAlign = channels * (bitsPerSample / 8)
|
||||
let sampleRateInt = UInt32(sampleRate.rounded())
|
||||
let byteRate = sampleRateInt * UInt32(blockAlign)
|
||||
let dataSize = UInt32(samples.count) * UInt32(blockAlign)
|
||||
|
||||
var data = Data(capacity: Int(44 + dataSize))
|
||||
data.append(contentsOf: [0x52, 0x49, 0x46, 0x46]) // RIFF
|
||||
data.appendLEUInt32(36 + dataSize)
|
||||
data.append(contentsOf: [0x57, 0x41, 0x56, 0x45]) // WAVE
|
||||
|
||||
data.append(contentsOf: [0x66, 0x6D, 0x74, 0x20]) // fmt
|
||||
data.appendLEUInt32(16)
|
||||
data.appendLEUInt16(1)
|
||||
data.appendLEUInt16(channels)
|
||||
data.appendLEUInt32(sampleRateInt)
|
||||
data.appendLEUInt32(byteRate)
|
||||
data.appendLEUInt16(blockAlign)
|
||||
data.appendLEUInt16(bitsPerSample)
|
||||
|
||||
data.append(contentsOf: [0x64, 0x61, 0x74, 0x61]) // data
|
||||
data.appendLEUInt32(dataSize)
|
||||
|
||||
for sample in samples {
|
||||
let clamped = max(-1.0, min(1.0, sample))
|
||||
let scaled = Int16((clamped * Float(Int16.max)).rounded())
|
||||
data.appendLEInt16(scaled)
|
||||
}
|
||||
return data
|
||||
}
|
||||
}
|
||||
|
||||
private struct UncheckedSpeechModel {
|
||||
let raw: any SpeechGenerationModel
|
||||
|
||||
func generateAudio(
|
||||
text: String,
|
||||
voice: String?,
|
||||
language: String?) async throws -> [Float] {
|
||||
let generatedAudio = try await raw.generate(
|
||||
text: text,
|
||||
voice: voice,
|
||||
refAudio: nil,
|
||||
refText: nil,
|
||||
language: language)
|
||||
return generatedAudio.asArray(Float.self)
|
||||
}
|
||||
}
|
||||
|
||||
extension UncheckedSpeechModel: @unchecked Sendable {}
|
||||
|
||||
private extension String {
|
||||
var nilIfBlank: String? {
|
||||
let trimmed = self.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
return trimmed.isEmpty ? nil : trimmed
|
||||
}
|
||||
}
|
||||
|
||||
private extension Data {
|
||||
mutating func appendLEUInt16(_ value: UInt16) {
|
||||
var littleEndian = value.littleEndian
|
||||
Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) }
|
||||
}
|
||||
|
||||
mutating func appendLEUInt32(_ value: UInt32) {
|
||||
var littleEndian = value.littleEndian
|
||||
Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) }
|
||||
}
|
||||
|
||||
mutating func appendLEInt16(_ value: Int16) {
|
||||
var littleEndian = value.littleEndian
|
||||
Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) }
|
||||
}
|
||||
}
|
||||
|
||||
// swiftformat:enable wrap wrapMultilineStatementBraces trailingCommas redundantSelf extensionAccessControl
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"originHash" : "31972864afdac74537794e1a3b7bd22484c09ec1be8e3624fb9ea582e9222ad9",
|
||||
"originHash" : "fb90e7b1977f43661ac91681d16da11f9ddd85630407ef170eaada0a6ee39972",
|
||||
"pins" : [
|
||||
{
|
||||
"identity" : "axorcist",
|
||||
@@ -28,15 +28,6 @@
|
||||
"version" : "0.1.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "eventsource",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/mattt/EventSource.git",
|
||||
"state" : {
|
||||
"revision" : "a3a85a85214caf642abaa96ae664e4c772a59f6e",
|
||||
"version" : "1.4.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "menubarextraaccess",
|
||||
"kind" : "remoteSourceControl",
|
||||
@@ -46,33 +37,6 @@
|
||||
"version" : "1.2.2"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "mlx-audio-swift",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/Blaizzy/mlx-audio-swift",
|
||||
"state" : {
|
||||
"revision" : "fcbd04daa1bfebe881932f630af2ba6ce9af3274",
|
||||
"version" : "0.1.2"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "mlx-swift",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/ml-explore/mlx-swift.git",
|
||||
"state" : {
|
||||
"revision" : "61b9e011e09a62b489f6bd647958f1555bdf2896",
|
||||
"version" : "0.31.3"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "mlx-swift-lm",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/ml-explore/mlx-swift-lm.git",
|
||||
"state" : {
|
||||
"revision" : "25b00d4e22e61ec9c41efda47990cd2084ec87ff",
|
||||
"version" : "2.31.3"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "peekaboo",
|
||||
"kind" : "remoteSourceControl",
|
||||
@@ -100,33 +64,6 @@
|
||||
"version" : "1.2.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-asn1",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-asn1.git",
|
||||
"state" : {
|
||||
"revision" : "9f542610331815e29cc3821d3b6f488db8715517",
|
||||
"version" : "1.6.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-atomics",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-atomics.git",
|
||||
"state" : {
|
||||
"revision" : "b601256eab081c0f92f059e12818ac1d4f178ff7",
|
||||
"version" : "1.3.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-collections",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-collections.git",
|
||||
"state" : {
|
||||
"revision" : "6675bc0ff86e61436e615df6fc5174e043e57924",
|
||||
"version" : "1.4.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-concurrency-extras",
|
||||
"kind" : "remoteSourceControl",
|
||||
@@ -136,33 +73,6 @@
|
||||
"version" : "1.3.2"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-crypto",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-crypto.git",
|
||||
"state" : {
|
||||
"revision" : "bb4ba815dab96d4edc1e0b86d7b9acf9ff973a84",
|
||||
"version" : "4.3.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-huggingface",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/huggingface/swift-huggingface.git",
|
||||
"state" : {
|
||||
"revision" : "b721959445b617d0bf03910b2b4aced345fd93bf",
|
||||
"version" : "0.9.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-jinja",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/huggingface/swift-jinja.git",
|
||||
"state" : {
|
||||
"revision" : "0aeefadec459ce8e11a333769950fb86183aca43",
|
||||
"version" : "2.3.5"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-log",
|
||||
"kind" : "remoteSourceControl",
|
||||
@@ -172,15 +82,6 @@
|
||||
"version" : "1.10.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-nio",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/apple/swift-nio.git",
|
||||
"state" : {
|
||||
"revision" : "558f24a4647193b5a0e2104031b71c55d31ff83a",
|
||||
"version" : "2.97.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-numerics",
|
||||
"kind" : "remoteSourceControl",
|
||||
@@ -208,15 +109,6 @@
|
||||
"version" : "1.6.4"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swift-transformers",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/huggingface/swift-transformers.git",
|
||||
"state" : {
|
||||
"revision" : "58c4bc11963a140358d791f678a60a2745a23146",
|
||||
"version" : "1.2.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "swiftui-math",
|
||||
"kind" : "remoteSourceControl",
|
||||
@@ -234,15 +126,6 @@
|
||||
"revision" : "5b06b811c0f5313b6b84bbef98c635a630638c38",
|
||||
"version" : "0.3.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "yyjson",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/ibireme/yyjson.git",
|
||||
"state" : {
|
||||
"revision" : "8b4a38dc994a110abaec8a400615567bd996105f",
|
||||
"version" : "0.12.0"
|
||||
}
|
||||
}
|
||||
],
|
||||
"version" : 3
|
||||
|
||||
@@ -20,7 +20,6 @@ let package = Package(
|
||||
.package(url: "https://github.com/apple/swift-log.git", from: "1.10.1"),
|
||||
.package(url: "https://github.com/sparkle-project/Sparkle", from: "2.9.0"),
|
||||
.package(url: "https://github.com/steipete/Peekaboo.git", branch: "main"),
|
||||
.package(url: "https://github.com/Blaizzy/mlx-audio-swift", exact: "0.1.2"),
|
||||
.package(path: "../shared/OpenClawKit"),
|
||||
.package(path: "../../Swabble"),
|
||||
],
|
||||
@@ -55,7 +54,6 @@ let package = Package(
|
||||
.product(name: "Sparkle", package: "Sparkle"),
|
||||
.product(name: "PeekabooBridge", package: "Peekaboo"),
|
||||
.product(name: "PeekabooAutomationKit", package: "Peekaboo"),
|
||||
.product(name: "MLXAudioTTS", package: "mlx-audio-swift"),
|
||||
],
|
||||
exclude: [
|
||||
"Resources/Info.plist",
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import Foundation
|
||||
import MLXAudioTTS
|
||||
import OSLog
|
||||
|
||||
// swiftformat:disable wrap wrapMultilineStatementBraces trailingCommas redundantSelf extensionAccessControl
|
||||
@@ -18,13 +17,14 @@ final class TalkMLXSpeechSynthesizer {
|
||||
|
||||
private let logger = Logger(subsystem: "ai.openclaw", category: "talk.mlx")
|
||||
private var currentToken = UUID()
|
||||
private var modelRepo: String?
|
||||
private var model: (any SpeechGenerationModel)?
|
||||
private var currentProcess: Process?
|
||||
|
||||
private init() {}
|
||||
|
||||
func stop() {
|
||||
self.currentToken = UUID()
|
||||
self.currentProcess?.terminate()
|
||||
self.currentProcess = nil
|
||||
}
|
||||
|
||||
func synthesize(
|
||||
@@ -39,59 +39,93 @@ final class TalkMLXSpeechSynthesizer {
|
||||
let token = UUID()
|
||||
self.currentToken = token
|
||||
|
||||
let tempDir = FileManager.default.temporaryDirectory
|
||||
.appendingPathComponent("openclaw-mlx-tts-\(token.uuidString)", isDirectory: true)
|
||||
try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true)
|
||||
defer { try? FileManager.default.removeItem(at: tempDir) }
|
||||
|
||||
let outputURL = tempDir.appendingPathComponent("speech.wav")
|
||||
let invocation = Self.helperInvocation()
|
||||
let resolvedRepo = Self.resolvedModelRepo(modelRepo)
|
||||
let rawModel = try await self.loadModel(
|
||||
modelRepo: resolvedRepo,
|
||||
token: token)
|
||||
let model = UncheckedSpeechModel(raw: rawModel)
|
||||
var arguments = invocation.argumentPrefix
|
||||
arguments += [
|
||||
"--text", trimmed,
|
||||
"--model", resolvedRepo,
|
||||
"--output", outputURL.path,
|
||||
]
|
||||
if let language = language?.trimmingCharacters(in: .whitespacesAndNewlines), !language.isEmpty {
|
||||
arguments += ["--language", language]
|
||||
}
|
||||
if let voicePreset = voicePreset?.trimmingCharacters(in: .whitespacesAndNewlines), !voicePreset.isEmpty {
|
||||
arguments += ["--voice", voicePreset]
|
||||
}
|
||||
|
||||
self.logger.info("talk mlx helper start modelRepo=\(resolvedRepo, privacy: .public)")
|
||||
let process = Process()
|
||||
process.executableURL = invocation.executableURL
|
||||
process.arguments = arguments
|
||||
let stderr = Pipe()
|
||||
process.standardError = stderr
|
||||
process.standardOutput = Pipe()
|
||||
self.currentProcess = process
|
||||
|
||||
let status: Int32
|
||||
do {
|
||||
status = try await Self.run(process)
|
||||
} catch {
|
||||
self.currentProcess = nil
|
||||
self.logger.error("talk mlx helper launch failed: \(error.localizedDescription, privacy: .public)")
|
||||
throw SynthesizeError.modelLoadFailed(invocation.displayName)
|
||||
}
|
||||
self.currentProcess = nil
|
||||
|
||||
guard self.currentToken == token else {
|
||||
throw SynthesizeError.canceled
|
||||
}
|
||||
|
||||
let audioData: Data
|
||||
do {
|
||||
let audio = try await model.generateAudio(
|
||||
text: trimmed,
|
||||
voice: voicePreset,
|
||||
language: language)
|
||||
audioData = Self.makeWavData(
|
||||
samples: audio,
|
||||
sampleRate: Double(model.sampleRateValue()))
|
||||
} catch {
|
||||
guard status == 0 else {
|
||||
let errorText = Self.readPipe(stderr)
|
||||
self.logger.error(
|
||||
"talk mlx generation failed: \(error.localizedDescription, privacy: .public)")
|
||||
"talk mlx helper failed status=\(status, privacy: .public): \(errorText, privacy: .public)")
|
||||
throw SynthesizeError.audioGenerationFailed
|
||||
}
|
||||
|
||||
guard self.currentToken == token else {
|
||||
throw SynthesizeError.canceled
|
||||
do {
|
||||
return try Data(contentsOf: outputURL)
|
||||
} catch {
|
||||
self.logger.error("talk mlx helper output missing: \(error.localizedDescription, privacy: .public)")
|
||||
throw SynthesizeError.audioGenerationFailed
|
||||
}
|
||||
return audioData
|
||||
}
|
||||
|
||||
private func loadModel(
|
||||
modelRepo: String,
|
||||
token: UUID) async throws -> any SpeechGenerationModel {
|
||||
if let model = self.model, self.modelRepo == modelRepo {
|
||||
return model
|
||||
private struct HelperInvocation {
|
||||
let executableURL: URL
|
||||
let argumentPrefix: [String]
|
||||
let displayName: String
|
||||
}
|
||||
|
||||
private static func helperInvocation() -> HelperInvocation {
|
||||
let fileManager = FileManager.default
|
||||
if let override = ProcessInfo.processInfo.environment["OPENCLAW_MLX_TTS_BIN"], !override.isEmpty {
|
||||
return HelperInvocation(
|
||||
executableURL: URL(fileURLWithPath: override),
|
||||
argumentPrefix: [],
|
||||
displayName: override)
|
||||
}
|
||||
|
||||
self.logger.info("talk mlx loading modelRepo=\(modelRepo, privacy: .public)")
|
||||
do {
|
||||
let model = try await TTS.loadModel(modelRepo: modelRepo)
|
||||
guard self.currentToken == token else {
|
||||
throw SynthesizeError.canceled
|
||||
if let executableDir = Bundle.main.executableURL?.deletingLastPathComponent() {
|
||||
let bundled = executableDir.appendingPathComponent("openclaw-mlx-tts")
|
||||
if fileManager.isExecutableFile(atPath: bundled.path) {
|
||||
return HelperInvocation(
|
||||
executableURL: bundled,
|
||||
argumentPrefix: [],
|
||||
displayName: bundled.path)
|
||||
}
|
||||
self.model = model
|
||||
self.modelRepo = modelRepo
|
||||
return model
|
||||
} catch is CancellationError {
|
||||
throw SynthesizeError.canceled
|
||||
} catch {
|
||||
self.logger.error(
|
||||
"talk mlx load failed: \(error.localizedDescription, privacy: .public)")
|
||||
throw SynthesizeError.modelLoadFailed(modelRepo)
|
||||
}
|
||||
|
||||
return HelperInvocation(
|
||||
executableURL: URL(fileURLWithPath: "/usr/bin/env"),
|
||||
argumentPrefix: ["openclaw-mlx-tts"],
|
||||
displayName: "openclaw-mlx-tts")
|
||||
}
|
||||
|
||||
private static func resolvedModelRepo(_ modelRepo: String?) -> String {
|
||||
@@ -99,80 +133,26 @@ final class TalkMLXSpeechSynthesizer {
|
||||
return trimmed.isEmpty ? Self.defaultModelRepo : trimmed
|
||||
}
|
||||
|
||||
private static func makeWavData(samples: [Float], sampleRate: Double) -> Data {
|
||||
let channels: UInt16 = 1
|
||||
let bitsPerSample: UInt16 = 16
|
||||
let blockAlign = channels * (bitsPerSample / 8)
|
||||
let sampleRateInt = UInt32(sampleRate.rounded())
|
||||
let byteRate = sampleRateInt * UInt32(blockAlign)
|
||||
let dataSize = UInt32(samples.count) * UInt32(blockAlign)
|
||||
|
||||
var data = Data(capacity: Int(44 + dataSize))
|
||||
data.append(contentsOf: [0x52, 0x49, 0x46, 0x46]) // RIFF
|
||||
data.appendLEUInt32(36 + dataSize)
|
||||
data.append(contentsOf: [0x57, 0x41, 0x56, 0x45]) // WAVE
|
||||
|
||||
data.append(contentsOf: [0x66, 0x6D, 0x74, 0x20]) // fmt
|
||||
data.appendLEUInt32(16)
|
||||
data.appendLEUInt16(1)
|
||||
data.appendLEUInt16(channels)
|
||||
data.appendLEUInt32(sampleRateInt)
|
||||
data.appendLEUInt32(byteRate)
|
||||
data.appendLEUInt16(blockAlign)
|
||||
data.appendLEUInt16(bitsPerSample)
|
||||
|
||||
data.append(contentsOf: [0x64, 0x61, 0x74, 0x61]) // data
|
||||
data.appendLEUInt32(dataSize)
|
||||
|
||||
for sample in samples {
|
||||
let clamped = max(-1.0, min(1.0, sample))
|
||||
let scaled = Int16((clamped * Float(Int16.max)).rounded())
|
||||
data.appendLEInt16(scaled)
|
||||
private static func run(_ process: Process) async throws -> Int32 {
|
||||
try await withCheckedThrowingContinuation { continuation in
|
||||
process.terminationHandler = { process in
|
||||
continuation.resume(returning: process.terminationStatus)
|
||||
}
|
||||
do {
|
||||
try process.run()
|
||||
} catch {
|
||||
continuation.resume(throwing: error)
|
||||
}
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
private static func readPipe(_ pipe: Pipe) -> String {
|
||||
let data = (try? pipe.fileHandleForReading.readToEnd()) ?? Data()
|
||||
let text = String(data: data, encoding: .utf8) ?? ""
|
||||
return text.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
}
|
||||
}
|
||||
|
||||
extension TalkMLXSpeechSynthesizer: @unchecked Sendable {}
|
||||
|
||||
private struct UncheckedSpeechModel {
|
||||
let raw: any SpeechGenerationModel
|
||||
|
||||
func sampleRateValue() -> Int {
|
||||
raw.sampleRate
|
||||
}
|
||||
|
||||
func generateAudio(
|
||||
text: String,
|
||||
voice: String?,
|
||||
language: String?) async throws -> [Float] {
|
||||
let generatedAudio = try await raw.generate(
|
||||
text: text,
|
||||
voice: voice,
|
||||
refAudio: nil,
|
||||
refText: nil,
|
||||
language: language)
|
||||
return generatedAudio.asArray(Float.self)
|
||||
}
|
||||
}
|
||||
|
||||
extension UncheckedSpeechModel: @unchecked Sendable {}
|
||||
|
||||
extension Data {
|
||||
fileprivate mutating func appendLEUInt16(_ value: UInt16) {
|
||||
var littleEndian = value.littleEndian
|
||||
Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) }
|
||||
}
|
||||
|
||||
fileprivate mutating func appendLEUInt32(_ value: UInt32) {
|
||||
var littleEndian = value.littleEndian
|
||||
Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) }
|
||||
}
|
||||
|
||||
fileprivate mutating func appendLEInt16(_ value: Int16) {
|
||||
var littleEndian = value.littleEndian
|
||||
Swift.withUnsafeBytes(of: &littleEndian) { append(contentsOf: $0) }
|
||||
}
|
||||
}
|
||||
|
||||
// swiftformat:enable wrap wrapMultilineStatementBraces trailingCommas redundantSelf extensionAccessControl
|
||||
|
||||
@@ -252,6 +252,12 @@ if [ -f "$APP_BUNDLE/Contents/MacOS/OpenClaw" ]; then
|
||||
echo "Signing main binary"; sign_item "$APP_BUNDLE/Contents/MacOS/OpenClaw" "$APP_ENTITLEMENTS"
|
||||
fi
|
||||
|
||||
# Sign bundled helper binaries before signing the app bundle.
|
||||
MLX_TTS_HELPER="$APP_BUNDLE/Contents/MacOS/openclaw-mlx-tts"
|
||||
if [ -f "$MLX_TTS_HELPER" ]; then
|
||||
echo "Signing MLX TTS helper"; sign_item "$MLX_TTS_HELPER" "$APP_ENTITLEMENTS"
|
||||
fi
|
||||
|
||||
# Sign Sparkle deeply if present
|
||||
SPARKLE="$APP_BUNDLE/Contents/Frameworks/Sparkle.framework"
|
||||
if [ -d "$SPARKLE" ]; then
|
||||
|
||||
@@ -8,6 +8,9 @@ ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
|
||||
APP_ROOT="$ROOT_DIR/dist/OpenClaw.app"
|
||||
BUILD_ROOT="$ROOT_DIR/apps/macos/.build"
|
||||
PRODUCT="OpenClaw"
|
||||
MLX_TTS_HELPER_PRODUCT="openclaw-mlx-tts"
|
||||
MLX_TTS_HELPER_ROOT="$ROOT_DIR/apps/macos-mlx-tts"
|
||||
MLX_TTS_HELPER_BUILD_ROOT="$MLX_TTS_HELPER_ROOT/.build"
|
||||
BUNDLE_ID="${BUNDLE_ID:-ai.openclaw.mac.debug}"
|
||||
PKG_VERSION="$(cd "$ROOT_DIR" && node -p "require('./package.json').version" 2>/dev/null || echo "0.0.0")"
|
||||
BUILD_TS=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||||
@@ -49,6 +52,14 @@ bin_for_arch() {
|
||||
echo "$(build_path_for_arch "$1")/$BUILD_CONFIG/$PRODUCT"
|
||||
}
|
||||
|
||||
helper_build_path_for_arch() {
|
||||
echo "$MLX_TTS_HELPER_BUILD_ROOT/$1"
|
||||
}
|
||||
|
||||
helper_bin_for_arch() {
|
||||
echo "$(helper_build_path_for_arch "$1")/$BUILD_CONFIG/$MLX_TTS_HELPER_PRODUCT"
|
||||
}
|
||||
|
||||
sparkle_framework_for_arch() {
|
||||
echo "$(build_path_for_arch "$1")/$BUILD_CONFIG/Sparkle.framework"
|
||||
}
|
||||
@@ -159,6 +170,7 @@ echo "🔨 Building $PRODUCT ($BUILD_CONFIG) [${BUILD_ARCHS[*]}]"
|
||||
for arch in "${BUILD_ARCHS[@]}"; do
|
||||
BUILD_PATH="$(build_path_for_arch "$arch")"
|
||||
swift build -c "$BUILD_CONFIG" --product "$PRODUCT" --build-path "$BUILD_PATH" --arch "$arch" -Xlinker -rpath -Xlinker @executable_path/../Frameworks
|
||||
swift build --package-path "$MLX_TTS_HELPER_ROOT" -c "$BUILD_CONFIG" --product "$MLX_TTS_HELPER_PRODUCT" --build-path "$(helper_build_path_for_arch "$arch")" --arch "$arch"
|
||||
done
|
||||
|
||||
BIN_PRIMARY="$(bin_for_arch "$PRIMARY_ARCH")"
|
||||
@@ -204,6 +216,18 @@ chmod +x "$APP_ROOT/Contents/MacOS/OpenClaw"
|
||||
# SwiftPM outputs ad-hoc signed binaries; strip the signature before install_name_tool to avoid warnings.
|
||||
/usr/bin/codesign --remove-signature "$APP_ROOT/Contents/MacOS/OpenClaw" 2>/dev/null || true
|
||||
|
||||
echo "🚚 Copying MLX TTS helper"
|
||||
cp "$(helper_bin_for_arch "$PRIMARY_ARCH")" "$APP_ROOT/Contents/MacOS/$MLX_TTS_HELPER_PRODUCT"
|
||||
if [[ "${#BUILD_ARCHS[@]}" -gt 1 ]]; then
|
||||
HELPER_BIN_INPUTS=()
|
||||
for arch in "${BUILD_ARCHS[@]}"; do
|
||||
HELPER_BIN_INPUTS+=("$(helper_bin_for_arch "$arch")")
|
||||
done
|
||||
/usr/bin/lipo -create "${HELPER_BIN_INPUTS[@]}" -output "$APP_ROOT/Contents/MacOS/$MLX_TTS_HELPER_PRODUCT"
|
||||
fi
|
||||
chmod +x "$APP_ROOT/Contents/MacOS/$MLX_TTS_HELPER_PRODUCT"
|
||||
/usr/bin/codesign --remove-signature "$APP_ROOT/Contents/MacOS/$MLX_TTS_HELPER_PRODUCT" 2>/dev/null || true
|
||||
|
||||
SPARKLE_FRAMEWORK_PRIMARY="$(sparkle_framework_for_arch "$PRIMARY_ARCH")"
|
||||
if [ -d "$SPARKLE_FRAMEWORK_PRIMARY" ]; then
|
||||
echo "✨ Embedding Sparkle.framework"
|
||||
|
||||
Reference in New Issue
Block a user