mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
fix(iOS): persist PCM fallback only on explicit API format rejection
This commit is contained in:
committed by
mbelinky
parent
4eb0fe69ca
commit
ccd55d1480
@@ -7,6 +7,23 @@ import Observation
|
||||
import OSLog
|
||||
import Speech
|
||||
|
||||
/// Lock-guarded holder for a single optional `Error`.
///
/// Declared `@unchecked Sendable` because every access to the stored error goes
/// through `lock`; the compiler cannot verify that on its own.
private final class StreamFailureBox: @unchecked Sendable {
    private let lock = NSLock()
    private var storedError: Error?

    /// Stores `error`, replacing whatever was recorded before.
    func set(_ error: Error) {
        self.lock.lock()
        defer { self.lock.unlock() }
        self.storedError = error
    }

    /// The most recently stored error, or `nil` if `set(_:)` has never been called.
    var value: Error? {
        self.lock.lock()
        let snapshot = self.storedError
        self.lock.unlock()
        return snapshot
    }
}
|
||||
|
||||
// This file intentionally centralizes talk mode state + behavior.
|
||||
// It's large, and splitting would force `private` -> `fileprivate` across many members.
|
||||
// We'll refactor into smaller files when the surface stabilizes.
|
||||
@@ -1040,7 +1057,7 @@ final class TalkModeManager: NSObject {
|
||||
let request = makeRequest(outputFormat: outputFormat)
|
||||
|
||||
let client = ElevenLabsTTSClient(apiKey: apiKey)
|
||||
let stream = client.streamSynthesize(voiceId: voiceId, request: request)
|
||||
let rawStream = client.streamSynthesize(voiceId: voiceId, request: request)
|
||||
|
||||
if self.interruptOnSpeech {
|
||||
do {
|
||||
@@ -1055,12 +1072,16 @@ final class TalkModeManager: NSObject {
|
||||
let sampleRate = TalkTTSValidation.pcmSampleRate(from: outputFormat)
|
||||
let result: StreamingPlaybackResult
|
||||
if let sampleRate {
|
||||
let streamFailure = StreamFailureBox()
|
||||
let stream = Self.monitorStreamFailures(rawStream, failureBox: streamFailure)
|
||||
self.lastPlaybackWasPCM = true
|
||||
var playback = await self.pcmPlayer.play(stream: stream, sampleRate: sampleRate)
|
||||
if !playback.finished, playback.interruptedAt == nil {
|
||||
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128")
|
||||
self.logger.warning("pcm playback failed; retrying mp3")
|
||||
self.pcmFormatUnavailable = true
|
||||
if Self.isPCMFormatRejectedByAPI(streamFailure.value) {
|
||||
self.pcmFormatUnavailable = true
|
||||
}
|
||||
self.lastPlaybackWasPCM = false
|
||||
let mp3Stream = client.streamSynthesize(
|
||||
voiceId: voiceId,
|
||||
@@ -1070,7 +1091,7 @@ final class TalkModeManager: NSObject {
|
||||
result = playback
|
||||
} else {
|
||||
self.lastPlaybackWasPCM = false
|
||||
result = await self.mp3Player.play(stream: stream)
|
||||
result = await self.mp3Player.play(stream: rawStream)
|
||||
}
|
||||
let duration = Date().timeIntervalSince(started)
|
||||
self.logger.info("elevenlabs stream finished=\(result.finished, privacy: .public) dur=\(duration, privacy: .public)s")
|
||||
@@ -1545,6 +1566,39 @@ final class TalkModeManager: NSObject {
|
||||
self.pcmFormatUnavailable ? "mp3_44100_128" : "pcm_44100"
|
||||
}
|
||||
|
||||
/// Returns a pass-through copy of `stream` that also records the upstream failure.
///
/// Each chunk is forwarded unchanged. If iterating `stream` throws, the error is
/// stored in `failureBox` before the returned stream is finished with that same
/// error, so the caller can inspect the cause after playback has ended.
/// Terminating the returned stream cancels the forwarding task.
///
/// - Parameters:
///   - stream: The upstream audio chunk stream to forward.
///   - failureBox: Receives the error thrown by `stream`, if any.
/// - Returns: A stream yielding the same chunks and terminal state as `stream`.
private static func monitorStreamFailures(
    _ stream: AsyncThrowingStream<Data, Error>,
    failureBox: StreamFailureBox
) -> AsyncThrowingStream<Data, Error> {
    AsyncThrowingStream { continuation in
        let forwarder = Task {
            do {
                for try await chunk in stream {
                    continuation.yield(chunk)
                }
            } catch {
                // Record first, then propagate, so the box is populated by the
                // time the consumer observes the failure.
                failureBox.set(error)
                continuation.finish(throwing: error)
                return
            }
            continuation.finish()
        }
        continuation.onTermination = { _ in forwarder.cancel() }
    }
}
|
||||
|
||||
/// Reports whether `error` looks like the ElevenLabs API rejecting the PCM output format.
///
/// Only errors in the `"ElevenLabsTTS"` domain with a code of 400 or above are
/// considered (presumably the HTTP status — confirm against the client). The
/// lowercased description must then mention one of the known format-rejection markers.
///
/// - Parameter error: The failure captured from the stream, or `nil` if none occurred.
/// - Returns: `true` when the error matches the rejection heuristics; otherwise `false`.
private static func isPCMFormatRejectedByAPI(_ error: Error?) -> Bool {
    guard let nsError = error as NSError? else { return false }

    let isAPIFailure = nsError.domain == "ElevenLabsTTS" && nsError.code >= 400
    if !isAPIFailure { return false }

    let description = nsError.userInfo[NSLocalizedDescriptionKey] as? String ?? nsError.localizedDescription
    let haystack = description.lowercased()
    let rejectionMarkers = ["output_format", "pcm_", "pcm ", "subscription_required"]
    return rejectionMarkers.contains { marker in haystack.contains(marker) }
}
|
||||
|
||||
private static func makeBufferedAudioStream(chunks: [Data]) -> AsyncThrowingStream<Data, Error> {
|
||||
AsyncThrowingStream { continuation in
|
||||
for chunk in chunks {
|
||||
@@ -1586,21 +1640,25 @@ final class TalkModeManager: NSObject {
|
||||
text: text,
|
||||
context: context,
|
||||
outputFormat: context.outputFormat)
|
||||
let stream: AsyncThrowingStream<Data, Error>
|
||||
let rawStream: AsyncThrowingStream<Data, Error>
|
||||
if let prefetchedAudio, !prefetchedAudio.chunks.isEmpty {
|
||||
stream = Self.makeBufferedAudioStream(chunks: prefetchedAudio.chunks)
|
||||
rawStream = Self.makeBufferedAudioStream(chunks: prefetchedAudio.chunks)
|
||||
} else {
|
||||
stream = client.streamSynthesize(voiceId: voiceId, request: request)
|
||||
rawStream = client.streamSynthesize(voiceId: voiceId, request: request)
|
||||
}
|
||||
let playbackFormat = prefetchedAudio?.outputFormat ?? context.outputFormat
|
||||
let sampleRate = TalkTTSValidation.pcmSampleRate(from: playbackFormat)
|
||||
let result: StreamingPlaybackResult
|
||||
if let sampleRate {
|
||||
let streamFailure = StreamFailureBox()
|
||||
let stream = Self.monitorStreamFailures(rawStream, failureBox: streamFailure)
|
||||
self.lastPlaybackWasPCM = true
|
||||
var playback = await self.pcmPlayer.play(stream: stream, sampleRate: sampleRate)
|
||||
if !playback.finished, playback.interruptedAt == nil {
|
||||
self.logger.warning("pcm playback failed; retrying mp3")
|
||||
self.pcmFormatUnavailable = true
|
||||
if Self.isPCMFormatRejectedByAPI(streamFailure.value) {
|
||||
self.pcmFormatUnavailable = true
|
||||
}
|
||||
self.lastPlaybackWasPCM = false
|
||||
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128")
|
||||
let mp3Stream = client.streamSynthesize(
|
||||
@@ -1614,7 +1672,7 @@ final class TalkModeManager: NSObject {
|
||||
result = playback
|
||||
} else {
|
||||
self.lastPlaybackWasPCM = false
|
||||
result = await self.mp3Player.play(stream: stream)
|
||||
result = await self.mp3Player.play(stream: rawStream)
|
||||
}
|
||||
if !result.finished, let interruptedAt = result.interruptedAt {
|
||||
self.lastInterruptedAtSeconds = interruptedAt
|
||||
@@ -2118,6 +2176,10 @@ private final class AudioTapDiagnostics: @unchecked Sendable {
|
||||
|
||||
#if DEBUG
|
||||
extension TalkModeManager {
|
||||
/// Test-only hook exposing the private `isPCMFormatRejectedByAPI(_:)` check.
static func _test_isPCMFormatRejectedByAPI(_ error: Error?) -> Bool {
    let rejected = Self.isPCMFormatRejectedByAPI(error)
    return rejected
}
|
||||
|
||||
func _test_seedTranscript(_ transcript: String) {
|
||||
self.lastTranscript = transcript
|
||||
self.lastHeard = Date()
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import Foundation
|
||||
import Testing
|
||||
@testable import OpenClaw
|
||||
|
||||
@@ -28,4 +29,22 @@ import Testing
|
||||
let selection = TalkModeManager.selectTalkProviderConfig(talk)
|
||||
#expect(selection == nil)
|
||||
}
|
||||
|
||||
/// An ElevenLabs-domain 403 whose description names the PCM output format counts as a rejection.
@Test func detectsPCMFormatRejectionFromElevenLabsError() {
    let userInfo: [String: Any] = [
        NSLocalizedDescriptionKey: "ElevenLabs failed: 403 subscription_required output_format=pcm_44100",
    ]
    let rejection = NSError(domain: "ElevenLabsTTS", code: 403, userInfo: userInfo)

    let detected = TalkModeManager._test_isPCMFormatRejectedByAPI(rejection)

    #expect(detected)
}
|
||||
|
||||
/// A local playback failure from another error domain must not be treated as an API format rejection.
@Test func ignoresGenericPlaybackFailuresForPCMFormatRejection() {
    let userInfo: [String: Any] = [NSLocalizedDescriptionKey: "queue enqueue failed"]
    let playbackFailure = NSError(domain: "StreamingAudio", code: -1, userInfo: userInfo)

    let detected = TalkModeManager._test_isPCMFormatRejectedByAPI(playbackFailure)

    #expect(!detected)
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user