fix(iOS): persist PCM fallback only on explicit API format rejection

This commit is contained in:
Mariano Belinky
2026-03-03 14:35:15 +01:00
committed by mbelinky
parent 4eb0fe69ca
commit ccd55d1480
2 changed files with 89 additions and 8 deletions

View File

@@ -7,6 +7,23 @@ import Observation
import OSLog
import Speech
private final class StreamFailureBox: @unchecked Sendable {
private let lock = NSLock()
private var valueInternal: Error?
func set(_ error: Error) {
self.lock.lock()
self.valueInternal = error
self.lock.unlock()
}
var value: Error? {
self.lock.lock()
defer { self.lock.unlock() }
return self.valueInternal
}
}
// This file intentionally centralizes talk mode state + behavior.
// It's large, and splitting would force `private` -> `fileprivate` across many members.
// We'll refactor into smaller files when the surface stabilizes.
@@ -1040,7 +1057,7 @@ final class TalkModeManager: NSObject {
let request = makeRequest(outputFormat: outputFormat)
let client = ElevenLabsTTSClient(apiKey: apiKey)
let stream = client.streamSynthesize(voiceId: voiceId, request: request)
let rawStream = client.streamSynthesize(voiceId: voiceId, request: request)
if self.interruptOnSpeech {
do {
@@ -1055,12 +1072,16 @@ final class TalkModeManager: NSObject {
let sampleRate = TalkTTSValidation.pcmSampleRate(from: outputFormat)
let result: StreamingPlaybackResult
if let sampleRate {
let streamFailure = StreamFailureBox()
let stream = Self.monitorStreamFailures(rawStream, failureBox: streamFailure)
self.lastPlaybackWasPCM = true
var playback = await self.pcmPlayer.play(stream: stream, sampleRate: sampleRate)
if !playback.finished, playback.interruptedAt == nil {
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128")
self.logger.warning("pcm playback failed; retrying mp3")
self.pcmFormatUnavailable = true
if Self.isPCMFormatRejectedByAPI(streamFailure.value) {
self.pcmFormatUnavailable = true
}
self.lastPlaybackWasPCM = false
let mp3Stream = client.streamSynthesize(
voiceId: voiceId,
@@ -1070,7 +1091,7 @@ final class TalkModeManager: NSObject {
result = playback
} else {
self.lastPlaybackWasPCM = false
result = await self.mp3Player.play(stream: stream)
result = await self.mp3Player.play(stream: rawStream)
}
let duration = Date().timeIntervalSince(started)
self.logger.info("elevenlabs stream finished=\(result.finished, privacy: .public) dur=\(duration, privacy: .public)s")
@@ -1545,6 +1566,39 @@ final class TalkModeManager: NSObject {
self.pcmFormatUnavailable ? "mp3_44100_128" : "pcm_44100"
}
private static func monitorStreamFailures(
_ stream: AsyncThrowingStream<Data, Error>,
failureBox: StreamFailureBox
) -> AsyncThrowingStream<Data, Error>
{
AsyncThrowingStream { continuation in
let task = Task {
do {
for try await chunk in stream {
continuation.yield(chunk)
}
continuation.finish()
} catch {
failureBox.set(error)
continuation.finish(throwing: error)
}
}
continuation.onTermination = { _ in
task.cancel()
}
}
}
private static func isPCMFormatRejectedByAPI(_ error: Error?) -> Bool {
guard let error = error as NSError? else { return false }
guard error.domain == "ElevenLabsTTS", error.code >= 400 else { return false }
let message = (error.userInfo[NSLocalizedDescriptionKey] as? String ?? error.localizedDescription).lowercased()
return message.contains("output_format")
|| message.contains("pcm_")
|| message.contains("pcm ")
|| message.contains("subscription_required")
}
private static func makeBufferedAudioStream(chunks: [Data]) -> AsyncThrowingStream<Data, Error> {
AsyncThrowingStream { continuation in
for chunk in chunks {
@@ -1586,21 +1640,25 @@ final class TalkModeManager: NSObject {
text: text,
context: context,
outputFormat: context.outputFormat)
let stream: AsyncThrowingStream<Data, Error>
let rawStream: AsyncThrowingStream<Data, Error>
if let prefetchedAudio, !prefetchedAudio.chunks.isEmpty {
stream = Self.makeBufferedAudioStream(chunks: prefetchedAudio.chunks)
rawStream = Self.makeBufferedAudioStream(chunks: prefetchedAudio.chunks)
} else {
stream = client.streamSynthesize(voiceId: voiceId, request: request)
rawStream = client.streamSynthesize(voiceId: voiceId, request: request)
}
let playbackFormat = prefetchedAudio?.outputFormat ?? context.outputFormat
let sampleRate = TalkTTSValidation.pcmSampleRate(from: playbackFormat)
let result: StreamingPlaybackResult
if let sampleRate {
let streamFailure = StreamFailureBox()
let stream = Self.monitorStreamFailures(rawStream, failureBox: streamFailure)
self.lastPlaybackWasPCM = true
var playback = await self.pcmPlayer.play(stream: stream, sampleRate: sampleRate)
if !playback.finished, playback.interruptedAt == nil {
self.logger.warning("pcm playback failed; retrying mp3")
self.pcmFormatUnavailable = true
if Self.isPCMFormatRejectedByAPI(streamFailure.value) {
self.pcmFormatUnavailable = true
}
self.lastPlaybackWasPCM = false
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128")
let mp3Stream = client.streamSynthesize(
@@ -1614,7 +1672,7 @@ final class TalkModeManager: NSObject {
result = playback
} else {
self.lastPlaybackWasPCM = false
result = await self.mp3Player.play(stream: stream)
result = await self.mp3Player.play(stream: rawStream)
}
if !result.finished, let interruptedAt = result.interruptedAt {
self.lastInterruptedAtSeconds = interruptedAt
@@ -2118,6 +2176,10 @@ private final class AudioTapDiagnostics: @unchecked Sendable {
#if DEBUG
extension TalkModeManager {
static func _test_isPCMFormatRejectedByAPI(_ error: Error?) -> Bool {
self.isPCMFormatRejectedByAPI(error)
}
func _test_seedTranscript(_ transcript: String) {
self.lastTranscript = transcript
self.lastHeard = Date()

View File

@@ -1,3 +1,4 @@
import Foundation
import Testing
@testable import OpenClaw
@@ -28,4 +29,22 @@ import Testing
let selection = TalkModeManager.selectTalkProviderConfig(talk)
#expect(selection == nil)
}
@Test func detectsPCMFormatRejectionFromElevenLabsError() {
let error = NSError(
domain: "ElevenLabsTTS",
code: 403,
userInfo: [
NSLocalizedDescriptionKey: "ElevenLabs failed: 403 subscription_required output_format=pcm_44100",
])
#expect(TalkModeManager._test_isPCMFormatRejectedByAPI(error))
}
@Test func ignoresGenericPlaybackFailuresForPCMFormatRejection() {
let error = NSError(
domain: "StreamingAudio",
code: -1,
userInfo: [NSLocalizedDescriptionKey: "queue enqueue failed"])
#expect(TalkModeManager._test_isPCMFormatRejectedByAPI(error) == false)
}
}