mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-12 07:20:45 +00:00
Merged via /review-pr -> /prepare-pr -> /merge-pr.
Prepared head SHA: f77e3d7644
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Reviewed-by: @mbelinky
This commit is contained in:
@@ -26,6 +26,7 @@ Docs: https://docs.openclaw.ai
|
||||
- iOS/Gateway keychain hardening: move gateway metadata and TLS fingerprints to device keychain storage with safer migration behavior and rollback-safe writes to reduce credential loss risk during upgrades. (#33029) thanks @mbelinky.
|
||||
- iOS/Concurrency stability: replace risky shared-state access in camera and gateway connection paths with lock-protected access patterns to reduce crash risk under load. (#33241) thanks @mbelinky.
|
||||
- iOS/Security guardrails: limit production API-key sourcing to app config and make deep-link confirmation prompts safer by coalescing queued requests instead of silently dropping them. (#33031) thanks @mbelinky.
|
||||
- iOS/TTS playback fallback: keep voice playback resilient by switching from PCM to MP3 when provider format support is unavailable, while avoiding sticky fallback on generic local playback errors. (#33032) thanks @mbelinky.
|
||||
- Telegram/multi-account default routing clarity: warn only for ambiguous (2+) account setups without an explicit default, add `openclaw doctor` warnings for missing/invalid multi-account defaults across channels, and document explicit-default guidance for channel routing and Telegram config. (#32544) thanks @Sid-Qin.
|
||||
- Telegram/plugin outbound hook parity: run `message_sending` + `message_sent` in Telegram reply delivery, include reply-path hook metadata (`mediaUrls`, `threadId`), and report `message_sent.success=false` when hooks blank text and no outbound message is delivered. (#32649) Thanks @KimGLee.
|
||||
- Agents/Skills runtime loading: propagate run config into embedded attempt and compaction skill-entry loading so explicitly enabled bundled companion skills are discovered consistently when skill snapshots do not already provide resolved entries. Thanks @gumadeiras.
|
||||
|
||||
@@ -7,6 +7,23 @@ import Observation
|
||||
import OSLog
|
||||
import Speech
|
||||
|
||||
private final class StreamFailureBox: @unchecked Sendable {
|
||||
private let lock = NSLock()
|
||||
private var valueInternal: Error?
|
||||
|
||||
func set(_ error: Error) {
|
||||
self.lock.lock()
|
||||
self.valueInternal = error
|
||||
self.lock.unlock()
|
||||
}
|
||||
|
||||
var value: Error? {
|
||||
self.lock.lock()
|
||||
defer { self.lock.unlock() }
|
||||
return self.valueInternal
|
||||
}
|
||||
}
|
||||
|
||||
// This file intentionally centralizes talk mode state + behavior.
|
||||
// It's large, and splitting would force `private` -> `fileprivate` across many members.
|
||||
// We'll refactor into smaller files when the surface stabilizes.
|
||||
@@ -72,6 +89,9 @@ final class TalkModeManager: NSObject {
|
||||
private var mainSessionKey: String = "main"
|
||||
private var fallbackVoiceId: String?
|
||||
private var lastPlaybackWasPCM: Bool = false
|
||||
/// Set when the ElevenLabs API rejects PCM format (e.g. 403 subscription_required).
|
||||
/// Once set, all subsequent requests in this session use MP3 instead of re-trying PCM.
|
||||
private var pcmFormatUnavailable: Bool = false
|
||||
var pcmPlayer: PCMStreamingAudioPlaying = PCMStreamingAudioPlayer.shared
|
||||
var mp3Player: StreamingAudioPlaying = StreamingAudioPlayer.shared
|
||||
|
||||
@@ -1007,7 +1027,8 @@ final class TalkModeManager: NSObject {
|
||||
let desiredOutputFormat = (directive?.outputFormat ?? self.defaultOutputFormat)?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let requestedOutputFormat = (desiredOutputFormat?.isEmpty == false) ? desiredOutputFormat : nil
|
||||
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(requestedOutputFormat ?? "pcm_44100")
|
||||
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(
|
||||
requestedOutputFormat ?? self.effectiveDefaultOutputFormat)
|
||||
if outputFormat == nil, let requestedOutputFormat {
|
||||
self.logger.warning(
|
||||
"talk output_format unsupported for local playback: \(requestedOutputFormat, privacy: .public)")
|
||||
@@ -1036,7 +1057,7 @@ final class TalkModeManager: NSObject {
|
||||
let request = makeRequest(outputFormat: outputFormat)
|
||||
|
||||
let client = ElevenLabsTTSClient(apiKey: apiKey)
|
||||
let stream = client.streamSynthesize(voiceId: voiceId, request: request)
|
||||
let rawStream = client.streamSynthesize(voiceId: voiceId, request: request)
|
||||
|
||||
if self.interruptOnSpeech {
|
||||
do {
|
||||
@@ -1051,11 +1072,16 @@ final class TalkModeManager: NSObject {
|
||||
let sampleRate = TalkTTSValidation.pcmSampleRate(from: outputFormat)
|
||||
let result: StreamingPlaybackResult
|
||||
if let sampleRate {
|
||||
let streamFailure = StreamFailureBox()
|
||||
let stream = Self.monitorStreamFailures(rawStream, failureBox: streamFailure)
|
||||
self.lastPlaybackWasPCM = true
|
||||
var playback = await self.pcmPlayer.play(stream: stream, sampleRate: sampleRate)
|
||||
if !playback.finished, playback.interruptedAt == nil {
|
||||
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100")
|
||||
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128")
|
||||
self.logger.warning("pcm playback failed; retrying mp3")
|
||||
if Self.isPCMFormatRejectedByAPI(streamFailure.value) {
|
||||
self.pcmFormatUnavailable = true
|
||||
}
|
||||
self.lastPlaybackWasPCM = false
|
||||
let mp3Stream = client.streamSynthesize(
|
||||
voiceId: voiceId,
|
||||
@@ -1065,7 +1091,7 @@ final class TalkModeManager: NSObject {
|
||||
result = playback
|
||||
} else {
|
||||
self.lastPlaybackWasPCM = false
|
||||
result = await self.mp3Player.play(stream: stream)
|
||||
result = await self.mp3Player.play(stream: rawStream)
|
||||
}
|
||||
let duration = Date().timeIntervalSince(started)
|
||||
self.logger.info("elevenlabs stream finished=\(result.finished, privacy: .public) dur=\(duration, privacy: .public)s")
|
||||
@@ -1391,7 +1417,7 @@ final class TalkModeManager: NSObject {
|
||||
|
||||
private func resolveIncrementalPrefetchOutputFormat(context: IncrementalSpeechContext) -> String? {
|
||||
if TalkTTSValidation.pcmSampleRate(from: context.outputFormat) != nil {
|
||||
return ElevenLabsTTSClient.validatedOutputFormat("mp3_44100")
|
||||
return ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128")
|
||||
}
|
||||
return context.outputFormat
|
||||
}
|
||||
@@ -1480,7 +1506,8 @@ final class TalkModeManager: NSObject {
|
||||
let desiredOutputFormat = (directive?.outputFormat ?? self.defaultOutputFormat)?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let requestedOutputFormat = (desiredOutputFormat?.isEmpty == false) ? desiredOutputFormat : nil
|
||||
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(requestedOutputFormat ?? "pcm_44100")
|
||||
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(
|
||||
requestedOutputFormat ?? self.effectiveDefaultOutputFormat)
|
||||
if outputFormat == nil, let requestedOutputFormat {
|
||||
self.logger.warning(
|
||||
"talk output_format unsupported for local playback: \(requestedOutputFormat, privacy: .public)")
|
||||
@@ -1534,6 +1561,44 @@ final class TalkModeManager: NSObject {
|
||||
latencyTier: TalkTTSValidation.validatedLatencyTier(context.directive?.latencyTier))
|
||||
}
|
||||
|
||||
/// Returns `mp3_44100_128` when the API has already rejected PCM, otherwise `pcm_44100`.
|
||||
private var effectiveDefaultOutputFormat: String {
|
||||
self.pcmFormatUnavailable ? "mp3_44100_128" : "pcm_44100"
|
||||
}
|
||||
|
||||
private static func monitorStreamFailures(
|
||||
_ stream: AsyncThrowingStream<Data, Error>,
|
||||
failureBox: StreamFailureBox
|
||||
) -> AsyncThrowingStream<Data, Error>
|
||||
{
|
||||
AsyncThrowingStream { continuation in
|
||||
let task = Task {
|
||||
do {
|
||||
for try await chunk in stream {
|
||||
continuation.yield(chunk)
|
||||
}
|
||||
continuation.finish()
|
||||
} catch {
|
||||
failureBox.set(error)
|
||||
continuation.finish(throwing: error)
|
||||
}
|
||||
}
|
||||
continuation.onTermination = { _ in
|
||||
task.cancel()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static func isPCMFormatRejectedByAPI(_ error: Error?) -> Bool {
|
||||
guard let error = error as NSError? else { return false }
|
||||
guard error.domain == "ElevenLabsTTS", error.code >= 400 else { return false }
|
||||
let message = (error.userInfo[NSLocalizedDescriptionKey] as? String ?? error.localizedDescription).lowercased()
|
||||
return message.contains("output_format")
|
||||
|| message.contains("pcm_")
|
||||
|| message.contains("pcm ")
|
||||
|| message.contains("subscription_required")
|
||||
}
|
||||
|
||||
private static func makeBufferedAudioStream(chunks: [Data]) -> AsyncThrowingStream<Data, Error> {
|
||||
AsyncThrowingStream { continuation in
|
||||
for chunk in chunks {
|
||||
@@ -1575,22 +1640,27 @@ final class TalkModeManager: NSObject {
|
||||
text: text,
|
||||
context: context,
|
||||
outputFormat: context.outputFormat)
|
||||
let stream: AsyncThrowingStream<Data, Error>
|
||||
let rawStream: AsyncThrowingStream<Data, Error>
|
||||
if let prefetchedAudio, !prefetchedAudio.chunks.isEmpty {
|
||||
stream = Self.makeBufferedAudioStream(chunks: prefetchedAudio.chunks)
|
||||
rawStream = Self.makeBufferedAudioStream(chunks: prefetchedAudio.chunks)
|
||||
} else {
|
||||
stream = client.streamSynthesize(voiceId: voiceId, request: request)
|
||||
rawStream = client.streamSynthesize(voiceId: voiceId, request: request)
|
||||
}
|
||||
let playbackFormat = prefetchedAudio?.outputFormat ?? context.outputFormat
|
||||
let sampleRate = TalkTTSValidation.pcmSampleRate(from: playbackFormat)
|
||||
let result: StreamingPlaybackResult
|
||||
if let sampleRate {
|
||||
let streamFailure = StreamFailureBox()
|
||||
let stream = Self.monitorStreamFailures(rawStream, failureBox: streamFailure)
|
||||
self.lastPlaybackWasPCM = true
|
||||
var playback = await self.pcmPlayer.play(stream: stream, sampleRate: sampleRate)
|
||||
if !playback.finished, playback.interruptedAt == nil {
|
||||
self.logger.warning("pcm playback failed; retrying mp3")
|
||||
if Self.isPCMFormatRejectedByAPI(streamFailure.value) {
|
||||
self.pcmFormatUnavailable = true
|
||||
}
|
||||
self.lastPlaybackWasPCM = false
|
||||
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100")
|
||||
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128")
|
||||
let mp3Stream = client.streamSynthesize(
|
||||
voiceId: voiceId,
|
||||
request: self.makeIncrementalTTSRequest(
|
||||
@@ -1602,7 +1672,7 @@ final class TalkModeManager: NSObject {
|
||||
result = playback
|
||||
} else {
|
||||
self.lastPlaybackWasPCM = false
|
||||
result = await self.mp3Player.play(stream: stream)
|
||||
result = await self.mp3Player.play(stream: rawStream)
|
||||
}
|
||||
if !result.finished, let interruptedAt = result.interruptedAt {
|
||||
self.lastInterruptedAtSeconds = interruptedAt
|
||||
@@ -1926,6 +1996,7 @@ extension TalkModeManager {
|
||||
|
||||
func reloadConfig() async {
|
||||
guard let gateway else { return }
|
||||
self.pcmFormatUnavailable = false
|
||||
do {
|
||||
let res = try await gateway.request(
|
||||
method: "talk.config",
|
||||
@@ -2105,6 +2176,10 @@ private final class AudioTapDiagnostics: @unchecked Sendable {
|
||||
|
||||
#if DEBUG
|
||||
extension TalkModeManager {
|
||||
static func _test_isPCMFormatRejectedByAPI(_ error: Error?) -> Bool {
|
||||
self.isPCMFormatRejectedByAPI(error)
|
||||
}
|
||||
|
||||
func _test_seedTranscript(_ transcript: String) {
|
||||
self.lastTranscript = transcript
|
||||
self.lastHeard = Date()
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import Foundation
|
||||
import Testing
|
||||
@testable import OpenClaw
|
||||
|
||||
@@ -28,4 +29,22 @@ import Testing
|
||||
let selection = TalkModeManager.selectTalkProviderConfig(talk)
|
||||
#expect(selection == nil)
|
||||
}
|
||||
|
||||
@Test func detectsPCMFormatRejectionFromElevenLabsError() {
|
||||
let error = NSError(
|
||||
domain: "ElevenLabsTTS",
|
||||
code: 403,
|
||||
userInfo: [
|
||||
NSLocalizedDescriptionKey: "ElevenLabs failed: 403 subscription_required output_format=pcm_44100",
|
||||
])
|
||||
#expect(TalkModeManager._test_isPCMFormatRejectedByAPI(error))
|
||||
}
|
||||
|
||||
@Test func ignoresGenericPlaybackFailuresForPCMFormatRejection() {
|
||||
let error = NSError(
|
||||
domain: "StreamingAudio",
|
||||
code: -1,
|
||||
userInfo: [NSLocalizedDescriptionKey: "queue enqueue failed"])
|
||||
#expect(TalkModeManager._test_isPCMFormatRejectedByAPI(error) == false)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user