diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d018dd872b..94f8bf6ee57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Docs/security hardening guidance: document Docker `DOCKER-USER` + UFW policy and add cross-linking from Docker install docs for VPS/public-host setups. (#27613) thanks @dorukardahan. +- iOS/Voice timing safety: guard system speech start/finish callbacks to the active utterance to avoid misattributed start events during rapid stop/restart cycles. (#33304) thanks @mbelinky; original implementation direction by @ngutman. - Docs/tool-loop detection config keys: align `docs/tools/loop-detection.md` examples and field names with the current `tools.loopDetection` schema to prevent copy-paste validation failures from outdated keys. (#33182) Thanks @Mylszd. - Gateway/session agent discovery: include disk-scanned agent IDs in `listConfiguredAgentIds` even when `agents.list` is configured, so disk-only/ACP agent sessions remain visible in gateway session aggregation and listings. (#32831) thanks @Sid-Qin. - Discord/inbound debouncer: skip bot-own MESSAGE_CREATE events before they reach the debounce queue to avoid self-triggered slowdowns in busy servers. Thanks @thewilloftheshadow. diff --git a/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkSystemSpeechSynthesizer.swift b/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkSystemSpeechSynthesizer.swift index 4cfc536da87..16dd9b9d968 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkSystemSpeechSynthesizer.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawKit/TalkSystemSpeechSynthesizer.swift @@ -12,6 +12,7 @@ public final class TalkSystemSpeechSynthesizer: NSObject { private let synth = AVSpeechSynthesizer() private var speakContinuation: CheckedContinuation<Void, Error>? private var currentUtterance: AVSpeechUtterance? + private var didStartCallback: (() -> Void)? private var currentToken = UUID() private var watchdog: Task<Void, Never>? 
@@ -26,17 +27,23 @@ public final class TalkSystemSpeechSynthesizer: NSObject { self.currentToken = UUID() self.watchdog?.cancel() self.watchdog = nil + self.didStartCallback = nil self.synth.stopSpeaking(at: .immediate) self.finishCurrent(with: SpeakError.canceled) } - public func speak(text: String, language: String? = nil) async throws { + public func speak( + text: String, + language: String? = nil, + onStart: (() -> Void)? = nil + ) async throws { let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) guard !trimmed.isEmpty else { return } self.stop() let token = UUID() self.currentToken = token + self.didStartCallback = onStart let utterance = AVSpeechUtterance(string: trimmed) if let language, let voice = AVSpeechSynthesisVoice(language: language) { @@ -76,8 +83,13 @@ public final class TalkSystemSpeechSynthesizer: NSObject { } } - private func handleFinish(error: Error?) { - guard self.currentUtterance != nil else { return } + private func matchesCurrentUtterance(_ utteranceID: ObjectIdentifier) -> Bool { + guard let currentUtterance = self.currentUtterance else { return false } + return ObjectIdentifier(currentUtterance) == utteranceID + } + + private func handleFinish(utteranceID: ObjectIdentifier, error: Error?) { + guard self.matchesCurrentUtterance(utteranceID) else { return } self.watchdog?.cancel() self.watchdog = nil self.finishCurrent(with: error) @@ -85,6 +97,7 @@ public final class TalkSystemSpeechSynthesizer: NSObject { private func finishCurrent(with error: Error?) 
{ self.currentUtterance = nil + self.didStartCallback = nil let cont = self.speakContinuation self.speakContinuation = nil if let error { @@ -96,12 +109,26 @@ public final class TalkSystemSpeechSynthesizer: NSObject { } extension TalkSystemSpeechSynthesizer: AVSpeechSynthesizerDelegate { + public nonisolated func speechSynthesizer( + _ synthesizer: AVSpeechSynthesizer, + didStart utterance: AVSpeechUtterance) + { + let utteranceID = ObjectIdentifier(utterance) + Task { @MainActor in + guard self.matchesCurrentUtterance(utteranceID) else { return } + let callback = self.didStartCallback + self.didStartCallback = nil + callback?() + } + } + public nonisolated func speechSynthesizer( _ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) { + let utteranceID = ObjectIdentifier(utterance) Task { @MainActor in - self.handleFinish(error: nil) + self.handleFinish(utteranceID: utteranceID, error: nil) } } @@ -109,8 +136,9 @@ extension TalkSystemSpeechSynthesizer: AVSpeechSynthesizerDelegate { _ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance) { + let utteranceID = ObjectIdentifier(utterance) Task { @MainActor in - self.handleFinish(error: SpeakError.canceled) + self.handleFinish(utteranceID: utteranceID, error: SpeakError.canceled) } } }