From a36ccf4156f33a5c5bc8134e2a9d67fc6256fe53 Mon Sep 17 00:00:00 2001 From: Mariano <132747814+mbelinky@users.noreply.github.com> Date: Tue, 3 Mar 2026 22:36:40 +0000 Subject: [PATCH] fix(ios): start incremental speech at soft boundaries (#33305) Merged via squash. Prepared head SHA: d1acf723176f8e9d89f8c229610c9400ab661ec7 Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com> Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com> Reviewed-by: @mbelinky --- CHANGELOG.md | 1 + apps/ios/Sources/Voice/TalkModeManager.swift | 11 ++++++-- ...TalkModeIncrementalSpeechBufferTests.swift | 28 +++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 apps/ios/Tests/TalkModeIncrementalSpeechBufferTests.swift diff --git a/CHANGELOG.md b/CHANGELOG.md index 94f8bf6ee57..6c1f55ea2f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai - Docs/security hardening guidance: document Docker `DOCKER-USER` + UFW policy and add cross-linking from Docker install docs for VPS/public-host setups. (#27613) thanks @dorukardahan. - iOS/Voice timing safety: guard system speech start/finish callbacks to the active utterance to avoid misattributed start events during rapid stop/restart cycles. (#33304) thanks @mbelinky; original implementation direction by @ngutman. +- iOS/Talk incremental speech pacing: allow long punctuation-free assistant chunks to start speaking at safe whitespace boundaries so voice responses begin sooner instead of waiting for terminal punctuation. (#33305) thanks @mbelinky; original implementation by @ngutman. - Docs/tool-loop detection config keys: align `docs/tools/loop-detection.md` examples and field names with the current `tools.loopDetection` schema to prevent copy-paste validation failures from outdated keys. (#33182) Thanks @Mylszd. - Gateway/session agent discovery: include disk-scanned agent IDs in `listConfiguredAgentIds` even when `agents.list` is configured, so disk-only/ACP agent sessions remain visible in gateway session aggregation and listings. (#32831) thanks @Sid-Qin. - Discord/inbound debouncer: skip bot-own MESSAGE_CREATE events before they reach the debounce queue to avoid self-triggered slowdowns in busy servers. Thanks @thewilloftheshadow. diff --git a/apps/ios/Sources/Voice/TalkModeManager.swift b/apps/ios/Sources/Voice/TalkModeManager.swift index 01670d12980..921d3f8b182 100644 --- a/apps/ios/Sources/Voice/TalkModeManager.swift +++ b/apps/ios/Sources/Voice/TalkModeManager.swift @@ -1682,6 +1682,8 @@ final class TalkModeManager: NSObject { } private struct IncrementalSpeechBuffer { + private static let softBoundaryMinChars = 72 + private(set) var latestText: String = "" private(set) var directive: TalkDirective? private var spokenOffset: Int = 0 @@ -1774,8 +1776,9 @@ private struct IncrementalSpeechBuffer { } if !inCodeBlock { - buffer.append(chars[idx]) - if Self.isBoundary(chars[idx]) { + let currentChar = chars[idx] + buffer.append(currentChar) + if Self.isBoundary(currentChar) || Self.isSoftBoundary(currentChar, bufferedChars: buffer.count) { lastBoundary = idx + 1 bufferAtBoundary = buffer inCodeBlockAtBoundary = inCodeBlock @@ -1802,6 +1805,10 @@ private struct IncrementalSpeechBuffer { private static func isBoundary(_ ch: Character) -> Bool { ch == "." || ch == "!" || ch == "?" || ch == "\n" } + + private static func isSoftBoundary(_ ch: Character, bufferedChars: Int) -> Bool { + bufferedChars >= Self.softBoundaryMinChars && ch.isWhitespace + } } extension TalkModeManager { diff --git a/apps/ios/Tests/TalkModeIncrementalSpeechBufferTests.swift b/apps/ios/Tests/TalkModeIncrementalSpeechBufferTests.swift new file mode 100644 index 00000000000..9ca88618166 --- /dev/null +++ b/apps/ios/Tests/TalkModeIncrementalSpeechBufferTests.swift @@ -0,0 +1,28 @@ +import Testing +@testable import OpenClaw + +@MainActor +@Suite struct TalkModeIncrementalSpeechBufferTests { + @Test func emitsSoftBoundaryBeforeTerminalPunctuation() { + let manager = TalkModeManager(allowSimulatorCapture: true) + manager._test_incrementalReset() + + let partial = + "We start speaking earlier by splitting this long stream chunk at a whitespace boundary before punctuation arrives" + let segments = manager._test_incrementalIngest(partial, isFinal: false) + + #expect(segments.count == 1) + #expect(segments[0].count >= 72) + #expect(segments[0].count < partial.count) + } + + @Test func keepsShortChunkBufferedWithoutPunctuation() { + let manager = TalkModeManager(allowSimulatorCapture: true) + manager._test_incrementalReset() + + let short = "short chunk without punctuation" + let segments = manager._test_incrementalIngest(short, isFinal: false) + + #expect(segments.isEmpty) + } +}