fix(ios): start incremental speech at soft boundaries (#33305)

Merged via squash. Prepared head SHA: d1acf72317 Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com> Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com> Reviewed-by: @mbelinky
2026-03-12 07:20:45 +00:00 · 2026-03-03 22:36:40 +00:00
parent 22e33ddda9
commit a36ccf4156
3 changed files with 38 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai

 - Docs/security hardening guidance: document Docker `DOCKER-USER` + UFW policy and add cross-linking from Docker install docs for VPS/public-host setups. (#27613) thanks @dorukardahan.
 - iOS/Voice timing safety: guard system speech start/finish callbacks to the active utterance to avoid misattributed start events during rapid stop/restart cycles. (#33304) thanks @mbelinky; original implementation direction by @ngutman.
+- iOS/Talk incremental speech pacing: allow long punctuation-free assistant chunks to start speaking at safe whitespace boundaries so voice responses begin sooner instead of waiting for terminal punctuation. (#33305) thanks @mbelinky; original implementation by @ngutman.
 - Docs/tool-loop detection config keys: align `docs/tools/loop-detection.md` examples and field names with the current `tools.loopDetection` schema to prevent copy-paste validation failures from outdated keys. (#33182) Thanks @Mylszd.
 - Gateway/session agent discovery: include disk-scanned agent IDs in `listConfiguredAgentIds` even when `agents.list` is configured, so disk-only/ACP agent sessions remain visible in gateway session aggregation and listings. (#32831) thanks @Sid-Qin.
 - Discord/inbound debouncer: skip bot-own MESSAGE_CREATE events before they reach the debounce queue to avoid self-triggered slowdowns in busy servers. Thanks @thewilloftheshadow.
--- a/apps/ios/Sources/Voice/TalkModeManager.swift
+++ b/apps/ios/Sources/Voice/TalkModeManager.swift
@@ -1682,6 +1682,8 @@ final class TalkModeManager: NSObject {
 }

 private struct IncrementalSpeechBuffer {
+    private static let softBoundaryMinChars = 72
+
    private(set) var latestText: String = ""
    private(set) var directive: TalkDirective?
    private var spokenOffset: Int = 0
@@ -1774,8 +1776,9 @@ private struct IncrementalSpeechBuffer {
            }

            if !inCodeBlock {
-                buffer.append(chars[idx])
-                if Self.isBoundary(chars[idx]) {
+                let currentChar = chars[idx]
+                buffer.append(currentChar)
+                if Self.isBoundary(currentChar) || Self.isSoftBoundary(currentChar, bufferedChars: buffer.count) {
                    lastBoundary = idx + 1
                    bufferAtBoundary = buffer
                    inCodeBlockAtBoundary = inCodeBlock
@@ -1802,6 +1805,10 @@ private struct IncrementalSpeechBuffer {
    private static func isBoundary(_ ch: Character) -> Bool {
        ch == "." || ch == "!" || ch == "?" || ch == "\n"
    }
+
+    private static func isSoftBoundary(_ ch: Character, bufferedChars: Int) -> Bool {
+        ch.isWhitespace && bufferedChars >= softBoundaryMinChars // whitespace only counts once enough text is buffered
+    }
 }

 extension TalkModeManager {
--- a/apps/ios/Tests/TalkModeIncrementalSpeechBufferTests.swift
+++ b/apps/ios/Tests/TalkModeIncrementalSpeechBufferTests.swift
@@ -0,0 +1,28 @@
+import Testing
+@testable import OpenClaw
+
+@MainActor
+@Suite struct TalkModeIncrementalSpeechBufferTests {
+    @Test func emitsSoftBoundaryBeforeTerminalPunctuation() throws {
+        let manager = TalkModeManager(allowSimulatorCapture: true)
+        manager._test_incrementalReset()
+        // Well past 72 characters and free of `.`, `!`, `?`, and newlines.
+        let partial =
+            "We start speaking earlier by splitting this long stream chunk at a whitespace boundary before punctuation arrives"
+        let segments = manager._test_incrementalIngest(partial, isFinal: false)
+        // Halt on a wrong count: subscripting an empty result below would trap instead of failing cleanly.
+        try #require(segments.count == 1)
+        #expect(segments[0].count >= 72)
+        #expect(segments[0].count < partial.count)
+    }
+
+    @Test func keepsShortChunkBufferedWithoutPunctuation() {
+        let manager = TalkModeManager(allowSimulatorCapture: true)
+        manager._test_incrementalReset()
+        // Under the 72-character soft-boundary minimum, with no terminal punctuation.
+        let short = "short chunk without punctuation"
+        let segments = manager._test_incrementalIngest(short, isFinal: false)
+
+        #expect(segments.isEmpty)
+    }
+}