fix(ios): start incremental speech at soft boundaries (#33305)

Merged via squash.

Prepared head SHA: d1acf72317
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Reviewed-by: @mbelinky
This commit is contained in:
Mariano
2026-03-03 22:36:40 +00:00
committed by GitHub
parent 22e33ddda9
commit a36ccf4156
3 changed files with 38 additions and 2 deletions

View File

@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
- Docs/security hardening guidance: document Docker `DOCKER-USER` + UFW policy and add cross-linking from Docker install docs for VPS/public-host setups. (#27613) thanks @dorukardahan.
- iOS/Voice timing safety: guard system speech start/finish callbacks to the active utterance to avoid misattributed start events during rapid stop/restart cycles. (#33304) thanks @mbelinky; original implementation direction by @ngutman.
- iOS/Talk incremental speech pacing: allow long punctuation-free assistant chunks to start speaking at safe whitespace boundaries so voice responses begin sooner instead of waiting for terminal punctuation. (#33305) thanks @mbelinky; original implementation by @ngutman.
- Docs/tool-loop detection config keys: align `docs/tools/loop-detection.md` examples and field names with the current `tools.loopDetection` schema to prevent copy-paste validation failures from outdated keys. (#33182) Thanks @Mylszd.
- Gateway/session agent discovery: include disk-scanned agent IDs in `listConfiguredAgentIds` even when `agents.list` is configured, so disk-only/ACP agent sessions remain visible in gateway session aggregation and listings. (#32831) thanks @Sid-Qin.
- Discord/inbound debouncer: skip bot-own MESSAGE_CREATE events before they reach the debounce queue to avoid self-triggered slowdowns in busy servers. Thanks @thewilloftheshadow.

View File

@@ -1682,6 +1682,8 @@ final class TalkModeManager: NSObject {
}
private struct IncrementalSpeechBuffer {
private static let softBoundaryMinChars = 72
private(set) var latestText: String = ""
private(set) var directive: TalkDirective?
private var spokenOffset: Int = 0
@@ -1774,8 +1776,9 @@ private struct IncrementalSpeechBuffer {
}
if !inCodeBlock {
buffer.append(chars[idx])
if Self.isBoundary(chars[idx]) {
let currentChar = chars[idx]
buffer.append(currentChar)
if Self.isBoundary(currentChar) || Self.isSoftBoundary(currentChar, bufferedChars: buffer.count) {
lastBoundary = idx + 1
bufferAtBoundary = buffer
inCodeBlockAtBoundary = inCodeBlock
@@ -1802,6 +1805,10 @@ private struct IncrementalSpeechBuffer {
/// Reports whether `ch` ends a speakable segment outright: one of the
/// terminal punctuation marks `.`, `!`, `?`, or a `"\n"` line break.
private static func isBoundary(_ ch: Character) -> Bool {
    switch ch {
    case ".", "!", "?", "\n":
        return true
    default:
        return false
    }
}
/// Reports whether `ch` is an acceptable split point when no terminal
/// punctuation has been seen yet.
///
/// - Parameters:
///   - ch: The character that was just appended to the buffer.
///   - bufferedChars: Number of characters currently held in the buffer,
///     including `ch`.
/// - Returns: `true` only when `ch` is whitespace and the buffer has reached
///   at least `softBoundaryMinChars` characters.
private static func isSoftBoundary(_ ch: Character, bufferedChars: Int) -> Bool {
    guard ch.isWhitespace else { return false }
    return bufferedChars >= Self.softBoundaryMinChars
}
}
extension TalkModeManager {

View File

@@ -0,0 +1,28 @@
import Testing
@testable import OpenClaw
/// Covers the incremental speech buffer's soft-boundary splitting, driven
/// through `TalkModeManager`'s `_test_incremental…` hooks.
@MainActor
@Suite struct TalkModeIncrementalSpeechBufferTests {
    /// A long chunk with no terminal punctuation should still yield one segment,
    /// cut at whitespace once the 72-character soft-boundary minimum is reached.
    @Test func emitsSoftBoundaryBeforeTerminalPunctuation() throws {
        let manager = TalkModeManager(allowSimulatorCapture: true)
        manager._test_incrementalReset()

        let partial =
            "We start speaking earlier by splitting this long stream chunk at a whitespace boundary before punctuation arrives"
        let segments = manager._test_incrementalIngest(partial, isFinal: false)

        #expect(segments.count == 1)
        // `#expect` records a failure and keeps going, so indexing `segments[0]`
        // would trap on an empty result. `#require` ends the test cleanly instead.
        let segment = try #require(segments.first)
        // 72 mirrors the buffer's private `softBoundaryMinChars` threshold.
        #expect(segment.count >= 72)
        // The segment must be emitted before the whole chunk has been consumed.
        #expect(segment.count < partial.count)
    }

    /// A chunk shorter than the soft-boundary minimum and without punctuation
    /// must stay buffered: nothing is emitted while the stream is not final.
    @Test func keepsShortChunkBufferedWithoutPunctuation() {
        let manager = TalkModeManager(allowSimulatorCapture: true)
        manager._test_incrementalReset()

        let short = "short chunk without punctuation"
        let segments = manager._test_incrementalIngest(short, isFinal: false)

        #expect(segments.isEmpty)
    }
}