From 4748ba491d172f432e1e0d88b0ed583ce7f057a9 Mon Sep 17 00:00:00 2001 From: Greg Mousseau Date: Sat, 28 Feb 2026 01:32:52 -0500 Subject: [PATCH] fix(android): chat history refresh and mic capture improvements for voice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ChatController: - final/aborted/error run events now trigger a history refresh regardless of whether the runId is in pendingRuns; only delta events require the run to be tracked (prevents voice-initiated responses from being silently dropped) MicCaptureManager: - Don't auto-send on onResults silence detection — accumulate transcript segments and send when mic is toggled off, giving the recognizer time to finish processing buffered audio - Capture any partial live transcript if no final segments arrived (2s drain window before stop) - Join multi-segment transcripts with sentence-ending punctuation to avoid run-on text sent to the gateway --- CHANGELOG.md | 13 ++++++++ .../openclaw/android/chat/ChatController.kt | 11 +++---- .../android/voice/MicCaptureManager.kt | 30 ++++++++++++++----- 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a30bd5b7744..150cce54528 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,19 @@ Docs: https://docs.openclaw.ai +## 2026.2.28 (Unreleased) + +### Changes + +- Android/Voice: streaming TTS via ElevenLabs WebSocket for low-latency playback, mic barge-in, voice screen lifecycle (stop TTS on tab switch), and chat message display after voice responses. (#XXXXX) Thanks @gregmousseau. + +### Fixes + +- Android/Voice: fix chat messages not appearing after voice input — ChatController was dropping final chat events for runs it did not initiate; now only delta events are guarded by the pending-run check so history refreshes correctly after any voice response. 
+- Android/Voice: fix streaming TTS silence on the second message — deferred EOS is sent in onOpen when finish() was called before the WebSocket connected, and sendText() now returns true (not false) when finished=true to avoid spurious restart loops. +- Android/Voice: fix dual TTS voices — finishStreamingTts coroutine now checks object identity before nulling streamingTts so a mid-drain restart cannot orphan a live TTS session. +- Android/Voice: fix streaming TTS chunks not being queued while the WebSocket is still connecting — moved null-webSocket guard inside the send block so initial chunks are queued rather than causing a false diverge restart. + ## 2026.2.27 ### Changes diff --git a/apps/android/app/src/main/java/ai/openclaw/android/chat/ChatController.kt b/apps/android/app/src/main/java/ai/openclaw/android/chat/ChatController.kt index 335f3b0d70b..a8009f80400 100644 --- a/apps/android/app/src/main/java/ai/openclaw/android/chat/ChatController.kt +++ b/apps/android/app/src/main/java/ai/openclaw/android/chat/ChatController.kt @@ -311,17 +311,14 @@ class ChatController( if (!sessionKey.isNullOrEmpty() && sessionKey != _sessionKey.value) return val runId = payload["runId"].asStringOrNull() - if (runId != null) { - val isPending = - synchronized(pendingRuns) { - pendingRuns.contains(runId) - } - if (!isPending) return - } + val isPending = + if (runId != null) synchronized(pendingRuns) { pendingRuns.contains(runId) } else true val state = payload["state"].asStringOrNull() when (state) { "delta" -> { + // Only show streaming text for runs we initiated + if (!isPending) return val text = parseAssistantDeltaText(payload) if (!text.isNullOrEmpty()) { _streamingAssistantText.value = text diff --git a/apps/android/app/src/main/java/ai/openclaw/android/voice/MicCaptureManager.kt b/apps/android/app/src/main/java/ai/openclaw/android/voice/MicCaptureManager.kt index 5d7336cdf6f..5457d2dc1aa 100644 --- a/apps/android/app/src/main/java/ai/openclaw/android/voice/MicCaptureManager.kt +++ 
b/apps/android/app/src/main/java/ai/openclaw/android/voice/MicCaptureManager.kt @@ -7,6 +7,7 @@ import android.content.pm.PackageManager import android.os.Bundle import android.os.Handler import android.os.Looper +import android.util.Log import android.speech.RecognitionListener import android.speech.RecognizerIntent import android.speech.SpeechRecognizer @@ -97,9 +98,18 @@ class MicCaptureManager( start() sendQueuedIfIdle() } else { - stop() - flushSessionToQueue() - sendQueuedIfIdle() + // Give the recognizer time to finish processing buffered audio + scope.launch { + delay(2000L) + stop() + // Capture any partial transcript that didn't get a final result from the recognizer + val partial = _liveTranscript.value?.trim().orEmpty() + if (partial.isNotEmpty() && sessionSegments.isEmpty()) { + sessionSegments.add(partial) + } + flushSessionToQueue() + sendQueuedIfIdle() + } } } @@ -124,9 +134,9 @@ class MicCaptureManager( null } ?: return - val runId = pendingRunId ?: return + val runId = pendingRunId ?: run { Log.d("MicCapture", "no pendingRunId — drop"); return } val eventRunId = payload["runId"].asStringOrNull() ?: return - if (eventRunId != runId) return + if (eventRunId != runId) { Log.d("MicCapture", "runId mismatch: event=$eventRunId pending=$runId"); return } when (payload["state"].asStringOrNull()) { "delta" -> { @@ -241,7 +251,11 @@ class MicCaptureManager( } private fun flushSessionToQueue() { - val message = sessionSegments.joinToString(" ").trim() + // Add sentence-ending punctuation between recognizer segments to avoid run-on text + val message = sessionSegments.joinToString(". ") { segment -> + val trimmed = segment.trimEnd() + if (trimmed.isNotEmpty() && trimmed.last() in ".!?,;:") trimmed else trimmed + }.trim().let { if (it.isNotEmpty() && it.last() !in ".!?") "$it." 
else it } sessionSegments.clear() _liveTranscript.value = null lastFinalSegment = null @@ -517,8 +531,8 @@ class MicCaptureManager( val text = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION).orEmpty().firstOrNull() if (!text.isNullOrBlank()) { onFinalTranscript(text) - flushSessionToQueue() - sendQueuedIfIdle() + // Don't auto-send on silence — accumulate transcript. + // Send happens when mic is toggled off (setMicEnabled(false)). } scheduleRestart() }