From fb7b798f96ada80c9a445ed7de8d4ef88245fffe Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 25 Apr 2026 20:43:01 +0100 Subject: [PATCH] fix(android): prevent duplicate talk playback --- CHANGELOG.md | 2 + .../ai/openclaw/app/voice/TalkModeManager.kt | 58 ++++++++++++++----- .../openclaw/app/voice/TalkModeManagerTest.kt | 45 ++++++++++++++ 3 files changed, 92 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0b263a1cba..7ee13f95b80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,8 @@ Docs: https://docs.openclaw.ai during Gateway startup and log the missing keys as a warning instead of a runtime startup error, while keeping explicit command/tool errors when used. Thanks @steipete. +- Android/Talk Mode: prevent duplicate TTS playback when fast or repeated final + chat events arrive while Talk Mode is waiting for its own response. Fixes #46546. - Tooling/check:changed: pass parent heavy-check lock markers to lint lanes so `pnpm check:changed` no longer waits on its own `lint:extensions` child. Thanks @steipete. diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt index 59a1ebec50a..e12c2bd7aa7 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt @@ -226,14 +226,15 @@ class TalkModeManager( // If this is a response we initiated, handle normally below. // Otherwise, if ttsOnAllResponses, finish streaming TTS on terminal events. val pending = pendingRunId - if (pending == null || runId != pending) { + val knownRun = pending == runId || hasRunCompletion(runId) + if (!knownRun) { if (ttsOnAllResponses && state == "final") { val text = extractTextFromChatEventMessage(obj["message"]) if (!text.isNullOrBlank()) { playTtsForText(text) } } - if (pending == null || runId != pending) return + return } Log.d(tag, "chat event arrived runId=$runId state=$state pendingRunId=$pendingRunId") val terminal = @@ -539,6 +540,7 @@ class TalkModeManager( private suspend fun sendChat(message: String, session: GatewaySession): String { val runId = UUID.randomUUID().toString() + armPendingRun(runId) val params = buildJsonObject { put("sessionKey", JsonPrimitive(mainSessionKey.ifBlank { "main" })) @@ -547,19 +549,29 @@ class TalkModeManager( put("timeoutMs", JsonPrimitive(30_000)) put("idempotencyKey", JsonPrimitive(runId)) } - val res = session.request("chat.send", params.toString()) - val parsed = parseRunId(res) ?: runId - if (parsed != runId) { - pendingRunId = parsed + try { + val res = session.request("chat.send", params.toString()) + val parsed = parseRunId(res) ?: runId + if (parsed != runId) { + pendingRunId = parsed + } + return parsed + } catch (err: Throwable) { + clearPendingRun(runId) + throw err } - return parsed } private suspend fun waitForChatFinal(runId: String): Boolean { - pendingFinal?.cancel() - val deferred = CompletableDeferred() - pendingRunId = runId - pendingFinal = deferred + consumeRunCompletion(runId)?.let { return it } + val deferred = + if (pendingRunId == runId) { + pendingFinal ?: armPendingRun(runId) + } else { + armPendingRun(runId) + } + + consumeRunCompletion(runId)?.let { return it } val result = withContext(Dispatchers.IO) { @@ -570,11 +582,25 @@ class TalkModeManager( } } - if (!result) { + if (!result && pendingRunId == runId) { + clearPendingRun(runId) + } + return result + } + + private fun armPendingRun(runId: String): CompletableDeferred { + pendingFinal?.cancel() + val deferred = CompletableDeferred() + pendingRunId = runId + pendingFinal = deferred + return deferred + } + + private fun clearPendingRun(runId: String) { + if (pendingRunId == runId) { pendingFinal = null pendingRunId = null } - return result } private fun cacheRunCompletion(runId: String, isFinal: Boolean) { @@ -593,6 +619,12 @@ class TalkModeManager( } } + private fun hasRunCompletion(runId: String): Boolean { + synchronized(completedRunsLock) { + return completedRunStates.containsKey(runId) + } + } + private fun consumeRunText(runId: String): String? { synchronized(completedRunsLock) { return completedRunTexts.remove(runId) diff --git a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt index 5edca654e2a..0aa6111360e 100644 --- a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt +++ b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt @@ -5,6 +5,7 @@ import ai.openclaw.app.gateway.DeviceAuthTokenStore import ai.openclaw.app.gateway.DeviceIdentityStore import ai.openclaw.app.gateway.GatewaySession import java.util.concurrent.atomic.AtomicLong +import kotlinx.coroutines.CompletableDeferred import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Job @@ -49,6 +50,34 @@ class TalkModeManagerTest { assertEquals(12L, playbackGeneration(manager).get()) } + @Test + fun duplicateFinalForPendingTalkRunDoesNotStartAllResponseTts() { + val manager = createManager() + val final = CompletableDeferred() + + manager.ttsOnAllResponses = true + setPrivateField(manager, "pendingRunId", "run-talk") + setPrivateField(manager, "pendingFinal", final) + + manager.handleGatewayEvent("chat", chatFinalPayload(runId = "run-talk", text = "spoken once")) + assertTrue(final.isCompleted) + assertEquals(0L, playbackGeneration(manager).get()) + + manager.handleGatewayEvent("chat", chatFinalPayload(runId = "run-talk", text = "spoken once")) + + assertEquals(0L, playbackGeneration(manager).get()) + } + + @Test + fun nonPendingFinalStillUsesAllResponseTts() { + val manager = createManager() + + manager.ttsOnAllResponses = true + manager.handleGatewayEvent("chat", chatFinalPayload(runId = "run-other", text = "speak this")) + + assertEquals(1L, playbackGeneration(manager).get()) + } + private fun createManager(): TalkModeManager { val app = RuntimeEnvironment.getApplication() val sessionJob = SupervisorJob() @@ -86,6 +115,22 @@ class TalkModeManagerTest { field.isAccessible = true return field.get(target) } + + private fun chatFinalPayload(runId: String, text: String): String { + return """ + { + "runId": "$runId", + "sessionKey": "main", + "state": "final", + "message": { + "role": "assistant", + "content": [ + { "type": "text", "text": "$text" } + ] + } + } + """.trimIndent() + } } private class InMemoryDeviceAuthStore : DeviceAuthTokenStore {