diff --git a/apps/android/app/src/debug/AndroidManifest.xml b/apps/android/app/src/debug/AndroidManifest.xml
new file mode 100644
index 00000000000..2107cb4cb0c
--- /dev/null
+++ b/apps/android/app/src/debug/AndroidManifest.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/apps/android/app/src/debug/java/ai/openclaw/app/VoiceE2eReceiver.kt b/apps/android/app/src/debug/java/ai/openclaw/app/VoiceE2eReceiver.kt
new file mode 100644
index 00000000000..caff4e5d338
--- /dev/null
+++ b/apps/android/app/src/debug/java/ai/openclaw/app/VoiceE2eReceiver.kt
@@ -0,0 +1,188 @@
+package ai.openclaw.app
+
+import android.app.Service
+import android.content.BroadcastReceiver
+import android.content.Context
+import android.content.Intent
+import android.os.IBinder
+import android.util.Base64
+import android.util.Log
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.SupervisorJob
+import kotlinx.coroutines.cancel
+import kotlinx.coroutines.delay
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.withTimeout
+import kotlinx.serialization.json.JsonNull
+import kotlinx.serialization.json.JsonPrimitive
+import kotlinx.serialization.json.buildJsonObject
+import java.io.File
+
+// Logcat tag used when logging voice E2E PASS/FAIL results.
+private const val tag = "VoiceE2E"
+// Name of the JSON result file written into the app cache dir; apps/android/scripts/voice-e2e.sh
+// polls it as cache/voice_e2e_result.json.
+private const val resultFileName = "voice_e2e_result.json"
+
+/**
+ * Debug-only entry point for the voice E2E harness: forwards the extras of a received broadcast
+ * to [VoiceE2eService], which executes the command on a background dispatcher.
+ */
+class VoiceE2eReceiver : BroadcastReceiver() {
+  override fun onReceive(
+    context: Context,
+    intent: Intent,
+  ) {
+    val serviceIntent = Intent(context, VoiceE2eService::class.java).putExtras(intent)
+    try {
+      context.startService(serviceIntent)
+    } catch (err: IllegalStateException) {
+      // startService is rejected while the app is in the background (Android 8+). Report the
+      // failure instead of crashing the process, so a harness polling the result file fails
+      // fast rather than waiting for its timeout.
+      val resultJson =
+        buildJsonObject {
+          put("ok", JsonPrimitive(false))
+          put("error", JsonPrimitive(err.message ?: err::class.java.simpleName))
+        }.toString()
+      runCatching { File(context.cacheDir, resultFileName).writeText(resultJson) }
+      Log.e(tag, "FAIL $resultJson", err)
+    }
+  }
+}
+
+/**
+ * Debug-only started service that executes one voice E2E command per start request and reports
+ * the outcome as JSON in the app cache dir ([resultFileName]) and in logcat.
+ *
+ * Each command runs on [serviceScope]; its start request is released once the command finishes.
+ */
+class VoiceE2eService : Service() {
+  private val serviceScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
+
+  /** Binding is not supported; the service is driven only by start requests. */
+  override fun onBind(intent: Intent?): IBinder? = null
+
+  /** Launches the command carried by [intent] on [serviceScope] and stops [startId] when it ends. */
+  override fun onStartCommand(
+    intent: Intent?,
+    flags: Int,
+    startId: Int,
+  ): Int {
+    val command = intent ?: return START_NOT_STICKY
+    serviceScope.launch {
+      try {
+        runCommand(command)
+      } finally {
+        stopSelf(startId)
+      }
+    }
+    return START_NOT_STICKY
+  }
+
+  /** Cancels any in-flight command when the service is destroyed. */
+  override fun onDestroy() {
+    serviceScope.cancel()
+    super.onDestroy()
+  }
+
+  /**
+   * Runs the single command described by the extras of [intent] and reports the outcome.
+   *
+   * `mode=stop` cancels mic capture and disables talk mode. Any other mode optionally configures
+   * and awaits the gateway, launches [MainActivity] with [actionOpenVoiceE2e], and then runs
+   * [NodeRuntime.runVoiceE2e]. Every failure is reported as `{"ok":false,"error":...}`.
+   */
+  private suspend fun runCommand(intent: Intent) {
+    try {
+      val app = applicationContext as NodeApp
+      val runtime = app.ensureRuntime()
+      val mode = intent.decodedExtraOrDefault(name = "mode", fallback = "both")
+      // Case-insensitive on purpose: NodeRuntime.runVoiceE2e lowercases the mode it receives, so
+      // "STOP" must not fall through and be rejected there as an unknown mode.
+      if (mode.equals("stop", ignoreCase = true)) {
+        runtime.cancelMicCapture()
+        runtime.setTalkModeEnabled(false)
+        writeResult("""{"ok":true,"mode":"stop"}""")
+        return
+      }
+
+      val connect = !intent.getBooleanExtra("noConnect", false)
+      val connectTimeoutMs = intent.getLongExtra("connectTimeoutMs", 20_000L)
+      if (connect) {
+        configureGateway(runtime = runtime, intent = intent)
+      }
+      // Even with noConnect, wait for an existing connection attempt if we are not connected yet.
+      if (connect || !runtime.isConnected.value) {
+        awaitGateway(runtime = runtime, timeoutMs = connectTimeoutMs)
+      }
+
+      startActivity(
+        Intent(actionOpenVoiceE2e)
+          .setClass(this, MainActivity::class.java)
+          .addFlags(Intent.FLAG_ACTIVITY_NEW_TASK or Intent.FLAG_ACTIVITY_SINGLE_TOP or Intent.FLAG_ACTIVITY_CLEAR_TOP),
+      )
+
+      val transcript =
+        intent.decodedExtraOrDefault(
+          name = "transcript",
+          fallback = "Reply exactly: Android voice e2e normal path ok.",
+        )
+      val realtimeReply =
+        intent.decodedExtraOrDefault(
+          name = "realtimeAssistant",
+          fallback = "Android realtime voice e2e relay path ok.",
+        )
+      val timeoutMs = intent.getLongExtra("timeoutMs", 60_000L)
+      val result =
+        runtime.runVoiceE2e(
+          mode = mode,
+          transcript = transcript,
+          realtimeAssistantText = realtimeReply,
+          timeoutMs = timeoutMs,
+        )
+      val resultJson = encodeResult(result)
+      writeResult(resultJson)
+      Log.i(tag, "PASS $resultJson")
+    } catch (err: Throwable) {
+      // Throwable on purpose: withTimeout failures surface as cancellation exceptions and must
+      // still be reported to the harness.
+      val resultJson =
+        buildJsonObject {
+          put("ok", JsonPrimitive(false))
+          put("error", JsonPrimitive(err.message ?: err::class.java.simpleName))
+        }.toString()
+      Log.e(tag, "FAIL $resultJson", err)
+      // The write itself may be what failed; never let it escape the failure handler.
+      runCatching { writeResult(resultJson) }
+        .onFailure { writeErr -> Log.e(tag, "could not write result file", writeErr) }
+    }
+  }
+
+  /**
+   * Applies the manual gateway settings carried by [intent] (host, port, tls, credentials) to
+   * [runtime], marks onboarding as completed, and starts a manual connection.
+   */
+  private fun configureGateway(
+    runtime: NodeRuntime,
+    intent: Intent,
+  ) {
+    val host = intent.decodedExtraOrDefault(name = "host", fallback = "127.0.0.1")
+    val port = intent.getIntExtra("port", 18789)
+    runtime.setManualEnabled(true)
+    runtime.setManualHost(host)
+    runtime.setManualPort(port)
+    runtime.setManualTls(intent.getBooleanExtra("tls", false))
+    // Credentials are passed through untrimmed; a missing extra becomes an empty string.
+    runtime.setGatewayToken(intent.getDecodedStringExtra("token").orEmpty())
+    runtime.setGatewayBootstrapToken(intent.getDecodedStringExtra("bootstrapToken").orEmpty())
+    runtime.setGatewayPassword(intent.getDecodedStringExtra("password").orEmpty())
+    runtime.setOnboardingCompleted(true)
+    runtime.connectManual()
+  }
+
+  /** Polls [NodeRuntime.isConnected] every 100 ms until it is true or [timeoutMs] elapses. */
+  private suspend fun awaitGateway(
+    runtime: NodeRuntime,
+    timeoutMs: Long,
+  ) {
+    withTimeout(timeoutMs) {
+      while (!runtime.isConnected.value) {
+        delay(100L)
+      }
+    }
+  }
+
+  /** Serializes a successful [result]; slices that were not run are encoded as JSON null. */
+  private fun encodeResult(result: NodeRuntime.VoiceE2eResult): String =
+    buildJsonObject {
+      put("ok", JsonPrimitive(true))
+      put("normal", result.normal?.let(::encodeSlice) ?: JsonNull)
+      put("realtime", result.realtime?.let(::encodeSlice) ?: JsonNull)
+    }.toString()
+
+  /** Serializes one slice; absent user/assistant text is encoded as JSON null. */
+  private fun encodeSlice(slice: NodeRuntime.VoiceE2eSliceResult) =
+    buildJsonObject {
+      put("mode", JsonPrimitive(slice.mode))
+      put("status", JsonPrimitive(slice.status))
+      put("userText", slice.userText?.let(::JsonPrimitive) ?: JsonNull)
+      put("assistantText", slice.assistantText?.let(::JsonPrimitive) ?: JsonNull)
+    }
+
+  /**
+   * Publishes [json] as the result file in the cache dir.
+   *
+   * The content is staged in a sibling temp file and renamed into place so that a reader polling
+   * the result file never observes a partially written document.
+   */
+  private fun writeResult(json: String) {
+    val target = File(cacheDir, resultFileName)
+    val staging = File(cacheDir, "$resultFileName.tmp")
+    staging.writeText(json)
+    if (!staging.renameTo(target)) {
+      // renameTo reports failure via its return value; fall back to a direct write so a result
+      // is still produced.
+      target.writeText(json)
+      staging.delete()
+    }
+  }
+
+  /** Decoded, trimmed string extra [name], or [fallback] when it is missing or empty. */
+  private fun Intent.decodedExtraOrDefault(
+    name: String,
+    fallback: String,
+  ): String =
+    getDecodedStringExtra(name)
+      ?.trim()
+      .orEmpty()
+      .ifEmpty { fallback }
+}
+
+/**
+ * Reads the string extra [name], preferring a Base64-encoded sibling extra named `${name}Base64`.
+ *
+ * When the Base64 variant is present and not blank it is decoded as UTF-8 text; otherwise the
+ * plain extra is returned, which may be null.
+ */
+private fun Intent.getDecodedStringExtra(name: String): String? {
+  val base64Value = getStringExtra("${name}Base64")
+  return if (base64Value.isNullOrBlank()) {
+    getStringExtra(name)
+  } else {
+    Base64.decode(base64Value, Base64.NO_WRAP).toString(Charsets.UTF_8)
+  }
+}
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/AssistantLaunch.kt b/apps/android/app/src/main/java/ai/openclaw/app/AssistantLaunch.kt
index e1bd409788f..5bae212db80 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/AssistantLaunch.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/AssistantLaunch.kt
@@ -3,6 +3,7 @@ package ai.openclaw.app
import android.content.Intent
const val actionAskOpenClaw = "ai.openclaw.app.action.ASK_OPENCLAW"
+// Intent action that maps to HomeDestination.Voice in debug builds (see parseHomeDestinationIntent).
+const val actionOpenVoiceE2e = "ai.openclaw.app.debug.OPEN_VOICE_E2E"
const val extraAssistantPrompt = "prompt"
enum class HomeDestination {
@@ -19,6 +20,14 @@ data class AssistantLaunchRequest(
val autoSend: Boolean,
)
+/**
+ * Maps a launch [intent] to the home destination it requests, if any.
+ *
+ * Only [actionOpenVoiceE2e] is recognized, and only in debug builds; every other intent
+ * (including null or action-less ones) yields null.
+ */
+fun parseHomeDestinationIntent(intent: Intent?): HomeDestination? {
+  val requestedAction = intent?.action ?: return null
+  if (!BuildConfig.DEBUG) return null
+  return if (requestedAction == actionOpenVoiceE2e) HomeDestination.Voice else null
+}
+
fun parseAssistantLaunchIntent(intent: Intent?): AssistantLaunchRequest? {
val action = intent?.action ?: return null
return when (action) {
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt b/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt
index 7eb3b3dc604..77c31eb8466 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt
@@ -79,6 +79,10 @@ class MainActivity : ComponentActivity() {
}
 private fun handleAssistantIntent(intent: android.content.Intent?) {
+ // A home-destination intent takes precedence: it is consumed here and is never parsed as an
+ // assistant launch request.
+ parseHomeDestinationIntent(intent)?.let { destination ->
+ viewModel.requestHomeDestination(destination)
+ return
+ }
 val request = parseAssistantLaunchIntent(intent) ?: return
 viewModel.handleAssistantLaunch(request)
 }
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt b/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt
index 83882975d49..654fc542db5 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt
@@ -330,6 +330,10 @@ class MainViewModel(
_requestedHomeDestination.value = null
}
+ /** Publishes [destination] as the requested home destination. */
+ fun requestHomeDestination(destination: HomeDestination) {
+ _requestedHomeDestination.value = destination
+ }
+
fun clearChatDraft() {
_chatDraft.value = null
}
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
index 22d2e238c8f..67134cb4d72 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt
@@ -47,6 +47,7 @@ import ai.openclaw.app.protocol.OpenClawCanvasA2UIAction
import ai.openclaw.app.voice.MicCaptureManager
import ai.openclaw.app.voice.TalkModeManager
import ai.openclaw.app.voice.VoiceConversationEntry
+import ai.openclaw.app.voice.VoiceConversationRole
import android.Manifest
import android.content.Context
import android.content.pm.PackageManager
@@ -64,6 +65,7 @@ import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.combine
import kotlinx.coroutines.flow.distinctUntilChanged
import kotlinx.coroutines.launch
+import kotlinx.coroutines.withTimeout
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonArray
@@ -256,6 +258,18 @@ class NodeRuntime(
val previousFingerprintSha256: String? = null,
)
+ /**
+ * Outcome of one voice E2E slice.
+ *
+ * [mode] is "normal" or "realtime", [status] is the status text of the voice manager that ran
+ * the slice, and [userText] / [assistantText] are the texts of the last user and assistant
+ * conversation entries (null when no such entry exists).
+ */
+ data class VoiceE2eSliceResult(
+ val mode: String,
+ val status: String,
+ val userText: String?,
+ val assistantText: String?,
+ )
+
+ /** Combined voice E2E outcome; a slice is null when the requested mode did not include it. */
+ data class VoiceE2eResult(
+ val normal: VoiceE2eSliceResult?,
+ val realtime: VoiceE2eSliceResult?,
+ )
+
private val _isConnected = MutableStateFlow(false)
val isConnected: StateFlow = _isConnected.asStateFlow()
private val _nodeConnected = MutableStateFlow(false)
@@ -1187,6 +1201,115 @@ class NodeRuntime(
talkMode.setPlaybackEnabled(value)
}
+  /**
+   * Runs the debug voice end-to-end check for the requested [mode].
+   *
+   * [mode] is trimmed and lowercased; blank means "both". "normal"/"dictation" run the manual-mic
+   * slice, "realtime"/"talk" run the talk-mode slice, and "both" runs normal first, then realtime.
+   * The speaker is disabled for the duration and restored afterwards; if the run does not
+   * complete, the active voice session is stopped.
+   *
+   * @param transcript synthetic user transcript fed into both slices
+   * @param realtimeAssistantText synthetic assistant text for the realtime slice
+   * @param timeoutMs timeout handed to each slice
+   * @throws IllegalStateException outside debug builds, when the gateway is not connected, or
+   *   when the microphone permission is missing
+   * @throws IllegalArgumentException when [mode] is not one of the recognized values
+   */
+  suspend fun runVoiceE2e(
+    mode: String,
+    transcript: String,
+    realtimeAssistantText: String,
+    timeoutMs: Long,
+  ): VoiceE2eResult {
+    check(BuildConfig.DEBUG) { "voice e2e is debug-only" }
+    check(_isConnected.value) { "gateway not connected" }
+    check(hasRecordAudioPermission()) { "microphone permission missing" }
+
+    val requestedMode = mode.trim().lowercase().ifEmpty { "both" }
+    val wantsNormal = requestedMode in setOf("both", "normal", "dictation")
+    val wantsRealtime = requestedMode in setOf("both", "realtime", "talk")
+    require(wantsNormal || wantsRealtime) { "unknown voice e2e mode: $mode" }
+
+    val speakerWasEnabled = speakerEnabled.value
+    setSpeakerEnabled(false)
+    var finished = false
+    try {
+      val normalSlice =
+        if (wantsNormal) runNormalVoiceE2e(transcript = transcript, timeoutMs = timeoutMs) else null
+      val realtimeSlice =
+        if (wantsRealtime) {
+          runRealtimeVoiceE2e(
+            transcript = transcript,
+            assistantText = realtimeAssistantText,
+            timeoutMs = timeoutMs,
+          )
+        } else {
+          null
+        }
+      val outcome = VoiceE2eResult(normal = normalSlice, realtime = realtimeSlice)
+      finished = true
+      return outcome
+    } finally {
+      // Only tear the session down when a slice failed or timed out.
+      if (!finished) {
+        stopActiveVoiceSession()
+      }
+      setSpeakerEnabled(speakerWasEnabled)
+    }
+  }
+
+  /**
+   * Manual-mic slice: submits [transcript] as if it had been transcribed, waits up to
+   * [timeoutMs] for a non-streaming assistant entry in the mic conversation, and snapshots the
+   * last user/assistant texts together with the mic status text.
+   */
+  private suspend fun runNormalVoiceE2e(
+    transcript: String,
+    timeoutMs: Long,
+  ): VoiceE2eSliceResult {
+    stopActiveVoiceSession()
+    setVoiceCaptureMode(VoiceCaptureMode.ManualMic)
+    micCapture.submitTranscribedMessage(transcript)
+    // NOTE(review): this is satisfied by any finished assistant entry, including one left over
+    // from an earlier turn if the conversation is not cleared beforehand — confirm.
+    awaitVoiceConversation(timeoutMs = timeoutMs) {
+      micCapture.conversation.value.any { entry ->
+        !entry.isStreaming && entry.role == VoiceConversationRole.Assistant
+      }
+    }
+    val turns = micCapture.conversation.value
+    val lastUser = turns.lastOrNull { it.role == VoiceConversationRole.User }
+    val lastAssistant = turns.lastOrNull { it.role == VoiceConversationRole.Assistant }
+    return VoiceE2eSliceResult(
+      mode = "normal",
+      status = micCapture.statusText.value,
+      userText = lastUser?.text,
+      assistantText = lastAssistant?.text,
+    )
+  }
+
+  /**
+   * Talk-mode slice: drives one synthetic realtime turn ([transcript] as the user, [assistantText]
+   * as the assistant), waits up to [timeoutMs] until the talk conversation holds a non-streaming
+   * entry for each role, and snapshots the last user/assistant texts with the talk status text.
+   */
+  private suspend fun runRealtimeVoiceE2e(
+    transcript: String,
+    assistantText: String,
+    timeoutMs: Long,
+  ): VoiceE2eSliceResult {
+    stopActiveVoiceSession()
+    setVoiceCaptureMode(VoiceCaptureMode.TalkMode)
+    talkMode.runE2eRealtimeTurn(
+      userText = transcript,
+      assistantText = assistantText,
+      timeoutMs = timeoutMs,
+    )
+    awaitVoiceConversation(timeoutMs = timeoutMs) {
+      val snapshot = talkMode.conversation.value
+      val userDone = snapshot.any { it.role == VoiceConversationRole.User && !it.isStreaming }
+      userDone && snapshot.any { it.role == VoiceConversationRole.Assistant && !it.isStreaming }
+    }
+    val turns = talkMode.conversation.value
+    val lastUser = turns.lastOrNull { it.role == VoiceConversationRole.User }
+    val lastAssistant = turns.lastOrNull { it.role == VoiceConversationRole.Assistant }
+    return VoiceE2eSliceResult(
+      mode = "realtime",
+      status = talkMode.statusText.value,
+      userText = lastUser?.text,
+      assistantText = lastAssistant?.text,
+    )
+  }
+
+  /**
+   * Polls [ready] every 100 ms until it returns true, failing via [withTimeout] once [timeoutMs]
+   * has elapsed. [ready] is evaluated before the first delay.
+   */
+  private suspend fun awaitVoiceConversation(
+    timeoutMs: Long,
+    ready: () -> Boolean,
+  ) {
+    withTimeout(timeoutMs) {
+      while (true) {
+        if (ready()) break
+        delay(100L)
+      }
+    }
+  }
+
private fun setVoiceCaptureMode(
mode: VoiceCaptureMode,
persistManualMic: Boolean = true,
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt
index 52760c73861..d2ac0569c14 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt
@@ -262,6 +262,11 @@ class MicCaptureManager(
}
}
+ /** Queues [text] as a recognized message, then sends queued messages if the manager is idle. */
+ internal fun submitTranscribedMessage(text: String) {
+ queueRecognizedMessage(text)
+ sendQueuedIfIdle()
+ }
+
fun handleGatewayEvent(
event: String,
payloadJson: String?,
@@ -701,8 +706,7 @@ class MicCaptureManager(
val text = obj["text"].asStringOrNull()?.trim().orEmpty()
if (text.isNotEmpty()) {
if (text != flushedPartialTranscript) {
- queueRecognizedMessage(text)
- sendQueuedIfIdle()
+ submitTranscribedMessage(text)
} else {
flushedPartialTranscript = null
_liveTranscript.value = null
diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt
index dbf4d7c2dbe..bdd720e4b34 100644
--- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt
@@ -480,6 +480,19 @@ class TalkModeManager internal constructor(
pendingRunId = null
}
+  /**
+   * Drives one synthetic realtime turn for the voice E2E check.
+   *
+   * Enables talk mode if it is off, waits up to [timeoutMs] for a realtime session id, then feeds
+   * a final user transcript followed by a final assistant transcript through [handleGatewayEvent]
+   * as `talk.event` payloads.
+   */
+  internal suspend fun runE2eRealtimeTurn(
+    userText: String,
+    assistantText: String,
+    timeoutMs: Long,
+  ) {
+    val alreadyEnabled = _isEnabled.value
+    if (!alreadyEnabled) {
+      setEnabled(true)
+    }
+    val relaySessionId = awaitRealtimeSessionId(timeoutMs)
+    // Order matters: the user transcript is delivered before the assistant transcript.
+    listOf("user" to userText, "assistant" to assistantText).forEach { (role, text) ->
+      handleGatewayEvent(
+        "talk.event",
+        realtimeTranscriptPayload(sessionId = relaySessionId, role = role, text = text),
+      )
+    }
+  }
+
fun setPlaybackEnabled(enabled: Boolean) {
if (playbackEnabled == enabled) return
playbackEnabled = enabled
@@ -597,6 +610,19 @@ class TalkModeManager internal constructor(
shutdownTextToSpeech()
}
+ /**
+ * Polls every 100 ms until a realtime session id is available and returns it.
+ *
+ * Fails with [IllegalStateException] carrying the status text when talk mode is disabled and the
+ * status starts with "Talk failed"; fails via [withTimeout] once [timeoutMs] has elapsed.
+ */
+ private suspend fun awaitRealtimeSessionId(timeoutMs: Long): String =
+ withTimeout(timeoutMs) {
+ while (true) {
+ realtimeSessionId?.let { return@withTimeout it }
+ val status = _statusText.value
+ if (!_isEnabled.value && status.startsWith("Talk failed")) {
+ throw IllegalStateException(status)
+ }
+ delay(100L)
+ }
+ // Never reached; gives the lambda a terminating expression after the endless loop.
+ error("unreachable")
+ }
+
private suspend fun startRealtimeRelay(generation: Long) {
if (!isConnected()) {
_statusText.value = "Gateway not connected"
@@ -852,6 +878,19 @@ class TalkModeManager internal constructor(
}
}
+  /**
+   * Builds the JSON text of a final `transcript` event for relay session [sessionId], spoken by
+   * [role] with content [text].
+   */
+  private fun realtimeTranscriptPayload(
+    sessionId: String,
+    role: String,
+    text: String,
+  ): String {
+    val payload =
+      buildJsonObject {
+        put("relaySessionId", JsonPrimitive(sessionId))
+        put("type", JsonPrimitive("transcript"))
+        put("role", JsonPrimitive(role))
+        put("text", JsonPrimitive(text))
+        put("final", JsonPrimitive(true))
+      }
+    return payload.toString()
+  }
+
private fun playRealtimeAudio(bytes: ByteArray) {
if (!playbackEnabled || realtimeOutputSuppressed || bytes.isEmpty()) return
val queue = ensureRealtimeAudioQueue()
diff --git a/apps/android/app/src/test/java/ai/openclaw/app/voice/MicCaptureManagerTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/voice/MicCaptureManagerTest.kt
index c6c63ab6aab..d23ac4c4464 100644
--- a/apps/android/app/src/test/java/ai/openclaw/app/voice/MicCaptureManagerTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/app/voice/MicCaptureManagerTest.kt
@@ -100,6 +100,40 @@ class MicCaptureManagerTest {
assertEquals(emptyList(), manager.conversation.value)
}
+  /**
+   * A message handed to submitTranscribedMessage must be sent to the gateway once and produce a
+   * user entry followed by the assistant reply delivered through the final chat event.
+   */
+  @Test
+  @OptIn(ExperimentalCoroutinesApi::class)
+  fun submittedTranscribedMessageUsesGatewayTurnPath() =
+    runTest {
+      // Explicit element type: an empty mutableListOf() gives the compiler nothing to infer from.
+      val sentMessages = mutableListOf<String>()
+      val manager =
+        createManager(
+          scope = this,
+          sendToGateway = { message, onRunIdKnown ->
+            sentMessages += message
+            onRunIdKnown("run-voice-e2e")
+            "run-voice-e2e"
+          },
+        )
+
+      manager.onGatewayConnectionChanged(true)
+      manager.submitTranscribedMessage("voice e2e message")
+      runCurrent()
+      manager.handleGatewayEvent("chat", chatFinalPayload(runId = "run-voice-e2e", text = "voice e2e reply"))
+      advanceUntilIdle()
+
+      assertEquals(listOf("voice e2e message"), sentMessages)
+      assertEquals(
+        listOf(VoiceConversationRole.User, VoiceConversationRole.Assistant),
+        manager.conversation.value.map { it.role },
+      )
+      assertEquals(
+        "voice e2e reply",
+        manager.conversation.value
+          .last()
+          .text,
+      )
+    }
+
@Test
fun pcm16FramesAreEncodedAsPcmuFrames() {
val manager = createManager()
diff --git a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt
index 8b9fbd07042..2bbaae092e9 100644
--- a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt
@@ -11,6 +11,7 @@ import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
+import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.launch
import kotlinx.coroutines.test.advanceUntilIdle
import kotlinx.coroutines.test.currentTime
@@ -327,6 +328,28 @@ class TalkModeManagerTest {
assertTrue(entries.none { it.isStreaming })
}
+  /**
+   * With a relay session already in place, an E2E realtime turn must yield exactly one finished
+   * user entry followed by one finished assistant entry.
+   */
+  @Test
+  fun e2eRealtimeTurnUsesRelayTranscriptPath() =
+    runTest {
+      val talkManager = createManager(scope = this)
+      // Pre-seed the session id and enabled flag so the turn does not wait for a real session.
+      setPrivateField(talkManager, "realtimeSessionId", "relay-1")
+      setMutableStateFlow(talkManager, "_isEnabled", true)
+
+      talkManager.runE2eRealtimeTurn(
+        userText = "voice e2e user",
+        assistantText = "voice e2e assistant",
+        timeoutMs = 1_000L,
+      )
+
+      val turns = talkManager.conversation.value
+      assertEquals(2, turns.size)
+      val (userTurn, assistantTurn) = turns
+      assertEquals(VoiceConversationRole.User, userTurn.role)
+      assertEquals("voice e2e user", userTurn.text)
+      assertEquals(VoiceConversationRole.Assistant, assistantTurn.role)
+      assertEquals("voice e2e assistant", assistantTurn.text)
+      assertTrue(turns.none { it.isStreaming })
+    }
+
@Test
@OptIn(ExperimentalCoroutinesApi::class)
fun realtimeStartWithoutGatewayTurnsTalkOff() =
@@ -483,6 +506,15 @@ class TalkModeManagerTest {
return field.get(target)
}
+  /**
+   * Test helper: reads the private field [name] of [target] via readPrivateField, treats it as a
+   * MutableStateFlow and assigns [value] to it.
+   */
+  @Suppress("UNCHECKED_CAST")
+  private fun <T> setMutableStateFlow(
+    target: Any,
+    name: String,
+    value: T,
+  ) {
+    // The cast is unchecked; the caller is responsible for passing a value of the flow's type.
+    (readPrivateField(target, name) as MutableStateFlow<T>).value = value
+  }
+
private fun shouldAppendRealtimeCapturedFrame(
manager: TalkModeManager,
length: Int,
diff --git a/apps/android/scripts/voice-e2e.sh b/apps/android/scripts/voice-e2e.sh
new file mode 100755
index 00000000000..3d02875429d
--- /dev/null
+++ b/apps/android/scripts/voice-e2e.sh
@@ -0,0 +1,226 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
+ANDROID_DIR="$ROOT_DIR/apps/android"
+PACKAGE_NAME="ai.openclaw.app"
+RECEIVER="$PACKAGE_NAME/.VoiceE2eReceiver"
+RUN_ACTION="ai.openclaw.app.debug.RUN_VOICE_E2E"
+OPEN_ACTION="ai.openclaw.app.debug.OPEN_VOICE_E2E"
+PORT=18789
+HOST="127.0.0.1"
+MODE="both"
+TRANSCRIPT="Reply exactly: Android voice e2e normal path ok."
+REALTIME_ASSISTANT="Android realtime voice e2e relay path ok."
+TIMEOUT_MS=60000
+INSTALL=1
+CONNECT=1
+CLEANUP=0
+START_GATEWAY=0
+
+# Prints the command-line help text to stdout.
+usage() {
+ cat <<'USAGE'
+Usage: apps/android/scripts/voice-e2e.sh [options]
+
+Options:
+ --mode normal|realtime|both Voice path to test. Default: both.
+ --transcript TEXT Synthetic user transcript for the voice turn.
+ --realtime-assistant TEXT Synthetic realtime assistant relay text.
+ --host HOST Gateway host visible from Android. Default: 127.0.0.1.
+ --port PORT Gateway port. Default: 18789.
+ --timeout-ms MS Per-mode timeout. Default: 60000.
+ --skip-install Reuse the installed debug app.
+ --no-connect Do not rewrite manual gateway settings.
+ --start-gateway Start a temporary local gateway with bws_get_secret.
+ --cleanup Stop voice capture after screenshots.
+USAGE
+}
+
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --mode)
+ MODE="$2"
+ shift 2
+ ;;
+ --transcript)
+ TRANSCRIPT="$2"
+ shift 2
+ ;;
+ --realtime-assistant)
+ REALTIME_ASSISTANT="$2"
+ shift 2
+ ;;
+ --host)
+ HOST="$2"
+ shift 2
+ ;;
+ --port)
+ PORT="$2"
+ shift 2
+ ;;
+ --timeout-ms)
+ TIMEOUT_MS="$2"
+ shift 2
+ ;;
+ --skip-install)
+ INSTALL=0
+ shift
+ ;;
+ --no-connect)
+ CONNECT=0
+ shift
+ ;;
+ --start-gateway)
+ START_GATEWAY=1
+ shift
+ ;;
+ --cleanup)
+ CLEANUP=1
+ shift
+ ;;
+ -h|--help)
+ usage
+ exit 0
+ ;;
+ *)
+ echo "unknown option: $1" >&2
+ usage >&2
+ exit 2
+ ;;
+ esac
+done
+
+# Toolchain locations: honor values already set by the caller, otherwise fall back to the
+# Homebrew paths below.
+export JAVA_HOME="${JAVA_HOME:-/opt/homebrew/opt/openjdk@17/libexec/openjdk.jdk/Contents/Home}"
+export ANDROID_HOME="${ANDROID_HOME:-/opt/homebrew/share/android-commandlinetools}"
+export ANDROID_SDK_ROOT="${ANDROID_SDK_ROOT:-$ANDROID_HOME}"
+export PATH="/opt/homebrew/opt/openjdk@17/bin:$ANDROID_HOME/platform-tools:$ANDROID_HOME/cmdline-tools/latest/bin:$PATH"
+
+# Per-run artifact directory (results, screenshots, logs); its path is printed at the end.
+ARTIFACT_DIR="/tmp/openclaw-android-voice-e2e-$(date +%Y%m%d-%H%M%S)"
+mkdir -p "$ARTIFACT_DIR"
+
+# Best-effort kill of the gateway started by --start-gateway; a no-op when none was started.
+cleanup_gateway() {
+ if [[ -n "${GATEWAY_PID:-}" ]]; then
+ kill "$GATEWAY_PID" >/dev/null 2>&1 || true
+ fi
+}
+trap cleanup_gateway EXIT
+
+# Require at least one adb entry in the "device" state (i.e. not offline/unauthorized).
+if ! adb devices -l | awk 'NR > 1 && $2 == "device" { found = 1 } END { exit(found ? 0 : 1) }'; then
+ echo "no authorized Android device found" >&2
+ adb devices -l >&2
+ exit 1
+fi
+
+# Forward the device's tcp:$PORT to the same port on this machine.
+adb reverse "tcp:$PORT" "tcp:$PORT" >/dev/null
+
+# Optionally start a temporary local gateway in the background, logging to the artifact dir.
+if [[ "$START_GATEWAY" -eq 1 ]]; then
+ # Fetch the API key via bws_get_secret; when it is not on PATH, try it through an interactive zsh.
+ if command -v bws_get_secret >/dev/null 2>&1; then
+ OPENCLAW_OPENAI_API_KEY="$(bws_get_secret OPENCLAW_OPENAI_API_KEY)"
+ else
+ OPENCLAW_OPENAI_API_KEY="$(zsh -ic 'bws_get_secret OPENCLAW_OPENAI_API_KEY')"
+ fi
+ (
+ cd "$ROOT_DIR"
+ OPENAI_API_KEY="$OPENCLAW_OPENAI_API_KEY" \
+ pnpm openclaw gateway run \
+ --port "$PORT" \
+ --auth none \
+ --bind loopback \
+ --force \
+ --allow-unconfigured \
+ --ws-log compact
+ ) >"$ARTIFACT_DIR/gateway.log" 2>&1 &
+ GATEWAY_PID=$!
+ # Fixed grace period, then verify the background process is still alive.
+ sleep 4
+ if ! kill -0 "$GATEWAY_PID" >/dev/null 2>&1; then
+ cat "$ARTIFACT_DIR/gateway.log" >&2
+ exit 1
+ fi
+ # The key is only needed by the gateway process; drop it from this shell.
+ unset OPENCLAW_OPENAI_API_KEY
+fi
+
+# Build and install the debug app unless --skip-install was given.
+if [[ "$INSTALL" -eq 1 ]]; then
+ (cd "$ANDROID_DIR" && ./gradlew :app:installPlayDebug)
+fi
+
+# Grant the mic permission (best effort), restart the app via the open-voice action, and clear
+# logcat before the run.
+adb shell pm grant "$PACKAGE_NAME" android.permission.RECORD_AUDIO >/dev/null 2>&1 || true
+adb shell am force-stop "$PACKAGE_NAME" >/dev/null
+adb shell am start -a "$OPEN_ACTION" -n "$PACKAGE_NAME/.MainActivity" >/dev/null
+adb logcat -c
+
+# Runs one voice E2E mode ($1) on the device: broadcasts the command, polls the app's result
+# file, saves the result JSON and a screenshot into the artifact dir, and exits 1 when the
+# result does not arrive in time or does not contain "ok":true.
+run_mode() {
+ local test_mode="$1"
+ local result_name="$ARTIFACT_DIR/result-$test_mode.json"
+ local screenshot_name="$ARTIFACT_DIR/screen-$test_mode.png"
+ local transcript_base64
+ local realtime_assistant_base64
+ # Free-form texts are sent Base64-encoded (single line) in the *Base64 extras.
+ transcript_base64="$(printf '%s' "$TRANSCRIPT" | base64 | tr -d '\n')"
+ realtime_assistant_base64="$(printf '%s' "$REALTIME_ASSISTANT" | base64 | tr -d '\n')"
+
+ # Remove any result left by a previous run so the poll below only sees a fresh one.
+ adb shell run-as "$PACKAGE_NAME" rm -f cache/voice_e2e_result.json >/dev/null 2>&1 || true
+ local no_connect_flag=true
+ if [[ "$CONNECT" -eq 1 ]]; then
+ no_connect_flag=false
+ fi
+
+ adb shell am broadcast \
+ -a "$RUN_ACTION" \
+ -n "$RECEIVER" \
+ --es mode "$test_mode" \
+ --ez noConnect "$no_connect_flag" \
+ --es host "$HOST" \
+ --ei port "$PORT" \
+ --ez tls false \
+ --el timeoutMs "$TIMEOUT_MS" \
+ --el connectTimeoutMs "$TIMEOUT_MS" \
+ --es transcriptBase64 "$transcript_base64" \
+ --es realtimeAssistantBase64 "$realtime_assistant_base64" >/dev/null
+
+ # Poll once per second for up to the timeout (in seconds) plus 20 seconds of slack.
+ local deadline=$((SECONDS + TIMEOUT_MS / 1000 + 20))
+ local result=""
+ while [[ "$SECONDS" -lt "$deadline" ]]; do
+ result="$(adb shell run-as "$PACKAGE_NAME" cat cache/voice_e2e_result.json 2>/dev/null | tr -d '\r' || true)"
+ if [[ -n "$result" ]]; then
+ break
+ fi
+ sleep 1
+ done
+
+ if [[ -z "$result" ]]; then
+ echo "voice e2e $test_mode timed out waiting for result" >&2
+ exit 1
+ fi
+ printf '%s\n' "$result" >"$result_name"
+ # The screenshot is captured before the pass/fail check, so failures get one too.
+ adb exec-out screencap -p >"$screenshot_name"
+ if ! grep -q '"ok":true' "$result_name"; then
+ echo "voice e2e $test_mode failed: $result" >&2
+ exit 1
+ fi
+}
+
+case "$MODE" in
+ both)
+ run_mode normal
+ run_mode realtime
+ ;;
+ normal|dictation)
+ run_mode normal
+ ;;
+ realtime|talk)
+ run_mode realtime
+ ;;
+ *)
+ echo "unknown mode: $MODE" >&2
+ exit 2
+ ;;
+esac
+
+adb logcat -d -v time |
+ rg -i 'OpenClaw|TalkMode|MicCapture|AudioRecord|SpeechRecognizer|realtime|talk.session|appendAudio|transcript|Talk failed|Transcription failed|Speech network|VoiceE2E' |
+ tail -250 >"$ARTIFACT_DIR/logcat.txt" || true
+
+if [[ "$CLEANUP" -eq 1 ]]; then
+ adb shell am broadcast -a "$RUN_ACTION" -n "$RECEIVER" --es mode stop >/dev/null
+fi
+
+echo "$ARTIFACT_DIR"