diff --git a/apps/android/app/src/debug/AndroidManifest.xml b/apps/android/app/src/debug/AndroidManifest.xml new file mode 100644 index 00000000000..2107cb4cb0c --- /dev/null +++ b/apps/android/app/src/debug/AndroidManifest.xml @@ -0,0 +1,14 @@ + + + + + + + + + + diff --git a/apps/android/app/src/debug/java/ai/openclaw/app/VoiceE2eReceiver.kt b/apps/android/app/src/debug/java/ai/openclaw/app/VoiceE2eReceiver.kt new file mode 100644 index 00000000000..caff4e5d338 --- /dev/null +++ b/apps/android/app/src/debug/java/ai/openclaw/app/VoiceE2eReceiver.kt @@ -0,0 +1,188 @@ +package ai.openclaw.app + +import android.app.Service +import android.content.BroadcastReceiver +import android.content.Context +import android.content.Intent +import android.os.IBinder +import android.util.Base64 +import android.util.Log +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.SupervisorJob +import kotlinx.coroutines.cancel +import kotlinx.coroutines.delay +import kotlinx.coroutines.launch +import kotlinx.coroutines.withTimeout +import kotlinx.serialization.json.JsonNull +import kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.buildJsonObject +import java.io.File + +private const val tag = "VoiceE2E" +private const val resultFileName = "voice_e2e_result.json" + +class VoiceE2eReceiver : BroadcastReceiver() { + override fun onReceive( + context: Context, + intent: Intent, + ) { + context.startService( + Intent(context, VoiceE2eService::class.java) + .putExtras(intent), + ) + } +} + +class VoiceE2eService : Service() { + private val serviceScope = CoroutineScope(SupervisorJob() + Dispatchers.IO) + + override fun onBind(intent: Intent?): IBinder? = null + + override fun onStartCommand( + intent: Intent?, + flags: Int, + startId: Int, + ): Int { + val command = intent ?: return START_NOT_STICKY + serviceScope.launch { + try { + runCommand(command) + } finally { + stopSelf(startId) + } + } + return START_NOT_STICKY + } + + override fun onDestroy() { + serviceScope.cancel() + super.onDestroy() + } + + private suspend fun runCommand(intent: Intent) { + try { + val app = applicationContext as NodeApp + val runtime = app.ensureRuntime() + val mode = + intent + .getDecodedStringExtra("mode") + ?.trim() + .orEmpty() + .ifEmpty { "both" } + if (mode == "stop") { + runtime.cancelMicCapture() + runtime.setTalkModeEnabled(false) + writeResult("""{"ok":true,"mode":"stop"}""") + return + } + + val connect = !intent.getBooleanExtra("noConnect", false) + val connectTimeoutMs = intent.getLongExtra("connectTimeoutMs", 20_000L) + if (connect) { + configureGateway(runtime = runtime, intent = intent) + } + if (connect || !runtime.isConnected.value) { + awaitGateway(runtime = runtime, timeoutMs = connectTimeoutMs) + } + + startActivity( + Intent(actionOpenVoiceE2e) + .setClass(this, MainActivity::class.java) + .addFlags(Intent.FLAG_ACTIVITY_NEW_TASK or Intent.FLAG_ACTIVITY_SINGLE_TOP or Intent.FLAG_ACTIVITY_CLEAR_TOP), + ) + + val transcript = + intent + .getDecodedStringExtra("transcript") + ?.trim() + .orEmpty() + .ifEmpty { "Reply exactly: Android voice e2e normal path ok." } + val realtimeReply = + intent + .getDecodedStringExtra("realtimeAssistant") + ?.trim() + .orEmpty() + .ifEmpty { "Android realtime voice e2e relay path ok." } + val timeoutMs = intent.getLongExtra("timeoutMs", 60_000L) + val result = + runtime.runVoiceE2e( + mode = mode, + transcript = transcript, + realtimeAssistantText = realtimeReply, + timeoutMs = timeoutMs, + ) + val resultJson = encodeResult(result) + writeResult(resultJson) + Log.i(tag, "PASS $resultJson") + } catch (err: Throwable) { + val resultJson = + buildJsonObject { + put("ok", JsonPrimitive(false)) + put("error", JsonPrimitive(err.message ?: err::class.java.simpleName)) + }.toString() + writeResult(resultJson) + Log.e(tag, "FAIL $resultJson", err) + } + } + + private fun configureGateway( + runtime: NodeRuntime, + intent: Intent, + ) { + val host = + intent + .getDecodedStringExtra("host") + ?.trim() + .orEmpty() + .ifEmpty { "127.0.0.1" } + val port = intent.getIntExtra("port", 18789) + runtime.setManualEnabled(true) + runtime.setManualHost(host) + runtime.setManualPort(port) + runtime.setManualTls(intent.getBooleanExtra("tls", false)) + runtime.setGatewayToken(intent.getDecodedStringExtra("token").orEmpty()) + runtime.setGatewayBootstrapToken(intent.getDecodedStringExtra("bootstrapToken").orEmpty()) + runtime.setGatewayPassword(intent.getDecodedStringExtra("password").orEmpty()) + runtime.setOnboardingCompleted(true) + runtime.connectManual() + } + + private suspend fun awaitGateway( + runtime: NodeRuntime, + timeoutMs: Long, + ) { + withTimeout(timeoutMs) { + while (!runtime.isConnected.value) { + delay(100L) + } + } + } + + private fun encodeResult(result: NodeRuntime.VoiceE2eResult): String = + buildJsonObject { + put("ok", JsonPrimitive(true)) + put("normal", result.normal?.let(::encodeSlice) ?: JsonNull) + put("realtime", result.realtime?.let(::encodeSlice) ?: JsonNull) + }.toString() + + private fun encodeSlice(slice: NodeRuntime.VoiceE2eSliceResult) = + buildJsonObject { + put("mode", JsonPrimitive(slice.mode)) + put("status", JsonPrimitive(slice.status)) + put("userText", slice.userText?.let(::JsonPrimitive) ?: JsonNull) + put("assistantText", slice.assistantText?.let(::JsonPrimitive) ?: JsonNull) + } + + private fun writeResult(json: String) { + File(cacheDir, resultFileName).writeText(json) + } +} + +private fun Intent.getDecodedStringExtra(name: String): String? { + val encoded = getStringExtra("${name}Base64") + if (!encoded.isNullOrBlank()) { + return String(Base64.decode(encoded, Base64.NO_WRAP), Charsets.UTF_8) + } + return getStringExtra(name) +} diff --git a/apps/android/app/src/main/java/ai/openclaw/app/AssistantLaunch.kt b/apps/android/app/src/main/java/ai/openclaw/app/AssistantLaunch.kt index e1bd409788f..5bae212db80 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/AssistantLaunch.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/AssistantLaunch.kt @@ -3,6 +3,7 @@ package ai.openclaw.app import android.content.Intent const val actionAskOpenClaw = "ai.openclaw.app.action.ASK_OPENCLAW" +const val actionOpenVoiceE2e = "ai.openclaw.app.debug.OPEN_VOICE_E2E" const val extraAssistantPrompt = "prompt" enum class HomeDestination { @@ -19,6 +20,14 @@ data class AssistantLaunchRequest( val autoSend: Boolean, ) +fun parseHomeDestinationIntent(intent: Intent?): HomeDestination? { + val action = intent?.action ?: return null + return when { + BuildConfig.DEBUG && action == actionOpenVoiceE2e -> HomeDestination.Voice + else -> null + } +} + fun parseAssistantLaunchIntent(intent: Intent?): AssistantLaunchRequest? { val action = intent?.action ?: return null return when (action) { diff --git a/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt b/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt index 7eb3b3dc604..77c31eb8466 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt @@ -79,6 +79,10 @@ class MainActivity : ComponentActivity() { } private fun handleAssistantIntent(intent: android.content.Intent?) { + parseHomeDestinationIntent(intent)?.let { destination -> + viewModel.requestHomeDestination(destination) + return + } val request = parseAssistantLaunchIntent(intent) ?: return viewModel.handleAssistantLaunch(request) } diff --git a/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt b/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt index 83882975d49..654fc542db5 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt @@ -330,6 +330,10 @@ class MainViewModel( _requestedHomeDestination.value = null } + fun requestHomeDestination(destination: HomeDestination) { + _requestedHomeDestination.value = destination + } + fun clearChatDraft() { _chatDraft.value = null } diff --git a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt index 22d2e238c8f..67134cb4d72 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt @@ -47,6 +47,7 @@ import ai.openclaw.app.protocol.OpenClawCanvasA2UIAction import ai.openclaw.app.voice.MicCaptureManager import ai.openclaw.app.voice.TalkModeManager import ai.openclaw.app.voice.VoiceConversationEntry +import ai.openclaw.app.voice.VoiceConversationRole import android.Manifest import android.content.Context import android.content.pm.PackageManager @@ -64,6 +65,7 @@ import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.flow.combine import kotlinx.coroutines.flow.distinctUntilChanged import kotlinx.coroutines.launch +import kotlinx.coroutines.withTimeout import kotlinx.serialization.Serializable import kotlinx.serialization.json.Json import kotlinx.serialization.json.JsonArray @@ -256,6 +258,18 @@ class NodeRuntime( val previousFingerprintSha256: String? = null, ) + data class VoiceE2eSliceResult( + val mode: String, + val status: String, + val userText: String?, + val assistantText: String?, + ) + + data class VoiceE2eResult( + val normal: VoiceE2eSliceResult?, + val realtime: VoiceE2eSliceResult?, + ) + private val _isConnected = MutableStateFlow(false) val isConnected: StateFlow = _isConnected.asStateFlow() private val _nodeConnected = MutableStateFlow(false) @@ -1187,6 +1201,115 @@ class NodeRuntime( talkMode.setPlaybackEnabled(value) } + suspend fun runVoiceE2e( + mode: String, + transcript: String, + realtimeAssistantText: String, + timeoutMs: Long, + ): VoiceE2eResult { + if (!BuildConfig.DEBUG) { + throw IllegalStateException("voice e2e is debug-only") + } + if (!_isConnected.value) { + throw IllegalStateException("gateway not connected") + } + if (!hasRecordAudioPermission()) { + throw IllegalStateException("microphone permission missing") + } + + val normalizedMode = mode.trim().lowercase().ifEmpty { "both" } + val runNormal = normalizedMode == "both" || normalizedMode == "normal" || normalizedMode == "dictation" + val runRealtime = normalizedMode == "both" || normalizedMode == "realtime" || normalizedMode == "talk" + if (!runNormal && !runRealtime) { + throw IllegalArgumentException("unknown voice e2e mode: $mode") + } + + val previousSpeakerEnabled = speakerEnabled.value + setSpeakerEnabled(false) + var completed = false + return try { + VoiceE2eResult( + normal = + if (runNormal) { + runNormalVoiceE2e(transcript = transcript, timeoutMs = timeoutMs) + } else { + null + }, + realtime = + if (runRealtime) { + runRealtimeVoiceE2e( + transcript = transcript, + assistantText = realtimeAssistantText, + timeoutMs = timeoutMs, + ) + } else { + null + }, + ).also { completed = true } + } finally { + if (!completed) { + stopActiveVoiceSession() + } + setSpeakerEnabled(previousSpeakerEnabled) + } + } + + private suspend fun runNormalVoiceE2e( + transcript: String, + timeoutMs: Long, + ): VoiceE2eSliceResult { + stopActiveVoiceSession() + setVoiceCaptureMode(VoiceCaptureMode.ManualMic) + micCapture.submitTranscribedMessage(transcript) + awaitVoiceConversation(timeoutMs = timeoutMs) { + micCapture.conversation.value.any { it.role == VoiceConversationRole.Assistant && !it.isStreaming } + } + val entries = micCapture.conversation.value + return VoiceE2eSliceResult( + mode = "normal", + status = micCapture.statusText.value, + userText = entries.lastOrNull { it.role == VoiceConversationRole.User }?.text, + assistantText = entries.lastOrNull { it.role == VoiceConversationRole.Assistant }?.text, + ) + } + + private suspend fun runRealtimeVoiceE2e( + transcript: String, + assistantText: String, + timeoutMs: Long, + ): VoiceE2eSliceResult { + stopActiveVoiceSession() + setVoiceCaptureMode(VoiceCaptureMode.TalkMode) + talkMode.runE2eRealtimeTurn( + userText = transcript, + assistantText = assistantText, + timeoutMs = timeoutMs, + ) + awaitVoiceConversation(timeoutMs = timeoutMs) { + val entries = talkMode.conversation.value + entries.any { it.role == VoiceConversationRole.User && !it.isStreaming } && + entries.any { it.role == VoiceConversationRole.Assistant && !it.isStreaming } + } + val entries = talkMode.conversation.value + return VoiceE2eSliceResult( + mode = "realtime", + status = talkMode.statusText.value, + userText = entries.lastOrNull { it.role == VoiceConversationRole.User }?.text, + assistantText = entries.lastOrNull { it.role == VoiceConversationRole.Assistant }?.text, + ) + } + + private suspend fun awaitVoiceConversation( + timeoutMs: Long, + ready: () -> Boolean, + ) { + withTimeout(timeoutMs) { + while (!ready()) { + delay(100L) + } + } + } + private fun setVoiceCaptureMode( mode: VoiceCaptureMode, persistManualMic: Boolean = true, diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt index 52760c73861..d2ac0569c14 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt @@ -262,6 +262,11 @@ class MicCaptureManager( } } + internal fun submitTranscribedMessage(text: String) { + queueRecognizedMessage(text) + sendQueuedIfIdle() + } + fun handleGatewayEvent( event: String, payloadJson: String?, @@ -701,8 +706,7 @@ class MicCaptureManager( val text = obj["text"].asStringOrNull()?.trim().orEmpty() if (text.isNotEmpty()) { if (text != flushedPartialTranscript) { - queueRecognizedMessage(text) - sendQueuedIfIdle() + submitTranscribedMessage(text) } else { flushedPartialTranscript = null _liveTranscript.value = null diff --git a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt index dbf4d7c2dbe..bdd720e4b34 100644 --- a/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt +++ b/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt @@ -480,6 +480,19 @@ class TalkModeManager internal constructor( pendingRunId = null } + internal suspend fun runE2eRealtimeTurn( + userText: String, + assistantText: String, + timeoutMs: Long, + ) { + if (!_isEnabled.value) { + setEnabled(true) + } + val sessionId = awaitRealtimeSessionId(timeoutMs) + handleGatewayEvent("talk.event", realtimeTranscriptPayload(sessionId = sessionId, role = "user", text = userText)) + handleGatewayEvent("talk.event", realtimeTranscriptPayload(sessionId = sessionId, role = "assistant", text = assistantText)) + } + fun setPlaybackEnabled(enabled: Boolean) { if (playbackEnabled == enabled) return playbackEnabled = enabled @@ -597,6 +610,19 @@ class TalkModeManager internal constructor( shutdownTextToSpeech() } + private suspend fun awaitRealtimeSessionId(timeoutMs: Long): String = + withTimeout(timeoutMs) { + while (true) { + realtimeSessionId?.let { return@withTimeout it } + val status = _statusText.value + if (!_isEnabled.value && status.startsWith("Talk failed")) { + throw IllegalStateException(status) + } + delay(100L) + } + error("unreachable") + } + private suspend fun startRealtimeRelay(generation: Long) { if (!isConnected()) { _statusText.value = "Gateway not connected" @@ -852,6 +878,19 @@ class TalkModeManager internal constructor( } } + private fun realtimeTranscriptPayload( + sessionId: String, + role: String, + text: String, + ): String = + buildJsonObject { + put("relaySessionId", JsonPrimitive(sessionId)) + put("type", JsonPrimitive("transcript")) + put("role", JsonPrimitive(role)) + put("text", JsonPrimitive(text)) + put("final", JsonPrimitive(true)) + }.toString() + private fun playRealtimeAudio(bytes: ByteArray) { if (!playbackEnabled || realtimeOutputSuppressed || bytes.isEmpty()) return val queue = ensureRealtimeAudioQueue() diff --git a/apps/android/app/src/test/java/ai/openclaw/app/voice/MicCaptureManagerTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/voice/MicCaptureManagerTest.kt index c6c63ab6aab..d23ac4c4464 100644 --- a/apps/android/app/src/test/java/ai/openclaw/app/voice/MicCaptureManagerTest.kt +++ b/apps/android/app/src/test/java/ai/openclaw/app/voice/MicCaptureManagerTest.kt @@ -100,6 +100,40 @@ class MicCaptureManagerTest { assertEquals(emptyList(), manager.conversation.value) } + @Test + @OptIn(ExperimentalCoroutinesApi::class) + fun submittedTranscribedMessageUsesGatewayTurnPath() = + runTest { + val sentMessages = mutableListOf() + val manager = + createManager( + scope = this, + sendToGateway = { message, onRunIdKnown -> + sentMessages += message + onRunIdKnown("run-voice-e2e") + "run-voice-e2e" + }, + ) + + manager.onGatewayConnectionChanged(true) + manager.submitTranscribedMessage("voice e2e message") + runCurrent() + manager.handleGatewayEvent("chat", chatFinalPayload(runId = "run-voice-e2e", text = "voice e2e reply")) + advanceUntilIdle() + + assertEquals(listOf("voice e2e message"), sentMessages) + assertEquals( + listOf(VoiceConversationRole.User, VoiceConversationRole.Assistant), + manager.conversation.value.map { it.role }, + ) + assertEquals( + "voice e2e reply", + manager.conversation.value + .last() + .text, + ) + } + @Test fun pcm16FramesAreEncodedAsPcmuFrames() { val manager = createManager() diff --git a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt index 8b9fbd07042..2bbaae092e9 100644 --- a/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt +++ b/apps/android/app/src/test/java/ai/openclaw/app/voice/TalkModeManagerTest.kt @@ -11,6 +11,7 @@ import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.Job import kotlinx.coroutines.SupervisorJob +import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.launch import kotlinx.coroutines.test.advanceUntilIdle import kotlinx.coroutines.test.currentTime @@ -327,6 +328,28 @@ class TalkModeManagerTest { assertTrue(entries.none { it.isStreaming }) } + @Test + fun e2eRealtimeTurnUsesRelayTranscriptPath() = + runTest { + val manager = createManager(scope = this) + + setPrivateField(manager, "realtimeSessionId", "relay-1") + setMutableStateFlow(manager, "_isEnabled", true) + manager.runE2eRealtimeTurn( + userText = "voice e2e user", + assistantText = "voice e2e assistant", + timeoutMs = 1_000L, + ) + + val entries = manager.conversation.value + assertEquals(2, entries.size) + assertEquals(VoiceConversationRole.User, entries[0].role) + assertEquals("voice e2e user", entries[0].text) + assertEquals(VoiceConversationRole.Assistant, entries[1].role) + assertEquals("voice e2e assistant", entries[1].text) + assertTrue(entries.none { it.isStreaming }) + } + @Test @OptIn(ExperimentalCoroutinesApi::class) fun realtimeStartWithoutGatewayTurnsTalkOff() = @@ -483,6 +506,15 @@ class TalkModeManagerTest { return field.get(target) } + @Suppress("UNCHECKED_CAST") + private fun setMutableStateFlow( + target: Any, + name: String, + value: T, + ) { + (readPrivateField(target, name) as MutableStateFlow).value = value + } + private fun shouldAppendRealtimeCapturedFrame( manager: TalkModeManager, length: Int, diff --git a/apps/android/scripts/voice-e2e.sh b/apps/android/scripts/voice-e2e.sh new file mode 100755 index 00000000000..3d02875429d --- /dev/null +++ b/apps/android/scripts/voice-e2e.sh @@ -0,0 +1,226 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)" +ANDROID_DIR="$ROOT_DIR/apps/android" +PACKAGE_NAME="ai.openclaw.app" +RECEIVER="$PACKAGE_NAME/.VoiceE2eReceiver" +RUN_ACTION="ai.openclaw.app.debug.RUN_VOICE_E2E" +OPEN_ACTION="ai.openclaw.app.debug.OPEN_VOICE_E2E" +PORT=18789 +HOST="127.0.0.1" +MODE="both" +TRANSCRIPT="Reply exactly: Android voice e2e normal path ok." +REALTIME_ASSISTANT="Android realtime voice e2e relay path ok." +TIMEOUT_MS=60000 +INSTALL=1 +CONNECT=1 +CLEANUP=0 +START_GATEWAY=0 + +usage() { + cat <<'USAGE' +Usage: apps/android/scripts/voice-e2e.sh [options] + +Options: + --mode normal|realtime|both Voice path to test. Default: both. + --transcript TEXT Synthetic user transcript for the voice turn. + --realtime-assistant TEXT Synthetic realtime assistant relay text. + --host HOST Gateway host visible from Android. Default: 127.0.0.1. + --port PORT Gateway port. Default: 18789. + --timeout-ms MS Per-mode timeout. Default: 60000. + --skip-install Reuse the installed debug app. + --no-connect Do not rewrite manual gateway settings. + --start-gateway Start a temporary local gateway with bws_get_secret. + --cleanup Stop voice capture after screenshots. +USAGE +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --mode) + MODE="$2" + shift 2 + ;; + --transcript) + TRANSCRIPT="$2" + shift 2 + ;; + --realtime-assistant) + REALTIME_ASSISTANT="$2" + shift 2 + ;; + --host) + HOST="$2" + shift 2 + ;; + --port) + PORT="$2" + shift 2 + ;; + --timeout-ms) + TIMEOUT_MS="$2" + shift 2 + ;; + --skip-install) + INSTALL=0 + shift + ;; + --no-connect) + CONNECT=0 + shift + ;; + --start-gateway) + START_GATEWAY=1 + shift + ;; + --cleanup) + CLEANUP=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "unknown option: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +export JAVA_HOME="${JAVA_HOME:-/opt/homebrew/opt/openjdk@17/libexec/openjdk.jdk/Contents/Home}" +export ANDROID_HOME="${ANDROID_HOME:-/opt/homebrew/share/android-commandlinetools}" +export ANDROID_SDK_ROOT="${ANDROID_SDK_ROOT:-$ANDROID_HOME}" +export PATH="/opt/homebrew/opt/openjdk@17/bin:$ANDROID_HOME/platform-tools:$ANDROID_HOME/cmdline-tools/latest/bin:$PATH" + +ARTIFACT_DIR="/tmp/openclaw-android-voice-e2e-$(date +%Y%m%d-%H%M%S)" +mkdir -p "$ARTIFACT_DIR" + +cleanup_gateway() { + if [[ -n "${GATEWAY_PID:-}" ]]; then + kill "$GATEWAY_PID" >/dev/null 2>&1 || true + fi +} +trap cleanup_gateway EXIT + +if ! adb devices -l | awk 'NR > 1 && $2 == "device" { found = 1 } END { exit(found ? 0 : 1) }'; then + echo "no authorized Android device found" >&2 + adb devices -l >&2 + exit 1 +fi + +adb reverse "tcp:$PORT" "tcp:$PORT" >/dev/null + +if [[ "$START_GATEWAY" -eq 1 ]]; then + if command -v bws_get_secret >/dev/null 2>&1; then + OPENCLAW_OPENAI_API_KEY="$(bws_get_secret OPENCLAW_OPENAI_API_KEY)" + else + OPENCLAW_OPENAI_API_KEY="$(zsh -ic 'bws_get_secret OPENCLAW_OPENAI_API_KEY')" + fi + ( + cd "$ROOT_DIR" + OPENAI_API_KEY="$OPENCLAW_OPENAI_API_KEY" \ + pnpm openclaw gateway run \ + --port "$PORT" \ + --auth none \ + --bind loopback \ + --force \ + --allow-unconfigured \ + --ws-log compact + ) >"$ARTIFACT_DIR/gateway.log" 2>&1 & + GATEWAY_PID=$! + sleep 4 + if ! kill -0 "$GATEWAY_PID" >/dev/null 2>&1; then + cat "$ARTIFACT_DIR/gateway.log" >&2 + exit 1 + fi + unset OPENCLAW_OPENAI_API_KEY +fi + +if [[ "$INSTALL" -eq 1 ]]; then + (cd "$ANDROID_DIR" && ./gradlew :app:installPlayDebug) +fi + +adb shell pm grant "$PACKAGE_NAME" android.permission.RECORD_AUDIO >/dev/null 2>&1 || true +adb shell am force-stop "$PACKAGE_NAME" >/dev/null +adb shell am start -a "$OPEN_ACTION" -n "$PACKAGE_NAME/.MainActivity" >/dev/null +adb logcat -c + +run_mode() { + local test_mode="$1" + local result_name="$ARTIFACT_DIR/result-$test_mode.json" + local screenshot_name="$ARTIFACT_DIR/screen-$test_mode.png" + local transcript_base64 + local realtime_assistant_base64 + transcript_base64="$(printf '%s' "$TRANSCRIPT" | base64 | tr -d '\n')" + realtime_assistant_base64="$(printf '%s' "$REALTIME_ASSISTANT" | base64 | tr -d '\n')" + + adb shell run-as "$PACKAGE_NAME" rm -f cache/voice_e2e_result.json >/dev/null 2>&1 || true + local no_connect_flag=true + if [[ "$CONNECT" -eq 1 ]]; then + no_connect_flag=false + fi + + adb shell am broadcast \ + -a "$RUN_ACTION" \ + -n "$RECEIVER" \ + --es mode "$test_mode" \ + --ez noConnect "$no_connect_flag" \ + --es host "$HOST" \ + --ei port "$PORT" \ + --ez tls false \ + --el timeoutMs "$TIMEOUT_MS" \ + --el connectTimeoutMs "$TIMEOUT_MS" \ + --es transcriptBase64 "$transcript_base64" \ + --es realtimeAssistantBase64 "$realtime_assistant_base64" >/dev/null + + local deadline=$((SECONDS + TIMEOUT_MS / 1000 + 20)) + local result="" + while [[ "$SECONDS" -lt "$deadline" ]]; do + result="$(adb shell run-as "$PACKAGE_NAME" cat cache/voice_e2e_result.json 2>/dev/null | tr -d '\r' || true)" + if [[ -n "$result" ]]; then + break + fi + sleep 1 + done + + if [[ -z "$result" ]]; then + echo "voice e2e $test_mode timed out waiting for result" >&2 + exit 1 + fi + printf '%s\n' "$result" >"$result_name" + adb exec-out screencap -p >"$screenshot_name" + if ! grep -q '"ok":true' "$result_name"; then + echo "voice e2e $test_mode failed: $result" >&2 + exit 1 + fi +} + +case "$MODE" in + both) + run_mode normal + run_mode realtime + ;; + normal|dictation) + run_mode normal + ;; + realtime|talk) + run_mode realtime + ;; + *) + echo "unknown mode: $MODE" >&2 + exit 2 + ;; +esac + +adb logcat -d -v time | + rg -i 'OpenClaw|TalkMode|MicCapture|AudioRecord|SpeechRecognizer|realtime|talk.session|appendAudio|transcript|Talk failed|Transcription failed|Speech network|VoiceE2E' | + tail -250 >"$ARTIFACT_DIR/logcat.txt" || true + +if [[ "$CLEANUP" -eq 1 ]]; then + adb shell am broadcast -a "$RUN_ACTION" -n "$RECEIVER" --es mode stop >/dev/null +fi + +echo "$ARTIFACT_DIR"