test(android): add voice mode adb e2e harness

This commit is contained in:
Ayaan Zaidi
2026-05-24 22:31:11 +05:30
parent 3db1508f1e
commit e52a3b31e4
11 changed files with 679 additions and 2 deletions

View File

@@ -0,0 +1,14 @@
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
<application>
<!-- Entry point of the voice e2e harness: handles the RUN_VOICE_E2E broadcast action.
     Exported, so senders outside the app (the harness script uses `adb shell am broadcast`) can target it. -->
<receiver
android:name=".VoiceE2eReceiver"
android:exported="true">
<intent-filter>
<action android:name="ai.openclaw.app.debug.RUN_VOICE_E2E" />
</intent-filter>
</receiver>
<!-- Runs the command forwarded by the receiver; not exported, so only the app itself can start it. -->
<service
android:name=".VoiceE2eService"
android:exported="false" />
</application>
</manifest>

View File

@@ -0,0 +1,188 @@
package ai.openclaw.app
import android.app.Service
import android.content.BroadcastReceiver
import android.content.Context
import android.content.Intent
import android.os.IBinder
import android.util.Base64
import android.util.Log
import kotlinx.coroutines.CancellationException
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.TimeoutCancellationException
import kotlinx.coroutines.cancel
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
import kotlinx.coroutines.withTimeout
import kotlinx.serialization.json.JsonNull
import kotlinx.serialization.json.JsonPrimitive
import kotlinx.serialization.json.buildJsonObject
import java.io.File
// Logcat tag used for the PASS/FAIL lines emitted by the voice e2e service.
private const val tag = "VoiceE2E"
// Name of the JSON result file the service writes into the app cache directory.
private const val resultFileName = "voice_e2e_result.json"
/**
 * Broadcast entry point of the voice e2e harness.
 *
 * Each received broadcast is handed to [VoiceE2eService] with all of its extras copied over;
 * the receiver itself does no work.
 */
class VoiceE2eReceiver : BroadcastReceiver() {
    override fun onReceive(
        context: Context,
        intent: Intent,
    ) {
        // Build the service intent first, then copy every extra from the broadcast onto it.
        val serviceIntent = Intent(context, VoiceE2eService::class.java)
        serviceIntent.putExtras(intent)
        context.startService(serviceIntent)
    }
}
/**
 * Started service that executes one voice e2e command per start request.
 *
 * Each command runs in a coroutine on [Dispatchers.IO]; its outcome is written as JSON to
 * `voice_e2e_result.json` in the cache directory and logged under the `VoiceE2E` tag.
 * The service stops itself (for that start id) once the command finishes.
 */
class VoiceE2eService : Service() {
    private val serviceScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)

    /** Binding is not supported. */
    override fun onBind(intent: Intent?): IBinder? = null

    /** Launches the command carried by [intent]; a null intent is ignored. */
    override fun onStartCommand(
        intent: Intent?,
        flags: Int,
        startId: Int,
    ): Int {
        val command = intent ?: return START_NOT_STICKY
        serviceScope.launch {
            try {
                runCommand(command)
            } finally {
                stopSelf(startId)
            }
        }
        return START_NOT_STICKY
    }

    /** Cancels any command still in flight. */
    override fun onDestroy() {
        serviceScope.cancel()
        super.onDestroy()
    }

    /**
     * Executes the command described by the extras of [intent] and records the outcome.
     *
     * Extras read here: `mode`, `noConnect`, `connectTimeoutMs`, `transcript`,
     * `realtimeAssistant`, `timeoutMs` (string extras may alternatively be supplied
     * base64-encoded under `<name>Base64`). Any failure is written as `{"ok":false,...}`.
     */
    private suspend fun runCommand(intent: Intent) {
        try {
            val app = applicationContext as NodeApp
            val runtime = app.ensureRuntime()
            val mode =
                intent
                    .getDecodedStringExtra("mode")
                    ?.trim()
                    .orEmpty()
                    .ifEmpty { "both" }
            // Case-insensitive, matching how NodeRuntime.runVoiceE2e normalizes the other modes.
            if (mode.equals("stop", ignoreCase = true)) {
                runtime.cancelMicCapture()
                runtime.setTalkModeEnabled(false)
                writeResult("""{"ok":true,"mode":"stop"}""")
                return
            }
            val connect = !intent.getBooleanExtra("noConnect", false)
            val connectTimeoutMs = intent.getLongExtra("connectTimeoutMs", 20_000L)
            if (connect) {
                configureGateway(runtime = runtime, intent = intent)
            }
            if (connect || !runtime.isConnected.value) {
                awaitGateway(runtime = runtime, timeoutMs = connectTimeoutMs)
            }
            startActivity(
                Intent(actionOpenVoiceE2e)
                    .setClass(this, MainActivity::class.java)
                    .addFlags(Intent.FLAG_ACTIVITY_NEW_TASK or Intent.FLAG_ACTIVITY_SINGLE_TOP or Intent.FLAG_ACTIVITY_CLEAR_TOP),
            )
            val transcript =
                intent
                    .getDecodedStringExtra("transcript")
                    ?.trim()
                    .orEmpty()
                    .ifEmpty { "Reply exactly: Android voice e2e normal path ok." }
            val realtimeReply =
                intent
                    .getDecodedStringExtra("realtimeAssistant")
                    ?.trim()
                    .orEmpty()
                    .ifEmpty { "Android realtime voice e2e relay path ok." }
            val timeoutMs = intent.getLongExtra("timeoutMs", 60_000L)
            val result =
                runtime.runVoiceE2e(
                    mode = mode,
                    transcript = transcript,
                    realtimeAssistantText = realtimeReply,
                    timeoutMs = timeoutMs,
                )
            val resultJson = encodeResult(result)
            writeResult(resultJson)
            Log.i(tag, "PASS $resultJson")
        } catch (err: Throwable) {
            val resultJson =
                buildJsonObject {
                    put("ok", JsonPrimitive(false))
                    put("error", JsonPrimitive(err.message ?: err::class.java.simpleName))
                }.toString()
            writeResult(resultJson)
            Log.e(tag, "FAIL $resultJson", err)
            // A withTimeout expiry is an ordinary test failure and ends here. Any other
            // CancellationException means this coroutine itself was cancelled (e.g. onDestroy),
            // so it is rethrown to keep cancellation cooperative instead of being swallowed.
            if (err is CancellationException && err !is TimeoutCancellationException) {
                throw err
            }
        }
    }

    /** Applies the manual gateway settings carried by [intent] to [runtime] and starts a manual connect. */
    private fun configureGateway(
        runtime: NodeRuntime,
        intent: Intent,
    ) {
        val host =
            intent
                .getDecodedStringExtra("host")
                ?.trim()
                .orEmpty()
                .ifEmpty { "127.0.0.1" }
        val port = intent.getIntExtra("port", 18789)
        runtime.setManualEnabled(true)
        runtime.setManualHost(host)
        runtime.setManualPort(port)
        runtime.setManualTls(intent.getBooleanExtra("tls", false))
        runtime.setGatewayToken(intent.getDecodedStringExtra("token").orEmpty())
        runtime.setGatewayBootstrapToken(intent.getDecodedStringExtra("bootstrapToken").orEmpty())
        runtime.setGatewayPassword(intent.getDecodedStringExtra("password").orEmpty())
        runtime.setOnboardingCompleted(true)
        runtime.connectManual()
    }

    /** Polls `runtime.isConnected` every 100 ms until it is true; times out after [timeoutMs]. */
    private suspend fun awaitGateway(
        runtime: NodeRuntime,
        timeoutMs: Long,
    ) {
        withTimeout(timeoutMs) {
            while (!runtime.isConnected.value) {
                delay(100L)
            }
        }
    }

    /** Serializes a successful run; slices that were not executed are encoded as JSON null. */
    private fun encodeResult(result: NodeRuntime.VoiceE2eResult): String =
        buildJsonObject {
            put("ok", JsonPrimitive(true))
            put("normal", result.normal?.let(::encodeSlice) ?: JsonNull)
            put("realtime", result.realtime?.let(::encodeSlice) ?: JsonNull)
        }.toString()

    /** Serializes one slice; absent texts are encoded as JSON null. */
    private fun encodeSlice(slice: NodeRuntime.VoiceE2eSliceResult) =
        buildJsonObject {
            put("mode", JsonPrimitive(slice.mode))
            put("status", JsonPrimitive(slice.status))
            put("userText", slice.userText?.let(::JsonPrimitive) ?: JsonNull)
            put("assistantText", slice.assistantText?.let(::JsonPrimitive) ?: JsonNull)
        }

    /** Overwrites the result file in the cache directory with [json]. */
    private fun writeResult(json: String) {
        File(cacheDir, resultFileName).writeText(json)
    }
}
/**
 * Reads the string extra [name], preferring a base64-encoded variant stored under `<name>Base64`.
 *
 * When the encoded extra is present and not blank it is decoded as UTF-8 and returned;
 * otherwise the plain extra (possibly null) is returned.
 */
private fun Intent.getDecodedStringExtra(name: String): String? {
    val encodedValue = getStringExtra("${name}Base64")
    return if (encodedValue.isNullOrBlank()) {
        getStringExtra(name)
    } else {
        Base64.decode(encodedValue, Base64.NO_WRAP).toString(Charsets.UTF_8)
    }
}

View File

@@ -3,6 +3,7 @@ package ai.openclaw.app
import android.content.Intent
const val actionAskOpenClaw = "ai.openclaw.app.action.ASK_OPENCLAW"
// Intent action mapped to HomeDestination.Voice by parseHomeDestinationIntent, in debug builds only.
const val actionOpenVoiceE2e = "ai.openclaw.app.debug.OPEN_VOICE_E2E"
const val extraAssistantPrompt = "prompt"
enum class HomeDestination {
@@ -19,6 +20,14 @@ data class AssistantLaunchRequest(
val autoSend: Boolean,
)
/**
 * Maps a launch intent to the home destination it requests.
 *
 * Only the voice e2e action is recognized, and only when [BuildConfig.DEBUG] is true;
 * every other intent (including null or action-less ones) yields null.
 */
fun parseHomeDestinationIntent(intent: Intent?): HomeDestination? {
    val requestsVoice = BuildConfig.DEBUG && intent?.action == actionOpenVoiceE2e
    return if (requestsVoice) HomeDestination.Voice else null
}
fun parseAssistantLaunchIntent(intent: Intent?): AssistantLaunchRequest? {
val action = intent?.action ?: return null
return when (action) {

View File

@@ -79,6 +79,10 @@ class MainActivity : ComponentActivity() {
}
/**
 * Routes an incoming intent to the view model.
 *
 * A home-destination intent takes precedence; otherwise the intent is tried as an
 * assistant launch request. Intents matching neither are ignored.
 */
private fun handleAssistantIntent(intent: android.content.Intent?) {
    val destination = parseHomeDestinationIntent(intent)
    if (destination != null) {
        viewModel.requestHomeDestination(destination)
        return
    }
    parseAssistantLaunchIntent(intent)?.let { launchRequest ->
        viewModel.handleAssistantLaunch(launchRequest)
    }
}

View File

@@ -330,6 +330,10 @@ class MainViewModel(
_requestedHomeDestination.value = null
}
/** Publishes [destination] as the currently requested home destination. */
fun requestHomeDestination(destination: HomeDestination) {
_requestedHomeDestination.value = destination
}
/** Resets the chat draft to null. */
fun clearChatDraft() {
_chatDraft.value = null
}

View File

@@ -47,6 +47,7 @@ import ai.openclaw.app.protocol.OpenClawCanvasA2UIAction
import ai.openclaw.app.voice.MicCaptureManager
import ai.openclaw.app.voice.TalkModeManager
import ai.openclaw.app.voice.VoiceConversationEntry
import ai.openclaw.app.voice.VoiceConversationRole
import android.Manifest
import android.content.Context
import android.content.pm.PackageManager
@@ -64,6 +65,7 @@ import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.combine
import kotlinx.coroutines.flow.distinctUntilChanged
import kotlinx.coroutines.launch
import kotlinx.coroutines.withTimeout
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonArray
@@ -256,6 +258,18 @@ class NodeRuntime(
val previousFingerprintSha256: String? = null,
)
/**
 * Outcome of one voice e2e slice.
 *
 * @property mode slice identifier; the producers in this class use "normal" and "realtime"
 * @property status status text of the voice component, read when the slice finished
 * @property userText text of the last user conversation entry, or null when there is none
 * @property assistantText text of the last assistant conversation entry, or null when there is none
 */
data class VoiceE2eSliceResult(
val mode: String,
val status: String,
val userText: String?,
val assistantText: String?,
)
/**
 * Combined outcome of a voice e2e run.
 *
 * @property normal result of the normal slice, or null when that slice was not requested
 * @property realtime result of the realtime slice, or null when that slice was not requested
 */
data class VoiceE2eResult(
val normal: VoiceE2eSliceResult?,
val realtime: VoiceE2eSliceResult?,
)
// Mutable backing flow for the connection flag; starts out false.
private val _isConnected = MutableStateFlow(false)
// Read-only view of the connection flag for callers outside this class.
val isConnected: StateFlow<Boolean> = _isConnected.asStateFlow()
private val _nodeConnected = MutableStateFlow(false)
@@ -1187,6 +1201,115 @@ class NodeRuntime(
talkMode.setPlaybackEnabled(value)
}
/**
 * Runs the requested voice e2e slices and returns their results.
 *
 * [mode] is trimmed and lowercased; blank means "both". "normal"/"dictation" select the normal
 * slice, "realtime"/"talk" the realtime slice. The normal slice always runs before the realtime one.
 * The speaker is disabled for the duration of the run and restored afterwards; the active voice
 * session is stopped only when the run fails.
 *
 * @param timeoutMs timeout handed to each slice
 * @throws IllegalStateException outside debug builds, without a gateway connection, or without
 *   the record-audio permission
 * @throws IllegalArgumentException when [mode] names no known slice
 */
suspend fun runVoiceE2e(
    mode: String,
    transcript: String,
    realtimeAssistantText: String,
    timeoutMs: Long,
): VoiceE2eResult {
    check(BuildConfig.DEBUG) { "voice e2e is debug-only" }
    check(_isConnected.value) { "gateway not connected" }
    check(hasRecordAudioPermission()) { "microphone permission missing" }

    val requestedMode = mode.trim().lowercase().ifEmpty { "both" }
    val wantsNormal = requestedMode in setOf("both", "normal", "dictation")
    val wantsRealtime = requestedMode in setOf("both", "realtime", "talk")
    require(wantsNormal || wantsRealtime) { "unknown voice e2e mode: $mode" }

    val speakerWasEnabled = speakerEnabled.value
    setSpeakerEnabled(false)
    var succeeded = false
    try {
        val normalSlice =
            if (wantsNormal) runNormalVoiceE2e(transcript = transcript, timeoutMs = timeoutMs) else null
        val realtimeSlice =
            if (wantsRealtime) {
                runRealtimeVoiceE2e(
                    transcript = transcript,
                    assistantText = realtimeAssistantText,
                    timeoutMs = timeoutMs,
                )
            } else {
                null
            }
        val outcome = VoiceE2eResult(normal = normalSlice, realtime = realtimeSlice)
        succeeded = true
        return outcome
    } finally {
        // Leave the session running after a successful run; tear it down only on failure.
        if (!succeeded) {
            stopActiveVoiceSession()
        }
        setSpeakerEnabled(speakerWasEnabled)
    }
}
/**
 * Runs the normal (manual mic) slice: submits [transcript] as an already-transcribed message
 * and waits until the mic conversation contains a non-streaming assistant entry.
 *
 * NOTE(review): the wait condition looks at the whole conversation; whether entries from an
 * earlier run are cleared beforehand is not visible here — confirm.
 */
private suspend fun runNormalVoiceE2e(
    transcript: String,
    timeoutMs: Long,
): VoiceE2eSliceResult {
    stopActiveVoiceSession()
    setVoiceCaptureMode(VoiceCaptureMode.ManualMic)
    micCapture.submitTranscribedMessage(transcript)
    awaitVoiceConversation(timeoutMs = timeoutMs) {
        micCapture.conversation.value.any { entry ->
            entry.role == VoiceConversationRole.Assistant && !entry.isStreaming
        }
    }
    val snapshot = micCapture.conversation.value
    val finalStatus = micCapture.statusText.value
    val lastUser = snapshot.lastOrNull { it.role == VoiceConversationRole.User }
    val lastAssistant = snapshot.lastOrNull { it.role == VoiceConversationRole.Assistant }
    return VoiceE2eSliceResult(
        mode = "normal",
        status = finalStatus,
        userText = lastUser?.text,
        assistantText = lastAssistant?.text,
    )
}
/**
 * Runs the realtime (talk mode) slice: injects one user/assistant turn through the talk manager
 * and waits until the talk conversation holds a non-streaming entry for both roles.
 *
 * [timeoutMs] bounds the turn injection and the subsequent wait separately.
 */
private suspend fun runRealtimeVoiceE2e(
    transcript: String,
    assistantText: String,
    timeoutMs: Long,
): VoiceE2eSliceResult {
    stopActiveVoiceSession()
    setVoiceCaptureMode(VoiceCaptureMode.TalkMode)
    talkMode.runE2eRealtimeTurn(
        userText = transcript,
        assistantText = assistantText,
        timeoutMs = timeoutMs,
    )
    awaitVoiceConversation(timeoutMs = timeoutMs) {
        val settled = talkMode.conversation.value.filterNot { it.isStreaming }
        settled.any { it.role == VoiceConversationRole.User } &&
            settled.any { it.role == VoiceConversationRole.Assistant }
    }
    val snapshot = talkMode.conversation.value
    val finalStatus = talkMode.statusText.value
    val lastUser = snapshot.lastOrNull { it.role == VoiceConversationRole.User }
    val lastAssistant = snapshot.lastOrNull { it.role == VoiceConversationRole.Assistant }
    return VoiceE2eSliceResult(
        mode = "realtime",
        status = finalStatus,
        userText = lastUser?.text,
        assistantText = lastAssistant?.text,
    )
}
/**
 * Suspends until [ready] returns true, re-checking every 100 ms.
 * The predicate is evaluated before the first delay; the wait times out after [timeoutMs].
 */
private suspend fun awaitVoiceConversation(
    timeoutMs: Long,
    ready: () -> Boolean,
) {
    withTimeout(timeoutMs) {
        while (true) {
            if (ready()) break
            delay(100L)
        }
    }
}
private fun setVoiceCaptureMode(
mode: VoiceCaptureMode,
persistManualMic: Boolean = true,

View File

@@ -262,6 +262,11 @@ class MicCaptureManager(
}
}
/**
 * Submits already-transcribed [text]: queues it as a recognized message, then triggers
 * [sendQueuedIfIdle].
 */
internal fun submitTranscribedMessage(text: String) {
queueRecognizedMessage(text)
sendQueuedIfIdle()
}
fun handleGatewayEvent(
event: String,
payloadJson: String?,
@@ -701,8 +706,7 @@ class MicCaptureManager(
val text = obj["text"].asStringOrNull()?.trim().orEmpty()
if (text.isNotEmpty()) {
if (text != flushedPartialTranscript) {
queueRecognizedMessage(text)
sendQueuedIfIdle()
submitTranscribedMessage(text)
} else {
flushedPartialTranscript = null
_liveTranscript.value = null

View File

@@ -480,6 +480,19 @@ class TalkModeManager internal constructor(
pendingRunId = null
}
/**
 * Injects one synthetic realtime turn for e2e testing.
 *
 * Enables talk mode if it is not enabled yet, waits (up to [timeoutMs]) for a realtime session id,
 * then feeds a final user transcript followed by a final assistant transcript through
 * [handleGatewayEvent] as `talk.event` payloads.
 */
internal suspend fun runE2eRealtimeTurn(
    userText: String,
    assistantText: String,
    timeoutMs: Long,
) {
    val alreadyEnabled = _isEnabled.value
    if (!alreadyEnabled) {
        setEnabled(true)
    }
    val sessionId = awaitRealtimeSessionId(timeoutMs)
    // User transcript first, assistant second; each payload is built right before it is dispatched.
    val turns = listOf("user" to userText, "assistant" to assistantText)
    for ((role, text) in turns) {
        handleGatewayEvent("talk.event", realtimeTranscriptPayload(sessionId = sessionId, role = role, text = text))
    }
}
fun setPlaybackEnabled(enabled: Boolean) {
if (playbackEnabled == enabled) return
playbackEnabled = enabled
@@ -597,6 +610,19 @@ class TalkModeManager internal constructor(
shutdownTextToSpeech()
}
/**
 * Waits until a realtime session id is available and returns it, polling every 100 ms.
 *
 * @throws IllegalStateException with the current status text when talk mode is disabled and the
 *   status starts with "Talk failed"
 * Times out after [timeoutMs].
 */
private suspend fun awaitRealtimeSessionId(timeoutMs: Long): String =
    withTimeout(timeoutMs) {
        var sessionId = realtimeSessionId
        while (sessionId == null) {
            val currentStatus = _statusText.value
            if (!_isEnabled.value && currentStatus.startsWith("Talk failed")) {
                throw IllegalStateException(currentStatus)
            }
            delay(100L)
            sessionId = realtimeSessionId
        }
        sessionId
    }
private suspend fun startRealtimeRelay(generation: Long) {
if (!isConnected()) {
_statusText.value = "Gateway not connected"
@@ -852,6 +878,19 @@ class TalkModeManager internal constructor(
}
}
/**
 * Builds the JSON text of a final realtime transcript event for [sessionId].
 * Field order: relaySessionId, type, role, text, final.
 */
private fun realtimeTranscriptPayload(
    sessionId: String,
    role: String,
    text: String,
): String {
    val fields =
        listOf(
            "relaySessionId" to JsonPrimitive(sessionId),
            "type" to JsonPrimitive("transcript"),
            "role" to JsonPrimitive(role),
            "text" to JsonPrimitive(text),
            "final" to JsonPrimitive(true),
        )
    val payload =
        buildJsonObject {
            for ((key, value) in fields) {
                put(key, value)
            }
        }
    return payload.toString()
}
private fun playRealtimeAudio(bytes: ByteArray) {
if (!playbackEnabled || realtimeOutputSuppressed || bytes.isEmpty()) return
val queue = ensureRealtimeAudioQueue()

View File

@@ -100,6 +100,40 @@ class MicCaptureManagerTest {
assertEquals(emptyList<VoiceConversationEntry>(), manager.conversation.value)
}
/** A submitted transcript is sent to the gateway once and the final chat reply lands in the conversation. */
@Test
@OptIn(ExperimentalCoroutinesApi::class)
fun submittedTranscribedMessageUsesGatewayTurnPath() =
    runTest {
        val runId = "run-voice-e2e"
        val outbound = mutableListOf<String>()
        val manager =
            createManager(
                scope = this,
                sendToGateway = { message, onRunIdKnown ->
                    outbound += message
                    onRunIdKnown(runId)
                    runId
                },
            )
        manager.onGatewayConnectionChanged(true)

        manager.submitTranscribedMessage("voice e2e message")
        runCurrent()
        manager.handleGatewayEvent("chat", chatFinalPayload(runId = runId, text = "voice e2e reply"))
        advanceUntilIdle()

        assertEquals(listOf("voice e2e message"), outbound)
        val conversation = manager.conversation.value
        assertEquals(
            listOf(VoiceConversationRole.User, VoiceConversationRole.Assistant),
            conversation.map { it.role },
        )
        assertEquals("voice e2e reply", conversation.last().text)
    }
@Test
fun pcm16FramesAreEncodedAsPcmuFrames() {
val manager = createManager()

View File

@@ -11,6 +11,7 @@ import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.launch
import kotlinx.coroutines.test.advanceUntilIdle
import kotlinx.coroutines.test.currentTime
@@ -327,6 +328,28 @@ class TalkModeManagerTest {
assertTrue(entries.none { it.isStreaming })
}
/** With a session id and talk mode preset, the e2e turn yields one final user and one final assistant entry. */
@Test
fun e2eRealtimeTurnUsesRelayTranscriptPath() =
    runTest {
        val manager = createManager(scope = this)
        setPrivateField(manager, "realtimeSessionId", "relay-1")
        setMutableStateFlow(manager, "_isEnabled", true)

        manager.runE2eRealtimeTurn(
            userText = "voice e2e user",
            assistantText = "voice e2e assistant",
            timeoutMs = 1_000L,
        )

        val entries = manager.conversation.value
        val expectedTurns =
            listOf(
                VoiceConversationRole.User to "voice e2e user",
                VoiceConversationRole.Assistant to "voice e2e assistant",
            )
        // Comparing role/text pairs checks count, order, roles and texts in one assertion.
        assertEquals(expectedTurns, entries.map { it.role to it.text })
        assertTrue(entries.none { it.isStreaming })
    }
@Test
@OptIn(ExperimentalCoroutinesApi::class)
fun realtimeStartWithoutGatewayTurnsTalkOff() =
@@ -483,6 +506,15 @@ class TalkModeManagerTest {
return field.get(target)
}
/** Sets [value] on the private `MutableStateFlow` field called [name] of [target]. */
@Suppress("UNCHECKED_CAST")
private fun <T> setMutableStateFlow(
    target: Any,
    name: String,
    value: T,
) {
    val flow = readPrivateField(target, name) as MutableStateFlow<T>
    flow.value = value
}
private fun shouldAppendRealtimeCapturedFrame(
manager: TalkModeManager,
length: Int,

226
apps/android/scripts/voice-e2e.sh Executable file
View File

@@ -0,0 +1,226 @@
#!/usr/bin/env bash
# Drives the Android voice e2e harness over adb: optionally installs the debug app,
# broadcasts a run command to the in-app receiver, polls for the JSON result file,
# and collects screenshots and logcat output into an artifact directory.
set -euo pipefail
# Repository root, three levels above this script's directory.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
ANDROID_DIR="$ROOT_DIR/apps/android"
PACKAGE_NAME="ai.openclaw.app"
# Component name of the broadcast receiver, and the intent actions used below.
RECEIVER="$PACKAGE_NAME/.VoiceE2eReceiver"
RUN_ACTION="ai.openclaw.app.debug.RUN_VOICE_E2E"
OPEN_ACTION="ai.openclaw.app.debug.OPEN_VOICE_E2E"
# Defaults; each can be overridden by the command-line options parsed further down.
PORT=18789
HOST="127.0.0.1"
MODE="both"
TRANSCRIPT="Reply exactly: Android voice e2e normal path ok."
REALTIME_ASSISTANT="Android realtime voice e2e relay path ok."
TIMEOUT_MS=60000
# Boolean switches (1 = on, 0 = off).
INSTALL=1
CONNECT=1
CLEANUP=0
START_GATEWAY=0
# Prints the command-line help text to stdout.
usage() {
cat <<'USAGE'
Usage: apps/android/scripts/voice-e2e.sh [options]
Options:
--mode normal|realtime|both Voice path to test. Default: both.
--transcript TEXT Synthetic user transcript for the voice turn.
--realtime-assistant TEXT Synthetic realtime assistant relay text.
--host HOST Gateway host visible from Android. Default: 127.0.0.1.
--port PORT Gateway port. Default: 18789.
--timeout-ms MS Per-mode timeout. Default: 60000.
--skip-install Reuse the installed debug app.
--no-connect Do not rewrite manual gateway settings.
--start-gateway Start a temporary local gateway with bws_get_secret.
--cleanup Stop voice capture after screenshots.
USAGE
}
# Exits with a usage error (status 2) when a value-taking option is the last argument.
# Without this guard, `set -u` aborts on "$2" with an opaque "unbound variable" message.
require_option_value() {
  if [[ $# -lt 2 ]]; then
    echo "missing value for option: $1" >&2
    usage >&2
    exit 2
  fi
}

# Parse command-line options; unknown options print usage and exit with status 2.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --mode)
      require_option_value "$@"
      MODE="$2"
      shift 2
      ;;
    --transcript)
      require_option_value "$@"
      TRANSCRIPT="$2"
      shift 2
      ;;
    --realtime-assistant)
      require_option_value "$@"
      REALTIME_ASSISTANT="$2"
      shift 2
      ;;
    --host)
      require_option_value "$@"
      HOST="$2"
      shift 2
      ;;
    --port)
      require_option_value "$@"
      PORT="$2"
      shift 2
      ;;
    --timeout-ms)
      require_option_value "$@"
      TIMEOUT_MS="$2"
      shift 2
      ;;
    --skip-install)
      INSTALL=0
      shift
      ;;
    --no-connect)
      CONNECT=0
      shift
      ;;
    --start-gateway)
      START_GATEWAY=1
      shift
      ;;
    --cleanup)
      CLEANUP=1
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "unknown option: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done
# Toolchain locations: values already present in the environment win; the fallbacks are
# paths under /opt/homebrew.
export JAVA_HOME="${JAVA_HOME:-/opt/homebrew/opt/openjdk@17/libexec/openjdk.jdk/Contents/Home}"
export ANDROID_HOME="${ANDROID_HOME:-/opt/homebrew/share/android-commandlinetools}"
export ANDROID_SDK_ROOT="${ANDROID_SDK_ROOT:-$ANDROID_HOME}"
# Put the JDK, adb (platform-tools) and the SDK command-line tools ahead of the existing PATH.
export PATH="/opt/homebrew/opt/openjdk@17/bin:$ANDROID_HOME/platform-tools:$ANDROID_HOME/cmdline-tools/latest/bin:$PATH"
# Per-run output directory, named with a second-resolution timestamp.
ARTIFACT_DIR="/tmp/openclaw-android-voice-e2e-$(date +%Y%m%d-%H%M%S)"
mkdir -p "$ARTIFACT_DIR"
# Best-effort shutdown of the temporary gateway; a no-op when none was started.
cleanup_gateway() {
  [[ -n "${GATEWAY_PID:-}" ]] || return 0
  kill "$GATEWAY_PID" >/dev/null 2>&1 || true
}
# Run the shutdown on every script exit, successful or not.
trap cleanup_gateway EXIT
# Require at least one attached device whose state column reads "device"
# (the first line of `adb devices -l` output is skipped).
if ! adb devices -l | awk 'NR > 1 && $2 == "device" { found = 1 } END { exit(found ? 0 : 1) }'; then
echo "no authorized Android device found" >&2
adb devices -l >&2
exit 1
fi
# Forward the device's local $PORT to the same port on the host.
adb reverse "tcp:$PORT" "tcp:$PORT" >/dev/null
# Optionally start a temporary local gateway in the background (--start-gateway).
if [[ "$START_GATEWAY" -eq 1 ]]; then
# Fetch the API key via bws_get_secret; if it is not a command in this shell,
# retry through an interactive zsh.
if command -v bws_get_secret >/dev/null 2>&1; then
OPENCLAW_OPENAI_API_KEY="$(bws_get_secret OPENCLAW_OPENAI_API_KEY)"
else
OPENCLAW_OPENAI_API_KEY="$(zsh -ic 'bws_get_secret OPENCLAW_OPENAI_API_KEY')"
fi
(
# Run from the repository root; the key is passed only in the gateway command's environment.
cd "$ROOT_DIR"
OPENAI_API_KEY="$OPENCLAW_OPENAI_API_KEY" \
pnpm openclaw gateway run \
--port "$PORT" \
--auth none \
--bind loopback \
--force \
--allow-unconfigured \
--ws-log compact
) >"$ARTIFACT_DIR/gateway.log" 2>&1 &
# PID of the background subshell; cleanup_gateway kills it on exit.
GATEWAY_PID=$!
# Give the gateway a fixed 4 seconds, then fail with its log if the process already exited.
sleep 4
if ! kill -0 "$GATEWAY_PID" >/dev/null 2>&1; then
cat "$ARTIFACT_DIR/gateway.log" >&2
exit 1
fi
unset OPENCLAW_OPENAI_API_KEY
fi
# Build and install the debug app unless --skip-install was given.
if [[ "$INSTALL" -eq 1 ]]; then
(cd "$ANDROID_DIR" && ./gradlew :app:installPlayDebug)
fi
# Granting the microphone permission is best-effort: failures are ignored.
adb shell pm grant "$PACKAGE_NAME" android.permission.RECORD_AUDIO >/dev/null 2>&1 || true
# Restart the app from a stopped state via the voice e2e open action.
adb shell am force-stop "$PACKAGE_NAME" >/dev/null
adb shell am start -a "$OPEN_ACTION" -n "$PACKAGE_NAME/.MainActivity" >/dev/null
# Clear logcat so the capture at the end only holds output from this run.
adb logcat -c
# Runs one voice e2e mode on the device and verifies its result.
#   $1  mode sent to the app ("normal" or "realtime")
# Broadcasts the run command, polls the app's cache for the JSON result file, stores the
# result and a screenshot in $ARTIFACT_DIR, and exits with status 1 on timeout or when the
# result does not contain "ok":true.
run_mode() {
  local test_mode="$1"
  local result_name="$ARTIFACT_DIR/result-$test_mode.json"
  local screenshot_name="$ARTIFACT_DIR/screen-$test_mode.png"
  local transcript_base64
  local realtime_assistant_base64
  # Free-form text travels base64-encoded so shell quoting cannot mangle it; strip line wraps.
  transcript_base64="$(printf '%s' "$TRANSCRIPT" | base64 | tr -d '\n')"
  realtime_assistant_base64="$(printf '%s' "$REALTIME_ASSISTANT" | base64 | tr -d '\n')"
  # Remove any stale result so the poll below only sees this run's output.
  adb shell run-as "$PACKAGE_NAME" rm -f cache/voice_e2e_result.json >/dev/null 2>&1 || true
  local no_connect_flag=true
  if [[ "$CONNECT" -eq 1 ]]; then
    no_connect_flag=false
  fi
  adb shell am broadcast \
    -a "$RUN_ACTION" \
    -n "$RECEIVER" \
    --es mode "$test_mode" \
    --ez noConnect "$no_connect_flag" \
    --es host "$HOST" \
    --ei port "$PORT" \
    --ez tls false \
    --el timeoutMs "$TIMEOUT_MS" \
    --el connectTimeoutMs "$TIMEOUT_MS" \
    --es transcriptBase64 "$transcript_base64" \
    --es realtimeAssistantBase64 "$realtime_assistant_base64" >/dev/null
  # The device may wait up to TIMEOUT_MS for the gateway connection and again up to
  # TIMEOUT_MS for the conversation; the realtime path additionally waits up to TIMEOUT_MS
  # for the relay session. Budget for all of these phases so the host does not give up
  # before the device has written its (possibly failing) result.
  local wait_phases=2
  if [[ "$test_mode" == "realtime" ]]; then
    wait_phases=3
  fi
  local deadline=$((SECONDS + wait_phases * TIMEOUT_MS / 1000 + 20))
  local result=""
  while [[ "$SECONDS" -lt "$deadline" ]]; do
    result="$(adb shell run-as "$PACKAGE_NAME" cat cache/voice_e2e_result.json 2>/dev/null | tr -d '\r' || true)"
    if [[ -n "$result" ]]; then
      break
    fi
    sleep 1
  done
  if [[ -z "$result" ]]; then
    echo "voice e2e $test_mode timed out waiting for result" >&2
    # The script exits before its final echo, so point at the artifacts here.
    echo "artifacts: $ARTIFACT_DIR" >&2
    exit 1
  fi
  printf '%s\n' "$result" >"$result_name"
  adb exec-out screencap -p >"$screenshot_name"
  if ! grep -q '"ok":true' "$result_name"; then
    echo "voice e2e $test_mode failed: $result" >&2
    echo "artifacts: $ARTIFACT_DIR" >&2
    exit 1
  fi
}
# Dispatch on the requested mode: "both" runs normal then realtime; "dictation" and "talk"
# are aliases for normal and realtime. Anything else is a usage error (status 2).
if [[ "$MODE" == "both" ]]; then
  run_mode normal
  run_mode realtime
elif [[ "$MODE" == "normal" || "$MODE" == "dictation" ]]; then
  run_mode normal
elif [[ "$MODE" == "realtime" || "$MODE" == "talk" ]]; then
  run_mode realtime
else
  echo "unknown mode: $MODE" >&2
  exit 2
fi
# Keep the last 250 voice-related logcat lines as an artifact. Capture is best-effort and
# never fails the run. ripgrep is used when installed; otherwise fall back to grep so a
# missing `rg` does not silently produce an empty logcat.txt.
LOGCAT_PATTERN='OpenClaw|TalkMode|MicCapture|AudioRecord|SpeechRecognizer|realtime|talk.session|appendAudio|transcript|Talk failed|Transcription failed|Speech network|VoiceE2E'
if command -v rg >/dev/null 2>&1; then
  adb logcat -d -v time |
    rg -i "$LOGCAT_PATTERN" |
    tail -250 >"$ARTIFACT_DIR/logcat.txt" || true
else
  # -a treats the stream as text even if it contains stray binary bytes.
  adb logcat -d -v time |
    grep -aEi "$LOGCAT_PATTERN" |
    tail -250 >"$ARTIFACT_DIR/logcat.txt" || true
fi
# Optionally stop voice capture on the device once artifacts are collected.
if [[ "$CLEANUP" -eq 1 ]]; then
  adb shell am broadcast -a "$RUN_ACTION" -n "$RECEIVER" --es mode stop >/dev/null
fi
# Print the artifact directory as the script's final output.
echo "$ARTIFACT_DIR"