mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-28 06:06:17 +00:00
test(android): add voice mode adb e2e harness
This commit is contained in:
14
apps/android/app/src/debug/AndroidManifest.xml
Normal file
14
apps/android/app/src/debug/AndroidManifest.xml
Normal file
@@ -0,0 +1,14 @@
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
    <application>
        <!--
          Debug-only entry point for the adb-driven voice e2e harness.

          The receiver must be exported so `adb shell am broadcast` can reach it. Because it accepts
          gateway host/token/password extras, it is additionally guarded by android.permission.DUMP:
          the adb shell holds that permission, ordinary third-party apps do not, so other apps
          installed on the test device cannot trigger it.
        -->
        <receiver
            android:name=".VoiceE2eReceiver"
            android:exported="true"
            android:permission="android.permission.DUMP">
            <intent-filter>
                <action android:name="ai.openclaw.app.debug.RUN_VOICE_E2E" />
            </intent-filter>
        </receiver>
        <!-- Executes the e2e command; not exported, so only this app can start it. -->
        <service
            android:name=".VoiceE2eService"
            android:exported="false" />
    </application>
</manifest>
|
||||
@@ -0,0 +1,188 @@
|
||||
package ai.openclaw.app
|
||||
|
||||
import android.app.Service
|
||||
import android.content.BroadcastReceiver
|
||||
import android.content.Context
|
||||
import android.content.Intent
|
||||
import android.os.IBinder
|
||||
import android.util.Base64
|
||||
import android.util.Log
|
||||
import kotlinx.coroutines.CoroutineScope
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.SupervisorJob
|
||||
import kotlinx.coroutines.cancel
|
||||
import kotlinx.coroutines.delay
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.withTimeout
|
||||
import kotlinx.serialization.json.JsonNull
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
import kotlinx.serialization.json.buildJsonObject
|
||||
import java.io.File
|
||||
|
||||
/** Logcat tag used for voice e2e log lines. */
private const val tag = "VoiceE2E"

/** File name of the JSON result written for each voice e2e command. */
private const val resultFileName = "voice_e2e_result.json"
|
||||
|
||||
/**
 * Broadcast entry point for the voice e2e harness.
 *
 * Forwards every extra of the incoming broadcast to [VoiceE2eService], which does the actual work.
 */
class VoiceE2eReceiver : BroadcastReceiver() {
  override fun onReceive(
    context: Context,
    intent: Intent,
  ) {
    val serviceIntent =
      Intent(context, VoiceE2eService::class.java)
        .putExtras(intent)
    try {
      context.startService(serviceIntent)
    } catch (err: IllegalStateException) {
      // startService is rejected when the app is not allowed to start services (e.g. it is in the
      // background). An exception escaping onReceive would crash the process, so log it instead.
      Log.e(tag, "FAIL unable to start VoiceE2eService", err)
    }
  }
}
|
||||
|
||||
/**
 * Started service that runs one voice e2e command per start request on [Dispatchers.IO].
 *
 * Each command's outcome is written as JSON to `voice_e2e_result.json` in the cache directory and
 * logged with a `PASS`/`FAIL` prefix. The service stops itself when the command finishes.
 */
class VoiceE2eService : Service() {
  private val serviceScope = CoroutineScope(SupervisorJob() + Dispatchers.IO)

  /** Binding is not supported. */
  override fun onBind(intent: Intent?): IBinder? = null

  /** Launches the command carried by [intent]; a null intent is ignored. */
  override fun onStartCommand(
    intent: Intent?,
    flags: Int,
    startId: Int,
  ): Int {
    val command = intent ?: return START_NOT_STICKY
    serviceScope.launch {
      try {
        runCommand(command)
      } finally {
        stopSelf(startId)
      }
    }
    return START_NOT_STICKY
  }

  override fun onDestroy() {
    serviceScope.cancel()
    super.onDestroy()
  }

  /**
   * Executes the command described by [intent]'s extras and records the result.
   *
   * `mode=stop` (case-insensitive) cancels mic capture and disables talk mode. Any other mode
   * optionally configures and awaits the gateway, brings [MainActivity] to the voice screen and
   * runs the voice e2e turn(s). Failures, including timeouts, are written as `{"ok":false,...}`.
   */
  private suspend fun runCommand(intent: Intent) {
    try {
      val app = applicationContext as NodeApp
      val runtime = app.ensureRuntime()
      val mode = decodedExtraOrDefault(intent, "mode", "both")
      // The runtime lowercases modes before matching, so accept "Stop"/"STOP" here as well.
      if (mode.equals("stop", ignoreCase = true)) {
        runtime.cancelMicCapture()
        runtime.setTalkModeEnabled(false)
        writeResult("""{"ok":true,"mode":"stop"}""")
        return
      }

      val connect = !intent.getBooleanExtra("noConnect", false)
      val connectTimeoutMs = intent.getLongExtra("connectTimeoutMs", 20_000L)
      if (connect) {
        configureGateway(runtime = runtime, intent = intent)
      }
      if (connect || !runtime.isConnected.value) {
        awaitGateway(runtime = runtime, timeoutMs = connectTimeoutMs)
      }

      startActivity(
        Intent(actionOpenVoiceE2e)
          .setClass(this, MainActivity::class.java)
          .addFlags(Intent.FLAG_ACTIVITY_NEW_TASK or Intent.FLAG_ACTIVITY_SINGLE_TOP or Intent.FLAG_ACTIVITY_CLEAR_TOP),
      )

      val transcript =
        decodedExtraOrDefault(intent, "transcript", "Reply exactly: Android voice e2e normal path ok.")
      val realtimeReply =
        decodedExtraOrDefault(intent, "realtimeAssistant", "Android realtime voice e2e relay path ok.")
      val timeoutMs = intent.getLongExtra("timeoutMs", 60_000L)
      val result =
        runtime.runVoiceE2e(
          mode = mode,
          transcript = transcript,
          realtimeAssistantText = realtimeReply,
          timeoutMs = timeoutMs,
        )
      val resultJson = encodeResult(result)
      writeResult(resultJson)
      Log.i(tag, "PASS $resultJson")
    } catch (err: Throwable) {
      // A withTimeout expiry is a genuine test failure and must be reported, but any other
      // cancellation (e.g. serviceScope cancelled in onDestroy) has to propagate untouched.
      if (err is kotlinx.coroutines.CancellationException && err !is kotlinx.coroutines.TimeoutCancellationException) {
        throw err
      }
      reportFailure(err)
    }
  }

  /** Returns the trimmed (optionally Base64-encoded) string extra [name], or [fallback] when absent or empty. */
  private fun decodedExtraOrDefault(
    intent: Intent,
    name: String,
    fallback: String,
  ): String =
    intent
      .getDecodedStringExtra(name)
      ?.trim()
      .orEmpty()
      .ifEmpty { fallback }

  /** Writes and logs a `{"ok":false,"error":...}` result for [err]. */
  private fun reportFailure(err: Throwable) {
    val resultJson =
      buildJsonObject {
        put("ok", JsonPrimitive(false))
        put("error", JsonPrimitive(err.message ?: err::class.java.simpleName))
      }.toString()
    try {
      writeResult(resultJson)
    } catch (writeErr: java.io.IOException) {
      // Nothing catches exceptions above this coroutine; keep the original failure visible in logcat.
      Log.e(tag, "FAIL unable to write result file", writeErr)
    }
    Log.e(tag, "FAIL $resultJson", err)
  }

  /** Applies the manual gateway settings from [intent]'s extras to [runtime] and starts a manual connection. */
  private fun configureGateway(
    runtime: NodeRuntime,
    intent: Intent,
  ) {
    val host = decodedExtraOrDefault(intent, "host", "127.0.0.1")
    val port = intent.getIntExtra("port", 18789)
    runtime.setManualEnabled(true)
    runtime.setManualHost(host)
    runtime.setManualPort(port)
    runtime.setManualTls(intent.getBooleanExtra("tls", false))
    runtime.setGatewayToken(intent.getDecodedStringExtra("token").orEmpty())
    runtime.setGatewayBootstrapToken(intent.getDecodedStringExtra("bootstrapToken").orEmpty())
    runtime.setGatewayPassword(intent.getDecodedStringExtra("password").orEmpty())
    runtime.setOnboardingCompleted(true)
    runtime.connectManual()
  }

  /** Polls every 100 ms until [runtime] reports a connection; times out after [timeoutMs]. */
  private suspend fun awaitGateway(
    runtime: NodeRuntime,
    timeoutMs: Long,
  ) {
    withTimeout(timeoutMs) {
      while (!runtime.isConnected.value) {
        delay(100L)
      }
    }
  }

  /** Serializes a successful [result]; slices that were not run are encoded as JSON null. */
  private fun encodeResult(result: NodeRuntime.VoiceE2eResult): String =
    buildJsonObject {
      put("ok", JsonPrimitive(true))
      put("normal", result.normal?.let(::encodeSlice) ?: JsonNull)
      put("realtime", result.realtime?.let(::encodeSlice) ?: JsonNull)
    }.toString()

  private fun encodeSlice(slice: NodeRuntime.VoiceE2eSliceResult) =
    buildJsonObject {
      put("mode", JsonPrimitive(slice.mode))
      put("status", JsonPrimitive(slice.status))
      put("userText", slice.userText?.let(::JsonPrimitive) ?: JsonNull)
      put("assistantText", slice.assistantText?.let(::JsonPrimitive) ?: JsonNull)
    }

  /** Overwrites the result file in the cache directory with [json]. */
  private fun writeResult(json: String) {
    File(cacheDir, resultFileName).writeText(json)
  }
}
|
||||
|
||||
/**
 * Reads the string extra [name], giving precedence to a Base64 variant stored under `"${name}Base64"`.
 *
 * When the Base64 extra is present and not blank it is decoded as UTF-8 and returned; otherwise the
 * plain extra is returned, which may be null.
 */
private fun Intent.getDecodedStringExtra(name: String): String? {
  val base64Value = getStringExtra("${name}Base64")
  return if (base64Value.isNullOrBlank()) {
    getStringExtra(name)
  } else {
    Base64.decode(base64Value, Base64.NO_WRAP).toString(Charsets.UTF_8)
  }
}
|
||||
@@ -3,6 +3,7 @@ package ai.openclaw.app
|
||||
import android.content.Intent
|
||||
|
||||
// NOTE(review): presumably the action handled by parseAssistantLaunchIntent — its body is not visible here.
const val actionAskOpenClaw = "ai.openclaw.app.action.ASK_OPENCLAW"

/** Debug intent action asking the app to open its voice destination for the e2e harness. */
const val actionOpenVoiceE2e = "ai.openclaw.app.debug.OPEN_VOICE_E2E"

// NOTE(review): presumably the extra key carrying the assistant prompt text — confirm against its reader.
const val extraAssistantPrompt = "prompt"
|
||||
|
||||
enum class HomeDestination {
|
||||
@@ -19,6 +20,14 @@ data class AssistantLaunchRequest(
|
||||
val autoSend: Boolean,
|
||||
)
|
||||
|
||||
/**
 * Maps [intent] to the home destination it asks for, or null when it asks for none.
 *
 * The only recognized action is [actionOpenVoiceE2e], and only in builds where [BuildConfig.DEBUG] is true.
 */
fun parseHomeDestinationIntent(intent: Intent?): HomeDestination? {
  if (!BuildConfig.DEBUG) return null
  return if (intent?.action == actionOpenVoiceE2e) HomeDestination.Voice else null
}
|
||||
|
||||
fun parseAssistantLaunchIntent(intent: Intent?): AssistantLaunchRequest? {
|
||||
val action = intent?.action ?: return null
|
||||
return when (action) {
|
||||
|
||||
@@ -79,6 +79,10 @@ class MainActivity : ComponentActivity() {
|
||||
}
|
||||
|
||||
/**
 * Routes an incoming [intent]: a home-destination request wins; otherwise an assistant launch
 * request, if the intent carries one, is handed to the view model.
 */
private fun handleAssistantIntent(intent: android.content.Intent?) {
  val destination = parseHomeDestinationIntent(intent)
  if (destination != null) {
    viewModel.requestHomeDestination(destination)
    return
  }
  parseAssistantLaunchIntent(intent)?.let { launchRequest ->
    viewModel.handleAssistantLaunch(launchRequest)
  }
}
|
||||
|
||||
@@ -330,6 +330,10 @@ class MainViewModel(
|
||||
_requestedHomeDestination.value = null
|
||||
}
|
||||
|
||||
/** Publishes [destination] as the currently requested home destination. */
fun requestHomeDestination(destination: HomeDestination) {
  _requestedHomeDestination.value = destination
}
|
||||
|
||||
/** Resets the chat draft state to null. */
fun clearChatDraft() {
  _chatDraft.value = null
}
|
||||
|
||||
@@ -47,6 +47,7 @@ import ai.openclaw.app.protocol.OpenClawCanvasA2UIAction
|
||||
import ai.openclaw.app.voice.MicCaptureManager
|
||||
import ai.openclaw.app.voice.TalkModeManager
|
||||
import ai.openclaw.app.voice.VoiceConversationEntry
|
||||
import ai.openclaw.app.voice.VoiceConversationRole
|
||||
import android.Manifest
|
||||
import android.content.Context
|
||||
import android.content.pm.PackageManager
|
||||
@@ -64,6 +65,7 @@ import kotlinx.coroutines.flow.asStateFlow
|
||||
import kotlinx.coroutines.flow.combine
|
||||
import kotlinx.coroutines.flow.distinctUntilChanged
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.withTimeout
|
||||
import kotlinx.serialization.Serializable
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.JsonArray
|
||||
@@ -256,6 +258,18 @@ class NodeRuntime(
|
||||
val previousFingerprintSha256: String? = null,
|
||||
)
|
||||
|
||||
/**
 * Outcome of one voice e2e slice.
 *
 * @property mode name of the slice that produced this result.
 * @property status status text captured when the slice finished.
 * @property userText text of the user entry that was captured, or null if there was none.
 * @property assistantText text of the assistant entry that was captured, or null if there was none.
 */
data class VoiceE2eSliceResult(
  val mode: String,
  val status: String,
  val userText: String?,
  val assistantText: String?,
)
|
||||
|
||||
/**
 * Combined voice e2e outcome; each slice is null when it was not part of the run.
 */
data class VoiceE2eResult(
  val normal: VoiceE2eSliceResult?,
  val realtime: VoiceE2eSliceResult?,
)
|
||||
|
||||
private val _isConnected = MutableStateFlow(false)
|
||||
val isConnected: StateFlow<Boolean> = _isConnected.asStateFlow()
|
||||
private val _nodeConnected = MutableStateFlow(false)
|
||||
@@ -1187,6 +1201,115 @@ class NodeRuntime(
|
||||
talkMode.setPlaybackEnabled(value)
|
||||
}
|
||||
|
||||
/**
 * Runs the requested voice e2e slice(s) with the speaker disabled and returns their results.
 *
 * @param mode `both`, `normal`/`dictation` or `realtime`/`talk` (trimmed, case-insensitive; blank means `both`).
 * @param transcript user text fed into the slice(s).
 * @param realtimeAssistantText assistant text used by the realtime slice.
 * @param timeoutMs timeout handed to each slice.
 * @throws IllegalStateException in non-debug builds, without a gateway connection, or without microphone permission.
 * @throws IllegalArgumentException when [mode] is not recognized.
 */
suspend fun runVoiceE2e(
  mode: String,
  transcript: String,
  realtimeAssistantText: String,
  timeoutMs: Long,
): VoiceE2eResult {
  check(BuildConfig.DEBUG) { "voice e2e is debug-only" }
  check(_isConnected.value) { "gateway not connected" }
  check(hasRecordAudioPermission()) { "microphone permission missing" }

  val (wantsNormal, wantsRealtime) =
    when (mode.trim().lowercase().ifEmpty { "both" }) {
      "both" -> true to true
      "normal", "dictation" -> true to false
      "realtime", "talk" -> false to true
      else -> throw IllegalArgumentException("unknown voice e2e mode: $mode")
    }

  val speakerWasEnabled = speakerEnabled.value
  setSpeakerEnabled(false)
  var finished = false
  try {
    val normalSlice =
      if (wantsNormal) runNormalVoiceE2e(transcript = transcript, timeoutMs = timeoutMs) else null
    val realtimeSlice =
      if (wantsRealtime) {
        runRealtimeVoiceE2e(
          transcript = transcript,
          assistantText = realtimeAssistantText,
          timeoutMs = timeoutMs,
        )
      } else {
        null
      }
    finished = true
    return VoiceE2eResult(normal = normalSlice, realtime = realtimeSlice)
  } finally {
    // Only tear the voice session down when a slice failed; the speaker setting is always restored.
    if (!finished) {
      stopActiveVoiceSession()
    }
    setSpeakerEnabled(speakerWasEnabled)
  }
}
|
||||
|
||||
/**
 * Runs the manual-mic slice: submits [transcript] as if it had been transcribed and waits (up to
 * [timeoutMs]) until the mic conversation holds a non-streaming assistant entry.
 */
private suspend fun runNormalVoiceE2e(
  transcript: String,
  timeoutMs: Long,
): VoiceE2eSliceResult {
  stopActiveVoiceSession()
  setVoiceCaptureMode(VoiceCaptureMode.ManualMic)
  micCapture.submitTranscribedMessage(transcript)
  // NOTE(review): any finished assistant entry satisfies this wait — confirm that
  // stopActiveVoiceSession() clears earlier conversation entries.
  awaitVoiceConversation(timeoutMs = timeoutMs) {
    micCapture.conversation.value.any { entry ->
      !entry.isStreaming && entry.role == VoiceConversationRole.Assistant
    }
  }
  val snapshot = micCapture.conversation.value
  val lastUser = snapshot.lastOrNull { it.role == VoiceConversationRole.User }
  val lastAssistant = snapshot.lastOrNull { it.role == VoiceConversationRole.Assistant }
  return VoiceE2eSliceResult(
    mode = "normal",
    status = micCapture.statusText.value,
    userText = lastUser?.text,
    assistantText = lastAssistant?.text,
  )
}
|
||||
|
||||
/**
 * Runs the talk-mode slice: injects a synthetic user/assistant transcript pair through the realtime
 * relay path and waits until both appear as finished entries in the talk conversation.
 *
 * @param transcript synthetic user transcript.
 * @param assistantText synthetic assistant transcript.
 * @param timeoutMs upper bound for the whole slice.
 */
private suspend fun runRealtimeVoiceE2e(
  transcript: String,
  assistantText: String,
  timeoutMs: Long,
): VoiceE2eSliceResult {
  stopActiveVoiceSession()
  setVoiceCaptureMode(VoiceCaptureMode.TalkMode)
  // Both steps below take timeoutMs on their own; without the enclosing timeout the slice could
  // run for up to twice the requested per-mode timeout.
  withTimeout(timeoutMs) {
    talkMode.runE2eRealtimeTurn(
      userText = transcript,
      assistantText = assistantText,
      timeoutMs = timeoutMs,
    )
    awaitVoiceConversation(timeoutMs = timeoutMs) {
      val entries = talkMode.conversation.value
      entries.any { it.role == VoiceConversationRole.User && !it.isStreaming } &&
        entries.any { it.role == VoiceConversationRole.Assistant && !it.isStreaming }
    }
  }
  val entries = talkMode.conversation.value
  return VoiceE2eSliceResult(
    mode = "realtime",
    status = talkMode.statusText.value,
    userText = entries.lastOrNull { it.role == VoiceConversationRole.User }?.text,
    assistantText = entries.lastOrNull { it.role == VoiceConversationRole.Assistant }?.text,
  )
}
|
||||
|
||||
/**
 * Suspends until [ready] returns true, checking every 100 ms; [withTimeout] aborts the wait after [timeoutMs].
 */
private suspend fun awaitVoiceConversation(
  timeoutMs: Long,
  ready: () -> Boolean,
) {
  withTimeout(timeoutMs) {
    while (true) {
      if (ready()) break
      delay(100L)
    }
  }
}
|
||||
|
||||
private fun setVoiceCaptureMode(
|
||||
mode: VoiceCaptureMode,
|
||||
persistManualMic: Boolean = true,
|
||||
|
||||
@@ -262,6 +262,11 @@ class MicCaptureManager(
|
||||
}
|
||||
}
|
||||
|
||||
/** Queues [text] as a recognized message, then sends queued messages if the manager is idle. */
internal fun submitTranscribedMessage(text: String) {
  queueRecognizedMessage(text)
  sendQueuedIfIdle()
}
|
||||
|
||||
fun handleGatewayEvent(
|
||||
event: String,
|
||||
payloadJson: String?,
|
||||
@@ -701,8 +706,7 @@ class MicCaptureManager(
|
||||
val text = obj["text"].asStringOrNull()?.trim().orEmpty()
|
||||
if (text.isNotEmpty()) {
|
||||
if (text != flushedPartialTranscript) {
|
||||
queueRecognizedMessage(text)
|
||||
sendQueuedIfIdle()
|
||||
submitTranscribedMessage(text)
|
||||
} else {
|
||||
flushedPartialTranscript = null
|
||||
_liveTranscript.value = null
|
||||
|
||||
@@ -480,6 +480,19 @@ class TalkModeManager internal constructor(
|
||||
pendingRunId = null
|
||||
}
|
||||
|
||||
/**
 * Feeds one synthetic realtime turn through the gateway-event handler.
 *
 * Enables talk mode if it is off, waits up to [timeoutMs] for a realtime session id, then delivers
 * a final user transcript followed by a final assistant transcript as `talk.event` payloads.
 */
internal suspend fun runE2eRealtimeTurn(
  userText: String,
  assistantText: String,
  timeoutMs: Long,
) {
  if (!_isEnabled.value) {
    setEnabled(true)
  }
  val relaySessionId = awaitRealtimeSessionId(timeoutMs)
  listOf("user" to userText, "assistant" to assistantText).forEach { (role, text) ->
    handleGatewayEvent(
      "talk.event",
      realtimeTranscriptPayload(sessionId = relaySessionId, role = role, text = text),
    )
  }
}
|
||||
|
||||
fun setPlaybackEnabled(enabled: Boolean) {
|
||||
if (playbackEnabled == enabled) return
|
||||
playbackEnabled = enabled
|
||||
@@ -597,6 +610,19 @@ class TalkModeManager internal constructor(
|
||||
shutdownTextToSpeech()
|
||||
}
|
||||
|
||||
/**
 * Polls every 100 ms until a realtime session id is available and returns it.
 *
 * @throws IllegalStateException with the current status text when talk mode is disabled and the
 *   status starts with "Talk failed".
 */
private suspend fun awaitRealtimeSessionId(timeoutMs: Long): String =
  withTimeout(timeoutMs) {
    var sessionId = realtimeSessionId
    while (sessionId == null) {
      val status = _statusText.value
      if (!_isEnabled.value && status.startsWith("Talk failed")) {
        throw IllegalStateException(status)
      }
      delay(100L)
      sessionId = realtimeSessionId
    }
    sessionId
  }
|
||||
|
||||
private suspend fun startRealtimeRelay(generation: Long) {
|
||||
if (!isConnected()) {
|
||||
_statusText.value = "Gateway not connected"
|
||||
@@ -852,6 +878,19 @@ class TalkModeManager internal constructor(
|
||||
}
|
||||
}
|
||||
|
||||
/** Builds the JSON text of a final `transcript` relay event for [sessionId], [role] and [text]. */
private fun realtimeTranscriptPayload(
  sessionId: String,
  role: String,
  text: String,
): String {
  val payload =
    buildJsonObject {
      put("relaySessionId", JsonPrimitive(sessionId))
      put("type", JsonPrimitive("transcript"))
      put("role", JsonPrimitive(role))
      put("text", JsonPrimitive(text))
      put("final", JsonPrimitive(true))
    }
  return payload.toString()
}
|
||||
|
||||
private fun playRealtimeAudio(bytes: ByteArray) {
|
||||
if (!playbackEnabled || realtimeOutputSuppressed || bytes.isEmpty()) return
|
||||
val queue = ensureRealtimeAudioQueue()
|
||||
|
||||
@@ -100,6 +100,40 @@ class MicCaptureManagerTest {
|
||||
assertEquals(emptyList<VoiceConversationEntry>(), manager.conversation.value)
|
||||
}
|
||||
|
||||
// A message submitted via submitTranscribedMessage must be sent to the gateway and paired with the final chat reply.
@Test
@OptIn(ExperimentalCoroutinesApi::class)
fun submittedTranscribedMessageUsesGatewayTurnPath() =
  runTest {
    val runId = "run-voice-e2e"
    val outbound = mutableListOf<String>()
    val manager =
      createManager(
        scope = this,
        sendToGateway = { message, onRunIdKnown ->
          outbound.add(message)
          onRunIdKnown(runId)
          runId
        },
      )

    manager.onGatewayConnectionChanged(true)
    manager.submitTranscribedMessage("voice e2e message")
    runCurrent()
    manager.handleGatewayEvent("chat", chatFinalPayload(runId = runId, text = "voice e2e reply"))
    advanceUntilIdle()

    val conversation = manager.conversation.value
    assertEquals(listOf("voice e2e message"), outbound)
    assertEquals(
      listOf(VoiceConversationRole.User, VoiceConversationRole.Assistant),
      conversation.map { it.role },
    )
    assertEquals("voice e2e reply", conversation.last().text)
  }
|
||||
|
||||
@Test
|
||||
fun pcm16FramesAreEncodedAsPcmuFrames() {
|
||||
val manager = createManager()
|
||||
|
||||
@@ -11,6 +11,7 @@ import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.ExperimentalCoroutinesApi
|
||||
import kotlinx.coroutines.Job
|
||||
import kotlinx.coroutines.SupervisorJob
|
||||
import kotlinx.coroutines.flow.MutableStateFlow
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.test.advanceUntilIdle
|
||||
import kotlinx.coroutines.test.currentTime
|
||||
@@ -327,6 +328,28 @@ class TalkModeManagerTest {
|
||||
assertTrue(entries.none { it.isStreaming })
|
||||
}
|
||||
|
||||
// With a session id and the enabled flag pre-set, the e2e turn must yield one finished user and one finished assistant entry.
@Test
fun e2eRealtimeTurnUsesRelayTranscriptPath() =
  runTest {
    val manager = createManager(scope = this)
    setPrivateField(manager, "realtimeSessionId", "relay-1")
    setMutableStateFlow(manager, "_isEnabled", true)

    manager.runE2eRealtimeTurn(
      userText = "voice e2e user",
      assistantText = "voice e2e assistant",
      timeoutMs = 1_000L,
    )

    val entries = manager.conversation.value
    assertEquals(2, entries.size)
    val (userEntry, assistantEntry) = entries
    assertEquals(VoiceConversationRole.User, userEntry.role)
    assertEquals("voice e2e user", userEntry.text)
    assertEquals(VoiceConversationRole.Assistant, assistantEntry.role)
    assertEquals("voice e2e assistant", assistantEntry.text)
    assertTrue(entries.none { it.isStreaming })
  }
|
||||
|
||||
@Test
|
||||
@OptIn(ExperimentalCoroutinesApi::class)
|
||||
fun realtimeStartWithoutGatewayTurnsTalkOff() =
|
||||
@@ -483,6 +506,15 @@ class TalkModeManagerTest {
|
||||
return field.get(target)
|
||||
}
|
||||
|
||||
/** Sets [value] on the private [MutableStateFlow] field called [name] of [target]. */
@Suppress("UNCHECKED_CAST")
private fun <T> setMutableStateFlow(
  target: Any,
  name: String,
  value: T,
) {
  val flow = readPrivateField(target, name) as MutableStateFlow<T>
  flow.value = value
}
|
||||
|
||||
private fun shouldAppendRealtimeCapturedFrame(
|
||||
manager: TalkModeManager,
|
||||
length: Int,
|
||||
|
||||
226
apps/android/scripts/voice-e2e.sh
Executable file
226
apps/android/scripts/voice-e2e.sh
Executable file
@@ -0,0 +1,226 @@
|
||||
#!/usr/bin/env bash
# Voice-mode e2e harness: drives the debug build over adb and collects results, screenshots and logs.
set -euo pipefail

# Repository paths, resolved relative to this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)"
ANDROID_DIR="$ROOT_DIR/apps/android"

# App component and intent actions used by the harness.
PACKAGE_NAME="ai.openclaw.app"
RECEIVER="$PACKAGE_NAME/.VoiceE2eReceiver"
RUN_ACTION="ai.openclaw.app.debug.RUN_VOICE_E2E"
OPEN_ACTION="ai.openclaw.app.debug.OPEN_VOICE_E2E"

# Option defaults (see usage).
HOST="127.0.0.1"
PORT=18789
MODE="both"
TRANSCRIPT="Reply exactly: Android voice e2e normal path ok."
REALTIME_ASSISTANT="Android realtime voice e2e relay path ok."
TIMEOUT_MS=60000

# Boolean switches: 1 = enabled, 0 = disabled.
INSTALL=1
CONNECT=1
CLEANUP=0
START_GATEWAY=0
|
||||
|
||||
# Prints the command-line help to stdout.
usage() {
  printf '%s\n' \
    'Usage: apps/android/scripts/voice-e2e.sh [options]' \
    '' \
    'Options:' \
    '--mode normal|realtime|both Voice path to test. Default: both.' \
    '--transcript TEXT Synthetic user transcript for the voice turn.' \
    '--realtime-assistant TEXT Synthetic realtime assistant relay text.' \
    '--host HOST Gateway host visible from Android. Default: 127.0.0.1.' \
    '--port PORT Gateway port. Default: 18789.' \
    '--timeout-ms MS Per-mode timeout. Default: 60000.' \
    '--skip-install Reuse the installed debug app.' \
    '--no-connect Do not rewrite manual gateway settings.' \
    '--start-gateway Start a temporary local gateway with bws_get_secret.' \
    '--cleanup Stop voice capture after screenshots.'
}
|
||||
|
||||
# Parse command-line options.
# Options that take a value fail with a usage error (exit 2) when the value is missing, instead of
# aborting with a bare "unbound variable" message from `set -u`.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --mode|--transcript|--realtime-assistant|--host|--port|--timeout-ms)
      if [[ $# -lt 2 ]]; then
        echo "missing value for $1" >&2
        usage >&2
        exit 2
      fi
      case "$1" in
        --mode) MODE="$2" ;;
        --transcript) TRANSCRIPT="$2" ;;
        --realtime-assistant) REALTIME_ASSISTANT="$2" ;;
        --host) HOST="$2" ;;
        --port) PORT="$2" ;;
        --timeout-ms) TIMEOUT_MS="$2" ;;
      esac
      shift 2
      ;;
    --skip-install)
      INSTALL=0
      shift
      ;;
    --no-connect)
      CONNECT=0
      shift
      ;;
    --start-gateway)
      START_GATEWAY=1
      shift
      ;;
    --cleanup)
      CLEANUP=1
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "unknown option: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done

# PORT and TIMEOUT_MS are used in shell arithmetic and as integer intent extras, so reject
# anything that is not a plain non-negative integer up front.
if ! [[ "$PORT" =~ ^[0-9]+$ ]]; then
  echo "--port must be a non-negative integer: $PORT" >&2
  exit 2
fi
if ! [[ "$TIMEOUT_MS" =~ ^[0-9]+$ ]]; then
  echo "--timeout-ms must be a non-negative integer: $TIMEOUT_MS" >&2
  exit 2
fi
|
||||
|
||||
# Toolchain locations: keep values already set (and non-empty) in the environment, otherwise fall
# back to the Homebrew paths.
: "${JAVA_HOME:=/opt/homebrew/opt/openjdk@17/libexec/openjdk.jdk/Contents/Home}"
: "${ANDROID_HOME:=/opt/homebrew/share/android-commandlinetools}"
: "${ANDROID_SDK_ROOT:=$ANDROID_HOME}"
export JAVA_HOME ANDROID_HOME ANDROID_SDK_ROOT
export PATH="/opt/homebrew/opt/openjdk@17/bin:$ANDROID_HOME/platform-tools:$ANDROID_HOME/cmdline-tools/latest/bin:$PATH"

# Per-run directory for results, screenshots and logs.
ARTIFACT_DIR="/tmp/openclaw-android-voice-e2e-$(date +%Y%m%d-%H%M%S)"
mkdir -p "$ARTIFACT_DIR"
|
||||
|
||||
# EXIT handler: signals the background gateway process, if this run started one.
# NOTE(review): GATEWAY_PID is the PID of the backgrounded subshell — confirm the gateway child exits with it.
cleanup_gateway() {
  local pid="${GATEWAY_PID:-}"
  if [[ -z "$pid" ]]; then
    return 0
  fi
  kill "$pid" >/dev/null 2>&1 || true
}
trap cleanup_gateway EXIT
|
||||
|
||||
# Succeeds when `adb devices -l` lists at least one entry whose state column is "device".
has_authorized_device() {
  adb devices -l | awk 'NR > 1 && $2 == "device" { found = 1 } END { exit(found ? 0 : 1) }'
}

if ! has_authorized_device; then
  echo "no authorized Android device found" >&2
  adb devices -l >&2
  exit 1
fi

# Forward the device's gateway port to the same port on the host.
adb reverse "tcp:$PORT" "tcp:$PORT" >/dev/null
|
||||
|
||||
# Optionally start a temporary local gateway in the background, logging to the artifact directory.
if [[ "$START_GATEWAY" -eq 1 ]]; then
  # bws_get_secret may only exist as a function in an interactive zsh; fall back to that.
  if ! command -v bws_get_secret >/dev/null 2>&1; then
    OPENCLAW_OPENAI_API_KEY="$(zsh -ic 'bws_get_secret OPENCLAW_OPENAI_API_KEY')"
  else
    OPENCLAW_OPENAI_API_KEY="$(bws_get_secret OPENCLAW_OPENAI_API_KEY)"
  fi

  (
    cd "$ROOT_DIR"
    OPENAI_API_KEY="$OPENCLAW_OPENAI_API_KEY" \
      pnpm openclaw gateway run \
      --port "$PORT" \
      --auth none \
      --bind loopback \
      --force \
      --allow-unconfigured \
      --ws-log compact
  ) >"$ARTIFACT_DIR/gateway.log" 2>&1 &
  GATEWAY_PID=$!

  # Give the gateway a moment to come up, then bail out with its log if it already died.
  sleep 4
  if ! kill -0 "$GATEWAY_PID" >/dev/null 2>&1; then
    cat "$ARTIFACT_DIR/gateway.log" >&2
    exit 1
  fi
  unset OPENCLAW_OPENAI_API_KEY
fi
|
||||
|
||||
# Build and install the debug app unless --skip-install was given.
if [[ "$INSTALL" -eq 1 ]]; then
  (
    cd "$ANDROID_DIR"
    ./gradlew :app:installPlayDebug
  )
fi

# Best-effort microphone grant, then restart the app on the voice e2e action with a clean logcat.
MAIN_ACTIVITY="$PACKAGE_NAME/.MainActivity"
adb shell pm grant "$PACKAGE_NAME" android.permission.RECORD_AUDIO >/dev/null 2>&1 || true
adb shell am force-stop "$PACKAGE_NAME" >/dev/null
adb shell am start -a "$OPEN_ACTION" -n "$MAIN_ACTIVITY" >/dev/null
adb logcat -c
|
||||
|
||||
# Runs one voice e2e mode on the device.
#   $1  mode name passed to the app; also used in the artifact file names.
# Broadcasts the command, polls the app's result file until it has content or the deadline passes,
# saves the result JSON and a screenshot, and exits 1 on timeout or when the result is not ok.
run_mode() {
  local test_mode="$1"
  local result_name="$ARTIFACT_DIR/result-$test_mode.json"
  local screenshot_name="$ARTIFACT_DIR/screen-$test_mode.png"

  # Declared separately from the assignments so `local` does not mask a failing substitution.
  local transcript_base64 realtime_assistant_base64
  transcript_base64="$(printf '%s' "$TRANSCRIPT" | base64 | tr -d '\n')"
  realtime_assistant_base64="$(printf '%s' "$REALTIME_ASSISTANT" | base64 | tr -d '\n')"

  # Remove any stale result so the poll below only sees this run's output.
  adb shell run-as "$PACKAGE_NAME" rm -f cache/voice_e2e_result.json >/dev/null 2>&1 || true

  local no_connect_flag=false
  if [[ "$CONNECT" -ne 1 ]]; then
    no_connect_flag=true
  fi

  local -a broadcast_args=(
    -a "$RUN_ACTION"
    -n "$RECEIVER"
    --es mode "$test_mode"
    --ez noConnect "$no_connect_flag"
    --es host "$HOST"
    --ei port "$PORT"
    --ez tls false
    --el timeoutMs "$TIMEOUT_MS"
    --el connectTimeoutMs "$TIMEOUT_MS"
    --es transcriptBase64 "$transcript_base64"
    --es realtimeAssistantBase64 "$realtime_assistant_base64"
  )
  adb shell am broadcast "${broadcast_args[@]}" >/dev/null

  local deadline=$((SECONDS + TIMEOUT_MS / 1000 + 20))
  local result=""
  while [[ -z "$result" && "$SECONDS" -lt "$deadline" ]]; do
    result="$(adb shell run-as "$PACKAGE_NAME" cat cache/voice_e2e_result.json 2>/dev/null | tr -d '\r' || true)"
    if [[ -z "$result" ]]; then
      sleep 1
    fi
  done

  if [[ -z "$result" ]]; then
    echo "voice e2e $test_mode timed out waiting for result" >&2
    exit 1
  fi

  printf '%s\n' "$result" >"$result_name"
  adb exec-out screencap -p >"$screenshot_name"
  if ! grep -q '"ok":true' "$result_name"; then
    echo "voice e2e $test_mode failed: $result" >&2
    exit 1
  fi
}
|
||||
|
||||
# Dispatch on the requested mode; "both" runs the normal slice first, then the realtime slice.
if [[ "$MODE" == "both" ]]; then
  run_mode normal
  run_mode realtime
elif [[ "$MODE" == "normal" || "$MODE" == "dictation" ]]; then
  run_mode normal
elif [[ "$MODE" == "realtime" || "$MODE" == "talk" ]]; then
  run_mode realtime
else
  echo "unknown mode: $MODE" >&2
  exit 2
fi
|
||||
|
||||
# Save the last 250 voice-related logcat lines (best effort).
# Prefer ripgrep when installed, otherwise fall back to grep -E so a missing `rg` does not
# silently leave an empty logcat.txt behind.
LOGCAT_PATTERN='OpenClaw|TalkMode|MicCapture|AudioRecord|SpeechRecognizer|realtime|talk.session|appendAudio|transcript|Talk failed|Transcription failed|Speech network|VoiceE2E'
if command -v rg >/dev/null 2>&1; then
  LOGCAT_FILTER=(rg -i -- "$LOGCAT_PATTERN")
else
  LOGCAT_FILTER=(grep -E -i -- "$LOGCAT_PATTERN")
fi
adb logcat -d -v time |
  "${LOGCAT_FILTER[@]}" |
  tail -250 >"$ARTIFACT_DIR/logcat.txt" || true
|
||||
|
||||
# With --cleanup, ask the app to stop voice capture via the "stop" mode.
if [[ "$CLEANUP" -eq 1 ]]; then
  adb shell am broadcast \
    -a "$RUN_ACTION" \
    -n "$RECEIVER" \
    --es mode stop >/dev/null
fi

# Print the artifact directory as the script's final output.
echo "$ARTIFACT_DIR"
|
||||
Reference in New Issue
Block a user