diff --git a/apps/macos/Sources/OpenClaw/AppState.swift b/apps/macos/Sources/OpenClaw/AppState.swift index 626acb69a2f..9e008682482 100644 --- a/apps/macos/Sources/OpenClaw/AppState.swift +++ b/apps/macos/Sources/OpenClaw/AppState.swift @@ -176,6 +176,23 @@ final class AppState { } } + var talkPhaseSoundsEnabled: Bool { + didSet { + self.ifNotPreview { + UserDefaults.standard.set(self.talkPhaseSoundsEnabled, forKey: talkPhaseSoundsEnabledKey) + } + } + } + + var talkShiftToStopEnabled: Bool { + didSet { + self.ifNotPreview { + UserDefaults.standard.set(self.talkShiftToStopEnabled, forKey: talkShiftToStopEnabledKey) + Task { TalkSpeechInterruptMonitor.shared.setEnabled(self.talkShiftToStopEnabled && self.talkEnabled) } + } + } + } + /// Gateway-provided UI accent color (hex). Optional; clients provide a default. var seamColorHex: String? @@ -309,6 +326,18 @@ final class AppState { self.voiceWakeTriggersTalkMode = UserDefaults.standard .object(forKey: voiceWakeTriggersTalkModeKey) as? Bool ?? false self.talkEnabled = UserDefaults.standard.bool(forKey: talkEnabledKey) + if let storedPhaseSounds = UserDefaults.standard.object(forKey: talkPhaseSoundsEnabledKey) as? Bool { + self.talkPhaseSoundsEnabled = storedPhaseSounds + } else { + self.talkPhaseSoundsEnabled = true + UserDefaults.standard.set(true, forKey: talkPhaseSoundsEnabledKey) + } + if let storedShiftToStop = UserDefaults.standard.object(forKey: talkShiftToStopEnabledKey) as? Bool { + self.talkShiftToStopEnabled = storedShiftToStop + } else { + self.talkShiftToStopEnabled = true + UserDefaults.standard.set(true, forKey: talkShiftToStopEnabledKey) + } self.seamColorHex = nil if let storedHeartbeats = UserDefaults.standard.object(forKey: heartbeatsEnabledKey) as? 
Bool { self.heartbeatsEnabled = storedHeartbeats @@ -778,6 +807,8 @@ extension AppState { state.voiceWakeAdditionalLocaleIDs = ["en-US", "de-DE"] state.voicePushToTalkEnabled = false state.talkEnabled = false + state.talkPhaseSoundsEnabled = true + state.talkShiftToStopEnabled = true state.iconOverride = .system state.heartbeatsEnabled = true state.connectionMode = .local diff --git a/apps/macos/Sources/OpenClaw/Constants.swift b/apps/macos/Sources/OpenClaw/Constants.swift index 878a7cfe1c3..49e0992d1bd 100644 --- a/apps/macos/Sources/OpenClaw/Constants.swift +++ b/apps/macos/Sources/OpenClaw/Constants.swift @@ -24,6 +24,8 @@ let voiceWakeAdditionalLocalesKey = "openclaw.voiceWakeAdditionalLocaleIDs" let voicePushToTalkEnabledKey = "openclaw.voicePushToTalkEnabled" let voiceWakeTriggersTalkModeKey = "openclaw.voiceWakeTriggersTalkMode" let talkEnabledKey = "openclaw.talkEnabled" +let talkPhaseSoundsEnabledKey = "openclaw.talkPhaseSoundsEnabled" +let talkShiftToStopEnabledKey = "openclaw.talkShiftToStopEnabled" let iconOverrideKey = "openclaw.iconOverride" let connectionModeKey = "openclaw.connectionMode" let remoteTargetKey = "openclaw.remoteTarget" diff --git a/apps/macos/Sources/OpenClaw/TalkModeController.swift b/apps/macos/Sources/OpenClaw/TalkModeController.swift index cbddaea1be8..0b53e4015bb 100644 --- a/apps/macos/Sources/OpenClaw/TalkModeController.swift +++ b/apps/macos/Sources/OpenClaw/TalkModeController.swift @@ -1,3 +1,4 @@ +import AppKit import Observation @MainActor @@ -17,6 +18,10 @@ final class TalkModeController { } else { TalkOverlayController.shared.dismiss() } + TalkSpeechInterruptMonitor.shared.setEnabled(enabled && AppStateStore.shared.talkShiftToStopEnabled) + // Talk Mode and Push-to-Talk share the right Option key — disable PTT while Talk Mode is active. 
+ let pttEnabled = !enabled && AppStateStore.shared.voicePushToTalkEnabled + VoicePushToTalkHotkey.shared.setEnabled(pttEnabled) await TalkModeRuntime.shared.setEnabled(enabled) // Resume voice wake listener *after* TalkMode audio is fully torn down. // Check swabbleEnabled (not voiceWakeTriggersTalkMode) so the paused wake listener @@ -27,8 +32,15 @@ final class TalkModeController { } func updatePhase(_ phase: TalkModePhase) { + let previousPhase = self.phase self.phase = phase TalkOverlayController.shared.updatePhase(phase) + + // Play a phase-specific system sound, but only when the phase actually changed. + if phase != previousPhase { + Self.playPhaseSound(phase, previousPhase: previousPhase) + } + let effectivePhase = self.isPaused ? "paused" : phase.rawValue Task { await GatewayConnection.shared.talkMode( @@ -37,6 +49,25 @@ final class TalkModeController { } } + private static func playPhaseSound(_ phase: TalkModePhase, previousPhase: TalkModePhase) { + guard AppStateStore.shared.talkPhaseSoundsEnabled else { return } + let soundName: String? = switch phase { + case .thinking: + "Tink" // Thinking: short, light sound + case .speaking: + "Pop" // Reply starts: pop sound + case .listening: + // Reply stopped (speaking→listening): soft closing sound + // Listening starts (thinking→listening, etc.): submarine sound + previousPhase == .speaking ? 
"Bottle" : "Submarine" + case .idle: + nil + } + if let soundName { + NSSound(named: NSSound.Name(soundName))?.play() + } + } + func updateLevel(_ level: Double) { TalkOverlayController.shared.updateLevel(level) } diff --git a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift index 8f407093b9f..8ec32302138 100644 --- a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift +++ b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift @@ -1021,12 +1021,22 @@ extension TalkModeRuntime { self.defaultOutputFormat = cfg.outputFormat self.interruptOnSpeech = cfg.interruptOnSpeech self.activeTalkProvider = cfg.activeProvider - self.silenceWindow = TimeInterval(cfg.silenceTimeoutMs) / 1000 + let configuredSilenceMs = cfg.silenceTimeoutMs + let locale = await MainActor.run { AppStateStore.shared.voiceWakeLocaleID } + let isCJKLocale = locale.hasPrefix("ko") || locale.hasPrefix("ja") || locale.hasPrefix("zh") + let effectiveSilenceMs = isCJKLocale ? max(configuredSilenceMs, 2000) : configuredSilenceMs + if isCJKLocale, configuredSilenceMs < 2000 { + self.logger + .info( + "talk CJK locale: silence timeout clamped " + + "\(configuredSilenceMs, privacy: .public)ms -> 2000ms") + } + self.silenceWindow = TimeInterval(effectiveSilenceMs) / 1000 self.speechLocaleID = cfg.speechLocaleID self.apiKey = cfg.apiKey let hasApiKey = (cfg.apiKey?.isEmpty == false) - let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none" - let modelLabel = (cfg.modelId?.isEmpty == false) ? cfg.modelId! : "none" + let voiceLabel = cfg.voiceId.flatMap { $0.isEmpty ? nil : $0 } ?? "none" + let modelLabel = cfg.modelId.flatMap { $0.isEmpty ? nil : $0 } ?? 
"none" self.logger .info( "talk config provider=\(cfg.activeProvider, privacy: .public) " + diff --git a/apps/macos/Sources/OpenClaw/TalkSpeechInterruptMonitor.swift b/apps/macos/Sources/OpenClaw/TalkSpeechInterruptMonitor.swift new file mode 100644 index 00000000000..5f6d7bffcb9 --- /dev/null +++ b/apps/macos/Sources/OpenClaw/TalkSpeechInterruptMonitor.swift @@ -0,0 +1,57 @@ +import AppKit +import OSLog + +/// Monitors right Option key (keyCode 61) to interrupt Talk Mode speech. +/// Independent of Push-to-Talk — active while Talk Mode is enabled and the stop-speech setting is on. +final class TalkSpeechInterruptMonitor: @unchecked Sendable { + static let shared = TalkSpeechInterruptMonitor() + + private let logger = Logger(subsystem: "ai.openclaw", category: "talk.interrupt") + private var globalMonitor: Any? + private var localMonitor: Any? + + func setEnabled(_ enabled: Bool) { + DispatchQueue.main.async { [weak self] in + guard let self else { return } + if enabled { + self.startMonitoring() + } else { + self.stopMonitoring() + } + } + } + + private func startMonitoring() { + guard self.globalMonitor == nil, self.localMonitor == nil else { return } + self.globalMonitor = NSEvent.addGlobalMonitorForEvents(matching: .flagsChanged) { [weak self] event in + self?.handleFlags(keyCode: event.keyCode, modifierFlags: event.modifierFlags) + } + self.localMonitor = NSEvent.addLocalMonitorForEvents(matching: .flagsChanged) { [weak self] event in + self?.handleFlags(keyCode: event.keyCode, modifierFlags: event.modifierFlags) + return event + } + self.logger.info("talk interrupt monitor started") + } + + private func stopMonitoring() { + if let globalMonitor { + NSEvent.removeMonitor(globalMonitor) + self.globalMonitor = nil + } + if let localMonitor { + NSEvent.removeMonitor(localMonitor) + self.localMonitor = nil + } + self.logger.info("talk interrupt monitor stopped") + } + + private func handleFlags(keyCode: UInt16, modifierFlags: NSEvent.ModifierFlags) { + // Right Option key down (keyCode 61). NOTE(review): .option also stays set if left Option is held — confirm. 
+ guard keyCode == 61, modifierFlags.contains(.option) else { return } + Task { @MainActor in + guard TalkModeController.shared.phase == .speaking else { return } + self.logger.info("right option — interrupting talk mode speech") + TalkModeController.shared.stopSpeaking(reason: .userTap) + } + } +} diff --git a/apps/macos/Sources/OpenClaw/VoicePushToTalk.swift b/apps/macos/Sources/OpenClaw/VoicePushToTalk.swift index 1a76804b247..872dcc224a6 100644 --- a/apps/macos/Sources/OpenClaw/VoicePushToTalk.swift +++ b/apps/macos/Sources/OpenClaw/VoicePushToTalk.swift @@ -80,6 +80,7 @@ final class VoicePushToTalkHotkey: @unchecked Sendable { private func updateModifierState(keyCode: UInt16, modifierFlags: NSEvent.ModifierFlags) { // assert(Thread.isMainThread) - Removed for Swift 6 + // Right Option (keyCode 61) acts as a hold-to-talk modifier. if keyCode == 61 { self.optionDown = modifierFlags.contains(.option) diff --git a/apps/macos/Sources/OpenClaw/VoiceWakeSettings.swift b/apps/macos/Sources/OpenClaw/VoiceWakeSettings.swift index a3f8653bcbd..770f5034468 100644 --- a/apps/macos/Sources/OpenClaw/VoiceWakeSettings.swift +++ b/apps/macos/Sources/OpenClaw/VoiceWakeSettings.swift @@ -72,6 +72,31 @@ struct VoiceWakeSettings: View { binding: self.$state.voicePushToTalkEnabled) .disabled(!voiceWakeSupported) + if self.state.voicePushToTalkEnabled, self.state.talkEnabled { + Text("Push-to-Talk is paused while Talk Mode is active. It resumes when Talk Mode is turned off.") + .font(.footnote) + .foregroundStyle(.secondary) + .padding(.leading, 20) + } + + SettingsToggleRow( + title: "Play phase-transition sounds", + subtitle: """ + Play short system sounds when Talk Mode switches between + listening, thinking, and speaking. 
+ """, + binding: self.$state.talkPhaseSoundsEnabled) + .disabled(!voiceWakeSupported) + + SettingsToggleRow( + title: "Press Right Option to stop speech", + subtitle: """ + Tap the right Option key to interrupt the assistant while it is + speaking and return to listening. + """, + binding: self.$state.talkShiftToStopEnabled) + .disabled(!voiceWakeSupported) + if !voiceWakeSupported { Label("Voice Wake requires macOS 26 or newer.", systemImage: "exclamationmark.triangle.fill") .font(.callout)