mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 18:30:44 +00:00
fix(talk): Talk Mode TTS improvements for CJK languages (#53553)
* feat(talk): add distinct system sounds for each Talk Mode phase Play a short system sound on phase transitions to give the user audible feedback: - thinking: Tink - speaking: Pop - listening (after speech interrupted): Bottle - listening (after thinking): Submarine Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(talk): add right Shift key to interrupt Talk Mode speech Add TalkSpeechInterruptMonitor — a dedicated global key monitor that listens for right Shift (keyCode 60) to interrupt Talk Mode speech. Independent of Push-to-Talk, so it works even when PTT is disabled. Stops only the current response; the next conversation cycle continues normally via sendAndSpeak's resumeListeningIfNeeded flow. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(talk): increase silence detection timeout for CJK locales Korean, Japanese, and Chinese speakers need longer pauses between phrases. When the app locale is CJK, enforce a minimum 2000ms silence window (vs the default 1500ms) to avoid premature transcript submission. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(talk): remove force-unwraps and log CJK silence clamp in reloadConfig Replace non-idiomatic force-unwraps (cfg.voiceId!, cfg.modelId!) with safe flatMap unwrapping, and add an info log when CJK locale clamps the silence timeout so the override is observable in diagnostics. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(talk): add settings toggle to mute phase-transition sounds Add a "Play phase-transition sounds" checkbox to Voice Wake settings. When disabled, Talk Mode phase transitions (Tink/Pop/Bottle/Submarine) are silent. Defaults to enabled to preserve existing behavior. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat(talk): add toggle for Right Option speech interrupt Add a "Press Right Option to stop speech" checkbox to Voice Wake settings. Also change the interrupt key from right Shift to right Option (keyCode 61) to avoid conflicts with typing. Defaults to enabled to preserve existing behavior. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(talk): disable Push-to-Talk while Talk Mode is active Talk Mode and Push-to-Talk both use the right Option key (keyCode 61). Disable PTT when Talk Mode is enabled to prevent conflicting handlers, and restore PTT when Talk Mode is disabled. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(talk): show info when PTT is paused during Talk Mode Display a footnote under the Push-to-Talk toggle when both PTT and Talk Mode are enabled, explaining that PTT is paused while Talk Mode is active and resumes when Talk Mode is turned off. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fixup: SwiftFormat lint on TalkModeController phase sound switch Resolves macos-swift CI lint failures introduced by Korean comment formatting in 'feat(talk): add distinct system sounds for each Talk Mode phase'. - Collapse consecutive spaces between sound name and comment - Move floating comments above the listening case expression so they're at the correct indent level Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: hongsw <hongsw@hongswui-Macmini.local> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Fabian Williams <fabian@adotob.com>
This commit is contained in:
@@ -176,6 +176,23 @@ final class AppState {
|
||||
}
|
||||
}
|
||||
|
||||
var talkPhaseSoundsEnabled: Bool {
|
||||
didSet {
|
||||
self.ifNotPreview {
|
||||
UserDefaults.standard.set(self.talkPhaseSoundsEnabled, forKey: talkPhaseSoundsEnabledKey)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var talkShiftToStopEnabled: Bool {
|
||||
didSet {
|
||||
self.ifNotPreview {
|
||||
UserDefaults.standard.set(self.talkShiftToStopEnabled, forKey: talkShiftToStopEnabledKey)
|
||||
Task { TalkSpeechInterruptMonitor.shared.setEnabled(self.talkShiftToStopEnabled && self.talkEnabled) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Gateway-provided UI accent color (hex). Optional; clients provide a default.
|
||||
var seamColorHex: String?
|
||||
|
||||
@@ -309,6 +326,18 @@ final class AppState {
|
||||
self.voiceWakeTriggersTalkMode = UserDefaults.standard
|
||||
.object(forKey: voiceWakeTriggersTalkModeKey) as? Bool ?? false
|
||||
self.talkEnabled = UserDefaults.standard.bool(forKey: talkEnabledKey)
|
||||
if let storedPhaseSounds = UserDefaults.standard.object(forKey: talkPhaseSoundsEnabledKey) as? Bool {
|
||||
self.talkPhaseSoundsEnabled = storedPhaseSounds
|
||||
} else {
|
||||
self.talkPhaseSoundsEnabled = true
|
||||
UserDefaults.standard.set(true, forKey: talkPhaseSoundsEnabledKey)
|
||||
}
|
||||
if let storedShiftToStop = UserDefaults.standard.object(forKey: talkShiftToStopEnabledKey) as? Bool {
|
||||
self.talkShiftToStopEnabled = storedShiftToStop
|
||||
} else {
|
||||
self.talkShiftToStopEnabled = true
|
||||
UserDefaults.standard.set(true, forKey: talkShiftToStopEnabledKey)
|
||||
}
|
||||
self.seamColorHex = nil
|
||||
if let storedHeartbeats = UserDefaults.standard.object(forKey: heartbeatsEnabledKey) as? Bool {
|
||||
self.heartbeatsEnabled = storedHeartbeats
|
||||
@@ -778,6 +807,8 @@ extension AppState {
|
||||
state.voiceWakeAdditionalLocaleIDs = ["en-US", "de-DE"]
|
||||
state.voicePushToTalkEnabled = false
|
||||
state.talkEnabled = false
|
||||
state.talkPhaseSoundsEnabled = true
|
||||
state.talkShiftToStopEnabled = true
|
||||
state.iconOverride = .system
|
||||
state.heartbeatsEnabled = true
|
||||
state.connectionMode = .local
|
||||
|
||||
@@ -24,6 +24,8 @@ let voiceWakeAdditionalLocalesKey = "openclaw.voiceWakeAdditionalLocaleIDs"
|
||||
let voicePushToTalkEnabledKey = "openclaw.voicePushToTalkEnabled"
|
||||
let voiceWakeTriggersTalkModeKey = "openclaw.voiceWakeTriggersTalkMode"
|
||||
let talkEnabledKey = "openclaw.talkEnabled"
|
||||
let talkPhaseSoundsEnabledKey = "openclaw.talkPhaseSoundsEnabled"
|
||||
let talkShiftToStopEnabledKey = "openclaw.talkShiftToStopEnabled"
|
||||
let iconOverrideKey = "openclaw.iconOverride"
|
||||
let connectionModeKey = "openclaw.connectionMode"
|
||||
let remoteTargetKey = "openclaw.remoteTarget"
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import AppKit
|
||||
import Observation
|
||||
|
||||
@MainActor
|
||||
@@ -17,6 +18,10 @@ final class TalkModeController {
|
||||
} else {
|
||||
TalkOverlayController.shared.dismiss()
|
||||
}
|
||||
TalkSpeechInterruptMonitor.shared.setEnabled(enabled && AppStateStore.shared.talkShiftToStopEnabled)
|
||||
// Talk Mode and Push-to-Talk share the right Option key — disable PTT while Talk Mode is active.
|
||||
let pttEnabled = !enabled && AppStateStore.shared.voicePushToTalkEnabled
|
||||
VoicePushToTalkHotkey.shared.setEnabled(pttEnabled)
|
||||
await TalkModeRuntime.shared.setEnabled(enabled)
|
||||
// Resume voice wake listener *after* TalkMode audio is fully torn down.
|
||||
// Check swabbleEnabled (not voiceWakeTriggersTalkMode) so the paused wake listener
|
||||
@@ -27,8 +32,15 @@ final class TalkModeController {
|
||||
}
|
||||
|
||||
func updatePhase(_ phase: TalkModePhase) {
|
||||
let previousPhase = self.phase
|
||||
self.phase = phase
|
||||
TalkOverlayController.shared.updatePhase(phase)
|
||||
|
||||
// Play distinct system sounds for each phase transition.
|
||||
if phase != previousPhase {
|
||||
Self.playPhaseSound(phase, previousPhase: previousPhase)
|
||||
}
|
||||
|
||||
let effectivePhase = self.isPaused ? "paused" : phase.rawValue
|
||||
Task {
|
||||
await GatewayConnection.shared.talkMode(
|
||||
@@ -37,6 +49,25 @@ final class TalkModeController {
|
||||
}
|
||||
}
|
||||
|
||||
private static func playPhaseSound(_ phase: TalkModePhase, previousPhase: TalkModePhase) {
|
||||
guard AppStateStore.shared.talkPhaseSoundsEnabled else { return }
|
||||
let soundName: String? = switch phase {
|
||||
case .thinking:
|
||||
"Tink" // 생각 중: 짧고 가벼운 소리
|
||||
case .speaking:
|
||||
"Pop" // 대답 시작: 톡 소리
|
||||
case .listening:
|
||||
// 대답 중단(speaking→listening): 부드러운 종료음
|
||||
// 듣기 시작(thinking→listening 등): 잠수함 소리
|
||||
previousPhase == .speaking ? "Bottle" : "Submarine"
|
||||
case .idle:
|
||||
nil
|
||||
}
|
||||
if let soundName {
|
||||
NSSound(named: NSSound.Name(soundName))?.play()
|
||||
}
|
||||
}
|
||||
|
||||
func updateLevel(_ level: Double) {
|
||||
TalkOverlayController.shared.updateLevel(level)
|
||||
}
|
||||
|
||||
@@ -1021,12 +1021,22 @@ extension TalkModeRuntime {
|
||||
self.defaultOutputFormat = cfg.outputFormat
|
||||
self.interruptOnSpeech = cfg.interruptOnSpeech
|
||||
self.activeTalkProvider = cfg.activeProvider
|
||||
self.silenceWindow = TimeInterval(cfg.silenceTimeoutMs) / 1000
|
||||
let configuredSilenceMs = cfg.silenceTimeoutMs
|
||||
let locale = await MainActor.run { AppStateStore.shared.voiceWakeLocaleID }
|
||||
let isCJKLocale = locale.hasPrefix("ko") || locale.hasPrefix("ja") || locale.hasPrefix("zh")
|
||||
let effectiveSilenceMs = isCJKLocale ? max(configuredSilenceMs, 2000) : configuredSilenceMs
|
||||
if isCJKLocale, configuredSilenceMs < 2000 {
|
||||
self.logger
|
||||
.info(
|
||||
"talk CJK locale: silence timeout clamped " +
|
||||
"\(configuredSilenceMs, privacy: .public)ms -> 2000ms")
|
||||
}
|
||||
self.silenceWindow = TimeInterval(effectiveSilenceMs) / 1000
|
||||
self.speechLocaleID = cfg.speechLocaleID
|
||||
self.apiKey = cfg.apiKey
|
||||
let hasApiKey = (cfg.apiKey?.isEmpty == false)
|
||||
let voiceLabel = (cfg.voiceId?.isEmpty == false) ? cfg.voiceId! : "none"
|
||||
let modelLabel = (cfg.modelId?.isEmpty == false) ? cfg.modelId! : "none"
|
||||
let voiceLabel = cfg.voiceId.flatMap { $0.isEmpty ? nil : $0 } ?? "none"
|
||||
let modelLabel = cfg.modelId.flatMap { $0.isEmpty ? nil : $0 } ?? "none"
|
||||
self.logger
|
||||
.info(
|
||||
"talk config provider=\(cfg.activeProvider, privacy: .public) " +
|
||||
|
||||
57
apps/macos/Sources/OpenClaw/TalkSpeechInterruptMonitor.swift
Normal file
57
apps/macos/Sources/OpenClaw/TalkSpeechInterruptMonitor.swift
Normal file
@@ -0,0 +1,57 @@
|
||||
import AppKit
|
||||
import OSLog
|
||||
|
||||
/// Monitors right Option key (keyCode 61) to interrupt Talk Mode speech.
|
||||
/// Independent of Push-to-Talk — active whenever Talk Mode is enabled.
|
||||
final class TalkSpeechInterruptMonitor: @unchecked Sendable {
|
||||
static let shared = TalkSpeechInterruptMonitor()
|
||||
|
||||
private let logger = Logger(subsystem: "ai.openclaw", category: "talk.interrupt")
|
||||
private var globalMonitor: Any?
|
||||
private var localMonitor: Any?
|
||||
|
||||
func setEnabled(_ enabled: Bool) {
|
||||
DispatchQueue.main.async { [weak self] in
|
||||
guard let self else { return }
|
||||
if enabled {
|
||||
self.startMonitoring()
|
||||
} else {
|
||||
self.stopMonitoring()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private func startMonitoring() {
|
||||
guard self.globalMonitor == nil, self.localMonitor == nil else { return }
|
||||
self.globalMonitor = NSEvent.addGlobalMonitorForEvents(matching: .flagsChanged) { [weak self] event in
|
||||
self?.handleFlags(keyCode: event.keyCode, modifierFlags: event.modifierFlags)
|
||||
}
|
||||
self.localMonitor = NSEvent.addLocalMonitorForEvents(matching: .flagsChanged) { [weak self] event in
|
||||
self?.handleFlags(keyCode: event.keyCode, modifierFlags: event.modifierFlags)
|
||||
return event
|
||||
}
|
||||
self.logger.info("talk interrupt monitor started")
|
||||
}
|
||||
|
||||
private func stopMonitoring() {
|
||||
if let globalMonitor {
|
||||
NSEvent.removeMonitor(globalMonitor)
|
||||
self.globalMonitor = nil
|
||||
}
|
||||
if let localMonitor {
|
||||
NSEvent.removeMonitor(localMonitor)
|
||||
self.localMonitor = nil
|
||||
}
|
||||
self.logger.info("talk interrupt monitor stopped")
|
||||
}
|
||||
|
||||
private func handleFlags(keyCode: UInt16, modifierFlags: NSEvent.ModifierFlags) {
|
||||
// Right Option key down (keyCode 61).
|
||||
guard keyCode == 61, modifierFlags.contains(.option) else { return }
|
||||
Task { @MainActor in
|
||||
guard TalkModeController.shared.phase == .speaking else { return }
|
||||
self.logger.info("right option — interrupting talk mode speech")
|
||||
TalkModeController.shared.stopSpeaking(reason: .userTap)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -80,6 +80,7 @@ final class VoicePushToTalkHotkey: @unchecked Sendable {
|
||||
|
||||
private func updateModifierState(keyCode: UInt16, modifierFlags: NSEvent.ModifierFlags) {
|
||||
// assert(Thread.isMainThread) - Removed for Swift 6
|
||||
|
||||
// Right Option (keyCode 61) acts as a hold-to-talk modifier.
|
||||
if keyCode == 61 {
|
||||
self.optionDown = modifierFlags.contains(.option)
|
||||
|
||||
@@ -72,6 +72,31 @@ struct VoiceWakeSettings: View {
|
||||
binding: self.$state.voicePushToTalkEnabled)
|
||||
.disabled(!voiceWakeSupported)
|
||||
|
||||
if self.state.voicePushToTalkEnabled, self.state.talkEnabled {
|
||||
Text("Push-to-Talk is paused while Talk Mode is active. It resumes when Talk Mode is turned off.")
|
||||
.font(.footnote)
|
||||
.foregroundStyle(.secondary)
|
||||
.padding(.leading, 20)
|
||||
}
|
||||
|
||||
SettingsToggleRow(
|
||||
title: "Play phase-transition sounds",
|
||||
subtitle: """
|
||||
Play short system sounds when Talk Mode switches between
|
||||
listening, thinking, and speaking.
|
||||
""",
|
||||
binding: self.$state.talkPhaseSoundsEnabled)
|
||||
.disabled(!voiceWakeSupported)
|
||||
|
||||
SettingsToggleRow(
|
||||
title: "Press Right Option to stop speech",
|
||||
subtitle: """
|
||||
Tap the right Option key to interrupt the assistant while it is
|
||||
speaking and return to listening.
|
||||
""",
|
||||
binding: self.$state.talkShiftToStopEnabled)
|
||||
.disabled(!voiceWakeSupported)
|
||||
|
||||
if !voiceWakeSupported {
|
||||
Label("Voice Wake requires macOS 26 or newer.", systemImage: "exclamationmark.triangle.fill")
|
||||
.font(.callout)
|
||||
|
||||
Reference in New Issue
Block a user