From afe1abc29745702b1e1f0b0757dd68736933304a Mon Sep 17 00:00:00 2001 From: Longbiao CHEN Date: Tue, 7 Apr 2026 03:08:29 +0800 Subject: [PATCH] feat(voicewake): refresh trigger routing on main --- Swabble/Sources/SwabbleKit/WakeWordGate.swift | 35 +- .../SwabbleKitTests/WakeWordGateTests.swift | 15 + .../Sources/OpenClaw/GatewayConnection.swift | 5 + .../OpenClaw/VoiceSessionCoordinator.swift | 22 +- .../Sources/OpenClaw/VoiceWakeForwarder.swift | 4 +- .../VoiceWakeRecognitionDebugSupport.swift | 6 +- .../Sources/OpenClaw/VoiceWakeRuntime.swift | 64 +++- .../Sources/OpenClaw/VoiceWakeTextUtils.swift | 95 ++++- .../OpenClawProtocol/GatewayModels.swift | 4 + .../GatewayConnectionControlTests.swift | 54 ++- .../VoiceWakeRuntimeTests.swift | 57 +++ .../OpenClawProtocol/GatewayModels.swift | 4 + docs/nodes/voicewake.md | 23 ++ .../firecrawl/src/firecrawl-scrape-tool.ts | 4 +- extensions/line/runtime-api.ts | 2 +- package.json | 8 +- scripts/lib/plugin-sdk-entrypoints.json | 2 +- ...ed-runner.sanitize-session-history.test.ts | 2 +- src/gateway/method-scopes.ts | 2 + src/gateway/protocol/schema/agent.ts | 1 + src/gateway/server-methods-list.ts | 3 + src/gateway/server-methods.ts | 2 + src/gateway/server-methods/agent.test.ts | 248 ++++++++++++- src/gateway/server-methods/agent.ts | 110 +++++- src/gateway/server-methods/shared-types.ts | 3 + .../server-methods/voicewake-routing.ts | 46 +++ src/gateway/server.impl.ts | 42 +++ .../server.models-voicewake-misc.test.ts | 156 +++++++++ .../server/ws-connection/message-handler.ts | 22 +- src/infra/infra-store.test.ts | 246 ++++++++++++- src/infra/voicewake-routing.test.ts | 109 ++++++ src/infra/voicewake-routing.ts | 325 ++++++++++++++++++ 32 files changed, 1656 insertions(+), 65 deletions(-) create mode 100644 src/gateway/server-methods/voicewake-routing.ts create mode 100644 src/infra/voicewake-routing.test.ts create mode 100644 src/infra/voicewake-routing.ts diff --git a/Swabble/Sources/SwabbleKit/WakeWordGate.swift b/Swabble/Sources/SwabbleKit/WakeWordGate.swift index 1a1479b630b..b36855c1dc7 100644 --- a/Swabble/Sources/SwabbleKit/WakeWordGate.swift +++ b/Swabble/Sources/SwabbleKit/WakeWordGate.swift @@ -35,11 +35,18 @@ public struct WakeWordGateMatch: Sendable, Equatable { public let triggerEndTime: TimeInterval public let postGap: TimeInterval public let command: String + public let trigger: String? - public init(triggerEndTime: TimeInterval, postGap: TimeInterval, command: String) { + public init( + triggerEndTime: TimeInterval, + postGap: TimeInterval, + command: String, + trigger: String? = nil) + { self.triggerEndTime = triggerEndTime self.postGap = postGap self.command = command + self.trigger = trigger } } @@ -53,13 +60,17 @@ public enum WakeWordGate { } private struct TriggerTokens { + let source: String let tokens: [String] } private struct MatchCandidate { let index: Int + let endIndex: Int + let tokenCount: Int let triggerEnd: TimeInterval let gap: TimeInterval + let trigger: String } public static func match( @@ -87,9 +98,19 @@ public enum WakeWordGate { let gap = nextToken.start - triggerEnd if gap < config.minPostTriggerGap { continue } - if let best, i <= best.index { continue } + let endIndex = i + count - 1 + if let best { + if endIndex < best.endIndex { continue } + if endIndex == best.endIndex, count <= best.tokenCount { continue } + } - best = MatchCandidate(index: i, triggerEnd: triggerEnd, gap: gap) + best = MatchCandidate( + index: i, + endIndex: endIndex, + tokenCount: count, + triggerEnd: triggerEnd, + gap: gap, + trigger: trigger.source) } } @@ -97,7 +118,11 @@ public enum WakeWordGate { let command = commandText(transcript: transcript, segments: segments, triggerEndTime: best.triggerEnd) .trimmingCharacters(in: Self.whitespaceAndPunctuation) guard command.count >= config.minCommandLength else { return nil } - return WakeWordGateMatch(triggerEndTime: best.triggerEnd, postGap: best.gap, command: command) + return WakeWordGateMatch( + triggerEndTime: best.triggerEnd, + postGap: best.gap, + command: command, + trigger: best.trigger) } public static func commandText( @@ -145,7 +170,7 @@ public enum WakeWordGate { .map { normalizeToken(String($0)) } .filter { !$0.isEmpty } if tokens.isEmpty { continue } - output.append(TriggerTokens(tokens: tokens)) + output.append(TriggerTokens(source: tokens.joined(separator: " "), tokens: tokens)) } return output } diff --git a/Swabble/Tests/SwabbleKitTests/WakeWordGateTests.swift b/Swabble/Tests/SwabbleKitTests/WakeWordGateTests.swift index 7e5b4abdd74..13be4550d4d 100644 --- a/Swabble/Tests/SwabbleKitTests/WakeWordGateTests.swift +++ b/Swabble/Tests/SwabbleKitTests/WakeWordGateTests.swift @@ -47,6 +47,21 @@ import Testing #expect(match?.command == "do it") } + @Test func matchPrefersMostSpecificTriggerWhenOverlapping() { + let transcript = "hey clawd do it" + let segments = makeSegments( + transcript: transcript, + words: [ + ("hey", 0.0, 0.1), + ("clawd", 0.2, 0.1), + ("do", 0.8, 0.1), + ("it", 1.0, 0.1), + ]) + let config = WakeWordGateConfig(triggers: ["clawd", "hey clawd"], minPostTriggerGap: 0.3) + let match = WakeWordGate.match(transcript: transcript, segments: segments, config: config) + #expect(match?.trigger == "hey clawd") + } + @Test func commandTextHandlesForeignRangeIndices() { let transcript = "hey clawd do thing" let other = "do thing" diff --git a/apps/macos/Sources/OpenClaw/GatewayConnection.swift b/apps/macos/Sources/OpenClaw/GatewayConnection.swift index 5368703ad08..261b94b02c4 100644 --- a/apps/macos/Sources/OpenClaw/GatewayConnection.swift +++ b/apps/macos/Sources/OpenClaw/GatewayConnection.swift @@ -42,6 +42,7 @@ struct GatewayAgentInvocation { var channel: GatewayAgentChannel = .last var timeoutSeconds: Int? var idempotencyKey: String = UUID().uuidString + var voiceWakeTrigger: String? } /// Single, shared Gateway websocket connection for the whole app. @@ -499,6 +500,10 @@ extension GatewayConnection { if let timeout = invocation.timeoutSeconds { params["timeout"] = AnyCodable(timeout) } + if let trigger = invocation.voiceWakeTrigger { + params["voiceWakeTrigger"] = AnyCodable( + trigger.trimmingCharacters(in: .whitespacesAndNewlines)) + } do { try await self.requestVoid(method: .agent, params: params) diff --git a/apps/macos/Sources/OpenClaw/VoiceSessionCoordinator.swift b/apps/macos/Sources/OpenClaw/VoiceSessionCoordinator.swift index 87c32d26670..2898c3008f0 100644 --- a/apps/macos/Sources/OpenClaw/VoiceSessionCoordinator.swift +++ b/apps/macos/Sources/OpenClaw/VoiceSessionCoordinator.swift @@ -17,6 +17,7 @@ final class VoiceSessionCoordinator { var isFinal: Bool var sendChime: VoiceWakeChime var autoSendDelay: TimeInterval? + var voiceWakeTrigger: String? } private let logger = Logger(subsystem: "ai.openclaw", category: "voicewake.coordinator") @@ -28,7 +29,8 @@ final class VoiceSessionCoordinator { source: Source, text: String, attributed: NSAttributedString? = nil, - forwardEnabled: Bool = false) -> UUID + forwardEnabled: Bool = false, + voiceWakeTrigger: String? = nil) -> UUID { let token = UUID() self.logger.info("coordinator start token=\(token.uuidString) source=\(source.rawValue) len=\(text.count)") @@ -40,7 +42,8 @@ final class VoiceSessionCoordinator { attributed: attributedText, isFinal: false, sendChime: .none, - autoSendDelay: nil) + autoSendDelay: nil, + voiceWakeTrigger: voiceWakeTrigger) self.session = session VoiceWakeOverlayController.shared.startSession( token: token, @@ -63,7 +66,8 @@ final class VoiceSessionCoordinator { token: UUID, text: String, sendChime: VoiceWakeChime, - autoSendAfter: TimeInterval?) + autoSendAfter: TimeInterval?, + voiceWakeTrigger: String? = nil) { guard let session, session.token == token else { return } self.logger @@ -73,6 +77,9 @@ final class VoiceSessionCoordinator { self.session?.isFinal = true self.session?.sendChime = sendChime self.session?.autoSendDelay = autoSendAfter + if let voiceWakeTrigger { + self.session?.voiceWakeTrigger = voiceWakeTrigger + } let attributed = VoiceWakeOverlayController.shared.makeAttributed(from: text) VoiceWakeOverlayController.shared.presentFinal( @@ -86,15 +93,20 @@ final class VoiceSessionCoordinator { func sendNow(token: UUID, reason: String = "explicit") { guard let session, session.token == token else { return } let text = session.text.trimmingCharacters(in: .whitespacesAndNewlines) + let voiceWakeTrigger = session.voiceWakeTrigger + let sendChime = session.sendChime guard !text.isEmpty else { self.logger.info("coordinator sendNow \(reason) empty -> dismiss") VoiceWakeOverlayController.shared.dismiss(token: token, reason: .empty, outcome: .empty) self.clearSession() return } - VoiceWakeOverlayController.shared.beginSendUI(token: token, sendChime: session.sendChime) + VoiceWakeOverlayController.shared.beginSendUI(token: token, sendChime: sendChime) Task.detached { - _ = await VoiceWakeForwarder.forward(transcript: text) + _ = await VoiceWakeForwarder.forward( + transcript: text, + options: .init( + voiceWakeTrigger: voiceWakeTrigger)) } } diff --git a/apps/macos/Sources/OpenClaw/VoiceWakeForwarder.swift b/apps/macos/Sources/OpenClaw/VoiceWakeForwarder.swift index 57a240afc57..8f88f363e02 100644 --- a/apps/macos/Sources/OpenClaw/VoiceWakeForwarder.swift +++ b/apps/macos/Sources/OpenClaw/VoiceWakeForwarder.swift @@ -38,6 +38,7 @@ enum VoiceWakeForwarder { var deliver: Bool = true var to: String? var channel: GatewayAgentChannel = .webchat + var voiceWakeTrigger: String? } @discardableResult @@ -53,7 +54,8 @@ enum VoiceWakeForwarder { thinking: options.thinking, deliver: deliver, to: options.to, - channel: options.channel)) + channel: options.channel, + voiceWakeTrigger: options.voiceWakeTrigger)) if result.ok { self.logger.info("voice wake forward ok") diff --git a/apps/macos/Sources/OpenClaw/VoiceWakeRecognitionDebugSupport.swift b/apps/macos/Sources/OpenClaw/VoiceWakeRecognitionDebugSupport.swift index 8dc29b93de8..ab436c0b1c5 100644 --- a/apps/macos/Sources/OpenClaw/VoiceWakeRecognitionDebugSupport.swift +++ b/apps/macos/Sources/OpenClaw/VoiceWakeRecognitionDebugSupport.swift @@ -41,7 +41,11 @@ enum VoiceWakeRecognitionDebugSupport { minCommandLength: config.minCommandLength, trimWake: trimWake) else { return nil } - return WakeWordGateMatch(triggerEndTime: 0, postGap: 0, command: command) + return WakeWordGateMatch( + triggerEndTime: 0, + postGap: 0, + command: command, + trigger: VoiceWakeTextUtils.matchedTriggerWord(transcript: transcript, triggers: triggers)) } static func transcriptSummary( diff --git a/apps/macos/Sources/OpenClaw/VoiceWakeRuntime.swift b/apps/macos/Sources/OpenClaw/VoiceWakeRuntime.swift index bb6c3dba541..3d8c93abbd0 100644 --- a/apps/macos/Sources/OpenClaw/VoiceWakeRuntime.swift +++ b/apps/macos/Sources/OpenClaw/VoiceWakeRuntime.swift @@ -37,6 +37,7 @@ actor VoiceWakeRuntime { private var listeningState: ListeningState = .idle private var overlayToken: UUID? private var activeTriggerEndTime: TimeInterval? + private var activeTriggerWord: String? private var scheduledRestartTask: Task? private var lastLoggedText: String? private var lastLoggedAt: Date? @@ -256,6 +257,7 @@ actor VoiceWakeRuntime { self.currentConfig = nil self.listeningState = .idle self.activeTriggerEndTime = nil + self.activeTriggerWord = nil self.logger.debug("voicewake runtime stopped") DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "stopped") @@ -366,7 +368,11 @@ actor VoiceWakeRuntime { } else { self.logger.info("voicewake runtime detected len=\(match.command.count)") } - await self.beginCapture(command: match.command, triggerEndTime: match.triggerEndTime, config: config) + await self.beginCapture( + command: match.command, + triggerEndTime: match.triggerEndTime, + triggerWord: match.trigger, + config: config) } else if !transcript.isEmpty, update.error == nil { if self.isTriggerOnly(transcript: transcript, triggers: config.triggers) { self.preDetectTask?.cancel() @@ -494,13 +500,33 @@ actor VoiceWakeRuntime { return } self.logger.info("voicewake runtime detected (trigger-only pause)") - await self.beginCapture(command: "", triggerEndTime: nil, config: config) + let matchedTrigger = self.matchedTriggerWord(transcript: lastText, triggers: triggers) + await self.beginCapture( + command: "", + triggerEndTime: nil, + triggerWord: matchedTrigger, + config: config) } private func isTriggerOnly(transcript: String, triggers: [String]) -> Bool { + Self.isTriggerOnlyText(transcript: transcript, triggers: triggers) + } + + private func matchedTriggerWord(transcript: String, triggers: [String]) -> String? { + Self.matchedTriggerWordText(transcript: transcript, triggers: triggers) + } + + private static func isTriggerOnlyText(transcript: String, triggers: [String]) -> Bool { guard WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers) else { return false } - guard VoiceWakeTextUtils.startsWithTrigger(transcript: transcript, triggers: triggers) else { return false } - return Self.trimmedAfterTrigger(transcript, triggers: triggers).isEmpty + guard + VoiceWakeTextUtils.startsWithTrigger(transcript: transcript, triggers: triggers) + || VoiceWakeTextUtils.hasOnlyFillerBeforeTrigger(transcript: transcript, triggers: triggers) + else { return false } + return self.trimmedAfterTrigger(transcript, triggers: triggers).isEmpty + } + + private static func matchedTriggerWordText(transcript: String, triggers: [String]) -> String? { + VoiceWakeTextUtils.matchedTriggerWord(transcript: transcript, triggers: triggers) } private func preDetectSilenceCheck( @@ -527,10 +553,16 @@ actor VoiceWakeRuntime { await self.beginCapture( command: match.command, triggerEndTime: match.triggerEndTime, + triggerWord: match.trigger, config: config) } - private func beginCapture(command: String, triggerEndTime: TimeInterval?, config: RuntimeConfig) async { + private func beginCapture( + command: String, + triggerEndTime: TimeInterval?, + triggerWord: String?, + config: RuntimeConfig) async + { // When "Trigger Talk Mode" is enabled, skip the capture/overlay flow entirely // and activate Talk Mode immediately. Talk Mode handles its own STT pipeline. // Pause the wake listener to avoid two audio pipelines competing on the mic @@ -545,7 +577,6 @@ actor VoiceWakeRuntime { await AppStateStore.shared.setTalkEnabled(true) return } - self.listeningState = .voiceWake self.isCapturing = true DiagnosticsFileLog.shared.log(category: "voicewake.runtime", event: "beginCapture") @@ -557,6 +588,7 @@ actor VoiceWakeRuntime { self.heardBeyondTrigger = !command.isEmpty self.triggerChimePlayed = false self.activeTriggerEndTime = triggerEndTime + self.activeTriggerWord = triggerWord self.preDetectTask?.cancel() self.preDetectTask = nil self.triggerOnlyTask?.cancel() @@ -577,7 +609,8 @@ actor VoiceWakeRuntime { source: .wakeWord, text: snapshot, attributed: attributed, - forwardEnabled: true) + forwardEnabled: true, + voiceWakeTrigger: triggerWord) } // Keep the "ears" boosted for the capture window so the status icon animates while recording. @@ -632,7 +665,9 @@ actor VoiceWakeRuntime { self.lastHeard = nil self.heardBeyondTrigger = false self.triggerChimePlayed = false + let triggerWord = self.activeTriggerWord self.activeTriggerEndTime = nil + self.activeTriggerWord = nil self.lastTranscript = nil self.lastTranscriptAt = nil self.preDetectTask?.cancel() @@ -653,14 +688,17 @@ actor VoiceWakeRuntime { token: token, text: finalTranscript, sendChime: sendChime, - autoSendAfter: delay) + autoSendAfter: delay, + voiceWakeTrigger: triggerWord) } } else if !finalTranscript.isEmpty { if sendChime != .none { await MainActor.run { VoiceWakeChimePlayer.play(sendChime, reason: "voicewake.send") } } Task.detached { - await VoiceWakeForwarder.forward(transcript: finalTranscript) + await VoiceWakeForwarder.forward( + transcript: finalTranscript, + options: .init(voiceWakeTrigger: triggerWord)) } } self.overlayToken = nil @@ -784,6 +822,14 @@ actor VoiceWakeRuntime { !self.trimmedAfterTrigger(text, triggers: triggers).isEmpty } + static func _testIsTriggerOnly(_ text: String, triggers: [String]) -> Bool { + self.isTriggerOnlyText(transcript: text, triggers: triggers) + } + + static func _testMatchedTriggerWord(_ text: String, triggers: [String]) -> String? { + self.matchedTriggerWordText(transcript: text, triggers: triggers) + } + static func _testAttributedColor(isFinal: Bool) -> NSColor { VoiceOverlayTextFormatting.makeAttributed(committed: "sample", volatile: "", isFinal: isFinal) .attribute(.foregroundColor, at: 0, effectiveRange: nil) as? NSColor ?? .clear diff --git a/apps/macos/Sources/OpenClaw/VoiceWakeTextUtils.swift b/apps/macos/Sources/OpenClaw/VoiceWakeTextUtils.swift index 9311765ad5c..58fe4fde9c9 100644 --- a/apps/macos/Sources/OpenClaw/VoiceWakeTextUtils.swift +++ b/apps/macos/Sources/OpenClaw/VoiceWakeTextUtils.swift @@ -4,6 +4,11 @@ import SwabbleKit enum VoiceWakeTextUtils { private static let whitespaceAndPunctuation = CharacterSet.whitespacesAndNewlines .union(.punctuationCharacters) + .union(.symbols) + private static let wakePrefixFillers: Set = [ + "a", "ah", "eh", "er", "erm", "hey", "hmm", "huh", "mhm", "mm", "oh", "uh", "um", + "yo", "呃", "嗯", "啊", "诶", "欸", + ] typealias TrimWake = (String, [String]) -> String static func normalizeToken(_ token: String) -> String { @@ -12,6 +17,43 @@ enum VoiceWakeTextUtils { .lowercased() } + private static func normalizedTriggerTokens(_ trigger: String) -> [String] { + trigger + .split(whereSeparator: { $0.isWhitespace }) + .map { self.normalizeToken(String($0)) } + .filter { !$0.isEmpty } + } + + private static func bestRawTriggerMatch( + transcript: String, + triggers: [String]) -> (range: Range, normalizedTrigger: String)? + { + var bestMatch: (range: Range, normalizedTrigger: String, tokenCount: Int)? + + for trigger in triggers { + let normalizedTokens = self.normalizedTriggerTokens(trigger) + guard !normalizedTokens.isEmpty else { continue } + let rawTrigger = trigger.trimmingCharacters(in: self.whitespaceAndPunctuation) + guard !rawTrigger.isEmpty, + let range = transcript.range( + of: rawTrigger, + options: [.caseInsensitive, .diacriticInsensitive, .widthInsensitive]) + else { continue } + + let tokenCount = normalizedTokens.count + if let bestMatch { + if range.lowerBound > bestMatch.range.lowerBound { continue } + if range.lowerBound == bestMatch.range.lowerBound, tokenCount <= bestMatch.tokenCount { + continue + } + } + + bestMatch = (range, normalizedTokens.joined(separator: " "), tokenCount) + } + + return bestMatch.map { (range: $0.range, normalizedTrigger: $0.normalizedTrigger) } + } + static func startsWithTrigger(transcript: String, triggers: [String]) -> Bool { let tokens = transcript .split(whereSeparator: { $0.isWhitespace }) @@ -19,10 +61,7 @@ enum VoiceWakeTextUtils { .filter { !$0.isEmpty } guard !tokens.isEmpty else { return false } for trigger in triggers { - let triggerTokens = trigger - .split(whereSeparator: { $0.isWhitespace }) - .map { self.normalizeToken(String($0)) } - .filter { !$0.isEmpty } + let triggerTokens = self.normalizedTriggerTokens(trigger) guard !triggerTokens.isEmpty, tokens.count >= triggerTokens.count else { continue } if zip(triggerTokens, tokens.prefix(triggerTokens.count)).allSatisfy({ $0 == $1 }) { return true @@ -40,9 +79,55 @@ enum VoiceWakeTextUtils { guard !transcript.isEmpty else { return nil } guard !self.normalizeToken(transcript).isEmpty else { return nil } guard WakeWordGate.matchesTextOnly(text: transcript, triggers: triggers) else { return nil } - guard self.startsWithTrigger(transcript: transcript, triggers: triggers) else { return nil } + guard + self.startsWithTrigger(transcript: transcript, triggers: triggers) + || self.hasOnlyFillerBeforeTrigger(transcript: transcript, triggers: triggers) + else { return nil } let trimmed = trimWake(transcript, triggers) guard trimmed.count >= minCommandLength else { return nil } return trimmed } + + static func hasOnlyFillerBeforeTrigger(transcript: String, triggers: [String]) -> Bool { + guard let match = self.bestRawTriggerMatch(transcript: transcript, triggers: triggers) else { return false } + let prefixTokens = transcript[.. String? { + if let rawMatch = self.bestRawTriggerMatch(transcript: transcript, triggers: triggers) { + return rawMatch.normalizedTrigger + } + + let transcriptTokens = transcript + .split(whereSeparator: { $0.isWhitespace }) + .map { self.normalizeToken(String($0)) } + .filter { !$0.isEmpty } + guard !transcriptTokens.isEmpty else { return nil } + + var bestStartIndex = Int.max + var bestTokenCount = -1 + var bestTokens: [String]? + + for trigger in triggers { + let triggerTokens = self.normalizedTriggerTokens(trigger) + guard !triggerTokens.isEmpty, transcriptTokens.count >= triggerTokens.count else { continue } + for index in 0...(transcriptTokens.count - triggerTokens.count) { + let candidate = transcriptTokens[index..<(index + triggerTokens.count)] + guard zip(triggerTokens, candidate).allSatisfy({ $0 == $1 }) else { continue } + if index < bestStartIndex || (index == bestStartIndex && triggerTokens.count > bestTokenCount) { + bestStartIndex = index + bestTokenCount = triggerTokens.count + bestTokens = triggerTokens + } + } + } + + return bestTokens?.joined(separator: " ") + } } diff --git a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift index c0e02d76b79..9bcc9ae6ea3 100644 --- a/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/macos/Sources/OpenClawProtocol/GatewayModels.swift @@ -600,6 +600,7 @@ public struct AgentParams: Codable, Sendable { public let bootstrapcontextrunkind: AnyCodable? public let internalevents: [[String: AnyCodable]]? public let inputprovenance: [String: AnyCodable]? + public let voicewaketrigger: String? public let idempotencykey: String public let label: String? @@ -632,6 +633,7 @@ public struct AgentParams: Codable, Sendable { bootstrapcontextrunkind: AnyCodable?, internalevents: [[String: AnyCodable]]?, inputprovenance: [String: AnyCodable]?, + voicewaketrigger: String?, idempotencykey: String, label: String?) { @@ -663,6 +665,7 @@ public struct AgentParams: Codable, Sendable { self.bootstrapcontextrunkind = bootstrapcontextrunkind self.internalevents = internalevents self.inputprovenance = inputprovenance + self.voicewaketrigger = voicewaketrigger self.idempotencykey = idempotencykey self.label = label } @@ -696,6 +699,7 @@ public struct AgentParams: Codable, Sendable { case bootstrapcontextrunkind = "bootstrapContextRunKind" case internalevents = "internalEvents" case inputprovenance = "inputProvenance" + case voicewaketrigger = "voiceWakeTrigger" case idempotencykey = "idempotencyKey" case label } diff --git a/apps/macos/Tests/OpenClawIPCTests/GatewayConnectionControlTests.swift b/apps/macos/Tests/OpenClawIPCTests/GatewayConnectionControlTests.swift index 9dfc1858ae9..258b4e67c8e 100644 --- a/apps/macos/Tests/OpenClawIPCTests/GatewayConnectionControlTests.swift +++ b/apps/macos/Tests/OpenClawIPCTests/GatewayConnectionControlTests.swift @@ -6,6 +6,7 @@ import Testing private final class FakeWebSocketTask: WebSocketTasking, @unchecked Sendable { var state: URLSessionTask.State = .running + private(set) var sentMessages: [URLSessionWebSocketTask.Message] = [] func resume() {} @@ -13,7 +14,9 @@ private final class FakeWebSocketTask: WebSocketTasking, @unchecked Sendable { self.state = .canceling } - func send(_: URLSessionWebSocketTask.Message) async throws {} + func send(_ message: URLSessionWebSocketTask.Message) async throws { + self.sentMessages.append(message) + } func receive() async throws -> URLSessionWebSocketTask.Message { throw URLError(.cannotConnectToHost) @@ -25,29 +28,33 @@ private final class FakeWebSocketTask: WebSocketTasking, @unchecked Sendable { } private final class FakeWebSocketSession: WebSocketSessioning, @unchecked Sendable { + let task = FakeWebSocketTask() + func makeWebSocketTask(url _: URL) -> WebSocketTaskBox { - WebSocketTaskBox(task: FakeWebSocketTask()) + WebSocketTaskBox(task: self.task) } } -private func makeTestGatewayConnection() -> GatewayConnection { - GatewayConnection( +private func makeTestGatewayConnection() -> (GatewayConnection, FakeWebSocketSession) { + let session = FakeWebSocketSession() + let connection = GatewayConnection( configProvider: { (url: URL(string: "ws://127.0.0.1:1")!, token: nil, password: nil) }, - sessionBox: WebSocketSessionBox(session: FakeWebSocketSession())) + sessionBox: WebSocketSessionBox(session: session)) + return (connection, session) } @Suite(.serialized) struct GatewayConnectionControlTests { @Test func `status fails when process missing`() async { - let connection = makeTestGatewayConnection() + let (connection, _) = makeTestGatewayConnection() let result = await connection.status() #expect(result.ok == false) #expect(result.error != nil) } @Test func `reject empty message`() async { - let connection = makeTestGatewayConnection() + let (connection, _) = makeTestGatewayConnection() let result = await connection.sendAgent( message: "", thinking: nil, @@ -56,4 +63,37 @@ private func makeTestGatewayConnection() -> GatewayConnection { to: nil) #expect(result.ok == false) } + + @Test func `send agent keeps empty voice wake trigger field`() async throws { + let (connection, session) = makeTestGatewayConnection() + _ = await connection.sendAgent(GatewayAgentInvocation( + message: "test", + sessionKey: "main", + thinking: nil, + deliver: false, + to: nil, + channel: .last, + timeoutSeconds: nil, + idempotencyKey: "idem-1", + voiceWakeTrigger: " ")) + + guard let lastMessage = session.task.sentMessages.last else { + Issue.record("expected websocket send payload") + return + } + let payloadData: Data + switch lastMessage { + case .string(let text): + payloadData = Data(text.utf8) + case .data(let data): + payloadData = data + @unknown default: + Issue.record("unexpected websocket message type") + return + } + + let json = try JSONSerialization.jsonObject(with: payloadData) as? [String: Any] + let params = json?["params"] as? [String: Any] + #expect(params?["voiceWakeTrigger"] as? String == "") + } } diff --git a/apps/macos/Tests/OpenClawIPCTests/VoiceWakeRuntimeTests.swift b/apps/macos/Tests/OpenClawIPCTests/VoiceWakeRuntimeTests.swift index fcf3f3b1158..5480b5f6177 100644 --- a/apps/macos/Tests/OpenClawIPCTests/VoiceWakeRuntimeTests.swift +++ b/apps/macos/Tests/OpenClawIPCTests/VoiceWakeRuntimeTests.swift @@ -35,6 +35,63 @@ struct VoiceWakeRuntimeTests { #expect(VoiceWakeRuntime._testHasContentAfterTrigger(text, triggers: triggers)) } + @Test func `trigger only allows filler before trigger`() { + let triggers = ["openclaw"] + let text = "uh openclaw" + #expect(VoiceWakeRuntime._testIsTriggerOnly(text, triggers: triggers)) + } + + @Test func `trigger only rejects trailing wake word mentions in ordinary speech`() { + let triggers = ["openclaw"] + let text = "tell me about openclaw" + #expect(!VoiceWakeRuntime._testIsTriggerOnly(text, triggers: triggers)) + } + + @Test func `matched trigger finds trigger not at transcript start`() { + let triggers = ["openclaw"] + let text = "uh openclaw" + #expect(VoiceWakeRuntime._testMatchedTriggerWord(text, triggers: triggers) == "openclaw") + } + + @Test func `matched trigger prefers most specific overlapping phrase`() { + let triggers = ["openclaw", "hey openclaw"] + let text = "hey openclaw" + #expect(VoiceWakeRuntime._testMatchedTriggerWord(text, triggers: triggers) == "hey openclaw") + } + + @Test func `matched trigger handles width insensitive forms without whitespace tokens`() { + let triggers = ["openclaw"] + let text = "OpenClaw" + #expect(VoiceWakeRuntime._testMatchedTriggerWord(text, triggers: triggers) == "openclaw") + } + + @Test func `matched trigger handles chinese forms without whitespace tokens`() { + let triggers = ["小爪"] + let text = "嘿小爪" + #expect(VoiceWakeRuntime._testMatchedTriggerWord(text, triggers: triggers) == "小爪") + } + + @Test func `text only fallback populates matched trigger`() { + let transcript = "hey openclaw do thing" + let config = WakeWordGateConfig(triggers: ["openclaw"], minCommandLength: 1) + let match = VoiceWakeRecognitionDebugSupport.textOnlyFallbackMatch( + transcript: transcript, + triggers: ["openclaw"], + config: config, + trimWake: VoiceWakeRuntime._testTrimmedAfterTrigger) + #expect(match?.trigger == "openclaw") + } + + @Test func `text only fallback keeps the first trigger phrase when later words match another trigger`() { + let transcript = "openclaw tell me about computer vision" + let config = WakeWordGateConfig(triggers: ["openclaw", "computer"], minCommandLength: 1) + let match = VoiceWakeRecognitionDebugSupport.textOnlyFallbackMatch( + transcript: transcript, + triggers: ["openclaw", "computer"], + config: config, + trimWake: VoiceWakeRuntime._testTrimmedAfterTrigger) + #expect(match?.trigger == "openclaw") + } @Test func `trims after chinese trigger keeps post speech`() { let triggers = ["小爪", "openclaw"] let text = "嘿 小爪 帮我打开设置" diff --git a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift index c0e02d76b79..9bcc9ae6ea3 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift @@ -600,6 +600,7 @@ public struct AgentParams: Codable, Sendable { public let bootstrapcontextrunkind: AnyCodable? public let internalevents: [[String: AnyCodable]]? public let inputprovenance: [String: AnyCodable]? + public let voicewaketrigger: String? public let idempotencykey: String public let label: String? @@ -632,6 +633,7 @@ public struct AgentParams: Codable, Sendable { bootstrapcontextrunkind: AnyCodable?, internalevents: [[String: AnyCodable]]?, inputprovenance: [String: AnyCodable]?, + voicewaketrigger: String?, idempotencykey: String, label: String?) { @@ -663,6 +665,7 @@ public struct AgentParams: Codable, Sendable { self.bootstrapcontextrunkind = bootstrapcontextrunkind self.internalevents = internalevents self.inputprovenance = inputprovenance + self.voicewaketrigger = voicewaketrigger self.idempotencykey = idempotencykey self.label = label } @@ -696,6 +699,7 @@ public struct AgentParams: Codable, Sendable { case bootstrapcontextrunkind = "bootstrapContextRunKind" case internalevents = "internalEvents" case inputprovenance = "inputProvenance" + case voicewaketrigger = "voiceWakeTrigger" case idempotencykey = "idempotencyKey" case label } diff --git a/docs/nodes/voicewake.md b/docs/nodes/voicewake.md index b9bc43fea4f..dbacca885e2 100644 --- a/docs/nodes/voicewake.md +++ b/docs/nodes/voicewake.md @@ -37,9 +37,32 @@ Notes: - Triggers are normalized (trimmed, empties dropped). Empty lists fall back to defaults. - Limits are enforced for safety (count/length caps). +### Routing methods (trigger → target) + +- `voicewake.routing.get` → `{ config: VoiceWakeRoutingConfig }` +- `voicewake.routing.set` with params `{ config: VoiceWakeRoutingConfig }` → `{ config: VoiceWakeRoutingConfig }` + +`VoiceWakeRoutingConfig` shape: + +```json +{ + "version": 1, + "defaultTarget": { "mode": "current" }, + "routes": [{ "trigger": "robot wake", "target": { "sessionKey": "agent:main:main" } }], + "updatedAtMs": 1730000000000 +} +``` + +Route targets support exactly one of: + +- `{ "mode": "current" }` +- `{ "agentId": "main" }` +- `{ "sessionKey": "agent:main:main" }` + ### Events - `voicewake.changed` payload `{ triggers: string[] }` +- `voicewake.routing.changed` payload `{ config: VoiceWakeRoutingConfig }` Who receives it: diff --git a/extensions/firecrawl/src/firecrawl-scrape-tool.ts b/extensions/firecrawl/src/firecrawl-scrape-tool.ts index c741b82c3da..405ed74f21b 100644 --- a/extensions/firecrawl/src/firecrawl-scrape-tool.ts +++ b/extensions/firecrawl/src/firecrawl-scrape-tool.ts @@ -7,9 +7,9 @@ import { import { Type } from "typebox"; import { runFirecrawlScrape } from "./firecrawl-client.js"; -function optionalStringEnum( +function optionalStringEnum( values: T, - options: { description?: string } = {}, + options: { description?: string; title?: string; default?: T[number] } = {}, ) { return Type.Optional( Type.Unsafe({ diff --git a/extensions/line/runtime-api.ts b/extensions/line/runtime-api.ts index 7bd5ff9394b..07cb6f0c7b4 100644 --- a/extensions/line/runtime-api.ts +++ b/extensions/line/runtime-api.ts @@ -69,7 +69,7 @@ export type { } from "./src/types.js"; export * from "./src/webhook-node.js"; export * from "./src/webhook.js"; -export * from "./src/webhook-utils.js"; +export { parseLineWebhookBody } from "./src/webhook-utils.js"; export { datetimePickerAction, messageAction, postbackAction, uriAction } from "./src/actions.js"; export type { Action } from "./src/actions.js"; export { diff --git a/package.json b/package.json index 7358d38c8d1..6a559db1ad2 100644 --- a/package.json +++ b/package.json @@ -878,14 +878,14 @@ "types": "./dist/plugin-sdk/line.d.ts", "default": "./dist/plugin-sdk/line.js" }, - "./plugin-sdk/line-core": { - "types": "./dist/plugin-sdk/line-core.d.ts", - "default": "./dist/plugin-sdk/line-core.js" - }, "./plugin-sdk/line-runtime": { "types": "./dist/plugin-sdk/line-runtime.d.ts", "default": "./dist/plugin-sdk/line-runtime.js" }, + "./plugin-sdk/line-core": { + "types": "./dist/plugin-sdk/line-core.d.ts", + "default": "./dist/plugin-sdk/line-core.js" + }, "./plugin-sdk/line-surface": { "types": "./dist/plugin-sdk/line-surface.d.ts", "default": "./dist/plugin-sdk/line-surface.js" diff --git a/scripts/lib/plugin-sdk-entrypoints.json b/scripts/lib/plugin-sdk-entrypoints.json index a6ab71c473d..a24465010b2 100644 --- a/scripts/lib/plugin-sdk-entrypoints.json +++ b/scripts/lib/plugin-sdk-entrypoints.json @@ -205,8 +205,8 @@ "persistent-dedupe", "keyed-async-queue", "line", - "line-core", "line-runtime", + "line-core", "line-surface", "llm-task", "matrix", diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index d4331f3827f..19966daf49f 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -160,7 +160,7 @@ describe("sanitizeSessionHistory", () => { }; const getAssistantContentTypes = (messages: AgentMessage[]) => - getAssistantMessage(messages).content.map((block: { type: string }) => block.type); + getAssistantMessage(messages).content.map((block) => block.type); const makeThinkingAndTextAssistantMessages = ( thinkingSignature: string = "some_sig", diff --git a/src/gateway/method-scopes.ts b/src/gateway/method-scopes.ts index cb0e93e429d..54b8b1e330e 100644 --- a/src/gateway/method-scopes.ts +++ b/src/gateway/method-scopes.ts @@ -90,6 +90,7 @@ const METHOD_SCOPE_GROUPS: Record = { "skills.search", "skills.detail", "voicewake.get", + "voicewake.routing.get", "sessions.list", "sessions.get", "sessions.preview", @@ -134,6 +135,7 @@ const METHOD_SCOPE_GROUPS: Record = { "tts.setProvider", "tts.setPersona", "voicewake.set", + "voicewake.routing.set", "node.invoke", "chat.send", "chat.abort", diff --git a/src/gateway/protocol/schema/agent.ts b/src/gateway/protocol/schema/agent.ts index 09fae0517af..a2efb0f5899 100644 --- a/src/gateway/protocol/schema/agent.ts +++ b/src/gateway/protocol/schema/agent.ts @@ -162,6 +162,7 @@ export const AgentParamsSchema = Type.Object( ), internalEvents: Type.Optional(Type.Array(AgentInternalEventSchema)), inputProvenance: Type.Optional(InputProvenanceSchema), + voiceWakeTrigger: Type.Optional(Type.String()), idempotencyKey: NonEmptyString, label: Type.Optional(SessionLabelString), }, diff --git a/src/gateway/server-methods-list.ts b/src/gateway/server-methods-list.ts index 5ac65f2d74b..4cf4c605228 100644 --- a/src/gateway/server-methods-list.ts +++ b/src/gateway/server-methods-list.ts @@ -76,6 +76,8 @@ const BASE_METHODS = [ "voicewake.set", "secrets.reload", "secrets.resolve", + "voicewake.routing.get", + "voicewake.routing.set", "sessions.list", "sessions.subscribe", "sessions.unsubscribe", @@ -164,6 +166,7 @@ export const GATEWAY_EVENTS = [ "device.pair.requested", "device.pair.resolved", "voicewake.changed", + "voicewake.routing.changed", "exec.approval.requested", "exec.approval.resolved", "plugin.approval.requested", diff --git a/src/gateway/server-methods.ts b/src/gateway/server-methods.ts index d40b176970b..a14974a014a 100644 --- a/src/gateway/server-methods.ts +++ b/src/gateway/server-methods.ts @@ -35,6 +35,7 @@ import { ttsHandlers } from "./server-methods/tts.js"; import type { GatewayRequestHandlers, GatewayRequestOptions } from "./server-methods/types.js"; import { updateHandlers } from "./server-methods/update.js"; import { usageHandlers } from "./server-methods/usage.js"; +import { voicewakeRoutingHandlers } from "./server-methods/voicewake-routing.js"; import { voicewakeHandlers } from "./server-methods/voicewake.js"; import { webHandlers } from "./server-methods/web.js"; import { wizardHandlers } from "./server-methods/wizard.js"; @@ -73,6 +74,7 @@ export const coreGatewayHandlers: GatewayRequestHandlers = { ...connectHandlers, ...logsHandlers, ...voicewakeHandlers, + ...voicewakeRoutingHandlers, ...healthHandlers, ...channelsHandlers, ...chatHandlers, diff --git a/src/gateway/server-methods/agent.test.ts b/src/gateway/server-methods/agent.test.ts index fac604e6fbb..915e546491b 100644 --- a/src/gateway/server-methods/agent.test.ts +++ b/src/gateway/server-methods/agent.test.ts @@ -32,6 +32,8 @@ const mocks = vi.hoisted(() => ({ resolveBareResetBootstrapFileAccess: vi.fn(() => true), listAgentIds: vi.fn(() => ["main"]), loadConfigReturn: {} as Record, + loadVoiceWakeRoutingConfig: vi.fn(), + resolveVoiceWakeRouteByTrigger: vi.fn(), })); vi.mock("../session-utils.js", async () => { @@ -50,7 +52,10 @@ vi.mock("../../config/sessions.js", async () => { return { ...actual, updateSessionStore: mocks.updateSessionStore, - resolveAgentIdFromSessionKey: () => "main", + resolveAgentIdFromSessionKey: (sessionKey: string) => { + const m = /^agent:([^:]+):/.exec(sessionKey.trim()); + return m?.[1] ?? "main"; + }, resolveExplicitAgentSessionKey: mocks.resolveExplicitAgentSessionKey, resolveAgentMainSessionKey: ({ cfg, @@ -114,6 +119,11 @@ vi.mock("../session-reset-service.js", () => ({ (mocks.performGatewaySessionReset as (...args: unknown[]) => unknown)(...args), })); +vi.mock("../../infra/voicewake-routing.js", () => ({ + loadVoiceWakeRoutingConfig: mocks.loadVoiceWakeRoutingConfig, + resolveVoiceWakeRouteByTrigger: mocks.resolveVoiceWakeRouteByTrigger, +})); + vi.mock("../../sessions/send-policy.js", () => ({ resolveSendPolicy: () => "allow", })); @@ -965,6 +975,7 @@ describe("gateway agent handler", () => { }; return await updater(store); }); + mocks.agentCommand.mockResolvedValue({ payloads: [{ text: "ok" }], meta: { durationMs: 100 }, @@ -1438,6 +1449,241 @@ describe("gateway agent handler", () => { }); }); + it("routes voice wake trigger to configured session target", async () => { + mocks.loadVoiceWakeRoutingConfig.mockResolvedValue({ + version: 1, + defaultTarget: { mode: "current" }, + routes: [], + updatedAtMs: 0, + }); + mocks.resolveVoiceWakeRouteByTrigger.mockReturnValue({ sessionKey: "agent:main:voice" }); + + mocks.loadSessionEntry.mockReturnValue({ + cfg: {}, + storePath: "/tmp/sessions.json", + entry: { + sessionId: "voice-session-id", + updatedAt: Date.now(), + }, + canonicalKey: "agent:main:voice", + }); + mocks.updateSessionStore.mockResolvedValue(undefined); + mocks.agentCommand.mockResolvedValue({ + payloads: [{ text: "ok" }], + meta: { durationMs: 100 }, + }); + const respond = vi.fn(); + await agentHandlers.agent({ + params: { + message: "do thing", + sessionKey: "main", + voiceWakeTrigger: "robot wake", + idempotencyKey: "test-voice-route", + }, + respond, + context: makeContext(), + req: { type: "req", id: "voice-1", method: "agent" }, + client: null, + isWebchatConnect: () => false, + }); + + await vi.waitFor(() => expect(mocks.agentCommand).toHaveBeenCalled()); + const callArgs = mocks.agentCommand.mock.calls.at(-1)?.[0] as { sessionKey?: string }; + expect(callArgs.sessionKey).toBe("agent:main:voice"); + }); + + it("ignores voice wake session route targeting unknown agent", async () => { + mocks.loadVoiceWakeRoutingConfig.mockResolvedValue({ + version: 1, + defaultTarget: { mode: "current" }, + routes: [], + updatedAtMs: 0, + }); + mocks.resolveVoiceWakeRouteByTrigger.mockReturnValue({ sessionKey: "agent:ghost:main" }); + + mocks.loadSessionEntry.mockReturnValue({ + cfg: {}, + storePath: "/tmp/sessions.json", + entry: { + sessionId: "main-session-id", + updatedAt: Date.now(), + }, + canonicalKey: "agent:main:main", + }); + mocks.updateSessionStore.mockResolvedValue(undefined); + mocks.agentCommand.mockResolvedValue({ + payloads: [{ text: "ok" }], + meta: { durationMs: 100 }, + }); + + const respond = vi.fn(); + await agentHandlers.agent({ + params: { + message: "do thing", + sessionKey: "main", + voiceWakeTrigger: "robot wake", + idempotencyKey: "test-voice-route-unknown", + }, + respond, + context: makeContext(), + req: { type: "req", id: "voice-2", method: "agent" }, + client: null, + isWebchatConnect: () => false, + }); + + await vi.waitFor(() => expect(mocks.agentCommand).toHaveBeenCalled()); + const callArgs = mocks.agentCommand.mock.calls.at(-1)?.[0] as { sessionKey?: string }; + expect(callArgs.sessionKey).toBe("agent:main:main"); + }); + + it("applies default voice wake route when trigger field is present but empty", async () => { + mocks.loadVoiceWakeRoutingConfig.mockResolvedValue({ + version: 1, + defaultTarget: { sessionKey: "agent:main:voice" }, + routes: [], + updatedAtMs: 0, + }); + mocks.resolveVoiceWakeRouteByTrigger.mockReturnValue({ sessionKey: "agent:main:voice" }); + + mocks.loadSessionEntry.mockImplementation((sessionKey: string) => ({ + cfg: {}, + storePath: "/tmp/sessions.json", + entry: { + sessionId: "voice-session-id", + updatedAt: Date.now(), + }, + canonicalKey: sessionKey === "main" ? "agent:main:main" : sessionKey, + })); + mocks.updateSessionStore.mockResolvedValue(undefined); + mocks.agentCommand.mockResolvedValue({ + payloads: [{ text: "ok" }], + meta: { durationMs: 100 }, + }); + + const respond = vi.fn(); + await agentHandlers.agent({ + params: { + message: "do thing", + sessionKey: "main", + voiceWakeTrigger: " ", + idempotencyKey: "test-voice-route-default-target", + }, + respond, + context: makeContext(), + req: { type: "req", id: "voice-3", method: "agent" }, + client: null, + isWebchatConnect: () => false, + }); + + await vi.waitFor(() => expect(mocks.agentCommand).toHaveBeenCalled()); + const callArgs = mocks.agentCommand.mock.calls.at(-1)?.[0] as { sessionKey?: string }; + expect(callArgs.sessionKey).toBe("agent:main:voice"); + expect(mocks.resolveVoiceWakeRouteByTrigger).toHaveBeenCalledWith({ + trigger: undefined, + config: expect.any(Object), + }); + }); + + it("trims whitespace-only delivery fields before disabling voice wake auto-routing", async () => { + mocks.loadVoiceWakeRoutingConfig.mockResolvedValue({ + version: 1, + defaultTarget: { sessionKey: "agent:main:voice" }, + routes: [], + updatedAtMs: 0, + }); + mocks.resolveVoiceWakeRouteByTrigger.mockReturnValue({ sessionKey: "agent:main:voice" }); + + mocks.loadSessionEntry.mockImplementation((sessionKey: string) => ({ + cfg: {}, + storePath: "/tmp/sessions.json", + entry: { + sessionId: "voice-session-id", + updatedAt: Date.now(), + }, + canonicalKey: sessionKey === "main" ? "agent:main:main" : sessionKey, + })); + mocks.updateSessionStore.mockResolvedValue(undefined); + mocks.agentCommand.mockResolvedValue({ + payloads: [{ text: "ok" }], + meta: { durationMs: 100 }, + }); + + const respond = vi.fn(); + await agentHandlers.agent({ + params: { + message: "do thing", + sessionKey: "main", + to: " ", + replyTo: " ", + voiceWakeTrigger: "robot wake", + idempotencyKey: "test-voice-route-whitespace-delivery", + }, + respond, + context: makeContext(), + req: { type: "req", id: "voice-4", method: "agent" }, + client: null, + isWebchatConnect: () => false, + }); + + await vi.waitFor(() => expect(mocks.agentCommand).toHaveBeenCalled()); + const callArgs = mocks.agentCommand.mock.calls.at(-1)?.[0] as { sessionKey?: string }; + expect(callArgs.sessionKey).toBe("agent:main:voice"); + expect(mocks.resolveVoiceWakeRouteByTrigger).toHaveBeenCalledWith({ + trigger: "robot wake", + config: expect.any(Object), + }); + }); + + it("treats explicit sessionId as an opt-out for voice wake auto-routing", async () => { + mocks.loadVoiceWakeRoutingConfig.mockResolvedValue({ + version: 1, + defaultTarget: { sessionKey: "agent:main:voice" }, + routes: [], + updatedAtMs: 0, + }); + mocks.resolveVoiceWakeRouteByTrigger.mockReturnValue({ sessionKey: "agent:main:voice" }); + + mocks.loadSessionEntry.mockImplementation((sessionKey: string) => ({ + cfg: {}, + storePath: "/tmp/sessions.json", + entry: { + sessionId: sessionKey === "main" ? "main-session-id" : "voice-session-id", + updatedAt: Date.now(), + }, + canonicalKey: sessionKey === "main" ? "agent:main:main" : sessionKey, + })); + mocks.updateSessionStore.mockResolvedValue(undefined); + mocks.agentCommand.mockResolvedValue({ + payloads: [{ text: "ok" }], + meta: { durationMs: 100 }, + }); + + const respond = vi.fn(); + await agentHandlers.agent({ + params: { + message: "do thing", + sessionKey: "main", + sessionId: "caller-selected-session-id", + voiceWakeTrigger: "robot wake", + idempotencyKey: "test-voice-route-explicit-session-id", + }, + respond, + context: makeContext(), + req: { type: "req", id: "voice-5", method: "agent" }, + client: null, + isWebchatConnect: () => false, + }); + + await vi.waitFor(() => expect(mocks.agentCommand).toHaveBeenCalled()); + const callArgs = mocks.agentCommand.mock.calls.at(-1)?.[0] as { + sessionId?: string; + sessionKey?: string; + }; + expect(callArgs.sessionId).toBe("caller-selected-session-id"); + expect(callArgs.sessionKey).toBe("agent:main:main"); + expect(mocks.resolveVoiceWakeRouteByTrigger).not.toHaveBeenCalled(); + }); + it("handles missing cliSessionIds gracefully", async () => { mockMainSessionEntry({}); diff --git a/src/gateway/server-methods/agent.ts b/src/gateway/server-methods/agent.ts index bdfb04a5422..11d99495ea0 100644 --- a/src/gateway/server-methods/agent.ts +++ b/src/gateway/server-methods/agent.ts @@ -1,10 +1,15 @@ import { randomUUID } from "node:crypto"; -import { listAgentIds, resolveAgentWorkspaceDir } from "../../agents/agent-scope.js"; +import { + listAgentIds, + resolveDefaultAgentId, + resolveAgentWorkspaceDir, +} from "../../agents/agent-scope.js"; import { isTimeoutError } from "../../agents/failover-error.js"; import { resolveAgentAvatar, resolvePublicAgentAvatarSource, } from "../../agents/identity-avatar.js"; +import type { GatewayRequestHandlerOptions, GatewayRequestHandlers } from "./types.js"; import type { AgentInternalEvent } from "../../agents/internal-events.js"; import { normalizeSpawnedRunMetadata, @@ -44,6 +49,10 @@ import { shouldDowngradeDeliveryToSessionOnly } from "../../infra/outbound/best- import { resolveMessageChannelSelection } from "../../infra/outbound/channel-selection.js"; import { isAbortError } from "../../infra/unhandled-rejections.js"; import type { PromptImageOrderEntry } from "../../media/prompt-image-order.js"; +import { + loadVoiceWakeRoutingConfig, + resolveVoiceWakeRouteByTrigger, +} from "../../infra/voicewake-routing.js"; import { classifySessionKeyShape, isAcpSessionKey, @@ -104,7 +113,6 @@ import { waitForTerminalGatewayDedupe, } from "./agent-wait-dedupe.js"; import { normalizeRpcAttachmentsToChatAttachments } from "./attachment-normalize.js"; -import type { GatewayRequestHandlerOptions, GatewayRequestHandlers } from "./types.js"; const RESET_COMMAND_RE = /^\/(new|reset)(?:\s+([\s\S]*))?$/i; @@ -426,6 +434,8 @@ export const agentHandlers: GatewayRequestHandlers = { cleanupBundleMcpOnRunEnd?: boolean; label?: string; inputProvenance?: InputProvenance; + workspaceDir?: string; + voiceWakeTrigger?: string; }; const senderIsOwner = resolveSenderIsOwnerFromClient(client); const allowModelOverride = resolveAllowModelOverrideFromClient(client); @@ -540,21 +550,19 @@ export const agentHandlers: GatewayRequestHandlers = { } } + const knownAgents = listAgentIds(cfg); const agentIdRaw = normalizeOptionalString(request.agentId) ?? ""; - const agentId = agentIdRaw ? normalizeAgentId(agentIdRaw) : undefined; - if (agentId) { - const knownAgents = listAgentIds(cfg); - if (!knownAgents.includes(agentId)) { - respond( - false, - undefined, - errorShape( - ErrorCodes.INVALID_REQUEST, - `invalid agent params: unknown agent id "${request.agentId}"`, - ), - ); - return; - } + let agentId = agentIdRaw ? normalizeAgentId(agentIdRaw) : undefined; + if (agentId && !knownAgents.includes(agentId)) { + respond( + false, + undefined, + errorShape( + ErrorCodes.INVALID_REQUEST, + `invalid agent params: unknown agent id "${request.agentId}"`, + ), + ); + return; } const requestedSessionKeyRaw = normalizeOptionalString(request.sessionKey); @@ -595,6 +603,76 @@ export const agentHandlers: GatewayRequestHandlers = { return; } } + const voiceWakeTrigger = normalizeOptionalString(request.voiceWakeTrigger) ?? ""; + const replyTo = normalizeOptionalString(request.replyTo) ?? ""; + const to = normalizeOptionalString(request.to) ?? ""; + const explicitVoiceWakeSessionTarget = + !agentId && requestedSessionKeyRaw + ? (() => { + const { cfg: sessionCfg, canonicalKey } = loadSessionEntry(requestedSessionKeyRaw); + const routedAgentId = resolveAgentIdFromSessionKey(canonicalKey); + const defaultAgentId = normalizeAgentId(resolveDefaultAgentId(sessionCfg)); + if (routedAgentId !== defaultAgentId) { + return true; + } + const mainSessionKey = resolveAgentMainSessionKey({ + cfg: sessionCfg, + agentId: routedAgentId, + }); + return canonicalKey !== mainSessionKey; + })() + : false; + const canAutoRouteVoiceWake = + !agentId && + !explicitVoiceWakeSessionTarget && + !requestedSessionId && + !replyTo && + !to; + const hasVoiceWakeTriggerField = Object.prototype.hasOwnProperty.call( + request, + "voiceWakeTrigger", + ); + if (hasVoiceWakeTriggerField && canAutoRouteVoiceWake) { + try { + const routingConfig = await loadVoiceWakeRoutingConfig(); + const route = resolveVoiceWakeRouteByTrigger({ + trigger: voiceWakeTrigger || undefined, + config: routingConfig, + }); + if ("agentId" in route) { + if (knownAgents.includes(route.agentId)) { + agentId = route.agentId; + requestedSessionKey = resolveExplicitAgentSessionKey({ + cfg, + agentId, + }); + } else { + context.logGateway.warn( + `voicewake routing ignored unknown agentId="${route.agentId}" trigger="${voiceWakeTrigger}"`, + ); + } + } else if ("sessionKey" in route) { + if (classifySessionKeyShape(route.sessionKey) !== "malformed_agent") { + const canonicalRouteSession = loadSessionEntry(route.sessionKey).canonicalKey; + const routedAgentId = resolveAgentIdFromSessionKey(canonicalRouteSession); + if (knownAgents.includes(routedAgentId)) { + requestedSessionKey = canonicalRouteSession; + agentId = routedAgentId; + } else { + context.logGateway.warn( + `voicewake routing ignored unknown session agent="${routedAgentId}" sessionKey="${canonicalRouteSession}" trigger="${voiceWakeTrigger}"`, + ); + } + } else { + context.logGateway.warn( + `voicewake routing ignored malformed sessionKey="${route.sessionKey}" trigger="${voiceWakeTrigger}"`, + ); + } + } + } catch (err) { + context.logGateway.warn(`voicewake routing load failed: ${formatForLog(err)}`); + } + } let resolvedSessionId = requestedSessionId; let sessionEntry: SessionEntry | undefined; let bestEffortDeliver = requestedBestEffortDeliver ?? false; diff --git a/src/gateway/server-methods/shared-types.ts b/src/gateway/server-methods/shared-types.ts index 0d0573a28c8..65bc863582d 100644 --- a/src/gateway/server-methods/shared-types.ts +++ b/src/gateway/server-methods/shared-types.ts @@ -105,6 +105,9 @@ export type GatewayRequestContext = { prompter: import("../../wizard/prompts.js").WizardPrompter, ) => Promise; broadcastVoiceWakeChanged: (triggers: string[]) => void; + broadcastVoiceWakeRoutingChanged: ( + config: import("../../infra/voicewake-routing.js").VoiceWakeRoutingConfig, + ) => void; unavailableGatewayMethods?: ReadonlySet; }; diff --git a/src/gateway/server-methods/voicewake-routing.ts b/src/gateway/server-methods/voicewake-routing.ts new file mode 100644 index 00000000000..a5e1e860394 --- /dev/null +++ b/src/gateway/server-methods/voicewake-routing.ts @@ -0,0 +1,46 @@ +import type { GatewayRequestHandlers } from "./types.js"; +import { + loadVoiceWakeRoutingConfig, + normalizeVoiceWakeRoutingConfig, + setVoiceWakeRoutingConfig, + validateVoiceWakeRoutingConfigInput, +} from "../../infra/voicewake-routing.js"; +import { ErrorCodes, errorShape } from "../protocol/index.js"; + +export const voicewakeRoutingHandlers: GatewayRequestHandlers = { + "voicewake.routing.get": async ({ respond }) => { + try { + respond(true, { config: await loadVoiceWakeRoutingConfig() }); + } catch (err) { + respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, String(err))); + } + }, + "voicewake.routing.set": async ({ params, respond, context }) => { + if ( + !params || + params.config === null || + typeof params.config !== "object" || + Array.isArray(params.config) + ) { + respond( + false, + undefined, + errorShape(ErrorCodes.INVALID_REQUEST, "voicewake.routing.set requires config: object"), + ); + return; + } + const validated = validateVoiceWakeRoutingConfigInput(params.config); + if (!validated.ok) { + respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, validated.message)); + return; + } + try { + const normalized = normalizeVoiceWakeRoutingConfig(params.config); + const config = await setVoiceWakeRoutingConfig(normalized); + context.broadcastVoiceWakeRoutingChanged(config); + respond(true, { config }); + } catch (err) { + respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, String(err))); + } + }, +}; diff --git a/src/gateway/server.impl.ts b/src/gateway/server.impl.ts index ca9c22c3f3f..8202dedfa49 100644 --- a/src/gateway/server.impl.ts +++ b/src/gateway/server.impl.ts @@ -27,6 +27,8 @@ import { isTruthyEnvValue, isVitestRuntimeEnv, logAcceptedEnvOption } from "../i import { ensureOpenClawCliOnPath } from "../infra/path-env.js"; import { setGatewaySigusr1RestartPolicy, setPreRestartDeferralCheck } from "../infra/restart.js"; import { enqueueSystemEvent } from "../infra/system-events.js"; +import { scheduleGatewayUpdateCheck } from "../infra/update-startup.js"; +import type { VoiceWakeRoutingConfig } from "../infra/voicewake-routing.js"; import { startDiagnosticHeartbeat, stopDiagnosticHeartbeat } from "../logging/diagnostic.js"; import { createSubsystemLogger, runtimeForLogger } from "../logging/subsystem.js"; import { runGlobalGatewayStopSafely } from "../plugins/hook-runner-global.js"; @@ -621,6 +623,41 @@ export async function startGatewayServer( await runClosePrelude(); await createCloseHandler()({ reason: "gateway startup failed" }); }; +<<<<<<< HEAD +======= + const nodeRegistry = new NodeRegistry(); + const nodePresenceTimers = new Map>(); + const nodeSubscriptions = createNodeSubscriptionManager(); + const sessionEventSubscribers = createSessionEventSubscriberRegistry(); + const sessionMessageSubscribers = createSessionMessageSubscriberRegistry(); + const nodeSendEvent = (opts: { nodeId: string; event: string; payloadJSON?: string | null }) => { + const payload = safeParseJson(opts.payloadJSON ?? null); + nodeRegistry.sendEvent(opts.nodeId, opts.event, payload); + }; + const nodeSendToSession = (sessionKey: string, event: string, payload: unknown) => + nodeSubscriptions.sendToSession(sessionKey, event, payload, nodeSendEvent); + const nodeSendToAllSubscribed = (event: string, payload: unknown) => + nodeSubscriptions.sendToAllSubscribed(event, payload, nodeSendEvent); + const nodeSubscribe = nodeSubscriptions.subscribe; + const nodeUnsubscribe = nodeSubscriptions.unsubscribe; + const nodeUnsubscribeAll = nodeSubscriptions.unsubscribeAll; + const broadcastVoiceWakeChanged = (triggers: string[]) => { + broadcast("voicewake.changed", { triggers }, { dropIfSlow: true }); + }; + const broadcastVoiceWakeRoutingChanged = (config: VoiceWakeRoutingConfig) => { + broadcast("voicewake.routing.changed", { config }, { dropIfSlow: true }); + }; + const hasMobileNodeConnected = () => hasConnectedMobileNode(nodeRegistry); + applyGatewayLaneConcurrency(cfgAtStart); + + let cronState = buildGatewayCronService({ + cfg: cfgAtStart, + deps, + broadcast, + }); + let { cron, storePath: cronStorePath } = cronState; + deps.cron = cron; +>>>>>>> 85f70db0b2 (feat(voicewake): refresh trigger routing on main) try { const earlyRuntime = await startupTrace.measure("runtime.early", () => @@ -771,7 +808,12 @@ export async function startGatewayServer( wizardRunner, broadcastVoiceWakeChanged, unavailableGatewayMethods, +<<<<<<< HEAD }); +======= + broadcastVoiceWakeRoutingChanged, + }; +>>>>>>> 85f70db0b2 (feat(voicewake): refresh trigger routing on main) setFallbackGatewayContextResolver(() => gatewayRequestContext); diff --git a/src/gateway/server.models-voicewake-misc.test.ts b/src/gateway/server.models-voicewake-misc.test.ts index fba65ecdc16..bab80488728 100644 --- a/src/gateway/server.models-voicewake-misc.test.ts +++ b/src/gateway/server.models-voicewake-misc.test.ts @@ -304,6 +304,162 @@ describe("gateway server models + voicewake", () => { }); }); + test("voicewake.routing.get/set persists and broadcasts", { timeout: 60_000 }, async () => { + await withTempHome(async (homeDir) => { + const initial = await rpcReq<{ + config?: { version?: number; defaultTarget?: unknown; routes?: unknown[] }; + }>(ws, "voicewake.routing.get"); + expect(initial.ok).toBe(true); + expect(initial.payload?.config?.version).toBe(1); + expect(initial.payload?.config?.defaultTarget).toEqual({ mode: "current" }); + expect(initial.payload?.config?.routes).toEqual([]); + + const changedP = onceMessage<{ + type: "event"; + event: string; + payload?: Record | null; + }>(ws, (o) => o.type === "event" && o.event === "voicewake.routing.changed"); + + const setRes = await rpcReq<{ + config?: { routes?: Array<{ trigger?: string; target?: unknown }>; updatedAtMs?: number }; + }>(ws, "voicewake.routing.set", { + config: { + defaultTarget: { mode: "current" }, + routes: [{ trigger: " Robot Wake ", target: { agentId: "main" } }], + }, + }); + expect(setRes.ok).toBe(true); + expect(setRes.payload?.config?.routes).toEqual([ + { trigger: "robot wake", target: { agentId: "main" } }, + ]); + expect(typeof setRes.payload?.config?.updatedAtMs).toBe("number"); + + const changed = await changedP; + expect(changed.event).toBe("voicewake.routing.changed"); + expect( + (changed.payload as { config?: { routes?: unknown } } | undefined)?.config?.routes, + ).toEqual([{ trigger: "robot wake", target: { agentId: "main" } }]); + + const after = await rpcReq<{ + config?: { routes?: Array<{ trigger?: string; target?: unknown }> }; + }>(ws, "voicewake.routing.get"); + expect(after.ok).toBe(true); + expect(after.payload?.config?.routes).toEqual([ + { trigger: "robot wake", target: { agentId: "main" } }, + ]); + + const onDisk = JSON.parse( + await fs.readFile( + path.join(homeDir, ".openclaw", "settings", "voicewake-routing.json"), + "utf8", + ), + ) as { routes?: unknown }; + expect(onDisk.routes).toEqual([{ trigger: "robot wake", target: { agentId: "main" } }]); + + const invalid = await rpcReq(ws, "voicewake.routing.set", { config: null }); + expect(invalid.ok).toBe(false); + expect(invalid.error?.message ?? "").toMatch( + /voicewake\.routing\.set requires config: object/i, + ); + + const badRoutes = await rpcReq(ws, "voicewake.routing.set", { + config: { routes: "oops" }, + }); + expect(badRoutes.ok).toBe(false); + expect(badRoutes.error?.message ?? "").toMatch(/config\.routes must be an array/i); + + const badTarget = await rpcReq(ws, "voicewake.routing.set", { + config: { + routes: [ + { trigger: "robot wake", target: { agentId: "main", sessionKey: "agent:main:main" } }, + ], + }, + }); + expect(badTarget.ok).toBe(false); + expect(badTarget.error?.message ?? "").toMatch( + /config\.routes\[0\]\.target cannot include both agentId and sessionKey/i, + ); + + const badAgentId = await rpcReq(ws, "voicewake.routing.set", { + config: { + routes: [{ trigger: "robot wake", target: { agentId: "!!!" } }], + }, + }); + expect(badAgentId.ok).toBe(false); + expect(badAgentId.error?.message ?? "").toMatch( + /config\.routes\[0\]\.target\.agentId must be a valid agent id/i, + ); + + const badSessionKey = await rpcReq(ws, "voicewake.routing.set", { + config: { + routes: [{ trigger: "robot wake", target: { sessionKey: "agent::main" } }], + }, + }); + expect(badSessionKey.ok).toBe(false); + expect(badSessionKey.error?.message ?? "").toMatch( + /config\.routes\[0\]\.target\.sessionKey must be a canonical agent session key/i, + ); + + const stillStored = await rpcReq<{ + config?: { routes?: Array<{ trigger?: string; target?: unknown }> }; + }>(ws, "voicewake.routing.get"); + expect(stillStored.ok).toBe(true); + expect(stillStored.payload?.config?.routes).toEqual([ + { trigger: "robot wake", target: { agentId: "main" } }, + ]); + }); + }); + + test("pushes voicewake.routing.changed to nodes on connect and on updates", async () => { + await withTempHome(async () => { + const nodeWs = new WebSocket(`ws://127.0.0.1:${port}`); + trackConnectChallengeNonce(nodeWs); + await new Promise((resolve) => nodeWs.once("open", resolve)); + const firstEventP = onceMessage<{ + type: "event"; + event: string; + payload?: Record | null; + }>(nodeWs, (o) => o.type === "event" && o.event === "voicewake.routing.changed"); + await connectOk(nodeWs, { + role: "node", + client: { + id: GATEWAY_CLIENT_NAMES.NODE_HOST, + version: "1.0.0", + platform: "ios", + mode: GATEWAY_CLIENT_MODES.NODE, + }, + }); + + const first = await firstEventP; + expect(first.event).toBe("voicewake.routing.changed"); + expect( + (first.payload as { config?: { routes?: unknown[] } } | undefined)?.config?.routes, + ).toEqual([]); + + const broadcastP = onceMessage<{ + type: "event"; + event: string; + payload?: Record | null; + }>(nodeWs, (o) => o.type === "event" && o.event === "voicewake.routing.changed"); + + const setRes = await rpcReq(ws, "voicewake.routing.set", { + config: { + defaultTarget: { mode: "current" }, + routes: [{ trigger: "hello", target: { sessionKey: "agent:main:main" } }], + }, + }); + expect(setRes.ok).toBe(true); + + const broadcast = await broadcastP; + expect(broadcast.event).toBe("voicewake.routing.changed"); + expect( + (broadcast.payload as { config?: { routes?: unknown } } | undefined)?.config?.routes, + ).toEqual([{ trigger: "hello", target: { sessionKey: "agent:main:main" } }]); + + nodeWs.close(); + }); + }); + test("models.list returns model catalog", async () => { seedPiCatalog(); diff --git a/src/gateway/server/ws-connection/message-handler.ts b/src/gateway/server/ws-connection/message-handler.ts index b5f24fff948..2ddc00d8d5a 100644 --- a/src/gateway/server/ws-connection/message-handler.ts +++ b/src/gateway/server/ws-connection/message-handler.ts @@ -1,6 +1,10 @@ import type { IncomingMessage } from "node:http"; -import os from "node:os"; import type { WebSocket } from "ws"; +import os from "node:os"; +import type { createSubsystemLogger } from "../../../logging/subsystem.js"; +import type { GatewayAuthResult, ResolvedGatewayAuth } from "../../auth.js"; +import type { GatewayRequestContext, GatewayRequestHandlers } from "../../server-methods/types.js"; +import type { GatewayWsClient } from "../ws-types.js"; import { loadConfig } from "../../../config/config.js"; import { getBoundDeviceBootstrapProfile, @@ -33,10 +37,10 @@ import { } from "../../../infra/node-pairing.js"; import { recordRemoteNodeInfo, refreshRemoteNodeBins } from "../../../infra/skills-remote.js"; import { upsertPresence } from "../../../infra/system-presence.js"; +import { loadVoiceWakeRoutingConfig } from "../../../infra/voicewake-routing.js"; import { loadVoiceWakeConfig } from "../../../infra/voicewake.js"; import { rawDataToString } from "../../../infra/ws.js"; import { logRejectedLargePayload } from "../../../logging/diagnostic-payload.js"; -import type { createSubsystemLogger } from "../../../logging/subsystem.js"; import { resolveBootstrapProfileScopesForRole, type DeviceBootstrapProfile, @@ -50,8 +54,6 @@ import { } from "../../../utils/message-channel.js"; import { resolveRuntimeServiceVersion } from "../../../version.js"; import type { AuthRateLimiter } from "../../auth-rate-limit.js"; -import type { ResolvedGatewayAuth } from "../../auth.js"; -import type { GatewayAuthResult } from "../../auth.js"; import { hasForwardedRequestHeaders, isLocalDirectRequest } from "../../auth.js"; import { buildCanvasScopedHostUrl, @@ -100,7 +102,6 @@ import { TICK_INTERVAL_MS, } from "../../server-constants.js"; import { handleGatewayRequest } from "../../server-methods.js"; -import type { GatewayRequestContext, GatewayRequestHandlers } from "../../server-methods/types.js"; import { formatError } from "../../server-utils.js"; import { formatForLog, logWs } from "../../ws-log.js"; import { truncateCloseReason } from "../close-reason.js"; @@ -1413,6 +1414,17 @@ export function attachGatewayWsMessageHandler(params: { `voicewake snapshot failed for ${nodeSession.nodeId}: ${formatForLog(err)}`, ), ); + void loadVoiceWakeRoutingConfig() + .then((routing) => { + context.nodeRegistry.sendEvent(nodeSession.nodeId, "voicewake.routing.changed", { + config: routing, + }); + }) + .catch((err) => + logGateway.warn( + `voicewake routing snapshot failed for ${nodeSession.nodeId}: ${formatForLog(err)}`, + ), + ); } try { diff --git a/src/infra/infra-store.test.ts b/src/infra/infra-store.test.ts index dfa6b1715c4..ef3657ef0af 100644 --- a/src/infra/infra-store.test.ts +++ b/src/infra/infra-store.test.ts @@ -1,8 +1,31 @@ import fs from "node:fs/promises"; +import os from "node:os"; import path from "node:path"; -import { describe, expect, it } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { withTempDir } from "../test-utils/temp-dir.js"; +import { + getChannelActivity, + recordChannelActivity, + resetChannelActivityForTest, +} from "./channel-activity.js"; +import { createDedupeCache } from "./dedupe.js"; +import { + emitDiagnosticEvent, + onDiagnosticEvent, + resetDiagnosticEventsForTest, +} from "./diagnostic-events.js"; import { readSessionStoreJson5 } from "./state-migrations.fs.js"; +import { + loadVoiceWakeRoutingConfig, + normalizeVoiceWakeTriggerWord, + resolveVoiceWakeRouteByTrigger, + setVoiceWakeRoutingConfig, +} from "./voicewake-routing.js"; +import { + defaultVoiceWakeTriggers, + loadVoiceWakeConfig, + setVoiceWakeTriggers, +} from "./voicewake.js"; describe("infra store", () => { describe("state migrations fs", () => { @@ -33,4 +56,225 @@ describe("infra store", () => { }); }); }); + describe("voicewake store", () => { + it("returns defaults when missing", async () => { + await withTempDir("openclaw-voicewake-", async (baseDir) => { + const cfg = await loadVoiceWakeConfig(baseDir); + expect(cfg.triggers).toEqual(defaultVoiceWakeTriggers()); + expect(cfg.updatedAtMs).toBe(0); + }); + }); + + it("sanitizes and persists triggers", async () => { + await withTempDir("openclaw-voicewake-", async (baseDir) => { + const saved = await setVoiceWakeTriggers([" hi ", "", " there "], baseDir); + expect(saved.triggers).toEqual(["hi", "there"]); + expect(saved.updatedAtMs).toBeGreaterThan(0); + + const loaded = await loadVoiceWakeConfig(baseDir); + expect(loaded.triggers).toEqual(["hi", "there"]); + expect(loaded.updatedAtMs).toBeGreaterThan(0); + }); + }); + + it("falls back to defaults when triggers empty", async () => { + await withTempDir("openclaw-voicewake-", async (baseDir) => { + const saved = await setVoiceWakeTriggers(["", " "], baseDir); + expect(saved.triggers).toEqual(defaultVoiceWakeTriggers()); + }); + }); + + it("sanitizes malformed persisted config values", async () => { + await withTempDir("openclaw-voicewake-", async (baseDir) => { + await fs.mkdir(path.join(baseDir, "settings"), { recursive: true }); + await fs.writeFile( + path.join(baseDir, "settings", "voicewake.json"), + JSON.stringify({ + triggers: [" wake ", "", 42, null], + updatedAtMs: -1, + }), + "utf-8", + ); + + const loaded = await loadVoiceWakeConfig(baseDir); + expect(loaded.triggers).toEqual(["wake"]); + expect(loaded.updatedAtMs).toBe(0); + }); + }); + }); + + describe("voicewake routing store", () => { + it("returns defaults when missing", async () => { + const baseDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-voicewake-routing-")); + const cfg = await loadVoiceWakeRoutingConfig(baseDir); + expect(cfg.version).toBe(1); + expect(cfg.defaultTarget).toEqual({ mode: "current" }); + expect(cfg.routes).toEqual([]); + expect(cfg.updatedAtMs).toBe(0); + }); + + it("normalizes and persists routing config", async () => { + const baseDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-voicewake-routing-")); + const saved = await setVoiceWakeRoutingConfig( + { + defaultTarget: { mode: "current" }, + routes: [ + { trigger: " Hello Bot ", target: { agentId: "main" } }, + { trigger: "", target: { sessionKey: "agent:main:main" } }, + ], + }, + baseDir, + ); + expect(saved.routes).toEqual([{ trigger: "hello bot", target: { agentId: "main" } }]); + expect(saved.updatedAtMs).toBeGreaterThan(0); + + const loaded = await loadVoiceWakeRoutingConfig(baseDir); + expect(loaded.routes).toEqual([{ trigger: "hello bot", target: { agentId: "main" } }]); + }); + + it("resolves routes by normalized trigger", () => { + const result = resolveVoiceWakeRouteByTrigger({ + trigger: " HELLO BOT ", + config: { + version: 1, + defaultTarget: { mode: "current" }, + routes: [{ trigger: "hello bot", target: { sessionKey: "agent:main:main" } }], + updatedAtMs: 0, + }, + }); + expect(result).toEqual({ sessionKey: "agent:main:main" }); + expect(normalizeVoiceWakeTriggerWord(" X Y ")).toBe("x y"); + }); + }); + + describe("diagnostic-events", () => { + it("emits monotonic seq", async () => { + resetDiagnosticEventsForTest(); + const seqs: number[] = []; + const stop = onDiagnosticEvent((evt) => seqs.push(evt.seq)); + + emitDiagnosticEvent({ + type: "model.usage", + usage: { total: 1 }, + }); + emitDiagnosticEvent({ + type: "model.usage", + usage: { total: 2 }, + }); + + stop(); + + expect(seqs).toEqual([1, 2]); + }); + + it("emits message-flow events", async () => { + resetDiagnosticEventsForTest(); + const types: string[] = []; + const stop = onDiagnosticEvent((evt) => types.push(evt.type)); + + emitDiagnosticEvent({ + type: "webhook.received", + channel: "telegram", + updateType: "telegram-post", + }); + emitDiagnosticEvent({ + type: "message.queued", + channel: "telegram", + source: "telegram", + queueDepth: 1, + }); + emitDiagnosticEvent({ + type: "session.state", + state: "processing", + reason: "run_started", + }); + + stop(); + + expect(types).toEqual(["webhook.received", "message.queued", "session.state"]); + }); + }); + + describe("channel activity", () => { + beforeEach(() => { + resetChannelActivityForTest(); + vi.useFakeTimers(); + vi.setSystemTime(new Date("2026-01-08T00:00:00Z")); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it("records inbound/outbound separately", () => { + recordChannelActivity({ channel: "telegram", direction: "inbound" }); + vi.advanceTimersByTime(1000); + recordChannelActivity({ channel: "telegram", direction: "outbound" }); + const res = getChannelActivity({ channel: "telegram" }); + expect(res.inboundAt).toBe(1767830400000); + expect(res.outboundAt).toBe(1767830401000); + }); + + it("isolates accounts", () => { + recordChannelActivity({ + channel: "whatsapp", + accountId: "a", + direction: "inbound", + at: 1, + }); + recordChannelActivity({ + channel: "whatsapp", + accountId: "b", + direction: "inbound", + at: 2, + }); + expect(getChannelActivity({ channel: "whatsapp", accountId: "a" })).toEqual({ + inboundAt: 1, + outboundAt: null, + }); + expect(getChannelActivity({ channel: "whatsapp", accountId: "b" })).toEqual({ + inboundAt: 2, + outboundAt: null, + }); + }); + }); + + describe("createDedupeCache", () => { + it("marks duplicates within TTL", () => { + const cache = createDedupeCache({ ttlMs: 1000, maxSize: 10 }); + expect(cache.check("a", 100)).toBe(false); + expect(cache.check("a", 500)).toBe(true); + }); + + it("expires entries after TTL", () => { + const cache = createDedupeCache({ ttlMs: 1000, maxSize: 10 }); + expect(cache.check("a", 100)).toBe(false); + expect(cache.check("a", 1501)).toBe(false); + }); + + it("evicts oldest entries when over max size", () => { + const cache = createDedupeCache({ ttlMs: 10_000, maxSize: 2 }); + expect(cache.check("a", 100)).toBe(false); + expect(cache.check("b", 200)).toBe(false); + expect(cache.check("c", 300)).toBe(false); + expect(cache.check("a", 400)).toBe(false); + }); + + it("prunes expired entries even when refreshed keys are older in insertion order", () => { + const cache = createDedupeCache({ ttlMs: 100, maxSize: 10 }); + expect(cache.check("a", 0)).toBe(false); + expect(cache.check("b", 50)).toBe(false); + expect(cache.check("a", 120)).toBe(false); + expect(cache.check("c", 200)).toBe(false); + expect(cache.size()).toBe(2); + }); + + it("supports non-mutating existence checks via peek()", () => { + const cache = createDedupeCache({ ttlMs: 1000, maxSize: 10 }); + expect(cache.peek("a", 100)).toBe(false); + expect(cache.check("a", 100)).toBe(false); + expect(cache.peek("a", 200)).toBe(true); + expect(cache.peek("a", 1201)).toBe(false); + }); + }); }); diff --git a/src/infra/voicewake-routing.test.ts b/src/infra/voicewake-routing.test.ts new file mode 100644 index 00000000000..5a319fde57c --- /dev/null +++ b/src/infra/voicewake-routing.test.ts @@ -0,0 +1,109 @@ +import { describe, expect, it } from "vitest"; +import { + normalizeVoiceWakeRoutingConfig, + normalizeVoiceWakeTriggerWord, + resolveVoiceWakeRouteByTrigger, + validateVoiceWakeRoutingConfigInput, +} from "./voicewake-routing.js"; + +describe("voicewake routing normalization", () => { + it("normalizes punctuation-heavy triggers to token-equivalent spacing", () => { + expect(normalizeVoiceWakeTriggerWord(" Hey, Bot!! ")).toBe("hey bot"); + }); + + it("normalizes agentId targets before persisting routes", () => { + const normalized = normalizeVoiceWakeRoutingConfig({ + defaultTarget: { mode: "current" }, + routes: [{ trigger: "Wake", target: { agentId: " Main Agent " } }], + }); + expect(normalized.routes).toHaveLength(1); + expect(normalized.routes[0]?.target).toEqual({ agentId: "main-agent" }); + }); + + it("resolves trigger routing with punctuation-insensitive trigger values", () => { + const config = normalizeVoiceWakeRoutingConfig({ + defaultTarget: { mode: "current" }, + routes: [{ trigger: "Hey, Bot", target: { sessionKey: "agent:main:voice" } }], + }); + expect(resolveVoiceWakeRouteByTrigger({ trigger: "hey bot", config })).toEqual({ + sessionKey: "agent:main:voice", + }); + }); + + it("rejects invalid route agent ids instead of normalizing them to main", () => { + expect( + validateVoiceWakeRoutingConfigInput({ + routes: [{ trigger: "wake", target: { agentId: "!!!" } }], + }), + ).toEqual({ + ok: false, + message: "config.routes[0].target.agentId must be a valid agent id", + }); + }); + + it("rejects malformed session keys instead of persisting dead routes", () => { + expect( + validateVoiceWakeRoutingConfigInput({ + routes: [{ trigger: "wake", target: { sessionKey: "agent::main" } }], + }), + ).toEqual({ + ok: false, + message: "config.routes[0].target.sessionKey must be a canonical agent session key", + }); + }); + + it("rejects session keys with empty path segments", () => { + expect( + validateVoiceWakeRoutingConfigInput({ + routes: [{ trigger: "wake", target: { sessionKey: "agent:main:main:" } }], + }), + ).toEqual({ + ok: false, + message: "config.routes[0].target.sessionKey must be a canonical agent session key", + }); + }); + + it("rejects duplicate triggers after normalization", () => { + expect( + validateVoiceWakeRoutingConfigInput({ + routes: [ + { trigger: "Hey Bot", target: { mode: "current" } }, + { trigger: "hey, bot", target: { agentId: "main" } }, + ], + }), + ).toEqual({ + ok: false, + message: "config.routes[1].trigger duplicates config.routes[0].trigger after normalization", + }); + }); + + it("rejects oversized route lists", () => { + expect( + validateVoiceWakeRoutingConfigInput({ + routes: Array.from({ length: 33 }, (_, index) => ({ + trigger: `wake ${index}`, + target: { mode: "current" as const }, + })), + }), + ).toEqual({ + ok: false, + message: "config.routes must contain at most 32 entries", + }); + }); + + it("rejects oversized triggers", () => { + expect( + validateVoiceWakeRoutingConfigInput({ + routes: [ + { + trigger: "x".repeat(65), + target: { mode: "current" as const }, + }, + ], + }), + ).toEqual({ + ok: false, + message: "config.routes[0].trigger must be at most 64 characters", + }); + }); +}); diff --git a/src/infra/voicewake-routing.ts b/src/infra/voicewake-routing.ts new file mode 100644 index 00000000000..6ebcc5d5f50 --- /dev/null +++ b/src/infra/voicewake-routing.ts @@ -0,0 +1,325 @@ +import path from "node:path"; +import { resolveStateDir } from "../config/paths.js"; +import { + classifySessionKeyShape, + isValidAgentId, + normalizeAgentId, +} from "../routing/session-key.js"; +import { createAsyncLock, readJsonFile, writeJsonAtomic } from "./json-files.js"; + +export type VoiceWakeRouteTarget = + | { mode: "current"; agentId?: undefined; sessionKey?: undefined } + | { agentId: string; sessionKey?: undefined; mode?: undefined } + | { sessionKey: string; agentId?: undefined; mode?: undefined }; + +export type VoiceWakeRouteRule = { + trigger: string; + target: VoiceWakeRouteTarget; +}; + +export type VoiceWakeRoutingConfig = { + version: 1; + defaultTarget: VoiceWakeRouteTarget; + routes: VoiceWakeRouteRule[]; + updatedAtMs: number; +}; + +const MAX_VOICEWAKE_ROUTES = 32; +const MAX_VOICEWAKE_TRIGGER_LENGTH = 64; + +const DEFAULT_ROUTING: VoiceWakeRoutingConfig = { + version: 1, + defaultTarget: { mode: "current" }, + routes: [], + updatedAtMs: 0, +}; + +function resolvePath(baseDir?: string) { + const root = baseDir ?? resolveStateDir(); + return path.join(root, "settings", "voicewake-routing.json"); +} + +export function normalizeVoiceWakeTriggerWord(value: string): string { + return value + .toLowerCase() + .split(/\s+/) + .map((token) => token.replace(/^[\p{P}\p{S}]+|[\p{P}\p{S}]+$/gu, "")) + .filter(Boolean) + .join(" "); +} + +function normalizeOptionalString(value: unknown): string | undefined { + if (typeof value !== "string") { + return undefined; + } + const trimmed = value.trim(); + return trimmed ? trimmed : undefined; +} + +function normalizeRouteTarget(value: unknown): VoiceWakeRouteTarget | null { + if (!value || typeof value !== "object") { + return null; + } + const rec = value as { mode?: unknown; agentId?: unknown; sessionKey?: unknown }; + const mode = normalizeOptionalString(rec.mode); + if (mode === "current") { + return { mode: "current" }; + } + const agentId = normalizeOptionalString(rec.agentId); + const sessionKey = normalizeOptionalString(rec.sessionKey); + if (agentId && !sessionKey) { + return { agentId: normalizeAgentId(agentId) }; + } + if (sessionKey && !agentId) { + return { sessionKey }; + } + return null; +} + +function normalizeRouteRule(value: unknown): VoiceWakeRouteRule | null { + if (!value || typeof value !== "object") { + return null; + } + const rec = value as { trigger?: unknown; target?: unknown }; + const triggerRaw = normalizeOptionalString(rec.trigger); + if (!triggerRaw) { + return null; + } + const trigger = normalizeVoiceWakeTriggerWord(triggerRaw); + if (!trigger) { + return null; + } + const target = normalizeRouteTarget(rec.target); + if (!target) { + return null; + } + return { trigger, target }; +} + +function isCanonicalAgentSessionKey(value: string): boolean { + const trimmed = value.trim(); + if (classifySessionKeyShape(trimmed) !== "agent") { + return false; + } + return !trimmed.split(":").some((part) => part.length === 0); +} + +function isPlainObject(value: unknown): value is Record { + return Boolean(value) && typeof value === "object" && !Array.isArray(value); +} + +function validateRouteTargetInput( + value: unknown, + label: string, +): { ok: true } | { ok: false; message: string } { + if (!isPlainObject(value)) { + return { ok: false, message: `${label} must be an object` }; + } + const rec = value as { mode?: unknown; agentId?: unknown; sessionKey?: unknown }; + const mode = normalizeOptionalString(rec.mode); + const agentId = normalizeOptionalString(rec.agentId); + const sessionKey = normalizeOptionalString(rec.sessionKey); + if (mode !== undefined) { + if (mode !== "current") { + return { + ok: false, + message: `${label}.mode must be "current" when provided`, + }; + } + if (agentId !== undefined || sessionKey !== undefined) { + return { + ok: false, + message: `${label} cannot mix mode with agentId or sessionKey`, + }; + } + return { ok: true }; + } + if (agentId !== undefined && sessionKey !== undefined) { + return { + ok: false, + message: `${label} cannot include both agentId and sessionKey`, + }; + } + if (agentId !== undefined) { + if (!isValidAgentId(agentId)) { + return { + ok: false, + message: `${label}.agentId must be a valid agent id`, + }; + } + return { ok: true }; + } + if (sessionKey !== undefined) { + if (!isCanonicalAgentSessionKey(sessionKey)) { + return { + ok: false, + message: `${label}.sessionKey must be a canonical agent session key`, + }; + } + return { ok: true }; + } + return { + ok: false, + message: `${label} must include mode, agentId, or sessionKey`, + }; +} + +export function validateVoiceWakeRoutingConfigInput( + input: unknown, +): { ok: true } | { ok: false; message: string } { + if (!isPlainObject(input)) { + return { ok: false, message: "config must be an object" }; + } + const rec = input as { + defaultTarget?: unknown; + routes?: unknown; + }; + if (rec.defaultTarget !== undefined) { + const validatedDefaultTarget = validateRouteTargetInput( + rec.defaultTarget, + "config.defaultTarget", + ); + if (!validatedDefaultTarget.ok) { + return validatedDefaultTarget; + } + } + if (rec.routes !== undefined && !Array.isArray(rec.routes)) { + return { ok: false, message: "config.routes must be an array" }; + } + if (Array.isArray(rec.routes)) { + if (rec.routes.length > MAX_VOICEWAKE_ROUTES) { + return { + ok: false, + message: `config.routes must contain at most ${MAX_VOICEWAKE_ROUTES} entries`, + }; + } + const normalizedTriggers = new Map(); + for (const [index, route] of rec.routes.entries()) { + if (!isPlainObject(route)) { + return { ok: false, message: `config.routes[${index}] must be an object` }; + } + const trigger = normalizeOptionalString(route.trigger); + const normalizedTrigger = trigger ? normalizeVoiceWakeTriggerWord(trigger) : ""; + if (!trigger || !normalizedTrigger) { + return { + ok: false, + message: `config.routes[${index}].trigger must be a non-empty string`, + }; + } + if (trigger.length > MAX_VOICEWAKE_TRIGGER_LENGTH) { + return { + ok: false, + message: `config.routes[${index}].trigger must be at most ${MAX_VOICEWAKE_TRIGGER_LENGTH} characters`, + }; + } + const duplicateIndex = normalizedTriggers.get(normalizedTrigger); + if (duplicateIndex !== undefined) { + return { + ok: false, + message: `config.routes[${index}].trigger duplicates config.routes[${duplicateIndex}].trigger after normalization`, + }; + } + normalizedTriggers.set(normalizedTrigger, index); + const validatedTarget = validateRouteTargetInput( + route.target, + `config.routes[${index}].target`, + ); + if (!validatedTarget.ok) { + return validatedTarget; + } + } + } + return { ok: true }; +} +export function normalizeVoiceWakeRoutingConfig(input: unknown): VoiceWakeRoutingConfig { + if (!input || typeof input !== "object") { + return { ...DEFAULT_ROUTING }; + } + const rec = input as { + version?: unknown; + defaultTarget?: unknown; + routes?: unknown; + updatedAtMs?: unknown; + }; + const defaultTarget = normalizeRouteTarget(rec.defaultTarget) ?? { mode: "current" as const }; + const routes = Array.isArray(rec.routes) + ? rec.routes + .map((entry) => normalizeRouteRule(entry)) + .filter((entry): entry is VoiceWakeRouteRule => Boolean(entry)) + : []; + const updatedAtMs = + typeof rec.updatedAtMs === "number" && Number.isFinite(rec.updatedAtMs) && rec.updatedAtMs > 0 + ? Math.floor(rec.updatedAtMs) + : 0; + return { + version: 1, + defaultTarget, + routes, + updatedAtMs, + }; +} + +const withLock = createAsyncLock(); + +export async function loadVoiceWakeRoutingConfig( + baseDir?: string, +): Promise { + const filePath = resolvePath(baseDir); + const existing = await readJsonFile(filePath); + if (!existing) { + return { ...DEFAULT_ROUTING }; + } + return normalizeVoiceWakeRoutingConfig(existing); +} + +export async function setVoiceWakeRoutingConfig( + config: unknown, + baseDir?: string, +): Promise { + const normalized = normalizeVoiceWakeRoutingConfig(config); + const filePath = resolvePath(baseDir); + return await withLock(async () => { + const next: VoiceWakeRoutingConfig = { + ...normalized, + updatedAtMs: Date.now(), + }; + await writeJsonAtomic(filePath, next); + return next; + }); +} + +export type VoiceWakeResolvedRoute = + | { mode: "current" } + | { agentId: string } + | { sessionKey: string }; + +export function resolveVoiceWakeRouteTarget( + routeTarget: VoiceWakeRouteTarget | undefined, +): VoiceWakeResolvedRoute { + if (!routeTarget || ("mode" in routeTarget && routeTarget.mode === "current")) { + return { mode: "current" }; + } + if ("agentId" in routeTarget && routeTarget.agentId) { + return { agentId: routeTarget.agentId }; + } + if ("sessionKey" in routeTarget && routeTarget.sessionKey) { + return { sessionKey: routeTarget.sessionKey }; + } + return { mode: "current" }; +} + +export function resolveVoiceWakeRouteByTrigger(params: { + trigger: string | undefined; + config: VoiceWakeRoutingConfig; +}): VoiceWakeResolvedRoute { + const normalizedTrigger = normalizeOptionalString(params.trigger) + ? normalizeVoiceWakeTriggerWord(params.trigger as string) + : ""; + if (normalizedTrigger) { + const matched = params.config.routes.find((route) => route.trigger === normalizedTrigger); + if (matched) { + return resolveVoiceWakeRouteTarget(matched.target); + } + } + return resolveVoiceWakeRouteTarget(params.config.defaultTarget); +}