fix: render talk transcripts in native webchat

This commit is contained in:
Peter Steinberger
2026-05-02 02:46:59 +01:00
parent 225b71db1e
commit ff45bc1f88
8 changed files with 164 additions and 12 deletions

View File

@@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai
- WhatsApp: close long-lived web sockets through Baileys `end(error)` before falling back to raw websocket close, so listener teardown runs Baileys cleanup instead of leaving zombie sockets. Fixes #52442. Thanks @essendigitalgroup-cyber.
- Twitch/plugins: emit a flat JSON Schema for Twitch channel config so single-account and multi-account configs validate before runtime load, and add source-checkout diagnostics for missing pnpm workspace dependencies. Thanks @vincentkoc.
- Gateway/sessions: move hot transcript reads and mirror appends onto async bounded IO with serialized parent-linked writes, keeping large session histories from stalling Gateway requests and channel replies. Fixes #75656. Thanks @DerFlash.
- macOS/Talk Mode: subscribe native WebChat to active-session transcript updates and render external spoken user turns in the chat thread instead of only showing assistant replies. Fixes #75155. Thanks @SledderBling.
- macOS/Voice Wake: accept trigger-only phrases in the built-in Voice Wake test, matching the settings UI and runtime trigger-only path instead of requiring extra command text after the wake word. Fixes #64986. Thanks @zoiks65.
- Cron/TTS: run cron announce payloads through the normal TTS directive transform before outbound delivery, so scheduled `[[tts]]` replies generate voice payloads instead of leaking raw tags. Fixes #52125. Thanks @kenchen3000.
- WhatsApp: save downloadable quoted image media from reply context as inbound media, so agents can inspect an image that a user replied to instead of only seeing `<media:image>`. Fixes #59174. Thanks @gaffner.

View File

@@ -133,6 +133,13 @@ struct MacGatewayChatTransport: OpenClawChatTransport {
timeoutMs: 10000)
}
/// Asks the gateway to subscribe to message updates for the given session.
///
/// Sends a `sessions.messages.subscribe` request through `GatewayConnection.shared`
/// with `timeoutMs` set to 10000.
///
/// - Parameter sessionKey: Sent to the gateway as the `key` request parameter.
/// - Throws: Whatever error the underlying gateway request throws.
func setActiveSessionKey(_ sessionKey: String) async throws {
// The response value is discarded; a failed request still propagates through `try`.
_ = try await GatewayConnection.shared.request(
method: "sessions.messages.subscribe",
params: ["key": AnyCodable(sessionKey)],
timeoutMs: 10000)
}
func events() -> AsyncStream<OpenClawChatTransportEvent> {
AsyncStream { continuation in
let task = Task {
@@ -184,6 +191,15 @@ struct MacGatewayChatTransport: OpenClawChatTransport {
return nil
}
return .chat(chat)
case "session.message":
guard let payload = evt.payload else { return nil }
guard let message = try? JSONDecoder().decode(
OpenClawSessionMessageEventPayload.self,
from: JSONEncoder().encode(payload))
else {
return nil
}
return .sessionMessage(message)
case "agent":
guard let payload = evt.payload else { return nil }
guard let agent = try? JSONDecoder().decode(

View File

@@ -80,6 +80,37 @@ struct MacGatewayChatTransportMappingTests {
}
}
/// A `session.message` push frame must map to `.sessionMessage` with its payload fields decoded intact.
@Test func `session message event maps to session message`() {
    typealias Box = OpenClawProtocol.AnyCodable

    // Build the nested payload bottom-up: text part -> message -> event payload.
    let textPart = Box([
        "type": Box("text"),
        "text": Box("spoken transcript"),
    ])
    let messageBody = Box([
        "role": Box("user"),
        "content": Box([textPart]),
        "timestamp": Box(1234.5),
    ])
    let payload = Box([
        "sessionKey": Box("agent:main:main"),
        "messageId": Box("msg-1"),
        "messageSeq": Box(7),
        "message": messageBody,
    ])

    let frame = EventFrame(type: "event", event: "session.message", payload: payload, seq: 1, stateversion: nil)
    let mapped = MacGatewayChatTransport.mapPushToTransportEvent(.event(frame))

    guard case let .sessionMessage(decoded) = mapped else {
        Issue.record("expected .sessionMessage from session.message event, got \(String(describing: mapped))")
        return
    }

    #expect(decoded.sessionKey == "agent:main:main")
    #expect(decoded.messageId == "msg-1")
    #expect(decoded.messageSeq == 7)
    #expect(decoded.message?.role == "user")
    #expect(decoded.message?.content.first?.text == "spoken transcript")
}
@Test func `unknown event maps to nil`() {
let frame = EventFrame(
type: "event",

View File

@@ -269,6 +269,25 @@ public struct OpenClawChatEventPayload: Codable, Sendable {
public let errorMessage: String?
}
/// Payload carried by a `session.message` transport event.
///
/// Every field is optional, so a payload that omits any of them still decodes.
public struct OpenClawSessionMessageEventPayload: Codable, Sendable {
/// Key of the session the message belongs to, if the event carries one.
public let sessionKey: String?
/// The chat message itself, if the event carries one.
public let message: OpenClawChatMessage?
/// Identifier of the message (presumably assigned by the gateway — confirm).
public let messageId: String?
/// Sequence number of the message (presumably its position in the session transcript — confirm).
public let messageSeq: Int?
/// Creates a payload from explicit values; pass `nil` for any field that is absent.
///
/// Declared explicitly because a synthesized memberwise initializer would not be `public`.
public init(
sessionKey: String?,
message: OpenClawChatMessage?,
messageId: String?,
messageSeq: Int?)
{
self.sessionKey = sessionKey
self.message = message
self.messageId = messageId
self.messageSeq = messageSeq
}
}
public struct OpenClawAgentEventPayload: Codable, Sendable, Identifiable {
public var id: String {
"\(self.runId)-\(self.seq ?? -1)"

View File

@@ -4,6 +4,7 @@ public enum OpenClawChatTransportEvent: Sendable {
case health(ok: Bool)
case tick
case chat(OpenClawChatEventPayload)
case sessionMessage(OpenClawSessionMessageEventPayload)
case agent(OpenClawAgentEventPayload)
case seqGap
}

View File

@@ -950,6 +950,8 @@ public final class OpenClawChatViewModel {
Task { await self.pollHealthIfNeeded(force: false) }
case let .chat(chat):
self.handleChatEvent(chat)
case let .sessionMessage(message):
self.handleSessionMessageEvent(message)
case let .agent(agent):
self.handleAgentEvent(agent)
case .seqGap:
@@ -962,6 +964,26 @@ public final class OpenClawChatViewModel {
}
}
/// Appends an externally produced user message to the visible thread.
///
/// The event is ignored when any of the following holds:
/// - it names a session key that does not match the current session
///   (an event without a session key is not filtered out);
/// - it carries no message;
/// - the message role, trimmed and lowercased, is not `"user"`;
/// - at least one run is pending.
///
/// - Parameter payload: The decoded session-message event.
private func handleSessionMessageEvent(_ payload: OpenClawSessionMessageEventPayload) {
    if let incomingKey = payload.sessionKey {
        guard Self.matchesCurrentSessionKey(incoming: incomingKey, current: self.sessionKey) else {
            return
        }
    }

    guard let incoming = payload.message else { return }

    let normalizedRole = incoming.role
        .trimmingCharacters(in: .whitespacesAndNewlines)
        .lowercased()
    guard normalizedRole == "user" else { return }

    // NOTE(review): presumably skipped while a run is pending because the chat-event flow
    // already shows the user's own turn — confirm.
    guard self.pendingRunCount <= 0 else { return }

    let cleaned = Self.stripInboundMetadata(from: incoming)
    let existing = self.messages
    let merged = Self.reconcileMessageIDs(previous: existing, incoming: existing + [cleaned])
    self.messages = Self.dedupeMessages(merged)
}
private func handleChatEvent(_ chat: OpenClawChatEventPayload) {
let isOurRun = chat.runId.flatMap { self.pendingRuns.contains($0) } ?? false

View File

@@ -689,6 +689,69 @@ extension TestChatTransportState {
}
}
/// A user turn pushed for the active session must show up as the only message in the thread.
@Test func appendsExternalSessionUserMessageForActiveSession() async throws {
    let nowMs = Date().timeIntervalSince1970 * 1000
    let (transport, vm) = await makeViewModel(historyResponses: [historyPayload()])
    await MainActor.run { vm.load() }
    // NOTE(review): `messages.isEmpty` may already hold before `load()` finishes —
    // confirm this wait really gates on bootstrap completion.
    try await waitUntil("bootstrap history loaded") { await MainActor.run { vm.messages.isEmpty } }

    let spokenPart = OpenClawChatMessageContent(
        type: "text",
        text: "spoken transcript",
        mimeType: nil,
        fileName: nil,
        content: nil)
    let spokenTurn = OpenClawChatMessage(
        role: "user",
        content: [spokenPart],
        timestamp: nowMs)
    let event = OpenClawSessionMessageEventPayload(
        sessionKey: "agent:main:main",
        message: spokenTurn,
        messageId: "msg-1",
        messageSeq: 1)
    transport.emit(.sessionMessage(event))

    try await waitUntil("external transcript visible") {
        await MainActor.run { () -> Bool in
            guard vm.messages.count == 1, let only = vm.messages.first else { return false }
            return only.role == "user" && only.content.first?.text == "spoken transcript"
        }
    }
}
/// A user turn pushed for a different session must leave the thread untouched.
@Test func ignoresExternalSessionUserMessageForOtherSession() async throws {
    let nowMs = Date().timeIntervalSince1970 * 1000
    let (transport, vm) = await makeViewModel(historyResponses: [historyPayload()])
    await MainActor.run { vm.load() }
    try await waitUntil("bootstrap history loaded") { await MainActor.run { vm.messages.isEmpty } }

    let foreignPart = OpenClawChatMessageContent(
        type: "text",
        text: "other transcript",
        mimeType: nil,
        fileName: nil,
        content: nil)
    let foreignTurn = OpenClawChatMessage(
        role: "user",
        content: [foreignPart],
        timestamp: nowMs)
    let event = OpenClawSessionMessageEventPayload(
        sessionKey: "other",
        message: foreignTurn,
        messageId: "msg-2",
        messageSeq: 2)
    transport.emit(.sessionMessage(event))

    // Give the view model a moment to (incorrectly) process the event before checking.
    try await Task.sleep(nanoseconds: 50_000_000)
    let stillEmpty = await MainActor.run { vm.messages.isEmpty }
    #expect(stillEmpty)
}
@Test func preservesMessageIDsAcrossHistoryRefreshes() async throws {
let now = Date().timeIntervalSince1970 * 1000
let history1 = historyPayload(messages: [chatTextMessage(role: "user", text: "hello", timestamp: now)])

View File

@@ -123,18 +123,17 @@ async function expectNoMessageWithin(params: {
timeoutMs?: number;
}): Promise<void> {
const timeoutMs = params.timeoutMs ?? 300;
vi.useFakeTimers();
try {
const outcome = params
.watch()
.then(() => "received")
.catch(() => "timeout");
await params.action?.();
await vi.advanceTimersByTimeAsync(timeoutMs);
await expect(outcome).resolves.toBe("timeout");
} finally {
vi.useRealTimers();
}
let received = false;
const watch = params
.watch()
.then(() => {
received = true;
})
.catch(() => undefined);
await params.action?.();
await new Promise((resolve) => setTimeout(resolve, timeoutMs));
expect(received).toBe(false);
await watch;
}
describe("session.message websocket events", () => {