From 1cf68b9243aa2e8683cd2c920623e2b624250dfb Mon Sep 17 00:00:00 2001 From: Val Alexander <68980965+BunsDev@users.noreply.github.com> Date: Mon, 27 Apr 2026 10:35:34 -0500 Subject: [PATCH] fix(control-ui): keep google talk off webrtc Keep Google Live Talk browser sessions on the supported WebSocket/gateway-relay paths instead of falling back to browser WebRTC, remove stale browser-native voice controls that bypass Talk/TTS provider settings, and harden the Google Live URL plus realtime relay resource controls. Verification: - pnpm test ui/src/ui/realtime-talk.test.ts ui/src/ui/realtime-talk-google-live.test.ts src/gateway/talk-realtime-relay.test.ts src/gateway/server-methods/talk.test.ts - pnpm check:changed --- CHANGELOG.md | 1 + src/gateway/server-methods/talk.test.ts | 92 +++++++++++++++++++++ src/gateway/server-methods/talk.ts | 17 +++- src/gateway/talk-realtime-relay.test.ts | 34 ++++++++ src/gateway/talk-realtime-relay.ts | 31 +++++++ ui/src/styles/chat/grouped.css | 8 +- ui/src/ui/chat/grouped-render.test.ts | 19 +++-- ui/src/ui/chat/grouped-render.ts | 61 +------------- ui/src/ui/chat/realtime-talk-google-live.ts | 29 ++++++- ui/src/ui/chat/realtime-talk.ts | 29 ++++++- ui/src/ui/realtime-talk-google-live.test.ts | 52 ++++++++++++ ui/src/ui/realtime-talk.test.ts | 51 ++++++++++++ ui/src/ui/views/chat.test.ts | 9 ++ ui/src/ui/views/chat.ts | 71 +--------------- 14 files changed, 354 insertions(+), 150 deletions(-) create mode 100644 ui/src/ui/realtime-talk-google-live.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 7aeeae584be..40a173efe42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Gateway/device tokens: stop echoing rotated bearer tokens from shared/admin `device.token.rotate` responses while preserving the same-device token handoff needed by token-only clients before reconnect. (#66773) Thanks @MoerAI. +- Control UI/Talk: keep Google Live browser sessions on the WebSocket transport instead of falling back to WebRTC, validate browser Google Live WebSocket endpoints, cap Gateway relay sessions per browser connection, and remove stale browser-native voice buttons that did not use the configured Talk/TTS provider. Thanks @BunsDev. - Agents/subagents: enforce `subagents.allowAgents` for explicit same-agent `sessions_spawn(agentId=...)` calls instead of auto-allowing requester self-targets. Fixes #72827. Thanks @oiGaDio. - ACP/sessions_spawn: let explicit `sessions_spawn(runtime="acp")` bootstrap turns run while `acp.dispatch.enabled=false` still blocks automatic ACP thread dispatch. Fixes #63591. Thanks @moeedahmed. - CLI/update: install npm global updates into a verified temporary prefix before swapping the package tree into place, preventing mixed old/new installs and stale packaged files from breaking `openclaw update` verification. Thanks @shakkernerd. diff --git a/src/gateway/server-methods/talk.test.ts b/src/gateway/server-methods/talk.test.ts index d30f5a017d3..89a34299a2d 100644 --- a/src/gateway/server-methods/talk.test.ts +++ b/src/gateway/server-methods/talk.test.ts @@ -8,6 +8,9 @@ const mocks = vi.hoisted(() => ({ canonicalizeSpeechProviderId: vi.fn((providerId: string | undefined) => providerId), getSpeechProvider: vi.fn(), synthesizeSpeech: vi.fn(), + getRealtimeVoiceProvider: vi.fn(), + resolveConfiguredRealtimeVoiceProvider: vi.fn(), + createTalkRealtimeRelaySession: vi.fn(), })); vi.mock("../../config/config.js", () => ({ @@ -23,6 +26,22 @@ vi.mock("../../tts/tts.js", () => ({ synthesizeSpeech: mocks.synthesizeSpeech, })); +vi.mock("../../realtime-voice/provider-registry.js", () => ({ + getRealtimeVoiceProvider: mocks.getRealtimeVoiceProvider, +})); + +vi.mock("../../realtime-voice/provider-resolver.js", () => ({ + resolveConfiguredRealtimeVoiceProvider: mocks.resolveConfiguredRealtimeVoiceProvider, +})); + +vi.mock("../talk-realtime-relay.js", async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + createTalkRealtimeRelaySession: mocks.createTalkRealtimeRelaySession, + }; +}); + function createTalkConfig(apiKey: unknown): OpenClawConfig { return { talk: { @@ -112,3 +131,76 @@ describe("talk.speak handler", () => { ); }); }); + +describe("talk.realtime.session handler", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("falls back to the gateway relay when Google returns a WebRTC-shaped browser session", async () => { + const createBrowserSession = vi.fn(async () => ({ + provider: "google", + clientSecret: "legacy-google-secret", + })); + const createBridge = vi.fn(); + const provider = { + id: "google", + label: "Google Live Voice", + isConfigured: () => true, + createBrowserSession, + createBridge, + }; + mocks.getRealtimeVoiceProvider.mockReturnValue(provider); + mocks.resolveConfiguredRealtimeVoiceProvider.mockReturnValue({ + provider, + providerConfig: { apiKey: "gemini-key" }, + }); + mocks.createTalkRealtimeRelaySession.mockReturnValue({ + provider: "google", + transport: "gateway-relay", + relaySessionId: "relay-1", + audio: { + inputEncoding: "pcm16", + inputSampleRateHz: 24000, + outputEncoding: "pcm16", + outputSampleRateHz: 24000, + }, + }); + + const respond = vi.fn(); + await talkHandlers["talk.realtime.session"]({ + req: { type: "req", id: "1", method: "talk.realtime.session" }, + params: { sessionKey: "main", provider: "google" }, + client: { connId: "conn-1" } as never, + isWebchatConnect: () => false, + respond: respond as never, + context: { + getRuntimeConfig: () => + ({ + talk: { + provider: "google", + providers: { google: { apiKey: "gemini-key" } }, + }, + }) as OpenClawConfig, + } as never, + }); + + expect(createBrowserSession).toHaveBeenCalledTimes(1); + expect(mocks.createTalkRealtimeRelaySession).toHaveBeenCalledWith( + expect.objectContaining({ + connId: "conn-1", + provider, + providerConfig: { apiKey: "gemini-key" }, + }), + ); + expect(respond).toHaveBeenCalledWith( + true, + expect.objectContaining({ + provider: "google", + transport: "gateway-relay", + relaySessionId: "relay-1", + }), + undefined, + ); + }); +}); diff --git a/src/gateway/server-methods/talk.ts b/src/gateway/server-methods/talk.ts index 87f745ebb40..4d77b08ca16 100644 --- a/src/gateway/server-methods/talk.ts +++ b/src/gateway/server-methods/talk.ts @@ -13,7 +13,10 @@ import { } from "../../realtime-voice/agent-consult-tool.js"; import { getRealtimeVoiceProvider } from "../../realtime-voice/provider-registry.js"; import { resolveConfiguredRealtimeVoiceProvider } from "../../realtime-voice/provider-resolver.js"; -import type { RealtimeVoiceProviderConfig } from "../../realtime-voice/provider-types.js"; +import type { + RealtimeVoiceBrowserSession, + RealtimeVoiceProviderConfig, +} from "../../realtime-voice/provider-types.js"; import { normalizeLowercaseStringOrEmpty, normalizeOptionalLowercaseString, @@ -226,6 +229,12 @@ function withRealtimeBrowserOverrides( return Object.keys(overrides).length > 0 ? { ...providerConfig, ...overrides } : providerConfig; } +function isUnsupportedBrowserWebRtcSession(session: RealtimeVoiceBrowserSession): boolean { + const provider = normalizeLowercaseStringOrEmpty(session.provider); + const transport = (session as { transport?: string }).transport ?? "webrtc-sdp"; + return provider === "google" && transport === "webrtc-sdp"; +} + function isFallbackEligibleTalkReason(reason: TalkSpeakReason): boolean { return ( reason === "talk_unconfigured" || @@ -459,8 +468,10 @@ export const talkHandlers: GatewayRequestHandlers = { model: normalizeOptionalString(typedParams.model), voice: normalizeOptionalString(typedParams.voice), }); - respond(true, session, undefined); - return; + if (!isUnsupportedBrowserWebRtcSession(session)) { + respond(true, session, undefined); + return; + } } const connId = client?.connId; diff --git a/src/gateway/talk-realtime-relay.test.ts b/src/gateway/talk-realtime-relay.test.ts index 1a7125b2ae3..0c76eb15cab 100644 --- a/src/gateway/talk-realtime-relay.test.ts +++ b/src/gateway/talk-realtime-relay.test.ts @@ -178,4 +178,38 @@ describe("talk realtime gateway relay", () => { }), ).toThrow("Unknown realtime relay session"); }); + + it("caps active relay sessions per browser connection", () => { + const provider: RealtimeVoiceProviderPlugin = { + id: "relay-test", + label: "Relay Test", + isConfigured: () => true, + createBridge: () => ({ + connect: vi.fn(async () => undefined), + sendAudio: vi.fn(), + setMediaTimestamp: vi.fn(), + submitToolResult: vi.fn(), + acknowledgeMark: vi.fn(), + close: vi.fn(), + isConnected: vi.fn(() => true), + }), + }; + const createSession = (connId: string) => + createTalkRealtimeRelaySession({ + context: { broadcastToConnIds: vi.fn() } as never, + connId, + provider, + providerConfig: {}, + instructions: "brief", + tools: [], + }); + + createSession("conn-1"); + createSession("conn-1"); + + expect(() => createSession("conn-1")).toThrow( + "Too many active realtime relay sessions for this connection", + ); + expect(() => createSession("conn-2")).not.toThrow(); + }); }); diff --git a/src/gateway/talk-realtime-relay.ts b/src/gateway/talk-realtime-relay.ts index 9af95d1d25e..65244f93844 100644 --- a/src/gateway/talk-realtime-relay.ts +++ b/src/gateway/talk-realtime-relay.ts @@ -14,6 +14,8 @@ import type { GatewayRequestContext } from "./server-methods/shared-types.js"; const RELAY_SESSION_TTL_MS = 30 * 60 * 1000; const MAX_AUDIO_BASE64_BYTES = 512 * 1024; +const MAX_RELAY_SESSIONS_PER_CONN = 2; +const MAX_RELAY_SESSIONS_GLOBAL = 64; const RELAY_EVENT = "talk.realtime.relay"; export type TalkRealtimeRelayEvent = @@ -94,9 +96,38 @@ function closeRelaySession(session: RelaySession, reason: "completed" | "error") }); } +function pruneExpiredRelaySessions(nowMs = Date.now()): void { + for (const session of relaySessions.values()) { + if (nowMs > session.expiresAtMs) { + closeRelaySession(session, "completed"); + } + } +} + +function countRelaySessionsForConn(connId: string): number { + let count = 0; + for (const session of relaySessions.values()) { + if (session.connId === connId) { + count += 1; + } + } + return count; +} + +function enforceRelaySessionLimits(connId: string): void { + pruneExpiredRelaySessions(); + if (relaySessions.size >= MAX_RELAY_SESSIONS_GLOBAL) { + throw new Error("Too many active realtime relay sessions"); + } + if (countRelaySessionsForConn(connId) >= MAX_RELAY_SESSIONS_PER_CONN) { + throw new Error("Too many active realtime relay sessions for this connection"); + } +} + export function createTalkRealtimeRelaySession( params: CreateTalkRealtimeRelaySessionParams, ): TalkRealtimeRelaySessionResult { + enforceRelaySessionLimits(params.connId); const relaySessionId = randomUUID(); const expiresAtMs = Date.now() + RELAY_SESSION_TTL_MS; let relay: RelaySession | undefined; diff --git a/ui/src/styles/chat/grouped.css b/ui/src/styles/chat/grouped.css index c19deb16fee..b5329e6b5be 100644 --- a/ui/src/styles/chat/grouped.css +++ b/ui/src/styles/chat/grouped.css @@ -65,7 +65,7 @@ line-height: 1.2; } -/* ── Group footer action buttons (TTS, delete) ── */ +/* ── Group footer action buttons ── */ .chat-group-footer button { background: none; border: none; @@ -106,12 +106,6 @@ stroke-linejoin: round; } -.chat-tts-btn--active { - opacity: 1 !important; - pointer-events: auto !important; - color: var(--accent); -} - .chat-group-delete:hover { color: var(--danger) !important; } diff --git a/ui/src/ui/chat/grouped-render.test.ts b/ui/src/ui/chat/grouped-render.test.ts index 67a116707ba..2af5f1dd279 100644 --- a/ui/src/ui/chat/grouped-render.test.ts +++ b/ui/src/ui/chat/grouped-render.test.ts @@ -89,13 +89,6 @@ vi.mock("../tool-display.ts", () => ({ }), })); -vi.mock("./speech.ts", () => ({ - isTtsSpeaking: () => false, - isTtsSupported: () => false, - speakText: () => false, - stopTts: () => undefined, -})); - type RenderMessageGroupOptions = Parameters[1]; function renderAssistantMessage( @@ -262,6 +255,18 @@ afterEach(() => { }); describe("grouped chat rendering", () => { + it("does not render the stale assistant read-aloud footer action", () => { + const container = document.createElement("div"); + renderAssistantMessage(container, { + role: "assistant", + content: "hello from assistant", + timestamp: 1000, + }); + + expect(container.querySelector(".chat-tts-btn")).toBeNull(); + expect(container.querySelector('[aria-label="Read aloud"]')).toBeNull(); + }); + it("positions delete confirm by message side", () => { const container = document.createElement("div"); clearDeleteConfirmSkip(); diff --git a/ui/src/ui/chat/grouped-render.ts b/ui/src/ui/chat/grouped-render.ts index 0630558b68b..80423d2be68 100644 --- a/ui/src/ui/chat/grouped-render.ts +++ b/ui/src/ui/chat/grouped-render.ts @@ -19,14 +19,9 @@ import { resolveLocalUserName } from "../user-identity.ts"; export { resolveAssistantTextAvatar } from "../views/agents-utils.ts"; import { renderChatAvatar } from "./chat-avatar.ts"; import { renderCopyAsMarkdownButton } from "./copy-as-markdown.ts"; -import { - extractTextCached, - extractThinkingCached, - formatReasoningMarkdown, -} from "./message-extract.ts"; +import { extractThinkingCached, formatReasoningMarkdown } from "./message-extract.ts"; import { isToolResultMessage, normalizeMessage } from "./message-normalizer.ts"; import { normalizeRoleForGrouping } from "./role-normalizer.ts"; -import { isTtsSupported, speakText, stopTts, isTtsSpeaking } from "./speech.ts"; import { extractToolCards, renderExpandedToolCardContent, @@ -465,7 +460,6 @@ export function renderMessageGroup(