diff --git a/CHANGELOG.md b/CHANGELOG.md index d274a8a0291..0aa25f9858d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai - Channels/WhatsApp: allow `@whiskeysockets/libsignal-node` in `onlyBuiltDependencies` so pnpm v9+ `blockExoticSubdeps` no longer rejects the baileys git-tarball subdep and silences all inbound agent replies. Fixes #76539. Thanks @ottodeng and @vincentkoc. - Gateway/systemd: preserve operator-added secrets in the Gateway env file across re-stage while clearing OpenClaw-managed keys (such as `OPENCLAW_GATEWAY_TOKEN`) so a fresh staging value is never shadowed by a stale env-file copy; operator secrets are also retained when the state-dir `.env` is empty. Fixes #76860. Thanks @hclsys. +- Realtime transcription: report socket closes before provider readiness as closed-before-ready failures instead of mislabeling them as connection timeouts for OpenAI, xAI, and Deepgram streaming transcription. Thanks @vincentkoc. - OpenAI/Google Meet: fail realtime voice connection attempts when the socket closes before `session.updated`, avoiding stuck Meet joins waiting on a bridge that never became ready. Thanks @vincentkoc. - QA/cache: require the full `CACHE-OK ` marker before live cache probes stop retrying, so suffix-only prose cannot hide a broken probe response. Thanks @vincentkoc. - Slack/Matrix: avoid creating blank progress-draft messages when `streaming.progress.label=false` and progress tool lines are disabled. Thanks @vincentkoc. diff --git a/extensions/deepgram/realtime-transcription-provider.ts b/extensions/deepgram/realtime-transcription-provider.ts index 9401d538020..49cdc1a7b4e 100644 --- a/extensions/deepgram/realtime-transcription-provider.ts +++ b/extensions/deepgram/realtime-transcription-provider.ts @@ -232,6 +232,8 @@ function createDeepgramRealtimeTranscriptionSession( reconnectDelayMs: DEEPGRAM_REALTIME_RECONNECT_DELAY_MS, maxQueuedBytes: DEEPGRAM_REALTIME_MAX_QUEUED_BYTES, connectTimeoutMessage: "Deepgram realtime transcription connection timeout", + connectClosedBeforeReadyMessage: + "Deepgram realtime transcription connection closed before ready", reconnectLimitMessage: "Deepgram realtime transcription reconnect limit reached", sendAudio: (audio, transport) => { transport.sendBinary(audio); diff --git a/extensions/openai/realtime-transcription-provider.ts b/extensions/openai/realtime-transcription-provider.ts index 8c68c8650d5..16fd9e575a7 100644 --- a/extensions/openai/realtime-transcription-provider.ts +++ b/extensions/openai/realtime-transcription-provider.ts @@ -138,6 +138,7 @@ function createOpenAIRealtimeTranscriptionSession( maxReconnectAttempts: OPENAI_REALTIME_TRANSCRIPTION_MAX_RECONNECT_ATTEMPTS, reconnectDelayMs: OPENAI_REALTIME_TRANSCRIPTION_RECONNECT_DELAY_MS, connectTimeoutMessage: "OpenAI realtime transcription connection timeout", + connectClosedBeforeReadyMessage: "OpenAI realtime transcription connection closed before ready", reconnectLimitMessage: "OpenAI realtime transcription reconnect limit reached", sendAudio: (audio, transport) => { transport.sendJson({ diff --git a/extensions/xai/realtime-transcription-provider.ts b/extensions/xai/realtime-transcription-provider.ts index bae22582b29..6db59141108 100644 --- a/extensions/xai/realtime-transcription-provider.ts +++ b/extensions/xai/realtime-transcription-provider.ts @@ -226,6 +226,7 @@ function createXaiRealtimeTranscriptionSession( reconnectDelayMs: XAI_REALTIME_STT_RECONNECT_DELAY_MS, maxQueuedBytes: XAI_REALTIME_STT_MAX_QUEUED_BYTES, connectTimeoutMessage: "xAI realtime transcription connection timeout", + connectClosedBeforeReadyMessage: "xAI realtime transcription connection closed before ready", reconnectLimitMessage: "xAI realtime transcription reconnect limit reached", sendAudio: (audio, transport) => { transport.sendBinary(audio); diff --git a/src/realtime-transcription/websocket-session.test.ts b/src/realtime-transcription/websocket-session.test.ts index 8ca0ad67657..5607cb15754 100644 --- a/src/realtime-transcription/websocket-session.test.ts +++ b/src/realtime-transcription/websocket-session.test.ts @@ -13,6 +13,7 @@ afterEach(async () => { }); async function createRealtimeServer(params?: { + closeOnConnection?: boolean; initialEvent?: unknown; onBinary?: (payload: Buffer) => void; onText?: (payload: unknown) => void; @@ -25,6 +26,10 @@ async function createRealtimeServer(params?: { wss.handleUpgrade(request, socket, head, (ws) => { clients.add(ws); ws.on("close", () => clients.delete(ws)); + if (params?.closeOnConnection) { + ws.close(1011, "setup failed"); + return; + } if (params?.initialEvent) { ws.send(JSON.stringify(params.initialEvent)); } @@ -153,4 +158,27 @@ describe("createRealtimeTranscriptionWebSocketSession", () => { expect(session.isConnected()).toBe(false); expect(onError).toHaveBeenCalledWith(expect.any(Error)); }); + + it("reports pre-ready closes separately from connection timeouts", async () => { + const server = await createRealtimeServer({ closeOnConnection: true }); + const onError = vi.fn(); + const session = createRealtimeTranscriptionWebSocketSession({ + providerId: "test", + callbacks: { onError }, + url: server.url, + connectTimeoutMessage: "test realtime transcription connection timeout", + connectClosedBeforeReadyMessage: "test realtime transcription connection closed before ready", + sendAudio: (audio, transport) => { + transport.sendBinary(audio); + }, + }); + + await expect(session.connect()).rejects.toThrow( + "test realtime transcription connection closed before ready", + ); + expect(onError).toHaveBeenCalledWith(expect.any(Error)); + expect(onError.mock.calls[0]?.[0]).toMatchObject({ + message: "test realtime transcription connection closed before ready", + }); + }); }); diff --git a/src/realtime-transcription/websocket-session.ts b/src/realtime-transcription/websocket-session.ts index f6c2f0c674e..5510c45a937 100644 --- a/src/realtime-transcription/websocket-session.ts +++ b/src/realtime-transcription/websocket-session.ts @@ -20,6 +20,7 @@ export type RealtimeTranscriptionWebSocketTransport = { export type RealtimeTranscriptionWebSocketSessionOptions = { callbacks: RealtimeTranscriptionSessionCallbacks; + connectClosedBeforeReadyMessage?: string; connectTimeoutMessage?: string; connectTimeoutMs?: number; closeTimeoutMs?: number; @@ -267,7 +268,7 @@ class WebSocketRealtimeTranscriptionSession implements RealtimeTranscript if (!opened || !settled) { failConnect( new Error( - this.options.connectTimeoutMessage ?? + this.options.connectClosedBeforeReadyMessage ?? `${this.options.providerId} realtime transcription connection closed before ready`, ), );