From 36cecbb9b01bcfc97b18ce81aee44aac4c77e95a Mon Sep 17 00:00:00 2001 From: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> Date: Sat, 2 May 2026 20:36:57 +0000 Subject: [PATCH] fix(clawsweeper): address review for automerge-openclaw-openclaw-76021 (2) --- ui/src/ui/app-chat.ts | 2 +- ui/src/ui/app.test.ts | 74 ++++++++++++++++++++++++++++++++++-- ui/src/ui/app.ts | 50 ++++++++++++++++-------- ui/src/ui/views/chat.test.ts | 14 +++++++ ui/src/ui/views/chat.ts | 16 +++++--- 5 files changed, 130 insertions(+), 26 deletions(-) diff --git a/ui/src/ui/app-chat.ts b/ui/src/ui/app-chat.ts index 32aa04b53c0..d2d8717d070 100644 --- a/ui/src/ui/app-chat.ts +++ b/ui/src/ui/app-chat.ts @@ -75,7 +75,7 @@ export type ChatHost = ChatInputHistoryState & { onSlashAction?: (action: string) => void | Promise; }; -export type ChatDictationStatus = "idle" | "recording" | "transcribing" | "error"; +export type ChatDictationStatus = "idle" | "starting" | "recording" | "transcribing" | "error"; type ChatTranscribeAudioResult = { text?: unknown; diff --git a/ui/src/ui/app.test.ts b/ui/src/ui/app.test.ts index 33ef5bab3a5..1373a0657c4 100644 --- a/ui/src/ui/app.test.ts +++ b/ui/src/ui/app.test.ts @@ -60,10 +60,28 @@ type AppWithDictationInternals = { chatDictationDetail: string | null; chatDictationChunks: Blob[]; toggleChatDictation: () => Promise; + cancelChatDictation: () => void; }; let originalMediaDevices: PropertyDescriptor | undefined; +function createDeferred() { + let resolve!: (value: T) => void; + let reject!: (error: unknown) => void; + const promise = new Promise((resolvePromise, rejectPromise) => { + resolve = resolvePromise; + reject = rejectPromise; + }); + return { promise, resolve, reject }; +} + +function createMockStream(track = { stop: vi.fn() }) { + return { + getTracks: () => [track], + track, + } as unknown as MediaStream & { track: { stop: ReturnType } }; +} + async function createRecordingApp() { const { OpenClawApp } = await import("./app.ts"); const app = new OpenClawApp(); @@ -83,9 +101,7 @@ describe("OpenClawApp dictation recorder lifecycle", () => { Object.defineProperty(globalThis.navigator, "mediaDevices", { configurable: true, value: { - getUserMedia: vi.fn(async () => ({ - getTracks: () => [{ stop: vi.fn() }], - })), + getUserMedia: vi.fn(async () => createMockStream()), }, }); }); @@ -130,4 +146,56 @@ describe("OpenClawApp dictation recorder lifecycle", () => { type: "audio/webm", }); }); + + it("ignores duplicate starts while microphone permission is pending", async () => { + const app = await createRecordingApp(); + const pendingUserMedia = createDeferred(); + const getUserMedia = vi.fn(() => pendingUserMedia.promise); + Object.defineProperty(globalThis.navigator, "mediaDevices", { + configurable: true, + value: { getUserMedia }, + }); + const stream = createMockStream(); + + const firstStart = app.toggleChatDictation(); + const secondStart = app.toggleChatDictation(); + + expect(getUserMedia).toHaveBeenCalledTimes(1); + await secondStart; + expect(app.chatDictationStatus).toBe("starting"); + + pendingUserMedia.resolve(stream); + await firstStart; + + expect(MockMediaRecorder.instances).toHaveLength(1); + expect(MockMediaRecorder.instances[0].state).toBe("recording"); + expect(stream.track.stop).not.toHaveBeenCalled(); + + MockMediaRecorder.instances[0].emitData(new Blob(["audio"], { type: "audio/webm" })); + MockMediaRecorder.instances[0].stop(); + + expect(stream.track.stop).toHaveBeenCalledTimes(1); + expect(transcribeChatAudioMock).toHaveBeenCalledTimes(1); + }); + + it("stops a microphone stream that resolves after pending dictation is canceled", async () => { + const app = await createRecordingApp(); + const pendingUserMedia = createDeferred(); + const getUserMedia = vi.fn(() => pendingUserMedia.promise); + Object.defineProperty(globalThis.navigator, "mediaDevices", { + configurable: true, + value: { getUserMedia }, + }); + const stream = createMockStream(); + + const start = app.toggleChatDictation(); + app.cancelChatDictation(); + pendingUserMedia.resolve(stream); + await start; + + expect(MockMediaRecorder.instances).toHaveLength(0); + expect(stream.track.stop).toHaveBeenCalledTimes(1); + expect(app.chatDictationStatus).toBe("idle"); + expect(transcribeChatAudioMock).not.toHaveBeenCalled(); + }); }); diff --git a/ui/src/ui/app.ts b/ui/src/ui/app.ts index 5394c0ac9f1..c76913f36fa 100644 --- a/ui/src/ui/app.ts +++ b/ui/src/ui/app.ts @@ -230,6 +230,7 @@ export class OpenClawApp extends LitElement { private chatDictationStream: MediaStream | null = null; private chatDictationChunks: Blob[] = []; private chatDictationCancelNextStop = false; + private chatDictationStartToken = 0; @state() chatManualRefreshInFlight = false; @state() chatMobileControlsOpen = false; private chatMobileControlsTrigger: HTMLElement | null = null; @@ -958,7 +959,7 @@ export class OpenClawApp extends LitElement { this.chatDictationRecorder.stop(); return; } - if (this.chatDictationStatus === "transcribing") { + if (this.chatDictationStatus === "starting" || this.chatDictationStatus === "transcribing") { return; } if (!this.client || !this.connected) { @@ -974,8 +975,16 @@ export class OpenClawApp extends LitElement { return; } + const startToken = ++this.chatDictationStartToken; + this.chatDictationStatus = "starting"; + this.chatDictationDetail = "Starting dictation..."; + let stream: MediaStream | null = null; try { - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + if (this.chatDictationStartToken !== startToken || this.chatDictationStatus !== "starting") { + this.stopMediaStream(stream); + return; + } const mimeType = ["audio/webm;codecs=opus", "audio/webm", "audio/mp4"].find((candidate) => MediaRecorder.isTypeSupported(candidate), ); @@ -992,9 +1001,10 @@ export class OpenClawApp extends LitElement { } }); recorder.addEventListener("error", (event) => { - if (this.chatDictationRecorder === recorder) { - this.chatDictationRecorder = null; + if (this.chatDictationRecorder !== recorder) { + return; } + this.chatDictationRecorder = null; this.chatDictationChunks = []; this.chatDictationStatus = "error"; this.chatDictationDetail = @@ -1003,20 +1013,17 @@ export class OpenClawApp extends LitElement { this.stopChatDictationStream(); }); recorder.addEventListener("stop", () => { - const isCurrentRecorder = this.chatDictationRecorder === recorder; - const chunks = isCurrentRecorder ? this.chatDictationChunks : []; - if (isCurrentRecorder) { - this.chatDictationChunks = []; + if (this.chatDictationRecorder !== recorder) { + return; } + const chunks = this.chatDictationChunks; + this.chatDictationChunks = []; const canceledByRequest = this.chatDictationCancelNextStop; - const canceled = canceledByRequest || !isCurrentRecorder; this.chatDictationCancelNextStop = false; - if (isCurrentRecorder) { - this.chatDictationRecorder = null; - this.stopChatDictationStream(); - } - if (canceled) { - if (canceledByRequest && this.chatDictationStatus !== "error") { + this.chatDictationRecorder = null; + this.stopChatDictationStream(); + if (canceledByRequest) { + if (this.chatDictationStatus !== "error") { this.chatDictationStatus = "idle"; this.chatDictationDetail = null; } @@ -1034,6 +1041,12 @@ export class OpenClawApp extends LitElement { this.chatDictationDetail = "Recording dictation..."; recorder.start(); } catch (error) { + if (stream && this.chatDictationStream !== stream) { + this.stopMediaStream(stream); + } + if (this.chatDictationStartToken !== startToken) { + return; + } this.chatDictationRecorder = null; this.stopChatDictationStream(); this.chatDictationStatus = "error"; @@ -1043,11 +1056,16 @@ export class OpenClawApp extends LitElement { } private stopChatDictationStream() { - this.chatDictationStream?.getTracks().forEach((track) => track.stop()); + this.stopMediaStream(this.chatDictationStream); this.chatDictationStream = null; } + private stopMediaStream(stream: MediaStream | null) { + stream?.getTracks().forEach((track) => track.stop()); + } + cancelChatDictation() { + this.chatDictationStartToken += 1; if (this.chatDictationRecorder?.state === "recording") { this.chatDictationCancelNextStop = true; this.chatDictationRecorder.stop(); diff --git a/ui/src/ui/views/chat.test.ts b/ui/src/ui/views/chat.test.ts index 484ab553fa8..01d15840eef 100644 --- a/ui/src/ui/views/chat.test.ts +++ b/ui/src/ui/views/chat.test.ts @@ -466,6 +466,20 @@ describe("chat voice controls", () => { expect(container.textContent).toContain("Recording dictation"); }); + it("disables duplicate dictation starts while microphone access is pending", () => { + const container = renderChatView({ + chatDictationStatus: "starting", + chatDictationDetail: null, + }); + + const button = container.querySelector( + '[aria-label="Dictate with server STT"]', + ); + expect(button).not.toBeNull(); + expect(button!.disabled).toBe(true); + expect(container.textContent).toContain("Starting dictation"); + }); + it("keeps stop dictation enabled while recording after disconnect", () => { const container = renderChatView({ connected: false, diff --git a/ui/src/ui/views/chat.ts b/ui/src/ui/views/chat.ts index 300b0daf494..6790eab1a49 100644 --- a/ui/src/ui/views/chat.ts +++ b/ui/src/ui/views/chat.ts @@ -1206,11 +1206,13 @@ export function renderChat(props: ChatProps) { ? html`
${props.chatDictationDetail ?? - (props.chatDictationStatus === "recording" - ? "Recording dictation..." - : props.chatDictationStatus === "transcribing" - ? "Transcribing dictation..." - : "Dictation unavailable")} + (props.chatDictationStatus === "starting" + ? "Starting dictation..." + : props.chatDictationStatus === "recording" + ? "Recording dictation..." + : props.chatDictationStatus === "transcribing" + ? "Transcribing dictation..." + : "Dictation unavailable")}
` : props.realtimeTalkActive || props.realtimeTalkDetail || props.realtimeTalkTranscript @@ -1282,7 +1284,9 @@ export function renderChat(props: ChatProps) { : "Dictate with server STT"} ?disabled=${props.chatDictationStatus === "recording" ? false - : !props.connected || props.chatDictationStatus === "transcribing"} + : !props.connected || + props.chatDictationStatus === "starting" || + props.chatDictationStatus === "transcribing"} > ${props.chatDictationStatus === "recording" ? icons.stop : icons.mic}