diff --git a/ui/src/ui/app.test.ts b/ui/src/ui/app.test.ts
new file mode 100644
index 00000000000..9b8c5441e47
--- /dev/null
+++ b/ui/src/ui/app.test.ts
@@ -0,0 +1,141 @@
+/* @vitest-environment jsdom */
+
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+// Created with vi.hoisted so the vi.mock factory below can safely reference it.
+const { transcribeChatAudioMock } = vi.hoisted(() => ({
+  transcribeChatAudioMock: vi.fn(),
+}));
+
+// Keep the real ./app-chat.ts exports, replacing only transcribeChatAudio with the mock.
+vi.mock("./app-chat.ts", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("./app-chat.ts")>();
+  return {
+    ...actual,
+    transcribeChatAudio: transcribeChatAudioMock,
+  };
+});
+
+/**
+ * Minimal MediaRecorder stand-in: records every constructed instance and lets
+ * tests emit "dataavailable", "error", and "stop" events by hand.
+ */
+class MockMediaRecorder extends EventTarget {
+  static instances: MockMediaRecorder[] = [];
+  static isTypeSupported = vi.fn((mimeType: string) => mimeType === "audio/webm");
+
+  readonly mimeType: string;
+  state: RecordingState = "inactive";
+
+  constructor(
+    readonly stream: MediaStream,
+    options?: MediaRecorderOptions,
+  ) {
+    super();
+    this.mimeType = options?.mimeType ?? "";
+    MockMediaRecorder.instances.push(this);
+  }
+
+  start() {
+    this.state = "recording";
+  }
+
+  stop() {
+    this.state = "inactive";
+    this.dispatchEvent(new Event("stop"));
+  }
+
+  emitData(data: Blob) {
+    const event = new Event("dataavailable") as Event & { data: Blob };
+    Object.defineProperty(event, "data", { value: data });
+    this.dispatchEvent(event);
+  }
+
+  emitError(message: string) {
+    const event = new Event("error") as Event & { error: Error; message: string };
+    Object.defineProperty(event, "error", { value: new Error(message) });
+    Object.defineProperty(event, "message", { value: message });
+    this.dispatchEvent(event);
+  }
+}
+
+/** Fields and methods of OpenClawApp that these tests read or drive. */
+type AppWithDictationInternals = {
+  client: unknown;
+  connected: boolean;
+  chatDictationStatus: string;
+  chatDictationDetail: string | null;
+  chatDictationChunks: Blob[];
+  toggleChatDictation: () => Promise<void>;
+};
+
+let originalMediaDevices: PropertyDescriptor | undefined;
+
+/** Builds an OpenClawApp with a stub client, marked connected, typed for the tests. */
+async function createRecordingApp() {
+  const { OpenClawApp } = await import("./app.ts");
+  const app = new OpenClawApp();
+  app.client = { request: vi.fn() } as never;
+  app.connected = true;
+  return app as unknown as AppWithDictationInternals;
+}
+
+describe("OpenClawApp dictation recorder lifecycle", () => {
+  beforeEach(() => {
+    transcribeChatAudioMock.mockReset();
+    transcribeChatAudioMock.mockResolvedValue(null);
+    MockMediaRecorder.instances = [];
+    MockMediaRecorder.isTypeSupported.mockClear();
+    vi.stubGlobal("MediaRecorder", MockMediaRecorder);
+    originalMediaDevices = Object.getOwnPropertyDescriptor(globalThis.navigator, "mediaDevices");
+    Object.defineProperty(globalThis.navigator, "mediaDevices", {
+      configurable: true,
+      value: {
+        getUserMedia: vi.fn(async () => ({
+          getTracks: () => [{ stop: vi.fn() }],
+        })),
+      },
+    });
+  });
+
+  afterEach(() => {
+    if (originalMediaDevices) {
+      Object.defineProperty(globalThis.navigator, "mediaDevices", originalMediaDevices);
+    } else {
+      Reflect.deleteProperty(globalThis.navigator, "mediaDevices");
+    }
+    vi.unstubAllGlobals();
+  });
+
+  it("does not submit collected audio after a recorder error and later stop", async () => {
+    const app = await createRecordingApp();
+    await app.toggleChatDictation();
+    const recorder = MockMediaRecorder.instances[0]!;
+
+    recorder.emitData(new Blob(["audio"], { type: "audio/webm" }));
+    recorder.emitError("microphone failed");
+    recorder.emitData(new Blob(["late audio"], { type: "audio/webm" }));
+    recorder.stop();
+
+    expect(transcribeChatAudioMock).not.toHaveBeenCalled();
+    expect(app.chatDictationStatus).toBe("error");
+    expect(app.chatDictationDetail).toBe("microphone failed");
+    expect(app.chatDictationChunks).toEqual([]);
+  });
+
+  it("releases recorded chunks after copying them for normal transcription", async () => {
+    const app = await createRecordingApp();
+    await app.toggleChatDictation();
+    const recorder = MockMediaRecorder.instances[0]!;
+    recorder.emitData(new Blob(["audio"], { type: "audio/webm" }));
+
+    await app.toggleChatDictation();
+
+    expect(app.chatDictationChunks).toEqual([]);
+    expect(transcribeChatAudioMock).toHaveBeenCalledTimes(1);
+    expect(transcribeChatAudioMock.mock.calls[0]?.[1]).toMatchObject({
+      size: 5,
+      type: "audio/webm",
+    });
+  });
+});
diff --git a/ui/src/ui/app.ts b/ui/src/ui/app.ts
index 548c1dd9bf0..5394c0ac9f1 100644
--- a/ui/src/ui/app.ts
+++ b/ui/src/ui/app.ts
@@ -984,11 +984,18 @@ export class OpenClawApp extends LitElement {
       this.chatDictationRecorder = recorder;
       this.chatDictationChunks = [];
       recorder.addEventListener("dataavailable", (event) => {
+        if (this.chatDictationRecorder !== recorder || this.chatDictationCancelNextStop) {
+          return;
+        }
         if (event.data.size > 0) {
           this.chatDictationChunks.push(event.data);
         }
       });
       recorder.addEventListener("error", (event) => {
+        if (this.chatDictationRecorder === recorder) {
+          this.chatDictationRecorder = null;
+        }
+        this.chatDictationChunks = [];
         this.chatDictationStatus = "error";
         this.chatDictationDetail =
           event.message || event.error?.message || "Dictation recording failed";
@@ -996,14 +1003,23 @@ export class OpenClawApp extends LitElement {
         this.stopChatDictationStream();
       });
       recorder.addEventListener("stop", () => {
-        const chunks = this.chatDictationChunks;
-        const canceled = this.chatDictationCancelNextStop;
+        const isCurrentRecorder = this.chatDictationRecorder === recorder;
+        const chunks = isCurrentRecorder ? this.chatDictationChunks : [];
+        if (isCurrentRecorder) {
+          this.chatDictationChunks = [];
+        }
+        const canceledByRequest = this.chatDictationCancelNextStop;
+        const canceled = canceledByRequest || !isCurrentRecorder;
         this.chatDictationCancelNextStop = false;
-        this.chatDictationRecorder = null;
-        this.stopChatDictationStream();
+        if (isCurrentRecorder) {
+          this.chatDictationRecorder = null;
+          this.stopChatDictationStream();
+        }
         if (canceled) {
-          this.chatDictationStatus = "idle";
-          this.chatDictationDetail = null;
+          if (canceledByRequest && this.chatDictationStatus !== "error") {
+            this.chatDictationStatus = "idle";
+            this.chatDictationDetail = null;
+          }
           return;
         }
         const blob = new Blob(chunks, {
@@ -1038,6 +1054,7 @@ export class OpenClawApp extends LitElement {
     }
     this.chatDictationRecorder = null;
     this.chatDictationChunks = [];
+    this.chatDictationCancelNextStop = false;
     this.stopChatDictationStream();
     this.chatDictationStatus = "idle";
     this.chatDictationDetail = null;