feat(webchat): add server-side dictation

This commit is contained in:
clawsweeper
2026-05-02 20:30:31 +00:00
parent 92b28bd80d
commit 2f53db94a8
2 changed files with 156 additions and 6 deletions

133
ui/src/ui/app.test.ts Normal file
View File

@@ -0,0 +1,133 @@
/* @vitest-environment jsdom */
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
// vi.mock factories are hoisted above all imports, so the mock function must
// be created via vi.hoisted() to exist when the factory below runs.
const { transcribeChatAudioMock } = vi.hoisted(() => ({
  transcribeChatAudioMock: vi.fn(),
}));
// Partially mock app-chat.ts: keep every real export, but replace
// transcribeChatAudio so tests can observe calls and control its result.
vi.mock("./app-chat.ts", async (importOriginal) => {
  const actual = await importOriginal<typeof import("./app-chat.ts")>();
  return {
    ...actual,
    transcribeChatAudio: transcribeChatAudioMock,
  };
});
/**
 * Minimal stand-in for the DOM MediaRecorder used by the dictation tests.
 *
 * Every constructed instance is appended to the static `instances` list so a
 * test can reach the recorder the app created. `emitData` / `emitError` let
 * tests synthesize "dataavailable" and "error" events; `stop` flips the state
 * and fires "stop" (deliberately without the real API's state checks).
 */
class MockMediaRecorder extends EventTarget {
  static instances: MockMediaRecorder[] = [];
  // Mirrors MediaRecorder.isTypeSupported; only webm is reported as usable.
  static isTypeSupported = vi.fn((mimeType: string) => mimeType === "audio/webm");
  readonly mimeType: string;
  state: RecordingState = "inactive";
  constructor(
    readonly stream: MediaStream,
    options?: MediaRecorderOptions,
  ) {
    super();
    this.mimeType = options?.mimeType ?? "";
    MockMediaRecorder.instances.push(this);
  }
  start() {
    this.state = "recording";
  }
  stop() {
    this.state = "inactive";
    this.dispatchEvent(new Event("stop"));
  }
  // Fire a "dataavailable" event carrying the given blob, like a real
  // recorder flushing a chunk.
  emitData(data: Blob) {
    const dataEvent = Object.defineProperty(new Event("dataavailable"), "data", {
      value: data,
    }) as Event & { data: Blob };
    this.dispatchEvent(dataEvent);
  }
  // Fire an "error" event exposing both `error` and `message`, matching the
  // shape the app's error listener reads.
  emitError(message: string) {
    const errorEvent = new Event("error") as Event & { error: Error; message: string };
    Object.defineProperties(errorEvent, {
      error: { value: new Error(message) },
      message: { value: message },
    });
    this.dispatchEvent(errorEvent);
  }
}
// Structural view of OpenClawApp's dictation-related internals so tests can
// read private state without widening the public type. Field meanings are
// inferred from the assertions below — confirm against app.ts if they drift.
type AppWithDictationInternals = {
  client: unknown;
  connected: boolean;
  chatDictationStatus: string;
  chatDictationDetail: string | null;
  chatDictationChunks: Blob[];
  toggleChatDictation: () => Promise<void>;
};
// Saved property descriptor for navigator.mediaDevices, captured in
// beforeEach so afterEach can restore the real property (or delete the stub
// entirely when none existed).
let originalMediaDevices: PropertyDescriptor | undefined;
// Construct an OpenClawApp wired up just enough for dictation tests: a
// stubbed RPC client and connected=true so toggleChatDictation may start.
// app.ts is imported lazily so the vi.mock above is in effect first.
async function createRecordingApp() {
  const { OpenClawApp } = await import("./app.ts");
  const instance = new OpenClawApp();
  instance.client = { request: vi.fn() } as never;
  instance.connected = true;
  return instance as unknown as AppWithDictationInternals;
}
describe("OpenClawApp dictation recorder lifecycle", () => {
  beforeEach(() => {
    // Fresh mock state per test: no recorded calls, transcription resolves null.
    transcribeChatAudioMock.mockReset();
    transcribeChatAudioMock.mockResolvedValue(null);
    MockMediaRecorder.instances = [];
    MockMediaRecorder.isTypeSupported.mockClear();
    // Swap the global MediaRecorder for the test double.
    vi.stubGlobal("MediaRecorder", MockMediaRecorder);
    // Stub navigator.mediaDevices with a getUserMedia that yields a fake
    // stream whose tracks can be stopped; keep the original descriptor so
    // afterEach can put it back exactly as found.
    originalMediaDevices = Object.getOwnPropertyDescriptor(globalThis.navigator, "mediaDevices");
    Object.defineProperty(globalThis.navigator, "mediaDevices", {
      configurable: true,
      value: {
        getUserMedia: vi.fn(async () => ({
          getTracks: () => [{ stop: vi.fn() }],
        })),
      },
    });
  });
  afterEach(() => {
    // Restore navigator.mediaDevices: reinstate the saved descriptor, or
    // remove the stubbed property when the environment never had one.
    if (originalMediaDevices) {
      Object.defineProperty(globalThis.navigator, "mediaDevices", originalMediaDevices);
    } else {
      Reflect.deleteProperty(globalThis.navigator, "mediaDevices");
    }
    vi.unstubAllGlobals();
  });
  it("does not submit collected audio after a recorder error and later stop", async () => {
    const app = await createRecordingApp();
    // First toggle starts recording via the mocked MediaRecorder.
    await app.toggleChatDictation();
    const recorder = MockMediaRecorder.instances[0]!;
    recorder.emitData(new Blob(["audio"], { type: "audio/webm" }));
    recorder.emitError("microphone failed");
    // Data arriving after the error, and the eventual stop, must neither
    // trigger transcription nor clobber the error status/detail.
    recorder.emitData(new Blob(["late audio"], { type: "audio/webm" }));
    recorder.stop();
    expect(transcribeChatAudioMock).not.toHaveBeenCalled();
    expect(app.chatDictationStatus).toBe("error");
    expect(app.chatDictationDetail).toBe("microphone failed");
    expect(app.chatDictationChunks).toEqual([]);
  });
  it("releases recorded chunks after copying them for normal transcription", async () => {
    const app = await createRecordingApp();
    // Start recording, deliver one chunk, then toggle again to stop and submit.
    await app.toggleChatDictation();
    const recorder = MockMediaRecorder.instances[0]!;
    recorder.emitData(new Blob(["audio"], { type: "audio/webm" }));
    await app.toggleChatDictation();
    // The chunk buffer is cleared once the audio is handed off as one Blob.
    expect(app.chatDictationChunks).toEqual([]);
    expect(transcribeChatAudioMock).toHaveBeenCalledTimes(1);
    // Second argument to transcribeChatAudio is the assembled Blob:
    // 5 bytes ("audio"), webm mime type.
    expect(transcribeChatAudioMock.mock.calls[0]?.[1]).toMatchObject({
      size: 5,
      type: "audio/webm",
    });
  });
});

View File

@@ -984,11 +984,18 @@ export class OpenClawApp extends LitElement {
this.chatDictationRecorder = recorder;
this.chatDictationChunks = [];
recorder.addEventListener("dataavailable", (event) => {
if (this.chatDictationRecorder !== recorder || this.chatDictationCancelNextStop) {
return;
}
if (event.data.size > 0) {
this.chatDictationChunks.push(event.data);
}
});
recorder.addEventListener("error", (event) => {
if (this.chatDictationRecorder === recorder) {
this.chatDictationRecorder = null;
}
this.chatDictationChunks = [];
this.chatDictationStatus = "error";
this.chatDictationDetail =
event.message || event.error?.message || "Dictation recording failed";
@@ -996,14 +1003,23 @@ export class OpenClawApp extends LitElement {
this.stopChatDictationStream();
});
recorder.addEventListener("stop", () => {
const chunks = this.chatDictationChunks;
const canceled = this.chatDictationCancelNextStop;
const isCurrentRecorder = this.chatDictationRecorder === recorder;
const chunks = isCurrentRecorder ? this.chatDictationChunks : [];
if (isCurrentRecorder) {
this.chatDictationChunks = [];
}
const canceledByRequest = this.chatDictationCancelNextStop;
const canceled = canceledByRequest || !isCurrentRecorder;
this.chatDictationCancelNextStop = false;
this.chatDictationRecorder = null;
this.stopChatDictationStream();
if (isCurrentRecorder) {
this.chatDictationRecorder = null;
this.stopChatDictationStream();
}
if (canceled) {
this.chatDictationStatus = "idle";
this.chatDictationDetail = null;
if (canceledByRequest && this.chatDictationStatus !== "error") {
this.chatDictationStatus = "idle";
this.chatDictationDetail = null;
}
return;
}
const blob = new Blob(chunks, {
@@ -1038,6 +1054,7 @@ export class OpenClawApp extends LitElement {
}
this.chatDictationRecorder = null;
this.chatDictationChunks = [];
this.chatDictationCancelNextStop = false;
this.stopChatDictationStream();
this.chatDictationStatus = "idle";
this.chatDictationDetail = null;