fix(clawsweeper): address review for automerge-openclaw-openclaw-76021 (2)

This commit is contained in:
clawsweeper
2026-05-02 20:36:57 +00:00
parent 6e3eb5d869
commit 36cecbb9b0
5 changed files with 130 additions and 26 deletions

View File

@@ -75,7 +75,7 @@ export type ChatHost = ChatInputHistoryState & {
onSlashAction?: (action: string) => void | Promise<void>;
};
export type ChatDictationStatus = "idle" | "recording" | "transcribing" | "error";
export type ChatDictationStatus = "idle" | "starting" | "recording" | "transcribing" | "error";
type ChatTranscribeAudioResult = {
text?: unknown;

View File

@@ -60,10 +60,28 @@ type AppWithDictationInternals = {
chatDictationDetail: string | null;
chatDictationChunks: Blob[];
toggleChatDictation: () => Promise<void>;
cancelChatDictation: () => void;
};
let originalMediaDevices: PropertyDescriptor | undefined;
// Build a deferred: a promise whose settle functions are exposed to the
// caller, so tests can resolve/reject it at a chosen point in the flow.
function createDeferred<T>() {
  // Definite-assignment (!) is safe: the Promise executor runs synchronously,
  // so both callbacks are captured before this function returns.
  let settle!: (value: T) => void;
  let fail!: (error: unknown) => void;
  const promise = new Promise<T>((res, rej) => {
    settle = res;
    fail = rej;
  });
  return { promise, resolve: settle, reject: fail };
}
// Fake MediaStream for tests: exposes the single track both through the real
// getTracks() API and as a `.track` handle so assertions can reach its spy.
function createMockStream(track = { stop: vi.fn() }) {
  const fake = {
    getTracks: () => [track],
    track,
  };
  // Double cast: the fake only implements what the code under test touches.
  return fake as unknown as MediaStream & { track: { stop: ReturnType<typeof vi.fn> } };
}
async function createRecordingApp() {
const { OpenClawApp } = await import("./app.ts");
const app = new OpenClawApp();
@@ -83,9 +101,7 @@ describe("OpenClawApp dictation recorder lifecycle", () => {
Object.defineProperty(globalThis.navigator, "mediaDevices", {
configurable: true,
value: {
getUserMedia: vi.fn(async () => ({
getTracks: () => [{ stop: vi.fn() }],
})),
getUserMedia: vi.fn(async () => createMockStream()),
},
});
});
@@ -130,4 +146,56 @@ describe("OpenClawApp dictation recorder lifecycle", () => {
type: "audio/webm",
});
});
it("ignores duplicate starts while microphone permission is pending", async () => {
  const app = await createRecordingApp();
  // Hold getUserMedia open so dictation stays in the "starting" phase.
  const micPermission = createDeferred<MediaStream>();
  const getUserMediaMock = vi.fn(() => micPermission.promise);
  Object.defineProperty(globalThis.navigator, "mediaDevices", {
    configurable: true,
    value: { getUserMedia: getUserMediaMock },
  });
  const mockStream = createMockStream();
  // Toggle twice before permission resolves; the second call must be a no-op.
  const initialToggle = app.toggleChatDictation();
  const duplicateToggle = app.toggleChatDictation();
  expect(getUserMediaMock).toHaveBeenCalledTimes(1);
  await duplicateToggle;
  expect(app.chatDictationStatus).toBe("starting");
  // Grant the microphone; only the first toggle should spin up a recorder.
  micPermission.resolve(mockStream);
  await initialToggle;
  expect(MockMediaRecorder.instances).toHaveLength(1);
  const recorder = MockMediaRecorder.instances[0];
  expect(recorder.state).toBe("recording");
  expect(mockStream.track.stop).not.toHaveBeenCalled();
  // Finish the recording and confirm exactly one stream stop + transcription.
  recorder.emitData(new Blob(["audio"], { type: "audio/webm" }));
  recorder.stop();
  expect(mockStream.track.stop).toHaveBeenCalledTimes(1);
  expect(transcribeChatAudioMock).toHaveBeenCalledTimes(1);
});
it("stops a microphone stream that resolves after pending dictation is canceled", async () => {
  const app = await createRecordingApp();
  // getUserMedia stays pending until we resolve it manually.
  const micPermission = createDeferred<MediaStream>();
  const getUserMediaMock = vi.fn(() => micPermission.promise);
  Object.defineProperty(globalThis.navigator, "mediaDevices", {
    configurable: true,
    value: { getUserMedia: getUserMediaMock },
  });
  const mockStream = createMockStream();
  // Cancel while permission is still pending, then let the stream arrive late.
  const toggle = app.toggleChatDictation();
  app.cancelChatDictation();
  micPermission.resolve(mockStream);
  await toggle;
  // The late stream must be torn down: no recorder, no transcription, idle UI.
  expect(MockMediaRecorder.instances).toHaveLength(0);
  expect(mockStream.track.stop).toHaveBeenCalledTimes(1);
  expect(app.chatDictationStatus).toBe("idle");
  expect(transcribeChatAudioMock).not.toHaveBeenCalled();
});
});

View File

@@ -230,6 +230,7 @@ export class OpenClawApp extends LitElement {
private chatDictationStream: MediaStream | null = null;
private chatDictationChunks: Blob[] = [];
private chatDictationCancelNextStop = false;
private chatDictationStartToken = 0;
@state() chatManualRefreshInFlight = false;
@state() chatMobileControlsOpen = false;
private chatMobileControlsTrigger: HTMLElement | null = null;
@@ -958,7 +959,7 @@ export class OpenClawApp extends LitElement {
this.chatDictationRecorder.stop();
return;
}
if (this.chatDictationStatus === "transcribing") {
if (this.chatDictationStatus === "starting" || this.chatDictationStatus === "transcribing") {
return;
}
if (!this.client || !this.connected) {
@@ -974,8 +975,16 @@ export class OpenClawApp extends LitElement {
return;
}
const startToken = ++this.chatDictationStartToken;
this.chatDictationStatus = "starting";
this.chatDictationDetail = "Starting dictation...";
let stream: MediaStream | null = null;
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
stream = await navigator.mediaDevices.getUserMedia({ audio: true });
if (this.chatDictationStartToken !== startToken || this.chatDictationStatus !== "starting") {
this.stopMediaStream(stream);
return;
}
const mimeType = ["audio/webm;codecs=opus", "audio/webm", "audio/mp4"].find((candidate) =>
MediaRecorder.isTypeSupported(candidate),
);
@@ -992,9 +1001,10 @@ export class OpenClawApp extends LitElement {
}
});
recorder.addEventListener("error", (event) => {
if (this.chatDictationRecorder === recorder) {
this.chatDictationRecorder = null;
if (this.chatDictationRecorder !== recorder) {
return;
}
this.chatDictationRecorder = null;
this.chatDictationChunks = [];
this.chatDictationStatus = "error";
this.chatDictationDetail =
@@ -1003,20 +1013,17 @@ export class OpenClawApp extends LitElement {
this.stopChatDictationStream();
});
recorder.addEventListener("stop", () => {
const isCurrentRecorder = this.chatDictationRecorder === recorder;
const chunks = isCurrentRecorder ? this.chatDictationChunks : [];
if (isCurrentRecorder) {
this.chatDictationChunks = [];
if (this.chatDictationRecorder !== recorder) {
return;
}
const chunks = this.chatDictationChunks;
this.chatDictationChunks = [];
const canceledByRequest = this.chatDictationCancelNextStop;
const canceled = canceledByRequest || !isCurrentRecorder;
this.chatDictationCancelNextStop = false;
if (isCurrentRecorder) {
this.chatDictationRecorder = null;
this.stopChatDictationStream();
}
if (canceled) {
if (canceledByRequest && this.chatDictationStatus !== "error") {
this.chatDictationRecorder = null;
this.stopChatDictationStream();
if (canceledByRequest) {
if (this.chatDictationStatus !== "error") {
this.chatDictationStatus = "idle";
this.chatDictationDetail = null;
}
@@ -1034,6 +1041,12 @@ export class OpenClawApp extends LitElement {
this.chatDictationDetail = "Recording dictation...";
recorder.start();
} catch (error) {
if (stream && this.chatDictationStream !== stream) {
this.stopMediaStream(stream);
}
if (this.chatDictationStartToken !== startToken) {
return;
}
this.chatDictationRecorder = null;
this.stopChatDictationStream();
this.chatDictationStatus = "error";
@@ -1043,11 +1056,16 @@ export class OpenClawApp extends LitElement {
}
private stopChatDictationStream() {
this.chatDictationStream?.getTracks().forEach((track) => track.stop());
this.stopMediaStream(this.chatDictationStream);
this.chatDictationStream = null;
}
// Stop every track on the given stream, releasing the microphone; a null
// stream is a harmless no-op.
private stopMediaStream(stream: MediaStream | null) {
  for (const track of stream?.getTracks() ?? []) {
    track.stop();
  }
}
cancelChatDictation() {
this.chatDictationStartToken += 1;
if (this.chatDictationRecorder?.state === "recording") {
this.chatDictationCancelNextStop = true;
this.chatDictationRecorder.stop();

View File

@@ -466,6 +466,20 @@ describe("chat voice controls", () => {
expect(container.textContent).toContain("Recording dictation");
});
it("disables duplicate dictation starts while microphone access is pending", () => {
  // Render the chat view frozen in the "starting" dictation phase.
  const view = renderChatView({
    chatDictationStatus: "starting",
    chatDictationDetail: null,
  });
  const dictationButton = view.querySelector<HTMLButtonElement>(
    '[aria-label="Dictate with server STT"]',
  );
  // The mic button must exist but be disabled, and the status text must show.
  expect(dictationButton).not.toBeNull();
  expect(dictationButton!.disabled).toBe(true);
  expect(view.textContent).toContain("Starting dictation");
});
it("keeps stop dictation enabled while recording after disconnect", () => {
const container = renderChatView({
connected: false,

View File

@@ -1206,11 +1206,13 @@ export function renderChat(props: ChatProps) {
? html`
<div class="agent-chat__stt-interim agent-chat__dictation-status">
${props.chatDictationDetail ??
(props.chatDictationStatus === "recording"
? "Recording dictation..."
: props.chatDictationStatus === "transcribing"
? "Transcribing dictation..."
: "Dictation unavailable")}
(props.chatDictationStatus === "starting"
? "Starting dictation..."
: props.chatDictationStatus === "recording"
? "Recording dictation..."
: props.chatDictationStatus === "transcribing"
? "Transcribing dictation..."
: "Dictation unavailable")}
</div>
`
: props.realtimeTalkActive || props.realtimeTalkDetail || props.realtimeTalkTranscript
@@ -1282,7 +1284,9 @@ export function renderChat(props: ChatProps) {
: "Dictate with server STT"}
?disabled=${props.chatDictationStatus === "recording"
? false
: !props.connected || props.chatDictationStatus === "transcribing"}
: !props.connected ||
props.chatDictationStatus === "starting" ||
props.chatDictationStatus === "transcribing"}
>
${props.chatDictationStatus === "recording" ? icons.stop : icons.mic}
</button>