feat(webchat): add server-side dictation (#76021)

Summary:
- This PR adds WebChat server-side dictation through a new authenticated `chat.transcribeAudio` Gateway RPC, MediaRecorder composer controls, docs/changelog updates, and focused gateway/UI tests.
- Reproducibility: yes. The feature's absence on current main is confirmed by inspection: the Gateway method list, write scopes, docs, and WebChat voice-control test have no `chat.transcribeAudio` server-dictation path.

ClawSweeper fixups:
- Included follow-up commit: feat(webchat): add server-side dictation
- Included follow-up commit: fix(clawsweeper): address review for automerge-openclaw-openclaw-7602…

Validation:
- ClawSweeper review passed for head 850571380a.
- Required merge gates passed before the squash merge.

Prepared head SHA: 850571380a
Review: https://github.com/openclaw/openclaw/pull/76021#issuecomment-4363514226

Co-authored-by: Peter Steinberger <steipete@gmail.com>
Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
This commit is contained in:
Peter Steinberger
2026-05-03 00:09:23 +01:00
committed by GitHub
parent 15bbf4f2f3
commit 68359cacbf
23 changed files with 847 additions and 23 deletions

View File

@@ -663,10 +663,19 @@
background: color-mix(in srgb, var(--danger, #ef4444) 14%, transparent);
}
/* Active (recording) state for the composer's dictation button. */
.agent-chat__input-btn--dictating {
color: var(--danger, #ef4444);
background: color-mix(in srgb, var(--danger, #ef4444) 14%, transparent);
}
/* Status line shown while realtime Talk is active. */
.agent-chat__talk-status {
color: var(--text);
}
/* Status line shown while server-side dictation is in progress. */
.agent-chat__dictation-status {
color: var(--text);
}
.agent-chat__input-divider {
width: 1px;
height: 16px;

View File

@@ -44,6 +44,7 @@ let handleAbortChat: typeof import("./app-chat.ts").handleAbortChat;
let refreshChatAvatar: typeof import("./app-chat.ts").refreshChatAvatar;
let clearPendingQueueItemsForRun: typeof import("./app-chat.ts").clearPendingQueueItemsForRun;
let removeQueuedMessage: typeof import("./app-chat.ts").removeQueuedMessage;
let transcribeChatAudio: typeof import("./app-chat.ts").transcribeChatAudio;
async function loadChatHelpers(): Promise<void> {
({
@@ -54,6 +55,7 @@ async function loadChatHelpers(): Promise<void> {
refreshChatAvatar,
clearPendingQueueItemsForRun,
removeQueuedMessage,
transcribeChatAudio,
} = await import("./app-chat.ts"));
}
@@ -103,12 +105,73 @@ function makeHost(overrides?: Partial<ChatHost>): ChatHost {
toolStreamById: new Map(),
toolStreamOrder: [],
toolStreamSyncTimer: null,
chatDictationStatus: "idle",
chatDictationDetail: null,
updateComplete: Promise.resolve(),
...overrides,
};
return host as ChatHost;
}
// Gateway-facing behavior of transcribeChatAudio: happy path, RPC failure,
// and the client-side size guard. The gateway client is a plain vi.fn stub.
describe("transcribeChatAudio", () => {
  beforeAll(async () => {
    await loadChatHelpers();
  });
  // Happy path: audio is sent to the RPC and the transcript is appended to the draft.
  it("sends recorded audio to the gateway and appends the transcript to the draft", async () => {
    const request = vi.fn(async () => ({ text: "new words" }));
    const host = makeHost({
      client: { request } as never,
      chatMessage: "existing",
    });
    await transcribeChatAudio(host, new Blob([new Uint8Array([1, 2, 3])], { type: "audio/webm" }));
    // Bytes [1, 2, 3] base64-encode to "AQID".
    expect(request).toHaveBeenCalledWith("chat.transcribeAudio", {
      audioBase64: "AQID",
      mimeType: "audio/webm",
    });
    expect(host.chatMessage).toBe("existing new words");
    expect(host.chatDictationStatus).toBe("idle");
    expect(host.chatDictationDetail).toBeNull();
  });
  // RPC failures must leave the draft untouched and surface via status/detail/lastError.
  it("surfaces gateway transcription errors without changing the draft", async () => {
    const request = vi.fn(async () => {
      throw new Error("no provider");
    });
    const host = makeHost({
      client: { request } as never,
      chatMessage: "existing",
    });
    await transcribeChatAudio(host, new Blob([new Uint8Array([1])], { type: "audio/ogg" }));
    expect(host.chatMessage).toBe("existing");
    expect(host.chatDictationStatus).toBe("error");
    expect(host.chatDictationDetail).toBe("no provider");
    expect(host.lastError).toBe("no provider");
  });
  // Oversized clips are rejected locally: no RPC call is made at all.
  it("rejects oversized dictation before sending it over the gateway socket", async () => {
    const request = vi.fn();
    const host = makeHost({
      client: { request } as never,
      chatMessage: "existing",
    });
    // One byte past the 18 MiB limit defined in app-chat.ts.
    await transcribeChatAudio(
      host,
      new Blob([new Uint8Array(18 * 1024 * 1024 + 1)], { type: "audio/webm" }),
    );
    expect(request).not.toHaveBeenCalled();
    expect(host.chatMessage).toBe("existing");
    expect(host.chatDictationStatus).toBe("error");
    expect(host.chatDictationDetail).toContain("too large");
  });
});
function createSessionsResult(sessions: GatewaySessionRow[]): SessionsListResult {
return {
ts: 0,

View File

@@ -17,6 +17,7 @@ import {
type ChatInputHistoryKeyResult,
type ChatInputHistoryState,
} from "./chat/input-history.ts";
import { bytesToBase64 } from "./chat/realtime-talk-audio.ts";
import type { ChatSideResult } from "./chat/side-result.ts";
import { executeSlashCommand } from "./chat/slash-command-executor.ts";
import { parseSlashCommand, refreshSlashCommands } from "./chat/slash-commands.ts";
@@ -68,10 +69,22 @@ export type ChatHost = ChatInputHistoryState & {
refreshSessionsAfterChat: Set<string>;
pendingAbort?: { runId?: string | null; sessionKey: string } | null;
chatSubmitGuards?: Map<string, Promise<void>>;
chatDictationStatus?: ChatDictationStatus;
chatDictationDetail?: string | null;
/** Callback for slash-command side effects that need app-level access. */
onSlashAction?: (action: string) => void | Promise<void>;
};
/** Lifecycle states of the WebChat server-side dictation flow. */
export type ChatDictationStatus = "idle" | "starting" | "recording" | "transcribing" | "error";

/** Loosely-typed shape of the `chat.transcribeAudio` RPC result; fields are validated before use. */
type ChatTranscribeAudioResult = {
  text?: unknown;
  provider?: unknown;
  model?: unknown;
};

/** Maximum dictation clip size accepted before sending over the gateway socket (18 MiB). */
export const CHAT_TRANSCRIBE_AUDIO_MAX_BYTES = 18 * 1024 * 1024;
export type ChatSendOptions = {
confirmReset?: boolean;
restoreDraft?: boolean;
@@ -123,6 +136,60 @@ export function isChatStopCommand(text: string) {
);
}
/**
 * Appends a trimmed transcript to the draft, separated by a single space.
 * A blank transcript leaves the draft unchanged; a blank draft yields the
 * transcript alone.
 */
function appendDictationText(draft: string, transcript: string): string {
  const addition = transcript.trim();
  if (addition.length === 0) {
    return draft;
  }
  const base = draft.trimEnd();
  if (!base) {
    return addition;
  }
  return `${base} ${addition}`;
}
/**
 * Sends a recorded dictation clip to the gateway `chat.transcribeAudio` RPC
 * and appends the trimmed transcript to the current chat draft.
 *
 * Failures never throw: a disconnected gateway, empty or oversized clip, RPC
 * error, or missing transcript sets chatDictationStatus to "error", records
 * the message in chatDictationDetail and lastError, and returns null.
 *
 * @param host  Chat state holder; mutated in place (status, detail, draft).
 * @param audio Recorded audio blob; its `type` is used as the MIME hint,
 *              falling back to "audio/webm" when empty.
 * @returns The transcript on success, otherwise null.
 */
export async function transcribeChatAudio(host: ChatHost, audio: Blob): Promise<string | null> {
  if (!host.client || !host.connected) {
    host.chatDictationStatus = "error";
    host.chatDictationDetail = "Gateway not connected";
    host.lastError = host.chatDictationDetail;
    return null;
  }
  if (audio.size <= 0) {
    host.chatDictationStatus = "error";
    host.chatDictationDetail = "No audio captured";
    host.lastError = host.chatDictationDetail;
    return null;
  }
  // Reject oversized clips locally so they never travel over the socket.
  if (audio.size > CHAT_TRANSCRIBE_AUDIO_MAX_BYTES) {
    host.chatDictationStatus = "error";
    host.chatDictationDetail = `Audio clip is too large for WebChat dictation. Keep recordings under ${CHAT_TRANSCRIBE_AUDIO_MAX_BYTES} bytes.`;
    host.lastError = host.chatDictationDetail;
    return null;
  }
  host.chatDictationStatus = "transcribing";
  host.chatDictationDetail = "Transcribing dictation...";
  try {
    // Base64-encode the raw bytes for transport in the JSON RPC payload.
    const bytes = new Uint8Array(await audio.arrayBuffer());
    const mimeType = audio.type || "audio/webm";
    const result = await host.client.request<ChatTranscribeAudioResult>("chat.transcribeAudio", {
      audioBase64: bytesToBase64(bytes),
      mimeType,
    });
    // The RPC result is loosely typed; only a non-empty string counts.
    const transcript = typeof result.text === "string" ? result.text.trim() : "";
    if (!transcript) {
      throw new Error("No transcript returned");
    }
    host.chatMessage = appendDictationText(host.chatMessage, transcript);
    host.chatDictationStatus = "idle";
    host.chatDictationDetail = null;
    return transcript;
  } catch (err) {
    host.chatDictationStatus = "error";
    host.chatDictationDetail = err instanceof Error ? err.message : String(err);
    host.lastError = host.chatDictationDetail;
    return null;
  }
}
function isChatResetCommand(text: string) {
const trimmed = text.trim();
if (!trimmed) {

View File

@@ -103,6 +103,7 @@ type GatewayHost = {
sessionKey: string;
chatRunId: string | null;
pendingAbort?: { runId?: string | null; sessionKey: string } | null;
cancelChatDictation?: () => void;
refreshSessionsAfterChat: Set<string>;
execApprovalQueue: ExecApprovalRequest[];
execApprovalError: string | null;
@@ -483,6 +484,7 @@ export function connectGateway(host: GatewayHost, options?: ConnectGatewayOption
return;
}
host.connected = false;
host.cancelChatDictation?.();
// Code 1012 = Service Restart (expected during config saves, don't show as error)
host.lastErrorCode =
resolveGatewayErrorDetailCode(error) ??

View File

@@ -34,6 +34,8 @@ describe("handleDisconnected", () => {
});
const removeSpy = vi.spyOn(window, "removeEventListener").mockImplementation(() => undefined);
const host = createHost();
const cancelChatDictation = vi.fn();
Object.assign(host, { cancelChatDictation });
const disconnectSpy = (
host.topbarObserver as unknown as { disconnect: ReturnType<typeof vi.fn> }
).disconnect;
@@ -42,6 +44,7 @@ describe("handleDisconnected", () => {
expect(removeSpy).toHaveBeenCalledWith("popstate", host.popStateHandler);
expect(host.connectGeneration).toBe(1);
expect(cancelChatDictation).toHaveBeenCalledTimes(1);
expect(host.client).toBeNull();
expect(host.connected).toBe(false);
expect(disconnectSpy).toHaveBeenCalledTimes(1);

View File

@@ -41,6 +41,7 @@ type LifecycleHost = {
realtimeTalkStatus?: string;
realtimeTalkDetail?: string | null;
realtimeTalkTranscript?: string | null;
cancelChatDictation?: () => void;
chatLoading: boolean;
chatMessages: unknown[];
chatToolMessages: unknown[];
@@ -91,6 +92,7 @@ export function handleDisconnected(host: LifecycleHost) {
host.realtimeTalkStatus = "idle";
host.realtimeTalkDetail = null;
host.realtimeTalkTranscript = null;
host.cancelChatDictation?.();
host.client?.stop();
host.client = null;
host.connected = false;

View File

@@ -2342,6 +2342,8 @@ export function renderApp(state: AppViewState) {
realtimeTalkStatus: state.realtimeTalkStatus,
realtimeTalkDetail: state.realtimeTalkDetail,
realtimeTalkTranscript: state.realtimeTalkTranscript,
chatDictationStatus: state.chatDictationStatus,
chatDictationDetail: state.chatDictationDetail,
connected: state.connected,
canSend: state.connected,
disabledReason: chatDisabledReason,
@@ -2373,6 +2375,7 @@ export function renderApp(state: AppViewState) {
onSend: () => state.handleSendChat(),
onCompact: () => state.handleSendChat("/compact", { restoreDraft: true }),
onToggleRealtimeTalk: () => state.toggleRealtimeTalk(),
onToggleChatDictation: () => state.toggleChatDictation(),
canAbort: hasAbortableSessionRun(state),
onAbort: () => void state.handleAbortChat(),
onQueueRemove: (id) => state.removeQueuedMessage(id),

View File

@@ -1,4 +1,4 @@
import type { ChatSendOptions } from "./app-chat.ts";
import type { ChatDictationStatus, ChatSendOptions } from "./app-chat.ts";
import type { EventLogEntry } from "./app-events.ts";
import type { CompactionStatus, FallbackStatus } from "./app-tool-stream.ts";
import type { ChatInputHistoryKeyInput, ChatInputHistoryKeyResult } from "./chat/input-history.ts";
@@ -119,6 +119,8 @@ export type AppViewState = {
realtimeTalkStatus: RealtimeTalkStatus;
realtimeTalkDetail: string | null;
realtimeTalkTranscript: string | null;
chatDictationStatus: ChatDictationStatus;
chatDictationDetail: string | null;
chatManualRefreshInFlight: boolean;
chatMobileControlsOpen: boolean;
nodesLoading: boolean;
@@ -470,6 +472,7 @@ export type AppViewState = {
resetChatInputHistoryNavigation: () => void;
handleSendChat: (messageOverride?: string, opts?: ChatSendOptions) => Promise<void>;
toggleRealtimeTalk: () => Promise<void>;
toggleChatDictation: () => Promise<void>;
steerQueuedChatMessage: (id: string) => Promise<void>;
handleAbortChat: () => Promise<void>;
removeQueuedMessage: (id: string) => void;

205
ui/src/ui/app.test.ts Normal file
View File

@@ -0,0 +1,205 @@
/* @vitest-environment jsdom */
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";

// Hoisted so the vi.mock factory below can reference the mock before module
// initialization runs.
const { transcribeChatAudioMock } = vi.hoisted(() => ({
  transcribeChatAudioMock: vi.fn(),
}));

// Keep the real app-chat module but stub out the gateway transcription call;
// these tests exercise the recorder lifecycle, not the RPC itself.
vi.mock("./app-chat.ts", async (importOriginal) => {
  const actual = await importOriginal<typeof import("./app-chat.ts")>();
  return {
    ...actual,
    transcribeChatAudio: transcribeChatAudioMock,
  };
});
/**
 * Test double for MediaRecorder. Records every constructed instance in a
 * static list and lets tests dispatch dataavailable/error/stop events by
 * hand. Only "audio/webm" is reported as a supported type.
 */
class MockMediaRecorder extends EventTarget {
  static instances: MockMediaRecorder[] = [];
  static isTypeSupported = vi.fn((mimeType: string) => mimeType === "audio/webm");
  readonly mimeType: string;
  state: RecordingState = "inactive";
  constructor(
    readonly stream: MediaStream,
    options?: MediaRecorderOptions,
  ) {
    super();
    this.mimeType = options?.mimeType ?? "";
    MockMediaRecorder.instances.push(this);
  }
  start() {
    this.state = "recording";
  }
  // NOTE: dispatches "stop" synchronously, unlike the real async API.
  stop() {
    this.state = "inactive";
    this.dispatchEvent(new Event("stop"));
  }
  // Dispatch a dataavailable event carrying the given blob.
  emitData(data: Blob) {
    const event = new Event("dataavailable") as Event & { data: Blob };
    Object.defineProperty(event, "data", { value: data });
    this.dispatchEvent(event);
  }
  // Dispatch an error event exposing both `.error` and `.message`.
  emitError(message: string) {
    const event = new Event("error") as Event & { error: Error; message: string };
    Object.defineProperty(event, "error", { value: new Error(message) });
    Object.defineProperty(event, "message", { value: message });
    this.dispatchEvent(event);
  }
}
// Narrow structural view of OpenClawApp's dictation internals (including
// private fields) that these tests poke at directly.
type AppWithDictationInternals = {
  client: unknown;
  connected: boolean;
  chatDictationStatus: string;
  chatDictationDetail: string | null;
  chatDictationChunks: Blob[];
  toggleChatDictation: () => Promise<void>;
  cancelChatDictation: () => void;
};

// Original navigator.mediaDevices descriptor, restored in afterEach.
let originalMediaDevices: PropertyDescriptor | undefined;
/**
 * Builds a promise whose resolve/reject controls are exposed alongside it,
 * so tests can settle it at a chosen moment.
 */
function createDeferred<T>() {
  const controls = {} as {
    promise: Promise<T>;
    resolve: (value: T) => void;
    reject: (error: unknown) => void;
  };
  controls.promise = new Promise<T>((res, rej) => {
    controls.resolve = res;
    controls.reject = rej;
  });
  return controls;
}
// Minimal MediaStream stub that exposes its single track so tests can assert
// whether track.stop() was called.
function createMockStream(track = { stop: vi.fn() }) {
  return {
    getTracks: () => [track],
    track,
  } as unknown as MediaStream & { track: { stop: ReturnType<typeof vi.fn> } };
}
// Builds an OpenClawApp wired as connected, with a stub gateway client, and
// returns it through the narrowed internals type used by these tests.
async function createRecordingApp() {
  const { OpenClawApp } = await import("./app.ts");
  const app = new OpenClawApp();
  app.client = { request: vi.fn() } as never;
  app.connected = true;
  return app as unknown as AppWithDictationInternals;
}
// Recorder lifecycle tests: error handling, chunk release, duplicate-start
// suppression, and cancellation racing a pending getUserMedia.
describe("OpenClawApp dictation recorder lifecycle", () => {
  beforeEach(() => {
    transcribeChatAudioMock.mockReset();
    transcribeChatAudioMock.mockResolvedValue(null);
    MockMediaRecorder.instances = [];
    MockMediaRecorder.isTypeSupported.mockClear();
    vi.stubGlobal("MediaRecorder", MockMediaRecorder);
    // Swap in a stub mediaDevices; the saved descriptor is restored afterEach.
    originalMediaDevices = Object.getOwnPropertyDescriptor(globalThis.navigator, "mediaDevices");
    Object.defineProperty(globalThis.navigator, "mediaDevices", {
      configurable: true,
      value: {
        getUserMedia: vi.fn(async () => createMockStream()),
      },
    });
  });
  afterEach(() => {
    if (originalMediaDevices) {
      Object.defineProperty(globalThis.navigator, "mediaDevices", originalMediaDevices);
    } else {
      Reflect.deleteProperty(globalThis.navigator, "mediaDevices");
    }
    vi.unstubAllGlobals();
  });
  it("does not submit collected audio after a recorder error and later stop", async () => {
    const app = await createRecordingApp();
    await app.toggleChatDictation();
    const recorder = MockMediaRecorder.instances[0];
    recorder.emitData(new Blob(["audio"], { type: "audio/webm" }));
    recorder.emitError("microphone failed");
    // Data and stop events arriving after the error must be ignored.
    recorder.emitData(new Blob(["late audio"], { type: "audio/webm" }));
    recorder.stop();
    expect(transcribeChatAudioMock).not.toHaveBeenCalled();
    expect(app.chatDictationStatus).toBe("error");
    expect(app.chatDictationDetail).toBe("microphone failed");
    expect(app.chatDictationChunks).toEqual([]);
  });
  it("releases recorded chunks after copying them for normal transcription", async () => {
    const app = await createRecordingApp();
    await app.toggleChatDictation();
    const recorder = MockMediaRecorder.instances[0];
    recorder.emitData(new Blob(["audio"], { type: "audio/webm" }));
    const transcription = createDeferred<null>();
    transcribeChatAudioMock.mockReturnValueOnce(transcription.promise);
    // Second toggle stops the recorder; the mock's stop() fires synchronously.
    await app.toggleChatDictation();
    expect(app.chatDictationChunks).toEqual([]);
    expect(transcribeChatAudioMock).toHaveBeenCalledTimes(1);
    // "audio" is 5 bytes.
    expect(transcribeChatAudioMock.mock.calls[0]?.[1]).toMatchObject({
      size: 5,
      type: "audio/webm",
    });
    transcription.resolve(null);
    await transcription.promise;
  });
  it("ignores duplicate starts while microphone permission is pending", async () => {
    const app = await createRecordingApp();
    const pendingUserMedia = createDeferred<MediaStream>();
    const getUserMedia = vi.fn(() => pendingUserMedia.promise);
    Object.defineProperty(globalThis.navigator, "mediaDevices", {
      configurable: true,
      value: { getUserMedia },
    });
    const stream = createMockStream();
    const firstStart = app.toggleChatDictation();
    const secondStart = app.toggleChatDictation();
    // Only the first toggle may request the microphone.
    expect(getUserMedia).toHaveBeenCalledTimes(1);
    await secondStart;
    expect(app.chatDictationStatus).toBe("starting");
    pendingUserMedia.resolve(stream);
    await firstStart;
    expect(MockMediaRecorder.instances).toHaveLength(1);
    expect(MockMediaRecorder.instances[0].state).toBe("recording");
    expect(stream.track.stop).not.toHaveBeenCalled();
    MockMediaRecorder.instances[0].emitData(new Blob(["audio"], { type: "audio/webm" }));
    MockMediaRecorder.instances[0].stop();
    expect(stream.track.stop).toHaveBeenCalledTimes(1);
    expect(transcribeChatAudioMock).toHaveBeenCalledTimes(1);
  });
  it("stops a microphone stream that resolves after pending dictation is canceled", async () => {
    const app = await createRecordingApp();
    const pendingUserMedia = createDeferred<MediaStream>();
    const getUserMedia = vi.fn(() => pendingUserMedia.promise);
    Object.defineProperty(globalThis.navigator, "mediaDevices", {
      configurable: true,
      value: { getUserMedia },
    });
    const stream = createMockStream();
    const start = app.toggleChatDictation();
    // Cancel while getUserMedia is still pending; the late stream must be stopped.
    app.cancelChatDictation();
    pendingUserMedia.resolve(stream);
    await start;
    expect(MockMediaRecorder.instances).toHaveLength(0);
    expect(stream.track.stop).toHaveBeenCalledTimes(1);
    expect(app.chatDictationStatus).toBe("idle");
    expect(transcribeChatAudioMock).not.toHaveBeenCalled();
  });
});

View File

@@ -22,8 +22,10 @@ import {
removeQueuedMessage as removeQueuedMessageInternal,
resetChatInputHistoryNavigation as resetChatInputHistoryNavigationInternal,
steerQueuedChatMessage as steerQueuedChatMessageInternal,
transcribeChatAudio as transcribeChatAudioInternal,
type ChatInputHistoryKeyInput,
type ChatInputHistoryKeyResult,
type ChatDictationStatus,
} from "./app-chat.ts";
import { DEFAULT_CRON_FORM, DEFAULT_LOG_LEVEL_FILTERS } from "./app-defaults.ts";
import type { EventLogEntry } from "./app-events.ts";
@@ -222,6 +224,13 @@ export class OpenClawApp extends LitElement {
@state() realtimeTalkDetail: string | null = null;
@state() realtimeTalkTranscript: string | null = null;
private realtimeTalkSession: RealtimeTalkSession | null = null;
@state() chatDictationStatus: ChatDictationStatus = "idle";
@state() chatDictationDetail: string | null = null;
private chatDictationRecorder: MediaRecorder | null = null;
private chatDictationStream: MediaStream | null = null;
private chatDictationChunks: Blob[] = [];
private chatDictationCancelNextStop = false;
private chatDictationStartToken = 0;
@state() chatManualRefreshInFlight = false;
@state() chatMobileControlsOpen = false;
private chatMobileControlsTrigger: HTMLElement | null = null;
@@ -944,6 +953,129 @@ export class OpenClawApp extends LitElement {
}
}
/**
 * Starts or stops microphone dictation for the chat composer.
 *
 * - While recording: stops the recorder; its "stop" listener submits audio.
 * - While "starting"/"transcribing": no-op, so duplicate toggles are ignored.
 * - Otherwise: requests the microphone, wires a MediaRecorder, and records.
 *
 * Errors (no gateway, unsupported browser, getUserMedia/recorder failures)
 * surface via chatDictationStatus/chatDictationDetail and lastError.
 */
async toggleChatDictation() {
  // Toggle off: stopping the recorder hands control to its stop listener.
  if (this.chatDictationRecorder && this.chatDictationStatus === "recording") {
    this.chatDictationRecorder.stop();
    return;
  }
  if (this.chatDictationStatus === "starting" || this.chatDictationStatus === "transcribing") {
    return;
  }
  if (!this.client || !this.connected) {
    this.chatDictationStatus = "error";
    this.chatDictationDetail = "Gateway not connected";
    this.lastError = this.chatDictationDetail;
    return;
  }
  if (!navigator.mediaDevices?.getUserMedia || typeof MediaRecorder === "undefined") {
    this.chatDictationStatus = "error";
    this.chatDictationDetail = "Browser microphone recording is unavailable";
    this.lastError = this.chatDictationDetail;
    return;
  }
  // Token identifies this start; a later cancel/start bumps the counter so a
  // getUserMedia that resolves afterwards can detect it is stale.
  const startToken = ++this.chatDictationStartToken;
  this.chatDictationStatus = "starting";
  this.chatDictationDetail = "Starting dictation...";
  let stream: MediaStream | null = null;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    // Canceled or superseded while the permission prompt was pending:
    // release the now-unwanted stream and bail.
    if (this.chatDictationStartToken !== startToken || this.chatDictationStatus !== "starting") {
      this.stopMediaStream(stream);
      return;
    }
    // Prefer opus-in-webm, then fall back through supported container types.
    const mimeType = ["audio/webm;codecs=opus", "audio/webm", "audio/mp4"].find((candidate) =>
      MediaRecorder.isTypeSupported(candidate),
    );
    const recorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined);
    this.chatDictationStream = stream;
    this.chatDictationRecorder = recorder;
    this.chatDictationChunks = [];
    recorder.addEventListener("dataavailable", (event) => {
      // Drop data from stale recorders or after a cancel was requested.
      if (this.chatDictationRecorder !== recorder || this.chatDictationCancelNextStop) {
        return;
      }
      if (event.data.size > 0) {
        this.chatDictationChunks.push(event.data);
      }
    });
    recorder.addEventListener("error", (event) => {
      if (this.chatDictationRecorder !== recorder) {
        return;
      }
      // Detach the recorder and discard chunks so a later "stop" event
      // (which checks chatDictationRecorder !== recorder) submits nothing.
      this.chatDictationRecorder = null;
      this.chatDictationChunks = [];
      this.chatDictationStatus = "error";
      this.chatDictationDetail =
        event.message || event.error?.message || "Dictation recording failed";
      this.lastError = this.chatDictationDetail;
      this.stopChatDictationStream();
    });
    recorder.addEventListener("stop", () => {
      if (this.chatDictationRecorder !== recorder) {
        return;
      }
      // Copy-and-clear the chunks so recorder state is released before the
      // (async) transcription begins.
      const chunks = this.chatDictationChunks.splice(0);
      const canceledByRequest = this.chatDictationCancelNextStop;
      this.chatDictationCancelNextStop = false;
      this.chatDictationRecorder = null;
      this.stopChatDictationStream();
      if (canceledByRequest) {
        if (this.chatDictationStatus !== "error") {
          this.chatDictationStatus = "idle";
          this.chatDictationDetail = null;
        }
        return;
      }
      const blob = new Blob(chunks, {
        type: recorder.mimeType || chunks[0]?.type || "audio/webm",
      });
      // Fire-and-forget: transcribeChatAudio reports its own errors on host state.
      void transcribeChatAudioInternal(
        this as unknown as Parameters<typeof transcribeChatAudioInternal>[0],
        blob,
      );
    });
    this.chatDictationStatus = "recording";
    this.chatDictationDetail = "Recording dictation...";
    recorder.start();
  } catch (error) {
    // Only release a stream that was never promoted to chatDictationStream;
    // stopChatDictationStream below handles the promoted one.
    if (stream && this.chatDictationStream !== stream) {
      this.stopMediaStream(stream);
    }
    // A newer start/cancel owns the state now; do not clobber it.
    if (this.chatDictationStartToken !== startToken) {
      return;
    }
    this.chatDictationRecorder = null;
    this.stopChatDictationStream();
    this.chatDictationStatus = "error";
    this.chatDictationDetail = error instanceof Error ? error.message : String(error);
    this.lastError = this.chatDictationDetail;
  }
}
/** Releases the active dictation microphone stream, if any, and clears the reference. */
private stopChatDictationStream() {
  const active = this.chatDictationStream;
  this.chatDictationStream = null;
  this.stopMediaStream(active);
}
/** Stops every track on the given media stream; tolerates null. */
private stopMediaStream(stream: MediaStream | null) {
  if (!stream) {
    return;
  }
  for (const track of stream.getTracks()) {
    track.stop();
  }
}
/**
 * Aborts any in-flight dictation: invalidates pending getUserMedia starts,
 * stops an active recorder without submitting its audio, releases the mic
 * stream, and resets dictation state to idle.
 */
cancelChatDictation() {
  // Bump the token so a getUserMedia still awaiting fails its staleness check.
  this.chatDictationStartToken += 1;
  if (this.chatDictationRecorder?.state === "recording") {
    // Mark the resulting stop event as a cancel so no transcription runs.
    this.chatDictationCancelNextStop = true;
    this.chatDictationRecorder.stop();
  }
  this.chatDictationRecorder = null;
  this.chatDictationChunks = [];
  this.chatDictationCancelNextStop = false;
  this.stopChatDictationStream();
  this.chatDictationStatus = "idle";
  this.chatDictationDetail = null;
}
async steerQueuedChatMessage(id: string) {
await steerQueuedChatMessageInternal(
this as unknown as Parameters<typeof steerQueuedChatMessageInternal>[0],

View File

@@ -334,6 +334,8 @@ function renderChatView(overrides: Partial<Parameters<typeof renderChat>[0]> = {
realtimeTalkStatus: "idle",
realtimeTalkDetail: null,
realtimeTalkTranscript: null,
chatDictationStatus: "idle",
chatDictationDetail: null,
connected: true,
canSend: true,
disabledReason: null,
@@ -366,6 +368,7 @@ function renderChatView(overrides: Partial<Parameters<typeof renderChat>[0]> = {
onSend: () => undefined,
onCompact: () => undefined,
onToggleRealtimeTalk: () => undefined,
onToggleChatDictation: () => undefined,
onAbort: () => undefined,
onQueueRemove: () => undefined,
onQueueSteer: () => undefined,
@@ -445,12 +448,48 @@ describe("chat loading skeleton", () => {
});
describe("chat voice controls", () => {
it("keeps Talk visible without the stale browser dictation button", () => {
it("shows server dictation and Talk without the stale browser dictation button", () => {
const container = renderChatView();
expect(container.querySelector('[aria-label="Dictate with server STT"]')).not.toBeNull();
expect(container.querySelector('[aria-label="Start Talk"]')).not.toBeNull();
expect(container.querySelector('[aria-label="Voice input"]')).toBeNull();
});
it("shows dictation recording state", () => {
const container = renderChatView({
chatDictationStatus: "recording",
chatDictationDetail: null,
});
expect(container.querySelector('[aria-label="Stop dictation"]')).not.toBeNull();
expect(container.textContent).toContain("Recording dictation");
});
it("disables duplicate dictation starts while microphone access is pending", () => {
const container = renderChatView({
chatDictationStatus: "starting",
chatDictationDetail: null,
});
const button = container.querySelector<HTMLButtonElement>(
'[aria-label="Dictate with server STT"]',
);
expect(button).not.toBeNull();
expect(button!.disabled).toBe(true);
expect(container.textContent).toContain("Starting dictation");
});
it("keeps stop dictation enabled while recording after disconnect", () => {
const container = renderChatView({
connected: false,
chatDictationStatus: "recording",
});
const button = container.querySelector<HTMLButtonElement>('[aria-label="Stop dictation"]');
expect(button).not.toBeNull();
expect(button!.disabled).toBe(false);
});
});
describe("chat slash menu accessibility", () => {

View File

@@ -3,6 +3,7 @@ import { ifDefined } from "lit/directives/if-defined.js";
import { ref } from "lit/directives/ref.js";
import { repeat } from "lit/directives/repeat.js";
import { t } from "../../i18n/index.ts";
import type { ChatDictationStatus } from "../app-chat.ts";
import type { CompactionStatus, FallbackStatus } from "../app-tool-stream.ts";
import {
getChatAttachmentPreviewUrl,
@@ -77,6 +78,8 @@ export type ChatProps = {
realtimeTalkStatus?: RealtimeTalkStatus;
realtimeTalkDetail?: string | null;
realtimeTalkTranscript?: string | null;
chatDictationStatus?: ChatDictationStatus;
chatDictationDetail?: string | null;
connected: boolean;
canSend: boolean;
disabledReason: string | null;
@@ -110,6 +113,7 @@ export type ChatProps = {
onSend: () => void;
onCompact?: () => void | Promise<void>;
onToggleRealtimeTalk?: () => void;
onToggleChatDictation?: () => void;
onAbort?: () => void;
onQueueRemove: (id: string) => void;
onQueueSteer?: (id: string) => void;
@@ -1198,19 +1202,32 @@ export function renderChat(props: ChatProps) {
@change=${(e: Event) => handleFileSelect(e, props)}
/>
${props.realtimeTalkActive || props.realtimeTalkDetail || props.realtimeTalkTranscript
${props.chatDictationStatus && props.chatDictationStatus !== "idle"
? html`
<div class="agent-chat__stt-interim agent-chat__talk-status">
${props.realtimeTalkDetail ??
props.realtimeTalkTranscript ??
(props.realtimeTalkStatus === "thinking"
? "Asking OpenClaw..."
: props.realtimeTalkStatus === "connecting"
? "Connecting Talk..."
: "Talk live")}
<div class="agent-chat__stt-interim agent-chat__dictation-status">
${props.chatDictationDetail ??
(props.chatDictationStatus === "starting"
? "Starting dictation..."
: props.chatDictationStatus === "recording"
? "Recording dictation..."
: props.chatDictationStatus === "transcribing"
? "Transcribing dictation..."
: "Dictation unavailable")}
</div>
`
: nothing}
: props.realtimeTalkActive || props.realtimeTalkDetail || props.realtimeTalkTranscript
? html`
<div class="agent-chat__stt-interim agent-chat__talk-status">
${props.realtimeTalkDetail ??
props.realtimeTalkTranscript ??
(props.realtimeTalkStatus === "thinking"
? "Asking OpenClaw..."
: props.realtimeTalkStatus === "connecting"
? "Connecting Talk..."
: "Talk live")}
</div>
`
: nothing}
<div class="agent-chat__composer-combobox">
<textarea
@@ -1252,6 +1269,29 @@ export function renderChat(props: ChatProps) {
${icons.paperclip}
</button>
${props.onToggleChatDictation
? html`
<button
class="agent-chat__input-btn ${props.chatDictationStatus === "recording"
? "agent-chat__input-btn--dictating"
: ""}"
@click=${props.onToggleChatDictation}
title=${props.chatDictationStatus === "recording"
? "Stop dictation"
: "Dictate with server STT"}
aria-label=${props.chatDictationStatus === "recording"
? "Stop dictation"
: "Dictate with server STT"}
?disabled=${props.chatDictationStatus === "recording"
? false
: !props.connected ||
props.chatDictationStatus === "starting" ||
props.chatDictationStatus === "transcribing"}
>
${props.chatDictationStatus === "recording" ? icons.stop : icons.mic}
</button>
`
: nothing}
${props.onToggleRealtimeTalk
? html`
<button