From 04066d246abc4e13a9507e1a93e12be75ae41753 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Fri, 24 Apr 2026 03:33:29 +0100
Subject: [PATCH] feat: add browser realtime talk

---
 CHANGELOG.md                                  |   1 +
 .../.generated/plugin-sdk-api-baseline.sha256 |   4 +-
 docs/providers/openai.md                      |  26 +-
 docs/web/control-ui.md                        |   9 +
 extensions/google-meet/src/agent-consult.ts   |  34 +-
 extensions/google-meet/src/config.ts          |   4 +-
 extensions/openai/realtime-voice-provider.ts  |  78 ++++-
 src/gateway/method-scopes.ts                  |   1 +
 src/gateway/protocol/index.ts                 |  14 +
 src/gateway/protocol/schema/channels.ts       |  22 ++
 .../protocol/schema/protocol-schemas.ts       |   4 +
 src/gateway/protocol/schema/types.ts          |   2 +
 src/gateway/server-methods-list.ts            |   1 +
 src/gateway/server-methods/talk.ts            | 126 ++++++++
 src/plugin-sdk/realtime-voice.ts              |   6 +
 src/plugins/types.ts                          |   5 +
 src/realtime-voice/agent-consult-tool.ts      |  28 ++
 src/realtime-voice/provider-types.ts          |  16 +
 ui/src/styles/chat/layout.css                 |   9 +
 ui/src/ui/app-lifecycle.ts                    |  11 +
 ui/src/ui/app-render.ts                       |   5 +
 ui/src/ui/app-view-state.ts                   |   6 +
 ui/src/ui/app.ts                              |  51 +++
 ui/src/ui/chat/realtime-talk.ts               | 300 ++++++++++++++++++
 ui/src/ui/gateway.ts                          |  13 +
 ui/src/ui/views/chat.ts                       |  34 ++
 26 files changed, 765 insertions(+), 45 deletions(-)
 create mode 100644 src/realtime-voice/agent-consult-tool.ts
 create mode 100644 ui/src/ui/chat/realtime-talk.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebda08f4c06..4cf86d6fdf6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ Docs: https://docs.openclaw.ai
 ### Changes
 
 - Control UI/chat: add a Steer action on queued messages so a browser follow-up can be injected into the active run without retyping it.
+- Control UI/Talk: add browser WebRTC realtime voice sessions backed by OpenAI Realtime, with Gateway-minted ephemeral client secrets and `openclaw_agent_consult` handoff to the full OpenClaw agent.
 - Agents/tools: add optional per-call `timeoutMs` support for image, video, music, and TTS generation tools so agents can extend provider request timeouts only when a specific generation needs it.
 - Agents/subagents: add optional forked context for native `sessions_spawn` runs so agents can let a child inherit the requester transcript when needed, while keeping clean isolated sessions as the default; includes prompt guidance, context-engine hook metadata, docs, and QA coverage.
 - Codex harness: add structured debug logging for embedded harness selection decisions so `/status` stays simple while gateway logs explain auto-selection and Pi fallback reasons. (#70760) Thanks @100yenadmin.
diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256
index 9a694d66467..627b848d38c 100644
--- a/docs/.generated/plugin-sdk-api-baseline.sha256
+++ b/docs/.generated/plugin-sdk-api-baseline.sha256
@@ -1,2 +1,2 @@
-793ed905cb0ba93b9a2f8c2c85c3cfb4d194dd9263353e74952bf9e382b03dc2  plugin-sdk-api-baseline.json
-032e7fd6f48344c9b3b98fd3e877e6d30cab92ed9a39dd309796cf1f0220820f  plugin-sdk-api-baseline.jsonl
+96905c33f4498446f612ae17dee6affdf84ef0e2e5a0f25bf7191c315f5b826f  plugin-sdk-api-baseline.json
+d8eb6331562fde29531eaac18409bb7fabcc70623bf25395f8e5710a49765f0f  plugin-sdk-api-baseline.jsonl
diff --git a/docs/providers/openai.md b/docs/providers/openai.md
index c79a26d0f3f..ce1c6b7e0d4 100644
--- a/docs/providers/openai.md
+++ b/docs/providers/openai.md
@@ -25,19 +25,19 @@ API-enabled model such as `openai/gpt-5.4` for `OPENAI_API_KEY` setups.
 
 ## OpenClaw feature coverage
 
-| OpenAI capability         | OpenClaw surface                                       | Status                                                 |
-| ------------------------- | ------------------------------------------------------ | ------------------------------------------------------ |
-| Chat / Responses          | `openai/<model>` model provider                        | Yes                                                    |
-| Codex subscription models | `openai-codex/<model>` with `openai-codex` OAuth       | Yes                                                    |
-| Codex app-server harness  | `openai/<model>` with `embeddedHarness.runtime: codex` | Yes                                                    |
-| Server-side web search    | Native OpenAI Responses tool                           | Yes, when web search is enabled and no provider pinned |
-| Images                    | `image_generate`                                       | Yes                                                    |
-| Videos                    | `video_generate`                                       | Yes                                                    |
-| Text-to-speech            | `messages.tts.provider: "openai"` / `tts`              | Yes                                                    |
-| Batch speech-to-text      | `tools.media.audio` / media understanding              | Yes                                                    |
-| Streaming speech-to-text  | Voice Call `streaming.provider: "openai"`              | Yes                                                    |
-| Realtime voice            | Voice Call `realtime.provider: "openai"`               | Yes                                                    |
-| Embeddings                | memory embedding provider                              | Yes                                                    |
+| OpenAI capability         | OpenClaw surface                                           | Status                                                 |
+| ------------------------- | ---------------------------------------------------------- | ------------------------------------------------------ |
+| Chat / Responses          | `openai/<model>` model provider                            | Yes                                                    |
+| Codex subscription models | `openai-codex/<model>` with `openai-codex` OAuth           | Yes                                                    |
+| Codex app-server harness  | `openai/<model>` with `embeddedHarness.runtime: codex`     | Yes                                                    |
+| Server-side web search    | Native OpenAI Responses tool                               | Yes, when web search is enabled and no provider pinned |
+| Images                    | `image_generate`                                           | Yes                                                    |
+| Videos                    | `video_generate`                                           | Yes                                                    |
+| Text-to-speech            | `messages.tts.provider: "openai"` / `tts`                  | Yes                                                    |
+| Batch speech-to-text      | `tools.media.audio` / media understanding                  | Yes                                                    |
+| Streaming speech-to-text  | Voice Call `streaming.provider: "openai"`                  | Yes                                                    |
+| Realtime voice            | Voice Call `realtime.provider: "openai"` / Control UI Talk | Yes                                                    |
+| Embeddings                | memory embedding provider                                  | Yes                                                    |
 
 ## Getting started
 
diff --git a/docs/web/control-ui.md b/docs/web/control-ui.md
index b0a3da3c4a3..213f2e65fa7 100644
--- a/docs/web/control-ui.md
+++ b/docs/web/control-ui.md
@@ -105,6 +105,11 @@ locale picker lives in the Gateway Access card, not under Appearance.
 ## What it can do (today)
 
 - Chat with the model via Gateway WS (`chat.history`, `chat.send`, `chat.abort`, `chat.inject`)
+- Talk to OpenAI Realtime directly from the browser via WebRTC. The Gateway
+  mints a short-lived Realtime client secret with `talk.realtime.session`; the
+  browser sends microphone audio directly to OpenAI and relays
+  `openclaw_agent_consult` tool calls back through `chat.send`, so the full
+  configured OpenClaw agent answers them.
 - Stream tool calls + live tool output cards in Chat (agent events)
 - Channels: built-in plus bundled/external plugin channels status, QR login, and per-channel config (`channels.status`, `web.login.*`, `config.patch`)
 - Instances: presence list + refresh (`system-presence`)
@@ -151,6 +156,10 @@ Cron jobs panel notes:
 - `chat.history` also strips display-only inline directive tags from visible assistant text (for example `[[reply_to_*]]` and `[[audio_as_voice]]`), plain-text tool-call XML payloads (including `<tool_call>...</tool_call>`, `<function_call>...</function_call>`, `<tool_calls>...</tool_calls>`, `<function_calls>...</function_calls>`, and truncated tool-call blocks), and leaked ASCII/full-width model control tokens, and omits assistant entries whose whole visible text is only the exact silent token `NO_REPLY` / `no_reply`.
 - `chat.inject` appends an assistant note to the session transcript and broadcasts a `chat` event for UI-only updates (no agent run, no channel delivery).
 - The chat header model and thinking pickers patch the active session immediately through `sessions.patch`; they are persistent session overrides, not one-turn-only send options.
+- Talk mode uses the registered realtime voice provider. Configure OpenAI with
+  `talk.provider: "openai"` plus `talk.providers.openai.apiKey`, or reuse the
+  Voice Call realtime provider config. The browser never receives the standard
+  OpenAI API key; it receives only the ephemeral Realtime client secret.
 - Stop:
   - Click **Stop** (calls `chat.abort`)
   - While a run is active, normal follow-ups queue. Click **Steer** on a queued message to inject that follow-up into the running turn.
diff --git a/extensions/google-meet/src/agent-consult.ts b/extensions/google-meet/src/agent-consult.ts
index 1e606768372..1229a6a5ca3 100644
--- a/extensions/google-meet/src/agent-consult.ts
+++ b/extensions/google-meet/src/agent-consult.ts
@@ -1,7 +1,11 @@
 import { randomUUID } from "node:crypto";
 import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
 import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
-import type { RealtimeVoiceTool } from "openclaw/plugin-sdk/realtime-voice";
+import {
+  REALTIME_VOICE_AGENT_CONSULT_TOOL,
+  REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
+  type RealtimeVoiceTool,
+} from "openclaw/plugin-sdk/realtime-voice";
 import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
 import type { GoogleMeetConfig, GoogleMeetToolPolicy } from "./config.js";
 
@@ -11,32 +15,8 @@ type AgentPayload = {
   isReasoning?: boolean;
 };
 
-export const GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME = "openclaw_agent_consult";
-
-export const GOOGLE_MEET_AGENT_CONSULT_TOOL: RealtimeVoiceTool = {
-  type: "function",
-  name: GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME,
-  description:
-    "Ask the full OpenClaw agent for deeper reasoning, current information, or tool-backed help before speaking in the meeting.",
-  parameters: {
-    type: "object",
-    properties: {
-      question: {
-        type: "string",
-        description: "The concrete question or task the meeting participant asked.",
-      },
-      context: {
-        type: "string",
-        description: "Optional relevant meeting context or transcript summary.",
-      },
-      responseStyle: {
-        type: "string",
-        description: "Optional style hint for the spoken answer.",
-      },
-    },
-    required: ["question"],
-  },
-};
+export const GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME = REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME;
+export const GOOGLE_MEET_AGENT_CONSULT_TOOL = REALTIME_VOICE_AGENT_CONSULT_TOOL;
 
 export function resolveGoogleMeetRealtimeTools(policy: GoogleMeetToolPolicy): RealtimeVoiceTool[] {
   return policy === "none" ? [] : [GOOGLE_MEET_AGENT_CONSULT_TOOL];
diff --git a/extensions/google-meet/src/config.ts b/extensions/google-meet/src/config.ts
index aa12162fd90..c18e341f835 100644
--- a/extensions/google-meet/src/config.ts
+++ b/extensions/google-meet/src/config.ts
@@ -1,3 +1,4 @@
+import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "openclaw/plugin-sdk/realtime-voice";
 import {
   normalizeOptionalLowercaseString,
   normalizeOptionalString,
@@ -94,8 +95,7 @@ export const DEFAULT_GOOGLE_MEET_AUDIO_OUTPUT_COMMAND = [
   "-",
 ] as const;
 
-export const DEFAULT_GOOGLE_MEET_REALTIME_INSTRUCTIONS =
-  "You are joining a private Google Meet as an OpenClaw agent. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call openclaw_agent_consult before answering.";
+export const DEFAULT_GOOGLE_MEET_REALTIME_INSTRUCTIONS = `You are joining a private Google Meet as an OpenClaw agent. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`;
 
 export const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
   enabled: true,
diff --git a/extensions/openai/realtime-voice-provider.ts b/extensions/openai/realtime-voice-provider.ts
index 8d37f5f7f55..eb17661952e 100644
--- a/extensions/openai/realtime-voice-provider.ts
+++ b/extensions/openai/realtime-voice-provider.ts
@@ -6,6 +6,8 @@ import {
 } from "openclaw/plugin-sdk/proxy-capture";
 import type {
   RealtimeVoiceBridge,
+  RealtimeVoiceBrowserSession,
+  RealtimeVoiceBrowserSessionCreateRequest,
   RealtimeVoiceBridgeCreateRequest,
   RealtimeVoiceProviderConfig,
   RealtimeVoiceProviderPlugin,
@@ -59,6 +61,8 @@ type OpenAIRealtimeVoiceBridgeConfig = RealtimeVoiceBridgeCreateRequest & {
   azureApiVersion?: string;
 };
 
+const OPENAI_REALTIME_DEFAULT_MODEL = "gpt-realtime-1.5";
+
 type RealtimeEvent = {
   type: string;
   delta?: string;
@@ -117,7 +121,7 @@ function base64ToBuffer(b64: string): Buffer {
 }
 
 class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
-  private static readonly DEFAULT_MODEL = "gpt-realtime-1.5";
+  private static readonly DEFAULT_MODEL = OPENAI_REALTIME_DEFAULT_MODEL;
   private static readonly MAX_RECONNECT_ATTEMPTS = 5;
   private static readonly BASE_RECONNECT_DELAY_MS = 1000;
   private static readonly CONNECT_TIMEOUT_MS = 10_000;
@@ -579,6 +583,122 @@ class OpenAIRealtimeVoiceBridge implements RealtimeVoiceBridge {
   }
 }
 
+/**
+ * Reads `key` from an object-like `value` as a trimmed, non-empty string.
+ *
+ * @param value - Candidate container; anything that is not a non-null object yields `undefined`.
+ * @param key - Property name to read.
+ * @returns The trimmed string, or `undefined` when the field is missing, not a string, or blank.
+ */
+function readStringField(value: unknown, key: string): string | undefined {
+  if (!value || typeof value !== "object") {
+    return undefined;
+  }
+  const raw = (value as Record<string, unknown>)[key];
+  if (typeof raw !== "string") {
+    return undefined;
+  }
+  // Trim once and reuse the result for both the blank check and the return value.
+  const trimmed = raw.trim();
+  return trimmed ? trimmed : undefined;
+}
+
+/**
+ * Reads `key` from an object-like `value` as a finite number.
+ *
+ * @returns The number, or `undefined` when the field is missing, not a number, or not finite.
+ */
+function readFiniteNumberField(value: unknown, key: string): number | undefined {
+  if (!value || typeof value !== "object") {
+    return undefined;
+  }
+  const raw = (value as Record<string, unknown>)[key];
+  return typeof raw === "number" && Number.isFinite(raw) ? raw : undefined;
+}
+
+/** Upper bound for the client-secret request so a stalled upstream cannot hang the caller. */
+const OPENAI_REALTIME_CLIENT_SECRET_TIMEOUT_MS = 10_000;
+
+/**
+ * Mints an ephemeral OpenAI Realtime client secret for a browser WebRTC session.
+ *
+ * Posts the session definition (model, instructions, output voice, optional tools) to the
+ * Realtime client-secrets endpoint with the server-side API key, so only the ephemeral secret
+ * is handed back to the caller.
+ *
+ * @param req - Provider config plus optional per-session overrides. `req.model` / `req.voice`
+ *   win over the provider config, which wins over the built-in defaults.
+ * @returns The client secret, the resolved model and voice, and `expiresAt` when the response
+ *   carries a numeric `expires_at` (top level or next to a nested secret).
+ * @throws Error when no API key is available, when Azure settings are configured, when the
+ *   request fails or times out, or when the response contains no client secret.
+ */
+async function createOpenAIRealtimeBrowserSession(
+  req: RealtimeVoiceBrowserSessionCreateRequest,
+): Promise<RealtimeVoiceBrowserSession> {
+  const config = normalizeProviderConfig(req.providerConfig);
+  // `||` rather than `??`: a blank configured key also falls back to the environment.
+  const apiKey = config.apiKey || process.env.OPENAI_API_KEY;
+  if (!apiKey) {
+    throw new Error("OpenAI API key missing");
+  }
+  if (config.azureEndpoint || config.azureDeployment) {
+    throw new Error("OpenAI Realtime browser sessions do not support Azure endpoints yet");
+  }
+
+  const model = req.model ?? config.model ?? OPENAI_REALTIME_DEFAULT_MODEL;
+  const voice = (req.voice ?? config.voice ?? "alloy") as OpenAIRealtimeVoice;
+  const session: Record<string, unknown> = {
+    type: "realtime",
+    model,
+    instructions: req.instructions,
+    audio: {
+      output: { voice },
+    },
+  };
+  if (req.tools && req.tools.length > 0) {
+    session.tools = req.tools;
+    session.tool_choice = "auto";
+  }
+
+  const response = await fetch("https://api.openai.com/v1/realtime/client_secrets", {
+    method: "POST",
+    headers: {
+      Authorization: `Bearer ${apiKey}`,
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({ session }),
+    signal: AbortSignal.timeout(OPENAI_REALTIME_CLIENT_SECRET_TIMEOUT_MS),
+  });
+  if (!response.ok) {
+    const detail = await response.text().catch(() => "");
+    throw new Error(
+      `OpenAI Realtime browser session failed (${response.status}): ${detail || response.statusText}`,
+    );
+  }
+  const payload = (await response.json()) as unknown;
+  const nestedSecret =
+    payload && typeof payload === "object"
+      ? (payload as Record<string, unknown>).client_secret
+      : undefined;
+  // The secret is accepted either at the top level (`value`) or nested under `client_secret`.
+  const clientSecret = readStringField(payload, "value") ?? readStringField(nestedSecret, "value");
+  if (!clientSecret) {
+    throw new Error("OpenAI Realtime browser session did not return a client secret");
+  }
+  // Mirror the secret lookup so the expiry is not dropped when it sits next to a nested secret.
+  const expiresAt =
+    readFiniteNumberField(payload, "expires_at") ??
+    readFiniteNumberField(nestedSecret, "expires_at");
+  return {
+    provider: "openai",
+    clientSecret,
+    model,
+    voice,
+    ...(expiresAt !== undefined ? { expiresAt } : {}),
+  };
+}
+
 export function buildOpenAIRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin {
   return {
     id: "openai",
@@ -607,6 +682,7 @@ export function buildOpenAIRealtimeVoiceProvider(): RealtimeVoiceProviderPlugin
         azureApiVersion: config.azureApiVersion,
       });
     },
+    createBrowserSession: createOpenAIRealtimeBrowserSession,
   };
 }
 
diff --git a/src/gateway/method-scopes.ts b/src/gateway/method-scopes.ts
index 19664b9606c..834be3dea7d 100644
--- a/src/gateway/method-scopes.ts
+++ b/src/gateway/method-scopes.ts
@@ -125,6 +125,7 @@ const METHOD_SCOPE_GROUPS: Record<OperatorScope, readonly string[]> = {
     "agent.wait",
     "wake",
     "talk.mode",
+    "talk.realtime.session",
     "talk.speak",
     "tts.enable",
     "tts.disable",
diff --git a/src/gateway/protocol/index.ts b/src/gateway/protocol/index.ts
index 7945e0fb677..908b95363a9 100644
--- a/src/gateway/protocol/index.ts
+++ b/src/gateway/protocol/index.ts
@@ -52,6 +52,10 @@ import {
   TalkConfigParamsSchema,
   type TalkConfigResult,
   TalkConfigResultSchema,
+  type TalkRealtimeSessionParams,
+  TalkRealtimeSessionParamsSchema,
+  type TalkRealtimeSessionResult,
+  TalkRealtimeSessionResultSchema,
   type TalkSpeakParams,
   TalkSpeakParamsSchema,
   type TalkSpeakResult,
@@ -428,6 +432,12 @@ export const validateWizardStatusParams = ajv.compile<WizardStatusParams>(Wizard
 export const validateTalkModeParams = ajv.compile<TalkModeParams>(TalkModeParamsSchema);
 export const validateTalkConfigParams = ajv.compile<TalkConfigParams>(TalkConfigParamsSchema);
 export const validateTalkConfigResult = ajv.compile<TalkConfigResult>(TalkConfigResultSchema);
+export const validateTalkRealtimeSessionParams = ajv.compile<TalkRealtimeSessionParams>(
+  TalkRealtimeSessionParamsSchema,
+);
+export const validateTalkRealtimeSessionResult = ajv.compile<TalkRealtimeSessionResult>(
+  TalkRealtimeSessionResultSchema,
+);
 export const validateTalkSpeakParams = ajv.compile<TalkSpeakParams>(TalkSpeakParamsSchema);
 export const validateTalkSpeakResult = ajv.compile<TalkSpeakResult>(TalkSpeakResultSchema);
 export const validateChannelsStatusParams = ajv.compile<ChannelsStatusParams>(
@@ -616,6 +626,8 @@ export {
   WizardStatusResultSchema,
   TalkConfigParamsSchema,
   TalkConfigResultSchema,
+  TalkRealtimeSessionParamsSchema,
+  TalkRealtimeSessionResultSchema,
   TalkSpeakParamsSchema,
   TalkSpeakResultSchema,
   ChannelsStatusParamsSchema,
@@ -720,6 +732,8 @@ export type {
   WizardStatusResult,
   TalkConfigParams,
   TalkConfigResult,
+  TalkRealtimeSessionParams,
+  TalkRealtimeSessionResult,
   TalkSpeakParams,
   TalkSpeakResult,
   TalkModeParams,
diff --git a/src/gateway/protocol/schema/channels.ts b/src/gateway/protocol/schema/channels.ts
index 5e134af1a27..20ce30eadf1 100644
--- a/src/gateway/protocol/schema/channels.ts
+++ b/src/gateway/protocol/schema/channels.ts
@@ -36,6 +36,28 @@ export const TalkSpeakParamsSchema = Type.Object(
   { additionalProperties: false },
 );
 
+export const TalkRealtimeSessionParamsSchema = Type.Object(
+  {
+    sessionKey: Type.Optional(Type.String()), // NOTE(review): accepted here, but the talk.realtime.session handler added in this change never reads it.
+    provider: Type.Optional(Type.String()), // Explicit provider id; takes precedence over the configured Talk / Voice Call provider.
+    model: Type.Optional(Type.String()),
+    voice: Type.Optional(Type.String()),
+    instructions: Type.Optional(Type.String()), // Appended to the Gateway's base realtime instructions.
+  },
+  { additionalProperties: false },
+);
+
+export const TalkRealtimeSessionResultSchema = Type.Object(
+  {
+    provider: NonEmptyString,
+    clientSecret: NonEmptyString, // Client secret returned by the provider's browser session.
+    model: Type.Optional(Type.String()),
+    voice: Type.Optional(Type.String()),
+    expiresAt: Type.Optional(Type.Number()), // Present only when the provider session reports a numeric expiry.
+  },
+  { additionalProperties: false },
+);
+
 const talkProviderFieldSchemas = {
   apiKey: Type.Optional(SecretInputSchema),
 };
diff --git a/src/gateway/protocol/schema/protocol-schemas.ts b/src/gateway/protocol/schema/protocol-schemas.ts
index 0f9c443740e..c3972fbf764 100644
--- a/src/gateway/protocol/schema/protocol-schemas.ts
+++ b/src/gateway/protocol/schema/protocol-schemas.ts
@@ -54,6 +54,8 @@ import {
   ChannelsLogoutParamsSchema,
   TalkConfigParamsSchema,
   TalkConfigResultSchema,
+  TalkRealtimeSessionParamsSchema,
+  TalkRealtimeSessionResultSchema,
   TalkSpeakParamsSchema,
   TalkSpeakResultSchema,
   ChannelsStatusParamsSchema,
@@ -279,6 +281,8 @@ export const ProtocolSchemas = {
   TalkModeParams: TalkModeParamsSchema,
   TalkConfigParams: TalkConfigParamsSchema,
   TalkConfigResult: TalkConfigResultSchema,
+  TalkRealtimeSessionParams: TalkRealtimeSessionParamsSchema,
+  TalkRealtimeSessionResult: TalkRealtimeSessionResultSchema,
   TalkSpeakParams: TalkSpeakParamsSchema,
   TalkSpeakResult: TalkSpeakResultSchema,
   ChannelsStatusParams: ChannelsStatusParamsSchema,
diff --git a/src/gateway/protocol/schema/types.ts b/src/gateway/protocol/schema/types.ts
index 9a1ef1ddfd1..fb4da4202b7 100644
--- a/src/gateway/protocol/schema/types.ts
+++ b/src/gateway/protocol/schema/types.ts
@@ -80,6 +80,8 @@ export type WizardStatusResult = SchemaType<"WizardStatusResult">;
 export type TalkModeParams = SchemaType<"TalkModeParams">;
 export type TalkConfigParams = SchemaType<"TalkConfigParams">;
 export type TalkConfigResult = SchemaType<"TalkConfigResult">;
+export type TalkRealtimeSessionParams = SchemaType<"TalkRealtimeSessionParams">;
+export type TalkRealtimeSessionResult = SchemaType<"TalkRealtimeSessionResult">;
 export type TalkSpeakParams = SchemaType<"TalkSpeakParams">;
 export type TalkSpeakResult = SchemaType<"TalkSpeakResult">;
 export type ChannelsStatusParams = SchemaType<"ChannelsStatusParams">;
diff --git a/src/gateway/server-methods-list.ts b/src/gateway/server-methods-list.ts
index 6506d48a23f..352ca7bcd29 100644
--- a/src/gateway/server-methods-list.ts
+++ b/src/gateway/server-methods-list.ts
@@ -48,6 +48,7 @@ const BASE_METHODS = [
   "wizard.cancel",
   "wizard.status",
   "talk.config",
+  "talk.realtime.session",
   "talk.speak",
   "talk.mode",
   "commands.list",
diff --git a/src/gateway/server-methods/talk.ts b/src/gateway/server-methods/talk.ts
index 8aea28f2090..e419b73ac06 100644
--- a/src/gateway/server-methods/talk.ts
+++ b/src/gateway/server-methods/talk.ts
@@ -7,6 +7,13 @@ import {
 } from "../../config/talk.js";
 import type { TalkConfigResponse, TalkProviderConfig } from "../../config/types.gateway.js";
 import type { OpenClawConfig, TtsConfig, TtsProviderConfigMap } from "../../config/types.js";
+import {
+  REALTIME_VOICE_AGENT_CONSULT_TOOL,
+  REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
+} from "../../realtime-voice/agent-consult-tool.js";
+import { getRealtimeVoiceProvider } from "../../realtime-voice/provider-registry.js";
+import { resolveConfiguredRealtimeVoiceProvider } from "../../realtime-voice/provider-resolver.js";
+import type { RealtimeVoiceProviderConfig } from "../../realtime-voice/provider-types.js";
 import {
   normalizeLowercaseStringOrEmpty,
   normalizeOptionalLowercaseString,
@@ -22,6 +29,7 @@ import {
   type TalkSpeakParams,
   validateTalkConfigParams,
   validateTalkModeParams,
+  validateTalkRealtimeSessionParams,
   validateTalkSpeakParams,
 } from "../protocol/index.js";
 import { formatForLog } from "../ws-log.js";
@@ -136,6 +144,89 @@ function buildTalkTtsConfig(
   };
 }
 
+/** Adapts `asRecord` so that a nullish result is reported as `undefined`. */
+function getRecord(value: unknown): Record<string, unknown> | undefined {
+  const record = asRecord(value);
+  return record ?? undefined;
+}
+
+/**
+ * Extracts the realtime voice settings stored under the `voice-call` plugin entry
+ * (`plugins.entries["voice-call"].config.realtime`).
+ *
+ * @returns The `provider` value passed through `normalizeOptionalString`, plus the
+ *   per-provider configs that are object-like; `providers` is `undefined` when none qualify.
+ */
+function getVoiceCallRealtimeConfig(config: OpenClawConfig): {
+  provider?: string;
+  providers?: Record<string, RealtimeVoiceProviderConfig>;
+} {
+  // Walk plugins -> entries -> "voice-call" -> config -> realtime, tolerating gaps at any level.
+  const realtimePath = ["entries", "voice-call", "config", "realtime"];
+  let realtime = getRecord(config.plugins);
+  for (const segment of realtimePath) {
+    realtime = getRecord(realtime?.[segment]);
+  }
+
+  const providers: Record<string, RealtimeVoiceProviderConfig> = {};
+  for (const [providerId, rawConfig] of Object.entries(getRecord(realtime?.providers) ?? {})) {
+    const providerConfig = getRecord(rawConfig);
+    if (!providerConfig) {
+      continue;
+    }
+    providers[providerId] = providerConfig;
+  }
+
+  const hasProviders = Object.keys(providers).length > 0;
+  return {
+    provider: normalizeOptionalString(realtime?.provider),
+    providers: hasProviders ? providers : undefined,
+  };
+}
+
+/**
+ * Chooses the realtime provider id and the provider config map for a Talk session.
+ *
+ * Provider precedence: the explicitly requested provider, then `talk.provider` (only when
+ * `getRealtimeVoiceProvider` resolves that id), then the Voice Call realtime provider.
+ * Provider configs from `talk.providers` replace same-named Voice Call entries wholesale
+ * (shallow merge keyed by provider id).
+ */
+function buildTalkRealtimeConfig(config: OpenClawConfig, requestedProvider?: string) {
+  const voiceCall = getVoiceCallRealtimeConfig(config);
+  const talkProviders = config.talk?.providers as
+    | Record<string, RealtimeVoiceProviderConfig>
+    | undefined;
+  const talkProviderId = normalizeOptionalString(config.talk?.provider);
+  // Only consult the provider lookup when `talk.provider` is actually set.
+  const realtimeTalkProviderId =
+    talkProviderId && getRealtimeVoiceProvider(talkProviderId, config)
+      ? talkProviderId
+      : undefined;
+  return {
+    provider:
+      normalizeOptionalString(requestedProvider) ??
+      realtimeTalkProviderId ??
+      voiceCall.provider,
+    providers: { ...voiceCall.providers, ...talkProviders },
+  };
+}
+
+/**
+ * Builds the system instructions for a browser realtime session.
+ *
+ * @param extra - Optional caller-supplied instructions; when `normalizeOptionalString` yields
+ *   a value, it is appended after the base prompt, separated by a blank line.
+ */
+function buildRealtimeInstructions(extra: string | undefined): string {
+  const base = `You are OpenClaw's realtime voice interface. Keep spoken replies concise. If the user asks for code, repository state, tools, files, current OpenClaw context, or deeper reasoning, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} and then summarize the result naturally.`;
+  const addendum = normalizeOptionalString(extra);
+  if (!addendum) {
+    return base;
+  }
+  return `${base}\n\n${addendum}`;
+}
+
 function isFallbackEligibleTalkReason(reason: TalkSpeakReason): boolean {
   return (
     reason === "talk_unconfigured" ||
@@ -334,6 +399,74 @@ export const talkHandlers: GatewayRequestHandlers = {
 
     respond(true, { config: configPayload }, undefined);
   },
+  /**
+   * Creates a browser realtime voice session for Talk mode.
+   *
+   * Validates the params, resolves the realtime voice provider from the Talk / Voice Call
+   * config, and asks it for a browser session with the agent-consult tool attached. Responds
+   * with INVALID_REQUEST for bad params, and with UNAVAILABLE when the provider has no
+   * `createBrowserSession` or when resolution / session creation throws.
+   */
+  "talk.realtime.session": async ({ params, respond }) => {
+    if (!validateTalkRealtimeSessionParams(params)) {
+      const problems = formatValidationErrors(validateTalkRealtimeSessionParams.errors);
+      respond(
+        false,
+        undefined,
+        errorShape(
+          ErrorCodes.INVALID_REQUEST,
+          `invalid talk.realtime.session params: ${problems}`,
+        ),
+      );
+      return;
+    }
+    const { provider, model, voice, instructions } = params as {
+      provider?: string;
+      model?: string;
+      voice?: string;
+      instructions?: string;
+    };
+    try {
+      const runtimeConfig = loadConfig();
+      const realtimeConfig = buildTalkRealtimeConfig(runtimeConfig, provider);
+      const { provider: voiceProvider, providerConfig } = resolveConfiguredRealtimeVoiceProvider({
+        configuredProviderId: realtimeConfig.provider,
+        providerConfigs: realtimeConfig.providers,
+        cfg: runtimeConfig,
+        cfgForResolve: runtimeConfig,
+        noRegisteredProviderMessage: "No realtime voice provider registered",
+      });
+      if (!voiceProvider.createBrowserSession) {
+        const reason = `Realtime voice provider "${voiceProvider.id}" does not support browser WebRTC sessions`;
+        respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, reason));
+        return;
+      }
+      const session = await voiceProvider.createBrowserSession({
+        providerConfig,
+        instructions: buildRealtimeInstructions(instructions),
+        tools: [REALTIME_VOICE_AGENT_CONSULT_TOOL],
+        model: normalizeOptionalString(model),
+        voice: normalizeOptionalString(voice),
+      });
+      // Optional fields are copied only when set, keeping the payload free of undefined keys.
+      const result: Record<string, unknown> = {
+        provider: session.provider,
+        clientSecret: session.clientSecret,
+      };
+      if (session.model) {
+        result.model = session.model;
+      }
+      if (session.voice) {
+        result.voice = session.voice;
+      }
+      if (typeof session.expiresAt === "number") {
+        result.expiresAt = session.expiresAt;
+      }
+      respond(true, result, undefined);
+    } catch (err) {
+      respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, formatForLog(err)));
+    }
+  },
   "talk.speak": async ({ params, respond }) => {
     if (!validateTalkSpeakParams(params)) {
       respond(
diff --git a/src/plugin-sdk/realtime-voice.ts b/src/plugin-sdk/realtime-voice.ts
index 4dcfb7c9c76..07aedef6299 100644
--- a/src/plugin-sdk/realtime-voice.ts
+++ b/src/plugin-sdk/realtime-voice.ts
@@ -2,6 +2,8 @@ export type { RealtimeVoiceProviderPlugin } from "../plugins/types.js";
 export type {
   RealtimeVoiceBridge,
   RealtimeVoiceBridgeCallbacks,
+  RealtimeVoiceBrowserSession,
+  RealtimeVoiceBrowserSessionCreateRequest,
   RealtimeVoiceBridgeCreateRequest,
   RealtimeVoiceCloseReason,
   RealtimeVoiceProviderConfig,
@@ -12,6 +14,10 @@ export type {
   RealtimeVoiceTool,
   RealtimeVoiceToolCallEvent,
 } from "../realtime-voice/provider-types.js";
+export {
+  REALTIME_VOICE_AGENT_CONSULT_TOOL,
+  REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
+} from "../realtime-voice/agent-consult-tool.js";
 export {
   canonicalizeRealtimeVoiceProviderId,
   getRealtimeVoiceProvider,
diff --git a/src/plugins/types.ts b/src/plugins/types.ts
index 47b9bcca476..203d654330a 100644
--- a/src/plugins/types.ts
+++ b/src/plugins/types.ts
@@ -40,6 +40,8 @@ import type {
 } from "../realtime-transcription/provider-types.js";
 import type {
   RealtimeVoiceBridge,
+  RealtimeVoiceBrowserSession,
+  RealtimeVoiceBrowserSessionCreateRequest,
   RealtimeVoiceBridgeCreateRequest,
   RealtimeVoiceProviderConfig,
   RealtimeVoiceProviderConfiguredContext,
@@ -1661,6 +1663,9 @@ export type RealtimeVoiceProviderPlugin = {
   resolveConfig?: (ctx: RealtimeVoiceProviderResolveConfigContext) => RealtimeVoiceProviderConfig;
   isConfigured: (ctx: RealtimeVoiceProviderConfiguredContext) => boolean;
   createBridge: (req: RealtimeVoiceBridgeCreateRequest) => RealtimeVoiceBridge;
+  createBrowserSession?: (
+    req: RealtimeVoiceBrowserSessionCreateRequest,
+  ) => Promise<RealtimeVoiceBrowserSession>;
 };
 
 export type PluginRealtimeVoiceProviderEntry = RealtimeVoiceProviderPlugin & {
diff --git a/src/realtime-voice/agent-consult-tool.ts b/src/realtime-voice/agent-consult-tool.ts
new file mode 100644
index 00000000000..bdafb384581
--- /dev/null
+++ b/src/realtime-voice/agent-consult-tool.ts
@@ -0,0 +1,28 @@
+import type { RealtimeVoiceTool } from "./provider-types.js";
+
+export const REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME = "openclaw_agent_consult";
+
+export const REALTIME_VOICE_AGENT_CONSULT_TOOL: RealtimeVoiceTool = {
+  type: "function",
+  name: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
+  description:
+    "Ask the full OpenClaw agent for deeper reasoning, current information, or tool-backed help before speaking.",
+  parameters: {
+    type: "object",
+    properties: {
+      question: {
+        type: "string",
+        description: "The concrete question or task the user asked.",
+      },
+      context: {
+        type: "string",
+        description: "Optional relevant context or transcript summary.",
+      },
+      responseStyle: {
+        type: "string",
+        description: "Optional style hint for the spoken answer.",
+      },
+    },
+    required: ["question"],
+  },
+};
diff --git a/src/realtime-voice/provider-types.ts b/src/realtime-voice/provider-types.ts
index 8f317e93298..ba72ff89640 100644
--- a/src/realtime-voice/provider-types.ts
+++ b/src/realtime-voice/provider-types.ts
@@ -53,6 +53,22 @@ export type RealtimeVoiceBridgeCreateRequest = RealtimeVoiceBridgeCallbacks & {
   tools?: RealtimeVoiceTool[];
 };
 
+export type RealtimeVoiceBrowserSessionCreateRequest = {
+  providerConfig: RealtimeVoiceProviderConfig;
+  instructions?: string;
+  tools?: RealtimeVoiceTool[];
+  model?: string;
+  voice?: string;
+};
+
+export type RealtimeVoiceBrowserSession = {
+  provider: RealtimeVoiceProviderId;
+  clientSecret: string;
+  model?: string;
+  voice?: string;
+  expiresAt?: number;
+};
+
 export type RealtimeVoiceBridge = {
   connect(): Promise<void>;
   sendAudio(audio: Buffer): void;
diff --git a/ui/src/styles/chat/layout.css b/ui/src/styles/chat/layout.css
index d4bade8e9db..e26f1d2f7b6 100644
--- a/ui/src/styles/chat/layout.css
+++ b/ui/src/styles/chat/layout.css
@@ -584,6 +584,15 @@
   background: color-mix(in srgb, var(--accent) 12%, transparent);
 }
 
+.agent-chat__input-btn--talk {
+  color: var(--danger, #ef4444);
+  background: color-mix(in srgb, var(--danger, #ef4444) 14%, transparent);
+}
+
+.agent-chat__talk-status {
+  color: var(--text);
+}
+
 .agent-chat__input-divider {
   width: 1px;
   height: 16px;
diff --git a/ui/src/ui/app-lifecycle.ts b/ui/src/ui/app-lifecycle.ts
index 8fd8ecdda39..67e25af88f3 100644
--- a/ui/src/ui/app-lifecycle.ts
+++ b/ui/src/ui/app-lifecycle.ts
@@ -33,6 +33,11 @@ type LifecycleHost = {
   allowExternalEmbedUrls: boolean;
   chatHasAutoScrolled: boolean;
   chatManualRefreshInFlight: boolean;
+  realtimeTalkSession?: { stop: () => void } | null;
+  realtimeTalkActive?: boolean;
+  realtimeTalkStatus?: string;
+  realtimeTalkDetail?: string | null;
+  realtimeTalkTranscript?: string | null;
   chatLoading: boolean;
   chatMessages: unknown[];
   chatToolMessages: unknown[];
@@ -77,6 +82,12 @@ export function handleDisconnected(host: LifecycleHost) {
   stopNodesPolling(host as unknown as Parameters<typeof stopNodesPolling>[0]);
   stopLogsPolling(host as unknown as Parameters<typeof stopLogsPolling>[0]);
   stopDebugPolling(host as unknown as Parameters<typeof stopDebugPolling>[0]);
+  host.realtimeTalkSession?.stop();
+  host.realtimeTalkSession = null;
+  host.realtimeTalkActive = false;
+  host.realtimeTalkStatus = "idle";
+  host.realtimeTalkDetail = null;
+  host.realtimeTalkTranscript = null;
   host.client?.stop();
   host.client = null;
   host.connected = false;
diff --git a/ui/src/ui/app-render.ts b/ui/src/ui/app-render.ts
index 2cf5e69bc1f..c12675947e0 100644
--- a/ui/src/ui/app-render.ts
+++ b/ui/src/ui/app-render.ts
@@ -2228,6 +2228,10 @@ export function renderApp(state: AppViewState) {
               streamStartedAt: state.chatStreamStartedAt,
               draft: state.chatMessage,
               queue: state.chatQueue,
+              realtimeTalkActive: state.realtimeTalkActive,
+              realtimeTalkStatus: state.realtimeTalkStatus,
+              realtimeTalkDetail: state.realtimeTalkDetail,
+              realtimeTalkTranscript: state.realtimeTalkTranscript,
               connected: state.connected,
               canSend: state.connected,
               disabledReason: chatDisabledReason,
@@ -2256,6 +2260,7 @@ export function renderApp(state: AppViewState) {
               attachments: state.chatAttachments,
               onAttachmentsChange: (next) => (state.chatAttachments = next),
               onSend: () => state.handleSendChat(),
+              onToggleRealtimeTalk: () => state.toggleRealtimeTalk(),
               canAbort: Boolean(state.chatRunId),
               onAbort: () => void state.handleAbortChat(),
               onQueueRemove: (id) => state.removeQueuedMessage(id),
diff --git a/ui/src/ui/app-view-state.ts b/ui/src/ui/app-view-state.ts
index 528b9363ee4..73cf7aaf370 100644
--- a/ui/src/ui/app-view-state.ts
+++ b/ui/src/ui/app-view-state.ts
@@ -1,5 +1,6 @@
 import type { EventLogEntry } from "./app-events.ts";
 import type { CompactionStatus, FallbackStatus } from "./app-tool-stream.ts";
+import type { RealtimeTalkStatus } from "./chat/realtime-talk.ts";
 import type { ChatSideResult } from "./chat/side-result.ts";
 import type { CronModelSuggestionsState, CronState } from "./controllers/cron.ts";
 import type { DevicePairingList } from "./controllers/devices.ts";
@@ -92,6 +93,10 @@ export type AppViewState = {
   chatModelsLoading: boolean;
   chatModelCatalog: ModelCatalogEntry[];
   chatQueue: ChatQueueItem[];
+  realtimeTalkActive: boolean;
+  realtimeTalkStatus: RealtimeTalkStatus;
+  realtimeTalkDetail: string | null;
+  realtimeTalkTranscript: string | null;
   chatManualRefreshInFlight: boolean;
   nodesLoading: boolean;
   nodes: Array<Record<string, unknown>>;
@@ -425,6 +430,7 @@ export type AppViewState = {
     setPassword: (next: string) => void;
     setChatMessage: (next: string) => void;
     handleSendChat: (messageOverride?: string, opts?: { restoreDraft?: boolean }) => Promise<void>;
+    toggleRealtimeTalk: () => Promise<void>;
     steerQueuedChatMessage: (id: string) => Promise<void>;
     handleAbortChat: () => Promise<void>;
     removeQueuedMessage: (id: string) => void;
diff --git a/ui/src/ui/app.ts b/ui/src/ui/app.ts
index d955a73423d..e8ffedd38bd 100644
--- a/ui/src/ui/app.ts
+++ b/ui/src/ui/app.ts
@@ -57,6 +57,7 @@ import {
 import type { AppViewState } from "./app-view-state.ts";
 import { normalizeAssistantIdentity } from "./assistant-identity.ts";
 import { exportChatMarkdown } from "./chat/export.ts";
+import { RealtimeTalkSession, type RealtimeTalkStatus } from "./chat/realtime-talk.ts";
 import type { ChatSideResult } from "./chat/side-result.ts";
 import {
   loadToolsEffective as loadToolsEffectiveInternal,
@@ -192,6 +193,11 @@ export class OpenClawApp extends LitElement {
   @state() chatModelCatalog: ModelCatalogEntry[] = [];
   @state() chatQueue: ChatQueueItem[] = [];
   @state() chatAttachments: ChatAttachment[] = [];
+  @state() realtimeTalkActive = false;
+  @state() realtimeTalkStatus: RealtimeTalkStatus = "idle";
+  @state() realtimeTalkDetail: string | null = null;
+  @state() realtimeTalkTranscript: string | null = null;
+  private realtimeTalkSession: RealtimeTalkSession | null = null;
   @state() chatManualRefreshInFlight = false;
   @state() navDrawerOpen = false;
 
@@ -710,6 +716,68 @@ export class OpenClawApp extends LitElement {
     );
   }
 
+  /**
+   * Starts a realtime talk session, or stops the one that is running.
+   *
+   * A failed start is reported through `realtimeTalkStatus`, `realtimeTalkDetail` and
+   * `lastError` instead of being thrown.
+   */
+  async toggleRealtimeTalk() {
+    if (this.realtimeTalkSession) {
+      this.realtimeTalkSession.stop();
+      this.realtimeTalkSession = null;
+      this.realtimeTalkActive = false;
+      this.realtimeTalkStatus = "idle";
+      this.realtimeTalkDetail = null;
+      this.realtimeTalkTranscript = null;
+      return;
+    }
+    if (!this.client || !this.connected) {
+      this.lastError = "Gateway not connected";
+      return;
+    }
+    this.realtimeTalkActive = true;
+    this.realtimeTalkStatus = "connecting";
+    this.realtimeTalkDetail = null;
+    this.realtimeTalkTranscript = null;
+    // A session can still call back after it was replaced or stopped (for example when a
+    // pending consult finishes), so every callback first checks that it is still current.
+    const session = new RealtimeTalkSession(this.client, this.sessionKey, {
+      onStatus: (status, detail) => {
+        if (this.realtimeTalkSession !== session) {
+          return;
+        }
+        this.realtimeTalkStatus = status;
+        this.realtimeTalkDetail = detail ?? null;
+        if (status === "idle" || status === "error") {
+          this.realtimeTalkActive = status !== "idle";
+        }
+      },
+      onTranscript: (entry) => {
+        if (this.realtimeTalkSession !== session) {
+          return;
+        }
+        this.realtimeTalkTranscript = `${entry.role === "user" ? "You" : "OpenClaw"}: ${entry.text}`;
+      },
+    });
+    this.realtimeTalkSession = session;
+    try {
+      await session.start();
+    } catch (error) {
+      const superseded = this.realtimeTalkSession !== session;
+      session.stop();
+      if (superseded) {
+        // This session was already stopped or replaced; its failure must not overwrite newer state.
+        return;
+      }
+      this.realtimeTalkSession = null;
+      this.realtimeTalkActive = false;
+      this.realtimeTalkStatus = "error";
+      this.realtimeTalkDetail = error instanceof Error ? error.message : String(error);
+      this.lastError = this.realtimeTalkDetail;
+    }
+  }
+
   async steerQueuedChatMessage(id: string) {
     await steerQueuedChatMessageInternal(
       this as unknown as Parameters<typeof steerQueuedChatMessageInternal>[0],
diff --git a/ui/src/ui/chat/realtime-talk.ts b/ui/src/ui/chat/realtime-talk.ts
new file mode 100644
index 00000000000..bb980bf2775
--- /dev/null
+++ b/ui/src/ui/chat/realtime-talk.ts
@@ -0,0 +1,383 @@
+import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "../../../../src/realtime-voice/agent-consult-tool.js";
+import type { GatewayBrowserClient, GatewayEventFrame } from "../gateway.ts";
+import { generateUUID } from "../uuid.ts";
+
+/** Coarse session state reported to the UI through `onStatus`. */
+export type RealtimeTalkStatus = "idle" | "connecting" | "listening" | "thinking" | "error";
+
+/** Optional observers for status changes and completed transcripts. */
+export type RealtimeTalkCallbacks = {
+  onStatus?: (status: RealtimeTalkStatus, detail?: string) => void;
+  onTranscript?: (entry: { role: "user" | "assistant"; text: string; final: boolean }) => void;
+};
+
+/** Payload returned by the gateway `talk.realtime.session` request. */
+export type RealtimeTalkSessionResult = {
+  provider: string;
+  /** Sent as the bearer token of the WebRTC SDP exchange. */
+  clientSecret: string;
+  model?: string;
+  voice?: string;
+  expiresAt?: number;
+};
+
+/** The fields of a realtime data-channel event that this module reads. */
+type RealtimeServerEvent = {
+  type?: string;
+  item_id?: string;
+  call_id?: string;
+  name?: string;
+  delta?: string;
+  transcript?: string;
+  arguments?: string;
+};
+
+/** Function-call arguments accumulated from streamed delta events. */
+type ToolBuffer = {
+  name: string;
+  callId: string;
+  args: string;
+};
+
+/** The fields of a gateway `chat` event payload that this module reads. */
+type ChatPayload = {
+  runId?: string;
+  state?: string;
+  errorMessage?: string;
+  message?: unknown;
+};
+
+/**
+ * Pulls the displayable text out of a chat message of unknown shape.
+ *
+ * A string `text` property wins. Otherwise the `text` of every `type: "text"` entry of a
+ * `content` array is joined with blank lines. Any other shape yields an empty string.
+ */
+function extractTextFromMessage(message: unknown): string {
+  if (!message || typeof message !== "object") {
+    return "";
+  }
+  const record = message as Record<string, unknown>;
+  if (typeof record.text === "string") {
+    return record.text;
+  }
+  const content = Array.isArray(record.content) ? record.content : [];
+  const parts = content
+    .map((block) => {
+      if (!block || typeof block !== "object") {
+        return "";
+      }
+      const entry = block as Record<string, unknown>;
+      return entry.type === "text" && typeof entry.text === "string" ? entry.text : "";
+    })
+    .filter(Boolean);
+  return parts.join("\n\n").trim();
+}
+
+/**
+ * Resolves with the text of the `final` chat event for `runId`.
+ *
+ * Rejects when the run reports `error`, or when neither state arrives within `timeoutMs`.
+ * The gateway listener and the timer are released on every outcome.
+ */
+function waitForChatResult(params: {
+  client: GatewayBrowserClient;
+  runId: string;
+  timeoutMs: number;
+}): Promise<string> {
+  return new Promise((resolve, reject) => {
+    const timer = window.setTimeout(() => {
+      unsubscribe();
+      reject(new Error("OpenClaw tool call timed out"));
+    }, params.timeoutMs);
+    const unsubscribe = params.client.addEventListener((evt: GatewayEventFrame) => {
+      if (evt.event !== "chat") {
+        return;
+      }
+      const payload = evt.payload as ChatPayload | undefined;
+      if (!payload || payload.runId !== params.runId) {
+        return;
+      }
+      if (payload.state === "final") {
+        window.clearTimeout(timer);
+        unsubscribe();
+        resolve(extractTextFromMessage(payload.message) || "OpenClaw finished with no text.");
+      } else if (payload.state === "error") {
+        window.clearTimeout(timer);
+        unsubscribe();
+        reject(new Error(payload.errorMessage ?? "OpenClaw tool call failed"));
+      }
+    });
+  });
+}
+
+/**
+ * One browser-side realtime voice conversation.
+ *
+ * Requests a client secret from the gateway, exchanges microphone and speaker audio over
+ * WebRTC, and answers the model's agent-consult tool calls by sending the question
+ * through `chat.send` on the same session key.
+ */
+export class RealtimeTalkSession {
+  private peer: RTCPeerConnection | null = null;
+  private channel: RTCDataChannel | null = null;
+  private media: MediaStream | null = null;
+  private audio: HTMLAudioElement | null = null;
+  private closed = false;
+  private toolBuffers = new Map<string, ToolBuffer>();
+
+  constructor(
+    private readonly client: GatewayBrowserClient,
+    private readonly sessionKey: string,
+    private readonly callbacks: RealtimeTalkCallbacks = {},
+  ) {}
+
+  /**
+   * Opens the realtime connection.
+   *
+   * Resolves without connecting when `stop()` is called while setup is still in flight.
+   *
+   * @throws Error when WebRTC or microphone capture is unavailable, or when a setup step
+   * fails while the session has not been stopped.
+   */
+  async start(): Promise<void> {
+    if (!navigator.mediaDevices?.getUserMedia || typeof RTCPeerConnection === "undefined") {
+      throw new Error("Realtime Talk requires browser WebRTC and microphone access");
+    }
+    this.closed = false;
+    this.callbacks.onStatus?.("connecting");
+    try {
+      await this.connect();
+    } catch (error) {
+      // stop() closes the peer underneath any in-flight setup step, which makes that step
+      // reject. That is a cancellation the caller asked for, not a failure to report.
+      if (!this.isStopped()) {
+        throw error;
+      }
+    }
+  }
+
+  /** Releases the connection, microphone and audio element, and reports `idle`. */
+  stop(): void {
+    this.closed = true;
+    this.callbacks.onStatus?.("idle");
+    this.channel?.close();
+    this.channel = null;
+    this.peer?.close();
+    this.peer = null;
+    this.media?.getTracks().forEach((track) => track.stop());
+    this.media = null;
+    this.audio?.remove();
+    this.audio = null;
+    this.toolBuffers.clear();
+  }
+
+  /** Reads `closed` via a call so the compiler cannot narrow it to the `false` set in `start()`. */
+  private isStopped(): boolean {
+    return this.closed;
+  }
+
+  /** Runs the setup steps, abandoning them once `stop()` has been called. */
+  private async connect(): Promise<void> {
+    const session = await this.client.request<RealtimeTalkSessionResult>("talk.realtime.session", {
+      sessionKey: this.sessionKey,
+    });
+    if (this.isStopped()) {
+      return;
+    }
+    const peer = new RTCPeerConnection();
+    this.peer = peer;
+    this.audio = document.createElement("audio");
+    this.audio.autoplay = true;
+    this.audio.style.display = "none";
+    document.body.append(this.audio);
+    peer.addEventListener("track", (event) => {
+      if (this.audio) {
+        this.audio.srcObject = event.streams[0];
+      }
+    });
+    const media = await navigator.mediaDevices.getUserMedia({ audio: true });
+    if (this.isStopped()) {
+      // stop() ran before this stream existed, so it could not release the microphone.
+      media.getTracks().forEach((track) => track.stop());
+      return;
+    }
+    this.media = media;
+    for (const track of media.getAudioTracks()) {
+      peer.addTrack(track, media);
+    }
+    const channel = peer.createDataChannel("oai-events");
+    this.channel = channel;
+    channel.addEventListener("open", () => this.callbacks.onStatus?.("listening"));
+    channel.addEventListener("message", (event) => this.handleRealtimeEvent(event.data));
+    peer.addEventListener("connectionstatechange", () => {
+      if (this.closed) {
+        return;
+      }
+      if (peer.connectionState === "failed" || peer.connectionState === "closed") {
+        this.callbacks.onStatus?.("error", "Realtime connection closed");
+      }
+    });
+
+    const offer = await peer.createOffer();
+    await peer.setLocalDescription(offer);
+    const sdp = await fetch("https://api.openai.com/v1/realtime/calls", {
+      method: "POST",
+      body: offer.sdp,
+      headers: {
+        Authorization: `Bearer ${session.clientSecret}`,
+        "Content-Type": "application/sdp",
+      },
+    });
+    if (!sdp.ok) {
+      throw new Error(`Realtime WebRTC setup failed (${sdp.status})`);
+    }
+    await peer.setRemoteDescription({
+      type: "answer",
+      sdp: await sdp.text(),
+    });
+  }
+
+  /** Sends a client event over the data channel; dropped when the channel is not open. */
+  private send(event: unknown): void {
+    if (this.channel?.readyState === "open") {
+      this.channel.send(JSON.stringify(event));
+    }
+  }
+
+  /** Dispatches one data-channel message; unparseable or unhandled events are ignored. */
+  private handleRealtimeEvent(data: unknown): void {
+    let event: RealtimeServerEvent;
+    try {
+      event = JSON.parse(String(data)) as RealtimeServerEvent;
+    } catch {
+      return;
+    }
+    switch (event.type) {
+      case "conversation.item.input_audio_transcription.completed":
+        if (event.transcript) {
+          this.callbacks.onTranscript?.({ role: "user", text: event.transcript, final: true });
+        }
+        return;
+      // The GA Realtime API renamed this event with an `output_` prefix; the earlier name
+      // is still accepted so both event vocabularies produce assistant transcripts.
+      case "response.output_audio_transcript.done":
+      case "response.audio_transcript.done":
+        if (event.transcript) {
+          this.callbacks.onTranscript?.({
+            role: "assistant",
+            text: event.transcript,
+            final: true,
+          });
+        }
+        return;
+      case "response.function_call_arguments.delta":
+        this.bufferToolDelta(event);
+        return;
+      case "response.function_call_arguments.done":
+        void this.handleToolCall(event);
+        return;
+      default:
+        return;
+    }
+  }
+
+  /** Appends a streamed argument fragment to the buffer for its `item_id`. */
+  private bufferToolDelta(event: RealtimeServerEvent): void {
+    const key = event.item_id ?? "unknown";
+    const existing = this.toolBuffers.get(key);
+    if (existing) {
+      existing.args += event.delta ?? "";
+      return;
+    }
+    this.toolBuffers.set(key, {
+      name: event.name ?? "",
+      callId: event.call_id ?? "",
+      args: event.delta ?? "",
+    });
+  }
+
+  /**
+   * Answers a completed agent-consult call with the result of a `chat.send` run.
+   *
+   * Calls to any other tool, or without a call id, are ignored. Failures are returned to
+   * the model as an `error` tool output instead of being thrown.
+   */
+  private async handleToolCall(event: RealtimeServerEvent): Promise<void> {
+    const key = event.item_id ?? "unknown";
+    const buffered = this.toolBuffers.get(key);
+    this.toolBuffers.delete(key);
+    const name = buffered?.name || event.name || "";
+    const callId = buffered?.callId || event.call_id || "";
+    if (name !== REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME || !callId) {
+      return;
+    }
+    this.callbacks.onStatus?.("thinking");
+    let question = "";
+    try {
+      const args = JSON.parse(buffered?.args || event.arguments || "{}") as {
+        question?: unknown;
+        context?: unknown;
+        responseStyle?: unknown;
+      };
+      question = typeof args.question === "string" ? args.question.trim() : "";
+      const context = typeof args.context === "string" ? args.context.trim() : "";
+      const responseStyle = typeof args.responseStyle === "string" ? args.responseStyle.trim() : "";
+      if (question && (context || responseStyle)) {
+        question = [
+          question,
+          context ? `Context:\n${context}` : undefined,
+          responseStyle ? `Spoken style:\n${responseStyle}` : undefined,
+        ]
+          .filter(Boolean)
+          .join("\n\n");
+      }
+    } catch {
+      // Malformed arguments leave `question` empty and are rejected just below.
+    }
+    if (!question) {
+      this.submitToolResult(callId, {
+        error: `${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} requires a question`,
+      });
+      this.callbacks.onStatus?.("listening");
+      return;
+    }
+    try {
+      const idempotencyKey = generateUUID();
+      const response = await this.client.request<{ runId?: string }>("chat.send", {
+        sessionKey: this.sessionKey,
+        message: question,
+        idempotencyKey,
+      });
+      const result = await waitForChatResult({
+        client: this.client,
+        runId: response.runId ?? idempotencyKey,
+        timeoutMs: 120_000,
+      });
+      this.submitToolResult(callId, { result });
+    } catch (error) {
+      this.submitToolResult(callId, {
+        error: error instanceof Error ? error.message : String(error),
+      });
+    } finally {
+      // The consult can outlive the connection. Only an open channel is still listening;
+      // after stop() or a failed connection the `idle`/`error` status must stay in place.
+      if (this.channel?.readyState === "open") {
+        this.callbacks.onStatus?.("listening");
+      }
+    }
+  }
+
+  /** Returns a tool output for `callId` and asks the model to continue its response. */
+  private submitToolResult(callId: string, result: unknown): void {
+    this.send({
+      type: "conversation.item.create",
+      item: {
+        type: "function_call_output",
+        call_id: callId,
+        output: JSON.stringify(result),
+      },
+    });
+    this.send({ type: "response.create" });
+  }
+}
diff --git a/ui/src/ui/gateway.ts b/ui/src/ui/gateway.ts
index dc34e84aeff..9e3a4990281 100644
--- a/ui/src/ui/gateway.ts
+++ b/ui/src/ui/gateway.ts
@@ -223,6 +223,8 @@ export type GatewayBrowserClientOptions = {
   onGap?: (info: { expected: number; received: number }) => void;
 };
 
+export type GatewayEventListener = (evt: GatewayEventFrame) => void;
+
 // 4008 = application-defined code (browser rejects 1008 "Policy Violation")
 const CONNECT_FAILED_CLOSE_CODE = 4008;
 
@@ -298,6 +300,7 @@ export class GatewayBrowserClient {
   private pendingConnectError: GatewayErrorInfo | undefined;
   private pendingDeviceTokenRetry = false;
   private deviceTokenRetryBudgetUsed = false;
+  private eventListeners = new Set<GatewayEventListener>();
 
   constructor(private opts: GatewayBrowserClientOptions) {}
 
@@ -549,6 +552,9 @@ export class GatewayBrowserClient {
       }
       try {
         this.opts.onEvent?.(evt);
+        for (const listener of this.eventListeners) {
+          listener(evt);
+        }
       } catch (err) {
         console.error("[gateway] event handler error:", err);
       }
@@ -625,6 +631,19 @@ export class GatewayBrowserClient {
     return p;
   }
 
+  /**
+   * Registers `listener` in the client's event listener set.
+   *
+   * @returns A function that removes `listener` from the set again.
+   */
+  addEventListener(listener: GatewayEventListener): () => void {
+    const unsubscribe = (): void => {
+      this.eventListeners.delete(listener);
+    };
+    this.eventListeners.add(listener);
+    return unsubscribe;
+  }
+
   private queueConnect() {
     this.connectNonce = null;
     this.connectSent = false;
diff --git a/ui/src/ui/views/chat.ts b/ui/src/ui/views/chat.ts
index 896b5254239..ae8621787f3 100644
--- a/ui/src/ui/views/chat.ts
+++ b/ui/src/ui/views/chat.ts
@@ -18,6 +18,7 @@ import {
 import { InputHistory } from "../chat/input-history.ts";
 import { PinnedMessages } from "../chat/pinned-messages.ts";
 import { getPinnedMessageSummary } from "../chat/pinned-summary.ts";
+import type { RealtimeTalkStatus } from "../chat/realtime-talk.ts";
 import { renderChatRunControls } from "../chat/run-controls.ts";
 import { getOrCreateSessionCacheValue } from "../chat/session-cache.ts";
 import { renderSideResult } from "../chat/side-result-render.ts";
@@ -65,6 +66,10 @@ export type ChatProps = {
   assistantAvatarUrl?: string | null;
   draft: string;
   queue: ChatQueueItem[];
+  realtimeTalkActive?: boolean;
+  realtimeTalkStatus?: RealtimeTalkStatus;
+  realtimeTalkDetail?: string | null;
+  realtimeTalkTranscript?: string | null;
   connected: boolean;
   canSend: boolean;
   disabledReason: string | null;
@@ -95,6 +100,7 @@ export type ChatProps = {
   onDraftChange: (next: string) => void;
   onRequestUpdate?: () => void;
   onSend: () => void;
+  onToggleRealtimeTalk?: () => void;
   onAbort?: () => void;
   onQueueRemove: (id: string) => void;
   onQueueSteer?: (id: string) => void;
@@ -1207,6 +1213,19 @@ export function renderChat(props: ChatProps) {
         ${vs.sttRecording && vs.sttInterimText
           ? html`<div class="agent-chat__stt-interim">${vs.sttInterimText}</div>`
           : nothing}
+        ${props.realtimeTalkActive || props.realtimeTalkDetail || props.realtimeTalkTranscript
+          ? html`
+              <div class="agent-chat__stt-interim agent-chat__talk-status">
+                ${props.realtimeTalkDetail ??
+                props.realtimeTalkTranscript ??
+                (props.realtimeTalkStatus === "thinking"
+                  ? "Asking OpenClaw..."
+                  : props.realtimeTalkStatus === "connecting"
+                    ? "Connecting Talk..."
+                    : "Talk live")}
+              </div>
+            `
+          : nothing}
 
         <textarea
           ${ref((el) => el && adjustTextareaHeight(el as HTMLTextAreaElement))}
@@ -1288,6 +1307,21 @@ export function renderChat(props: ChatProps) {
                   </button>
                 `
               : nothing}
+            ${props.onToggleRealtimeTalk
+              ? html`
+                  <button
+                    class="agent-chat__input-btn ${props.realtimeTalkActive
+                      ? "agent-chat__input-btn--talk"
+                      : ""}"
+                    @click=${props.onToggleRealtimeTalk}
+                    title=${props.realtimeTalkActive ? "Stop Talk" : "Start Talk"}
+                    aria-label=${props.realtimeTalkActive ? "Stop Talk" : "Start Talk"}
+                    ?disabled=${!props.connected}
+                  >
+                    ${props.realtimeTalkActive ? icons.volume2 : icons.radio}
+                  </button>
+                `
+              : nothing}
             ${tokens ? html`<span class="agent-chat__token-count">${tokens}</span>` : nothing}
           </div>