From cfb0c34ff6bda189defa3152e8dce15d93cb1afb Mon Sep 17 00:00:00 2001
From: VACInc <3279061+VACInc@users.noreply.github.com>
Date: Tue, 28 Apr 2026 01:30:49 -0400
Subject: [PATCH] feat: add realtime consult overrides

---
 docs/gateway/config-agents.md                |  4 ++
 docs/gateway/configuration-reference.md      |  2 +
 docs/nodes/talk.md                           |  2 +
 docs/plugins/voice-call.md                   |  2 +
 extensions/voice-call/README.md              |  2 +
 extensions/voice-call/openclaw.plugin.json   | 17 ++++++
 extensions/voice-call/src/config.test.ts     | 28 ++++++++++
 extensions/voice-call/src/config.ts          | 22 ++++++++
 extensions/voice-call/src/runtime.test.ts    | 58 ++++++++++++++++++++
 extensions/voice-call/src/runtime.ts         | 13 +++--
 src/auto-reply/get-reply-options.types.ts    |  4 ++
 src/auto-reply/reply/get-reply-directives.ts | 13 ++++-
 src/config/schema.help.quality.test.ts       |  2 +
 src/config/schema.help.ts                    |  4 ++
 src/config/schema.labels.ts                  |  2 +
 src/config/talk.normalize.test.ts            |  4 ++
 src/config/talk.ts                           | 23 +++++++-
 src/config/types.gateway.ts                  | 12 ++++
 src/config/zod-schema.talk.test.ts           | 12 ++++
 src/config/zod-schema.ts                     |  4 ++
 src/gateway/protocol/schema/channels.ts      |  2 +
 src/gateway/protocol/schema/logs-chat.ts     |  1 +
 src/gateway/server-methods/chat.ts           |  3 +
 src/gateway/server-methods/talk-client.ts    |  8 +++
 src/gateway/server-methods/talk.test.ts      | 40 ++++++++++++++
 src/talk/agent-consult-runtime.test.ts       |  2 +
 src/talk/agent-consult-runtime.ts            |  2 +
 ui/src/ui/chat/realtime-talk-shared.ts       | 10 ++++
 ui/src/ui/chat/realtime-talk.ts              |  2 +
 ui/src/ui/realtime-talk-consult.test.ts      | 54 ++++++++++++++++++
 30 files changed, 346 insertions(+), 8 deletions(-)
 create mode 100644 ui/src/ui/realtime-talk-consult.test.ts

diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md
index 11a304df8c3..bbdea0635fb 100644
--- a/docs/gateway/config-agents.md
+++ b/docs/gateway/config-agents.md
@@ -1378,6 +1378,8 @@ Defaults for Talk mode (macOS/iOS/Android).
       },
       system: {},
     },
+    consultThinkingLevel: "low",
+    consultFastMode: true,
     speechLocale: "ru-RU",
     silenceTimeoutMs: 1500,
     interruptOnSpeech: true,
@@ -1405,6 +1407,8 @@ Defaults for Talk mode (macOS/iOS/Android).
 - `providers.*.voiceAliases` lets Talk directives use friendly names.
 - `providers.mlx.modelId` selects the Hugging Face repo used by the macOS local MLX helper. If omitted, macOS uses `mlx-community/Soprano-80M-bf16`.
 - macOS MLX playback runs through the bundled `openclaw-mlx-tts` helper when present, or an executable on `PATH`; `OPENCLAW_MLX_TTS_BIN` overrides the helper path for development.
+- `consultThinkingLevel` controls the thinking level for the full OpenClaw agent run behind Control UI Talk realtime `openclaw_agent_consult` calls. Leave unset to preserve normal session/model behavior.
+- `consultFastMode` sets a one-shot fast-mode override for Control UI Talk realtime consults without changing the session's normal fast-mode setting.
 - `speechLocale` sets the BCP 47 locale id used by iOS/macOS Talk speech recognition. Leave unset to use the device default.
 - `silenceTimeoutMs` controls how long Talk mode waits after user silence before it sends the transcript. Unset keeps the platform default pause window (`700 ms on macOS and Android, 900 ms on iOS`).
 
diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md
index e5d869b3a05..3d696f3f783 100644
--- a/docs/gateway/configuration-reference.md
+++ b/docs/gateway/configuration-reference.md
@@ -48,6 +48,8 @@ Moved to a dedicated page - see
 - `session.*` (session lifecycle, compaction, pruning)
 - `messages.*` (message delivery, TTS, markdown rendering)
 - `talk.*` (Talk mode)
+  - `talk.consultThinkingLevel`: thinking level override for the full OpenClaw agent run behind Control UI Talk realtime consults
+  - `talk.consultFastMode`: one-shot fast-mode override for Control UI Talk realtime consults
   - `talk.speechLocale`: optional BCP 47 locale id for Talk speech recognition on iOS/macOS
   - `talk.silenceTimeoutMs`: when unset, Talk keeps the platform default pause window before sending the transcript (`700 ms on macOS and Android, 900 ms on iOS`)
 
diff --git a/docs/nodes/talk.md b/docs/nodes/talk.md
index 4fa1f65cd6f..3280233fb9e 100644
--- a/docs/nodes/talk.md
+++ b/docs/nodes/talk.md
@@ -102,6 +102,8 @@ Defaults:
 - `providers.elevenlabs.modelId`: defaults to `eleven_v3` when unset.
 - `providers.mlx.modelId`: defaults to `mlx-community/Soprano-80M-bf16` when unset.
 - `providers.elevenlabs.apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available).
+- `consultThinkingLevel`: optional thinking level override for the full OpenClaw agent run behind realtime `openclaw_agent_consult` calls.
+- `consultFastMode`: optional fast-mode override for realtime `openclaw_agent_consult` calls.
 - `realtime.provider`: selects the active browser/server realtime voice provider. Use `openai` for WebRTC, `google` for provider WebSocket, or a bridge-only provider through Gateway relay.
 - `realtime.providers.<provider>` stores provider-owned realtime config. The browser receives only ephemeral or constrained session credentials, never a standard API key.
 - `realtime.providers.openai.voice`: built-in OpenAI Realtime voice id. Current `gpt-realtime-2` voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`; `marin` and `cedar` are recommended for best quality.
diff --git a/docs/plugins/voice-call.md b/docs/plugins/voice-call.md
index 5d65eeaf69d..eaeceebea2f 100644
--- a/docs/plugins/voice-call.md
+++ b/docs/plugins/voice-call.md
@@ -316,6 +316,8 @@ for tool work, current information, memory lookups, or workspace state.
                 instructions: "Speak briefly. Call openclaw_agent_consult before using deeper tools.",
                 toolPolicy: "safe-read-only",
                 consultPolicy: "substantive",
+                consultThinkingLevel: "low",
+                consultFastMode: true,
                 agentContext: { enabled: true },
                 providers: {
                   google: {
diff --git a/extensions/voice-call/README.md b/extensions/voice-call/README.md
index 1832a0169c7..5c7b03f2e73 100644
--- a/extensions/voice-call/README.md
+++ b/extensions/voice-call/README.md
@@ -106,6 +106,8 @@ Notes:
 - advanced webhook, streaming, and tunnel notes: `https://docs.openclaw.ai/plugins/voice-call`
 - `responseModel` is optional. When unset, voice responses use the runtime default model.
 - `sessionScope` defaults to `per-phone`, preserving caller memory across calls. Use `per-call` for reception, booking, IVR, and bridge flows where each carrier call should start fresh.
+- `realtime.consultThinkingLevel` is optional. When set, it overrides the thinking level used by the model behind realtime `openclaw_agent_consult` calls.
+- `realtime.consultFastMode` is optional. When set, it forces fast mode on (`true`) or off (`false`) for realtime `openclaw_agent_consult` calls.
 
 ## Stale call reaper
 
diff --git a/extensions/voice-call/openclaw.plugin.json b/extensions/voice-call/openclaw.plugin.json
index 5abc35329d5..7a4c4a80885 100644
--- a/extensions/voice-call/openclaw.plugin.json
+++ b/extensions/voice-call/openclaw.plugin.json
@@ -153,6 +153,16 @@
       "help": "Guides when the realtime voice model should call openclaw_agent_consult.",
       "advanced": true
     },
+    "realtime.consultThinkingLevel": {
+      "label": "Consult Thinking Level",
+      "help": "Optional thinking level override for the regular agent run behind realtime openclaw_agent_consult calls.",
+      "advanced": true
+    },
+    "realtime.consultFastMode": {
+      "label": "Consult Fast Mode",
+      "help": "Optional fast mode override for the regular agent run behind realtime openclaw_agent_consult calls.",
+      "advanced": true
+    },
     "realtime.fastContext.enabled": {
       "label": "Enable Fast Realtime Context",
       "help": "Searches memory/session context before the full consult agent.",
@@ -515,6 +525,13 @@
             "type": "string",
             "enum": ["auto", "substantive", "always"]
           },
+          "consultThinkingLevel": {
+            "type": "string",
+            "enum": ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"]
+          },
+          "consultFastMode": {
+            "type": "boolean"
+          },
           "tools": {
             "type": "array",
             "items": {
diff --git a/extensions/voice-call/src/config.test.ts b/extensions/voice-call/src/config.test.ts
index 8fcb0f4e332..c09092ce573 100644
--- a/extensions/voice-call/src/config.test.ts
+++ b/extensions/voice-call/src/config.test.ts
@@ -396,6 +396,8 @@ describe("normalizeVoiceCallConfig", () => {
       sources: ["memory", "sessions"],
       fallbackToConsult: false,
     });
+    expect(normalized.realtime.consultThinkingLevel).toBeUndefined();
+    expect(normalized.realtime.consultFastMode).toBeUndefined();
     expect(normalized.realtime.agentContext).toEqual({
       enabled: false,
       maxChars: 6000,
@@ -468,6 +470,32 @@ describe("resolveVoiceCallConfig realtime settings", () => {
     expect(resolved.realtime.provider).toBeUndefined();
   });
 
+  it("preserves configured realtime consult overrides", () => {
+    const resolved = resolveVoiceCallConfig({
+      enabled: true,
+      provider: "mock",
+      realtime: {
+        consultThinkingLevel: "low",
+        consultFastMode: true,
+      },
+    });
+
+    expect(resolved.realtime.consultThinkingLevel).toBe("low");
+    expect(resolved.realtime.consultFastMode).toBe(true);
+  });
+
+  it("rejects invalid realtime consult thinking levels", () => {
+    expect(() =>
+      resolveVoiceCallConfig({
+        enabled: true,
+        provider: "mock",
+        realtime: {
+          consultThinkingLevel: "turbo",
+        },
+      } as never),
+    ).toThrow(/Invalid option/);
+  });
+
   it("leaves responseModel unset so voice responses can inherit runtime defaults", () => {
     const resolved = resolveVoiceCallConfig({
       enabled: true,
diff --git a/extensions/voice-call/src/config.ts b/extensions/voice-call/src/config.ts
index ec52ba52575..146f822840b 100644
--- a/extensions/voice-call/src/config.ts
+++ b/extensions/voice-call/src/config.ts
@@ -287,6 +287,20 @@ export type VoiceCallRealtimeAgentContextConfig = z.infer<
   typeof VoiceCallRealtimeAgentContextConfigSchema
 >;
 
+export const VoiceCallRealtimeConsultThinkingLevelSchema = z.enum([
+  "off",
+  "minimal",
+  "low",
+  "medium",
+  "high",
+  "xhigh",
+  "adaptive",
+  "max",
+]);
+export type VoiceCallRealtimeConsultThinkingLevel = z.infer<
+  typeof VoiceCallRealtimeConsultThinkingLevelSchema
+>;
+
 const VoiceCallStreamingProvidersConfigSchema = z
   .record(z.string(), z.record(z.string(), z.unknown()))
   .default({});
@@ -305,6 +319,10 @@ const VoiceCallRealtimeConfigSchema = z
     toolPolicy: VoiceCallRealtimeToolPolicySchema.default("safe-read-only"),
     /** Guidance for when the realtime model should call the OpenClaw agent consult tool. */
     consultPolicy: VoiceCallRealtimeConsultPolicySchema.default("auto"),
+    /** Optional thinking level override for the regular agent behind realtime consults. */
+    consultThinkingLevel: VoiceCallRealtimeConsultThinkingLevelSchema.optional(),
+    /** Optional fast mode override for the regular agent behind realtime consults. */
+    consultFastMode: z.boolean().optional(),
     /** Tool definitions exposed to the realtime provider. */
     tools: z.array(RealtimeToolSchema).default([]),
     /** Low-latency memory/session context for the consult tool. */
@@ -686,6 +704,10 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
         defaultRealtimeStreamPathForServePath(serve.path ?? defaults.serve.path),
       tools:
         (config.realtime?.tools as RealtimeToolConfig[] | undefined) ?? defaults.realtime.tools,
+      consultThinkingLevel: VoiceCallRealtimeConsultThinkingLevelSchema.optional().parse(
+        config.realtime?.consultThinkingLevel ?? defaults.realtime.consultThinkingLevel,
+      ),
+      consultFastMode: config.realtime?.consultFastMode ?? defaults.realtime.consultFastMode,
       fastContext: realtimeFastContext,
       agentContext: realtimeAgentContext,
       providers: realtimeProviders,
diff --git a/extensions/voice-call/src/runtime.test.ts b/extensions/voice-call/src/runtime.test.ts
index a2973c96788..2af3778e933 100644
--- a/extensions/voice-call/src/runtime.test.ts
+++ b/extensions/voice-call/src/runtime.test.ts
@@ -537,4 +537,62 @@ describe("createVoiceCallRuntime lifecycle", () => {
     });
     expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
   });
+
+  it("uses the configured realtime consult thinking level when set", async () => {
+    const config = createBaseConfig();
+    config.inboundPolicy = "allowlist";
+    config.realtime.enabled = true;
+    config.realtime.consultThinkingLevel = "low";
+    config.realtime.consultFastMode = true;
+    const sessionStore: Record<string, unknown> = {};
+    const runEmbeddedPiAgent = vi.fn(async () => ({
+      payloads: [{ text: "Done." }],
+      meta: {},
+    }));
+    const agentRuntime = {
+      defaults: { provider: "openai", model: "gpt-5.4" },
+      resolveAgentDir: vi.fn(() => "/tmp/agent"),
+      resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"),
+      resolveAgentIdentity: vi.fn(),
+      resolveThinkingDefault: vi.fn(() => "high"),
+      resolveAgentTimeoutMs: vi.fn(() => 30_000),
+      ensureAgentWorkspace: vi.fn(async () => {}),
+      session: {
+        resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
+        loadSessionStore: vi.fn(() => sessionStore),
+        saveSessionStore: vi.fn(async () => {}),
+        updateSessionStore: vi.fn(async (_storePath, mutator) => mutator(sessionStore)),
+        resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
+      },
+      runEmbeddedPiAgent,
+    };
+    mocks.managerGetCall.mockReturnValue({
+      callId: "call-1",
+      direction: "outbound",
+      from: "+15550001234",
+      to: "+15550009999",
+      transcript: [],
+    });
+
+    await createVoiceCallRuntime({
+      config,
+      coreConfig: {} as CoreConfig,
+      agentRuntime: agentRuntime as never,
+    });
+
+    const handler = mocks.realtimeHandlerRegisterToolHandler.mock.calls[0]?.[1] as
+      | ((args: unknown, callId: string) => Promise<unknown>)
+      | undefined;
+    await expect(handler?.({ question: "Turn on the lights." }, "call-1")).resolves.toEqual({
+      text: "Done.",
+    });
+
+    expect(agentRuntime.resolveThinkingDefault).not.toHaveBeenCalled();
+    expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
+      expect.objectContaining({
+        thinkLevel: "low",
+        fastMode: true,
+      }),
+    );
+  });
 });
diff --git a/extensions/voice-call/src/runtime.ts b/extensions/voice-call/src/runtime.ts
index 9484f1b8dfd..1a1557843ad 100644
--- a/extensions/voice-call/src/runtime.ts
+++ b/extensions/voice-call/src/runtime.ts
@@ -372,11 +372,13 @@ export async function createVoiceCallRuntime(params: {
             voiceConfig: effectiveConfig,
             agentRuntime,
           });
-          const thinkLevel = agentRuntime.resolveThinkingDefault({
-            cfg,
-            provider: agentProvider,
-            model,
-          });
+          const thinkLevel =
+            effectiveConfig.realtime.consultThinkingLevel ??
+            agentRuntime.resolveThinkingDefault({
+              cfg,
+              provider: agentProvider,
+              model,
+            });
           return await consultRealtimeVoiceAgent({
             cfg,
             agentRuntime,
@@ -395,6 +397,7 @@ export async function createVoiceCallRuntime(params: {
             provider: agentProvider,
             model,
             thinkLevel,
+            fastMode: effectiveConfig.realtime.consultFastMode,
             timeoutMs: effectiveConfig.responseTimeoutMs,
             spawnedBy: requesterSessionKey,
             contextMode: requesterSessionKey ? "fork" : undefined,
diff --git a/src/auto-reply/get-reply-options.types.ts b/src/auto-reply/get-reply-options.types.ts
index e9219bbc1f4..625d70f6504 100644
--- a/src/auto-reply/get-reply-options.types.ts
+++ b/src/auto-reply/get-reply-options.types.ts
@@ -53,6 +53,10 @@ export type GetReplyOptions = {
   suppressTyping?: boolean;
   /** Resolved heartbeat model override (provider/model string from merged per-agent config). */
   heartbeatModelOverride?: string;
+  /** One-shot thinking level override for this run; does not persist to the session. */
+  thinkingLevelOverride?: string;
+  /** One-shot fast-mode override for this run; does not persist to the session. */
+  fastModeOverride?: boolean;
   /** Controls bootstrap workspace context injection (default: full). */
   bootstrapContextMode?: "full" | "lightweight";
   /** If true, suppress tool error warning payloads for this run. */
diff --git a/src/auto-reply/reply/get-reply-directives.ts b/src/auto-reply/reply/get-reply-directives.ts
index 593383feb58..0336a6dc384 100644
--- a/src/auto-reply/reply/get-reply-directives.ts
+++ b/src/auto-reply/reply/get-reply-directives.ts
@@ -14,7 +14,13 @@ import {
 } from "../../shared/string-coerce.js";
 import { shouldHandleTextCommands } from "../commands-text-routing.js";
 import type { MsgContext, TemplateContext } from "../templating.js";
-import type { ElevatedLevel, ReasoningLevel, ThinkLevel, VerboseLevel } from "../thinking.js";
+import {
+  normalizeThinkLevel,
+  type ElevatedLevel,
+  type ReasoningLevel,
+  type ThinkLevel,
+  type VerboseLevel,
+} from "../thinking.js";
 import type { GetReplyOptions, ReplyPayload } from "../types.js";
 import { resolveBlockStreamingChunking } from "./block-streaming.js";
 import { buildCommandContext } from "./commands-context.js";
@@ -417,8 +423,11 @@ export async function resolveReplyDirectives(params: {
   });
   const defaultActivation = defaultGroupActivation(requireMention);
   const resolvedThinkLevel =
-    directives.thinkLevel ?? (targetSessionEntry?.thinkingLevel as ThinkLevel | undefined);
+    normalizeThinkLevel(opts?.thinkingLevelOverride) ??
+    directives.thinkLevel ??
+    (targetSessionEntry?.thinkingLevel as ThinkLevel | undefined);
   const resolvedFastMode =
+    opts?.fastModeOverride ??
     directives.fastMode ??
     resolveFastModeState({
       cfg,
diff --git a/src/config/schema.help.quality.test.ts b/src/config/schema.help.quality.test.ts
index 2efbac67d78..fc1641631f7 100644
--- a/src/config/schema.help.quality.test.ts
+++ b/src/config/schema.help.quality.test.ts
@@ -324,8 +324,10 @@ const TARGET_KEYS = [
   "discovery.mdns.mode",
   "gateway.controlUi.embedSandbox",
   "talk",
+  "talk.consultFastMode",
   "talk.interruptOnSpeech",
   "talk.silenceTimeoutMs",
+  "talk.consultThinkingLevel",
   "meta",
   "env",
   "env.shellEnv",
diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts
index a8139ab2916..9b4bad5c420 100644
--- a/src/config/schema.help.ts
+++ b/src/config/schema.help.ts
@@ -167,6 +167,10 @@ export const FIELD_HELP: Record<string, string> = {
     "Talk byte/session transport: webrtc, provider-websocket, gateway-relay, or managed-room.",
   "talk.realtime.brain":
     "Talk reasoning strategy: agent-consult for Gateway-mediated agent help, direct-tools for owner-only tool calls, or none.",
+  "talk.consultThinkingLevel":
+    "Use this to override the thinking level for the regular agent run behind Talk realtime consults.",
+  "talk.consultFastMode":
+    "Use this to set true or false fast mode for the regular agent run behind Talk realtime consults.",
   "talk.speechLocale":
     'BCP 47 locale id for Talk speech recognition on device nodes, for example "ru-RU". Leave unset to use each device default.',
   "talk.interruptOnSpeech":
diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts
index 6a24d598275..77da70839e4 100644
--- a/src/config/schema.labels.ts
+++ b/src/config/schema.labels.ts
@@ -841,6 +841,8 @@ export const FIELD_LABELS: Record<string, string> = {
   "talk.speechLocale": "Talk Speech Locale",
   "talk.interruptOnSpeech": "Talk Interrupt on Speech",
   "talk.silenceTimeoutMs": "Talk Silence Timeout (ms)",
+  "talk.consultThinkingLevel": "Talk Consult Thinking Level",
+  "talk.consultFastMode": "Talk Consult Fast Mode",
   messages: "Messages",
   "messages.messagePrefix": "Inbound Message Prefix",
   "messages.visibleReplies": "Visible Replies",
diff --git a/src/config/talk.normalize.test.ts b/src/config/talk.normalize.test.ts
index a2e7220469b..77ef5873128 100644
--- a/src/config/talk.normalize.test.ts
+++ b/src/config/talk.normalize.test.ts
@@ -10,6 +10,8 @@ describe("talk normalization", () => {
       modelId: "eleven_v3",
       outputFormat: "pcm_44100",
       apiKey: "secret-key", // pragma: allowlist secret
+      consultThinkingLevel: " low ",
+      consultFastMode: true,
       speechLocale: " ru-RU ",
       interruptOnSpeech: false,
       silenceTimeoutMs: 1500,
@@ -17,6 +19,8 @@ describe("talk normalization", () => {
 
     expect(normalized).toEqual({
       speechLocale: "ru-RU",
+      consultThinkingLevel: "low",
+      consultFastMode: true,
       interruptOnSpeech: false,
       silenceTimeoutMs: 1500,
     });
diff --git a/src/config/talk.ts b/src/config/talk.ts
index fd5c71643c1..8ef63e4e669 100644
--- a/src/config/talk.ts
+++ b/src/config/talk.ts
@@ -1,4 +1,5 @@
-import { normalizeOptionalString } from "../shared/string-coerce.js";
+import { normalizeThinkLevel } from "../auto-reply/thinking.js";
+import { normalizeFastMode, normalizeOptionalString } from "../shared/string-coerce.js";
 import { isRecord } from "../utils.js";
 import type {
   ResolvedTalkConfig,
@@ -157,6 +158,20 @@ export function normalizeTalkSection(value: TalkConfig | undefined): TalkConfig
   if (typeof source.interruptOnSpeech === "boolean") {
     normalized.interruptOnSpeech = source.interruptOnSpeech;
   }
+  const consultThinkingLevel = normalizeThinkLevel(
+    normalizeOptionalString(source.consultThinkingLevel),
+  );
+  if (consultThinkingLevel) {
+    normalized.consultThinkingLevel = consultThinkingLevel;
+  }
+  const rawConsultFastMode = source.consultFastMode;
+  const consultFastMode =
+    typeof rawConsultFastMode === "boolean" || typeof rawConsultFastMode === "string"
+      ? normalizeFastMode(rawConsultFastMode)
+      : undefined;
+  if (consultFastMode !== undefined) {
+    normalized.consultFastMode = consultFastMode;
+  }
   const silenceTimeoutMs = normalizeSilenceTimeoutMs(source.silenceTimeoutMs);
   if (silenceTimeoutMs !== undefined) {
     normalized.silenceTimeoutMs = silenceTimeoutMs;
@@ -225,6 +240,12 @@ export function buildTalkConfigResponse(value: unknown): TalkConfigResponse | un
   if (typeof normalized?.silenceTimeoutMs === "number") {
     payload.silenceTimeoutMs = normalized.silenceTimeoutMs;
   }
+  if (typeof normalized?.consultThinkingLevel === "string") {
+    payload.consultThinkingLevel = normalized.consultThinkingLevel;
+  }
+  if (typeof normalized?.consultFastMode === "boolean") {
+    payload.consultFastMode = normalized.consultFastMode;
+  }
   if (typeof normalized?.speechLocale === "string") {
     payload.speechLocale = normalized.speechLocale;
   }
diff --git a/src/config/types.gateway.ts b/src/config/types.gateway.ts
index fdbc89e97a5..baf87dba364 100644
--- a/src/config/types.gateway.ts
+++ b/src/config/types.gateway.ts
@@ -76,6 +76,18 @@ export type TalkConfig = {
   providers?: Record<string, TalkProviderConfig>;
   /** Realtime Talk provider, model, voice, mode, transport, and brain config. */
   realtime?: TalkRealtimeConfig;
+  /** Optional thinking level override for the agent run behind Talk realtime consults. */
+  consultThinkingLevel?:
+    | "off"
+    | "minimal"
+    | "low"
+    | "medium"
+    | "high"
+    | "xhigh"
+    | "adaptive"
+    | "max";
+  /** Optional fast mode override for the agent run behind Talk realtime consults. */
+  consultFastMode?: boolean;
   /** BCP 47 locale id used for Talk speech recognition on device nodes. */
   speechLocale?: string;
   /** Stop speaking when user starts talking (default: true). */
diff --git a/src/config/zod-schema.talk.test.ts b/src/config/zod-schema.talk.test.ts
index 111394283e8..d207319ef40 100644
--- a/src/config/zod-schema.talk.test.ts
+++ b/src/config/zod-schema.talk.test.ts
@@ -6,12 +6,24 @@ describe("OpenClawSchema talk validation", () => {
     expect(
       OpenClawSchema.safeParse({
         talk: {
+          consultThinkingLevel: "low",
+          consultFastMode: true,
           silenceTimeoutMs: 1500,
         },
       }),
     ).toMatchObject({ success: true });
   });
 
+  it("rejects invalid talk.consultThinkingLevel", () => {
+    expect(() =>
+      OpenClawSchema.parse({
+        talk: {
+          consultThinkingLevel: "turbo",
+        },
+      }),
+    ).toThrow(/consultThinkingLevel/i);
+  });
+
   it.each([
     ["boolean", true],
     ["string", "1500"],
diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts
index d39ca664192..ce528fdf4aa 100644
--- a/src/config/zod-schema.ts
+++ b/src/config/zod-schema.ts
@@ -268,6 +268,10 @@ const TalkSchema = z
     provider: z.string().optional(),
     providers: z.record(z.string(), TalkProviderEntrySchema).optional(),
     realtime: TalkRealtimeSchema.optional(),
+    consultThinkingLevel: z
+      .enum(["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"])
+      .optional(),
+    consultFastMode: z.boolean().optional(),
     speechLocale: z.string().optional(),
     interruptOnSpeech: z.boolean().optional(),
     silenceTimeoutMs: z.number().int().positive().optional(),
diff --git a/src/gateway/protocol/schema/channels.ts b/src/gateway/protocol/schema/channels.ts
index caca4028528..db2144abbbc 100644
--- a/src/gateway/protocol/schema/channels.ts
+++ b/src/gateway/protocol/schema/channels.ts
@@ -514,6 +514,8 @@ const TalkConfigSchema = Type.Object(
     providers: Type.Optional(Type.Record(Type.String(), TalkProviderConfigSchema)),
     realtime: Type.Optional(TalkRealtimeConfigSchema),
     resolved: Type.Optional(ResolvedTalkConfigSchema),
+    consultThinkingLevel: Type.Optional(Type.String()),
+    consultFastMode: Type.Optional(Type.Boolean()),
     speechLocale: Type.Optional(Type.String()),
     interruptOnSpeech: Type.Optional(Type.Boolean()),
     silenceTimeoutMs: Type.Optional(Type.Integer({ minimum: 1 })),
diff --git a/src/gateway/protocol/schema/logs-chat.ts b/src/gateway/protocol/schema/logs-chat.ts
index 01468e0c230..df934fdcb8c 100644
--- a/src/gateway/protocol/schema/logs-chat.ts
+++ b/src/gateway/protocol/schema/logs-chat.ts
@@ -38,6 +38,7 @@ export const ChatSendParamsSchema = Type.Object(
     sessionId: Type.Optional(NonEmptyString),
     message: Type.String(),
     thinking: Type.Optional(Type.String()),
+    fastMode: Type.Optional(Type.Boolean()),
     deliver: Type.Optional(Type.Boolean()),
     originatingChannel: Type.Optional(Type.String()),
     originatingTo: Type.Optional(Type.String()),
diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts
index 8ff12ec5452..cd49bd6846c 100644
--- a/src/gateway/server-methods/chat.ts
+++ b/src/gateway/server-methods/chat.ts
@@ -1904,6 +1904,7 @@ export const chatHandlers: GatewayRequestHandlers = {
       sessionId?: string;
       message: string;
       thinking?: string;
+      fastMode?: boolean;
       deliver?: boolean;
       originatingChannel?: string;
       originatingTo?: string;
@@ -2503,6 +2504,8 @@ export const chatHandlers: GatewayRequestHandlers = {
               abortSignal: activeRunAbort.controller.signal,
               images: parsedImages.length > 0 ? parsedImages : undefined,
               imageOrder: imageOrder.length > 0 ? imageOrder : undefined,
+              thinkingLevelOverride: p.thinking,
+              fastModeOverride: p.fastMode,
               onAgentRunStart: (runId) => {
                 agentRunStarted = true;
                 if (!hasBeforeAgentRunGate) {
diff --git a/src/gateway/server-methods/talk-client.ts b/src/gateway/server-methods/talk-client.ts
index 4d80bbcc400..ba19d4f4ad9 100644
--- a/src/gateway/server-methods/talk-client.ts
+++ b/src/gateway/server-methods/talk-client.ts
@@ -1,4 +1,5 @@
 import { randomUUID } from "node:crypto";
+import { normalizeTalkSection } from "../../config/talk.js";
 import {
   normalizeOptionalLowercaseString,
   normalizeOptionalString,
@@ -45,6 +46,7 @@ async function startRealtimeToolCallAgentConsult(params: {
     return { ok: false, error: errorShape(ErrorCodes.INVALID_REQUEST, formatForLog(err)) };
   }
   const idempotencyKey = `talk-${params.callId}-${randomUUID()}`;
+  const normalizedTalk = normalizeTalkSection(params.request.context.getRuntimeConfig().talk);
   let chatResponse: { ok: true; result: unknown } | { ok: false; error: ErrorShape } | undefined;
   await chatHandlers["chat.send"]({
     ...params.request,
@@ -57,6 +59,12 @@ async function startRealtimeToolCallAgentConsult(params: {
       sessionKey: params.sessionKey,
       message,
       idempotencyKey,
+      ...(normalizedTalk?.consultThinkingLevel
+        ? { thinking: normalizedTalk.consultThinkingLevel }
+        : {}),
+      ...(typeof normalizedTalk?.consultFastMode === "boolean"
+        ? { fastMode: normalizedTalk.consultFastMode }
+        : {}),
     },
     respond: (ok: boolean, result?: unknown, error?: ErrorShape) => {
       chatResponse = ok
diff --git a/src/gateway/server-methods/talk.test.ts b/src/gateway/server-methods/talk.test.ts
index 3e4059ce838..5e9b6153177 100644
--- a/src/gateway/server-methods/talk.test.ts
+++ b/src/gateway/server-methods/talk.test.ts
@@ -1088,6 +1088,46 @@ describe("talk.client.toolCall handler", () => {
     );
   });
 
+  it("passes configured consult thinking and fast-mode overrides to chat.send", async () => {
+    const respond = vi.fn();
+
+    await talkHandlers["talk.client.toolCall"]({
+      req: { type: "req", id: "1", method: "talk.client.toolCall" },
+      params: {
+        sessionKey: "main",
+        callId: "call-1",
+        name: "openclaw_agent_consult",
+        args: { question: "Are the basement lights off?" },
+      },
+      client: { connId: "conn-1" } as never,
+      isWebchatConnect: () => false,
+      respond: respond as never,
+      context: {
+        getRuntimeConfig: () =>
+          ({
+            talk: {
+              consultThinkingLevel: "low",
+              consultFastMode: true,
+            },
+          }) as OpenClawConfig,
+      } as never,
+    });
+
+    expect(mocks.chatSend).toHaveBeenCalledWith(
+      expect.objectContaining({
+        params: expect.objectContaining({
+          thinking: "low",
+          fastMode: true,
+        }),
+      }),
+    );
+    expect(respond).toHaveBeenCalledWith(
+      true,
+      expect.objectContaining({ runId: "run-voice-1" }),
+      undefined,
+    );
+  });
+
   it("links relay-owned agent consult runs so relay cancellation can abort them", async () => {
     const respond = vi.fn();
 
diff --git a/src/talk/agent-consult-runtime.test.ts b/src/talk/agent-consult-runtime.test.ts
index 2017115f35b..bd728851ab8 100644
--- a/src/talk/agent-consult-runtime.test.ts
+++ b/src/talk/agent-consult-runtime.test.ts
@@ -127,6 +127,7 @@ describe("realtime voice agent consult runtime", () => {
       provider: "openai",
       model: "gpt-5.4",
       thinkLevel: "high",
+      fastMode: true,
       timeoutMs: 10_000,
     });
 
@@ -149,6 +150,7 @@ describe("realtime voice agent consult runtime", () => {
     expect(call.provider).toBe("openai");
     expect(call.model).toBe("gpt-5.4");
     expect(call.thinkLevel).toBe("high");
+    expect(call.fastMode).toBe(true);
     expect(call.timeoutMs).toBe(10_000);
     expect(call.prompt).toContain("Caller: Can you check this?");
     expect(call.extraSystemPrompt).toContain("delegated requests");
diff --git a/src/talk/agent-consult-runtime.ts b/src/talk/agent-consult-runtime.ts
index 76bac88b4c5..dbf416e0094 100644
--- a/src/talk/agent-consult-runtime.ts
+++ b/src/talk/agent-consult-runtime.ts
@@ -199,6 +199,7 @@ export async function consultRealtimeVoiceAgent(params: {
   provider?: RunEmbeddedPiAgentParams["provider"];
   model?: RunEmbeddedPiAgentParams["model"];
   thinkLevel?: RunEmbeddedPiAgentParams["thinkLevel"];
+  fastMode?: RunEmbeddedPiAgentParams["fastMode"];
   timeoutMs?: number;
   toolsAllow?: string[];
   extraSystemPrompt?: string;
@@ -264,6 +265,7 @@ export async function consultRealtimeVoiceAgent(params: {
     provider: params.provider,
     model: params.model,
     thinkLevel: params.thinkLevel ?? "high",
+    fastMode: params.fastMode,
     verboseLevel: "off",
     reasoningLevel: "off",
     toolResultFormat: "plain",
diff --git a/ui/src/ui/chat/realtime-talk-shared.ts b/ui/src/ui/chat/realtime-talk-shared.ts
index 4b46c5f303b..0bf65c08a2e 100644
--- a/ui/src/ui/chat/realtime-talk-shared.ts
+++ b/ui/src/ui/chat/realtime-talk-shared.ts
@@ -38,6 +38,8 @@ export type RealtimeTalkWebRtcSdpSessionResult = {
   model?: string;
   voice?: string;
   expiresAt?: number;
+  consultThinkingLevel?: string;
+  consultFastMode?: boolean;
 };
 
 export type RealtimeTalkJsonPcmWebSocketSessionResult = {
@@ -51,6 +53,8 @@ export type RealtimeTalkJsonPcmWebSocketSessionResult = {
   model?: string;
   voice?: string;
   expiresAt?: number;
+  consultThinkingLevel?: string;
+  consultFastMode?: boolean;
 };
 
 export type RealtimeTalkGatewayRelaySessionResult = {
@@ -61,6 +65,8 @@ export type RealtimeTalkGatewayRelaySessionResult = {
   model?: string;
   voice?: string;
   expiresAt?: number;
+  consultThinkingLevel?: string;
+  consultFastMode?: boolean;
 };
 
 export type RealtimeTalkManagedRoomSessionResult = {
@@ -71,6 +77,8 @@ export type RealtimeTalkManagedRoomSessionResult = {
   model?: string;
   voice?: string;
   expiresAt?: number;
+  consultThinkingLevel?: string;
+  consultFastMode?: boolean;
 };
 
 export type RealtimeTalkSessionResult =
@@ -88,6 +96,8 @@ export type RealtimeTalkTransportContext = {
   client: GatewayBrowserClient;
   sessionKey: string;
   callbacks: RealtimeTalkCallbacks;
+  consultThinkingLevel?: string;
+  consultFastMode?: boolean;
 };
 
 export function createRealtimeTalkEventEmitter(
diff --git a/ui/src/ui/chat/realtime-talk.ts b/ui/src/ui/chat/realtime-talk.ts
index 841f074546a..acc92906c65 100644
--- a/ui/src/ui/chat/realtime-talk.ts
+++ b/ui/src/ui/chat/realtime-talk.ts
@@ -74,6 +74,8 @@ export class RealtimeTalkSession {
       client: this.client,
       sessionKey: this.sessionKey,
       callbacks: this.callbacks,
+      consultThinkingLevel: session.consultThinkingLevel,
+      consultFastMode: session.consultFastMode,
     });
     await this.transport.start();
   }
diff --git a/ui/src/ui/realtime-talk-consult.test.ts b/ui/src/ui/realtime-talk-consult.test.ts
new file mode 100644
index 00000000000..66ebafd00b9
--- /dev/null
+++ b/ui/src/ui/realtime-talk-consult.test.ts
@@ -0,0 +1,54 @@
+/* @vitest-environment jsdom */
+
+import { describe, expect, it, vi } from "vitest";
+import { submitRealtimeTalkConsult } from "./chat/realtime-talk-shared.js";
+
+describe("RealtimeTalkSession consult handoff", () => {
+  it("submits realtime consults through the Gateway tool-call endpoint", async () => {
+    let listener: ((event: { event: string; payload?: unknown }) => void) | undefined;
+    const request = vi.fn(async (method: string, _params: unknown) => {
+      if (method === "talk.client.toolCall") {
+        window.setTimeout(() => {
+          listener?.({
+            event: "chat",
+            payload: {
+              runId: "run-1",
+              state: "final",
+              message: { text: "Basement lights are off." },
+            },
+          });
+        }, 0); // deferred: the chat event fires only after this request has returned
+        return { runId: "run-1" }; // same runId as the deferred chat event
+      }
+      throw new Error(`unexpected request: ${method}`);
+    });
+    const addEventListener = vi.fn((callback: typeof listener) => {
+      listener = callback; // capture the subscriber so the request mock can emit to it
+      return () => {
+        listener = undefined;
+      };
+    });
+    const submit = vi.fn();
+    // No consult overrides are set on ctx; only client, sessionKey and callbacks are supplied.
+    await submitRealtimeTalkConsult({
+      ctx: {
+        client: { request, addEventListener },
+        sessionKey: "agent:main:main",
+        callbacks: {},
+      } as never,
+      callId: "call-1",
+      args: { question: "Are the basement lights off?" },
+      submit,
+    });
+    // Expect a talk.client.toolCall request, then the final chat text handed to submit().
+    expect(request).toHaveBeenCalledWith(
+      "talk.client.toolCall",
+      expect.objectContaining({
+        sessionKey: "agent:main:main",
+        name: "openclaw_agent_consult",
+        args: { question: "Are the basement lights off?" },
+      }),
+    );
+    expect(submit).toHaveBeenCalledWith("call-1", { result: "Basement lights are off." });
+  });
+});