feat: add realtime consult overrides

This commit is contained in:
VACInc
2026-04-28 01:30:49 -04:00
committed by Peter Steinberger
parent 4235f6b192
commit cfb0c34ff6
30 changed files with 346 additions and 8 deletions

View File

@@ -1378,6 +1378,8 @@ Defaults for Talk mode (macOS/iOS/Android).
},
system: {},
},
consultThinkingLevel: "low",
consultFastMode: true,
speechLocale: "ru-RU",
silenceTimeoutMs: 1500,
interruptOnSpeech: true,
@@ -1405,6 +1407,8 @@ Defaults for Talk mode (macOS/iOS/Android).
- `providers.*.voiceAliases` lets Talk directives use friendly names.
- `providers.mlx.modelId` selects the Hugging Face repo used by the macOS local MLX helper. If omitted, macOS uses `mlx-community/Soprano-80M-bf16`.
- macOS MLX playback runs through the bundled `openclaw-mlx-tts` helper when present, or an executable on `PATH`; `OPENCLAW_MLX_TTS_BIN` overrides the helper path for development.
- `consultThinkingLevel` controls the thinking level for the full OpenClaw agent run behind Control UI Talk realtime `openclaw_agent_consult` calls. Leave unset to preserve normal session/model behavior.
- `consultFastMode` sets a one-shot fast-mode override for Control UI Talk realtime consults without changing the session's normal fast-mode setting.
- `speechLocale` sets the BCP 47 locale id used by iOS/macOS Talk speech recognition. Leave unset to use the device default.
- `silenceTimeoutMs` controls how long Talk mode waits after user silence before it sends the transcript. Unset keeps the platform default pause window (`700 ms on macOS and Android, 900 ms on iOS`).

View File

@@ -48,6 +48,8 @@ Moved to a dedicated page - see
- `session.*` (session lifecycle, compaction, pruning)
- `messages.*` (message delivery, TTS, markdown rendering)
- `talk.*` (Talk mode)
- `talk.consultThinkingLevel`: thinking level override for the full OpenClaw agent run behind Control UI Talk realtime consults
- `talk.consultFastMode`: one-shot fast-mode override for Control UI Talk realtime consults
- `talk.speechLocale`: optional BCP 47 locale id for Talk speech recognition on iOS/macOS
- `talk.silenceTimeoutMs`: when unset, Talk keeps the platform default pause window before sending the transcript (`700 ms on macOS and Android, 900 ms on iOS`)

View File

@@ -102,6 +102,8 @@ Defaults:
- `providers.elevenlabs.modelId`: defaults to `eleven_v3` when unset.
- `providers.mlx.modelId`: defaults to `mlx-community/Soprano-80M-bf16` when unset.
- `providers.elevenlabs.apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available).
- `consultThinkingLevel`: optional thinking level override for the full OpenClaw agent run behind realtime `openclaw_agent_consult` calls.
- `consultFastMode`: optional fast-mode override for realtime `openclaw_agent_consult` calls.
- `realtime.provider`: selects the active browser/server realtime voice provider. Use `openai` for WebRTC, `google` for provider WebSocket, or a bridge-only provider through Gateway relay.
- `realtime.providers.<provider>` stores provider-owned realtime config. The browser receives only ephemeral or constrained session credentials, never a standard API key.
- `realtime.providers.openai.voice`: built-in OpenAI Realtime voice id. Current `gpt-realtime-2` voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`; `marin` and `cedar` are recommended for best quality.

View File

@@ -316,6 +316,8 @@ for tool work, current information, memory lookups, or workspace state.
instructions: "Speak briefly. Call openclaw_agent_consult before using deeper tools.",
toolPolicy: "safe-read-only",
consultPolicy: "substantive",
consultThinkingLevel: "low",
consultFastMode: true,
agentContext: { enabled: true },
providers: {
google: {

View File

@@ -106,6 +106,8 @@ Notes:
- advanced webhook, streaming, and tunnel notes: `https://docs.openclaw.ai/plugins/voice-call`
- `responseModel` is optional. When unset, voice responses use the runtime default model.
- `sessionScope` defaults to `per-phone`, preserving caller memory across calls. Use `per-call` for reception, booking, IVR, and bridge flows where each carrier call should start fresh.
- `realtime.consultThinkingLevel` is optional. When set, it overrides the thinking level used by the model behind realtime `openclaw_agent_consult` calls.
- `realtime.consultFastMode` is optional. When set, it toggles fast mode for realtime `openclaw_agent_consult` calls.

## Stale call reaper

View File

@@ -153,6 +153,16 @@
"help": "Guides when the realtime voice model should call openclaw_agent_consult.",
"advanced": true
},
"realtime.consultThinkingLevel": {
"label": "Consult Thinking Level",
"help": "Optional override for the regular agent run behind realtime openclaw_agent_consult calls.",
"advanced": true
},
"realtime.consultFastMode": {
"label": "Consult Fast Mode",
"help": "Optional fast mode override for the regular agent run behind realtime openclaw_agent_consult calls.",
"advanced": true
},
"realtime.fastContext.enabled": {
"label": "Enable Fast Realtime Context",
"help": "Searches memory/session context before the full consult agent.",
@@ -515,6 +525,13 @@
"type": "string",
"enum": ["auto", "substantive", "always"]
},
"consultThinkingLevel": {
"type": "string",
"enum": ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"]
},
"consultFastMode": {
"type": "boolean"
},
"tools": {
"type": "array",
"items": {

View File

@@ -396,6 +396,8 @@ describe("normalizeVoiceCallConfig", () => {
sources: ["memory", "sessions"],
fallbackToConsult: false,
});
expect(normalized.realtime.consultThinkingLevel).toBeUndefined();
expect(normalized.realtime.consultFastMode).toBeUndefined();
expect(normalized.realtime.agentContext).toEqual({
enabled: false,
maxChars: 6000,
@@ -468,6 +470,32 @@ describe("resolveVoiceCallConfig realtime settings", () => {
expect(resolved.realtime.provider).toBeUndefined();
});
it("preserves configured realtime consult overrides", () => {
const resolved = resolveVoiceCallConfig({
enabled: true,
provider: "mock",
realtime: {
consultThinkingLevel: "low",
consultFastMode: true,
},
});
expect(resolved.realtime.consultThinkingLevel).toBe("low");
expect(resolved.realtime.consultFastMode).toBe(true);
});
it("rejects invalid realtime consult thinking levels", () => {
expect(() =>
resolveVoiceCallConfig({
enabled: true,
provider: "mock",
realtime: {
consultThinkingLevel: "turbo",
},
} as never),
).toThrow(/Invalid option/);
});
it("leaves responseModel unset so voice responses can inherit runtime defaults", () => {
const resolved = resolveVoiceCallConfig({
enabled: true,

View File

@@ -287,6 +287,20 @@ export type VoiceCallRealtimeAgentContextConfig = z.infer<
typeof VoiceCallRealtimeAgentContextConfigSchema
>;
export const VoiceCallRealtimeConsultThinkingLevelSchema = z.enum([
"off",
"minimal",
"low",
"medium",
"high",
"xhigh",
"adaptive",
"max",
]);
export type VoiceCallRealtimeConsultThinkingLevel = z.infer<
typeof VoiceCallRealtimeConsultThinkingLevelSchema
>;
const VoiceCallStreamingProvidersConfigSchema = z
.record(z.string(), z.record(z.string(), z.unknown()))
.default({});
@@ -305,6 +319,10 @@ const VoiceCallRealtimeConfigSchema = z
toolPolicy: VoiceCallRealtimeToolPolicySchema.default("safe-read-only"),
/** Guidance for when the realtime model should call the OpenClaw agent consult tool. */
consultPolicy: VoiceCallRealtimeConsultPolicySchema.default("auto"),
/** Optional thinking level override for the regular agent behind realtime consults. */
consultThinkingLevel: VoiceCallRealtimeConsultThinkingLevelSchema.optional(),
/** Optional fast mode override for the regular agent behind realtime consults. */
consultFastMode: z.boolean().optional(),
/** Tool definitions exposed to the realtime provider. */
tools: z.array(RealtimeToolSchema).default([]),
/** Low-latency memory/session context for the consult tool. */
@@ -686,6 +704,10 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
defaultRealtimeStreamPathForServePath(serve.path ?? defaults.serve.path),
tools:
(config.realtime?.tools as RealtimeToolConfig[] | undefined) ?? defaults.realtime.tools,
consultThinkingLevel: VoiceCallRealtimeConsultThinkingLevelSchema.optional().parse(
config.realtime?.consultThinkingLevel ?? defaults.realtime.consultThinkingLevel,
),
consultFastMode: config.realtime?.consultFastMode ?? defaults.realtime.consultFastMode,
fastContext: realtimeFastContext,
agentContext: realtimeAgentContext,
providers: realtimeProviders,

View File

@@ -537,4 +537,62 @@ describe("createVoiceCallRuntime lifecycle", () => {
});
expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
});
it("uses the configured realtime consult thinking level when set", async () => {
const config = createBaseConfig();
config.inboundPolicy = "allowlist";
config.realtime.enabled = true;
config.realtime.consultThinkingLevel = "low";
config.realtime.consultFastMode = true;
const sessionStore: Record<string, unknown> = {};
const runEmbeddedPiAgent = vi.fn(async () => ({
payloads: [{ text: "Done." }],
meta: {},
}));
const agentRuntime = {
defaults: { provider: "openai", model: "gpt-5.4" },
resolveAgentDir: vi.fn(() => "/tmp/agent"),
resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"),
resolveAgentIdentity: vi.fn(),
resolveThinkingDefault: vi.fn(() => "high"),
resolveAgentTimeoutMs: vi.fn(() => 30_000),
ensureAgentWorkspace: vi.fn(async () => {}),
session: {
resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
loadSessionStore: vi.fn(() => sessionStore),
saveSessionStore: vi.fn(async () => {}),
updateSessionStore: vi.fn(async (_storePath, mutator) => mutator(sessionStore)),
resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
},
runEmbeddedPiAgent,
};
mocks.managerGetCall.mockReturnValue({
callId: "call-1",
direction: "outbound",
from: "+15550001234",
to: "+15550009999",
transcript: [],
});
await createVoiceCallRuntime({
config,
coreConfig: {} as CoreConfig,
agentRuntime: agentRuntime as never,
});
const handler = mocks.realtimeHandlerRegisterToolHandler.mock.calls[0]?.[1] as
| ((args: unknown, callId: string) => Promise<unknown>)
| undefined;
await expect(handler?.({ question: "Turn on the lights." }, "call-1")).resolves.toEqual({
text: "Done.",
});
expect(agentRuntime.resolveThinkingDefault).not.toHaveBeenCalled();
expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
expect.objectContaining({
thinkLevel: "low",
fastMode: true,
}),
);
});
});

View File

@@ -372,11 +372,13 @@ export async function createVoiceCallRuntime(params: {
voiceConfig: effectiveConfig,
agentRuntime,
});
const thinkLevel = agentRuntime.resolveThinkingDefault({
cfg,
provider: agentProvider,
model,
});
const thinkLevel =
effectiveConfig.realtime.consultThinkingLevel ??
agentRuntime.resolveThinkingDefault({
cfg,
provider: agentProvider,
model,
});
return await consultRealtimeVoiceAgent({
cfg,
agentRuntime,
@@ -395,6 +397,7 @@ export async function createVoiceCallRuntime(params: {
provider: agentProvider,
model,
thinkLevel,
fastMode: effectiveConfig.realtime.consultFastMode,
timeoutMs: effectiveConfig.responseTimeoutMs,
spawnedBy: requesterSessionKey,
contextMode: requesterSessionKey ? "fork" : undefined,

View File

@@ -53,6 +53,10 @@ export type GetReplyOptions = {
suppressTyping?: boolean;
/** Resolved heartbeat model override (provider/model string from merged per-agent config). */
heartbeatModelOverride?: string;
/** One-shot thinking level override for this run; does not persist to the session. */
thinkingLevelOverride?: string;
/** One-shot fast-mode override for this run; does not persist to the session. */
fastModeOverride?: boolean;
/** Controls bootstrap workspace context injection (default: full). */
bootstrapContextMode?: "full" | "lightweight";
/** If true, suppress tool error warning payloads for this run. */

View File

@@ -14,7 +14,13 @@ import {
} from "../../shared/string-coerce.js";
import { shouldHandleTextCommands } from "../commands-text-routing.js";
import type { MsgContext, TemplateContext } from "../templating.js";
import type { ElevatedLevel, ReasoningLevel, ThinkLevel, VerboseLevel } from "../thinking.js";
import {
normalizeThinkLevel,
type ElevatedLevel,
type ReasoningLevel,
type ThinkLevel,
type VerboseLevel,
} from "../thinking.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
import { resolveBlockStreamingChunking } from "./block-streaming.js";
import { buildCommandContext } from "./commands-context.js";
@@ -417,8 +423,11 @@ export async function resolveReplyDirectives(params: {
});
const defaultActivation = defaultGroupActivation(requireMention);
const resolvedThinkLevel =
directives.thinkLevel ?? (targetSessionEntry?.thinkingLevel as ThinkLevel | undefined);
normalizeThinkLevel(opts?.thinkingLevelOverride) ??
directives.thinkLevel ??
(targetSessionEntry?.thinkingLevel as ThinkLevel | undefined);
const resolvedFastMode =
opts?.fastModeOverride ??
directives.fastMode ??
resolveFastModeState({
cfg,

View File

@@ -324,8 +324,10 @@ const TARGET_KEYS = [
"discovery.mdns.mode",
"gateway.controlUi.embedSandbox",
"talk",
"talk.consultFastMode",
"talk.interruptOnSpeech",
"talk.silenceTimeoutMs",
"talk.consultThinkingLevel",
"meta",
"env",
"env.shellEnv",

View File

@@ -167,6 +167,10 @@ export const FIELD_HELP: Record<string, string> = {
"Talk byte/session transport: webrtc, provider-websocket, gateway-relay, or managed-room.",
"talk.realtime.brain":
"Talk reasoning strategy: agent-consult for Gateway-mediated agent help, direct-tools for owner-only tool calls, or none.",
"talk.consultThinkingLevel":
"Use this to override the thinking level for the regular agent run behind Talk realtime consults.",
"talk.consultFastMode":
"Use this to set true or false fast mode for the regular agent run behind Talk realtime consults.",
"talk.speechLocale":
'BCP 47 locale id for Talk speech recognition on device nodes, for example "ru-RU". Leave unset to use each device default.',
"talk.interruptOnSpeech":

View File

@@ -841,6 +841,8 @@ export const FIELD_LABELS: Record<string, string> = {
"talk.speechLocale": "Talk Speech Locale",
"talk.interruptOnSpeech": "Talk Interrupt on Speech",
"talk.silenceTimeoutMs": "Talk Silence Timeout (ms)",
"talk.consultThinkingLevel": "Talk Consult Thinking Level",
"talk.consultFastMode": "Talk Consult Fast Mode",
messages: "Messages",
"messages.messagePrefix": "Inbound Message Prefix",
"messages.visibleReplies": "Visible Replies",

View File

@@ -10,6 +10,8 @@ describe("talk normalization", () => {
modelId: "eleven_v3",
outputFormat: "pcm_44100",
apiKey: "secret-key", // pragma: allowlist secret
consultThinkingLevel: " low ",
consultFastMode: true,
speechLocale: " ru-RU ",
interruptOnSpeech: false,
silenceTimeoutMs: 1500,
@@ -17,6 +19,8 @@ describe("talk normalization", () => {
expect(normalized).toEqual({
speechLocale: "ru-RU",
consultThinkingLevel: "low",
consultFastMode: true,
interruptOnSpeech: false,
silenceTimeoutMs: 1500,
});

View File

@@ -1,4 +1,5 @@
import { normalizeOptionalString } from "../shared/string-coerce.js";
import { normalizeThinkLevel } from "../auto-reply/thinking.js";
import { normalizeFastMode, normalizeOptionalString } from "../shared/string-coerce.js";
import { isRecord } from "../utils.js";
import type {
ResolvedTalkConfig,
@@ -157,6 +158,20 @@ export function normalizeTalkSection(value: TalkConfig | undefined): TalkConfig
if (typeof source.interruptOnSpeech === "boolean") {
normalized.interruptOnSpeech = source.interruptOnSpeech;
}
const consultThinkingLevel = normalizeThinkLevel(
normalizeOptionalString(source.consultThinkingLevel),
);
if (consultThinkingLevel) {
normalized.consultThinkingLevel = consultThinkingLevel;
}
const rawConsultFastMode = source.consultFastMode;
const consultFastMode =
typeof rawConsultFastMode === "boolean" || typeof rawConsultFastMode === "string"
? normalizeFastMode(rawConsultFastMode)
: undefined;
if (consultFastMode !== undefined) {
normalized.consultFastMode = consultFastMode;
}
const silenceTimeoutMs = normalizeSilenceTimeoutMs(source.silenceTimeoutMs);
if (silenceTimeoutMs !== undefined) {
normalized.silenceTimeoutMs = silenceTimeoutMs;
@@ -225,6 +240,12 @@ export function buildTalkConfigResponse(value: unknown): TalkConfigResponse | un
if (typeof normalized?.silenceTimeoutMs === "number") {
payload.silenceTimeoutMs = normalized.silenceTimeoutMs;
}
if (typeof normalized?.consultThinkingLevel === "string") {
payload.consultThinkingLevel = normalized.consultThinkingLevel;
}
if (typeof normalized?.consultFastMode === "boolean") {
payload.consultFastMode = normalized.consultFastMode;
}
if (typeof normalized?.speechLocale === "string") {
payload.speechLocale = normalized.speechLocale;
}

View File

@@ -76,6 +76,18 @@ export type TalkConfig = {
providers?: Record<string, TalkProviderConfig>;
/** Realtime Talk provider, model, voice, mode, transport, and brain config. */
realtime?: TalkRealtimeConfig;
/** Optional thinking level override for the agent run behind Talk realtime consults. */
consultThinkingLevel?:
| "off"
| "minimal"
| "low"
| "medium"
| "high"
| "xhigh"
| "adaptive"
| "max";
/** Optional fast mode override for the agent run behind Talk realtime consults. */
consultFastMode?: boolean;
/** BCP 47 locale id used for Talk speech recognition on device nodes. */
speechLocale?: string;
/** Stop speaking when user starts talking (default: true). */

View File

@@ -6,12 +6,24 @@ describe("OpenClawSchema talk validation", () => {
expect(
OpenClawSchema.safeParse({
talk: {
consultThinkingLevel: "low",
consultFastMode: true,
silenceTimeoutMs: 1500,
},
}),
).toMatchObject({ success: true });
});
it("rejects invalid talk.consultThinkingLevel", () => {
expect(() =>
OpenClawSchema.parse({
talk: {
consultThinkingLevel: "turbo",
},
}),
).toThrow(/consultThinkingLevel/i);
});
it.each([
["boolean", true],
["string", "1500"],

View File

@@ -268,6 +268,10 @@ const TalkSchema = z
provider: z.string().optional(),
providers: z.record(z.string(), TalkProviderEntrySchema).optional(),
realtime: TalkRealtimeSchema.optional(),
consultThinkingLevel: z
.enum(["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"])
.optional(),
consultFastMode: z.boolean().optional(),
speechLocale: z.string().optional(),
interruptOnSpeech: z.boolean().optional(),
silenceTimeoutMs: z.number().int().positive().optional(),

View File

@@ -514,6 +514,8 @@ const TalkConfigSchema = Type.Object(
providers: Type.Optional(Type.Record(Type.String(), TalkProviderConfigSchema)),
realtime: Type.Optional(TalkRealtimeConfigSchema),
resolved: Type.Optional(ResolvedTalkConfigSchema),
consultThinkingLevel: Type.Optional(Type.String()),
consultFastMode: Type.Optional(Type.Boolean()),
speechLocale: Type.Optional(Type.String()),
interruptOnSpeech: Type.Optional(Type.Boolean()),
silenceTimeoutMs: Type.Optional(Type.Integer({ minimum: 1 })),

View File

@@ -38,6 +38,7 @@ export const ChatSendParamsSchema = Type.Object(
sessionId: Type.Optional(NonEmptyString),
message: Type.String(),
thinking: Type.Optional(Type.String()),
fastMode: Type.Optional(Type.Boolean()),
deliver: Type.Optional(Type.Boolean()),
originatingChannel: Type.Optional(Type.String()),
originatingTo: Type.Optional(Type.String()),

View File

@@ -1904,6 +1904,7 @@ export const chatHandlers: GatewayRequestHandlers = {
sessionId?: string;
message: string;
thinking?: string;
fastMode?: boolean;
deliver?: boolean;
originatingChannel?: string;
originatingTo?: string;
@@ -2503,6 +2504,8 @@ export const chatHandlers: GatewayRequestHandlers = {
abortSignal: activeRunAbort.controller.signal,
images: parsedImages.length > 0 ? parsedImages : undefined,
imageOrder: imageOrder.length > 0 ? imageOrder : undefined,
thinkingLevelOverride: p.thinking,
fastModeOverride: p.fastMode,
onAgentRunStart: (runId) => {
agentRunStarted = true;
if (!hasBeforeAgentRunGate) {

View File

@@ -1,4 +1,5 @@
import { randomUUID } from "node:crypto";
import { normalizeTalkSection } from "../../config/talk.js";
import {
normalizeOptionalLowercaseString,
normalizeOptionalString,
@@ -45,6 +46,7 @@ async function startRealtimeToolCallAgentConsult(params: {
return { ok: false, error: errorShape(ErrorCodes.INVALID_REQUEST, formatForLog(err)) };
}
const idempotencyKey = `talk-${params.callId}-${randomUUID()}`;
const normalizedTalk = normalizeTalkSection(params.request.context.getRuntimeConfig().talk);
let chatResponse: { ok: true; result: unknown } | { ok: false; error: ErrorShape } | undefined;
await chatHandlers["chat.send"]({
...params.request,
@@ -57,6 +59,12 @@ async function startRealtimeToolCallAgentConsult(params: {
sessionKey: params.sessionKey,
message,
idempotencyKey,
...(normalizedTalk?.consultThinkingLevel
? { thinking: normalizedTalk.consultThinkingLevel }
: {}),
...(typeof normalizedTalk?.consultFastMode === "boolean"
? { fastMode: normalizedTalk.consultFastMode }
: {}),
},
respond: (ok: boolean, result?: unknown, error?: ErrorShape) => {
chatResponse = ok

View File

@@ -1088,6 +1088,46 @@ describe("talk.client.toolCall handler", () => {
);
});
it("passes configured consult thinking and fast-mode overrides to chat.send", async () => {
const respond = vi.fn();
await talkHandlers["talk.client.toolCall"]({
req: { type: "req", id: "1", method: "talk.client.toolCall" },
params: {
sessionKey: "main",
callId: "call-1",
name: "openclaw_agent_consult",
args: { question: "Are the basement lights off?" },
},
client: { connId: "conn-1" } as never,
isWebchatConnect: () => false,
respond: respond as never,
context: {
getRuntimeConfig: () =>
({
talk: {
consultThinkingLevel: "low",
consultFastMode: true,
},
}) as OpenClawConfig,
} as never,
});
expect(mocks.chatSend).toHaveBeenCalledWith(
expect.objectContaining({
params: expect.objectContaining({
thinking: "low",
fastMode: true,
}),
}),
);
expect(respond).toHaveBeenCalledWith(
true,
expect.objectContaining({ runId: "run-voice-1" }),
undefined,
);
});
it("links relay-owned agent consult runs so relay cancellation can abort them", async () => {
const respond = vi.fn();

View File

@@ -127,6 +127,7 @@ describe("realtime voice agent consult runtime", () => {
provider: "openai",
model: "gpt-5.4",
thinkLevel: "high",
fastMode: true,
timeoutMs: 10_000,
});
@@ -149,6 +150,7 @@ describe("realtime voice agent consult runtime", () => {
expect(call.provider).toBe("openai");
expect(call.model).toBe("gpt-5.4");
expect(call.thinkLevel).toBe("high");
expect(call.fastMode).toBe(true);
expect(call.timeoutMs).toBe(10_000);
expect(call.prompt).toContain("Caller: Can you check this?");
expect(call.extraSystemPrompt).toContain("delegated requests");

View File

@@ -199,6 +199,7 @@ export async function consultRealtimeVoiceAgent(params: {
provider?: RunEmbeddedPiAgentParams["provider"];
model?: RunEmbeddedPiAgentParams["model"];
thinkLevel?: RunEmbeddedPiAgentParams["thinkLevel"];
fastMode?: RunEmbeddedPiAgentParams["fastMode"];
timeoutMs?: number;
toolsAllow?: string[];
extraSystemPrompt?: string;
@@ -264,6 +265,7 @@ export async function consultRealtimeVoiceAgent(params: {
provider: params.provider,
model: params.model,
thinkLevel: params.thinkLevel ?? "high",
fastMode: params.fastMode,
verboseLevel: "off",
reasoningLevel: "off",
toolResultFormat: "plain",

View File

@@ -38,6 +38,8 @@ export type RealtimeTalkWebRtcSdpSessionResult = {
model?: string;
voice?: string;
expiresAt?: number;
consultThinkingLevel?: string;
consultFastMode?: boolean;
};
export type RealtimeTalkJsonPcmWebSocketSessionResult = {
@@ -51,6 +53,8 @@ export type RealtimeTalkJsonPcmWebSocketSessionResult = {
model?: string;
voice?: string;
expiresAt?: number;
consultThinkingLevel?: string;
consultFastMode?: boolean;
};
export type RealtimeTalkGatewayRelaySessionResult = {
@@ -61,6 +65,8 @@ export type RealtimeTalkGatewayRelaySessionResult = {
model?: string;
voice?: string;
expiresAt?: number;
consultThinkingLevel?: string;
consultFastMode?: boolean;
};
export type RealtimeTalkManagedRoomSessionResult = {
@@ -71,6 +77,8 @@ export type RealtimeTalkManagedRoomSessionResult = {
model?: string;
voice?: string;
expiresAt?: number;
consultThinkingLevel?: string;
consultFastMode?: boolean;
};
export type RealtimeTalkSessionResult =
@@ -88,6 +96,8 @@ export type RealtimeTalkTransportContext = {
client: GatewayBrowserClient;
sessionKey: string;
callbacks: RealtimeTalkCallbacks;
consultThinkingLevel?: string;
consultFastMode?: boolean;
};
export function createRealtimeTalkEventEmitter(

View File

@@ -74,6 +74,8 @@ export class RealtimeTalkSession {
client: this.client,
sessionKey: this.sessionKey,
callbacks: this.callbacks,
consultThinkingLevel: session.consultThinkingLevel,
consultFastMode: session.consultFastMode,
});
await this.transport.start();
}

View File

@@ -0,0 +1,54 @@
/* @vitest-environment jsdom */
import { describe, expect, it, vi } from "vitest";
import { submitRealtimeTalkConsult } from "./chat/realtime-talk-shared.js";
describe("RealtimeTalkSession consult handoff", () => {
it("submits realtime consults through the Gateway tool-call endpoint", async () => {
let listener: ((event: { event: string; payload?: unknown }) => void) | undefined;
const request = vi.fn(async (method: string, _params: unknown) => {
if (method === "talk.client.toolCall") {
window.setTimeout(() => {
listener?.({
event: "chat",
payload: {
runId: "run-1",
state: "final",
message: { text: "Basement lights are off." },
},
});
}, 0);
return { runId: "run-1" };
}
throw new Error(`unexpected request: ${method}`);
});
const addEventListener = vi.fn((callback: typeof listener) => {
listener = callback;
return () => {
listener = undefined;
};
});
const submit = vi.fn();
await submitRealtimeTalkConsult({
ctx: {
client: { request, addEventListener },
sessionKey: "agent:main:main",
callbacks: {},
} as never,
callId: "call-1",
args: { question: "Are the basement lights off?" },
submit,
});
expect(request).toHaveBeenCalledWith(
"talk.client.toolCall",
expect.objectContaining({
sessionKey: "agent:main:main",
name: "openclaw_agent_consult",
args: { question: "Are the basement lights off?" },
}),
);
expect(submit).toHaveBeenCalledWith("call-1", { result: "Basement lights are off." });
});
});