From cfb0c34ff6bda189defa3152e8dce15d93cb1afb Mon Sep 17 00:00:00 2001 From: VACInc <3279061+VACInc@users.noreply.github.com> Date: Tue, 28 Apr 2026 01:30:49 -0400 Subject: [PATCH] feat: add realtime consult overrides --- docs/gateway/config-agents.md | 4 ++ docs/gateway/configuration-reference.md | 2 + docs/nodes/talk.md | 2 + docs/plugins/voice-call.md | 2 + extensions/voice-call/README.md | 2 + extensions/voice-call/openclaw.plugin.json | 17 ++++++ extensions/voice-call/src/config.test.ts | 28 ++++++++++ extensions/voice-call/src/config.ts | 22 ++++++++ extensions/voice-call/src/runtime.test.ts | 58 ++++++++++++++++++++ extensions/voice-call/src/runtime.ts | 13 +++-- src/auto-reply/get-reply-options.types.ts | 4 ++ src/auto-reply/reply/get-reply-directives.ts | 13 ++++- src/config/schema.help.quality.test.ts | 2 + src/config/schema.help.ts | 4 ++ src/config/schema.labels.ts | 2 + src/config/talk.normalize.test.ts | 4 ++ src/config/talk.ts | 23 +++++++- src/config/types.gateway.ts | 12 ++++ src/config/zod-schema.talk.test.ts | 12 ++++ src/config/zod-schema.ts | 4 ++ src/gateway/protocol/schema/channels.ts | 2 + src/gateway/protocol/schema/logs-chat.ts | 1 + src/gateway/server-methods/chat.ts | 3 + src/gateway/server-methods/talk-client.ts | 8 +++ src/gateway/server-methods/talk.test.ts | 40 ++++++++++++++ src/talk/agent-consult-runtime.test.ts | 2 + src/talk/agent-consult-runtime.ts | 2 + ui/src/ui/chat/realtime-talk-shared.ts | 10 ++++ ui/src/ui/chat/realtime-talk.ts | 2 + ui/src/ui/realtime-talk-consult.test.ts | 54 ++++++++++++++++++ 30 files changed, 346 insertions(+), 8 deletions(-) create mode 100644 ui/src/ui/realtime-talk-consult.test.ts diff --git a/docs/gateway/config-agents.md b/docs/gateway/config-agents.md index 11a304df8c3..bbdea0635fb 100644 --- a/docs/gateway/config-agents.md +++ b/docs/gateway/config-agents.md @@ -1378,6 +1378,8 @@ Defaults for Talk mode (macOS/iOS/Android). 
}, system: {}, }, + consultThinkingLevel: "low", + consultFastMode: true, speechLocale: "ru-RU", silenceTimeoutMs: 1500, interruptOnSpeech: true, @@ -1405,6 +1407,8 @@ Defaults for Talk mode (macOS/iOS/Android). - `providers.*.voiceAliases` lets Talk directives use friendly names. - `providers.mlx.modelId` selects the Hugging Face repo used by the macOS local MLX helper. If omitted, macOS uses `mlx-community/Soprano-80M-bf16`. - macOS MLX playback runs through the bundled `openclaw-mlx-tts` helper when present, or an executable on `PATH`; `OPENCLAW_MLX_TTS_BIN` overrides the helper path for development. +- `consultThinkingLevel` controls the thinking level for the full OpenClaw agent run behind Control UI Talk realtime `openclaw_agent_consult` calls. Leave unset to preserve normal session/model behavior. +- `consultFastMode` sets a one-shot fast-mode override for Control UI Talk realtime consults without changing the session's normal fast-mode setting. - `speechLocale` sets the BCP 47 locale id used by iOS/macOS Talk speech recognition. Leave unset to use the device default. - `silenceTimeoutMs` controls how long Talk mode waits after user silence before it sends the transcript. Unset keeps the platform default pause window (`700 ms on macOS and Android, 900 ms on iOS`). 
diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md index e5d869b3a05..3d696f3f783 100644 --- a/docs/gateway/configuration-reference.md +++ b/docs/gateway/configuration-reference.md @@ -48,6 +48,8 @@ Moved to a dedicated page - see - `session.*` (session lifecycle, compaction, pruning) - `messages.*` (message delivery, TTS, markdown rendering) - `talk.*` (Talk mode) + - `talk.consultThinkingLevel`: thinking level override for the full OpenClaw agent run behind Control UI Talk realtime consults + - `talk.consultFastMode`: one-shot fast-mode override for Control UI Talk realtime consults - `talk.speechLocale`: optional BCP 47 locale id for Talk speech recognition on iOS/macOS - `talk.silenceTimeoutMs`: when unset, Talk keeps the platform default pause window before sending the transcript (`700 ms on macOS and Android, 900 ms on iOS`) diff --git a/docs/nodes/talk.md b/docs/nodes/talk.md index 4fa1f65cd6f..3280233fb9e 100644 --- a/docs/nodes/talk.md +++ b/docs/nodes/talk.md @@ -102,6 +102,8 @@ Defaults: - `providers.elevenlabs.modelId`: defaults to `eleven_v3` when unset. - `providers.mlx.modelId`: defaults to `mlx-community/Soprano-80M-bf16` when unset. - `providers.elevenlabs.apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available). +- `consultThinkingLevel`: optional thinking level override for the full OpenClaw agent run behind realtime `openclaw_agent_consult` calls. +- `consultFastMode`: optional fast-mode override for realtime `openclaw_agent_consult` calls. - `realtime.provider`: selects the active browser/server realtime voice provider. Use `openai` for WebRTC, `google` for provider WebSocket, or a bridge-only provider through Gateway relay. - `realtime.providers.<provider>` stores provider-owned realtime config. The browser receives only ephemeral or constrained session credentials, never a standard API key. - `realtime.providers.openai.voice`: built-in OpenAI Realtime voice id. 
Current `gpt-realtime-2` voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`; `marin` and `cedar` are recommended for best quality. diff --git a/docs/plugins/voice-call.md b/docs/plugins/voice-call.md index 5d65eeaf69d..eaeceebea2f 100644 --- a/docs/plugins/voice-call.md +++ b/docs/plugins/voice-call.md @@ -316,6 +316,8 @@ for tool work, current information, memory lookups, or workspace state. instructions: "Speak briefly. Call openclaw_agent_consult before using deeper tools.", toolPolicy: "safe-read-only", consultPolicy: "substantive", + consultThinkingLevel: "low", + consultFastMode: true, agentContext: { enabled: true }, providers: { google: { diff --git a/extensions/voice-call/README.md b/extensions/voice-call/README.md index 1832a0169c7..5c7b03f2e73 100644 --- a/extensions/voice-call/README.md +++ b/extensions/voice-call/README.md @@ -106,6 +106,8 @@ Notes: - advanced webhook, streaming, and tunnel notes: `https://docs.openclaw.ai/plugins/voice-call` - `responseModel` is optional. When unset, voice responses use the runtime default model. - `sessionScope` defaults to `per-phone`, preserving caller memory across calls. Use `per-call` for reception, booking, IVR, and bridge flows where each carrier call should start fresh. +- `realtime.consultThinkingLevel` is optional. When set, it overrides the thinking level used by the model behind realtime `openclaw_agent_consult` calls. +- `realtime.consultFastMode` is optional. When set, it forces fast mode on (`true`) or off (`false`) for realtime `openclaw_agent_consult` calls. 
## Stale call reaper diff --git a/extensions/voice-call/openclaw.plugin.json b/extensions/voice-call/openclaw.plugin.json index 5abc35329d5..7a4c4a80885 100644 --- a/extensions/voice-call/openclaw.plugin.json +++ b/extensions/voice-call/openclaw.plugin.json @@ -153,6 +153,16 @@ "help": "Guides when the realtime voice model should call openclaw_agent_consult.", "advanced": true }, + "realtime.consultThinkingLevel": { + "label": "Consult Thinking Level", + "help": "Optional thinking level override for the regular agent run behind realtime openclaw_agent_consult calls.", + "advanced": true + }, + "realtime.consultFastMode": { + "label": "Consult Fast Mode", + "help": "Optional fast mode override for the regular agent run behind realtime openclaw_agent_consult calls.", + "advanced": true + }, "realtime.fastContext.enabled": { "label": "Enable Fast Realtime Context", "help": "Searches memory/session context before the full consult agent.", @@ -515,6 +525,13 @@ "type": "string", "enum": ["auto", "substantive", "always"] }, + "consultThinkingLevel": { + "type": "string", + "enum": ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"] + }, + "consultFastMode": { + "type": "boolean" + }, "tools": { "type": "array", "items": { diff --git a/extensions/voice-call/src/config.test.ts b/extensions/voice-call/src/config.test.ts index 8fcb0f4e332..c09092ce573 100644 --- a/extensions/voice-call/src/config.test.ts +++ b/extensions/voice-call/src/config.test.ts @@ -396,6 +396,8 @@ describe("normalizeVoiceCallConfig", () => { sources: ["memory", "sessions"], fallbackToConsult: false, }); + expect(normalized.realtime.consultThinkingLevel).toBeUndefined(); + expect(normalized.realtime.consultFastMode).toBeUndefined(); expect(normalized.realtime.agentContext).toEqual({ enabled: false, maxChars: 6000, @@ -468,6 +470,32 @@ describe("resolveVoiceCallConfig realtime settings", () => { expect(resolved.realtime.provider).toBeUndefined(); }); + it("preserves configured realtime consult 
overrides", () => { + const resolved = resolveVoiceCallConfig({ + enabled: true, + provider: "mock", + realtime: { + consultThinkingLevel: "low", + consultFastMode: true, + }, + }); + + expect(resolved.realtime.consultThinkingLevel).toBe("low"); + expect(resolved.realtime.consultFastMode).toBe(true); + }); + + it("rejects invalid realtime consult thinking levels", () => { + expect(() => + resolveVoiceCallConfig({ + enabled: true, + provider: "mock", + realtime: { + consultThinkingLevel: "turbo", + }, + } as never), + ).toThrow(/Invalid option/); + }); + it("leaves responseModel unset so voice responses can inherit runtime defaults", () => { const resolved = resolveVoiceCallConfig({ enabled: true, diff --git a/extensions/voice-call/src/config.ts b/extensions/voice-call/src/config.ts index ec52ba52575..146f822840b 100644 --- a/extensions/voice-call/src/config.ts +++ b/extensions/voice-call/src/config.ts @@ -287,6 +287,20 @@ export type VoiceCallRealtimeAgentContextConfig = z.infer< typeof VoiceCallRealtimeAgentContextConfigSchema >; +export const VoiceCallRealtimeConsultThinkingLevelSchema = z.enum([ + "off", + "minimal", + "low", + "medium", + "high", + "xhigh", + "adaptive", + "max", +]); +export type VoiceCallRealtimeConsultThinkingLevel = z.infer< + typeof VoiceCallRealtimeConsultThinkingLevelSchema +>; + const VoiceCallStreamingProvidersConfigSchema = z .record(z.string(), z.record(z.string(), z.unknown())) .default({}); @@ -305,6 +319,10 @@ const VoiceCallRealtimeConfigSchema = z toolPolicy: VoiceCallRealtimeToolPolicySchema.default("safe-read-only"), /** Guidance for when the realtime model should call the OpenClaw agent consult tool. */ consultPolicy: VoiceCallRealtimeConsultPolicySchema.default("auto"), + /** Optional thinking level override for the regular agent behind realtime consults. */ + consultThinkingLevel: VoiceCallRealtimeConsultThinkingLevelSchema.optional(), + /** Optional fast mode override for the regular agent behind realtime consults. 
*/ + consultFastMode: z.boolean().optional(), /** Tool definitions exposed to the realtime provider. */ tools: z.array(RealtimeToolSchema).default([]), /** Low-latency memory/session context for the consult tool. */ @@ -686,6 +704,10 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal defaultRealtimeStreamPathForServePath(serve.path ?? defaults.serve.path), tools: (config.realtime?.tools as RealtimeToolConfig[] | undefined) ?? defaults.realtime.tools, + consultThinkingLevel: VoiceCallRealtimeConsultThinkingLevelSchema.optional().parse( + config.realtime?.consultThinkingLevel ?? defaults.realtime.consultThinkingLevel, + ), + consultFastMode: config.realtime?.consultFastMode ?? defaults.realtime.consultFastMode, fastContext: realtimeFastContext, agentContext: realtimeAgentContext, providers: realtimeProviders, diff --git a/extensions/voice-call/src/runtime.test.ts b/extensions/voice-call/src/runtime.test.ts index a2973c96788..2af3778e933 100644 --- a/extensions/voice-call/src/runtime.test.ts +++ b/extensions/voice-call/src/runtime.test.ts @@ -537,4 +537,62 @@ describe("createVoiceCallRuntime lifecycle", () => { }); expect(runEmbeddedPiAgent).not.toHaveBeenCalled(); }); + + it("uses the configured realtime consult thinking level when set", async () => { + const config = createBaseConfig(); + config.inboundPolicy = "allowlist"; + config.realtime.enabled = true; + config.realtime.consultThinkingLevel = "low"; + config.realtime.consultFastMode = true; + const sessionStore: Record = {}; + const runEmbeddedPiAgent = vi.fn(async () => ({ + payloads: [{ text: "Done." 
}], + meta: {}, + })); + const agentRuntime = { + defaults: { provider: "openai", model: "gpt-5.4" }, + resolveAgentDir: vi.fn(() => "/tmp/agent"), + resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"), + resolveAgentIdentity: vi.fn(), + resolveThinkingDefault: vi.fn(() => "high"), + resolveAgentTimeoutMs: vi.fn(() => 30_000), + ensureAgentWorkspace: vi.fn(async () => {}), + session: { + resolveStorePath: vi.fn(() => "/tmp/sessions.json"), + loadSessionStore: vi.fn(() => sessionStore), + saveSessionStore: vi.fn(async () => {}), + updateSessionStore: vi.fn(async (_storePath, mutator) => mutator(sessionStore)), + resolveSessionFilePath: vi.fn(() => "/tmp/session.json"), + }, + runEmbeddedPiAgent, + }; + mocks.managerGetCall.mockReturnValue({ + callId: "call-1", + direction: "outbound", + from: "+15550001234", + to: "+15550009999", + transcript: [], + }); + + await createVoiceCallRuntime({ + config, + coreConfig: {} as CoreConfig, + agentRuntime: agentRuntime as never, + }); + + const handler = mocks.realtimeHandlerRegisterToolHandler.mock.calls[0]?.[1] as + | ((args: unknown, callId: string) => Promise) + | undefined; + await expect(handler?.({ question: "Turn on the lights." }, "call-1")).resolves.toEqual({ + text: "Done.", + }); + + expect(agentRuntime.resolveThinkingDefault).not.toHaveBeenCalled(); + expect(runEmbeddedPiAgent).toHaveBeenCalledWith( + expect.objectContaining({ + thinkLevel: "low", + fastMode: true, + }), + ); + }); }); diff --git a/extensions/voice-call/src/runtime.ts b/extensions/voice-call/src/runtime.ts index 9484f1b8dfd..1a1557843ad 100644 --- a/extensions/voice-call/src/runtime.ts +++ b/extensions/voice-call/src/runtime.ts @@ -372,11 +372,13 @@ export async function createVoiceCallRuntime(params: { voiceConfig: effectiveConfig, agentRuntime, }); - const thinkLevel = agentRuntime.resolveThinkingDefault({ - cfg, - provider: agentProvider, - model, - }); + const thinkLevel = + effectiveConfig.realtime.consultThinkingLevel ?? 
+ agentRuntime.resolveThinkingDefault({ + cfg, + provider: agentProvider, + model, + }); return await consultRealtimeVoiceAgent({ cfg, agentRuntime, @@ -395,6 +397,7 @@ export async function createVoiceCallRuntime(params: { provider: agentProvider, model, thinkLevel, + fastMode: effectiveConfig.realtime.consultFastMode, timeoutMs: effectiveConfig.responseTimeoutMs, spawnedBy: requesterSessionKey, contextMode: requesterSessionKey ? "fork" : undefined, diff --git a/src/auto-reply/get-reply-options.types.ts b/src/auto-reply/get-reply-options.types.ts index e9219bbc1f4..625d70f6504 100644 --- a/src/auto-reply/get-reply-options.types.ts +++ b/src/auto-reply/get-reply-options.types.ts @@ -53,6 +53,10 @@ export type GetReplyOptions = { suppressTyping?: boolean; /** Resolved heartbeat model override (provider/model string from merged per-agent config). */ heartbeatModelOverride?: string; + /** One-shot thinking level override for this run; does not persist to the session. */ + thinkingLevelOverride?: string; + /** One-shot fast-mode override for this run; does not persist to the session. */ + fastModeOverride?: boolean; /** Controls bootstrap workspace context injection (default: full). */ bootstrapContextMode?: "full" | "lightweight"; /** If true, suppress tool error warning payloads for this run. 
*/ diff --git a/src/auto-reply/reply/get-reply-directives.ts b/src/auto-reply/reply/get-reply-directives.ts index 593383feb58..0336a6dc384 100644 --- a/src/auto-reply/reply/get-reply-directives.ts +++ b/src/auto-reply/reply/get-reply-directives.ts @@ -14,7 +14,13 @@ import { } from "../../shared/string-coerce.js"; import { shouldHandleTextCommands } from "../commands-text-routing.js"; import type { MsgContext, TemplateContext } from "../templating.js"; -import type { ElevatedLevel, ReasoningLevel, ThinkLevel, VerboseLevel } from "../thinking.js"; +import { + normalizeThinkLevel, + type ElevatedLevel, + type ReasoningLevel, + type ThinkLevel, + type VerboseLevel, +} from "../thinking.js"; import type { GetReplyOptions, ReplyPayload } from "../types.js"; import { resolveBlockStreamingChunking } from "./block-streaming.js"; import { buildCommandContext } from "./commands-context.js"; @@ -417,8 +423,11 @@ export async function resolveReplyDirectives(params: { }); const defaultActivation = defaultGroupActivation(requireMention); const resolvedThinkLevel = - directives.thinkLevel ?? (targetSessionEntry?.thinkingLevel as ThinkLevel | undefined); + normalizeThinkLevel(opts?.thinkingLevelOverride) ?? + directives.thinkLevel ?? + (targetSessionEntry?.thinkingLevel as ThinkLevel | undefined); const resolvedFastMode = + opts?.fastModeOverride ?? directives.fastMode ?? 
resolveFastModeState({ cfg, diff --git a/src/config/schema.help.quality.test.ts b/src/config/schema.help.quality.test.ts index 2efbac67d78..fc1641631f7 100644 --- a/src/config/schema.help.quality.test.ts +++ b/src/config/schema.help.quality.test.ts @@ -324,8 +324,10 @@ const TARGET_KEYS = [ "discovery.mdns.mode", "gateway.controlUi.embedSandbox", "talk", + "talk.consultFastMode", "talk.interruptOnSpeech", "talk.silenceTimeoutMs", + "talk.consultThinkingLevel", "meta", "env", "env.shellEnv", diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index a8139ab2916..9b4bad5c420 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -167,6 +167,10 @@ export const FIELD_HELP: Record = { "Talk byte/session transport: webrtc, provider-websocket, gateway-relay, or managed-room.", "talk.realtime.brain": "Talk reasoning strategy: agent-consult for Gateway-mediated agent help, direct-tools for owner-only tool calls, or none.", + "talk.consultThinkingLevel": + "Use this to override the thinking level for the regular agent run behind Talk realtime consults.", + "talk.consultFastMode": + "Use this to set true or false fast mode for the regular agent run behind Talk realtime consults.", "talk.speechLocale": 'BCP 47 locale id for Talk speech recognition on device nodes, for example "ru-RU". 
Leave unset to use each device default.', "talk.interruptOnSpeech": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 6a24d598275..77da70839e4 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -841,6 +841,8 @@ export const FIELD_LABELS: Record = { "talk.speechLocale": "Talk Speech Locale", "talk.interruptOnSpeech": "Talk Interrupt on Speech", "talk.silenceTimeoutMs": "Talk Silence Timeout (ms)", + "talk.consultThinkingLevel": "Talk Consult Thinking Level", + "talk.consultFastMode": "Talk Consult Fast Mode", messages: "Messages", "messages.messagePrefix": "Inbound Message Prefix", "messages.visibleReplies": "Visible Replies", diff --git a/src/config/talk.normalize.test.ts b/src/config/talk.normalize.test.ts index a2e7220469b..77ef5873128 100644 --- a/src/config/talk.normalize.test.ts +++ b/src/config/talk.normalize.test.ts @@ -10,6 +10,8 @@ describe("talk normalization", () => { modelId: "eleven_v3", outputFormat: "pcm_44100", apiKey: "secret-key", // pragma: allowlist secret + consultThinkingLevel: " low ", + consultFastMode: true, speechLocale: " ru-RU ", interruptOnSpeech: false, silenceTimeoutMs: 1500, @@ -17,6 +19,8 @@ describe("talk normalization", () => { expect(normalized).toEqual({ speechLocale: "ru-RU", + consultThinkingLevel: "low", + consultFastMode: true, interruptOnSpeech: false, silenceTimeoutMs: 1500, }); diff --git a/src/config/talk.ts b/src/config/talk.ts index fd5c71643c1..8ef63e4e669 100644 --- a/src/config/talk.ts +++ b/src/config/talk.ts @@ -1,4 +1,5 @@ -import { normalizeOptionalString } from "../shared/string-coerce.js"; +import { normalizeThinkLevel } from "../auto-reply/thinking.js"; +import { normalizeFastMode, normalizeOptionalString } from "../shared/string-coerce.js"; import { isRecord } from "../utils.js"; import type { ResolvedTalkConfig, @@ -157,6 +158,20 @@ export function normalizeTalkSection(value: TalkConfig | undefined): TalkConfig if (typeof source.interruptOnSpeech === 
"boolean") { normalized.interruptOnSpeech = source.interruptOnSpeech; } + const consultThinkingLevel = normalizeThinkLevel( + normalizeOptionalString(source.consultThinkingLevel), + ); + if (consultThinkingLevel) { + normalized.consultThinkingLevel = consultThinkingLevel; + } + const rawConsultFastMode = source.consultFastMode; + const consultFastMode = + typeof rawConsultFastMode === "boolean" || typeof rawConsultFastMode === "string" + ? normalizeFastMode(rawConsultFastMode) + : undefined; + if (consultFastMode !== undefined) { + normalized.consultFastMode = consultFastMode; + } const silenceTimeoutMs = normalizeSilenceTimeoutMs(source.silenceTimeoutMs); if (silenceTimeoutMs !== undefined) { normalized.silenceTimeoutMs = silenceTimeoutMs; @@ -225,6 +240,12 @@ export function buildTalkConfigResponse(value: unknown): TalkConfigResponse | un if (typeof normalized?.silenceTimeoutMs === "number") { payload.silenceTimeoutMs = normalized.silenceTimeoutMs; } + if (typeof normalized?.consultThinkingLevel === "string") { + payload.consultThinkingLevel = normalized.consultThinkingLevel; + } + if (typeof normalized?.consultFastMode === "boolean") { + payload.consultFastMode = normalized.consultFastMode; + } if (typeof normalized?.speechLocale === "string") { payload.speechLocale = normalized.speechLocale; } diff --git a/src/config/types.gateway.ts b/src/config/types.gateway.ts index fdbc89e97a5..baf87dba364 100644 --- a/src/config/types.gateway.ts +++ b/src/config/types.gateway.ts @@ -76,6 +76,18 @@ export type TalkConfig = { providers?: Record; /** Realtime Talk provider, model, voice, mode, transport, and brain config. */ realtime?: TalkRealtimeConfig; + /** Optional thinking level override for the agent run behind Talk realtime consults. */ + consultThinkingLevel?: + | "off" + | "minimal" + | "low" + | "medium" + | "high" + | "xhigh" + | "adaptive" + | "max"; + /** Optional fast mode override for the agent run behind Talk realtime consults. 
*/ + consultFastMode?: boolean; /** BCP 47 locale id used for Talk speech recognition on device nodes. */ speechLocale?: string; /** Stop speaking when user starts talking (default: true). */ diff --git a/src/config/zod-schema.talk.test.ts b/src/config/zod-schema.talk.test.ts index 111394283e8..d207319ef40 100644 --- a/src/config/zod-schema.talk.test.ts +++ b/src/config/zod-schema.talk.test.ts @@ -6,12 +6,24 @@ describe("OpenClawSchema talk validation", () => { expect( OpenClawSchema.safeParse({ talk: { + consultThinkingLevel: "low", + consultFastMode: true, silenceTimeoutMs: 1500, }, }), ).toMatchObject({ success: true }); }); + it("rejects invalid talk.consultThinkingLevel", () => { + expect(() => + OpenClawSchema.parse({ + talk: { + consultThinkingLevel: "turbo", + }, + }), + ).toThrow(/consultThinkingLevel/i); + }); + it.each([ ["boolean", true], ["string", "1500"], diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index d39ca664192..ce528fdf4aa 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -268,6 +268,10 @@ const TalkSchema = z provider: z.string().optional(), providers: z.record(z.string(), TalkProviderEntrySchema).optional(), realtime: TalkRealtimeSchema.optional(), + consultThinkingLevel: z + .enum(["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"]) + .optional(), + consultFastMode: z.boolean().optional(), speechLocale: z.string().optional(), interruptOnSpeech: z.boolean().optional(), silenceTimeoutMs: z.number().int().positive().optional(), diff --git a/src/gateway/protocol/schema/channels.ts b/src/gateway/protocol/schema/channels.ts index caca4028528..db2144abbbc 100644 --- a/src/gateway/protocol/schema/channels.ts +++ b/src/gateway/protocol/schema/channels.ts @@ -514,6 +514,8 @@ const TalkConfigSchema = Type.Object( providers: Type.Optional(Type.Record(Type.String(), TalkProviderConfigSchema)), realtime: Type.Optional(TalkRealtimeConfigSchema), resolved: 
Type.Optional(ResolvedTalkConfigSchema), + consultThinkingLevel: Type.Optional(Type.String()), + consultFastMode: Type.Optional(Type.Boolean()), speechLocale: Type.Optional(Type.String()), interruptOnSpeech: Type.Optional(Type.Boolean()), silenceTimeoutMs: Type.Optional(Type.Integer({ minimum: 1 })), diff --git a/src/gateway/protocol/schema/logs-chat.ts b/src/gateway/protocol/schema/logs-chat.ts index 01468e0c230..df934fdcb8c 100644 --- a/src/gateway/protocol/schema/logs-chat.ts +++ b/src/gateway/protocol/schema/logs-chat.ts @@ -38,6 +38,7 @@ export const ChatSendParamsSchema = Type.Object( sessionId: Type.Optional(NonEmptyString), message: Type.String(), thinking: Type.Optional(Type.String()), + fastMode: Type.Optional(Type.Boolean()), deliver: Type.Optional(Type.Boolean()), originatingChannel: Type.Optional(Type.String()), originatingTo: Type.Optional(Type.String()), diff --git a/src/gateway/server-methods/chat.ts b/src/gateway/server-methods/chat.ts index 8ff12ec5452..cd49bd6846c 100644 --- a/src/gateway/server-methods/chat.ts +++ b/src/gateway/server-methods/chat.ts @@ -1904,6 +1904,7 @@ export const chatHandlers: GatewayRequestHandlers = { sessionId?: string; message: string; thinking?: string; + fastMode?: boolean; deliver?: boolean; originatingChannel?: string; originatingTo?: string; @@ -2503,6 +2504,8 @@ export const chatHandlers: GatewayRequestHandlers = { abortSignal: activeRunAbort.controller.signal, images: parsedImages.length > 0 ? parsedImages : undefined, imageOrder: imageOrder.length > 0 ? 
imageOrder : undefined, + thinkingLevelOverride: p.thinking, + fastModeOverride: p.fastMode, onAgentRunStart: (runId) => { agentRunStarted = true; if (!hasBeforeAgentRunGate) { diff --git a/src/gateway/server-methods/talk-client.ts b/src/gateway/server-methods/talk-client.ts index 4d80bbcc400..ba19d4f4ad9 100644 --- a/src/gateway/server-methods/talk-client.ts +++ b/src/gateway/server-methods/talk-client.ts @@ -1,4 +1,5 @@ import { randomUUID } from "node:crypto"; +import { normalizeTalkSection } from "../../config/talk.js"; import { normalizeOptionalLowercaseString, normalizeOptionalString, @@ -45,6 +46,7 @@ async function startRealtimeToolCallAgentConsult(params: { return { ok: false, error: errorShape(ErrorCodes.INVALID_REQUEST, formatForLog(err)) }; } const idempotencyKey = `talk-${params.callId}-${randomUUID()}`; + const normalizedTalk = normalizeTalkSection(params.request.context.getRuntimeConfig().talk); let chatResponse: { ok: true; result: unknown } | { ok: false; error: ErrorShape } | undefined; await chatHandlers["chat.send"]({ ...params.request, @@ -57,6 +59,12 @@ async function startRealtimeToolCallAgentConsult(params: { sessionKey: params.sessionKey, message, idempotencyKey, + ...(normalizedTalk?.consultThinkingLevel + ? { thinking: normalizedTalk.consultThinkingLevel } + : {}), + ...(typeof normalizedTalk?.consultFastMode === "boolean" + ? 
{ fastMode: normalizedTalk.consultFastMode } + : {}), }, respond: (ok: boolean, result?: unknown, error?: ErrorShape) => { chatResponse = ok diff --git a/src/gateway/server-methods/talk.test.ts b/src/gateway/server-methods/talk.test.ts index 3e4059ce838..5e9b6153177 100644 --- a/src/gateway/server-methods/talk.test.ts +++ b/src/gateway/server-methods/talk.test.ts @@ -1088,6 +1088,46 @@ describe("talk.client.toolCall handler", () => { ); }); + it("passes configured consult thinking and fast-mode overrides to chat.send", async () => { + const respond = vi.fn(); + + await talkHandlers["talk.client.toolCall"]({ + req: { type: "req", id: "1", method: "talk.client.toolCall" }, + params: { + sessionKey: "main", + callId: "call-1", + name: "openclaw_agent_consult", + args: { question: "Are the basement lights off?" }, + }, + client: { connId: "conn-1" } as never, + isWebchatConnect: () => false, + respond: respond as never, + context: { + getRuntimeConfig: () => + ({ + talk: { + consultThinkingLevel: "low", + consultFastMode: true, + }, + }) as OpenClawConfig, + } as never, + }); + + expect(mocks.chatSend).toHaveBeenCalledWith( + expect.objectContaining({ + params: expect.objectContaining({ + thinking: "low", + fastMode: true, + }), + }), + ); + expect(respond).toHaveBeenCalledWith( + true, + expect.objectContaining({ runId: "run-voice-1" }), + undefined, + ); + }); + it("links relay-owned agent consult runs so relay cancellation can abort them", async () => { const respond = vi.fn(); diff --git a/src/talk/agent-consult-runtime.test.ts b/src/talk/agent-consult-runtime.test.ts index 2017115f35b..bd728851ab8 100644 --- a/src/talk/agent-consult-runtime.test.ts +++ b/src/talk/agent-consult-runtime.test.ts @@ -127,6 +127,7 @@ describe("realtime voice agent consult runtime", () => { provider: "openai", model: "gpt-5.4", thinkLevel: "high", + fastMode: true, timeoutMs: 10_000, }); @@ -149,6 +150,7 @@ describe("realtime voice agent consult runtime", () => { 
expect(call.provider).toBe("openai"); expect(call.model).toBe("gpt-5.4"); expect(call.thinkLevel).toBe("high"); + expect(call.fastMode).toBe(true); expect(call.timeoutMs).toBe(10_000); expect(call.prompt).toContain("Caller: Can you check this?"); expect(call.extraSystemPrompt).toContain("delegated requests"); diff --git a/src/talk/agent-consult-runtime.ts b/src/talk/agent-consult-runtime.ts index 76bac88b4c5..dbf416e0094 100644 --- a/src/talk/agent-consult-runtime.ts +++ b/src/talk/agent-consult-runtime.ts @@ -199,6 +199,7 @@ export async function consultRealtimeVoiceAgent(params: { provider?: RunEmbeddedPiAgentParams["provider"]; model?: RunEmbeddedPiAgentParams["model"]; thinkLevel?: RunEmbeddedPiAgentParams["thinkLevel"]; + fastMode?: RunEmbeddedPiAgentParams["fastMode"]; timeoutMs?: number; toolsAllow?: string[]; extraSystemPrompt?: string; @@ -264,6 +265,7 @@ export async function consultRealtimeVoiceAgent(params: { provider: params.provider, model: params.model, thinkLevel: params.thinkLevel ?? 
"high", + fastMode: params.fastMode, verboseLevel: "off", reasoningLevel: "off", toolResultFormat: "plain", diff --git a/ui/src/ui/chat/realtime-talk-shared.ts b/ui/src/ui/chat/realtime-talk-shared.ts index 4b46c5f303b..0bf65c08a2e 100644 --- a/ui/src/ui/chat/realtime-talk-shared.ts +++ b/ui/src/ui/chat/realtime-talk-shared.ts @@ -38,6 +38,8 @@ export type RealtimeTalkWebRtcSdpSessionResult = { model?: string; voice?: string; expiresAt?: number; + consultThinkingLevel?: string; + consultFastMode?: boolean; }; export type RealtimeTalkJsonPcmWebSocketSessionResult = { @@ -51,6 +53,8 @@ export type RealtimeTalkJsonPcmWebSocketSessionResult = { model?: string; voice?: string; expiresAt?: number; + consultThinkingLevel?: string; + consultFastMode?: boolean; }; export type RealtimeTalkGatewayRelaySessionResult = { @@ -61,6 +65,8 @@ export type RealtimeTalkGatewayRelaySessionResult = { model?: string; voice?: string; expiresAt?: number; + consultThinkingLevel?: string; + consultFastMode?: boolean; }; export type RealtimeTalkManagedRoomSessionResult = { @@ -71,6 +77,8 @@ export type RealtimeTalkManagedRoomSessionResult = { model?: string; voice?: string; expiresAt?: number; + consultThinkingLevel?: string; + consultFastMode?: boolean; }; export type RealtimeTalkSessionResult = @@ -88,6 +96,8 @@ export type RealtimeTalkTransportContext = { client: GatewayBrowserClient; sessionKey: string; callbacks: RealtimeTalkCallbacks; + consultThinkingLevel?: string; + consultFastMode?: boolean; }; export function createRealtimeTalkEventEmitter( diff --git a/ui/src/ui/chat/realtime-talk.ts b/ui/src/ui/chat/realtime-talk.ts index 841f074546a..acc92906c65 100644 --- a/ui/src/ui/chat/realtime-talk.ts +++ b/ui/src/ui/chat/realtime-talk.ts @@ -74,6 +74,8 @@ export class RealtimeTalkSession { client: this.client, sessionKey: this.sessionKey, callbacks: this.callbacks, + consultThinkingLevel: session.consultThinkingLevel, + consultFastMode: session.consultFastMode, }); await 
this.transport.start(); } diff --git a/ui/src/ui/realtime-talk-consult.test.ts b/ui/src/ui/realtime-talk-consult.test.ts new file mode 100644 index 00000000000..66ebafd00b9 --- /dev/null +++ b/ui/src/ui/realtime-talk-consult.test.ts @@ -0,0 +1,54 @@ +/* @vitest-environment jsdom */ + +import { describe, expect, it, vi } from "vitest"; +import { submitRealtimeTalkConsult } from "./chat/realtime-talk-shared.js"; + +describe("RealtimeTalkSession consult handoff", () => { + it("submits realtime consults through the Gateway tool-call endpoint", async () => { + let listener: ((event: { event: string; payload?: unknown }) => void) | undefined; + const request = vi.fn(async (method: string, _params: unknown) => { + if (method === "talk.client.toolCall") { + window.setTimeout(() => { + listener?.({ + event: "chat", + payload: { + runId: "run-1", + state: "final", + message: { text: "Basement lights are off." }, + }, + }); + }, 0); + return { runId: "run-1" }; + } + throw new Error(`unexpected request: ${method}`); + }); + const addEventListener = vi.fn((callback: typeof listener) => { + listener = callback; + return () => { + listener = undefined; + }; + }); + const submit = vi.fn(); + + await submitRealtimeTalkConsult({ + ctx: { + client: { request, addEventListener }, + sessionKey: "agent:main:main", + callbacks: {}, + } as never, + callId: "call-1", + args: { question: "Are the basement lights off?" }, + submit, + }); + + expect(request).toHaveBeenCalledWith( + "talk.client.toolCall", + expect.objectContaining({ + sessionKey: "agent:main:main", + name: "openclaw_agent_consult", + args: { question: "Are the basement lights off?" }, + }), + ); + expect(submit).toHaveBeenCalledWith("call-1", { result: "Basement lights are off." }); + }); +});