mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-12 18:10:43 +00:00
feat: add realtime consult overrides
This commit is contained in:
committed by
Peter Steinberger
parent
4235f6b192
commit
cfb0c34ff6
@@ -1378,6 +1378,8 @@ Defaults for Talk mode (macOS/iOS/Android).
|
||||
},
|
||||
system: {},
|
||||
},
|
||||
consultThinkingLevel: "low",
|
||||
consultFastMode: true,
|
||||
speechLocale: "ru-RU",
|
||||
silenceTimeoutMs: 1500,
|
||||
interruptOnSpeech: true,
|
||||
@@ -1405,6 +1407,8 @@ Defaults for Talk mode (macOS/iOS/Android).
|
||||
- `providers.*.voiceAliases` lets Talk directives use friendly names.
|
||||
- `providers.mlx.modelId` selects the Hugging Face repo used by the macOS local MLX helper. If omitted, macOS uses `mlx-community/Soprano-80M-bf16`.
|
||||
- macOS MLX playback runs through the bundled `openclaw-mlx-tts` helper when present, or an executable on `PATH`; `OPENCLAW_MLX_TTS_BIN` overrides the helper path for development.
|
||||
- `consultThinkingLevel` controls the thinking level for the full OpenClaw agent run behind Control UI Talk realtime `openclaw_agent_consult` calls. Leave unset to preserve normal session/model behavior.
|
||||
- `consultFastMode` sets a one-shot fast-mode override for Control UI Talk realtime consults without changing the session's normal fast-mode setting.
|
||||
- `speechLocale` sets the BCP 47 locale id used by iOS/macOS Talk speech recognition. Leave unset to use the device default.
|
||||
- `silenceTimeoutMs` controls how long Talk mode waits after user silence before it sends the transcript. Unset keeps the platform default pause window (`700 ms on macOS and Android, 900 ms on iOS`).
|
||||
|
||||
|
||||
@@ -48,6 +48,8 @@ Moved to a dedicated page - see
|
||||
- `session.*` (session lifecycle, compaction, pruning)
|
||||
- `messages.*` (message delivery, TTS, markdown rendering)
|
||||
- `talk.*` (Talk mode)
|
||||
- `talk.consultThinkingLevel`: thinking level override for the full OpenClaw agent run behind Control UI Talk realtime consults
|
||||
- `talk.consultFastMode`: one-shot fast-mode override for Control UI Talk realtime consults
|
||||
- `talk.speechLocale`: optional BCP 47 locale id for Talk speech recognition on iOS/macOS
|
||||
- `talk.silenceTimeoutMs`: when unset, Talk keeps the platform default pause window before sending the transcript (`700 ms on macOS and Android, 900 ms on iOS`)
|
||||
|
||||
|
||||
@@ -102,6 +102,8 @@ Defaults:
|
||||
- `providers.elevenlabs.modelId`: defaults to `eleven_v3` when unset.
|
||||
- `providers.mlx.modelId`: defaults to `mlx-community/Soprano-80M-bf16` when unset.
|
||||
- `providers.elevenlabs.apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available).
|
||||
- `consultThinkingLevel`: optional thinking level override for the full OpenClaw agent run behind realtime `openclaw_agent_consult` calls.
|
||||
- `consultFastMode`: optional fast-mode override for realtime `openclaw_agent_consult` calls.
|
||||
- `realtime.provider`: selects the active browser/server realtime voice provider. Use `openai` for WebRTC, `google` for provider WebSocket, or a bridge-only provider through Gateway relay.
|
||||
- `realtime.providers.<provider>` stores provider-owned realtime config. The browser receives only ephemeral or constrained session credentials, never a standard API key.
|
||||
- `realtime.providers.openai.voice`: built-in OpenAI Realtime voice id. Current `gpt-realtime-2` voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`; `marin` and `cedar` are recommended for best quality.
|
||||
|
||||
@@ -316,6 +316,8 @@ for tool work, current information, memory lookups, or workspace state.
|
||||
instructions: "Speak briefly. Call openclaw_agent_consult before using deeper tools.",
|
||||
toolPolicy: "safe-read-only",
|
||||
consultPolicy: "substantive",
|
||||
consultThinkingLevel: "low",
|
||||
consultFastMode: true,
|
||||
agentContext: { enabled: true },
|
||||
providers: {
|
||||
google: {
|
||||
|
||||
@@ -106,6 +106,8 @@ Notes:
|
||||
- advanced webhook, streaming, and tunnel notes: `https://docs.openclaw.ai/plugins/voice-call`
|
||||
- `responseModel` is optional. When unset, voice responses use the runtime default model.
|
||||
- `sessionScope` defaults to `per-phone`, preserving caller memory across calls. Use `per-call` for reception, booking, IVR, and bridge flows where each carrier call should start fresh.
|
||||
- `realtime.consultThinkingLevel` is optional. When set, it overrides the thinking level for the regular agent run behind realtime `openclaw_agent_consult` calls; when unset, the runtime's default thinking level is used.
|
||||
- `realtime.consultFastMode` is optional. When set, it forces fast mode on (`true`) or off (`false`) for the regular agent run behind realtime `openclaw_agent_consult` calls.
|
||||
|
||||
## Stale call reaper
|
||||
|
||||
|
||||
@@ -153,6 +153,16 @@
|
||||
"help": "Guides when the realtime voice model should call openclaw_agent_consult.",
|
||||
"advanced": true
|
||||
},
|
||||
"realtime.consultThinkingLevel": {
|
||||
"label": "Consult Thinking Level",
|
||||
"help": "Optional override for the regular agent run behind realtime openclaw_agent_consult calls.",
|
||||
"advanced": true
|
||||
},
|
||||
"realtime.consultFastMode": {
|
||||
"label": "Consult Fast Mode",
|
||||
"help": "Optional fast mode override for the regular agent run behind realtime openclaw_agent_consult calls.",
|
||||
"advanced": true
|
||||
},
|
||||
"realtime.fastContext.enabled": {
|
||||
"label": "Enable Fast Realtime Context",
|
||||
"help": "Searches memory/session context before the full consult agent.",
|
||||
@@ -515,6 +525,13 @@
|
||||
"type": "string",
|
||||
"enum": ["auto", "substantive", "always"]
|
||||
},
|
||||
"consultThinkingLevel": {
|
||||
"type": "string",
|
||||
"enum": ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"]
|
||||
},
|
||||
"consultFastMode": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
||||
@@ -396,6 +396,8 @@ describe("normalizeVoiceCallConfig", () => {
|
||||
sources: ["memory", "sessions"],
|
||||
fallbackToConsult: false,
|
||||
});
|
||||
expect(normalized.realtime.consultThinkingLevel).toBeUndefined();
|
||||
expect(normalized.realtime.consultFastMode).toBeUndefined();
|
||||
expect(normalized.realtime.agentContext).toEqual({
|
||||
enabled: false,
|
||||
maxChars: 6000,
|
||||
@@ -468,6 +470,32 @@ describe("resolveVoiceCallConfig realtime settings", () => {
|
||||
expect(resolved.realtime.provider).toBeUndefined();
|
||||
});
|
||||
|
||||
it("preserves configured realtime consult overrides", () => {
|
||||
const resolved = resolveVoiceCallConfig({
|
||||
enabled: true,
|
||||
provider: "mock",
|
||||
realtime: {
|
||||
consultThinkingLevel: "low",
|
||||
consultFastMode: true,
|
||||
},
|
||||
});
|
||||
|
||||
expect(resolved.realtime.consultThinkingLevel).toBe("low");
|
||||
expect(resolved.realtime.consultFastMode).toBe(true);
|
||||
});
|
||||
|
||||
it("rejects invalid realtime consult thinking levels", () => {
|
||||
expect(() =>
|
||||
resolveVoiceCallConfig({
|
||||
enabled: true,
|
||||
provider: "mock",
|
||||
realtime: {
|
||||
consultThinkingLevel: "turbo",
|
||||
},
|
||||
} as never),
|
||||
).toThrow(/Invalid option/);
|
||||
});
|
||||
|
||||
it("leaves responseModel unset so voice responses can inherit runtime defaults", () => {
|
||||
const resolved = resolveVoiceCallConfig({
|
||||
enabled: true,
|
||||
|
||||
@@ -287,6 +287,20 @@ export type VoiceCallRealtimeAgentContextConfig = z.infer<
|
||||
typeof VoiceCallRealtimeAgentContextConfigSchema
|
||||
>;
|
||||
|
||||
/**
 * Zod enum of the values accepted for `realtime.consultThinkingLevel`.
 *
 * Used both as the field validator in the realtime config schema and to
 * re-parse the merged value during config normalization, so an unknown level
 * is rejected with a Zod validation error.
 *
 * NOTE(review): the same eight literals are also spelled out in the plugin's
 * JSON config schema and in the core `talk.consultThinkingLevel` enum — keep
 * the lists in sync when adding a level.
 */
export const VoiceCallRealtimeConsultThinkingLevelSchema = z.enum([
|
||||
"off",
|
||||
"minimal",
|
||||
"low",
|
||||
"medium",
|
||||
"high",
|
||||
"xhigh",
|
||||
"adaptive",
|
||||
"max",
|
||||
]);
|
||||
/** String-literal union inferred from `VoiceCallRealtimeConsultThinkingLevelSchema`. */
export type VoiceCallRealtimeConsultThinkingLevel = z.infer<
|
||||
typeof VoiceCallRealtimeConsultThinkingLevelSchema
|
||||
>;
|
||||
|
||||
const VoiceCallStreamingProvidersConfigSchema = z
|
||||
.record(z.string(), z.record(z.string(), z.unknown()))
|
||||
.default({});
|
||||
@@ -305,6 +319,10 @@ const VoiceCallRealtimeConfigSchema = z
|
||||
toolPolicy: VoiceCallRealtimeToolPolicySchema.default("safe-read-only"),
|
||||
/** Guidance for when the realtime model should call the OpenClaw agent consult tool. */
|
||||
consultPolicy: VoiceCallRealtimeConsultPolicySchema.default("auto"),
|
||||
/** Optional thinking level override for the regular agent behind realtime consults. */
|
||||
consultThinkingLevel: VoiceCallRealtimeConsultThinkingLevelSchema.optional(),
|
||||
/** Optional fast mode override for the regular agent behind realtime consults. */
|
||||
consultFastMode: z.boolean().optional(),
|
||||
/** Tool definitions exposed to the realtime provider. */
|
||||
tools: z.array(RealtimeToolSchema).default([]),
|
||||
/** Low-latency memory/session context for the consult tool. */
|
||||
@@ -686,6 +704,10 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
|
||||
defaultRealtimeStreamPathForServePath(serve.path ?? defaults.serve.path),
|
||||
tools:
|
||||
(config.realtime?.tools as RealtimeToolConfig[] | undefined) ?? defaults.realtime.tools,
|
||||
consultThinkingLevel: VoiceCallRealtimeConsultThinkingLevelSchema.optional().parse(
|
||||
config.realtime?.consultThinkingLevel ?? defaults.realtime.consultThinkingLevel,
|
||||
),
|
||||
consultFastMode: config.realtime?.consultFastMode ?? defaults.realtime.consultFastMode,
|
||||
fastContext: realtimeFastContext,
|
||||
agentContext: realtimeAgentContext,
|
||||
providers: realtimeProviders,
|
||||
|
||||
@@ -537,4 +537,62 @@ describe("createVoiceCallRuntime lifecycle", () => {
|
||||
});
|
||||
expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("uses the configured realtime consult thinking level when set", async () => {
|
||||
const config = createBaseConfig();
|
||||
config.inboundPolicy = "allowlist";
|
||||
config.realtime.enabled = true;
|
||||
config.realtime.consultThinkingLevel = "low";
|
||||
config.realtime.consultFastMode = true;
|
||||
const sessionStore: Record<string, unknown> = {};
|
||||
const runEmbeddedPiAgent = vi.fn(async () => ({
|
||||
payloads: [{ text: "Done." }],
|
||||
meta: {},
|
||||
}));
|
||||
const agentRuntime = {
|
||||
defaults: { provider: "openai", model: "gpt-5.4" },
|
||||
resolveAgentDir: vi.fn(() => "/tmp/agent"),
|
||||
resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"),
|
||||
resolveAgentIdentity: vi.fn(),
|
||||
resolveThinkingDefault: vi.fn(() => "high"),
|
||||
resolveAgentTimeoutMs: vi.fn(() => 30_000),
|
||||
ensureAgentWorkspace: vi.fn(async () => {}),
|
||||
session: {
|
||||
resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
|
||||
loadSessionStore: vi.fn(() => sessionStore),
|
||||
saveSessionStore: vi.fn(async () => {}),
|
||||
updateSessionStore: vi.fn(async (_storePath, mutator) => mutator(sessionStore)),
|
||||
resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
|
||||
},
|
||||
runEmbeddedPiAgent,
|
||||
};
|
||||
mocks.managerGetCall.mockReturnValue({
|
||||
callId: "call-1",
|
||||
direction: "outbound",
|
||||
from: "+15550001234",
|
||||
to: "+15550009999",
|
||||
transcript: [],
|
||||
});
|
||||
|
||||
await createVoiceCallRuntime({
|
||||
config,
|
||||
coreConfig: {} as CoreConfig,
|
||||
agentRuntime: agentRuntime as never,
|
||||
});
|
||||
|
||||
const handler = mocks.realtimeHandlerRegisterToolHandler.mock.calls[0]?.[1] as
|
||||
| ((args: unknown, callId: string) => Promise<unknown>)
|
||||
| undefined;
|
||||
await expect(handler?.({ question: "Turn on the lights." }, "call-1")).resolves.toEqual({
|
||||
text: "Done.",
|
||||
});
|
||||
|
||||
expect(agentRuntime.resolveThinkingDefault).not.toHaveBeenCalled();
|
||||
expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
thinkLevel: "low",
|
||||
fastMode: true,
|
||||
}),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -372,11 +372,13 @@ export async function createVoiceCallRuntime(params: {
|
||||
voiceConfig: effectiveConfig,
|
||||
agentRuntime,
|
||||
});
|
||||
const thinkLevel = agentRuntime.resolveThinkingDefault({
|
||||
cfg,
|
||||
provider: agentProvider,
|
||||
model,
|
||||
});
|
||||
const thinkLevel =
|
||||
effectiveConfig.realtime.consultThinkingLevel ??
|
||||
agentRuntime.resolveThinkingDefault({
|
||||
cfg,
|
||||
provider: agentProvider,
|
||||
model,
|
||||
});
|
||||
return await consultRealtimeVoiceAgent({
|
||||
cfg,
|
||||
agentRuntime,
|
||||
@@ -395,6 +397,7 @@ export async function createVoiceCallRuntime(params: {
|
||||
provider: agentProvider,
|
||||
model,
|
||||
thinkLevel,
|
||||
fastMode: effectiveConfig.realtime.consultFastMode,
|
||||
timeoutMs: effectiveConfig.responseTimeoutMs,
|
||||
spawnedBy: requesterSessionKey,
|
||||
contextMode: requesterSessionKey ? "fork" : undefined,
|
||||
|
||||
@@ -53,6 +53,10 @@ export type GetReplyOptions = {
|
||||
suppressTyping?: boolean;
|
||||
/** Resolved heartbeat model override (provider/model string from merged per-agent config). */
|
||||
heartbeatModelOverride?: string;
|
||||
/** One-shot thinking level override for this run; does not persist to the session. */
|
||||
thinkingLevelOverride?: string;
|
||||
/** One-shot fast-mode override for this run; does not persist to the session. */
|
||||
fastModeOverride?: boolean;
|
||||
/** Controls bootstrap workspace context injection (default: full). */
|
||||
bootstrapContextMode?: "full" | "lightweight";
|
||||
/** If true, suppress tool error warning payloads for this run. */
|
||||
|
||||
@@ -14,7 +14,13 @@ import {
|
||||
} from "../../shared/string-coerce.js";
|
||||
import { shouldHandleTextCommands } from "../commands-text-routing.js";
|
||||
import type { MsgContext, TemplateContext } from "../templating.js";
|
||||
import type { ElevatedLevel, ReasoningLevel, ThinkLevel, VerboseLevel } from "../thinking.js";
|
||||
import {
|
||||
normalizeThinkLevel,
|
||||
type ElevatedLevel,
|
||||
type ReasoningLevel,
|
||||
type ThinkLevel,
|
||||
type VerboseLevel,
|
||||
} from "../thinking.js";
|
||||
import type { GetReplyOptions, ReplyPayload } from "../types.js";
|
||||
import { resolveBlockStreamingChunking } from "./block-streaming.js";
|
||||
import { buildCommandContext } from "./commands-context.js";
|
||||
@@ -417,8 +423,11 @@ export async function resolveReplyDirectives(params: {
|
||||
});
|
||||
const defaultActivation = defaultGroupActivation(requireMention);
|
||||
const resolvedThinkLevel =
|
||||
directives.thinkLevel ?? (targetSessionEntry?.thinkingLevel as ThinkLevel | undefined);
|
||||
normalizeThinkLevel(opts?.thinkingLevelOverride) ??
|
||||
directives.thinkLevel ??
|
||||
(targetSessionEntry?.thinkingLevel as ThinkLevel | undefined);
|
||||
const resolvedFastMode =
|
||||
opts?.fastModeOverride ??
|
||||
directives.fastMode ??
|
||||
resolveFastModeState({
|
||||
cfg,
|
||||
|
||||
@@ -324,8 +324,10 @@ const TARGET_KEYS = [
|
||||
"discovery.mdns.mode",
|
||||
"gateway.controlUi.embedSandbox",
|
||||
"talk",
|
||||
"talk.consultFastMode",
|
||||
"talk.interruptOnSpeech",
|
||||
"talk.silenceTimeoutMs",
|
||||
"talk.consultThinkingLevel",
|
||||
"meta",
|
||||
"env",
|
||||
"env.shellEnv",
|
||||
|
||||
@@ -167,6 +167,10 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"Talk byte/session transport: webrtc, provider-websocket, gateway-relay, or managed-room.",
|
||||
"talk.realtime.brain":
|
||||
"Talk reasoning strategy: agent-consult for Gateway-mediated agent help, direct-tools for owner-only tool calls, or none.",
|
||||
"talk.consultThinkingLevel":
|
||||
"Use this to override the thinking level for the regular agent run behind Talk realtime consults.",
|
||||
"talk.consultFastMode":
|
||||
"Use this to set true or false fast mode for the regular agent run behind Talk realtime consults.",
|
||||
"talk.speechLocale":
|
||||
'BCP 47 locale id for Talk speech recognition on device nodes, for example "ru-RU". Leave unset to use each device default.',
|
||||
"talk.interruptOnSpeech":
|
||||
|
||||
@@ -841,6 +841,8 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"talk.speechLocale": "Talk Speech Locale",
|
||||
"talk.interruptOnSpeech": "Talk Interrupt on Speech",
|
||||
"talk.silenceTimeoutMs": "Talk Silence Timeout (ms)",
|
||||
"talk.consultThinkingLevel": "Talk Consult Thinking Level",
|
||||
"talk.consultFastMode": "Talk Consult Fast Mode",
|
||||
messages: "Messages",
|
||||
"messages.messagePrefix": "Inbound Message Prefix",
|
||||
"messages.visibleReplies": "Visible Replies",
|
||||
|
||||
@@ -10,6 +10,8 @@ describe("talk normalization", () => {
|
||||
modelId: "eleven_v3",
|
||||
outputFormat: "pcm_44100",
|
||||
apiKey: "secret-key", // pragma: allowlist secret
|
||||
consultThinkingLevel: " low ",
|
||||
consultFastMode: true,
|
||||
speechLocale: " ru-RU ",
|
||||
interruptOnSpeech: false,
|
||||
silenceTimeoutMs: 1500,
|
||||
@@ -17,6 +19,8 @@ describe("talk normalization", () => {
|
||||
|
||||
expect(normalized).toEqual({
|
||||
speechLocale: "ru-RU",
|
||||
consultThinkingLevel: "low",
|
||||
consultFastMode: true,
|
||||
interruptOnSpeech: false,
|
||||
silenceTimeoutMs: 1500,
|
||||
});
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { normalizeOptionalString } from "../shared/string-coerce.js";
|
||||
import { normalizeThinkLevel } from "../auto-reply/thinking.js";
|
||||
import { normalizeFastMode, normalizeOptionalString } from "../shared/string-coerce.js";
|
||||
import { isRecord } from "../utils.js";
|
||||
import type {
|
||||
ResolvedTalkConfig,
|
||||
@@ -157,6 +158,20 @@ export function normalizeTalkSection(value: TalkConfig | undefined): TalkConfig
|
||||
if (typeof source.interruptOnSpeech === "boolean") {
|
||||
normalized.interruptOnSpeech = source.interruptOnSpeech;
|
||||
}
|
||||
const consultThinkingLevel = normalizeThinkLevel(
|
||||
normalizeOptionalString(source.consultThinkingLevel),
|
||||
);
|
||||
if (consultThinkingLevel) {
|
||||
normalized.consultThinkingLevel = consultThinkingLevel;
|
||||
}
|
||||
const rawConsultFastMode = source.consultFastMode;
|
||||
const consultFastMode =
|
||||
typeof rawConsultFastMode === "boolean" || typeof rawConsultFastMode === "string"
|
||||
? normalizeFastMode(rawConsultFastMode)
|
||||
: undefined;
|
||||
if (consultFastMode !== undefined) {
|
||||
normalized.consultFastMode = consultFastMode;
|
||||
}
|
||||
const silenceTimeoutMs = normalizeSilenceTimeoutMs(source.silenceTimeoutMs);
|
||||
if (silenceTimeoutMs !== undefined) {
|
||||
normalized.silenceTimeoutMs = silenceTimeoutMs;
|
||||
@@ -225,6 +240,12 @@ export function buildTalkConfigResponse(value: unknown): TalkConfigResponse | un
|
||||
if (typeof normalized?.silenceTimeoutMs === "number") {
|
||||
payload.silenceTimeoutMs = normalized.silenceTimeoutMs;
|
||||
}
|
||||
if (typeof normalized?.consultThinkingLevel === "string") {
|
||||
payload.consultThinkingLevel = normalized.consultThinkingLevel;
|
||||
}
|
||||
if (typeof normalized?.consultFastMode === "boolean") {
|
||||
payload.consultFastMode = normalized.consultFastMode;
|
||||
}
|
||||
if (typeof normalized?.speechLocale === "string") {
|
||||
payload.speechLocale = normalized.speechLocale;
|
||||
}
|
||||
|
||||
@@ -76,6 +76,18 @@ export type TalkConfig = {
|
||||
providers?: Record<string, TalkProviderConfig>;
|
||||
/** Realtime Talk provider, model, voice, mode, transport, and brain config. */
|
||||
realtime?: TalkRealtimeConfig;
|
||||
/** Optional thinking level override for the agent run behind Talk realtime consults. */
|
||||
consultThinkingLevel?:
|
||||
| "off"
|
||||
| "minimal"
|
||||
| "low"
|
||||
| "medium"
|
||||
| "high"
|
||||
| "xhigh"
|
||||
| "adaptive"
|
||||
| "max";
|
||||
/** Optional fast mode override for the agent run behind Talk realtime consults. */
|
||||
consultFastMode?: boolean;
|
||||
/** BCP 47 locale id used for Talk speech recognition on device nodes. */
|
||||
speechLocale?: string;
|
||||
/** Stop speaking when user starts talking (default: true). */
|
||||
|
||||
@@ -6,12 +6,24 @@ describe("OpenClawSchema talk validation", () => {
|
||||
expect(
|
||||
OpenClawSchema.safeParse({
|
||||
talk: {
|
||||
consultThinkingLevel: "low",
|
||||
consultFastMode: true,
|
||||
silenceTimeoutMs: 1500,
|
||||
},
|
||||
}),
|
||||
).toMatchObject({ success: true });
|
||||
});
|
||||
|
||||
it("rejects invalid talk.consultThinkingLevel", () => {
|
||||
expect(() =>
|
||||
OpenClawSchema.parse({
|
||||
talk: {
|
||||
consultThinkingLevel: "turbo",
|
||||
},
|
||||
}),
|
||||
).toThrow(/consultThinkingLevel/i);
|
||||
});
|
||||
|
||||
it.each([
|
||||
["boolean", true],
|
||||
["string", "1500"],
|
||||
|
||||
@@ -268,6 +268,10 @@ const TalkSchema = z
|
||||
provider: z.string().optional(),
|
||||
providers: z.record(z.string(), TalkProviderEntrySchema).optional(),
|
||||
realtime: TalkRealtimeSchema.optional(),
|
||||
consultThinkingLevel: z
|
||||
.enum(["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"])
|
||||
.optional(),
|
||||
consultFastMode: z.boolean().optional(),
|
||||
speechLocale: z.string().optional(),
|
||||
interruptOnSpeech: z.boolean().optional(),
|
||||
silenceTimeoutMs: z.number().int().positive().optional(),
|
||||
|
||||
@@ -514,6 +514,8 @@ const TalkConfigSchema = Type.Object(
|
||||
providers: Type.Optional(Type.Record(Type.String(), TalkProviderConfigSchema)),
|
||||
realtime: Type.Optional(TalkRealtimeConfigSchema),
|
||||
resolved: Type.Optional(ResolvedTalkConfigSchema),
|
||||
consultThinkingLevel: Type.Optional(Type.String()),
|
||||
consultFastMode: Type.Optional(Type.Boolean()),
|
||||
speechLocale: Type.Optional(Type.String()),
|
||||
interruptOnSpeech: Type.Optional(Type.Boolean()),
|
||||
silenceTimeoutMs: Type.Optional(Type.Integer({ minimum: 1 })),
|
||||
|
||||
@@ -38,6 +38,7 @@ export const ChatSendParamsSchema = Type.Object(
|
||||
sessionId: Type.Optional(NonEmptyString),
|
||||
message: Type.String(),
|
||||
thinking: Type.Optional(Type.String()),
|
||||
fastMode: Type.Optional(Type.Boolean()),
|
||||
deliver: Type.Optional(Type.Boolean()),
|
||||
originatingChannel: Type.Optional(Type.String()),
|
||||
originatingTo: Type.Optional(Type.String()),
|
||||
|
||||
@@ -1904,6 +1904,7 @@ export const chatHandlers: GatewayRequestHandlers = {
|
||||
sessionId?: string;
|
||||
message: string;
|
||||
thinking?: string;
|
||||
fastMode?: boolean;
|
||||
deliver?: boolean;
|
||||
originatingChannel?: string;
|
||||
originatingTo?: string;
|
||||
@@ -2503,6 +2504,8 @@ export const chatHandlers: GatewayRequestHandlers = {
|
||||
abortSignal: activeRunAbort.controller.signal,
|
||||
images: parsedImages.length > 0 ? parsedImages : undefined,
|
||||
imageOrder: imageOrder.length > 0 ? imageOrder : undefined,
|
||||
thinkingLevelOverride: p.thinking,
|
||||
fastModeOverride: p.fastMode,
|
||||
onAgentRunStart: (runId) => {
|
||||
agentRunStarted = true;
|
||||
if (!hasBeforeAgentRunGate) {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { normalizeTalkSection } from "../../config/talk.js";
|
||||
import {
|
||||
normalizeOptionalLowercaseString,
|
||||
normalizeOptionalString,
|
||||
@@ -45,6 +46,7 @@ async function startRealtimeToolCallAgentConsult(params: {
|
||||
return { ok: false, error: errorShape(ErrorCodes.INVALID_REQUEST, formatForLog(err)) };
|
||||
}
|
||||
const idempotencyKey = `talk-${params.callId}-${randomUUID()}`;
|
||||
const normalizedTalk = normalizeTalkSection(params.request.context.getRuntimeConfig().talk);
|
||||
let chatResponse: { ok: true; result: unknown } | { ok: false; error: ErrorShape } | undefined;
|
||||
await chatHandlers["chat.send"]({
|
||||
...params.request,
|
||||
@@ -57,6 +59,12 @@ async function startRealtimeToolCallAgentConsult(params: {
|
||||
sessionKey: params.sessionKey,
|
||||
message,
|
||||
idempotencyKey,
|
||||
...(normalizedTalk?.consultThinkingLevel
|
||||
? { thinking: normalizedTalk.consultThinkingLevel }
|
||||
: {}),
|
||||
...(typeof normalizedTalk?.consultFastMode === "boolean"
|
||||
? { fastMode: normalizedTalk.consultFastMode }
|
||||
: {}),
|
||||
},
|
||||
respond: (ok: boolean, result?: unknown, error?: ErrorShape) => {
|
||||
chatResponse = ok
|
||||
|
||||
@@ -1088,6 +1088,46 @@ describe("talk.client.toolCall handler", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("passes configured consult thinking and fast-mode overrides to chat.send", async () => {
|
||||
const respond = vi.fn();
|
||||
|
||||
await talkHandlers["talk.client.toolCall"]({
|
||||
req: { type: "req", id: "1", method: "talk.client.toolCall" },
|
||||
params: {
|
||||
sessionKey: "main",
|
||||
callId: "call-1",
|
||||
name: "openclaw_agent_consult",
|
||||
args: { question: "Are the basement lights off?" },
|
||||
},
|
||||
client: { connId: "conn-1" } as never,
|
||||
isWebchatConnect: () => false,
|
||||
respond: respond as never,
|
||||
context: {
|
||||
getRuntimeConfig: () =>
|
||||
({
|
||||
talk: {
|
||||
consultThinkingLevel: "low",
|
||||
consultFastMode: true,
|
||||
},
|
||||
}) as OpenClawConfig,
|
||||
} as never,
|
||||
});
|
||||
|
||||
expect(mocks.chatSend).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
params: expect.objectContaining({
|
||||
thinking: "low",
|
||||
fastMode: true,
|
||||
}),
|
||||
}),
|
||||
);
|
||||
expect(respond).toHaveBeenCalledWith(
|
||||
true,
|
||||
expect.objectContaining({ runId: "run-voice-1" }),
|
||||
undefined,
|
||||
);
|
||||
});
|
||||
|
||||
it("links relay-owned agent consult runs so relay cancellation can abort them", async () => {
|
||||
const respond = vi.fn();
|
||||
|
||||
|
||||
@@ -127,6 +127,7 @@ describe("realtime voice agent consult runtime", () => {
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
thinkLevel: "high",
|
||||
fastMode: true,
|
||||
timeoutMs: 10_000,
|
||||
});
|
||||
|
||||
@@ -149,6 +150,7 @@ describe("realtime voice agent consult runtime", () => {
|
||||
expect(call.provider).toBe("openai");
|
||||
expect(call.model).toBe("gpt-5.4");
|
||||
expect(call.thinkLevel).toBe("high");
|
||||
expect(call.fastMode).toBe(true);
|
||||
expect(call.timeoutMs).toBe(10_000);
|
||||
expect(call.prompt).toContain("Caller: Can you check this?");
|
||||
expect(call.extraSystemPrompt).toContain("delegated requests");
|
||||
|
||||
@@ -199,6 +199,7 @@ export async function consultRealtimeVoiceAgent(params: {
|
||||
provider?: RunEmbeddedPiAgentParams["provider"];
|
||||
model?: RunEmbeddedPiAgentParams["model"];
|
||||
thinkLevel?: RunEmbeddedPiAgentParams["thinkLevel"];
|
||||
fastMode?: RunEmbeddedPiAgentParams["fastMode"];
|
||||
timeoutMs?: number;
|
||||
toolsAllow?: string[];
|
||||
extraSystemPrompt?: string;
|
||||
@@ -264,6 +265,7 @@ export async function consultRealtimeVoiceAgent(params: {
|
||||
provider: params.provider,
|
||||
model: params.model,
|
||||
thinkLevel: params.thinkLevel ?? "high",
|
||||
fastMode: params.fastMode,
|
||||
verboseLevel: "off",
|
||||
reasoningLevel: "off",
|
||||
toolResultFormat: "plain",
|
||||
|
||||
@@ -38,6 +38,8 @@ export type RealtimeTalkWebRtcSdpSessionResult = {
|
||||
model?: string;
|
||||
voice?: string;
|
||||
expiresAt?: number;
|
||||
consultThinkingLevel?: string;
|
||||
consultFastMode?: boolean;
|
||||
};
|
||||
|
||||
export type RealtimeTalkJsonPcmWebSocketSessionResult = {
|
||||
@@ -51,6 +53,8 @@ export type RealtimeTalkJsonPcmWebSocketSessionResult = {
|
||||
model?: string;
|
||||
voice?: string;
|
||||
expiresAt?: number;
|
||||
consultThinkingLevel?: string;
|
||||
consultFastMode?: boolean;
|
||||
};
|
||||
|
||||
export type RealtimeTalkGatewayRelaySessionResult = {
|
||||
@@ -61,6 +65,8 @@ export type RealtimeTalkGatewayRelaySessionResult = {
|
||||
model?: string;
|
||||
voice?: string;
|
||||
expiresAt?: number;
|
||||
consultThinkingLevel?: string;
|
||||
consultFastMode?: boolean;
|
||||
};
|
||||
|
||||
export type RealtimeTalkManagedRoomSessionResult = {
|
||||
@@ -71,6 +77,8 @@ export type RealtimeTalkManagedRoomSessionResult = {
|
||||
model?: string;
|
||||
voice?: string;
|
||||
expiresAt?: number;
|
||||
consultThinkingLevel?: string;
|
||||
consultFastMode?: boolean;
|
||||
};
|
||||
|
||||
export type RealtimeTalkSessionResult =
|
||||
@@ -88,6 +96,8 @@ export type RealtimeTalkTransportContext = {
|
||||
client: GatewayBrowserClient;
|
||||
sessionKey: string;
|
||||
callbacks: RealtimeTalkCallbacks;
|
||||
consultThinkingLevel?: string;
|
||||
consultFastMode?: boolean;
|
||||
};
|
||||
|
||||
export function createRealtimeTalkEventEmitter(
|
||||
|
||||
@@ -74,6 +74,8 @@ export class RealtimeTalkSession {
|
||||
client: this.client,
|
||||
sessionKey: this.sessionKey,
|
||||
callbacks: this.callbacks,
|
||||
consultThinkingLevel: session.consultThinkingLevel,
|
||||
consultFastMode: session.consultFastMode,
|
||||
});
|
||||
await this.transport.start();
|
||||
}
|
||||
|
||||
54
ui/src/ui/realtime-talk-consult.test.ts
Normal file
54
ui/src/ui/realtime-talk-consult.test.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
/* @vitest-environment jsdom */
|
||||
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { submitRealtimeTalkConsult } from "./chat/realtime-talk-shared.js";
|
||||
|
||||
// Exercises submitRealtimeTalkConsult against a stubbed Gateway client: both
// `request` and `addEventListener` are vi.fn fakes, so no real transport is used.
describe("RealtimeTalkSession consult handoff", () => {
|
||||
it("submits realtime consults through the Gateway tool-call endpoint", async () => {
|
||||
// Holds whatever callback the code under test registers via addEventListener.
let listener: ((event: { event: string; payload?: unknown }) => void) | undefined;
|
||||
// Fake client.request: only "talk.client.toolCall" is expected. It returns a
// run id and, via a 0 ms timer (i.e. after the request has resolved), emits a
// final "chat" event for that same run. Any other method fails the test.
const request = vi.fn(async (method: string, _params: unknown) => {
|
||||
if (method === "talk.client.toolCall") {
|
||||
window.setTimeout(() => {
|
||||
listener?.({
|
||||
event: "chat",
|
||||
payload: {
|
||||
runId: "run-1",
|
||||
state: "final",
|
||||
message: { text: "Basement lights are off." },
|
||||
},
|
||||
});
|
||||
}, 0);
|
||||
return { runId: "run-1" };
|
||||
}
|
||||
throw new Error(`unexpected request: ${method}`);
|
||||
});
|
||||
// Fake subscription: remembers the callback and returns an unsubscribe
// function that clears it again.
const addEventListener = vi.fn((callback: typeof listener) => {
|
||||
listener = callback;
|
||||
return () => {
|
||||
listener = undefined;
|
||||
};
|
||||
});
|
||||
// Spy standing in for the callback that receives the consult result.
const submit = vi.fn();
|
||||
|
||||
// The context is a partial stub, hence the `as never` cast.
await submitRealtimeTalkConsult({
|
||||
ctx: {
|
||||
client: { request, addEventListener },
|
||||
sessionKey: "agent:main:main",
|
||||
callbacks: {},
|
||||
} as never,
|
||||
callId: "call-1",
|
||||
args: { question: "Are the basement lights off?" },
|
||||
submit,
|
||||
});
|
||||
|
||||
// The consult must go through talk.client.toolCall carrying the session key,
// the consult tool name, and the original arguments.
expect(request).toHaveBeenCalledWith(
|
||||
"talk.client.toolCall",
|
||||
expect.objectContaining({
|
||||
sessionKey: "agent:main:main",
|
||||
name: "openclaw_agent_consult",
|
||||
args: { question: "Are the basement lights off?" },
|
||||
}),
|
||||
);
|
||||
// The text of the final chat message is handed to `submit` for the original call id.
expect(submit).toHaveBeenCalledWith("call-1", { result: "Basement lights are off." });
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user