feat: add realtime consult overrides

This commit is contained in:
VACInc
2026-04-28 01:30:49 -04:00
committed by Peter Steinberger
parent 4235f6b192
commit cfb0c34ff6
30 changed files with 346 additions and 8 deletions

View File

@@ -1378,6 +1378,8 @@ Defaults for Talk mode (macOS/iOS/Android).
},
system: {},
},
consultThinkingLevel: "low",
consultFastMode: true,
speechLocale: "ru-RU",
silenceTimeoutMs: 1500,
interruptOnSpeech: true,
@@ -1405,6 +1407,8 @@ Defaults for Talk mode (macOS/iOS/Android).
- `providers.*.voiceAliases` lets Talk directives use friendly names.
- `providers.mlx.modelId` selects the Hugging Face repo used by the macOS local MLX helper. If omitted, macOS uses `mlx-community/Soprano-80M-bf16`.
- macOS MLX playback runs through the bundled `openclaw-mlx-tts` helper when present, or an executable on `PATH`; `OPENCLAW_MLX_TTS_BIN` overrides the helper path for development.
- `consultThinkingLevel` controls the thinking level for the full OpenClaw agent run behind Control UI Talk realtime `openclaw_agent_consult` calls. Leave unset to preserve normal session/model behavior.
- `consultFastMode` sets a one-shot fast-mode override for Control UI Talk realtime consults without changing the session's normal fast-mode setting.
- `speechLocale` sets the BCP 47 locale id used by iOS/macOS Talk speech recognition. Leave unset to use the device default.
- `silenceTimeoutMs` controls how long Talk mode waits after user silence before it sends the transcript. Unset keeps the platform default pause window (`700 ms on macOS and Android, 900 ms on iOS`).

View File

@@ -48,6 +48,8 @@ Moved to a dedicated page - see
- `session.*` (session lifecycle, compaction, pruning)
- `messages.*` (message delivery, TTS, markdown rendering)
- `talk.*` (Talk mode)
- `talk.consultThinkingLevel`: thinking level override for the full OpenClaw agent run behind Control UI Talk realtime consults
- `talk.consultFastMode`: one-shot fast-mode override for Control UI Talk realtime consults
- `talk.speechLocale`: optional BCP 47 locale id for Talk speech recognition on iOS/macOS
- `talk.silenceTimeoutMs`: when unset, Talk keeps the platform default pause window before sending the transcript (`700 ms on macOS and Android, 900 ms on iOS`)

View File

@@ -102,6 +102,8 @@ Defaults:
- `providers.elevenlabs.modelId`: defaults to `eleven_v3` when unset.
- `providers.mlx.modelId`: defaults to `mlx-community/Soprano-80M-bf16` when unset.
- `providers.elevenlabs.apiKey`: falls back to `ELEVENLABS_API_KEY` (or gateway shell profile if available).
- `consultThinkingLevel`: optional thinking level override for the full OpenClaw agent run behind realtime `openclaw_agent_consult` calls.
- `consultFastMode`: optional fast-mode override for realtime `openclaw_agent_consult` calls.
- `realtime.provider`: selects the active browser/server realtime voice provider. Use `openai` for WebRTC, `google` for provider WebSocket, or a bridge-only provider through Gateway relay.
- `realtime.providers.<provider>` stores provider-owned realtime config. The browser receives only ephemeral or constrained session credentials, never a standard API key.
- `realtime.providers.openai.voice`: built-in OpenAI Realtime voice id. Current `gpt-realtime-2` voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and `cedar`; `marin` and `cedar` are recommended for best quality.

View File

@@ -316,6 +316,8 @@ for tool work, current information, memory lookups, or workspace state.
instructions: "Speak briefly. Call openclaw_agent_consult before using deeper tools.",
toolPolicy: "safe-read-only",
consultPolicy: "substantive",
consultThinkingLevel: "low",
consultFastMode: true,
agentContext: { enabled: true },
providers: {
google: {

View File

@@ -106,6 +106,8 @@ Notes:
- advanced webhook, streaming, and tunnel notes: `https://docs.openclaw.ai/plugins/voice-call`
- `responseModel` is optional. When unset, voice responses use the runtime default model.
- `sessionScope` defaults to `per-phone`, preserving caller memory across calls. Use `per-call` for reception, booking, IVR, and bridge flows where each carrier call should start fresh.
- `realtime.consultThinkingLevel` is optional. When set, it overrides the thinking level used by the model behind realtime `openclaw_agent_consult` calls.
- `realtime.consultFastMode` is optional. When set, it toggles fast mode for realtime `openclaw_agent_consult` calls.

## Stale call reaper

View File

@@ -153,6 +153,16 @@
"help": "Guides when the realtime voice model should call openclaw_agent_consult.",
"advanced": true
},
"realtime.consultThinkingLevel": {
"label": "Consult Thinking Level",
"help": "Optional override for the regular agent run behind realtime openclaw_agent_consult calls.",
"advanced": true
},
"realtime.consultFastMode": {
"label": "Consult Fast Mode",
"help": "Optional fast mode override for the regular agent run behind realtime openclaw_agent_consult calls.",
"advanced": true
},
"realtime.fastContext.enabled": {
"label": "Enable Fast Realtime Context",
"help": "Searches memory/session context before the full consult agent.",
@@ -515,6 +525,13 @@
"type": "string",
"enum": ["auto", "substantive", "always"]
},
"consultThinkingLevel": {
"type": "string",
"enum": ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"]
},
"consultFastMode": {
"type": "boolean"
},
"tools": {
"type": "array",
"items": {

View File

@@ -396,6 +396,8 @@ describe("normalizeVoiceCallConfig", () => {
sources: ["memory", "sessions"],
fallbackToConsult: false,
});
expect(normalized.realtime.consultThinkingLevel).toBeUndefined();
expect(normalized.realtime.consultFastMode).toBeUndefined();
expect(normalized.realtime.agentContext).toEqual({
enabled: false,
maxChars: 6000,
@@ -468,6 +470,32 @@ describe("resolveVoiceCallConfig realtime settings", () => {
expect(resolved.realtime.provider).toBeUndefined();
});
it("preserves configured realtime consult overrides", () => {
const resolved = resolveVoiceCallConfig({
enabled: true,
provider: "mock",
realtime: {
consultThinkingLevel: "low",
consultFastMode: true,
},
});
expect(resolved.realtime.consultThinkingLevel).toBe("low");
expect(resolved.realtime.consultFastMode).toBe(true);
});
it("rejects invalid realtime consult thinking levels", () => {
expect(() =>
resolveVoiceCallConfig({
enabled: true,
provider: "mock",
realtime: {
consultThinkingLevel: "turbo",
},
} as never),
).toThrow(/Invalid option/);
});
it("leaves responseModel unset so voice responses can inherit runtime defaults", () => {
const resolved = resolveVoiceCallConfig({
enabled: true,

View File

@@ -287,6 +287,20 @@ export type VoiceCallRealtimeAgentContextConfig = z.infer<
typeof VoiceCallRealtimeAgentContextConfigSchema
>;
export const VoiceCallRealtimeConsultThinkingLevelSchema = z.enum([
"off",
"minimal",
"low",
"medium",
"high",
"xhigh",
"adaptive",
"max",
]);
export type VoiceCallRealtimeConsultThinkingLevel = z.infer<
typeof VoiceCallRealtimeConsultThinkingLevelSchema
>;
const VoiceCallStreamingProvidersConfigSchema = z
.record(z.string(), z.record(z.string(), z.unknown()))
.default({});
@@ -305,6 +319,10 @@ const VoiceCallRealtimeConfigSchema = z
toolPolicy: VoiceCallRealtimeToolPolicySchema.default("safe-read-only"),
/** Guidance for when the realtime model should call the OpenClaw agent consult tool. */
consultPolicy: VoiceCallRealtimeConsultPolicySchema.default("auto"),
/** Optional thinking level override for the regular agent behind realtime consults. */
consultThinkingLevel: VoiceCallRealtimeConsultThinkingLevelSchema.optional(),
/** Optional fast mode override for the regular agent behind realtime consults. */
consultFastMode: z.boolean().optional(),
/** Tool definitions exposed to the realtime provider. */
tools: z.array(RealtimeToolSchema).default([]),
/** Low-latency memory/session context for the consult tool. */
@@ -686,6 +704,10 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
defaultRealtimeStreamPathForServePath(serve.path ?? defaults.serve.path),
tools:
(config.realtime?.tools as RealtimeToolConfig[] | undefined) ?? defaults.realtime.tools,
consultThinkingLevel: VoiceCallRealtimeConsultThinkingLevelSchema.optional().parse(
config.realtime?.consultThinkingLevel ?? defaults.realtime.consultThinkingLevel,
),
consultFastMode: config.realtime?.consultFastMode ?? defaults.realtime.consultFastMode,
fastContext: realtimeFastContext,
agentContext: realtimeAgentContext,
providers: realtimeProviders,

View File

@@ -537,4 +537,62 @@ describe("createVoiceCallRuntime lifecycle", () => {
});
expect(runEmbeddedPiAgent).not.toHaveBeenCalled();
});
it("uses the configured realtime consult thinking level when set", async () => {
const config = createBaseConfig();
config.inboundPolicy = "allowlist";
config.realtime.enabled = true;
config.realtime.consultThinkingLevel = "low";
config.realtime.consultFastMode = true;
const sessionStore: Record<string, unknown> = {};
const runEmbeddedPiAgent = vi.fn(async () => ({
payloads: [{ text: "Done." }],
meta: {},
}));
const agentRuntime = {
defaults: { provider: "openai", model: "gpt-5.4" },
resolveAgentDir: vi.fn(() => "/tmp/agent"),
resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"),
resolveAgentIdentity: vi.fn(),
resolveThinkingDefault: vi.fn(() => "high"),
resolveAgentTimeoutMs: vi.fn(() => 30_000),
ensureAgentWorkspace: vi.fn(async () => {}),
session: {
resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
loadSessionStore: vi.fn(() => sessionStore),
saveSessionStore: vi.fn(async () => {}),
updateSessionStore: vi.fn(async (_storePath, mutator) => mutator(sessionStore)),
resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
},
runEmbeddedPiAgent,
};
mocks.managerGetCall.mockReturnValue({
callId: "call-1",
direction: "outbound",
from: "+15550001234",
to: "+15550009999",
transcript: [],
});
await createVoiceCallRuntime({
config,
coreConfig: {} as CoreConfig,
agentRuntime: agentRuntime as never,
});
const handler = mocks.realtimeHandlerRegisterToolHandler.mock.calls[0]?.[1] as
| ((args: unknown, callId: string) => Promise<unknown>)
| undefined;
await expect(handler?.({ question: "Turn on the lights." }, "call-1")).resolves.toEqual({
text: "Done.",
});
expect(agentRuntime.resolveThinkingDefault).not.toHaveBeenCalled();
expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
expect.objectContaining({
thinkLevel: "low",
fastMode: true,
}),
);
});
});

View File

@@ -372,11 +372,13 @@ export async function createVoiceCallRuntime(params: {
voiceConfig: effectiveConfig,
agentRuntime,
});
const thinkLevel = agentRuntime.resolveThinkingDefault({
cfg,
provider: agentProvider,
model,
});
const thinkLevel =
effectiveConfig.realtime.consultThinkingLevel ??
agentRuntime.resolveThinkingDefault({
cfg,
provider: agentProvider,
model,
});
return await consultRealtimeVoiceAgent({
cfg,
agentRuntime,
@@ -395,6 +397,7 @@ export async function createVoiceCallRuntime(params: {
provider: agentProvider,
model,
thinkLevel,
fastMode: effectiveConfig.realtime.consultFastMode,
timeoutMs: effectiveConfig.responseTimeoutMs,
spawnedBy: requesterSessionKey,
contextMode: requesterSessionKey ? "fork" : undefined,

View File

@@ -53,6 +53,10 @@ export type GetReplyOptions = {
suppressTyping?: boolean;
/** Resolved heartbeat model override (provider/model string from merged per-agent config). */
heartbeatModelOverride?: string;
/** One-shot thinking level override for this run; does not persist to the session. */
thinkingLevelOverride?: string;
/** One-shot fast-mode override for this run; does not persist to the session. */
fastModeOverride?: boolean;
/** Controls bootstrap workspace context injection (default: full). */
bootstrapContextMode?: "full" | "lightweight";
/** If true, suppress tool error warning payloads for this run. */

View File

@@ -14,7 +14,13 @@ import {
} from "../../shared/string-coerce.js";
import { shouldHandleTextCommands } from "../commands-text-routing.js";
import type { MsgContext, TemplateContext } from "../templating.js";
import type { ElevatedLevel, ReasoningLevel, ThinkLevel, VerboseLevel } from "../thinking.js";
import {
normalizeThinkLevel,
type ElevatedLevel,
type ReasoningLevel,
type ThinkLevel,
type VerboseLevel,
} from "../thinking.js";
import type { GetReplyOptions, ReplyPayload } from "../types.js";
import { resolveBlockStreamingChunking } from "./block-streaming.js";
import { buildCommandContext } from "./commands-context.js";
@@ -417,8 +423,11 @@ export async function resolveReplyDirectives(params: {
});
const defaultActivation = defaultGroupActivation(requireMention);
const resolvedThinkLevel =
directives.thinkLevel ?? (targetSessionEntry?.thinkingLevel as ThinkLevel | undefined);
normalizeThinkLevel(opts?.thinkingLevelOverride) ??
directives.thinkLevel ??
(targetSessionEntry?.thinkingLevel as ThinkLevel | undefined);
const resolvedFastMode =
opts?.fastModeOverride ??
directives.fastMode ??
resolveFastModeState({
cfg,

View File

@@ -324,8 +324,10 @@ const TARGET_KEYS = [
"discovery.mdns.mode",
"gateway.controlUi.embedSandbox",
"talk",
"talk.consultFastMode",
"talk.interruptOnSpeech",
"talk.silenceTimeoutMs",
"talk.consultThinkingLevel",
"meta",
"env",
"env.shellEnv",

View File

@@ -167,6 +167,10 @@ export const FIELD_HELP: Record<string, string> = {
"Talk byte/session transport: webrtc, provider-websocket, gateway-relay, or managed-room.",
"talk.realtime.brain":
"Talk reasoning strategy: agent-consult for Gateway-mediated agent help, direct-tools for owner-only tool calls, or none.",
"talk.consultThinkingLevel":
"Use this to override the thinking level for the regular agent run behind Talk realtime consults.",
"talk.consultFastMode":
"Use this to set true or false fast mode for the regular agent run behind Talk realtime consults.",
"talk.speechLocale":
'BCP 47 locale id for Talk speech recognition on device nodes, for example "ru-RU". Leave unset to use each device default.',
"talk.interruptOnSpeech":

View File

@@ -841,6 +841,8 @@ export const FIELD_LABELS: Record<string, string> = {
"talk.speechLocale": "Talk Speech Locale",
"talk.interruptOnSpeech": "Talk Interrupt on Speech",
"talk.silenceTimeoutMs": "Talk Silence Timeout (ms)",
"talk.consultThinkingLevel": "Talk Consult Thinking Level",
"talk.consultFastMode": "Talk Consult Fast Mode",
messages: "Messages",
"messages.messagePrefix": "Inbound Message Prefix",
"messages.visibleReplies": "Visible Replies",

View File

@@ -10,6 +10,8 @@ describe("talk normalization", () => {
modelId: "eleven_v3",
outputFormat: "pcm_44100",
apiKey: "secret-key", // pragma: allowlist secret
consultThinkingLevel: " low ",
consultFastMode: true,
speechLocale: " ru-RU ",
interruptOnSpeech: false,
silenceTimeoutMs: 1500,
@@ -17,6 +19,8 @@ describe("talk normalization", () => {
expect(normalized).toEqual({
speechLocale: "ru-RU",
consultThinkingLevel: "low",
consultFastMode: true,
interruptOnSpeech: false,
silenceTimeoutMs: 1500,
});

View File

@@ -1,4 +1,5 @@
import { normalizeOptionalString } from "../shared/string-coerce.js";
import { normalizeThinkLevel } from "../auto-reply/thinking.js";
import { normalizeFastMode, normalizeOptionalString } from "../shared/string-coerce.js";
import { isRecord } from "../utils.js";
import type {
ResolvedTalkConfig,
@@ -157,6 +158,20 @@ export function normalizeTalkSection(value: TalkConfig | undefined): TalkConfig
if (typeof source.interruptOnSpeech === "boolean") {
normalized.interruptOnSpeech = source.interruptOnSpeech;
}
const consultThinkingLevel = normalizeThinkLevel(
normalizeOptionalString(source.consultThinkingLevel),
);
if (consultThinkingLevel) {
normalized.consultThinkingLevel = consultThinkingLevel;
}
const rawConsultFastMode = source.consultFastMode;
const consultFastMode =
typeof rawConsultFastMode === "boolean" || typeof rawConsultFastMode === "string"
? normalizeFastMode(rawConsultFastMode)
: undefined;
if (consultFastMode !== undefined) {
normalized.consultFastMode = consultFastMode;
}
const silenceTimeoutMs = normalizeSilenceTimeoutMs(source.silenceTimeoutMs);
if (silenceTimeoutMs !== undefined) {
normalized.silenceTimeoutMs = silenceTimeoutMs;
@@ -225,6 +240,12 @@ export function buildTalkConfigResponse(value: unknown): TalkConfigResponse | un
if (typeof normalized?.silenceTimeoutMs === "number") {
payload.silenceTimeoutMs = normalized.silenceTimeoutMs;
}
if (typeof normalized?.consultThinkingLevel === "string") {
payload.consultThinkingLevel = normalized.consultThinkingLevel;
}
if (typeof normalized?.consultFastMode === "boolean") {
payload.consultFastMode = normalized.consultFastMode;
}
if (typeof normalized?.speechLocale === "string") {
payload.speechLocale = normalized.speechLocale;
}

View File

@@ -76,6 +76,18 @@ export type TalkConfig = {
providers?: Record<string, TalkProviderConfig>;
/** Realtime Talk provider, model, voice, mode, transport, and brain config. */
realtime?: TalkRealtimeConfig;
/** Optional thinking level override for the agent run behind Talk realtime consults. */
consultThinkingLevel?:
| "off"
| "minimal"
| "low"
| "medium"
| "high"
| "xhigh"
| "adaptive"
| "max";
/** Optional fast mode override for the agent run behind Talk realtime consults. */
consultFastMode?: boolean;
/** BCP 47 locale id used for Talk speech recognition on device nodes. */
speechLocale?: string;
/** Stop speaking when user starts talking (default: true). */

View File

@@ -6,12 +6,24 @@ describe("OpenClawSchema talk validation", () => {
expect(
OpenClawSchema.safeParse({
talk: {
consultThinkingLevel: "low",
consultFastMode: true,
silenceTimeoutMs: 1500,
},
}),
).toMatchObject({ success: true });
});
it("rejects invalid talk.consultThinkingLevel", () => {
expect(() =>
OpenClawSchema.parse({
talk: {
consultThinkingLevel: "turbo",
},
}),
).toThrow(/consultThinkingLevel/i);
});
it.each([
["boolean", true],
["string", "1500"],

View File

@@ -268,6 +268,10 @@ const TalkSchema = z
provider: z.string().optional(),
providers: z.record(z.string(), TalkProviderEntrySchema).optional(),
realtime: TalkRealtimeSchema.optional(),
consultThinkingLevel: z
.enum(["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"])
.optional(),
consultFastMode: z.boolean().optional(),
speechLocale: z.string().optional(),
interruptOnSpeech: z.boolean().optional(),
silenceTimeoutMs: z.number().int().positive().optional(),

View File

@@ -514,6 +514,8 @@ const TalkConfigSchema = Type.Object(
providers: Type.Optional(Type.Record(Type.String(), TalkProviderConfigSchema)),
realtime: Type.Optional(TalkRealtimeConfigSchema),
resolved: Type.Optional(ResolvedTalkConfigSchema),
consultThinkingLevel: Type.Optional(Type.String()),
consultFastMode: Type.Optional(Type.Boolean()),
speechLocale: Type.Optional(Type.String()),
interruptOnSpeech: Type.Optional(Type.Boolean()),
silenceTimeoutMs: Type.Optional(Type.Integer({ minimum: 1 })),

View File

@@ -38,6 +38,7 @@ export const ChatSendParamsSchema = Type.Object(
sessionId: Type.Optional(NonEmptyString),
message: Type.String(),
thinking: Type.Optional(Type.String()),
fastMode: Type.Optional(Type.Boolean()),
deliver: Type.Optional(Type.Boolean()),
originatingChannel: Type.Optional(Type.String()),
originatingTo: Type.Optional(Type.String()),

View File

@@ -1904,6 +1904,7 @@ export const chatHandlers: GatewayRequestHandlers = {
sessionId?: string;
message: string;
thinking?: string;
fastMode?: boolean;
deliver?: boolean;
originatingChannel?: string;
originatingTo?: string;
@@ -2503,6 +2504,8 @@ export const chatHandlers: GatewayRequestHandlers = {
abortSignal: activeRunAbort.controller.signal,
images: parsedImages.length > 0 ? parsedImages : undefined,
imageOrder: imageOrder.length > 0 ? imageOrder : undefined,
thinkingLevelOverride: p.thinking,
fastModeOverride: p.fastMode,
onAgentRunStart: (runId) => {
agentRunStarted = true;
if (!hasBeforeAgentRunGate) {

View File

@@ -1,4 +1,5 @@
import { randomUUID } from "node:crypto";
import { normalizeTalkSection } from "../../config/talk.js";
import {
normalizeOptionalLowercaseString,
normalizeOptionalString,
@@ -45,6 +46,7 @@ async function startRealtimeToolCallAgentConsult(params: {
return { ok: false, error: errorShape(ErrorCodes.INVALID_REQUEST, formatForLog(err)) };
}
const idempotencyKey = `talk-${params.callId}-${randomUUID()}`;
const normalizedTalk = normalizeTalkSection(params.request.context.getRuntimeConfig().talk);
let chatResponse: { ok: true; result: unknown } | { ok: false; error: ErrorShape } | undefined;
await chatHandlers["chat.send"]({
...params.request,
@@ -57,6 +59,12 @@ async function startRealtimeToolCallAgentConsult(params: {
sessionKey: params.sessionKey,
message,
idempotencyKey,
...(normalizedTalk?.consultThinkingLevel
? { thinking: normalizedTalk.consultThinkingLevel }
: {}),
...(typeof normalizedTalk?.consultFastMode === "boolean"
? { fastMode: normalizedTalk.consultFastMode }
: {}),
},
respond: (ok: boolean, result?: unknown, error?: ErrorShape) => {
chatResponse = ok

View File

@@ -1088,6 +1088,46 @@ describe("talk.client.toolCall handler", () => {
);
});
it("passes configured consult thinking and fast-mode overrides to chat.send", async () => {
const respond = vi.fn();
await talkHandlers["talk.client.toolCall"]({
req: { type: "req", id: "1", method: "talk.client.toolCall" },
params: {
sessionKey: "main",
callId: "call-1",
name: "openclaw_agent_consult",
args: { question: "Are the basement lights off?" },
},
client: { connId: "conn-1" } as never,
isWebchatConnect: () => false,
respond: respond as never,
context: {
getRuntimeConfig: () =>
({
talk: {
consultThinkingLevel: "low",
consultFastMode: true,
},
}) as OpenClawConfig,
} as never,
});
expect(mocks.chatSend).toHaveBeenCalledWith(
expect.objectContaining({
params: expect.objectContaining({
thinking: "low",
fastMode: true,
}),
}),
);
expect(respond).toHaveBeenCalledWith(
true,
expect.objectContaining({ runId: "run-voice-1" }),
undefined,
);
});
it("links relay-owned agent consult runs so relay cancellation can abort them", async () => {
const respond = vi.fn();

View File

@@ -127,6 +127,7 @@ describe("realtime voice agent consult runtime", () => {
provider: "openai",
model: "gpt-5.4",
thinkLevel: "high",
fastMode: true,
timeoutMs: 10_000,
});
@@ -149,6 +150,7 @@ describe("realtime voice agent consult runtime", () => {
expect(call.provider).toBe("openai");
expect(call.model).toBe("gpt-5.4");
expect(call.thinkLevel).toBe("high");
expect(call.fastMode).toBe(true);
expect(call.timeoutMs).toBe(10_000);
expect(call.prompt).toContain("Caller: Can you check this?");
expect(call.extraSystemPrompt).toContain("delegated requests");

View File

@@ -199,6 +199,7 @@ export async function consultRealtimeVoiceAgent(params: {
provider?: RunEmbeddedPiAgentParams["provider"];
model?: RunEmbeddedPiAgentParams["model"];
thinkLevel?: RunEmbeddedPiAgentParams["thinkLevel"];
fastMode?: RunEmbeddedPiAgentParams["fastMode"];
timeoutMs?: number;
toolsAllow?: string[];
extraSystemPrompt?: string;
@@ -264,6 +265,7 @@ export async function consultRealtimeVoiceAgent(params: {
provider: params.provider,
model: params.model,
thinkLevel: params.thinkLevel ?? "high",
fastMode: params.fastMode,
verboseLevel: "off",
reasoningLevel: "off",
toolResultFormat: "plain",

View File

@@ -38,6 +38,8 @@ export type RealtimeTalkWebRtcSdpSessionResult = {
model?: string;
voice?: string;
expiresAt?: number;
consultThinkingLevel?: string;
consultFastMode?: boolean;
};
export type RealtimeTalkJsonPcmWebSocketSessionResult = {
@@ -51,6 +53,8 @@ export type RealtimeTalkJsonPcmWebSocketSessionResult = {
model?: string;
voice?: string;
expiresAt?: number;
consultThinkingLevel?: string;
consultFastMode?: boolean;
};
export type RealtimeTalkGatewayRelaySessionResult = {
@@ -61,6 +65,8 @@ export type RealtimeTalkGatewayRelaySessionResult = {
model?: string;
voice?: string;
expiresAt?: number;
consultThinkingLevel?: string;
consultFastMode?: boolean;
};
export type RealtimeTalkManagedRoomSessionResult = {
@@ -71,6 +77,8 @@ export type RealtimeTalkManagedRoomSessionResult = {
model?: string;
voice?: string;
expiresAt?: number;
consultThinkingLevel?: string;
consultFastMode?: boolean;
};
export type RealtimeTalkSessionResult =
@@ -88,6 +96,8 @@ export type RealtimeTalkTransportContext = {
client: GatewayBrowserClient;
sessionKey: string;
callbacks: RealtimeTalkCallbacks;
consultThinkingLevel?: string;
consultFastMode?: boolean;
};
export function createRealtimeTalkEventEmitter(

View File

@@ -74,6 +74,8 @@ export class RealtimeTalkSession {
client: this.client,
sessionKey: this.sessionKey,
callbacks: this.callbacks,
consultThinkingLevel: session.consultThinkingLevel,
consultFastMode: session.consultFastMode,
});
await this.transport.start();
}

View File

@@ -0,0 +1,54 @@
/* @vitest-environment jsdom */
import { describe, expect, it, vi } from "vitest";
import { submitRealtimeTalkConsult } from "./chat/realtime-talk-shared.js";
describe("RealtimeTalkSession consult handoff", () => {
it("submits realtime consults through the Gateway tool-call endpoint", async () => {
let listener: ((event: { event: string; payload?: unknown }) => void) | undefined;
const request = vi.fn(async (method: string, _params: unknown) => {
if (method === "talk.client.toolCall") {
window.setTimeout(() => {
listener?.({
event: "chat",
payload: {
runId: "run-1",
state: "final",
message: { text: "Basement lights are off." },
},
});
}, 0);
return { runId: "run-1" };
}
throw new Error(`unexpected request: ${method}`);
});
const addEventListener = vi.fn((callback: typeof listener) => {
listener = callback;
return () => {
listener = undefined;
};
});
const submit = vi.fn();
await submitRealtimeTalkConsult({
ctx: {
client: { request, addEventListener },
sessionKey: "agent:main:main",
callbacks: {},
} as never,
callId: "call-1",
args: { question: "Are the basement lights off?" },
submit,
});
expect(request).toHaveBeenCalledWith(
"talk.client.toolCall",
expect.objectContaining({
sessionKey: "agent:main:main",
name: "openclaw_agent_consult",
args: { question: "Are the basement lights off?" },
}),
);
expect(submit).toHaveBeenCalledWith("call-1", { result: "Basement lights are off." });
});
});