From 304126ad79319c2074a2453a25cb538603d460e9 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 24 Apr 2026 23:45:27 +0100 Subject: [PATCH] refactor(realtime-voice): centralize consult policy helpers --- .../.generated/plugin-sdk-api-baseline.sha256 | 4 +- extensions/google-meet/src/config.ts | 17 ++--- extensions/voice-call/src/config.ts | 9 +-- .../voice-call/src/realtime-defaults.ts | 3 + extensions/voice-call/src/runtime.ts | 19 ++--- extensions/voice-call/src/test-fixtures.ts | 4 +- src/plugin-sdk/realtime-voice.ts | 7 +- src/realtime-voice/agent-consult-runtime.ts | 34 ++------- src/realtime-voice/agent-consult-tool.test.ts | 43 ++++++++++++ src/realtime-voice/agent-consult-tool.ts | 69 ++++++++++++++++++- 10 files changed, 142 insertions(+), 67 deletions(-) create mode 100644 extensions/voice-call/src/realtime-defaults.ts diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index b6e8b620918..f5fe9f6c451 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -c1501890ed21debd56a119381101e6fec9bde0a4deae94bddcb464369cf0e51a plugin-sdk-api-baseline.json -10ee97c1acf0b2a725f0f89b357146ce769ed39f46cccd3e40d0a5d00571c599 plugin-sdk-api-baseline.jsonl +b4fb88ca434fb92a38bb068cc0b1863b1f22bcde2ce21499c3077ea7e8460775 plugin-sdk-api-baseline.json +0f373c8820c0cd17b13dddf520dd286d9dec85234eb0a7f94dac07432572ede7 plugin-sdk-api-baseline.jsonl diff --git a/extensions/google-meet/src/config.ts b/extensions/google-meet/src/config.ts index 10918c64eeb..92efb86382f 100644 --- a/extensions/google-meet/src/config.ts +++ b/extensions/google-meet/src/config.ts @@ -1,4 +1,8 @@ -import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "openclaw/plugin-sdk/realtime-voice"; +import { + REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, + resolveRealtimeVoiceAgentConsultToolPolicy, + type RealtimeVoiceAgentConsultToolPolicy, +} from "openclaw/plugin-sdk/realtime-voice"; import { normalizeOptionalLowercaseString, normalizeOptionalString, @@ -6,7 +10,7 @@ import { export type GoogleMeetTransport = "chrome" | "chrome-node" | "twilio"; export type GoogleMeetMode = "realtime" | "transcribe"; -export type GoogleMeetToolPolicy = "safe-read-only" | "owner" | "none"; +export type GoogleMeetToolPolicy = RealtimeVoiceAgentConsultToolPolicy; export type GoogleMeetConfig = { enabled: boolean; @@ -259,13 +263,6 @@ function resolveMode(value: unknown, fallback: GoogleMeetMode): GoogleMeetMode { return normalized === "realtime" || normalized === "transcribe" ? normalized : fallback; } -function resolveToolPolicy(value: unknown, fallback: GoogleMeetToolPolicy): GoogleMeetToolPolicy { - const normalized = normalizeOptionalLowercaseString(value); - return normalized === "safe-read-only" || normalized === "owner" || normalized === "none" - ? normalized - : fallback; -} - export function resolveGoogleMeetConfig(input: unknown): GoogleMeetConfig { return resolveGoogleMeetConfigWithEnv(input); } @@ -364,7 +361,7 @@ export function resolveGoogleMeetConfigWithEnv( introMessage: normalizeOptionalString(realtime.introMessage) ?? DEFAULT_GOOGLE_MEET_CONFIG.realtime.introMessage, - toolPolicy: resolveToolPolicy( + toolPolicy: resolveRealtimeVoiceAgentConsultToolPolicy( realtime.toolPolicy, DEFAULT_GOOGLE_MEET_CONFIG.realtime.toolPolicy, ), diff --git a/extensions/voice-call/src/config.ts b/extensions/voice-call/src/config.ts index fd2e07aba68..18ae073d384 100644 --- a/extensions/voice-call/src/config.ts +++ b/extensions/voice-call/src/config.ts @@ -1,10 +1,13 @@ import { - REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, + REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES, type RealtimeVoiceAgentConsultToolPolicy, } from "openclaw/plugin-sdk/realtime-voice"; import { z } from "openclaw/plugin-sdk/zod"; import { TtsAutoSchema, TtsConfigSchema, TtsModeSchema, TtsProviderSchema } from "../api.js"; import { deepMergeDefined } from "./deep-merge.js"; +import { DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS } from "./realtime-defaults.js"; + +export { DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS } from "./realtime-defaults.js"; // ----------------------------------------------------------------------------- // Phone Number Validation @@ -209,11 +212,9 @@ export type VoiceCallRealtimeProvidersConfig = z.infer< typeof VoiceCallRealtimeProvidersConfigSchema >; -export const VoiceCallRealtimeToolPolicySchema = z.enum(["safe-read-only", "owner", "none"]); +export const VoiceCallRealtimeToolPolicySchema = z.enum(REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES); export type VoiceCallRealtimeToolPolicy = RealtimeVoiceAgentConsultToolPolicy; -export const DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS = `You are OpenClaw's phone-call realtime voice interface. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`; - export const VoiceCallStreamingProvidersConfigSchema = z .record(z.string(), z.record(z.string(), z.unknown())) .default({}); diff --git a/extensions/voice-call/src/realtime-defaults.ts b/extensions/voice-call/src/realtime-defaults.ts new file mode 100644 index 00000000000..b6c6adb981c --- /dev/null +++ b/extensions/voice-call/src/realtime-defaults.ts @@ -0,0 +1,3 @@ +import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "openclaw/plugin-sdk/realtime-voice"; + +export const DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS = `You are OpenClaw's phone-call realtime voice interface. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`; diff --git a/extensions/voice-call/src/runtime.ts b/extensions/voice-call/src/runtime.ts index 9341d75fe6d..054b7cb8a43 100644 --- a/extensions/voice-call/src/runtime.ts +++ b/extensions/voice-call/src/runtime.ts @@ -6,7 +6,6 @@ import { resolveRealtimeVoiceAgentConsultTools, resolveRealtimeVoiceAgentConsultToolsAllow, type RealtimeVoiceAgentConsultTranscriptEntry, - type RealtimeVoiceTool, type ResolvedRealtimeVoiceProvider, } from "openclaw/plugin-sdk/realtime-voice"; import type { VoiceCallConfig } from "./config.js"; @@ -85,19 +84,6 @@ function loadRealtimeHandler(): Promise { return realtimeHandlerPromise; } -function resolveRealtimeTools(config: VoiceCallConfig): RealtimeVoiceTool[] { - const tools = new Map(); - for (const tool of resolveRealtimeVoiceAgentConsultTools(config.realtime.toolPolicy)) { - tools.set(tool.name, tool); - } - for (const tool of config.realtime.tools) { - if (!tools.has(tool.name)) { - tools.set(tool.name, tool); - } - } - return [...tools.values()]; -} - function resolveVoiceCallConsultSessionKey(call: { sessionKey?: string; from?: string; @@ -298,7 +284,10 @@ export async function createVoiceCallRuntime(params: { const { RealtimeCallHandler } = await loadRealtimeHandler(); const realtimeConfig = { ...config.realtime, - tools: resolveRealtimeTools(config), + tools: resolveRealtimeVoiceAgentConsultTools( + config.realtime.toolPolicy, + config.realtime.tools, + ), }; const realtimeHandler = new RealtimeCallHandler( realtimeConfig, diff --git a/extensions/voice-call/src/test-fixtures.ts b/extensions/voice-call/src/test-fixtures.ts index da4e0965a7f..4821409a44f 100644 --- a/extensions/voice-call/src/test-fixtures.ts +++ b/extensions/voice-call/src/test-fixtures.ts @@ -1,4 +1,5 @@ import type { VoiceCallConfig } from "./config.js"; +import { DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS } from "./realtime-defaults.js"; export function createVoiceCallBaseConfig(params?: { provider?: "telnyx" | "twilio" | "plivo" | "mock"; @@ -46,8 +47,7 @@ export function createVoiceCallBaseConfig(params?: { realtime: { enabled: false, streamPath: "/voice/stream/realtime", - instructions: - "You are OpenClaw's phone-call realtime voice interface. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call openclaw_agent_consult before answering.", + instructions: DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS, toolPolicy: "safe-read-only", tools: [], providers: {}, diff --git a/src/plugin-sdk/realtime-voice.ts b/src/plugin-sdk/realtime-voice.ts index a8fc1d273d8..ec9d59c600e 100644 --- a/src/plugin-sdk/realtime-voice.ts +++ b/src/plugin-sdk/realtime-voice.ts @@ -18,17 +18,20 @@ export { buildRealtimeVoiceAgentConsultChatMessage, buildRealtimeVoiceAgentConsultPrompt, collectRealtimeVoiceAgentConsultVisibleText, + isRealtimeVoiceAgentConsultToolPolicy, parseRealtimeVoiceAgentConsultArgs, REALTIME_VOICE_AGENT_CONSULT_TOOL, REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, + REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES, + resolveRealtimeVoiceAgentConsultToolPolicy, + resolveRealtimeVoiceAgentConsultTools, + resolveRealtimeVoiceAgentConsultToolsAllow, type RealtimeVoiceAgentConsultArgs, type RealtimeVoiceAgentConsultToolPolicy, type RealtimeVoiceAgentConsultTranscriptEntry, } from "../realtime-voice/agent-consult-tool.js"; export { consultRealtimeVoiceAgent, - resolveRealtimeVoiceAgentConsultTools, - resolveRealtimeVoiceAgentConsultToolsAllow, type RealtimeVoiceAgentConsultResult, type RealtimeVoiceAgentConsultRuntime, } from "../realtime-voice/agent-consult-runtime.js"; diff --git a/src/realtime-voice/agent-consult-runtime.ts b/src/realtime-voice/agent-consult-runtime.ts index cbe18b02017..871794cd1c5 100644 --- a/src/realtime-voice/agent-consult-runtime.ts +++ b/src/realtime-voice/agent-consult-runtime.ts @@ -5,41 +5,15 @@ import type { RuntimeLogger, PluginRuntimeCore } from "../plugins/runtime/types- import { buildRealtimeVoiceAgentConsultPrompt, collectRealtimeVoiceAgentConsultVisibleText, - REALTIME_VOICE_AGENT_CONSULT_TOOL, - type RealtimeVoiceAgentConsultToolPolicy, type RealtimeVoiceAgentConsultTranscriptEntry, } from "./agent-consult-tool.js"; -import type { RealtimeVoiceTool } from "./provider-types.js"; export type RealtimeVoiceAgentConsultRuntime = PluginRuntimeCore["agent"]; export type RealtimeVoiceAgentConsultResult = { text: string }; - -const SAFE_READ_ONLY_TOOLS = [ - "read", - "web_search", - "web_fetch", - "x_search", - "memory_search", - "memory_get", -] as const; - -export function resolveRealtimeVoiceAgentConsultTools( - policy: RealtimeVoiceAgentConsultToolPolicy, -): RealtimeVoiceTool[] { - return policy === "none" ? [] : [REALTIME_VOICE_AGENT_CONSULT_TOOL]; -} - -export function resolveRealtimeVoiceAgentConsultToolsAllow( - policy: RealtimeVoiceAgentConsultToolPolicy, -): string[] | undefined { - if (policy === "owner") { - return undefined; - } - if (policy === "safe-read-only") { - return [...SAFE_READ_ONLY_TOOLS]; - } - return []; -} +export { + resolveRealtimeVoiceAgentConsultTools, + resolveRealtimeVoiceAgentConsultToolsAllow, +} from "./agent-consult-tool.js"; export async function consultRealtimeVoiceAgent(params: { cfg: OpenClawConfig; diff --git a/src/realtime-voice/agent-consult-tool.test.ts b/src/realtime-voice/agent-consult-tool.test.ts index 0b8c62d6d05..358ef9923f5 100644 --- a/src/realtime-voice/agent-consult-tool.test.ts +++ b/src/realtime-voice/agent-consult-tool.test.ts @@ -4,6 +4,10 @@ import { buildRealtimeVoiceAgentConsultPrompt, collectRealtimeVoiceAgentConsultVisibleText, parseRealtimeVoiceAgentConsultArgs, + REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, + resolveRealtimeVoiceAgentConsultToolPolicy, + resolveRealtimeVoiceAgentConsultTools, + resolveRealtimeVoiceAgentConsultToolsAllow, } from "./agent-consult-tool.js"; describe("realtime voice agent consult tool", () => { @@ -52,4 +56,43 @@ describe("realtime voice agent consult tool", () => { ]), ).toBe("first\n\nsecond"); }); + + it("normalizes policy values and resolves shared tool exposure", () => { + expect(resolveRealtimeVoiceAgentConsultToolPolicy(" OWNER ", "safe-read-only")).toBe("owner"); + expect(resolveRealtimeVoiceAgentConsultToolPolicy("bad", "safe-read-only")).toBe( + "safe-read-only", + ); + expect(resolveRealtimeVoiceAgentConsultTools("safe-read-only")).toEqual([ + expect.objectContaining({ name: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME }), + ]); + expect(resolveRealtimeVoiceAgentConsultTools("none")).toEqual([]); + expect(resolveRealtimeVoiceAgentConsultToolsAllow("safe-read-only")).toEqual([ + "read", + "web_search", + "web_fetch", + "x_search", + "memory_search", + "memory_get", + ]); + expect(resolveRealtimeVoiceAgentConsultToolsAllow("owner")).toBeUndefined(); + expect(resolveRealtimeVoiceAgentConsultToolsAllow("none")).toEqual([]); + }); + + it("keeps the shared consult tool ahead of custom realtime tools and dedupes by name", () => { + const customTool = { + type: "function" as const, + name: "custom_lookup", + description: "Custom lookup", + parameters: { type: "object" as const, properties: {} }, + }; + const duplicateConsultTool = { ...customTool, name: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME }; + + expect( + resolveRealtimeVoiceAgentConsultTools("safe-read-only", [duplicateConsultTool, customTool]), + ).toEqual([ + expect.objectContaining({ name: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME }), + customTool, + ]); + expect(resolveRealtimeVoiceAgentConsultTools("none", [customTool])).toEqual([customTool]); + }); }); diff --git a/src/realtime-voice/agent-consult-tool.ts b/src/realtime-voice/agent-consult-tool.ts index 1772fed1ea7..4c76d014bd7 100644 --- a/src/realtime-voice/agent-consult-tool.ts +++ b/src/realtime-voice/agent-consult-tool.ts @@ -1,8 +1,17 @@ -import { normalizeOptionalString } from "../shared/string-coerce.js"; +import { + normalizeOptionalLowercaseString, + normalizeOptionalString, +} from "../shared/string-coerce.js"; import type { RealtimeVoiceTool } from "./provider-types.js"; export const REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME = "openclaw_agent_consult"; -export type RealtimeVoiceAgentConsultToolPolicy = "safe-read-only" | "owner" | "none"; +export const REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES = [ + "safe-read-only", + "owner", + "none", +] as const; +export type RealtimeVoiceAgentConsultToolPolicy = + (typeof REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES)[number]; export type RealtimeVoiceAgentConsultArgs = { question: string; context?: string; @@ -38,6 +47,62 @@ export const REALTIME_VOICE_AGENT_CONSULT_TOOL: RealtimeVoiceTool = { }, }; +const SAFE_READ_ONLY_TOOLS = [ + "read", + "web_search", + "web_fetch", + "x_search", + "memory_search", + "memory_get", +] as const; + +export function isRealtimeVoiceAgentConsultToolPolicy( + value: unknown, +): value is RealtimeVoiceAgentConsultToolPolicy { + return ( + typeof value === "string" && + REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES.includes( + value as RealtimeVoiceAgentConsultToolPolicy, + ) + ); +} + +export function resolveRealtimeVoiceAgentConsultToolPolicy( + value: unknown, + fallback: RealtimeVoiceAgentConsultToolPolicy, +): RealtimeVoiceAgentConsultToolPolicy { + const normalized = normalizeOptionalLowercaseString(value); + return isRealtimeVoiceAgentConsultToolPolicy(normalized) ? normalized : fallback; +} + +export function resolveRealtimeVoiceAgentConsultTools( + policy: RealtimeVoiceAgentConsultToolPolicy, + customTools: RealtimeVoiceTool[] = [], +): RealtimeVoiceTool[] { + const tools = new Map(); + if (policy !== "none") { + tools.set(REALTIME_VOICE_AGENT_CONSULT_TOOL.name, REALTIME_VOICE_AGENT_CONSULT_TOOL); + } + for (const tool of customTools) { + if (!tools.has(tool.name)) { + tools.set(tool.name, tool); + } + } + return [...tools.values()]; +} + +export function resolveRealtimeVoiceAgentConsultToolsAllow( + policy: RealtimeVoiceAgentConsultToolPolicy, +): string[] | undefined { + if (policy === "owner") { + return undefined; + } + if (policy === "safe-read-only") { + return [...SAFE_READ_ONLY_TOOLS]; + } + return []; +} + export function parseRealtimeVoiceAgentConsultArgs(args: unknown): RealtimeVoiceAgentConsultArgs { const question = readConsultStringArg(args, "question"); if (!question) {