refactor(realtime-voice): centralize consult policy helpers

This commit is contained in:
Peter Steinberger
2026-04-24 23:45:27 +01:00
parent a7696b496a
commit 304126ad79
10 changed files with 142 additions and 67 deletions

View File

@@ -1,2 +1,2 @@
c1501890ed21debd56a119381101e6fec9bde0a4deae94bddcb464369cf0e51a plugin-sdk-api-baseline.json
10ee97c1acf0b2a725f0f89b357146ce769ed39f46cccd3e40d0a5d00571c599 plugin-sdk-api-baseline.jsonl
b4fb88ca434fb92a38bb068cc0b1863b1f22bcde2ce21499c3077ea7e8460775 plugin-sdk-api-baseline.json
0f373c8820c0cd17b13dddf520dd286d9dec85234eb0a7f94dac07432572ede7 plugin-sdk-api-baseline.jsonl

View File

@@ -1,4 +1,8 @@
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "openclaw/plugin-sdk/realtime-voice";
import {
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
resolveRealtimeVoiceAgentConsultToolPolicy,
type RealtimeVoiceAgentConsultToolPolicy,
} from "openclaw/plugin-sdk/realtime-voice";
import {
normalizeOptionalLowercaseString,
normalizeOptionalString,
@@ -6,7 +10,7 @@ import {
export type GoogleMeetTransport = "chrome" | "chrome-node" | "twilio";
export type GoogleMeetMode = "realtime" | "transcribe";
export type GoogleMeetToolPolicy = "safe-read-only" | "owner" | "none";
export type GoogleMeetToolPolicy = RealtimeVoiceAgentConsultToolPolicy;
export type GoogleMeetConfig = {
enabled: boolean;
@@ -259,13 +263,6 @@ function resolveMode(value: unknown, fallback: GoogleMeetMode): GoogleMeetMode {
return normalized === "realtime" || normalized === "transcribe" ? normalized : fallback;
}
/**
 * Coerces a raw config value into a Google Meet tool policy.
 *
 * The value is run through `normalizeOptionalLowercaseString` and accepted only when
 * the result is exactly one of the three policy literals.
 *
 * @param value - Untrusted value read from configuration.
 * @param fallback - Policy returned when `value` does not match a known literal.
 * @returns The matched policy literal, or `fallback`.
 */
function resolveToolPolicy(value: unknown, fallback: GoogleMeetToolPolicy): GoogleMeetToolPolicy {
  const candidate = normalizeOptionalLowercaseString(value);
  switch (candidate) {
    case "safe-read-only":
    case "owner":
    case "none":
      return candidate;
    default:
      return fallback;
  }
}
/**
 * Resolves a Google Meet plugin config from untyped input.
 *
 * Delegates to `resolveGoogleMeetConfigWithEnv`, forwarding only `input`.
 *
 * @param input - Raw, unvalidated configuration value.
 * @returns The resolved configuration produced by the delegate.
 */
export function resolveGoogleMeetConfig(input: unknown): GoogleMeetConfig {
  const resolved = resolveGoogleMeetConfigWithEnv(input);
  return resolved;
}
@@ -364,7 +361,7 @@ export function resolveGoogleMeetConfigWithEnv(
introMessage:
normalizeOptionalString(realtime.introMessage) ??
DEFAULT_GOOGLE_MEET_CONFIG.realtime.introMessage,
toolPolicy: resolveToolPolicy(
toolPolicy: resolveRealtimeVoiceAgentConsultToolPolicy(
realtime.toolPolicy,
DEFAULT_GOOGLE_MEET_CONFIG.realtime.toolPolicy,
),

View File

@@ -1,10 +1,13 @@
import {
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES,
type RealtimeVoiceAgentConsultToolPolicy,
} from "openclaw/plugin-sdk/realtime-voice";
import { z } from "openclaw/plugin-sdk/zod";
import { TtsAutoSchema, TtsConfigSchema, TtsModeSchema, TtsProviderSchema } from "../api.js";
import { deepMergeDefined } from "./deep-merge.js";
import { DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS } from "./realtime-defaults.js";
export { DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS } from "./realtime-defaults.js";
// -----------------------------------------------------------------------------
// Phone Number Validation
@@ -209,11 +212,9 @@ export type VoiceCallRealtimeProvidersConfig = z.infer<
typeof VoiceCallRealtimeProvidersConfigSchema
>;
export const VoiceCallRealtimeToolPolicySchema = z.enum(["safe-read-only", "owner", "none"]);
export const VoiceCallRealtimeToolPolicySchema = z.enum(REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES);
export type VoiceCallRealtimeToolPolicy = RealtimeVoiceAgentConsultToolPolicy;
export const DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS = `You are OpenClaw's phone-call realtime voice interface. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`;
export const VoiceCallStreamingProvidersConfigSchema = z
.record(z.string(), z.record(z.string(), z.unknown()))
.default({});

View File

@@ -0,0 +1,3 @@
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME } from "openclaw/plugin-sdk/realtime-voice";
/**
 * Default instruction prompt for the voice-call realtime interface.
 *
 * The consult tool name is interpolated from `REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME`
 * so the prompt always names the tool by its shared constant rather than a hard-coded string.
 */
export const DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS = `You are OpenClaw's phone-call realtime voice interface. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} before answering.`;

View File

@@ -6,7 +6,6 @@ import {
resolveRealtimeVoiceAgentConsultTools,
resolveRealtimeVoiceAgentConsultToolsAllow,
type RealtimeVoiceAgentConsultTranscriptEntry,
type RealtimeVoiceTool,
type ResolvedRealtimeVoiceProvider,
} from "openclaw/plugin-sdk/realtime-voice";
import type { VoiceCallConfig } from "./config.js";
@@ -85,19 +84,6 @@ function loadRealtimeHandler(): Promise<RealtimeHandlerModule> {
return realtimeHandlerPromise;
}
/**
 * Builds the realtime tool list for a voice call.
 *
 * Tools implied by `config.realtime.toolPolicy` are registered first, then the
 * custom tools from `config.realtime.tools` are appended. A custom tool whose name
 * is already registered is skipped, so the first tool seen under a name wins.
 *
 * @param config - Voice-call configuration providing the policy and custom tools.
 * @returns Tools in registration order, unique by `name`.
 */
function resolveRealtimeTools(config: VoiceCallConfig): RealtimeVoiceTool[] {
  const byName = new Map<string, RealtimeVoiceTool>();
  const policyTools = resolveRealtimeVoiceAgentConsultTools(config.realtime.toolPolicy);
  for (const policyTool of policyTools) {
    byName.set(policyTool.name, policyTool);
  }
  for (const customTool of config.realtime.tools) {
    if (byName.has(customTool.name)) {
      // An earlier tool already owns this name; keep it.
      continue;
    }
    byName.set(customTool.name, customTool);
  }
  return Array.from(byName.values());
}
function resolveVoiceCallConsultSessionKey(call: {
sessionKey?: string;
from?: string;
@@ -298,7 +284,10 @@ export async function createVoiceCallRuntime(params: {
const { RealtimeCallHandler } = await loadRealtimeHandler();
const realtimeConfig = {
...config.realtime,
tools: resolveRealtimeTools(config),
tools: resolveRealtimeVoiceAgentConsultTools(
config.realtime.toolPolicy,
config.realtime.tools,
),
};
const realtimeHandler = new RealtimeCallHandler(
realtimeConfig,

View File

@@ -1,4 +1,5 @@
import type { VoiceCallConfig } from "./config.js";
import { DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS } from "./realtime-defaults.js";
export function createVoiceCallBaseConfig(params?: {
provider?: "telnyx" | "twilio" | "plivo" | "mock";
@@ -46,8 +47,7 @@ export function createVoiceCallBaseConfig(params?: {
realtime: {
enabled: false,
streamPath: "/voice/stream/realtime",
instructions:
"You are OpenClaw's phone-call realtime voice interface. Keep spoken replies brief and natural. When a question needs deeper reasoning, current information, or tools, call openclaw_agent_consult before answering.",
instructions: DEFAULT_VOICE_CALL_REALTIME_INSTRUCTIONS,
toolPolicy: "safe-read-only",
tools: [],
providers: {},

View File

@@ -18,17 +18,20 @@ export {
buildRealtimeVoiceAgentConsultChatMessage,
buildRealtimeVoiceAgentConsultPrompt,
collectRealtimeVoiceAgentConsultVisibleText,
isRealtimeVoiceAgentConsultToolPolicy,
parseRealtimeVoiceAgentConsultArgs,
REALTIME_VOICE_AGENT_CONSULT_TOOL,
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES,
resolveRealtimeVoiceAgentConsultToolPolicy,
resolveRealtimeVoiceAgentConsultTools,
resolveRealtimeVoiceAgentConsultToolsAllow,
type RealtimeVoiceAgentConsultArgs,
type RealtimeVoiceAgentConsultToolPolicy,
type RealtimeVoiceAgentConsultTranscriptEntry,
} from "../realtime-voice/agent-consult-tool.js";
export {
consultRealtimeVoiceAgent,
resolveRealtimeVoiceAgentConsultTools,
resolveRealtimeVoiceAgentConsultToolsAllow,
type RealtimeVoiceAgentConsultResult,
type RealtimeVoiceAgentConsultRuntime,
} from "../realtime-voice/agent-consult-runtime.js";

View File

@@ -5,41 +5,15 @@ import type { RuntimeLogger, PluginRuntimeCore } from "../plugins/runtime/types-
import {
buildRealtimeVoiceAgentConsultPrompt,
collectRealtimeVoiceAgentConsultVisibleText,
REALTIME_VOICE_AGENT_CONSULT_TOOL,
type RealtimeVoiceAgentConsultToolPolicy,
type RealtimeVoiceAgentConsultTranscriptEntry,
} from "./agent-consult-tool.js";
import type { RealtimeVoiceTool } from "./provider-types.js";
export type RealtimeVoiceAgentConsultRuntime = PluginRuntimeCore["agent"];
export type RealtimeVoiceAgentConsultResult = { text: string };
// Tool names returned as the allow-list when the consult policy is "safe-read-only".
const SAFE_READ_ONLY_TOOLS = [
"read",
"web_search",
"web_fetch",
"x_search",
"memory_search",
"memory_get",
] as const;
/**
 * Lists the realtime tools exposed for a consult policy.
 *
 * @param policy - Consult tool policy.
 * @returns An empty list for `"none"`; otherwise a list holding only the shared consult tool.
 */
export function resolveRealtimeVoiceAgentConsultTools(
  policy: RealtimeVoiceAgentConsultToolPolicy,
): RealtimeVoiceTool[] {
  if (policy === "none") {
    return [];
  }
  return [REALTIME_VOICE_AGENT_CONSULT_TOOL];
}
/**
 * Computes the tool allow-list for a consult policy.
 *
 * @param policy - Consult tool policy.
 * @returns `undefined` for `"owner"` (presumably "no allow-list restriction" — confirm
 *   with the consumer), a fresh copy of the read-only tool names for
 *   `"safe-read-only"`, and an empty list for anything else.
 */
export function resolveRealtimeVoiceAgentConsultToolsAllow(
  policy: RealtimeVoiceAgentConsultToolPolicy,
): string[] | undefined {
  if (policy === "safe-read-only") {
    // Copy so callers cannot mutate the shared constant.
    return Array.from(SAFE_READ_ONLY_TOOLS);
  }
  return policy === "owner" ? undefined : [];
}
export {
resolveRealtimeVoiceAgentConsultTools,
resolveRealtimeVoiceAgentConsultToolsAllow,
} from "./agent-consult-tool.js";
export async function consultRealtimeVoiceAgent(params: {
cfg: OpenClawConfig;

View File

@@ -4,6 +4,10 @@ import {
buildRealtimeVoiceAgentConsultPrompt,
collectRealtimeVoiceAgentConsultVisibleText,
parseRealtimeVoiceAgentConsultArgs,
REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
resolveRealtimeVoiceAgentConsultToolPolicy,
resolveRealtimeVoiceAgentConsultTools,
resolveRealtimeVoiceAgentConsultToolsAllow,
} from "./agent-consult-tool.js";
describe("realtime voice agent consult tool", () => {
@@ -52,4 +56,43 @@ describe("realtime voice agent consult tool", () => {
]),
).toBe("first\n\nsecond");
});
// Covers the three shared policy helpers: value normalization, tool exposure, and allow-lists.
it("normalizes policy values and resolves shared tool exposure", () => {
// Surrounding whitespace and letter case are normalized before matching.
expect(resolveRealtimeVoiceAgentConsultToolPolicy(" OWNER ", "safe-read-only")).toBe("owner");
// Unknown values resolve to the supplied fallback.
expect(resolveRealtimeVoiceAgentConsultToolPolicy("bad", "safe-read-only")).toBe(
"safe-read-only",
);
// Any policy other than "none" exposes the shared consult tool.
expect(resolveRealtimeVoiceAgentConsultTools("safe-read-only")).toEqual([
expect.objectContaining({ name: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME }),
]);
expect(resolveRealtimeVoiceAgentConsultTools("none")).toEqual([]);
// Allow-list per policy: explicit names, undefined for "owner", empty for "none".
expect(resolveRealtimeVoiceAgentConsultToolsAllow("safe-read-only")).toEqual([
"read",
"web_search",
"web_fetch",
"x_search",
"memory_search",
"memory_get",
]);
expect(resolveRealtimeVoiceAgentConsultToolsAllow("owner")).toBeUndefined();
expect(resolveRealtimeVoiceAgentConsultToolsAllow("none")).toEqual([]);
});
// Covers merging custom tools with the shared consult tool.
it("keeps the shared consult tool ahead of custom realtime tools and dedupes by name", () => {
const customTool = {
type: "function" as const,
name: "custom_lookup",
description: "Custom lookup",
parameters: { type: "object" as const, properties: {} },
};
// Same shape as customTool, but reusing the consult tool's name to force a name collision.
const duplicateConsultTool = { ...customTool, name: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME };
// Only one entry with the consult name survives, and it comes before the custom tool.
expect(
resolveRealtimeVoiceAgentConsultTools("safe-read-only", [duplicateConsultTool, customTool]),
).toEqual([
expect.objectContaining({ name: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME }),
customTool,
]);
// With policy "none" the consult tool is not added; custom tools pass through unchanged.
expect(resolveRealtimeVoiceAgentConsultTools("none", [customTool])).toEqual([customTool]);
});
});

View File

@@ -1,8 +1,17 @@
import { normalizeOptionalString } from "../shared/string-coerce.js";
import {
normalizeOptionalLowercaseString,
normalizeOptionalString,
} from "../shared/string-coerce.js";
import type { RealtimeVoiceTool } from "./provider-types.js";
export const REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME = "openclaw_agent_consult";
export type RealtimeVoiceAgentConsultToolPolicy = "safe-read-only" | "owner" | "none";
/**
 * Every accepted consult tool policy literal.
 *
 * Declared `as const` so the policy type can be derived from this tuple.
 */
export const REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES = [
"safe-read-only",
"owner",
"none",
] as const;
/** Union of the literals in `REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES`. */
export type RealtimeVoiceAgentConsultToolPolicy =
(typeof REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES)[number];
export type RealtimeVoiceAgentConsultArgs = {
question: string;
context?: string;
@@ -38,6 +47,62 @@ export const REALTIME_VOICE_AGENT_CONSULT_TOOL: RealtimeVoiceTool = {
},
};
// Tool names returned as the allow-list when the consult policy is "safe-read-only".
const SAFE_READ_ONLY_TOOLS = [
"read",
"web_search",
"web_fetch",
"x_search",
"memory_search",
"memory_get",
] as const;
/**
 * Type guard for consult tool policy literals.
 *
 * The match is exact: no trimming or case folding is applied here.
 *
 * @param value - Any value.
 * @returns `true` only when `value` is a string equal to one of
 *   `REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES`.
 */
export function isRealtimeVoiceAgentConsultToolPolicy(
  value: unknown,
): value is RealtimeVoiceAgentConsultToolPolicy {
  if (typeof value !== "string") {
    return false;
  }
  return REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES.some((policy) => policy === value);
}
/**
 * Resolves an untrusted value to a consult tool policy.
 *
 * The value is normalized with `normalizeOptionalLowercaseString` and then checked
 * with `isRealtimeVoiceAgentConsultToolPolicy`.
 *
 * @param value - Raw value, typically read from configuration.
 * @param fallback - Policy returned when the normalized value is not a known policy.
 * @returns The normalized policy, or `fallback`.
 */
export function resolveRealtimeVoiceAgentConsultToolPolicy(
  value: unknown,
  fallback: RealtimeVoiceAgentConsultToolPolicy,
): RealtimeVoiceAgentConsultToolPolicy {
  const candidate = normalizeOptionalLowercaseString(value);
  if (isRealtimeVoiceAgentConsultToolPolicy(candidate)) {
    return candidate;
  }
  return fallback;
}
/**
 * Builds the realtime tool list for a consult policy plus optional custom tools.
 *
 * The shared consult tool is placed first unless the policy is `"none"`. Custom tools
 * follow in their given order. A tool whose name is already present is dropped, so a
 * custom tool cannot replace the shared consult tool when it is exposed, and later
 * duplicates among custom tools are ignored.
 *
 * @param policy - Consult tool policy.
 * @param customTools - Extra tools to append; defaults to none.
 * @returns Tools unique by `name`, in first-seen order.
 */
export function resolveRealtimeVoiceAgentConsultTools(
  policy: RealtimeVoiceAgentConsultToolPolicy,
  customTools: RealtimeVoiceTool[] = [],
): RealtimeVoiceTool[] {
  const sharedTools: RealtimeVoiceTool[] =
    policy === "none" ? [] : [REALTIME_VOICE_AGENT_CONSULT_TOOL];
  const byName = new Map<string, RealtimeVoiceTool>();
  for (const candidate of [...sharedTools, ...customTools]) {
    if (byName.has(candidate.name)) {
      // First tool registered under a name wins.
      continue;
    }
    byName.set(candidate.name, candidate);
  }
  return Array.from(byName.values());
}
/**
 * Computes the tool allow-list for a consult policy.
 *
 * @param policy - Consult tool policy.
 * @returns `undefined` for `"owner"` (presumably "no allow-list restriction" — confirm
 *   with the consumer), a fresh copy of `SAFE_READ_ONLY_TOOLS` for `"safe-read-only"`,
 *   and an empty list for anything else.
 */
export function resolveRealtimeVoiceAgentConsultToolsAllow(
  policy: RealtimeVoiceAgentConsultToolPolicy,
): string[] | undefined {
  switch (policy) {
    case "owner":
      return undefined;
    case "safe-read-only":
      // Copy so callers cannot mutate the shared constant.
      return [...SAFE_READ_ONLY_TOOLS];
    default:
      return [];
  }
}
export function parseRealtimeVoiceAgentConsultArgs(args: unknown): RealtimeVoiceAgentConsultArgs {
const question = readConsultStringArg(args, "question");
if (!question) {