Files
openclaw/src/realtime-voice/agent-consult-tool.ts
scoootscooob 79dd65e208 feat(voice-call): improve realtime Meet voice agent
* feat(voice-call): inject agent context into realtime voice

* fix(voice-call): stabilize realtime meet audio

* fix(voice-call): delegate realtime consults to agent

* Improve realtime Meet voice consult routing

* Pin voice consult delivery to call session

* Move voice changelog entries to changes

* fix(voice-call): isolate final realtime transcripts

* test(voice-call): trim redundant realtime coverage
2026-05-05 12:56:31 -07:00

197 lines
6.2 KiB
TypeScript

import {
normalizeOptionalLowercaseString,
normalizeOptionalString,
} from "../shared/string-coerce.js";
import type { RealtimeVoiceTool } from "./provider-types.js";
export const REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME = "openclaw_agent_consult";
export const REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES = [
"safe-read-only",
"owner",
"none",
] as const;
export type RealtimeVoiceAgentConsultToolPolicy =
(typeof REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES)[number];
export type RealtimeVoiceAgentConsultArgs = {
question: string;
context?: string;
responseStyle?: string;
};
export type RealtimeVoiceAgentConsultTranscriptEntry = {
role: "user" | "assistant";
text: string;
};
export const REALTIME_VOICE_AGENT_CONSULT_TOOL: RealtimeVoiceTool = {
type: "function",
name: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
description:
"Delegate the caller's request to the configured OpenClaw agent for normal tool-backed work, context, memory, or reasoning before speaking.",
parameters: {
type: "object",
properties: {
question: {
type: "string",
description: "The concrete question or task the user asked.",
},
context: {
type: "string",
description: "Optional relevant context or transcript summary.",
},
responseStyle: {
type: "string",
description: "Optional style hint for the spoken answer.",
},
},
required: ["question"],
},
};
export function buildRealtimeVoiceAgentConsultWorkingResponse(
audienceLabel = "person",
): Record<string, unknown> {
return {
status: "working",
tool: REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
message: `Tell the ${audienceLabel} briefly that you are checking, then wait for the final OpenClaw result before answering with the actual result.`,
};
}
const SAFE_READ_ONLY_TOOLS = [
"read",
"web_search",
"web_fetch",
"x_search",
"memory_search",
"memory_get",
] as const;
export function isRealtimeVoiceAgentConsultToolPolicy(
value: unknown,
): value is RealtimeVoiceAgentConsultToolPolicy {
return (
typeof value === "string" &&
REALTIME_VOICE_AGENT_CONSULT_TOOL_POLICIES.includes(
value as RealtimeVoiceAgentConsultToolPolicy,
)
);
}
export function resolveRealtimeVoiceAgentConsultToolPolicy(
value: unknown,
fallback: RealtimeVoiceAgentConsultToolPolicy,
): RealtimeVoiceAgentConsultToolPolicy {
const normalized = normalizeOptionalLowercaseString(value);
return isRealtimeVoiceAgentConsultToolPolicy(normalized) ? normalized : fallback;
}
export function resolveRealtimeVoiceAgentConsultTools(
policy: RealtimeVoiceAgentConsultToolPolicy,
customTools: RealtimeVoiceTool[] = [],
): RealtimeVoiceTool[] {
const tools = new Map<string, RealtimeVoiceTool>();
if (policy !== "none") {
tools.set(REALTIME_VOICE_AGENT_CONSULT_TOOL.name, REALTIME_VOICE_AGENT_CONSULT_TOOL);
}
for (const tool of customTools) {
if (!tools.has(tool.name)) {
tools.set(tool.name, tool);
}
}
return [...tools.values()];
}
export function resolveRealtimeVoiceAgentConsultToolsAllow(
policy: RealtimeVoiceAgentConsultToolPolicy,
): string[] | undefined {
if (policy === "owner") {
return undefined;
}
if (policy === "safe-read-only") {
return [...SAFE_READ_ONLY_TOOLS];
}
return [];
}
export function parseRealtimeVoiceAgentConsultArgs(args: unknown): RealtimeVoiceAgentConsultArgs {
const question =
readConsultStringArg(args, "question") ??
readConsultStringArg(args, "prompt") ??
readConsultStringArg(args, "query") ??
readConsultStringArg(args, "task");
if (!question) {
throw new Error("question required");
}
return {
question,
context: readConsultStringArg(args, "context"),
responseStyle: readConsultStringArg(args, "responseStyle"),
};
}
export function buildRealtimeVoiceAgentConsultChatMessage(args: unknown): string {
const parsed = parseRealtimeVoiceAgentConsultArgs(args);
return [
parsed.question,
parsed.context ? `Context:\n${parsed.context}` : undefined,
parsed.responseStyle ? `Spoken style:\n${parsed.responseStyle}` : undefined,
]
.filter(Boolean)
.join("\n\n");
}
export function buildRealtimeVoiceAgentConsultPrompt(params: {
args: unknown;
transcript: RealtimeVoiceAgentConsultTranscriptEntry[];
surface: string;
userLabel: string;
assistantLabel?: string;
questionSourceLabel?: string;
}): string {
const parsed = parseRealtimeVoiceAgentConsultArgs(params.args);
const assistantLabel = params.assistantLabel ?? "Agent";
const questionSourceLabel = params.questionSourceLabel ?? params.userLabel.toLowerCase();
const transcript = params.transcript
.slice(-12)
.map(
(entry) => `${entry.role === "assistant" ? assistantLabel : params.userLabel}: ${entry.text}`,
)
.join("\n");
return [
`Live voice request from the ${questionSourceLabel} during ${params.surface}.`,
"Act as the configured OpenClaw agent on behalf of this user. Use available tools when the request asks you to do work.",
"When finished, return only the concise result the realtime voice agent should speak back.",
"Do not include markdown, tool logs, or private reasoning. Include citations only when the spoken answer needs them.",
parsed.responseStyle ? `Spoken style: ${parsed.responseStyle}` : undefined,
transcript ? `Recent voice transcript for context:\n${transcript}` : undefined,
parsed.context ? `Additional realtime context:\n${parsed.context}` : undefined,
`User request:\n${parsed.question}`,
]
.filter(Boolean)
.join("\n\n");
}
export function collectRealtimeVoiceAgentConsultVisibleText(
payloads: Array<{ text?: unknown; isError?: boolean; isReasoning?: boolean }>,
): string | null {
const chunks: string[] = [];
for (const payload of payloads) {
if (payload.isError || payload.isReasoning) {
continue;
}
const text = normalizeOptionalString(payload.text);
if (text) {
chunks.push(text);
}
}
return chunks.length > 0 ? chunks.join("\n\n").trim() : null;
}
function readConsultStringArg(args: unknown, key: string): string | undefined {
if (!args || typeof args !== "object" || Array.isArray(args)) {
return undefined;
}
return normalizeOptionalString((args as Record<string, unknown>)[key]);
}