feat(plugins): give the Google Meet realtime agent an agent-consult tool

This commit is contained in:
Peter Steinberger
2026-04-24 02:55:38 +01:00
parent 3361593442
commit e314190403
9 changed files with 380 additions and 46 deletions

View File

@@ -22,6 +22,7 @@ import { buildMeetDtmfSequence, normalizeDialInNumber } from "./src/transports/t
const voiceCallMocks = vi.hoisted(() => ({
joinMeetViaVoiceCallGateway: vi.fn(async () => ({ callId: "call-1", dtmfSent: true })),
endMeetVoiceCallGatewayCall: vi.fn(async () => {}),
}));
const fetchGuardMocks = vi.hoisted(() => ({
@@ -45,6 +46,7 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
vi.mock("./src/voice-call-gateway.js", () => ({
joinMeetViaVoiceCallGateway: voiceCallMocks.joinMeetViaVoiceCallGateway,
endMeetVoiceCallGatewayCall: voiceCallMocks.endMeetVoiceCallGatewayCall,
}));
const noopLogger = {
@@ -168,6 +170,24 @@ describe("google-meet plugin", () => {
});
});
it("uses a provider-safe flat tool parameter schema", () => {
  const { tools } = setup();
  const [tool] = tools as Array<{ parameters: unknown }>;
  // The schema must stay a single flat object — no anyOf unions.
  const serialized = JSON.stringify(tool.parameters);
  expect(serialized).not.toContain("anyOf");
  expect(tool.parameters).toMatchObject({
    type: "object",
    properties: {
      action: {
        type: "string",
        enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave"],
      },
      transport: { type: "string", enum: ["chrome", "twilio"] },
      mode: { type: "string", enum: ["realtime", "transcribe"] },
    },
  });
});
it("normalizes Meet URLs, codes, and space names for the Meet API", () => {
expect(normalizeGoogleMeetSpaceName("spaces/abc-defg-hij")).toBe("spaces/abc-defg-hij");
expect(normalizeGoogleMeetSpaceName("abc-defg-hij")).toBe("spaces/abc-defg-hij");
@@ -323,6 +343,26 @@ describe("google-meet plugin", () => {
});
});
it("hangs up delegated Twilio calls on leave", async () => {
  const { tools } = setup({ defaultTransport: "twilio" });
  const [tool] = tools as Array<{
    execute: (id: string, params: unknown) => Promise<{ details: { session: { id: string } } }>;
  }>;
  // Join first so there is a delegated gateway call to tear down.
  const joinResult = await tool.execute("id", {
    action: "join",
    url: "https://meet.google.com/abc-defg-hij",
    dialInNumber: "+15551234567",
    pin: "123456",
  });
  const sessionId = joinResult.details.session.id;
  await tool.execute("id", { action: "leave", sessionId });
  expect(voiceCallMocks.endMeetVoiceCallGatewayCall).toHaveBeenCalledWith({
    config: expect.objectContaining({ defaultTransport: "twilio" }),
    callId: "call-1",
  });
});
it("reports setup status through the tool", async () => {
const { tools } = setup({
chrome: {
@@ -415,6 +455,13 @@ describe("google-meet plugin", () => {
| {
onAudio: (audio: Buffer) => void;
onMark?: (markName: string) => void;
onToolCall?: (event: {
itemId: string;
callId: string;
name: string;
args: unknown;
}) => void;
tools?: unknown[];
}
| undefined;
const sendAudio = vi.fn();
@@ -464,12 +511,33 @@ describe("google-meet plugin", () => {
const inputProcess = makeProcess({ stdout: inputStdout, stdin: null });
const outputProcess = makeProcess({ stdin: outputStdin, stdout: null });
const spawnMock = vi.fn().mockReturnValueOnce(outputProcess).mockReturnValueOnce(inputProcess);
const sessionStore: Record<string, unknown> = {};
const runtime = {
agent: {
resolveAgentDir: vi.fn(() => "/tmp/agent"),
resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"),
ensureAgentWorkspace: vi.fn(async () => {}),
session: {
resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
loadSessionStore: vi.fn(() => sessionStore),
saveSessionStore: vi.fn(async () => {}),
resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
},
runEmbeddedPiAgent: vi.fn(async () => ({
payloads: [{ text: "Use the Portugal launch data." }],
meta: {},
})),
resolveAgentTimeoutMs: vi.fn(() => 1000),
},
};
const handle = await startCommandRealtimeAudioBridge({
config: resolveGoogleMeetConfig({
realtime: { provider: "openai", model: "gpt-realtime" },
}),
fullConfig: {} as never,
runtime: runtime as never,
meetingSessionId: "meet-1",
inputCommand: ["capture-meet"],
outputCommand: ["play-meet"],
logger: noopLogger,
@@ -480,6 +548,12 @@ describe("google-meet plugin", () => {
inputStdout.write(Buffer.from([1, 2, 3]));
callbacks?.onAudio(Buffer.from([4, 5]));
callbacks?.onMark?.("mark-1");
callbacks?.onToolCall?.({
itemId: "item-1",
callId: "tool-call-1",
name: "openclaw_agent_consult",
args: { question: "What should I say about launch timing?" },
});
expect(spawnMock).toHaveBeenNthCalledWith(1, "play-meet", [], {
stdio: ["pipe", "ignore", "pipe"],
@@ -490,6 +564,25 @@ describe("google-meet plugin", () => {
expect(sendAudio).toHaveBeenCalledWith(Buffer.from([1, 2, 3]));
expect(outputStdinWrites).toEqual([Buffer.from([4, 5])]);
expect(bridge.acknowledgeMark).toHaveBeenCalled();
expect(callbacks).toMatchObject({
tools: [
expect.objectContaining({
name: "openclaw_agent_consult",
}),
],
});
await vi.waitFor(() => {
expect(bridge.submitToolResult).toHaveBeenCalledWith("tool-call-1", {
text: "Use the Portugal launch data.",
});
});
expect(runtime.agent.runEmbeddedPiAgent).toHaveBeenCalledWith(
expect.objectContaining({
messageProvider: "google-meet",
thinkLevel: "high",
toolsAllow: ["read", "web_search", "web_fetch", "x_search", "memory_search", "memory_get"],
}),
);
await handle.stop();
expect(bridge.close).toHaveBeenCalled();

View File

@@ -103,46 +103,27 @@ const googleMeetConfigSchema = {
},
};
const GoogleMeetToolSchema = Type.Union([
Type.Object({
action: Type.Literal("join"),
url: Type.Optional(Type.String({ description: "Explicit https://meet.google.com/... URL" })),
transport: Type.Optional(Type.Union([Type.Literal("chrome"), Type.Literal("twilio")])),
mode: Type.Optional(Type.Union([Type.Literal("realtime"), Type.Literal("transcribe")])),
dialInNumber: Type.Optional(Type.String({ description: "Meet dial-in number for Twilio" })),
pin: Type.Optional(Type.String({ description: "Meet phone PIN for Twilio" })),
dtmfSequence: Type.Optional(Type.String({ description: "Explicit DTMF sequence for Twilio" })),
const GoogleMeetToolSchema = Type.Object({
action: Type.String({
enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave"],
description: "Google Meet action to run",
}),
Type.Object({
action: Type.Literal("status"),
sessionId: Type.Optional(Type.String({ description: "Meet session ID" })),
}),
Type.Object({
action: Type.Literal("setup_status"),
}),
Type.Object({
action: Type.Literal("resolve_space"),
meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })),
accessToken: Type.Optional(Type.String({ description: "Access token override" })),
refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })),
clientId: Type.Optional(Type.String({ description: "OAuth client id override" })),
clientSecret: Type.Optional(Type.String({ description: "OAuth client secret override" })),
expiresAt: Type.Optional(Type.Number({ description: "Cached access token expiry ms" })),
}),
Type.Object({
action: Type.Literal("preflight"),
meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })),
accessToken: Type.Optional(Type.String({ description: "Access token override" })),
refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })),
clientId: Type.Optional(Type.String({ description: "OAuth client id override" })),
clientSecret: Type.Optional(Type.String({ description: "OAuth client secret override" })),
expiresAt: Type.Optional(Type.Number({ description: "Cached access token expiry ms" })),
}),
Type.Object({
action: Type.Literal("leave"),
sessionId: Type.String({ description: "Meet session ID" }),
}),
]);
url: Type.Optional(Type.String({ description: "Explicit https://meet.google.com/... URL" })),
transport: Type.Optional(
Type.String({ enum: ["chrome", "twilio"], description: "Join transport" }),
),
mode: Type.Optional(Type.String({ enum: ["realtime", "transcribe"], description: "Join mode" })),
dialInNumber: Type.Optional(Type.String({ description: "Meet dial-in number for Twilio" })),
pin: Type.Optional(Type.String({ description: "Meet phone PIN for Twilio" })),
dtmfSequence: Type.Optional(Type.String({ description: "Explicit DTMF sequence for Twilio" })),
sessionId: Type.Optional(Type.String({ description: "Meet session ID" })),
meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })),
accessToken: Type.Optional(Type.String({ description: "Access token override" })),
refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })),
clientId: Type.Optional(Type.String({ description: "OAuth client id override" })),
clientSecret: Type.Optional(Type.String({ description: "OAuth client secret override" })),
expiresAt: Type.Optional(Type.Number({ description: "Cached access token expiry ms" })),
});
function asParamRecord(params: unknown): Record<string, unknown> {
return params && typeof params === "object" && !Array.isArray(params)

View File

@@ -0,0 +1,163 @@
import { randomUUID } from "node:crypto";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
import type { RealtimeVoiceTool } from "openclaw/plugin-sdk/realtime-voice";
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
import type { GoogleMeetConfig, GoogleMeetToolPolicy } from "./config.js";
// Minimal view of a payload returned by an embedded agent run: only the
// fields needed to decide whether the payload contains speakable text.
type AgentPayload = {
  text?: string;
  isError?: boolean;
  isReasoning?: boolean;
};
// Name the realtime provider echoes back in tool-call events; the bridge
// matches on this exact string when routing tool calls.
export const GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME = "openclaw_agent_consult";
// Realtime-voice tool definition advertised to the provider. Parameters are
// a single flat JSON-schema object; only "question" is required.
export const GOOGLE_MEET_AGENT_CONSULT_TOOL: RealtimeVoiceTool = {
  type: "function",
  name: GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME,
  description:
    "Ask the full OpenClaw agent for deeper reasoning, current information, or tool-backed help before speaking in the meeting.",
  parameters: {
    type: "object",
    properties: {
      question: {
        type: "string",
        description: "The concrete question or task the meeting participant asked.",
      },
      context: {
        type: "string",
        description: "Optional relevant meeting context or transcript summary.",
      },
      responseStyle: {
        type: "string",
        description: "Optional style hint for the spoken answer.",
      },
    },
    required: ["question"],
  },
};
// Expose the agent-consult tool to the realtime session unless the policy
// disables tools entirely.
export function resolveGoogleMeetRealtimeTools(policy: GoogleMeetToolPolicy): RealtimeVoiceTool[] {
  if (policy === "none") {
    return [];
  }
  return [GOOGLE_MEET_AGENT_CONSULT_TOOL];
}
// Extract one string field from an unknown tool-call args value; yields
// undefined when args is not a plain object or the field is absent/empty.
function normalizeToolArgString(args: unknown, key: string): string | undefined {
  const isPlainObject = typeof args === "object" && args !== null && !Array.isArray(args);
  if (!isPlainObject) {
    return undefined;
  }
  const record = args as Record<string, unknown>;
  return normalizeOptionalString(record[key]);
}
// Join the visible (non-error, non-reasoning) payload texts into a single
// speakable string; null when nothing usable was produced.
function collectVisibleText(payloads: AgentPayload[]): string | null {
  const chunks = payloads
    .filter((payload) => !payload.isError && !payload.isReasoning)
    .map((payload) => normalizeOptionalString(payload.text))
    .filter((text): text is string => Boolean(text));
  if (chunks.length === 0) {
    return null;
  }
  return chunks.join("\n\n").trim();
}
// Map the configured tool policy to the embedded agent's tool allow-list:
// "owner" means unrestricted (undefined), "safe-read-only" means a fixed
// read-only research set, and anything else means no tools at all.
function resolveToolsAllow(policy: GoogleMeetToolPolicy): string[] | undefined {
  switch (policy) {
    case "owner":
      return undefined;
    case "safe-read-only":
      return ["read", "web_search", "web_fetch", "x_search", "memory_search", "memory_get"];
    default:
      return [];
  }
}
// Compose the consult prompt from the tool-call args plus recent transcript.
// Throws when the tool call did not supply a usable "question" string.
function buildPrompt(params: {
  args: unknown;
  transcript: Array<{ role: "user" | "assistant"; text: string }>;
}): string {
  const question = normalizeToolArgString(params.args, "question");
  if (!question) {
    throw new Error("question required");
  }
  const context = normalizeToolArgString(params.args, "context");
  const responseStyle = normalizeToolArgString(params.args, "responseStyle");
  // Only the last 12 exchanges are included to keep the prompt compact.
  const recentEntries = params.transcript.slice(-12);
  const transcript = recentEntries
    .map((entry) => {
      const speaker = entry.role === "assistant" ? "Agent" : "Participant";
      return `${speaker}: ${entry.text}`;
    })
    .join("\n");
  const sections: Array<string | undefined> = [
    "You are helping an OpenClaw realtime voice agent during a private Google Meet.",
    "Answer the participant's question with the strongest useful reasoning and available tools.",
    "Return only the concise answer the realtime voice agent should speak next.",
    "Do not include markdown, citations unless needed, tool logs, or private reasoning.",
    responseStyle ? `Spoken style: ${responseStyle}` : undefined,
    transcript ? `Recent meeting transcript:\n${transcript}` : undefined,
    context ? `Additional context:\n${context}` : undefined,
    `Question:\n${question}`,
  ];
  return sections.filter(Boolean).join("\n\n");
}
/**
 * Run a one-shot embedded OpenClaw agent to answer a consult request raised
 * by the realtime voice agent during a Google Meet session.
 *
 * A per-meeting session id is persisted in the agent session store under the
 * key `google-meet:<meetingSessionId>`, so repeated consults within the same
 * meeting reuse one agent session. Returns the agent's speakable text, or a
 * stock filler line when the run produced no usable output.
 */
export async function consultOpenClawAgentForGoogleMeet(params: {
  config: GoogleMeetConfig;
  fullConfig: OpenClawConfig;
  runtime: PluginRuntime;
  logger: RuntimeLogger;
  meetingSessionId: string;
  args: unknown;
  transcript: Array<{ role: "user" | "assistant"; text: string }>;
}): Promise<{ text: string }> {
  const agentId = "main";
  const sessionKey = `google-meet:${params.meetingSessionId}`;
  const cfg = params.fullConfig;
  const agentDir = params.runtime.agent.resolveAgentDir(cfg, agentId);
  const workspaceDir = params.runtime.agent.resolveAgentWorkspaceDir(cfg, agentId);
  await params.runtime.agent.ensureAgentWorkspace({ dir: workspaceDir });
  const storePath = params.runtime.agent.session.resolveStorePath(cfg.session?.store, { agentId });
  const sessionStore = params.runtime.agent.session.loadSessionStore(storePath);
  const now = Date.now();
  // Reuse the meeting's stored agent session id when one exists.
  const existing = sessionStore[sessionKey] as
    | { sessionId?: string; updatedAt?: number }
    | undefined;
  const sessionId = normalizeOptionalString(existing?.sessionId) ?? randomUUID();
  sessionStore[sessionKey] = { ...existing, sessionId, updatedAt: now };
  await params.runtime.agent.session.saveSessionStore(storePath, sessionStore);
  const sessionFile = params.runtime.agent.session.resolveSessionFilePath(
    sessionId,
    sessionStore[sessionKey],
    { agentId },
  );
  const result = await params.runtime.agent.runEmbeddedPiAgent({
    sessionId,
    sessionKey,
    messageProvider: "google-meet",
    sessionFile,
    workspaceDir,
    config: cfg,
    prompt: buildPrompt({ args: params.args, transcript: params.transcript }),
    // Deep thinking, but verbose/reasoning output suppressed: only the final
    // answer text is forwarded to the realtime agent to speak.
    thinkLevel: "high",
    verboseLevel: "off",
    reasoningLevel: "off",
    toolResultFormat: "plain",
    toolsAllow: resolveToolsAllow(params.config.realtime.toolPolicy),
    timeoutMs: params.runtime.agent.resolveAgentTimeoutMs({ cfg }),
    runId: `google-meet:${params.meetingSessionId}:${Date.now()}`,
    lane: "google-meet",
    extraSystemPrompt:
      "You are a behind-the-scenes consultant for a live meeting voice agent. Be accurate, brief, and speakable.",
    agentDir,
  });
  const text = collectVisibleText((result.payloads ?? []) as AgentPayload[]);
  if (!text) {
    const reason = result.meta?.aborted ? "agent run aborted" : "agent returned no speakable text";
    params.logger.warn(`[google-meet] agent consult produced no answer: ${reason}`);
    // Fall back to a safe filler line rather than leaving the tool call unanswered.
    return { text: "I need a moment to verify that before answering." };
  }
  return { text };
}

View File

@@ -2,7 +2,7 @@ import { spawn } from "node:child_process";
import type { Writable } from "node:stream";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import type { RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
import {
createRealtimeVoiceBridgeSession,
resolveConfiguredRealtimeVoiceProvider,
@@ -10,6 +10,11 @@ import {
type RealtimeVoiceProviderConfig,
type RealtimeVoiceProviderPlugin,
} from "openclaw/plugin-sdk/realtime-voice";
import {
consultOpenClawAgentForGoogleMeet,
GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME,
resolveGoogleMeetRealtimeTools,
} from "./agent-consult.js";
import type { GoogleMeetConfig } from "./config.js";
type BridgeProcess = {
@@ -70,6 +75,8 @@ export function resolveGoogleMeetRealtimeProvider(params: {
export async function startCommandRealtimeAudioBridge(params: {
config: GoogleMeetConfig;
fullConfig: OpenClawConfig;
runtime: PluginRuntime;
meetingSessionId: string;
inputCommand: string[];
outputCommand: string[];
logger: RuntimeLogger;
@@ -136,11 +143,13 @@ export async function startCommandRealtimeAudioBridge(params: {
fullConfig: params.fullConfig,
providers: params.providers,
});
const transcript: Array<{ role: "user" | "assistant"; text: string }> = [];
bridge = createRealtimeVoiceBridgeSession({
provider: resolved.provider,
providerConfig: resolved.providerConfig,
instructions: params.config.realtime.instructions,
markStrategy: "ack-immediately",
tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy),
audioSink: {
isOpen: () => !stopped,
sendAudio: (muLaw) => {
@@ -149,9 +158,38 @@ export async function startCommandRealtimeAudioBridge(params: {
},
onTranscript: (role, text, isFinal) => {
if (isFinal) {
transcript.push({ role, text });
if (transcript.length > 40) {
transcript.splice(0, transcript.length - 40);
}
params.logger.debug?.(`[google-meet] ${role}: ${text}`);
}
},
onToolCall: (event, session) => {
if (event.name !== GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME) {
session.submitToolResult(event.callId || event.itemId, {
error: `Tool "${event.name}" not available`,
});
return;
}
void consultOpenClawAgentForGoogleMeet({
config: params.config,
fullConfig: params.fullConfig,
runtime: params.runtime,
logger: params.logger,
meetingSessionId: params.meetingSessionId,
args: event.args,
transcript,
})
.then((result) => {
session.submitToolResult(event.callId || event.itemId, result);
})
.catch((error: Error) => {
session.submitToolResult(event.callId || event.itemId, {
error: formatErrorMessage(error),
});
});
},
onError: fail("realtime voice bridge"),
onClose: (reason) => {
if (reason === "error") {

View File

@@ -12,7 +12,7 @@ import type {
GoogleMeetJoinResult,
GoogleMeetSession,
} from "./transports/types.js";
import { joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js";
import { endMeetVoiceCallGatewayCall, joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js";
function nowIso(): string {
return new Date().toISOString();
@@ -110,6 +110,7 @@ export class GoogleMeetRuntime {
runtime: this.params.runtime,
config: this.params.config,
fullConfig: this.params.fullConfig,
meetingSessionId: session.id,
mode,
url,
logger: this.params.logger,
@@ -161,6 +162,14 @@ export class GoogleMeetRuntime {
voiceCallId: voiceCallResult?.callId,
dtmfSent: voiceCallResult?.dtmfSent,
};
if (voiceCallResult?.callId) {
this.#sessionStops.set(session.id, async () => {
await endMeetVoiceCallGatewayCall({
config: this.params.config,
callId: voiceCallResult.callId,
});
});
}
session.notes.push(
this.params.config.voiceCall.enabled
? "Twilio transport delegated the call to the voice-call plugin and sent configured DTMF."

View File

@@ -46,6 +46,7 @@ export async function launchChromeMeet(params: {
runtime: PluginRuntime;
config: GoogleMeetConfig;
fullConfig: OpenClawConfig;
meetingSessionId: string;
mode: "realtime" | "transcribe";
url: string;
logger: RuntimeLogger;
@@ -99,6 +100,8 @@ export async function launchChromeMeet(params: {
...(await startCommandRealtimeAudioBridge({
config: params.config,
fullConfig: params.fullConfig,
runtime: params.runtime,
meetingSessionId: params.meetingSessionId,
inputCommand: params.config.chrome.audioInputCommand,
outputCommand: params.config.chrome.audioOutputCommand,
logger: params.logger,
@@ -116,13 +119,30 @@ export async function launchChromeMeet(params: {
}
argv.push(params.url);
const result = await params.runtime.system.runCommandWithTimeout(argv, {
timeoutMs: params.config.chrome.joinTimeoutMs,
});
if (result.code !== 0) {
let commandPairBridgeStopped = false;
const stopCommandPairBridge = async () => {
if (commandPairBridgeStopped) {
return;
}
commandPairBridgeStopped = true;
if (audioBridge?.type === "command-pair") {
await audioBridge.stop();
}
};
try {
const result = await params.runtime.system.runCommandWithTimeout(argv, {
timeoutMs: params.config.chrome.joinTimeoutMs,
});
if (result.code === 0) {
return { launched: true, audioBridge };
}
await stopCommandPairBridge();
throw new Error(
`failed to launch Chrome for Meet: ${result.stderr || result.stdout || result.code}`,
);
} catch (error) {
await stopCommandPairBridge();
throw error;
}
return { launched: true, audioBridge };
}

View File

@@ -82,3 +82,23 @@ export async function joinMeetViaVoiceCallGateway(params: {
await client?.stopAndWait({ timeoutMs: 1_000 });
}
}
/**
 * Ask the voice-call gateway to hang up a previously delegated Meet call.
 * The gateway client is always stopped afterwards, even when the end
 * request itself fails.
 */
export async function endMeetVoiceCallGatewayCall(params: {
  config: GoogleMeetConfig;
  callId: string;
}): Promise<void> {
  let gatewayClient: VoiceCallGatewayClient | undefined;
  try {
    gatewayClient = await createConnectedGatewayClient(params.config);
    const requestOptions = { timeoutMs: params.config.voiceCall.requestTimeoutMs };
    await gatewayClient.request("voicecall.end", { callId: params.callId }, requestOptions);
  } finally {
    await gatewayClient?.stopAndWait({ timeoutMs: 1_000 });
  }
}