mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:50:43 +00:00
feat(plugins): give google meet realtime agent consult
This commit is contained in:
@@ -16,10 +16,12 @@ Docs: https://docs.openclaw.ai
|
||||
- Providers/OpenAI: add image generation and reference-image editing through Codex OAuth, so `openai/gpt-image-2` works without an `OPENAI_API_KEY`. Fixes #70703.
|
||||
- Providers/OpenRouter: add image generation and reference-image editing through `image_generate`, so OpenRouter image models work with `OPENROUTER_API_KEY`. Fixes #55066 via #67668. Thanks @notamicrodose.
|
||||
- Image generation: let agents request provider-supported quality and output format hints, and pass OpenAI-specific background, moderation, compression, and user hints through the `image_generate` tool. (#70503) Thanks @ottodeng.
|
||||
- Plugins/Google Meet: let realtime Meet sessions consult the full OpenClaw agent for deeper answers while staying in the live voice loop.
|
||||
|
||||
### Fixes
|
||||
|
||||
- Gateway/WebChat: preserve image attachments for text-only primary models by offloading them as media refs instead of dropping them, so configured image tools can still inspect the original file. Fixes #68513, #44276, #51656, #70212.
|
||||
- Plugins/Google Meet: hang up delegated Twilio calls on leave, clean up Chrome realtime audio bridges when launch fails, and use a flat provider-safe tool schema.
|
||||
- Media understanding: honor explicit image-model configuration before native-vision skips, including `agents.defaults.imageModel`, `tools.media.image.models`, and provider image defaults such as MiniMax VL when the active chat model is text-only. Fixes #47614, #63722, #69171.
|
||||
- Codex/media understanding: support `codex/*` image models through bounded Codex app-server image turns, while keeping `openai-codex/*` on the OpenAI Codex OAuth route and validating app-server responses against generated protocol contracts. Fixes #70201.
|
||||
- Providers/OpenAI Codex: synthesize the `openai-codex/gpt-5.5` OAuth model row when Codex catalog discovery omits it, so cron and subagent runs do not fail with `Unknown model` while the account is authenticated.
|
||||
|
||||
@@ -14,6 +14,8 @@ The plugin is explicit by design:
|
||||
|
||||
- It only joins an explicit `https://meet.google.com/...` URL.
|
||||
- `realtime` voice is the default mode.
|
||||
- Realtime voice can call back into the full OpenClaw agent when deeper
|
||||
reasoning or tools are needed.
|
||||
- Auth starts as personal Google OAuth or an already signed-in Chrome profile.
|
||||
- There is no automatic consent announcement.
|
||||
- The default Chrome audio backend is `BlackHole 2ch`.
|
||||
@@ -212,6 +214,12 @@ call still needs a participant path. This plugin keeps that boundary visible:
|
||||
Chrome handles browser participation and local audio routing; Twilio handles
|
||||
phone dial-in participation.
|
||||
|
||||
Realtime mode gives the voice model one tool, `openclaw_agent_consult`, unless
|
||||
`realtime.toolPolicy` is `none`. The tool asks the normal OpenClaw agent for a
|
||||
concise spoken answer, using recent meeting transcript as context. With
|
||||
`safe-read-only`, the consult run is limited to read/search/memory tools. With
|
||||
`owner`, it inherits the normal agent tool policy.
|
||||
|
||||
Chrome realtime mode needs either:
|
||||
|
||||
- `chrome.audioInputCommand` plus `chrome.audioOutputCommand`: OpenClaw owns the
|
||||
|
||||
@@ -22,6 +22,7 @@ import { buildMeetDtmfSequence, normalizeDialInNumber } from "./src/transports/t
|
||||
|
||||
const voiceCallMocks = vi.hoisted(() => ({
|
||||
joinMeetViaVoiceCallGateway: vi.fn(async () => ({ callId: "call-1", dtmfSent: true })),
|
||||
endMeetVoiceCallGatewayCall: vi.fn(async () => {}),
|
||||
}));
|
||||
|
||||
const fetchGuardMocks = vi.hoisted(() => ({
|
||||
@@ -45,6 +46,7 @@ vi.mock("openclaw/plugin-sdk/ssrf-runtime", () => ({
|
||||
|
||||
vi.mock("./src/voice-call-gateway.js", () => ({
|
||||
joinMeetViaVoiceCallGateway: voiceCallMocks.joinMeetViaVoiceCallGateway,
|
||||
endMeetVoiceCallGatewayCall: voiceCallMocks.endMeetVoiceCallGatewayCall,
|
||||
}));
|
||||
|
||||
const noopLogger = {
|
||||
@@ -168,6 +170,24 @@ describe("google-meet plugin", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("uses a provider-safe flat tool parameter schema", () => {
|
||||
const { tools } = setup();
|
||||
const tool = tools[0] as { parameters: unknown };
|
||||
|
||||
expect(JSON.stringify(tool.parameters)).not.toContain("anyOf");
|
||||
expect(tool.parameters).toMatchObject({
|
||||
type: "object",
|
||||
properties: {
|
||||
action: {
|
||||
type: "string",
|
||||
enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave"],
|
||||
},
|
||||
transport: { type: "string", enum: ["chrome", "twilio"] },
|
||||
mode: { type: "string", enum: ["realtime", "transcribe"] },
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it("normalizes Meet URLs, codes, and space names for the Meet API", () => {
|
||||
expect(normalizeGoogleMeetSpaceName("spaces/abc-defg-hij")).toBe("spaces/abc-defg-hij");
|
||||
expect(normalizeGoogleMeetSpaceName("abc-defg-hij")).toBe("spaces/abc-defg-hij");
|
||||
@@ -323,6 +343,26 @@ describe("google-meet plugin", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("hangs up delegated Twilio calls on leave", async () => {
|
||||
const { tools } = setup({ defaultTransport: "twilio" });
|
||||
const tool = tools[0] as {
|
||||
execute: (id: string, params: unknown) => Promise<{ details: { session: { id: string } } }>;
|
||||
};
|
||||
const joined = await tool.execute("id", {
|
||||
action: "join",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
dialInNumber: "+15551234567",
|
||||
pin: "123456",
|
||||
});
|
||||
|
||||
await tool.execute("id", { action: "leave", sessionId: joined.details.session.id });
|
||||
|
||||
expect(voiceCallMocks.endMeetVoiceCallGatewayCall).toHaveBeenCalledWith({
|
||||
config: expect.objectContaining({ defaultTransport: "twilio" }),
|
||||
callId: "call-1",
|
||||
});
|
||||
});
|
||||
|
||||
it("reports setup status through the tool", async () => {
|
||||
const { tools } = setup({
|
||||
chrome: {
|
||||
@@ -415,6 +455,13 @@ describe("google-meet plugin", () => {
|
||||
| {
|
||||
onAudio: (audio: Buffer) => void;
|
||||
onMark?: (markName: string) => void;
|
||||
onToolCall?: (event: {
|
||||
itemId: string;
|
||||
callId: string;
|
||||
name: string;
|
||||
args: unknown;
|
||||
}) => void;
|
||||
tools?: unknown[];
|
||||
}
|
||||
| undefined;
|
||||
const sendAudio = vi.fn();
|
||||
@@ -464,12 +511,33 @@ describe("google-meet plugin", () => {
|
||||
const inputProcess = makeProcess({ stdout: inputStdout, stdin: null });
|
||||
const outputProcess = makeProcess({ stdin: outputStdin, stdout: null });
|
||||
const spawnMock = vi.fn().mockReturnValueOnce(outputProcess).mockReturnValueOnce(inputProcess);
|
||||
const sessionStore: Record<string, unknown> = {};
|
||||
const runtime = {
|
||||
agent: {
|
||||
resolveAgentDir: vi.fn(() => "/tmp/agent"),
|
||||
resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"),
|
||||
ensureAgentWorkspace: vi.fn(async () => {}),
|
||||
session: {
|
||||
resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
|
||||
loadSessionStore: vi.fn(() => sessionStore),
|
||||
saveSessionStore: vi.fn(async () => {}),
|
||||
resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
|
||||
},
|
||||
runEmbeddedPiAgent: vi.fn(async () => ({
|
||||
payloads: [{ text: "Use the Portugal launch data." }],
|
||||
meta: {},
|
||||
})),
|
||||
resolveAgentTimeoutMs: vi.fn(() => 1000),
|
||||
},
|
||||
};
|
||||
|
||||
const handle = await startCommandRealtimeAudioBridge({
|
||||
config: resolveGoogleMeetConfig({
|
||||
realtime: { provider: "openai", model: "gpt-realtime" },
|
||||
}),
|
||||
fullConfig: {} as never,
|
||||
runtime: runtime as never,
|
||||
meetingSessionId: "meet-1",
|
||||
inputCommand: ["capture-meet"],
|
||||
outputCommand: ["play-meet"],
|
||||
logger: noopLogger,
|
||||
@@ -480,6 +548,12 @@ describe("google-meet plugin", () => {
|
||||
inputStdout.write(Buffer.from([1, 2, 3]));
|
||||
callbacks?.onAudio(Buffer.from([4, 5]));
|
||||
callbacks?.onMark?.("mark-1");
|
||||
callbacks?.onToolCall?.({
|
||||
itemId: "item-1",
|
||||
callId: "tool-call-1",
|
||||
name: "openclaw_agent_consult",
|
||||
args: { question: "What should I say about launch timing?" },
|
||||
});
|
||||
|
||||
expect(spawnMock).toHaveBeenNthCalledWith(1, "play-meet", [], {
|
||||
stdio: ["pipe", "ignore", "pipe"],
|
||||
@@ -490,6 +564,25 @@ describe("google-meet plugin", () => {
|
||||
expect(sendAudio).toHaveBeenCalledWith(Buffer.from([1, 2, 3]));
|
||||
expect(outputStdinWrites).toEqual([Buffer.from([4, 5])]);
|
||||
expect(bridge.acknowledgeMark).toHaveBeenCalled();
|
||||
expect(callbacks).toMatchObject({
|
||||
tools: [
|
||||
expect.objectContaining({
|
||||
name: "openclaw_agent_consult",
|
||||
}),
|
||||
],
|
||||
});
|
||||
await vi.waitFor(() => {
|
||||
expect(bridge.submitToolResult).toHaveBeenCalledWith("tool-call-1", {
|
||||
text: "Use the Portugal launch data.",
|
||||
});
|
||||
});
|
||||
expect(runtime.agent.runEmbeddedPiAgent).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
messageProvider: "google-meet",
|
||||
thinkLevel: "high",
|
||||
toolsAllow: ["read", "web_search", "web_fetch", "x_search", "memory_search", "memory_get"],
|
||||
}),
|
||||
);
|
||||
|
||||
await handle.stop();
|
||||
expect(bridge.close).toHaveBeenCalled();
|
||||
|
||||
@@ -103,46 +103,27 @@ const googleMeetConfigSchema = {
|
||||
},
|
||||
};
|
||||
|
||||
const GoogleMeetToolSchema = Type.Union([
|
||||
Type.Object({
|
||||
action: Type.Literal("join"),
|
||||
url: Type.Optional(Type.String({ description: "Explicit https://meet.google.com/... URL" })),
|
||||
transport: Type.Optional(Type.Union([Type.Literal("chrome"), Type.Literal("twilio")])),
|
||||
mode: Type.Optional(Type.Union([Type.Literal("realtime"), Type.Literal("transcribe")])),
|
||||
dialInNumber: Type.Optional(Type.String({ description: "Meet dial-in number for Twilio" })),
|
||||
pin: Type.Optional(Type.String({ description: "Meet phone PIN for Twilio" })),
|
||||
dtmfSequence: Type.Optional(Type.String({ description: "Explicit DTMF sequence for Twilio" })),
|
||||
const GoogleMeetToolSchema = Type.Object({
|
||||
action: Type.String({
|
||||
enum: ["join", "status", "setup_status", "resolve_space", "preflight", "leave"],
|
||||
description: "Google Meet action to run",
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("status"),
|
||||
sessionId: Type.Optional(Type.String({ description: "Meet session ID" })),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("setup_status"),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("resolve_space"),
|
||||
meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })),
|
||||
accessToken: Type.Optional(Type.String({ description: "Access token override" })),
|
||||
refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })),
|
||||
clientId: Type.Optional(Type.String({ description: "OAuth client id override" })),
|
||||
clientSecret: Type.Optional(Type.String({ description: "OAuth client secret override" })),
|
||||
expiresAt: Type.Optional(Type.Number({ description: "Cached access token expiry ms" })),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("preflight"),
|
||||
meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })),
|
||||
accessToken: Type.Optional(Type.String({ description: "Access token override" })),
|
||||
refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })),
|
||||
clientId: Type.Optional(Type.String({ description: "OAuth client id override" })),
|
||||
clientSecret: Type.Optional(Type.String({ description: "OAuth client secret override" })),
|
||||
expiresAt: Type.Optional(Type.Number({ description: "Cached access token expiry ms" })),
|
||||
}),
|
||||
Type.Object({
|
||||
action: Type.Literal("leave"),
|
||||
sessionId: Type.String({ description: "Meet session ID" }),
|
||||
}),
|
||||
]);
|
||||
url: Type.Optional(Type.String({ description: "Explicit https://meet.google.com/... URL" })),
|
||||
transport: Type.Optional(
|
||||
Type.String({ enum: ["chrome", "twilio"], description: "Join transport" }),
|
||||
),
|
||||
mode: Type.Optional(Type.String({ enum: ["realtime", "transcribe"], description: "Join mode" })),
|
||||
dialInNumber: Type.Optional(Type.String({ description: "Meet dial-in number for Twilio" })),
|
||||
pin: Type.Optional(Type.String({ description: "Meet phone PIN for Twilio" })),
|
||||
dtmfSequence: Type.Optional(Type.String({ description: "Explicit DTMF sequence for Twilio" })),
|
||||
sessionId: Type.Optional(Type.String({ description: "Meet session ID" })),
|
||||
meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })),
|
||||
accessToken: Type.Optional(Type.String({ description: "Access token override" })),
|
||||
refreshToken: Type.Optional(Type.String({ description: "Refresh token override" })),
|
||||
clientId: Type.Optional(Type.String({ description: "OAuth client id override" })),
|
||||
clientSecret: Type.Optional(Type.String({ description: "OAuth client secret override" })),
|
||||
expiresAt: Type.Optional(Type.Number({ description: "Cached access token expiry ms" })),
|
||||
});
|
||||
|
||||
function asParamRecord(params: unknown): Record<string, unknown> {
|
||||
return params && typeof params === "object" && !Array.isArray(params)
|
||||
|
||||
163
extensions/google-meet/src/agent-consult.ts
Normal file
163
extensions/google-meet/src/agent-consult.ts
Normal file
@@ -0,0 +1,163 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
|
||||
import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
|
||||
import type { RealtimeVoiceTool } from "openclaw/plugin-sdk/realtime-voice";
|
||||
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
|
||||
import type { GoogleMeetConfig, GoogleMeetToolPolicy } from "./config.js";
|
||||
|
||||
type AgentPayload = {
|
||||
text?: string;
|
||||
isError?: boolean;
|
||||
isReasoning?: boolean;
|
||||
};
|
||||
|
||||
export const GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME = "openclaw_agent_consult";
|
||||
|
||||
export const GOOGLE_MEET_AGENT_CONSULT_TOOL: RealtimeVoiceTool = {
|
||||
type: "function",
|
||||
name: GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME,
|
||||
description:
|
||||
"Ask the full OpenClaw agent for deeper reasoning, current information, or tool-backed help before speaking in the meeting.",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
question: {
|
||||
type: "string",
|
||||
description: "The concrete question or task the meeting participant asked.",
|
||||
},
|
||||
context: {
|
||||
type: "string",
|
||||
description: "Optional relevant meeting context or transcript summary.",
|
||||
},
|
||||
responseStyle: {
|
||||
type: "string",
|
||||
description: "Optional style hint for the spoken answer.",
|
||||
},
|
||||
},
|
||||
required: ["question"],
|
||||
},
|
||||
};
|
||||
|
||||
export function resolveGoogleMeetRealtimeTools(policy: GoogleMeetToolPolicy): RealtimeVoiceTool[] {
|
||||
return policy === "none" ? [] : [GOOGLE_MEET_AGENT_CONSULT_TOOL];
|
||||
}
|
||||
|
||||
function normalizeToolArgString(args: unknown, key: string): string | undefined {
|
||||
if (!args || typeof args !== "object" || Array.isArray(args)) {
|
||||
return undefined;
|
||||
}
|
||||
return normalizeOptionalString((args as Record<string, unknown>)[key]);
|
||||
}
|
||||
|
||||
function collectVisibleText(payloads: AgentPayload[]): string | null {
|
||||
const chunks: string[] = [];
|
||||
for (const payload of payloads) {
|
||||
if (payload.isError || payload.isReasoning) {
|
||||
continue;
|
||||
}
|
||||
const text = normalizeOptionalString(payload.text);
|
||||
if (text) {
|
||||
chunks.push(text);
|
||||
}
|
||||
}
|
||||
return chunks.length > 0 ? chunks.join("\n\n").trim() : null;
|
||||
}
|
||||
|
||||
function resolveToolsAllow(policy: GoogleMeetToolPolicy): string[] | undefined {
|
||||
if (policy === "owner") {
|
||||
return undefined;
|
||||
}
|
||||
if (policy === "safe-read-only") {
|
||||
return ["read", "web_search", "web_fetch", "x_search", "memory_search", "memory_get"];
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
function buildPrompt(params: {
|
||||
args: unknown;
|
||||
transcript: Array<{ role: "user" | "assistant"; text: string }>;
|
||||
}): string {
|
||||
const question = normalizeToolArgString(params.args, "question");
|
||||
if (!question) {
|
||||
throw new Error("question required");
|
||||
}
|
||||
const context = normalizeToolArgString(params.args, "context");
|
||||
const responseStyle = normalizeToolArgString(params.args, "responseStyle");
|
||||
const transcript = params.transcript
|
||||
.slice(-12)
|
||||
.map((entry) => `${entry.role === "assistant" ? "Agent" : "Participant"}: ${entry.text}`)
|
||||
.join("\n");
|
||||
return [
|
||||
"You are helping an OpenClaw realtime voice agent during a private Google Meet.",
|
||||
"Answer the participant's question with the strongest useful reasoning and available tools.",
|
||||
"Return only the concise answer the realtime voice agent should speak next.",
|
||||
"Do not include markdown, citations unless needed, tool logs, or private reasoning.",
|
||||
responseStyle ? `Spoken style: ${responseStyle}` : undefined,
|
||||
transcript ? `Recent meeting transcript:\n${transcript}` : undefined,
|
||||
context ? `Additional context:\n${context}` : undefined,
|
||||
`Question:\n${question}`,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join("\n\n");
|
||||
}
|
||||
|
||||
/**
 * Run a one-shot consult against the full OpenClaw agent on behalf of a live
 * Google Meet realtime voice session, and return text the voice model can
 * speak verbatim.
 *
 * A per-meeting session key (`google-meet:<meetingSessionId>`) is persisted in
 * the agent session store, so repeated consults within the same meeting reuse
 * one agent session id.
 *
 * Never rejects on an empty agent answer: a fixed fallback sentence is
 * returned instead so the voice loop always has something to say.
 */
export async function consultOpenClawAgentForGoogleMeet(params: {
  config: GoogleMeetConfig;
  fullConfig: OpenClawConfig;
  runtime: PluginRuntime;
  logger: RuntimeLogger;
  meetingSessionId: string;
  // Raw tool-call arguments from the realtime model; validated in buildPrompt.
  args: unknown;
  // Recent final transcript entries used as prompt context.
  transcript: Array<{ role: "user" | "assistant"; text: string }>;
}): Promise<{ text: string }> {
  const agentId = "main";
  const sessionKey = `google-meet:${params.meetingSessionId}`;
  const cfg = params.fullConfig;
  const agentDir = params.runtime.agent.resolveAgentDir(cfg, agentId);
  const workspaceDir = params.runtime.agent.resolveAgentWorkspaceDir(cfg, agentId);
  await params.runtime.agent.ensureAgentWorkspace({ dir: workspaceDir });

  // Reuse the stored session id for this meeting if one exists, otherwise
  // mint a fresh one; either way the store entry's updatedAt is refreshed.
  const storePath = params.runtime.agent.session.resolveStorePath(cfg.session?.store, { agentId });
  const sessionStore = params.runtime.agent.session.loadSessionStore(storePath);
  const now = Date.now();
  const existing = sessionStore[sessionKey] as
    | { sessionId?: string; updatedAt?: number }
    | undefined;
  const sessionId = normalizeOptionalString(existing?.sessionId) ?? randomUUID();
  sessionStore[sessionKey] = { ...existing, sessionId, updatedAt: now };
  await params.runtime.agent.session.saveSessionStore(storePath, sessionStore);

  const sessionFile = params.runtime.agent.session.resolveSessionFilePath(
    sessionId,
    sessionStore[sessionKey],
    { agentId },
  );
  // Embedded agent run: reasoning output suppressed, tools restricted per the
  // realtime toolPolicy, bounded by the runtime's agent timeout.
  const result = await params.runtime.agent.runEmbeddedPiAgent({
    sessionId,
    sessionKey,
    messageProvider: "google-meet",
    sessionFile,
    workspaceDir,
    config: cfg,
    prompt: buildPrompt({ args: params.args, transcript: params.transcript }),
    thinkLevel: "high",
    verboseLevel: "off",
    reasoningLevel: "off",
    toolResultFormat: "plain",
    toolsAllow: resolveToolsAllow(params.config.realtime.toolPolicy),
    timeoutMs: params.runtime.agent.resolveAgentTimeoutMs({ cfg }),
    // Unique per consult (timestamp suffix) even though sessionId is reused.
    runId: `google-meet:${params.meetingSessionId}:${Date.now()}`,
    lane: "google-meet",
    extraSystemPrompt:
      "You are a behind-the-scenes consultant for a live meeting voice agent. Be accurate, brief, and speakable.",
    agentDir,
  });

  const text = collectVisibleText((result.payloads ?? []) as AgentPayload[]);
  if (!text) {
    // No speakable payload: log why and hand the voice model a safe stall line.
    const reason = result.meta?.aborted ? "agent run aborted" : "agent returned no speakable text";
    params.logger.warn(`[google-meet] agent consult produced no answer: ${reason}`);
    return { text: "I need a moment to verify that before answering." };
  }
  return { text };
}
|
||||
@@ -2,7 +2,7 @@ import { spawn } from "node:child_process";
|
||||
import type { Writable } from "node:stream";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-runtime";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import type { RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
|
||||
import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
|
||||
import {
|
||||
createRealtimeVoiceBridgeSession,
|
||||
resolveConfiguredRealtimeVoiceProvider,
|
||||
@@ -10,6 +10,11 @@ import {
|
||||
type RealtimeVoiceProviderConfig,
|
||||
type RealtimeVoiceProviderPlugin,
|
||||
} from "openclaw/plugin-sdk/realtime-voice";
|
||||
import {
|
||||
consultOpenClawAgentForGoogleMeet,
|
||||
GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME,
|
||||
resolveGoogleMeetRealtimeTools,
|
||||
} from "./agent-consult.js";
|
||||
import type { GoogleMeetConfig } from "./config.js";
|
||||
|
||||
type BridgeProcess = {
|
||||
@@ -70,6 +75,8 @@ export function resolveGoogleMeetRealtimeProvider(params: {
|
||||
export async function startCommandRealtimeAudioBridge(params: {
|
||||
config: GoogleMeetConfig;
|
||||
fullConfig: OpenClawConfig;
|
||||
runtime: PluginRuntime;
|
||||
meetingSessionId: string;
|
||||
inputCommand: string[];
|
||||
outputCommand: string[];
|
||||
logger: RuntimeLogger;
|
||||
@@ -136,11 +143,13 @@ export async function startCommandRealtimeAudioBridge(params: {
|
||||
fullConfig: params.fullConfig,
|
||||
providers: params.providers,
|
||||
});
|
||||
const transcript: Array<{ role: "user" | "assistant"; text: string }> = [];
|
||||
bridge = createRealtimeVoiceBridgeSession({
|
||||
provider: resolved.provider,
|
||||
providerConfig: resolved.providerConfig,
|
||||
instructions: params.config.realtime.instructions,
|
||||
markStrategy: "ack-immediately",
|
||||
tools: resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy),
|
||||
audioSink: {
|
||||
isOpen: () => !stopped,
|
||||
sendAudio: (muLaw) => {
|
||||
@@ -149,9 +158,38 @@ export async function startCommandRealtimeAudioBridge(params: {
|
||||
},
|
||||
onTranscript: (role, text, isFinal) => {
|
||||
if (isFinal) {
|
||||
transcript.push({ role, text });
|
||||
if (transcript.length > 40) {
|
||||
transcript.splice(0, transcript.length - 40);
|
||||
}
|
||||
params.logger.debug?.(`[google-meet] ${role}: ${text}`);
|
||||
}
|
||||
},
|
||||
onToolCall: (event, session) => {
|
||||
if (event.name !== GOOGLE_MEET_AGENT_CONSULT_TOOL_NAME) {
|
||||
session.submitToolResult(event.callId || event.itemId, {
|
||||
error: `Tool "${event.name}" not available`,
|
||||
});
|
||||
return;
|
||||
}
|
||||
void consultOpenClawAgentForGoogleMeet({
|
||||
config: params.config,
|
||||
fullConfig: params.fullConfig,
|
||||
runtime: params.runtime,
|
||||
logger: params.logger,
|
||||
meetingSessionId: params.meetingSessionId,
|
||||
args: event.args,
|
||||
transcript,
|
||||
})
|
||||
.then((result) => {
|
||||
session.submitToolResult(event.callId || event.itemId, result);
|
||||
})
|
||||
.catch((error: Error) => {
|
||||
session.submitToolResult(event.callId || event.itemId, {
|
||||
error: formatErrorMessage(error),
|
||||
});
|
||||
});
|
||||
},
|
||||
onError: fail("realtime voice bridge"),
|
||||
onClose: (reason) => {
|
||||
if (reason === "error") {
|
||||
|
||||
@@ -12,7 +12,7 @@ import type {
|
||||
GoogleMeetJoinResult,
|
||||
GoogleMeetSession,
|
||||
} from "./transports/types.js";
|
||||
import { joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js";
|
||||
import { endMeetVoiceCallGatewayCall, joinMeetViaVoiceCallGateway } from "./voice-call-gateway.js";
|
||||
|
||||
function nowIso(): string {
|
||||
return new Date().toISOString();
|
||||
@@ -110,6 +110,7 @@ export class GoogleMeetRuntime {
|
||||
runtime: this.params.runtime,
|
||||
config: this.params.config,
|
||||
fullConfig: this.params.fullConfig,
|
||||
meetingSessionId: session.id,
|
||||
mode,
|
||||
url,
|
||||
logger: this.params.logger,
|
||||
@@ -161,6 +162,14 @@ export class GoogleMeetRuntime {
|
||||
voiceCallId: voiceCallResult?.callId,
|
||||
dtmfSent: voiceCallResult?.dtmfSent,
|
||||
};
|
||||
if (voiceCallResult?.callId) {
|
||||
this.#sessionStops.set(session.id, async () => {
|
||||
await endMeetVoiceCallGatewayCall({
|
||||
config: this.params.config,
|
||||
callId: voiceCallResult.callId,
|
||||
});
|
||||
});
|
||||
}
|
||||
session.notes.push(
|
||||
this.params.config.voiceCall.enabled
|
||||
? "Twilio transport delegated the call to the voice-call plugin and sent configured DTMF."
|
||||
|
||||
@@ -46,6 +46,7 @@ export async function launchChromeMeet(params: {
|
||||
runtime: PluginRuntime;
|
||||
config: GoogleMeetConfig;
|
||||
fullConfig: OpenClawConfig;
|
||||
meetingSessionId: string;
|
||||
mode: "realtime" | "transcribe";
|
||||
url: string;
|
||||
logger: RuntimeLogger;
|
||||
@@ -99,6 +100,8 @@ export async function launchChromeMeet(params: {
|
||||
...(await startCommandRealtimeAudioBridge({
|
||||
config: params.config,
|
||||
fullConfig: params.fullConfig,
|
||||
runtime: params.runtime,
|
||||
meetingSessionId: params.meetingSessionId,
|
||||
inputCommand: params.config.chrome.audioInputCommand,
|
||||
outputCommand: params.config.chrome.audioOutputCommand,
|
||||
logger: params.logger,
|
||||
@@ -116,13 +119,30 @@ export async function launchChromeMeet(params: {
|
||||
}
|
||||
argv.push(params.url);
|
||||
|
||||
const result = await params.runtime.system.runCommandWithTimeout(argv, {
|
||||
timeoutMs: params.config.chrome.joinTimeoutMs,
|
||||
});
|
||||
if (result.code !== 0) {
|
||||
let commandPairBridgeStopped = false;
|
||||
const stopCommandPairBridge = async () => {
|
||||
if (commandPairBridgeStopped) {
|
||||
return;
|
||||
}
|
||||
commandPairBridgeStopped = true;
|
||||
if (audioBridge?.type === "command-pair") {
|
||||
await audioBridge.stop();
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
const result = await params.runtime.system.runCommandWithTimeout(argv, {
|
||||
timeoutMs: params.config.chrome.joinTimeoutMs,
|
||||
});
|
||||
if (result.code === 0) {
|
||||
return { launched: true, audioBridge };
|
||||
}
|
||||
await stopCommandPairBridge();
|
||||
throw new Error(
|
||||
`failed to launch Chrome for Meet: ${result.stderr || result.stdout || result.code}`,
|
||||
);
|
||||
} catch (error) {
|
||||
await stopCommandPairBridge();
|
||||
throw error;
|
||||
}
|
||||
return { launched: true, audioBridge };
|
||||
}
|
||||
|
||||
@@ -82,3 +82,23 @@ export async function joinMeetViaVoiceCallGateway(params: {
|
||||
await client?.stopAndWait({ timeoutMs: 1_000 });
|
||||
}
|
||||
}
|
||||
|
||||
export async function endMeetVoiceCallGatewayCall(params: {
|
||||
config: GoogleMeetConfig;
|
||||
callId: string;
|
||||
}): Promise<void> {
|
||||
let client: VoiceCallGatewayClient | undefined;
|
||||
|
||||
try {
|
||||
client = await createConnectedGatewayClient(params.config);
|
||||
await client.request(
|
||||
"voicecall.end",
|
||||
{
|
||||
callId: params.callId,
|
||||
},
|
||||
{ timeoutMs: params.config.voiceCall.requestTimeoutMs },
|
||||
);
|
||||
} finally {
|
||||
await client?.stopAndWait({ timeoutMs: 1_000 });
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user