mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 14:20:44 +00:00
feat(google-meet): default talk-back to agent mode
This commit is contained in:
@@ -4,6 +4,7 @@ import { tmpdir } from "node:os";
|
||||
import path from "node:path";
|
||||
import { PassThrough, Writable } from "node:stream";
|
||||
import { createContext, Script } from "node:vm";
|
||||
import type { RealtimeTranscriptionProviderPlugin } from "openclaw/plugin-sdk/realtime-transcription";
|
||||
import type { RealtimeVoiceProviderPlugin } from "openclaw/plugin-sdk/realtime-voice";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import plugin, { __testing as googleMeetPluginTesting } from "./index.js";
|
||||
@@ -25,8 +26,10 @@ import {
|
||||
import { handleGoogleMeetNodeHostCommand } from "./src/node-host.js";
|
||||
import { startNodeRealtimeAudioBridge } from "./src/realtime-node.js";
|
||||
import {
|
||||
convertGoogleMeetTtsAudioForBridge,
|
||||
extendGoogleMeetOutputEchoSuppression,
|
||||
isGoogleMeetLikelyAssistantEchoTranscript,
|
||||
startCommandAgentAudioBridge,
|
||||
startCommandRealtimeAudioBridge,
|
||||
} from "./src/realtime.js";
|
||||
import { GoogleMeetRuntime, normalizeMeetUrl } from "./src/runtime.js";
|
||||
@@ -94,19 +97,6 @@ function setup(
|
||||
return harness;
|
||||
}
|
||||
|
||||
async function withProcessPlatform<T>(
|
||||
platform: NodeJS.Platform,
|
||||
callback: () => Promise<T>,
|
||||
): Promise<T> {
|
||||
const originalPlatform = process.platform;
|
||||
Object.defineProperty(process, "platform", { value: platform });
|
||||
try {
|
||||
return await callback();
|
||||
} finally {
|
||||
Object.defineProperty(process, "platform", { value: originalPlatform });
|
||||
}
|
||||
}
|
||||
|
||||
function jsonResponse(value: unknown): Response {
|
||||
return new Response(JSON.stringify(value), {
|
||||
status: 200,
|
||||
@@ -324,13 +314,13 @@ describe("google-meet plugin", () => {
|
||||
googleMeetPluginTesting.setPlatformForTests();
|
||||
});
|
||||
|
||||
it("defaults to chrome realtime with safe read-only tools", () => {
|
||||
it("defaults to chrome agent mode with safe read-only tools", () => {
|
||||
expect(resolveGoogleMeetConfig({})).toMatchObject({
|
||||
enabled: true,
|
||||
defaults: {},
|
||||
preview: { enrollmentAcknowledged: false },
|
||||
defaultTransport: "chrome",
|
||||
defaultMode: "realtime",
|
||||
defaultMode: "agent",
|
||||
chrome: {
|
||||
audioBackend: "blackhole-2ch",
|
||||
launch: true,
|
||||
@@ -537,7 +527,7 @@ describe("google-meet plugin", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("keeps the agent tool visible on non-macOS hosts but blocks local Chrome realtime joins", async () => {
|
||||
it("keeps the agent tool visible on non-macOS hosts but blocks local Chrome talk-back joins", async () => {
|
||||
const { cliRegistrations, methods, tools } = setup(undefined, { registerPlatform: "linux" });
|
||||
const tool = tools[0] as {
|
||||
execute: (id: string, params: unknown) => Promise<{ isError?: boolean; content: unknown }>;
|
||||
@@ -555,7 +545,7 @@ describe("google-meet plugin", () => {
|
||||
).toBe(true);
|
||||
|
||||
const blocked = await tool.execute("id", { action: "join" });
|
||||
expect(JSON.stringify(blocked)).toContain("local Chrome realtime audio is macOS-only");
|
||||
expect(JSON.stringify(blocked)).toContain("local Chrome talk-back audio is macOS-only");
|
||||
|
||||
expect(
|
||||
googleMeetPluginTesting.isGoogleMeetAgentToolActionUnsupportedOnHost({
|
||||
@@ -631,7 +621,7 @@ describe("google-meet plugin", () => {
|
||||
description: expect.stringContaining("recover_current_tab"),
|
||||
},
|
||||
transport: { type: "string", enum: ["chrome", "chrome-node", "twilio"] },
|
||||
mode: { type: "string", enum: ["realtime", "transcribe"] },
|
||||
mode: { type: "string", enum: ["agent", "bidi", "realtime", "transcribe"] },
|
||||
},
|
||||
});
|
||||
});
|
||||
@@ -1077,7 +1067,7 @@ describe("google-meet plugin", () => {
|
||||
|
||||
expect(result.details.session).toMatchObject({
|
||||
transport: "twilio",
|
||||
mode: "realtime",
|
||||
mode: "agent",
|
||||
twilio: {
|
||||
dialInNumber: "+15551234567",
|
||||
pinProvided: true,
|
||||
@@ -1179,6 +1169,53 @@ describe("google-meet plugin", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("rejects agent-mode external audio bridges in setup status", async () => {
|
||||
const originalPlatform = process.platform;
|
||||
Object.defineProperty(process, "platform", { value: "darwin" });
|
||||
try {
|
||||
const { tools } = setup(
|
||||
{
|
||||
defaultMode: "agent",
|
||||
defaultTransport: "chrome",
|
||||
chrome: {
|
||||
audioBridgeCommand: ["bridge", "start"],
|
||||
audioInputCommand: ["capture-meet"],
|
||||
audioOutputCommand: ["play-meet"],
|
||||
},
|
||||
},
|
||||
{
|
||||
runCommandWithTimeoutHandler: async (argv) => {
|
||||
if (argv[0] === "/usr/sbin/system_profiler") {
|
||||
return { code: 0, stdout: "BlackHole 2ch", stderr: "" };
|
||||
}
|
||||
return { code: 0, stdout: "", stderr: "" };
|
||||
},
|
||||
},
|
||||
);
|
||||
const tool = tools[0] as {
|
||||
execute: (
|
||||
id: string,
|
||||
params: unknown,
|
||||
) => Promise<{ details: { ok?: boolean; checks?: unknown[] } }>;
|
||||
};
|
||||
|
||||
const result = await tool.execute("id", { action: "setup_status" });
|
||||
|
||||
expect(result.details.ok).toBe(false);
|
||||
expect(result.details.checks).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
id: "audio-bridge",
|
||||
ok: false,
|
||||
message: expect.stringContaining("chrome.audioBridgeCommand is bidi-only"),
|
||||
}),
|
||||
]),
|
||||
);
|
||||
} finally {
|
||||
Object.defineProperty(process, "platform", { value: originalPlatform });
|
||||
}
|
||||
});
|
||||
|
||||
it("reports attendance through the tool", async () => {
|
||||
stubMeetArtifactsApi();
|
||||
const { tools } = setup();
|
||||
@@ -1894,209 +1931,223 @@ describe("google-meet plugin", () => {
|
||||
});
|
||||
|
||||
it("grants local Chrome Meet media permissions against the opened tab", async () => {
|
||||
const callGatewayFromCli = mockLocalMeetBrowserRequest({
|
||||
inCall: true,
|
||||
micMuted: false,
|
||||
title: "Meet call",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
});
|
||||
const { methods } = setup({
|
||||
defaultMode: "realtime",
|
||||
defaultTransport: "chrome",
|
||||
chrome: {
|
||||
audioBridgeCommand: ["bridge", "start"],
|
||||
},
|
||||
realtime: { introMessage: "" },
|
||||
});
|
||||
const handler = methods.get("googlemeet.join") as
|
||||
| ((ctx: {
|
||||
params: Record<string, unknown>;
|
||||
respond: ReturnType<typeof vi.fn>;
|
||||
}) => Promise<void>)
|
||||
| undefined;
|
||||
const respond = vi.fn();
|
||||
const originalPlatform = process.platform;
|
||||
Object.defineProperty(process, "platform", { value: "darwin" });
|
||||
try {
|
||||
const callGatewayFromCli = mockLocalMeetBrowserRequest({
|
||||
inCall: true,
|
||||
micMuted: false,
|
||||
title: "Meet call",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
});
|
||||
const { methods } = setup({
|
||||
defaultMode: "bidi",
|
||||
defaultTransport: "chrome",
|
||||
chrome: {
|
||||
audioBridgeCommand: ["bridge", "start"],
|
||||
},
|
||||
realtime: { introMessage: "" },
|
||||
});
|
||||
const handler = methods.get("googlemeet.join") as
|
||||
| ((ctx: {
|
||||
params: Record<string, unknown>;
|
||||
respond: ReturnType<typeof vi.fn>;
|
||||
}) => Promise<void>)
|
||||
| undefined;
|
||||
const respond = vi.fn();
|
||||
|
||||
await withProcessPlatform("darwin", async () => {
|
||||
await handler?.({
|
||||
params: { url: "https://meet.google.com/abc-defg-hij" },
|
||||
respond,
|
||||
});
|
||||
});
|
||||
|
||||
expect(respond.mock.calls[0]?.[0]).toBe(true);
|
||||
expect(callGatewayFromCli).toHaveBeenCalledWith(
|
||||
"browser.request",
|
||||
expect.any(Object),
|
||||
expect.objectContaining({
|
||||
method: "POST",
|
||||
path: "/permissions/grant",
|
||||
body: expect.objectContaining({
|
||||
origin: "https://meet.google.com",
|
||||
permissions: ["audioCapture", "videoCapture"],
|
||||
targetId: "local-meet-tab",
|
||||
expect(respond.mock.calls[0]?.[0]).toBe(true);
|
||||
expect(callGatewayFromCli).toHaveBeenCalledWith(
|
||||
"browser.request",
|
||||
expect.any(Object),
|
||||
expect.objectContaining({
|
||||
method: "POST",
|
||||
path: "/permissions/grant",
|
||||
body: expect.objectContaining({
|
||||
origin: "https://meet.google.com",
|
||||
permissions: ["audioCapture", "videoCapture"],
|
||||
targetId: "local-meet-tab",
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
{ progress: false },
|
||||
);
|
||||
{ progress: false },
|
||||
);
|
||||
} finally {
|
||||
Object.defineProperty(process, "platform", { value: originalPlatform });
|
||||
}
|
||||
});
|
||||
|
||||
it("starts the local realtime audio bridge after Meet is inspected", async () => {
|
||||
const originalPlatform = process.platform;
|
||||
Object.defineProperty(process, "platform", { value: "darwin" });
|
||||
const events: string[] = [];
|
||||
const callGatewayFromCli = vi.fn(
|
||||
async (
|
||||
_method: string,
|
||||
_opts: unknown,
|
||||
params?: unknown,
|
||||
_extra?: unknown,
|
||||
): Promise<Record<string, unknown>> => {
|
||||
const request = params as {
|
||||
path?: string;
|
||||
body?: { fn?: string; targetId?: string; url?: string };
|
||||
};
|
||||
events.push(`browser:${request.path}`);
|
||||
if (request.path === "/tabs") {
|
||||
return { tabs: [] };
|
||||
}
|
||||
if (request.path === "/tabs/open") {
|
||||
return {
|
||||
targetId: "local-meet-tab",
|
||||
title: "Meet",
|
||||
url: request.body?.url ?? "https://meet.google.com/abc-defg-hij",
|
||||
try {
|
||||
const callGatewayFromCli = vi.fn(
|
||||
async (
|
||||
_method: string,
|
||||
_opts: unknown,
|
||||
params?: unknown,
|
||||
_extra?: unknown,
|
||||
): Promise<Record<string, unknown>> => {
|
||||
const request = params as {
|
||||
path?: string;
|
||||
body?: { fn?: string; targetId?: string; url?: string };
|
||||
};
|
||||
}
|
||||
if (request.path === "/tabs/focus" || request.path === "/permissions/grant") {
|
||||
return { ok: true };
|
||||
}
|
||||
if (request.path === "/act") {
|
||||
return {
|
||||
result: JSON.stringify({
|
||||
inCall: true,
|
||||
micMuted: false,
|
||||
title: "Meet call",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
}),
|
||||
};
|
||||
}
|
||||
throw new Error(`unexpected browser request path ${request.path}`);
|
||||
},
|
||||
);
|
||||
chromeTransportTesting.setDepsForTest({ callGatewayFromCli });
|
||||
const { methods } = setup(
|
||||
{
|
||||
defaultMode: "realtime",
|
||||
defaultTransport: "chrome",
|
||||
chrome: {
|
||||
audioBridgeCommand: ["bridge", "start"],
|
||||
events.push(`browser:${request.path}`);
|
||||
if (request.path === "/tabs") {
|
||||
return { tabs: [] };
|
||||
}
|
||||
if (request.path === "/tabs/open") {
|
||||
return {
|
||||
targetId: "local-meet-tab",
|
||||
title: "Meet",
|
||||
url: request.body?.url ?? "https://meet.google.com/abc-defg-hij",
|
||||
};
|
||||
}
|
||||
if (request.path === "/tabs/focus" || request.path === "/permissions/grant") {
|
||||
return { ok: true };
|
||||
}
|
||||
if (request.path === "/act") {
|
||||
return {
|
||||
result: JSON.stringify({
|
||||
inCall: true,
|
||||
micMuted: false,
|
||||
title: "Meet call",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
}),
|
||||
};
|
||||
}
|
||||
throw new Error(`unexpected browser request path ${request.path}`);
|
||||
},
|
||||
realtime: { introMessage: "" },
|
||||
},
|
||||
{
|
||||
runCommandWithTimeoutHandler: async (argv) => {
|
||||
events.push(`command:${argv.join(" ")}`);
|
||||
return argv[0] === "/usr/sbin/system_profiler"
|
||||
? { code: 0, stdout: "BlackHole 2ch", stderr: "" }
|
||||
: { code: 0, stdout: "", stderr: "" };
|
||||
);
|
||||
chromeTransportTesting.setDepsForTest({ callGatewayFromCli });
|
||||
const { methods } = setup(
|
||||
{
|
||||
defaultMode: "bidi",
|
||||
defaultTransport: "chrome",
|
||||
chrome: {
|
||||
audioBridgeCommand: ["bridge", "start"],
|
||||
},
|
||||
realtime: { introMessage: "" },
|
||||
},
|
||||
},
|
||||
);
|
||||
const handler = methods.get("googlemeet.join") as
|
||||
| ((ctx: {
|
||||
params: Record<string, unknown>;
|
||||
respond: ReturnType<typeof vi.fn>;
|
||||
}) => Promise<void>)
|
||||
| undefined;
|
||||
const respond = vi.fn();
|
||||
{
|
||||
runCommandWithTimeoutHandler: async (argv) => {
|
||||
events.push(`command:${argv.join(" ")}`);
|
||||
return argv[0] === "/usr/sbin/system_profiler"
|
||||
? { code: 0, stdout: "BlackHole 2ch", stderr: "" }
|
||||
: { code: 0, stdout: "", stderr: "" };
|
||||
},
|
||||
},
|
||||
);
|
||||
const handler = methods.get("googlemeet.join") as
|
||||
| ((ctx: {
|
||||
params: Record<string, unknown>;
|
||||
respond: ReturnType<typeof vi.fn>;
|
||||
}) => Promise<void>)
|
||||
| undefined;
|
||||
const respond = vi.fn();
|
||||
|
||||
await withProcessPlatform("darwin", async () => {
|
||||
await handler?.({
|
||||
params: { url: "https://meet.google.com/abc-defg-hij" },
|
||||
respond,
|
||||
});
|
||||
});
|
||||
|
||||
expect(respond.mock.calls[0]?.[0]).toBe(true);
|
||||
expect(events.indexOf("browser:/act")).toBeGreaterThan(-1);
|
||||
expect(events.indexOf("command:bridge start")).toBeGreaterThan(events.indexOf("browser:/act"));
|
||||
expect(respond.mock.calls[0]?.[0]).toBe(true);
|
||||
expect(events.indexOf("browser:/act")).toBeGreaterThan(-1);
|
||||
expect(events.indexOf("command:bridge start")).toBeGreaterThan(
|
||||
events.indexOf("browser:/act"),
|
||||
);
|
||||
} finally {
|
||||
Object.defineProperty(process, "platform", { value: originalPlatform });
|
||||
}
|
||||
});
|
||||
|
||||
it("does not start the local realtime audio bridge while Meet admission is pending", async () => {
|
||||
const originalPlatform = process.platform;
|
||||
Object.defineProperty(process, "platform", { value: "darwin" });
|
||||
const events: string[] = [];
|
||||
const callGatewayFromCli = vi.fn(
|
||||
async (
|
||||
_method: string,
|
||||
_opts: unknown,
|
||||
params?: unknown,
|
||||
_extra?: unknown,
|
||||
): Promise<Record<string, unknown>> => {
|
||||
const request = params as { path?: string; body?: { targetId?: string; url?: string } };
|
||||
events.push(`browser:${request.path}`);
|
||||
if (request.path === "/tabs") {
|
||||
return { tabs: [] };
|
||||
}
|
||||
if (request.path === "/tabs/open") {
|
||||
return {
|
||||
targetId: "local-meet-tab",
|
||||
title: "Meet",
|
||||
url: request.body?.url ?? "https://meet.google.com/abc-defg-hij",
|
||||
};
|
||||
}
|
||||
if (request.path === "/tabs/focus" || request.path === "/permissions/grant") {
|
||||
return { ok: true };
|
||||
}
|
||||
if (request.path === "/act") {
|
||||
return {
|
||||
result: JSON.stringify({
|
||||
inCall: false,
|
||||
lobbyWaiting: true,
|
||||
manualActionRequired: true,
|
||||
manualActionReason: "meet-admission-required",
|
||||
manualActionMessage: "Admit the OpenClaw browser participant in Google Meet.",
|
||||
try {
|
||||
const callGatewayFromCli = vi.fn(
|
||||
async (
|
||||
_method: string,
|
||||
_opts: unknown,
|
||||
params?: unknown,
|
||||
_extra?: unknown,
|
||||
): Promise<Record<string, unknown>> => {
|
||||
const request = params as { path?: string; body?: { targetId?: string; url?: string } };
|
||||
events.push(`browser:${request.path}`);
|
||||
if (request.path === "/tabs") {
|
||||
return { tabs: [] };
|
||||
}
|
||||
if (request.path === "/tabs/open") {
|
||||
return {
|
||||
targetId: "local-meet-tab",
|
||||
title: "Meet",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
}),
|
||||
};
|
||||
}
|
||||
throw new Error(`unexpected browser request path ${request.path}`);
|
||||
},
|
||||
);
|
||||
chromeTransportTesting.setDepsForTest({ callGatewayFromCli });
|
||||
const { methods } = setup(
|
||||
{
|
||||
defaultMode: "realtime",
|
||||
defaultTransport: "chrome",
|
||||
chrome: {
|
||||
audioBridgeCommand: ["bridge", "start"],
|
||||
waitForInCallMs: 1,
|
||||
url: request.body?.url ?? "https://meet.google.com/abc-defg-hij",
|
||||
};
|
||||
}
|
||||
if (request.path === "/tabs/focus" || request.path === "/permissions/grant") {
|
||||
return { ok: true };
|
||||
}
|
||||
if (request.path === "/act") {
|
||||
return {
|
||||
result: JSON.stringify({
|
||||
inCall: false,
|
||||
lobbyWaiting: true,
|
||||
manualActionRequired: true,
|
||||
manualActionReason: "meet-admission-required",
|
||||
manualActionMessage: "Admit the OpenClaw browser participant in Google Meet.",
|
||||
title: "Meet",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
}),
|
||||
};
|
||||
}
|
||||
throw new Error(`unexpected browser request path ${request.path}`);
|
||||
},
|
||||
realtime: { introMessage: "" },
|
||||
},
|
||||
{
|
||||
runCommandWithTimeoutHandler: async (argv) => {
|
||||
events.push(`command:${argv.join(" ")}`);
|
||||
return argv[0] === "/usr/sbin/system_profiler"
|
||||
? { code: 0, stdout: "BlackHole 2ch", stderr: "" }
|
||||
: { code: 0, stdout: "", stderr: "" };
|
||||
);
|
||||
chromeTransportTesting.setDepsForTest({ callGatewayFromCli });
|
||||
const { methods } = setup(
|
||||
{
|
||||
defaultMode: "bidi",
|
||||
defaultTransport: "chrome",
|
||||
chrome: {
|
||||
audioBridgeCommand: ["bridge", "start"],
|
||||
waitForInCallMs: 1,
|
||||
},
|
||||
realtime: { introMessage: "" },
|
||||
},
|
||||
},
|
||||
);
|
||||
const handler = methods.get("googlemeet.join") as
|
||||
| ((ctx: {
|
||||
params: Record<string, unknown>;
|
||||
respond: ReturnType<typeof vi.fn>;
|
||||
}) => Promise<void>)
|
||||
| undefined;
|
||||
const respond = vi.fn();
|
||||
{
|
||||
runCommandWithTimeoutHandler: async (argv) => {
|
||||
events.push(`command:${argv.join(" ")}`);
|
||||
return argv[0] === "/usr/sbin/system_profiler"
|
||||
? { code: 0, stdout: "BlackHole 2ch", stderr: "" }
|
||||
: { code: 0, stdout: "", stderr: "" };
|
||||
},
|
||||
},
|
||||
);
|
||||
const handler = methods.get("googlemeet.join") as
|
||||
| ((ctx: {
|
||||
params: Record<string, unknown>;
|
||||
respond: ReturnType<typeof vi.fn>;
|
||||
}) => Promise<void>)
|
||||
| undefined;
|
||||
const respond = vi.fn();
|
||||
|
||||
await withProcessPlatform("darwin", async () => {
|
||||
await handler?.({
|
||||
params: { url: "https://meet.google.com/abc-defg-hij" },
|
||||
respond,
|
||||
});
|
||||
});
|
||||
|
||||
expect(respond.mock.calls[0]?.[0]).toBe(true);
|
||||
expect(events).toContain("browser:/act");
|
||||
expect(events).not.toContain("command:bridge start");
|
||||
expect(respond.mock.calls[0]?.[0]).toBe(true);
|
||||
expect(events).toContain("browser:/act");
|
||||
expect(events).not.toContain("command:bridge start");
|
||||
} finally {
|
||||
Object.defineProperty(process, "platform", { value: originalPlatform });
|
||||
}
|
||||
});
|
||||
|
||||
it("refreshes observe-only caption health when status is requested", async () => {
|
||||
@@ -2220,7 +2271,7 @@ describe("google-meet plugin", () => {
|
||||
let openedTab = false;
|
||||
const { methods, nodesInvoke } = setup(
|
||||
{
|
||||
defaultMode: "realtime",
|
||||
defaultMode: "agent",
|
||||
defaultTransport: "chrome-node",
|
||||
},
|
||||
{
|
||||
@@ -2462,7 +2513,7 @@ describe("google-meet plugin", () => {
|
||||
expect(result.micMuted).toBe(true);
|
||||
expect(localMic.click).toHaveBeenCalledTimes(1);
|
||||
expect(remoteMute.click).not.toHaveBeenCalled();
|
||||
expect(result.notes).toContain("Attempted to turn on the Meet microphone for realtime mode.");
|
||||
expect(result.notes).toContain("Attempted to turn on the Meet microphone for talk-back mode.");
|
||||
});
|
||||
|
||||
it("blocks realtime speech while the Meet microphone remains muted", async () => {
|
||||
@@ -3098,7 +3149,7 @@ describe("google-meet plugin", () => {
|
||||
id: "meet_1",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
transport: "chrome",
|
||||
mode: "realtime",
|
||||
mode: "agent",
|
||||
state: "active",
|
||||
createdAt: "2026-04-27T00:00:00.000Z",
|
||||
updatedAt: "2026-04-27T00:00:00.000Z",
|
||||
@@ -3123,7 +3174,7 @@ describe("google-meet plugin", () => {
|
||||
expect(join).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
message: "Say exactly: hello.",
|
||||
mode: "realtime",
|
||||
mode: "agent",
|
||||
}),
|
||||
);
|
||||
expect(speak).not.toHaveBeenCalled();
|
||||
@@ -3145,7 +3196,7 @@ describe("google-meet plugin", () => {
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
mode: "transcribe",
|
||||
}),
|
||||
).rejects.toThrow("test_speech requires mode: realtime");
|
||||
).rejects.toThrow("test_speech requires mode: agent or bidi");
|
||||
});
|
||||
|
||||
it("rejects realtime and Twilio modes for test listen", async () => {
|
||||
@@ -3159,7 +3210,7 @@ describe("google-meet plugin", () => {
|
||||
await expect(
|
||||
runtime.testListen({
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
mode: "realtime",
|
||||
mode: "agent",
|
||||
}),
|
||||
).rejects.toThrow("test_listen requires mode: transcribe");
|
||||
|
||||
@@ -3240,7 +3291,7 @@ describe("google-meet plugin", () => {
|
||||
const { methods, nodesInvoke } = setup(
|
||||
{
|
||||
defaultTransport: "chrome-node",
|
||||
defaultMode: "realtime",
|
||||
defaultMode: "agent",
|
||||
},
|
||||
{
|
||||
nodesInvokeHandler: async ({ command, params }) => {
|
||||
@@ -3437,6 +3488,7 @@ describe("google-meet plugin", () => {
|
||||
Object.defineProperty(process, "platform", { value: "darwin" });
|
||||
try {
|
||||
const { methods, runCommandWithTimeout } = setup({
|
||||
defaultMode: "bidi",
|
||||
chrome: {
|
||||
audioBridgeHealthCommand: ["bridge", "status"],
|
||||
audioBridgeCommand: ["bridge", "start"],
|
||||
@@ -3478,6 +3530,136 @@ describe("google-meet plugin", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("uses realtime transcription plus regular TTS in Chrome agent mode", async () => {
|
||||
let callbacks: Parameters<RealtimeTranscriptionProviderPlugin["createSession"]>[0] | undefined;
|
||||
const sendAudio = vi.fn();
|
||||
const sttSession = {
|
||||
connect: vi.fn(async () => {}),
|
||||
sendAudio,
|
||||
close: vi.fn(),
|
||||
isConnected: vi.fn(() => true),
|
||||
};
|
||||
const provider: RealtimeTranscriptionProviderPlugin = {
|
||||
id: "openai",
|
||||
label: "OpenAI",
|
||||
autoSelectOrder: 1,
|
||||
resolveConfig: ({ rawConfig }) => rawConfig,
|
||||
isConfigured: () => true,
|
||||
createSession: (req) => {
|
||||
callbacks = req;
|
||||
return sttSession;
|
||||
},
|
||||
};
|
||||
const inputStdout = new PassThrough();
|
||||
const outputStdinWrites: Buffer[] = [];
|
||||
const makeProcess = (stdio: {
|
||||
stdin?: { write(chunk: unknown): unknown } | null;
|
||||
stdout?: { on(event: "data", listener: (chunk: unknown) => void): unknown } | null;
|
||||
}): TestBridgeProcess => {
|
||||
const proc = new EventEmitter() as unknown as TestBridgeProcess;
|
||||
proc.stdin = stdio.stdin;
|
||||
proc.stdout = stdio.stdout;
|
||||
proc.stderr = new PassThrough();
|
||||
proc.killed = false;
|
||||
proc.kill = vi.fn(() => {
|
||||
proc.killed = true;
|
||||
return true;
|
||||
});
|
||||
return proc;
|
||||
};
|
||||
const outputStdin = new Writable({
|
||||
write(chunk, _encoding, done) {
|
||||
outputStdinWrites.push(Buffer.from(chunk));
|
||||
done();
|
||||
},
|
||||
});
|
||||
const inputProcess = makeProcess({ stdout: inputStdout, stdin: null });
|
||||
const outputProcess = makeProcess({ stdin: outputStdin, stdout: null });
|
||||
const spawnMock = vi.fn().mockReturnValueOnce(outputProcess).mockReturnValueOnce(inputProcess);
|
||||
const sessionStore: Record<string, unknown> = {};
|
||||
const runtime = {
|
||||
tts: {
|
||||
textToSpeechTelephony: vi.fn(async () => ({
|
||||
success: true,
|
||||
audioBuffer: Buffer.from([1, 0, 2, 0]),
|
||||
sampleRate: 24_000,
|
||||
})),
|
||||
},
|
||||
agent: {
|
||||
resolveAgentDir: vi.fn(() => "/tmp/agent"),
|
||||
resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"),
|
||||
ensureAgentWorkspace: vi.fn(async () => {}),
|
||||
session: {
|
||||
resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
|
||||
loadSessionStore: vi.fn(() => sessionStore),
|
||||
saveSessionStore: vi.fn(async () => {}),
|
||||
updateSessionStore: vi.fn(async (_storePath, mutator) => mutator(sessionStore as never)),
|
||||
resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
|
||||
},
|
||||
runEmbeddedPiAgent: vi.fn(async () => ({
|
||||
payloads: [{ text: "Use the Portugal launch data." }],
|
||||
meta: {},
|
||||
})),
|
||||
resolveAgentTimeoutMs: vi.fn(() => 1000),
|
||||
},
|
||||
};
|
||||
|
||||
const handle = await startCommandAgentAudioBridge({
|
||||
config: resolveGoogleMeetConfig({
|
||||
realtime: { provider: "openai", agentId: "jay", introMessage: "" },
|
||||
}),
|
||||
fullConfig: {} as never,
|
||||
runtime: runtime as never,
|
||||
meetingSessionId: "meet-1",
|
||||
inputCommand: ["capture-meet"],
|
||||
outputCommand: ["play-meet"],
|
||||
logger: noopLogger,
|
||||
providers: [provider],
|
||||
spawn: spawnMock,
|
||||
});
|
||||
|
||||
inputStdout.write(Buffer.from([1, 0, 2, 0, 3, 0, 4, 0]));
|
||||
callbacks?.onTranscript?.("Please summarize the launch.");
|
||||
await new Promise((resolve) => setTimeout(resolve, 1100));
|
||||
|
||||
expect(sendAudio).toHaveBeenCalledWith(expect.any(Buffer));
|
||||
expect(runtime.agent.runEmbeddedPiAgent).toHaveBeenCalled();
|
||||
expect(runtime.tts.textToSpeechTelephony).toHaveBeenCalledWith({
|
||||
text: "Use the Portugal launch data.",
|
||||
cfg: {},
|
||||
});
|
||||
expect(Buffer.concat(outputStdinWrites)).toEqual(Buffer.from([1, 0, 2, 0]));
|
||||
expect(handle.getHealth()).toMatchObject({
|
||||
providerConnected: true,
|
||||
audioInputActive: true,
|
||||
audioOutputActive: true,
|
||||
realtimeTranscriptLines: 2,
|
||||
lastRealtimeTranscriptRole: "assistant",
|
||||
});
|
||||
await handle.stop();
|
||||
});
|
||||
|
||||
it("preserves telephony TTS output formats when routing Google Meet agent audio", () => {
|
||||
const ulaw = Buffer.from([0xff, 0x7f, 0x00]);
|
||||
const pcmBridgeConfig = resolveGoogleMeetConfig({ chrome: { audioFormat: "pcm16-24khz" } });
|
||||
const ulawBridgeConfig = resolveGoogleMeetConfig({ chrome: { audioFormat: "g711-ulaw-8khz" } });
|
||||
|
||||
expect(
|
||||
convertGoogleMeetTtsAudioForBridge(ulaw, 8_000, ulawBridgeConfig, "raw-8khz-8bit-mono-mulaw"),
|
||||
).toEqual(ulaw);
|
||||
const pcmForMeet = convertGoogleMeetTtsAudioForBridge(
|
||||
ulaw,
|
||||
8_000,
|
||||
pcmBridgeConfig,
|
||||
"ulaw_8000",
|
||||
);
|
||||
expect(pcmForMeet.byteLength).toBe(18);
|
||||
expect(pcmForMeet).not.toEqual(ulaw);
|
||||
expect(() =>
|
||||
convertGoogleMeetTtsAudioForBridge(Buffer.from([1, 2, 3]), 8_000, pcmBridgeConfig, "mp3"),
|
||||
).toThrow("Unsupported telephony TTS output format");
|
||||
});
|
||||
|
||||
it("pipes Chrome command-pair audio through the realtime provider", async () => {
|
||||
let callbacks: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0] | undefined;
|
||||
const sendAudio = vi.fn();
|
||||
|
||||
@@ -52,7 +52,7 @@ const googleMeetConfigSchema = {
|
||||
},
|
||||
defaultMode: {
|
||||
label: "Default Mode",
|
||||
help: "Realtime starts the duplex voice model loop. Transcribe joins/observes without the realtime talk-back bridge.",
|
||||
help: "Agent uses realtime transcription plus regular OpenClaw TTS. Bidi uses the realtime voice model directly. Transcribe observes only.",
|
||||
},
|
||||
"chrome.audioBackend": {
|
||||
label: "Chrome Audio Backend",
|
||||
@@ -152,7 +152,7 @@ const googleMeetConfigSchema = {
|
||||
"voiceCall.introMessage": { label: "Voice Call Intro Message", advanced: true },
|
||||
"realtime.strategy": {
|
||||
label: "Realtime Strategy",
|
||||
help: "Agent routes participant speech through OpenClaw before speaking; bidi lets the realtime model answer directly.",
|
||||
help: "Legacy realtime alias setting. Use mode=agent or mode=bidi for new Meet joins.",
|
||||
},
|
||||
"realtime.provider": {
|
||||
label: "Realtime Provider",
|
||||
@@ -238,9 +238,9 @@ const GoogleMeetToolSchema = Type.Object({
|
||||
),
|
||||
mode: Type.Optional(
|
||||
Type.String({
|
||||
enum: ["realtime", "transcribe"],
|
||||
enum: ["agent", "bidi", "realtime", "transcribe"],
|
||||
description:
|
||||
"Join mode. realtime starts live listen/talk-back through the realtime voice model; transcribe joins without the realtime talk-back bridge.",
|
||||
"Join mode. agent uses realtime transcription, the configured OpenClaw agent, and regular TTS. bidi uses the realtime voice model directly. realtime is a compatibility alias for agent. transcribe joins observe-only.",
|
||||
}),
|
||||
),
|
||||
dialInNumber: Type.Optional(
|
||||
@@ -328,7 +328,14 @@ function normalizeTransport(value: unknown): GoogleMeetTransport | undefined {
|
||||
}
|
||||
|
||||
function normalizeMode(value: unknown): GoogleMeetMode | undefined {
|
||||
return value === "realtime" || value === "transcribe" ? value : undefined;
|
||||
if (value === "realtime") {
|
||||
return "agent";
|
||||
}
|
||||
return value === "agent" || value === "bidi" || value === "transcribe" ? value : undefined;
|
||||
}
|
||||
|
||||
function isGoogleMeetTalkBackMode(mode: GoogleMeetMode): boolean {
|
||||
return mode === "agent" || mode === "bidi";
|
||||
}
|
||||
|
||||
function resolveMeetingInput(config: GoogleMeetConfig, value: unknown): string {
|
||||
@@ -418,9 +425,9 @@ function isGoogleMeetAgentToolActionUnsupportedOnHost(params: {
|
||||
const transport = normalizeTransport(params.raw.transport) ?? params.config.defaultTransport;
|
||||
const mode =
|
||||
action === "test_speech"
|
||||
? "realtime"
|
||||
? "agent"
|
||||
: (normalizeMode(params.raw.mode) ?? params.config.defaultMode);
|
||||
return transport === "chrome" && mode === "realtime";
|
||||
return transport === "chrome" && isGoogleMeetTalkBackMode(mode);
|
||||
}
|
||||
|
||||
function assertGoogleMeetAgentToolActionSupported(params: {
|
||||
@@ -431,7 +438,7 @@ function assertGoogleMeetAgentToolActionSupported(params: {
|
||||
return;
|
||||
}
|
||||
throw new Error(
|
||||
"Google Meet local Chrome realtime audio is macOS-only. On this host, use mode: transcribe, transport: twilio, or transport: chrome-node backed by a macOS node.",
|
||||
"Google Meet local Chrome talk-back audio is macOS-only. On this host, use mode: transcribe, transport: twilio, or transport: chrome-node backed by a macOS node.",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -998,7 +1005,7 @@ export default definePluginEntry({
|
||||
name: "google_meet",
|
||||
label: "Google Meet",
|
||||
description:
|
||||
"Join and track Google Meet sessions through Chrome or Twilio. Call setup_status before join/create/test_listen/test_speech; if it reports a Chrome node offline, local audio missing, or missing Twilio dial plan, surface that blocker instead of retrying or switching transports. Twilio cannot dial a Meet URL directly: provide dialInNumber plus optional pin/dtmfSequence, or configure twilio.defaultDialInNumber. Offline nodes are diagnostics only, not usable candidates. If local Chrome realtime audio is unsupported on this OS, use mode=transcribe, transport=twilio, or a macOS chrome-node for realtime Chrome. If a Meet tab is already open after a timeout, call recover_current_tab before retrying join to report login, permission, or admission blockers without opening another tab.",
|
||||
"Join and track Google Meet sessions through Chrome or Twilio. Call setup_status before join/create/test_listen/test_speech; if it reports a Chrome node offline, local audio missing, or missing Twilio dial plan, surface that blocker instead of retrying or switching transports. Twilio cannot dial a Meet URL directly: provide dialInNumber plus optional pin/dtmfSequence, or configure twilio.defaultDialInNumber. Offline nodes are diagnostics only, not usable candidates. If local Chrome talk-back audio is unsupported on this OS, use mode=transcribe, transport=twilio, or a macOS chrome-node for agent/bidi Chrome. If a Meet tab is already open after a timeout, call recover_current_tab before retrying join to report login, permission, or admission blockers without opening another tab.",
|
||||
parameters: GoogleMeetToolSchema,
|
||||
async execute(_toolCallId, params) {
|
||||
const raw = asParamRecord(params);
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
},
|
||||
"defaultMode": {
|
||||
"label": "Default Mode",
|
||||
"help": "Realtime voice is the default."
|
||||
"help": "Agent uses realtime transcription plus regular OpenClaw TTS. Bidi uses the realtime voice model directly. Transcribe observes only."
|
||||
},
|
||||
"chrome.audioBackend": {
|
||||
"label": "Chrome Audio Backend",
|
||||
@@ -145,7 +145,7 @@
|
||||
},
|
||||
"realtime.strategy": {
|
||||
"label": "Realtime Strategy",
|
||||
"help": "Agent routes participant speech through OpenClaw before speaking; bidi lets the realtime model answer directly."
|
||||
"help": "Legacy realtime alias setting. Use mode=agent or mode=bidi for new Meet joins."
|
||||
},
|
||||
"realtime.provider": {
|
||||
"label": "Realtime Provider",
|
||||
@@ -227,8 +227,8 @@
|
||||
},
|
||||
"defaultMode": {
|
||||
"type": "string",
|
||||
"enum": ["realtime", "transcribe"],
|
||||
"default": "realtime"
|
||||
"enum": ["agent", "bidi", "realtime", "transcribe"],
|
||||
"default": "agent"
|
||||
},
|
||||
"chrome": {
|
||||
"type": "object",
|
||||
@@ -422,7 +422,7 @@
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"default": "You are joining a private Google Meet as an OpenClaw voice transport. Keep spoken replies brief and natural. In agent strategy, wait for OpenClaw consult results and speak them exactly. In bidi strategy, answer directly and call openclaw_agent_consult for deeper reasoning, current information, or tools."
|
||||
"default": "You are joining a private Google Meet as an OpenClaw voice transport. Keep spoken replies brief and natural. In agent mode, wait for OpenClaw consult results and speak them exactly. In bidi mode, answer directly and call openclaw_agent_consult for deeper reasoning, current information, or tools."
|
||||
},
|
||||
"introMessage": {
|
||||
"type": "string",
|
||||
|
||||
@@ -228,7 +228,7 @@ describe("google-meet CLI", () => {
|
||||
{
|
||||
id: "audio-bridge",
|
||||
ok: true,
|
||||
message: "Chrome command-pair realtime audio bridge configured (pcm16-24khz)",
|
||||
message: "Chrome command-pair talk-back audio bridge configured (pcm16-24khz)",
|
||||
},
|
||||
],
|
||||
}),
|
||||
@@ -236,7 +236,7 @@ describe("google-meet CLI", () => {
|
||||
}).parseAsync(["googlemeet", "setup"], { from: "user" });
|
||||
expect(stdout.output()).toContain("Google Meet setup: OK");
|
||||
expect(stdout.output()).toContain(
|
||||
"[ok] audio-bridge: Chrome command-pair realtime audio bridge configured (pcm16-24khz)",
|
||||
"[ok] audio-bridge: Chrome command-pair talk-back audio bridge configured (pcm16-24khz)",
|
||||
);
|
||||
expect(stdout.output()).not.toContain('"checks"');
|
||||
} finally {
|
||||
@@ -675,7 +675,7 @@ describe("google-meet CLI", () => {
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
state: "active",
|
||||
transport: "twilio",
|
||||
mode: "realtime",
|
||||
mode: "agent",
|
||||
participantIdentity: "Twilio PSTN participant",
|
||||
createdAt: "2026-04-25T00:00:00.000Z",
|
||||
updatedAt: "2026-04-25T00:00:01.000Z",
|
||||
@@ -704,7 +704,7 @@ describe("google-meet CLI", () => {
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
state: "active",
|
||||
transport: "chrome-node",
|
||||
mode: "realtime",
|
||||
mode: "agent",
|
||||
participantIdentity: "signed-in Google Chrome profile on a paired node",
|
||||
createdAt: "2026-04-25T00:00:00.000Z",
|
||||
updatedAt: "2026-04-25T00:00:01.000Z",
|
||||
@@ -908,7 +908,7 @@ describe("google-meet CLI", () => {
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
state: "active",
|
||||
transport: "chrome-node",
|
||||
mode: "realtime",
|
||||
mode: "agent",
|
||||
participantIdentity: "signed-in Google Chrome profile on a paired node",
|
||||
createdAt: "2026-04-25T00:00:00.000Z",
|
||||
updatedAt: "2026-04-25T00:00:01.000Z",
|
||||
@@ -964,7 +964,7 @@ describe("google-meet CLI", () => {
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
state: "active",
|
||||
transport: "twilio",
|
||||
mode: "realtime",
|
||||
mode: "agent",
|
||||
participantIdentity: "Twilio phone participant",
|
||||
createdAt: "2026-04-25T00:00:00.000Z",
|
||||
updatedAt: "2026-04-25T00:00:01.000Z",
|
||||
|
||||
@@ -1481,10 +1481,7 @@ export function registerGoogleMeetCli(params: {
|
||||
)
|
||||
.option("--no-join", "Only create the meeting URL; do not join it")
|
||||
.option("--transport <transport>", "Join transport: chrome, chrome-node, or twilio")
|
||||
.option(
|
||||
"--mode <mode>",
|
||||
"Join mode: realtime for live talk-back, transcribe for observe/control",
|
||||
)
|
||||
.option("--mode <mode>", "Join mode: agent, bidi, or transcribe")
|
||||
.option("--message <text>", "Realtime speech to trigger after join")
|
||||
.option("--dial-in-number <phone>", "Meet dial-in number for Twilio transport")
|
||||
.option("--pin <pin>", "Meet phone PIN; # is appended if omitted")
|
||||
@@ -1665,10 +1662,7 @@ export function registerGoogleMeetCli(params: {
|
||||
.command("join")
|
||||
.argument("[url]", "Explicit https://meet.google.com/... URL")
|
||||
.option("--transport <transport>", "Transport: chrome, chrome-node, or twilio")
|
||||
.option(
|
||||
"--mode <mode>",
|
||||
"Mode: realtime for live talk-back, transcribe to join without the realtime voice bridge",
|
||||
)
|
||||
.option("--mode <mode>", "Mode: agent, bidi, or transcribe")
|
||||
.option("--message <text>", "Realtime speech to trigger after join")
|
||||
.option("--dial-in-number <phone>", "Meet dial-in number for Twilio transport")
|
||||
.option("--pin <pin>", "Meet phone PIN; # is appended if omitted")
|
||||
@@ -1703,10 +1697,7 @@ export function registerGoogleMeetCli(params: {
|
||||
.command("test-speech")
|
||||
.argument("[url]", "Explicit https://meet.google.com/... URL")
|
||||
.option("--transport <transport>", "Transport: chrome, chrome-node, or twilio")
|
||||
.option(
|
||||
"--mode <mode>",
|
||||
"Mode: realtime for live talk-back, transcribe to join without the realtime voice bridge",
|
||||
)
|
||||
.option("--mode <mode>", "Mode: agent, bidi, or transcribe")
|
||||
.option(
|
||||
"--message <text>",
|
||||
"Realtime speech to trigger",
|
||||
|
||||
@@ -9,7 +9,8 @@ import {
|
||||
} from "openclaw/plugin-sdk/text-runtime";
|
||||
|
||||
export type GoogleMeetTransport = "chrome" | "chrome-node" | "twilio";
|
||||
export type GoogleMeetMode = "realtime" | "transcribe";
|
||||
export type GoogleMeetMode = "agent" | "bidi" | "transcribe";
|
||||
export type GoogleMeetModeInput = GoogleMeetMode | "realtime";
|
||||
export type GoogleMeetRealtimeStrategy = "agent" | "bidi";
|
||||
type GoogleMeetChromeAudioFormat = "pcm16-24khz" | "g711-ulaw-8khz";
|
||||
export type GoogleMeetToolPolicy = RealtimeVoiceAgentConsultToolPolicy;
|
||||
@@ -162,7 +163,7 @@ const DEFAULT_GOOGLE_MEET_BARGE_IN_RMS_THRESHOLD = 650;
|
||||
const DEFAULT_GOOGLE_MEET_BARGE_IN_PEAK_THRESHOLD = 2500;
|
||||
const DEFAULT_GOOGLE_MEET_BARGE_IN_COOLDOWN_MS = 900;
|
||||
|
||||
const DEFAULT_GOOGLE_MEET_REALTIME_INSTRUCTIONS = `You are joining a private Google Meet as an OpenClaw voice transport. Keep spoken replies brief and natural. In agent strategy, wait for OpenClaw consult results and speak them exactly. In bidi strategy, answer directly and call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} for deeper reasoning, current information, or tools.`;
|
||||
const DEFAULT_GOOGLE_MEET_REALTIME_INSTRUCTIONS = `You are joining a private Google Meet as an OpenClaw voice transport. Keep spoken replies brief and natural. In agent mode, wait for OpenClaw consult results and speak them exactly. In bidi mode, answer directly and call ${REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME} for deeper reasoning, current information, or tools.`;
|
||||
const DEFAULT_GOOGLE_MEET_REALTIME_INTRO_MESSAGE = "Say exactly: I'm here and listening.";
|
||||
|
||||
const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
|
||||
@@ -172,7 +173,7 @@ const DEFAULT_GOOGLE_MEET_CONFIG: GoogleMeetConfig = {
|
||||
enrollmentAcknowledged: false,
|
||||
},
|
||||
defaultTransport: "chrome",
|
||||
defaultMode: "realtime",
|
||||
defaultMode: "agent",
|
||||
chrome: {
|
||||
audioBackend: "blackhole-2ch",
|
||||
audioFormat: DEFAULT_GOOGLE_MEET_CHROME_AUDIO_FORMAT,
|
||||
@@ -325,7 +326,12 @@ function resolveTransport(value: unknown, fallback: GoogleMeetTransport): Google
|
||||
|
||||
function resolveMode(value: unknown, fallback: GoogleMeetMode): GoogleMeetMode {
|
||||
const normalized = normalizeOptionalLowercaseString(value);
|
||||
return normalized === "realtime" || normalized === "transcribe" ? normalized : fallback;
|
||||
if (normalized === "realtime") {
|
||||
return "agent";
|
||||
}
|
||||
return normalized === "agent" || normalized === "bidi" || normalized === "transcribe"
|
||||
? normalized
|
||||
: fallback;
|
||||
}
|
||||
|
||||
function resolveRealtimeStrategy(
|
||||
|
||||
@@ -16,7 +16,10 @@ function normalizeTransport(value: unknown): GoogleMeetTransport | undefined {
|
||||
}
|
||||
|
||||
function normalizeMode(value: unknown): GoogleMeetMode | undefined {
|
||||
return value === "realtime" || value === "transcribe" ? value : undefined;
|
||||
if (value === "realtime") {
|
||||
return "agent";
|
||||
}
|
||||
return value === "agent" || value === "bidi" || value === "transcribe" ? value : undefined;
|
||||
}
|
||||
|
||||
function normalizeGoogleMeetAccessType(value: unknown): GoogleMeetAccessType | undefined {
|
||||
|
||||
@@ -284,7 +284,7 @@ function startChrome(params: Record<string, unknown>) {
|
||||
|
||||
let bridgeId: string | undefined;
|
||||
let audioBridge: { type: "external-command" | "node-command-pair" } | undefined;
|
||||
if (mode === "realtime") {
|
||||
if (mode === "agent" || mode === "bidi" || mode === "realtime") {
|
||||
assertBlackHoleAvailable(Math.min(timeoutMs, 10_000));
|
||||
|
||||
const healthCommand = readStringArray(params.audioBridgeHealthCommand);
|
||||
@@ -299,6 +299,11 @@ function startChrome(params: Record<string, unknown>) {
|
||||
|
||||
const bridgeCommand = readStringArray(params.audioBridgeCommand);
|
||||
if (bridgeCommand) {
|
||||
if (mode === "agent") {
|
||||
throw new Error(
|
||||
"Chrome agent mode requires audioInputCommand and audioOutputCommand so OpenClaw can run STT and regular TTS directly.",
|
||||
);
|
||||
}
|
||||
const bridge = runCommandWithTimeout(bridgeCommand, timeoutMs);
|
||||
if (bridge.code !== 0) {
|
||||
throw new Error(
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
|
||||
import type {
|
||||
RealtimeTranscriptionProviderPlugin,
|
||||
RealtimeTranscriptionSession,
|
||||
} from "openclaw/plugin-sdk/realtime-transcription";
|
||||
import {
|
||||
createRealtimeVoiceBridgeSession,
|
||||
type RealtimeVoiceBridgeSession,
|
||||
@@ -23,7 +27,10 @@ import {
|
||||
recordGoogleMeetRealtimeEvent,
|
||||
resolveGoogleMeetRealtimeAudioFormat,
|
||||
resolveGoogleMeetRealtimeProvider,
|
||||
resolveGoogleMeetRealtimeTranscriptionProvider,
|
||||
isGoogleMeetLikelyAssistantEchoTranscript,
|
||||
convertGoogleMeetBridgeAudioForStt,
|
||||
convertGoogleMeetTtsAudioForBridge,
|
||||
type GoogleMeetRealtimeEventEntry,
|
||||
type GoogleMeetRealtimeTranscriptEntry,
|
||||
} from "./realtime.js";
|
||||
@@ -49,6 +56,307 @@ function readString(value: unknown): string | undefined {
|
||||
return typeof value === "string" && value.trim() ? value : undefined;
|
||||
}
|
||||
|
||||
function normalizeGoogleMeetTtsPromptText(text: string | undefined): string | undefined {
|
||||
const trimmed = text?.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
const sayExactly = trimmed.match(/^say exactly:\s*(?<text>.+)$/is)?.groups?.text?.trim();
|
||||
if (sayExactly) {
|
||||
return sayExactly.replace(/^["']|["']$/g, "").trim() || trimmed;
|
||||
}
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
export async function startNodeAgentAudioBridge(params: {
|
||||
config: GoogleMeetConfig;
|
||||
fullConfig: OpenClawConfig;
|
||||
runtime: PluginRuntime;
|
||||
meetingSessionId: string;
|
||||
nodeId: string;
|
||||
bridgeId: string;
|
||||
logger: RuntimeLogger;
|
||||
providers?: RealtimeTranscriptionProviderPlugin[];
|
||||
}): Promise<ChromeNodeRealtimeAudioBridgeHandle> {
|
||||
let stopped = false;
|
||||
let sttSession: RealtimeTranscriptionSession | null = null;
|
||||
let realtimeReady = false;
|
||||
let lastInputAt: string | undefined;
|
||||
let lastOutputAt: string | undefined;
|
||||
let lastInputBytes = 0;
|
||||
let lastOutputBytes = 0;
|
||||
let suppressedInputBytes = 0;
|
||||
let lastSuppressedInputAt: string | undefined;
|
||||
let suppressInputUntil = 0;
|
||||
let lastOutputPlayableUntilMs = 0;
|
||||
let consecutiveInputErrors = 0;
|
||||
let lastInputError: string | undefined;
|
||||
const resolved = resolveGoogleMeetRealtimeTranscriptionProvider({
|
||||
config: params.config,
|
||||
fullConfig: params.fullConfig,
|
||||
providers: params.providers,
|
||||
});
|
||||
const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
|
||||
let agentConsultActive = false;
|
||||
let pendingAgentQuestion: string | undefined;
|
||||
let agentConsultDebounceTimer: ReturnType<typeof setTimeout> | undefined;
|
||||
let ttsQueue = Promise.resolve();
|
||||
|
||||
const stop = async () => {
|
||||
if (stopped) {
|
||||
return;
|
||||
}
|
||||
stopped = true;
|
||||
if (agentConsultDebounceTimer) {
|
||||
clearTimeout(agentConsultDebounceTimer);
|
||||
agentConsultDebounceTimer = undefined;
|
||||
}
|
||||
try {
|
||||
sttSession?.close();
|
||||
} catch (error) {
|
||||
params.logger.debug?.(
|
||||
`[google-meet] node agent transcription bridge close ignored: ${formatErrorMessage(error)}`,
|
||||
);
|
||||
}
|
||||
try {
|
||||
await params.runtime.nodes.invoke({
|
||||
nodeId: params.nodeId,
|
||||
command: "googlemeet.chrome",
|
||||
params: { action: "stop", bridgeId: params.bridgeId },
|
||||
timeoutMs: 5_000,
|
||||
});
|
||||
} catch (error) {
|
||||
params.logger.debug?.(
|
||||
`[google-meet] node audio bridge stop ignored: ${formatErrorMessage(error)}`,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
const pushOutputAudio = async (audio: Buffer) => {
|
||||
const suppression = extendGoogleMeetOutputEchoSuppression({
|
||||
audio,
|
||||
audioFormat: params.config.chrome.audioFormat,
|
||||
nowMs: Date.now(),
|
||||
lastOutputPlayableUntilMs,
|
||||
suppressInputUntilMs: suppressInputUntil,
|
||||
});
|
||||
suppressInputUntil = suppression.suppressInputUntilMs;
|
||||
lastOutputPlayableUntilMs = suppression.lastOutputPlayableUntilMs;
|
||||
lastOutputAt = new Date().toISOString();
|
||||
lastOutputBytes += audio.byteLength;
|
||||
await params.runtime.nodes.invoke({
|
||||
nodeId: params.nodeId,
|
||||
command: "googlemeet.chrome",
|
||||
params: {
|
||||
action: "pushAudio",
|
||||
bridgeId: params.bridgeId,
|
||||
base64: Buffer.from(audio).toString("base64"),
|
||||
},
|
||||
timeoutMs: 5_000,
|
||||
});
|
||||
};
|
||||
|
||||
const enqueueSpeakText = (text: string | undefined) => {
|
||||
const normalized = normalizeGoogleMeetTtsPromptText(text);
|
||||
if (!normalized || stopped) {
|
||||
return;
|
||||
}
|
||||
ttsQueue = ttsQueue
|
||||
.then(async () => {
|
||||
if (stopped) {
|
||||
return;
|
||||
}
|
||||
recordGoogleMeetRealtimeTranscript(transcript, "assistant", normalized);
|
||||
params.logger.info(`[google-meet] node agent assistant: ${normalized}`);
|
||||
const result = await params.runtime.tts.textToSpeechTelephony({
|
||||
text: normalized,
|
||||
cfg: params.fullConfig,
|
||||
});
|
||||
if (!result.success || !result.audioBuffer || !result.sampleRate) {
|
||||
throw new Error(result.error ?? "TTS conversion failed");
|
||||
}
|
||||
await pushOutputAudio(
|
||||
convertGoogleMeetTtsAudioForBridge(
|
||||
result.audioBuffer,
|
||||
result.sampleRate,
|
||||
params.config,
|
||||
result.outputFormat,
|
||||
),
|
||||
);
|
||||
})
|
||||
.catch((error) => {
|
||||
params.logger.warn(`[google-meet] node agent TTS failed: ${formatErrorMessage(error)}`);
|
||||
});
|
||||
};
|
||||
|
||||
const runAgentConsultForUserTranscript = async (question: string): Promise<void> => {
|
||||
const trimmed = question.trim();
|
||||
if (!trimmed || stopped) {
|
||||
return;
|
||||
}
|
||||
if (agentConsultActive) {
|
||||
pendingAgentQuestion = trimmed;
|
||||
return;
|
||||
}
|
||||
agentConsultActive = true;
|
||||
let nextQuestion: string | undefined = trimmed;
|
||||
try {
|
||||
while (nextQuestion) {
|
||||
if (stopped) {
|
||||
return;
|
||||
}
|
||||
const currentQuestion = nextQuestion;
|
||||
pendingAgentQuestion = undefined;
|
||||
params.logger.info(`[google-meet] node agent consult: ${currentQuestion}`);
|
||||
const result = await consultOpenClawAgentForGoogleMeet({
|
||||
config: params.config,
|
||||
fullConfig: params.fullConfig,
|
||||
runtime: params.runtime,
|
||||
logger: params.logger,
|
||||
meetingSessionId: params.meetingSessionId,
|
||||
args: {
|
||||
question: currentQuestion,
|
||||
responseStyle: "Brief, natural spoken answer for a live meeting.",
|
||||
},
|
||||
transcript,
|
||||
});
|
||||
enqueueSpeakText(result.text);
|
||||
nextQuestion = pendingAgentQuestion;
|
||||
}
|
||||
} catch (error) {
|
||||
params.logger.warn(`[google-meet] node agent consult failed: ${formatErrorMessage(error)}`);
|
||||
enqueueSpeakText("I hit an error while checking that. Please try again.");
|
||||
} finally {
|
||||
agentConsultActive = false;
|
||||
const queuedQuestion = pendingAgentQuestion;
|
||||
pendingAgentQuestion = undefined;
|
||||
if (queuedQuestion && !stopped) {
|
||||
void runAgentConsultForUserTranscript(queuedQuestion);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const enqueueAgentConsultForUserTranscript = (question: string): void => {
|
||||
const trimmed = question.trim();
|
||||
if (!trimmed || stopped) {
|
||||
return;
|
||||
}
|
||||
pendingAgentQuestion = pendingAgentQuestion ? `${pendingAgentQuestion}\n${trimmed}` : trimmed;
|
||||
if (agentConsultDebounceTimer) {
|
||||
clearTimeout(agentConsultDebounceTimer);
|
||||
}
|
||||
agentConsultDebounceTimer = setTimeout(() => {
|
||||
agentConsultDebounceTimer = undefined;
|
||||
const queuedQuestion = pendingAgentQuestion;
|
||||
pendingAgentQuestion = undefined;
|
||||
if (queuedQuestion && !stopped) {
|
||||
void runAgentConsultForUserTranscript(queuedQuestion);
|
||||
}
|
||||
}, GOOGLE_MEET_AGENT_TRANSCRIPT_DEBOUNCE_MS);
|
||||
agentConsultDebounceTimer.unref?.();
|
||||
};
|
||||
|
||||
sttSession = resolved.provider.createSession({
|
||||
providerConfig: resolved.providerConfig,
|
||||
onTranscript: (text) => {
|
||||
const trimmed = text.trim();
|
||||
if (!trimmed || stopped) {
|
||||
return;
|
||||
}
|
||||
recordGoogleMeetRealtimeTranscript(transcript, "user", trimmed);
|
||||
params.logger.info(`[google-meet] node agent user: ${trimmed}`);
|
||||
if (isGoogleMeetLikelyAssistantEchoTranscript({ transcript, text: trimmed })) {
|
||||
params.logger.info(
|
||||
`[google-meet] node agent ignored assistant echo transcript: ${trimmed}`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
enqueueAgentConsultForUserTranscript(trimmed);
|
||||
},
|
||||
onError: (error) => {
|
||||
params.logger.warn(
|
||||
`[google-meet] node agent transcription bridge failed: ${formatErrorMessage(error)}`,
|
||||
);
|
||||
void stop();
|
||||
},
|
||||
});
|
||||
await sttSession.connect();
|
||||
realtimeReady = true;
|
||||
|
||||
void (async () => {
|
||||
for (;;) {
|
||||
if (stopped) {
|
||||
break;
|
||||
}
|
||||
try {
|
||||
const raw = await params.runtime.nodes.invoke({
|
||||
nodeId: params.nodeId,
|
||||
command: "googlemeet.chrome",
|
||||
params: { action: "pullAudio", bridgeId: params.bridgeId, timeoutMs: 250 },
|
||||
timeoutMs: 2_000,
|
||||
});
|
||||
const result = asRecord(asRecord(raw).payload ?? raw);
|
||||
consecutiveInputErrors = 0;
|
||||
lastInputError = undefined;
|
||||
const base64 = readString(result.base64);
|
||||
if (base64) {
|
||||
const audio = Buffer.from(base64, "base64");
|
||||
if (Date.now() < suppressInputUntil) {
|
||||
lastSuppressedInputAt = new Date().toISOString();
|
||||
suppressedInputBytes += audio.byteLength;
|
||||
continue;
|
||||
}
|
||||
lastInputAt = new Date().toISOString();
|
||||
lastInputBytes += audio.byteLength;
|
||||
sttSession?.sendAudio(convertGoogleMeetBridgeAudioForStt(audio, params.config));
|
||||
}
|
||||
if (result.closed === true) {
|
||||
await stop();
|
||||
}
|
||||
} catch (error) {
|
||||
if (!stopped) {
|
||||
const message = formatErrorMessage(error);
|
||||
consecutiveInputErrors += 1;
|
||||
lastInputError = message;
|
||||
params.logger.warn(
|
||||
`[google-meet] node agent audio input failed (${consecutiveInputErrors}/5): ${message}`,
|
||||
);
|
||||
if (consecutiveInputErrors >= 5 || /unknown bridgeId|bridge is not open/i.test(message)) {
|
||||
await stop();
|
||||
} else {
|
||||
await new Promise((resolve) => setTimeout(resolve, 250));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})();
|
||||
|
||||
return {
|
||||
type: "node-command-pair",
|
||||
providerId: resolved.provider.id,
|
||||
nodeId: params.nodeId,
|
||||
bridgeId: params.bridgeId,
|
||||
speak: enqueueSpeakText,
|
||||
getHealth: () => ({
|
||||
providerConnected: sttSession?.isConnected() ?? false,
|
||||
realtimeReady,
|
||||
audioInputActive: lastInputBytes > 0,
|
||||
audioOutputActive: lastOutputBytes > 0,
|
||||
lastInputAt,
|
||||
lastOutputAt,
|
||||
lastSuppressedInputAt,
|
||||
lastInputBytes,
|
||||
lastOutputBytes,
|
||||
suppressedInputBytes,
|
||||
...getGoogleMeetRealtimeTranscriptHealth(transcript),
|
||||
consecutiveInputErrors,
|
||||
lastInputError,
|
||||
bridgeClosed: stopped,
|
||||
}),
|
||||
stop,
|
||||
};
|
||||
}
|
||||
|
||||
export async function startNodeRealtimeAudioBridge(params: {
|
||||
config: GoogleMeetConfig;
|
||||
fullConfig: OpenClawConfig;
|
||||
|
||||
@@ -3,10 +3,20 @@ import type { Writable } from "node:stream";
|
||||
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
|
||||
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
||||
import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
|
||||
import {
|
||||
getRealtimeTranscriptionProvider,
|
||||
listRealtimeTranscriptionProviders,
|
||||
type RealtimeTranscriptionProviderConfig,
|
||||
type RealtimeTranscriptionProviderPlugin,
|
||||
type RealtimeTranscriptionSession,
|
||||
} from "openclaw/plugin-sdk/realtime-transcription";
|
||||
import {
|
||||
createRealtimeVoiceBridgeSession,
|
||||
convertPcmToMulaw8k,
|
||||
mulawToPcm,
|
||||
REALTIME_VOICE_AUDIO_FORMAT_G711_ULAW_8KHZ,
|
||||
REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
|
||||
resamplePcm,
|
||||
resolveConfiguredRealtimeVoiceProvider,
|
||||
type RealtimeVoiceBridgeSession,
|
||||
type RealtimeVoiceBridgeEvent,
|
||||
@@ -56,6 +66,11 @@ type ResolvedRealtimeProvider = {
|
||||
providerConfig: RealtimeVoiceProviderConfig;
|
||||
};
|
||||
|
||||
type ResolvedRealtimeTranscriptionProvider = {
|
||||
provider: RealtimeTranscriptionProviderPlugin;
|
||||
providerConfig: RealtimeTranscriptionProviderConfig;
|
||||
};
|
||||
|
||||
export type GoogleMeetRealtimeTranscriptEntry = {
|
||||
at: string;
|
||||
role: "user" | "assistant";
|
||||
@@ -243,6 +258,100 @@ export function resolveGoogleMeetRealtimeAudioFormat(config: GoogleMeetConfig) {
|
||||
: REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ;
|
||||
}
|
||||
|
||||
export function convertGoogleMeetBridgeAudioForStt(
|
||||
audio: Buffer,
|
||||
config: GoogleMeetConfig,
|
||||
): Buffer {
|
||||
if (config.chrome.audioFormat === "g711-ulaw-8khz") {
|
||||
return audio;
|
||||
}
|
||||
return convertPcmToMulaw8k(audio, 24_000);
|
||||
}

export function convertGoogleMeetTtsAudioForBridge(
  audio: Buffer,
  sampleRate: number,
  config: GoogleMeetConfig,
  outputFormat?: string,
): Buffer {
  const sourceFormat = sourceTelephonyTtsFormat(outputFormat);
  if (
    config.chrome.audioFormat === "g711-ulaw-8khz" &&
    sourceFormat === "mulaw" &&
    sampleRate === 8_000
  ) {
    return audio;
  }
  const pcm = decodeGoogleMeetTelephonyTtsAudio(audio, sourceFormat);
  return config.chrome.audioFormat === "g711-ulaw-8khz"
    ? convertPcmToMulaw8k(pcm, sampleRate)
    : resamplePcm(pcm, sampleRate, 24_000);
}
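
// TTS audio that already matches a µ-law 8 kHz bridge is passed through untouched; anything
// else is decoded to PCM16 and then either µ-law-encoded or resampled to the 24 kHz PCM the
// Chrome bridge expects. For example (illustrative values): a "mulaw" clip at 8_000 Hz is
// returned as-is on a g711-ulaw-8khz bridge, while a "pcm_22050" clip at 22_050 Hz is
// resampled to 24 kHz on the PCM path.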

type GoogleMeetTelephonyTtsFormat = "pcm" | "mulaw" | "alaw";

function sourceTelephonyTtsFormat(outputFormat: string | undefined): GoogleMeetTelephonyTtsFormat {
  const normalized = outputFormat?.trim().toLowerCase().replaceAll("_", "-") ?? "";
  if (
    !normalized ||
    normalized === "pcm" ||
    normalized.startsWith("pcm-") ||
    normalized.includes("pcm16") ||
    normalized.includes("16bit-mono-pcm")
  ) {
    return "pcm";
  }
  if (
    normalized === "mulaw" ||
    normalized === "ulaw" ||
    normalized.includes("mu-law") ||
    normalized.includes("mulaw") ||
    normalized.includes("ulaw")
  ) {
    return "mulaw";
  }
  if (normalized === "alaw" || normalized.includes("a-law") || normalized.includes("alaw")) {
    return "alaw";
  }
  throw new Error(`Unsupported telephony TTS output format for Google Meet: ${outputFormat}`);
}
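
// Format strings are normalized (lowercased, "_" -> "-") before matching, so e.g. "", "pcm",
// "pcm_16000", and "16bit_mono_pcm" map to "pcm"; "ulaw_8000" and "mu-law" map to "mulaw";
// "alaw_8000" and "a-law" map to "alaw"; anything unrecognized throws.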

function decodeGoogleMeetTelephonyTtsAudio(
  audio: Buffer,
  sourceFormat: GoogleMeetTelephonyTtsFormat,
): Buffer {
  switch (sourceFormat) {
    case "pcm":
      return audio;
    case "mulaw":
      return mulawToPcm(audio);
    case "alaw":
      return alawToPcm(audio);
  }
  return unsupportedGoogleMeetTelephonyTtsFormat(sourceFormat);
}

function unsupportedGoogleMeetTelephonyTtsFormat(_format: never): never {
  throw new Error("Unsupported telephony TTS output format for Google Meet");
}

function alawToPcm(alaw: Buffer): Buffer {
  const pcm = Buffer.alloc(alaw.length * 2);
  for (let index = 0; index < alaw.length; index += 1) {
    pcm.writeInt16LE(alawByteToLinear(alaw[index] ?? 0), index * 2);
  }
  return pcm;
}

function alawByteToLinear(value: number): number {
  const aLaw = value ^ 0x55;
  const sign = aLaw & 0x80;
  const exponent = (aLaw & 0x70) >> 4;
  const mantissa = aLaw & 0x0f;
  let sample = exponent === 0 ? (mantissa << 4) + 8 : ((mantissa << 4) + 0x108) << (exponent - 1);
  return sign ? sample : -sample;
}
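
// This follows the standard G.711 A-law expansion (XOR with 0x55, split sign/exponent/mantissa,
// sign bit set means positive). As a quick sanity check: input byte 0x55 decodes to aLaw 0x00
// (negative, exponent 0, mantissa 0) and therefore to -8.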

export function resolveGoogleMeetRealtimeProvider(params: {
  config: GoogleMeetConfig;
  fullConfig: OpenClawConfig;
@@ -258,6 +367,40 @@ export function resolveGoogleMeetRealtimeProvider(params: {
  });
}

export function resolveGoogleMeetRealtimeTranscriptionProvider(params: {
  config: GoogleMeetConfig;
  fullConfig: OpenClawConfig;
  providers?: RealtimeTranscriptionProviderPlugin[];
}): ResolvedRealtimeTranscriptionProvider {
  const providers = params.providers ?? listRealtimeTranscriptionProviders(params.fullConfig);
  if (providers.length === 0) {
    throw new Error("No configured realtime transcription provider registered");
  }
  const configuredProvider = params.config.realtime.provider
    ? (params.providers?.find(
        (entry) =>
          entry.id === params.config.realtime.provider ||
          entry.aliases?.includes(params.config.realtime.provider ?? ""),
      ) ?? getRealtimeTranscriptionProvider(params.config.realtime.provider, params.fullConfig))
    : undefined;
  const provider = configuredProvider ?? providers[0];
  if (!provider) {
    throw new Error("No configured realtime transcription provider registered");
  }
  const rawConfig = params.config.realtime.provider
    ? (params.config.realtime.providers[params.config.realtime.provider] ??
      params.config.realtime.providers[provider.id] ??
      {})
    : (params.config.realtime.providers[provider.id] ?? {});
  const providerConfig = provider.resolveConfig
    ? provider.resolveConfig({ cfg: params.fullConfig, rawConfig })
    : rawConfig;
  if (!provider.isConfigured({ cfg: params.fullConfig, providerConfig })) {
    throw new Error(`Realtime transcription provider "${provider.id}" is not configured`);
  }
  return { provider, providerConfig };
}
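
// Resolution order: an explicitly configured provider id or alias (looked up in the injected
// provider list, falling back to the registry), otherwise the first registered transcription
// provider; its raw config is resolved and the provider must report isConfigured() before the
// agent bridge will use it.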

export function buildGoogleMeetSpeakExactUserMessage(text: string): string {
  return [
    "Speak this exact OpenClaw answer to the meeting, without adding, removing, or rephrasing words.",
@@ -265,6 +408,319 @@ export function buildGoogleMeetSpeakExactUserMessage(text: string): string {
  ].join("\n");
}

function normalizeGoogleMeetTtsPromptText(text: string | undefined): string | undefined {
  const trimmed = text?.trim();
  if (!trimmed) {
    return undefined;
  }
  const sayExactly = trimmed.match(/^say exactly:\s*(?<text>.+)$/is)?.groups?.text?.trim();
  if (sayExactly) {
    return sayExactly.replace(/^["']|["']$/g, "").trim() || trimmed;
  }
  return trimmed;
}
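
// Prompts phrased as "Say exactly: ..." are unwrapped so only the quoted answer is synthesized,
// e.g. `Say exactly: "Speech test complete."` becomes `Speech test complete.`; other prompts are
// passed through trimmed.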

export async function startCommandAgentAudioBridge(params: {
  config: GoogleMeetConfig;
  fullConfig: OpenClawConfig;
  runtime: PluginRuntime;
  meetingSessionId: string;
  inputCommand: string[];
  outputCommand: string[];
  logger: RuntimeLogger;
  providers?: RealtimeTranscriptionProviderPlugin[];
  spawn?: SpawnFn;
}): Promise<ChromeRealtimeAudioBridgeHandle> {
  const input = splitCommand(params.inputCommand);
  const output = splitCommand(params.outputCommand);
  const spawnFn: SpawnFn =
    params.spawn ??
    ((command, args, options) => spawn(command, args, options) as unknown as BridgeProcess);
  const outputProcess = spawnFn(output.command, output.args, {
    stdio: ["pipe", "ignore", "pipe"],
  });
  const inputProcess = spawnFn(input.command, input.args, {
    stdio: ["ignore", "pipe", "pipe"],
  });
  let stopped = false;
  let sttSession: RealtimeTranscriptionSession | null = null;
  let realtimeReady = false;
  let lastInputAt: string | undefined;
  let lastOutputAt: string | undefined;
  let lastInputBytes = 0;
  let lastOutputBytes = 0;
  let suppressedInputBytes = 0;
  let lastSuppressedInputAt: string | undefined;
  let suppressInputUntil = 0;
  let lastOutputPlayableUntilMs = 0;
  let agentConsultActive = false;
  let pendingAgentQuestion: string | undefined;
  let agentConsultDebounceTimer: ReturnType<typeof setTimeout> | undefined;
  let ttsQueue = Promise.resolve();
  const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
  const resolved = resolveGoogleMeetRealtimeTranscriptionProvider({
    config: params.config,
    fullConfig: params.fullConfig,
    providers: params.providers,
  });

  const terminateProcess = (proc: BridgeProcess, signal: NodeJS.Signals = "SIGTERM") => {
    if (proc.killed && signal !== "SIGKILL") {
      return;
    }
    let exited = false;
    proc.on("exit", () => {
      exited = true;
    });
    try {
      proc.kill(signal);
    } catch {
      return;
    }
    if (signal === "SIGKILL") {
      return;
    }
    const timer = setTimeout(() => {
      if (!exited) {
        try {
          proc.kill("SIGKILL");
        } catch {
          // Process may have exited after the grace check.
        }
      }
    }, 1000);
    timer.unref?.();
  };

  const stop = async () => {
    if (stopped) {
      return;
    }
    stopped = true;
    if (agentConsultDebounceTimer) {
      clearTimeout(agentConsultDebounceTimer);
      agentConsultDebounceTimer = undefined;
    }
    try {
      sttSession?.close();
    } catch (error) {
      params.logger.debug?.(
        `[google-meet] agent transcription bridge close ignored: ${formatErrorMessage(error)}`,
      );
    }
    terminateProcess(inputProcess);
    terminateProcess(outputProcess);
  };

  const fail = (label: string) => (error: Error) => {
    params.logger.warn(`[google-meet] ${label} failed: ${formatErrorMessage(error)}`);
    void stop();
  };
  inputProcess.on("error", fail("audio input command"));
  inputProcess.on("exit", (code, signal) => {
    if (!stopped) {
      params.logger.warn(`[google-meet] audio input command exited (${code ?? signal ?? "done"})`);
      void stop();
    }
  });
  inputProcess.stderr?.on("data", (chunk) => {
    params.logger.debug?.(`[google-meet] audio input: ${String(chunk).trim()}`);
  });
  outputProcess.on("error", fail("audio output command"));
  outputProcess.stdin?.on?.("error", fail("audio output command"));
  outputProcess.on("exit", (code, signal) => {
    if (!stopped) {
      params.logger.warn(`[google-meet] audio output command exited (${code ?? signal ?? "done"})`);
      void stop();
    }
  });
  outputProcess.stderr?.on("data", (chunk) => {
    params.logger.debug?.(`[google-meet] audio output: ${String(chunk).trim()}`);
  });

  const writeOutputAudio = (audio: Buffer) => {
    const suppression = extendGoogleMeetOutputEchoSuppression({
      audio,
      audioFormat: params.config.chrome.audioFormat,
      nowMs: Date.now(),
      lastOutputPlayableUntilMs,
      suppressInputUntilMs: suppressInputUntil,
    });
    suppressInputUntil = suppression.suppressInputUntilMs;
    lastOutputPlayableUntilMs = suppression.lastOutputPlayableUntilMs;
    lastOutputAt = new Date().toISOString();
    lastOutputBytes += audio.byteLength;
    try {
      outputProcess.stdin?.write(audio);
    } catch (error) {
      fail("audio output command")(error as Error);
    }
  };

  const enqueueSpeakText = (text: string | undefined) => {
    const normalized = normalizeGoogleMeetTtsPromptText(text);
    if (!normalized || stopped) {
      return;
    }
    ttsQueue = ttsQueue
      .then(async () => {
        if (stopped) {
          return;
        }
        recordGoogleMeetRealtimeTranscript(transcript, "assistant", normalized);
        params.logger.info(`[google-meet] agent assistant: ${normalized}`);
        const result = await params.runtime.tts.textToSpeechTelephony({
          text: normalized,
          cfg: params.fullConfig,
        });
        if (!result.success || !result.audioBuffer || !result.sampleRate) {
          throw new Error(result.error ?? "TTS conversion failed");
        }
        writeOutputAudio(
          convertGoogleMeetTtsAudioForBridge(
            result.audioBuffer,
            result.sampleRate,
            params.config,
            result.outputFormat,
          ),
        );
      })
      .catch((error) => {
        params.logger.warn(`[google-meet] agent TTS failed: ${formatErrorMessage(error)}`);
      });
  };
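
  // Speak requests are chained onto a single promise queue, so TTS clips are synthesized and
  // written to the output command strictly in order; a failed clip is logged and dropped
  // without breaking the queue for later answers.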

  const runAgentConsultForUserTranscript = async (question: string): Promise<void> => {
    const trimmed = question.trim();
    if (!trimmed || stopped) {
      return;
    }
    if (agentConsultActive) {
      pendingAgentQuestion = trimmed;
      return;
    }
    agentConsultActive = true;
    let nextQuestion: string | undefined = trimmed;
    try {
      while (nextQuestion) {
        if (stopped) {
          return;
        }
        const currentQuestion = nextQuestion;
        pendingAgentQuestion = undefined;
        params.logger.info(`[google-meet] agent consult: ${currentQuestion}`);
        const result = await consultOpenClawAgentForGoogleMeet({
          config: params.config,
          fullConfig: params.fullConfig,
          runtime: params.runtime,
          logger: params.logger,
          meetingSessionId: params.meetingSessionId,
          args: {
            question: currentQuestion,
            responseStyle: "Brief, natural spoken answer for a live meeting.",
          },
          transcript,
        });
        enqueueSpeakText(result.text);
        nextQuestion = pendingAgentQuestion;
      }
    } catch (error) {
      params.logger.warn(`[google-meet] agent consult failed: ${formatErrorMessage(error)}`);
      enqueueSpeakText("I hit an error while checking that. Please try again.");
    } finally {
      agentConsultActive = false;
      const queuedQuestion = pendingAgentQuestion;
      pendingAgentQuestion = undefined;
      if (queuedQuestion && !stopped) {
        void runAgentConsultForUserTranscript(queuedQuestion);
      }
    }
  };

  const enqueueAgentConsultForUserTranscript = (question: string): void => {
    const trimmed = question.trim();
    if (!trimmed || stopped) {
      return;
    }
    pendingAgentQuestion = pendingAgentQuestion ? `${pendingAgentQuestion}\n${trimmed}` : trimmed;
    if (agentConsultDebounceTimer) {
      clearTimeout(agentConsultDebounceTimer);
    }
    agentConsultDebounceTimer = setTimeout(() => {
      agentConsultDebounceTimer = undefined;
      const queuedQuestion = pendingAgentQuestion;
      pendingAgentQuestion = undefined;
      if (queuedQuestion && !stopped) {
        void runAgentConsultForUserTranscript(queuedQuestion);
      }
    }, GOOGLE_MEET_AGENT_TRANSCRIPT_DEBOUNCE_MS);
    agentConsultDebounceTimer.unref?.();
  };
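
  // User transcripts are coalesced: fragments arriving within
  // GOOGLE_MEET_AGENT_TRANSCRIPT_DEBOUNCE_MS are joined with newlines into one question, and a
  // question that lands while a consult is still running is queued for the consult loop to pick
  // up afterwards.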

  sttSession = resolved.provider.createSession({
    providerConfig: resolved.providerConfig,
    onTranscript: (text) => {
      const trimmed = text.trim();
      if (!trimmed || stopped) {
        return;
      }
      recordGoogleMeetRealtimeTranscript(transcript, "user", trimmed);
      params.logger.info(`[google-meet] agent user: ${trimmed}`);
      if (isGoogleMeetLikelyAssistantEchoTranscript({ transcript, text: trimmed })) {
        params.logger.info(`[google-meet] agent ignored assistant echo transcript: ${trimmed}`);
        return;
      }
      enqueueAgentConsultForUserTranscript(trimmed);
    },
    onError: (error) => {
      params.logger.warn(
        `[google-meet] agent transcription bridge failed: ${formatErrorMessage(error)}`,
      );
      void stop();
    },
  });

  await sttSession.connect();
  realtimeReady = true;

  inputProcess.stdout?.on("data", (chunk) => {
    if (stopped) {
      return;
    }
    const audio = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
    if (Date.now() < suppressInputUntil) {
      lastSuppressedInputAt = new Date().toISOString();
      suppressedInputBytes += audio.byteLength;
      return;
    }
    lastInputAt = new Date().toISOString();
    lastInputBytes += audio.byteLength;
    sttSession?.sendAudio(convertGoogleMeetBridgeAudioForStt(audio, params.config));
  });
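
  // While the echo-suppression window opened by writeOutputAudio is active, captured chunks are
  // only counted as suppressed and never forwarded to the transcription session, which keeps the
  // agent from consulting on its own TTS playback.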

  return {
    providerId: resolved.provider.id,
    inputCommand: params.inputCommand,
    outputCommand: params.outputCommand,
    speak: enqueueSpeakText,
    getHealth: () => ({
      providerConnected: sttSession?.isConnected() ?? false,
      realtimeReady,
      audioInputActive: lastInputBytes > 0,
      audioOutputActive: lastOutputBytes > 0,
      lastInputAt,
      lastOutputAt,
      lastSuppressedInputAt,
      lastInputBytes,
      lastOutputBytes,
      suppressedInputBytes,
      ...getGoogleMeetRealtimeTranscriptHealth(transcript),
      bridgeClosed: stopped,
    }),
    stop,
  };
}

export async function startCommandRealtimeAudioBridge(params: {
  config: GoogleMeetConfig;
  fullConfig: OpenClawConfig;

@@ -64,6 +64,10 @@ function resolveMode(input: GoogleMeetMode | undefined, config: GoogleMeetConfig
  return input ?? config.defaultMode;
}

function isGoogleMeetTalkBackMode(mode: GoogleMeetMode): boolean {
  return mode === "agent" || mode === "bidi";
}

function hasRealtimeAudioOutputAdvanced(
  health: GoogleMeetChromeHealth | undefined,
  startOutputBytes: number,
@@ -125,7 +129,7 @@ function evaluateSpeechReadiness(session: GoogleMeetSession): {
  reason?: NonNullable<GoogleMeetChromeHealth["speechBlockedReason"]>;
  message?: string;
} {
  if (session.mode !== "realtime" || !session.chrome) {
  if (!isGoogleMeetTalkBackMode(session.mode) || !session.chrome) {
    return { ready: true };
  }
  if (!isManagedChromeBrowserSession(session)) {
@@ -278,7 +282,7 @@ export class GoogleMeetRuntime {
      });
    }
  }
  if (transport === "chrome" && mode === "realtime") {
  if (transport === "chrome" && isGoogleMeetTalkBackMode(mode)) {
    try {
      await assertBlackHole2chAvailable({
        runtime: this.params.runtime,
@@ -313,7 +317,7 @@ export class GoogleMeetRuntime {
  ok: commands.length > 0 && missingCommands.length === 0,
  message:
    commands.length === 0
      ? "Chrome realtime audio commands are not configured"
      ? "Chrome talk-back audio commands are not configured"
      : missingCommands.length === 0
        ? `Chrome audio command${commands.length === 1 ? "" : "s"} available: ${commands.join(", ")}`
        : `Chrome audio command${missingCommands.length === 1 ? "" : "s"} missing: ${missingCommands.join(", ")}`,
@@ -368,7 +372,7 @@ export class GoogleMeetRuntime {
  ];
  reusable.updatedAt = nowIso();
  const spoken =
    mode === "realtime" && speechInstructions
    isGoogleMeetTalkBackMode(mode) && speechInstructions
      ? await this.#speakWhenReady(reusable, speechInstructions)
      : false;
  return { session: reusable, spoken };
@@ -391,8 +395,8 @@ export class GoogleMeetRuntime {
    ? "signed-in Google Chrome profile on a paired node"
    : "signed-in Google Chrome profile",
  realtime: {
    enabled: mode === "realtime",
    strategy: this.params.config.realtime.strategy,
    enabled: isGoogleMeetTalkBackMode(mode),
    strategy: mode === "bidi" ? "bidi" : "agent",
    provider: this.params.config.realtime.provider,
    model: this.params.config.realtime.model,
    toolPolicy: this.params.config.realtime.toolPolicy,
@@ -435,7 +439,7 @@ export class GoogleMeetRuntime {
  ? transport === "chrome-node"
    ? "Chrome node transport joins as the signed-in Google profile on the selected node and routes realtime audio through the node bridge."
    : "Chrome transport joins as the signed-in Google profile and routes realtime audio through the configured bridge."
  : mode === "realtime"
  : isGoogleMeetTalkBackMode(mode)
    ? "Chrome transport joins as the signed-in Google profile and expects BlackHole 2ch audio routing."
    : "Chrome transport joins as the signed-in Google profile without starting the realtime audio bridge.",
);
@@ -459,12 +463,11 @@ export class GoogleMeetRuntime {
  dialInNumber,
  dtmfSequence,
  logger: this.params.logger,
  message:
    mode === "realtime"
      ? (request.message ??
        this.params.config.voiceCall.introMessage ??
        this.params.config.realtime.introMessage)
      : undefined,
  message: isGoogleMeetTalkBackMode(mode)
    ? (request.message ??
      this.params.config.voiceCall.introMessage ??
      this.params.config.realtime.introMessage)
    : undefined,
})
: undefined;
delegatedTwilioSpoken = Boolean(voiceCallResult?.introSent);
@@ -501,7 +504,7 @@ export class GoogleMeetRuntime {
  const spoken =
    transport === "twilio"
      ? delegatedTwilioSpoken
      : mode === "realtime" && speechInstructions
      : isGoogleMeetTalkBackMode(mode) && speechInstructions
        ? await this.#speakWhenReady(session, speechInstructions)
        : false;
  return { session, spoken };
@@ -613,7 +616,7 @@ export class GoogleMeetRuntime {
}> {
  if (request.mode === "transcribe") {
    throw new Error(
      "test_speech requires mode: realtime; use join mode: transcribe for observe-only sessions.",
      "test_speech requires mode: agent or bidi; use join mode: transcribe for observe-only sessions.",
    );
  }
  const url = normalizeMeetUrl(request.url);
@@ -625,14 +628,14 @@ export class GoogleMeetRuntime {
    session.state === "active" &&
    isSameMeetUrlForReuse(session.url, url) &&
    session.transport === transport &&
    session.mode === "realtime",
    isGoogleMeetTalkBackMode(session.mode),
  );
  const startOutputBytes = existingSession?.chrome?.health?.lastOutputBytes ?? 0;
  const result = await this.join({
    ...request,
    transport,
    url,
    mode: "realtime",
    mode: "agent",
    message: request.message ?? "Say exactly: Google Meet speech test complete.",
  });
  let health = result.session.chrome?.health;
@@ -687,9 +690,9 @@ export class GoogleMeetRuntime {
  recentTranscript?: GoogleMeetChromeHealth["recentTranscript"];
  session: GoogleMeetSession;
}> {
  if (request.mode === "realtime") {
  if (request.mode && isGoogleMeetTalkBackMode(request.mode)) {
    throw new Error(
      "test_listen requires mode: transcribe; use test_speech for realtime talk-back.",
      "test_listen requires mode: transcribe; use test_speech for talk-back sessions.",
    );
  }
  const url = normalizeMeetUrl(request.url);
@@ -780,7 +783,11 @@ export class GoogleMeetRuntime {
  this.#refreshSpeechReadiness(session);
  return;
}
if (!options.force && session.mode === "realtime" && evaluateSpeechReadiness(session).ready) {
if (
  !options.force &&
  isGoogleMeetTalkBackMode(session.mode) &&
  evaluateSpeechReadiness(session).ready
) {
  this.#refreshSpeechReadiness(session);
  return;
}
@@ -838,7 +845,7 @@ export class GoogleMeetRuntime {

async #ensureChromeRealtimeBridge(session: GoogleMeetSession) {
  if (
    session.mode !== "realtime" ||
    !isGoogleMeetTalkBackMode(session.mode) ||
    session.transport !== "chrome" ||
    session.state !== "active" ||
    !session.chrome ||

@@ -109,7 +109,8 @@ export function getGoogleMeetSetupStatus(
  const mode = options?.mode ?? config.defaultMode;
  const transport = options?.transport ?? config.defaultTransport;
  const needsChromeRealtimeAudio =
    mode === "realtime" && (transport === "chrome" || transport === "chrome-node");
    (mode === "agent" || mode === "bidi") &&
    (transport === "chrome" || transport === "chrome-node");
  const pluginEntries = asRecord(asRecord(fullConfig.plugins).entries);
  const pluginAllow = asRecord(fullConfig.plugins).allow;
  const voiceCallEntry = asRecord(pluginEntries["voice-call"]);
@@ -142,17 +143,24 @@ export function getGoogleMeetSetupStatus(
  });

  if (needsChromeRealtimeAudio) {
    const hasCommandPair = Boolean(
      config.chrome.audioInputCommand && config.chrome.audioOutputCommand,
    );
    const hasExternalBridge = Boolean(config.chrome.audioBridgeCommand);
    const agentModeExternalBridgeInvalid = mode === "agent" && hasExternalBridge;
    checks.push({
      id: "audio-bridge",
      ok: Boolean(
        config.chrome.audioBridgeCommand ||
          (config.chrome.audioInputCommand && config.chrome.audioOutputCommand),
      ),
      message: config.chrome.audioBridgeCommand
        ? "Chrome audio bridge command configured"
        : config.chrome.audioInputCommand && config.chrome.audioOutputCommand
          ? `Chrome command-pair realtime audio bridge configured (${config.chrome.audioFormat})`
          : "Chrome realtime audio bridge not configured",
      ok:
        mode === "agent"
          ? hasCommandPair && !agentModeExternalBridgeInvalid
          : hasExternalBridge || hasCommandPair,
      message: agentModeExternalBridgeInvalid
        ? "Chrome agent mode requires chrome.audioInputCommand and chrome.audioOutputCommand; chrome.audioBridgeCommand is bidi-only"
        : hasExternalBridge
          ? "Chrome audio bridge command configured"
          : hasCommandPair
            ? `Chrome command-pair talk-back audio bridge configured (${config.chrome.audioFormat})`
            : "Chrome talk-back audio bridge not configured",
    });
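
    // In agent mode this check only passes with the audioInputCommand/audioOutputCommand pair and
    // flags audioBridgeCommand as bidi-only; in bidi mode either the external bridge command or
    // the command pair is accepted.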
  } else if (transport === "chrome" || transport === "chrome-node") {
    checks.push({

@@ -2,12 +2,14 @@ import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
import { callGatewayFromCli } from "openclaw/plugin-sdk/gateway-runtime";
import type { PluginRuntime } from "openclaw/plugin-sdk/plugin-runtime";
import type { RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
import type { GoogleMeetConfig } from "../config.js";
import type { GoogleMeetConfig, GoogleMeetMode } from "../config.js";
import {
  startNodeAgentAudioBridge,
  startNodeRealtimeAudioBridge,
  type ChromeNodeRealtimeAudioBridgeHandle,
} from "../realtime-node.js";
import {
  startCommandAgentAudioBridge,
  startCommandRealtimeAudioBridge,
  type ChromeRealtimeAudioBridgeHandle,
} from "../realtime.js";
@@ -46,6 +48,10 @@ export const __testing = {
  meetStatusScriptForTest: meetStatusScript,
};

function isGoogleMeetTalkBackMode(mode: GoogleMeetMode): boolean {
  return mode === "agent" || mode === "bidi";
}

export function outputMentionsBlackHole2ch(output: string): boolean {
  return /\bBlackHole\s+2ch\b/i.test(output);
}
@@ -86,7 +92,7 @@ export async function launchChromeMeet(params: {
  config: GoogleMeetConfig;
  fullConfig: OpenClawConfig;
  meetingSessionId: string;
  mode: "realtime" | "transcribe";
  mode: GoogleMeetMode;
  url: string;
  logger: RuntimeLogger;
}): Promise<{
@@ -97,7 +103,7 @@ export async function launchChromeMeet(params: {
  browser?: GoogleMeetChromeHealth;
}> {
  const checkRealtimeAudioPrerequisites = async () => {
    if (params.mode !== "realtime") {
    if (!isGoogleMeetTalkBackMode(params.mode)) {
      return;
    }
    await assertBlackHole2chAvailable({
@@ -123,10 +129,15 @@ export async function launchChromeMeet(params: {
    | ({ type: "command-pair" } & ChromeRealtimeAudioBridgeHandle)
    | undefined
  > => {
    if (params.mode !== "realtime") {
    if (!isGoogleMeetTalkBackMode(params.mode)) {
      return undefined;
    }
    if (params.config.chrome.audioBridgeCommand) {
      if (params.mode === "agent") {
        throw new Error(
          "Chrome agent mode requires chrome.audioInputCommand and chrome.audioOutputCommand so OpenClaw can run STT and regular TTS directly.",
        );
      }
      const bridge = await params.runtime.system.runCommandWithTimeout(
        params.config.chrome.audioBridgeCommand,
        { timeoutMs: params.config.chrome.joinTimeoutMs },
@@ -140,20 +151,33 @@ export async function launchChromeMeet(params: {
    }
    if (!params.config.chrome.audioInputCommand || !params.config.chrome.audioOutputCommand) {
      throw new Error(
        "Chrome realtime mode requires chrome.audioInputCommand and chrome.audioOutputCommand, or chrome.audioBridgeCommand for an external bridge.",
        "Chrome talk-back mode requires chrome.audioInputCommand and chrome.audioOutputCommand, or chrome.audioBridgeCommand for an external bridge.",
      );
    }
    return {
      type: "command-pair",
      ...(await startCommandRealtimeAudioBridge({
        config: params.config,
        fullConfig: params.fullConfig,
        runtime: params.runtime,
        meetingSessionId: params.meetingSessionId,
        inputCommand: params.config.chrome.audioInputCommand,
        outputCommand: params.config.chrome.audioOutputCommand,
        logger: params.logger,
      })),
      ...(params.mode === "agent"
        ? await startCommandAgentAudioBridge({
            config: params.config,
            fullConfig: params.fullConfig,
            runtime: params.runtime,
            meetingSessionId: params.meetingSessionId,
            inputCommand: params.config.chrome.audioInputCommand,
            outputCommand: params.config.chrome.audioOutputCommand,
            logger: params.logger,
          })
        : await startCommandRealtimeAudioBridge({
            config: {
              ...params.config,
              realtime: { ...params.config.realtime, strategy: "bidi" },
            },
            fullConfig: params.fullConfig,
            runtime: params.runtime,
            meetingSessionId: params.meetingSessionId,
            inputCommand: params.config.chrome.audioInputCommand,
            outputCommand: params.config.chrome.audioOutputCommand,
            logger: params.logger,
          })),
    };
  };

@@ -170,7 +194,7 @@ export async function launchChromeMeet(params: {
    url: params.url,
  });
  const shouldStartRealtimeBridge =
    params.mode === "realtime" &&
    isGoogleMeetTalkBackMode(params.mode) &&
    result.browser?.inCall === true &&
    result.browser.micMuted !== true &&
    result.browser.manualActionRequired !== true;
@@ -387,7 +411,7 @@ function meetStatusScript(params: {
  }
  if (!readOnly && allowMicrophone && mic && /turn on microphone/i.test(buttonLabel(mic))) {
    mic.click();
    notes.push("Attempted to turn on the Meet microphone for realtime mode.");
    notes.push("Attempted to turn on the Meet microphone for talk-back mode.");
  }
  if (!readOnly && !allowMicrophone && mic && /turn off microphone/i.test(mic.getAttribute('aria-label') || text(mic))) {
    mic.click();
@@ -595,7 +619,7 @@ async function openMeetWithBrowserProxy(params: {
  runtime: PluginRuntime;
  nodeId: string;
  config: GoogleMeetConfig;
  mode: "realtime" | "transcribe";
  mode: GoogleMeetMode;
  url: string;
}): Promise<{ launched: boolean; browser?: GoogleMeetChromeHealth }> {
  return await openMeetWithBrowserRequest({
@@ -617,7 +641,7 @@ async function openMeetWithBrowserProxy(params: {
async function openMeetWithBrowserRequest(params: {
  callBrowser: BrowserRequestCaller;
  config: GoogleMeetConfig;
  mode: "realtime" | "transcribe";
  mode: GoogleMeetMode;
  url: string;
}): Promise<{ launched: boolean; browser?: GoogleMeetChromeHealth }> {
  if (!params.config.chrome.launch) {
@@ -670,7 +694,7 @@ async function openMeetWithBrowserRequest(params: {
  }

  const permissionNotes = await grantMeetMediaPermissions({
    allowMicrophone: params.mode === "realtime",
    allowMicrophone: isGoogleMeetTalkBackMode(params.mode),
    callBrowser: params.callBrowser,
    targetId,
    timeoutMs,
@@ -691,7 +715,7 @@ async function openMeetWithBrowserRequest(params: {
    kind: "evaluate",
    targetId,
    fn: meetStatusScript({
      allowMicrophone: params.mode === "realtime",
      allowMicrophone: isGoogleMeetTalkBackMode(params.mode),
      captureCaptions: params.mode === "transcribe",
      guestName: params.config.chrome.guestName,
      autoJoin: params.config.chrome.autoJoin,
@@ -700,7 +724,10 @@ async function openMeetWithBrowserRequest(params: {
    timeoutMs: Math.min(timeoutMs, 10_000),
  });
  browser = mergeBrowserNotes(parseMeetBrowserStatus(evaluated) ?? browser, permissionNotes);
  if (browser?.inCall === true && (params.mode !== "realtime" || browser.micMuted !== true)) {
  if (
    browser?.inCall === true &&
    (!isGoogleMeetTalkBackMode(params.mode) || browser.micMuted !== true)
  ) {
    return { launched: true, browser };
  }
  if (browser?.manualActionRequired === true) {
@@ -747,7 +774,7 @@ function isRecoverableMeetTab(tab: BrowserTab, url?: string): boolean {
async function inspectRecoverableMeetTab(params: {
  callBrowser: BrowserRequestCaller;
  config: GoogleMeetConfig;
  mode?: "realtime" | "transcribe";
  mode?: GoogleMeetMode;
  readOnly?: boolean;
  timeoutMs: number;
  tab: BrowserTab;
@@ -807,7 +834,7 @@ async function inspectRecoverableMeetTab(params: {

export async function recoverCurrentMeetTab(params: {
  config: GoogleMeetConfig;
  mode?: "realtime" | "transcribe";
  mode?: GoogleMeetMode;
  readOnly?: boolean;
  url?: string;
}): Promise<{
@@ -856,7 +883,7 @@ export async function recoverCurrentMeetTab(params: {
export async function recoverCurrentMeetTabOnNode(params: {
  runtime: PluginRuntime;
  config: GoogleMeetConfig;
  mode?: "realtime" | "transcribe";
  mode?: GoogleMeetMode;
  readOnly?: boolean;
  url?: string;
}): Promise<{
@@ -923,7 +950,7 @@ export async function launchChromeMeetOnNode(params: {
  config: GoogleMeetConfig;
  fullConfig: OpenClawConfig;
  meetingSessionId: string;
  mode: "realtime" | "transcribe";
  mode: GoogleMeetMode;
  url: string;
  logger: RuntimeLogger;
}): Promise<{
@@ -985,8 +1012,16 @@ export async function launchChromeMeetOnNode(params: {
  if (!result.bridgeId) {
    throw new Error("Google Meet node did not return an audio bridge id.");
  }
  const bridge = await startNodeRealtimeAudioBridge({
    config: params.config,
  const bridge = await (
    params.mode === "agent" ? startNodeAgentAudioBridge : startNodeRealtimeAudioBridge
  )({
    config:
      params.mode === "agent"
        ? params.config
        : {
            ...params.config,
            realtime: { ...params.config.realtime, strategy: "bidi" },
          },
    fullConfig: params.fullConfig,
    runtime: params.runtime,
    meetingSessionId: params.meetingSessionId,