mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 05:40:44 +00:00
feat(google-meet): add transcribe caption health
This commit is contained in:
@@ -9,6 +9,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Dependencies: refresh bundled runtime and plugin dependency pins, including Pi 0.71.1, OpenAI 6.35.0, Codex 0.128.0, Zod 4.4.1, and Matrix 41.4.0. Thanks @mariozechner.
|
||||
- Agents/workspace: add `agents.defaults.skipOptionalBootstrapFiles` for skipping selected optional workspace files during bootstrap without disabling required workspace setup. (#62110) Thanks @mainstay22.
|
||||
- Plugins/CLI: add first-class `git:` plugin installs with ref checkout, commit metadata, normal scanner/staging, and `plugins update` support for recorded git sources. Thanks @badlogic.
|
||||
- Google Meet: add live caption health for Chrome transcribe mode, including caption observer state, transcript counters, last caption text, and recent transcript lines in status and doctor output. Refs #72478. Thanks @DougButdorf.
|
||||
- Voice Call/Google Meet: add Twilio Meet join phase logs around pre-connect DTMF, realtime stream setup, and initial greeting handoff for easier live-call debugging. Thanks @donkeykong91 and @PfanP.
|
||||
- macOS app: move recent session context rows into a Context submenu while keeping usage and cost details root-level, so the menu bar companion stays compact with many active sessions. Thanks @guti.
|
||||
- Gateway/SDK: add SDK-facing tools.invoke RPC with shared HTTP policy, typed approval/refusal results, and SDK helper support. Refs #74705. Thanks @BunsDev and @ai-hpc.
|
||||
|
||||
@@ -169,7 +169,12 @@ and will not talk back into the meeting. Chrome joins in this mode also avoid
|
||||
OpenClaw's microphone/camera permission grant and avoid the Meet **Use
|
||||
microphone** path. If Meet shows an audio-choice interstitial, automation tries
|
||||
the no-microphone path and otherwise reports a manual action instead of opening
|
||||
the local microphone.
|
||||
the local microphone. In transcribe mode, managed Chrome transports also install
|
||||
a best-effort Meet caption observer. `googlemeet status --json` and
|
||||
`googlemeet doctor` surface `captioning`, `captionsEnabledAttempted`,
|
||||
`transcriptLines`, `lastCaptionAt`, `lastCaptionSpeaker`, `lastCaptionText`,
|
||||
and a short `recentTranscript` tail so operators can tell whether the browser
|
||||
joined the call and whether Meet captions are producing text.
|
||||
|
||||
During realtime sessions, `google_meet` status includes browser and audio bridge
|
||||
health such as `inCall`, `manualActionRequired`, `providerConnected`,
|
||||
@@ -1294,9 +1299,15 @@ openclaw googlemeet doctor
|
||||
```
|
||||
|
||||
Use `mode: "realtime"` for listen/talk-back. `mode: "transcribe"` intentionally
|
||||
does not start the duplex realtime voice bridge. `googlemeet test-speech`
|
||||
always checks the realtime path and reports whether bridge output bytes were
|
||||
observed for that invocation. If `speechOutputVerified` is false and
|
||||
does not start the duplex realtime voice bridge. For observe-only debugging,
|
||||
run `openclaw googlemeet status --json <session-id>` after participants speak
|
||||
and check `captioning`, `transcriptLines`, and `lastCaptionText`. If `inCall` is
|
||||
true but `transcriptLines` stays at `0`, Meet captions may be disabled, no one
|
||||
has spoken since the observer was installed, the Meet UI changed, or live
|
||||
captions are unavailable for the meeting language/account.
|
||||
|
||||
`googlemeet test-speech` always checks the realtime path and reports whether
|
||||
bridge output bytes were observed for that invocation. If `speechOutputVerified` is false and
|
||||
`speechOutputTimedOut` is true, the realtime provider may have accepted the
|
||||
utterance but OpenClaw did not see new output bytes reach the Chrome audio
|
||||
bridge.
|
||||
|
||||
@@ -3,6 +3,7 @@ import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import path from "node:path";
|
||||
import { PassThrough, Writable } from "node:stream";
|
||||
import { createContext, Script } from "node:vm";
|
||||
import type { RealtimeVoiceProviderPlugin } from "openclaw/plugin-sdk/realtime-voice";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import plugin, { __testing as googleMeetPluginTesting } from "./index.js";
|
||||
@@ -1635,7 +1636,25 @@ describe("google-meet plugin", () => {
|
||||
const { methods, runCommandWithTimeout } = setup({
|
||||
defaultMode: "transcribe",
|
||||
});
|
||||
const callGatewayFromCli = mockLocalMeetBrowserRequest();
|
||||
const callGatewayFromCli = mockLocalMeetBrowserRequest({
|
||||
inCall: true,
|
||||
micMuted: true,
|
||||
captioning: true,
|
||||
captionsEnabledAttempted: true,
|
||||
transcriptLines: 1,
|
||||
lastCaptionAt: "2026-04-27T10:00:00.000Z",
|
||||
lastCaptionSpeaker: "Alice",
|
||||
lastCaptionText: "Can everyone hear the agent?",
|
||||
recentTranscript: [
|
||||
{
|
||||
at: "2026-04-27T10:00:00.000Z",
|
||||
speaker: "Alice",
|
||||
text: "Can everyone hear the agent?",
|
||||
},
|
||||
],
|
||||
title: "Meet call",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
});
|
||||
const handler = methods.get("googlemeet.join") as
|
||||
| ((ctx: {
|
||||
params: Record<string, unknown>;
|
||||
@@ -1666,17 +1685,292 @@ describe("google-meet plugin", () => {
|
||||
([, , request]) => (request as { path?: string }).path === "/permissions/grant",
|
||||
),
|
||||
).toBe(false);
|
||||
expect(respond.mock.calls[0]?.[1]).toMatchObject({
|
||||
session: {
|
||||
chrome: {
|
||||
health: {
|
||||
captioning: true,
|
||||
captionsEnabledAttempted: true,
|
||||
transcriptLines: 1,
|
||||
lastCaptionSpeaker: "Alice",
|
||||
lastCaptionText: "Can everyone hear the agent?",
|
||||
recentTranscript: [
|
||||
{
|
||||
speaker: "Alice",
|
||||
text: "Can everyone hear the agent?",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
const actCall = callGatewayFromCli.mock.calls.find(
|
||||
([, , request]) => (request as { path?: string }).path === "/act",
|
||||
);
|
||||
expect(String((actCall?.[2] as { body?: { fn?: string } } | undefined)?.body?.fn)).toContain(
|
||||
"const allowMicrophone = false",
|
||||
);
|
||||
expect(String((actCall?.[2] as { body?: { fn?: string } } | undefined)?.body?.fn)).toContain(
|
||||
"const captureCaptions = true",
|
||||
);
|
||||
} finally {
|
||||
Object.defineProperty(process, "platform", { value: originalPlatform });
|
||||
}
|
||||
});
|
||||
|
||||
it("refreshes observe-only caption health when status is requested", async () => {
  // Scenario: the inspection performed while joining sees no captions yet;
  // the inspection triggered by `googlemeet.status` must surface the first
  // caption line instead of replaying the join-time snapshot.
  const meetUrl = "https://meet.google.com/abc-defg-hij";
  const newMeetTab = (url: string) => ({
    targetId: "local-meet-tab",
    title: "Meet",
    url,
  });
  // Browser health reported by the first `/act` evaluation (join).
  const healthBeforeCaptions = {
    inCall: true,
    captioning: false,
    captionsEnabledAttempted: true,
    transcriptLines: 0,
    title: "Meet call",
    url: meetUrl,
  };
  // Browser health reported by every later `/act` evaluation (status refresh).
  const healthAfterCaptions = {
    inCall: true,
    captioning: true,
    captionsEnabledAttempted: true,
    transcriptLines: 1,
    lastCaptionAt: "2026-04-27T10:00:00.000Z",
    lastCaptionSpeaker: "Alice",
    lastCaptionText: "Please capture this.",
    recentTranscript: [
      {
        at: "2026-04-27T10:00:00.000Z",
        speaker: "Alice",
        text: "Please capture this.",
      },
    ],
    title: "Meet call",
    url: meetUrl,
  };

  let tabIsOpen = false;
  let inspections = 0;
  // Fake local browser gateway: routes on the request path and fails loudly
  // on anything the scenario does not expect.
  const browserGateway = vi.fn(
    async (
      _method: string,
      _opts: unknown,
      params?: unknown,
      _extra?: unknown,
    ): Promise<Record<string, unknown>> => {
      const req = params as {
        path?: string;
        body?: { targetId?: string; url?: string };
      };
      switch (req.path) {
        case "/tabs":
          return { tabs: tabIsOpen ? [newMeetTab(meetUrl)] : [] };
        case "/tabs/open":
          tabIsOpen = true;
          return newMeetTab(req.body?.url ?? meetUrl);
        case "/tabs/focus":
          return { ok: true };
        case "/act":
          inspections += 1;
          return {
            result: JSON.stringify(inspections === 1 ? healthBeforeCaptions : healthAfterCaptions),
          };
        default:
          throw new Error(`unexpected browser request path ${req.path}`);
      }
    },
  );
  chromeTransportTesting.setDepsForTest({ callGatewayFromCli: browserGateway });
  const { methods } = setup({
    defaultMode: "transcribe",
    defaultTransport: "chrome",
  });

  // Join: the snapshot taken here must still show zero transcript lines.
  const joined = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.join", {
    url: meetUrl,
  })) as { session: { id: string; chrome?: { health?: { transcriptLines?: number } } } };
  expect(joined.session.chrome?.health?.transcriptLines).toBe(0);

  // Status: must re-inspect the tab and report the fresh caption data.
  const refreshed = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.status", {
    sessionId: joined.session.id,
  })) as {
    session?: {
      chrome?: {
        health?: {
          captioning?: boolean;
          transcriptLines?: number;
          lastCaptionText?: string;
        };
      };
    };
  };
  const refreshedHealth = refreshed.session?.chrome?.health;

  expect(refreshedHealth).toMatchObject({
    captioning: true,
    transcriptLines: 1,
    lastCaptionText: "Please capture this.",
  });
  // The refresh path focuses the recovered Meet tab before inspecting it.
  const focusRequest = expect.objectContaining({
    method: "POST",
    path: "/tabs/focus",
    body: { targetId: "local-meet-tab" },
  });
  expect(browserGateway).toHaveBeenCalledWith("browser.request", expect.any(Object), focusRequest, {
    progress: false,
  });
});
|
||||
|
||||
it("does not mutate realtime browser prompts when status is requested", async () => {
  // Scenario: a realtime chrome-node session is stuck on a manual-action
  // prompt. Asking for status must report that prompt from the stored health
  // without issuing any further browser.proxy calls to the node.
  const meetUrl = "https://meet.google.com/abc-defg-hij";
  // Wraps a browser result in the node invoke envelope.
  const proxied = <T>(result: T) => ({ payload: { result } });
  let tabWasOpened = false;

  const { methods, nodesInvoke } = setup(
    {
      defaultMode: "realtime",
      defaultTransport: "chrome-node",
    },
    {
      nodesInvokeHandler: async ({ command, params }) => {
        const req = params as { path?: string; body?: { url?: string; targetId?: string } };
        if (command === "browser.proxy") {
          switch (req.path) {
            case "/tabs":
              return proxied({ running: true, tabs: [] });
            case "/tabs/open":
              tabWasOpened = true;
              return proxied({
                targetId: "tab-1",
                title: "Meet",
                url: req.body?.url ?? meetUrl,
              });
            case "/tabs/focus":
            case "/permissions/grant":
              return proxied({ ok: true });
            case "/act":
              return proxied({
                ok: true,
                targetId: req.body?.targetId ?? "tab-1",
                result: JSON.stringify({
                  inCall: false,
                  manualActionRequired: true,
                  manualActionReason: "meet-audio-choice-required",
                  manualActionMessage: "Choose the Meet microphone path manually.",
                  title: "Meet",
                  url: meetUrl,
                }),
              });
            default:
              // Unknown proxy paths fall through to the generic failure below.
              break;
          }
        }
        if (command === "googlemeet.chrome") {
          return { payload: { launched: tabWasOpened } };
        }
        throw new Error(`unexpected invoke ${command}`);
      },
    },
  );

  const joined = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.join", {
    url: meetUrl,
  })) as { session: { id: string } };
  // Forget the join-time traffic so only status-time invocations are checked.
  nodesInvoke.mockClear();

  const reported = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.status", {
    sessionId: joined.session.id,
  })) as { session?: { chrome?: { health?: { manualActionRequired?: boolean } } } };
  const manualActionRequired = reported.session?.chrome?.health?.manualActionRequired;

  expect(manualActionRequired).toBe(true);
  expect(nodesInvoke).not.toHaveBeenCalledWith(
    expect.objectContaining({ command: "browser.proxy" }),
  );
});
|
||||
|
||||
it("retries caption enable until the captions button is available", () => {
  // Scenario: the generated Meet status script runs twice inside a VM sandbox
  // with a fake DOM. On the first run only the "Leave call" button exists, so
  // no enable attempt may be recorded; once the captions button appears the
  // second run must click it exactly once.
  type CaptionAttempt = { captionsEnabledAttempted?: boolean };
  type CaptionObserverState = { enabledAttempted?: boolean };

  // Minimal stand-in for an HTMLButtonElement as read by the status script.
  const fakeButton = (ariaLabel: string) => ({
    disabled: false,
    innerText: "",
    textContent: "",
    click: vi.fn(),
    getAttribute: vi.fn((name: string) => (name === "aria-label" ? ariaLabel : null)),
  });
  const leaveButton = fakeButton("Leave call");
  const captionButton = fakeButton("Turn on captions");
  const page = { buttons: [leaveButton] };
  const windowState: Record<string, unknown> = {};

  // Only `button` queries return elements; every other selector is empty.
  const document = {
    body: { innerText: "", textContent: "" },
    title: "Meet",
    querySelector: vi.fn(() => null),
    querySelectorAll: vi.fn((selector: string) => (selector === "button" ? page.buttons : [])),
  };
  const location = {
    href: "https://meet.google.com/abc-defg-hij",
    hostname: "meet.google.com",
  };
  const sandbox = createContext({
    Date,
    JSON,
    String,
    document,
    location,
    MutationObserver: class {
      observe = vi.fn();
    },
    window: windowState,
  });

  const scriptSource = chromeTransportTesting.meetStatusScriptForTest({
    allowMicrophone: false,
    autoJoin: false,
    captureCaptions: true,
    guestName: "OpenClaw Agent",
  });
  const runStatusScript = new Script(`(${scriptSource})`).runInContext(sandbox) as () => string;
  const readObserverState = () => windowState.__openclawMeetCaptions as CaptionObserverState;

  // First pass: captions button not rendered yet.
  const firstReport = JSON.parse(runStatusScript()) as CaptionAttempt;
  const observerAfterFirst = readObserverState();
  expect(firstReport.captionsEnabledAttempted).toBe(false);
  expect(observerAfterFirst.enabledAttempted).toBe(false);
  expect(captionButton.click).not.toHaveBeenCalled();

  // Second pass: captions button is now present and must be clicked once.
  page.buttons = [leaveButton, captionButton];
  const secondReport = JSON.parse(runStatusScript()) as CaptionAttempt;
  const observerAfterSecond = readObserverState();
  expect(secondReport.captionsEnabledAttempted).toBe(true);
  expect(observerAfterSecond.enabledAttempted).toBe(true);
  expect(captionButton.click).toHaveBeenCalledTimes(1);
});
|
||||
|
||||
it("joins Chrome on a paired node without local Chrome or BlackHole", async () => {
|
||||
const { methods, nodesList, nodesInvoke } = setup(
|
||||
{
|
||||
|
||||
@@ -676,7 +676,7 @@ export default definePluginEntry({
|
||||
async ({ params, respond }: GatewayRequestHandlerOptions) => {
|
||||
try {
|
||||
const rt = await ensureRuntime();
|
||||
respond(true, rt.status(normalizeOptionalString(params?.sessionId)));
|
||||
respond(true, await rt.status(normalizeOptionalString(params?.sessionId)));
|
||||
} catch (err) {
|
||||
sendError(respond, err);
|
||||
}
|
||||
|
||||
@@ -603,7 +603,7 @@ describe("google-meet CLI", () => {
|
||||
try {
|
||||
await setupCli({
|
||||
runtime: {
|
||||
status: () => ({
|
||||
status: async () => ({
|
||||
found: true,
|
||||
sessions: [
|
||||
{
|
||||
@@ -684,7 +684,7 @@ describe("google-meet CLI", () => {
|
||||
try {
|
||||
await setupCli({
|
||||
runtime: {
|
||||
status: () => ({
|
||||
status: async () => ({
|
||||
found: true,
|
||||
session: {
|
||||
id: "meet_1",
|
||||
@@ -703,6 +703,11 @@ describe("google-meet CLI", () => {
|
||||
audioBridge: { type: "node-command-pair", provider: "openai" },
|
||||
health: {
|
||||
inCall: true,
|
||||
captioning: true,
|
||||
transcriptLines: 2,
|
||||
lastCaptionAt: "2026-04-25T00:00:03.000Z",
|
||||
lastCaptionSpeaker: "Alice",
|
||||
lastCaptionText: "Can everyone hear OpenClaw?",
|
||||
providerConnected: true,
|
||||
realtimeReady: true,
|
||||
audioInputActive: true,
|
||||
@@ -720,6 +725,9 @@ describe("google-meet CLI", () => {
|
||||
expect(stdout.output()).toContain("session: meet_1");
|
||||
expect(stdout.output()).toContain("node: node-1");
|
||||
expect(stdout.output()).toContain("provider connected: yes");
|
||||
expect(stdout.output()).toContain("captioning: yes");
|
||||
expect(stdout.output()).toContain("transcript lines: 2");
|
||||
expect(stdout.output()).toContain("last caption text: Alice: Can everyone hear OpenClaw?");
|
||||
expect(stdout.output()).toContain("audio input active: yes");
|
||||
expect(stdout.output()).toContain("audio output active: no");
|
||||
} finally {
|
||||
@@ -732,7 +740,7 @@ describe("google-meet CLI", () => {
|
||||
try {
|
||||
await setupCli({
|
||||
runtime: {
|
||||
status: () => ({
|
||||
status: async () => ({
|
||||
found: true,
|
||||
session: {
|
||||
id: "meet_1",
|
||||
|
||||
@@ -237,7 +237,7 @@ function formatDuration(value: number | undefined): string {
|
||||
: `${minutes}m ${seconds.toString().padStart(2, "0")}s`;
|
||||
}
|
||||
|
||||
function writeDoctorStatus(status: ReturnType<GoogleMeetRuntime["status"]>): void {
|
||||
function writeDoctorStatus(status: Awaited<ReturnType<GoogleMeetRuntime["status"]>>): void {
|
||||
if (!status.found) {
|
||||
writeStdoutLine("Google Meet session: not found");
|
||||
return;
|
||||
@@ -272,6 +272,10 @@ function writeDoctorStatus(status: ReturnType<GoogleMeetRuntime["status"]>): voi
|
||||
session.chrome?.audioBridge?.provider ?? session.realtime.provider ?? "n/a",
|
||||
);
|
||||
writeStdoutLine("in call: %s", formatBoolean(health?.inCall));
|
||||
writeStdoutLine("lobby waiting: %s", formatBoolean(health?.lobbyWaiting));
|
||||
writeStdoutLine("captioning: %s", formatBoolean(health?.captioning));
|
||||
writeStdoutLine("transcript lines: %s", health?.transcriptLines ?? 0);
|
||||
writeStdoutLine("last caption: %s", formatOptional(health?.lastCaptionAt));
|
||||
writeStdoutLine("manual action: %s", formatBoolean(health?.manualActionRequired));
|
||||
if (health?.manualActionRequired) {
|
||||
writeStdoutLine("manual reason: %s", formatOptional(health.manualActionReason));
|
||||
@@ -298,6 +302,10 @@ function writeDoctorStatus(status: ReturnType<GoogleMeetRuntime["status"]>): voi
|
||||
);
|
||||
writeStdoutLine("bridge closed: %s", formatBoolean(health?.bridgeClosed));
|
||||
writeStdoutLine("browser url: %s", formatOptional(health?.browserUrl));
|
||||
if (health?.lastCaptionText) {
|
||||
const speaker = health.lastCaptionSpeaker ? `${health.lastCaptionSpeaker}: ` : "";
|
||||
writeStdoutLine("last caption text: %s%s", speaker, health.lastCaptionText);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1947,7 +1955,7 @@ export function registerGoogleMeetCli(params: {
|
||||
.option("--json", "Print JSON output", false)
|
||||
.action(async (sessionId?: string) => {
|
||||
const rt = await params.ensureRuntime();
|
||||
writeStdoutJson(rt.status(sessionId));
|
||||
writeStdoutJson(await rt.status(sessionId));
|
||||
});
|
||||
|
||||
root
|
||||
@@ -1974,7 +1982,7 @@ export function registerGoogleMeetCli(params: {
|
||||
return;
|
||||
}
|
||||
const rt = await params.ensureRuntime();
|
||||
const status = rt.status(sessionId);
|
||||
const status = await rt.status(sessionId);
|
||||
if (options.json) {
|
||||
writeStdoutJson(status);
|
||||
return;
|
||||
|
||||
@@ -169,16 +169,23 @@ export class GoogleMeetRuntime {
|
||||
return [...this.#sessions.values()].toSorted((a, b) => a.createdAt.localeCompare(b.createdAt));
|
||||
}
|
||||
|
||||
status(sessionId?: string): {
|
||||
async status(sessionId?: string): Promise<{
|
||||
found: boolean;
|
||||
session?: GoogleMeetSession;
|
||||
sessions?: GoogleMeetSession[];
|
||||
} {
|
||||
}> {
|
||||
this.#refreshHealth(sessionId);
|
||||
if (!sessionId) {
|
||||
return { found: true, sessions: this.list() };
|
||||
const sessions = [...this.#sessions.values()].toSorted((a, b) =>
|
||||
a.createdAt.localeCompare(b.createdAt),
|
||||
);
|
||||
await Promise.all(sessions.map((session) => this.#refreshCaptionHealthForSession(session)));
|
||||
return { found: true, sessions };
|
||||
}
|
||||
const session = this.#sessions.get(sessionId);
|
||||
if (session) {
|
||||
await this.#refreshCaptionHealthForSession(session);
|
||||
}
|
||||
return session ? { found: true, session } : { found: false };
|
||||
}
|
||||
|
||||
@@ -590,8 +597,20 @@ export class GoogleMeetRuntime {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Refreshes the health data reported by `status()` for one session.
 *
 * Transcribe-mode sessions are handed to the browser health refresh; every
 * other mode only has its speech readiness refreshed.
 */
async #refreshCaptionHealthForSession(session: GoogleMeetSession) {
  if (session.mode === "transcribe") {
    await this.#refreshBrowserHealthForChromeSession(session);
    return;
  }
  this.#refreshSpeechReadiness(session);
}
|
||||
|
||||
async #refreshBrowserHealthForChromeSession(session: GoogleMeetSession) {
|
||||
if (!isManagedChromeBrowserSession(session) || evaluateSpeechReadiness(session).ready) {
|
||||
if (!isManagedChromeBrowserSession(session)) {
|
||||
this.#refreshSpeechReadiness(session);
|
||||
return;
|
||||
}
|
||||
if (session.mode === "realtime" && evaluateSpeechReadiness(session).ready) {
|
||||
this.#refreshSpeechReadiness(session);
|
||||
return;
|
||||
}
|
||||
@@ -601,10 +620,12 @@ export class GoogleMeetRuntime {
|
||||
? await recoverCurrentMeetTabOnNode({
|
||||
runtime: this.params.runtime,
|
||||
config: this.params.config,
|
||||
mode: session.mode,
|
||||
url: session.url,
|
||||
})
|
||||
: await recoverCurrentMeetTab({
|
||||
config: this.params.config,
|
||||
mode: session.mode,
|
||||
url: session.url,
|
||||
});
|
||||
if (result.found && result.browser && session.chrome) {
|
||||
|
||||
@@ -43,6 +43,7 @@ export const __testing = {
|
||||
setDepsForTest(deps: { callGatewayFromCli?: typeof callGatewayFromCli } | null) {
|
||||
chromeTransportDeps.callGatewayFromCli = deps?.callGatewayFromCli ?? callGatewayFromCli;
|
||||
},
|
||||
meetStatusScriptForTest: meetStatusScript,
|
||||
};
|
||||
|
||||
export function outputMentionsBlackHole2ch(output: string): boolean {
|
||||
@@ -209,6 +210,15 @@ function parseMeetBrowserStatus(result: unknown): GoogleMeetChromeHealth | undef
|
||||
const parsed = JSON.parse(raw) as {
|
||||
inCall?: boolean;
|
||||
micMuted?: boolean;
|
||||
lobbyWaiting?: boolean;
|
||||
leaveReason?: string;
|
||||
captioning?: boolean;
|
||||
captionsEnabledAttempted?: boolean;
|
||||
transcriptLines?: number;
|
||||
lastCaptionAt?: string;
|
||||
lastCaptionSpeaker?: string;
|
||||
lastCaptionText?: string;
|
||||
recentTranscript?: GoogleMeetChromeHealth["recentTranscript"];
|
||||
manualActionRequired?: boolean;
|
||||
manualActionReason?: GoogleMeetChromeHealth["manualActionReason"];
|
||||
manualActionMessage?: string;
|
||||
@@ -219,6 +229,15 @@ function parseMeetBrowserStatus(result: unknown): GoogleMeetChromeHealth | undef
|
||||
return {
|
||||
inCall: parsed.inCall,
|
||||
micMuted: parsed.micMuted,
|
||||
lobbyWaiting: parsed.lobbyWaiting,
|
||||
leaveReason: parsed.leaveReason,
|
||||
captioning: parsed.captioning,
|
||||
captionsEnabledAttempted: parsed.captionsEnabledAttempted,
|
||||
transcriptLines: parsed.transcriptLines,
|
||||
lastCaptionAt: parsed.lastCaptionAt,
|
||||
lastCaptionSpeaker: parsed.lastCaptionSpeaker,
|
||||
lastCaptionText: parsed.lastCaptionText,
|
||||
recentTranscript: parsed.recentTranscript,
|
||||
manualActionRequired: parsed.manualActionRequired,
|
||||
manualActionReason: parsed.manualActionReason,
|
||||
manualActionMessage: parsed.manualActionMessage,
|
||||
@@ -306,11 +325,13 @@ async function grantMeetMediaPermissions(params: {
|
||||
function meetStatusScript(params: {
|
||||
allowMicrophone: boolean;
|
||||
autoJoin: boolean;
|
||||
captureCaptions: boolean;
|
||||
guestName: string;
|
||||
}) {
|
||||
return `() => {
|
||||
const text = (node) => (node?.innerText || node?.textContent || "").trim();
|
||||
const allowMicrophone = ${JSON.stringify(params.allowMicrophone)};
|
||||
const captureCaptions = ${JSON.stringify(params.captureCaptions)};
|
||||
const buttons = [...document.querySelectorAll('button')];
|
||||
const notes = [];
|
||||
const findButton = (pattern) =>
|
||||
@@ -356,6 +377,95 @@ function meetStatusScript(params: {
|
||||
notes.push("Skipped Meet microphone prompt for observe-only mode.");
|
||||
}
|
||||
const inCall = buttons.some((button) => /leave call/i.test(button.getAttribute('aria-label') || text(button)));
|
||||
let captioning = false;
|
||||
let captionsEnabledAttempted = false;
|
||||
let transcriptLines = 0;
|
||||
let lastCaptionAt;
|
||||
let lastCaptionSpeaker;
|
||||
let lastCaptionText;
|
||||
let recentTranscript = [];
|
||||
const captionSelector = '[role="region"][aria-label*="aption" i], [aria-live="polite"][role="region"], div[aria-live="polite"]';
|
||||
const captionState = (() => {
|
||||
if (!captureCaptions) return undefined;
|
||||
const w = window;
|
||||
if (!inCall && !w.__openclawMeetCaptions) return undefined;
|
||||
if (!w.__openclawMeetCaptions) {
|
||||
w.__openclawMeetCaptions = {
|
||||
enabledAttempted: false,
|
||||
observerInstalled: false,
|
||||
lines: [],
|
||||
seen: {}
|
||||
};
|
||||
}
|
||||
return w.__openclawMeetCaptions;
|
||||
})();
|
||||
const recordCaption = (speaker, captionText) => {
|
||||
if (!captionState) return;
|
||||
const clean = String(captionText || "").replace(/\\s+/g, " ").trim();
|
||||
const cleanSpeaker = String(speaker || "").replace(/\\s+/g, " ").trim();
|
||||
if (!clean || clean.length < 2) return;
|
||||
if (/^(turn on captions|turn off captions|captions)$/i.test(clean)) return;
|
||||
const key = (cleanSpeaker + "\\n" + clean).toLowerCase();
|
||||
if (captionState.seen[key]) return;
|
||||
captionState.seen[key] = true;
|
||||
const entry = { at: new Date().toISOString(), speaker: cleanSpeaker || undefined, text: clean };
|
||||
captionState.lines.push(entry);
|
||||
if (captionState.lines.length > 50) captionState.lines.splice(0, captionState.lines.length - 50);
|
||||
};
|
||||
const scrapeCaptions = () => {
|
||||
if (!captionState) return;
|
||||
const regions = [...document.querySelectorAll(captionSelector)];
|
||||
for (const region of regions) {
|
||||
const raw = text(region);
|
||||
if (!raw) continue;
|
||||
const pieces = raw.split(/\\n+/).map((part) => part.trim()).filter(Boolean);
|
||||
if (pieces.length >= 2) {
|
||||
recordCaption(pieces[0], pieces.slice(1).join(" "));
|
||||
} else {
|
||||
recordCaption("", pieces[0] || raw);
|
||||
}
|
||||
}
|
||||
};
|
||||
if (captionState) {
|
||||
if (inCall && !captionState.enabledAttempted) {
|
||||
const captionButton = findButton(/turn on captions|show captions|captions/i);
|
||||
const captionLabel = captionButton ? (captionButton.getAttribute("aria-label") || captionButton.getAttribute("data-tooltip") || text(captionButton)) : "";
|
||||
if (captionButton) {
|
||||
captionState.enabledAttempted = true;
|
||||
captionsEnabledAttempted = true;
|
||||
if (!/turn off captions|hide captions/i.test(captionLabel)) {
|
||||
captionButton.click();
|
||||
notes.push("Attempted to enable Meet captions for observe-only transcript health.");
|
||||
}
|
||||
}
|
||||
} else if (captionState.enabledAttempted) {
|
||||
captionsEnabledAttempted = true;
|
||||
}
|
||||
if (inCall && !captionState.observerInstalled) {
|
||||
captionState.observerInstalled = true;
|
||||
new MutationObserver(scrapeCaptions).observe(document.body, {
|
||||
childList: true,
|
||||
subtree: true,
|
||||
characterData: true
|
||||
});
|
||||
notes.push("Installed Meet caption observer for observe-only transcript health.");
|
||||
}
|
||||
if (inCall) {
|
||||
scrapeCaptions();
|
||||
}
|
||||
const lines = Array.isArray(captionState.lines) ? captionState.lines : [];
|
||||
const last = lines[lines.length - 1];
|
||||
captioning = document.querySelector(captionSelector) !== null || lines.length > 0;
|
||||
transcriptLines = lines.length;
|
||||
lastCaptionAt = last?.at;
|
||||
lastCaptionSpeaker = last?.speaker;
|
||||
lastCaptionText = last?.text;
|
||||
recentTranscript = lines.slice(-5);
|
||||
}
|
||||
const lobbyWaiting = !inCall && /asking to be let in|you.?ll join when someone lets you in|waiting to be let in|ask to join/i.test(pageText);
|
||||
const leaveReason = /you left the meeting|you.?ve left the meeting|removed from the meeting|you were removed|call ended|meeting ended/i.test(pageText)
|
||||
? pageText.match(/you left the meeting|you.?ve left the meeting|removed from the meeting|you were removed|call ended|meeting ended/i)?.[0]
|
||||
: undefined;
|
||||
let manualActionReason;
|
||||
let manualActionMessage;
|
||||
if (!inCall && (host === "accounts.google.com" || /use your google account|to continue to google meet|choose an account|sign in to (join|continue)/i.test(pageText))) {
|
||||
@@ -380,6 +490,15 @@ function meetStatusScript(params: {
|
||||
clickedMicrophoneChoice: Boolean(allowMicrophone && microphoneChoice),
|
||||
inCall,
|
||||
micMuted: mic ? /turn on microphone/i.test(mic.getAttribute('aria-label') || text(mic)) : undefined,
|
||||
lobbyWaiting,
|
||||
leaveReason,
|
||||
captioning,
|
||||
captionsEnabledAttempted,
|
||||
transcriptLines,
|
||||
lastCaptionAt,
|
||||
lastCaptionSpeaker,
|
||||
lastCaptionText,
|
||||
recentTranscript,
|
||||
manualActionRequired: Boolean(manualActionReason),
|
||||
manualActionReason,
|
||||
manualActionMessage,
|
||||
@@ -490,6 +609,7 @@ async function openMeetWithBrowserRequest(params: {
|
||||
targetId,
|
||||
fn: meetStatusScript({
|
||||
allowMicrophone: params.mode === "realtime",
|
||||
captureCaptions: params.mode === "transcribe",
|
||||
guestName: params.config.chrome.guestName,
|
||||
autoJoin: params.config.chrome.autoJoin,
|
||||
}),
|
||||
@@ -544,10 +664,12 @@ function isRecoverableMeetTab(tab: BrowserTab, url?: string): boolean {
|
||||
async function inspectRecoverableMeetTab(params: {
|
||||
callBrowser: BrowserRequestCaller;
|
||||
config: GoogleMeetConfig;
|
||||
mode?: "realtime" | "transcribe";
|
||||
timeoutMs: number;
|
||||
tab: BrowserTab;
|
||||
targetId: string;
|
||||
}) {
|
||||
const allowMicrophone = params.mode !== "transcribe";
|
||||
await params.callBrowser({
|
||||
method: "POST",
|
||||
path: "/tabs/focus",
|
||||
@@ -555,7 +677,7 @@ async function inspectRecoverableMeetTab(params: {
|
||||
timeoutMs: Math.min(params.timeoutMs, 5_000),
|
||||
});
|
||||
const permissionNotes = await grantMeetMediaPermissions({
|
||||
allowMicrophone: true,
|
||||
allowMicrophone,
|
||||
callBrowser: params.callBrowser,
|
||||
timeoutMs: params.timeoutMs,
|
||||
});
|
||||
@@ -566,7 +688,8 @@ async function inspectRecoverableMeetTab(params: {
|
||||
kind: "evaluate",
|
||||
targetId: params.targetId,
|
||||
fn: meetStatusScript({
|
||||
allowMicrophone: true,
|
||||
allowMicrophone,
|
||||
captureCaptions: params.mode === "transcribe",
|
||||
guestName: params.config.chrome.guestName,
|
||||
autoJoin: false,
|
||||
}),
|
||||
@@ -596,6 +719,7 @@ async function inspectRecoverableMeetTab(params: {
|
||||
|
||||
export async function recoverCurrentMeetTab(params: {
|
||||
config: GoogleMeetConfig;
|
||||
mode?: "realtime" | "transcribe";
|
||||
url?: string;
|
||||
}): Promise<{
|
||||
transport: "chrome";
|
||||
@@ -631,6 +755,7 @@ export async function recoverCurrentMeetTab(params: {
|
||||
...(await inspectRecoverableMeetTab({
|
||||
callBrowser: callLocalBrowserRequest,
|
||||
config: params.config,
|
||||
mode: params.mode,
|
||||
timeoutMs,
|
||||
tab,
|
||||
targetId,
|
||||
@@ -641,6 +766,7 @@ export async function recoverCurrentMeetTab(params: {
|
||||
export async function recoverCurrentMeetTabOnNode(params: {
|
||||
runtime: PluginRuntime;
|
||||
config: GoogleMeetConfig;
|
||||
mode?: "realtime" | "transcribe";
|
||||
url?: string;
|
||||
}): Promise<{
|
||||
transport: "chrome-node";
|
||||
@@ -692,6 +818,7 @@ export async function recoverCurrentMeetTabOnNode(params: {
|
||||
timeoutMs: request.timeoutMs,
|
||||
}),
|
||||
config: params.config,
|
||||
mode: params.mode,
|
||||
timeoutMs,
|
||||
tab,
|
||||
targetId,
|
||||
|
||||
@@ -28,6 +28,19 @@ export type GoogleMeetSpeechBlockedReason =
|
||||
export type GoogleMeetChromeHealth = {
|
||||
inCall?: boolean;
|
||||
micMuted?: boolean;
|
||||
lobbyWaiting?: boolean;
|
||||
leaveReason?: string;
|
||||
captioning?: boolean;
|
||||
captionsEnabledAttempted?: boolean;
|
||||
transcriptLines?: number;
|
||||
lastCaptionAt?: string;
|
||||
lastCaptionSpeaker?: string;
|
||||
lastCaptionText?: string;
|
||||
recentTranscript?: Array<{
|
||||
at?: string;
|
||||
speaker?: string;
|
||||
text: string;
|
||||
}>;
|
||||
manualActionRequired?: boolean;
|
||||
manualActionReason?: GoogleMeetManualActionReason;
|
||||
manualActionMessage?: string;
|
||||
|
||||
Reference in New Issue
Block a user