feat(google-meet): add transcribe caption health

This commit is contained in:
Peter Steinberger
2026-05-01 14:29:23 +01:00
parent f9b47ad2a1
commit f221bc85a0
9 changed files with 501 additions and 18 deletions

View File

@@ -9,6 +9,7 @@ Docs: https://docs.openclaw.ai
- Dependencies: refresh bundled runtime and plugin dependency pins, including Pi 0.71.1, OpenAI 6.35.0, Codex 0.128.0, Zod 4.4.1, and Matrix 41.4.0. Thanks @mariozechner.
- Agents/workspace: add `agents.defaults.skipOptionalBootstrapFiles` for skipping selected optional workspace files during bootstrap without disabling required workspace setup. (#62110) Thanks @mainstay22.
- Plugins/CLI: add first-class `git:` plugin installs with ref checkout, commit metadata, normal scanner/staging, and `plugins update` support for recorded git sources. Thanks @badlogic.
- Google Meet: add live caption health for Chrome transcribe mode, including caption observer state, transcript counters, last caption text, and recent transcript lines in status and doctor output. Refs #72478. Thanks @DougButdorf.
- Voice Call/Google Meet: add Twilio Meet join phase logs around pre-connect DTMF, realtime stream setup, and initial greeting handoff for easier live-call debugging. Thanks @donkeykong91 and @PfanP.
- macOS app: move recent session context rows into a Context submenu while keeping usage and cost details root-level, so the menu bar companion stays compact with many active sessions. Thanks @guti.
- Gateway/SDK: add SDK-facing tools.invoke RPC with shared HTTP policy, typed approval/refusal results, and SDK helper support. Refs #74705. Thanks @BunsDev and @ai-hpc.

View File

@@ -169,7 +169,12 @@ and will not talk back into the meeting. Chrome joins in this mode also avoid
OpenClaw's microphone/camera permission grant and avoid the Meet **Use
microphone** path. If Meet shows an audio-choice interstitial, automation tries
the no-microphone path and otherwise reports a manual action instead of opening
the local microphone.
the local microphone. In transcribe mode, managed Chrome transports also install
a best-effort Meet caption observer. `googlemeet status --json` and
`googlemeet doctor` surface `captioning`, `captionsEnabledAttempted`,
`transcriptLines`, `lastCaptionAt`, `lastCaptionSpeaker`, `lastCaptionText`,
and a short `recentTranscript` tail so operators can tell whether the browser
joined the call and whether Meet captions are producing text.
During realtime sessions, `google_meet` status includes browser and audio bridge
health such as `inCall`, `manualActionRequired`, `providerConnected`,
@@ -1294,9 +1299,15 @@ openclaw googlemeet doctor
```
Use `mode: "realtime"` for listen/talk-back. `mode: "transcribe"` intentionally
does not start the duplex realtime voice bridge. `googlemeet test-speech`
always checks the realtime path and reports whether bridge output bytes were
observed for that invocation. If `speechOutputVerified` is false and
does not start the duplex realtime voice bridge. For observe-only debugging,
run `openclaw googlemeet status --json <session-id>` after participants speak
and check `captioning`, `transcriptLines`, and `lastCaptionText`. If `inCall` is
true but `transcriptLines` stays at `0`, Meet captions may be disabled, no one
has spoken since the observer was installed, the Meet UI changed, or live
captions are unavailable for the meeting language/account.
`googlemeet test-speech` always checks the realtime path and reports whether
bridge output bytes were observed for that invocation. If `speechOutputVerified` is false and
`speechOutputTimedOut` is true, the realtime provider may have accepted the
utterance but OpenClaw did not see new output bytes reach the Chrome audio
bridge.

View File

@@ -3,6 +3,7 @@ import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import path from "node:path";
import { PassThrough, Writable } from "node:stream";
import { createContext, Script } from "node:vm";
import type { RealtimeVoiceProviderPlugin } from "openclaw/plugin-sdk/realtime-voice";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import plugin, { __testing as googleMeetPluginTesting } from "./index.js";
@@ -1635,7 +1636,25 @@ describe("google-meet plugin", () => {
const { methods, runCommandWithTimeout } = setup({
defaultMode: "transcribe",
});
const callGatewayFromCli = mockLocalMeetBrowserRequest();
const callGatewayFromCli = mockLocalMeetBrowserRequest({
inCall: true,
micMuted: true,
captioning: true,
captionsEnabledAttempted: true,
transcriptLines: 1,
lastCaptionAt: "2026-04-27T10:00:00.000Z",
lastCaptionSpeaker: "Alice",
lastCaptionText: "Can everyone hear the agent?",
recentTranscript: [
{
at: "2026-04-27T10:00:00.000Z",
speaker: "Alice",
text: "Can everyone hear the agent?",
},
],
title: "Meet call",
url: "https://meet.google.com/abc-defg-hij",
});
const handler = methods.get("googlemeet.join") as
| ((ctx: {
params: Record<string, unknown>;
@@ -1666,17 +1685,292 @@ describe("google-meet plugin", () => {
([, , request]) => (request as { path?: string }).path === "/permissions/grant",
),
).toBe(false);
expect(respond.mock.calls[0]?.[1]).toMatchObject({
session: {
chrome: {
health: {
captioning: true,
captionsEnabledAttempted: true,
transcriptLines: 1,
lastCaptionSpeaker: "Alice",
lastCaptionText: "Can everyone hear the agent?",
recentTranscript: [
{
speaker: "Alice",
text: "Can everyone hear the agent?",
},
],
},
},
},
});
const actCall = callGatewayFromCli.mock.calls.find(
([, , request]) => (request as { path?: string }).path === "/act",
);
expect(String((actCall?.[2] as { body?: { fn?: string } } | undefined)?.body?.fn)).toContain(
"const allowMicrophone = false",
);
expect(String((actCall?.[2] as { body?: { fn?: string } } | undefined)?.body?.fn)).toContain(
"const captureCaptions = true",
);
} finally {
Object.defineProperty(process, "platform", { value: originalPlatform });
}
});
// Verifies that a transcribe-mode (observe-only) Chrome session re-polls the
// Meet tab when status is requested: join sees 0 transcript lines, then the
// status call runs a second /act pass and surfaces the fresh caption health.
it("refreshes observe-only caption health when status is requested", async () => {
// Mutable mock state: whether the Meet tab has been opened, and how many
// times /act has been evaluated (first pass = no captions yet, second = one line).
let openedTab = false;
let actCount = 0;
// Fake local browser gateway. Routes by request.path and throws on anything
// unexpected so an unanticipated request fails the test loudly.
const callGatewayFromCli = vi.fn(
async (
_method: string,
_opts: unknown,
params?: unknown,
_extra?: unknown,
): Promise<Record<string, unknown>> => {
const request = params as {
path?: string;
body?: { targetId?: string; url?: string };
};
// /tabs: empty until /tabs/open has been called, then report the Meet tab.
if (request.path === "/tabs") {
return openedTab
? {
tabs: [
{
targetId: "local-meet-tab",
title: "Meet",
url: "https://meet.google.com/abc-defg-hij",
},
],
}
: { tabs: [] };
}
if (request.path === "/tabs/open") {
openedTab = true;
return {
targetId: "local-meet-tab",
title: "Meet",
url: request.body?.url ?? "https://meet.google.com/abc-defg-hij",
};
}
if (request.path === "/tabs/focus") {
return { ok: true };
}
// /act: the in-page status script result. First evaluation (during join)
// reports captioning=false / transcriptLines=0; every later evaluation
// (during status refresh) reports one captured caption line from Alice.
if (request.path === "/act") {
actCount += 1;
return {
result: JSON.stringify(
actCount === 1
? {
inCall: true,
captioning: false,
captionsEnabledAttempted: true,
transcriptLines: 0,
title: "Meet call",
url: "https://meet.google.com/abc-defg-hij",
}
: {
inCall: true,
captioning: true,
captionsEnabledAttempted: true,
transcriptLines: 1,
lastCaptionAt: "2026-04-27T10:00:00.000Z",
lastCaptionSpeaker: "Alice",
lastCaptionText: "Please capture this.",
recentTranscript: [
{
at: "2026-04-27T10:00:00.000Z",
speaker: "Alice",
text: "Please capture this.",
},
],
title: "Meet call",
url: "https://meet.google.com/abc-defg-hij",
},
),
};
}
throw new Error(`unexpected browser request path ${request.path}`);
},
);
// Swap the mocked gateway into the chrome transport for this test.
chromeTransportTesting.setDepsForTest({ callGatewayFromCli });
const { methods } = setup({
defaultMode: "transcribe",
defaultTransport: "chrome",
});
// Join sees the first /act payload: zero transcript lines.
const join = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.join", {
url: "https://meet.google.com/abc-defg-hij",
})) as { session: { id: string; chrome?: { health?: { transcriptLines?: number } } } };
expect(join.session.chrome?.health?.transcriptLines).toBe(0);
// Status must trigger a fresh browser poll and pick up the second /act payload.
const status = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.status", {
sessionId: join.session.id,
})) as {
session?: {
chrome?: {
health?: {
captioning?: boolean;
transcriptLines?: number;
lastCaptionText?: string;
};
};
};
};
expect(status.session?.chrome?.health).toMatchObject({
captioning: true,
transcriptLines: 1,
lastCaptionText: "Please capture this.",
});
// The refresh path focuses the recovered Meet tab before evaluating the script.
expect(callGatewayFromCli).toHaveBeenCalledWith(
"browser.request",
expect.any(Object),
expect.objectContaining({
method: "POST",
path: "/tabs/focus",
body: { targetId: "local-meet-tab" },
}),
{ progress: false },
);
});
// Verifies that a realtime-mode chrome-node session answers status from cached
// health only: once a speech-readiness blocker (manual action) is recorded,
// a status request must not issue new browser.proxy calls to the node, which
// could re-run the in-page script and disturb the Meet prompt state.
it("does not mutate realtime browser prompts when status is requested", async () => {
let openedTab = false;
const { methods, nodesInvoke } = setup(
{
defaultMode: "realtime",
defaultTransport: "chrome-node",
},
{
// Fake node-side invoke handler; routes browser.proxy sub-paths and
// reports the Chrome launch state for googlemeet.chrome.
nodesInvokeHandler: async ({ command, params }) => {
const raw = params as { path?: string; body?: { url?: string; targetId?: string } };
if (command === "browser.proxy") {
if (raw.path === "/tabs") {
return { payload: { result: { running: true, tabs: [] } } };
}
if (raw.path === "/tabs/open") {
openedTab = true;
return {
payload: {
result: {
targetId: "tab-1",
title: "Meet",
url: raw.body?.url ?? "https://meet.google.com/abc-defg-hij",
},
},
};
}
if (raw.path === "/tabs/focus" || raw.path === "/permissions/grant") {
return { payload: { result: { ok: true } } };
}
// /act: the page reports a manual action is required (Meet's
// audio-choice interstitial), so the session is not in-call.
if (raw.path === "/act") {
return {
payload: {
result: {
ok: true,
targetId: raw.body?.targetId ?? "tab-1",
result: JSON.stringify({
inCall: false,
manualActionRequired: true,
manualActionReason: "meet-audio-choice-required",
manualActionMessage: "Choose the Meet microphone path manually.",
title: "Meet",
url: "https://meet.google.com/abc-defg-hij",
}),
},
},
};
}
}
if (command === "googlemeet.chrome") {
return { payload: { launched: openedTab } };
}
throw new Error(`unexpected invoke ${command}`);
},
},
);
const join = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.join", {
url: "https://meet.google.com/abc-defg-hij",
})) as { session: { id: string } };
// Forget the join-time invocations so the assertions below only see
// whatever the status call itself triggers.
nodesInvoke.mockClear();
const status = (await invokeGoogleMeetGatewayMethodForTest(methods, "googlemeet.status", {
sessionId: join.session.id,
})) as { session?: { chrome?: { health?: { manualActionRequired?: boolean } } } };
// The manual-action flag captured during join is still reported…
expect(status.session?.chrome?.health?.manualActionRequired).toBe(true);
// …without any new browser.proxy round-trip to the node.
expect(nodesInvoke).not.toHaveBeenCalledWith(
expect.objectContaining({ command: "browser.proxy" }),
);
});
// Runs the generated meetStatusScript inside a node:vm sandbox with a fake
// DOM. First pass: only a "Leave call" button exists, so the caption-enable
// attempt is deferred. Second pass (same persistent windowState): the captions
// button has appeared, so the script marks the attempt and clicks it exactly once.
it("retries caption enable until the captions button is available", () => {
// Minimal button stub exposing only what the script reads: disabled,
// text content, click(), and getAttribute("aria-label").
const makeButton = (label: string) => ({
disabled: false,
innerText: "",
textContent: "",
click: vi.fn(),
getAttribute: vi.fn((name: string) => (name === "aria-label" ? label : null)),
});
const leaveButton = makeButton("Leave call");
const captionButton = makeButton("Turn on captions");
// Mutable button list so the second pass can "grow" the captions button.
const page = {
buttons: [leaveButton],
};
// Shared window object: the script persists __openclawMeetCaptions here
// across invocations, which is what makes the retry observable.
const windowState: Record<string, unknown> = {};
const document = {
body: { innerText: "", textContent: "" },
title: "Meet",
querySelector: vi.fn(() => null),
querySelectorAll: vi.fn((selector: string) => {
if (selector === "button") {
return page.buttons;
}
if (selector === "input") {
return [];
}
return [];
}),
};
// Sandbox globals the status script touches; MutationObserver is stubbed
// so installing the caption observer is a no-op.
const context = createContext({
Date,
JSON,
String,
document,
location: {
href: "https://meet.google.com/abc-defg-hij",
hostname: "meet.google.com",
},
MutationObserver: class {
observe = vi.fn();
},
window: windowState,
});
// Compile the generated script once; each inspect() call is one status pass
// against the same sandbox (and thus the same persisted caption state).
const inspect = new Script(
`(${chromeTransportTesting.meetStatusScriptForTest({
allowMicrophone: false,
autoJoin: false,
captureCaptions: true,
guestName: "OpenClaw Agent",
})})`,
).runInContext(context) as () => string;
// Pass 1: no captions button in the DOM → no attempt recorded, no click.
const first = JSON.parse(inspect()) as { captionsEnabledAttempted?: boolean };
const stateAfterFirst = windowState.__openclawMeetCaptions as { enabledAttempted?: boolean };
expect(first.captionsEnabledAttempted).toBe(false);
expect(stateAfterFirst.enabledAttempted).toBe(false);
expect(captionButton.click).not.toHaveBeenCalled();
// Pass 2: captions button now present → attempt flagged and button clicked once.
page.buttons = [leaveButton, captionButton];
const second = JSON.parse(inspect()) as { captionsEnabledAttempted?: boolean };
const stateAfterSecond = windowState.__openclawMeetCaptions as { enabledAttempted?: boolean };
expect(second.captionsEnabledAttempted).toBe(true);
expect(stateAfterSecond.enabledAttempted).toBe(true);
expect(captionButton.click).toHaveBeenCalledTimes(1);
});
it("joins Chrome on a paired node without local Chrome or BlackHole", async () => {
const { methods, nodesList, nodesInvoke } = setup(
{

View File

@@ -676,7 +676,7 @@ export default definePluginEntry({
async ({ params, respond }: GatewayRequestHandlerOptions) => {
try {
const rt = await ensureRuntime();
respond(true, rt.status(normalizeOptionalString(params?.sessionId)));
respond(true, await rt.status(normalizeOptionalString(params?.sessionId)));
} catch (err) {
sendError(respond, err);
}

View File

@@ -603,7 +603,7 @@ describe("google-meet CLI", () => {
try {
await setupCli({
runtime: {
status: () => ({
status: async () => ({
found: true,
sessions: [
{
@@ -684,7 +684,7 @@ describe("google-meet CLI", () => {
try {
await setupCli({
runtime: {
status: () => ({
status: async () => ({
found: true,
session: {
id: "meet_1",
@@ -703,6 +703,11 @@ describe("google-meet CLI", () => {
audioBridge: { type: "node-command-pair", provider: "openai" },
health: {
inCall: true,
captioning: true,
transcriptLines: 2,
lastCaptionAt: "2026-04-25T00:00:03.000Z",
lastCaptionSpeaker: "Alice",
lastCaptionText: "Can everyone hear OpenClaw?",
providerConnected: true,
realtimeReady: true,
audioInputActive: true,
@@ -720,6 +725,9 @@ describe("google-meet CLI", () => {
expect(stdout.output()).toContain("session: meet_1");
expect(stdout.output()).toContain("node: node-1");
expect(stdout.output()).toContain("provider connected: yes");
expect(stdout.output()).toContain("captioning: yes");
expect(stdout.output()).toContain("transcript lines: 2");
expect(stdout.output()).toContain("last caption text: Alice: Can everyone hear OpenClaw?");
expect(stdout.output()).toContain("audio input active: yes");
expect(stdout.output()).toContain("audio output active: no");
} finally {
@@ -732,7 +740,7 @@ describe("google-meet CLI", () => {
try {
await setupCli({
runtime: {
status: () => ({
status: async () => ({
found: true,
session: {
id: "meet_1",

View File

@@ -237,7 +237,7 @@ function formatDuration(value: number | undefined): string {
: `${minutes}m ${seconds.toString().padStart(2, "0")}s`;
}
function writeDoctorStatus(status: ReturnType<GoogleMeetRuntime["status"]>): void {
function writeDoctorStatus(status: Awaited<ReturnType<GoogleMeetRuntime["status"]>>): void {
if (!status.found) {
writeStdoutLine("Google Meet session: not found");
return;
@@ -272,6 +272,10 @@ function writeDoctorStatus(status: ReturnType<GoogleMeetRuntime["status"]>): voi
session.chrome?.audioBridge?.provider ?? session.realtime.provider ?? "n/a",
);
writeStdoutLine("in call: %s", formatBoolean(health?.inCall));
writeStdoutLine("lobby waiting: %s", formatBoolean(health?.lobbyWaiting));
writeStdoutLine("captioning: %s", formatBoolean(health?.captioning));
writeStdoutLine("transcript lines: %s", health?.transcriptLines ?? 0);
writeStdoutLine("last caption: %s", formatOptional(health?.lastCaptionAt));
writeStdoutLine("manual action: %s", formatBoolean(health?.manualActionRequired));
if (health?.manualActionRequired) {
writeStdoutLine("manual reason: %s", formatOptional(health.manualActionReason));
@@ -298,6 +302,10 @@ function writeDoctorStatus(status: ReturnType<GoogleMeetRuntime["status"]>): voi
);
writeStdoutLine("bridge closed: %s", formatBoolean(health?.bridgeClosed));
writeStdoutLine("browser url: %s", formatOptional(health?.browserUrl));
if (health?.lastCaptionText) {
const speaker = health.lastCaptionSpeaker ? `${health.lastCaptionSpeaker}: ` : "";
writeStdoutLine("last caption text: %s%s", speaker, health.lastCaptionText);
}
}
}
@@ -1947,7 +1955,7 @@ export function registerGoogleMeetCli(params: {
.option("--json", "Print JSON output", false)
.action(async (sessionId?: string) => {
const rt = await params.ensureRuntime();
writeStdoutJson(rt.status(sessionId));
writeStdoutJson(await rt.status(sessionId));
});
root
@@ -1974,7 +1982,7 @@ export function registerGoogleMeetCli(params: {
return;
}
const rt = await params.ensureRuntime();
const status = rt.status(sessionId);
const status = await rt.status(sessionId);
if (options.json) {
writeStdoutJson(status);
return;

View File

@@ -169,16 +169,23 @@ export class GoogleMeetRuntime {
return [...this.#sessions.values()].toSorted((a, b) => a.createdAt.localeCompare(b.createdAt));
}
status(sessionId?: string): {
async status(sessionId?: string): Promise<{
found: boolean;
session?: GoogleMeetSession;
sessions?: GoogleMeetSession[];
} {
}> {
this.#refreshHealth(sessionId);
if (!sessionId) {
return { found: true, sessions: this.list() };
const sessions = [...this.#sessions.values()].toSorted((a, b) =>
a.createdAt.localeCompare(b.createdAt),
);
await Promise.all(sessions.map((session) => this.#refreshCaptionHealthForSession(session)));
return { found: true, sessions };
}
const session = this.#sessions.get(sessionId);
if (session) {
await this.#refreshCaptionHealthForSession(session);
}
return session ? { found: true, session } : { found: false };
}
@@ -590,8 +597,20 @@ export class GoogleMeetRuntime {
};
}
async #refreshCaptionHealthForSession(session: GoogleMeetSession) {
if (session.mode !== "transcribe") {
this.#refreshSpeechReadiness(session);
return;
}
await this.#refreshBrowserHealthForChromeSession(session);
}
async #refreshBrowserHealthForChromeSession(session: GoogleMeetSession) {
if (!isManagedChromeBrowserSession(session) || evaluateSpeechReadiness(session).ready) {
if (!isManagedChromeBrowserSession(session)) {
this.#refreshSpeechReadiness(session);
return;
}
if (session.mode === "realtime" && evaluateSpeechReadiness(session).ready) {
this.#refreshSpeechReadiness(session);
return;
}
@@ -601,10 +620,12 @@ export class GoogleMeetRuntime {
? await recoverCurrentMeetTabOnNode({
runtime: this.params.runtime,
config: this.params.config,
mode: session.mode,
url: session.url,
})
: await recoverCurrentMeetTab({
config: this.params.config,
mode: session.mode,
url: session.url,
});
if (result.found && result.browser && session.chrome) {

View File

@@ -43,6 +43,7 @@ export const __testing = {
setDepsForTest(deps: { callGatewayFromCli?: typeof callGatewayFromCli } | null) {
chromeTransportDeps.callGatewayFromCli = deps?.callGatewayFromCli ?? callGatewayFromCli;
},
meetStatusScriptForTest: meetStatusScript,
};
export function outputMentionsBlackHole2ch(output: string): boolean {
@@ -209,6 +210,15 @@ function parseMeetBrowserStatus(result: unknown): GoogleMeetChromeHealth | undef
const parsed = JSON.parse(raw) as {
inCall?: boolean;
micMuted?: boolean;
lobbyWaiting?: boolean;
leaveReason?: string;
captioning?: boolean;
captionsEnabledAttempted?: boolean;
transcriptLines?: number;
lastCaptionAt?: string;
lastCaptionSpeaker?: string;
lastCaptionText?: string;
recentTranscript?: GoogleMeetChromeHealth["recentTranscript"];
manualActionRequired?: boolean;
manualActionReason?: GoogleMeetChromeHealth["manualActionReason"];
manualActionMessage?: string;
@@ -219,6 +229,15 @@ function parseMeetBrowserStatus(result: unknown): GoogleMeetChromeHealth | undef
return {
inCall: parsed.inCall,
micMuted: parsed.micMuted,
lobbyWaiting: parsed.lobbyWaiting,
leaveReason: parsed.leaveReason,
captioning: parsed.captioning,
captionsEnabledAttempted: parsed.captionsEnabledAttempted,
transcriptLines: parsed.transcriptLines,
lastCaptionAt: parsed.lastCaptionAt,
lastCaptionSpeaker: parsed.lastCaptionSpeaker,
lastCaptionText: parsed.lastCaptionText,
recentTranscript: parsed.recentTranscript,
manualActionRequired: parsed.manualActionRequired,
manualActionReason: parsed.manualActionReason,
manualActionMessage: parsed.manualActionMessage,
@@ -306,11 +325,13 @@ async function grantMeetMediaPermissions(params: {
function meetStatusScript(params: {
allowMicrophone: boolean;
autoJoin: boolean;
captureCaptions: boolean;
guestName: string;
}) {
return `() => {
const text = (node) => (node?.innerText || node?.textContent || "").trim();
const allowMicrophone = ${JSON.stringify(params.allowMicrophone)};
const captureCaptions = ${JSON.stringify(params.captureCaptions)};
const buttons = [...document.querySelectorAll('button')];
const notes = [];
const findButton = (pattern) =>
@@ -356,6 +377,95 @@ function meetStatusScript(params: {
notes.push("Skipped Meet microphone prompt for observe-only mode.");
}
const inCall = buttons.some((button) => /leave call/i.test(button.getAttribute('aria-label') || text(button)));
let captioning = false;
let captionsEnabledAttempted = false;
let transcriptLines = 0;
let lastCaptionAt;
let lastCaptionSpeaker;
let lastCaptionText;
let recentTranscript = [];
const captionSelector = '[role="region"][aria-label*="aption" i], [aria-live="polite"][role="region"], div[aria-live="polite"]';
const captionState = (() => {
if (!captureCaptions) return undefined;
const w = window;
if (!inCall && !w.__openclawMeetCaptions) return undefined;
if (!w.__openclawMeetCaptions) {
w.__openclawMeetCaptions = {
enabledAttempted: false,
observerInstalled: false,
lines: [],
seen: {}
};
}
return w.__openclawMeetCaptions;
})();
const recordCaption = (speaker, captionText) => {
if (!captionState) return;
const clean = String(captionText || "").replace(/\\s+/g, " ").trim();
const cleanSpeaker = String(speaker || "").replace(/\\s+/g, " ").trim();
if (!clean || clean.length < 2) return;
if (/^(turn on captions|turn off captions|captions)$/i.test(clean)) return;
const key = (cleanSpeaker + "\\n" + clean).toLowerCase();
if (captionState.seen[key]) return;
captionState.seen[key] = true;
const entry = { at: new Date().toISOString(), speaker: cleanSpeaker || undefined, text: clean };
captionState.lines.push(entry);
if (captionState.lines.length > 50) captionState.lines.splice(0, captionState.lines.length - 50);
};
const scrapeCaptions = () => {
if (!captionState) return;
const regions = [...document.querySelectorAll(captionSelector)];
for (const region of regions) {
const raw = text(region);
if (!raw) continue;
const pieces = raw.split(/\\n+/).map((part) => part.trim()).filter(Boolean);
if (pieces.length >= 2) {
recordCaption(pieces[0], pieces.slice(1).join(" "));
} else {
recordCaption("", pieces[0] || raw);
}
}
};
if (captionState) {
if (inCall && !captionState.enabledAttempted) {
const captionButton = findButton(/turn on captions|show captions|captions/i);
const captionLabel = captionButton ? (captionButton.getAttribute("aria-label") || captionButton.getAttribute("data-tooltip") || text(captionButton)) : "";
if (captionButton) {
captionState.enabledAttempted = true;
captionsEnabledAttempted = true;
if (!/turn off captions|hide captions/i.test(captionLabel)) {
captionButton.click();
notes.push("Attempted to enable Meet captions for observe-only transcript health.");
}
}
} else if (captionState.enabledAttempted) {
captionsEnabledAttempted = true;
}
if (inCall && !captionState.observerInstalled) {
captionState.observerInstalled = true;
new MutationObserver(scrapeCaptions).observe(document.body, {
childList: true,
subtree: true,
characterData: true
});
notes.push("Installed Meet caption observer for observe-only transcript health.");
}
if (inCall) {
scrapeCaptions();
}
const lines = Array.isArray(captionState.lines) ? captionState.lines : [];
const last = lines[lines.length - 1];
captioning = document.querySelector(captionSelector) !== null || lines.length > 0;
transcriptLines = lines.length;
lastCaptionAt = last?.at;
lastCaptionSpeaker = last?.speaker;
lastCaptionText = last?.text;
recentTranscript = lines.slice(-5);
}
const lobbyWaiting = !inCall && /asking to be let in|you.?ll join when someone lets you in|waiting to be let in|ask to join/i.test(pageText);
const leaveReason = /you left the meeting|you.?ve left the meeting|removed from the meeting|you were removed|call ended|meeting ended/i.test(pageText)
? pageText.match(/you left the meeting|you.?ve left the meeting|removed from the meeting|you were removed|call ended|meeting ended/i)?.[0]
: undefined;
let manualActionReason;
let manualActionMessage;
if (!inCall && (host === "accounts.google.com" || /use your google account|to continue to google meet|choose an account|sign in to (join|continue)/i.test(pageText))) {
@@ -380,6 +490,15 @@ function meetStatusScript(params: {
clickedMicrophoneChoice: Boolean(allowMicrophone && microphoneChoice),
inCall,
micMuted: mic ? /turn on microphone/i.test(mic.getAttribute('aria-label') || text(mic)) : undefined,
lobbyWaiting,
leaveReason,
captioning,
captionsEnabledAttempted,
transcriptLines,
lastCaptionAt,
lastCaptionSpeaker,
lastCaptionText,
recentTranscript,
manualActionRequired: Boolean(manualActionReason),
manualActionReason,
manualActionMessage,
@@ -490,6 +609,7 @@ async function openMeetWithBrowserRequest(params: {
targetId,
fn: meetStatusScript({
allowMicrophone: params.mode === "realtime",
captureCaptions: params.mode === "transcribe",
guestName: params.config.chrome.guestName,
autoJoin: params.config.chrome.autoJoin,
}),
@@ -544,10 +664,12 @@ function isRecoverableMeetTab(tab: BrowserTab, url?: string): boolean {
async function inspectRecoverableMeetTab(params: {
callBrowser: BrowserRequestCaller;
config: GoogleMeetConfig;
mode?: "realtime" | "transcribe";
timeoutMs: number;
tab: BrowserTab;
targetId: string;
}) {
const allowMicrophone = params.mode !== "transcribe";
await params.callBrowser({
method: "POST",
path: "/tabs/focus",
@@ -555,7 +677,7 @@ async function inspectRecoverableMeetTab(params: {
timeoutMs: Math.min(params.timeoutMs, 5_000),
});
const permissionNotes = await grantMeetMediaPermissions({
allowMicrophone: true,
allowMicrophone,
callBrowser: params.callBrowser,
timeoutMs: params.timeoutMs,
});
@@ -566,7 +688,8 @@ async function inspectRecoverableMeetTab(params: {
kind: "evaluate",
targetId: params.targetId,
fn: meetStatusScript({
allowMicrophone: true,
allowMicrophone,
captureCaptions: params.mode === "transcribe",
guestName: params.config.chrome.guestName,
autoJoin: false,
}),
@@ -596,6 +719,7 @@ async function inspectRecoverableMeetTab(params: {
export async function recoverCurrentMeetTab(params: {
config: GoogleMeetConfig;
mode?: "realtime" | "transcribe";
url?: string;
}): Promise<{
transport: "chrome";
@@ -631,6 +755,7 @@ export async function recoverCurrentMeetTab(params: {
...(await inspectRecoverableMeetTab({
callBrowser: callLocalBrowserRequest,
config: params.config,
mode: params.mode,
timeoutMs,
tab,
targetId,
@@ -641,6 +766,7 @@ export async function recoverCurrentMeetTab(params: {
export async function recoverCurrentMeetTabOnNode(params: {
runtime: PluginRuntime;
config: GoogleMeetConfig;
mode?: "realtime" | "transcribe";
url?: string;
}): Promise<{
transport: "chrome-node";
@@ -692,6 +818,7 @@ export async function recoverCurrentMeetTabOnNode(params: {
timeoutMs: request.timeoutMs,
}),
config: params.config,
mode: params.mode,
timeoutMs,
tab,
targetId,

View File

@@ -28,6 +28,19 @@ export type GoogleMeetSpeechBlockedReason =
export type GoogleMeetChromeHealth = {
inCall?: boolean;
micMuted?: boolean;
lobbyWaiting?: boolean;
leaveReason?: string;
captioning?: boolean;
captionsEnabledAttempted?: boolean;
transcriptLines?: number;
lastCaptionAt?: string;
lastCaptionSpeaker?: string;
lastCaptionText?: string;
recentTranscript?: Array<{
at?: string;
speaker?: string;
text: string;
}>;
manualActionRequired?: boolean;
manualActionReason?: GoogleMeetManualActionReason;
manualActionMessage?: string;