fix: prove Google Meet listen health (#74824)

This commit is contained in:
Peter Steinberger
2026-05-02 07:52:11 +01:00
parent f2c1a56bbd
commit 9ddfe52ff9
8 changed files with 324 additions and 3 deletions

View File

@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
- Plugins/beta: prepare Brave, Codex, Feishu, Synology Chat, Tlon, and Twitch for `2026.5.1-beta.1` npm and ClawHub publishing. Thanks @vincentkoc.
- Providers/xAI: add Grok 4.3 to the bundled catalog and make it the default xAI chat model.
- Google Meet: let API-created rooms set `accessType` and `entryPointAccess`, and add `googlemeet end-active-conference` for closing managed spaces after a call. (#74824) Thanks @BsnizND.
- Google Meet: add `googlemeet test-listen` and the matching `google_meet` `test_listen` action so transcribe-mode joins wait for real caption or transcript movement before reporting listen-first health. Refs #72478. Thanks @DougButdorf.
- Plugins/ClawHub: prefer versioned ClawPack artifacts when ClawHub publishes digest metadata, verifying the ClawPack response header and downloaded bytes before installing. Thanks @vincentkoc.
- Plugins/ClawHub: persist ClawPack digest metadata on ClawHub plugin install and update records so registry refreshes and download verification can reuse stored artifact facts. Thanks @vincentkoc.
- Plugins/ClawHub: allow official bundled-plugin cutovers to prefer ClawHub installs with npm fallback only when the ClawHub package or version is absent. Thanks @vincentkoc.

View File

@@ -193,6 +193,10 @@ a best-effort Meet caption observer. `googlemeet status --json` and
`transcriptLines`, `lastCaptionAt`, `lastCaptionSpeaker`, `lastCaptionText`,
and a short `recentTranscript` tail so operators can tell whether the browser
joined the call and whether Meet captions are producing text.
Use `openclaw googlemeet test-listen <meet-url> --transport chrome-node` when
you need a yes/no probe: it joins in transcribe mode, waits for fresh caption or
transcript movement, and returns `listenVerified`, `listenTimedOut`, manual
action fields, and the latest caption health.
During realtime sessions, `google_meet` status includes browser and audio bridge
health such as `inCall`, `manualActionRequired`, `providerConnected`,
@@ -820,6 +824,18 @@ And they can end the active conference for a known room:
}
```
For listen-first validation, agents should use `test_listen` before claiming the
meeting is useful:
```json
{
"action": "test_listen",
"url": "https://meet.google.com/abc-defg-hij",
"transport": "chrome-node",
"timeoutMs": 30000
}
```
Run the guarded live smoke against a real retained meeting:
```bash
@@ -828,6 +844,14 @@ OPENCLAW_GOOGLE_MEET_LIVE_MEETING=https://meet.google.com/abc-defg-hij \
pnpm test:live -- extensions/google-meet/google-meet.live.test.ts
```
Run the live listen-first browser probe against a meeting where someone will
speak with Meet captions available:
```bash
openclaw googlemeet setup --transport chrome-node --mode transcribe
openclaw googlemeet test-listen https://meet.google.com/abc-defg-hij --transport chrome-node --timeout-ms 30000
```
Live smoke environment:
- `OPENCLAW_LIVE_TEST=1` enables guarded live tests.
@@ -1297,7 +1321,8 @@ openclaw nodes status --connected
### Browser opens but agent cannot join
Run `googlemeet test-listen` for observe-only joins or `googlemeet test-speech`
for realtime joins, then inspect the returned Chrome health. If either probe
reports `manualActionRequired: true`, show `manualActionMessage` to the operator
and stop retrying until the browser action is complete.

View File

@@ -560,6 +560,7 @@ describe("google-meet plugin", () => {
"end_active_conference",
"speak",
"test_speech",
"test_listen",
],
description: expect.stringContaining("recover_current_tab"),
},
@@ -2395,6 +2396,52 @@ describe("google-meet plugin", () => {
expect(result.details).toMatchObject({ createdSession: true });
});
// Happy path for the `test_listen` tool action: a chrome-node transcribe join whose
// fake browser health already shows one transcript line, so the probe verifies
// listening without waiting out the timeout.
it("exposes a test-listen action that proves transcript movement", async () => {
  const { tools, nodesInvoke } = setup(
    {
      defaultTransport: "chrome-node",
    },
    {
      // Stubbed Chrome health returned by the browser bridge: in-call, captions on,
      // one transcript line — enough "movement" for listenVerified to flip true.
      browserActResult: {
        inCall: true,
        captioning: true,
        transcriptLines: 1,
        lastCaptionText: "hello from the meeting",
        title: "Meet call",
        url: "https://meet.google.com/abc-defg-hij",
      },
      nodesInvokeResult: {
        payload: {
          launched: true,
        },
      },
    },
  );
  // Narrow the registered tool to just the shape this test inspects.
  const tool = tools[0] as {
    execute: (
      id: string,
      params: unknown,
    ) => Promise<{ details: { listenVerified?: boolean; transcriptLines?: number } }>;
  };
  // Short timeout keeps the test fast; the stubbed health should satisfy the
  // probe before the deadline matters.
  const result = await tool.execute("id", {
    action: "test_listen",
    url: "https://meet.google.com/abc-defg-hij",
    timeoutMs: 100,
  });
  // The probe must join via the Chrome node in transcribe mode (never realtime).
  expect(nodesInvoke).toHaveBeenCalledWith(
    expect.objectContaining({
      command: "googlemeet.chrome",
      params: expect.objectContaining({
        action: "start",
        mode: "transcribe",
      }),
    }),
  );
  // And the tool result must surface the listen verdict plus transcript counters.
  expect(result.details).toMatchObject({ listenVerified: true, transcriptLines: 1 });
});
it("does not start a second realtime response for test speech", async () => {
const runtime = new GoogleMeetRuntime({
config: resolveGoogleMeetConfig({}),
@@ -2456,6 +2503,29 @@ describe("google-meet plugin", () => {
).rejects.toThrow("test_speech requires mode: realtime");
});
// test_listen is defined only for observe-only Chrome joins: realtime mode and the
// Twilio transport must be rejected up front with actionable error messages.
it("rejects realtime and Twilio modes for test listen", async () => {
  // Bare runtime with default config — no browser/node wiring is needed because
  // both rejections happen before any join attempt.
  const runtime = new GoogleMeetRuntime({
    config: resolveGoogleMeetConfig({}),
    fullConfig: {} as never,
    runtime: {} as never,
    logger: noopLogger,
  });
  // Realtime mode belongs to test_speech, not test_listen.
  await expect(
    runtime.testListen({
      url: "https://meet.google.com/abc-defg-hij",
      mode: "realtime",
    }),
  ).rejects.toThrow("test_listen requires mode: transcribe");
  // Twilio has no caption stream to observe, so the probe cannot run over it.
  await expect(
    runtime.testListen({
      url: "https://meet.google.com/abc-defg-hij",
      transport: "twilio",
    }),
  ).rejects.toThrow("test_listen supports chrome or chrome-node");
});
it("reports manual action when the browser profile needs Google login", async () => {
const { tools } = setup(
{

View File

@@ -205,6 +205,7 @@ const GoogleMeetToolSchema = Type.Object({
"end_active_conference",
"speak",
"test_speech",
"test_listen",
],
description:
"Google Meet action to run. create creates and joins by default; pass join=false to only mint a URL. After a timeout or unclear browser state, call recover_current_tab before retrying join.",
@@ -243,6 +244,7 @@ const GoogleMeetToolSchema = Type.Object({
dtmfSequence: Type.Optional(Type.String({ description: "Explicit DTMF sequence for Twilio" })),
sessionId: Type.Optional(Type.String({ description: "Meet session ID" })),
message: Type.Optional(Type.String({ description: "Realtime instructions to speak now" })),
timeoutMs: Type.Optional(Type.Number({ description: "Probe timeout in milliseconds" })),
meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })),
today: Type.Optional(
Type.Boolean({
@@ -360,7 +362,8 @@ type GoogleMeetGatewayToolAction =
| "leave"
| "end_active_conference"
| "speak"
| "test_speech";
| "test_speech"
| "test_listen";
function googleMeetGatewayMethodForToolAction(action: GoogleMeetGatewayToolAction): string {
switch (action) {
@@ -370,6 +373,8 @@ function googleMeetGatewayMethodForToolAction(action: GoogleMeetGatewayToolActio
return "googlemeet.setup";
case "test_speech":
return "googlemeet.testSpeech";
case "test_listen":
return "googlemeet.testListen";
case "end_active_conference":
return "googlemeet.endActiveConference";
default:
@@ -917,11 +922,29 @@ export default definePluginEntry({
},
);
// Gateway surface for the listen-first probe. Normalizes the loosely-typed
// gateway params (url/transport/mode/timeoutMs) before delegating to
// GoogleMeetRuntime.testListen, and funnels failures through sendError so the
// caller always receives a structured response.
api.registerGatewayMethod(
  "googlemeet.testListen",
  async ({ params, respond }: GatewayRequestHandlerOptions) => {
    try {
      const rt = await ensureRuntime();
      const result = await rt.testListen({
        url: resolveMeetingInput(config, params?.url),
        transport: normalizeTransport(params?.transport),
        mode: normalizeMode(params?.mode),
        // Only forward timeoutMs when it is actually a number; anything else
        // falls back to the runtime's configured probe timeout.
        timeoutMs: typeof params?.timeoutMs === "number" ? params.timeoutMs : undefined,
      });
      respond(true, result);
    } catch (err) {
      sendError(respond, err);
    }
  },
);
api.registerTool({
name: "google_meet",
label: "Google Meet",
description:
"Join and track Google Meet sessions through Chrome or Twilio. Call setup_status before join/create/test_speech; if it reports a Chrome node offline or local audio missing, surface that blocker instead of retrying or switching transports. Offline nodes are diagnostics only, not usable candidates. If a Meet tab is already open after a timeout, call recover_current_tab before retrying join to report login, permission, or admission blockers without opening another tab.",
"Join and track Google Meet sessions through Chrome or Twilio. Call setup_status before join/create/test_listen/test_speech; if it reports a Chrome node offline or local audio missing, surface that blocker instead of retrying or switching transports. Offline nodes are diagnostics only, not usable candidates. If a Meet tab is already open after a timeout, call recover_current_tab before retrying join to report login, permission, or admission blockers without opening another tab.",
parameters: GoogleMeetToolSchema,
async execute(_toolCallId, params) {
const raw = asParamRecord(params);
@@ -938,6 +961,11 @@ export default definePluginEntry({
await callGoogleMeetGatewayFromTool({ config, action: "test_speech", raw }),
);
}
case "test_listen": {
return json(
await callGoogleMeetGatewayFromTool({ config, action: "test_listen", raw }),
);
}
case "status": {
return json(await callGoogleMeetGatewayFromTool({ config, action: "status", raw }));
}

View File

@@ -689,6 +689,55 @@ describe("google-meet CLI", () => {
}
});
// CLI plumbing test: `googlemeet test-listen <url> --transport … --timeout-ms …`
// must parse its flags, call runtime.testListen with normalized values
// (timeout-ms string → number), and print the result as JSON on stdout.
it("runs a listen-first health probe", async () => {
  // Fake runtime entry point returning a representative successful probe result.
  const testListen = vi.fn(async () => ({
    createdSession: true,
    listenVerified: true,
    listenTimedOut: false,
    transcriptLines: 1,
    session: {
      id: "meet_1",
      url: "https://meet.google.com/abc-defg-hij",
      state: "active",
      transport: "chrome-node",
      mode: "transcribe",
      participantIdentity: "signed-in Google Chrome profile on a paired node",
      createdAt: "2026-04-25T00:00:00.000Z",
      updatedAt: "2026-04-25T00:00:01.000Z",
      realtime: { enabled: false, provider: "openai", toolPolicy: "safe-read-only" },
      notes: [],
    },
  }));
  const stdout = captureStdout();
  try {
    await setupCli({
      runtime: { testListen },
    }).parseAsync(
      [
        "googlemeet",
        "test-listen",
        "https://meet.google.com/abc-defg-hij",
        "--transport",
        "chrome-node",
        "--timeout-ms",
        "30000",
      ],
      { from: "user" },
    );
    // The CLI must convert the string flag to a numeric timeoutMs.
    expect(testListen).toHaveBeenCalledWith({
      url: "https://meet.google.com/abc-defg-hij",
      transport: "chrome-node",
      timeoutMs: 30000,
    });
    // Output is machine-readable JSON carrying the probe verdict.
    expect(JSON.parse(stdout.output())).toMatchObject({
      listenVerified: true,
      transcriptLines: 1,
    });
  } finally {
    stdout.restore();
  }
});
it("prints a dry-run export manifest without writing files", async () => {
stubMeetArtifactsApi();
const stdout = captureStdout();

View File

@@ -37,6 +37,7 @@ type JoinOptions = {
transport?: GoogleMeetTransport;
mode?: GoogleMeetMode;
message?: string;
timeoutMs?: string;
dialInNumber?: string;
pin?: string;
dtmfSequence?: string;
@@ -228,6 +229,17 @@ function formatOptional(value: unknown): string {
return typeof value === "string" && value.trim() ? value : "n/a";
}
/**
 * Parses an optional CLI flag value into a positive number.
 *
 * @param value Raw flag text; `undefined` means the flag was omitted.
 * @param label Flag name used in the error message.
 * @returns The parsed number, or `undefined` when the flag was omitted.
 * @throws Error when the value is not a finite number greater than zero.
 */
function parsePositiveNumber(value: string | undefined, label: string): number | undefined {
  if (value === undefined) return undefined;
  const parsed = Number(value);
  const isPositive = Number.isFinite(parsed) && parsed > 0;
  if (!isPositive) {
    throw new Error(`${label} must be a positive number`);
  }
  return parsed;
}
function formatDuration(value: number | undefined): string {
if (value === undefined) {
return "n/a";
@@ -1567,6 +1579,22 @@ export function registerGoogleMeetCli(params: {
);
});
// Registers `googlemeet test-listen`: a listen-first health probe that joins in
// transcribe mode and reports whether captions/transcript actually moved.
// Adds a .description(...) for `--help`, matching the sibling commands
// (e.g. `resolve-space`) that already document themselves.
root
  .command("test-listen")
  .description("Join in transcribe mode and verify fresh caption/transcript movement (listen-first health probe)")
  .argument("[url]", "Explicit https://meet.google.com/... URL")
  .option("--transport <transport>", "Transport: chrome or chrome-node")
  .option("--timeout-ms <ms>", "How long to wait for fresh captions/transcript movement")
  .action(async (url: string | undefined, options: JoinOptions) => {
    const rt = await params.ensureRuntime();
    // Probe result is emitted as JSON so operators and scripts can read
    // listenVerified/listenTimedOut directly.
    writeStdoutJson(
      await rt.testListen({
        url: resolveMeetingInput(params.config, url),
        transport: options.transport,
        // --timeout-ms arrives as a string; validate it as a positive number.
        timeoutMs: parsePositiveNumber(options.timeoutMs, "timeout-ms"),
      }),
    );
  });
root
.command("resolve-space")
.description("Resolve a Meet URL, meeting code, or spaces/{id} to its canonical space")

View File

@@ -66,6 +66,43 @@ function hasRealtimeAudioOutputAdvanced(
return (health?.lastOutputBytes ?? 0) > startOutputBytes;
}
/** Snapshot of caption/transcript progress, used to detect movement between polls. */
type TranscriptCheckpoint = {
  lines: number;
  lastCaptionAt?: string;
  lastCaptionText?: string;
};

/**
 * Captures the current transcript counters from a (possibly absent) Chrome
 * health report. A missing report yields a zero-line checkpoint.
 */
function transcriptCheckpoint(health: GoogleMeetChromeHealth | undefined): TranscriptCheckpoint {
  const lines = health?.transcriptLines ?? 0;
  const lastCaptionAt = health?.lastCaptionAt;
  const lastCaptionText = health?.lastCaptionText;
  return { lines, lastCaptionAt, lastCaptionText };
}
/**
 * Reports whether caption/transcript activity has moved past the `start`
 * checkpoint. "Moved" means any of: more transcript lines, a caption timestamp
 * that differs from the checkpoint, or non-empty caption text that differs
 * from the checkpoint.
 */
function hasTranscriptAdvanced(
  health: GoogleMeetChromeHealth | undefined,
  start: TranscriptCheckpoint,
): boolean {
  const lineCount = health?.transcriptLines ?? 0;
  if (lineCount > start.lines) {
    return true;
  }
  const stamp = health?.lastCaptionAt;
  if (stamp && stamp !== start.lastCaptionAt) {
    return true;
  }
  const text = health?.lastCaptionText;
  return Boolean(text && text !== start.lastCaptionText);
}
/**
 * Normalizes a probe timeout request into a bounded millisecond value.
 *
 * - When `input` is omitted, returns `fallback` clamped into [1, 120000].
 * - When `input` is supplied, it must be a finite positive number; it is then
 *   truncated to an integer and clamped into [1, 120000].
 *
 * Fix: the original truncated before clamping, so a fractional input in (0, 1)
 * — e.g. 0.5 — passed the positivity check but truncated to 0, producing a
 * zero-length probe window. Clamping to a minimum of 1 preserves the documented
 * "positive timeout" contract for all accepted inputs.
 *
 * @throws Error when `input` is present but not a finite positive number.
 */
function resolveProbeTimeoutMs(input: number | undefined, fallback: number): number {
  if (input === undefined) {
    return Math.min(Math.max(fallback, 1), 120_000);
  }
  if (!Number.isFinite(input) || input <= 0) {
    throw new Error("timeoutMs must be a positive number");
  }
  return Math.min(Math.max(Math.trunc(input), 1), 120_000);
}
/** Resolves after `ms` milliseconds; paces the caption-health polling loop. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
@@ -597,6 +634,88 @@ export class GoogleMeetRuntime {
};
}
/**
 * Listen-first health probe for a Meet call.
 *
 * Joins (or reuses) a transcribe-mode Chrome session for `request.url`, records
 * a caption/transcript checkpoint taken *before* the join, then polls the
 * session's Chrome health until captions or the transcript advance past that
 * checkpoint, a manual browser action is required, or the probe timeout lapses.
 *
 * Rejections (thrown before any join attempt):
 * - `mode: "realtime"` — realtime talk-back belongs to test_speech.
 * - transport `"twilio"` — there is no caption stream to observe over Twilio.
 *
 * The returned record carries the yes/no verdict (`listenVerified`,
 * `listenTimedOut`), manual-action fields, the latest caption health counters,
 * and the session itself (`createdSession` distinguishes a fresh join from a
 * reused one).
 */
async testListen(request: GoogleMeetJoinRequest): Promise<{
  createdSession: boolean;
  inCall?: boolean;
  manualActionRequired?: boolean;
  manualActionReason?: GoogleMeetChromeHealth["manualActionReason"];
  manualActionMessage?: string;
  listenVerified: boolean;
  listenTimedOut: boolean;
  captioning?: boolean;
  captionsEnabledAttempted?: boolean;
  transcriptLines?: number;
  lastCaptionAt?: string;
  lastCaptionSpeaker?: string;
  lastCaptionText?: string;
  recentTranscript?: GoogleMeetChromeHealth["recentTranscript"];
  session: GoogleMeetSession;
}> {
  // Guard: realtime mode is explicitly out of scope for this probe.
  if (request.mode === "realtime") {
    throw new Error(
      "test_listen requires mode: transcribe; use test_speech for realtime talk-back.",
    );
  }
  const url = normalizeMeetUrl(request.url);
  const transport = resolveTransport(request.transport, this.params.config);
  // Guard: only browser transports expose caption/transcript health.
  if (transport === "twilio") {
    throw new Error("test_listen supports chrome or chrome-node transports");
  }
  // Snapshot existing sessions so we can tell whether join() created a new one,
  // and so a reused session's health seeds the movement checkpoint.
  const beforeSessions = this.list();
  const before = new Set(beforeSessions.map((session) => session.id));
  const existingSession = beforeSessions.find(
    (session) =>
      session.state === "active" &&
      isSameMeetUrlForReuse(session.url, url) &&
      session.transport === transport &&
      session.mode === "transcribe",
  );
  // Checkpoint BEFORE joining: stale captions from a reused session must not
  // count as fresh movement.
  const start = transcriptCheckpoint(existingSession?.chrome?.health);
  const result = await this.join({
    ...request,
    transport,
    url,
    mode: "transcribe",
    // No spoken message — this probe only listens.
    message: undefined,
  });
  let health = result.session.chrome?.health;
  const timeoutMs = resolveProbeTimeoutMs(
    request.timeoutMs,
    this.params.config.chrome.joinTimeoutMs,
  );
  // Only poll when the browser is actually managed by us and no manual action
  // (login, admission, permission) is already blocking the session.
  const shouldWait =
    health?.manualActionRequired !== true && isManagedChromeBrowserSession(result.session);
  if (shouldWait && !hasTranscriptAdvanced(health, start)) {
    const deadline = Date.now() + timeoutMs;
    // Poll every 250ms, refreshing health each pass; stop early on movement or
    // when a manual browser action becomes required.
    while (Date.now() < deadline) {
      await sleep(250);
      await this.#refreshCaptionHealthForSession(result.session);
      health = result.session.chrome?.health;
      if (health?.manualActionRequired || hasTranscriptAdvanced(health, start)) {
        break;
      }
    }
  }
  const listenVerified = hasTranscriptAdvanced(health, start);
  return {
    createdSession: !before.has(result.session.id),
    inCall: health?.inCall,
    manualActionRequired: health?.manualActionRequired,
    manualActionReason: health?.manualActionReason,
    manualActionMessage: health?.manualActionMessage,
    listenVerified,
    // "Timed out" only applies when we were eligible to wait and the failure is
    // not explained by a pending manual browser action.
    listenTimedOut: shouldWait && !listenVerified && health?.manualActionRequired !== true,
    captioning: health?.captioning,
    captionsEnabledAttempted: health?.captionsEnabledAttempted,
    transcriptLines: health?.transcriptLines,
    lastCaptionAt: health?.lastCaptionAt,
    lastCaptionSpeaker: health?.lastCaptionSpeaker,
    lastCaptionText: health?.lastCaptionText,
    recentTranscript: health?.recentTranscript,
    session: result.session,
  };
}
async #refreshCaptionHealthForSession(session: GoogleMeetSession) {
if (session.mode !== "transcribe") {
this.#refreshSpeechReadiness(session);

View File

@@ -7,6 +7,7 @@ export type GoogleMeetJoinRequest = {
transport?: GoogleMeetTransport;
mode?: GoogleMeetMode;
message?: string;
timeoutMs?: number;
dialInNumber?: string;
pin?: string;
dtmfSequence?: string;