mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 11:20:43 +00:00
fix: prove Google Meet listen health (#74824)
This commit is contained in:
@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Plugins/beta: prepare Brave, Codex, Feishu, Synology Chat, Tlon, and Twitch for `2026.5.1-beta.1` npm and ClawHub publishing. Thanks @vincentkoc.
|
||||
- Providers/xAI: add Grok 4.3 to the bundled catalog and make it the default xAI chat model.
|
||||
- Google Meet: let API-created rooms set `accessType` and `entryPointAccess`, and add `googlemeet end-active-conference` for closing managed spaces after a call. (#74824) Thanks @BsnizND.
|
||||
- Google Meet: add `googlemeet test-listen` and the matching `google_meet` `test_listen` action so transcribe-mode joins wait for real caption or transcript movement before reporting listen-first health. Refs #72478. Thanks @DougButdorf.
|
||||
- Plugins/ClawHub: prefer versioned ClawPack artifacts when ClawHub publishes digest metadata, verifying the ClawPack response header and downloaded bytes before installing. Thanks @vincentkoc.
|
||||
- Plugins/ClawHub: persist ClawPack digest metadata on ClawHub plugin install and update records so registry refreshes and download verification can reuse stored artifact facts. Thanks @vincentkoc.
|
||||
- Plugins/ClawHub: allow official bundled-plugin cutovers to prefer ClawHub installs with npm fallback only when the ClawHub package or version is absent. Thanks @vincentkoc.
|
||||
|
||||
@@ -193,6 +193,10 @@ a best-effort Meet caption observer. `googlemeet status --json` and
|
||||
`transcriptLines`, `lastCaptionAt`, `lastCaptionSpeaker`, `lastCaptionText`,
|
||||
and a short `recentTranscript` tail so operators can tell whether the browser
|
||||
joined the call and whether Meet captions are producing text.
|
||||
Use `openclaw googlemeet test-listen <meet-url> --transport chrome-node` when
|
||||
you need a yes/no probe: it joins in transcribe mode, waits for fresh caption or
|
||||
transcript movement, and returns `listenVerified`, `listenTimedOut`, manual
|
||||
action fields, and the latest caption health.
|
||||
|
||||
During realtime sessions, `google_meet` status includes browser and audio bridge
|
||||
health such as `inCall`, `manualActionRequired`, `providerConnected`,
|
||||
@@ -820,6 +824,18 @@ And they can end the active conference for a known room:
|
||||
}
|
||||
```
|
||||
|
||||
For listen-first validation, agents should run `test_listen` and confirm that it
|
||||
returns `listenVerified: true` before reporting that the meeting can be heard:
|
||||
|
||||
```json
|
||||
{
|
||||
"action": "test_listen",
|
||||
"url": "https://meet.google.com/abc-defg-hij",
|
||||
"transport": "chrome-node",
|
||||
"timeoutMs": 30000
|
||||
}
|
||||
```
|
||||
|
||||
Run the guarded live smoke against a real retained meeting:
|
||||
|
||||
```bash
|
||||
@@ -828,6 +844,14 @@ OPENCLAW_GOOGLE_MEET_LIVE_MEETING=https://meet.google.com/abc-defg-hij \
|
||||
pnpm test:live -- extensions/google-meet/google-meet.live.test.ts
|
||||
```
|
||||
|
||||
Run the live listen-first browser probe against a meeting where someone will
|
||||
speak with Meet captions available:
|
||||
|
||||
```bash
|
||||
openclaw googlemeet setup --transport chrome-node --mode transcribe
|
||||
openclaw googlemeet test-listen https://meet.google.com/abc-defg-hij --transport chrome-node --timeout-ms 30000
|
||||
```
|
||||
|
||||
Live smoke environment:
|
||||
|
||||
- `OPENCLAW_LIVE_TEST=1` enables guarded live tests.
|
||||
@@ -1297,7 +1321,8 @@ openclaw nodes status --connected
|
||||
|
||||
### Browser opens but agent cannot join
|
||||
|
||||
Run `googlemeet test-speech` and inspect the returned Chrome health. If it
|
||||
Run `googlemeet test-listen` for observe-only joins or `googlemeet test-speech`
|
||||
for realtime joins, then inspect the returned Chrome health. If either probe
|
||||
reports `manualActionRequired: true`, show `manualActionMessage` to the operator
|
||||
and stop retrying until the browser action is complete.
|
||||
|
||||
|
||||
@@ -560,6 +560,7 @@ describe("google-meet plugin", () => {
|
||||
"end_active_conference",
|
||||
"speak",
|
||||
"test_speech",
|
||||
"test_listen",
|
||||
],
|
||||
description: expect.stringContaining("recover_current_tab"),
|
||||
},
|
||||
@@ -2395,6 +2396,52 @@ describe("google-meet plugin", () => {
|
||||
expect(result.details).toMatchObject({ createdSession: true });
|
||||
});
|
||||
|
||||
it("exposes a test-listen action that proves transcript movement", async () => {
|
||||
const { tools, nodesInvoke } = setup(
|
||||
{
|
||||
defaultTransport: "chrome-node",
|
||||
},
|
||||
{
|
||||
browserActResult: {
|
||||
inCall: true,
|
||||
captioning: true,
|
||||
transcriptLines: 1,
|
||||
lastCaptionText: "hello from the meeting",
|
||||
title: "Meet call",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
},
|
||||
nodesInvokeResult: {
|
||||
payload: {
|
||||
launched: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
);
|
||||
const tool = tools[0] as {
|
||||
execute: (
|
||||
id: string,
|
||||
params: unknown,
|
||||
) => Promise<{ details: { listenVerified?: boolean; transcriptLines?: number } }>;
|
||||
};
|
||||
|
||||
const result = await tool.execute("id", {
|
||||
action: "test_listen",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
timeoutMs: 100,
|
||||
});
|
||||
|
||||
expect(nodesInvoke).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
command: "googlemeet.chrome",
|
||||
params: expect.objectContaining({
|
||||
action: "start",
|
||||
mode: "transcribe",
|
||||
}),
|
||||
}),
|
||||
);
|
||||
expect(result.details).toMatchObject({ listenVerified: true, transcriptLines: 1 });
|
||||
});
|
||||
|
||||
it("does not start a second realtime response for test speech", async () => {
|
||||
const runtime = new GoogleMeetRuntime({
|
||||
config: resolveGoogleMeetConfig({}),
|
||||
@@ -2456,6 +2503,29 @@ describe("google-meet plugin", () => {
|
||||
).rejects.toThrow("test_speech requires mode: realtime");
|
||||
});
|
||||
|
||||
it("rejects realtime and Twilio modes for test listen", async () => {
|
||||
const runtime = new GoogleMeetRuntime({
|
||||
config: resolveGoogleMeetConfig({}),
|
||||
fullConfig: {} as never,
|
||||
runtime: {} as never,
|
||||
logger: noopLogger,
|
||||
});
|
||||
|
||||
await expect(
|
||||
runtime.testListen({
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
mode: "realtime",
|
||||
}),
|
||||
).rejects.toThrow("test_listen requires mode: transcribe");
|
||||
|
||||
await expect(
|
||||
runtime.testListen({
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
transport: "twilio",
|
||||
}),
|
||||
).rejects.toThrow("test_listen supports chrome or chrome-node");
|
||||
});
|
||||
|
||||
it("reports manual action when the browser profile needs Google login", async () => {
|
||||
const { tools } = setup(
|
||||
{
|
||||
|
||||
@@ -205,6 +205,7 @@ const GoogleMeetToolSchema = Type.Object({
|
||||
"end_active_conference",
|
||||
"speak",
|
||||
"test_speech",
|
||||
"test_listen",
|
||||
],
|
||||
description:
|
||||
"Google Meet action to run. create creates and joins by default; pass join=false to only mint a URL. After a timeout or unclear browser state, call recover_current_tab before retrying join.",
|
||||
@@ -243,6 +244,7 @@ const GoogleMeetToolSchema = Type.Object({
|
||||
dtmfSequence: Type.Optional(Type.String({ description: "Explicit DTMF sequence for Twilio" })),
|
||||
sessionId: Type.Optional(Type.String({ description: "Meet session ID" })),
|
||||
message: Type.Optional(Type.String({ description: "Realtime instructions to speak now" })),
|
||||
timeoutMs: Type.Optional(Type.Number({ description: "Probe timeout in milliseconds" })),
|
||||
meeting: Type.Optional(Type.String({ description: "Meet URL, meeting code, or spaces/{id}" })),
|
||||
today: Type.Optional(
|
||||
Type.Boolean({
|
||||
@@ -360,7 +362,8 @@ type GoogleMeetGatewayToolAction =
|
||||
| "leave"
|
||||
| "end_active_conference"
|
||||
| "speak"
|
||||
| "test_speech";
|
||||
| "test_speech"
|
||||
| "test_listen";
|
||||
|
||||
function googleMeetGatewayMethodForToolAction(action: GoogleMeetGatewayToolAction): string {
|
||||
switch (action) {
|
||||
@@ -370,6 +373,8 @@ function googleMeetGatewayMethodForToolAction(action: GoogleMeetGatewayToolActio
|
||||
return "googlemeet.setup";
|
||||
case "test_speech":
|
||||
return "googlemeet.testSpeech";
|
||||
case "test_listen":
|
||||
return "googlemeet.testListen";
|
||||
case "end_active_conference":
|
||||
return "googlemeet.endActiveConference";
|
||||
default:
|
||||
@@ -917,11 +922,29 @@ export default definePluginEntry({
|
||||
},
|
||||
);
|
||||
|
||||
api.registerGatewayMethod(
|
||||
"googlemeet.testListen",
|
||||
async ({ params, respond }: GatewayRequestHandlerOptions) => {
|
||||
try {
|
||||
const rt = await ensureRuntime();
|
||||
const result = await rt.testListen({
|
||||
url: resolveMeetingInput(config, params?.url),
|
||||
transport: normalizeTransport(params?.transport),
|
||||
mode: normalizeMode(params?.mode),
|
||||
timeoutMs: typeof params?.timeoutMs === "number" ? params.timeoutMs : undefined,
|
||||
});
|
||||
respond(true, result);
|
||||
} catch (err) {
|
||||
sendError(respond, err);
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
api.registerTool({
|
||||
name: "google_meet",
|
||||
label: "Google Meet",
|
||||
description:
|
||||
"Join and track Google Meet sessions through Chrome or Twilio. Call setup_status before join/create/test_speech; if it reports a Chrome node offline or local audio missing, surface that blocker instead of retrying or switching transports. Offline nodes are diagnostics only, not usable candidates. If a Meet tab is already open after a timeout, call recover_current_tab before retrying join to report login, permission, or admission blockers without opening another tab.",
|
||||
"Join and track Google Meet sessions through Chrome or Twilio. Call setup_status before join/create/test_listen/test_speech; if it reports a Chrome node offline or local audio missing, surface that blocker instead of retrying or switching transports. Offline nodes are diagnostics only, not usable candidates. If a Meet tab is already open after a timeout, call recover_current_tab before retrying join to report login, permission, or admission blockers without opening another tab.",
|
||||
parameters: GoogleMeetToolSchema,
|
||||
async execute(_toolCallId, params) {
|
||||
const raw = asParamRecord(params);
|
||||
@@ -938,6 +961,11 @@ export default definePluginEntry({
|
||||
await callGoogleMeetGatewayFromTool({ config, action: "test_speech", raw }),
|
||||
);
|
||||
}
|
||||
case "test_listen": {
|
||||
return json(
|
||||
await callGoogleMeetGatewayFromTool({ config, action: "test_listen", raw }),
|
||||
);
|
||||
}
|
||||
case "status": {
|
||||
return json(await callGoogleMeetGatewayFromTool({ config, action: "status", raw }));
|
||||
}
|
||||
|
||||
@@ -689,6 +689,55 @@ describe("google-meet CLI", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("runs a listen-first health probe", async () => {
|
||||
const testListen = vi.fn(async () => ({
|
||||
createdSession: true,
|
||||
listenVerified: true,
|
||||
listenTimedOut: false,
|
||||
transcriptLines: 1,
|
||||
session: {
|
||||
id: "meet_1",
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
state: "active",
|
||||
transport: "chrome-node",
|
||||
mode: "transcribe",
|
||||
participantIdentity: "signed-in Google Chrome profile on a paired node",
|
||||
createdAt: "2026-04-25T00:00:00.000Z",
|
||||
updatedAt: "2026-04-25T00:00:01.000Z",
|
||||
realtime: { enabled: false, provider: "openai", toolPolicy: "safe-read-only" },
|
||||
notes: [],
|
||||
},
|
||||
}));
|
||||
const stdout = captureStdout();
|
||||
try {
|
||||
await setupCli({
|
||||
runtime: { testListen },
|
||||
}).parseAsync(
|
||||
[
|
||||
"googlemeet",
|
||||
"test-listen",
|
||||
"https://meet.google.com/abc-defg-hij",
|
||||
"--transport",
|
||||
"chrome-node",
|
||||
"--timeout-ms",
|
||||
"30000",
|
||||
],
|
||||
{ from: "user" },
|
||||
);
|
||||
expect(testListen).toHaveBeenCalledWith({
|
||||
url: "https://meet.google.com/abc-defg-hij",
|
||||
transport: "chrome-node",
|
||||
timeoutMs: 30000,
|
||||
});
|
||||
expect(JSON.parse(stdout.output())).toMatchObject({
|
||||
listenVerified: true,
|
||||
transcriptLines: 1,
|
||||
});
|
||||
} finally {
|
||||
stdout.restore();
|
||||
}
|
||||
});
|
||||
|
||||
it("prints a dry-run export manifest without writing files", async () => {
|
||||
stubMeetArtifactsApi();
|
||||
const stdout = captureStdout();
|
||||
|
||||
@@ -37,6 +37,7 @@ type JoinOptions = {
|
||||
transport?: GoogleMeetTransport;
|
||||
mode?: GoogleMeetMode;
|
||||
message?: string;
|
||||
timeoutMs?: string;
|
||||
dialInNumber?: string;
|
||||
pin?: string;
|
||||
dtmfSequence?: string;
|
||||
@@ -228,6 +229,17 @@ function formatOptional(value: unknown): string {
|
||||
return typeof value === "string" && value.trim() ? value : "n/a";
|
||||
}
|
||||
|
||||
/**
 * Parses an optional CLI option value as a strictly positive, finite number.
 *
 * @param value - Raw option text; `undefined` means the option was not supplied.
 * @param label - Option name used as the prefix of the error message.
 * @returns The parsed number, or `undefined` when `value` is `undefined`.
 * @throws Error when `Number(value)` is NaN, infinite, zero, or negative.
 */
function parsePositiveNumber(value: string | undefined, label: string): number | undefined {
  if (value !== undefined) {
    const numeric = Number(value);
    const isPositiveFinite = Number.isFinite(numeric) && numeric > 0;
    if (!isPositiveFinite) {
      throw new Error(`${label} must be a positive number`);
    }
    return numeric;
  }
  return undefined;
}
|
||||
|
||||
function formatDuration(value: number | undefined): string {
|
||||
if (value === undefined) {
|
||||
return "n/a";
|
||||
@@ -1567,6 +1579,22 @@ export function registerGoogleMeetCli(params: {
|
||||
);
|
||||
});
|
||||
|
||||
root
|
||||
.command("test-listen")
|
||||
.argument("[url]", "Explicit https://meet.google.com/... URL")
|
||||
.option("--transport <transport>", "Transport: chrome or chrome-node")
|
||||
.option("--timeout-ms <ms>", "How long to wait for fresh captions/transcript movement")
|
||||
.action(async (url: string | undefined, options: JoinOptions) => {
|
||||
const rt = await params.ensureRuntime();
|
||||
writeStdoutJson(
|
||||
await rt.testListen({
|
||||
url: resolveMeetingInput(params.config, url),
|
||||
transport: options.transport,
|
||||
timeoutMs: parsePositiveNumber(options.timeoutMs, "timeout-ms"),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
root
|
||||
.command("resolve-space")
|
||||
.description("Resolve a Meet URL, meeting code, or spaces/{id} to its canonical space")
|
||||
|
||||
@@ -66,6 +66,43 @@ function hasRealtimeAudioOutputAdvanced(
|
||||
return (health?.lastOutputBytes ?? 0) > startOutputBytes;
|
||||
}
|
||||
|
||||
/**
 * Snapshot of caption/transcript progress, used as the baseline when checking
 * whether a Meet session has produced new transcript activity.
 */
type TranscriptCheckpoint = {
  /** Transcript line count at snapshot time (0 when no health was available). */
  lines: number;
  /** `lastCaptionAt` value copied from the Chrome health at snapshot time, if any. */
  lastCaptionAt?: string;
  /** `lastCaptionText` value copied from the Chrome health at snapshot time, if any. */
  lastCaptionText?: string;
};
|
||||
|
||||
/**
 * Captures the current transcript progress from a Chrome health record.
 *
 * @param health - Latest Chrome health for a session, or `undefined` when none exists.
 * @returns A checkpoint whose `lines` defaults to 0 and whose caption fields are
 *   copied as-is (they stay `undefined` when the health record lacks them).
 */
function transcriptCheckpoint(health: GoogleMeetChromeHealth | undefined): TranscriptCheckpoint {
  const lines = health?.transcriptLines ?? 0;
  const lastCaptionAt = health?.lastCaptionAt;
  const lastCaptionText = health?.lastCaptionText;
  return { lines, lastCaptionAt, lastCaptionText };
}
|
||||
|
||||
/**
 * Reports whether a health record shows transcript movement relative to a
 * previously captured checkpoint.
 *
 * Movement means any of: more transcript lines than the checkpoint, a non-empty
 * `lastCaptionAt` that differs from the checkpoint, or a non-empty
 * `lastCaptionText` that differs from the checkpoint.
 *
 * @param health - Latest Chrome health, or `undefined` when none is available.
 * @param start - Baseline checkpoint to compare against.
 * @returns `true` when any of the three signals changed, otherwise `false`.
 */
function hasTranscriptAdvanced(
  health: GoogleMeetChromeHealth | undefined,
  start: TranscriptCheckpoint,
): boolean {
  const lineCount = health?.transcriptLines ?? 0;
  const captionAt = health?.lastCaptionAt;
  const captionText = health?.lastCaptionText;

  const gainedLines = lineCount > start.lines;
  // Empty/absent caption fields never count as movement, even if the baseline had a value.
  const timestampChanged = Boolean(captionAt) && captionAt !== start.lastCaptionAt;
  const textChanged = Boolean(captionText) && captionText !== start.lastCaptionText;

  return gainedLines || timestampChanged || textChanged;
}
|
||||
|
||||
/**
 * Resolves the effective probe timeout in milliseconds.
 *
 * When the caller supplies no timeout, `fallback` is clamped into
 * `[1, 120_000]`. When a timeout is supplied, it must be a finite number
 * greater than zero; it is truncated to whole milliseconds and clamped into the
 * same `[1, 120_000]` range.
 *
 * @param input - Caller-supplied timeout, or `undefined` to use `fallback`.
 * @param fallback - Default timeout used when `input` is `undefined`.
 * @returns The timeout to wait, never less than 1 ms and never more than 120 s.
 * @throws Error when `input` is NaN, infinite, zero, or negative.
 */
function resolveProbeTimeoutMs(input: number | undefined, fallback: number): number {
  const minTimeoutMs = 1;
  const maxTimeoutMs = 120_000;

  if (input === undefined) {
    return Math.min(Math.max(fallback, minTimeoutMs), maxTimeoutMs);
  }
  if (!Number.isFinite(input) || input <= 0) {
    throw new Error("timeoutMs must be a positive number");
  }
  // A positive sub-millisecond value (e.g. 0.5) truncates to 0, which would make
  // the probe give up without polling at all; keep the same 1 ms floor that the
  // fallback branch applies.
  return Math.min(Math.max(Math.trunc(input), minTimeoutMs), maxTimeoutMs);
}
|
||||
|
||||
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds,
 * scheduled via `setTimeout`.
 *
 * @param ms - Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
@@ -597,6 +634,88 @@ export class GoogleMeetRuntime {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Listen-first health probe: joins the meeting in transcribe mode and waits for
 * caption/transcript movement before reporting whether listening works.
 *
 * Flow:
 * 1. Rejects `mode: "realtime"` and the `twilio` transport.
 * 2. Resolves and validates the probe timeout *before* joining, so an invalid
 *    `timeoutMs` fails fast instead of joining a meeting and then throwing.
 * 3. Takes a transcript baseline from an already-active transcribe session for
 *    the same URL and transport, if one exists (otherwise the baseline is empty).
 * 4. Calls `join` with `mode: "transcribe"` and no message.
 * 5. If the session is a managed Chrome browser session that does not require
 *    manual action, polls caption health every 250 ms until the transcript
 *    advances past the baseline, manual action becomes required, or the
 *    deadline passes.
 *
 * @param request - Join request; `timeoutMs` bounds the wait (defaults to the
 *   configured Chrome join timeout, capped by `resolveProbeTimeoutMs`).
 * @returns Probe outcome (`listenVerified`, `listenTimedOut`), whether a new
 *   session was created, manual-action fields, the latest caption health
 *   fields, and the session itself.
 * @throws Error when `mode` is `"realtime"`, the transport resolves to
 *   `"twilio"`, or `timeoutMs` is not a positive finite number.
 */
async testListen(request: GoogleMeetJoinRequest): Promise<{
  createdSession: boolean;
  inCall?: boolean;
  manualActionRequired?: boolean;
  manualActionReason?: GoogleMeetChromeHealth["manualActionReason"];
  manualActionMessage?: string;
  listenVerified: boolean;
  listenTimedOut: boolean;
  captioning?: boolean;
  captionsEnabledAttempted?: boolean;
  transcriptLines?: number;
  lastCaptionAt?: string;
  lastCaptionSpeaker?: string;
  lastCaptionText?: string;
  recentTranscript?: GoogleMeetChromeHealth["recentTranscript"];
  session: GoogleMeetSession;
}> {
  if (request.mode === "realtime") {
    throw new Error(
      "test_listen requires mode: transcribe; use test_speech for realtime talk-back.",
    );
  }
  const url = normalizeMeetUrl(request.url);
  const transport = resolveTransport(request.transport, this.params.config);
  if (transport === "twilio") {
    throw new Error("test_listen supports chrome or chrome-node transports");
  }
  // Validate the timeout up front: joining has side effects (it can open or
  // reuse a browser session), so a bad timeoutMs must be rejected before that.
  const timeoutMs = resolveProbeTimeoutMs(
    request.timeoutMs,
    this.params.config.chrome.joinTimeoutMs,
  );
  const beforeSessions = this.list();
  const before = new Set(beforeSessions.map((session) => session.id));
  const existingSession = beforeSessions.find(
    (session) =>
      session.state === "active" &&
      isSameMeetUrlForReuse(session.url, url) &&
      session.transport === transport &&
      session.mode === "transcribe",
  );
  // Baseline comes from the matching pre-existing session, presumably so captions
  // recorded before this probe do not count as fresh movement.
  const start = transcriptCheckpoint(existingSession?.chrome?.health);
  const result = await this.join({
    ...request,
    transport,
    url,
    mode: "transcribe",
    message: undefined,
  });
  let health = result.session.chrome?.health;
  const shouldWait =
    health?.manualActionRequired !== true && isManagedChromeBrowserSession(result.session);
  if (shouldWait && !hasTranscriptAdvanced(health, start)) {
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      await sleep(250);
      await this.#refreshCaptionHealthForSession(result.session);
      health = result.session.chrome?.health;
      // Stop early on success, or when the operator must act in the browser.
      if (health?.manualActionRequired || hasTranscriptAdvanced(health, start)) {
        break;
      }
    }
  }
  const listenVerified = hasTranscriptAdvanced(health, start);
  return {
    createdSession: !before.has(result.session.id),
    inCall: health?.inCall,
    manualActionRequired: health?.manualActionRequired,
    manualActionReason: health?.manualActionReason,
    manualActionMessage: health?.manualActionMessage,
    listenVerified,
    // Only a wait that ended without movement and without a manual-action
    // blocker counts as a timeout.
    listenTimedOut: shouldWait && !listenVerified && health?.manualActionRequired !== true,
    captioning: health?.captioning,
    captionsEnabledAttempted: health?.captionsEnabledAttempted,
    transcriptLines: health?.transcriptLines,
    lastCaptionAt: health?.lastCaptionAt,
    lastCaptionSpeaker: health?.lastCaptionSpeaker,
    lastCaptionText: health?.lastCaptionText,
    recentTranscript: health?.recentTranscript,
    session: result.session,
  };
}
|
||||
|
||||
async #refreshCaptionHealthForSession(session: GoogleMeetSession) {
|
||||
if (session.mode !== "transcribe") {
|
||||
this.#refreshSpeechReadiness(session);
|
||||
|
||||
@@ -7,6 +7,7 @@ export type GoogleMeetJoinRequest = {
|
||||
transport?: GoogleMeetTransport;
|
||||
mode?: GoogleMeetMode;
|
||||
message?: string;
|
||||
timeoutMs?: number;
|
||||
dialInNumber?: string;
|
||||
pin?: string;
|
||||
dtmfSequence?: string;
|
||||
|
||||
Reference in New Issue
Block a user