fix: report google meet manual actions

This commit is contained in:
Peter Steinberger
2026-04-24 18:26:46 +01:00
parent cba92f893d
commit 0c70cb3b9c
5 changed files with 121 additions and 9 deletions

View File

@@ -214,6 +214,12 @@ phrase, and prints session health:
openclaw googlemeet test-speech https://meet.google.com/abc-defg-hij
```
If the browser profile is not signed in, Meet is waiting for host admission,
Chrome needs microphone/camera permission, or browser control cannot inspect the
Meet tab, the join/test-speech result reports `manualActionRequired: true`
together with `manualActionReason` and `manualActionMessage`. Agents should stop
retrying the join, report `manualActionMessage` to the operator, and retry only
after the manual browser action is complete.
If `chromeNode.node` is omitted, OpenClaw auto-selects only when exactly one
connected node advertises both `googlemeet.chrome` and browser control. If
several capable nodes are connected, set `chromeNode.node` to the node id,
@@ -460,6 +466,9 @@ report it. Use `action: "leave"` to mark a session ended.
- `inCall`: Chrome appears to be inside the Meet call
- `micMuted`: best-effort Meet microphone state
- `manualActionRequired` / `manualActionReason` / `manualActionMessage`: the
browser profile needs manual login, Meet host admission, permissions, or
browser-control repair before speech can work
- `providerConnected` / `realtimeReady`: realtime voice bridge state
- `lastInputAt` / `lastOutputAt`: last audio seen from or sent to the bridge

View File

@@ -82,6 +82,7 @@ function setup(
options: {
nodesListResult?: NodeListResult;
nodesInvokeResult?: unknown;
browserActResult?: Record<string, unknown>;
nodesInvokeHandler?: (params: {
nodeId: string;
command: string;
@@ -134,12 +135,14 @@ function setup(
result: {
ok: true,
targetId: proxy.body?.targetId ?? "tab-1",
result: JSON.stringify({
inCall: true,
micMuted: false,
title: "Meet call",
url: "https://meet.google.com/abc-defg-hij",
}),
result: JSON.stringify(
options.browserActResult ?? {
inCall: true,
micMuted: false,
title: "Meet call",
url: "https://meet.google.com/abc-defg-hij",
},
),
},
},
};
@@ -706,6 +709,61 @@ describe("google-meet plugin", () => {
expect(result.details).toMatchObject({ createdSession: true });
});
// A browser status that lands on the Google sign-in page must be surfaced as a
// manual action both at the top level of the tool result and on the session's
// Chrome health.
it("reports manual action when the browser profile needs Google login", async () => {
  type ManualActionDetails = {
    manualActionRequired?: boolean;
    manualActionReason?: string;
    session?: { chrome?: { health?: { manualActionRequired?: boolean } } };
  };
  type MeetTool = {
    execute: (id: string, params: unknown) => Promise<{ details: ManualActionDetails }>;
  };

  // Status payload the mocked browser "act" call returns: a sign-in page, not a call.
  const signInPageStatus = {
    inCall: false,
    manualActionRequired: true,
    manualActionReason: "google-login-required",
    manualActionMessage:
      "Sign in to Google in the OpenClaw browser profile, then retry the Meet join.",
    title: "Sign in - Google Accounts",
    url: "https://accounts.google.com/signin",
  };

  const { tools } = setup(
    { defaultTransport: "chrome-node" },
    {
      browserActResult: signInPageStatus,
      nodesInvokeResult: { payload: { launched: true } },
    },
  );
  const meetTool = tools[0] as MeetTool;

  const { details } = await meetTool.execute("id", {
    action: "test_speech",
    url: "https://meet.google.com/abc-defg-hij",
    message: "Say exactly: hello.",
  });

  // The same flag/reason pair is expected at both levels of the result.
  const expectedManualAction = {
    manualActionRequired: true,
    manualActionReason: "google-login-required",
  };
  expect(details).toMatchObject({
    ...expectedManualAction,
    session: { chrome: { health: { ...expectedManualAction } } },
  });
});
it("explains when chrome-node has no capable paired node", async () => {
const { tools } = setup(
{

View File

@@ -274,6 +274,9 @@ export class GoogleMeetRuntime {
async testSpeech(request: GoogleMeetJoinRequest): Promise<{
createdSession: boolean;
inCall?: boolean;
manualActionRequired?: boolean;
manualActionReason?: GoogleMeetChromeHealth["manualActionReason"];
manualActionMessage?: string;
spoken: boolean;
session: GoogleMeetSession;
}> {
@@ -283,9 +286,13 @@ export class GoogleMeetRuntime {
result.session.id,
request.message ?? "Say exactly: Google Meet speech test complete.",
).spoken;
const health = result.session.chrome?.health;
return {
createdSession: !before.has(result.session.id),
inCall: result.session.chrome?.health?.inCall,
inCall: health?.inCall,
manualActionRequired: health?.manualActionRequired,
manualActionReason: health?.manualActionReason,
manualActionMessage: health?.manualActionMessage,
spoken,
session: result.session,
};

View File

@@ -292,12 +292,18 @@ function parseMeetBrowserStatus(result: unknown): GoogleMeetChromeHealth | undef
const parsed = JSON.parse(raw) as {
inCall?: boolean;
micMuted?: boolean;
manualActionRequired?: boolean;
manualActionReason?: GoogleMeetChromeHealth["manualActionReason"];
manualActionMessage?: string;
url?: string;
title?: string;
};
return {
inCall: parsed.inCall,
micMuted: parsed.micMuted,
manualActionRequired: parsed.manualActionRequired,
manualActionReason: parsed.manualActionReason,
manualActionMessage: parsed.manualActionMessage,
browserUrl: parsed.url,
browserTitle: parsed.title,
status: "browser-control",
@@ -317,17 +323,36 @@ function meetStatusScript(params: { guestName: string; autoJoin: boolean }) {
input.dispatchEvent(new Event('change', { bubbles: true }));
}
const buttons = [...document.querySelectorAll('button')];
const pageText = text(document.body).toLowerCase();
const host = location.hostname.toLowerCase();
const pageUrl = location.href;
const join = ${JSON.stringify(params.autoJoin)}
? buttons.find((button) => /join now|ask to join/i.test(text(button)) && !button.disabled)
: null;
if (join) join.click();
const mic = buttons.find((button) => /turn off microphone|turn on microphone|microphone/i.test(button.getAttribute('aria-label') || text(button)));
const inCall = buttons.some((button) => /leave call/i.test(button.getAttribute('aria-label') || text(button)));
let manualActionReason;
let manualActionMessage;
if (!inCall && (host === "accounts.google.com" || /use your google account|to continue to google meet|choose an account|sign in to (join|continue)/i.test(pageText))) {
manualActionReason = "google-login-required";
manualActionMessage = "Sign in to Google in the OpenClaw browser profile, then retry the Meet join.";
} else if (!inCall && /asking to be let in|you.?ll join when someone lets you in|waiting to be let in|ask to join/i.test(pageText)) {
manualActionReason = "meet-admission-required";
manualActionMessage = "Admit the OpenClaw browser participant in Google Meet, then retry speech.";
} else if (!inCall && /allow.*(microphone|camera)|blocked.*(microphone|camera)|permission.*(microphone|camera)/i.test(pageText)) {
manualActionReason = "meet-permission-required";
manualActionMessage = "Allow microphone/camera permissions for Meet in the OpenClaw browser profile, then retry.";
}
return JSON.stringify({
clickedJoin: Boolean(join),
inCall: buttons.some((button) => /leave call/i.test(button.getAttribute('aria-label') || text(button))),
inCall,
micMuted: mic ? /turn on microphone/i.test(mic.getAttribute('aria-label') || text(mic)) : undefined,
manualActionRequired: Boolean(manualActionReason),
manualActionReason,
manualActionMessage,
title: document.title,
url: location.href
url: pageUrl
});
}`;
}
@@ -424,6 +449,10 @@ async function openMeetWithBrowserProxy(params: {
browser = {
...browser,
inCall: false,
manualActionRequired: true,
manualActionReason: "browser-control-unavailable",
manualActionMessage:
"Open the OpenClaw browser profile, finish Google Meet login, admission, or permission prompts, then retry.",
notes: [
`Browser control could not inspect or auto-join Meet: ${
error instanceof Error ? error.message : String(error)

View File

@@ -12,9 +12,18 @@ export type GoogleMeetJoinRequest = {
dtmfSequence?: string;
};
/**
 * Machine-readable reason reported alongside `manualActionRequired` when a
 * Meet session needs a human to act in the browser before it can proceed:
 * Google sign-in, host admission, microphone/camera permission, or browser
 * control being unavailable.
 */
export type GoogleMeetManualActionReason =
  "google-login-required" | "meet-admission-required" | "meet-permission-required" | "browser-control-unavailable";
export type GoogleMeetChromeHealth = {
inCall?: boolean;
micMuted?: boolean;
manualActionRequired?: boolean;
manualActionReason?: GoogleMeetManualActionReason;
manualActionMessage?: string;
providerConnected?: boolean;
realtimeReady?: boolean;
lastInputAt?: string;