fix: retry delayed Google Meet speech

This commit is contained in:
Peter Steinberger
2026-05-03 22:58:49 +01:00
parent dd32254607
commit b5d240332f
4 changed files with 182 additions and 17 deletions

View File

@@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Google Meet: refresh realtime browser state during status and retry delayed speech after Meet finishes joining, so a just-opened in-call tab no longer leaves speech stuck behind stale `not-in-call` health.
- Google Meet: grant Meet media permissions through the Playwright browser context when CDP grants do not affect the attached Chrome page, and report in-call microphone/speaker permission problems instead of marking realtime speech ready.
- Google Chat: update the setup example to use the accepted `groups.<space>.enabled` key instead of the legacy `allow` alias, with a schema regression for the documented group shape. Thanks @vincentkoc.
- Control UI/WebChat: collapse duplicate in-flight internal text sends onto the active Gateway run so rapid repeat submits do not start fresh `agent:main:main` dispatches. Fixes #75737. Thanks @dsdsddd1 and @BunsDev.

View File

@@ -2599,6 +2599,112 @@ describe("google-meet plugin", () => {
expect(result.details).toMatchObject({ createdSession: true });
});
// Regression test: the join call returns while the page still reports
// `inCall: false`; a later status call must re-read the page and surface the
// updated in-call / speech-ready health.
it("refreshes realtime browser state in status after a delayed Meet join", async () => {
  const meetUrl = "https://meet.google.com/abc-defg-hij";
  const savedPlatform = process.platform;
  // Report the platform as darwin for the duration of the test; restored in `finally`.
  Object.defineProperty(process, "platform", { value: "darwin" });
  try {
    // Shape shared by the gateway method handlers exercised below.
    type GatewayHandler = (ctx: {
      params: Record<string, unknown>;
      respond: ReturnType<typeof vi.fn>;
    }) => Promise<void>;

    // What the fake page reports when the status script is evaluated via `/act`.
    let pageState: Record<string, unknown> = {
      inCall: false,
      title: "Meet",
      url: meetUrl,
    };
    // Flips to true once the runtime opens the Meet tab.
    let tabOpened = false;

    // Fake browser gateway: routes each request by path and fails loudly on
    // anything the test did not anticipate.
    const callGatewayFromCli = vi.fn(
      async (
        _method: string,
        _opts: unknown,
        params?: unknown,
        _extra?: unknown,
      ): Promise<Record<string, unknown>> => {
        const request = params as {
          path?: string;
          body?: { targetId?: string; url?: string };
        };
        switch (request.path) {
          case "/tabs":
            return {
              tabs: tabOpened
                ? [
                    {
                      targetId: "local-meet-tab",
                      title: "Meet",
                      url: meetUrl,
                    },
                  ]
                : [],
            };
          case "/tabs/open":
            tabOpened = true;
            return {
              targetId: "local-meet-tab",
              title: "Meet",
              url: request.body?.url ?? meetUrl,
            };
          case "/tabs/focus":
          case "/permissions/grant":
            return { ok: true };
          case "/act":
            // Serialized at call time, so later reassignments of pageState are observed.
            return { result: JSON.stringify(pageState) };
          default:
            throw new Error(`unexpected browser request path ${request.path}`);
        }
      },
    );
    chromeTransportTesting.setDepsForTest({ callGatewayFromCli });

    const { methods } = setup({
      chrome: {
        audioBridgeCommand: ["bridge", "start"],
        waitForInCallMs: 1,
      },
      realtime: { introMessage: "" },
    });
    const joinHandler = methods.get("googlemeet.join") as GatewayHandler | undefined;
    const statusHandler = methods.get("googlemeet.status") as GatewayHandler | undefined;
    const joinRespond = vi.fn();
    const statusRespond = vi.fn();

    // Step 1: join while the page still says it is not in the call.
    await joinHandler?.({
      params: { url: meetUrl },
      respond: joinRespond,
    });
    const joinPayload = joinRespond.mock.calls[0]?.[1];
    expect(joinPayload).toMatchObject({
      session: { chrome: { health: { inCall: false } } },
    });

    // Step 2: the page finishes joining after the join call has returned.
    pageState = {
      inCall: true,
      micMuted: false,
      title: "Meet",
      url: meetUrl,
    };

    // Step 3: status must reflect the refreshed page state.
    await statusHandler?.({ params: {}, respond: statusRespond });
    const statusPayload = statusRespond.mock.calls[0]?.[1];
    expect(statusPayload).toMatchObject({
      sessions: [
        {
          chrome: {
            health: {
              inCall: true,
              speechReady: true,
            },
          },
        },
      ],
    });
  } finally {
    Object.defineProperty(process, "platform", { value: savedPlatform });
  }
});
it("exposes a test-listen action that proves transcript movement", async () => {
const { tools, nodesInvoke } = setup(
{

View File

@@ -216,12 +216,12 @@ export class GoogleMeetRuntime {
const sessions = [...this.#sessions.values()].toSorted((a, b) =>
a.createdAt.localeCompare(b.createdAt),
);
await Promise.all(sessions.map((session) => this.#refreshCaptionHealthForSession(session)));
await Promise.all(sessions.map((session) => this.#refreshStatusHealthForSession(session)));
return { found: true, sessions };
}
const session = this.#sessions.get(sessionId);
if (session) {
await this.#refreshCaptionHealthForSession(session);
await this.#refreshStatusHealthForSession(session);
}
return session ? { found: true, session } : { found: false };
}
@@ -357,7 +357,7 @@ export class GoogleMeetRuntime {
reusable.updatedAt = nowIso();
const spoken =
mode === "realtime" && speechInstructions
? (await this.speak(reusable.id, speechInstructions)).spoken
? await this.#speakWhenReady(reusable, speechInstructions)
: false;
return { session: reusable, spoken };
}
@@ -506,7 +506,7 @@ export class GoogleMeetRuntime {
transport === "twilio"
? delegatedTwilioSpoken
: mode === "realtime" && speechInstructions
? (await this.speak(session.id, speechInstructions)).spoken
? await this.#speakWhenReady(session, speechInstructions)
: false;
return { session, spoken };
}
@@ -570,6 +570,34 @@ export class GoogleMeetRuntime {
return { found: true, spoken: true, session };
}
/**
 * Speaks `instructions` into the session, retrying while the browser is still
 * settling into the call.
 *
 * One attempt is always made. Retries only happen when that attempt did not
 * speak, the session has a Chrome audio bridge, and the transport is not
 * "twilio". The retry window is the smaller of `chrome.waitForInCallMs` and
 * `chrome.joinTimeoutMs` (each clamped to be non-negative), polled every 250 ms.
 *
 * Retrying stops early, returning false, when the latest attempt reports that
 * manual action is required, the session is not "active", or speech is blocked
 * for a reason other than "not-in-call" / "browser-unverified".
 *
 * @param session - Session to speak into; its id is forwarded to `speak`.
 * @param instructions - Speech instructions forwarded to `speak` unchanged.
 * @returns Whether any attempt reported `spoken`.
 */
async #speakWhenReady(session: GoogleMeetSession, instructions: string): Promise<boolean> {
  const firstAttempt = await this.speak(session.id, instructions);
  if (firstAttempt.spoken) {
    return firstAttempt.spoken;
  }
  // Only Chrome audio-bridge sessions are retried; everything else keeps the first result.
  if (!session.chrome?.audioBridge) {
    return firstAttempt.spoken;
  }
  if (session.transport === "twilio") {
    return firstAttempt.spoken;
  }

  const { waitForInCallMs, joinTimeoutMs } = this.params.config.chrome;
  const retryWindowMs = Math.min(Math.max(0, waitForInCallMs), Math.max(0, joinTimeoutMs));
  const stopAt = Date.now() + retryWindowMs;

  while (Date.now() < stopAt) {
    await sleep(250);
    const attempt = await this.speak(session.id, instructions);
    if (attempt.spoken) {
      return true;
    }

    const health = attempt.session?.chrome?.health;
    // Waiting will not help when a human must intervene or the session is no longer active.
    if (health?.manualActionRequired || attempt.session?.state !== "active") {
      return false;
    }

    // Only "still joining" style blockers are worth waiting out.
    const blockedReason = health?.speechBlockedReason;
    const stillJoining =
      blockedReason === "not-in-call" || blockedReason === "browser-unverified";
    if (blockedReason && !stillJoining) {
      return false;
    }
  }
  return false;
}
async testSpeech(request: GoogleMeetJoinRequest): Promise<{
createdSession: boolean;
inCall?: boolean;
@@ -735,12 +763,27 @@ export class GoogleMeetRuntime {
await this.#refreshBrowserHealthForChromeSession(session);
}
async #refreshBrowserHealthForChromeSession(session: GoogleMeetSession) {
/**
 * Refreshes the health of a session before it is reported by status.
 *
 * Sessions on the "chrome" or "chrome-node" transport get a forced, read-only
 * browser health refresh, unless their health already flags
 * `manualActionRequired`. Every other case only re-evaluates speech readiness.
 *
 * @param session - Session whose health is refreshed in place.
 */
async #refreshStatusHealthForSession(session: GoogleMeetSession) {
  const usesChromeTransport =
    session.transport === "chrome" || session.transport === "chrome-node";
  // Non-Chrome sessions, and Chrome sessions waiting on manual action, skip the browser refresh.
  if (!usesChromeTransport || session.chrome?.health?.manualActionRequired) {
    this.#refreshSpeechReadiness(session);
    return;
  }
  await this.#refreshBrowserHealthForChromeSession(session, { force: true, readOnly: true });
}
async #refreshBrowserHealthForChromeSession(
session: GoogleMeetSession,
options: { force?: boolean; readOnly?: boolean } = {},
) {
if (!isManagedChromeBrowserSession(session)) {
this.#refreshSpeechReadiness(session);
return;
}
if (session.mode === "realtime" && evaluateSpeechReadiness(session).ready) {
if (!options.force && session.mode === "realtime" && evaluateSpeechReadiness(session).ready) {
this.#refreshSpeechReadiness(session);
return;
}
@@ -751,11 +794,13 @@ export class GoogleMeetRuntime {
runtime: this.params.runtime,
config: this.params.config,
mode: session.mode,
readOnly: options.readOnly,
url: session.url,
})
: await recoverCurrentMeetTab({
config: this.params.config,
mode: session.mode,
readOnly: options.readOnly,
url: session.url,
});
if (result.found && result.browser && session.chrome) {
@@ -775,6 +820,9 @@ export class GoogleMeetRuntime {
#refreshSpeechReadiness(session: GoogleMeetSession) {
const readiness = evaluateSpeechReadiness(session);
if (readiness.ready) {
session.notes = session.notes.filter((note) => !note.startsWith("Realtime speech blocked:"));
}
if (session.chrome) {
session.chrome.health = {
...session.chrome.health,

View File

@@ -327,11 +327,13 @@ function meetStatusScript(params: {
autoJoin: boolean;
captureCaptions: boolean;
guestName: string;
readOnly?: boolean;
}) {
return `() => {
const text = (node) => (node?.innerText || node?.textContent || "").trim();
const allowMicrophone = ${JSON.stringify(params.allowMicrophone)};
const captureCaptions = ${JSON.stringify(params.captureCaptions)};
const readOnly = ${JSON.stringify(Boolean(params.readOnly))};
const buttons = [...document.querySelectorAll('button')];
const buttonLabel = (button) =>
[
@@ -351,7 +353,7 @@ function meetStatusScript(params: {
const input = [...document.querySelectorAll('input')].find((el) =>
/your name/i.test(el.getAttribute('aria-label') || el.placeholder || '')
);
if (${JSON.stringify(params.autoJoin)} && input && !input.value) {
if (!readOnly && ${JSON.stringify(params.autoJoin)} && input && !input.value) {
input.focus();
input.value = ${JSON.stringify(params.guestName)};
input.dispatchEvent(new Event('input', { bubbles: true }));
@@ -363,20 +365,20 @@ function meetStatusScript(params: {
const pageUrl = location.href;
const permissionNeeded = /permission needed|microphone problem|speaker problem|allow.*(microphone|camera)|blocked.*(microphone|camera)|permission.*(microphone|camera|speaker)/i.test(permissionText);
const mic = buttons.find((button) => /turn off microphone|turn on microphone|microphone/i.test(button.getAttribute('aria-label') || text(button)));
if (!allowMicrophone && mic && /turn off microphone/i.test(mic.getAttribute('aria-label') || text(mic))) {
if (!readOnly && !allowMicrophone && mic && /turn off microphone/i.test(mic.getAttribute('aria-label') || text(mic))) {
mic.click();
notes.push("Muted Meet microphone for observe-only mode.");
}
const join = ${JSON.stringify(params.autoJoin)}
const join = !readOnly && ${JSON.stringify(params.autoJoin)}
? findButton(/join now|ask to join/i)
: null;
if (join) join.click();
const microphoneChoice = findButton(/\\buse microphone\\b/i);
const noMicrophoneChoice = findButton(/\\b(continue|join|use) without (microphone|mic)\\b|\\bnot now\\b/i);
if (allowMicrophone && microphoneChoice) {
if (!readOnly && allowMicrophone && microphoneChoice) {
microphoneChoice.click();
notes.push("Accepted Meet microphone prompt with browser automation.");
} else if (!allowMicrophone && noMicrophoneChoice) {
} else if (!readOnly && !allowMicrophone && noMicrophoneChoice) {
noMicrophoneChoice.click();
notes.push("Skipped Meet microphone prompt for observe-only mode.");
}
@@ -431,7 +433,7 @@ function meetStatusScript(params: {
}
};
if (captionState) {
if (inCall && !captionState.enabledAttempted) {
if (!readOnly && inCall && !captionState.enabledAttempted) {
const captionButton = findButton(/turn on captions|show captions|captions/i);
const captionLabel = captionButton ? (captionButton.getAttribute("aria-label") || captionButton.getAttribute("data-tooltip") || text(captionButton)) : "";
if (captionButton) {
@@ -669,6 +671,7 @@ async function inspectRecoverableMeetTab(params: {
callBrowser: BrowserRequestCaller;
config: GoogleMeetConfig;
mode?: "realtime" | "transcribe";
readOnly?: boolean;
timeoutMs: number;
tab: BrowserTab;
targetId: string;
@@ -680,11 +683,13 @@ async function inspectRecoverableMeetTab(params: {
body: { targetId: params.targetId },
timeoutMs: Math.min(params.timeoutMs, 5_000),
});
const permissionNotes = await grantMeetMediaPermissions({
allowMicrophone,
callBrowser: params.callBrowser,
timeoutMs: params.timeoutMs,
});
const permissionNotes = params.readOnly
? []
: await grantMeetMediaPermissions({
allowMicrophone,
callBrowser: params.callBrowser,
timeoutMs: params.timeoutMs,
});
const evaluated = await params.callBrowser({
method: "POST",
path: "/act",
@@ -696,6 +701,7 @@ async function inspectRecoverableMeetTab(params: {
captureCaptions: params.mode === "transcribe",
guestName: params.config.chrome.guestName,
autoJoin: false,
readOnly: params.readOnly,
}),
},
timeoutMs: Math.min(params.timeoutMs, 10_000),
@@ -724,6 +730,7 @@ async function inspectRecoverableMeetTab(params: {
export async function recoverCurrentMeetTab(params: {
config: GoogleMeetConfig;
mode?: "realtime" | "transcribe";
readOnly?: boolean;
url?: string;
}): Promise<{
transport: "chrome";
@@ -760,6 +767,7 @@ export async function recoverCurrentMeetTab(params: {
callBrowser: callLocalBrowserRequest,
config: params.config,
mode: params.mode,
readOnly: params.readOnly,
timeoutMs,
tab,
targetId,
@@ -771,6 +779,7 @@ export async function recoverCurrentMeetTabOnNode(params: {
runtime: PluginRuntime;
config: GoogleMeetConfig;
mode?: "realtime" | "transcribe";
readOnly?: boolean;
url?: string;
}): Promise<{
transport: "chrome-node";
@@ -823,6 +832,7 @@ export async function recoverCurrentMeetTabOnNode(params: {
}),
config: params.config,
mode: params.mode,
readOnly: params.readOnly,
timeoutMs,
tab,
targetId,