mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-06 06:30:42 +00:00
fix: retry delayed Google Meet speech
This commit is contained in:
@@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Google Meet: refresh realtime browser state during status and retry delayed speech after Meet finishes joining, so a just-opened in-call tab no longer leaves speech stuck behind stale `not-in-call` health.
|
||||
- Google Meet: grant Meet media permissions through the Playwright browser context when CDP grants do not affect the attached Chrome page, and report in-call microphone/speaker permission problems instead of marking realtime speech ready.
|
||||
- Google Chat: update the setup example to use the accepted `groups.<space>.enabled` key instead of the legacy `allow` alias, with a schema regression for the documented group shape. Thanks @vincentkoc.
|
||||
- Control UI/WebChat: collapse duplicate in-flight internal text sends onto the active Gateway run so rapid repeat submits do not start fresh `agent:main:main` dispatches. Fixes #75737. Thanks @dsdsddd1 and @BunsDev.
|
||||
|
||||
@@ -2599,6 +2599,112 @@ describe("google-meet plugin", () => {
|
||||
expect(result.details).toMatchObject({ createdSession: true });
|
||||
});
|
||||
|
||||
it("refreshes realtime browser state in status after a delayed Meet join", async () => {
  // Call shape shared by the gateway methods looked up from the plugin below.
  type GatewayMethod = (ctx: {
    params: Record<string, unknown>;
    respond: ReturnType<typeof vi.fn>;
  }) => Promise<void>;

  const meetUrl = "https://meet.google.com/abc-defg-hij";
  const platformBeforeTest = process.platform;
  // Pretend to run on "darwin" for the duration of the test; restored in `finally`.
  // NOTE(review): presumably the local Chrome transport is platform-gated — confirm.
  Object.defineProperty(process, "platform", { value: "darwin" });
  try {
    // State the fake Meet page reports through "/act"; flipped to in-call after the join.
    let browserState: Record<string, unknown> = {
      inCall: false,
      title: "Meet",
      url: meetUrl,
    };
    let tabOpened = false;
    // Builds a fresh tab descriptor per response so no object is shared between calls.
    const describeMeetTab = (url: string) => ({
      targetId: "local-meet-tab",
      title: "Meet",
      url,
    });

    // Fake browser gateway: answers only the request paths this scenario uses
    // and throws on anything else so unexpected traffic fails the test loudly.
    const callGatewayFromCli = vi.fn(
      async (
        _method: string,
        _opts: unknown,
        params?: unknown,
        _extra?: unknown,
      ): Promise<Record<string, unknown>> => {
        const request = params as {
          path?: string;
          body?: { targetId?: string; url?: string };
        };
        switch (request.path) {
          case "/tabs":
            // No Meet tab is listed until "/tabs/open" has been requested.
            return { tabs: tabOpened ? [describeMeetTab(meetUrl)] : [] };
          case "/tabs/open":
            tabOpened = true;
            return describeMeetTab(request.body?.url ?? meetUrl);
          case "/tabs/focus":
          case "/permissions/grant":
            return { ok: true };
          case "/act":
            return { result: JSON.stringify(browserState) };
          default:
            throw new Error(`unexpected browser request path ${request.path}`);
        }
      },
    );
    chromeTransportTesting.setDepsForTest({ callGatewayFromCli });

    const { methods } = setup({
      chrome: {
        audioBridgeCommand: ["bridge", "start"],
        waitForInCallMs: 1,
      },
      realtime: { introMessage: "" },
    });
    const join = methods.get("googlemeet.join") as GatewayMethod | undefined;
    const status = methods.get("googlemeet.status") as GatewayMethod | undefined;
    const joinRespond = vi.fn();
    const statusRespond = vi.fn();

    // Join while the page still reports "not in call".
    await join?.({
      params: { url: meetUrl },
      respond: joinRespond,
    });
    expect(joinRespond.mock.calls[0]?.[1]).toMatchObject({
      session: { chrome: { health: { inCall: false } } },
    });

    // Meet finishes joining afterwards; status must pick up the new page state.
    browserState = {
      inCall: true,
      micMuted: false,
      title: "Meet",
      url: meetUrl,
    };
    await status?.({ params: {}, respond: statusRespond });

    expect(statusRespond.mock.calls[0]?.[1]).toMatchObject({
      sessions: [
        {
          chrome: {
            health: {
              inCall: true,
              speechReady: true,
            },
          },
        },
      ],
    });
  } finally {
    Object.defineProperty(process, "platform", { value: platformBeforeTest });
  }
});
|
||||
|
||||
it("exposes a test-listen action that proves transcript movement", async () => {
|
||||
const { tools, nodesInvoke } = setup(
|
||||
{
|
||||
|
||||
@@ -216,12 +216,12 @@ export class GoogleMeetRuntime {
|
||||
const sessions = [...this.#sessions.values()].toSorted((a, b) =>
|
||||
a.createdAt.localeCompare(b.createdAt),
|
||||
);
|
||||
await Promise.all(sessions.map((session) => this.#refreshCaptionHealthForSession(session)));
|
||||
await Promise.all(sessions.map((session) => this.#refreshStatusHealthForSession(session)));
|
||||
return { found: true, sessions };
|
||||
}
|
||||
const session = this.#sessions.get(sessionId);
|
||||
if (session) {
|
||||
await this.#refreshCaptionHealthForSession(session);
|
||||
await this.#refreshStatusHealthForSession(session);
|
||||
}
|
||||
return session ? { found: true, session } : { found: false };
|
||||
}
|
||||
@@ -357,7 +357,7 @@ export class GoogleMeetRuntime {
|
||||
reusable.updatedAt = nowIso();
|
||||
const spoken =
|
||||
mode === "realtime" && speechInstructions
|
||||
? (await this.speak(reusable.id, speechInstructions)).spoken
|
||||
? await this.#speakWhenReady(reusable, speechInstructions)
|
||||
: false;
|
||||
return { session: reusable, spoken };
|
||||
}
|
||||
@@ -506,7 +506,7 @@ export class GoogleMeetRuntime {
|
||||
transport === "twilio"
|
||||
? delegatedTwilioSpoken
|
||||
: mode === "realtime" && speechInstructions
|
||||
? (await this.speak(session.id, speechInstructions)).spoken
|
||||
? await this.#speakWhenReady(session, speechInstructions)
|
||||
: false;
|
||||
return { session, spoken };
|
||||
}
|
||||
@@ -570,6 +570,34 @@ export class GoogleMeetRuntime {
|
||||
return { found: true, spoken: true, session };
|
||||
}
|
||||
|
||||
/**
 * Speaks `instructions` into the session, retrying while speech is not yet possible.
 *
 * The first attempt's outcome is returned directly when it succeeded, when the
 * session has no Chrome audio bridge, or when the transport is "twilio".
 * Otherwise the call is retried every 250 ms until the smaller of
 * `chrome.waitForInCallMs` and `chrome.joinTimeoutMs` (each floored at 0) has
 * elapsed.
 *
 * Retrying stops early with `false` when the retried result reports that manual
 * action is required, that the session is not "active", or that speech is
 * blocked for a reason other than "not-in-call" / "browser-unverified".
 *
 * @param session Session to speak into; only its id, transport and chrome audio bridge are read here.
 * @param instructions Speech instructions forwarded unchanged to `speak`.
 * @returns Whether any attempt reported the speech as spoken.
 */
async #speakWhenReady(session: GoogleMeetSession, instructions: string): Promise<boolean> {
  const firstAttempt = await this.speak(session.id, instructions);
  const retryApplies = Boolean(session.chrome?.audioBridge) && session.transport !== "twilio";
  if (firstAttempt.spoken || !retryApplies) {
    return firstAttempt.spoken;
  }

  const { waitForInCallMs, joinTimeoutMs } = this.params.config.chrome;
  const retryBudgetMs = Math.min(Math.max(0, waitForInCallMs), Math.max(0, joinTimeoutMs));
  const stopAt = Date.now() + retryBudgetMs;

  // Keep the `now < stopAt` form: a NaN budget must skip the loop entirely.
  while (Date.now() < stopAt) {
    await sleep(250);
    const attempt = await this.speak(session.id, instructions);
    if (attempt.spoken) {
      return true;
    }

    const current = attempt.session;
    const health = current?.chrome?.health;
    const blockedReason = health?.speechBlockedReason;
    // Only these two reasons (or none at all) are worth another attempt.
    const worthRetrying =
      !blockedReason || blockedReason === "not-in-call" || blockedReason === "browser-unverified";
    const mustGiveUp =
      health?.manualActionRequired || current?.state !== "active" || !worthRetrying;
    if (mustGiveUp) {
      return false;
    }
  }
  return false;
}
|
||||
|
||||
async testSpeech(request: GoogleMeetJoinRequest): Promise<{
|
||||
createdSession: boolean;
|
||||
inCall?: boolean;
|
||||
@@ -735,12 +763,27 @@ export class GoogleMeetRuntime {
|
||||
await this.#refreshBrowserHealthForChromeSession(session);
|
||||
}
|
||||
|
||||
async #refreshBrowserHealthForChromeSession(session: GoogleMeetSession) {
|
||||
/**
 * Refreshes the health of one session for a status query.
 *
 * Sessions on the "chrome" or "chrome-node" transport whose health does not
 * flag `manualActionRequired` get a forced, read-only browser health refresh.
 * Every other session only has its speech readiness recomputed.
 *
 * @param session Session whose health fields are refreshed.
 */
async #refreshStatusHealthForSession(session: GoogleMeetSession) {
  const usesChromeTransport =
    session.transport === "chrome" || session.transport === "chrome-node";
  const awaitingManualAction = Boolean(session.chrome?.health?.manualActionRequired);

  if (usesChromeTransport && !awaitingManualAction) {
    // `force` bypasses the "already speech-ready" shortcut; `readOnly` is passed through to the tab recovery call.
    await this.#refreshBrowserHealthForChromeSession(session, { force: true, readOnly: true });
    return;
  }
  this.#refreshSpeechReadiness(session);
}
|
||||
|
||||
async #refreshBrowserHealthForChromeSession(
|
||||
session: GoogleMeetSession,
|
||||
options: { force?: boolean; readOnly?: boolean } = {},
|
||||
) {
|
||||
if (!isManagedChromeBrowserSession(session)) {
|
||||
this.#refreshSpeechReadiness(session);
|
||||
return;
|
||||
}
|
||||
if (session.mode === "realtime" && evaluateSpeechReadiness(session).ready) {
|
||||
if (!options.force && session.mode === "realtime" && evaluateSpeechReadiness(session).ready) {
|
||||
this.#refreshSpeechReadiness(session);
|
||||
return;
|
||||
}
|
||||
@@ -751,11 +794,13 @@ export class GoogleMeetRuntime {
|
||||
runtime: this.params.runtime,
|
||||
config: this.params.config,
|
||||
mode: session.mode,
|
||||
readOnly: options.readOnly,
|
||||
url: session.url,
|
||||
})
|
||||
: await recoverCurrentMeetTab({
|
||||
config: this.params.config,
|
||||
mode: session.mode,
|
||||
readOnly: options.readOnly,
|
||||
url: session.url,
|
||||
});
|
||||
if (result.found && result.browser && session.chrome) {
|
||||
@@ -775,6 +820,9 @@ export class GoogleMeetRuntime {
|
||||
|
||||
#refreshSpeechReadiness(session: GoogleMeetSession) {
|
||||
const readiness = evaluateSpeechReadiness(session);
|
||||
if (readiness.ready) {
|
||||
session.notes = session.notes.filter((note) => !note.startsWith("Realtime speech blocked:"));
|
||||
}
|
||||
if (session.chrome) {
|
||||
session.chrome.health = {
|
||||
...session.chrome.health,
|
||||
|
||||
@@ -327,11 +327,13 @@ function meetStatusScript(params: {
|
||||
autoJoin: boolean;
|
||||
captureCaptions: boolean;
|
||||
guestName: string;
|
||||
readOnly?: boolean;
|
||||
}) {
|
||||
return `() => {
|
||||
const text = (node) => (node?.innerText || node?.textContent || "").trim();
|
||||
const allowMicrophone = ${JSON.stringify(params.allowMicrophone)};
|
||||
const captureCaptions = ${JSON.stringify(params.captureCaptions)};
|
||||
const readOnly = ${JSON.stringify(Boolean(params.readOnly))};
|
||||
const buttons = [...document.querySelectorAll('button')];
|
||||
const buttonLabel = (button) =>
|
||||
[
|
||||
@@ -351,7 +353,7 @@ function meetStatusScript(params: {
|
||||
const input = [...document.querySelectorAll('input')].find((el) =>
|
||||
/your name/i.test(el.getAttribute('aria-label') || el.placeholder || '')
|
||||
);
|
||||
if (${JSON.stringify(params.autoJoin)} && input && !input.value) {
|
||||
if (!readOnly && ${JSON.stringify(params.autoJoin)} && input && !input.value) {
|
||||
input.focus();
|
||||
input.value = ${JSON.stringify(params.guestName)};
|
||||
input.dispatchEvent(new Event('input', { bubbles: true }));
|
||||
@@ -363,20 +365,20 @@ function meetStatusScript(params: {
|
||||
const pageUrl = location.href;
|
||||
const permissionNeeded = /permission needed|microphone problem|speaker problem|allow.*(microphone|camera)|blocked.*(microphone|camera)|permission.*(microphone|camera|speaker)/i.test(permissionText);
|
||||
const mic = buttons.find((button) => /turn off microphone|turn on microphone|microphone/i.test(button.getAttribute('aria-label') || text(button)));
|
||||
if (!allowMicrophone && mic && /turn off microphone/i.test(mic.getAttribute('aria-label') || text(mic))) {
|
||||
if (!readOnly && !allowMicrophone && mic && /turn off microphone/i.test(mic.getAttribute('aria-label') || text(mic))) {
|
||||
mic.click();
|
||||
notes.push("Muted Meet microphone for observe-only mode.");
|
||||
}
|
||||
const join = ${JSON.stringify(params.autoJoin)}
|
||||
const join = !readOnly && ${JSON.stringify(params.autoJoin)}
|
||||
? findButton(/join now|ask to join/i)
|
||||
: null;
|
||||
if (join) join.click();
|
||||
const microphoneChoice = findButton(/\\buse microphone\\b/i);
|
||||
const noMicrophoneChoice = findButton(/\\b(continue|join|use) without (microphone|mic)\\b|\\bnot now\\b/i);
|
||||
if (allowMicrophone && microphoneChoice) {
|
||||
if (!readOnly && allowMicrophone && microphoneChoice) {
|
||||
microphoneChoice.click();
|
||||
notes.push("Accepted Meet microphone prompt with browser automation.");
|
||||
} else if (!allowMicrophone && noMicrophoneChoice) {
|
||||
} else if (!readOnly && !allowMicrophone && noMicrophoneChoice) {
|
||||
noMicrophoneChoice.click();
|
||||
notes.push("Skipped Meet microphone prompt for observe-only mode.");
|
||||
}
|
||||
@@ -431,7 +433,7 @@ function meetStatusScript(params: {
|
||||
}
|
||||
};
|
||||
if (captionState) {
|
||||
if (inCall && !captionState.enabledAttempted) {
|
||||
if (!readOnly && inCall && !captionState.enabledAttempted) {
|
||||
const captionButton = findButton(/turn on captions|show captions|captions/i);
|
||||
const captionLabel = captionButton ? (captionButton.getAttribute("aria-label") || captionButton.getAttribute("data-tooltip") || text(captionButton)) : "";
|
||||
if (captionButton) {
|
||||
@@ -669,6 +671,7 @@ async function inspectRecoverableMeetTab(params: {
|
||||
callBrowser: BrowserRequestCaller;
|
||||
config: GoogleMeetConfig;
|
||||
mode?: "realtime" | "transcribe";
|
||||
readOnly?: boolean;
|
||||
timeoutMs: number;
|
||||
tab: BrowserTab;
|
||||
targetId: string;
|
||||
@@ -680,11 +683,13 @@ async function inspectRecoverableMeetTab(params: {
|
||||
body: { targetId: params.targetId },
|
||||
timeoutMs: Math.min(params.timeoutMs, 5_000),
|
||||
});
|
||||
const permissionNotes = await grantMeetMediaPermissions({
|
||||
allowMicrophone,
|
||||
callBrowser: params.callBrowser,
|
||||
timeoutMs: params.timeoutMs,
|
||||
});
|
||||
const permissionNotes = params.readOnly
|
||||
? []
|
||||
: await grantMeetMediaPermissions({
|
||||
allowMicrophone,
|
||||
callBrowser: params.callBrowser,
|
||||
timeoutMs: params.timeoutMs,
|
||||
});
|
||||
const evaluated = await params.callBrowser({
|
||||
method: "POST",
|
||||
path: "/act",
|
||||
@@ -696,6 +701,7 @@ async function inspectRecoverableMeetTab(params: {
|
||||
captureCaptions: params.mode === "transcribe",
|
||||
guestName: params.config.chrome.guestName,
|
||||
autoJoin: false,
|
||||
readOnly: params.readOnly,
|
||||
}),
|
||||
},
|
||||
timeoutMs: Math.min(params.timeoutMs, 10_000),
|
||||
@@ -724,6 +730,7 @@ async function inspectRecoverableMeetTab(params: {
|
||||
export async function recoverCurrentMeetTab(params: {
|
||||
config: GoogleMeetConfig;
|
||||
mode?: "realtime" | "transcribe";
|
||||
readOnly?: boolean;
|
||||
url?: string;
|
||||
}): Promise<{
|
||||
transport: "chrome";
|
||||
@@ -760,6 +767,7 @@ export async function recoverCurrentMeetTab(params: {
|
||||
callBrowser: callLocalBrowserRequest,
|
||||
config: params.config,
|
||||
mode: params.mode,
|
||||
readOnly: params.readOnly,
|
||||
timeoutMs,
|
||||
tab,
|
||||
targetId,
|
||||
@@ -771,6 +779,7 @@ export async function recoverCurrentMeetTabOnNode(params: {
|
||||
runtime: PluginRuntime;
|
||||
config: GoogleMeetConfig;
|
||||
mode?: "realtime" | "transcribe";
|
||||
readOnly?: boolean;
|
||||
url?: string;
|
||||
}): Promise<{
|
||||
transport: "chrome-node";
|
||||
@@ -823,6 +832,7 @@ export async function recoverCurrentMeetTabOnNode(params: {
|
||||
}),
|
||||
config: params.config,
|
||||
mode: params.mode,
|
||||
readOnly: params.readOnly,
|
||||
timeoutMs,
|
||||
tab,
|
||||
targetId,
|
||||
|
||||
Reference in New Issue
Block a user