fix(google-meet): hide realtime alias from agent schema

This commit is contained in:
Peter Steinberger
2026-05-04 03:21:46 +01:00
parent b0b5983ce3
commit 30b201eff0
8 changed files with 76 additions and 50 deletions

View File

@@ -42,6 +42,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Diagnostics: keep webhook/message OTEL attributes and Prometheus delivery labels low-cardinality and omit raw chat/message IDs from spans, so progress-draft and message-tool modes do not leak high-cardinality messaging identifiers.
- Google Meet: stop advertising legacy `mode: "realtime"` to agents and config UIs, while keeping it as a hidden compatibility alias for `mode: "agent"`, so new joins use the STT -> OpenClaw agent -> TTS path instead of selecting the direct realtime voice fallback.
- Telegram: render shared interactive reply buttons in reply delivery so plugin approval messages show inline keyboards. (#76238) Thanks @keshavbotagent.
- Agents/cli-runner: drop a saved `claude-cli` resume sessionId at preparation time when its on-disk transcript no longer exists in `~/.claude/projects/`, so a stale binding from a half-installed `update.run` cannot trap follow-up runs (auto-reply / Telegram direct) in a `claude --resume` timeout loop; the run starts fresh and the new sessionId is written back through the existing post-run flow. (#77030; refs #77011) Thanks @openperf.
- Release validation: install the cross-OS TypeScript harness through Windows-safe Node/npm shims so native Windows package checks reach the OpenClaw smoke suites instead of exiting before artifact capture. Thanks @vincentkoc.

View File

@@ -1,5 +1,5 @@
---
summary: "Google Meet plugin: join explicit Meet URLs through Chrome or Twilio with realtime voice defaults"
summary: "Google Meet plugin: join explicit Meet URLs through Chrome or Twilio with agent talk-back defaults"
read_when:
- You want an OpenClaw agent to join a Google Meet call
- You want an OpenClaw agent to create a new Google Meet call
@@ -12,12 +12,12 @@ Google Meet participant support for OpenClaw — the plugin is explicit by desig
- It only joins an explicit `https://meet.google.com/...` URL.
- It can create a new Meet space through the Google Meet API, then join the
returned URL.
- `realtime` voice is the default mode.
- Realtime voice can call back into the full OpenClaw agent when deeper
reasoning or tools are needed.
- Agents choose the join behavior with `mode`: use `realtime` for live
listen/talk-back, or `transcribe` to join/control the browser without the
realtime voice bridge.
- `agent` is the default talk-back mode: realtime transcription listens, the
configured OpenClaw agent answers, and regular OpenClaw TTS speaks into Meet.
- `bidi` remains available as a fallback mode in which the realtime voice model answers directly.
- Agents choose the join behavior with `mode`: use `agent` for live
listen/talk-back, `bidi` for direct realtime voice fallback, or `transcribe`
to join/control the browser without the talk-back bridge.
- Auth starts as personal Google OAuth or an already signed-in Chrome profile.
- There is no automatic consent announcement.
- The default Chrome audio backend is `BlackHole 2ch`.
@@ -29,9 +29,10 @@ Google Meet participant support for OpenClaw — the plugin is explicit by desig
## Quick start
Install the local audio dependencies and configure a backend realtime voice
provider. OpenAI is the default; Google Gemini Live also works with
`realtime.provider: "google"`:
Install the local audio dependencies and configure a realtime transcription
provider plus regular OpenClaw TTS. OpenAI is the default transcription
provider; Google Gemini Live also works with `realtime.provider: "google"` for
`bidi` mode:
```bash
brew install blackhole-2ch sox
@@ -130,7 +131,7 @@ participation.
Create a new meeting and join it:
```bash
openclaw googlemeet create --transport chrome-node --mode realtime
openclaw googlemeet create --transport chrome-node --mode agent
```
For API-created rooms, use Google Meet `SpaceConfig.accessType` when you want
@@ -138,7 +139,7 @@ the room's no-knock policy to be explicit instead of inherited from the Google
account defaults:
```bash
openclaw googlemeet create --access-type OPEN --transport chrome-node --mode realtime
openclaw googlemeet create --access-type OPEN --transport chrome-node --mode agent
```
`OPEN` lets anyone with the Meet URL join without knocking. `TRUSTED` lets the
@@ -177,15 +178,15 @@ can explain which path was used. `create` joins the new meeting by default and
returns `joined: true` plus the join session. To only mint the URL, use
`create --no-join` on the CLI or pass `"join": false` to the tool.
Or tell an agent: "Create a Google Meet, join it with realtime voice, and send
me the link." The agent should call `google_meet` with `action: "create"` and
then share the returned `meetingUri`.
Or tell an agent: "Create a Google Meet, join it with the agent talk-back mode,
and send me the link." The agent should call `google_meet` with
`action: "create"` and then share the returned `meetingUri`.
```json
{
"action": "create",
"transport": "chrome-node",
"mode": "realtime"
"mode": "agent"
}
```
@@ -818,7 +819,7 @@ Agents can also create an API-backed room with an explicit access policy:
{
"action": "create",
"transport": "chrome-node",
"mode": "realtime",
"mode": "agent",
"accessType": "OPEN"
}
```
@@ -1000,8 +1001,8 @@ Set the plugin config under `plugins.entries.google-meet.config`:
Defaults:
- `defaultTransport: "chrome"`
- `defaultMode: "agent"` (`"realtime"` is accepted as a compatibility alias for
`"agent"`)
- `defaultMode: "agent"` (`"realtime"` is accepted only as a legacy
compatibility alias for `"agent"`; new tool calls should say `"agent"`)
- `chromeNode.node`: optional node id/name/IP for `chrome-node`
- `chrome.audioBackend: "blackhole-2ch"`
- `chrome.guestName: "OpenClaw Agent"`: name used on the signed-out Meet guest
@@ -1011,7 +1012,7 @@ Defaults:
- `chrome.reuseExistingTab: true`: activate an existing Meet tab instead of
opening duplicates
- `chrome.waitForInCallMs: 20000`: wait for the Meet tab to report in-call
before the realtime intro is triggered
before the talk-back intro is triggered
- `chrome.audioFormat: "pcm16-24khz"`: command-pair audio format. Use
`"g711-ulaw-8khz"` only for legacy/custom command pairs that still emit
telephony audio.
@@ -1140,8 +1141,8 @@ Gateway host, so model credentials stay there. With the default `mode: "agent"`,
the realtime transcription provider handles listening, the configured OpenClaw
agent produces the answer, and regular OpenClaw TTS speaks it into Meet. Use
`mode: "bidi"` when you want the realtime voice model to answer directly.
`mode: "realtime"` remains accepted as a compatibility alias for
`mode: "agent"`.
Raw `mode: "realtime"` remains accepted as a legacy compatibility alias for
`mode: "agent"`, but it is no longer advertised in the agent tool schema.
Use `action: "status"` to list active sessions or inspect a session ID. Use
`action: "speak"` with `sessionId` and `message` to make the realtime agent

View File

@@ -386,6 +386,7 @@ describe("google-meet plugin", () => {
oauth: {},
auth: { provider: "google-oauth" },
});
expect(resolveGoogleMeetConfig({ defaultMode: "realtime" }).defaultMode).toBe("agent");
expect(resolveGoogleMeetConfig({}).realtime.instructions).toContain("openclaw_agent_consult");
});
@@ -621,7 +622,7 @@ describe("google-meet plugin", () => {
description: expect.stringContaining("recover_current_tab"),
},
transport: { type: "string", enum: ["chrome", "chrome-node", "twilio"] },
mode: { type: "string", enum: ["agent", "bidi", "realtime", "transcribe"] },
mode: { type: "string", enum: ["agent", "bidi", "transcribe"] },
},
});
});
@@ -3154,7 +3155,12 @@ describe("google-meet plugin", () => {
createdAt: "2026-04-27T00:00:00.000Z",
updatedAt: "2026-04-27T00:00:00.000Z",
participantIdentity: "signed-in Google Chrome profile",
realtime: { enabled: true, provider: "openai", toolPolicy: "safe-read-only" },
realtime: {
enabled: true,
strategy: "agent",
transcriptionProvider: "openai",
toolPolicy: "safe-read-only",
},
chrome: {
audioBackend: "blackhole-2ch",
launched: true,

View File

@@ -155,10 +155,14 @@ const googleMeetConfigSchema = {
help: "Legacy realtime alias setting. Use mode=agent or mode=bidi for new Meet joins.",
},
"realtime.provider": {
label: "Realtime Provider",
help: "Defaults to OpenAI; uses OPENAI_API_KEY when no provider config is set.",
label: "Speech Provider",
help: "Agent mode uses this for realtime transcription. Bidi mode uses it as the realtime voice provider.",
},
"realtime.model": {
label: "Bidi Realtime Model",
help: "Only used by mode=bidi. Agent mode answers with the configured OpenClaw agent and regular TTS.",
advanced: true,
},
"realtime.model": { label: "Realtime Model", advanced: true },
"realtime.instructions": { label: "Realtime Instructions", advanced: true },
"realtime.introMessage": {
label: "Realtime Intro Message",
@@ -238,9 +242,9 @@ const GoogleMeetToolSchema = Type.Object({
),
mode: Type.Optional(
Type.String({
enum: ["agent", "bidi", "realtime", "transcribe"],
enum: ["agent", "bidi", "transcribe"],
description:
"Join mode. agent uses realtime transcription, the configured OpenClaw agent, and regular TTS. bidi uses the realtime voice model directly. realtime is a compatibility alias for agent. transcribe joins observe-only.",
"Join mode. agent uses realtime transcription, the configured OpenClaw agent, and regular TTS. bidi uses the realtime voice model directly. transcribe joins observe-only.",
}),
),
dialInNumber: Type.Optional(

View File

@@ -148,11 +148,12 @@
"help": "Legacy realtime alias setting. Use mode=agent or mode=bidi for new Meet joins."
},
"realtime.provider": {
"label": "Realtime Provider",
"help": "Defaults to OpenAI; uses OPENAI_API_KEY when no provider config is set."
"label": "Speech Provider",
"help": "Agent mode uses this for realtime transcription. Bidi mode uses it as the realtime voice provider."
},
"realtime.model": {
"label": "Realtime Model",
"label": "Bidi Realtime Model",
"help": "Only used by mode=bidi. Agent mode answers with the configured OpenClaw agent and regular TTS.",
"advanced": true
},
"realtime.instructions": {
@@ -227,7 +228,7 @@
},
"defaultMode": {
"type": "string",
"enum": ["agent", "bidi", "realtime", "transcribe"],
"enum": ["agent", "bidi", "transcribe"],
"default": "agent"
},
"chrome": {

View File

@@ -11,7 +11,7 @@ import {
listGoogleMeetCalendarEvents,
type GoogleMeetCalendarLookupResult,
} from "./calendar.js";
import type { GoogleMeetConfig, GoogleMeetMode, GoogleMeetTransport } from "./config.js";
import type { GoogleMeetConfig, GoogleMeetModeInput, GoogleMeetTransport } from "./config.js";
import { hasCreateSpaceConfigInput, resolveCreateSpaceConfig } from "./create.js";
import {
buildGoogleMeetPreflightReport,
@@ -37,7 +37,7 @@ import type { GoogleMeetRuntime } from "./runtime.js";
type JoinOptions = {
transport?: GoogleMeetTransport;
mode?: GoogleMeetMode;
mode?: GoogleMeetModeInput;
message?: string;
timeoutMs?: string;
dialInNumber?: string;
@@ -134,7 +134,7 @@ export type GoogleMeetExportManifest = {
type SetupOptions = {
json?: boolean;
mode?: GoogleMeetMode;
mode?: GoogleMeetModeInput;
transport?: GoogleMeetTransport;
};
@@ -181,7 +181,7 @@ type CreateOptions = {
entryPointAccess?: string;
join?: boolean;
transport?: GoogleMeetTransport;
mode?: GoogleMeetMode;
mode?: GoogleMeetModeInput;
message?: string;
dialInNumber?: string;
pin?: string;
@@ -349,12 +349,17 @@ function writeDoctorStatus(status: Awaited<ReturnType<GoogleMeetRuntime["status"
}
writeStdoutLine("node: %s", session.chrome?.nodeId ?? "local/none");
writeStdoutLine("audio bridge: %s", session.chrome?.audioBridge?.type ?? "none");
const bridgeProvider =
session.chrome?.audioBridge?.provider ??
session.realtime.transcriptionProvider ??
session.realtime.provider ??
"n/a";
writeStdoutLine(
"provider: %s",
session.chrome?.audioBridge?.provider ?? session.realtime.provider ?? "n/a",
session.mode === "agent" ? "transcription provider: %s" : "provider: %s",
bridgeProvider,
);
if (session.realtime.enabled) {
writeStdoutLine("realtime strategy: %s", session.realtime.strategy ?? "agent");
writeStdoutLine("talk-back mode: %s", session.realtime.strategy ?? session.mode);
}
writeStdoutLine("in call: %s", formatBoolean(health?.inCall));
writeStdoutLine("lobby waiting: %s", formatBoolean(health?.lobbyWaiting));
@@ -2268,7 +2273,7 @@ export function registerGoogleMeetCli(params: {
.command("setup")
.description("Show Google Meet transport setup status")
.option("--transport <transport>", "Transport to check: chrome, chrome-node, or twilio")
.option("--mode <mode>", "Mode to check: realtime or transcribe")
.option("--mode <mode>", "Mode to check: agent, bidi, or transcribe")
.option("--json", "Print JSON output", false)
.action(async (options: SetupOptions) => {
const rt = await params.ensureRuntime();

View File

@@ -3,7 +3,12 @@ import type { OpenClawConfig } from "openclaw/plugin-sdk/config-types";
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
import type { PluginRuntime, RuntimeLogger } from "openclaw/plugin-sdk/plugin-runtime";
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
import type { GoogleMeetConfig, GoogleMeetMode, GoogleMeetTransport } from "./config.js";
import type {
GoogleMeetConfig,
GoogleMeetMode,
GoogleMeetModeInput,
GoogleMeetTransport,
} from "./config.js";
import { addGoogleMeetSetupCheck, getGoogleMeetSetupStatus } from "./setup.js";
import { isSameMeetUrlForReuse, resolveChromeNodeInfo } from "./transports/chrome-browser-proxy.js";
import { createMeetWithBrowserProxyOnNode } from "./transports/chrome-create.js";
@@ -60,8 +65,8 @@ function resolveTransport(input: GoogleMeetTransport | undefined, config: Google
return input ?? config.defaultTransport;
}
function resolveMode(input: GoogleMeetMode | undefined, config: GoogleMeetConfig) {
return input ?? config.defaultMode;
/**
 * Pick the effective join mode for a request.
 *
 * The legacy `"realtime"` input is mapped to `"agent"`. Any other explicit
 * input is returned unchanged, and a missing input falls back to
 * `config.defaultMode`.
 */
function resolveMode(input: GoogleMeetModeInput | undefined, config: GoogleMeetConfig) {
  // Legacy alias: callers may still send "realtime"; treat it as "agent".
  if (input === "realtime") {
    return "agent";
  }
  return input ?? config.defaultMode;
}
function isGoogleMeetTalkBackMode(mode: GoogleMeetMode): boolean {
@@ -245,7 +250,7 @@ export class GoogleMeetRuntime {
async setupStatus(
options: {
transport?: GoogleMeetTransport;
mode?: GoogleMeetMode;
mode?: GoogleMeetModeInput;
dialInNumber?: string;
} = {},
) {
@@ -397,8 +402,9 @@ export class GoogleMeetRuntime {
realtime: {
enabled: isGoogleMeetTalkBackMode(mode),
strategy: mode === "bidi" ? "bidi" : "agent",
provider: this.params.config.realtime.provider,
model: this.params.config.realtime.model,
provider: mode === "bidi" ? this.params.config.realtime.provider : undefined,
model: mode === "bidi" ? this.params.config.realtime.model : undefined,
transcriptionProvider: mode === "agent" ? this.params.config.realtime.provider : undefined,
toolPolicy: this.params.config.realtime.toolPolicy,
},
notes: [],
@@ -690,7 +696,8 @@ export class GoogleMeetRuntime {
recentTranscript?: GoogleMeetChromeHealth["recentTranscript"];
session: GoogleMeetSession;
}> {
if (request.mode && isGoogleMeetTalkBackMode(request.mode)) {
const requestedMode = request.mode ? resolveMode(request.mode, this.params.config) : undefined;
if (requestedMode && isGoogleMeetTalkBackMode(requestedMode)) {
throw new Error(
"test_listen requires mode: transcribe; use test_speech for talk-back sessions.",
);

View File

@@ -1,11 +1,11 @@
import type { GoogleMeetMode, GoogleMeetTransport } from "../config.js";
import type { GoogleMeetMode, GoogleMeetModeInput, GoogleMeetTransport } from "../config.js";
type GoogleMeetSessionState = "active" | "ended";
export type GoogleMeetJoinRequest = {
url: string;
transport?: GoogleMeetTransport;
mode?: GoogleMeetMode;
mode?: GoogleMeetModeInput;
message?: string;
timeoutMs?: number;
dialInNumber?: string;
@@ -106,6 +106,7 @@ export type GoogleMeetSession = {
strategy?: string;
provider?: string;
model?: string;
transcriptionProvider?: string;
toolPolicy: string;
};
chrome?: {